NKessler committed on
Commit
ac9d833
·
verified ·
1 Parent(s): 589cdf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -30
app.py CHANGED
@@ -8,13 +8,15 @@ from textblob import TextBlob
8
  from transformers import pipeline
9
  import textstat
10
  import trafilatura
 
 
11
 
12
  # constants
13
  MAX_TEXT_LENGTH = 1500
14
 
15
- ARTICLE_A = """Global leaders achieved a historic breakthrough today, signing a comprehensive climate accord aimed at drastically slashing carbon emissions by 2030. Environmental advocates are celebrating the mandate, which forces heavy-polluting industries to finally take accountability for their ecological damage. While corporations warn of transition costs, scientists emphasize that failing to act now would result in catastrophic, irreversible damage to our planet's fragile ecosystems."""
16
 
17
- ARTICLE_B = """A sweeping new climate agreement signed today is drawing fierce criticism from industry leaders, who warn the aggressive emission targets will cripple economic growth. The heavy-handed regulations impose massive compliance costs on the manufacturing and energy sectors, inevitably leading to significant job losses and higher prices for consumers. Critics argue the rushed accord prioritizes bureaucratic posturing over practical, market-driven solutions to environmental concerns."""
18
 
19
 
20
  @st.cache_resource
@@ -41,6 +43,10 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
41
  "text-classification",
42
  model="roberta-large-mnli"
43
  )
 
 
 
 
44
 
45
  return {
46
  "sentiment": sentiment_analyzer,
@@ -50,13 +56,6 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
50
  "nli": nli_classifier,
51
  }
52
 
53
- return {
54
- "sentiment": sentiment_analyzer,
55
- "emotion": emotion_classifier,
56
- "ner": ner_extractor,
57
- "keyword": keyword_extractor,
58
- }
59
-
60
 
61
  def analyze_article(text: str) -> dict:
62
  """Analyzes framing using semantic keyphrases, sentiment, emotion, readability and NER."""
@@ -94,6 +93,16 @@ def analyze_article(text: str) -> dict:
94
  )
95
  extracted_keywords = [kw[0] for kw in keyword_results]
96
 
 
 
 
 
 
 
 
 
 
 
97
  # Named Entity Recognition
98
  ner_results = models["ner"](safe_text)
99
  extracted_entities = list(set([ent["word"] for ent in ner_results if ent["score"] > 0.6]))
@@ -179,12 +188,34 @@ def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
179
 
180
 
181
  def fetch_article_text(url: str) -> str:
182
- """Scrapes clean article text from a given URL."""
183
  downloaded = trafilatura.fetch_url(url)
184
  if downloaded:
185
  text = trafilatura.extract(downloaded)
186
- return text if text else "Error: Could not extract text."
187
- return "Error: Could not fetch URL. It might be protected."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
 
190
  def check_contradiction(text_a: str, text_b: str) -> dict:
@@ -199,14 +230,16 @@ def check_contradiction(text_a: str, text_b: str) -> dict:
199
  return {"relationship": result["label"], "confidence": result["score"]}
200
 
201
 
 
 
 
202
  # STATE MANAGEMENT
203
  if "results_a" not in st.session_state:
204
  st.session_state.results_a = None
205
  if "results_b" not in st.session_state:
206
  st.session_state.results_b = None
207
-
208
- # USER INTERFACE
209
- st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
210
 
211
  st.title("FrameVis")
212
  st.markdown("##### Media bias and framing effects across global news sources.")
@@ -235,11 +268,12 @@ with col2:
235
 
236
  st.write("")
237
 
238
- # Single unified execution button
239
- if st.button("Analyze & Compare Sources", use_container_width=True, type="primary"):
240
- with st.spinner("Analyzing framing semantics for both sources."):
241
  st.session_state.results_a = analyze_article(user_article_a)
242
  st.session_state.results_b = analyze_article(user_article_b)
 
243
 
244
  # Analysis Display
245
  if st.session_state.results_a and st.session_state.results_b:
@@ -247,14 +281,15 @@ if st.session_state.results_a and st.session_state.results_b:
247
  st.markdown("### Framing Analytics & Comparison")
248
 
249
  # Display Contradictions
250
- nli_result = check_contradiction(user_article_a, user_article_b)
251
- if nli_result["relationship"] == "CONTRADICTION":
252
- st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {nli_result['confidence']:.2f}) - These sources are actively disputing each other's foundational facts.")
253
- elif nli_result["relationship"] == "ENTAILMENT":
254
- st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {nli_result['confidence']:.2f}) - These sources agree on the core premise.")
255
- else:
256
- st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
257
-
 
258
  st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
259
 
260
  res_col1, res_col2 = st.columns(2)
@@ -263,10 +298,12 @@ if st.session_state.results_a and st.session_state.results_b:
263
  with res_col1:
264
  r_a = st.session_state.results_a
265
  st.markdown("#### Source A Breakdown")
266
- m1, m2, m3 = st.columns(3)
 
267
  m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}")
268
  m2.metric("Primary Emotion", r_a['primary_tone'].title())
269
- m3.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
 
270
 
271
  st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True)
272
 
@@ -281,10 +318,12 @@ if st.session_state.results_a and st.session_state.results_b:
281
  with res_col2:
282
  r_b = st.session_state.results_b
283
  st.markdown("#### Source B Breakdown")
284
- m1, m2, m3 = st.columns(3)
 
285
  m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}")
286
  m2.metric("Primary Emotion", r_b['primary_tone'].title())
287
- m3.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
 
288
 
289
  st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True)
290
 
 
8
  from transformers import pipeline
9
  import textstat
10
  import trafilatura
11
+ import requests
12
+ from bs4 import BeautifulSoup
13
 
14
  # constants
15
  MAX_TEXT_LENGTH = 1500
16
 
17
+ ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
18
 
19
+ ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
20
 
21
 
22
  @st.cache_resource
 
43
  "text-classification",
44
  model="roberta-large-mnli"
45
  )
46
+ theme_classifier = pipeline(
47
+ "zero-shot-classification",
48
+ model="typeform/distilbert-base-uncased-mnli"
49
+ )
50
 
51
  return {
52
  "sentiment": sentiment_analyzer,
 
56
  "nli": nli_classifier,
57
  }
58
 
 
 
 
 
 
 
 
59
 
60
  def analyze_article(text: str) -> dict:
61
  """Analyzes framing using semantic keyphrases, sentiment, emotion, readability and NER."""
 
93
  )
94
  extracted_keywords = [kw[0] for kw in keyword_results]
95
 
96
+ # Thematic Framing
97
+ framing_dimensions = [
98
+ "economic consequences",
99
+ "moral and ethical fairness",
100
+ "legal and bureaucratic",
101
+ "public safety and health"
102
+ ]
103
+ theme_result = models["theme"](safe_text, framing_dimensions)
104
+ primary_theme = theme_result["labels"][0]
105
+
106
  # Named Entity Recognition
107
  ner_results = models["ner"](safe_text)
108
  extracted_entities = list(set([ent["word"] for ent in ner_results if ent["score"] > 0.6]))
 
188
 
189
 
190
def fetch_article_text(url: str) -> str:
    """Scrape readable article text from a URL.

    Tries trafilatura first (best-quality boilerplate removal); if that
    yields nothing substantial, falls back to a direct requests +
    BeautifulSoup paragraph scrape with a browser-like User-Agent.

    Args:
        url: The article URL to fetch.

    Returns:
        The extracted article text, or a human-readable "Error: ..."
        string on failure (callers display this string, so the error
        contract must be preserved).
    """
    # Primary path: trafilatura handles fetching and boilerplate removal.
    downloaded = trafilatura.fetch_url(url)
    if downloaded:
        text = trafilatura.extract(downloaded)
        # Require a minimum length so a cookie banner or stub page
        # isn't mistaken for the article body.
        if text and len(text) > 200:
            return text

    # Fallback path: raw HTTP fetch with a browser-like UA, then join
    # all <p> tags. Cruder, but works on some sites trafilatura misses.
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
            'Accept-Language': 'en-US,en;q=0.9',
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Fail fast on 4xx/5xx so we don't scrape an error page as if
        # it were article text (original parsed any response body).
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove script/style nodes entirely so their contents don't
        # leak into the joined text. decompose() destroys the subtree
        # (extract() only detaches it).
        for tag in soup(["script", "style"]):
            tag.decompose()

        paragraphs = soup.find_all('p')
        text = ' '.join(p.get_text() for p in paragraphs)

        if text and len(text) > 200:
            return text.strip()

    except Exception:
        # Best-effort contract: report failure as a string rather than
        # crash the Streamlit UI. (Was an f-string with no placeholders
        # and an unused `e` variable.)
        return "Error: Could not fetch URL. Connection failed."

    return "Error: Could not extract text. The site may be protected by hard paywalls."
 
220
 
221
  def check_contradiction(text_a: str, text_b: str) -> dict:
 
230
  return {"relationship": result["label"], "confidence": result["score"]}
231
 
232
 
233
+ # USER INTERFACE (Must be the first Streamlit command)
234
+ st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
235
+
236
  # STATE MANAGEMENT
237
  if "results_a" not in st.session_state:
238
  st.session_state.results_a = None
239
  if "results_b" not in st.session_state:
240
  st.session_state.results_b = None
241
+ if "nli_result" not in st.session_state:
242
+ st.session_state.nli_result = None
 
243
 
244
  st.title("FrameVis")
245
  st.markdown("##### Media bias and framing effects across global news sources.")
 
268
 
269
  st.write("")
270
 
271
+ # Execution button
272
+ if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
273
+ with st.spinner("Analyzing framing semantics for both sources..."):
274
  st.session_state.results_a = analyze_article(user_article_a)
275
  st.session_state.results_b = analyze_article(user_article_b)
276
+ st.session_state.nli_result = check_contradiction(user_article_a, user_article_b)
277
 
278
  # Analysis Display
279
  if st.session_state.results_a and st.session_state.results_b:
 
281
  st.markdown("### Framing Analytics & Comparison")
282
 
283
  # Display Contradictions
284
+ nli_result = st.session_state.nli_result
285
+ if nli_result:
286
+ if nli_result["relationship"] == "CONTRADICTION":
287
+ st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {nli_result['confidence']:.2f}) - These sources are disputing each other's facts.")
288
+ elif nli_result["relationship"] == "ENTAILMENT":
289
+ st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {nli_result['confidence']:.2f}) - These sources agree on the core premise.")
290
+ else:
291
+ st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
292
+
293
  st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
294
 
295
  res_col1, res_col2 = st.columns(2)
 
298
  with res_col1:
299
  r_a = st.session_state.results_a
300
  st.markdown("#### Source A Breakdown")
301
+ m1, m2 = st.columns(2)
302
+ m3, m4 = st.columns(2)
303
  m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}")
304
  m2.metric("Primary Emotion", r_a['primary_tone'].title())
305
+ m3.metric("Framing Lens", r_a['primary_theme'].title())
306
+ m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
307
 
308
  st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True)
309
 
 
318
  with res_col2:
319
  r_b = st.session_state.results_b
320
  st.markdown("#### Source B Breakdown")
321
+ m1, m2 = st.columns(2)
322
+ m3, m4 = st.columns(2)
323
  m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}")
324
  m2.metric("Primary Emotion", r_b['primary_tone'].title())
325
+ m3.metric("Framing Lens", r_b['primary_theme'].title())
326
+ m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
327
 
328
  st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True)
329