NKessler committed on
Commit
7c6c69f
·
verified ·
1 Parent(s): 92ec6e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -34
app.py CHANGED
@@ -33,11 +33,6 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
33
  model="SamLowe/roberta-base-go_emotions",
34
  top_k=5
35
  )
36
- ner_extractor = pipeline(
37
- "ner",
38
- model="dslim/bert-base-NER",
39
- aggregation_strategy="simple"
40
- )
41
  nli_classifier = pipeline(
42
  "text-classification",
43
  model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
@@ -50,7 +45,6 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
50
  return {
51
  "sentiment": sentiment_analyzer,
52
  "emotion": emotion_classifier,
53
- "ner": ner_extractor,
54
  "nli": nli_classifier,
55
  "theme": theme_classifier,
56
  }
@@ -99,11 +93,7 @@ def analyze_article(text: str) -> dict:
99
  ]
100
  theme_result = models["theme"](safe_text, framing_dimensions)
101
  primary_theme = theme_result["labels"][0]
102
-
103
- # Named Entity Recognition
104
- ner_results = models["ner"](safe_text)
105
- extracted_entities = list(set([ent["word"] for ent in ner_results if ent["score"] > 0.6]))
106
-
107
  # Subjectivity and Readability Analysis
108
  subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
109
  raw_reading_ease = textstat.flesch_reading_ease(safe_text)
@@ -117,7 +107,6 @@ def analyze_article(text: str) -> dict:
117
  "primary_tone": primary_tone,
118
  "primary_theme": primary_theme,
119
  "tone_scores": tone_scores,
120
- "entities": extracted_entities,
121
  }
122
 
123
 
@@ -184,23 +173,31 @@ def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figur
184
  return fig
185
 
186
 
187
- def _highlight_subjective_sentences(text: str) -> str:
188
- """Highlights subjective/opinionated sentences in the text."""
189
- raw_sentences = text.replace("?", ".").replace("!", ".").split(".")
190
- sentences = [s.strip() + "." for s in raw_sentences if len(s.strip()) > 20]
191
-
192
- scored_sentences = [(sentence, TextBlob(sentence).sentiment.subjectivity) for sentence in sentences]
193
-
194
- top_subjective = sorted(scored_sentences, key=lambda x: x[1], reverse=True)[:3]
195
- top_sentences = [item[0] for item in top_subjective if item[1] > 0.3]
196
-
197
- if not top_sentences:
198
- return text[:250] + "..."
199
-
200
- highlighted_text = " ... ".join(top_sentences)
201
- return f"<span style='background-color: #e0e7ff; color: #3730a3; font-weight: 500; padding: 0.2rem 0.4rem; border-radius: 8px; font-size: 0.95em;'>{highlighted_text}</span>"
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
 
204
  def fetch_article_text(url: str) -> str:
205
  """Scrapes article text."""
206
  downloaded = trafilatura.fetch_url(url)
@@ -303,7 +300,7 @@ st.divider()
303
  with st.spinner("Starting NLP models."):
304
  _load_nlp_models()
305
 
306
- input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=1)
307
 
308
  col1, col2 = st.columns(2)
309
 
@@ -372,9 +369,6 @@ if st.session_state.results_a and st.session_state.results_b:
372
  m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
373
 
374
  st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
375
-
376
- if r_a["entities"]:
377
- st.markdown(f"**Extracted Entities:** `{', '.join(r_a['entities'])}`")
378
 
379
  st.markdown("**Key Framing Language:**")
380
  annotated_text = _highlight_subjective_sentences(user_article_a)
@@ -392,9 +386,6 @@ if st.session_state.results_a and st.session_state.results_b:
392
  m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
393
 
394
  st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
395
-
396
- if r_b["entities"]:
397
- st.markdown(f"**Extracted Entities:** `{', '.join(r_b['entities'])}`")
398
 
399
  st.markdown("**Key Framing Language:**")
400
  annotated_text = _highlight_subjective_sentences(user_article_b)
 
33
  model="SamLowe/roberta-base-go_emotions",
34
  top_k=5
35
  )
 
 
 
 
 
36
  nli_classifier = pipeline(
37
  "text-classification",
38
  model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
 
45
  return {
46
  "sentiment": sentiment_analyzer,
47
  "emotion": emotion_classifier,
 
48
  "nli": nli_classifier,
49
  "theme": theme_classifier,
50
  }
 
93
  ]
94
  theme_result = models["theme"](safe_text, framing_dimensions)
95
  primary_theme = theme_result["labels"][0]
96
+
 
 
 
 
97
  # Subjectivity and Readability Analysis
98
  subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
99
  raw_reading_ease = textstat.flesch_reading_ease(safe_text)
 
107
  "primary_tone": primary_tone,
108
  "primary_theme": primary_theme,
109
  "tone_scores": tone_scores,
 
110
  }
111
 
112
 
 
173
  return fig
174
 
175
 
176
def _highlight_framing_words(text: str) -> str:
    """Return an HTML snippet of the opening sentences with loaded words highlighted.

    The snippet is built from the first three sentences of ``text`` (split
    after ``.``, ``!`` or ``?`` followed by one or more spaces). Words longer
    than two characters whose TextBlob subjectivity exceeds 0.5, or whose
    absolute polarity exceeds 0.3, are wrapped in a styled ``<span>``.

    Args:
        text: Raw article text (pasted or scraped).

    Returns:
        HTML markup for the snippet, followed by ``"..."`` when ``text``
        contains more than three sentences. Returns ``""`` when the snippet
        is empty.
    """
    # Imported locally so this block does not depend on the file-level
    # import list, which is not visible here.
    import html
    import re

    raw_sentences = re.split(r'(?<=[.!?]) +', text)
    snippet = " ".join(raw_sentences[:3])
    if not snippet:
        return ""

    suffix = "..." if len(raw_sentences) > 3 else ""

    # Collect target words case-insensitively: matching below ignores case,
    # so keeping both "Great" and "great" would wrap the same word twice.
    target_words = set()
    for word in TextBlob(snippet).words:
        if len(word) <= 2:
            continue
        w_sentiment = TextBlob(word).sentiment
        if w_sentiment.subjectivity > 0.5 or abs(w_sentiment.polarity) > 0.3:
            target_words.add(str(word).lower())

    # NOTE(review): the result is presumably rendered as raw HTML by the
    # caller, so the article text itself is escaped - confirm at call site.
    if not target_words:
        return html.escape(snippet, quote=False) + suffix

    # One combined pattern applied in a single pass over the ORIGINAL text,
    # so a later word can never match inside markup inserted for an earlier
    # one (e.g. inside the span's style attribute).
    alternation = "|".join(
        re.escape(word) for word in sorted(target_words, key=len, reverse=True)
    )
    pattern = re.compile(r'\b(' + alternation + r')\b', flags=re.IGNORECASE)

    span_open = "<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>"

    # With a single capture group, split() alternates plain text (even
    # indices) and matched words (odd indices).
    rendered = []
    for index, piece in enumerate(pattern.split(snippet)):
        escaped = html.escape(piece, quote=False)
        if index % 2:
            rendered.append(f"{span_open}{escaped}</span>")
        else:
            rendered.append(escaped)

    return "".join(rendered) + suffix


# Call sites visible alongside this change still use the previous helper
# name; keep it bound so they continue to resolve.
_highlight_subjective_sentences = _highlight_framing_words
199
 
200
+
201
  def fetch_article_text(url: str) -> str:
202
  """Scrapes article text."""
203
  downloaded = trafilatura.fetch_url(url)
 
300
  with st.spinner("Starting NLP models."):
301
  _load_nlp_models()
302
 
303
+ input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
304
 
305
  col1, col2 = st.columns(2)
306
 
 
369
  m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
370
 
371
  st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
 
 
 
372
 
373
  st.markdown("**Key Framing Language:**")
374
  annotated_text = _highlight_subjective_sentences(user_article_a)
 
386
  m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
387
 
388
  st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
 
 
 
389
 
390
  st.markdown("**Key Framing Language:**")
391
  annotated_text = _highlight_subjective_sentences(user_article_b)