Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,11 +33,6 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
| 33 |
model="SamLowe/roberta-base-go_emotions",
|
| 34 |
top_k=5
|
| 35 |
)
|
| 36 |
-
ner_extractor = pipeline(
|
| 37 |
-
"ner",
|
| 38 |
-
model="dslim/bert-base-NER",
|
| 39 |
-
aggregation_strategy="simple"
|
| 40 |
-
)
|
| 41 |
nli_classifier = pipeline(
|
| 42 |
"text-classification",
|
| 43 |
model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
|
|
@@ -50,7 +45,6 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
| 50 |
return {
|
| 51 |
"sentiment": sentiment_analyzer,
|
| 52 |
"emotion": emotion_classifier,
|
| 53 |
-
"ner": ner_extractor,
|
| 54 |
"nli": nli_classifier,
|
| 55 |
"theme": theme_classifier,
|
| 56 |
}
|
|
@@ -99,11 +93,7 @@ def analyze_article(text: str) -> dict:
|
|
| 99 |
]
|
| 100 |
theme_result = models["theme"](safe_text, framing_dimensions)
|
| 101 |
primary_theme = theme_result["labels"][0]
|
| 102 |
-
|
| 103 |
-
# Named Entity Recognition
|
| 104 |
-
ner_results = models["ner"](safe_text)
|
| 105 |
-
extracted_entities = list(set([ent["word"] for ent in ner_results if ent["score"] > 0.6]))
|
| 106 |
-
|
| 107 |
# Subjectivity and Readability Analysis
|
| 108 |
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
| 109 |
raw_reading_ease = textstat.flesch_reading_ease(safe_text)
|
|
@@ -117,7 +107,6 @@ def analyze_article(text: str) -> dict:
|
|
| 117 |
"primary_tone": primary_tone,
|
| 118 |
"primary_theme": primary_theme,
|
| 119 |
"tone_scores": tone_scores,
|
| 120 |
-
"entities": extracted_entities,
|
| 121 |
}
|
| 122 |
|
| 123 |
|
|
@@ -184,23 +173,31 @@ def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figur
|
|
| 184 |
return fig
|
| 185 |
|
| 186 |
|
| 187 |
-
def
|
| 188 |
-
"""Highlights subjective
|
| 189 |
-
raw_sentences =
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
top_subjective = sorted(scored_sentences, key=lambda x: x[1], reverse=True)[:3]
|
| 195 |
-
top_sentences = [item[0] for item in top_subjective if item[1] > 0.3]
|
| 196 |
-
|
| 197 |
-
if not top_sentences:
|
| 198 |
-
return text[:250] + "..."
|
| 199 |
-
|
| 200 |
-
highlighted_text = " ... ".join(top_sentences)
|
| 201 |
-
return f"<span style='background-color: #e0e7ff; color: #3730a3; font-weight: 500; padding: 0.2rem 0.4rem; border-radius: 8px; font-size: 0.95em;'>{highlighted_text}</span>"
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
|
|
|
| 204 |
def fetch_article_text(url: str) -> str:
|
| 205 |
"""Scrapes article text."""
|
| 206 |
downloaded = trafilatura.fetch_url(url)
|
|
@@ -303,7 +300,7 @@ st.divider()
|
|
| 303 |
with st.spinner("Starting NLP models."):
|
| 304 |
_load_nlp_models()
|
| 305 |
|
| 306 |
-
input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=
|
| 307 |
|
| 308 |
col1, col2 = st.columns(2)
|
| 309 |
|
|
@@ -372,9 +369,6 @@ if st.session_state.results_a and st.session_state.results_b:
|
|
| 372 |
m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
|
| 373 |
|
| 374 |
st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
|
| 375 |
-
|
| 376 |
-
if r_a["entities"]:
|
| 377 |
-
st.markdown(f"**Extracted Entities:** `{', '.join(r_a['entities'])}`")
|
| 378 |
|
| 379 |
st.markdown("**Key Framing Language:**")
|
| 380 |
annotated_text = _highlight_subjective_sentences(user_article_a)
|
|
@@ -392,9 +386,6 @@ if st.session_state.results_a and st.session_state.results_b:
|
|
| 392 |
m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
|
| 393 |
|
| 394 |
st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
|
| 395 |
-
|
| 396 |
-
if r_b["entities"]:
|
| 397 |
-
st.markdown(f"**Extracted Entities:** `{', '.join(r_b['entities'])}`")
|
| 398 |
|
| 399 |
st.markdown("**Key Framing Language:**")
|
| 400 |
annotated_text = _highlight_subjective_sentences(user_article_b)
|
|
|
|
| 33 |
model="SamLowe/roberta-base-go_emotions",
|
| 34 |
top_k=5
|
| 35 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
nli_classifier = pipeline(
|
| 37 |
"text-classification",
|
| 38 |
model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
|
|
|
|
| 45 |
return {
|
| 46 |
"sentiment": sentiment_analyzer,
|
| 47 |
"emotion": emotion_classifier,
|
|
|
|
| 48 |
"nli": nli_classifier,
|
| 49 |
"theme": theme_classifier,
|
| 50 |
}
|
|
|
|
| 93 |
]
|
| 94 |
theme_result = models["theme"](safe_text, framing_dimensions)
|
| 95 |
primary_theme = theme_result["labels"][0]
|
| 96 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# Subjectivity and Readability Analysis
|
| 98 |
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
| 99 |
raw_reading_ease = textstat.flesch_reading_ease(safe_text)
|
|
|
|
| 107 |
"primary_tone": primary_tone,
|
| 108 |
"primary_theme": primary_theme,
|
| 109 |
"tone_scores": tone_scores,
|
|
|
|
| 110 |
}
|
| 111 |
|
| 112 |
|
|
|
|
| 173 |
return fig
|
| 174 |
|
| 175 |
|
| 176 |
+
def _highlight_framing_words(text: str) -> str:
|
| 177 |
+
"""Highlights subjective or emotional words in the text snippet."""
|
| 178 |
+
raw_sentences = re.split(r'(?<=[.!?]) +', text)
|
| 179 |
+
snippet = " ".join(raw_sentences[:3])
|
| 180 |
+
if not snippet:
|
| 181 |
+
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
+
blob = TextBlob(snippet)
|
| 184 |
+
target_words = set()
|
| 185 |
+
|
| 186 |
+
for word in blob.words:
|
| 187 |
+
w_sentiment = TextBlob(word).sentiment
|
| 188 |
+
if w_sentiment.subjectivity > 0.5 or abs(w_sentiment.polarity) > 0.3:
|
| 189 |
+
if len(word) > 2:
|
| 190 |
+
target_words.add(str(word))
|
| 191 |
+
|
| 192 |
+
highlighted_snippet = snippet
|
| 193 |
+
for word in target_words:
|
| 194 |
+
pattern = r'\b(' + re.escape(word) + r')\b'
|
| 195 |
+
replacement = r"<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>\1</span>"
|
| 196 |
+
highlighted_snippet = re.sub(pattern, replacement, highlighted_snippet, flags=re.IGNORECASE)
|
| 197 |
+
|
| 198 |
+
return highlighted_snippet + ("..." if len(raw_sentences) > 3 else "")
|
| 199 |
|
| 200 |
+
|
| 201 |
def fetch_article_text(url: str) -> str:
|
| 202 |
"""Scrapes article text."""
|
| 203 |
downloaded = trafilatura.fetch_url(url)
|
|
|
|
| 300 |
with st.spinner("Starting NLP models."):
|
| 301 |
_load_nlp_models()
|
| 302 |
|
| 303 |
+
input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
|
| 304 |
|
| 305 |
col1, col2 = st.columns(2)
|
| 306 |
|
|
|
|
| 369 |
m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
|
| 370 |
|
| 371 |
st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
|
|
|
|
|
|
|
|
|
|
| 372 |
|
| 373 |
st.markdown("**Key Framing Language:**")
|
| 374 |
annotated_text = _highlight_subjective_sentences(user_article_a)
|
|
|
|
| 386 |
m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
|
| 387 |
|
| 388 |
st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
|
|
|
|
|
|
|
|
|
|
| 389 |
|
| 390 |
st.markdown("**Key Framing Language:**")
|
| 391 |
annotated_text = _highlight_subjective_sentences(user_article_b)
|