Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,13 +8,15 @@ from textblob import TextBlob
|
|
| 8 |
from transformers import pipeline
|
| 9 |
import textstat
|
| 10 |
import trafilatura
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# constants
|
| 13 |
MAX_TEXT_LENGTH = 1500
|
| 14 |
|
| 15 |
-
ARTICLE_A = """
|
| 16 |
|
| 17 |
-
ARTICLE_B = """A
|
| 18 |
|
| 19 |
|
| 20 |
@st.cache_resource
|
|
@@ -41,6 +43,10 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
| 41 |
"text-classification",
|
| 42 |
model="roberta-large-mnli"
|
| 43 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
return {
|
| 46 |
"sentiment": sentiment_analyzer,
|
|
@@ -50,13 +56,6 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
| 50 |
"nli": nli_classifier,
|
| 51 |
}
|
| 52 |
|
| 53 |
-
return {
|
| 54 |
-
"sentiment": sentiment_analyzer,
|
| 55 |
-
"emotion": emotion_classifier,
|
| 56 |
-
"ner": ner_extractor,
|
| 57 |
-
"keyword": keyword_extractor,
|
| 58 |
-
}
|
| 59 |
-
|
| 60 |
|
| 61 |
def analyze_article(text: str) -> dict:
|
| 62 |
"""Analyzes framing using semantic keyphrases, sentiment, emotion, readability and NER."""
|
|
@@ -94,6 +93,16 @@ def analyze_article(text: str) -> dict:
|
|
| 94 |
)
|
| 95 |
extracted_keywords = [kw[0] for kw in keyword_results]
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# Named Entity Recognition
|
| 98 |
ner_results = models["ner"](safe_text)
|
| 99 |
extracted_entities = list(set([ent["word"] for ent in ner_results if ent["score"] > 0.6]))
|
|
@@ -179,12 +188,34 @@ def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
|
|
| 179 |
|
| 180 |
|
| 181 |
def fetch_article_text(url: str) -> str:
|
| 182 |
-
"""Scrapes
|
| 183 |
downloaded = trafilatura.fetch_url(url)
|
| 184 |
if downloaded:
|
| 185 |
text = trafilatura.extract(downloaded)
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
|
| 190 |
def check_contradiction(text_a: str, text_b: str) -> dict:
|
|
@@ -199,14 +230,16 @@ def check_contradiction(text_a: str, text_b: str) -> dict:
|
|
| 199 |
return {"relationship": result["label"], "confidence": result["score"]}
|
| 200 |
|
| 201 |
|
|
|
|
|
|
|
|
|
|
| 202 |
# STATE MANAGEMENT
|
| 203 |
if "results_a" not in st.session_state:
|
| 204 |
st.session_state.results_a = None
|
| 205 |
if "results_b" not in st.session_state:
|
| 206 |
st.session_state.results_b = None
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
|
| 210 |
|
| 211 |
st.title("FrameVis")
|
| 212 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
|
@@ -235,11 +268,12 @@ with col2:
|
|
| 235 |
|
| 236 |
st.write("")
|
| 237 |
|
| 238 |
-
#
|
| 239 |
-
if st.button("Analyze
|
| 240 |
-
with st.spinner("Analyzing framing semantics for both sources."):
|
| 241 |
st.session_state.results_a = analyze_article(user_article_a)
|
| 242 |
st.session_state.results_b = analyze_article(user_article_b)
|
|
|
|
| 243 |
|
| 244 |
# Analysis Display
|
| 245 |
if st.session_state.results_a and st.session_state.results_b:
|
|
@@ -247,14 +281,15 @@ if st.session_state.results_a and st.session_state.results_b:
|
|
| 247 |
st.markdown("### Framing Analytics & Comparison")
|
| 248 |
|
| 249 |
# Display Contradictions
|
| 250 |
-
nli_result =
|
| 251 |
-
if nli_result
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
|
|
|
| 258 |
st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
|
| 259 |
|
| 260 |
res_col1, res_col2 = st.columns(2)
|
|
@@ -263,10 +298,12 @@ if st.session_state.results_a and st.session_state.results_b:
|
|
| 263 |
with res_col1:
|
| 264 |
r_a = st.session_state.results_a
|
| 265 |
st.markdown("#### Source A Breakdown")
|
| 266 |
-
m1, m2
|
|
|
|
| 267 |
m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}")
|
| 268 |
m2.metric("Primary Emotion", r_a['primary_tone'].title())
|
| 269 |
-
m3.metric("
|
|
|
|
| 270 |
|
| 271 |
st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True)
|
| 272 |
|
|
@@ -281,10 +318,12 @@ if st.session_state.results_a and st.session_state.results_b:
|
|
| 281 |
with res_col2:
|
| 282 |
r_b = st.session_state.results_b
|
| 283 |
st.markdown("#### Source B Breakdown")
|
| 284 |
-
m1, m2
|
|
|
|
| 285 |
m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}")
|
| 286 |
m2.metric("Primary Emotion", r_b['primary_tone'].title())
|
| 287 |
-
m3.metric("
|
|
|
|
| 288 |
|
| 289 |
st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True)
|
| 290 |
|
|
|
|
| 8 |
from transformers import pipeline
|
| 9 |
import textstat
|
| 10 |
import trafilatura
|
| 11 |
+
import requests
|
| 12 |
+
from bs4 import BeautifulSoup
|
| 13 |
|
| 14 |
# constants
|
| 15 |
MAX_TEXT_LENGTH = 1500
|
| 16 |
|
| 17 |
+
ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
|
| 18 |
|
| 19 |
+
ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
|
| 20 |
|
| 21 |
|
| 22 |
@st.cache_resource
|
|
|
|
| 43 |
"text-classification",
|
| 44 |
model="roberta-large-mnli"
|
| 45 |
)
|
| 46 |
+
theme_classifier = pipeline(
|
| 47 |
+
"zero-shot-classification",
|
| 48 |
+
model="typeform/distilbert-base-uncased-mnli"
|
| 49 |
+
)
|
| 50 |
|
| 51 |
return {
|
| 52 |
"sentiment": sentiment_analyzer,
|
|
|
|
| 56 |
"nli": nli_classifier,
|
| 57 |
}
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
def analyze_article(text: str) -> dict:
|
| 61 |
"""Analyzes framing using semantic keyphrases, sentiment, emotion, readability and NER."""
|
|
|
|
| 93 |
)
|
| 94 |
extracted_keywords = [kw[0] for kw in keyword_results]
|
| 95 |
|
| 96 |
+
# Thematic Framing
|
| 97 |
+
framing_dimensions = [
|
| 98 |
+
"economic consequences",
|
| 99 |
+
"moral and ethical fairness",
|
| 100 |
+
"legal and bureaucratic",
|
| 101 |
+
"public safety and health"
|
| 102 |
+
]
|
| 103 |
+
theme_result = models["theme"](safe_text, framing_dimensions)
|
| 104 |
+
primary_theme = theme_result["labels"][0]
|
| 105 |
+
|
| 106 |
# Named Entity Recognition
|
| 107 |
ner_results = models["ner"](safe_text)
|
| 108 |
extracted_entities = list(set([ent["word"] for ent in ner_results if ent["score"] > 0.6]))
|
|
|
|
| 188 |
|
| 189 |
|
| 190 |
def fetch_article_text(url: str) -> str:
    """Return the article text found at ``url``, or an ``"Error: ..."`` string.

    Two strategies are tried in order:

    1. ``trafilatura.fetch_url`` followed by ``trafilatura.extract``.
    2. A plain ``requests.get`` with browser-like headers, parsed with
       BeautifulSoup; the text of every ``<p>`` element is joined with spaces.

    A result is accepted only when it is longer than 200 characters.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted text on success. On failure, a human-readable message
        beginning with ``"Error:"`` is returned instead of raising, so callers
        receive a string in every case.
    """
    downloaded = trafilatura.fetch_url(url)
    if downloaded:
        text = trafilatura.extract(downloaded)
        if text and len(text) > 200:
            return text

    # Fallback: fetch the page directly while presenting browser-like headers.
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
            'Accept-Language': 'en-US,en;q=0.9',
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Reject 4xx/5xx responses so the body of an error page is never
        # returned as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Drop non-content tags before collecting paragraph text.
        for tag in soup(["script", "style"]):
            tag.extract()

        # Strip before measuring so whitespace-only padding cannot satisfy
        # the minimum-length check.
        text = ' '.join(p.get_text() for p in soup.find_all('p')).strip()

        if len(text) > 200:
            return text

    except Exception:
        # Deliberate best-effort boundary: any fetch or parse failure is
        # reported to the caller as a message rather than raised.
        return "Error: Could not fetch URL. Connection failed."

    return "Error: Could not extract text. The site may be protected by hard paywalls."
|
| 219 |
|
| 220 |
|
| 221 |
def check_contradiction(text_a: str, text_b: str) -> dict:
|
|
|
|
| 230 |
return {"relationship": result["label"], "confidence": result["score"]}
|
| 231 |
|
| 232 |
|
| 233 |
+
# USER INTERFACE (st.set_page_config must be the first Streamlit command executed)
|
| 234 |
+
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
|
| 235 |
+
|
| 236 |
# STATE MANAGEMENT
|
| 237 |
if "results_a" not in st.session_state:
|
| 238 |
st.session_state.results_a = None
|
| 239 |
if "results_b" not in st.session_state:
|
| 240 |
st.session_state.results_b = None
|
| 241 |
+
if "nli_result" not in st.session_state:
|
| 242 |
+
st.session_state.nli_result = None
|
|
|
|
| 243 |
|
| 244 |
st.title("FrameVis")
|
| 245 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
|
|
|
| 268 |
|
| 269 |
st.write("")
|
| 270 |
|
| 271 |
+
# Execution button
|
| 272 |
+
if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
|
| 273 |
+
with st.spinner("Analyzing framing semantics for both sources..."):
|
| 274 |
st.session_state.results_a = analyze_article(user_article_a)
|
| 275 |
st.session_state.results_b = analyze_article(user_article_b)
|
| 276 |
+
st.session_state.nli_result = check_contradiction(user_article_a, user_article_b)
|
| 277 |
|
| 278 |
# Analysis Display
|
| 279 |
if st.session_state.results_a and st.session_state.results_b:
|
|
|
|
| 281 |
st.markdown("### Framing Analytics & Comparison")
|
| 282 |
|
| 283 |
# Display Contradictions
|
| 284 |
+
nli_result = st.session_state.nli_result
|
| 285 |
+
if nli_result:
|
| 286 |
+
if nli_result["relationship"] == "CONTRADICTION":
|
| 287 |
+
st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {nli_result['confidence']:.2f}) - These sources are disputing each other's facts.")
|
| 288 |
+
elif nli_result["relationship"] == "ENTAILMENT":
|
| 289 |
+
st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {nli_result['confidence']:.2f}) - These sources agree on the core premise.")
|
| 290 |
+
else:
|
| 291 |
+
st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
|
| 292 |
+
|
| 293 |
st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
|
| 294 |
|
| 295 |
res_col1, res_col2 = st.columns(2)
|
|
|
|
| 298 |
with res_col1:
|
| 299 |
r_a = st.session_state.results_a
|
| 300 |
st.markdown("#### Source A Breakdown")
|
| 301 |
+
m1, m2 = st.columns(2)
|
| 302 |
+
m3, m4 = st.columns(2)
|
| 303 |
m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}")
|
| 304 |
m2.metric("Primary Emotion", r_a['primary_tone'].title())
|
| 305 |
+
m3.metric("Framing Lens", r_a['primary_theme'].title())
|
| 306 |
+
m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
|
| 307 |
|
| 308 |
st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True)
|
| 309 |
|
|
|
|
| 318 |
with res_col2:
|
| 319 |
r_b = st.session_state.results_b
|
| 320 |
st.markdown("#### Source B Breakdown")
|
| 321 |
+
m1, m2 = st.columns(2)
|
| 322 |
+
m3, m4 = st.columns(2)
|
| 323 |
m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}")
|
| 324 |
m2.metric("Primary Emotion", r_b['primary_tone'].title())
|
| 325 |
+
m3.metric("Framing Lens", r_b['primary_theme'].title())
|
| 326 |
+
m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
|
| 327 |
|
| 328 |
st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True)
|
| 329 |
|