Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,18 +1,36 @@
|
|
| 1 |
# imports
|
| 2 |
import re
|
| 3 |
-
import
|
|
|
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
import streamlit as st
|
| 6 |
from textblob import TextBlob
|
| 7 |
-
from transformers import pipeline
|
| 8 |
import textstat
|
| 9 |
import trafilatura
|
| 10 |
import requests
|
| 11 |
from bs4 import BeautifulSoup
|
|
|
|
| 12 |
import nltk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# constants
|
| 15 |
-
MAX_TEXT_LENGTH =
|
| 16 |
|
| 17 |
ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
|
| 18 |
ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
|
|
@@ -20,193 +38,81 @@ ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves throu
|
|
| 20 |
URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
|
| 21 |
URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
|
| 22 |
|
| 23 |
-
|
| 24 |
-
def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
| 25 |
-
"""
|
| 26 |
-
Loads NLP models into memory and caches them.
|
| 27 |
-
"""
|
| 28 |
-
try:
|
| 29 |
-
nltk.data.find('tokenizers/punkt')
|
| 30 |
-
except LookupError:
|
| 31 |
-
nltk.download('punkt')
|
| 32 |
-
nltk.download('punkt_tab')
|
| 33 |
-
|
| 34 |
-
sentiment_analyzer = pipeline(
|
| 35 |
-
"text-classification",
|
| 36 |
-
model="ProsusAI/finbert",
|
| 37 |
-
)
|
| 38 |
-
emotion_classifier = pipeline(
|
| 39 |
-
"text-classification",
|
| 40 |
-
model="SamLowe/roberta-base-go_emotions",
|
| 41 |
-
top_k=5
|
| 42 |
-
)
|
| 43 |
-
nli_classifier = pipeline(
|
| 44 |
-
"text-classification",
|
| 45 |
-
model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
|
| 46 |
-
)
|
| 47 |
-
theme_classifier = pipeline(
|
| 48 |
-
"zero-shot-classification",
|
| 49 |
-
model="facebook/bart-large-mnli"
|
| 50 |
-
)
|
| 51 |
-
|
| 52 |
-
return {
|
| 53 |
-
"sentiment": sentiment_analyzer,
|
| 54 |
-
"emotion": emotion_classifier,
|
| 55 |
-
"nli": nli_classifier,
|
| 56 |
-
"theme": theme_classifier,
|
| 57 |
-
}
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
def analyze_article(text: str) -> dict:
|
| 61 |
-
"""Analyzes framing using
|
| 62 |
-
models = _load_nlp_models()
|
| 63 |
safe_text = text[:MAX_TEXT_LENGTH]
|
| 64 |
-
|
| 65 |
-
# Sentiment Analysis
|
| 66 |
-
sentiment_result = models["sentiment"](safe_text, truncation=True)[0]
|
| 67 |
-
label = sentiment_result["label"].lower()
|
| 68 |
-
score = sentiment_result["score"]
|
| 69 |
-
if label == "negative":
|
| 70 |
-
sentiment_score = -score
|
| 71 |
-
elif label == "positive":
|
| 72 |
-
sentiment_score = score
|
| 73 |
-
else:
|
| 74 |
-
sentiment_score = 0.0 # Neutral
|
| 75 |
-
|
| 76 |
-
# Emotion Classification
|
| 77 |
-
emotion_results = models["emotion"](safe_text, truncation=True)
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
primary_tone = "neutral"
|
| 89 |
-
tone_scores = {"neutral": 1.0}
|
| 90 |
-
else:
|
| 91 |
-
tone_scores = {"neutral": 1.0}
|
| 92 |
-
primary_tone = "neutral"
|
| 93 |
-
|
| 94 |
-
# Thematic Framing
|
| 95 |
-
framing_dimensions = [
|
| 96 |
-
"economic consequences",
|
| 97 |
-
"moral and ethical fairness",
|
| 98 |
-
"legal and bureaucratic",
|
| 99 |
-
"public safety and health"
|
| 100 |
-
]
|
| 101 |
-
theme_result = models["theme"](safe_text, framing_dimensions)
|
| 102 |
-
primary_theme = theme_result["labels"][0]
|
| 103 |
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
| 106 |
raw_reading_ease = textstat.flesch_reading_ease(safe_text)
|
| 107 |
|
| 108 |
-
reading_ease = max(0.0, min(100.0, raw_reading_ease))
|
| 109 |
-
|
| 110 |
return {
|
| 111 |
-
"sentiment_score": sentiment_score,
|
|
|
|
|
|
|
|
|
|
| 112 |
"subjectivity_score": subjectivity_score,
|
| 113 |
-
"reading_ease":
|
| 114 |
-
"primary_tone": primary_tone,
|
| 115 |
-
"primary_theme": primary_theme,
|
| 116 |
-
"tone_scores": tone_scores,
|
| 117 |
}
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
mode="gauge+number",
|
| 125 |
-
value=score,
|
| 126 |
-
domain={"x": [0, 1], "y": [0, 1]},
|
| 127 |
-
title={"text": title, "font": {"size": 16}},
|
| 128 |
-
gauge={
|
| 129 |
-
"axis": {"range": [-1, 1], "tickwidth": 1, "tickcolor": "darkgrey"},
|
| 130 |
-
"bar": {"color": "#475569", "thickness": 0.2},
|
| 131 |
-
"bgcolor": "white",
|
| 132 |
-
"borderwidth": 0,
|
| 133 |
-
"steps": [
|
| 134 |
-
{"range": [-1, -0.1], "color": "#fee2e2"},
|
| 135 |
-
{"range": [-0.1, 0.1], "color": "#f1f5f9"},
|
| 136 |
-
{"range": [0.1, 1], "color": "#dcfce3"},
|
| 137 |
-
],
|
| 138 |
-
},
|
| 139 |
-
)
|
| 140 |
-
)
|
| 141 |
-
fig.update_layout(height=280, margin=dict(l=20, r=20, t=60, b=20))
|
| 142 |
-
return fig
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figure:
|
| 146 |
-
"""Generates an overlapping radar chart to compare emotions."""
|
| 147 |
-
categories = sorted(list(set(list(results_a["tone_scores"].keys()) + list(results_b["tone_scores"].keys()))))
|
| 148 |
-
|
| 149 |
-
val_a = [results_a["tone_scores"].get(c, 0) for c in categories]
|
| 150 |
-
val_b = [results_b["tone_scores"].get(c, 0) for c in categories]
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
fig = go.Figure()
|
| 157 |
-
fig.add_trace(go.Scatterpolar(
|
| 158 |
-
r=val_a, theta=categories, fill='toself', name='Source A',
|
| 159 |
-
line=dict(color='#4f46e5', shape='spline', width=2),
|
| 160 |
-
fillcolor='rgba(79, 70, 229, 0.2)'
|
| 161 |
-
))
|
| 162 |
-
fig.add_trace(go.Scatterpolar(
|
| 163 |
-
r=val_b, theta=categories, fill='toself', name='Source B',
|
| 164 |
-
line=dict(color='#10b981', shape='spline', width=2),
|
| 165 |
-
fillcolor='rgba(16, 185, 129, 0.2)'
|
| 166 |
-
))
|
| 167 |
-
fig.update_layout(
|
| 168 |
-
polar=dict(
|
| 169 |
-
radialaxis=dict(visible=True, showticklabels=False, showline=False, gridcolor='rgba(0,0,0,0.1)'),
|
| 170 |
-
angularaxis=dict(gridcolor='rgba(0,0,0,0.1)', linecolor='rgba(0,0,0,0.1)')
|
| 171 |
-
),
|
| 172 |
-
showlegend=True,
|
| 173 |
-
legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
|
| 174 |
-
title={"text": "Relative Emotion Profile", "font": {"size": 18, "family": "sans-serif"}},
|
| 175 |
-
height=400,
|
| 176 |
-
margin=dict(l=40, r=40, t=60, b=40),
|
| 177 |
-
paper_bgcolor='rgba(0,0,0,0)', # Transparent
|
| 178 |
-
plot_bgcolor='rgba(0,0,0,0)'
|
| 179 |
-
)
|
| 180 |
-
return fig
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
def _highlight_framing_words(text: str) -> str:
|
| 184 |
-
"""Highlights subjective or emotional words in the text snippet."""
|
| 185 |
-
raw_sentences = re.split(r'(?<=[.!?]) +', text)
|
| 186 |
-
snippet = " ".join(raw_sentences[:3])
|
| 187 |
-
if not snippet:
|
| 188 |
-
return ""
|
| 189 |
-
|
| 190 |
-
blob = TextBlob(snippet)
|
| 191 |
-
target_words = set()
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
replacement = r"<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>\1</span>"
|
| 203 |
-
highlighted_snippet = re.sub(pattern, replacement, highlighted_snippet, flags=re.IGNORECASE)
|
| 204 |
-
|
| 205 |
-
return highlighted_snippet + ("..." if len(raw_sentences) > 3 else "")
|
| 206 |
|
| 207 |
-
|
| 208 |
def fetch_article_text(url: str) -> str:
|
| 209 |
-
"""Scrapes article text."""
|
| 210 |
downloaded = trafilatura.fetch_url(url)
|
| 211 |
if downloaded:
|
| 212 |
text = trafilatura.extract(downloaded)
|
|
@@ -216,7 +122,6 @@ def fetch_article_text(url: str) -> str:
|
|
| 216 |
try:
|
| 217 |
headers = {
|
| 218 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
| 219 |
-
'Accept-Language': 'en-US,en;q=0.9',
|
| 220 |
}
|
| 221 |
response = requests.get(url, headers=headers, timeout=10)
|
| 222 |
soup = BeautifulSoup(response.content, 'html.parser')
|
|
@@ -229,171 +134,149 @@ def fetch_article_text(url: str) -> str:
|
|
| 229 |
|
| 230 |
if text and len(text) > 200:
|
| 231 |
return text.strip()
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
return f"Error: Could not fetch URL. Connection failed."
|
| 235 |
|
| 236 |
-
return "Error: Could not extract text. The site may be protected by
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
|
|
|
|
|
|
| 267 |
st.markdown("""
|
| 268 |
<style>
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
.block-container {
|
| 274 |
-
padding-top: 2rem;
|
| 275 |
-
padding-bottom: 2rem;
|
| 276 |
-
}
|
| 277 |
-
|
| 278 |
-
[data-testid="stMetric"] {
|
| 279 |
-
background-color: #f8fafc;
|
| 280 |
-
border: 1px solid #e2e8f0;
|
| 281 |
-
border-radius: 8px;
|
| 282 |
-
padding: 15px;
|
| 283 |
-
box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
|
| 284 |
-
}
|
| 285 |
-
|
| 286 |
-
[data-testid="stMetricValue"] > div {
|
| 287 |
-
white-space: normal !important;
|
| 288 |
-
word-wrap: break-word !important;
|
| 289 |
-
line-height: 1.2 !important;
|
| 290 |
-
font-size: 1.6rem !important;
|
| 291 |
-
}
|
| 292 |
</style>
|
| 293 |
""", unsafe_allow_html=True)
|
| 294 |
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
st.
|
| 298 |
-
if "results_b" not in st.session_state:
|
| 299 |
-
st.session_state.results_b = None
|
| 300 |
-
if "nli_result" not in st.session_state:
|
| 301 |
-
st.session_state.nli_result = None
|
| 302 |
|
| 303 |
st.title("FrameVis")
|
| 304 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
| 305 |
st.divider()
|
| 306 |
|
| 307 |
-
with st.spinner("Starting NLP models."):
|
| 308 |
-
_load_nlp_models()
|
| 309 |
-
|
| 310 |
input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
|
| 311 |
|
| 312 |
col1, col2 = st.columns(2)
|
| 313 |
-
|
| 314 |
with col1:
|
| 315 |
-
if input_method == "Paste Text":
|
| 316 |
-
|
| 317 |
-
else:
|
| 318 |
url_a = st.text_input("Source A URL", value=URL_A)
|
| 319 |
-
user_article_a = fetch_article_text(url_a) if url_a else ""
|
| 320 |
-
|
| 321 |
with col2:
|
| 322 |
-
if input_method == "Paste Text":
|
| 323 |
-
|
| 324 |
-
else:
|
| 325 |
url_b = st.text_input("Source B URL", value=URL_B)
|
| 326 |
-
user_article_b = fetch_article_text(url_b) if url_b else ""
|
| 327 |
-
|
| 328 |
-
st.write("")
|
| 329 |
|
| 330 |
-
# Execution button
|
| 331 |
if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
|
| 332 |
-
|
| 333 |
text_a_clean = user_article_a.strip() if user_article_a else ""
|
| 334 |
text_b_clean = user_article_b.strip() if user_article_b else ""
|
| 335 |
|
| 336 |
-
if not text_a_clean or not text_b_clean:
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
elif text_a_clean.startswith("Error:") or text_b_clean.startswith("Error:"):
|
| 340 |
-
st.error("One of the URLs could not be scraped. Please copy and paste the text directly.")
|
| 341 |
-
|
| 342 |
else:
|
| 343 |
-
with st.spinner("
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
-
|
| 349 |
-
if st.session_state.results_a and st.session_state.results_b:
|
| 350 |
st.divider()
|
| 351 |
st.markdown("### Framing Analytics & Comparison")
|
| 352 |
|
| 353 |
-
# Display Contradictions
|
| 354 |
nli_result = st.session_state.nli_result
|
| 355 |
if nli_result:
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
elif
|
| 359 |
-
|
| 360 |
-
else:
|
| 361 |
-
st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
|
| 362 |
|
| 363 |
st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
|
| 364 |
|
| 365 |
res_col1, res_col2 = st.columns(2)
|
| 366 |
|
| 367 |
-
# Render Column A
|
| 368 |
with res_col1:
|
| 369 |
r_a = st.session_state.results_a
|
| 370 |
st.markdown("#### Source A Breakdown")
|
| 371 |
-
m1, m2 = st.columns(2)
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
|
| 377 |
|
| 378 |
st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
|
| 379 |
-
|
| 380 |
st.markdown("**Key Framing Language:**")
|
| 381 |
-
|
| 382 |
-
st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
|
| 383 |
|
| 384 |
-
# Render Column B
|
| 385 |
with res_col2:
|
| 386 |
r_b = st.session_state.results_b
|
| 387 |
st.markdown("#### Source B Breakdown")
|
| 388 |
-
m1, m2 = st.columns(2)
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
|
| 394 |
|
| 395 |
st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
|
| 396 |
-
|
| 397 |
st.markdown("**Key Framing Language:**")
|
| 398 |
-
|
| 399 |
-
st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
|
|
|
|
| 1 |
# imports
|
| 2 |
import re
|
| 3 |
+
import json
|
| 4 |
+
import concurrent.futures
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
import streamlit as st
|
| 7 |
from textblob import TextBlob
|
|
|
|
| 8 |
import textstat
|
| 9 |
import trafilatura
|
| 10 |
import requests
|
| 11 |
from bs4 import BeautifulSoup
|
| 12 |
+
from huggingface_hub import InferenceClient
|
| 13 |
import nltk
|
| 14 |
+
import os
|
| 15 |
+
|
| 16 |
+
# --- INITIALIZATION ---
|
| 17 |
+
@st.cache_resource
def _initialize_app():
    """Make sure the NLTK tokenizer data used for highlighting is installed.

    Each resource is checked on its own: previously ``punkt_tab`` was only
    downloaded when ``punkt`` was missing, so an environment that already
    shipped ``punkt`` but not ``punkt_tab`` never received the second package.
    """
    for resource in ("punkt", "punkt_tab"):
        try:
            nltk.data.find(f"tokenizers/{resource}")
        except LookupError:
            nltk.download(resource)
|
| 25 |
+
|
| 26 |
+
_initialize_app()
|
| 27 |
+
|
| 28 |
+
# Initialize Hugging Face Client
|
| 29 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 30 |
+
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
|
| 31 |
|
| 32 |
# constants
|
| 33 |
+
MAX_TEXT_LENGTH = 2000 # ~400 words (safe token limit for fast API inference)
|
| 34 |
|
| 35 |
ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
|
| 36 |
ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
|
|
|
|
| 38 |
URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
|
| 39 |
URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
|
| 40 |
|
| 41 |
+
# --- CORE LOGIC ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
def _extract_json_from_llm(response_text: str) -> dict:
|
| 44 |
+
"""Robustly extracts JSON from an LLM response, ignoring markdown formatting."""
|
| 45 |
+
try:
|
| 46 |
+
match = re.search(r'\{.*\}', response_text, re.DOTALL)
|
| 47 |
+
if match:
|
| 48 |
+
return json.loads(match.group(0))
|
| 49 |
+
return json.loads(response_text)
|
| 50 |
+
except json.JSONDecodeError:
|
| 51 |
+
return {
|
| 52 |
+
"sentiment_score": 0.0, "primary_tone": "neutral",
|
| 53 |
+
"primary_theme": "unclear", "tone_scores": {"neutral": 1.0}
|
| 54 |
+
}
|
| 55 |
|
| 56 |
def analyze_article(text: str) -> dict:
    """Analyze framing with the LLM API and compute local readability metrics.

    The text is truncated to ``MAX_TEXT_LENGTH`` characters, sent to the
    module-level ``client`` with a JSON-only prompt, and the reply is parsed
    with ``_extract_json_from_llm``. Subjectivity (TextBlob) and Flesch reading
    ease (textstat) are computed locally on the same truncated text.

    Because the LLM output is untrusted, every field is validated before it is
    returned: the sentiment is coerced to a float in [-1, 1], tone scores are
    coerced to floats in [0, 1], and missing or malformed fields fall back to
    neutral defaults.

    Args:
        text: Article text to analyze.

    Returns:
        A dict with the keys ``sentiment_score``, ``primary_tone``,
        ``primary_theme``, ``tone_scores``, ``subjectivity_score`` and
        ``reading_ease`` (clamped to 0-100).
    """
    safe_text = text[:MAX_TEXT_LENGTH]

    prompt = f"""
    You are an expert media analyst. Analyze the following news snippet for framing, bias, and emotion.
    Return ONLY a valid JSON object with the exact keys below. Do not include markdown formatting or explanations.

    Keys to return:
    "sentiment_score": A float between -1.0 (highly negative) and 1.0 (highly positive).
    "primary_tone": The single dominant emotion (e.g., anger, fear, joy, sadness, surprise, neutral).
    "primary_theme": Choose ONE from: ["economic consequences", "moral and ethical fairness", "legal and bureaucratic", "public safety and health"].
    "tone_scores": A dictionary scoring the top 3 emotions present from 0.0 to 1.0 (e.g., {{"fear": 0.8, "anger": 0.5}}).

    Text to analyze:
    "{safe_text}"
    """

    try:
        response = client.text_generation(prompt, max_new_tokens=250, temperature=0.1)
        llm_data = _extract_json_from_llm(response)
    except Exception as e:
        # NOTE(review): the UI submits this function to a ThreadPoolExecutor;
        # confirm that st.error actually renders when called from a worker thread.
        st.error(f"API Error: {e}")
        llm_data = _extract_json_from_llm("")  # fallback
    if not isinstance(llm_data, dict):
        llm_data = {}

    # Sentiment: tolerate strings such as "0.4" and clamp to the gauge range.
    try:
        sentiment_score = float(llm_data.get("sentiment_score", 0.0))
    except (TypeError, ValueError):
        sentiment_score = 0.0
    if sentiment_score != sentiment_score:  # NaN is the only value unequal to itself
        sentiment_score = 0.0
    sentiment_score = max(-1.0, min(1.0, sentiment_score))

    # Tone scores: keep only numeric entries so the radar chart can plot them.
    tone_scores = {}
    raw_tones = llm_data.get("tone_scores")
    if isinstance(raw_tones, dict):
        for label, value in raw_tones.items():
            try:
                score = float(value)
            except (TypeError, ValueError):
                continue
            if score != score:
                continue
            tone_scores[str(label)] = max(0.0, min(1.0, score))
    if not tone_scores:
        tone_scores = {"neutral": 1.0}

    subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
    raw_reading_ease = textstat.flesch_reading_ease(safe_text)

    return {
        "sentiment_score": sentiment_score,
        "primary_tone": llm_data.get("primary_tone") or "neutral",
        "primary_theme": llm_data.get("primary_theme") or "unclear",
        "tone_scores": tone_scores,
        "subjectivity_score": subjectivity_score,
        "reading_ease": max(0.0, min(100.0, raw_reading_ease)),
    }
|
| 92 |
|
| 93 |
+
def check_contradiction(text_a: str, text_b: str) -> dict:
    """Ask the LLM whether two excerpts contradict, agree, or are unrelated.

    Only the first 1000 characters of each text are sent. The reply is parsed
    with ``_extract_json_from_llm`` and validated before being returned, since
    the UI upper-cases the relationship and formats the confidence as a float.

    Args:
        text_a: First news excerpt.
        text_b: Second news excerpt.

    Returns:
        ``{"relationship": ..., "confidence": ...}`` where the relationship is
        one of ``"CONTRADICTION"``, ``"ENTAILMENT"`` or ``"NEUTRAL"`` and the
        confidence is a float in [0, 1]. Any API or parsing failure yields a
        neutral result with zero confidence.
    """
    prompt = f"""
    You are a fact-checking analyst. Compare these two news excerpts.
    Return ONLY a valid JSON object with the exact keys below. Do not include markdown formatting.

    Keys to return:
    "relationship": Choose ONE from: ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]. (Contradiction = disputing facts, Entailment = agreeing on premise).
    "confidence": A float between 0.0 and 1.0 representing how confident you are.

    Text 1: "{text_a[:1000]}"
    Text 2: "{text_b[:1000]}"
    """
    neutral = {"relationship": "NEUTRAL", "confidence": 0.0}

    try:
        response = client.text_generation(prompt, max_new_tokens=100, temperature=0.1)
        result = _extract_json_from_llm(response)
    except Exception:
        # Best-effort: a failed stance check must not break the whole analysis.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return neutral
    if not isinstance(result, dict):
        return neutral

    relationship = str(result.get("relationship", "NEUTRAL")).strip().upper()
    if relationship not in {"CONTRADICTION", "ENTAILMENT", "NEUTRAL"}:
        relationship = "NEUTRAL"

    try:
        confidence = float(result.get("confidence", 0.0))
    except (TypeError, ValueError):
        confidence = 0.0
    if confidence != confidence:  # NaN guard
        confidence = 0.0
    confidence = max(0.0, min(1.0, confidence))

    return {"relationship": relationship, "confidence": confidence}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
@st.cache_data(ttl=3600, show_spinner=False)
|
| 114 |
def fetch_article_text(url: str) -> str:
|
| 115 |
+
"""Scrapes article text with caching to prevent re-scraping."""
|
| 116 |
downloaded = trafilatura.fetch_url(url)
|
| 117 |
if downloaded:
|
| 118 |
text = trafilatura.extract(downloaded)
|
|
|
|
| 122 |
try:
|
| 123 |
headers = {
|
| 124 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
|
|
|
| 125 |
}
|
| 126 |
response = requests.get(url, headers=headers, timeout=10)
|
| 127 |
soup = BeautifulSoup(response.content, 'html.parser')
|
|
|
|
| 134 |
|
| 135 |
if text and len(text) > 200:
|
| 136 |
return text.strip()
|
| 137 |
+
except Exception:
|
| 138 |
+
return f"Error: Could not fetch URL."
|
|
|
|
| 139 |
|
| 140 |
+
return "Error: Could not extract text. The site may be protected by paywalls."
|
| 141 |
|
| 142 |
+
# --- UI COMPONENTS ---
|
| 143 |
+
def _highlight_framing_words(text: str) -> str:
    """Return an HTML snippet of the first three sentences with loaded words highlighted.

    A word is highlighted when it is longer than two characters and TextBlob
    rates it as subjective (> 0.5) or strongly polar (|polarity| > 0.3).

    The result is rendered with ``unsafe_allow_html=True``, so all article text
    is HTML-escaped here. Highlighting is done in a single pass over the
    original snippet, which prevents a target word from matching inside markup
    inserted for an earlier word.

    Args:
        text: Article text (pasted or scraped).

    Returns:
        Escaped HTML with ``<span>`` wrappers around highlighted words, followed
        by ``"..."`` when the text has more than three sentences. An empty
        string when there is nothing to show.
    """
    import html  # local import: only this helper needs it

    raw_sentences = re.split(r'(?<=[.!?]) +', text)
    snippet = " ".join(raw_sentences[:3])
    if not snippet:
        return ""

    target_words = set()
    for word in TextBlob(snippet).words:
        if len(word) <= 2:
            continue
        sentiment = TextBlob(word).sentiment  # evaluate once per word
        if sentiment.subjectivity > 0.5 or abs(sentiment.polarity) > 0.3:
            target_words.add(str(word))

    suffix = "..." if len(raw_sentences) > 3 else ""
    if not target_words:
        return html.escape(snippet, quote=False) + suffix

    # Longest first so a longer word wins over a shorter alternative at the same position.
    alternation = "|".join(
        re.escape(word) for word in sorted(target_words, key=len, reverse=True)
    )
    pattern = re.compile(r'\b(' + alternation + r')\b', flags=re.IGNORECASE)
    span_open = "<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>"

    # With one capturing group, split() alternates plain text (even indices)
    # and matched words (odd indices).
    pieces = []
    for index, piece in enumerate(pattern.split(snippet)):
        escaped = html.escape(piece, quote=False)
        pieces.append(f"{span_open}{escaped}</span>" if index % 2 else escaped)

    return "".join(pieces) + suffix
|
| 159 |
+
|
| 160 |
+
def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
    """Build a gauge figure showing a sentiment score on a -1 to 1 axis.

    The gauge background is split into three bands: below -0.1, between -0.1
    and 0.1, and above 0.1.

    Args:
        score: Value displayed by the gauge needle and number.
        title: Caption shown above the gauge.

    Returns:
        A Plotly figure, 280 px tall, containing a single indicator trace.
    """
    bands = [
        {"range": [-1, -0.1], "color": "#fee2e2"},
        {"range": [-0.1, 0.1], "color": "#f1f5f9"},
        {"range": [0.1, 1], "color": "#dcfce3"},
    ]
    gauge_config = {
        "axis": {"range": [-1, 1], "tickwidth": 1, "tickcolor": "darkgrey"},
        "bar": {"color": "#475569", "thickness": 0.2},
        "bgcolor": "white",
        "borderwidth": 0,
        "steps": bands,
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={"x": [0, 1], "y": [0, 1]},
        title={"text": title, "font": {"size": 16}},
        gauge=gauge_config,
    )

    figure = go.Figure(indicator)
    figure.update_layout(height=280, margin=dict(l=20, r=20, t=60, b=20))
    return figure
|
| 173 |
+
|
| 174 |
+
def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figure:
    """Build an overlaid radar chart comparing the tone scores of two sources.

    The axes are the sorted union of both sources' tone labels; a label missing
    from one source is plotted as 0 for that source. Tone scores originate from
    an LLM, so a missing or non-dict ``tone_scores`` entry is treated as empty
    instead of raising.

    Args:
        results_a: Analysis result for Source A (expects a ``tone_scores`` dict).
        results_b: Analysis result for Source B (expects a ``tone_scores`` dict).

    Returns:
        A Plotly figure with one filled polar trace per source.
    """
    def _tone_scores(results) -> dict:
        tones = results.get("tone_scores") if isinstance(results, dict) else None
        return tones if isinstance(tones, dict) else {}

    tones_a = _tone_scores(results_a)
    tones_b = _tone_scores(results_b)

    categories = sorted(set(tones_a) | set(tones_b))
    val_a = [tones_a.get(c, 0) for c in categories]
    val_b = [tones_b.get(c, 0) for c in categories]

    if categories:
        # Repeat the first point so each polygon is drawn as a closed shape.
        categories.append(categories[0])
        val_a.append(val_a[0])
        val_b.append(val_b[0])

    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(r=val_a, theta=categories, fill='toself', name='Source A', line=dict(color='#4f46e5', width=2), fillcolor='rgba(79, 70, 229, 0.2)'))
    fig.add_trace(go.Scatterpolar(r=val_b, theta=categories, fill='toself', name='Source B', line=dict(color='#10b981', width=2), fillcolor='rgba(16, 185, 129, 0.2)'))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, showticklabels=False, showline=False),
            angularaxis=dict(gridcolor='rgba(0,0,0,0.1)'),
        ),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
        title={"text": "Relative Emotion Profile", "font": {"size": 18}},
        height=400,
        margin=dict(l=40, r=40, t=60, b=40),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    return fig
|
| 193 |
|
| 194 |
+
# --- STREAMLIT UI ---
# Top-level Streamlit script: collects two sources (pasted text or URLs), runs
# the three LLM calls concurrently on button press, stores the results in
# session state, and renders the comparison on every rerun.
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
st.markdown("""
<style>
.block-container { padding-top: 2rem; padding-bottom: 2rem; }
[data-testid="stMetric"] { background-color: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; padding: 15px; box-shadow: 0 1px 2px 0 rgba(0,0,0,0.05); }
[data-testid="stMetricValue"] > div { white-space: normal !important; word-wrap: break-word !important; line-height: 1.2 !important; font-size: 1.6rem !important; }
</style>
""", unsafe_allow_html=True)

if not HF_TOKEN:
    st.error("Hugging Face Token Missing.")
    st.stop()

st.title("FrameVis")
st.markdown("##### Media bias and framing effects across global news sources.")
st.divider()

input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
paste_mode = input_method == "Paste Text"

col1, col2 = st.columns(2)
with col1:
    if paste_mode:
        user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
    else:
        url_a = st.text_input("Source A URL", value=URL_A)
        with st.spinner("Scraping Source A..."):
            user_article_a = fetch_article_text(url_a) if url_a else ""
with col2:
    if paste_mode:
        user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
    else:
        url_b = st.text_input("Source B URL", value=URL_B)
        with st.spinner("Scraping Source B..."):
            user_article_b = fetch_article_text(url_b) if url_b else ""

if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
    text_a_clean = user_article_a.strip() if user_article_a else ""
    text_b_clean = user_article_b.strip() if user_article_b else ""

    # Only scraped text can carry the scraper's "Error: ..." sentinel; a pasted
    # article that merely begins with the word "Error" must not be rejected.
    scrape_failed = not paste_mode and (
        text_a_clean.startswith("Error:") or text_b_clean.startswith("Error:")
    )

    if not text_a_clean or not text_b_clean:
        st.warning("Please provide text/URLs for both sources.")
    elif scrape_failed:
        st.error("Scraping failed. Try pasting text directly.")
    else:
        with st.spinner("Running deep semantic analysis."):
            # The three LLM calls are independent network requests, so run them in parallel.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_a = executor.submit(analyze_article, text_a_clean)
                future_b = executor.submit(analyze_article, text_b_clean)
                future_nli = executor.submit(check_contradiction, text_a_clean, text_b_clean)

                st.session_state.results_a = future_a.result()
                st.session_state.results_b = future_b.result()
                st.session_state.nli_result = future_nli.result()

        # Remember exactly what was analysed so later reruns (edited inputs,
        # switched input mode) highlight the same text the metrics describe.
        st.session_state.analyzed_text_a = text_a_clean
        st.session_state.analyzed_text_b = text_b_clean


def _render_source_breakdown(label: str, results: dict, article_text: str, gauge_key: str) -> None:
    """Render the metric tiles, sentiment gauge and highlighted snippet for one source."""
    st.markdown(f"#### Source {label} Breakdown")
    m1, m2 = st.columns(2)
    m3, m4 = st.columns(2)
    m1.metric("Subjectivity", f"{results['subjectivity_score']:.2f}", help="0 is objective, 1 is highly opinionated.")
    m2.metric("Primary Emotion", str(results['primary_tone']).title())
    m3.metric("Framing Lens", str(results['primary_theme']).title())
    m4.metric("Reading Ease", f"{results['reading_ease']:.1f}", help="0-30 is college graduate level, 60-70 is 8th grade.")

    st.plotly_chart(_create_sentiment_gauge(results["sentiment_score"], "Sentiment Bias"), use_container_width=True, key=gauge_key)
    st.markdown("**Key Framing Language:**")
    st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{_highlight_framing_words(article_text)}</div>", unsafe_allow_html=True)


if st.session_state.get("results_a") and st.session_state.get("results_b"):
    st.divider()
    st.markdown("### Framing Analytics & Comparison")

    nli_result = st.session_state.get("nli_result")
    if nli_result:
        rel = str(nli_result["relationship"]).upper()
        # The confidence comes from an LLM; never let a malformed value break rendering.
        try:
            confidence = float(nli_result.get("confidence", 0.0))
        except (TypeError, ValueError):
            confidence = 0.0
        if rel == "CONTRADICTION":
            st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {confidence:.2f}) - Disputing facts.")
        elif rel == "ENTAILMENT":
            st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {confidence:.2f}) - Agreeing on premises.")
        else:
            st.info("**NEUTRAL RELATIONSHIP** - Discussing without direct contradiction.")

    st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)

    res_col1, res_col2 = st.columns(2)
    with res_col1:
        _render_source_breakdown(
            "A",
            st.session_state.results_a,
            st.session_state.get("analyzed_text_a", user_article_a or ""),
            "gauge_a",
        )
    with res_col2:
        _render_source_breakdown(
            "B",
            st.session_state.results_b,
            st.session_state.get("analyzed_text_b", user_article_b or ""),
            "gauge_b",
        )
|
|
|