Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,22 +1,35 @@
|
|
| 1 |
# imports
|
| 2 |
import re
|
| 3 |
-
import
|
| 4 |
-
import concurrent.futures
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
import streamlit as st
|
| 7 |
from textblob import TextBlob
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import textstat
|
| 9 |
import trafilatura
|
| 10 |
import requests
|
| 11 |
from bs4 import BeautifulSoup
|
| 12 |
-
from huggingface_hub import InferenceClient
|
| 13 |
import nltk
|
| 14 |
-
import os
|
| 15 |
|
| 16 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
@st.cache_resource
|
| 18 |
def _initialize_app():
|
| 19 |
-
"""Downloads NLTK data needed for highlighting."""
|
| 20 |
try:
|
| 21 |
nltk.data.find('tokenizers/punkt')
|
| 22 |
except LookupError:
|
|
@@ -25,21 +38,6 @@ def _initialize_app():
|
|
| 25 |
|
| 26 |
_initialize_app()
|
| 27 |
|
| 28 |
-
# Initialize Hugging Face Client
|
| 29 |
-
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 30 |
-
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
|
| 31 |
-
|
| 32 |
-
# constants
|
| 33 |
-
MAX_TEXT_LENGTH = 2000 # 400 words
|
| 34 |
-
|
| 35 |
-
ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
|
| 36 |
-
ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
|
| 37 |
-
|
| 38 |
-
URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
|
| 39 |
-
URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
|
| 40 |
-
|
| 41 |
-
# CORE LOGIC
|
| 42 |
-
|
| 43 |
def _extract_json_from_llm(response_text: str) -> dict:
|
| 44 |
"""Extracts JSON from an LLM response, ignoring markdown formatting."""
|
| 45 |
try:
|
|
@@ -75,7 +73,6 @@ def analyze_article(text: str) -> dict:
|
|
| 75 |
response = client.text_generation(prompt, max_new_tokens=250, temperature=0.1)
|
| 76 |
llm_data = _extract_json_from_llm(response)
|
| 77 |
except Exception as e:
|
| 78 |
-
st.error(f"API Error: {e}")
|
| 79 |
llm_data = _extract_json_from_llm("") # fallback
|
| 80 |
|
| 81 |
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
|
@@ -90,29 +87,97 @@ def analyze_article(text: str) -> dict:
|
|
| 90 |
"reading_ease": max(0.0, min(100.0, raw_reading_ease)),
|
| 91 |
}
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
"confidence": A float between 0.0 and 1.0 representing how confident you are.
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
@st.cache_data(ttl=3600, show_spinner=False)
|
| 114 |
def fetch_article_text(url: str) -> str:
|
| 115 |
-
"""Scrapes article text
|
| 116 |
downloaded = trafilatura.fetch_url(url)
|
| 117 |
if downloaded:
|
| 118 |
text = trafilatura.extract(downloaded)
|
|
@@ -122,6 +187,7 @@ def fetch_article_text(url: str) -> str:
|
|
| 122 |
try:
|
| 123 |
headers = {
|
| 124 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
|
|
|
| 125 |
}
|
| 126 |
response = requests.get(url, headers=headers, timeout=10)
|
| 127 |
soup = BeautifulSoup(response.content, 'html.parser')
|
|
@@ -134,76 +200,75 @@ def fetch_article_text(url: str) -> str:
|
|
| 134 |
|
| 135 |
if text and len(text) > 200:
|
| 136 |
return text.strip()
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
return "Error: Could not extract text. The site may be protected by paywalls."
|
| 141 |
-
|
| 142 |
-
# UI
|
| 143 |
-
def _highlight_framing_words(text: str) -> str:
|
| 144 |
-
"""Highlights subjective or emotional words in the text snippet."""
|
| 145 |
-
raw_sentences = re.split(r'(?<=[.!?]) +', text)
|
| 146 |
-
snippet = " ".join(raw_sentences[:3])
|
| 147 |
-
if not snippet: return ""
|
| 148 |
-
|
| 149 |
-
blob = TextBlob(snippet)
|
| 150 |
-
target_words = {str(w) for w in blob.words if len(w) > 2 and (TextBlob(w).sentiment.subjectivity > 0.5 or abs(TextBlob(w).sentiment.polarity) > 0.3)}
|
| 151 |
-
|
| 152 |
-
highlighted_snippet = snippet
|
| 153 |
-
for word in target_words:
|
| 154 |
-
pattern = r'\b(' + re.escape(word) + r')\b'
|
| 155 |
-
replacement = r"<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>\1</span>"
|
| 156 |
-
highlighted_snippet = re.sub(pattern, replacement, highlighted_snippet, flags=re.IGNORECASE)
|
| 157 |
|
| 158 |
-
return
|
| 159 |
|
| 160 |
-
def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
|
| 161 |
-
fig = go.Figure(go.Indicator(
|
| 162 |
-
mode="gauge+number", value=score, domain={"x": [0, 1], "y": [0, 1]},
|
| 163 |
-
title={"text": title, "font": {"size": 16}},
|
| 164 |
-
gauge={
|
| 165 |
-
"axis": {"range": [-1, 1], "tickwidth": 1, "tickcolor": "darkgrey"},
|
| 166 |
-
"bar": {"color": "#475569", "thickness": 0.2},
|
| 167 |
-
"bgcolor": "white", "borderwidth": 0,
|
| 168 |
-
"steps": [{"range": [-1, -0.1], "color": "#fee2e2"}, {"range": [-0.1, 0.1], "color": "#f1f5f9"}, {"range": [0.1, 1], "color": "#dcfce3"}],
|
| 169 |
-
}
|
| 170 |
-
))
|
| 171 |
-
fig.update_layout(height=280, margin=dict(l=20, r=20, t=60, b=20))
|
| 172 |
-
return fig
|
| 173 |
|
| 174 |
-
def
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
-
if categories:
|
| 180 |
-
categories.append(categories[0])
|
| 181 |
-
val_a.append(val_a[0])
|
| 182 |
-
val_b.append(val_b[0])
|
| 183 |
-
|
| 184 |
-
fig = go.Figure()
|
| 185 |
-
fig.add_trace(go.Scatterpolar(r=val_a, theta=categories, fill='toself', name='Source A', line=dict(color='#4f46e5', width=2), fillcolor='rgba(79, 70, 229, 0.2)'))
|
| 186 |
-
fig.add_trace(go.Scatterpolar(r=val_b, theta=categories, fill='toself', name='Source B', line=dict(color='#10b981', width=2), fillcolor='rgba(16, 185, 129, 0.2)'))
|
| 187 |
-
fig.update_layout(
|
| 188 |
-
polar=dict(radialaxis=dict(visible=True, showticklabels=False, showline=False), angularaxis=dict(gridcolor='rgba(0,0,0,0.1)')),
|
| 189 |
-
showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
|
| 190 |
-
title={"text": "Relative Emotion Profile", "font": {"size": 18}}, height=400, margin=dict(l=40, r=40, t=60, b=40), paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)'
|
| 191 |
-
)
|
| 192 |
-
return fig
|
| 193 |
|
| 194 |
-
#
|
| 195 |
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
st.markdown("""
|
| 197 |
<style>
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
</style>
|
| 202 |
""", unsafe_allow_html=True)
|
| 203 |
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
st.title("FrameVis")
|
| 209 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
|
@@ -212,25 +277,37 @@ st.divider()
|
|
| 212 |
input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
|
| 213 |
|
| 214 |
col1, col2 = st.columns(2)
|
|
|
|
| 215 |
with col1:
|
| 216 |
-
if input_method == "Paste Text":
|
| 217 |
-
|
|
|
|
| 218 |
url_a = st.text_input("Source A URL", value=URL_A)
|
| 219 |
-
|
|
|
|
| 220 |
with col2:
|
| 221 |
-
if input_method == "Paste Text":
|
| 222 |
-
|
|
|
|
| 223 |
url_b = st.text_input("Source B URL", value=URL_B)
|
| 224 |
-
|
| 225 |
|
|
|
|
|
|
|
|
|
|
| 226 |
if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
|
|
|
|
| 227 |
text_a_clean = user_article_a.strip() if user_article_a else ""
|
| 228 |
text_b_clean = user_article_b.strip() if user_article_b else ""
|
| 229 |
|
| 230 |
-
if not text_a_clean or not text_b_clean:
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
else:
|
| 233 |
-
with st.spinner("
|
| 234 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
| 235 |
future_a = executor.submit(analyze_article, text_a_clean)
|
| 236 |
future_b = executor.submit(analyze_article, text_b_clean)
|
|
@@ -240,43 +317,55 @@ if st.button("Analyze and Compare Sources", use_container_width=True, type="prim
|
|
| 240 |
st.session_state.results_b = future_b.result()
|
| 241 |
st.session_state.nli_result = future_nli.result()
|
| 242 |
|
| 243 |
-
|
|
|
|
| 244 |
st.divider()
|
| 245 |
st.markdown("### Framing Analytics & Comparison")
|
| 246 |
|
|
|
|
| 247 |
nli_result = st.session_state.nli_result
|
| 248 |
if nli_result:
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
elif
|
| 252 |
-
|
|
|
|
|
|
|
| 253 |
|
| 254 |
st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
|
| 255 |
|
| 256 |
res_col1, res_col2 = st.columns(2)
|
| 257 |
|
|
|
|
| 258 |
with res_col1:
|
| 259 |
r_a = st.session_state.results_a
|
| 260 |
st.markdown("#### Source A Breakdown")
|
| 261 |
-
m1, m2 = st.columns(2)
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
|
|
|
| 266 |
|
| 267 |
st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
|
|
|
|
| 268 |
st.markdown("**Key Framing Language:**")
|
| 269 |
-
|
|
|
|
| 270 |
|
|
|
|
| 271 |
with res_col2:
|
| 272 |
r_b = st.session_state.results_b
|
| 273 |
st.markdown("#### Source B Breakdown")
|
| 274 |
-
m1, m2 = st.columns(2)
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
| 279 |
|
| 280 |
st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
|
|
|
|
| 281 |
st.markdown("**Key Framing Language:**")
|
| 282 |
-
|
|
|
|
|
|
| 1 |
# imports
|
| 2 |
import re
|
| 3 |
+
import typing
|
|
|
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
import streamlit as st
|
| 6 |
from textblob import TextBlob
|
| 7 |
+
import json
|
| 8 |
+
import os
|
| 9 |
+
import concurrent.futures
|
| 10 |
+
from huggingface_hub import InferenceClient
|
| 11 |
import textstat
|
| 12 |
import trafilatura
|
| 13 |
import requests
|
| 14 |
from bs4 import BeautifulSoup
|
|
|
|
| 15 |
import nltk
|
|
|
|
| 16 |
|
| 17 |
+
# constants
|
| 18 |
+
MAX_TEXT_LENGTH = 2000
|
| 19 |
+
|
| 20 |
+
ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
|
| 21 |
+
ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
|
| 22 |
+
|
| 23 |
+
URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
|
| 24 |
+
URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
|
| 25 |
+
|
| 26 |
+
# Initialize the Hugging Face Client
|
| 27 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 28 |
+
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
|
| 29 |
+
|
| 30 |
@st.cache_resource
|
| 31 |
def _initialize_app():
|
| 32 |
+
"""Downloads lightweight NLTK data needed for highlighting."""
|
| 33 |
try:
|
| 34 |
nltk.data.find('tokenizers/punkt')
|
| 35 |
except LookupError:
|
|
|
|
| 38 |
|
| 39 |
_initialize_app()
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def _extract_json_from_llm(response_text: str) -> dict:
|
| 42 |
"""Extracts JSON from an LLM response, ignoring markdown formatting."""
|
| 43 |
try:
|
|
|
|
| 73 |
response = client.text_generation(prompt, max_new_tokens=250, temperature=0.1)
|
| 74 |
llm_data = _extract_json_from_llm(response)
|
| 75 |
except Exception as e:
|
|
|
|
| 76 |
llm_data = _extract_json_from_llm("") # fallback
|
| 77 |
|
| 78 |
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
|
|
|
| 87 |
"reading_ease": max(0.0, min(100.0, raw_reading_ease)),
|
| 88 |
}
|
| 89 |
|
| 90 |
+
|
| 91 |
+
def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
    """Build a Plotly gauge figure that displays a sentiment score.

    The dial axis runs from -1 to 1 and is shaded in three bands: below
    -0.1, between -0.1 and 0.1, and above 0.1.

    Args:
        score: Value shown by the gauge needle and numeric readout.
        title: Caption rendered above the gauge.

    Returns:
        A ``go.Figure`` holding a single ``go.Indicator`` trace.
    """
    color_bands = [
        {"range": [-1, -0.1], "color": "#fee2e2"},
        {"range": [-0.1, 0.1], "color": "#f1f5f9"},
        {"range": [0.1, 1], "color": "#dcfce3"},
    ]
    dial = {
        "axis": {"range": [-1, 1], "tickwidth": 1, "tickcolor": "darkgrey"},
        "bar": {"color": "#475569", "thickness": 0.2},
        "bgcolor": "white",
        "borderwidth": 0,
        "steps": color_bands,
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={"x": [0, 1], "y": [0, 1]},
        title={"text": title, "font": {"size": 16}},
        gauge=dial,
    )

    figure = go.Figure(indicator)
    figure.update_layout(
        height=280,
        margin={"l": 20, "r": 20, "t": 60, "b": 20},
    )
    return figure
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figure:
    """Generate an overlapping radar chart comparing the two sources' tones.

    Args:
        results_a: Analysis result for Source A; must contain a
            ``"tone_scores"`` mapping keyed by tone category.
        results_b: Analysis result for Source B, same layout.

    Returns:
        A ``go.Figure`` with one filled ``Scatterpolar`` trace per source.
        If neither source has any tone category the traces are empty.

    Raises:
        KeyError: If either result lacks the ``"tone_scores"`` key.
    """
    tones_a = results_a["tone_scores"]
    tones_b = results_b["tone_scores"]

    # Union of categories so both traces share the same angular axis.
    categories = sorted(set(tones_a) | set(tones_b))

    val_a = [tones_a.get(c, 0) for c in categories]
    val_b = [tones_b.get(c, 0) for c in categories]

    # Repeat the first point so each polygon closes on itself. Guarded
    # because indexing [0] raises IndexError when there are no categories.
    if categories:
        categories.append(categories[0])
        val_a.append(val_a[0])
        val_b.append(val_b[0])

    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
        r=val_a, theta=categories, fill='toself', name='Source A',
        line=dict(color='#4f46e5', shape='spline', width=2),
        fillcolor='rgba(79, 70, 229, 0.2)'
    ))
    fig.add_trace(go.Scatterpolar(
        r=val_b, theta=categories, fill='toself', name='Source B',
        line=dict(color='#10b981', shape='spline', width=2),
        fillcolor='rgba(16, 185, 129, 0.2)'
    ))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, showticklabels=False, showline=False, gridcolor='rgba(0,0,0,0.1)'),
            angularaxis=dict(gridcolor='rgba(0,0,0,0.1)', linecolor='rgba(0,0,0,0.1)')
        ),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
        title={"text": "Relative Emotion Profile", "font": {"size": 18, "family": "sans-serif"}},
        height=400,
        margin=dict(l=40, r=40, t=60, b=40),
        paper_bgcolor='rgba(0,0,0,0)',  # Transparent
        plot_bgcolor='rgba(0,0,0,0)'
    )
    return fig
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def _highlight_framing_words(text: str) -> str:
|
| 155 |
+
"""Highlights subjective or emotional words in the text snippet."""
|
| 156 |
+
raw_sentences = re.split(r'(?<=[.!?]) +', text)
|
| 157 |
+
snippet = " ".join(raw_sentences[:3])
|
| 158 |
+
if not snippet:
|
| 159 |
+
return ""
|
| 160 |
+
|
| 161 |
+
blob = TextBlob(snippet)
|
| 162 |
+
target_words = set()
|
| 163 |
+
|
| 164 |
+
for word in blob.words:
|
| 165 |
+
w_sentiment = TextBlob(word).sentiment
|
| 166 |
+
if w_sentiment.subjectivity > 0.5 or abs(w_sentiment.polarity) > 0.3:
|
| 167 |
+
if len(word) > 2:
|
| 168 |
+
target_words.add(str(word))
|
| 169 |
+
|
| 170 |
+
highlighted_snippet = snippet
|
| 171 |
+
for word in target_words:
|
| 172 |
+
pattern = r'\b(' + re.escape(word) + r')\b'
|
| 173 |
+
replacement = r"<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>\1</span>"
|
| 174 |
+
highlighted_snippet = re.sub(pattern, replacement, highlighted_snippet, flags=re.IGNORECASE)
|
| 175 |
+
|
| 176 |
+
return highlighted_snippet + ("..." if len(raw_sentences) > 3 else "")
|
| 177 |
|
| 178 |
@st.cache_data(ttl=3600, show_spinner=False)
|
| 179 |
def fetch_article_text(url: str) -> str:
|
| 180 |
+
"""Scrapes article text."""
|
| 181 |
downloaded = trafilatura.fetch_url(url)
|
| 182 |
if downloaded:
|
| 183 |
text = trafilatura.extract(downloaded)
|
|
|
|
| 187 |
try:
|
| 188 |
headers = {
|
| 189 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
| 190 |
+
'Accept-Language': 'en-US,en;q=0.9',
|
| 191 |
}
|
| 192 |
response = requests.get(url, headers=headers, timeout=10)
|
| 193 |
soup = BeautifulSoup(response.content, 'html.parser')
|
|
|
|
| 200 |
|
| 201 |
if text and len(text) > 200:
|
| 202 |
return text.strip()
|
| 203 |
+
|
| 204 |
+
except Exception as e:
|
| 205 |
+
return f"Error: Could not fetch URL. Connection failed."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
+
return "Error: Could not extract text. The site may be protected by hard paywalls."
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
+
def check_contradiction(text_a: str, text_b: str) -> dict:
    """Ask the LLM how two news excerpts relate to each other.

    Only the first 1000 characters of each text are sent in the prompt.

    Args:
        text_a: First article text.
        text_b: Second article text.

    Returns:
        A dict with two keys:
        ``"relationship"`` - one of ``"CONTRADICTION"``, ``"ENTAILMENT"`` or
        ``"NEUTRAL"`` (upper-case);
        ``"confidence"`` - a float clamped to the range 0.0-1.0.
        Falls back to ``{"relationship": "NEUTRAL", "confidence": 0.0}`` when
        the request or the response parsing fails.
    """
    neutral = {"relationship": "NEUTRAL", "confidence": 0.0}

    prompt = f"""
    You are a fact-checking analyst. Compare these two news excerpts.
    Return ONLY a valid JSON object with the exact keys below. Do not include markdown formatting.

    Keys to return:
    "relationship": Choose ONE from: ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]. (Contradiction = disputing facts, Entailment = agreeing on premise).
    "confidence": A float between 0.0 and 1.0 representing how confident you are.

    Text 1: "{text_a[:1000]}"
    Text 2: "{text_b[:1000]}"
    """
    try:
        response = client.text_generation(prompt, max_new_tokens=100, temperature=0.1)
        result = _extract_json_from_llm(response)
        raw_relationship = result.get("relationship", "NEUTRAL")
        raw_confidence = result.get("confidence", 0.0)
    except Exception:
        # Best-effort: any API or parsing failure degrades to a neutral
        # verdict. Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return neutral

    # The model's output is free-form; callers upper-case the relationship
    # and format the confidence as a float, so normalise both here.
    relationship = str(raw_relationship).strip().upper()
    if relationship not in ("CONTRADICTION", "ENTAILMENT", "NEUTRAL"):
        relationship = "NEUTRAL"

    try:
        confidence = float(raw_confidence)
    except (TypeError, ValueError):
        confidence = 0.0
    if confidence != confidence:  # NaN never equals itself
        confidence = 0.0
    confidence = max(0.0, min(1.0, confidence))

    return {"relationship": relationship, "confidence": confidence}
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
+
# USER INTERFACE
|
| 232 |
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
|
| 233 |
+
|
| 234 |
+
if not HF_TOKEN:
|
| 235 |
+
st.warning("Hugging Face Token Missing.")
|
| 236 |
+
|
| 237 |
st.markdown("""
|
| 238 |
<style>
|
| 239 |
+
#MainMenu {visibility: hidden;}
|
| 240 |
+
footer {visibility: hidden;}
|
| 241 |
+
header {visibility: hidden;}
|
| 242 |
+
|
| 243 |
+
.block-container {
|
| 244 |
+
padding-top: 2rem;
|
| 245 |
+
padding-bottom: 2rem;
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
[data-testid="stMetric"] {
|
| 249 |
+
background-color: #f8fafc;
|
| 250 |
+
border: 1px solid #e2e8f0;
|
| 251 |
+
border-radius: 8px;
|
| 252 |
+
padding: 15px;
|
| 253 |
+
box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
+
[data-testid="stMetricValue"] > div {
|
| 257 |
+
white-space: normal !important;
|
| 258 |
+
word-wrap: break-word !important;
|
| 259 |
+
line-height: 1.2 !important;
|
| 260 |
+
font-size: 1.6rem !important;
|
| 261 |
+
}
|
| 262 |
</style>
|
| 263 |
""", unsafe_allow_html=True)
|
| 264 |
|
| 265 |
+
# STATE MANAGEMENT
|
| 266 |
+
if "results_a" not in st.session_state:
|
| 267 |
+
st.session_state.results_a = None
|
| 268 |
+
if "results_b" not in st.session_state:
|
| 269 |
+
st.session_state.results_b = None
|
| 270 |
+
if "nli_result" not in st.session_state:
|
| 271 |
+
st.session_state.nli_result = None
|
| 272 |
|
| 273 |
st.title("FrameVis")
|
| 274 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
|
|
|
| 277 |
input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
|
| 278 |
|
| 279 |
col1, col2 = st.columns(2)
|
| 280 |
+
|
| 281 |
with col1:
|
| 282 |
+
if input_method == "Paste Text":
|
| 283 |
+
user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
|
| 284 |
+
else:
|
| 285 |
url_a = st.text_input("Source A URL", value=URL_A)
|
| 286 |
+
user_article_a = fetch_article_text(url_a) if url_a else ""
|
| 287 |
+
|
| 288 |
with col2:
|
| 289 |
+
if input_method == "Paste Text":
|
| 290 |
+
user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
|
| 291 |
+
else:
|
| 292 |
url_b = st.text_input("Source B URL", value=URL_B)
|
| 293 |
+
user_article_b = fetch_article_text(url_b) if url_b else ""
|
| 294 |
|
| 295 |
+
st.write("")
|
| 296 |
+
|
| 297 |
+
# Execution button
|
| 298 |
if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
|
| 299 |
+
|
| 300 |
text_a_clean = user_article_a.strip() if user_article_a else ""
|
| 301 |
text_b_clean = user_article_b.strip() if user_article_b else ""
|
| 302 |
|
| 303 |
+
if not text_a_clean or not text_b_clean:
|
| 304 |
+
st.warning("Please provide text or a valid URL for both Source A and Source B before analyzing.")
|
| 305 |
+
|
| 306 |
+
elif text_a_clean.startswith("Error:") or text_b_clean.startswith("Error:"):
|
| 307 |
+
st.error("One of the URLs could not be scraped. Please copy and paste the text directly.")
|
| 308 |
+
|
| 309 |
else:
|
| 310 |
+
with st.spinner("Analyzing framing semantics for both sources."):
|
| 311 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
| 312 |
future_a = executor.submit(analyze_article, text_a_clean)
|
| 313 |
future_b = executor.submit(analyze_article, text_b_clean)
|
|
|
|
| 317 |
st.session_state.results_b = future_b.result()
|
| 318 |
st.session_state.nli_result = future_nli.result()
|
| 319 |
|
| 320 |
+
# Analysis Display
|
| 321 |
+
if st.session_state.results_a and st.session_state.results_b:
|
| 322 |
st.divider()
|
| 323 |
st.markdown("### Framing Analytics & Comparison")
|
| 324 |
|
| 325 |
+
# Display Contradictions
|
| 326 |
nli_result = st.session_state.nli_result
|
| 327 |
if nli_result:
|
| 328 |
+
if nli_result["relationship"].upper() == "CONTRADICTION":
|
| 329 |
+
st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {nli_result['confidence']:.2f}) - These sources are disputing each other's facts.")
|
| 330 |
+
elif nli_result["relationship"].upper() == "ENTAILMENT":
|
| 331 |
+
st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {nli_result['confidence']:.2f}) - These sources agree on the core premise.")
|
| 332 |
+
else:
|
| 333 |
+
st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
|
| 334 |
|
| 335 |
st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
|
| 336 |
|
| 337 |
res_col1, res_col2 = st.columns(2)
|
| 338 |
|
| 339 |
+
# Render Column A
|
| 340 |
with res_col1:
|
| 341 |
r_a = st.session_state.results_a
|
| 342 |
st.markdown("#### Source A Breakdown")
|
| 343 |
+
m1, m2 = st.columns(2)
|
| 344 |
+
m3, m4 = st.columns(2)
|
| 345 |
+
m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}")
|
| 346 |
+
m2.metric("Primary Emotion", r_a['primary_tone'].title())
|
| 347 |
+
m3.metric("Framing Lens", r_a['primary_theme'].title())
|
| 348 |
+
m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
|
| 349 |
|
| 350 |
st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
|
| 351 |
+
|
| 352 |
st.markdown("**Key Framing Language:**")
|
| 353 |
+
annotated_text = _highlight_framing_words(user_article_a)
|
| 354 |
+
st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
|
| 355 |
|
| 356 |
+
# Render Column B
|
| 357 |
with res_col2:
|
| 358 |
r_b = st.session_state.results_b
|
| 359 |
st.markdown("#### Source B Breakdown")
|
| 360 |
+
m1, m2 = st.columns(2)
|
| 361 |
+
m3, m4 = st.columns(2)
|
| 362 |
+
m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}")
|
| 363 |
+
m2.metric("Primary Emotion", r_b['primary_tone'].title())
|
| 364 |
+
m3.metric("Framing Lens", r_b['primary_theme'].title())
|
| 365 |
+
m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
|
| 366 |
|
| 367 |
st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
|
| 368 |
+
|
| 369 |
st.markdown("**Key Framing Language:**")
|
| 370 |
+
annotated_text = _highlight_framing_words(user_article_b)
|
| 371 |
+
st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
|