Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,24 +6,14 @@ import streamlit as st
|
|
| 6 |
from keybert import KeyBERT
|
| 7 |
from textblob import TextBlob
|
| 8 |
from transformers import pipeline
|
|
|
|
| 9 |
|
| 10 |
# constants
|
| 11 |
MAX_TEXT_LENGTH = 1500
|
| 12 |
|
| 13 |
-
|
| 14 |
-
CANDIDATE_TONES = [
|
| 15 |
-
"objective", "alarmist", "defensive", "optimistic", "critical",
|
| 16 |
-
"sensationalist", "somber", "sympathetic", "hostile", "satirical",
|
| 17 |
-
"urgent", "dismissive", "patriotic", "cynical", "apologetic"
|
| 18 |
-
]
|
| 19 |
|
| 20 |
-
|
| 21 |
-
Global leaders achieved a historic breakthrough today, signing a comprehensive climate accord aimed at drastically slashing carbon emissions by 2030. Environmental advocates are celebrating the mandate, which forces heavy-polluting industries to finally take accountability for their ecological damage. While corporations warn of transition costs, scientists emphasize that failing to act now would result in catastrophic, irreversible damage to our planet's fragile ecosystems.
|
| 22 |
-
"""
|
| 23 |
-
|
| 24 |
-
ARTICLE_B = """
|
| 25 |
-
A sweeping new climate agreement signed today is drawing fierce criticism from industry leaders, who warn the aggressive emission targets will cripple economic growth. The heavy-handed regulations impose massive compliance costs on the manufacturing and energy sectors, inevitably leading to significant job losses and higher prices for consumers. Critics argue the rushed accord prioritizes bureaucratic posturing over practical, market-driven solutions to environmental concerns.
|
| 26 |
-
"""
|
| 27 |
|
| 28 |
|
| 29 |
@st.cache_resource
|
|
@@ -32,61 +22,81 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
| 32 |
Loads NLP models into memory and caches them.
|
| 33 |
"""
|
| 34 |
sentiment_analyzer = pipeline(
|
| 35 |
-
"
|
| 36 |
-
model="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
)
|
| 38 |
-
|
| 39 |
-
"
|
| 40 |
-
model="
|
|
|
|
| 41 |
)
|
| 42 |
keyword_extractor = KeyBERT(model="all-mpnet-base-v2")
|
| 43 |
|
| 44 |
return {
|
| 45 |
"sentiment": sentiment_analyzer,
|
| 46 |
-
"
|
|
|
|
| 47 |
"keyword": keyword_extractor,
|
| 48 |
}
|
| 49 |
|
| 50 |
|
| 51 |
def analyze_article(text: str) -> dict:
|
| 52 |
-
"""Analyzes framing using semantic keyphrases, sentiment,
|
| 53 |
models = _load_nlp_models()
|
| 54 |
safe_text = text[:MAX_TEXT_LENGTH]
|
| 55 |
|
| 56 |
# Sentiment Analysis
|
| 57 |
sentiment_result = models["sentiment"](safe_text)[0]
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
# Semantic Keyword Extraction
|
| 71 |
keyword_results = models["keyword"].extract_keywords(
|
| 72 |
-
safe_text,
|
| 73 |
-
keyphrase_ngram_range=(1, 3),
|
| 74 |
-
stop_words="english",
|
| 75 |
use_mmr=True,
|
| 76 |
diversity=0.6,
|
| 77 |
top_n=5
|
| 78 |
)
|
| 79 |
extracted_keywords = [kw[0] for kw in keyword_results]
|
| 80 |
|
| 81 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
|
|
|
| 83 |
|
| 84 |
return {
|
| 85 |
"sentiment_score": sentiment_score,
|
| 86 |
"subjectivity_score": subjectivity_score,
|
| 87 |
-
"
|
|
|
|
| 88 |
"tone_scores": tone_scores,
|
| 89 |
"keywords": extracted_keywords,
|
|
|
|
| 90 |
}
|
| 91 |
|
| 92 |
|
|
@@ -102,9 +112,9 @@ def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
|
|
| 102 |
"axis": {"range": [-1, 1], "tickwidth": 1},
|
| 103 |
"bar": {"color": "darkblue"},
|
| 104 |
"steps": [
|
| 105 |
-
{"range": [-1, -0.2], "color": "#ffb3b3"},
|
| 106 |
-
{"range": [-0.2, 0.2], "color": "#f2f2f2"},
|
| 107 |
-
{"range": [0.2, 1], "color": "#b3ffb3"},
|
| 108 |
],
|
| 109 |
},
|
| 110 |
)
|
|
@@ -113,28 +123,32 @@ def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
|
|
| 113 |
return fig
|
| 114 |
|
| 115 |
|
| 116 |
-
def
|
| 117 |
-
"""Generates
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
fig = go.Figure(
|
| 122 |
-
go.Bar(
|
| 123 |
-
x=values,
|
| 124 |
-
y=labels,
|
| 125 |
-
orientation="h",
|
| 126 |
-
marker_color="#4f46e5",
|
| 127 |
-
marker_line_color="white",
|
| 128 |
-
marker_line_width=1.5
|
| 129 |
-
)
|
| 130 |
-
)
|
| 131 |
fig.update_layout(
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
plot_bgcolor="rgba(0,0,0,0)",
|
| 138 |
)
|
| 139 |
return fig
|
| 140 |
|
|
@@ -164,56 +178,67 @@ st.title("FrameVis")
|
|
| 164 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
| 165 |
st.divider()
|
| 166 |
|
| 167 |
-
with st.spinner("Starting NLP
|
| 168 |
_load_nlp_models()
|
| 169 |
|
| 170 |
col1, col2 = st.columns(2)
|
| 171 |
|
| 172 |
with col1:
|
| 173 |
user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
|
| 174 |
-
if st.button("Analyze A", use_container_width=True):
|
| 175 |
-
with st.spinner("Processing Source A."):
|
| 176 |
-
st.session_state.results_a = analyze_article(user_article_a)
|
| 177 |
|
| 178 |
with col2:
|
| 179 |
user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
|
| 180 |
-
if st.button("Analyze B", use_container_width=True):
|
| 181 |
-
with st.spinner("Processing Source B."):
|
| 182 |
-
st.session_state.results_b = analyze_article(user_article_b)
|
| 183 |
|
| 184 |
-
st.write("")
|
| 185 |
|
| 186 |
-
#
|
| 187 |
-
if st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
st.markdown("### Framing Analytics & Comparison")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
res_col1, res_col2 = st.columns(2)
|
| 190 |
|
| 191 |
# Render Column A
|
| 192 |
with res_col1:
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
# Render Column B
|
| 207 |
with res_col2:
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from keybert import KeyBERT
|
| 7 |
from textblob import TextBlob
|
| 8 |
from transformers import pipeline
|
| 9 |
+
import textstat
|
| 10 |
|
| 11 |
# constants
MAX_TEXT_LENGTH = 1500  # max characters fed to the NLP models per article (keeps latency bounded)

# Demo text: the same climate-accord story framed favorably (pro-accord slant).
ARTICLE_A = """Global leaders achieved a historic breakthrough today, signing a comprehensive climate accord aimed at drastically slashing carbon emissions by 2030. Environmental advocates are celebrating the mandate, which forces heavy-polluting industries to finally take accountability for their ecological damage. While corporations warn of transition costs, scientists emphasize that failing to act now would result in catastrophic, irreversible damage to our planet's fragile ecosystems."""

# Demo text: the same story framed critically (anti-accord slant), for side-by-side comparison.
ARTICLE_B = """A sweeping new climate agreement signed today is drawing fierce criticism from industry leaders, who warn the aggressive emission targets will cripple economic growth. The heavy-handed regulations impose massive compliance costs on the manufacturing and energy sectors, inevitably leading to significant job losses and higher prices for consumers. Critics argue the rushed accord prioritizes bureaucratic posturing over practical, market-driven solutions to environmental concerns."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
@st.cache_resource
def _load_nlp_models() -> typing.Dict[str, typing.Any]:
    """
    Loads NLP models into memory and caches them.

    Decorated with st.cache_resource so the heavyweight pipelines are
    constructed once per process and reused across Streamlit reruns.
    """
    # Keys are the names analyze_article() looks up in this registry.
    return {
        "sentiment": pipeline(
            "text-classification",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        ),
        "emotion": pipeline(
            "text-classification",
            model="SamLowe/roberta-base-go_emotions",
            top_k=5,
        ),
        "ner": pipeline(
            "ner",
            model="dslim/bert-base-NER",
            aggregation_strategy="simple",
        ),
        "keyword": KeyBERT(model="all-mpnet-base-v2"),
    }
|
| 46 |
|
| 47 |
|
| 48 |
def analyze_article(text: str) -> dict:
    """Analyzes framing using semantic keyphrases, sentiment, emotion, readability and NER.

    Args:
        text: Raw article text; only the first MAX_TEXT_LENGTH characters are analyzed.

    Returns:
        dict with keys: sentiment_score (signed float in [-1, 1]),
        subjectivity_score, reading_ease, primary_tone, tone_scores,
        keywords, entities.
    """
    models = _load_nlp_models()
    # Truncate so inference latency stays bounded and within model context limits.
    safe_text = text[:MAX_TEXT_LENGTH]

    # Sentiment Analysis: fold the classifier label into a signed [-1, 1] score.
    sentiment_result = models["sentiment"](safe_text)[0]
    label = sentiment_result["label"].lower()
    score = sentiment_result["score"]
    if label == "negative":
        sentiment_score = -score
    elif label == "positive":
        sentiment_score = score
    else:
        sentiment_score = 0.0  # Neutral

    # Emotion Classification: a pipeline with top_k returns a score-sorted list,
    # so the first entry is the dominant emotion.
    emotion_results = models["emotion"](safe_text)[0]
    if isinstance(emotion_results, list):
        tone_scores = {res["label"]: res["score"] for res in emotion_results}
        primary_tone = emotion_results[0]["label"]
    else:
        tone_scores = {"neutral": 1.0}
        primary_tone = "neutral"

    # Semantic Keyword Extraction (MMR keeps the extracted phrases diverse).
    keyword_results = models["keyword"].extract_keywords(
        safe_text,
        keyphrase_ngram_range=(1, 3),
        stop_words="english",
        use_mmr=True,
        diversity=0.6,
        top_n=5
    )
    extracted_keywords = [kw[0] for kw in keyword_results]

    # Named Entity Recognition. FIX: sort after de-duplicating — list(set(...))
    # produced an arbitrary order that changed on every run, making the UI flicker.
    ner_results = models["ner"](safe_text)
    extracted_entities = sorted({ent["word"] for ent in ner_results if ent["score"] > 0.6})

    # Subjectivity & Readability Analysis
    subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
    reading_ease = textstat.flesch_reading_ease(safe_text)

    return {
        "sentiment_score": sentiment_score,
        "subjectivity_score": subjectivity_score,
        "reading_ease": reading_ease,
        "primary_tone": primary_tone,
        "tone_scores": tone_scores,
        "keywords": extracted_keywords,
        "entities": extracted_entities,
    }
|
| 101 |
|
| 102 |
|
|
|
|
| 112 |
"axis": {"range": [-1, 1], "tickwidth": 1},
|
| 113 |
"bar": {"color": "darkblue"},
|
| 114 |
"steps": [
|
| 115 |
+
{"range": [-1, -0.2], "color": "#ffb3b3"},
|
| 116 |
+
{"range": [-0.2, 0.2], "color": "#f2f2f2"},
|
| 117 |
+
{"range": [0.2, 1], "color": "#b3ffb3"},
|
| 118 |
],
|
| 119 |
},
|
| 120 |
)
|
|
|
|
| 123 |
return fig
|
| 124 |
|
| 125 |
|
| 126 |
+
def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figure:
    """Generates an overlapping radar chart to compare emotions.

    Args:
        results_a: Analysis dict for source A; must contain "tone_scores".
        results_b: Analysis dict for source B; must contain "tone_scores".

    Returns:
        A plotly Figure with one filled Scatterpolar trace per source.
    """
    # FIX: sort the union of emotion labels so the axis order is stable across
    # reruns — building from a bare set gave an arbitrary order that reshuffled
    # the radar axes on every render.
    categories = sorted(set(results_a["tone_scores"]) | set(results_b["tone_scores"]))

    fig = go.Figure()
    if not categories:
        # Defensive: with no emotions to plot, categories[0] below would raise
        # IndexError; return an empty figure instead.
        return fig

    val_a = [results_a["tone_scores"].get(c, 0) for c in categories]
    val_b = [results_b["tone_scores"].get(c, 0) for c in categories]

    # Close the radar loop by repeating the first point at the end.
    categories.append(categories[0])
    val_a.append(val_a[0])
    val_b.append(val_b[0])

    fig.add_trace(go.Scatterpolar(
        r=val_a, theta=categories, fill='toself', name='Source A', line_color='#4f46e5'
    ))
    fig.add_trace(go.Scatterpolar(
        r=val_b, theta=categories, fill='toself', name='Source B', line_color='#10b981'
    ))
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
        showlegend=True,
        title={"text": "Relative Emotion Profile", "font": {"size": 18}},
        height=450,
        margin=dict(l=40, r=40, t=60, b=40),
    )
    return fig
|
| 154 |
|
|
|
|
| 178 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
| 179 |
st.divider()
|
| 180 |
|
| 181 |
+
# Warm the model cache up front so the first button click isn't slow.
with st.spinner("Starting NLP models."):
    _load_nlp_models()

col1, col2 = st.columns(2)

with col1:
    user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)

with col2:
    user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)

st.write("")

# Single unified execution button
if st.button("Analyze & Compare Sources", use_container_width=True, type="primary"):
    with st.spinner("Analyzing framing semantics for both sources."):
        # Results live in session_state so they survive Streamlit reruns.
        st.session_state.results_a = analyze_article(user_article_a)
        st.session_state.results_b = analyze_article(user_article_b)

# Analysis Display
# NOTE(review): assumes results_a/results_b are initialized (e.g. to None)
# earlier in the file before first access — confirm against the full source.
if st.session_state.results_a and st.session_state.results_b:
    st.divider()
    st.markdown("### Framing Analytics & Comparison")

    # Radar Chart spans the top
    st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)

    res_col1, res_col2 = st.columns(2)

    # Render Column A
    with res_col1:
        r_a = st.session_state.results_a
        st.markdown("#### Source A Breakdown")
        m1, m2, m3 = st.columns(3)
        m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}")
        m2.metric("Primary Emotion", r_a['primary_tone'].title())
        m3.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")

        st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True)

        if r_a["entities"]:
            st.markdown(f"**Extracted Entities:** `{', '.join(r_a['entities'])}`")

        st.markdown("**Key Framing Language:**")
        # _highlight_keywords is defined elsewhere in this file; it wraps the
        # detected keyphrases in highlight markup for the HTML box below.
        annotated_text = _highlight_keywords(user_article_a, r_a["keywords"])
        st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)

    # Render Column B
    with res_col2:
        r_b = st.session_state.results_b
        st.markdown("#### Source B Breakdown")
        m1, m2, m3 = st.columns(3)
        m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}")
        m2.metric("Primary Emotion", r_b['primary_tone'].title())
        m3.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")

        st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True)

        if r_b["entities"]:
            st.markdown(f"**Extracted Entities:** `{', '.join(r_b['entities'])}`")

        st.markdown("**Key Framing Language:**")
        annotated_text = _highlight_keywords(user_article_b, r_b["keywords"])
        st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
|