Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
|
|
| 1 |
import re
|
| 2 |
import typing
|
| 3 |
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
import streamlit as st
|
|
|
|
|
|
|
| 6 |
from transformers import pipeline
|
| 7 |
-
import yake
|
| 8 |
|
|
|
|
| 9 |
MAX_TEXT_LENGTH = 1500
|
| 10 |
CANDIDATE_TONES = ["alarmist", "objective", "defensive", "optimistic", "critical"]
|
| 11 |
|
|
@@ -20,6 +23,16 @@ A sweeping new climate agreement signed today is drawing fierce criticism from i
|
|
| 20 |
|
| 21 |
@st.cache_resource
|
| 22 |
def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
sentiment_analyzer = pipeline(
|
| 24 |
"sentiment-analysis",
|
| 25 |
model="distilbert-base-uncased-finetuned-sst-2-english",
|
|
@@ -28,9 +41,8 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
| 28 |
"zero-shot-classification",
|
| 29 |
model="typeform/distilbert-base-uncased-mnli",
|
| 30 |
)
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
)
|
| 34 |
|
| 35 |
return {
|
| 36 |
"sentiment": sentiment_analyzer,
|
|
@@ -40,27 +52,44 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
| 40 |
|
| 41 |
|
| 42 |
def analyze_article(text: str) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
models = _load_nlp_models()
|
| 44 |
safe_text = text[:MAX_TEXT_LENGTH]
|
| 45 |
|
|
|
|
| 46 |
sentiment_result = models["sentiment"](safe_text)[0]
|
| 47 |
-
tone_result = models["tone"](safe_text, CANDIDATE_TONES)
|
| 48 |
-
keyword_results = models["keyword"].extract_keywords(safe_text)
|
| 49 |
-
|
| 50 |
is_positive = sentiment_result["label"] == "POSITIVE"
|
| 51 |
sentiment_score = (
|
| 52 |
sentiment_result["score"] if is_positive else -sentiment_result["score"]
|
| 53 |
)
|
| 54 |
|
|
|
|
|
|
|
| 55 |
tone_scores = {
|
| 56 |
label: score
|
| 57 |
for label, score in zip(tone_result["labels"], tone_result["scores"])
|
| 58 |
}
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
extracted_keywords = [kw[0] for kw in keyword_results]
|
| 61 |
|
|
|
|
|
|
|
|
|
|
| 62 |
return {
|
| 63 |
"sentiment_score": sentiment_score,
|
|
|
|
| 64 |
"primary_tone": tone_result["labels"][0],
|
| 65 |
"tone_scores": tone_scores,
|
| 66 |
"keywords": extracted_keywords,
|
|
@@ -68,118 +97,136 @@ def analyze_article(text: str) -> dict:
|
|
| 68 |
|
| 69 |
|
| 70 |
def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
|
|
|
|
|
|
|
|
|
|
| 71 |
fig = go.Figure(
|
| 72 |
go.Indicator(
|
| 73 |
mode="gauge+number",
|
| 74 |
value=score,
|
| 75 |
domain={"x": [0, 1], "y": [0, 1]},
|
| 76 |
-
title={"text": title, "font": {"size":
|
| 77 |
gauge={
|
| 78 |
"axis": {"range": [-1, 1], "tickwidth": 1},
|
| 79 |
-
"bar": {"color": "
|
| 80 |
"steps": [
|
| 81 |
-
{"range": [-1, -0.
|
| 82 |
-
{"range": [-0.
|
| 83 |
-
{"range": [0.
|
| 84 |
],
|
| 85 |
},
|
| 86 |
)
|
| 87 |
)
|
| 88 |
-
fig.update_layout(height=
|
| 89 |
return fig
|
| 90 |
|
| 91 |
|
| 92 |
def _create_tone_bar_chart(tone_scores: typing.Dict[str, float]) -> go.Figure:
|
|
|
|
| 93 |
labels = list(tone_scores.keys())
|
| 94 |
values = list(tone_scores.values())
|
| 95 |
|
| 96 |
-
fig = go.Figure(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
fig.update_layout(
|
| 98 |
-
title="Emotional Tone
|
| 99 |
-
xaxis_title="Confidence",
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
margin=dict(l=20, r=20, t=40, b=20),
|
| 103 |
yaxis={"categoryorder": "total ascending"},
|
|
|
|
| 104 |
)
|
| 105 |
return fig
|
| 106 |
|
| 107 |
|
| 108 |
def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
|
|
|
|
| 109 |
highlighted_text = text
|
| 110 |
for kw in keywords:
|
| 111 |
pattern = re.compile(rf"\b({re.escape(kw)})\b", re.IGNORECASE)
|
| 112 |
highlighted_text = pattern.sub(
|
| 113 |
-
r"<span style='background-color: #
|
| 114 |
highlighted_text,
|
| 115 |
)
|
| 116 |
return highlighted_text
|
| 117 |
|
| 118 |
|
| 119 |
-
st.set_page_config(page_title="FrameVis
|
| 120 |
-
st.title("FrameVis: Media Framing Analyzer")
|
| 121 |
-
st.markdown("Compare how different news sources frame the same event using NLP.")
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
|
|
|
| 125 |
|
| 126 |
-
st.
|
| 127 |
-
|
| 128 |
|
|
|
|
| 129 |
col1, col2 = st.columns(2)
|
| 130 |
|
| 131 |
with col1:
|
| 132 |
-
st.subheader("Source A")
|
| 133 |
user_article_a = st.text_area(
|
| 134 |
-
"
|
|
|
|
|
|
|
|
|
|
| 135 |
)
|
| 136 |
-
should_analyze_a = st.button("
|
| 137 |
|
| 138 |
with col2:
|
| 139 |
-
st.subheader("Source B")
|
| 140 |
user_article_b = st.text_area(
|
| 141 |
-
"
|
|
|
|
|
|
|
|
|
|
| 142 |
)
|
| 143 |
-
should_analyze_b = st.button("
|
| 144 |
|
| 145 |
-
st.
|
| 146 |
|
|
|
|
| 147 |
if should_analyze_a or should_analyze_b:
|
| 148 |
-
st.markdown("###
|
| 149 |
res_col1, res_col2 = st.columns(2)
|
| 150 |
|
| 151 |
if should_analyze_a and user_article_a:
|
| 152 |
-
with st.spinner("
|
| 153 |
results_a = analyze_article(user_article_a)
|
| 154 |
with res_col1:
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
)
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
)
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
)
|
| 167 |
-
st.markdown(f"
|
| 168 |
|
| 169 |
if should_analyze_b and user_article_b:
|
| 170 |
-
with st.spinner("
|
| 171 |
results_b = analyze_article(user_article_b)
|
| 172 |
with res_col2:
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
)
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
)
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
)
|
| 185 |
-
st.markdown(f"
|
|
|
|
| 1 |
+
# imports
|
| 2 |
import re
|
| 3 |
import typing
|
| 4 |
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
import streamlit as st
|
| 7 |
+
from keybert import KeyBERT
|
| 8 |
+
from textblob import TextBlob
|
| 9 |
from transformers import pipeline
|
|
|
|
| 10 |
|
| 11 |
+
# constants
|
| 12 |
MAX_TEXT_LENGTH = 1500
|
| 13 |
CANDIDATE_TONES = ["alarmist", "objective", "defensive", "optimistic", "critical"]
|
| 14 |
|
|
|
|
| 23 |
|
| 24 |
@st.cache_resource
|
| 25 |
def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
| 26 |
+
"""
|
| 27 |
+
Loads NLP model into memory and caches it.
|
| 28 |
+
|
| 29 |
+
Upgraded to include KeyBERT for semantic keyword extraction, which hopefully
|
| 30 |
+
outperforms statistical models on short news text.
|
| 31 |
+
|
| 32 |
+
Returns:
|
| 33 |
+
A dictionary containing the initialized Hugging Face pipelines
|
| 34 |
+
and the KeyBERT model.
|
| 35 |
+
"""
|
| 36 |
sentiment_analyzer = pipeline(
|
| 37 |
"sentiment-analysis",
|
| 38 |
model="distilbert-base-uncased-finetuned-sst-2-english",
|
|
|
|
| 41 |
"zero-shot-classification",
|
| 42 |
model="typeform/distilbert-base-uncased-mnli",
|
| 43 |
)
|
| 44 |
+
# KeyBERT uses a tiny, fast transformer to find contextual keywords
|
| 45 |
+
keyword_extractor = KeyBERT(model="all-MiniLM-L6-v2")
|
|
|
|
| 46 |
|
| 47 |
return {
|
| 48 |
"sentiment": sentiment_analyzer,
|
|
|
|
| 52 |
|
| 53 |
|
| 54 |
def analyze_article(text: str) -> dict:
|
| 55 |
+
"""
|
| 56 |
+
Analyzes framing using semantic keyphrases, sentiment, tone, and subjectivity.
|
| 57 |
+
|
| 58 |
+
Args:
|
| 59 |
+
text: The article text to analyze.
|
| 60 |
+
|
| 61 |
+
Returns:
|
| 62 |
+
A dictionary containing all calculated framing metrics.
|
| 63 |
+
"""
|
| 64 |
models = _load_nlp_models()
|
| 65 |
safe_text = text[:MAX_TEXT_LENGTH]
|
| 66 |
|
| 67 |
+
# Sentiment Analysis
|
| 68 |
sentiment_result = models["sentiment"](safe_text)[0]
|
|
|
|
|
|
|
|
|
|
| 69 |
is_positive = sentiment_result["label"] == "POSITIVE"
|
| 70 |
sentiment_score = (
|
| 71 |
sentiment_result["score"] if is_positive else -sentiment_result["score"]
|
| 72 |
)
|
| 73 |
|
| 74 |
+
# Tone Classification
|
| 75 |
+
tone_result = models["tone"](safe_text, CANDIDATE_TONES)
|
| 76 |
tone_scores = {
|
| 77 |
label: score
|
| 78 |
for label, score in zip(tone_result["labels"], tone_result["scores"])
|
| 79 |
}
|
| 80 |
|
| 81 |
+
# Semantic Keyword Extraction
|
| 82 |
+
keyword_results = models["keyword"].extract_keywords(
|
| 83 |
+
safe_text, keyphrase_ngram_range=(1, 2), stop_words="english", top_n=4
|
| 84 |
+
)
|
| 85 |
extracted_keywords = [kw[0] for kw in keyword_results]
|
| 86 |
|
| 87 |
+
# Subjectivity Analysis
|
| 88 |
+
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
| 89 |
+
|
| 90 |
return {
|
| 91 |
"sentiment_score": sentiment_score,
|
| 92 |
+
"subjectivity_score": subjectivity_score,
|
| 93 |
"primary_tone": tone_result["labels"][0],
|
| 94 |
"tone_scores": tone_scores,
|
| 95 |
"keywords": extracted_keywords,
|
|
|
|
| 97 |
|
| 98 |
|
| 99 |
def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
    """Build a gauge figure that displays a signed sentiment score.

    Args:
        score: Value shown by the gauge needle and number; the axis spans -1 to 1.
        title: Caption rendered above the gauge.

    Returns:
        A Plotly figure holding a single ``Indicator`` trace.
    """
    # Background bands: negative, neutral and positive zones of the axis.
    color_bands = [
        {"range": [-1, -0.2], "color": "#ffb3b3"},  # Red
        {"range": [-0.2, 0.2], "color": "#f2f2f2"},  # Gray
        {"range": [0.2, 1], "color": "#b3ffb3"},  # Green
    ]
    gauge_style = {
        "axis": {"range": [-1, 1], "tickwidth": 1},
        "bar": {"color": "darkblue"},
        "steps": color_bands,
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={"x": [0, 1], "y": [0, 1]},
        title={"text": title, "font": {"size": 16}},
        gauge=gauge_style,
    )

    figure = go.Figure(indicator)
    figure.update_layout(
        height=280,
        margin={"l": 20, "r": 20, "t": 60, "b": 20},
    )
    return figure
|
| 122 |
|
| 123 |
|
| 124 |
def _create_tone_bar_chart(tone_scores: typing.Dict[str, float]) -> go.Figure:
    """Generate a horizontal bar chart of tone scores.

    Args:
        tone_scores: Mapping of tone label to its score; one bar is drawn
            per entry.

    Returns:
        A Plotly figure containing a single horizontal ``Bar`` trace.
    """
    labels = list(tone_scores)
    values = list(tone_scores.values())

    fig = go.Figure(
        go.Bar(
            x=values,
            y=labels,
            orientation="h",
            # ``go.Bar`` has no top-level ``bordercolor`` property (passing it
            # raises ValueError); the bar outline is configured through
            # ``marker.line``. A non-zero width is needed for it to be drawn.
            marker={
                "color": "#4f46e5",  # Indigo
                "line": {"color": "white", "width": 1},
            },
        )
    )
    fig.update_layout(
        title={"text": "Emotional Tone Confidence", "font": {"size": 16}},
        xaxis_title="Confidence Matrix",
        height=280,
        margin=dict(l=20, r=20, t=60, b=20),
        # Order the category axis by bar value rather than by input order.
        yaxis={"categoryorder": "total ascending"},
        plot_bgcolor="rgba(0,0,0,0)",
    )
    return fig
|
| 147 |
|
| 148 |
|
| 149 |
def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
|
| 150 |
+
"""Wraps keywords in HTML tags for visual highlighting."""
|
| 151 |
highlighted_text = text
|
| 152 |
for kw in keywords:
|
| 153 |
pattern = re.compile(rf"\b({re.escape(kw)})\b", re.IGNORECASE)
|
| 154 |
highlighted_text = pattern.sub(
|
| 155 |
+
r"<span style='background-color: #fef08a; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>\1</span>",
|
| 156 |
highlighted_text,
|
| 157 |
)
|
| 158 |
return highlighted_text
|
| 159 |
|
| 160 |
|
| 161 |
+
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")

st.title("FrameVis")
st.markdown("##### Media bias and framing effects across global news sources.")
st.divider()

# Call the cached model loader up front, behind a spinner.
with st.spinner("Starting NLP model..."):
    _load_nlp_models()


def _render_analysis(container, article_text: str, results: dict) -> None:
    """Draw metrics, charts and highlighted text for one article in ``container``.

    Args:
        container: Streamlit column (or other container) to render into.
        article_text: The article text as entered by the user.
        results: Output of ``analyze_article`` for ``article_text``; the keys
            ``subjectivity_score``, ``primary_tone``, ``sentiment_score``,
            ``tone_scores`` and ``keywords`` are read.
    """
    with container:
        # Top Metrics
        metric_left, metric_right = st.columns(2)
        metric_left.metric(
            "Subjectivity",
            f"{results['subjectivity_score']:.2f}",
            help="0.0 is entirely factual/objective. 1.0 is highly opinionated.",
        )
        metric_right.metric("Primary Tone", results["primary_tone"].title())

        # Charts
        st.plotly_chart(
            _create_sentiment_gauge(results["sentiment_score"], "Sentiment Bias"),
            use_container_width=True,
        )
        st.plotly_chart(
            _create_tone_bar_chart(results["tone_scores"]),
            use_container_width=True,
        )

        # Context Highlighting
        st.markdown("**Semantic Fingerprint (Keyphrases):**")
        annotated_text = _highlight_keywords(article_text, results["keywords"])
        st.markdown(
            f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>",
            unsafe_allow_html=True,
        )


# Source Inputs
col1, col2 = st.columns(2)

with col1:
    user_article_a = st.text_area(
        "Source A",
        value=ARTICLE_A.strip(),
        height=220,
        help="Paste the raw text of the first article you wish to analyze.",
    )
    should_analyze_a = st.button("Process Source A", use_container_width=True)

with col2:
    user_article_b = st.text_area(
        "Source B",
        value=ARTICLE_B.strip(),
        height=220,
        help="Paste the raw text of the second article for comparison.",
    )
    should_analyze_b = st.button("Process Source B", use_container_width=True)

st.write("")  # Spacer

# Analysis Display
if should_analyze_a or should_analyze_b:
    st.markdown("### Framing Comparison")
    res_col1, res_col2 = st.columns(2)

    # Each side is analyzed only when its own button was pressed and its text
    # area is non-empty; rendering is shared through _render_analysis.
    if should_analyze_a and user_article_a:
        with st.spinner("Processing Source A..."):
            results_a = analyze_article(user_article_a)
        _render_analysis(res_col1, user_article_a, results_a)

    if should_analyze_b and user_article_b:
        with st.spinner("Processing Source B..."):
            results_b = analyze_article(user_article_b)
        _render_analysis(res_col2, user_article_b, results_b)
|