NKessler committed on
Commit
dce5185
·
verified ·
1 Parent(s): 86c9b5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -63
app.py CHANGED
@@ -1,11 +1,14 @@
 
1
  import re
2
  import typing
3
 
4
  import plotly.graph_objects as go
5
  import streamlit as st
 
 
6
  from transformers import pipeline
7
- import yake
8
 
 
9
  MAX_TEXT_LENGTH = 1500
10
  CANDIDATE_TONES = ["alarmist", "objective", "defensive", "optimistic", "critical"]
11
 
@@ -20,6 +23,16 @@ A sweeping new climate agreement signed today is drawing fierce criticism from i
20
 
21
  @st.cache_resource
22
  def _load_nlp_models() -> typing.Dict[str, typing.Any]:
 
 
 
 
 
 
 
 
 
 
23
  sentiment_analyzer = pipeline(
24
  "sentiment-analysis",
25
  model="distilbert-base-uncased-finetuned-sst-2-english",
@@ -28,9 +41,8 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
28
  "zero-shot-classification",
29
  model="typeform/distilbert-base-uncased-mnli",
30
  )
31
- keyword_extractor = yake.KeywordExtractor(
32
- lan="en", n=1, dedupLim=0.9, top=5, features=None
33
- )
34
 
35
  return {
36
  "sentiment": sentiment_analyzer,
@@ -40,27 +52,44 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
40
 
41
 
42
  def analyze_article(text: str) -> dict:
 
 
 
 
 
 
 
 
 
43
  models = _load_nlp_models()
44
  safe_text = text[:MAX_TEXT_LENGTH]
45
 
 
46
  sentiment_result = models["sentiment"](safe_text)[0]
47
- tone_result = models["tone"](safe_text, CANDIDATE_TONES)
48
- keyword_results = models["keyword"].extract_keywords(safe_text)
49
-
50
  is_positive = sentiment_result["label"] == "POSITIVE"
51
  sentiment_score = (
52
  sentiment_result["score"] if is_positive else -sentiment_result["score"]
53
  )
54
 
 
 
55
  tone_scores = {
56
  label: score
57
  for label, score in zip(tone_result["labels"], tone_result["scores"])
58
  }
59
 
 
 
 
 
60
  extracted_keywords = [kw[0] for kw in keyword_results]
61
 
 
 
 
62
  return {
63
  "sentiment_score": sentiment_score,
 
64
  "primary_tone": tone_result["labels"][0],
65
  "tone_scores": tone_scores,
66
  "keywords": extracted_keywords,
@@ -68,118 +97,136 @@ def analyze_article(text: str) -> dict:
68
 
69
 
def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
    """Build a gauge indicator that plots a sentiment score on a -1..1 axis.

    Args:
        score: Value shown by the gauge needle and the numeric readout.
        title: Caption rendered above the gauge.

    Returns:
        A Plotly figure holding a single ``Indicator`` trace.
    """
    # Background bands of the dial: negative, neutral, positive.
    band_colors = (
        ((-1, -0.3), "lightpink"),
        ((-0.3, 0.3), "lightgray"),
        ((0.3, 1), "lightgreen"),
    )
    gauge_spec = {
        "axis": {"range": [-1, 1], "tickwidth": 1},
        "bar": {"color": "black"},
        "steps": [
            {"range": [low, high], "color": shade}
            for (low, high), shade in band_colors
        ],
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={"x": [0, 1], "y": [0, 1]},
        title={"text": title, "font": {"size": 18}},
        gauge=gauge_spec,
    )

    figure = go.Figure(indicator)
    figure.update_layout(height=250, margin={"l": 20, "r": 20, "t": 40, "b": 20})
    return figure
90
 
91
 
def _create_tone_bar_chart(tone_scores: typing.Dict[str, float]) -> go.Figure:
    """Draw the per-tone scores as a horizontal bar chart.

    Args:
        tone_scores: Mapping of tone label to its score.

    Returns:
        A Plotly figure with one horizontal bar per tone; the y-axis uses the
        "total ascending" category order.
    """
    tone_names = [*tone_scores]
    confidences = [*tone_scores.values()]
    bars = go.Bar(
        x=confidences,
        y=tone_names,
        orientation="h",
        marker_color="royalblue",
    )

    chart = go.Figure(bars)
    # Keyword order is kept identical to the original update_layout call.
    layout_options = {
        "title": "Emotional Tone Distribution",
        "xaxis_title": "Confidence",
        "yaxis_title": "Tone",
        "height": 250,
        "margin": {"l": 20, "r": 20, "t": 40, "b": 20},
        "yaxis": {"categoryorder": "total ascending"},
    }
    chart.update_layout(**layout_options)
    return chart
106
 
107
 
def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
    """Return `text` as HTML with every keyword occurrence wrapped in a span.

    All keywords are matched in a single pass over the original text, so a
    keyword can never match inside markup inserted for another keyword
    (e.g. "color" inside ``background-color``), and overlapping keywords
    produce one span (the longest phrase wins) instead of nested spans.

    Args:
        text: Raw article text. It is HTML-escaped because callers render the
            result with ``unsafe_allow_html=True``.
        keywords: Words or phrases to highlight; matching is case-insensitive
            and restricted to word boundaries. Blank entries are ignored.

    Returns:
        The escaped text with each match wrapped in a highlighting ``<span>``.
    """
    # Local import keeps this block self-contained; `html` is standard library.
    import html

    span_open = (
        "<span style='background-color: #ffcc00; padding: 2px; "
        "border-radius: 3px;'>"
    )

    # Drop blanks and duplicates; try longer phrases first so that
    # "climate agreement" is preferred over "climate" at the same position.
    candidates = sorted(
        dict.fromkeys(kw for kw in keywords if kw and kw.strip()),
        key=len,
        reverse=True,
    )
    if not candidates:
        return html.escape(text, quote=False)

    alternatives = "|".join(re.escape(kw) for kw in candidates)
    pattern = re.compile(rf"\b({alternatives})\b", re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(
            f"{span_open}{html.escape(match.group(1), quote=False)}</span>"
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)
117
 
118
 
def _render_source_results(column, source_name: str, article_text: str) -> None:
    """Analyze one article and draw its charts and highlighted text.

    Args:
        column: Streamlit column container that receives the output.
        source_name: Display name used in the spinner text (e.g. "Source A").
        article_text: Raw article text taken from the matching text area.
    """
    with st.spinner(f"Analyzing {source_name}..."):
        results = analyze_article(article_text)

    with column:
        st.plotly_chart(
            _create_sentiment_gauge(results["sentiment_score"], "Sentiment"),
            use_container_width=True,
        )
        st.plotly_chart(
            _create_tone_bar_chart(results["tone_scores"]),
            use_container_width=True,
        )
        st.markdown("**Highlighted Text (Loaded Keywords):**")
        annotated_text = _highlight_keywords(article_text, results["keywords"])
        st.markdown(f"> {annotated_text}", unsafe_allow_html=True)


st.set_page_config(page_title="FrameVis MVP", layout="wide")
st.title("FrameVis: Media Framing Analyzer")
st.markdown("Compare how different news sources frame the same event using NLP.")

# Load the cached models up front so the first analysis does not pay for it.
with st.spinner("Waking up NLP models..."):
    _load_nlp_models()

st.markdown("### Input Articles")
st.markdown("Paste custom articles below or use the default samples to see the analysis.")

col1, col2 = st.columns(2)

with col1:
    st.subheader("Source A")
    user_article_a = st.text_area(
        "Paste Article A Text:", value=ARTICLE_A.strip(), height=200
    )
    should_analyze_a = st.button("Analyze Source A", use_container_width=True)

with col2:
    st.subheader("Source B")
    user_article_b = st.text_area(
        "Paste Article B Text:", value=ARTICLE_B.strip(), height=200
    )
    should_analyze_b = st.button("Analyze Source B", use_container_width=True)

st.divider()

if should_analyze_a or should_analyze_b:
    st.markdown("### Visual Analytics Results")
    res_col1, res_col2 = st.columns(2)

    # `.strip()` keeps whitespace-only input from being sent to the models.
    if should_analyze_a and user_article_a.strip():
        _render_source_results(res_col1, "Source A", user_article_a)

    if should_analyze_b and user_article_b.strip():
        _render_source_results(res_col2, "Source B", user_article_b)
 
1
+ # imports
2
  import re
3
  import typing
4
 
5
  import plotly.graph_objects as go
6
  import streamlit as st
7
+ from keybert import KeyBERT
8
+ from textblob import TextBlob
9
  from transformers import pipeline
 
10
 
11
+ # constants
12
  MAX_TEXT_LENGTH = 1500
13
  CANDIDATE_TONES = ["alarmist", "objective", "defensive", "optimistic", "critical"]
14
 
 
23
 
24
  @st.cache_resource
25
  def _load_nlp_models() -> typing.Dict[str, typing.Any]:
26
+ """
27
+ Loads NLP model into memory and caches it.
28
+
29
+ Upgraded to include KeyBERT for semantic keyword extraction, which hopefully
30
+ outperforms statistical models on short news text.
31
+
32
+ Returns:
33
+ A dictionary containing the initialized Hugging Face pipelines
34
+ and the KeyBERT model.
35
+ """
36
  sentiment_analyzer = pipeline(
37
  "sentiment-analysis",
38
  model="distilbert-base-uncased-finetuned-sst-2-english",
 
41
  "zero-shot-classification",
42
  model="typeform/distilbert-base-uncased-mnli",
43
  )
44
+ # KeyBERT uses a tiny, fast transformer to find contextual keywords
45
+ keyword_extractor = KeyBERT(model="all-MiniLM-L6-v2")
 
46
 
47
  return {
48
  "sentiment": sentiment_analyzer,
 
52
 
53
 
54
  def analyze_article(text: str) -> dict:
55
+ """
56
+ Analyzes framing using semantic keyphrases, sentiment, tone, and subjectivity.
57
+
58
+ Args:
59
+ text: The article text to analyze.
60
+
61
+ Returns:
62
+ A dictionary containing all calculated framing metrics.
63
+ """
64
  models = _load_nlp_models()
65
  safe_text = text[:MAX_TEXT_LENGTH]
66
 
67
+ # Sentiment Analysis
68
  sentiment_result = models["sentiment"](safe_text)[0]
 
 
 
69
  is_positive = sentiment_result["label"] == "POSITIVE"
70
  sentiment_score = (
71
  sentiment_result["score"] if is_positive else -sentiment_result["score"]
72
  )
73
 
74
+ # Tone Classification
75
+ tone_result = models["tone"](safe_text, CANDIDATE_TONES)
76
  tone_scores = {
77
  label: score
78
  for label, score in zip(tone_result["labels"], tone_result["scores"])
79
  }
80
 
81
+ # Semantic Keyword Extraction
82
+ keyword_results = models["keyword"].extract_keywords(
83
+ safe_text, keyphrase_ngram_range=(1, 2), stop_words="english", top_n=4
84
+ )
85
  extracted_keywords = [kw[0] for kw in keyword_results]
86
 
87
+ # Subjectivity Analysis
88
+ subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
89
+
90
  return {
91
  "sentiment_score": sentiment_score,
92
+ "subjectivity_score": subjectivity_score,
93
  "primary_tone": tone_result["labels"][0],
94
  "tone_scores": tone_scores,
95
  "keywords": extracted_keywords,
 
97
 
98
 
def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
    """Build a Plotly gauge chart that visualizes a sentiment score.

    Args:
        score: Value shown by the gauge; the dial axis spans -1 to 1.
        title: Caption rendered above the gauge.

    Returns:
        A Plotly figure holding a single ``Indicator`` trace.
    """
    # Dial background bands: red (negative), gray (neutral), green (positive).
    band_colors = (
        ((-1, -0.2), "#ffb3b3"),
        ((-0.2, 0.2), "#f2f2f2"),
        ((0.2, 1), "#b3ffb3"),
    )
    gauge_spec = {
        "axis": {"range": [-1, 1], "tickwidth": 1},
        "bar": {"color": "darkblue"},
        "steps": [
            {"range": [low, high], "color": shade}
            for (low, high), shade in band_colors
        ],
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={"x": [0, 1], "y": [0, 1]},
        title={"text": title, "font": {"size": 16}},
        gauge=gauge_spec,
    )

    figure = go.Figure(indicator)
    figure.update_layout(height=280, margin={"l": 20, "r": 20, "t": 60, "b": 20})
    return figure
122
 
123
 
def _create_tone_bar_chart(tone_scores: typing.Dict[str, float]) -> go.Figure:
    """Generate a horizontal bar chart of the tone scores.

    Args:
        tone_scores: Mapping of tone label to its score.

    Returns:
        A Plotly figure with one horizontal bar per tone; the y-axis uses the
        "total ascending" category order.
    """
    labels = list(tone_scores)
    values = list(tone_scores.values())

    fig = go.Figure(
        go.Bar(
            x=values,
            y=labels,
            orientation="h",
            # `go.Bar` has no top-level `bordercolor` property (Plotly raises
            # ValueError for it); the bar outline is configured under
            # `marker.line`. An explicit width makes the white outline visible.
            marker={
                "color": "#4f46e5",  # Indigo
                "line": {"color": "white", "width": 1},
            },
        )
    )
    fig.update_layout(
        title={"text": "Emotional Tone Confidence", "font": {"size": 16}},
        xaxis_title="Confidence Matrix",
        height=280,
        margin=dict(l=20, r=20, t=60, b=20),
        yaxis={"categoryorder": "total ascending"},
        plot_bgcolor="rgba(0,0,0,0)",  # transparent plot area
    )
    return fig
147
 
148
 
def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
    """Return `text` as HTML with every keyword occurrence wrapped in a span.

    All keywords are matched in a single pass over the original text, so a
    keyword can never match inside markup inserted for another keyword
    (e.g. "color" or "font" inside the span's style attribute), and
    overlapping keyphrases produce one span (the longest phrase wins) instead
    of nested spans.

    Args:
        text: Raw article text. It is HTML-escaped because callers render the
            result with ``unsafe_allow_html=True``.
        keywords: Words or phrases to highlight; matching is case-insensitive
            and restricted to word boundaries. Blank entries are ignored.

    Returns:
        The escaped text with each match wrapped in a highlighting ``<span>``.
    """
    # Local import keeps this block self-contained; `html` is standard library.
    import html

    span_open = (
        "<span style='background-color: #fef08a; font-weight: 600; "
        "padding: 0.1rem 0.2rem; border-radius: 4px;'>"
    )

    # Drop blanks and duplicates; try longer phrases first so that
    # "climate agreement" is preferred over "climate" at the same position.
    candidates = sorted(
        dict.fromkeys(kw for kw in keywords if kw and kw.strip()),
        key=len,
        reverse=True,
    )
    if not candidates:
        return html.escape(text, quote=False)

    alternatives = "|".join(re.escape(kw) for kw in candidates)
    pattern = re.compile(rf"\b({alternatives})\b", re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(
            f"{span_open}{html.escape(match.group(1), quote=False)}</span>"
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)
159
 
160
 
def _render_source_results(column, source_name: str, article_text: str) -> None:
    """Analyze one article and draw its metrics, charts and highlighted text.

    Args:
        column: Streamlit column container that receives the output.
        source_name: Display name used in the spinner text (e.g. "Source A").
        article_text: Raw article text taken from the matching text area.
    """
    with st.spinner(f"Processing {source_name}..."):
        results = analyze_article(article_text)

    with column:
        # Top metrics
        m1, m2 = st.columns(2)
        m1.metric(
            "Subjectivity",
            f"{results['subjectivity_score']:.2f}",
            help="0.0 is entirely factual/objective. 1.0 is highly opinionated.",
        )
        m2.metric("Primary Tone", results["primary_tone"].title())

        # Charts
        st.plotly_chart(
            _create_sentiment_gauge(results["sentiment_score"], "Sentiment Bias"),
            use_container_width=True,
        )
        st.plotly_chart(
            _create_tone_bar_chart(results["tone_scores"]),
            use_container_width=True,
        )

        # Context highlighting
        st.markdown("**Semantic Fingerprint (Keyphrases):**")
        annotated_text = _highlight_keywords(article_text, results["keywords"])
        st.markdown(
            f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>",
            unsafe_allow_html=True,
        )


st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")

st.title("FrameVis")
st.markdown("##### Media bias and framing effects across global news sources.")
st.divider()

# Load the cached models up front so the first analysis does not pay for it.
with st.spinner("Starting NLP model..."):
    _load_nlp_models()

# Source inputs
col1, col2 = st.columns(2)

with col1:
    user_article_a = st.text_area(
        "Source A",
        value=ARTICLE_A.strip(),
        height=220,
        help="Paste the raw text of the first article you wish to analyze.",
    )
    should_analyze_a = st.button("Process Source A", use_container_width=True)

with col2:
    user_article_b = st.text_area(
        "Source B",
        value=ARTICLE_B.strip(),
        height=220,
        help="Paste the raw text of the second article for comparison.",
    )
    should_analyze_b = st.button("Process Source B", use_container_width=True)

st.write("")  # Spacer

# Analysis display
if should_analyze_a or should_analyze_b:
    st.markdown("### Framing Comparison")
    res_col1, res_col2 = st.columns(2)

    # `.strip()` keeps whitespace-only input from being sent to the models.
    if should_analyze_a and user_article_a.strip():
        _render_source_results(res_col1, "Source A", user_article_a)

    if should_analyze_b and user_article_b.strip():
        _render_source_results(res_col2, "Source B", user_article_b)