NKessler committed on
Commit
7da623a
·
verified ·
1 Parent(s): 2d595a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -93
app.py CHANGED
@@ -6,24 +6,14 @@ import streamlit as st
6
  from keybert import KeyBERT
7
  from textblob import TextBlob
8
  from transformers import pipeline
 
9
 
10
  # constants
11
  MAX_TEXT_LENGTH = 1500
12
 
13
- # tone list
14
- CANDIDATE_TONES = [
15
- "objective", "alarmist", "defensive", "optimistic", "critical",
16
- "sensationalist", "somber", "sympathetic", "hostile", "satirical",
17
- "urgent", "dismissive", "patriotic", "cynical", "apologetic"
18
- ]
19
 
20
- ARTICLE_A = """
21
- Global leaders achieved a historic breakthrough today, signing a comprehensive climate accord aimed at drastically slashing carbon emissions by 2030. Environmental advocates are celebrating the mandate, which forces heavy-polluting industries to finally take accountability for their ecological damage. While corporations warn of transition costs, scientists emphasize that failing to act now would result in catastrophic, irreversible damage to our planet's fragile ecosystems.
22
- """
23
-
24
- ARTICLE_B = """
25
- A sweeping new climate agreement signed today is drawing fierce criticism from industry leaders, who warn the aggressive emission targets will cripple economic growth. The heavy-handed regulations impose massive compliance costs on the manufacturing and energy sectors, inevitably leading to significant job losses and higher prices for consumers. Critics argue the rushed accord prioritizes bureaucratic posturing over practical, market-driven solutions to environmental concerns.
26
- """
27
 
28
 
29
  @st.cache_resource
@@ -32,61 +22,81 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
32
  Loads NLP models into memory and caches them.
33
  """
34
  sentiment_analyzer = pipeline(
35
- "sentiment-analysis",
36
- model="distilbert-base-uncased-finetuned-sst-2-english",
 
 
 
 
 
37
  )
38
- tone_classifier = pipeline(
39
- "zero-shot-classification",
40
- model="typeform/distilbert-base-uncased-mnli",
 
41
  )
42
  keyword_extractor = KeyBERT(model="all-mpnet-base-v2")
43
 
44
  return {
45
  "sentiment": sentiment_analyzer,
46
- "tone": tone_classifier,
 
47
  "keyword": keyword_extractor,
48
  }
49
 
50
 
51
  def analyze_article(text: str) -> dict:
52
- """Analyzes framing using semantic keyphrases, sentiment, tone and subjectivity."""
53
  models = _load_nlp_models()
54
  safe_text = text[:MAX_TEXT_LENGTH]
55
 
56
  # Sentiment Analysis
57
  sentiment_result = models["sentiment"](safe_text)[0]
58
- is_positive = sentiment_result["label"] == "POSITIVE"
59
- sentiment_score = (
60
- sentiment_result["score"] if is_positive else -sentiment_result["score"]
61
- )
62
-
63
- # Tone Classification
64
- tone_result = models["tone"](safe_text, CANDIDATE_TONES)
65
- tone_scores = {
66
- label: score
67
- for label, score in zip(tone_result["labels"][:5], tone_result["scores"][:5])
68
- }
 
 
 
 
 
 
69
 
70
  # Semantic Keyword Extraction
71
  keyword_results = models["keyword"].extract_keywords(
72
- safe_text,
73
- keyphrase_ngram_range=(1, 3),
74
- stop_words="english",
75
  use_mmr=True,
76
  diversity=0.6,
77
  top_n=5
78
  )
79
  extracted_keywords = [kw[0] for kw in keyword_results]
80
 
81
- # Subjectivity Analysis
 
 
 
 
82
  subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
 
83
 
84
  return {
85
  "sentiment_score": sentiment_score,
86
  "subjectivity_score": subjectivity_score,
87
- "primary_tone": tone_result["labels"][0],
 
88
  "tone_scores": tone_scores,
89
  "keywords": extracted_keywords,
 
90
  }
91
 
92
 
@@ -102,9 +112,9 @@ def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
102
  "axis": {"range": [-1, 1], "tickwidth": 1},
103
  "bar": {"color": "darkblue"},
104
  "steps": [
105
- {"range": [-1, -0.2], "color": "#ffb3b3"},
106
- {"range": [-0.2, 0.2], "color": "#f2f2f2"},
107
- {"range": [0.2, 1], "color": "#b3ffb3"},
108
  ],
109
  },
110
  )
@@ -113,28 +123,32 @@ def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
113
  return fig
114
 
115
 
116
- def _create_tone_bar_chart(tone_scores: typing.Dict[str, float]) -> go.Figure:
117
- """Generates a horizontal bar chart showing tone probabilities."""
118
- labels = list(tone_scores.keys())
119
- values = list(tone_scores.values())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- fig = go.Figure(
122
- go.Bar(
123
- x=values,
124
- y=labels,
125
- orientation="h",
126
- marker_color="#4f46e5",
127
- marker_line_color="white",
128
- marker_line_width=1.5
129
- )
130
- )
131
  fig.update_layout(
132
- title={"text": "Top 5 Emotional Tones", "font": {"size": 16}},
133
- xaxis_title="Confidence Matrix",
134
- height=280,
135
- margin=dict(l=20, r=20, t=60, b=20),
136
- yaxis={"categoryorder": "total ascending"},
137
- plot_bgcolor="rgba(0,0,0,0)",
138
  )
139
  return fig
140
 
@@ -164,56 +178,67 @@ st.title("FrameVis")
164
  st.markdown("##### Media bias and framing effects across global news sources.")
165
  st.divider()
166
 
167
- with st.spinner("Starting NLP model."):
168
  _load_nlp_models()
169
 
170
  col1, col2 = st.columns(2)
171
 
172
  with col1:
173
  user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
174
- if st.button("Analyze A", use_container_width=True):
175
- with st.spinner("Processing Source A."):
176
- st.session_state.results_a = analyze_article(user_article_a)
177
 
178
  with col2:
179
  user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
180
- if st.button("Analyze B", use_container_width=True):
181
- with st.spinner("Processing Source B."):
182
- st.session_state.results_b = analyze_article(user_article_b)
183
 
184
- st.write("")
185
 
186
- # Analysis Display
187
- if st.session_state.results_a or st.session_state.results_b:
 
 
 
 
 
 
 
188
  st.markdown("### Framing Analytics & Comparison")
 
 
 
 
189
  res_col1, res_col2 = st.columns(2)
190
 
191
  # Render Column A
192
  with res_col1:
193
- if st.session_state.results_a:
194
- r_a = st.session_state.results_a
195
- m1, m2 = st.columns(2)
196
- m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}", help="0.0 = Objective, 1.0 = Highly Opinionated")
197
- m2.metric("Primary Tone", r_a['primary_tone'].title())
198
-
199
- st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True)
200
- st.plotly_chart(_create_tone_bar_chart(r_a["tone_scores"]), use_container_width=True)
201
-
202
- st.markdown("**Key Framing Language:**")
203
- annotated_text = _highlight_keywords(user_article_a, r_a["keywords"])
204
- st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
 
 
 
205
 
206
  # Render Column B
207
  with res_col2:
208
- if st.session_state.results_b:
209
- r_b = st.session_state.results_b
210
- m1, m2 = st.columns(2)
211
- m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}", help="0.0 = Objective, 1.0 = Highly Opinionated")
212
- m2.metric("Primary Tone", r_b['primary_tone'].title())
213
-
214
- st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True)
215
- st.plotly_chart(_create_tone_bar_chart(r_b["tone_scores"]), use_container_width=True)
216
-
217
- st.markdown("**Key Framing Language:**")
218
- annotated_text = _highlight_keywords(user_article_b, r_b["keywords"])
219
- st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
 
 
 
 
6
  from keybert import KeyBERT
7
  from textblob import TextBlob
8
  from transformers import pipeline
9
+ import textstat
10
 
11
  # constants
12
  MAX_TEXT_LENGTH = 1500
13
 
14
+ ARTICLE_A = """Global leaders achieved a historic breakthrough today, signing a comprehensive climate accord aimed at drastically slashing carbon emissions by 2030. Environmental advocates are celebrating the mandate, which forces heavy-polluting industries to finally take accountability for their ecological damage. While corporations warn of transition costs, scientists emphasize that failing to act now would result in catastrophic, irreversible damage to our planet's fragile ecosystems."""
 
 
 
 
 
15
 
16
+ ARTICLE_B = """A sweeping new climate agreement signed today is drawing fierce criticism from industry leaders, who warn the aggressive emission targets will cripple economic growth. The heavy-handed regulations impose massive compliance costs on the manufacturing and energy sectors, inevitably leading to significant job losses and higher prices for consumers. Critics argue the rushed accord prioritizes bureaucratic posturing over practical, market-driven solutions to environmental concerns."""
 
 
 
 
 
 
17
 
18
 
19
  @st.cache_resource
 
22
  Loads NLP models into memory and caches them.
23
  """
24
  sentiment_analyzer = pipeline(
25
+ "text-classification",
26
+ model="cardiffnlp/twitter-roberta-base-sentiment-latest",
27
+ )
28
+ emotion_classifier = pipeline(
29
+ "text-classification",
30
+ model="SamLowe/roberta-base-go_emotions",
31
+ top_k=5
32
  )
33
+ ner_extractor = pipeline(
34
+ "ner",
35
+ model="dslim/bert-base-NER",
36
+ aggregation_strategy="simple"
37
  )
38
  keyword_extractor = KeyBERT(model="all-mpnet-base-v2")
39
 
40
  return {
41
  "sentiment": sentiment_analyzer,
42
+ "emotion": emotion_classifier,
43
+ "ner": ner_extractor,
44
  "keyword": keyword_extractor,
45
  }
46
 
47
 
48
def analyze_article(text: str) -> dict:
    """Analyze the framing of a single news article.

    Runs sentiment, emotion, semantic keyphrase, named-entity, subjectivity
    and readability analyses over (at most) the first MAX_TEXT_LENGTH
    characters of *text*.

    Args:
        text: Raw article text.

    Returns:
        dict with keys:
            sentiment_score: signed confidence in [-1, 1] (negative label
                maps to a negative score, neutral to 0.0).
            subjectivity_score: TextBlob subjectivity in [0, 1].
            reading_ease: Flesch reading-ease score.
            primary_tone: top emotion label.
            tone_scores: mapping of the top emotion labels to scores.
            keywords: up to 5 extracted keyphrases.
            entities: deduplicated named entities (first-seen order).
    """
    models = _load_nlp_models()
    # Truncate to keep inputs within model context limits and bound latency.
    safe_text = text[:MAX_TEXT_LENGTH]

    # Sentiment Analysis: fold label + confidence into one signed score.
    sentiment_result = models["sentiment"](safe_text)[0]
    label = sentiment_result["label"].lower()
    score = sentiment_result["score"]
    if label == "negative":
        sentiment_score = -score
    elif label == "positive":
        sentiment_score = score
    else:
        sentiment_score = 0.0  # Neutral

    # Emotion Classification. The pipeline is configured with top_k=5, so a
    # list of {label, score} dicts is expected; fall back defensively if a
    # single dict comes back instead.
    emotion_results = models["emotion"](safe_text)[0]
    if isinstance(emotion_results, list):
        tone_scores = {res["label"]: res["score"] for res in emotion_results}
        primary_tone = emotion_results[0]["label"]
    else:
        tone_scores = {"neutral": 1.0}
        primary_tone = "neutral"

    # Semantic Keyword Extraction (MMR encourages diverse phrases).
    keyword_results = models["keyword"].extract_keywords(
        safe_text,
        keyphrase_ngram_range=(1, 3),
        stop_words="english",
        use_mmr=True,
        diversity=0.6,
        top_n=5,
    )
    extracted_keywords = [kw[0] for kw in keyword_results]

    # Named Entity Recognition. Deduplicate with dict.fromkeys to preserve
    # first-seen order: list(set(...)) produced a nondeterministic ordering
    # that reshuffled the entity list in the UI on every rerun.
    ner_results = models["ner"](safe_text)
    extracted_entities = list(dict.fromkeys(
        ent["word"] for ent in ner_results if ent["score"] > 0.6
    ))

    # Subjectivity & Readability Analysis
    subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
    reading_ease = textstat.flesch_reading_ease(safe_text)

    return {
        "sentiment_score": sentiment_score,
        "subjectivity_score": subjectivity_score,
        "reading_ease": reading_ease,
        "primary_tone": primary_tone,
        "tone_scores": tone_scores,
        "keywords": extracted_keywords,
        "entities": extracted_entities,
    }
101
 
102
 
 
112
  "axis": {"range": [-1, 1], "tickwidth": 1},
113
  "bar": {"color": "darkblue"},
114
  "steps": [
115
+ {"range": [-1, -0.2], "color": "#ffb3b3"},
116
+ {"range": [-0.2, 0.2], "color": "#f2f2f2"},
117
+ {"range": [0.2, 1], "color": "#b3ffb3"},
118
  ],
119
  },
120
  )
 
123
  return fig
124
 
125
 
126
def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figure:
    """Generate an overlapping radar chart comparing the two sources' emotions.

    Args:
        results_a: analyze_article() output for Source A (reads "tone_scores").
        results_b: analyze_article() output for Source B.

    Returns:
        A plotly Figure with one closed Scatterpolar trace per source
        (empty polar layout if neither source has tone scores).
    """
    # Union of emotion labels, sorted so the axis layout is deterministic:
    # a raw set() ordering reshuffled the chart on every Streamlit rerun.
    categories = sorted(set(results_a["tone_scores"]) | set(results_b["tone_scores"]))

    fig = go.Figure()
    if categories:  # guard: closing the loop below indexes categories[0]
        val_a = [results_a["tone_scores"].get(c, 0) for c in categories]
        val_b = [results_b["tone_scores"].get(c, 0) for c in categories]

        # Close the radar loop by repeating the first point.
        categories.append(categories[0])
        val_a.append(val_a[0])
        val_b.append(val_b[0])

        fig.add_trace(go.Scatterpolar(
            r=val_a, theta=categories, fill='toself', name='Source A', line_color='#4f46e5'
        ))
        fig.add_trace(go.Scatterpolar(
            r=val_b, theta=categories, fill='toself', name='Source B', line_color='#10b981'
        ))

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
        showlegend=True,
        title={"text": "Relative Emotion Profile", "font": {"size": 18}},
        height=450,
        margin=dict(l=40, r=40, t=60, b=40),
    )
    return fig
154
 
 
178
  st.markdown("##### Media bias and framing effects across global news sources.")
179
  st.divider()
180
 
181
+ with st.spinner("Starting NLP models."):
182
  _load_nlp_models()
183
 
184
  col1, col2 = st.columns(2)
185
 
186
  with col1:
187
  user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
 
 
 
188
 
189
  with col2:
190
  user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
 
 
 
191
 
192
st.write("")

# Single unified execution button: analyze both sources in one pass so the
# comparison chart below always has a matching pair of results.
if st.button("Analyze & Compare Sources", use_container_width=True, type="primary"):
    with st.spinner("Analyzing framing semantics for both sources."):
        st.session_state.results_a = analyze_article(user_article_a)
        st.session_state.results_b = analyze_article(user_article_b)

# Analysis Display. Read via .get() so a fresh session (before any analysis
# has run, or if session-state defaults are not seeded elsewhere) cannot
# raise on attribute access.
r_a = st.session_state.get("results_a")
r_b = st.session_state.get("results_b")
if r_a and r_b:
    st.divider()
    st.markdown("### Framing Analytics & Comparison")

    # Radar Chart spans the top
    st.plotly_chart(_create_comparison_radar_chart(r_a, r_b), use_container_width=True)

    res_col1, res_col2 = st.columns(2)

    # Render Column A
    with res_col1:
        st.markdown("#### Source A Breakdown")
        m1, m2, m3 = st.columns(3)
        m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}")
        m2.metric("Primary Emotion", r_a['primary_tone'].title())
        m3.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")

        st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True)

        if r_a["entities"]:
            st.markdown(f"**Extracted Entities:** `{', '.join(r_a['entities'])}`")

        st.markdown("**Key Framing Language:**")
        annotated_text = _highlight_keywords(user_article_a, r_a["keywords"])
        st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)

    # Render Column B
    with res_col2:
        st.markdown("#### Source B Breakdown")
        m1, m2, m3 = st.columns(3)
        m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}")
        m2.metric("Primary Emotion", r_b['primary_tone'].title())
        m3.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")

        st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True)

        if r_b["entities"]:
            st.markdown(f"**Extracted Entities:** `{', '.join(r_b['entities'])}`")

        st.markdown("**Key Framing Language:**")
        annotated_text = _highlight_keywords(user_article_b, r_b["keywords"])
        st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)