Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,18 +1,14 @@
|
|
| 1 |
# imports
|
| 2 |
import re
|
| 3 |
-
import typing
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
import streamlit as st
|
| 6 |
-
from textblob import TextBlob
|
| 7 |
import json
|
| 8 |
import google.generativeai as genai
|
| 9 |
-
import os
|
| 10 |
import concurrent.futures
|
| 11 |
import textstat
|
| 12 |
import trafilatura
|
| 13 |
import requests
|
| 14 |
from bs4 import BeautifulSoup
|
| 15 |
-
import nltk
|
| 16 |
|
| 17 |
# constants
|
| 18 |
MAX_WORDS = 400
|
|
@@ -36,17 +32,6 @@ def _truncate_to_words(text: str, limit: int) -> str:
|
|
| 36 |
words = text.split()
|
| 37 |
return " ".join(words[:limit])
|
| 38 |
|
| 39 |
-
@st.cache_resource
|
| 40 |
-
def _initialize_app():
|
| 41 |
-
"""Downloads lightweight NLTK data needed for highlighting."""
|
| 42 |
-
try:
|
| 43 |
-
nltk.data.find('tokenizers/punkt')
|
| 44 |
-
except LookupError:
|
| 45 |
-
nltk.download('punkt')
|
| 46 |
-
nltk.download('punkt_tab')
|
| 47 |
-
|
| 48 |
-
_initialize_app()
|
| 49 |
-
|
| 50 |
def analyze_article(text: str) -> dict:
|
| 51 |
"""Analyzes framing using an LLM API and calculates readability."""
|
| 52 |
safe_text = _truncate_to_words(text, MAX_WORDS)
|
|
@@ -61,6 +46,7 @@ def analyze_article(text: str) -> dict:
|
|
| 61 |
"primary_theme": Choose ONE from: ["economic consequences", "moral and ethical fairness", "legal and bureaucratic", "public safety and health"].
|
| 62 |
"tone_scores": A dictionary scoring THESE EXACT 6 EMOTIONS from 0.0 to 1.0: {{"anger": 0.0, "fear": 0.0, "joy": 0.0, "sadness": 0.0, "surprise": 0.0, "trust": 0.0}}.
|
| 63 |
"framing_words": A list of the 5 to 8 most emotionally charged, biased, or subjective words used in the text (e.g., ["draconian", "slammed", "titans", "catastrophic"]).
|
|
|
|
| 64 |
|
| 65 |
Text to analyze:
|
| 66 |
"{safe_text}"
|
|
@@ -74,7 +60,6 @@ def analyze_article(text: str) -> dict:
|
|
| 74 |
)
|
| 75 |
llm_data = json.loads(response.text)
|
| 76 |
|
| 77 |
-
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
| 78 |
raw_reading_ease = textstat.flesch_reading_ease(safe_text)
|
| 79 |
|
| 80 |
tones = llm_data.get("tone_scores", {})
|
|
@@ -93,7 +78,7 @@ def analyze_article(text: str) -> dict:
|
|
| 93 |
"primary_theme": llm_data.get("primary_theme", "unclear"),
|
| 94 |
"tone_scores": standard_tones,
|
| 95 |
"framing_words": llm_data.get("framing_words", []),
|
| 96 |
-
"subjectivity_score": subjectivity_score,
|
| 97 |
"reading_ease": max(0.0, min(100.0, raw_reading_ease)),
|
| 98 |
}
|
| 99 |
|
|
@@ -249,24 +234,40 @@ st.markdown("""
|
|
| 249 |
footer {visibility: hidden;}
|
| 250 |
header {visibility: hidden;}
|
| 251 |
|
|
|
|
| 252 |
.block-container {
|
| 253 |
padding-top: 2rem;
|
| 254 |
padding-bottom: 2rem;
|
|
|
|
| 255 |
}
|
| 256 |
|
|
|
|
| 257 |
[data-testid="stMetric"] {
|
| 258 |
-
background-color: #
|
| 259 |
border: 1px solid #e2e8f0;
|
| 260 |
-
border-radius:
|
| 261 |
-
padding:
|
| 262 |
-
box-shadow: 0 1px
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
}
|
| 264 |
|
| 265 |
[data-testid="stMetricValue"] > div {
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
}
|
| 271 |
</style>
|
| 272 |
""", unsafe_allow_html=True)
|
|
@@ -283,28 +284,30 @@ st.title("FrameVis")
|
|
| 283 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
| 284 |
st.divider()
|
| 285 |
|
| 286 |
-
|
|
|
|
|
|
|
| 287 |
|
| 288 |
-
col1, col2 = st.columns(2)
|
| 289 |
|
| 290 |
-
with col1:
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
|
| 297 |
-
with col2:
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
|
| 304 |
-
st.write("")
|
|
|
|
| 305 |
|
| 306 |
-
|
| 307 |
-
if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
|
| 308 |
|
| 309 |
text_a_clean = user_article_a.strip() if user_article_a else ""
|
| 310 |
text_b_clean = user_article_b.strip() if user_article_b else ""
|
|
|
|
| 1 |
# imports
|
| 2 |
import re
|
|
|
|
| 3 |
import plotly.graph_objects as go
|
| 4 |
import streamlit as st
|
|
|
|
| 5 |
import json
|
| 6 |
import google.generativeai as genai
|
|
|
|
| 7 |
import concurrent.futures
|
| 8 |
import textstat
|
| 9 |
import trafilatura
|
| 10 |
import requests
|
| 11 |
from bs4 import BeautifulSoup
|
|
|
|
| 12 |
|
| 13 |
# constants
|
| 14 |
MAX_WORDS = 400
|
|
|
|
| 32 |
words = text.split()
|
| 33 |
return " ".join(words[:limit])
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
def analyze_article(text: str) -> dict:
|
| 36 |
"""Analyzes framing using an LLM API and calculates readability."""
|
| 37 |
safe_text = _truncate_to_words(text, MAX_WORDS)
|
|
|
|
| 46 |
"primary_theme": Choose ONE from: ["economic consequences", "moral and ethical fairness", "legal and bureaucratic", "public safety and health"].
|
| 47 |
"tone_scores": A dictionary scoring THESE EXACT 6 EMOTIONS from 0.0 to 1.0: {{"anger": 0.0, "fear": 0.0, "joy": 0.0, "sadness": 0.0, "surprise": 0.0, "trust": 0.0}}.
|
| 48 |
"framing_words": A list of the 5 to 8 most emotionally charged, biased, or subjective words used in the text (e.g., ["draconian", "slammed", "titans", "catastrophic"]).
|
| 49 |
+
"subjectivity_score": A float between 0.0 (completely objective/factual) and 1.0 (highly opinionated/subjective).
|
| 50 |
|
| 51 |
Text to analyze:
|
| 52 |
"{safe_text}"
|
|
|
|
| 60 |
)
|
| 61 |
llm_data = json.loads(response.text)
|
| 62 |
|
|
|
|
| 63 |
raw_reading_ease = textstat.flesch_reading_ease(safe_text)
|
| 64 |
|
| 65 |
tones = llm_data.get("tone_scores", {})
|
|
|
|
| 78 |
"primary_theme": llm_data.get("primary_theme", "unclear"),
|
| 79 |
"tone_scores": standard_tones,
|
| 80 |
"framing_words": llm_data.get("framing_words", []),
|
| 81 |
+
"subjectivity_score": llm_data.get("subjectivity_score", 0.0),
|
| 82 |
"reading_ease": max(0.0, min(100.0, raw_reading_ease)),
|
| 83 |
}
|
| 84 |
|
|
|
|
| 234 |
footer {visibility: hidden;}
|
| 235 |
header {visibility: hidden;}
|
| 236 |
|
| 237 |
+
/* Clean up the main container */
|
| 238 |
.block-container {
|
| 239 |
padding-top: 2rem;
|
| 240 |
padding-bottom: 2rem;
|
| 241 |
+
max-width: 1200px;
|
| 242 |
}
|
| 243 |
|
| 244 |
+
/* Commercial-grade metric cards */
|
| 245 |
[data-testid="stMetric"] {
|
| 246 |
+
background-color: #ffffff;
|
| 247 |
border: 1px solid #e2e8f0;
|
| 248 |
+
border-radius: 12px;
|
| 249 |
+
padding: 20px;
|
| 250 |
+
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -1px rgba(0, 0, 0, 0.03);
|
| 251 |
+
transition: transform 0.2s ease-in-out;
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
[data-testid="stMetric"]:hover {
|
| 255 |
+
transform: translateY(-2px);
|
| 256 |
+
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
|
| 257 |
}
|
| 258 |
|
| 259 |
[data-testid="stMetricValue"] > div {
|
| 260 |
+
color: #0f172a;
|
| 261 |
+
font-weight: 700 !important;
|
| 262 |
+
font-size: 1.8rem !important;
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
[data-testid="stMetricLabel"] > div {
|
| 266 |
+
color: #64748b;
|
| 267 |
+
font-weight: 500;
|
| 268 |
+
text-transform: uppercase;
|
| 269 |
+
letter-spacing: 0.05em;
|
| 270 |
+
font-size: 0.85rem;
|
| 271 |
}
|
| 272 |
</style>
|
| 273 |
""", unsafe_allow_html=True)
|
|
|
|
| 284 |
st.markdown("##### Media bias and framing effects across global news sources.")
|
| 285 |
st.divider()
|
| 286 |
|
| 287 |
+
with st.container(border=True):
|
| 288 |
+
st.markdown("#### Configure Data Sources")
|
| 289 |
+
input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
|
| 290 |
|
| 291 |
+
col1, col2 = st.columns(2)
|
| 292 |
|
| 293 |
+
with col1:
|
| 294 |
+
if input_method == "Paste Text":
|
| 295 |
+
user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=180)
|
| 296 |
+
else:
|
| 297 |
+
url_a = st.text_input("Source A URL", value=URL_A)
|
| 298 |
+
user_article_a = fetch_article_text(url_a) if url_a else ""
|
| 299 |
|
| 300 |
+
with col2:
|
| 301 |
+
if input_method == "Paste Text":
|
| 302 |
+
user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=180)
|
| 303 |
+
else:
|
| 304 |
+
url_b = st.text_input("Source B URL", value=URL_B)
|
| 305 |
+
user_article_b = fetch_article_text(url_b) if url_b else ""
|
| 306 |
|
| 307 |
+
st.write("")
|
| 308 |
+
execute_analysis = st.button("Analyze and Compare Sources", use_container_width=True, type="primary")
|
| 309 |
|
| 310 |
+
if execute_analysis:
|
|
|
|
| 311 |
|
| 312 |
text_a_clean = user_article_a.strip() if user_article_a else ""
|
| 313 |
text_b_clean = user_article_b.strip() if user_article_b else ""
|