Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,10 +13,10 @@ from huggingface_hub import InferenceClient
|
|
| 13 |
import nltk
|
| 14 |
import os
|
| 15 |
|
| 16 |
-
#
|
| 17 |
@st.cache_resource
|
| 18 |
def _initialize_app():
|
| 19 |
-
"""Downloads
|
| 20 |
try:
|
| 21 |
nltk.data.find('tokenizers/punkt')
|
| 22 |
except LookupError:
|
|
@@ -30,7 +30,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
| 30 |
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
|
| 31 |
|
| 32 |
# constants
|
| 33 |
-
MAX_TEXT_LENGTH = 2000 #
|
| 34 |
|
| 35 |
ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
|
| 36 |
ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
|
|
@@ -38,10 +38,10 @@ ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves throu
|
|
| 38 |
URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
|
| 39 |
URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
|
| 40 |
|
| 41 |
-
#
|
| 42 |
|
| 43 |
def _extract_json_from_llm(response_text: str) -> dict:
|
| 44 |
-
"""
|
| 45 |
try:
|
| 46 |
match = re.search(r'\{.*\}', response_text, re.DOTALL)
|
| 47 |
if match:
|
|
@@ -54,7 +54,7 @@ def _extract_json_from_llm(response_text: str) -> dict:
|
|
| 54 |
}
|
| 55 |
|
| 56 |
def analyze_article(text: str) -> dict:
|
| 57 |
-
"""Analyzes framing using an LLM API and calculates
|
| 58 |
safe_text = text[:MAX_TEXT_LENGTH]
|
| 59 |
|
| 60 |
prompt = f"""
|
|
@@ -139,9 +139,9 @@ def fetch_article_text(url: str) -> str:
|
|
| 139 |
|
| 140 |
return "Error: Could not extract text. The site may be protected by paywalls."
|
| 141 |
|
| 142 |
-
#
|
| 143 |
def _highlight_framing_words(text: str) -> str:
|
| 144 |
-
"""Highlights
|
| 145 |
raw_sentences = re.split(r'(?<=[.!?]) +', text)
|
| 146 |
snippet = " ".join(raw_sentences[:3])
|
| 147 |
if not snippet: return ""
|
|
@@ -191,7 +191,7 @@ def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figur
|
|
| 191 |
)
|
| 192 |
return fig
|
| 193 |
|
| 194 |
-
#
|
| 195 |
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
|
| 196 |
st.markdown("""
|
| 197 |
<style>
|
|
@@ -216,12 +216,12 @@ with col1:
|
|
| 216 |
if input_method == "Paste Text": user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
|
| 217 |
else:
|
| 218 |
url_a = st.text_input("Source A URL", value=URL_A)
|
| 219 |
-
with st.spinner("Scraping Source A.
|
| 220 |
with col2:
|
| 221 |
if input_method == "Paste Text": user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
|
| 222 |
else:
|
| 223 |
url_b = st.text_input("Source B URL", value=URL_B)
|
| 224 |
-
with st.spinner("Scraping Source B.
|
| 225 |
|
| 226 |
if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
|
| 227 |
text_a_clean = user_article_a.strip() if user_article_a else ""
|
|
|
|
| 13 |
import nltk
|
| 14 |
import os
|
| 15 |
|
| 16 |
+
# INITIALIZATION
|
| 17 |
@st.cache_resource
|
| 18 |
def _initialize_app():
|
| 19 |
+
"""Downloads NLTK data needed for highlighting."""
|
| 20 |
try:
|
| 21 |
nltk.data.find('tokenizers/punkt')
|
| 22 |
except LookupError:
|
|
|
|
| 30 |
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
|
| 31 |
|
| 32 |
# constants
|
| 33 |
+
MAX_TEXT_LENGTH = 2000 # max characters of article text kept for analysis (roughly 400 words)
|
| 34 |
|
| 35 |
ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
|
| 36 |
ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
|
|
|
|
| 38 |
URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
|
| 39 |
URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
|
| 40 |
|
| 41 |
+
# CORE LOGIC
|
| 42 |
|
| 43 |
def _extract_json_from_llm(response_text: str) -> dict:
|
| 44 |
+
"""Extracts JSON from an LLM response, ignoring markdown formatting."""
|
| 45 |
try:
|
| 46 |
match = re.search(r'\{.*\}', response_text, re.DOTALL)
|
| 47 |
if match:
|
|
|
|
| 54 |
}
|
| 55 |
|
| 56 |
def analyze_article(text: str) -> dict:
|
| 57 |
+
"""Analyzes framing using an LLM API and calculates readability."""
|
| 58 |
safe_text = text[:MAX_TEXT_LENGTH]
|
| 59 |
|
| 60 |
prompt = f"""
|
|
|
|
| 139 |
|
| 140 |
return "Error: Could not extract text. The site may be protected by paywalls."
|
| 141 |
|
| 142 |
+
# UI
|
| 143 |
def _highlight_framing_words(text: str) -> str:
|
| 144 |
+
"""Highlights subjective or emotional words in the text snippet."""
|
| 145 |
raw_sentences = re.split(r'(?<=[.!?]) +', text)
|
| 146 |
snippet = " ".join(raw_sentences[:3])
|
| 147 |
if not snippet: return ""
|
|
|
|
| 191 |
)
|
| 192 |
return fig
|
| 193 |
|
| 194 |
+
# STREAMLIT UI
|
| 195 |
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
|
| 196 |
st.markdown("""
|
| 197 |
<style>
|
|
|
|
| 216 |
if input_method == "Paste Text": user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
|
| 217 |
else:
|
| 218 |
url_a = st.text_input("Source A URL", value=URL_A)
|
| 219 |
+
with st.spinner("Scraping Source A."): user_article_a = fetch_article_text(url_a) if url_a else ""
|
| 220 |
with col2:
|
| 221 |
if input_method == "Paste Text": user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
|
| 222 |
else:
|
| 223 |
url_b = st.text_input("Source B URL", value=URL_B)
|
| 224 |
+
with st.spinner("Scraping Source B."): user_article_b = fetch_article_text(url_b) if url_b else ""
|
| 225 |
|
| 226 |
if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
|
| 227 |
text_a_clean = user_article_a.strip() if user_article_a else ""
|