NKessler committed on
Commit
d459853
·
verified ·
1 Parent(s): 84c0e52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -13,10 +13,10 @@ from huggingface_hub import InferenceClient
13
  import nltk
14
  import os
15
 
16
- # --- INITIALIZATION ---
17
  @st.cache_resource
18
  def _initialize_app():
19
- """Downloads lightweight NLTK data needed for highlighting."""
20
  try:
21
  nltk.data.find('tokenizers/punkt')
22
  except LookupError:
@@ -30,7 +30,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
30
  client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
31
 
32
  # constants
33
- MAX_TEXT_LENGTH = 2000 # ~400 words (safe token limit for fast API inference)
34
 
35
  ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
36
  ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
@@ -38,10 +38,10 @@ ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves throu
38
  URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
39
  URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
40
 
41
- # --- CORE LOGIC ---
42
 
43
  def _extract_json_from_llm(response_text: str) -> dict:
44
- """Robustly extracts JSON from an LLM response, ignoring markdown formatting."""
45
  try:
46
  match = re.search(r'\{.*\}', response_text, re.DOTALL)
47
  if match:
@@ -54,7 +54,7 @@ def _extract_json_from_llm(response_text: str) -> dict:
54
  }
55
 
56
  def analyze_article(text: str) -> dict:
57
- """Analyzes framing using an LLM API and calculates local readability."""
58
  safe_text = text[:MAX_TEXT_LENGTH]
59
 
60
  prompt = f"""
@@ -139,9 +139,9 @@ def fetch_article_text(url: str) -> str:
139
 
140
  return "Error: Could not extract text. The site may be protected by paywalls."
141
 
142
- # --- UI COMPONENTS ---
143
  def _highlight_framing_words(text: str) -> str:
144
- """Highlights specific subjective or emotional words in the text snippet."""
145
  raw_sentences = re.split(r'(?<=[.!?]) +', text)
146
  snippet = " ".join(raw_sentences[:3])
147
  if not snippet: return ""
@@ -191,7 +191,7 @@ def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figur
191
  )
192
  return fig
193
 
194
- # --- STREAMLIT UI ---
195
  st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
196
  st.markdown("""
197
  <style>
@@ -216,12 +216,12 @@ with col1:
216
  if input_method == "Paste Text": user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
217
  else:
218
  url_a = st.text_input("Source A URL", value=URL_A)
219
- with st.spinner("Scraping Source A..."): user_article_a = fetch_article_text(url_a) if url_a else ""
220
  with col2:
221
  if input_method == "Paste Text": user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
222
  else:
223
  url_b = st.text_input("Source B URL", value=URL_B)
224
- with st.spinner("Scraping Source B..."): user_article_b = fetch_article_text(url_b) if url_b else ""
225
 
226
  if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
227
  text_a_clean = user_article_a.strip() if user_article_a else ""
 
13
  import nltk
14
  import os
15
 
16
+ # INITIALIZATION
17
  @st.cache_resource
18
  def _initialize_app():
19
+ """Downloads NLTK data needed for highlighting."""
20
  try:
21
  nltk.data.find('tokenizers/punkt')
22
  except LookupError:
 
30
  client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
31
 
32
  # constants
33
+ MAX_TEXT_LENGTH = 2000 # 400 words
34
 
35
  ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
36
  ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
 
38
  URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
39
  URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
40
 
41
+ # CORE LOGIC
42
 
43
  def _extract_json_from_llm(response_text: str) -> dict:
44
+ """Extracts JSON from an LLM response, ignoring markdown formatting."""
45
  try:
46
  match = re.search(r'\{.*\}', response_text, re.DOTALL)
47
  if match:
 
54
  }
55
 
56
  def analyze_article(text: str) -> dict:
57
+ """Analyzes framing using an LLM API and calculates readability."""
58
  safe_text = text[:MAX_TEXT_LENGTH]
59
 
60
  prompt = f"""
 
139
 
140
  return "Error: Could not extract text. The site may be protected by paywalls."
141
 
142
+ # UI
143
  def _highlight_framing_words(text: str) -> str:
144
+ """Highlights subjective or emotional words in the text snippet."""
145
  raw_sentences = re.split(r'(?<=[.!?]) +', text)
146
  snippet = " ".join(raw_sentences[:3])
147
  if not snippet: return ""
 
191
  )
192
  return fig
193
 
194
+ # STREAMLIT UI
195
  st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
196
  st.markdown("""
197
  <style>
 
216
  if input_method == "Paste Text": user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
217
  else:
218
  url_a = st.text_input("Source A URL", value=URL_A)
219
+ with st.spinner("Scraping Source A."): user_article_a = fetch_article_text(url_a) if url_a else ""
220
  with col2:
221
  if input_method == "Paste Text": user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
222
  else:
223
  url_b = st.text_input("Source B URL", value=URL_B)
224
+ with st.spinner("Scraping Source B."): user_article_b = fetch_article_text(url_b) if url_b else ""
225
 
226
  if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
227
  text_a_clean = user_article_a.strip() if user_article_a else ""