Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ from bs4 import BeautifulSoup
|
|
| 15 |
import nltk
|
| 16 |
|
| 17 |
# constants
|
| 18 |
-
|
| 19 |
|
| 20 |
ARTICLE_A = """In a watershed moment for global tech governance, international regulatory bodies have introduced the comprehensive Artificial Intelligence Safeguard Act. For too long, Silicon Valley titans have operated in a wild west environment, prioritizing unchecked corporate greed and rapid deployment over public safety. This landmark legislation aims to establish rigorous ethical boundaries and mandatory safety audits before any advanced generative models can be released to the public. Proponents argue that without these essential guardrails, society faces catastrophic risks ranging from massive, unmitigated job displacement to the proliferation of deepfake-fueled misinformation that threatens the very fabric of our democratic institutions. "We cannot allow a handful of unelected tech billionaires to play roulette with humanity's future," stated the coalition's lead ethicist. By prioritizing human welfare over blind technological acceleration, the Act serves as a vital moral firewall, ensuring that the development of artificial general intelligence benefits society as a whole rather than just enriching the elite few."""
|
| 21 |
ARTICLE_B = """Tech industry leaders and economists are sounding the alarm over the newly proposed Artificial Intelligence Safeguard Act, warning that the draconian legislation will severely cripple the nation’s economic engine. Critics argue that the bill is a masterclass in bureaucratic overreach, drowning agile tech startups in layers of punitive red tape and effectively stifling the very innovation that drives modern prosperity. By mandating arbitrary algorithmic audits and imposing heavy-handed restrictions on model training, the government is poised to surrender our global competitive edge to foreign adversaries who are not bound by such paralyzing regulations. "This isn't about safety; it's an innovation tax that penalizes success," argued a prominent venture capitalist. Analysts project that this short-sighted policy will force thousands of AI researchers to relocate overseas, draining billions of dollars in investment capital from the domestic market. Ultimately, framing technological progress as an inherent danger will only succeed in legislating the industry into obsolescence, destroying millions of future private-sector jobs in the process."""
|
|
@@ -26,6 +26,13 @@ URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-t
|
|
| 26 |
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 27 |
client = Groq(api_key=GROQ_API_KEY)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
@st.cache_resource
|
| 30 |
def _initialize_app():
|
| 31 |
"""Downloads lightweight NLTK data needed for highlighting."""
|
|
@@ -37,19 +44,9 @@ def _initialize_app():
|
|
| 37 |
|
| 38 |
_initialize_app()
|
| 39 |
|
| 40 |
-
def _extract_json_from_llm(response_text: str) -> dict:
|
| 41 |
-
"""Extracts JSON from an LLM response, ignoring markdown formatting."""
|
| 42 |
-
try:
|
| 43 |
-
match = re.search(r'\{.*\}', response_text, re.DOTALL)
|
| 44 |
-
if match:
|
| 45 |
-
return json.loads(match.group(0))
|
| 46 |
-
return json.loads(response_text)
|
| 47 |
-
except Exception as e:
|
| 48 |
-
raise ValueError(f"Failed to parse LLM response into JSON. Raw response: {response_text[:150]}...")
|
| 49 |
-
|
| 50 |
def analyze_article(text: str) -> dict:
|
| 51 |
"""Analyzes framing using an LLM API and calculates readability."""
|
| 52 |
-
safe_text = text
|
| 53 |
|
| 54 |
prompt = f"""
|
| 55 |
You are an expert media analyst. Analyze the following news snippet for framing, bias, and emotion.
|
|
@@ -74,8 +71,7 @@ def analyze_article(text: str) -> dict:
|
|
| 74 |
temperature=0.1,
|
| 75 |
response_format={"type": "json_object"}
|
| 76 |
)
|
| 77 |
-
|
| 78 |
-
llm_data = _extract_json_from_llm(response_text)
|
| 79 |
|
| 80 |
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
| 81 |
raw_reading_ease = textstat.flesch_reading_ease(safe_text)
|
|
@@ -155,14 +151,12 @@ def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figur
|
|
| 155 |
|
| 156 |
|
| 157 |
def _highlight_framing_words(text: str, target_words: list) -> str:
|
| 158 |
-
"""Highlights LLM-identified framing words in the
|
| 159 |
-
|
|
|
|
| 160 |
return ""
|
| 161 |
-
|
| 162 |
-
words = text.split()
|
| 163 |
-
display_text = " ".join(words[:400]) + ("..." if len(words) > 400 else "")
|
| 164 |
|
| 165 |
-
highlighted_text = display_text
|
| 166 |
|
| 167 |
for word in target_words:
|
| 168 |
if len(word) > 2:
|
|
@@ -206,6 +200,9 @@ def fetch_article_text(url: str) -> str:
|
|
| 206 |
|
| 207 |
def check_contradiction(text_a: str, text_b: str) -> dict:
|
| 208 |
"""Uses the LLM to evaluate the stance between arguments."""
|
|
|
|
|
|
|
|
|
|
| 209 |
prompt = f"""
|
| 210 |
You are a fact-checking analyst. Compare these two news excerpts.
|
| 211 |
Return ONLY a valid JSON object with the exact keys below. Do not include markdown formatting.
|
|
@@ -214,8 +211,8 @@ def check_contradiction(text_a: str, text_b: str) -> dict:
|
|
| 214 |
"relationship": Choose ONE from: ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]. (Contradiction = disputing facts, Entailment = agreeing on premise).
|
| 215 |
"confidence": A float between 0.0 and 1.0 representing how confident you are.
|
| 216 |
|
| 217 |
-
Text 1: "{
|
| 218 |
-
Text 2: "{
|
| 219 |
"""
|
| 220 |
messages = [{"role": "user", "content": prompt}]
|
| 221 |
response = client.chat.completions.create(
|
|
@@ -225,8 +222,8 @@ def check_contradiction(text_a: str, text_b: str) -> dict:
|
|
| 225 |
temperature=0.1,
|
| 226 |
response_format={"type": "json_object"}
|
| 227 |
)
|
| 228 |
-
|
| 229 |
-
result =
|
| 230 |
return {"relationship": result.get("relationship", "NEUTRAL"), "confidence": result.get("confidence", 0.0)}
|
| 231 |
|
| 232 |
|
|
|
|
| 15 |
import nltk
|
| 16 |
|
| 17 |
# constants
|
| 18 |
+
# Word cap passed to _truncate_to_words for article text before LLM analysis,
# highlighting, and contradiction checks.
MAX_WORDS = 400
|
| 19 |
|
| 20 |
ARTICLE_A = """In a watershed moment for global tech governance, international regulatory bodies have introduced the comprehensive Artificial Intelligence Safeguard Act. For too long, Silicon Valley titans have operated in a wild west environment, prioritizing unchecked corporate greed and rapid deployment over public safety. This landmark legislation aims to establish rigorous ethical boundaries and mandatory safety audits before any advanced generative models can be released to the public. Proponents argue that without these essential guardrails, society faces catastrophic risks ranging from massive, unmitigated job displacement to the proliferation of deepfake-fueled misinformation that threatens the very fabric of our democratic institutions. "We cannot allow a handful of unelected tech billionaires to play roulette with humanity's future," stated the coalition's lead ethicist. By prioritizing human welfare over blind technological acceleration, the Act serves as a vital moral firewall, ensuring that the development of artificial general intelligence benefits society as a whole rather than just enriching the elite few."""
|
| 21 |
ARTICLE_B = """Tech industry leaders and economists are sounding the alarm over the newly proposed Artificial Intelligence Safeguard Act, warning that the draconian legislation will severely cripple the nation’s economic engine. Critics argue that the bill is a masterclass in bureaucratic overreach, drowning agile tech startups in layers of punitive red tape and effectively stifling the very innovation that drives modern prosperity. By mandating arbitrary algorithmic audits and imposing heavy-handed restrictions on model training, the government is poised to surrender our global competitive edge to foreign adversaries who are not bound by such paralyzing regulations. "This isn't about safety; it's an innovation tax that penalizes success," argued a prominent venture capitalist. Analysts project that this short-sighted policy will force thousands of AI researchers to relocate overseas, draining billions of dollars in investment capital from the domestic market. Ultimately, framing technological progress as an inherent danger will only succeed in legislating the industry into obsolescence, destroying millions of future private-sector jobs in the process."""
|
|
|
|
| 26 |
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 27 |
client = Groq(api_key=GROQ_API_KEY)
|
| 28 |
|
| 29 |
+
def _truncate_to_words(text: str, limit: int) -> str:
|
| 30 |
+
"""Truncates text by word count."""
|
| 31 |
+
if not text:
|
| 32 |
+
return ""
|
| 33 |
+
words = text.split()
|
| 34 |
+
return " ".join(words[:limit])
|
| 35 |
+
|
| 36 |
@st.cache_resource
|
| 37 |
def _initialize_app():
|
| 38 |
"""Downloads lightweight NLTK data needed for highlighting."""
|
|
|
|
| 44 |
|
| 45 |
_initialize_app()
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
def analyze_article(text: str) -> dict:
|
| 48 |
"""Analyzes framing using an LLM API and calculates readability."""
|
| 49 |
+
safe_text = _truncate_to_words(text, MAX_WORDS)
|
| 50 |
|
| 51 |
prompt = f"""
|
| 52 |
You are an expert media analyst. Analyze the following news snippet for framing, bias, and emotion.
|
|
|
|
| 71 |
temperature=0.1,
|
| 72 |
response_format={"type": "json_object"}
|
| 73 |
)
|
| 74 |
+
llm_data = json.loads(response.choices[0].message.content)
|
|
|
|
| 75 |
|
| 76 |
subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
|
| 77 |
raw_reading_ease = textstat.flesch_reading_ease(safe_text)
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
def _highlight_framing_words(text: str, target_words: list) -> str:
|
| 154 |
+
"""Highlights LLM-identified framing words in the synced text snippet."""
|
| 155 |
+
display_text = _truncate_to_words(text, MAX_WORDS)
|
| 156 |
+
if not display_text:
|
| 157 |
return ""
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
+
highlighted_text = display_text + ("..." if len(text.split()) > MAX_WORDS else "")
|
| 160 |
|
| 161 |
for word in target_words:
|
| 162 |
if len(word) > 2:
|
|
|
|
| 200 |
|
| 201 |
def check_contradiction(text_a: str, text_b: str) -> dict:
|
| 202 |
"""Uses the LLM to evaluate the stance between arguments."""
|
| 203 |
+
safe_a = _truncate_to_words(text_a, MAX_WORDS)
|
| 204 |
+
safe_b = _truncate_to_words(text_b, MAX_WORDS)
|
| 205 |
+
|
| 206 |
prompt = f"""
|
| 207 |
You are a fact-checking analyst. Compare these two news excerpts.
|
| 208 |
Return ONLY a valid JSON object with the exact keys below. Do not include markdown formatting.
|
|
|
|
| 211 |
"relationship": Choose ONE from: ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]. (Contradiction = disputing facts, Entailment = agreeing on premise).
|
| 212 |
"confidence": A float between 0.0 and 1.0 representing how confident you are.
|
| 213 |
|
| 214 |
+
Text 1: "{safe_a}"
|
| 215 |
+
Text 2: "{safe_b}"
|
| 216 |
"""
|
| 217 |
messages = [{"role": "user", "content": prompt}]
|
| 218 |
response = client.chat.completions.create(
|
|
|
|
| 222 |
temperature=0.1,
|
| 223 |
response_format={"type": "json_object"}
|
| 224 |
)
|
| 225 |
+
|
| 226 |
+
result = json.loads(response.choices[0].message.content)
|
| 227 |
return {"relationship": result.get("relationship", "NEUTRAL"), "confidence": result.get("confidence", 0.0)}
|
| 228 |
|
| 229 |
|