Spaces:

CommunityOne
/

open-navigator

Running on CPU Upgrade

App Files Files Community

open-navigator / extraction /accountability_dashboards.py

jcbowyer

Clean HuggingFace deployment without binary files

61d29fc 28 days ago

raw

history blame contribute delete

22.8 kB

	"""
	Evidence-Based Accountability Dashboards for Policy Advocacy.

	These dashboards are designed to make elected officials uncomfortable (productively)
	by exposing gaps between rhetoric and reality, deferral tactics, and power imbalances.

	Each dashboard explicitly states:
	1. The Decision Topic
	2. The Conclusion (the uncomfortable truth)
	3. The Quantified Factors that prove it

	These are for advocacy, not research. Use them to shift debate from "need" to "trade-offs."
	"""
	from typing import List, Dict, Any, Optional, Tuple
	from dataclasses import dataclass, field
	from datetime import datetime, timedelta
	from collections import defaultdict, Counter
	import re
	from loguru import logger


	# ================================================================
	# DASHBOARD 1: THE RHETORIC GAP MONITOR
	# ================================================================

	@dataclass
	class RhetoricGapMetrics:
	    """
	    Result record for Dashboard 1: spoken commitment vs. budget movement.

	    Purpose: stop debating "need" (everyone agrees) and start debating
	    "why aren't you funding it?"
	    """
	    # Headline
	    topic: str
	    conclusion: str  # one-sentence statement of the say/fund gap

	    # Factor 1: Rhetoric (what is said in meetings)
	    sentiment_density: float  # % of counted mentions that read as positive
	    total_mentions: int
	    positive_keywords: List[str]
	    sample_quotes: List[str]

	    # Factor 2: Budget Reality (what is funded)
	    budget_category: str
	    budget_change_dollars: float
	    budget_change_percent: float
	    prior_year_amount: float
	    current_year_amount: float

	    # The Logic (interpretation of the two factors together)
	    gap_type: str  # e.g. "Sincere Priority", "Marketing Rationale", "Quiet Priority", "Neglect"
	    underlying_rationale: str

	    # Discomfort Factor
	    discomfort_score: int  # 1-10, how damning is this?


	def calculate_rhetoric_gap(
	    topic: str,
	    meeting_documents: List[Dict[str, Any]],
	    budget_items: List[Any],
	    keywords: List[str]
	) -> RhetoricGapMetrics:
	    """
	    Calculate the gap between what they SAY vs. what they FUND.

	    Rhetoric side: every keyword is counted at most once per document. A
	    counted mention is "positive" when one of a fixed list of positive
	    words appears in the window running from 100 characters before to 100
	    characters after the start of the keyword's first occurrence.

	    Budget side: the first budget item whose ``category`` contains any
	    keyword (case-insensitive) supplies the budget figures.

	    Example:
	        Topic: "Student Health"
	        Rhetoric: 92% positive sentiment
	        Reality: -$120,000 budget cut
	        Logic: "Marketing rationale - wellness is branding, not priority"

	    Args:
	        topic: Label used in the generated conclusion and rationale text.
	        meeting_documents: Dicts whose 'content' value is the meeting text.
	            A missing or None 'content' is treated as empty text.
	        budget_items: Objects exposing ``category``, ``change_amount``,
	            ``change_percent``, ``prior_year_amount`` and
	            ``current_year_amount``.
	        keywords: Terms searched for in both the documents and the budget
	            categories.

	    Returns:
	        A populated RhetoricGapMetrics. When no budget item matches, the
	        budget fields stay at 0 / "Unknown", ``gap_type`` is "Unknown" and
	        ``discomfort_score`` is 0, so that missing data is never reported
	        as a budget decline.
	    """
	    # Loop invariants, hoisted out of the per-document / per-keyword loops.
	    positive_words = ('important', 'priority', 'critical', 'essential',
	                      'committed', 'dedicated', 'support', 'invest')
	    lowered_keywords = [kw.lower() for kw in keywords]

	    # ---- Factor 1: rhetoric -------------------------------------------
	    total_mentions = 0
	    positive_mentions = 0
	    sample_quotes = []

	    for doc in meeting_documents:
	        # `or ''` also covers a 'content' key that is present but None.
	        content = (doc.get('content') or '').lower()
	        sentences = None  # split lazily, at most once per document

	        for keyword in lowered_keywords:
	            idx = content.find(keyword)
	            if idx == -1:
	                continue
	            total_mentions += 1

	            # Simple sentiment: look for positive words near the keyword.
	            context = content[max(0, idx - 100):idx + 100]
	            if any(pw in context for pw in positive_words):
	                positive_mentions += 1

	            # Keep the first reasonably long sentence that has the keyword.
	            if sentences is None:
	                sentences = re.split(r'[.!?]', content)
	            for sentence in sentences:
	                if keyword in sentence and len(sentence) > 20:
	                    sample_quotes.append(sentence.strip()[:200])
	                    break

	    sentiment_density = (positive_mentions / total_mentions * 100) if total_mentions > 0 else 0

	    # ---- Factor 2: budget reality -------------------------------------
	    matched_item = next(
	        (item for item in budget_items
	         if any(kw in item.category.lower() for kw in lowered_keywords)),
	        None,
	    )

	    budget_change = 0
	    budget_change_pct = 0
	    prior_amount = 0
	    current_amount = 0
	    budget_category = "Unknown"

	    if matched_item is not None:
	        budget_category = matched_item.category
	        budget_change = matched_item.change_amount
	        budget_change_pct = matched_item.change_percent
	        prior_amount = matched_item.prior_year_amount
	        current_amount = matched_item.current_year_amount

	    # ---- The logic -----------------------------------------------------
	    high_rhetoric = sentiment_density > 70
	    # NOTE: a flat budget (change of exactly 0) counts as "not increased".
	    budget_increased = budget_change > 0
	    rhetoric_level = 'high' if high_rhetoric else 'low'

	    if matched_item is None:
	        # Without a budget line there is nothing to compare the rhetoric
	        # against; do not let the zero defaults read as a budget cut.
	        gap_type = "Unknown"
	        rationale = "Insufficient data - no budget line item matched the topic keywords"
	        discomfort = 0
	        conclusion = (
	            f"Verbal commitment to {topic} is {rhetoric_level}, "
	            "but no matching budget line item was found."
	        )
	    else:
	        if high_rhetoric and budget_increased:
	            gap_type = "Sincere Priority"
	            rationale = "Rhetoric matches fiscal commitment - genuine priority"
	            discomfort = 2
	        elif high_rhetoric and not budget_increased:
	            gap_type = "Marketing Rationale"
	            rationale = f"Board uses '{topic}' as branding to satisfy constituents, but fiscal priority is declining. Health funds likely used as 'buffer' for other costs."
	            discomfort = 9  # Very uncomfortable
	        elif not high_rhetoric and budget_increased:
	            gap_type = "Quiet Priority"
	            rationale = "Low public discussion but increased funding - administrative decision"
	            discomfort = 3
	        else:
	            gap_type = "Neglect"
	            rationale = "Low rhetoric and declining budget - not a priority"
	            discomfort = 5
	        conclusion = f"Verbal commitment to {topic} is {rhetoric_level}, but fiscal priority is {'rising' if budget_increased else 'declining'}."

	    return RhetoricGapMetrics(
	        topic=topic,
	        conclusion=conclusion,
	        sentiment_density=sentiment_density,
	        total_mentions=total_mentions,
	        positive_keywords=keywords,
	        sample_quotes=sample_quotes[:5],
	        budget_category=budget_category,
	        budget_change_dollars=budget_change,
	        budget_change_percent=budget_change_pct,
	        prior_year_amount=prior_amount,
	        current_year_amount=current_amount,
	        gap_type=gap_type,
	        underlying_rationale=rationale,
	        discomfort_score=discomfort
	    )


	# ================================================================
	# DASHBOARD 2: THE LOGIC CHAIN (Sequential Deferral)
	# ================================================================

	@dataclass
	class DeferralPattern:
	    """
	    Result record for Dashboard 2: a decision that keeps being put off,
	    together with the reasons given each time.

	    Purpose: expose "analysis paralysis" as a strategic avoidance tool.
	    """
	    # Headline
	    topic: str
	    conclusion: str

	    # Deferral timeline
	    first_mentioned: datetime
	    total_deferrals: int
	    months_in_limbo: int

	    # Shifting justifications, one entry per deferral
	    justification_history: List[Dict[str, Any]]  # [{month, rationale, speaker}]

	    # The Logic
	    pattern_type: str  # "Rationale of Attrition", "Sincere Analysis", "Political Timing"
	    strategic_inference: str

	    # Discomfort Factor
	    discomfort_score: int


	def detect_deferral_pattern(
	    topic: str,
	    decisions: List[Any],
	    meeting_documents: List[Dict[str, Any]]
	) -> Optional[DeferralPattern]:
	    """
	    Flag a topic whose decision keeps getting pushed back.

	    A decision belongs to the topic when the topic text appears
	    (case-insensitively) in its ``decision_summary``. It counts as a
	    deferral when its ``outcome`` contains "defer", "table", "postpone"
	    or "work session". ``meeting_documents`` is accepted but not read.

	    Example:
	        Topic: "Community Dental Clinic Funding"
	        Deferrals: 4 times in 6 months
	        Shifting Justifications:
	        - Month 1: "Waiting for tax revenue projections"
	        - Month 4: "Waiting for legal clarity on liability"
	        Logic: "Rationale of Attrition - waiting for advocate momentum to fade"

	    Returns:
	        None when no decision mentions the topic or fewer than two of the
	        matching decisions are deferrals; otherwise a DeferralPattern
	        describing the timeline and the justification given each time.
	    """
	    deferral_markers = ("defer", "table", "postpone", "work session")

	    # Topic-related decisions, oldest first.
	    needle = topic.lower()
	    on_topic = sorted(
	        (d for d in decisions if needle in d.decision_summary.lower()),
	        key=lambda d: d.meeting_date,
	    )
	    if not on_topic:
	        return None

	    stalled = [
	        d for d in on_topic
	        if any(marker in d.outcome.lower() for marker in deferral_markers)
	    ]
	    if len(stalled) < 2:
	        return None  # a single deferral is not a pattern

	    # Time span covers all topic decisions, not only the deferred ones.
	    first_date = on_topic[0].meeting_date
	    elapsed = on_topic[-1].meeting_date - first_date
	    months_in_limbo = elapsed.days // 30

	    # One entry per deferral; the first listed supporter stands in as speaker.
	    justification_history = [
	        {
	            'month': d.meeting_date.strftime('%B %Y'),
	            'rationale': d.primary_rationale or "No explicit rationale given",
	            'speaker': (
	                d.supporters[0].get('name', 'Board Member')
	                if d.supporters else 'Unknown'
	            ),
	        }
	        for d in stalled
	    ]

	    # Classify, most damning case first.
	    attrition = len(stalled) >= 3 and months_in_limbo >= 4
	    if attrition:
	        pattern_type = "Rationale of Attrition"
	        strategic_inference = f"The board isn't debating the {topic}'s merit; they are waiting for the advocate's momentum to fade before the next election cycle."
	        discomfort = 10  # Extremely uncomfortable
	        conclusion = "Administrative 'analysis' is being used as a strategic tool to avoid a final 'Yes' or 'No'."
	    else:
	        conclusion = "Under review"
	        if months_in_limbo > 12:
	            pattern_type = "Political Timing"
	            strategic_inference = "Delaying decision until after election to avoid controversy"
	            discomfort = 8
	        else:
	            pattern_type = "Sincere Analysis"
	            strategic_inference = "Genuine need for additional information"
	            discomfort = 3

	    return DeferralPattern(
	        topic=topic,
	        conclusion=conclusion,
	        first_mentioned=first_date,
	        total_deferrals=len(stalled),
	        months_in_limbo=months_in_limbo,
	        justification_history=justification_history,
	        pattern_type=pattern_type,
	        strategic_inference=strategic_inference,
	        discomfort_score=discomfort
	    )


	# ================================================================
	# DASHBOARD 3: THE DISPLACEMENT MATRIX
	# ================================================================

	@dataclass
	class DisplacementRow:
	    """One winner/loser pairing shown as a row of the displacement matrix."""
	    # The funded side
	    winner_funded: str
	    winner_amount: float
	    # The stagnant or cut side
	    loser_stagnant: str
	    loser_amount: float
	    # Why the winner beat the loser
	    tradeoff_factor: str  # "Visibility", "Asset Maintenance", "Public Safety"


	@dataclass
	class DisplacementMatrix:
	    """
	    Result record for Dashboard 3: what got funded set against what did not.

	    Purpose: force the "Why is X worth more than Y?" question.
	    """
	    # Headline
	    topic: str
	    conclusion: str

	    # The matrix: one row per winner/loser pairing
	    displacements: List[DisplacementRow]

	    # The Logic
	    priority_pattern: str  # "Legacy Rationale", "Visibility Bias", "Equity Gap"
	    strategic_inference: str

	    # Discomfort Factor
	    discomfort_score: int


	def generate_displacement_matrix(
	    topic: str,
	    budget_items: List[Any],
	    decisions: List[Any],
	    visible_categories: Optional[List[str]] = None,
	    invisible_categories: Optional[List[str]] = None
	) -> DisplacementMatrix:
	    """
	    Generate matrix showing visible assets funded over invisible infrastructure.

	    "Winners" are budget items with a positive ``change_amount`` whose
	    category contains a visible-category term. "Losers" are items with a
	    zero or negative ``change_amount`` whose category contains an
	    invisible-category term. Winners and losers are paired by position.

	    Example:
	        Winner: New Athletic Turf ($850k)
	        Loser: Fluoride System Upgrade ($0)
	        Factor: "Visibility - Turf is a PR win; Fluoride is hidden"

	    Args:
	        topic: Label stored on the returned matrix.
	        budget_items: Objects exposing ``category``, ``change_amount`` and
	            ``current_year_amount``.
	        decisions: Accepted for interface compatibility; not read here.
	        visible_categories: Terms marking a category as a visible asset.
	            Matching is case-insensitive. Defaults to a built-in list.
	        invisible_categories: Terms marking a category as invisible
	            infrastructure. Matching is case-insensitive. Defaults to a
	            built-in list.

	    Returns:
	        A DisplacementMatrix with one row per winner. When there are no
	        winners at all, the matrix is empty and is labelled
	        "Unknown" / "Insufficient data" with a discomfort score of 0.
	    """
	    if visible_categories is None:
	        visible_categories = ['construction', 'athletic', 'facility', 'building',
	                              'stadium', 'turf', 'renovation', 'HVAC', 'fleet']

	    if invisible_categories is None:
	        invisible_categories = ['health', 'dental', 'fluoride', 'screening',
	                                'nurse', 'mental health', 'counseling']

	    # Categories are compared in lower case, so the terms must be lower
	    # case too; otherwise a term such as 'HVAC' could never match.
	    visible_terms = [term.lower() for term in visible_categories]
	    invisible_terms = [term.lower() for term in invisible_categories]

	    # Find winners (funded, visible)
	    winners = [
	        item for item in budget_items
	        if item.change_amount > 0
	        and any(term in item.category.lower() for term in visible_terms)
	    ]

	    # Find losers (stagnant/cut, invisible)
	    losers = [
	        item for item in budget_items
	        if item.change_amount <= 0
	        and any(term in item.category.lower() for term in invisible_terms)
	    ]

	    # Pair them up by position
	    displacements = []
	    for i, winner in enumerate(winners):
	        if i < len(losers):
	            loser_name = losers[i].category
	            loser_amount = losers[i].current_year_amount
	        else:
	            # More winners than losers: pair with a placeholder loser.
	            loser_name = 'Public Health Programs'
	            loser_amount = 0

	        # Determine tradeoff factor
	        winner_category = winner.category.lower()
	        if 'athletic' in winner_category or 'turf' in winner_category:
	            factor = "Visibility: Athletic projects are PR wins; health is hidden"
	        elif 'hvac' in winner_category or 'building' in winner_category:
	            factor = "Asset Maintenance: Upkeep of buildings prioritized over health"
	        elif 'police' in winner_category or 'security' in winner_category:
	            factor = "Public Safety: Police are a 'primary' rationale"
	        else:
	            factor = "Administrative preference: Tangible assets over services"

	        displacements.append(DisplacementRow(
	            winner_funded=winner.category,
	            winner_amount=winner.current_year_amount,
	            loser_stagnant=loser_name,
	            loser_amount=loser_amount,
	            tradeoff_factor=factor
	        ))

	    conclusion = "The Board prioritizes 'Visible Assets' (Construction) over 'Invisible Infrastructure' (Public Health)."

	    # Determine priority pattern
	    if not displacements:
	        # Nothing was funded on the visible side, so the claim above has
	        # no evidence behind it; report the absence of data instead.
	        conclusion = "No displacement pairs found for this topic"
	        priority_pattern = "Unknown"
	        strategic_inference = "Insufficient data"
	        discomfort = 0
	    elif any('visibility' in d.tradeoff_factor.lower() for d in displacements):
	        priority_pattern = "Legacy Rationale"
	        strategic_inference = "Elected officials prefer funding things they can put their names on or hold ribbon-cuttings for, effectively 'trading' community health for political visibility."
	        discomfort = 9
	    elif any('asset' in d.tradeoff_factor.lower() for d in displacements):
	        priority_pattern = "Asset Maintenance Bias"
	        strategic_inference = "Buildings are prioritized over people - maintenance culture"
	        discomfort = 7
	    else:
	        priority_pattern = "Resource Constraint"
	        strategic_inference = "Limited resources force difficult choices"
	        discomfort = 4

	    return DisplacementMatrix(
	        topic=topic,
	        conclusion=conclusion,
	        displacements=displacements,
	        priority_pattern=priority_pattern,
	        strategic_inference=strategic_inference,
	        discomfort_score=discomfort
	    )


	# ================================================================
	# DASHBOARD 4: THE INFLUENCE RADAR
	# ================================================================

	@dataclass
	class InfluenceMetrics:
	    """
	    Result record for Dashboard 4: how much weight each group carried in a
	    decision.

	    Purpose: call out the person blocking policy by name.
	    """
	    # Headline
	    topic: str
	    conclusion: str

	    # Influence factors, one dict per group
	    public_alignment: Dict[str, Any]  # {comments, influence_percent}
	    risk_legal_alignment: Dict[str, Any]  # {memos, influence_percent, contact_name}
	    consultant_alignment: Dict[str, Any]  # {reports, influence_percent, firm_name}
	    elected_alignment: Dict[str, Any]  # {votes, influence_percent}

	    # The Logic
	    power_structure: str  # "Technocratic Rationale", "Public-Driven", "Elite Capture"
	    veto_holder: str  # Name of the person/role with effective veto
	    strategic_inference: str

	    # Discomfort Factor
	    discomfort_score: int


	def calculate_influence_radar(
	    topic: str,
	    decisions: List[Any],
	    meeting_documents: List[Dict[str, Any]]
	) -> InfluenceMetrics:
	    """
	    Calculate who actually drives policy decisions.

	    Only the most recent decision whose ``decision_summary`` contains the
	    topic (case-insensitive) is analysed. Influence shares are fixed
	    heuristic values chosen by which scenario the decision falls into;
	    they are not measured quantities.

	    Example:
	        Public Input: 240+ comments in favor (Influence: 4%)
	        Risk/Legal: 1 memo expressing concerns (Influence: 92%)
	        Logic: "Technocratic Rationale - lawyers write policy"

	    Args:
	        topic: Text searched for in each decision summary.
	        decisions: Objects exposing ``decision_summary``, ``meeting_date``,
	            ``supporters``, ``opponents``, ``evidence_cited`` (dicts with a
	            'source' key) and ``outcome``.
	        meeting_documents: Accepted for interface compatibility; not read
	            here.

	    Returns:
	        An InfluenceMetrics record. When no decision mentions the topic,
	        every count is 0 and the labels are "Unknown" / "Insufficient data".
	        The four influence percentages never sum to more than 100 and the
	        elected share is never negative.
	    """
	    # Find relevant decisions
	    needle = topic.lower()
	    relevant = [d for d in decisions if needle in d.decision_summary.lower()]

	    if not relevant:
	        # Return empty metrics
	        return InfluenceMetrics(
	            topic=topic,
	            conclusion="No decisions found for this topic",
	            public_alignment={'comments': 0, 'influence_percent': 0},
	            risk_legal_alignment={'memos': 0, 'influence_percent': 0, 'contact_name': 'Unknown'},
	            consultant_alignment={'reports': 0, 'influence_percent': 0, 'firm_name': 'Unknown'},
	            elected_alignment={'votes': 0, 'influence_percent': 0},
	            power_structure="Unknown",
	            veto_holder="Unknown",
	            strategic_inference="Insufficient data",
	            discomfort_score=0
	        )

	    # Use the most recent decision. The input list is not guaranteed to be
	    # ordered, so pick by date instead of by position.
	    try:
	        decision = max(relevant, key=lambda d: d.meeting_date)
	    except (AttributeError, TypeError):
	        # Dates missing or not comparable: fall back to input order.
	        decision = relevant[0]

	    # Count public input
	    public_comments = len(decision.supporters) + len(decision.opponents)
	    public_support_ratio = len(decision.supporters) / public_comments if public_comments > 0 else 0

	    # Detect legal/risk and consultant involvement from the cited evidence.
	    # A single source can count towards both groups.
	    legal_terms = ('legal', 'attorney', 'counsel', 'risk', 'liability', 'insurance')
	    consultant_terms = ('consultant', 'study', 'report', 'analysis', 'firm')

	    legal_mentions = 0
	    legal_contact = "Unknown"
	    consultant_mentions = 0
	    consultant_firm = "Unknown"

	    for evidence in decision.evidence_cited:
	        # `or ''` also covers a 'source' key that is present but None.
	        source = evidence.get('source') or ''
	        source_lower = source.lower()
	        if any(term in source_lower for term in legal_terms):
	            legal_mentions += 1
	            legal_contact = source  # last matching source wins
	        if any(term in source_lower for term in consultant_terms):
	            consultant_mentions += 1
	            consultant_firm = source  # last matching source wins

	    # Simple heuristic: outcome alignment (exact match on the outcome text)
	    outcome_approved = decision.outcome.lower() in ['approved', 'adopted', 'passed']

	    # If public wanted it and got it, high public influence
	    # If legal/consultant opposed and it failed, high tech influence
	    if public_support_ratio > 0.8 and outcome_approved:
	        public_influence = 80
	        legal_influence = 10
	        consultant_influence = 10
	        power_structure = "Public-Driven"
	        veto_holder = "None - responsive governance"
	        discomfort = 2
	    elif legal_mentions > 0 and not outcome_approved:
	        public_influence = 4
	        legal_influence = 92
	        # 10 points per consultant report, capped at what is left of 100
	        # so the shares cannot add up to more than the whole.
	        consultant_influence = min(
	            consultant_mentions * 10,
	            100 - public_influence - legal_influence,
	        )
	        power_structure = "Technocratic Rationale"
	        veto_holder = f"{legal_contact} (Legal/Risk Manager)"
	        discomfort = 10  # Maximum discomfort
	    elif consultant_mentions > 0 and not outcome_approved:
	        public_influence = 8
	        legal_influence = 7
	        consultant_influence = 85
	        power_structure = "Elite Capture"
	        veto_holder = f"{consultant_firm}"
	        discomfort = 9
	    else:
	        public_influence = 30
	        legal_influence = 30
	        consultant_influence = 40
	        power_structure = "Mixed Influences"
	        veto_holder = "Board discretion"
	        discomfort = 4

	    # Elected officials get whatever share remains, never below zero.
	    elected_influence = max(
	        0, 100 - public_influence - legal_influence - consultant_influence
	    )

	    if power_structure == "Technocratic Rationale":
	        strategic_inference = f"The Board defaults to the path of least legal resistance, allowing the district's lawyers and CFO to effectively 'write' public health policy. {veto_holder} has functional veto power that outweighs {public_comments}+ citizen comments."
	        conclusion = "Internal 'Risk Management' has a veto power that outweighs 100% of public input."
	    elif power_structure == "Elite Capture":
	        strategic_inference = f"External consultants drive policy more than constituents. {veto_holder} has outsized influence."
	        conclusion = "Influence is distributed"
	    else:
	        strategic_inference = "Decision reflects balanced consideration of multiple stakeholders"
	        conclusion = "Influence is distributed"

	    return InfluenceMetrics(
	        topic=topic,
	        conclusion=conclusion,
	        public_alignment={
	            'comments': public_comments,
	            'influence_percent': public_influence,
	            'support_ratio': public_support_ratio * 100
	        },
	        risk_legal_alignment={
	            'memos': legal_mentions,
	            'influence_percent': legal_influence,
	            'contact_name': legal_contact
	        },
	        consultant_alignment={
	            'reports': consultant_mentions,
	            'influence_percent': consultant_influence,
	            'firm_name': consultant_firm
	        },
	        elected_alignment={
	            'votes': 1,  # Simplified
	            'influence_percent': elected_influence
	        },
	        power_structure=power_structure,
	        veto_holder=veto_holder,
	        strategic_inference=strategic_inference,
	        discomfort_score=discomfort
	    )


	# ================================================================
	# DASHBOARD GENERATOR
	# ================================================================

	def generate_all_accountability_dashboards(
	    jurisdiction: str,
	    meeting_documents: List[Dict[str, Any]],
	    decisions: List[Any],
	    budget_items: List[Any],
	    focus_topic: str = "Student Health",
	    *,
	    rhetoric_keywords: Optional[List[str]] = None,
	    deferral_topic: str = "dental clinic",
	    displacement_topic: str = "2026 Capital Prioritization",
	    influence_topic: str = "school-based dental screening"
	) -> Dict[str, Any]:
	    """
	    Generate all four accountability dashboards for a jurisdiction.

	    Args:
	        jurisdiction: Name echoed in the log line and in the result.
	        meeting_documents: Meeting records passed to the rhetoric, deferral
	            and influence dashboards.
	        decisions: Decision records passed to the deferral, displacement
	            and influence dashboards.
	        budget_items: Budget records passed to the rhetoric and
	            displacement dashboards.
	        focus_topic: Topic label for the rhetoric-gap dashboard.
	        rhetoric_keywords: Keywords for the rhetoric-gap dashboard.
	            Defaults to a health-related list.
	        deferral_topic: Topic searched for by the deferral dashboard.
	        displacement_topic: Label for the displacement matrix.
	        influence_topic: Topic searched for by the influence dashboard.

	    Returns:
	        Dict with keys 'jurisdiction', 'focus_topic', 'rhetoric_gap',
	        'deferral_pattern' (may be None), 'displacement_matrix',
	        'influence_radar' and 'max_discomfort_score' (the highest score
	        across the dashboards; a missing deferral pattern counts as 0).
	    """
	    logger.info(f"Generating accountability dashboards for {jurisdiction}...")

	    if rhetoric_keywords is None:
	        rhetoric_keywords = ['health', 'wellness', 'wellbeing', 'dental', 'vision', 'nurse']

	    # Dashboard 1: Rhetoric Gap
	    rhetoric_gap = calculate_rhetoric_gap(
	        topic=focus_topic,
	        meeting_documents=meeting_documents,
	        budget_items=budget_items,
	        keywords=rhetoric_keywords
	    )

	    # Dashboard 2: Deferral Pattern (None when no pattern is detected)
	    deferral_pattern = detect_deferral_pattern(
	        topic=deferral_topic,
	        decisions=decisions,
	        meeting_documents=meeting_documents
	    )

	    # Dashboard 3: Displacement Matrix
	    displacement = generate_displacement_matrix(
	        topic=displacement_topic,
	        budget_items=budget_items,
	        decisions=decisions
	    )

	    # Dashboard 4: Influence Radar
	    influence = calculate_influence_radar(
	        topic=influence_topic,
	        decisions=decisions,
	        meeting_documents=meeting_documents
	    )

	    discomfort_scores = [
	        rhetoric_gap.discomfort_score,
	        deferral_pattern.discomfort_score if deferral_pattern else 0,
	        displacement.discomfort_score,
	        influence.discomfort_score,
	    ]

	    return {
	        'jurisdiction': jurisdiction,
	        'focus_topic': focus_topic,
	        'rhetoric_gap': rhetoric_gap,
	        'deferral_pattern': deferral_pattern,
	        'displacement_matrix': displacement,
	        'influence_radar': influence,
	        'max_discomfort_score': max(discomfort_scores)
	    }