Spaces:

CommunityOne
/

open-navigator

Running on CPU Upgrade

App Files Files Community

open-navigator / extraction /decision_analyzer.py

jcbowyer

Clean HuggingFace deployment without binary files

61d29fc 28 days ago

raw

history blame contribute delete

18 kB

	"""
	Decision Analysis Agent for extracting structured decision-making context.

	Captures:
	- How decisions were framed
	- Options evaluated
	- Tradeoffs discussed
	- Rationales provided
	- Stakeholder positions
	- Evidence cited
	"""
	from typing import List, Dict, Any, Optional
	from dataclasses import dataclass
	from datetime import datetime
	from loguru import logger

	from openai import OpenAI
	from config.settings import settings


	@dataclass
	class PolicyDecision:
	    """
	    Structured representation of a policy decision with full context.

	    All fields are required and are declared without defaults, so the
	    declaration order below is also the positional order of the generated
	    ``__init__``. The dataclass performs no validation: values are stored
	    exactly as passed in.
	    """

	    # --- Basic info ---
	    decision_id: str
	    decision_summary: str  # Brief description of what was decided
	    outcome: str  # "approved", "rejected", "tabled", "amended"

	    # --- Decision framing ---
	    primary_frame: str  # e.g., "public health", "fiscal responsibility", "equity"
	    competing_frames: List[str]  # Alternative ways the issue was framed
	    framing_language: List[str]  # Key phrases that shaped the discussion

	    # --- Options evaluated ---
	    # Each option is a mapping such as
	    # {"option": "...", "pros": ["..."], "cons": ["..."]}; pros/cons are
	    # lists, hence Dict[str, Any] rather than Dict[str, str].
	    options_considered: List[Dict[str, Any]]
	    chosen_option: str
	    rejected_options: List[Dict[str, str]]  # With reasons why rejected

	    # --- Tradeoffs & deliberation ---
	    tradeoffs_discussed: List[Dict[str, str]]  # e.g., {"tradeoff": "cost vs benefit", "discussion": "..."}
	    concerns_raised: List[Dict[str, str]]  # {"stakeholder": "...", "concern": "..."}
	    counterarguments: List[str]  # Rebuttals to concerns

	    # --- Rationale & justification ---
	    primary_rationale: str  # Main reason for the decision
	    supporting_rationales: List[str]  # Additional justifications
	    evidence_cited: List[Dict[str, str]]  # {"type": "study/expert/data", "description": "..."}

	    # --- Stakeholder analysis ---
	    supporters: List[Dict[str, str]]  # {"name": "...", "role": "...", "argument": "..."}
	    opponents: List[Dict[str, str]]
	    undecided_or_conflicted: List[Dict[str, str]]

	    # --- Vote details ---
	    vote_result: Optional[str]  # "5-2", "unanimous", etc.
	    voting_breakdown: List[Dict[str, str]]  # {"member": "...", "vote": "yes/no", "stated_reason": "..."}

	    # --- Impact & implementation ---
	    expected_impacts: List[Dict[str, str]]  # {"stakeholder_group": "...", "impact": "..."}
	    implementation_timeline: Optional[str]
	    cost_estimate: Optional[str]

	    # --- Metadata ---
	    # NOTE(review): annotated as datetime, but nothing here enforces it;
	    # confirm that producers really pass a datetime and not a date string.
	    meeting_date: datetime
	    municipality: str
	    state: str
	    document_id: str
	    confidence_score: float  # 0-1: How confident are we in this analysis?


	class DecisionAnalysisAgent:
	    """
	    Agent for deep analysis of policy decision-making processes.

	    Uses an LLM to extract structured decision context that helps understand:
	    - WHY decisions were made (rationales)
	    - HOW options were evaluated (deliberation process)
	    - WHAT influenced the outcome (frames, evidence, stakeholders)

	    Example:
	        >>> agent = DecisionAnalysisAgent()
	        >>> decisions = agent.analyze_document(meeting_doc)
	        >>> for decision in decisions:
	        ...     print(f"Decision: {decision.decision_summary}")
	        ...     print(f"Framed as: {decision.primary_frame}")
	        ...     print(f"Rationale: {decision.primary_rationale}")
	    """

	    # Documents whose content is shorter than this are skipped outright.
	    MIN_CONTENT_CHARS = 500
	    # Number of content characters embedded in the prompt (~7k tokens).
	    MAX_CONTENT_CHARS = 30000
	    # Confidence recorded when the model does not report one.
	    DEFAULT_CONFIDENCE = 0.7

	    def __init__(self, model: str = "gpt-4o"):
	        """
	        Initialize the decision analysis agent.

	        Args:
	            model: Chat model name used for the analysis. Defaults to
	                "gpt-4o" (the smarter model, for complex reasoning).

	        Raises:
	            ValueError: If ``settings.openai_api_key`` is empty or unset.
	        """
	        if not settings.openai_api_key:
	            raise ValueError("OpenAI API key required. Set OPENAI_API_KEY environment variable.")

	        self.client = OpenAI(api_key=settings.openai_api_key)
	        self.model = model

	    def analyze_document(
	        self,
	        document: Dict[str, Any],
	        focus_topics: Optional[List[str]] = None
	    ) -> List[PolicyDecision]:
	        """
	        Analyze a meeting document to extract structured decision-making context.

	        Args:
	            document: Meeting document with content
	            focus_topics: Optional list of topics to focus on (e.g., ["health", "water"])

	        Returns:
	            List of PolicyDecision objects with full decision context. The
	            list is empty when the content is shorter than
	            ``MIN_CONTENT_CHARS`` or when the request or the parsing of its
	            response fails (the failure is logged, not raised).
	        """
	        import json

	        # ``or ""`` also covers a document that carries ``"content": None``.
	        content = document.get("content") or ""
	        if len(content) < self.MIN_CONTENT_CHARS:
	            logger.warning(f"Document {document.get('document_id')} too short for decision analysis")
	            return []

	        logger.info(f"Analyzing decisions in: {document.get('title', 'Unknown')}")

	        # Build analysis prompt
	        prompt = self._build_analysis_prompt(document, focus_topics)

	        # Deliberately broad: this method is best-effort and reports any
	        # client, JSON or conversion failure as "no decisions found".
	        try:
	            response = self.client.chat.completions.create(
	                model=self.model,
	                messages=[
	                    {"role": "system", "content": self._get_system_prompt()},
	                    {"role": "user", "content": prompt},
	                ],
	                temperature=0.2,  # Low temperature for factual analysis
	                response_format={"type": "json_object"},  # Request JSON output
	            )

	            parsed = json.loads(response.choices[0].message.content)

	            # Convert to PolicyDecision objects
	            decisions = []
	            for decision_data in parsed.get("decisions") or []:
	                if not isinstance(decision_data, dict):
	                    # One malformed entry must not discard the valid ones.
	                    logger.warning(f"Skipping malformed decision entry: {decision_data!r}")
	                    continue
	                decisions.append(self._create_policy_decision(decision_data, document))

	            logger.success(f"Extracted {len(decisions)} policy decisions with full context")
	            return decisions

	        except Exception as e:
	            logger.error(f"Error analyzing decisions: {e}")
	            return []

	    def _get_system_prompt(self) -> str:
	        """Get system prompt for decision analysis."""
	        return """You are an expert policy analyst who extracts structured information about
	government decision-making processes. Your goal is to help citizens understand:

	1. How decisions are framed - What lens or perspective shapes the discussion?
	(e.g., public health frame, fiscal responsibility frame, equity frame)

	2. What options were evaluated - What alternatives were considered?
	Not just the final choice, but all options discussed.

	3. What tradeoffs were discussed - What competing values or priorities were weighed?
	(e.g., short-term costs vs long-term benefits, individual choice vs collective good)

	4. What rationales justified the decision - Why did decision-makers choose this option?
	Extract stated reasons, not just the outcome.

	5. What evidence influenced the decision - What facts, studies, or expert testimony
	were cited?

	6. Who supported/opposed - What stakeholders took positions and what were their arguments?

	You must be:
	- Precise: Only extract what is explicitly stated in the document
	- Neutral: Don't add interpretation or bias
	- Comprehensive: Capture all aspects of the deliberation, not just the final vote
	- Structured: Return well-organized JSON that can be easily analyzed

	If the document doesn't contain decisions or deliberation, return an empty decisions array."""

	    def _build_analysis_prompt(
	        self,
	        document: Dict[str, Any],
	        focus_topics: Optional[List[str]] = None
	    ) -> str:
	        """
	        Build the analysis prompt.

	        Args:
	            document: Meeting document; ``content``, ``municipality``,
	                ``state``, ``meeting_date`` and ``title`` are read from it.
	            focus_topics: Optional topics to emphasise in the instructions.

	        Returns:
	            The user prompt, stripped of leading/trailing whitespace. Only
	            the first ``MAX_CONTENT_CHARS`` characters of the content are
	            embedded; anything beyond that is silently dropped.
	        """
	        # ``or ""`` keeps a ``None`` content from breaking the slice.
	        content = (document.get("content") or "")[:self.MAX_CONTENT_CHARS]

	        focus_instruction = ""
	        if focus_topics:
	            focus_instruction = f"\nFocus especially on decisions related to: {', '.join(focus_topics)}\n"

	        # Doubled braces are literal JSON braces inside the f-string. The
	        # enumerated alternatives use a plain "|": a backslash-escaped pipe
	        # is an invalid escape sequence in a normal string literal.
	        prompt = f"""
	Analyze this local government meeting document and extract ALL policy decisions with their full context.

	Meeting Information:
	- Municipality: {document.get('municipality', 'Unknown')}
	- State: {document.get('state', '')}
	- Date: {document.get('meeting_date', 'Unknown')}
	- Title: {document.get('title', 'Unknown')}
	{focus_instruction}

	Document Content:
	{content}

	Extract for each decision:

	Return a JSON object with this structure:

	{{
	"decisions": [
	{{
	"decision_summary": "Brief description of what was decided",
	"outcome": "approved|rejected|tabled|amended",

	"framing": {{
	"primary_frame": "Main way the issue was framed (e.g., 'public health', 'fiscal responsibility')",
	"competing_frames": ["Alternative frames used"],
	"framing_language": ["Key phrases that shaped the discussion"]
	}},

	"options": {{
	"considered": [
	{{"option": "Description", "pros": ["..."], "cons": ["..."]}},
	...
	],
	"chosen": "Which option was selected",
	"rejected": [
	{{"option": "Description", "reason_rejected": "Why it was not chosen"}}
	]
	}},

	"tradeoffs": [
	{{
	"tradeoff": "Cost vs. benefit",
	"discussion": "How this tradeoff was discussed"
	}}
	],

	"concerns": [
	{{
	"stakeholder": "Who raised the concern",
	"concern": "What the concern was",
	"response": "How it was addressed (if mentioned)"
	}}
	],

	"rationale": {{
	"primary": "Main reason for the decision",
	"supporting": ["Additional justifications"],
	"evidence": [
	{{
	"type": "study|expert|data|precedent",
	"description": "What evidence was cited"
	}}
	]
	}},

	"stakeholders": {{
	"supporters": [
	{{
	"name": "Person/org name",
	"role": "Their position/affiliation",
	"argument": "Their main argument"
	}}
	],
	"opponents": [...],
	"undecided": [...]
	}},

	"vote": {{
	"result": "5-2 or unanimous or voice vote",
	"breakdown": [
	{{
	"member": "Council member name",
	"vote": "yes|no|abstain",
	"stated_reason": "Any reason they gave (if mentioned)"
	}}
	]
	}},

	"implementation": {{
	"expected_impacts": [
	{{
	"stakeholder_group": "Who will be affected",
	"impact": "How they'll be affected"
	}}
	],
	"timeline": "When this will be implemented",
	"cost_estimate": "Estimated cost (if mentioned)"
	}},

	"confidence": 0.95
	}}
	]
	}}

	Important:
	- Only include decisions that are actually in the document
	- Don't infer or assume - extract only what's explicitly stated
	- If a field is not mentioned in the document, use null or empty array
	- Multiple decisions should be in separate objects in the decisions array
	- For confidence: 1.0 = explicit and clear, 0.5 = mentioned but unclear
	"""
	        return prompt.strip()

	    @staticmethod
	    def _section(data: Dict[str, Any], key: str) -> Dict[str, Any]:
	        """Return the nested object at ``key``, or ``{}`` if it is null, missing or not a dict."""
	        value = data.get(key)
	        return value if isinstance(value, dict) else {}

	    @staticmethod
	    def _field(data: Dict[str, Any], key: str, default: Any) -> Any:
	        """Return ``data[key]``, or ``default`` when the key is missing or explicitly null."""
	        value = data.get(key)
	        return default if value is None else value

	    def _policy_decision_fields(
	        self,
	        decision_data: Dict[str, Any],
	        document: Dict[str, Any]
	    ) -> Dict[str, Any]:
	        """
	        Normalize one parsed decision into PolicyDecision keyword arguments.

	        The prompt tells the model to use null for anything the document
	        does not mention, so every section and field is read null-safely:
	        a null (or missing) value becomes the same default a missing key
	        gets.

	        Args:
	            decision_data: One element of the model's "decisions" array.
	            document: The source document, used for the metadata fields.

	        Returns:
	            A dict with one entry per PolicyDecision field.
	        """
	        from hashlib import md5

	        section = self._section
	        field = self._field

	        framing = section(decision_data, "framing")
	        options = section(decision_data, "options")
	        rationale = section(decision_data, "rationale")
	        stakeholders = section(decision_data, "stakeholders")
	        vote = section(decision_data, "vote")
	        impl = section(decision_data, "implementation")

	        summary = field(decision_data, "decision_summary", "")

	        # Generate decision ID: first 16 hex chars of MD5(document id + summary).
	        decision_id = md5(
	            f"{document.get('document_id', '')}{summary}".encode()
	        ).hexdigest()[:16]

	        return {
	            "decision_id": decision_id,
	            "decision_summary": summary,
	            "outcome": field(decision_data, "outcome", "unknown"),

	            # Framing
	            "primary_frame": field(framing, "primary_frame", ""),
	            "competing_frames": field(framing, "competing_frames", []),
	            "framing_language": field(framing, "framing_language", []),

	            # Options
	            "options_considered": field(options, "considered", []),
	            "chosen_option": field(options, "chosen", ""),
	            "rejected_options": field(options, "rejected", []),

	            # Tradeoffs
	            "tradeoffs_discussed": field(decision_data, "tradeoffs", []),
	            "concerns_raised": field(decision_data, "concerns", []),
	            "counterarguments": [],  # Would need separate extraction

	            # Rationale
	            "primary_rationale": field(rationale, "primary", ""),
	            "supporting_rationales": field(rationale, "supporting", []),
	            "evidence_cited": field(rationale, "evidence", []),

	            # Stakeholders
	            "supporters": field(stakeholders, "supporters", []),
	            "opponents": field(stakeholders, "opponents", []),
	            "undecided_or_conflicted": field(stakeholders, "undecided", []),

	            # Vote
	            "vote_result": vote.get("result"),
	            "voting_breakdown": field(vote, "breakdown", []),

	            # Implementation
	            "expected_impacts": field(impl, "expected_impacts", []),
	            "implementation_timeline": impl.get("timeline"),
	            "cost_estimate": impl.get("cost_estimate"),

	            # Metadata. A missing meeting date falls back to "now", so the
	            # stored value is then the analysis time, not the meeting time.
	            "meeting_date": document.get("meeting_date", datetime.now()),
	            "municipality": document.get("municipality", ""),
	            "state": document.get("state", ""),
	            "document_id": document.get("document_id", ""),
	            # ``_field`` only replaces None, so a reported 0.0 is preserved.
	            "confidence_score": field(decision_data, "confidence", self.DEFAULT_CONFIDENCE),
	        }

	    def _create_policy_decision(
	        self,
	        decision_data: Dict[str, Any],
	        document: Dict[str, Any]
	    ) -> PolicyDecision:
	        """Convert parsed JSON to PolicyDecision object."""
	        return PolicyDecision(**self._policy_decision_fields(decision_data, document))

	    def export_decision_analysis(
	        self,
	        decisions: List[PolicyDecision],
	        output_format: str = "json"
	    ) -> str:
	        """
	        Export decision analysis as JSON or Markdown.

	        Args:
	            decisions: List of PolicyDecision objects
	            output_format: "json" or "markdown"

	        Returns:
	            Formatted output string

	        Raises:
	            ValueError: If ``output_format`` is anything else (including
	                "csv", which is not implemented).
	        """
	        if output_format == "json":
	            import json
	            return json.dumps(
	                [self._decision_to_dict(d) for d in decisions],
	                indent=2,
	                default=str
	            )

	        if output_format == "markdown":
	            # Collect fragments and join once instead of repeated ``+=``.
	            parts = ["# Policy Decision Analysis\n\n"]
	            for i, decision in enumerate(decisions, 1):
	                parts.append(f"## Decision {i}: {decision.decision_summary}\n\n")
	                parts.append(f"Outcome: {decision.outcome}\n\n")
	                parts.append(f"Primary Frame: {decision.primary_frame}\n\n")

	                if decision.options_considered:
	                    parts.append("Options Considered:\n")
	                    parts.extend(
	                        f"- {opt.get('option', 'Unknown')}\n"
	                        for opt in decision.options_considered
	                    )
	                    parts.append("\n")

	                if decision.tradeoffs_discussed:
	                    parts.append("Tradeoffs Discussed:\n")
	                    parts.extend(
	                        f"- {tradeoff.get('tradeoff', '')}: {tradeoff.get('discussion', '')}\n"
	                        for tradeoff in decision.tradeoffs_discussed
	                    )
	                    parts.append("\n")

	                parts.append(f"Primary Rationale: {decision.primary_rationale}\n\n")

	                if decision.evidence_cited:
	                    parts.append("Evidence Cited:\n")
	                    parts.extend(
	                        f"- {evidence.get('type', '')}: {evidence.get('description', '')}\n"
	                        for evidence in decision.evidence_cited
	                    )
	                    parts.append("\n")

	                parts.append("---\n\n")

	            return "".join(parts)

	        raise ValueError(f"Unsupported format: {output_format}")

	    def _decision_to_dict(self, decision: PolicyDecision) -> Dict[str, Any]:
	        """
	        Convert PolicyDecision to dictionary.

	        The result mirrors the nested layout requested from the model, plus
	        "decision_id" and a "metadata" section. ``meeting_date`` becomes an
	        ISO string when it offers ``isoformat()``, ``None`` when it is
	        empty, and ``str(value)`` otherwise (e.g. a date already stored as
	        text).
	        """
	        meeting_date = decision.meeting_date
	        if not meeting_date:
	            meeting_date = None
	        elif hasattr(meeting_date, "isoformat"):
	            meeting_date = meeting_date.isoformat()
	        else:
	            meeting_date = str(meeting_date)

	        return {
	            "decision_id": decision.decision_id,
	            "decision_summary": decision.decision_summary,
	            "outcome": decision.outcome,
	            "framing": {
	                "primary_frame": decision.primary_frame,
	                "competing_frames": decision.competing_frames,
	                "framing_language": decision.framing_language
	            },
	            "options": {
	                "considered": decision.options_considered,
	                "chosen": decision.chosen_option,
	                "rejected": decision.rejected_options
	            },
	            "tradeoffs": decision.tradeoffs_discussed,
	            "concerns": decision.concerns_raised,
	            "rationale": {
	                "primary": decision.primary_rationale,
	                "supporting": decision.supporting_rationales,
	                "evidence": decision.evidence_cited
	            },
	            "stakeholders": {
	                "supporters": decision.supporters,
	                "opponents": decision.opponents,
	                "undecided": decision.undecided_or_conflicted
	            },
	            "vote": {
	                "result": decision.vote_result,
	                "breakdown": decision.voting_breakdown
	            },
	            "implementation": {
	                "expected_impacts": decision.expected_impacts,
	                "timeline": decision.implementation_timeline,
	                "cost_estimate": decision.cost_estimate
	            },
	            "metadata": {
	                "meeting_date": meeting_date,
	                "municipality": decision.municipality,
	                "state": decision.state,
	                "document_id": decision.document_id,
	                "confidence_score": decision.confidence_score
	            }
	        }