File size: 16,772 Bytes
61d29fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
"""
Debate Grader Agent for evaluating government decisions using debate framework.

Evaluates decisions across three dimensions:
- Harms: The problem/crisis identified
- Solvency: How the proposed solution addresses the problem
- Topicality: Whether the solution fits within jurisdiction's authority
"""
import asyncio
from typing import List, Dict, Any, Optional
from datetime import datetime
from loguru import logger

from agents.base import BaseAgent, AgentRole, AgentMessage, MessageType, AgentStatus


class DebateDimension:
    """Enumeration of debate evaluation dimensions."""
    HARMS = "harms"  # The problem
    SOLVENCY = "solvency"  # The fix
    TOPICALITY = "topicality"  # The scope


class DebateScore:
    """Score levels for each debate dimension."""
    EXCELLENT = "excellent"  # 4-5/5
    GOOD = "good"  # 3-4/5
    FAIR = "fair"  # 2-3/5
    WEAK = "weak"  # 1-2/5
    MISSING = "missing"  # 0-1/5


class DebateGraderAgent(BaseAgent):
    """
    Agent responsible for grading government decisions using debate framework.
    
    Translates debate concepts for laypeople:
    - Harms → "The Problem": Why is this a crisis in our community?
    - Solvency → "The Fix": How does this solution actually work?
    - Topicality → "The Scope": Does the government have authority to do this?
    """
    
    def __init__(self, agent_id: str = "debate-grader-001"):
        """Initialize the debate grader agent."""
        super().__init__(agent_id, AgentRole.SENTIMENT_ANALYZER)
        self._initialize_criteria()
    
    def _initialize_criteria(self):
        """Initialize evaluation criteria for each dimension."""
        
        # Harms evaluation keywords
        self.harms_indicators = {
            "problem_identification": [
                "crisis", "emergency", "critical", "urgent need",
                "widespread problem", "affecting", "impacting",
                "suffering", "lack of", "shortage", "gap in services"
            ],
            "data_evidence": [
                "statistics", "data shows", "research indicates",
                "study found", "percent", "%", "number of people",
                "cases", "instances", "reports"
            ],
            "affected_population": [
                "children", "families", "residents", "citizens",
                "low-income", "vulnerable", "underserved",
                "community members", "students", "seniors"
            ]
        }
        
        # Solvency evaluation keywords
        self.solvency_indicators = {
            "solution_clarity": [
                "will", "would", "proposes to", "plans to",
                "implement", "establish", "create", "provide",
                "offer", "deliver", "fund", "allocate"
            ],
            "mechanism": [
                "through", "by", "using", "via", "process",
                "program", "initiative", "partnership",
                "collaboration", "service", "system"
            ],
            "evidence_of_effectiveness": [
                "proven", "successful in", "works in",
                "demonstrated", "track record", "best practice",
                "evidence-based", "research-backed"
            ],
            "implementation_plan": [
                "timeline", "budget", "staff", "resources",
                "phase", "rollout", "launch", "start date",
                "completion", "milestones"
            ]
        }
        
        # Topicality evaluation keywords
        self.topicality_indicators = {
            "legal_authority": [
                "authority", "jurisdiction", "mandate",
                "chartered to", "empowered to", "authorized",
                "within our purview", "responsibility"
            ],
            "precedent": [
                "previously", "historically", "past practice",
                "similar actions", "other cities", "state law",
                "federal law", "code", "ordinance"
            ],
            "scope_appropriateness": [
                "city council", "county commission", "board",
                "department", "local government", "municipal",
                "within scope", "appropriate for"
            ]
        }
    
    async def process(self, message: AgentMessage) -> List[AgentMessage]:
        """
        Process debate grading commands.
        
        Args:
            message: Message containing decisions/documents to grade
            
        Returns:
            List of messages with debate grades
        """
        self.update_status(AgentStatus.PROCESSING, "Grading decisions with debate framework")
        
        try:
            documents = message.payload.get("documents", [])
            
            graded_documents = []
            
            for doc in documents:
                grade = await self._grade_document(doc)
                doc["debate_grade"] = grade
                graded_documents.append(doc)
            
            # Calculate aggregate insights
            insights = self._generate_insights(graded_documents)
            
            # Send results
            response = await self.send_message(
                recipient=AgentRole.ORCHESTRATOR,
                message_type=MessageType.RESPONSE,
                payload={
                    "documents": graded_documents,
                    "insights": insights,
                    "graded_count": len(graded_documents)
                }
            )
            
            self.update_status(AgentStatus.COMPLETED, f"Graded {len(graded_documents)} decisions")
            return [response]
            
        except Exception as e:
            logger.error(f"Debate grading failed: {e}")
            self.update_status(AgentStatus.ERROR, str(e))
            raise
    
    async def _grade_document(self, document: Dict[str, Any]) -> Dict[str, Any]:
        """
        Grade a single document across all debate dimensions.
        
        Args:
            document: Document to grade
            
        Returns:
            Dictionary with grades for each dimension
        """
        text = document.get("content", "").lower()
        title = document.get("title", "").lower()
        combined_text = f"{title} {text}"
        
        # Grade each dimension
        harms_score = self._grade_harms(combined_text)
        solvency_score = self._grade_solvency(combined_text)
        topicality_score = self._grade_topicality(combined_text)
        
        # Calculate overall score
        overall_score = self._calculate_overall_score(
            harms_score, solvency_score, topicality_score
        )
        
        return {
            "dimensions": {
                "harms": {
                    "score": harms_score["score"],
                    "grade": harms_score["grade"],
                    "explanation": harms_score["explanation"],
                    "layperson_label": "The Problem",
                    "layperson_question": "Why is this a crisis in our community?"
                },
                "solvency": {
                    "score": solvency_score["score"],
                    "grade": solvency_score["grade"],
                    "explanation": solvency_score["explanation"],
                    "layperson_label": "The Fix",
                    "layperson_question": "How does this solution actually work?"
                },
                "topicality": {
                    "score": topicality_score["score"],
                    "grade": topicality_score["grade"],
                    "explanation": topicality_score["explanation"],
                    "layperson_label": "The Scope",
                    "layperson_question": "Does the government have authority to do this?"
                }
            },
            "overall": {
                "score": overall_score,
                "grade": self._score_to_grade(overall_score),
                "summary": self._generate_summary(harms_score, solvency_score, topicality_score)
            },
            "timestamp": datetime.utcnow().isoformat()
        }
    
    def _grade_harms(self, text: str) -> Dict[str, Any]:
        """Grade the 'harms' dimension - problem identification."""
        score = 0
        max_score = 5
        details = []
        
        # Check for problem identification (0-2 points)
        problem_count = sum(1 for keyword in self.harms_indicators["problem_identification"] if keyword in text)
        if problem_count >= 3:
            score += 2
            details.append("Strong problem identification")
        elif problem_count >= 1:
            score += 1
            details.append("Problem mentioned but not detailed")
        
        # Check for data/evidence (0-2 points)
        data_count = sum(1 for keyword in self.harms_indicators["data_evidence"] if keyword in text)
        if data_count >= 2:
            score += 2
            details.append("Data-driven evidence provided")
        elif data_count >= 1:
            score += 1
            details.append("Some evidence mentioned")
        
        # Check for affected population (0-1 point)
        population_count = sum(1 for keyword in self.harms_indicators["affected_population"] if keyword in text)
        if population_count >= 1:
            score += 1
            details.append("Affected population identified")
        
        return {
            "score": score,
            "max_score": max_score,
            "grade": self._score_to_grade(score / max_score * 5),
            "explanation": "; ".join(details) if details else "No clear problem statement"
        }
    
    def _grade_solvency(self, text: str) -> Dict[str, Any]:
        """Grade the 'solvency' dimension - solution effectiveness."""
        score = 0
        max_score = 5
        details = []
        
        # Check for solution clarity (0-1 point)
        solution_count = sum(1 for keyword in self.solvency_indicators["solution_clarity"] if keyword in text)
        if solution_count >= 2:
            score += 1
            details.append("Clear solution proposed")
        
        # Check for mechanism (0-2 points)
        mechanism_count = sum(1 for keyword in self.solvency_indicators["mechanism"] if keyword in text)
        if mechanism_count >= 3:
            score += 2
            details.append("Implementation mechanism described")
        elif mechanism_count >= 1:
            score += 1
            details.append("Basic approach outlined")
        
        # Check for evidence of effectiveness (0-1 point)
        evidence_count = sum(1 for keyword in self.solvency_indicators["evidence_of_effectiveness"] if keyword in text)
        if evidence_count >= 1:
            score += 1
            details.append("Evidence-based approach")
        
        # Check for implementation plan (0-1 point)
        plan_count = sum(1 for keyword in self.solvency_indicators["implementation_plan"] if keyword in text)
        if plan_count >= 2:
            score += 1
            details.append("Implementation plan included")
        
        return {
            "score": score,
            "max_score": max_score,
            "grade": self._score_to_grade(score / max_score * 5),
            "explanation": "; ".join(details) if details else "No clear solution mechanism"
        }
    
    def _grade_topicality(self, text: str) -> Dict[str, Any]:
        """Grade the 'topicality' dimension - scope appropriateness."""
        score = 0
        max_score = 5
        details = []
        
        # Check for legal authority (0-2 points)
        authority_count = sum(1 for keyword in self.topicality_indicators["legal_authority"] if keyword in text)
        if authority_count >= 2:
            score += 2
            details.append("Legal authority cited")
        elif authority_count >= 1:
            score += 1
            details.append("Authority mentioned")
        
        # Check for precedent (0-2 points)
        precedent_count = sum(1 for keyword in self.topicality_indicators["precedent"] if keyword in text)
        if precedent_count >= 2:
            score += 2
            details.append("Precedent established")
        elif precedent_count >= 1:
            score += 1
            details.append("Some precedent referenced")
        
        # Check for scope appropriateness (0-1 point)
        scope_count = sum(1 for keyword in self.topicality_indicators["scope_appropriateness"] if keyword in text)
        if scope_count >= 1:
            score += 1
            details.append("Within appropriate scope")
        
        return {
            "score": score,
            "max_score": max_score,
            "grade": self._score_to_grade(score / max_score * 5),
            "explanation": "; ".join(details) if details else "Unclear jurisdictional authority"
        }
    
    def _score_to_grade(self, normalized_score: float) -> str:
        """Convert numerical score to grade."""
        if normalized_score >= 4.0:
            return DebateScore.EXCELLENT
        elif normalized_score >= 3.0:
            return DebateScore.GOOD
        elif normalized_score >= 2.0:
            return DebateScore.FAIR
        elif normalized_score >= 1.0:
            return DebateScore.WEAK
        else:
            return DebateScore.MISSING
    
    def _calculate_overall_score(
        self,
        harms: Dict[str, Any],
        solvency: Dict[str, Any],
        topicality: Dict[str, Any]
    ) -> float:
        """Calculate weighted overall score."""
        # Weight: Harms 40%, Solvency 40%, Topicality 20%
        harms_normalized = (harms["score"] / harms["max_score"]) * 5
        solvency_normalized = (solvency["score"] / solvency["max_score"]) * 5
        topicality_normalized = (topicality["score"] / topicality["max_score"]) * 5
        
        overall = (harms_normalized * 0.4) + (solvency_normalized * 0.4) + (topicality_normalized * 0.2)
        return round(overall, 2)
    
    def _generate_summary(
        self,
        harms: Dict[str, Any],
        solvency: Dict[str, Any],
        topicality: Dict[str, Any]
    ) -> str:
        """Generate human-readable summary."""
        parts = []
        
        if harms["grade"] in [DebateScore.EXCELLENT, DebateScore.GOOD]:
            parts.append("Strong problem identification")
        else:
            parts.append("Weak problem statement")
        
        if solvency["grade"] in [DebateScore.EXCELLENT, DebateScore.GOOD]:
            parts.append("clear solution")
        else:
            parts.append("unclear fix")
        
        if topicality["grade"] in [DebateScore.EXCELLENT, DebateScore.GOOD]:
            parts.append("within authority")
        else:
            parts.append("questionable scope")
        
        return "; ".join(parts).capitalize()
    
    def _generate_insights(self, documents: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate aggregate insights from all graded documents."""
        if not documents:
            return {}
        
        total = len(documents)
        dimension_scores = {
            "harms": [],
            "solvency": [],
            "topicality": []
        }
        overall_scores = []
        
        for doc in documents:
            grade = doc.get("debate_grade", {})
            dimensions = grade.get("dimensions", {})
            
            for dim in ["harms", "solvency", "topicality"]:
                if dim in dimensions:
                    dimension_scores[dim].append(dimensions[dim]["score"])
            
            if "overall" in grade:
                overall_scores.append(grade["overall"]["score"])
        
        # Calculate averages
        insights = {
            "total_documents": total,
            "average_scores": {
                "harms": round(sum(dimension_scores["harms"]) / len(dimension_scores["harms"]), 2) if dimension_scores["harms"] else 0,
                "solvency": round(sum(dimension_scores["solvency"]) / len(dimension_scores["solvency"]), 2) if dimension_scores["solvency"] else 0,
                "topicality": round(sum(dimension_scores["topicality"]) / len(dimension_scores["topicality"]), 2) if dimension_scores["topicality"] else 0,
                "overall": round(sum(overall_scores) / len(overall_scores), 2) if overall_scores else 0
            },
            "strongest_dimension": max(
                dimension_scores.items(),
                key=lambda x: sum(x[1]) / len(x[1]) if x[1] else 0
            )[0] if any(dimension_scores.values()) else None,
            "weakest_dimension": min(
                dimension_scores.items(),
                key=lambda x: sum(x[1]) / len(x[1]) if x[1] else 0
            )[0] if any(dimension_scores.values()) else None
        }
        
        return insights