"""Tests for per-task grading: perfect scores, partial credit, wrong service, destructive penalty.""" import pytest class TestTask1Grading: def test_perfect_resolution(self, task1): result = task1.grade_resolution( { "root_cause": "Missing DB_CONNECTION_STRING env var after deploy v2.3.1", "affected_service": "payment-api", "recommendation": "Rollback to v2.3.0 or set the DB_CONNECTION_STRING env var", }, step_count=3, ) assert result["score"] >= 0.80 def test_wrong_service(self, task1): result = task1.grade_resolution( { "root_cause": "Missing DB_CONNECTION_STRING env var after deploy v2.3.1", "affected_service": "order-service", "recommendation": "Rollback", }, step_count=3, ) # Should lose the affected_service points assert result["score"] <= 0.85 def test_empty_resolution(self, task1): result = task1.grade_resolution( {"root_cause": "", "affected_service": "", "recommendation": ""}, step_count=1, ) assert result["score"] <= 0.20 class TestTask2Grading: def test_perfect_resolution(self, task2): result = task2.grade_resolution( { "root_cause": "inventory-service OOM memory leak from batch processing causing checkout-service timeout", "affected_service": "inventory-service", "recommendation": "Increase memory limit to 1Gi and reduce batch size or stream results", }, step_count=4, ) assert result["score"] >= 0.70 def test_wrong_root_cause(self, task2): result = task2.grade_resolution( { "root_cause": "Network partition", "affected_service": "inventory-service", "recommendation": "Restart networking", }, step_count=3, ) assert result["score"] < 0.40 class TestTask3Grading: def test_perfect_resolution(self, task3): result = task3.grade_resolution( { "root_cause": "analytics-worker long-running query exhausted the connection pool, cascade to auth-service, user-profile-service, notification-service", "affected_service": "postgres-primary", "recommendation": "Kill the query and set statement_timeout, use read replica for analytics", }, step_count=5, ) assert result["score"] >= 0.70 def test_blames_notification_deploy(self, task3): result = task3.grade_resolution( { "root_cause": "notification-service deploy v3.1 caused the failure", "affected_service": "notification-service", "recommendation": "Rollback notification-service", }, step_count=3, ) # Should score poorly — wrong root cause and wrong affected service assert result["score"] <= 0.20 def test_partial_credit_pool_only(self, task3): result = task3.grade_resolution( { "root_cause": "postgres connection pool exhausted and full", "affected_service": "postgres-primary", "recommendation": "Increase pool size", }, step_count=3, ) # Pool identified but not analytics-worker assert 0.20 <= result["score"] <= 0.65