Spaces:
Running
Running
File size: 3,578 Bytes
"""Tests for per-task grading: perfect scores, partial credit, wrong service, destructive penalty."""
import pytest
class TestTask1Grading:
    """Score-threshold checks for ``task1.grade_resolution``.

    Every test receives ``task1`` as an argument (presumably a pytest
    fixture defined outside this file), submits a resolution dict with the
    keys ``root_cause``, ``affected_service`` and ``recommendation``, and
    asserts a bound on the returned ``"score"``.
    """

    def test_perfect_resolution(self, task1):
        """A full, correct resolution must score at least 0.80."""
        resolution = {
            "root_cause": "Missing DB_CONNECTION_STRING env var after deploy v2.3.1",
            "affected_service": "payment-api",
            "recommendation": "Rollback to v2.3.0 or set the DB_CONNECTION_STRING env var",
        }
        outcome = task1.grade_resolution(resolution, step_count=3)
        assert outcome["score"] >= 0.80

    def test_wrong_service(self, task1):
        """Naming ``order-service`` as the affected service caps the score at 0.85."""
        resolution = {
            "root_cause": "Missing DB_CONNECTION_STRING env var after deploy v2.3.1",
            "affected_service": "order-service",
            "recommendation": "Rollback",
        }
        outcome = task1.grade_resolution(resolution, step_count=3)
        # The affected_service points are expected to be forfeited here.
        # NOTE(review): this 0.85 ceiling overlaps the >= 0.80 floor used by
        # test_perfect_resolution, so a score in [0.80, 0.85] satisfies both
        # tests - confirm the intended bound.
        assert outcome["score"] <= 0.85

    def test_empty_resolution(self, task1):
        """All-empty fields after a single step must score no more than 0.20."""
        blank_fields = {"root_cause": "", "affected_service": "", "recommendation": ""}
        outcome = task1.grade_resolution(blank_fields, step_count=1)
        assert outcome["score"] <= 0.20
class TestTask2Grading:
    """Score-threshold checks for ``task2.grade_resolution``.

    Each test receives ``task2`` as an argument (presumably a pytest fixture
    defined outside this file) and asserts a bound on the ``"score"`` entry
    of the returned mapping.
    """

    def test_perfect_resolution(self, task2):
        """A resolution naming the inventory-service OOM must score at least 0.70."""
        resolution = {
            "root_cause": "inventory-service OOM memory leak from batch processing causing checkout-service timeout",
            "affected_service": "inventory-service",
            "recommendation": "Increase memory limit to 1Gi and reduce batch size or stream results",
        }
        outcome = task2.grade_resolution(resolution, step_count=4)
        assert outcome["score"] >= 0.70

    def test_wrong_root_cause(self, task2):
        """Right service but a 'Network partition' root cause must stay below 0.40."""
        resolution = {
            "root_cause": "Network partition",
            "affected_service": "inventory-service",
            "recommendation": "Restart networking",
        }
        outcome = task2.grade_resolution(resolution, step_count=3)
        assert outcome["score"] < 0.40
class TestTask3Grading:
    """Score-threshold checks for ``task3.grade_resolution``.

    Each test receives ``task3`` as an argument (presumably a pytest fixture
    defined outside this file) and asserts a bound on the ``"score"`` entry
    of the returned mapping.
    """

    def test_perfect_resolution(self, task3):
        """Naming the analytics-worker query and the cascade must score at least 0.70."""
        resolution = {
            "root_cause": "analytics-worker long-running query exhausted the connection pool, cascade to auth-service, user-profile-service, notification-service",
            "affected_service": "postgres-primary",
            "recommendation": "Kill the query and set statement_timeout, use read replica for analytics",
        }
        outcome = task3.grade_resolution(resolution, step_count=5)
        assert outcome["score"] >= 0.70

    def test_blames_notification_deploy(self, task3):
        """Blaming the notification-service deploy must score no more than 0.20."""
        resolution = {
            "root_cause": "notification-service deploy v3.1 caused the failure",
            "affected_service": "notification-service",
            "recommendation": "Rollback notification-service",
        }
        outcome = task3.grade_resolution(resolution, step_count=3)
        # Both the root cause and the affected service are wrong, so the
        # score is expected to be poor.
        assert outcome["score"] <= 0.20

    def test_partial_credit_pool_only(self, task3):
        """Identifying only the exhausted pool must land between 0.20 and 0.65."""
        resolution = {
            "root_cause": "postgres connection pool exhausted and full",
            "affected_service": "postgres-primary",
            "recommendation": "Increase pool size",
        }
        outcome = task3.grade_resolution(resolution, step_count=3)
        # The pool is identified, but analytics-worker is not mentioned.
        assert 0.20 <= outcome["score"] <= 0.65
|