File size: 3,578 Bytes
33dd3ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""Tests for per-task grading: perfect scores, partial credit, wrong service, destructive penalty."""

import pytest


class TestTask1Grading:
    """Score-threshold checks for ``task1.grade_resolution``.

    ``task1`` is injected by name; it is presumably a pytest fixture
    defined outside this file (e.g. in a conftest) -- TODO confirm.
    """

    def test_perfect_resolution(self, task1):
        """A complete answer naming ``payment-api`` must score at least 0.80."""
        submission = {
            "root_cause": "Missing DB_CONNECTION_STRING env var after deploy v2.3.1",
            "affected_service": "payment-api",
            "recommendation": "Rollback to v2.3.0 or set the DB_CONNECTION_STRING env var",
        }
        graded = task1.grade_resolution(submission, step_count=3)
        score = graded["score"]
        assert score >= 0.80

    def test_wrong_service(self, task1):
        """Naming ``order-service`` as the affected service caps the score at 0.85."""
        submission = {
            "root_cause": "Missing DB_CONNECTION_STRING env var after deploy v2.3.1",
            "affected_service": "order-service",
            "recommendation": "Rollback",
        }
        graded = task1.grade_resolution(submission, step_count=3)
        score = graded["score"]
        # The affected_service portion of the score is expected to be forfeited.
        assert score <= 0.85

    def test_empty_resolution(self, task1):
        """All-empty fields must not score above 0.20."""
        blank_fields = ("root_cause", "affected_service", "recommendation")
        submission = {field: "" for field in blank_fields}
        graded = task1.grade_resolution(submission, step_count=1)
        score = graded["score"]
        assert score <= 0.20


class TestTask2Grading:
    """Score-threshold checks for ``task2.grade_resolution``.

    ``task2`` is injected by name; it is presumably a pytest fixture
    defined outside this file (e.g. in a conftest) -- TODO confirm.
    """

    def test_perfect_resolution(self, task2):
        """A complete answer naming ``inventory-service`` must score at least 0.70."""
        submission = {
            "root_cause": "inventory-service OOM memory leak from batch processing causing checkout-service timeout",
            "affected_service": "inventory-service",
            "recommendation": "Increase memory limit to 1Gi and reduce batch size or stream results",
        }
        graded = task2.grade_resolution(submission, step_count=4)
        score = graded["score"]
        assert score >= 0.70

    def test_wrong_root_cause(self, task2):
        """Right service but an unrelated root cause must score below 0.40."""
        submission = {
            "root_cause": "Network partition",
            "affected_service": "inventory-service",
            "recommendation": "Restart networking",
        }
        graded = task2.grade_resolution(submission, step_count=3)
        score = graded["score"]
        assert score < 0.40


class TestTask3Grading:
    """Score-threshold checks for ``task3.grade_resolution``.

    ``task3`` is injected by name; it is presumably a pytest fixture
    defined outside this file (e.g. in a conftest) -- TODO confirm.
    """

    def test_perfect_resolution(self, task3):
        """An answer citing analytics-worker and the pool must score at least 0.70."""
        submission = {
            "root_cause": "analytics-worker long-running query exhausted the connection pool, cascade to auth-service, user-profile-service, notification-service",
            "affected_service": "postgres-primary",
            "recommendation": "Kill the query and set statement_timeout, use read replica for analytics",
        }
        graded = task3.grade_resolution(submission, step_count=5)
        score = graded["score"]
        assert score >= 0.70

    def test_blames_notification_deploy(self, task3):
        """Blaming the notification-service deploy must not score above 0.20."""
        submission = {
            "root_cause": "notification-service deploy v3.1 caused the failure",
            "affected_service": "notification-service",
            "recommendation": "Rollback notification-service",
        }
        graded = task3.grade_resolution(submission, step_count=3)
        score = graded["score"]
        # Both the root cause and the affected service are wrong here,
        # so only a low score is acceptable.
        assert score <= 0.20

    def test_partial_credit_pool_only(self, task3):
        """Identifying only the exhausted pool must land in the 0.20-0.65 band."""
        submission = {
            "root_cause": "postgres connection pool exhausted and full",
            "affected_service": "postgres-primary",
            "recommendation": "Increase pool size",
        }
        graded = task3.grade_resolution(submission, step_count=3)
        score = graded["score"]
        # The pool is named but analytics-worker is not, so the score
        # should be neither near zero nor near full marks.
        assert 0.20 <= score <= 0.65