# Spaces: NoNameFound / sentinel_env
# Running
# File size: 3,578 Bytes
# 33dd3ee

"""Tests for per-task grading: perfect scores, partial credit, wrong service, destructive penalty."""

import pytest


class TestTask1Grading:
    """Score-threshold checks for ``task1.grade_resolution``.

    Every test takes ``task1`` by name (presumably a pytest fixture defined
    outside this file -- confirm in conftest), grades one hand-written
    resolution dict and compares the returned ``"score"`` to a fixed bound.
    """

    def test_perfect_resolution(self, task1):
        """Full root cause, payment-api and a concrete fix: score >= 0.80."""
        resolution = {
            "root_cause": "Missing DB_CONNECTION_STRING env var after deploy v2.3.1",
            "affected_service": "payment-api",
            "recommendation": "Rollback to v2.3.0 or set the DB_CONNECTION_STRING env var",
        }
        graded = task1.grade_resolution(resolution, step_count=3)
        score = graded["score"]
        assert score >= 0.80

    def test_wrong_service(self, task1):
        """Same root cause but blaming order-service: score <= 0.85."""
        # Note the recommendation is also shortened to a bare "Rollback" here.
        resolution = {
            "root_cause": "Missing DB_CONNECTION_STRING env var after deploy v2.3.1",
            "affected_service": "order-service",
            "recommendation": "Rollback",
        }
        graded = task1.grade_resolution(resolution, step_count=3)
        score = graded["score"]
        # The affected_service points are expected to be lost.
        assert score <= 0.85

    def test_empty_resolution(self, task1):
        """Three empty-string fields after a single step: score <= 0.20."""
        blank_fields = {"root_cause": "", "affected_service": "", "recommendation": ""}
        graded = task1.grade_resolution(blank_fields, step_count=1)
        score = graded["score"]
        assert score <= 0.20


class TestTask2Grading:
    """Score-threshold checks for ``task2.grade_resolution``.

    ``task2`` is received by name (presumably a pytest fixture defined
    outside this file -- confirm in conftest). Each test grades a single
    resolution dict and bounds the returned ``"score"``.
    """

    def test_perfect_resolution(self, task2):
        """inventory-service OOM named with a memory/batch fix: score >= 0.70."""
        resolution = {
            "root_cause": "inventory-service OOM memory leak from batch processing causing checkout-service timeout",
            "affected_service": "inventory-service",
            "recommendation": "Increase memory limit to 1Gi and reduce batch size or stream results",
        }
        graded = task2.grade_resolution(resolution, step_count=4)
        score = graded["score"]
        assert score >= 0.70

    def test_wrong_root_cause(self, task2):
        """Right service but a network-partition diagnosis: score < 0.40."""
        resolution = {
            "root_cause": "Network partition",
            "affected_service": "inventory-service",
            "recommendation": "Restart networking",
        }
        graded = task2.grade_resolution(resolution, step_count=3)
        score = graded["score"]
        assert score < 0.40


class TestTask3Grading:
    """Score-threshold checks for ``task3.grade_resolution``.

    ``task3`` is received by name (presumably a pytest fixture defined
    outside this file -- confirm in conftest). Each test grades a single
    resolution dict and bounds the returned ``"score"``.
    """

    def test_perfect_resolution(self, task3):
        """analytics-worker query plus pool exhaustion and cascade: score >= 0.70."""
        resolution = {
            "root_cause": "analytics-worker long-running query exhausted the connection pool, cascade to auth-service, user-profile-service, notification-service",
            "affected_service": "postgres-primary",
            "recommendation": "Kill the query and set statement_timeout, use read replica for analytics",
        }
        graded = task3.grade_resolution(resolution, step_count=5)
        score = graded["score"]
        assert score >= 0.70

    def test_blames_notification_deploy(self, task3):
        """Blaming the notification-service deploy: score <= 0.20."""
        resolution = {
            "root_cause": "notification-service deploy v3.1 caused the failure",
            "affected_service": "notification-service",
            "recommendation": "Rollback notification-service",
        }
        graded = task3.grade_resolution(resolution, step_count=3)
        score = graded["score"]
        # Expected to score poorly: both the root cause and the affected
        # service are wrong.
        assert score <= 0.20

    def test_partial_credit_pool_only(self, task3):
        """Pool exhaustion without analytics-worker: 0.20 <= score <= 0.65."""
        resolution = {
            "root_cause": "postgres connection pool exhausted and full",
            "affected_service": "postgres-primary",
            "recommendation": "Increase pool size",
        }
        graded = task3.grade_resolution(resolution, step_count=3)
        score = graded["score"]
        # The pool is identified but analytics-worker is not, so the score
        # should land in a middle band.
        assert 0.20 <= score <= 0.65