File size: 3,773 Bytes
0bfc819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Unit tests for HFInferenceJudgeHandler Circuit Breaker."""

from unittest.mock import MagicMock, patch

import pytest

from src.agent_factory.judges import HFInferenceJudgeHandler
from src.utils.models import Citation, Evidence


@pytest.mark.unit
class TestJudgeCircuitBreaker:
    """Exercise the consecutive-failure circuit breaker of HFInferenceJudgeHandler."""

    @pytest.fixture
    def handler(self):
        """Return a handler that was constructed while InferenceClient was patched.

        The patch is only active during construction; it has already been
        undone by the time the test receives the instance.
        """
        with patch("src.agent_factory.judges.InferenceClient"):
            judge = HFInferenceJudgeHandler()
        return judge

    @pytest.mark.asyncio
    async def test_circuit_breaker_triggers_after_max_failures(self, handler):
        """Three failing calls answer 'continue'; the fourth answers 'synthesize'."""
        citation = Citation(source="pubmed", title="t", url="u", date="2025")
        evidence = [Evidence(content="test", citation=citation)]

        # Every model call raises while this patch is active.
        always_failing = patch.object(
            handler, "_call_with_retry", side_effect=Exception("Model failed")
        )
        with always_failing:
            # Calls 1-3: each failure bumps the counter and still asks to continue.
            for expected_count in (1, 2, 3):
                outcome = await handler.assess("test", evidence)
                assert outcome.recommendation == "continue"
                assert handler.consecutive_failures == expected_count

            # Call 4: with the counter already at 3 the breaker is expected to trip.
            tripped = await handler.assess("test", evidence)

        assert tripped.recommendation == "synthesize"
        assert tripped.sufficient is True
        # The reasoning text should mention either "failed" or "unavailable".
        reasoning = tripped.reasoning.lower()
        assert any(keyword in reasoning for keyword in ("failed", "unavailable"))

    @pytest.mark.asyncio
    async def test_circuit_breaker_resets_on_success(self, handler):
        """A successful call brings the failure counter back to zero."""
        citation = Citation(source="pubmed", title="t", url="u", date="d")
        evidence = [Evidence(content="t", citation=citation)]

        # Step 1: a single failing call leaves the counter at 1.
        failing = patch.object(handler, "_call_with_retry", side_effect=Exception("Fail"))
        with failing:
            await handler.assess("test", evidence)
            assert handler.consecutive_failures == 1

        # Step 2: a call that returns an assessment clears the counter.
        ok_assessment = MagicMock(recommendation="continue", sufficient=False)
        succeeding = patch.object(handler, "_call_with_retry", return_value=ok_assessment)
        with succeeding:
            await handler.assess("test", evidence)
            assert handler.consecutive_failures == 0  # counter was reset

    @pytest.mark.asyncio
    async def test_circuit_breaker_resets_on_new_question(self, handler):
        """Failures on a different question start counting from scratch."""
        no_evidence = []

        failing = patch.object(handler, "_call_with_retry", side_effect=Exception("Fail"))
        with failing:
            # First failure, on Question A.
            await handler.assess("Question A", no_evidence)
            assert handler.consecutive_failures == 1

            # Failure on Question B. Without a reset the counter would now be 2;
            # the test expects the count to restart and this failure to make it 1.
            await handler.assess("Question B", no_evidence)
            assert handler.consecutive_failures == 1