File size: 6,181 Bytes
"""Tests for the shared extract_json utility."""
import pytest
from app.utils.json_extract import extract_json
class TestCleanJSON:
    """Inputs that are already valid JSON and need no cleanup."""

    def test_simple_object(self):
        """A flat one-key object is expected to come back as the same dict."""
        assert extract_json('{"key": "value"}') == {"key": "value"}

    def test_nested_object(self):
        """Objects nested three levels deep are expected to keep their shape."""
        expected = {"a": {"b": {"c": 1}}}
        assert extract_json('{"a": {"b": {"c": 1}}}') == expected

    def test_object_with_array(self):
        """An array value is expected to come back as a list."""
        expected = {"items": [1, 2, 3]}
        assert extract_json('{"items": [1, 2, 3]}') == expected
class TestMarkdownFences:
    """Inputs where the JSON payload is surrounded by markdown code fences."""

    def test_json_fence(self):
        """Fence opened with a ``json`` language tag."""
        fenced = '```json\n{"key": "value"}\n```'
        parsed = extract_json(fenced)
        assert parsed == {"key": "value"}

    def test_plain_fence(self):
        """Fence opened without any language tag."""
        fenced = '```\n{"key": "value"}\n```'
        parsed = extract_json(fenced)
        assert parsed == {"key": "value"}

    def test_trailing_backticks(self):
        """Closing backticks only, with no opening fence."""
        fenced = '{"key": "value"}```'
        parsed = extract_json(fenced)
        assert parsed == {"key": "value"}

    def test_fence_with_whitespace(self):
        """Extra spaces after the fence tag and around the payload."""
        fenced = '```json \n {"key": "value"} \n```'
        parsed = extract_json(fenced)
        assert parsed == {"key": "value"}
class TestThinkTags:
    """Inputs that carry a <think>...</think> section alongside the JSON."""

    def test_think_before_json(self):
        """A think section precedes the JSON payload."""
        response = '<think>Let me analyze this...</think>\n{"key": "value"}'
        parsed = extract_json(response)
        assert parsed == {"key": "value"}

    def test_think_with_json_inside(self):
        """JSON inside the think section must not be the object returned."""
        response = '<think>{"wrong": true}</think>\n{"key": "value"}'
        parsed = extract_json(response)
        assert parsed == {"key": "value"}

    def test_think_and_fences_combined(self):
        """A think section followed by a fenced JSON payload."""
        response = '<think>reasoning</think>\n```json\n{"key": "value"}\n```'
        parsed = extract_json(response)
        assert parsed == {"key": "value"}
class TestPreambleText:
    """Inputs where free-form text comes before the JSON payload."""

    def test_text_before_json(self):
        """A single introductory line precedes the object."""
        response = 'Here is the result:\n{"key": "value"}'
        parsed = extract_json(response)
        assert parsed == {"key": "value"}

    def test_multiple_lines_before_json(self):
        """Two introductory lines precede the object."""
        response = 'I analyzed the input.\nThe findings are:\n{"score": 0.8}'
        parsed = extract_json(response)
        assert parsed == {"score": 0.8}
class TestTruncatedJSON:
    """Inputs whose JSON is cut off before its closing braces."""

    def test_single_missing_brace(self):
        """Only the final closing brace is missing."""
        parsed = extract_json('{"key": "value", "score": 0.8')
        assert parsed["key"] == "value"
        assert parsed["score"] == 0.8

    def test_nested_missing_braces(self):
        """Both the inner and the outer closing braces are missing."""
        parsed = extract_json('{"outer": {"inner": "value"')
        assert parsed["outer"]["inner"] == "value"

    def test_truncated_with_preamble(self):
        """Truncated object that also follows a line of preamble text."""
        response = (
            'Result:\n{"evidence_supports_prediction": false, "coherence_score": 0.7'
        )
        parsed = extract_json(response)
        assert parsed["evidence_supports_prediction"] is False
        assert parsed["coherence_score"] == 0.7

    def test_truncated_inside_fence(self):
        """Truncation inside an unclosed fence, midway through a string value."""
        response = (
            '```json\n{"should_trust_prediction": "low", "reasoning": "short text'
        )
        parsed = extract_json(response)
        assert parsed["should_trust_prediction"] == "low"

    def test_truncated_with_complete_fields(self):
        """Realistic case: a Gemini response cut off after some complete fields."""
        # Joined line by line; the last entry ends mid-string on purpose.
        response = "\n".join(
            [
                "{",
                '"likely_adversarial": false,',
                '"adversarial_type": null,',
                '"authenticity_score": 0.9,',
                '"warning": "The text appears genui',
            ]
        )
        parsed = extract_json(response)
        assert parsed["likely_adversarial"] is False
        assert parsed["authenticity_score"] == 0.9
class TestMultipleObjects:
    """Inputs that contain more than one JSON object."""

    def test_returns_first_valid(self):
        """The first object in the text is the one expected back."""
        response = 'prefix {"first": 1} suffix {"second": 2}'
        assert extract_json(response) == {"first": 1}
class TestErrorCases:
    """Inputs for which extract_json is expected to raise ValueError."""

    def test_empty_string(self):
        """An empty string must be rejected with an 'Empty response' message."""
        payload = ""
        with pytest.raises(ValueError, match="Empty response"):
            extract_json(payload)

    def test_complete_garbage(self):
        """Plain prose containing no JSON at all must be rejected."""
        payload = "this is not json at all"
        with pytest.raises(ValueError, match="No valid JSON"):
            extract_json(payload)

    def test_only_think_tags(self):
        """A response made up solely of a think section must be rejected."""
        payload = "<think>just thinking</think>"
        with pytest.raises(ValueError, match="No valid JSON"):
            extract_json(payload)

    def test_only_fences(self):
        """A fenced block whose content is not JSON must be rejected."""
        payload = "```json\nnot json\n```"
        with pytest.raises(ValueError, match="No valid JSON"):
            extract_json(payload)
class TestRealisticLLMResponses:
    """End-to-end cases modelled on actual Gemini response patterns from logs."""

    def test_evidence_validation_response(self):
        """Complete multi-line object carrying boolean and float fields."""
        response = "\n".join(
            [
                "{",
                '"evidence_supports_prediction": false,',
                '"coherence_score": 0.6,',
                (
                    '"alternative_interpretation": "The text contains negated '
                    'statements that the model may have misinterpreted",'
                ),
                '"flagged_for_review": true',
                "}",
            ]
        )
        parsed = extract_json(response)
        assert parsed["evidence_supports_prediction"] is False
        assert parsed["flagged_for_review"] is True

    def test_confidence_calibration_response(self):
        """Complete multi-line object carrying a list and a null field."""
        response = "\n".join(
            [
                "{",
                '"should_trust_prediction": "medium",',
                '"reasoning": "Mixed signals with some negated statements",',
                '"potential_confounders": ["negation", "sarcasm"],',
                '"recommended_threshold_adjustment": null',
                "}",
            ]
        )
        parsed = extract_json(response)
        assert parsed["should_trust_prediction"] == "medium"
        assert "negation" in parsed["potential_confounders"]

    def test_explanation_with_markdown_wrap(self):
        """Matches the actual log error: response wrapped in ```json."""
        response = "\n".join(
            [
                "```json",
                "{",
                '"summary": "Your words suggest several patterns",',
                '"why_model_thinks_this": "The model detected indicators",',
                '"uncertainty_notes": "Some statements were ambiguous",',
                '"symptom_explanations": {"DEPRESSED_MOOD": "Mood fluctuation noted"},',
                '"safety_disclaimer": "This is not a diagnosis",',
                '"resources": ["988 Lifeline"]',
                "}",
                "```",
            ]
        )
        parsed = extract_json(response)
        assert "patterns" in parsed["summary"]
        assert "DEPRESSED_MOOD" in parsed["symptom_explanations"]
|