File size: 8,175 Bytes
aca8ab4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
Unit tests for Pydantic schema validators.
Tests the field_validator decorators in utils/schemas.py.
"""
import pytest
from datetime import datetime

from utils.schemas import Analysis, ConsensusPoint, Contradiction, SynthesisResult


class TestAnalysisValidators:
    """Exercise the list-normalising validators on the Analysis schema."""

    def test_citations_with_nested_empty_list(self):
        """An empty list embedded in citations is expected to be dropped."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test methodology",
            "key_findings": ["Finding 1"],
            "conclusions": "Test conclusions",
            "limitations": ["Limit 1"],
            # The stray [] between the two strings is the malformed part.
            "citations": ["Citation 1", [], "Citation 2"],
            "main_contributions": ["Contribution 1"],
            "confidence_score": 0.8,
        }

        result = Analysis(**payload)

        # Only the two real citation strings should survive.
        assert result.citations == ["Citation 1", "Citation 2"]

    def test_citations_with_deeply_nested_lists(self):
        """Values wrapped in several list layers are expected to come out flat."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": [["Nested finding"]],
            "conclusions": "Test",
            "limitations": [[["Triple nested"]]],
            "citations": [[["Deep citation"]]],
            "main_contributions": [],
            "confidence_score": 0.5,
        }
        expected_by_field = {
            "key_findings": ["Nested finding"],
            "limitations": ["Triple nested"],
            "citations": ["Deep citation"],
        }

        result = Analysis(**payload)

        for field_name, expected in expected_by_field.items():
            assert getattr(result, field_name) == expected

    def test_mixed_types_are_normalized(self):
        """Lists mixing strings, numbers, None and "" are expected to be cleaned."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": ["Finding", None, 123, ""],
            "conclusions": "Test",
            "limitations": [456, "Limit"],
            "citations": ["Citation", None, ""],
            "confidence_score": 0.7,
        }
        # Expectation: None / "" entries vanish and numbers become strings.
        expected_by_field = {
            "key_findings": ["Finding", "123"],
            "limitations": ["456", "Limit"],
            "citations": ["Citation"],
        }

        result = Analysis(**payload)

        for field_name, expected in expected_by_field.items():
            assert getattr(result, field_name) == expected

    def test_string_converted_to_list(self):
        """A bare string given for a list field is expected to be wrapped in a list."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            # key_findings and limitations deliberately receive str, not list.
            "key_findings": "Single finding",
            "conclusions": "Test",
            "limitations": "Single limitation",
            "citations": [],
            "confidence_score": 0.6,
        }

        result = Analysis(**payload)

        assert result.key_findings == ["Single finding"]
        assert result.limitations == ["Single limitation"]


class TestConsensusPointValidators:
    """Exercise the list-normalising validators on the ConsensusPoint schema."""

    def test_supporting_papers_with_nested_lists(self):
        """Nested and empty lists in the list fields are expected to be flattened."""
        payload = {
            "statement": "Test consensus",
            "supporting_papers": ["paper1", [], ["paper2"]],
            "citations": ["Citation 1", [["Nested citation"]]],
            "confidence": 0.9,
        }
        expected_by_field = {
            "supporting_papers": ["paper1", "paper2"],
            "citations": ["Citation 1", "Nested citation"],
        }

        point = ConsensusPoint(**payload)

        for field_name, expected in expected_by_field.items():
            assert getattr(point, field_name) == expected

    def test_empty_and_none_values_filtered(self):
        """None and empty-string entries are expected to be removed."""
        payload = {
            "statement": "Test",
            "supporting_papers": ["paper1", None, "", "paper2"],
            "citations": ["Citation", None],
            "confidence": 0.8,
        }

        point = ConsensusPoint(**payload)

        assert point.supporting_papers == ["paper1", "paper2"]
        assert point.citations == ["Citation"]


class TestContradictionValidators:
    """Exercise the list-normalising validators on the Contradiction schema."""

    def test_papers_lists_with_nested_values(self):
        """Nested lists in papers_a, papers_b and citations are expected to flatten."""
        payload = {
            "topic": "Test topic",
            "viewpoint_a": "View A",
            "papers_a": ["paper1", [], "paper2"],
            "viewpoint_b": "View B",
            "papers_b": [["paper3"], "paper4"],
            "citations": ["Citation 1", [["Nested"]]],
            "confidence": 0.7,
        }
        expected_by_field = {
            "papers_a": ["paper1", "paper2"],
            "papers_b": ["paper3", "paper4"],
            "citations": ["Citation 1", "Nested"],
        }

        contradiction = Contradiction(**payload)

        for field_name, expected in expected_by_field.items():
            assert getattr(contradiction, field_name) == expected

    def test_mixed_types_normalized(self):
        """Numbers in the papers lists are expected as strings, None is dropped."""
        payload = {
            "topic": "Test",
            "viewpoint_a": "A",
            "papers_a": ["paper1", 123, None],
            "viewpoint_b": "B",
            "papers_b": [456, "paper2"],
            "citations": ["Citation"],
            "confidence": 0.6,
        }

        contradiction = Contradiction(**payload)

        assert contradiction.papers_a == ["paper1", "123"]
        assert contradiction.papers_b == ["456", "paper2"]


class TestSynthesisResultValidators:
    """Exercise the list-normalising validators on the SynthesisResult schema."""

    def test_research_gaps_with_nested_lists(self):
        """Nested lists, empty lists and None are expected to be cleaned away."""
        payload = {
            "consensus_points": [],
            "contradictions": [],
            "research_gaps": ["Gap 1", [["Nested gap"]], None],
            "summary": "Test summary",
            "confidence_score": 0.8,
            "papers_analyzed": ["paper1", [], "paper2"],
        }
        expected_by_field = {
            "research_gaps": ["Gap 1", "Nested gap"],
            "papers_analyzed": ["paper1", "paper2"],
        }

        outcome = SynthesisResult(**payload)

        for field_name, expected in expected_by_field.items():
            assert getattr(outcome, field_name) == expected

    def test_string_converted_to_list(self):
        """A bare string given for a list field is expected to be wrapped in a list."""
        payload = {
            "consensus_points": [],
            "contradictions": [],
            # research_gaps and papers_analyzed deliberately receive str.
            "research_gaps": "Single gap",
            "summary": "Test",
            "confidence_score": 0.7,
            "papers_analyzed": "paper1",
        }

        outcome = SynthesisResult(**payload)

        assert outcome.research_gaps == ["Single gap"]
        assert outcome.papers_analyzed == ["paper1"]


class TestValidatorsWithRealWorldData:
    """Regression-style cases modelled on malformed LLM responses."""

    def test_llm_returns_empty_arrays_within_citations(self):
        """Reproduce the reported bug: empty lists interleaved with real entries."""
        # Shape of the reported input: ["citation 1", [], "citation 2"]
        payload = {
            "paper_id": "2303.08710v1",
            "methodology": "Deep learning approach",
            "key_findings": ["95% accuracy", [], "Outperforms baselines"],
            "conclusions": "Novel method works well",
            "limitations": ["Limited dataset", []],
            "citations": ["Methodology section", [], "Results section"],
            "main_contributions": ["Novel architecture"],
            "confidence_score": 0.85,
        }
        expected_by_field = {
            "citations": ["Methodology section", "Results section"],
            "key_findings": ["95% accuracy", "Outperforms baselines"],
            "limitations": ["Limited dataset"],
        }

        result = Analysis(**payload)

        # Construction itself must succeed rather than raise a validation error.
        assert isinstance(result, Analysis)
        for field_name, expected in expected_by_field.items():
            assert getattr(result, field_name) == expected

    def test_llm_returns_mixed_malformed_data(self):
        """Throw every kind of malformed entry at all four list fields at once."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": [[], "Finding", None, [["Nested"]], "", "  ", 123],
            "conclusions": "Test",
            "limitations": [[["Deep"]], None, "Limit", []],
            "citations": ["Citation", [[], []], None, ""],
            "main_contributions": [None, [], "Contribution", [["Deep contrib"]]],
            "confidence_score": 0.5,
        }
        # Only the meaningful values are expected to remain, in input order.
        expected_by_field = {
            "key_findings": ["Finding", "Nested", "123"],
            "limitations": ["Deep", "Limit"],
            "citations": ["Citation"],
            "main_contributions": ["Contribution", "Deep contrib"],
        }

        result = Analysis(**payload)

        for field_name, expected in expected_by_field.items():
            assert getattr(result, field_name) == expected


if __name__ == "__main__":
    # pytest.main() returns the session's exit code. Hand it to the
    # interpreter so that running this file directly exits non-zero when a
    # test fails, instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))