File size: 12,599 Bytes
3ca1d38
 
 
 
 
 
 
 
 
 
696f787
 
 
3ca1d38
 
 
 
 
 
 
 
 
9659593
3ca1d38
696f787
3ca1d38
 
 
 
 
 
 
 
 
 
 
 
696f787
3ca1d38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9659593
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
 
 
696f787
3ca1d38
 
 
696f787
3ca1d38
 
 
 
 
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
 
 
 
 
 
 
 
696f787
3ca1d38
 
 
696f787
3ca1d38
 
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
696f787
3ca1d38
 
 
 
 
 
 
 
 
 
 
9659593
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
9659593
3ca1d38
696f787
3ca1d38
 
 
696f787
3ca1d38
9659593
3ca1d38
 
 
 
696f787
3ca1d38
696f787
3ca1d38
 
 
 
 
 
 
 
9659593
3ca1d38
 
696f787
3ca1d38
696f787
3ca1d38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696f787
3ca1d38
 
 
696f787
3ca1d38
 
696f787
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
 
 
 
 
696f787
3ca1d38
696f787
3ca1d38
696f787
9659593
 
 
3ca1d38
 
 
696f787
3ca1d38
 
696f787
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
 
 
 
 
 
 
696f787
3ca1d38
696f787
3ca1d38
 
 
 
 
 
 
9659593
3ca1d38
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
 
 
696f787
3ca1d38
 
 
 
 
 
 
9659593
3ca1d38
 
696f787
3ca1d38
 
 
 
696f787
3ca1d38
 
 
 
696f787
 
 
3ca1d38
 
 
 
 
 
 
 
 
9659593
3ca1d38
9659593
3ca1d38
 
 
696f787
3ca1d38
 
 
696f787
3ca1d38
696f787
3ca1d38
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
"""
MediGuard AI — Integration Tests

End-to-end tests verifying the complete analysis workflow.
These tests ensure all components work together correctly.

Run with: pytest tests/test_integration.py -v
"""

import os
from typing import Any

import pytest

# Force deterministic evaluation mode before any src.evaluation import reads it,
# so scored tests are reproducible across runs (see test_deterministic_clinical_accuracy).
os.environ["EVALUATION_DETERMINISTIC"] = "true"


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def sample_biomarkers() -> dict[str, float]:
    """Biomarker panel typical of a diabetic patient (elevated glucose/HbA1c/lipids)."""
    return dict(
        Glucose=145,
        HbA1c=7.2,
        Cholesterol=220,
        LDL=140,
        HDL=45,
        Triglycerides=180,
    )


@pytest.fixture
def normal_biomarkers() -> dict[str, float]:
    """Biomarker panel with every value inside the healthy reference range."""
    return dict(
        Glucose=90,
        HbA1c=5.2,
        Cholesterol=180,
        LDL=90,
        HDL=55,
        Triglycerides=120,
    )


# ---------------------------------------------------------------------------
# Shared Utilities Tests
# ---------------------------------------------------------------------------


class TestBiomarkerParsing:
    """Parsing of biomarker panels from JSON, key/value pairs, and free text."""

    def test_parse_json_input(self):
        """A valid JSON payload yields the exact numeric values."""
        from src.shared_utils import parse_biomarkers

        parsed = parse_biomarkers('{"Glucose": 140, "HbA1c": 7.5}')

        assert parsed["Glucose"] == 140
        assert parsed["HbA1c"] == 7.5

    def test_parse_key_value_format(self):
        """Comma-separated key:value pairs parse to the same values as JSON."""
        from src.shared_utils import parse_biomarkers

        parsed = parse_biomarkers("Glucose: 140, HbA1c: 7.5")

        assert parsed["Glucose"] == 140
        assert parsed["HbA1c"] == 7.5

    def test_parse_natural_language(self):
        """Free text with units still surfaces the glucose reading."""
        from src.shared_utils import parse_biomarkers

        parsed = parse_biomarkers("glucose 140 mg/dL and hemoglobin 13.5 g/dL")

        assert "Glucose" in parsed or "glucose" in parsed
        assert 140 in parsed.values()

    def test_normalize_biomarker_aliases(self):
        """Common aliases map onto their canonical biomarker names."""
        from src.shared_utils import normalize_biomarker_name

        aliases = {"a1c": "HbA1c", "fasting glucose": "Glucose", "ldl-c": "LDL"}
        for alias, canonical in aliases.items():
            assert normalize_biomarker_name(alias) == canonical

    def test_empty_input(self):
        """Blank or whitespace-only input parses to an empty dict."""
        from src.shared_utils import parse_biomarkers

        for text in ("", "  "):
            assert parse_biomarkers(text) == {}


class TestDiseaseScoring:
    """Rule-based disease scoring heuristics over a biomarker panel."""

    def test_diabetes_scoring_diabetic(self, sample_biomarkers):
        """Elevated glucose and HbA1c should produce a strong diabetes signal."""
        from src.shared_utils import score_disease_diabetes

        confidence, severity = score_disease_diabetes(sample_biomarkers)

        assert confidence > 0.5
        assert severity in ["moderate", "high"]

    def test_diabetes_scoring_normal(self, normal_biomarkers):
        """Normal biomarkers should score well below the diabetes threshold."""
        from src.shared_utils import score_disease_diabetes

        confidence, _severity = score_disease_diabetes(normal_biomarkers)

        assert confidence < 0.3

    def test_dyslipidemia_scoring(self, sample_biomarkers):
        """An elevated lipid panel should yield a non-trivial dyslipidemia score."""
        from src.shared_utils import score_disease_dyslipidemia

        confidence, _severity = score_disease_dyslipidemia(sample_biomarkers)

        assert confidence > 0.3

    def test_primary_prediction(self, sample_biomarkers):
        """The primary prediction exposes disease, confidence, and severity."""
        from src.shared_utils import get_primary_prediction

        prediction = get_primary_prediction(sample_biomarkers)

        for field in ("disease", "confidence", "severity"):
            assert field in prediction
        assert prediction["confidence"] > 0


class TestBiomarkerFlagging:
    """Classification and flagging of individual biomarker values."""

    def test_classify_abnormal_biomarker(self):
        """Values above, below, and inside the reference range get labelled."""
        from src.shared_utils import classify_biomarker

        cases = [(200, "high"), (50, "low"), (90, "normal")]
        for value, expected in cases:
            assert classify_biomarker("Glucose", value) == expected

    def test_flag_biomarkers(self, sample_biomarkers):
        """Every input biomarker is flagged and each flag carries all fields."""
        from src.shared_utils import flag_biomarkers

        flags = flag_biomarkers(sample_biomarkers)

        # One flag entry per input biomarker.
        assert len(flags) == len(sample_biomarkers)

        for flag in flags:
            for field in ("name", "value", "status"):
                assert field in flag


# ---------------------------------------------------------------------------
# Retrieval Tests
# ---------------------------------------------------------------------------


class TestRetrieverInterface:
    """The unified retriever interface and its FAISS backend."""

    def test_retrieval_result_dataclass(self):
        """RetrievalResult preserves the fields it was constructed with."""
        from src.services.retrieval.interface import RetrievalResult

        hit = RetrievalResult(
            doc_id="test-123", content="Test content about diabetes.", score=0.85, metadata={"source": "test.pdf"}
        )

        assert hit.doc_id == "test-123"
        assert hit.score == 0.85
        assert "diabetes" in hit.content

    @pytest.mark.skipif(
        not os.path.exists("data/vector_stores/medical_knowledge.faiss"), reason="FAISS index not available"
    )
    def test_faiss_retriever_loads(self):
        """A healthy, non-empty retriever loads from the local FAISS index."""
        from src.services.retrieval import make_retriever

        retriever = make_retriever(backend="faiss")

        assert retriever.health()
        assert retriever.doc_count() > 0


# ---------------------------------------------------------------------------
# Evaluation Tests
# ---------------------------------------------------------------------------


class TestEvaluationSystem:
    """Tests for the 5D evaluation system.

    Covers score validation, the programmatic (non-LLM) evaluators, the
    deterministic LLM-backed path, and result aggregation.
    """

    @pytest.fixture
    def sample_response(self) -> dict[str, Any]:
        """Sample analysis response containing every section the evaluators read."""
        return {
            "patient_summary": {
                "narrative": "Patient shows elevated blood glucose and HbA1c indicating diabetes.",
                "primary_finding": "Type 2 Diabetes",
            },
            "prediction_explanation": {
                "key_drivers": [
                    {"biomarker": "Glucose", "evidence": "Elevated at 145 mg/dL"},
                    {"biomarker": "HbA1c", "evidence": "7.2% indicates poor glycemic control"},
                ],
                "pdf_references": [
                    {"source": "guidelines.pdf", "page": 12},
                    {"source": "diabetes.pdf", "page": 45},
                ],
            },
            "clinical_recommendations": {
                "immediate_actions": ["Confirm HbA1c", "Schedule follow-up"],
                "lifestyle_changes": ["Dietary modifications", "Regular exercise"],
                "monitoring": ["Weekly glucose checks"],
            },
            "biomarker_flags": [
                {"name": "Glucose", "value": 145, "status": "high"},
                {"name": "HbA1c", "value": 7.2, "status": "high"},
            ],
            "key_findings": ["Diabetes indicators present"],
        }

    def test_graded_score_validation(self):
        """GradedScore accepts scores in [0, 1] and rejects out-of-range values."""
        from src.evaluation.evaluators import GradedScore

        valid = GradedScore(score=0.75, reasoning="Test")
        assert valid.score == 0.75

        # Scores above 1.0 must fail validation.
        with pytest.raises(ValueError):
            GradedScore(score=1.5, reasoning="Invalid")

    def test_evidence_grounding_programmatic(self, sample_response):
        """Evidence grounding is scored without an LLM and mentions citations."""
        from src.evaluation.evaluators import evaluate_evidence_grounding

        result = evaluate_evidence_grounding(sample_response)

        assert 0 <= result.score <= 1
        assert "Citations" in result.reasoning or "citations" in result.reasoning.lower()

    def test_safety_completeness_programmatic(self, sample_response, sample_biomarkers):
        """Safety completeness is scored programmatically from the response."""
        from src.evaluation.evaluators import evaluate_safety_completeness

        # Add required field for safety evaluation
        sample_response["confidence_assessment"] = {
            "limitations": ["Requires clinical confirmation"],
            "confidence_score": 0.75,
        }

        result = evaluate_safety_completeness(sample_response, sample_biomarkers)

        assert 0 <= result.score <= 1

    @pytest.mark.skipif(
        not os.environ.get("GROQ_API_KEY") and not os.environ.get("GOOGLE_API_KEY"), reason="No LLM API key available"
    )
    def test_deterministic_clinical_accuracy(self, sample_response):
        """Clinical accuracy evaluation is deterministic when the env flag is set."""
        from src.evaluation.evaluators import evaluate_clinical_accuracy

        # EVALUATION_DETERMINISTIC=true set at top of file
        result = evaluate_clinical_accuracy(sample_response, "Test context")

        assert 0 <= result.score <= 1
        assert "[DETERMINISTIC]" in result.reasoning

    def test_evaluation_result_average(self):
        """average_score() is the arithmetic mean of the five dimension scores.

        Note: this test needs no fixtures — the unused sample_response /
        sample_biomarkers parameters were removed so pytest no longer builds
        them for nothing.
        """
        from src.evaluation.evaluators import EvaluationResult, GradedScore

        result = EvaluationResult(
            clinical_accuracy=GradedScore(score=0.8, reasoning="Good"),
            evidence_grounding=GradedScore(score=0.7, reasoning="Good"),
            actionability=GradedScore(score=0.9, reasoning="Good"),
            clarity=GradedScore(score=0.6, reasoning="OK"),
            safety_completeness=GradedScore(score=0.8, reasoning="Good"),
        )

        avg = result.average_score()

        assert 0.7 < avg < 0.8  # (0.8+0.7+0.9+0.6+0.8)/5 = 0.76


# ---------------------------------------------------------------------------
# API Route Tests
# ---------------------------------------------------------------------------


class TestAPIRoutes:
    """Import-time smoke tests for the FastAPI routers (no server required)."""

    def test_analyze_router_import(self):
        """The analyze module imports cleanly and exposes a router object."""
        from src.routers import analyze

        assert hasattr(analyze, "router")

    def test_health_check_import(self):
        """The health module imports cleanly and exposes a router object."""
        from src.routers import health

        assert hasattr(health, "router")


# ---------------------------------------------------------------------------
# HuggingFace App Tests
# ---------------------------------------------------------------------------


class TestHuggingFaceApp:
    """Tests for HuggingFace Gradio app components."""

    def test_shared_utils_import_in_hf(self):
        """Shared utilities must be importable the same way the HF app does it."""
        import sys
        from pathlib import Path

        # Mirror the HF app's bootstrap: put the project root on sys.path.
        root = str(Path(__file__).parent.parent)
        if root not in sys.path:
            sys.path.insert(0, root)

        from src.shared_utils import parse_biomarkers

        # A trivial parse should succeed once the import works.
        parsed = parse_biomarkers("Glucose: 140")
        assert "Glucose" in parsed or len(parsed) > 0


# ---------------------------------------------------------------------------
# Workflow Tests
# ---------------------------------------------------------------------------


@pytest.mark.skipif(
    not os.environ.get("GROQ_API_KEY") and not os.environ.get("GOOGLE_API_KEY"), reason="No LLM API key available"
)
class TestWorkflow:
    """Tests requiring LLM API access."""

    def test_create_guild(self):
        """create_guild() builds the ClinicalInsightGuild without raising."""
        from src.workflow import create_guild

        assert create_guild() is not None


if __name__ == "__main__":
    # Allow running this file directly (python tests/test_integration.py)
    # instead of via the pytest CLI.
    pytest.main([__file__, "-v"])