"""
MediGuard AI — Integration Tests
End-to-end tests verifying the complete analysis workflow.
These tests ensure all components work together correctly.
Run with: pytest tests/test_integration.py -v
"""
import os
from typing import Any

import pytest

# Must be set at import time, before any evaluator code reads it, so that
# LLM-graded evaluations run in deterministic mode (see
# TestEvaluationSystem.test_deterministic_clinical_accuracy).
os.environ["EVALUATION_DETERMINISTIC"] = "true"
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def sample_biomarkers() -> dict[str, float]:
    """Biomarker panel with the elevated glucose/HbA1c/lipid profile of a diabetic patient."""
    panel = [
        ("Glucose", 145),
        ("HbA1c", 7.2),
        ("Cholesterol", 220),
        ("LDL", 140),
        ("HDL", 45),
        ("Triglycerides", 180),
    ]
    return dict(panel)
@pytest.fixture
def normal_biomarkers() -> dict[str, float]:
    """Biomarker panel with every value inside the healthy reference range."""
    panel = [
        ("Glucose", 90),
        ("HbA1c", 5.2),
        ("Cholesterol", 180),
        ("LDL", 90),
        ("HDL", 55),
        ("Triglycerides", 120),
    ]
    return dict(panel)
# ---------------------------------------------------------------------------
# Shared Utilities Tests
# ---------------------------------------------------------------------------
class TestBiomarkerParsing:
    """Tests for biomarker parsing from natural language."""

    def test_parse_json_input(self):
        """A valid JSON object should parse into a biomarker dict."""
        from src.shared_utils import parse_biomarkers

        parsed = parse_biomarkers('{"Glucose": 140, "HbA1c": 7.5}')
        assert parsed["Glucose"] == 140
        assert parsed["HbA1c"] == 7.5

    def test_parse_key_value_format(self):
        """Comma-separated `key: value` text should parse correctly."""
        from src.shared_utils import parse_biomarkers

        parsed = parse_biomarkers("Glucose: 140, HbA1c: 7.5")
        assert parsed["Glucose"] == 140
        assert parsed["HbA1c"] == 7.5

    def test_parse_natural_language(self):
        """Free-form text with units should yield biomarker values."""
        from src.shared_utils import parse_biomarkers

        parsed = parse_biomarkers("glucose 140 mg/dL and hemoglobin 13.5 g/dL")
        # Parser may or may not canonicalize the key's casing.
        assert any(key in parsed for key in ("Glucose", "glucose"))
        assert 140 in parsed.values()

    def test_normalize_biomarker_aliases(self):
        """Common aliases should map onto canonical biomarker names."""
        from src.shared_utils import normalize_biomarker_name

        alias_to_canonical = [
            ("a1c", "HbA1c"),
            ("fasting glucose", "Glucose"),
            ("ldl-c", "LDL"),
        ]
        for alias, canonical in alias_to_canonical:
            assert normalize_biomarker_name(alias) == canonical

    def test_empty_input(self):
        """Empty or whitespace-only input should yield an empty dict."""
        from src.shared_utils import parse_biomarkers

        assert parse_biomarkers("") == {}
        assert parse_biomarkers(" ") == {}
class TestDiseaseScoring:
    """Tests for rule-based disease scoring heuristics."""

    def test_diabetes_scoring_diabetic(self, sample_biomarkers):
        """Elevated glucose/HbA1c should score above 0.5 with non-trivial severity."""
        from src.shared_utils import score_disease_diabetes

        score, severity = score_disease_diabetes(sample_biomarkers)
        assert score > 0.5
        assert severity in ("moderate", "high")

    def test_diabetes_scoring_normal(self, normal_biomarkers):
        """A healthy panel should stay well below the diabetes threshold."""
        from src.shared_utils import score_disease_diabetes

        score, _severity = score_disease_diabetes(normal_biomarkers)
        assert score < 0.3

    def test_dyslipidemia_scoring(self, sample_biomarkers):
        """Elevated lipids should produce a meaningful dyslipidemia score."""
        from src.shared_utils import score_disease_dyslipidemia

        score, _severity = score_disease_dyslipidemia(sample_biomarkers)
        assert score > 0.3

    def test_primary_prediction(self, sample_biomarkers):
        """The primary prediction should carry disease, confidence and severity."""
        from src.shared_utils import get_primary_prediction

        prediction = get_primary_prediction(sample_biomarkers)
        for field in ("disease", "confidence", "severity"):
            assert field in prediction
        assert prediction["confidence"] > 0
class TestBiomarkerFlagging:
    """Tests for biomarker classification and flagging."""

    def test_classify_abnormal_biomarker(self):
        """Glucose values should classify as high, low or normal."""
        from src.shared_utils import classify_biomarker

        expectations = [
            (200, "high"),
            (50, "low"),
            (90, "normal"),
        ]
        for value, expected_status in expectations:
            assert classify_biomarker("Glucose", value) == expected_status

    def test_flag_biomarkers(self, sample_biomarkers):
        """Every input biomarker should yield a flag with name/value/status."""
        from src.shared_utils import flag_biomarkers

        flags = flag_biomarkers(sample_biomarkers)
        assert len(flags) == len(sample_biomarkers)
        # Each flag entry must expose the fields downstream consumers rely on.
        for flag in flags:
            for field in ("name", "value", "status"):
                assert field in flag
# ---------------------------------------------------------------------------
# Retrieval Tests
# ---------------------------------------------------------------------------
class TestRetrieverInterface:
    """Tests for the unified retriever interface."""

    def test_retrieval_result_dataclass(self):
        """RetrievalResult should carry doc_id, content, score and metadata."""
        from src.services.retrieval.interface import RetrievalResult

        hit = RetrievalResult(
            doc_id="test-123",
            content="Test content about diabetes.",
            score=0.85,
            metadata={"source": "test.pdf"},
        )
        assert hit.doc_id == "test-123"
        assert hit.score == 0.85
        assert "diabetes" in hit.content

    @pytest.mark.skipif(
        not os.path.exists("data/vector_stores/medical_knowledge.faiss"),
        reason="FAISS index not available",
    )
    def test_faiss_retriever_loads(self):
        """A FAISS-backed retriever should load, report healthy and be non-empty."""
        from src.services.retrieval import make_retriever

        retriever = make_retriever(backend="faiss")
        assert retriever.health()
        assert retriever.doc_count() > 0
# ---------------------------------------------------------------------------
# Evaluation Tests
# ---------------------------------------------------------------------------
class TestEvaluationSystem:
    """Tests for the 5D evaluation system."""

    @pytest.fixture
    def sample_response(self) -> dict[str, Any]:
        """Sample analysis response for evaluation.

        Mirrors the structure the evaluators expect: summary, explanation
        with citations, recommendations, flags, and key findings.
        """
        return {
            "patient_summary": {
                "narrative": "Patient shows elevated blood glucose and HbA1c indicating diabetes.",
                "primary_finding": "Type 2 Diabetes",
            },
            "prediction_explanation": {
                "key_drivers": [
                    {"biomarker": "Glucose", "evidence": "Elevated at 145 mg/dL"},
                    {"biomarker": "HbA1c", "evidence": "7.2% indicates poor glycemic control"},
                ],
                "pdf_references": [
                    {"source": "guidelines.pdf", "page": 12},
                    {"source": "diabetes.pdf", "page": 45},
                ],
            },
            "clinical_recommendations": {
                "immediate_actions": ["Confirm HbA1c", "Schedule follow-up"],
                "lifestyle_changes": ["Dietary modifications", "Regular exercise"],
                "monitoring": ["Weekly glucose checks"],
            },
            "biomarker_flags": [
                {"name": "Glucose", "value": 145, "status": "high"},
                {"name": "HbA1c", "value": 7.2, "status": "high"},
            ],
            "key_findings": ["Diabetes indicators present"],
        }

    def test_graded_score_validation(self):
        """Should validate score range 0-1."""
        from src.evaluation.evaluators import GradedScore

        valid = GradedScore(score=0.75, reasoning="Test")
        assert valid.score == 0.75
        # Out-of-range scores must be rejected at construction time.
        with pytest.raises(ValueError):
            GradedScore(score=1.5, reasoning="Invalid")

    def test_evidence_grounding_programmatic(self, sample_response):
        """Should evaluate evidence grounding programmatically (no LLM call)."""
        from src.evaluation.evaluators import evaluate_evidence_grounding

        result = evaluate_evidence_grounding(sample_response)
        assert 0 <= result.score <= 1
        # The reasoning text should mention citations in some casing.
        assert "Citations" in result.reasoning or "citations" in result.reasoning.lower()

    def test_safety_completeness_programmatic(self, sample_response, sample_biomarkers):
        """Should evaluate safety completeness programmatically (no LLM call)."""
        from src.evaluation.evaluators import evaluate_safety_completeness

        # Add required field for safety evaluation
        sample_response["confidence_assessment"] = {
            "limitations": ["Requires clinical confirmation"],
            "confidence_score": 0.75,
        }
        result = evaluate_safety_completeness(sample_response, sample_biomarkers)
        assert 0 <= result.score <= 1

    @pytest.mark.skipif(
        not os.environ.get("GROQ_API_KEY") and not os.environ.get("GOOGLE_API_KEY"), reason="No LLM API key available"
    )
    def test_deterministic_clinical_accuracy(self, sample_response):
        """Should evaluate clinical accuracy deterministically."""
        from src.evaluation.evaluators import evaluate_clinical_accuracy

        # EVALUATION_DETERMINISTIC=true set at top of file
        result = evaluate_clinical_accuracy(sample_response, "Test context")
        assert 0 <= result.score <= 1
        # Deterministic mode tags its reasoning so callers can distinguish it.
        assert "[DETERMINISTIC]" in result.reasoning

    def test_evaluation_result_average(self, sample_response, sample_biomarkers):
        """Should calculate average score across all dimensions."""
        from src.evaluation.evaluators import EvaluationResult, GradedScore

        result = EvaluationResult(
            clinical_accuracy=GradedScore(score=0.8, reasoning="Good"),
            evidence_grounding=GradedScore(score=0.7, reasoning="Good"),
            actionability=GradedScore(score=0.9, reasoning="Good"),
            clarity=GradedScore(score=0.6, reasoning="OK"),
            safety_completeness=GradedScore(score=0.8, reasoning="Good"),
        )
        avg = result.average_score()
        assert 0.7 < avg < 0.8  # (0.8+0.7+0.9+0.6+0.8)/5 = 0.76
# ---------------------------------------------------------------------------
# API Route Tests
# ---------------------------------------------------------------------------
class TestAPIRoutes:
    """Tests for FastAPI routes (requires running server or test client)."""

    def test_analyze_router_import(self):
        """The analyze module should import cleanly and expose a `router`."""
        from src.routers import analyze as analyze_module

        assert hasattr(analyze_module, "router")

    def test_health_check_import(self):
        """The health module should import cleanly and expose a `router`."""
        from src.routers import health as health_module

        assert hasattr(health_module, "router")
# ---------------------------------------------------------------------------
# HuggingFace App Tests
# ---------------------------------------------------------------------------
class TestHuggingFaceApp:
    """Tests for HuggingFace Gradio app components."""

    def test_shared_utils_import_in_hf(self):
        """Shared utilities should import under the HF app's sys.path setup."""
        import sys
        from pathlib import Path

        # Replicate the path manipulation the HuggingFace app performs.
        repo_root = str(Path(__file__).parent.parent)
        if repo_root not in sys.path:
            sys.path.insert(0, repo_root)

        from src.shared_utils import parse_biomarkers

        parsed = parse_biomarkers("Glucose: 140")
        # Import and parse both succeeded; result should be non-trivial.
        assert "Glucose" in parsed or len(parsed) > 0
# ---------------------------------------------------------------------------
# Workflow Tests
# ---------------------------------------------------------------------------
# Entire class is skipped when neither Groq nor Google API keys are configured,
# since guild creation requires a live LLM backend.
@pytest.mark.skipif(
    not os.environ.get("GROQ_API_KEY") and not os.environ.get("GOOGLE_API_KEY"), reason="No LLM API key available"
)
class TestWorkflow:
    """Tests requiring LLM API access."""

    def test_create_guild(self):
        """Should create ClinicalInsightGuild without errors."""
        from src.workflow import create_guild

        guild = create_guild()
        assert guild is not None
# Allow running this file directly (python tests/test_integration.py) in
# addition to the documented `pytest tests/test_integration.py -v` invocation.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])