""" Test Suite for Clinical Synthesis Service Tests MedGemma prompt templates and synthesis functionality Author: MiniMax Agent Date: 2025-10-29 """ import sys import asyncio from datetime import datetime from typing import Dict, Any # Add backend to path sys.path.insert(0, '/workspace/medical-ai-platform/backend') from clinical_synthesis_service import get_synthesis_service from medical_schemas import ECGAnalysis, RadiologyAnalysis, LaboratoryResults, ClinicalNotesAnalysis def create_sample_ecg_data() -> Dict[str, Any]: """Create sample ECG structured data for testing""" return { "metadata": { "document_id": "ecg-test-001", "source_type": "ECG", "document_date": "2025-10-29T10:00:00Z", "facility": "Test Medical Center", "data_completeness": 0.95 }, "signal_data": { "lead_names": ["I", "II", "III", "aVR", "aVL", "aVF", "V1", "V2", "V3", "V4", "V5", "V6"], "sampling_rate_hz": 500, "signal_arrays": { "I": [0.5] * 5000, "II": [0.8] * 5000, "III": [0.3] * 5000, "aVR": [-0.6] * 5000, "aVL": [0.4] * 5000, "aVF": [0.6] * 5000, "V1": [0.2] * 5000, "V2": [0.4] * 5000, "V3": [0.6] * 5000, "V4": [0.8] * 5000, "V5": [0.9] * 5000, "V6": [0.8] * 5000 }, "duration_seconds": 10.0, "num_samples": 5000 }, "intervals": { "pr_ms": 165.0, "qrs_ms": 92.0, "qt_ms": 390.0, "qtc_ms": 425.0, "rr_ms": 850.0 }, "rhythm_classification": { "primary_rhythm": "Normal Sinus Rhythm", "rhythm_confidence": 0.92, "arrhythmia_types": [], "heart_rate_bpm": 71, "heart_rate_regularity": "regular" }, "arrhythmia_probabilities": { "normal_rhythm": 0.92, "atrial_fibrillation": 0.02, "atrial_flutter": 0.01, "ventricular_tachycardia": 0.01, "heart_block": 0.01, "premature_beats": 0.03 }, "derived_features": { "st_elevation_mm": {}, "st_depression_mm": {}, "t_wave_abnormalities": [], "q_wave_indicators": [], "axis_deviation": "normal" }, "confidence": { "extraction_confidence": 0.94, "model_confidence": 0.89, "data_quality": 0.95 } } def create_sample_radiology_data() -> Dict[str, Any]: """Create sample radiology structured data for testing""" return { "metadata": { "document_id": "rad-test-001", "source_type": "radiology", "document_date": "2025-10-29T11:00:00Z", "facility": "Imaging Center", "data_completeness": 0.90 }, "image_references": [ { "image_id": "img-001", "modality": "CT", "body_part": "Chest", "view_orientation": "Axial", "slice_thickness_mm": 2.5, "resolution": {"width": 512, "height": 512} } ], "findings": { "findings_text": "Chest CT shows clear lungs bilaterally. No pleural effusion. Heart size within normal limits. No mediastinal lymphadenopathy. Bones appear intact without acute fracture.", "impression_text": "No acute cardiopulmonary abnormality. Unremarkable chest CT.", "critical_findings": [], "incidental_findings": ["Mild degenerative changes in thoracic spine"], "comparison_prior": "None available", "technique_description": "Contrast-enhanced CT chest with IV contrast" }, "segmentations": [], "metrics": { "organ_volumes": {"lung_left": 2800, "lung_right": 2950, "heart": 680}, "lesion_measurements": [], "enhancement_patterns": [], "calcification_scores": {}, "tissue_density": {} }, "confidence": { "extraction_confidence": 0.88, "model_confidence": 0.85, "data_quality": 0.92 }, "criticality_level": "routine", "follow_up_recommendations": [] } def create_sample_laboratory_data() -> Dict[str, Any]: """Create sample laboratory results for testing""" return { "metadata": { "document_id": "lab-test-001", "source_type": "laboratory", "document_date": "2025-10-29T09:00:00Z", "facility": "Test Lab", "data_completeness": 0.98 }, "tests": [ { "test_name": "Glucose", "test_code": "2345-7", "value": 105.0, "unit": "mg/dL", "reference_range_low": 70.0, "reference_range_high": 99.0, "flags": ["H"] }, { "test_name": "Hemoglobin", "test_code": "718-7", "value": 14.5, "unit": "g/dL", "reference_range_low": 13.5, "reference_range_high": 17.5, "flags": [] }, { "test_name": "Creatinine", "test_code": "2160-0", "value": 1.1, "unit": "mg/dL", "reference_range_low": 0.7, "reference_range_high": 1.3, "flags": [] }, { "test_name": "Total Cholesterol", "test_code": "2093-3", "value": 215.0, "unit": "mg/dL", "reference_range_low": 0.0, "reference_range_high": 200.0, "flags": ["H"] } ], "critical_values": [], "panel_name": "Basic Metabolic Panel + Lipids", "fasting_status": "fasting", "collection_date": "2025-10-29T09:00:00Z", "confidence": { "extraction_confidence": 0.96, "model_confidence": 0.92, "data_quality": 0.98 }, "abnormal_count": 2, "critical_count": 0 } def create_sample_model_outputs() -> list: """Create sample model outputs for testing""" return [ { "model_name": "Bio_ClinicalBERT", "domain": "clinical_notes", "result": { "summary": "Analysis suggests normal baseline clinical parameters with minor metabolic considerations", "confidence": 0.87 } }, { "model_name": "MedGemma 27B", "domain": "general", "result": { "analysis": "Comprehensive medical review indicates overall satisfactory health status with attention to glucose and lipid management", "confidence": 0.85 } } ] async def test_ecg_synthesis(): """Test ECG synthesis - clinician and patient summaries""" print("\n" + "="*80) print("TEST 1: ECG SYNTHESIS") print("="*80) synthesis_service = get_synthesis_service() ecg_data = create_sample_ecg_data() model_outputs = create_sample_model_outputs() # Test clinician summary print("\n[1A] Clinician Summary - ECG") print("-" * 80) result = await synthesis_service.synthesize_clinical_summary( modality="ECG", structured_data=ecg_data, model_outputs=model_outputs, summary_type="clinician", user_id="test-user-001" ) print(f"Synthesis ID: {result['synthesis_id']}") print(f"Risk Level: {result['risk_level']}") print(f"Requires Review: {result['requires_review']}") print(f"Overall Confidence: {result['confidence_scores']['overall_confidence']*100:.1f}%") print(f"\nNarrative:\n{result['narrative'][:500]}...") print(f"\nRecommendations: {len(result['recommendations'])} items") for rec in result['recommendations'][:3]: print(f" - [{rec['priority']}] {rec['recommendation']}") # Test patient summary print("\n[1B] Patient Summary - ECG") print("-" * 80) result_patient = await synthesis_service.synthesize_clinical_summary( modality="ECG", structured_data=ecg_data, model_outputs=model_outputs, summary_type="patient", user_id="test-user-001" ) print(f"Narrative:\n{result_patient['narrative'][:500]}...") return True async def test_radiology_synthesis(): """Test radiology synthesis""" print("\n" + "="*80) print("TEST 2: RADIOLOGY SYNTHESIS") print("="*80) synthesis_service = get_synthesis_service() rad_data = create_sample_radiology_data() model_outputs = create_sample_model_outputs() # Test clinician summary print("\n[2A] Clinician Summary - Radiology") print("-" * 80) result = await synthesis_service.synthesize_clinical_summary( modality="radiology", structured_data=rad_data, model_outputs=model_outputs, summary_type="clinician", user_id="test-user-002" ) print(f"Synthesis ID: {result['synthesis_id']}") print(f"Risk Level: {result['risk_level']}") print(f"Overall Confidence: {result['confidence_scores']['overall_confidence']*100:.1f}%") print(f"\nNarrative:\n{result['narrative'][:500]}...") return True async def test_laboratory_synthesis(): """Test laboratory results synthesis""" print("\n" + "="*80) print("TEST 3: LABORATORY SYNTHESIS") print("="*80) synthesis_service = get_synthesis_service() lab_data = create_sample_laboratory_data() model_outputs = create_sample_model_outputs() # Test clinician summary print("\n[3A] Clinician Summary - Laboratory") print("-" * 80) result = await synthesis_service.synthesize_clinical_summary( modality="laboratory", structured_data=lab_data, model_outputs=model_outputs, summary_type="clinician", user_id="test-user-003" ) print(f"Synthesis ID: {result['synthesis_id']}") print(f"Risk Level: {result['risk_level']}") print(f"Abnormal Tests: {lab_data['abnormal_count']}") print(f"Overall Confidence: {result['confidence_scores']['overall_confidence']*100:.1f}%") print(f"\nNarrative:\n{result['narrative'][:500]}...") # Test patient summary print("\n[3B] Patient Summary - Laboratory") print("-" * 80) result_patient = await synthesis_service.synthesize_clinical_summary( modality="laboratory", structured_data=lab_data, model_outputs=model_outputs, summary_type="patient", user_id="test-user-003" ) print(f"Narrative:\n{result_patient['narrative'][:500]}...") return True async def test_multi_modal_synthesis(): """Test multi-modal synthesis combining multiple modalities""" print("\n" + "="*80) print("TEST 4: MULTI-MODAL SYNTHESIS") print("="*80) synthesis_service = get_synthesis_service() modalities_data = { "ECG": create_sample_ecg_data(), "radiology": create_sample_radiology_data(), "laboratory": create_sample_laboratory_data() } print("\n[4A] Multi-Modal Clinician Summary") print("-" * 80) result = await synthesis_service.synthesize_multi_modal( modalities_data=modalities_data, summary_type="clinician", user_id="test-user-004" ) print(f"Modalities Combined: {', '.join(result['modalities'])}") print(f"Overall Confidence: {result['overall_confidence']*100:.1f}%") print(f"Risk Level: {result['risk_level']}") print(f"\nNarrative:\n{result['narrative'][:500]}...") print(f"\nRecommendations: {len(result['recommendations'])} items") return True async def test_confidence_thresholds(): """Test confidence-based review requirements""" print("\n" + "="*80) print("TEST 5: CONFIDENCE THRESHOLD TESTING") print("="*80) synthesis_service = get_synthesis_service() # Test high confidence (auto-approve) high_conf_data = create_sample_ecg_data() high_conf_data['confidence'] = { "extraction_confidence": 0.95, "model_confidence": 0.92, "data_quality": 0.94 } print("\n[5A] High Confidence Case (≥0.85)") print("-" * 80) result_high = await synthesis_service.synthesize_clinical_summary( modality="ECG", structured_data=high_conf_data, model_outputs=[], summary_type="clinician", user_id="test-user-005" ) print(f"Overall Confidence: {result_high['confidence_scores']['overall_confidence']*100:.1f}%") print(f"Requires Review: {result_high['requires_review']}") print(f"Expected: False (auto-approved)") # Test moderate confidence (review required) mod_conf_data = create_sample_ecg_data() mod_conf_data['confidence'] = { "extraction_confidence": 0.75, "model_confidence": 0.72, "data_quality": 0.78 } print("\n[5B] Moderate Confidence Case (0.60-0.85)") print("-" * 80) result_mod = await synthesis_service.synthesize_clinical_summary( modality="ECG", structured_data=mod_conf_data, model_outputs=[], summary_type="clinician", user_id="test-user-005" ) print(f"Overall Confidence: {result_mod['confidence_scores']['overall_confidence']*100:.1f}%") print(f"Requires Review: {result_mod['requires_review']}") print(f"Expected: True (review required)") # Test low confidence (manual review required) low_conf_data = create_sample_ecg_data() low_conf_data['confidence'] = { "extraction_confidence": 0.55, "model_confidence": 0.50, "data_quality": 0.58 } print("\n[5C] Low Confidence Case (<0.60)") print("-" * 80) result_low = await synthesis_service.synthesize_clinical_summary( modality="ECG", structured_data=low_conf_data, model_outputs=[], summary_type="clinician", user_id="test-user-005" ) print(f"Overall Confidence: {result_low['confidence_scores']['overall_confidence']*100:.1f}%") print(f"Requires Review: {result_low['requires_review']}") print(f"Risk Level: {result_low['risk_level']}") print(f"Expected: True (manual review required), Risk: high") return True async def test_synthesis_statistics(): """Test synthesis service statistics tracking""" print("\n" + "="*80) print("TEST 6: SYNTHESIS STATISTICS") print("="*80) synthesis_service = get_synthesis_service() stats = synthesis_service.get_synthesis_statistics() print(f"\nTotal Syntheses: {stats['total_syntheses']}") print(f"Average Confidence: {stats['average_confidence']*100:.1f}%") print(f"Review Required: {stats['review_required_percentage']:.1f}%") print(f"Average Generation Time: {stats['average_generation_time']:.2f} seconds") if stats['by_modality']: print(f"\nBy Modality:") for modality, count in stats['by_modality'].items(): print(f" - {modality}: {count}") if stats['by_risk_level']: print(f"\nBy Risk Level:") for risk, count in stats['by_risk_level'].items(): print(f" - {risk}: {count}") return True async def run_all_tests(): """Run all synthesis service tests""" print("\n" + "="*80) print("MEDICAL SYNTHESIS SERVICE - COMPREHENSIVE TEST SUITE") print("Testing MedGemma Prompt Templates & Clinical Synthesis") print("="*80) print(f"Start Time: {datetime.utcnow().isoformat()}") tests = [ ("ECG Synthesis", test_ecg_synthesis), ("Radiology Synthesis", test_radiology_synthesis), ("Laboratory Synthesis", test_laboratory_synthesis), ("Multi-Modal Synthesis", test_multi_modal_synthesis), ("Confidence Thresholds", test_confidence_thresholds), ("Synthesis Statistics", test_synthesis_statistics) ] results = [] for test_name, test_func in tests: try: success = await test_func() results.append((test_name, "PASS" if success else "FAIL")) except Exception as e: print(f"\n[ERROR] {test_name} failed: {str(e)}") import traceback traceback.print_exc() results.append((test_name, "FAIL")) # Print summary print("\n" + "="*80) print("TEST SUMMARY") print("="*80) for test_name, status in results: status_symbol = "✓" if status == "PASS" else "✗" print(f"{status_symbol} {test_name}: {status}") passed = sum(1 for _, status in results if status == "PASS") total = len(results) print(f"\nTotal: {passed}/{total} tests passed ({passed/total*100:.1f}%)") print(f"End Time: {datetime.utcnow().isoformat()}") print("="*80) if __name__ == "__main__": asyncio.run(run_all_tests())