Spaces:
Sleeping
Sleeping
| """ | |
| Test Suite for Clinical Synthesis Service | |
| Tests MedGemma prompt templates and synthesis functionality | |
| Author: MiniMax Agent | |
| Date: 2025-10-29 | |
| """ | |
| import sys | |
| import asyncio | |
| from datetime import datetime | |
| from typing import Dict, Any | |
| # Add backend to path | |
| sys.path.insert(0, '/workspace/medical-ai-platform/backend') | |
| from clinical_synthesis_service import get_synthesis_service | |
| from medical_schemas import ECGAnalysis, RadiologyAnalysis, LaboratoryResults, ClinicalNotesAnalysis | |
def create_sample_ecg_data() -> Dict[str, Any]:
    """Build a synthetic 12-lead ECG payload for the synthesis tests.

    Each lead holds a flat trace of 5000 identical samples, and the interval,
    rhythm, probability, and confidence sections are fixed literals. A new
    dict is assembled on every call, so a caller can overwrite a section
    (for example ``confidence``) without affecting later calls.

    Returns:
        A nested dict with the keys ``metadata``, ``signal_data``,
        ``intervals``, ``rhythm_classification``,
        ``arrhythmia_probabilities``, ``derived_features`` and ``confidence``.
    """
    sample_count = 5000
    # (lead name, constant amplitude), in the order the leads are reported.
    lead_levels = (
        ("I", 0.5),
        ("II", 0.8),
        ("III", 0.3),
        ("aVR", -0.6),
        ("aVL", 0.4),
        ("aVF", 0.6),
        ("V1", 0.2),
        ("V2", 0.4),
        ("V3", 0.6),
        ("V4", 0.8),
        ("V5", 0.9),
        ("V6", 0.8),
    )

    metadata = {
        "document_id": "ecg-test-001",
        "source_type": "ECG",
        "document_date": "2025-10-29T10:00:00Z",
        "facility": "Test Medical Center",
        "data_completeness": 0.95,
    }

    signal_data = {
        "lead_names": [lead for lead, _ in lead_levels],
        "sampling_rate_hz": 500,
        # One independent list per lead, so traces never alias each other.
        "signal_arrays": {
            lead: [level] * sample_count for lead, level in lead_levels
        },
        "duration_seconds": 10.0,
        "num_samples": sample_count,
    }

    intervals = {
        "pr_ms": 165.0,
        "qrs_ms": 92.0,
        "qt_ms": 390.0,
        "qtc_ms": 425.0,
        "rr_ms": 850.0,
    }

    rhythm_classification = {
        "primary_rhythm": "Normal Sinus Rhythm",
        "rhythm_confidence": 0.92,
        "arrhythmia_types": [],
        "heart_rate_bpm": 71,
        "heart_rate_regularity": "regular",
    }

    arrhythmia_probabilities = {
        "normal_rhythm": 0.92,
        "atrial_fibrillation": 0.02,
        "atrial_flutter": 0.01,
        "ventricular_tachycardia": 0.01,
        "heart_block": 0.01,
        "premature_beats": 0.03,
    }

    derived_features = {
        "st_elevation_mm": {},
        "st_depression_mm": {},
        "t_wave_abnormalities": [],
        "q_wave_indicators": [],
        "axis_deviation": "normal",
    }

    confidence = {
        "extraction_confidence": 0.94,
        "model_confidence": 0.89,
        "data_quality": 0.95,
    }

    return {
        "metadata": metadata,
        "signal_data": signal_data,
        "intervals": intervals,
        "rhythm_classification": rhythm_classification,
        "arrhythmia_probabilities": arrhythmia_probabilities,
        "derived_features": derived_features,
        "confidence": confidence,
    }
def create_sample_radiology_data() -> Dict[str, Any]:
    """Build a synthetic chest-CT radiology payload for the synthesis tests.

    All values are fixed literals describing one axial CT image with an
    unremarkable report. A new dict is assembled on every call.

    Returns:
        A nested dict with the keys ``metadata``, ``image_references``,
        ``findings``, ``segmentations``, ``metrics``, ``confidence``,
        ``criticality_level`` and ``follow_up_recommendations``.
    """
    payload: Dict[str, Any] = {}

    payload["metadata"] = {
        "document_id": "rad-test-001",
        "source_type": "radiology",
        "document_date": "2025-10-29T11:00:00Z",
        "facility": "Imaging Center",
        "data_completeness": 0.90,
    }

    ct_image = {
        "image_id": "img-001",
        "modality": "CT",
        "body_part": "Chest",
        "view_orientation": "Axial",
        "slice_thickness_mm": 2.5,
        "resolution": {"width": 512, "height": 512},
    }
    payload["image_references"] = [ct_image]

    payload["findings"] = {
        "findings_text": "Chest CT shows clear lungs bilaterally. No pleural effusion. Heart size within normal limits. No mediastinal lymphadenopathy. Bones appear intact without acute fracture.",
        "impression_text": "No acute cardiopulmonary abnormality. Unremarkable chest CT.",
        "critical_findings": [],
        "incidental_findings": ["Mild degenerative changes in thoracic spine"],
        "comparison_prior": "None available",
        "technique_description": "Contrast-enhanced CT chest with IV contrast",
    }

    payload["segmentations"] = []

    payload["metrics"] = {
        "organ_volumes": {"lung_left": 2800, "lung_right": 2950, "heart": 680},
        "lesion_measurements": [],
        "enhancement_patterns": [],
        "calcification_scores": {},
        "tissue_density": {},
    }

    payload["confidence"] = {
        "extraction_confidence": 0.88,
        "model_confidence": 0.85,
        "data_quality": 0.92,
    }

    payload["criticality_level"] = "routine"
    payload["follow_up_recommendations"] = []
    return payload
def create_sample_laboratory_data() -> Dict[str, Any]:
    """Build a synthetic laboratory-results payload for the synthesis tests.

    The panel holds four fixed tests (glucose, hemoglobin, creatinine, total
    cholesterol); glucose and total cholesterol sit above their reference
    range and carry an ``"H"`` flag. A new dict is assembled on every call.

    Returns:
        A nested dict with the keys ``metadata``, ``tests``,
        ``critical_values``, ``panel_name``, ``fasting_status``,
        ``collection_date``, ``confidence``, ``abnormal_count`` and
        ``critical_count``.
    """

    def lab_row(name, code, value, unit, low, high, is_high):
        # One entry of the "tests" list; a fresh flags list per row.
        return {
            "test_name": name,
            "test_code": code,
            "value": value,
            "unit": unit,
            "reference_range_low": low,
            "reference_range_high": high,
            "flags": ["H"] if is_high else [],
        }

    # name, code, value, unit, range low, range high, flagged high
    panel_rows = (
        ("Glucose", "2345-7", 105.0, "mg/dL", 70.0, 99.0, True),
        ("Hemoglobin", "718-7", 14.5, "g/dL", 13.5, 17.5, False),
        ("Creatinine", "2160-0", 1.1, "mg/dL", 0.7, 1.3, False),
        ("Total Cholesterol", "2093-3", 215.0, "mg/dL", 0.0, 200.0, True),
    )

    return {
        "metadata": {
            "document_id": "lab-test-001",
            "source_type": "laboratory",
            "document_date": "2025-10-29T09:00:00Z",
            "facility": "Test Lab",
            "data_completeness": 0.98,
        },
        "tests": [lab_row(*row) for row in panel_rows],
        "critical_values": [],
        "panel_name": "Basic Metabolic Panel + Lipids",
        "fasting_status": "fasting",
        "collection_date": "2025-10-29T09:00:00Z",
        "confidence": {
            "extraction_confidence": 0.96,
            "model_confidence": 0.92,
            "data_quality": 0.98,
        },
        "abnormal_count": 2,
        "critical_count": 0,
    }
def create_sample_model_outputs() -> list:
    """Build two canned upstream model outputs for the synthesis tests.

    Returns:
        A new list of two dicts, each with ``model_name``, ``domain`` and a
        ``result`` dict. The first result carries a ``summary`` text, the
        second an ``analysis`` text; both carry a ``confidence`` score.
    """
    clinical_bert_output = {
        "model_name": "Bio_ClinicalBERT",
        "domain": "clinical_notes",
        "result": {
            "summary": "Analysis suggests normal baseline clinical parameters with minor metabolic considerations",
            "confidence": 0.87,
        },
    }
    medgemma_output = {
        "model_name": "MedGemma 27B",
        "domain": "general",
        "result": {
            "analysis": "Comprehensive medical review indicates overall satisfactory health status with attention to glucose and lipid management",
            "confidence": 0.85,
        },
    }
    return [clinical_bert_output, medgemma_output]
async def test_ecg_synthesis():
    """Request clinician and patient ECG summaries and print their fields.

    The sample ECG payload and sample model outputs are sent twice to
    ``synthesize_clinical_summary``, once per summary type. Nothing is
    asserted; a missing result key or a service error propagates as an
    exception to the caller.

    Returns:
        True once both summaries have been printed.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print("\n" + heavy_rule)
    print("TEST 1: ECG SYNTHESIS")
    print(heavy_rule)

    service = get_synthesis_service()
    # Inputs shared by both requests; only summary_type differs between them.
    shared_inputs = {
        "modality": "ECG",
        "structured_data": create_sample_ecg_data(),
        "model_outputs": create_sample_model_outputs(),
    }

    # Clinician-facing summary
    print("\n[1A] Clinician Summary - ECG")
    print(light_rule)
    clinician = await service.synthesize_clinical_summary(
        **shared_inputs,
        summary_type="clinician",
        user_id="test-user-001",
    )

    print(f"Synthesis ID: {clinician['synthesis_id']}")
    print(f"Risk Level: {clinician['risk_level']}")
    print(f"Requires Review: {clinician['requires_review']}")
    print(f"Overall Confidence: {clinician['confidence_scores']['overall_confidence']*100:.1f}%")
    print(f"\nNarrative:\n{clinician['narrative'][:500]}...")
    print(f"\nRecommendations: {len(clinician['recommendations'])} items")
    # Only the first three recommendations are listed.
    for item in clinician['recommendations'][:3]:
        print(f" - [{item['priority']}] {item['recommendation']}")

    # Patient-facing summary
    print("\n[1B] Patient Summary - ECG")
    print(light_rule)
    patient = await service.synthesize_clinical_summary(
        **shared_inputs,
        summary_type="patient",
        user_id="test-user-001",
    )
    print(f"Narrative:\n{patient['narrative'][:500]}...")

    return True
async def test_radiology_synthesis():
    """Request a clinician radiology summary and print its fields.

    Nothing is asserted; a missing result key or a service error propagates
    as an exception to the caller.

    Returns:
        True once the summary has been printed.
    """
    heavy_rule = "=" * 80

    print("\n" + heavy_rule)
    print("TEST 2: RADIOLOGY SYNTHESIS")
    print(heavy_rule)

    service = get_synthesis_service()
    request = {
        "modality": "radiology",
        "structured_data": create_sample_radiology_data(),
        "model_outputs": create_sample_model_outputs(),
        "summary_type": "clinician",
        "user_id": "test-user-002",
    }

    # Clinician-facing summary
    print("\n[2A] Clinician Summary - Radiology")
    print("-" * 80)
    summary = await service.synthesize_clinical_summary(**request)

    print(f"Synthesis ID: {summary['synthesis_id']}")
    print(f"Risk Level: {summary['risk_level']}")
    print(f"Overall Confidence: {summary['confidence_scores']['overall_confidence']*100:.1f}%")
    print(f"\nNarrative:\n{summary['narrative'][:500]}...")

    return True
async def test_laboratory_synthesis():
    """Request clinician and patient laboratory summaries and print them.

    The abnormal-test count that is printed comes from the sample input
    payload, not from the synthesis result. Nothing is asserted; a missing
    result key or a service error propagates as an exception to the caller.

    Returns:
        True once both summaries have been printed.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print("\n" + heavy_rule)
    print("TEST 3: LABORATORY SYNTHESIS")
    print(heavy_rule)

    service = get_synthesis_service()
    lab_payload = create_sample_laboratory_data()
    # Inputs shared by both requests; only summary_type differs between them.
    shared_inputs = {
        "modality": "laboratory",
        "structured_data": lab_payload,
        "model_outputs": create_sample_model_outputs(),
    }

    # Clinician-facing summary
    print("\n[3A] Clinician Summary - Laboratory")
    print(light_rule)
    clinician = await service.synthesize_clinical_summary(
        **shared_inputs,
        summary_type="clinician",
        user_id="test-user-003",
    )

    print(f"Synthesis ID: {clinician['synthesis_id']}")
    print(f"Risk Level: {clinician['risk_level']}")
    print(f"Abnormal Tests: {lab_payload['abnormal_count']}")
    print(f"Overall Confidence: {clinician['confidence_scores']['overall_confidence']*100:.1f}%")
    print(f"\nNarrative:\n{clinician['narrative'][:500]}...")

    # Patient-facing summary
    print("\n[3B] Patient Summary - Laboratory")
    print(light_rule)
    patient = await service.synthesize_clinical_summary(
        **shared_inputs,
        summary_type="patient",
        user_id="test-user-003",
    )
    print(f"Narrative:\n{patient['narrative'][:500]}...")

    return True
async def test_multi_modal_synthesis():
    """Request one clinician summary spanning ECG, radiology and lab data.

    The three sample payloads are passed together to
    ``synthesize_multi_modal``. Nothing is asserted; a missing result key or
    a service error propagates as an exception to the caller.

    Returns:
        True once the combined summary has been printed.
    """
    heavy_rule = "=" * 80

    print("\n" + heavy_rule)
    print("TEST 4: MULTI-MODAL SYNTHESIS")
    print(heavy_rule)

    service = get_synthesis_service()

    # Payloads keyed by modality name, built in ECG / radiology / lab order.
    payload_by_modality = {}
    payload_by_modality["ECG"] = create_sample_ecg_data()
    payload_by_modality["radiology"] = create_sample_radiology_data()
    payload_by_modality["laboratory"] = create_sample_laboratory_data()

    print("\n[4A] Multi-Modal Clinician Summary")
    print("-" * 80)
    combined = await service.synthesize_multi_modal(
        modalities_data=payload_by_modality,
        summary_type="clinician",
        user_id="test-user-004",
    )

    print(f"Modalities Combined: {', '.join(combined['modalities'])}")
    print(f"Overall Confidence: {combined['overall_confidence']*100:.1f}%")
    print(f"Risk Level: {combined['risk_level']}")
    print(f"\nNarrative:\n{combined['narrative'][:500]}...")
    print(f"\nRecommendations: {len(combined['recommendations'])} items")

    return True
async def test_confidence_thresholds():
    """Check that the review flag matches the expectation for each confidence band.

    Three ECG payloads are synthesized with high, moderate and low values in
    their ``confidence`` section (the bands are the ones named in the printed
    headings). For each case the overall confidence, the ``requires_review``
    flag and the expected outcome are printed, and the flag is compared with
    the expectation. A mismatch is reported and makes the test fail instead
    of being silently printed.

    The risk level of the low-confidence case is printed next to its
    expected value but is not enforced, because the representation the
    service uses for risk levels is not visible from this file.

    Returns:
        True when ``requires_review`` matched the expectation in every case,
        False otherwise. Service errors and missing result keys propagate as
        exceptions to the caller.
    """
    print("\n" + "="*80)
    print("TEST 5: CONFIDENCE THRESHOLD TESTING")
    print("="*80)

    synthesis_service = get_synthesis_service()

    # (heading, confidence section, expected requires_review,
    #  expectation text, whether the risk level is printed as well)
    cases = [
        (
            "\n[5A] High Confidence Case (≥0.85)",
            {
                "extraction_confidence": 0.95,
                "model_confidence": 0.92,
                "data_quality": 0.94,
            },
            False,
            "Expected: False (auto-approved)",
            False,
        ),
        (
            "\n[5B] Moderate Confidence Case (0.60-0.85)",
            {
                "extraction_confidence": 0.75,
                "model_confidence": 0.72,
                "data_quality": 0.78,
            },
            True,
            "Expected: True (review required)",
            False,
        ),
        (
            "\n[5C] Low Confidence Case (<0.60)",
            {
                "extraction_confidence": 0.55,
                "model_confidence": 0.50,
                "data_quality": 0.58,
            },
            True,
            "Expected: True (manual review required), Risk: high",
            True,
        ),
    ]

    all_matched = True
    for heading, confidence, expect_review, expectation, show_risk in cases:
        # Start from a fresh sample each time so cases cannot affect each other.
        structured_data = create_sample_ecg_data()
        structured_data['confidence'] = confidence

        print(heading)
        print("-" * 80)
        result = await synthesis_service.synthesize_clinical_summary(
            modality="ECG",
            structured_data=structured_data,
            model_outputs=[],
            summary_type="clinician",
            user_id="test-user-005"
        )

        print(f"Overall Confidence: {result['confidence_scores']['overall_confidence']*100:.1f}%")
        print(f"Requires Review: {result['requires_review']}")
        if show_risk:
            print(f"Risk Level: {result['risk_level']}")
        print(expectation)

        # Enforce the expectation; keep going so every case is still reported.
        if bool(result['requires_review']) != expect_review:
            print(
                f"[MISMATCH] requires_review was {result['requires_review']!r}, "
                f"expected {expect_review}"
            )
            all_matched = False

    return all_matched
async def test_synthesis_statistics():
    """Print the aggregate statistics reported by the synthesis service.

    Reads ``get_synthesis_statistics()`` and prints the totals, followed by
    the per-modality and per-risk-level counts when those mappings are
    non-empty. Nothing is asserted; a missing key propagates as an exception
    to the caller.

    Returns:
        True once the statistics have been printed.
    """
    heavy_rule = "=" * 80

    print("\n" + heavy_rule)
    print("TEST 6: SYNTHESIS STATISTICS")
    print(heavy_rule)

    stats = get_synthesis_service().get_synthesis_statistics()

    print(f"\nTotal Syntheses: {stats['total_syntheses']}")
    print(f"Average Confidence: {stats['average_confidence']*100:.1f}%")
    print(f"Review Required: {stats['review_required_percentage']:.1f}%")
    print(f"Average Generation Time: {stats['average_generation_time']:.2f} seconds")

    # Each breakdown is printed only when it has at least one entry.
    breakdowns = (
        ("\nBy Modality:", 'by_modality'),
        ("\nBy Risk Level:", 'by_risk_level'),
    )
    for heading, key in breakdowns:
        counts = stats[key]
        if not counts:
            continue
        print(heading)
        for label, count in counts.items():
            print(f" - {label}: {count}")

    return True
async def run_all_tests():
    """Run every synthesis test in sequence and print a pass/fail summary.

    Each test coroutine is awaited in turn. A test counts as passed when it
    returns a truthy value; a falsy return or a raised exception (printed
    together with its traceback) counts as a failure and does not stop the
    remaining tests. Start and end times are printed as timezone-aware UTC
    timestamps.

    Returns:
        True when every test passed, False otherwise.
    """
    # Imported here so the block brings its own names into scope; the file
    # itself only imports the ``datetime`` class from the datetime module.
    import traceback
    from datetime import timezone

    print("\n" + "="*80)
    print("MEDICAL SYNTHESIS SERVICE - COMPREHENSIVE TEST SUITE")
    print("Testing MedGemma Prompt Templates & Clinical Synthesis")
    print("="*80)
    # datetime.utcnow() is deprecated; use an aware UTC timestamp instead.
    print(f"Start Time: {datetime.now(timezone.utc).isoformat()}")

    tests = [
        ("ECG Synthesis", test_ecg_synthesis),
        ("Radiology Synthesis", test_radiology_synthesis),
        ("Laboratory Synthesis", test_laboratory_synthesis),
        ("Multi-Modal Synthesis", test_multi_modal_synthesis),
        ("Confidence Thresholds", test_confidence_thresholds),
        ("Synthesis Statistics", test_synthesis_statistics),
    ]

    results = []
    for test_name, test_func in tests:
        try:
            success = await test_func()
        except Exception as e:
            # Suite boundary: report the failure and continue with the rest.
            print(f"\n[ERROR] {test_name} failed: {str(e)}")
            traceback.print_exc()
            results.append((test_name, "FAIL"))
        else:
            results.append((test_name, "PASS" if success else "FAIL"))

    # Print summary
    print("\n" + "="*80)
    print("TEST SUMMARY")
    print("="*80)
    for test_name, status in results:
        status_symbol = "✓" if status == "PASS" else "✗"
        print(f"{status_symbol} {test_name}: {status}")

    passed = sum(1 for _, status in results if status == "PASS")
    total = len(results)
    print(f"\nTotal: {passed}/{total} tests passed ({passed/total*100:.1f}%)")
    print(f"End Time: {datetime.now(timezone.utc).isoformat()}")
    print("="*80)

    return passed == total


if __name__ == "__main__":
    # Exit non-zero when any test failed so shells and CI can detect it.
    sys.exit(0 if asyncio.run(run_all_tests()) else 1)