File size: 7,020 Bytes
6dc9d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
Test Evolution Loop (Phase 3)
Complete validation of self-improvement system
"""

import sys
from pathlib import Path

# Add project root to path.
# The root is taken to be the parent of the directory that contains this
# file. It is inserted at the front of sys.path before the `src.*` imports
# that follow, so this statement must stay above them.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from src.workflow import create_guild
from src.pdf_processor import get_all_retrievers
from src.config import BASELINE_SOP
from src.state import PatientInput, GuildState
from src.evaluation.evaluators import run_full_evaluation
from src.evolution.director import SOPGenePool, run_evolution_cycle
from src.evolution.pareto import (
    identify_pareto_front,
    visualize_pareto_frontier,
    print_pareto_summary,
    analyze_improvements
)
from datetime import datetime
from typing import Dict, Any


def create_test_patient() -> PatientInput:
    """Build the fixed diabetes patient fixture used by this test.

    Returns:
        A PatientInput carrying a hard-coded biomarker panel, a canned
        model prediction for 'Type 2 Diabetes', and the patient context
        (identifier, demographics, symptoms, history, medications, query).
    """
    # Canned classifier output; the three class probabilities sum to 1.0.
    prediction: Dict[str, Any] = dict(
        disease='Type 2 Diabetes',
        confidence=0.92,
        probabilities={
            'Type 2 Diabetes': 0.92,
            'Prediabetes': 0.05,
            'Healthy': 0.03,
        },
        prediction_timestamp='2025-01-01T10:00:00',
    )

    # Free-text and demographic context passed alongside the lab values.
    context = dict(
        patient_id='TEST-001',
        age=55,
        gender='male',
        symptoms=["Increased thirst", "Frequent urination", "Fatigue"],
        medical_history=["Prediabetes diagnosed 2 years ago"],
        current_medications=["Metformin 500mg"],
        query="My blood sugar has been high lately. What should I do?",
    )

    # Lab panel keyed by biomarker name; every value is a float.
    panel = dict(
        Glucose=185.0,
        HbA1c=8.2,
        Cholesterol=235.0,
        Triglycerides=210.0,
        HDL=38.0,
        LDL=155.0,
        VLDL=42.0,
        Total_Protein=6.8,
        Albumin=4.2,
        Globulin=2.6,
        AG_Ratio=1.6,
        Bilirubin_Total=0.9,
        Bilirubin_Direct=0.2,
        ALT=35.0,
        AST=28.0,
        ALP=95.0,
        Creatinine=1.1,
        BUN=18.0,
        BUN_Creatinine_Ratio=16.4,
        Sodium=138.0,
        Potassium=4.2,
        Chloride=102.0,
        Bicarbonate=24.0,
    )

    return PatientInput(
        biomarkers=panel,
        model_prediction=prediction,
        patient_context=context,
    )


def _format_improvement(best_avg: float, baseline_avg: float) -> str:
    """Describe best_avg relative to baseline_avg as a signed percentage.

    Args:
        best_avg: Average score of the best SOP.
        baseline_avg: Average score of the baseline SOP.

    Returns:
        Text such as "+12.5% vs baseline". When baseline_avg is 0 the
        relative change is undefined, so an "n/a" message is returned
        instead of dividing by zero.
    """
    if baseline_avg == 0:
        return "n/a vs baseline (baseline score is 0)"
    improvement = ((best_avg - baseline_avg) / baseline_avg) * 100
    return f"{improvement:+.1f}% vs baseline"


def main():
    """Run complete evolution loop test.

    Steps, in order: build the guild and the test patient, evaluate the
    baseline SOP and add it to a fresh gene pool, run a fixed number of
    evolution cycles (a failing cycle is reported and skipped), then print
    the gene pool summary, the Pareto frontier, the improvement analysis,
    and the best SOP compared with the baseline.
    """
    print("\n" + "=" * 80)
    print("PHASE 3: SELF-IMPROVEMENT LOOP TEST")
    print("=" * 80)

    # Setup
    print("\n1. Initializing system...")
    guild = create_guild()
    patient = create_test_patient()

    # Initialize gene pool with baseline
    print("\n2. Creating SOP Gene Pool...")
    gene_pool = SOPGenePool()

    print("\n3. Evaluating Baseline SOP...")
    # Run workflow with baseline SOP
    initial_state: GuildState = {
        'patient_biomarkers': patient.biomarkers,
        'model_prediction': patient.model_prediction,
        'patient_context': patient.patient_context,
        'plan': None,
        'sop': BASELINE_SOP,
        'agent_outputs': [],
        'biomarker_flags': [],
        'safety_alerts': [],
        'final_response': None,
        'processing_timestamp': datetime.now().isoformat(),
        'sop_version': "Baseline"
    }

    guild_state = guild.workflow.invoke(initial_state)

    baseline_response = guild_state['final_response']
    agent_outputs = guild_state['agent_outputs']

    baseline_eval = run_full_evaluation(
        final_response=baseline_response,
        agent_outputs=agent_outputs,
        biomarkers=patient.biomarkers
    )

    gene_pool.add(
        sop=BASELINE_SOP,
        evaluation=baseline_eval,
        parent_version=None,
        description="Baseline SOP"
    )

    print(f"\n✓ Baseline Average Score: {baseline_eval.average_score():.3f}")
    print(f"  Clinical Accuracy:     {baseline_eval.clinical_accuracy.score:.3f}")
    print(f"  Evidence Grounding:    {baseline_eval.evidence_grounding.score:.3f}")
    print(f"  Actionability:         {baseline_eval.actionability.score:.3f}")
    print(f"  Clarity:               {baseline_eval.clarity.score:.3f}")
    print(f"  Safety & Completeness: {baseline_eval.safety_completeness.score:.3f}")

    # Evaluation callback handed to every evolution cycle. It does not
    # depend on the cycle, so it is defined once here rather than being
    # rebuilt on each loop iteration.
    def eval_func(final_response, agent_outputs, biomarkers):
        return run_full_evaluation(
            final_response=final_response,
            agent_outputs=agent_outputs,
            biomarkers=biomarkers
        )

    # Run evolution cycles
    num_cycles = 2
    print(f"\n4. Running {num_cycles} Evolution Cycles...")

    for cycle in range(1, num_cycles + 1):
        print(f"\n{'─' * 80}")
        print(f"EVOLUTION CYCLE {cycle}")
        print(f"{'─' * 80}")

        # Best-effort: a failing cycle is reported and the loop moves on,
        # so one bad cycle does not abort the whole test run.
        try:
            new_entries = run_evolution_cycle(
                gene_pool=gene_pool,
                patient_input=patient,
                workflow_graph=guild.workflow,
                evaluation_func=eval_func
            )

            print(f"\n✓ Cycle {cycle} complete: Added {len(new_entries)} new SOPs to gene pool")

            for entry in new_entries:
                print(f"\n  SOP v{entry['version']}: {entry['description']}")
                print(f"    Average Score: {entry['evaluation'].average_score():.3f}")

        except Exception as e:
            print(f"\n⚠️ Cycle {cycle} encountered error: {e}")
            print("Continuing to next cycle...")

    # Show gene pool summary
    print("\n5. Gene Pool Summary:")
    gene_pool.summary()

    # Pareto Analysis
    print("\n6. Identifying Pareto Frontier...")
    all_entries = gene_pool.gene_pool
    pareto_front = identify_pareto_front(all_entries)

    print(f"\n✓ Pareto frontier contains {len(pareto_front)} non-dominated solutions")
    print_pareto_summary(pareto_front)

    # Improvement Analysis
    print("\n7. Analyzing Improvements...")
    analyze_improvements(all_entries)

    # Visualizations
    print("\n8. Generating Visualizations...")
    visualize_pareto_frontier(pareto_front)

    # Final Summary
    print("\n" + "=" * 80)
    print("EVOLUTION TEST COMPLETE")
    print("=" * 80)

    print(f"\n✓ Total SOPs in Gene Pool: {len(all_entries)}")
    print(f"✓ Pareto Optimal SOPs: {len(pareto_front)}")

    # Find best average score
    best_sop = max(all_entries, key=lambda e: e['evaluation'].average_score())
    baseline_avg = baseline_eval.average_score()
    best_avg = best_sop['evaluation'].average_score()

    print(f"\nBest SOP: v{best_sop['version']} - {best_sop['description']}")
    # The helper guards against a zero baseline, which previously raised
    # ZeroDivisionError at the very end of the run.
    print(f"  Average Score: {best_avg:.3f} ({_format_improvement(best_avg, baseline_avg)})")

    # NOTE(review): this path is hard-coded here; presumably it matches
    # where visualize_pareto_frontier writes its output - confirm.
    print("\n✓ Visualization saved to: data/pareto_frontier_analysis.png")
    print("\n" + "=" * 80)


# Run the evolution loop test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()