File size: 6,601 Bytes
6dc9d46
 
 
 
 
 
 
 
 
 
 
 
696f787
 
 
6dc9d46
 
 
 
696f787
6dc9d46
 
696f787
6dc9d46
696f787
 
6dc9d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9659593
6dc9d46
696f787
 
9659593
 
 
 
6dc9d46
696f787
6dc9d46
9659593
 
 
 
 
 
 
6dc9d46
696f787
9659593
6dc9d46
 
 
 
 
 
 
696f787
6dc9d46
 
 
 
696f787
6dc9d46
 
 
696f787
6dc9d46
 
696f787
6dc9d46
9659593
 
 
 
 
 
 
 
 
 
 
6dc9d46
696f787
6dc9d46
696f787
9659593
 
696f787
6dc9d46
9659593
6dc9d46
696f787
9659593
696f787
6dc9d46
 
 
 
 
 
696f787
6dc9d46
 
 
696f787
6dc9d46
 
 
 
696f787
6dc9d46
 
 
 
9659593
6dc9d46
696f787
6dc9d46
9659593
6dc9d46
696f787
6dc9d46
696f787
6dc9d46
 
 
696f787
6dc9d46
 
 
696f787
6dc9d46
 
 
696f787
6dc9d46
 
 
 
696f787
6dc9d46
 
696f787
6dc9d46
 
 
696f787
6dc9d46
 
 
696f787
6dc9d46
 
 
 
696f787
6dc9d46
 
696f787
6dc9d46
9659593
6dc9d46
9659593
6dc9d46
696f787
6dc9d46
 
696f787
6dc9d46
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""
Test Evolution Loop (Phase 3)
Complete validation of self-improvement system
"""

import sys
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from datetime import datetime
from typing import Any

from src.config import BASELINE_SOP
from src.evaluation.evaluators import run_full_evaluation
from src.evolution.director import SOPGenePool, run_evolution_cycle
from src.evolution.pareto import (
    analyze_improvements,
    identify_pareto_front,
    print_pareto_summary,
    visualize_pareto_frontier,
)
from src.state import GuildState, PatientInput
from src.workflow import create_guild


def create_test_patient() -> PatientInput:
    """Build the fixed diabetes patient used as input for the evolution test.

    Returns:
        A ``PatientInput`` carrying hard-coded biomarkers, a model prediction
        for "Type 2 Diabetes" and the patient's context (demographics,
        symptoms, history, medications and query).
    """
    diagnosis = "Type 2 Diabetes"

    # Lab values grouped by panel; the merge below keeps the key order intact.
    glycemic_panel = {"Glucose": 185.0, "HbA1c": 8.2}
    lipid_panel = {
        "Cholesterol": 235.0,
        "Triglycerides": 210.0,
        "HDL": 38.0,
        "LDL": 155.0,
        "VLDL": 42.0,
    }
    protein_panel = {
        "Total_Protein": 6.8,
        "Albumin": 4.2,
        "Globulin": 2.6,
        "AG_Ratio": 1.6,
    }
    liver_panel = {
        "Bilirubin_Total": 0.9,
        "Bilirubin_Direct": 0.2,
        "ALT": 35.0,
        "AST": 28.0,
        "ALP": 95.0,
    }
    renal_panel = {
        "Creatinine": 1.1,
        "BUN": 18.0,
        "BUN_Creatinine_Ratio": 16.4,
    }
    electrolyte_panel = {
        "Sodium": 138.0,
        "Potassium": 4.2,
        "Chloride": 102.0,
        "Bicarbonate": 24.0,
    }
    lab_values = {
        **glycemic_panel,
        **lipid_panel,
        **protein_panel,
        **liver_panel,
        **renal_panel,
        **electrolyte_panel,
    }

    prediction: dict[str, Any] = {
        "disease": diagnosis,
        "confidence": 0.92,
        "probabilities": {diagnosis: 0.92, "Prediabetes": 0.05, "Healthy": 0.03},
        "prediction_timestamp": "2025-01-01T10:00:00",
    }

    context = {
        "patient_id": "TEST-001",
        "age": 55,
        "gender": "male",
        "symptoms": ["Increased thirst", "Frequent urination", "Fatigue"],
        "medical_history": ["Prediabetes diagnosed 2 years ago"],
        "current_medications": ["Metformin 500mg"],
        "query": "My blood sugar has been high lately. What should I do?",
    }

    return PatientInput(
        biomarkers=lab_values,
        model_prediction=prediction,
        patient_context=context,
    )


def main():
    """Run the complete evolution loop test end to end.

    Steps, in order:
      1. Build the guild and the test patient.
      2. Create an empty ``SOPGenePool``.
      3. Invoke the workflow with ``BASELINE_SOP``, evaluate the response and
         register the result in the gene pool as the baseline entry.
      4. Run a fixed number of evolution cycles; a cycle that raises is
         reported and skipped so the remaining cycles still run.
      5-8. Print the gene pool summary, the Pareto front, the improvement
         analysis, and generate the Pareto visualization.

    Finally prints the best-scoring SOP and its relative change against the
    baseline average score.
    """
    print("\n" + "=" * 80)
    print("PHASE 3: SELF-IMPROVEMENT LOOP TEST")
    print("=" * 80)

    # Setup
    print("\n1. Initializing system...")
    guild = create_guild()
    patient = create_test_patient()

    # Initialize gene pool with baseline
    print("\n2. Creating SOP Gene Pool...")
    gene_pool = SOPGenePool()

    print("\n3. Evaluating Baseline SOP...")
    # Run workflow with baseline SOP
    initial_state: GuildState = {
        "patient_biomarkers": patient.biomarkers,
        "model_prediction": patient.model_prediction,
        "patient_context": patient.patient_context,
        "plan": None,
        "sop": BASELINE_SOP,
        "agent_outputs": [],
        "biomarker_flags": [],
        "safety_alerts": [],
        "final_response": None,
        "processing_timestamp": datetime.now().isoformat(),
        "sop_version": "Baseline",
    }

    guild_state = guild.workflow.invoke(initial_state)

    baseline_response = guild_state["final_response"]
    agent_outputs = guild_state["agent_outputs"]

    baseline_eval = run_full_evaluation(
        final_response=baseline_response, agent_outputs=agent_outputs, biomarkers=patient.biomarkers
    )

    gene_pool.add(sop=BASELINE_SOP, evaluation=baseline_eval, parent_version=None, description="Baseline SOP")

    print(f"\n✓ Baseline Average Score: {baseline_eval.average_score():.3f}")
    print(f"  Clinical Accuracy:     {baseline_eval.clinical_accuracy.score:.3f}")
    print(f"  Evidence Grounding:    {baseline_eval.evidence_grounding.score:.3f}")
    print(f"  Actionability:         {baseline_eval.actionability.score:.3f}")
    print(f"  Clarity:               {baseline_eval.clarity.score:.3f}")
    print(f"  Safety & Completeness: {baseline_eval.safety_completeness.score:.3f}")

    # Evaluation callback handed to every cycle. It does not depend on the
    # cycle number, so it is defined once instead of once per iteration.
    def eval_func(final_response, agent_outputs, biomarkers):
        return run_full_evaluation(
            final_response=final_response, agent_outputs=agent_outputs, biomarkers=biomarkers
        )

    # Run evolution cycles
    num_cycles = 2
    print(f"\n4. Running {num_cycles} Evolution Cycles...")

    for cycle in range(1, num_cycles + 1):
        print(f"\n{'─' * 80}")
        print(f"EVOLUTION CYCLE {cycle}")
        print(f"{'─' * 80}")

        # Deliberately best-effort: one failing cycle must not abort the run,
        # so the error is reported and the loop moves on.
        try:
            new_entries = run_evolution_cycle(
                gene_pool=gene_pool, patient_input=patient, workflow_graph=guild.workflow, evaluation_func=eval_func
            )

            print(f"\n✓ Cycle {cycle} complete: Added {len(new_entries)} new SOPs to gene pool")

            for entry in new_entries:
                print(f"\n  SOP v{entry['version']}: {entry['description']}")
                print(f"    Average Score: {entry['evaluation'].average_score():.3f}")

        except Exception as e:
            print(f"\n⚠️ Cycle {cycle} encountered error: {e}")
            print("Continuing to next cycle...")

    # Show gene pool summary
    print("\n5. Gene Pool Summary:")
    gene_pool.summary()

    # Pareto Analysis
    print("\n6. Identifying Pareto Frontier...")
    all_entries = gene_pool.gene_pool
    pareto_front = identify_pareto_front(all_entries)

    print(f"\n✓ Pareto frontier contains {len(pareto_front)} non-dominated solutions")
    print_pareto_summary(pareto_front)

    # Improvement Analysis
    print("\n7. Analyzing Improvements...")
    analyze_improvements(all_entries)

    # Visualizations
    print("\n8. Generating Visualizations...")
    visualize_pareto_frontier(pareto_front)

    # Final Summary
    print("\n" + "=" * 80)
    print("EVOLUTION TEST COMPLETE")
    print("=" * 80)

    print(f"\n✓ Total SOPs in Gene Pool: {len(all_entries)}")
    print(f"✓ Pareto Optimal SOPs: {len(pareto_front)}")

    # Find best average score
    best_sop = max(all_entries, key=lambda e: e["evaluation"].average_score())
    baseline_avg = baseline_eval.average_score()
    best_avg = best_sop["evaluation"].average_score()

    # A baseline average of 0 makes the relative change undefined; report that
    # instead of raising ZeroDivisionError after the whole run has completed.
    if baseline_avg:
        improvement = ((best_avg - baseline_avg) / baseline_avg) * 100
        improvement_note = f"{improvement:+.1f}% vs baseline"
    else:
        improvement_note = "n/a vs baseline (baseline score is 0)"

    print(f"\nBest SOP: v{best_sop['version']} - {best_sop['description']}")
    print(f"  Average Score: {best_avg:.3f} ({improvement_note})")

    # NOTE(review): this path is hard-coded in the message; confirm it matches
    # where visualize_pareto_frontier actually writes its output.
    print("\n✓ Visualization saved to: data/pareto_frontier_analysis.png")
    print("\n" + "=" * 80)


# Run the evolution loop test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()