"""
Test Evolution Loop (Phase 3)
Complete validation of self-improvement system
"""
import sys
from pathlib import Path
# Add project root to path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
from datetime import datetime
from typing import Any
from src.config import BASELINE_SOP
from src.evaluation.evaluators import run_full_evaluation
from src.evolution.director import SOPGenePool, run_evolution_cycle
from src.evolution.pareto import (
analyze_improvements,
identify_pareto_front,
print_pareto_summary,
visualize_pareto_frontier,
)
from src.state import GuildState, PatientInput
from src.workflow import create_guild
def create_test_patient() -> PatientInput:
    """Build the fixed patient fixture used by the evolution loop test.

    All values are hard-coded, so every call yields the same input.

    Returns:
        A ``PatientInput`` carrying the biomarker readings, a canned
        "Type 2 Diabetes" model prediction, and the context (id, demographics,
        symptoms, history, medications, query) for patient ``TEST-001``.
    """
    lab_values: dict[str, float] = {
        "Glucose": 185.0,
        "HbA1c": 8.2,
        "Cholesterol": 235.0,
        "Triglycerides": 210.0,
        "HDL": 38.0,
        "LDL": 155.0,
        "VLDL": 42.0,
        "Total_Protein": 6.8,
        "Albumin": 4.2,
        "Globulin": 2.6,
        "AG_Ratio": 1.6,
        "Bilirubin_Total": 0.9,
        "Bilirubin_Direct": 0.2,
        "ALT": 35.0,
        "AST": 28.0,
        "ALP": 95.0,
        "Creatinine": 1.1,
        "BUN": 18.0,
        "BUN_Creatinine_Ratio": 16.4,
        "Sodium": 138.0,
        "Potassium": 4.2,
        "Chloride": 102.0,
        "Bicarbonate": 24.0,
    }

    # Per-class probabilities reported alongside the top prediction.
    class_probabilities = {
        "Type 2 Diabetes": 0.92,
        "Prediabetes": 0.05,
        "Healthy": 0.03,
    }
    prediction: dict[str, Any] = {
        "disease": "Type 2 Diabetes",
        "confidence": 0.92,
        "probabilities": class_probabilities,
        "prediction_timestamp": "2025-01-01T10:00:00",
    }

    reported_symptoms = ["Increased thirst", "Frequent urination", "Fatigue"]
    context = {
        "patient_id": "TEST-001",
        "age": 55,
        "gender": "male",
        "symptoms": reported_symptoms,
        "medical_history": ["Prediabetes diagnosed 2 years ago"],
        "current_medications": ["Metformin 500mg"],
        "query": "My blood sugar has been high lately. What should I do?",
    }

    return PatientInput(
        biomarkers=lab_values,
        model_prediction=prediction,
        patient_context=context,
    )
def main():
    """Run the complete evolution loop test and print a report to stdout.

    The steps mirror the numbered progress messages:

    1. Create the guild and the fixture patient.
    2. Create an empty ``SOPGenePool``.
    3. Invoke the workflow with ``BASELINE_SOP``, evaluate the response and
       add the result to the gene pool as the baseline entry.
    4. Run a fixed number of evolution cycles. A cycle that raises is
       reported and skipped so the remaining cycles still run.
    5. Print the gene pool summary.
    6. Identify and print the Pareto front over all gene pool entries.
    7. Run the improvement analysis.
    8. Generate the Pareto frontier visualization.

    Finally the best-scoring entry is compared against the baseline average.
    """
    print("\n" + "=" * 80)
    print("PHASE 3: SELF-IMPROVEMENT LOOP TEST")
    print("=" * 80)

    # Setup
    print("\n1. Initializing system...")
    guild = create_guild()
    patient = create_test_patient()

    # Initialize gene pool with baseline
    print("\n2. Creating SOP Gene Pool...")
    gene_pool = SOPGenePool()

    print("\n3. Evaluating Baseline SOP...")
    # Run workflow with baseline SOP
    initial_state: GuildState = {
        "patient_biomarkers": patient.biomarkers,
        "model_prediction": patient.model_prediction,
        "patient_context": patient.patient_context,
        "plan": None,
        "sop": BASELINE_SOP,
        "agent_outputs": [],
        "biomarker_flags": [],
        "safety_alerts": [],
        "final_response": None,
        "processing_timestamp": datetime.now().isoformat(),
        "sop_version": "Baseline",
    }
    guild_state = guild.workflow.invoke(initial_state)
    baseline_response = guild_state["final_response"]
    agent_outputs = guild_state["agent_outputs"]

    baseline_eval = run_full_evaluation(
        final_response=baseline_response,
        agent_outputs=agent_outputs,
        biomarkers=patient.biomarkers,
    )
    gene_pool.add(
        sop=BASELINE_SOP,
        evaluation=baseline_eval,
        parent_version=None,
        description="Baseline SOP",
    )

    print(f"\n✓ Baseline Average Score: {baseline_eval.average_score():.3f}")
    print(f" Clinical Accuracy: {baseline_eval.clinical_accuracy.score:.3f}")
    print(f" Evidence Grounding: {baseline_eval.evidence_grounding.score:.3f}")
    print(f" Actionability: {baseline_eval.actionability.score:.3f}")
    print(f" Clarity: {baseline_eval.clarity.score:.3f}")
    print(f" Safety & Completeness: {baseline_eval.safety_completeness.score:.3f}")

    # The evaluation callback does not depend on the cycle, so it is defined
    # once here instead of being rebuilt on every loop iteration.
    def eval_func(final_response, agent_outputs, biomarkers):
        return run_full_evaluation(
            final_response=final_response,
            agent_outputs=agent_outputs,
            biomarkers=biomarkers,
        )

    # Run evolution cycles
    num_cycles = 2
    print(f"\n4. Running {num_cycles} Evolution Cycles...")
    for cycle in range(1, num_cycles + 1):
        print(f"\n{'─' * 80}")
        print(f"EVOLUTION CYCLE {cycle}")
        print(f"{'─' * 80}")
        try:
            new_entries = run_evolution_cycle(
                gene_pool=gene_pool,
                patient_input=patient,
                workflow_graph=guild.workflow,
                evaluation_func=eval_func,
            )
            print(f"\n✓ Cycle {cycle} complete: Added {len(new_entries)} new SOPs to gene pool")
            for entry in new_entries:
                print(f"\n SOP v{entry['version']}: {entry['description']}")
                print(f" Average Score: {entry['evaluation'].average_score():.3f}")
        except Exception as e:
            # Deliberate best-effort: one failed cycle must not abort the
            # remaining cycles or the final analysis.
            print(f"\n⚠️ Cycle {cycle} encountered error: {e}")
            print("Continuing to next cycle...")

    # Show gene pool summary
    print("\n5. Gene Pool Summary:")
    gene_pool.summary()

    # Pareto Analysis
    print("\n6. Identifying Pareto Frontier...")
    all_entries = gene_pool.gene_pool
    pareto_front = identify_pareto_front(all_entries)
    print(f"\n✓ Pareto frontier contains {len(pareto_front)} non-dominated solutions")
    print_pareto_summary(pareto_front)

    # Improvement Analysis
    print("\n7. Analyzing Improvements...")
    analyze_improvements(all_entries)

    # Visualizations
    print("\n8. Generating Visualizations...")
    visualize_pareto_frontier(pareto_front)

    # Final Summary
    print("\n" + "=" * 80)
    print("EVOLUTION TEST COMPLETE")
    print("=" * 80)
    print(f"\n✓ Total SOPs in Gene Pool: {len(all_entries)}")
    print(f"✓ Pareto Optimal SOPs: {len(pareto_front)}")

    # Find best average score
    best_sop = max(all_entries, key=lambda e: e["evaluation"].average_score())
    baseline_avg = baseline_eval.average_score()
    best_avg = best_sop["evaluation"].average_score()

    # A baseline average of exactly 0 has no meaningful relative improvement;
    # report "n/a" instead of crashing with ZeroDivisionError at the very end.
    if baseline_avg:
        improvement = ((best_avg - baseline_avg) / baseline_avg) * 100
        improvement_text = f"{improvement:+.1f}%"
    else:
        improvement_text = "n/a"

    print(f"\nBest SOP: v{best_sop['version']} - {best_sop['description']}")
    print(f" Average Score: {best_avg:.3f} ({improvement_text} vs baseline)")
    print("\n✓ Visualization saved to: data/pareto_frontier_analysis.png")
    print("\n" + "=" * 80)
# Run the evolution loop test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()