Spaces:
Running
Running
| """ | |
| Test Evolution Loop (Phase 3) | |
| Complete validation of self-improvement system | |
| """ | |
| import sys | |
| from pathlib import Path | |
| # Add project root to path | |
| project_root = Path(__file__).parent.parent | |
| sys.path.insert(0, str(project_root)) | |
| from datetime import datetime | |
| from typing import Any | |
| from src.config import BASELINE_SOP | |
| from src.evaluation.evaluators import run_full_evaluation | |
| from src.evolution.director import SOPGenePool, run_evolution_cycle | |
| from src.evolution.pareto import ( | |
| analyze_improvements, | |
| identify_pareto_front, | |
| print_pareto_summary, | |
| visualize_pareto_frontier, | |
| ) | |
| from src.state import GuildState, PatientInput | |
| from src.workflow import create_guild | |
def create_test_patient() -> PatientInput:
    """Build the fixed diabetes test patient used by the evolution loop test.

    Returns:
        A ``PatientInput`` assembled from three hard-coded dictionaries:
        the biomarker panel, the model prediction and the patient context.
    """
    # Free-text / demographic context that accompanies the lab values.
    context = {
        "patient_id": "TEST-001",
        "age": 55,
        "gender": "male",
        "symptoms": ["Increased thirst", "Frequent urination", "Fatigue"],
        "medical_history": ["Prediabetes diagnosed 2 years ago"],
        "current_medications": ["Metformin 500mg"],
        "query": "My blood sugar has been high lately. What should I do?",
    }

    # Prediction payload; the top-level confidence equals the probability
    # listed for the predicted disease.
    class_probabilities = {"Type 2 Diabetes": 0.92, "Prediabetes": 0.05, "Healthy": 0.03}
    prediction: dict[str, Any] = {
        "disease": "Type 2 Diabetes",
        "confidence": 0.92,
        "probabilities": class_probabilities,
        "prediction_timestamp": "2025-01-01T10:00:00",
    }

    # Lab panel, kept in the same key order as before so iteration order
    # seen by downstream consumers is unchanged.
    labs: dict[str, float] = {
        "Glucose": 185.0,
        "HbA1c": 8.2,
        "Cholesterol": 235.0,
        "Triglycerides": 210.0,
        "HDL": 38.0,
        "LDL": 155.0,
        "VLDL": 42.0,
        "Total_Protein": 6.8,
        "Albumin": 4.2,
        "Globulin": 2.6,
        "AG_Ratio": 1.6,
        "Bilirubin_Total": 0.9,
        "Bilirubin_Direct": 0.2,
        "ALT": 35.0,
        "AST": 28.0,
        "ALP": 95.0,
        "Creatinine": 1.1,
        "BUN": 18.0,
        "BUN_Creatinine_Ratio": 16.4,
        "Sodium": 138.0,
        "Potassium": 4.2,
        "Chloride": 102.0,
        "Bicarbonate": 24.0,
    }

    return PatientInput(
        biomarkers=labs,
        model_prediction=prediction,
        patient_context=context,
    )
def main(num_cycles: int = 2) -> None:
    """Run the complete evolution loop test.

    Evaluates ``BASELINE_SOP`` on the test patient, registers the result in
    a fresh ``SOPGenePool``, runs ``num_cycles`` evolution cycles, and then
    prints the gene-pool summary, the Pareto-front analysis and the
    best-scoring SOP compared with the baseline.

    Args:
        num_cycles: Number of evolution cycles to run. Defaults to 2, the
            value that used to be hard-coded.
    """
    print("\n" + "=" * 80)
    print("PHASE 3: SELF-IMPROVEMENT LOOP TEST")
    print("=" * 80)

    # Setup
    print("\n1. Initializing system...")
    guild = create_guild()
    patient = create_test_patient()

    # Initialize gene pool with baseline
    print("\n2. Creating SOP Gene Pool...")
    gene_pool = SOPGenePool()

    print("\n3. Evaluating Baseline SOP...")
    # Run workflow with baseline SOP
    initial_state: GuildState = {
        "patient_biomarkers": patient.biomarkers,
        "model_prediction": patient.model_prediction,
        "patient_context": patient.patient_context,
        "plan": None,
        "sop": BASELINE_SOP,
        "agent_outputs": [],
        "biomarker_flags": [],
        "safety_alerts": [],
        "final_response": None,
        "processing_timestamp": datetime.now().isoformat(),
        "sop_version": "Baseline",
    }
    guild_state = guild.workflow.invoke(initial_state)
    baseline_response = guild_state["final_response"]
    agent_outputs = guild_state["agent_outputs"]

    baseline_eval = run_full_evaluation(
        final_response=baseline_response, agent_outputs=agent_outputs, biomarkers=patient.biomarkers
    )
    gene_pool.add(sop=BASELINE_SOP, evaluation=baseline_eval, parent_version=None, description="Baseline SOP")

    print(f"\n✓ Baseline Average Score: {baseline_eval.average_score():.3f}")
    print(f" Clinical Accuracy: {baseline_eval.clinical_accuracy.score:.3f}")
    print(f" Evidence Grounding: {baseline_eval.evidence_grounding.score:.3f}")
    print(f" Actionability: {baseline_eval.actionability.score:.3f}")
    print(f" Clarity: {baseline_eval.clarity.score:.3f}")
    print(f" Safety & Completeness: {baseline_eval.safety_completeness.score:.3f}")

    # The evaluation callback does not depend on the cycle, so it is defined
    # once here instead of being rebuilt on every loop iteration.
    def eval_func(final_response, agent_outputs, biomarkers):
        return run_full_evaluation(
            final_response=final_response, agent_outputs=agent_outputs, biomarkers=biomarkers
        )

    # Run evolution cycles
    print(f"\n4. Running {num_cycles} Evolution Cycles...")
    for cycle in range(1, num_cycles + 1):
        print(f"\n{'─' * 80}")
        print(f"EVOLUTION CYCLE {cycle}")
        print(f"{'─' * 80}")
        try:
            new_entries = run_evolution_cycle(
                gene_pool=gene_pool, patient_input=patient, workflow_graph=guild.workflow, evaluation_func=eval_func
            )
            print(f"\n✓ Cycle {cycle} complete: Added {len(new_entries)} new SOPs to gene pool")
            for entry in new_entries:
                print(f"\n SOP v{entry['version']}: {entry['description']}")
                print(f" Average Score: {entry['evaluation'].average_score():.3f}")
        except Exception as e:
            # Deliberately best-effort: a failed cycle is reported and the
            # remaining cycles and the final analysis still run.
            print(f"\n⚠️ Cycle {cycle} encountered error: {e}")
            print("Continuing to next cycle...")

    # Show gene pool summary
    print("\n5. Gene Pool Summary:")
    gene_pool.summary()

    # Pareto Analysis
    print("\n6. Identifying Pareto Frontier...")
    all_entries = gene_pool.gene_pool
    pareto_front = identify_pareto_front(all_entries)
    print(f"\n✓ Pareto frontier contains {len(pareto_front)} non-dominated solutions")
    print_pareto_summary(pareto_front)

    # Improvement Analysis
    print("\n7. Analyzing Improvements...")
    analyze_improvements(all_entries)

    # Visualizations
    print("\n8. Generating Visualizations...")
    visualize_pareto_frontier(pareto_front)

    # Final Summary
    print("\n" + "=" * 80)
    print("EVOLUTION TEST COMPLETE")
    print("=" * 80)
    print(f"\n✓ Total SOPs in Gene Pool: {len(all_entries)}")
    print(f"✓ Pareto Optimal SOPs: {len(pareto_front)}")

    # Find best average score
    best_sop = max(all_entries, key=lambda e: e["evaluation"].average_score())
    baseline_avg = baseline_eval.average_score()
    best_avg = best_sop["evaluation"].average_score()

    # A relative improvement is undefined for a zero baseline; report "n/a"
    # instead of raising ZeroDivisionError at the very end of the run.
    if baseline_avg:
        improvement = ((best_avg - baseline_avg) / baseline_avg) * 100
        delta_text = f"{improvement:+.1f}% vs baseline"
    else:
        delta_text = "n/a vs baseline"

    print(f"\nBest SOP: v{best_sop['version']} - {best_sop['description']}")
    print(f" Average Score: {best_avg:.3f} ({delta_text})")
    print("\n✓ Visualization saved to: data/pareto_frontier_analysis.png")
    print("\n" + "=" * 80)


if __name__ == "__main__":
    main()