| |
| """ |
| Full Experiment with Eval Service Integration |
| |
| Runs 50 generations with eval service doing all evaluations. |
| Agent triggers every 10 generations. |
| """ |
|
|
| from shinka.core import EvolutionRunner, EvolutionConfig |
| from shinka.launch import LocalJobConfig |
| from shinka.database import DatabaseConfig |
| from pathlib import Path |
| from datetime import datetime |
| import time |
|
|
def main():
    """Run the circle-packing evolution experiment through the eval service.

    Prints a banner, builds the job/database/evolution configurations,
    prints a configuration summary plus the command needed to start the
    eval service, waits for the operator to press Enter, and then builds an
    ``EvolutionRunner`` and calls ``run()`` on it.

    Returns:
        bool: ``True`` when the run finishes without raising. ``False`` when
        the operator cancels at the prompt (Ctrl+C), when no confirmation
        can be read from stdin (EOF), or when building/running the
        ``EvolutionRunner`` raises an ``Exception``.
    """
    # Single source of truth for values that appear in the banner, in the
    # configuration objects, and in the printed eval-service command, so the
    # three cannot drift apart.
    num_generations = 50
    agent_interval = 10
    eval_service_port = 8765
    eval_service_url = f"http://localhost:{eval_service_port}"
    evaluator_path = "examples/circle_packing/evaluate_ori.py"

    # One clock reading feeds both the experiment name and the banner.
    started_at = datetime.now()
    timestamp = started_at.strftime("%Y%m%d_%H%M%S")
    experiment_name = f"with_eval_service_gen50_{timestamp}"
    results_dir = f"examples/circle_packing/results/results_{experiment_name}"

    print("=" * 80)
    print("🚀 Circle Packing - Full Experiment with Eval Service")
    print("=" * 80)
    print(f"📅 Started: {started_at.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"🔬 Experiment: {experiment_name}")
    print(f"📊 Generations: {num_generations}")
    print(f"🤖 Agent Interval: {agent_interval}")
    print(f"🔌 Eval Service: {eval_service_url}")
    print(f"📁 Results: {results_dir}")
    print("=" * 80)
    print()

    # System prompt handed to EvolutionConfig as task_sys_msg.
    task_sys_msg = """You are an expert mathematician specializing in circle packing problems and computational geometry.

Your task is to maximize the sum of radii when packing 26 circles in a unit square [0,1] x [0,1].
The best known result is 2.635 (sum of radii).

Key strategies to consider:
1. Efficient spatial distribution - avoid clustering
2. Utilize corners and edges effectively
3. Balance between many small circles vs fewer large circles
4. Consider geometric patterns: grid, hexagonal, concentric rings
5. Optimize placement to minimize wasted space

You will receive:
- Current code implementation
- Performance metrics (sum of radii)
- Circle center coordinates as text

Make improvements based on the numerical data and geometric reasoning.
Ensure all circles are disjoint and lie inside the unit square.
"""

    job_config = LocalJobConfig(eval_program_path=evaluator_path)

    db_config = DatabaseConfig(
        num_islands=2,
        archive_size=40,
        elite_selection_ratio=0.3,
        num_archive_inspirations=4,
        num_top_k_inspirations=2,
        migration_interval=10,
        migration_rate=0.1,
        island_elitism=True,
        parent_selection_strategy="weighted",
        parent_selection_lambda=10.0,
    )

    evo_config = EvolutionConfig(
        task_sys_msg=task_sys_msg,
        patch_types=["diff", "full", "cross"],
        patch_type_probs=[0.6, 0.3, 0.1],
        num_generations=num_generations,
        max_parallel_jobs=4,
        max_patch_resamples=3,
        max_patch_attempts=3,
        job_type="local",
        language="python",
        # Mutation LLMs.
        llm_models=[
            "native-gemini-2.5-flash",
            "native-gemini-2.5-pro",
        ],
        llm_kwargs=dict(
            temperatures=[0.5, 0.7, 1.0],
            max_tokens=32768,
        ),
        # Meta-recommendation settings.
        meta_rec_interval=10,
        meta_llm_models=["native-gemini-2.5-flash"],
        meta_llm_kwargs=dict(temperatures=[0.7], max_tokens=16384),
        meta_max_recommendations=5,
        # Embedding / novelty settings.
        embedding_model="text-embedding-3-small",
        code_embed_sim_threshold=0.995,
        novelty_llm_models=["native-gemini-2.5-flash"],
        novelty_llm_kwargs=dict(temperatures=[0.7], max_tokens=16384),
        # Dynamic LLM selection.
        llm_dynamic_selection="ucb1",
        llm_dynamic_selection_kwargs=dict(exploration_coef=1.0),
        init_program_path="examples/circle_packing/initial.py",
        results_dir=results_dir,
        use_text_feedback=False,
        # Eval service integration.
        eval_service_url=eval_service_url,
        use_eval_service=True,
        evaluator_module="examples.circle_packing.evaluate_ori",
        evaluator_function="main",
    )

    print("📋 Configuration Summary:")
    print(f" • Generations: {evo_config.num_generations}")
    print(f" • Parallel Jobs: {evo_config.max_parallel_jobs}")
    print(f" • Islands: {db_config.num_islands}")
    print(f" • Archive Size: {db_config.archive_size}")
    print(f" • Models: {', '.join(evo_config.llm_models)}")
    print(f" • LLM Selection: {evo_config.llm_dynamic_selection}")
    print(f" • Meta Interval: {evo_config.meta_rec_interval}")
    print(f" • Evaluator: {Path(evaluator_path).name}")
    print(f" • Eval Service: {evo_config.eval_service_url}")
    print(f" • Use Eval Service: {evo_config.use_eval_service} ✅")
    print()

    # The printed command reuses the same results_dir, evaluator path,
    # trigger interval and port as the configuration above.
    print("⚠️ Prerequisites:")
    print(" 1. Eval service must be running:")
    print(" python eval_agent/ev2_service_standalone.py \\")
    print(f" --results-dir {results_dir} \\")
    print(f" --primary-evaluator {evaluator_path} \\")
    print(" --trigger-mode periodic \\")
    print(f" --trigger-interval {agent_interval} \\")
    print(f" --port {eval_service_port}")
    print()

    # The prompt advertises Ctrl+C as the way to cancel, so honor it with a
    # clean False return instead of an unhandled KeyboardInterrupt. A closed
    # or exhausted stdin (EOFError) is treated the same way: without a
    # confirmation the experiment is not started.
    try:
        input("Press Enter to start (Ctrl+C to cancel)...")
    except (KeyboardInterrupt, EOFError):
        print("\n🛑 Cancelled before starting.")
        return False

    # Monotonic clock: the elapsed time is unaffected by system clock changes
    # during a long run.
    start_time = time.monotonic()

    # Top-level boundary of the script: report any failure and convert it to
    # a False return so the caller can set the exit status.
    try:
        runner = EvolutionRunner(
            evo_config=evo_config,
            job_config=job_config,
            db_config=db_config,
        )

        print("\n🚀 Starting evolution...")
        print("=" * 80)
        runner.run()
    except Exception as e:
        print("\n" + "=" * 80)
        print(f"❌ Experiment failed: {e}")
        print("=" * 80)
        import traceback

        traceback.print_exc()
        return False

    elapsed = time.monotonic() - start_time

    print("\n" + "=" * 80)
    print("✅ Experiment completed successfully!")
    print("=" * 80)
    print(f"⏱️ Total time: {elapsed/3600:.2f} hours")
    print(f"📁 Results: {results_dir}")
    print()

    print("📊 Summary:")
    print(f" • Total generations: {evo_config.num_generations}")
    print(" • Check eval_agent_memory/ for Agent analysis")
    print(" • Check gen_*/results/metrics.json for complete metrics")
    print("=" * 80)

    return True
|
|
|
|
if __name__ == "__main__":
    import sys

    # Map main()'s truthy/falsy result onto the process exit status:
    # 0 when it reports success, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)
|
|