#!/usr/bin/env python3
"""
Full Experiment with Eval Service Integration

Runs 50 generations with eval service doing all evaluations.
Agent triggers every 10 generations.
"""

import sys
import time
import traceback
from datetime import datetime
from pathlib import Path

from shinka.core import EvolutionRunner, EvolutionConfig
from shinka.launch import LocalJobConfig
from shinka.database import DatabaseConfig

# Single source of truth for values that appear in the banner, the
# configuration objects and the printed eval-service command line.
NUM_GENERATIONS = 50
AGENT_TRIGGER_INTERVAL = 10
EVAL_SERVICE_PORT = 8765
EVAL_SERVICE_URL = f"http://localhost:{EVAL_SERVICE_PORT}"
EVALUATOR_PATH = "examples/circle_packing/evaluate_ori.py"
SEPARATOR = "=" * 80

# Task description (same as baseline).
# NOTE(review): this text looks like it was once a multi-line string whose
# line breaks (numbered list, dash list) were lost; the wording and spacing
# are kept exactly as found. Restore the layout if the original is known.
TASK_SYS_MSG = (
    "You are an expert mathematician specializing in circle packing "
    "problems and computational geometry. "
    "Your task is to maximize the sum of radii when packing 26 circles "
    "in a unit square [0,1] x [0,1]. "
    "The best known result is 2.635 (sum of radii). "
    "Key strategies to consider: "
    "1. Efficient spatial distribution - avoid clustering "
    "2. Utilize corners and edges effectively "
    "3. Balance between many small circles vs fewer large circles "
    "4. Consider geometric patterns: grid, hexagonal, concentric rings "
    "5. Optimize placement to minimize wasted space "
    "You will receive: "
    "- Current code implementation "
    "- Performance metrics (sum of radii) "
    "- Circle center coordinates as text "
    "Make improvements based on the numerical data and geometric reasoning. "
    "Ensure all circles are disjoint and lie inside the unit square. \n"
)


def _print_banner(experiment_name: str, results_dir: str) -> None:
    """Print the experiment header: start time, name and fixed settings."""
    print(SEPARATOR)
    print("🚀 Circle Packing - Full Experiment with Eval Service")
    print(SEPARATOR)
    print(f"📅 Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"🔬 Experiment: {experiment_name}")
    print(f"📊 Generations: {NUM_GENERATIONS}")
    print(f"🤖 Agent Interval: {AGENT_TRIGGER_INTERVAL}")
    print(f"🔌 Eval Service: {EVAL_SERVICE_URL}")
    # NOTE(review): the icon here was truncated by the encoding damage;
    # the folder emoji is the most plausible original.
    print(f"📁 Results: {results_dir}")
    print(SEPARATOR)
    print()


def _build_db_config() -> DatabaseConfig:
    """Return the database configuration (same as baseline)."""
    return DatabaseConfig(
        num_islands=2,
        archive_size=40,
        elite_selection_ratio=0.3,
        num_archive_inspirations=4,
        num_top_k_inspirations=2,
        migration_interval=10,
        migration_rate=0.1,
        island_elitism=True,
        parent_selection_strategy="weighted",
        parent_selection_lambda=10.0,
    )


def _build_evo_config(results_dir: str) -> EvolutionConfig:
    """Return the evolution configuration writing results to *results_dir*."""
    return EvolutionConfig(
        task_sys_msg=TASK_SYS_MSG,
        patch_types=["diff", "full", "cross"],
        patch_type_probs=[0.6, 0.3, 0.1],
        num_generations=NUM_GENERATIONS,  # Full 50 generations
        max_parallel_jobs=4,
        max_patch_resamples=3,
        max_patch_attempts=3,
        job_type="local",
        language="python",
        # Use native Gemini models (same as baseline)
        llm_models=[
            "native-gemini-2.5-flash",
            "native-gemini-2.5-pro",
        ],
        llm_kwargs=dict(
            temperatures=[0.5, 0.7, 1.0],
            max_tokens=32768,
        ),
        # Meta recommendations every 10 generations
        meta_rec_interval=10,
        meta_llm_models=["native-gemini-2.5-flash"],
        meta_llm_kwargs=dict(temperatures=[0.7], max_tokens=16384),
        meta_max_recommendations=5,
        # Embedding for novelty
        embedding_model="text-embedding-3-small",
        code_embed_sim_threshold=0.995,
        novelty_llm_models=["native-gemini-2.5-flash"],
        novelty_llm_kwargs=dict(temperatures=[0.7], max_tokens=16384),
        # LLM selection strategy
        llm_dynamic_selection="ucb1",
        llm_dynamic_selection_kwargs=dict(exploration_coef=1.0),
        init_program_path="examples/circle_packing/initial.py",
        results_dir=results_dir,
        use_text_feedback=False,
        # ===== Eval Service Configuration =====
        eval_service_url=EVAL_SERVICE_URL,
        use_eval_service=True,  # Use eval service for ALL evaluations
        evaluator_module="examples.circle_packing.evaluate_ori",
        evaluator_function="main",
    )


def _print_config_summary(
    evo_config: EvolutionConfig, db_config: DatabaseConfig
) -> None:
    """Print the key settings read back from the two config objects."""
    print("📋 Configuration Summary:")
    print(f" • Generations: {evo_config.num_generations}")
    print(f" • Parallel Jobs: {evo_config.max_parallel_jobs}")
    print(f" • Islands: {db_config.num_islands}")
    print(f" • Archive Size: {db_config.archive_size}")
    print(f" • Models: {', '.join(evo_config.llm_models)}")
    print(f" • LLM Selection: {evo_config.llm_dynamic_selection}")
    print(f" • Meta Interval: {evo_config.meta_rec_interval}")
    print(f" • Evaluator: {Path(EVALUATOR_PATH).name}")
    print(f" • Eval Service: {evo_config.eval_service_url}")
    print(f" • Use Eval Service: {evo_config.use_eval_service} ✅")
    print()


def _print_prerequisites(results_dir: str) -> None:
    """Print the command the user is expected to have started beforehand."""
    print("⚠️ Prerequisites:")
    print(" 1. Eval service must be running:")
    print(" python eval_agent/ev2_service_standalone.py \\")
    print(f" --results-dir {results_dir} \\")
    print(f" --primary-evaluator {EVALUATOR_PATH} \\")
    print(" --trigger-mode periodic \\")
    print(f" --trigger-interval {AGENT_TRIGGER_INTERVAL} \\")
    print(f" --port {EVAL_SERVICE_PORT}")
    print()


def _confirm_start() -> bool:
    """Wait for Enter; return False if the user cancels or stdin is closed."""
    try:
        input("Press Enter to start (Ctrl+C to cancel)...")
    except (KeyboardInterrupt, EOFError):
        # The prompt advertises Ctrl+C as the way to cancel, so exit
        # cleanly instead of dumping a traceback.
        print("\nCancelled before start.")
        return False
    return True


def main() -> bool:
    """Run 50 generation experiment with eval service.

    Returns:
        True when the run finishes, False when it is cancelled at the
        prompt or the runner raises an exception.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    experiment_name = f"with_eval_service_gen{NUM_GENERATIONS}_{timestamp}"
    results_dir = f"examples/circle_packing/results/results_{experiment_name}"

    _print_banner(experiment_name, results_dir)

    # Job configuration - USE evaluate_ori.py
    job_config = LocalJobConfig(eval_program_path=EVALUATOR_PATH)
    db_config = _build_db_config()
    evo_config = _build_evo_config(results_dir)

    _print_config_summary(evo_config, db_config)
    _print_prerequisites(results_dir)

    if not _confirm_start():
        return False

    start_time = time.time()
    try:
        runner = EvolutionRunner(
            evo_config=evo_config,
            job_config=job_config,
            db_config=db_config,
        )
        print("\n🚀 Starting evolution...")
        print(SEPARATOR)
        runner.run()
    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # return value so the process exits non-zero.
        print("\n" + SEPARATOR)
        print(f"❌ Experiment failed: {e}")
        print(SEPARATOR)
        traceback.print_exc()
        return False
    else:
        elapsed = time.time() - start_time
        print("\n" + SEPARATOR)
        print("✅ Experiment completed successfully!")
        print(SEPARATOR)
        print(f"⏱️ Total time: {elapsed/3600:.2f} hours")
        print(f"📁 Results: {results_dir}")
        print()
        # Print summary
        print("📊 Summary:")
        print(f" • Total generations: {evo_config.num_generations}")
        print(" • Check eval_agent_memory/ for Agent analysis")
        print(" • Check gen_*/results/metrics.json for complete metrics")
        print(SEPARATOR)
        return True


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)