shinka-backup / run_full_experiment.py
JustinTX's picture
Add files using upload-large-folder tool
1ca9dbd verified
#!/usr/bin/env python3
"""
Full Experiment with Eval Service Integration
Runs 50 generations with eval service doing all evaluations.
Agent triggers every 10 generations.
"""
from shinka.core import EvolutionRunner, EvolutionConfig
from shinka.launch import LocalJobConfig
from shinka.database import DatabaseConfig
from pathlib import Path
from datetime import datetime
import time
def main(
    *,
    num_generations: int = 50,
    agent_interval: int = 10,
    eval_service_port: int = 8765,
) -> bool:
    """Run the circle-packing evolution experiment with the eval service.

    Prints a banner and configuration summary, builds the job, database and
    evolution configurations, waits for the user to confirm that the eval
    service is running, and then runs the evolution.

    Args:
        num_generations: Number of generations; passed to ``EvolutionConfig``
            and embedded in the experiment name.
        agent_interval: Agent trigger interval. Only used in the banner and in
            the eval-service command printed under "Prerequisites"; it is not
            passed to any configuration object built here.
        eval_service_port: Port of the eval service on localhost; used to
            build ``eval_service_url`` and the printed ``--port`` flag.

    Returns:
        True if the evolution run completed, False if it raised an exception
        or the user cancelled at the confirmation prompt.
    """
    rule = "=" * 80
    started = datetime.now()
    timestamp = started.strftime("%Y%m%d_%H%M%S")
    experiment_name = f"with_eval_service_gen{num_generations}_{timestamp}"
    results_dir = f"examples/circle_packing/results/results_{experiment_name}"

    # Single source of truth for values that appear both in the configuration
    # and in the printed instructions, so the two cannot drift apart.
    eval_service_url = f"http://localhost:{eval_service_port}"
    eval_program_path = "examples/circle_packing/evaluate_ori.py"

    print(rule)
    print("🚀 Circle Packing - Full Experiment with Eval Service")
    print(rule)
    print(f"📅 Started: {started.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"🔬 Experiment: {experiment_name}")
    print(f"📊 Generations: {num_generations}")
    print(f"🤖 Agent Interval: {agent_interval}")
    print(f"🔌 Eval Service: {eval_service_url}")
    print(f"📁 Results: {results_dir}")
    print(rule)
    print()

    # Task description (same as baseline)
    task_sys_msg = """You are an expert mathematician specializing in circle packing problems and computational geometry.
Your task is to maximize the sum of radii when packing 26 circles in a unit square [0,1] x [0,1].
The best known result is 2.635 (sum of radii).
Key strategies to consider:
1. Efficient spatial distribution - avoid clustering
2. Utilize corners and edges effectively
3. Balance between many small circles vs fewer large circles
4. Consider geometric patterns: grid, hexagonal, concentric rings
5. Optimize placement to minimize wasted space
You will receive:
- Current code implementation
- Performance metrics (sum of radii)
- Circle center coordinates as text
Make improvements based on the numerical data and geometric reasoning.
Ensure all circles are disjoint and lie inside the unit square.
"""

    # Job configuration - USE evaluate_ori.py
    job_config = LocalJobConfig(eval_program_path=eval_program_path)

    # Database configuration (same as baseline)
    db_config = DatabaseConfig(
        num_islands=2,
        archive_size=40,
        elite_selection_ratio=0.3,
        num_archive_inspirations=4,
        num_top_k_inspirations=2,
        migration_interval=10,
        migration_rate=0.1,
        island_elitism=True,
        parent_selection_strategy="weighted",
        parent_selection_lambda=10.0,
    )

    # Evolution configuration
    evo_config = EvolutionConfig(
        task_sys_msg=task_sys_msg,
        patch_types=["diff", "full", "cross"],
        patch_type_probs=[0.6, 0.3, 0.1],
        num_generations=num_generations,
        max_parallel_jobs=4,
        max_patch_resamples=3,
        max_patch_attempts=3,
        job_type="local",
        language="python",
        # Use native Gemini models (same as baseline)
        llm_models=[
            "native-gemini-2.5-flash",
            "native-gemini-2.5-pro",
        ],
        llm_kwargs=dict(
            temperatures=[0.5, 0.7, 1.0],
            max_tokens=32768,
        ),
        # Meta recommendations every 10 generations
        meta_rec_interval=10,
        meta_llm_models=["native-gemini-2.5-flash"],
        meta_llm_kwargs=dict(temperatures=[0.7], max_tokens=16384),
        meta_max_recommendations=5,
        # Embedding for novelty
        embedding_model="text-embedding-3-small",
        code_embed_sim_threshold=0.995,
        novelty_llm_models=["native-gemini-2.5-flash"],
        novelty_llm_kwargs=dict(temperatures=[0.7], max_tokens=16384),
        # LLM selection strategy
        llm_dynamic_selection="ucb1",
        llm_dynamic_selection_kwargs=dict(exploration_coef=1.0),
        init_program_path="examples/circle_packing/initial.py",
        results_dir=results_dir,
        use_text_feedback=False,
        # ===== Eval Service Configuration =====
        eval_service_url=eval_service_url,
        use_eval_service=True,  # Use eval service for ALL evaluations
        evaluator_module="examples.circle_packing.evaluate_ori",
        evaluator_function="main",
    )

    print("📋 Configuration Summary:")
    print(f" • Generations: {evo_config.num_generations}")
    print(f" • Parallel Jobs: {evo_config.max_parallel_jobs}")
    print(f" • Islands: {db_config.num_islands}")
    print(f" • Archive Size: {db_config.archive_size}")
    print(f" • Models: {', '.join(evo_config.llm_models)}")
    print(f" • LLM Selection: {evo_config.llm_dynamic_selection}")
    print(f" • Meta Interval: {evo_config.meta_rec_interval}")
    print(f" • Evaluator: {Path(eval_program_path).name}")
    print(f" • Eval Service: {evo_config.eval_service_url}")
    print(f" • Use Eval Service: {evo_config.use_eval_service} ✅")
    print()
    print("⚠️ Prerequisites:")
    print(" 1. Eval service must be running:")
    print(" python eval_agent/ev2_service_standalone.py \\")
    print(f" --results-dir {results_dir} \\")
    print(f" --primary-evaluator {eval_program_path} \\")
    print(" --trigger-mode periodic \\")
    print(f" --trigger-interval {agent_interval} \\")
    print(f" --port {eval_service_port}")
    print()

    # The prompt advertises Ctrl+C as the way to cancel, so honour it cleanly.
    # KeyboardInterrupt is not an Exception subclass and would otherwise escape
    # with a traceback; EOFError is what input() raises when stdin is closed.
    try:
        input("Press Enter to start (Ctrl+C to cancel)...")
    except KeyboardInterrupt:
        print("\nCancelled before start.")
        return False
    except EOFError:
        print("\nNo interactive input available; cannot confirm start.")
        return False

    start_time = time.time()
    try:
        runner = EvolutionRunner(
            evo_config=evo_config,
            job_config=job_config,
            db_config=db_config,
        )
        print("\n🚀 Starting evolution...")
        print(rule)
        runner.run()
    except Exception as e:
        # Top-level boundary: report the failure and signal it via the return
        # value instead of letting the exception propagate.
        elapsed = time.time() - start_time
        print("\n" + rule)
        print(f"❌ Experiment failed: {e}")
        print(f"⏱️ Elapsed before failure: {elapsed/3600:.2f} hours")
        print(rule)
        import traceback

        traceback.print_exc()
        return False

    elapsed = time.time() - start_time
    print("\n" + rule)
    print("✅ Experiment completed successfully!")
    print(rule)
    print(f"⏱️ Total time: {elapsed/3600:.2f} hours")
    print(f"📁 Results: {results_dir}")
    print()
    # Print summary
    print("📊 Summary:")
    print(f" • Total generations: {num_generations}")
    print(" • Check eval_agent_memory/ for Agent analysis")
    print(" • Check gen_*/results/metrics.json for complete metrics")
    print(rule)
    return True
if __name__ == "__main__":
    import sys

    # Map main()'s boolean outcome onto the process exit status:
    # 0 when the experiment succeeded, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)