#!/usr/bin/env bash
# Quick debug run: a short experiment with the eval service and WandB enabled.
# The number of generations is set by NUM_GENERATIONS below.
#
# Prerequisites: Eval service should be running
#   bash scripts/dev/start_eval_server.sh
#
# The runner path used below is relative, so invoke this script from the
# directory that contains scripts/.

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# ============================================================================
# Configuration
# ============================================================================
EXPERIMENT_NAME_PREFIX="debug"
NUM_GENERATIONS=10
MAX_PARALLEL_JOBS=2
# NOTE(review): META_INTERVAL and EVAL_TRIGGER_INTERVAL (20) are both larger
# than NUM_GENERATIONS (10); presumably neither fires during this run --
# confirm against run_experiment.py.
META_INTERVAL=20
EXPERIMENT_NAME="${EXPERIMENT_NAME_PREFIX}_gen${NUM_GENERATIONS}"
TASK="circle_packing"

# Multi-value options are arrays so that each element reaches the runner as
# exactly one argument, without relying on unquoted word splitting.
LLM_MODELS=(native-gemini-2.5-flash native-gemini-2.5-pro)
LLM_SELECTION="ucb1"
LLM_TEMPERATURES=(0.5 0.7 1.0)

# Toggle flags: set to the empty string to omit the flag from the command line.
USE_EVAL_SERVICE="--use-eval-service"
EVAL_SERVICE_URL="http://localhost:8765"
EVAL_TRIGGER_MODE="periodic"
EVAL_TRIGGER_INTERVAL=20

USE_WANDB="--use-wandb"
WANDB_PROJECT="ev2"
WANDB_TAGS=("$TASK" eval-service)

# ============================================================================
# Run Experiment
# ============================================================================
# ${VAR:+"$VAR"} expands to the flag as a single word when VAR is non-empty and
# to nothing at all (not an empty argument) when VAR is empty.
runner_args=(
  --experiment-name "$EXPERIMENT_NAME"
  --num-generations "$NUM_GENERATIONS"
  --max-parallel-jobs "$MAX_PARALLEL_JOBS"
  --meta-interval "$META_INTERVAL"
  --task "$TASK"
  --llm-models "${LLM_MODELS[@]}"
  --llm-selection "$LLM_SELECTION"
  --llm-temperatures "${LLM_TEMPERATURES[@]}"
  ${USE_EVAL_SERVICE:+"$USE_EVAL_SERVICE"}
  --eval-service-url "$EVAL_SERVICE_URL"
  --eval-trigger-mode "$EVAL_TRIGGER_MODE"
  --eval-trigger-interval "$EVAL_TRIGGER_INTERVAL"
  ${USE_WANDB:+"$USE_WANDB"}
  --wandb-project "$WANDB_PROJECT"
  --wandb-tags "${WANDB_TAGS[@]}"
  --verbose
)

python scripts/dev/run_experiment.py "${runner_args[@]}"