#!/bin/bash
# Full test: long run (NUM_GENERATIONS generations, 200 by default) with the
# eval service and WandB logging enabled.
#
# Prerequisites: the eval service should already be running:
#   bash scripts/dev/start_eval_server.sh
#
# All settings live in the Configuration section below. Multi-value settings
# (LLM_MODELS, LLM_TEMPERATURES, WANDB_TAGS) are whitespace-separated strings;
# each word is passed to run_experiment.py as a separate argument.

set -euo pipefail

# ============================================================================
# Configuration
# ============================================================================
EXPERIMENT_NAME_PREFIX="mmv1_2"
NUM_GENERATIONS=200
MAX_PARALLEL_JOBS=4
META_INTERVAL=10
TASK="circle_packing"

# Whitespace-separated lists; one argument per word.
LLM_MODELS="native-gemini-2.5-flash native-gemini-2.5-pro"
LLM_SELECTION="ucb1"
LLM_TEMPERATURES="0.5 0.7 1.0"

# Set to an empty string to omit the flag from the command line.
USE_EVAL_SERVICE="--use-eval-service"
EVAL_SERVICE_URL="http://localhost:8765"
EVAL_TRIGGER_MODE="periodic"
EVAL_TRIGGER_INTERVAL=10

# Derived name, e.g. "mmv1_2_gen200_periodic10" with the defaults above.
EXPERIMENT_NAME="${EXPERIMENT_NAME_PREFIX}_gen${NUM_GENERATIONS}_${EVAL_TRIGGER_MODE}${EVAL_TRIGGER_INTERVAL}"

# Set to an empty string to omit the flag from the command line.
USE_WANDB="--use-wandb"
WANDB_PROJECT="ev2"
WANDB_TAGS="${TASK} eval-service full-experiment ${EVAL_TRIGGER_MODE}"

# ============================================================================
# Run Experiment
# ============================================================================

# Split the list-valued settings into arrays. `read -a` splits on IFS like an
# unquoted expansion would, but without pathname globbing, so a value that
# happens to contain `*` or `?` is passed through literally.
read -r -a llm_models <<<"$LLM_MODELS"
read -r -a llm_temperatures <<<"$LLM_TEMPERATURES"
read -r -a wandb_tags <<<"$WANDB_TAGS"

# Build the command as an array so every argument keeps its exact boundaries.
# Argument order matches the original invocation.
cmd=(
  python scripts/dev/run_experiment.py
  --experiment-name "$EXPERIMENT_NAME"
  --num-generations "$NUM_GENERATIONS"
  --max-parallel-jobs "$MAX_PARALLEL_JOBS"
  --meta-interval "$META_INTERVAL"
  --task "$TASK"
  # ${arr[@]+...} expands to nothing for an empty array, which keeps `set -u`
  # from aborting on older Bash versions.
  --llm-models ${llm_models[@]+"${llm_models[@]}"}
  --llm-selection "$LLM_SELECTION"
  --llm-temperatures ${llm_temperatures[@]+"${llm_temperatures[@]}"}
)

# Optional switch: only passed when the setting is non-empty.
if [[ -n "$USE_EVAL_SERVICE" ]]; then
  cmd+=("$USE_EVAL_SERVICE")
fi

cmd+=(
  --eval-service-url "$EVAL_SERVICE_URL"
  --eval-trigger-mode "$EVAL_TRIGGER_MODE"
  --eval-trigger-interval "$EVAL_TRIGGER_INTERVAL"
)

# Optional switch: only passed when the setting is non-empty.
if [[ -n "$USE_WANDB" ]]; then
  cmd+=("$USE_WANDB")
fi

cmd+=(
  --wandb-project "$WANDB_PROJECT"
  --wandb-tags ${wandb_tags[@]+"${wandb_tags[@]}"}
  --verbose
)

"${cmd[@]}"