#!/bin/bash
# Smoke test: Circle packing with eval service and persistent agents
#
# Prerequisites (start the eval service first):
#   OPENHANDS_LOG_COMPLETIONS=1 ENABLE_FULL_TRAJECTORY_LOG=1 \
#     uv run eval_agent/ev2_service_standalone.py --host "0.0.0.0" --port 8733

# -e: abort on error; -u: error on unset variables (catches config typos);
# -o pipefail: a pipeline fails if any stage fails.
set -euo pipefail

# --- Experiment configuration -------------------------------------------
readonly EXPERIMENT_NAME_PREFIX="circle_packing_persistent_smoke"
readonly NUM_GENERATIONS=20
readonly MAX_PARALLEL_JOBS=4
readonly META_INTERVAL=10

# Persistent-agent settings. PERSISTENT_AGENTS is an array so it expands to
# exactly one flag here, and to zero arguments if set to () to disable —
# no unquoted word-splitting needed (ShellCheck SC2086).
readonly PERSISTENT_AGENTS=("--persistent-agents")
readonly PERSISTENT_CONTEXT_REFRESH_INTERVAL=10

# LLM settings. Multi-valued options are arrays so each element is passed to
# the CLI as its own argument, even if a value ever contains spaces.
readonly LLM_MODELS=("native-gemini-3-flash-preview")
readonly LLM_SELECTION="ucb1"
readonly LLM_TEMPERATURES=(0.7 1.0)
readonly LLM_MAX_TOKENS=65536

# Eval-service settings (URL must match the standalone service started above).
readonly USE_EVAL_SERVICE=("--use-eval-service")
readonly EVAL_SERVICE_URL="http://localhost:8733"
readonly EVAL_TRIGGER_MODE="periodic"
readonly EVAL_TRIGGER_INTERVAL=5

# Derived run name, e.g. circle_packing_persistent_smoke_g20_periodic5.
readonly EXPERIMENT_NAME="${EXPERIMENT_NAME_PREFIX}_g${NUM_GENERATIONS}_${EVAL_TRIGGER_MODE}${EVAL_TRIGGER_INTERVAL}"

.venv/bin/python tasks/circle_packing/run_experiment.py \
  --experiment-name "$EXPERIMENT_NAME" \
  --num-generations "$NUM_GENERATIONS" \
  --max-parallel-jobs "$MAX_PARALLEL_JOBS" \
  --meta-interval "$META_INTERVAL" \
  "${PERSISTENT_AGENTS[@]}" \
  --persistent-context-refresh-interval "$PERSISTENT_CONTEXT_REFRESH_INTERVAL" \
  --llm-models "${LLM_MODELS[@]}" \
  --llm-selection "$LLM_SELECTION" \
  --llm-temperatures "${LLM_TEMPERATURES[@]}" \
  --llm-max-tokens "$LLM_MAX_TOKENS" \
  "${USE_EVAL_SERVICE[@]}" \
  --eval-service-url "$EVAL_SERVICE_URL" \
  --eval-trigger-mode "$EVAL_TRIGGER_MODE" \
  --eval-trigger-interval "$EVAL_TRIGGER_INTERVAL" \
  --verbose \
  --trajectory-log