File size: 1,851 Bytes
3f6526a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/bin/bash
# Full test: AlphaEvolve AC2 with eval service and WandB
#
# Prerequisites: the eval service should be running at EVAL_SERVICE_URL
# (configured below). Commands listed for starting it:
#   bash scripts/dev/start_eval_server.sh
#   uv run eval_agent/ev2_service_standalone.py --host "0.0.0.0" --port 8722
#
# The interpreter (.venv/bin/python) and the experiment script (tasks/...) are
# referenced by relative path, so run this script from the directory that
# contains both.

# -e: stop on the first failing command; -u: treat unset variables as errors
# (catches typos in the names below); pipefail: a failing pipeline stage fails
# the whole pipeline.
set -euo pipefail

# ============================================================================
# Configuration
# ============================================================================
EXPERIMENT_NAME_PREFIX="alphaevolve_ac2_mmv1"
NUM_GENERATIONS=200
MAX_PARALLEL_JOBS=5
META_INTERVAL=10

# Multi-valued options are arrays so that every element reaches the Python
# script as exactly one argument, without word splitting or glob expansion.
LLM_MODELS=("native-gemini-3-flash-preview")
LLM_SELECTION="ucb1"
LLM_TEMPERATURES=(0.0 0.5 1.0)
LLM_MAX_TOKENS=65536

# Optional switch: set to an empty string to leave the flag off the command.
USE_EVAL_SERVICE="--use-eval-service"
EVAL_SERVICE_URL="http://localhost:8722"
EVAL_TRIGGER_MODE="periodic"
EVAL_TRIGGER_INTERVAL=1000

# Derived from the settings above, e.g. "<prefix>_gen200_periodic1000".
EXPERIMENT_NAME="${EXPERIMENT_NAME_PREFIX}_gen${NUM_GENERATIONS}_${EVAL_TRIGGER_MODE}${EVAL_TRIGGER_INTERVAL}"

# Optional switch: set to an empty string to leave the flag off the command.
USE_WANDB="--use-wandb"
WANDB_PROJECT="ev2"
# One array element per tag; the trigger mode is included as a tag.
WANDB_TAGS=(alphaevolve_ac2 eval-service full-experiment "${EVAL_TRIGGER_MODE}" baseline)

# ============================================================================
# Run Experiment
# ============================================================================
# The command line is assembled in an array, in the same argument order as
# before, so the optional switches can be appended conditionally.
cmd=(
    .venv/bin/python tasks/alphaevolve_ac2/run_experiment.py
    --experiment-name "$EXPERIMENT_NAME"
    --num-generations "$NUM_GENERATIONS"
    --max-parallel-jobs "$MAX_PARALLEL_JOBS"
    --meta-interval "$META_INTERVAL"
    --llm-models "${LLM_MODELS[@]}"
    --llm-selection "$LLM_SELECTION"
    --llm-temperatures "${LLM_TEMPERATURES[@]}"
    --llm-max-tokens "$LLM_MAX_TOKENS"
)

if [[ -n "$USE_EVAL_SERVICE" ]]; then
    cmd+=("$USE_EVAL_SERVICE")
fi

cmd+=(
    --eval-service-url "$EVAL_SERVICE_URL"
    --eval-trigger-mode "$EVAL_TRIGGER_MODE"
    --eval-trigger-interval "$EVAL_TRIGGER_INTERVAL"
)

if [[ -n "$USE_WANDB" ]]; then
    cmd+=("$USE_WANDB")
fi

cmd+=(
    --wandb-project "$WANDB_PROJECT"
    --wandb-tags "${WANDB_TAGS[@]}"
    --verbose
)

# Last command of the script: its exit status becomes the script's status.
"${cmd[@]}"