#!/bin/bash
#
# Frontier-CS parallel "vanilla" runner (eval service, no agent trigger).
#
# Starts one eval service per concurrency slot, then runs
# tasks/frontier_cs_entry/run_experiment.py once per problem, keeping at most
# CONCURRENCY problems in flight at a time.
#
# Usage: <this script> [NUM_PROBLEMS|all] [CONCURRENCY]
#   NUM_PROBLEMS  how many problems to run, or "all" (default: all)
#   CONCURRENCY   number of parallel slots / eval services (default: 20)

set -euo pipefail

# Every relative path below is resolved from the directory two levels above
# this script (presumably the repository root).
cd "$(dirname "$0")/../.."

# Interpreter used for both the eval services and the experiment runs.
PYTHON=".venv/bin/python"
if [[ ! -x "${PYTHON}" ]]; then
  # Fail fast here instead of timing out later while waiting for a service
  # that could never have started.
  echo "ERROR: ${PYTHON} not found or not executable (cwd: $(pwd))" >&2
  exit 1
fi

# Positional arguments:
#   $1  number of problems to run, or "all"           (default: all)
#   $2  number of problems to keep in flight at once  (default: 20)
NUM_PROBLEMS="${1:-all}"
CONCURRENCY="${2:-20}"

# Reject malformed values up front: a non-numeric count breaks the array
# slice / port arithmetic later, and zero slots would leave the scheduler
# with nowhere to run anything.
uint_re='^(0|[1-9][0-9]*)$'
if [[ "${NUM_PROBLEMS}" != "all" && ! "${NUM_PROBLEMS}" =~ ${uint_re} ]]; then
  echo "ERROR: NUM_PROBLEMS must be 'all' or a non-negative integer (got '${NUM_PROBLEMS}')" >&2
  exit 2
fi
if [[ ! "${CONCURRENCY}" =~ ${uint_re} || "${CONCURRENCY}" == "0" ]]; then
  echo "ERROR: CONCURRENCY must be a positive integer (got '${CONCURRENCY}')" >&2
  exit 2
fi

# --- Experiment settings ---------------------------------------------------
GENS=50
SEED_MODEL="gemini3pro"
# Passed to --llm-models via an unquoted expansion, so several
# space-separated model names would be split into separate arguments.
LLM_MODELS="native-gemini-3-flash-preview"
# Port for slot 0; slot N talks to the eval service on BASE_PORT + N.
BASE_PORT=8860

# --- Eval trigger ----------------------------------------------------------
# NOTE(review): the interval (1000) is larger than GENS (50), which is
# presumably why the banner labels the trigger "(disabled)" -- confirm.
EVAL_TRIGGER_MODE="periodic"
EVAL_TRIGGER_INTERVAL=1000

# --- Output locations ------------------------------------------------------
# Each invocation gets its own timestamped run directory.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EXP_NAME="vanilla_g${GENS}"
RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"

# --- Inputs and logs -------------------------------------------------------
PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
LOG_DIR="logs/frontier_cs_parallel_vanilla"
mkdir -p "${LOG_DIR}"

#######################################
# Collect every problem id that also has a solutions directory.
# Globals:   PROBLEMS_DIR, SOLUTIONS_DIR (read); ALL_PIDS (written)
# Outputs:   none
# Notes:     entries are taken from a glob rather than parsed `ls` output, so
#            names containing whitespace stay intact; ordering is `sort -n`.
#######################################
discover_problems() {
  local entry name
  ALL_PIDS=()
  while IFS= read -r name; do
    if [[ -n "${name}" && -d "${SOLUTIONS_DIR}/${name}" ]]; then
      ALL_PIDS+=("${name}")
    fi
  done < <(
    for entry in "${PROBLEMS_DIR}"/*; do
      # An unmatched glob stays literal; skip it instead of listing "*".
      if [[ -e "${entry}" || -L "${entry}" ]]; then
        printf '%s\n' "${entry##*/}"
      fi
    done | sort -n
  )
}

#######################################
# Choose the problems to run this time.
# Globals:   ALL_PIDS (read); PIDS (written)
# Arguments: $1 - "all" (also the default when empty) or how many of the
#                 first entries of ALL_PIDS to take
#######################################
select_problems() {
  local limit="${1:-all}"
  PIDS=()
  # Expanding an empty array trips `set -u` on older bash releases, so
  # return early when nothing was discovered.
  if (( ${#ALL_PIDS[@]} == 0 )); then
    return 0
  fi
  if [[ "${limit}" == "all" ]]; then
    PIDS=("${ALL_PIDS[@]}")
  else
    PIDS=("${ALL_PIDS[@]:0:${limit}}")
  fi
}

discover_problems
select_problems "${NUM_PROBLEMS}"

#######################################
# Print the run configuration banner.
# Globals:   PIDS, ALL_PIDS, CONCURRENCY, GENS, SEED_MODEL, LLM_MODELS,
#            RUN_DIR, BASE_PORT, EVAL_TRIGGER_MODE, EVAL_TRIGGER_INTERVAL,
#            LOG_DIR (all read)
# Outputs:   banner on stdout, followed by one blank line
#######################################
print_banner() {
  local rule="========================================"
  # One eval service per slot, on consecutive ports starting at BASE_PORT.
  local last_port=$((BASE_PORT + CONCURRENCY - 1))

  printf '%s\n' \
    "${rule}" \
    "Frontier-CS Parallel Vanilla (eval service, no agent trigger)" \
    "${rule}" \
    " Problems: ${#PIDS[@]} / ${#ALL_PIDS[@]} available" \
    " Concurrency: ${CONCURRENCY}" \
    " Generations: ${GENS}" \
    " Seed model: ${SEED_MODEL}" \
    " LLM: ${LLM_MODELS}" \
    " Run dir: ${RUN_DIR}" \
    " Eval ports: ${BASE_PORT}-${last_port}" \
    " Trigger: ${EVAL_TRIGGER_MODE} every ${EVAL_TRIGGER_INTERVAL} gens (disabled)" \
    " Logs: ${LOG_DIR}/" \
    "${rule}" \
    ""
}

print_banner

# PIDs of the eval services launched by this script; they are killed on exit.
ALL_SERVICE_PIDS=()

#######################################
# Stop the eval services this script started plus any remaining background
# jobs. Best effort: processes that are already gone are ignored.
# Globals:   ALL_SERVICE_PIDS (read)
# Outputs:   progress messages on stdout
#######################################
cleanup() {
  local spid job_pids

  echo ""
  echo "Cleaning up eval services..."
  # The ${arr[@]+...} form keeps `set -u` quiet when the array is empty.
  for spid in "${ALL_SERVICE_PIDS[@]+"${ALL_SERVICE_PIDS[@]}"}"; do
    kill "${spid}" 2>/dev/null || true
  done

  # Also signal whatever background jobs are still registered with this shell.
  # NOTE(review): for problem runs this signals the background subshell; the
  # python child it started may keep running -- confirm whether a
  # process-group kill is wanted.
  job_pids="$(jobs -p)"
  if [[ -n "${job_pids}" ]]; then
    # shellcheck disable=SC2086 -- deliberately split: one PID per word.
    kill ${job_pids} 2>/dev/null || true
  fi
  echo "Done."
}

# Run cleanup exactly once, on any exit path. INT/TERM must terminate the
# script: a handler that merely returned would let the scheduler keep
# launching problems against services that were just killed.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

#######################################
# Make sure an eval service answers on the given port, starting one if needed.
# Globals:   PYTHON, LOG_DIR (read); ALL_SERVICE_PIDS (appended when a new
#            service is launched)
# Arguments: $1 - TCP port
# Outputs:   status line on stdout; failure details on stderr
# Returns:   0 once <url>/api/v1/status answers, 1 if it never does within
#            20 probes spaced one second apart
#######################################
start_eval_service() {
  local port="$1"
  local log_file="${LOG_DIR}/eval_service_port_${port}.log"
  local url="http://localhost:${port}"
  local status_url="${url}/api/v1/status"
  local spid attempt

  # Reuse a service that is already answering on this port; it is not added
  # to ALL_SERVICE_PIDS, so cleanup will not kill it.
  if curl -s "${status_url}" > /dev/null 2>&1; then
    echo " Eval service already running on port ${port}"
    return 0
  fi

  "${PYTHON}" eval_agent/ev2_service_standalone.py \
    --host "0.0.0.0" --port "${port}" \
    > "${log_file}" 2>&1 &
  spid=$!
  ALL_SERVICE_PIDS+=("${spid}")

  # Poll until the status endpoint answers.
  for ((attempt = 1; attempt <= 20; attempt++)); do
    if curl -s "${status_url}" > /dev/null 2>&1; then
      echo " Eval service ready on port ${port} (pid=${spid})"
      return 0
    fi
    sleep 1
  done

  echo " ERROR: Eval service failed to start on port ${port}" >&2
  echo " See ${log_file}" >&2
  return 1
}

#######################################
# Run the vanilla experiment for one problem against one eval service.
# Globals:   PYTHON, EXP_NAME, SEED_MODEL, GENS, LLM_MODELS, RUN_DIR,
#            EVAL_TRIGGER_MODE, EVAL_TRIGGER_INTERVAL, LOG_DIR (read);
#            FRONTIER_CS_PROBLEM_ID, FRONTIER_CS_JUDGE_URL (exported)
# Arguments: $1 - problem id
#            $2 - port of the eval service assigned to this run
# Outputs:   one DONE/FAIL line on stdout; the experiment's own output goes
#            to ${LOG_DIR}/problem_<id>.log
# Returns:   the exit status of run_experiment.py
#######################################
run_problem_vanilla() {
  local pid="$1"
  local port="$2"
  local url="http://localhost:${port}"
  local log_file="${LOG_DIR}/problem_${pid}.log"
  local status=0

  export FRONTIER_CS_PROBLEM_ID="${pid}"
  export FRONTIER_CS_JUDGE_URL="http://localhost:8081"

  # `|| status=$?` captures a failure without letting `set -e` abort this
  # (background) shell before the FAIL line below is printed.
  # shellcheck disable=SC2086 -- LLM_MODELS is split on purpose so several
  # space-separated model names become separate arguments.
  "${PYTHON}" tasks/frontier_cs_entry/run_experiment.py \
    --experiment-name "${EXP_NAME}" \
    --problem-id "${pid}" \
    --seed-model "${SEED_MODEL}" \
    --num-generations "${GENS}" \
    --max-parallel-jobs 1 \
    --edit-backend single_shot_patch \
    --llm-models ${LLM_MODELS} \
    --run-dir "${RUN_DIR}" \
    --use-eval-service \
    --eval-service-url "${url}" \
    --eval-trigger-mode "${EVAL_TRIGGER_MODE}" \
    --eval-trigger-interval "${EVAL_TRIGGER_INTERVAL}" \
    --use-wandb \
    --wandb-project frontier-cs \
    --wandb-tags frontier_cs vanilla "problem_${pid}" \
    --verbose \
    > "${log_file}" 2>&1 || status=$?

  if (( status == 0 )); then
    echo "DONE problem ${pid} (port ${port})"
  else
    echo "FAIL problem ${pid} (port ${port}, see ${log_file})"
  fi
  return "${status}"
}

#######################################
# Bring up one eval service per concurrency slot on consecutive ports.
# Globals:   CONCURRENCY, BASE_PORT (read)
# Outputs:   progress on stdout, followed by one blank line
# Notes:     under `set -e` a service that fails to start aborts the script.
#######################################
start_all_eval_services() {
  local slot port

  echo "Starting ${CONCURRENCY} eval services..."
  # Arithmetic loop instead of `seq`, which counts downwards on some
  # platforms when the upper bound is below the lower one.
  for ((slot = 0; slot < CONCURRENCY; slot++)); do
    port=$((BASE_PORT + slot))
    start_eval_service "${port}"
  done
  echo ""
}

start_all_eval_services

#######################################
# Slot scheduler: keep up to CONCURRENCY problems running, one per slot,
# until every entry of PIDS has been started and has finished.
# Globals:   PIDS, CONCURRENCY, BASE_PORT (read);
#            SLOT_PIDS, SLOT_PROBLEMS, DONE, FAILED, TOTAL, IDX (written)
# Outputs:   one START line per launched problem on stdout
# Returns:   0 when all problems were processed (regardless of their
#            individual results), 1 if there are problems but no slots
#######################################
run_all_problems() {
  local slot job_pid pid port
  local active=0 # number of slots currently holding a running job

  # SLOT_PIDS[i] is the PID of the job in slot i, or 0 when the slot is free.
  SLOT_PIDS=()
  SLOT_PROBLEMS=()
  for ((slot = 0; slot < CONCURRENCY; slot++)); do
    SLOT_PIDS+=(0)
    SLOT_PROBLEMS+=("")
  done

  DONE=0
  FAILED=0
  TOTAL=${#PIDS[@]}
  IDX=0

  # Without any slot nothing could ever be launched and the loop below
  # would never terminate.
  if (( TOTAL > 0 && CONCURRENCY < 1 )); then
    echo "ERROR: CONCURRENCY must be at least 1 (got '${CONCURRENCY}')" >&2
    return 1
  fi

  while (( IDX < TOTAL || active > 0 )); do
    # Pass 1: reap finished jobs and free their slots.
    for ((slot = 0; slot < CONCURRENCY; slot++)); do
      job_pid="${SLOT_PIDS[slot]}"
      if (( job_pid == 0 )) || kill -0 "${job_pid}" 2>/dev/null; then
        continue # slot is free, or its job is still running
      fi
      # `wait` sits in an `if` so a failed job is counted instead of
      # tripping `set -e` and aborting the whole run.
      if wait "${job_pid}" 2>/dev/null; then
        DONE=$((DONE + 1))
      else
        FAILED=$((FAILED + 1))
      fi
      SLOT_PIDS[slot]=0
      SLOT_PROBLEMS[slot]=""
      active=$((active - 1))
    done

    # Pass 2: hand the next pending problems to free slots.
    for ((slot = 0; slot < CONCURRENCY && IDX < TOTAL; slot++)); do
      if (( SLOT_PIDS[slot] != 0 )); then
        continue
      fi
      pid="${PIDS[IDX]}"
      port=$((BASE_PORT + slot))
      echo "START problem ${pid} on slot ${slot} (port ${port}) [${DONE}+${FAILED}/${TOTAL} complete]"
      run_problem_vanilla "${pid}" "${port}" &
      SLOT_PIDS[slot]=$!
      SLOT_PROBLEMS[slot]="${pid}"
      IDX=$((IDX + 1))
      active=$((active + 1))
    done

    # Poll interval; skipped once there is nothing left to wait for.
    if (( IDX < TOTAL || active > 0 )); then
      sleep 2
    fi
  done
}

run_all_problems

#######################################
# Print the final tally of the run.
# Globals:   DONE, FAILED, TOTAL, RUN_DIR, LOG_DIR (read)
# Outputs:   a blank line followed by the summary block on stdout
#######################################
print_summary() {
  local rule="========================================"

  printf '%s\n' \
    "" \
    "${rule}" \
    "Parallel vanilla (via eval service) complete" \
    " Succeeded: ${DONE}" \
    " Failed: ${FAILED}" \
    " Total: ${TOTAL}" \
    " Run dir: ${RUN_DIR}" \
    " Logs: ${LOG_DIR}/" \
    "${rule}"
}

print_summary

# Report failure to the caller when any problem failed, so wrappers and CI
# do not mistake a partially failed run for a clean one.
if (( FAILED > 0 )); then
  exit 1
fi