shinka-backup / scripts /dev /run_frontier_cs_parallel_with_agent.sh
JustinTX's picture
Add files using upload-large-folder tool
3f6526a verified
#!/bin/bash
# Parallel batch run with eval agent: multiple Frontier-CS problems concurrently,
# each with its own eval service instance on a separate port.
#
# Usage:
# ./scripts/dev/run_frontier_cs_parallel_with_agent.sh # all 172 problems, 20 parallel
# ./scripts/dev/run_frontier_cs_parallel_with_agent.sh 50 # first 50 problems, 20 parallel
# ./scripts/dev/run_frontier_cs_parallel_with_agent.sh 50 10 # first 50 problems, 10 parallel
set -euo pipefail
# Run from the repository root (two levels above this script) so that all
# relative paths below resolve the same way regardless of the caller's cwd.
cd "$(dirname "$0")/../.."
PYTHON=".venv/bin/python"
# ============================================================================
# Configuration
# ============================================================================
# $1: number of problems to run ("all" or a count); $2: number of parallel slots.
NUM_PROBLEMS="${1:-all}"
CONCURRENCY="${2:-20}"
# Reject anything that is not a positive integer: a value of 0 would leave the
# scheduler with no slots (it would never make progress), and non-numeric input
# would only fail later inside arithmetic expansion with a cryptic message.
if ! [[ "${CONCURRENCY}" =~ ^[0-9]+$ ]] || [ "$((10#${CONCURRENCY}))" -lt 1 ]; then
  echo "ERROR: concurrency must be a positive integer, got '${CONCURRENCY}'" >&2
  exit 2
fi
# Force base 10 so that input such as "08" is not parsed as invalid octal.
CONCURRENCY=$((10#${CONCURRENCY}))
GENS=50
SEED_MODEL="gemini3pro"
LLM_MODELS="native-gemini-3-flash-preview"
BASE_PORT=8760 # eval services get ports 8760, 8761, 8762, ...
EVAL_TRIGGER_MODE="periodic"
EVAL_TRIGGER_INTERVAL=5
# The timestamp makes every invocation write into its own run directory.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EXP_NAME="agent_g${GENS}"
RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"
PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
LOG_DIR="logs/frontier_cs_parallel_agent"
mkdir -p "${LOG_DIR}"
# ============================================================================
# Collect valid problem IDs
# ============================================================================
# Build ALL_PIDS: every entry of PROBLEMS_DIR (numerically sorted) that also
# has a matching directory under SOLUTIONS_DIR. Then select PIDS, either all
# of them or the first NUM_PROBLEMS.
if [ ! -d "${PROBLEMS_DIR}" ]; then
  echo "ERROR: problems directory not found: ${PROBLEMS_DIR}" >&2
  exit 1
fi

ALL_PIDS=()
# Glob instead of parsing `ls`, and read line-by-line so names are never
# word-split or glob-expanded.
while IFS= read -r pid; do
  if [ -n "${pid}" ] && [ -d "${SOLUTIONS_DIR}/${pid}" ]; then
    ALL_PIDS+=("${pid}")
  fi
done < <(
  for entry in "${PROBLEMS_DIR}"/*; do
    # An unmatched glob stays literal; skip it.
    [ -e "${entry}" ] || continue
    printf '%s\n' "${entry##*/}"
  done | sort -n
)

PIDS=()
if [ "${NUM_PROBLEMS}" = "all" ]; then
  # Expanding an empty array trips `set -u` on bash < 4.4, so only expand
  # when there is something to copy.
  if [ "${#ALL_PIDS[@]}" -gt 0 ]; then
    PIDS=("${ALL_PIDS[@]}")
  fi
elif [[ "${NUM_PROBLEMS}" =~ ^[0-9]+$ ]]; then
  if [ "${#ALL_PIDS[@]}" -gt 0 ]; then
    # 10# forces base 10 so "08" is not rejected as invalid octal.
    PIDS=("${ALL_PIDS[@]:0:$((10#${NUM_PROBLEMS}))}")
  fi
else
  echo "ERROR: number of problems must be 'all' or a non-negative integer, got '${NUM_PROBLEMS}'" >&2
  exit 2
fi
# Print the run configuration banner (followed by one blank line) in a single
# here-document instead of one echo per line.
cat <<EOF
========================================
Frontier-CS Parallel Batch (with eval agent)
========================================
 Problems: ${#PIDS[@]} / ${#ALL_PIDS[@]} available
 Concurrency: ${CONCURRENCY}
 Generations: ${GENS}
 Seed model: ${SEED_MODEL}
 LLM: ${LLM_MODELS}
 Run dir: ${RUN_DIR}
 Eval ports: ${BASE_PORT}-$((BASE_PORT + CONCURRENCY - 1))
 Trigger: ${EVAL_TRIGGER_MODE} every ${EVAL_TRIGGER_INTERVAL} gens
 Logs: ${LOG_DIR}/
========================================

EOF
# ============================================================================
# Track all background PIDs for cleanup
# ============================================================================
ALL_SERVICE_PIDS=()
CLEANUP_DONE=0

#######################################
# Stop every eval service recorded in ALL_SERVICE_PIDS plus any remaining
# background jobs of this shell. Only the first call does any work, so the
# handler can be reached from several exit paths without repeating itself.
# Globals:   ALL_SERVICE_PIDS (read), CLEANUP_DONE (read/written)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 always; failures to signal a process are ignored on purpose
#            because the process may already have exited.
#######################################
cleanup() {
  if [ "${CLEANUP_DONE}" -eq 1 ]; then
    return 0
  fi
  CLEANUP_DONE=1
  echo ""
  echo "Cleaning up eval services..."
  local spid
  # The ${arr[@]+...} form avoids an "unbound variable" error under `set -u`
  # on older bash when no service was ever started.
  for spid in ${ALL_SERVICE_PIDS[@]+"${ALL_SERVICE_PIDS[@]}"}; do
    kill "${spid}" 2>/dev/null || true
  done
  # Also kill any child processes
  for spid in $(jobs -p); do
    kill "${spid}" 2>/dev/null || true
  done
  echo "Done."
}
# Run cleanup exactly once, on exit. INT/TERM must terminate the script: a
# signal handler that merely returns would let execution resume after the
# services were killed. `exit` inside the handler fires the EXIT trap.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# ============================================================================
# Start/stop eval service helpers
# ============================================================================
#######################################
# Ensure an eval service answers on the given local port.
# If the status endpoint already responds, nothing is launched. Otherwise the
# standalone service is started in the background, its PID is appended to
# ALL_SERVICE_PIDS, and the status endpoint is polled until it responds.
# Globals:   LOG_DIR (read), PYTHON (read), ALL_SERVICE_PIDS (appended)
# Arguments: $1 - TCP port for the service
# Outputs:   progress on stdout, errors on stderr; service output goes to
#            ${LOG_DIR}/eval_service_port_<port>.log
# Returns:   0 when the service responds, 1 when it exits early or does not
#            respond within 20 polling attempts
#######################################
start_eval_service() {
  local port="$1"
  local log_file="${LOG_DIR}/eval_service_port_${port}.log"
  local url="http://localhost:${port}"
  local spid attempt

  # Check if already running on this port. --max-time bounds the probe so a
  # port that accepts connections but never answers cannot hang the script.
  if curl -s --max-time 2 "${url}/api/v1/status" > /dev/null 2>&1; then
    echo " Eval service already running on port ${port}"
    return 0
  fi

  "${PYTHON}" eval_agent/ev2_service_standalone.py \
    --host "0.0.0.0" --port "${port}" \
    > "${log_file}" 2>&1 &
  spid=$!
  ALL_SERVICE_PIDS+=("${spid}")

  # Wait for ready (up to 20 attempts, one second apart)
  for ((attempt = 1; attempt <= 20; attempt++)); do
    if curl -s --max-time 2 "${url}/api/v1/status" > /dev/null 2>&1; then
      echo " Eval service ready on port ${port} (pid=${spid})"
      return 0
    fi
    # Stop polling as soon as the launched process is gone; it will never
    # become ready and the log file holds the reason.
    if ! kill -0 "${spid}" 2>/dev/null; then
      echo " ERROR: Eval service exited before becoming ready on port ${port} (see ${log_file})" >&2
      return 1
    fi
    sleep 1
  done
  echo " ERROR: Eval service failed to start on port ${port}" >&2
  return 1
}
# ============================================================================
# Worker function: run one problem with its own eval service
# ============================================================================
#######################################
# Run the experiment entry point for one problem against the eval service
# listening on the given port, then report the outcome on stdout.
# Globals:   PYTHON, EXP_NAME, SEED_MODEL, GENS, LLM_MODELS, RUN_DIR,
#            EVAL_TRIGGER_MODE, EVAL_TRIGGER_INTERVAL, LOG_DIR (all read);
#            exports FRONTIER_CS_PROBLEM_ID and FRONTIER_CS_JUDGE_URL
# Arguments: $1 - problem ID
#            $2 - port of the eval service assigned to this run
# Outputs:   one "DONE ..." or "FAIL ..." line on stdout; the experiment's
#            own output goes to ${LOG_DIR}/problem_<id>.log
# Returns:   exit status of the experiment command
#######################################
run_problem_with_agent() {
  local pid="$1"
  local port="$2"
  local url="http://localhost:${port}"
  local log_file="${LOG_DIR}/problem_${pid}.log"
  local status=0

  export FRONTIER_CS_PROBLEM_ID="${pid}"
  export FRONTIER_CS_JUDGE_URL="http://localhost:8081"

  # Capture the status with `|| status=$?`: the script runs under `set -e`,
  # so a bare failing command would terminate this (background) shell before
  # the FAIL line below could be printed.
  # LLM_MODELS is deliberately unquoted so that a space-separated list is
  # passed as several arguments.
  # shellcheck disable=SC2086
  "${PYTHON}" tasks/frontier_cs_entry/run_experiment.py \
    --experiment-name "${EXP_NAME}" \
    --problem-id "${pid}" \
    --seed-model "${SEED_MODEL}" \
    --num-generations "${GENS}" \
    --max-parallel-jobs 1 \
    --edit-backend single_shot_patch \
    --llm-models ${LLM_MODELS} \
    --run-dir "${RUN_DIR}" \
    --use-eval-service \
    --eval-service-url "${url}" \
    --eval-trigger-mode "${EVAL_TRIGGER_MODE}" \
    --eval-trigger-interval "${EVAL_TRIGGER_INTERVAL}" \
    --use-wandb \
    --wandb-project frontier-cs \
    --wandb-tags frontier_cs agent "problem_${pid}" \
    --verbose \
    > "${log_file}" 2>&1 || status=$?

  if [ "${status}" -eq 0 ]; then
    echo "DONE problem ${pid} (port ${port})"
  else
    echo "FAIL problem ${pid} (port ${port}, see ${log_file})"
  fi
  return "${status}"
}
# ============================================================================
# Start eval services (one per concurrency slot)
# ============================================================================
echo "Starting ${CONCURRENCY} eval services..."
# Slot k is served by the eval service on port BASE_PORT + k.
for ((slot = 0; slot < CONCURRENCY; slot++)); do
  port=$((BASE_PORT + slot))
  start_eval_service "${port}"
done
echo ""
# ============================================================================
# Run problems in parallel, assigning to available slots
# ============================================================================
# Slot tracking: which slot is free
# Scheduler state. Each slot is bound to one eval service port
# (BASE_PORT + slot) and runs at most one problem at a time.
SLOT_PIDS=()     # background job PID per slot (0 = free)
SLOT_PROBLEMS=() # problem ID per slot ("" = free)
for ((slot = 0; slot < CONCURRENCY; slot++)); do
  SLOT_PIDS+=(0)
  SLOT_PROBLEMS+=("")
done
DONE=0
FAILED=0
TOTAL=${#PIDS[@]}
IDX=0    # index into PIDS of the next problem to start
ACTIVE=0 # number of slots currently running a job

# Without any slot the loop below could never start a job and would spin
# forever, so refuse to continue.
if [ "${TOTAL}" -gt 0 ] && [ "${#SLOT_PIDS[@]}" -eq 0 ]; then
  echo "ERROR: no worker slots available (CONCURRENCY=${CONCURRENCY}); cannot run ${TOTAL} problems" >&2
  exit 1
fi

# Keep going while problems remain to be started or any job is still running.
while [ "${IDX}" -lt "${TOTAL}" ] || [ "${ACTIVE}" -gt 0 ]; do
  # Check for finished slots
  for ((slot = 0; slot < CONCURRENCY; slot++)); do
    job_pid="${SLOT_PIDS[$slot]}"
    if [ "${job_pid}" -ne 0 ] && ! kill -0 "${job_pid}" 2>/dev/null; then
      # Job finished. `wait` must sit inside the `if` condition: as a bare
      # command under `set -e`, a failed job's non-zero status would abort
      # the whole batch instead of being counted as one failure.
      if wait "${job_pid}" 2>/dev/null; then
        DONE=$((DONE + 1))
      else
        FAILED=$((FAILED + 1))
      fi
      SLOT_PIDS[$slot]=0
      SLOT_PROBLEMS[$slot]=""
      ACTIVE=$((ACTIVE - 1))
    fi
  done
  # Assign problems to free slots
  for ((slot = 0; slot < CONCURRENCY; slot++)); do
    if [ "${SLOT_PIDS[$slot]}" -eq 0 ] && [ "${IDX}" -lt "${TOTAL}" ]; then
      pid="${PIDS[$IDX]}"
      port=$((BASE_PORT + slot))
      echo "START problem ${pid} on slot ${slot} (port ${port}) [${DONE}+${FAILED}/${TOTAL} complete]"
      run_problem_with_agent "${pid}" "${port}" &
      SLOT_PIDS[$slot]=$!
      SLOT_PROBLEMS[$slot]="${pid}"
      ACTIVE=$((ACTIVE + 1))
      IDX=$((IDX + 1))
    fi
  done
  # Polling interval between scheduling passes.
  sleep 2
done
# Final report: a blank line, then the success/failure totals and log location.
cat <<EOF

========================================
Parallel batch with agent complete
 Succeeded: ${DONE}
 Failed: ${FAILED}
 Total: ${TOTAL}
 Logs: ${LOG_DIR}/
========================================
EOF