#!/bin/bash
# Parallel batch run: multiple Frontier-CS problems concurrently.
#
# Runs N problems in parallel without eval service (baseline mode).
# Each problem gets its own process with FRONTIER_CS_PROBLEM_ID env var.
#
# Usage:
# ./scripts/dev/run_frontier_cs_parallel.sh # problems 0-49, 20 parallel
# ./scripts/dev/run_frontier_cs_parallel.sh 0 49 20 # same, explicit
# ./scripts/dev/run_frontier_cs_parallel.sh 50 99 10 # problems 50-99, 10 parallel
set -euo pipefail
# Run from the repository root regardless of invocation directory
# (script lives two levels below the root).
cd "$(dirname "$0")/../.."
PYTHON=".venv/bin/python"
# ============================================================================
# Configuration
# ============================================================================
# Inclusive problem-ID range and parallelism, overridable via $1/$2/$3.
PID_START="${1:-0}"
PID_END="${2:-49}"
CONCURRENCY="${3:-20}"
# Generations per problem passed to run_experiment.py.
GENS=50
SEED_MODEL="gemini3pro"
# May hold several space-separated model names; expanded unquoted below.
LLM_MODELS="native-gemini-3-flash-preview"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EXP_NAME="vanilla_g${GENS}"
RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"
PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
LOG_DIR="logs/frontier_cs_parallel"
mkdir -p "${LOG_DIR}" "${RUN_DIR}"
# ============================================================================
# Collect valid problem IDs
# ============================================================================
# Collect numeric problem IDs in [PID_START, PID_END] that also have a
# reference solution directory. Glob instead of parsing `ls` output
# (parsing ls breaks on unusual names and is flagged by ShellCheck SC2012).
PIDS=()
for dir in "${PROBLEMS_DIR}"/*/; do
  pid="$(basename "${dir}")"
  # Skip non-numeric entries explicitly instead of relying on
  # `[ ... -ge ... ] 2>/dev/null` failing silently.
  [[ "${pid}" =~ ^[0-9]+$ ]] || continue
  # 10# forces base-10 so IDs with leading zeros are not read as octal.
  if (( 10#${pid} >= 10#${PID_START} && 10#${pid} <= 10#${PID_END} )) \
      && [ -d "${SOLUTIONS_DIR}/${pid}" ]; then
    PIDS+=("${pid}")
  fi
done
# Glob expansion is lexicographic; re-sort numerically so jobs launch in
# ascending problem-ID order like the original `ls | sort -n`.
if (( ${#PIDS[@]} > 0 )); then
  mapfile -t PIDS < <(printf '%s\n' "${PIDS[@]}" | sort -n)
fi
# Print the run configuration banner (one heredoc instead of an echo chain).
cat <<EOF
========================================
Frontier-CS Parallel Batch (no eval agent)
========================================
 Problems: ${PID_START}-${PID_END} (${#PIDS[@]} valid)
 Concurrency: ${CONCURRENCY}
 Generations: ${GENS}
 Seed model: ${SEED_MODEL}
 LLM: ${LLM_MODELS}
 Run dir: ${RUN_DIR}
 Logs: ${LOG_DIR}/
========================================

EOF
# ============================================================================
# Worker function: run one problem
# ============================================================================
#######################################
# Run a single Frontier-CS problem end-to-end.
# Globals:   PYTHON EXP_NAME SEED_MODEL GENS LLM_MODELS RUN_DIR LOG_DIR (read)
# Arguments: $1 - problem ID
# Outputs:   one DONE/FAIL status line to stdout; full log goes to
#            ${LOG_DIR}/problem_<id>.log
# Returns:   exit status of run_experiment.py
#######################################
run_problem() {
  local pid="$1"
  local status=0
  # Set env var so evaluator knows which problem to evaluate
  export FRONTIER_CS_PROBLEM_ID="${pid}"
  # `|| status=$?` is required: this function runs in a backgrounded subshell
  # that inherits `set -e`, so without the guard a failing run would kill the
  # subshell before the FAIL line could ever be printed (the previous
  # `local status=$?` was unreachable on failure).
  # shellcheck disable=SC2086 -- LLM_MODELS is intentionally word-split
  ${PYTHON} tasks/frontier_cs_entry/run_experiment.py \
    --experiment-name "${EXP_NAME}" \
    --problem-id "${pid}" \
    --seed-model "${SEED_MODEL}" \
    --num-generations "${GENS}" \
    --max-parallel-jobs 1 \
    --edit-backend single_shot_patch \
    --llm-models ${LLM_MODELS} \
    --run-dir "${RUN_DIR}" \
    --use-wandb \
    --wandb-project frontier-cs \
    --wandb-tags frontier_cs baseline problem_${pid} \
    --verbose \
    > "${LOG_DIR}/problem_${pid}.log" 2>&1 || status=$?
  if [ "${status}" -eq 0 ]; then
    echo "DONE problem ${pid}"
  else
    echo "FAIL problem ${pid} (exit ${status}, see ${LOG_DIR}/problem_${pid}.log)"
  fi
  return "${status}"
}
# Make the worker and its configuration visible to child shells.
export -f run_problem
export PYTHON GENS EXP_NAME SEED_MODEL LLM_MODELS LOG_DIR RUN_DIR
# ============================================================================
# Start progress monitor in background
# ============================================================================
"${PYTHON}" scripts/dev/monitor_frontier_cs.py --results-dir "${RUN_DIR}" --interval 30 &
MONITOR_PID=$!
# Double quotes expand the PID now, at trap-definition time; the monitor is
# always reaped on exit and `|| true` keeps a dead PID from tripping set -e.
trap "kill ${MONITOR_PID} 2>/dev/null || true" EXIT
# ============================================================================
# Run in parallel using background jobs
# ============================================================================
RUNNING=0
DONE=0
FAILED=0
PIDS_RUNNING=() # bash PIDs of background jobs
PIDS_PROBLEM=() # problem IDs corresponding to background jobs

#######################################
# Drop finished jobs from the tracking arrays and tally their results.
# Globals: RUNNING DONE FAILED PIDS_RUNNING PIDS_PROBLEM (read/write)
# Note: `if wait ...` is essential — the original bare
# `wait pid; if [ $? -eq 0 ]` aborted the entire batch under `set -e`
# as soon as any single job failed.
# NOTE(review): if `wait -n` already reaped a job's status, the per-PID
# `wait` here may return 127 and count it as FAILED — same behavior as
# the original; confirm against bash version in use.
#######################################
reap_finished() {
  local i
  local new_running=0
  local new_pids=() new_problems=()
  for i in "${!PIDS_RUNNING[@]}"; do
    if kill -0 "${PIDS_RUNNING[$i]}" 2>/dev/null; then
      # Still alive: keep tracking it.
      new_running=$((new_running + 1))
      new_pids+=("${PIDS_RUNNING[$i]}")
      new_problems+=("${PIDS_PROBLEM[$i]}")
    elif wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
      DONE=$((DONE + 1))
    else
      FAILED=$((FAILED + 1))
    fi
  done
  RUNNING=${new_running}
  # ${arr[@]+...} keeps empty-array assignment safe under set -u.
  PIDS_RUNNING=("${new_pids[@]+"${new_pids[@]}"}")
  PIDS_PROBLEM=("${new_problems[@]+"${new_problems[@]}"}")
}

for PID in "${PIDS[@]}"; do
  # Wait if we've hit concurrency limit
  while [ "${RUNNING}" -ge "${CONCURRENCY}" ]; do
    # Block until some child exits, then rescan the job table.
    wait -n 2>/dev/null || true
    reap_finished
  done
  # Launch new job
  echo "START problem ${PID} [running: ${RUNNING}, done: ${DONE}, failed: ${FAILED}]"
  run_problem "${PID}" &
  PIDS_RUNNING+=("$!")
  PIDS_PROBLEM+=("${PID}")
  RUNNING=$((RUNNING + 1))
done
# Wait for remaining jobs
echo ""
echo "Waiting for remaining ${RUNNING} jobs..."
for i in "${!PIDS_RUNNING[@]}"; do
  # `if wait ...` so a failed job is counted rather than aborting the script
  # under `set -e` (the original bare `wait; [ $? -eq 0 ]` did exactly that).
  if wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
    DONE=$((DONE + 1))
  else
    FAILED=$((FAILED + 1))
  fi
done
# Final summary banner (heredoc instead of a run of echo statements;
# leading blank line replicates the original `echo ""`).
cat <<EOF

========================================
Parallel batch complete
 Succeeded: ${DONE}
 Failed: ${FAILED}
 Total: ${#PIDS[@]}
 Logs: ${LOG_DIR}/
========================================
EOF
|