#!/bin/bash
# Parallel batch run: multiple Frontier-CS problems concurrently.
#
# Runs N problems in parallel without eval service (baseline mode).
# Each problem gets its own process with FRONTIER_CS_PROBLEM_ID env var.
#
# Usage:
#   ./scripts/dev/run_frontier_cs_parallel.sh            # problems 0-49, 20 parallel
#   ./scripts/dev/run_frontier_cs_parallel.sh 0 49 20    # same, explicit
#   ./scripts/dev/run_frontier_cs_parallel.sh 50 99 10   # problems 50-99, 10 parallel

set -euo pipefail
cd "$(dirname "$0")/../.."

PYTHON=".venv/bin/python"

# ============================================================================
# Configuration
# ============================================================================
PID_START="${1:-0}"
PID_END="${2:-49}"
CONCURRENCY="${3:-20}"
GENS=50
SEED_MODEL="gemini3pro"
LLM_MODELS="native-gemini-3-flash-preview"

TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EXP_NAME="vanilla_g${GENS}"
RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"
PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
LOG_DIR="logs/frontier_cs_parallel"

mkdir -p "${LOG_DIR}" "${RUN_DIR}"

# ============================================================================
# Collect valid problem IDs
# ============================================================================
# Glob instead of parsing `ls` (SC2045); only purely numeric directory names
# in [PID_START, PID_END] that also have a matching solutions dir are kept.
PIDS=()
for entry in "${PROBLEMS_DIR}"/*; do
  pid="${entry##*/}"
  # Skip non-numeric names (also handles the literal '*' when the glob
  # matches nothing, since nullglob is not set).
  [[ "${pid}" =~ ^[0-9]+$ ]] || continue
  if [ "${pid}" -ge "${PID_START}" ] && [ "${pid}" -le "${PID_END}" ]; then
    if [ -d "${SOLUTIONS_DIR}/${pid}" ]; then
      PIDS+=("${pid}")
    fi
  fi
done
# Preserve the original numeric launch order (glob order is lexicographic).
if [ "${#PIDS[@]}" -gt 0 ]; then
  mapfile -t PIDS < <(printf '%s\n' "${PIDS[@]}" | sort -n)
fi

echo "========================================"
echo "Frontier-CS Parallel Batch (no eval agent)"
echo "========================================"
echo " Problems: ${PID_START}-${PID_END} (${#PIDS[@]} valid)"
echo " Concurrency: ${CONCURRENCY}"
echo " Generations: ${GENS}"
echo " Seed model: ${SEED_MODEL}"
echo " LLM: ${LLM_MODELS}"
echo " Run dir: ${RUN_DIR}"
echo " Logs: ${LOG_DIR}/"
echo "========================================"
echo ""

# ============================================================================
# Worker function: run one problem
# ============================================================================
# Arguments: $1 - problem ID
# Outputs:   per-problem log in ${LOG_DIR}/problem_<id>.log; DONE/FAIL line
#            on stdout
# Returns:   exit status of run_experiment.py
run_problem() {
  local pid="$1"
  # Set env var so evaluator knows which problem to evaluate
  export FRONTIER_CS_PROBLEM_ID="${pid}"

  # Capture failure via `|| status=$?`: under `set -e` a bare
  # `local status=$?` after the command never runs when it fails (the
  # background subshell exits first), so FAIL lines were never printed
  # and a non-zero status was never propagated.
  local status=0
  # NOTE: ${LLM_MODELS} is intentionally unquoted so multiple
  # space-separated models expand to separate arguments.
  ${PYTHON} tasks/frontier_cs_entry/run_experiment.py \
    --experiment-name "${EXP_NAME}" \
    --problem-id "${pid}" \
    --seed-model "${SEED_MODEL}" \
    --num-generations "${GENS}" \
    --max-parallel-jobs 1 \
    --edit-backend single_shot_patch \
    --llm-models ${LLM_MODELS} \
    --run-dir "${RUN_DIR}" \
    --use-wandb \
    --wandb-project frontier-cs \
    --wandb-tags frontier_cs baseline problem_${pid} \
    --verbose \
    > "${LOG_DIR}/problem_${pid}.log" 2>&1 || status=$?

  if [ ${status} -eq 0 ]; then
    echo "DONE problem ${pid}"
  else
    echo "FAIL problem ${pid} (exit ${status}, see ${LOG_DIR}/problem_${pid}.log)"
  fi
  return ${status}
}
# export -f kept in case workers are ever dispatched via xargs/parallel;
# the current launcher calls run_problem directly in this shell.
export -f run_problem
export PYTHON GENS EXP_NAME SEED_MODEL LLM_MODELS LOG_DIR RUN_DIR

# ============================================================================
# Start progress monitor in background
# ============================================================================
${PYTHON} scripts/dev/monitor_frontier_cs.py \
  --results-dir "${RUN_DIR}" --interval 30 &
MONITOR_PID=$!
trap "kill ${MONITOR_PID} 2>/dev/null || true" EXIT

# ============================================================================
# Run in parallel using background jobs
# ============================================================================
RUNNING=0
DONE=0
FAILED=0
PIDS_RUNNING=()   # bash PIDs of background jobs
PIDS_PROBLEM=()   # problem IDs corresponding to background jobs

for PID in "${PIDS[@]}"; do
  # Wait if we've hit concurrency limit
  while [ "${RUNNING}" -ge "${CONCURRENCY}" ]; do
    # Wait for any child to finish (bash 4.3+)
    wait -n 2>/dev/null || true

    # Check which jobs finished
    NEW_RUNNING=0
    NEW_PIDS_RUNNING=()
    NEW_PIDS_PROBLEM=()
    for i in "${!PIDS_RUNNING[@]}"; do
      if kill -0 "${PIDS_RUNNING[$i]}" 2>/dev/null; then
        NEW_RUNNING=$((NEW_RUNNING + 1))
        NEW_PIDS_RUNNING+=("${PIDS_RUNNING[$i]}")
        NEW_PIDS_PROBLEM+=("${PIDS_PROBLEM[$i]}")
      else
        # Job finished: reap it inside `if` so a non-zero status does not
        # kill the whole script under `set -e` (a bare `wait` followed by
        # `[ $? -eq 0 ]` aborts on the first failed problem).
        if wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
          DONE=$((DONE + 1))
        else
          FAILED=$((FAILED + 1))
        fi
      fi
    done
    RUNNING=${NEW_RUNNING}
    PIDS_RUNNING=("${NEW_PIDS_RUNNING[@]+"${NEW_PIDS_RUNNING[@]}"}")
    PIDS_PROBLEM=("${NEW_PIDS_PROBLEM[@]+"${NEW_PIDS_PROBLEM[@]}"}")
  done

  # Launch new job
  echo "START problem ${PID} [running: ${RUNNING}, done: ${DONE}, failed: ${FAILED}]"
  run_problem "${PID}" &
  PIDS_RUNNING+=($!)
  PIDS_PROBLEM+=("${PID}")
  RUNNING=$((RUNNING + 1))
done

# Wait for remaining jobs
echo ""
echo "Waiting for remaining ${RUNNING} jobs..."
for i in "${!PIDS_RUNNING[@]}"; do
  # Same `set -e` guard as above: reap inside the condition.
  if wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
    DONE=$((DONE + 1))
  else
    FAILED=$((FAILED + 1))
  fi
done

echo ""
echo "========================================"
echo "Parallel batch complete"
echo " Succeeded: ${DONE}"
echo " Failed: ${FAILED}"
echo " Total: ${#PIDS[@]}"
echo " Logs: ${LOG_DIR}/"
echo "========================================"