shinka-backup / scripts /dev /run_frontier_cs_parallel_vanilla_server.sh

Add files using upload-large-folder tool

3f6526a verified 19 days ago

7.94 kB

	#!/bin/bash
	# Parallel vanilla run via eval service: same pipeline as agent but with
	# eval agent trigger interval set to 1000 (effectively never triggers).
	# This ensures the evaluation code path is identical to the agent run,
	# making the comparison fair.
	#
	# Usage:
	# ./scripts/dev/run_frontier_cs_parallel_vanilla_server.sh # all 172 problems, 20 parallel
	# ./scripts/dev/run_frontier_cs_parallel_vanilla_server.sh 50 # first 50 problems, 20 parallel
	# ./scripts/dev/run_frontier_cs_parallel_vanilla_server.sh 50 10 # first 50 problems, 10 parallel

	# Strict mode: stop on a failing command, on expansion of an unset variable,
	# and when any stage of a pipeline fails.
	set -o errexit -o nounset -o pipefail

	# Work from two directories above this script so the relative paths used
	# below do not depend on the caller's working directory.
	cd "$(dirname "${0}")/../.."

	# Interpreter used for every Python entry point launched below.
	PYTHON='.venv/bin/python'

	# ============================================================================
	# Configuration
	# ============================================================================
	# Command-line arguments:
	#   $1 - how many problems to run, or "all" (default: all)
	#   $2 - how many problems to run at the same time (default: 20)
	NUM_PROBLEMS="${1:-all}"
	CONCURRENCY="${2:-20}"

	# Succeeds only when $1 is a non-empty string made up solely of decimal digits.
	is_decimal_integer() {
	  case "${1-}" in
	    '' | *[!0-9]*) return 1 ;;
	    *) return 0 ;;
	  esac
	}

	# Reject malformed arguments before anything is started. A concurrency of 0
	# would leave the scheduler with no slots to ever run a problem in, and
	# non-numeric values would only fail later inside arithmetic expansions.
	if [ "${NUM_PROBLEMS}" != "all" ]; then
	  if ! is_decimal_integer "${NUM_PROBLEMS}"; then
	    echo "ERROR: number of problems must be 'all' or a non-negative integer (got '${NUM_PROBLEMS}')" >&2
	    exit 2
	  fi
	  # Force base 10 so values such as "08" are not read as invalid octal.
	  NUM_PROBLEMS=$((10#${NUM_PROBLEMS}))
	fi
	if ! is_decimal_integer "${CONCURRENCY}" || [ "$((10#${CONCURRENCY}))" -lt 1 ]; then
	  echo "ERROR: concurrency must be a positive integer (got '${CONCURRENCY}')" >&2
	  exit 2
	fi
	CONCURRENCY=$((10#${CONCURRENCY}))

	GENS=50
	SEED_MODEL="gemini3pro"
	LLM_MODELS="native-gemini-3-flash-preview"
	BASE_PORT=8860 # different port range from agent (8760) to allow concurrent runs

	EVAL_TRIGGER_MODE="periodic"
	EVAL_TRIGGER_INTERVAL=1000 # effectively never triggers eval agent

	# Each invocation writes into its own timestamped run directory.
	TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
	EXP_NAME="vanilla_g${GENS}"
	RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"

	PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
	SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
	LOG_DIR="logs/frontier_cs_parallel_vanilla"
	mkdir -p "${LOG_DIR}"

	# ============================================================================
	# Collect valid problem IDs (sorted numerically, take first N)
	# ============================================================================
	# ALL_PIDS: every entry of PROBLEMS_DIR that has a directory of the same name
	# under SOLUTIONS_DIR, in ascending numeric order.
	ALL_PIDS=()
	if [ ! -d "${PROBLEMS_DIR}" ]; then
	  echo "WARNING: problems directory not found: ${PROBLEMS_DIR}" >&2
	fi
	# Names come from a glob and are read one per line, so `ls` output is never
	# parsed and no word splitting is applied to them.
	while IFS= read -r pid; do
	  if [ -n "${pid}" ] && [ -d "${SOLUTIONS_DIR}/${pid}" ]; then
	    ALL_PIDS+=("${pid}")
	  fi
	done < <(
	  for problem_path in "${PROBLEMS_DIR}"/*; do
	    # An unmatched glob is left as the literal pattern; skip it.
	    [ -e "${problem_path}" ] || continue
	    printf '%s\n' "${problem_path##*/}"
	  done | sort -n
	)

	# PIDS: the problems selected for this run (all of them, or the first
	# NUM_PROBLEMS). The ${arr[@]+...} guard keeps an empty ALL_PIDS from being
	# reported as an unbound variable under `set -u` on bash releases before 4.4.
	if [ "${NUM_PROBLEMS}" = "all" ]; then
	  PIDS=(${ALL_PIDS[@]+"${ALL_PIDS[@]}"})
	else
	  PIDS=(${ALL_PIDS[@]+"${ALL_PIDS[@]:0:${NUM_PROBLEMS}}"})
	fi

	# Print the run configuration banner, one line per setting, followed by a
	# blank line.
	printf '%s\n' \
	  "========================================" \
	  "Frontier-CS Parallel Vanilla (eval service, no agent trigger)" \
	  "========================================" \
	  " Problems: ${#PIDS[@]} / ${#ALL_PIDS[@]} available" \
	  " Concurrency: ${CONCURRENCY}" \
	  " Generations: ${GENS}" \
	  " Seed model: ${SEED_MODEL}" \
	  " LLM: ${LLM_MODELS}" \
	  " Run dir: ${RUN_DIR}" \
	  " Eval ports: ${BASE_PORT}-$((BASE_PORT + CONCURRENCY - 1))" \
	  " Trigger: ${EVAL_TRIGGER_MODE} every ${EVAL_TRIGGER_INTERVAL} gens (disabled)" \
	  " Logs: ${LOG_DIR}/" \
	  "========================================" \
	  ""

	# ============================================================================
	# Track all background PIDs for cleanup
	# ============================================================================
	# PIDs of the eval services launched by this script; appended to by
	# start_eval_service and read by cleanup.
	ALL_SERVICE_PIDS=()

	#######################################
	# Stop every eval service recorded in ALL_SERVICE_PIDS and any background
	# job of this shell that is still alive. Failures to signal a process
	# (typically because it has already exited) are deliberately ignored.
	# Globals:
	#   ALL_SERVICE_PIDS (read)
	# Outputs:
	#   Progress messages on stdout.
	#######################################
	cleanup() {
	  local spid job_pid

	  # Do not let a second Ctrl-C / SIGTERM interrupt the cleanup itself.
	  trap '' INT TERM

	  echo ""
	  echo "Cleaning up eval services..."
	  for spid in ${ALL_SERVICE_PIDS[@]+"${ALL_SERVICE_PIDS[@]}"}; do
	    kill "${spid}" 2>/dev/null || true
	  done
	  # Also kill any child processes. A plain loop avoids `xargs -r`, which is
	  # a GNU extension.
	  for job_pid in $(jobs -p); do
	    kill "${job_pid}" 2>/dev/null || true
	  done
	  echo "Done."
	}

	# cleanup runs exactly once, from the EXIT trap. INT and TERM are turned into
	# a real exit (with the conventional 128+signal status); a handler that only
	# ran cleanup would return and let the script carry on after its services
	# had been killed.
	trap cleanup EXIT
	trap 'exit 130' INT
	trap 'exit 143' TERM

	# ============================================================================
	# Start/stop eval service helpers
	# ============================================================================
	start_eval_service() {
	local port="$1"
	local log_file="${LOG_DIR}/eval_service_port_${port}.log"
	local url="http://localhost:${port}"

	# Check if already running on this port
	if curl -s "${url}/api/v1/status" > /dev/null 2>&1; then
	echo " Eval service already running on port ${port}"
	return 0
	fi

	${PYTHON} eval_agent/ev2_service_standalone.py \
	--host "0.0.0.0" --port "${port}" \
	> "${log_file}" 2>&1 &
	local spid=$!
	ALL_SERVICE_PIDS+=("${spid}")

	# Wait for ready (up to 20s)
	for i in $(seq 1 20); do
	if curl -s "${url}/api/v1/status" > /dev/null 2>&1; then
	echo " Eval service ready on port ${port} (pid=${spid})"
	return 0
	fi
	sleep 1
	done

	echo " ERROR: Eval service failed to start on port ${port}"
	return 1
	}

	# ============================================================================
	# Worker function: run one problem via eval service (agent never triggers)
	# ============================================================================
	#######################################
	# Run one problem through the experiment entry point, pointing it at the
	# eval service on the given port, and report the outcome.
	# Globals (read):
	#   PYTHON, EXP_NAME, SEED_MODEL, GENS, LLM_MODELS, RUN_DIR, LOG_DIR,
	#   EVAL_TRIGGER_MODE, EVAL_TRIGGER_INTERVAL
	# Exports:
	#   FRONTIER_CS_PROBLEM_ID, FRONTIER_CS_JUDGE_URL
	# Arguments:
	#   $1 - problem ID
	#   $2 - port of the eval service to use
	# Outputs:
	#   One DONE/FAIL line on stdout; the experiment's own output goes to
	#   ${LOG_DIR}/problem_<id>.log.
	# Returns:
	#   Exit status of the experiment process.
	#######################################
	run_problem_vanilla() {
	  local pid="$1"
	  local port="$2"
	  local url="http://localhost:${port}"
	  local log_file="${LOG_DIR}/problem_${pid}.log"
	  local status=0

	  export FRONTIER_CS_PROBLEM_ID="${pid}"
	  export FRONTIER_CS_JUDGE_URL="http://localhost:8081"

	  # The status is captured with `|| status=$?`: with errexit active, a plain
	  # failing command would end the (sub)shell right here and the FAIL line
	  # below would never be printed.
	  # LLM_MODELS is left unquoted as in the original invocation, so a
	  # space-separated value is passed as several arguments.
	  # shellcheck disable=SC2086
	  "${PYTHON}" tasks/frontier_cs_entry/run_experiment.py \
	    --experiment-name "${EXP_NAME}" \
	    --problem-id "${pid}" \
	    --seed-model "${SEED_MODEL}" \
	    --num-generations "${GENS}" \
	    --max-parallel-jobs 1 \
	    --edit-backend single_shot_patch \
	    --llm-models ${LLM_MODELS} \
	    --run-dir "${RUN_DIR}" \
	    --use-eval-service \
	    --eval-service-url "${url}" \
	    --eval-trigger-mode "${EVAL_TRIGGER_MODE}" \
	    --eval-trigger-interval "${EVAL_TRIGGER_INTERVAL}" \
	    --use-wandb \
	    --wandb-project frontier-cs \
	    --wandb-tags frontier_cs vanilla "problem_${pid}" \
	    --verbose \
	    > "${log_file}" 2>&1 || status=$?

	  if [ "${status}" -eq 0 ]; then
	    echo "DONE problem ${pid} (port ${port})"
	  else
	    echo "FAIL problem ${pid} (port ${port}, see ${log_file})"
	  fi
	  return "${status}"
	}

	# ============================================================================
	# Start eval services (one per concurrency slot)
	# ============================================================================
	# Bring up one eval service per concurrency slot, on consecutive ports
	# beginning at BASE_PORT. With errexit active, a service that fails to start
	# ends the script.
	printf '%s\n' "Starting ${CONCURRENCY} eval services..."
	for ((slot = 0; slot < CONCURRENCY; slot++)); do
	  port=$((BASE_PORT + slot))
	  start_eval_service "${port}"
	done
	printf '\n'

	# ============================================================================
	# Run problems in parallel, assigning to available slots
	# ============================================================================
	# Slot tracking: which slot is free
	# SIGINT/SIGTERM must end the scheduler. A handler that returns would let
	# the loop below resume and keep launching problems; exiting here still
	# triggers whatever EXIT trap is installed.
	trap 'exit 130' INT
	trap 'exit 143' TERM

	SLOT_PIDS=()     # PID of the background job in each slot (0 = free)
	SLOT_PROBLEMS=() # problem ID per slot
	for ((slot = 0; slot < CONCURRENCY; slot++)); do
	  SLOT_PIDS+=(0)
	  SLOT_PROBLEMS+=("")
	done

	DONE=0
	FAILED=0
	TOTAL=${#PIDS[@]}
	IDX=0     # index of the next problem to launch
	RUNNING=0 # number of slots currently occupied

	# Keep going while problems remain to be launched or jobs are still running.
	while [ "${IDX}" -lt "${TOTAL}" ] || [ "${RUNNING}" -gt 0 ]; do
	  # Check for finished slots
	  for ((slot = 0; slot < CONCURRENCY; slot++)); do
	    job_pid="${SLOT_PIDS[$slot]}"
	    if [ "${job_pid}" -ne 0 ] && ! kill -0 "${job_pid}" 2>/dev/null; then
	      # Job finished. `wait` is evaluated as the `if` condition so that a
	      # failed job is counted; as a standalone command its non-zero status
	      # would trip errexit and abort the whole run.
	      if wait "${job_pid}" 2>/dev/null; then
	        DONE=$((DONE + 1))
	      else
	        FAILED=$((FAILED + 1))
	      fi
	      SLOT_PIDS[$slot]=0
	      SLOT_PROBLEMS[$slot]=""
	      RUNNING=$((RUNNING - 1))
	    fi
	  done

	  # Assign problems to free slots; slot N always uses port BASE_PORT + N.
	  for ((slot = 0; slot < CONCURRENCY; slot++)); do
	    if [ "${SLOT_PIDS[$slot]}" -eq 0 ] && [ "${IDX}" -lt "${TOTAL}" ]; then
	      pid="${PIDS[$IDX]}"
	      port=$((BASE_PORT + slot))
	      echo "START problem ${pid} on slot ${slot} (port ${port}) [${DONE}+${FAILED}/${TOTAL} complete]"
	      run_problem_vanilla "${pid}" "${port}" &
	      SLOT_PIDS[$slot]=$!
	      SLOT_PROBLEMS[$slot]="${pid}"
	      RUNNING=$((RUNNING + 1))
	      IDX=$((IDX + 1))
	    fi
	  done

	  sleep 2
	done

	# Final report: outcome counters plus where results and logs were written.
	printf '%s\n' \
	  "" \
	  "========================================" \
	  "Parallel vanilla (via eval service) complete" \
	  " Succeeded: ${DONE}" \
	  " Failed: ${FAILED}" \
	  " Total: ${TOTAL}" \
	  " Run dir: ${RUN_DIR}" \
	  " Logs: ${LOG_DIR}/" \
	  "========================================"