File size: 5,823 Bytes
3f6526a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/bin/bash
# Parallel batch run: multiple Frontier-CS problems concurrently.
#
# Runs N problems in parallel without eval service (baseline mode).
# Each problem gets its own process with FRONTIER_CS_PROBLEM_ID env var.
#
# Usage:
#   ./scripts/dev/run_frontier_cs_parallel.sh                    # problems 0-49, 20 parallel
#   ./scripts/dev/run_frontier_cs_parallel.sh 0 49 20            # same, explicit
#   ./scripts/dev/run_frontier_cs_parallel.sh 50 99 10           # problems 50-99, 10 parallel

# Fail fast: -e abort on error, -u abort on unset var, pipefail for pipelines.
set -euo pipefail
# Script lives in scripts/dev/; run everything from the repo root.
cd "$(dirname "$0")/../.."

# Repo-local virtualenv interpreter (relative to repo root).
PYTHON=".venv/bin/python"

# ============================================================================
# Configuration
# ============================================================================
# Positional args: [start-id] [end-id] [max-parallel], with defaults 0 49 20.
PID_START="${1:-0}"
PID_END="${2:-49}"
CONCURRENCY="${3:-20}"

GENS=50                                        # generations per problem
SEED_MODEL="gemini3pro"                        # model that seeds generation 0
LLM_MODELS="native-gemini-3-flash-preview"     # space-separated list accepted

# Each invocation gets a unique run dir: results/.../vanilla_g50_<timestamp>.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EXP_NAME="vanilla_g${GENS}"
RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"

PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
LOG_DIR="logs/frontier_cs_parallel"
mkdir -p "${LOG_DIR}" "${RUN_DIR}"

# ============================================================================
# Collect valid problem IDs
# ============================================================================
# Enumerate problem directories with a glob (never parse `ls` output — it
# word-splits and breaks on unusual names), keep only purely numeric IDs in
# [PID_START, PID_END] that also have a reference solution, in numeric order.
PIDS=()
while IFS= read -r pid; do
    # Skip non-numeric entries (also filters the literal '*' from an
    # unmatched glob when the problems dir is empty).
    [[ "${pid}" =~ ^[0-9]+$ ]] || continue
    if (( pid >= PID_START && pid <= PID_END )) && [[ -d "${SOLUTIONS_DIR}/${pid}" ]]; then
        PIDS+=("${pid}")
    fi
done < <(for entry in "${PROBLEMS_DIR}"/*; do printf '%s\n' "${entry##*/}"; done | sort -n)

# Startup banner: one heredoc instead of a dozen echo calls (same output).
cat <<EOF
========================================
Frontier-CS Parallel Batch (no eval agent)
========================================
  Problems:    ${PID_START}-${PID_END} (${#PIDS[@]} valid)
  Concurrency: ${CONCURRENCY}
  Generations: ${GENS}
  Seed model:  ${SEED_MODEL}
  LLM:         ${LLM_MODELS}
  Run dir:     ${RUN_DIR}
  Logs:        ${LOG_DIR}/
========================================

EOF

# ============================================================================
# Worker function: run one problem
# ============================================================================
#######################################
# Run one Frontier-CS problem to completion and report DONE/FAIL on stdout.
# Globals:   PYTHON, EXP_NAME, SEED_MODEL, GENS, LLM_MODELS, RUN_DIR, LOG_DIR
# Arguments: $1 - problem ID
# Outputs:   "DONE problem <id>" or "FAIL problem <id> (...)" on stdout;
#            all experiment output goes to ${LOG_DIR}/problem_<id>.log
# Returns:   exit status of run_experiment.py
#######################################
run_problem() {
    local pid="$1"
    local status=0

    # The evaluator reads this env var to know which problem to score.
    export FRONTIER_CS_PROBLEM_ID="${pid}"

    # BUGFIX: capture failure with '|| status=$?'. The previous
    # 'cmd; local status=$?' form never ran under 'set -e' (inherited by
    # this backgrounded shell), so failures exited silently with no FAIL
    # line and no status accounting.
    # NOTE: ${LLM_MODELS} is intentionally unquoted so a space-separated
    # list expands into multiple --llm-models arguments.
    ${PYTHON} tasks/frontier_cs_entry/run_experiment.py \
        --experiment-name "${EXP_NAME}" \
        --problem-id "${pid}" \
        --seed-model "${SEED_MODEL}" \
        --num-generations "${GENS}" \
        --max-parallel-jobs 1 \
        --edit-backend single_shot_patch \
        --llm-models ${LLM_MODELS} \
        --run-dir "${RUN_DIR}" \
        --use-wandb \
        --wandb-project frontier-cs \
        --wandb-tags frontier_cs baseline problem_${pid} \
        --verbose \
        > "${LOG_DIR}/problem_${pid}.log" 2>&1 || status=$?

    if [ "${status}" -eq 0 ]; then
        echo "DONE problem ${pid}"
    else
        echo "FAIL problem ${pid} (exit ${status}, see ${LOG_DIR}/problem_${pid}.log)"
    fi
    return "${status}"
}

# Make the worker and its configuration visible to any child shells.
export -f run_problem
export PYTHON GENS EXP_NAME SEED_MODEL LLM_MODELS LOG_DIR RUN_DIR

# ============================================================================
# Start progress monitor in background
# ============================================================================
"${PYTHON}" scripts/dev/monitor_frontier_cs.py \
    --results-dir "${RUN_DIR}" --interval 30 &
MONITOR_PID=$!
# Single quotes: expand at trap-fire time (MONITOR_PID is already final).
trap 'kill "${MONITOR_PID}" 2>/dev/null || true' EXIT

# ============================================================================
# Run in parallel using background jobs
# ============================================================================
RUNNING=0
DONE=0
FAILED=0
PIDS_RUNNING=()  # bash PIDs of background jobs
PIDS_PROBLEM=()  # problem IDs corresponding to background jobs

for PID in "${PIDS[@]}"; do
    # Throttle: block here while the concurrency limit is saturated.
    while [ "${RUNNING}" -ge "${CONCURRENCY}" ]; do
        # Sleep until at least one child exits; its status is collected below.
        wait -n 2>/dev/null || true

        # Rebuild the bookkeeping arrays, keeping only still-live jobs and
        # reaping the finished ones.
        NEW_RUNNING=0
        NEW_PIDS_RUNNING=()
        NEW_PIDS_PROBLEM=()
        for i in "${!PIDS_RUNNING[@]}"; do
            if kill -0 "${PIDS_RUNNING[$i]}" 2>/dev/null; then
                NEW_RUNNING=$((NEW_RUNNING + 1))
                NEW_PIDS_RUNNING+=("${PIDS_RUNNING[$i]}")
                NEW_PIDS_PROBLEM+=("${PIDS_PROBLEM[$i]}")
            else
                # BUGFIX: use 'if wait ...' rather than 'wait ...; [ $? -eq 0 ]'.
                # A bare 'wait' returning a failed job's non-zero status would
                # trip 'set -e' and abort the whole batch on the first failure.
                if wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
                    DONE=$((DONE + 1))
                else
                    FAILED=$((FAILED + 1))
                fi
            fi
        done
        RUNNING=${NEW_RUNNING}
        # ${arr[@]+...} guards empty-array expansion under 'set -u' (bash < 4.4).
        PIDS_RUNNING=("${NEW_PIDS_RUNNING[@]+"${NEW_PIDS_RUNNING[@]}"}")
        PIDS_PROBLEM=("${NEW_PIDS_PROBLEM[@]+"${NEW_PIDS_PROBLEM[@]}"}")
    done

    # Slot available: launch the next problem in the background.
    echo "START problem ${PID} [running: ${RUNNING}, done: ${DONE}, failed: ${FAILED}]"
    run_problem "${PID}" &
    PIDS_RUNNING+=("$!")
    PIDS_PROBLEM+=("${PID}")
    RUNNING=$((RUNNING + 1))
done

# Barrier: reap every job still in flight and fold its status into the tally.
echo ""
echo "Waiting for remaining ${RUNNING} jobs..."
for i in "${!PIDS_RUNNING[@]}"; do
    # BUGFIX: 'if wait ...' so a failed job's non-zero status does not trip
    # 'set -e' and kill the script before the remaining jobs are reaped.
    if wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
        DONE=$((DONE + 1))
    else
        FAILED=$((FAILED + 1))
    fi
done

# Final summary: one heredoc instead of a stack of echo calls (same output).
cat <<EOF

========================================
Parallel batch complete
  Succeeded: ${DONE}
  Failed:    ${FAILED}
  Total:     ${#PIDS[@]}
  Logs:      ${LOG_DIR}/
========================================
EOF