#!/usr/bin/env bash
#
# Drives the "200M until-selection" training phases.
#
# For a phase, a token budget is converted into a number of training steps
# (ceil(target_tokens / (BATCH_SIZE * SEQ_LEN))) and
# scripts/remote/run_200m_base_suite.sh is executed with phase-specific output
# and checkpoint directories.
#
# Environment overrides (defaults in parentheses):
#   REPOBRIDGE_OUTPUT_DIR   root output dir ($(pwd)/results/200m-until-selection)
#   TAOTERN_CHECKPOINT_DIR  root checkpoint dir ($ROOT_OUTPUT_DIR/checkpoints)
#   SEQ_LEN (512), BATCH_SIZE (8)
#   PILOT_TOKENS (300000000), SERIOUS_TOKENS (1000000000)
#   PILOT_EVAL_BATCHES (64), SERIOUS_EVAL_BATCHES (128)
#   LEARNING_RATE (0.0003), WEIGHT_DECAY (0.01)
#
# NOTE: each statement must stay on its own line. If the script is collapsed
# onto the shebang line, bash treats the whole file as a comment and nothing
# (including strict mode) takes effect.
set -euo pipefail

ROOT_OUTPUT_DIR="${REPOBRIDGE_OUTPUT_DIR:-$(pwd)/results/200m-until-selection}"
ROOT_CHECKPOINT_DIR="${TAOTERN_CHECKPOINT_DIR:-$ROOT_OUTPUT_DIR/checkpoints}"

# These are intentionally not readonly: run_phase re-assigns several of them
# as per-command environment prefixes, which bash rejects for readonly names.
SEQ_LEN="${SEQ_LEN:-512}"
BATCH_SIZE="${BATCH_SIZE:-8}"
PILOT_TOKENS="${PILOT_TOKENS:-300000000}"
SERIOUS_TOKENS="${SERIOUS_TOKENS:-1000000000}"
PILOT_EVAL_BATCHES="${PILOT_EVAL_BATCHES:-64}"
SERIOUS_EVAL_BATCHES="${SERIOUS_EVAL_BATCHES:-128}"
LEARNING_RATE="${LEARNING_RATE:-0.0003}"
WEIGHT_DECAY="${WEIGHT_DECAY:-0.01}"

#######################################
# Integer ceiling division.
# Arguments:
#   $1 - numerator, a non-negative decimal integer
#   $2 - denominator, a positive decimal integer
# Outputs:
#   ceil($1 / $2) on stdout; a diagnostic on stderr for invalid input
# Returns:
#   0 on success, 1 if either argument is not an acceptable integer
#######################################
ceil_div() {
  local numerator="${1-}"
  local denominator="${2-}"
  # Patterns are kept in variables so the regex metacharacters are not
  # subject to shell quoting rules inside [[ =~ ]].
  local non_negative_re='^(0|[1-9][0-9]*)$'
  local positive_re='^[1-9][0-9]*$'

  # Validate before doing arithmetic: bash evaluates arbitrary strings inside
  # $(( )) as expressions, and a zero denominator is a fatal arithmetic error.
  if ! [[ "$numerator" =~ $non_negative_re ]]; then
    printf 'ceil_div: numerator must be a non-negative integer, got %s\n' \
      "'$numerator'" >&2
    return 1
  fi
  if ! [[ "$denominator" =~ $positive_re ]]; then
    printf 'ceil_div: denominator must be a positive integer, got %s\n' \
      "'$denominator'" >&2
    return 1
  fi

  printf '%s\n' "$(( (numerator + denominator - 1) / denominator ))"
}

#######################################
# Runs one phase of the 200M base suite.
# Globals (read):
#   BATCH_SIZE, SEQ_LEN, LEARNING_RATE, WEIGHT_DECAY,
#   ROOT_OUTPUT_DIR, ROOT_CHECKPOINT_DIR
# Arguments:
#   $1 - phase name; also used as the sub-directory under both root dirs
#   $2 - target number of training tokens for the phase
#   $3 - number of evaluation batches
# Outputs:
#   a banner describing the phase on stdout, followed by whatever the suite
#   script prints
# Returns:
#   non-zero if the step count cannot be computed; otherwise the exit status
#   of scripts/remote/run_200m_base_suite.sh
#######################################
run_phase() {
  local phase="$1"
  local target_tokens="$2"
  local eval_batches="$3"
  local tokens_per_step=$((BATCH_SIZE * SEQ_LEN))
  local train_steps

  # Declared separately from the assignment so a ceil_div failure is not
  # masked by `local`; propagated explicitly so it also works when this
  # function is called from a context where `set -e` is suppressed.
  train_steps="$(ceil_div "$target_tokens" "$tokens_per_step")" || return 1

  printf '\n============================================================\n'
  printf '200M until-selection phase: %s\n' "$phase"
  printf 'target_tokens=%s batch=%s seq_len=%s train_steps=%s eval_batches=%s\n' \
    "$target_tokens" "$BATCH_SIZE" "$SEQ_LEN" "$train_steps" "$eval_batches"
  printf '============================================================\n'

  # The settings are passed as environment variables scoped to this single
  # invocation. Note the suite receives the batch size as BATCH_SIZES.
  REPOBRIDGE_OUTPUT_DIR="$ROOT_OUTPUT_DIR/$phase" \
  TAOTERN_CHECKPOINT_DIR="$ROOT_CHECKPOINT_DIR/$phase" \
  BATCH_SIZES="$BATCH_SIZE" \
  SEQ_LEN="$SEQ_LEN" \
  TRAIN_STEPS="$train_steps" \
  EVAL_BATCHES="$eval_batches" \
  LEARNING_RATE="$LEARNING_RATE" \
  WEIGHT_DECAY="$WEIGHT_DECAY" \
    bash scripts/remote/run_200m_base_suite.sh
}

mkdir -p "$ROOT_OUTPUT_DIR" "$ROOT_CHECKPOINT_DIR"

# NOTE(review): the statement that writes "$ROOT_OUTPUT_DIR/run_plan.json" is
# cut off in the reviewed source, which ends at the redirection operator shown
# below. It is preserved verbatim but commented out because it is not valid
# shell as it stands. Restore the complete statement, and anything that
# follows it, from the full file. TODO confirm.
# cat > "$ROOT_OUTPUT_DIR/run_plan.json" <