#!/usr/bin/env bash
#
# Assemble the configuration for a "taotern 200m until selection" run and hand
# it to scripts/remote/submit_detached_job.sh.
#
# Every setting below may be overridden from the caller's environment; the
# value shown is only the fallback used when the variable is unset or empty.
#
# Exported for the submit script:
#   RUN_ID, JOB_ROOT, JOB_COMMAND, OUTPUT_DIR, CHECKPOINT_DIR
#
# NOTE: both helper scripts are referenced by relative path
# (scripts/remote/...), so this launcher must be started from the directory
# that contains scripts/ (REMOTE_REPO defaults to that same directory).
set -euo pipefail

# --- Run identity and locations ---------------------------------------------
# The default run id carries a timestamp so repeated submissions do not collide.
RUN_ID="${RUN_ID:-taotern-200m-until-selection-$(date +%Y%m%d-%H%M%S)}"
JOB_ROOT="${JOB_ROOT:-/home/student/YouZheng/jobs/taotern}"
REMOTE_REPO="${REMOTE_REPO:-$(pwd)}"
PYTHON_BIN="${PYTHON_BIN:-/home/student/.venv/bin/python}"
SSM_REPO_PATH="${SSM_REPO_PATH:-/home/student/YouZheng/gamma_ssm_repo}"
DATA_PATH="${DATA_PATH:-/home/student/Data/TaoData/pretrain.jsonl}"
TOKENIZER_PATH="${TOKENIZER_PATH:-/home/student/YouZheng/tokenizers/taodata_pilot_8k/tokenizer.model}"

# --- Training / evaluation settings ------------------------------------------
SEQ_LEN="${SEQ_LEN:-512}"
BATCH_SIZE="${BATCH_SIZE:-8}"
PILOT_TOKENS="${PILOT_TOKENS:-300000000}"
SERIOUS_TOKENS="${SERIOUS_TOKENS:-1000000000}"
PILOT_EVAL_BATCHES="${PILOT_EVAL_BATCHES:-64}"
SERIOUS_EVAL_BATCHES="${SERIOUS_EVAL_BATCHES:-128}"
MAX_TOKENS="${MAX_TOKENS:-50000000}"
MAX_RECORDS="${MAX_RECORDS:-100000}"
LEARNING_RATE="${LEARNING_RATE:-0.0003}"
WEIGHT_DECAY="${WEIGHT_DECAY:-0.01}"

# --- Build the command line that the detached job will run -------------------
# Settings forwarded to run_200m_until_selection.sh as VAR=value prefixes, in
# the order they appear in the resulting command string.
forwarded_vars=(
  REMOTE_REPO
  PYTHON_BIN
  SSM_REPO_PATH
  DATA_PATH
  TOKENIZER_PATH
  SEQ_LEN
  BATCH_SIZE
  PILOT_TOKENS
  SERIOUS_TOKENS
  PILOT_EVAL_BATCHES
  SERIOUS_EVAL_BATCHES
  MAX_TOKENS
  MAX_RECORDS
  LEARNING_RATE
  WEIGHT_DECAY
)

# JOB_COMMAND is a string that gets re-parsed by a shell later, so each value
# is shell-quoted with %q. Plain values (the defaults) come out unchanged;
# values with spaces or metacharacters survive the second parse intact.
# NOTE(review): %q emits bash quoting; this assumes the submit script evaluates
# JOB_COMMAND with bash - confirm against submit_detached_job.sh.
JOB_COMMAND=""
for var_name in "${forwarded_vars[@]}"; do
  printf -v quoted_value '%q' "${!var_name}"
  JOB_COMMAND+="${var_name}=${quoted_value} "
done
JOB_COMMAND+="bash scripts/remote/run_200m_until_selection.sh"

# --- Hand off to the submit script --------------------------------------------
export RUN_ID JOB_ROOT JOB_COMMAND
export OUTPUT_DIR="${OUTPUT_DIR:-$JOB_ROOT/$RUN_ID/outputs}"
export CHECKPOINT_DIR="${CHECKPOINT_DIR:-$JOB_ROOT/$RUN_ID/checkpoints}"

bash scripts/remote/submit_detached_job.sh