#!/usr/bin/env bash
#
# Launcher for the 200M "branch-only chat" run.
#
# Resolves every run setting from the environment (falling back to the
# defaults below), packs the settings into a single JOB_COMMAND string that
# invokes scripts/remote/run_200m_branch_only_chat.sh, and then runs
# scripts/remote/submit_detached_job.sh.
#
# Both script paths are relative, so this launcher has to be started from a
# directory in which they resolve (REMOTE_REPO defaults to the current
# directory).
#
# Exported to the submit script: RUN_ID, JOB_ROOT, JOB_COMMAND, OUTPUT_DIR,
# CHECKPOINT_DIR.
# NOTE(review): how submit_detached_job.sh consumes these is not visible
# here; JOB_COMMAND is assumed to be parsed by a shell (e.g. `bash -c`).
set -euo pipefail

# --- Run identity and locations ---------------------------------------------
RUN_ID="${RUN_ID:-taotern-200m-branch-only-chat-$(date +%Y%m%d-%H%M%S)}"
JOB_ROOT="${JOB_ROOT:-/home/student/YouZheng/jobs/taotern}"
REMOTE_REPO="${REMOTE_REPO:-$(pwd)}"
PYTHON_BIN="${PYTHON_BIN:-/home/student/.venv/bin/python}"
SSM_REPO_PATH="${SSM_REPO_PATH:-/home/student/YouZheng/gamma_ssm_repo}"

# --- Data and tokenizer ------------------------------------------------------
DATA_PATH="${DATA_PATH:-/home/student/Data/TaoData/pretrain.jsonl}"
SFT_DATA_PATH="${SFT_DATA_PATH:-/home/student/Data/TaoData/sft.jsonl}"
TOKENIZER_PATH="${TOKENIZER_PATH:-/home/student/YouZheng/tokenizers/taodata_pilot_8k/tokenizer.model}"

# --- Training settings -------------------------------------------------------
SEQ_LEN="${SEQ_LEN:-512}"
BATCH_SIZE="${BATCH_SIZE:-8}"
PRETRAIN_TOKENS="${PRETRAIN_TOKENS:-4000000000}"
SFT_STEPS="${SFT_STEPS:-50000}"
PRETRAIN_LR="${PRETRAIN_LR:-0.0008}"
SFT_LR="${SFT_LR:-0.00005}"
WEIGHT_DECAY="${WEIGHT_DECAY:-0.01}"
LOG_EVERY="${LOG_EVERY:-100}"
SAVE_EVERY="${SAVE_EVERY:-100000}"
SFT_SAVE_EVERY="${SFT_SAVE_EVERY:-10000}"
TOKENIZER_THREADS="${TOKENIZER_THREADS:-8}"
SAMPLES_PER_CHUNK="${SAMPLES_PER_CHUNK:-2000}"
# Empty by default; it is still forwarded, as an explicitly empty value.
BLOCK_RESIDUAL_RMS_CAP="${BLOCK_RESIDUAL_RMS_CAP:-}"

#######################################
# Print the command line for the training job: one NAME=value assignment per
# forwarded setting, followed by the run script invocation.
# Values are shell-quoted with printf %q so that whitespace or shell
# metacharacters (REMOTE_REPO defaults to $(pwd), which may contain spaces)
# survive being re-parsed by a shell. Values made only of ordinary path and
# number characters come out unchanged; an empty value becomes ''.
# Globals:   reads each variable named in forwarded_names
# Arguments: none
# Outputs:   the command string on stdout (no trailing newline)
#######################################
build_job_command() {
  # Order matches the order the settings appear in the command string.
  local -a forwarded_names=(
    REMOTE_REPO
    PYTHON_BIN
    SSM_REPO_PATH
    DATA_PATH
    SFT_DATA_PATH
    TOKENIZER_PATH
    SEQ_LEN
    BATCH_SIZE
    PRETRAIN_TOKENS
    SFT_STEPS
    PRETRAIN_LR
    SFT_LR
    WEIGHT_DECAY
    LOG_EVERY
    SAVE_EVERY
    SFT_SAVE_EVERY
    TOKENIZER_THREADS
    SAMPLES_PER_CHUNK
    BLOCK_RESIDUAL_RMS_CAP
  )
  local name
  for name in "${forwarded_names[@]}"; do
    # ${!name} is indirect expansion: the value of the variable named by $name.
    printf '%s=%q ' "$name" "${!name}"
  done
  printf '%s' 'bash scripts/remote/run_200m_branch_only_chat.sh'
}

# Assigned separately from the export so a failure inside the command
# substitution is not masked and still aborts the script under `set -e`.
JOB_COMMAND="$(build_job_command)"

export RUN_ID JOB_ROOT JOB_COMMAND
export OUTPUT_DIR="${OUTPUT_DIR:-$JOB_ROOT/$RUN_ID/outputs}"
export CHECKPOINT_DIR="${CHECKPOINT_DIR:-$JOB_ROOT/$RUN_ID/checkpoints}"

bash scripts/remote/submit_detached_job.sh