#!/usr/bin/env bash
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Strict mode: abort on command failure, on use of an unset variable, and on
# failure of any stage of a pipeline.
set -euo pipefail

# Positional argument 1: path to the JSON training config.
CONFIG="${1:-configs/base_124m.json}"
# Data directory; can be overridden through the DATA_DIR environment variable.
DATA_DIR="${DATA_DIR:-data/fwedu}"
# Remote checkpoint location; defaults to empty when the environment does not
# provide one.
REMOTE="${REMOTE:-}"
# Read train.ckpt_dir out of the config. The config path is handed to Python
# as an argument (sys.argv[1]) instead of being pasted into the Python source,
# so paths containing quotes or backslashes are read literally and cannot
# alter the code that runs. A missing file or key makes python exit non-zero,
# which aborts the script under `set -e`.
CKPT_DIR="$(python -c 'import json, sys; print(json.load(open(sys.argv[1]))["train"]["ckpt_dir"])' "$CONFIG")"
|
|
#######################################
# Upload the local checkpoint directory to remote storage (best effort).
# Registered as the EXIT trap right below, so it runs when the script exits.
# Upload problems are reported on stderr but never propagated: the function
# always returns 0.
# Globals:   REMOTE (read)   - upload destination; empty/unset skips the upload
#            CKPT_DIR (read) - local directory to upload
# Arguments: none
# Outputs:   progress lines on stdout, warnings on stderr
# Returns:   0 always
#######################################
sync_checkpoints() {
  if [ -z "${REMOTE:-}" ]; then
    echo "[sync] REMOTE unset, skipping upload"
    return 0
  fi

  echo "[sync] uploading $CKPT_DIR -> $REMOTE"
  # Use aws when it is available, otherwise rclone. A failed transfer is
  # deliberately non-fatal, but it is reported instead of being hidden.
  if command -v aws >/dev/null; then
    aws s3 sync "$CKPT_DIR" "$REMOTE" \
      || echo "[sync] WARNING: aws s3 sync to $REMOTE failed; upload may be incomplete" >&2
  elif command -v rclone >/dev/null; then
    rclone copy "$CKPT_DIR" "$REMOTE" \
      || echo "[sync] WARNING: rclone copy to $REMOTE failed; upload may be incomplete" >&2
  else
    echo "[sync] WARNING: neither aws nor rclone found; nothing was uploaded" >&2
  fi
  return 0
}
trap sync_checkpoints EXIT
|
|
# Install Python dependencies from requirements.txt. pip is invoked as
# `python -m pip` so the packages are installed for the same `python`
# interpreter this script uses for its other steps; a bare `pip` found on
# PATH may belong to a different Python installation.
echo "[setup] installing deps"
python -m pip install -q -r requirements.txt
|
|
| |
#######################################
# Pull existing checkpoints from remote storage into the local checkpoint
# directory (best effort). Tries aws first; if aws is missing or its transfer
# fails, tries rclone. When neither succeeds a warning is printed to stderr
# and the script carries on with whatever is in CKPT_DIR.
# Globals:   REMOTE (read)   - source location; empty/unset makes this a no-op
#            CKPT_DIR (read) - local directory, created if missing
# Arguments: none
# Outputs:   tool output on stdout, warning on stderr
# Returns:   0 unless CKPT_DIR cannot be created
#######################################
restore_checkpoints() {
  [ -n "${REMOTE:-}" ] || return 0

  mkdir -p "$CKPT_DIR"
  if command -v aws >/dev/null && aws s3 sync "$REMOTE" "$CKPT_DIR"; then
    return 0
  fi
  if command -v rclone >/dev/null && rclone copy "$REMOTE" "$CKPT_DIR"; then
    return 0
  fi
  # Non-fatal on purpose, but say so: otherwise a failed restore is invisible.
  echo "[sync] WARNING: could not restore checkpoints from $REMOTE (aws/rclone missing or failed); continuing with local contents of $CKPT_DIR" >&2
  return 0
}
restore_checkpoints
|
|
# Run the tokenization script only when $DATA_DIR/manifest.json is not
# already present as a regular file.
if ! [[ -f "${DATA_DIR}/manifest.json" ]]; then
  printf '%s\n' "[data] no manifest in $DATA_DIR; tokenizing FineWeb-Edu (~3B tokens)"
  python scripts/prepare_data.py \
    --out-dir "$DATA_DIR" \
    --target-tokens 3000000000
fi
|
|
# Announce and start the training run with the selected config and data
# directory. Kept as an ordinary child process (no `exec`) so that traps
# registered by this script still run when it exits.
printf '%s\n' "[train] launching $CONFIG"
python run.py \
  --config "$CONFIG" \
  --data-dir "$DATA_DIR"
|
|