# Source: feather-runtime / overlay/scripts/long_train.sh
# Uploaded by Jackoatmon — commit 6618931 (verified):
# "Normalize shell line endings in Docker build"
#!/usr/bin/env bash
# Long-training run for full-architecture completion attempt.
#
# The 5-minute autoresearch budget is for mutation screening — it's nowhere
# near enough compute for this small model (~6M params) to produce coherent
# English. This script runs the SAME full-architecture train.py with an
# extended budget so the "factual English" completion criterion can actually
# be tested end-to-end.
#
# Usage:
#   ./scripts/long_train.sh                                     # default 1-hour budget
#   HYDRA_TIME_BUDGET=7200 ./scripts/long_train.sh              # 2 hours
#   HYDRA_D_MODEL=384 HYDRA_N_LAYER=6 ./scripts/long_train.sh   # scale model
#
# Output: run_long_<timestamp>.log in repo root. Includes factual_english_score.
set -euo pipefail

# Always operate from the repo root, regardless of invocation directory.
cd "$(dirname "$0")/.."

TIME_BUDGET="${HYDRA_TIME_BUDGET:-3600}"
STAMP="$(date +%Y%m%d_%H%M%S)"
LOG="run_long_${STAMP}.log"
# train.py reads its wall-clock budget from this env var.
export HYDRA_TIME_BUDGET="${TIME_BUDGET}"

# Fail early with an actionable message instead of a bare "not found" later.
if [[ ! -x .venv/bin/python ]]; then
  printf 'error: .venv/bin/python is missing or not executable — set up the venv first\n' >&2
  exit 1
fi

echo "=== HYDRA long-training run ==="
echo "time_budget: ${TIME_BUDGET}s ($((TIME_BUDGET / 60))m)"
echo "d_model: ${HYDRA_D_MODEL:-256 (default)}"
echo "n_layer: ${HYDRA_N_LAYER:-4 (default)}"
echo "d_state: ${HYDRA_D_STATE:-64 (default)}"
echo "log: ${LOG}"
echo
# pipefail ensures a train.py failure aborts here even though tee succeeds.
.venv/bin/python train.py 2>&1 | tee "${LOG}"
echo
echo "=== Summary ==="
# grep exits non-zero when nothing matches; under `set -e` that would turn a
# successful training run into a script failure, so degrade to a warning.
grep -E "^val_bpb:|^factual_english_score:|^factual_english_hits:|^peak_vram_mb:|^num_steps:" "${LOG}" \
  || printf 'warning: no summary metrics found in %s\n' "${LOG}" >&2