#!/usr/bin/env bash
# Long-training run for full-architecture completion attempt.
#
# The 5-minute autoresearch budget is for mutation screening — it's nowhere
# near enough compute for this small model (~6M params) to produce coherent
# English. This script runs the SAME full-architecture train.py with an
# extended budget so the "factual English" completion criterion can actually
# be tested end-to-end.
#
# Usage:
#   ./scripts/long_train.sh                                    # default 1-hour budget
#   HYDRA_TIME_BUDGET=7200 ./scripts/long_train.sh             # 2 hours
#   HYDRA_D_MODEL=384 HYDRA_N_LAYER=6 ./scripts/long_train.sh  # scale model
#
# Output: run_long_<timestamp>.log in repo root. Includes factual_english_score.

set -euo pipefail

# Always operate from the repository root, regardless of where the script
# was invoked from (script lives in scripts/).
cd "$(dirname "$0")/.."

TIME_BUDGET="${HYDRA_TIME_BUDGET:-3600}"   # seconds; default 1 hour
STAMP="$(date +%Y%m%d_%H%M%S)"
LOG="run_long_${STAMP}.log"

export HYDRA_TIME_BUDGET="${TIME_BUDGET}"

# Fail fast with a clear message if the project venv is missing — otherwise
# the run dies mid-script with an opaque "No such file or directory".
if [[ ! -x .venv/bin/python ]]; then
  echo "error: .venv/bin/python not found — create the venv before running" >&2
  exit 1
fi

echo "=== HYDRA long-training run ==="
echo "time_budget: ${TIME_BUDGET}s ($((TIME_BUDGET / 60))m)"
echo "d_model: ${HYDRA_D_MODEL:-256 (default)}"
echo "n_layer: ${HYDRA_N_LAYER:-4 (default)}"
echo "d_state: ${HYDRA_D_STATE:-64 (default)}"
echo "log: ${LOG}"
echo

# pipefail ensures a train.py failure propagates through tee.
.venv/bin/python train.py 2>&1 | tee "${LOG}"

echo
echo "=== Summary ==="
# Guard the grep: under `set -e` a no-match (exit 1) would otherwise fail
# the whole script even after a successful training run.
grep -E "^val_bpb:|^factual_english_score:|^factual_english_hits:|^peak_vram_mb:|^num_steps:" "${LOG}" \
  || echo "warning: no summary metrics found in ${LOG}" >&2