#!/usr/bin/env bash
# Long-training run for full-architecture completion attempt.
#
# The 5-minute autoresearch budget is for mutation screening — it's nowhere
# near enough compute for this small model (~6M params) to produce coherent
# English. This script runs the SAME full-architecture train.py with an
# extended budget so the "factual English" completion criterion can actually
# be tested end-to-end.
#
# Usage:
#   ./scripts/long_train.sh                                    # default 1-hour budget
#   HYDRA_TIME_BUDGET=7200 ./scripts/long_train.sh             # 2 hours
#   HYDRA_D_MODEL=384 HYDRA_N_LAYER=6 ./scripts/long_train.sh  # scale model
#
# Output: run_long_<timestamp>.log in repo root. Includes factual_english_score.

set -euo pipefail

# Always operate from the repository root, regardless of where the script
# was invoked from (script lives in scripts/).
cd "$(dirname "$0")/.."

TIME_BUDGET="${HYDRA_TIME_BUDGET:-3600}"   # seconds; default 1 hour
STAMP="$(date +%Y%m%d_%H%M%S)"
LOG="run_long_${STAMP}.log"

export HYDRA_TIME_BUDGET="${TIME_BUDGET}"

# Fail fast with a clear message if the project venv is missing — otherwise
# the run dies mid-script with an opaque "No such file or directory".
if [[ ! -x .venv/bin/python ]]; then
  echo "error: .venv/bin/python not found — create the venv before running" >&2
  exit 1
fi

echo "=== HYDRA long-training run ==="
echo "time_budget: ${TIME_BUDGET}s ($((TIME_BUDGET / 60))m)"
echo "d_model: ${HYDRA_D_MODEL:-256 (default)}"
echo "n_layer: ${HYDRA_N_LAYER:-4 (default)}"
echo "d_state: ${HYDRA_D_STATE:-64 (default)}"
echo "log: ${LOG}"
echo

# pipefail ensures a train.py failure propagates through tee.
.venv/bin/python train.py 2>&1 | tee "${LOG}"

echo
echo "=== Summary ==="
# Guard the grep: under `set -e` a no-match (exit 1) would otherwise fail
# the whole script even after a successful training run.
grep -E "^val_bpb:|^factual_english_score:|^factual_english_hits:|^peak_vram_mb:|^num_steps:" "${LOG}" \
  || echo "warning: no summary metrics found in ${LOG}" >&2