#!/usr/bin/env bash
# Local sequential depth sweep on RTX 3060.
# Uses real mamba_ssm Mamba3 (grafted from state-spaces/mamba main).
# Config: Gen 76 local champion (d_model=96, engram=4096, target_active=327),
# sweeping n_layer ∈ {1, 2, 3, 4}. Each run 300s (~5 min) → ~20 min total.
set -euo pipefail

# Work from the parent of the directory that holds this script, so relative
# paths (e.g. train.py) resolve no matter where the script is invoked from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.."

export CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}
# WSL2: libcuda.so.1 lives at /usr/lib/wsl/lib; prepend it so cudarc finds the
# CUDA driver library at runtime.
# The inherited LD_LIBRARY_PATH is appended only when it is non-empty: a
# trailing ':' would add an empty entry, which the dynamic loader treats as
# the current working directory.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:/usr/lib/wsl/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PYTORCH_ALLOC_CONF=expandable_segments:True

# GPU HTM path: use non-fused step_many_cuda (fused megakernel is Hopper-only).
# This drops htm_await from ~20-40s/step (CPU) to ~0ms (GPU, async).
export HYDRA_HTM_FUSED=0

# Architecture (Gen 76 + user audit: keep target_active=327 for gradient plasticity).
export HYDRA_D_MODEL=96
export HYDRA_D_STATE=16
export HYDRA_HEADDIM=12
export HYDRA_EXPAND=3
export HYDRA_ENGRAM_N_COLUMNS=4096
export HYDRA_SDR_TARGET_ACTIVE=327

# Training knobs tuned for 6GB VRAM.
export HYDRA_BATCH_SIZE=1
export HYDRA_TOTAL_BATCH=32768    # 1 * 8 accum * 512 seq * 8 heads = Gen 76 config
export HYDRA_TIME_BUDGET=300      # 5 min per run
export HYDRA_CKPT_INTERVAL=0      # don't save ckpts during sweep
export HYDRA_MID_VAL_INTERVAL=250

# Full per-layer diagnostic panel.
export HYDRA_LAYER_DIAGNOSTICS=1
export HYDRA_LAYER_DIAG_SVD_EVERY=100

# Use cached shards + tokenizer + retina (vocab=8192, target_active=327).
# NOT streaming — already have 2049 shards from prior local runs.
# Clear any inherited HYDRA_USE_NEMOTRON so it cannot leak into the training
# process's environment.
unset HYDRA_USE_NEMOTRON

# Interpreter and output directory. The defaults are the original local paths;
# set HYDRA_SWEEP_PYTHON / HYDRA_SWEEP_OUT_DIR to run the sweep elsewhere.
PY=${HYDRA_SWEEP_PYTHON:-/home/mikeb/work/feather/.venv/bin/python3}
OUT_DIR=${HYDRA_SWEEP_OUT_DIR:-/tmp/local_sweep}
mkdir -p -- "$OUT_DIR"

# Depths whose training run exited non-zero; reported once the sweep is over.
failed_layers=()

for N in 1 2 3 4; do
  echo "=========================================="
  echo "=== n_layer=$N  $(date +%H:%M:%S) ==="
  echo "=========================================="
  export HYDRA_N_LAYER=$N
  export HYDRA_METRICS_OUT="$OUT_DIR/sweep_n${N}_metrics.json"
  LOG="$OUT_DIR/sweep_n${N}.log"

  # A failing run must not abort the sweep: warn on stderr, remember the depth,
  # and carry on with the next one.
  "$PY" -u train.py > "$LOG" 2>&1 || {
    echo "[WARN] n_layer=$N run exited non-zero (see $LOG)" >&2
    failed_layers+=("$N")
  }

  echo "=== n_layer=$N done; metrics=$HYDRA_METRICS_OUT log=$LOG ==="
  # Quick tail of the important lines. grep exits 1 when nothing matches (and
  # 2 when the log is missing); '|| true' keeps that from being fatal.
  grep -E -- "val_bpb|LAYER_DIAG|METRICS_JSON" "$LOG" | tail -20 || true
done

echo ""
echo "=== SWEEP COMPLETE ==="
if (( ${#failed_layers[@]} > 0 )); then
  echo "[WARN] runs that exited non-zero: n_layer=${failed_layers[*]}" >&2
fi
# Kept as the last command so the script's exit status is unchanged.
ls -la -- "$OUT_DIR"