File size: 2,391 Bytes
6618931
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env bash
# Local sequential depth sweep on RTX 3060.
# Uses real mamba_ssm Mamba3 (grafted from state-spaces/mamba main).
# Config: Gen 76 local champion (d_model=96, engram=4096, target_active=327),
# sweeping n_layer ∈ {1, 2, 3, 4}. Each run 300s (~5 min) → ~20 min total.

# Strict mode: abort on a failing command, an unset variable, or a failing
# pipeline stage.
set -euo pipefail
# Work from the parent of this script's directory; train.py is invoked relative
# to it.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# Honor a caller-supplied CUDA_HOME, otherwise use the default install prefix.
export CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}
# WSL2: libcuda.so.1 lives at /usr/lib/wsl/lib; put it ahead of any inherited
# entries so cudarc finds the CUDA driver library at runtime.
# The inherited LD_LIBRARY_PATH is appended only when it is non-empty: an
# unconditional ":${LD_LIBRARY_PATH:-}" leaves a trailing ':' when the variable
# is unset, and the dynamic loader reads that empty entry as "current working
# directory", i.e. it would load shared objects from wherever we cd'ed to.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:/usr/lib/wsl/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# NOTE(review): PyTorch releases that predate the PYTORCH_ALLOC_CONF name read
# PYTORCH_CUDA_ALLOC_CONF instead — confirm the venv's torch honors this one.
export PYTORCH_ALLOC_CONF=expandable_segments:True

# HTM kernel selection: run the non-fused step_many_cuda GPU path, because the
# fused megakernel is Hopper-only. Relative to the CPU path this takes
# htm_await from ~20-40s/step down to ~0ms (GPU, async).
export HYDRA_HTM_FUSED=0

# Model architecture: Gen 76, plus the user-audit decision to keep
# target_active=327 for gradient plasticity.
export \
  HYDRA_D_MODEL=96 \
  HYDRA_D_STATE=16 \
  HYDRA_HEADDIM=12 \
  HYDRA_EXPAND=3 \
  HYDRA_ENGRAM_N_COLUMNS=4096 \
  HYDRA_SDR_TARGET_ACTIVE=327

# Training knobs sized for 6GB of VRAM:
#   HYDRA_TOTAL_BATCH    32768 = 1 * 8 accum * 512 seq * 8 heads (Gen 76 config)
#   HYDRA_TIME_BUDGET    300 -> 5 min per run
#   HYDRA_CKPT_INTERVAL  0   -> no checkpoints are saved during the sweep
export \
  HYDRA_BATCH_SIZE=1 \
  HYDRA_TOTAL_BATCH=32768 \
  HYDRA_TIME_BUDGET=300 \
  HYDRA_CKPT_INTERVAL=0 \
  HYDRA_MID_VAL_INTERVAL=250

# Turn on the full per-layer diagnostic panel.
export \
  HYDRA_LAYER_DIAGNOSTICS=1 \
  HYDRA_LAYER_DIAG_SVD_EVERY=100

# Data source: the cached shards + tokenizer + retina (vocab=8192,
# target_active=327) rather than streaming — 2049 shards are already on disk
# from prior local runs. Clearing the variable guards against an inherited value.
unset HYDRA_USE_NEMOTRON

#######################################
# Run train.py once for each n_layer in {1, 2, 3, 4} and print a short digest
# of every run's log. A failing run does not stop the sweep: it is reported on
# stderr and reflected in the return status, so callers can tell a clean sweep
# from a partial one.
# Globals:
#   HYDRA_SWEEP_PY       (read)  interpreter to launch; defaults to the feather
#                                venv python3
#   HYDRA_SWEEP_OUT_DIR  (read)  directory for logs/metrics; defaults to
#                                /tmp/local_sweep
#   HYDRA_N_LAYER        (exported per run)
#   HYDRA_METRICS_OUT    (exported per run; path handed to train.py)
# Arguments: none
# Outputs:   progress banners, log digest and a directory listing on stdout;
#            warnings on stderr; sweep_n<N>.log files in the output directory
# Returns:   0 if every run exited zero, 1 otherwise
#######################################
run_depth_sweep() {
  local py=${HYDRA_SWEEP_PY:-/home/mikeb/work/feather/.venv/bin/python3}
  local out_dir=${HYDRA_SWEEP_OUT_DIR:-/tmp/local_sweep}
  local n log
  local failed_layers=""

  mkdir -p "$out_dir" || return 1

  for n in 1 2 3 4; do
    echo "=========================================="
    echo "=== n_layer=$n  $(date +%H:%M:%S) ==="
    echo "=========================================="
    export HYDRA_N_LAYER=$n
    export HYDRA_METRICS_OUT="$out_dir/sweep_n${n}_metrics.json"
    log="$out_dir/sweep_n${n}.log"

    # Keep sweeping after a failed run, but remember it for the final status.
    if ! "$py" -u train.py > "$log" 2>&1; then
      echo "[WARN] n_layer=$n run exited non-zero (see $log)" >&2
      failed_layers+=" $n"
    fi

    echo "=== n_layer=$n done; metrics=$HYDRA_METRICS_OUT log=$log ==="
    # Quick tail of the important lines. grep exits 1 when nothing matches,
    # which pipefail would otherwise surface as an error; that is not a failure.
    grep -E "val_bpb|LAYER_DIAG|METRICS_JSON" "$log" | tail -20 || true
  done

  echo ""
  echo "=== SWEEP COMPLETE ==="
  ls -la "$out_dir"

  if [[ -n "$failed_layers" ]]; then
    echo "[WARN] runs that exited non-zero: n_layer=${failed_layers# }" >&2
    return 1
  fi
}

run_depth_sweep