File size: 879 Bytes
9477b5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#!/usr/bin/env bash
# Launch the BLT-Reasoner pilot on the box.
#
# The trainer is started as the module `experiments.blt_reasoner.train` with
# /home/ubuntu as the working directory, using the config under
# /home/ubuntu/experiments/blt_reasoner/configs/. The job is detached with
# nohup; its stdout/stderr are appended to run.log and its PID is written to
# run.pid, both under /home/ubuntu/work/blt_pilot1.
#
# Exits non-zero without launching if python3 or the config is missing, or if
# the PID recorded by a previous launch is still alive.
set -euo pipefail

readonly ROOT=/home/ubuntu
readonly WORK_DIR="${ROOT}/work/blt_pilot1"
readonly CONFIG="${ROOT}/experiments/blt_reasoner/configs/pilot_qwen15b_gsm8k.json"
readonly LOG="${WORK_DIR}/run.log"
readonly PID_FILE="${WORK_DIR}/run.pid"

# Print a diagnostic to stderr and abort.
die() {
  printf 'launch: %s\n' "$*" >&2
  exit 1
}

cd "$ROOT" || die "cannot cd to $ROOT"

export TOKENIZERS_PARALLELISM=false
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
export HF_HUB_DISABLE_PROGRESS_BARS=1
# Reduce fragmentation in the long-running K-curriculum job (autograd graph
# + KV cache create many small allocations); expandable_segments lets the
# allocator coalesce returned blocks instead of OOMing at later K.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Fail fast here: once the job is detached, these errors would only show up
# inside run.log while this script still reported a successful launch.
command -v python3 >/dev/null || die "python3 not found on PATH"
[[ -r "$CONFIG" ]] || die "config not readable: $CONFIG"

mkdir -p "$WORK_DIR"

# Refuse to start a second trainer on top of a live one: it would overwrite
# run.pid (losing track of the first job) and interleave both runs in run.log.
# NOTE: this trusts the recorded PID; a recycled PID can cause a false
# positive, in which case remove the stale run.pid by hand.
if [[ -s "$PID_FILE" ]]; then
  prev_pid=$(<"$PID_FILE")
  if [[ "$prev_pid" =~ ^[0-9]+$ ]] && kill -0 "$prev_pid" 2>/dev/null; then
    die "pilot already running (pid=$prev_pid); stop it or remove $PID_FILE"
  fi
fi

nohup python3 -u -m experiments.blt_reasoner.train \
    --config "$CONFIG" \
    >> "$LOG" 2>&1 &
PID=$!
echo "$PID" > "$PID_FILE"
echo "Launched BLT pilot pid=$PID log=$LOG"