dcostenco committed on
Commit
aba10db
·
verified ·
1 Parent(s): f04e0b1

Add training/train_4b_v43_local.sh

Browse files
Files changed (1) hide show
  1. training/train_4b_v43_local.sh +64 -0
training/train_4b_v43_local.sh ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# train_4b_v43_local.sh — mlx_lm.lora SFT for Prism Coder 4B v43 on Apple Silicon
#
# Prereq: python3 build_4b_v43_corpus.py (creates /tmp/4b_v43_data/train.jsonl + valid.jsonl)
# v43.2 patch: resumed from 1400-iter checkpoint with surgical corpus patch
# (+2075 rows: param-precise, multi-turn chain, abstain, disambiguation)
#
# This run RESUMES training: $ADAPTER_DIR/adapters.safetensors must already
# exist, because it is passed to --resume-adapter-file below.
#
# Output is appended (tee -a) to $LOG, so logs of earlier runs are preserved.
set -euo pipefail

MODEL="Qwen/Qwen3-4B"
DATA_DIR="/tmp/4b_v43_data"
ADAPTER_DIR="/tmp/4b_v43_adapter"
LOG="/tmp/4b_v43_train.log"
RESUME_FILE="$ADAPTER_DIR/adapters.safetensors"

# Print every argument on its own line to stderr, then abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Print the line count of file $1 with surrounding whitespace removed
# (BSD/macOS `wc -l` pads its output with leading blanks).
count_rows() {
  local rows
  rows=$(wc -l < "$1")
  printf '%s\n' "${rows//[[:space:]]/}"
}

# ── Sanity checks ────────────────────────────────────────────────────────────
# Both splits are needed: train.jsonl for training, valid.jsonl for the row
# count below and for the periodic evaluation requested via --steps-per-eval.
for split in train valid; do
  if [ ! -f "$DATA_DIR/$split.jsonl" ]; then
    die "ERROR: $DATA_DIR/$split.jsonl not found." \
        "Run: python3 build_4b_v43_corpus.py"
  fi
done

# Fail early with a clear message instead of letting the trainer start up
# and then trip over the missing checkpoint.
if [ ! -f "$RESUME_FILE" ]; then
  die "ERROR: $RESUME_FILE not found." \
      "This script resumes from an existing adapter checkpoint; place it there first."
fi

if ! command -v mlx_lm.lora >/dev/null 2>&1; then
  die "ERROR: mlx_lm.lora not found on PATH (is the mlx-lm package installed?)."
fi

TRAIN_ROWS=$(count_rows "$DATA_DIR/train.jsonl")
VALID_ROWS=$(count_rows "$DATA_DIR/valid.jsonl")
echo "=== Prism Coder 4B v43 — Local MLX Training ==="
echo " Model: $MODEL"
echo " Train rows: $TRAIN_ROWS"
echo " Valid rows: $VALID_ROWS"
echo " Adapter out: $ADAPTER_DIR"
echo " Log: $LOG"
echo ""

# ── Hyperparameters ──────────────────────────────────────────────────────────
# Values actually passed on the command line below (v43.2 resume run):
#   16 LoRA layers, seq_len=2048, batch_size=2, gradient checkpointing on
#   iters=250 (250 × 2 = 500 training sequences; no grad-accumulation flag is passed)
#   LR 3e-5, seed 2027
#   report every 10 steps, val every 100 steps (25 val batches), save every 200 steps
#
# Notes carried over from the original fresh-run plan, NOT set by this script:
#   r=32, alpha=64 (scale=2.0)
#   Effective batch = batch_size(4) × grad_accumulation(4) = 16
#   iters=2000 ≈ 0.35 epochs over ~23k train rows
#   LR 1e-4 cosine — same family as v36 1.7B (5e-6 was after warmup; 1e-4 with cosine suits 4B fresh run)
#   Est. time on M5 Max: 3-5h
# NOTE(review): LoRA rank/alpha and the LR schedule are not configurable via
# the flags used here — confirm where they come from (e.g. a config file) and
# that they match the checkpoint being resumed.

mkdir -p "$ADAPTER_DIR"

# With `set -o pipefail`, a trainer failure propagates through `tee` and
# aborts the script before the success message is printed.
mlx_lm.lora \
  --model "$MODEL" \
  --train \
  --data "$DATA_DIR" \
  --adapter-path "$ADAPTER_DIR" \
  --resume-adapter-file "$RESUME_FILE" \
  --num-layers 16 \
  --batch-size 2 \
  --grad-checkpoint \
  --iters 250 \
  --val-batches 25 \
  --learning-rate 3e-5 \
  --steps-per-report 10 \
  --steps-per-eval 100 \
  --save-every 200 \
  --max-seq-length 2048 \
  --clear-cache-threshold 0.7 \
  --seed 2027 \
  2>&1 | tee -a "$LOG"

echo ""
echo "✅ Training complete. Adapter at: $ADAPTER_DIR"
echo "Val loss should be checked in $LOG before merging."
echo ""
echo "Next: python3 merge_4b_v43.py (then re-eval: python3 bfcl_eval.py --model prism-coder:4b-v43)"