dcostenco committed on
Commit
bf958b1
·
verified ·
1 Parent(s): aba10db

Add training/train_4b_v43_swe_patch.sh

Browse files
Files changed (1) hide show
  1. training/train_4b_v43_swe_patch.sh +111 -0
training/train_4b_v43_swe_patch.sh ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# train_4b_v43_swe_patch.sh — SWE-bench surgical patch for prism-coder:4b-v43
# Target: 65% strict → ≥85% strict on swe_bench_test.py
# Fixes: false_positive(4), task_route(3), save_ledger_vs_experience(1),
#        search_vs_load(1), verifier_tools(3), knowledge_forget(1), params(10)
#
# Pipeline:
#   1. Build the patch corpora and combine them into $DATA_DIR.
#   2. Continue LoRA training from the existing adapter in $ADAPTER_DIR.
#   3. Merge the adapter, convert to an F16 GGUF, quantize to Q4_K_M.
#   4. Register the quantized model in Ollama as prism-coder:4b-v43p4.
#   5. Run the SWE-bench eval.  6. Run the BFCL eval.
#
# Requires on PATH: python3, mlx_lm.lora, ollama; plus a built llama.cpp
# checkout at $LLAMA_CPP.
set -euo pipefail

TRAINING_DIR="$HOME/synalux-private/prism-training"
LLAMA_CPP="$HOME/llama.cpp"
cd "$TRAINING_DIR"

# Timestamped progress line on stdout.
log() { printf '[4b-swe] %s %s\n' "$(date '+%H:%M:%S')" "$*"; }

# Report a fatal error on stderr and abort the run.
die() { log "ERROR: $*" >&2; exit 1; }

ADAPTER_DIR="/tmp/4b_v43_adapter"
DATA_DIR="/tmp/4b_swe_patch_data"
MERGED_DIR="/tmp/4b_swe_merged"
MODELS_DIR="$HOME/prism/training/models"
LOGS_DIR="$HOME/prism/training/logs"
GGUF_F16="$MODELS_DIR/qwen3-4b-v43-swe-f16.gguf"
GGUF_Q4="$MODELS_DIR/qwen3-4b-v43-swe-q4km.gguf"
LOG="$LOGS_DIR/train_4b_swe_patch.log"
SWE_LOG="$LOGS_DIR/swe_4b_v43p4.log"
BFCL_LOG="$LOGS_DIR/bfcl_4b_v43p4.log"
OLLAMA_TAG="prism-coder:4b-v43p4"
# Local Hugging Face cache snapshot of the base model that the adapter is merged into.
BASE_SNAPSHOT="$HOME/.cache/huggingface/hub/models--Qwen--Qwen3-4B/snapshots/1cfa9a7208912126459214e8b04321603b3df60c"

mkdir -p "$LOGS_DIR"
mkdir -p "$MODELS_DIR/qwen3-4b-v43"
echo "=== train_4b_v43_swe_patch ===" | tee "$LOG"
echo "Start: $(date)" | tee -a "$LOG"

log "Step 1: Build SWE-bench patch corpus (swe_patch)..."
python3 build_4b_v43_swe_patch.py 2>&1 | tee -a "$LOG"

log "Step 1b: Build patch4 corpus and merge..."
python3 build_4b_v43_patch4.py 2>&1 | tee -a "$LOG"
python3 combine_4b_swe_corpus.py 2>&1 | tee -a "$LOG"

TRAIN_ROWS=$(wc -l < "$DATA_DIR/train.jsonl")
VALID_ROWS=$(wc -l < "$DATA_DIR/valid.jsonl")
log "Corpus: train=$TRAIN_ROWS valid=$VALID_ROWS"

# Sanity gate: refuse to train on a suspiciously small corpus.
[ "$TRAIN_ROWS" -ge 80 ] || die "Too few training rows ($TRAIN_ROWS)"

# Training resumes from an existing adapter; fail early with a clear message
# instead of deep inside the trainer if it is missing.
[ -f "$ADAPTER_DIR/adapters.safetensors" ] \
  || die "Adapter to resume from not found: $ADAPTER_DIR/adapters.safetensors"

log "Step 2: MLX LoRA fine-tune (resume from current 4b-v43 adapter)..."
mlx_lm.lora \
  --model "Qwen/Qwen3-4B" \
  --train \
  --data "$DATA_DIR" \
  --adapter-path "$ADAPTER_DIR" \
  --resume-adapter-file "$ADAPTER_DIR/adapters.safetensors" \
  --num-layers 16 \
  --batch-size 2 \
  --grad-checkpoint \
  --iters 300 \
  --val-batches 10 \
  --learning-rate 3e-5 \
  --steps-per-report 10 \
  --steps-per-eval 100 \
  --save-every 150 \
  --max-seq-length 2048 \
  --seed 2031 \
  2>&1 | tee -a "$LOG"

log "Training complete. Merging..."

log "Step 3: Merge LoRA + convert to GGUF..."
python3 merge_4b_v43.py \
  --base "$BASE_SNAPSHOT" \
  --adapter "$ADAPTER_DIR" \
  --out "$MERGED_DIR" 2>&1 | tee -a "$LOG"

python3 "$LLAMA_CPP/convert_hf_to_gguf.py" \
  "$MERGED_DIR" \
  --outfile "$GGUF_F16" \
  --outtype f16 2>&1 | tee -a "$LOG" | tail -5

[ -f "$GGUF_F16" ] || die "F16 GGUF not produced"
log "F16 GGUF: $(ls -lh "$GGUF_F16")"
rm -rf -- "$MERGED_DIR"

"$LLAMA_CPP/build/bin/llama-quantize" "$GGUF_F16" "$GGUF_Q4" Q4_K_M 2>&1 | tee -a "$LOG" | tail -3
# Only discard the F16 intermediate once the quantized file really exists.
[ -f "$GGUF_Q4" ] || die "Q4_K_M GGUF not produced"
log "Q4KM: $(ls -lh "$GGUF_Q4")"
rm -f -- "$GGUF_F16"

log "Step 4: Register in Ollama as $OLLAMA_TAG..."
# Best effort: the tag may not exist yet on a first run.
ollama rm "$OLLAMA_TAG" 2>/dev/null || true
MODELFILE=$(mktemp)
# Remove the temporary Modelfile on every exit path, including failures.
trap 'rm -f -- "$MODELFILE"' EXIT
# Unquoted delimiter so ${GGUF_Q4} expands: the Modelfile must point at the
# GGUF quantized above, not at a previously built model.
cat > "$MODELFILE" << MEOF
FROM ${GGUF_Q4}
PARAMETER temperature 0
PARAMETER num_ctx 8192
PARAMETER num_predict 256
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|endoftext|>"
MEOF
ollama create "$OLLAMA_TAG" -f "$MODELFILE" 2>&1 | tee -a "$LOG"
rm -f -- "$MODELFILE"

log "Step 5: SWE-bench eval..."
# NOTE(review): no model argument is passed here; confirm swe_bench_test.py
# targets prism-coder:4b-v43p4 by default.
python3 swe_bench_test.py 2>&1 | tee "$SWE_LOG"
# '|| true': a missing "Strict Pass:" line must fall through to the '-'
# placeholder rather than abort the script under 'set -e' + pipefail.
STRICT=$(grep "Strict Pass:" "$SWE_LOG" | grep -oE '[0-9]+%' | head -1 || true)
log "SWE-bench strict: ${STRICT:--}"

log "Step 6: BFCL eval (gate: ≥90%)..."
python3 bfcl_eval.py --model "$OLLAMA_TAG" 2>&1 | tee "$BFCL_LOG"
# grep -E keeps the alternation portable (BRE '\|' is a non-POSIX extension).
MEAN=$(grep -E '^Mean:|Overall:' "$BFCL_LOG" | tail -1 | grep -oE '[0-9]+\.[0-9]+' | head -1 || true)
log "BFCL Mean: ${MEAN:--}%"

echo ""
echo "=== DONE: $(date) ===" | tee -a "$LOG"
echo ""
echo "Results:"
echo " SWE-bench: ${STRICT:-?}"
echo " BFCL: ${MEAN:-?}%"
echo ""
echo "If BFCL ≥90% AND SWE strict improved:"
# 'ollama cp' is the CLI command that copies a model under a new name.
echo " ollama cp $OLLAMA_TAG prism-coder:4b"