prism-coder-4b / training /train_4b_v43_swe_patch.sh
dcostenco's picture
Add training/train_4b_v43_swe_patch.sh
bf958b1 verified
#!/bin/bash
# train_4b_v43_swe_patch.sh — SWE-bench surgical patch for prism-coder:4b-v43
# Target: 65% strict → ≥85% strict on swe_bench_test.py
# Fixes: false_positive(4), task_route(3), save_ledger_vs_experience(1),
# search_vs_load(1), verifier_tools(3), knowledge_forget(1), params(10)
# Strict mode: stop on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# LLAMA_CPP is the llama.cpp checkout used later for GGUF conversion and
# quantization. The python helper scripts invoked below are given as relative
# paths, so the working directory is switched to TRAINING_DIR first.
LLAMA_CPP="$HOME/llama.cpp"
TRAINING_DIR="$HOME/synalux-private/prism-training"
cd "$TRAINING_DIR"
# Print a progress line to stdout: the "[4b-swe]" tag, the current wall-clock
# time as HH:MM:SS, then all arguments joined into one string.
log() {
  printf '[4b-swe] %s %s\n' "$(date '+%H:%M:%S')" "$*"
}
# Artefact locations. The LoRA adapter and the corpus directory live under
# /tmp; GGUF outputs and log files live under $HOME/prism/training.
PRISM_ROOT="$HOME/prism/training"
ADAPTER_DIR="/tmp/4b_v43_adapter"
DATA_DIR="/tmp/4b_swe_patch_data"
GGUF_F16="$PRISM_ROOT/models/qwen3-4b-v43-swe-f16.gguf"
GGUF_Q4="$PRISM_ROOT/models/qwen3-4b-v43-swe-q4km.gguf"
LOG="$PRISM_ROOT/logs/train_4b_swe_patch.log"

# Create the output directories (mkdir -p also creates the models/ parent).
for out_dir in "$PRISM_ROOT/logs" "$PRISM_ROOT/models/qwen3-4b-v43"; do
  mkdir -p "$out_dir"
done

# The first tee truncates any log left by a previous run; later writes append.
printf '%s\n' "=== train_4b_v43_swe_patch ===" | tee "$LOG"
printf 'Start: %s\n' "$(date)" | tee -a "$LOG"
# Step 1: run the two corpus builders and the combine script, then sanity-check
# the size of the resulting training set. Row counts are read from
# $DATA_DIR/train.jsonl and $DATA_DIR/valid.jsonl (presumably written by
# combine_4b_swe_corpus.py — confirm against that script).
log "Step 1: Build SWE-bench patch corpus (swe_patch)..."
python3 build_4b_v43_swe_patch.py 2>&1 | tee -a "$LOG"
log "Step 1b: Build patch4 corpus and merge..."
python3 build_4b_v43_patch4.py 2>&1 | tee -a "$LOG"
python3 combine_4b_swe_corpus.py 2>&1 | tee -a "$LOG"

# BSD/macOS `wc -l` left-pads its count with blanks; strip all whitespace so
# the values are clean integers in the log line and in the numeric test.
TRAIN_ROWS=$(wc -l < "$DATA_DIR/train.jsonl")
TRAIN_ROWS=${TRAIN_ROWS//[[:space:]]/}
VALID_ROWS=$(wc -l < "$DATA_DIR/valid.jsonl")
VALID_ROWS=${VALID_ROWS//[[:space:]]/}
log "Corpus: train=$TRAIN_ROWS valid=$VALID_ROWS"

# Refuse to train on fewer than 80 rows. The `||` form also aborts when the
# count is not a valid integer (the test itself then returns non-zero).
[ "$TRAIN_ROWS" -ge 80 ] || { log "ERROR: Too few training rows ($TRAIN_ROWS)"; exit 1; }
# Step 2: LoRA fine-tuning with mlx_lm.lora, resuming from the adapter weights
# at $ADAPTER_DIR/adapters.safetensors and writing back to $ADAPTER_DIR.
# Trainer output (stdout and stderr) is shown and appended to $LOG.
log "Step 2: MLX LoRA fine-tune (resume from current 4b-v43 adapter)..."
lora_args=(
  --model "Qwen/Qwen3-4B"
  --train
  --data "$DATA_DIR"
  --adapter-path "$ADAPTER_DIR"
  --resume-adapter-file "$ADAPTER_DIR/adapters.safetensors"
  --num-layers 16
  --batch-size 2
  --grad-checkpoint
  --iters 300
  --val-batches 10
  --learning-rate 3e-5
  --steps-per-report 10
  --steps-per-eval 100
  --save-every 150
  --max-seq-length 2048
  --seed 2031
)
mlx_lm.lora "${lora_args[@]}" 2>&1 | tee -a "$LOG"
log "Training complete. Merging..."
# Step 3: merge the LoRA adapter into the base model, convert the merged
# checkpoint to an F16 GGUF, then quantize that to Q4_K_M. The merged
# checkpoint and the F16 file are intermediates and are deleted once the next
# stage has produced its output.
log "Step 3: Merge LoRA + convert to GGUF..."

# Base snapshot directory. Defaults to the original hard-coded location; set
# BASE_MODEL_DIR in the environment when the Hugging Face cache lives elsewhere.
BASE_MODEL_DIR="${BASE_MODEL_DIR:-/Users/admin/.cache/huggingface/hub/models--Qwen--Qwen3-4B/snapshots/1cfa9a7208912126459214e8b04321603b3df60c}"
MERGED_DIR="/tmp/4b_swe_merged"

python3 merge_4b_v43.py \
  --base "$BASE_MODEL_DIR" \
  --adapter "$ADAPTER_DIR" \
  --out "$MERGED_DIR" 2>&1 | tee -a "$LOG"

# Full converter output goes to $LOG; only the last 5 lines reach the terminal.
python3 "$LLAMA_CPP/convert_hf_to_gguf.py" \
  "$MERGED_DIR" \
  --outfile "$GGUF_F16" \
  --outtype f16 2>&1 | tee -a "$LOG" | tail -5
[ -f "$GGUF_F16" ] || { log "ERROR: F16 GGUF not produced"; exit 1; }
log "F16 GGUF: $(ls -lh "$GGUF_F16")"
rm -rf -- "$MERGED_DIR"

"$LLAMA_CPP/build/bin/llama-quantize" "$GGUF_F16" "$GGUF_Q4" Q4_K_M 2>&1 | tee -a "$LOG" | tail -3
# Only discard the F16 intermediate once the quantized file really exists.
[ -f "$GGUF_Q4" ] || { log "ERROR: Q4_K_M GGUF not produced"; exit 1; }
log "Q4KM: $(ls -lh "$GGUF_Q4")"
rm -f -- "$GGUF_F16"
# Step 4: (re)register the quantized model with Ollama under the tag
# prism-coder:4b-v43p4, using a throw-away Modelfile.
log "Step 4: Register in Ollama as prism-coder:4b-v43p4..."
# Best effort: the tag may not exist yet, so a failed removal is ignored.
ollama rm prism-coder:4b-v43p4 2>/dev/null || true
MODELFILE=$(mktemp)
# Remove the temp file even if `ollama create` fails and set -e aborts the run.
trap 'rm -f -- "$MODELFILE"' EXIT
# The heredoc delimiter is deliberately unquoted so that ${GGUF_Q4} expands:
# FROM must name the file quantized in this run, not a previously built GGUF.
# No other line contains `$`, a backtick or a backslash, so the rest is literal.
cat > "$MODELFILE" << MEOF
FROM ${GGUF_Q4}
PARAMETER temperature 0
PARAMETER num_ctx 8192
PARAMETER num_predict 256
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|endoftext|>"
MEOF
ollama create prism-coder:4b-v43p4 -f "$MODELFILE" 2>&1 | tee -a "$LOG"
rm -f -- "$MODELFILE"
trap - EXIT
# Steps 5 and 6: run both evaluations, keep their full output in per-eval log
# files, and pull one headline number out of each for the summary.
log "Step 5: SWE-bench eval..."
SWE_LOG="$HOME/prism/training/logs/swe_4b_v43p4.log"
python3 swe_bench_test.py 2>&1 | tee "$SWE_LOG"
# `|| true`: under `set -e -o pipefail` a grep that matches nothing (or a
# SIGPIPE from `head`) would otherwise abort the script here, making the
# "${STRICT:--}" fallback below unreachable.
STRICT=$(grep "Strict Pass:" "$SWE_LOG" | grep -oE '[0-9]+%' | head -1) || true
log "SWE-bench strict: ${STRICT:--}"

log "Step 6: BFCL eval (gate: ≥90%)..."
BFCL_LOG="$HOME/prism/training/logs/bfcl_4b_v43p4.log"
python3 bfcl_eval.py --model prism-coder:4b-v43p4 2>&1 | tee "$BFCL_LOG"
# -E alternation replaces the non-portable `\|` of basic regular expressions;
# the last matching line wins, and its first decimal number is taken.
MEAN=$(grep -E '^Mean:|Overall:' "$BFCL_LOG" | tail -1 | grep -oE '[0-9]+\.[0-9]+' | head -1) || true
log "BFCL Mean: ${MEAN:--}%"
# Final summary: completion timestamp (also appended to $LOG), the two headline
# metrics, and the manual promotion command. "?" is printed for a metric that
# could not be extracted from its eval log.
echo ""
echo "=== DONE: $(date) ===" | tee -a "$LOG"
echo ""
echo "Results:"
echo " SWE-bench: ${STRICT:-?}"
echo " BFCL: ${MEAN:-?}%"
echo ""
echo "If BFCL ≥90% AND SWE strict improved:"
# Ollama has no `tag` subcommand; `ollama cp SOURCE DESTINATION` is the command
# that makes the model available under a second name.
echo " ollama cp prism-coder:4b-v43p4 prism-coder:4b"