#!/bin/bash
# train_4b_v43_swe_patch.sh — SWE-bench surgical patch for prism-coder:4b-v43
#
# Target: 65% strict → ≥85% strict on swe_bench_test.py
# Fixes:  false_positive(4), task_route(3), save_ledger_vs_experience(1),
#         search_vs_load(1), verifier_tools(3), knowledge_forget(1), params(10)
#
# Usage: ./train_4b_v43_swe_patch.sh        (takes no arguments)
#
# Optional environment:
#   BASE_MODEL_DIR  Local snapshot directory of the Qwen3-4B base model that
#                   is passed to merge_4b_v43.py --base. Defaults to the
#                   pinned snapshot inside $HOME/.cache/huggingface.
#
# Pipeline (each step appends its output to $LOG unless noted):
#   1. Run the corpus builder scripts; train.jsonl / valid.jsonl are then
#      read from $DATA_DIR.
#   2. MLX LoRA fine-tune, resuming from $ADAPTER_DIR/adapters.safetensors.
#   3. Merge the adapter, convert to an F16 GGUF, quantize to Q4_K_M.
#   4. Register the quantized GGUF in Ollama as $OLLAMA_TAG.
#   5. Run swe_bench_test.py (output goes to its own log, $SWE_LOG).
#   6. Run bfcl_eval.py against $OLLAMA_TAG (output goes to $BFCL_LOG).
# The script only reports the scores; promoting the model is a manual step.

set -euo pipefail

readonly TRAINING_DIR="$HOME/synalux-private/prism-training"
readonly LLAMA_CPP="$HOME/llama.cpp"

readonly ADAPTER_DIR="/tmp/4b_v43_adapter"
readonly DATA_DIR="/tmp/4b_swe_patch_data"
readonly MERGED_DIR="/tmp/4b_swe_merged"

readonly MODELS_DIR="$HOME/prism/training/models"
readonly LOGS_DIR="$HOME/prism/training/logs"
readonly GGUF_F16="$MODELS_DIR/qwen3-4b-v43-swe-f16.gguf"
readonly GGUF_Q4="$MODELS_DIR/qwen3-4b-v43-swe-q4km.gguf"
readonly LOG="$LOGS_DIR/train_4b_swe_patch.log"
readonly SWE_LOG="$LOGS_DIR/swe_4b_v43p4.log"
readonly BFCL_LOG="$LOGS_DIR/bfcl_4b_v43p4.log"

readonly OLLAMA_TAG="prism-coder:4b-v43p4"
readonly MIN_TRAIN_ROWS=80

# Base-model snapshot handed to the merge script. Rooted at $HOME like every
# other path in this script instead of a hard-coded user directory.
readonly BASE_MODEL_DIR="${BASE_MODEL_DIR:-$HOME/.cache/huggingface/hub/models--Qwen--Qwen3-4B/snapshots/1cfa9a7208912126459214e8b04321603b3df60c}"

# Path of the temporary Ollama Modelfile; empty while none exists.
MODELFILE=""

# Print a timestamped, prefixed progress message on stdout.
log() { echo "[4b-swe] $(date '+%H:%M:%S') $*"; }

# Print an error message on stderr and abort the script.
die() {
  log "ERROR: $*" >&2
  exit 1
}

# EXIT trap: remove the temporary Modelfile if a failure left it behind.
cleanup() {
  if [[ -n "$MODELFILE" ]]; then
    rm -f -- "$MODELFILE"
  fi
}
trap cleanup EXIT

# Print the number of lines in file $1 with no padding
# (BSD/macOS `wc -l` pads its output with leading spaces).
count_lines() {
  wc -l < "$1" | tr -d '[:space:]'
}

# Step 1: run the corpus builders and sanity-check the resulting row counts.
build_corpus() {
  local train_rows valid_rows

  log "Step 1: Build SWE-bench patch corpus (swe_patch)..."
  python3 build_4b_v43_swe_patch.py 2>&1 | tee -a "$LOG"

  log "Step 1b: Build patch4 corpus and merge..."
  python3 build_4b_v43_patch4.py 2>&1 | tee -a "$LOG"
  python3 combine_4b_swe_corpus.py 2>&1 | tee -a "$LOG"

  [[ -f "$DATA_DIR/train.jsonl" ]] || die "Corpus file missing: $DATA_DIR/train.jsonl"
  [[ -f "$DATA_DIR/valid.jsonl" ]] || die "Corpus file missing: $DATA_DIR/valid.jsonl"

  train_rows=$(count_lines "$DATA_DIR/train.jsonl")
  valid_rows=$(count_lines "$DATA_DIR/valid.jsonl")
  log "Corpus: train=$train_rows valid=$valid_rows"

  (( train_rows >= MIN_TRAIN_ROWS )) || die "Too few training rows ($train_rows)"
}

# Step 2: LoRA fine-tune with mlx_lm, continuing from the existing adapter.
run_finetune() {
  local resume_file="$ADAPTER_DIR/adapters.safetensors"
  local -a lora_args

  log "Step 2: MLX LoRA fine-tune (resume from current 4b-v43 adapter)..."

  # Fail with a clear message instead of a loader traceback when the adapter
  # this run is supposed to resume from is not there.
  [[ -f "$resume_file" ]] || die "Resume adapter not found: $resume_file"

  lora_args=(
    --model "Qwen/Qwen3-4B"
    --train
    --data "$DATA_DIR"
    --adapter-path "$ADAPTER_DIR"
    --resume-adapter-file "$resume_file"
    --num-layers 16
    --batch-size 2
    --grad-checkpoint
    --iters 300
    --val-batches 10
    --learning-rate 3e-5
    --steps-per-report 10
    --steps-per-eval 100
    --save-every 150
    --max-seq-length 2048
    --seed 2031
  )
  mlx_lm.lora "${lora_args[@]}" 2>&1 | tee -a "$LOG"

  log "Training complete. Merging..."
}

# Step 3: merge the adapter into the base model, convert the merged model to
# an F16 GGUF and quantize it to Q4_K_M. Both intermediates (the merged
# directory and the F16 file) are deleted once the next artifact exists.
merge_and_quantize() {
  log "Step 3: Merge LoRA + convert to GGUF..."

  python3 merge_4b_v43.py \
    --base "$BASE_MODEL_DIR" \
    --adapter "$ADAPTER_DIR" \
    --out "$MERGED_DIR" 2>&1 | tee -a "$LOG"

  # The full converter output goes to $LOG; only its tail reaches the console.
  python3 "$LLAMA_CPP/convert_hf_to_gguf.py" \
    "$MERGED_DIR" \
    --outfile "$GGUF_F16" \
    --outtype f16 2>&1 | tee -a "$LOG" | tail -5
  [[ -f "$GGUF_F16" ]] || die "F16 GGUF not produced"
  log "F16 GGUF: $(ls -lh "$GGUF_F16")"
  rm -rf -- "$MERGED_DIR"

  "$LLAMA_CPP/build/bin/llama-quantize" "$GGUF_F16" "$GGUF_Q4" Q4_K_M 2>&1 \
    | tee -a "$LOG" | tail -3
  [[ -f "$GGUF_Q4" ]] || die "Q4_K_M GGUF not produced"
  log "Q4KM: $(ls -lh "$GGUF_Q4")"
  rm -f -- "$GGUF_F16"
}

# Step 4: (re)create the Ollama model from the freshly quantized GGUF.
register_ollama() {
  log "Step 4: Register in Ollama as prism-coder:4b-v43p4..."

  # Best effort on purpose: the tag does not exist on a first run.
  ollama rm "$OLLAMA_TAG" 2>/dev/null || true

  MODELFILE=$(mktemp)
  # Unquoted delimiter so ${GGUF_Q4} expands: FROM must point at the file
  # produced by this run, not at a GGUF left over from an earlier build.
  cat > "$MODELFILE" <<MEOF
FROM ${GGUF_Q4}
PARAMETER temperature 0
PARAMETER num_ctx 8192
PARAMETER num_predict 256
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|endoftext|>"
MEOF

  ollama create "$OLLAMA_TAG" -f "$MODELFILE" 2>&1 | tee -a "$LOG"
  rm -f -- "$MODELFILE"
  MODELFILE=""
}

# Steps 5 and 6: run both evaluations, scrape the headline numbers out of
# their logs and print a summary. No pass/fail gate is enforced here.
run_evals() {
  local strict mean

  log "Step 5: SWE-bench eval..."
  # NOTE(review): swe_bench_test.py is invoked without a model argument —
  # confirm that it evaluates $OLLAMA_TAG.
  python3 swe_bench_test.py 2>&1 | tee "$SWE_LOG"
  # `|| true`: with pipefail a non-matching grep would otherwise abort the
  # script here and the "-" / "?" fallbacks below could never be reached.
  strict=$(grep "Strict Pass:" "$SWE_LOG" | grep -oE '[0-9]+%' | head -1 || true)
  log "SWE-bench strict: ${strict:--}"

  log "Step 6: BFCL eval (gate: ≥90%)..."
  python3 bfcl_eval.py --model "$OLLAMA_TAG" 2>&1 | tee "$BFCL_LOG"
  # -E alternation instead of the non-portable BRE "\|".
  mean=$(grep -E '^Mean:|Overall:' "$BFCL_LOG" | tail -1 \
    | grep -oE '[0-9]+\.[0-9]+' | head -1 || true)
  log "BFCL Mean: ${mean:--}%"

  echo ""
  echo "=== DONE: $(date) ===" | tee -a "$LOG"
  echo ""
  echo "Results:"
  echo "  SWE-bench: ${strict:-?}"
  echo "  BFCL:      ${mean:-?}%"
  echo ""
  echo "If BFCL ≥90% AND SWE strict improved:"
  # `ollama cp` copies a model under a new name.
  echo "  ollama cp $OLLAMA_TAG prism-coder:4b"
}

# Entry point: prepare directories and the log file, then run every step in
# order. Any failing step aborts the run (set -e / pipefail).
main() {
  cd "$TRAINING_DIR" || die "cannot cd to $TRAINING_DIR"

  mkdir -p "$LOGS_DIR"
  mkdir -p "$MODELS_DIR/qwen3-4b-v43"

  # Plain `tee` (no -a) starts a fresh log for every run.
  echo "=== train_4b_v43_swe_patch ===" | tee "$LOG"
  echo "Start: $(date)" | tee -a "$LOG"

  build_corpus
  run_finetune
  merge_and_quantize
  register_ollama
  run_evals
}

main "$@"