dcostenco committed on
Commit
79a2a50
·
verified ·
1 Parent(s): 18db54d

Add training/orchestrate_4b_to_100.sh

Browse files
Files changed (1) hide show
  1. training/orchestrate_4b_to_100.sh +270 -0
training/orchestrate_4b_to_100.sh ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # orchestrate_4b_to_100.sh — Drive prism-coder:4b to ≥90% strict on eval_300.py.
3
+ #
4
+ # Pipeline per round:
5
+ # 1. Build patch corpus (analyze_swe_failures.py on previous eval300 report)
6
+ # 2. MLX LoRA fine-tune (resume from current adapter, decreasing LR)
7
+ # 3. Merge → GGUF F16 → Q4_K_M → Ollama register
8
+ # 4. eval_300.py (300-case unified eval) → check strict% gate
9
+ # 5. Loop or promote (single gate, no separate BFCL check)
10
+ #
11
+ # Pre-condition: SWE swe1 patch is already running (or has been applied).
12
+ # This script starts from whatever the CURRENT adapter+Ollama state is,
13
+ # runs a baseline SWE eval, then patches until ≥90% (the EVAL_GATE below).
14
+
15
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Both locations may be overridden from the environment; the defaults are the
# paths this script has always used.
TRAINING_DIR="${TRAINING_DIR:-$HOME/synalux-private/prism-training}"
LLAMA_CPP="${LLAMA_CPP:-$HOME/llama.cpp}"

# Later steps use paths relative to the training directory
# (merge_4b_v43.py, eval_300.py, analyze_swe_failures.py, results/...).
cd "$TRAINING_DIR" || {
  printf 'FATAL: cannot cd to training dir: %s\n' "$TRAINING_DIR" >&2
  exit 1
}
19
+
20
#######################################
# Print a timestamped orchestrator message and append it to the run log.
# Globals:   ORCH_LOG (read) - log file path; when unset or empty the message
#            is only printed, so log() is safe to call before the config
#            section has assigned ORCH_LOG (the script runs under `set -u`).
# Arguments: $* - message text
# Outputs:   "[4b-orch] HH:MM:SS <message>" on stdout (and in ORCH_LOG)
#######################################
log() {
  local line
  line="[4b-orch] $(date '+%H:%M:%S') $*"
  if [[ -n "${ORCH_LOG:-}" ]]; then
    printf '%s\n' "$line" | tee -a "$ORCH_LOG"
  else
    printf '%s\n' "$line"
  fi
}

#######################################
# Log a fatal message through log() and terminate the script.
# Arguments: $* - reason for the failure
# Returns:   never returns; exits with status 1
#######################################
fail() {
  log "FATAL: $*"
  exit 1
}
22
+
23
+ # ── Config ──────────────────────────────────────────────────────────────────
24
ADAPTER_DIR="/tmp/4b_v43_adapter"
BASE_GGUF_DIR="$HOME/prism/training/models"
ORCH_LOG="$HOME/prism/training/logs/orch_4b_to_100.log"
EVAL_GATE=0.90   # 90% strict target on eval_300.py unified eval
MAX_ROUNDS=20

# Resumption: set RESUME_ROUND >0 to skip STAGE 1 and start the main loop at
# that round. RESUME_TAG / RESUME_STRICT describe the model being resumed.
RESUME_ROUND=${RESUME_ROUND:-0}
RESUME_STRICT=${RESUME_STRICT:-0}
RESUME_TAG=${RESUME_TAG:-""}

# RESUME_ROUND is used in `[ ... -gt ... ]` and in $(( )) arithmetic further
# down; reject anything that is not a plain non-negative integer up front.
case "$RESUME_ROUND" in
  '' | *[!0-9]*)
    printf 'FATAL: RESUME_ROUND must be a non-negative integer, got: %s\n' \
      "$RESUME_ROUND" >&2
    exit 1
    ;;
esac
# Force base 10 so a value such as "08" is not parsed as invalid octal.
RESUME_ROUND=$((10#$RESUME_ROUND))

# LR schedule — rounds 17-20 use 5e-6/200 (stable regime for final polish).
# LRS[i] and ITERS[i] are the learning rate and iteration count of round i+1.
LRS=(3e-5 2e-5 1.5e-5 1e-5 8e-6 5e-6 8e-6 5e-6 8e-6 5e-6 5e-6 5e-6 5e-6 5e-6 8e-6 5e-6 5e-6 5e-6 5e-6 5e-6)
ITERS=(300 250 200 150 120 100 150 150 200 200 200 200 200 200 300 200 200 200 200 200)

# The two schedules are indexed together; a length mismatch would silently
# pair a learning rate with the fallback iteration count (or vice versa).
if [ "${#LRS[@]}" -ne "${#ITERS[@]}" ]; then
  printf 'FATAL: LRS has %d entries but ITERS has %d\n' \
    "${#LRS[@]}" "${#ITERS[@]}" >&2
  exit 1
fi

mkdir -p "$(dirname "$ORCH_LOG")"
if [ "$RESUME_ROUND" -gt 0 ]; then
  # Resuming: keep the log of the earlier rounds instead of truncating it.
  echo "=== orchestrate_4b_to_100 start ===" | tee -a "$ORCH_LOG"
else
  echo "=== orchestrate_4b_to_100 start ===" | tee "$ORCH_LOG"
fi
echo "Start: $(date)" | tee -a "$ORCH_LOG"
41
+
42
+ # ── Helpers ──────────────────────────────────────────────────────────────────
43
#######################################
# Block until every given process has exited, polling with `kill -0`.
# Arguments: $1 - one PID, or several PIDs separated by whitespace/newlines
#                 (the shape `pgrep -f` produces when more than one process
#                 matches); passing the raw multi-line value to `kill -0` as
#                 a single argument would fail at once and skip the wait
#            $2 - human-readable description used in the log messages
#            $3 - optional poll interval in seconds (default 20)
# Outputs:   two log() lines: one before waiting, one when done
#######################################
wait_for_process() {
  local pids=$1 desc=$2 interval=${3:-20}
  local pid alive

  log "Waiting for $desc (PID ${pids//$'\n'/ }) to finish..."
  while :; do
    alive=0
    # Deliberately unquoted: split the list into individual PIDs.
    for pid in $pids; do
      if kill -0 "$pid" 2>/dev/null; then
        alive=1
        break
      fi
    done
    if [[ "$alive" -eq 0 ]]; then
      break
    fi
    sleep "$interval"
  done
  log "$desc finished."
}
49
+
50
#######################################
# Print the strict score of an eval_300 report as a percentage.
# Reads report["summary"]["strict_pct"], multiplies it by 100 and prints it
# with one decimal place.
# Arguments: $1 - path to the JSON report
# Outputs:   the percentage (e.g. "92.3") on stdout; "0" if the report is
#            missing, unparsable, or lacks the field (a warning then goes to
#            stderr so the fallback is not mistaken for a real 0% score)
# Returns:   0 in both cases
#######################################
eval300_strict_pct() {
  local report=$1
  # The path is handed over as argv[1] rather than spliced into the Python
  # source, so quotes or other special characters in it cannot break (or
  # inject into) the program text.
  python3 -c '
import json, sys
with open(sys.argv[1]) as fh:
    summary = json.load(fh)["summary"]
print("{:.1f}".format(summary["strict_pct"] * 100))
' "$report" 2>/dev/null || {
    printf 'eval300_strict_pct: cannot read strict_pct from %s; reporting 0\n' \
      "$report" >&2
    echo "0"
  }
}
58
+
59
#######################################
# Merge the current LoRA adapter into the base model, convert the result to
# an F16 GGUF, quantize it to Q4_K_M, and register it with Ollama.
# Globals:   ADAPTER_DIR, LLAMA_CPP, ORCH_LOG (read)
#            BASE_MODEL_PATH (read, optional) - overrides the base model
#            snapshot directory; defaults to the previously hard-coded path
# Arguments: $1 - Ollama tag to (re)create
#            $2 - output path of the Q4_K_M GGUF; its file name must contain
#                 "q4km", which is replaced by "f16" to name the intermediate
# Outputs:   progress via log(); tool output is appended to ORCH_LOG
# Returns:   0 on success; calls fail() (exit 1) when a stage produces no
#            output or the Ollama registration fails
#######################################
merge_and_register() {
  local tag=$1 gguf_q4=$2
  local base_model="${BASE_MODEL_PATH:-/Users/admin/.cache/huggingface/hub/models--Qwen--Qwen3-4B/snapshots/1cfa9a7208912126459214e8b04321603b3df60c}"
  local merged_dir="/tmp/4b_merged_${tag}"
  local q4_name gguf_f16 modelfile rc

  # Substitute only inside the file name, never in a directory component.
  q4_name=${gguf_q4##*/}
  gguf_f16="${gguf_q4%"$q4_name"}${q4_name/q4km/f16}"
  # Without "q4km" in the name both paths would be identical: quantize would
  # read and write the same file and the cleanup below would delete it.
  [[ "$gguf_f16" != "$gguf_q4" ]] \
    || fail "GGUF path for $tag must contain 'q4km' in its file name: $gguf_q4"

  log "Merging LoRA → GGUF → Ollama ($tag)..."

  python3 merge_4b_v43.py \
    --base "$base_model" \
    --adapter "$ADAPTER_DIR" \
    --out "$merged_dir" 2>&1 | tee -a "$ORCH_LOG"

  # Remove any leftover intermediate so the existence check below proves that
  # THIS run produced the file.
  rm -f -- "$gguf_f16"
  python3 "$LLAMA_CPP/convert_hf_to_gguf.py" \
    "$merged_dir" --outfile "$gguf_f16" --outtype f16 \
    2>&1 | tee -a "$ORCH_LOG" | tail -3
  [[ -f "$gguf_f16" ]] || fail "F16 GGUF not produced for $tag"

  "$LLAMA_CPP/build/bin/llama-quantize" "$gguf_f16" "$gguf_q4" Q4_K_M \
    2>&1 | tee -a "$ORCH_LOG" | tail -3
  [[ -f "$gguf_q4" ]] || fail "Q4KM GGUF not produced for $tag"

  # Intermediates are only needed to build the Q4_K_M file.
  rm -f -- "$gguf_f16"
  rm -rf -- "${merged_dir:?}"
  log "Q4KM: $(ls -lh "$gguf_q4")"

  # Replace any previous registration of the same tag; a missing tag is fine.
  ollama rm "$tag" 2>/dev/null || true

  modelfile=$(mktemp)
  printf '%s\n' \
    "FROM $gguf_q4" \
    'PARAMETER temperature 0' \
    'PARAMETER num_ctx 8192' \
    'PARAMETER num_predict 256' \
    'PARAMETER stop "<|im_end|>"' \
    'PARAMETER stop "<|endoftext|>"' > "$modelfile"

  # Capture the status so the temp file is removed on failure as well.
  rc=0
  ollama create "$tag" -f "$modelfile" 2>&1 | tee -a "$ORCH_LOG" || rc=$?
  rm -f -- "$modelfile"
  [[ "$rc" -eq 0 ]] || fail "ollama create failed for $tag"

  log "Registered: $tag"
}
97
+
98
#######################################
# Run eval_300.py against an Ollama tag and store its report.
# Must be called from the directory that holds eval_300.py; the evaluator's
# report is picked up from results/eval300_report.json.
# Arguments: $1 - Ollama model tag to evaluate
#            $2 - destination path for the report (expected to end in .json;
#                 console output is saved next to it with a .log suffix)
# Returns:   0 when a fresh report was copied; calls fail() (exit 1) when
#            the evaluation left no report behind
#######################################
run_eval300() {
  local tag=$1 report=$2
  local raw_report="results/eval300_report.json"
  local eval_log="${report%.json}.log"

  log "eval_300 eval: $tag..."

  # Drop the report of any earlier run first. The evaluator's exit status is
  # ignored below, so without this a crashed evaluation would silently hand
  # the PREVIOUS model's numbers to the gate check.
  rm -f -- "$raw_report"

  # Exit status intentionally not enforced (as before); success is judged by
  # whether a new report exists.
  python3 eval_300.py --model "$tag" 2>&1 | tee "$eval_log" || true

  [[ -f "$raw_report" ]] \
    || fail "eval_300.py produced no report for $tag (see $eval_log)"
  cp -- "$raw_report" "$report"
}
105
+
106
#######################################
# Run one MLX LoRA fine-tuning round, resuming from the current adapter.
# Globals:   ADAPTER_DIR (read) - adapter directory; training resumes from
#            its adapters.safetensors and writes back into the same directory
#            HOME (read) - the per-round training log lives under
#            $HOME/prism/training/logs
# Arguments: $1 - round number (also offsets the seed: 2031 + round)
#            $2 - data directory passed to --data (must hold train.jsonl)
#            $3 - learning rate
#            $4 - number of training iterations
# Returns:   status of the training pipeline; calls fail() (exit 1) when the
#            adapter to resume from or the training data is missing
#######################################
train_patch() {
  local round=$1 data_dir=$2 lr=$3 iters=$4
  local resume_file="$ADAPTER_DIR/adapters.safetensors"
  local train_log="$HOME/prism/training/logs/train_4b_swe_r${round}.log"

  # Check the inputs up front so a missing file is reported clearly instead
  # of surfacing as an error from inside the trainer.
  [[ -f "$resume_file" ]] || fail "No adapter to resume from: $resume_file"
  [[ -f "$data_dir/train.jsonl" ]] \
    || fail "No training data for round $round: $data_dir/train.jsonl"

  local -a lora_args=(
    --model "Qwen/Qwen3-4B"
    --train
    --data "$data_dir"
    --adapter-path "$ADAPTER_DIR"
    --resume-adapter-file "$resume_file"
    --num-layers 16
    --batch-size 2
    --grad-checkpoint
    --iters "$iters"
    --val-batches 10
    --learning-rate "$lr"
    --steps-per-report 10
    --steps-per-eval 100
    --save-every 100
    --max-seq-length 2048
    --seed "$((2031 + round))"
  )

  log "Training round $round: iters=$iters lr=$lr data=$data_dir..."
  mlx_lm.lora "${lora_args[@]}" 2>&1 | tee -a "$train_log"
}
128
+
129
+ # ── STAGE 0: Wait for current SWE patch (swe1) to complete ────────────────
130
# Look for a still-running patch-1 training job or its auto-launcher.
# `pgrep -f` prints one PID per line and exits non-zero when nothing matches;
# `|| true` keeps that from aborting the script under `set -e`. Several
# matches are flattened onto one line so they log cleanly and can be waited
# on one at a time.
SWE1_PID=$(pgrep -f "train_4b_v43_swe_patch" || true)
SWE1_PID=${SWE1_PID//$'\n'/ }
LAUNCH_PID=$(pgrep -f "launch_4b_swe_after_1b7" || true)
LAUNCH_PID=${LAUNCH_PID//$'\n'/ }

if [ -n "$SWE1_PID" ]; then
  log "SWE patch 1 training still running (PID $SWE1_PID) — waiting..."
  # Deliberately unquoted: wait for each matching PID in turn.
  for pid in $SWE1_PID; do
    wait_for_process "$pid" "SWE patch 1 training"
  done
elif [ -n "$LAUNCH_PID" ]; then
  log "Auto-launcher still running (PID $LAUNCH_PID) — waiting..."
  for pid in $LAUNCH_PID; do
    wait_for_process "$pid" "4B SWE auto-launcher"
  done
else
  log "No SWE patch 1 process found — assuming already complete or not started."
fi

# Short settle time after training
sleep 10
145
+
146
+ # ── STAGE 1: Baseline SWE after patch1 ────────────────────────────────────
147
REPORT_DIR="$HOME/prism/training/results/4b_swe"
mkdir -p "$REPORT_DIR"

if [ "$RESUME_ROUND" -gt 0 ]; then
  # Resume path: trust the caller-supplied tag/score and continue the main
  # loop at RESUME_ROUND, using the report of the round before it.
  [ -n "$RESUME_TAG" ] \
    || fail "RESUME_ROUND=$RESUME_ROUND requires RESUME_TAG to be set"
  log "=== RESUMING from round $RESUME_ROUND (tag: $RESUME_TAG, strict: $RESUME_STRICT%) ==="
  CURRENT_TAG="$RESUME_TAG"
  PREV_REPORT="$REPORT_DIR/eval300_r$((RESUME_ROUND - 1)).json"
  # The first loop iteration builds its patch corpus from this report.
  [ -f "$PREV_REPORT" ] \
    || fail "Cannot resume: previous report not found: $PREV_REPORT"
  PREV_STRICT="$RESUME_STRICT"
  STRICT="$RESUME_STRICT"
  ROUND="$RESUME_ROUND"
else
  log "=== STAGE 1: Baseline SWE eval (post-patch1) ==="

  # Check if prism-coder:4b-swe1 exists (patch1 output tag).
  # The listing is captured first and matched exactly on its first column:
  # piping into `grep -q` under pipefail can report a false "missing" when
  # grep exits early, and a substring match would let swe1 match swe10.
  CURRENT_TAG="prism-coder:4b-swe1"
  OLLAMA_MODELS=$(ollama list || true)
  if ! awk -v tag="$CURRENT_TAG" '$1 == tag { found = 1 } END { exit !found }' \
    <<<"$OLLAMA_MODELS"; then
    log "prism-coder:4b-swe1 not in Ollama — running merge for current adapter..."
    CURRENT_TAG="prism-coder:4b-swe0"
    merge_and_register "$CURRENT_TAG" "$BASE_GGUF_DIR/qwen3-4b-swe0-q4km.gguf"
  fi

  run_eval300 "$CURRENT_TAG" "$REPORT_DIR/eval300_r0.json"
  STRICT=$(eval300_strict_pct "$REPORT_DIR/eval300_r0.json")
  log "Round 0 eval_300 strict: ${STRICT}%"

  # Check if already at gate
  if python3 -c "import sys; sys.exit(0 if float('${STRICT}') >= ${EVAL_GATE} * 100 else 1)" 2>/dev/null; then
    log "Already at eval gate (${STRICT}% ≥ $(python3 -c "print(${EVAL_GATE}*100)")%)!"
    log "Promoting prism-coder:4b..."

    # Prefer the GGUF named after the tag that was just evaluated
    # (qwen3-4b-<suffix>-q4km.gguf); only if that file is absent fall back
    # to the most recently modified Q4_K_M GGUF in the models directory.
    BEST_GGUF="$BASE_GGUF_DIR/qwen3-4b-${CURRENT_TAG##*4b-}-q4km.gguf"
    if [ ! -f "$BEST_GGUF" ]; then
      BEST_GGUF=""
      for candidate in "$BASE_GGUF_DIR"/qwen3-4b-*q4km.gguf; do
        [ -e "$candidate" ] || continue   # unmatched glob stays literal
        if [ -z "$BEST_GGUF" ] || [ "$candidate" -nt "$BEST_GGUF" ]; then
          BEST_GGUF=$candidate
        fi
      done
    fi
    # Do not touch the existing prism-coder:4b (or claim success) unless
    # there is actually a model file to promote.
    [ -n "$BEST_GGUF" ] \
      || fail "Gate passed but no Q4_K_M GGUF found in $BASE_GGUF_DIR; prism-coder:4b left unchanged"

    ollama rm prism-coder:4b 2>/dev/null || true
    MODELFILE=$(mktemp)
    printf "FROM %s\nPARAMETER temperature 0\nPARAMETER num_ctx 8192\nPARAMETER num_predict 256\nPARAMETER stop \"<|im_end|>\"\nPARAMETER stop \"<|endoftext|>\"\n" "$BEST_GGUF" > "$MODELFILE"
    if ! ollama create prism-coder:4b -f "$MODELFILE"; then
      rm -f -- "$MODELFILE"
      fail "ollama create prism-coder:4b failed (source: $BEST_GGUF)"
    fi
    rm -f -- "$MODELFILE"
    log "SUCCESS: prism-coder:4b promoted from $CURRENT_TAG"
    exit 0
  fi

  # ── MAIN LOOP ─────────────────────────────────────────────────────────────
  # Initial state for the first patch round.
  PREV_REPORT="$REPORT_DIR/eval300_r0.json"
  PREV_STRICT="$STRICT"
  ROUND=1
fi
193
+
194
# One iteration per patch round: build a corpus from the previous report's
# failures, fine-tune, merge + register, evaluate, then promote or continue.
while [ "$ROUND" -le "$MAX_ROUNDS" ]; do
  log "=== ROUND $ROUND ==="

  # Schedules are 0-indexed; rounds beyond their end use the fallbacks.
  LR_IDX=$((ROUND - 1))
  LR="${LRS[$LR_IDX]:-5e-6}"
  IT="${ITERS[$LR_IDX]:-100}"

  # Build patch corpus from previous round's failures
  DATA_DIR="/tmp/4b_swe_r${ROUND}_data"
  log "Building patch corpus from failures in round $((ROUND-1))..."
  python3 analyze_swe_failures.py \
    --report "$PREV_REPORT" \
    --version "r${ROUND}" \
    --out "$DATA_DIR" \
    --seed $((2031 + ROUND)) \
    2>&1 | tee -a "$ORCH_LOG"

  # A missing train.jsonl counts as zero rows (handled by the threshold
  # below) instead of aborting on a failed redirect. The arithmetic expansion
  # strips the leading padding some `wc` implementations emit.
  if [ -f "$DATA_DIR/train.jsonl" ]; then
    TRAIN_ROWS=$(( $(wc -l < "$DATA_DIR/train.jsonl") ))
  else
    TRAIN_ROWS=0
  fi
  log "Patch corpus: $TRAIN_ROWS train rows"

  if [ "$TRAIN_ROWS" -lt 20 ]; then
    log "Too few failures to patch ($TRAIN_ROWS rows) — stopping loop."
    break
  fi

  # Train
  train_patch "$ROUND" "$DATA_DIR" "$LR" "$IT"

  # Merge + register
  TAG="prism-coder:4b-swe${ROUND}"
  GGUF_Q4="$BASE_GGUF_DIR/qwen3-4b-swe${ROUND}-q4km.gguf"
  merge_and_register "$TAG" "$GGUF_Q4"

  # eval_300 unified eval
  REPORT="$REPORT_DIR/eval300_r${ROUND}.json"
  run_eval300 "$TAG" "$REPORT"
  STRICT=$(eval300_strict_pct "$REPORT")
  log "Round $ROUND eval_300 strict: ${STRICT}% (prev: ${PREV_STRICT}%)"

  # This round's output becomes the baseline for the next one.
  CURRENT_TAG="$TAG"
  PREV_REPORT="$REPORT"
  PREV_STRICT="$STRICT"

  # Check eval gate
  if python3 -c "import sys; sys.exit(0 if float('${STRICT}') >= ${EVAL_GATE} * 100 else 1)" 2>/dev/null; then
    log "Eval gate PASSED: ${STRICT}% ≥ $(python3 -c "print(${EVAL_GATE}*100)")%"
    log "Promoting prism-coder:4b..."
    ollama rm prism-coder:4b 2>/dev/null || true
    MODELFILE=$(mktemp)
    printf "FROM %s\nPARAMETER temperature 0\nPARAMETER num_ctx 8192\nPARAMETER num_predict 256\nPARAMETER stop \"<|im_end|>\"\nPARAMETER stop \"<|endoftext|>\"\n" "$GGUF_Q4" > "$MODELFILE"
    # Remove the temp Modelfile on failure too, and say what went wrong.
    if ! ollama create prism-coder:4b -f "$MODELFILE"; then
      rm -f -- "$MODELFILE"
      fail "ollama create prism-coder:4b failed (source: $GGUF_Q4)"
    fi
    rm -f -- "$MODELFILE"
    log "SUCCESS: prism-coder:4b promoted (eval_300 ${STRICT}%)"
    echo ""
    echo "=== PROMOTED prism-coder:4b ==="
    echo " eval_300 strict: ${STRICT}%"
    echo " Source tag: $CURRENT_TAG"
    echo " GGUF: $GGUF_Q4"
    echo ""
    echo "Next: run 3-seed validation before syncing to Ollama Hub:"
    echo " python3 ~/synalux-private/prism-training/eval_300.py --model prism-coder:4b --runs 3"
    exit 0
  fi

  ROUND=$((ROUND + 1))
done
260
+
261
+ # ── Exhausted rounds ─────────────────────────────────────────────────────────
262
# Reached only when the loop ended without promoting a model. The loop can
# end in two ways: ROUND ran past MAX_ROUNDS, or it stopped early via `break`
# because the patch corpus was too small (ROUND is then still <= MAX_ROUNDS).
# Report which one happened instead of always claiming the rounds ran out.
echo ""
if [ "$ROUND" -gt "$MAX_ROUNDS" ]; then
  echo "=== MAX ROUNDS REACHED: $(date) ===" | tee -a "$ORCH_LOG"
else
  echo "=== STOPPED EARLY at round $ROUND (too few failures to patch): $(date) ===" | tee -a "$ORCH_LOG"
fi
# NOTE(review): CURRENT_TAG / PREV_STRICT describe the most recent round,
# which is not necessarily the best-scoring one; confirm before relying on
# the "Best" label.
echo "Best: $CURRENT_TAG at SWE ${PREV_STRICT}%" | tee -a "$ORCH_LOG"
echo "" | tee -a "$ORCH_LOG"
echo "Manual analysis needed. Remaining failures in: $PREV_REPORT" | tee -a "$ORCH_LOG"
echo "To continue:" | tee -a "$ORCH_LOG"
echo " python3 analyze_swe_failures.py --report $PREV_REPORT --version manual" | tee -a "$ORCH_LOG"
echo " # Review /tmp/4b_swe_manual_data/train.jsonl and add handcrafted examples" | tee -a "$ORCH_LOG"
echo " # Then re-run this script" | tee -a "$ORCH_LOG"