OpenCircuit committed on
Commit
2ff5691
·
1 Parent(s): ccd9618

Deploy BF-Router trainer (Qwen3-4B QLoRA + Gradio)

Browse files
Files changed (4) hide show
  1. Dockerfile +13 -0
  2. README.md +10 -5
  3. app.py +253 -0
  4. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CUDA 12.4 runtime base image (runtime only — no nvcc; all Python deps must ship prebuilt wheels).
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04

# Python + pip for the app, git for any VCS pip installs; purge apt lists to keep the layer small.
RUN apt-get update && apt-get install -y python3 python3-pip git && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy and install requirements before the app code so this layer is cached
# across code-only changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

COPY . .

# 7860 matches app.py's demo.launch(server_port=7860) and the Space README's app_port.
EXPOSE 7860
CMD ["python3", "app.py"]
README.md CHANGED
@@ -1,10 +1,15 @@
1
  ---
2
- title: Bf Router Trainer
3
- emoji: 📚
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
  ---
2
+ title: BF-Router Trainer
3
+ emoji: 🔧
4
+ colorFrom: purple
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
+ license: apache-2.0
9
+ app_port: 7860
10
  ---
11
 
12
+ # BF-Router Trainer
13
+
14
+ QLoRA fine-tuning of Qwen3-4B for BlueprintForge intent routing.
15
+ Training starts automatically. Monitor via Gradio UI.
app.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """BF-Router Trainer Space - QLoRA fine-tuning with live Gradio monitoring."""
3
+ import os, json, time, threading, traceback
4
+ import gradio as gr
5
+
6
# Shared mutable state: written by the background training thread, polled by
# the Gradio UI callbacks. Plain dict (no lock) — single writer, readers only.
status = {
    "state": "initializing", "epoch": 0, "total_epochs": 3, "loss": 0,
    "eval_loss": 0, "progress": 0, "step": 0, "max_steps": 0,
    "log": [], "agent_acc": 0, "tool_acc": 0,
}

def log(msg):
    """Record *msg* in the shared in-memory log (HH:MM:SS stamped) and echo to stdout."""
    stamp = time.strftime("%H:%M:%S")
    status["log"].append(f"[{stamp}] {msg}")
    # flush=True so container logs stream in real time.
    print(msg, flush=True)
13
+
14
def run_training():
    """End-to-end QLoRA fine-tune of Qwen3-4B for BF-Router intent routing.

    Intended to run in a background thread. Stages (each mirrored into the
    shared ``status`` dict for the Gradio UI): load dataset -> load 4-bit
    quantized base model -> SFT training with LoRA adapters -> final eval ->
    routing-accuracy spot check on up to 100 test samples -> optional push to
    the Hub -> write results.json. Any exception is caught and surfaced via
    ``status["state"] == "error"`` instead of killing the thread.
    """
    try:
        # Heavyweight imports are deferred into the thread so the Gradio
        # server below can start immediately.
        import torch
        from datasets import load_dataset
        from transformers import (AutoModelForCausalLM, AutoTokenizer,
                                  BitsAndBytesConfig, TrainerCallback)
        # NOTE(review): PeftModel is imported here but never used in this function.
        from peft import LoraConfig, TaskType, PeftModel
        from trl import SFTConfig, SFTTrainer

        # --- Data ---------------------------------------------------------
        status["state"] = "loading_data"
        log("Loading training data from OpenCircuit/bf-router-training-data...")
        dataset = load_dataset("OpenCircuit/bf-router-training-data", data_dir="data")
        log("Train: %d, Val: %d, Test: %d" % (
            len(dataset["train"]), len(dataset["validation"]), len(dataset["test"])))

        # --- Model (4-bit NF4 QLoRA base) ---------------------------------
        status["state"] = "loading_model"
        log("Loading Qwen3-4B-Instruct-2507 with 4-bit QLoRA...")
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True, bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.bfloat16)

        base_model = "Qwen/Qwen3-4B-Instruct-2507"
        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        # Qwen chat turns terminate with <|im_end|>; pad with the same token,
        # right-padded as required for causal-LM training.
        tokenizer.eos_token = "<|im_end|>"
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"

        model = AutoModelForCausalLM.from_pretrained(
            base_model, quantization_config=bnb_config,
            device_map="auto", trust_remote_code=True)
        # KV-cache is incompatible with gradient checkpointing during training.
        model.config.use_cache = False
        log("Model loaded: %dM params" % (model.num_parameters() / 1e6))

        # Flatten each chat sample into a single "text" field via the model's
        # chat template (no generation prompt — targets are in the messages).
        def fmt(s):
            text = tokenizer.apply_chat_template(
                s["messages"], tokenize=False, add_generation_prompt=False)
            return {"text": text}

        ftrain = dataset["train"].map(fmt, remove_columns=dataset["train"].column_names)
        fval = dataset["validation"].map(fmt, remove_columns=dataset["validation"].column_names)

        # --- LoRA + trainer configuration ---------------------------------
        # Adapters on all attention and MLP projections.
        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM, r=16, lora_alpha=32,
            lora_dropout=0.05, bias="none",
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                            "gate_proj", "up_proj", "down_proj"])

        out_dir = "/app/output/bf-router-v0.5"
        # Effective batch size = 4 (per device) * 4 (accumulation) = 16.
        # NOTE(review): newer trl releases renamed SFTConfig's `max_seq_length`
        # to `max_length` — confirm against the trl version actually installed.
        args = SFTConfig(
            output_dir=out_dir, num_train_epochs=3,
            per_device_train_batch_size=4, per_device_eval_batch_size=4,
            gradient_accumulation_steps=4, gradient_checkpointing=True,
            gradient_checkpointing_kwargs={"use_reentrant": False},
            optim="adamw_torch_fused", learning_rate=2e-4,
            lr_scheduler_type="cosine", warmup_ratio=0.03,
            max_grad_norm=0.3, weight_decay=0.01, bf16=True,
            max_seq_length=2048, logging_steps=10, logging_first_step=True,
            save_strategy="epoch", eval_strategy="epoch", save_total_limit=3,
            load_best_model_at_end=True, metric_for_best_model="eval_loss",
            greater_is_better=False, report_to="none", seed=42)

        # Mirrors trainer log events into the shared status dict so the
        # Gradio UI can display live progress.
        class StatusCallback(TrainerCallback):
            def on_log(self, a, state, control, logs=None, **kw):
                if logs:
                    status["epoch"] = logs.get("epoch", 0)
                    # Fall back to eval_loss on eval-only log events.
                    status["loss"] = logs.get("loss", logs.get("eval_loss", 0))
                    if "eval_loss" in logs:
                        status["eval_loss"] = logs["eval_loss"]
                    status["step"] = state.global_step
                    status["max_steps"] = state.max_steps
                    if state.max_steps:
                        status["progress"] = state.global_step / state.max_steps * 100

        # --- Training ------------------------------------------------------
        status["state"] = "training"
        log("Starting QLoRA fine-tuning (3 epochs, effective batch=16)...")

        # NOTE(review): trl deprecated/renamed SFTTrainer's `tokenizer=` kwarg
        # to `processing_class=` in recent versions — verify compatibility with
        # the pinned trl release.
        trainer = SFTTrainer(
            model=model, tokenizer=tokenizer, args=args,
            peft_config=lora_config, train_dataset=ftrain,
            eval_dataset=fval, callbacks=[StatusCallback()])
        trainer.train()

        trainer.save_model(out_dir)
        tokenizer.save_pretrained(out_dir)
        eval_results = trainer.evaluate(fval)
        status["eval_loss"] = eval_results["eval_loss"]
        log("Final eval loss: %.4f" % eval_results["eval_loss"])

        # --- Quick accuracy eval -------------------------------------------
        # Greedy-ish sampled generation over at most 100 test samples;
        # counts exact matches on the predicted "agent" field only.
        status["state"] = "evaluating"
        log("Evaluating routing accuracy on test set...")
        correct_agent = 0
        total = 0
        test_subset = dataset["test"].select(range(min(100, len(dataset["test"]))))
        model.eval()
        device = next(model.parameters()).device

        for sample in test_subset:
            msgs = sample["messages"]
            # Gold label: the final assistant turn is a JSON routing decision.
            expected = json.loads(msgs[-1]["content"])
            inp = tokenizer.apply_chat_template(
                msgs[:-1], tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(inp, return_tensors="pt").to(device)
            with torch.no_grad():
                out = model.generate(
                    **inputs, max_new_tokens=256, temperature=0.3,
                    top_p=0.7, do_sample=True,
                    pad_token_id=tokenizer.pad_token_id)
            # Decode only the newly generated continuation, not the prompt.
            gen = tokenizer.decode(
                out[0][inputs["input_ids"].shape[1]:],
                skip_special_tokens=True).strip()
            try:
                pred = json.loads(gen)
                if pred.get("agent") == expected.get("agent"):
                    correct_agent += 1
            except Exception:
                # Malformed JSON from the model simply counts as incorrect.
                pass
            total += 1

        acc = correct_agent / total * 100 if total else 0
        status["agent_acc"] = acc
        log("Agent routing accuracy: %.1f%% (%d/%d)" % (acc, correct_agent, total))

        # --- Push to Hub (best effort, only when HF_TOKEN is configured) ----
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            log("Pushing model to OpenCircuit/bf-router...")
            from huggingface_hub import HfApi
            api = HfApi(token=hf_token)
            try:
                api.create_repo("OpenCircuit/bf-router", exist_ok=True)
            except Exception:
                # Repo may already exist or creation may be forbidden; the
                # push below will surface any real failure.
                pass
            trainer.push_to_hub(repo_id="OpenCircuit/bf-router", token=hf_token)
            log("Model pushed to Hub!")

        status["state"] = "complete"
        log("Training complete!")

        # Persist headline metrics next to the adapter weights.
        with open(os.path.join(out_dir, "results.json"), "w") as f:
            json.dump({"eval_loss": status["eval_loss"],
                       "agent_accuracy": acc, "total_test": total}, f, indent=2)

    except Exception as e:
        # Broad catch is deliberate: this is the thread's top-level boundary,
        # and the error is surfaced to the UI via the status dict.
        status["state"] = "error"
        status["error"] = str(e)
        log("ERROR: %s" % str(e))
        log(traceback.format_exc())
162
+
163
+
164
# Start training in a background daemon thread so the Gradio server below can
# come up immediately; progress is reported through the shared `status` dict.
t = threading.Thread(target=run_training, daemon=True)
t.start()
167
+
168
+
169
# Gradio UI
# System prompt for the interactive "Test Model" tab. It states the routing
# contract (JSON response schema, the 7 agent ids) that the model was
# fine-tuned to follow; runtime text must not be altered.
SYSTEM_PROMPT = (
    'You are BF-Router, the intent classifier for BlueprintForge. '
    'Analyze the user\'s message and respond with JSON: '
    '{"agent":"<id>","confidence":<0-1>,"reason":"<why>",'
    '"tools":["<tool1>",...],"chain":[]}. '
    'Agents: manny (builder), ping (investigator), fuse (debugger), '
    'bit (planner), mainframe (knowledge), sc (tester), '
    'willow (human-translator).'
)
179
+
180
+
181
def get_status():
    """Render the current training status as a Markdown table for the UI.

    Reads the shared `status` dict (written by the training thread) and
    returns a Markdown string; appends the error message when training failed.
    """
    # Fix: the original defined an `icons` state->emoji-name dict that was
    # never referenced anywhere in the function — dead code, removed.
    state = status["state"]
    md = "## BF-Router Training\n\n"
    md += "| Metric | Value |\n|--------|-------|\n"
    md += "| **State** | %s |\n" % state
    md += "| **Progress** | %.1f%% (%d/%d) |\n" % (
        status["progress"], status["step"], status["max_steps"])
    md += "| **Epoch** | %.2f / %d |\n" % (status["epoch"], status["total_epochs"])
    md += "| **Train Loss** | %.4f |\n" % status["loss"]
    md += "| **Eval Loss** | %.4f |\n" % status["eval_loss"]
    md += "| **Agent Accuracy** | %.1f%% |\n" % status["agent_acc"]
    if status.get("error"):
        md += "\n**Error:** `%s`" % status["error"]
    return md
200
+
201
+
202
def get_logs():
    """Return the 50 most recent training-log lines, newline-joined."""
    recent = status["log"][-50:]
    return "\n".join(recent)
204
+
205
+
206
def test_model(query):
    """Route *query* through the fine-tuned adapter and return the generation.

    Returns a "please wait" string until background training has completed.
    Any failure is returned as an "Error: ..." string rather than raised, so
    the Gradio click handler never crashes the UI.
    """
    if status["state"] != "complete":
        return "Training is %s. Please wait for completion." % status["state"]
    try:
        # Heavyweight imports deferred so the UI process starts fast.
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        from peft import PeftModel
        # Must match the output_dir used by run_training().
        out_dir = "/app/output/bf-router-v0.5"
        tok = AutoTokenizer.from_pretrained(out_dir, trust_remote_code=True)
        # NOTE(review): the full base model + adapter are reloaded on every
        # button click — consider caching if latency/VRAM matters. Also loads
        # in float16 here vs the bf16 used during training — confirm intended.
        mdl = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen3-4B-Instruct-2507",
            device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
        # Attach the LoRA adapter produced by run_training().
        mdl = PeftModel.from_pretrained(mdl, out_dir)
        msgs = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": query}
        ]
        txt = tok.apply_chat_template(
            msgs, tokenize=False, add_generation_prompt=True)
        inp = tok(txt, return_tensors="pt").to(mdl.device)
        with torch.no_grad():
            # Sampling settings mirror the accuracy eval in run_training().
            out = mdl.generate(
                **inp, max_new_tokens=256, temperature=0.3,
                top_p=0.7, do_sample=True)
        # Decode only the newly generated continuation, not the prompt.
        return tok.decode(
            out[0][inp["input_ids"].shape[1]:], skip_special_tokens=True)
    except Exception as ex:
        # Boundary catch: surface the error as text in the UI output box.
        return "Error: %s" % str(ex)
234
+
235
+
236
# Gradio dashboard: live status table + log viewer (both auto-refreshing),
# plus a manual "Test Model" routing probe.
with gr.Blocks(title="BF-Router Trainer") as demo:
    gr.Markdown(
        "# BF-Router Fine-Tuning\n"
        "QLoRA training of Qwen3-4B for BlueprintForge 7-agent routing"
    )
    with gr.Row():
        with gr.Column(scale=1):
            # Callable value + every=5 makes Gradio re-poll get_status.
            status_md = gr.Markdown(get_status, every=5)
        with gr.Column(scale=2):
            log_box = gr.Textbox(get_logs, label="Training Log", lines=20, every=5)
    gr.Markdown("---\n## Test Model")
    with gr.Row():
        q = gr.Textbox(label="Query", placeholder="Build a health bar for the player")
        btn = gr.Button("Route", variant="primary")
    out = gr.JSON(label="BF-Router Response")
    # NOTE(review): test_model returns a plain string; gr.JSON will attempt to
    # parse it, so non-JSON error strings may render oddly — confirm in the UI.
    btn.click(test_model, inputs=q, outputs=out)

# Bind to 0.0.0.0:7860 to match the Dockerfile EXPOSE and the Space app_port.
demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ torch>=2.4.0
2
+ transformers>=4.51.0
3
+ peft>=0.14.0
4
+ trl>=0.16.0
5
+ datasets>=3.0.0
6
+ bitsandbytes>=0.45.0
7
+ accelerate>=1.3.0
8
+ huggingface_hub>=0.28.0
9
+ safetensors>=0.4.0
10
+ gradio>=5.0.0