td-toolkit / td_lang /examples /demo_td_loop.td
td-builder's picture
Fixed code: vocab mismatch fix for cross-arch merging (Llama/Falcon)
5d61448 verified
# demo_td_loop.td — The complete TD self-improvement pipeline
# This is what td_loop runs: merge, then iterate to get smarter
# Run-wide declarations. How each one is enforced is decided by the td_lang
# interpreter, which is not visible from this file; the notes below describe
# only what is declared here.
#
# gate: lists three checks under must_pass: canary, perplexity, thinking_mode.
# NOTE(review): presumably these are what `eval` / `eval_passed` consult inside
# the loop further down - confirm against the interpreter.
gate { must_pass = [canary, perplexity, thinking_mode] }
# budget: declares max_gpu_hours = 50 and max_cost = 200.00 on a single line,
# separated only by whitespace.
# NOTE(review): the currency unit of max_cost is not stated here - confirm.
budget { max_gpu_hours = 50 max_cost = 200.00 }
# data_contract: declares required fields `prompt` and `response`, min_samples
# of 50 and max_perplexity of 50.0.
# NOTE(review): presumably applied to the synthesized training data - confirm.
data_contract {
required_fields = [prompt, response]
min_samples = 50
max_perplexity = 50.0
}
# reward_contract: declares two verifiers (code_compiles, math_correct) and a
# min_reward of 0.3.
# NOTE(review): presumably consumed by the `train ... using grpo` step - confirm.
reward_contract {
verifiers = [code_compiles, math_correct]
min_reward = 0.3
}
# Step 1: Load base model
# Binds the model identifier to the name `base`; every later statement in this
# script refers to the model through that name.
load "Qwen/Qwen3-VL-8B-Instruct" as base
# Step 2: Fuse all donor models in one shot
# Four donors are listed in a single fuse statement targeting `base`.
# NOTE(review): "MiMo-7B" is the only identifier without an "org/" prefix,
# unlike the other three donors and the base model - confirm it resolves to the
# intended repository.
fuse ["deepseek-ai/DeepSeek-R1", "MiMo-7B", "meta-llama/Llama-3.1-8B", "tiiuae/Falcon-H1R-7B"] into base
# Step 3: Heal the merge damage
# Runs heal on `base` with lora_r 32 and epochs 2, then takes a snapshot to
# snapshots/ before the loop starts.
# NOTE(review): presumably this snapshot is what the first `reset` in the loop
# falls back to if the first iteration fails evaluation - confirm.
heal base lora_r 32 epochs 2
snapshot base -> snapshots/
# Step 4: Self-improvement loop (the core of TD)
# Runs the body 5 times. Each pass, in order:
#   1. diagnose base, output directed to weaknesses.json
#   2. synth for base from base (the same model is both target and `from`
#      source), filtered with cherry_llm, output directed to training_data.jsonl
#   3. train base on "training_data.jsonl" using grpo, steps 64, lr 5e-5
#   4. eval base, output directed to eval_results.json
#   5. if eval_passed: commit base and snapshot it to snapshots/;
#      otherwise: reset base to "snapshots/"
# NOTE(review): no statement here names weaknesses.json or eval_results.json as
# an input; presumably synth and eval_passed pick them up implicitly - confirm.
# NOTE(review): the snapshot target is written unquoted (snapshots/) while the
# reset source is quoted ("snapshots/") - confirm both resolve to the same
# location and that reset restores the most recent snapshot.
# NOTE(review): each pass overwrites the same three output file names -
# presumably intentional; confirm if per-iteration history is needed.
repeat 5 {
diagnose base -> weaknesses.json
synth base from base filter cherry_llm -> training_data.jsonl
train base on "training_data.jsonl" using grpo steps 64 lr 5e-5
eval base -> eval_results.json
if eval_passed base {
commit base
snapshot base -> snapshots/
} else {
reset base to "snapshots/"
}
}
# Step 5: Final report
# Emitted once, after the repeat block has finished; output is directed to
# final_economics.json.
# NOTE(review): presumably summarizes spend against the `budget` declaration
# (GPU hours / cost) - confirm what the report actually contains.
report -> final_economics.json