| # demo_td_loop.td — The complete TD self-improvement pipeline | |
| # This is what td_loop runs: fuse donor models into a base, heal the result, then iterate to get smarter. The declarations below set a gate (checks listed in must_pass), a budget (caps on GPU hours and cost), and data/reward contracts; how the runtime enforces them is not visible in this file (TODO confirm). | |
| gate { must_pass = [canary, perplexity, thinking_mode] } | |
| budget { max_gpu_hours = 50 max_cost = 200.00 } | |
| data_contract { | |
| required_fields = [prompt, response] | |
| min_samples = 50 | |
| max_perplexity = 50.0 | |
| } | |
| reward_contract { | |
| verifiers = [code_compiles, math_correct] | |
| min_reward = 0.3 | |
| } | |
| # Step 1: Load the base model and bind it to the name "base", which every later step operates on | |
| load "Qwen/Qwen3-VL-8B-Instruct" as base | |
| # Step 2: Fuse all four donor models into base in one shot. NOTE(review): the donors appear to come from different model families than the base; confirm that fuse supports this mix. | |
| fuse ["deepseek-ai/DeepSeek-R1", "MiMo-7B", "meta-llama/Llama-3.1-8B", "tiiuae/Falcon-H1R-7B"] into base | |
| # Step 3: Heal the merge damage (lora_r 32, 2 epochs), then snapshot the healed model to snapshots/, the same path the loop's reset step reads from | |
| heal base lora_r 32 epochs 2 | |
| snapshot base -> snapshots/ | |
| # Step 4: Self-improvement loop (the core of TD). Each of the 5 rounds runs diagnose, synth, train (grpo, 64 steps, lr 5e-5) and eval; a round that satisfies eval_passed is committed and snapshotted, otherwise base is reset to "snapshots/". NOTE(review): weaknesses.json and eval_results.json are written but never referenced by name later in this script; confirm that synth and eval_passed pick them up implicitly. | |
| repeat 5 { | |
| diagnose base -> weaknesses.json | |
| synth base from base filter cherry_llm -> training_data.jsonl | |
| train base on "training_data.jsonl" using grpo steps 64 lr 5e-5 | |
| eval base -> eval_results.json | |
| if eval_passed base { | |
| commit base | |
| snapshot base -> snapshots/ | |
| } else { | |
| reset base to "snapshots/" | |
| } | |
| } | |
| # Step 5: Final report, written to final_economics.json after the loop finishes | |
| report -> final_economics.json | |