Fixed code: vocab mismatch fix for cross-arch merging (Llama/Falcon)

5d61448 verified 3 months ago

1.2 kB

	# demo_td_loop.td — The complete TD self-improvement pipeline
	# This is what td_loop runs: merge, then iterate to get smarter

	# Gate: the three named checks listed under must_pass.
	# NOTE(review): presumably these are what `eval_passed` consults in the loop below; confirm against the td runtime.
	gate { must_pass = [canary, perplexity, thinking_mode] }
	# Budget: caps of 50 GPU hours and 200.00 cost. The currency unit is not stated in this file.
	budget { max_gpu_hours = 50 max_cost = 200.00 }

	# Data contract: requires the fields `prompt` and `response`, at least 50 samples,
	# and a perplexity ceiling of 50.0.
	# NOTE(review): presumably applied to the synthesized training_data.jsonl; the file does not say which dataset it validates. Confirm.
	data_contract {
	required_fields = [prompt, response]
	min_samples = 50
	max_perplexity = 50.0
	}

	# Reward contract: names two verifiers (code_compiles, math_correct) and a minimum reward of 0.3.
	# NOTE(review): presumably consumed by the `grpo` train step in the loop below. TODO confirm how min_reward is enforced.
	reward_contract {
	verifiers = [code_compiles, math_correct]
	min_reward = 0.3
	}

	# Step 1: Load base model
	# Binds the model id below to the name `base`, which every later statement refers to.
	load "Qwen/Qwen3-VL-8B-Instruct" as base

	# Step 2: Fuse all donor models in one shot
	# Four donors are fused into `base` in a single statement.
	# NOTE(review): "MiMo-7B" is the only donor id without an `org/` prefix; confirm it resolves.
	fuse ["deepseek-ai/DeepSeek-R1", "MiMo-7B", "meta-llama/Llama-3.1-8B", "tiiuae/Falcon-H1R-7B"] into base

	# Step 3: Heal the merge damage
	# Heals `base` with lora_r 32 for 2 epochs, then snapshots the result to snapshots/.
	# NOTE(review): the snapshot target is unquoted here, while the later `reset` uses the quoted "snapshots/"; confirm both forms are accepted.
	heal base lora_r 32 epochs 2
	snapshot base -> snapshots/

	# Step 4: Self-improvement loop (the core of TD)
	# Runs the body 5 times. Each pass: diagnose -> synth -> train -> eval, then either
	# commit + snapshot (eval passed) or reset from "snapshots/" (eval failed).
	repeat 5 {
	# Writes weaknesses.json.
	# NOTE(review): no later statement references weaknesses.json by name; presumably synth picks it up implicitly. Confirm.
	diagnose base -> weaknesses.json
	# `base` is both the target and the `from` source here; output is filtered by cherry_llm into training_data.jsonl.
	synth base from base filter cherry_llm -> training_data.jsonl
	# Trains on the file written by the synth statement above, using grpo with 64 steps and lr 5e-5.
	train base on "training_data.jsonl" using grpo steps 64 lr 5e-5
	# Writes eval_results.json.
	eval base -> eval_results.json

	# Keep the iteration only when eval passed; otherwise reset `base` from "snapshots/".
	# NOTE(review): presumably eval_passed checks the `gate` must_pass list, and reset restores the latest snapshot. Confirm.
	if eval_passed base {
	commit base
	snapshot base -> snapshots/
	} else {
	reset base to "snapshots/"
	}
	}

	# Step 5: Final report
	# Writes the report to final_economics.json.
	# NOTE(review): the filename suggests cost/GPU-hour accounting against `budget`; the contents are not visible here. Confirm.
	report -> final_economics.json