| # test_phase2.td — Testing all Phase 2 commands | |
| load "Qwen/Qwen3-VL-8B-Instruct" as base | |
| # diagnose base -> weaknesses.json — asks the model what it's bad at | |
| diagnose base -> weaknesses.json | |
| # synth base from web_curated filter cherry_llm -> data.jsonl — generates training data | |
| synth base from web_curated filter cherry_llm -> data.jsonl | |
| # train base on "data.jsonl" using grpo steps 64 — GRPO training | |
| train base on "data.jsonl" using grpo steps 64 | |
| # debate base rounds 3 candidates 8 -> pairs.jsonl — persona debate for preference pairs | |
| debate base rounds 3 candidates 8 -> pairs.jsonl | |
| # eval base -> final_eval.json — presumably evaluates the model bound to `base` and writes the results to final_eval.json (TODO confirm) | |
| eval base -> final_eval.json | |
| # commit base — presumably persists the current state of `base`; exact semantics not visible here (TODO confirm) | |
| commit base | |