{
  "trainer_kind": "grpo",
  "base_model": "openai/gpt-oss-20b",
  "renderer_name": "gpt_oss_medium_reasoning",
  "checkpoint_uri": "tinker://103b7c28-6715-5755-9482-d2ea06acaa2f:train:0/sampler_weights/final",
  "token_usage": {
    "sampling_input_tokens": 6801392,
    "sampling_output_tokens": 3661879,
    "num_examples": 2322,
    "num_steps": 20
  },
  "trainer_config_snapshot": {
    "kind": "grpo",
    "base_model": "openai/gpt-oss-20b",
    "renderer_name": "gpt_oss_medium_reasoning",
    "learning_rate": 0.00004,
    "group_size": 8,
    "groups_per_batch": 8,
    "num_iters": 20,
    "save_every": 5,
    "max_tokens": 8192,
    "lora_rank": 32,
    "temperature": 1.0,
    "reward": {
      "kind": "correctness_crps"
    }
  }
}