| { | |
| "trainer_kind": "grpo", | |
| "base_model": "openai/gpt-oss-20b", | |
| "renderer_name": "gpt_oss_medium_reasoning", | |
| "checkpoint_uri": "tinker://74ca18ef-36ae-5a4d-9aab-21ac220f42dd:train:0/sampler_weights/final", | |
| "token_usage": { | |
| "sampling_input_tokens": 3053072, | |
| "sampling_output_tokens": 825595, | |
| "num_examples": 2322, | |
| "num_steps": 20 | |
| }, | |
| "trainer_config_snapshot": { | |
| "kind": "grpo", | |
| "base_model": "openai/gpt-oss-20b", | |
| "renderer_name": "gpt_oss_medium_reasoning", | |
| "learning_rate": 0.00004, | |
| "group_size": 8, | |
| "groups_per_batch": 8, | |
| "num_iters": 20, | |
| "save_every": 5, | |
| "max_tokens": 8192, | |
| "lora_rank": 32, | |
| "temperature": 1.0, | |
| "reward": { | |
| "kind": "correctness" | |
| } | |
| } | |
| } |