{ "trainer_kind": "grpo", "base_model": "openai/gpt-oss-20b", "renderer_name": "gpt_oss_medium_reasoning", "checkpoint_uri": "tinker://74ca18ef-36ae-5a4d-9aab-21ac220f42dd:train:0/sampler_weights/final", "token_usage": { "sampling_input_tokens": 3053072, "sampling_output_tokens": 825595, "num_examples": 2322, "num_steps": 20 }, "trainer_config_snapshot": { "kind": "grpo", "base_model": "openai/gpt-oss-20b", "renderer_name": "gpt_oss_medium_reasoning", "learning_rate": 0.00004, "group_size": 8, "groups_per_batch": 8, "num_iters": 20, "save_every": 5, "max_tokens": 8192, "lora_rank": 32, "temperature": 1.0, "reward": { "kind": "correctness" } } }