{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9893390191897655,
  "eval_steps": 60,
  "global_step": 174,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 705.7877807617188,
      "epoch": 0.017057569296375266,
      "grad_norm": 0.2958298623561859,
      "kl": 0.0,
      "learning_rate": 1e-06,
      "loss": 0.0834,
      "reward": 0.47402435541152954,
      "reward_std": 0.3045174852013588,
      "rewards/cosine_scaled_reward": 0.47402435541152954,
      "step": 1
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 692.6982593536377,
      "epoch": 0.08528784648187633,
      "grad_norm": 0.17230483889579773,
      "kl": 0.00200594961643219,
      "learning_rate": 1e-06,
      "loss": 0.1076,
      "reward": 0.4518069000914693,
      "reward_std": 0.2899208152666688,
      "rewards/cosine_scaled_reward": 0.4518069000914693,
      "step": 5
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 685.9979393005372,
      "epoch": 0.17057569296375266,
      "grad_norm": 0.16429604589939117,
      "kl": 0.00012786388397216796,
      "learning_rate": 1e-06,
      "loss": 0.1077,
      "reward": 0.4576279394328594,
      "reward_std": 0.2894581612199545,
      "rewards/cosine_scaled_reward": 0.4576279394328594,
      "step": 10
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 664.4338722229004,
      "epoch": 0.255863539445629,
      "grad_norm": 0.29537156224250793,
      "kl": 0.0002796053886413574,
      "learning_rate": 1e-06,
      "loss": 0.0932,
      "reward": 0.48066430613398553,
      "reward_std": 0.2709693659096956,
      "rewards/cosine_scaled_reward": 0.48066430613398553,
      "step": 15
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 661.8659011840821,
      "epoch": 0.3411513859275053,
      "grad_norm": 0.17067702114582062,
      "kl": 0.0004772186279296875,
      "learning_rate": 1e-06,
      "loss": 0.0929,
      "reward": 0.4877126231789589,
      "reward_std": 0.26678171902894976,
      "rewards/cosine_scaled_reward": 0.4877126231789589,
      "step": 20
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 686.9898681640625,
      "epoch": 0.42643923240938164,
      "grad_norm": 0.22597813606262207,
      "kl": 0.00075836181640625,
      "learning_rate": 1e-06,
      "loss": 0.0922,
      "reward": 0.4946805603802204,
      "reward_std": 0.26594343446195123,
      "rewards/cosine_scaled_reward": 0.4946805603802204,
      "step": 25
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 700.4682479858399,
      "epoch": 0.511727078891258,
      "grad_norm": 0.1372915506362915,
      "kl": 0.0009161949157714844,
      "learning_rate": 1e-06,
      "loss": 0.0724,
      "reward": 0.49349360913038254,
      "reward_std": 0.2691137969493866,
      "rewards/cosine_scaled_reward": 0.49349360913038254,
      "step": 30
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 678.8015800476074,
      "epoch": 0.5970149253731343,
      "grad_norm": 0.11743893474340439,
      "kl": 0.0013933181762695312,
      "learning_rate": 1e-06,
      "loss": 0.0516,
      "reward": 0.49990383088588713,
      "reward_std": 0.2354368444532156,
      "rewards/cosine_scaled_reward": 0.49990383088588713,
      "step": 35
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 670.113818359375,
      "epoch": 0.6823027718550106,
      "grad_norm": 0.1322937160730362,
      "kl": 0.001470184326171875,
      "learning_rate": 1e-06,
      "loss": 0.0423,
      "reward": 0.4925546832382679,
      "reward_std": 0.23934022188186646,
      "rewards/cosine_scaled_reward": 0.4925546832382679,
      "step": 40
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 693.9797073364258,
      "epoch": 0.767590618336887,
      "grad_norm": 0.11684294044971466,
      "kl": 0.0016462326049804688,
      "learning_rate": 1e-06,
      "loss": 0.0472,
      "reward": 0.5110804848372936,
      "reward_std": 0.24233248196542262,
      "rewards/cosine_scaled_reward": 0.5110804848372936,
      "step": 45
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 703.8312713623047,
      "epoch": 0.8528784648187633,
      "grad_norm": 0.13510337471961975,
      "kl": 0.0022981643676757814,
      "learning_rate": 1e-06,
      "loss": 0.0378,
      "reward": 0.5143898174166679,
      "reward_std": 0.24455392695963382,
      "rewards/cosine_scaled_reward": 0.5143898174166679,
      "step": 50
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 676.6164245605469,
      "epoch": 0.9381663113006397,
      "grad_norm": 0.12922672927379608,
      "kl": 0.0027494430541992188,
      "learning_rate": 1e-06,
      "loss": 0.0368,
      "reward": 0.538255549967289,
      "reward_std": 0.23528089113533496,
      "rewards/cosine_scaled_reward": 0.538255549967289,
      "step": 55
    },
    {
      "epoch": 1.0341151385927505,
      "grad_norm": 0.1454162746667862,
      "learning_rate": 1e-06,
      "loss": 0.0173,
      "step": 60
    },
    {
      "epoch": 1.0341151385927505,
      "eval_clip_ratio": 0.0,
      "eval_completion_length": 693.2279481887817,
      "eval_kl": 0.004572391510009766,
      "eval_loss": 0.017638780176639557,
      "eval_reward": 0.4241527561098337,
      "eval_reward_std": 0.27252104552462697,
      "eval_rewards/cosine_scaled_reward": 0.4241527561098337,
      "eval_runtime": 731.0086,
      "eval_samples_per_second": 0.684,
      "eval_steps_per_second": 0.008,
      "step": 60
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 658.7437713623046,
      "epoch": 1.1194029850746268,
      "grad_norm": 0.14889299869537354,
      "kl": 0.004099464416503907,
      "learning_rate": 1e-06,
      "loss": 0.0233,
      "reward": 0.5276085119694471,
      "reward_std": 0.22797267828136683,
      "rewards/cosine_scaled_reward": 0.5276085119694471,
      "step": 65
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 661.8356964111329,
      "epoch": 1.2046908315565032,
      "grad_norm": 0.13085317611694336,
      "kl": 0.0031612396240234377,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "reward": 0.5444076530635357,
      "reward_std": 0.22460445892065764,
      "rewards/cosine_scaled_reward": 0.5444076530635357,
      "step": 70
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 702.0494995117188,
      "epoch": 1.2899786780383795,
      "grad_norm": 0.15662160515785217,
      "kl": 0.003662109375,
      "learning_rate": 1e-06,
      "loss": 0.0192,
      "reward": 0.5370461963117122,
      "reward_std": 0.23422690220177173,
      "rewards/cosine_scaled_reward": 0.5370461963117122,
      "step": 75
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 675.3036636352539,
      "epoch": 1.375266524520256,
      "grad_norm": 0.14926958084106445,
      "kl": 0.004243087768554687,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "reward": 0.5500567473471165,
      "reward_std": 0.2124465636909008,
      "rewards/cosine_scaled_reward": 0.5500567473471165,
      "step": 80
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 690.0687698364258,
      "epoch": 1.4605543710021323,
      "grad_norm": 0.12932759523391724,
      "kl": 0.004961395263671875,
      "learning_rate": 1e-06,
      "loss": 0.0239,
      "reward": 0.5469569325447082,
      "reward_std": 0.22115669399499893,
      "rewards/cosine_scaled_reward": 0.5469569325447082,
      "step": 85
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 704.1914291381836,
      "epoch": 1.5458422174840085,
      "grad_norm": 0.27263641357421875,
      "kl": 0.011516571044921875,
      "learning_rate": 1e-06,
      "loss": 0.0134,
      "reward": 0.540603245049715,
      "reward_std": 0.220616265386343,
      "rewards/cosine_scaled_reward": 0.540603245049715,
      "step": 90
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 705.2396011352539,
      "epoch": 1.6311300639658848,
      "grad_norm": 0.1261100471019745,
      "kl": 0.0060760498046875,
      "learning_rate": 1e-06,
      "loss": 0.0069,
      "reward": 0.5562940575182438,
      "reward_std": 0.22738375030457975,
      "rewards/cosine_scaled_reward": 0.5562940575182438,
      "step": 95
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 708.0935134887695,
      "epoch": 1.716417910447761,
      "grad_norm": 0.20003671944141388,
      "kl": 0.0080291748046875,
      "learning_rate": 1e-06,
      "loss": 0.01,
      "reward": 0.5645768508315087,
      "reward_std": 0.22589275762438774,
      "rewards/cosine_scaled_reward": 0.5645768508315087,
      "step": 100
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 730.0916885375976,
      "epoch": 1.8017057569296375,
      "grad_norm": 0.14709459245204926,
      "kl": 0.0078338623046875,
      "learning_rate": 1e-06,
      "loss": 0.0176,
      "reward": 0.5447159253060818,
      "reward_std": 0.2277662731707096,
      "rewards/cosine_scaled_reward": 0.5447159253060818,
      "step": 105
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 733.7476760864258,
      "epoch": 1.886993603411514,
      "grad_norm": 0.14343297481536865,
      "kl": 0.009282684326171875,
      "learning_rate": 1e-06,
      "loss": 0.0221,
      "reward": 0.5597066521644593,
      "reward_std": 0.23097761012613774,
      "rewards/cosine_scaled_reward": 0.5597066521644593,
      "step": 110
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 745.3560104370117,
      "epoch": 1.9722814498933903,
      "grad_norm": 0.21631674468517303,
      "kl": 0.011328125,
      "learning_rate": 1e-06,
      "loss": 0.0179,
      "reward": 0.5601713679730892,
      "reward_std": 0.2243567120283842,
      "rewards/cosine_scaled_reward": 0.5601713679730892,
      "step": 115
    },
    {
      "epoch": 2.068230277185501,
      "grad_norm": 0.22386282682418823,
      "learning_rate": 1e-06,
      "loss": 0.0123,
      "step": 120
    },
    {
      "epoch": 2.068230277185501,
      "eval_clip_ratio": 0.0,
      "eval_completion_length": 754.1520328521729,
      "eval_kl": 0.016979217529296875,
      "eval_loss": 0.012520050629973412,
      "eval_reward": 0.4722373131662607,
      "eval_reward_std": 0.2587718339636922,
      "eval_rewards/cosine_scaled_reward": 0.4722373131662607,
      "eval_runtime": 724.6498,
      "eval_samples_per_second": 0.69,
      "eval_steps_per_second": 0.008,
      "step": 120
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 749.0557495117188,
      "epoch": 2.1535181236673773,
      "grad_norm": 0.17756131291389465,
      "kl": 0.0161590576171875,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "reward": 0.5662507023662329,
      "reward_std": 0.22460255604237317,
      "rewards/cosine_scaled_reward": 0.5662507023662329,
      "step": 125
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 757.4320526123047,
      "epoch": 2.2388059701492535,
      "grad_norm": 0.24769122898578644,
      "kl": 0.02352294921875,
      "learning_rate": 1e-06,
      "loss": 0.0179,
      "reward": 0.5909504756331444,
      "reward_std": 0.22746318429708481,
      "rewards/cosine_scaled_reward": 0.5909504756331444,
      "step": 130
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 778.7044509887695,
      "epoch": 2.3240938166311302,
      "grad_norm": 0.25195103883743286,
      "kl": 0.02833251953125,
      "learning_rate": 1e-06,
      "loss": 0.0091,
      "reward": 0.5315394312143326,
      "reward_std": 0.23098385594785215,
      "rewards/cosine_scaled_reward": 0.5315394312143326,
      "step": 135
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 780.8755416870117,
      "epoch": 2.4093816631130065,
      "grad_norm": 0.3621465265750885,
      "kl": 0.0350982666015625,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "reward": 0.5728602990508079,
      "reward_std": 0.24233178310096265,
      "rewards/cosine_scaled_reward": 0.5728602990508079,
      "step": 140
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 777.0599136352539,
      "epoch": 2.4946695095948828,
      "grad_norm": 0.2630998492240906,
      "kl": 0.053851318359375,
      "learning_rate": 1e-06,
      "loss": 0.0229,
      "reward": 0.5749510392546654,
      "reward_std": 0.25164939016103743,
      "rewards/cosine_scaled_reward": 0.5749510392546654,
      "step": 145
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 806.2307495117187,
      "epoch": 2.579957356076759,
      "grad_norm": 0.7152215242385864,
      "kl": 0.075592041015625,
      "learning_rate": 1e-06,
      "loss": 0.0256,
      "reward": 0.5316810458898544,
      "reward_std": 0.24946709722280502,
      "rewards/cosine_scaled_reward": 0.5316810458898544,
      "step": 150
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 807.8872589111328,
      "epoch": 2.6652452025586353,
      "grad_norm": 0.9139208197593689,
      "kl": 0.1150634765625,
      "learning_rate": 1e-06,
      "loss": 0.0312,
      "reward": 0.4877690590918064,
      "reward_std": 0.28349833004176617,
      "rewards/cosine_scaled_reward": 0.4877690590918064,
      "step": 155
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 926.7297088623047,
      "epoch": 2.750533049040512,
      "grad_norm": 1.1683220863342285,
      "kl": 0.2032470703125,
      "learning_rate": 1e-06,
      "loss": 0.0749,
      "reward": 0.2821820305660367,
      "reward_std": 0.3167072061449289,
      "rewards/cosine_scaled_reward": 0.2821820305660367,
      "step": 160
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 932.1104385375977,
      "epoch": 2.835820895522388,
      "grad_norm": 6.447605609893799,
      "kl": 0.3660888671875,
      "learning_rate": 1e-06,
      "loss": 0.0324,
      "reward": 0.02865399098955095,
      "reward_std": 0.30335349403321743,
      "rewards/cosine_scaled_reward": 0.02865399098955095,
      "step": 165
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 664.3466369628907,
      "epoch": 2.9211087420042645,
      "grad_norm": 26.408769607543945,
      "kl": 0.82470703125,
      "learning_rate": 1e-06,
      "loss": -0.0468,
      "reward": -0.12603640989982523,
      "reward_std": 0.2628266651183367,
      "rewards/cosine_scaled_reward": -0.12603640989982523,
      "step": 170
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 562.9258012771606,
      "epoch": 2.9893390191897655,
      "kl": 0.9171142578125,
      "reward": -0.19254306121729314,
      "reward_std": 0.23406662652269006,
      "rewards/cosine_scaled_reward": -0.19254306121729314,
      "step": 174,
      "total_flos": 0.0,
      "train_loss": 0.03207497191133684,
      "train_runtime": 41076.4058,
      "train_samples_per_second": 0.548,
      "train_steps_per_second": 0.004
    }
  ],
  "logging_steps": 5,
  "max_steps": 174,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}