{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.380952380952381,
  "eval_steps": 500,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "completion_length": 51.85,
      "epoch": 0.07936507936507936,
      "grad_norm": 2.2403488159179688,
      "kl": 0.0027098871301859616,
      "learning_rate": 2.6666666666666667e-07,
      "loss": 0.0001,
      "reward": 0.16500000339001417,
      "reward_std": 0.11435296162962913,
      "rewards/cbt_content_reward": 0.16500000339001417,
      "rewards/check_cbt_structure": 0.0,
      "step": 5
    },
    {
      "completion_length": 58.675,
      "epoch": 0.15873015873015872,
      "grad_norm": 3.0842247009277344,
      "kl": 0.002581492770696059,
      "learning_rate": 6e-07,
      "loss": 0.0001,
      "reward": 0.18250000290572643,
      "reward_std": 0.14434738419950008,
      "rewards/cbt_content_reward": 0.18250000439584255,
      "rewards/check_cbt_structure": 0.0,
      "step": 10
    },
    {
      "completion_length": 55.7,
      "epoch": 0.23809523809523808,
      "grad_norm": 1.792191982269287,
      "kl": 0.0019858392188325524,
      "learning_rate": 9.333333333333333e-07,
      "loss": 0.0001,
      "reward": 0.1250000039115548,
      "reward_std": 0.1089518491178751,
      "rewards/cbt_content_reward": 0.1250000039115548,
      "rewards/check_cbt_structure": 0.0,
      "step": 15
    },
    {
      "completion_length": 73.025,
      "epoch": 0.31746031746031744,
      "grad_norm": 2.7672274112701416,
      "kl": 0.0017400937096681446,
      "learning_rate": 9.703703703703704e-07,
      "loss": 0.0001,
      "reward": 0.15250000283122062,
      "reward_std": 0.1529246997088194,
      "rewards/cbt_content_reward": 0.15250000432133676,
      "rewards/check_cbt_structure": 0.0,
      "step": 20
    },
    {
      "completion_length": 52.825,
      "epoch": 0.3968253968253968,
      "grad_norm": 1.1795265674591064,
      "kl": 0.0027943541877903043,
      "learning_rate": 9.333333333333333e-07,
      "loss": 0.0001,
      "reward": 0.14500000327825546,
      "reward_std": 0.10991083942353726,
      "rewards/cbt_content_reward": 0.14500000178813935,
      "rewards/check_cbt_structure": 0.0,
      "step": 25
    },
    {
      "completion_length": 53.3375,
      "epoch": 0.47619047619047616,
      "grad_norm": 2.038141965866089,
      "kl": 0.002141835092334077,
      "learning_rate": 8.962962962962963e-07,
      "loss": 0.0001,
      "reward": 0.1750000048428774,
      "reward_std": 0.11487694047391414,
      "rewards/cbt_content_reward": 0.17500000335276128,
      "rewards/check_cbt_structure": 0.0,
      "step": 30
    },
    {
      "completion_length": 66.55,
      "epoch": 0.5555555555555556,
      "grad_norm": 2.512485980987549,
      "kl": 0.0019801797869149597,
      "learning_rate": 8.592592592592592e-07,
      "loss": 0.0001,
      "reward": 0.1575000027194619,
      "reward_std": 0.1333638343960047,
      "rewards/cbt_content_reward": 0.1575000027194619,
      "rewards/check_cbt_structure": 0.0,
      "step": 35
    },
    {
      "completion_length": 56.8,
      "epoch": 0.6349206349206349,
      "grad_norm": 4.515315055847168,
      "kl": 0.002533014601795003,
      "learning_rate": 8.222222222222221e-07,
      "loss": 0.0001,
      "reward": 0.18250000663101673,
      "reward_std": 0.1475393757224083,
      "rewards/cbt_content_reward": 0.18250000514090062,
      "rewards/check_cbt_structure": 0.0,
      "step": 40
    },
    {
      "completion_length": 64.275,
      "epoch": 0.7142857142857143,
      "grad_norm": 2.102088212966919,
      "kl": 0.002205433923518285,
      "learning_rate": 7.851851851851852e-07,
      "loss": 0.0001,
      "reward": 0.180000002682209,
      "reward_std": 0.13480928540229797,
      "rewards/cbt_content_reward": 0.180000002682209,
      "rewards/check_cbt_structure": 0.0,
      "step": 45
    },
    {
      "completion_length": 47.3,
      "epoch": 0.7936507936507936,
      "grad_norm": 3.474039077758789,
      "kl": 0.0022507158428197727,
      "learning_rate": 7.481481481481481e-07,
      "loss": 0.0001,
      "reward": 0.15000000335276126,
      "reward_std": 0.1302133210003376,
      "rewards/cbt_content_reward": 0.15000000335276126,
      "rewards/check_cbt_structure": 0.0,
      "step": 50
    },
    {
      "completion_length": 64.8125,
      "epoch": 0.873015873015873,
      "grad_norm": 2.440229654312134,
      "kl": 0.00192810871230904,
      "learning_rate": 7.111111111111111e-07,
      "loss": 0.0001,
      "reward": 0.16500000488013028,
      "reward_std": 0.0976612851023674,
      "rewards/cbt_content_reward": 0.16500000488013028,
      "rewards/check_cbt_structure": 0.0,
      "step": 55
    },
    {
      "completion_length": 59.7375,
      "epoch": 0.9523809523809523,
      "grad_norm": 1.8490701913833618,
      "kl": 0.002136132796294987,
      "learning_rate": 6.74074074074074e-07,
      "loss": 0.0001,
      "reward": 0.17750000581145287,
      "reward_std": 0.14892779104411602,
      "rewards/cbt_content_reward": 0.17750000432133675,
      "rewards/check_cbt_structure": 0.0,
      "step": 60
    },
    {
      "completion_length": 54.7125,
      "epoch": 1.0317460317460316,
      "grad_norm": 2.5725176334381104,
      "kl": 0.002353719263919629,
      "learning_rate": 6.37037037037037e-07,
      "loss": 0.0001,
      "reward": 0.1800000037997961,
      "reward_std": 0.12799057997763158,
      "rewards/cbt_content_reward": 0.18000000230968,
      "rewards/check_cbt_structure": 0.0,
      "step": 65
    },
    {
      "completion_length": 66.5625,
      "epoch": 1.1111111111111112,
      "grad_norm": 1.7574554681777954,
      "kl": 0.00198215174023062,
      "learning_rate": 6e-07,
      "loss": 0.0001,
      "reward": 0.22500000521540642,
      "reward_std": 0.12019186988472938,
      "rewards/cbt_content_reward": 0.2250000037252903,
      "rewards/check_cbt_structure": 0.0,
      "step": 70
    },
    {
      "completion_length": 58.825,
      "epoch": 1.1904761904761905,
      "grad_norm": 2.7693963050842285,
      "kl": 0.002443282786407508,
      "learning_rate": 5.62962962962963e-07,
      "loss": 0.0001,
      "reward": 0.1500000048428774,
      "reward_std": 0.13859827741980552,
      "rewards/cbt_content_reward": 0.15000000186264514,
      "rewards/check_cbt_structure": 0.0,
      "step": 75
    },
    {
      "completion_length": 63.875,
      "epoch": 1.2698412698412698,
      "grad_norm": 2.8838014602661133,
      "kl": 0.002126309886807576,
      "learning_rate": 5.259259259259258e-07,
      "loss": 0.0001,
      "reward": 0.1775000037625432,
      "reward_std": 0.11867224015295505,
      "rewards/cbt_content_reward": 0.1775000037625432,
      "rewards/check_cbt_structure": 0.0,
      "step": 80
    },
    {
      "completion_length": 53.85,
      "epoch": 1.3492063492063493,
      "grad_norm": 3.608692169189453,
      "kl": 0.002317077317275107,
      "learning_rate": 4.888888888888889e-07,
      "loss": 0.0001,
      "reward": 0.18250000216066836,
      "reward_std": 0.1294781118631363,
      "rewards/cbt_content_reward": 0.18250000514090062,
      "rewards/check_cbt_structure": 0.0,
      "step": 85
    },
    {
      "completion_length": 56.3375,
      "epoch": 1.4285714285714286,
      "grad_norm": 2.851497173309326,
      "kl": 0.0024501425621565433,
      "learning_rate": 4.5185185185185183e-07,
      "loss": 0.0001,
      "reward": 0.1675000036135316,
      "reward_std": 0.11531616114079953,
      "rewards/cbt_content_reward": 0.1675000036135316,
      "rewards/check_cbt_structure": 0.0,
      "step": 90
    },
    {
      "completion_length": 54.6625,
      "epoch": 1.507936507936508,
      "grad_norm": 2.257418632507324,
      "kl": 0.002075143059482798,
      "learning_rate": 4.1481481481481476e-07,
      "loss": 0.0001,
      "reward": 0.14500000271946192,
      "reward_std": 0.12950329035520552,
      "rewards/cbt_content_reward": 0.14500000271946192,
      "rewards/check_cbt_structure": 0.0,
      "step": 95
    },
    {
      "completion_length": 60.6,
      "epoch": 1.5873015873015874,
      "grad_norm": 2.6931891441345215,
      "kl": 0.002077191596617922,
      "learning_rate": 3.7777777777777775e-07,
      "loss": 0.0001,
      "reward": 0.13250000309199095,
      "reward_std": 0.13716318383812903,
      "rewards/cbt_content_reward": 0.13250000458210706,
      "rewards/check_cbt_structure": 0.0,
      "step": 100
    },
    {
      "completion_length": 78.9,
      "epoch": 1.6666666666666665,
      "grad_norm": 2.117309808731079,
      "kl": 0.0018815583549439906,
      "learning_rate": 3.407407407407407e-07,
      "loss": 0.0001,
      "reward": 0.21000000461935997,
      "reward_std": 0.14268166311085223,
      "rewards/cbt_content_reward": 0.21000000461935997,
      "rewards/check_cbt_structure": 0.0,
      "step": 105
    },
    {
      "completion_length": 54.3125,
      "epoch": 1.746031746031746,
      "grad_norm": 2.469907760620117,
      "kl": 0.002604751317994669,
      "learning_rate": 3.037037037037037e-07,
      "loss": 0.0001,
      "reward": 0.21750000603497027,
      "reward_std": 0.1493647824972868,
      "rewards/cbt_content_reward": 0.21750000603497027,
      "rewards/check_cbt_structure": 0.0,
      "step": 110
    },
    {
      "completion_length": 56.85,
      "epoch": 1.8253968253968254,
      "grad_norm": 2.3624932765960693,
      "kl": 0.002011225459864363,
      "learning_rate": 2.6666666666666667e-07,
      "loss": 0.0001,
      "reward": 0.1475000012665987,
      "reward_std": 0.12692904993891715,
      "rewards/cbt_content_reward": 0.14750000275671482,
      "rewards/check_cbt_structure": 0.0,
      "step": 115
    },
    {
      "completion_length": 55.625,
      "epoch": 1.9047619047619047,
      "grad_norm": 3.7563695907592773,
      "kl": 0.002532219042768702,
      "learning_rate": 2.296296296296296e-07,
      "loss": 0.0001,
      "reward": 0.16250000335276127,
      "reward_std": 0.11915707401931286,
      "rewards/cbt_content_reward": 0.16250000037252904,
      "rewards/check_cbt_structure": 0.0,
      "step": 120
    },
    {
      "completion_length": 57.4625,
      "epoch": 1.9841269841269842,
      "grad_norm": 2.7810513973236084,
      "kl": 0.002170709293568507,
      "learning_rate": 1.9259259259259257e-07,
      "loss": 0.0001,
      "reward": 0.15750000439584255,
      "reward_std": 0.1208796363323927,
      "rewards/cbt_content_reward": 0.15750000439584255,
      "rewards/check_cbt_structure": 0.0,
      "step": 125
    },
    {
      "completion_length": 49.35,
      "epoch": 2.0634920634920633,
      "grad_norm": 2.166604518890381,
      "kl": 0.0023496907786466183,
      "learning_rate": 1.5555555555555556e-07,
      "loss": 0.0001,
      "reward": 0.1425000036135316,
      "reward_std": 0.14228889718651772,
      "rewards/cbt_content_reward": 0.1425000036135316,
      "rewards/check_cbt_structure": 0.0,
      "step": 130
    },
    {
      "completion_length": 63.175,
      "epoch": 2.142857142857143,
      "grad_norm": 3.5904221534729004,
      "kl": 0.002583104814402759,
      "learning_rate": 1.1851851851851851e-07,
      "loss": 0.0001,
      "reward": 0.20750000271946192,
      "reward_std": 0.12724252939224243,
      "rewards/cbt_content_reward": 0.20750000420957804,
      "rewards/check_cbt_structure": 0.0,
      "step": 135
    },
    {
      "completion_length": 57.625,
      "epoch": 2.2222222222222223,
      "grad_norm": 2.712351083755493,
      "kl": 0.0022835541458334774,
      "learning_rate": 8.148148148148149e-08,
      "loss": 0.0001,
      "reward": 0.1375000037252903,
      "reward_std": 0.118898082152009,
      "rewards/cbt_content_reward": 0.13750000223517417,
      "rewards/check_cbt_structure": 0.0,
      "step": 140
    },
    {
      "completion_length": 52.4375,
      "epoch": 2.3015873015873014,
      "grad_norm": 3.027590751647949,
      "kl": 0.0020462898130062966,
      "learning_rate": 4.444444444444444e-08,
      "loss": 0.0001,
      "reward": 0.1750000027939677,
      "reward_std": 0.11545386202633381,
      "rewards/cbt_content_reward": 0.1750000027939677,
      "rewards/check_cbt_structure": 0.0,
      "step": 145
    },
    {
      "completion_length": 70.325,
      "epoch": 2.380952380952381,
      "grad_norm": 3.1476638317108154,
      "kl": 0.0017971335852053016,
      "learning_rate": 7.407407407407407e-09,
      "loss": 0.0001,
      "reward": 0.19500000346451998,
      "reward_std": 0.12282740026712417,
      "rewards/cbt_content_reward": 0.19500000346451998,
      "rewards/check_cbt_structure": 0.0,
      "step": 150
    }
  ],
  "logging_steps": 5,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}