| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.986666666666667, |
| "eval_steps": 500, |
| "global_step": 336, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 0.9586673974990845, |
| "kl": 0.19199295341968536, |
| "learning_rate": 1.4705882352941177e-06, |
| "logps/chosen": -145.66531982421876, |
| "loss": 0.5076, |
| "rewards/chosen": -0.011382170021533966, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.9881852269172668, |
| "kl": 0.22232285141944885, |
| "learning_rate": 2.9411764705882355e-06, |
| "logps/chosen": -144.47635498046876, |
| "loss": 0.506, |
| "rewards/chosen": -0.0017147257924079895, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 0.9230504035949707, |
| "kl": 0.2664317190647125, |
| "learning_rate": 4.411764705882353e-06, |
| "logps/chosen": -144.5919189453125, |
| "loss": 0.5053, |
| "rewards/chosen": 0.005377597734332084, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.9962123036384583, |
| "kl": 0.35221290588378906, |
| "learning_rate": 4.995131923687488e-06, |
| "logps/chosen": -142.9361328125, |
| "loss": 0.5012, |
| "rewards/chosen": 0.03048495054244995, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 1.229694128036499, |
| "kl": 0.8521757125854492, |
| "learning_rate": 4.965451197130373e-06, |
| "logps/chosen": -142.90025634765624, |
| "loss": 0.5014, |
| "rewards/chosen": 0.07963300943374634, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.35447359085083, |
| "kl": 1.8928890228271484, |
| "learning_rate": 4.90911473983908e-06, |
| "logps/chosen": -147.9736572265625, |
| "loss": 0.504, |
| "rewards/chosen": 0.17341512441635132, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 1.301222324371338, |
| "kl": 3.797394275665283, |
| "learning_rate": 4.826731644963705e-06, |
| "logps/chosen": -144.52886962890625, |
| "loss": 0.514, |
| "rewards/chosen": 0.323371958732605, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 1.4558587074279785, |
| "kl": 6.093751907348633, |
| "learning_rate": 4.71919261421297e-06, |
| "logps/chosen": -150.49774169921875, |
| "loss": 0.5034, |
| "rewards/chosen": 0.5952839851379395, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.3663493394851685, |
| "kl": 8.93967342376709, |
| "learning_rate": 4.587660327850203e-06, |
| "logps/chosen": -134.95494384765624, |
| "loss": 0.5298, |
| "rewards/chosen": 0.7676385402679443, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.547540545463562, |
| "kl": 11.540582656860352, |
| "learning_rate": 4.43355687413747e-06, |
| "logps/chosen": -128.8071044921875, |
| "loss": 0.521, |
| "rewards/chosen": 1.0660431861877442, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 1.3188670873641968, |
| "kl": 14.06761360168457, |
| "learning_rate": 4.258548374136976e-06, |
| "logps/chosen": -130.93094482421876, |
| "loss": 0.5228, |
| "rewards/chosen": 1.3124106407165528, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 1.3436963558197021, |
| "kl": 16.5745906829834, |
| "learning_rate": 4.064526968101844e-06, |
| "logps/chosen": -124.19566650390625, |
| "loss": 0.5302, |
| "rewards/chosen": 1.527147102355957, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.1555555555555554, |
| "grad_norm": 1.3086756467819214, |
| "kl": 18.41370964050293, |
| "learning_rate": 3.853590358214119e-06, |
| "logps/chosen": -129.2854736328125, |
| "loss": 0.5383, |
| "rewards/chosen": 1.6645441055297852, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.2444444444444445, |
| "grad_norm": 1.2180778980255127, |
| "kl": 18.382841110229492, |
| "learning_rate": 3.6280191288478437e-06, |
| "logps/chosen": -127.15008544921875, |
| "loss": 0.5155, |
| "rewards/chosen": 1.7688350677490234, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 1.142663836479187, |
| "kl": 20.387691497802734, |
| "learning_rate": 3.3902520895638674e-06, |
| "logps/chosen": -130.60250244140624, |
| "loss": 0.5408, |
| "rewards/chosen": 1.8582941055297852, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.4222222222222223, |
| "grad_norm": 1.1040987968444824, |
| "kl": 20.953645706176758, |
| "learning_rate": 3.142859907420615e-06, |
| "logps/chosen": -120.3420654296875, |
| "loss": 0.5502, |
| "rewards/chosen": 1.877711868286133, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.511111111111111, |
| "grad_norm": 1.446460485458374, |
| "kl": 22.705646514892578, |
| "learning_rate": 2.8885173136805126e-06, |
| "logps/chosen": -121.3359130859375, |
| "loss": 0.5312, |
| "rewards/chosen": 2.1138553619384766, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.9512816667556763, |
| "kl": 22.2016658782959, |
| "learning_rate": 2.629974185404951e-06, |
| "logps/chosen": -126.013720703125, |
| "loss": 0.5678, |
| "rewards/chosen": 1.8899778366088866, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.6888888888888889, |
| "grad_norm": 1.202101469039917, |
| "kl": 22.479812622070312, |
| "learning_rate": 2.3700258145950495e-06, |
| "logps/chosen": -127.17119140625, |
| "loss": 0.4841, |
| "rewards/chosen": 2.30587100982666, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 1.2024880647659302, |
| "kl": 23.706804275512695, |
| "learning_rate": 2.1114826863194882e-06, |
| "logps/chosen": -119.14400634765624, |
| "loss": 0.5279, |
| "rewards/chosen": 2.2496980667114257, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.8666666666666667, |
| "grad_norm": 1.2260613441467285, |
| "kl": 22.990591049194336, |
| "learning_rate": 1.8571400925793855e-06, |
| "logps/chosen": -128.0656005859375, |
| "loss": 0.5057, |
| "rewards/chosen": 2.2760807037353517, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.9555555555555557, |
| "grad_norm": 1.1307311058044434, |
| "kl": 23.434030532836914, |
| "learning_rate": 1.6097479104361328e-06, |
| "logps/chosen": -124.27503662109375, |
| "loss": 0.539, |
| "rewards/chosen": 2.1261518478393553, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.0444444444444443, |
| "grad_norm": 1.2096738815307617, |
| "kl": 24.883241653442383, |
| "learning_rate": 1.3719808711521573e-06, |
| "logps/chosen": -125.847314453125, |
| "loss": 0.5433, |
| "rewards/chosen": 2.296957015991211, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.1333333333333333, |
| "grad_norm": 1.2274669408798218, |
| "kl": 23.971763610839844, |
| "learning_rate": 1.1464096417858821e-06, |
| "logps/chosen": -122.934521484375, |
| "loss": 0.4878, |
| "rewards/chosen": 2.448800468444824, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 1.433097243309021, |
| "kl": 24.515583038330078, |
| "learning_rate": 9.354730318981561e-07, |
| "logps/chosen": -111.53873291015626, |
| "loss": 0.5241, |
| "rewards/chosen": 2.335133171081543, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.311111111111111, |
| "grad_norm": 1.2111012935638428, |
| "kl": 24.930599212646484, |
| "learning_rate": 7.414516258630245e-07, |
| "logps/chosen": -128.9908935546875, |
| "loss": 0.4901, |
| "rewards/chosen": 2.533807373046875, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 1.340284824371338, |
| "kl": 24.58279037475586, |
| "learning_rate": 5.664431258625305e-07, |
| "logps/chosen": -122.60418701171875, |
| "loss": 0.4913, |
| "rewards/chosen": 2.4873432159423827, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.488888888888889, |
| "grad_norm": 1.3646727800369263, |
| "kl": 24.800968170166016, |
| "learning_rate": 4.123396721497977e-07, |
| "logps/chosen": -118.5873046875, |
| "loss": 0.5525, |
| "rewards/chosen": 2.2357526779174806, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.5777777777777775, |
| "grad_norm": 1.2073670625686646, |
| "kl": 25.220767974853516, |
| "learning_rate": 2.8080738578703054e-07, |
| "logps/chosen": -118.42447509765626, |
| "loss": 0.5128, |
| "rewards/chosen": 2.481866455078125, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 1.2993130683898926, |
| "kl": 25.565937042236328, |
| "learning_rate": 1.7326835503629542e-07, |
| "logps/chosen": -120.5993408203125, |
| "loss": 0.5379, |
| "rewards/chosen": 2.363565444946289, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.7555555555555555, |
| "grad_norm": 1.1960065364837646, |
| "kl": 23.65550422668457, |
| "learning_rate": 9.088526016092142e-08, |
| "logps/chosen": -128.53665771484376, |
| "loss": 0.4636, |
| "rewards/chosen": 2.549031639099121, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.8444444444444446, |
| "grad_norm": 1.4810154438018799, |
| "kl": 24.803632736206055, |
| "learning_rate": 3.4548802869627806e-08, |
| "logps/chosen": -119.30946044921875, |
| "loss": 0.5054, |
| "rewards/chosen": 2.466901397705078, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.9333333333333336, |
| "grad_norm": 0.9765888452529907, |
| "kl": 25.129100799560547, |
| "learning_rate": 4.868076312512515e-09, |
| "logps/chosen": -119.96820068359375, |
| "loss": 0.5472, |
| "rewards/chosen": 2.2562030792236327, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.986666666666667, |
| "step": 336, |
| "total_flos": 7.102230505945498e+16, |
| "train_loss": 0.5188913842042288, |
| "train_runtime": 2226.9521, |
| "train_samples_per_second": 1.212, |
| "train_steps_per_second": 0.151 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 336, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.102230505945498e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |