| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 500, |
| "global_step": 156, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.064, |
| "grad_norm": 32.64683532714844, |
| "learning_rate": 5.625e-07, |
| "logits/chosen": 0.08142563700675964, |
| "logits/rejected": 0.227357417345047, |
| "logps/chosen": -1236.4244384765625, |
| "logps/rejected": -2043.849853515625, |
| "loss": 0.6733, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.14909373223781586, |
| "rewards/margins": 0.2537827491760254, |
| "rewards/rejected": -0.40287646651268005, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.24403050541877747, |
| "learning_rate": 9.98867437523228e-07, |
| "logits/chosen": 0.10452975332736969, |
| "logits/rejected": 0.20738160610198975, |
| "logps/chosen": -1335.4716796875, |
| "logps/rejected": -2215.779541015625, |
| "loss": 0.1419, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -5.359838485717773, |
| "rewards/margins": 7.954916954040527, |
| "rewards/rejected": -13.3147554397583, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.18414834141731262, |
| "learning_rate": 9.788754083424652e-07, |
| "logits/chosen": 0.10984750092029572, |
| "logits/rejected": 0.09670254588127136, |
| "logps/chosen": -1364.2596435546875, |
| "logps/rejected": -2435.3935546875, |
| "loss": 0.124, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -13.650239944458008, |
| "rewards/margins": 25.381425857543945, |
| "rewards/rejected": -39.03166580200195, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.12591849267482758, |
| "learning_rate": 9.348705665778477e-07, |
| "logits/chosen": 0.18178126215934753, |
| "logits/rejected": 0.1629951149225235, |
| "logps/chosen": -1315.8212890625, |
| "logps/rejected": -2339.572021484375, |
| "loss": 0.1063, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -9.503636360168457, |
| "rewards/margins": 26.079574584960938, |
| "rewards/rejected": -35.583213806152344, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.14210306107997894, |
| "learning_rate": 8.690594987436704e-07, |
| "logits/chosen": 0.22390194237232208, |
| "logits/rejected": 0.22648893296718597, |
| "logps/chosen": -1331.87841796875, |
| "logps/rejected": -2378.160400390625, |
| "loss": 0.1001, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -3.748126745223999, |
| "rewards/margins": 25.280818939208984, |
| "rewards/rejected": -29.02894401550293, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.15365009009838104, |
| "learning_rate": 7.84742246584226e-07, |
| "logits/chosen": 0.23472987115383148, |
| "logits/rejected": 0.24391409754753113, |
| "logps/chosen": -1244.014892578125, |
| "logps/rejected": -2237.36669921875, |
| "loss": 0.0876, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.5522196292877197, |
| "rewards/margins": 24.502920150756836, |
| "rewards/rejected": -22.950700759887695, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 1.6987534761428833, |
| "learning_rate": 6.861468292009726e-07, |
| "logits/chosen": 0.2525210976600647, |
| "logits/rejected": 0.2721753716468811, |
| "logps/chosen": -1115.734130859375, |
| "logps/rejected": -2156.46875, |
| "loss": 0.0863, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.926352024078369, |
| "rewards/margins": 22.947837829589844, |
| "rewards/rejected": -17.021486282348633, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.10186341404914856, |
| "learning_rate": 5.782172325201155e-07, |
| "logits/chosen": 0.267190545797348, |
| "logits/rejected": 0.2903681993484497, |
| "logps/chosen": -1151.381103515625, |
| "logps/rejected": -2185.71044921875, |
| "loss": 0.0881, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": 7.338265419006348, |
| "rewards/margins": 22.687971115112305, |
| "rewards/rejected": -15.349705696105957, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.10365493595600128, |
| "learning_rate": 4.6636549719398016e-07, |
| "logits/chosen": 0.24797296524047852, |
| "logits/rejected": 0.26663631200790405, |
| "logps/chosen": -1155.973876953125, |
| "logps/rejected": -2196.724609375, |
| "loss": 0.091, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": 7.644052505493164, |
| "rewards/margins": 21.082582473754883, |
| "rewards/rejected": -13.438529968261719, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.08818171918392181, |
| "learning_rate": 3.562003362839914e-07, |
| "logits/chosen": 0.26866415143013, |
| "logits/rejected": 0.3038444519042969, |
| "logps/chosen": -1111.370849609375, |
| "logps/rejected": -2152.794677734375, |
| "loss": 0.0769, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": 9.541829109191895, |
| "rewards/margins": 20.739093780517578, |
| "rewards/rejected": -11.197265625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.09460079669952393, |
| "learning_rate": 2.5324589096782656e-07, |
| "logits/chosen": 0.261665940284729, |
| "logits/rejected": 0.3049652576446533, |
| "logps/chosen": -1119.0865478515625, |
| "logps/rejected": -2136.193603515625, |
| "loss": 0.1036, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": 10.547566413879395, |
| "rewards/margins": 18.816768646240234, |
| "rewards/rejected": -8.269205093383789, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 33.47285461425781, |
| "learning_rate": 1.6266472703396284e-07, |
| "logits/chosen": 0.2868819236755371, |
| "logits/rejected": 0.3263227939605713, |
| "logps/chosen": -1207.3726806640625, |
| "logps/rejected": -2139.579833984375, |
| "loss": 0.1038, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 10.92021656036377, |
| "rewards/margins": 18.513587951660156, |
| "rewards/rejected": -7.5933709144592285, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.14188329875469208, |
| "learning_rate": 8.899896227604508e-08, |
| "logits/chosen": 0.28316810727119446, |
| "logits/rejected": 0.3238982856273651, |
| "logps/chosen": -1150.779296875, |
| "logps/rejected": -2133.03759765625, |
| "loss": 0.1056, |
| "rewards/accuracies": 0.9874999523162842, |
| "rewards/chosen": 11.900278091430664, |
| "rewards/margins": 18.61746597290039, |
| "rewards/rejected": -6.717187404632568, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.09306726604700089, |
| "learning_rate": 3.594250574048058e-08, |
| "logits/chosen": 0.26064103841781616, |
| "logits/rejected": 0.29156985878944397, |
| "logps/chosen": -1061.2724609375, |
| "logps/rejected": -2110.465087890625, |
| "loss": 0.0853, |
| "rewards/accuracies": 0.9874999523162842, |
| "rewards/chosen": 12.033426284790039, |
| "rewards/margins": 19.080835342407227, |
| "rewards/rejected": -7.047410488128662, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.09628653526306152, |
| "learning_rate": 6.15582970243117e-09, |
| "logits/chosen": 0.2842194139957428, |
| "logits/rejected": 0.30415046215057373, |
| "logps/chosen": -1132.39501953125, |
| "logps/rejected": -2141.17333984375, |
| "loss": 0.0888, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": 11.403092384338379, |
| "rewards/margins": 18.90988540649414, |
| "rewards/rejected": -7.506793975830078, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9984, |
| "step": 156, |
| "total_flos": 1.1538976647738819e+18, |
| "train_loss": 0.13609233192908457, |
| "train_runtime": 8850.4888, |
| "train_samples_per_second": 0.565, |
| "train_steps_per_second": 0.018 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1538976647738819e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|