{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999344906649198,
  "eval_steps": 500,
  "global_step": 3816,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.050049132001310184,
      "grad_norm": 4.843938827514648,
      "learning_rate": 5e-07,
      "logits/chosen": -0.5328307747840881,
      "logits/rejected": -0.46889373660087585,
      "logps/chosen": -74.61598205566406,
      "logps/rejected": -13.190138816833496,
      "loss": 0.6891,
      "rewards/accuracies": 0.5595549941062927,
      "rewards/chosen": 0.007412092760205269,
      "rewards/margins": 0.008624909445643425,
      "rewards/rejected": -0.001212816801853478,
      "step": 191
    },
    {
      "epoch": 0.10009826400262037,
      "grad_norm": 3.657888174057007,
      "learning_rate": 1e-06,
      "logits/chosen": -0.5486236810684204,
      "logits/rejected": -0.48813965916633606,
      "logps/chosen": -70.09341430664062,
      "logps/rejected": -13.666769981384277,
      "loss": 0.5809,
      "rewards/accuracies": 0.8239529132843018,
      "rewards/chosen": 0.24100865423679352,
      "rewards/margins": 0.2950609624385834,
      "rewards/rejected": -0.054052311927080154,
      "step": 382
    },
    {
      "epoch": 0.15014739600393057,
      "grad_norm": 1.726294755935669,
      "learning_rate": 9.443797320908561e-07,
      "logits/chosen": -0.5495401620864868,
      "logits/rejected": -0.4997619390487671,
      "logps/chosen": -61.40409469604492,
      "logps/rejected": -19.322750091552734,
      "loss": 0.2712,
      "rewards/accuracies": 0.9463350772857666,
      "rewards/chosen": 1.1437506675720215,
      "rewards/margins": 1.791763424873352,
      "rewards/rejected": -0.648012638092041,
      "step": 573
    },
    {
      "epoch": 0.20019652800524074,
      "grad_norm": 9.303996086120605,
      "learning_rate": 8.887594641817122e-07,
      "logits/chosen": -0.5349320769309998,
      "logits/rejected": -0.4746370315551758,
      "logps/chosen": -61.47517776489258,
      "logps/rejected": -35.966888427734375,
      "loss": 0.0598,
      "rewards/accuracies": 0.9869109988212585,
      "rewards/chosen": 1.5467382669448853,
      "rewards/margins": 3.8473398685455322,
      "rewards/rejected": -2.3006019592285156,
      "step": 764
    },
    {
      "epoch": 0.2502456600065509,
      "grad_norm": 0.23748359084129333,
      "learning_rate": 8.331391962725683e-07,
      "logits/chosen": -0.4757286310195923,
      "logits/rejected": -0.3977571725845337,
      "logps/chosen": -57.084896087646484,
      "logps/rejected": -45.92123794555664,
      "loss": 0.0267,
      "rewards/accuracies": 0.9901832342147827,
      "rewards/chosen": 1.6708416938781738,
      "rewards/margins": 5.011633396148682,
      "rewards/rejected": -3.340791702270508,
      "step": 955
    },
    {
      "epoch": 0.30029479200786113,
      "grad_norm": 0.43927711248397827,
      "learning_rate": 7.775189283634246e-07,
      "logits/chosen": -0.4342685043811798,
      "logits/rejected": -0.33603060245513916,
      "logps/chosen": -57.238773345947266,
      "logps/rejected": -50.37546920776367,
      "loss": 0.0331,
      "rewards/accuracies": 0.9869109988212585,
      "rewards/chosen": 1.7030671834945679,
      "rewards/margins": 5.485808372497559,
      "rewards/rejected": -3.7827417850494385,
      "step": 1146
    },
    {
      "epoch": 0.3503439240091713,
      "grad_norm": 0.08208785951137543,
      "learning_rate": 7.218986604542807e-07,
      "logits/chosen": -0.4299260973930359,
      "logits/rejected": -0.32499420642852783,
      "logps/chosen": -56.95598220825195,
      "logps/rejected": -55.50334548950195,
      "loss": 0.0327,
      "rewards/accuracies": 0.985602080821991,
      "rewards/chosen": 1.6657171249389648,
      "rewards/margins": 5.907327651977539,
      "rewards/rejected": -4.241610527038574,
      "step": 1337
    },
    {
      "epoch": 0.4003930560104815,
      "grad_norm": 0.18074464797973633,
      "learning_rate": 6.662783925451368e-07,
      "logits/chosen": -0.389539510011673,
      "logits/rejected": -0.26959964632987976,
      "logps/chosen": -56.53901672363281,
      "logps/rejected": -59.943992614746094,
      "loss": 0.0247,
      "rewards/accuracies": 0.9914921522140503,
      "rewards/chosen": 1.684263825416565,
      "rewards/margins": 6.446012496948242,
      "rewards/rejected": -4.761748790740967,
      "step": 1528
    },
    {
      "epoch": 0.4504421880117917,
      "grad_norm": 0.07140897214412689,
      "learning_rate": 6.10658124635993e-07,
      "logits/chosen": -0.38458481431007385,
      "logits/rejected": -0.25353050231933594,
      "logps/chosen": -56.987735748291016,
      "logps/rejected": -65.21515655517578,
      "loss": 0.0252,
      "rewards/accuracies": 0.9882199168205261,
      "rewards/chosen": 1.7450363636016846,
      "rewards/margins": 6.95145845413208,
      "rewards/rejected": -5.206421375274658,
      "step": 1719
    },
    {
      "epoch": 0.5004913200131018,
      "grad_norm": 0.13229215145111084,
      "learning_rate": 5.550378567268491e-07,
      "logits/chosen": -0.3671688139438629,
      "logits/rejected": -0.23125909268856049,
      "logps/chosen": -55.54978942871094,
      "logps/rejected": -69.9174575805664,
      "loss": 0.0311,
      "rewards/accuracies": 0.9908376932144165,
      "rewards/chosen": 1.6499972343444824,
      "rewards/margins": 7.394839286804199,
      "rewards/rejected": -5.744842529296875,
      "step": 1910
    },
    {
      "epoch": 0.550540452014412,
      "grad_norm": 0.03863115608692169,
      "learning_rate": 4.994175888177053e-07,
      "logits/chosen": -0.34299236536026,
      "logits/rejected": -0.21112458407878876,
      "logps/chosen": -56.28550338745117,
      "logps/rejected": -74.3562240600586,
      "loss": 0.0203,
      "rewards/accuracies": 0.9908376932144165,
      "rewards/chosen": 1.6116416454315186,
      "rewards/margins": 7.8128886222839355,
      "rewards/rejected": -6.201247215270996,
      "step": 2101
    },
    {
      "epoch": 0.6005895840157223,
      "grad_norm": 0.16626648604869843,
      "learning_rate": 4.4379732090856143e-07,
      "logits/chosen": -0.317820280790329,
      "logits/rejected": -0.16657008230686188,
      "logps/chosen": -58.859397888183594,
      "logps/rejected": -77.39883422851562,
      "loss": 0.0317,
      "rewards/accuracies": 0.9875654578208923,
      "rewards/chosen": 1.6255024671554565,
      "rewards/margins": 8.082979202270508,
      "rewards/rejected": -6.4574761390686035,
      "step": 2292
    },
    {
      "epoch": 0.6506387160170324,
      "grad_norm": 0.01698416657745838,
      "learning_rate": 3.8817705299941754e-07,
      "logits/chosen": -0.3418547213077545,
      "logits/rejected": -0.19180215895175934,
      "logps/chosen": -57.59861373901367,
      "logps/rejected": -80.31951141357422,
      "loss": 0.0122,
      "rewards/accuracies": 0.9934555292129517,
      "rewards/chosen": 1.641247034072876,
      "rewards/margins": 8.408109664916992,
      "rewards/rejected": -6.766862392425537,
      "step": 2483
    },
    {
      "epoch": 0.7006878480183426,
      "grad_norm": 0.03158177435398102,
      "learning_rate": 3.3255678509027375e-07,
      "logits/chosen": -0.32275617122650146,
      "logits/rejected": -0.18107445538043976,
      "logps/chosen": -59.604644775390625,
      "logps/rejected": -81.69689178466797,
      "loss": 0.0214,
      "rewards/accuracies": 0.9908376932144165,
      "rewards/chosen": 1.6721757650375366,
      "rewards/margins": 8.503122329711914,
      "rewards/rejected": -6.83094596862793,
      "step": 2674
    },
    {
      "epoch": 0.7507369800196528,
      "grad_norm": 0.08755598962306976,
      "learning_rate": 2.7693651718112985e-07,
      "logits/chosen": -0.32179561257362366,
      "logits/rejected": -0.17599359154701233,
      "logps/chosen": -57.8769416809082,
      "logps/rejected": -83.73433685302734,
      "loss": 0.0192,
      "rewards/accuracies": 0.9908376932144165,
      "rewards/chosen": 1.6639045476913452,
      "rewards/margins": 8.702630043029785,
      "rewards/rejected": -7.038724899291992,
      "step": 2865
    },
    {
      "epoch": 0.800786112020963,
      "grad_norm": 0.015691732987761497,
      "learning_rate": 2.21316249271986e-07,
      "logits/chosen": -0.3434266149997711,
      "logits/rejected": -0.19399824738502502,
      "logps/chosen": -56.69347381591797,
      "logps/rejected": -85.22378540039062,
      "loss": 0.0277,
      "rewards/accuracies": 0.9914921522140503,
      "rewards/chosen": 1.6015362739562988,
      "rewards/margins": 8.894182205200195,
      "rewards/rejected": -7.2926459312438965,
      "step": 3056
    },
    {
      "epoch": 0.8508352440222732,
      "grad_norm": 0.010581793263554573,
      "learning_rate": 1.6569598136284217e-07,
      "logits/chosen": -0.33340033888816833,
      "logits/rejected": -0.18355616927146912,
      "logps/chosen": -58.657325744628906,
      "logps/rejected": -85.88097381591797,
      "loss": 0.0251,
      "rewards/accuracies": 0.9921466112136841,
      "rewards/chosen": 1.615267276763916,
      "rewards/margins": 8.870849609375,
      "rewards/rejected": -7.255581855773926,
      "step": 3247
    },
    {
      "epoch": 0.9008843760235834,
      "grad_norm": 0.02463706023991108,
      "learning_rate": 1.100757134536983e-07,
      "logits/chosen": -0.3216906785964966,
      "logits/rejected": -0.17967411875724792,
      "logps/chosen": -55.944915771484375,
      "logps/rejected": -87.04055786132812,
      "loss": 0.0236,
      "rewards/accuracies": 0.9882199168205261,
      "rewards/chosen": 1.5252350568771362,
      "rewards/margins": 8.986153602600098,
      "rewards/rejected": -7.460918426513672,
      "step": 3438
    },
    {
      "epoch": 0.9509335080248935,
      "grad_norm": 0.1806812733411789,
      "learning_rate": 5.445544554455445e-08,
      "logits/chosen": -0.3310312032699585,
      "logits/rejected": -0.17037709057331085,
      "logps/chosen": -56.92354202270508,
      "logps/rejected": -86.47986602783203,
      "loss": 0.0231,
      "rewards/accuracies": 0.9928010702133179,
      "rewards/chosen": 1.5716968774795532,
      "rewards/margins": 9.01201057434082,
      "rewards/rejected": -7.440313339233398,
      "step": 3629
    },
    {
      "epoch": 0.9999344906649198,
      "step": 3816,
      "total_flos": 2.3140851990119055e+18,
      "train_loss": 0.09998038786012421,
      "train_runtime": 57165.5253,
      "train_samples_per_second": 0.534,
      "train_steps_per_second": 0.067
    }
  ],
  "logging_steps": 191,
  "max_steps": 3816,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3140851990119055e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}