| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008, |
| "grad_norm": 234.60278509074556, |
| "learning_rate": 0.0, |
| "logits/chosen": 0.107421875, |
| "logits/rejected": 0.08984375, |
| "logps/chosen": -262.0, |
| "logps/rejected": -342.0, |
| "loss": 0.6914, |
| "nll_loss": 1.015625, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 282.5477340162511, |
| "learning_rate": 3.6e-08, |
| "logits/chosen": -0.1501736044883728, |
| "logits/rejected": 0.009562174789607525, |
| "logps/chosen": -338.22222900390625, |
| "logps/rejected": -378.0, |
| "loss": 0.6885, |
| "nll_loss": 0.9717881679534912, |
| "rewards/accuracies": 0.3194444477558136, |
| "rewards/chosen": 0.015223185531795025, |
| "rewards/margins": 0.0276963971555233, |
| "rewards/rejected": -0.01256646029651165, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 206.12921184995773, |
| "learning_rate": 7.599999999999999e-08, |
| "logits/chosen": 0.1673583984375, |
| "logits/rejected": 0.0367431640625, |
| "logps/chosen": -207.0500030517578, |
| "logps/rejected": -415.20001220703125, |
| "loss": 0.6115, |
| "nll_loss": 0.9085937738418579, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.02422180213034153, |
| "rewards/margins": 0.18081054091453552, |
| "rewards/rejected": -0.20512695610523224, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 158.12897373074685, |
| "learning_rate": 1.16e-07, |
| "logits/chosen": -0.01387939415872097, |
| "logits/rejected": 0.06098632887005806, |
| "logps/chosen": -323.70001220703125, |
| "logps/rejected": -389.6000061035156, |
| "loss": 0.4236, |
| "nll_loss": 0.9488281011581421, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.1749267578125, |
| "rewards/margins": 0.7095702886581421, |
| "rewards/rejected": -0.8843749761581421, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 97.36760160402432, |
| "learning_rate": 1.56e-07, |
| "logits/chosen": -0.011962890625, |
| "logits/rejected": 0.02890625037252903, |
| "logps/chosen": -329.5, |
| "logps/rejected": -412.3999938964844, |
| "loss": 0.2188, |
| "nll_loss": 0.99609375, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -0.4403320252895355, |
| "rewards/margins": 1.7859375476837158, |
| "rewards/rejected": -2.2265625, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 67.3942566184986, |
| "learning_rate": 1.96e-07, |
| "logits/chosen": 0.07175292819738388, |
| "logits/rejected": -0.018310546875, |
| "logps/chosen": -261.70001220703125, |
| "logps/rejected": -420.20001220703125, |
| "loss": 0.1252, |
| "nll_loss": 1.0148437023162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.517773449420929, |
| "rewards/margins": 2.8515625, |
| "rewards/rejected": -3.3734374046325684, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 25.358406316482952, |
| "learning_rate": 2.3599999999999997e-07, |
| "logits/chosen": 0.17060546576976776, |
| "logits/rejected": 0.15923461318016052, |
| "logps/chosen": -264.1000061035156, |
| "logps/rejected": -445.6000061035156, |
| "loss": 0.0376, |
| "nll_loss": 0.9632812738418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8160156011581421, |
| "rewards/margins": 4.489062309265137, |
| "rewards/rejected": -5.306250095367432, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 83.05887552576917, |
| "learning_rate": 2.7600000000000004e-07, |
| "logits/chosen": 0.147216796875, |
| "logits/rejected": 0.18815918266773224, |
| "logps/chosen": -271.70001220703125, |
| "logps/rejected": -455.20001220703125, |
| "loss": 0.0631, |
| "nll_loss": 0.940625011920929, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -1.316015601158142, |
| "rewards/margins": 6.240624904632568, |
| "rewards/rejected": -7.556250095367432, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 25.278258079670092, |
| "learning_rate": 3.1599999999999997e-07, |
| "logits/chosen": 0.17365722358226776, |
| "logits/rejected": 0.2812866270542145, |
| "logps/chosen": -328.8999938964844, |
| "logps/rejected": -457.0, |
| "loss": 0.0121, |
| "nll_loss": 1.033203125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.478515625, |
| "rewards/margins": 7.271874904632568, |
| "rewards/rejected": -8.743749618530273, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 8.036414090919388, |
| "learning_rate": 3.5599999999999996e-07, |
| "logits/chosen": 0.3252929747104645, |
| "logits/rejected": 0.3529296815395355, |
| "logps/chosen": -282.1000061035156, |
| "logps/rejected": -486.0, |
| "loss": 0.0349, |
| "nll_loss": 1.041406273841858, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -2.042187452316284, |
| "rewards/margins": 8.703125, |
| "rewards/rejected": -10.743749618530273, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.06277898862179868, |
| "learning_rate": 3.96e-07, |
| "logits/chosen": 0.11843261867761612, |
| "logits/rejected": 0.28974610567092896, |
| "logps/chosen": -338.29998779296875, |
| "logps/rejected": -518.5999755859375, |
| "loss": 0.0113, |
| "nll_loss": 1.068750023841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.958593726158142, |
| "rewards/margins": 10.34375, |
| "rewards/rejected": -12.306249618530273, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 186.15959679077883, |
| "learning_rate": 4.36e-07, |
| "logits/chosen": 0.16660156846046448, |
| "logits/rejected": 0.23691406846046448, |
| "logps/chosen": -336.79998779296875, |
| "logps/rejected": -498.3999938964844, |
| "loss": 0.0368, |
| "nll_loss": 1.010156273841858, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.2421875, |
| "rewards/margins": 10.175000190734863, |
| "rewards/rejected": -12.431249618530273, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.030662736090751, |
| "learning_rate": 4.76e-07, |
| "logits/chosen": 0.3314208984375, |
| "logits/rejected": 0.39873045682907104, |
| "logps/chosen": -291.20001220703125, |
| "logps/rejected": -559.7999877929688, |
| "loss": 0.0055, |
| "nll_loss": 0.977734386920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.3921875953674316, |
| "rewards/margins": 13.274999618530273, |
| "rewards/rejected": -15.681249618530273, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 10.790384157037435, |
| "learning_rate": 4.982222222222223e-07, |
| "logits/chosen": 0.33642578125, |
| "logits/rejected": 0.3980468809604645, |
| "logps/chosen": -316.8999938964844, |
| "logps/rejected": -563.7999877929688, |
| "loss": 0.0057, |
| "nll_loss": 1.100000023841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.5250000953674316, |
| "rewards/margins": 14.125, |
| "rewards/rejected": -16.65625, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 6.132805095404835, |
| "learning_rate": 4.937777777777777e-07, |
| "logits/chosen": 0.42326658964157104, |
| "logits/rejected": 0.41484373807907104, |
| "logps/chosen": -286.6000061035156, |
| "logps/rejected": -567.0, |
| "loss": 0.0025, |
| "nll_loss": 1.1179687976837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.112499952316284, |
| "rewards/margins": 14.568750381469727, |
| "rewards/rejected": -17.6875, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.021958637023240073, |
| "learning_rate": 4.893333333333333e-07, |
| "logits/chosen": 0.45771485567092896, |
| "logits/rejected": 0.517138659954071, |
| "logps/chosen": -292.1000061035156, |
| "logps/rejected": -587.5999755859375, |
| "loss": 0.0195, |
| "nll_loss": 1.0183594226837158, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -3.3921875953674316, |
| "rewards/margins": 15.631250381469727, |
| "rewards/rejected": -19.018749237060547, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.13947826744502106, |
| "learning_rate": 4.848888888888888e-07, |
| "logits/chosen": 0.2855468690395355, |
| "logits/rejected": 0.37548828125, |
| "logps/chosen": -297.95001220703125, |
| "logps/rejected": -582.4000244140625, |
| "loss": 0.0012, |
| "nll_loss": 1.080078125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.78515625, |
| "rewards/margins": 15.831250190734863, |
| "rewards/rejected": -18.618749618530273, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.16318898226871553, |
| "learning_rate": 4.804444444444444e-07, |
| "logits/chosen": 0.34228515625, |
| "logits/rejected": 0.42265623807907104, |
| "logps/chosen": -280.8999938964844, |
| "logps/rejected": -588.0, |
| "loss": 0.0029, |
| "nll_loss": 1.0382812023162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.028125047683716, |
| "rewards/margins": 16.481250762939453, |
| "rewards/rejected": -19.512500762939453, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 16.913628536041927, |
| "learning_rate": 4.76e-07, |
| "logits/chosen": 0.29877930879592896, |
| "logits/rejected": 0.38134765625, |
| "logps/chosen": -337.3999938964844, |
| "logps/rejected": -592.4000244140625, |
| "loss": 0.0117, |
| "nll_loss": 1.0945312976837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.65234375, |
| "rewards/margins": 17.700000762939453, |
| "rewards/rejected": -20.362499237060547, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 9.054585013706896, |
| "learning_rate": 4.7155555555555556e-07, |
| "logits/chosen": 0.4610839784145355, |
| "logits/rejected": 0.5546875, |
| "logps/chosen": -301.6000061035156, |
| "logps/rejected": -574.4000244140625, |
| "loss": 0.0198, |
| "nll_loss": 1.0695312023162842, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.5296874046325684, |
| "rewards/margins": 16.774999618530273, |
| "rewards/rejected": -19.318750381469727, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.025795673574277502, |
| "learning_rate": 4.6711111111111104e-07, |
| "logits/chosen": 0.42558592557907104, |
| "logits/rejected": 0.5215820074081421, |
| "logps/chosen": -290.5, |
| "logps/rejected": -604.4000244140625, |
| "loss": 0.0011, |
| "nll_loss": 1.0128905773162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.750781297683716, |
| "rewards/margins": 18.137500762939453, |
| "rewards/rejected": -20.887500762939453, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.049230968184915055, |
| "learning_rate": 4.6266666666666663e-07, |
| "logits/chosen": 0.4349609315395355, |
| "logits/rejected": 0.5816406011581421, |
| "logps/chosen": -299.20001220703125, |
| "logps/rejected": -581.4000244140625, |
| "loss": 0.0012, |
| "nll_loss": 1.058984398841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.5531249046325684, |
| "rewards/margins": 17.506250381469727, |
| "rewards/rejected": -20.075000762939453, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.12051591908423682, |
| "learning_rate": 4.5822222222222216e-07, |
| "logits/chosen": 0.33723145723342896, |
| "logits/rejected": 0.4976562559604645, |
| "logps/chosen": -331.29998779296875, |
| "logps/rejected": -598.7999877929688, |
| "loss": 0.0014, |
| "nll_loss": 1.0636718273162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.875, |
| "rewards/margins": 17.549999237060547, |
| "rewards/rejected": -20.412500381469727, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.02082872725280439, |
| "learning_rate": 4.5377777777777775e-07, |
| "logits/chosen": 0.44482421875, |
| "logits/rejected": 0.587109386920929, |
| "logps/chosen": -266.6000061035156, |
| "logps/rejected": -608.0, |
| "loss": 0.0113, |
| "nll_loss": 0.9273437261581421, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.444531202316284, |
| "rewards/margins": 18.037500381469727, |
| "rewards/rejected": -20.487499237060547, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.2819745634114876, |
| "learning_rate": 4.493333333333333e-07, |
| "logits/chosen": 0.3896484375, |
| "logits/rejected": 0.533886730670929, |
| "logps/chosen": -330.5, |
| "logps/rejected": -569.7999877929688, |
| "loss": 0.0097, |
| "nll_loss": 0.998828113079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.526562452316284, |
| "rewards/margins": 17.625, |
| "rewards/rejected": -20.162500381469727, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.6312660403152253, |
| "learning_rate": 4.4488888888888887e-07, |
| "logits/chosen": 0.39438170194625854, |
| "logits/rejected": 0.45518797636032104, |
| "logps/chosen": -317.6000061035156, |
| "logps/rejected": -540.5999755859375, |
| "loss": 0.0351, |
| "nll_loss": 1.03515625, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.47265625, |
| "rewards/margins": 15.524999618530273, |
| "rewards/rejected": -18.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.013098055970262888, |
| "learning_rate": 4.4044444444444445e-07, |
| "logits/chosen": 0.3513244688510895, |
| "logits/rejected": 0.47832030057907104, |
| "logps/chosen": -316.3999938964844, |
| "logps/rejected": -599.5999755859375, |
| "loss": 0.0237, |
| "nll_loss": 1.089453101158142, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.32421875, |
| "rewards/margins": 17.71875, |
| "rewards/rejected": -20.075000762939453, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.02660915986368852, |
| "learning_rate": 4.36e-07, |
| "logits/chosen": 0.4693359434604645, |
| "logits/rejected": 0.5892578363418579, |
| "logps/chosen": -300.79998779296875, |
| "logps/rejected": -594.2000122070312, |
| "loss": 0.0108, |
| "nll_loss": 1.070703148841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.1343750953674316, |
| "rewards/margins": 17.587499618530273, |
| "rewards/rejected": -20.737499237060547, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.026490947430251415, |
| "learning_rate": 4.3155555555555557e-07, |
| "logits/chosen": 0.40800780057907104, |
| "logits/rejected": 0.58984375, |
| "logps/chosen": -319.79998779296875, |
| "logps/rejected": -617.4000244140625, |
| "loss": 0.0016, |
| "nll_loss": 1.0261719226837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.5062499046325684, |
| "rewards/margins": 19.556249618530273, |
| "rewards/rejected": -22.087499618530273, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.026710360631259922, |
| "learning_rate": 4.271111111111111e-07, |
| "logits/chosen": 0.554211437702179, |
| "logits/rejected": 0.658398449420929, |
| "logps/chosen": -281.5, |
| "logps/rejected": -627.2000122070312, |
| "loss": 0.0065, |
| "nll_loss": 0.9703124761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.4585938453674316, |
| "rewards/margins": 21.037500381469727, |
| "rewards/rejected": -23.5, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.0629472840725987, |
| "learning_rate": 4.226666666666667e-07, |
| "logits/chosen": 0.492919921875, |
| "logits/rejected": 0.6646484136581421, |
| "logps/chosen": -280.3999938964844, |
| "logps/rejected": -636.4000244140625, |
| "loss": 0.0011, |
| "nll_loss": 1.0402343273162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.484375, |
| "rewards/margins": 21.662500381469727, |
| "rewards/rejected": -24.125, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.015481044961707728, |
| "learning_rate": 4.1822222222222217e-07, |
| "logits/chosen": 0.518505871295929, |
| "logits/rejected": 0.6767578125, |
| "logps/chosen": -301.6000061035156, |
| "logps/rejected": -672.4000244140625, |
| "loss": 0.001, |
| "nll_loss": 1.0242187976837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.29296875, |
| "rewards/margins": 22.825000762939453, |
| "rewards/rejected": -25.087499618530273, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.011465936087268223, |
| "learning_rate": 4.1377777777777776e-07, |
| "logits/chosen": 0.39887696504592896, |
| "logits/rejected": 0.503710925579071, |
| "logps/chosen": -398.6000061035156, |
| "logps/rejected": -589.7999877929688, |
| "loss": 0.0023, |
| "nll_loss": 1.108984351158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.241406202316284, |
| "rewards/margins": 18.181249618530273, |
| "rewards/rejected": -20.412500381469727, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.009443129326879707, |
| "learning_rate": 4.093333333333333e-07, |
| "logits/chosen": 0.42631834745407104, |
| "logits/rejected": 0.555468738079071, |
| "logps/chosen": -311.70001220703125, |
| "logps/rejected": -587.2000122070312, |
| "loss": 0.0022, |
| "nll_loss": 0.985546886920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.837499976158142, |
| "rewards/margins": 18.837499618530273, |
| "rewards/rejected": -20.649999618530273, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.0101159188120434, |
| "learning_rate": 4.048888888888889e-07, |
| "logits/chosen": 0.3521057069301605, |
| "logits/rejected": 0.47089844942092896, |
| "logps/chosen": -259.6000061035156, |
| "logps/rejected": -614.2000122070312, |
| "loss": 0.001, |
| "nll_loss": 0.975390613079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9140625, |
| "rewards/margins": 20.387500762939453, |
| "rewards/rejected": -22.318750381469727, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.01605643719718742, |
| "learning_rate": 4.004444444444444e-07, |
| "logits/chosen": 0.3182617127895355, |
| "logits/rejected": 0.4351562559604645, |
| "logps/chosen": -269.79998779296875, |
| "logps/rejected": -607.2000122070312, |
| "loss": 0.0078, |
| "nll_loss": 0.967968761920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5499999523162842, |
| "rewards/margins": 20.274999618530273, |
| "rewards/rejected": -21.799999237060547, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.01795986015889223, |
| "learning_rate": 3.96e-07, |
| "logits/chosen": 0.4128173887729645, |
| "logits/rejected": 0.5787109136581421, |
| "logps/chosen": -280.20001220703125, |
| "logps/rejected": -583.5999755859375, |
| "loss": 0.0054, |
| "nll_loss": 1.0378906726837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6953125, |
| "rewards/margins": 19.5, |
| "rewards/rejected": -21.212499618530273, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.032900120617305545, |
| "learning_rate": 3.9155555555555553e-07, |
| "logits/chosen": 0.3366943299770355, |
| "logits/rejected": 0.56396484375, |
| "logps/chosen": -314.6000061035156, |
| "logps/rejected": -623.2000122070312, |
| "loss": 0.0012, |
| "nll_loss": 1.031640648841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.967187523841858, |
| "rewards/margins": 22.0, |
| "rewards/rejected": -23.962499618530273, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.03368133834851561, |
| "learning_rate": 3.871111111111111e-07, |
| "logits/chosen": 0.443115234375, |
| "logits/rejected": 0.6058593988418579, |
| "logps/chosen": -303.5, |
| "logps/rejected": -611.2000122070312, |
| "loss": 0.0033, |
| "nll_loss": 1.0988280773162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.264843702316284, |
| "rewards/margins": 20.337499618530273, |
| "rewards/rejected": -22.612499237060547, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 8.91531946245202, |
| "learning_rate": 3.8266666666666665e-07, |
| "logits/chosen": 0.37250977754592896, |
| "logits/rejected": 0.570019543170929, |
| "logps/chosen": -355.79998779296875, |
| "logps/rejected": -587.0, |
| "loss": 0.0075, |
| "nll_loss": 1.019140601158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7722656726837158, |
| "rewards/margins": 18.850000381469727, |
| "rewards/rejected": -20.612499237060547, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.014188316050120144, |
| "learning_rate": 3.7822222222222224e-07, |
| "logits/chosen": 0.28227537870407104, |
| "logits/rejected": 0.44189453125, |
| "logps/chosen": -302.5, |
| "logps/rejected": -622.2000122070312, |
| "loss": 0.0018, |
| "nll_loss": 1.0828125476837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.759374976158142, |
| "rewards/margins": 20.856250762939453, |
| "rewards/rejected": -22.625, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.01629482028046429, |
| "learning_rate": 3.7377777777777777e-07, |
| "logits/chosen": 0.4126953184604645, |
| "logits/rejected": 0.501953125, |
| "logps/chosen": -356.8999938964844, |
| "logps/rejected": -628.0, |
| "loss": 0.001, |
| "nll_loss": 1.040624976158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.0132813453674316, |
| "rewards/margins": 21.137500762939453, |
| "rewards/rejected": -23.149999618530273, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.017142773967819217, |
| "learning_rate": 3.693333333333333e-07, |
| "logits/chosen": 0.3507751524448395, |
| "logits/rejected": 0.4869628846645355, |
| "logps/chosen": -333.95001220703125, |
| "logps/rejected": -605.2000122070312, |
| "loss": 0.001, |
| "nll_loss": 0.9984375238418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.606054663658142, |
| "rewards/margins": 20.237499237060547, |
| "rewards/rejected": -21.862499237060547, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.18744587283856406, |
| "learning_rate": 3.6488888888888884e-07, |
| "logits/chosen": 0.4670043885707855, |
| "logits/rejected": 0.5835937261581421, |
| "logps/chosen": -272.5, |
| "logps/rejected": -609.2000122070312, |
| "loss": 0.001, |
| "nll_loss": 0.9847656488418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.394140601158142, |
| "rewards/margins": 20.149999618530273, |
| "rewards/rejected": -21.5625, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.7115134446778305, |
| "learning_rate": 3.604444444444444e-07, |
| "logits/chosen": 0.32639771699905396, |
| "logits/rejected": 0.49003905057907104, |
| "logps/chosen": -271.1000061035156, |
| "logps/rejected": -617.0, |
| "loss": 0.001, |
| "nll_loss": 0.901562511920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.182031273841858, |
| "rewards/margins": 21.112499237060547, |
| "rewards/rejected": -22.3125, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.08581115615312221, |
| "learning_rate": 3.5599999999999996e-07, |
| "logits/chosen": 0.4197753965854645, |
| "logits/rejected": 0.5601562261581421, |
| "logps/chosen": -299.20001220703125, |
| "logps/rejected": -558.4000244140625, |
| "loss": 0.0051, |
| "nll_loss": 0.979296863079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.885546863079071, |
| "rewards/margins": 18.625, |
| "rewards/rejected": -19.512500762939453, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.050537690816069084, |
| "learning_rate": 3.5155555555555554e-07, |
| "logits/chosen": 0.36616212129592896, |
| "logits/rejected": 0.5220702886581421, |
| "logps/chosen": -293.20001220703125, |
| "logps/rejected": -601.4000244140625, |
| "loss": 0.0011, |
| "nll_loss": 1.007421851158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.21875, |
| "rewards/margins": 20.475000381469727, |
| "rewards/rejected": -21.6875, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.02175945703204624, |
| "learning_rate": 3.471111111111111e-07, |
| "logits/chosen": 0.4150390625, |
| "logits/rejected": 0.5416015386581421, |
| "logps/chosen": -276.8999938964844, |
| "logps/rejected": -617.7999877929688, |
| "loss": 0.0011, |
| "nll_loss": 1.1179687976837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5373046398162842, |
| "rewards/margins": 20.962499618530273, |
| "rewards/rejected": -22.5, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.026063826437843437, |
| "learning_rate": 3.4266666666666666e-07, |
| "logits/chosen": 0.45039063692092896, |
| "logits/rejected": 0.612500011920929, |
| "logps/chosen": -272.3999938964844, |
| "logps/rejected": -599.7999877929688, |
| "loss": 0.0011, |
| "nll_loss": 0.9125000238418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2761719226837158, |
| "rewards/margins": 21.975000381469727, |
| "rewards/rejected": -23.25, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.014336581093924161, |
| "learning_rate": 3.382222222222222e-07, |
| "logits/chosen": 0.38768309354782104, |
| "logits/rejected": 0.51953125, |
| "logps/chosen": -373.20001220703125, |
| "logps/rejected": -591.5999755859375, |
| "loss": 0.0011, |
| "nll_loss": 1.019921898841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.96875, |
| "rewards/margins": 20.899999618530273, |
| "rewards/rejected": -22.862499237060547, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.015681965151029324, |
| "learning_rate": 3.337777777777778e-07, |
| "logits/chosen": 0.28288573026657104, |
| "logits/rejected": 0.4932617247104645, |
| "logps/chosen": -301.70001220703125, |
| "logps/rejected": -696.0, |
| "loss": 0.0011, |
| "nll_loss": 1.058203101158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.958593726158142, |
| "rewards/margins": 25.9375, |
| "rewards/rejected": -27.912500381469727, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.029148974279792687, |
| "learning_rate": 3.293333333333333e-07, |
| "logits/chosen": 0.31782227754592896, |
| "logits/rejected": 0.4458984434604645, |
| "logps/chosen": -302.6000061035156, |
| "logps/rejected": -636.0, |
| "loss": 0.0136, |
| "nll_loss": 0.9886718988418579, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -1.5339844226837158, |
| "rewards/margins": 22.774999618530273, |
| "rewards/rejected": -24.287500381469727, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.019209526445581045, |
| "learning_rate": 3.248888888888889e-07, |
| "logits/chosen": 0.3396972715854645, |
| "logits/rejected": 0.4786132872104645, |
| "logps/chosen": -296.29998779296875, |
| "logps/rejected": -648.0, |
| "loss": 0.0011, |
| "nll_loss": 1.05078125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.097070336341858, |
| "rewards/margins": 23.450000762939453, |
| "rewards/rejected": -24.575000762939453, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.016375124643676898, |
| "learning_rate": 3.204444444444444e-07, |
| "logits/chosen": 0.2938476502895355, |
| "logits/rejected": 0.45917969942092896, |
| "logps/chosen": -328.5, |
| "logps/rejected": -683.2000122070312, |
| "loss": 0.0011, |
| "nll_loss": 1.078515648841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.935742199420929, |
| "rewards/margins": 24.362499237060547, |
| "rewards/rejected": -25.274999618530273, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.03023805422018604, |
| "learning_rate": 3.1599999999999997e-07, |
| "logits/chosen": 0.3899902403354645, |
| "logits/rejected": 0.4580078125, |
| "logps/chosen": -258.04998779296875, |
| "logps/rejected": -600.0, |
| "loss": 0.0009, |
| "nll_loss": 0.878125011920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.675488293170929, |
| "rewards/margins": 21.325000762939453, |
| "rewards/rejected": -22.0, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.01610635441474965, |
| "learning_rate": 3.115555555555555e-07, |
| "logits/chosen": 0.3402954041957855, |
| "logits/rejected": 0.46113282442092896, |
| "logps/chosen": -274.29998779296875, |
| "logps/rejected": -666.4000244140625, |
| "loss": 0.001, |
| "nll_loss": 0.967968761920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.11225585639476776, |
| "rewards/margins": 23.512500762939453, |
| "rewards/rejected": -23.625, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.09940501062248701, |
| "learning_rate": 3.071111111111111e-07, |
| "logits/chosen": 0.13032226264476776, |
| "logits/rejected": 0.25639647245407104, |
| "logps/chosen": -349.79998779296875, |
| "logps/rejected": -608.5999755859375, |
| "loss": 0.0056, |
| "nll_loss": 1.128515601158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.763476550579071, |
| "rewards/margins": 20.899999618530273, |
| "rewards/rejected": -21.637500762939453, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.015044416279848708, |
| "learning_rate": 3.026666666666666e-07, |
| "logits/chosen": 0.2685302793979645, |
| "logits/rejected": 0.44746094942092896, |
| "logps/chosen": -277.29998779296875, |
| "logps/rejected": -619.2000122070312, |
| "loss": 0.0032, |
| "nll_loss": 0.977343738079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5107055902481079, |
| "rewards/margins": 22.412500381469727, |
| "rewards/rejected": -22.912500381469727, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.010492314365964279, |
| "learning_rate": 2.982222222222222e-07, |
| "logits/chosen": 0.24605712294578552, |
| "logits/rejected": 0.38178712129592896, |
| "logps/chosen": -282.45001220703125, |
| "logps/rejected": -596.4000244140625, |
| "loss": 0.001, |
| "nll_loss": 0.9585937261581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3153625428676605, |
| "rewards/margins": 21.8125, |
| "rewards/rejected": -22.125, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.011244630233026583, |
| "learning_rate": 2.937777777777778e-07, |
| "logits/chosen": 0.185791015625, |
| "logits/rejected": 0.36054688692092896, |
| "logps/chosen": -298.29998779296875, |
| "logps/rejected": -561.4000244140625, |
| "loss": 0.001, |
| "nll_loss": 0.9632812738418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.29365235567092896, |
| "rewards/margins": 20.21875, |
| "rewards/rejected": -20.512500762939453, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.014891293094420017, |
| "learning_rate": 2.8933333333333333e-07, |
| "logits/chosen": 0.3611816465854645, |
| "logits/rejected": 0.46757811307907104, |
| "logps/chosen": -321.70001220703125, |
| "logps/rejected": -618.0, |
| "loss": 0.0012, |
| "nll_loss": 1.0792968273162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0291016101837158, |
| "rewards/margins": 22.037500381469727, |
| "rewards/rejected": -23.049999237060547, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.016715039332789686, |
| "learning_rate": 2.848888888888889e-07, |
| "logits/chosen": 0.3804687559604645, |
| "logits/rejected": 0.546875, |
| "logps/chosen": -266.79998779296875, |
| "logps/rejected": -629.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.9234374761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6703125238418579, |
| "rewards/margins": 22.493749618530273, |
| "rewards/rejected": -23.149999618530273, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.007446183758849815, |
| "learning_rate": 2.8044444444444445e-07, |
| "logits/chosen": 0.38258057832717896, |
| "logits/rejected": 0.46435546875, |
| "logps/chosen": -264.04998779296875, |
| "logps/rejected": -692.4000244140625, |
| "loss": 0.001, |
| "nll_loss": 0.966015636920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.674023449420929, |
| "rewards/margins": 25.799999237060547, |
| "rewards/rejected": -26.475000381469727, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.06960779594217158, |
| "learning_rate": 2.7600000000000004e-07, |
| "logits/chosen": 0.22910156846046448, |
| "logits/rejected": 0.3974609375, |
| "logps/chosen": -257.70001220703125, |
| "logps/rejected": -636.4000244140625, |
| "loss": 0.0136, |
| "nll_loss": 0.969531238079071, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -1.108984351158142, |
| "rewards/margins": 23.912500381469727, |
| "rewards/rejected": -25.012500762939453, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.018519025389240995, |
| "learning_rate": 2.715555555555555e-07, |
| "logits/chosen": 0.5155273675918579, |
| "logits/rejected": 0.7171875238418579, |
| "logps/chosen": -312.0, |
| "logps/rejected": -652.0, |
| "loss": 0.0041, |
| "nll_loss": 0.944531261920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.06640625, |
| "rewards/margins": 25.774999618530273, |
| "rewards/rejected": -26.850000381469727, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.02373063016084682, |
| "learning_rate": 2.671111111111111e-07, |
| "logits/chosen": 0.4817748963832855, |
| "logits/rejected": 0.6402343511581421, |
| "logps/chosen": -285.20001220703125, |
| "logps/rejected": -635.0, |
| "loss": 0.0023, |
| "nll_loss": 1.037500023841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.0257811546325684, |
| "rewards/margins": 25.0, |
| "rewards/rejected": -27.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.04312745328739261, |
| "learning_rate": 2.6266666666666664e-07, |
| "logits/chosen": 0.45927733182907104, |
| "logits/rejected": 0.6669921875, |
| "logps/chosen": -315.5, |
| "logps/rejected": -683.5999755859375, |
| "loss": 0.0011, |
| "nll_loss": 1.0671875476837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6691405773162842, |
| "rewards/margins": 27.862499237060547, |
| "rewards/rejected": -29.524999618530273, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 8.406219083200051, |
| "learning_rate": 2.582222222222222e-07, |
| "logits/chosen": 0.51904296875, |
| "logits/rejected": 0.666015625, |
| "logps/chosen": -280.0, |
| "logps/rejected": -706.7999877929688, |
| "loss": 0.0013, |
| "nll_loss": 1.0515625476837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.578515648841858, |
| "rewards/margins": 28.737499237060547, |
| "rewards/rejected": -30.299999237060547, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.00797165057348372, |
| "learning_rate": 2.5377777777777776e-07, |
| "logits/chosen": 0.425048828125, |
| "logits/rejected": 0.6025390625, |
| "logps/chosen": -304.20001220703125, |
| "logps/rejected": -616.2000122070312, |
| "loss": 0.0073, |
| "nll_loss": 0.931640625, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0339844226837158, |
| "rewards/margins": 23.674999237060547, |
| "rewards/rejected": -24.6875, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.02117016630770859, |
| "learning_rate": 2.493333333333333e-07, |
| "logits/chosen": 0.43408203125, |
| "logits/rejected": 0.5884765386581421, |
| "logps/chosen": -271.20001220703125, |
| "logps/rejected": -651.2000122070312, |
| "loss": 0.001, |
| "nll_loss": 1.0207030773162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.912792980670929, |
| "rewards/margins": 25.412500381469727, |
| "rewards/rejected": -26.325000762939453, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.05013906609321948, |
| "learning_rate": 2.448888888888889e-07, |
| "logits/chosen": 0.5001465082168579, |
| "logits/rejected": 0.587890625, |
| "logps/chosen": -291.0, |
| "logps/rejected": -642.5999755859375, |
| "loss": 0.0011, |
| "nll_loss": 1.056249976158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5542968511581421, |
| "rewards/margins": 24.600000381469727, |
| "rewards/rejected": -25.149999618530273, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.010508494344882753, |
| "learning_rate": 2.404444444444444e-07, |
| "logits/chosen": 0.4154296815395355, |
| "logits/rejected": 0.53466796875, |
| "logps/chosen": -281.8999938964844, |
| "logps/rejected": -616.5999755859375, |
| "loss": 0.001, |
| "nll_loss": 1.004296898841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6047607660293579, |
| "rewards/margins": 23.875, |
| "rewards/rejected": -24.487499237060547, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.02085599761375334, |
| "learning_rate": 2.3599999999999997e-07, |
| "logits/chosen": 0.40234375, |
| "logits/rejected": 0.593945324420929, |
| "logps/chosen": -295.20001220703125, |
| "logps/rejected": -631.2000122070312, |
| "loss": 0.0011, |
| "nll_loss": 1.082421898841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5250488519668579, |
| "rewards/margins": 23.5625, |
| "rewards/rejected": -24.112499237060547, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.022872642095837056, |
| "learning_rate": 2.3155555555555553e-07, |
| "logits/chosen": 0.3960937559604645, |
| "logits/rejected": 0.5274413824081421, |
| "logps/chosen": -269.79998779296875, |
| "logps/rejected": -597.2000122070312, |
| "loss": 0.0009, |
| "nll_loss": 0.9195312261581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.25908201932907104, |
| "rewards/margins": 22.393749237060547, |
| "rewards/rejected": -22.643749237060547, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.012548738999103248, |
| "learning_rate": 2.2711111111111112e-07, |
| "logits/chosen": 0.3612304627895355, |
| "logits/rejected": 0.47871094942092896, |
| "logps/chosen": -264.20001220703125, |
| "logps/rejected": -630.0, |
| "loss": 0.0009, |
| "nll_loss": 0.899609386920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.07817383110523224, |
| "rewards/margins": 23.512500762939453, |
| "rewards/rejected": -23.649999618530273, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.009873598939126866, |
| "learning_rate": 2.2266666666666668e-07, |
| "logits/chosen": 0.3773437440395355, |
| "logits/rejected": 0.5074218511581421, |
| "logps/chosen": -303.3999938964844, |
| "logps/rejected": -563.2000122070312, |
| "loss": 0.0009, |
| "nll_loss": 0.873828113079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1182861328125, |
| "rewards/margins": 20.112499237060547, |
| "rewards/rejected": -19.987499237060547, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 2.1722284009630792, |
| "learning_rate": 2.1822222222222224e-07, |
| "logits/chosen": 0.45292967557907104, |
| "logits/rejected": 0.45878905057907104, |
| "logps/chosen": -267.79998779296875, |
| "logps/rejected": -575.7999877929688, |
| "loss": 0.0013, |
| "nll_loss": 0.91796875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.02753906324505806, |
| "rewards/margins": 20.549999237060547, |
| "rewards/rejected": -20.587499618530273, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.015566357128601925, |
| "learning_rate": 2.1377777777777777e-07, |
| "logits/chosen": 0.3982177674770355, |
| "logits/rejected": 0.540234386920929, |
| "logps/chosen": -265.5, |
| "logps/rejected": -687.5999755859375, |
| "loss": 0.0096, |
| "nll_loss": 0.9742187261581421, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.7779296636581421, |
| "rewards/margins": 28.487499237060547, |
| "rewards/rejected": -29.274999618530273, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.0852360643714337, |
| "learning_rate": 2.0933333333333333e-07, |
| "logits/chosen": 0.3617187440395355, |
| "logits/rejected": 0.48701173067092896, |
| "logps/chosen": -265.8500061035156, |
| "logps/rejected": -620.0, |
| "loss": 0.0009, |
| "nll_loss": 0.9300781488418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07557983696460724, |
| "rewards/margins": 23.362499237060547, |
| "rewards/rejected": -23.274999618530273, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.00755420050240308, |
| "learning_rate": 2.048888888888889e-07, |
| "logits/chosen": 0.28996580839157104, |
| "logits/rejected": 0.535351574420929, |
| "logps/chosen": -256.5, |
| "logps/rejected": -633.2000122070312, |
| "loss": 0.007, |
| "nll_loss": 0.967578113079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.40595704317092896, |
| "rewards/margins": 24.274999618530273, |
| "rewards/rejected": -24.700000762939453, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.01713698594594714, |
| "learning_rate": 2.0044444444444445e-07, |
| "logits/chosen": 0.24697265028953552, |
| "logits/rejected": 0.4248046875, |
| "logps/chosen": -283.8999938964844, |
| "logps/rejected": -630.0, |
| "loss": 0.001, |
| "nll_loss": 0.9644531011581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10643310844898224, |
| "rewards/margins": 23.575000762939453, |
| "rewards/rejected": -23.487499237060547, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.008040661363419143, |
| "learning_rate": 1.96e-07, |
| "logits/chosen": 0.31492918729782104, |
| "logits/rejected": 0.41838377714157104, |
| "logps/chosen": -306.79998779296875, |
| "logps/rejected": -615.2000122070312, |
| "loss": 0.0014, |
| "nll_loss": 0.9761718511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2574706971645355, |
| "rewards/margins": 22.5, |
| "rewards/rejected": -22.762500762939453, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.011050522622669116, |
| "learning_rate": 1.9155555555555554e-07, |
| "logits/chosen": 0.36284178495407104, |
| "logits/rejected": 0.5755859613418579, |
| "logps/chosen": -284.6000061035156, |
| "logps/rejected": -627.2000122070312, |
| "loss": 0.001, |
| "nll_loss": 1.021875023841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.15849609673023224, |
| "rewards/margins": 24.737499237060547, |
| "rewards/rejected": -24.912500381469727, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 1.3243299715147776, |
| "learning_rate": 1.871111111111111e-07, |
| "logits/chosen": 0.40766602754592896, |
| "logits/rejected": 0.55078125, |
| "logps/chosen": -256.0, |
| "logps/rejected": -638.7999877929688, |
| "loss": 0.001, |
| "nll_loss": 0.9371093511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.19802245497703552, |
| "rewards/margins": 25.137500762939453, |
| "rewards/rejected": -24.962499618530273, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.14410920057877055, |
| "learning_rate": 1.8266666666666666e-07, |
| "logits/chosen": 0.37744140625, |
| "logits/rejected": 0.558398425579071, |
| "logps/chosen": -298.3999938964844, |
| "logps/rejected": -634.0, |
| "loss": 0.0016, |
| "nll_loss": 0.9925781488418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5572754144668579, |
| "rewards/margins": 24.287500381469727, |
| "rewards/rejected": -24.862499237060547, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.008316643842253967, |
| "learning_rate": 1.7822222222222222e-07, |
| "logits/chosen": 0.3272949159145355, |
| "logits/rejected": 0.524121105670929, |
| "logps/chosen": -298.6000061035156, |
| "logps/rejected": -672.7999877929688, |
| "loss": 0.0127, |
| "nll_loss": 0.9996093511581421, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.3311523497104645, |
| "rewards/margins": 25.774999618530273, |
| "rewards/rejected": -26.100000381469727, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.008261030762950254, |
| "learning_rate": 1.7377777777777778e-07, |
| "logits/chosen": 0.4465576112270355, |
| "logits/rejected": 0.6617187261581421, |
| "logps/chosen": -281.1000061035156, |
| "logps/rejected": -612.0, |
| "loss": 0.0011, |
| "nll_loss": 0.98046875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.07645263522863388, |
| "rewards/margins": 22.987499237060547, |
| "rewards/rejected": -23.0625, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.012271312104445061, |
| "learning_rate": 1.6933333333333334e-07, |
| "logits/chosen": 0.4715820252895355, |
| "logits/rejected": 0.6121581792831421, |
| "logps/chosen": -285.6000061035156, |
| "logps/rejected": -622.4000244140625, |
| "loss": 0.0011, |
| "nll_loss": 0.889453113079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10859374701976776, |
| "rewards/margins": 24.6875, |
| "rewards/rejected": -24.587499618530273, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.008446600990786186, |
| "learning_rate": 1.6488888888888887e-07, |
| "logits/chosen": 0.4478515684604645, |
| "logits/rejected": 0.648632824420929, |
| "logps/chosen": -293.20001220703125, |
| "logps/rejected": -619.2000122070312, |
| "loss": 0.0008, |
| "nll_loss": 0.8179687261581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14560547471046448, |
| "rewards/margins": 24.024999618530273, |
| "rewards/rejected": -23.875, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 0.007390463100531587, |
| "learning_rate": 1.6044444444444443e-07, |
| "logits/chosen": 0.47856444120407104, |
| "logits/rejected": 0.5884765386581421, |
| "logps/chosen": -263.3999938964844, |
| "logps/rejected": -658.0, |
| "loss": 0.0009, |
| "nll_loss": 0.9476562738418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11513672024011612, |
| "rewards/margins": 26.512500762939453, |
| "rewards/rejected": -26.375, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.00835958049715363, |
| "learning_rate": 1.56e-07, |
| "logits/chosen": 0.24870605766773224, |
| "logits/rejected": 0.455322265625, |
| "logps/chosen": -257.79998779296875, |
| "logps/rejected": -668.0, |
| "loss": 0.0009, |
| "nll_loss": 0.9390624761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.04535522311925888, |
| "rewards/margins": 26.987499237060547, |
| "rewards/rejected": -27.037500381469727, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.009631942998860495, |
| "learning_rate": 1.5155555555555555e-07, |
| "logits/chosen": 0.4524902403354645, |
| "logits/rejected": 0.631640613079071, |
| "logps/chosen": -226.6999969482422, |
| "logps/rejected": -674.4000244140625, |
| "loss": 0.0009, |
| "nll_loss": 0.9175781011581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.06406249850988388, |
| "rewards/margins": 27.8125, |
| "rewards/rejected": -27.75, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.0089109719938143, |
| "learning_rate": 1.4711111111111111e-07, |
| "logits/chosen": 0.3174072206020355, |
| "logits/rejected": 0.40791016817092896, |
| "logps/chosen": -313.5, |
| "logps/rejected": -609.5999755859375, |
| "loss": 0.001, |
| "nll_loss": 0.943359375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3261352479457855, |
| "rewards/margins": 23.612499237060547, |
| "rewards/rejected": -23.287500381469727, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.017708759105074332, |
| "learning_rate": 1.4266666666666665e-07, |
| "logits/chosen": 0.30195313692092896, |
| "logits/rejected": 0.4756835997104645, |
| "logps/chosen": -248.89999389648438, |
| "logps/rejected": -651.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.919921875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13002929091453552, |
| "rewards/margins": 26.625, |
| "rewards/rejected": -26.487499237060547, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.02847716886316666, |
| "learning_rate": 1.382222222222222e-07, |
| "logits/chosen": 0.3302246034145355, |
| "logits/rejected": 0.5365234613418579, |
| "logps/chosen": -278.1000061035156, |
| "logps/rejected": -616.4000244140625, |
| "loss": 0.021, |
| "nll_loss": 1.017187476158142, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.10097656399011612, |
| "rewards/margins": 23.318750381469727, |
| "rewards/rejected": -23.237499237060547, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.0045187999902578015, |
| "learning_rate": 1.3377777777777777e-07, |
| "logits/chosen": 0.31437987089157104, |
| "logits/rejected": 0.5342773199081421, |
| "logps/chosen": -319.70001220703125, |
| "logps/rejected": -623.0, |
| "loss": 0.001, |
| "nll_loss": 0.9664062261581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3951171934604645, |
| "rewards/margins": 24.412500381469727, |
| "rewards/rejected": -24.0, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.07958922138660834, |
| "learning_rate": 1.2933333333333333e-07, |
| "logits/chosen": 0.3418945372104645, |
| "logits/rejected": 0.6171875, |
| "logps/chosen": -278.20001220703125, |
| "logps/rejected": -639.2000122070312, |
| "loss": 0.0009, |
| "nll_loss": 0.899609386920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.687060534954071, |
| "rewards/margins": 24.649999618530273, |
| "rewards/rejected": -24.0, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.008878025257232514, |
| "learning_rate": 1.2488888888888889e-07, |
| "logits/chosen": 0.36860352754592896, |
| "logits/rejected": 0.5000976324081421, |
| "logps/chosen": -252.1999969482422, |
| "logps/rejected": -642.4000244140625, |
| "loss": 0.0008, |
| "nll_loss": 0.837890625, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6498047113418579, |
| "rewards/margins": 24.712499618530273, |
| "rewards/rejected": -24.075000762939453, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.00886428654417825, |
| "learning_rate": 1.2044444444444445e-07, |
| "logits/chosen": 0.3182617127895355, |
| "logits/rejected": 0.526562511920929, |
| "logps/chosen": -296.20001220703125, |
| "logps/rejected": -644.4000244140625, |
| "loss": 0.0009, |
| "nll_loss": 0.914843738079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.883837878704071, |
| "rewards/margins": 24.725000381469727, |
| "rewards/rejected": -23.837499618530273, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.010048774551857776, |
| "learning_rate": 1.16e-07, |
| "logits/chosen": 0.20156249403953552, |
| "logits/rejected": 0.45976561307907104, |
| "logps/chosen": -333.5, |
| "logps/rejected": -593.0, |
| "loss": 0.0011, |
| "nll_loss": 1.0207030773162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.47138673067092896, |
| "rewards/margins": 22.174999237060547, |
| "rewards/rejected": -21.6875, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 5.74291381581626, |
| "learning_rate": 1.1155555555555555e-07, |
| "logits/chosen": 0.3219238221645355, |
| "logits/rejected": 0.49858397245407104, |
| "logps/chosen": -278.75, |
| "logps/rejected": -644.2000122070312, |
| "loss": 0.0019, |
| "nll_loss": 1.019140601158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7679687738418579, |
| "rewards/margins": 25.0, |
| "rewards/rejected": -24.200000762939453, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.02192293336950942, |
| "learning_rate": 1.0711111111111111e-07, |
| "logits/chosen": 0.569140613079071, |
| "logits/rejected": 0.702343761920929, |
| "logps/chosen": -254.3000030517578, |
| "logps/rejected": -685.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.868359386920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.591113269329071, |
| "rewards/margins": 27.024999618530273, |
| "rewards/rejected": -26.424999237060547, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.009300952984474926, |
| "learning_rate": 1.0266666666666666e-07, |
| "logits/chosen": 0.533111572265625, |
| "logits/rejected": 0.6361328363418579, |
| "logps/chosen": -233.25, |
| "logps/rejected": -630.4000244140625, |
| "loss": 0.0009, |
| "nll_loss": 0.8453124761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5562499761581421, |
| "rewards/margins": 24.325000762939453, |
| "rewards/rejected": -23.75, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.046196100888199323, |
| "learning_rate": 9.822222222222222e-08, |
| "logits/chosen": 0.42668455839157104, |
| "logits/rejected": 0.6005859375, |
| "logps/chosen": -274.5, |
| "logps/rejected": -626.4000244140625, |
| "loss": 0.001, |
| "nll_loss": 0.940625011920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.694628894329071, |
| "rewards/margins": 24.924999237060547, |
| "rewards/rejected": -24.25, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.005292906779736119, |
| "learning_rate": 9.377777777777778e-08, |
| "logits/chosen": 0.4056640565395355, |
| "logits/rejected": 0.615234375, |
| "logps/chosen": -284.70001220703125, |
| "logps/rejected": -645.5999755859375, |
| "loss": 0.0041, |
| "nll_loss": 0.9710937738418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.47343748807907104, |
| "rewards/margins": 25.4375, |
| "rewards/rejected": -24.962499618530273, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.07957225270899694, |
| "learning_rate": 8.933333333333333e-08, |
| "logits/chosen": 0.4527343809604645, |
| "logits/rejected": 0.659960925579071, |
| "logps/chosen": -297.8999938964844, |
| "logps/rejected": -642.2000122070312, |
| "loss": 0.0011, |
| "nll_loss": 1.062890648841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14885254204273224, |
| "rewards/margins": 25.674999237060547, |
| "rewards/rejected": -25.549999237060547, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.02087149426239816, |
| "learning_rate": 8.488888888888889e-08, |
| "logits/chosen": 0.45966798067092896, |
| "logits/rejected": 0.640820324420929, |
| "logps/chosen": -267.29998779296875, |
| "logps/rejected": -657.5999755859375, |
| "loss": 0.0048, |
| "nll_loss": 0.8785156011581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.26695555448532104, |
| "rewards/margins": 27.325000762939453, |
| "rewards/rejected": -27.0625, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.013941417548488667, |
| "learning_rate": 8.044444444444445e-08, |
| "logits/chosen": 0.39580076932907104, |
| "logits/rejected": 0.616015613079071, |
| "logps/chosen": -279.3999938964844, |
| "logps/rejected": -689.5999755859375, |
| "loss": 0.001, |
| "nll_loss": 0.974609375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3509277403354645, |
| "rewards/margins": 29.162500381469727, |
| "rewards/rejected": -28.825000762939453, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.05356592295983735, |
| "learning_rate": 7.599999999999999e-08, |
| "logits/chosen": 0.35834962129592896, |
| "logits/rejected": 0.5601562261581421, |
| "logps/chosen": -264.6000061035156, |
| "logps/rejected": -654.4000244140625, |
| "loss": 0.001, |
| "nll_loss": 0.9996093511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.139892578125, |
| "rewards/margins": 26.475000381469727, |
| "rewards/rejected": -26.337499618530273, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.04051423211552107, |
| "learning_rate": 7.155555555555555e-08, |
| "logits/chosen": 0.4306640625, |
| "logits/rejected": 0.589648425579071, |
| "logps/chosen": -277.3999938964844, |
| "logps/rejected": -653.5999755859375, |
| "loss": 0.0045, |
| "nll_loss": 0.9339843988418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3447265625, |
| "rewards/margins": 26.325000762939453, |
| "rewards/rejected": -25.975000381469727, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.02104507845831199, |
| "learning_rate": 6.71111111111111e-08, |
| "logits/chosen": 0.269287109375, |
| "logits/rejected": 0.5531250238418579, |
| "logps/chosen": -333.29998779296875, |
| "logps/rejected": -603.0, |
| "loss": 0.001, |
| "nll_loss": 0.9921875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.49541014432907104, |
| "rewards/margins": 23.712499618530273, |
| "rewards/rejected": -23.212499618530273, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.026162991645433887, |
| "learning_rate": 6.266666666666666e-08, |
| "logits/chosen": 0.5933593511581421, |
| "logits/rejected": 0.702343761920929, |
| "logps/chosen": -242.4499969482422, |
| "logps/rejected": -657.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.883984386920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2967773377895355, |
| "rewards/margins": 26.862499237060547, |
| "rewards/rejected": -26.575000762939453, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.03066308474144947, |
| "learning_rate": 5.822222222222222e-08, |
| "logits/chosen": 0.4716796875, |
| "logits/rejected": 0.6839843988418579, |
| "logps/chosen": -220.10000610351562, |
| "logps/rejected": -684.7999877929688, |
| "loss": 0.0009, |
| "nll_loss": 0.9468749761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.584765613079071, |
| "rewards/margins": 28.125, |
| "rewards/rejected": -27.549999237060547, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.023039708522050593, |
| "learning_rate": 5.377777777777778e-08, |
| "logits/chosen": 0.3741699159145355, |
| "logits/rejected": 0.5889648199081421, |
| "logps/chosen": -277.29998779296875, |
| "logps/rejected": -665.7999877929688, |
| "loss": 0.0014, |
| "nll_loss": 0.9859374761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4515624940395355, |
| "rewards/margins": 26.174999237060547, |
| "rewards/rejected": -25.725000381469727, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.04174862262602521, |
| "learning_rate": 4.933333333333333e-08, |
| "logits/chosen": 0.3539062440395355, |
| "logits/rejected": 0.5293945074081421, |
| "logps/chosen": -331.8999938964844, |
| "logps/rejected": -592.0, |
| "loss": 0.0064, |
| "nll_loss": 0.887890636920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.621826171875, |
| "rewards/margins": 22.587499618530273, |
| "rewards/rejected": -21.975000381469727, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.36396437493512307, |
| "learning_rate": 4.4888888888888885e-08, |
| "logits/chosen": 0.39692384004592896, |
| "logits/rejected": 0.5400390625, |
| "logps/chosen": -262.8999938964844, |
| "logps/rejected": -643.7999877929688, |
| "loss": 0.001, |
| "nll_loss": 0.9195312261581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6341797113418579, |
| "rewards/margins": 25.431249618530273, |
| "rewards/rejected": -24.799999237060547, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.01230667079616308, |
| "learning_rate": 4.044444444444444e-08, |
| "logits/chosen": 0.29730224609375, |
| "logits/rejected": 0.5694335699081421, |
| "logps/chosen": -283.8999938964844, |
| "logps/rejected": -612.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.8515625, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.666796863079071, |
| "rewards/margins": 24.575000762939453, |
| "rewards/rejected": -23.899999618530273, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 0.014453975200438642, |
| "learning_rate": 3.6e-08, |
| "logits/chosen": 0.3432373106479645, |
| "logits/rejected": 0.5855468511581421, |
| "logps/chosen": -291.3999938964844, |
| "logps/rejected": -665.2000122070312, |
| "loss": 0.001, |
| "nll_loss": 1.019921898841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6011718511581421, |
| "rewards/margins": 26.850000381469727, |
| "rewards/rejected": -26.262500762939453, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.01768958135813815, |
| "learning_rate": 3.155555555555556e-08, |
| "logits/chosen": 0.31098634004592896, |
| "logits/rejected": 0.5472656488418579, |
| "logps/chosen": -295.70001220703125, |
| "logps/rejected": -587.2000122070312, |
| "loss": 0.0055, |
| "nll_loss": 0.8902343511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17841796576976776, |
| "rewards/margins": 22.399999618530273, |
| "rewards/rejected": -22.225000381469727, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.009303330717789412, |
| "learning_rate": 2.7111111111111108e-08, |
| "logits/chosen": 0.263427734375, |
| "logits/rejected": 0.49003905057907104, |
| "logps/chosen": -262.6000061035156, |
| "logps/rejected": -649.2000122070312, |
| "loss": 0.0009, |
| "nll_loss": 0.8871093988418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3846679627895355, |
| "rewards/margins": 25.137500762939453, |
| "rewards/rejected": -24.762500762939453, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.033005829470572054, |
| "learning_rate": 2.2666666666666668e-08, |
| "logits/chosen": 0.3676391541957855, |
| "logits/rejected": 0.5830078125, |
| "logps/chosen": -295.70001220703125, |
| "logps/rejected": -625.2000122070312, |
| "loss": 0.001, |
| "nll_loss": 0.9925781488418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7079833745956421, |
| "rewards/margins": 25.0625, |
| "rewards/rejected": -24.3125, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 0.040751069146410926, |
| "learning_rate": 1.822222222222222e-08, |
| "logits/chosen": 0.3670410215854645, |
| "logits/rejected": 0.5015624761581421, |
| "logps/chosen": -256.29998779296875, |
| "logps/rejected": -645.4000244140625, |
| "loss": 0.0049, |
| "nll_loss": 0.907031238079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8086913824081421, |
| "rewards/margins": 26.362499237060547, |
| "rewards/rejected": -25.549999237060547, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.28250040304556245, |
| "learning_rate": 1.3777777777777778e-08, |
| "logits/chosen": 0.431640625, |
| "logits/rejected": 0.626171886920929, |
| "logps/chosen": -267.3500061035156, |
| "logps/rejected": -647.5999755859375, |
| "loss": 0.0021, |
| "nll_loss": 0.9437500238418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.539379894733429, |
| "rewards/margins": 26.012500762939453, |
| "rewards/rejected": -25.462499618530273, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 0.039993394141365025, |
| "learning_rate": 9.333333333333334e-09, |
| "logits/chosen": 0.45268553495407104, |
| "logits/rejected": 0.6796875, |
| "logps/chosen": -277.79998779296875, |
| "logps/rejected": -639.5999755859375, |
| "loss": 0.0008, |
| "nll_loss": 0.813281238079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.44189453125, |
| "rewards/margins": 26.350000381469727, |
| "rewards/rejected": -25.924999237060547, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.010994332832260341, |
| "learning_rate": 4.888888888888888e-09, |
| "logits/chosen": 0.42723387479782104, |
| "logits/rejected": 0.5927734375, |
| "logps/chosen": -252.60000610351562, |
| "logps/rejected": -644.7999877929688, |
| "loss": 0.0012, |
| "nll_loss": 0.8550781011581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.605175793170929, |
| "rewards/margins": 25.524999618530273, |
| "rewards/rejected": -24.924999237060547, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.02065652761694791, |
| "learning_rate": 4.4444444444444443e-10, |
| "logits/chosen": 0.35319823026657104, |
| "logits/rejected": 0.5824218988418579, |
| "logps/chosen": -258.6000061035156, |
| "logps/rejected": -651.2000122070312, |
| "loss": 0.0134, |
| "nll_loss": 0.932421863079071, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.7752929925918579, |
| "rewards/margins": 26.612499237060547, |
| "rewards/rejected": -25.837499618530273, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_logits/chosen": 0.22201773524284363, |
| "eval_logits/rejected": 0.42946213483810425, |
| "eval_logps/chosen": -328.9230651855469, |
| "eval_logps/rejected": -597.076904296875, |
| "eval_loss": 0.012361373752355576, |
| "eval_nll_loss": 0.9699519276618958, |
| "eval_rewards/accuracies": 0.9903846383094788, |
| "eval_rewards/chosen": 0.4366079568862915, |
| "eval_rewards/margins": 22.413461685180664, |
| "eval_rewards/rejected": -21.975961685180664, |
| "eval_runtime": 8.634, |
| "eval_samples_per_second": 11.582, |
| "eval_steps_per_second": 1.506, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1250, |
| "total_flos": 0.0, |
| "train_loss": 0.02150259389877319, |
| "train_runtime": 2425.829, |
| "train_samples_per_second": 4.122, |
| "train_steps_per_second": 0.515 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|