| { |
| "best_global_step": null, |
| "best_metric": 0.007082384079694748, |
| "best_model_checkpoint": null, |
| "epoch": 6.07181328545781, |
| "eval_steps": 50, |
| "global_step": 850, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03590664272890485, |
| "grad_norm": 17.81623077392578, |
| "learning_rate": 4.55034563588381e-06, |
| "logits/chosen": -2.323437452316284, |
| "logits/rejected": -2.70703125, |
| "logps/chosen": -72.5999984741211, |
| "logps/rejected": -82.94999694824219, |
| "loss": 0.6863, |
| "rewards/accuracies": 0.29374998807907104, |
| "rewards/chosen": -0.008679199032485485, |
| "rewards/margins": 0.01282348670065403, |
| "rewards/rejected": -0.02150878868997097, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0718132854578097, |
| "grad_norm": 13.013919830322266, |
| "learning_rate": 1.0238277680738572e-05, |
| "logits/chosen": -2.299609422683716, |
| "logits/rejected": -2.8023438453674316, |
| "logps/chosen": -76.6624984741211, |
| "logps/rejected": -89.5250015258789, |
| "loss": 0.5385, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -0.25728148221969604, |
| "rewards/margins": 0.44196778535842896, |
| "rewards/rejected": -0.6999267339706421, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10771992818671454, |
| "grad_norm": 10.523333549499512, |
| "learning_rate": 1.5926209725593337e-05, |
| "logits/chosen": -3.29296875, |
| "logits/rejected": -3.5562500953674316, |
| "logps/chosen": -86.69999694824219, |
| "logps/rejected": -112.17500305175781, |
| "loss": 0.325, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -1.2952148914337158, |
| "rewards/margins": 1.656640648841858, |
| "rewards/rejected": -2.951953172683716, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1436265709156194, |
| "grad_norm": 8.033699035644531, |
| "learning_rate": 2.1614141770448097e-05, |
| "logits/chosen": -4.182031154632568, |
| "logits/rejected": -4.242968559265137, |
| "logps/chosen": -109.25, |
| "logps/rejected": -148.60000610351562, |
| "loss": 0.2014, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -3.4242186546325684, |
| "rewards/margins": 3.3539061546325684, |
| "rewards/rejected": -6.779687404632568, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17953321364452424, |
| "grad_norm": 32.956817626953125, |
| "learning_rate": 2.730207381530286e-05, |
| "logits/chosen": -6.021874904632568, |
| "logits/rejected": -6.440625190734863, |
| "logps/chosen": -132.1750030517578, |
| "logps/rejected": -206.5, |
| "loss": 0.1298, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -6.412499904632568, |
| "rewards/margins": 6.567187309265137, |
| "rewards/rejected": -12.984375, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.21543985637342908, |
| "grad_norm": 5.681188583374023, |
| "learning_rate": 3.2990005860157616e-05, |
| "logits/chosen": -6.560937404632568, |
| "logits/rejected": -6.815625190734863, |
| "logps/chosen": -157.5500030517578, |
| "logps/rejected": -244.64999389648438, |
| "loss": 0.0997, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -8.732812881469727, |
| "rewards/margins": 7.779687404632568, |
| "rewards/rejected": -16.496875762939453, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2513464991023339, |
| "grad_norm": 10.916585922241211, |
| "learning_rate": 3.8677937905012385e-05, |
| "logits/chosen": -5.189062595367432, |
| "logits/rejected": -5.775781154632568, |
| "logps/chosen": -156.9499969482422, |
| "logps/rejected": -237.1999969482422, |
| "loss": 0.1292, |
| "rewards/accuracies": 0.9437500238418579, |
| "rewards/chosen": -8.623437881469727, |
| "rewards/margins": 7.248437404632568, |
| "rewards/rejected": -15.871874809265137, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2872531418312388, |
| "grad_norm": 6.150071620941162, |
| "learning_rate": 4.436586994986715e-05, |
| "logits/chosen": -4.82421875, |
| "logits/rejected": -5.315625190734863, |
| "logps/chosen": -158.4250030517578, |
| "logps/rejected": -236.89999389648438, |
| "loss": 0.1002, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -8.595312118530273, |
| "rewards/margins": 7.290625095367432, |
| "rewards/rejected": -15.868749618530273, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3231597845601436, |
| "grad_norm": 5.110800743103027, |
| "learning_rate": 5.005380199472191e-05, |
| "logits/chosen": -6.574999809265137, |
| "logits/rejected": -7.1171875, |
| "logps/chosen": -172.0500030517578, |
| "logps/rejected": -269.04998779296875, |
| "loss": 0.086, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -10.365625381469727, |
| "rewards/margins": 8.489062309265137, |
| "rewards/rejected": -18.862499237060547, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.3590664272890485, |
| "grad_norm": 7.762596130371094, |
| "learning_rate": 5.574173403957667e-05, |
| "logits/chosen": -6.243750095367432, |
| "logits/rejected": -6.870312690734863, |
| "logps/chosen": -145.4499969482422, |
| "logps/rejected": -242.5500030517578, |
| "loss": 0.0698, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -7.448437690734863, |
| "rewards/margins": 8.868749618530273, |
| "rewards/rejected": -16.306249618530273, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3590664272890485, |
| "eval_logits/chosen": -6.295955657958984, |
| "eval_logits/rejected": -7.025735378265381, |
| "eval_logps/chosen": -148.5294189453125, |
| "eval_logps/rejected": -254.23529052734375, |
| "eval_loss": 0.04325719177722931, |
| "eval_rewards/accuracies": 0.9816176295280457, |
| "eval_rewards/chosen": -7.556985378265381, |
| "eval_rewards/margins": 9.713234901428223, |
| "eval_rewards/rejected": -17.257352828979492, |
| "eval_runtime": 7.5302, |
| "eval_samples_per_second": 35.457, |
| "eval_steps_per_second": 2.258, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39497307001795334, |
| "grad_norm": 4.944767475128174, |
| "learning_rate": 6.142966608443144e-05, |
| "logits/chosen": -7.017187595367432, |
| "logits/rejected": -7.801562309265137, |
| "logps/chosen": -165.39999389648438, |
| "logps/rejected": -271.54998779296875, |
| "loss": 0.0805, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -9.506250381469727, |
| "rewards/margins": 9.890625, |
| "rewards/rejected": -19.396875381469727, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.43087971274685816, |
| "grad_norm": 5.226822376251221, |
| "learning_rate": 6.711759812928619e-05, |
| "logits/chosen": -7.185937404632568, |
| "logits/rejected": -8.079687118530273, |
| "logps/chosen": -187.5, |
| "logps/rejected": -307.6000061035156, |
| "loss": 0.076, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -11.8125, |
| "rewards/margins": 11.323437690734863, |
| "rewards/rejected": -23.143749237060547, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.466786355475763, |
| "grad_norm": 2.203220844268799, |
| "learning_rate": 7.280553017414096e-05, |
| "logits/chosen": -3.3968749046325684, |
| "logits/rejected": -4.358593940734863, |
| "logps/chosen": -151.75, |
| "logps/rejected": -241.5500030517578, |
| "loss": 0.1146, |
| "rewards/accuracies": 0.9593750238418579, |
| "rewards/chosen": -8.064062118530273, |
| "rewards/margins": 8.042187690734863, |
| "rewards/rejected": -16.112499237060547, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5026929982046678, |
| "grad_norm": 1.6759897470474243, |
| "learning_rate": 7.849346221899571e-05, |
| "logits/chosen": -2.987499952316284, |
| "logits/rejected": -3.8515625, |
| "logps/chosen": -123.80000305175781, |
| "logps/rejected": -217.60000610351562, |
| "loss": 0.0679, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -5.29296875, |
| "rewards/margins": 8.653124809265137, |
| "rewards/rejected": -13.934374809265137, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5385996409335727, |
| "grad_norm": 3.1203854084014893, |
| "learning_rate": 8.418139426385048e-05, |
| "logits/chosen": -9.951562881469727, |
| "logits/rejected": -10.609375, |
| "logps/chosen": -246.6999969482422, |
| "logps/rejected": -369.29998779296875, |
| "loss": 0.0558, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -17.778125762939453, |
| "rewards/margins": 11.425000190734863, |
| "rewards/rejected": -29.212499618530273, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5745062836624776, |
| "grad_norm": 1.693241834640503, |
| "learning_rate": 8.986932630870525e-05, |
| "logits/chosen": -8.556249618530273, |
| "logits/rejected": -9.748437881469727, |
| "logps/chosen": -256.1000061035156, |
| "logps/rejected": -398.79998779296875, |
| "loss": 0.1092, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -18.521875381469727, |
| "rewards/margins": 13.868749618530273, |
| "rewards/rejected": -32.38750076293945, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6104129263913824, |
| "grad_norm": 1.1957429647445679, |
| "learning_rate": 9.555725835356e-05, |
| "logits/chosen": -4.921875, |
| "logits/rejected": -6.415625095367432, |
| "logps/chosen": -206.8000030517578, |
| "logps/rejected": -348.70001220703125, |
| "loss": 0.014, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -13.831250190734863, |
| "rewards/margins": 13.28125, |
| "rewards/rejected": -27.112499237060547, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.6463195691202872, |
| "grad_norm": 1.4185799360275269, |
| "learning_rate": 0.00010124519039841476, |
| "logits/chosen": -8.342187881469727, |
| "logits/rejected": -9.996874809265137, |
| "logps/chosen": -340.20001220703125, |
| "logps/rejected": -538.7000122070312, |
| "loss": 0.0447, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -27.181249618530273, |
| "rewards/margins": 19.196874618530273, |
| "rewards/rejected": -46.38750076293945, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6822262118491921, |
| "grad_norm": 1.2728252410888672, |
| "learning_rate": 0.00010693312244326953, |
| "logits/chosen": -8.214062690734863, |
| "logits/rejected": -9.143750190734863, |
| "logps/chosen": -374.8999938964844, |
| "logps/rejected": -553.9000244140625, |
| "loss": 0.1363, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -31.162500381469727, |
| "rewards/margins": 16.59375, |
| "rewards/rejected": -47.724998474121094, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.718132854578097, |
| "grad_norm": 4.153715133666992, |
| "learning_rate": 0.00011262105448812429, |
| "logits/chosen": -8.5078125, |
| "logits/rejected": -10.462499618530273, |
| "logps/chosen": -284.1000061035156, |
| "logps/rejected": -456.29998779296875, |
| "loss": 0.0684, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -21.65625, |
| "rewards/margins": 16.381250381469727, |
| "rewards/rejected": -38.01250076293945, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.718132854578097, |
| "eval_logits/chosen": -11.941176414489746, |
| "eval_logits/rejected": -14.220588684082031, |
| "eval_logps/chosen": -293.1764831542969, |
| "eval_logps/rejected": -514.4705810546875, |
| "eval_loss": 0.029092751443386078, |
| "eval_rewards/accuracies": 0.9926470518112183, |
| "eval_rewards/chosen": -22.25, |
| "eval_rewards/margins": 21.441177368164062, |
| "eval_rewards/rejected": -43.661766052246094, |
| "eval_runtime": 8.7058, |
| "eval_samples_per_second": 30.669, |
| "eval_steps_per_second": 1.953, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7540394973070018, |
| "grad_norm": 5.066140651702881, |
| "learning_rate": 0.0001137585950751329, |
| "logits/chosen": -13.131250381469727, |
| "logits/rejected": -14.824999809265137, |
| "logps/chosen": -293.25, |
| "logps/rejected": -497.3999938964844, |
| "loss": 0.0586, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -22.725000381469727, |
| "rewards/margins": 19.806249618530273, |
| "rewards/rejected": -42.5, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.7899461400359067, |
| "grad_norm": 2.763026714324951, |
| "learning_rate": 0.00011375840892353745, |
| "logits/chosen": -10.834375381469727, |
| "logits/rejected": -12.490625381469727, |
| "logps/chosen": -248.6999969482422, |
| "logps/rejected": -443.8999938964844, |
| "loss": 0.0501, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -17.862499237060547, |
| "rewards/margins": 18.778125762939453, |
| "rewards/rejected": -36.63750076293945, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8258527827648114, |
| "grad_norm": 2.0490527153015137, |
| "learning_rate": 0.00011375807957890452, |
| "logits/chosen": -7.245312690734863, |
| "logits/rejected": -9.215624809265137, |
| "logps/chosen": -165.6750030517578, |
| "logps/rejected": -346.3999938964844, |
| "loss": 0.0537, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -9.360937118530273, |
| "rewards/margins": 17.399999618530273, |
| "rewards/rejected": -26.75, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.8617594254937163, |
| "grad_norm": 1.383483648300171, |
| "learning_rate": 0.00011375760704206321, |
| "logits/chosen": -5.696875095367432, |
| "logits/rejected": -7.876562595367432, |
| "logps/chosen": -145.625, |
| "logps/rejected": -273.0, |
| "loss": 0.0769, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -7.3359375, |
| "rewards/margins": 12.178125381469727, |
| "rewards/rejected": -19.512500762939453, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8976660682226212, |
| "grad_norm": 1.2604446411132812, |
| "learning_rate": 0.00011375699131420316, |
| "logits/chosen": -6.131249904632568, |
| "logits/rejected": -8.760937690734863, |
| "logps/chosen": -154.0, |
| "logps/rejected": -332.79998779296875, |
| "loss": 0.0711, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -8.456250190734863, |
| "rewards/margins": 16.806249618530273, |
| "rewards/rejected": -25.268749237060547, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.933572710951526, |
| "grad_norm": 3.156709671020508, |
| "learning_rate": 0.00011375623239687444, |
| "logits/chosen": -6.84375, |
| "logits/rejected": -8.912500381469727, |
| "logps/chosen": -213.52499389648438, |
| "logps/rejected": -368.04998779296875, |
| "loss": 0.0617, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -14.483593940734863, |
| "rewards/margins": 14.620312690734863, |
| "rewards/rejected": -29.118749618530273, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9694793536804309, |
| "grad_norm": 1.4515728950500488, |
| "learning_rate": 0.00011375533029198762, |
| "logits/chosen": -9.087499618530273, |
| "logits/rejected": -10.668749809265137, |
| "logps/chosen": -258.79998779296875, |
| "logps/rejected": -419.79998779296875, |
| "loss": 0.0257, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -18.887500762939453, |
| "rewards/margins": 15.359375, |
| "rewards/rejected": -34.26874923706055, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 3.5940001010894775, |
| "learning_rate": 0.00011375428500181377, |
| "logits/chosen": -6.693014621734619, |
| "logits/rejected": -10.113970756530762, |
| "logps/chosen": -174.76470947265625, |
| "logps/rejected": -416.3529357910156, |
| "loss": 0.0226, |
| "rewards/accuracies": 0.9779411554336548, |
| "rewards/chosen": -10.352941513061523, |
| "rewards/margins": 23.735294342041016, |
| "rewards/rejected": -34.117645263671875, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0359066427289048, |
| "grad_norm": 3.284130573272705, |
| "learning_rate": 0.00011375309652898442, |
| "logits/chosen": -11.453125, |
| "logits/rejected": -13.603124618530273, |
| "logps/chosen": -387.0, |
| "logps/rejected": -576.7999877929688, |
| "loss": 0.0758, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -32.08124923706055, |
| "rewards/margins": 18.231250762939453, |
| "rewards/rejected": -50.32500076293945, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.0718132854578097, |
| "grad_norm": 2.5212385654449463, |
| "learning_rate": 0.00011375176487649152, |
| "logits/chosen": -9.884374618530273, |
| "logits/rejected": -11.106249809265137, |
| "logps/chosen": -335.29998779296875, |
| "logps/rejected": -490.0, |
| "loss": 0.0654, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -26.9375, |
| "rewards/margins": 14.440625190734863, |
| "rewards/rejected": -41.349998474121094, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0718132854578097, |
| "eval_logits/chosen": -8.321691513061523, |
| "eval_logits/rejected": -10.400734901428223, |
| "eval_logps/chosen": -270.1764831542969, |
| "eval_logps/rejected": -451.76470947265625, |
| "eval_loss": 0.04131906107068062, |
| "eval_rewards/accuracies": 0.9779411554336548, |
| "eval_rewards/chosen": -19.90441131591797, |
| "eval_rewards/margins": 17.422794342041016, |
| "eval_rewards/rejected": -37.35293960571289, |
| "eval_runtime": 8.5972, |
| "eval_samples_per_second": 31.057, |
| "eval_steps_per_second": 1.977, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1077199281867145, |
| "grad_norm": 0.6480632424354553, |
| "learning_rate": 0.00011375029004768751, |
| "logits/chosen": -8.751562118530273, |
| "logits/rejected": -10.721875190734863, |
| "logps/chosen": -271.6499938964844, |
| "logps/rejected": -462.8999938964844, |
| "loss": 0.0452, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -20.331249237060547, |
| "rewards/margins": 18.353124618530273, |
| "rewards/rejected": -38.681251525878906, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.1436265709156195, |
| "grad_norm": 1.824242353439331, |
| "learning_rate": 0.00011374867204628526, |
| "logits/chosen": -8.482812881469727, |
| "logits/rejected": -10.028124809265137, |
| "logps/chosen": -293.45001220703125, |
| "logps/rejected": -478.5, |
| "loss": 0.042, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -22.506250381469727, |
| "rewards/margins": 17.868749618530273, |
| "rewards/rejected": -40.33124923706055, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1795332136445242, |
| "grad_norm": 6.0788984298706055, |
| "learning_rate": 0.00011374691087635812, |
| "logits/chosen": -9.996874809265137, |
| "logits/rejected": -11.309374809265137, |
| "logps/chosen": -378.8999938964844, |
| "logps/rejected": -676.7000122070312, |
| "loss": 0.0607, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -31.174999237060547, |
| "rewards/margins": 29.106250762939453, |
| "rewards/rejected": -60.26250076293945, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.215439856373429, |
| "grad_norm": 2.306962013244629, |
| "learning_rate": 0.00011374500654233978, |
| "logits/chosen": -6.659375190734863, |
| "logits/rejected": -8.645312309265137, |
| "logps/chosen": -239.0, |
| "logps/rejected": -461.29998779296875, |
| "loss": 0.0665, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -16.965625762939453, |
| "rewards/margins": 21.640625, |
| "rewards/rejected": -38.599998474121094, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.251346499102334, |
| "grad_norm": 2.2957208156585693, |
| "learning_rate": 0.00011374295904902446, |
| "logits/chosen": -7.462500095367432, |
| "logits/rejected": -9.553125381469727, |
| "logps/chosen": -236.8000030517578, |
| "logps/rejected": -444.5, |
| "loss": 0.0593, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -16.668750762939453, |
| "rewards/margins": 20.456249237060547, |
| "rewards/rejected": -37.10625076293945, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.2872531418312387, |
| "grad_norm": 9.126953125, |
| "learning_rate": 0.00011374076840156666, |
| "logits/chosen": -6.907812595367432, |
| "logits/rejected": -9.303125381469727, |
| "logps/chosen": -205.5, |
| "logps/rejected": -396.20001220703125, |
| "loss": 0.0372, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -13.418749809265137, |
| "rewards/margins": 18.612499237060547, |
| "rewards/rejected": -32.037498474121094, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.3231597845601435, |
| "grad_norm": 1.2403417825698853, |
| "learning_rate": 0.00011373843460548139, |
| "logits/chosen": -7.637499809265137, |
| "logits/rejected": -10.353124618530273, |
| "logps/chosen": -251.9499969482422, |
| "logps/rejected": -456.20001220703125, |
| "loss": 0.0473, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -18.381250381469727, |
| "rewards/margins": 19.59375, |
| "rewards/rejected": -37.974998474121094, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.3590664272890485, |
| "grad_norm": 3.5465104579925537, |
| "learning_rate": 0.00011373595766664395, |
| "logits/chosen": -10.581250190734863, |
| "logits/rejected": -12.559374809265137, |
| "logps/chosen": -374.29998779296875, |
| "logps/rejected": -551.2000122070312, |
| "loss": 0.0332, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -30.412500381469727, |
| "rewards/margins": 17.181249618530273, |
| "rewards/rejected": -47.599998474121094, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.3949730700179535, |
| "grad_norm": 0.9382606744766235, |
| "learning_rate": 0.00011373333759129004, |
| "logits/chosen": -12.212499618530273, |
| "logits/rejected": -13.796875, |
| "logps/chosen": -380.1000061035156, |
| "logps/rejected": -573.7999877929688, |
| "loss": 0.0366, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -31.475000381469727, |
| "rewards/margins": 18.6875, |
| "rewards/rejected": -50.20000076293945, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.4308797127468582, |
| "grad_norm": 0.6339579224586487, |
| "learning_rate": 0.00011373057438601568, |
| "logits/chosen": -9.418749809265137, |
| "logits/rejected": -11.771875381469727, |
| "logps/chosen": -282.20001220703125, |
| "logps/rejected": -479.29998779296875, |
| "loss": 0.048, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -21.262500762939453, |
| "rewards/margins": 19.103124618530273, |
| "rewards/rejected": -40.36249923706055, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4308797127468582, |
| "eval_logits/chosen": -6.602941036224365, |
| "eval_logits/rejected": -9.966911315917969, |
| "eval_logps/chosen": -166.5294189453125, |
| "eval_logps/rejected": -406.70587158203125, |
| "eval_loss": 0.028071066364645958, |
| "eval_rewards/accuracies": 0.9889705777168274, |
| "eval_rewards/chosen": -9.369484901428223, |
| "eval_rewards/margins": 23.360294342041016, |
| "eval_rewards/rejected": -32.72793960571289, |
| "eval_runtime": 8.6276, |
| "eval_samples_per_second": 30.947, |
| "eval_steps_per_second": 1.97, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.466786355475763, |
| "grad_norm": 3.074913501739502, |
| "learning_rate": 0.00011372766805777725, |
| "logits/chosen": -4.8984375, |
| "logits/rejected": -8.321874618530273, |
| "logps/chosen": -133.85000610351562, |
| "logps/rejected": -351.5, |
| "loss": 0.0452, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -6.512499809265137, |
| "rewards/margins": 21.049999237060547, |
| "rewards/rejected": -27.5625, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.502692998204668, |
| "grad_norm": 3.414430618286133, |
| "learning_rate": 0.00011372461861389142, |
| "logits/chosen": -5.293749809265137, |
| "logits/rejected": -7.599999904632568, |
| "logps/chosen": -180.0, |
| "logps/rejected": -397.3999938964844, |
| "loss": 0.0937, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -11.2421875, |
| "rewards/margins": 20.878124237060547, |
| "rewards/rejected": -32.10625076293945, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5385996409335727, |
| "grad_norm": 1.9072022438049316, |
| "learning_rate": 0.00011372142606203516, |
| "logits/chosen": -4.057812690734863, |
| "logits/rejected": -6.109375, |
| "logps/chosen": -156.0749969482422, |
| "logps/rejected": -324.79998779296875, |
| "loss": 0.0461, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -8.564062118530273, |
| "rewards/margins": 16.028125762939453, |
| "rewards/rejected": -24.587499618530273, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.5745062836624775, |
| "grad_norm": 1.2352949380874634, |
| "learning_rate": 0.00011371809041024573, |
| "logits/chosen": -5.315625190734863, |
| "logits/rejected": -7.754687309265137, |
| "logps/chosen": -189.64999389648438, |
| "logps/rejected": -392.0, |
| "loss": 0.0332, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -11.946874618530273, |
| "rewards/margins": 19.446874618530273, |
| "rewards/rejected": -31.424999237060547, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.6104129263913824, |
| "grad_norm": 0.5811780095100403, |
| "learning_rate": 0.00011371461166692062, |
| "logits/chosen": -7.451562404632568, |
| "logits/rejected": -10.225000381469727, |
| "logps/chosen": -218.4499969482422, |
| "logps/rejected": -433.79998779296875, |
| "loss": 0.0486, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -14.824999809265137, |
| "rewards/margins": 20.75, |
| "rewards/rejected": -35.57500076293945, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.6463195691202872, |
| "grad_norm": 0.33781036734580994, |
| "learning_rate": 0.00011371098984081755, |
| "logits/chosen": -9.0390625, |
| "logits/rejected": -11.784375190734863, |
| "logps/chosen": -232.75, |
| "logps/rejected": -425.79998779296875, |
| "loss": 0.0465, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -16.509374618530273, |
| "rewards/margins": 18.971874237060547, |
| "rewards/rejected": -35.45624923706055, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.682226211849192, |
| "grad_norm": 0.9501491189002991, |
| "learning_rate": 0.0001137072249410545, |
| "logits/chosen": -8.489062309265137, |
| "logits/rejected": -11.706250190734863, |
| "logps/chosen": -229.6999969482422, |
| "logps/rejected": -451.3999938964844, |
| "loss": 0.0196, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -16.003124237060547, |
| "rewards/margins": 21.600000381469727, |
| "rewards/rejected": -37.599998474121094, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.718132854578097, |
| "grad_norm": 2.2919723987579346, |
| "learning_rate": 0.00011370331697710956, |
| "logits/chosen": -8.271875381469727, |
| "logits/rejected": -12.678125381469727, |
| "logps/chosen": -205.75, |
| "logps/rejected": -511.79998779296875, |
| "loss": 0.0752, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -13.65625, |
| "rewards/margins": 30.087499618530273, |
| "rewards/rejected": -43.75, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.754039497307002, |
| "grad_norm": 2.5256083011627197, |
| "learning_rate": 0.00011369926595882104, |
| "logits/chosen": -10.071874618530273, |
| "logits/rejected": -13.768750190734863, |
| "logps/chosen": -291.04998779296875, |
| "logps/rejected": -619.2000122070312, |
| "loss": 0.0377, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -22.446874618530273, |
| "rewards/margins": 32.150001525878906, |
| "rewards/rejected": -54.57500076293945, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.7899461400359067, |
| "grad_norm": 1.3631178140640259, |
| "learning_rate": 0.00011369507189638736, |
| "logits/chosen": -3.7313232421875, |
| "logits/rejected": -7.044335842132568, |
| "logps/chosen": -175.27499389648438, |
| "logps/rejected": -371.45001220703125, |
| "loss": 0.0627, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -10.771875381469727, |
| "rewards/margins": 18.6015625, |
| "rewards/rejected": -29.353124618530273, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.7899461400359067, |
| "eval_logits/chosen": -0.22195972502231598, |
| "eval_logits/rejected": -1.6530331373214722, |
| "eval_logps/chosen": -116.17646789550781, |
| "eval_logps/rejected": -208.88235473632812, |
| "eval_loss": 0.07754824310541153, |
| "eval_rewards/accuracies": 0.9485294222831726, |
| "eval_rewards/chosen": -4.249080657958984, |
| "eval_rewards/margins": 8.38786792755127, |
| "eval_rewards/rejected": -12.650734901428223, |
| "eval_runtime": 8.5185, |
| "eval_samples_per_second": 31.344, |
| "eval_steps_per_second": 1.996, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8258527827648114, |
| "grad_norm": 1.8161951303482056, |
| "learning_rate": 0.00011369073480036712, |
| "logits/chosen": -2.3915038108825684, |
| "logits/rejected": -3.521484375, |
| "logps/chosen": -145.4250030517578, |
| "logps/rejected": -257.20001220703125, |
| "loss": 0.0791, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -7.525000095367432, |
| "rewards/margins": 10.292187690734863, |
| "rewards/rejected": -17.818750381469727, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.8617594254937164, |
| "grad_norm": 0.3177375793457031, |
| "learning_rate": 0.00011368625468167889, |
| "logits/chosen": -8.206250190734863, |
| "logits/rejected": -10.065625190734863, |
| "logps/chosen": -325.3500061035156, |
| "logps/rejected": -509.1000061035156, |
| "loss": 0.0508, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -26.078125, |
| "rewards/margins": 17.381250381469727, |
| "rewards/rejected": -43.42499923706055, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.8976660682226212, |
| "grad_norm": 0.7164928317070007, |
| "learning_rate": 0.00011368163155160139, |
| "logits/chosen": -10.165624618530273, |
| "logits/rejected": -11.875, |
| "logps/chosen": -410.29998779296875, |
| "logps/rejected": -585.4000244140625, |
| "loss": 0.0577, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -34.45624923706055, |
| "rewards/margins": 16.634374618530273, |
| "rewards/rejected": -51.07500076293945, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.933572710951526, |
| "grad_norm": 2.070739507675171, |
| "learning_rate": 0.00011367686542177336, |
| "logits/chosen": -6.4375, |
| "logits/rejected": -7.814062595367432, |
| "logps/chosen": -340.1000061035156, |
| "logps/rejected": -469.79998779296875, |
| "loss": 0.0576, |
| "rewards/accuracies": 0.9593750238418579, |
| "rewards/chosen": -27.168750762939453, |
| "rewards/margins": 12.381250381469727, |
| "rewards/rejected": -39.54999923706055, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.969479353680431, |
| "grad_norm": 1.2207964658737183, |
| "learning_rate": 0.0001136719563041935, |
| "logits/chosen": -4.547656059265137, |
| "logits/rejected": -6.598437309265137, |
| "logps/chosen": -263.0, |
| "logps/rejected": -423.8999938964844, |
| "loss": 0.0322, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -19.46875, |
| "rewards/margins": 15.509374618530273, |
| "rewards/rejected": -34.993751525878906, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 6.416799545288086, |
| "learning_rate": 0.0001136669042112205, |
| "logits/chosen": -5.321691036224365, |
| "logits/rejected": -8.242647171020508, |
| "logps/chosen": -274.0, |
| "logps/rejected": -515.6470336914062, |
| "loss": 0.0428, |
| "rewards/accuracies": 0.9852941036224365, |
| "rewards/chosen": -20.264705657958984, |
| "rewards/margins": 23.514705657958984, |
| "rewards/rejected": -43.80882263183594, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.035906642728905, |
| "grad_norm": 0.0026381895877420902, |
| "learning_rate": 0.00011366170915557303, |
| "logits/chosen": -6.020312309265137, |
| "logits/rejected": -9.284375190734863, |
| "logps/chosen": -289.1499938964844, |
| "logps/rejected": -603.0, |
| "loss": 0.0676, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -22.331249237060547, |
| "rewards/margins": 30.418750762939453, |
| "rewards/rejected": -52.75, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.0718132854578095, |
| "grad_norm": 9.352706909179688, |
| "learning_rate": 0.00011365637115032964, |
| "logits/chosen": -5.478906154632568, |
| "logits/rejected": -9.690625190734863, |
| "logps/chosen": -263.3500061035156, |
| "logps/rejected": -613.4000244140625, |
| "loss": 0.0791, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -19.450000762939453, |
| "rewards/margins": 34.474998474121094, |
| "rewards/rejected": -53.9375, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.1077199281867145, |
| "grad_norm": 1.2689452171325684, |
| "learning_rate": 0.0001136508902089287, |
| "logits/chosen": -2.486132860183716, |
| "logits/rejected": -5.1484375, |
| "logps/chosen": -145.1999969482422, |
| "logps/rejected": -346.20001220703125, |
| "loss": 0.1107, |
| "rewards/accuracies": 0.9593750238418579, |
| "rewards/chosen": -7.432031154632568, |
| "rewards/margins": 19.28125, |
| "rewards/rejected": -26.706249237060547, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.1436265709156195, |
| "grad_norm": 0.04833826795220375, |
| "learning_rate": 0.00011364526634516852, |
| "logits/chosen": -5.057812690734863, |
| "logits/rejected": -8.810937881469727, |
| "logps/chosen": -240.14999389648438, |
| "logps/rejected": -519.7999877929688, |
| "loss": 0.1121, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -17.071874618530273, |
| "rewards/margins": 27.412500381469727, |
| "rewards/rejected": -44.462501525878906, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.1436265709156195, |
| "eval_logits/chosen": -4.876838207244873, |
| "eval_logits/rejected": -8.235294342041016, |
| "eval_logps/chosen": -253.05882263183594, |
| "eval_logps/rejected": -514.8235473632812, |
| "eval_loss": 0.02023915760219097, |
| "eval_rewards/accuracies": 0.9836230278015137, |
| "eval_rewards/chosen": -18.169116973876953, |
| "eval_rewards/margins": 25.522058486938477, |
| "eval_rewards/rejected": -43.661766052246094, |
| "eval_runtime": 8.6202, |
| "eval_samples_per_second": 30.974, |
| "eval_steps_per_second": 1.972, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.1795332136445245, |
| "grad_norm": 3.4645302295684814, |
| "learning_rate": 0.00011363949957320717, |
| "logits/chosen": -4.932812690734863, |
| "logits/rejected": -7.857812404632568, |
| "logps/chosen": -253.5500030517578, |
| "logps/rejected": -483.5, |
| "loss": 0.0512, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -18.543750762939453, |
| "rewards/margins": 22.434375762939453, |
| "rewards/rejected": -40.98749923706055, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.215439856373429, |
| "grad_norm": 0.09185401350259781, |
| "learning_rate": 0.00011363358990756246, |
| "logits/chosen": -4.626562595367432, |
| "logits/rejected": -7.34375, |
| "logps/chosen": -205.1999969482422, |
| "logps/rejected": -409.70001220703125, |
| "loss": 0.0464, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -13.615625381469727, |
| "rewards/margins": 19.90625, |
| "rewards/rejected": -33.506248474121094, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.251346499102334, |
| "grad_norm": 1.7695757150650024, |
| "learning_rate": 0.00011362753736311199, |
| "logits/chosen": -4.758593559265137, |
| "logits/rejected": -7.090624809265137, |
| "logps/chosen": -183.60000610351562, |
| "logps/rejected": -357.79998779296875, |
| "loss": 0.0192, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -11.206250190734863, |
| "rewards/margins": 16.837499618530273, |
| "rewards/rejected": -28.024999618530273, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.287253141831239, |
| "grad_norm": 0.2544287145137787, |
| "learning_rate": 0.00011362134195509304, |
| "logits/chosen": -5.989062309265137, |
| "logits/rejected": -8.729687690734863, |
| "logps/chosen": -169.6999969482422, |
| "logps/rejected": -410.1000061035156, |
| "loss": 0.0094, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -10.1484375, |
| "rewards/margins": 23.103124618530273, |
| "rewards/rejected": -33.243751525878906, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.3231597845601435, |
| "grad_norm": 0.007753673940896988, |
| "learning_rate": 0.00011361500369910252, |
| "logits/chosen": -7.881249904632568, |
| "logits/rejected": -10.868749618530273, |
| "logps/chosen": -216.0, |
| "logps/rejected": -479.5, |
| "loss": 0.0083, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -14.771875381469727, |
| "rewards/margins": 25.493749618530273, |
| "rewards/rejected": -40.26250076293945, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.3590664272890485, |
| "grad_norm": 0.980211615562439, |
| "learning_rate": 0.000113608522611097, |
| "logits/chosen": -8.223437309265137, |
| "logits/rejected": -10.987500190734863, |
| "logps/chosen": -212.89999389648438, |
| "logps/rejected": -467.20001220703125, |
| "loss": 0.0133, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -14.378125190734863, |
| "rewards/margins": 24.59375, |
| "rewards/rejected": -38.974998474121094, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.3949730700179535, |
| "grad_norm": 0.30649715662002563, |
| "learning_rate": 0.0001136018987073926, |
| "logits/chosen": -7.28125, |
| "logits/rejected": -9.106249809265137, |
| "logps/chosen": -161.5500030517578, |
| "logps/rejected": -330.25, |
| "loss": 0.0511, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -8.954687118530273, |
| "rewards/margins": 16.278125762939453, |
| "rewards/rejected": -25.225000381469727, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.430879712746858, |
| "grad_norm": 0.8643183708190918, |
| "learning_rate": 0.000113595132004665, |
| "logits/chosen": -8.449999809265137, |
| "logits/rejected": -9.612500190734863, |
| "logps/chosen": -192.39999389648438, |
| "logps/rejected": -365.29998779296875, |
| "loss": 0.0235, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -12.34375, |
| "rewards/margins": 16.703125, |
| "rewards/rejected": -29.043750762939453, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.466786355475763, |
| "grad_norm": 1.0305479764938354, |
| "learning_rate": 0.00011358822251994936, |
| "logits/chosen": -7.7890625, |
| "logits/rejected": -8.912500381469727, |
| "logps/chosen": -169.97500610351562, |
| "logps/rejected": -350.1000061035156, |
| "loss": 0.0523, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -9.7421875, |
| "rewards/margins": 17.625, |
| "rewards/rejected": -27.356250762939453, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.502692998204668, |
| "grad_norm": 0.1656515747308731, |
| "learning_rate": 0.00011358117027064029, |
| "logits/chosen": -4.857812404632568, |
| "logits/rejected": -6.118750095367432, |
| "logps/chosen": -116.25, |
| "logps/rejected": -289.1499938964844, |
| "loss": 0.0293, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -4.669531345367432, |
| "rewards/margins": 16.421875, |
| "rewards/rejected": -21.103124618530273, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.502692998204668, |
| "eval_logits/chosen": -4.408088207244873, |
| "eval_logits/rejected": -5.608455657958984, |
| "eval_logps/chosen": -151.94117736816406, |
| "eval_logps/rejected": -344.941162109375, |
| "eval_loss": 0.030790157616138458, |
| "eval_rewards/accuracies": 0.9852941036224365, |
| "eval_rewards/chosen": -7.900735378265381, |
| "eval_rewards/margins": 18.610294342041016, |
| "eval_rewards/rejected": -26.5, |
| "eval_runtime": 8.8079, |
| "eval_samples_per_second": 30.314, |
| "eval_steps_per_second": 1.93, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.5385996409335725, |
| "grad_norm": 2.206739664077759, |
| "learning_rate": 0.00011357397527449183, |
| "logits/chosen": -4.839062690734863, |
| "logits/rejected": -5.840624809265137, |
| "logps/chosen": -205.10000610351562, |
| "logps/rejected": -381.20001220703125, |
| "loss": 0.0298, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -13.587499618530273, |
| "rewards/margins": 16.90625, |
| "rewards/rejected": -30.512500762939453, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.5745062836624775, |
| "grad_norm": 0.4848335385322571, |
| "learning_rate": 0.00011356663754961738, |
| "logits/chosen": -5.310937404632568, |
| "logits/rejected": -6.078125, |
| "logps/chosen": -284.79998779296875, |
| "logps/rejected": -437.70001220703125, |
| "loss": 0.0368, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -21.575000762939453, |
| "rewards/margins": 14.528124809265137, |
| "rewards/rejected": -36.09375, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.6104129263913824, |
| "grad_norm": 1.6452534198760986, |
| "learning_rate": 0.00011355915711448965, |
| "logits/chosen": -5.776562690734863, |
| "logits/rejected": -6.996874809265137, |
| "logps/chosen": -325.5, |
| "logps/rejected": -500.8999938964844, |
| "loss": 0.0305, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -25.756250381469727, |
| "rewards/margins": 16.803125381469727, |
| "rewards/rejected": -42.537498474121094, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.646319569120287, |
| "grad_norm": 0.1357572078704834, |
| "learning_rate": 0.00011355153398794061, |
| "logits/chosen": -3.745312452316284, |
| "logits/rejected": -5.971875190734863, |
| "logps/chosen": -241.9499969482422, |
| "logps/rejected": -457.1000061035156, |
| "loss": 0.043, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -17.171875, |
| "rewards/margins": 20.743749618530273, |
| "rewards/rejected": -37.943748474121094, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.682226211849192, |
| "grad_norm": 0.02111443318426609, |
| "learning_rate": 0.00011354376818916149, |
| "logits/chosen": -2.9437499046325684, |
| "logits/rejected": -5.345312595367432, |
| "logps/chosen": -217.10000610351562, |
| "logps/rejected": -448.8999938964844, |
| "loss": 0.0395, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -14.665624618530273, |
| "rewards/margins": 22.515625, |
| "rewards/rejected": -37.15625, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.718132854578097, |
| "grad_norm": 0.670870304107666, |
| "learning_rate": 0.00011353585973770268, |
| "logits/chosen": -2.5054688453674316, |
| "logits/rejected": -6.078125, |
| "logps/chosen": -211.9499969482422, |
| "logps/rejected": -524.2999877929688, |
| "loss": 0.0439, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -14.5, |
| "rewards/margins": 30.537500381469727, |
| "rewards/rejected": -45.037498474121094, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.754039497307002, |
| "grad_norm": 0.44046124815940857, |
| "learning_rate": 0.00011352780865347368, |
| "logits/chosen": -3.2164063453674316, |
| "logits/rejected": -7.603125095367432, |
| "logps/chosen": -232.9499969482422, |
| "logps/rejected": -583.7999877929688, |
| "loss": 0.0776, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -16.34375, |
| "rewards/margins": 34.71875, |
| "rewards/rejected": -51.025001525878906, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.789946140035907, |
| "grad_norm": 0.58872389793396, |
| "learning_rate": 0.0001135196149567431, |
| "logits/chosen": -5.271874904632568, |
| "logits/rejected": -8.928125381469727, |
| "logps/chosen": -300.0, |
| "logps/rejected": -614.7999877929688, |
| "loss": 0.0626, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -23.056249618530273, |
| "rewards/margins": 31.15625, |
| "rewards/rejected": -54.212501525878906, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.8258527827648114, |
| "grad_norm": 3.51865553855896, |
| "learning_rate": 0.00011351127866813858, |
| "logits/chosen": -3.4632811546325684, |
| "logits/rejected": -7.084374904632568, |
| "logps/chosen": -203.10000610351562, |
| "logps/rejected": -554.0999755859375, |
| "loss": 0.045, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -13.490625381469727, |
| "rewards/margins": 34.400001525878906, |
| "rewards/rejected": -47.900001525878906, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.8617594254937164, |
| "grad_norm": 1.450678825378418, |
| "learning_rate": 0.00011350279980864665, |
| "logits/chosen": -5.461718559265137, |
| "logits/rejected": -8.4375, |
| "logps/chosen": -304.70001220703125, |
| "logps/rejected": -655.7999877929688, |
| "loss": 0.0565, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -23.524999618530273, |
| "rewards/margins": 35.0, |
| "rewards/rejected": -58.5625, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.8617594254937164, |
| "eval_logits/chosen": -5.751838207244873, |
| "eval_logits/rejected": -9.095588684082031, |
| "eval_logps/chosen": -330.8235168457031, |
| "eval_logps/rejected": -722.11767578125, |
| "eval_loss": 0.026857540011405945, |
| "eval_rewards/accuracies": 0.9816176295280457, |
| "eval_rewards/chosen": -26.066177368164062, |
| "eval_rewards/margins": 38.764705657958984, |
| "eval_rewards/rejected": -64.8382339477539, |
| "eval_runtime": 8.5165, |
| "eval_samples_per_second": 31.351, |
| "eval_steps_per_second": 1.996, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.8976660682226214, |
| "grad_norm": 0.8611961603164673, |
| "learning_rate": 0.00011349417839961291, |
| "logits/chosen": -3.793750047683716, |
| "logits/rejected": -7.610937595367432, |
| "logps/chosen": -244.9499969482422, |
| "logps/rejected": -608.2999877929688, |
| "loss": 0.0411, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -17.553125381469727, |
| "rewards/margins": 35.88750076293945, |
| "rewards/rejected": -53.4375, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.933572710951526, |
| "grad_norm": 0.9868994951248169, |
| "learning_rate": 0.0001134854144627417, |
| "logits/chosen": -2.541015625, |
| "logits/rejected": -5.84375, |
| "logps/chosen": -179.35000610351562, |
| "logps/rejected": -500.79998779296875, |
| "loss": 0.0966, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -11.278124809265137, |
| "rewards/margins": 31.325000762939453, |
| "rewards/rejected": -42.625, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.969479353680431, |
| "grad_norm": 0.8936044573783875, |
| "learning_rate": 0.00011347650802009624, |
| "logits/chosen": -3.5687499046325684, |
| "logits/rejected": -5.125, |
| "logps/chosen": -250.89999389648438, |
| "logps/rejected": -453.70001220703125, |
| "loss": 0.0359, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -17.871875762939453, |
| "rewards/margins": 19.834375381469727, |
| "rewards/rejected": -37.73125076293945, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.828643560409546, |
| "learning_rate": 0.00011346745909409847, |
| "logits/chosen": -4.330882549285889, |
| "logits/rejected": -5.753676414489746, |
| "logps/chosen": -265.941162109375, |
| "logps/rejected": -454.1176452636719, |
| "loss": 0.0519, |
| "rewards/accuracies": 0.9742646813392639, |
| "rewards/chosen": -19.676469802856445, |
| "rewards/margins": 18.297794342041016, |
| "rewards/rejected": -37.992645263671875, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.035906642728905, |
| "grad_norm": 0.023664651438593864, |
| "learning_rate": 0.00011345826770752906, |
| "logits/chosen": -3.649218797683716, |
| "logits/rejected": -5.8515625, |
| "logps/chosen": -209.39999389648438, |
| "logps/rejected": -417.3999938964844, |
| "loss": 0.0118, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -14.103124618530273, |
| "rewards/margins": 20.243749618530273, |
| "rewards/rejected": -34.38750076293945, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.0718132854578095, |
| "grad_norm": 0.01370246708393097, |
| "learning_rate": 0.00011344893388352732, |
| "logits/chosen": -2.9320311546325684, |
| "logits/rejected": -5.650000095367432, |
| "logps/chosen": -178.85000610351562, |
| "logps/rejected": -392.1000061035156, |
| "loss": 0.0472, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -10.657812118530273, |
| "rewards/margins": 20.962499618530273, |
| "rewards/rejected": -31.631250381469727, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.1077199281867145, |
| "grad_norm": 0.00010757453128462657, |
| "learning_rate": 0.00011343945764559112, |
| "logits/chosen": -2.715625047683716, |
| "logits/rejected": -5.421875, |
| "logps/chosen": -158.60000610351562, |
| "logps/rejected": -340.20001220703125, |
| "loss": 0.0307, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -8.817187309265137, |
| "rewards/margins": 17.318750381469727, |
| "rewards/rejected": -26.162500381469727, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.1436265709156195, |
| "grad_norm": 0.32624539732933044, |
| "learning_rate": 0.0001134298390175769, |
| "logits/chosen": -3.8375000953674316, |
| "logits/rejected": -5.959374904632568, |
| "logps/chosen": -176.8000030517578, |
| "logps/rejected": -336.8999938964844, |
| "loss": 0.0305, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -10.962499618530273, |
| "rewards/margins": 15.490625381469727, |
| "rewards/rejected": -26.450000762939453, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.1795332136445245, |
| "grad_norm": 0.16668373346328735, |
| "learning_rate": 0.00011342007802369956, |
| "logits/chosen": -4.637499809265137, |
| "logits/rejected": -6.957812309265137, |
| "logps/chosen": -203.14999389648438, |
| "logps/rejected": -389.79998779296875, |
| "loss": 0.0325, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -13.321874618530273, |
| "rewards/margins": 17.899999618530273, |
| "rewards/rejected": -31.21875, |
| "step": 445 |
| }, |
| { |
| "epoch": 3.215439856373429, |
| "grad_norm": 0.008436158299446106, |
| "learning_rate": 0.00011341017468853234, |
| "logits/chosen": -5.528124809265137, |
| "logits/rejected": -8.003125190734863, |
| "logps/chosen": -208.4499969482422, |
| "logps/rejected": -445.1000061035156, |
| "loss": 0.0125, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -13.625, |
| "rewards/margins": 23.018749237060547, |
| "rewards/rejected": -36.64374923706055, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.215439856373429, |
| "eval_logits/chosen": -5.895220756530762, |
| "eval_logits/rejected": -8.316176414489746, |
| "eval_logps/chosen": -224.41175842285156, |
| "eval_logps/rejected": -471.76470947265625, |
| "eval_loss": 0.02364749275147915, |
| "eval_rewards/accuracies": 0.9889705777168274, |
| "eval_rewards/chosen": -15.242647171020508, |
| "eval_rewards/margins": 24.080883026123047, |
| "eval_rewards/rejected": -39.35293960571289, |
| "eval_runtime": 8.4254, |
| "eval_samples_per_second": 31.69, |
| "eval_steps_per_second": 2.018, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.251346499102334, |
| "grad_norm": 0.6932575702667236, |
| "learning_rate": 0.00011340012903700693, |
| "logits/chosen": -6.3359375, |
| "logits/rejected": -8.709375381469727, |
| "logps/chosen": -242.10000610351562, |
| "logps/rejected": -478.70001220703125, |
| "loss": 0.0263, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -17.1875, |
| "rewards/margins": 23.299999237060547, |
| "rewards/rejected": -40.5, |
| "step": 455 |
| }, |
| { |
| "epoch": 3.287253141831239, |
| "grad_norm": 1.7907814979553223, |
| "learning_rate": 0.0001133899410944132, |
| "logits/chosen": -7.318749904632568, |
| "logits/rejected": -9.934374809265137, |
| "logps/chosen": -268.8999938964844, |
| "logps/rejected": -554.9000244140625, |
| "loss": 0.0484, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -20.271875381469727, |
| "rewards/margins": 27.850000381469727, |
| "rewards/rejected": -48.150001525878906, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.3231597845601435, |
| "grad_norm": 1.6142306327819824, |
| "learning_rate": 0.0001133796108863993, |
| "logits/chosen": -5.221093654632568, |
| "logits/rejected": -7.642187595367432, |
| "logps/chosen": -200.6999969482422, |
| "logps/rejected": -408.20001220703125, |
| "loss": 0.0293, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -12.826562881469727, |
| "rewards/margins": 20.359375, |
| "rewards/rejected": -33.20000076293945, |
| "step": 465 |
| }, |
| { |
| "epoch": 3.3590664272890485, |
| "grad_norm": 1.1235191822052002, |
| "learning_rate": 0.00011336913843897153, |
| "logits/chosen": -2.270312547683716, |
| "logits/rejected": -4.194531440734863, |
| "logps/chosen": -138.75, |
| "logps/rejected": -287.20001220703125, |
| "loss": 0.0528, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -6.962500095367432, |
| "rewards/margins": 13.737500190734863, |
| "rewards/rejected": -20.700000762939453, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.3949730700179535, |
| "grad_norm": 0.024500994011759758, |
| "learning_rate": 0.00011335852377849424, |
| "logits/chosen": -1.961328148841858, |
| "logits/rejected": -3.6734375953674316, |
| "logps/chosen": -136.3249969482422, |
| "logps/rejected": -296.3999938964844, |
| "loss": 0.0834, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -6.860937595367432, |
| "rewards/margins": 15.140625, |
| "rewards/rejected": -22.0, |
| "step": 475 |
| }, |
| { |
| "epoch": 3.430879712746858, |
| "grad_norm": 0.7605228424072266, |
| "learning_rate": 0.00011334776693168985, |
| "logits/chosen": -3.0999999046325684, |
| "logits/rejected": -4.667187690734863, |
| "logps/chosen": -149.60000610351562, |
| "logps/rejected": -347.70001220703125, |
| "loss": 0.0254, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -8.057812690734863, |
| "rewards/margins": 18.965625762939453, |
| "rewards/rejected": -27.03125, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.466786355475763, |
| "grad_norm": 3.233030319213867, |
| "learning_rate": 0.0001133368679256387, |
| "logits/chosen": -4.842187404632568, |
| "logits/rejected": -6.603125095367432, |
| "logps/chosen": -202.375, |
| "logps/rejected": -458.3999938964844, |
| "loss": 0.0157, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -13.3359375, |
| "rewards/margins": 24.912500381469727, |
| "rewards/rejected": -38.1875, |
| "step": 485 |
| }, |
| { |
| "epoch": 3.502692998204668, |
| "grad_norm": 0.12913425266742706, |
| "learning_rate": 0.000113325826787779, |
| "logits/chosen": -6.7109375, |
| "logits/rejected": -8.201562881469727, |
| "logps/chosen": -253.0500030517578, |
| "logps/rejected": -495.5, |
| "loss": 0.0287, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -18.384374618530273, |
| "rewards/margins": 23.737499237060547, |
| "rewards/rejected": -42.11249923706055, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.5385996409335725, |
| "grad_norm": 0.36198368668556213, |
| "learning_rate": 0.00011331464354590684, |
| "logits/chosen": -5.528124809265137, |
| "logits/rejected": -7.0, |
| "logps/chosen": -176.5500030517578, |
| "logps/rejected": -370.3999938964844, |
| "loss": 0.0606, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -10.745312690734863, |
| "rewards/margins": 18.90625, |
| "rewards/rejected": -29.637500762939453, |
| "step": 495 |
| }, |
| { |
| "epoch": 3.5745062836624775, |
| "grad_norm": 1.8532159328460693, |
| "learning_rate": 0.000113303318228176, |
| "logits/chosen": -4.196875095367432, |
| "logits/rejected": -5.509375095367432, |
| "logps/chosen": -167.60000610351562, |
| "logps/rejected": -333.8999938964844, |
| "loss": 0.0652, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -9.654687881469727, |
| "rewards/margins": 16.065624237060547, |
| "rewards/rejected": -25.712499618530273, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.5745062836624775, |
| "eval_logits/chosen": -4.733455657958984, |
| "eval_logits/rejected": -6.211397171020508, |
| "eval_logps/chosen": -219.64706420898438, |
| "eval_logps/rejected": -395.8823547363281, |
| "eval_loss": 0.014533035457134247, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -14.753676414489746, |
| "eval_rewards/margins": 16.90441131591797, |
| "eval_rewards/rejected": -31.676469802856445, |
| "eval_runtime": 8.5526, |
| "eval_samples_per_second": 31.219, |
| "eval_steps_per_second": 1.988, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.6104129263913824, |
| "grad_norm": 0.02227591536939144, |
| "learning_rate": 0.00011329185086309797, |
| "logits/chosen": -5.440625190734863, |
| "logits/rejected": -6.892187595367432, |
| "logps/chosen": -263.04998779296875, |
| "logps/rejected": -438.5, |
| "loss": 0.0284, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -19.5625, |
| "rewards/margins": 16.524999618530273, |
| "rewards/rejected": -36.099998474121094, |
| "step": 505 |
| }, |
| { |
| "epoch": 3.646319569120287, |
| "grad_norm": 0.263701468706131, |
| "learning_rate": 0.00011328024147954178, |
| "logits/chosen": -6.275000095367432, |
| "logits/rejected": -7.809374809265137, |
| "logps/chosen": -318.5, |
| "logps/rejected": -494.5, |
| "loss": 0.0307, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -25.049999237060547, |
| "rewards/margins": 17.087499618530273, |
| "rewards/rejected": -42.13750076293945, |
| "step": 510 |
| }, |
| { |
| "epoch": 3.682226211849192, |
| "grad_norm": 0.08680078387260437, |
| "learning_rate": 0.00011326849010673409, |
| "logits/chosen": -7.175000190734863, |
| "logits/rejected": -8.606249809265137, |
| "logps/chosen": -344.70001220703125, |
| "logps/rejected": -576.4000244140625, |
| "loss": 0.0187, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -27.662500381469727, |
| "rewards/margins": 22.403125762939453, |
| "rewards/rejected": -50.0625, |
| "step": 515 |
| }, |
| { |
| "epoch": 3.718132854578097, |
| "grad_norm": 0.3637928068637848, |
| "learning_rate": 0.00011325659677425894, |
| "logits/chosen": -6.360937595367432, |
| "logits/rejected": -8.0078125, |
| "logps/chosen": -321.70001220703125, |
| "logps/rejected": -571.2999877929688, |
| "loss": 0.0563, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -25.375, |
| "rewards/margins": 24.231250762939453, |
| "rewards/rejected": -49.5625, |
| "step": 520 |
| }, |
| { |
| "epoch": 3.754039497307002, |
| "grad_norm": 0.06884779036045074, |
| "learning_rate": 0.00011324456151205779, |
| "logits/chosen": -5.998437404632568, |
| "logits/rejected": -7.151562690734863, |
| "logps/chosen": -310.5, |
| "logps/rejected": -512.7999877929688, |
| "loss": 0.0412, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -24.200000762939453, |
| "rewards/margins": 19.4375, |
| "rewards/rejected": -43.63750076293945, |
| "step": 525 |
| }, |
| { |
| "epoch": 3.789946140035907, |
| "grad_norm": 0.19923055171966553, |
| "learning_rate": 0.0001132323843504294, |
| "logits/chosen": -6.621874809265137, |
| "logits/rejected": -7.451562404632568, |
| "logps/chosen": -316.8999938964844, |
| "logps/rejected": -483.79998779296875, |
| "loss": 0.0418, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -24.6875, |
| "rewards/margins": 16.034374237060547, |
| "rewards/rejected": -40.724998474121094, |
| "step": 530 |
| }, |
| { |
| "epoch": 3.8258527827648114, |
| "grad_norm": 0.025405047461390495, |
| "learning_rate": 0.00011322006532002976, |
| "logits/chosen": -7.5390625, |
| "logits/rejected": -8.484375, |
| "logps/chosen": -325.1000061035156, |
| "logps/rejected": -495.0, |
| "loss": 0.0575, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -25.862499237060547, |
| "rewards/margins": 16.359375, |
| "rewards/rejected": -42.224998474121094, |
| "step": 535 |
| }, |
| { |
| "epoch": 3.8617594254937164, |
| "grad_norm": 0.7414963841438293, |
| "learning_rate": 0.00011320760445187202, |
| "logits/chosen": -8.795312881469727, |
| "logits/rejected": -10.037500381469727, |
| "logps/chosen": -355.8999938964844, |
| "logps/rejected": -577.0, |
| "loss": 0.0196, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -28.868749618530273, |
| "rewards/margins": 21.512500762939453, |
| "rewards/rejected": -50.38750076293945, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.8976660682226214, |
| "grad_norm": 2.0291175842285156, |
| "learning_rate": 0.00011319500177732639, |
| "logits/chosen": -8.653124809265137, |
| "logits/rejected": -9.693750381469727, |
| "logps/chosen": -344.0, |
| "logps/rejected": -551.2999877929688, |
| "loss": 0.0377, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -27.618749618530273, |
| "rewards/margins": 19.924999237060547, |
| "rewards/rejected": -47.537498474121094, |
| "step": 545 |
| }, |
| { |
| "epoch": 3.933572710951526, |
| "grad_norm": 0.06643925607204437, |
| "learning_rate": 0.00011318225732812008, |
| "logits/chosen": -6.587500095367432, |
| "logits/rejected": -7.754687309265137, |
| "logps/chosen": -280.04998779296875, |
| "logps/rejected": -456.8999938964844, |
| "loss": 0.033, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -21.100000381469727, |
| "rewards/margins": 17.134374618530273, |
| "rewards/rejected": -38.25, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.933572710951526, |
| "eval_logits/chosen": -5.768382549285889, |
| "eval_logits/rejected": -6.928308963775635, |
| "eval_logps/chosen": -264.4117736816406, |
| "eval_logps/rejected": -437.76470947265625, |
| "eval_loss": 0.04973261430859566, |
| "eval_rewards/accuracies": 0.9852941036224365, |
| "eval_rewards/chosen": -19.34558868408203, |
| "eval_rewards/margins": 16.566177368164062, |
| "eval_rewards/rejected": -35.89706039428711, |
| "eval_runtime": 8.691, |
| "eval_samples_per_second": 30.721, |
| "eval_steps_per_second": 1.956, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.969479353680431, |
| "grad_norm": 0.709564208984375, |
| "learning_rate": 0.00011316937113633724, |
| "logits/chosen": -5.145312309265137, |
| "logits/rejected": -6.15625, |
| "logps/chosen": -254.3000030517578, |
| "logps/rejected": -400.3999938964844, |
| "loss": 0.0689, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -18.596874237060547, |
| "rewards/margins": 13.71875, |
| "rewards/rejected": -32.318748474121094, |
| "step": 555 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.014319206587970257, |
| "learning_rate": 0.00011315634323441883, |
| "logits/chosen": -4.669117450714111, |
| "logits/rejected": -5.674632549285889, |
| "logps/chosen": -250.88235473632812, |
| "logps/rejected": -388.23529052734375, |
| "loss": 0.0647, |
| "rewards/accuracies": 0.9632353186607361, |
| "rewards/chosen": -18.419116973876953, |
| "rewards/margins": 12.941176414489746, |
| "rewards/rejected": -31.33823585510254, |
| "step": 560 |
| }, |
| { |
| "epoch": 4.0359066427289045, |
| "grad_norm": 1.3408515453338623, |
| "learning_rate": 0.0001131431736551626, |
| "logits/chosen": -4.814062595367432, |
| "logits/rejected": -6.176562309265137, |
| "logps/chosen": -249.4499969482422, |
| "logps/rejected": -412.79998779296875, |
| "loss": 0.0462, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -18.274999618530273, |
| "rewards/margins": 15.787500381469727, |
| "rewards/rejected": -34.068748474121094, |
| "step": 565 |
| }, |
| { |
| "epoch": 4.07181328545781, |
| "grad_norm": 0.39796170592308044, |
| "learning_rate": 0.00011312986243172293, |
| "logits/chosen": -4.244531154632568, |
| "logits/rejected": -6.751562595367432, |
| "logps/chosen": -225.6999969482422, |
| "logps/rejected": -486.6000061035156, |
| "loss": 0.013, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -15.662500381469727, |
| "rewards/margins": 25.412500381469727, |
| "rewards/rejected": -41.087501525878906, |
| "step": 570 |
| }, |
| { |
| "epoch": 4.1077199281867145, |
| "grad_norm": 2.608715772628784, |
| "learning_rate": 0.00011311640959761081, |
| "logits/chosen": -3.823437452316284, |
| "logits/rejected": -7.4375, |
| "logps/chosen": -240.14999389648438, |
| "logps/rejected": -562.5999755859375, |
| "loss": 0.053, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -17.225000381469727, |
| "rewards/margins": 31.493749618530273, |
| "rewards/rejected": -48.75, |
| "step": 575 |
| }, |
| { |
| "epoch": 4.143626570915619, |
| "grad_norm": 2.569322347640991, |
| "learning_rate": 0.00011310281518669376, |
| "logits/chosen": -5.467187404632568, |
| "logits/rejected": -8.5078125, |
| "logps/chosen": -370.6000061035156, |
| "logps/rejected": -627.0, |
| "loss": 0.0235, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -30.15625, |
| "rewards/margins": 25.225000381469727, |
| "rewards/rejected": -55.38750076293945, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.1795332136445245, |
| "grad_norm": 7.271477699279785, |
| "learning_rate": 0.00011308907923319566, |
| "logits/chosen": -4.629687309265137, |
| "logits/rejected": -7.557812690734863, |
| "logps/chosen": -309.3500061035156, |
| "logps/rejected": -587.4000244140625, |
| "loss": 0.0519, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -24.274999618530273, |
| "rewards/margins": 27.075000762939453, |
| "rewards/rejected": -51.337501525878906, |
| "step": 585 |
| }, |
| { |
| "epoch": 4.215439856373429, |
| "grad_norm": 4.082700729370117, |
| "learning_rate": 0.00011307520177169676, |
| "logits/chosen": -4.9609375, |
| "logits/rejected": -7.151562690734863, |
| "logps/chosen": -320.79998779296875, |
| "logps/rejected": -574.0, |
| "loss": 0.0997, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -25.225000381469727, |
| "rewards/margins": 25.018749237060547, |
| "rewards/rejected": -50.23749923706055, |
| "step": 590 |
| }, |
| { |
| "epoch": 4.2513464991023335, |
| "grad_norm": 1.2855074405670166, |
| "learning_rate": 0.00011306118283713357, |
| "logits/chosen": -3.370312452316284, |
| "logits/rejected": -4.561718940734863, |
| "logps/chosen": -268.54998779296875, |
| "logps/rejected": -507.8999938964844, |
| "loss": 0.0289, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -20.206249237060547, |
| "rewards/margins": 23.3125, |
| "rewards/rejected": -43.537498474121094, |
| "step": 595 |
| }, |
| { |
| "epoch": 4.287253141831239, |
| "grad_norm": 0.21659362316131592, |
| "learning_rate": 0.00011304702246479876, |
| "logits/chosen": -3.285937547683716, |
| "logits/rejected": -4.146093845367432, |
| "logps/chosen": -270.3500061035156, |
| "logps/rejected": -488.20001220703125, |
| "loss": 0.0174, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -19.931249618530273, |
| "rewards/margins": 21.193750381469727, |
| "rewards/rejected": -41.125, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.287253141831239, |
| "eval_logits/chosen": -3.882352828979492, |
| "eval_logits/rejected": -5.0, |
| "eval_logps/chosen": -272.76470947265625, |
| "eval_logps/rejected": -492.8235168457031, |
| "eval_loss": 0.014545433223247528, |
| "eval_rewards/accuracies": 0.9926470518112183, |
| "eval_rewards/chosen": -20.176469802856445, |
| "eval_rewards/margins": 21.330883026123047, |
| "eval_rewards/rejected": -41.52941131591797, |
| "eval_runtime": 8.5511, |
| "eval_samples_per_second": 31.224, |
| "eval_steps_per_second": 1.988, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.3231597845601435, |
| "grad_norm": 0.36118730902671814, |
| "learning_rate": 0.00011303272069034098, |
| "logits/chosen": -4.321875095367432, |
| "logits/rejected": -5.332812309265137, |
| "logps/chosen": -282.45001220703125, |
| "logps/rejected": -488.0, |
| "loss": 0.0449, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -21.653125762939453, |
| "rewards/margins": 19.787500381469727, |
| "rewards/rejected": -41.45000076293945, |
| "step": 605 |
| }, |
| { |
| "epoch": 4.359066427289049, |
| "grad_norm": 0.5386459231376648, |
| "learning_rate": 0.00011301827754976498, |
| "logits/chosen": -5.6015625, |
| "logits/rejected": -6.984375, |
| "logps/chosen": -302.29998779296875, |
| "logps/rejected": -557.7000122070312, |
| "loss": 0.0153, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -23.518749237060547, |
| "rewards/margins": 24.784374237060547, |
| "rewards/rejected": -48.29999923706055, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.3949730700179535, |
| "grad_norm": 4.88869571685791, |
| "learning_rate": 0.00011300369307943137, |
| "logits/chosen": -6.215624809265137, |
| "logits/rejected": -7.949999809265137, |
| "logps/chosen": -270.1000061035156, |
| "logps/rejected": -572.2999877929688, |
| "loss": 0.0224, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -19.809375762939453, |
| "rewards/margins": 29.512500762939453, |
| "rewards/rejected": -49.3125, |
| "step": 615 |
| }, |
| { |
| "epoch": 4.430879712746858, |
| "grad_norm": 2.2598838806152344, |
| "learning_rate": 0.00011298896731605649, |
| "logits/chosen": -3.9453125, |
| "logits/rejected": -5.546875, |
| "logps/chosen": -203.35000610351562, |
| "logps/rejected": -407.79998779296875, |
| "loss": 0.0373, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -13.240625381469727, |
| "rewards/margins": 20.003124237060547, |
| "rewards/rejected": -33.26250076293945, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.466786355475763, |
| "grad_norm": 0.47177407145500183, |
| "learning_rate": 0.00011297410029671247, |
| "logits/chosen": -3.74609375, |
| "logits/rejected": -5.087500095367432, |
| "logps/chosen": -281.70001220703125, |
| "logps/rejected": -448.79998779296875, |
| "loss": 0.04, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -21.34375, |
| "rewards/margins": 16.043750762939453, |
| "rewards/rejected": -37.42499923706055, |
| "step": 625 |
| }, |
| { |
| "epoch": 4.502692998204668, |
| "grad_norm": 0.2046031653881073, |
| "learning_rate": 0.00011295909205882698, |
| "logits/chosen": -1.002783179283142, |
| "logits/rejected": -3.4195313453674316, |
| "logps/chosen": -189.8249969482422, |
| "logps/rejected": -390.1000061035156, |
| "loss": 0.0272, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -11.790624618530273, |
| "rewards/margins": 19.309375762939453, |
| "rewards/rejected": -31.09375, |
| "step": 630 |
| }, |
| { |
| "epoch": 4.5385996409335725, |
| "grad_norm": 0.29450154304504395, |
| "learning_rate": 0.00011294394264018326, |
| "logits/chosen": -2.082812547683716, |
| "logits/rejected": -4.514062404632568, |
| "logps/chosen": -224.8000030517578, |
| "logps/rejected": -433.79998779296875, |
| "loss": 0.0158, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -15.709375381469727, |
| "rewards/margins": 19.975000381469727, |
| "rewards/rejected": -35.70000076293945, |
| "step": 635 |
| }, |
| { |
| "epoch": 4.574506283662478, |
| "grad_norm": 0.15016689896583557, |
| "learning_rate": 0.00011292865207891994, |
| "logits/chosen": -3.026562452316284, |
| "logits/rejected": -5.426562309265137, |
| "logps/chosen": -247.1999969482422, |
| "logps/rejected": -477.1000061035156, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -17.771875381469727, |
| "rewards/margins": 22.262500762939453, |
| "rewards/rejected": -40.025001525878906, |
| "step": 640 |
| }, |
| { |
| "epoch": 4.6104129263913824, |
| "grad_norm": 3.1011385917663574, |
| "learning_rate": 0.00011291322041353101, |
| "logits/chosen": -3.1812500953674316, |
| "logits/rejected": -5.546875, |
| "logps/chosen": -231.9499969482422, |
| "logps/rejected": -464.5, |
| "loss": 0.0109, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -16.149999618530273, |
| "rewards/margins": 22.75, |
| "rewards/rejected": -38.912498474121094, |
| "step": 645 |
| }, |
| { |
| "epoch": 4.646319569120287, |
| "grad_norm": 0.06376684457063675, |
| "learning_rate": 0.00011289764768286565, |
| "logits/chosen": -4.444531440734863, |
| "logits/rejected": -6.487500190734863, |
| "logps/chosen": -285.45001220703125, |
| "logps/rejected": -519.7999877929688, |
| "loss": 0.0149, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -21.674999237060547, |
| "rewards/margins": 22.8125, |
| "rewards/rejected": -44.525001525878906, |
| "step": 650 |
| }, |
| { |
| "epoch": 4.646319569120287, |
| "eval_logits/chosen": -4.110294342041016, |
| "eval_logits/rejected": -6.588235378265381, |
| "eval_logps/chosen": -257.6470642089844, |
| "eval_logps/rejected": -516.5882568359375, |
| "eval_loss": 0.029071472585201263, |
| "eval_rewards/accuracies": 0.9889705777168274, |
| "eval_rewards/chosen": -18.647058486938477, |
| "eval_rewards/margins": 25.28676414489746, |
| "eval_rewards/rejected": -43.94117736816406, |
| "eval_runtime": 8.8159, |
| "eval_samples_per_second": 30.286, |
| "eval_steps_per_second": 1.928, |
| "step": 650 |
| }, |
| { |
| "epoch": 4.682226211849192, |
| "grad_norm": 3.26318621635437, |
| "learning_rate": 0.00011288193392612822, |
| "logits/chosen": -3.928906202316284, |
| "logits/rejected": -6.315625190734863, |
| "logps/chosen": -248.64999389648438, |
| "logps/rejected": -503.79998779296875, |
| "loss": 0.056, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -18.081249237060547, |
| "rewards/margins": 24.456249237060547, |
| "rewards/rejected": -42.537498474121094, |
| "step": 655 |
| }, |
| { |
| "epoch": 4.718132854578097, |
| "grad_norm": 0.18917639553546906, |
| "learning_rate": 0.00011286607918287803, |
| "logits/chosen": -2.7222657203674316, |
| "logits/rejected": -5.206250190734863, |
| "logps/chosen": -258.1499938964844, |
| "logps/rejected": -481.79998779296875, |
| "loss": 0.0583, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -19.225000381469727, |
| "rewards/margins": 21.606250762939453, |
| "rewards/rejected": -40.849998474121094, |
| "step": 660 |
| }, |
| { |
| "epoch": 4.7540394973070015, |
| "grad_norm": 0.9138699173927307, |
| "learning_rate": 0.00011285008349302943, |
| "logits/chosen": -2.116406202316284, |
| "logits/rejected": -4.259375095367432, |
| "logps/chosen": -294.20001220703125, |
| "logps/rejected": -505.20001220703125, |
| "loss": 0.0192, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -23.012500762939453, |
| "rewards/margins": 20.296875, |
| "rewards/rejected": -43.337501525878906, |
| "step": 665 |
| }, |
| { |
| "epoch": 4.789946140035907, |
| "grad_norm": 0.5865360498428345, |
| "learning_rate": 0.00011283394689685153, |
| "logits/chosen": -3.46875, |
| "logits/rejected": -5.800000190734863, |
| "logps/chosen": -292.0, |
| "logps/rejected": -535.2999877929688, |
| "loss": 0.0238, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -22.268749237060547, |
| "rewards/margins": 23.831249237060547, |
| "rewards/rejected": -46.04999923706055, |
| "step": 670 |
| }, |
| { |
| "epoch": 4.825852782764811, |
| "grad_norm": 1.3726475238800049, |
| "learning_rate": 0.0001128176694349682, |
| "logits/chosen": -3.3148436546325684, |
| "logits/rejected": -5.324999809265137, |
| "logps/chosen": -286.29998779296875, |
| "logps/rejected": -542.9000244140625, |
| "loss": 0.0127, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -22.024999618530273, |
| "rewards/margins": 24.868749618530273, |
| "rewards/rejected": -46.912498474121094, |
| "step": 675 |
| }, |
| { |
| "epoch": 4.861759425493716, |
| "grad_norm": 10.441712379455566, |
| "learning_rate": 0.00011280125114835791, |
| "logits/chosen": -2.067578077316284, |
| "logits/rejected": -4.528124809265137, |
| "logps/chosen": -216.75, |
| "logps/rejected": -488.70001220703125, |
| "loss": 0.0218, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -15.034375190734863, |
| "rewards/margins": 26.6875, |
| "rewards/rejected": -41.712501525878906, |
| "step": 680 |
| }, |
| { |
| "epoch": 4.897666068222621, |
| "grad_norm": 0.9471856355667114, |
| "learning_rate": 0.00011278469207835369, |
| "logits/chosen": -0.960888683795929, |
| "logits/rejected": -3.39453125, |
| "logps/chosen": -179.25, |
| "logps/rejected": -433.1000061035156, |
| "loss": 0.0337, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -11.198437690734863, |
| "rewards/margins": 24.59375, |
| "rewards/rejected": -35.787498474121094, |
| "step": 685 |
| }, |
| { |
| "epoch": 4.933572710951526, |
| "grad_norm": 4.058782577514648, |
| "learning_rate": 0.000112767992266643, |
| "logits/chosen": -2.7562499046325684, |
| "logits/rejected": -4.982031345367432, |
| "logps/chosen": -250.60000610351562, |
| "logps/rejected": -498.20001220703125, |
| "loss": 0.0548, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -18.268749237060547, |
| "rewards/margins": 23.912500381469727, |
| "rewards/rejected": -42.1875, |
| "step": 690 |
| }, |
| { |
| "epoch": 4.9694793536804305, |
| "grad_norm": 5.3591084480285645, |
| "learning_rate": 0.00011275115175526756, |
| "logits/chosen": -3.2984375953674316, |
| "logits/rejected": -5.337500095367432, |
| "logps/chosen": -273.29998779296875, |
| "logps/rejected": -526.4000244140625, |
| "loss": 0.0574, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -20.40625, |
| "rewards/margins": 24.493749618530273, |
| "rewards/rejected": -44.900001525878906, |
| "step": 695 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 3.2741596698760986, |
| "learning_rate": 0.00011273417058662334, |
| "logits/chosen": -1.62890625, |
| "logits/rejected": -3.163602828979492, |
| "logps/chosen": -178.4705810546875, |
| "logps/rejected": -387.76470947265625, |
| "loss": 0.0274, |
| "rewards/accuracies": 0.9852941036224365, |
| "rewards/chosen": -10.939338684082031, |
| "rewards/margins": 20.0, |
| "rewards/rejected": -30.941177368164062, |
| "step": 700 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_logits/chosen": -1.3389246463775635, |
| "eval_logits/rejected": -2.8189337253570557, |
| "eval_logps/chosen": -160.88235473632812, |
| "eval_logps/rejected": -377.29412841796875, |
| "eval_loss": 0.009149392135441303, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -8.79411792755127, |
| "eval_rewards/margins": 20.941177368164062, |
| "eval_rewards/rejected": -29.757352828979492, |
| "eval_runtime": 8.8399, |
| "eval_samples_per_second": 30.204, |
| "eval_steps_per_second": 1.923, |
| "step": 700 |
| }, |
| { |
| "epoch": 5.0359066427289045, |
| "grad_norm": 0.25538453459739685, |
| "learning_rate": 0.00011271704880346044, |
| "logits/chosen": -1.1785156726837158, |
| "logits/rejected": -2.3316407203674316, |
| "logps/chosen": -155.8000030517578, |
| "logps/rejected": -362.29998779296875, |
| "loss": 0.0164, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -8.415624618530273, |
| "rewards/margins": 20.053125381469727, |
| "rewards/rejected": -28.475000381469727, |
| "step": 705 |
| }, |
| { |
| "epoch": 5.07181328545781, |
| "grad_norm": 0.008221164345741272, |
| "learning_rate": 0.0001126997864488829, |
| "logits/chosen": -1.918554663658142, |
| "logits/rejected": -2.684375047683716, |
| "logps/chosen": -154.64999389648438, |
| "logps/rejected": -376.79998779296875, |
| "loss": 0.0105, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -8.796875, |
| "rewards/margins": 21.412500381469727, |
| "rewards/rejected": -30.21875, |
| "step": 710 |
| }, |
| { |
| "epoch": 5.1077199281867145, |
| "grad_norm": 0.4989652931690216, |
| "learning_rate": 0.0001126823835663487, |
| "logits/chosen": -3.301562547683716, |
| "logits/rejected": -4.116406440734863, |
| "logps/chosen": -197.75, |
| "logps/rejected": -458.0, |
| "loss": 0.0212, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -12.659375190734863, |
| "rewards/margins": 25.631250381469727, |
| "rewards/rejected": -38.287498474121094, |
| "step": 715 |
| }, |
| { |
| "epoch": 5.143626570915619, |
| "grad_norm": 0.927310585975647, |
| "learning_rate": 0.00011266484019966958, |
| "logits/chosen": -4.263281345367432, |
| "logits/rejected": -4.912499904632568, |
| "logps/chosen": -235.35000610351562, |
| "logps/rejected": -507.5, |
| "loss": 0.0091, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -16.387500762939453, |
| "rewards/margins": 26.818750381469727, |
| "rewards/rejected": -43.224998474121094, |
| "step": 720 |
| }, |
| { |
| "epoch": 5.1795332136445245, |
| "grad_norm": 0.03083197772502899, |
| "learning_rate": 0.00011264715639301091, |
| "logits/chosen": -4.6484375, |
| "logits/rejected": -5.534375190734863, |
| "logps/chosen": -255.85000610351562, |
| "logps/rejected": -538.7000122070312, |
| "loss": 0.0437, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -18.915624618530273, |
| "rewards/margins": 27.412500381469727, |
| "rewards/rejected": -46.349998474121094, |
| "step": 725 |
| }, |
| { |
| "epoch": 5.215439856373429, |
| "grad_norm": 0.13269655406475067, |
| "learning_rate": 0.00011262933219089168, |
| "logits/chosen": -4.479687690734863, |
| "logits/rejected": -5.400000095367432, |
| "logps/chosen": -285.70001220703125, |
| "logps/rejected": -533.5999755859375, |
| "loss": 0.0194, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -21.600000381469727, |
| "rewards/margins": 24.21875, |
| "rewards/rejected": -45.837501525878906, |
| "step": 730 |
| }, |
| { |
| "epoch": 5.2513464991023335, |
| "grad_norm": 3.4042835235595703, |
| "learning_rate": 0.0001126113676381843, |
| "logits/chosen": -3.9671874046325684, |
| "logits/rejected": -5.073437690734863, |
| "logps/chosen": -251.8000030517578, |
| "logps/rejected": -491.6000061035156, |
| "loss": 0.0386, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -18.399999618530273, |
| "rewards/margins": 23.25, |
| "rewards/rejected": -41.650001525878906, |
| "step": 735 |
| }, |
| { |
| "epoch": 5.287253141831239, |
| "grad_norm": 0.5153644680976868, |
| "learning_rate": 0.00011259326278011449, |
| "logits/chosen": -4.012499809265137, |
| "logits/rejected": -5.053124904632568, |
| "logps/chosen": -254.85000610351562, |
| "logps/rejected": -454.6000061035156, |
| "loss": 0.0352, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -18.446874618530273, |
| "rewards/margins": 19.393749237060547, |
| "rewards/rejected": -37.849998474121094, |
| "step": 740 |
| }, |
| { |
| "epoch": 5.3231597845601435, |
| "grad_norm": 0.4011136591434479, |
| "learning_rate": 0.00011257501766226122, |
| "logits/chosen": -4.405468940734863, |
| "logits/rejected": -5.582812309265137, |
| "logps/chosen": -254.9499969482422, |
| "logps/rejected": -468.0, |
| "loss": 0.023, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -18.615625381469727, |
| "rewards/margins": 20.684375762939453, |
| "rewards/rejected": -39.25, |
| "step": 745 |
| }, |
| { |
| "epoch": 5.359066427289049, |
| "grad_norm": 0.0850766971707344, |
| "learning_rate": 0.00011255663233055655, |
| "logits/chosen": -3.621875047683716, |
| "logits/rejected": -5.439062595367432, |
| "logps/chosen": -175.75, |
| "logps/rejected": -402.1000061035156, |
| "loss": 0.0341, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -10.673437118530273, |
| "rewards/margins": 22.149999618530273, |
| "rewards/rejected": -32.837501525878906, |
| "step": 750 |
| }, |
| { |
| "epoch": 5.359066427289049, |
| "eval_logits/chosen": -2.5533087253570557, |
| "eval_logits/rejected": -4.464154243469238, |
| "eval_logps/chosen": -134.6764678955078, |
| "eval_logps/rejected": -359.8823547363281, |
| "eval_loss": 0.008504279889166355, |
| "eval_rewards/accuracies": 0.9963235259056091, |
| "eval_rewards/chosen": -6.143382549285889, |
| "eval_rewards/margins": 21.83823585510254, |
| "eval_rewards/rejected": -27.977941513061523, |
| "eval_runtime": 8.5224, |
| "eval_samples_per_second": 31.329, |
| "eval_steps_per_second": 1.995, |
| "step": 750 |
| }, |
| { |
| "epoch": 5.3949730700179535, |
| "grad_norm": 0.8295687437057495, |
| "learning_rate": 0.00011253810683128554, |
| "logits/chosen": -3.987499952316284, |
| "logits/rejected": -6.296875, |
| "logps/chosen": -207.02499389648438, |
| "logps/rejected": -479.1000061035156, |
| "loss": 0.0617, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -13.651562690734863, |
| "rewards/margins": 26.793750762939453, |
| "rewards/rejected": -40.443748474121094, |
| "step": 755 |
| }, |
| { |
| "epoch": 5.430879712746858, |
| "grad_norm": 0.09694784134626389, |
| "learning_rate": 0.0001125194412110861, |
| "logits/chosen": -5.464062690734863, |
| "logits/rejected": -7.379687309265137, |
| "logps/chosen": -310.6000061035156, |
| "logps/rejected": -564.5999755859375, |
| "loss": 0.0176, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -24.225000381469727, |
| "rewards/margins": 24.837499618530273, |
| "rewards/rejected": -49.025001525878906, |
| "step": 760 |
| }, |
| { |
| "epoch": 5.466786355475763, |
| "grad_norm": 0.035386599600315094, |
| "learning_rate": 0.00011250063551694892, |
| "logits/chosen": -5.551562309265137, |
| "logits/rejected": -7.342187404632568, |
| "logps/chosen": -306.95001220703125, |
| "logps/rejected": -559.7000122070312, |
| "loss": 0.011, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -23.700000762939453, |
| "rewards/margins": 24.424999237060547, |
| "rewards/rejected": -48.13750076293945, |
| "step": 765 |
| }, |
| { |
| "epoch": 5.502692998204668, |
| "grad_norm": 0.7666543126106262, |
| "learning_rate": 0.00011248168979621728, |
| "logits/chosen": -3.92578125, |
| "logits/rejected": -5.90625, |
| "logps/chosen": -260.75, |
| "logps/rejected": -506.8999938964844, |
| "loss": 0.0214, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -19.481250762939453, |
| "rewards/margins": 23.65625, |
| "rewards/rejected": -43.150001525878906, |
| "step": 770 |
| }, |
| { |
| "epoch": 5.5385996409335725, |
| "grad_norm": 0.0009977294830605388, |
| "learning_rate": 0.00011246260409658705, |
| "logits/chosen": -4.388281345367432, |
| "logits/rejected": -7.181250095367432, |
| "logps/chosen": -261.20001220703125, |
| "logps/rejected": -591.7000122070312, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -19.140625, |
| "rewards/margins": 32.525001525878906, |
| "rewards/rejected": -51.724998474121094, |
| "step": 775 |
| }, |
| { |
| "epoch": 5.574506283662478, |
| "grad_norm": 0.01044029463082552, |
| "learning_rate": 0.00011244337846610643, |
| "logits/chosen": -4.952343940734863, |
| "logits/rejected": -7.689062595367432, |
| "logps/chosen": -266.25, |
| "logps/rejected": -594.2999877929688, |
| "loss": 0.0375, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -19.524999618530273, |
| "rewards/margins": 32.75, |
| "rewards/rejected": -52.26250076293945, |
| "step": 780 |
| }, |
| { |
| "epoch": 5.6104129263913824, |
| "grad_norm": 0.5297859907150269, |
| "learning_rate": 0.00011242401295317595, |
| "logits/chosen": -4.126562595367432, |
| "logits/rejected": -6.842187404632568, |
| "logps/chosen": -225.5, |
| "logps/rejected": -552.0, |
| "loss": 0.0101, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -15.793749809265137, |
| "rewards/margins": 32.14374923706055, |
| "rewards/rejected": -47.912498474121094, |
| "step": 785 |
| }, |
| { |
| "epoch": 5.646319569120287, |
| "grad_norm": 5.579909801483154, |
| "learning_rate": 0.00011240450760654824, |
| "logits/chosen": -4.430468559265137, |
| "logits/rejected": -7.059374809265137, |
| "logps/chosen": -252.64999389648438, |
| "logps/rejected": -567.7000122070312, |
| "loss": 0.085, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -18.368749618530273, |
| "rewards/margins": 31.174999237060547, |
| "rewards/rejected": -49.537498474121094, |
| "step": 790 |
| }, |
| { |
| "epoch": 5.682226211849192, |
| "grad_norm": 3.711723566055298, |
| "learning_rate": 0.000112384862475328, |
| "logits/chosen": -4.119531154632568, |
| "logits/rejected": -6.137499809265137, |
| "logps/chosen": -228.8000030517578, |
| "logps/rejected": -467.29998779296875, |
| "loss": 0.0374, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -16.090625762939453, |
| "rewards/margins": 23.287500381469727, |
| "rewards/rejected": -39.375, |
| "step": 795 |
| }, |
| { |
| "epoch": 5.718132854578097, |
| "grad_norm": 0.38980910181999207, |
| "learning_rate": 0.00011236507760897182, |
| "logits/chosen": -5.25, |
| "logits/rejected": -7.279687404632568, |
| "logps/chosen": -253.60000610351562, |
| "logps/rejected": -505.0, |
| "loss": 0.0149, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -18.53125, |
| "rewards/margins": 24.393749237060547, |
| "rewards/rejected": -42.9375, |
| "step": 800 |
| }, |
| { |
| "epoch": 5.718132854578097, |
| "eval_logits/chosen": -5.795955657958984, |
| "eval_logits/rejected": -7.886029243469238, |
| "eval_logps/chosen": -267.8823547363281, |
| "eval_logps/rejected": -518.4705810546875, |
| "eval_loss": 0.0229768268764019, |
| "eval_rewards/accuracies": 0.9799466133117676, |
| "eval_rewards/chosen": -19.705883026123047, |
| "eval_rewards/margins": 24.47058868408203, |
| "eval_rewards/rejected": -44.132354736328125, |
| "eval_runtime": 8.5466, |
| "eval_samples_per_second": 31.241, |
| "eval_steps_per_second": 1.989, |
| "step": 800 |
| }, |
| { |
| "epoch": 5.7540394973070015, |
| "grad_norm": 0.08777919411659241, |
| "learning_rate": 0.00011234515305728806, |
| "logits/chosen": -6.481249809265137, |
| "logits/rejected": -8.475000381469727, |
| "logps/chosen": -288.45001220703125, |
| "logps/rejected": -538.9000244140625, |
| "loss": 0.0167, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -22.262500762939453, |
| "rewards/margins": 24.28125, |
| "rewards/rejected": -46.5625, |
| "step": 805 |
| }, |
| { |
| "epoch": 5.789946140035907, |
| "grad_norm": 0.591503918170929, |
| "learning_rate": 0.00011232508887043676, |
| "logits/chosen": -6.189062595367432, |
| "logits/rejected": -7.951562404632568, |
| "logps/chosen": -279.8999938964844, |
| "logps/rejected": -496.5, |
| "loss": 0.0469, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -21.21875, |
| "rewards/margins": 20.728124618530273, |
| "rewards/rejected": -41.98749923706055, |
| "step": 810 |
| }, |
| { |
| "epoch": 5.825852782764811, |
| "grad_norm": 0.4492699205875397, |
| "learning_rate": 0.00011230488509892953, |
| "logits/chosen": -4.164843559265137, |
| "logits/rejected": -6.059374809265137, |
| "logps/chosen": -243.3000030517578, |
| "logps/rejected": -473.20001220703125, |
| "loss": 0.0103, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -17.274999618530273, |
| "rewards/margins": 22.53125, |
| "rewards/rejected": -39.82500076293945, |
| "step": 815 |
| }, |
| { |
| "epoch": 5.861759425493716, |
| "grad_norm": 3.889610767364502, |
| "learning_rate": 0.0001122845417936293, |
| "logits/chosen": -5.235937595367432, |
| "logits/rejected": -7.465624809265137, |
| "logps/chosen": -300.8999938964844, |
| "logps/rejected": -565.2999877929688, |
| "loss": 0.0517, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -23.375, |
| "rewards/margins": 25.899999618530273, |
| "rewards/rejected": -49.3125, |
| "step": 820 |
| }, |
| { |
| "epoch": 5.897666068222621, |
| "grad_norm": 7.49802303314209, |
| "learning_rate": 0.00011226405900575031, |
| "logits/chosen": -5.620312690734863, |
| "logits/rejected": -7.892187595367432, |
| "logps/chosen": -335.0, |
| "logps/rejected": -602.4000244140625, |
| "loss": 0.0668, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -26.631250381469727, |
| "rewards/margins": 26.475000381469727, |
| "rewards/rejected": -53.099998474121094, |
| "step": 825 |
| }, |
| { |
| "epoch": 5.933572710951526, |
| "grad_norm": 2.100908041000366, |
| "learning_rate": 0.00011224343678685797, |
| "logits/chosen": -6.767187595367432, |
| "logits/rejected": -8.921875, |
| "logps/chosen": -385.0, |
| "logps/rejected": -648.7999877929688, |
| "loss": 0.0552, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -31.762500762939453, |
| "rewards/margins": 25.825000762939453, |
| "rewards/rejected": -57.625, |
| "step": 830 |
| }, |
| { |
| "epoch": 5.9694793536804305, |
| "grad_norm": 0.25847867131233215, |
| "learning_rate": 0.00011222267518886872, |
| "logits/chosen": -6.832812309265137, |
| "logits/rejected": -8.893750190734863, |
| "logps/chosen": -343.79998779296875, |
| "logps/rejected": -640.9000244140625, |
| "loss": 0.0192, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -27.587499618530273, |
| "rewards/margins": 29.162500381469727, |
| "rewards/rejected": -56.724998474121094, |
| "step": 835 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.0401495099067688, |
| "learning_rate": 0.00011220177426404981, |
| "logits/chosen": -5.762867450714111, |
| "logits/rejected": -8.838234901428223, |
| "logps/chosen": -296.70587158203125, |
| "logps/rejected": -677.6470336914062, |
| "loss": 0.0087, |
| "rewards/accuracies": 0.9963235259056091, |
| "rewards/chosen": -22.544116973876953, |
| "rewards/margins": 37.463233947753906, |
| "rewards/rejected": -60.014705657958984, |
| "step": 840 |
| }, |
| { |
| "epoch": 6.0359066427289045, |
| "grad_norm": 0.012815682217478752, |
| "learning_rate": 0.00011218073406501931, |
| "logits/chosen": -6.901562690734863, |
| "logits/rejected": -9.628125190734863, |
| "logps/chosen": -326.0, |
| "logps/rejected": -691.5999755859375, |
| "loss": 0.0166, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -25.706249237060547, |
| "rewards/margins": 36.09375, |
| "rewards/rejected": -61.79999923706055, |
| "step": 845 |
| }, |
| { |
| "epoch": 6.07181328545781, |
| "grad_norm": 0.030361467972397804, |
| "learning_rate": 0.0001121595546447459, |
| "logits/chosen": -6.171875, |
| "logits/rejected": -8.364062309265137, |
| "logps/chosen": -288.5, |
| "logps/rejected": -570.2999877929688, |
| "loss": 0.0259, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -22.3125, |
| "rewards/margins": 27.575000762939453, |
| "rewards/rejected": -49.837501525878906, |
| "step": 850 |
| }, |
| { |
| "epoch": 6.07181328545781, |
| "eval_logits/chosen": -4.895220756530762, |
| "eval_logits/rejected": -7.121323585510254, |
| "eval_logps/chosen": -238.05882263183594, |
| "eval_logps/rejected": -518.5882568359375, |
| "eval_loss": 0.007082384079694748, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -16.643383026123047, |
| "eval_rewards/margins": 27.455883026123047, |
| "eval_rewards/rejected": -44.10293960571289, |
| "eval_runtime": 8.5014, |
| "eval_samples_per_second": 31.406, |
| "eval_steps_per_second": 2.0, |
| "step": 850 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 72, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0001 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|