| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 938, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010666666666666666, |
| "grad_norm": 46.42798146527443, |
| "learning_rate": 9.574468085106382e-08, |
| "logits/chosen": 2.382258415222168, |
| "logits/rejected": 2.7442336082458496, |
| "logps/chosen": -138.31182861328125, |
| "logps/rejected": -147.63272094726562, |
| "loss": 0.6936, |
| "rewards/accuracies": 0.4281249940395355, |
| "rewards/chosen": -0.0015843239380046725, |
| "rewards/margins": 3.248453140258789e-06, |
| "rewards/rejected": -0.0015875725075602531, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.021333333333333333, |
| "grad_norm": 48.61255110254263, |
| "learning_rate": 2.0212765957446807e-07, |
| "logits/chosen": 2.313032627105713, |
| "logits/rejected": 2.844193458557129, |
| "logps/chosen": -138.99807739257812, |
| "logps/rejected": -135.5998992919922, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.008929151110351086, |
| "rewards/margins": 0.0014812585432082415, |
| "rewards/rejected": -0.010410408489406109, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 41.089142727664616, |
| "learning_rate": 3.085106382978723e-07, |
| "logits/chosen": 2.291680097579956, |
| "logits/rejected": 2.7390356063842773, |
| "logps/chosen": -126.77516174316406, |
| "logps/rejected": -131.89663696289062, |
| "loss": 0.6804, |
| "rewards/accuracies": 0.6312499642372131, |
| "rewards/chosen": -0.013881472870707512, |
| "rewards/margins": 0.028002463281154633, |
| "rewards/rejected": -0.041883938014507294, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.042666666666666665, |
| "grad_norm": 45.92788214228633, |
| "learning_rate": 4.148936170212766e-07, |
| "logits/chosen": 2.523430585861206, |
| "logits/rejected": 2.84600567817688, |
| "logps/chosen": -134.11170959472656, |
| "logps/rejected": -140.52801513671875, |
| "loss": 0.6492, |
| "rewards/accuracies": 0.6937500238418579, |
| "rewards/chosen": -0.009366204962134361, |
| "rewards/margins": 0.1010168194770813, |
| "rewards/rejected": -0.11038301885128021, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05333333333333334, |
| "grad_norm": 39.55493674378011, |
| "learning_rate": 5.212765957446809e-07, |
| "logits/chosen": 2.463056802749634, |
| "logits/rejected": 3.003122329711914, |
| "logps/chosen": -134.13511657714844, |
| "logps/rejected": -153.1929473876953, |
| "loss": 0.6023, |
| "rewards/accuracies": 0.7468750476837158, |
| "rewards/chosen": -0.04466788098216057, |
| "rewards/margins": 0.24204480648040771, |
| "rewards/rejected": -0.2867127060890198, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 30.3497904000491, |
| "learning_rate": 6.276595744680851e-07, |
| "logits/chosen": 2.2184526920318604, |
| "logits/rejected": 2.7464191913604736, |
| "logps/chosen": -125.64830017089844, |
| "logps/rejected": -129.73838806152344, |
| "loss": 0.5494, |
| "rewards/accuracies": 0.7531250715255737, |
| "rewards/chosen": 0.06083741411566734, |
| "rewards/margins": 0.429790735244751, |
| "rewards/rejected": -0.3689533472061157, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07466666666666667, |
| "grad_norm": 38.90457231803085, |
| "learning_rate": 7.340425531914893e-07, |
| "logits/chosen": 2.197575569152832, |
| "logits/rejected": 2.578211784362793, |
| "logps/chosen": -135.662353515625, |
| "logps/rejected": -136.03961181640625, |
| "loss": 0.5285, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": 0.29434823989868164, |
| "rewards/margins": 0.5787851810455322, |
| "rewards/rejected": -0.28443700075149536, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08533333333333333, |
| "grad_norm": 43.610746377169825, |
| "learning_rate": 8.404255319148936e-07, |
| "logits/chosen": 2.2334251403808594, |
| "logits/rejected": 2.8087658882141113, |
| "logps/chosen": -132.78274536132812, |
| "logps/rejected": -142.40440368652344, |
| "loss": 0.4869, |
| "rewards/accuracies": 0.7718750834465027, |
| "rewards/chosen": 0.16994354128837585, |
| "rewards/margins": 0.8923333287239075, |
| "rewards/rejected": -0.722389817237854, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 26.982449978848148, |
| "learning_rate": 9.468085106382978e-07, |
| "logits/chosen": 2.2855160236358643, |
| "logits/rejected": 2.6503257751464844, |
| "logps/chosen": -136.7552947998047, |
| "logps/rejected": -148.2338104248047, |
| "loss": 0.4819, |
| "rewards/accuracies": 0.778124988079071, |
| "rewards/chosen": -0.1698712259531021, |
| "rewards/margins": 1.1215158700942993, |
| "rewards/rejected": -1.2913870811462402, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 29.896207171331433, |
| "learning_rate": 9.999134070902206e-07, |
| "logits/chosen": 2.382267951965332, |
| "logits/rejected": 2.517061948776245, |
| "logps/chosen": -126.52420806884766, |
| "logps/rejected": -158.33543395996094, |
| "loss": 0.4183, |
| "rewards/accuracies": 0.778124988079071, |
| "rewards/chosen": -0.3784619867801666, |
| "rewards/margins": 1.4470703601837158, |
| "rewards/rejected": -1.82553231716156, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11733333333333333, |
| "grad_norm": 22.595589720258207, |
| "learning_rate": 9.99220843761565e-07, |
| "logits/chosen": 2.2189204692840576, |
| "logits/rejected": 2.7971031665802, |
| "logps/chosen": -145.4073486328125, |
| "logps/rejected": -148.38609313964844, |
| "loss": 0.3777, |
| "rewards/accuracies": 0.8343750238418579, |
| "rewards/chosen": -0.6497014760971069, |
| "rewards/margins": 1.5745065212249756, |
| "rewards/rejected": -2.224207878112793, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 30.721800605452906, |
| "learning_rate": 9.97836676558346e-07, |
| "logits/chosen": 2.174267292022705, |
| "logits/rejected": 2.853877544403076, |
| "logps/chosen": -142.8085479736328, |
| "logps/rejected": -149.57443237304688, |
| "loss": 0.4048, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.5126703381538391, |
| "rewards/margins": 1.7328587770462036, |
| "rewards/rejected": -2.2455291748046875, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13866666666666666, |
| "grad_norm": 34.136357440276, |
| "learning_rate": 9.957628230595525e-07, |
| "logits/chosen": 2.1472320556640625, |
| "logits/rejected": 2.640443801879883, |
| "logps/chosen": -141.8199005126953, |
| "logps/rejected": -155.64419555664062, |
| "loss": 0.4673, |
| "rewards/accuracies": 0.746874988079071, |
| "rewards/chosen": -0.25329455733299255, |
| "rewards/margins": 1.7003322839736938, |
| "rewards/rejected": -1.9536267518997192, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14933333333333335, |
| "grad_norm": 42.43483647388403, |
| "learning_rate": 9.9300215631252e-07, |
| "logits/chosen": 2.1069414615631104, |
| "logits/rejected": 2.291423797607422, |
| "logps/chosen": -125.19463348388672, |
| "logps/rejected": -158.3802032470703, |
| "loss": 0.3758, |
| "rewards/accuracies": 0.8437499403953552, |
| "rewards/chosen": 0.07672759145498276, |
| "rewards/margins": 2.0006155967712402, |
| "rewards/rejected": -1.9238877296447754, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 36.02384513576028, |
| "learning_rate": 9.895585008527075e-07, |
| "logits/chosen": 2.3169870376586914, |
| "logits/rejected": 2.500821590423584, |
| "logps/chosen": -138.5598907470703, |
| "logps/rejected": -168.0284881591797, |
| "loss": 0.4251, |
| "rewards/accuracies": 0.8218750953674316, |
| "rewards/chosen": -0.4848916530609131, |
| "rewards/margins": 2.033202886581421, |
| "rewards/rejected": -2.518094301223755, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17066666666666666, |
| "grad_norm": 15.042416822714568, |
| "learning_rate": 9.854366274053124e-07, |
| "logits/chosen": 2.0532939434051514, |
| "logits/rejected": 2.4741499423980713, |
| "logps/chosen": -135.32594299316406, |
| "logps/rejected": -155.9827880859375, |
| "loss": 0.3675, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -0.19416652619838715, |
| "rewards/margins": 2.0035641193389893, |
| "rewards/rejected": -2.197730541229248, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18133333333333335, |
| "grad_norm": 29.38433081829733, |
| "learning_rate": 9.806422462760687e-07, |
| "logits/chosen": 2.0831096172332764, |
| "logits/rejected": 2.4652578830718994, |
| "logps/chosen": -139.71656799316406, |
| "logps/rejected": -160.39244079589844, |
| "loss": 0.4298, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -0.6061689853668213, |
| "rewards/margins": 2.0064656734466553, |
| "rewards/rejected": -2.6126346588134766, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 30.944868310764818, |
| "learning_rate": 9.7518199944038e-07, |
| "logits/chosen": 2.0607786178588867, |
| "logits/rejected": 2.363945245742798, |
| "logps/chosen": -136.1494598388672, |
| "logps/rejected": -157.36947631835938, |
| "loss": 0.4231, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.39128583669662476, |
| "rewards/margins": 1.969771385192871, |
| "rewards/rejected": -2.3610572814941406, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.20266666666666666, |
| "grad_norm": 42.05053142859476, |
| "learning_rate": 9.690634513417486e-07, |
| "logits/chosen": 2.169426918029785, |
| "logits/rejected": 2.328373670578003, |
| "logps/chosen": -135.956298828125, |
| "logps/rejected": -170.6649169921875, |
| "loss": 0.3561, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.7195563912391663, |
| "rewards/margins": 2.492656946182251, |
| "rewards/rejected": -3.2122128009796143, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 35.520349085403645, |
| "learning_rate": 9.622950784122471e-07, |
| "logits/chosen": 2.020885944366455, |
| "logits/rejected": 2.4902377128601074, |
| "logps/chosen": -139.86936950683594, |
| "logps/rejected": -165.3739013671875, |
| "loss": 0.3948, |
| "rewards/accuracies": 0.8093750476837158, |
| "rewards/chosen": -0.8580716252326965, |
| "rewards/margins": 2.244415283203125, |
| "rewards/rejected": -3.1024868488311768, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 30.914393418212303, |
| "learning_rate": 9.54886257329555e-07, |
| "logits/chosen": 2.032724380493164, |
| "logits/rejected": 2.399937152862549, |
| "logps/chosen": -131.96702575683594, |
| "logps/rejected": -155.24063110351562, |
| "loss": 0.4144, |
| "rewards/accuracies": 0.8125000596046448, |
| "rewards/chosen": -0.2814589738845825, |
| "rewards/margins": 2.195185422897339, |
| "rewards/rejected": -2.476644515991211, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.23466666666666666, |
| "grad_norm": 33.70657801293605, |
| "learning_rate": 9.468472520268205e-07, |
| "logits/chosen": 2.0024030208587646, |
| "logits/rejected": 2.4853296279907227, |
| "logps/chosen": -137.35540771484375, |
| "logps/rejected": -155.17100524902344, |
| "loss": 0.38, |
| "rewards/accuracies": 0.8218749761581421, |
| "rewards/chosen": -0.004300939850509167, |
| "rewards/margins": 2.1314268112182617, |
| "rewards/rejected": -2.135727643966675, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.24533333333333332, |
| "grad_norm": 37.495577601362356, |
| "learning_rate": 9.381891994733519e-07, |
| "logits/chosen": 2.243539810180664, |
| "logits/rejected": 2.352928400039673, |
| "logps/chosen": -129.24896240234375, |
| "logps/rejected": -169.98135375976562, |
| "loss": 0.3679, |
| "rewards/accuracies": 0.8218750357627869, |
| "rewards/chosen": -0.28722256422042847, |
| "rewards/margins": 2.4391627311706543, |
| "rewards/rejected": -2.7263851165771484, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 25.343951064869156, |
| "learning_rate": 9.289240942458321e-07, |
| "logits/chosen": 2.0728087425231934, |
| "logits/rejected": 2.4773340225219727, |
| "logps/chosen": -135.0269775390625, |
| "logps/rejected": -168.2124481201172, |
| "loss": 0.3123, |
| "rewards/accuracies": 0.846875011920929, |
| "rewards/chosen": -0.4651724398136139, |
| "rewards/margins": 2.803205966949463, |
| "rewards/rejected": -3.268378734588623, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 29.0620379587301, |
| "learning_rate": 9.190647719114326e-07, |
| "logits/chosen": 1.8383233547210693, |
| "logits/rejected": 2.2059688568115234, |
| "logps/chosen": -136.12245178222656, |
| "logps/rejected": -174.0221405029297, |
| "loss": 0.4021, |
| "rewards/accuracies": 0.8250000476837158, |
| "rewards/chosen": -0.4718845784664154, |
| "rewards/margins": 2.437692642211914, |
| "rewards/rejected": -2.9095773696899414, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2773333333333333, |
| "grad_norm": 32.206451154260584, |
| "learning_rate": 9.086248912458483e-07, |
| "logits/chosen": 1.901822805404663, |
| "logits/rejected": 2.3750545978546143, |
| "logps/chosen": -137.2100830078125, |
| "logps/rejected": -160.05950927734375, |
| "loss": 0.3368, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.45527350902557373, |
| "rewards/margins": 2.3181633949279785, |
| "rewards/rejected": -2.7734367847442627, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 22.28437598082458, |
| "learning_rate": 8.976189153108852e-07, |
| "logits/chosen": 1.8406670093536377, |
| "logits/rejected": 2.231459617614746, |
| "logps/chosen": -136.8876495361328, |
| "logps/rejected": -162.5147247314453, |
| "loss": 0.3545, |
| "rewards/accuracies": 0.8468750715255737, |
| "rewards/chosen": -0.5715955495834351, |
| "rewards/margins": 2.6194005012512207, |
| "rewards/rejected": -3.1909961700439453, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2986666666666667, |
| "grad_norm": 26.163035485727516, |
| "learning_rate": 8.860620914178187e-07, |
| "logits/chosen": 1.7282989025115967, |
| "logits/rejected": 2.090369462966919, |
| "logps/chosen": -133.57354736328125, |
| "logps/rejected": -169.84217834472656, |
| "loss": 0.3629, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.4477410912513733, |
| "rewards/margins": 2.430133581161499, |
| "rewards/rejected": -2.8778748512268066, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.30933333333333335, |
| "grad_norm": 27.544558000777418, |
| "learning_rate": 8.739704300042778e-07, |
| "logits/chosen": 1.6349306106567383, |
| "logits/rejected": 2.2942090034484863, |
| "logps/chosen": -137.38523864746094, |
| "logps/rejected": -158.7125244140625, |
| "loss": 0.3788, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.5446420907974243, |
| "rewards/margins": 2.5425944328308105, |
| "rewards/rejected": -3.0872364044189453, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 28.15835022856019, |
| "learning_rate": 8.613606824539197e-07, |
| "logits/chosen": 1.611955165863037, |
| "logits/rejected": 2.1477112770080566, |
| "logps/chosen": -137.20155334472656, |
| "logps/rejected": -163.80764770507812, |
| "loss": 0.3507, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.41365841031074524, |
| "rewards/margins": 2.4483883380889893, |
| "rewards/rejected": -2.8620471954345703, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.33066666666666666, |
| "grad_norm": 26.9394473330418, |
| "learning_rate": 8.482503178896226e-07, |
| "logits/chosen": 1.5950888395309448, |
| "logits/rejected": 1.8947237730026245, |
| "logps/chosen": -138.35462951660156, |
| "logps/rejected": -167.69569396972656, |
| "loss": 0.3696, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -0.8275109529495239, |
| "rewards/margins": 2.685105323791504, |
| "rewards/rejected": -3.5126163959503174, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3413333333333333, |
| "grad_norm": 23.332637225870037, |
| "learning_rate": 8.346574989723469e-07, |
| "logits/chosen": 1.7531938552856445, |
| "logits/rejected": 2.2487497329711914, |
| "logps/chosen": -143.16526794433594, |
| "logps/rejected": -166.20895385742188, |
| "loss": 0.3582, |
| "rewards/accuracies": 0.8437500596046448, |
| "rewards/chosen": -0.5256407260894775, |
| "rewards/margins": 2.5411202907562256, |
| "rewards/rejected": -3.0667612552642822, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 21.87942904244536, |
| "learning_rate": 8.206010567391916e-07, |
| "logits/chosen": 1.593059778213501, |
| "logits/rejected": 2.1650094985961914, |
| "logps/chosen": -131.16326904296875, |
| "logps/rejected": -158.94223022460938, |
| "loss": 0.3461, |
| "rewards/accuracies": 0.8468750715255737, |
| "rewards/chosen": -0.4646984934806824, |
| "rewards/margins": 2.3126156330108643, |
| "rewards/rejected": -2.7773139476776123, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3626666666666667, |
| "grad_norm": 31.101098463657916, |
| "learning_rate": 8.061004645155048e-07, |
| "logits/chosen": 1.5677554607391357, |
| "logits/rejected": 1.9733402729034424, |
| "logps/chosen": -146.71609497070312, |
| "logps/rejected": -171.69955444335938, |
| "loss": 0.3235, |
| "rewards/accuracies": 0.8656250238418579, |
| "rewards/chosen": -0.48211732506752014, |
| "rewards/margins": 2.4466898441314697, |
| "rewards/rejected": -2.928807020187378, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.37333333333333335, |
| "grad_norm": 30.645699097464068, |
| "learning_rate": 7.911758109371889e-07, |
| "logits/chosen": 1.6403833627700806, |
| "logits/rejected": 1.8808367252349854, |
| "logps/chosen": -140.71217346191406, |
| "logps/rejected": -175.47921752929688, |
| "loss": 0.3459, |
| "rewards/accuracies": 0.8437500596046448, |
| "rewards/chosen": -0.7281967401504517, |
| "rewards/margins": 2.3733932971954346, |
| "rewards/rejected": -3.1015896797180176, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 24.51134274765049, |
| "learning_rate": 7.758477721205765e-07, |
| "logits/chosen": 1.7783101797103882, |
| "logits/rejected": 2.1137070655822754, |
| "logps/chosen": -137.43540954589844, |
| "logps/rejected": -163.09820556640625, |
| "loss": 0.3483, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.7030624747276306, |
| "rewards/margins": 2.4687132835388184, |
| "rewards/rejected": -3.1717755794525146, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.39466666666666667, |
| "grad_norm": 18.074083008686905, |
| "learning_rate": 7.601375830184295e-07, |
| "logits/chosen": 1.5980160236358643, |
| "logits/rejected": 1.8996552228927612, |
| "logps/chosen": -140.5148162841797, |
| "logps/rejected": -171.46630859375, |
| "loss": 0.313, |
| "rewards/accuracies": 0.8875000476837158, |
| "rewards/chosen": -0.7159416675567627, |
| "rewards/margins": 2.979588508605957, |
| "rewards/rejected": -3.695530414581299, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4053333333333333, |
| "grad_norm": 29.247823050349254, |
| "learning_rate": 7.440670080017454e-07, |
| "logits/chosen": 1.5842480659484863, |
| "logits/rejected": 2.019357919692993, |
| "logps/chosen": -132.16673278808594, |
| "logps/rejected": -162.817138671875, |
| "loss": 0.3415, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.69346022605896, |
| "rewards/margins": 2.366546392440796, |
| "rewards/rejected": -3.060007095336914, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 31.9631460774584, |
| "learning_rate": 7.276583107081242e-07, |
| "logits/chosen": 1.6213833093643188, |
| "logits/rejected": 2.0778937339782715, |
| "logps/chosen": -141.9418487548828, |
| "logps/rejected": -174.61468505859375, |
| "loss": 0.3329, |
| "rewards/accuracies": 0.8624999523162842, |
| "rewards/chosen": -1.060402750968933, |
| "rewards/margins": 2.9680538177490234, |
| "rewards/rejected": -4.028456211090088, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 26.16381075907044, |
| "learning_rate": 7.109342231984698e-07, |
| "logits/chosen": 1.3554099798202515, |
| "logits/rejected": 2.0283570289611816, |
| "logps/chosen": -138.100341796875, |
| "logps/rejected": -159.7268829345703, |
| "loss": 0.3786, |
| "rewards/accuracies": 0.8343749642372131, |
| "rewards/chosen": -1.067877173423767, |
| "rewards/margins": 2.4551329612731934, |
| "rewards/rejected": -3.52301025390625, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.43733333333333335, |
| "grad_norm": 11.140411163657106, |
| "learning_rate": 6.939179144647515e-07, |
| "logits/chosen": 1.385094165802002, |
| "logits/rejected": 1.9930095672607422, |
| "logps/chosen": -129.92941284179688, |
| "logps/rejected": -155.423095703125, |
| "loss": 0.2945, |
| "rewards/accuracies": 0.878125011920929, |
| "rewards/chosen": -0.29770562052726746, |
| "rewards/margins": 2.735492706298828, |
| "rewards/rejected": -3.033198118209839, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 28.56957418913061, |
| "learning_rate": 6.766329583324581e-07, |
| "logits/chosen": 1.437608003616333, |
| "logits/rejected": 1.9395537376403809, |
| "logps/chosen": -131.2879638671875, |
| "logps/rejected": -159.0998992919922, |
| "loss": 0.3127, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -0.49805647134780884, |
| "rewards/margins": 2.613166570663452, |
| "rewards/rejected": -3.111222743988037, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.45866666666666667, |
| "grad_norm": 33.23437534680263, |
| "learning_rate": 6.591033008022067e-07, |
| "logits/chosen": 1.3858729600906372, |
| "logits/rejected": 1.6989398002624512, |
| "logps/chosen": -134.23770141601562, |
| "logps/rejected": -176.20919799804688, |
| "loss": 0.3096, |
| "rewards/accuracies": 0.871874988079071, |
| "rewards/chosen": -0.535869836807251, |
| "rewards/margins": 3.0371181964874268, |
| "rewards/rejected": -3.572988510131836, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4693333333333333, |
| "grad_norm": 28.346897945132458, |
| "learning_rate": 6.413532268757537e-07, |
| "logits/chosen": 1.4444637298583984, |
| "logits/rejected": 1.6581867933273315, |
| "logps/chosen": -138.23431396484375, |
| "logps/rejected": -168.9981689453125, |
| "loss": 0.4194, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.7419812083244324, |
| "rewards/margins": 2.335297107696533, |
| "rewards/rejected": -3.0772783756256104, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 29.164626019209308, |
| "learning_rate": 6.234073269123653e-07, |
| "logits/chosen": 1.393236756324768, |
| "logits/rejected": 1.7022786140441895, |
| "logps/chosen": -142.96592712402344, |
| "logps/rejected": -173.4695587158203, |
| "loss": 0.3408, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.0506477355957031, |
| "rewards/margins": 2.8269805908203125, |
| "rewards/rejected": -3.8776283264160156, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.49066666666666664, |
| "grad_norm": 27.21924736505641, |
| "learning_rate": 6.052904625621555e-07, |
| "logits/chosen": 1.4608548879623413, |
| "logits/rejected": 1.7432273626327515, |
| "logps/chosen": -138.21566772460938, |
| "logps/rejected": -167.56101989746094, |
| "loss": 0.3906, |
| "rewards/accuracies": 0.8343750238418579, |
| "rewards/chosen": -0.971255362033844, |
| "rewards/margins": 2.769684314727783, |
| "rewards/rejected": -3.7409396171569824, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5013333333333333, |
| "grad_norm": 23.667407037284928, |
| "learning_rate": 5.870277323235871e-07, |
| "logits/chosen": 1.3296135663986206, |
| "logits/rejected": 1.8589414358139038, |
| "logps/chosen": -145.2051239013672, |
| "logps/rejected": -172.4295196533203, |
| "loss": 0.3328, |
| "rewards/accuracies": 0.8593749403953552, |
| "rewards/chosen": -0.9092783331871033, |
| "rewards/margins": 2.5038325786590576, |
| "rewards/rejected": -3.4131109714508057, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 32.4308081410924, |
| "learning_rate": 5.686444367728494e-07, |
| "logits/chosen": 1.3602862358093262, |
| "logits/rejected": 1.752657413482666, |
| "logps/chosen": -137.1173858642578, |
| "logps/rejected": -167.48741149902344, |
| "loss": 0.3152, |
| "rewards/accuracies": 0.8812499642372131, |
| "rewards/chosen": -0.6123771071434021, |
| "rewards/margins": 2.6645078659057617, |
| "rewards/rejected": -3.2768850326538086, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5226666666666666, |
| "grad_norm": 22.650259255481703, |
| "learning_rate": 5.50166043513287e-07, |
| "logits/chosen": 1.3106579780578613, |
| "logits/rejected": 1.7208105325698853, |
| "logps/chosen": -132.9499969482422, |
| "logps/rejected": -168.179443359375, |
| "loss": 0.3225, |
| "rewards/accuracies": 0.8250000476837158, |
| "rewards/chosen": -0.5453524589538574, |
| "rewards/margins": 2.830275058746338, |
| "rewards/rejected": -3.3756275177001953, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 34.87144459717604, |
| "learning_rate": 5.316181518934318e-07, |
| "logits/chosen": 1.4427666664123535, |
| "logits/rejected": 1.6337779760360718, |
| "logps/chosen": -131.83047485351562, |
| "logps/rejected": -163.92172241210938, |
| "loss": 0.3208, |
| "rewards/accuracies": 0.8531249761581421, |
| "rewards/chosen": -0.47837477922439575, |
| "rewards/margins": 2.7933568954467773, |
| "rewards/rejected": -3.2717316150665283, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 31.52951767510459, |
| "learning_rate": 5.130264575425224e-07, |
| "logits/chosen": 1.4013196229934692, |
| "logits/rejected": 1.6610275506973267, |
| "logps/chosen": -132.53924560546875, |
| "logps/rejected": -167.6267547607422, |
| "loss": 0.272, |
| "rewards/accuracies": 0.8593749403953552, |
| "rewards/chosen": -0.5326389074325562, |
| "rewards/margins": 2.8615810871124268, |
| "rewards/rejected": -3.3942196369171143, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5546666666666666, |
| "grad_norm": 29.37500979396824, |
| "learning_rate": 4.944167167726367e-07, |
| "logits/chosen": 1.4267271757125854, |
| "logits/rejected": 1.6526139974594116, |
| "logps/chosen": -133.26963806152344, |
| "logps/rejected": -173.13449096679688, |
| "loss": 0.3273, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.6281304955482483, |
| "rewards/margins": 3.023805856704712, |
| "rewards/rejected": -3.6519365310668945, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5653333333333334, |
| "grad_norm": 45.4295336560468, |
| "learning_rate": 4.758147108967584e-07, |
| "logits/chosen": 1.5470153093338013, |
| "logits/rejected": 1.7743972539901733, |
| "logps/chosen": -150.69190979003906, |
| "logps/rejected": -186.68701171875, |
| "loss": 0.3734, |
| "rewards/accuracies": 0.8281250596046448, |
| "rewards/chosen": -1.2518389225006104, |
| "rewards/margins": 2.885488271713257, |
| "rewards/rejected": -4.137327194213867, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 21.855665145983874, |
| "learning_rate": 4.572462105122077e-07, |
| "logits/chosen": 1.3561698198318481, |
| "logits/rejected": 1.7270151376724243, |
| "logps/chosen": -139.5657501220703, |
| "logps/rejected": -175.99508666992188, |
| "loss": 0.2833, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -1.112634301185608, |
| "rewards/margins": 3.2117066383361816, |
| "rewards/rejected": -4.3243408203125, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5866666666666667, |
| "grad_norm": 24.367461053001698, |
| "learning_rate": 4.3873693979891696e-07, |
| "logits/chosen": 1.4616541862487793, |
| "logits/rejected": 1.7883496284484863, |
| "logps/chosen": -140.05160522460938, |
| "logps/rejected": -181.77622985839844, |
| "loss": 0.2513, |
| "rewards/accuracies": 0.9062500596046448, |
| "rewards/chosen": -0.8183116912841797, |
| "rewards/margins": 3.3123300075531006, |
| "rewards/rejected": -4.130641937255859, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5973333333333334, |
| "grad_norm": 23.757622976541885, |
| "learning_rate": 4.203125408820105e-07, |
| "logits/chosen": 1.4851560592651367, |
| "logits/rejected": 1.794302225112915, |
| "logps/chosen": -134.1141357421875, |
| "logps/rejected": -170.49234008789062, |
| "loss": 0.3149, |
| "rewards/accuracies": 0.8843750357627869, |
| "rewards/chosen": -0.9360587000846863, |
| "rewards/margins": 2.853070020675659, |
| "rewards/rejected": -3.7891287803649902, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 21.213296976158986, |
| "learning_rate": 4.019985383080632e-07, |
| "logits/chosen": 1.237152338027954, |
| "logits/rejected": 1.7590258121490479, |
| "logps/chosen": -138.93270874023438, |
| "logps/rejected": -160.0484619140625, |
| "loss": 0.3722, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -0.7550384998321533, |
| "rewards/margins": 2.6985340118408203, |
| "rewards/rejected": -3.4535727500915527, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6186666666666667, |
| "grad_norm": 26.294885755983195, |
| "learning_rate": 3.8382030368424454e-07, |
| "logits/chosen": 1.3558965921401978, |
| "logits/rejected": 1.6953362226486206, |
| "logps/chosen": -137.65817260742188, |
| "logps/rejected": -170.1033477783203, |
| "loss": 0.3269, |
| "rewards/accuracies": 0.8531249761581421, |
| "rewards/chosen": -0.6933549046516418, |
| "rewards/margins": 2.7456483840942383, |
| "rewards/rejected": -3.4390032291412354, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6293333333333333, |
| "grad_norm": 33.07924807089865, |
| "learning_rate": 3.6580302052934297e-07, |
| "logits/chosen": 1.2663443088531494, |
| "logits/rejected": 1.5939358472824097, |
| "logps/chosen": -132.91456604003906, |
| "logps/rejected": -162.4006805419922, |
| "loss": 0.331, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.6310861706733704, |
| "rewards/margins": 2.8310067653656006, |
| "rewards/rejected": -3.4620931148529053, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 20.115198065979413, |
| "learning_rate": 3.479716493853611e-07, |
| "logits/chosen": 1.18405020236969, |
| "logits/rejected": 1.536871075630188, |
| "logps/chosen": -131.33584594726562, |
| "logps/rejected": -168.03636169433594, |
| "loss": 0.2805, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -0.6372289061546326, |
| "rewards/margins": 3.1736860275268555, |
| "rewards/rejected": -3.810914993286133, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6506666666666666, |
| "grad_norm": 19.273605121929624, |
| "learning_rate": 3.303508932380132e-07, |
| "logits/chosen": 1.2737094163894653, |
| "logits/rejected": 1.6445541381835938, |
| "logps/chosen": -136.96719360351562, |
| "logps/rejected": -174.71083068847656, |
| "loss": 0.3445, |
| "rewards/accuracies": 0.8562500476837158, |
| "rewards/chosen": -0.6345027685165405, |
| "rewards/margins": 2.950732946395874, |
| "rewards/rejected": -3.585235834121704, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6613333333333333, |
| "grad_norm": 30.667064690871456, |
| "learning_rate": 3.129651632940362e-07, |
| "logits/chosen": 1.292923927307129, |
| "logits/rejected": 1.5391473770141602, |
| "logps/chosen": -141.96238708496094, |
| "logps/rejected": -173.04873657226562, |
| "loss": 0.2906, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.597599983215332, |
| "rewards/margins": 3.03706955909729, |
| "rewards/rejected": -3.634669303894043, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 25.676210849380702, |
| "learning_rate": 2.958385451627181e-07, |
| "logits/chosen": 1.231809139251709, |
| "logits/rejected": 1.600536823272705, |
| "logps/chosen": -139.60745239257812, |
| "logps/rejected": -178.7290496826172, |
| "loss": 0.2972, |
| "rewards/accuracies": 0.871874988079071, |
| "rewards/chosen": -0.7131984829902649, |
| "rewards/margins": 3.2930028438568115, |
| "rewards/rejected": -4.006201267242432, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6826666666666666, |
| "grad_norm": 30.707834181954617, |
| "learning_rate": 2.7899476548850043e-07, |
| "logits/chosen": 1.299914836883545, |
| "logits/rejected": 1.624165415763855, |
| "logps/chosen": -139.61639404296875, |
| "logps/rejected": -172.74610900878906, |
| "loss": 0.3445, |
| "rewards/accuracies": 0.8500000834465027, |
| "rewards/chosen": -0.7801929116249084, |
| "rewards/margins": 2.922720432281494, |
| "rewards/rejected": -3.702913284301758, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6933333333333334, |
| "grad_norm": 26.04525358558716, |
| "learning_rate": 2.6245715908087804e-07, |
| "logits/chosen": 1.3557870388031006, |
| "logits/rejected": 1.6440367698669434, |
| "logps/chosen": -138.72109985351562, |
| "logps/rejected": -166.85986328125, |
| "loss": 0.3891, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.8479130864143372, |
| "rewards/margins": 2.6863441467285156, |
| "rewards/rejected": -3.534257173538208, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 38.60539175189378, |
| "learning_rate": 2.462486365871338e-07, |
| "logits/chosen": 1.117995262145996, |
| "logits/rejected": 1.5660450458526611, |
| "logps/chosen": -139.3802490234375, |
| "logps/rejected": -170.3036346435547, |
| "loss": 0.3707, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.7362462878227234, |
| "rewards/margins": 2.719696521759033, |
| "rewards/rejected": -3.4559431076049805, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7146666666666667, |
| "grad_norm": 29.88817411661078, |
| "learning_rate": 2.3039165275269214e-07, |
| "logits/chosen": 1.2235151529312134, |
| "logits/rejected": 1.5882010459899902, |
| "logps/chosen": -133.53123474121094, |
| "logps/rejected": -165.77561950683594, |
| "loss": 0.3481, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.7423657178878784, |
| "rewards/margins": 2.9302687644958496, |
| "rewards/rejected": -3.6726343631744385, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7253333333333334, |
| "grad_norm": 26.689941208436807, |
| "learning_rate": 2.1490817531306775e-07, |
| "logits/chosen": 1.199167013168335, |
| "logits/rejected": 1.4307794570922852, |
| "logps/chosen": -138.82012939453125, |
| "logps/rejected": -172.9408416748047, |
| "loss": 0.3102, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -0.6661445498466492, |
| "rewards/margins": 3.147897958755493, |
| "rewards/rejected": -3.814042329788208, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 26.51411323153538, |
| "learning_rate": 1.9981965456049598e-07, |
| "logits/chosen": 1.2847270965576172, |
| "logits/rejected": 1.527156949043274, |
| "logps/chosen": -144.3943634033203, |
| "logps/rejected": -172.289306640625, |
| "loss": 0.3899, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -0.9222854971885681, |
| "rewards/margins": 2.454240322113037, |
| "rewards/rejected": -3.376525640487671, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7466666666666667, |
| "grad_norm": 28.227088451077186, |
| "learning_rate": 1.8514699362741738e-07, |
| "logits/chosen": 1.2821115255355835, |
| "logits/rejected": 1.5955793857574463, |
| "logps/chosen": -143.79371643066406, |
| "logps/rejected": -168.42379760742188, |
| "loss": 0.2676, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.8676904439926147, |
| "rewards/margins": 3.0038087368011475, |
| "rewards/rejected": -3.8714990615844727, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7573333333333333, |
| "grad_norm": 33.39329635319409, |
| "learning_rate": 1.7091051952797402e-07, |
| "logits/chosen": 1.2979816198349, |
| "logits/rejected": 1.5841856002807617, |
| "logps/chosen": -140.95835876464844, |
| "logps/rejected": -180.0380401611328, |
| "loss": 0.2788, |
| "rewards/accuracies": 0.878125011920929, |
| "rewards/chosen": -0.6436134576797485, |
| "rewards/margins": 3.160693645477295, |
| "rewards/rejected": -3.804307222366333, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 29.663583912080895, |
| "learning_rate": 1.571299549976456e-07, |
| "logits/chosen": 1.376884937286377, |
| "logits/rejected": 1.7266371250152588, |
| "logps/chosen": -134.19406127929688, |
| "logps/rejected": -173.26148986816406, |
| "loss": 0.3333, |
| "rewards/accuracies": 0.846875011920929, |
| "rewards/chosen": -0.7068169116973877, |
| "rewards/margins": 2.9150471687316895, |
| "rewards/rejected": -3.6218643188476562, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7786666666666666, |
| "grad_norm": 23.891419469257354, |
| "learning_rate": 1.4382439117002936e-07, |
| "logits/chosen": 1.2695218324661255, |
| "logits/rejected": 1.564518928527832, |
| "logps/chosen": -136.83729553222656, |
| "logps/rejected": -178.53765869140625, |
| "loss": 0.3344, |
| "rewards/accuracies": 0.8281250596046448, |
| "rewards/chosen": -0.796572744846344, |
| "rewards/margins": 2.8927884101867676, |
| "rewards/rejected": -3.689361095428467, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7893333333333333, |
| "grad_norm": 23.85877560834035, |
| "learning_rate": 1.310122611286223e-07, |
| "logits/chosen": 1.2756718397140503, |
| "logits/rejected": 1.7924617528915405, |
| "logps/chosen": -140.0908203125, |
| "logps/rejected": -170.6318817138672, |
| "loss": 0.3521, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -0.7002599239349365, |
| "rewards/margins": 3.0346288681030273, |
| "rewards/rejected": -3.734888792037964, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 31.266912743625518, |
| "learning_rate": 1.187113143702429e-07, |
| "logits/chosen": 1.1684255599975586, |
| "logits/rejected": 1.6504334211349487, |
| "logps/chosen": -138.86920166015625, |
| "logps/rejected": -172.11294555664062, |
| "loss": 0.3586, |
| "rewards/accuracies": 0.8625000715255737, |
| "rewards/chosen": -0.6606683135032654, |
| "rewards/margins": 2.9746954441070557, |
| "rewards/rejected": -3.6353635787963867, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8106666666666666, |
| "grad_norm": 27.45969382970204, |
| "learning_rate": 1.0693859221547113e-07, |
| "logits/chosen": 1.2002224922180176, |
| "logits/rejected": 1.4490042924880981, |
| "logps/chosen": -135.4897918701172, |
| "logps/rejected": -168.1680908203125, |
| "loss": 0.3814, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.8046519160270691, |
| "rewards/margins": 2.8222813606262207, |
| "rewards/rejected": -3.6269333362579346, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8213333333333334, |
| "grad_norm": 27.34194048181446, |
| "learning_rate": 9.571040420017323e-08, |
| "logits/chosen": 1.2844430208206177, |
| "logits/rejected": 1.5368437767028809, |
| "logps/chosen": -137.79006958007812, |
| "logps/rejected": -180.3217010498047, |
| "loss": 0.2863, |
| "rewards/accuracies": 0.8718750476837158, |
| "rewards/chosen": -0.5950286388397217, |
| "rewards/margins": 3.1103334426879883, |
| "rewards/rejected": -3.705361843109131, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 22.71500177829076, |
| "learning_rate": 8.504230548081498e-08, |
| "logits/chosen": 1.1534736156463623, |
| "logits/rejected": 1.5489188432693481, |
| "logps/chosen": -135.84356689453125, |
| "logps/rejected": -163.74789428710938, |
| "loss": 0.3245, |
| "rewards/accuracies": 0.8406250476837158, |
| "rewards/chosen": -0.6919512152671814, |
| "rewards/margins": 2.70729398727417, |
| "rewards/rejected": -3.399244785308838, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8426666666666667, |
| "grad_norm": 33.585488423583136, |
| "learning_rate": 7.494907528486799e-08, |
| "logits/chosen": 1.1899046897888184, |
| "logits/rejected": 1.558382272720337, |
| "logps/chosen": -144.0318603515625, |
| "logps/rejected": -173.2529754638672, |
| "loss": 0.2885, |
| "rewards/accuracies": 0.8687500357627869, |
| "rewards/chosen": -0.45610812306404114, |
| "rewards/margins": 2.986624240875244, |
| "rewards/rejected": -3.442732572555542, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 23.51906755223801, |
| "learning_rate": 6.54446964361619e-08, |
| "logits/chosen": 1.3951364755630493, |
| "logits/rejected": 1.5346930027008057, |
| "logps/chosen": -135.73672485351562, |
| "logps/rejected": -177.74282836914062, |
| "loss": 0.3352, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.6097190380096436, |
| "rewards/margins": 2.9158732891082764, |
| "rewards/rejected": -3.52559232711792, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 18.099767999889643, |
| "learning_rate": 5.6542335983547515e-08, |
| "logits/chosen": 1.0958290100097656, |
| "logits/rejected": 1.7642685174942017, |
| "logps/chosen": -137.59483337402344, |
| "logps/rejected": -159.05628967285156, |
| "loss": 0.3021, |
| "rewards/accuracies": 0.8468749523162842, |
| "rewards/chosen": -0.548262357711792, |
| "rewards/margins": 2.8460323810577393, |
| "rewards/rejected": -3.3942949771881104, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8746666666666667, |
| "grad_norm": 22.736224970421958, |
| "learning_rate": 4.8254326959706714e-08, |
| "logits/chosen": 1.1229872703552246, |
| "logits/rejected": 1.6853474378585815, |
| "logps/chosen": -139.9053497314453, |
| "logps/rejected": -174.4130401611328, |
| "loss": 0.3367, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -0.6694452166557312, |
| "rewards/margins": 2.701251268386841, |
| "rewards/rejected": -3.370696544647217, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8853333333333333, |
| "grad_norm": 28.380431064935998, |
| "learning_rate": 4.059215129538246e-08, |
| "logits/chosen": 1.2847788333892822, |
| "logits/rejected": 1.3838608264923096, |
| "logps/chosen": -139.0540313720703, |
| "logps/rejected": -172.7044677734375, |
| "loss": 0.3149, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.6882542371749878, |
| "rewards/margins": 2.8262672424316406, |
| "rewards/rejected": -3.514521360397339, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 24.47450186333891, |
| "learning_rate": 3.3566423912694045e-08, |
| "logits/chosen": 1.3405706882476807, |
| "logits/rejected": 1.4811562299728394, |
| "logps/chosen": -142.65008544921875, |
| "logps/rejected": -186.91891479492188, |
| "loss": 0.3103, |
| "rewards/accuracies": 0.8750000596046448, |
| "rewards/chosen": -0.6546552181243896, |
| "rewards/margins": 3.121577024459839, |
| "rewards/rejected": -3.7762320041656494, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9066666666666666, |
| "grad_norm": 26.10582659575781, |
| "learning_rate": 2.7186878019580194e-08, |
| "logits/chosen": 1.2894738912582397, |
| "logits/rejected": 1.5952208042144775, |
| "logps/chosen": -134.9425048828125, |
| "logps/rejected": -180.87374877929688, |
| "loss": 0.2955, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.5013420581817627, |
| "rewards/margins": 2.893864154815674, |
| "rewards/rejected": -3.3952059745788574, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9173333333333333, |
| "grad_norm": 16.69269334633985, |
| "learning_rate": 2.1462351625736673e-08, |
| "logits/chosen": 1.1675758361816406, |
| "logits/rejected": 1.6442891359329224, |
| "logps/chosen": -130.08786010742188, |
| "logps/rejected": -170.487548828125, |
| "loss": 0.2872, |
| "rewards/accuracies": 0.871874988079071, |
| "rewards/chosen": -0.6101741790771484, |
| "rewards/margins": 3.180131196975708, |
| "rewards/rejected": -3.7903053760528564, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 27.109723201805032, |
| "learning_rate": 1.6400775298734015e-08, |
| "logits/chosen": 1.1326353549957275, |
| "logits/rejected": 1.4924323558807373, |
| "logps/chosen": -130.98184204101562, |
| "logps/rejected": -169.8687286376953, |
| "loss": 0.2554, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.5216690301895142, |
| "rewards/margins": 3.1709208488464355, |
| "rewards/rejected": -3.6925899982452393, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9386666666666666, |
| "grad_norm": 14.828413315706442, |
| "learning_rate": 1.200916117727374e-08, |
| "logits/chosen": 0.9768412113189697, |
| "logits/rejected": 1.5602909326553345, |
| "logps/chosen": -140.74815368652344, |
| "logps/rejected": -172.26649475097656, |
| "loss": 0.2573, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -0.5175585746765137, |
| "rewards/margins": 3.088252544403076, |
| "rewards/rejected": -3.605811595916748, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9493333333333334, |
| "grad_norm": 26.86192866644961, |
| "learning_rate": 8.293593256805842e-09, |
| "logits/chosen": 1.1421420574188232, |
| "logits/rejected": 1.6538828611373901, |
| "logps/chosen": -141.6370086669922, |
| "logps/rejected": -169.20147705078125, |
| "loss": 0.3091, |
| "rewards/accuracies": 0.8593750596046448, |
| "rewards/chosen": -0.536719799041748, |
| "rewards/margins": 3.0636281967163086, |
| "rewards/rejected": -3.6003482341766357, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 19.615598554875728, |
| "learning_rate": 5.2592189609648726e-09, |
| "logits/chosen": 1.0914634466171265, |
| "logits/rejected": 1.4714587926864624, |
| "logps/chosen": -131.58399963378906, |
| "logps/rejected": -164.37319946289062, |
| "loss": 0.3104, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -0.6866143345832825, |
| "rewards/margins": 2.836606025695801, |
| "rewards/rejected": -3.5232203006744385, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9706666666666667, |
| "grad_norm": 16.989834579672234, |
| "learning_rate": 2.910242010500996e-09, |
| "logits/chosen": 1.3143634796142578, |
| "logits/rejected": 1.5896141529083252, |
| "logps/chosen": -136.51361083984375, |
| "logps/rejected": -165.6531524658203, |
| "loss": 0.3146, |
| "rewards/accuracies": 0.8343749642372131, |
| "rewards/chosen": -0.6686439514160156, |
| "rewards/margins": 2.7611334323883057, |
| "rewards/rejected": -3.4297773838043213, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9813333333333333, |
| "grad_norm": 27.194483822978505, |
| "learning_rate": 1.249916599585954e-09, |
| "logits/chosen": 1.2227872610092163, |
| "logits/rejected": 1.5990573167800903, |
| "logps/chosen": -140.06248474121094, |
| "logps/rejected": -176.98333740234375, |
| "loss": 0.2738, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.6044124960899353, |
| "rewards/margins": 3.01471209526062, |
| "rewards/rejected": -3.619124412536621, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 18.707222551914008, |
| "learning_rate": 2.8054288756129696e-10, |
| "logits/chosen": 1.316845417022705, |
| "logits/rejected": 1.5853126049041748, |
| "logps/chosen": -131.83575439453125, |
| "logps/rejected": -169.5233917236328, |
| "loss": 0.3086, |
| "rewards/accuracies": 0.8687500357627869, |
| "rewards/chosen": -0.5091441869735718, |
| "rewards/margins": 2.9370408058166504, |
| "rewards/rejected": -3.4461851119995117, |
| "step": 930 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 938, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 111524967546880.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|