| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.943820224719101, | |
| "eval_steps": 500, | |
| "global_step": 132, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02247191011235955, | |
| "grad_norm": 489.5653076171875, | |
| "learning_rate": 2.1428571428571428e-07, | |
| "logits/chosen": 1.4551408290863037, | |
| "logits/rejected": 1.478129267692566, | |
| "logps/chosen": -2968.771240234375, | |
| "logps/rejected": -3035.35302734375, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0449438202247191, | |
| "grad_norm": 419.54876708984375, | |
| "learning_rate": 4.2857142857142857e-07, | |
| "logits/chosen": 1.5314003229141235, | |
| "logits/rejected": 1.4525893926620483, | |
| "logps/chosen": -3010.43994140625, | |
| "logps/rejected": -2926.948974609375, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.06741573033707865, | |
| "grad_norm": 789.9224243164062, | |
| "learning_rate": 6.428571428571428e-07, | |
| "logits/chosen": 1.482939600944519, | |
| "logits/rejected": 1.5616533756256104, | |
| "logps/chosen": -2998.501708984375, | |
| "logps/rejected": -3179.81982421875, | |
| "loss": 0.9204, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.08596238493919373, | |
| "rewards/margins": -0.19251862168312073, | |
| "rewards/rejected": 0.10655620694160461, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0898876404494382, | |
| "grad_norm": 378.14190673828125, | |
| "learning_rate": 8.571428571428571e-07, | |
| "logits/chosen": 1.6036081314086914, | |
| "logits/rejected": 1.7028334140777588, | |
| "logps/chosen": -2979.7060546875, | |
| "logps/rejected": -2913.69091796875, | |
| "loss": 0.6588, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": -0.28274667263031006, | |
| "rewards/margins": 0.22482016682624817, | |
| "rewards/rejected": -0.5075668692588806, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.11235955056179775, | |
| "grad_norm": 307.0648193359375, | |
| "learning_rate": 1.0714285714285716e-06, | |
| "logits/chosen": 1.3923085927963257, | |
| "logits/rejected": 1.4200749397277832, | |
| "logps/chosen": -3015.828125, | |
| "logps/rejected": -3068.435302734375, | |
| "loss": 0.5615, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.03155745938420296, | |
| "rewards/margins": 0.5133614540100098, | |
| "rewards/rejected": -0.5449188947677612, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.1348314606741573, | |
| "grad_norm": 282.67034912109375, | |
| "learning_rate": 1.2857142857142856e-06, | |
| "logits/chosen": 1.5581945180892944, | |
| "logits/rejected": 1.405899167060852, | |
| "logps/chosen": -3204.767333984375, | |
| "logps/rejected": -3163.357177734375, | |
| "loss": 0.5469, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.25397348403930664, | |
| "rewards/margins": 0.6482839584350586, | |
| "rewards/rejected": -0.9022574424743652, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.15730337078651685, | |
| "grad_norm": 218.5866241455078, | |
| "learning_rate": 1.5e-06, | |
| "logits/chosen": 1.496790885925293, | |
| "logits/rejected": 1.4303985834121704, | |
| "logps/chosen": -3185.8203125, | |
| "logps/rejected": -3225.123046875, | |
| "loss": 0.4709, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -0.04301854223012924, | |
| "rewards/margins": 1.6269282102584839, | |
| "rewards/rejected": -1.6699466705322266, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.1797752808988764, | |
| "grad_norm": 181.489501953125, | |
| "learning_rate": 1.7142857142857143e-06, | |
| "logits/chosen": 1.6130130290985107, | |
| "logits/rejected": 1.5007115602493286, | |
| "logps/chosen": -3087.791748046875, | |
| "logps/rejected": -2948.8115234375, | |
| "loss": 0.396, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.08753497898578644, | |
| "rewards/margins": 2.817833185195923, | |
| "rewards/rejected": -2.9053683280944824, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.20224719101123595, | |
| "grad_norm": 188.34768676757812, | |
| "learning_rate": 1.928571428571429e-06, | |
| "logits/chosen": 1.5587732791900635, | |
| "logits/rejected": 1.6744489669799805, | |
| "logps/chosen": -2374.6494140625, | |
| "logps/rejected": -2492.75537109375, | |
| "loss": 0.448, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.14219728112220764, | |
| "rewards/margins": 2.7199909687042236, | |
| "rewards/rejected": -2.8621885776519775, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.2247191011235955, | |
| "grad_norm": 167.6234588623047, | |
| "learning_rate": 2.142857142857143e-06, | |
| "logits/chosen": 1.581652283668518, | |
| "logits/rejected": 1.5243756771087646, | |
| "logps/chosen": -2837.341552734375, | |
| "logps/rejected": -2842.2666015625, | |
| "loss": 0.3618, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.06367100775241852, | |
| "rewards/margins": 6.429449081420898, | |
| "rewards/rejected": -6.493120193481445, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.24719101123595505, | |
| "grad_norm": 195.05810546875, | |
| "learning_rate": 2.357142857142857e-06, | |
| "logits/chosen": 1.531968355178833, | |
| "logits/rejected": 1.5490195751190186, | |
| "logps/chosen": -2785.763427734375, | |
| "logps/rejected": -2938.71533203125, | |
| "loss": 0.3962, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.2717077136039734, | |
| "rewards/margins": 8.072213172912598, | |
| "rewards/rejected": -8.343921661376953, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.2696629213483146, | |
| "grad_norm": 204.53872680664062, | |
| "learning_rate": 2.571428571428571e-06, | |
| "logits/chosen": 1.5632414817810059, | |
| "logits/rejected": 1.5352647304534912, | |
| "logps/chosen": -2883.001220703125, | |
| "logps/rejected": -3065.4296875, | |
| "loss": 0.4155, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.09219703823328018, | |
| "rewards/margins": 11.51332950592041, | |
| "rewards/rejected": -11.421133041381836, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.29213483146067415, | |
| "grad_norm": 181.2421112060547, | |
| "learning_rate": 2.785714285714286e-06, | |
| "logits/chosen": 1.5124785900115967, | |
| "logits/rejected": 1.4263392686843872, | |
| "logps/chosen": -3015.5341796875, | |
| "logps/rejected": -3136.56982421875, | |
| "loss": 0.3343, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.1826700121164322, | |
| "rewards/margins": 16.418424606323242, | |
| "rewards/rejected": -16.601093292236328, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.3146067415730337, | |
| "grad_norm": 178.02650451660156, | |
| "learning_rate": 3e-06, | |
| "logits/chosen": 1.4881091117858887, | |
| "logits/rejected": 1.4641259908676147, | |
| "logps/chosen": -2906.181396484375, | |
| "logps/rejected": -3083.74755859375, | |
| "loss": 0.3189, | |
| "rewards/accuracies": 0.890625, | |
| "rewards/chosen": -0.07007797807455063, | |
| "rewards/margins": 18.051210403442383, | |
| "rewards/rejected": -18.121288299560547, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.33707865168539325, | |
| "grad_norm": 188.4379425048828, | |
| "learning_rate": 2.999468416685179e-06, | |
| "logits/chosen": 1.4958661794662476, | |
| "logits/rejected": 1.5740702152252197, | |
| "logps/chosen": -2589.415771484375, | |
| "logps/rejected": -2884.312744140625, | |
| "loss": 0.3903, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.1765696406364441, | |
| "rewards/margins": 17.232072830200195, | |
| "rewards/rejected": -17.408641815185547, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.3595505617977528, | |
| "grad_norm": 161.3037872314453, | |
| "learning_rate": 2.9978740435151427e-06, | |
| "logits/chosen": 1.5349267721176147, | |
| "logits/rejected": 1.491062045097351, | |
| "logps/chosen": -2951.84619140625, | |
| "logps/rejected": -3206.8662109375, | |
| "loss": 0.3059, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.7078287601470947, | |
| "rewards/margins": 23.868520736694336, | |
| "rewards/rejected": -25.57634925842285, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.38202247191011235, | |
| "grad_norm": 186.13180541992188, | |
| "learning_rate": 2.995218010546125e-06, | |
| "logits/chosen": 1.4998528957366943, | |
| "logits/rejected": 1.4576878547668457, | |
| "logps/chosen": -3011.727783203125, | |
| "logps/rejected": -3261.4501953125, | |
| "loss": 0.3808, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.25169306993484497, | |
| "rewards/margins": 35.25308609008789, | |
| "rewards/rejected": -35.50477600097656, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.4044943820224719, | |
| "grad_norm": 185.6712188720703, | |
| "learning_rate": 2.9915022003152055e-06, | |
| "logits/chosen": 1.6139241456985474, | |
| "logits/rejected": 1.5550901889801025, | |
| "logps/chosen": -2965.4423828125, | |
| "logps/rejected": -3224.514404296875, | |
| "loss": 0.3542, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 1.8823347091674805, | |
| "rewards/margins": 39.0025634765625, | |
| "rewards/rejected": -37.12023162841797, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.42696629213483145, | |
| "grad_norm": 182.43603515625, | |
| "learning_rate": 2.986729246506011e-06, | |
| "logits/chosen": 1.244603157043457, | |
| "logits/rejected": 1.2053301334381104, | |
| "logps/chosen": -2764.19189453125, | |
| "logps/rejected": -3084.441650390625, | |
| "loss": 0.367, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -1.6243125200271606, | |
| "rewards/margins": 43.56684112548828, | |
| "rewards/rejected": -45.1911506652832, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.449438202247191, | |
| "grad_norm": 198.76722717285156, | |
| "learning_rate": 2.980902532082017e-06, | |
| "logits/chosen": 1.4910385608673096, | |
| "logits/rejected": 1.4667646884918213, | |
| "logps/chosen": -2632.417724609375, | |
| "logps/rejected": -2912.476806640625, | |
| "loss": 0.4946, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.317056179046631, | |
| "rewards/margins": 34.359012603759766, | |
| "rewards/rejected": -36.676063537597656, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.47191011235955055, | |
| "grad_norm": 203.78700256347656, | |
| "learning_rate": 2.9740261868887817e-06, | |
| "logits/chosen": 1.4394636154174805, | |
| "logits/rejected": 1.3155745267868042, | |
| "logps/chosen": -2808.47509765625, | |
| "logps/rejected": -3043.707763671875, | |
| "loss": 0.4802, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 1.6056139469146729, | |
| "rewards/margins": 43.16130065917969, | |
| "rewards/rejected": -41.555686950683594, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.4943820224719101, | |
| "grad_norm": 199.40330505371094, | |
| "learning_rate": 2.9661050847268e-06, | |
| "logits/chosen": 1.3054568767547607, | |
| "logits/rejected": 1.2870110273361206, | |
| "logps/chosen": -2704.07568359375, | |
| "logps/rejected": -3091.42626953125, | |
| "loss": 0.4924, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -4.835676670074463, | |
| "rewards/margins": 40.92457580566406, | |
| "rewards/rejected": -45.76025390625, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.5168539325842697, | |
| "grad_norm": 184.34901428222656, | |
| "learning_rate": 2.957144839897065e-06, | |
| "logits/chosen": 1.5794934034347534, | |
| "logits/rejected": 1.374954104423523, | |
| "logps/chosen": -2828.36083984375, | |
| "logps/rejected": -3111.46875, | |
| "loss": 0.4932, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": 3.432398796081543, | |
| "rewards/margins": 62.3823356628418, | |
| "rewards/rejected": -58.9499397277832, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.5393258426966292, | |
| "grad_norm": 198.54269409179688, | |
| "learning_rate": 2.947151803221774e-06, | |
| "logits/chosen": 1.6772565841674805, | |
| "logits/rejected": 1.6362934112548828, | |
| "logps/chosen": -2880.4677734375, | |
| "logps/rejected": -3303.3857421875, | |
| "loss": 0.3869, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": 0.12497274577617645, | |
| "rewards/margins": 53.7283821105957, | |
| "rewards/rejected": -53.60340881347656, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.5617977528089888, | |
| "grad_norm": 173.3833465576172, | |
| "learning_rate": 2.936133057543008e-06, | |
| "logits/chosen": 1.4493129253387451, | |
| "logits/rejected": 1.3350006341934204, | |
| "logps/chosen": -2721.460693359375, | |
| "logps/rejected": -3138.864990234375, | |
| "loss": 0.3981, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 2.794492244720459, | |
| "rewards/margins": 69.71061706542969, | |
| "rewards/rejected": -66.91613006591797, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.5842696629213483, | |
| "grad_norm": 232.13525390625, | |
| "learning_rate": 2.924096412702572e-06, | |
| "logits/chosen": 1.7099878787994385, | |
| "logits/rejected": 1.5226480960845947, | |
| "logps/chosen": -2983.288330078125, | |
| "logps/rejected": -3093.673095703125, | |
| "loss": 0.613, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": 2.1761527061462402, | |
| "rewards/margins": 59.57087326049805, | |
| "rewards/rejected": -57.394718170166016, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.6067415730337079, | |
| "grad_norm": 162.77978515625, | |
| "learning_rate": 2.91105040000655e-06, | |
| "logits/chosen": 1.4071202278137207, | |
| "logits/rejected": 1.4425785541534424, | |
| "logps/chosen": -2522.546630859375, | |
| "logps/rejected": -3321.0537109375, | |
| "loss": 0.4005, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": 1.8253318071365356, | |
| "rewards/margins": 63.75608825683594, | |
| "rewards/rejected": -61.930755615234375, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.6292134831460674, | |
| "grad_norm": 207.4031219482422, | |
| "learning_rate": 2.897004266178508e-06, | |
| "logits/chosen": 1.5841655731201172, | |
| "logits/rejected": 1.4097201824188232, | |
| "logps/chosen": -3239.787841796875, | |
| "logps/rejected": -3663.88232421875, | |
| "loss": 0.522, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.2217176854610443, | |
| "rewards/margins": 58.664180755615234, | |
| "rewards/rejected": -58.88589859008789, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.651685393258427, | |
| "grad_norm": 172.96218872070312, | |
| "learning_rate": 2.8819679668056195e-06, | |
| "logits/chosen": 1.6320128440856934, | |
| "logits/rejected": 1.5467625856399536, | |
| "logps/chosen": -2654.78271484375, | |
| "logps/rejected": -3225.193359375, | |
| "loss": 0.3816, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": 2.769482374191284, | |
| "rewards/margins": 65.22299194335938, | |
| "rewards/rejected": -62.453514099121094, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.6741573033707865, | |
| "grad_norm": 200.36915588378906, | |
| "learning_rate": 2.8659521592823702e-06, | |
| "logits/chosen": 1.6264617443084717, | |
| "logits/rejected": 1.421095848083496, | |
| "logps/chosen": -2914.17529296875, | |
| "logps/rejected": -3396.08544921875, | |
| "loss": 0.4913, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": 7.334710121154785, | |
| "rewards/margins": 89.93038177490234, | |
| "rewards/rejected": -82.59567260742188, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.6966292134831461, | |
| "grad_norm": 250.5316162109375, | |
| "learning_rate": 2.848968195256829e-06, | |
| "logits/chosen": 1.6201553344726562, | |
| "logits/rejected": 1.4870961904525757, | |
| "logps/chosen": -3036.192138671875, | |
| "logps/rejected": -3605.6904296875, | |
| "loss": 0.708, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 4.598369121551514, | |
| "rewards/margins": 79.35784149169922, | |
| "rewards/rejected": -74.75946807861328, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.7191011235955056, | |
| "grad_norm": 228.1786346435547, | |
| "learning_rate": 2.831028112584857e-06, | |
| "logits/chosen": 1.3086817264556885, | |
| "logits/rejected": 1.2920796871185303, | |
| "logps/chosen": -2828.72900390625, | |
| "logps/rejected": -3492.97802734375, | |
| "loss": 0.5514, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.8046822547912598, | |
| "rewards/margins": 77.88575744628906, | |
| "rewards/rejected": -77.08108520507812, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.7415730337078652, | |
| "grad_norm": 156.25662231445312, | |
| "learning_rate": 2.812144626797942e-06, | |
| "logits/chosen": 1.3912537097930908, | |
| "logits/rejected": 1.1646690368652344, | |
| "logps/chosen": -3173.48388671875, | |
| "logps/rejected": -3708.0390625, | |
| "loss": 0.4043, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 2.820896863937378, | |
| "rewards/margins": 82.55420684814453, | |
| "rewards/rejected": -79.73331451416016, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.7640449438202247, | |
| "grad_norm": 189.89682006835938, | |
| "learning_rate": 2.792331122090709e-06, | |
| "logits/chosen": 1.525010108947754, | |
| "logits/rejected": 1.4141947031021118, | |
| "logps/chosen": -2818.591064453125, | |
| "logps/rejected": -3415.1484375, | |
| "loss": 0.4825, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": 1.3273561000823975, | |
| "rewards/margins": 81.49795532226562, | |
| "rewards/rejected": -80.17059326171875, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.7865168539325843, | |
| "grad_norm": 198.3324432373047, | |
| "learning_rate": 2.7716016418345064e-06, | |
| "logits/chosen": 1.5669187307357788, | |
| "logits/rejected": 1.3444348573684692, | |
| "logps/chosen": -2831.2744140625, | |
| "logps/rejected": -3359.554931640625, | |
| "loss": 0.4821, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 4.969450950622559, | |
| "rewards/margins": 95.5076675415039, | |
| "rewards/rejected": -90.53821563720703, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.8089887640449438, | |
| "grad_norm": 202.50929260253906, | |
| "learning_rate": 2.7499708786237724e-06, | |
| "logits/chosen": 1.6073535680770874, | |
| "logits/rejected": 1.5690536499023438, | |
| "logps/chosen": -2898.311279296875, | |
| "logps/rejected": -3199.489013671875, | |
| "loss": 0.5359, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.0962305068969727, | |
| "rewards/margins": 49.8695182800293, | |
| "rewards/rejected": -52.96574783325195, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.8314606741573034, | |
| "grad_norm": 172.3883056640625, | |
| "learning_rate": 2.7274541638622533e-06, | |
| "logits/chosen": 1.5025634765625, | |
| "logits/rejected": 1.2939093112945557, | |
| "logps/chosen": -2682.772705078125, | |
| "logps/rejected": -3070.16259765625, | |
| "loss": 0.5118, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -0.5182172060012817, | |
| "rewards/margins": 86.14014434814453, | |
| "rewards/rejected": -86.65835571289062, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.8539325842696629, | |
| "grad_norm": 200.7554473876953, | |
| "learning_rate": 2.7040674568964452e-06, | |
| "logits/chosen": 1.4808025360107422, | |
| "logits/rejected": 1.3251252174377441, | |
| "logps/chosen": -2854.599365234375, | |
| "logps/rejected": -3208.1640625, | |
| "loss": 0.5253, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 1.5150139331817627, | |
| "rewards/margins": 78.78499603271484, | |
| "rewards/rejected": -77.26997375488281, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.8764044943820225, | |
| "grad_norm": 217.05526733398438, | |
| "learning_rate": 2.679827333703964e-06, | |
| "logits/chosen": 1.5550140142440796, | |
| "logits/rejected": 1.5405230522155762, | |
| "logps/chosen": -2775.199951171875, | |
| "logps/rejected": -3292.66650390625, | |
| "loss": 0.5094, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.5831690430641174, | |
| "rewards/margins": 75.25239562988281, | |
| "rewards/rejected": -75.8355712890625, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.898876404494382, | |
| "grad_norm": 260.61224365234375, | |
| "learning_rate": 2.6547509751448593e-06, | |
| "logits/chosen": 1.5327131748199463, | |
| "logits/rejected": 1.404789924621582, | |
| "logps/chosen": -2995.2666015625, | |
| "logps/rejected": -3701.7333984375, | |
| "loss": 0.7054, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": 4.574828147888184, | |
| "rewards/margins": 96.09221649169922, | |
| "rewards/rejected": -91.51737976074219, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.9213483146067416, | |
| "grad_norm": 210.46607971191406, | |
| "learning_rate": 2.6288561547842076e-06, | |
| "logits/chosen": 1.5143060684204102, | |
| "logits/rejected": 1.2557826042175293, | |
| "logps/chosen": -2932.751953125, | |
| "logps/rejected": -3389.65185546875, | |
| "loss": 0.6426, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 3.5902769565582275, | |
| "rewards/margins": 102.1531982421875, | |
| "rewards/rejected": -98.56291198730469, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.9438202247191011, | |
| "grad_norm": 203.90863037109375, | |
| "learning_rate": 2.602161226294601e-06, | |
| "logits/chosen": 1.4669859409332275, | |
| "logits/rejected": 1.254248023033142, | |
| "logps/chosen": -3275.650146484375, | |
| "logps/rejected": -3885.744873046875, | |
| "loss": 0.5032, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -7.145351886749268, | |
| "rewards/margins": 94.66647338867188, | |
| "rewards/rejected": -101.81182861328125, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.9662921348314607, | |
| "grad_norm": 190.71495056152344, | |
| "learning_rate": 2.5746851104474728e-06, | |
| "logits/chosen": 1.4877179861068726, | |
| "logits/rejected": 1.3816105127334595, | |
| "logps/chosen": -2700.980224609375, | |
| "logps/rejected": -3283.328125, | |
| "loss": 0.4432, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": 1.710632085800171, | |
| "rewards/margins": 75.0985107421875, | |
| "rewards/rejected": -73.38786315917969, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.9887640449438202, | |
| "grad_norm": 192.31964111328125, | |
| "learning_rate": 2.5464472817024772e-06, | |
| "logits/chosen": 1.3617230653762817, | |
| "logits/rejected": 1.2478257417678833, | |
| "logps/chosen": -2841.803466796875, | |
| "logps/rejected": -3503.9794921875, | |
| "loss": 0.5194, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 4.092733383178711, | |
| "rewards/margins": 110.31430053710938, | |
| "rewards/rejected": -106.22156524658203, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 192.31964111328125, | |
| "learning_rate": 2.517467754404424e-06, | |
| "logits/chosen": 1.3865031003952026, | |
| "logits/rejected": 1.2281872034072876, | |
| "logps/chosen": -2563.0751953125, | |
| "logps/rejected": -2940.1357421875, | |
| "loss": 0.2103, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 4.377815246582031, | |
| "rewards/margins": 81.93372344970703, | |
| "rewards/rejected": -77.555908203125, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.0224719101123596, | |
| "grad_norm": 135.86026000976562, | |
| "learning_rate": 2.487767068597558e-06, | |
| "logits/chosen": 1.5341211557388306, | |
| "logits/rejected": 1.4015753269195557, | |
| "logps/chosen": -3250.149658203125, | |
| "logps/rejected": -3893.629150390625, | |
| "loss": 0.0037, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.023714065551758, | |
| "rewards/margins": 134.42942810058594, | |
| "rewards/rejected": -115.40570068359375, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 1.0449438202247192, | |
| "grad_norm": 1.9560177326202393, | |
| "learning_rate": 2.4573662754672303e-06, | |
| "logits/chosen": 1.4638060331344604, | |
| "logits/rejected": 1.396654486656189, | |
| "logps/chosen": -2667.339599609375, | |
| "logps/rejected": -3516.595703125, | |
| "loss": 0.0114, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 8.45435905456543, | |
| "rewards/margins": 107.95783996582031, | |
| "rewards/rejected": -99.50347900390625, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 1.0674157303370786, | |
| "grad_norm": 14.909017562866211, | |
| "learning_rate": 2.426286922419288e-06, | |
| "logits/chosen": 1.6447203159332275, | |
| "logits/rejected": 1.6282371282577515, | |
| "logps/chosen": -2377.240478515625, | |
| "logps/rejected": -2950.48583984375, | |
| "loss": 0.0154, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 7.06836462020874, | |
| "rewards/margins": 84.36599731445312, | |
| "rewards/rejected": -77.29763793945312, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 1.0898876404494382, | |
| "grad_norm": 4.328535556793213, | |
| "learning_rate": 2.3945510378077523e-06, | |
| "logits/chosen": 1.3356518745422363, | |
| "logits/rejected": 1.2965461015701294, | |
| "logps/chosen": -2788.0400390625, | |
| "logps/rejected": -3457.5185546875, | |
| "loss": 0.0024, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 11.870361328125, | |
| "rewards/margins": 103.6649169921875, | |
| "rewards/rejected": -91.79456329345703, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 1.1123595505617978, | |
| "grad_norm": 6.1306352615356445, | |
| "learning_rate": 2.3621811153216106e-06, | |
| "logits/chosen": 1.3586758375167847, | |
| "logits/rejected": 1.2172551155090332, | |
| "logps/chosen": -3142.0791015625, | |
| "logps/rejected": -3848.3056640625, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 5.018255710601807, | |
| "rewards/margins": 121.07866668701172, | |
| "rewards/rejected": -116.06040954589844, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.1348314606741572, | |
| "grad_norm": 2.2042205333709717, | |
| "learning_rate": 2.32920009804179e-06, | |
| "logits/chosen": 1.676792860031128, | |
| "logits/rejected": 1.4110440015792847, | |
| "logps/chosen": -2846.33056640625, | |
| "logps/rejected": -3573.93359375, | |
| "loss": 0.0116, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 16.190317153930664, | |
| "rewards/margins": 119.14263153076172, | |
| "rewards/rejected": -102.95230102539062, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 1.1573033707865168, | |
| "grad_norm": 13.62660026550293, | |
| "learning_rate": 2.2956313621796135e-06, | |
| "logits/chosen": 1.5751538276672363, | |
| "logits/rejected": 1.4073097705841064, | |
| "logps/chosen": -2536.8515625, | |
| "logps/rejected": -3102.68896484375, | |
| "loss": 0.0147, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 7.306772232055664, | |
| "rewards/margins": 98.24702453613281, | |
| "rewards/rejected": -90.94024658203125, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 1.1797752808988764, | |
| "grad_norm": 1.355103850364685, | |
| "learning_rate": 2.26149870050826e-06, | |
| "logits/chosen": 1.363991618156433, | |
| "logits/rejected": 1.1863415241241455, | |
| "logps/chosen": -3056.833740234375, | |
| "logps/rejected": -3680.160888671875, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.664068222045898, | |
| "rewards/margins": 112.41234588623047, | |
| "rewards/rejected": -102.74827575683594, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 1.202247191011236, | |
| "grad_norm": 2.3306772708892822, | |
| "learning_rate": 2.2268263054989753e-06, | |
| "logits/chosen": 1.54270339012146, | |
| "logits/rejected": 1.475841760635376, | |
| "logps/chosen": -2780.744384765625, | |
| "logps/rejected": -3487.5322265625, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 11.756105422973633, | |
| "rewards/margins": 107.98931884765625, | |
| "rewards/rejected": -96.23321533203125, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 1.2247191011235956, | |
| "grad_norm": 1.47923743724823, | |
| "learning_rate": 2.191638752173989e-06, | |
| "logits/chosen": 1.6175808906555176, | |
| "logits/rejected": 1.5379141569137573, | |
| "logps/chosen": -2748.61328125, | |
| "logps/rejected": -3274.468017578125, | |
| "loss": 0.0117, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 8.739614486694336, | |
| "rewards/margins": 110.58942413330078, | |
| "rewards/rejected": -101.84980010986328, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.247191011235955, | |
| "grad_norm": 3.0752482414245605, | |
| "learning_rate": 2.1559609806882834e-06, | |
| "logits/chosen": 1.4324688911437988, | |
| "logits/rejected": 1.2107815742492676, | |
| "logps/chosen": -2790.97509765625, | |
| "logps/rejected": -3406.87744140625, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.457365989685059, | |
| "rewards/margins": 89.03166198730469, | |
| "rewards/rejected": -83.57430267333984, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.2696629213483146, | |
| "grad_norm": 0.07106953859329224, | |
| "learning_rate": 2.1198182786525674e-06, | |
| "logits/chosen": 1.409006118774414, | |
| "logits/rejected": 1.2638301849365234, | |
| "logps/chosen": -2571.373046875, | |
| "logps/rejected": -3436.89892578125, | |
| "loss": 0.0112, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 12.910816192626953, | |
| "rewards/margins": 133.70639038085938, | |
| "rewards/rejected": -120.79557800292969, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.2921348314606742, | |
| "grad_norm": 1.3202946186065674, | |
| "learning_rate": 2.0832362632099813e-06, | |
| "logits/chosen": 1.4980010986328125, | |
| "logits/rejected": 1.1623045206069946, | |
| "logps/chosen": -3144.611083984375, | |
| "logps/rejected": -3731.18212890625, | |
| "loss": 0.0051, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.096885681152344, | |
| "rewards/margins": 142.87937927246094, | |
| "rewards/rejected": -133.78250122070312, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.3146067415730336, | |
| "grad_norm": 2.9557082653045654, | |
| "learning_rate": 2.0462408628792335e-06, | |
| "logits/chosen": 1.6109601259231567, | |
| "logits/rejected": 1.4365208148956299, | |
| "logps/chosen": -2812.40625, | |
| "logps/rejected": -3437.3193359375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.375179290771484, | |
| "rewards/margins": 111.16755676269531, | |
| "rewards/rejected": -102.79237365722656, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.3370786516853932, | |
| "grad_norm": 0.2892356514930725, | |
| "learning_rate": 2.008858299177045e-06, | |
| "logits/chosen": 1.4753564596176147, | |
| "logits/rejected": 1.2640880346298218, | |
| "logps/chosen": -2899.793212890625, | |
| "logps/rejected": -3406.771240234375, | |
| "loss": 0.0157, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 7.380945682525635, | |
| "rewards/margins": 106.26220703125, | |
| "rewards/rejected": -98.88125610351562, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.3595505617977528, | |
| "grad_norm": 50.00154495239258, | |
| "learning_rate": 1.9711150680329234e-06, | |
| "logits/chosen": 1.6642662286758423, | |
| "logits/rejected": 1.473952054977417, | |
| "logps/chosen": -2834.24072265625, | |
| "logps/rejected": -3363.942138671875, | |
| "loss": 0.0175, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 8.414569854736328, | |
| "rewards/margins": 110.77262115478516, | |
| "rewards/rejected": -102.35804748535156, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.3820224719101124, | |
| "grad_norm": 0.07520447671413422, | |
| "learning_rate": 1.9330379210094315e-06, | |
| "logits/chosen": 1.5798277854919434, | |
| "logits/rejected": 1.4446996450424194, | |
| "logps/chosen": -2692.41162109375, | |
| "logps/rejected": -3175.50830078125, | |
| "loss": 0.0118, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 5.677203178405762, | |
| "rewards/margins": 96.32395935058594, | |
| "rewards/rejected": -90.64675903320312, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.404494382022472, | |
| "grad_norm": 3.16860032081604, | |
| "learning_rate": 1.8946538463412818e-06, | |
| "logits/chosen": 1.606536865234375, | |
| "logits/rejected": 1.5855745077133179, | |
| "logps/chosen": -2659.635986328125, | |
| "logps/rejected": -3431.36572265625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 10.329705238342285, | |
| "rewards/margins": 98.20384216308594, | |
| "rewards/rejected": -87.87415313720703, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.4269662921348314, | |
| "grad_norm": 0.042245469987392426, | |
| "learning_rate": 1.8559900498066726e-06, | |
| "logits/chosen": 1.605839490890503, | |
| "logits/rejected": 1.3888914585113525, | |
| "logps/chosen": -2774.67529296875, | |
| "logps/rejected": -3620.492431640625, | |
| "loss": 0.0092, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 14.000102996826172, | |
| "rewards/margins": 140.67535400390625, | |
| "rewards/rejected": -126.67523956298828, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.449438202247191, | |
| "grad_norm": 28.373090744018555, | |
| "learning_rate": 1.8170739354444366e-06, | |
| "logits/chosen": 1.5468522310256958, | |
| "logits/rejected": 1.316043734550476, | |
| "logps/chosen": -2898.541015625, | |
| "logps/rejected": -3607.741943359375, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.336808204650879, | |
| "rewards/margins": 125.04135131835938, | |
| "rewards/rejected": -115.70454406738281, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.4719101123595506, | |
| "grad_norm": 3.688307046890259, | |
| "learning_rate": 1.7779330861306717e-06, | |
| "logits/chosen": 1.4648973941802979, | |
| "logits/rejected": 1.3168296813964844, | |
| "logps/chosen": -3060.658935546875, | |
| "logps/rejected": -4020.65185546875, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3615617752075195, | |
| "rewards/margins": 130.01849365234375, | |
| "rewards/rejected": -126.65692138671875, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.49438202247191, | |
| "grad_norm": 21.308137893676758, | |
| "learning_rate": 1.738595244028608e-06, | |
| "logits/chosen": 1.4748642444610596, | |
| "logits/rejected": 1.3131040334701538, | |
| "logps/chosen": -2794.14599609375, | |
| "logps/rejected": -3351.5478515625, | |
| "loss": 0.0081, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8835487365722656, | |
| "rewards/margins": 98.07205963134766, | |
| "rewards/rejected": -95.18850708007812, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.5168539325842696, | |
| "grad_norm": 1.3383527994155884, | |
| "learning_rate": 1.699088290925583e-06, | |
| "logits/chosen": 1.372517704963684, | |
| "logits/rejected": 1.302228569984436, | |
| "logps/chosen": -2794.654052734375, | |
| "logps/rejected": -3820.33837890625, | |
| "loss": 0.0112, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 9.68542766571045, | |
| "rewards/margins": 141.4244842529297, | |
| "rewards/rejected": -131.73907470703125, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.5393258426966292, | |
| "grad_norm": 1.4769072532653809, | |
| "learning_rate": 1.6594402284710481e-06, | |
| "logits/chosen": 1.5602664947509766, | |
| "logits/rejected": 1.4328043460845947, | |
| "logps/chosen": -2850.06640625, | |
| "logps/rejected": -3549.932861328125, | |
| "loss": 0.026, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 5.793665409088135, | |
| "rewards/margins": 124.38016510009766, | |
| "rewards/rejected": -118.58650970458984, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.5617977528089888, | |
| "grad_norm": 5.262300968170166, | |
| "learning_rate": 1.6196791583296247e-06, | |
| "logits/chosen": 1.4012134075164795, | |
| "logits/rejected": 1.2154825925827026, | |
| "logps/chosen": -2862.569580078125, | |
| "logps/rejected": -3687.36328125, | |
| "loss": 0.003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 12.932228088378906, | |
| "rewards/margins": 135.03558349609375, | |
| "rewards/rejected": -122.10337829589844, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.5842696629213484, | |
| "grad_norm": 2.9438984394073486, | |
| "learning_rate": 1.579833262263268e-06, | |
| "logits/chosen": 1.4590383768081665, | |
| "logits/rejected": 1.1356399059295654, | |
| "logps/chosen": -2651.068603515625, | |
| "logps/rejected": -3142.91455078125, | |
| "loss": 0.0118, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 9.391037940979004, | |
| "rewards/margins": 119.59295654296875, | |
| "rewards/rejected": -110.2019271850586, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.606741573033708, | |
| "grad_norm": 0.6242117881774902, | |
| "learning_rate": 1.5399307821566623e-06, | |
| "logits/chosen": 1.5220391750335693, | |
| "logits/rejected": 1.2139172554016113, | |
| "logps/chosen": -2834.0634765625, | |
| "logps/rejected": -3674.3623046875, | |
| "loss": 0.0218, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 14.53393268585205, | |
| "rewards/margins": 154.6046142578125, | |
| "rewards/rejected": -140.0706787109375, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.6292134831460674, | |
| "grad_norm": 0.17758429050445557, | |
| "learning_rate": 1.5e-06, | |
| "logits/chosen": 1.531368374824524, | |
| "logits/rejected": 1.3681552410125732, | |
| "logps/chosen": -2943.841064453125, | |
| "logps/rejected": -3831.00927734375, | |
| "loss": 0.0117, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 11.650660514831543, | |
| "rewards/margins": 151.18350219726562, | |
| "rewards/rejected": -139.5328369140625, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.651685393258427, | |
| "grad_norm": 12.694519996643066, | |
| "learning_rate": 1.460069217843338e-06, | |
| "logits/chosen": 1.416333794593811, | |
| "logits/rejected": 1.1884994506835938, | |
| "logps/chosen": -3090.49658203125, | |
| "logps/rejected": -3794.48095703125, | |
| "loss": 0.004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 12.209739685058594, | |
| "rewards/margins": 145.9217529296875, | |
| "rewards/rejected": -133.71200561523438, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.6741573033707864, | |
| "grad_norm": 5.181153774261475, | |
| "learning_rate": 1.4201667377367324e-06, | |
| "logits/chosen": 1.5291459560394287, | |
| "logits/rejected": 1.390205979347229, | |
| "logps/chosen": -2819.557861328125, | |
| "logps/rejected": -3400.41748046875, | |
| "loss": 0.0112, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 6.913262367248535, | |
| "rewards/margins": 108.99024200439453, | |
| "rewards/rejected": -102.07699584960938, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.696629213483146, | |
| "grad_norm": 5.866981506347656, | |
| "learning_rate": 1.3803208416703752e-06, | |
| "logits/chosen": 1.509679913520813, | |
| "logits/rejected": 1.3863307237625122, | |
| "logps/chosen": -2517.104736328125, | |
| "logps/rejected": -3187.1181640625, | |
| "loss": 0.0026, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.015058517456055, | |
| "rewards/margins": 110.0936508178711, | |
| "rewards/rejected": -104.07859802246094, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.7191011235955056, | |
| "grad_norm": 3.792738199234009, | |
| "learning_rate": 1.3405597715289522e-06, | |
| "logits/chosen": 1.4075974225997925, | |
| "logits/rejected": 1.297675609588623, | |
| "logps/chosen": -3116.082275390625, | |
| "logps/rejected": -3820.78271484375, | |
| "loss": 0.0017, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.922908782958984, | |
| "rewards/margins": 124.51133728027344, | |
| "rewards/rejected": -117.58842468261719, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.7415730337078652, | |
| "grad_norm": 8.345385551452637, | |
| "learning_rate": 1.3009117090744173e-06, | |
| "logits/chosen": 1.5826494693756104, | |
| "logits/rejected": 1.2875326871871948, | |
| "logps/chosen": -2909.03515625, | |
| "logps/rejected": -3438.2587890625, | |
| "loss": 0.0111, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.310379981994629, | |
| "rewards/margins": 140.91641235351562, | |
| "rewards/rejected": -132.6060333251953, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.7640449438202248, | |
| "grad_norm": 0.4116104245185852, | |
| "learning_rate": 1.2614047559713923e-06, | |
| "logits/chosen": 1.4220818281173706, | |
| "logits/rejected": 1.2691839933395386, | |
| "logps/chosen": -3212.60693359375, | |
| "logps/rejected": -3793.721435546875, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.4821667671203613, | |
| "rewards/margins": 128.71267700195312, | |
| "rewards/rejected": -126.23049926757812, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.7865168539325844, | |
| "grad_norm": 0.8209803700447083, | |
| "learning_rate": 1.2220669138693288e-06, | |
| "logits/chosen": 1.3909624814987183, | |
| "logits/rejected": 1.1474812030792236, | |
| "logps/chosen": -2994.385009765625, | |
| "logps/rejected": -3750.771728515625, | |
| "loss": 0.0112, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 9.527303695678711, | |
| "rewards/margins": 137.7163543701172, | |
| "rewards/rejected": -128.18905639648438, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.8089887640449438, | |
| "grad_norm": 1.4425156116485596, | |
| "learning_rate": 1.1829260645555634e-06, | |
| "logits/chosen": 1.3281006813049316, | |
| "logits/rejected": 1.039908766746521, | |
| "logps/chosen": -3059.208251953125, | |
| "logps/rejected": -3867.33349609375, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 12.086620330810547, | |
| "rewards/margins": 160.84959411621094, | |
| "rewards/rejected": -148.76295471191406, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.8314606741573034, | |
| "grad_norm": 0.7217972278594971, | |
| "learning_rate": 1.1440099501933277e-06, | |
| "logits/chosen": 1.3363004922866821, | |
| "logits/rejected": 1.2744730710983276, | |
| "logps/chosen": -3156.716796875, | |
| "logps/rejected": -4011.334716796875, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.8549346923828125, | |
| "rewards/margins": 134.17984008789062, | |
| "rewards/rejected": -129.3249053955078, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.8539325842696628, | |
| "grad_norm": 1.5164899826049805, | |
| "learning_rate": 1.1053461536587183e-06, | |
| "logits/chosen": 1.4580892324447632, | |
| "logits/rejected": 1.2366647720336914, | |
| "logps/chosen": -2984.4619140625, | |
| "logps/rejected": -3910.234375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.195051193237305, | |
| "rewards/margins": 148.3942413330078, | |
| "rewards/rejected": -139.1991729736328, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.8764044943820224, | |
| "grad_norm": 3.071080446243286, | |
| "learning_rate": 1.0669620789905688e-06, | |
| "logits/chosen": 1.5336228609085083, | |
| "logits/rejected": 1.3450926542282104, | |
| "logps/chosen": -2671.64892578125, | |
| "logps/rejected": -3312.888427734375, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.30421257019043, | |
| "rewards/margins": 96.9708023071289, | |
| "rewards/rejected": -91.66659545898438, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.898876404494382, | |
| "grad_norm": 0.2966591715812683, | |
| "learning_rate": 1.0288849319670773e-06, | |
| "logits/chosen": 1.5615055561065674, | |
| "logits/rejected": 1.4262051582336426, | |
| "logps/chosen": -2924.010498046875, | |
| "logps/rejected": -3439.7509765625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.811070919036865, | |
| "rewards/margins": 107.32271575927734, | |
| "rewards/rejected": -102.51164245605469, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.9213483146067416, | |
| "grad_norm": 0.05935266241431236, | |
| "learning_rate": 9.911417008229545e-07, | |
| "logits/chosen": 1.4063825607299805, | |
| "logits/rejected": 1.1860499382019043, | |
| "logps/chosen": -2746.5126953125, | |
| "logps/rejected": -3493.92578125, | |
| "loss": 0.0325, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 11.67589282989502, | |
| "rewards/margins": 137.2821502685547, | |
| "rewards/rejected": -125.60626220703125, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.9438202247191012, | |
| "grad_norm": 0.21089386940002441, | |
| "learning_rate": 9.537591371207668e-07, | |
| "logits/chosen": 1.5266857147216797, | |
| "logits/rejected": 1.4005635976791382, | |
| "logps/chosen": -2387.665771484375, | |
| "logps/rejected": -3293.546630859375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.131157875061035, | |
| "rewards/margins": 137.9029083251953, | |
| "rewards/rejected": -132.77175903320312, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.9662921348314608, | |
| "grad_norm": 0.4727032780647278, | |
| "learning_rate": 9.167637367900192e-07, | |
| "logits/chosen": 1.5321190357208252, | |
| "logits/rejected": 1.3832690715789795, | |
| "logps/chosen": -2469.994384765625, | |
| "logps/rejected": -3097.712890625, | |
| "loss": 0.0117, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 13.177355766296387, | |
| "rewards/margins": 116.04686737060547, | |
| "rewards/rejected": -102.8695068359375, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.9887640449438202, | |
| "grad_norm": 0.39027953147888184, | |
| "learning_rate": 8.801817213474331e-07, | |
| "logits/chosen": 1.5794587135314941, | |
| "logits/rejected": 1.3486638069152832, | |
| "logps/chosen": -2815.1982421875, | |
| "logps/rejected": -3435.67919921875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.544872283935547, | |
| "rewards/margins": 112.28601837158203, | |
| "rewards/rejected": -103.74114227294922, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.14720159769058228, | |
| "learning_rate": 8.44039019311717e-07, | |
| "logits/chosen": 1.492700457572937, | |
| "logits/rejected": 1.3120732307434082, | |
| "logps/chosen": -3285.24267578125, | |
| "logps/rejected": -3985.763916015625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 11.00776481628418, | |
| "rewards/margins": 157.06927490234375, | |
| "rewards/rejected": -146.06150817871094, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.0224719101123596, | |
| "grad_norm": 0.019609661772847176, | |
| "learning_rate": 8.08361247826011e-07, | |
| "logits/chosen": 1.3633915185928345, | |
| "logits/rejected": 1.1915699243545532, | |
| "logps/chosen": -3307.618408203125, | |
| "logps/rejected": -4103.1875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.502930760383606, | |
| "rewards/margins": 150.0188446044922, | |
| "rewards/rejected": -151.52178955078125, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 2.044943820224719, | |
| "grad_norm": 0.026041870936751366, | |
| "learning_rate": 7.731736945010249e-07, | |
| "logits/chosen": 1.4235529899597168, | |
| "logits/rejected": 1.0836195945739746, | |
| "logps/chosen": -3224.001708984375, | |
| "logps/rejected": -3803.459228515625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.049484252929688, | |
| "rewards/margins": 149.46070861816406, | |
| "rewards/rejected": -140.41123962402344, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 2.067415730337079, | |
| "grad_norm": 0.36662229895591736, | |
| "learning_rate": 7.385012994917405e-07, | |
| "logits/chosen": 1.461303949356079, | |
| "logits/rejected": 1.401003360748291, | |
| "logps/chosen": -2710.856689453125, | |
| "logps/rejected": -3409.259765625, | |
| "loss": 0.0109, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 5.063204765319824, | |
| "rewards/margins": 96.820068359375, | |
| "rewards/rejected": -91.75686645507812, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 2.0898876404494384, | |
| "grad_norm": 0.22327114641666412, | |
| "learning_rate": 7.043686378203864e-07, | |
| "logits/chosen": 1.5914536714553833, | |
| "logits/rejected": 1.3907164335250854, | |
| "logps/chosen": -2657.873291015625, | |
| "logps/rejected": -3420.0283203125, | |
| "loss": 0.0109, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 12.433341979980469, | |
| "rewards/margins": 118.74362182617188, | |
| "rewards/rejected": -106.31027221679688, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 2.1123595505617976, | |
| "grad_norm": 0.006661942228674889, | |
| "learning_rate": 6.707999019582104e-07, | |
| "logits/chosen": 1.4297124147415161, | |
| "logits/rejected": 1.2694649696350098, | |
| "logps/chosen": -2567.587890625, | |
| "logps/rejected": -3557.106201171875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 7.91953182220459, | |
| "rewards/margins": 146.32005310058594, | |
| "rewards/rejected": -138.4005126953125, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 2.134831460674157, | |
| "grad_norm": 0.010272935964167118, | |
| "learning_rate": 6.378188846783898e-07, | |
| "logits/chosen": 1.584874153137207, | |
| "logits/rejected": 1.3883558511734009, | |
| "logps/chosen": -2836.077880859375, | |
| "logps/rejected": -3408.93115234375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.626905918121338, | |
| "rewards/margins": 121.95980834960938, | |
| "rewards/rejected": -115.33291625976562, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 2.157303370786517, | |
| "grad_norm": 0.006059441715478897, | |
| "learning_rate": 6.054489621922477e-07, | |
| "logits/chosen": 1.6233469247817993, | |
| "logits/rejected": 1.4364811182022095, | |
| "logps/chosen": -2997.014404296875, | |
| "logps/rejected": -3488.54150390625, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 12.179953575134277, | |
| "rewards/margins": 123.74882507324219, | |
| "rewards/rejected": -111.56886291503906, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 2.1797752808988764, | |
| "grad_norm": 0.23592473566532135, | |
| "learning_rate": 5.737130775807122e-07, | |
| "logits/chosen": 1.4150291681289673, | |
| "logits/rejected": 1.3036937713623047, | |
| "logps/chosen": -2623.100830078125, | |
| "logps/rejected": -3417.743408203125, | |
| "loss": 0.011, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 9.777491569519043, | |
| "rewards/margins": 126.9278335571289, | |
| "rewards/rejected": -117.15032196044922, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 2.202247191011236, | |
| "grad_norm": 0.0040085772052407265, | |
| "learning_rate": 5.426337245327703e-07, | |
| "logits/chosen": 1.3026162385940552, | |
| "logits/rejected": 1.194283127784729, | |
| "logps/chosen": -2882.58154296875, | |
| "logps/rejected": -3794.05078125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 10.322346687316895, | |
| "rewards/margins": 140.7698211669922, | |
| "rewards/rejected": -130.44747924804688, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 2.2247191011235956, | |
| "grad_norm": 0.005036317277699709, | |
| "learning_rate": 5.122329314024422e-07, | |
| "logits/chosen": 1.4347069263458252, | |
| "logits/rejected": 1.2561771869659424, | |
| "logps/chosen": -2425.357177734375, | |
| "logps/rejected": -3138.833740234375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 13.752297401428223, | |
| "rewards/margins": 120.6755599975586, | |
| "rewards/rejected": -106.92326354980469, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.247191011235955, | |
| "grad_norm": 0.267286479473114, | |
| "learning_rate": 4.825322455955759e-07, | |
| "logits/chosen": 1.376643419265747, | |
| "logits/rejected": 1.2739124298095703, | |
| "logps/chosen": -2709.716796875, | |
| "logps/rejected": -3520.384765625, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 10.822145462036133, | |
| "rewards/margins": 141.28472900390625, | |
| "rewards/rejected": -130.4625701904297, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 2.2696629213483144, | |
| "grad_norm": 0.37806662917137146, | |
| "learning_rate": 4.5355271829752307e-07, | |
| "logits/chosen": 1.4881722927093506, | |
| "logits/rejected": 1.346581220626831, | |
| "logps/chosen": -2821.6923828125, | |
| "logps/rejected": -3442.4619140625, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 9.021244049072266, | |
| "rewards/margins": 126.26439666748047, | |
| "rewards/rejected": -117.2431640625, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 2.292134831460674, | |
| "grad_norm": 0.0023486721329391003, | |
| "learning_rate": 4.2531488955252726e-07, | |
| "logits/chosen": 1.4559850692749023, | |
| "logits/rejected": 1.1960179805755615, | |
| "logps/chosen": -2982.266357421875, | |
| "logps/rejected": -3776.720458984375, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 13.267072677612305, | |
| "rewards/margins": 156.5282440185547, | |
| "rewards/rejected": -143.26113891601562, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 2.3146067415730336, | |
| "grad_norm": 0.006942716892808676, | |
| "learning_rate": 3.978387737053994e-07, | |
| "logits/chosen": 1.5748894214630127, | |
| "logits/rejected": 1.4408270120620728, | |
| "logps/chosen": -2752.75634765625, | |
| "logps/rejected": -3425.216064453125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 15.169326782226562, | |
| "rewards/margins": 107.41685485839844, | |
| "rewards/rejected": -92.24752807617188, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 2.337078651685393, | |
| "grad_norm": 0.1621246337890625, | |
| "learning_rate": 3.7114384521579234e-07, | |
| "logits/chosen": 1.6052483320236206, | |
| "logits/rejected": 1.446576714515686, | |
| "logps/chosen": -2733.099609375, | |
| "logps/rejected": -3558.54931640625, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.2836151123046875, | |
| "rewards/margins": 120.5184326171875, | |
| "rewards/rejected": -114.23482513427734, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.359550561797753, | |
| "grad_norm": 0.0010318144923076034, | |
| "learning_rate": 3.4524902485514043e-07, | |
| "logits/chosen": 1.5261331796646118, | |
| "logits/rejected": 1.2617827653884888, | |
| "logps/chosen": -2832.090576171875, | |
| "logps/rejected": -3448.433837890625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.34963607788086, | |
| "rewards/margins": 127.82434844970703, | |
| "rewards/rejected": -119.47471618652344, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 2.3820224719101124, | |
| "grad_norm": 0.001886666170321405, | |
| "learning_rate": 3.201726662960363e-07, | |
| "logits/chosen": 1.4487926959991455, | |
| "logits/rejected": 1.2953495979309082, | |
| "logps/chosen": -2931.4873046875, | |
| "logps/rejected": -3765.528564453125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4385576248168945, | |
| "rewards/margins": 141.048583984375, | |
| "rewards/rejected": -135.6100311279297, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 2.404494382022472, | |
| "grad_norm": 0.0003725312708411366, | |
| "learning_rate": 2.9593254310355485e-07, | |
| "logits/chosen": 1.5249533653259277, | |
| "logits/rejected": 1.36188805103302, | |
| "logps/chosen": -2958.6279296875, | |
| "logps/rejected": -3625.80859375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.046311378479004, | |
| "rewards/margins": 136.48867797851562, | |
| "rewards/rejected": -128.44235229492188, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 2.4269662921348316, | |
| "grad_norm": 0.0058527453802526, | |
| "learning_rate": 2.725458361377465e-07, | |
| "logits/chosen": 1.449507236480713, | |
| "logits/rejected": 1.195552110671997, | |
| "logps/chosen": -3101.913330078125, | |
| "logps/rejected": -3919.42626953125, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 9.668648719787598, | |
| "rewards/margins": 170.04879760742188, | |
| "rewards/rejected": -160.38015747070312, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 2.449438202247191, | |
| "grad_norm": 0.004259227309376001, | |
| "learning_rate": 2.5002912137622743e-07, | |
| "logits/chosen": 1.3936243057250977, | |
| "logits/rejected": 1.1740200519561768, | |
| "logps/chosen": -2701.333740234375, | |
| "logps/rejected": -3472.6923828125, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 11.122644424438477, | |
| "rewards/margins": 145.8236083984375, | |
| "rewards/rejected": -134.70095825195312, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.4719101123595504, | |
| "grad_norm": 0.010651292279362679, | |
| "learning_rate": 2.2839835816549365e-07, | |
| "logits/chosen": 1.711632490158081, | |
| "logits/rejected": 1.4845446348190308, | |
| "logps/chosen": -3014.84912109375, | |
| "logps/rejected": -3401.6298828125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.185779571533203, | |
| "rewards/margins": 117.65122985839844, | |
| "rewards/rejected": -109.4654541015625, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 2.49438202247191, | |
| "grad_norm": 0.21365472674369812, | |
| "learning_rate": 2.0766887790929072e-07, | |
| "logits/chosen": 1.5201102495193481, | |
| "logits/rejected": 1.3360121250152588, | |
| "logps/chosen": -2596.279296875, | |
| "logps/rejected": -3536.295166015625, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 9.575386047363281, | |
| "rewards/margins": 136.92886352539062, | |
| "rewards/rejected": -127.35346221923828, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 2.5168539325842696, | |
| "grad_norm": 0.06359975039958954, | |
| "learning_rate": 1.8785537320205808e-07, | |
| "logits/chosen": 1.4054570198059082, | |
| "logits/rejected": 1.304233431816101, | |
| "logps/chosen": -2882.770263671875, | |
| "logps/rejected": -3637.910888671875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.500956535339355, | |
| "rewards/margins": 114.78219604492188, | |
| "rewards/rejected": -105.28123474121094, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 2.539325842696629, | |
| "grad_norm": 0.039696987718343735, | |
| "learning_rate": 1.6897188741514286e-07, | |
| "logits/chosen": 1.3486000299453735, | |
| "logits/rejected": 1.2321511507034302, | |
| "logps/chosen": -2972.344970703125, | |
| "logps/rejected": -3984.229248046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.131504535675049, | |
| "rewards/margins": 162.7792205810547, | |
| "rewards/rejected": -157.64772033691406, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 2.561797752808989, | |
| "grad_norm": 0.002948309760540724, | |
| "learning_rate": 1.510318047431713e-07, | |
| "logits/chosen": 1.4727129936218262, | |
| "logits/rejected": 1.3785285949707031, | |
| "logps/chosen": -2675.683837890625, | |
| "logps/rejected": -3297.158447265625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 7.861666679382324, | |
| "rewards/margins": 110.47186279296875, | |
| "rewards/rejected": -102.61019134521484, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.5842696629213484, | |
| "grad_norm": 0.07731137424707413, | |
| "learning_rate": 1.3404784071763015e-07, | |
| "logits/chosen": 1.4941082000732422, | |
| "logits/rejected": 1.4053186178207397, | |
| "logps/chosen": -2728.80615234375, | |
| "logps/rejected": -3415.1708984375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 10.857705116271973, | |
| "rewards/margins": 109.21708679199219, | |
| "rewards/rejected": -98.35939025878906, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 2.606741573033708, | |
| "grad_norm": 0.01123058795928955, | |
| "learning_rate": 1.1803203319438056e-07, | |
| "logits/chosen": 1.4337643384933472, | |
| "logits/rejected": 1.2645751237869263, | |
| "logps/chosen": -2684.67041015625, | |
| "logps/rejected": -3446.0908203125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 13.534300804138184, | |
| "rewards/margins": 135.90628051757812, | |
| "rewards/rejected": -122.37198638916016, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 2.629213483146067, | |
| "grad_norm": 0.7818881869316101, | |
| "learning_rate": 1.0299573382149235e-07, | |
| "logits/chosen": 1.4340091943740845, | |
| "logits/rejected": 1.2151674032211304, | |
| "logps/chosen": -3169.663330078125, | |
| "logps/rejected": -4115.5751953125, | |
| "loss": 0.0219, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 11.765824317932129, | |
| "rewards/margins": 178.14181518554688, | |
| "rewards/rejected": -166.37596130371094, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 2.6516853932584272, | |
| "grad_norm": 0.11178380995988846, | |
| "learning_rate": 8.894959999345015e-08, | |
| "logits/chosen": 1.4085586071014404, | |
| "logits/rejected": 1.317073941230774, | |
| "logps/chosen": -2706.8623046875, | |
| "logps/rejected": -3629.9091796875, | |
| "loss": 0.0109, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.750637531280518, | |
| "rewards/margins": 140.9330291748047, | |
| "rewards/rejected": -134.18240356445312, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 2.6741573033707864, | |
| "grad_norm": 0.009486271999776363, | |
| "learning_rate": 7.590358729742808e-08, | |
| "logits/chosen": 1.5044245719909668, | |
| "logits/rejected": 1.3787866830825806, | |
| "logps/chosen": -2867.752197265625, | |
| "logps/rejected": -3833.509765625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.230460166931152, | |
| "rewards/margins": 134.28904724121094, | |
| "rewards/rejected": -128.05857849121094, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.696629213483146, | |
| "grad_norm": 0.009250489063560963, | |
| "learning_rate": 6.386694245699181e-08, | |
| "logits/chosen": 1.5157657861709595, | |
| "logits/rejected": 1.2433254718780518, | |
| "logps/chosen": -3022.373046875, | |
| "logps/rejected": -3732.22900390625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7557570934295654, | |
| "rewards/margins": 130.84677124023438, | |
| "rewards/rejected": -128.0910186767578, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 2.7191011235955056, | |
| "grad_norm": 0.1917319893836975, | |
| "learning_rate": 5.284819677822611e-08, | |
| "logits/chosen": 1.6072005033493042, | |
| "logits/rejected": 1.528849720954895, | |
| "logps/chosen": -2894.672119140625, | |
| "logps/rejected": -3495.853515625, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 3.3133740425109863, | |
| "rewards/margins": 105.75206756591797, | |
| "rewards/rejected": -102.43870544433594, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.741573033707865, | |
| "grad_norm": 0.03384300321340561, | |
| "learning_rate": 4.285516010293522e-08, | |
| "logits/chosen": 1.4517195224761963, | |
| "logits/rejected": 1.3014264106750488, | |
| "logps/chosen": -2851.070556640625, | |
| "logps/rejected": -3593.665771484375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 7.4544267654418945, | |
| "rewards/margins": 122.42274475097656, | |
| "rewards/rejected": -114.96832275390625, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 2.764044943820225, | |
| "grad_norm": 0.24889694154262543, | |
| "learning_rate": 3.389491527319999e-08, | |
| "logits/chosen": 1.4583051204681396, | |
| "logits/rejected": 1.2614139318466187, | |
| "logps/chosen": -2827.8134765625, | |
| "logps/rejected": -3561.30810546875, | |
| "loss": 0.0217, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 0.6058197617530823, | |
| "rewards/margins": 129.5867919921875, | |
| "rewards/rejected": -128.98095703125, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 2.7865168539325844, | |
| "grad_norm": 0.06888113170862198, | |
| "learning_rate": 2.5973813111218548e-08, | |
| "logits/chosen": 1.529250144958496, | |
| "logits/rejected": 1.247063159942627, | |
| "logps/chosen": -2882.323974609375, | |
| "logps/rejected": -3656.96044921875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.58204174041748, | |
| "rewards/margins": 154.1719970703125, | |
| "rewards/rejected": -144.5899658203125, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.808988764044944, | |
| "grad_norm": 0.0029755791183561087, | |
| "learning_rate": 1.909746791798317e-08, | |
| "logits/chosen": 1.4555425643920898, | |
| "logits/rejected": 1.2920844554901123, | |
| "logps/chosen": -2807.64208984375, | |
| "logps/rejected": -3475.54931640625, | |
| "loss": 0.0217, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 5.643215179443359, | |
| "rewards/margins": 125.7391128540039, | |
| "rewards/rejected": -120.09590148925781, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.831460674157303, | |
| "grad_norm": 0.009821542538702488, | |
| "learning_rate": 1.3270753493989374e-08, | |
| "logits/chosen": 1.535863995552063, | |
| "logits/rejected": 1.3580735921859741, | |
| "logps/chosen": -2754.88818359375, | |
| "logps/rejected": -3732.697021484375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 7.623423099517822, | |
| "rewards/margins": 136.6768035888672, | |
| "rewards/rejected": -129.05337524414062, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 2.853932584269663, | |
| "grad_norm": 0.5018057227134705, | |
| "learning_rate": 8.49779968479436e-09, | |
| "logits/chosen": 1.3728063106536865, | |
| "logits/rejected": 1.154386281967163, | |
| "logps/chosen": -3219.5546875, | |
| "logps/rejected": -3955.0615234375, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.1031904220581055, | |
| "rewards/margins": 135.11688232421875, | |
| "rewards/rejected": -133.01368713378906, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 2.8764044943820224, | |
| "grad_norm": 0.0029928251169621944, | |
| "learning_rate": 4.781989453874814e-09, | |
| "logits/chosen": 1.589327335357666, | |
| "logits/rejected": 1.44749116897583, | |
| "logps/chosen": -2659.24462890625, | |
| "logps/rejected": -3233.244873046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 10.386514663696289, | |
| "rewards/margins": 102.26481628417969, | |
| "rewards/rejected": -91.87830352783203, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 2.898876404494382, | |
| "grad_norm": 0.009541017934679985, | |
| "learning_rate": 2.1259564848570834e-09, | |
| "logits/chosen": 1.5677722692489624, | |
| "logits/rejected": 1.2758667469024658, | |
| "logps/chosen": -2889.547607421875, | |
| "logps/rejected": -3603.37109375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 15.972006797790527, | |
| "rewards/margins": 140.3019256591797, | |
| "rewards/rejected": -124.32991790771484, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.9213483146067416, | |
| "grad_norm": 0.007502752356231213, | |
| "learning_rate": 5.315833148210603e-10, | |
| "logits/chosen": 1.6323837041854858, | |
| "logits/rejected": 1.446678876876831, | |
| "logps/chosen": -2922.07568359375, | |
| "logps/rejected": -3691.432373046875, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 12.317670822143555, | |
| "rewards/margins": 135.18690490722656, | |
| "rewards/rejected": -122.86924743652344, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 2.943820224719101, | |
| "grad_norm": 0.2958358824253082, | |
| "learning_rate": 0.0, | |
| "logits/chosen": 1.4742579460144043, | |
| "logits/rejected": 1.2774202823638916, | |
| "logps/chosen": -2621.55615234375, | |
| "logps/rejected": -3527.73193359375, | |
| "loss": 0.0217, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 11.16303539276123, | |
| "rewards/margins": 133.13824462890625, | |
| "rewards/rejected": -121.9752197265625, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 2.943820224719101, | |
| "step": 132, | |
| "total_flos": 228521444442112.0, | |
| "train_loss": 0.17045999738028772, | |
| "train_runtime": 5166.54, | |
| "train_samples_per_second": 1.651, | |
| "train_steps_per_second": 0.026 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 132, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 228521444442112.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |