| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0002, |
| "grad_norm": 153.0, |
| "learning_rate": 0.0, |
| "logits/chosen": 0.38140869140625, |
| "logits/rejected": -0.0750732421875, |
| "logps/chosen": -179.625, |
| "logps/rejected": -175.5, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.01251220703125, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.01251220703125, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 152.0, |
| "learning_rate": 1.6333333333333334e-07, |
| "logits/chosen": -0.07141295820474625, |
| "logits/rejected": -0.49311113357543945, |
| "logps/chosen": -169.4234619140625, |
| "logps/rejected": -166.2244873046875, |
| "loss": 0.6897, |
| "rewards/accuracies": 0.32397958636283875, |
| "rewards/chosen": 0.022099709138274193, |
| "rewards/margins": 0.00855239573866129, |
| "rewards/rejected": 0.013539839535951614, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 173.0, |
| "learning_rate": 3.3e-07, |
| "logits/chosen": -0.19355152547359467, |
| "logits/rejected": -0.6559900045394897, |
| "logps/chosen": -169.9737548828125, |
| "logps/rejected": -168.53875732421875, |
| "loss": 0.6714, |
| "rewards/accuracies": 0.47999998927116394, |
| "rewards/chosen": 0.056079406291246414, |
| "rewards/margins": 0.04595337063074112, |
| "rewards/rejected": 0.010139770805835724, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 159.0, |
| "learning_rate": 4.966666666666666e-07, |
| "logits/chosen": -0.1707330346107483, |
| "logits/rejected": -0.6063339114189148, |
| "logps/chosen": -168.2937469482422, |
| "logps/rejected": -166.5187530517578, |
| "loss": 0.6156, |
| "rewards/accuracies": 0.7900000214576721, |
| "rewards/chosen": 0.1966903656721115, |
| "rewards/margins": 0.16668151319026947, |
| "rewards/rejected": 0.030118407681584358, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 87.0, |
| "learning_rate": 6.633333333333334e-07, |
| "logits/chosen": -0.08012771606445312, |
| "logits/rejected": -0.5313219428062439, |
| "logps/chosen": -163.4462432861328, |
| "logps/rejected": -166.76124572753906, |
| "loss": 0.5115, |
| "rewards/accuracies": 0.8550000190734863, |
| "rewards/chosen": 0.4547726511955261, |
| "rewards/margins": 0.4213232398033142, |
| "rewards/rejected": 0.03348724544048309, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 67.5, |
| "learning_rate": 8.300000000000001e-07, |
| "logits/chosen": -0.17801956832408905, |
| "logits/rejected": -0.6736994981765747, |
| "logps/chosen": -162.8975067138672, |
| "logps/rejected": -169.00250244140625, |
| "loss": 0.3636, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 0.7460748553276062, |
| "rewards/margins": 0.8772411942481995, |
| "rewards/rejected": -0.1307925432920456, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 29.625, |
| "learning_rate": 9.966666666666667e-07, |
| "logits/chosen": -0.3131498694419861, |
| "logits/rejected": -0.8370306491851807, |
| "logps/chosen": -156.57749938964844, |
| "logps/rejected": -171.12249755859375, |
| "loss": 0.2411, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 0.9195745587348938, |
| "rewards/margins": 1.5089257955551147, |
| "rewards/rejected": -0.5890390276908875, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 8.625, |
| "learning_rate": 1.1633333333333333e-06, |
| "logits/chosen": -0.809218168258667, |
| "logits/rejected": -1.343685269355774, |
| "logps/chosen": -165.8387451171875, |
| "logps/rejected": -189.9462432861328, |
| "loss": 0.1339, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.9223541021347046, |
| "rewards/margins": 2.6102733612060547, |
| "rewards/rejected": -1.6883777379989624, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.234375, |
| "learning_rate": 1.3300000000000002e-06, |
| "logits/chosen": -1.2586804628372192, |
| "logits/rejected": -1.9825918674468994, |
| "logps/chosen": -158.1649932861328, |
| "logps/rejected": -198.03750610351562, |
| "loss": 0.0961, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 1.2969841957092285, |
| "rewards/margins": 4.182460784912109, |
| "rewards/rejected": -2.8857672214508057, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.349609375, |
| "learning_rate": 1.4966666666666668e-06, |
| "logits/chosen": -1.5829663276672363, |
| "logits/rejected": -2.467480421066284, |
| "logps/chosen": -147.5625, |
| "logps/rejected": -204.00750732421875, |
| "loss": 0.1038, |
| "rewards/accuracies": 0.8525000214576721, |
| "rewards/chosen": 1.9529736042022705, |
| "rewards/margins": 5.97265625, |
| "rewards/rejected": -4.018417835235596, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.6633333333333334e-06, |
| "logits/chosen": -1.8108301162719727, |
| "logits/rejected": -2.774589776992798, |
| "logps/chosen": -147.88250732421875, |
| "logps/rejected": -219.89625549316406, |
| "loss": 0.0802, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 2.4084813594818115, |
| "rewards/margins": 7.462968826293945, |
| "rewards/rejected": -5.055732250213623, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.21484375, |
| "learning_rate": 1.83e-06, |
| "logits/chosen": -1.8340039253234863, |
| "logits/rejected": -2.798632860183716, |
| "logps/chosen": -138.77374267578125, |
| "logps/rejected": -212.2449951171875, |
| "loss": 0.1111, |
| "rewards/accuracies": 0.8399999737739563, |
| "rewards/chosen": 2.4734740257263184, |
| "rewards/margins": 7.538750171661377, |
| "rewards/rejected": -5.063173770904541, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 1.996666666666667e-06, |
| "logits/chosen": -1.9733397960662842, |
| "logits/rejected": -2.937753915786743, |
| "logps/chosen": -142.70875549316406, |
| "logps/rejected": -223.96624755859375, |
| "loss": 0.0953, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 2.598142147064209, |
| "rewards/margins": 8.3950777053833, |
| "rewards/rejected": -5.798213005065918, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.2265625, |
| "learning_rate": 2.1633333333333335e-06, |
| "logits/chosen": -2.0656299591064453, |
| "logits/rejected": -3.027539014816284, |
| "logps/chosen": -142.24249267578125, |
| "logps/rejected": -230.47250366210938, |
| "loss": 0.0883, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 2.734023332595825, |
| "rewards/margins": 9.052812576293945, |
| "rewards/rejected": -6.316962718963623, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.30859375, |
| "learning_rate": 2.33e-06, |
| "logits/chosen": -1.9706201553344727, |
| "logits/rejected": -2.9912109375, |
| "logps/chosen": -138.24249267578125, |
| "logps/rejected": -228.1925048828125, |
| "loss": 0.09, |
| "rewards/accuracies": 0.8700000047683716, |
| "rewards/chosen": 2.9716455936431885, |
| "rewards/margins": 9.053828239440918, |
| "rewards/rejected": -6.082441329956055, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.01446533203125, |
| "learning_rate": 2.4966666666666668e-06, |
| "logits/chosen": -1.9857901334762573, |
| "logits/rejected": -2.991640567779541, |
| "logps/chosen": -134.00999450683594, |
| "logps/rejected": -224.47625732421875, |
| "loss": 0.0848, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 3.2274608612060547, |
| "rewards/margins": 9.318828582763672, |
| "rewards/rejected": -6.091513633728027, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.15625, |
| "learning_rate": 2.6633333333333334e-06, |
| "logits/chosen": -1.9683300256729126, |
| "logits/rejected": -3.0144922733306885, |
| "logps/chosen": -142.32000732421875, |
| "logps/rejected": -238.3975067138672, |
| "loss": 0.071, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 3.2487499713897705, |
| "rewards/margins": 9.865859031677246, |
| "rewards/rejected": -6.616034984588623, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.00604248046875, |
| "learning_rate": 2.83e-06, |
| "logits/chosen": -2.0511231422424316, |
| "logits/rejected": -3.087148427963257, |
| "logps/chosen": -138.8387451171875, |
| "logps/rejected": -237.4425048828125, |
| "loss": 0.0813, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 3.2463672161102295, |
| "rewards/margins": 10.090624809265137, |
| "rewards/rejected": -6.84329080581665, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.01470947265625, |
| "learning_rate": 2.996666666666667e-06, |
| "logits/chosen": -2.1478612422943115, |
| "logits/rejected": -3.173710823059082, |
| "logps/chosen": -143.4824981689453, |
| "logps/rejected": -246.1737518310547, |
| "loss": 0.0726, |
| "rewards/accuracies": 0.8949999809265137, |
| "rewards/chosen": 3.197275400161743, |
| "rewards/margins": 10.487030982971191, |
| "rewards/rejected": -7.29049825668335, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.0067138671875, |
| "learning_rate": 3.1633333333333337e-06, |
| "logits/chosen": -1.9968359470367432, |
| "logits/rejected": -3.075078010559082, |
| "logps/chosen": -136.73875427246094, |
| "logps/rejected": -234.65499877929688, |
| "loss": 0.0813, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 3.270380973815918, |
| "rewards/margins": 10.061562538146973, |
| "rewards/rejected": -6.791113376617432, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 3.3300000000000003e-06, |
| "logits/chosen": -2.032480478286743, |
| "logits/rejected": -3.120234489440918, |
| "logps/chosen": -141.06625366210938, |
| "logps/rejected": -247.29124450683594, |
| "loss": 0.0813, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 3.2919139862060547, |
| "rewards/margins": 10.692343711853027, |
| "rewards/rejected": -7.398417949676514, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.017822265625, |
| "learning_rate": 3.496666666666667e-06, |
| "logits/chosen": -2.1911962032318115, |
| "logits/rejected": -3.1728124618530273, |
| "logps/chosen": -143.2687530517578, |
| "logps/rejected": -246.8125, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 3.0637621879577637, |
| "rewards/margins": 10.748281478881836, |
| "rewards/rejected": -7.685234546661377, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.060546875, |
| "learning_rate": 3.6633333333333336e-06, |
| "logits/chosen": -2.195253849029541, |
| "logits/rejected": -3.108535051345825, |
| "logps/chosen": -134.12750244140625, |
| "logps/rejected": -233.7412567138672, |
| "loss": 0.1124, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.9126367568969727, |
| "rewards/margins": 10.165234565734863, |
| "rewards/rejected": -7.252851486206055, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.00811767578125, |
| "learning_rate": 3.830000000000001e-06, |
| "logits/chosen": -2.157832145690918, |
| "logits/rejected": -3.157460927963257, |
| "logps/chosen": -140.08250427246094, |
| "logps/rejected": -249.22000122070312, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 3.393681526184082, |
| "rewards/margins": 11.291093826293945, |
| "rewards/rejected": -7.8983154296875, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.37109375, |
| "learning_rate": 3.996666666666667e-06, |
| "logits/chosen": -2.176523447036743, |
| "logits/rejected": -3.145156145095825, |
| "logps/chosen": -136.72625732421875, |
| "logps/rejected": -240.5812530517578, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 3.007997989654541, |
| "rewards/margins": 10.645390510559082, |
| "rewards/rejected": -7.636513710021973, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.0303955078125, |
| "learning_rate": 4.163333333333334e-06, |
| "logits/chosen": -2.346796989440918, |
| "logits/rejected": -3.239375114440918, |
| "logps/chosen": -141.64999389648438, |
| "logps/rejected": -254.55624389648438, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 3.1400487422943115, |
| "rewards/margins": 11.464765548706055, |
| "rewards/rejected": -8.323661804199219, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.8828125, |
| "learning_rate": 4.33e-06, |
| "logits/chosen": -2.233154296875, |
| "logits/rejected": -3.121083974838257, |
| "logps/chosen": -137.37875366210938, |
| "logps/rejected": -247.23875427246094, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 2.990058660507202, |
| "rewards/margins": 11.343280792236328, |
| "rewards/rejected": -8.351679801940918, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.0081787109375, |
| "learning_rate": 4.496666666666667e-06, |
| "logits/chosen": -2.3371288776397705, |
| "logits/rejected": -3.1724023818969727, |
| "logps/chosen": -142.37249755859375, |
| "logps/rejected": -257.1512451171875, |
| "loss": 0.0519, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": 3.159780263900757, |
| "rewards/margins": 11.788749694824219, |
| "rewards/rejected": -8.627890586853027, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.663333333333333e-06, |
| "logits/chosen": -2.136476993560791, |
| "logits/rejected": -3.0350780487060547, |
| "logps/chosen": -141.83624267578125, |
| "logps/rejected": -258.1824951171875, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 2.5560545921325684, |
| "rewards/margins": 11.789999961853027, |
| "rewards/rejected": -9.23446273803711, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.001434326171875, |
| "learning_rate": 4.83e-06, |
| "logits/chosen": -2.327712297439575, |
| "logits/rejected": -3.039235830307007, |
| "logps/chosen": -151.52749633789062, |
| "logps/rejected": -260.1824951171875, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 1.725927710533142, |
| "rewards/margins": 11.106093406677246, |
| "rewards/rejected": -9.376816749572754, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.000873565673828125, |
| "learning_rate": 4.9966666666666665e-06, |
| "logits/chosen": -2.236884832382202, |
| "logits/rejected": -2.9620020389556885, |
| "logps/chosen": -152.15249633789062, |
| "logps/rejected": -266.6524963378906, |
| "loss": 0.083, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 1.9792224168777466, |
| "rewards/margins": 11.753125190734863, |
| "rewards/rejected": -9.775390625, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.99983747144442e-06, |
| "logits/chosen": -2.1749095916748047, |
| "logits/rejected": -3.0108959674835205, |
| "logps/chosen": -158.8925018310547, |
| "logps/rejected": -274.04998779296875, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 1.5659692287445068, |
| "rewards/margins": 11.828437805175781, |
| "rewards/rejected": -10.266562461853027, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.00445556640625, |
| "learning_rate": 4.999336572604176e-06, |
| "logits/chosen": -2.3599512577056885, |
| "logits/rejected": -2.979990243911743, |
| "logps/chosen": -150.02499389648438, |
| "logps/rejected": -264.9649963378906, |
| "loss": 0.0934, |
| "rewards/accuracies": 0.8650000095367432, |
| "rewards/chosen": 1.7431994676589966, |
| "rewards/margins": 11.767656326293945, |
| "rewards/rejected": -10.0248441696167, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.07763671875, |
| "learning_rate": 4.998497303600191e-06, |
| "logits/chosen": -2.491284132003784, |
| "logits/rejected": -3.098569393157959, |
| "logps/chosen": -151.4949951171875, |
| "logps/rejected": -261.3475036621094, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 1.6169995069503784, |
| "rewards/margins": 11.283594131469727, |
| "rewards/rejected": -9.667499542236328, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.001434326171875, |
| "learning_rate": 4.997319778056057e-06, |
| "logits/chosen": -2.4677734375, |
| "logits/rejected": -3.164921760559082, |
| "logps/chosen": -160.6125030517578, |
| "logps/rejected": -291.8324890136719, |
| "loss": 0.0779, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 1.3262377977371216, |
| "rewards/margins": 13.2251558303833, |
| "rewards/rejected": -11.900468826293945, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.0142822265625, |
| "learning_rate": 4.995804155389881e-06, |
| "logits/chosen": -2.2496564388275146, |
| "logits/rejected": -2.910781145095825, |
| "logps/chosen": -163.3000030517578, |
| "logps/rejected": -282.75, |
| "loss": 0.0899, |
| "rewards/accuracies": 0.8700000047683716, |
| "rewards/chosen": 1.067590355873108, |
| "rewards/margins": 12.181406021118164, |
| "rewards/rejected": -11.118515968322754, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.4921875, |
| "learning_rate": 4.9939506407927115e-06, |
| "logits/chosen": -2.496284246444702, |
| "logits/rejected": -3.016840934753418, |
| "logps/chosen": -165.72999572753906, |
| "logps/rejected": -291.8699951171875, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 0.7725061178207397, |
| "rewards/margins": 12.811249732971191, |
| "rewards/rejected": -12.039375305175781, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.0125732421875, |
| "learning_rate": 4.991759485200754e-06, |
| "logits/chosen": -2.5551586151123047, |
| "logits/rejected": -2.9110498428344727, |
| "logps/chosen": -162.89500427246094, |
| "logps/rejected": -287.5174865722656, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 0.4655786156654358, |
| "rewards/margins": 12.595937728881836, |
| "rewards/rejected": -12.12906265258789, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.989230985261403e-06, |
| "logits/chosen": -2.6131153106689453, |
| "logits/rejected": -2.968681573867798, |
| "logps/chosen": -168.32749938964844, |
| "logps/rejected": -289.5, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 0.41573241353034973, |
| "rewards/margins": 12.315781593322754, |
| "rewards/rejected": -11.899062156677246, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.0218505859375, |
| "learning_rate": 4.986365483293072e-06, |
| "logits/chosen": -2.2319605350494385, |
| "logits/rejected": -2.6794629096984863, |
| "logps/chosen": -165.3937530517578, |
| "logps/rejected": -280.6925048828125, |
| "loss": 0.0832, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 0.6209277510643005, |
| "rewards/margins": 11.815312385559082, |
| "rewards/rejected": -11.191679954528809, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.9831633672388605e-06, |
| "logits/chosen": -2.148073673248291, |
| "logits/rejected": -2.8818554878234863, |
| "logps/chosen": -156.14500427246094, |
| "logps/rejected": -290.29998779296875, |
| "loss": 0.0674, |
| "rewards/accuracies": 0.9024999737739563, |
| "rewards/chosen": 1.61712646484375, |
| "rewards/margins": 13.622812271118164, |
| "rewards/rejected": -12.008906364440918, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.00494384765625, |
| "learning_rate": 4.979625070614023e-06, |
| "logits/chosen": -1.928134799003601, |
| "logits/rejected": -2.771103620529175, |
| "logps/chosen": -149.2050018310547, |
| "logps/rejected": -288.3500061035156, |
| "loss": 0.0674, |
| "rewards/accuracies": 0.9024999737739563, |
| "rewards/chosen": 2.3412108421325684, |
| "rewards/margins": 14.15218734741211, |
| "rewards/rejected": -11.807969093322754, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.047607421875, |
| "learning_rate": 4.975751072447283e-06, |
| "logits/chosen": -1.7242242097854614, |
| "logits/rejected": -2.762929677963257, |
| "logps/chosen": -138.24374389648438, |
| "logps/rejected": -269.7225036621094, |
| "loss": 0.0622, |
| "rewards/accuracies": 0.9100000262260437, |
| "rewards/chosen": 3.4145116806030273, |
| "rewards/margins": 13.523906707763672, |
| "rewards/rejected": -10.108515739440918, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.9715418972159794e-06, |
| "logits/chosen": -1.8216897249221802, |
| "logits/rejected": -2.836932420730591, |
| "logps/chosen": -136.2324981689453, |
| "logps/rejected": -263.7225036621094, |
| "loss": 0.102, |
| "rewards/accuracies": 0.8525000214576721, |
| "rewards/chosen": 3.042714834213257, |
| "rewards/margins": 12.95718765258789, |
| "rewards/rejected": -9.909453392028809, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.000362396240234375, |
| "learning_rate": 4.96699811477506e-06, |
| "logits/chosen": -2.153923273086548, |
| "logits/rejected": -3.0781617164611816, |
| "logps/chosen": -139.92250061035156, |
| "logps/rejected": -269.2449951171875, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 2.6978321075439453, |
| "rewards/margins": 13.111719131469727, |
| "rewards/rejected": -10.4213285446167, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.00933837890625, |
| "learning_rate": 4.962120340279933e-06, |
| "logits/chosen": -2.206913948059082, |
| "logits/rejected": -3.013432502746582, |
| "logps/chosen": -149.11500549316406, |
| "logps/rejected": -284.74749755859375, |
| "loss": 0.0778, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 2.5205469131469727, |
| "rewards/margins": 13.883281707763672, |
| "rewards/rejected": -11.363203048706055, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.007232666015625, |
| "learning_rate": 4.956909234103184e-06, |
| "logits/chosen": -2.232741594314575, |
| "logits/rejected": -2.846698045730591, |
| "logps/chosen": -147.88999938964844, |
| "logps/rejected": -290.8949890136719, |
| "loss": 0.0691, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 2.5749120712280273, |
| "rewards/margins": 14.527656555175781, |
| "rewards/rejected": -11.950780868530273, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.6015625, |
| "learning_rate": 4.951365501745172e-06, |
| "logits/chosen": -2.397939443588257, |
| "logits/rejected": -2.8929004669189453, |
| "logps/chosen": -144.75750732421875, |
| "logps/rejected": -275.7850036621094, |
| "loss": 0.0899, |
| "rewards/accuracies": 0.8700000047683716, |
| "rewards/chosen": 2.490410089492798, |
| "rewards/margins": 13.264843940734863, |
| "rewards/rejected": -10.7734375, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.02099609375, |
| "learning_rate": 4.945489893738518e-06, |
| "logits/chosen": -2.36920166015625, |
| "logits/rejected": -2.889394521713257, |
| "logps/chosen": -136.7274932861328, |
| "logps/rejected": -272.572509765625, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 2.901992082595825, |
| "rewards/margins": 13.673593521118164, |
| "rewards/rejected": -10.7650785446167, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.007110595703125, |
| "learning_rate": 4.93928320554649e-06, |
| "logits/chosen": -1.9525684118270874, |
| "logits/rejected": -2.8195831775665283, |
| "logps/chosen": -135.28875732421875, |
| "logps/rejected": -266.6099853515625, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 3.462538957595825, |
| "rewards/margins": 13.346015930175781, |
| "rewards/rejected": -9.8804292678833, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.932746277455317e-06, |
| "logits/chosen": -1.662766456604004, |
| "logits/rejected": -2.691631555557251, |
| "logps/chosen": -133.22000122070312, |
| "logps/rejected": -272.822509765625, |
| "loss": 0.1037, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 3.5004687309265137, |
| "rewards/margins": 14.248437881469727, |
| "rewards/rejected": -10.7514066696167, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.00982666015625, |
| "learning_rate": 4.9258799944604215e-06, |
| "logits/chosen": -1.7904162406921387, |
| "logits/rejected": -2.6735968589782715, |
| "logps/chosen": -132.4512481689453, |
| "logps/rejected": -281.63751220703125, |
| "loss": 0.0743, |
| "rewards/accuracies": 0.8924999833106995, |
| "rewards/chosen": 3.9552342891693115, |
| "rewards/margins": 15.276874542236328, |
| "rewards/rejected": -11.324609756469727, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.013916015625, |
| "learning_rate": 4.918685286146611e-06, |
| "logits/chosen": -1.7311677932739258, |
| "logits/rejected": -2.691605567932129, |
| "logps/chosen": -132.29249572753906, |
| "logps/rejected": -285.6025085449219, |
| "loss": 0.0834, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 3.967148542404175, |
| "rewards/margins": 15.4975004196167, |
| "rewards/rejected": -11.534062385559082, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.005035400390625, |
| "learning_rate": 4.911163126562218e-06, |
| "logits/chosen": -1.7594763040542603, |
| "logits/rejected": -2.734794855117798, |
| "logps/chosen": -134.93499755859375, |
| "logps/rejected": -289.4525146484375, |
| "loss": 0.0933, |
| "rewards/accuracies": 0.8650000095367432, |
| "rewards/chosen": 3.4932031631469727, |
| "rewards/margins": 15.765155792236328, |
| "rewards/rejected": -12.267890930175781, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.00014495849609375, |
| "learning_rate": 4.903314534087243e-06, |
| "logits/chosen": -1.64252507686615, |
| "logits/rejected": -2.694523811340332, |
| "logps/chosen": -137.2375030517578, |
| "logps/rejected": -277.3800048828125, |
| "loss": 0.0743, |
| "rewards/accuracies": 0.8924999833106995, |
| "rewards/chosen": 3.0264551639556885, |
| "rewards/margins": 14.148124694824219, |
| "rewards/rejected": -11.121874809265137, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.73046875, |
| "learning_rate": 4.895140571295469e-06, |
| "logits/chosen": -1.6052197217941284, |
| "logits/rejected": -2.6878418922424316, |
| "logps/chosen": -130.99374389648438, |
| "logps/rejected": -274.2774963378906, |
| "loss": 0.0933, |
| "rewards/accuracies": 0.8650000095367432, |
| "rewards/chosen": 3.450859308242798, |
| "rewards/margins": 14.58187484741211, |
| "rewards/rejected": -11.129921913146973, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.001068115234375, |
| "learning_rate": 4.886642344810612e-06, |
| "logits/chosen": -2.1891088485717773, |
| "logits/rejected": -2.992116689682007, |
| "logps/chosen": -131.99749755859375, |
| "logps/rejected": -261.6474914550781, |
| "loss": 0.1072, |
| "rewards/accuracies": 0.8450000286102295, |
| "rewards/chosen": 3.183906316757202, |
| "rewards/margins": 13.224843978881836, |
| "rewards/rejected": -10.036718368530273, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.0004825592041015625, |
| "learning_rate": 4.877821005156504e-06, |
| "logits/chosen": -2.3619725704193115, |
| "logits/rejected": -3.226367235183716, |
| "logps/chosen": -138.77000427246094, |
| "logps/rejected": -263.8999938964844, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 3.243339776992798, |
| "rewards/margins": 12.722031593322754, |
| "rewards/rejected": -9.478281021118164, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.0098876953125, |
| "learning_rate": 4.868677746601325e-06, |
| "logits/chosen": -2.1765477657318115, |
| "logits/rejected": -3.075366258621216, |
| "logps/chosen": -134.6649932861328, |
| "logps/rejected": -251.0850067138672, |
| "loss": 0.102, |
| "rewards/accuracies": 0.8525000214576721, |
| "rewards/chosen": 3.067031145095825, |
| "rewards/margins": 11.9115629196167, |
| "rewards/rejected": -8.846875190734863, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.859213806995924e-06, |
| "logits/chosen": -2.022216796875, |
| "logits/rejected": -3.042172908782959, |
| "logps/chosen": -135.00750732421875, |
| "logps/rejected": -262.7250061035156, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 3.4410157203674316, |
| "rewards/margins": 13.021562576293945, |
| "rewards/rejected": -9.582109451293945, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.0026092529296875, |
| "learning_rate": 4.849430467606228e-06, |
| "logits/chosen": -1.728432059288025, |
| "logits/rejected": -2.991831064224243, |
| "logps/chosen": -129.99374389648438, |
| "logps/rejected": -245.56500244140625, |
| "loss": 0.1003, |
| "rewards/accuracies": 0.8550000190734863, |
| "rewards/chosen": 3.511894464492798, |
| "rewards/margins": 11.818437576293945, |
| "rewards/rejected": -8.303203582763672, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.00012111663818359375, |
| "learning_rate": 4.839329052939784e-06, |
| "logits/chosen": -1.3972948789596558, |
| "logits/rejected": -2.9753711223602295, |
| "logps/chosen": -133.10374450683594, |
| "logps/rejected": -260.8924865722656, |
| "loss": 0.0778, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 3.828476667404175, |
| "rewards/margins": 12.948124885559082, |
| "rewards/rejected": -9.120469093322754, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.53515625, |
| "learning_rate": 4.82891093056644e-06, |
| "logits/chosen": -1.142480492591858, |
| "logits/rejected": -2.7538769245147705, |
| "logps/chosen": -126.52874755859375, |
| "logps/rejected": -251.2100067138672, |
| "loss": 0.0985, |
| "rewards/accuracies": 0.8575000166893005, |
| "rewards/chosen": 4.20089864730835, |
| "rewards/margins": 12.569218635559082, |
| "rewards/rejected": -8.369375228881836, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.0034332275390625, |
| "learning_rate": 4.818177510933194e-06, |
| "logits/chosen": -1.1324691772460938, |
| "logits/rejected": -2.7247815132141113, |
| "logps/chosen": -128.28125, |
| "logps/rejected": -250.6425018310547, |
| "loss": 0.0933, |
| "rewards/accuracies": 0.8650000095367432, |
| "rewards/chosen": 4.047187328338623, |
| "rewards/margins": 12.607500076293945, |
| "rewards/rejected": -8.559687614440918, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.015380859375, |
| "learning_rate": 4.807130247173252e-06, |
| "logits/chosen": -0.9328582882881165, |
| "logits/rejected": -2.533745050430298, |
| "logps/chosen": -124.90499877929688, |
| "logps/rejected": -245.25250244140625, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 4.320508003234863, |
| "rewards/margins": 12.207812309265137, |
| "rewards/rejected": -7.890781402587891, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.546875, |
| "learning_rate": 4.795770634909287e-06, |
| "logits/chosen": -0.9098020792007446, |
| "logits/rejected": -2.5764403343200684, |
| "logps/chosen": -127.57749938964844, |
| "logps/rejected": -257.6549987792969, |
| "loss": 0.0657, |
| "rewards/accuracies": 0.9049999713897705, |
| "rewards/chosen": 4.306952953338623, |
| "rewards/margins": 13.306875228881836, |
| "rewards/rejected": -8.999530792236328, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.00775146484375, |
| "learning_rate": 4.784100212050959e-06, |
| "logits/chosen": -1.165554165840149, |
| "logits/rejected": -2.720310688018799, |
| "logps/chosen": -135.34750366210938, |
| "logps/rejected": -268.3550109863281, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 3.560781240463257, |
| "rewards/margins": 13.581093788146973, |
| "rewards/rejected": -10.021249771118164, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.67578125, |
| "learning_rate": 4.772120558586711e-06, |
| "logits/chosen": -1.4726300239562988, |
| "logits/rejected": -2.9082860946655273, |
| "logps/chosen": -136.72500610351562, |
| "logps/rejected": -266.2174987792969, |
| "loss": 0.0864, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 3.1438281536102295, |
| "rewards/margins": 13.237656593322754, |
| "rewards/rejected": -10.093280792236328, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.000457763671875, |
| "learning_rate": 4.759833296369855e-06, |
| "logits/chosen": -1.7112644910812378, |
| "logits/rejected": -2.9018359184265137, |
| "logps/chosen": -144.86500549316406, |
| "logps/rejected": -274.8800048828125, |
| "loss": 0.0864, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 2.424466609954834, |
| "rewards/margins": 13.268437385559082, |
| "rewards/rejected": -10.842421531677246, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.00023937225341796875, |
| "learning_rate": 4.747240088899007e-06, |
| "logits/chosen": -1.8725781440734863, |
| "logits/rejected": -2.871826171875, |
| "logps/chosen": -145.64500427246094, |
| "logps/rejected": -284.7799987792969, |
| "loss": 0.083, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 2.372001886367798, |
| "rewards/margins": 13.883749961853027, |
| "rewards/rejected": -11.511327743530273, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.0089111328125, |
| "learning_rate": 4.734342641092873e-06, |
| "logits/chosen": -1.76885986328125, |
| "logits/rejected": -2.764961004257202, |
| "logps/chosen": -133.86749267578125, |
| "logps/rejected": -264.427490234375, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 3.250908136367798, |
| "rewards/margins": 13.281562805175781, |
| "rewards/rejected": -10.029687881469727, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.009765625, |
| "learning_rate": 4.72114269905943e-06, |
| "logits/chosen": -1.7043505907058716, |
| "logits/rejected": -2.7690234184265137, |
| "logps/chosen": -130.1699981689453, |
| "logps/rejected": -268.5350036621094, |
| "loss": 0.0761, |
| "rewards/accuracies": 0.8899999856948853, |
| "rewards/chosen": 3.886406183242798, |
| "rewards/margins": 13.834218978881836, |
| "rewards/rejected": -9.946484565734863, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.004180908203125, |
| "learning_rate": 4.70764204985953e-06, |
| "logits/chosen": -1.7190561294555664, |
| "logits/rejected": -2.7930660247802734, |
| "logps/chosen": -132.5762481689453, |
| "logps/rejected": -261.9324951171875, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 3.5982422828674316, |
| "rewards/margins": 13.246718406677246, |
| "rewards/rejected": -9.651249885559082, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.3984375, |
| "learning_rate": 4.693842521264963e-06, |
| "logits/chosen": -1.6216427087783813, |
| "logits/rejected": -2.808516263961792, |
| "logps/chosen": -126.45999908447266, |
| "logps/rejected": -260.43499755859375, |
| "loss": 0.0968, |
| "rewards/accuracies": 0.8600000143051147, |
| "rewards/chosen": 3.8330078125, |
| "rewards/margins": 13.640625, |
| "rewards/rejected": -9.81070327758789, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.0030670166015625, |
| "learning_rate": 4.679745981511005e-06, |
| "logits/chosen": -1.497025728225708, |
| "logits/rejected": -2.7889842987060547, |
| "logps/chosen": -127.35624694824219, |
| "logps/rejected": -263.0849914550781, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 4.245625019073486, |
| "rewards/margins": 13.9360933303833, |
| "rewards/rejected": -9.694062232971191, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.4609375, |
| "learning_rate": 4.665354339043487e-06, |
| "logits/chosen": -1.61189603805542, |
| "logits/rejected": -2.818247079849243, |
| "logps/chosen": -125.53874969482422, |
| "logps/rejected": -262.56500244140625, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 4.035273551940918, |
| "rewards/margins": 13.862030982971191, |
| "rewards/rejected": -9.828046798706055, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.005584716796875, |
| "learning_rate": 4.650669542260426e-06, |
| "logits/chosen": -1.8283697366714478, |
| "logits/rejected": -3.02060604095459, |
| "logps/chosen": -132.30624389648438, |
| "logps/rejected": -265.7875061035156, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 3.852304697036743, |
| "rewards/margins": 13.544843673706055, |
| "rewards/rejected": -9.695077896118164, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.006256103515625, |
| "learning_rate": 4.635693579248238e-06, |
| "logits/chosen": -1.77850341796875, |
| "logits/rejected": -2.859311580657959, |
| "logps/chosen": -131.875, |
| "logps/rejected": -267.4624938964844, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 3.6142187118530273, |
| "rewards/margins": 13.896875381469727, |
| "rewards/rejected": -10.281562805175781, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.0023345947265625, |
| "learning_rate": 4.620428477512588e-06, |
| "logits/chosen": -1.6927603483200073, |
| "logits/rejected": -2.8547582626342773, |
| "logps/chosen": -132.7550048828125, |
| "logps/rejected": -273.0150146484375, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 3.655820369720459, |
| "rewards/margins": 14.208906173706055, |
| "rewards/rejected": -10.555000305175781, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.8671875, |
| "learning_rate": 4.604876303703892e-06, |
| "logits/chosen": -1.5046154260635376, |
| "logits/rejected": -2.71980357170105, |
| "logps/chosen": -127.0512466430664, |
| "logps/rejected": -260.4525146484375, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 3.969980478286743, |
| "rewards/margins": 13.666093826293945, |
| "rewards/rejected": -9.696015357971191, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.5890391633375345e-06, |
| "logits/chosen": -1.6800073385238647, |
| "logits/rejected": -2.810532331466675, |
| "logps/chosen": -141.1374969482422, |
| "logps/rejected": -285.43499755859375, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 3.1419239044189453, |
| "rewards/margins": 14.576562881469727, |
| "rewards/rejected": -11.4341402053833, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.572919200508805e-06, |
| "logits/chosen": -1.7100884914398193, |
| "logits/rejected": -2.719179630279541, |
| "logps/chosen": -143.1024932861328, |
| "logps/rejected": -274.3575134277344, |
| "loss": 0.1037, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.4612793922424316, |
| "rewards/margins": 13.33968734741211, |
| "rewards/rejected": -10.880234718322754, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.78515625, |
| "learning_rate": 4.556518597602633e-06, |
| "logits/chosen": -1.8037463426589966, |
| "logits/rejected": -2.8130078315734863, |
| "logps/chosen": -149.1824951171875, |
| "logps/rejected": -280.9200134277344, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 2.325624942779541, |
| "rewards/margins": 13.437812805175781, |
| "rewards/rejected": -11.111093521118164, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.00099945068359375, |
| "learning_rate": 4.539839574998117e-06, |
| "logits/chosen": -1.8497778177261353, |
| "logits/rejected": -2.873305559158325, |
| "logps/chosen": -151.7624969482422, |
| "logps/rejected": -289.9024963378906, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 2.024599552154541, |
| "rewards/margins": 14.141249656677246, |
| "rewards/rejected": -12.12093734741211, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 5.90625, |
| "learning_rate": 4.522884390767928e-06, |
| "logits/chosen": -2.027651309967041, |
| "logits/rejected": -2.663203239440918, |
| "logps/chosen": -154.8524932861328, |
| "logps/rejected": -288.4624938964844, |
| "loss": 0.1054, |
| "rewards/accuracies": 0.8475000262260437, |
| "rewards/chosen": 0.9060644507408142, |
| "rewards/margins": 13.541406631469727, |
| "rewards/rejected": -12.634062767028809, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.671875, |
| "learning_rate": 4.5056553403726014e-06, |
| "logits/chosen": -1.9349523782730103, |
| "logits/rejected": -2.789375066757202, |
| "logps/chosen": -148.86500549316406, |
| "logps/rejected": -268.6025085449219, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 2.1701366901397705, |
| "rewards/margins": 12.2095308303833, |
| "rewards/rejected": -10.041191101074219, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 5.78125, |
| "learning_rate": 4.488154756349765e-06, |
| "logits/chosen": -1.8653271198272705, |
| "logits/rejected": -2.8621387481689453, |
| "logps/chosen": -143.14625549316406, |
| "logps/rejected": -274.0362548828125, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 2.540623664855957, |
| "rewards/margins": 13.264374732971191, |
| "rewards/rejected": -10.722421646118164, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.65234375, |
| "learning_rate": 4.470385007998354e-06, |
| "logits/chosen": -2.208327054977417, |
| "logits/rejected": -3.1323602199554443, |
| "logps/chosen": -146.74249267578125, |
| "logps/rejected": -301.1075134277344, |
| "loss": 0.0968, |
| "rewards/accuracies": 0.8600000143051147, |
| "rewards/chosen": 2.182009220123291, |
| "rewards/margins": 15.532031059265137, |
| "rewards/rejected": -13.352360725402832, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.93359375, |
| "learning_rate": 4.452348501057847e-06, |
| "logits/chosen": -2.0818140506744385, |
| "logits/rejected": -3.202207088470459, |
| "logps/chosen": -157.0050048828125, |
| "logps/rejected": -315.1000061035156, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 1.374355435371399, |
| "rewards/margins": 15.971718788146973, |
| "rewards/rejected": -14.5912504196167, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.0002765655517578125, |
| "learning_rate": 4.434047677382563e-06, |
| "logits/chosen": -1.5668798685073853, |
| "logits/rejected": -2.893112897872925, |
| "logps/chosen": -147.7725067138672, |
| "logps/rejected": -312.5950012207031, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 1.8955810070037842, |
| "rewards/margins": 16.66828155517578, |
| "rewards/rejected": -14.76968765258789, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 7.724761962890625e-05, |
| "learning_rate": 4.415485014611076e-06, |
| "logits/chosen": -1.6306848526000977, |
| "logits/rejected": -3.0001611709594727, |
| "logps/chosen": -143.4774932861328, |
| "logps/rejected": -301.7674865722656, |
| "loss": 0.1037, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.233813524246216, |
| "rewards/margins": 16.112031936645508, |
| "rewards/rejected": -13.87906265258789, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.396663025830785e-06, |
| "logits/chosen": -1.5060467720031738, |
| "logits/rejected": -3.0440375804901123, |
| "logps/chosen": -146.45249938964844, |
| "logps/rejected": -308.010009765625, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 2.640126943588257, |
| "rewards/margins": 16.458906173706055, |
| "rewards/rejected": -13.8149995803833, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.00164031982421875, |
| "learning_rate": 4.377584259237676e-06, |
| "logits/chosen": -1.5768399238586426, |
| "logits/rejected": -3.0472412109375, |
| "logps/chosen": -144.15249633789062, |
| "logps/rejected": -300.93499755859375, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 2.490654230117798, |
| "rewards/margins": 16.099843978881836, |
| "rewards/rejected": -13.607500076293945, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.012451171875, |
| "learning_rate": 4.358251297791342e-06, |
| "logits/chosen": -1.487817406654358, |
| "logits/rejected": -3.055607795715332, |
| "logps/chosen": -156.3074951171875, |
| "logps/rejected": -315.9574890136719, |
| "loss": 0.0743, |
| "rewards/accuracies": 0.8924999833106995, |
| "rewards/chosen": 1.7694629430770874, |
| "rewards/margins": 16.351093292236328, |
| "rewards/rejected": -14.579843521118164, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.8359375, |
| "learning_rate": 4.338666758865291e-06, |
| "logits/chosen": -1.5760504007339478, |
| "logits/rejected": -3.079218864440918, |
| "logps/chosen": -161.78250122070312, |
| "logps/rejected": -324.3299865722656, |
| "loss": 0.0778, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.9159045219421387, |
| "rewards/margins": 16.36734390258789, |
| "rewards/rejected": -15.450937271118164, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.000255584716796875, |
| "learning_rate": 4.318833293892593e-06, |
| "logits/chosen": -1.5287631750106812, |
| "logits/rejected": -2.8774261474609375, |
| "logps/chosen": -165.83999633789062, |
| "logps/rejected": -315.05499267578125, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 0.16832397878170013, |
| "rewards/margins": 15.089219093322754, |
| "rewards/rejected": -14.9165620803833, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.82421875, |
| "learning_rate": 4.2987535880069194e-06, |
| "logits/chosen": -1.7194920778274536, |
| "logits/rejected": -3.0331249237060547, |
| "logps/chosen": -173.74000549316406, |
| "logps/rejected": -324.07501220703125, |
| "loss": 0.0864, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.08797118812799454, |
| "rewards/margins": 15.2670316696167, |
| "rewards/rejected": -15.170312881469727, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.68359375, |
| "learning_rate": 4.278430359679022e-06, |
| "logits/chosen": -1.6618307828903198, |
| "logits/rejected": -2.7870311737060547, |
| "logps/chosen": -171.0500030517578, |
| "logps/rejected": -332.36248779296875, |
| "loss": 0.0761, |
| "rewards/accuracies": 0.8899999856948853, |
| "rewards/chosen": -0.015126952901482582, |
| "rewards/margins": 16.22640609741211, |
| "rewards/rejected": -16.239219665527344, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.0081787109375, |
| "learning_rate": 4.2578663603486916e-06, |
| "logits/chosen": -1.4954382181167603, |
| "logits/rejected": -2.641606330871582, |
| "logps/chosen": -165.57749938964844, |
| "logps/rejected": -321.75, |
| "loss": 0.1158, |
| "rewards/accuracies": 0.8324999809265137, |
| "rewards/chosen": -0.45833495259284973, |
| "rewards/margins": 15.786406517028809, |
| "rewards/rejected": -16.246719360351562, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.009765625, |
| "learning_rate": 4.23706437405226e-06, |
| "logits/chosen": -1.6313989162445068, |
| "logits/rejected": -2.8603711128234863, |
| "logps/chosen": -175.71499633789062, |
| "logps/rejected": -339.1875, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 0.04604126140475273, |
| "rewards/margins": 16.614530563354492, |
| "rewards/rejected": -16.570938110351562, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0008697509765625, |
| "learning_rate": 4.21602721704568e-06, |
| "logits/chosen": -1.7376703023910522, |
| "logits/rejected": -2.959348678588867, |
| "logps/chosen": -163.42250061035156, |
| "logps/rejected": -324.82000732421875, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 0.5958447456359863, |
| "rewards/margins": 16.27703094482422, |
| "rewards/rejected": -15.682969093322754, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.00130462646484375, |
| "learning_rate": 4.194757737423261e-06, |
| "logits/chosen": -1.9893441200256348, |
| "logits/rejected": -3.1228907108306885, |
| "logps/chosen": -161.6875, |
| "logps/rejected": -316.3900146484375, |
| "loss": 0.083, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 0.8289526104927063, |
| "rewards/margins": 15.733905792236328, |
| "rewards/rejected": -14.90640640258789, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 0.003387451171875, |
| "learning_rate": 4.1732588147320705e-06, |
| "logits/chosen": -1.9210351705551147, |
| "logits/rejected": -3.0717577934265137, |
| "logps/chosen": -151.19000244140625, |
| "logps/rejected": -294.67999267578125, |
| "loss": 0.1054, |
| "rewards/accuracies": 0.8475000262260437, |
| "rewards/chosen": 0.8471630811691284, |
| "rewards/margins": 14.576250076293945, |
| "rewards/rejected": -13.729218482971191, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.1515333595820975e-06, |
| "logits/chosen": -1.982885718345642, |
| "logits/rejected": -3.0044140815734863, |
| "logps/chosen": -163.9550018310547, |
| "logps/rejected": -320.55999755859375, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 0.7394506931304932, |
| "rewards/margins": 15.829375267028809, |
| "rewards/rejected": -15.094843864440918, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 9.393692016601562e-05, |
| "learning_rate": 4.129584313252198e-06, |
| "logits/chosen": -1.7410473823547363, |
| "logits/rejected": -2.6847314834594727, |
| "logps/chosen": -158.25999450683594, |
| "logps/rejected": -317.07000732421875, |
| "loss": 0.0899, |
| "rewards/accuracies": 0.8700000047683716, |
| "rewards/chosen": 1.0339257717132568, |
| "rewards/margins": 15.897500038146973, |
| "rewards/rejected": -14.865156173706055, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.107414647291893e-06, |
| "logits/chosen": -1.84354829788208, |
| "logits/rejected": -2.650254011154175, |
| "logps/chosen": -160.79750061035156, |
| "logps/rejected": -312.51251220703125, |
| "loss": 0.0899, |
| "rewards/accuracies": 0.8700000047683716, |
| "rewards/chosen": 1.0474340915679932, |
| "rewards/margins": 15.432499885559082, |
| "rewards/rejected": -14.383749961853027, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 0.0002899169921875, |
| "learning_rate": 4.085027363119076e-06, |
| "logits/chosen": -1.7570642232894897, |
| "logits/rejected": -2.6784496307373047, |
| "logps/chosen": -159.75750732421875, |
| "logps/rejected": -307.239990234375, |
| "loss": 0.0985, |
| "rewards/accuracies": 0.8575000166893005, |
| "rewards/chosen": 0.8954675197601318, |
| "rewards/margins": 15.072968482971191, |
| "rewards/rejected": -14.178437232971191, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 0.0067138671875, |
| "learning_rate": 4.062425491613656e-06, |
| "logits/chosen": -1.6061410903930664, |
| "logits/rejected": -2.5976123809814453, |
| "logps/chosen": -157.30250549316406, |
| "logps/rejected": -299.4425048828125, |
| "loss": 0.0968, |
| "rewards/accuracies": 0.8600000143051147, |
| "rewards/chosen": 0.9188379049301147, |
| "rewards/margins": 14.39859390258789, |
| "rewards/rejected": -13.4799222946167, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 2.578125, |
| "learning_rate": 4.039612092707236e-06, |
| "logits/chosen": -1.7808789014816284, |
| "logits/rejected": -2.769533634185791, |
| "logps/chosen": -161.10499572753906, |
| "logps/rejected": -301.5849914550781, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 0.6963549852371216, |
| "rewards/margins": 14.181875228881836, |
| "rewards/rejected": -13.483905792236328, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 1.0, |
| "learning_rate": 4.016590254968842e-06, |
| "logits/chosen": -1.786810278892517, |
| "logits/rejected": -2.6385669708251953, |
| "logps/chosen": -168.65750122070312, |
| "logps/rejected": -320.4674987792969, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 0.3265624940395355, |
| "rewards/margins": 15.369843482971191, |
| "rewards/rejected": -15.044530868530273, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.000202178955078125, |
| "learning_rate": 3.993363095186781e-06, |
| "logits/chosen": -1.775051236152649, |
| "logits/rejected": -2.604965925216675, |
| "logps/chosen": -175.4250030517578, |
| "logps/rejected": -334.4700012207031, |
| "loss": 0.083, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": -0.4224267601966858, |
| "rewards/margins": 16.112499237060547, |
| "rewards/rejected": -16.530624389648438, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 0.0089111328125, |
| "learning_rate": 3.9699337579466765e-06, |
| "logits/chosen": -1.4332165718078613, |
| "logits/rejected": -2.4917151927948, |
| "logps/chosen": -174.8524932861328, |
| "logps/rejected": -330.2449951171875, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": -0.360107421875, |
| "rewards/margins": 15.817968368530273, |
| "rewards/rejected": -16.182968139648438, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.000827789306640625, |
| "learning_rate": 3.946305415205748e-06, |
| "logits/chosen": -1.3821977376937866, |
| "logits/rejected": -2.346054792404175, |
| "logps/chosen": -173.88250732421875, |
| "logps/rejected": -325.8175048828125, |
| "loss": 0.1003, |
| "rewards/accuracies": 0.8550000190734863, |
| "rewards/chosen": -0.9856836199760437, |
| "rewards/margins": 15.3203125, |
| "rewards/rejected": -16.30843734741211, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.922481265863371e-06, |
| "logits/chosen": -1.5632950067520142, |
| "logits/rejected": -2.638700008392334, |
| "logps/chosen": -176.65750122070312, |
| "logps/rejected": -339.3949890136719, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": -0.8098046779632568, |
| "rewards/margins": 16.346094131469727, |
| "rewards/rejected": -17.149063110351562, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.1400000000000001, |
| "grad_norm": 0.8984375, |
| "learning_rate": 3.898464535327997e-06, |
| "logits/chosen": -1.559891700744629, |
| "logits/rejected": -2.6719970703125, |
| "logps/chosen": -175.10499572753906, |
| "logps/rejected": -332.5799865722656, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": -0.6723046898841858, |
| "rewards/margins": 16.07062530517578, |
| "rewards/rejected": -16.74625015258789, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 1.52587890625e-05, |
| "learning_rate": 3.874258475080497e-06, |
| "logits/chosen": -1.848596215248108, |
| "logits/rejected": -2.9883642196655273, |
| "logps/chosen": -171.56500244140625, |
| "logps/rejected": -336.0675048828125, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": -0.05261474475264549, |
| "rewards/margins": 16.615468978881836, |
| "rewards/rejected": -16.658750534057617, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.0010528564453125, |
| "learning_rate": 3.849866362233947e-06, |
| "logits/chosen": -1.624019742012024, |
| "logits/rejected": -2.9648828506469727, |
| "logps/chosen": -168.875, |
| "logps/rejected": -331.17999267578125, |
| "loss": 0.0726, |
| "rewards/accuracies": 0.8949999809265137, |
| "rewards/chosen": 0.45997437834739685, |
| "rewards/margins": 16.332500457763672, |
| "rewards/rejected": -15.879219055175781, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 0.001678466796875, |
| "learning_rate": 3.8252914990899695e-06, |
| "logits/chosen": -1.573280692100525, |
| "logits/rejected": -2.7242345809936523, |
| "logps/chosen": -169.05499267578125, |
| "logps/rejected": -322.07501220703125, |
| "loss": 0.0726, |
| "rewards/accuracies": 0.8949999809265137, |
| "rewards/chosen": -0.01105346716940403, |
| "rewards/margins": 15.53531265258789, |
| "rewards/rejected": -15.5521879196167, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.01416015625, |
| "learning_rate": 3.800537212691651e-06, |
| "logits/chosen": -1.745273470878601, |
| "logits/rejected": -2.6969921588897705, |
| "logps/chosen": -178.67250061035156, |
| "logps/rejected": -330.9800109863281, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": -0.30939698219299316, |
| "rewards/margins": 15.374062538146973, |
| "rewards/rejected": -15.685625076293945, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 0.000164031982421875, |
| "learning_rate": 3.775606854373115e-06, |
| "logits/chosen": -1.655666470527649, |
| "logits/rejected": -2.6190404891967773, |
| "logps/chosen": -174.1925048828125, |
| "logps/rejected": -326.572509765625, |
| "loss": 0.0726, |
| "rewards/accuracies": 0.8949999809265137, |
| "rewards/chosen": 0.033031005412340164, |
| "rewards/margins": 15.436562538146973, |
| "rewards/rejected": -15.404999732971191, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.546875, |
| "learning_rate": 3.7505037993058046e-06, |
| "logits/chosen": -1.7215648889541626, |
| "logits/rejected": -2.621614933013916, |
| "logps/chosen": -175.375, |
| "logps/rejected": -334.135009765625, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": -0.3496679663658142, |
| "rewards/margins": 16.133594512939453, |
| "rewards/rejected": -16.4854679107666, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.7252314460415396e-06, |
| "logits/chosen": -1.5741479396820068, |
| "logits/rejected": -2.679941415786743, |
| "logps/chosen": -172.75250244140625, |
| "logps/rejected": -326.0625, |
| "loss": 0.0743, |
| "rewards/accuracies": 0.8924999833106995, |
| "rewards/chosen": 0.0013964843237772584, |
| "rewards/margins": 15.612030982971191, |
| "rewards/rejected": -15.608750343322754, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 2.3245811462402344e-05, |
| "learning_rate": 3.6997932160524018e-06, |
| "logits/chosen": -1.6118944883346558, |
| "logits/rejected": -2.7812891006469727, |
| "logps/chosen": -174.97500610351562, |
| "logps/rejected": -325.45001220703125, |
| "loss": 0.0778, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.1942773461341858, |
| "rewards/margins": 15.537187576293945, |
| "rewards/rejected": -15.733750343322754, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 6.961822509765625e-05, |
| "learning_rate": 3.6741925532675297e-06, |
| "logits/chosen": -1.480682373046875, |
| "logits/rejected": -2.7474658489227295, |
| "logps/chosen": -165.65750122070312, |
| "logps/rejected": -317.760009765625, |
| "loss": 0.0726, |
| "rewards/accuracies": 0.8949999809265137, |
| "rewards/chosen": 0.29445311427116394, |
| "rewards/margins": 15.585000038146973, |
| "rewards/rejected": -15.289531707763672, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.0042724609375, |
| "learning_rate": 3.648432923606862e-06, |
| "logits/chosen": -1.3990026712417603, |
| "logits/rejected": -2.915020704269409, |
| "logps/chosen": -164.90750122070312, |
| "logps/rejected": -323.4700012207031, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 0.8015918135643005, |
| "rewards/margins": 16.133438110351562, |
| "rewards/rejected": -15.328437805175781, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.00024127960205078125, |
| "learning_rate": 3.622517814511906e-06, |
| "logits/chosen": -1.320135474205017, |
| "logits/rejected": -2.860288143157959, |
| "logps/chosen": -163.79750061035156, |
| "logps/rejected": -321.0450134277344, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 0.6940441727638245, |
| "rewards/margins": 16.037656784057617, |
| "rewards/rejected": -15.3423433303833, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.5964507344735965e-06, |
| "logits/chosen": -1.176222324371338, |
| "logits/rejected": -2.6737987995147705, |
| "logps/chosen": -160.44500732421875, |
| "logps/rejected": -313.2349853515625, |
| "loss": 0.1003, |
| "rewards/accuracies": 0.8550000190734863, |
| "rewards/chosen": 0.5672607421875, |
| "rewards/margins": 15.430312156677246, |
| "rewards/rejected": -14.865625381469727, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 0.000461578369140625, |
| "learning_rate": 3.5702352125573015e-06, |
| "logits/chosen": -1.2549536228179932, |
| "logits/rejected": -2.675966739654541, |
| "logps/chosen": -159.0675048828125, |
| "logps/rejected": -311.6025085449219, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 0.6797509789466858, |
| "rewards/margins": 15.3959379196167, |
| "rewards/rejected": -14.717031478881836, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.0001392364501953125, |
| "learning_rate": 3.543874797925042e-06, |
| "logits/chosen": -1.5146979093551636, |
| "logits/rejected": -2.7764551639556885, |
| "logps/chosen": -170.92750549316406, |
| "logps/rejected": -323.7449951171875, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 0.3626416027545929, |
| "rewards/margins": 15.621562957763672, |
| "rewards/rejected": -15.259843826293945, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 0.0126953125, |
| "learning_rate": 3.5173730593549947e-06, |
| "logits/chosen": -1.8136059045791626, |
| "logits/rejected": -2.790639638900757, |
| "logps/chosen": -169.0, |
| "logps/rejected": -316.1000061035156, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 0.23876464366912842, |
| "rewards/margins": 14.961406707763672, |
| "rewards/rejected": -14.722187042236328, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.00799560546875, |
| "learning_rate": 3.4907335847583356e-06, |
| "logits/chosen": -1.9238842725753784, |
| "logits/rejected": -2.8575196266174316, |
| "logps/chosen": -170.00999450683594, |
| "logps/rejected": -319.3324890136719, |
| "loss": 0.1003, |
| "rewards/accuracies": 0.8550000190734863, |
| "rewards/chosen": -0.1691601574420929, |
| "rewards/margins": 15.154375076293945, |
| "rewards/rejected": -15.327031135559082, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 0.0074462890625, |
| "learning_rate": 3.463959980693492e-06, |
| "logits/chosen": -1.840087890625, |
| "logits/rejected": -2.790390729904175, |
| "logps/chosen": -169.75750732421875, |
| "logps/rejected": -326.3450012207031, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 0.2927929759025574, |
| "rewards/margins": 15.926562309265137, |
| "rewards/rejected": -15.626562118530273, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 6.341934204101562e-05, |
| "learning_rate": 3.4370558718778753e-06, |
| "logits/chosen": -1.810239315032959, |
| "logits/rejected": -2.8067736625671387, |
| "logps/chosen": -168.5050048828125, |
| "logps/rejected": -325.92498779296875, |
| "loss": 0.0726, |
| "rewards/accuracies": 0.8949999809265137, |
| "rewards/chosen": 0.42659667134284973, |
| "rewards/margins": 16.05078125, |
| "rewards/rejected": -15.6264066696167, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 0.0002422332763671875, |
| "learning_rate": 3.4100249006971514e-06, |
| "logits/chosen": -2.036135196685791, |
| "logits/rejected": -2.8965821266174316, |
| "logps/chosen": -168.71499633789062, |
| "logps/rejected": -319.447509765625, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 0.32066404819488525, |
| "rewards/margins": 15.444375038146973, |
| "rewards/rejected": -15.124062538146973, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.00063323974609375, |
| "learning_rate": 3.3828707267121185e-06, |
| "logits/chosen": -1.9675488471984863, |
| "logits/rejected": -2.8052685260772705, |
| "logps/chosen": -165.78250122070312, |
| "logps/rejected": -315.56500244140625, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 0.36207762360572815, |
| "rewards/margins": 15.29671859741211, |
| "rewards/rejected": -14.934218406677246, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.355597026163264e-06, |
| "logits/chosen": -1.784468412399292, |
| "logits/rejected": -2.6825194358825684, |
| "logps/chosen": -162.88250732421875, |
| "logps/rejected": -311.5199890136719, |
| "loss": 0.1037, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.07732421904802322, |
| "rewards/margins": 15.029531478881836, |
| "rewards/rejected": -14.949531555175781, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.8828125, |
| "learning_rate": 3.3282074914730577e-06, |
| "logits/chosen": -1.9187493324279785, |
| "logits/rejected": -2.836562395095825, |
| "logps/chosen": -158.41000366210938, |
| "logps/rejected": -310.2674865722656, |
| "loss": 0.0726, |
| "rewards/accuracies": 0.8949999809265137, |
| "rewards/chosen": 1.508481502532959, |
| "rewards/margins": 15.624530792236328, |
| "rewards/rejected": -14.117656707763672, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 0.0068359375, |
| "learning_rate": 3.300705830746057e-06, |
| "logits/chosen": -1.7984619140625, |
| "logits/rejected": -2.8072216510772705, |
| "logps/chosen": -147.9375, |
| "logps/rejected": -300.1875, |
| "loss": 0.0743, |
| "rewards/accuracies": 0.8924999833106995, |
| "rewards/chosen": 2.547187566757202, |
| "rewards/margins": 15.463281631469727, |
| "rewards/rejected": -12.914375305175781, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 0.01104736328125, |
| "learning_rate": 3.2730957672668917e-06, |
| "logits/chosen": -1.6440536975860596, |
| "logits/rejected": -2.777017593383789, |
| "logps/chosen": -143.3625030517578, |
| "logps/rejected": -295.2699890136719, |
| "loss": 0.0726, |
| "rewards/accuracies": 0.8949999809265137, |
| "rewards/chosen": 2.895751953125, |
| "rewards/margins": 15.431718826293945, |
| "rewards/rejected": -12.537031173706055, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.3900000000000001, |
| "grad_norm": 0.0013580322265625, |
| "learning_rate": 3.245381038996188e-06, |
| "logits/chosen": -1.747806429862976, |
| "logits/rejected": -2.857853889465332, |
| "logps/chosen": -139.8975067138672, |
| "logps/rejected": -288.42999267578125, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 3.0215137004852295, |
| "rewards/margins": 15.106406211853027, |
| "rewards/rejected": -12.087187767028809, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.0035247802734375, |
| "learning_rate": 3.2175653980645096e-06, |
| "logits/chosen": -1.6131787300109863, |
| "logits/rejected": -2.7717015743255615, |
| "logps/chosen": -134.77499389648438, |
| "logps/rejected": -281.7250061035156, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 3.087773323059082, |
| "rewards/margins": 14.971718788146973, |
| "rewards/rejected": -11.884687423706055, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 0.003173828125, |
| "learning_rate": 3.189652610264379e-06, |
| "logits/chosen": -1.6364532709121704, |
| "logits/rejected": -2.7611522674560547, |
| "logps/chosen": -148.1074981689453, |
| "logps/rejected": -292.260009765625, |
| "loss": 0.0985, |
| "rewards/accuracies": 0.8575000166893005, |
| "rewards/chosen": 2.152085065841675, |
| "rewards/margins": 14.622968673706055, |
| "rewards/rejected": -12.472969055175781, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 0.00909423828125, |
| "learning_rate": 3.1616464545404486e-06, |
| "logits/chosen": -1.6323034763336182, |
| "logits/rejected": -2.8021483421325684, |
| "logps/chosen": -152.21749877929688, |
| "logps/rejected": -307.4024963378906, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 2.286259651184082, |
| "rewards/margins": 15.906719207763672, |
| "rewards/rejected": -13.616406440734863, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 0.00014591217041015625, |
| "learning_rate": 3.133550722477896e-06, |
| "logits/chosen": -1.5869457721710205, |
| "logits/rejected": -2.831690788269043, |
| "logps/chosen": -151.6699981689453, |
| "logps/rejected": -312.9800109863281, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 2.021599054336548, |
| "rewards/margins": 16.44562530517578, |
| "rewards/rejected": -14.4232816696167, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.0057373046875, |
| "learning_rate": 3.105369217789099e-06, |
| "logits/chosen": -1.5832836627960205, |
| "logits/rejected": -2.826430559158325, |
| "logps/chosen": -147.8800048828125, |
| "logps/rejected": -299.2049865722656, |
| "loss": 0.102, |
| "rewards/accuracies": 0.8525000214576721, |
| "rewards/chosen": 1.6627343893051147, |
| "rewards/margins": 15.454687118530273, |
| "rewards/rejected": -13.7876558303833, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.077105755798675e-06, |
| "logits/chosen": -1.5947095155715942, |
| "logits/rejected": -2.803586483001709, |
| "logps/chosen": -149.30250549316406, |
| "logps/rejected": -300.635009765625, |
| "loss": 0.1037, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 1.6690233945846558, |
| "rewards/margins": 15.402812957763672, |
| "rewards/rejected": -13.731562614440918, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 0.0003814697265625, |
| "learning_rate": 3.0487641629269515e-06, |
| "logits/chosen": -1.3740723133087158, |
| "logits/rejected": -2.788759708404541, |
| "logps/chosen": -149.9949951171875, |
| "logps/rejected": -307.3275146484375, |
| "loss": 0.0864, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 1.7442187070846558, |
| "rewards/margins": 15.862968444824219, |
| "rewards/rejected": -14.121718406677246, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 0.6015625, |
| "learning_rate": 3.0203482761719226e-06, |
| "logits/chosen": -1.3477171659469604, |
| "logits/rejected": -2.810493230819702, |
| "logps/chosen": -152.57249450683594, |
| "logps/rejected": -303.9750061035156, |
| "loss": 0.1054, |
| "rewards/accuracies": 0.8475000262260437, |
| "rewards/chosen": 1.5104199647903442, |
| "rewards/margins": 15.333281517028809, |
| "rewards/rejected": -13.822187423706055, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.59375, |
| "learning_rate": 2.991861942589788e-06, |
| "logits/chosen": -1.4101388454437256, |
| "logits/rejected": -2.9320101737976074, |
| "logps/chosen": -151.7100067138672, |
| "logps/rejected": -306.79998779296875, |
| "loss": 0.0761, |
| "rewards/accuracies": 0.8899999856948853, |
| "rewards/chosen": 2.018501043319702, |
| "rewards/margins": 15.747187614440918, |
| "rewards/rejected": -13.726093292236328, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 4.76837158203125e-05, |
| "learning_rate": 2.9633090187741186e-06, |
| "logits/chosen": -1.3069872856140137, |
| "logits/rejected": -2.8863134384155273, |
| "logps/chosen": -145.5574951171875, |
| "logps/rejected": -290.4125061035156, |
| "loss": 0.1037, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.1462109088897705, |
| "rewards/margins": 14.840781211853027, |
| "rewards/rejected": -12.689844131469727, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.0001659393310546875, |
| "learning_rate": 2.934693370333739e-06, |
| "logits/chosen": -1.3929543495178223, |
| "logits/rejected": -2.97314453125, |
| "logps/chosen": -147.5225067138672, |
| "logps/rejected": -305.05999755859375, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 2.6558544635772705, |
| "rewards/margins": 15.836562156677246, |
| "rewards/rejected": -13.185937881469727, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.9060188713693794e-06, |
| "logits/chosen": -1.3370373249053955, |
| "logits/rejected": -2.9380431175231934, |
| "logps/chosen": -141.3574981689453, |
| "logps/rejected": -294.44500732421875, |
| "loss": 0.0761, |
| "rewards/accuracies": 0.8899999856948853, |
| "rewards/chosen": 2.661679744720459, |
| "rewards/margins": 15.43765640258789, |
| "rewards/rejected": -12.771562576293945, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.78515625, |
| "learning_rate": 2.8772894039491938e-06, |
| "logits/chosen": -1.4408868551254272, |
| "logits/rejected": -2.962076425552368, |
| "logps/chosen": -142.13999938964844, |
| "logps/rejected": -295.55999755859375, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 2.679374933242798, |
| "rewards/margins": 15.471875190734863, |
| "rewards/rejected": -12.795391082763672, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 0.01397705078125, |
| "learning_rate": 2.848508857583183e-06, |
| "logits/chosen": -1.521622896194458, |
| "logits/rejected": -3.0157811641693115, |
| "logps/chosen": -142.84500122070312, |
| "logps/rejected": -294.93499755859375, |
| "loss": 0.0864, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 2.692207098007202, |
| "rewards/margins": 15.400468826293945, |
| "rewards/rejected": -12.703906059265137, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.0037689208984375, |
| "learning_rate": 2.81968112869662e-06, |
| "logits/chosen": -1.334303379058838, |
| "logits/rejected": -2.9068212509155273, |
| "logps/chosen": -137.8925018310547, |
| "logps/rejected": -280.385009765625, |
| "loss": 0.1037, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.869335889816284, |
| "rewards/margins": 14.509531021118164, |
| "rewards/rejected": -11.638280868530273, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.790810120102534e-06, |
| "logits/chosen": -1.453762173652649, |
| "logits/rejected": -2.9707226753234863, |
| "logps/chosen": -146.41000366210938, |
| "logps/rejected": -292.67498779296875, |
| "loss": 0.083, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 2.3908984661102295, |
| "rewards/margins": 14.83578109741211, |
| "rewards/rejected": -12.446874618530273, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.00040435791015625, |
| "learning_rate": 2.7618997404733365e-06, |
| "logits/chosen": -1.6041711568832397, |
| "logits/rejected": -2.9843311309814453, |
| "logps/chosen": -150.46749877929688, |
| "logps/rejected": -298.6700134277344, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 2.042372941970825, |
| "rewards/margins": 15.013437271118164, |
| "rewards/rejected": -12.966405868530273, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.5699999999999998, |
| "grad_norm": 0.0004024505615234375, |
| "learning_rate": 2.7329539038116453e-06, |
| "logits/chosen": -1.4521881341934204, |
| "logits/rejected": -2.926464796066284, |
| "logps/chosen": -150.1074981689453, |
| "logps/rejected": -298.0425109863281, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 2.3667969703674316, |
| "rewards/margins": 14.889687538146973, |
| "rewards/rejected": -12.527030944824219, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.8671875, |
| "learning_rate": 2.7039765289203947e-06, |
| "logits/chosen": -1.412445068359375, |
| "logits/rejected": -2.8667969703674316, |
| "logps/chosen": -146.1300048828125, |
| "logps/rejected": -291.2850036621094, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 2.3307225704193115, |
| "rewards/margins": 14.735937118530273, |
| "rewards/rejected": -12.403437614440918, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.5899999999999999, |
| "grad_norm": 0.67578125, |
| "learning_rate": 2.6749715388722865e-06, |
| "logits/chosen": -1.2646269798278809, |
| "logits/rejected": -2.821697473526001, |
| "logps/chosen": -145.84750366210938, |
| "logps/rejected": -289.67498779296875, |
| "loss": 0.0899, |
| "rewards/accuracies": 0.8700000047683716, |
| "rewards/chosen": 2.281503915786743, |
| "rewards/margins": 14.5806245803833, |
| "rewards/rejected": -12.300156593322754, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.0015411376953125, |
| "learning_rate": 2.6459428604786757e-06, |
| "logits/chosen": -1.311349630355835, |
| "logits/rejected": -2.7577929496765137, |
| "logps/chosen": -150.4425048828125, |
| "logps/rejected": -293.7149963378906, |
| "loss": 0.0985, |
| "rewards/accuracies": 0.8575000166893005, |
| "rewards/chosen": 1.655849575996399, |
| "rewards/margins": 14.475312232971191, |
| "rewards/rejected": -12.818437576293945, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.6099999999999999, |
| "grad_norm": 0.00147247314453125, |
| "learning_rate": 2.616894423757941e-06, |
| "logits/chosen": -1.4159927368164062, |
| "logits/rejected": -2.9021289348602295, |
| "logps/chosen": -151.3925018310547, |
| "logps/rejected": -296.8399963378906, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 2.0074121952056885, |
| "rewards/margins": 14.760781288146973, |
| "rewards/rejected": -12.754219055175781, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 0.000164031982421875, |
| "learning_rate": 2.587830161403419e-06, |
| "logits/chosen": -1.3917722702026367, |
| "logits/rejected": -2.887305974960327, |
| "logps/chosen": -152.03500366210938, |
| "logps/rejected": -288.260009765625, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 1.9093506336212158, |
| "rewards/margins": 13.984375, |
| "rewards/rejected": -12.0795316696167, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 0.90625, |
| "learning_rate": 2.5587540082509864e-06, |
| "logits/chosen": -1.3086663484573364, |
| "logits/rejected": -2.910728693008423, |
| "logps/chosen": -148.97250366210938, |
| "logps/rejected": -287.9200134277344, |
| "loss": 0.0795, |
| "rewards/accuracies": 0.8849999904632568, |
| "rewards/chosen": 2.129091739654541, |
| "rewards/margins": 14.16812515258789, |
| "rewards/rejected": -12.041093826293945, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 0.0361328125, |
| "learning_rate": 2.5296699007463434e-06, |
| "logits/chosen": -1.198205590248108, |
| "logits/rejected": -2.7841992378234863, |
| "logps/chosen": -147.5749969482422, |
| "logps/rejected": -290.5050048828125, |
| "loss": 0.0657, |
| "rewards/accuracies": 0.9049999713897705, |
| "rewards/chosen": 2.2004687786102295, |
| "rewards/margins": 14.422344207763672, |
| "rewards/rejected": -12.222969055175781, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.0004787445068359375, |
| "learning_rate": 2.500581776412081e-06, |
| "logits/chosen": -1.3178759813308716, |
| "logits/rejected": -2.763364315032959, |
| "logps/chosen": -153.17999267578125, |
| "logps/rejected": -296.3699951171875, |
| "loss": 0.083, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 1.7891894578933716, |
| "rewards/margins": 14.428750038146973, |
| "rewards/rejected": -12.639843940734863, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.6600000000000001, |
| "grad_norm": 0.7578125, |
| "learning_rate": 2.471493573314605e-06, |
| "logits/chosen": -1.273976445198059, |
| "logits/rejected": -2.638751745223999, |
| "logps/chosen": -147.61749267578125, |
| "logps/rejected": -281.2300109863281, |
| "loss": 0.1089, |
| "rewards/accuracies": 0.8424999713897705, |
| "rewards/chosen": 1.724277377128601, |
| "rewards/margins": 13.661250114440918, |
| "rewards/rejected": -11.94156265258789, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.5234375, |
| "learning_rate": 2.442409229530985e-06, |
| "logits/chosen": -1.3456776142120361, |
| "logits/rejected": -2.6645333766937256, |
| "logps/chosen": -148.5449981689453, |
| "logps/rejected": -290.0400085449219, |
| "loss": 0.0968, |
| "rewards/accuracies": 0.8600000143051147, |
| "rewards/chosen": 1.7859375476837158, |
| "rewards/margins": 14.419530868530273, |
| "rewards/rejected": -12.630781173706055, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.4133326826158006e-06, |
| "logits/chosen": -1.452473759651184, |
| "logits/rejected": -2.719834089279175, |
| "logps/chosen": -151.13999938964844, |
| "logps/rejected": -297.2650146484375, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 1.8502148389816284, |
| "rewards/margins": 14.8579683303833, |
| "rewards/rejected": -13.008437156677246, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 0.0001697540283203125, |
| "learning_rate": 2.3842678690680612e-06, |
| "logits/chosen": -1.2888891696929932, |
| "logits/rejected": -2.6560449600219727, |
| "logps/chosen": -150.96499633789062, |
| "logps/rejected": -290.1199951171875, |
| "loss": 0.0968, |
| "rewards/accuracies": 0.8600000143051147, |
| "rewards/chosen": 1.7348560094833374, |
| "rewards/margins": 14.02734375, |
| "rewards/rejected": -12.290937423706055, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.00016498565673828125, |
| "learning_rate": 2.355218723798264e-06, |
| "logits/chosen": -1.3379980325698853, |
| "logits/rejected": -2.7865869998931885, |
| "logps/chosen": -153.50999450683594, |
| "logps/rejected": -295.2149963378906, |
| "loss": 0.0778, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 2.0467870235443115, |
| "rewards/margins": 14.391094207763672, |
| "rewards/rejected": -12.346562385559082, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 1.0, |
| "learning_rate": 2.326189179595676e-06, |
| "logits/chosen": -1.2621526718139648, |
| "logits/rejected": -2.6848363876342773, |
| "logps/chosen": -151.13999938964844, |
| "logps/rejected": -294.8949890136719, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 2.0117383003234863, |
| "rewards/margins": 14.526562690734863, |
| "rewards/rejected": -12.5053129196167, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.0203857421875, |
| "learning_rate": 2.297183166595889e-06, |
| "logits/chosen": -1.3091638088226318, |
| "logits/rejected": -2.7887303829193115, |
| "logps/chosen": -155.8625030517578, |
| "logps/rejected": -300.9750061035156, |
| "loss": 0.0588, |
| "rewards/accuracies": 0.9150000214576721, |
| "rewards/chosen": 2.133901357650757, |
| "rewards/margins": 14.717499732971191, |
| "rewards/rejected": -12.588281631469727, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 1.25, |
| "learning_rate": 2.26820461174875e-06, |
| "logits/chosen": -1.2939709424972534, |
| "logits/rejected": -2.6905393600463867, |
| "logps/chosen": -148.60499572753906, |
| "logps/rejected": -286.3699951171875, |
| "loss": 0.0933, |
| "rewards/accuracies": 0.8650000095367432, |
| "rewards/chosen": 1.790956974029541, |
| "rewards/margins": 13.9829683303833, |
| "rewards/rejected": -12.193750381469727, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 0.93359375, |
| "learning_rate": 2.23925743828671e-06, |
| "logits/chosen": -1.1340380907058716, |
| "logits/rejected": -2.585568904876709, |
| "logps/chosen": -146.1999969482422, |
| "logps/rejected": -279.0899963378906, |
| "loss": 0.1037, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 1.663818359375, |
| "rewards/margins": 13.498906135559082, |
| "rewards/rejected": -11.834218978881836, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.00543212890625, |
| "learning_rate": 2.2103455651936824e-06, |
| "logits/chosen": -1.1339178085327148, |
| "logits/rejected": -2.6299610137939453, |
| "logps/chosen": -146.9774932861328, |
| "logps/rejected": -285.3175048828125, |
| "loss": 0.0968, |
| "rewards/accuracies": 0.8600000143051147, |
| "rewards/chosen": 1.9546289443969727, |
| "rewards/margins": 14.03499984741211, |
| "rewards/rejected": -12.078594207763672, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.005828857421875, |
| "learning_rate": 2.181472906674478e-06, |
| "logits/chosen": -1.313624382019043, |
| "logits/rejected": -2.7783007621765137, |
| "logps/chosen": -154.29750061035156, |
| "logps/rejected": -303.6675109863281, |
| "loss": 0.0553, |
| "rewards/accuracies": 0.9200000166893005, |
| "rewards/chosen": 2.0906200408935547, |
| "rewards/margins": 15.23062515258789, |
| "rewards/rejected": -13.136249542236328, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.00531005859375, |
| "learning_rate": 2.152643371624878e-06, |
| "logits/chosen": -1.257965087890625, |
| "logits/rejected": -2.8671875, |
| "logps/chosen": -148.28500366210938, |
| "logps/rejected": -293.7850036621094, |
| "loss": 0.0743, |
| "rewards/accuracies": 0.8924999833106995, |
| "rewards/chosen": 2.257265567779541, |
| "rewards/margins": 14.864843368530273, |
| "rewards/rejected": -12.609844207763672, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.005889892578125, |
| "learning_rate": 2.1238608631024416e-06, |
| "logits/chosen": -1.217246651649475, |
| "logits/rejected": -2.843193292617798, |
| "logps/chosen": -151.90249633789062, |
| "logps/rejected": -294.79998779296875, |
| "loss": 0.083, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 1.9859668016433716, |
| "rewards/margins": 14.599687576293945, |
| "rewards/rejected": -12.60953140258789, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 0.79296875, |
| "learning_rate": 2.095129277798084e-06, |
| "logits/chosen": -1.2338311672210693, |
| "logits/rejected": -2.8333849906921387, |
| "logps/chosen": -149.3524932861328, |
| "logps/rejected": -293.375, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 2.1244139671325684, |
| "rewards/margins": 14.682812690734863, |
| "rewards/rejected": -12.559218406677246, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.00011968612670898438, |
| "learning_rate": 2.0664525055085353e-06, |
| "logits/chosen": -1.25933837890625, |
| "logits/rejected": -2.8884618282318115, |
| "logps/chosen": -149.42750549316406, |
| "logps/rejected": -296.8999938964844, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 2.1980907917022705, |
| "rewards/margins": 14.94124984741211, |
| "rewards/rejected": -12.743906021118164, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 0.005859375, |
| "learning_rate": 2.037834428609718e-06, |
| "logits/chosen": -1.220767855644226, |
| "logits/rejected": -2.8833789825439453, |
| "logps/chosen": -150.5800018310547, |
| "logps/rejected": -299.7900085449219, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 2.273066520690918, |
| "rewards/margins": 15.19124984741211, |
| "rewards/rejected": -12.915781021118164, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.8199999999999998, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.009278921531141e-06, |
| "logits/chosen": -1.1782631874084473, |
| "logits/rejected": -2.8433496952056885, |
| "logps/chosen": -148.50250244140625, |
| "logps/rejected": -299.1025085449219, |
| "loss": 0.0743, |
| "rewards/accuracies": 0.8924999833106995, |
| "rewards/chosen": 2.413759708404541, |
| "rewards/margins": 15.21875, |
| "rewards/rejected": -12.807656288146973, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 0.0002918243408203125, |
| "learning_rate": 1.9807898502313577e-06, |
| "logits/chosen": -1.0487598180770874, |
| "logits/rejected": -2.6698436737060547, |
| "logps/chosen": -143.33250427246094, |
| "logps/rejected": -277.25, |
| "loss": 0.1227, |
| "rewards/accuracies": 0.8224999904632568, |
| "rewards/chosen": 1.6386914253234863, |
| "rewards/margins": 13.599374771118164, |
| "rewards/rejected": -11.961718559265137, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.00640869140625, |
| "learning_rate": 1.9523710716745846e-06, |
| "logits/chosen": -1.1239213943481445, |
| "logits/rejected": -2.814990282058716, |
| "logps/chosen": -145.6374969482422, |
| "logps/rejected": -289.4549865722656, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 2.229843854904175, |
| "rewards/margins": 14.695937156677246, |
| "rewards/rejected": -12.4609375, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.9240264333085247e-06, |
| "logits/chosen": -1.1269491910934448, |
| "logits/rejected": -2.812253475189209, |
| "logps/chosen": -142.22000122070312, |
| "logps/rejected": -289.4075012207031, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 2.428847551345825, |
| "rewards/margins": 14.7670316696167, |
| "rewards/rejected": -12.332500457763672, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.8599999999999999, |
| "grad_norm": 0.0002460479736328125, |
| "learning_rate": 1.8957597725434814e-06, |
| "logits/chosen": -1.052548885345459, |
| "logits/rejected": -2.777416944503784, |
| "logps/chosen": -144.0449981689453, |
| "logps/rejected": -290.26251220703125, |
| "loss": 0.083, |
| "rewards/accuracies": 0.8799999952316284, |
| "rewards/chosen": 2.608515739440918, |
| "rewards/margins": 14.833125114440918, |
| "rewards/rejected": -12.229687690734863, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 0.000820159912109375, |
| "learning_rate": 1.8675749162328472e-06, |
| "logits/chosen": -0.9933964610099792, |
| "logits/rejected": -2.699892520904541, |
| "logps/chosen": -144.44000244140625, |
| "logps/rejected": -286.8374938964844, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 2.7115039825439453, |
| "rewards/margins": 14.4439058303833, |
| "rewards/rejected": -11.732656478881836, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.839475680154994e-06, |
| "logits/chosen": -0.9599626064300537, |
| "logits/rejected": -2.6507763862609863, |
| "logps/chosen": -139.96249389648438, |
| "logps/rejected": -276.3900146484375, |
| "loss": 0.1106, |
| "rewards/accuracies": 0.8399999737739563, |
| "rewards/chosen": 2.6600000858306885, |
| "rewards/margins": 13.848750114440918, |
| "rewards/rejected": -11.1899995803833, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.8900000000000001, |
| "grad_norm": 0.0001621246337890625, |
| "learning_rate": 1.8114658684966893e-06, |
| "logits/chosen": -0.9667956829071045, |
| "logits/rejected": -2.65411376953125, |
| "logps/chosen": -140.2100067138672, |
| "logps/rejected": -285.072509765625, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 2.996386766433716, |
| "rewards/margins": 14.584375381469727, |
| "rewards/rejected": -11.588281631469727, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.00021076202392578125, |
| "learning_rate": 1.7835492733380621e-06, |
| "logits/chosen": -0.9071944952011108, |
| "logits/rejected": -2.6002306938171387, |
| "logps/chosen": -140.1374969482422, |
| "logps/rejected": -280.19000244140625, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 2.7462353706359863, |
| "rewards/margins": 14.235312461853027, |
| "rewards/rejected": -11.487500190734863, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.9100000000000001, |
| "grad_norm": 8.487701416015625e-05, |
| "learning_rate": 1.755729674139224e-06, |
| "logits/chosen": -0.9953001141548157, |
| "logits/rejected": -2.6695971488952637, |
| "logps/chosen": -138.27499389648438, |
| "logps/rejected": -274.2049865722656, |
| "loss": 0.1106, |
| "rewards/accuracies": 0.8399999737739563, |
| "rewards/chosen": 2.5620312690734863, |
| "rewards/margins": 13.859375, |
| "rewards/rejected": -11.292655944824219, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.7280108372285804e-06, |
| "logits/chosen": -1.1185680627822876, |
| "logits/rejected": -2.6991472244262695, |
| "logps/chosen": -143.2100067138672, |
| "logps/rejected": -288.0799865722656, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 2.851914167404175, |
| "rewards/margins": 14.767969131469727, |
| "rewards/rejected": -11.918749809265137, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.9300000000000002, |
| "grad_norm": 1.53125, |
| "learning_rate": 1.700396515292942e-06, |
| "logits/chosen": -1.051497220993042, |
| "logits/rejected": -2.6542186737060547, |
| "logps/chosen": -140.2550048828125, |
| "logps/rejected": -285.32000732421875, |
| "loss": 0.0933, |
| "rewards/accuracies": 0.8650000095367432, |
| "rewards/chosen": 2.637402296066284, |
| "rewards/margins": 14.5482816696167, |
| "rewards/rejected": -11.911328315734863, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 0.0093994140625, |
| "learning_rate": 1.67289044686946e-06, |
| "logits/chosen": -1.104941725730896, |
| "logits/rejected": -2.7016260623931885, |
| "logps/chosen": -141.42750549316406, |
| "logps/rejected": -286.2049865722656, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8824999928474426, |
| "rewards/chosen": 2.8697266578674316, |
| "rewards/margins": 14.7017183303833, |
| "rewards/rejected": -11.830156326293945, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.0042724609375, |
| "learning_rate": 1.6454963558394954e-06, |
| "logits/chosen": -1.0008597373962402, |
| "logits/rejected": -2.603630304336548, |
| "logps/chosen": -137.0625, |
| "logps/rejected": -274.5350036621094, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8675000071525574, |
| "rewards/chosen": 2.792910099029541, |
| "rewards/margins": 13.903437614440918, |
| "rewards/rejected": -11.112500190734863, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.00189971923828125, |
| "learning_rate": 1.6182179509244623e-06, |
| "logits/chosen": -0.8819994926452637, |
| "logits/rejected": -2.5757317543029785, |
| "logps/chosen": -136.65750122070312, |
| "logps/rejected": -267.739990234375, |
| "loss": 0.1054, |
| "rewards/accuracies": 0.8475000262260437, |
| "rewards/chosen": 2.8574609756469727, |
| "rewards/margins": 13.30062484741211, |
| "rewards/rejected": -10.443046569824219, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 0.001617431640625, |
| "learning_rate": 1.5910589251837258e-06, |
| "logits/chosen": -0.9589361548423767, |
| "logits/rejected": -2.654125928878784, |
| "logps/chosen": -141.08250427246094, |
| "logps/rejected": -274.427490234375, |
| "loss": 0.0899, |
| "rewards/accuracies": 0.8700000047683716, |
| "rewards/chosen": 2.935683488845825, |
| "rewards/margins": 13.803203582763672, |
| "rewards/rejected": -10.866328239440918, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 0.000946044921875, |
| "learning_rate": 1.5640229555146237e-06, |
| "logits/chosen": -0.9323858618736267, |
| "logits/rejected": -2.6372363567352295, |
| "logps/chosen": -140.74000549316406, |
| "logps/rejected": -276.75, |
| "loss": 0.0882, |
| "rewards/accuracies": 0.8725000023841858, |
| "rewards/chosen": 3.178281307220459, |
| "rewards/margins": 13.927734375, |
| "rewards/rejected": -10.7514066696167, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.537113702154668e-06, |
| "logits/chosen": -1.0329382419586182, |
| "logits/rejected": -2.759021520614624, |
| "logps/chosen": -140.61500549316406, |
| "logps/rejected": -277.2174987792969, |
| "loss": 0.0847, |
| "rewards/accuracies": 0.8774999976158142, |
| "rewards/chosen": 2.995312452316284, |
| "rewards/margins": 13.927968978881836, |
| "rewards/rejected": -10.926405906677246, |
| "step": 9950 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.5103348081860159e-06, |
| "logits/chosen": -0.9912976026535034, |
| "logits/rejected": -2.705747127532959, |
| "logps/chosen": -140.69749450683594, |
| "logps/rejected": -283.8374938964844, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.8974999785423279, |
| "rewards/chosen": 3.1512889862060547, |
| "rewards/margins": 14.588281631469727, |
| "rewards/rejected": -11.444062232971191, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 15000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|