| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9994767137624281, | |
| "eval_steps": 500, | |
| "global_step": 955, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0052328623757195184, | |
| "grad_norm": 819697.7987526867, | |
| "learning_rate": 2.6041666666666667e-08, | |
| "logits/chosen": -2.897020101547241, | |
| "logits/rejected": -2.8810553550720215, | |
| "logps/chosen": -281.18853759765625, | |
| "logps/rejected": -241.4916534423828, | |
| "loss": 62511.5062, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -281.18853759765625, | |
| "rewards/margins": -39.69694519042969, | |
| "rewards/rejected": -241.4916534423828, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.010465724751439037, | |
| "grad_norm": 856447.0339256247, | |
| "learning_rate": 5.208333333333333e-08, | |
| "logits/chosen": -2.8515119552612305, | |
| "logits/rejected": -2.852177381515503, | |
| "logps/chosen": -227.5166778564453, | |
| "logps/rejected": -218.9936065673828, | |
| "loss": 62508.0563, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -227.5166778564453, | |
| "rewards/margins": -8.523069381713867, | |
| "rewards/rejected": -218.9936065673828, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.015698587127158554, | |
| "grad_norm": 608077.0241737472, | |
| "learning_rate": 7.812499999999999e-08, | |
| "logits/chosen": -2.8871281147003174, | |
| "logits/rejected": -2.8566455841064453, | |
| "logps/chosen": -296.6144104003906, | |
| "logps/rejected": -248.87496948242188, | |
| "loss": 62494.775, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -296.6144104003906, | |
| "rewards/margins": -47.739437103271484, | |
| "rewards/rejected": -248.87496948242188, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.020931449502878074, | |
| "grad_norm": 547713.4134125254, | |
| "learning_rate": 1.0416666666666667e-07, | |
| "logits/chosen": -2.8649909496307373, | |
| "logits/rejected": -2.900007963180542, | |
| "logps/chosen": -300.6615905761719, | |
| "logps/rejected": -290.6969909667969, | |
| "loss": 62498.0375, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -300.6615905761719, | |
| "rewards/margins": -9.964593887329102, | |
| "rewards/rejected": -290.6969909667969, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026164311878597593, | |
| "grad_norm": 550202.226793022, | |
| "learning_rate": 1.3020833333333334e-07, | |
| "logits/chosen": -2.861807346343994, | |
| "logits/rejected": -2.8286397457122803, | |
| "logps/chosen": -297.4012756347656, | |
| "logps/rejected": -225.73532104492188, | |
| "loss": 62479.4313, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -297.4012756347656, | |
| "rewards/margins": -71.66590881347656, | |
| "rewards/rejected": -225.73532104492188, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03139717425431711, | |
| "grad_norm": 575687.3818314937, | |
| "learning_rate": 1.5624999999999999e-07, | |
| "logits/chosen": -2.8637468814849854, | |
| "logits/rejected": -2.855187177658081, | |
| "logps/chosen": -261.7722473144531, | |
| "logps/rejected": -266.75311279296875, | |
| "loss": 62467.7375, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -261.7722473144531, | |
| "rewards/margins": 4.980858325958252, | |
| "rewards/rejected": -266.75311279296875, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 601042.0970547737, | |
| "learning_rate": 1.8229166666666666e-07, | |
| "logits/chosen": -2.882888078689575, | |
| "logits/rejected": -2.8436450958251953, | |
| "logps/chosen": -322.3620300292969, | |
| "logps/rejected": -236.65188598632812, | |
| "loss": 62398.2562, | |
| "rewards/accuracies": 0.3499999940395355, | |
| "rewards/chosen": -322.3620300292969, | |
| "rewards/margins": -85.71016693115234, | |
| "rewards/rejected": -236.65188598632812, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04186289900575615, | |
| "grad_norm": 1270156.296221847, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "logits/chosen": -2.926880121231079, | |
| "logits/rejected": -2.873258590698242, | |
| "logps/chosen": -266.81585693359375, | |
| "logps/rejected": -222.47512817382812, | |
| "loss": 62382.9187, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -266.81585693359375, | |
| "rewards/margins": -44.34074401855469, | |
| "rewards/rejected": -222.47512817382812, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04709576138147567, | |
| "grad_norm": 562197.8356415116, | |
| "learning_rate": 2.3437499999999998e-07, | |
| "logits/chosen": -2.934823989868164, | |
| "logits/rejected": -2.8437087535858154, | |
| "logps/chosen": -337.57647705078125, | |
| "logps/rejected": -253.1848602294922, | |
| "loss": 62295.2562, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": -337.57647705078125, | |
| "rewards/margins": -84.39164733886719, | |
| "rewards/rejected": -253.1848602294922, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.052328623757195186, | |
| "grad_norm": 579259.1669227169, | |
| "learning_rate": 2.604166666666667e-07, | |
| "logits/chosen": -2.8226637840270996, | |
| "logits/rejected": -2.8579444885253906, | |
| "logps/chosen": -235.44284057617188, | |
| "logps/rejected": -253.05126953125, | |
| "loss": 62140.85, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -235.44284057617188, | |
| "rewards/margins": 17.60841941833496, | |
| "rewards/rejected": -253.05126953125, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0575614861329147, | |
| "grad_norm": 599221.2375408602, | |
| "learning_rate": 2.864583333333333e-07, | |
| "logits/chosen": -2.9071204662323, | |
| "logits/rejected": -2.86643385887146, | |
| "logps/chosen": -295.3536376953125, | |
| "logps/rejected": -295.96044921875, | |
| "loss": 62103.8438, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -295.3536376953125, | |
| "rewards/margins": 0.6068130731582642, | |
| "rewards/rejected": -295.96044921875, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06279434850863422, | |
| "grad_norm": 587217.7209315128, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "logits/chosen": -2.825546979904175, | |
| "logits/rejected": -2.853196620941162, | |
| "logps/chosen": -280.54376220703125, | |
| "logps/rejected": -290.41162109375, | |
| "loss": 61848.075, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -280.54376220703125, | |
| "rewards/margins": 9.867898941040039, | |
| "rewards/rejected": -290.41162109375, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06802721088435375, | |
| "grad_norm": 692483.3715259883, | |
| "learning_rate": 3.3854166666666667e-07, | |
| "logits/chosen": -2.8896777629852295, | |
| "logits/rejected": -2.869809150695801, | |
| "logps/chosen": -279.5859375, | |
| "logps/rejected": -267.8680725097656, | |
| "loss": 61784.125, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -279.5859375, | |
| "rewards/margins": -11.717863082885742, | |
| "rewards/rejected": -267.8680725097656, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 643621.4786223344, | |
| "learning_rate": 3.645833333333333e-07, | |
| "logits/chosen": -2.86277174949646, | |
| "logits/rejected": -2.849290132522583, | |
| "logps/chosen": -270.3601989746094, | |
| "logps/rejected": -299.9423828125, | |
| "loss": 61415.6375, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -270.3601989746094, | |
| "rewards/margins": 29.582199096679688, | |
| "rewards/rejected": -299.9423828125, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07849293563579278, | |
| "grad_norm": 797625.2394802963, | |
| "learning_rate": 3.9062499999999997e-07, | |
| "logits/chosen": -2.875126600265503, | |
| "logits/rejected": -2.8260860443115234, | |
| "logps/chosen": -278.2060241699219, | |
| "logps/rejected": -263.739990234375, | |
| "loss": 61252.4812, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -278.2060241699219, | |
| "rewards/margins": -14.466039657592773, | |
| "rewards/rejected": -263.739990234375, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0837257980115123, | |
| "grad_norm": 570934.2395758026, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "logits/chosen": -2.8297553062438965, | |
| "logits/rejected": -2.818152904510498, | |
| "logps/chosen": -244.71047973632812, | |
| "logps/rejected": -216.3663330078125, | |
| "loss": 61182.05, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -244.71047973632812, | |
| "rewards/margins": -28.344135284423828, | |
| "rewards/rejected": -216.3663330078125, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08895866038723181, | |
| "grad_norm": 647636.324219079, | |
| "learning_rate": 4.427083333333333e-07, | |
| "logits/chosen": -2.8677287101745605, | |
| "logits/rejected": -2.8416037559509277, | |
| "logps/chosen": -280.59759521484375, | |
| "logps/rejected": -278.1571044921875, | |
| "loss": 60841.875, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -280.59759521484375, | |
| "rewards/margins": -2.440479278564453, | |
| "rewards/rejected": -278.1571044921875, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09419152276295134, | |
| "grad_norm": 693686.6913297386, | |
| "learning_rate": 4.6874999999999996e-07, | |
| "logits/chosen": -2.8715648651123047, | |
| "logits/rejected": -2.886065721511841, | |
| "logps/chosen": -303.4865417480469, | |
| "logps/rejected": -300.1495361328125, | |
| "loss": 60200.25, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -303.4865417480469, | |
| "rewards/margins": -3.336996555328369, | |
| "rewards/rejected": -300.1495361328125, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09942438513867086, | |
| "grad_norm": 695048.3682737482, | |
| "learning_rate": 4.947916666666667e-07, | |
| "logits/chosen": -2.8399910926818848, | |
| "logits/rejected": -2.8273520469665527, | |
| "logps/chosen": -285.8985900878906, | |
| "logps/rejected": -278.19525146484375, | |
| "loss": 59913.85, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -285.8985900878906, | |
| "rewards/margins": -7.703277587890625, | |
| "rewards/rejected": -278.19525146484375, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10465724751439037, | |
| "grad_norm": 926619.6100320778, | |
| "learning_rate": 4.999732492681437e-07, | |
| "logits/chosen": -2.839812994003296, | |
| "logits/rejected": -2.814923048019409, | |
| "logps/chosen": -280.0777587890625, | |
| "logps/rejected": -326.1065979003906, | |
| "loss": 58985.2125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -280.0777587890625, | |
| "rewards/margins": 46.02882385253906, | |
| "rewards/rejected": -326.1065979003906, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 730881.0367768478, | |
| "learning_rate": 4.998645842314724e-07, | |
| "logits/chosen": -2.8014039993286133, | |
| "logits/rejected": -2.7791314125061035, | |
| "logps/chosen": -325.879638671875, | |
| "logps/rejected": -323.22125244140625, | |
| "loss": 59519.525, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -325.879638671875, | |
| "rewards/margins": -2.658414363861084, | |
| "rewards/rejected": -323.22125244140625, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1151229722658294, | |
| "grad_norm": 787482.5379143337, | |
| "learning_rate": 4.996723692767926e-07, | |
| "logits/chosen": -2.877906322479248, | |
| "logits/rejected": -2.860431671142578, | |
| "logps/chosen": -331.8926086425781, | |
| "logps/rejected": -336.84979248046875, | |
| "loss": 59833.9437, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -331.8926086425781, | |
| "rewards/margins": 4.957190036773682, | |
| "rewards/rejected": -336.84979248046875, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12035583464154893, | |
| "grad_norm": 758644.6025801541, | |
| "learning_rate": 4.993966686770933e-07, | |
| "logits/chosen": -2.8740134239196777, | |
| "logits/rejected": -2.849520683288574, | |
| "logps/chosen": -286.97998046875, | |
| "logps/rejected": -302.22589111328125, | |
| "loss": 59542.8562, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -286.97998046875, | |
| "rewards/margins": 15.245903015136719, | |
| "rewards/rejected": -302.22589111328125, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.12558869701726844, | |
| "grad_norm": 839068.6603800668, | |
| "learning_rate": 4.990375746213598e-07, | |
| "logits/chosen": -2.8500800132751465, | |
| "logits/rejected": -2.813788414001465, | |
| "logps/chosen": -252.0970458984375, | |
| "logps/rejected": -269.5028381347656, | |
| "loss": 58766.425, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -252.0970458984375, | |
| "rewards/margins": 17.40580177307129, | |
| "rewards/rejected": -269.5028381347656, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13082155939298795, | |
| "grad_norm": 790620.3365762861, | |
| "learning_rate": 4.985952071837474e-07, | |
| "logits/chosen": -2.8092734813690186, | |
| "logits/rejected": -2.8068203926086426, | |
| "logps/chosen": -272.0372619628906, | |
| "logps/rejected": -282.043701171875, | |
| "loss": 57950.4375, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -272.0372619628906, | |
| "rewards/margins": 10.00644588470459, | |
| "rewards/rejected": -282.043701171875, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1360544217687075, | |
| "grad_norm": 800005.8930982946, | |
| "learning_rate": 4.980697142834314e-07, | |
| "logits/chosen": -2.9066848754882812, | |
| "logits/rejected": -2.889483690261841, | |
| "logps/chosen": -358.52880859375, | |
| "logps/rejected": -351.5975341796875, | |
| "loss": 57769.1687, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -358.52880859375, | |
| "rewards/margins": -6.931341648101807, | |
| "rewards/rejected": -351.5975341796875, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.141287284144427, | |
| "grad_norm": 991860.3958541746, | |
| "learning_rate": 4.974612716351446e-07, | |
| "logits/chosen": -2.8132946491241455, | |
| "logits/rejected": -2.807452917098999, | |
| "logps/chosen": -269.17333984375, | |
| "logps/rejected": -304.22784423828125, | |
| "loss": 57210.9125, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -269.17333984375, | |
| "rewards/margins": 35.054466247558594, | |
| "rewards/rejected": -304.22784423828125, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 1217484.7693174647, | |
| "learning_rate": 4.967700826904229e-07, | |
| "logits/chosen": -2.881108045578003, | |
| "logits/rejected": -2.877159357070923, | |
| "logps/chosen": -324.2433166503906, | |
| "logps/rejected": -289.4080505371094, | |
| "loss": 58436.2625, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -324.2433166503906, | |
| "rewards/margins": -34.83523941040039, | |
| "rewards/rejected": -289.4080505371094, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15175300889586604, | |
| "grad_norm": 1144094.5245424435, | |
| "learning_rate": 4.95996378569574e-07, | |
| "logits/chosen": -2.861013889312744, | |
| "logits/rejected": -2.8163299560546875, | |
| "logps/chosen": -310.35223388671875, | |
| "logps/rejected": -315.37078857421875, | |
| "loss": 56525.3125, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -310.35223388671875, | |
| "rewards/margins": 5.018545627593994, | |
| "rewards/rejected": -315.37078857421875, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.15698587127158556, | |
| "grad_norm": 906591.8638946635, | |
| "learning_rate": 4.951404179843962e-07, | |
| "logits/chosen": -2.8345422744750977, | |
| "logits/rejected": -2.8686890602111816, | |
| "logps/chosen": -276.36981201171875, | |
| "logps/rejected": -285.62548828125, | |
| "loss": 58509.9375, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -276.36981201171875, | |
| "rewards/margins": 9.255735397338867, | |
| "rewards/rejected": -285.62548828125, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16221873364730507, | |
| "grad_norm": 907999.1062550667, | |
| "learning_rate": 4.942024871516694e-07, | |
| "logits/chosen": -2.8697471618652344, | |
| "logits/rejected": -2.8267807960510254, | |
| "logps/chosen": -320.91058349609375, | |
| "logps/rejected": -321.4515075683594, | |
| "loss": 58345.9, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -320.91058349609375, | |
| "rewards/margins": 0.5409385561943054, | |
| "rewards/rejected": -321.4515075683594, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1674515960230246, | |
| "grad_norm": 885328.1305549938, | |
| "learning_rate": 4.931828996974498e-07, | |
| "logits/chosen": -2.7532379627227783, | |
| "logits/rejected": -2.7566537857055664, | |
| "logps/chosen": -237.9208526611328, | |
| "logps/rejected": -254.9251251220703, | |
| "loss": 58183.8625, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -237.9208526611328, | |
| "rewards/margins": 17.00423812866211, | |
| "rewards/rejected": -254.9251251220703, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1726844583987441, | |
| "grad_norm": 1593598.7457023177, | |
| "learning_rate": 4.920819965521997e-07, | |
| "logits/chosen": -2.6699514389038086, | |
| "logits/rejected": -2.670328378677368, | |
| "logps/chosen": -305.18328857421875, | |
| "logps/rejected": -284.074951171875, | |
| "loss": 57758.7562, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -305.18328857421875, | |
| "rewards/margins": -21.108369827270508, | |
| "rewards/rejected": -284.074951171875, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.17791732077446362, | |
| "grad_norm": 993568.4034911739, | |
| "learning_rate": 4.909001458367866e-07, | |
| "logits/chosen": -2.7054855823516846, | |
| "logits/rejected": -2.7096757888793945, | |
| "logps/chosen": -286.2120666503906, | |
| "logps/rejected": -321.2934265136719, | |
| "loss": 57056.9187, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -286.2120666503906, | |
| "rewards/margins": 35.081356048583984, | |
| "rewards/rejected": -321.2934265136719, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 934004.2559217811, | |
| "learning_rate": 4.896377427393911e-07, | |
| "logits/chosen": -2.7484357357025146, | |
| "logits/rejected": -2.7158854007720947, | |
| "logps/chosen": -286.253662109375, | |
| "logps/rejected": -315.47406005859375, | |
| "loss": 57739.1625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -286.253662109375, | |
| "rewards/margins": 29.220422744750977, | |
| "rewards/rejected": -315.47406005859375, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.18838304552590268, | |
| "grad_norm": 854532.9754199074, | |
| "learning_rate": 4.882952093833627e-07, | |
| "logits/chosen": -2.6975908279418945, | |
| "logits/rejected": -2.697767972946167, | |
| "logps/chosen": -299.58221435546875, | |
| "logps/rejected": -306.10247802734375, | |
| "loss": 56578.5375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -299.58221435546875, | |
| "rewards/margins": 6.520210266113281, | |
| "rewards/rejected": -306.10247802734375, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1936159079016222, | |
| "grad_norm": 1018591.7582230872, | |
| "learning_rate": 4.868729946860708e-07, | |
| "logits/chosen": -2.697580575942993, | |
| "logits/rejected": -2.6543309688568115, | |
| "logps/chosen": -300.19854736328125, | |
| "logps/rejected": -279.4755859375, | |
| "loss": 56696.2875, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -300.19854736328125, | |
| "rewards/margins": -20.722976684570312, | |
| "rewards/rejected": -279.4755859375, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1988487702773417, | |
| "grad_norm": 1865987.6965253549, | |
| "learning_rate": 4.853715742087946e-07, | |
| "logits/chosen": -2.715686321258545, | |
| "logits/rejected": -2.6946115493774414, | |
| "logps/chosen": -261.4237060546875, | |
| "logps/rejected": -260.59552001953125, | |
| "loss": 55295.3625, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -261.4237060546875, | |
| "rewards/margins": -0.8281745910644531, | |
| "rewards/rejected": -260.59552001953125, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 2075031.8789570439, | |
| "learning_rate": 4.837914499977052e-07, | |
| "logits/chosen": -2.7049078941345215, | |
| "logits/rejected": -2.649726152420044, | |
| "logps/chosen": -348.7242126464844, | |
| "logps/rejected": -302.77056884765625, | |
| "loss": 56870.6875, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -348.7242126464844, | |
| "rewards/margins": -45.95365524291992, | |
| "rewards/rejected": -302.77056884765625, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.20931449502878074, | |
| "grad_norm": 1044606.2904041886, | |
| "learning_rate": 4.821331504159906e-07, | |
| "logits/chosen": -2.662055253982544, | |
| "logits/rejected": -2.6654608249664307, | |
| "logps/chosen": -240.9337921142578, | |
| "logps/rejected": -280.35516357421875, | |
| "loss": 57408.1, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -240.9337921142578, | |
| "rewards/margins": 39.421363830566406, | |
| "rewards/rejected": -280.35516357421875, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21454735740450026, | |
| "grad_norm": 1613413.1304386982, | |
| "learning_rate": 4.80397229967181e-07, | |
| "logits/chosen": -2.5958218574523926, | |
| "logits/rejected": -2.5995872020721436, | |
| "logps/chosen": -260.1720275878906, | |
| "logps/rejected": -268.8197326660156, | |
| "loss": 57515.7125, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -260.1720275878906, | |
| "rewards/margins": 8.647693634033203, | |
| "rewards/rejected": -268.8197326660156, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 965375.2956772823, | |
| "learning_rate": 4.785842691097342e-07, | |
| "logits/chosen": -2.722567081451416, | |
| "logits/rejected": -2.6706037521362305, | |
| "logps/chosen": -301.97955322265625, | |
| "logps/rejected": -308.42462158203125, | |
| "loss": 56186.2937, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -301.97955322265625, | |
| "rewards/margins": 6.445120334625244, | |
| "rewards/rejected": -308.42462158203125, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2250130821559393, | |
| "grad_norm": 1682586.8851408535, | |
| "learning_rate": 4.7669487406294076e-07, | |
| "logits/chosen": -2.691540479660034, | |
| "logits/rejected": -2.6860575675964355, | |
| "logps/chosen": -292.8274230957031, | |
| "logps/rejected": -352.30621337890625, | |
| "loss": 57221.1375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -292.8274230957031, | |
| "rewards/margins": 59.47880172729492, | |
| "rewards/rejected": -352.30621337890625, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2302459445316588, | |
| "grad_norm": 1043252.7613651449, | |
| "learning_rate": 4.7472967660421603e-07, | |
| "logits/chosen": -2.7390644550323486, | |
| "logits/rejected": -2.6686208248138428, | |
| "logps/chosen": -251.1779327392578, | |
| "logps/rejected": -252.0894317626953, | |
| "loss": 56568.1813, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -251.1779327392578, | |
| "rewards/margins": 0.9114850163459778, | |
| "rewards/rejected": -252.0894317626953, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23547880690737832, | |
| "grad_norm": 1024702.6869019131, | |
| "learning_rate": 4.7268933385784627e-07, | |
| "logits/chosen": -2.682610273361206, | |
| "logits/rejected": -2.640778064727783, | |
| "logps/chosen": -247.3615264892578, | |
| "logps/rejected": -284.06402587890625, | |
| "loss": 56326.825, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -247.3615264892578, | |
| "rewards/margins": 36.7025146484375, | |
| "rewards/rejected": -284.06402587890625, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.24071166928309787, | |
| "grad_norm": 977761.8133840163, | |
| "learning_rate": 4.705745280752585e-07, | |
| "logits/chosen": -2.6460351943969727, | |
| "logits/rejected": -2.5948281288146973, | |
| "logps/chosen": -320.40252685546875, | |
| "logps/rejected": -341.423583984375, | |
| "loss": 56747.225, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -320.40252685546875, | |
| "rewards/margins": 21.021081924438477, | |
| "rewards/rejected": -341.423583984375, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.24594453165881738, | |
| "grad_norm": 987590.8827444692, | |
| "learning_rate": 4.68385966406889e-07, | |
| "logits/chosen": -2.592116117477417, | |
| "logits/rejected": -2.5488688945770264, | |
| "logps/chosen": -270.15057373046875, | |
| "logps/rejected": -273.60870361328125, | |
| "loss": 57541.425, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -270.15057373046875, | |
| "rewards/margins": 3.458080768585205, | |
| "rewards/rejected": -273.60870361328125, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.25117739403453687, | |
| "grad_norm": 1115088.0063134031, | |
| "learning_rate": 4.6612438066572555e-07, | |
| "logits/chosen": -2.5365209579467773, | |
| "logits/rejected": -2.5030362606048584, | |
| "logps/chosen": -303.14288330078125, | |
| "logps/rejected": -285.1949462890625, | |
| "loss": 57592.9, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -303.14288330078125, | |
| "rewards/margins": -17.947914123535156, | |
| "rewards/rejected": -285.1949462890625, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 872007.2172912332, | |
| "learning_rate": 4.6379052708260356e-07, | |
| "logits/chosen": -2.571394443511963, | |
| "logits/rejected": -2.5047571659088135, | |
| "logps/chosen": -271.99029541015625, | |
| "logps/rejected": -268.821533203125, | |
| "loss": 57330.4187, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -271.99029541015625, | |
| "rewards/margins": -3.168781280517578, | |
| "rewards/rejected": -268.821533203125, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2616431187859759, | |
| "grad_norm": 1283172.0120638541, | |
| "learning_rate": 4.6138518605333664e-07, | |
| "logits/chosen": -2.591219663619995, | |
| "logits/rejected": -2.5536255836486816, | |
| "logps/chosen": -340.99761962890625, | |
| "logps/rejected": -338.05084228515625, | |
| "loss": 58544.5125, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -340.99761962890625, | |
| "rewards/margins": -2.9467933177948, | |
| "rewards/rejected": -338.05084228515625, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2668759811616955, | |
| "grad_norm": 887220.7931197283, | |
| "learning_rate": 4.589091618777674e-07, | |
| "logits/chosen": -2.452988862991333, | |
| "logits/rejected": -2.426440715789795, | |
| "logps/chosen": -310.2080993652344, | |
| "logps/rejected": -326.74005126953125, | |
| "loss": 59796.9938, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -310.2080993652344, | |
| "rewards/margins": 16.531951904296875, | |
| "rewards/rejected": -326.74005126953125, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.272108843537415, | |
| "grad_norm": 1079677.4885566523, | |
| "learning_rate": 4.5636328249082514e-07, | |
| "logits/chosen": -2.6359188556671143, | |
| "logits/rejected": -2.5355026721954346, | |
| "logps/chosen": -310.75189208984375, | |
| "logps/rejected": -308.8578796386719, | |
| "loss": 59678.8, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -310.75189208984375, | |
| "rewards/margins": -1.893977403640747, | |
| "rewards/rejected": -308.8578796386719, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2773417059131345, | |
| "grad_norm": 1084565.0284754713, | |
| "learning_rate": 4.5374839918567996e-07, | |
| "logits/chosen": -2.6321051120758057, | |
| "logits/rejected": -2.567678928375244, | |
| "logps/chosen": -327.0538635253906, | |
| "logps/rejected": -316.69342041015625, | |
| "loss": 58093.8688, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -327.0538635253906, | |
| "rewards/margins": -10.360448837280273, | |
| "rewards/rejected": -316.69342041015625, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.282574568288854, | |
| "grad_norm": 1295931.023547164, | |
| "learning_rate": 4.510653863290871e-07, | |
| "logits/chosen": -2.627354383468628, | |
| "logits/rejected": -2.5420610904693604, | |
| "logps/chosen": -284.817138671875, | |
| "logps/rejected": -295.0710144042969, | |
| "loss": 56263.8875, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -284.817138671875, | |
| "rewards/margins": 10.253904342651367, | |
| "rewards/rejected": -295.0710144042969, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.28780743066457354, | |
| "grad_norm": 1013086.5803710954, | |
| "learning_rate": 4.483151410690151e-07, | |
| "logits/chosen": -2.6444249153137207, | |
| "logits/rejected": -2.5427169799804688, | |
| "logps/chosen": -279.9425354003906, | |
| "logps/rejected": -270.55450439453125, | |
| "loss": 54940.2875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -279.9425354003906, | |
| "rewards/margins": -9.388038635253906, | |
| "rewards/rejected": -270.55450439453125, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 1576188.4710046574, | |
| "learning_rate": 4.4549858303465737e-07, | |
| "logits/chosen": -2.6243691444396973, | |
| "logits/rejected": -2.5685534477233887, | |
| "logps/chosen": -296.85418701171875, | |
| "logps/rejected": -310.7580261230469, | |
| "loss": 56116.5938, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -296.85418701171875, | |
| "rewards/margins": 13.903894424438477, | |
| "rewards/rejected": -310.7580261230469, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.29827315541601257, | |
| "grad_norm": 1319520.840838825, | |
| "learning_rate": 4.4261665402892476e-07, | |
| "logits/chosen": -2.5911037921905518, | |
| "logits/rejected": -2.5209097862243652, | |
| "logps/chosen": -265.95025634765625, | |
| "logps/rejected": -334.61431884765625, | |
| "loss": 57866.4625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -265.95025634765625, | |
| "rewards/margins": 68.66404724121094, | |
| "rewards/rejected": -334.61431884765625, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3035060177917321, | |
| "grad_norm": 1164732.143957571, | |
| "learning_rate": 4.396703177135261e-07, | |
| "logits/chosen": -2.6242473125457764, | |
| "logits/rejected": -2.5436782836914062, | |
| "logps/chosen": -349.99383544921875, | |
| "logps/rejected": -329.6797180175781, | |
| "loss": 56799.7375, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -349.99383544921875, | |
| "rewards/margins": -20.31418800354004, | |
| "rewards/rejected": -329.6797180175781, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3087388801674516, | |
| "grad_norm": 1036095.2706155936, | |
| "learning_rate": 4.3666055928673697e-07, | |
| "logits/chosen": -2.6259796619415283, | |
| "logits/rejected": -2.596653938293457, | |
| "logps/chosen": -294.8160400390625, | |
| "logps/rejected": -268.30645751953125, | |
| "loss": 55223.3125, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -294.8160400390625, | |
| "rewards/margins": -26.509592056274414, | |
| "rewards/rejected": -268.30645751953125, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3139717425431711, | |
| "grad_norm": 1421793.3450062282, | |
| "learning_rate": 4.335883851539693e-07, | |
| "logits/chosen": -2.536402702331543, | |
| "logits/rejected": -2.470693588256836, | |
| "logps/chosen": -266.8374328613281, | |
| "logps/rejected": -269.9141845703125, | |
| "loss": 54316.75, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -266.8374328613281, | |
| "rewards/margins": 3.0767579078674316, | |
| "rewards/rejected": -269.9141845703125, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.31920460491889063, | |
| "grad_norm": 1145803.694963496, | |
| "learning_rate": 4.304548225912481e-07, | |
| "logits/chosen": -2.4925479888916016, | |
| "logits/rejected": -2.4637606143951416, | |
| "logps/chosen": -268.6978454589844, | |
| "logps/rejected": -288.32489013671875, | |
| "loss": 56123.5, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -268.6978454589844, | |
| "rewards/margins": 19.627042770385742, | |
| "rewards/rejected": -288.32489013671875, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.32443746729461015, | |
| "grad_norm": 1320708.0317829524, | |
| "learning_rate": 4.272609194017105e-07, | |
| "logits/chosen": -2.427326202392578, | |
| "logits/rejected": -2.375277519226074, | |
| "logps/chosen": -273.1225280761719, | |
| "logps/rejected": -294.8364562988281, | |
| "loss": 55285.2125, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -273.1225280761719, | |
| "rewards/margins": 21.713897705078125, | |
| "rewards/rejected": -294.8364562988281, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 1175173.27697323, | |
| "learning_rate": 4.2400774356524003e-07, | |
| "logits/chosen": -2.463435649871826, | |
| "logits/rejected": -2.390852689743042, | |
| "logps/chosen": -291.9449768066406, | |
| "logps/rejected": -351.9012756347656, | |
| "loss": 55227.475, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -291.9449768066406, | |
| "rewards/margins": 59.956260681152344, | |
| "rewards/rejected": -351.9012756347656, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3349031920460492, | |
| "grad_norm": 1736713.8372362903, | |
| "learning_rate": 4.2069638288135547e-07, | |
| "logits/chosen": -2.424726724624634, | |
| "logits/rejected": -2.4184367656707764, | |
| "logps/chosen": -293.0435485839844, | |
| "logps/rejected": -315.5990905761719, | |
| "loss": 56523.2438, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -293.0435485839844, | |
| "rewards/margins": 22.555578231811523, | |
| "rewards/rejected": -315.5990905761719, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3401360544217687, | |
| "grad_norm": 1392044.8934369895, | |
| "learning_rate": 4.1732794460547037e-07, | |
| "logits/chosen": -2.4518871307373047, | |
| "logits/rejected": -2.444579601287842, | |
| "logps/chosen": -241.4635009765625, | |
| "logps/rejected": -265.34478759765625, | |
| "loss": 57858.325, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -241.4635009765625, | |
| "rewards/margins": 23.881275177001953, | |
| "rewards/rejected": -265.34478759765625, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.3453689167974882, | |
| "grad_norm": 1151359.8150083232, | |
| "learning_rate": 4.139035550786494e-07, | |
| "logits/chosen": -2.4895317554473877, | |
| "logits/rejected": -2.476973056793213, | |
| "logps/chosen": -236.6543426513672, | |
| "logps/rejected": -301.2790832519531, | |
| "loss": 54808.6438, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -236.6543426513672, | |
| "rewards/margins": 64.62477111816406, | |
| "rewards/rejected": -301.2790832519531, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.35060177917320773, | |
| "grad_norm": 1148068.4271174779, | |
| "learning_rate": 4.104243593509806e-07, | |
| "logits/chosen": -2.511590003967285, | |
| "logits/rejected": -2.449333906173706, | |
| "logps/chosen": -255.1795196533203, | |
| "logps/rejected": -306.39111328125, | |
| "loss": 56303.15, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -255.1795196533203, | |
| "rewards/margins": 51.211570739746094, | |
| "rewards/rejected": -306.39111328125, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.35583464154892724, | |
| "grad_norm": 1398198.9347442659, | |
| "learning_rate": 4.0689152079869306e-07, | |
| "logits/chosen": -2.4384443759918213, | |
| "logits/rejected": -2.4097814559936523, | |
| "logps/chosen": -313.1650085449219, | |
| "logps/rejected": -348.493896484375, | |
| "loss": 54666.4, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -313.1650085449219, | |
| "rewards/margins": 35.3288688659668, | |
| "rewards/rejected": -348.493896484375, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.36106750392464676, | |
| "grad_norm": 1048517.8304177759, | |
| "learning_rate": 4.0330622073514606e-07, | |
| "logits/chosen": -2.456749439239502, | |
| "logits/rejected": -2.353886127471924, | |
| "logps/chosen": -325.97222900390625, | |
| "logps/rejected": -289.00445556640625, | |
| "loss": 55775.8562, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -325.97222900390625, | |
| "rewards/margins": -36.96786117553711, | |
| "rewards/rejected": -289.00445556640625, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 1751549.502482873, | |
| "learning_rate": 3.99669658015821e-07, | |
| "logits/chosen": -2.325648784637451, | |
| "logits/rejected": -2.3088955879211426, | |
| "logps/chosen": -249.3928680419922, | |
| "logps/rejected": -300.55267333984375, | |
| "loss": 56549.7063, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -249.3928680419922, | |
| "rewards/margins": 51.1598014831543, | |
| "rewards/rejected": -300.55267333984375, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3715332286760858, | |
| "grad_norm": 1743710.2168055333, | |
| "learning_rate": 3.9598304863744615e-07, | |
| "logits/chosen": -2.3647897243499756, | |
| "logits/rejected": -2.302427053451538, | |
| "logps/chosen": -264.53399658203125, | |
| "logps/rejected": -291.4407958984375, | |
| "loss": 55886.3688, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -264.53399658203125, | |
| "rewards/margins": 26.906795501708984, | |
| "rewards/rejected": -291.4407958984375, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.37676609105180536, | |
| "grad_norm": 1076304.712482483, | |
| "learning_rate": 3.92247625331392e-07, | |
| "logits/chosen": -2.3268961906433105, | |
| "logits/rejected": -2.2726428508758545, | |
| "logps/chosen": -233.88784790039062, | |
| "logps/rejected": -254.55062866210938, | |
| "loss": 55491.6813, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -233.88784790039062, | |
| "rewards/margins": 20.662763595581055, | |
| "rewards/rejected": -254.55062866210938, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3819989534275249, | |
| "grad_norm": 1089826.502625074, | |
| "learning_rate": 3.8846463715146867e-07, | |
| "logits/chosen": -2.4054033756256104, | |
| "logits/rejected": -2.35465669631958, | |
| "logps/chosen": -293.21893310546875, | |
| "logps/rejected": -311.3880920410156, | |
| "loss": 56063.525, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -293.21893310546875, | |
| "rewards/margins": 18.16920280456543, | |
| "rewards/rejected": -311.3880920410156, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3872318158032444, | |
| "grad_norm": 1476308.11101748, | |
| "learning_rate": 3.846353490562664e-07, | |
| "logits/chosen": -2.3780322074890137, | |
| "logits/rejected": -2.329284191131592, | |
| "logps/chosen": -254.2071990966797, | |
| "logps/rejected": -289.7637634277344, | |
| "loss": 55320.5563, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -254.2071990966797, | |
| "rewards/margins": 35.55649948120117, | |
| "rewards/rejected": -289.7637634277344, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3924646781789639, | |
| "grad_norm": 1105169.4119545654, | |
| "learning_rate": 3.8076104148617817e-07, | |
| "logits/chosen": -2.3992652893066406, | |
| "logits/rejected": -2.3519163131713867, | |
| "logps/chosen": -297.7577209472656, | |
| "logps/rejected": -303.87060546875, | |
| "loss": 55865.7375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -297.7577209472656, | |
| "rewards/margins": 6.11287260055542, | |
| "rewards/rejected": -303.87060546875, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.3976975405546834, | |
| "grad_norm": 1165531.8808372426, | |
| "learning_rate": 3.768430099352445e-07, | |
| "logits/chosen": -2.4510560035705566, | |
| "logits/rejected": -2.369868278503418, | |
| "logps/chosen": -297.7958984375, | |
| "logps/rejected": -273.23046875, | |
| "loss": 57969.975, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -297.7958984375, | |
| "rewards/margins": -24.565448760986328, | |
| "rewards/rejected": -273.23046875, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 1756290.8872507422, | |
| "learning_rate": 3.728825645179653e-07, | |
| "logits/chosen": -2.4245288372039795, | |
| "logits/rejected": -2.3175346851348877, | |
| "logps/chosen": -359.3501892089844, | |
| "logps/rejected": -339.73492431640625, | |
| "loss": 57982.1, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -359.3501892089844, | |
| "rewards/margins": -19.61526107788086, | |
| "rewards/rejected": -339.73492431640625, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 1538328.85318582, | |
| "learning_rate": 3.6888102953122304e-07, | |
| "logits/chosen": -2.190237045288086, | |
| "logits/rejected": -2.2050204277038574, | |
| "logps/chosen": -305.574951171875, | |
| "logps/rejected": -310.0028991699219, | |
| "loss": 56215.6438, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -305.574951171875, | |
| "rewards/margins": 4.427947044372559, | |
| "rewards/rejected": -310.0028991699219, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.413396127681842, | |
| "grad_norm": 1032609.5663318251, | |
| "learning_rate": 3.6483974301146263e-07, | |
| "logits/chosen": -2.409813165664673, | |
| "logits/rejected": -2.279897451400757, | |
| "logps/chosen": -289.708984375, | |
| "logps/rejected": -292.1055908203125, | |
| "loss": 55959.5, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -289.708984375, | |
| "rewards/margins": 2.3966078758239746, | |
| "rewards/rejected": -292.1055908203125, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.4186289900575615, | |
| "grad_norm": 1349205.656483844, | |
| "learning_rate": 3.607600562872785e-07, | |
| "logits/chosen": -2.303772211074829, | |
| "logits/rejected": -2.219710111618042, | |
| "logps/chosen": -319.6715087890625, | |
| "logps/rejected": -316.0106506347656, | |
| "loss": 57163.6375, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -319.6715087890625, | |
| "rewards/margins": -3.66082501411438, | |
| "rewards/rejected": -316.0106506347656, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.423861852433281, | |
| "grad_norm": 979251.717327062, | |
| "learning_rate": 3.566433335275558e-07, | |
| "logits/chosen": -2.2218708992004395, | |
| "logits/rejected": -2.146432876586914, | |
| "logps/chosen": -270.113037109375, | |
| "logps/rejected": -288.05926513671875, | |
| "loss": 54550.1687, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -270.113037109375, | |
| "rewards/margins": 17.94621467590332, | |
| "rewards/rejected": -288.05926513671875, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4290947148090005, | |
| "grad_norm": 1332038.5850987951, | |
| "learning_rate": 3.5249095128531856e-07, | |
| "logits/chosen": -2.1922194957733154, | |
| "logits/rejected": -2.0968267917633057, | |
| "logps/chosen": -301.511962890625, | |
| "logps/rejected": -319.2638244628906, | |
| "loss": 55946.6562, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -301.511962890625, | |
| "rewards/margins": 17.751834869384766, | |
| "rewards/rejected": -319.2638244628906, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.43432757718472004, | |
| "grad_norm": 1088873.3097436083, | |
| "learning_rate": 3.4830429803743705e-07, | |
| "logits/chosen": -2.3102076053619385, | |
| "logits/rejected": -2.264838695526123, | |
| "logps/chosen": -313.7403869628906, | |
| "logps/rejected": -312.85125732421875, | |
| "loss": 55392.65, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -313.7403869628906, | |
| "rewards/margins": -0.8891464471817017, | |
| "rewards/rejected": -312.85125732421875, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 1374961.051492005, | |
| "learning_rate": 3.4408477372034736e-07, | |
| "logits/chosen": -2.271077871322632, | |
| "logits/rejected": -2.221766948699951, | |
| "logps/chosen": -293.64752197265625, | |
| "logps/rejected": -334.50390625, | |
| "loss": 56162.7438, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -293.64752197265625, | |
| "rewards/margins": 40.85638427734375, | |
| "rewards/rejected": -334.50390625, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.44479330193615907, | |
| "grad_norm": 1029506.0927493338, | |
| "learning_rate": 3.3983378926194015e-07, | |
| "logits/chosen": -2.24725079536438, | |
| "logits/rejected": -2.1463942527770996, | |
| "logps/chosen": -292.072021484375, | |
| "logps/rejected": -306.45660400390625, | |
| "loss": 55289.5437, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -292.072021484375, | |
| "rewards/margins": 14.384634017944336, | |
| "rewards/rejected": -306.45660400390625, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.4500261643118786, | |
| "grad_norm": 1002942.6665806974, | |
| "learning_rate": 3.3555276610977276e-07, | |
| "logits/chosen": -2.2519736289978027, | |
| "logits/rejected": -2.1914682388305664, | |
| "logps/chosen": -308.74169921875, | |
| "logps/rejected": -312.30438232421875, | |
| "loss": 55870.9875, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -308.74169921875, | |
| "rewards/margins": 3.5627059936523438, | |
| "rewards/rejected": -312.30438232421875, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4552590266875981, | |
| "grad_norm": 1018589.2167974291, | |
| "learning_rate": 3.3124313575576487e-07, | |
| "logits/chosen": -2.17337703704834, | |
| "logits/rejected": -2.1850523948669434, | |
| "logps/chosen": -284.9986267089844, | |
| "logps/rejected": -300.66607666015625, | |
| "loss": 54878.6375, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -284.9986267089844, | |
| "rewards/margins": 15.66742992401123, | |
| "rewards/rejected": -300.66607666015625, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4604918890633176, | |
| "grad_norm": 1076240.3043484294, | |
| "learning_rate": 3.269063392575352e-07, | |
| "logits/chosen": -2.107131242752075, | |
| "logits/rejected": -2.0504283905029297, | |
| "logps/chosen": -245.75363159179688, | |
| "logps/rejected": -265.7157897949219, | |
| "loss": 55359.2375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -245.75363159179688, | |
| "rewards/margins": 19.962154388427734, | |
| "rewards/rejected": -265.7157897949219, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.46572475143903713, | |
| "grad_norm": 1259375.5689547102, | |
| "learning_rate": 3.2254382675653905e-07, | |
| "logits/chosen": -2.274196147918701, | |
| "logits/rejected": -2.182969331741333, | |
| "logps/chosen": -341.2582092285156, | |
| "logps/rejected": -347.05010986328125, | |
| "loss": 55359.3875, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -341.2582092285156, | |
| "rewards/margins": 5.791925430297852, | |
| "rewards/rejected": -347.05010986328125, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.47095761381475665, | |
| "grad_norm": 1927449.2632160257, | |
| "learning_rate": 3.1815705699316964e-07, | |
| "logits/chosen": -2.264638662338257, | |
| "logits/rejected": -2.235848903656006, | |
| "logps/chosen": -247.626220703125, | |
| "logps/rejected": -299.621826171875, | |
| "loss": 55009.4375, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -247.626220703125, | |
| "rewards/margins": 51.99560546875, | |
| "rewards/rejected": -299.621826171875, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 2337791.6005255897, | |
| "learning_rate": 3.1374749681898216e-07, | |
| "logits/chosen": -2.189664125442505, | |
| "logits/rejected": -2.1661365032196045, | |
| "logps/chosen": -283.037841796875, | |
| "logps/rejected": -331.63189697265625, | |
| "loss": 56368.575, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -283.037841796875, | |
| "rewards/margins": 48.59403991699219, | |
| "rewards/rejected": -331.63189697265625, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.48142333856619574, | |
| "grad_norm": 1581637.4423084452, | |
| "learning_rate": 3.0931662070620794e-07, | |
| "logits/chosen": -2.2392799854278564, | |
| "logits/rejected": -2.1875014305114746, | |
| "logps/chosen": -283.40338134765625, | |
| "logps/rejected": -318.47039794921875, | |
| "loss": 55090.0, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -283.40338134765625, | |
| "rewards/margins": 35.06700897216797, | |
| "rewards/rejected": -318.47039794921875, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.48665620094191525, | |
| "grad_norm": 1456316.7528858548, | |
| "learning_rate": 3.048659102547186e-07, | |
| "logits/chosen": -2.3513636589050293, | |
| "logits/rejected": -2.2428977489471436, | |
| "logps/chosen": -318.89703369140625, | |
| "logps/rejected": -347.49859619140625, | |
| "loss": 56281.025, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -318.89703369140625, | |
| "rewards/margins": 28.60154151916504, | |
| "rewards/rejected": -347.49859619140625, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.49188906331763477, | |
| "grad_norm": 1026249.5450756603, | |
| "learning_rate": 3.003968536966078e-07, | |
| "logits/chosen": -2.180349826812744, | |
| "logits/rejected": -2.0016205310821533, | |
| "logps/chosen": -281.388916015625, | |
| "logps/rejected": -276.56427001953125, | |
| "loss": 53576.8, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -281.388916015625, | |
| "rewards/margins": -4.824639320373535, | |
| "rewards/rejected": -276.56427001953125, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.4971219256933543, | |
| "grad_norm": 1429705.0746835866, | |
| "learning_rate": 2.959109453985547e-07, | |
| "logits/chosen": -2.2324633598327637, | |
| "logits/rejected": -2.0949769020080566, | |
| "logps/chosen": -299.1005554199219, | |
| "logps/rejected": -289.7796325683594, | |
| "loss": 55444.925, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -299.1005554199219, | |
| "rewards/margins": -9.320911407470703, | |
| "rewards/rejected": -289.7796325683594, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5023547880690737, | |
| "grad_norm": 1213056.8914210084, | |
| "learning_rate": 2.9140968536213693e-07, | |
| "logits/chosen": -2.1725077629089355, | |
| "logits/rejected": -2.1495959758758545, | |
| "logps/chosen": -259.3185729980469, | |
| "logps/rejected": -283.7967529296875, | |
| "loss": 54958.5125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -259.3185729980469, | |
| "rewards/margins": 24.478168487548828, | |
| "rewards/rejected": -283.7967529296875, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5075876504447933, | |
| "grad_norm": 1461214.8419503546, | |
| "learning_rate": 2.868945787222582e-07, | |
| "logits/chosen": -2.1361522674560547, | |
| "logits/rejected": -2.180379867553711, | |
| "logps/chosen": -234.53329467773438, | |
| "logps/rejected": -269.418701171875, | |
| "loss": 55915.4812, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -234.53329467773438, | |
| "rewards/margins": 34.885379791259766, | |
| "rewards/rejected": -269.418701171875, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 1374060.3676287297, | |
| "learning_rate": 2.823671352438608e-07, | |
| "logits/chosen": -2.101999044418335, | |
| "logits/rejected": -2.050888776779175, | |
| "logps/chosen": -254.61770629882812, | |
| "logps/rejected": -283.10235595703125, | |
| "loss": 55689.6875, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -254.61770629882812, | |
| "rewards/margins": 28.48464012145996, | |
| "rewards/rejected": -283.10235595703125, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5180533751962323, | |
| "grad_norm": 1234397.4379234589, | |
| "learning_rate": 2.7782886881708866e-07, | |
| "logits/chosen": -2.2712063789367676, | |
| "logits/rejected": -2.099457263946533, | |
| "logps/chosen": -310.07879638671875, | |
| "logps/rejected": -374.5906677246094, | |
| "loss": 54732.425, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -310.07879638671875, | |
| "rewards/margins": 64.51188659667969, | |
| "rewards/rejected": -374.5906677246094, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5232862375719518, | |
| "grad_norm": 2228603.377630035, | |
| "learning_rate": 2.73281296951072e-07, | |
| "logits/chosen": -2.017988920211792, | |
| "logits/rejected": -2.0341272354125977, | |
| "logps/chosen": -222.3467559814453, | |
| "logps/rejected": -274.4383239746094, | |
| "loss": 57469.6625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -222.3467559814453, | |
| "rewards/margins": 52.0915641784668, | |
| "rewards/rejected": -274.4383239746094, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5285190999476713, | |
| "grad_norm": 1599169.5252724146, | |
| "learning_rate": 2.6872594026650096e-07, | |
| "logits/chosen": -2.240408420562744, | |
| "logits/rejected": -2.2190628051757812, | |
| "logps/chosen": -270.1956481933594, | |
| "logps/rejected": -335.818359375, | |
| "loss": 53495.1125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -270.1956481933594, | |
| "rewards/margins": 65.6227035522461, | |
| "rewards/rejected": -335.818359375, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.533751962323391, | |
| "grad_norm": 1490279.794358093, | |
| "learning_rate": 2.641643219871597e-07, | |
| "logits/chosen": -2.219712734222412, | |
| "logits/rejected": -2.139911651611328, | |
| "logps/chosen": -288.52215576171875, | |
| "logps/rejected": -317.8120422363281, | |
| "loss": 54654.5563, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -288.52215576171875, | |
| "rewards/margins": 29.289892196655273, | |
| "rewards/rejected": -317.8120422363281, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5389848246991105, | |
| "grad_norm": 935331.0867922652, | |
| "learning_rate": 2.595979674305891e-07, | |
| "logits/chosen": -2.084282398223877, | |
| "logits/rejected": -2.0336263179779053, | |
| "logps/chosen": -237.1022186279297, | |
| "logps/rejected": -258.7450256347656, | |
| "loss": 54242.45, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -237.1022186279297, | |
| "rewards/margins": 21.642807006835938, | |
| "rewards/rejected": -258.7450256347656, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.54421768707483, | |
| "grad_norm": 856340.2472942632, | |
| "learning_rate": 2.550284034980507e-07, | |
| "logits/chosen": -2.1015374660491943, | |
| "logits/rejected": -2.0551133155822754, | |
| "logps/chosen": -279.68505859375, | |
| "logps/rejected": -288.3494567871094, | |
| "loss": 55627.8063, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -279.68505859375, | |
| "rewards/margins": 8.664429664611816, | |
| "rewards/rejected": -288.3494567871094, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 990531.7402526786, | |
| "learning_rate": 2.5045715816395916e-07, | |
| "logits/chosen": -2.2954821586608887, | |
| "logits/rejected": -2.194169521331787, | |
| "logps/chosen": -299.71234130859375, | |
| "logps/rejected": -318.68243408203125, | |
| "loss": 55352.35, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -299.71234130859375, | |
| "rewards/margins": 18.970050811767578, | |
| "rewards/rejected": -318.68243408203125, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.554683411826269, | |
| "grad_norm": 1450335.2512408984, | |
| "learning_rate": 2.4588575996495794e-07, | |
| "logits/chosen": -2.2317874431610107, | |
| "logits/rejected": -2.169450521469116, | |
| "logps/chosen": -276.4530334472656, | |
| "logps/rejected": -311.96551513671875, | |
| "loss": 54057.4375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -276.4530334472656, | |
| "rewards/margins": 35.512451171875, | |
| "rewards/rejected": -311.96551513671875, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5599162742019885, | |
| "grad_norm": 1671087.2529512038, | |
| "learning_rate": 2.413157374888054e-07, | |
| "logits/chosen": -2.2822182178497314, | |
| "logits/rejected": -2.2092044353485107, | |
| "logps/chosen": -297.13531494140625, | |
| "logps/rejected": -293.6783142089844, | |
| "loss": 56565.0062, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -297.13531494140625, | |
| "rewards/margins": -3.457014560699463, | |
| "rewards/rejected": -293.6783142089844, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.565149136577708, | |
| "grad_norm": 1228860.5418419128, | |
| "learning_rate": 2.367486188632446e-07, | |
| "logits/chosen": -2.173696279525757, | |
| "logits/rejected": -2.099151849746704, | |
| "logps/chosen": -266.07257080078125, | |
| "logps/rejected": -315.889892578125, | |
| "loss": 56023.175, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -266.07257080078125, | |
| "rewards/margins": 49.81734085083008, | |
| "rewards/rejected": -315.889892578125, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5703819989534276, | |
| "grad_norm": 1271355.6361364825, | |
| "learning_rate": 2.321859312450267e-07, | |
| "logits/chosen": -2.364675760269165, | |
| "logits/rejected": -2.297121286392212, | |
| "logps/chosen": -312.4518737792969, | |
| "logps/rejected": -373.5928039550781, | |
| "loss": 54175.6875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -312.4518737792969, | |
| "rewards/margins": 61.14093017578125, | |
| "rewards/rejected": -373.5928039550781, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5756148613291471, | |
| "grad_norm": 1398597.9993464884, | |
| "learning_rate": 2.276292003092593e-07, | |
| "logits/chosen": -2.2173264026641846, | |
| "logits/rejected": -2.1705925464630127, | |
| "logps/chosen": -307.5565490722656, | |
| "logps/rejected": -310.6933898925781, | |
| "loss": 54367.9, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -307.5565490722656, | |
| "rewards/margins": 3.136824131011963, | |
| "rewards/rejected": -310.6933898925781, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5808477237048666, | |
| "grad_norm": 1551718.3427722957, | |
| "learning_rate": 2.230799497392495e-07, | |
| "logits/chosen": -2.2841944694519043, | |
| "logits/rejected": -2.225440502166748, | |
| "logps/chosen": -272.79681396484375, | |
| "logps/rejected": -291.16204833984375, | |
| "loss": 56317.2063, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -272.79681396484375, | |
| "rewards/margins": 18.365182876586914, | |
| "rewards/rejected": -291.16204833984375, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 1420409.765508531, | |
| "learning_rate": 2.185397007170141e-07, | |
| "logits/chosen": -2.300354480743408, | |
| "logits/rejected": -2.2717068195343018, | |
| "logps/chosen": -313.4623107910156, | |
| "logps/rejected": -360.75189208984375, | |
| "loss": 55098.0625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -313.4623107910156, | |
| "rewards/margins": 47.28960418701172, | |
| "rewards/rejected": -360.75189208984375, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5913134484563056, | |
| "grad_norm": 1392120.5854896335, | |
| "learning_rate": 2.14009971414625e-07, | |
| "logits/chosen": -2.2033753395080566, | |
| "logits/rejected": -2.1571030616760254, | |
| "logps/chosen": -282.2511291503906, | |
| "logps/rejected": -287.3548889160156, | |
| "loss": 54579.0062, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -282.2511291503906, | |
| "rewards/margins": 5.103717803955078, | |
| "rewards/rejected": -287.3548889160156, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5965463108320251, | |
| "grad_norm": 1488981.6454046704, | |
| "learning_rate": 2.094922764865619e-07, | |
| "logits/chosen": -2.218703031539917, | |
| "logits/rejected": -2.244843006134033, | |
| "logps/chosen": -232.9685821533203, | |
| "logps/rejected": -295.5643615722656, | |
| "loss": 56100.95, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -232.9685821533203, | |
| "rewards/margins": 62.59580612182617, | |
| "rewards/rejected": -295.5643615722656, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6017791732077447, | |
| "grad_norm": 1135626.4886801469, | |
| "learning_rate": 2.0498812656324064e-07, | |
| "logits/chosen": -2.142216205596924, | |
| "logits/rejected": -2.1622607707977295, | |
| "logps/chosen": -289.1842041015625, | |
| "logps/rejected": -325.3665466308594, | |
| "loss": 54899.825, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -289.1842041015625, | |
| "rewards/margins": 36.182373046875, | |
| "rewards/rejected": -325.3665466308594, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6070120355834642, | |
| "grad_norm": 1329372.6719742662, | |
| "learning_rate": 2.0049902774588797e-07, | |
| "logits/chosen": -2.207730770111084, | |
| "logits/rejected": -2.0855050086975098, | |
| "logps/chosen": -299.4252014160156, | |
| "logps/rejected": -332.8768615722656, | |
| "loss": 55401.0125, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -299.4252014160156, | |
| "rewards/margins": 33.4516487121582, | |
| "rewards/rejected": -332.8768615722656, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 1187559.6151840126, | |
| "learning_rate": 1.960264811029297e-07, | |
| "logits/chosen": -2.22457218170166, | |
| "logits/rejected": -2.148383617401123, | |
| "logps/chosen": -281.52923583984375, | |
| "logps/rejected": -282.35784912109375, | |
| "loss": 56603.25, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -281.52923583984375, | |
| "rewards/margins": 0.828582763671875, | |
| "rewards/rejected": -282.35784912109375, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6174777603349032, | |
| "grad_norm": 1671629.5147047387, | |
| "learning_rate": 1.9157198216806238e-07, | |
| "logits/chosen": -2.209186315536499, | |
| "logits/rejected": -2.1159491539001465, | |
| "logps/chosen": -251.740966796875, | |
| "logps/rejected": -269.68011474609375, | |
| "loss": 55453.7562, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -251.740966796875, | |
| "rewards/margins": 17.939146041870117, | |
| "rewards/rejected": -269.68011474609375, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 1173783.4292100056, | |
| "learning_rate": 1.8713702044017577e-07, | |
| "logits/chosen": -2.1656856536865234, | |
| "logits/rejected": -2.1623783111572266, | |
| "logps/chosen": -301.41497802734375, | |
| "logps/rejected": -317.84295654296875, | |
| "loss": 54113.325, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -301.41497802734375, | |
| "rewards/margins": 16.427982330322266, | |
| "rewards/rejected": -317.84295654296875, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6279434850863422, | |
| "grad_norm": 1387972.3011875993, | |
| "learning_rate": 1.8272307888529274e-07, | |
| "logits/chosen": -2.1883492469787598, | |
| "logits/rejected": -2.1378281116485596, | |
| "logps/chosen": -257.42822265625, | |
| "logps/rejected": -320.2197265625, | |
| "loss": 55090.8625, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -257.42822265625, | |
| "rewards/margins": 62.7915153503418, | |
| "rewards/rejected": -320.2197265625, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6331763474620618, | |
| "grad_norm": 1488821.1810637303, | |
| "learning_rate": 1.783316334406939e-07, | |
| "logits/chosen": -2.185284376144409, | |
| "logits/rejected": -2.0930609703063965, | |
| "logps/chosen": -322.49005126953125, | |
| "logps/rejected": -319.7123718261719, | |
| "loss": 54071.0125, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -322.49005126953125, | |
| "rewards/margins": -2.777683973312378, | |
| "rewards/rejected": -319.7123718261719, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6384092098377813, | |
| "grad_norm": 1496837.8340915893, | |
| "learning_rate": 1.7396415252139288e-07, | |
| "logits/chosen": -2.2097795009613037, | |
| "logits/rejected": -2.0639331340789795, | |
| "logps/chosen": -308.24530029296875, | |
| "logps/rejected": -331.1815490722656, | |
| "loss": 54010.9875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -308.24530029296875, | |
| "rewards/margins": 22.936208724975586, | |
| "rewards/rejected": -331.1815490722656, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6436420722135008, | |
| "grad_norm": 1535540.9500706908, | |
| "learning_rate": 1.6962209652912625e-07, | |
| "logits/chosen": -2.1692049503326416, | |
| "logits/rejected": -2.077504873275757, | |
| "logps/chosen": -255.7120361328125, | |
| "logps/rejected": -309.77008056640625, | |
| "loss": 54530.4875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -255.7120361328125, | |
| "rewards/margins": 54.058021545410156, | |
| "rewards/rejected": -309.77008056640625, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6488749345892203, | |
| "grad_norm": 1397345.2747377793, | |
| "learning_rate": 1.6530691736402316e-07, | |
| "logits/chosen": -2.1868765354156494, | |
| "logits/rejected": -2.1478359699249268, | |
| "logps/chosen": -292.8278503417969, | |
| "logps/rejected": -312.1719055175781, | |
| "loss": 54489.7375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -292.8278503417969, | |
| "rewards/margins": 19.344045639038086, | |
| "rewards/rejected": -312.1719055175781, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6541077969649398, | |
| "grad_norm": 1977409.2998021427, | |
| "learning_rate": 1.610200579391182e-07, | |
| "logits/chosen": -2.1679329872131348, | |
| "logits/rejected": -2.1316826343536377, | |
| "logps/chosen": -283.0874938964844, | |
| "logps/rejected": -364.5801696777344, | |
| "loss": 55410.75, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -283.0874938964844, | |
| "rewards/margins": 81.49267578125, | |
| "rewards/rejected": -364.5801696777344, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 1362818.5877687463, | |
| "learning_rate": 1.5676295169786864e-07, | |
| "logits/chosen": -2.0093648433685303, | |
| "logits/rejected": -1.9298946857452393, | |
| "logps/chosen": -282.3995056152344, | |
| "logps/rejected": -278.3210754394531, | |
| "loss": 54493.85, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -282.3995056152344, | |
| "rewards/margins": -4.078440189361572, | |
| "rewards/rejected": -278.3210754394531, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6645735217163788, | |
| "grad_norm": 956804.5377818815, | |
| "learning_rate": 1.5253702213483842e-07, | |
| "logits/chosen": -2.1643216609954834, | |
| "logits/rejected": -2.119776964187622, | |
| "logps/chosen": -271.3257751464844, | |
| "logps/rejected": -303.90423583984375, | |
| "loss": 54765.8125, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -271.3257751464844, | |
| "rewards/margins": 32.57844924926758, | |
| "rewards/rejected": -303.90423583984375, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6698063840920984, | |
| "grad_norm": 1933509.9856251064, | |
| "learning_rate": 1.483436823197092e-07, | |
| "logits/chosen": -2.093644857406616, | |
| "logits/rejected": -2.10066556930542, | |
| "logps/chosen": -269.1563415527344, | |
| "logps/rejected": -319.5292663574219, | |
| "loss": 54325.475, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -269.1563415527344, | |
| "rewards/margins": 50.37293243408203, | |
| "rewards/rejected": -319.5292663574219, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6750392464678179, | |
| "grad_norm": 1218847.4753339728, | |
| "learning_rate": 1.4418433442477703e-07, | |
| "logits/chosen": -2.216813087463379, | |
| "logits/rejected": -2.1345386505126953, | |
| "logps/chosen": -338.1468505859375, | |
| "logps/rejected": -352.8824768066406, | |
| "loss": 53920.6188, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -338.1468505859375, | |
| "rewards/margins": 14.735623359680176, | |
| "rewards/rejected": -352.8824768066406, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6802721088435374, | |
| "grad_norm": 1418811.7836556053, | |
| "learning_rate": 1.4006036925609243e-07, | |
| "logits/chosen": -2.139899492263794, | |
| "logits/rejected": -2.0506820678710938, | |
| "logps/chosen": -257.8123779296875, | |
| "logps/rejected": -283.587890625, | |
| "loss": 55958.4187, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -257.8123779296875, | |
| "rewards/margins": 25.775487899780273, | |
| "rewards/rejected": -283.587890625, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6855049712192569, | |
| "grad_norm": 1109912.054173663, | |
| "learning_rate": 1.3597316578840216e-07, | |
| "logits/chosen": -2.0801479816436768, | |
| "logits/rejected": -2.0766029357910156, | |
| "logps/chosen": -256.91619873046875, | |
| "logps/rejected": -276.5906677246094, | |
| "loss": 54215.7375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -256.91619873046875, | |
| "rewards/margins": 19.674455642700195, | |
| "rewards/rejected": -276.5906677246094, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6907378335949764, | |
| "grad_norm": 1210210.3382933068, | |
| "learning_rate": 1.319240907040458e-07, | |
| "logits/chosen": -2.245999574661255, | |
| "logits/rejected": -2.1108059883117676, | |
| "logps/chosen": -321.09796142578125, | |
| "logps/rejected": -322.8074645996094, | |
| "loss": 55360.3, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -321.09796142578125, | |
| "rewards/margins": 1.7095245122909546, | |
| "rewards/rejected": -322.8074645996094, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 1098340.112919491, | |
| "learning_rate": 1.279144979359641e-07, | |
| "logits/chosen": -2.1789064407348633, | |
| "logits/rejected": -2.157804489135742, | |
| "logps/chosen": -267.2674865722656, | |
| "logps/rejected": -305.3499450683594, | |
| "loss": 55846.8812, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -267.2674865722656, | |
| "rewards/margins": 38.08247756958008, | |
| "rewards/rejected": -305.3499450683594, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7012035583464155, | |
| "grad_norm": 1447767.6648965469, | |
| "learning_rate": 1.2394572821496948e-07, | |
| "logits/chosen": -2.2281277179718018, | |
| "logits/rejected": -2.21685791015625, | |
| "logps/chosen": -273.71417236328125, | |
| "logps/rejected": -327.2356262207031, | |
| "loss": 54601.5563, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -273.71417236328125, | |
| "rewards/margins": 53.521484375, | |
| "rewards/rejected": -327.2356262207031, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.706436420722135, | |
| "grad_norm": 1277662.338967538, | |
| "learning_rate": 1.2001910862143174e-07, | |
| "logits/chosen": -2.2508022785186768, | |
| "logits/rejected": -2.217378854751587, | |
| "logps/chosen": -325.85906982421875, | |
| "logps/rejected": -380.45074462890625, | |
| "loss": 55330.475, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -325.85906982421875, | |
| "rewards/margins": 54.59168243408203, | |
| "rewards/rejected": -380.45074462890625, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7116692830978545, | |
| "grad_norm": 1785233.744803184, | |
| "learning_rate": 1.1613595214152711e-07, | |
| "logits/chosen": -2.2163052558898926, | |
| "logits/rejected": -2.1031951904296875, | |
| "logps/chosen": -284.403076171875, | |
| "logps/rejected": -271.61138916015625, | |
| "loss": 54460.6625, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -284.403076171875, | |
| "rewards/margins": -12.791729927062988, | |
| "rewards/rejected": -271.61138916015625, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.716902145473574, | |
| "grad_norm": 1084153.5773127347, | |
| "learning_rate": 1.122975572282018e-07, | |
| "logits/chosen": -2.19317364692688, | |
| "logits/rejected": -2.1025004386901855, | |
| "logps/chosen": -290.7996520996094, | |
| "logps/rejected": -270.1470947265625, | |
| "loss": 54599.6188, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -290.7996520996094, | |
| "rewards/margins": -20.65255355834961, | |
| "rewards/rejected": -270.1470947265625, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7221350078492935, | |
| "grad_norm": 1089542.9473462715, | |
| "learning_rate": 1.0850520736699362e-07, | |
| "logits/chosen": -2.144193172454834, | |
| "logits/rejected": -2.0945630073547363, | |
| "logps/chosen": -264.43109130859375, | |
| "logps/rejected": -340.2378845214844, | |
| "loss": 54947.6625, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -264.43109130859375, | |
| "rewards/margins": 75.80680084228516, | |
| "rewards/rejected": -340.2378845214844, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.727367870225013, | |
| "grad_norm": 1178567.4912604708, | |
| "learning_rate": 1.0476017064685941e-07, | |
| "logits/chosen": -2.2328460216522217, | |
| "logits/rejected": -2.1399552822113037, | |
| "logps/chosen": -284.4504089355469, | |
| "logps/rejected": -293.85321044921875, | |
| "loss": 55292.35, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -284.4504089355469, | |
| "rewards/margins": 9.402796745300293, | |
| "rewards/rejected": -293.85321044921875, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 1333559.7423557746, | |
| "learning_rate": 1.0106369933615042e-07, | |
| "logits/chosen": -2.011481761932373, | |
| "logits/rejected": -1.935136079788208, | |
| "logps/chosen": -258.0648193359375, | |
| "logps/rejected": -269.0512390136719, | |
| "loss": 56453.9, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -258.0648193359375, | |
| "rewards/margins": 10.98639965057373, | |
| "rewards/rejected": -269.0512390136719, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7378335949764521, | |
| "grad_norm": 1081251.2157163108, | |
| "learning_rate": 9.741702946387748e-08, | |
| "logits/chosen": -2.1545426845550537, | |
| "logits/rejected": -2.0765717029571533, | |
| "logps/chosen": -247.3363494873047, | |
| "logps/rejected": -301.45672607421875, | |
| "loss": 54404.8, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -247.3363494873047, | |
| "rewards/margins": 54.120391845703125, | |
| "rewards/rejected": -301.45672607421875, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7430664573521716, | |
| "grad_norm": 1104364.5468847684, | |
| "learning_rate": 9.382138040640714e-08, | |
| "logits/chosen": -1.989871621131897, | |
| "logits/rejected": -1.9418586492538452, | |
| "logps/chosen": -244.57852172851562, | |
| "logps/rejected": -289.2986755371094, | |
| "loss": 54110.525, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -244.57852172851562, | |
| "rewards/margins": 44.72013854980469, | |
| "rewards/rejected": -289.2986755371094, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7482993197278912, | |
| "grad_norm": 1104089.7558876271, | |
| "learning_rate": 9.027795447972545e-08, | |
| "logits/chosen": -2.2300283908843994, | |
| "logits/rejected": -2.1951324939727783, | |
| "logps/chosen": -286.88922119140625, | |
| "logps/rejected": -345.98822021484375, | |
| "loss": 52983.1375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -286.88922119140625, | |
| "rewards/margins": 59.0989990234375, | |
| "rewards/rejected": -345.98822021484375, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7535321821036107, | |
| "grad_norm": 1208966.7828290404, | |
| "learning_rate": 8.678793653740632e-08, | |
| "logits/chosen": -2.19745135307312, | |
| "logits/rejected": -2.0950427055358887, | |
| "logps/chosen": -259.8890686035156, | |
| "logps/rejected": -310.04876708984375, | |
| "loss": 55099.525, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -259.8890686035156, | |
| "rewards/margins": 50.15970993041992, | |
| "rewards/rejected": -310.04876708984375, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7587650444793302, | |
| "grad_norm": 1272614.4979089308, | |
| "learning_rate": 8.335249357441945e-08, | |
| "logits/chosen": -2.041647434234619, | |
| "logits/rejected": -2.0392508506774902, | |
| "logps/chosen": -260.08172607421875, | |
| "logps/rejected": -329.94854736328125, | |
| "loss": 54837.7125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -260.08172607421875, | |
| "rewards/margins": 69.86690521240234, | |
| "rewards/rejected": -329.94854736328125, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7639979068550498, | |
| "grad_norm": 1162448.1540473108, | |
| "learning_rate": 7.997277433690983e-08, | |
| "logits/chosen": -2.1625466346740723, | |
| "logits/rejected": -2.0773284435272217, | |
| "logps/chosen": -268.3184814453125, | |
| "logps/rejected": -292.38433837890625, | |
| "loss": 55808.2125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -268.3184814453125, | |
| "rewards/margins": 24.065847396850586, | |
| "rewards/rejected": -292.38433837890625, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 1243184.3713818155, | |
| "learning_rate": 7.664990893807885e-08, | |
| "logits/chosen": -2.1861138343811035, | |
| "logits/rejected": -2.1057441234588623, | |
| "logps/chosen": -248.58114624023438, | |
| "logps/rejected": -316.4317626953125, | |
| "loss": 54297.5375, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -248.58114624023438, | |
| "rewards/margins": 67.85064697265625, | |
| "rewards/rejected": -316.4317626953125, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7744636316064888, | |
| "grad_norm": 1338047.2392976265, | |
| "learning_rate": 7.338500848029602e-08, | |
| "logits/chosen": -2.1806750297546387, | |
| "logits/rejected": -2.1461918354034424, | |
| "logps/chosen": -285.19451904296875, | |
| "logps/rejected": -319.1790466308594, | |
| "loss": 55123.75, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -285.19451904296875, | |
| "rewards/margins": 33.984554290771484, | |
| "rewards/rejected": -319.1790466308594, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7796964939822083, | |
| "grad_norm": 1984510.6026826864, | |
| "learning_rate": 7.01791646835681e-08, | |
| "logits/chosen": -2.2138607501983643, | |
| "logits/rejected": -2.1573081016540527, | |
| "logps/chosen": -270.462890625, | |
| "logps/rejected": -285.9214172363281, | |
| "loss": 54790.0, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -270.462890625, | |
| "rewards/margins": 15.458574295043945, | |
| "rewards/rejected": -285.9214172363281, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7849293563579278, | |
| "grad_norm": 1378850.8751623577, | |
| "learning_rate": 6.70334495204884e-08, | |
| "logits/chosen": -2.117934465408325, | |
| "logits/rejected": -2.0909981727600098, | |
| "logps/chosen": -254.19442749023438, | |
| "logps/rejected": -307.30255126953125, | |
| "loss": 54093.9875, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -254.19442749023438, | |
| "rewards/margins": 53.108154296875, | |
| "rewards/rejected": -307.30255126953125, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7901622187336473, | |
| "grad_norm": 1370111.0134525597, | |
| "learning_rate": 6.394891485779022e-08, | |
| "logits/chosen": -2.266648292541504, | |
| "logits/rejected": -2.2330288887023926, | |
| "logps/chosen": -290.75335693359375, | |
| "logps/rejected": -312.68597412109375, | |
| "loss": 54021.125, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -290.75335693359375, | |
| "rewards/margins": 21.932575225830078, | |
| "rewards/rejected": -312.68597412109375, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7953950811093669, | |
| "grad_norm": 1572823.5723971077, | |
| "learning_rate": 6.092659210462231e-08, | |
| "logits/chosen": -2.1503944396972656, | |
| "logits/rejected": -2.113105297088623, | |
| "logps/chosen": -281.13037109375, | |
| "logps/rejected": -321.85693359375, | |
| "loss": 54900.25, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -281.13037109375, | |
| "rewards/margins": 40.7265510559082, | |
| "rewards/rejected": -321.85693359375, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8006279434850864, | |
| "grad_norm": 1225741.5170516171, | |
| "learning_rate": 5.7967491867665975e-08, | |
| "logits/chosen": -2.0941481590270996, | |
| "logits/rejected": -2.064021348953247, | |
| "logps/chosen": -251.21670532226562, | |
| "logps/rejected": -310.03631591796875, | |
| "loss": 54873.5938, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -251.21670532226562, | |
| "rewards/margins": 58.8195915222168, | |
| "rewards/rejected": -310.03631591796875, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 1642379.1878661881, | |
| "learning_rate": 5.507260361320737e-08, | |
| "logits/chosen": -2.1802749633789062, | |
| "logits/rejected": -2.097052812576294, | |
| "logps/chosen": -280.42254638671875, | |
| "logps/rejected": -292.81768798828125, | |
| "loss": 54552.0125, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -280.42254638671875, | |
| "rewards/margins": 12.39512825012207, | |
| "rewards/rejected": -292.81768798828125, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8110936682365254, | |
| "grad_norm": 1328549.6940408363, | |
| "learning_rate": 5.2242895336278734e-08, | |
| "logits/chosen": -2.2298295497894287, | |
| "logits/rejected": -2.1420650482177734, | |
| "logps/chosen": -275.43951416015625, | |
| "logps/rejected": -293.7701110839844, | |
| "loss": 54556.4625, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -275.43951416015625, | |
| "rewards/margins": 18.330612182617188, | |
| "rewards/rejected": -293.7701110839844, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 1403447.9375936964, | |
| "learning_rate": 4.947931323697982e-08, | |
| "logits/chosen": -2.1510796546936035, | |
| "logits/rejected": -2.070650339126587, | |
| "logps/chosen": -281.1759338378906, | |
| "logps/rejected": -301.5060119628906, | |
| "loss": 53222.4187, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -281.1759338378906, | |
| "rewards/margins": 20.330089569091797, | |
| "rewards/rejected": -301.5060119628906, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8215593929879644, | |
| "grad_norm": 1248429.4711292263, | |
| "learning_rate": 4.678278140408667e-08, | |
| "logits/chosen": -2.2055792808532715, | |
| "logits/rejected": -2.0527145862579346, | |
| "logps/chosen": -284.21142578125, | |
| "logps/rejected": -297.88018798828125, | |
| "loss": 51932.0875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -284.21142578125, | |
| "rewards/margins": 13.668767929077148, | |
| "rewards/rejected": -297.88018798828125, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.826792255363684, | |
| "grad_norm": 1250406.1121283756, | |
| "learning_rate": 4.415420150605398e-08, | |
| "logits/chosen": -2.110973596572876, | |
| "logits/rejected": -1.9595458507537842, | |
| "logps/chosen": -252.9059295654297, | |
| "logps/rejected": -279.2762451171875, | |
| "loss": 55843.9812, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -252.9059295654297, | |
| "rewards/margins": 26.370315551757812, | |
| "rewards/rejected": -279.2762451171875, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8320251177394035, | |
| "grad_norm": 1200316.971470453, | |
| "learning_rate": 4.159445248951457e-08, | |
| "logits/chosen": -2.0804190635681152, | |
| "logits/rejected": -2.0888171195983887, | |
| "logps/chosen": -227.65390014648438, | |
| "logps/rejected": -293.1388244628906, | |
| "loss": 54166.2125, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -227.65390014648438, | |
| "rewards/margins": 65.48490905761719, | |
| "rewards/rejected": -293.1388244628906, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.837257980115123, | |
| "grad_norm": 1453921.71532858, | |
| "learning_rate": 3.9104390285376374e-08, | |
| "logits/chosen": -2.275310754776001, | |
| "logits/rejected": -2.17592191696167, | |
| "logps/chosen": -284.0006103515625, | |
| "logps/rejected": -263.94525146484375, | |
| "loss": 55792.875, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -284.0006103515625, | |
| "rewards/margins": -20.05536460876465, | |
| "rewards/rejected": -263.94525146484375, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 1115550.7841994467, | |
| "learning_rate": 3.6684847522615664e-08, | |
| "logits/chosen": -2.1132473945617676, | |
| "logits/rejected": -2.0296568870544434, | |
| "logps/chosen": -242.7162628173828, | |
| "logps/rejected": -279.27545166015625, | |
| "loss": 55248.8063, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -242.7162628173828, | |
| "rewards/margins": 36.55915069580078, | |
| "rewards/rejected": -279.27545166015625, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.847723704866562, | |
| "grad_norm": 1588544.8496029316, | |
| "learning_rate": 3.433663324986208e-08, | |
| "logits/chosen": -2.1658711433410645, | |
| "logits/rejected": -2.0674452781677246, | |
| "logps/chosen": -296.5272216796875, | |
| "logps/rejected": -326.5904541015625, | |
| "loss": 55337.175, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -296.5272216796875, | |
| "rewards/margins": 30.063217163085938, | |
| "rewards/rejected": -326.5904541015625, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8529565672422815, | |
| "grad_norm": 1954751.458337351, | |
| "learning_rate": 3.206053266486808e-08, | |
| "logits/chosen": -2.254883289337158, | |
| "logits/rejected": -2.1984355449676514, | |
| "logps/chosen": -274.1257629394531, | |
| "logps/rejected": -293.55303955078125, | |
| "loss": 54866.6188, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -274.1257629394531, | |
| "rewards/margins": 19.427263259887695, | |
| "rewards/rejected": -293.55303955078125, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.858189429618001, | |
| "grad_norm": 1259920.9995805293, | |
| "learning_rate": 2.9857306851953897e-08, | |
| "logits/chosen": -2.12813663482666, | |
| "logits/rejected": -2.065500259399414, | |
| "logps/chosen": -282.5124206542969, | |
| "logps/rejected": -329.7523498535156, | |
| "loss": 54957.6875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -282.5124206542969, | |
| "rewards/margins": 47.23994064331055, | |
| "rewards/rejected": -329.7523498535156, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8634222919937206, | |
| "grad_norm": 2932710.1060309387, | |
| "learning_rate": 2.772769252751575e-08, | |
| "logits/chosen": -2.2625370025634766, | |
| "logits/rejected": -2.1728615760803223, | |
| "logps/chosen": -326.66375732421875, | |
| "logps/rejected": -282.999755859375, | |
| "loss": 55274.6625, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -326.66375732421875, | |
| "rewards/margins": -43.66400909423828, | |
| "rewards/rejected": -282.999755859375, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.8686551543694401, | |
| "grad_norm": 1506078.4494627095, | |
| "learning_rate": 2.567240179368185e-08, | |
| "logits/chosen": -2.1724421977996826, | |
| "logits/rejected": -2.121241569519043, | |
| "logps/chosen": -305.38079833984375, | |
| "logps/rejected": -287.86627197265625, | |
| "loss": 53377.5625, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -305.38079833984375, | |
| "rewards/margins": -17.514530181884766, | |
| "rewards/rejected": -287.86627197265625, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8738880167451596, | |
| "grad_norm": 1304314.0364927459, | |
| "learning_rate": 2.3692121900199174e-08, | |
| "logits/chosen": -2.153219699859619, | |
| "logits/rejected": -2.0992071628570557, | |
| "logps/chosen": -261.697998046875, | |
| "logps/rejected": -283.06072998046875, | |
| "loss": 54374.4625, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -261.697998046875, | |
| "rewards/margins": 21.36276626586914, | |
| "rewards/rejected": -283.06072998046875, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 1648439.4660647989, | |
| "learning_rate": 2.1787515014630357e-08, | |
| "logits/chosen": -2.146265983581543, | |
| "logits/rejected": -2.111722946166992, | |
| "logps/chosen": -265.7535705566406, | |
| "logps/rejected": -268.1636962890625, | |
| "loss": 55597.7875, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -265.7535705566406, | |
| "rewards/margins": 2.4101357460021973, | |
| "rewards/rejected": -268.1636962890625, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8843537414965986, | |
| "grad_norm": 1734398.4767520986, | |
| "learning_rate": 1.995921800093761e-08, | |
| "logits/chosen": -2.073884963989258, | |
| "logits/rejected": -1.9895031452178955, | |
| "logps/chosen": -282.88983154296875, | |
| "logps/rejected": -306.662353515625, | |
| "loss": 53997.5125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -282.88983154296875, | |
| "rewards/margins": 23.772525787353516, | |
| "rewards/rejected": -306.662353515625, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.8895866038723181, | |
| "grad_norm": 1454626.9788120938, | |
| "learning_rate": 1.820784220652766e-08, | |
| "logits/chosen": -2.1386914253234863, | |
| "logits/rejected": -2.0203399658203125, | |
| "logps/chosen": -289.72161865234375, | |
| "logps/rejected": -275.7218017578125, | |
| "loss": 55009.9875, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -289.72161865234375, | |
| "rewards/margins": -13.99982738494873, | |
| "rewards/rejected": -275.7218017578125, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8948194662480377, | |
| "grad_norm": 1089368.648801681, | |
| "learning_rate": 1.6533973257828765e-08, | |
| "logits/chosen": -2.091768980026245, | |
| "logits/rejected": -2.0091001987457275, | |
| "logps/chosen": -287.00640869140625, | |
| "logps/rejected": -331.1282958984375, | |
| "loss": 54365.375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -287.00640869140625, | |
| "rewards/margins": 44.12189483642578, | |
| "rewards/rejected": -331.1282958984375, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9000523286237572, | |
| "grad_norm": 1510934.5115232496, | |
| "learning_rate": 1.4938170864468636e-08, | |
| "logits/chosen": -2.1866893768310547, | |
| "logits/rejected": -2.085561513900757, | |
| "logps/chosen": -258.2256774902344, | |
| "logps/rejected": -292.9275817871094, | |
| "loss": 54320.6625, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -258.2256774902344, | |
| "rewards/margins": 34.7019157409668, | |
| "rewards/rejected": -292.9275817871094, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9052851909994767, | |
| "grad_norm": 1724641.859318044, | |
| "learning_rate": 1.342096863211828e-08, | |
| "logits/chosen": -2.1254117488861084, | |
| "logits/rejected": -2.0715444087982178, | |
| "logps/chosen": -281.90814208984375, | |
| "logps/rejected": -320.0205078125, | |
| "loss": 56361.75, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -281.90814208984375, | |
| "rewards/margins": 38.11237335205078, | |
| "rewards/rejected": -320.0205078125, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9105180533751962, | |
| "grad_norm": 1063487.5975205353, | |
| "learning_rate": 1.1982873884064465e-08, | |
| "logits/chosen": -1.9770715236663818, | |
| "logits/rejected": -2.01908540725708, | |
| "logps/chosen": -227.65396118164062, | |
| "logps/rejected": -317.01251220703125, | |
| "loss": 55278.3875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -227.65396118164062, | |
| "rewards/margins": 89.35859680175781, | |
| "rewards/rejected": -317.01251220703125, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 1565693.148460451, | |
| "learning_rate": 1.062436749157053e-08, | |
| "logits/chosen": -2.1096649169921875, | |
| "logits/rejected": -2.111191749572754, | |
| "logps/chosen": -293.599609375, | |
| "logps/rejected": -321.7491760253906, | |
| "loss": 54704.9375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -293.599609375, | |
| "rewards/margins": 28.14957046508789, | |
| "rewards/rejected": -321.7491760253906, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.9209837781266352, | |
| "grad_norm": 1036263.9285741834, | |
| "learning_rate": 9.345903713082304e-09, | |
| "logits/chosen": -2.1749892234802246, | |
| "logits/rejected": -2.0691840648651123, | |
| "logps/chosen": -331.82086181640625, | |
| "logps/rejected": -299.9912414550781, | |
| "loss": 53077.875, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -331.82086181640625, | |
| "rewards/margins": -31.82961082458496, | |
| "rewards/rejected": -299.9912414550781, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9262166405023547, | |
| "grad_norm": 1469306.753540594, | |
| "learning_rate": 8.147910042332922e-09, | |
| "logits/chosen": -2.1455626487731934, | |
| "logits/rejected": -2.0270955562591553, | |
| "logps/chosen": -334.5442810058594, | |
| "logps/rejected": -350.59002685546875, | |
| "loss": 55319.25, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -334.5442810058594, | |
| "rewards/margins": 16.04566764831543, | |
| "rewards/rejected": -350.59002685546875, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9314495028780743, | |
| "grad_norm": 1665409.940510744, | |
| "learning_rate": 7.030787065396865e-09, | |
| "logits/chosen": -2.038339614868164, | |
| "logits/rejected": -1.9863135814666748, | |
| "logps/chosen": -280.74298095703125, | |
| "logps/rejected": -290.1654052734375, | |
| "loss": 54026.875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -280.74298095703125, | |
| "rewards/margins": 9.422399520874023, | |
| "rewards/rejected": -290.1654052734375, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9366823652537938, | |
| "grad_norm": 1264428.2705349482, | |
| "learning_rate": 5.994908326741876e-09, | |
| "logits/chosen": -2.1871466636657715, | |
| "logits/rejected": -2.144632339477539, | |
| "logps/chosen": -302.3477478027344, | |
| "logps/rejected": -335.5939636230469, | |
| "loss": 54326.7562, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -302.3477478027344, | |
| "rewards/margins": 33.246219635009766, | |
| "rewards/rejected": -335.5939636230469, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9419152276295133, | |
| "grad_norm": 1732479.872330989, | |
| "learning_rate": 5.04062020432286e-09, | |
| "logits/chosen": -2.223008632659912, | |
| "logits/rejected": -2.123403787612915, | |
| "logps/chosen": -267.91107177734375, | |
| "logps/rejected": -292.2001953125, | |
| "loss": 53162.075, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -267.91107177734375, | |
| "rewards/margins": 24.28915023803711, | |
| "rewards/rejected": -292.2001953125, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9471480900052328, | |
| "grad_norm": 1668574.1463273366, | |
| "learning_rate": 4.168241793759658e-09, | |
| "logits/chosen": -2.1200461387634277, | |
| "logits/rejected": -2.0498270988464355, | |
| "logps/chosen": -266.21112060546875, | |
| "logps/rejected": -335.3847351074219, | |
| "loss": 52995.9688, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -266.21112060546875, | |
| "rewards/margins": 69.17359924316406, | |
| "rewards/rejected": -335.3847351074219, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 1455951.9893622866, | |
| "learning_rate": 3.3780648016376866e-09, | |
| "logits/chosen": -2.221703052520752, | |
| "logits/rejected": -2.0837242603302, | |
| "logps/chosen": -328.39630126953125, | |
| "logps/rejected": -332.1032409667969, | |
| "loss": 55753.5, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -328.39630126953125, | |
| "rewards/margins": 3.7069344520568848, | |
| "rewards/rejected": -332.1032409667969, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.957613814756672, | |
| "grad_norm": 1397349.994078792, | |
| "learning_rate": 2.6703534479667887e-09, | |
| "logits/chosen": -2.1655023097991943, | |
| "logits/rejected": -2.0703787803649902, | |
| "logps/chosen": -253.6987762451172, | |
| "logps/rejected": -273.0363464355469, | |
| "loss": 53243.575, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -253.6987762451172, | |
| "rewards/margins": 19.337589263916016, | |
| "rewards/rejected": -273.0363464355469, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.9628466771323915, | |
| "grad_norm": 1342408.6426420235, | |
| "learning_rate": 2.0453443778310766e-09, | |
| "logits/chosen": -2.0957493782043457, | |
| "logits/rejected": -2.029906988143921, | |
| "logps/chosen": -270.45806884765625, | |
| "logps/rejected": -297.3926086425781, | |
| "loss": 54182.1375, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -270.45806884765625, | |
| "rewards/margins": 26.934490203857422, | |
| "rewards/rejected": -297.3926086425781, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.968079539508111, | |
| "grad_norm": 1458294.4502452172, | |
| "learning_rate": 1.5032465822596153e-09, | |
| "logits/chosen": -2.1939797401428223, | |
| "logits/rejected": -2.1166329383850098, | |
| "logps/chosen": -300.76947021484375, | |
| "logps/rejected": -320.9613952636719, | |
| "loss": 54235.6937, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -300.76947021484375, | |
| "rewards/margins": 20.191925048828125, | |
| "rewards/rejected": -320.9613952636719, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.9733124018838305, | |
| "grad_norm": 2290841.929562142, | |
| "learning_rate": 1.0442413283435758e-09, | |
| "logits/chosen": -2.114621639251709, | |
| "logits/rejected": -2.098475217819214, | |
| "logps/chosen": -277.58563232421875, | |
| "logps/rejected": -333.00006103515625, | |
| "loss": 53597.825, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -277.58563232421875, | |
| "rewards/margins": 55.41447830200195, | |
| "rewards/rejected": -333.00006103515625, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.97854526425955, | |
| "grad_norm": 2365466.4829686345, | |
| "learning_rate": 6.684820986240513e-10, | |
| "logits/chosen": -2.1461949348449707, | |
| "logits/rejected": -2.1061387062072754, | |
| "logps/chosen": -285.5892333984375, | |
| "logps/rejected": -329.62567138671875, | |
| "loss": 55886.8125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -285.5892333984375, | |
| "rewards/margins": 44.03642272949219, | |
| "rewards/rejected": -329.62567138671875, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.9837781266352695, | |
| "grad_norm": 1714580.7073031003, | |
| "learning_rate": 3.760945397705828e-10, | |
| "logits/chosen": -2.290830135345459, | |
| "logits/rejected": -2.2668721675872803, | |
| "logps/chosen": -314.2235107421875, | |
| "logps/rejected": -362.34698486328125, | |
| "loss": 54598.4375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -314.2235107421875, | |
| "rewards/margins": 48.12348556518555, | |
| "rewards/rejected": -362.34698486328125, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 1433494.2103824487, | |
| "learning_rate": 1.6717642056721104e-10, | |
| "logits/chosen": -2.0160892009735107, | |
| "logits/rejected": -2.0129268169403076, | |
| "logps/chosen": -284.138916015625, | |
| "logps/rejected": -306.3015441894531, | |
| "loss": 54053.5687, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -284.138916015625, | |
| "rewards/margins": 22.162614822387695, | |
| "rewards/rejected": -306.3015441894531, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.9942438513867086, | |
| "grad_norm": 1191159.0388659274, | |
| "learning_rate": 4.17975992204056e-11, | |
| "logits/chosen": -2.057304620742798, | |
| "logits/rejected": -2.056112289428711, | |
| "logps/chosen": -266.7309875488281, | |
| "logps/rejected": -323.4661865234375, | |
| "loss": 55682.3375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -266.7309875488281, | |
| "rewards/margins": 56.735191345214844, | |
| "rewards/rejected": -323.4661865234375, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9994767137624281, | |
| "grad_norm": 1276570.622002559, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -2.1545863151550293, | |
| "logits/rejected": -2.146925449371338, | |
| "logps/chosen": -280.2084045410156, | |
| "logps/rejected": -343.4630432128906, | |
| "loss": 54058.05, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -280.2084045410156, | |
| "rewards/margins": 63.254638671875, | |
| "rewards/rejected": -343.4630432128906, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.9994767137624281, | |
| "step": 955, | |
| "total_flos": 0.0, | |
| "train_loss": 56244.764594240834, | |
| "train_runtime": 21694.4484, | |
| "train_samples_per_second": 2.818, | |
| "train_steps_per_second": 0.044 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 955, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000000, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |