| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9996033320111067, | |
| "eval_steps": 500, | |
| "global_step": 210, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004760015866719556, | |
| "grad_norm": 2.6129452623698715, | |
| "learning_rate": 4.761904761904761e-09, | |
| "logits/chosen": 1.8391927480697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1184.0, | |
| "logps/rejected": -355.4166564941406, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.009520031733439112, | |
| "grad_norm": 2.5770813189869757, | |
| "learning_rate": 9.523809523809522e-09, | |
| "logits/chosen": 1.822265625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1286.0, | |
| "logps/rejected": -413.0833435058594, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.014280047600158666, | |
| "grad_norm": 2.7660310366335357, | |
| "learning_rate": 1.4285714285714284e-08, | |
| "logits/chosen": 1.8483072519302368, | |
| "logits/rejected": 1.1949056386947632, | |
| "logps/chosen": -1194.6666259765625, | |
| "logps/rejected": -369.75, | |
| "loss": 0.6947, | |
| "rewards/accuracies": 0.1666666716337204, | |
| "rewards/chosen": -0.0016682943096384406, | |
| "rewards/margins": -0.0037485759239643812, | |
| "rewards/rejected": 0.0020853679161518812, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.019040063466878223, | |
| "grad_norm": 2.691530161920512, | |
| "learning_rate": 1.9047619047619045e-08, | |
| "logits/chosen": 1.6959635019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1280.0, | |
| "logps/rejected": -184.3333282470703, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.2083333283662796, | |
| "rewards/chosen": 0.00250244140625, | |
| "rewards/margins": 0.0008341471548192203, | |
| "rewards/rejected": 0.0016682943096384406, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02380007933359778, | |
| "grad_norm": 3.010246381817704, | |
| "learning_rate": 2.3809523809523807e-08, | |
| "logits/chosen": 1.8460286855697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1411.6666259765625, | |
| "logps/rejected": -199.0, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.2291666716337204, | |
| "rewards/chosen": -0.0041707358323037624, | |
| "rewards/margins": -0.0033467609900981188, | |
| "rewards/rejected": -0.0008341471548192203, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.028560095200317333, | |
| "grad_norm": 2.659135397165267, | |
| "learning_rate": 2.857142857142857e-08, | |
| "logits/chosen": 1.8626302480697632, | |
| "logits/rejected": 1.3460286855697632, | |
| "logps/chosen": -1142.3333740234375, | |
| "logps/rejected": -213.5833282470703, | |
| "loss": 0.6898, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": 0.006256103515625, | |
| "rewards/margins": 0.0063578286208212376, | |
| "rewards/rejected": -0.00010426839435240254, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03332011106703689, | |
| "grad_norm": 3.0961108718066885, | |
| "learning_rate": 3.333333333333333e-08, | |
| "logits/chosen": 1.9749349355697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1445.0, | |
| "logps/rejected": -273.3333435058594, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.2708333432674408, | |
| "rewards/chosen": 0.015411376953125, | |
| "rewards/margins": 0.01598103903234005, | |
| "rewards/rejected": -0.0005213419790379703, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.038080126933756446, | |
| "grad_norm": 3.398529990595042, | |
| "learning_rate": 3.809523809523809e-08, | |
| "logits/chosen": 2.0989582538604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1522.0, | |
| "logps/rejected": -731.1666870117188, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.2708333432674408, | |
| "rewards/chosen": 0.0029195148963481188, | |
| "rewards/margins": 0.003326416015625, | |
| "rewards/rejected": -0.00041707357740961015, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.042840142800476, | |
| "grad_norm": 2.8555673284882, | |
| "learning_rate": 4.285714285714285e-08, | |
| "logits/chosen": 1.7252603769302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1250.6666259765625, | |
| "logps/rejected": -429.1666564941406, | |
| "loss": 0.6873, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": 0.009175618179142475, | |
| "rewards/margins": 0.008351643569767475, | |
| "rewards/rejected": 0.0008341471548192203, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.04760015866719556, | |
| "grad_norm": 3.039275077247777, | |
| "learning_rate": 4.7619047619047613e-08, | |
| "logits/chosen": 1.8528646230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1399.0, | |
| "logps/rejected": -494.5, | |
| "loss": 0.6919, | |
| "rewards/accuracies": 0.2916666567325592, | |
| "rewards/chosen": -0.0020853679161518812, | |
| "rewards/margins": -0.0016682943096384406, | |
| "rewards/rejected": -0.00041707357740961015, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.052360174533915116, | |
| "grad_norm": 2.7307097358717765, | |
| "learning_rate": 5.238095238095238e-08, | |
| "logits/chosen": 1.8912760019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1295.0, | |
| "logps/rejected": -192.0, | |
| "loss": 0.6868, | |
| "rewards/accuracies": 0.2708333432674408, | |
| "rewards/chosen": 0.008738200180232525, | |
| "rewards/margins": 0.00872802734375, | |
| "rewards/rejected": 0.0, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.057120190400634666, | |
| "grad_norm": 2.858482330959412, | |
| "learning_rate": 5.714285714285714e-08, | |
| "logits/chosen": 2.0084636211395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1321.3333740234375, | |
| "logps/rejected": -203.4166717529297, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.2916666567325592, | |
| "rewards/chosen": 0.0066731772385537624, | |
| "rewards/margins": 0.0054219565354287624, | |
| "rewards/rejected": 0.001251220703125, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.06188020626735422, | |
| "grad_norm": 3.436218339030269, | |
| "learning_rate": 6.19047619047619e-08, | |
| "logits/chosen": 2.0472004413604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1647.6666259765625, | |
| "logps/rejected": -269.6666564941406, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.25, | |
| "rewards/chosen": -0.00250244140625, | |
| "rewards/margins": -0.0033416748046875, | |
| "rewards/rejected": 0.0008341471548192203, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.06664022213407378, | |
| "grad_norm": 2.734000397750556, | |
| "learning_rate": 6.666666666666665e-08, | |
| "logits/chosen": 1.9791666269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1338.6666259765625, | |
| "logps/rejected": -475.9166564941406, | |
| "loss": 0.694, | |
| "rewards/accuracies": 0.3333333432674408, | |
| "rewards/chosen": 0.0020853679161518812, | |
| "rewards/margins": -0.0031280517578125, | |
| "rewards/rejected": 0.0052134194411337376, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.07140023800079334, | |
| "grad_norm": 2.781191186330521, | |
| "learning_rate": 7.142857142857142e-08, | |
| "logits/chosen": 2.01171875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1366.3333740234375, | |
| "logps/rejected": -614.0833129882812, | |
| "loss": 0.6967, | |
| "rewards/accuracies": 0.1458333283662796, | |
| "rewards/chosen": -0.009175618179142475, | |
| "rewards/margins": -0.011052449233829975, | |
| "rewards/rejected": 0.0018768310546875, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.07616025386751289, | |
| "grad_norm": 3.296898067532412, | |
| "learning_rate": 7.619047619047618e-08, | |
| "logits/chosen": 2.0338542461395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1591.0, | |
| "logps/rejected": -361.1666564941406, | |
| "loss": 0.6959, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -0.0041707358323037624, | |
| "rewards/margins": -0.0056254067458212376, | |
| "rewards/rejected": 0.0014597574481740594, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.08092026973423244, | |
| "grad_norm": 3.56304516688633, | |
| "learning_rate": 8.095238095238095e-08, | |
| "logits/chosen": 2.1790363788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1688.0, | |
| "logps/rejected": -361.0, | |
| "loss": 0.692, | |
| "rewards/accuracies": 0.3333333432674408, | |
| "rewards/chosen": 0.0008341471548192203, | |
| "rewards/margins": 0.0008392333984375, | |
| "rewards/rejected": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.085680285600952, | |
| "grad_norm": 3.1437723398960795, | |
| "learning_rate": 8.57142857142857e-08, | |
| "logits/chosen": 1.9895833730697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1379.0, | |
| "logps/rejected": -372.3333435058594, | |
| "loss": 0.69, | |
| "rewards/accuracies": 0.3333333432674408, | |
| "rewards/chosen": -0.001251220703125, | |
| "rewards/margins": -1.0172526344831567e-05, | |
| "rewards/rejected": -0.001251220703125, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.09044030146767155, | |
| "grad_norm": 2.9085087039360804, | |
| "learning_rate": 9.047619047619047e-08, | |
| "logits/chosen": 1.962890625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1443.6666259765625, | |
| "logps/rejected": -299.9166564941406, | |
| "loss": 0.6905, | |
| "rewards/accuracies": 0.3333333432674408, | |
| "rewards/chosen": 0.00041707357740961015, | |
| "rewards/margins": 0.0039723715744912624, | |
| "rewards/rejected": -0.0035451252479106188, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.09520031733439112, | |
| "grad_norm": 3.046632619071066, | |
| "learning_rate": 9.523809523809523e-08, | |
| "logits/chosen": 2.1256511211395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1336.6666259765625, | |
| "logps/rejected": -334.5833435058594, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.3958333432674408, | |
| "rewards/chosen": 0.00041707357740961015, | |
| "rewards/margins": 0.0023040771484375, | |
| "rewards/rejected": -0.0018768310546875, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09996033320111067, | |
| "grad_norm": 2.8826119418741762, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": 2.072265625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1300.0, | |
| "logps/rejected": -194.4166717529297, | |
| "loss": 0.6877, | |
| "rewards/accuracies": 0.3541666567325592, | |
| "rewards/chosen": 0.010416666977107525, | |
| "rewards/margins": 0.008550007827579975, | |
| "rewards/rejected": 0.0018768310546875, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.10472034906783023, | |
| "grad_norm": 2.8488586675307856, | |
| "learning_rate": 9.999309273455527e-08, | |
| "logits/chosen": 1.8509114980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1268.6666259765625, | |
| "logps/rejected": -404.0833435058594, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.0012715657940134406, | |
| "rewards/margins": 0.0008494059438817203, | |
| "rewards/rejected": -0.0020853679161518812, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.10948036493454978, | |
| "grad_norm": 3.496537373458162, | |
| "learning_rate": 9.997237284663377e-08, | |
| "logits/chosen": 2.130859375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1653.6666259765625, | |
| "logps/rejected": -229.25, | |
| "loss": 0.6876, | |
| "rewards/accuracies": 0.4791666567325592, | |
| "rewards/chosen": 0.006256103515625, | |
| "rewards/margins": 0.009714762680232525, | |
| "rewards/rejected": -0.0034383137244731188, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.11424038080126933, | |
| "grad_norm": 3.1101740687622885, | |
| "learning_rate": 9.993784606094611e-08, | |
| "logits/chosen": 2.0432941913604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1445.3333740234375, | |
| "logps/rejected": -414.5833435058594, | |
| "loss": 0.686, | |
| "rewards/accuracies": 0.4583333432674408, | |
| "rewards/chosen": 0.014180500991642475, | |
| "rewards/margins": 0.01689656637609005, | |
| "rewards/rejected": -0.0027109782677143812, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.1190003966679889, | |
| "grad_norm": 3.443772746353538, | |
| "learning_rate": 9.988952191691924e-08, | |
| "logits/chosen": 2.2135417461395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1725.6666259765625, | |
| "logps/rejected": -335.4166564941406, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -0.0020853679161518812, | |
| "rewards/margins": 0.0006205241079442203, | |
| "rewards/rejected": -0.0027109782677143812, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.12376041253470844, | |
| "grad_norm": 2.5776558770728704, | |
| "learning_rate": 9.982741376606077e-08, | |
| "logits/chosen": 2.0091145038604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1190.0, | |
| "logps/rejected": -194.75, | |
| "loss": 0.6879, | |
| "rewards/accuracies": 0.4166666567325592, | |
| "rewards/chosen": 0.0050048828125, | |
| "rewards/margins": 0.0075022378005087376, | |
| "rewards/rejected": -0.00250244140625, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.128520428401428, | |
| "grad_norm": 3.0490668633558244, | |
| "learning_rate": 9.975153876827008e-08, | |
| "logits/chosen": 1.8876953125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1605.3333740234375, | |
| "logps/rejected": -379.0416564941406, | |
| "loss": 0.6835, | |
| "rewards/accuracies": 0.4166666567325592, | |
| "rewards/chosen": 0.014180500991642475, | |
| "rewards/margins": 0.01519775390625, | |
| "rewards/rejected": -0.0010426839580759406, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.13328044426814756, | |
| "grad_norm": 3.0844768372489675, | |
| "learning_rate": 9.966191788709714e-08, | |
| "logits/chosen": 2.40625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1512.6666259765625, | |
| "logps/rejected": -516.8333129882812, | |
| "loss": 0.6835, | |
| "rewards/accuracies": 0.4166666567325592, | |
| "rewards/chosen": 0.011271159164607525, | |
| "rewards/margins": 0.0121002197265625, | |
| "rewards/rejected": -0.0008341471548192203, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.13804046013486712, | |
| "grad_norm": 2.9796492194683073, | |
| "learning_rate": 9.955857588395063e-08, | |
| "logits/chosen": 2.0364582538604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1308.6666259765625, | |
| "logps/rejected": -206.4166717529297, | |
| "loss": 0.6828, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.01751708984375, | |
| "rewards/margins": 0.02065022848546505, | |
| "rewards/rejected": -0.0031280517578125, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.14280047600158668, | |
| "grad_norm": 2.9021818802869466, | |
| "learning_rate": 9.944154131125642e-08, | |
| "logits/chosen": 1.9016927480697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1332.0, | |
| "logps/rejected": -271.5, | |
| "loss": 0.6818, | |
| "rewards/accuracies": 0.5833333134651184, | |
| "rewards/chosen": 0.013346354477107525, | |
| "rewards/margins": 0.01731363870203495, | |
| "rewards/rejected": -0.0039621987380087376, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.14756049186830622, | |
| "grad_norm": 3.7103574249848617, | |
| "learning_rate": 9.931084650456892e-08, | |
| "logits/chosen": 2.2962238788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1784.6666259765625, | |
| "logps/rejected": -222.375, | |
| "loss": 0.683, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.01793416403234005, | |
| "rewards/margins": 0.0197270717471838, | |
| "rewards/rejected": -0.0017725626239553094, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.15232050773502578, | |
| "grad_norm": 2.8083484443590043, | |
| "learning_rate": 9.916652757363697e-08, | |
| "logits/chosen": 1.9866536855697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1528.6666259765625, | |
| "logps/rejected": -199.3333282470703, | |
| "loss": 0.6799, | |
| "rewards/accuracies": 0.5416666865348816, | |
| "rewards/chosen": 0.02545166015625, | |
| "rewards/margins": 0.02791849710047245, | |
| "rewards/rejected": -0.00250244140625, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.15708052360174535, | |
| "grad_norm": 3.195298894130754, | |
| "learning_rate": 9.900862439242718e-08, | |
| "logits/chosen": 2.1595051288604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1539.6666259765625, | |
| "logps/rejected": -337.3333435058594, | |
| "loss": 0.6778, | |
| "rewards/accuracies": 0.6666666865348816, | |
| "rewards/chosen": 0.02418009378015995, | |
| "rewards/margins": 0.02998860739171505, | |
| "rewards/rejected": -0.0058390297926962376, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.16184053946846488, | |
| "grad_norm": 3.3044326013571346, | |
| "learning_rate": 9.883718058810706e-08, | |
| "logits/chosen": 1.95703125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1542.6666259765625, | |
| "logps/rejected": -503.9166564941406, | |
| "loss": 0.6765, | |
| "rewards/accuracies": 0.6458333134651184, | |
| "rewards/chosen": 0.02669270895421505, | |
| "rewards/margins": 0.0319112129509449, | |
| "rewards/rejected": -0.0052134194411337376, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.16660055533518445, | |
| "grad_norm": 2.995151668350896, | |
| "learning_rate": 9.865224352899118e-08, | |
| "logits/chosen": 1.7864583730697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1488.6666259765625, | |
| "logps/rejected": -686.9166870117188, | |
| "loss": 0.6766, | |
| "rewards/accuracies": 0.6666666865348816, | |
| "rewards/chosen": 0.024200439453125, | |
| "rewards/margins": 0.0314687080681324, | |
| "rewards/rejected": -0.0072987875901162624, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.171360571201904, | |
| "grad_norm": 2.8084038402424976, | |
| "learning_rate": 9.84538643114539e-08, | |
| "logits/chosen": 1.8294271230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1364.6666259765625, | |
| "logps/rejected": -272.5, | |
| "loss": 0.6661, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.045074462890625, | |
| "rewards/margins": 0.04962158203125, | |
| "rewards/rejected": -0.0045878090895712376, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.17612058706862357, | |
| "grad_norm": 2.9054773433025387, | |
| "learning_rate": 9.824209774581174e-08, | |
| "logits/chosen": 2.0826823711395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1418.3333740234375, | |
| "logps/rejected": -362.8333435058594, | |
| "loss": 0.671, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.042999267578125, | |
| "rewards/margins": 0.0488077811896801, | |
| "rewards/rejected": -0.0058390297926962376, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.1808806029353431, | |
| "grad_norm": 2.8670036159141534, | |
| "learning_rate": 9.801700234117999e-08, | |
| "logits/chosen": 1.9225260019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1426.0, | |
| "logps/rejected": -433.0, | |
| "loss": 0.6672, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.037933349609375, | |
| "rewards/margins": 0.0521748848259449, | |
| "rewards/rejected": -0.014165242202579975, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.18564061880206267, | |
| "grad_norm": 2.8679096733310594, | |
| "learning_rate": 9.777864028930704e-08, | |
| "logits/chosen": 2.0826823711395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1393.0, | |
| "logps/rejected": -193.5, | |
| "loss": 0.6719, | |
| "rewards/accuracies": 0.7708333134651184, | |
| "rewards/chosen": 0.03546142578125, | |
| "rewards/margins": 0.04315185546875, | |
| "rewards/rejected": -0.0077158608473837376, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.19040063466878224, | |
| "grad_norm": 2.7627109082266226, | |
| "learning_rate": 9.752707744739145e-08, | |
| "logits/chosen": 1.9762369394302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1240.0, | |
| "logps/rejected": -183.2916717529297, | |
| "loss": 0.6674, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.0434061698615551, | |
| "rewards/margins": 0.0514933280646801, | |
| "rewards/rejected": -0.0081329345703125, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.19516065053550177, | |
| "grad_norm": 2.6388334579169563, | |
| "learning_rate": 9.726238331988623e-08, | |
| "logits/chosen": 1.9010416269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1267.0, | |
| "logps/rejected": -245.5833282470703, | |
| "loss": 0.6681, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.04217529296875, | |
| "rewards/margins": 0.0500335693359375, | |
| "rewards/rejected": -0.007924397476017475, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.19992066640222134, | |
| "grad_norm": 2.9593554398274113, | |
| "learning_rate": 9.698463103929542e-08, | |
| "logits/chosen": 1.8557943105697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1371.0, | |
| "logps/rejected": -711.0416870117188, | |
| "loss": 0.6645, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.04425048828125, | |
| "rewards/margins": 0.0631612166762352, | |
| "rewards/rejected": -0.019012451171875, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.2046806822689409, | |
| "grad_norm": 3.042969595506808, | |
| "learning_rate": 9.669389734596817e-08, | |
| "logits/chosen": 2.0110676288604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1616.6666259765625, | |
| "logps/rejected": -491.0, | |
| "loss": 0.6688, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.035888671875, | |
| "rewards/margins": 0.04876708984375, | |
| "rewards/rejected": -0.0129241943359375, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.20944069813566046, | |
| "grad_norm": 3.630337496903089, | |
| "learning_rate": 9.639026256689626e-08, | |
| "logits/chosen": 2.490234375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1765.3333740234375, | |
| "logps/rejected": -429.5833435058594, | |
| "loss": 0.6577, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.04840087890625, | |
| "rewards/margins": 0.0708109512925148, | |
| "rewards/rejected": -0.02239990234375, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.21420071400238, | |
| "grad_norm": 3.1793801997133273, | |
| "learning_rate": 9.607381059352038e-08, | |
| "logits/chosen": 2.3997395038604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1595.0, | |
| "logps/rejected": -361.125, | |
| "loss": 0.6697, | |
| "rewards/accuracies": 0.7708333134651184, | |
| "rewards/chosen": 0.0383707694709301, | |
| "rewards/margins": 0.0468241386115551, | |
| "rewards/rejected": -0.008550007827579975, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.21896072986909956, | |
| "grad_norm": 3.2332464466304356, | |
| "learning_rate": 9.574462885855172e-08, | |
| "logits/chosen": 2.4381511211395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1500.0, | |
| "logps/rejected": -399.75, | |
| "loss": 0.6653, | |
| "rewards/accuracies": 0.7708333134651184, | |
| "rewards/chosen": 0.0367024727165699, | |
| "rewards/margins": 0.0567220039665699, | |
| "rewards/rejected": -0.02001953125, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.22372074573581913, | |
| "grad_norm": 2.770044306282577, | |
| "learning_rate": 9.540280831181524e-08, | |
| "logits/chosen": 1.8899739980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1376.6666259765625, | |
| "logps/rejected": -205.4166717529297, | |
| "loss": 0.662, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.0479838065803051, | |
| "rewards/margins": 0.0577799491584301, | |
| "rewards/rejected": -0.009801228530704975, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.22848076160253866, | |
| "grad_norm": 2.7921454090980418, | |
| "learning_rate": 9.504844339512095e-08, | |
| "logits/chosen": 1.8723958730697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1307.3333740234375, | |
| "logps/rejected": -644.2083129882812, | |
| "loss": 0.6586, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.0417378731071949, | |
| "rewards/margins": 0.0626017227768898, | |
| "rewards/rejected": -0.02083333395421505, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.23324077746925823, | |
| "grad_norm": 2.682362712903166, | |
| "learning_rate": 9.468163201617062e-08, | |
| "logits/chosen": 1.8743489980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1292.0, | |
| "logps/rejected": -209.5, | |
| "loss": 0.6641, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.0467325858771801, | |
| "rewards/margins": 0.05859375, | |
| "rewards/rejected": -0.011891682632267475, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.2380007933359778, | |
| "grad_norm": 2.8024075453444026, | |
| "learning_rate": 9.430247552150672e-08, | |
| "logits/chosen": 1.8948568105697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1348.6666259765625, | |
| "logps/rejected": -197.0833282470703, | |
| "loss": 0.6694, | |
| "rewards/accuracies": 0.8333333134651184, | |
| "rewards/chosen": 0.0388387031853199, | |
| "rewards/margins": 0.0472005195915699, | |
| "rewards/rejected": -0.008443196304142475, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.24276080920269735, | |
| "grad_norm": 2.8973595776612586, | |
| "learning_rate": 9.391107866851143e-08, | |
| "logits/chosen": 1.484375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1479.3333740234375, | |
| "logps/rejected": -207.1666717529297, | |
| "loss": 0.6632, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.0480244942009449, | |
| "rewards/margins": 0.05792236328125, | |
| "rewards/rejected": -0.010014851577579975, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.2475208250694169, | |
| "grad_norm": 2.8257249326867346, | |
| "learning_rate": 9.350754959646305e-08, | |
| "logits/chosen": 1.8541666269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1380.6666259765625, | |
| "logps/rejected": -368.3333435058594, | |
| "loss": 0.6635, | |
| "rewards/accuracies": 0.8958333134651184, | |
| "rewards/chosen": 0.0508626289665699, | |
| "rewards/margins": 0.0612589530646801, | |
| "rewards/rejected": -0.01043701171875, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.25228084093613645, | |
| "grad_norm": 2.986625683949948, | |
| "learning_rate": 9.30919997966582e-08, | |
| "logits/chosen": 2.048828125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1540.0, | |
| "logps/rejected": -188.9166717529297, | |
| "loss": 0.6523, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.0700785294175148, | |
| "rewards/margins": 0.0830078125, | |
| "rewards/rejected": -0.012929280288517475, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.257040856802856, | |
| "grad_norm": 2.5763656997654762, | |
| "learning_rate": 9.266454408160778e-08, | |
| "logits/chosen": 1.9134114980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1294.3333740234375, | |
| "logps/rejected": -198.6666717529297, | |
| "loss": 0.6475, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.0808512344956398, | |
| "rewards/margins": 0.0957234725356102, | |
| "rewards/rejected": -0.014806111343204975, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2618008726695756, | |
| "grad_norm": 2.787476695165978, | |
| "learning_rate": 9.222530055331539e-08, | |
| "logits/chosen": 2.0227863788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1496.3333740234375, | |
| "logps/rejected": -546.8333129882812, | |
| "loss": 0.6325, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.0945231094956398, | |
| "rewards/margins": 0.1277669221162796, | |
| "rewards/rejected": -0.0333506278693676, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.2665608885362951, | |
| "grad_norm": 2.780436919632163, | |
| "learning_rate": 9.177439057064682e-08, | |
| "logits/chosen": 1.9576822519302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1275.3333740234375, | |
| "logps/rejected": -240.7916717529297, | |
| "loss": 0.6396, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.0884602889418602, | |
| "rewards/margins": 0.111083984375, | |
| "rewards/rejected": -0.02263387106359005, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.27132090440301465, | |
| "grad_norm": 2.694503560733566, | |
| "learning_rate": 9.131193871579974e-08, | |
| "logits/chosen": 2.001953125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1475.0, | |
| "logps/rejected": -257.75, | |
| "loss": 0.6303, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.1059977188706398, | |
| "rewards/margins": 0.1266682893037796, | |
| "rewards/rejected": -0.02086385153234005, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.27608092026973424, | |
| "grad_norm": 2.6164739225559663, | |
| "learning_rate": 9.083807275988283e-08, | |
| "logits/chosen": 1.9791666269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1329.6666259765625, | |
| "logps/rejected": -335.5833435058594, | |
| "loss": 0.6262, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.10791015625, | |
| "rewards/margins": 0.1393636018037796, | |
| "rewards/rejected": -0.03131103515625, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.2808409361364538, | |
| "grad_norm": 2.596903683235154, | |
| "learning_rate": 9.03529236276138e-08, | |
| "logits/chosen": 1.9534505605697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1323.6666259765625, | |
| "logps/rejected": -594.5, | |
| "loss": 0.6238, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1062825545668602, | |
| "rewards/margins": 0.1328938752412796, | |
| "rewards/rejected": -0.02649942971765995, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.28560095200317337, | |
| "grad_norm": 2.848334529424154, | |
| "learning_rate": 8.985662536114613e-08, | |
| "logits/chosen": 2.0240886211395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1506.3333740234375, | |
| "logps/rejected": -521.6666870117188, | |
| "loss": 0.6219, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.115234375, | |
| "rewards/margins": 0.1461588591337204, | |
| "rewards/rejected": -0.03110249899327755, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2903609678698929, | |
| "grad_norm": 2.876426795335225, | |
| "learning_rate": 8.934931508303445e-08, | |
| "logits/chosen": 2.25, | |
| "logits/rejected": null, | |
| "logps/chosen": -1557.0, | |
| "logps/rejected": -221.75, | |
| "loss": 0.6266, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1195882186293602, | |
| "rewards/margins": 0.1431477814912796, | |
| "rewards/rejected": -0.02335103414952755, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.29512098373661244, | |
| "grad_norm": 2.537423595701227, | |
| "learning_rate": 8.883113295834892e-08, | |
| "logits/chosen": 1.9329427480697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1155.3333740234375, | |
| "logps/rejected": -416.75, | |
| "loss": 0.6213, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1212972030043602, | |
| "rewards/margins": 0.1614583283662796, | |
| "rewards/rejected": -0.0398966483771801, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.29988099960333203, | |
| "grad_norm": 2.7038971300904473, | |
| "learning_rate": 8.83022221559489e-08, | |
| "logits/chosen": 2.2102863788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1405.0, | |
| "logps/rejected": -200.5, | |
| "loss": 0.6227, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.1200764998793602, | |
| "rewards/margins": 0.1464029997587204, | |
| "rewards/rejected": -0.02629598043859005, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.30464101547005157, | |
| "grad_norm": 2.986877484832217, | |
| "learning_rate": 8.776272880892674e-08, | |
| "logits/chosen": 1.94921875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1536.0, | |
| "logps/rejected": -524.0833129882812, | |
| "loss": 0.6094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1381022185087204, | |
| "rewards/margins": 0.181884765625, | |
| "rewards/rejected": -0.0437825508415699, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.3094010313367711, | |
| "grad_norm": 2.9882962132000306, | |
| "learning_rate": 8.721280197423258e-08, | |
| "logits/chosen": 2.1028645038604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1636.3333740234375, | |
| "logps/rejected": -249.125, | |
| "loss": 0.6254, | |
| "rewards/accuracies": 0.8958333134651184, | |
| "rewards/chosen": 0.1089274063706398, | |
| "rewards/margins": 0.1392822265625, | |
| "rewards/rejected": -0.030487060546875, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3141610472034907, | |
| "grad_norm": 2.632751517436848, | |
| "learning_rate": 8.665259359149131e-08, | |
| "logits/chosen": 1.7799478769302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1252.3333740234375, | |
| "logps/rejected": -425.9166564941406, | |
| "loss": 0.6191, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1243082657456398, | |
| "rewards/margins": 0.1647542268037796, | |
| "rewards/rejected": -0.040557861328125, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.31892106307021023, | |
| "grad_norm": 3.244181144372953, | |
| "learning_rate": 8.608225844102311e-08, | |
| "logits/chosen": 2.185546875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1859.6666259765625, | |
| "logps/rejected": -222.2916717529297, | |
| "loss": 0.6187, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.1287841796875, | |
| "rewards/margins": 0.15850830078125, | |
| "rewards/rejected": -0.02971394918859005, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.32368107893692977, | |
| "grad_norm": 3.1054803317794772, | |
| "learning_rate": 8.550195410107901e-08, | |
| "logits/chosen": 2.0748698711395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1731.0, | |
| "logps/rejected": -723.5833129882812, | |
| "loss": 0.6134, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.1177164688706398, | |
| "rewards/margins": 0.1701253205537796, | |
| "rewards/rejected": -0.05230712890625, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.32844109480364936, | |
| "grad_norm": 2.81107358272895, | |
| "learning_rate": 8.491184090430364e-08, | |
| "logits/chosen": 2.0362141132354736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1541.6666259765625, | |
| "logps/rejected": -279.0833435058594, | |
| "loss": 0.6195, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1339925080537796, | |
| "rewards/margins": 0.1614176481962204, | |
| "rewards/rejected": -0.02710978128015995, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3332011106703689, | |
| "grad_norm": 2.398827229418021, | |
| "learning_rate": 8.431208189343668e-08, | |
| "logits/chosen": 1.7063802480697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1305.3333740234375, | |
| "logps/rejected": -189.5, | |
| "loss": 0.6207, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1221516951918602, | |
| "rewards/margins": 0.1538899689912796, | |
| "rewards/rejected": -0.031707763671875, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.33796112653708843, | |
| "grad_norm": 2.976311987538715, | |
| "learning_rate": 8.370284277626575e-08, | |
| "logits/chosen": 1.9915364980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1480.3333740234375, | |
| "logps/rejected": -386.1666564941406, | |
| "loss": 0.6108, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.138671875, | |
| "rewards/margins": 0.1826578825712204, | |
| "rewards/rejected": -0.04400634765625, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.342721142403808, | |
| "grad_norm": 2.7906860167659495, | |
| "learning_rate": 8.308429187984297e-08, | |
| "logits/chosen": 2.017578125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1594.3333740234375, | |
| "logps/rejected": -200.625, | |
| "loss": 0.6132, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.14013671875, | |
| "rewards/margins": 0.1676432341337204, | |
| "rewards/rejected": -0.02764892578125, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.34748115827052756, | |
| "grad_norm": 2.956423551442551, | |
| "learning_rate": 8.24566001039776e-08, | |
| "logits/chosen": 1.8580728769302368, | |
| "logits/rejected": 1.398193359375, | |
| "logps/chosen": -1390.3333740234375, | |
| "logps/rejected": -239.0, | |
| "loss": 0.6187, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.133544921875, | |
| "rewards/margins": 0.1695556640625, | |
| "rewards/rejected": -0.0360616035759449, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.35224117413724715, | |
| "grad_norm": 2.57139969502871, | |
| "learning_rate": 8.181994087401818e-08, | |
| "logits/chosen": 2.0625, | |
| "logits/rejected": 0.9173991084098816, | |
| "logps/chosen": -1206.3333740234375, | |
| "logps/rejected": -1035.25, | |
| "loss": 0.5967, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1431477814912796, | |
| "rewards/margins": 0.2132161408662796, | |
| "rewards/rejected": -0.070159912109375, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.3570011900039667, | |
| "grad_norm": 2.3789263635601756, | |
| "learning_rate": 8.117449009293668e-08, | |
| "logits/chosen": 1.8365885019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1216.6666259765625, | |
| "logps/rejected": -417.4166564941406, | |
| "loss": 0.6078, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.14013671875, | |
| "rewards/margins": 0.1861979216337204, | |
| "rewards/rejected": -0.0458984375, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.3617612058706862, | |
| "grad_norm": 2.6269014458132616, | |
| "learning_rate": 8.052042609272816e-08, | |
| "logits/chosen": 1.9140625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1333.6666259765625, | |
| "logps/rejected": -309.5, | |
| "loss": 0.6164, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1212565079331398, | |
| "rewards/margins": 0.1600748747587204, | |
| "rewards/rejected": -0.0386962890625, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3665212217374058, | |
| "grad_norm": 2.739178703516763, | |
| "learning_rate": 7.98579295851393e-08, | |
| "logits/chosen": 2.1529948711395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1397.0, | |
| "logps/rejected": -290.9166564941406, | |
| "loss": 0.61, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1343180388212204, | |
| "rewards/margins": 0.1739095002412796, | |
| "rewards/rejected": -0.0396219901740551, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.37128123760412535, | |
| "grad_norm": 2.3079066820004304, | |
| "learning_rate": 7.91871836117395e-08, | |
| "logits/chosen": 1.8785806894302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1415.3333740234375, | |
| "logps/rejected": -222.1666717529297, | |
| "loss": 0.6086, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.1553548127412796, | |
| "rewards/margins": 0.1886393278837204, | |
| "rewards/rejected": -0.0333455391228199, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3760412534708449, | |
| "grad_norm": 2.5191243394068144, | |
| "learning_rate": 7.850837349334809e-08, | |
| "logits/chosen": 2.1647136211395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1232.6666259765625, | |
| "logps/rejected": -314.0833435058594, | |
| "loss": 0.6022, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.15283203125, | |
| "rewards/margins": 0.1917317658662796, | |
| "rewards/rejected": -0.0390218086540699, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.3808012693375645, | |
| "grad_norm": 2.464350768449704, | |
| "learning_rate": 7.782168677883206e-08, | |
| "logits/chosen": 1.916015625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1364.3333740234375, | |
| "logps/rejected": -656.0833129882812, | |
| "loss": 0.6058, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1260579377412796, | |
| "rewards/margins": 0.1813151091337204, | |
| "rewards/rejected": -0.0555216483771801, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.385561285204284, | |
| "grad_norm": 2.717004939585792, | |
| "learning_rate": 7.712731319328797e-08, | |
| "logits/chosen": 1.9537760019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1486.6666259765625, | |
| "logps/rejected": -224.25, | |
| "loss": 0.611, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.13720703125, | |
| "rewards/margins": 0.1704915314912796, | |
| "rewards/rejected": -0.0333760567009449, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.39032130107100355, | |
| "grad_norm": 3.2264984436857738, | |
| "learning_rate": 7.642544458562277e-08, | |
| "logits/chosen": 2.267578125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1840.6666259765625, | |
| "logps/rejected": -411.1666564941406, | |
| "loss": 0.6014, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.1565348356962204, | |
| "rewards/margins": 0.1999918669462204, | |
| "rewards/rejected": -0.0435587577521801, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.39508131693772314, | |
| "grad_norm": 2.683314265623292, | |
| "learning_rate": 7.571627487554768e-08, | |
| "logits/chosen": 2.0540363788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1430.0, | |
| "logps/rejected": -212.0, | |
| "loss": 0.6104, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.13525390625, | |
| "rewards/margins": 0.1728515625, | |
| "rewards/rejected": -0.0377400703728199, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.3998413328044427, | |
| "grad_norm": 2.7144969695615284, | |
| "learning_rate": 7.5e-08, | |
| "logits/chosen": 2.173828125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1470.0, | |
| "logps/rejected": -171.8333282470703, | |
| "loss": 0.6058, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1543782502412796, | |
| "rewards/margins": 0.1909993439912796, | |
| "rewards/rejected": -0.0367228202521801, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.40460134867116226, | |
| "grad_norm": 2.7975541338538963, | |
| "learning_rate": 7.42768178590076e-08, | |
| "logits/chosen": 1.98046875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1496.6666259765625, | |
| "logps/rejected": -337.4583435058594, | |
| "loss": 0.6065, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.1447346955537796, | |
| "rewards/margins": 0.1871744841337204, | |
| "rewards/rejected": -0.0425211600959301, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.4093613645378818, | |
| "grad_norm": 2.5565364148587646, | |
| "learning_rate": 7.354692826101101e-08, | |
| "logits/chosen": 1.9537760019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1364.6666259765625, | |
| "logps/rejected": -325.1666564941406, | |
| "loss": 0.6036, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1505126953125, | |
| "rewards/margins": 0.1897786408662796, | |
| "rewards/rejected": -0.0393575020134449, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.41412138040460134, | |
| "grad_norm": 2.6614568460230212, | |
| "learning_rate": 7.281053286765815e-08, | |
| "logits/chosen": 2.2415363788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1447.3333740234375, | |
| "logps/rejected": -194.0833282470703, | |
| "loss": 0.6062, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1497395783662796, | |
| "rewards/margins": 0.1826985627412796, | |
| "rewards/rejected": -0.0329793281853199, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.4188813962713209, | |
| "grad_norm": 2.8504744317923145, | |
| "learning_rate": 7.206783513808719e-08, | |
| "logits/chosen": 2.0045573711395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1482.3333740234375, | |
| "logps/rejected": -231.9166717529297, | |
| "loss": 0.6066, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.1485595703125, | |
| "rewards/margins": 0.1824544221162796, | |
| "rewards/rejected": -0.0341898612678051, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.42364141213804046, | |
| "grad_norm": 3.0789510684671173, | |
| "learning_rate": 7.131904027271269e-08, | |
| "logits/chosen": 2.4290363788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1558.6666259765625, | |
| "logps/rejected": -210.25, | |
| "loss": 0.6094, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1439615935087204, | |
| "rewards/margins": 0.180908203125, | |
| "rewards/rejected": -0.0371195487678051, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.42840142800476, | |
| "grad_norm": 2.3401989437208512, | |
| "learning_rate": 7.056435515653059e-08, | |
| "logits/chosen": 1.7708333730697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1350.3333740234375, | |
| "logps/rejected": -375.2083435058594, | |
| "loss": 0.5939, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1522623747587204, | |
| "rewards/margins": 0.1978352814912796, | |
| "rewards/rejected": -0.0454508475959301, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4331614438714796, | |
| "grad_norm": 2.617864829611724, | |
| "learning_rate": 6.980398830195784e-08, | |
| "logits/chosen": 2.0774738788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1413.0, | |
| "logps/rejected": -191.6666717529297, | |
| "loss": 0.6024, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1588541716337204, | |
| "rewards/margins": 0.1947428435087204, | |
| "rewards/rejected": -0.03582763671875, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.4379214597381991, | |
| "grad_norm": 2.945106819409662, | |
| "learning_rate": 6.903814979122247e-08, | |
| "logits/chosen": 2.1328125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1458.0, | |
| "logps/rejected": -405.75, | |
| "loss": 0.5955, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.1756184846162796, | |
| "rewards/margins": 0.219482421875, | |
| "rewards/rejected": -0.0438232421875, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.44268147560491866, | |
| "grad_norm": 2.3715073720788062, | |
| "learning_rate": 6.826705121831976e-08, | |
| "logits/chosen": 1.724609375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1186.3333740234375, | |
| "logps/rejected": -194.1666717529297, | |
| "loss": 0.5867, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.1758626252412796, | |
| "rewards/margins": 0.222900390625, | |
| "rewards/rejected": -0.0468953438103199, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.44744149147163825, | |
| "grad_norm": 2.695026787169654, | |
| "learning_rate": 6.749090563055075e-08, | |
| "logits/chosen": 2.0143229961395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1569.3333740234375, | |
| "logps/rejected": -333.0, | |
| "loss": 0.5999, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.1517740935087204, | |
| "rewards/margins": 0.1964518278837204, | |
| "rewards/rejected": -0.0446370430290699, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.4522015073383578, | |
| "grad_norm": 2.6523018617450473, | |
| "learning_rate": 6.670992746965938e-08, | |
| "logits/chosen": 2.1263020038604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1398.3333740234375, | |
| "logps/rejected": -290.5, | |
| "loss": 0.6019, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1586100310087204, | |
| "rewards/margins": 0.2041015625, | |
| "rewards/rejected": -0.0452473945915699, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.4569615232050773, | |
| "grad_norm": 2.8165136119650693, | |
| "learning_rate": 6.592433251258423e-08, | |
| "logits/chosen": 2.0849609375, | |
| "logits/rejected": 1.6292318105697632, | |
| "logps/chosen": -1399.6666259765625, | |
| "logps/rejected": -301.4166564941406, | |
| "loss": 0.5977, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.1651204377412796, | |
| "rewards/margins": 0.2105305939912796, | |
| "rewards/rejected": -0.04522705078125, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.4617215390717969, | |
| "grad_norm": 2.795118120284924, | |
| "learning_rate": 6.51343378118413e-08, | |
| "logits/chosen": 1.8997396230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1594.0, | |
| "logps/rejected": -372.8333435058594, | |
| "loss": 0.5988, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1497395783662796, | |
| "rewards/margins": 0.202392578125, | |
| "rewards/rejected": -0.05255126953125, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.46648155493851645, | |
| "grad_norm": 2.3527175308766677, | |
| "learning_rate": 6.434016163555452e-08, | |
| "logits/chosen": 2.013671875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1271.6666259765625, | |
| "logps/rejected": -446.5833435058594, | |
| "loss": 0.5936, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.1669108122587204, | |
| "rewards/margins": 0.2158203125, | |
| "rewards/rejected": -0.0489501953125, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.47124157080523604, | |
| "grad_norm": 2.269292458197308, | |
| "learning_rate": 6.354202340715025e-08, | |
| "logits/chosen": 1.7379556894302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1289.0, | |
| "logps/rejected": -432.0, | |
| "loss": 0.592, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1646321564912796, | |
| "rewards/margins": 0.2252604216337204, | |
| "rewards/rejected": -0.0604654960334301, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.4760015866719556, | |
| "grad_norm": 2.654026647395738, | |
| "learning_rate": 6.274014364473274e-08, | |
| "logits/chosen": 1.7734375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1326.3333740234375, | |
| "logps/rejected": -368.9166564941406, | |
| "loss": 0.5782, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1892903596162796, | |
| "rewards/margins": 0.2517903745174408, | |
| "rewards/rejected": -0.0622355155646801, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4807616025386751, | |
| "grad_norm": 2.224568158982869, | |
| "learning_rate": 6.19347439001569e-08, | |
| "logits/chosen": 1.6510416269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1172.6666259765625, | |
| "logps/rejected": -398.5, | |
| "loss": 0.5793, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1842447966337204, | |
| "rewards/margins": 0.2439778596162796, | |
| "rewards/rejected": -0.0596516914665699, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.4855216184053947, | |
| "grad_norm": 2.597661312044313, | |
| "learning_rate": 6.112604669781571e-08, | |
| "logits/chosen": 2.0286457538604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1323.0, | |
| "logps/rejected": -420.6666564941406, | |
| "loss": 0.5632, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.22021484375, | |
| "rewards/margins": 0.2819010317325592, | |
| "rewards/rejected": -0.0615030936896801, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.49028163427211424, | |
| "grad_norm": 2.4892560953648024, | |
| "learning_rate": 6.031427547315888e-08, | |
| "logits/chosen": 2.1149089336395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1374.0, | |
| "logps/rejected": -347.8333435058594, | |
| "loss": 0.5768, | |
| "rewards/accuracies": 0.9166666865348816, | |
| "rewards/chosen": 0.1827799528837204, | |
| "rewards/margins": 0.25146484375, | |
| "rewards/rejected": -0.0687662735581398, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.4950416501388338, | |
| "grad_norm": 2.5037829768257054, | |
| "learning_rate": 5.949965451095951e-08, | |
| "logits/chosen": 2.2347004413604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1490.3333740234375, | |
| "logps/rejected": -179.4583282470703, | |
| "loss": 0.5827, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1913655549287796, | |
| "rewards/margins": 0.24267578125, | |
| "rewards/rejected": -0.0511983223259449, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.49980166600555337, | |
| "grad_norm": 2.3007706020312946, | |
| "learning_rate": 5.868240888334653e-08, | |
| "logits/chosen": 2.0305988788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1358.6666259765625, | |
| "logps/rejected": -378.1666564941406, | |
| "loss": 0.5672, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.2044270783662796, | |
| "rewards/margins": 0.2724609375, | |
| "rewards/rejected": -0.0679931640625, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5045616818722729, | |
| "grad_norm": 2.687454169363537, | |
| "learning_rate": 5.786276438761927e-08, | |
| "logits/chosen": 1.939453125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1579.0, | |
| "logps/rejected": -280.125, | |
| "loss": 0.5729, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.1995442658662796, | |
| "rewards/margins": 0.2615559995174408, | |
| "rewards/rejected": -0.0616353340446949, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5093216977389925, | |
| "grad_norm": 2.0530034264243957, | |
| "learning_rate": 5.7040947483861834e-08, | |
| "logits/chosen": 1.6572265625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1141.0, | |
| "logps/rejected": -210.5, | |
| "loss": 0.5526, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2364908903837204, | |
| "rewards/margins": 0.3084309995174408, | |
| "rewards/rejected": -0.0723470076918602, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.514081713605712, | |
| "grad_norm": 2.0586503729863037, | |
| "learning_rate": 5.621718523237427e-08, | |
| "logits/chosen": 1.61279296875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1207.3333740234375, | |
| "logps/rejected": -202.9166717529297, | |
| "loss": 0.5562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2274576872587204, | |
| "rewards/margins": 0.3038736879825592, | |
| "rewards/rejected": -0.0763753280043602, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5188417294724316, | |
| "grad_norm": 2.127852797306922, | |
| "learning_rate": 5.5391705230937934e-08, | |
| "logits/chosen": 1.677734375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1323.0, | |
| "logps/rejected": -385.1666564941406, | |
| "loss": 0.5467, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2367350310087204, | |
| "rewards/margins": 0.3196614682674408, | |
| "rewards/rejected": -0.0827229842543602, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.5236017453391512, | |
| "grad_norm": 2.5008366890366354, | |
| "learning_rate": 5.4564735551932416e-08, | |
| "logits/chosen": 2.212890625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1570.3333740234375, | |
| "logps/rejected": -427.4166564941406, | |
| "loss": 0.5413, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2569173276424408, | |
| "rewards/margins": 0.3401692807674408, | |
| "rewards/rejected": -0.0832926407456398, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5283617612058706, | |
| "grad_norm": 1.956784631889553, | |
| "learning_rate": 5.373650467932121e-08, | |
| "logits/chosen": 1.7138671875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1150.6666259765625, | |
| "logps/rejected": -475.75, | |
| "loss": 0.5356, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.25244140625, | |
| "rewards/margins": 0.3492838442325592, | |
| "rewards/rejected": -0.0970052108168602, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.5331217770725902, | |
| "grad_norm": 2.5190391554421487, | |
| "learning_rate": 5.2907241445523785e-08, | |
| "logits/chosen": 2.2141926288604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1508.3333740234375, | |
| "logps/rejected": -337.4583435058594, | |
| "loss": 0.5448, | |
| "rewards/accuracies": 0.9583333134651184, | |
| "rewards/chosen": 0.2505696713924408, | |
| "rewards/margins": 0.3324381411075592, | |
| "rewards/rejected": -0.0818684920668602, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.5378817929393098, | |
| "grad_norm": 2.311957407228179, | |
| "learning_rate": 5.207717496819134e-08, | |
| "logits/chosen": 1.5335286855697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1486.3333740234375, | |
| "logps/rejected": -319.1666564941406, | |
| "loss": 0.5288, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.269775390625, | |
| "rewards/margins": 0.36865234375, | |
| "rewards/rejected": -0.0992431640625, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.5426418088060293, | |
| "grad_norm": 2.0481092553278843, | |
| "learning_rate": 5.124653458690364e-08, | |
| "logits/chosen": 1.94921875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1287.3333740234375, | |
| "logps/rejected": -398.25, | |
| "loss": 0.5248, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2877604067325592, | |
| "rewards/margins": 0.3829752504825592, | |
| "rewards/rejected": -0.09527587890625, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.5474018246727489, | |
| "grad_norm": 2.214152823316418, | |
| "learning_rate": 5.0415549799804857e-08, | |
| "logits/chosen": 1.81640625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1390.3333740234375, | |
| "logps/rejected": -218.0, | |
| "loss": 0.5158, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.29296875, | |
| "rewards/margins": 0.3922525942325592, | |
| "rewards/rejected": -0.0993245467543602, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.5521618405394685, | |
| "grad_norm": 2.213377743308588, | |
| "learning_rate": 4.958445020019515e-08, | |
| "logits/chosen": 2.0950520038604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1339.0, | |
| "logps/rejected": -443.8333435058594, | |
| "loss": 0.5209, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.27783203125, | |
| "rewards/margins": 0.3819986879825592, | |
| "rewards/rejected": -0.103759765625, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.5569218564061881, | |
| "grad_norm": 2.256076170183229, | |
| "learning_rate": 4.875346541309636e-08, | |
| "logits/chosen": 1.9759114980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1347.3333740234375, | |
| "logps/rejected": -333.3333435058594, | |
| "loss": 0.5226, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.2803548276424408, | |
| "rewards/margins": 0.38427734375, | |
| "rewards/rejected": -0.1038004532456398, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.5616818722729076, | |
| "grad_norm": 2.1024738378940504, | |
| "learning_rate": 4.792282503180867e-08, | |
| "logits/chosen": 1.8665364980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1256.0, | |
| "logps/rejected": -280.6666564941406, | |
| "loss": 0.5211, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.2827962338924408, | |
| "rewards/margins": 0.3873697817325592, | |
| "rewards/rejected": -0.10430908203125, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5664418881396271, | |
| "grad_norm": 2.3120916296769574, | |
| "learning_rate": 4.709275855447621e-08, | |
| "logits/chosen": 1.7356771230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1440.6666259765625, | |
| "logps/rejected": -469.8333435058594, | |
| "loss": 0.5148, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2939453125, | |
| "rewards/margins": 0.4103190004825592, | |
| "rewards/rejected": -0.1161702498793602, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.5712019040063467, | |
| "grad_norm": 2.1793904669362107, | |
| "learning_rate": 4.6263495320678784e-08, | |
| "logits/chosen": 1.9680989980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1382.0, | |
| "logps/rejected": -320.25, | |
| "loss": 0.5156, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.2911783754825592, | |
| "rewards/margins": 0.3990885317325592, | |
| "rewards/rejected": -0.1077677384018898, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5759619198730662, | |
| "grad_norm": 2.217847138959823, | |
| "learning_rate": 4.543526444806759e-08, | |
| "logits/chosen": 1.9404296875, | |
| "logits/rejected": 1.2596029043197632, | |
| "logps/chosen": -1353.6666259765625, | |
| "logps/rejected": -852.0, | |
| "loss": 0.495, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3121744692325592, | |
| "rewards/margins": 0.4568684995174408, | |
| "rewards/rejected": -0.1444905549287796, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.5807219357397858, | |
| "grad_norm": 2.2556436443719847, | |
| "learning_rate": 4.460829476906207e-08, | |
| "logits/chosen": 2.3072917461395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1423.6666259765625, | |
| "logps/rejected": -225.0833282470703, | |
| "loss": 0.4993, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3306477963924408, | |
| "rewards/margins": 0.4436849057674408, | |
| "rewards/rejected": -0.1129150390625, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5854819516065054, | |
| "grad_norm": 2.4496973638231547, | |
| "learning_rate": 4.3782814767625755e-08, | |
| "logits/chosen": 1.9225260019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1690.3333740234375, | |
| "logps/rejected": -516.0833129882812, | |
| "loss": 0.5034, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3191731870174408, | |
| "rewards/margins": 0.44287109375, | |
| "rewards/rejected": -0.12384033203125, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.5902419674732249, | |
| "grad_norm": 2.3479931534510783, | |
| "learning_rate": 4.295905251613817e-08, | |
| "logits/chosen": 1.806640625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1628.6666259765625, | |
| "logps/rejected": -218.4166717529297, | |
| "loss": 0.5097, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3076985776424408, | |
| "rewards/margins": 0.4147135317325592, | |
| "rewards/rejected": -0.1068522110581398, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5950019833399445, | |
| "grad_norm": 2.3806255168384465, | |
| "learning_rate": 4.213723561238074e-08, | |
| "logits/chosen": 1.8951822519302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1689.3333740234375, | |
| "logps/rejected": -394.0, | |
| "loss": 0.4976, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3312174379825592, | |
| "rewards/margins": 0.4498697817325592, | |
| "rewards/rejected": -0.1182861328125, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5997619992066641, | |
| "grad_norm": 2.15828623531091, | |
| "learning_rate": 4.131759111665348e-08, | |
| "logits/chosen": 1.6080728769302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1303.6666259765625, | |
| "logps/rejected": -230.1666717529297, | |
| "loss": 0.4974, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3365071713924408, | |
| "rewards/margins": 0.4469400942325592, | |
| "rewards/rejected": -0.11065673828125, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6045220150733835, | |
| "grad_norm": 2.117376463270655, | |
| "learning_rate": 4.0500345489040513e-08, | |
| "logits/chosen": 1.7604166269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1392.6666259765625, | |
| "logps/rejected": -411.1666564941406, | |
| "loss": 0.5065, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3119303286075592, | |
| "rewards/margins": 0.4251302182674408, | |
| "rewards/rejected": -0.1136067733168602, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6092820309401031, | |
| "grad_norm": 2.304820104315325, | |
| "learning_rate": 3.9685724526841126e-08, | |
| "logits/chosen": 1.86328125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1548.6666259765625, | |
| "logps/rejected": -331.9166564941406, | |
| "loss": 0.4876, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3339029848575592, | |
| "rewards/margins": 0.4640299379825592, | |
| "rewards/rejected": -0.1304117888212204, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6140420468068227, | |
| "grad_norm": 1.8048639966913345, | |
| "learning_rate": 3.887395330218428e-08, | |
| "logits/chosen": 1.5784505605697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1254.6666259765625, | |
| "logps/rejected": -213.5, | |
| "loss": 0.5005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3151041567325592, | |
| "rewards/margins": 0.4314778745174408, | |
| "rewards/rejected": -0.1164143905043602, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.6188020626735422, | |
| "grad_norm": 2.3570364514405417, | |
| "learning_rate": 3.8065256099843115e-08, | |
| "logits/chosen": 2.11328125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1585.3333740234375, | |
| "logps/rejected": -489.9583435058594, | |
| "loss": 0.4932, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3302408754825592, | |
| "rewards/margins": 0.4661458432674408, | |
| "rewards/rejected": -0.1361083984375, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6235620785402618, | |
| "grad_norm": 2.808262013514303, | |
| "learning_rate": 3.7259856355267275e-08, | |
| "logits/chosen": 2.390625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1765.0, | |
| "logps/rejected": -400.75, | |
| "loss": 0.5049, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3160807192325592, | |
| "rewards/margins": 0.4415690004825592, | |
| "rewards/rejected": -0.1250813752412796, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.6283220944069814, | |
| "grad_norm": 2.286497267688774, | |
| "learning_rate": 3.645797659284975e-08, | |
| "logits/chosen": 1.759765625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1537.3333740234375, | |
| "logps/rejected": -192.25, | |
| "loss": 0.4949, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3328450620174408, | |
| "rewards/margins": 0.4503580629825592, | |
| "rewards/rejected": -0.1178792342543602, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.6330821102737009, | |
| "grad_norm": 1.9113524972685845, | |
| "learning_rate": 3.56598383644455e-08, | |
| "logits/chosen": 1.8893228769302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1182.6666259765625, | |
| "logps/rejected": -212.25, | |
| "loss": 0.4939, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3352864682674408, | |
| "rewards/margins": 0.45947265625, | |
| "rewards/rejected": -0.1245524063706398, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.6378421261404205, | |
| "grad_norm": 2.193102688331593, | |
| "learning_rate": 3.486566218815871e-08, | |
| "logits/chosen": 2.0107421875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1234.0, | |
| "logps/rejected": -230.25, | |
| "loss": 0.4969, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.326171875, | |
| "rewards/margins": 0.4485677182674408, | |
| "rewards/rejected": -0.1224772110581398, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.64260214200714, | |
| "grad_norm": 2.0784106910572673, | |
| "learning_rate": 3.407566748741578e-08, | |
| "logits/chosen": 1.8997396230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1351.3333740234375, | |
| "logps/rejected": -383.75, | |
| "loss": 0.4852, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3331705629825592, | |
| "rewards/margins": 0.48193359375, | |
| "rewards/rejected": -0.14892578125, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.6473621578738595, | |
| "grad_norm": 1.9321500226818664, | |
| "learning_rate": 3.329007253034062e-08, | |
| "logits/chosen": 1.6796875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1373.3333740234375, | |
| "logps/rejected": -673.5833129882812, | |
| "loss": 0.4815, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3312174379825592, | |
| "rewards/margins": 0.5047200322151184, | |
| "rewards/rejected": -0.1734822541475296, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.6521221737405791, | |
| "grad_norm": 1.996048135547203, | |
| "learning_rate": 3.2509094369449276e-08, | |
| "logits/chosen": 1.765625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1214.0, | |
| "logps/rejected": -314.8333435058594, | |
| "loss": 0.4861, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3360188901424408, | |
| "rewards/margins": 0.4677734375, | |
| "rewards/rejected": -0.1311442106962204, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.6568821896072987, | |
| "grad_norm": 2.398499680880508, | |
| "learning_rate": 3.1732948781680246e-08, | |
| "logits/chosen": 2.24609375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1620.6666259765625, | |
| "logps/rejected": -409.0833435058594, | |
| "loss": 0.4791, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3566080629825592, | |
| "rewards/margins": 0.4921875, | |
| "rewards/rejected": -0.1356608122587204, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.6616422054740182, | |
| "grad_norm": 2.0131909940504205, | |
| "learning_rate": 3.096185020877752e-08, | |
| "logits/chosen": 2.1484375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1380.3333740234375, | |
| "logps/rejected": -233.8333282470703, | |
| "loss": 0.4908, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3385416567325592, | |
| "rewards/margins": 0.4611002504825592, | |
| "rewards/rejected": -0.1222330704331398, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.6664022213407378, | |
| "grad_norm": 1.747753302823929, | |
| "learning_rate": 3.0196011698042156e-08, | |
| "logits/chosen": 1.775390625, | |
| "logits/rejected": 1.3990885019302368, | |
| "logps/chosen": -1055.8333740234375, | |
| "logps/rejected": -290.4166564941406, | |
| "loss": 0.4865, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3341471254825592, | |
| "rewards/margins": 0.4755859375, | |
| "rewards/rejected": -0.1412760466337204, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6711622372074574, | |
| "grad_norm": 1.6738880638021751, | |
| "learning_rate": 2.9435644843469432e-08, | |
| "logits/chosen": 1.69140625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1106.6666259765625, | |
| "logps/rejected": -361.3333435058594, | |
| "loss": 0.4833, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.34716796875, | |
| "rewards/margins": 0.48974609375, | |
| "rewards/rejected": -0.14306640625, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.6759222530741769, | |
| "grad_norm": 2.3037248507214776, | |
| "learning_rate": 2.868095972728731e-08, | |
| "logits/chosen": 1.759765625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1547.3333740234375, | |
| "logps/rejected": -339.1666564941406, | |
| "loss": 0.4789, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3564453125, | |
| "rewards/margins": 0.486328125, | |
| "rewards/rejected": -0.1299641877412796, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.6806822689408965, | |
| "grad_norm": 2.1036297721447967, | |
| "learning_rate": 2.79321648619128e-08, | |
| "logits/chosen": 1.69921875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1345.6666259765625, | |
| "logps/rejected": -251.1666717529297, | |
| "loss": 0.476, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3704427182674408, | |
| "rewards/margins": 0.4988606870174408, | |
| "rewards/rejected": -0.1288655549287796, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.685442284807616, | |
| "grad_norm": 2.169307583974489, | |
| "learning_rate": 2.7189467132341847e-08, | |
| "logits/chosen": 1.8224283456802368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1462.0, | |
| "logps/rejected": -446.6666564941406, | |
| "loss": 0.4822, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.34521484375, | |
| "rewards/margins": 0.4856770932674408, | |
| "rewards/rejected": -0.1404622346162796, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.6902023006743356, | |
| "grad_norm": 2.0550176987990922, | |
| "learning_rate": 2.645307173898901e-08, | |
| "logits/chosen": 1.8990885019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1392.0, | |
| "logps/rejected": -569.5, | |
| "loss": 0.4623, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3741861879825592, | |
| "rewards/margins": 0.5369465947151184, | |
| "rewards/rejected": -0.1636149138212204, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.6949623165410551, | |
| "grad_norm": 1.9559413913722827, | |
| "learning_rate": 2.5723182140992383e-08, | |
| "logits/chosen": 1.6575521230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1391.6666259765625, | |
| "logps/rejected": -486.4583435058594, | |
| "loss": 0.4781, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.34716796875, | |
| "rewards/margins": 0.4957682192325592, | |
| "rewards/rejected": -0.14892578125, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.6997223324077747, | |
| "grad_norm": 1.77184690368037, | |
| "learning_rate": 2.500000000000001e-08, | |
| "logits/chosen": 1.8196614980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1120.3333740234375, | |
| "logps/rejected": -378.0416564941406, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3556315004825592, | |
| "rewards/margins": 0.5065104365348816, | |
| "rewards/rejected": -0.1513671875, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7044823482744943, | |
| "grad_norm": 2.3332126766504016, | |
| "learning_rate": 2.4283725124452327e-08, | |
| "logits/chosen": 1.7135416269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1617.0, | |
| "logps/rejected": -409.25, | |
| "loss": 0.4777, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.35986328125, | |
| "rewards/margins": 0.5079752802848816, | |
| "rewards/rejected": -0.147705078125, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.7092423641412138, | |
| "grad_norm": 1.795140752408747, | |
| "learning_rate": 2.3574555414377228e-08, | |
| "logits/chosen": 1.8294271230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1275.3333740234375, | |
| "logps/rejected": -272.125, | |
| "loss": 0.4873, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.337158203125, | |
| "rewards/margins": 0.4729817807674408, | |
| "rewards/rejected": -0.1357421875, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.7140023800079334, | |
| "grad_norm": 1.952752981212216, | |
| "learning_rate": 2.2872686806712033e-08, | |
| "logits/chosen": 1.7740885019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1366.6666259765625, | |
| "logps/rejected": -290.1666564941406, | |
| "loss": 0.482, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3479817807674408, | |
| "rewards/margins": 0.4830729067325592, | |
| "rewards/rejected": -0.1351318359375, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.718762395874653, | |
| "grad_norm": 1.789618404415905, | |
| "learning_rate": 2.2178313221167965e-08, | |
| "logits/chosen": 1.8151041269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1119.3333740234375, | |
| "logps/rejected": -217.75, | |
| "loss": 0.4856, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3388671875, | |
| "rewards/margins": 0.4736328125, | |
| "rewards/rejected": -0.1352132111787796, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.7235224117413724, | |
| "grad_norm": 2.4468941870774255, | |
| "learning_rate": 2.1491626506651912e-08, | |
| "logits/chosen": 2.0755207538604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1539.3333740234375, | |
| "logps/rejected": -324.0, | |
| "loss": 0.4802, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3562825620174408, | |
| "rewards/margins": 0.4931640625, | |
| "rewards/rejected": -0.136474609375, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.728282427608092, | |
| "grad_norm": 1.8555464035051497, | |
| "learning_rate": 2.0812816388260517e-08, | |
| "logits/chosen": 2.1106770038604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1203.0, | |
| "logps/rejected": -651.1666870117188, | |
| "loss": 0.4611, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3645833432674408, | |
| "rewards/margins": 0.5540364384651184, | |
| "rewards/rejected": -0.1890462189912796, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.7330424434748116, | |
| "grad_norm": 2.129230370787164, | |
| "learning_rate": 2.0142070414860702e-08, | |
| "logits/chosen": 1.779296875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1340.0, | |
| "logps/rejected": -256.25, | |
| "loss": 0.4843, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3509114682674408, | |
| "rewards/margins": 0.4837239682674408, | |
| "rewards/rejected": -0.1325276643037796, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.7378024593415311, | |
| "grad_norm": 1.8326859248201792, | |
| "learning_rate": 1.9479573907271845e-08, | |
| "logits/chosen": 2.130859375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1219.3333740234375, | |
| "logps/rejected": -333.0, | |
| "loss": 0.484, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3313802182674408, | |
| "rewards/margins": 0.4890950620174408, | |
| "rewards/rejected": -0.15771484375, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.7425624752082507, | |
| "grad_norm": 2.43553161056017, | |
| "learning_rate": 1.8825509907063325e-08, | |
| "logits/chosen": 1.99609375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1680.3333740234375, | |
| "logps/rejected": -209.6666717529297, | |
| "loss": 0.4848, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3712565004825592, | |
| "rewards/margins": 0.486328125, | |
| "rewards/rejected": -0.1145426407456398, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.7473224910749703, | |
| "grad_norm": 1.7213663847904304, | |
| "learning_rate": 1.818005912598182e-08, | |
| "logits/chosen": 1.6399739980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1296.3333740234375, | |
| "logps/rejected": -779.25, | |
| "loss": 0.4594, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3583984375, | |
| "rewards/margins": 0.5672200322151184, | |
| "rewards/rejected": -0.2093098908662796, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.7520825069416898, | |
| "grad_norm": 2.138543660798964, | |
| "learning_rate": 1.7543399896022403e-08, | |
| "logits/chosen": 2.0455729961395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1411.3333740234375, | |
| "logps/rejected": -248.5, | |
| "loss": 0.4781, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3616536557674408, | |
| "rewards/margins": 0.4970703125, | |
| "rewards/rejected": -0.1357421875, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.7568425228084094, | |
| "grad_norm": 1.903956596321935, | |
| "learning_rate": 1.691570812015704e-08, | |
| "logits/chosen": 1.8932291269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1360.0, | |
| "logps/rejected": -421.3333435058594, | |
| "loss": 0.4789, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3427734375, | |
| "rewards/margins": 0.5009765625, | |
| "rewards/rejected": -0.1580403596162796, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.761602538675129, | |
| "grad_norm": 1.8076086685577293, | |
| "learning_rate": 1.6297157223734225e-08, | |
| "logits/chosen": 1.8173828125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1186.0, | |
| "logps/rejected": -303.8333435058594, | |
| "loss": 0.4796, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.35595703125, | |
| "rewards/margins": 0.4944661557674408, | |
| "rewards/rejected": -0.1382242888212204, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7663625545418484, | |
| "grad_norm": 2.506667550726532, | |
| "learning_rate": 1.5687918106563324e-08, | |
| "logits/chosen": 1.8919271230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1604.6666259765625, | |
| "logps/rejected": -204.75, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3763020932674408, | |
| "rewards/margins": 0.5084635615348816, | |
| "rewards/rejected": -0.1322835236787796, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.771122570408568, | |
| "grad_norm": 2.2406757166716016, | |
| "learning_rate": 1.5088159095696363e-08, | |
| "logits/chosen": 1.8639322519302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1440.3333740234375, | |
| "logps/rejected": -462.75, | |
| "loss": 0.4573, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3792317807674408, | |
| "rewards/margins": 0.5618489384651184, | |
| "rewards/rejected": -0.1825764924287796, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.7758825862752876, | |
| "grad_norm": 2.0966224181917315, | |
| "learning_rate": 1.4498045898920986e-08, | |
| "logits/chosen": 1.70703125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1400.3333740234375, | |
| "logps/rejected": -519.6666870117188, | |
| "loss": 0.4605, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.38330078125, | |
| "rewards/margins": 0.5519205927848816, | |
| "rewards/rejected": -0.1690266877412796, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.7806426021420071, | |
| "grad_norm": 2.1990999218901957, | |
| "learning_rate": 1.3917741558976892e-08, | |
| "logits/chosen": 1.8307291269302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1525.0, | |
| "logps/rejected": -323.9166564941406, | |
| "loss": 0.4728, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3665364682674408, | |
| "rewards/margins": 0.5110676884651184, | |
| "rewards/rejected": -0.1442464143037796, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.7854026180087267, | |
| "grad_norm": 1.8977299979885642, | |
| "learning_rate": 1.3347406408508693e-08, | |
| "logits/chosen": 1.9837239980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1439.0, | |
| "logps/rejected": -407.9166564941406, | |
| "loss": 0.4755, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3600260317325592, | |
| "rewards/margins": 0.51171875, | |
| "rewards/rejected": -0.151123046875, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.7901626338754463, | |
| "grad_norm": 2.505572334038866, | |
| "learning_rate": 1.2787198025767415e-08, | |
| "logits/chosen": 2.1725261211395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1913.6666259765625, | |
| "logps/rejected": -311.5208435058594, | |
| "loss": 0.4691, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.384765625, | |
| "rewards/margins": 0.52978515625, | |
| "rewards/rejected": -0.1443888396024704, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.7949226497421658, | |
| "grad_norm": 2.436963521338477, | |
| "learning_rate": 1.2237271191073268e-08, | |
| "logits/chosen": 2.14453125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1648.3333740234375, | |
| "logps/rejected": -361.0, | |
| "loss": 0.4705, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3693033754825592, | |
| "rewards/margins": 0.5159505009651184, | |
| "rewards/rejected": -0.1475016325712204, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.7996826656088853, | |
| "grad_norm": 2.1545871993874997, | |
| "learning_rate": 1.1697777844051105e-08, | |
| "logits/chosen": 2.181640625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1643.0, | |
| "logps/rejected": -659.75, | |
| "loss": 0.4708, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3497721254825592, | |
| "rewards/margins": 0.53369140625, | |
| "rewards/rejected": -0.183837890625, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.8044426814756049, | |
| "grad_norm": 1.9783928114364981, | |
| "learning_rate": 1.1168867041651081e-08, | |
| "logits/chosen": 2.0305988788604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1307.3333740234375, | |
| "logps/rejected": -385.375, | |
| "loss": 0.4649, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3761393129825592, | |
| "rewards/margins": 0.5364583134651184, | |
| "rewards/rejected": -0.1610514372587204, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.8092026973423245, | |
| "grad_norm": 2.048419402752998, | |
| "learning_rate": 1.0650684916965558e-08, | |
| "logits/chosen": 1.5989583730697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1502.3333740234375, | |
| "logps/rejected": -197.5, | |
| "loss": 0.4778, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.35791015625, | |
| "rewards/margins": 0.48974609375, | |
| "rewards/rejected": -0.1322224885225296, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.813962713209044, | |
| "grad_norm": 1.9383784775969035, | |
| "learning_rate": 1.0143374638853892e-08, | |
| "logits/chosen": 1.6829427480697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1231.0, | |
| "logps/rejected": -525.6666870117188, | |
| "loss": 0.4662, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3564453125, | |
| "rewards/margins": 0.53271484375, | |
| "rewards/rejected": -0.1761881560087204, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.8187227290757636, | |
| "grad_norm": 2.11654254419848, | |
| "learning_rate": 9.647076372386193e-09, | |
| "logits/chosen": 1.6419271230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1409.0, | |
| "logps/rejected": -275.1666564941406, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3515625, | |
| "rewards/margins": 0.5096028447151184, | |
| "rewards/rejected": -0.1581624299287796, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.8234827449424832, | |
| "grad_norm": 2.031187504683685, | |
| "learning_rate": 9.161927240117174e-09, | |
| "logits/chosen": 1.7906900644302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1438.3333740234375, | |
| "logps/rejected": -287.0833435058594, | |
| "loss": 0.493, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3235677182674408, | |
| "rewards/margins": 0.4562174379825592, | |
| "rewards/rejected": -0.1332194060087204, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.8282427608092027, | |
| "grad_norm": 1.7371218629594014, | |
| "learning_rate": 8.688061284200266e-09, | |
| "logits/chosen": 1.873046875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1163.6666259765625, | |
| "logps/rejected": -443.25, | |
| "loss": 0.4719, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3527018129825592, | |
| "rewards/margins": 0.5113932490348816, | |
| "rewards/rejected": -0.1587321013212204, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.8330027766759223, | |
| "grad_norm": 2.137935345105564, | |
| "learning_rate": 8.225609429353186e-09, | |
| "logits/chosen": 1.9832357168197632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1427.6666259765625, | |
| "logps/rejected": -440.0, | |
| "loss": 0.4677, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3673502504825592, | |
| "rewards/margins": 0.5314127802848816, | |
| "rewards/rejected": -0.163330078125, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.8377627925426419, | |
| "grad_norm": 2.2149373272583577, | |
| "learning_rate": 7.774699446684607e-09, | |
| "logits/chosen": 1.810546875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1571.3333740234375, | |
| "logps/rejected": -198.25, | |
| "loss": 0.4745, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3662109375, | |
| "rewards/margins": 0.5026041865348816, | |
| "rewards/rejected": -0.1363932341337204, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.8425228084093613, | |
| "grad_norm": 1.6437345264974124, | |
| "learning_rate": 7.335455918392219e-09, | |
| "logits/chosen": 1.6298828125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1051.3333740234375, | |
| "logps/rejected": -219.25, | |
| "loss": 0.4659, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3740234375, | |
| "rewards/margins": 0.53076171875, | |
| "rewards/rejected": -0.15673828125, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.8472828242760809, | |
| "grad_norm": 2.2648226740875437, | |
| "learning_rate": 6.908000203341802e-09, | |
| "logits/chosen": 1.7578125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1593.3333740234375, | |
| "logps/rejected": -556.6666870117188, | |
| "loss": 0.4665, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3670247495174408, | |
| "rewards/margins": 0.5305989384651184, | |
| "rewards/rejected": -0.1636962890625, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.8520428401428005, | |
| "grad_norm": 1.8643505256455393, | |
| "learning_rate": 6.492450403536959e-09, | |
| "logits/chosen": 1.88671875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1274.3333740234375, | |
| "logps/rejected": -242.5833282470703, | |
| "loss": 0.4638, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.3751627504825592, | |
| "rewards/margins": 0.5281575322151184, | |
| "rewards/rejected": -0.1529947966337204, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.85680285600952, | |
| "grad_norm": 1.8118886195560266, | |
| "learning_rate": 6.088921331488567e-09, | |
| "logits/chosen": 1.72265625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1327.3333740234375, | |
| "logps/rejected": -337.1666564941406, | |
| "loss": 0.4771, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3557942807674408, | |
| "rewards/margins": 0.50927734375, | |
| "rewards/rejected": -0.1533610075712204, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.8615628718762396, | |
| "grad_norm": 1.7763301753189165, | |
| "learning_rate": 5.697524478493287e-09, | |
| "logits/chosen": 1.7522786855697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1195.6666259765625, | |
| "logps/rejected": -213.6666717529297, | |
| "loss": 0.47, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3663736879825592, | |
| "rewards/margins": 0.5123698115348816, | |
| "rewards/rejected": -0.14599609375, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.8663228877429592, | |
| "grad_norm": 1.4514297510493193, | |
| "learning_rate": 5.3183679838293915e-09, | |
| "logits/chosen": 1.4544271230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1031.6666259765625, | |
| "logps/rejected": -418.75, | |
| "loss": 0.4691, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3513997495174408, | |
| "rewards/margins": 0.5218098759651184, | |
| "rewards/rejected": -0.1700846403837204, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.8710829036096787, | |
| "grad_norm": 2.1059124601925374, | |
| "learning_rate": 4.951556604879048e-09, | |
| "logits/chosen": 1.7239583730697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1482.3333740234375, | |
| "logps/rejected": -267.8333435058594, | |
| "loss": 0.4813, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3521321713924408, | |
| "rewards/margins": 0.4930013120174408, | |
| "rewards/rejected": -0.1409912109375, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.8758429194763983, | |
| "grad_norm": 2.4415439647837265, | |
| "learning_rate": 4.597191688184754e-09, | |
| "logits/chosen": 1.7776693105697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1659.3333740234375, | |
| "logps/rejected": -300.9166564941406, | |
| "loss": 0.4708, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.37109375, | |
| "rewards/margins": 0.51513671875, | |
| "rewards/rejected": -0.1446126252412796, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.8806029353431178, | |
| "grad_norm": 1.8529794337457115, | |
| "learning_rate": 4.255371141448272e-09, | |
| "logits/chosen": 1.8059896230697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1316.0, | |
| "logps/rejected": -379.5, | |
| "loss": 0.4629, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3702799379825592, | |
| "rewards/margins": 0.54248046875, | |
| "rewards/rejected": -0.1726887971162796, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.8853629512098373, | |
| "grad_norm": 2.1226991701058293, | |
| "learning_rate": 3.926189406479613e-09, | |
| "logits/chosen": 1.791015625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1405.3333740234375, | |
| "logps/rejected": -452.8333435058594, | |
| "loss": 0.4643, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3753255307674408, | |
| "rewards/margins": 0.54052734375, | |
| "rewards/rejected": -0.1650390625, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.8901229670765569, | |
| "grad_norm": 2.30275268133539, | |
| "learning_rate": 3.609737433103732e-09, | |
| "logits/chosen": 2.056640625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1542.6666259765625, | |
| "logps/rejected": -506.0, | |
| "loss": 0.4651, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.36181640625, | |
| "rewards/margins": 0.5431315302848816, | |
| "rewards/rejected": -0.181884765625, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.8948829829432765, | |
| "grad_norm": 2.0856238119236163, | |
| "learning_rate": 3.3061026540318227e-09, | |
| "logits/chosen": 2.0104167461395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1519.3333740234375, | |
| "logps/rejected": -205.5, | |
| "loss": 0.4808, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3570963442325592, | |
| "rewards/margins": 0.4864908754825592, | |
| "rewards/rejected": -0.1291910856962204, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.899642998809996, | |
| "grad_norm": 2.1869075364111135, | |
| "learning_rate": 3.015368960704584e-09, | |
| "logits/chosen": 2.2467448711395264, | |
| "logits/rejected": null, | |
| "logps/chosen": -1450.3333740234375, | |
| "logps/rejected": -357.0833435058594, | |
| "loss": 0.4728, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": 0.356689453125, | |
| "rewards/margins": 0.50927734375, | |
| "rewards/rejected": -0.1526285856962204, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.9044030146767156, | |
| "grad_norm": 1.9167057092483704, | |
| "learning_rate": 2.737616680113758e-09, | |
| "logits/chosen": 1.978515625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1395.0, | |
| "logps/rejected": -656.5, | |
| "loss": 0.4674, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3466796875, | |
| "rewards/margins": 0.54443359375, | |
| "rewards/rejected": -0.1977946013212204, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.9091630305434352, | |
| "grad_norm": 2.2088575890028, | |
| "learning_rate": 2.4729225526085585e-09, | |
| "logits/chosen": 1.8391927480697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1550.0, | |
| "logps/rejected": -212.4166717529297, | |
| "loss": 0.4724, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3707682192325592, | |
| "rewards/margins": 0.5139973759651184, | |
| "rewards/rejected": -0.1436360627412796, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.9139230464101546, | |
| "grad_norm": 1.6175308557989694, | |
| "learning_rate": 2.2213597106929607e-09, | |
| "logits/chosen": 1.7259114980697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1176.3333740234375, | |
| "logps/rejected": -336.0, | |
| "loss": 0.4801, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3434244692325592, | |
| "rewards/margins": 0.4947916567325592, | |
| "rewards/rejected": -0.1509602814912796, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.9186830622768742, | |
| "grad_norm": 1.885584304766745, | |
| "learning_rate": 1.9829976588200126e-09, | |
| "logits/chosen": 1.892578125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1342.6666259765625, | |
| "logps/rejected": -364.3333435058594, | |
| "loss": 0.4707, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3595377504825592, | |
| "rewards/margins": 0.5169270634651184, | |
| "rewards/rejected": -0.1571858674287796, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.9234430781435938, | |
| "grad_norm": 2.294659887781663, | |
| "learning_rate": 1.7579022541882539e-09, | |
| "logits/chosen": 1.986328125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1536.3333740234375, | |
| "logps/rejected": -279.9166564941406, | |
| "loss": 0.4647, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3738606870174408, | |
| "rewards/margins": 0.52587890625, | |
| "rewards/rejected": -0.1516520231962204, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.9282030940103134, | |
| "grad_norm": 2.060577728033778, | |
| "learning_rate": 1.5461356885461075e-09, | |
| "logits/chosen": 1.8860677480697632, | |
| "logits/rejected": null, | |
| "logps/chosen": -1427.0, | |
| "logps/rejected": -492.9166564941406, | |
| "loss": 0.4543, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3899739682674408, | |
| "rewards/margins": 0.5662434697151184, | |
| "rewards/rejected": -0.1758219450712204, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.9329631098770329, | |
| "grad_norm": 2.098117892116623, | |
| "learning_rate": 1.3477564710088097e-09, | |
| "logits/chosen": 1.6959635019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1369.6666259765625, | |
| "logps/rejected": -407.1666564941406, | |
| "loss": 0.463, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3759765625, | |
| "rewards/margins": 0.54541015625, | |
| "rewards/rejected": -0.1689860075712204, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.9377231257437525, | |
| "grad_norm": 2.157174977293862, | |
| "learning_rate": 1.1628194118929402e-09, | |
| "logits/chosen": 2.107421875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1396.3333740234375, | |
| "logps/rejected": -231.3333282470703, | |
| "loss": 0.4862, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.345947265625, | |
| "rewards/margins": 0.4767252504825592, | |
| "rewards/rejected": -0.1308797150850296, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.9424831416104721, | |
| "grad_norm": 1.8625361719410045, | |
| "learning_rate": 9.913756075728085e-10, | |
| "logits/chosen": 1.9921875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1281.6666259765625, | |
| "logps/rejected": -779.25, | |
| "loss": 0.4592, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3557942807674408, | |
| "rewards/margins": 0.572265625, | |
| "rewards/rejected": -0.2164713591337204, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.9472431574771916, | |
| "grad_norm": 1.90411986231157, | |
| "learning_rate": 8.334724263630299e-10, | |
| "logits/chosen": 1.927734375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1244.0, | |
| "logps/rejected": -475.8333435058594, | |
| "loss": 0.457, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3772786557674408, | |
| "rewards/margins": 0.5545247197151184, | |
| "rewards/rejected": -0.1775716096162796, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.9520031733439112, | |
| "grad_norm": 2.2273492902165857, | |
| "learning_rate": 6.891534954310885e-10, | |
| "logits/chosen": 1.8365885019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1464.6666259765625, | |
| "logps/rejected": -230.8333282470703, | |
| "loss": 0.4679, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3821614682674408, | |
| "rewards/margins": 0.52490234375, | |
| "rewards/rejected": -0.1425374299287796, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.9567631892106307, | |
| "grad_norm": 1.9356040315595135, | |
| "learning_rate": 5.584586887435739e-10, | |
| "logits/chosen": 1.8515625, | |
| "logits/rejected": null, | |
| "logps/chosen": -1235.0, | |
| "logps/rejected": -473.3333435058594, | |
| "loss": 0.4593, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3761393129825592, | |
| "rewards/margins": 0.5594075322151184, | |
| "rewards/rejected": -0.18310546875, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.9615232050773502, | |
| "grad_norm": 2.0045721624389117, | |
| "learning_rate": 4.414241160493659e-10, | |
| "logits/chosen": 1.513671875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1449.6666259765625, | |
| "logps/rejected": -620.375, | |
| "loss": 0.4642, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3624674379825592, | |
| "rewards/margins": 0.53857421875, | |
| "rewards/rejected": -0.1766154021024704, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.9662832209440698, | |
| "grad_norm": 2.258022376849562, | |
| "learning_rate": 3.3808211290284883e-10, | |
| "logits/chosen": 2.310546875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1613.6666259765625, | |
| "logps/rejected": -673.5, | |
| "loss": 0.4624, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.36181640625, | |
| "rewards/margins": 0.53857421875, | |
| "rewards/rejected": -0.1770833283662796, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.9710432368107894, | |
| "grad_norm": 2.0920594811785582, | |
| "learning_rate": 2.484612317299295e-10, | |
| "logits/chosen": 1.9537760019302368, | |
| "logits/rejected": null, | |
| "logps/chosen": -1398.0, | |
| "logps/rejected": -373.9166564941406, | |
| "loss": 0.4568, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3958333432674408, | |
| "rewards/margins": 0.5577799677848816, | |
| "rewards/rejected": -0.1617838591337204, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.9758032526775089, | |
| "grad_norm": 2.5414097510677287, | |
| "learning_rate": 1.7258623393922588e-10, | |
| "logits/chosen": 2.0263671875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1744.6666259765625, | |
| "logps/rejected": -483.7708435058594, | |
| "loss": 0.4702, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3580729067325592, | |
| "rewards/margins": 0.5281575322151184, | |
| "rewards/rejected": -0.1700846403837204, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.9805632685442285, | |
| "grad_norm": 2.3268108178524383, | |
| "learning_rate": 1.1047808308075057e-10, | |
| "logits/chosen": 2.20703125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1482.3333740234375, | |
| "logps/rejected": -611.0, | |
| "loss": 0.4564, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3743489682674408, | |
| "rewards/margins": 0.55517578125, | |
| "rewards/rejected": -0.1808268278837204, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.9853232844109481, | |
| "grad_norm": 2.096746342425909, | |
| "learning_rate": 6.215393905388277e-11, | |
| "logits/chosen": 1.955078125, | |
| "logits/rejected": null, | |
| "logps/chosen": -1452.0, | |
| "logps/rejected": -208.4166717529297, | |
| "loss": 0.477, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3597005307674408, | |
| "rewards/margins": 0.5052083134651184, | |
| "rewards/rejected": -0.1449788361787796, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.9900833002776676, | |
| "grad_norm": 2.048833798956088, | |
| "learning_rate": 2.7627153366222012e-11, | |
| "logits/chosen": 2.0823566913604736, | |
| "logits/rejected": null, | |
| "logps/chosen": -1296.6666259765625, | |
| "logps/rejected": -302.5833435058594, | |
| "loss": 0.4793, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3583984375, | |
| "rewards/margins": 0.4939778745174408, | |
| "rewards/rejected": -0.1356608122587204, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.9948433161443871, | |
| "grad_norm": 2.1643804376502103, | |
| "learning_rate": 6.907265444716648e-12, | |
| "logits/chosen": 2.162109375, | |
| "logits/rejected": null, | |
| "logps/chosen": -1641.1666259765625, | |
| "logps/rejected": -360.5833435058594, | |
| "loss": 0.4901, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3214518129825592, | |
| "rewards/margins": 0.4715169370174408, | |
| "rewards/rejected": -0.1501871794462204, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.9996033320111067, | |
| "grad_norm": 1.7319154590819232, | |
| "learning_rate": 0.0, | |
| "logits/chosen": 1.9921875, | |
| "logits/rejected": null, | |
| "logps/chosen": -1177.3333740234375, | |
| "logps/rejected": -452.6666564941406, | |
| "loss": 0.4729, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.3430989682674408, | |
| "rewards/margins": 0.5192057490348816, | |
| "rewards/rejected": -0.17626953125, | |
| "step": 210 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 210, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |