{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 100,
  "global_step": 750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013333333333333334,
      "grad_norm": 75.5,
      "kl": 0.0,
      "learning_rate": 1e-06,
      "logits/chosen": 1803143936.0,
      "logps/chosen": -1764.083251953125,
      "loss": 0.632,
      "rewards/chosen": -0.6753141283988953,
      "step": 1
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 77.0,
      "kl": 2.966403007507324,
      "learning_rate": 9.88e-07,
      "logits/chosen": 1835470165.3333333,
      "logps/chosen": -1562.392795138889,
      "loss": 0.5078,
      "rewards/chosen": 0.2530868848164876,
      "step": 10
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 70.0,
      "kl": 17.817913055419922,
      "learning_rate": 9.746666666666666e-07,
      "logits/chosen": 1839740518.4,
      "logps/chosen": -1660.749609375,
      "loss": 0.4871,
      "rewards/chosen": 1.8496942520141602,
      "step": 20
    },
    {
      "epoch": 0.4,
      "grad_norm": 63.0,
      "kl": 30.11983871459961,
      "learning_rate": 9.613333333333334e-07,
      "logits/chosen": 1844791091.2,
      "logps/chosen": -1590.46630859375,
      "loss": 0.5174,
      "rewards/chosen": 2.9328380584716798,
      "step": 30
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 44.0,
      "kl": 39.447776794433594,
      "learning_rate": 9.479999999999999e-07,
      "logits/chosen": 1830737728.2397003,
      "logits/rejected": 1802774180.2264152,
      "logps/chosen": -1604.0685861423221,
      "logps/rejected": -1642.3963738207547,
      "loss": 0.497,
      "rewards/chosen": 4.055163894253277,
      "rewards/margins": 0.6231006295864137,
      "rewards/rejected": 3.4320632646668634,
      "step": 40
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 61.0,
      "kl": 27.023143768310547,
      "learning_rate": 9.346666666666666e-07,
      "logits/rejected": 1793442406.4,
      "logps/rejected": -1498.494921875,
      "loss": 0.4883,
      "rewards/rejected": 2.6893266677856444,
      "step": 50
    },
    {
      "epoch": 0.8,
      "grad_norm": 70.5,
      "kl": 16.976736068725586,
      "learning_rate": 9.213333333333333e-07,
      "logits/rejected": 1816138956.8,
      "logps/rejected": -1576.112890625,
      "loss": 0.4765,
      "rewards/rejected": 1.6485807418823242,
      "step": 60
    },
    {
      "epoch": 0.9333333333333333,
      "grad_norm": 78.0,
      "kl": 3.4582889080047607,
      "learning_rate": 9.08e-07,
      "logits/rejected": 1791525068.8,
      "logps/rejected": -1609.57080078125,
      "loss": 0.4898,
      "rewards/rejected": 0.33603610992431643,
      "step": 70
    },
    {
      "epoch": 1.0666666666666667,
      "grad_norm": 96.0,
      "kl": 4.4611639976501465,
      "learning_rate": 8.946666666666667e-07,
      "logits/chosen": 1793128038.4,
      "logits/rejected": 1786466099.2,
      "logps/chosen": -1679.2375,
      "logps/rejected": -1366.859375,
      "loss": 0.4474,
      "rewards/chosen": 0.8079164505004883,
      "rewards/margins": 1.4511647701263428,
      "rewards/rejected": -0.6432483196258545,
      "step": 80
    },
    {
      "epoch": 1.2,
      "grad_norm": 73.0,
      "kl": 20.16000747680664,
      "learning_rate": 8.813333333333332e-07,
      "logits/chosen": 1788976128.0,
      "logps/chosen": -1553.37900390625,
      "loss": 0.4946,
      "rewards/chosen": 2.074075126647949,
      "step": 90
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 55.25,
      "kl": 30.880172729492188,
      "learning_rate": 8.68e-07,
      "logits/chosen": 1800670822.4,
      "logps/chosen": -1629.187109375,
      "loss": 0.4936,
      "rewards/chosen": 3.1399845123291015,
      "step": 100
    },
    {
      "epoch": 1.4666666666666668,
      "grad_norm": 47.75,
      "kl": 42.36182403564453,
      "learning_rate": 8.546666666666666e-07,
      "logits/chosen": 1819451801.6,
      "logps/chosen": -1631.21865234375,
      "loss": 0.4832,
      "rewards/chosen": 4.322412109375,
      "step": 110
    },
    {
      "epoch": 1.6,
      "grad_norm": 65.0,
      "kl": 29.110525131225586,
      "learning_rate": 8.413333333333333e-07,
      "logits/chosen": 1786882220.2616823,
      "logits/rejected": 1767476998.0093896,
      "logps/chosen": -1454.260660046729,
      "logps/rejected": -1486.950850938967,
      "loss": 0.4809,
      "rewards/chosen": 4.8924052871276285,
      "rewards/margins": 2.9423321882151434,
      "rewards/rejected": 1.9500730989124853,
      "step": 120
    },
    {
      "epoch": 1.7333333333333334,
      "grad_norm": 68.0,
      "kl": 10.5321044921875,
      "learning_rate": 8.28e-07,
      "logits/rejected": 1757802086.4,
      "logps/rejected": -1584.4416015625,
      "loss": 0.4952,
      "rewards/rejected": 1.1045079231262207,
      "step": 130
    },
    {
      "epoch": 1.8666666666666667,
      "grad_norm": 74.5,
      "kl": 2.656236171722412,
      "learning_rate": 8.146666666666666e-07,
      "logits/rejected": 1784456601.6,
      "logps/rejected": -1606.92255859375,
      "loss": 0.4411,
      "rewards/rejected": 0.04395853877067566,
      "step": 140
    },
    {
      "epoch": 2.0,
      "grad_norm": 47.25,
      "kl": 0.02235652133822441,
      "learning_rate": 8.013333333333333e-07,
      "logits/rejected": 1755790950.4,
      "logps/rejected": -1492.91025390625,
      "loss": 0.3227,
      "rewards/rejected": -0.965308952331543,
      "step": 150
    },
    {
      "epoch": 2.1333333333333333,
      "grad_norm": 74.5,
      "kl": 22.667367935180664,
      "learning_rate": 7.88e-07,
      "logits/chosen": 1760644710.4,
      "logps/chosen": -1561.63623046875,
      "loss": 0.5121,
      "rewards/chosen": 2.2527976989746095,
      "step": 160
    },
    {
      "epoch": 2.2666666666666666,
      "grad_norm": 75.5,
      "kl": 35.21966552734375,
      "learning_rate": 7.746666666666666e-07,
      "logits/chosen": 1774286233.6,
      "logps/chosen": -1643.762109375,
      "loss": 0.4987,
      "rewards/chosen": 3.5484432220458983,
      "step": 170
    },
    {
      "epoch": 2.4,
      "grad_norm": 52.25,
      "kl": 43.11100387573242,
      "learning_rate": 7.613333333333333e-07,
      "logits/chosen": 1786122035.2,
      "logps/chosen": -1576.08076171875,
      "loss": 0.4918,
      "rewards/chosen": 4.371388244628906,
      "step": 180
    },
    {
      "epoch": 2.533333333333333,
      "grad_norm": 49.75,
      "kl": 43.39256286621094,
      "learning_rate": 7.48e-07,
      "logits/chosen": 1780707128.5692885,
      "logits/rejected": 1751267212.0754716,
      "logps/chosen": -1593.653675093633,
      "logps/rejected": -1665.3343160377358,
      "loss": 0.481,
      "rewards/chosen": 5.09666597173455,
      "rewards/margins": 3.9583941361195003,
      "rewards/rejected": 1.13827183561505,
      "step": 190
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 63.5,
      "kl": 5.4629011154174805,
      "learning_rate": 7.346666666666666e-07,
      "logits/rejected": 1737874841.6,
      "logps/rejected": -1520.10458984375,
      "loss": 0.4785,
      "rewards/rejected": 0.5283474445343017,
      "step": 200
    },
    {
      "epoch": 2.8,
      "grad_norm": 59.25,
      "kl": 1.2280102968215942,
      "learning_rate": 7.213333333333334e-07,
      "logits/rejected": 1761185177.6,
      "logps/rejected": -1594.97919921875,
      "loss": 0.4105,
      "rewards/rejected": -0.23803796768188476,
      "step": 210
    },
    {
      "epoch": 2.9333333333333336,
      "grad_norm": 70.5,
      "kl": 0.0,
      "learning_rate": 7.079999999999999e-07,
      "logits/rejected": 1738660864.0,
      "logps/rejected": -1625.093359375,
      "loss": 0.2885,
      "rewards/rejected": -1.2162075996398927,
      "step": 220
    },
    {
      "epoch": 3.066666666666667,
      "grad_norm": 75.5,
      "kl": 14.01048469543457,
      "learning_rate": 6.946666666666666e-07,
      "logits/chosen": 1750113075.2,
      "logits/rejected": 1743708160.0,
      "logps/chosen": -1658.2826171875,
      "logps/rejected": -1375.04150390625,
      "loss": 0.3769,
      "rewards/chosen": 2.9034093856811523,
      "rewards/margins": 4.364873313903809,
      "rewards/rejected": -1.4614639282226562,
      "step": 230
    },
    {
      "epoch": 3.2,
      "grad_norm": 63.0,
      "kl": 37.20917510986328,
      "learning_rate": 6.813333333333333e-07,
      "logits/chosen": 1746606899.2,
      "logps/chosen": -1536.5771484375,
      "loss": 0.4982,
      "rewards/chosen": 3.75426025390625,
      "step": 240
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 54.5,
      "kl": 43.48841094970703,
      "learning_rate": 6.68e-07,
      "logits/chosen": 1757109657.6,
      "logps/chosen": -1617.16201171875,
      "loss": 0.5003,
      "rewards/chosen": 4.342501831054688,
      "step": 250
    },
    {
      "epoch": 3.466666666666667,
      "grad_norm": 52.5,
      "kl": 51.4334716796875,
      "learning_rate": 6.546666666666665e-07,
      "logits/chosen": 1780435148.8,
      "logps/chosen": -1622.2716796875,
      "loss": 0.4796,
      "rewards/chosen": 5.217120742797851,
      "step": 260
    },
    {
      "epoch": 3.6,
      "grad_norm": 67.5,
      "kl": 19.67037582397461,
      "learning_rate": 6.413333333333333e-07,
      "logits/chosen": 1755938050.3925233,
      "logits/rejected": 1727550343.8122065,
      "logps/chosen": -1448.7593457943926,
      "logps/rejected": -1504.9633215962442,
      "loss": 0.4334,
      "rewards/chosen": 5.442524277161215,
      "rewards/margins": 5.293697337761961,
      "rewards/rejected": 0.1488269393992536,
      "step": 270
    },
    {
      "epoch": 3.7333333333333334,
      "grad_norm": 51.75,
      "kl": 0.6852197647094727,
      "learning_rate": 6.28e-07,
      "logits/rejected": 1722981785.6,
      "logps/rejected": -1600.43251953125,
      "loss": 0.3926,
      "rewards/rejected": -0.49457273483276365,
      "step": 280
    },
    {
      "epoch": 3.8666666666666667,
      "grad_norm": 49.0,
      "kl": 0.2839541435241699,
      "learning_rate": 6.146666666666667e-07,
      "logits/rejected": 1752589516.8,
      "logps/rejected": -1619.22822265625,
      "loss": 0.2889,
      "rewards/rejected": -1.1866175651550293,
      "step": 290
    },
    {
      "epoch": 4.0,
      "grad_norm": 37.0,
      "kl": 0.0,
      "learning_rate": 6.013333333333334e-07,
      "logits/rejected": 1728595353.6,
      "logps/rejected": -1498.5271484375,
      "loss": 0.2549,
      "rewards/rejected": -1.526987361907959,
      "step": 300
    },
    {
      "epoch": 4.133333333333334,
      "grad_norm": 54.0,
      "kl": 39.12127685546875,
      "learning_rate": 5.879999999999999e-07,
      "logits/chosen": 1737360384.0,
      "logps/chosen": -1544.43232421875,
      "loss": 0.4935,
      "rewards/chosen": 3.9731983184814452,
      "step": 310
    },
    {
      "epoch": 4.266666666666667,
      "grad_norm": 172.0,
      "kl": 46.1120491027832,
      "learning_rate": 5.746666666666667e-07,
      "logits/chosen": 1747944038.4,
      "logps/chosen": -1632.6375,
      "loss": 0.4933,
      "rewards/chosen": 4.660909652709961,
      "step": 320
    },
    {
      "epoch": 4.4,
      "grad_norm": 60.0,
      "kl": 50.143592834472656,
      "learning_rate": 5.613333333333333e-07,
      "logits/chosen": 1759613952.0,
      "logps/chosen": -1568.59072265625,
      "loss": 0.4785,
      "rewards/chosen": 5.1204078674316404,
      "step": 330
    },
    {
      "epoch": 4.533333333333333,
      "grad_norm": 42.5,
      "kl": 45.67478942871094,
      "learning_rate": 5.48e-07,
      "logits/chosen": 1756831406.5018728,
      "logits/rejected": 1722636635.7735848,
      "logps/chosen": -1589.1077949438202,
      "logps/rejected": -1681.454304245283,
      "loss": 0.4447,
      "rewards/chosen": 5.551258558637641,
      "rewards/margins": 6.024997390931457,
      "rewards/rejected": -0.47373883229381636,
      "step": 340
    },
    {
      "epoch": 4.666666666666667,
      "grad_norm": 217.0,
      "kl": 0.32102876901626587,
      "learning_rate": 5.346666666666666e-07,
      "logits/rejected": 1712375193.6,
      "logps/rejected": -1534.0259765625,
      "loss": 0.338,
      "rewards/rejected": -0.8637893676757813,
      "step": 350
    },
    {
      "epoch": 4.8,
      "grad_norm": 47.0,
      "kl": 0.029415320605039597,
      "learning_rate": 5.213333333333333e-07,
      "logits/rejected": 1739532697.6,
      "logps/rejected": -1605.47685546875,
      "loss": 0.2836,
      "rewards/rejected": -1.2877922058105469,
      "step": 360
    },
    {
      "epoch": 4.933333333333334,
      "grad_norm": 50.5,
      "kl": 0.0,
      "learning_rate": 5.079999999999999e-07,
      "logits/rejected": 1720070963.2,
      "logps/rejected": -1630.8001953125,
      "loss": 0.2262,
      "rewards/rejected": -1.7868902206420898,
      "step": 370
    },
    {
      "epoch": 5.066666666666666,
      "grad_norm": 70.5,
      "kl": 21.522884368896484,
      "learning_rate": 4.946666666666666e-07,
      "logits/chosen": 1738568908.8,
      "logits/rejected": 1726313472.0,
      "logps/chosen": -1644.7123046875,
      "logps/rejected": -1377.4642578125,
      "loss": 0.3804,
      "rewards/chosen": 4.260452651977539,
      "rewards/margins": 5.9641773223876955,
      "rewards/rejected": -1.7037246704101563,
      "step": 380
    },
    {
      "epoch": 5.2,
      "grad_norm": 49.75,
      "kl": 47.77645492553711,
      "learning_rate": 4.813333333333334e-07,
      "logits/chosen": 1732489625.6,
      "logps/chosen": -1525.93984375,
      "loss": 0.4944,
      "rewards/chosen": 4.817991256713867,
      "step": 390
    },
    {
      "epoch": 5.333333333333333,
      "grad_norm": 50.25,
      "kl": 49.042144775390625,
      "learning_rate": 4.68e-07,
      "logits/chosen": 1741244620.8,
      "logps/chosen": -1609.87353515625,
      "loss": 0.4644,
      "rewards/chosen": 5.071358489990234,
      "step": 400
    },
    {
      "epoch": 5.466666666666667,
      "grad_norm": 48.0,
      "kl": 55.76483154296875,
      "learning_rate": 4.5466666666666666e-07,
      "logits/chosen": 1760807936.0,
      "logps/chosen": -1618.9923828125,
      "loss": 0.5002,
      "rewards/chosen": 5.545055770874024,
      "step": 410
    },
    {
      "epoch": 5.6,
      "grad_norm": 50.0,
      "kl": 18.410531997680664,
      "learning_rate": 4.413333333333333e-07,
      "logits/chosen": 1736117544.672897,
      "logits/rejected": 1708876583.6619718,
      "logps/chosen": -1447.1404789719627,
      "logps/rejected": -1516.6236795774648,
      "loss": 0.3554,
      "rewards/chosen": 5.60440890588493,
      "rewards/margins": 6.6215995436077515,
      "rewards/rejected": -1.0171906377228213,
      "step": 420
    },
    {
      "epoch": 5.733333333333333,
      "grad_norm": 39.25,
      "kl": 0.028980541974306107,
      "learning_rate": 4.2799999999999997e-07,
      "logits/rejected": 1704340684.8,
      "logps/rejected": -1610.3271484375,
      "loss": 0.269,
      "rewards/rejected": -1.4840303421020509,
      "step": 430
    },
    {
      "epoch": 5.866666666666667,
      "grad_norm": 53.5,
      "kl": 0.0,
      "learning_rate": 4.146666666666667e-07,
      "logits/rejected": 1737012224.0,
      "logps/rejected": -1625.72900390625,
      "loss": 0.2277,
      "rewards/rejected": -1.8366947174072266,
      "step": 440
    },
    {
      "epoch": 6.0,
      "grad_norm": 39.25,
      "kl": 0.0,
      "learning_rate": 4.0133333333333333e-07,
      "logits/rejected": 1715832217.6,
      "logps/rejected": -1501.8396484375,
      "loss": 0.2283,
      "rewards/rejected": -1.8582498550415039,
      "step": 450
    },
    {
      "epoch": 6.133333333333334,
      "grad_norm": 55.25,
      "kl": 47.387760162353516,
      "learning_rate": 3.88e-07,
      "logits/chosen": 1729806540.8,
      "logps/chosen": -1536.3201171875,
      "loss": 0.4921,
      "rewards/chosen": 4.784424209594727,
      "step": 460
    },
    {
      "epoch": 6.266666666666667,
      "grad_norm": 77.0,
      "kl": 51.74030685424805,
      "learning_rate": 3.7466666666666663e-07,
      "logits/chosen": 1736568832.0,
      "logps/chosen": -1627.24794921875,
      "loss": 0.4934,
      "rewards/chosen": 5.199858093261719,
      "step": 470
    },
    {
      "epoch": 6.4,
      "grad_norm": 61.75,
      "kl": 53.5655517578125,
      "learning_rate": 3.6133333333333334e-07,
      "logits/chosen": 1746720563.2,
      "logps/chosen": -1565.3529296875,
      "loss": 0.4801,
      "rewards/chosen": 5.444185256958008,
      "step": 480
    },
    {
      "epoch": 6.533333333333333,
      "grad_norm": 40.75,
      "kl": 47.25954055786133,
      "learning_rate": 3.4799999999999994e-07,
      "logits/chosen": 1744727741.8426967,
      "logits/rejected": 1706023105.2075472,
      "logps/chosen": -1587.4740168539327,
      "logps/rejected": -1689.911704009434,
      "loss": 0.4287,
      "rewards/chosen": 5.714643957016619,
      "rewards/margins": 7.034099805566516,
      "rewards/rejected": -1.3194558485498968,
      "step": 490
    },
    {
      "epoch": 6.666666666666667,
      "grad_norm": 58.0,
      "kl": 0.0,
      "learning_rate": 3.3466666666666665e-07,
      "logits/rejected": 1700372172.8,
      "logps/rejected": -1541.06552734375,
      "loss": 0.265,
      "rewards/rejected": -1.5677401542663574,
      "step": 500
    },
    {
      "epoch": 6.8,
      "grad_norm": 39.75,
      "kl": 0.0,
      "learning_rate": 3.2133333333333335e-07,
      "logits/rejected": 1729151795.2,
      "logps/rejected": -1609.8298828125,
      "loss": 0.2389,
      "rewards/rejected": -1.7230974197387696,
      "step": 510
    },
    {
      "epoch": 6.933333333333334,
      "grad_norm": 42.0,
      "kl": 0.0,
      "learning_rate": 3.08e-07,
      "logits/rejected": 1712730828.8,
      "logps/rejected": -1633.16318359375,
      "loss": 0.2112,
      "rewards/rejected": -2.023202896118164,
      "step": 520
    },
    {
      "epoch": 7.066666666666666,
      "grad_norm": 64.5,
      "kl": 25.04488754272461,
      "learning_rate": 2.9466666666666666e-07,
      "logits/chosen": 1731994419.2,
      "logits/rejected": 1720656691.2,
      "logps/chosen": -1637.28173828125,
      "logps/rejected": -1377.9115234375,
      "loss": 0.3763,
      "rewards/chosen": 5.003516006469726,
      "rewards/margins": 6.7519731521606445,
      "rewards/rejected": -1.748457145690918,
      "step": 530
    },
    {
      "epoch": 7.2,
      "grad_norm": 56.0,
      "kl": 52.43037033081055,
      "learning_rate": 2.813333333333333e-07,
      "logits/chosen": 1726875648.0,
      "logps/chosen": -1521.06044921875,
      "loss": 0.4829,
      "rewards/chosen": 5.305931854248047,
      "step": 540
    },
    {
      "epoch": 7.333333333333333,
      "grad_norm": 50.0,
      "kl": 51.872047424316406,
      "learning_rate": 2.68e-07,
      "logits/chosen": 1734085222.4,
      "logps/chosen": -1607.64345703125,
      "loss": 0.4712,
      "rewards/chosen": 5.294354629516602,
      "step": 550
    },
    {
      "epoch": 7.466666666666667,
      "grad_norm": 51.0,
      "kl": 55.77549362182617,
      "learning_rate": 2.546666666666666e-07,
      "logits/chosen": 1753063219.2,
      "logps/chosen": -1617.415625,
      "loss": 0.4773,
      "rewards/chosen": 5.702725982666015,
      "step": 560
    },
    {
      "epoch": 7.6,
      "grad_norm": 43.75,
      "kl": 18.287708282470703,
      "learning_rate": 2.413333333333333e-07,
      "logits/chosen": 1731032073.5700934,
      "logits/rejected": 1702629587.5305164,
      "logps/chosen": -1447.9690420560748,
      "logps/rejected": -1521.8135269953052,
      "loss": 0.3238,
      "rewards/chosen": 5.521567086193049,
      "rewards/margins": 7.057752337348419,
      "rewards/rejected": -1.5361852511553697,
      "step": 570
    },
    {
      "epoch": 7.733333333333333,
      "grad_norm": 41.0,
      "kl": 0.0,
      "learning_rate": 2.28e-07,
      "logits/rejected": 1697045913.6,
      "logps/rejected": -1615.4837890625,
      "loss": 0.2158,
      "rewards/rejected": -1.9997014999389648,
      "step": 580
    },
    {
      "epoch": 7.866666666666667,
      "grad_norm": 38.75,
      "kl": 0.0,
      "learning_rate": 2.1466666666666666e-07,
      "logits/rejected": 1732887756.8,
      "logps/rejected": -1627.6578125,
      "loss": 0.206,
      "rewards/rejected": -2.029564094543457,
      "step": 590
    },
    {
      "epoch": 8.0,
      "grad_norm": 43.25,
      "kl": 0.0,
      "learning_rate": 2.0133333333333334e-07,
      "logits/rejected": 1715685171.2,
      "logps/rejected": -1501.2095703125,
      "loss": 0.241,
      "rewards/rejected": -1.795237922668457,
      "step": 600
    },
    {
      "epoch": 8.133333333333333,
      "grad_norm": 55.0,
      "kl": 51.67559051513672,
      "learning_rate": 1.88e-07,
      "logits/chosen": 1727099904.0,
      "logps/chosen": -1532.07939453125,
      "loss": 0.4961,
      "rewards/chosen": 5.208480453491211,
      "step": 610
    },
    {
      "epoch": 8.266666666666667,
      "grad_norm": 64.5,
      "kl": 54.74528884887695,
      "learning_rate": 1.7466666666666667e-07,
      "logits/chosen": 1733115904.0,
      "logps/chosen": -1624.519921875,
      "loss": 0.5013,
      "rewards/chosen": 5.472665786743164,
      "step": 620
    },
    {
      "epoch": 8.4,
      "grad_norm": 54.75,
      "kl": 54.551849365234375,
      "learning_rate": 1.6133333333333332e-07,
      "logits/chosen": 1742884659.2,
      "logps/chosen": -1564.97412109375,
      "loss": 0.4915,
      "rewards/chosen": 5.482054138183594,
      "step": 630
    },
    {
      "epoch": 8.533333333333333,
      "grad_norm": 30.625,
      "kl": 46.306522369384766,
      "learning_rate": 1.4799999999999998e-07,
      "logits/chosen": 1739496766.3220973,
      "logits/rejected": 1702134687.3962264,
      "logps/chosen": -1588.0999531835207,
      "logps/rejected": -1696.4740566037735,
      "loss": 0.4184,
      "rewards/chosen": 5.652020515127575,
      "rewards/margins": 7.627730826153518,
      "rewards/rejected": -1.9757103110259433,
      "step": 640
    },
    {
      "epoch": 8.666666666666666,
      "grad_norm": 56.5,
      "kl": 0.0,
      "learning_rate": 1.3466666666666665e-07,
      "logits/rejected": 1696173260.8,
      "logps/rejected": -1544.7572265625,
      "loss": 0.2282,
      "rewards/rejected": -1.9369186401367187,
      "step": 650
    },
    {
      "epoch": 8.8,
      "grad_norm": 36.75,
      "kl": 0.0,
      "learning_rate": 1.2133333333333333e-07,
      "logits/rejected": 1725317120.0,
      "logps/rejected": -1612.53291015625,
      "loss": 0.2132,
      "rewards/rejected": -1.9934148788452148,
      "step": 660
    },
    {
      "epoch": 8.933333333333334,
      "grad_norm": 43.25,
      "kl": 0.0,
      "learning_rate": 1.0799999999999999e-07,
      "logits/rejected": 1711136563.2,
      "logps/rejected": -1633.72958984375,
      "loss": 0.2084,
      "rewards/rejected": -2.079827880859375,
      "step": 670
    },
    {
      "epoch": 9.066666666666666,
      "grad_norm": 74.5,
      "kl": 26.150564193725586,
      "learning_rate": 9.466666666666665e-08,
      "logits/chosen": 1734777036.8,
      "logits/rejected": 1720304844.8,
      "logps/chosen": -1634.3982421875,
      "logps/rejected": -1376.32314453125,
      "loss": 0.3847,
      "rewards/chosen": 5.2918556213378904,
      "rewards/margins": 6.881484413146973,
      "rewards/rejected": -1.5896287918090821,
      "step": 680
    },
    {
      "epoch": 9.2,
      "grad_norm": 45.0,
      "kl": 54.32807540893555,
      "learning_rate": 8.133333333333332e-08,
      "logits/chosen": 1726023270.4,
      "logps/chosen": -1519.271875,
      "loss": 0.4882,
      "rewards/chosen": 5.484774017333985,
      "step": 690
    },
    {
      "epoch": 9.333333333333334,
      "grad_norm": 51.75,
      "kl": 53.640716552734375,
      "learning_rate": 6.8e-08,
      "logits/chosen": 1733355929.6,
      "logps/chosen": -1606.2349609375,
      "loss": 0.4805,
      "rewards/chosen": 5.435222625732422,
      "step": 700
    },
    {
      "epoch": 9.466666666666667,
      "grad_norm": 46.75,
      "kl": 56.87353515625,
      "learning_rate": 5.4666666666666666e-08,
      "logits/chosen": 1751316684.8,
      "logps/chosen": -1617.39033203125,
      "loss": 0.4885,
      "rewards/chosen": 5.705249786376953,
      "step": 710
    },
    {
      "epoch": 9.6,
      "grad_norm": 38.25,
      "kl": 17.8724308013916,
      "learning_rate": 4.133333333333333e-08,
      "logits/chosen": 1729312212.9345794,
      "logits/rejected": 1699881844.5821595,
      "logps/chosen": -1447.3971962616822,
      "logps/rejected": -1524.6291079812206,
      "loss": 0.2937,
      "rewards/chosen": 5.578735921984521,
      "rewards/margins": 7.396489386947476,
      "rewards/rejected": -1.8177534649629548,
      "step": 720
    },
    {
      "epoch": 9.733333333333333,
      "grad_norm": 30.375,
      "kl": 0.0,
      "learning_rate": 2.8e-08,
      "logits/rejected": 1695596134.4,
      "logps/rejected": -1616.91201171875,
      "loss": 0.2028,
      "rewards/rejected": -2.142536735534668,
      "step": 730
    },
    {
      "epoch": 9.866666666666667,
      "grad_norm": 42.5,
      "kl": 0.0,
      "learning_rate": 1.4666666666666666e-08,
      "logits/rejected": 1732339097.6,
      "logps/rejected": -1627.45458984375,
      "loss": 0.211,
      "rewards/rejected": -2.0092498779296877,
      "step": 740
    },
    {
      "epoch": 10.0,
      "grad_norm": 42.0,
      "kl": 0.0,
      "learning_rate": 1.3333333333333333e-09,
      "logits/rejected": 1714343731.2,
      "logps/rejected": -1500.21591796875,
      "loss": 0.2487,
      "rewards/rejected": -1.6958515167236328,
      "step": 750
    }
  ],
  "logging_steps": 10,
  "max_steps": 750,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}