| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9850746268656716, |
| "eval_steps": 100, |
| "global_step": 132, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.3593978881836, |
| "epoch": 0.014925373134328358, |
| "grad_norm": 0.40167078375816345, |
| "learning_rate": 7.142857142857142e-08, |
| "loss": 0.0181, |
| "num_tokens": 614922.0, |
| "reward": 0.28459822572767735, |
| "reward_std": 0.3557117339223623, |
| "rewards/accuracy_reward": 0.2845982164144516, |
| "step": 1 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 501.1183319091797, |
| "epoch": 0.029850746268656716, |
| "grad_norm": 0.35646918416023254, |
| "learning_rate": 1.4285714285714285e-07, |
| "loss": 0.0126, |
| "num_tokens": 1210628.0, |
| "reward": 0.29017858393490314, |
| "reward_std": 0.3156624883413315, |
| "rewards/accuracy_reward": 0.29017857648432255, |
| "step": 2 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 512.6149787902832, |
| "epoch": 0.04477611940298507, |
| "grad_norm": 0.5019609928131104, |
| "learning_rate": 2.1428571428571426e-07, |
| "loss": 0.0133, |
| "num_tokens": 1832067.0, |
| "reward": 0.2935268022119999, |
| "reward_std": 0.3709743171930313, |
| "rewards/accuracy_reward": 0.29352678544819355, |
| "step": 3 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 546.1875190734863, |
| "epoch": 0.05970149253731343, |
| "grad_norm": 0.3622360825538635, |
| "learning_rate": 2.857142857142857e-07, |
| "loss": 0.0023, |
| "num_tokens": 2466195.0, |
| "reward": 0.25334822200238705, |
| "reward_std": 0.3327263258397579, |
| "rewards/accuracy_reward": 0.25334821455180645, |
| "step": 4 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.721004486084, |
| "epoch": 0.07462686567164178, |
| "grad_norm": 0.43017202615737915, |
| "learning_rate": 3.5714285714285716e-07, |
| "loss": 0.0169, |
| "num_tokens": 3092665.0, |
| "reward": 0.28348215855658054, |
| "reward_std": 0.3276054132729769, |
| "rewards/accuracy_reward": 0.2834821371361613, |
| "step": 5 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 481.4542579650879, |
| "epoch": 0.08955223880597014, |
| "grad_norm": 0.4641503393650055, |
| "learning_rate": 4.285714285714285e-07, |
| "loss": -0.0264, |
| "num_tokens": 3663768.0, |
| "reward": 0.2912946604192257, |
| "reward_std": 0.3355025686323643, |
| "rewards/accuracy_reward": 0.29129463620483875, |
| "step": 6 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 502.98106384277344, |
| "epoch": 0.1044776119402985, |
| "grad_norm": 0.43246936798095703, |
| "learning_rate": 5e-07, |
| "loss": 0.0148, |
| "num_tokens": 4258719.0, |
| "reward": 0.30133930034935474, |
| "reward_std": 0.34083990193903446, |
| "rewards/accuracy_reward": 0.3013392835855484, |
| "step": 7 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 494.30359649658203, |
| "epoch": 0.11940298507462686, |
| "grad_norm": 0.47649499773979187, |
| "learning_rate": 5.714285714285714e-07, |
| "loss": 0.0064, |
| "num_tokens": 4834495.0, |
| "reward": 0.3147321566939354, |
| "reward_std": 0.36961813643574715, |
| "rewards/accuracy_reward": 0.31473213993012905, |
| "step": 8 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 496.06921768188477, |
| "epoch": 0.13432835820895522, |
| "grad_norm": 0.3420712947845459, |
| "learning_rate": 6.428571428571429e-07, |
| "loss": -0.0137, |
| "num_tokens": 5419541.0, |
| "reward": 0.3325892984867096, |
| "reward_std": 0.3436175622045994, |
| "rewards/accuracy_reward": 0.3325892873108387, |
| "step": 9 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 494.09265518188477, |
| "epoch": 0.14925373134328357, |
| "grad_norm": 0.5828936696052551, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 0.0259, |
| "num_tokens": 6002120.0, |
| "reward": 0.3482143022119999, |
| "reward_std": 0.3884076401591301, |
| "rewards/accuracy_reward": 0.34821428172290325, |
| "step": 10 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 538.0357360839844, |
| "epoch": 0.16417910447761194, |
| "grad_norm": 0.5086678266525269, |
| "learning_rate": 7.857142857142856e-07, |
| "loss": -0.0038, |
| "num_tokens": 6629488.0, |
| "reward": 0.3716518059372902, |
| "reward_std": 0.3423391878604889, |
| "rewards/accuracy_reward": 0.37165178544819355, |
| "step": 11 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.3069458007812, |
| "epoch": 0.1791044776119403, |
| "grad_norm": 0.34488609433174133, |
| "learning_rate": 8.57142857142857e-07, |
| "loss": 0.014, |
| "num_tokens": 7248163.0, |
| "reward": 0.3917410857975483, |
| "reward_std": 0.3631545342504978, |
| "rewards/accuracy_reward": 0.3917410708963871, |
| "step": 12 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 574.1094093322754, |
| "epoch": 0.19402985074626866, |
| "grad_norm": 0.5286847949028015, |
| "learning_rate": 9.285714285714285e-07, |
| "loss": -0.0135, |
| "num_tokens": 7900981.0, |
| "reward": 0.4866071715950966, |
| "reward_std": 0.35308703035116196, |
| "rewards/accuracy_reward": 0.4866071417927742, |
| "step": 13 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 509.4152069091797, |
| "epoch": 0.208955223880597, |
| "grad_norm": 0.5608493685722351, |
| "learning_rate": 1e-06, |
| "loss": 0.049, |
| "num_tokens": 8496521.0, |
| "reward": 0.5814732350409031, |
| "reward_std": 0.354553097859025, |
| "rewards/accuracy_reward": 0.5814732164144516, |
| "step": 14 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 506.89064025878906, |
| "epoch": 0.22388059701492538, |
| "grad_norm": 0.415044903755188, |
| "learning_rate": 9.998286624877785e-07, |
| "loss": -0.0023, |
| "num_tokens": 9080431.0, |
| "reward": 0.5881696678698063, |
| "reward_std": 0.33997591957449913, |
| "rewards/accuracy_reward": 0.5881696380674839, |
| "step": 15 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 564.6194458007812, |
| "epoch": 0.23880597014925373, |
| "grad_norm": 0.4049900472164154, |
| "learning_rate": 9.99314767377287e-07, |
| "loss": 0.0035, |
| "num_tokens": 9724930.0, |
| "reward": 0.616071455180645, |
| "reward_std": 0.30958189629018307, |
| "rewards/accuracy_reward": 0.6160714253783226, |
| "step": 16 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 554.748908996582, |
| "epoch": 0.2537313432835821, |
| "grad_norm": 0.2018774151802063, |
| "learning_rate": 9.98458666866564e-07, |
| "loss": 0.0057, |
| "num_tokens": 10386817.0, |
| "reward": 0.6763393059372902, |
| "reward_std": 0.2512755785137415, |
| "rewards/accuracy_reward": 0.676339291036129, |
| "step": 17 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 594.0759201049805, |
| "epoch": 0.26865671641791045, |
| "grad_norm": 0.21940495073795319, |
| "learning_rate": 9.972609476841365e-07, |
| "loss": 0.02, |
| "num_tokens": 11070389.0, |
| "reward": 0.6908482387661934, |
| "reward_std": 0.2618648335337639, |
| "rewards/accuracy_reward": 0.6908482164144516, |
| "step": 18 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 615.6250267028809, |
| "epoch": 0.2835820895522388, |
| "grad_norm": 0.22275717556476593, |
| "learning_rate": 9.957224306869053e-07, |
| "loss": 0.0155, |
| "num_tokens": 11762013.0, |
| "reward": 0.7031250298023224, |
| "reward_std": 0.21245348826050758, |
| "rewards/accuracy_reward": 0.7031249925494194, |
| "step": 19 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 590.3951225280762, |
| "epoch": 0.29850746268656714, |
| "grad_norm": 0.17150121927261353, |
| "learning_rate": 9.938441702975689e-07, |
| "loss": 0.0286, |
| "num_tokens": 12433383.0, |
| "reward": 0.768973246216774, |
| "reward_std": 0.20385023020207882, |
| "rewards/accuracy_reward": 0.768973208963871, |
| "step": 20 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 662.1127624511719, |
| "epoch": 0.31343283582089554, |
| "grad_norm": 0.13017700612545013, |
| "learning_rate": 9.916274537819773e-07, |
| "loss": 0.0282, |
| "num_tokens": 13166340.0, |
| "reward": 0.709821455180645, |
| "reward_std": 0.18888892605900764, |
| "rewards/accuracy_reward": 0.7098214253783226, |
| "step": 21 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 582.2143135070801, |
| "epoch": 0.3283582089552239, |
| "grad_norm": 0.161940336227417, |
| "learning_rate": 9.890738003669027e-07, |
| "loss": 0.0415, |
| "num_tokens": 13832700.0, |
| "reward": 0.7511161044239998, |
| "reward_std": 0.23555724322795868, |
| "rewards/accuracy_reward": 0.7511160671710968, |
| "step": 22 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 577.9185562133789, |
| "epoch": 0.34328358208955223, |
| "grad_norm": 0.30369359254837036, |
| "learning_rate": 9.861849601988383e-07, |
| "loss": 0.0423, |
| "num_tokens": 14496323.0, |
| "reward": 0.7354910969734192, |
| "reward_std": 0.15988358668982983, |
| "rewards/accuracy_reward": 0.7354910746216774, |
| "step": 23 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 671.771240234375, |
| "epoch": 0.3582089552238806, |
| "grad_norm": 0.2169778198003769, |
| "learning_rate": 9.82962913144534e-07, |
| "loss": 0.0421, |
| "num_tokens": 15249582.0, |
| "reward": 0.6964286044239998, |
| "reward_std": 0.21053165383636951, |
| "rewards/accuracy_reward": 0.6964285597205162, |
| "step": 24 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 625.8772583007812, |
| "epoch": 0.373134328358209, |
| "grad_norm": 0.20843251049518585, |
| "learning_rate": 9.794098674340966e-07, |
| "loss": 0.0417, |
| "num_tokens": 15940760.0, |
| "reward": 0.800223246216774, |
| "reward_std": 0.14455711469054222, |
| "rewards/accuracy_reward": 0.800223208963871, |
| "step": 25 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 673.678596496582, |
| "epoch": 0.3880597014925373, |
| "grad_norm": 0.14218851923942566, |
| "learning_rate": 9.755282581475767e-07, |
| "loss": 0.0623, |
| "num_tokens": 16697072.0, |
| "reward": 0.7087053805589676, |
| "reward_std": 0.1666869930922985, |
| "rewards/accuracy_reward": 0.7087053582072258, |
| "step": 26 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 670.2087326049805, |
| "epoch": 0.40298507462686567, |
| "grad_norm": 0.14040139317512512, |
| "learning_rate": 9.713207455460892e-07, |
| "loss": 0.0681, |
| "num_tokens": 17439499.0, |
| "reward": 0.675223246216774, |
| "reward_std": 0.20043206959962845, |
| "rewards/accuracy_reward": 0.675223208963871, |
| "step": 27 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 620.5960083007812, |
| "epoch": 0.417910447761194, |
| "grad_norm": 0.17488999664783478, |
| "learning_rate": 9.667902132486008e-07, |
| "loss": 0.0639, |
| "num_tokens": 18130177.0, |
| "reward": 0.7008928880095482, |
| "reward_std": 0.20606223493814468, |
| "rewards/accuracy_reward": 0.7008928656578064, |
| "step": 28 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 701.1161117553711, |
| "epoch": 0.43283582089552236, |
| "grad_norm": 0.1295379400253296, |
| "learning_rate": 9.619397662556433e-07, |
| "loss": 0.037, |
| "num_tokens": 18912849.0, |
| "reward": 0.6618303880095482, |
| "reward_std": 0.2049681916832924, |
| "rewards/accuracy_reward": 0.6618303582072258, |
| "step": 29 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 666.1741409301758, |
| "epoch": 0.44776119402985076, |
| "grad_norm": 0.12175110727548599, |
| "learning_rate": 9.567727288213004e-07, |
| "loss": 0.0543, |
| "num_tokens": 19647693.0, |
| "reward": 0.7388393208384514, |
| "reward_std": 0.2054214347153902, |
| "rewards/accuracy_reward": 0.738839291036129, |
| "step": 30 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 657.982177734375, |
| "epoch": 0.4626865671641791, |
| "grad_norm": 0.11877922713756561, |
| "learning_rate": 9.512926421749303e-07, |
| "loss": 0.0172, |
| "num_tokens": 20376269.0, |
| "reward": 0.7031250298023224, |
| "reward_std": 0.15015378780663013, |
| "rewards/accuracy_reward": 0.7031249925494194, |
| "step": 31 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 621.2243576049805, |
| "epoch": 0.47761194029850745, |
| "grad_norm": 0.2129260003566742, |
| "learning_rate": 9.455032620941839e-07, |
| "loss": 0.0567, |
| "num_tokens": 21076854.0, |
| "reward": 0.7354911044239998, |
| "reward_std": 0.17187250033020973, |
| "rewards/accuracy_reward": 0.7354910746216774, |
| "step": 32 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 687.317008972168, |
| "epoch": 0.4925373134328358, |
| "grad_norm": 0.1212068498134613, |
| "learning_rate": 9.394085563309826e-07, |
| "loss": 0.0524, |
| "num_tokens": 21833434.0, |
| "reward": 0.6997768133878708, |
| "reward_std": 0.17784573789685965, |
| "rewards/accuracy_reward": 0.699776791036129, |
| "step": 33 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 683.4386520385742, |
| "epoch": 0.5074626865671642, |
| "grad_norm": 0.1783527433872223, |
| "learning_rate": 9.330127018922193e-07, |
| "loss": 0.034, |
| "num_tokens": 22591323.0, |
| "reward": 0.7120535895228386, |
| "reward_std": 0.1899106204509735, |
| "rewards/accuracy_reward": 0.712053582072258, |
| "step": 34 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 648.4263763427734, |
| "epoch": 0.5223880597014925, |
| "grad_norm": 0.2292163074016571, |
| "learning_rate": 9.26320082177046e-07, |
| "loss": 0.0355, |
| "num_tokens": 23327401.0, |
| "reward": 0.7366071790456772, |
| "reward_std": 0.1552294003777206, |
| "rewards/accuracy_reward": 0.7366071492433548, |
| "step": 35 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 651.7165374755859, |
| "epoch": 0.5373134328358209, |
| "grad_norm": 0.24602435529232025, |
| "learning_rate": 9.19335283972712e-07, |
| "loss": 0.0426, |
| "num_tokens": 24070107.0, |
| "reward": 0.7287946790456772, |
| "reward_std": 0.18044043332338333, |
| "rewards/accuracy_reward": 0.7287946417927742, |
| "step": 36 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 616.3973541259766, |
| "epoch": 0.5522388059701493, |
| "grad_norm": 0.24687685072422028, |
| "learning_rate": 9.120630943110077e-07, |
| "loss": 0.0284, |
| "num_tokens": 24765055.0, |
| "reward": 0.7399553954601288, |
| "reward_std": 0.15244218427687883, |
| "rewards/accuracy_reward": 0.7399553582072258, |
| "step": 37 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 639.3995895385742, |
| "epoch": 0.5671641791044776, |
| "grad_norm": 0.42036116123199463, |
| "learning_rate": 9.045084971874737e-07, |
| "loss": 0.0429, |
| "num_tokens": 25498973.0, |
| "reward": 0.7455357536673546, |
| "reward_std": 0.1501858728006482, |
| "rewards/accuracy_reward": 0.745535708963871, |
| "step": 38 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 626.0937843322754, |
| "epoch": 0.582089552238806, |
| "grad_norm": 0.18518702685832977, |
| "learning_rate": 8.966766701456176e-07, |
| "loss": 0.0309, |
| "num_tokens": 26201849.0, |
| "reward": 0.6930803842842579, |
| "reward_std": 0.1755145499482751, |
| "rewards/accuracy_reward": 0.6930803470313549, |
| "step": 39 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 725.0134201049805, |
| "epoch": 0.5970149253731343, |
| "grad_norm": 0.11620926111936569, |
| "learning_rate": 8.885729807284854e-07, |
| "loss": 0.0474, |
| "num_tokens": 26990389.0, |
| "reward": 0.7142857536673546, |
| "reward_std": 0.18479816243052483, |
| "rewards/accuracy_reward": 0.7142857164144516, |
| "step": 40 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 641.0569534301758, |
| "epoch": 0.6119402985074627, |
| "grad_norm": 0.17211312055587769, |
| "learning_rate": 8.802029828000155e-07, |
| "loss": 0.0455, |
| "num_tokens": 27720616.0, |
| "reward": 0.6741071715950966, |
| "reward_std": 0.2003892920911312, |
| "rewards/accuracy_reward": 0.6741071417927742, |
| "step": 41 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 604.3359603881836, |
| "epoch": 0.6268656716417911, |
| "grad_norm": 0.15277408063411713, |
| "learning_rate": 8.71572412738697e-07, |
| "loss": 0.0497, |
| "num_tokens": 28399813.0, |
| "reward": 0.8292411118745804, |
| "reward_std": 0.13516291230916977, |
| "rewards/accuracy_reward": 0.8292410746216774, |
| "step": 42 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 622.8638687133789, |
| "epoch": 0.6417910447761194, |
| "grad_norm": 0.12167245894670486, |
| "learning_rate": 8.626871855061437e-07, |
| "loss": 0.0265, |
| "num_tokens": 29108747.0, |
| "reward": 0.7633928954601288, |
| "reward_std": 0.16634858772158623, |
| "rewards/accuracy_reward": 0.7633928582072258, |
| "step": 43 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 607.7199020385742, |
| "epoch": 0.6567164179104478, |
| "grad_norm": 0.11686498671770096, |
| "learning_rate": 8.535533905932737e-07, |
| "loss": 0.0335, |
| "num_tokens": 29795408.0, |
| "reward": 0.7488839626312256, |
| "reward_std": 0.15169290080666542, |
| "rewards/accuracy_reward": 0.7488839328289032, |
| "step": 44 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 619.7567291259766, |
| "epoch": 0.6716417910447762, |
| "grad_norm": 0.09357903152704239, |
| "learning_rate": 8.441772878468769e-07, |
| "loss": 0.0355, |
| "num_tokens": 30492150.0, |
| "reward": 0.7790178954601288, |
| "reward_std": 0.1362890424206853, |
| "rewards/accuracy_reward": 0.7790178507566452, |
| "step": 45 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 701.7835083007812, |
| "epoch": 0.6865671641791045, |
| "grad_norm": 0.1133616715669632, |
| "learning_rate": 8.34565303179429e-07, |
| "loss": 0.0535, |
| "num_tokens": 31269972.0, |
| "reward": 0.6875000298023224, |
| "reward_std": 0.1872752346098423, |
| "rewards/accuracy_reward": 0.6875, |
| "step": 46 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 640.8549346923828, |
| "epoch": 0.7014925373134329, |
| "grad_norm": 0.08914685249328613, |
| "learning_rate": 8.247240241650917e-07, |
| "loss": 0.0132, |
| "num_tokens": 31975466.0, |
| "reward": 0.7589286118745804, |
| "reward_std": 0.12584246136248112, |
| "rewards/accuracy_reward": 0.7589285671710968, |
| "step": 47 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 637.5033836364746, |
| "epoch": 0.7164179104477612, |
| "grad_norm": 0.10743953287601471, |
| "learning_rate": 8.146601955249187e-07, |
| "loss": 0.0135, |
| "num_tokens": 32688717.0, |
| "reward": 0.7801339626312256, |
| "reward_std": 0.14560949243605137, |
| "rewards/accuracy_reward": 0.7801339253783226, |
| "step": 48 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 655.8404159545898, |
| "epoch": 0.7313432835820896, |
| "grad_norm": 0.11401466280221939, |
| "learning_rate": 8.043807145043603e-07, |
| "loss": 0.0236, |
| "num_tokens": 33427982.0, |
| "reward": 0.7265625223517418, |
| "reward_std": 0.1881414633244276, |
| "rewards/accuracy_reward": 0.7265625, |
| "step": 49 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 619.4062728881836, |
| "epoch": 0.746268656716418, |
| "grad_norm": 0.16785398125648499, |
| "learning_rate": 7.938926261462365e-07, |
| "loss": 0.0325, |
| "num_tokens": 34136354.0, |
| "reward": 0.6930803954601288, |
| "reward_std": 0.16814983636140823, |
| "rewards/accuracy_reward": 0.6930803582072258, |
| "step": 50 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 603.1082992553711, |
| "epoch": 0.7611940298507462, |
| "grad_norm": 0.09643765538930893, |
| "learning_rate": 7.832031184624164e-07, |
| "loss": 0.0244, |
| "num_tokens": 34829755.0, |
| "reward": 0.7366071715950966, |
| "reward_std": 0.15785480476915836, |
| "rewards/accuracy_reward": 0.7366071492433548, |
| "step": 51 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 617.9788246154785, |
| "epoch": 0.7761194029850746, |
| "grad_norm": 0.1667710691690445, |
| "learning_rate": 7.723195175075135e-07, |
| "loss": 0.0208, |
| "num_tokens": 35527344.0, |
| "reward": 0.7198661044239998, |
| "reward_std": 0.11693863570690155, |
| "rewards/accuracy_reward": 0.7198660671710968, |
| "step": 52 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 620.7823944091797, |
| "epoch": 0.7910447761194029, |
| "grad_norm": 0.18700341880321503, |
| "learning_rate": 7.612492823579744e-07, |
| "loss": 0.0049, |
| "num_tokens": 36226469.0, |
| "reward": 0.7020089626312256, |
| "reward_std": 0.16777005605399609, |
| "rewards/accuracy_reward": 0.7020089328289032, |
| "step": 53 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 591.4152069091797, |
| "epoch": 0.8059701492537313, |
| "grad_norm": 0.10829013586044312, |
| "learning_rate": 7.5e-07, |
| "loss": 0.0084, |
| "num_tokens": 36906441.0, |
| "reward": 0.7477678880095482, |
| "reward_std": 0.14278795383870602, |
| "rewards/accuracy_reward": 0.7477678507566452, |
| "step": 54 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 602.3995742797852, |
| "epoch": 0.8208955223880597, |
| "grad_norm": 0.1018439456820488, |
| "learning_rate": 7.385793801298042e-07, |
| "loss": 0.0166, |
| "num_tokens": 37600543.0, |
| "reward": 0.7566964626312256, |
| "reward_std": 0.1503392457962036, |
| "rewards/accuracy_reward": 0.756696417927742, |
| "step": 55 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 617.087085723877, |
| "epoch": 0.835820895522388, |
| "grad_norm": 0.1039031594991684, |
| "learning_rate": 7.269952498697734e-07, |
| "loss": 0.0266, |
| "num_tokens": 38299741.0, |
| "reward": 0.7165178880095482, |
| "reward_std": 0.17983242496848106, |
| "rewards/accuracy_reward": 0.7165178582072258, |
| "step": 56 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 588.6105194091797, |
| "epoch": 0.8507462686567164, |
| "grad_norm": 0.09185818582773209, |
| "learning_rate": 7.152555484041475e-07, |
| "loss": 0.0155, |
| "num_tokens": 38965368.0, |
| "reward": 0.7834821790456772, |
| "reward_std": 0.13771093636751175, |
| "rewards/accuracy_reward": 0.7834821343421936, |
| "step": 57 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 611.9263687133789, |
| "epoch": 0.8656716417910447, |
| "grad_norm": 0.1022716760635376, |
| "learning_rate": 7.033683215379002e-07, |
| "loss": 0.0119, |
| "num_tokens": 39654854.0, |
| "reward": 0.7209821715950966, |
| "reward_std": 0.13534655049443245, |
| "rewards/accuracy_reward": 0.7209821492433548, |
| "step": 58 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 615.6919937133789, |
| "epoch": 0.8805970149253731, |
| "grad_norm": 0.1027907207608223, |
| "learning_rate": 6.913417161825449e-07, |
| "loss": 0.0009, |
| "num_tokens": 40347154.0, |
| "reward": 0.7879464626312256, |
| "reward_std": 0.1539812944829464, |
| "rewards/accuracy_reward": 0.7879464328289032, |
| "step": 59 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 579.677490234375, |
| "epoch": 0.8955223880597015, |
| "grad_norm": 0.09836462140083313, |
| "learning_rate": 6.7918397477265e-07, |
| "loss": 0.0268, |
| "num_tokens": 41016673.0, |
| "reward": 0.7477678805589676, |
| "reward_std": 0.1540593858808279, |
| "rewards/accuracy_reward": 0.7477678656578064, |
| "step": 60 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 636.4364013671875, |
| "epoch": 0.9104477611940298, |
| "grad_norm": 0.0979558676481247, |
| "learning_rate": 6.669034296168854e-07, |
| "loss": 0.0155, |
| "num_tokens": 41744328.0, |
| "reward": 0.734375037252903, |
| "reward_std": 0.13192950701341033, |
| "rewards/accuracy_reward": 0.7343749925494194, |
| "step": 61 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 593.4297180175781, |
| "epoch": 0.9253731343283582, |
| "grad_norm": 0.1013529971241951, |
| "learning_rate": 6.545084971874736e-07, |
| "loss": 0.0192, |
| "num_tokens": 42416689.0, |
| "reward": 0.8013393208384514, |
| "reward_std": 0.15724862087517977, |
| "rewards/accuracy_reward": 0.801339291036129, |
| "step": 62 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 653.9620895385742, |
| "epoch": 0.9402985074626866, |
| "grad_norm": 0.10375187546014786, |
| "learning_rate": 6.420076723519614e-07, |
| "loss": 0.0264, |
| "num_tokens": 43147383.0, |
| "reward": 0.7466518208384514, |
| "reward_std": 0.19015955366194248, |
| "rewards/accuracy_reward": 0.7466517835855484, |
| "step": 63 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 568.0390853881836, |
| "epoch": 0.9552238805970149, |
| "grad_norm": 0.10790595412254333, |
| "learning_rate": 6.294095225512604e-07, |
| "loss": 0.0073, |
| "num_tokens": 43808258.0, |
| "reward": 0.7767857536673546, |
| "reward_std": 0.15112948138266802, |
| "rewards/accuracy_reward": 0.776785708963871, |
| "step": 64 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 572.7187805175781, |
| "epoch": 0.9701492537313433, |
| "grad_norm": 0.10618384927511215, |
| "learning_rate": 6.167226819279527e-07, |
| "loss": 0.0079, |
| "num_tokens": 44469838.0, |
| "reward": 0.7399553954601288, |
| "reward_std": 0.15288866125047207, |
| "rewards/accuracy_reward": 0.7399553507566452, |
| "step": 65 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 647.2789344787598, |
| "epoch": 0.9850746268656716, |
| "grad_norm": 0.09327281266450882, |
| "learning_rate": 6.039558454088795e-07, |
| "loss": 0.016, |
| "num_tokens": 45187944.0, |
| "reward": 0.7433036044239998, |
| "reward_std": 0.1536117848008871, |
| "rewards/accuracy_reward": 0.7433035746216774, |
| "step": 66 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 594.9609642028809, |
| "epoch": 1.0149253731343284, |
| "grad_norm": 0.08279585093259811, |
| "learning_rate": 5.911177627460738e-07, |
| "loss": 0.0129, |
| "num_tokens": 45854645.0, |
| "reward": 0.7912946864962578, |
| "reward_std": 0.1345644756220281, |
| "rewards/accuracy_reward": 0.7912946492433548, |
| "step": 67 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 649.5837326049805, |
| "epoch": 1.0298507462686568, |
| "grad_norm": 0.08794418722391129, |
| "learning_rate": 5.782172325201155e-07, |
| "loss": 0.0213, |
| "num_tokens": 46584856.0, |
| "reward": 0.7645089626312256, |
| "reward_std": 0.12895571067929268, |
| "rewards/accuracy_reward": 0.7645089328289032, |
| "step": 68 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 580.2131881713867, |
| "epoch": 1.044776119402985, |
| "grad_norm": 0.10208556056022644, |
| "learning_rate": 5.652630961100258e-07, |
| "loss": 0.0136, |
| "num_tokens": 47245295.0, |
| "reward": 0.7622768208384514, |
| "reward_std": 0.12223039288073778, |
| "rewards/accuracy_reward": 0.7622767835855484, |
| "step": 69 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 577.7857437133789, |
| "epoch": 1.0597014925373134, |
| "grad_norm": 0.10996542125940323, |
| "learning_rate": 5.522642316338268e-07, |
| "loss": 0.0203, |
| "num_tokens": 47905519.0, |
| "reward": 0.7935268357396126, |
| "reward_std": 0.13906850665807724, |
| "rewards/accuracy_reward": 0.7935267835855484, |
| "step": 70 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 575.6328315734863, |
| "epoch": 1.0746268656716418, |
| "grad_norm": 0.10389033704996109, |
| "learning_rate": 5.392295478639225e-07, |
| "loss": 0.0182, |
| "num_tokens": 48571510.0, |
| "reward": 0.7901786118745804, |
| "reward_std": 0.1126860505901277, |
| "rewards/accuracy_reward": 0.7901785746216774, |
| "step": 71 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 606.5357360839844, |
| "epoch": 1.0895522388059702, |
| "grad_norm": 0.09268435835838318, |
| "learning_rate": 5.26167978121472e-07, |
| "loss": 0.0128, |
| "num_tokens": 49252990.0, |
| "reward": 0.7868303954601288, |
| "reward_std": 0.14030383061617613, |
| "rewards/accuracy_reward": 0.7868303433060646, |
| "step": 72 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 605.1551513671875, |
| "epoch": 1.1044776119402986, |
| "grad_norm": 0.11817800253629684, |
| "learning_rate": 5.130884741539366e-07, |
| "loss": 0.0319, |
| "num_tokens": 49943121.0, |
| "reward": 0.6997768208384514, |
| "reward_std": 0.17450424656271935, |
| "rewards/accuracy_reward": 0.6997767873108387, |
| "step": 73 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 575.7511291503906, |
| "epoch": 1.1194029850746268, |
| "grad_norm": 0.11024966835975647, |
| "learning_rate": 5e-07, |
| "loss": 0.0089, |
| "num_tokens": 50587458.0, |
| "reward": 0.7901786118745804, |
| "reward_std": 0.15706316381692886, |
| "rewards/accuracy_reward": 0.7901785671710968, |
| "step": 74 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 588.0446662902832, |
| "epoch": 1.1343283582089552, |
| "grad_norm": 0.10334320366382599, |
| "learning_rate": 4.869115258460634e-07, |
| "loss": 0.0111, |
| "num_tokens": 51253962.0, |
| "reward": 0.7678571790456772, |
| "reward_std": 0.13136567501351237, |
| "rewards/accuracy_reward": 0.7678571492433548, |
| "step": 75 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 579.7600708007812, |
| "epoch": 1.1492537313432836, |
| "grad_norm": 0.0887996256351471, |
| "learning_rate": 4.7383202187852804e-07, |
| "loss": 0.0138, |
| "num_tokens": 51915475.0, |
| "reward": 0.8258928954601288, |
| "reward_std": 0.11524363234639168, |
| "rewards/accuracy_reward": 0.8258928582072258, |
| "step": 76 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 502.2779235839844, |
| "epoch": 1.164179104477612, |
| "grad_norm": 0.10169437527656555, |
| "learning_rate": 4.6077045213607755e-07, |
| "loss": 0.009, |
| "num_tokens": 52506180.0, |
| "reward": 0.8459821790456772, |
| "reward_std": 0.09597061527892947, |
| "rewards/accuracy_reward": 0.8459821343421936, |
| "step": 77 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 619.1116333007812, |
| "epoch": 1.1791044776119404, |
| "grad_norm": 0.0879545658826828, |
| "learning_rate": 4.477357683661733e-07, |
| "loss": 0.0187, |
| "num_tokens": 53202944.0, |
| "reward": 0.7243303954601288, |
| "reward_std": 0.14004420721903443, |
| "rewards/accuracy_reward": 0.7243303507566452, |
| "step": 78 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 620.1060485839844, |
| "epoch": 1.1940298507462686, |
| "grad_norm": 0.10439935326576233, |
| "learning_rate": 4.347369038899743e-07, |
| "loss": 0.0064, |
| "num_tokens": 53904935.0, |
| "reward": 0.7511161044239998, |
| "reward_std": 0.1467349175363779, |
| "rewards/accuracy_reward": 0.7511160634458065, |
| "step": 79 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 634.0725631713867, |
| "epoch": 1.208955223880597, |
| "grad_norm": 0.10633595287799835, |
| "learning_rate": 4.2178276747988444e-07, |
| "loss": 0.0142, |
| "num_tokens": 54614712.0, |
| "reward": 0.697544664144516, |
| "reward_std": 0.14913531299680471, |
| "rewards/accuracy_reward": 0.6975446492433548, |
| "step": 80 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 590.0245780944824, |
| "epoch": 1.2238805970149254, |
| "grad_norm": 0.09192880988121033, |
| "learning_rate": 4.0888223725392624e-07, |
| "loss": 0.0119, |
| "num_tokens": 55283518.0, |
| "reward": 0.7555803880095482, |
| "reward_std": 0.11814074404537678, |
| "rewards/accuracy_reward": 0.7555803656578064, |
| "step": 81 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 551.6027030944824, |
| "epoch": 1.2388059701492538, |
| "grad_norm": 0.09458038955926895, |
| "learning_rate": 3.960441545911204e-07, |
| "loss": 0.0167, |
| "num_tokens": 55930090.0, |
| "reward": 0.7935268208384514, |
| "reward_std": 0.15564057044684887, |
| "rewards/accuracy_reward": 0.793526791036129, |
| "step": 82 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 599.2087249755859, |
| "epoch": 1.2537313432835822, |
| "grad_norm": 0.2296370565891266, |
| "learning_rate": 3.8327731807204744e-07, |
| "loss": 0.0074, |
| "num_tokens": 56610981.0, |
| "reward": 0.7232143133878708, |
| "reward_std": 0.1519518168643117, |
| "rewards/accuracy_reward": 0.723214291036129, |
| "step": 83 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 599.6797103881836, |
| "epoch": 1.2686567164179103, |
| "grad_norm": 0.11104385554790497, |
| "learning_rate": 3.7059047744873955e-07, |
| "loss": 0.0174, |
| "num_tokens": 57302150.0, |
| "reward": 0.7466518208384514, |
| "reward_std": 0.1660846211016178, |
| "rewards/accuracy_reward": 0.746651791036129, |
| "step": 84 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.4330596923828, |
| "epoch": 1.2835820895522387, |
| "grad_norm": 0.08728226274251938, |
| "learning_rate": 3.5799232764803867e-07, |
| "loss": 0.0137, |
| "num_tokens": 57956850.0, |
| "reward": 0.8035714626312256, |
| "reward_std": 0.13602055981755257, |
| "rewards/accuracy_reward": 0.8035714328289032, |
| "step": 85 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 594.2500228881836, |
| "epoch": 1.2985074626865671, |
| "grad_norm": 0.07698384672403336, |
| "learning_rate": 3.454915028125263e-07, |
| "loss": 0.0094, |
| "num_tokens": 58627226.0, |
| "reward": 0.7165178880095482, |
| "reward_std": 0.10528812417760491, |
| "rewards/accuracy_reward": 0.7165178582072258, |
| "step": 86 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 612.5201187133789, |
| "epoch": 1.3134328358208955, |
| "grad_norm": 0.09053296595811844, |
| "learning_rate": 3.330965703831146e-07, |
| "loss": 0.0179, |
| "num_tokens": 59323436.0, |
| "reward": 0.7477678805589676, |
| "reward_std": 0.128175038844347, |
| "rewards/accuracy_reward": 0.7477678656578064, |
| "step": 87 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 564.0803680419922, |
| "epoch": 1.328358208955224, |
| "grad_norm": 0.1006743460893631, |
| "learning_rate": 3.2081602522734985e-07, |
| "loss": 0.0006, |
| "num_tokens": 59982468.0, |
| "reward": 0.8537946864962578, |
| "reward_std": 0.11889003310352564, |
| "rewards/accuracy_reward": 0.8537946417927742, |
| "step": 88 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 543.8102951049805, |
| "epoch": 1.3432835820895521, |
| "grad_norm": 0.10308939963579178, |
| "learning_rate": 3.086582838174551e-07, |
| "loss": 0.0275, |
| "num_tokens": 60610842.0, |
| "reward": 0.8113839700818062, |
| "reward_std": 0.13978207390755415, |
| "rewards/accuracy_reward": 0.811383917927742, |
| "step": 89 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 607.444221496582, |
| "epoch": 1.3582089552238805, |
| "grad_norm": 0.10902010649442673, |
| "learning_rate": 2.9663167846209996e-07, |
| "loss": 0.0158, |
| "num_tokens": 61303472.0, |
| "reward": 0.714285746216774, |
| "reward_std": 0.14834184758365154, |
| "rewards/accuracy_reward": 0.714285708963871, |
| "step": 90 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 543.1886367797852, |
| "epoch": 1.373134328358209, |
| "grad_norm": 0.10252662748098373, |
| "learning_rate": 2.847444515958523e-07, |
| "loss": 0.0222, |
| "num_tokens": 61937529.0, |
| "reward": 0.8281250447034836, |
| "reward_std": 0.136213474906981, |
| "rewards/accuracy_reward": 0.828125, |
| "step": 91 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 602.7846336364746, |
| "epoch": 1.3880597014925373, |
| "grad_norm": 0.08475417643785477, |
| "learning_rate": 2.730047501302266e-07, |
| "loss": 0.0157, |
| "num_tokens": 62618920.0, |
| "reward": 0.7968750298023224, |
| "reward_std": 0.12377202790230513, |
| "rewards/accuracy_reward": 0.796875, |
| "step": 92 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 605.2578353881836, |
| "epoch": 1.4029850746268657, |
| "grad_norm": 0.0900595411658287, |
| "learning_rate": 2.6142061987019574e-07, |
| "loss": 0.0148, |
| "num_tokens": 63296367.0, |
| "reward": 0.784598246216774, |
| "reward_std": 0.13316552620381117, |
| "rewards/accuracy_reward": 0.7845982238650322, |
| "step": 93 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 563.7623062133789, |
| "epoch": 1.417910447761194, |
| "grad_norm": 0.08642500638961792, |
| "learning_rate": 2.500000000000001e-07, |
| "loss": 0.0067, |
| "num_tokens": 63959314.0, |
| "reward": 0.7645089626312256, |
| "reward_std": 0.12305664969608188, |
| "rewards/accuracy_reward": 0.7645089253783226, |
| "step": 94 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 602.0781555175781, |
| "epoch": 1.4328358208955223, |
| "grad_norm": 0.10483755171298981, |
| "learning_rate": 2.387507176420256e-07, |
| "loss": 0.0038, |
| "num_tokens": 64645648.0, |
| "reward": 0.7388393133878708, |
| "reward_std": 0.12692945264279842, |
| "rewards/accuracy_reward": 0.738839291036129, |
| "step": 95 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 634.3705749511719, |
| "epoch": 1.4477611940298507, |
| "grad_norm": 0.08823440223932266, |
| "learning_rate": 2.2768048249248644e-07, |
| "loss": 0.0214, |
| "num_tokens": 65358500.0, |
| "reward": 0.7645089626312256, |
| "reward_std": 0.12505152076482773, |
| "rewards/accuracy_reward": 0.7645089253783226, |
| "step": 96 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 555.154052734375, |
| "epoch": 1.462686567164179, |
| "grad_norm": 0.11077926307916641, |
| "learning_rate": 2.167968815375837e-07, |
| "loss": 0.0077, |
| "num_tokens": 66006806.0, |
| "reward": 0.7756696715950966, |
| "reward_std": 0.12779708206653595, |
| "rewards/accuracy_reward": 0.7756696492433548, |
| "step": 97 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 632.3147735595703, |
| "epoch": 1.4776119402985075, |
| "grad_norm": 0.13761980831623077, |
| "learning_rate": 2.0610737385376348e-07, |
| "loss": 0.0282, |
| "num_tokens": 66730488.0, |
| "reward": 0.7299107536673546, |
| "reward_std": 0.15939321741461754, |
| "rewards/accuracy_reward": 0.729910708963871, |
| "step": 98 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 654.2366409301758, |
| "epoch": 1.4925373134328357, |
| "grad_norm": 0.11041553318500519, |
| "learning_rate": 1.9561928549563966e-07, |
| "loss": 0.0084, |
| "num_tokens": 67470004.0, |
| "reward": 0.670758955180645, |
| "reward_std": 0.15143328253179789, |
| "rewards/accuracy_reward": 0.6707589365541935, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.5074626865671643, |
| "grad_norm": 0.09152326732873917, |
| "learning_rate": 1.8533980447508135e-07, |
| "loss": 0.0088, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.5074626865671643, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 581.8313814301731, |
| "eval_loss": 0.016322219744324684, |
| "eval_num_tokens": 68121683.0, |
| "eval_reward": 0.7219673107123242, |
| "eval_reward_std": 0.16760184048732232, |
| "eval_rewards/accuracy_reward": 0.7219672777466268, |
| "eval_runtime": 7556.7096, |
| "eval_samples_per_second": 0.662, |
| "eval_steps_per_second": 0.006, |
| "step": 100 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 590.9570579528809, |
| "epoch": 1.5223880597014925, |
| "grad_norm": 0.09221798926591873, |
| "learning_rate": 1.7527597583490823e-07, |
| "loss": 0.0013, |
| "num_tokens": 68805103.0, |
| "reward": 0.7901786044239998, |
| "reward_std": 0.1202246134635061, |
| "rewards/accuracy_reward": 0.7901785746216774, |
| "step": 101 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 568.4319305419922, |
| "epoch": 1.537313432835821, |
| "grad_norm": 0.08379292488098145, |
| "learning_rate": 1.6543469682057104e-07, |
| "loss": 0.0114, |
| "num_tokens": 69465698.0, |
| "reward": 0.7633928954601288, |
| "reward_std": 0.10716536361724138, |
| "rewards/accuracy_reward": 0.7633928582072258, |
| "step": 102 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 626.8917694091797, |
| "epoch": 1.5522388059701493, |
| "grad_norm": 0.09183470904827118, |
| "learning_rate": 1.5582271215312293e-07, |
| "loss": 0.0145, |
| "num_tokens": 70169185.0, |
| "reward": 0.753348246216774, |
| "reward_std": 0.12400025688111782, |
| "rewards/accuracy_reward": 0.7533482164144516, |
| "step": 103 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 561.1886329650879, |
| "epoch": 1.5671641791044775, |
| "grad_norm": 0.11465712636709213, |
| "learning_rate": 1.4644660940672627e-07, |
| "loss": 0.0233, |
| "num_tokens": 70823314.0, |
| "reward": 0.7723214700818062, |
| "reward_std": 0.14902723766863346, |
| "rewards/accuracy_reward": 0.7723214328289032, |
| "step": 104 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 595.260066986084, |
| "epoch": 1.582089552238806, |
| "grad_norm": 0.10910345613956451, |
| "learning_rate": 1.3731281449385628e-07, |
| "loss": 0.0132, |
| "num_tokens": 71509203.0, |
| "reward": 0.7834821790456772, |
| "reward_std": 0.13775580935180187, |
| "rewards/accuracy_reward": 0.7834821417927742, |
| "step": 105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 622.6629753112793, |
| "epoch": 1.5970149253731343, |
| "grad_norm": 0.10859620571136475, |
| "learning_rate": 1.284275872613028e-07, |
| "loss": -0.0033, |
| "num_tokens": 72213229.0, |
| "reward": 0.7332589700818062, |
| "reward_std": 0.13835103251039982, |
| "rewards/accuracy_reward": 0.7332589253783226, |
| "step": 106 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 647.8147659301758, |
| "epoch": 1.6119402985074627, |
| "grad_norm": 0.12190267443656921, |
| "learning_rate": 1.1979701719998454e-07, |
| "loss": 0.0389, |
| "num_tokens": 72943663.0, |
| "reward": 0.7343750298023224, |
| "reward_std": 0.20019244402647018, |
| "rewards/accuracy_reward": 0.7343749925494194, |
| "step": 107 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 584.2020416259766, |
| "epoch": 1.626865671641791, |
| "grad_norm": 0.09703335165977478, |
| "learning_rate": 1.1142701927151454e-07, |
| "loss": 0.0177, |
| "num_tokens": 73602268.0, |
| "reward": 0.8080357536673546, |
| "reward_std": 0.14263570308685303, |
| "rewards/accuracy_reward": 0.808035708963871, |
| "step": 108 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 606.7857437133789, |
| "epoch": 1.6417910447761193, |
| "grad_norm": 0.10704245418310165, |
| "learning_rate": 1.0332332985438247e-07, |
| "loss": 0.0166, |
| "num_tokens": 74295444.0, |
| "reward": 0.7779018208384514, |
| "reward_std": 0.15495909843593836, |
| "rewards/accuracy_reward": 0.777901791036129, |
| "step": 109 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 562.9475708007812, |
| "epoch": 1.6567164179104479, |
| "grad_norm": 0.08908524364233017, |
| "learning_rate": 9.549150281252632e-08, |
| "loss": 0.0095, |
| "num_tokens": 74940989.0, |
| "reward": 0.7834821790456772, |
| "reward_std": 0.10505919344723225, |
| "rewards/accuracy_reward": 0.7834821492433548, |
| "step": 110 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 624.4922180175781, |
| "epoch": 1.671641791044776, |
| "grad_norm": 0.09753753989934921, |
| "learning_rate": 8.793690568899215e-08, |
| "loss": 0.0206, |
| "num_tokens": 75639342.0, |
| "reward": 0.7767857536673546, |
| "reward_std": 0.14068038668483496, |
| "rewards/accuracy_reward": 0.776785708963871, |
| "step": 111 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 622.2422256469727, |
| "epoch": 1.6865671641791045, |
| "grad_norm": 0.09617064148187637, |
| "learning_rate": 8.066471602728803e-08, |
| "loss": 0.0037, |
| "num_tokens": 76329767.0, |
| "reward": 0.7946428954601288, |
| "reward_std": 0.11434714496135712, |
| "rewards/accuracy_reward": 0.7946428582072258, |
| "step": 112 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 600.4765892028809, |
| "epoch": 1.7014925373134329, |
| "grad_norm": 0.08512350916862488, |
| "learning_rate": 7.36799178229539e-08, |
| "loss": 0.0183, |
| "num_tokens": 77010834.0, |
| "reward": 0.7366071715950966, |
| "reward_std": 0.10942125041037798, |
| "rewards/accuracy_reward": 0.7366071343421936, |
| "step": 113 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 561.8080596923828, |
| "epoch": 1.716417910447761, |
| "grad_norm": 0.1167522743344307, |
| "learning_rate": 6.698729810778064e-08, |
| "loss": 0.0032, |
| "num_tokens": 77661150.0, |
| "reward": 0.7801339700818062, |
| "reward_std": 0.1291090790182352, |
| "rewards/accuracy_reward": 0.7801339253783226, |
| "step": 114 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 627.9944458007812, |
| "epoch": 1.7313432835820897, |
| "grad_norm": 0.09695158898830414, |
| "learning_rate": 6.059144366901736e-08, |
| "loss": 0.0176, |
| "num_tokens": 78367609.0, |
| "reward": 0.7332589700818062, |
| "reward_std": 0.12523557152599096, |
| "rewards/accuracy_reward": 0.733258917927742, |
| "step": 115 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 562.5022583007812, |
| "epoch": 1.7462686567164178, |
| "grad_norm": 0.08717180788516998, |
| "learning_rate": 5.44967379058161e-08, |
| "loss": 0.0087, |
| "num_tokens": 79007427.0, |
| "reward": 0.7689732387661934, |
| "reward_std": 0.10855362564325333, |
| "rewards/accuracy_reward": 0.768973208963871, |
| "step": 116 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 622.9989051818848, |
| "epoch": 1.7611940298507462, |
| "grad_norm": 0.12031041085720062, |
| "learning_rate": 4.870735782506979e-08, |
| "loss": 0.0127, |
| "num_tokens": 79736314.0, |
| "reward": 0.725446455180645, |
| "reward_std": 0.16799716651439667, |
| "rewards/accuracy_reward": 0.725446417927742, |
| "step": 117 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 629.5993537902832, |
| "epoch": 1.7761194029850746, |
| "grad_norm": 0.11032404005527496, |
| "learning_rate": 4.322727117869951e-08, |
| "loss": 0.0073, |
| "num_tokens": 80442075.0, |
| "reward": 0.7622768208384514, |
| "reward_std": 0.14162609539926052, |
| "rewards/accuracy_reward": 0.7622767835855484, |
| "step": 118 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 597.2299423217773, |
| "epoch": 1.7910447761194028, |
| "grad_norm": 0.09072305262088776, |
| "learning_rate": 3.806023374435663e-08, |
| "loss": 0.0189, |
| "num_tokens": 81117833.0, |
| "reward": 0.7522321715950966, |
| "reward_std": 0.12486787885427475, |
| "rewards/accuracy_reward": 0.7522321417927742, |
| "step": 119 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 599.5346298217773, |
| "epoch": 1.8059701492537314, |
| "grad_norm": 0.10171713680028915, |
| "learning_rate": 3.3209786751399184e-08, |
| "loss": 0.009, |
| "num_tokens": 81809056.0, |
| "reward": 0.7946428954601288, |
| "reward_std": 0.1557179531082511, |
| "rewards/accuracy_reward": 0.7946428507566452, |
| "step": 120 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 579.971004486084, |
| "epoch": 1.8208955223880596, |
| "grad_norm": 0.09700023382902145, |
| "learning_rate": 2.8679254453910785e-08, |
| "loss": 0.0068, |
| "num_tokens": 82474246.0, |
| "reward": 0.7354911044239998, |
| "reward_std": 0.1312894057482481, |
| "rewards/accuracy_reward": 0.7354910746216774, |
| "step": 121 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 620.3493499755859, |
| "epoch": 1.835820895522388, |
| "grad_norm": 0.09332066029310226, |
| "learning_rate": 2.4471741852423233e-08, |
| "loss": 0.0129, |
| "num_tokens": 83191831.0, |
| "reward": 0.7555803880095482, |
| "reward_std": 0.1526290439069271, |
| "rewards/accuracy_reward": 0.7555803582072258, |
| "step": 122 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 618.1640853881836, |
| "epoch": 1.8507462686567164, |
| "grad_norm": 0.1387184113264084, |
| "learning_rate": 2.0590132565903473e-08, |
| "loss": 0.0281, |
| "num_tokens": 83893978.0, |
| "reward": 0.7700893133878708, |
| "reward_std": 0.17633870337158442, |
| "rewards/accuracy_reward": 0.770089291036129, |
| "step": 123 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.0748023986816, |
| "epoch": 1.8656716417910446, |
| "grad_norm": 0.11000484973192215, |
| "learning_rate": 1.7037086855465898e-08, |
| "loss": 0.01, |
| "num_tokens": 84554317.0, |
| "reward": 0.7511161118745804, |
| "reward_std": 0.13362086936831474, |
| "rewards/accuracy_reward": 0.7511160671710968, |
| "step": 124 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 628.025707244873, |
| "epoch": 1.8805970149253732, |
| "grad_norm": 0.0943480134010315, |
| "learning_rate": 1.3815039801161722e-08, |
| "loss": 0.0185, |
| "num_tokens": 85259028.0, |
| "reward": 0.7377232313156128, |
| "reward_std": 0.1586036691442132, |
| "rewards/accuracy_reward": 0.7377232238650322, |
| "step": 125 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 545.2154235839844, |
| "epoch": 1.8955223880597014, |
| "grad_norm": 0.11781350523233414, |
| "learning_rate": 1.0926199633097154e-08, |
| "loss": 0.023, |
| "num_tokens": 85888949.0, |
| "reward": 0.787946455180645, |
| "reward_std": 0.13380562094971538, |
| "rewards/accuracy_reward": 0.7879464328289032, |
| "step": 126 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 609.3426513671875, |
| "epoch": 1.9104477611940298, |
| "grad_norm": 0.11208699643611908, |
| "learning_rate": 8.372546218022746e-09, |
| "loss": 0.0213, |
| "num_tokens": 86586952.0, |
| "reward": 0.7243303805589676, |
| "reward_std": 0.1444790279492736, |
| "rewards/accuracy_reward": 0.7243303656578064, |
| "step": 127 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.1094207763672, |
| "epoch": 1.9253731343283582, |
| "grad_norm": 0.09035325050354004, |
| "learning_rate": 6.15582970243117e-09, |
| "loss": 0.0098, |
| "num_tokens": 87253226.0, |
| "reward": 0.8147321790456772, |
| "reward_std": 0.11614769464358687, |
| "rewards/accuracy_reward": 0.8147321492433548, |
| "step": 128 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 607.8404235839844, |
| "epoch": 1.9402985074626866, |
| "grad_norm": 0.11255651712417603, |
| "learning_rate": 4.277569313094809e-09, |
| "loss": 0.0127, |
| "num_tokens": 87936411.0, |
| "reward": 0.7700893208384514, |
| "reward_std": 0.17164426296949387, |
| "rewards/accuracy_reward": 0.7700892761349678, |
| "step": 129 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 625.9174423217773, |
| "epoch": 1.955223880597015, |
| "grad_norm": 0.09853976219892502, |
| "learning_rate": 2.739052315863355e-09, |
| "loss": 0.0062, |
| "num_tokens": 88654209.0, |
| "reward": 0.7131696715950966, |
| "reward_std": 0.1297498755156994, |
| "rewards/accuracy_reward": 0.7131696417927742, |
| "step": 130 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 566.3649787902832, |
| "epoch": 1.9701492537313432, |
| "grad_norm": 0.11755723506212234, |
| "learning_rate": 1.541333133436018e-09, |
| "loss": 0.003, |
| "num_tokens": 89303936.0, |
| "reward": 0.7968750447034836, |
| "reward_std": 0.16296614985913038, |
| "rewards/accuracy_reward": 0.7968750074505806, |
| "step": 131 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 602.0601844787598, |
| "epoch": 1.9850746268656716, |
| "grad_norm": 0.09191662073135376, |
| "learning_rate": 6.852326227130833e-10, |
| "loss": 0.0104, |
| "num_tokens": 89996910.0, |
| "reward": 0.7433036118745804, |
| "reward_std": 0.12050722911953926, |
| "rewards/accuracy_reward": 0.7433035671710968, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.9850746268656716, |
| "step": 132, |
| "total_flos": 0.0, |
| "train_loss": 0.019297132296328942, |
| "train_runtime": 35491.2022, |
| "train_samples_per_second": 0.423, |
| "train_steps_per_second": 0.004 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 134, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|