| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.374772190851782, |
| "eval_steps": 100, |
| "global_step": 1566, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 562.7477951049805, |
| "epoch": 0.0019121030145499089, |
| "grad_norm": 0.022798627614974976, |
| "kl": 0.0, |
| "learning_rate": 1.5923566878980894e-08, |
| "loss": 0.002, |
| "num_tokens": 3752220.0, |
| "reward": 0.011439732770668343, |
| "reward_std": 0.019404857070185244, |
| "rewards/pure_accuracy_reward_math": 0.011439732537837699, |
| "step": 1 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "epoch": 0.0038242060290998177, |
| "grad_norm": 0.02280641719698906, |
| "kl": 0.0, |
| "learning_rate": 3.184713375796179e-08, |
| "loss": 0.002, |
| "step": 2 |
| }, |
| { |
| "clip_ratio": 7.760171627069212e-05, |
| "epoch": 0.005736309043649726, |
| "grad_norm": 0.02249608002603054, |
| "kl": 0.00034177303314208984, |
| "learning_rate": 4.777070063694268e-08, |
| "loss": 0.002, |
| "step": 3 |
| }, |
| { |
| "clip_ratio": 7.010291557207893e-05, |
| "epoch": 0.0076484120581996355, |
| "grad_norm": 0.022546162828803062, |
| "kl": 0.0003476440906524658, |
| "learning_rate": 6.369426751592358e-08, |
| "loss": 0.002, |
| "step": 4 |
| }, |
| { |
| "clip_ratio": 6.121935876990392e-05, |
| "epoch": 0.009560515072749545, |
| "grad_norm": 0.022293007001280785, |
| "kl": 0.00034675002098083496, |
| "learning_rate": 7.961783439490447e-08, |
| "loss": 0.002, |
| "step": 5 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 569.8786544799805, |
| "epoch": 0.011472618087299453, |
| "grad_norm": 0.04666038230061531, |
| "kl": 0.000458449125289917, |
| "learning_rate": 9.554140127388536e-08, |
| "loss": 0.0036, |
| "num_tokens": 7526881.0, |
| "reward": 0.010323661233996972, |
| "reward_std": 0.01923220051685348, |
| "rewards/pure_accuracy_reward_math": 0.01032366111758165, |
| "step": 6 |
| }, |
| { |
| "clip_ratio": 9.284320668712098e-05, |
| "epoch": 0.013384721101849363, |
| "grad_norm": 0.03701707720756531, |
| "kl": 0.0004444718360900879, |
| "learning_rate": 1.1146496815286625e-07, |
| "loss": 0.0037, |
| "step": 7 |
| }, |
| { |
| "clip_ratio": 0.00010049525423028172, |
| "epoch": 0.015296824116399271, |
| "grad_norm": 0.05443934351205826, |
| "kl": 0.0004649162292480469, |
| "learning_rate": 1.2738853503184715e-07, |
| "loss": 0.0037, |
| "step": 8 |
| }, |
| { |
| "clip_ratio": 9.395023369052069e-05, |
| "epoch": 0.01720892713094918, |
| "grad_norm": 0.0357414111495018, |
| "kl": 0.0004501640796661377, |
| "learning_rate": 1.4331210191082803e-07, |
| "loss": 0.0037, |
| "step": 9 |
| }, |
| { |
| "clip_ratio": 0.00010371651984542041, |
| "epoch": 0.01912103014549909, |
| "grad_norm": 0.05199029669165611, |
| "kl": 0.0004614591598510742, |
| "learning_rate": 1.5923566878980893e-07, |
| "loss": 0.0037, |
| "step": 10 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 570.0694994926453, |
| "epoch": 0.021033133160048997, |
| "grad_norm": 0.022845298051834106, |
| "kl": 0.00035685300827026367, |
| "learning_rate": 1.751592356687898e-07, |
| "loss": 0.0025, |
| "num_tokens": 11302358.0, |
| "reward": 0.00948660756694153, |
| "reward_std": 0.017558093182742596, |
| "rewards/pure_accuracy_reward_math": 0.00948660756694153, |
| "step": 11 |
| }, |
| { |
| "clip_ratio": 7.08361723127382e-05, |
| "epoch": 0.022945236174598906, |
| "grad_norm": 0.02234972082078457, |
| "kl": 0.0003580451011657715, |
| "learning_rate": 1.9108280254777072e-07, |
| "loss": 0.0025, |
| "step": 12 |
| }, |
| { |
| "clip_ratio": 6.80922717606336e-05, |
| "epoch": 0.024857339189148814, |
| "grad_norm": 0.021554963663220406, |
| "kl": 0.00035765767097473145, |
| "learning_rate": 2.070063694267516e-07, |
| "loss": 0.0024, |
| "step": 13 |
| }, |
| { |
| "clip_ratio": 7.82350492158912e-05, |
| "epoch": 0.026769442203698725, |
| "grad_norm": 0.02103673666715622, |
| "kl": 0.000364154577255249, |
| "learning_rate": 2.229299363057325e-07, |
| "loss": 0.0025, |
| "step": 14 |
| }, |
| { |
| "clip_ratio": 7.339451894949889e-05, |
| "epoch": 0.028681545218248634, |
| "grad_norm": 0.023219415917992592, |
| "kl": 0.00036078691482543945, |
| "learning_rate": 2.3885350318471343e-07, |
| "loss": 0.0025, |
| "step": 15 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 560.8797726631165, |
| "epoch": 0.030593648232798542, |
| "grad_norm": 0.024746257811784744, |
| "kl": 0.0003574490547180176, |
| "learning_rate": 2.547770700636943e-07, |
| "loss": 0.0041, |
| "num_tokens": 15044695.0, |
| "reward": 0.011160714813740924, |
| "reward_std": 0.0194911856087856, |
| "rewards/pure_accuracy_reward_math": 0.011160714755533263, |
| "step": 16 |
| }, |
| { |
| "clip_ratio": 9.0199953319825e-05, |
| "epoch": 0.032505751247348454, |
| "grad_norm": 0.02409624680876732, |
| "kl": 0.0003629624843597412, |
| "learning_rate": 2.707006369426752e-07, |
| "loss": 0.0042, |
| "step": 17 |
| }, |
| { |
| "clip_ratio": 8.157364351291108e-05, |
| "epoch": 0.03441785426189836, |
| "grad_norm": 0.023118698969483376, |
| "kl": 0.0003673136234283447, |
| "learning_rate": 2.8662420382165606e-07, |
| "loss": 0.0041, |
| "step": 18 |
| }, |
| { |
| "clip_ratio": 9.048881202033954e-05, |
| "epoch": 0.03632995727644827, |
| "grad_norm": 0.02316245064139366, |
| "kl": 0.00036725401878356934, |
| "learning_rate": 3.02547770700637e-07, |
| "loss": 0.0041, |
| "step": 19 |
| }, |
| { |
| "clip_ratio": 8.188984941170929e-05, |
| "epoch": 0.03824206029099818, |
| "grad_norm": 0.021714523434638977, |
| "kl": 0.0003698766231536865, |
| "learning_rate": 3.1847133757961787e-07, |
| "loss": 0.0041, |
| "step": 20 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 554.6908774375916, |
| "epoch": 0.040154163305548086, |
| "grad_norm": 0.021168457344174385, |
| "kl": 0.000368267297744751, |
| "learning_rate": 3.3439490445859875e-07, |
| "loss": 0.0026, |
| "num_tokens": 18758275.0, |
| "reward": 0.010044643335277215, |
| "reward_std": 0.018202457285951823, |
| "rewards/pure_accuracy_reward_math": 0.010044643277069554, |
| "step": 21 |
| }, |
| { |
| "clip_ratio": 7.562077911416054e-05, |
| "epoch": 0.042066266320097995, |
| "grad_norm": 0.020001132041215897, |
| "kl": 0.00037425756454467773, |
| "learning_rate": 3.503184713375796e-07, |
| "loss": 0.0026, |
| "step": 22 |
| }, |
| { |
| "clip_ratio": 7.507880479806772e-05, |
| "epoch": 0.0439783693346479, |
| "grad_norm": 0.019386926665902138, |
| "kl": 0.0003781616687774658, |
| "learning_rate": 3.6624203821656055e-07, |
| "loss": 0.0026, |
| "step": 23 |
| }, |
| { |
| "clip_ratio": 7.805726602327923e-05, |
| "epoch": 0.04589047234919781, |
| "grad_norm": 0.018619129434227943, |
| "kl": 0.0003878176212310791, |
| "learning_rate": 3.8216560509554143e-07, |
| "loss": 0.0026, |
| "step": 24 |
| }, |
| { |
| "clip_ratio": 6.671031508176384e-05, |
| "epoch": 0.04780257536374772, |
| "grad_norm": 0.01833859272301197, |
| "kl": 0.00040024518966674805, |
| "learning_rate": 3.980891719745223e-07, |
| "loss": 0.0026, |
| "step": 25 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 552.0828938484192, |
| "epoch": 0.04971467837829763, |
| "grad_norm": 0.02587960660457611, |
| "kl": 0.00041344761848449707, |
| "learning_rate": 4.140127388535032e-07, |
| "loss": 0.0024, |
| "num_tokens": 22468764.0, |
| "reward": 0.012276786321308464, |
| "reward_std": 0.022195036057382822, |
| "rewards/pure_accuracy_reward_math": 0.012276786204893142, |
| "step": 26 |
| }, |
| { |
| "clip_ratio": 9.613389988771814e-05, |
| "epoch": 0.05162678139284754, |
| "grad_norm": 0.02422533929347992, |
| "kl": 0.00043016672134399414, |
| "learning_rate": 4.2993630573248406e-07, |
| "loss": 0.0024, |
| "step": 27 |
| }, |
| { |
| "clip_ratio": 8.45099556840978e-05, |
| "epoch": 0.05353888440739745, |
| "grad_norm": 0.023998353630304337, |
| "kl": 0.0004411041736602783, |
| "learning_rate": 4.45859872611465e-07, |
| "loss": 0.0024, |
| "step": 28 |
| }, |
| { |
| "clip_ratio": 9.715859295056362e-05, |
| "epoch": 0.05545098742194736, |
| "grad_norm": 0.023024486377835274, |
| "kl": 0.0004749894142150879, |
| "learning_rate": 4.6178343949044587e-07, |
| "loss": 0.0024, |
| "step": 29 |
| }, |
| { |
| "clip_ratio": 9.816014483021718e-05, |
| "epoch": 0.05736309043649727, |
| "grad_norm": 0.022171439602971077, |
| "kl": 0.0005015134811401367, |
| "learning_rate": 4.777070063694269e-07, |
| "loss": 0.0024, |
| "step": 30 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 549.791042804718, |
| "epoch": 0.059275193451047176, |
| "grad_norm": 0.027614159509539604, |
| "kl": 0.0005223453044891357, |
| "learning_rate": 4.936305732484077e-07, |
| "loss": 0.0029, |
| "num_tokens": 26170579.0, |
| "reward": 0.017299108090810478, |
| "reward_std": 0.03018019301816821, |
| "rewards/pure_accuracy_reward_math": 0.017299107741564512, |
| "step": 31 |
| }, |
| { |
| "clip_ratio": 0.00012569415866892086, |
| "epoch": 0.061187296465597084, |
| "grad_norm": 0.02653171494603157, |
| "kl": 0.0005522072315216064, |
| "learning_rate": 5.095541401273886e-07, |
| "loss": 0.0029, |
| "step": 32 |
| }, |
| { |
| "clip_ratio": 0.00012863677034147258, |
| "epoch": 0.06309939948014699, |
| "grad_norm": 0.0255680400878191, |
| "kl": 0.0005916953086853027, |
| "learning_rate": 5.254777070063695e-07, |
| "loss": 0.0029, |
| "step": 33 |
| }, |
| { |
| "clip_ratio": 0.00012797017114962728, |
| "epoch": 0.06501150249469691, |
| "grad_norm": 0.02455417811870575, |
| "kl": 0.0006306171417236328, |
| "learning_rate": 5.414012738853504e-07, |
| "loss": 0.0029, |
| "step": 34 |
| }, |
| { |
| "clip_ratio": 0.00012855784757448419, |
| "epoch": 0.06692360550924681, |
| "grad_norm": 0.024154040962457657, |
| "kl": 0.0006751418113708496, |
| "learning_rate": 5.573248407643312e-07, |
| "loss": 0.0029, |
| "step": 35 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 552.728542804718, |
| "epoch": 0.06883570852379672, |
| "grad_norm": 0.023450903594493866, |
| "kl": 0.000738978385925293, |
| "learning_rate": 5.732484076433121e-07, |
| "loss": 0.0034, |
| "num_tokens": 29883398.0, |
| "reward": 0.018415179511066526, |
| "reward_std": 0.030997214023955166, |
| "rewards/pure_accuracy_reward_math": 0.01841517922002822, |
| "step": 36 |
| }, |
| { |
| "clip_ratio": 0.00012425195308196635, |
| "epoch": 0.07074781153834662, |
| "grad_norm": 0.023070134222507477, |
| "kl": 0.0007783770561218262, |
| "learning_rate": 5.89171974522293e-07, |
| "loss": 0.0034, |
| "step": 37 |
| }, |
| { |
| "clip_ratio": 0.00012334759713894528, |
| "epoch": 0.07265991455289654, |
| "grad_norm": 0.023447532206773758, |
| "kl": 0.0008447170257568359, |
| "learning_rate": 6.05095541401274e-07, |
| "loss": 0.0034, |
| "step": 38 |
| }, |
| { |
| "clip_ratio": 0.00012615493608336692, |
| "epoch": 0.07457201756744644, |
| "grad_norm": 0.024682210758328438, |
| "kl": 0.0009213089942932129, |
| "learning_rate": 6.210191082802549e-07, |
| "loss": 0.0034, |
| "step": 39 |
| }, |
| { |
| "clip_ratio": 0.00012461718182521508, |
| "epoch": 0.07648412058199636, |
| "grad_norm": 0.02555885910987854, |
| "kl": 0.000977635383605957, |
| "learning_rate": 6.369426751592357e-07, |
| "loss": 0.0033, |
| "step": 40 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.030158996582, |
| "epoch": 0.07839622359654626, |
| "grad_norm": 0.059237755835056305, |
| "kl": 0.001125633716583252, |
| "learning_rate": 6.528662420382166e-07, |
| "loss": 0.0031, |
| "num_tokens": 33502406.0, |
| "reward": 0.024832590454025194, |
| "reward_std": 0.04194520629243925, |
| "rewards/pure_accuracy_reward_math": 0.02483259010477923, |
| "step": 41 |
| }, |
| { |
| "clip_ratio": 0.00016323180295785278, |
| "epoch": 0.08030832661109617, |
| "grad_norm": 0.029172642156481743, |
| "kl": 0.0011183619499206543, |
| "learning_rate": 6.687898089171975e-07, |
| "loss": 0.0031, |
| "step": 42 |
| }, |
| { |
| "clip_ratio": 0.0001751068371618203, |
| "epoch": 0.08222042962564609, |
| "grad_norm": 0.030453085899353027, |
| "kl": 0.0011813640594482422, |
| "learning_rate": 6.847133757961784e-07, |
| "loss": 0.0031, |
| "step": 43 |
| }, |
| { |
| "clip_ratio": 0.00018521026674989116, |
| "epoch": 0.08413253264019599, |
| "grad_norm": 0.03091653250157833, |
| "kl": 0.0012224912643432617, |
| "learning_rate": 7.006369426751592e-07, |
| "loss": 0.0031, |
| "step": 44 |
| }, |
| { |
| "clip_ratio": 0.00017049979595640252, |
| "epoch": 0.0860446356547459, |
| "grad_norm": 0.030593233183026314, |
| "kl": 0.0012733936309814453, |
| "learning_rate": 7.165605095541401e-07, |
| "loss": 0.0031, |
| "step": 45 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.7360739707947, |
| "epoch": 0.0879567386692958, |
| "grad_norm": 0.03371572494506836, |
| "kl": 0.0012704133987426758, |
| "learning_rate": 7.324840764331211e-07, |
| "loss": 0.0039, |
| "num_tokens": 37133676.0, |
| "reward": 0.029017858760198578, |
| "reward_std": 0.04838265001308173, |
| "rewards/pure_accuracy_reward_math": 0.029017858061706647, |
| "step": 46 |
| }, |
| { |
| "clip_ratio": 0.000227557278265067, |
| "epoch": 0.08986884168384572, |
| "grad_norm": 0.033185359090566635, |
| "kl": 0.0012688040733337402, |
| "learning_rate": 7.48407643312102e-07, |
| "loss": 0.0039, |
| "step": 47 |
| }, |
| { |
| "clip_ratio": 0.0002238695693677073, |
| "epoch": 0.09178094469839562, |
| "grad_norm": 0.03329231217503548, |
| "kl": 0.0013200044631958008, |
| "learning_rate": 7.643312101910829e-07, |
| "loss": 0.0039, |
| "step": 48 |
| }, |
| { |
| "clip_ratio": 0.00021458888153347289, |
| "epoch": 0.09369304771294554, |
| "grad_norm": 0.03329336270689964, |
| "kl": 0.0013244152069091797, |
| "learning_rate": 7.802547770700637e-07, |
| "loss": 0.0039, |
| "step": 49 |
| }, |
| { |
| "clip_ratio": 0.0002193794426830209, |
| "epoch": 0.09560515072749544, |
| "grad_norm": 0.0323607362806797, |
| "kl": 0.0013269782066345215, |
| "learning_rate": 7.961783439490446e-07, |
| "loss": 0.0039, |
| "step": 50 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 535.9352931976318, |
| "epoch": 0.09751725374204535, |
| "grad_norm": 0.030199358239769936, |
| "kl": 0.0013506412506103516, |
| "learning_rate": 8.121019108280255e-07, |
| "loss": 0.0042, |
| "num_tokens": 40789896.0, |
| "reward": 0.030970983498264104, |
| "reward_std": 0.0486878992523998, |
| "rewards/pure_accuracy_reward_math": 0.030970983090810478, |
| "step": 51 |
| }, |
| { |
| "clip_ratio": 0.00019589511845197194, |
| "epoch": 0.09942935675659526, |
| "grad_norm": 0.029786745086312294, |
| "kl": 0.001370549201965332, |
| "learning_rate": 8.280254777070064e-07, |
| "loss": 0.0042, |
| "step": 52 |
| }, |
| { |
| "clip_ratio": 0.00021279048064570816, |
| "epoch": 0.10134145977114517, |
| "grad_norm": 0.029834378510713577, |
| "kl": 0.0013399124145507812, |
| "learning_rate": 8.439490445859872e-07, |
| "loss": 0.0042, |
| "step": 53 |
| }, |
| { |
| "clip_ratio": 0.000190277668878025, |
| "epoch": 0.10325356278569509, |
| "grad_norm": 0.029410598799586296, |
| "kl": 0.00139617919921875, |
| "learning_rate": 8.598726114649681e-07, |
| "loss": 0.0042, |
| "step": 54 |
| }, |
| { |
| "clip_ratio": 0.00019459096591845082, |
| "epoch": 0.10516566580024499, |
| "grad_norm": 0.02935440093278885, |
| "kl": 0.0014204978942871094, |
| "learning_rate": 8.757961783439491e-07, |
| "loss": 0.0042, |
| "step": 55 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 536.9548239707947, |
| "epoch": 0.1070777688147949, |
| "grad_norm": 0.02805081568658352, |
| "kl": 0.0014168024063110352, |
| "learning_rate": 8.9171974522293e-07, |
| "loss": 0.0048, |
| "num_tokens": 44444894.0, |
| "reward": 0.027901787223527208, |
| "reward_std": 0.04121451411629096, |
| "rewards/pure_accuracy_reward_math": 0.02790178669965826, |
| "step": 56 |
| }, |
| { |
| "clip_ratio": 0.00016821016617996065, |
| "epoch": 0.1089898718293448, |
| "grad_norm": 0.02779608964920044, |
| "kl": 0.0014551877975463867, |
| "learning_rate": 9.076433121019109e-07, |
| "loss": 0.0048, |
| "step": 57 |
| }, |
| { |
| "clip_ratio": 0.00018197509814399382, |
| "epoch": 0.11090197484389472, |
| "grad_norm": 0.02721741609275341, |
| "kl": 0.0014206171035766602, |
| "learning_rate": 9.235668789808917e-07, |
| "loss": 0.0048, |
| "step": 58 |
| }, |
| { |
| "clip_ratio": 0.00016919344039934003, |
| "epoch": 0.11281407785844462, |
| "grad_norm": 0.02676265314221382, |
| "kl": 0.0014575719833374023, |
| "learning_rate": 9.394904458598727e-07, |
| "loss": 0.0048, |
| "step": 59 |
| }, |
| { |
| "clip_ratio": 0.00017069062050723005, |
| "epoch": 0.11472618087299453, |
| "grad_norm": 0.027010478079319, |
| "kl": 0.0014843940734863281, |
| "learning_rate": 9.554140127388537e-07, |
| "loss": 0.0048, |
| "step": 60 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.1998043060303, |
| "epoch": 0.11663828388754444, |
| "grad_norm": 0.030231643468141556, |
| "kl": 0.0015106201171875, |
| "learning_rate": 9.713375796178345e-07, |
| "loss": 0.0029, |
| "num_tokens": 48046694.0, |
| "reward": 0.02762276935391128, |
| "reward_std": 0.04623683576937765, |
| "rewards/pure_accuracy_reward_math": 0.02762276877183467, |
| "step": 61 |
| }, |
| { |
| "clip_ratio": 0.0001882643781527804, |
| "epoch": 0.11855038690209435, |
| "grad_norm": 0.030413959175348282, |
| "kl": 0.0015065670013427734, |
| "learning_rate": 9.872611464968155e-07, |
| "loss": 0.0029, |
| "step": 62 |
| }, |
| { |
| "clip_ratio": 0.00019050979824442038, |
| "epoch": 0.12046248991664425, |
| "grad_norm": 0.029997214674949646, |
| "kl": 0.0014984607696533203, |
| "learning_rate": 1.0031847133757962e-06, |
| "loss": 0.0029, |
| "step": 63 |
| }, |
| { |
| "clip_ratio": 0.0001963579389325787, |
| "epoch": 0.12237459293119417, |
| "grad_norm": 0.02927768975496292, |
| "kl": 0.0014634132385253906, |
| "learning_rate": 1.0191082802547772e-06, |
| "loss": 0.0029, |
| "step": 64 |
| }, |
| { |
| "clip_ratio": 0.0002130206620449826, |
| "epoch": 0.12428669594574408, |
| "grad_norm": 0.028719380497932434, |
| "kl": 0.0014470815658569336, |
| "learning_rate": 1.035031847133758e-06, |
| "loss": 0.0029, |
| "step": 65 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.4283137321472, |
| "epoch": 0.12619879896029398, |
| "grad_norm": 0.031215306371450424, |
| "kl": 0.0014127492904663086, |
| "learning_rate": 1.050955414012739e-06, |
| "loss": 0.0038, |
| "num_tokens": 51628501.0, |
| "reward": 0.03487723405123688, |
| "reward_std": 0.05173706269124523, |
| "rewards/pure_accuracy_reward_math": 0.03487723323632963, |
| "step": 66 |
| }, |
| { |
| "clip_ratio": 0.00019433782460964721, |
| "epoch": 0.1281109019748439, |
| "grad_norm": 0.03108724020421505, |
| "kl": 0.0014324188232421875, |
| "learning_rate": 1.06687898089172e-06, |
| "loss": 0.0038, |
| "step": 67 |
| }, |
| { |
| "clip_ratio": 0.00020085336353758976, |
| "epoch": 0.13002300498939381, |
| "grad_norm": 0.030220478773117065, |
| "kl": 0.0014306306838989258, |
| "learning_rate": 1.0828025477707007e-06, |
| "loss": 0.0038, |
| "step": 68 |
| }, |
| { |
| "clip_ratio": 0.00021161197844321578, |
| "epoch": 0.1319351080039437, |
| "grad_norm": 0.030320733785629272, |
| "kl": 0.001450181007385254, |
| "learning_rate": 1.0987261146496817e-06, |
| "loss": 0.0038, |
| "step": 69 |
| }, |
| { |
| "clip_ratio": 0.00019352555551677142, |
| "epoch": 0.13384721101849362, |
| "grad_norm": 0.02980073168873787, |
| "kl": 0.0014796257019042969, |
| "learning_rate": 1.1146496815286625e-06, |
| "loss": 0.0038, |
| "step": 70 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.2854599952698, |
| "epoch": 0.13575931403304353, |
| "grad_norm": 0.0338500440120697, |
| "kl": 0.0015006065368652344, |
| "learning_rate": 1.1305732484076435e-06, |
| "loss": 0.006, |
| "num_tokens": 55247180.0, |
| "reward": 0.03710937674622983, |
| "reward_std": 0.05426825548056513, |
| "rewards/pure_accuracy_reward_math": 0.037109375989530236, |
| "step": 71 |
| }, |
| { |
| "clip_ratio": 0.0002256608086668166, |
| "epoch": 0.13767141704759345, |
| "grad_norm": 0.03328324109315872, |
| "kl": 0.0015664100646972656, |
| "learning_rate": 1.1464968152866242e-06, |
| "loss": 0.006, |
| "step": 72 |
| }, |
| { |
| "clip_ratio": 0.0002166868289350532, |
| "epoch": 0.13958352006214333, |
| "grad_norm": 0.03267475962638855, |
| "kl": 0.0016113519668579102, |
| "learning_rate": 1.1624203821656052e-06, |
| "loss": 0.006, |
| "step": 73 |
| }, |
| { |
| "clip_ratio": 0.00024709346627105333, |
| "epoch": 0.14149562307669325, |
| "grad_norm": 0.032320376485586166, |
| "kl": 0.0017037391662597656, |
| "learning_rate": 1.178343949044586e-06, |
| "loss": 0.006, |
| "step": 74 |
| }, |
| { |
| "clip_ratio": 0.00021453456992048814, |
| "epoch": 0.14340772609124317, |
| "grad_norm": 0.0322573184967041, |
| "kl": 0.0017703771591186523, |
| "learning_rate": 1.194267515923567e-06, |
| "loss": 0.006, |
| "step": 75 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 539.314199924469, |
| "epoch": 0.14531982910579308, |
| "grad_norm": 0.03833702206611633, |
| "kl": 0.0018039941787719727, |
| "learning_rate": 1.210191082802548e-06, |
| "loss": 0.0055, |
| "num_tokens": 58912938.0, |
| "reward": 0.04045759144355543, |
| "reward_std": 0.060838291130494326, |
| "rewards/pure_accuracy_reward_math": 0.040457590454025194, |
| "step": 76 |
| }, |
| { |
| "clip_ratio": 0.0002450900424548763, |
| "epoch": 0.147231932120343, |
| "grad_norm": 0.03705858439207077, |
| "kl": 0.0018303394317626953, |
| "learning_rate": 1.2261146496815287e-06, |
| "loss": 0.0055, |
| "step": 77 |
| }, |
| { |
| "clip_ratio": 0.0002520209266094753, |
| "epoch": 0.14914403513489288, |
| "grad_norm": 0.03624257072806358, |
| "kl": 0.0019118785858154297, |
| "learning_rate": 1.2420382165605097e-06, |
| "loss": 0.0055, |
| "step": 78 |
| }, |
| { |
| "clip_ratio": 0.00023157394139161624, |
| "epoch": 0.1510561381494428, |
| "grad_norm": 0.03626013919711113, |
| "kl": 0.001949906349182129, |
| "learning_rate": 1.2579617834394905e-06, |
| "loss": 0.0055, |
| "step": 79 |
| }, |
| { |
| "clip_ratio": 0.0002889583781211513, |
| "epoch": 0.1529682411639927, |
| "grad_norm": 0.03634464740753174, |
| "kl": 0.001984238624572754, |
| "learning_rate": 1.2738853503184715e-06, |
| "loss": 0.0055, |
| "step": 80 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.2025918960571, |
| "epoch": 0.15488034417854263, |
| "grad_norm": 0.032439347356557846, |
| "kl": 0.0019190311431884766, |
| "learning_rate": 1.2898089171974522e-06, |
| "loss": 0.0067, |
| "num_tokens": 62551992.0, |
| "reward": 0.03766741280560382, |
| "reward_std": 0.0572711571585387, |
| "rewards/pure_accuracy_reward_math": 0.0376674119324889, |
| "step": 81 |
| }, |
| { |
| "clip_ratio": 0.00025730342139240747, |
| "epoch": 0.15679244719309252, |
| "grad_norm": 0.03198026493191719, |
| "kl": 0.001917123794555664, |
| "learning_rate": 1.3057324840764332e-06, |
| "loss": 0.0067, |
| "step": 82 |
| }, |
| { |
| "clip_ratio": 0.0002504205738205201, |
| "epoch": 0.15870455020764243, |
| "grad_norm": 0.02998184598982334, |
| "kl": 0.0019073486328125, |
| "learning_rate": 1.3216560509554142e-06, |
| "loss": 0.0067, |
| "step": 83 |
| }, |
| { |
| "clip_ratio": 0.00025362581419585695, |
| "epoch": 0.16061665322219235, |
| "grad_norm": 0.029601849615573883, |
| "kl": 0.0019354820251464844, |
| "learning_rate": 1.337579617834395e-06, |
| "loss": 0.0067, |
| "step": 84 |
| }, |
| { |
| "clip_ratio": 0.0003167184295307379, |
| "epoch": 0.16252875623674226, |
| "grad_norm": 0.030052170157432556, |
| "kl": 0.0019598007202148438, |
| "learning_rate": 1.353503184713376e-06, |
| "loss": 0.0067, |
| "step": 85 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.9562168121338, |
| "epoch": 0.16444085925129218, |
| "grad_norm": 0.03331635147333145, |
| "kl": 0.002047300338745117, |
| "learning_rate": 1.3694267515923567e-06, |
| "loss": 0.0076, |
| "num_tokens": 66182275.0, |
| "reward": 0.04045759132714011, |
| "reward_std": 0.06074576545506716, |
| "rewards/pure_accuracy_reward_math": 0.04045759068685584, |
| "step": 86 |
| }, |
| { |
| "clip_ratio": 0.0002471263709367122, |
| "epoch": 0.16635296226584206, |
| "grad_norm": 0.03298444300889969, |
| "kl": 0.0020711421966552734, |
| "learning_rate": 1.3853503184713377e-06, |
| "loss": 0.0076, |
| "step": 87 |
| }, |
| { |
| "clip_ratio": 0.00024866302578629984, |
| "epoch": 0.16826506528039198, |
| "grad_norm": 0.03206898272037506, |
| "kl": 0.0020384788513183594, |
| "learning_rate": 1.4012738853503185e-06, |
| "loss": 0.0076, |
| "step": 88 |
| }, |
| { |
| "clip_ratio": 0.00026278120321876486, |
| "epoch": 0.1701771682949419, |
| "grad_norm": 0.03115510568022728, |
| "kl": 0.002008795738220215, |
| "learning_rate": 1.4171974522292995e-06, |
| "loss": 0.0076, |
| "step": 89 |
| }, |
| { |
| "clip_ratio": 0.000245522400405207, |
| "epoch": 0.1720892713094918, |
| "grad_norm": 0.030577220022678375, |
| "kl": 0.0019922256469726562, |
| "learning_rate": 1.4331210191082802e-06, |
| "loss": 0.0076, |
| "step": 90 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.3948340415955, |
| "epoch": 0.1740013743240417, |
| "grad_norm": 0.0348118431866169, |
| "kl": 0.0019588470458984375, |
| "learning_rate": 1.4490445859872612e-06, |
| "loss": 0.0046, |
| "num_tokens": 69793534.0, |
| "reward": 0.04436384132714011, |
| "reward_std": 0.059376906079705805, |
| "rewards/pure_accuracy_reward_math": 0.044363840570440516, |
| "step": 91 |
| }, |
| { |
| "clip_ratio": 0.00021377558331892033, |
| "epoch": 0.1759134773385916, |
| "grad_norm": 0.03493114933371544, |
| "kl": 0.0019345283508300781, |
| "learning_rate": 1.4649681528662422e-06, |
| "loss": 0.0046, |
| "step": 92 |
| }, |
| { |
| "clip_ratio": 0.00023636125789039397, |
| "epoch": 0.17782558035314153, |
| "grad_norm": 0.03362264111638069, |
| "kl": 0.0019860267639160156, |
| "learning_rate": 1.480891719745223e-06, |
| "loss": 0.0046, |
| "step": 93 |
| }, |
| { |
| "clip_ratio": 0.00022836430440520417, |
| "epoch": 0.17973768336769144, |
| "grad_norm": 0.03336656093597412, |
| "kl": 0.002032160758972168, |
| "learning_rate": 1.496815286624204e-06, |
| "loss": 0.0045, |
| "step": 94 |
| }, |
| { |
| "clip_ratio": 0.00024139108904819295, |
| "epoch": 0.18164978638224133, |
| "grad_norm": 0.03235051408410072, |
| "kl": 0.0021082162857055664, |
| "learning_rate": 1.5127388535031847e-06, |
| "loss": 0.0045, |
| "step": 95 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.8058252334595, |
| "epoch": 0.18356188939679124, |
| "grad_norm": 0.03482769802212715, |
| "kl": 0.0021872520446777344, |
| "learning_rate": 1.5286624203821657e-06, |
| "loss": 0.0075, |
| "num_tokens": 73427974.0, |
| "reward": 0.04101562709547579, |
| "reward_std": 0.06101094774203375, |
| "rewards/pure_accuracy_reward_math": 0.04101562616415322, |
| "step": 96 |
| }, |
| { |
| "clip_ratio": 0.00024072786442275174, |
| "epoch": 0.18547399241134116, |
| "grad_norm": 0.03345990553498268, |
| "kl": 0.002261519432067871, |
| "learning_rate": 1.5445859872611465e-06, |
| "loss": 0.0075, |
| "step": 97 |
| }, |
| { |
| "clip_ratio": 0.00024480573915752757, |
| "epoch": 0.18738609542589107, |
| "grad_norm": 0.03318383917212486, |
| "kl": 0.0022890567779541016, |
| "learning_rate": 1.5605095541401275e-06, |
| "loss": 0.0075, |
| "step": 98 |
| }, |
| { |
| "clip_ratio": 0.00027489714915418517, |
| "epoch": 0.189298198440441, |
| "grad_norm": 0.03230712562799454, |
| "kl": 0.0023267269134521484, |
| "learning_rate": 1.5764331210191083e-06, |
| "loss": 0.0074, |
| "step": 99 |
| }, |
| { |
| "clip_ratio": 0.00029621877195040724, |
| "epoch": 0.19121030145499088, |
| "grad_norm": 0.03260359168052673, |
| "kl": 0.002334117889404297, |
| "learning_rate": 1.5923566878980892e-06, |
| "loss": 0.0074, |
| "step": 100 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 526.358283996582, |
| "epoch": 0.0019121030145499089, |
| "grad_norm": 0.30763256549835205, |
| "kl": 0.0024461746215820312, |
| "learning_rate": 1.6082802547770702e-06, |
| "loss": 0.0053, |
| "num_tokens": 3621800.0, |
| "reward": 0.0546875023865141, |
| "reward_std": 0.06997958500869572, |
| "rewards/pure_accuracy_reward_math": 0.054687501629814506, |
| "step": 101 |
| }, |
| { |
| "clip_ratio": 0.00028505406811518696, |
| "epoch": 0.0038242060290998177, |
| "grad_norm": 0.7424792647361755, |
| "kl": 0.005189061164855957, |
| "learning_rate": 1.624203821656051e-06, |
| "loss": 0.0054, |
| "step": 102 |
| }, |
| { |
| "clip_ratio": 0.000307778484739174, |
| "epoch": 0.005736309043649726, |
| "grad_norm": 0.5747273564338684, |
| "kl": 0.005206584930419922, |
| "learning_rate": 1.640127388535032e-06, |
| "loss": 0.0054, |
| "step": 103 |
| }, |
| { |
| "clip_ratio": 0.0003712488735345687, |
| "epoch": 0.0076484120581996355, |
| "grad_norm": 0.15304483473300934, |
| "kl": 0.0026189088821411133, |
| "learning_rate": 1.6560509554140127e-06, |
| "loss": 0.0053, |
| "step": 104 |
| }, |
| { |
| "clip_ratio": 0.00037476027159755176, |
| "epoch": 0.009560515072749545, |
| "grad_norm": 0.2118157148361206, |
| "kl": 0.00246584415435791, |
| "learning_rate": 1.6719745222929937e-06, |
| "loss": 0.0053, |
| "step": 105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.46431016922, |
| "epoch": 0.011472618087299453, |
| "grad_norm": 0.2036779820919037, |
| "kl": 0.0037467479705810547, |
| "learning_rate": 1.6878980891719745e-06, |
| "loss": 0.0067, |
| "num_tokens": 7244448.0, |
| "reward": 0.05161830596625805, |
| "reward_std": 0.06822534638922662, |
| "rewards/pure_accuracy_reward_math": 0.05161830474389717, |
| "step": 106 |
| }, |
| { |
| "clip_ratio": 0.0002751678786125922, |
| "epoch": 0.013384721101849363, |
| "grad_norm": 0.1858554631471634, |
| "kl": 0.0035070180892944336, |
| "learning_rate": 1.7038216560509555e-06, |
| "loss": 0.0067, |
| "step": 107 |
| }, |
| { |
| "clip_ratio": 0.0002901391828800115, |
| "epoch": 0.015296824116399271, |
| "grad_norm": 0.06319136172533035, |
| "kl": 0.0033702850341796875, |
| "learning_rate": 1.7197452229299363e-06, |
| "loss": 0.0067, |
| "step": 108 |
| }, |
| { |
| "clip_ratio": 0.00029408001091724145, |
| "epoch": 0.01720892713094918, |
| "grad_norm": 0.061827220022678375, |
| "kl": 0.00351715087890625, |
| "learning_rate": 1.7356687898089172e-06, |
| "loss": 0.0067, |
| "step": 109 |
| }, |
| { |
| "clip_ratio": 0.0002710100695253459, |
| "epoch": 0.01912103014549909, |
| "grad_norm": 0.13167870044708252, |
| "kl": 0.0036835670471191406, |
| "learning_rate": 1.7515923566878982e-06, |
| "loss": 0.0067, |
| "step": 110 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 515.4391984939575, |
| "epoch": 0.021033133160048997, |
| "grad_norm": 0.034568388015031815, |
| "kl": 0.0024957656860351562, |
| "learning_rate": 1.767515923566879e-06, |
| "loss": 0.0068, |
| "num_tokens": 10824130.0, |
| "reward": 0.0468750023865141, |
| "reward_std": 0.06221334758447483, |
| "rewards/pure_accuracy_reward_math": 0.04687500116415322, |
| "step": 111 |
| }, |
| { |
| "clip_ratio": 0.00025272632768746917, |
| "epoch": 0.022945236174598906, |
| "grad_norm": 0.03421744704246521, |
| "kl": 0.002499222755432129, |
| "learning_rate": 1.78343949044586e-06, |
| "loss": 0.0068, |
| "step": 112 |
| }, |
| { |
| "clip_ratio": 0.00025192988658773174, |
| "epoch": 0.024857339189148814, |
| "grad_norm": 0.03444651514291763, |
| "kl": 0.002528548240661621, |
| "learning_rate": 1.7993630573248407e-06, |
| "loss": 0.0068, |
| "step": 113 |
| }, |
| { |
| "clip_ratio": 0.0002639102876287325, |
| "epoch": 0.026769442203698725, |
| "grad_norm": 0.033966146409511566, |
| "kl": 0.0025298595428466797, |
| "learning_rate": 1.8152866242038217e-06, |
| "loss": 0.0067, |
| "step": 114 |
| }, |
| { |
| "clip_ratio": 0.0002613060296994263, |
| "epoch": 0.028681545218248634, |
| "grad_norm": 0.03252725675702095, |
| "kl": 0.0025829076766967773, |
| "learning_rate": 1.8312101910828025e-06, |
| "loss": 0.0067, |
| "step": 115 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 511.59126138687134, |
| "epoch": 0.030593648232798542, |
| "grad_norm": 0.04161737114191055, |
| "kl": 0.002777099609375, |
| "learning_rate": 1.8471337579617835e-06, |
| "loss": 0.0084, |
| "num_tokens": 14389817.0, |
| "reward": 0.04464285934227519, |
| "reward_std": 0.0631567623349838, |
| "rewards/pure_accuracy_reward_math": 0.044642858469160274, |
| "step": 116 |
| }, |
| { |
| "clip_ratio": 0.0002685248994680478, |
| "epoch": 0.032505751247348454, |
| "grad_norm": 0.03920653462409973, |
| "kl": 0.002690911293029785, |
| "learning_rate": 1.8630573248407643e-06, |
| "loss": 0.0084, |
| "step": 117 |
| }, |
| { |
| "clip_ratio": 0.00028247613772691693, |
| "epoch": 0.03441785426189836, |
| "grad_norm": 0.037915512919425964, |
| "kl": 0.0026444196701049805, |
| "learning_rate": 1.8789808917197455e-06, |
| "loss": 0.0084, |
| "step": 118 |
| }, |
| { |
| "clip_ratio": 0.00028578577973803476, |
| "epoch": 0.03632995727644827, |
| "grad_norm": 0.03727024793624878, |
| "kl": 0.002573251724243164, |
| "learning_rate": 1.8949044585987264e-06, |
| "loss": 0.0083, |
| "step": 119 |
| }, |
| { |
| "clip_ratio": 0.0003107314861381383, |
| "epoch": 0.03824206029099818, |
| "grad_norm": 0.03734543174505234, |
| "kl": 0.002534151077270508, |
| "learning_rate": 1.9108280254777074e-06, |
| "loss": 0.0083, |
| "step": 120 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 511.96654319763184, |
| "epoch": 0.040154163305548086, |
| "grad_norm": 18.524425506591797, |
| "kl": 0.05213046073913574, |
| "learning_rate": 1.926751592356688e-06, |
| "loss": 0.0067, |
| "num_tokens": 17950273.0, |
| "reward": 0.044642859254963696, |
| "reward_std": 0.0572310917195864, |
| "rewards/pure_accuracy_reward_math": 0.04464285838184878, |
| "step": 121 |
| }, |
| { |
| "clip_ratio": 0.00024330438452579983, |
| "epoch": 0.042066266320097995, |
| "grad_norm": 0.06961806118488312, |
| "kl": 0.0025354623794555664, |
| "learning_rate": 1.942675159235669e-06, |
| "loss": 0.0047, |
| "step": 122 |
| }, |
| { |
| "clip_ratio": 0.00023799908234423128, |
| "epoch": 0.0439783693346479, |
| "grad_norm": 0.038592379540205, |
| "kl": 0.0024437904357910156, |
| "learning_rate": 1.95859872611465e-06, |
| "loss": 0.0047, |
| "step": 123 |
| }, |
| { |
| "clip_ratio": 0.00023513944393016573, |
| "epoch": 0.04589047234919781, |
| "grad_norm": 0.036785636097192764, |
| "kl": 0.002588033676147461, |
| "learning_rate": 1.974522292993631e-06, |
| "loss": 0.0047, |
| "step": 124 |
| }, |
| { |
| "clip_ratio": 0.0002449645085107477, |
| "epoch": 0.04780257536374772, |
| "grad_norm": 0.03537231311202049, |
| "kl": 0.002721548080444336, |
| "learning_rate": 1.9904458598726117e-06, |
| "loss": 0.0047, |
| "step": 125 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 513.618884563446, |
| "epoch": 0.04971467837829763, |
| "grad_norm": 0.03746291249990463, |
| "kl": 0.0026862621307373047, |
| "learning_rate": 2.0063694267515925e-06, |
| "loss": 0.0063, |
| "num_tokens": 21522907.0, |
| "reward": 0.04492187732830644, |
| "reward_std": 0.061436392075847834, |
| "rewards/pure_accuracy_reward_math": 0.04492187616415322, |
| "step": 126 |
| }, |
| { |
| "clip_ratio": 0.0002821582585283977, |
| "epoch": 0.05162678139284754, |
| "grad_norm": 0.036032602190971375, |
| "kl": 0.0027321577072143555, |
| "learning_rate": 2.0222929936305737e-06, |
| "loss": 0.0063, |
| "step": 127 |
| }, |
| { |
| "clip_ratio": 0.0002675421079629814, |
| "epoch": 0.05353888440739745, |
| "grad_norm": 0.03723033517599106, |
| "kl": 0.002848386764526367, |
| "learning_rate": 2.0382165605095544e-06, |
| "loss": 0.0062, |
| "step": 128 |
| }, |
| { |
| "clip_ratio": 0.00030748845301786787, |
| "epoch": 0.05545098742194736, |
| "grad_norm": 0.03697400540113449, |
| "kl": 0.002881765365600586, |
| "learning_rate": 2.054140127388535e-06, |
| "loss": 0.0062, |
| "step": 129 |
| }, |
| { |
| "clip_ratio": 0.0003087153630758621, |
| "epoch": 0.05736309043649727, |
| "grad_norm": 0.03756724298000336, |
| "kl": 0.002836942672729492, |
| "learning_rate": 2.070063694267516e-06, |
| "loss": 0.0062, |
| "step": 130 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 518.1615762710571, |
| "epoch": 0.059275193451047176, |
| "grad_norm": 0.039371710270643234, |
| "kl": 0.00270843505859375, |
| "learning_rate": 2.085987261146497e-06, |
| "loss": 0.0064, |
| "num_tokens": 25111362.0, |
| "reward": 0.05106027016881853, |
| "reward_std": 0.06736206263303757, |
| "rewards/pure_accuracy_reward_math": 0.051060269062872976, |
| "step": 131 |
| }, |
| { |
| "clip_ratio": 0.0002896036380661826, |
| "epoch": 0.061187296465597084, |
| "grad_norm": 0.03780793026089668, |
| "kl": 0.0027250051498413086, |
| "learning_rate": 2.101910828025478e-06, |
| "loss": 0.0064, |
| "step": 132 |
| }, |
| { |
| "clip_ratio": 0.0002853632216783808, |
| "epoch": 0.06309939948014699, |
| "grad_norm": 0.03720535710453987, |
| "kl": 0.0027070045471191406, |
| "learning_rate": 2.1178343949044587e-06, |
| "loss": 0.0064, |
| "step": 133 |
| }, |
| { |
| "clip_ratio": 0.0002896762144928289, |
| "epoch": 0.06501150249469691, |
| "grad_norm": 0.036468133330345154, |
| "kl": 0.0027469396591186523, |
| "learning_rate": 2.13375796178344e-06, |
| "loss": 0.0064, |
| "step": 134 |
| }, |
| { |
| "clip_ratio": 0.0003120482754184195, |
| "epoch": 0.06692360550924681, |
| "grad_norm": 0.03586801886558533, |
| "kl": 0.002748727798461914, |
| "learning_rate": 2.1496815286624207e-06, |
| "loss": 0.0063, |
| "step": 135 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.0351796150208, |
| "epoch": 0.06883570852379672, |
| "grad_norm": 0.03092282824218273, |
| "kl": 0.002766728401184082, |
| "learning_rate": 2.1656050955414015e-06, |
| "loss": 0.0056, |
| "num_tokens": 28735680.0, |
| "reward": 0.04017857348662801, |
| "reward_std": 0.05289319949224591, |
| "rewards/pure_accuracy_reward_math": 0.040178572438890114, |
| "step": 136 |
| }, |
| { |
| "clip_ratio": 0.00020221989311153266, |
| "epoch": 0.07074781153834662, |
| "grad_norm": 0.030703941360116005, |
| "kl": 0.0028089284896850586, |
| "learning_rate": 2.1815286624203822e-06, |
| "loss": 0.0056, |
| "step": 137 |
| }, |
| { |
| "clip_ratio": 0.00019867721590571819, |
| "epoch": 0.07265991455289654, |
| "grad_norm": 0.030248478055000305, |
| "kl": 0.0027884244918823242, |
| "learning_rate": 2.1974522292993634e-06, |
| "loss": 0.0056, |
| "step": 138 |
| }, |
| { |
| "clip_ratio": 0.00021304549886735913, |
| "epoch": 0.07457201756744644, |
| "grad_norm": 0.029539138078689575, |
| "kl": 0.002767205238342285, |
| "learning_rate": 2.213375796178344e-06, |
| "loss": 0.0056, |
| "step": 139 |
| }, |
| { |
| "clip_ratio": 0.00021535260020755231, |
| "epoch": 0.07648412058199636, |
| "grad_norm": 0.02955791726708412, |
| "kl": 0.002725839614868164, |
| "learning_rate": 2.229299363057325e-06, |
| "loss": 0.0055, |
| "step": 140 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.7212834358215, |
| "epoch": 0.07839622359654626, |
| "grad_norm": 0.060058850795030594, |
| "kl": 0.0032591819763183594, |
| "learning_rate": 2.245222929936306e-06, |
| "loss": 0.0071, |
| "num_tokens": 32349997.0, |
| "reward": 0.048828127211891115, |
| "reward_std": 0.056028691527899355, |
| "rewards/pure_accuracy_reward_math": 0.04882812628056854, |
| "step": 141 |
| }, |
| { |
| "clip_ratio": 0.00022036872547914754, |
| "epoch": 0.08030832661109617, |
| "grad_norm": 0.03533012047410011, |
| "kl": 0.002978205680847168, |
| "learning_rate": 2.261146496815287e-06, |
| "loss": 0.0071, |
| "step": 142 |
| }, |
| { |
| "clip_ratio": 0.0002158615123448726, |
| "epoch": 0.08222042962564609, |
| "grad_norm": 0.029908612370491028, |
| "kl": 0.002841353416442871, |
| "learning_rate": 2.2770700636942677e-06, |
| "loss": 0.0071, |
| "step": 143 |
| }, |
| { |
| "clip_ratio": 0.0002112481060976279, |
| "epoch": 0.08413253264019599, |
| "grad_norm": 0.028638474643230438, |
| "kl": 0.002796173095703125, |
| "learning_rate": 2.2929936305732485e-06, |
| "loss": 0.0071, |
| "step": 144 |
| }, |
| { |
| "clip_ratio": 0.00022246911356660348, |
| "epoch": 0.0860446356547459, |
| "grad_norm": 0.02828238159418106, |
| "kl": 0.0027240514755249023, |
| "learning_rate": 2.3089171974522297e-06, |
| "loss": 0.007, |
| "step": 145 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 534.0318322181702, |
| "epoch": 0.0879567386692958, |
| "grad_norm": 3.060509443283081, |
| "kl": 0.022321224212646484, |
| "learning_rate": 2.3248407643312104e-06, |
| "loss": 0.0062, |
| "num_tokens": 35996663.0, |
| "reward": 0.04436384144355543, |
| "reward_std": 0.06130999844754115, |
| "rewards/pure_accuracy_reward_math": 0.04436384039581753, |
| "step": 146 |
| }, |
| { |
| "clip_ratio": 0.00023404289771633557, |
| "epoch": 0.08986884168384572, |
| "grad_norm": 0.28904739022254944, |
| "kl": 0.004893064498901367, |
| "learning_rate": 2.3407643312101912e-06, |
| "loss": 0.0055, |
| "step": 147 |
| }, |
| { |
| "clip_ratio": 0.00024259101735424338, |
| "epoch": 0.09178094469839562, |
| "grad_norm": 0.03826431185007095, |
| "kl": 0.0027625560760498047, |
| "learning_rate": 2.356687898089172e-06, |
| "loss": 0.0054, |
| "step": 148 |
| }, |
| { |
| "clip_ratio": 0.0002517821457672653, |
| "epoch": 0.09369304771294554, |
| "grad_norm": 0.03572425991296768, |
| "kl": 0.002875208854675293, |
| "learning_rate": 2.372611464968153e-06, |
| "loss": 0.0054, |
| "step": 149 |
| }, |
| { |
| "clip_ratio": 0.00024034848578935453, |
| "epoch": 0.09560515072749544, |
| "grad_norm": 0.036431849002838135, |
| "kl": 0.0031164884567260742, |
| "learning_rate": 2.388535031847134e-06, |
| "loss": 0.0054, |
| "step": 150 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 538.2168254852295, |
| "epoch": 0.09751725374204535, |
| "grad_norm": 0.03362743556499481, |
| "kl": 0.002684354782104492, |
| "learning_rate": 2.4044585987261147e-06, |
| "loss": 0.0027, |
| "num_tokens": 39661060.0, |
| "reward": 0.05133928792201914, |
| "reward_std": 0.06672389659797773, |
| "rewards/pure_accuracy_reward_math": 0.05133928681607358, |
| "step": 151 |
| }, |
| { |
| "clip_ratio": 0.0002668876670099962, |
| "epoch": 0.09942935675659526, |
| "grad_norm": 0.033922772854566574, |
| "kl": 0.002791762351989746, |
| "learning_rate": 2.420382165605096e-06, |
| "loss": 0.0027, |
| "step": 152 |
| }, |
| { |
| "clip_ratio": 0.0002435101382616267, |
| "epoch": 0.10134145977114517, |
| "grad_norm": 0.03526493161916733, |
| "kl": 0.002907991409301758, |
| "learning_rate": 2.4363057324840767e-06, |
| "loss": 0.0027, |
| "step": 153 |
| }, |
| { |
| "clip_ratio": 0.00025345294346834635, |
| "epoch": 0.10325356278569509, |
| "grad_norm": 0.034125424921512604, |
| "kl": 0.0029108524322509766, |
| "learning_rate": 2.4522292993630575e-06, |
| "loss": 0.0027, |
| "step": 154 |
| }, |
| { |
| "clip_ratio": 0.0002378649581942227, |
| "epoch": 0.10516566580024499, |
| "grad_norm": 0.033436987549066544, |
| "kl": 0.002874612808227539, |
| "learning_rate": 2.4681528662420382e-06, |
| "loss": 0.0027, |
| "step": 155 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 541.2424907684326, |
| "epoch": 0.1070777688147949, |
| "grad_norm": 0.031592246145009995, |
| "kl": 0.002785801887512207, |
| "learning_rate": 2.4840764331210194e-06, |
| "loss": 0.005, |
| "num_tokens": 43331425.0, |
| "reward": 0.044363841181620955, |
| "reward_std": 0.05607495462754741, |
| "rewards/pure_accuracy_reward_math": 0.044363840599544346, |
| "step": 156 |
| }, |
| { |
| "clip_ratio": 0.00019312051063025137, |
| "epoch": 0.1089898718293448, |
| "grad_norm": 0.030642936006188393, |
| "kl": 0.0027495622634887695, |
| "learning_rate": 2.5e-06, |
| "loss": 0.005, |
| "step": 157 |
| }, |
| { |
| "clip_ratio": 0.0002267159566713417, |
| "epoch": 0.11090197484389472, |
| "grad_norm": 0.03025418519973755, |
| "kl": 0.002672433853149414, |
| "learning_rate": 2.515923566878981e-06, |
| "loss": 0.0049, |
| "step": 158 |
| }, |
| { |
| "clip_ratio": 0.00023296605036193796, |
| "epoch": 0.11281407785844462, |
| "grad_norm": 0.03024701401591301, |
| "kl": 0.0026074647903442383, |
| "learning_rate": 2.531847133757962e-06, |
| "loss": 0.0049, |
| "step": 159 |
| }, |
| { |
| "clip_ratio": 0.00024551542321660236, |
| "epoch": 0.11472618087299453, |
| "grad_norm": 0.03065372072160244, |
| "kl": 0.0025725364685058594, |
| "learning_rate": 2.547770700636943e-06, |
| "loss": 0.0049, |
| "step": 160 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.590705871582, |
| "epoch": 0.11663828388754444, |
| "grad_norm": 0.03286377340555191, |
| "kl": 0.002618551254272461, |
| "learning_rate": 2.5636942675159237e-06, |
| "loss": 0.0032, |
| "num_tokens": 46966882.0, |
| "reward": 0.0401785735739395, |
| "reward_std": 0.05864621384534985, |
| "rewards/pure_accuracy_reward_math": 0.04017857258440927, |
| "step": 161 |
| }, |
| { |
| "clip_ratio": 0.000249601399104904, |
| "epoch": 0.11855038690209435, |
| "grad_norm": 0.03168044239282608, |
| "kl": 0.0025817155838012695, |
| "learning_rate": 2.5796178343949045e-06, |
| "loss": 0.0032, |
| "step": 162 |
| }, |
| { |
| "clip_ratio": 0.0002426054838338132, |
| "epoch": 0.12046248991664425, |
| "grad_norm": 0.03161012753844261, |
| "kl": 0.0025763511657714844, |
| "learning_rate": 2.5955414012738857e-06, |
| "loss": 0.0032, |
| "step": 163 |
| }, |
| { |
| "clip_ratio": 0.0002400714004124893, |
| "epoch": 0.12237459293119417, |
| "grad_norm": 0.031408168375492096, |
| "kl": 0.002588987350463867, |
| "learning_rate": 2.6114649681528665e-06, |
| "loss": 0.0032, |
| "step": 164 |
| }, |
| { |
| "clip_ratio": 0.00024877328468164706, |
| "epoch": 0.12428669594574408, |
| "grad_norm": 0.030564049258828163, |
| "kl": 0.0026369094848632812, |
| "learning_rate": 2.6273885350318472e-06, |
| "loss": 0.0031, |
| "step": 165 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.5862393379211, |
| "epoch": 0.12619879896029398, |
| "grad_norm": 0.03767310827970505, |
| "kl": 0.0026383399963378906, |
| "learning_rate": 2.6433121019108284e-06, |
| "loss": 0.0062, |
| "num_tokens": 50581511.0, |
| "reward": 0.04966518055880442, |
| "reward_std": 0.06985319027444348, |
| "rewards/pure_accuracy_reward_math": 0.04966517968568951, |
| "step": 166 |
| }, |
| { |
| "clip_ratio": 0.0002872111982696879, |
| "epoch": 0.1281109019748439, |
| "grad_norm": 0.03578091412782669, |
| "kl": 0.0027115345001220703, |
| "learning_rate": 2.659235668789809e-06, |
| "loss": 0.0062, |
| "step": 167 |
| }, |
| { |
| "clip_ratio": 0.0002957127134664006, |
| "epoch": 0.13002300498939381, |
| "grad_norm": 0.03471493721008301, |
| "kl": 0.0028066635131835938, |
| "learning_rate": 2.67515923566879e-06, |
| "loss": 0.0062, |
| "step": 168 |
| }, |
| { |
| "clip_ratio": 0.0003112256898702981, |
| "epoch": 0.1319351080039437, |
| "grad_norm": 0.035491716116666794, |
| "kl": 0.0028966665267944336, |
| "learning_rate": 2.6910828025477707e-06, |
| "loss": 0.0062, |
| "step": 169 |
| }, |
| { |
| "clip_ratio": 0.0003354581235726073, |
| "epoch": 0.13384721101849362, |
| "grad_norm": 0.03574714809656143, |
| "kl": 0.0029289722442626953, |
| "learning_rate": 2.707006369426752e-06, |
| "loss": 0.0061, |
| "step": 170 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.8571667671204, |
| "epoch": 0.13575931403304353, |
| "grad_norm": 0.03648287057876587, |
| "kl": 0.0030307769775390625, |
| "learning_rate": 2.7229299363057327e-06, |
| "loss": 0.0061, |
| "num_tokens": 54209407.0, |
| "reward": 0.05161830587894656, |
| "reward_std": 0.06465821276651695, |
| "rewards/pure_accuracy_reward_math": 0.05161830494762398, |
| "step": 171 |
| }, |
| { |
| "clip_ratio": 0.0002587431810354701, |
| "epoch": 0.13767141704759345, |
| "grad_norm": 0.03615426644682884, |
| "kl": 0.0030341148376464844, |
| "learning_rate": 2.7388535031847135e-06, |
| "loss": 0.0061, |
| "step": 172 |
| }, |
| { |
| "clip_ratio": 0.0002548517101104153, |
| "epoch": 0.13958352006214333, |
| "grad_norm": 0.03565597161650658, |
| "kl": 0.002932310104370117, |
| "learning_rate": 2.7547770700636942e-06, |
| "loss": 0.0061, |
| "step": 173 |
| }, |
| { |
| "clip_ratio": 0.00027394448250106507, |
| "epoch": 0.14149562307669325, |
| "grad_norm": 0.035612594336271286, |
| "kl": 0.0029175281524658203, |
| "learning_rate": 2.7707006369426754e-06, |
| "loss": 0.0061, |
| "step": 174 |
| }, |
| { |
| "clip_ratio": 0.00027776476230201297, |
| "epoch": 0.14340772609124317, |
| "grad_norm": 0.036588992923498154, |
| "kl": 0.002942800521850586, |
| "learning_rate": 2.786624203821656e-06, |
| "loss": 0.006, |
| "step": 175 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.2430500984192, |
| "epoch": 0.14531982910579308, |
| "grad_norm": 0.03312006592750549, |
| "kl": 0.0028772354125976562, |
| "learning_rate": 2.802547770700637e-06, |
| "loss": 0.0056, |
| "num_tokens": 57839070.0, |
| "reward": 0.04854910931317136, |
| "reward_std": 0.05881887051509693, |
| "rewards/pure_accuracy_reward_math": 0.048549108614679426, |
| "step": 176 |
| }, |
| { |
| "clip_ratio": 0.00022063881021949783, |
| "epoch": 0.147231932120343, |
| "grad_norm": 0.0327099934220314, |
| "kl": 0.002942681312561035, |
| "learning_rate": 2.818471337579618e-06, |
| "loss": 0.0056, |
| "step": 177 |
| }, |
| { |
| "clip_ratio": 0.00021944492368675128, |
| "epoch": 0.14914403513489288, |
| "grad_norm": 0.03261202201247215, |
| "kl": 0.002986431121826172, |
| "learning_rate": 2.834394904458599e-06, |
| "loss": 0.0056, |
| "step": 178 |
| }, |
| { |
| "clip_ratio": 0.0002127133307396889, |
| "epoch": 0.1510561381494428, |
| "grad_norm": 0.03220335766673088, |
| "kl": 0.002970457077026367, |
| "learning_rate": 2.8503184713375797e-06, |
| "loss": 0.0056, |
| "step": 179 |
| }, |
| { |
| "clip_ratio": 0.0001991192841614975, |
| "epoch": 0.1529682411639927, |
| "grad_norm": 0.03179548308253288, |
| "kl": 0.0029560327529907227, |
| "learning_rate": 2.8662420382165605e-06, |
| "loss": 0.0056, |
| "step": 180 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.1275358200073, |
| "epoch": 0.15488034417854263, |
| "grad_norm": 0.030966561287641525, |
| "kl": 0.0029218196868896484, |
| "learning_rate": 2.8821656050955417e-06, |
| "loss": 0.0048, |
| "num_tokens": 61445599.0, |
| "reward": 0.04352678795112297, |
| "reward_std": 0.05598862626357004, |
| "rewards/pure_accuracy_reward_math": 0.043526787078008056, |
| "step": 181 |
| }, |
| { |
| "clip_ratio": 0.00021554413663693595, |
| "epoch": 0.15679244719309252, |
| "grad_norm": 0.030419446527957916, |
| "kl": 0.0029065608978271484, |
| "learning_rate": 2.8980891719745225e-06, |
| "loss": 0.0048, |
| "step": 182 |
| }, |
| { |
| "clip_ratio": 0.0002025423377176594, |
| "epoch": 0.15870455020764243, |
| "grad_norm": 0.030062729492783546, |
| "kl": 0.0028995275497436523, |
| "learning_rate": 2.9140127388535032e-06, |
| "loss": 0.0048, |
| "step": 183 |
| }, |
| { |
| "clip_ratio": 0.00023064417456453157, |
| "epoch": 0.16061665322219235, |
| "grad_norm": 0.029301613569259644, |
| "kl": 0.002888321876525879, |
| "learning_rate": 2.9299363057324844e-06, |
| "loss": 0.0048, |
| "step": 184 |
| }, |
| { |
| "clip_ratio": 0.0002338091023261768, |
| "epoch": 0.16252875623674226, |
| "grad_norm": 0.029127391055226326, |
| "kl": 0.0028772354125976562, |
| "learning_rate": 2.945859872611465e-06, |
| "loss": 0.0047, |
| "step": 185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 533.0510892868042, |
| "epoch": 0.16444085925129218, |
| "grad_norm": 0.036479271948337555, |
| "kl": 0.002923727035522461, |
| "learning_rate": 2.961783439490446e-06, |
| "loss": 0.0063, |
| "num_tokens": 65094142.0, |
| "reward": 0.05022321717115119, |
| "reward_std": 0.06538890500087291, |
| "rewards/pure_accuracy_reward_math": 0.050223215424921364, |
| "step": 186 |
| }, |
| { |
| "clip_ratio": 0.00026048495129771254, |
| "epoch": 0.16635296226584206, |
| "grad_norm": 0.036232370883226395, |
| "kl": 0.0029561519622802734, |
| "learning_rate": 2.9777070063694267e-06, |
| "loss": 0.0063, |
| "step": 187 |
| }, |
| { |
| "clip_ratio": 0.0002226464382033555, |
| "epoch": 0.16826506528039198, |
| "grad_norm": 0.03523917496204376, |
| "kl": 0.003048419952392578, |
| "learning_rate": 2.993630573248408e-06, |
| "loss": 0.0063, |
| "step": 188 |
| }, |
| { |
| "clip_ratio": 0.0002362887615845466, |
| "epoch": 0.1701771682949419, |
| "grad_norm": 0.03477315977215767, |
| "kl": 0.003025054931640625, |
| "learning_rate": 3.0095541401273887e-06, |
| "loss": 0.0062, |
| "step": 189 |
| }, |
| { |
| "clip_ratio": 0.00023160997727700305, |
| "epoch": 0.1720892713094918, |
| "grad_norm": 0.03342609107494354, |
| "kl": 0.0030221939086914062, |
| "learning_rate": 3.0254777070063695e-06, |
| "loss": 0.0062, |
| "step": 190 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.7776494026184, |
| "epoch": 0.1740013743240417, |
| "grad_norm": 0.03668810427188873, |
| "kl": 0.0029752254486083984, |
| "learning_rate": 3.0414012738853503e-06, |
| "loss": 0.0066, |
| "num_tokens": 68728277.0, |
| "reward": 0.04994419863214716, |
| "reward_std": 0.06135626137256622, |
| "rewards/pure_accuracy_reward_math": 0.04994419787544757, |
| "step": 191 |
| }, |
| { |
| "clip_ratio": 0.0002391185845453947, |
| "epoch": 0.1759134773385916, |
| "grad_norm": 0.035618141293525696, |
| "kl": 0.0029642581939697266, |
| "learning_rate": 3.0573248407643314e-06, |
| "loss": 0.0066, |
| "step": 192 |
| }, |
| { |
| "clip_ratio": 0.00024402707180115613, |
| "epoch": 0.17782558035314153, |
| "grad_norm": 0.032588809728622437, |
| "kl": 0.002981424331665039, |
| "learning_rate": 3.0732484076433122e-06, |
| "loss": 0.0066, |
| "step": 193 |
| }, |
| { |
| "clip_ratio": 0.0002546731577126593, |
| "epoch": 0.17973768336769144, |
| "grad_norm": 0.0323190875351429, |
| "kl": 0.0030133724212646484, |
| "learning_rate": 3.089171974522293e-06, |
| "loss": 0.0066, |
| "step": 194 |
| }, |
| { |
| "clip_ratio": 0.0002784079450179888, |
| "epoch": 0.18164978638224133, |
| "grad_norm": 0.03181909769773483, |
| "kl": 0.002997159957885742, |
| "learning_rate": 3.105095541401274e-06, |
| "loss": 0.0065, |
| "step": 195 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 536.7226796150208, |
| "epoch": 0.18356188939679124, |
| "grad_norm": 0.034835390746593475, |
| "kl": 0.003053426742553711, |
| "learning_rate": 3.121019108280255e-06, |
| "loss": 0.0053, |
| "num_tokens": 72383923.0, |
| "reward": 0.04352678789291531, |
| "reward_std": 0.06164911447558552, |
| "rewards/pure_accuracy_reward_math": 0.043526787078008056, |
| "step": 196 |
| }, |
| { |
| "clip_ratio": 0.00022759345233680506, |
| "epoch": 0.18547399241134116, |
| "grad_norm": 0.03316686674952507, |
| "kl": 0.003064870834350586, |
| "learning_rate": 3.1369426751592357e-06, |
| "loss": 0.0053, |
| "step": 197 |
| }, |
| { |
| "clip_ratio": 0.00024183520912401946, |
| "epoch": 0.18738609542589107, |
| "grad_norm": 0.0329214446246624, |
| "kl": 0.003040313720703125, |
| "learning_rate": 3.1528662420382165e-06, |
| "loss": 0.0053, |
| "step": 198 |
| }, |
| { |
| "clip_ratio": 0.0002539973459079192, |
| "epoch": 0.189298198440441, |
| "grad_norm": 0.031231405213475227, |
| "kl": 0.0030624866485595703, |
| "learning_rate": 3.1687898089171977e-06, |
| "loss": 0.0052, |
| "step": 199 |
| }, |
| { |
| "clip_ratio": 0.0002776768195076329, |
| "epoch": 0.19121030145499088, |
| "grad_norm": 0.031124714761972427, |
| "kl": 0.0030813217163085938, |
| "learning_rate": 3.1847133757961785e-06, |
| "loss": 0.0052, |
| "step": 200 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.4048767089844, |
| "epoch": 0.1931224044695408, |
| "grad_norm": 0.03386811539530754, |
| "kl": 0.003122568130493164, |
| "learning_rate": 3.2006369426751592e-06, |
| "loss": 0.0052, |
| "num_tokens": 75984438.0, |
| "reward": 0.04771205616998486, |
| "reward_std": 0.06319682823959738, |
| "rewards/pure_accuracy_reward_math": 0.04771205471479334, |
| "step": 201 |
| }, |
| { |
| "clip_ratio": 0.00024403837670661233, |
| "epoch": 0.1950345074840907, |
| "grad_norm": 0.03252818062901497, |
| "kl": 0.003181934356689453, |
| "learning_rate": 3.2165605095541404e-06, |
| "loss": 0.0052, |
| "step": 202 |
| }, |
| { |
| "clip_ratio": 0.0002548924753114079, |
| "epoch": 0.19694661049864062, |
| "grad_norm": 0.03233063966035843, |
| "kl": 0.0032570362091064453, |
| "learning_rate": 3.232484076433121e-06, |
| "loss": 0.0052, |
| "step": 203 |
| }, |
| { |
| "clip_ratio": 0.0003048134046252926, |
| "epoch": 0.1988587135131905, |
| "grad_norm": 0.032457806169986725, |
| "kl": 0.0032837390899658203, |
| "learning_rate": 3.248407643312102e-06, |
| "loss": 0.0051, |
| "step": 204 |
| }, |
| { |
| "clip_ratio": 0.0003034327668842707, |
| "epoch": 0.20077081652774043, |
| "grad_norm": 0.03239855542778969, |
| "kl": 0.0032906532287597656, |
| "learning_rate": 3.2643312101910827e-06, |
| "loss": 0.0051, |
| "step": 205 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 509.6657609939575, |
| "epoch": 0.20268291954229034, |
| "grad_norm": 0.0370325967669487, |
| "kl": 0.0033235549926757812, |
| "learning_rate": 3.280254777070064e-06, |
| "loss": 0.0075, |
| "num_tokens": 79548556.0, |
| "reward": 0.052176341792801395, |
| "reward_std": 0.06135626166360453, |
| "rewards/pure_accuracy_reward_math": 0.0521763407450635, |
| "step": 206 |
| }, |
| { |
| "clip_ratio": 0.00026798775621728055, |
| "epoch": 0.20459502255684026, |
| "grad_norm": 0.03616202250123024, |
| "kl": 0.0032608509063720703, |
| "learning_rate": 3.2961783439490447e-06, |
| "loss": 0.0075, |
| "step": 207 |
| }, |
| { |
| "clip_ratio": 0.0002652346859690624, |
| "epoch": 0.20650712557139017, |
| "grad_norm": 0.03537038713693619, |
| "kl": 0.0032129287719726562, |
| "learning_rate": 3.3121019108280255e-06, |
| "loss": 0.0074, |
| "step": 208 |
| }, |
| { |
| "clip_ratio": 0.00026950107780976396, |
| "epoch": 0.20841922858594006, |
| "grad_norm": 0.03502323478460312, |
| "kl": 0.0031485557556152344, |
| "learning_rate": 3.3280254777070063e-06, |
| "loss": 0.0074, |
| "step": 209 |
| }, |
| { |
| "clip_ratio": 0.00025725525091502277, |
| "epoch": 0.21033133160048997, |
| "grad_norm": 0.03380832076072693, |
| "kl": 0.0031027793884277344, |
| "learning_rate": 3.3439490445859875e-06, |
| "loss": 0.0074, |
| "step": 210 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 511.36637449264526, |
| "epoch": 0.2122434346150399, |
| "grad_norm": 1.5961617231369019, |
| "kl": 0.007061004638671875, |
| "learning_rate": 3.3598726114649682e-06, |
| "loss": 0.0062, |
| "num_tokens": 83116585.0, |
| "reward": 0.05078125247382559, |
| "reward_std": 0.06568795558996499, |
| "rewards/pure_accuracy_reward_math": 0.05078125119325705, |
| "step": 211 |
| }, |
| { |
| "clip_ratio": 0.0002800602194383828, |
| "epoch": 0.2141555376295898, |
| "grad_norm": 0.04389820247888565, |
| "kl": 0.004379749298095703, |
| "learning_rate": 3.375796178343949e-06, |
| "loss": 0.0061, |
| "step": 212 |
| }, |
| { |
| "clip_ratio": 0.0002803218378630845, |
| "epoch": 0.2160676406441397, |
| "grad_norm": 0.04022788628935814, |
| "kl": 0.0043125152587890625, |
| "learning_rate": 3.39171974522293e-06, |
| "loss": 0.0061, |
| "step": 213 |
| }, |
| { |
| "clip_ratio": 0.0002704095267631601, |
| "epoch": 0.2179797436586896, |
| "grad_norm": 0.041697319597005844, |
| "kl": 0.004408597946166992, |
| "learning_rate": 3.407643312101911e-06, |
| "loss": 0.0061, |
| "step": 214 |
| }, |
| { |
| "clip_ratio": 0.0003097587871820906, |
| "epoch": 0.21989184667323952, |
| "grad_norm": 0.04933662340044975, |
| "kl": 0.004500150680541992, |
| "learning_rate": 3.4235668789808917e-06, |
| "loss": 0.006, |
| "step": 215 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.8122463226318, |
| "epoch": 0.22180394968778944, |
| "grad_norm": 0.03384365886449814, |
| "kl": 0.0032858848571777344, |
| "learning_rate": 3.4394904458598725e-06, |
| "loss": 0.0069, |
| "num_tokens": 86710660.0, |
| "reward": 0.041015627270098776, |
| "reward_std": 0.05345123494043946, |
| "rewards/pure_accuracy_reward_math": 0.041015626047737896, |
| "step": 216 |
| }, |
| { |
| "clip_ratio": 0.00022953049290208583, |
| "epoch": 0.22371605270233935, |
| "grad_norm": 0.03259577602148056, |
| "kl": 0.003277301788330078, |
| "learning_rate": 3.4554140127388537e-06, |
| "loss": 0.0069, |
| "step": 217 |
| }, |
| { |
| "clip_ratio": 0.00024143920052210888, |
| "epoch": 0.22562815571688924, |
| "grad_norm": 0.031054330989718437, |
| "kl": 0.0031991004943847656, |
| "learning_rate": 3.4713375796178345e-06, |
| "loss": 0.0069, |
| "step": 218 |
| }, |
| { |
| "clip_ratio": 0.0002552373456978785, |
| "epoch": 0.22754025873143915, |
| "grad_norm": 0.031755171716213226, |
| "kl": 0.003099679946899414, |
| "learning_rate": 3.4872611464968152e-06, |
| "loss": 0.0069, |
| "step": 219 |
| }, |
| { |
| "clip_ratio": 0.0002681780064790473, |
| "epoch": 0.22945236174598907, |
| "grad_norm": 0.031188273802399635, |
| "kl": 0.003045320510864258, |
| "learning_rate": 3.5031847133757964e-06, |
| "loss": 0.0068, |
| "step": 220 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.6825013160706, |
| "epoch": 0.23136446476053898, |
| "grad_norm": 0.03775335103273392, |
| "kl": 0.003011941909790039, |
| "learning_rate": 3.5191082802547772e-06, |
| "loss": 0.0063, |
| "num_tokens": 90291858.0, |
| "reward": 0.04715401996509172, |
| "reward_std": 0.06113734241807833, |
| "rewards/pure_accuracy_reward_math": 0.04715401915018447, |
| "step": 221 |
| }, |
| { |
| "clip_ratio": 0.0002582234144483664, |
| "epoch": 0.23327656777508887, |
| "grad_norm": 0.03602875769138336, |
| "kl": 0.002973794937133789, |
| "learning_rate": 3.535031847133758e-06, |
| "loss": 0.0063, |
| "step": 222 |
| }, |
| { |
| "clip_ratio": 0.0002264754746761355, |
| "epoch": 0.2351886707896388, |
| "grad_norm": 0.03449266403913498, |
| "kl": 0.002980470657348633, |
| "learning_rate": 3.5509554140127388e-06, |
| "loss": 0.0063, |
| "step": 223 |
| }, |
| { |
| "clip_ratio": 0.00025999376231311544, |
| "epoch": 0.2371007738041887, |
| "grad_norm": 0.0329199843108654, |
| "kl": 0.002971053123474121, |
| "learning_rate": 3.56687898089172e-06, |
| "loss": 0.0062, |
| "step": 224 |
| }, |
| { |
| "clip_ratio": 0.000296181439978227, |
| "epoch": 0.23901287681873862, |
| "grad_norm": 0.033409375697374344, |
| "kl": 0.0030214786529541016, |
| "learning_rate": 3.5828025477707007e-06, |
| "loss": 0.0062, |
| "step": 225 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.4132494926453, |
| "epoch": 0.2409249798332885, |
| "grad_norm": 0.03549947962164879, |
| "kl": 0.004068970680236816, |
| "learning_rate": 3.5987261146496815e-06, |
| "loss": 0.0083, |
| "num_tokens": 93927655.0, |
| "reward": 0.039899555675219744, |
| "reward_std": 0.05890519870445132, |
| "rewards/pure_accuracy_reward_math": 0.03989955480210483, |
| "step": 226 |
| }, |
| { |
| "clip_ratio": 0.00024125495940552355, |
| "epoch": 0.24283708284783842, |
| "grad_norm": 0.033262889832258224, |
| "kl": 0.0040683746337890625, |
| "learning_rate": 3.6146496815286623e-06, |
| "loss": 0.0083, |
| "step": 227 |
| }, |
| { |
| "clip_ratio": 0.00024547909194438944, |
| "epoch": 0.24474918586238834, |
| "grad_norm": 0.03303634375333786, |
| "kl": 0.004040956497192383, |
| "learning_rate": 3.6305732484076435e-06, |
| "loss": 0.0083, |
| "step": 228 |
| }, |
| { |
| "clip_ratio": 0.0002773670349256463, |
| "epoch": 0.24666128887693825, |
| "grad_norm": 0.03389015421271324, |
| "kl": 0.00404667854309082, |
| "learning_rate": 3.6464968152866242e-06, |
| "loss": 0.0083, |
| "step": 229 |
| }, |
| { |
| "clip_ratio": 0.000270649900215858, |
| "epoch": 0.24857339189148817, |
| "grad_norm": 0.035877879709005356, |
| "kl": 0.0038802623748779297, |
| "learning_rate": 3.662420382165605e-06, |
| "loss": 0.0082, |
| "step": 230 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.9338984489441, |
| "epoch": 0.25048549490603805, |
| "grad_norm": 0.032850634306669235, |
| "kl": 0.0030744075775146484, |
| "learning_rate": 3.678343949044586e-06, |
| "loss": 0.0064, |
| "num_tokens": 97554714.0, |
| "reward": 0.04743303795112297, |
| "reward_std": 0.061522720265202224, |
| "rewards/pure_accuracy_reward_math": 0.047433037019800395, |
| "step": 231 |
| }, |
| { |
| "clip_ratio": 0.00024459305313939694, |
| "epoch": 0.25239759792058797, |
| "grad_norm": 0.03185749799013138, |
| "kl": 0.00302886962890625, |
| "learning_rate": 3.694267515923567e-06, |
| "loss": 0.0064, |
| "step": 232 |
| }, |
| { |
| "clip_ratio": 0.00025332184179660544, |
| "epoch": 0.2543097009351379, |
| "grad_norm": 0.03135737404227257, |
| "kl": 0.002967357635498047, |
| "learning_rate": 3.7101910828025477e-06, |
| "loss": 0.0064, |
| "step": 233 |
| }, |
| { |
| "clip_ratio": 0.0002861271710798974, |
| "epoch": 0.2562218039496878, |
| "grad_norm": 0.030725885182619095, |
| "kl": 0.0029573440551757812, |
| "learning_rate": 3.7261146496815285e-06, |
| "loss": 0.0064, |
| "step": 234 |
| }, |
| { |
| "clip_ratio": 0.0002841630366674508, |
| "epoch": 0.2581339069642377, |
| "grad_norm": 0.030670415610074997, |
| "kl": 0.002954721450805664, |
| "learning_rate": 3.7420382165605097e-06, |
| "loss": 0.0063, |
| "step": 235 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.4467325210571, |
| "epoch": 0.26004600997878763, |
| "grad_norm": 0.03534790128469467, |
| "kl": 0.003011465072631836, |
| "learning_rate": 3.757961783439491e-06, |
| "loss": 0.0041, |
| "num_tokens": 101193143.0, |
| "reward": 0.04631696638534777, |
| "reward_std": 0.0601075982558541, |
| "rewards/pure_accuracy_reward_math": 0.046316965454025194, |
| "step": 236 |
| }, |
| { |
| "clip_ratio": 0.00022260297603793333, |
| "epoch": 0.2619581129933375, |
| "grad_norm": 0.03438499942421913, |
| "kl": 0.0030508041381835938, |
| "learning_rate": 3.773885350318472e-06, |
| "loss": 0.0041, |
| "step": 237 |
| }, |
| { |
| "clip_ratio": 0.00024397839513312647, |
| "epoch": 0.2638702160078874, |
| "grad_norm": 0.032804593443870544, |
| "kl": 0.0030994415283203125, |
| "learning_rate": 3.789808917197453e-06, |
| "loss": 0.0041, |
| "step": 238 |
| }, |
| { |
| "clip_ratio": 0.0002508007286223801, |
| "epoch": 0.2657823190224373, |
| "grad_norm": 0.03402625024318695, |
| "kl": 0.0031244754791259766, |
| "learning_rate": 3.8057324840764336e-06, |
| "loss": 0.004, |
| "step": 239 |
| }, |
| { |
| "clip_ratio": 0.00025242620182552855, |
| "epoch": 0.26769442203698723, |
| "grad_norm": 0.03291900083422661, |
| "kl": 0.003187417984008789, |
| "learning_rate": 3.821656050955415e-06, |
| "loss": 0.004, |
| "step": 240 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 536.9913763999939, |
| "epoch": 0.26960652505153715, |
| "grad_norm": 0.033690325915813446, |
| "kl": 0.003125429153442383, |
| "learning_rate": 3.837579617834396e-06, |
| "loss": 0.0089, |
| "num_tokens": 104860392.0, |
| "reward": 0.05496652069268748, |
| "reward_std": 0.07028483308386058, |
| "rewards/pure_accuracy_reward_math": 0.05496651929570362, |
| "step": 241 |
| }, |
| { |
| "clip_ratio": 0.0002661047830088137, |
| "epoch": 0.27151862806608706, |
| "grad_norm": 0.03227640688419342, |
| "kl": 0.0031244754791259766, |
| "learning_rate": 3.853503184713376e-06, |
| "loss": 0.009, |
| "step": 242 |
| }, |
| { |
| "clip_ratio": 0.00027503777869242185, |
| "epoch": 0.273430731080637, |
| "grad_norm": 0.03168897703289986, |
| "kl": 0.003157377243041992, |
| "learning_rate": 3.869426751592357e-06, |
| "loss": 0.0089, |
| "step": 243 |
| }, |
| { |
| "clip_ratio": 0.00029653536631712996, |
| "epoch": 0.2753428340951869, |
| "grad_norm": 0.03222280368208885, |
| "kl": 0.0031862258911132812, |
| "learning_rate": 3.885350318471338e-06, |
| "loss": 0.0089, |
| "step": 244 |
| }, |
| { |
| "clip_ratio": 0.0003081631187455969, |
| "epoch": 0.2772549371097368, |
| "grad_norm": 0.03176514804363251, |
| "kl": 0.0032341480255126953, |
| "learning_rate": 3.901273885350319e-06, |
| "loss": 0.0088, |
| "step": 245 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 513.5616898536682, |
| "epoch": 0.27916704012428667, |
| "grad_norm": 0.037929438054561615, |
| "kl": 0.0035233497619628906, |
| "learning_rate": 3.9171974522293e-06, |
| "loss": 0.0075, |
| "num_tokens": 108427949.0, |
| "reward": 0.0544084852153901, |
| "reward_std": 0.0659469406818971, |
| "rewards/pure_accuracy_reward_math": 0.054408483527367935, |
| "step": 246 |
| }, |
| { |
| "clip_ratio": 0.0002633177949178389, |
| "epoch": 0.2810791431388366, |
| "grad_norm": 0.03561301901936531, |
| "kl": 0.0035467147827148438, |
| "learning_rate": 3.933121019108281e-06, |
| "loss": 0.0075, |
| "step": 247 |
| }, |
| { |
| "clip_ratio": 0.0003005996498472996, |
| "epoch": 0.2829912461533865, |
| "grad_norm": 0.035342708230018616, |
| "kl": 0.003578662872314453, |
| "learning_rate": 3.949044585987262e-06, |
| "loss": 0.0075, |
| "step": 248 |
| }, |
| { |
| "clip_ratio": 0.0003206986277177748, |
| "epoch": 0.2849033491679364, |
| "grad_norm": 0.03841444477438927, |
| "kl": 0.0036001205444335938, |
| "learning_rate": 3.964968152866243e-06, |
| "loss": 0.0075, |
| "step": 249 |
| }, |
| { |
| "clip_ratio": 0.00030761192169848073, |
| "epoch": 0.28681545218248633, |
| "grad_norm": 0.03515273705124855, |
| "kl": 0.003624439239501953, |
| "learning_rate": 3.980891719745223e-06, |
| "loss": 0.0074, |
| "step": 250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 504.73858308792114, |
| "epoch": 0.28872755519703625, |
| "grad_norm": 0.04030496999621391, |
| "kl": 0.003686189651489258, |
| "learning_rate": 3.996815286624204e-06, |
| "loss": 0.0081, |
| "num_tokens": 111975532.0, |
| "reward": 0.0647321458964143, |
| "reward_std": 0.07547981187235564, |
| "rewards/pure_accuracy_reward_math": 0.06473214420839213, |
| "step": 251 |
| }, |
| { |
| "clip_ratio": 0.00031485489739679906, |
| "epoch": 0.29063965821158616, |
| "grad_norm": 0.04058763012290001, |
| "kl": 0.003683328628540039, |
| "learning_rate": 4.012738853503185e-06, |
| "loss": 0.0081, |
| "step": 252 |
| }, |
| { |
| "clip_ratio": 0.0003329372994471669, |
| "epoch": 0.2925517612261361, |
| "grad_norm": 0.039948880672454834, |
| "kl": 0.003644227981567383, |
| "learning_rate": 4.0286624203821666e-06, |
| "loss": 0.0081, |
| "step": 253 |
| }, |
| { |
| "clip_ratio": 0.00031999613804600813, |
| "epoch": 0.294463864240686, |
| "grad_norm": 0.038771189749240875, |
| "kl": 0.003670930862426758, |
| "learning_rate": 4.044585987261147e-06, |
| "loss": 0.008, |
| "step": 254 |
| }, |
| { |
| "clip_ratio": 0.0003391868065136805, |
| "epoch": 0.29637596725523585, |
| "grad_norm": 0.03820183873176575, |
| "kl": 0.0036439895629882812, |
| "learning_rate": 4.060509554140128e-06, |
| "loss": 0.0079, |
| "step": 255 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 507.980770111084, |
| "epoch": 0.29828807026978577, |
| "grad_norm": 0.0373733825981617, |
| "kl": 0.003556966781616211, |
| "learning_rate": 4.076433121019109e-06, |
| "loss": 0.0047, |
| "num_tokens": 115530899.0, |
| "reward": 0.05217634199652821, |
| "reward_std": 0.06624599173665047, |
| "rewards/pure_accuracy_reward_math": 0.05217634030850604, |
| "step": 256 |
| }, |
| { |
| "clip_ratio": 0.0002444871162765594, |
| "epoch": 0.3002001732843357, |
| "grad_norm": 0.03655192255973816, |
| "kl": 0.003623485565185547, |
| "learning_rate": 4.09235668789809e-06, |
| "loss": 0.0047, |
| "step": 257 |
| }, |
| { |
| "clip_ratio": 0.0002544127338524049, |
| "epoch": 0.3021122762988856, |
| "grad_norm": 0.035692181438207626, |
| "kl": 0.003640890121459961, |
| "learning_rate": 4.10828025477707e-06, |
| "loss": 0.0046, |
| "step": 258 |
| }, |
| { |
| "clip_ratio": 0.0002950017506577751, |
| "epoch": 0.3040243793134355, |
| "grad_norm": 0.03550735488533974, |
| "kl": 0.0036733150482177734, |
| "learning_rate": 4.124203821656051e-06, |
| "loss": 0.0046, |
| "step": 259 |
| }, |
| { |
| "clip_ratio": 0.0002894491571510116, |
| "epoch": 0.3059364823279854, |
| "grad_norm": 0.03471330925822258, |
| "kl": 0.00366973876953125, |
| "learning_rate": 4.140127388535032e-06, |
| "loss": 0.0045, |
| "step": 260 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 517.2921543121338, |
| "epoch": 0.30784858534253534, |
| "grad_norm": 0.6632264852523804, |
| "kl": 0.007929325103759766, |
| "learning_rate": 4.156050955414014e-06, |
| "loss": 0.0041, |
| "num_tokens": 119123970.0, |
| "reward": 0.046875002153683454, |
| "reward_std": 0.06358220643596724, |
| "rewards/pure_accuracy_reward_math": 0.04687500122236088, |
| "step": 261 |
| }, |
| { |
| "clip_ratio": 0.00027907352409783925, |
| "epoch": 0.30976068835708526, |
| "grad_norm": 0.03735750913619995, |
| "kl": 0.0038709640502929688, |
| "learning_rate": 4.171974522292994e-06, |
| "loss": 0.004, |
| "step": 262 |
| }, |
| { |
| "clip_ratio": 0.000277261100677606, |
| "epoch": 0.31167279137163517, |
| "grad_norm": 0.03806532546877861, |
| "kl": 0.004002094268798828, |
| "learning_rate": 4.187898089171975e-06, |
| "loss": 0.004, |
| "step": 263 |
| }, |
| { |
| "clip_ratio": 0.00026404397090118437, |
| "epoch": 0.31358489438618503, |
| "grad_norm": 0.03587675094604492, |
| "kl": 0.00407719612121582, |
| "learning_rate": 4.203821656050956e-06, |
| "loss": 0.0039, |
| "step": 264 |
| }, |
| { |
| "clip_ratio": 0.0003132741497324787, |
| "epoch": 0.31549699740073495, |
| "grad_norm": 0.03516336902976036, |
| "kl": 0.004099607467651367, |
| "learning_rate": 4.219745222929937e-06, |
| "loss": 0.0039, |
| "step": 265 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.596004486084, |
| "epoch": 0.31740910041528486, |
| "grad_norm": 0.038204919546842575, |
| "kl": 0.0035691261291503906, |
| "learning_rate": 4.2356687898089174e-06, |
| "loss": 0.006, |
| "num_tokens": 122758966.0, |
| "reward": 0.054966520925518125, |
| "reward_std": 0.06770737608894706, |
| "rewards/pure_accuracy_reward_math": 0.05496651912108064, |
| "step": 266 |
| }, |
| { |
| "clip_ratio": 0.00026713599251593223, |
| "epoch": 0.3193212034298348, |
| "grad_norm": 0.03804617002606392, |
| "kl": 0.003623485565185547, |
| "learning_rate": 4.251592356687898e-06, |
| "loss": 0.006, |
| "step": 267 |
| }, |
| { |
| "clip_ratio": 0.00027288361513910786, |
| "epoch": 0.3212333064443847, |
| "grad_norm": 0.03765474632382393, |
| "kl": 0.003659486770629883, |
| "learning_rate": 4.26751592356688e-06, |
| "loss": 0.006, |
| "step": 268 |
| }, |
| { |
| "clip_ratio": 0.0002754389876429286, |
| "epoch": 0.3231454094589346, |
| "grad_norm": 0.037356842309236526, |
| "kl": 0.0036840438842773438, |
| "learning_rate": 4.283439490445861e-06, |
| "loss": 0.0059, |
| "step": 269 |
| }, |
| { |
| "clip_ratio": 0.0002686067065269526, |
| "epoch": 0.3250575124734845, |
| "grad_norm": 0.03656876087188721, |
| "kl": 0.003694295883178711, |
| "learning_rate": 4.299363057324841e-06, |
| "loss": 0.0059, |
| "step": 270 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 543.1746897697449, |
| "epoch": 0.32696961548803444, |
| "grad_norm": 0.03417838364839554, |
| "kl": 0.0035529136657714844, |
| "learning_rate": 4.315286624203822e-06, |
| "loss": 0.0076, |
| "num_tokens": 126443800.0, |
| "reward": 0.04882812697906047, |
| "reward_std": 0.05766273388871923, |
| "rewards/pure_accuracy_reward_math": 0.04882812616415322, |
| "step": 271 |
| }, |
| { |
| "clip_ratio": 0.0002270729566475893, |
| "epoch": 0.32888171850258435, |
| "grad_norm": 0.03328363224864006, |
| "kl": 0.0035278797149658203, |
| "learning_rate": 4.331210191082803e-06, |
| "loss": 0.0076, |
| "step": 272 |
| }, |
| { |
| "clip_ratio": 0.0002132950650661769, |
| "epoch": 0.3307938215171342, |
| "grad_norm": 0.03230879083275795, |
| "kl": 0.0034902095794677734, |
| "learning_rate": 4.347133757961784e-06, |
| "loss": 0.0076, |
| "step": 273 |
| }, |
| { |
| "clip_ratio": 0.0002096330554195447, |
| "epoch": 0.3327059245316841, |
| "grad_norm": 0.031601596623659134, |
| "kl": 0.003440380096435547, |
| "learning_rate": 4.3630573248407645e-06, |
| "loss": 0.0076, |
| "step": 274 |
| }, |
| { |
| "clip_ratio": 0.00027223577194490645, |
| "epoch": 0.33461802754623404, |
| "grad_norm": 0.033090248703956604, |
| "kl": 0.003412485122680664, |
| "learning_rate": 4.378980891719746e-06, |
| "loss": 0.0075, |
| "step": 275 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 535.8942775726318, |
| "epoch": 0.33653013056078396, |
| "grad_norm": 0.03229549527168274, |
| "kl": 0.003350973129272461, |
| "learning_rate": 4.394904458598727e-06, |
| "loss": 0.0057, |
| "num_tokens": 130099677.0, |
| "reward": 0.04966518087894656, |
| "reward_std": 0.060705700190737844, |
| "rewards/pure_accuracy_reward_math": 0.049665179773001, |
| "step": 276 |
| }, |
| { |
| "clip_ratio": 0.00025271691475836633, |
| "epoch": 0.3384422335753339, |
| "grad_norm": 0.03214692696928978, |
| "kl": 0.0033435821533203125, |
| "learning_rate": 4.410828025477708e-06, |
| "loss": 0.0057, |
| "step": 277 |
| }, |
| { |
| "clip_ratio": 0.00023837689644778948, |
| "epoch": 0.3403543365898838, |
| "grad_norm": 0.03055053949356079, |
| "kl": 0.003403902053833008, |
| "learning_rate": 4.426751592356688e-06, |
| "loss": 0.0057, |
| "step": 278 |
| }, |
| { |
| "clip_ratio": 0.0002586998209039848, |
| "epoch": 0.3422664396044337, |
| "grad_norm": 0.030119990929961205, |
| "kl": 0.003477334976196289, |
| "learning_rate": 4.442675159235669e-06, |
| "loss": 0.0057, |
| "step": 279 |
| }, |
| { |
| "clip_ratio": 0.00026621688834893575, |
| "epoch": 0.3441785426189836, |
| "grad_norm": 0.030735207721590996, |
| "kl": 0.0035724639892578125, |
| "learning_rate": 4.45859872611465e-06, |
| "loss": 0.0056, |
| "step": 280 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 542.7466769218445, |
| "epoch": 0.34609064563353353, |
| "grad_norm": 0.033374350517988205, |
| "kl": 0.003545999526977539, |
| "learning_rate": 4.474522292993631e-06, |
| "loss": 0.0036, |
| "num_tokens": 133773381.0, |
| "reward": 0.051339288300368935, |
| "reward_std": 0.06345581240020692, |
| "rewards/pure_accuracy_reward_math": 0.05133928690338507, |
| "step": 281 |
| }, |
| { |
| "clip_ratio": 0.0002734534241994879, |
| "epoch": 0.3480027486480834, |
| "grad_norm": 0.03312847390770912, |
| "kl": 0.0035567283630371094, |
| "learning_rate": 4.490445859872612e-06, |
| "loss": 0.0036, |
| "step": 282 |
| }, |
| { |
| "clip_ratio": 0.00022532319422907676, |
| "epoch": 0.3499148516626333, |
| "grad_norm": 0.03281605243682861, |
| "kl": 0.0035707950592041016, |
| "learning_rate": 4.506369426751593e-06, |
| "loss": 0.0035, |
| "step": 283 |
| }, |
| { |
| "clip_ratio": 0.0002544033526419298, |
| "epoch": 0.3518269546771832, |
| "grad_norm": 0.032299675047397614, |
| "kl": 0.003595113754272461, |
| "learning_rate": 4.522292993630574e-06, |
| "loss": 0.0035, |
| "step": 284 |
| }, |
| { |
| "clip_ratio": 0.00024219880805276262, |
| "epoch": 0.35373905769173314, |
| "grad_norm": 0.031959276646375656, |
| "kl": 0.0035622119903564453, |
| "learning_rate": 4.538216560509555e-06, |
| "loss": 0.0035, |
| "step": 285 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 536.3122510910034, |
| "epoch": 0.35565116070628305, |
| "grad_norm": 0.035966720432043076, |
| "kl": 0.003755331039428711, |
| "learning_rate": 4.554140127388535e-06, |
| "loss": 0.0076, |
| "num_tokens": 137425032.0, |
| "reward": 0.05524553809664212, |
| "reward_std": 0.07191267621237785, |
| "rewards/pure_accuracy_reward_math": 0.055245536990696564, |
| "step": 286 |
| }, |
| { |
| "clip_ratio": 0.00029696975889237365, |
| "epoch": 0.35756326372083297, |
| "grad_norm": 0.03485076501965523, |
| "kl": 0.0036923885345458984, |
| "learning_rate": 4.570063694267516e-06, |
| "loss": 0.0076, |
| "step": 287 |
| }, |
| { |
| "clip_ratio": 0.0003252405772968814, |
| "epoch": 0.3594753667353829, |
| "grad_norm": 0.03465472534298897, |
| "kl": 0.003720998764038086, |
| "learning_rate": 4.585987261146497e-06, |
| "loss": 0.0076, |
| "step": 288 |
| }, |
| { |
| "clip_ratio": 0.0003269365803362234, |
| "epoch": 0.3613874697499328, |
| "grad_norm": 0.033384956419467926, |
| "kl": 0.003762483596801758, |
| "learning_rate": 4.601910828025479e-06, |
| "loss": 0.0075, |
| "step": 289 |
| }, |
| { |
| "clip_ratio": 0.0003269619904813226, |
| "epoch": 0.36329957276448266, |
| "grad_norm": 0.03343256562948227, |
| "kl": 0.0037889480590820312, |
| "learning_rate": 4.617834394904459e-06, |
| "loss": 0.0075, |
| "step": 290 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 533.6155371665955, |
| "epoch": 0.3652116757790326, |
| "grad_norm": 0.035127099603414536, |
| "kl": 0.0037310123443603516, |
| "learning_rate": 4.63375796178344e-06, |
| "loss": 0.0084, |
| "num_tokens": 141070278.0, |
| "reward": 0.05580357421422377, |
| "reward_std": 0.06861072615720332, |
| "rewards/pure_accuracy_reward_math": 0.05580357281723991, |
| "step": 291 |
| }, |
| { |
| "clip_ratio": 0.00026876470258230256, |
| "epoch": 0.3671237787935825, |
| "grad_norm": 0.034193847328424454, |
| "kl": 0.0037539005279541016, |
| "learning_rate": 4.649681528662421e-06, |
| "loss": 0.0084, |
| "step": 292 |
| }, |
| { |
| "clip_ratio": 0.00024497293054537295, |
| "epoch": 0.3690358818081324, |
| "grad_norm": 0.033800724893808365, |
| "kl": 0.0037734508514404297, |
| "learning_rate": 4.665605095541402e-06, |
| "loss": 0.0084, |
| "step": 293 |
| }, |
| { |
| "clip_ratio": 0.0002538224067620831, |
| "epoch": 0.3709479848226823, |
| "grad_norm": 0.03376767784357071, |
| "kl": 0.003782033920288086, |
| "learning_rate": 4.6815286624203824e-06, |
| "loss": 0.0083, |
| "step": 294 |
| }, |
| { |
| "clip_ratio": 0.00027697558522277177, |
| "epoch": 0.37286008783723223, |
| "grad_norm": 0.03229675441980362, |
| "kl": 0.003787994384765625, |
| "learning_rate": 4.697452229299363e-06, |
| "loss": 0.0083, |
| "step": 295 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.5739660263062, |
| "epoch": 0.37477219085178215, |
| "grad_norm": 0.035769619047641754, |
| "kl": 0.0037794113159179688, |
| "learning_rate": 4.713375796178344e-06, |
| "loss": 0.0057, |
| "num_tokens": 144715023.0, |
| "reward": 0.05915178812574595, |
| "reward_std": 0.07096926274243742, |
| "rewards/pure_accuracy_reward_math": 0.059151787019800395, |
| "step": 296 |
| }, |
| { |
| "clip_ratio": 0.00030428163654505624, |
| "epoch": 0.37668429386633207, |
| "grad_norm": 0.035648081451654434, |
| "kl": 0.003717660903930664, |
| "learning_rate": 4.729299363057326e-06, |
| "loss": 0.0057, |
| "step": 297 |
| }, |
| { |
| "clip_ratio": 0.00029741515106707084, |
| "epoch": 0.378596396880882, |
| "grad_norm": 0.03551783785223961, |
| "kl": 0.0036716461181640625, |
| "learning_rate": 4.745222929936306e-06, |
| "loss": 0.0057, |
| "step": 298 |
| }, |
| { |
| "clip_ratio": 0.0003008591765478741, |
| "epoch": 0.38050849989543184, |
| "grad_norm": 0.03452136367559433, |
| "kl": 0.0036542415618896484, |
| "learning_rate": 4.761146496815287e-06, |
| "loss": 0.0056, |
| "step": 299 |
| }, |
| { |
| "clip_ratio": 0.00032588979291858777, |
| "epoch": 0.38242060290998175, |
| "grad_norm": 0.03325437009334564, |
| "kl": 0.003694295883178711, |
| "learning_rate": 4.777070063694268e-06, |
| "loss": 0.0056, |
| "step": 300 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.2416524887085, |
| "epoch": 0.38433270592453167, |
| "grad_norm": 0.04327908158302307, |
| "kl": 0.004815816879272461, |
| "learning_rate": 4.792993630573249e-06, |
| "loss": 0.0041, |
| "num_tokens": 148307505.0, |
| "reward": 0.05329241341678426, |
| "reward_std": 0.061954362492542714, |
| "rewards/pure_accuracy_reward_math": 0.0532924123108387, |
| "step": 301 |
| }, |
| { |
| "clip_ratio": 0.0002521659018839273, |
| "epoch": 0.3862448089390816, |
| "grad_norm": 0.041329506784677505, |
| "kl": 0.004758596420288086, |
| "learning_rate": 4.8089171974522295e-06, |
| "loss": 0.0041, |
| "step": 302 |
| }, |
| { |
| "clip_ratio": 0.0002661041191913682, |
| "epoch": 0.3881569119536315, |
| "grad_norm": 0.03914090245962143, |
| "kl": 0.0045318603515625, |
| "learning_rate": 4.82484076433121e-06, |
| "loss": 0.0041, |
| "step": 303 |
| }, |
| { |
| "clip_ratio": 0.0002647961523507547, |
| "epoch": 0.3900690149681814, |
| "grad_norm": 0.0363956093788147, |
| "kl": 0.0043642520904541016, |
| "learning_rate": 4.840764331210192e-06, |
| "loss": 0.004, |
| "step": 304 |
| }, |
| { |
| "clip_ratio": 0.00030025097066754824, |
| "epoch": 0.39198111798273133, |
| "grad_norm": 0.05623022839426994, |
| "kl": 0.00441288948059082, |
| "learning_rate": 4.856687898089173e-06, |
| "loss": 0.004, |
| "step": 305 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.0496897697449, |
| "epoch": 0.39389322099728125, |
| "grad_norm": 0.03662995249032974, |
| "kl": 0.0038270950317382812, |
| "learning_rate": 4.872611464968153e-06, |
| "loss": 0.0077, |
| "num_tokens": 151936939.0, |
| "reward": 0.0560825914144516, |
| "reward_std": 0.061781705473549664, |
| "rewards/pure_accuracy_reward_math": 0.05608259071595967, |
| "step": 306 |
| }, |
| { |
| "clip_ratio": 0.00025576306325092446, |
| "epoch": 0.39580532401183116, |
| "grad_norm": 0.03553188219666481, |
| "kl": 0.00376129150390625, |
| "learning_rate": 4.888535031847134e-06, |
| "loss": 0.0076, |
| "step": 307 |
| }, |
| { |
| "clip_ratio": 0.00027371336784653977, |
| "epoch": 0.397717427026381, |
| "grad_norm": 0.035399794578552246, |
| "kl": 0.0036725997924804688, |
| "learning_rate": 4.904458598726115e-06, |
| "loss": 0.0076, |
| "step": 308 |
| }, |
| { |
| "clip_ratio": 0.0002955471370569285, |
| "epoch": 0.39962953004093094, |
| "grad_norm": 0.03487352281808853, |
| "kl": 0.003664731979370117, |
| "learning_rate": 4.920382165605096e-06, |
| "loss": 0.0076, |
| "step": 309 |
| }, |
| { |
| "clip_ratio": 0.00030850259520320833, |
| "epoch": 0.40154163305548085, |
| "grad_norm": 0.03433185815811157, |
| "kl": 0.003676176071166992, |
| "learning_rate": 4.9363057324840765e-06, |
| "loss": 0.0075, |
| "step": 310 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.8312191963196, |
| "epoch": 0.40345373607003077, |
| "grad_norm": 0.03824182599782944, |
| "kl": 0.003762483596801758, |
| "learning_rate": 4.952229299363058e-06, |
| "loss": 0.0062, |
| "num_tokens": 155550782.0, |
| "reward": 0.05496652075089514, |
| "reward_std": 0.0689961050520651, |
| "rewards/pure_accuracy_reward_math": 0.0549665194703266, |
| "step": 311 |
| }, |
| { |
| "clip_ratio": 0.0002548059320588436, |
| "epoch": 0.4053658390845807, |
| "grad_norm": 0.036028265953063965, |
| "kl": 0.003760099411010742, |
| "learning_rate": 4.968152866242039e-06, |
| "loss": 0.0062, |
| "step": 312 |
| }, |
| { |
| "clip_ratio": 0.00029642158040132927, |
| "epoch": 0.4072779420991306, |
| "grad_norm": 0.03537724167108536, |
| "kl": 0.0038378238677978516, |
| "learning_rate": 4.98407643312102e-06, |
| "loss": 0.0062, |
| "step": 313 |
| }, |
| { |
| "clip_ratio": 0.00030970463706125884, |
| "epoch": 0.4091900451136805, |
| "grad_norm": 0.03521754965186119, |
| "kl": 0.003871440887451172, |
| "learning_rate": 5e-06, |
| "loss": 0.0062, |
| "step": 314 |
| }, |
| { |
| "clip_ratio": 0.000315766970174991, |
| "epoch": 0.4111021481282304, |
| "grad_norm": 0.034070126712322235, |
| "kl": 0.0037851333618164062, |
| "learning_rate": 4.999992129526286e-06, |
| "loss": 0.0061, |
| "step": 315 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.3727917671204, |
| "epoch": 0.41301425114278034, |
| "grad_norm": 0.12440560013055801, |
| "kl": 0.005699872970581055, |
| "learning_rate": 4.999968518154701e-06, |
| "loss": 0.0041, |
| "num_tokens": 159174918.0, |
| "reward": 0.05050223457510583, |
| "reward_std": 0.06435916194459423, |
| "rewards/pure_accuracy_reward_math": 0.050502233527367935, |
| "step": 316 |
| }, |
| { |
| "clip_ratio": 0.0002532021657657424, |
| "epoch": 0.4149263541573302, |
| "grad_norm": 0.05440036952495575, |
| "kl": 0.005144357681274414, |
| "learning_rate": 4.99992916603391e-06, |
| "loss": 0.004, |
| "step": 317 |
| }, |
| { |
| "clip_ratio": 0.00025051761485883617, |
| "epoch": 0.4168384571718801, |
| "grad_norm": 0.051424141973257065, |
| "kl": 0.005103111267089844, |
| "learning_rate": 4.999874073411688e-06, |
| "loss": 0.004, |
| "step": 318 |
| }, |
| { |
| "clip_ratio": 0.0002561948363677402, |
| "epoch": 0.41875056018643003, |
| "grad_norm": 0.06930891424417496, |
| "kl": 0.004969120025634766, |
| "learning_rate": 4.9998032406349205e-06, |
| "loss": 0.0039, |
| "step": 319 |
| }, |
| { |
| "clip_ratio": 0.0002573228107394243, |
| "epoch": 0.42066266320097995, |
| "grad_norm": 0.06900722533464432, |
| "kl": 0.004853248596191406, |
| "learning_rate": 4.9997166681495975e-06, |
| "loss": 0.0039, |
| "step": 320 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 517.6638069152832, |
| "epoch": 0.42257476621552986, |
| "grad_norm": 0.03829098492860794, |
| "kl": 0.0038361549377441406, |
| "learning_rate": 4.999614356500811e-06, |
| "loss": 0.0072, |
| "num_tokens": 162764497.0, |
| "reward": 0.06110491356230341, |
| "reward_std": 0.07393209857400507, |
| "rewards/pure_accuracy_reward_math": 0.06110491222352721, |
| "step": 321 |
| }, |
| { |
| "clip_ratio": 0.0002886460991931017, |
| "epoch": 0.4244868692300798, |
| "grad_norm": 0.03761793673038483, |
| "kl": 0.0038406848907470703, |
| "learning_rate": 4.999496306332755e-06, |
| "loss": 0.0072, |
| "step": 322 |
| }, |
| { |
| "clip_ratio": 0.00029219654425105546, |
| "epoch": 0.4263989722446297, |
| "grad_norm": 0.03714153915643692, |
| "kl": 0.003914356231689453, |
| "learning_rate": 4.999362518388718e-06, |
| "loss": 0.0071, |
| "step": 323 |
| }, |
| { |
| "clip_ratio": 0.0003099845329757045, |
| "epoch": 0.4283110752591796, |
| "grad_norm": 0.03610815480351448, |
| "kl": 0.0039288997650146484, |
| "learning_rate": 4.99921299351108e-06, |
| "loss": 0.0071, |
| "step": 324 |
| }, |
| { |
| "clip_ratio": 0.0003404705674370234, |
| "epoch": 0.4302231782737295, |
| "grad_norm": 0.03599926084280014, |
| "kl": 0.003935813903808594, |
| "learning_rate": 4.999047732641305e-06, |
| "loss": 0.007, |
| "step": 325 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 510.4832811355591, |
| "epoch": 0.4321352812882794, |
| "grad_norm": 0.04078551381826401, |
| "kl": 0.003900766372680664, |
| "learning_rate": 4.998866736819938e-06, |
| "loss": 0.0063, |
| "num_tokens": 166324161.0, |
| "reward": 0.059151788242161274, |
| "reward_std": 0.07354671962093562, |
| "rewards/pure_accuracy_reward_math": 0.05915178725263104, |
| "step": 326 |
| }, |
| { |
| "clip_ratio": 0.00026936357801332633, |
| "epoch": 0.4340473843028293, |
| "grad_norm": 0.03855260834097862, |
| "kl": 0.003957986831665039, |
| "learning_rate": 4.998670007186599e-06, |
| "loss": 0.0063, |
| "step": 327 |
| }, |
| { |
| "clip_ratio": 0.0002843770836875592, |
| "epoch": 0.4359594873173792, |
| "grad_norm": 0.03724536672234535, |
| "kl": 0.0039751529693603516, |
| "learning_rate": 4.998457544979971e-06, |
| "loss": 0.0062, |
| "step": 328 |
| }, |
| { |
| "clip_ratio": 0.0003156123698886404, |
| "epoch": 0.43787159033192913, |
| "grad_norm": 0.03662634268403053, |
| "kl": 0.0040798187255859375, |
| "learning_rate": 4.998229351537797e-06, |
| "loss": 0.0062, |
| "step": 329 |
| }, |
| { |
| "clip_ratio": 0.0003457550078564964, |
| "epoch": 0.43978369334647904, |
| "grad_norm": 0.03598077967762947, |
| "kl": 0.004061460494995117, |
| "learning_rate": 4.997985428296869e-06, |
| "loss": 0.0061, |
| "step": 330 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.4207811355591, |
| "epoch": 0.44169579636102896, |
| "grad_norm": 0.08678283542394638, |
| "kl": 0.008905410766601562, |
| "learning_rate": 4.997725776793021e-06, |
| "loss": 0.0058, |
| "num_tokens": 169950285.0, |
| "reward": 0.05636160948779434, |
| "reward_std": 0.07148723275167868, |
| "rewards/pure_accuracy_reward_math": 0.05636160867288709, |
| "step": 331 |
| }, |
| { |
| "clip_ratio": 0.00029096677934603576, |
| "epoch": 0.4436078993755789, |
| "grad_norm": 0.09512893110513687, |
| "kl": 0.007820606231689453, |
| "learning_rate": 4.997450398661117e-06, |
| "loss": 0.0058, |
| "step": 332 |
| }, |
| { |
| "clip_ratio": 0.00029938158724007735, |
| "epoch": 0.4455200023901288, |
| "grad_norm": 0.24316293001174927, |
| "kl": 0.007544517517089844, |
| "learning_rate": 4.9971592956350405e-06, |
| "loss": 0.0057, |
| "step": 333 |
| }, |
| { |
| "clip_ratio": 0.00032061134919558754, |
| "epoch": 0.4474321054046787, |
| "grad_norm": 0.07169396430253983, |
| "kl": 0.006528377532958984, |
| "learning_rate": 4.996852469547688e-06, |
| "loss": 0.0057, |
| "step": 334 |
| }, |
| { |
| "clip_ratio": 0.00034978831735088534, |
| "epoch": 0.44934420841922856, |
| "grad_norm": 0.06073050945997238, |
| "kl": 0.0060198307037353516, |
| "learning_rate": 4.996529922330954e-06, |
| "loss": 0.0056, |
| "step": 335 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 535.8259167671204, |
| "epoch": 0.4512563114337785, |
| "grad_norm": 0.034031759947538376, |
| "kl": 0.0037636756896972656, |
| "learning_rate": 4.996191656015715e-06, |
| "loss": 0.0063, |
| "num_tokens": 173606605.0, |
| "reward": 0.05273437770665623, |
| "reward_std": 0.061655311612412333, |
| "rewards/pure_accuracy_reward_math": 0.05273437625146471, |
| "step": 336 |
| }, |
| { |
| "clip_ratio": 0.0002175188884052659, |
| "epoch": 0.4531684144483284, |
| "grad_norm": 0.03333257883787155, |
| "kl": 0.0038194656372070312, |
| "learning_rate": 4.995837672731827e-06, |
| "loss": 0.0063, |
| "step": 337 |
| }, |
| { |
| "clip_ratio": 0.00022021491247414815, |
| "epoch": 0.4550805174628783, |
| "grad_norm": 0.032678041607141495, |
| "kl": 0.0038101673126220703, |
| "learning_rate": 4.9954679747081e-06, |
| "loss": 0.0063, |
| "step": 338 |
| }, |
| { |
| "clip_ratio": 0.000264580338352971, |
| "epoch": 0.4569926204774282, |
| "grad_norm": 0.032030362635850906, |
| "kl": 0.0037910938262939453, |
| "learning_rate": 4.995082564272295e-06, |
| "loss": 0.0062, |
| "step": 339 |
| }, |
| { |
| "clip_ratio": 0.00027159255438391483, |
| "epoch": 0.45890472349197814, |
| "grad_norm": 0.031298909336328506, |
| "kl": 0.0038001537322998047, |
| "learning_rate": 4.994681443851102e-06, |
| "loss": 0.0062, |
| "step": 340 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.6174931526184, |
| "epoch": 0.46081682650652805, |
| "grad_norm": 0.04015278443694115, |
| "kl": 0.004010200500488281, |
| "learning_rate": 4.994264615970126e-06, |
| "loss": 0.0062, |
| "num_tokens": 177226454.0, |
| "reward": 0.056361609895247966, |
| "reward_std": 0.06633232033345848, |
| "rewards/pure_accuracy_reward_math": 0.05636160867288709, |
| "step": 341 |
| }, |
| { |
| "clip_ratio": 0.00026669438159387937, |
| "epoch": 0.46272892952107797, |
| "grad_norm": 0.03813392296433449, |
| "kl": 0.0039997100830078125, |
| "learning_rate": 4.993832083253874e-06, |
| "loss": 0.0062, |
| "step": 342 |
| }, |
| { |
| "clip_ratio": 0.0003048689098363866, |
| "epoch": 0.46464103253562783, |
| "grad_norm": 0.03776548057794571, |
| "kl": 0.004065752029418945, |
| "learning_rate": 4.993383848425736e-06, |
| "loss": 0.0061, |
| "step": 343 |
| }, |
| { |
| "clip_ratio": 0.0003051352168768062, |
| "epoch": 0.46655313555017774, |
| "grad_norm": 0.03955227509140968, |
| "kl": 0.0041925907135009766, |
| "learning_rate": 4.992919914307969e-06, |
| "loss": 0.0061, |
| "step": 344 |
| }, |
| { |
| "clip_ratio": 0.00030118576887616655, |
| "epoch": 0.46846523856472766, |
| "grad_norm": 0.036648593842983246, |
| "kl": 0.00420832633972168, |
| "learning_rate": 4.992440283821676e-06, |
| "loss": 0.006, |
| "step": 345 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.5131411552429, |
| "epoch": 0.4703773415792776, |
| "grad_norm": 13.381791114807129, |
| "kl": 0.1310877799987793, |
| "learning_rate": 4.991944959986793e-06, |
| "loss": 0.018, |
| "num_tokens": 180852413.0, |
| "reward": 0.06138393163564615, |
| "reward_std": 0.07144096971023828, |
| "rewards/pure_accuracy_reward_math": 0.061383930064039305, |
| "step": 346 |
| }, |
| { |
| "clip_ratio": 0.00030088673440786806, |
| "epoch": 0.4722894445938275, |
| "grad_norm": 1.359532356262207, |
| "kl": 0.01866316795349121, |
| "learning_rate": 4.991433945922068e-06, |
| "loss": 0.0135, |
| "step": 347 |
| }, |
| { |
| "clip_ratio": 0.0003527746957843192, |
| "epoch": 0.4742015476083774, |
| "grad_norm": 0.050763800740242004, |
| "kl": 0.005962371826171875, |
| "learning_rate": 4.9909072448450386e-06, |
| "loss": 0.013, |
| "step": 348 |
| }, |
| { |
| "clip_ratio": 0.0003426602560239189, |
| "epoch": 0.4761136506229273, |
| "grad_norm": 0.0476795993745327, |
| "kl": 0.006250858306884766, |
| "learning_rate": 4.990364860072014e-06, |
| "loss": 0.013, |
| "step": 349 |
| }, |
| { |
| "clip_ratio": 0.00033057811066328213, |
| "epoch": 0.47802575363747724, |
| "grad_norm": 0.04783082380890846, |
| "kl": 0.0066144466400146484, |
| "learning_rate": 4.989806795018054e-06, |
| "loss": 0.013, |
| "step": 350 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.409900188446, |
| "epoch": 0.47993785665202715, |
| "grad_norm": 0.036505699157714844, |
| "kl": 0.0040128231048583984, |
| "learning_rate": 4.989233053196948e-06, |
| "loss": 0.0024, |
| "num_tokens": 184454394.0, |
| "reward": 0.04771205602446571, |
| "reward_std": 0.05920424917712808, |
| "rewards/pure_accuracy_reward_math": 0.047712054976727813, |
| "step": 351 |
| }, |
| { |
| "clip_ratio": 0.00023261837060317703, |
| "epoch": 0.481849959666577, |
| "grad_norm": 0.037214819341897964, |
| "kl": 0.004108428955078125, |
| "learning_rate": 4.988643638221193e-06, |
| "loss": 0.0024, |
| "step": 352 |
| }, |
| { |
| "clip_ratio": 0.0002573013600795093, |
| "epoch": 0.4837620626811269, |
| "grad_norm": 0.03702811896800995, |
| "kl": 0.004202127456665039, |
| "learning_rate": 4.9880385538019665e-06, |
| "loss": 0.0024, |
| "step": 353 |
| }, |
| { |
| "clip_ratio": 0.0002758479482167786, |
| "epoch": 0.48567416569567684, |
| "grad_norm": 0.03838437795639038, |
| "kl": 0.004250764846801758, |
| "learning_rate": 4.987417803749112e-06, |
| "loss": 0.0023, |
| "step": 354 |
| }, |
| { |
| "clip_ratio": 0.00024451872050690326, |
| "epoch": 0.48758626871022676, |
| "grad_norm": 0.035314518958330154, |
| "kl": 0.00424647331237793, |
| "learning_rate": 4.986781391971105e-06, |
| "loss": 0.0023, |
| "step": 355 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.8789310455322, |
| "epoch": 0.48949837172477667, |
| "grad_norm": 0.038822874426841736, |
| "kl": 0.004703998565673828, |
| "learning_rate": 4.986129322475037e-06, |
| "loss": 0.006, |
| "num_tokens": 188061244.0, |
| "reward": 0.05887277075089514, |
| "reward_std": 0.0715272988891229, |
| "rewards/pure_accuracy_reward_math": 0.058872769062872976, |
| "step": 356 |
| }, |
| { |
| "clip_ratio": 0.0003040988601696881, |
| "epoch": 0.4914104747393266, |
| "grad_norm": 0.03750370442867279, |
| "kl": 0.004604816436767578, |
| "learning_rate": 4.985461599366583e-06, |
| "loss": 0.006, |
| "step": 357 |
| }, |
| { |
| "clip_ratio": 0.0003311016299676339, |
| "epoch": 0.4933225777538765, |
| "grad_norm": 0.03735021874308586, |
| "kl": 0.004613637924194336, |
| "learning_rate": 4.984778226849983e-06, |
| "loss": 0.0059, |
| "step": 358 |
| }, |
| { |
| "clip_ratio": 0.00031427563314423423, |
| "epoch": 0.4952346807684264, |
| "grad_norm": 0.037090424448251724, |
| "kl": 0.00463104248046875, |
| "learning_rate": 4.984079209228007e-06, |
| "loss": 0.0059, |
| "step": 359 |
| }, |
| { |
| "clip_ratio": 0.0003153682554284387, |
| "epoch": 0.49714678378297633, |
| "grad_norm": 0.03496375307440758, |
| "kl": 0.004604816436767578, |
| "learning_rate": 4.983364550901936e-06, |
| "loss": 0.0058, |
| "step": 360 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.5016980171204, |
| "epoch": 0.4990588867975262, |
| "grad_norm": 1978.1619873046875, |
| "kl": 5.663617134094238, |
| "learning_rate": 4.982634256371529e-06, |
| "loss": 0.2313, |
| "num_tokens": 191670522.0, |
| "reward": 0.05943080599536188, |
| "reward_std": 0.06242607004242018, |
| "rewards/pure_accuracy_reward_math": 0.059430805064039305, |
| "step": 361 |
| }, |
| { |
| "clip_ratio": 0.0003008291907349303, |
| "epoch": 0.5009709898120761, |
| "grad_norm": 6.705481052398682, |
| "kl": 0.07292413711547852, |
| "learning_rate": 4.981888330234998e-06, |
| "loss": 0.0076, |
| "step": 362 |
| }, |
| { |
| "clip_ratio": 0.00038137949604788446, |
| "epoch": 0.502883092826626, |
| "grad_norm": 0.4056338369846344, |
| "kl": 0.013193130493164062, |
| "learning_rate": 4.981126777188976e-06, |
| "loss": 0.0053, |
| "step": 363 |
| }, |
| { |
| "clip_ratio": 0.00039371675529764616, |
| "epoch": 0.5047951958411759, |
| "grad_norm": 0.40032151341438293, |
| "kl": 0.009969472885131836, |
| "learning_rate": 4.980349602028489e-06, |
| "loss": 0.0052, |
| "step": 364 |
| }, |
| { |
| "clip_ratio": 0.0003270253398568457, |
| "epoch": 0.5067072988557259, |
| "grad_norm": 0.08224909007549286, |
| "kl": 0.010345458984375, |
| "learning_rate": 4.979556809646928e-06, |
| "loss": 0.0051, |
| "step": 365 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 534.6082878112793, |
| "epoch": 0.5086194018702758, |
| "grad_norm": 0.036373648792505264, |
| "kl": 0.003941535949707031, |
| "learning_rate": 4.978748405036014e-06, |
| "loss": 0.0071, |
| "num_tokens": 195317270.0, |
| "reward": 0.05552455584984273, |
| "reward_std": 0.06775363947963342, |
| "rewards/pure_accuracy_reward_math": 0.05552455486031249, |
| "step": 366 |
| }, |
| { |
| "clip_ratio": 0.00027453447256675645, |
| "epoch": 0.5105315048848257, |
| "grad_norm": 0.03525104746222496, |
| "kl": 0.0039365291595458984, |
| "learning_rate": 4.977924393285767e-06, |
| "loss": 0.0072, |
| "step": 367 |
| }, |
| { |
| "clip_ratio": 0.0003015769660521528, |
| "epoch": 0.5124436078993756, |
| "grad_norm": 0.03737647458910942, |
| "kl": 0.0039522647857666016, |
| "learning_rate": 4.977084779584479e-06, |
| "loss": 0.0071, |
| "step": 368 |
| }, |
| { |
| "clip_ratio": 0.0002889172319555655, |
| "epoch": 0.5143557109139255, |
| "grad_norm": 0.03506501764059067, |
| "kl": 0.0039052963256835938, |
| "learning_rate": 4.976229569218676e-06, |
| "loss": 0.0071, |
| "step": 369 |
| }, |
| { |
| "clip_ratio": 0.0002910121094146234, |
| "epoch": 0.5162678139284754, |
| "grad_norm": 0.03558839485049248, |
| "kl": 0.003898143768310547, |
| "learning_rate": 4.975358767573085e-06, |
| "loss": 0.007, |
| "step": 370 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.1417660713196, |
| "epoch": 0.5181799169430253, |
| "grad_norm": 9.403284072875977, |
| "kl": 0.0705575942993164, |
| "learning_rate": 4.974472380130605e-06, |
| "loss": 0.0078, |
| "num_tokens": 198926094.0, |
| "reward": 0.06305803885334171, |
| "reward_std": 0.0737193762906827, |
| "rewards/pure_accuracy_reward_math": 0.06305803733994253, |
| "step": 371 |
| }, |
| { |
| "clip_ratio": 0.00028168898450076085, |
| "epoch": 0.5200920199575753, |
| "grad_norm": 0.10174906253814697, |
| "kl": 0.005540609359741211, |
| "learning_rate": 4.9735704124722665e-06, |
| "loss": 0.0053, |
| "step": 372 |
| }, |
| { |
| "clip_ratio": 0.00026055807722968893, |
| "epoch": 0.5220041229721252, |
| "grad_norm": 0.036394841969013214, |
| "kl": 0.004784584045410156, |
| "learning_rate": 4.9726528702771985e-06, |
| "loss": 0.0052, |
| "step": 373 |
| }, |
| { |
| "clip_ratio": 0.0003154287535949152, |
| "epoch": 0.523916225986675, |
| "grad_norm": 0.03702308237552643, |
| "kl": 0.004788875579833984, |
| "learning_rate": 4.971719759322596e-06, |
| "loss": 0.0052, |
| "step": 374 |
| }, |
| { |
| "clip_ratio": 0.000301387064496339, |
| "epoch": 0.5258283290012249, |
| "grad_norm": 0.03516030311584473, |
| "kl": 0.004770994186401367, |
| "learning_rate": 4.97077108548368e-06, |
| "loss": 0.0051, |
| "step": 375 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.7994132041931, |
| "epoch": 0.5277404320157748, |
| "grad_norm": 0.04183080792427063, |
| "kl": 0.006031513214111328, |
| "learning_rate": 4.969806854733658e-06, |
| "loss": 0.0091, |
| "num_tokens": 202522419.0, |
| "reward": 0.0638950924621895, |
| "reward_std": 0.07990403153235093, |
| "rewards/pure_accuracy_reward_math": 0.0638950903667137, |
| "step": 376 |
| }, |
| { |
| "clip_ratio": 0.00032519385399609746, |
| "epoch": 0.5296525350303247, |
| "grad_norm": 0.0407201424241066, |
| "kl": 0.005979061126708984, |
| "learning_rate": 4.968827073143694e-06, |
| "loss": 0.0091, |
| "step": 377 |
| }, |
| { |
| "clip_ratio": 0.00031682528469900717, |
| "epoch": 0.5315646380448746, |
| "grad_norm": 0.040043942630290985, |
| "kl": 0.005922555923461914, |
| "learning_rate": 4.967831746882863e-06, |
| "loss": 0.0091, |
| "step": 378 |
| }, |
| { |
| "clip_ratio": 0.00033513708405052967, |
| "epoch": 0.5334767410594246, |
| "grad_norm": 0.03983679041266441, |
| "kl": 0.005841970443725586, |
| "learning_rate": 4.966820882218118e-06, |
| "loss": 0.009, |
| "step": 379 |
| }, |
| { |
| "clip_ratio": 0.00034104771594911654, |
| "epoch": 0.5353888440739745, |
| "grad_norm": 0.03983955457806587, |
| "kl": 0.005755186080932617, |
| "learning_rate": 4.965794485514245e-06, |
| "loss": 0.0089, |
| "step": 380 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.5067186355591, |
| "epoch": 0.5373009470885244, |
| "grad_norm": 0.034092146903276443, |
| "kl": 0.0043926239013671875, |
| "learning_rate": 4.964752563233826e-06, |
| "loss": 0.008, |
| "num_tokens": 206122403.0, |
| "reward": 0.055803573748562485, |
| "reward_std": 0.05980854749213904, |
| "rewards/pure_accuracy_reward_math": 0.05580357275903225, |
| "step": 381 |
| }, |
| { |
| "clip_ratio": 0.00025422318708478997, |
| "epoch": 0.5392130501030743, |
| "grad_norm": 0.03263320028781891, |
| "kl": 0.0043218135833740234, |
| "learning_rate": 4.9636951219372e-06, |
| "loss": 0.008, |
| "step": 382 |
| }, |
| { |
| "clip_ratio": 0.00025885856206286917, |
| "epoch": 0.5411251531176242, |
| "grad_norm": 0.032487623393535614, |
| "kl": 0.004242420196533203, |
| "learning_rate": 4.962622168282416e-06, |
| "loss": 0.008, |
| "step": 383 |
| }, |
| { |
| "clip_ratio": 0.0002850476581102157, |
| "epoch": 0.5430372561321741, |
| "grad_norm": 0.032427769154310226, |
| "kl": 0.004185199737548828, |
| "learning_rate": 4.961533709025199e-06, |
| "loss": 0.0079, |
| "step": 384 |
| }, |
| { |
| "clip_ratio": 0.00029774147623129466, |
| "epoch": 0.544949359146724, |
| "grad_norm": 0.031092027202248573, |
| "kl": 0.004144430160522461, |
| "learning_rate": 4.960429751018901e-06, |
| "loss": 0.0079, |
| "step": 385 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.9258050918579, |
| "epoch": 0.546861462161274, |
| "grad_norm": 0.6398438811302185, |
| "kl": 0.013398170471191406, |
| "learning_rate": 4.959310301214458e-06, |
| "loss": 0.0048, |
| "num_tokens": 209727833.0, |
| "reward": 0.06668527127476409, |
| "reward_std": 0.07586519059259444, |
| "rewards/pure_accuracy_reward_math": 0.06668526941211894, |
| "step": 386 |
| }, |
| { |
| "clip_ratio": 0.0002956847454242961, |
| "epoch": 0.5487735651758239, |
| "grad_norm": 0.09603609144687653, |
| "kl": 0.006535530090332031, |
| "learning_rate": 4.958175366660352e-06, |
| "loss": 0.0045, |
| "step": 387 |
| }, |
| { |
| "clip_ratio": 0.00032585520455086225, |
| "epoch": 0.5506856681903738, |
| "grad_norm": 0.042251698672771454, |
| "kl": 0.004881858825683594, |
| "learning_rate": 4.95702495450256e-06, |
| "loss": 0.0045, |
| "step": 388 |
| }, |
| { |
| "clip_ratio": 0.00030688931195754776, |
| "epoch": 0.5525977712049237, |
| "grad_norm": 0.03725959733128548, |
| "kl": 0.00462651252746582, |
| "learning_rate": 4.955859071984512e-06, |
| "loss": 0.0044, |
| "step": 389 |
| }, |
| { |
| "clip_ratio": 0.0002833517196449975, |
| "epoch": 0.5545098742194736, |
| "grad_norm": 0.03557269275188446, |
| "kl": 0.004591941833496094, |
| "learning_rate": 4.954677726447049e-06, |
| "loss": 0.0044, |
| "step": 390 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.50141954422, |
| "epoch": 0.5564219772340235, |
| "grad_norm": 0.03767434135079384, |
| "kl": 0.0041730403900146484, |
| "learning_rate": 4.953480925328369e-06, |
| "loss": 0.0053, |
| "num_tokens": 213359594.0, |
| "reward": 0.05636160998255946, |
| "reward_std": 0.06873711966909468, |
| "rewards/pure_accuracy_reward_math": 0.05636160829453729, |
| "step": 391 |
| }, |
| { |
| "clip_ratio": 0.0002943199858691514, |
| "epoch": 0.5583340802485733, |
| "grad_norm": 0.03691519424319267, |
| "kl": 0.004199981689453125, |
| "learning_rate": 4.952268676163984e-06, |
| "loss": 0.0053, |
| "step": 392 |
| }, |
| { |
| "clip_ratio": 0.00028674039270981666, |
| "epoch": 0.5602461832631233, |
| "grad_norm": 0.036044176667928696, |
| "kl": 0.004216432571411133, |
| "learning_rate": 4.951040986586676e-06, |
| "loss": 0.0053, |
| "step": 393 |
| }, |
| { |
| "clip_ratio": 0.0003071572371595721, |
| "epoch": 0.5621582862776732, |
| "grad_norm": 0.0358373187482357, |
| "kl": 0.004226207733154297, |
| "learning_rate": 4.949797864326442e-06, |
| "loss": 0.0053, |
| "step": 394 |
| }, |
| { |
| "clip_ratio": 0.000308680556543095, |
| "epoch": 0.5640703892922231, |
| "grad_norm": 0.0356404110789299, |
| "kl": 0.004263877868652344, |
| "learning_rate": 4.9485393172104525e-06, |
| "loss": 0.0052, |
| "step": 395 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.1506924629211, |
| "epoch": 0.565982492306773, |
| "grad_norm": 0.03425108641386032, |
| "kl": 0.004232645034790039, |
| "learning_rate": 4.947265353162997e-06, |
| "loss": 0.0047, |
| "num_tokens": 216984490.0, |
| "reward": 0.05831473466241732, |
| "reward_std": 0.06912249873857945, |
| "rewards/pure_accuracy_reward_math": 0.058314733556471765, |
| "step": 396 |
| }, |
| { |
| "clip_ratio": 0.0002443079777663115, |
| "epoch": 0.5678945953213229, |
| "grad_norm": 0.03406741842627525, |
| "kl": 0.004246950149536133, |
| "learning_rate": 4.945975980205435e-06, |
| "loss": 0.0046, |
| "step": 397 |
| }, |
| { |
| "clip_ratio": 0.00025582832455484095, |
| "epoch": 0.5698066983358728, |
| "grad_norm": 0.033892109990119934, |
| "kl": 0.004239320755004883, |
| "learning_rate": 4.944671206456148e-06, |
| "loss": 0.0046, |
| "step": 398 |
| }, |
| { |
| "clip_ratio": 0.0002801110364885062, |
| "epoch": 0.5717188013504227, |
| "grad_norm": 0.03294463828206062, |
| "kl": 0.0042018890380859375, |
| "learning_rate": 4.943351040130485e-06, |
| "loss": 0.0046, |
| "step": 399 |
| }, |
| { |
| "clip_ratio": 0.00030015600407296006, |
| "epoch": 0.5736309043649727, |
| "grad_norm": 0.03228214010596275, |
| "kl": 0.004125118255615234, |
| "learning_rate": 4.942015489540715e-06, |
| "loss": 0.0045, |
| "step": 400 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.8225684165955, |
| "epoch": 0.5755430073795226, |
| "grad_norm": 0.037567272782325745, |
| "kl": 0.005152702331542969, |
| "learning_rate": 4.94066456309597e-06, |
| "loss": 0.0071, |
| "num_tokens": 220604938.0, |
| "reward": 0.06166294886497781, |
| "reward_std": 0.07311507751001045, |
| "rewards/pure_accuracy_reward_math": 0.06166294764261693, |
| "step": 401 |
| }, |
| { |
| "clip_ratio": 0.0002694410874823916, |
| "epoch": 0.5774551103940725, |
| "grad_norm": 0.036373041570186615, |
| "kl": 0.005210161209106445, |
| "learning_rate": 4.939298269302194e-06, |
| "loss": 0.0071, |
| "step": 402 |
| }, |
| { |
| "clip_ratio": 0.0002891406058438406, |
| "epoch": 0.5793672134086224, |
| "grad_norm": 0.03582580015063286, |
| "kl": 0.0052187442779541016, |
| "learning_rate": 4.9379166167620915e-06, |
| "loss": 0.007, |
| "step": 403 |
| }, |
| { |
| "clip_ratio": 0.00030127688086167836, |
| "epoch": 0.5812793164231723, |
| "grad_norm": 0.035248763859272, |
| "kl": 0.005229949951171875, |
| "learning_rate": 4.93651961417507e-06, |
| "loss": 0.007, |
| "step": 404 |
| }, |
| { |
| "clip_ratio": 0.00031262176707969047, |
| "epoch": 0.5831914194377222, |
| "grad_norm": 0.03461577743291855, |
| "kl": 0.00519251823425293, |
| "learning_rate": 4.9351072703371885e-06, |
| "loss": 0.0069, |
| "step": 405 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 534.0067219734192, |
| "epoch": 0.5851035224522722, |
| "grad_norm": 0.0363302007317543, |
| "kl": 0.004278659820556641, |
| "learning_rate": 4.933679594141096e-06, |
| "loss": 0.0041, |
| "num_tokens": 224253906.0, |
| "reward": 0.06222098533180542, |
| "reward_std": 0.07462272536940873, |
| "rewards/pure_accuracy_reward_math": 0.06222098329453729, |
| "step": 406 |
| }, |
| { |
| "clip_ratio": 0.0002887690876320903, |
| "epoch": 0.5870156254668221, |
| "grad_norm": 0.03538454696536064, |
| "kl": 0.004297971725463867, |
| "learning_rate": 4.932236594575986e-06, |
| "loss": 0.0041, |
| "step": 407 |
| }, |
| { |
| "clip_ratio": 0.00029836769689950415, |
| "epoch": 0.588927728481372, |
| "grad_norm": 0.03521309420466423, |
| "kl": 0.004305362701416016, |
| "learning_rate": 4.9307782807275304e-06, |
| "loss": 0.0041, |
| "step": 408 |
| }, |
| { |
| "clip_ratio": 0.0003077857980144927, |
| "epoch": 0.5908398314959219, |
| "grad_norm": 0.03468110039830208, |
| "kl": 0.004298210144042969, |
| "learning_rate": 4.929304661777823e-06, |
| "loss": 0.0041, |
| "step": 409 |
| }, |
| { |
| "clip_ratio": 0.00030735837987094783, |
| "epoch": 0.5927519345104717, |
| "grad_norm": 0.03504593297839165, |
| "kl": 0.004282474517822266, |
| "learning_rate": 4.9278157470053305e-06, |
| "loss": 0.004, |
| "step": 410 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.0987973213196, |
| "epoch": 0.5946640375250216, |
| "grad_norm": 0.03893313929438591, |
| "kl": 0.004411935806274414, |
| "learning_rate": 4.926311545784823e-06, |
| "loss": 0.0081, |
| "num_tokens": 227887088.0, |
| "reward": 0.06138393160654232, |
| "reward_std": 0.07560620526783168, |
| "rewards/pure_accuracy_reward_math": 0.061383930034935474, |
| "step": 411 |
| }, |
| { |
| "clip_ratio": 0.0003015478255292692, |
| "epoch": 0.5965761405395715, |
| "grad_norm": 0.03745520859956741, |
| "kl": 0.004415750503540039, |
| "learning_rate": 4.924792067587321e-06, |
| "loss": 0.0081, |
| "step": 412 |
| }, |
| { |
| "clip_ratio": 0.00033068407248038056, |
| "epoch": 0.5984882435541214, |
| "grad_norm": 0.037219781428575516, |
| "kl": 0.004396915435791016, |
| "learning_rate": 4.923257321980036e-06, |
| "loss": 0.0081, |
| "step": 413 |
| }, |
| { |
| "clip_ratio": 0.00037280973344877566, |
| "epoch": 0.6004003465686714, |
| "grad_norm": 0.03754372149705887, |
| "kl": 0.0044384002685546875, |
| "learning_rate": 4.9217073186263075e-06, |
| "loss": 0.0081, |
| "step": 414 |
| }, |
| { |
| "clip_ratio": 0.0003646712993372603, |
| "epoch": 0.6023124495832213, |
| "grad_norm": 0.03602118790149689, |
| "kl": 0.004477262496948242, |
| "learning_rate": 4.920142067285544e-06, |
| "loss": 0.008, |
| "step": 415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 508.44282722473145, |
| "epoch": 0.6042245525977712, |
| "grad_norm": 0.039943527430295944, |
| "kl": 0.004469871520996094, |
| "learning_rate": 4.9185615778131614e-06, |
| "loss": 0.0078, |
| "num_tokens": 231443183.0, |
| "reward": 0.0705915211874526, |
| "reward_std": 0.07968511217040941, |
| "rewards/pure_accuracy_reward_math": 0.07059151926659979, |
| "step": 416 |
| }, |
| { |
| "clip_ratio": 0.00031770144798315414, |
| "epoch": 0.6061366556123211, |
| "grad_norm": 0.039055656641721725, |
| "kl": 0.004549264907836914, |
| "learning_rate": 4.916965860160521e-06, |
| "loss": 0.0078, |
| "step": 417 |
| }, |
| { |
| "clip_ratio": 0.00030108455553090607, |
| "epoch": 0.608048758626871, |
| "grad_norm": 0.03719799593091011, |
| "kl": 0.004551410675048828, |
| "learning_rate": 4.915354924374864e-06, |
| "loss": 0.0078, |
| "step": 418 |
| }, |
| { |
| "clip_ratio": 0.0003208976940527464, |
| "epoch": 0.6099608616414209, |
| "grad_norm": 0.03626833111047745, |
| "kl": 0.004576444625854492, |
| "learning_rate": 4.913728780599254e-06, |
| "loss": 0.0077, |
| "step": 419 |
| }, |
| { |
| "clip_ratio": 0.00030395733068644404, |
| "epoch": 0.6118729646559709, |
| "grad_norm": 0.035672470927238464, |
| "kl": 0.004616498947143555, |
| "learning_rate": 4.912087439072508e-06, |
| "loss": 0.0077, |
| "step": 420 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.3401436805725, |
| "epoch": 0.6137850676705208, |
| "grad_norm": 0.035979609936475754, |
| "kl": 0.004936695098876953, |
| "learning_rate": 4.9104309101291345e-06, |
| "loss": 0.008, |
| "num_tokens": 235040570.0, |
| "reward": 0.0558035739522893, |
| "reward_std": 0.06414644059259444, |
| "rewards/pure_accuracy_reward_math": 0.05580357278813608, |
| "step": 421 |
| }, |
| { |
| "clip_ratio": 0.0002606460908509689, |
| "epoch": 0.6156971706850707, |
| "grad_norm": 0.034824173897504807, |
| "kl": 0.004873991012573242, |
| "learning_rate": 4.908759204199268e-06, |
| "loss": 0.008, |
| "step": 422 |
| }, |
| { |
| "clip_ratio": 0.0002711625579081556, |
| "epoch": 0.6176092736996206, |
| "grad_norm": 0.034011878073215485, |
| "kl": 0.00480341911315918, |
| "learning_rate": 4.907072331808602e-06, |
| "loss": 0.008, |
| "step": 423 |
| }, |
| { |
| "clip_ratio": 0.0002719364555332504, |
| "epoch": 0.6195213767141705, |
| "grad_norm": 0.0330798402428627, |
| "kl": 0.00470733642578125, |
| "learning_rate": 4.905370303578324e-06, |
| "loss": 0.0079, |
| "step": 424 |
| }, |
| { |
| "clip_ratio": 0.0003164075427548596, |
| "epoch": 0.6214334797287204, |
| "grad_norm": 0.03356935828924179, |
| "kl": 0.004645586013793945, |
| "learning_rate": 4.903653130225049e-06, |
| "loss": 0.0079, |
| "step": 425 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.4051547050476, |
| "epoch": 0.6233455827432703, |
| "grad_norm": 0.037987031042575836, |
| "kl": 0.004395723342895508, |
| "learning_rate": 4.901920822560753e-06, |
| "loss": 0.004, |
| "num_tokens": 238650146.0, |
| "reward": 0.056082592491293326, |
| "reward_std": 0.06946781190345064, |
| "rewards/pure_accuracy_reward_math": 0.05608259033760987, |
| "step": 426 |
| }, |
| { |
| "clip_ratio": 0.0002752577877913609, |
| "epoch": 0.6252576857578201, |
| "grad_norm": 0.03711739555001259, |
| "kl": 0.0043413639068603516, |
| "learning_rate": 4.900173391492698e-06, |
| "loss": 0.004, |
| "step": 427 |
| }, |
| { |
| "clip_ratio": 0.0002780464546390249, |
| "epoch": 0.6271697887723701, |
| "grad_norm": 0.03583519160747528, |
| "kl": 0.004349231719970703, |
| "learning_rate": 4.898410848023374e-06, |
| "loss": 0.004, |
| "step": 428 |
| }, |
| { |
| "clip_ratio": 0.0002759867400072835, |
| "epoch": 0.62908189178692, |
| "grad_norm": 0.035115331411361694, |
| "kl": 0.0043909549713134766, |
| "learning_rate": 4.896633203250424e-06, |
| "loss": 0.0039, |
| "step": 429 |
| }, |
| { |
| "clip_ratio": 0.0002873923492074937, |
| "epoch": 0.6309939948014699, |
| "grad_norm": 0.03465187922120094, |
| "kl": 0.004460573196411133, |
| "learning_rate": 4.89484046836657e-06, |
| "loss": 0.0039, |
| "step": 430 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.1116304397583, |
| "epoch": 0.6329060978160198, |
| "grad_norm": 0.03591939061880112, |
| "kl": 0.004395723342895508, |
| "learning_rate": 4.893032654659554e-06, |
| "loss": 0.0068, |
| "num_tokens": 242275198.0, |
| "reward": 0.05859375320142135, |
| "reward_std": 0.06461814750218764, |
| "rewards/pure_accuracy_reward_math": 0.05859375110594556, |
| "step": 431 |
| }, |
| { |
| "clip_ratio": 0.00021255032419276176, |
| "epoch": 0.6348182008305697, |
| "grad_norm": 0.03488593176007271, |
| "kl": 0.0043849945068359375, |
| "learning_rate": 4.891209773512054e-06, |
| "loss": 0.0068, |
| "step": 432 |
| }, |
| { |
| "clip_ratio": 0.00023523596212271514, |
| "epoch": 0.6367303038451196, |
| "grad_norm": 0.03410722687840462, |
| "kl": 0.004419565200805664, |
| "learning_rate": 4.889371836401621e-06, |
| "loss": 0.0067, |
| "step": 433 |
| }, |
| { |
| "clip_ratio": 0.00024576090385153293, |
| "epoch": 0.6386424068596696, |
| "grad_norm": 0.03335421159863472, |
| "kl": 0.004421710968017578, |
| "learning_rate": 4.887518854900603e-06, |
| "loss": 0.0067, |
| "step": 434 |
| }, |
| { |
| "clip_ratio": 0.0002828803910119859, |
| "epoch": 0.6405545098742195, |
| "grad_norm": 0.03240649402141571, |
| "kl": 0.004340171813964844, |
| "learning_rate": 4.885650840676074e-06, |
| "loss": 0.0066, |
| "step": 435 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.2051043510437, |
| "epoch": 0.6424666128887694, |
| "grad_norm": 0.03588009625673294, |
| "kl": 0.0044574737548828125, |
| "learning_rate": 4.88376780548976e-06, |
| "loss": 0.0041, |
| "num_tokens": 245917009.0, |
| "reward": 0.05775669912691228, |
| "reward_std": 0.06611959752626717, |
| "rewards/pure_accuracy_reward_math": 0.05775669778813608, |
| "step": 436 |
| }, |
| { |
| "clip_ratio": 0.0002524082638899472, |
| "epoch": 0.6443787159033193, |
| "grad_norm": 0.03471923619508743, |
| "kl": 0.0044062137603759766, |
| "learning_rate": 4.881869761197963e-06, |
| "loss": 0.0041, |
| "step": 437 |
| }, |
| { |
| "clip_ratio": 0.0002889056303843063, |
| "epoch": 0.6462908189178692, |
| "grad_norm": 0.03379988297820091, |
| "kl": 0.004372119903564453, |
| "learning_rate": 4.879956719751491e-06, |
| "loss": 0.004, |
| "step": 438 |
| }, |
| { |
| "clip_ratio": 0.0003009145272017122, |
| "epoch": 0.6482029219324191, |
| "grad_norm": 0.03446533530950546, |
| "kl": 0.004400730133056641, |
| "learning_rate": 4.878028693195577e-06, |
| "loss": 0.004, |
| "step": 439 |
| }, |
| { |
| "clip_ratio": 0.00030466545126728306, |
| "epoch": 0.650115024946969, |
| "grad_norm": 0.03484022617340088, |
| "kl": 0.004462242126464844, |
| "learning_rate": 4.876085693669806e-06, |
| "loss": 0.0039, |
| "step": 440 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 517.0904240608215, |
| "epoch": 0.652027127961519, |
| "grad_norm": 0.0366295725107193, |
| "kl": 0.004509925842285156, |
| "learning_rate": 4.8741277334080405e-06, |
| "loss": 0.0066, |
| "num_tokens": 249502673.0, |
| "reward": 0.05719866382423788, |
| "reward_std": 0.06594694149680436, |
| "rewards/pure_accuracy_reward_math": 0.057198662078008056, |
| "step": 441 |
| }, |
| { |
| "clip_ratio": 0.00023539985437537325, |
| "epoch": 0.6539392309760689, |
| "grad_norm": 0.03590084984898567, |
| "kl": 0.0045740604400634766, |
| "learning_rate": 4.87215482473834e-06, |
| "loss": 0.0066, |
| "step": 442 |
| }, |
| { |
| "clip_ratio": 0.00022167488214108744, |
| "epoch": 0.6558513339906188, |
| "grad_norm": 0.03433714434504509, |
| "kl": 0.004676342010498047, |
| "learning_rate": 4.870166980082885e-06, |
| "loss": 0.0066, |
| "step": 443 |
| }, |
| { |
| "clip_ratio": 0.0002476425726172238, |
| "epoch": 0.6577634370051687, |
| "grad_norm": 0.03389691188931465, |
| "kl": 0.004789113998413086, |
| "learning_rate": 4.868164211957899e-06, |
| "loss": 0.0065, |
| "step": 444 |
| }, |
| { |
| "clip_ratio": 0.00025810993128061455, |
| "epoch": 0.6596755400197185, |
| "grad_norm": 0.03417885676026344, |
| "kl": 0.004879474639892578, |
| "learning_rate": 4.866146532973569e-06, |
| "loss": 0.0064, |
| "step": 445 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.3697214126587, |
| "epoch": 0.6615876430342684, |
| "grad_norm": 0.03560737892985344, |
| "kl": 0.00455927848815918, |
| "learning_rate": 4.864113955833967e-06, |
| "loss": 0.0056, |
| "num_tokens": 253104314.0, |
| "reward": 0.06584821722935885, |
| "reward_std": 0.07672227645525709, |
| "rewards/pure_accuracy_reward_math": 0.06584821565775201, |
| "step": 446 |
| }, |
| { |
| "clip_ratio": 0.00029780695723502504, |
| "epoch": 0.6634997460488183, |
| "grad_norm": 0.034836821258068085, |
| "kl": 0.0045278072357177734, |
| "learning_rate": 4.862066493336967e-06, |
| "loss": 0.0056, |
| "step": 447 |
| }, |
| { |
| "clip_ratio": 0.00030120932990485016, |
| "epoch": 0.6654118490633683, |
| "grad_norm": 0.03460467606782913, |
| "kl": 0.0045435428619384766, |
| "learning_rate": 4.860004158374172e-06, |
| "loss": 0.0055, |
| "step": 448 |
| }, |
| { |
| "clip_ratio": 0.000313081463019671, |
| "epoch": 0.6673239520779182, |
| "grad_norm": 0.03467562422156334, |
| "kl": 0.004552364349365234, |
| "learning_rate": 4.857926963930822e-06, |
| "loss": 0.0055, |
| "step": 449 |
| }, |
| { |
| "clip_ratio": 0.00031086072692687594, |
| "epoch": 0.6692360550924681, |
| "grad_norm": 0.03409102186560631, |
| "kl": 0.004626035690307617, |
| "learning_rate": 4.855834923085721e-06, |
| "loss": 0.0054, |
| "step": 450 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 514.4771447181702, |
| "epoch": 0.671148158107018, |
| "grad_norm": 0.03815117105841637, |
| "kl": 0.005002737045288086, |
| "learning_rate": 4.853728049011151e-06, |
| "loss": 0.0091, |
| "num_tokens": 256687388.0, |
| "reward": 0.06556919938884676, |
| "reward_std": 0.07874169782735407, |
| "rewards/pure_accuracy_reward_math": 0.06556919787544757, |
| "step": 451 |
| }, |
| { |
| "clip_ratio": 0.0003133106871473501, |
| "epoch": 0.6730602611215679, |
| "grad_norm": 0.03761136531829834, |
| "kl": 0.005041837692260742, |
| "learning_rate": 4.851606354972791e-06, |
| "loss": 0.0091, |
| "step": 452 |
| }, |
| { |
| "clip_ratio": 0.00034106033973557714, |
| "epoch": 0.6749723641361178, |
| "grad_norm": 0.0372379869222641, |
| "kl": 0.0050508975982666016, |
| "learning_rate": 4.849469854329629e-06, |
| "loss": 0.0091, |
| "step": 453 |
| }, |
| { |
| "clip_ratio": 0.00033749614277667206, |
| "epoch": 0.6768844671506677, |
| "grad_norm": 0.03686762601137161, |
| "kl": 0.005095005035400391, |
| "learning_rate": 4.847318560533882e-06, |
| "loss": 0.009, |
| "step": 454 |
| }, |
| { |
| "clip_ratio": 0.00035140375177888927, |
| "epoch": 0.6787965701652177, |
| "grad_norm": 0.036469750106334686, |
| "kl": 0.005120754241943359, |
| "learning_rate": 4.845152487130914e-06, |
| "loss": 0.009, |
| "step": 455 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 512.4866299629211, |
| "epoch": 0.6807086731797676, |
| "grad_norm": 0.037901297211647034, |
| "kl": 0.004809379577636719, |
| "learning_rate": 4.842971647759142e-06, |
| "loss": 0.0063, |
| "num_tokens": 260253700.0, |
| "reward": 0.05775669912691228, |
| "reward_std": 0.06710927549283952, |
| "rewards/pure_accuracy_reward_math": 0.05775669767172076, |
| "step": 456 |
| }, |
| { |
| "clip_ratio": 0.00026634283756266086, |
| "epoch": 0.6826207761943175, |
| "grad_norm": 0.03568252548575401, |
| "kl": 0.0047724246978759766, |
| "learning_rate": 4.840776056149957e-06, |
| "loss": 0.0063, |
| "step": 457 |
| }, |
| { |
| "clip_ratio": 0.00027518686636085476, |
| "epoch": 0.6845328792088674, |
| "grad_norm": 0.0351024754345417, |
| "kl": 0.004754543304443359, |
| "learning_rate": 4.838565726127636e-06, |
| "loss": 0.0063, |
| "step": 458 |
| }, |
| { |
| "clip_ratio": 0.0003387172891393675, |
| "epoch": 0.6864449822234173, |
| "grad_norm": 0.03477272391319275, |
| "kl": 0.004698753356933594, |
| "learning_rate": 4.836340671609255e-06, |
| "loss": 0.0062, |
| "step": 459 |
| }, |
| { |
| "clip_ratio": 0.0003592506114102889, |
| "epoch": 0.6883570852379672, |
| "grad_norm": 0.035812895745038986, |
| "kl": 0.004735708236694336, |
| "learning_rate": 4.834100906604601e-06, |
| "loss": 0.0062, |
| "step": 460 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 536.1403703689575, |
| "epoch": 0.6902691882525172, |
| "grad_norm": 0.03566034138202667, |
| "kl": 0.004418611526489258, |
| "learning_rate": 4.831846445216082e-06, |
| "loss": 0.0056, |
| "num_tokens": 263902651.0, |
| "reward": 0.05161830614088103, |
| "reward_std": 0.06899610540131107, |
| "rewards/pure_accuracy_reward_math": 0.051618304976727813, |
| "step": 461 |
| }, |
| { |
| "clip_ratio": 0.00028340513017610647, |
| "epoch": 0.6921812912670671, |
| "grad_norm": 0.03495897352695465, |
| "kl": 0.004414081573486328, |
| "learning_rate": 4.829577301638642e-06, |
| "loss": 0.0056, |
| "step": 462 |
| }, |
| { |
| "clip_ratio": 0.0002825141077664739, |
| "epoch": 0.6940933942816169, |
| "grad_norm": 0.034486111253499985, |
| "kl": 0.004411220550537109, |
| "learning_rate": 4.827293490159668e-06, |
| "loss": 0.0056, |
| "step": 463 |
| }, |
| { |
| "clip_ratio": 0.00031019614829119746, |
| "epoch": 0.6960054972961668, |
| "grad_norm": 0.035884980112314224, |
| "kl": 0.004367351531982422, |
| "learning_rate": 4.824995025158903e-06, |
| "loss": 0.0055, |
| "step": 464 |
| }, |
| { |
| "clip_ratio": 0.0003045983889933268, |
| "epoch": 0.6979176003107167, |
| "grad_norm": 0.03378836810588837, |
| "kl": 0.004292488098144531, |
| "learning_rate": 4.822681921108355e-06, |
| "loss": 0.0055, |
| "step": 465 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.3783731460571, |
| "epoch": 0.6998297033252666, |
| "grad_norm": 0.03726997971534729, |
| "kl": 0.0065157413482666016, |
| "learning_rate": 4.8203541925722016e-06, |
| "loss": 0.0017, |
| "num_tokens": 267508687.0, |
| "reward": 0.06724330646102317, |
| "reward_std": 0.07591145433252677, |
| "rewards/pure_accuracy_reward_math": 0.06724330500583164, |
| "step": 466 |
| }, |
| { |
| "clip_ratio": 0.00026273680936128585, |
| "epoch": 0.7017418063398165, |
| "grad_norm": 0.03638988733291626, |
| "kl": 0.0064983367919921875, |
| "learning_rate": 4.818011854206706e-06, |
| "loss": 0.0017, |
| "step": 467 |
| }, |
| { |
| "clip_ratio": 0.0002903113285128711, |
| "epoch": 0.7036539093543664, |
| "grad_norm": 0.0360158272087574, |
| "kl": 0.006509542465209961, |
| "learning_rate": 4.815654920760117e-06, |
| "loss": 0.0016, |
| "step": 468 |
| }, |
| { |
| "clip_ratio": 0.0002849762186087901, |
| "epoch": 0.7055660123689164, |
| "grad_norm": 0.03577370196580887, |
| "kl": 0.006470680236816406, |
| "learning_rate": 4.81328340707258e-06, |
| "loss": 0.0016, |
| "step": 469 |
| }, |
| { |
| "clip_ratio": 0.00031370155647891806, |
| "epoch": 0.7074781153834663, |
| "grad_norm": 0.03484919294714928, |
| "kl": 0.006468772888183594, |
| "learning_rate": 4.810897328076045e-06, |
| "loss": 0.0015, |
| "step": 470 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.1677136421204, |
| "epoch": 0.7093902183980162, |
| "grad_norm": 0.04198005422949791, |
| "kl": 0.004724264144897461, |
| "learning_rate": 4.808496698794171e-06, |
| "loss": 0.0046, |
| "num_tokens": 271138708.0, |
| "reward": 0.07310268204309978, |
| "reward_std": 0.07646948879119009, |
| "rewards/pure_accuracy_reward_math": 0.07310267994762398, |
| "step": 471 |
| }, |
| { |
| "clip_ratio": 0.00028702764876697984, |
| "epoch": 0.7113023214125661, |
| "grad_norm": 0.04015243798494339, |
| "kl": 0.004670619964599609, |
| "learning_rate": 4.8060815343422265e-06, |
| "loss": 0.0045, |
| "step": 472 |
| }, |
| { |
| "clip_ratio": 0.0002947892680822406, |
| "epoch": 0.713214424427116, |
| "grad_norm": 0.0385352224111557, |
| "kl": 0.0046727657318115234, |
| "learning_rate": 4.803651849927004e-06, |
| "loss": 0.0045, |
| "step": 473 |
| }, |
| { |
| "clip_ratio": 0.00036661511779811917, |
| "epoch": 0.7151265274416659, |
| "grad_norm": 0.03803607076406479, |
| "kl": 0.00463414192199707, |
| "learning_rate": 4.801207660846717e-06, |
| "loss": 0.0044, |
| "step": 474 |
| }, |
| { |
| "clip_ratio": 0.00040073674449558894, |
| "epoch": 0.7170386304562159, |
| "grad_norm": 0.03870271518826485, |
| "kl": 0.00464320182800293, |
| "learning_rate": 4.798748982490908e-06, |
| "loss": 0.0044, |
| "step": 475 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 539.262857913971, |
| "epoch": 0.7189507334707658, |
| "grad_norm": 0.0374424010515213, |
| "kl": 0.0045392513275146484, |
| "learning_rate": 4.796275830340344e-06, |
| "loss": 0.0081, |
| "num_tokens": 274802094.0, |
| "reward": 0.061941967433085665, |
| "reward_std": 0.07401842664694414, |
| "rewards/pure_accuracy_reward_math": 0.06194196522119455, |
| "step": 476 |
| }, |
| { |
| "clip_ratio": 0.00026828293908920386, |
| "epoch": 0.7208628364853157, |
| "grad_norm": 0.03758076950907707, |
| "kl": 0.004576683044433594, |
| "learning_rate": 4.793788219966931e-06, |
| "loss": 0.0081, |
| "step": 477 |
| }, |
| { |
| "clip_ratio": 0.0002991793934654652, |
| "epoch": 0.7227749394998656, |
| "grad_norm": 0.03570091351866722, |
| "kl": 0.0045130252838134766, |
| "learning_rate": 4.7912861670336065e-06, |
| "loss": 0.008, |
| "step": 478 |
| }, |
| { |
| "clip_ratio": 0.00031140293214093617, |
| "epoch": 0.7246870425144155, |
| "grad_norm": 0.034991368651390076, |
| "kl": 0.0044956207275390625, |
| "learning_rate": 4.788769687294243e-06, |
| "loss": 0.008, |
| "step": 479 |
| }, |
| { |
| "clip_ratio": 0.00034215352269484356, |
| "epoch": 0.7265991455289653, |
| "grad_norm": 0.03517301753163338, |
| "kl": 0.00450587272644043, |
| "learning_rate": 4.7862387965935504e-06, |
| "loss": 0.0079, |
| "step": 480 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 535.2455615997314, |
| "epoch": 0.7285112485435152, |
| "grad_norm": 0.03517255187034607, |
| "kl": 0.004718780517578125, |
| "learning_rate": 4.783693510866977e-06, |
| "loss": 0.0066, |
| "num_tokens": 278455030.0, |
| "reward": 0.06222098530270159, |
| "reward_std": 0.069766862958204, |
| "rewards/pure_accuracy_reward_math": 0.062220983498264104, |
| "step": 481 |
| }, |
| { |
| "clip_ratio": 0.00026954136529866446, |
| "epoch": 0.7304233515580651, |
| "grad_norm": 0.03456445038318634, |
| "kl": 0.004766225814819336, |
| "learning_rate": 4.781133846140606e-06, |
| "loss": 0.0066, |
| "step": 482 |
| }, |
| { |
| "clip_ratio": 0.000250861422671278, |
| "epoch": 0.7323354545726151, |
| "grad_norm": 0.033632129430770874, |
| "kl": 0.004829883575439453, |
| "learning_rate": 4.778559818531055e-06, |
| "loss": 0.0066, |
| "step": 483 |
| }, |
| { |
| "clip_ratio": 0.0002590245896385568, |
| "epoch": 0.734247557587165, |
| "grad_norm": 0.03314875811338425, |
| "kl": 0.00486445426940918, |
| "learning_rate": 4.775971444245379e-06, |
| "loss": 0.0065, |
| "step": 484 |
| }, |
| { |
| "clip_ratio": 0.0002899982684425595, |
| "epoch": 0.7361596606017149, |
| "grad_norm": 0.03288432955741882, |
| "kl": 0.004921674728393555, |
| "learning_rate": 4.773368739580963e-06, |
| "loss": 0.0065, |
| "step": 485 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.4258046150208, |
| "epoch": 0.7380717636162648, |
| "grad_norm": 0.08309170603752136, |
| "kl": 0.006993293762207031, |
| "learning_rate": 4.770751720925422e-06, |
| "loss": 0.0023, |
| "num_tokens": 282068152.0, |
| "reward": 0.06222098495345563, |
| "reward_std": 0.0712282478925772, |
| "rewards/pure_accuracy_reward_math": 0.06222098338184878, |
| "step": 486 |
| }, |
| { |
| "clip_ratio": 0.0002442373284452515, |
| "epoch": 0.7399838666308147, |
| "grad_norm": 0.042120546102523804, |
| "kl": 0.006081581115722656, |
| "learning_rate": 4.768120404756497e-06, |
| "loss": 0.0023, |
| "step": 487 |
| }, |
| { |
| "clip_ratio": 0.0002956131474434187, |
| "epoch": 0.7418959696453646, |
| "grad_norm": 0.036061204969882965, |
| "kl": 0.0057599544525146484, |
| "learning_rate": 4.765474807641951e-06, |
| "loss": 0.0022, |
| "step": 488 |
| }, |
| { |
| "clip_ratio": 0.00030389728723889675, |
| "epoch": 0.7438080726599146, |
| "grad_norm": 0.03613469749689102, |
| "kl": 0.005738019943237305, |
| "learning_rate": 4.762814946239468e-06, |
| "loss": 0.0022, |
| "step": 489 |
| }, |
| { |
| "clip_ratio": 0.00033159017920070255, |
| "epoch": 0.7457201756744645, |
| "grad_norm": 0.0360892117023468, |
| "kl": 0.00572967529296875, |
| "learning_rate": 4.760140837296542e-06, |
| "loss": 0.0021, |
| "step": 490 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 550.3144750595093, |
| "epoch": 0.7476322786890144, |
| "grad_norm": 0.03636733815073967, |
| "kl": 0.004332542419433594, |
| "learning_rate": 4.757452497650377e-06, |
| "loss": 0.0072, |
| "num_tokens": 285770403.0, |
| "reward": 0.055803573777666315, |
| "reward_std": 0.07161362667102367, |
| "rewards/pure_accuracy_reward_math": 0.05580357278813608, |
| "step": 491 |
| }, |
| { |
| "clip_ratio": 0.00027637260956225873, |
| "epoch": 0.7495443817035643, |
| "grad_norm": 0.035727791488170624, |
| "kl": 0.004361629486083984, |
| "learning_rate": 4.754749944227777e-06, |
| "loss": 0.0072, |
| "step": 492 |
| }, |
| { |
| "clip_ratio": 0.0002587454115428045, |
| "epoch": 0.7514564847181142, |
| "grad_norm": 0.03512200713157654, |
| "kl": 0.0043697357177734375, |
| "learning_rate": 4.752033194045044e-06, |
| "loss": 0.0072, |
| "step": 493 |
| }, |
| { |
| "clip_ratio": 0.00025780797875540884, |
| "epoch": 0.7533685877326641, |
| "grad_norm": 0.033817108720541, |
| "kl": 0.0043947696685791016, |
| "learning_rate": 4.7493022642078654e-06, |
| "loss": 0.0071, |
| "step": 494 |
| }, |
| { |
| "clip_ratio": 0.00029674232627030506, |
| "epoch": 0.755280690747214, |
| "grad_norm": 0.03317062556743622, |
| "kl": 0.004454851150512695, |
| "learning_rate": 4.746557171911211e-06, |
| "loss": 0.0071, |
| "step": 495 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 543.0692186355591, |
| "epoch": 0.757192793761764, |
| "grad_norm": 0.05020016431808472, |
| "kl": 0.0062062740325927734, |
| "learning_rate": 4.7437979344392236e-06, |
| "loss": 0.0059, |
| "num_tokens": 289451319.0, |
| "reward": 0.0616629492433276, |
| "reward_std": 0.07071027776692063, |
| "rewards/pure_accuracy_reward_math": 0.06166294778813608, |
| "step": 496 |
| }, |
| { |
| "clip_ratio": 0.00028460744590574905, |
| "epoch": 0.7591048967763139, |
| "grad_norm": 0.03948064520955086, |
| "kl": 0.0061266422271728516, |
| "learning_rate": 4.741024569165105e-06, |
| "loss": 0.0059, |
| "step": 497 |
| }, |
| { |
| "clip_ratio": 0.0002803450769306437, |
| "epoch": 0.7610169997908637, |
| "grad_norm": 0.03621263429522514, |
| "kl": 0.00614476203918457, |
| "learning_rate": 4.7382370935510165e-06, |
| "loss": 0.0059, |
| "step": 498 |
| }, |
| { |
| "clip_ratio": 0.0003022695020717947, |
| "epoch": 0.7629291028054136, |
| "grad_norm": 0.037622902542352676, |
| "kl": 0.006256580352783203, |
| "learning_rate": 4.73543552514796e-06, |
| "loss": 0.0058, |
| "step": 499 |
| }, |
| { |
| "clip_ratio": 0.00030265802058693225, |
| "epoch": 0.7648412058199635, |
| "grad_norm": 0.03813454508781433, |
| "kl": 0.006264209747314453, |
| "learning_rate": 4.732619881595672e-06, |
| "loss": 0.0057, |
| "step": 500 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 543.3538174629211, |
| "epoch": 0.7667533088345134, |
| "grad_norm": 0.07500133663415909, |
| "kl": 0.005916118621826172, |
| "learning_rate": 4.729790180622512e-06, |
| "loss": 0.0072, |
| "num_tokens": 293127839.0, |
| "reward": 0.0513392879802268, |
| "reward_std": 0.06792009877972305, |
| "rewards/pure_accuracy_reward_math": 0.051339287048904225, |
| "step": 501 |
| }, |
| { |
| "clip_ratio": 0.0002826226679530919, |
| "epoch": 0.7686654118490633, |
| "grad_norm": 0.03498294949531555, |
| "kl": 0.0057086944580078125, |
| "learning_rate": 4.726946440045348e-06, |
| "loss": 0.0072, |
| "step": 502 |
| }, |
| { |
| "clip_ratio": 0.000292762170943206, |
| "epoch": 0.7705775148636133, |
| "grad_norm": 0.0338723324239254, |
| "kl": 0.0054700374603271484, |
| "learning_rate": 4.7240886777694495e-06, |
| "loss": 0.0071, |
| "step": 503 |
| }, |
| { |
| "clip_ratio": 0.00031638332251304746, |
| "epoch": 0.7724896178781632, |
| "grad_norm": 0.03360189124941826, |
| "kl": 0.00526118278503418, |
| "learning_rate": 4.721216911788371e-06, |
| "loss": 0.0071, |
| "step": 504 |
| }, |
| { |
| "clip_ratio": 0.0003445502737804418, |
| "epoch": 0.7744017208927131, |
| "grad_norm": 0.03321666270494461, |
| "kl": 0.005108356475830078, |
| "learning_rate": 4.71833116018384e-06, |
| "loss": 0.007, |
| "step": 505 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.3041553497314, |
| "epoch": 0.776313823907263, |
| "grad_norm": 0.039082907140254974, |
| "kl": 0.0048329830169677734, |
| "learning_rate": 4.715431441125639e-06, |
| "loss": 0.0072, |
| "num_tokens": 296745449.0, |
| "reward": 0.056640627823071554, |
| "reward_std": 0.066464910923969, |
| "rewards/pure_accuracy_reward_math": 0.05664062636788003, |
| "step": 506 |
| }, |
| { |
| "clip_ratio": 0.0002697859709428485, |
| "epoch": 0.7782259269218129, |
| "grad_norm": 0.036139652132987976, |
| "kl": 0.0048868656158447266, |
| "learning_rate": 4.712517772871503e-06, |
| "loss": 0.0072, |
| "step": 507 |
| }, |
| { |
| "clip_ratio": 0.0002602223319172481, |
| "epoch": 0.7801380299363628, |
| "grad_norm": 0.03708622604608536, |
| "kl": 0.004920244216918945, |
| "learning_rate": 4.709590173766988e-06, |
| "loss": 0.0072, |
| "step": 508 |
| }, |
| { |
| "clip_ratio": 0.00030563702995323183, |
| "epoch": 0.7820501329509127, |
| "grad_norm": 0.03873802721500397, |
| "kl": 0.004922151565551758, |
| "learning_rate": 4.706648662245368e-06, |
| "loss": 0.0071, |
| "step": 509 |
| }, |
| { |
| "clip_ratio": 0.00027421732914945096, |
| "epoch": 0.7839622359654627, |
| "grad_norm": 0.0337008535861969, |
| "kl": 0.004686117172241211, |
| "learning_rate": 4.703693256827515e-06, |
| "loss": 0.0071, |
| "step": 510 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 544.4595675468445, |
| "epoch": 0.7858743389800126, |
| "grad_norm": 0.032148003578186035, |
| "kl": 0.004284381866455078, |
| "learning_rate": 4.700723976121782e-06, |
| "loss": 0.0079, |
| "num_tokens": 300427724.0, |
| "reward": 0.05998884211294353, |
| "reward_std": 0.06822534691309556, |
| "rewards/pure_accuracy_reward_math": 0.059988840483129025, |
| "step": 511 |
| }, |
| { |
| "clip_ratio": 0.00023266997004611767, |
| "epoch": 0.7877864419945625, |
| "grad_norm": 0.03213036060333252, |
| "kl": 0.004235267639160156, |
| "learning_rate": 4.697740838823884e-06, |
| "loss": 0.0079, |
| "step": 512 |
| }, |
| { |
| "clip_ratio": 0.00023210655439243055, |
| "epoch": 0.7896985450091124, |
| "grad_norm": 0.03171762451529503, |
| "kl": 0.004268169403076172, |
| "learning_rate": 4.694743863716784e-06, |
| "loss": 0.0078, |
| "step": 513 |
| }, |
| { |
| "clip_ratio": 0.0002433597992990144, |
| "epoch": 0.7916106480236623, |
| "grad_norm": 0.030378276482224464, |
| "kl": 0.004282712936401367, |
| "learning_rate": 4.691733069670575e-06, |
| "loss": 0.0078, |
| "step": 514 |
| }, |
| { |
| "clip_ratio": 0.00024098603546462982, |
| "epoch": 0.7935227510382122, |
| "grad_norm": 0.030135801061987877, |
| "kl": 0.004299640655517578, |
| "learning_rate": 4.688708475642356e-06, |
| "loss": 0.0078, |
| "step": 515 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.5064425468445, |
| "epoch": 0.795434854052762, |
| "grad_norm": 0.03758488968014717, |
| "kl": 0.004748344421386719, |
| "learning_rate": 4.685670100676117e-06, |
| "loss": 0.0056, |
| "num_tokens": 304030899.0, |
| "reward": 0.059151788300368935, |
| "reward_std": 0.06615966308163479, |
| "rewards/pure_accuracy_reward_math": 0.05915178684517741, |
| "step": 516 |
| }, |
| { |
| "clip_ratio": 0.00024922658519699326, |
| "epoch": 0.797346957067312, |
| "grad_norm": 0.03667794167995453, |
| "kl": 0.004762172698974609, |
| "learning_rate": 4.6826179639026185e-06, |
| "loss": 0.0056, |
| "step": 517 |
| }, |
| { |
| "clip_ratio": 0.00024439046995894387, |
| "epoch": 0.7992590600818619, |
| "grad_norm": 0.03566230833530426, |
| "kl": 0.004770755767822266, |
| "learning_rate": 4.679552084539271e-06, |
| "loss": 0.0055, |
| "step": 518 |
| }, |
| { |
| "clip_ratio": 0.00025443012202686077, |
| "epoch": 0.8011711630964118, |
| "grad_norm": 0.03555983304977417, |
| "kl": 0.004889011383056641, |
| "learning_rate": 4.676472481890012e-06, |
| "loss": 0.0055, |
| "step": 519 |
| }, |
| { |
| "clip_ratio": 0.0002555244412860702, |
| "epoch": 0.8030832661109617, |
| "grad_norm": 0.03477266803383827, |
| "kl": 0.004910707473754883, |
| "learning_rate": 4.673379175345187e-06, |
| "loss": 0.0054, |
| "step": 520 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.2039861679077, |
| "epoch": 0.8049953691255116, |
| "grad_norm": 0.03352927044034004, |
| "kl": 0.004728078842163086, |
| "learning_rate": 4.670272184381426e-06, |
| "loss": 0.0064, |
| "num_tokens": 307666714.0, |
| "reward": 0.05106027063447982, |
| "reward_std": 0.061781705473549664, |
| "rewards/pure_accuracy_reward_math": 0.05106026888824999, |
| "step": 521 |
| }, |
| { |
| "clip_ratio": 0.00022480493561261028, |
| "epoch": 0.8069074721400615, |
| "grad_norm": 0.0328591950237751, |
| "kl": 0.004677772521972656, |
| "learning_rate": 4.667151528561522e-06, |
| "loss": 0.0064, |
| "step": 522 |
| }, |
| { |
| "clip_ratio": 0.0002208993353463029, |
| "epoch": 0.8088195751546114, |
| "grad_norm": 0.0323566235601902, |
| "kl": 0.004681825637817383, |
| "learning_rate": 4.664017227534308e-06, |
| "loss": 0.0064, |
| "step": 523 |
| }, |
| { |
| "clip_ratio": 0.0002261604544742113, |
| "epoch": 0.8107316781691614, |
| "grad_norm": 0.03178941085934639, |
| "kl": 0.004633665084838867, |
| "learning_rate": 4.6608693010345285e-06, |
| "loss": 0.0063, |
| "step": 524 |
| }, |
| { |
| "clip_ratio": 0.0002347389614101303, |
| "epoch": 0.8126437811837113, |
| "grad_norm": 0.03144075721502304, |
| "kl": 0.004633426666259766, |
| "learning_rate": 4.657707768882723e-06, |
| "loss": 0.0063, |
| "step": 525 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 534.2078919410706, |
| "epoch": 0.8145558841982612, |
| "grad_norm": 36658.046875, |
| "kl": 696.0046517848969, |
| "learning_rate": 4.6545326509850965e-06, |
| "loss": 27.8583, |
| "num_tokens": 311314491.0, |
| "reward": 0.05747768114088103, |
| "reward_std": 0.06521624798187986, |
| "rewards/pure_accuracy_reward_math": 0.057477680093143135, |
| "step": 526 |
| }, |
| { |
| "clip_ratio": 0.0006453408203128674, |
| "epoch": 0.8164679872128111, |
| "grad_norm": 3234.42724609375, |
| "kl": 42.254658937454224, |
| "learning_rate": 4.651343967333394e-06, |
| "loss": 1.7021, |
| "step": 527 |
| }, |
| { |
| "clip_ratio": 0.0006781478184620937, |
| "epoch": 0.818380090227361, |
| "grad_norm": 430.01318359375, |
| "kl": 0.21270966529846191, |
| "learning_rate": 4.648141738004776e-06, |
| "loss": 0.256, |
| "step": 528 |
| }, |
| { |
| "clip_ratio": 0.0006916913723671314, |
| "epoch": 0.8202921932419109, |
| "grad_norm": 457.1385803222656, |
| "kl": 0.1541590690612793, |
| "learning_rate": 4.644925983161691e-06, |
| "loss": 0.3118, |
| "step": 529 |
| }, |
| { |
| "clip_ratio": 0.0007114471513887111, |
| "epoch": 0.8222042962564609, |
| "grad_norm": 61.02793884277344, |
| "kl": 1.6688117980957031, |
| "learning_rate": 4.641696723051753e-06, |
| "loss": 0.1081, |
| "step": 530 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 544.7664904594421, |
| "epoch": 0.8241163992710108, |
| "grad_norm": 0.03665775805711746, |
| "kl": 0.0046710968017578125, |
| "learning_rate": 4.638453978007606e-06, |
| "loss": 0.0033, |
| "num_tokens": 315000186.0, |
| "reward": 0.05691964577999897, |
| "reward_std": 0.06766731111565605, |
| "rewards/pure_accuracy_reward_math": 0.056919643975561485, |
| "step": 531 |
| }, |
| { |
| "clip_ratio": 0.000247030089042255, |
| "epoch": 0.8260285022855607, |
| "grad_norm": 0.03543345257639885, |
| "kl": 0.004717826843261719, |
| "learning_rate": 4.635197768446799e-06, |
| "loss": 0.0033, |
| "step": 532 |
| }, |
| { |
| "clip_ratio": 0.00024415442914005325, |
| "epoch": 0.8279406053001105, |
| "grad_norm": 0.034531209617853165, |
| "kl": 0.004744768142700195, |
| "learning_rate": 4.631928114871667e-06, |
| "loss": 0.0032, |
| "step": 533 |
| }, |
| { |
| "clip_ratio": 0.0002580326566032909, |
| "epoch": 0.8298527083146604, |
| "grad_norm": 0.03323632851243019, |
| "kl": 0.004830360412597656, |
| "learning_rate": 4.628645037869183e-06, |
| "loss": 0.0032, |
| "step": 534 |
| }, |
| { |
| "clip_ratio": 0.00029695888167680096, |
| "epoch": 0.8317648113292103, |
| "grad_norm": 0.03470376506447792, |
| "kl": 0.0048847198486328125, |
| "learning_rate": 4.625348558110846e-06, |
| "loss": 0.0031, |
| "step": 535 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 543.506443977356, |
| "epoch": 0.8336769143437602, |
| "grad_norm": 33.48581314086914, |
| "kl": 0.7041072845458984, |
| "learning_rate": 4.6220386963525425e-06, |
| "loss": 0.0349, |
| "num_tokens": 318683697.0, |
| "reward": 0.06333705675206147, |
| "reward_std": 0.0759915838134475, |
| "rewards/pure_accuracy_reward_math": 0.063337054773001, |
| "step": 536 |
| }, |
| { |
| "clip_ratio": 0.00030500417074108555, |
| "epoch": 0.8355890173583101, |
| "grad_norm": 5.391356468200684, |
| "kl": 0.12163639068603516, |
| "learning_rate": 4.6187154734344144e-06, |
| "loss": 0.0115, |
| "step": 537 |
| }, |
| { |
| "clip_ratio": 0.0003094891900445873, |
| "epoch": 0.8375011203728601, |
| "grad_norm": 0.24674992263317108, |
| "kl": 0.011260032653808594, |
| "learning_rate": 4.615378910280735e-06, |
| "loss": 0.007, |
| "step": 538 |
| }, |
| { |
| "clip_ratio": 0.0003443351265559613, |
| "epoch": 0.83941322338741, |
| "grad_norm": 0.040490083396434784, |
| "kl": 0.0068547725677490234, |
| "learning_rate": 4.61202902789977e-06, |
| "loss": 0.0068, |
| "step": 539 |
| }, |
| { |
| "clip_ratio": 0.0003249310258297555, |
| "epoch": 0.8413253264019599, |
| "grad_norm": 0.037383101880550385, |
| "kl": 0.006977081298828125, |
| "learning_rate": 4.608665847383646e-06, |
| "loss": 0.0068, |
| "step": 540 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.8432207107544, |
| "epoch": 0.8432374294165098, |
| "grad_norm": 0.0408562608063221, |
| "kl": 0.005080223083496094, |
| "learning_rate": 4.6052893899082244e-06, |
| "loss": 0.0092, |
| "num_tokens": 322311955.0, |
| "reward": 0.07505580695578828, |
| "reward_std": 0.08672685426427051, |
| "rewards/pure_accuracy_reward_math": 0.07505580462748185, |
| "step": 541 |
| }, |
| { |
| "clip_ratio": 0.0003254984287082152, |
| "epoch": 0.8451495324310597, |
| "grad_norm": 0.03888032212853432, |
| "kl": 0.005081653594970703, |
| "learning_rate": 4.60189967673296e-06, |
| "loss": 0.0091, |
| "step": 542 |
| }, |
| { |
| "clip_ratio": 0.00032150591908930437, |
| "epoch": 0.8470616354456096, |
| "grad_norm": 0.03769301995635033, |
| "kl": 0.005054950714111328, |
| "learning_rate": 4.598496729200772e-06, |
| "loss": 0.0091, |
| "step": 543 |
| }, |
| { |
| "clip_ratio": 0.0003807161001532222, |
| "epoch": 0.8489737384601596, |
| "grad_norm": 0.03671475872397423, |
| "kl": 0.005011320114135742, |
| "learning_rate": 4.595080568737907e-06, |
| "loss": 0.009, |
| "step": 544 |
| }, |
| { |
| "clip_ratio": 0.00040073374452731514, |
| "epoch": 0.8508858414747095, |
| "grad_norm": 0.03656642884016037, |
| "kl": 0.004985332489013672, |
| "learning_rate": 4.591651216853808e-06, |
| "loss": 0.009, |
| "step": 545 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 521.1850123405457, |
| "epoch": 0.8527979444892594, |
| "grad_norm": 0.04072614386677742, |
| "kl": 0.005250692367553711, |
| "learning_rate": 4.588208695140972e-06, |
| "loss": 0.008, |
| "num_tokens": 325915646.0, |
| "reward": 0.06891741379513405, |
| "reward_std": 0.07457646180409938, |
| "rewards/pure_accuracy_reward_math": 0.0689174119324889, |
| "step": 546 |
| }, |
| { |
| "clip_ratio": 0.0002774237623270892, |
| "epoch": 0.8547100475038093, |
| "grad_norm": 0.03891909867525101, |
| "kl": 0.005267620086669922, |
| "learning_rate": 4.5847530252748206e-06, |
| "loss": 0.008, |
| "step": 547 |
| }, |
| { |
| "clip_ratio": 0.0003099276901821213, |
| "epoch": 0.8566221505183592, |
| "grad_norm": 0.03776893764734268, |
| "kl": 0.005312681198120117, |
| "learning_rate": 4.581284229013561e-06, |
| "loss": 0.008, |
| "step": 548 |
| }, |
| { |
| "clip_ratio": 0.0003329096458060121, |
| "epoch": 0.8585342535329091, |
| "grad_norm": 0.03786613792181015, |
| "kl": 0.0053446292877197266, |
| "learning_rate": 4.57780232819805e-06, |
| "loss": 0.0079, |
| "step": 549 |
| }, |
| { |
| "clip_ratio": 0.0003465502328481307, |
| "epoch": 0.860446356547459, |
| "grad_norm": 0.03782954812049866, |
| "kl": 0.00535893440246582, |
| "learning_rate": 4.574307344751654e-06, |
| "loss": 0.0079, |
| "step": 550 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 512.2042679786682, |
| "epoch": 0.8623584595620088, |
| "grad_norm": 0.04082540422677994, |
| "kl": 0.005150318145751953, |
| "learning_rate": 4.570799300680112e-06, |
| "loss": 0.0061, |
| "num_tokens": 329486142.0, |
| "reward": 0.06696428914438002, |
| "reward_std": 0.07865536911413074, |
| "rewards/pure_accuracy_reward_math": 0.06696428681607358, |
| "step": 551 |
| }, |
| { |
| "clip_ratio": 0.0002784457984148503, |
| "epoch": 0.8642705625765588, |
| "grad_norm": 0.039590511471033096, |
| "kl": 0.005137205123901367, |
| "learning_rate": 4.5672782180714005e-06, |
| "loss": 0.0061, |
| "step": 552 |
| }, |
| { |
| "clip_ratio": 0.0003210699376268167, |
| "epoch": 0.8661826655911087, |
| "grad_norm": 0.03983275964856148, |
| "kl": 0.005161285400390625, |
| "learning_rate": 4.56374411909559e-06, |
| "loss": 0.0061, |
| "step": 553 |
| }, |
| { |
| "clip_ratio": 0.00032905748116718314, |
| "epoch": 0.8680947686056586, |
| "grad_norm": 0.03924131765961647, |
| "kl": 0.0051097869873046875, |
| "learning_rate": 4.560197026004706e-06, |
| "loss": 0.006, |
| "step": 554 |
| }, |
| { |
| "clip_ratio": 0.00036174511694753164, |
| "epoch": 0.8700068716202085, |
| "grad_norm": 0.03864859789609909, |
| "kl": 0.0051233768463134766, |
| "learning_rate": 4.556636961132591e-06, |
| "loss": 0.0059, |
| "step": 555 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.8490724563599, |
| "epoch": 0.8719189746347584, |
| "grad_norm": 0.03831901028752327, |
| "kl": 0.005173921585083008, |
| "learning_rate": 4.553063946894765e-06, |
| "loss": 0.0089, |
| "num_tokens": 333101169.0, |
| "reward": 0.05970982427243143, |
| "reward_std": 0.06925509037682787, |
| "rewards/pure_accuracy_reward_math": 0.05970982293365523, |
| "step": 556 |
| }, |
| { |
| "clip_ratio": 0.00024058804717697058, |
| "epoch": 0.8738310776493083, |
| "grad_norm": 0.03815346583724022, |
| "kl": 0.005152463912963867, |
| "learning_rate": 4.549478005788276e-06, |
| "loss": 0.0088, |
| "step": 557 |
| }, |
| { |
| "clip_ratio": 0.0002689754076072859, |
| "epoch": 0.8757431806638583, |
| "grad_norm": 0.03663227707147598, |
| "kl": 0.00511932373046875, |
| "learning_rate": 4.5458791603915695e-06, |
| "loss": 0.0088, |
| "step": 558 |
| }, |
| { |
| "clip_ratio": 0.0002769273295371022, |
| "epoch": 0.8776552836784082, |
| "grad_norm": 0.03534897044301033, |
| "kl": 0.005173921585083008, |
| "learning_rate": 4.5422674333643415e-06, |
| "loss": 0.0087, |
| "step": 559 |
| }, |
| { |
| "clip_ratio": 0.0003186316080245888, |
| "epoch": 0.8795673866929581, |
| "grad_norm": 0.03454131633043289, |
| "kl": 0.005182981491088867, |
| "learning_rate": 4.538642847447393e-06, |
| "loss": 0.0087, |
| "step": 560 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 499.49025869369507, |
| "epoch": 0.881479489707508, |
| "grad_norm": 0.03870520368218422, |
| "kl": 0.005303621292114258, |
| "learning_rate": 4.53500542546249e-06, |
| "loss": 0.0063, |
| "num_tokens": 336621146.0, |
| "reward": 0.06724330663564615, |
| "reward_std": 0.07539348350837827, |
| "rewards/pure_accuracy_reward_math": 0.0672433050640393, |
| "step": 561 |
| }, |
| { |
| "clip_ratio": 0.0002930208739826412, |
| "epoch": 0.8833915927220579, |
| "grad_norm": 0.03670111671090126, |
| "kl": 0.005410432815551758, |
| "learning_rate": 4.5313551903122195e-06, |
| "loss": 0.0063, |
| "step": 562 |
| }, |
| { |
| "clip_ratio": 0.00033625421181682214, |
| "epoch": 0.8853036957366078, |
| "grad_norm": 0.03873737156391144, |
| "kl": 0.0054399967193603516, |
| "learning_rate": 4.5276921649798475e-06, |
| "loss": 0.0063, |
| "step": 563 |
| }, |
| { |
| "clip_ratio": 0.0003349392310383337, |
| "epoch": 0.8872157987511577, |
| "grad_norm": 0.038494061678647995, |
| "kl": 0.0053806304931640625, |
| "learning_rate": 4.524016372529168e-06, |
| "loss": 0.0062, |
| "step": 564 |
| }, |
| { |
| "clip_ratio": 0.00031196477385719845, |
| "epoch": 0.8891279017657077, |
| "grad_norm": 0.03559175133705139, |
| "kl": 0.005260467529296875, |
| "learning_rate": 4.520327836104363e-06, |
| "loss": 0.0061, |
| "step": 565 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 521.2452793121338, |
| "epoch": 0.8910400047802576, |
| "grad_norm": 0.033526018261909485, |
| "kl": 0.0050280094146728516, |
| "learning_rate": 4.516626578929857e-06, |
| "loss": 0.0083, |
| "num_tokens": 340217537.0, |
| "reward": 0.05970982470898889, |
| "reward_std": 0.06920882686972618, |
| "rewards/pure_accuracy_reward_math": 0.059709822555305436, |
| "step": 566 |
| }, |
| { |
| "clip_ratio": 0.0002854210310374583, |
| "epoch": 0.8929521077948075, |
| "grad_norm": 0.03320698440074921, |
| "kl": 0.00494694709777832, |
| "learning_rate": 4.512912624310166e-06, |
| "loss": 0.0083, |
| "step": 567 |
| }, |
| { |
| "clip_ratio": 0.00028784406134718665, |
| "epoch": 0.8948642108093574, |
| "grad_norm": 0.0334990993142128, |
| "kl": 0.004927158355712891, |
| "learning_rate": 4.509185995629758e-06, |
| "loss": 0.0083, |
| "step": 568 |
| }, |
| { |
| "clip_ratio": 0.00028731861192454744, |
| "epoch": 0.8967763138239072, |
| "grad_norm": 0.032721105962991714, |
| "kl": 0.004916667938232422, |
| "learning_rate": 4.505446716352898e-06, |
| "loss": 0.0083, |
| "step": 569 |
| }, |
| { |
| "clip_ratio": 0.0003211342911981774, |
| "epoch": 0.8986884168384571, |
| "grad_norm": 0.031691305339336395, |
| "kl": 0.0050427913665771484, |
| "learning_rate": 4.501694810023506e-06, |
| "loss": 0.0082, |
| "step": 570 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 513.3175444602966, |
| "epoch": 0.900600519853007, |
| "grad_norm": 0.039067283272743225, |
| "kl": 0.0051767826080322266, |
| "learning_rate": 4.497930300265005e-06, |
| "loss": 0.0062, |
| "num_tokens": 343792675.0, |
| "reward": 0.07254464668221772, |
| "reward_std": 0.07260330504504964, |
| "rewards/pure_accuracy_reward_math": 0.07254464394645765, |
| "step": 571 |
| }, |
| { |
| "clip_ratio": 0.000284439854624452, |
| "epoch": 0.902512622867557, |
| "grad_norm": 0.03746037185192108, |
| "kl": 0.0051670074462890625, |
| "learning_rate": 4.494153210780177e-06, |
| "loss": 0.0062, |
| "step": 572 |
| }, |
| { |
| "clip_ratio": 0.0002894837679718876, |
| "epoch": 0.9044247258821069, |
| "grad_norm": 0.0363248772919178, |
| "kl": 0.0051119327545166016, |
| "learning_rate": 4.490363565351007e-06, |
| "loss": 0.0061, |
| "step": 573 |
| }, |
| { |
| "clip_ratio": 0.00029392389137683494, |
| "epoch": 0.9063368288966568, |
| "grad_norm": 0.03513769805431366, |
| "kl": 0.005059242248535156, |
| "learning_rate": 4.486561387838539e-06, |
| "loss": 0.0061, |
| "step": 574 |
| }, |
| { |
| "clip_ratio": 0.0003296555175325011, |
| "epoch": 0.9082489319112067, |
| "grad_norm": 0.03513012453913689, |
| "kl": 0.005059242248535156, |
| "learning_rate": 4.482746702182725e-06, |
| "loss": 0.006, |
| "step": 575 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.8926033973694, |
| "epoch": 0.9101610349257566, |
| "grad_norm": 0.049145400524139404, |
| "kl": 0.011604547500610352, |
| "learning_rate": 4.478919532402271e-06, |
| "loss": 0.0046, |
| "num_tokens": 347395370.0, |
| "reward": 0.07170759254950099, |
| "reward_std": 0.0817445982247591, |
| "rewards/pure_accuracy_reward_math": 0.07170759091968648, |
| "step": 576 |
| }, |
| { |
| "clip_ratio": 0.00030760892423131736, |
| "epoch": 0.9120731379403065, |
| "grad_norm": 0.04954507574439049, |
| "kl": 0.011447906494140625, |
| "learning_rate": 4.4750799025944866e-06, |
| "loss": 0.0045, |
| "step": 577 |
| }, |
| { |
| "clip_ratio": 0.0003202956161487691, |
| "epoch": 0.9139852409548564, |
| "grad_norm": 0.04883984476327896, |
| "kl": 0.010998249053955078, |
| "learning_rate": 4.471227836935139e-06, |
| "loss": 0.0045, |
| "step": 578 |
| }, |
| { |
| "clip_ratio": 0.0003312723312660637, |
| "epoch": 0.9158973439694064, |
| "grad_norm": 0.049066606909036636, |
| "kl": 0.010381698608398438, |
| "learning_rate": 4.467363359678291e-06, |
| "loss": 0.0044, |
| "step": 579 |
| }, |
| { |
| "clip_ratio": 0.00041312941800697445, |
| "epoch": 0.9178094469839563, |
| "grad_norm": 0.053418997675180435, |
| "kl": 0.009602546691894531, |
| "learning_rate": 4.463486495156157e-06, |
| "loss": 0.0043, |
| "step": 580 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 539.5678267478943, |
| "epoch": 0.9197215499985062, |
| "grad_norm": 0.03747523948550224, |
| "kl": 0.004802227020263672, |
| "learning_rate": 4.459597267778945e-06, |
| "loss": 0.0041, |
| "num_tokens": 351065793.0, |
| "reward": 0.062220984895247966, |
| "reward_std": 0.07298868335783482, |
| "rewards/pure_accuracy_reward_math": 0.0622209832072258, |
| "step": 581 |
| }, |
| { |
| "clip_ratio": 0.0002890200073579763, |
| "epoch": 0.9216336530130561, |
| "grad_norm": 0.03557584062218666, |
| "kl": 0.004851579666137695, |
| "learning_rate": 4.455695702034705e-06, |
| "loss": 0.0041, |
| "step": 582 |
| }, |
| { |
| "clip_ratio": 0.00031045296407228307, |
| "epoch": 0.923545756027606, |
| "grad_norm": 0.034734807908535004, |
| "kl": 0.004895925521850586, |
| "learning_rate": 4.451781822489173e-06, |
| "loss": 0.0041, |
| "step": 583 |
| }, |
| { |
| "clip_ratio": 0.00032734786560695284, |
| "epoch": 0.9254578590421559, |
| "grad_norm": 0.03634972497820854, |
| "kl": 0.004976511001586914, |
| "learning_rate": 4.447855653785617e-06, |
| "loss": 0.004, |
| "step": 584 |
| }, |
| { |
| "clip_ratio": 0.00036698238614008005, |
| "epoch": 0.9273699620567059, |
| "grad_norm": 0.036671172827482224, |
| "kl": 0.004954338073730469, |
| "learning_rate": 4.4439172206446845e-06, |
| "loss": 0.0039, |
| "step": 585 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 538.6261405944824, |
| "epoch": 0.9292820650712557, |
| "grad_norm": 0.03805253654718399, |
| "kl": 0.005060434341430664, |
| "learning_rate": 4.439966547864243e-06, |
| "loss": 0.0061, |
| "num_tokens": 354732057.0, |
| "reward": 0.06194196725846268, |
| "reward_std": 0.07766569184605032, |
| "rewards/pure_accuracy_reward_math": 0.06194196580327116, |
| "step": 586 |
| }, |
| { |
| "clip_ratio": 0.0002944122598478316, |
| "epoch": 0.9311941680858056, |
| "grad_norm": 0.03603314608335495, |
| "kl": 0.005051136016845703, |
| "learning_rate": 4.436003660319224e-06, |
| "loss": 0.0061, |
| "step": 587 |
| }, |
| { |
| "clip_ratio": 0.0003042620955966413, |
| "epoch": 0.9331062711003555, |
| "grad_norm": 0.035505130887031555, |
| "kl": 0.005032539367675781, |
| "learning_rate": 4.432028582961472e-06, |
| "loss": 0.006, |
| "step": 588 |
| }, |
| { |
| "clip_ratio": 0.00032173160303727855, |
| "epoch": 0.9350183741149054, |
| "grad_norm": 0.03633759915828705, |
| "kl": 0.00509190559387207, |
| "learning_rate": 4.428041340819579e-06, |
| "loss": 0.006, |
| "step": 589 |
| }, |
| { |
| "clip_ratio": 0.00038377046530513326, |
| "epoch": 0.9369304771294553, |
| "grad_norm": 0.03761395812034607, |
| "kl": 0.005148649215698242, |
| "learning_rate": 4.424041958998732e-06, |
| "loss": 0.0059, |
| "step": 590 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 540.8948354721069, |
| "epoch": 0.9388425801440052, |
| "grad_norm": 0.04139011353254318, |
| "kl": 0.005031585693359375, |
| "learning_rate": 4.420030462680554e-06, |
| "loss": 0.007, |
| "num_tokens": 358409840.0, |
| "reward": 0.0714285749127157, |
| "reward_std": 0.07565246830927208, |
| "rewards/pure_accuracy_reward_math": 0.07142857275903225, |
| "step": 591 |
| }, |
| { |
| "clip_ratio": 0.0002982392526291733, |
| "epoch": 0.9407546831585551, |
| "grad_norm": 0.03948375955224037, |
| "kl": 0.005082845687866211, |
| "learning_rate": 4.416006877122948e-06, |
| "loss": 0.007, |
| "step": 592 |
| }, |
| { |
| "clip_ratio": 0.00033647330587882607, |
| "epoch": 0.9426667861731051, |
| "grad_norm": 0.041717879474163055, |
| "kl": 0.005113363265991211, |
| "learning_rate": 4.411971227659933e-06, |
| "loss": 0.0069, |
| "step": 593 |
| }, |
| { |
| "clip_ratio": 0.00036752876485479646, |
| "epoch": 0.944578889187655, |
| "grad_norm": 0.04109462723135948, |
| "kl": 0.005068063735961914, |
| "learning_rate": 4.407923539701486e-06, |
| "loss": 0.0069, |
| "step": 594 |
| }, |
| { |
| "clip_ratio": 0.0003528254699176614, |
| "epoch": 0.9464909922022049, |
| "grad_norm": 0.03620041161775589, |
| "kl": 0.0049245357513427734, |
| "learning_rate": 4.403863838733386e-06, |
| "loss": 0.0068, |
| "step": 595 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 545.2444491386414, |
| "epoch": 0.9484030952167548, |
| "grad_norm": 42.05046463012695, |
| "kl": 0.3311493396759033, |
| "learning_rate": 4.399792150317048e-06, |
| "loss": 0.0203, |
| "num_tokens": 362096328.0, |
| "reward": 0.06026786071015522, |
| "reward_std": 0.07324766798410565, |
| "rewards/pure_accuracy_reward_math": 0.06026785832364112, |
| "step": 596 |
| }, |
| { |
| "clip_ratio": 0.0003009684866128737, |
| "epoch": 0.9503151982313047, |
| "grad_norm": 0.575372040271759, |
| "kl": 0.01551508903503418, |
| "learning_rate": 4.395708500089366e-06, |
| "loss": 0.0076, |
| "step": 597 |
| }, |
| { |
| "clip_ratio": 0.0003299758830053179, |
| "epoch": 0.9522273012458546, |
| "grad_norm": 0.052088066935539246, |
| "kl": 0.01082468032836914, |
| "learning_rate": 4.391612913762549e-06, |
| "loss": 0.0074, |
| "step": 598 |
| }, |
| { |
| "clip_ratio": 0.00032988658261956516, |
| "epoch": 0.9541394042604046, |
| "grad_norm": 0.046673182398080826, |
| "kl": 0.011472225189208984, |
| "learning_rate": 4.38750541712396e-06, |
| "loss": 0.0074, |
| "step": 599 |
| }, |
| { |
| "clip_ratio": 0.00031585949000145774, |
| "epoch": 0.9560515072749545, |
| "grad_norm": 0.04350757598876953, |
| "kl": 0.011662006378173828, |
| "learning_rate": 4.383386036035956e-06, |
| "loss": 0.0074, |
| "step": 600 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 539.0309958457947, |
| "epoch": 0.9579636102895044, |
| "grad_norm": 0.04193362593650818, |
| "kl": 0.005011081695556641, |
| "learning_rate": 4.379254796435719e-06, |
| "loss": 0.0085, |
| "num_tokens": 365761119.0, |
| "reward": 0.06696428923169151, |
| "reward_std": 0.08311965479515493, |
| "rewards/pure_accuracy_reward_math": 0.06696428667055443, |
| "step": 601 |
| }, |
| { |
| "clip_ratio": 0.0003076634293392999, |
| "epoch": 0.9598757133040543, |
| "grad_norm": 0.04204736277461052, |
| "kl": 0.005095720291137695, |
| "learning_rate": 4.375111724335102e-06, |
| "loss": 0.0085, |
| "step": 602 |
| }, |
| { |
| "clip_ratio": 0.0002991189727481469, |
| "epoch": 0.9617878163186042, |
| "grad_norm": 0.041649866849184036, |
| "kl": 0.00509333610534668, |
| "learning_rate": 4.370956845820455e-06, |
| "loss": 0.0085, |
| "step": 603 |
| }, |
| { |
| "clip_ratio": 0.0003053998929090085, |
| "epoch": 0.963699919333154, |
| "grad_norm": 0.03969484567642212, |
| "kl": 0.005100727081298828, |
| "learning_rate": 4.366790187052468e-06, |
| "loss": 0.0084, |
| "step": 604 |
| }, |
| { |
| "clip_ratio": 0.0003063883330014505, |
| "epoch": 0.9656120223477039, |
| "grad_norm": 0.03833401948213577, |
| "kl": 0.005064487457275391, |
| "learning_rate": 4.362611774266005e-06, |
| "loss": 0.0083, |
| "step": 605 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 534.4046006202698, |
| "epoch": 0.9675241253622539, |
| "grad_norm": 0.038279399275779724, |
| "kl": 0.005177021026611328, |
| "learning_rate": 4.358421633769934e-06, |
| "loss": 0.0061, |
| "num_tokens": 369412689.0, |
| "reward": 0.07087053885334171, |
| "reward_std": 0.08299326128326356, |
| "rewards/pure_accuracy_reward_math": 0.0708705369324889, |
| "step": 606 |
| }, |
| { |
| "clip_ratio": 0.00030927538728064974, |
| "epoch": 0.9694362283768038, |
| "grad_norm": 0.037665851414203644, |
| "kl": 0.005164146423339844, |
| "learning_rate": 4.35421979194697e-06, |
| "loss": 0.0061, |
| "step": 607 |
| }, |
| { |
| "clip_ratio": 0.0003293242310178357, |
| "epoch": 0.9713483313913537, |
| "grad_norm": 0.036888375878334045, |
| "kl": 0.005212306976318359, |
| "learning_rate": 4.3500062752535e-06, |
| "loss": 0.006, |
| "step": 608 |
| }, |
| { |
| "clip_ratio": 0.0003369250752029984, |
| "epoch": 0.9732604344059036, |
| "grad_norm": 0.03607965633273125, |
| "kl": 0.005278587341308594, |
| "learning_rate": 4.3457811102194225e-06, |
| "loss": 0.006, |
| "step": 609 |
| }, |
| { |
| "clip_ratio": 0.00034393194414406025, |
| "epoch": 0.9751725374204535, |
| "grad_norm": 0.036863330751657486, |
| "kl": 0.005379676818847656, |
| "learning_rate": 4.341544323447978e-06, |
| "loss": 0.0059, |
| "step": 610 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.9905385971069, |
| "epoch": 0.9770846404350034, |
| "grad_norm": 0.03825363516807556, |
| "kl": 0.005227804183959961, |
| "learning_rate": 4.33729594161558e-06, |
| "loss": 0.0103, |
| "num_tokens": 373041503.0, |
| "reward": 0.07254464607103728, |
| "reward_std": 0.07848271250259131, |
| "rewards/pure_accuracy_reward_math": 0.07254464444122277, |
| "step": 611 |
| }, |
| { |
| "clip_ratio": 0.0002938344064205012, |
| "epoch": 0.9789967434495533, |
| "grad_norm": 0.037028077989816666, |
| "kl": 0.005240917205810547, |
| "learning_rate": 4.333035991471653e-06, |
| "loss": 0.0102, |
| "step": 612 |
| }, |
| { |
| "clip_ratio": 0.00029232190240691125, |
| "epoch": 0.9809088464641033, |
| "grad_norm": 0.03623189404606819, |
| "kl": 0.005187034606933594, |
| "learning_rate": 4.328764499838456e-06, |
| "loss": 0.0102, |
| "step": 613 |
| }, |
| { |
| "clip_ratio": 0.000318144969014611, |
| "epoch": 0.9828209494786532, |
| "grad_norm": 0.036878351122140884, |
| "kl": 0.005211830139160156, |
| "learning_rate": 4.324481493610919e-06, |
| "loss": 0.0101, |
| "step": 614 |
| }, |
| { |
| "clip_ratio": 0.0003371401809317831, |
| "epoch": 0.9847330524932031, |
| "grad_norm": 0.036278340965509415, |
| "kl": 0.0051462650299072266, |
| "learning_rate": 4.320186999756473e-06, |
| "loss": 0.0101, |
| "step": 615 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 513.4927659034729, |
| "epoch": 0.986645155507753, |
| "grad_norm": 0.037584077566862106, |
| "kl": 0.005333662033081055, |
| "learning_rate": 4.315881045314878e-06, |
| "loss": 0.007, |
| "num_tokens": 376615645.0, |
| "reward": 0.07087053899886087, |
| "reward_std": 0.07342032523592934, |
| "rewards/pure_accuracy_reward_math": 0.0708705370198004, |
| "step": 616 |
| }, |
| { |
| "clip_ratio": 0.0002886684330292155, |
| "epoch": 0.9885572585223029, |
| "grad_norm": 0.035872798413038254, |
| "kl": 0.005288362503051758, |
| "learning_rate": 4.311563657398056e-06, |
| "loss": 0.007, |
| "step": 617 |
| }, |
| { |
| "clip_ratio": 0.0002961605097766551, |
| "epoch": 0.9904693615368528, |
| "grad_norm": 0.034989748150110245, |
| "kl": 0.0052263736724853516, |
| "learning_rate": 4.307234863189917e-06, |
| "loss": 0.007, |
| "step": 618 |
| }, |
| { |
| "clip_ratio": 0.0003532402791392997, |
| "epoch": 0.9923814645514027, |
| "grad_norm": 0.0338488332927227, |
| "kl": 0.005165576934814453, |
| "learning_rate": 4.302894689946189e-06, |
| "loss": 0.0069, |
| "step": 619 |
| }, |
| { |
| "clip_ratio": 0.00035387994120128496, |
| "epoch": 0.9942935675659527, |
| "grad_norm": 0.03370453417301178, |
| "kl": 0.005126953125, |
| "learning_rate": 4.298543164994249e-06, |
| "loss": 0.0069, |
| "step": 620 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 526.433337688446, |
| "epoch": 1.00191210301455, |
| "grad_norm": 0.0355641208589077, |
| "kl": 0.004958152770996094, |
| "learning_rate": 4.294180315732946e-06, |
| "loss": 0.0063, |
| "num_tokens": 380233970.0, |
| "reward": 0.05412946696742438, |
| "reward_std": 0.06637858302565292, |
| "rewards/pure_accuracy_reward_math": 0.0541294657450635, |
| "step": 621 |
| }, |
| { |
| "clip_ratio": 0.0002793830541349962, |
| "epoch": 1.0038242060290998, |
| "grad_norm": 0.034697938710451126, |
| "kl": 0.004967689514160156, |
| "learning_rate": 4.289806169632434e-06, |
| "loss": 0.0063, |
| "step": 622 |
| }, |
| { |
| "clip_ratio": 0.00026950584020823953, |
| "epoch": 1.0057363090436497, |
| "grad_norm": 0.034267228096723557, |
| "kl": 0.005029439926147461, |
| "learning_rate": 4.285420754233992e-06, |
| "loss": 0.0062, |
| "step": 623 |
| }, |
| { |
| "clip_ratio": 0.0002694177366606709, |
| "epoch": 1.0076484120581997, |
| "grad_norm": 0.03245500102639198, |
| "kl": 0.005047798156738281, |
| "learning_rate": 4.2810240971498594e-06, |
| "loss": 0.0062, |
| "step": 624 |
| }, |
| { |
| "clip_ratio": 0.0002762260926942872, |
| "epoch": 1.0095605150727496, |
| "grad_norm": 0.03143523633480072, |
| "kl": 0.005035400390625, |
| "learning_rate": 4.276616226063055e-06, |
| "loss": 0.0061, |
| "step": 625 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.094889163971, |
| "epoch": 1.0114726180872995, |
| "grad_norm": 0.03780335932970047, |
| "kl": 0.005240440368652344, |
| "learning_rate": 4.272197168727204e-06, |
| "loss": 0.0082, |
| "num_tokens": 383858818.0, |
| "reward": 0.06891741388244554, |
| "reward_std": 0.07891435397323221, |
| "rewards/pure_accuracy_reward_math": 0.06891741207800806, |
| "step": 626 |
| }, |
| { |
| "clip_ratio": 0.0002971897219481434, |
| "epoch": 1.0133847211018494, |
| "grad_norm": 0.03676832467317581, |
| "kl": 0.005240440368652344, |
| "learning_rate": 4.267766952966369e-06, |
| "loss": 0.0082, |
| "step": 627 |
| }, |
| { |
| "clip_ratio": 0.00032256075144232454, |
| "epoch": 1.0152968241163993, |
| "grad_norm": 0.03722486272454262, |
| "kl": 0.005322933197021484, |
| "learning_rate": 4.263325606674865e-06, |
| "loss": 0.0082, |
| "step": 628 |
| }, |
| { |
| "clip_ratio": 0.00031109488622860226, |
| "epoch": 1.0172089271309492, |
| "grad_norm": 0.036808740347623825, |
| "kl": 0.0054111480712890625, |
| "learning_rate": 4.258873157817093e-06, |
| "loss": 0.0081, |
| "step": 629 |
| }, |
| { |
| "clip_ratio": 0.00032292150183366175, |
| "epoch": 1.0191210301454992, |
| "grad_norm": 0.03518703579902649, |
| "kl": 0.005442619323730469, |
| "learning_rate": 4.254409634427356e-06, |
| "loss": 0.008, |
| "step": 630 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 515.6958961486816, |
| "epoch": 1.021033133160049, |
| "grad_norm": 0.03399791195988655, |
| "kl": 0.005387306213378906, |
| "learning_rate": 4.249935064609692e-06, |
| "loss": 0.0031, |
| "num_tokens": 387438928.0, |
| "reward": 0.06250000285217538, |
| "reward_std": 0.06757478544022888, |
| "rewards/pure_accuracy_reward_math": 0.06250000145519152, |
| "step": 631 |
| }, |
| { |
| "clip_ratio": 0.0002553542814212051, |
| "epoch": 1.022945236174599, |
| "grad_norm": 0.03381386399269104, |
| "kl": 0.005375385284423828, |
| "learning_rate": 4.245449476537685e-06, |
| "loss": 0.0031, |
| "step": 632 |
| }, |
| { |
| "clip_ratio": 0.00023506408626872144, |
| "epoch": 1.024857339189149, |
| "grad_norm": 0.03337083011865616, |
| "kl": 0.00537109375, |
| "learning_rate": 4.2409528984543e-06, |
| "loss": 0.003, |
| "step": 633 |
| }, |
| { |
| "clip_ratio": 0.0002632986112871549, |
| "epoch": 1.0267694422036988, |
| "grad_norm": 0.03213095664978027, |
| "kl": 0.005321979522705078, |
| "learning_rate": 4.236445358671696e-06, |
| "loss": 0.003, |
| "step": 634 |
| }, |
| { |
| "clip_ratio": 0.00025607587781451, |
| "epoch": 1.0286815452182487, |
| "grad_norm": 0.03154142573475838, |
| "kl": 0.005255699157714844, |
| "learning_rate": 4.23192688557105e-06, |
| "loss": 0.0029, |
| "step": 635 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.272346496582, |
| "epoch": 1.0305936482327986, |
| "grad_norm": 0.039318569004535675, |
| "kl": 0.005155801773071289, |
| "learning_rate": 4.2273975076023835e-06, |
| "loss": 0.0075, |
| "num_tokens": 391053556.0, |
| "reward": 0.06473214598372579, |
| "reward_std": 0.07401842583203688, |
| "rewards/pure_accuracy_reward_math": 0.06473214412108064, |
| "step": 636 |
| }, |
| { |
| "clip_ratio": 0.0003024499371804268, |
| "epoch": 1.0325057512473486, |
| "grad_norm": 0.03726111724972725, |
| "kl": 0.0050776004791259766, |
| "learning_rate": 4.222857253284376e-06, |
| "loss": 0.0075, |
| "step": 637 |
| }, |
| { |
| "clip_ratio": 0.0003151753968495541, |
| "epoch": 1.0344178542618985, |
| "grad_norm": 0.03595959022641182, |
| "kl": 0.005060434341430664, |
| "learning_rate": 4.218306151204188e-06, |
| "loss": 0.0074, |
| "step": 638 |
| }, |
| { |
| "clip_ratio": 0.0003387899199083222, |
| "epoch": 1.0363299572764482, |
| "grad_norm": 0.03628028184175491, |
| "kl": 0.005034923553466797, |
| "learning_rate": 4.213744230017283e-06, |
| "loss": 0.0074, |
| "step": 639 |
| }, |
| { |
| "clip_ratio": 0.00037899152403042535, |
| "epoch": 1.038242060290998, |
| "grad_norm": 0.03670131787657738, |
| "kl": 0.005095720291137695, |
| "learning_rate": 4.209171518447248e-06, |
| "loss": 0.0073, |
| "step": 640 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 536.5907049179077, |
| "epoch": 1.040154163305548, |
| "grad_norm": 0.03938442841172218, |
| "kl": 0.0051763057708740234, |
| "learning_rate": 4.204588045285607e-06, |
| "loss": 0.0022, |
| "num_tokens": 394708581.0, |
| "reward": 0.06333705710130744, |
| "reward_std": 0.07792467664694414, |
| "rewards/pure_accuracy_reward_math": 0.06333705500583164, |
| "step": 641 |
| }, |
| { |
| "clip_ratio": 0.0002767174905216052, |
| "epoch": 1.042066266320098, |
| "grad_norm": 0.037835828959941864, |
| "kl": 0.005267143249511719, |
| "learning_rate": 4.1999938393916424e-06, |
| "loss": 0.0022, |
| "step": 642 |
| }, |
| { |
| "clip_ratio": 0.0003277845591469486, |
| "epoch": 1.0439783693346478, |
| "grad_norm": 0.03832162916660309, |
| "kl": 0.005464792251586914, |
| "learning_rate": 4.195388929692217e-06, |
| "loss": 0.0022, |
| "step": 643 |
| }, |
| { |
| "clip_ratio": 0.00035426640954483446, |
| "epoch": 1.0458904723491977, |
| "grad_norm": 0.03823033347725868, |
| "kl": 0.005482673645019531, |
| "learning_rate": 4.190773345181587e-06, |
| "loss": 0.0021, |
| "step": 644 |
| }, |
| { |
| "clip_ratio": 0.0003763593267649412, |
| "epoch": 1.0478025753637477, |
| "grad_norm": 0.036984797567129135, |
| "kl": 0.005467653274536133, |
| "learning_rate": 4.186147114921221e-06, |
| "loss": 0.002, |
| "step": 645 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.9266424179077, |
| "epoch": 1.0497146783782976, |
| "grad_norm": 0.0355878509581089, |
| "kl": 0.005333423614501953, |
| "learning_rate": 4.18151026803962e-06, |
| "loss": 0.0056, |
| "num_tokens": 398334618.0, |
| "reward": 0.06305803850409575, |
| "reward_std": 0.06942774693015963, |
| "rewards/pure_accuracy_reward_math": 0.06305803699069656, |
| "step": 646 |
| }, |
| { |
| "clip_ratio": 0.00024814905674475085, |
| "epoch": 1.0516267813928475, |
| "grad_norm": 0.034741513431072235, |
| "kl": 0.005269289016723633, |
| "learning_rate": 4.176862833732127e-06, |
| "loss": 0.0056, |
| "step": 647 |
| }, |
| { |
| "clip_ratio": 0.00027503305113896204, |
| "epoch": 1.0535388844073974, |
| "grad_norm": 0.03375249356031418, |
| "kl": 0.005173683166503906, |
| "learning_rate": 4.1722048412607495e-06, |
| "loss": 0.0055, |
| "step": 648 |
| }, |
| { |
| "clip_ratio": 0.0002895867207826086, |
| "epoch": 1.0554509874219473, |
| "grad_norm": 0.0341072678565979, |
| "kl": 0.005132198333740234, |
| "learning_rate": 4.167536319953976e-06, |
| "loss": 0.0055, |
| "step": 649 |
| }, |
| { |
| "clip_ratio": 0.0003005371929134526, |
| "epoch": 1.0573630904364972, |
| "grad_norm": 0.033096957951784134, |
| "kl": 0.005170345306396484, |
| "learning_rate": 4.162857299206584e-06, |
| "loss": 0.0054, |
| "step": 650 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 538.7528138160706, |
| "epoch": 1.0592751934510471, |
| "grad_norm": 0.03696604445576668, |
| "kl": 0.0052814483642578125, |
| "learning_rate": 4.158167808479461e-06, |
| "loss": 0.0097, |
| "num_tokens": 401997276.0, |
| "reward": 0.05943080657743849, |
| "reward_std": 0.07388583471765742, |
| "rewards/pure_accuracy_reward_math": 0.05943080494762398, |
| "step": 651 |
| }, |
| { |
| "clip_ratio": 0.00029416859939601636, |
| "epoch": 1.061187296465597, |
| "grad_norm": 0.03565770015120506, |
| "kl": 0.005290031433105469, |
| "learning_rate": 4.153467877299419e-06, |
| "loss": 0.0097, |
| "step": 652 |
| }, |
| { |
| "clip_ratio": 0.00029473524284640007, |
| "epoch": 1.063099399480147, |
| "grad_norm": 0.03546367585659027, |
| "kl": 0.005368709564208984, |
| "learning_rate": 4.148757535259004e-06, |
| "loss": 0.0096, |
| "step": 653 |
| }, |
| { |
| "clip_ratio": 0.00032781071104182047, |
| "epoch": 1.065011502494697, |
| "grad_norm": 0.03601039946079254, |
| "kl": 0.005382061004638672, |
| "learning_rate": 4.144036812016317e-06, |
| "loss": 0.0096, |
| "step": 654 |
| }, |
| { |
| "clip_ratio": 0.0003433626044397897, |
| "epoch": 1.0669236055092468, |
| "grad_norm": 0.035073794424533844, |
| "kl": 0.0053446292877197266, |
| "learning_rate": 4.139305737294818e-06, |
| "loss": 0.0095, |
| "step": 655 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.1163725852966, |
| "epoch": 1.0688357085237967, |
| "grad_norm": 0.03852629289031029, |
| "kl": 0.005383491516113281, |
| "learning_rate": 4.134564340883148e-06, |
| "loss": 0.0083, |
| "num_tokens": 405593985.0, |
| "reward": 0.06445312793948688, |
| "reward_std": 0.07135464163729921, |
| "rewards/pure_accuracy_reward_math": 0.06445312654250301, |
| "step": 656 |
| }, |
| { |
| "clip_ratio": 0.0002591365355897324, |
| "epoch": 1.0707478115383466, |
| "grad_norm": 0.03745557367801666, |
| "kl": 0.0053327083587646484, |
| "learning_rate": 4.129812652634936e-06, |
| "loss": 0.0083, |
| "step": 657 |
| }, |
| { |
| "clip_ratio": 0.0003071958567772981, |
| "epoch": 1.0726599145528966, |
| "grad_norm": 0.037043727934360504, |
| "kl": 0.00532078742980957, |
| "learning_rate": 4.1250507024686115e-06, |
| "loss": 0.0083, |
| "step": 658 |
| }, |
| { |
| "clip_ratio": 0.00029935286954696494, |
| "epoch": 1.0745720175674465, |
| "grad_norm": 0.03582773730158806, |
| "kl": 0.005355358123779297, |
| "learning_rate": 4.120278520367217e-06, |
| "loss": 0.0082, |
| "step": 659 |
| }, |
| { |
| "clip_ratio": 0.0003111159166451216, |
| "epoch": 1.0764841205819964, |
| "grad_norm": 0.035313159227371216, |
| "kl": 0.005402326583862305, |
| "learning_rate": 4.115496136378219e-06, |
| "loss": 0.0081, |
| "step": 660 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 509.2994108200073, |
| "epoch": 1.0783962235965463, |
| "grad_norm": 0.041104141622781754, |
| "kl": 0.005465507507324219, |
| "learning_rate": 4.110703580613321e-06, |
| "loss": 0.0074, |
| "num_tokens": 409156330.0, |
| "reward": 0.0641741098370403, |
| "reward_std": 0.08329231233801693, |
| "rewards/pure_accuracy_reward_math": 0.06417410826543346, |
| "step": 661 |
| }, |
| { |
| "clip_ratio": 0.0003218170786567498, |
| "epoch": 1.0803083266110962, |
| "grad_norm": 0.03970121592283249, |
| "kl": 0.005608558654785156, |
| "learning_rate": 4.105900883248269e-06, |
| "loss": 0.0074, |
| "step": 662 |
| }, |
| { |
| "clip_ratio": 0.00032362689415776913, |
| "epoch": 1.0822204296256461, |
| "grad_norm": 0.039676353335380554, |
| "kl": 0.005734920501708984, |
| "learning_rate": 4.101088074522667e-06, |
| "loss": 0.0074, |
| "step": 663 |
| }, |
| { |
| "clip_ratio": 0.000323468098201829, |
| "epoch": 1.084132532640196, |
| "grad_norm": 0.03883183002471924, |
| "kl": 0.005713939666748047, |
| "learning_rate": 4.096265184739781e-06, |
| "loss": 0.0073, |
| "step": 664 |
| }, |
| { |
| "clip_ratio": 0.00033196881122421473, |
| "epoch": 1.086044635654746, |
| "grad_norm": 0.037281692028045654, |
| "kl": 0.0056934356689453125, |
| "learning_rate": 4.091432244266354e-06, |
| "loss": 0.0072, |
| "step": 665 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.48774766922, |
| "epoch": 1.0879567386692959, |
| "grad_norm": 0.037982553243637085, |
| "kl": 0.005854606628417969, |
| "learning_rate": 4.08658928353241e-06, |
| "loss": 0.0086, |
| "num_tokens": 412758914.0, |
| "reward": 0.06835937799769454, |
| "reward_std": 0.07526708859950304, |
| "rewards/pure_accuracy_reward_math": 0.06835937630967237, |
| "step": 666 |
| }, |
| { |
| "clip_ratio": 0.0002976899445457093, |
| "epoch": 1.0898688416838458, |
| "grad_norm": 0.03663322329521179, |
| "kl": 0.005788326263427734, |
| "learning_rate": 4.081736333031066e-06, |
| "loss": 0.0086, |
| "step": 667 |
| }, |
| { |
| "clip_ratio": 0.0002965517393818118, |
| "epoch": 1.0917809446983957, |
| "grad_norm": 0.03593512997031212, |
| "kl": 0.005764484405517578, |
| "learning_rate": 4.0768734233183376e-06, |
| "loss": 0.0085, |
| "step": 668 |
| }, |
| { |
| "clip_ratio": 0.0003466513953753747, |
| "epoch": 1.0936930477129456, |
| "grad_norm": 0.03643948212265968, |
| "kl": 0.005777835845947266, |
| "learning_rate": 4.072000585012947e-06, |
| "loss": 0.0085, |
| "step": 669 |
| }, |
| { |
| "clip_ratio": 0.00037185640462666925, |
| "epoch": 1.0956051507274955, |
| "grad_norm": 0.03601692244410515, |
| "kl": 0.0058193206787109375, |
| "learning_rate": 4.06711784879613e-06, |
| "loss": 0.0084, |
| "step": 670 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 526.0530390739441, |
| "epoch": 1.0975172537420455, |
| "grad_norm": 0.03892623260617256, |
| "kl": 0.005596637725830078, |
| "learning_rate": 4.062225245411444e-06, |
| "loss": 0.007, |
| "num_tokens": 416383588.0, |
| "reward": 0.061104913387680426, |
| "reward_std": 0.07539348275167868, |
| "rewards/pure_accuracy_reward_math": 0.06110491187428124, |
| "step": 671 |
| }, |
| { |
| "clip_ratio": 0.0003017952032280391, |
| "epoch": 1.0994293567565951, |
| "grad_norm": 0.0375184491276741, |
| "kl": 0.0056912899017333984, |
| "learning_rate": 4.057322805664576e-06, |
| "loss": 0.007, |
| "step": 672 |
| }, |
| { |
| "clip_ratio": 0.0002928147856096075, |
| "epoch": 1.1013414597711453, |
| "grad_norm": 0.03731007128953934, |
| "kl": 0.0057830810546875, |
| "learning_rate": 4.0524105604231435e-06, |
| "loss": 0.0069, |
| "step": 673 |
| }, |
| { |
| "clip_ratio": 0.000317500726794151, |
| "epoch": 1.103253562785695, |
| "grad_norm": 0.03885798528790474, |
| "kl": 0.005819559097290039, |
| "learning_rate": 4.047488540616503e-06, |
| "loss": 0.0069, |
| "step": 674 |
| }, |
| { |
| "clip_ratio": 0.0003141532706649741, |
| "epoch": 1.105165665800245, |
| "grad_norm": 0.03583172708749771, |
| "kl": 0.005753278732299805, |
| "learning_rate": 4.042556777235558e-06, |
| "loss": 0.0068, |
| "step": 675 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.9950060844421, |
| "epoch": 1.1070777688147948, |
| "grad_norm": 0.03652811422944069, |
| "kl": 0.005724668502807617, |
| "learning_rate": 4.037615301332559e-06, |
| "loss": 0.0088, |
| "num_tokens": 419993906.0, |
| "reward": 0.061383931315504014, |
| "reward_std": 0.07067021139664575, |
| "rewards/pure_accuracy_reward_math": 0.06138392974389717, |
| "step": 676 |
| }, |
| { |
| "clip_ratio": 0.00028260578790195723, |
| "epoch": 1.1089898718293447, |
| "grad_norm": 0.035632383078336716, |
| "kl": 0.0056421756744384766, |
| "learning_rate": 4.0326641440209114e-06, |
| "loss": 0.0088, |
| "step": 677 |
| }, |
| { |
| "clip_ratio": 0.0002882395116614589, |
| "epoch": 1.1109019748438946, |
| "grad_norm": 0.03453977406024933, |
| "kl": 0.005593061447143555, |
| "learning_rate": 4.027703336474979e-06, |
| "loss": 0.0087, |
| "step": 678 |
| }, |
| { |
| "clip_ratio": 0.000319835560901538, |
| "epoch": 1.1128140778584446, |
| "grad_norm": 0.03415689244866371, |
| "kl": 0.005594968795776367, |
| "learning_rate": 4.022732909929883e-06, |
| "loss": 0.0087, |
| "step": 679 |
| }, |
| { |
| "clip_ratio": 0.00033849146848297096, |
| "epoch": 1.1147261808729945, |
| "grad_norm": 0.03406994044780731, |
| "kl": 0.005631208419799805, |
| "learning_rate": 4.017752895681315e-06, |
| "loss": 0.0086, |
| "step": 680 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 521.6057720184326, |
| "epoch": 1.1166382838875444, |
| "grad_norm": 0.06026715040206909, |
| "kl": 0.005751848220825195, |
| "learning_rate": 4.012763325085332e-06, |
| "loss": 0.0067, |
| "num_tokens": 423598941.0, |
| "reward": 0.07198661082657054, |
| "reward_std": 0.08763020345941186, |
| "rewards/pure_accuracy_reward_math": 0.07198660844005644, |
| "step": 681 |
| }, |
| { |
| "clip_ratio": 0.00031779767027728667, |
| "epoch": 1.1185503869020943, |
| "grad_norm": 2.6160011291503906, |
| "kl": 0.005651235580444336, |
| "learning_rate": 4.0077642295581605e-06, |
| "loss": 0.007, |
| "step": 682 |
| }, |
| { |
| "clip_ratio": 0.00035409004277653366, |
| "epoch": 1.1204624899166442, |
| "grad_norm": 6.490725994110107, |
| "kl": 0.04636049270629883, |
| "learning_rate": 4.002755640576002e-06, |
| "loss": 0.0083, |
| "step": 683 |
| }, |
| { |
| "clip_ratio": 0.000386831109835839, |
| "epoch": 1.1223745929311941, |
| "grad_norm": 0.13183599710464478, |
| "kl": 0.0063648223876953125, |
| "learning_rate": 3.997737589674828e-06, |
| "loss": 0.0067, |
| "step": 684 |
| }, |
| { |
| "clip_ratio": 0.00042002629169246575, |
| "epoch": 1.124286695945744, |
| "grad_norm": 61.113468170166016, |
| "kl": 0.00571751594543457, |
| "learning_rate": 3.992710108450192e-06, |
| "loss": 0.0205, |
| "step": 685 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 534.679431438446, |
| "epoch": 1.126198798960294, |
| "grad_norm": 0.0341753326356411, |
| "kl": 0.006865501403808594, |
| "learning_rate": 3.987673228557017e-06, |
| "loss": 0.0032, |
| "num_tokens": 427249916.0, |
| "reward": 0.056919645285233855, |
| "reward_std": 0.06538890511728823, |
| "rewards/pure_accuracy_reward_math": 0.05691964429570362, |
| "step": 686 |
| }, |
| { |
| "clip_ratio": 0.00022898520234093667, |
| "epoch": 1.1281109019748439, |
| "grad_norm": 0.03356679156422615, |
| "kl": 0.006783246994018555, |
| "learning_rate": 3.982626981709412e-06, |
| "loss": 0.0032, |
| "step": 687 |
| }, |
| { |
| "clip_ratio": 0.00023695471924156664, |
| "epoch": 1.1300230049893938, |
| "grad_norm": 0.03283276781439781, |
| "kl": 0.006662845611572266, |
| "learning_rate": 3.977571399680457e-06, |
| "loss": 0.0031, |
| "step": 688 |
| }, |
| { |
| "clip_ratio": 0.000234549945901108, |
| "epoch": 1.1319351080039437, |
| "grad_norm": 0.032041046768426895, |
| "kl": 0.00657343864440918, |
| "learning_rate": 3.972506514302013e-06, |
| "loss": 0.0031, |
| "step": 689 |
| }, |
| { |
| "clip_ratio": 0.00026119674055280484, |
| "epoch": 1.1338472110184936, |
| "grad_norm": 0.03098335862159729, |
| "kl": 0.006501674652099609, |
| "learning_rate": 3.967432357464518e-06, |
| "loss": 0.003, |
| "step": 690 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 533.4330596923828, |
| "epoch": 1.1357593140330435, |
| "grad_norm": 0.03648236393928528, |
| "kl": 0.005389690399169922, |
| "learning_rate": 3.962348961116786e-06, |
| "loss": 0.0075, |
| "num_tokens": 430894100.0, |
| "reward": 0.059151788300368935, |
| "reward_std": 0.06680402747588232, |
| "rewards/pure_accuracy_reward_math": 0.059151787078008056, |
| "step": 691 |
| }, |
| { |
| "clip_ratio": 0.00024069582485708452, |
| "epoch": 1.1376714170475934, |
| "grad_norm": 0.03502041473984718, |
| "kl": 0.005405902862548828, |
| "learning_rate": 3.957256357265806e-06, |
| "loss": 0.0075, |
| "step": 692 |
| }, |
| { |
| "clip_ratio": 0.00026108162376203836, |
| "epoch": 1.1395835200621434, |
| "grad_norm": 0.03438780456781387, |
| "kl": 0.0054416656494140625, |
| "learning_rate": 3.952154577976543e-06, |
| "loss": 0.0075, |
| "step": 693 |
| }, |
| { |
| "clip_ratio": 0.0002536772994972125, |
| "epoch": 1.1414956230766933, |
| "grad_norm": 0.03388332575559616, |
| "kl": 0.005480289459228516, |
| "learning_rate": 3.947043655371734e-06, |
| "loss": 0.0075, |
| "step": 694 |
| }, |
| { |
| "clip_ratio": 0.00027197748300977764, |
| "epoch": 1.1434077260912432, |
| "grad_norm": 0.03378571942448616, |
| "kl": 0.005473136901855469, |
| "learning_rate": 3.941923621631683e-06, |
| "loss": 0.0074, |
| "step": 695 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.0050506591797, |
| "epoch": 1.145319829105793, |
| "grad_norm": 0.040138646960258484, |
| "kl": 0.005397796630859375, |
| "learning_rate": 3.936794508994062e-06, |
| "loss": 0.0033, |
| "num_tokens": 434502306.0, |
| "reward": 0.07142857456346974, |
| "reward_std": 0.08093377470504493, |
| "rewards/pure_accuracy_reward_math": 0.07142857316648588, |
| "step": 696 |
| }, |
| { |
| "clip_ratio": 0.00026038982610998573, |
| "epoch": 1.147231932120343, |
| "grad_norm": 0.03855022042989731, |
| "kl": 0.005437135696411133, |
| "learning_rate": 3.931656349753709e-06, |
| "loss": 0.0033, |
| "step": 697 |
| }, |
| { |
| "clip_ratio": 0.0002577857798655714, |
| "epoch": 1.149144035134893, |
| "grad_norm": 0.03805391117930412, |
| "kl": 0.005386829376220703, |
| "learning_rate": 3.9265091762624225e-06, |
| "loss": 0.0032, |
| "step": 698 |
| }, |
| { |
| "clip_ratio": 0.0002938498616913421, |
| "epoch": 1.1510561381494429, |
| "grad_norm": 0.03830750659108162, |
| "kl": 0.005461931228637695, |
| "learning_rate": 3.921353020928756e-06, |
| "loss": 0.0032, |
| "step": 699 |
| }, |
| { |
| "clip_ratio": 0.00026367085320089245, |
| "epoch": 1.1529682411639928, |
| "grad_norm": 0.03759397566318512, |
| "kl": 0.0055010318756103516, |
| "learning_rate": 3.916187916217818e-06, |
| "loss": 0.0031, |
| "step": 700 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.7466740608215, |
| "epoch": 1.1548803441785427, |
| "grad_norm": 0.03618447855114937, |
| "kl": 0.0054166316986083984, |
| "learning_rate": 3.911013894651067e-06, |
| "loss": 0.0066, |
| "num_tokens": 438144462.0, |
| "reward": 0.06501116344588809, |
| "reward_std": 0.07457646209513769, |
| "rewards/pure_accuracy_reward_math": 0.06501116175786592, |
| "step": 701 |
| }, |
| { |
| "clip_ratio": 0.00028753443712048465, |
| "epoch": 1.1567924471930926, |
| "grad_norm": 0.035918354988098145, |
| "kl": 0.005413532257080078, |
| "learning_rate": 3.905830988806101e-06, |
| "loss": 0.0066, |
| "step": 702 |
| }, |
| { |
| "clip_ratio": 0.0002842856440565811, |
| "epoch": 1.1587045502076425, |
| "grad_norm": 0.03422370180487633, |
| "kl": 0.005442619323730469, |
| "learning_rate": 3.90063923131646e-06, |
| "loss": 0.0066, |
| "step": 703 |
| }, |
| { |
| "clip_ratio": 0.0002819241568090547, |
| "epoch": 1.1606166532221924, |
| "grad_norm": 0.03359530121088028, |
| "kl": 0.00537109375, |
| "learning_rate": 3.895438654871416e-06, |
| "loss": 0.0065, |
| "step": 704 |
| }, |
| { |
| "clip_ratio": 0.0003241457142166837, |
| "epoch": 1.1625287562367423, |
| "grad_norm": 0.033465541899204254, |
| "kl": 0.0053484439849853516, |
| "learning_rate": 3.890229292215773e-06, |
| "loss": 0.0065, |
| "step": 705 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 526.7639741897583, |
| "epoch": 1.1644408592512923, |
| "grad_norm": 0.03731166943907738, |
| "kl": 0.00535893440246582, |
| "learning_rate": 3.885011176149647e-06, |
| "loss": 0.0071, |
| "num_tokens": 441760876.0, |
| "reward": 0.06612723506987095, |
| "reward_std": 0.06822534691309556, |
| "rewards/pure_accuracy_reward_math": 0.06612723367288709, |
| "step": 706 |
| }, |
| { |
| "clip_ratio": 0.00025104734473302415, |
| "epoch": 1.166352962265842, |
| "grad_norm": 0.03429851680994034, |
| "kl": 0.005263566970825195, |
| "learning_rate": 3.879784339528277e-06, |
| "loss": 0.0071, |
| "step": 707 |
| }, |
| { |
| "clip_ratio": 0.0002501190919019791, |
| "epoch": 1.168265065280392, |
| "grad_norm": 0.034958597272634506, |
| "kl": 0.0052831172943115234, |
| "learning_rate": 3.874548815261809e-06, |
| "loss": 0.0071, |
| "step": 708 |
| }, |
| { |
| "clip_ratio": 0.0002633173795629773, |
| "epoch": 1.1701771682949418, |
| "grad_norm": 0.032111622393131256, |
| "kl": 0.005318403244018555, |
| "learning_rate": 3.869304636315085e-06, |
| "loss": 0.007, |
| "step": 709 |
| }, |
| { |
| "clip_ratio": 0.00028521847832507774, |
| "epoch": 1.172089271309492, |
| "grad_norm": 0.03191748261451721, |
| "kl": 0.005407810211181641, |
| "learning_rate": 3.864051835707444e-06, |
| "loss": 0.007, |
| "step": 710 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.3457269668579, |
| "epoch": 1.1740013743240416, |
| "grad_norm": 0.05126773193478584, |
| "kl": 0.01187896728515625, |
| "learning_rate": 3.85879044651251e-06, |
| "loss": 0.0066, |
| "num_tokens": 445370959.0, |
| "reward": 0.06863839653669856, |
| "reward_std": 0.07951865292852744, |
| "rewards/pure_accuracy_reward_math": 0.06863839438301511, |
| "step": 711 |
| }, |
| { |
| "clip_ratio": 0.00028669004558423694, |
| "epoch": 1.1759134773385915, |
| "grad_norm": 0.051731474697589874, |
| "kl": 0.011458396911621094, |
| "learning_rate": 3.853520501857981e-06, |
| "loss": 0.0066, |
| "step": 712 |
| }, |
| { |
| "clip_ratio": 0.0003143258599038745, |
| "epoch": 1.1778255803531414, |
| "grad_norm": 0.051190439611673355, |
| "kl": 0.010621786117553711, |
| "learning_rate": 3.848242034925429e-06, |
| "loss": 0.0065, |
| "step": 713 |
| }, |
| { |
| "clip_ratio": 0.00033165596249773444, |
| "epoch": 1.1797376833676914, |
| "grad_norm": 0.04840007424354553, |
| "kl": 0.009693622589111328, |
| "learning_rate": 3.842955078950079e-06, |
| "loss": 0.0064, |
| "step": 714 |
| }, |
| { |
| "clip_ratio": 0.00035113433239075675, |
| "epoch": 1.1816497863822413, |
| "grad_norm": 0.048264067620038986, |
| "kl": 0.008889198303222656, |
| "learning_rate": 3.837659667220612e-06, |
| "loss": 0.0063, |
| "step": 715 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 547.5633645057678, |
| "epoch": 1.1835618893967912, |
| "grad_norm": 0.03458649665117264, |
| "kl": 0.005284786224365234, |
| "learning_rate": 3.832355833078945e-06, |
| "loss": 0.0047, |
| "num_tokens": 449069046.0, |
| "reward": 0.05691964572179131, |
| "reward_std": 0.06861072586616501, |
| "rewards/pure_accuracy_reward_math": 0.05691964415018447, |
| "step": 716 |
| }, |
| { |
| "clip_ratio": 0.0002876185501463624, |
| "epoch": 1.185473992411341, |
| "grad_norm": 0.033646877855062485, |
| "kl": 0.005215167999267578, |
| "learning_rate": 3.82704360992003e-06, |
| "loss": 0.0047, |
| "step": 717 |
| }, |
| { |
| "clip_ratio": 0.0003252235952686533, |
| "epoch": 1.187386095425891, |
| "grad_norm": 0.03455204889178276, |
| "kl": 0.0051419734954833984, |
| "learning_rate": 3.8217230311916365e-06, |
| "loss": 0.0046, |
| "step": 718 |
| }, |
| { |
| "clip_ratio": 0.0003351885409870192, |
| "epoch": 1.189298198440441, |
| "grad_norm": 0.033362697809934616, |
| "kl": 0.0050907135009765625, |
| "learning_rate": 3.816394130394142e-06, |
| "loss": 0.0046, |
| "step": 719 |
| }, |
| { |
| "clip_ratio": 0.00032723310141591355, |
| "epoch": 1.1912103014549908, |
| "grad_norm": 0.03211547061800957, |
| "kl": 0.0051004886627197266, |
| "learning_rate": 3.811056941080329e-06, |
| "loss": 0.0045, |
| "step": 720 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 537.3167090415955, |
| "epoch": 1.1931224044695408, |
| "grad_norm": 0.03566175699234009, |
| "kl": 0.0053424835205078125, |
| "learning_rate": 3.805711496855161e-06, |
| "loss": 0.009, |
| "num_tokens": 452726381.0, |
| "reward": 0.06054687776486389, |
| "reward_std": 0.07264336961088702, |
| "rewards/pure_accuracy_reward_math": 0.06054687677533366, |
| "step": 721 |
| }, |
| { |
| "clip_ratio": 0.00029346574888222676, |
| "epoch": 1.1950345074840907, |
| "grad_norm": 0.03476826474070549, |
| "kl": 0.005379438400268555, |
| "learning_rate": 3.800357831375583e-06, |
| "loss": 0.009, |
| "step": 722 |
| }, |
| { |
| "clip_ratio": 0.00027920183202923, |
| "epoch": 1.1969466104986406, |
| "grad_norm": 0.03446114435791969, |
| "kl": 0.005425691604614258, |
| "learning_rate": 3.794995978350301e-06, |
| "loss": 0.009, |
| "step": 723 |
| }, |
| { |
| "clip_ratio": 0.00031396149876172785, |
| "epoch": 1.1988587135131905, |
| "grad_norm": 0.0340140238404274, |
| "kl": 0.005489826202392578, |
| "learning_rate": 3.7896259715395727e-06, |
| "loss": 0.0089, |
| "step": 724 |
| }, |
| { |
| "clip_ratio": 0.0002986833567888425, |
| "epoch": 1.2007708165277404, |
| "grad_norm": 0.03497212752699852, |
| "kl": 0.005522489547729492, |
| "learning_rate": 3.784247844754997e-06, |
| "loss": 0.0088, |
| "step": 725 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 548.8044338226318, |
| "epoch": 1.2026829195422903, |
| "grad_norm": 0.04050953686237335, |
| "kl": 0.005362510681152344, |
| "learning_rate": 3.778861631859298e-06, |
| "loss": 0.0112, |
| "num_tokens": 456433388.0, |
| "reward": 0.06696428879513405, |
| "reward_std": 0.08140548242954537, |
| "rewards/pure_accuracy_reward_math": 0.06696428728173487, |
| "step": 726 |
| }, |
| { |
| "clip_ratio": 0.0003468562302373357, |
| "epoch": 1.2045950225568403, |
| "grad_norm": 0.03805195167660713, |
| "kl": 0.005377531051635742, |
| "learning_rate": 3.7734673667661133e-06, |
| "loss": 0.0112, |
| "step": 727 |
| }, |
| { |
| "clip_ratio": 0.00037477223943938043, |
| "epoch": 1.2065071255713902, |
| "grad_norm": 0.03666882589459419, |
| "kl": 0.005417585372924805, |
| "learning_rate": 3.7680650834397804e-06, |
| "loss": 0.0112, |
| "step": 728 |
| }, |
| { |
| "clip_ratio": 0.0003945930936311015, |
| "epoch": 1.20841922858594, |
| "grad_norm": 0.03651399165391922, |
| "kl": 0.005425453186035156, |
| "learning_rate": 3.762654815895122e-06, |
| "loss": 0.0111, |
| "step": 729 |
| }, |
| { |
| "clip_ratio": 0.0004650242010484362, |
| "epoch": 1.21033133160049, |
| "grad_norm": 0.03792130947113037, |
| "kl": 0.005422115325927734, |
| "learning_rate": 3.7572365981972335e-06, |
| "loss": 0.0111, |
| "step": 730 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.6861305236816, |
| "epoch": 1.21224343461504, |
| "grad_norm": 0.0365571565926075, |
| "kl": 0.005487203598022461, |
| "learning_rate": 3.7518104644612663e-06, |
| "loss": 0.0098, |
| "num_tokens": 460061367.0, |
| "reward": 0.06417411062284373, |
| "reward_std": 0.07478918455308303, |
| "rewards/pure_accuracy_reward_math": 0.06417410852736793, |
| "step": 731 |
| }, |
| { |
| "clip_ratio": 0.0002798708824229834, |
| "epoch": 1.2141555376295898, |
| "grad_norm": 0.036456115543842316, |
| "kl": 0.005484342575073242, |
| "learning_rate": 3.746376448852216e-06, |
| "loss": 0.0098, |
| "step": 732 |
| }, |
| { |
| "clip_ratio": 0.0003001830394850913, |
| "epoch": 1.2160676406441397, |
| "grad_norm": 0.036120470613241196, |
| "kl": 0.005544900894165039, |
| "learning_rate": 3.740934585584702e-06, |
| "loss": 0.0098, |
| "step": 733 |
| }, |
| { |
| "clip_ratio": 0.00028155883609315424, |
| "epoch": 1.2179797436586897, |
| "grad_norm": 0.03475060313940048, |
| "kl": 0.005614042282104492, |
| "learning_rate": 3.735484908922759e-06, |
| "loss": 0.0097, |
| "step": 734 |
| }, |
| { |
| "clip_ratio": 0.00027523975251142474, |
| "epoch": 1.2198918466732396, |
| "grad_norm": 0.03388671204447746, |
| "kl": 0.005706310272216797, |
| "learning_rate": 3.730027453179617e-06, |
| "loss": 0.0096, |
| "step": 735 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 518.6091203689575, |
| "epoch": 1.2218039496877895, |
| "grad_norm": 0.039098870009183884, |
| "kl": 0.005930900573730469, |
| "learning_rate": 3.7245622527174858e-06, |
| "loss": 0.0072, |
| "num_tokens": 463651718.0, |
| "reward": 0.06277902098372579, |
| "reward_std": 0.06552149687195197, |
| "rewards/pure_accuracy_reward_math": 0.06277901912108064, |
| "step": 736 |
| }, |
| { |
| "clip_ratio": 0.000267848483247235, |
| "epoch": 1.2237160527023394, |
| "grad_norm": 0.03896670043468475, |
| "kl": 0.005952358245849609, |
| "learning_rate": 3.719089341947337e-06, |
| "loss": 0.0072, |
| "step": 737 |
| }, |
| { |
| "clip_ratio": 0.00026333254504606884, |
| "epoch": 1.2256281557168893, |
| "grad_norm": 0.03838280960917473, |
| "kl": 0.005873680114746094, |
| "learning_rate": 3.7136087553286916e-06, |
| "loss": 0.0072, |
| "step": 738 |
| }, |
| { |
| "clip_ratio": 0.0002850479507969794, |
| "epoch": 1.2275402587314392, |
| "grad_norm": 0.03708336502313614, |
| "kl": 0.005741596221923828, |
| "learning_rate": 3.7081205273694005e-06, |
| "loss": 0.0071, |
| "step": 739 |
| }, |
| { |
| "clip_ratio": 0.00030947004142944934, |
| "epoch": 1.2294523617459892, |
| "grad_norm": 0.03616032376885414, |
| "kl": 0.005689144134521484, |
| "learning_rate": 3.702624692625427e-06, |
| "loss": 0.007, |
| "step": 740 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 515.3027577400208, |
| "epoch": 1.231364464760539, |
| "grad_norm": 473.16009521484375, |
| "kl": 7.4117608070373535, |
| "learning_rate": 3.6971212857006277e-06, |
| "loss": 0.3027, |
| "num_tokens": 467231411.0, |
| "reward": 0.07003348527359776, |
| "reward_std": 0.07058388437144458, |
| "rewards/pure_accuracy_reward_math": 0.07003348364378326, |
| "step": 741 |
| }, |
| { |
| "clip_ratio": 0.00048789031319529386, |
| "epoch": 1.2332765677750888, |
| "grad_norm": 15.009349822998047, |
| "kl": 0.3277552127838135, |
| "learning_rate": 3.6916103412465405e-06, |
| "loss": 0.0207, |
| "step": 742 |
| }, |
| { |
| "clip_ratio": 0.0005436847095552366, |
| "epoch": 1.235188670789639, |
| "grad_norm": 34.010345458984375, |
| "kl": 0.01839423179626465, |
| "learning_rate": 3.6860918939621586e-06, |
| "loss": 0.0299, |
| "step": 743 |
| }, |
| { |
| "clip_ratio": 0.000597593801558105, |
| "epoch": 1.2371007738041886, |
| "grad_norm": 13.507566452026367, |
| "kl": 0.02814960479736328, |
| "learning_rate": 3.6805659785937176e-06, |
| "loss": 0.0188, |
| "step": 744 |
| }, |
| { |
| "clip_ratio": 0.0005609532486232638, |
| "epoch": 1.2390128768187387, |
| "grad_norm": 6.263442516326904, |
| "kl": 0.20073914527893066, |
| "learning_rate": 3.675032629934475e-06, |
| "loss": 0.0163, |
| "step": 745 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.3340101242065, |
| "epoch": 1.2409249798332884, |
| "grad_norm": 0.051358480006456375, |
| "kl": 0.0063626766204833984, |
| "learning_rate": 3.6694918828244923e-06, |
| "loss": 0.0095, |
| "num_tokens": 470866344.0, |
| "reward": 0.06333705666474998, |
| "reward_std": 0.07530095760012046, |
| "rewards/pure_accuracy_reward_math": 0.06333705509314314, |
| "step": 746 |
| }, |
| { |
| "clip_ratio": 0.00029982604212364095, |
| "epoch": 1.2428370828478383, |
| "grad_norm": 0.03713027015328407, |
| "kl": 0.006081342697143555, |
| "learning_rate": 3.6639437721504108e-06, |
| "loss": 0.0095, |
| "step": 747 |
| }, |
| { |
| "clip_ratio": 0.0002941023938660692, |
| "epoch": 1.2447491858623883, |
| "grad_norm": 0.03500093147158623, |
| "kl": 0.006156444549560547, |
| "learning_rate": 3.65838833284524e-06, |
| "loss": 0.0095, |
| "step": 748 |
| }, |
| { |
| "clip_ratio": 0.0002858027814340858, |
| "epoch": 1.2466612888769382, |
| "grad_norm": 0.03525420278310776, |
| "kl": 0.006234169006347656, |
| "learning_rate": 3.652825599888129e-06, |
| "loss": 0.0094, |
| "step": 749 |
| }, |
| { |
| "clip_ratio": 0.0002950350276478275, |
| "epoch": 1.248573391891488, |
| "grad_norm": 0.03545543923974037, |
| "kl": 0.006281852722167969, |
| "learning_rate": 3.647255608304154e-06, |
| "loss": 0.0093, |
| "step": 750 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.79438829422, |
| "epoch": 1.250485494906038, |
| "grad_norm": 0.03711007162928581, |
| "kl": 0.005670070648193359, |
| "learning_rate": 3.641678393164092e-06, |
| "loss": 0.0131, |
| "num_tokens": 474505191.0, |
| "reward": 0.07170759318978526, |
| "reward_std": 0.07251697574974969, |
| "rewards/pure_accuracy_reward_math": 0.0717075907450635, |
| "step": 751 |
| }, |
| { |
| "clip_ratio": 0.00029345202176500607, |
| "epoch": 1.252397597920588, |
| "grad_norm": 0.036423034965991974, |
| "kl": 0.005608320236206055, |
| "learning_rate": 3.636093989584204e-06, |
| "loss": 0.0131, |
| "step": 752 |
| }, |
| { |
| "clip_ratio": 0.00030187425932126644, |
| "epoch": 1.2543097009351378, |
| "grad_norm": 0.03613322973251343, |
| "kl": 0.005610466003417969, |
| "learning_rate": 3.630502432726012e-06, |
| "loss": 0.013, |
| "step": 753 |
| }, |
| { |
| "clip_ratio": 0.0003275847485610939, |
| "epoch": 1.2562218039496877, |
| "grad_norm": 0.03452349826693535, |
| "kl": 0.0057184696197509766, |
| "learning_rate": 3.6249037577960744e-06, |
| "loss": 0.013, |
| "step": 754 |
| }, |
| { |
| "clip_ratio": 0.00034663524741063156, |
| "epoch": 1.2581339069642377, |
| "grad_norm": 0.034864939749240875, |
| "kl": 0.005825996398925781, |
| "learning_rate": 3.619298000045773e-06, |
| "loss": 0.0129, |
| "step": 755 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 495.8814425468445, |
| "epoch": 1.2600460099787876, |
| "grad_norm": 528.279052734375, |
| "kl": 9.193241596221924, |
| "learning_rate": 3.6136851947710804e-06, |
| "loss": 0.3749, |
| "num_tokens": 478011678.0, |
| "reward": 0.07979911071015522, |
| "reward_std": 0.07470905361697078, |
| "rewards/pure_accuracy_reward_math": 0.0797991082072258, |
| "step": 756 |
| }, |
| { |
| "clip_ratio": 0.00028275052295612113, |
| "epoch": 1.2619581129933375, |
| "grad_norm": 44.662696838378906, |
| "kl": 1.2635960578918457, |
| "learning_rate": 3.608065377312348e-06, |
| "loss": 0.057, |
| "step": 757 |
| }, |
| { |
| "clip_ratio": 0.00029553008619132015, |
| "epoch": 1.2638702160078874, |
| "grad_norm": 4.775911808013916, |
| "kl": 0.1474595069885254, |
| "learning_rate": 3.6024385830540758e-06, |
| "loss": 0.0123, |
| "step": 758 |
| }, |
| { |
| "clip_ratio": 0.00033371773997714627, |
| "epoch": 1.2657823190224373, |
| "grad_norm": 0.30982905626296997, |
| "kl": 0.01830148696899414, |
| "learning_rate": 3.5968048474246925e-06, |
| "loss": 0.0071, |
| "step": 759 |
| }, |
| { |
| "clip_ratio": 0.0003257711730952906, |
| "epoch": 1.2676944220369872, |
| "grad_norm": 0.05356259644031525, |
| "kl": 0.011959552764892578, |
| "learning_rate": 3.591164205896332e-06, |
| "loss": 0.0068, |
| "step": 760 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.9149203300476, |
| "epoch": 1.2696065250515371, |
| "grad_norm": 0.04138460382819176, |
| "kl": 0.00600886344909668, |
| "learning_rate": 3.585516693984612e-06, |
| "loss": 0.0061, |
| "num_tokens": 481610981.0, |
| "reward": 0.07059152136207558, |
| "reward_std": 0.07616424100706354, |
| "rewards/pure_accuracy_reward_math": 0.07059151938301511, |
| "step": 761 |
| }, |
| { |
| "clip_ratio": 0.00029173931721970803, |
| "epoch": 1.271518628066087, |
| "grad_norm": 0.04057340323925018, |
| "kl": 0.0059850215911865234, |
| "learning_rate": 3.5798623472484074e-06, |
| "loss": 0.006, |
| "step": 762 |
| }, |
| { |
| "clip_ratio": 0.00031361054851686276, |
| "epoch": 1.273430731080637, |
| "grad_norm": 0.0383637472987175, |
| "kl": 0.005931377410888672, |
| "learning_rate": 3.5742012012896273e-06, |
| "loss": 0.006, |
| "step": 763 |
| }, |
| { |
| "clip_ratio": 0.000302841177983737, |
| "epoch": 1.275342834095187, |
| "grad_norm": 0.037009891122579575, |
| "kl": 0.005960226058959961, |
| "learning_rate": 3.5685332917529936e-06, |
| "loss": 0.0059, |
| "step": 764 |
| }, |
| { |
| "clip_ratio": 0.00032496250122449055, |
| "epoch": 1.2772549371097368, |
| "grad_norm": 0.036052413284778595, |
| "kl": 0.0060160160064697266, |
| "learning_rate": 3.5628586543258116e-06, |
| "loss": 0.0058, |
| "step": 765 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 505.19645166397095, |
| "epoch": 1.2791670401242867, |
| "grad_norm": 0.039108723402023315, |
| "kl": 0.0060214996337890625, |
| "learning_rate": 3.5571773247377495e-06, |
| "loss": 0.0077, |
| "num_tokens": 485155493.0, |
| "reward": 0.06473214537254535, |
| "reward_std": 0.07595151849091053, |
| "rewards/pure_accuracy_reward_math": 0.06473214438301511, |
| "step": 766 |
| }, |
| { |
| "clip_ratio": 0.00031215936860462534, |
| "epoch": 1.2810791431388366, |
| "grad_norm": 0.03890209272503853, |
| "kl": 0.0060939788818359375, |
| "learning_rate": 3.5514893387606113e-06, |
| "loss": 0.0078, |
| "step": 767 |
| }, |
| { |
| "clip_ratio": 0.00029648321913100517, |
| "epoch": 1.2829912461533866, |
| "grad_norm": 0.038266174495220184, |
| "kl": 0.0061397552490234375, |
| "learning_rate": 3.5457947322081126e-06, |
| "loss": 0.0077, |
| "step": 768 |
| }, |
| { |
| "clip_ratio": 0.0002988063008615427, |
| "epoch": 1.2849033491679365, |
| "grad_norm": 0.03760776296257973, |
| "kl": 0.006152629852294922, |
| "learning_rate": 3.5400935409356534e-06, |
| "loss": 0.0076, |
| "step": 769 |
| }, |
| { |
| "clip_ratio": 0.00032748817852734646, |
| "epoch": 1.2868154521824864, |
| "grad_norm": 0.037058234214782715, |
| "kl": 0.006194591522216797, |
| "learning_rate": 3.5343858008400955e-06, |
| "loss": 0.0076, |
| "step": 770 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 513.085681438446, |
| "epoch": 1.2887275551970363, |
| "grad_norm": 0.04272163286805153, |
| "kl": 0.006904125213623047, |
| "learning_rate": 3.5286715478595335e-06, |
| "loss": 0.0066, |
| "num_tokens": 488731916.0, |
| "reward": 0.06668527112924494, |
| "reward_std": 0.07779828266939148, |
| "rewards/pure_accuracy_reward_math": 0.0666852695576381, |
| "step": 771 |
| }, |
| { |
| "clip_ratio": 0.0002989328136209224, |
| "epoch": 1.2906396582115862, |
| "grad_norm": 0.039898019284009933, |
| "kl": 0.006760597229003906, |
| "learning_rate": 3.52295081797307e-06, |
| "loss": 0.0066, |
| "step": 772 |
| }, |
| { |
| "clip_ratio": 0.0003237332452385999, |
| "epoch": 1.2925517612261361, |
| "grad_norm": 0.0380416214466095, |
| "kl": 0.006653547286987305, |
| "learning_rate": 3.5172236472005866e-06, |
| "loss": 0.0065, |
| "step": 773 |
| }, |
| { |
| "clip_ratio": 0.0004160679777100995, |
| "epoch": 1.294463864240686, |
| "grad_norm": 0.03860335052013397, |
| "kl": 0.006639003753662109, |
| "learning_rate": 3.511490071602523e-06, |
| "loss": 0.0065, |
| "step": 774 |
| }, |
| { |
| "clip_ratio": 0.0004345110206713798, |
| "epoch": 1.2963759672552357, |
| "grad_norm": 0.0405069962143898, |
| "kl": 0.006697654724121094, |
| "learning_rate": 3.505750127279643e-06, |
| "loss": 0.0064, |
| "step": 775 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.7695565223694, |
| "epoch": 1.2982880702697859, |
| "grad_norm": 0.040585048496723175, |
| "kl": 0.006101369857788086, |
| "learning_rate": 3.500003850372811e-06, |
| "loss": 0.0043, |
| "num_tokens": 492363370.0, |
| "reward": 0.07477678926079534, |
| "reward_std": 0.08466117118950933, |
| "rewards/pure_accuracy_reward_math": 0.07477678704890423, |
| "step": 776 |
| }, |
| { |
| "clip_ratio": 0.0003347315081327906, |
| "epoch": 1.3002001732843356, |
| "grad_norm": 0.039613205939531326, |
| "kl": 0.0060977935791015625, |
| "learning_rate": 3.4942512770627655e-06, |
| "loss": 0.0043, |
| "step": 777 |
| }, |
| { |
| "clip_ratio": 0.0003803396672310555, |
| "epoch": 1.3021122762988857, |
| "grad_norm": 0.03965132310986519, |
| "kl": 0.006110668182373047, |
| "learning_rate": 3.4884924435698875e-06, |
| "loss": 0.0042, |
| "step": 778 |
| }, |
| { |
| "clip_ratio": 0.00035469116983222193, |
| "epoch": 1.3040243793134354, |
| "grad_norm": 0.038701362907886505, |
| "kl": 0.005974292755126953, |
| "learning_rate": 3.482727386153974e-06, |
| "loss": 0.0041, |
| "step": 779 |
| }, |
| { |
| "clip_ratio": 0.00038596760680320585, |
| "epoch": 1.3059364823279855, |
| "grad_norm": 0.03767050802707672, |
| "kl": 0.0059070587158203125, |
| "learning_rate": 3.4769561411140123e-06, |
| "loss": 0.0041, |
| "step": 780 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.3593993186951, |
| "epoch": 1.3078485853425352, |
| "grad_norm": 0.04520969092845917, |
| "kl": 0.015022039413452148, |
| "learning_rate": 3.471178744787948e-06, |
| "loss": 0.0107, |
| "num_tokens": 495988466.0, |
| "reward": 0.07449777098372579, |
| "reward_std": 0.08161820413079113, |
| "rewards/pure_accuracy_reward_math": 0.07449777016881853, |
| "step": 781 |
| }, |
| { |
| "clip_ratio": 0.00032587463357458546, |
| "epoch": 1.3097606883570854, |
| "grad_norm": 0.04337235167622566, |
| "kl": 0.01485586166381836, |
| "learning_rate": 3.465395233552458e-06, |
| "loss": 0.0107, |
| "step": 782 |
| }, |
| { |
| "clip_ratio": 0.00031156001216459117, |
| "epoch": 1.311672791371635, |
| "grad_norm": 0.04306100681424141, |
| "kl": 0.014668941497802734, |
| "learning_rate": 3.459605643822721e-06, |
| "loss": 0.0106, |
| "step": 783 |
| }, |
| { |
| "clip_ratio": 0.00031179932597069637, |
| "epoch": 1.313584894386185, |
| "grad_norm": 0.04292943701148033, |
| "kl": 0.014333724975585938, |
| "learning_rate": 3.4538100120521884e-06, |
| "loss": 0.0106, |
| "step": 784 |
| }, |
| { |
| "clip_ratio": 0.00034586368491318353, |
| "epoch": 1.315496997400735, |
| "grad_norm": 0.04207218065857887, |
| "kl": 0.013885498046875, |
| "learning_rate": 3.4480083747323527e-06, |
| "loss": 0.0105, |
| "step": 785 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 521.3471217155457, |
| "epoch": 1.3174091004152848, |
| "grad_norm": 0.04057139530777931, |
| "kl": 0.006026268005371094, |
| "learning_rate": 3.4422007683925224e-06, |
| "loss": 0.0119, |
| "num_tokens": 499590878.0, |
| "reward": 0.08091518239234574, |
| "reward_std": 0.08763020328478888, |
| "rewards/pure_accuracy_reward_math": 0.08091518023866229, |
| "step": 786 |
| }, |
| { |
| "clip_ratio": 0.00030802900647586284, |
| "epoch": 1.3193212034298347, |
| "grad_norm": 0.039306215941905975, |
| "kl": 0.00603485107421875, |
| "learning_rate": 3.436387229599587e-06, |
| "loss": 0.0119, |
| "step": 787 |
| }, |
| { |
| "clip_ratio": 0.00034579116845634417, |
| "epoch": 1.3212333064443846, |
| "grad_norm": 0.03839893266558647, |
| "kl": 0.006104469299316406, |
| "learning_rate": 3.4305677949577915e-06, |
| "loss": 0.0118, |
| "step": 788 |
| }, |
| { |
| "clip_ratio": 0.00036078316020393686, |
| "epoch": 1.3231454094589346, |
| "grad_norm": 0.03700988367199898, |
| "kl": 0.006115436553955078, |
| "learning_rate": 3.4247425011084993e-06, |
| "loss": 0.0118, |
| "step": 789 |
| }, |
| { |
| "clip_ratio": 0.0003916456239494437, |
| "epoch": 1.3250575124734845, |
| "grad_norm": 0.03749685734510422, |
| "kl": 0.006115436553955078, |
| "learning_rate": 3.418911384729971e-06, |
| "loss": 0.0117, |
| "step": 790 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 502.7112407684326, |
| "epoch": 1.3269696154880344, |
| "grad_norm": 0.03917763754725456, |
| "kl": 0.009302139282226562, |
| "learning_rate": 3.413074482537123e-06, |
| "loss": 0.0077, |
| "num_tokens": 503128079.0, |
| "reward": 0.07059152112924494, |
| "reward_std": 0.07702752505429089, |
| "rewards/pure_accuracy_reward_math": 0.07059151944122277, |
| "step": 791 |
| }, |
| { |
| "clip_ratio": 0.0002787132019079763, |
| "epoch": 1.3288817185025843, |
| "grad_norm": 0.03894754871726036, |
| "kl": 0.009203910827636719, |
| "learning_rate": 3.4072318312813044e-06, |
| "loss": 0.0077, |
| "step": 792 |
| }, |
| { |
| "clip_ratio": 0.00031091465683630304, |
| "epoch": 1.3307938215171342, |
| "grad_norm": 0.03774462640285492, |
| "kl": 0.008921146392822266, |
| "learning_rate": 3.4013834677500612e-06, |
| "loss": 0.0077, |
| "step": 793 |
| }, |
| { |
| "clip_ratio": 0.00030987418773520403, |
| "epoch": 1.3327059245316841, |
| "grad_norm": 0.03737964481115341, |
| "kl": 0.008791923522949219, |
| "learning_rate": 3.395529428766907e-06, |
| "loss": 0.0076, |
| "step": 794 |
| }, |
| { |
| "clip_ratio": 0.0003597256319380904, |
| "epoch": 1.334618027546234, |
| "grad_norm": 0.03793202340602875, |
| "kl": 0.008593559265136719, |
| "learning_rate": 3.3896697511910898e-06, |
| "loss": 0.0075, |
| "step": 795 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.8552160263062, |
| "epoch": 1.336530130560784, |
| "grad_norm": 0.03877223655581474, |
| "kl": 0.005873441696166992, |
| "learning_rate": 3.3838044719173603e-06, |
| "loss": 0.0086, |
| "num_tokens": 506711636.0, |
| "reward": 0.06529018195578828, |
| "reward_std": 0.06942774722119793, |
| "rewards/pure_accuracy_reward_math": 0.06529017997672781, |
| "step": 796 |
| }, |
| { |
| "clip_ratio": 0.0002862633294853367, |
| "epoch": 1.3384422335753339, |
| "grad_norm": 0.0376199446618557, |
| "kl": 0.005820274353027344, |
| "learning_rate": 3.377933627875739e-06, |
| "loss": 0.0086, |
| "step": 797 |
| }, |
| { |
| "clip_ratio": 0.0002861461452994263, |
| "epoch": 1.3403543365898838, |
| "grad_norm": 0.036890070885419846, |
| "kl": 0.005822658538818359, |
| "learning_rate": 3.3720572560312854e-06, |
| "loss": 0.0086, |
| "step": 798 |
| }, |
| { |
| "clip_ratio": 0.0003201163677317709, |
| "epoch": 1.3422664396044337, |
| "grad_norm": 0.03669756278395653, |
| "kl": 0.005821704864501953, |
| "learning_rate": 3.366175393383863e-06, |
| "loss": 0.0085, |
| "step": 799 |
| }, |
| { |
| "clip_ratio": 0.0003494162402830625, |
| "epoch": 1.3441785426189836, |
| "grad_norm": 0.03721420839428902, |
| "kl": 0.005818843841552734, |
| "learning_rate": 3.360288076967909e-06, |
| "loss": 0.0084, |
| "step": 800 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 505.6105146408081, |
| "epoch": 1.3460906456335335, |
| "grad_norm": 0.040034398436546326, |
| "kl": 0.006266117095947266, |
| "learning_rate": 3.3543953438521983e-06, |
| "loss": 0.0091, |
| "num_tokens": 510255728.0, |
| "reward": 0.0675223250000272, |
| "reward_std": 0.07577886182116345, |
| "rewards/pure_accuracy_reward_math": 0.06752232249709778, |
| "step": 801 |
| }, |
| { |
| "clip_ratio": 0.00027677676553139463, |
| "epoch": 1.3480027486480834, |
| "grad_norm": 0.038657769560813904, |
| "kl": 0.006215572357177734, |
| "learning_rate": 3.3484972311396114e-06, |
| "loss": 0.0091, |
| "step": 802 |
| }, |
| { |
| "clip_ratio": 0.0002909586188479807, |
| "epoch": 1.3499148516626334, |
| "grad_norm": 0.036970507353544235, |
| "kl": 0.006129741668701172, |
| "learning_rate": 3.342593775966901e-06, |
| "loss": 0.009, |
| "step": 803 |
| }, |
| { |
| "clip_ratio": 0.0003427068459700422, |
| "epoch": 1.3518269546771833, |
| "grad_norm": 0.03707785904407501, |
| "kl": 0.006056785583496094, |
| "learning_rate": 3.3366850155044595e-06, |
| "loss": 0.009, |
| "step": 804 |
| }, |
| { |
| "clip_ratio": 0.00038909467849634893, |
| "epoch": 1.3537390576917332, |
| "grad_norm": 0.03700149059295654, |
| "kl": 0.005985736846923828, |
| "learning_rate": 3.33077098695608e-06, |
| "loss": 0.0089, |
| "step": 805 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.0212287902832, |
| "epoch": 1.355651160706283, |
| "grad_norm": 0.04373861476778984, |
| "kl": 0.005824565887451172, |
| "learning_rate": 3.3248517275587292e-06, |
| "loss": 0.0094, |
| "num_tokens": 513879112.0, |
| "reward": 0.0703125029685907, |
| "reward_std": 0.08085364429280162, |
| "rewards/pure_accuracy_reward_math": 0.07031250145519152, |
| "step": 806 |
| }, |
| { |
| "clip_ratio": 0.00031092700191948097, |
| "epoch": 1.357563263720833, |
| "grad_norm": 0.04273909702897072, |
| "kl": 0.0058460235595703125, |
| "learning_rate": 3.318927274582307e-06, |
| "loss": 0.0094, |
| "step": 807 |
| }, |
| { |
| "clip_ratio": 0.0003359753473546334, |
| "epoch": 1.359475366735383, |
| "grad_norm": 0.04217194393277168, |
| "kl": 0.005980014801025391, |
| "learning_rate": 3.312997665329414e-06, |
| "loss": 0.0093, |
| "step": 808 |
| }, |
| { |
| "clip_ratio": 0.0003392697701940506, |
| "epoch": 1.3613874697499329, |
| "grad_norm": 0.04189891368150711, |
| "kl": 0.0061492919921875, |
| "learning_rate": 3.3070629371351176e-06, |
| "loss": 0.0093, |
| "step": 809 |
| }, |
| { |
| "clip_ratio": 0.0003985974152556082, |
| "epoch": 1.3632995727644825, |
| "grad_norm": 0.04113880172371864, |
| "kl": 0.0062618255615234375, |
| "learning_rate": 3.3011231273667155e-06, |
| "loss": 0.0092, |
| "step": 810 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.8002490997314, |
| "epoch": 1.3652116757790327, |
| "grad_norm": 0.039511535316705704, |
| "kl": 0.007502555847167969, |
| "learning_rate": 3.295178273423501e-06, |
| "loss": 0.0065, |
| "num_tokens": 517489928.0, |
| "reward": 0.06835937840514816, |
| "reward_std": 0.0761642413563095, |
| "rewards/pure_accuracy_reward_math": 0.06835937636788003, |
| "step": 811 |
| }, |
| { |
| "clip_ratio": 0.00033993283830113796, |
| "epoch": 1.3671237787935824, |
| "grad_norm": 0.03911852091550827, |
| "kl": 0.0074634552001953125, |
| "learning_rate": 3.2892284127365277e-06, |
| "loss": 0.0065, |
| "step": 812 |
| }, |
| { |
| "clip_ratio": 0.00029188678922764666, |
| "epoch": 1.3690358818081325, |
| "grad_norm": 0.038789719343185425, |
| "kl": 0.007461071014404297, |
| "learning_rate": 3.2832735827683733e-06, |
| "loss": 0.0064, |
| "step": 813 |
| }, |
| { |
| "clip_ratio": 0.00031692377649505943, |
| "epoch": 1.3709479848226822, |
| "grad_norm": 0.03795900195837021, |
| "kl": 0.007411956787109375, |
| "learning_rate": 3.2773138210129037e-06, |
| "loss": 0.0063, |
| "step": 814 |
| }, |
| { |
| "clip_ratio": 0.0003394908647464945, |
| "epoch": 1.3728600878372323, |
| "grad_norm": 0.03683575242757797, |
| "kl": 0.0073795318603515625, |
| "learning_rate": 3.2713491649950375e-06, |
| "loss": 0.0063, |
| "step": 815 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.1018648147583, |
| "epoch": 1.374772190851782, |
| "grad_norm": 0.036948177963495255, |
| "kl": 0.0058441162109375, |
| "learning_rate": 3.26537965227051e-06, |
| "loss": 0.0062, |
| "num_tokens": 521113961.0, |
| "reward": 0.06333705675206147, |
| "reward_std": 0.07041122711962089, |
| "rewards/pure_accuracy_reward_math": 0.06333705494762398, |
| "step": 816 |
| }, |
| { |
| "clip_ratio": 0.0002517415915690435, |
| "epoch": 1.3766842938663322, |
| "grad_norm": 0.03634682297706604, |
| "kl": 0.005847454071044922, |
| "learning_rate": 3.2594053204256344e-06, |
| "loss": 0.0062, |
| "step": 817 |
| }, |
| { |
| "clip_ratio": 0.00027403954436522326, |
| "epoch": 1.3785963968808819, |
| "grad_norm": 0.034690070897340775, |
| "kl": 0.005870342254638672, |
| "learning_rate": 3.253426207077069e-06, |
| "loss": 0.0062, |
| "step": 818 |
| }, |
| { |
| "clip_ratio": 0.0002389855896467452, |
| "epoch": 1.3805084998954318, |
| "grad_norm": 0.034505974501371384, |
| "kl": 0.005900382995605469, |
| "learning_rate": 3.2474423498715772e-06, |
| "loss": 0.0061, |
| "step": 819 |
| }, |
| { |
| "clip_ratio": 0.000287152882663122, |
| "epoch": 1.3824206029099817, |
| "grad_norm": 0.03524321690201759, |
| "kl": 0.005913734436035156, |
| "learning_rate": 3.241453786485792e-06, |
| "loss": 0.0061, |
| "step": 820 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 509.66520071029663, |
| "epoch": 1.3843327059245316, |
| "grad_norm": 0.039214182645082474, |
| "kl": 0.006892681121826172, |
| "learning_rate": 3.2354605546259777e-06, |
| "loss": 0.0032, |
| "num_tokens": 524677265.0, |
| "reward": 0.07979911041911691, |
| "reward_std": 0.07959878293331712, |
| "rewards/pure_accuracy_reward_math": 0.07979910867288709, |
| "step": 821 |
| }, |
| { |
| "clip_ratio": 0.0002965318878409562, |
| "epoch": 1.3862448089390815, |
| "grad_norm": 0.037640273571014404, |
| "kl": 0.0067348480224609375, |
| "learning_rate": 3.2294626920277928e-06, |
| "loss": 0.0031, |
| "step": 822 |
| }, |
| { |
| "clip_ratio": 0.00035153192868619954, |
| "epoch": 1.3881569119536314, |
| "grad_norm": 0.038182858377695084, |
| "kl": 0.006665706634521484, |
| "learning_rate": 3.2234602364560543e-06, |
| "loss": 0.0031, |
| "step": 823 |
| }, |
| { |
| "clip_ratio": 0.0003338070732752385, |
| "epoch": 1.3900690149681814, |
| "grad_norm": 0.038163840770721436, |
| "kl": 0.00667572021484375, |
| "learning_rate": 3.2174532257044957e-06, |
| "loss": 0.003, |
| "step": 824 |
| }, |
| { |
| "clip_ratio": 0.0003418834434683049, |
| "epoch": 1.3919811179827313, |
| "grad_norm": 0.03628409281373024, |
| "kl": 0.0067596435546875, |
| "learning_rate": 3.2114416975955347e-06, |
| "loss": 0.003, |
| "step": 825 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.1027045249939, |
| "epoch": 1.3938932209972812, |
| "grad_norm": 0.037393856793642044, |
| "kl": 0.005987644195556641, |
| "learning_rate": 3.20542568998003e-06, |
| "loss": 0.0097, |
| "num_tokens": 528270425.0, |
| "reward": 0.07784598556463607, |
| "reward_std": 0.0774529695045203, |
| "rewards/pure_accuracy_reward_math": 0.07784598329453729, |
| "step": 826 |
| }, |
| { |
| "clip_ratio": 0.0002753000243274073, |
| "epoch": 1.395805324011831, |
| "grad_norm": 0.03632253408432007, |
| "kl": 0.00603485107421875, |
| "learning_rate": 3.199405240737045e-06, |
| "loss": 0.0097, |
| "step": 827 |
| }, |
| { |
| "clip_ratio": 0.00028145005671831314, |
| "epoch": 1.397717427026381, |
| "grad_norm": 0.035320475697517395, |
| "kl": 0.0060482025146484375, |
| "learning_rate": 3.1933803877736103e-06, |
| "loss": 0.0097, |
| "step": 828 |
| }, |
| { |
| "clip_ratio": 0.00029773840276448027, |
| "epoch": 1.399629530040931, |
| "grad_norm": 0.03532904013991356, |
| "kl": 0.006001472473144531, |
| "learning_rate": 3.187351169024483e-06, |
| "loss": 0.0096, |
| "step": 829 |
| }, |
| { |
| "clip_ratio": 0.0003131672060590063, |
| "epoch": 1.4015416330554809, |
| "grad_norm": 0.03497399017214775, |
| "kl": 0.0059299468994140625, |
| "learning_rate": 3.181317622451909e-06, |
| "loss": 0.0095, |
| "step": 830 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.5547099113464, |
| "epoch": 1.4034537360700308, |
| "grad_norm": 0.03596203401684761, |
| "kl": 0.005957126617431641, |
| "learning_rate": 3.1752797860453854e-06, |
| "loss": 0.0099, |
| "num_tokens": 531863545.0, |
| "reward": 0.06584821754950099, |
| "reward_std": 0.07359298237133771, |
| "rewards/pure_accuracy_reward_math": 0.06584821580327116, |
| "step": 831 |
| }, |
| { |
| "clip_ratio": 0.0002871401754873659, |
| "epoch": 1.4053658390845807, |
| "grad_norm": 0.03569914028048515, |
| "kl": 0.005918025970458984, |
| "learning_rate": 3.169237697821417e-06, |
| "loss": 0.0099, |
| "step": 832 |
| }, |
| { |
| "clip_ratio": 0.0002649255456503852, |
| "epoch": 1.4072779420991306, |
| "grad_norm": 0.035189539194107056, |
| "kl": 0.005944252014160156, |
| "learning_rate": 3.163191395823281e-06, |
| "loss": 0.0098, |
| "step": 833 |
| }, |
| { |
| "clip_ratio": 0.0002522150609252094, |
| "epoch": 1.4091900451136805, |
| "grad_norm": 0.03371162712574005, |
| "kl": 0.006028652191162109, |
| "learning_rate": 3.1571409181207867e-06, |
| "loss": 0.0098, |
| "step": 834 |
| }, |
| { |
| "clip_ratio": 0.00028182740913962334, |
| "epoch": 1.4111021481282304, |
| "grad_norm": 0.03411802276968956, |
| "kl": 0.006129264831542969, |
| "learning_rate": 3.151086302810035e-06, |
| "loss": 0.0097, |
| "step": 835 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 509.0455017089844, |
| "epoch": 1.4130142511427803, |
| "grad_norm": 0.042647283524274826, |
| "kl": 0.006505012512207031, |
| "learning_rate": 3.1450275880131782e-06, |
| "loss": 0.0051, |
| "num_tokens": 535420068.0, |
| "reward": 0.06919643201399595, |
| "reward_std": 0.06989945442182943, |
| "rewards/pure_accuracy_reward_math": 0.06919642980210483, |
| "step": 836 |
| }, |
| { |
| "clip_ratio": 0.0002792542761653749, |
| "epoch": 1.4149263541573303, |
| "grad_norm": 0.03879564628005028, |
| "kl": 0.006262302398681641, |
| "learning_rate": 3.1389648118781795e-06, |
| "loss": 0.0051, |
| "step": 837 |
| }, |
| { |
| "clip_ratio": 0.00032867032479089175, |
| "epoch": 1.4168384571718802, |
| "grad_norm": 0.03632555902004242, |
| "kl": 0.006078004837036133, |
| "learning_rate": 3.132898012578577e-06, |
| "loss": 0.005, |
| "step": 838 |
| }, |
| { |
| "clip_ratio": 0.0003705890379706034, |
| "epoch": 1.41875056018643, |
| "grad_norm": 0.03687159717082977, |
| "kl": 0.0058705806732177734, |
| "learning_rate": 3.1268272283132374e-06, |
| "loss": 0.005, |
| "step": 839 |
| }, |
| { |
| "clip_ratio": 0.00039090512018447043, |
| "epoch": 1.42066266320098, |
| "grad_norm": 0.03681857883930206, |
| "kl": 0.005755186080932617, |
| "learning_rate": 3.1207524973061183e-06, |
| "loss": 0.0049, |
| "step": 840 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.0865178108215, |
| "epoch": 1.42257476621553, |
| "grad_norm": 0.077212393283844, |
| "kl": 0.006708621978759766, |
| "learning_rate": 3.1146738578060293e-06, |
| "loss": 0.0034, |
| "num_tokens": 539042994.0, |
| "reward": 0.05468750235741027, |
| "reward_std": 0.06221334764268249, |
| "rewards/pure_accuracy_reward_math": 0.05468750130967237, |
| "step": 841 |
| }, |
| { |
| "clip_ratio": 0.00023407521496210393, |
| "epoch": 1.4244868692300798, |
| "grad_norm": 0.03766750544309616, |
| "kl": 0.005887508392333984, |
| "learning_rate": 3.108591348086388e-06, |
| "loss": 0.0034, |
| "step": 842 |
| }, |
| { |
| "clip_ratio": 0.00021864835269980176, |
| "epoch": 1.4263989722446297, |
| "grad_norm": 0.03435171768069267, |
| "kl": 0.0057353973388671875, |
| "learning_rate": 3.102505006444981e-06, |
| "loss": 0.0033, |
| "step": 843 |
| }, |
| { |
| "clip_ratio": 0.0002327330819866802, |
| "epoch": 1.4283110752591797, |
| "grad_norm": 0.03385370597243309, |
| "kl": 0.005730628967285156, |
| "learning_rate": 3.096414871203721e-06, |
| "loss": 0.0033, |
| "step": 844 |
| }, |
| { |
| "clip_ratio": 0.00025595308994752486, |
| "epoch": 1.4302231782737296, |
| "grad_norm": 0.0320701077580452, |
| "kl": 0.005660533905029297, |
| "learning_rate": 3.0903209807084085e-06, |
| "loss": 0.0032, |
| "step": 845 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.2009177207947, |
| "epoch": 1.4321352812882795, |
| "grad_norm": 0.035687774419784546, |
| "kl": 0.006323099136352539, |
| "learning_rate": 3.0842233733284866e-06, |
| "loss": 0.0055, |
| "num_tokens": 542686090.0, |
| "reward": 0.06389509252039716, |
| "reward_std": 0.06839800346642733, |
| "rewards/pure_accuracy_reward_math": 0.06389509059954435, |
| "step": 846 |
| }, |
| { |
| "clip_ratio": 0.0002455309293054597, |
| "epoch": 1.4340473843028292, |
| "grad_norm": 0.03433489799499512, |
| "kl": 0.006294965744018555, |
| "learning_rate": 3.078122087456802e-06, |
| "loss": 0.0055, |
| "step": 847 |
| }, |
| { |
| "clip_ratio": 0.0003179283777399178, |
| "epoch": 1.4359594873173793, |
| "grad_norm": 0.03377856686711311, |
| "kl": 0.00630497932434082, |
| "learning_rate": 3.072017161509364e-06, |
| "loss": 0.0054, |
| "step": 848 |
| }, |
| { |
| "clip_ratio": 0.00030606188772708265, |
| "epoch": 1.437871590331929, |
| "grad_norm": 0.03379327058792114, |
| "kl": 0.006325483322143555, |
| "learning_rate": 3.065908633925099e-06, |
| "loss": 0.0054, |
| "step": 849 |
| }, |
| { |
| "clip_ratio": 0.00029904921905199444, |
| "epoch": 1.4397836933464792, |
| "grad_norm": 0.03319833427667618, |
| "kl": 0.006340742111206055, |
| "learning_rate": 3.0597965431656125e-06, |
| "loss": 0.0053, |
| "step": 850 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.9991841316223, |
| "epoch": 1.00191210301455, |
| "grad_norm": 0.03730909898877144, |
| "kl": 0.005851268768310547, |
| "learning_rate": 3.0536809277149433e-06, |
| "loss": 0.0058, |
| "num_tokens": 3602593.0, |
| "reward": 0.061662948777666315, |
| "reward_std": 0.0712745109340176, |
| "rewards/pure_accuracy_reward_math": 0.06166294767172076, |
| "step": 851 |
| }, |
| { |
| "clip_ratio": 0.0002445870232463676, |
| "epoch": 1.0038242060290998, |
| "grad_norm": 0.036420926451683044, |
| "kl": 0.005807399749755859, |
| "learning_rate": 3.047561826079324e-06, |
| "loss": 0.0057, |
| "step": 852 |
| }, |
| { |
| "clip_ratio": 0.0002342841784184202, |
| "epoch": 1.0057363090436497, |
| "grad_norm": 0.03534744307398796, |
| "kl": 0.005809783935546875, |
| "learning_rate": 3.041439276786937e-06, |
| "loss": 0.0057, |
| "step": 853 |
| }, |
| { |
| "clip_ratio": 0.0003130897791834286, |
| "epoch": 1.0076484120581997, |
| "grad_norm": 0.03456578403711319, |
| "kl": 0.005836963653564453, |
| "learning_rate": 3.0353133183876745e-06, |
| "loss": 0.0056, |
| "step": 854 |
| }, |
| { |
| "clip_ratio": 0.0003235736477336104, |
| "epoch": 1.0095605150727496, |
| "grad_norm": 0.03683493658900261, |
| "kl": 0.00588226318359375, |
| "learning_rate": 3.0291839894528907e-06, |
| "loss": 0.0056, |
| "step": 855 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.2422127723694, |
| "epoch": 1.0114726180872995, |
| "grad_norm": 3.6328346729278564, |
| "kl": 0.07409882545471191, |
| "learning_rate": 3.023051328575164e-06, |
| "loss": 0.0092, |
| "num_tokens": 7231613.0, |
| "reward": 0.06696428847499192, |
| "reward_std": 0.07320140569936484, |
| "rewards/pure_accuracy_reward_math": 0.06696428725263104, |
| "step": 856 |
| }, |
| { |
| "clip_ratio": 0.0002944787788692338, |
| "epoch": 1.0133847211018494, |
| "grad_norm": 0.23805810511112213, |
| "kl": 0.01258087158203125, |
| "learning_rate": 3.016915374368052e-06, |
| "loss": 0.0068, |
| "step": 857 |
| }, |
| { |
| "clip_ratio": 0.000328014534943577, |
| "epoch": 1.0152968241163993, |
| "grad_norm": 0.038860052824020386, |
| "kl": 0.008163928985595703, |
| "learning_rate": 3.0107761654658464e-06, |
| "loss": 0.0066, |
| "step": 858 |
| }, |
| { |
| "clip_ratio": 0.00033978425187797257, |
| "epoch": 1.0172089271309492, |
| "grad_norm": 0.037539608776569366, |
| "kl": 0.008237600326538086, |
| "learning_rate": 3.0046337405233334e-06, |
| "loss": 0.0065, |
| "step": 859 |
| }, |
| { |
| "clip_ratio": 0.0003289994185706746, |
| "epoch": 1.0191210301454992, |
| "grad_norm": 0.03649570420384407, |
| "kl": 0.008342981338500977, |
| "learning_rate": 2.9984881382155484e-06, |
| "loss": 0.0065, |
| "step": 860 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 539.7709541320801, |
| "epoch": 1.021033133160049, |
| "grad_norm": 0.03506062552332878, |
| "kl": 0.0056056976318359375, |
| "learning_rate": 2.9923393972375337e-06, |
| "loss": 0.0075, |
| "num_tokens": 10898500.0, |
| "reward": 0.06389509155997075, |
| "reward_std": 0.07427741104038432, |
| "rewards/pure_accuracy_reward_math": 0.06389509086147882, |
| "step": 861 |
| }, |
| { |
| "clip_ratio": 0.00025894983372154456, |
| "epoch": 1.022945236174599, |
| "grad_norm": 0.03387964144349098, |
| "kl": 0.005673408508300781, |
| "learning_rate": 2.986187556304091e-06, |
| "loss": 0.0075, |
| "step": 862 |
| }, |
| { |
| "clip_ratio": 0.00026048227840647087, |
| "epoch": 1.024857339189149, |
| "grad_norm": 0.0339200459420681, |
| "kl": 0.005715370178222656, |
| "learning_rate": 2.9800326541495427e-06, |
| "loss": 0.0074, |
| "step": 863 |
| }, |
| { |
| "clip_ratio": 0.000286817725225319, |
| "epoch": 1.0267694422036988, |
| "grad_norm": 0.033578090369701385, |
| "kl": 0.0057220458984375, |
| "learning_rate": 2.973874729527486e-06, |
| "loss": 0.0074, |
| "step": 864 |
| }, |
| { |
| "clip_ratio": 0.00031288620994018856, |
| "epoch": 1.0286815452182487, |
| "grad_norm": 0.03253786265850067, |
| "kl": 0.005726814270019531, |
| "learning_rate": 2.967713821210547e-06, |
| "loss": 0.0073, |
| "step": 865 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.484959602356, |
| "epoch": 1.0305936482327986, |
| "grad_norm": 0.040393006056547165, |
| "kl": 0.005712032318115234, |
| "learning_rate": 2.961549967990139e-06, |
| "loss": 0.0094, |
| "num_tokens": 14539070.0, |
| "reward": 0.0700334852153901, |
| "reward_std": 0.07968511193757877, |
| "rewards/pure_accuracy_reward_math": 0.07003348364378326, |
| "step": 866 |
| }, |
| { |
| "clip_ratio": 0.00034418605622477116, |
| "epoch": 1.0325057512473486, |
| "grad_norm": 0.03829828277230263, |
| "kl": 0.00571441650390625, |
| "learning_rate": 2.95538320867622e-06, |
| "loss": 0.0094, |
| "step": 867 |
| }, |
| { |
| "clip_ratio": 0.0003270462358386794, |
| "epoch": 1.0344178542618985, |
| "grad_norm": 0.03763904795050621, |
| "kl": 0.005820035934448242, |
| "learning_rate": 2.949213582097042e-06, |
| "loss": 0.0094, |
| "step": 868 |
| }, |
| { |
| "clip_ratio": 0.00039861036464117205, |
| "epoch": 1.0363299572764482, |
| "grad_norm": 0.03893045708537102, |
| "kl": 0.005897045135498047, |
| "learning_rate": 2.9430411270989112e-06, |
| "loss": 0.0093, |
| "step": 869 |
| }, |
| { |
| "clip_ratio": 0.0004073582798014286, |
| "epoch": 1.038242060290998, |
| "grad_norm": 0.03808417171239853, |
| "kl": 0.0059051513671875, |
| "learning_rate": 2.9368658825459452e-06, |
| "loss": 0.0092, |
| "step": 870 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 518.7159852981567, |
| "epoch": 1.040154163305548, |
| "grad_norm": 0.03680076450109482, |
| "kl": 0.006183147430419922, |
| "learning_rate": 2.9306878873198227e-06, |
| "loss": 0.0073, |
| "num_tokens": 18123716.0, |
| "reward": 0.06975446810247377, |
| "reward_std": 0.07255704078124836, |
| "rewards/pure_accuracy_reward_math": 0.06975446600699797, |
| "step": 871 |
| }, |
| { |
| "clip_ratio": 0.00025267474336487794, |
| "epoch": 1.042066266320098, |
| "grad_norm": 0.036574870347976685, |
| "kl": 0.006196498870849609, |
| "learning_rate": 2.9245071803195435e-06, |
| "loss": 0.0072, |
| "step": 872 |
| }, |
| { |
| "clip_ratio": 0.0002888958638322947, |
| "epoch": 1.0439783693346478, |
| "grad_norm": 0.03539302200078964, |
| "kl": 0.006276130676269531, |
| "learning_rate": 2.9183238004611815e-06, |
| "loss": 0.0072, |
| "step": 873 |
| }, |
| { |
| "clip_ratio": 0.00027933804358326597, |
| "epoch": 1.0458904723491977, |
| "grad_norm": 0.03457676246762276, |
| "kl": 0.00629425048828125, |
| "learning_rate": 2.912137786677639e-06, |
| "loss": 0.0071, |
| "step": 874 |
| }, |
| { |
| "clip_ratio": 0.00026495220328115465, |
| "epoch": 1.0478025753637477, |
| "grad_norm": 0.034882258623838425, |
| "kl": 0.006371974945068359, |
| "learning_rate": 2.905949177918403e-06, |
| "loss": 0.0071, |
| "step": 875 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.4989104270935, |
| "epoch": 1.0497146783782976, |
| "grad_norm": 0.04403652995824814, |
| "kl": 0.0064754486083984375, |
| "learning_rate": 2.8997580131493004e-06, |
| "loss": 0.0104, |
| "num_tokens": 21706672.0, |
| "reward": 0.07421875311410986, |
| "reward_std": 0.08282060426427051, |
| "rewards/pure_accuracy_reward_math": 0.07421875130967237, |
| "step": 876 |
| }, |
| { |
| "clip_ratio": 0.00034863107299543117, |
| "epoch": 1.0516267813928475, |
| "grad_norm": 0.040730468928813934, |
| "kl": 0.006359100341796875, |
| "learning_rate": 2.89356433135225e-06, |
| "loss": 0.0104, |
| "step": 877 |
| }, |
| { |
| "clip_ratio": 0.0003696895219036378, |
| "epoch": 1.0535388844073974, |
| "grad_norm": 0.040028344839811325, |
| "kl": 0.006321430206298828, |
| "learning_rate": 2.8873681715250197e-06, |
| "loss": 0.0104, |
| "step": 878 |
| }, |
| { |
| "clip_ratio": 0.00041197048278718285, |
| "epoch": 1.0554509874219473, |
| "grad_norm": 0.04009086638689041, |
| "kl": 0.0062351226806640625, |
| "learning_rate": 2.881169572680981e-06, |
| "loss": 0.0103, |
| "step": 879 |
| }, |
| { |
| "clip_ratio": 0.0004460485272943515, |
| "epoch": 1.0573630904364972, |
| "grad_norm": 0.03965138643980026, |
| "kl": 0.006242275238037109, |
| "learning_rate": 2.87496857384886e-06, |
| "loss": 0.0102, |
| "step": 880 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.4285945892334, |
| "epoch": 1.0592751934510471, |
| "grad_norm": 0.03920762613415718, |
| "kl": 0.005979061126708984, |
| "learning_rate": 2.868765214072495e-06, |
| "loss": 0.0082, |
| "num_tokens": 25317588.0, |
| "reward": 0.07338170023285784, |
| "reward_std": 0.0805021328269504, |
| "rewards/pure_accuracy_reward_math": 0.07338169755530544, |
| "step": 881 |
| }, |
| { |
| "clip_ratio": 0.0003169273815046836, |
| "epoch": 1.061187296465597, |
| "grad_norm": 0.03858224302530289, |
| "kl": 0.006028175354003906, |
| "learning_rate": 2.8625595324105925e-06, |
| "loss": 0.0082, |
| "step": 882 |
| }, |
| { |
| "clip_ratio": 0.0003076135093351695, |
| "epoch": 1.063099399480147, |
| "grad_norm": 0.03754101321101189, |
| "kl": 0.006089687347412109, |
| "learning_rate": 2.8563515679364733e-06, |
| "loss": 0.0081, |
| "step": 883 |
| }, |
| { |
| "clip_ratio": 0.0003307215861809709, |
| "epoch": 1.065011502494697, |
| "grad_norm": 0.03692120686173439, |
| "kl": 0.006084442138671875, |
| "learning_rate": 2.850141359737836e-06, |
| "loss": 0.008, |
| "step": 884 |
| }, |
| { |
| "clip_ratio": 0.0003362660154380137, |
| "epoch": 1.0669236055092468, |
| "grad_norm": 0.03691774606704712, |
| "kl": 0.006087303161621094, |
| "learning_rate": 2.843928946916504e-06, |
| "loss": 0.008, |
| "step": 885 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 541.91938829422, |
| "epoch": 1.0688357085237967, |
| "grad_norm": 0.03421162813901901, |
| "kl": 0.005934238433837891, |
| "learning_rate": 2.8377143685881835e-06, |
| "loss": 0.0048, |
| "num_tokens": 28991667.0, |
| "reward": 0.06138393090805039, |
| "reward_std": 0.05770279868738726, |
| "rewards/pure_accuracy_reward_math": 0.06138392991852015, |
| "step": 886 |
| }, |
| { |
| "clip_ratio": 0.00021627708133564738, |
| "epoch": 1.0707478115383466, |
| "grad_norm": 0.0331665463745594, |
| "kl": 0.005833148956298828, |
| "learning_rate": 2.8314976638822145e-06, |
| "loss": 0.0048, |
| "step": 887 |
| }, |
| { |
| "clip_ratio": 0.00023772416773226723, |
| "epoch": 1.0726599145528966, |
| "grad_norm": 0.03265010192990303, |
| "kl": 0.00572967529296875, |
| "learning_rate": 2.825278871941325e-06, |
| "loss": 0.0048, |
| "step": 888 |
| }, |
| { |
| "clip_ratio": 0.000255867875353033, |
| "epoch": 1.0745720175674465, |
| "grad_norm": 0.031934551894664764, |
| "kl": 0.0056514739990234375, |
| "learning_rate": 2.819058031921387e-06, |
| "loss": 0.0047, |
| "step": 889 |
| }, |
| { |
| "clip_ratio": 0.0002752940895334177, |
| "epoch": 1.0764841205819964, |
| "grad_norm": 0.03180062025785446, |
| "kl": 0.005589008331298828, |
| "learning_rate": 2.812835182991166e-06, |
| "loss": 0.0047, |
| "step": 890 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 541.6253051757812, |
| "epoch": 1.0783962235965463, |
| "grad_norm": 0.0352044515311718, |
| "kl": 0.006504535675048828, |
| "learning_rate": 2.8066103643320774e-06, |
| "loss": 0.005, |
| "num_tokens": 32662984.0, |
| "reward": 0.07003348544822074, |
| "reward_std": 0.07148103549843654, |
| "rewards/pure_accuracy_reward_math": 0.07003348341095261, |
| "step": 891 |
| }, |
| { |
| "clip_ratio": 0.0002908879878305015, |
| "epoch": 1.0803083266110962, |
| "grad_norm": 0.03477974981069565, |
| "kl": 0.006473064422607422, |
| "learning_rate": 2.800383615137939e-06, |
| "loss": 0.0049, |
| "step": 892 |
| }, |
| { |
| "clip_ratio": 0.00027559091887496834, |
| "epoch": 1.0822204296256461, |
| "grad_norm": 0.03371204808354378, |
| "kl": 0.006519317626953125, |
| "learning_rate": 2.7941549746147234e-06, |
| "loss": 0.0049, |
| "step": 893 |
| }, |
| { |
| "clip_ratio": 0.00026331023877901316, |
| "epoch": 1.084132532640196, |
| "grad_norm": 0.03233867511153221, |
| "kl": 0.00655364990234375, |
| "learning_rate": 2.7879244819803104e-06, |
| "loss": 0.0048, |
| "step": 894 |
| }, |
| { |
| "clip_ratio": 0.0003059378379361988, |
| "epoch": 1.086044635654746, |
| "grad_norm": 0.032591916620731354, |
| "kl": 0.006562709808349609, |
| "learning_rate": 2.781692176464244e-06, |
| "loss": 0.0048, |
| "step": 895 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 538.9467296600342, |
| "epoch": 1.0879567386692959, |
| "grad_norm": 0.0399605967104435, |
| "kl": 0.007935047149658203, |
| "learning_rate": 2.7754580973074817e-06, |
| "loss": 0.0078, |
| "num_tokens": 36327265.0, |
| "reward": 0.06640625328873284, |
| "reward_std": 0.07582512497901917, |
| "rewards/pure_accuracy_reward_math": 0.06640625142608769, |
| "step": 896 |
| }, |
| { |
| "clip_ratio": 0.00029080147635340836, |
| "epoch": 1.0898688416838458, |
| "grad_norm": 0.036669787019491196, |
| "kl": 0.007892131805419922, |
| "learning_rate": 2.769222283762148e-06, |
| "loss": 0.0077, |
| "step": 897 |
| }, |
| { |
| "clip_ratio": 0.0003202801690349588, |
| "epoch": 1.0917809446983957, |
| "grad_norm": 0.036093369126319885, |
| "kl": 0.007870197296142578, |
| "learning_rate": 2.7629847750912885e-06, |
| "loss": 0.0077, |
| "step": 898 |
| }, |
| { |
| "clip_ratio": 0.00034906711715620986, |
| "epoch": 1.0936930477129456, |
| "grad_norm": 0.036899976432323456, |
| "kl": 0.007824897766113281, |
| "learning_rate": 2.756745610568622e-06, |
| "loss": 0.0076, |
| "step": 899 |
| }, |
| { |
| "clip_ratio": 0.0003909627172333785, |
| "epoch": 1.0956051507274955, |
| "grad_norm": 0.03607386723160744, |
| "kl": 0.00782632827758789, |
| "learning_rate": 2.7505048294782914e-06, |
| "loss": 0.0076, |
| "step": 900 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.9687776565552, |
| "epoch": 1.0975172537420455, |
| "grad_norm": 0.04138408601284027, |
| "kl": 0.006854534149169922, |
| "learning_rate": 2.7442624711146206e-06, |
| "loss": 0.0105, |
| "num_tokens": 39926261.0, |
| "reward": 0.07561384263681248, |
| "reward_std": 0.08660046180011705, |
| "rewards/pure_accuracy_reward_math": 0.07561384089058265, |
| "step": 901 |
| }, |
| { |
| "clip_ratio": 0.0003407098130878694, |
| "epoch": 1.0994293567565951, |
| "grad_norm": 0.04008745029568672, |
| "kl": 0.006922245025634766, |
| "learning_rate": 2.7380185747818628e-06, |
| "loss": 0.0105, |
| "step": 902 |
| }, |
| { |
| "clip_ratio": 0.0003345158028196238, |
| "epoch": 1.1013414597711453, |
| "grad_norm": 0.039206936955451965, |
| "kl": 0.006981372833251953, |
| "learning_rate": 2.7317731797939566e-06, |
| "loss": 0.0104, |
| "step": 903 |
| }, |
| { |
| "clip_ratio": 0.0003512224284918375, |
| "epoch": 1.103253562785695, |
| "grad_norm": 0.03816502168774605, |
| "kl": 0.006984233856201172, |
| "learning_rate": 2.7255263254742746e-06, |
| "loss": 0.0103, |
| "step": 904 |
| }, |
| { |
| "clip_ratio": 0.00038539456500075175, |
| "epoch": 1.105165665800245, |
| "grad_norm": 0.03802499175071716, |
| "kl": 0.006890773773193359, |
| "learning_rate": 2.71927805115538e-06, |
| "loss": 0.0103, |
| "step": 905 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.6635279655457, |
| "epoch": 1.1070777688147948, |
| "grad_norm": 0.03780652955174446, |
| "kl": 0.005947589874267578, |
| "learning_rate": 2.713028396178776e-06, |
| "loss": 0.0044, |
| "num_tokens": 43530039.0, |
| "reward": 0.0691964318684768, |
| "reward_std": 0.0774129043566063, |
| "rewards/pure_accuracy_reward_math": 0.06919642988941632, |
| "step": 906 |
| }, |
| { |
| "clip_ratio": 0.0002883933650537074, |
| "epoch": 1.1089898718293447, |
| "grad_norm": 0.03706151619553566, |
| "kl": 0.005948543548583984, |
| "learning_rate": 2.706777399894656e-06, |
| "loss": 0.0044, |
| "step": 907 |
| }, |
| { |
| "clip_ratio": 0.0003032470573316459, |
| "epoch": 1.1109019748438946, |
| "grad_norm": 0.03684515878558159, |
| "kl": 0.005936622619628906, |
| "learning_rate": 2.700525101661665e-06, |
| "loss": 0.0044, |
| "step": 908 |
| }, |
| { |
| "clip_ratio": 0.0003385747261290817, |
| "epoch": 1.1128140778584446, |
| "grad_norm": 0.03632361814379692, |
| "kl": 0.005986690521240234, |
| "learning_rate": 2.6942715408466406e-06, |
| "loss": 0.0043, |
| "step": 909 |
| }, |
| { |
| "clip_ratio": 0.00035084231319615355, |
| "epoch": 1.1147261808729945, |
| "grad_norm": 0.0364714041352272, |
| "kl": 0.005983829498291016, |
| "learning_rate": 2.6880167568243716e-06, |
| "loss": 0.0042, |
| "step": 910 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.6629705429077, |
| "epoch": 1.1166382838875444, |
| "grad_norm": 0.037073228508234024, |
| "kl": 0.006183624267578125, |
| "learning_rate": 2.681760788977349e-06, |
| "loss": 0.0075, |
| "num_tokens": 47140667.0, |
| "reward": 0.06166294956346974, |
| "reward_std": 0.07140090485336259, |
| "rewards/pure_accuracy_reward_math": 0.061662947526201606, |
| "step": 911 |
| }, |
| { |
| "clip_ratio": 0.00026335007953548484, |
| "epoch": 1.1185503869020943, |
| "grad_norm": 0.03628791868686676, |
| "kl": 0.006221771240234375, |
| "learning_rate": 2.6755036766955172e-06, |
| "loss": 0.0075, |
| "step": 912 |
| }, |
| { |
| "clip_ratio": 0.00029098790395210017, |
| "epoch": 1.1204624899166442, |
| "grad_norm": 0.03659017011523247, |
| "kl": 0.006258964538574219, |
| "learning_rate": 2.6692454593760255e-06, |
| "loss": 0.0075, |
| "step": 913 |
| }, |
| { |
| "clip_ratio": 0.00033703100632465066, |
| "epoch": 1.1223745929311941, |
| "grad_norm": 0.0357106551527977, |
| "kl": 0.006211757659912109, |
| "learning_rate": 2.6629861764229824e-06, |
| "loss": 0.0074, |
| "step": 914 |
| }, |
| { |
| "clip_ratio": 0.0003104925490902133, |
| "epoch": 1.124286695945744, |
| "grad_norm": 0.03461490571498871, |
| "kl": 0.006183624267578125, |
| "learning_rate": 2.6567258672472064e-06, |
| "loss": 0.0073, |
| "step": 915 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.3962297439575, |
| "epoch": 1.126198798960294, |
| "grad_norm": 0.038919847458601, |
| "kl": 0.0060977935791015625, |
| "learning_rate": 2.650464571265975e-06, |
| "loss": 0.0062, |
| "num_tokens": 50733111.0, |
| "reward": 0.06584821734577417, |
| "reward_std": 0.07367311330744997, |
| "rewards/pure_accuracy_reward_math": 0.06584821583237499, |
| "step": 916 |
| }, |
| { |
| "clip_ratio": 0.0002951280029606096, |
| "epoch": 1.1281109019748439, |
| "grad_norm": 0.038201622664928436, |
| "kl": 0.0060329437255859375, |
| "learning_rate": 2.6442023279027805e-06, |
| "loss": 0.0061, |
| "step": 917 |
| }, |
| { |
| "clip_ratio": 0.00029004437487856194, |
| "epoch": 1.1300230049893938, |
| "grad_norm": 0.03696547448635101, |
| "kl": 0.006039619445800781, |
| "learning_rate": 2.6379391765870828e-06, |
| "loss": 0.0061, |
| "step": 918 |
| }, |
| { |
| "clip_ratio": 0.0003163389113183257, |
| "epoch": 1.1319351080039437, |
| "grad_norm": 0.03571280464529991, |
| "kl": 0.006005764007568359, |
| "learning_rate": 2.6316751567540527e-06, |
| "loss": 0.006, |
| "step": 919 |
| }, |
| { |
| "clip_ratio": 0.0003592208154259424, |
| "epoch": 1.1338472110184936, |
| "grad_norm": 0.03568287193775177, |
| "kl": 0.005993366241455078, |
| "learning_rate": 2.625410307844335e-06, |
| "loss": 0.006, |
| "step": 920 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 538.2659268379211, |
| "epoch": 1.1357593140330435, |
| "grad_norm": 0.03899242356419563, |
| "kl": 0.005813121795654297, |
| "learning_rate": 2.6191446693037924e-06, |
| "loss": 0.0071, |
| "num_tokens": 54398312.0, |
| "reward": 0.07226562857977115, |
| "reward_std": 0.07861530320951715, |
| "rewards/pure_accuracy_reward_math": 0.07226562648429535, |
| "step": 921 |
| }, |
| { |
| "clip_ratio": 0.00029711308371815903, |
| "epoch": 1.1376714170475934, |
| "grad_norm": 0.038164544850587845, |
| "kl": 0.0058841705322265625, |
| "learning_rate": 2.6128782805832605e-06, |
| "loss": 0.0071, |
| "step": 922 |
| }, |
| { |
| "clip_ratio": 0.0003027216810664868, |
| "epoch": 1.1395835200621434, |
| "grad_norm": 0.03706645965576172, |
| "kl": 0.005882740020751953, |
| "learning_rate": 2.606611181138295e-06, |
| "loss": 0.007, |
| "step": 923 |
| }, |
| { |
| "clip_ratio": 0.00032618250162386175, |
| "epoch": 1.1414956230766933, |
| "grad_norm": 0.036637816578149796, |
| "kl": 0.005909442901611328, |
| "learning_rate": 2.600343410428931e-06, |
| "loss": 0.007, |
| "step": 924 |
| }, |
| { |
| "clip_ratio": 0.00032713054685018506, |
| "epoch": 1.1434077260912432, |
| "grad_norm": 0.036758605390787125, |
| "kl": 0.005947589874267578, |
| "learning_rate": 2.5940750079194275e-06, |
| "loss": 0.0069, |
| "step": 925 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 542.0072803497314, |
| "epoch": 1.145319829105793, |
| "grad_norm": 0.03791532665491104, |
| "kl": 0.0061702728271484375, |
| "learning_rate": 2.5878060130780225e-06, |
| "loss": 0.0074, |
| "num_tokens": 58073722.0, |
| "reward": 0.06835937863797881, |
| "reward_std": 0.07715391897363588, |
| "rewards/pure_accuracy_reward_math": 0.06835937636788003, |
| "step": 926 |
| }, |
| { |
| "clip_ratio": 0.00030884258325158953, |
| "epoch": 1.147231932120343, |
| "grad_norm": 0.03749171644449234, |
| "kl": 0.006160736083984375, |
| "learning_rate": 2.581536465376684e-06, |
| "loss": 0.0074, |
| "step": 927 |
| }, |
| { |
| "clip_ratio": 0.000279198229350186, |
| "epoch": 1.149144035134893, |
| "grad_norm": 0.03681938722729683, |
| "kl": 0.006136417388916016, |
| "learning_rate": 2.575266404290859e-06, |
| "loss": 0.0073, |
| "step": 928 |
| }, |
| { |
| "clip_ratio": 0.0002930849948370451, |
| "epoch": 1.1510561381494429, |
| "grad_norm": 0.035750068724155426, |
| "kl": 0.006227970123291016, |
| "learning_rate": 2.5689958692992284e-06, |
| "loss": 0.0072, |
| "step": 929 |
| }, |
| { |
| "clip_ratio": 0.00028936977611238035, |
| "epoch": 1.1529682411639928, |
| "grad_norm": 0.03503425419330597, |
| "kl": 0.006281375885009766, |
| "learning_rate": 2.562724899883458e-06, |
| "loss": 0.0072, |
| "step": 930 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.6188879013062, |
| "epoch": 1.1548803441785427, |
| "grad_norm": 0.05187267065048218, |
| "kl": 0.007277965545654297, |
| "learning_rate": 2.5564535355279464e-06, |
| "loss": 0.0072, |
| "num_tokens": 61714268.0, |
| "reward": 0.07505580713041127, |
| "reward_std": 0.08531173289520666, |
| "rewards/pure_accuracy_reward_math": 0.07505580491852015, |
| "step": 931 |
| }, |
| { |
| "clip_ratio": 0.00033635866333270314, |
| "epoch": 1.1567924471930926, |
| "grad_norm": 0.039655230939388275, |
| "kl": 0.0072231292724609375, |
| "learning_rate": 2.550181815719581e-06, |
| "loss": 0.0072, |
| "step": 932 |
| }, |
| { |
| "clip_ratio": 0.00035109808851530033, |
| "epoch": 1.1587045502076425, |
| "grad_norm": 0.038757406175136566, |
| "kl": 0.007157802581787109, |
| "learning_rate": 2.5439097799474867e-06, |
| "loss": 0.0072, |
| "step": 933 |
| }, |
| { |
| "clip_ratio": 0.00037538493586453114, |
| "epoch": 1.1606166532221924, |
| "grad_norm": 0.03841486573219299, |
| "kl": 0.007115840911865234, |
| "learning_rate": 2.537637467702777e-06, |
| "loss": 0.0071, |
| "step": 934 |
| }, |
| { |
| "clip_ratio": 0.0003936579208243529, |
| "epoch": 1.1625287562367423, |
| "grad_norm": 0.038453541696071625, |
| "kl": 0.0070896148681640625, |
| "learning_rate": 2.531364918478308e-06, |
| "loss": 0.007, |
| "step": 935 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 547.6250252723694, |
| "epoch": 1.1644408592512923, |
| "grad_norm": 0.03738933801651001, |
| "kl": 0.00615692138671875, |
| "learning_rate": 2.5250921717684247e-06, |
| "loss": 0.0061, |
| "num_tokens": 65415044.0, |
| "reward": 0.07561384260770865, |
| "reward_std": 0.07745296956272796, |
| "rewards/pure_accuracy_reward_math": 0.07561384062864818, |
| "step": 936 |
| }, |
| { |
| "clip_ratio": 0.0002929231292227996, |
| "epoch": 1.166352962265842, |
| "grad_norm": 0.03690778836607933, |
| "kl": 0.006189823150634766, |
| "learning_rate": 2.5188192670687186e-06, |
| "loss": 0.0061, |
| "step": 937 |
| }, |
| { |
| "clip_ratio": 0.000294325235870474, |
| "epoch": 1.168265065280392, |
| "grad_norm": 0.03613179549574852, |
| "kl": 0.006130695343017578, |
| "learning_rate": 2.512546243875776e-06, |
| "loss": 0.0061, |
| "step": 938 |
| }, |
| { |
| "clip_ratio": 0.00031920797795237377, |
| "epoch": 1.1701771682949418, |
| "grad_norm": 0.03461304306983948, |
| "kl": 0.006014347076416016, |
| "learning_rate": 2.5062731416869267e-06, |
| "loss": 0.006, |
| "step": 939 |
| }, |
| { |
| "clip_ratio": 0.00037188214912475814, |
| "epoch": 1.172089271309492, |
| "grad_norm": 0.03454398363828659, |
| "kl": 0.005980968475341797, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0059, |
| "step": 940 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.1423244476318, |
| "epoch": 1.1740013743240416, |
| "grad_norm": 0.03934042155742645, |
| "kl": 0.006266117095947266, |
| "learning_rate": 2.493726858313074e-06, |
| "loss": 0.0078, |
| "num_tokens": 69057654.0, |
| "reward": 0.07477678928989917, |
| "reward_std": 0.08299326134147123, |
| "rewards/pure_accuracy_reward_math": 0.07477678690338507, |
| "step": 941 |
| }, |
| { |
| "clip_ratio": 0.00031629414758072016, |
| "epoch": 1.1759134773385915, |
| "grad_norm": 0.03872406855225563, |
| "kl": 0.0062713623046875, |
| "learning_rate": 2.4874537561242253e-06, |
| "loss": 0.0078, |
| "step": 942 |
| }, |
| { |
| "clip_ratio": 0.0003434862284166229, |
| "epoch": 1.1778255803531414, |
| "grad_norm": 0.03723340108990669, |
| "kl": 0.00623321533203125, |
| "learning_rate": 2.481180732931282e-06, |
| "loss": 0.0077, |
| "step": 943 |
| }, |
| { |
| "clip_ratio": 0.00034986940886483353, |
| "epoch": 1.1797376833676914, |
| "grad_norm": 0.03732794523239136, |
| "kl": 0.006276607513427734, |
| "learning_rate": 2.4749078282315757e-06, |
| "loss": 0.0076, |
| "step": 944 |
| }, |
| { |
| "clip_ratio": 0.0003579597876637308, |
| "epoch": 1.1816497863822413, |
| "grad_norm": 0.03668594732880592, |
| "kl": 0.006198883056640625, |
| "learning_rate": 2.468635081521693e-06, |
| "loss": 0.0076, |
| "step": 945 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.1718993186951, |
| "epoch": 1.1835618893967912, |
| "grad_norm": 0.03715552017092705, |
| "kl": 0.006759166717529297, |
| "learning_rate": 2.462362532297224e-06, |
| "loss": 0.0079, |
| "num_tokens": 72682654.0, |
| "reward": 0.06891741449362598, |
| "reward_std": 0.08248148870188743, |
| "rewards/pure_accuracy_reward_math": 0.06891741199069656, |
| "step": 946 |
| }, |
| { |
| "clip_ratio": 0.0003075862115053951, |
| "epoch": 1.185473992411341, |
| "grad_norm": 0.03616279736161232, |
| "kl": 0.006741523742675781, |
| "learning_rate": 2.456090220052514e-06, |
| "loss": 0.0079, |
| "step": 947 |
| }, |
| { |
| "clip_ratio": 0.00027696539024191225, |
| "epoch": 1.187386095425891, |
| "grad_norm": 0.03556762635707855, |
| "kl": 0.006789684295654297, |
| "learning_rate": 2.44981818428042e-06, |
| "loss": 0.0079, |
| "step": 948 |
| }, |
| { |
| "clip_ratio": 0.0002739789470638243, |
| "epoch": 1.189298198440441, |
| "grad_norm": 0.03486724570393562, |
| "kl": 0.006869316101074219, |
| "learning_rate": 2.4435464644720544e-06, |
| "loss": 0.0078, |
| "step": 949 |
| }, |
| { |
| "clip_ratio": 0.00031816330425726846, |
| "epoch": 1.1912103014549908, |
| "grad_norm": 0.03446395695209503, |
| "kl": 0.006869316101074219, |
| "learning_rate": 2.4372751001165427e-06, |
| "loss": 0.0077, |
| "step": 950 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.6573901176453, |
| "epoch": 1.1931224044695408, |
| "grad_norm": 0.03734345734119415, |
| "kl": 0.006131649017333984, |
| "learning_rate": 2.4310041307007716e-06, |
| "loss": 0.0062, |
| "num_tokens": 76305578.0, |
| "reward": 0.07114955657743849, |
| "reward_std": 0.07526708883233368, |
| "rewards/pure_accuracy_reward_math": 0.07114955488941632, |
| "step": 951 |
| }, |
| { |
| "clip_ratio": 0.00029005661951941875, |
| "epoch": 1.1950345074840907, |
| "grad_norm": 0.036443449556827545, |
| "kl": 0.006079196929931641, |
| "learning_rate": 2.4247335957091418e-06, |
| "loss": 0.0062, |
| "step": 952 |
| }, |
| { |
| "clip_ratio": 0.0002579906781647878, |
| "epoch": 1.1969466104986406, |
| "grad_norm": 0.034940823912620544, |
| "kl": 0.006037235260009766, |
| "learning_rate": 2.4184635346233166e-06, |
| "loss": 0.0061, |
| "step": 953 |
| }, |
| { |
| "clip_ratio": 0.00032199256943954424, |
| "epoch": 1.1988587135131905, |
| "grad_norm": 0.03445851802825928, |
| "kl": 0.006024360656738281, |
| "learning_rate": 2.4121939869219784e-06, |
| "loss": 0.0061, |
| "step": 954 |
| }, |
| { |
| "clip_ratio": 0.0003193520489048751, |
| "epoch": 1.2007708165277404, |
| "grad_norm": 0.03448885306715965, |
| "kl": 0.005992889404296875, |
| "learning_rate": 2.405924992080573e-06, |
| "loss": 0.006, |
| "step": 955 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.4358487129211, |
| "epoch": 1.2026829195422903, |
| "grad_norm": 0.11665105819702148, |
| "kl": 0.008374214172363281, |
| "learning_rate": 2.3996565895710692e-06, |
| "loss": 0.0065, |
| "num_tokens": 79904712.0, |
| "reward": 0.07366071760770865, |
| "reward_std": 0.08458104060264304, |
| "rewards/pure_accuracy_reward_math": 0.07366071591968648, |
| "step": 956 |
| }, |
| { |
| "clip_ratio": 0.00031160829769305565, |
| "epoch": 1.2045950225568403, |
| "grad_norm": 0.04096413403749466, |
| "kl": 0.006944179534912109, |
| "learning_rate": 2.3933888188617054e-06, |
| "loss": 0.0064, |
| "step": 957 |
| }, |
| { |
| "clip_ratio": 0.00032232171946589006, |
| "epoch": 1.2065071255713902, |
| "grad_norm": 0.04049144312739372, |
| "kl": 0.006976127624511719, |
| "learning_rate": 2.3871217194167407e-06, |
| "loss": 0.0063, |
| "step": 958 |
| }, |
| { |
| "clip_ratio": 0.0003416440970340773, |
| "epoch": 1.20841922858594, |
| "grad_norm": 0.039766065776348114, |
| "kl": 0.007042884826660156, |
| "learning_rate": 2.380855330696208e-06, |
| "loss": 0.0063, |
| "step": 959 |
| }, |
| { |
| "clip_ratio": 0.0003523347779150754, |
| "epoch": 1.21033133160049, |
| "grad_norm": 0.03884311020374298, |
| "kl": 0.007153987884521484, |
| "learning_rate": 2.3745896921556656e-06, |
| "loss": 0.0062, |
| "step": 960 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.392322063446, |
| "epoch": 1.21224343461504, |
| "grad_norm": 0.04043371230363846, |
| "kl": 0.008221149444580078, |
| "learning_rate": 2.368324843245948e-06, |
| "loss": 0.0086, |
| "num_tokens": 83540930.0, |
| "reward": 0.07952009316068143, |
| "reward_std": 0.08836089639225975, |
| "rewards/pure_accuracy_reward_math": 0.0795200911234133, |
| "step": 961 |
| }, |
| { |
| "clip_ratio": 0.0003234188988017195, |
| "epoch": 1.2141555376295898, |
| "grad_norm": 0.039239391684532166, |
| "kl": 0.008275985717773438, |
| "learning_rate": 2.362060823412919e-06, |
| "loss": 0.0086, |
| "step": 962 |
| }, |
| { |
| "clip_ratio": 0.00033211900500873526, |
| "epoch": 1.2160676406441397, |
| "grad_norm": 0.03923904523253441, |
| "kl": 0.008409500122070312, |
| "learning_rate": 2.355797672097219e-06, |
| "loss": 0.0086, |
| "step": 963 |
| }, |
| { |
| "clip_ratio": 0.00036667373893806143, |
| "epoch": 1.2179797436586897, |
| "grad_norm": 0.038865529000759125, |
| "kl": 0.008434295654296875, |
| "learning_rate": 2.349535428734026e-06, |
| "loss": 0.0085, |
| "step": 964 |
| }, |
| { |
| "clip_ratio": 0.0003816600048480723, |
| "epoch": 1.2198918466732396, |
| "grad_norm": 0.037728771567344666, |
| "kl": 0.00834512710571289, |
| "learning_rate": 2.343274132752795e-06, |
| "loss": 0.0084, |
| "step": 965 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 535.4799346923828, |
| "epoch": 1.2218039496877895, |
| "grad_norm": 0.03813539817929268, |
| "kl": 0.005985260009765625, |
| "learning_rate": 2.3370138235770184e-06, |
| "loss": 0.0088, |
| "num_tokens": 87187574.0, |
| "reward": 0.060267860419116914, |
| "reward_std": 0.07384576939512044, |
| "rewards/pure_accuracy_reward_math": 0.060267858498264104, |
| "step": 966 |
| }, |
| { |
| "clip_ratio": 0.0002719826344446119, |
| "epoch": 1.2237160527023394, |
| "grad_norm": 0.03676025941967964, |
| "kl": 0.006021976470947266, |
| "learning_rate": 2.330754540623975e-06, |
| "loss": 0.0088, |
| "step": 967 |
| }, |
| { |
| "clip_ratio": 0.0002730399019696961, |
| "epoch": 1.2256281557168893, |
| "grad_norm": 0.03579593822360039, |
| "kl": 0.006060123443603516, |
| "learning_rate": 2.324496323304484e-06, |
| "loss": 0.0088, |
| "step": 968 |
| }, |
| { |
| "clip_ratio": 0.0002800920712502375, |
| "epoch": 1.2275402587314392, |
| "grad_norm": 0.0353357158601284, |
| "kl": 0.0061092376708984375, |
| "learning_rate": 2.318239211022651e-06, |
| "loss": 0.0087, |
| "step": 969 |
| }, |
| { |
| "clip_ratio": 0.0003294056899108, |
| "epoch": 1.2294523617459892, |
| "grad_norm": 0.03521355986595154, |
| "kl": 0.006182193756103516, |
| "learning_rate": 2.3119832431756284e-06, |
| "loss": 0.0086, |
| "step": 970 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 513.8870182037354, |
| "epoch": 1.231364464760539, |
| "grad_norm": 0.03882085531949997, |
| "kl": 0.006420135498046875, |
| "learning_rate": 2.3057284591533598e-06, |
| "loss": 0.0093, |
| "num_tokens": 90758753.0, |
| "reward": 0.07505580718861893, |
| "reward_std": 0.07715391827514395, |
| "rewards/pure_accuracy_reward_math": 0.0750558051513508, |
| "step": 971 |
| }, |
| { |
| "clip_ratio": 0.0003045887907546785, |
| "epoch": 1.2332765677750888, |
| "grad_norm": 0.03775356709957123, |
| "kl": 0.006350040435791016, |
| "learning_rate": 2.299474898338336e-06, |
| "loss": 0.0093, |
| "step": 972 |
| }, |
| { |
| "clip_ratio": 0.0003195773986703898, |
| "epoch": 1.235188670789639, |
| "grad_norm": 0.03639310225844383, |
| "kl": 0.006343841552734375, |
| "learning_rate": 2.2932226001053444e-06, |
| "loss": 0.0092, |
| "step": 973 |
| }, |
| { |
| "clip_ratio": 0.0003582680616318612, |
| "epoch": 1.2371007738041886, |
| "grad_norm": 0.036272380501031876, |
| "kl": 0.006300926208496094, |
| "learning_rate": 2.286971603821226e-06, |
| "loss": 0.0092, |
| "step": 974 |
| }, |
| { |
| "clip_ratio": 0.0003946863821511215, |
| "epoch": 1.2390128768187387, |
| "grad_norm": 0.03584066033363342, |
| "kl": 0.006391048431396484, |
| "learning_rate": 2.280721948844621e-06, |
| "loss": 0.0091, |
| "step": 975 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.3044323921204, |
| "epoch": 1.2409249798332884, |
| "grad_norm": 0.038236722350120544, |
| "kl": 0.006694316864013672, |
| "learning_rate": 2.274473674525726e-06, |
| "loss": 0.0094, |
| "num_tokens": 94365488.0, |
| "reward": 0.06556919953436591, |
| "reward_std": 0.07405849196948111, |
| "rewards/pure_accuracy_reward_math": 0.06556919802096672, |
| "step": 976 |
| }, |
| { |
| "clip_ratio": 0.00029697347130763774, |
| "epoch": 1.2428370828478383, |
| "grad_norm": 0.0369977168738842, |
| "kl": 0.006660938262939453, |
| "learning_rate": 2.268226820206044e-06, |
| "loss": 0.0094, |
| "step": 977 |
| }, |
| { |
| "clip_ratio": 0.000319464833580696, |
| "epoch": 1.2447491858623883, |
| "grad_norm": 0.03550850227475166, |
| "kl": 0.006519794464111328, |
| "learning_rate": 2.261981425218138e-06, |
| "loss": 0.0094, |
| "step": 978 |
| }, |
| { |
| "clip_ratio": 0.0003469139706453461, |
| "epoch": 1.2466612888769382, |
| "grad_norm": 0.03525082767009735, |
| "kl": 0.006406307220458984, |
| "learning_rate": 2.2557375288853803e-06, |
| "loss": 0.0093, |
| "step": 979 |
| }, |
| { |
| "clip_ratio": 0.0003654695393606744, |
| "epoch": 1.248573391891488, |
| "grad_norm": 0.0355265848338604, |
| "kl": 0.006331443786621094, |
| "learning_rate": 2.2494951705217095e-06, |
| "loss": 0.0092, |
| "step": 980 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.76704454422, |
| "epoch": 1.250485494906038, |
| "grad_norm": 0.03745350241661072, |
| "kl": 0.0065135955810546875, |
| "learning_rate": 2.2432543894313797e-06, |
| "loss": 0.0042, |
| "num_tokens": 97952525.0, |
| "reward": 0.06501116385334171, |
| "reward_std": 0.07316133996937424, |
| "rewards/pure_accuracy_reward_math": 0.06501116222352721, |
| "step": 981 |
| }, |
| { |
| "clip_ratio": 0.00029299165072416145, |
| "epoch": 1.252397597920588, |
| "grad_norm": 0.03690091893076897, |
| "kl": 0.006426095962524414, |
| "learning_rate": 2.2370152249087114e-06, |
| "loss": 0.0042, |
| "step": 982 |
| }, |
| { |
| "clip_ratio": 0.0003187885846500649, |
| "epoch": 1.2543097009351378, |
| "grad_norm": 0.03645962476730347, |
| "kl": 0.006396055221557617, |
| "learning_rate": 2.2307777162378523e-06, |
| "loss": 0.0042, |
| "step": 983 |
| }, |
| { |
| "clip_ratio": 0.00033352292155086616, |
| "epoch": 1.2562218039496877, |
| "grad_norm": 0.03598187491297722, |
| "kl": 0.006333351135253906, |
| "learning_rate": 2.2245419026925187e-06, |
| "loss": 0.0041, |
| "step": 984 |
| }, |
| { |
| "clip_ratio": 0.0003533332319989313, |
| "epoch": 1.2581339069642377, |
| "grad_norm": 0.03577181696891785, |
| "kl": 0.006278276443481445, |
| "learning_rate": 2.218307823535757e-06, |
| "loss": 0.004, |
| "step": 985 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 522.8172650337219, |
| "epoch": 1.2600460099787876, |
| "grad_norm": 0.03590444475412369, |
| "kl": 0.005995273590087891, |
| "learning_rate": 2.2120755180196904e-06, |
| "loss": 0.0045, |
| "num_tokens": 101560026.0, |
| "reward": 0.06054687811410986, |
| "reward_std": 0.06865079078124836, |
| "rewards/pure_accuracy_reward_math": 0.06054687619325705, |
| "step": 986 |
| }, |
| { |
| "clip_ratio": 0.00024842098838462334, |
| "epoch": 1.2619581129933375, |
| "grad_norm": 0.03513624891638756, |
| "kl": 0.0059719085693359375, |
| "learning_rate": 2.2058450253852783e-06, |
| "loss": 0.0045, |
| "step": 987 |
| }, |
| { |
| "clip_ratio": 0.000271169978702801, |
| "epoch": 1.2638702160078874, |
| "grad_norm": 0.03392768278717995, |
| "kl": 0.005938529968261719, |
| "learning_rate": 2.1996163848620612e-06, |
| "loss": 0.0044, |
| "step": 988 |
| }, |
| { |
| "clip_ratio": 0.0002971922116898895, |
| "epoch": 1.2657823190224373, |
| "grad_norm": 0.03286145627498627, |
| "kl": 0.0060443878173828125, |
| "learning_rate": 2.1933896356679226e-06, |
| "loss": 0.0044, |
| "step": 989 |
| }, |
| { |
| "clip_ratio": 0.0003229031350429068, |
| "epoch": 1.2676944220369872, |
| "grad_norm": 0.032496001571416855, |
| "kl": 0.006091594696044922, |
| "learning_rate": 2.1871648170088347e-06, |
| "loss": 0.0043, |
| "step": 990 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 535.8125224113464, |
| "epoch": 1.2696065250515371, |
| "grad_norm": 0.21526122093200684, |
| "kl": 0.007075309753417969, |
| "learning_rate": 2.1809419680786143e-06, |
| "loss": 0.0072, |
| "num_tokens": 105223050.0, |
| "reward": 0.07421875381260179, |
| "reward_std": 0.08054219774203375, |
| "rewards/pure_accuracy_reward_math": 0.07421875130967237, |
| "step": 991 |
| }, |
| { |
| "clip_ratio": 0.00032863151136552915, |
| "epoch": 1.271518628066087, |
| "grad_norm": 0.03788222745060921, |
| "kl": 0.006428241729736328, |
| "learning_rate": 2.1747211280586758e-06, |
| "loss": 0.0072, |
| "step": 992 |
| }, |
| { |
| "clip_ratio": 0.00034688404628013814, |
| "epoch": 1.273430731080637, |
| "grad_norm": 0.03719337284564972, |
| "kl": 0.0064296722412109375, |
| "learning_rate": 2.168502336117787e-06, |
| "loss": 0.0071, |
| "step": 993 |
| }, |
| { |
| "clip_ratio": 0.00034599834629034376, |
| "epoch": 1.275342834095187, |
| "grad_norm": 0.036535993218421936, |
| "kl": 0.006348133087158203, |
| "learning_rate": 2.1622856314118178e-06, |
| "loss": 0.0071, |
| "step": 994 |
| }, |
| { |
| "clip_ratio": 0.00036459101005448247, |
| "epoch": 1.2772549371097368, |
| "grad_norm": 0.03548647463321686, |
| "kl": 0.006353855133056641, |
| "learning_rate": 2.156071053083496e-06, |
| "loss": 0.007, |
| "step": 995 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.536018371582, |
| "epoch": 1.2791670401242867, |
| "grad_norm": 0.03945273160934448, |
| "kl": 0.006157398223876953, |
| "learning_rate": 2.1498586402621646e-06, |
| "loss": 0.0062, |
| "num_tokens": 108847859.0, |
| "reward": 0.07366071807336994, |
| "reward_std": 0.072430647269357, |
| "rewards/pure_accuracy_reward_math": 0.07366071533760987, |
| "step": 996 |
| }, |
| { |
| "clip_ratio": 0.0002439655858097467, |
| "epoch": 1.2810791431388366, |
| "grad_norm": 0.03839760273694992, |
| "kl": 0.006161689758300781, |
| "learning_rate": 2.1436484320635275e-06, |
| "loss": 0.0061, |
| "step": 997 |
| }, |
| { |
| "clip_ratio": 0.0002514519866281262, |
| "epoch": 1.2829912461533866, |
| "grad_norm": 0.03733210638165474, |
| "kl": 0.0061798095703125, |
| "learning_rate": 2.1374404675894083e-06, |
| "loss": 0.0061, |
| "step": 998 |
| }, |
| { |
| "clip_ratio": 0.0002774860670342605, |
| "epoch": 1.2849033491679365, |
| "grad_norm": 0.03640332072973251, |
| "kl": 0.006183147430419922, |
| "learning_rate": 2.131234785927505e-06, |
| "loss": 0.006, |
| "step": 999 |
| }, |
| { |
| "clip_ratio": 0.0002877332713069336, |
| "epoch": 1.2868154521824864, |
| "grad_norm": 0.03559413552284241, |
| "kl": 0.006213665008544922, |
| "learning_rate": 2.1250314261511414e-06, |
| "loss": 0.0059, |
| "step": 1000 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.9492444992065, |
| "epoch": 1.2887275551970363, |
| "grad_norm": 0.04216492921113968, |
| "kl": 0.0073282718658447266, |
| "learning_rate": 2.1188304273190196e-06, |
| "loss": 0.0102, |
| "num_tokens": 112482213.0, |
| "reward": 0.0772879500000272, |
| "reward_std": 0.07908701087580994, |
| "rewards/pure_accuracy_reward_math": 0.07728794772992842, |
| "step": 1001 |
| }, |
| { |
| "clip_ratio": 0.0003075964003755871, |
| "epoch": 1.2906396582115862, |
| "grad_norm": 0.039000045508146286, |
| "kl": 0.007200002670288086, |
| "learning_rate": 2.1126318284749807e-06, |
| "loss": 0.0102, |
| "step": 1002 |
| }, |
| { |
| "clip_ratio": 0.0003138856436635251, |
| "epoch": 1.2925517612261361, |
| "grad_norm": 0.036585696041584015, |
| "kl": 0.00716710090637207, |
| "learning_rate": 2.106435668647751e-06, |
| "loss": 0.0101, |
| "step": 1003 |
| }, |
| { |
| "clip_ratio": 0.00033263966838603665, |
| "epoch": 1.294463864240686, |
| "grad_norm": 0.03634057566523552, |
| "kl": 0.007274150848388672, |
| "learning_rate": 2.1002419868507005e-06, |
| "loss": 0.01, |
| "step": 1004 |
| }, |
| { |
| "clip_ratio": 0.00035104663936635916, |
| "epoch": 1.2963759672552357, |
| "grad_norm": 0.03524275869131088, |
| "kl": 0.0072422027587890625, |
| "learning_rate": 2.0940508220815978e-06, |
| "loss": 0.01, |
| "step": 1005 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.5226221084595, |
| "epoch": 1.2982880702697859, |
| "grad_norm": 0.04047563299536705, |
| "kl": 0.006965160369873047, |
| "learning_rate": 2.087862213322362e-06, |
| "loss": 0.0078, |
| "num_tokens": 116078946.0, |
| "reward": 0.06752232470898889, |
| "reward_std": 0.08269421081058681, |
| "rewards/pure_accuracy_reward_math": 0.0675223229045514, |
| "step": 1006 |
| }, |
| { |
| "clip_ratio": 0.00033451643105308904, |
| "epoch": 1.3002001732843356, |
| "grad_norm": 0.03818976879119873, |
| "kl": 0.0069293975830078125, |
| "learning_rate": 2.0816761995388198e-06, |
| "loss": 0.0078, |
| "step": 1007 |
| }, |
| { |
| "clip_ratio": 0.0003828123747666723, |
| "epoch": 1.3021122762988857, |
| "grad_norm": 0.03969357907772064, |
| "kl": 0.006967067718505859, |
| "learning_rate": 2.075492819680457e-06, |
| "loss": 0.0078, |
| "step": 1008 |
| }, |
| { |
| "clip_ratio": 0.0003832018163620887, |
| "epoch": 1.3040243793134354, |
| "grad_norm": 0.040100231766700745, |
| "kl": 0.007086753845214844, |
| "learning_rate": 2.0693121126801778e-06, |
| "loss": 0.0077, |
| "step": 1009 |
| }, |
| { |
| "clip_ratio": 0.0003569153510625256, |
| "epoch": 1.3059364823279855, |
| "grad_norm": 0.037368252873420715, |
| "kl": 0.007195472717285156, |
| "learning_rate": 2.063134117454055e-06, |
| "loss": 0.0076, |
| "step": 1010 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 514.7126340866089, |
| "epoch": 1.3078485853425352, |
| "grad_norm": 0.0401712991297245, |
| "kl": 0.00678253173828125, |
| "learning_rate": 2.0569588729010896e-06, |
| "loss": 0.0063, |
| "num_tokens": 119662772.0, |
| "reward": 0.0705915214784909, |
| "reward_std": 0.08484002540353686, |
| "rewards/pure_accuracy_reward_math": 0.0705915190919768, |
| "step": 1011 |
| }, |
| { |
| "clip_ratio": 0.0003401347770477514, |
| "epoch": 1.3097606883570854, |
| "grad_norm": 0.03972383588552475, |
| "kl": 0.006781578063964844, |
| "learning_rate": 2.0507864179029592e-06, |
| "loss": 0.0062, |
| "step": 1012 |
| }, |
| { |
| "clip_ratio": 0.00040657852025560715, |
| "epoch": 1.311672791371635, |
| "grad_norm": 0.04063359647989273, |
| "kl": 0.006711006164550781, |
| "learning_rate": 2.044616791323781e-06, |
| "loss": 0.0062, |
| "step": 1013 |
| }, |
| { |
| "clip_ratio": 0.0004189488300880839, |
| "epoch": 1.313584894386185, |
| "grad_norm": 0.03818094730377197, |
| "kl": 0.006552696228027344, |
| "learning_rate": 2.0384500320098604e-06, |
| "loss": 0.0061, |
| "step": 1014 |
| }, |
| { |
| "clip_ratio": 0.000448550158978378, |
| "epoch": 1.315496997400735, |
| "grad_norm": 0.03749743476510048, |
| "kl": 0.0064678192138671875, |
| "learning_rate": 2.032286178789454e-06, |
| "loss": 0.006, |
| "step": 1015 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.0069990158081, |
| "epoch": 1.3174091004152848, |
| "grad_norm": 0.03775123134255409, |
| "kl": 0.006552696228027344, |
| "learning_rate": 2.0261252704725143e-06, |
| "loss": 0.0047, |
| "num_tokens": 123299241.0, |
| "reward": 0.06919643163564615, |
| "reward_std": 0.0781373989302665, |
| "rewards/pure_accuracy_reward_math": 0.06919642994762398, |
| "step": 1016 |
| }, |
| { |
| "clip_ratio": 0.0003128642913452495, |
| "epoch": 1.3193212034298347, |
| "grad_norm": 0.03666616231203079, |
| "kl": 0.006560325622558594, |
| "learning_rate": 2.0199673458504577e-06, |
| "loss": 0.0047, |
| "step": 1017 |
| }, |
| { |
| "clip_ratio": 0.00030665075905744743, |
| "epoch": 1.3212333064443846, |
| "grad_norm": 0.035805702209472656, |
| "kl": 0.006537437438964844, |
| "learning_rate": 2.01381244369591e-06, |
| "loss": 0.0046, |
| "step": 1018 |
| }, |
| { |
| "clip_ratio": 0.0003063842187316368, |
| "epoch": 1.3231454094589346, |
| "grad_norm": 0.03492369130253792, |
| "kl": 0.006512641906738281, |
| "learning_rate": 2.0076606027624676e-06, |
| "loss": 0.0046, |
| "step": 1019 |
| }, |
| { |
| "clip_ratio": 0.00033027163379983904, |
| "epoch": 1.3250575124734845, |
| "grad_norm": 0.03507117182016373, |
| "kl": 0.006590366363525391, |
| "learning_rate": 2.0015118617844516e-06, |
| "loss": 0.0045, |
| "step": 1020 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 536.10493516922, |
| "epoch": 1.3269696154880344, |
| "grad_norm": 0.04077515751123428, |
| "kl": 0.006287097930908203, |
| "learning_rate": 1.9953662594766675e-06, |
| "loss": 0.007, |
| "num_tokens": 126958737.0, |
| "reward": 0.0756138427532278, |
| "reward_std": 0.08067478984594345, |
| "rewards/pure_accuracy_reward_math": 0.07561384083237499, |
| "step": 1021 |
| }, |
| { |
| "clip_ratio": 0.0003038725464534764, |
| "epoch": 1.3288817185025843, |
| "grad_norm": 0.03825462609529495, |
| "kl": 0.0063266754150390625, |
| "learning_rate": 1.9892238345341544e-06, |
| "loss": 0.007, |
| "step": 1022 |
| }, |
| { |
| "clip_ratio": 0.0003366774006963169, |
| "epoch": 1.3307938215171342, |
| "grad_norm": 0.03734288364648819, |
| "kl": 0.006364345550537109, |
| "learning_rate": 1.983084625631949e-06, |
| "loss": 0.0069, |
| "step": 1023 |
| }, |
| { |
| "clip_ratio": 0.0003749641306853846, |
| "epoch": 1.3327059245316841, |
| "grad_norm": 0.03799683600664139, |
| "kl": 0.006411075592041016, |
| "learning_rate": 1.9769486714248367e-06, |
| "loss": 0.0068, |
| "step": 1024 |
| }, |
| { |
| "clip_ratio": 0.0003729545476289786, |
| "epoch": 1.334618027546234, |
| "grad_norm": 0.03601997718214989, |
| "kl": 0.006434917449951172, |
| "learning_rate": 1.9708160105471105e-06, |
| "loss": 0.0068, |
| "step": 1025 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.7709493637085, |
| "epoch": 1.336530130560784, |
| "grad_norm": 0.04102141782641411, |
| "kl": 0.006857395172119141, |
| "learning_rate": 1.964686681612327e-06, |
| "loss": 0.0055, |
| "num_tokens": 130592668.0, |
| "reward": 0.06556919959257357, |
| "reward_std": 0.06470447563333437, |
| "rewards/pure_accuracy_reward_math": 0.0655691981955897, |
| "step": 1026 |
| }, |
| { |
| "clip_ratio": 0.00021823535962539609, |
| "epoch": 1.3384422335753339, |
| "grad_norm": 0.03428492322564125, |
| "kl": 0.006598472595214844, |
| "learning_rate": 1.9585607232130636e-06, |
| "loss": 0.0054, |
| "step": 1027 |
| }, |
| { |
| "clip_ratio": 0.00024637427833340553, |
| "epoch": 1.3403543365898838, |
| "grad_norm": 0.032555270940065384, |
| "kl": 0.006415843963623047, |
| "learning_rate": 1.952438173920677e-06, |
| "loss": 0.0054, |
| "step": 1028 |
| }, |
| { |
| "clip_ratio": 0.0002563797440870985, |
| "epoch": 1.3422664396044337, |
| "grad_norm": 0.03202388435602188, |
| "kl": 0.006371498107910156, |
| "learning_rate": 1.946319072285058e-06, |
| "loss": 0.0053, |
| "step": 1029 |
| }, |
| { |
| "clip_ratio": 0.0002687414232696028, |
| "epoch": 1.3441785426189836, |
| "grad_norm": 0.03169838339090347, |
| "kl": 0.006340980529785156, |
| "learning_rate": 1.9402034568343888e-06, |
| "loss": 0.0053, |
| "step": 1030 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 549.2184953689575, |
| "epoch": 1.3460906456335335, |
| "grad_norm": 0.054084766656160355, |
| "kl": 0.006264686584472656, |
| "learning_rate": 1.9340913660749015e-06, |
| "loss": 0.0071, |
| "num_tokens": 134289567.0, |
| "reward": 0.06668527112924494, |
| "reward_std": 0.07140090392204002, |
| "rewards/pure_accuracy_reward_math": 0.06668526903376915, |
| "step": 1031 |
| }, |
| { |
| "clip_ratio": 0.00022883353369707038, |
| "epoch": 1.3480027486480834, |
| "grad_norm": 0.03612653911113739, |
| "kl": 0.006344318389892578, |
| "learning_rate": 1.9279828384906373e-06, |
| "loss": 0.0071, |
| "step": 1032 |
| }, |
| { |
| "clip_ratio": 0.0002760976024376305, |
| "epoch": 1.3499148516626334, |
| "grad_norm": 0.036703869700431824, |
| "kl": 0.006397724151611328, |
| "learning_rate": 1.921877912543198e-06, |
| "loss": 0.0071, |
| "step": 1033 |
| }, |
| { |
| "clip_ratio": 0.00027991523592163503, |
| "epoch": 1.3518269546771833, |
| "grad_norm": 0.036445919424295425, |
| "kl": 0.006428718566894531, |
| "learning_rate": 1.9157766266715142e-06, |
| "loss": 0.007, |
| "step": 1034 |
| }, |
| { |
| "clip_ratio": 0.0003110420944381076, |
| "epoch": 1.3537390576917332, |
| "grad_norm": 0.032879918813705444, |
| "kl": 0.006253242492675781, |
| "learning_rate": 1.909679019291592e-06, |
| "loss": 0.0069, |
| "step": 1035 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.200918674469, |
| "epoch": 1.355651160706283, |
| "grad_norm": 0.0374806709587574, |
| "kl": 0.006623744964599609, |
| "learning_rate": 1.9035851287962797e-06, |
| "loss": 0.0088, |
| "num_tokens": 137901395.0, |
| "reward": 0.07170759295695461, |
| "reward_std": 0.0834249026956968, |
| "rewards/pure_accuracy_reward_math": 0.0717075907450635, |
| "step": 1036 |
| }, |
| { |
| "clip_ratio": 0.0002719677876825699, |
| "epoch": 1.357563263720833, |
| "grad_norm": 0.03692527487874031, |
| "kl": 0.006625652313232422, |
| "learning_rate": 1.8974949935550202e-06, |
| "loss": 0.0088, |
| "step": 1037 |
| }, |
| { |
| "clip_ratio": 0.0003176050505544481, |
| "epoch": 1.359475366735383, |
| "grad_norm": 0.03605135530233383, |
| "kl": 0.006484031677246094, |
| "learning_rate": 1.8914086519136133e-06, |
| "loss": 0.0088, |
| "step": 1038 |
| }, |
| { |
| "clip_ratio": 0.0003420261080577802, |
| "epoch": 1.3613874697499329, |
| "grad_norm": 0.03582129627466202, |
| "kl": 0.006468296051025391, |
| "learning_rate": 1.8853261421939718e-06, |
| "loss": 0.0087, |
| "step": 1039 |
| }, |
| { |
| "clip_ratio": 0.00034158617637558564, |
| "epoch": 1.3632995727644825, |
| "grad_norm": 0.0346604622900486, |
| "kl": 0.006458282470703125, |
| "learning_rate": 1.8792475026938823e-06, |
| "loss": 0.0086, |
| "step": 1040 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.6152620315552, |
| "epoch": 1.3652116757790327, |
| "grad_norm": 0.03809192404150963, |
| "kl": 0.006644248962402344, |
| "learning_rate": 1.8731727716867632e-06, |
| "loss": 0.0098, |
| "num_tokens": 141517968.0, |
| "reward": 0.07477678963914514, |
| "reward_std": 0.0749618403497152, |
| "rewards/pure_accuracy_reward_math": 0.07477678678696975, |
| "step": 1041 |
| }, |
| { |
| "clip_ratio": 0.0002677642194726104, |
| "epoch": 1.3671237787935824, |
| "grad_norm": 0.0377020426094532, |
| "kl": 0.0066089630126953125, |
| "learning_rate": 1.8671019874214237e-06, |
| "loss": 0.0098, |
| "step": 1042 |
| }, |
| { |
| "clip_ratio": 0.0002758102658617645, |
| "epoch": 1.3690358818081325, |
| "grad_norm": 0.03678804636001587, |
| "kl": 0.006642341613769531, |
| "learning_rate": 1.8610351881218211e-06, |
| "loss": 0.0098, |
| "step": 1043 |
| }, |
| { |
| "clip_ratio": 0.0002790037015074631, |
| "epoch": 1.3709479848226822, |
| "grad_norm": 0.03615477308630943, |
| "kl": 0.006649971008300781, |
| "learning_rate": 1.8549724119868235e-06, |
| "loss": 0.0097, |
| "step": 1044 |
| }, |
| { |
| "clip_ratio": 0.0002795595634097481, |
| "epoch": 1.3728600878372323, |
| "grad_norm": 0.03598296642303467, |
| "kl": 0.006653785705566406, |
| "learning_rate": 1.8489136971899658e-06, |
| "loss": 0.0096, |
| "step": 1045 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 539.382839679718, |
| "epoch": 1.374772190851782, |
| "grad_norm": 0.03458879515528679, |
| "kl": 0.0064601898193359375, |
| "learning_rate": 1.8428590818792135e-06, |
| "loss": 0.0038, |
| "num_tokens": 145187116.0, |
| "reward": 0.06584821731667034, |
| "reward_std": 0.07200520334299654, |
| "rewards/pure_accuracy_reward_math": 0.06584821562864818, |
| "step": 1046 |
| }, |
| { |
| "clip_ratio": 0.00023162108237784196, |
| "epoch": 1.3766842938663322, |
| "grad_norm": 0.03385276347398758, |
| "kl": 0.006392478942871094, |
| "learning_rate": 1.836808604176719e-06, |
| "loss": 0.0038, |
| "step": 1047 |
| }, |
| { |
| "clip_ratio": 0.00026906593984676874, |
| "epoch": 1.3785963968808819, |
| "grad_norm": 0.0331512950360775, |
| "kl": 0.0062427520751953125, |
| "learning_rate": 1.8307623021785837e-06, |
| "loss": 0.0037, |
| "step": 1048 |
| }, |
| { |
| "clip_ratio": 0.00025022312701139526, |
| "epoch": 1.3805084998954318, |
| "grad_norm": 0.032765790820121765, |
| "kl": 0.006190299987792969, |
| "learning_rate": 1.8247202139546155e-06, |
| "loss": 0.0037, |
| "step": 1049 |
| }, |
| { |
| "clip_ratio": 0.0002507307134465009, |
| "epoch": 1.3824206029099817, |
| "grad_norm": 0.0325283482670784, |
| "kl": 0.006188869476318359, |
| "learning_rate": 1.8186823775480917e-06, |
| "loss": 0.0036, |
| "step": 1050 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 539.5159296989441, |
| "epoch": 1.3843327059245316, |
| "grad_norm": 0.03628634661436081, |
| "kl": 0.007945537567138672, |
| "learning_rate": 1.8126488309755178e-06, |
| "loss": 0.0101, |
| "num_tokens": 148852261.0, |
| "reward": 0.06194196696742438, |
| "reward_std": 0.06792009872151539, |
| "rewards/pure_accuracy_reward_math": 0.06194196580327116, |
| "step": 1051 |
| }, |
| { |
| "clip_ratio": 0.00025563780241100176, |
| "epoch": 1.3862448089390815, |
| "grad_norm": 0.035264719277620316, |
| "kl": 0.007953643798828125, |
| "learning_rate": 1.80661961222639e-06, |
| "loss": 0.0101, |
| "step": 1052 |
| }, |
| { |
| "clip_ratio": 0.0002401949207069265, |
| "epoch": 1.3881569119536314, |
| "grad_norm": 0.034110233187675476, |
| "kl": 0.007923126220703125, |
| "learning_rate": 1.8005947592629551e-06, |
| "loss": 0.0101, |
| "step": 1053 |
| }, |
| { |
| "clip_ratio": 0.00026547102737595196, |
| "epoch": 1.3900690149681814, |
| "grad_norm": 0.03364601358771324, |
| "kl": 0.00788116455078125, |
| "learning_rate": 1.7945743100199706e-06, |
| "loss": 0.01, |
| "step": 1054 |
| }, |
| { |
| "clip_ratio": 0.0002951583905996813, |
| "epoch": 1.3919811179827313, |
| "grad_norm": 0.03397928550839424, |
| "kl": 0.007859230041503906, |
| "learning_rate": 1.788558302404466e-06, |
| "loss": 0.0099, |
| "step": 1055 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.25337266922, |
| "epoch": 1.3938932209972812, |
| "grad_norm": 0.03863634541630745, |
| "kl": 0.006538867950439453, |
| "learning_rate": 1.7825467742955052e-06, |
| "loss": 0.0066, |
| "num_tokens": 152486009.0, |
| "reward": 0.06780134289874695, |
| "reward_std": 0.06736206321511418, |
| "rewards/pure_accuracy_reward_math": 0.06780134057044052, |
| "step": 1056 |
| }, |
| { |
| "clip_ratio": 0.00027592373527340897, |
| "epoch": 1.395805324011831, |
| "grad_norm": 0.036583587527275085, |
| "kl": 0.0065402984619140625, |
| "learning_rate": 1.7765397635439468e-06, |
| "loss": 0.0066, |
| "step": 1057 |
| }, |
| { |
| "clip_ratio": 0.0002849266509201698, |
| "epoch": 1.397717427026381, |
| "grad_norm": 0.03605053946375847, |
| "kl": 0.006500244140625, |
| "learning_rate": 1.7705373079722083e-06, |
| "loss": 0.0065, |
| "step": 1058 |
| }, |
| { |
| "clip_ratio": 0.0003116865132142266, |
| "epoch": 1.399629530040931, |
| "grad_norm": 0.03675729036331177, |
| "kl": 0.006489276885986328, |
| "learning_rate": 1.7645394453740227e-06, |
| "loss": 0.0064, |
| "step": 1059 |
| }, |
| { |
| "clip_ratio": 0.0003249485117748918, |
| "epoch": 1.4015416330554809, |
| "grad_norm": 0.03623329848051071, |
| "kl": 0.006478786468505859, |
| "learning_rate": 1.7585462135142083e-06, |
| "loss": 0.0064, |
| "step": 1060 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.029598236084, |
| "epoch": 1.4034537360700308, |
| "grad_norm": 0.03506990894675255, |
| "kl": 0.006392955780029297, |
| "learning_rate": 1.752557650128423e-06, |
| "loss": 0.0096, |
| "num_tokens": 156082643.0, |
| "reward": 0.06194196664728224, |
| "reward_std": 0.07560620515141636, |
| "rewards/pure_accuracy_reward_math": 0.061941966181620955, |
| "step": 1061 |
| }, |
| { |
| "clip_ratio": 0.0002744606111662051, |
| "epoch": 1.4053658390845807, |
| "grad_norm": 0.03450053185224533, |
| "kl": 0.006424903869628906, |
| "learning_rate": 1.7465737929229317e-06, |
| "loss": 0.0096, |
| "step": 1062 |
| }, |
| { |
| "clip_ratio": 0.00027279697263793423, |
| "epoch": 1.4072779420991306, |
| "grad_norm": 0.033764585852622986, |
| "kl": 0.006496906280517578, |
| "learning_rate": 1.7405946795743665e-06, |
| "loss": 0.0096, |
| "step": 1063 |
| }, |
| { |
| "clip_ratio": 0.000298209258943416, |
| "epoch": 1.4091900451136805, |
| "grad_norm": 0.03335048630833626, |
| "kl": 0.0065898895263671875, |
| "learning_rate": 1.7346203477294916e-06, |
| "loss": 0.0095, |
| "step": 1064 |
| }, |
| { |
| "clip_ratio": 0.00030832760762677935, |
| "epoch": 1.4111021481282304, |
| "grad_norm": 0.03299354016780853, |
| "kl": 0.006653308868408203, |
| "learning_rate": 1.7286508350049627e-06, |
| "loss": 0.0094, |
| "step": 1065 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.4023675918579, |
| "epoch": 1.4130142511427803, |
| "grad_norm": 0.04127517342567444, |
| "kl": 0.010558605194091797, |
| "learning_rate": 1.722686178987097e-06, |
| "loss": 0.0076, |
| "num_tokens": 159696133.0, |
| "reward": 0.06640625282307155, |
| "reward_std": 0.07264956791186705, |
| "rewards/pure_accuracy_reward_math": 0.06640625101863407, |
| "step": 1066 |
| }, |
| { |
| "clip_ratio": 0.00030437137564831573, |
| "epoch": 1.4149263541573303, |
| "grad_norm": 0.039496634155511856, |
| "kl": 0.010538101196289062, |
| "learning_rate": 1.7167264172316273e-06, |
| "loss": 0.0076, |
| "step": 1067 |
| }, |
| { |
| "clip_ratio": 0.0003244270092181978, |
| "epoch": 1.4168384571718802, |
| "grad_norm": 0.039376117289066315, |
| "kl": 0.010515689849853516, |
| "learning_rate": 1.7107715872634731e-06, |
| "loss": 0.0075, |
| "step": 1068 |
| }, |
| { |
| "clip_ratio": 0.0003491952173817481, |
| "epoch": 1.41875056018643, |
| "grad_norm": 0.03863466531038284, |
| "kl": 0.01038360595703125, |
| "learning_rate": 1.7048217265764993e-06, |
| "loss": 0.0075, |
| "step": 1069 |
| }, |
| { |
| "clip_ratio": 0.00037865171140083476, |
| "epoch": 1.42066266320098, |
| "grad_norm": 0.03795957565307617, |
| "kl": 0.010157585144042969, |
| "learning_rate": 1.6988768726332856e-06, |
| "loss": 0.0074, |
| "step": 1070 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 512.8691644668579, |
| "epoch": 1.42257476621553, |
| "grad_norm": 0.04360206797719002, |
| "kl": 0.0067138671875, |
| "learning_rate": 1.6929370628648828e-06, |
| "loss": 0.0086, |
| "num_tokens": 163268528.0, |
| "reward": 0.08565848623402417, |
| "reward_std": 0.08861368341604248, |
| "rewards/pure_accuracy_reward_math": 0.08565848384751007, |
| "step": 1071 |
| }, |
| { |
| "clip_ratio": 0.00031944918799808875, |
| "epoch": 1.4244868692300798, |
| "grad_norm": 0.04292250797152519, |
| "kl": 0.006737709045410156, |
| "learning_rate": 1.6870023346705866e-06, |
| "loss": 0.0085, |
| "step": 1072 |
| }, |
| { |
| "clip_ratio": 0.00031442818647064996, |
| "epoch": 1.4263989722446297, |
| "grad_norm": 0.04044810310006142, |
| "kl": 0.006873607635498047, |
| "learning_rate": 1.6810727254176937e-06, |
| "loss": 0.0085, |
| "step": 1073 |
| }, |
| { |
| "clip_ratio": 0.0003650832475727839, |
| "epoch": 1.4283110752591797, |
| "grad_norm": 0.04156485199928284, |
| "kl": 0.006984233856201172, |
| "learning_rate": 1.6751482724412716e-06, |
| "loss": 0.0084, |
| "step": 1074 |
| }, |
| { |
| "clip_ratio": 0.0003947964444250829, |
| "epoch": 1.4302231782737296, |
| "grad_norm": 0.04023054987192154, |
| "kl": 0.007004737854003906, |
| "learning_rate": 1.669229013043921e-06, |
| "loss": 0.0083, |
| "step": 1075 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 512.7343969345093, |
| "epoch": 1.4321352812882795, |
| "grad_norm": 0.03780645504593849, |
| "kl": 0.006886005401611328, |
| "learning_rate": 1.6633149844955415e-06, |
| "loss": 0.0094, |
| "num_tokens": 166836260.0, |
| "reward": 0.0797991111758165, |
| "reward_std": 0.08157813875004649, |
| "rewards/pure_accuracy_reward_math": 0.07979910867288709, |
| "step": 1076 |
| }, |
| { |
| "clip_ratio": 0.0002608302990552147, |
| "epoch": 1.4340473843028292, |
| "grad_norm": 0.03681138530373573, |
| "kl": 0.006786823272705078, |
| "learning_rate": 1.6574062240330996e-06, |
| "loss": 0.0093, |
| "step": 1077 |
| }, |
| { |
| "clip_ratio": 0.00031450060896531795, |
| "epoch": 1.4359594873173793, |
| "grad_norm": 0.036778852343559265, |
| "kl": 0.0066986083984375, |
| "learning_rate": 1.651502768860389e-06, |
| "loss": 0.0093, |
| "step": 1078 |
| }, |
| { |
| "clip_ratio": 0.0003176571812559814, |
| "epoch": 1.437871590331929, |
| "grad_norm": 0.03592304140329361, |
| "kl": 0.006758213043212891, |
| "learning_rate": 1.6456046561478023e-06, |
| "loss": 0.0092, |
| "step": 1079 |
| }, |
| { |
| "clip_ratio": 0.0003236016519281293, |
| "epoch": 1.4397836933464792, |
| "grad_norm": 0.03520684316754341, |
| "kl": 0.006850242614746094, |
| "learning_rate": 1.6397119230320919e-06, |
| "loss": 0.0092, |
| "step": 1080 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 508.80498933792114, |
| "epoch": 1.4416957963610288, |
| "grad_norm": 0.04630957916378975, |
| "kl": 0.01150655746459961, |
| "learning_rate": 1.633824606616138e-06, |
| "loss": 0.008, |
| "num_tokens": 170392081.0, |
| "reward": 0.07589286129223183, |
| "reward_std": 0.08140548272058368, |
| "rewards/pure_accuracy_reward_math": 0.07589285844005644, |
| "step": 1081 |
| }, |
| { |
| "clip_ratio": 0.00028873196572476445, |
| "epoch": 1.443607899375579, |
| "grad_norm": 0.04534924402832985, |
| "kl": 0.01107931137084961, |
| "learning_rate": 1.6279427439687154e-06, |
| "loss": 0.008, |
| "step": 1082 |
| }, |
| { |
| "clip_ratio": 0.000319909158235987, |
| "epoch": 1.4455200023901287, |
| "grad_norm": 0.044707395136356354, |
| "kl": 0.010364532470703125, |
| "learning_rate": 1.622066372124262e-06, |
| "loss": 0.0079, |
| "step": 1083 |
| }, |
| { |
| "clip_ratio": 0.0003388643909829625, |
| "epoch": 1.4474321054046788, |
| "grad_norm": 0.038643479347229004, |
| "kl": 0.009525775909423828, |
| "learning_rate": 1.6161955280826399e-06, |
| "loss": 0.0078, |
| "step": 1084 |
| }, |
| { |
| "clip_ratio": 0.0003223289492098047, |
| "epoch": 1.4493442084192285, |
| "grad_norm": 0.12098709493875504, |
| "kl": 0.010370254516601562, |
| "learning_rate": 1.6103302488089104e-06, |
| "loss": 0.0078, |
| "step": 1085 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.3169894218445, |
| "epoch": 1.4512563114337784, |
| "grad_norm": 0.03693209961056709, |
| "kl": 0.006680965423583984, |
| "learning_rate": 1.6044705712330932e-06, |
| "loss": 0.0059, |
| "num_tokens": 173992817.0, |
| "reward": 0.07031250311410986, |
| "reward_std": 0.07530715462053195, |
| "rewards/pure_accuracy_reward_math": 0.07031250142608769, |
| "step": 1086 |
| }, |
| { |
| "clip_ratio": 0.0002918191117657898, |
| "epoch": 1.4531684144483283, |
| "grad_norm": 0.03641385957598686, |
| "kl": 0.0065898895263671875, |
| "learning_rate": 1.5986165322499398e-06, |
| "loss": 0.0059, |
| "step": 1087 |
| }, |
| { |
| "clip_ratio": 0.0002921736467840219, |
| "epoch": 1.4550805174628783, |
| "grad_norm": 0.03598758950829506, |
| "kl": 0.006548881530761719, |
| "learning_rate": 1.5927681687186964e-06, |
| "loss": 0.0058, |
| "step": 1088 |
| }, |
| { |
| "clip_ratio": 0.0003169650843233285, |
| "epoch": 1.4569926204774282, |
| "grad_norm": 0.036268141120672226, |
| "kl": 0.006561756134033203, |
| "learning_rate": 1.5869255174628778e-06, |
| "loss": 0.0058, |
| "step": 1089 |
| }, |
| { |
| "clip_ratio": 0.0003259218068478731, |
| "epoch": 1.458904723491978, |
| "grad_norm": 0.03529893979430199, |
| "kl": 0.006597042083740234, |
| "learning_rate": 1.5810886152700302e-06, |
| "loss": 0.0057, |
| "step": 1090 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 533.391206741333, |
| "epoch": 1.460816826506528, |
| "grad_norm": 0.04034799709916115, |
| "kl": 0.006509304046630859, |
| "learning_rate": 1.5752574988915004e-06, |
| "loss": 0.0066, |
| "num_tokens": 177633359.0, |
| "reward": 0.07477678920258768, |
| "reward_std": 0.0747891838545911, |
| "rewards/pure_accuracy_reward_math": 0.07477678699069656, |
| "step": 1091 |
| }, |
| { |
| "clip_ratio": 0.0002679697158214367, |
| "epoch": 1.462728929521078, |
| "grad_norm": 0.039328683167696, |
| "kl": 0.006606101989746094, |
| "learning_rate": 1.5694322050422096e-06, |
| "loss": 0.0066, |
| "step": 1092 |
| }, |
| { |
| "clip_ratio": 0.0002975759220475993, |
| "epoch": 1.4646410325356278, |
| "grad_norm": 0.03947217017412186, |
| "kl": 0.00665283203125, |
| "learning_rate": 1.5636127704004133e-06, |
| "loss": 0.0065, |
| "step": 1093 |
| }, |
| { |
| "clip_ratio": 0.0003127538088278925, |
| "epoch": 1.4665531355501777, |
| "grad_norm": 0.03733786940574646, |
| "kl": 0.006627559661865234, |
| "learning_rate": 1.5577992316074783e-06, |
| "loss": 0.0064, |
| "step": 1094 |
| }, |
| { |
| "clip_ratio": 0.00035554791872982605, |
| "epoch": 1.4684652385647277, |
| "grad_norm": 0.03660706803202629, |
| "kl": 0.0065364837646484375, |
| "learning_rate": 1.5519916252676482e-06, |
| "loss": 0.0064, |
| "step": 1095 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.1163763999939, |
| "epoch": 1.4703773415792776, |
| "grad_norm": 0.06871657073497772, |
| "kl": 0.010003089904785156, |
| "learning_rate": 1.5461899879478133e-06, |
| "loss": 0.0057, |
| "num_tokens": 181268648.0, |
| "reward": 0.0744977711874526, |
| "reward_std": 0.08333237702026963, |
| "rewards/pure_accuracy_reward_math": 0.0744977695576381, |
| "step": 1096 |
| }, |
| { |
| "clip_ratio": 0.00032988911306119917, |
| "epoch": 1.4722894445938275, |
| "grad_norm": 0.04868275299668312, |
| "kl": 0.009030342102050781, |
| "learning_rate": 1.5403943561772789e-06, |
| "loss": 0.0057, |
| "step": 1097 |
| }, |
| { |
| "clip_ratio": 0.0003833602018517013, |
| "epoch": 1.4742015476083774, |
| "grad_norm": 0.04073934629559517, |
| "kl": 0.00842428207397461, |
| "learning_rate": 1.5346047664475422e-06, |
| "loss": 0.0056, |
| "step": 1098 |
| }, |
| { |
| "clip_ratio": 0.00040459603366116426, |
| "epoch": 1.4761136506229273, |
| "grad_norm": 0.04011493921279907, |
| "kl": 0.008179187774658203, |
| "learning_rate": 1.5288212552120524e-06, |
| "loss": 0.0055, |
| "step": 1099 |
| }, |
| { |
| "clip_ratio": 0.0004078742092019638, |
| "epoch": 1.4780257536374772, |
| "grad_norm": 0.03785649687051773, |
| "kl": 0.008193016052246094, |
| "learning_rate": 1.5230438588859881e-06, |
| "loss": 0.0054, |
| "step": 1100 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 541.5837321281433, |
| "epoch": 1.4799378566520272, |
| "grad_norm": 0.04047717526555061, |
| "kl": 0.007642269134521484, |
| "learning_rate": 1.517272613846027e-06, |
| "loss": 0.0051, |
| "num_tokens": 184939348.0, |
| "reward": 0.06863839572179131, |
| "reward_std": 0.07131457631476223, |
| "rewards/pure_accuracy_reward_math": 0.06863839420839213, |
| "step": 1101 |
| }, |
| { |
| "clip_ratio": 0.00026072144959243815, |
| "epoch": 1.481849959666577, |
| "grad_norm": 0.037731293588876724, |
| "kl": 0.007551670074462891, |
| "learning_rate": 1.511507556430114e-06, |
| "loss": 0.0051, |
| "step": 1102 |
| }, |
| { |
| "clip_ratio": 0.00029216510773721893, |
| "epoch": 1.483762062681127, |
| "grad_norm": 0.03771767392754555, |
| "kl": 0.007477760314941406, |
| "learning_rate": 1.5057487229372347e-06, |
| "loss": 0.0051, |
| "step": 1103 |
| }, |
| { |
| "clip_ratio": 0.0003181908435294645, |
| "epoch": 1.485674165695677, |
| "grad_norm": 0.03619125112891197, |
| "kl": 0.0074062347412109375, |
| "learning_rate": 1.4999961496271889e-06, |
| "loss": 0.005, |
| "step": 1104 |
| }, |
| { |
| "clip_ratio": 0.0003646736843165854, |
| "epoch": 1.4875862687102268, |
| "grad_norm": 0.035048868507146835, |
| "kl": 0.007380008697509766, |
| "learning_rate": 1.4942498727203578e-06, |
| "loss": 0.0049, |
| "step": 1105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 541.8585615158081, |
| "epoch": 1.4894983717247767, |
| "grad_norm": 0.0386812798678875, |
| "kl": 0.006747245788574219, |
| "learning_rate": 1.4885099283974774e-06, |
| "loss": 0.0071, |
| "num_tokens": 188614221.0, |
| "reward": 0.07198661062284373, |
| "reward_std": 0.08140548341907561, |
| "rewards/pure_accuracy_reward_math": 0.07198660864378326, |
| "step": 1106 |
| }, |
| { |
| "clip_ratio": 0.0003357146362077401, |
| "epoch": 1.4914104747393266, |
| "grad_norm": 0.03723128139972687, |
| "kl": 0.006694316864013672, |
| "learning_rate": 1.482776352799414e-06, |
| "loss": 0.0071, |
| "step": 1107 |
| }, |
| { |
| "clip_ratio": 0.0003692662889989151, |
| "epoch": 1.4933225777538766, |
| "grad_norm": 0.038370903581380844, |
| "kl": 0.006665706634521484, |
| "learning_rate": 1.4770491820269317e-06, |
| "loss": 0.007, |
| "step": 1108 |
| }, |
| { |
| "clip_ratio": 0.00040588962588117283, |
| "epoch": 1.4952346807684265, |
| "grad_norm": 0.037489671260118484, |
| "kl": 0.006663322448730469, |
| "learning_rate": 1.4713284521404678e-06, |
| "loss": 0.0069, |
| "step": 1109 |
| }, |
| { |
| "clip_ratio": 0.00039138679812822375, |
| "epoch": 1.4971467837829764, |
| "grad_norm": 0.03641659393906593, |
| "kl": 0.006697654724121094, |
| "learning_rate": 1.465614199159905e-06, |
| "loss": 0.0069, |
| "step": 1110 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.476583480835, |
| "epoch": 1.4990588867975263, |
| "grad_norm": 1.8961507081985474, |
| "kl": 0.03508758544921875, |
| "learning_rate": 1.4599064590643472e-06, |
| "loss": 0.0056, |
| "num_tokens": 192212657.0, |
| "reward": 0.0753348250000272, |
| "reward_std": 0.07783834805013612, |
| "rewards/pure_accuracy_reward_math": 0.07533482302096672, |
| "step": 1111 |
| }, |
| { |
| "clip_ratio": 0.00029740781877762856, |
| "epoch": 1.500970989812076, |
| "grad_norm": 0.08476530015468597, |
| "kl": 0.011601448059082031, |
| "learning_rate": 1.4542052677918885e-06, |
| "loss": 0.0047, |
| "step": 1112 |
| }, |
| { |
| "clip_ratio": 0.0003210891072171762, |
| "epoch": 1.5028830928266261, |
| "grad_norm": 0.04907820373773575, |
| "kl": 0.010628223419189453, |
| "learning_rate": 1.4485106612393897e-06, |
| "loss": 0.0046, |
| "step": 1113 |
| }, |
| { |
| "clip_ratio": 0.00033912417364945213, |
| "epoch": 1.5047951958411758, |
| "grad_norm": 0.04438456520438194, |
| "kl": 0.010659217834472656, |
| "learning_rate": 1.4428226752622509e-06, |
| "loss": 0.0046, |
| "step": 1114 |
| }, |
| { |
| "clip_ratio": 0.0003756833369834567, |
| "epoch": 1.506707298855726, |
| "grad_norm": 0.0422808900475502, |
| "kl": 0.010442733764648438, |
| "learning_rate": 1.437141345674189e-06, |
| "loss": 0.0045, |
| "step": 1115 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 535.0778713226318, |
| "epoch": 1.5086194018702757, |
| "grad_norm": 0.048265133053064346, |
| "kl": 0.007592678070068359, |
| "learning_rate": 1.4314667082470064e-06, |
| "loss": 0.0086, |
| "num_tokens": 195861088.0, |
| "reward": 0.07142857479630038, |
| "reward_std": 0.08346496871672571, |
| "rewards/pure_accuracy_reward_math": 0.07142857287544757, |
| "step": 1116 |
| }, |
| { |
| "clip_ratio": 0.0003429410510875641, |
| "epoch": 1.5105315048848258, |
| "grad_norm": 0.04287589713931084, |
| "kl": 0.007152557373046875, |
| "learning_rate": 1.4257987987103727e-06, |
| "loss": 0.0085, |
| "step": 1117 |
| }, |
| { |
| "clip_ratio": 0.0003726668836634417, |
| "epoch": 1.5124436078993755, |
| "grad_norm": 0.0397462397813797, |
| "kl": 0.006825447082519531, |
| "learning_rate": 1.420137652751593e-06, |
| "loss": 0.0085, |
| "step": 1118 |
| }, |
| { |
| "clip_ratio": 0.0003763367328133427, |
| "epoch": 1.5143557109139256, |
| "grad_norm": 0.03851110488176346, |
| "kl": 0.006707668304443359, |
| "learning_rate": 1.4144833060153887e-06, |
| "loss": 0.0084, |
| "step": 1119 |
| }, |
| { |
| "clip_ratio": 0.0003624607439292049, |
| "epoch": 1.5162678139284753, |
| "grad_norm": 0.03720558434724808, |
| "kl": 0.00676727294921875, |
| "learning_rate": 1.408835794103669e-06, |
| "loss": 0.0083, |
| "step": 1120 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.7569994926453, |
| "epoch": 1.5181799169430255, |
| "grad_norm": 0.03832938149571419, |
| "kl": 0.008425712585449219, |
| "learning_rate": 1.4031951525753088e-06, |
| "loss": 0.0071, |
| "num_tokens": 199475701.0, |
| "reward": 0.08565848635043949, |
| "reward_std": 0.08179086120799184, |
| "rewards/pure_accuracy_reward_math": 0.08565848338184878, |
| "step": 1121 |
| }, |
| { |
| "clip_ratio": 0.00028257126655262255, |
| "epoch": 1.5200920199575751, |
| "grad_norm": 0.038414496928453445, |
| "kl": 0.008458137512207031, |
| "learning_rate": 1.3975614169459253e-06, |
| "loss": 0.0071, |
| "step": 1122 |
| }, |
| { |
| "clip_ratio": 0.0003134008442202685, |
| "epoch": 1.5220041229721253, |
| "grad_norm": 0.03928304836153984, |
| "kl": 0.008496284484863281, |
| "learning_rate": 1.391934622687652e-06, |
| "loss": 0.0071, |
| "step": 1123 |
| }, |
| { |
| "clip_ratio": 0.00030222541431612626, |
| "epoch": 1.523916225986675, |
| "grad_norm": 0.038087427616119385, |
| "kl": 0.008494377136230469, |
| "learning_rate": 1.38631480522892e-06, |
| "loss": 0.007, |
| "step": 1124 |
| }, |
| { |
| "clip_ratio": 0.0002927070846396873, |
| "epoch": 1.525828329001225, |
| "grad_norm": 0.03641984984278679, |
| "kl": 0.008457183837890625, |
| "learning_rate": 1.3807019999542287e-06, |
| "loss": 0.0069, |
| "step": 1125 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.1537666320801, |
| "epoch": 1.5277404320157748, |
| "grad_norm": 0.040940940380096436, |
| "kl": 0.006596565246582031, |
| "learning_rate": 1.3750962422039269e-06, |
| "loss": 0.0058, |
| "num_tokens": 203109136.0, |
| "reward": 0.07254464621655643, |
| "reward_std": 0.08217623952077702, |
| "rewards/pure_accuracy_reward_math": 0.07254464400466532, |
| "step": 1126 |
| }, |
| { |
| "clip_ratio": 0.00031519718078243386, |
| "epoch": 1.5296525350303247, |
| "grad_norm": 0.038493506610393524, |
| "kl": 0.006714344024658203, |
| "learning_rate": 1.369497567273989e-06, |
| "loss": 0.0058, |
| "step": 1127 |
| }, |
| { |
| "clip_ratio": 0.0003513000764314711, |
| "epoch": 1.5315646380448746, |
| "grad_norm": 0.039495162665843964, |
| "kl": 0.006772041320800781, |
| "learning_rate": 1.3639060104157964e-06, |
| "loss": 0.0057, |
| "step": 1128 |
| }, |
| { |
| "clip_ratio": 0.00033387296190312554, |
| "epoch": 1.5334767410594246, |
| "grad_norm": 0.03875305503606796, |
| "kl": 0.006872653961181641, |
| "learning_rate": 1.3583216068359078e-06, |
| "loss": 0.0057, |
| "step": 1129 |
| }, |
| { |
| "clip_ratio": 0.00036185752793471693, |
| "epoch": 1.5353888440739745, |
| "grad_norm": 0.03817266598343849, |
| "kl": 0.006899356842041016, |
| "learning_rate": 1.3527443916958466e-06, |
| "loss": 0.0056, |
| "step": 1130 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 537.4143671989441, |
| "epoch": 1.5373009470885244, |
| "grad_norm": 0.035565100610256195, |
| "kl": 0.006679058074951172, |
| "learning_rate": 1.3471744001118718e-06, |
| "loss": 0.0091, |
| "num_tokens": 206769717.0, |
| "reward": 0.07533482497092336, |
| "reward_std": 0.07436373975360766, |
| "rewards/pure_accuracy_reward_math": 0.07533482293365523, |
| "step": 1131 |
| }, |
| { |
| "clip_ratio": 0.00028060592541123697, |
| "epoch": 1.5392130501030743, |
| "grad_norm": 0.036901701241731644, |
| "kl": 0.006720542907714844, |
| "learning_rate": 1.3416116671547613e-06, |
| "loss": 0.0091, |
| "step": 1132 |
| }, |
| { |
| "clip_ratio": 0.00034766932589036514, |
| "epoch": 1.5411251531176242, |
| "grad_norm": 0.03489091992378235, |
| "kl": 0.006618499755859375, |
| "learning_rate": 1.3360562278495899e-06, |
| "loss": 0.009, |
| "step": 1133 |
| }, |
| { |
| "clip_ratio": 0.0003513962886927402, |
| "epoch": 1.5430372561321741, |
| "grad_norm": 0.035007573664188385, |
| "kl": 0.0066070556640625, |
| "learning_rate": 1.3305081171755092e-06, |
| "loss": 0.009, |
| "step": 1134 |
| }, |
| { |
| "clip_ratio": 0.00036896456708745973, |
| "epoch": 1.544949359146724, |
| "grad_norm": 0.03363417461514473, |
| "kl": 0.006587028503417969, |
| "learning_rate": 1.3249673700655246e-06, |
| "loss": 0.0089, |
| "step": 1135 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.2251925468445, |
| "epoch": 1.546861462161274, |
| "grad_norm": 0.037738338112831116, |
| "kl": 0.006687164306640625, |
| "learning_rate": 1.3194340214062828e-06, |
| "loss": 0.0066, |
| "num_tokens": 210404892.0, |
| "reward": 0.07477678978466429, |
| "reward_std": 0.08492635452421382, |
| "rewards/pure_accuracy_reward_math": 0.07477678699069656, |
| "step": 1136 |
| }, |
| { |
| "clip_ratio": 0.0003166603274848967, |
| "epoch": 1.5487735651758239, |
| "grad_norm": 0.03711307421326637, |
| "kl": 0.0067272186279296875, |
| "learning_rate": 1.3139081060378423e-06, |
| "loss": 0.0066, |
| "step": 1137 |
| }, |
| { |
| "clip_ratio": 0.00032532861348499864, |
| "epoch": 1.5506856681903738, |
| "grad_norm": 0.0381547249853611, |
| "kl": 0.006831169128417969, |
| "learning_rate": 1.3083896587534606e-06, |
| "loss": 0.0065, |
| "step": 1138 |
| }, |
| { |
| "clip_ratio": 0.0003168874280845557, |
| "epoch": 1.5525977712049237, |
| "grad_norm": 0.03702245280146599, |
| "kl": 0.0068492889404296875, |
| "learning_rate": 1.3028787142993723e-06, |
| "loss": 0.0064, |
| "step": 1139 |
| }, |
| { |
| "clip_ratio": 0.00031372528076190065, |
| "epoch": 1.5545098742194736, |
| "grad_norm": 0.035462986677885056, |
| "kl": 0.0068511962890625, |
| "learning_rate": 1.297375307374574e-06, |
| "loss": 0.0063, |
| "step": 1140 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.9913792610168, |
| "epoch": 1.5564219772340235, |
| "grad_norm": 0.0402364507317543, |
| "kl": 0.006835460662841797, |
| "learning_rate": 1.2918794726306003e-06, |
| "loss": 0.0099, |
| "num_tokens": 214034825.0, |
| "reward": 0.07310268151923083, |
| "reward_std": 0.07917333993827924, |
| "rewards/pure_accuracy_reward_math": 0.07310268000583164, |
| "step": 1141 |
| }, |
| { |
| "clip_ratio": 0.0003137970834359294, |
| "epoch": 1.5583340802485734, |
| "grad_norm": 0.03920648992061615, |
| "kl": 0.006829738616943359, |
| "learning_rate": 1.2863912446713084e-06, |
| "loss": 0.0098, |
| "step": 1142 |
| }, |
| { |
| "clip_ratio": 0.00032378236608110456, |
| "epoch": 1.5602461832631231, |
| "grad_norm": 0.03806397691369057, |
| "kl": 0.006905078887939453, |
| "learning_rate": 1.2809106580526636e-06, |
| "loss": 0.0098, |
| "step": 1143 |
| }, |
| { |
| "clip_ratio": 0.0003143088524097948, |
| "epoch": 1.5621582862776733, |
| "grad_norm": 0.03801356628537178, |
| "kl": 0.006966590881347656, |
| "learning_rate": 1.2754377472825153e-06, |
| "loss": 0.0097, |
| "step": 1144 |
| }, |
| { |
| "clip_ratio": 0.00035796050920566813, |
| "epoch": 1.564070389292223, |
| "grad_norm": 0.036964964121580124, |
| "kl": 0.006992816925048828, |
| "learning_rate": 1.2699725468203832e-06, |
| "loss": 0.0096, |
| "step": 1145 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 538.6370244026184, |
| "epoch": 1.565982492306773, |
| "grad_norm": 0.045449208468198776, |
| "kl": 0.007224559783935547, |
| "learning_rate": 1.2645150910772413e-06, |
| "loss": 0.0043, |
| "num_tokens": 217697304.0, |
| "reward": 0.07393973600119352, |
| "reward_std": 0.08620888477889821, |
| "rewards/pure_accuracy_reward_math": 0.07393973361467943, |
| "step": 1146 |
| }, |
| { |
| "clip_ratio": 0.0003596847872131548, |
| "epoch": 1.5678945953213228, |
| "grad_norm": 0.03882161155343056, |
| "kl": 0.006949901580810547, |
| "learning_rate": 1.2590654144152992e-06, |
| "loss": 0.0043, |
| "step": 1147 |
| }, |
| { |
| "clip_ratio": 0.0004527134210547956, |
| "epoch": 1.569806698335873, |
| "grad_norm": 0.03764580935239792, |
| "kl": 0.00691986083984375, |
| "learning_rate": 1.2536235511477852e-06, |
| "loss": 0.0043, |
| "step": 1148 |
| }, |
| { |
| "clip_ratio": 0.0005161078099717997, |
| "epoch": 1.5717188013504226, |
| "grad_norm": 0.03833252564072609, |
| "kl": 0.006892681121826172, |
| "learning_rate": 1.2481895355387341e-06, |
| "loss": 0.0042, |
| "step": 1149 |
| }, |
| { |
| "clip_ratio": 0.0005320426059824968, |
| "epoch": 1.5736309043649728, |
| "grad_norm": 0.03876457363367081, |
| "kl": 0.006943702697753906, |
| "learning_rate": 1.2427634018027673e-06, |
| "loss": 0.0041, |
| "step": 1150 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.9707288742065, |
| "epoch": 1.5755430073795225, |
| "grad_norm": 0.03937402740120888, |
| "kl": 0.007305145263671875, |
| "learning_rate": 1.2373451841048781e-06, |
| "loss": 0.0078, |
| "num_tokens": 221325451.0, |
| "reward": 0.08258928963914514, |
| "reward_std": 0.08058846154017374, |
| "rewards/pure_accuracy_reward_math": 0.08258928655413911, |
| "step": 1151 |
| }, |
| { |
| "clip_ratio": 0.0002857717965980555, |
| "epoch": 1.5774551103940726, |
| "grad_norm": 0.03863917291164398, |
| "kl": 0.007287502288818359, |
| "learning_rate": 1.2319349165602202e-06, |
| "loss": 0.0078, |
| "step": 1152 |
| }, |
| { |
| "clip_ratio": 0.0002796752659151025, |
| "epoch": 1.5793672134086223, |
| "grad_norm": 0.03722836822271347, |
| "kl": 0.007286548614501953, |
| "learning_rate": 1.2265326332338875e-06, |
| "loss": 0.0077, |
| "step": 1153 |
| }, |
| { |
| "clip_ratio": 0.00034041513032434523, |
| "epoch": 1.5812793164231724, |
| "grad_norm": 0.03688417002558708, |
| "kl": 0.007335662841796875, |
| "learning_rate": 1.2211383681407022e-06, |
| "loss": 0.0076, |
| "step": 1154 |
| }, |
| { |
| "clip_ratio": 0.0003595712430524145, |
| "epoch": 1.5831914194377221, |
| "grad_norm": 0.037124987691640854, |
| "kl": 0.007359981536865234, |
| "learning_rate": 1.2157521552450035e-06, |
| "loss": 0.0076, |
| "step": 1155 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 540.098798751831, |
| "epoch": 1.5851035224522723, |
| "grad_norm": 0.03577388823032379, |
| "kl": 0.0069561004638671875, |
| "learning_rate": 1.210374028460428e-06, |
| "loss": 0.0065, |
| "num_tokens": 224996253.0, |
| "reward": 0.06863839607103728, |
| "reward_std": 0.07376563857542351, |
| "rewards/pure_accuracy_reward_math": 0.06863839426659979, |
| "step": 1156 |
| }, |
| { |
| "clip_ratio": 0.00025091522741149674, |
| "epoch": 1.587015625466822, |
| "grad_norm": 0.03386949375271797, |
| "kl": 0.006894588470458984, |
| "learning_rate": 1.2050040216497e-06, |
| "loss": 0.0065, |
| "step": 1157 |
| }, |
| { |
| "clip_ratio": 0.00029767470277874963, |
| "epoch": 1.588927728481372, |
| "grad_norm": 0.033231545239686966, |
| "kl": 0.0068531036376953125, |
| "learning_rate": 1.1996421686244179e-06, |
| "loss": 0.0064, |
| "step": 1158 |
| }, |
| { |
| "clip_ratio": 0.00030627386024661973, |
| "epoch": 1.5908398314959218, |
| "grad_norm": 0.0327543206512928, |
| "kl": 0.006781578063964844, |
| "learning_rate": 1.1942885031448397e-06, |
| "loss": 0.0064, |
| "step": 1159 |
| }, |
| { |
| "clip_ratio": 0.00032285955057886895, |
| "epoch": 1.5927519345104717, |
| "grad_norm": 0.03283894062042236, |
| "kl": 0.006725788116455078, |
| "learning_rate": 1.1889430589196727e-06, |
| "loss": 0.0063, |
| "step": 1160 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 540.7405333518982, |
| "epoch": 1.5946640375250216, |
| "grad_norm": 0.04240734875202179, |
| "kl": 0.006897449493408203, |
| "learning_rate": 1.183605869605858e-06, |
| "loss": 0.0064, |
| "num_tokens": 228663991.0, |
| "reward": 0.08091518227593042, |
| "reward_std": 0.08951703325146809, |
| "rewards/pure_accuracy_reward_math": 0.08091518018045463, |
| "step": 1161 |
| }, |
| { |
| "clip_ratio": 0.00035278943187222467, |
| "epoch": 1.5965761405395715, |
| "grad_norm": 0.04050403833389282, |
| "kl": 0.006961345672607422, |
| "learning_rate": 1.1782769688083647e-06, |
| "loss": 0.0064, |
| "step": 1162 |
| }, |
| { |
| "clip_ratio": 0.00034535837551175064, |
| "epoch": 1.5984882435541214, |
| "grad_norm": 0.03872028365731239, |
| "kl": 0.007065296173095703, |
| "learning_rate": 1.1729563900799695e-06, |
| "loss": 0.0063, |
| "step": 1163 |
| }, |
| { |
| "clip_ratio": 0.00037939938943054585, |
| "epoch": 1.6004003465686714, |
| "grad_norm": 0.039447493851184845, |
| "kl": 0.007191181182861328, |
| "learning_rate": 1.1676441669210543e-06, |
| "loss": 0.0063, |
| "step": 1164 |
| }, |
| { |
| "clip_ratio": 0.00037003348657549395, |
| "epoch": 1.6023124495832213, |
| "grad_norm": 0.03724885359406471, |
| "kl": 0.0071163177490234375, |
| "learning_rate": 1.1623403327793881e-06, |
| "loss": 0.0061, |
| "step": 1165 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.3211750984192, |
| "epoch": 1.6042245525977712, |
| "grad_norm": 0.9447879791259766, |
| "kl": 0.03227043151855469, |
| "learning_rate": 1.1570449210499213e-06, |
| "loss": 0.0085, |
| "num_tokens": 232302082.0, |
| "reward": 0.07756696781143546, |
| "reward_std": 0.0780110054765828, |
| "rewards/pure_accuracy_reward_math": 0.07756696577416733, |
| "step": 1166 |
| }, |
| { |
| "clip_ratio": 0.00036849399879201883, |
| "epoch": 1.606136655612321, |
| "grad_norm": 0.26742058992385864, |
| "kl": 0.011518478393554688, |
| "learning_rate": 1.1517579650745713e-06, |
| "loss": 0.0079, |
| "step": 1167 |
| }, |
| { |
| "clip_ratio": 0.00029733346730154153, |
| "epoch": 1.608048758626871, |
| "grad_norm": 0.3907225728034973, |
| "kl": 0.017581462860107422, |
| "learning_rate": 1.1464794981420187e-06, |
| "loss": 0.0079, |
| "step": 1168 |
| }, |
| { |
| "clip_ratio": 0.0003680569542439116, |
| "epoch": 1.609960861641421, |
| "grad_norm": 0.1778813600540161, |
| "kl": 0.010699748992919922, |
| "learning_rate": 1.1412095534874912e-06, |
| "loss": 0.0077, |
| "step": 1169 |
| }, |
| { |
| "clip_ratio": 0.0003726620370798628, |
| "epoch": 1.6118729646559709, |
| "grad_norm": 0.2035137563943863, |
| "kl": 0.01429891586303711, |
| "learning_rate": 1.135948164292557e-06, |
| "loss": 0.0077, |
| "step": 1170 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.0362968444824, |
| "epoch": 1.6137850676705208, |
| "grad_norm": 0.040138401091098785, |
| "kl": 0.008060932159423828, |
| "learning_rate": 1.130695363684916e-06, |
| "loss": 0.0096, |
| "num_tokens": 235898380.0, |
| "reward": 0.0630580390279647, |
| "reward_std": 0.07195894001051784, |
| "rewards/pure_accuracy_reward_math": 0.06305803687428124, |
| "step": 1171 |
| }, |
| { |
| "clip_ratio": 0.0002708259837049809, |
| "epoch": 1.6156971706850707, |
| "grad_norm": 0.03859123960137367, |
| "kl": 0.008191585540771484, |
| "learning_rate": 1.1254511847381922e-06, |
| "loss": 0.0096, |
| "step": 1172 |
| }, |
| { |
| "clip_ratio": 0.00029455311903348047, |
| "epoch": 1.6176092736996206, |
| "grad_norm": 0.03898981586098671, |
| "kl": 0.008168697357177734, |
| "learning_rate": 1.1202156604717234e-06, |
| "loss": 0.0095, |
| "step": 1173 |
| }, |
| { |
| "clip_ratio": 0.0003440694692926627, |
| "epoch": 1.6195213767141705, |
| "grad_norm": 0.0370321087539196, |
| "kl": 0.00800466537475586, |
| "learning_rate": 1.1149888238503537e-06, |
| "loss": 0.0094, |
| "step": 1174 |
| }, |
| { |
| "clip_ratio": 0.00040963905792068545, |
| "epoch": 1.6214334797287204, |
| "grad_norm": 0.03698049858212471, |
| "kl": 0.007803440093994141, |
| "learning_rate": 1.109770707784229e-06, |
| "loss": 0.0094, |
| "step": 1175 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.937527179718, |
| "epoch": 1.6233455827432703, |
| "grad_norm": 0.039002615958452225, |
| "kl": 0.007039546966552734, |
| "learning_rate": 1.1045613451285837e-06, |
| "loss": 0.0074, |
| "num_tokens": 239513448.0, |
| "reward": 0.06584821754950099, |
| "reward_std": 0.07595151895657182, |
| "rewards/pure_accuracy_reward_math": 0.06584821516298689, |
| "step": 1176 |
| }, |
| { |
| "clip_ratio": 0.0003209126220440339, |
| "epoch": 1.6252576857578203, |
| "grad_norm": 0.038693126291036606, |
| "kl": 0.0069637298583984375, |
| "learning_rate": 1.0993607686835408e-06, |
| "loss": 0.0074, |
| "step": 1177 |
| }, |
| { |
| "clip_ratio": 0.0003234959946212257, |
| "epoch": 1.62716978877237, |
| "grad_norm": 0.03805870935320854, |
| "kl": 0.006987094879150391, |
| "learning_rate": 1.0941690111939002e-06, |
| "loss": 0.0073, |
| "step": 1178 |
| }, |
| { |
| "clip_ratio": 0.0003316311403978034, |
| "epoch": 1.62908189178692, |
| "grad_norm": 0.03687576577067375, |
| "kl": 0.0070285797119140625, |
| "learning_rate": 1.0889861053489341e-06, |
| "loss": 0.0072, |
| "step": 1179 |
| }, |
| { |
| "clip_ratio": 0.00033663610071243966, |
| "epoch": 1.6309939948014698, |
| "grad_norm": 0.03717907890677452, |
| "kl": 0.007116794586181641, |
| "learning_rate": 1.0838120837821814e-06, |
| "loss": 0.0071, |
| "step": 1180 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 514.2112393379211, |
| "epoch": 1.63290609781602, |
| "grad_norm": 0.04346395656466484, |
| "kl": 0.007472515106201172, |
| "learning_rate": 1.0786469790712441e-06, |
| "loss": 0.0059, |
| "num_tokens": 243092265.0, |
| "reward": 0.07700893233413808, |
| "reward_std": 0.07526089128805324, |
| "rewards/pure_accuracy_reward_math": 0.07700893029686995, |
| "step": 1181 |
| }, |
| { |
| "clip_ratio": 0.0002878125141592136, |
| "epoch": 1.6348182008305696, |
| "grad_norm": 0.03890342637896538, |
| "kl": 0.007323265075683594, |
| "learning_rate": 1.0734908237375783e-06, |
| "loss": 0.0059, |
| "step": 1182 |
| }, |
| { |
| "clip_ratio": 0.00031910790164602076, |
| "epoch": 1.6367303038451197, |
| "grad_norm": 0.03748926892876625, |
| "kl": 0.007243156433105469, |
| "learning_rate": 1.0683436502462915e-06, |
| "loss": 0.0058, |
| "step": 1183 |
| }, |
| { |
| "clip_ratio": 0.00036283263597169935, |
| "epoch": 1.6386424068596694, |
| "grad_norm": 0.037570755928754807, |
| "kl": 0.007138252258300781, |
| "learning_rate": 1.0632054910059391e-06, |
| "loss": 0.0058, |
| "step": 1184 |
| }, |
| { |
| "clip_ratio": 0.00039574184188495565, |
| "epoch": 1.6405545098742196, |
| "grad_norm": 0.038306284695863724, |
| "kl": 0.007193088531494141, |
| "learning_rate": 1.0580763783683187e-06, |
| "loss": 0.0057, |
| "step": 1185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 518.925525188446, |
| "epoch": 1.6424666128887693, |
| "grad_norm": 0.04251728951931, |
| "kl": 0.007372379302978516, |
| "learning_rate": 1.0529563446282665e-06, |
| "loss": 0.01, |
| "num_tokens": 246686482.0, |
| "reward": 0.08537946754950099, |
| "reward_std": 0.08939063869183883, |
| "rewards/pure_accuracy_reward_math": 0.08537946551223285, |
| "step": 1186 |
| }, |
| { |
| "clip_ratio": 0.0003136689152256622, |
| "epoch": 1.6443787159033194, |
| "grad_norm": 0.04087135195732117, |
| "kl": 0.007419109344482422, |
| "learning_rate": 1.0478454220234568e-06, |
| "loss": 0.0099, |
| "step": 1187 |
| }, |
| { |
| "clip_ratio": 0.0003467907941399062, |
| "epoch": 1.646290818917869, |
| "grad_norm": 0.039666056632995605, |
| "kl": 0.007442951202392578, |
| "learning_rate": 1.0427436427341939e-06, |
| "loss": 0.0099, |
| "step": 1188 |
| }, |
| { |
| "clip_ratio": 0.00038431568484043055, |
| "epoch": 1.6482029219324192, |
| "grad_norm": 0.0389142706990242, |
| "kl": 0.007426738739013672, |
| "learning_rate": 1.0376510388832147e-06, |
| "loss": 0.0098, |
| "step": 1189 |
| }, |
| { |
| "clip_ratio": 0.000490980125164242, |
| "epoch": 1.650115024946969, |
| "grad_norm": 0.03956843912601471, |
| "kl": 0.007406711578369141, |
| "learning_rate": 1.0325676425354828e-06, |
| "loss": 0.0097, |
| "step": 1190 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 508.4835596084595, |
| "epoch": 1.652027127961519, |
| "grad_norm": 0.04898946359753609, |
| "kl": 0.008952617645263672, |
| "learning_rate": 1.0274934856979876e-06, |
| "loss": 0.0069, |
| "num_tokens": 250241299.0, |
| "reward": 0.07868303955183364, |
| "reward_std": 0.08381028211442754, |
| "rewards/pure_accuracy_reward_math": 0.07868303728173487, |
| "step": 1191 |
| }, |
| { |
| "clip_ratio": 0.0002854310730526777, |
| "epoch": 1.6539392309760688, |
| "grad_norm": 0.04304199293255806, |
| "kl": 0.008716106414794922, |
| "learning_rate": 1.0224286003195437e-06, |
| "loss": 0.0069, |
| "step": 1192 |
| }, |
| { |
| "clip_ratio": 0.00029722766299755676, |
| "epoch": 1.655851333990619, |
| "grad_norm": 0.039751190692186356, |
| "kl": 0.008554935455322266, |
| "learning_rate": 1.017373018290588e-06, |
| "loss": 0.0068, |
| "step": 1193 |
| }, |
| { |
| "clip_ratio": 0.00036785421832519205, |
| "epoch": 1.6577634370051686, |
| "grad_norm": 0.039316095411777496, |
| "kl": 0.00851297378540039, |
| "learning_rate": 1.0123267714429826e-06, |
| "loss": 0.0067, |
| "step": 1194 |
| }, |
| { |
| "clip_ratio": 0.0003976103018885624, |
| "epoch": 1.6596755400197185, |
| "grad_norm": 0.03880908712744713, |
| "kl": 0.008470535278320312, |
| "learning_rate": 1.0072898915498094e-06, |
| "loss": 0.0067, |
| "step": 1195 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 514.2179379463196, |
| "epoch": 1.6615876430342684, |
| "grad_norm": 0.04073133319616318, |
| "kl": 0.0076427459716796875, |
| "learning_rate": 1.0022624103251727e-06, |
| "loss": 0.0095, |
| "num_tokens": 253820892.0, |
| "reward": 0.08593750416184776, |
| "reward_std": 0.08978221646975726, |
| "rewards/pure_accuracy_reward_math": 0.08593750165891834, |
| "step": 1196 |
| }, |
| { |
| "clip_ratio": 0.0003768215759691884, |
| "epoch": 1.6634997460488183, |
| "grad_norm": 0.039870597422122955, |
| "kl": 0.007634639739990234, |
| "learning_rate": 9.972443594239997e-07, |
| "loss": 0.0095, |
| "step": 1197 |
| }, |
| { |
| "clip_ratio": 0.00033531371116168884, |
| "epoch": 1.6654118490633683, |
| "grad_norm": 0.039165791124105453, |
| "kl": 0.007609367370605469, |
| "learning_rate": 9.922357704418394e-07, |
| "loss": 0.0094, |
| "step": 1198 |
| }, |
| { |
| "clip_ratio": 0.0003830786464504854, |
| "epoch": 1.6673239520779182, |
| "grad_norm": 0.0393473282456398, |
| "kl": 0.0076847076416015625, |
| "learning_rate": 9.872366749146684e-07, |
| "loss": 0.0094, |
| "step": 1199 |
| }, |
| { |
| "clip_ratio": 0.0003766370310813727, |
| "epoch": 1.669236055092468, |
| "grad_norm": 0.037378448992967606, |
| "kl": 0.007641792297363281, |
| "learning_rate": 9.822471043186846e-07, |
| "loss": 0.0093, |
| "step": 1200 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 502.35381841659546, |
| "epoch": 1.671148158107018, |
| "grad_norm": 0.051170479506254196, |
| "kl": 0.008347511291503906, |
| "learning_rate": 9.772670900701172e-07, |
| "loss": 0.0074, |
| "num_tokens": 257360516.0, |
| "reward": 0.08537946784053929, |
| "reward_std": 0.09248606633627787, |
| "rewards/pure_accuracy_reward_math": 0.0853794660361018, |
| "step": 1201 |
| }, |
| { |
| "clip_ratio": 0.00036896339207714846, |
| "epoch": 1.673060261121568, |
| "grad_norm": 0.04540196433663368, |
| "kl": 0.008112430572509766, |
| "learning_rate": 9.722966635250222e-07, |
| "loss": 0.0074, |
| "step": 1202 |
| }, |
| { |
| "clip_ratio": 0.00040850058093155894, |
| "epoch": 1.6749723641361178, |
| "grad_norm": 0.0428830124437809, |
| "kl": 0.007869243621826172, |
| "learning_rate": 9.673358559790892e-07, |
| "loss": 0.0073, |
| "step": 1203 |
| }, |
| { |
| "clip_ratio": 0.0004735397765216476, |
| "epoch": 1.6768844671506677, |
| "grad_norm": 0.04445512220263481, |
| "kl": 0.007699012756347656, |
| "learning_rate": 9.623846986674417e-07, |
| "loss": 0.0072, |
| "step": 1204 |
| }, |
| { |
| "clip_ratio": 0.00047387216932293086, |
| "epoch": 1.6787965701652177, |
| "grad_norm": 0.04317403957247734, |
| "kl": 0.0076007843017578125, |
| "learning_rate": 9.574432227644432e-07, |
| "loss": 0.0071, |
| "step": 1205 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 511.88367557525635, |
| "epoch": 1.6807086731797676, |
| "grad_norm": 0.041338611394166946, |
| "kl": 0.007639884948730469, |
| "learning_rate": 9.525114593834975e-07, |
| "loss": 0.0077, |
| "num_tokens": 260924667.0, |
| "reward": 0.07617187869618647, |
| "reward_std": 0.08037573983892798, |
| "rewards/pure_accuracy_reward_math": 0.0761718759604264, |
| "step": 1206 |
| }, |
| { |
| "clip_ratio": 0.00029646307336861355, |
| "epoch": 1.6826207761943175, |
| "grad_norm": 0.040457833558321, |
| "kl": 0.007670402526855469, |
| "learning_rate": 9.475894395768579e-07, |
| "loss": 0.0077, |
| "step": 1207 |
| }, |
| { |
| "clip_ratio": 0.0003306309376966965, |
| "epoch": 1.6845328792088674, |
| "grad_norm": 0.03946809470653534, |
| "kl": 0.0076751708984375, |
| "learning_rate": 9.426771943354249e-07, |
| "loss": 0.0076, |
| "step": 1208 |
| }, |
| { |
| "clip_ratio": 0.0003582578942200598, |
| "epoch": 1.6864449822234173, |
| "grad_norm": 0.04006471857428551, |
| "kl": 0.007700443267822266, |
| "learning_rate": 9.377747545885569e-07, |
| "loss": 0.0075, |
| "step": 1209 |
| }, |
| { |
| "clip_ratio": 0.00040392828321955676, |
| "epoch": 1.6883570852379672, |
| "grad_norm": 0.04037889465689659, |
| "kl": 0.007681369781494141, |
| "learning_rate": 9.328821512038716e-07, |
| "loss": 0.0074, |
| "step": 1210 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 533.6010298728943, |
| "epoch": 1.6902691882525172, |
| "grad_norm": 0.03628333657979965, |
| "kl": 0.006788730621337891, |
| "learning_rate": 9.279994149870539e-07, |
| "loss": 0.0073, |
| "num_tokens": 264564517.0, |
| "reward": 0.06110491382423788, |
| "reward_std": 0.06693661888130009, |
| "rewards/pure_accuracy_reward_math": 0.06110491219442338, |
| "step": 1211 |
| }, |
| { |
| "clip_ratio": 0.0002594580842014693, |
| "epoch": 1.692181291267067, |
| "grad_norm": 0.034194085747003555, |
| "kl": 0.006678581237792969, |
| "learning_rate": 9.231265766816619e-07, |
| "loss": 0.0073, |
| "step": 1212 |
| }, |
| { |
| "clip_ratio": 0.0003170226998463477, |
| "epoch": 1.6940933942816168, |
| "grad_norm": 0.035113800317049026, |
| "kl": 0.006625652313232422, |
| "learning_rate": 9.182636669689335e-07, |
| "loss": 0.0073, |
| "step": 1213 |
| }, |
| { |
| "clip_ratio": 0.0003448430217076748, |
| "epoch": 1.696005497296167, |
| "grad_norm": 0.03626548498868942, |
| "kl": 0.006573200225830078, |
| "learning_rate": 9.134107164675898e-07, |
| "loss": 0.0072, |
| "step": 1214 |
| }, |
| { |
| "clip_ratio": 0.00033195262278695736, |
| "epoch": 1.6979176003107166, |
| "grad_norm": 0.03465663269162178, |
| "kl": 0.006582736968994141, |
| "learning_rate": 9.085677557336465e-07, |
| "loss": 0.0071, |
| "step": 1215 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.8440546989441, |
| "epoch": 1.6998297033252667, |
| "grad_norm": 0.038788389414548874, |
| "kl": 0.009612560272216797, |
| "learning_rate": 9.037348152602199e-07, |
| "loss": 0.0052, |
| "num_tokens": 268179390.0, |
| "reward": 0.07756696798605844, |
| "reward_std": 0.0852254037745297, |
| "rewards/pure_accuracy_reward_math": 0.07756696571595967, |
| "step": 1216 |
| }, |
| { |
| "clip_ratio": 0.00027092215094626226, |
| "epoch": 1.7017418063398164, |
| "grad_norm": 0.038229282945394516, |
| "kl": 0.009754657745361328, |
| "learning_rate": 8.989119254773343e-07, |
| "loss": 0.0052, |
| "step": 1217 |
| }, |
| { |
| "clip_ratio": 0.00027246196253827293, |
| "epoch": 1.7036539093543666, |
| "grad_norm": 0.03782220929861069, |
| "kl": 0.009780406951904297, |
| "learning_rate": 8.940991167517313e-07, |
| "loss": 0.0051, |
| "step": 1218 |
| }, |
| { |
| "clip_ratio": 0.0003069629718197575, |
| "epoch": 1.7055660123689163, |
| "grad_norm": 0.03707100450992584, |
| "kl": 0.00977468490600586, |
| "learning_rate": 8.892964193866799e-07, |
| "loss": 0.005, |
| "step": 1219 |
| }, |
| { |
| "clip_ratio": 0.0003035257008150438, |
| "epoch": 1.7074781153834664, |
| "grad_norm": 0.03552490472793579, |
| "kl": 0.009665966033935547, |
| "learning_rate": 8.845038636217818e-07, |
| "loss": 0.0049, |
| "step": 1220 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.9601240158081, |
| "epoch": 1.709390218398016, |
| "grad_norm": 0.04051567241549492, |
| "kl": 0.007312297821044922, |
| "learning_rate": 8.797214796327843e-07, |
| "loss": 0.0079, |
| "num_tokens": 271808667.0, |
| "reward": 0.08733259368455037, |
| "reward_std": 0.08496641932288185, |
| "rewards/pure_accuracy_reward_math": 0.0873325903667137, |
| "step": 1221 |
| }, |
| { |
| "clip_ratio": 0.00033132852740891394, |
| "epoch": 1.7113023214125662, |
| "grad_norm": 0.03887411206960678, |
| "kl": 0.007235527038574219, |
| "learning_rate": 8.749492975313897e-07, |
| "loss": 0.0079, |
| "step": 1222 |
| }, |
| { |
| "clip_ratio": 0.0003587238066984355, |
| "epoch": 1.713214424427116, |
| "grad_norm": 0.04010055959224701, |
| "kl": 0.007251739501953125, |
| "learning_rate": 8.701873473650643e-07, |
| "loss": 0.0079, |
| "step": 1223 |
| }, |
| { |
| "clip_ratio": 0.0003504625653079074, |
| "epoch": 1.715126527441666, |
| "grad_norm": 0.039550576359033585, |
| "kl": 0.007262229919433594, |
| "learning_rate": 8.654356591168522e-07, |
| "loss": 0.0078, |
| "step": 1224 |
| }, |
| { |
| "clip_ratio": 0.0003497420942721874, |
| "epoch": 1.7170386304562157, |
| "grad_norm": 0.03883340209722519, |
| "kl": 0.007348537445068359, |
| "learning_rate": 8.60694262705182e-07, |
| "loss": 0.0077, |
| "step": 1225 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.5396447181702, |
| "epoch": 1.7189507334707659, |
| "grad_norm": 0.037610165774822235, |
| "kl": 0.007049083709716797, |
| "learning_rate": 8.559631879836838e-07, |
| "loss": 0.0065, |
| "num_tokens": 275440789.0, |
| "reward": 0.07896205675206147, |
| "reward_std": 0.07938606152310967, |
| "rewards/pure_accuracy_reward_math": 0.07896205494762398, |
| "step": 1226 |
| }, |
| { |
| "clip_ratio": 0.0002787316387298233, |
| "epoch": 1.7208628364853156, |
| "grad_norm": 0.03763109818100929, |
| "kl": 0.007136821746826172, |
| "learning_rate": 8.512424647409964e-07, |
| "loss": 0.0065, |
| "step": 1227 |
| }, |
| { |
| "clip_ratio": 0.0003178273858566172, |
| "epoch": 1.7227749394998657, |
| "grad_norm": 0.037824735045433044, |
| "kl": 0.007121562957763672, |
| "learning_rate": 8.465321227005823e-07, |
| "loss": 0.0065, |
| "step": 1228 |
| }, |
| { |
| "clip_ratio": 0.0002866029928725311, |
| "epoch": 1.7246870425144154, |
| "grad_norm": 0.03616493567824364, |
| "kl": 0.00708770751953125, |
| "learning_rate": 8.418321915205399e-07, |
| "loss": 0.0064, |
| "step": 1229 |
| }, |
| { |
| "clip_ratio": 0.00031164622902224437, |
| "epoch": 1.7265991455289653, |
| "grad_norm": 0.03562076762318611, |
| "kl": 0.007038593292236328, |
| "learning_rate": 8.371427007934174e-07, |
| "loss": 0.0063, |
| "step": 1230 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 536.3178272247314, |
| "epoch": 1.7285112485435152, |
| "grad_norm": 0.03759186714887619, |
| "kl": 0.006800651550292969, |
| "learning_rate": 8.324636800460242e-07, |
| "loss": 0.0071, |
| "num_tokens": 279097568.0, |
| "reward": 0.07728794903960079, |
| "reward_std": 0.07732657541055232, |
| "rewards/pure_accuracy_reward_math": 0.07728794822469354, |
| "step": 1231 |
| }, |
| { |
| "clip_ratio": 0.00028705537579298834, |
| "epoch": 1.7304233515580651, |
| "grad_norm": 0.036786679178476334, |
| "kl": 0.006786346435546875, |
| "learning_rate": 8.277951587392505e-07, |
| "loss": 0.0071, |
| "step": 1232 |
| }, |
| { |
| "clip_ratio": 0.000303516245821811, |
| "epoch": 1.732335454572615, |
| "grad_norm": 0.03563455864787102, |
| "kl": 0.0068149566650390625, |
| "learning_rate": 8.231371662678741e-07, |
| "loss": 0.0071, |
| "step": 1233 |
| }, |
| { |
| "clip_ratio": 0.0003096325264095867, |
| "epoch": 1.734247557587165, |
| "grad_norm": 0.03413652628660202, |
| "kl": 0.006861209869384766, |
| "learning_rate": 8.184897319603813e-07, |
| "loss": 0.007, |
| "step": 1234 |
| }, |
| { |
| "clip_ratio": 0.0003550405467649398, |
| "epoch": 1.736159660601715, |
| "grad_norm": 0.03433661162853241, |
| "kl": 0.006935596466064453, |
| "learning_rate": 8.138528850787792e-07, |
| "loss": 0.0069, |
| "step": 1235 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.8069453239441, |
| "epoch": 1.7380717636162648, |
| "grad_norm": 0.2546544671058655, |
| "kl": 0.012326240539550781, |
| "learning_rate": 8.092266548184139e-07, |
| "loss": 0.011, |
| "num_tokens": 282683384.0, |
| "reward": 0.07477678873692639, |
| "reward_std": 0.08165826951153576, |
| "rewards/pure_accuracy_reward_math": 0.07477678751456551, |
| "step": 1236 |
| }, |
| { |
| "clip_ratio": 0.00030172572752462656, |
| "epoch": 1.7399838666308147, |
| "grad_norm": 0.042716413736343384, |
| "kl": 0.0078887939453125, |
| "learning_rate": 8.046110703077839e-07, |
| "loss": 0.0108, |
| "step": 1237 |
| }, |
| { |
| "clip_ratio": 0.00029401268267292835, |
| "epoch": 1.7418959696453646, |
| "grad_norm": 0.038783252239227295, |
| "kl": 0.007707118988037109, |
| "learning_rate": 8.000061606083579e-07, |
| "loss": 0.0107, |
| "step": 1238 |
| }, |
| { |
| "clip_ratio": 0.00028625389199987694, |
| "epoch": 1.7438080726599146, |
| "grad_norm": 0.0381159707903862, |
| "kl": 0.007790088653564453, |
| "learning_rate": 7.954119547143935e-07, |
| "loss": 0.0107, |
| "step": 1239 |
| }, |
| { |
| "clip_ratio": 0.00034677153644224745, |
| "epoch": 1.7457201756744645, |
| "grad_norm": 0.038590554147958755, |
| "kl": 0.007785797119140625, |
| "learning_rate": 7.90828481552752e-07, |
| "loss": 0.0106, |
| "step": 1240 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 517.8047132492065, |
| "epoch": 1.7476322786890144, |
| "grad_norm": 0.03943649306893349, |
| "kl": 0.007458209991455078, |
| "learning_rate": 7.862557699827167e-07, |
| "loss": 0.0092, |
| "num_tokens": 286269120.0, |
| "reward": 0.06640625282307155, |
| "reward_std": 0.07607791275950149, |
| "rewards/pure_accuracy_reward_math": 0.06640625130967237, |
| "step": 1241 |
| }, |
| { |
| "clip_ratio": 0.00031282668544463377, |
| "epoch": 1.7495443817035643, |
| "grad_norm": 0.0388050340116024, |
| "kl": 0.007348060607910156, |
| "learning_rate": 7.816938487958131e-07, |
| "loss": 0.0092, |
| "step": 1242 |
| }, |
| { |
| "clip_ratio": 0.0003194147345197962, |
| "epoch": 1.7514564847181142, |
| "grad_norm": 0.038322921842336655, |
| "kl": 0.007298946380615234, |
| "learning_rate": 7.771427467156256e-07, |
| "loss": 0.0091, |
| "step": 1243 |
| }, |
| { |
| "clip_ratio": 0.0003203335651846828, |
| "epoch": 1.7533685877326641, |
| "grad_norm": 0.037499312311410904, |
| "kl": 0.007254600524902344, |
| "learning_rate": 7.726024923976169e-07, |
| "loss": 0.009, |
| "step": 1244 |
| }, |
| { |
| "clip_ratio": 0.00032696440513291236, |
| "epoch": 1.755280690747214, |
| "grad_norm": 0.03671669587492943, |
| "kl": 0.007252693176269531, |
| "learning_rate": 7.680731144289505e-07, |
| "loss": 0.009, |
| "step": 1245 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 514.8644180297852, |
| "epoch": 1.757192793761764, |
| "grad_norm": 0.04826434701681137, |
| "kl": 0.0094451904296875, |
| "learning_rate": 7.635546413283054e-07, |
| "loss": 0.0078, |
| "num_tokens": 289848950.0, |
| "reward": 0.07421875323052518, |
| "reward_std": 0.07818366138963029, |
| "rewards/pure_accuracy_reward_math": 0.074218751717126, |
| "step": 1246 |
| }, |
| { |
| "clip_ratio": 0.000299703156713349, |
| "epoch": 1.7591048967763139, |
| "grad_norm": 0.03791136294603348, |
| "kl": 0.009324073791503906, |
| "learning_rate": 7.590471015457002e-07, |
| "loss": 0.0077, |
| "step": 1247 |
| }, |
| { |
| "clip_ratio": 0.00030542989918558305, |
| "epoch": 1.7610169997908636, |
| "grad_norm": 0.03703403100371361, |
| "kl": 0.009335517883300781, |
| "learning_rate": 7.545505234623152e-07, |
| "loss": 0.0077, |
| "step": 1248 |
| }, |
| { |
| "clip_ratio": 0.0002983629839832247, |
| "epoch": 1.7629291028054137, |
| "grad_norm": 0.0363752581179142, |
| "kl": 0.009361743927001953, |
| "learning_rate": 7.500649353903092e-07, |
| "loss": 0.0076, |
| "step": 1249 |
| }, |
| { |
| "clip_ratio": 0.0002923785563098136, |
| "epoch": 1.7648412058199634, |
| "grad_norm": 0.03587965667247772, |
| "kl": 0.009373664855957031, |
| "learning_rate": 7.455903655726437e-07, |
| "loss": 0.0075, |
| "step": 1250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 510.6543188095093, |
| "epoch": 1.7667533088345135, |
| "grad_norm": 0.03651593253016472, |
| "kl": 0.008678436279296875, |
| "learning_rate": 7.411268421829076e-07, |
| "loss": 0.0059, |
| "num_tokens": 293408275.0, |
| "reward": 0.07031250264844857, |
| "reward_std": 0.07401842658873647, |
| "rewards/pure_accuracy_reward_math": 0.07031250160071068, |
| "step": 1251 |
| }, |
| { |
| "clip_ratio": 0.000244510552590782, |
| "epoch": 1.7686654118490632, |
| "grad_norm": 0.03525623679161072, |
| "kl": 0.008609294891357422, |
| "learning_rate": 7.366743933251349e-07, |
| "loss": 0.0059, |
| "step": 1252 |
| }, |
| { |
| "clip_ratio": 0.000242228649824483, |
| "epoch": 1.7705775148636134, |
| "grad_norm": 0.035115260630846024, |
| "kl": 0.008548259735107422, |
| "learning_rate": 7.322330470336314e-07, |
| "loss": 0.0058, |
| "step": 1253 |
| }, |
| { |
| "clip_ratio": 0.0002641637478291159, |
| "epoch": 1.772489617878163, |
| "grad_norm": 0.03518166393041611, |
| "kl": 0.008442401885986328, |
| "learning_rate": 7.278028312727961e-07, |
| "loss": 0.0058, |
| "step": 1254 |
| }, |
| { |
| "clip_ratio": 0.0002555919315909705, |
| "epoch": 1.7744017208927132, |
| "grad_norm": 0.03385892137885094, |
| "kl": 0.00841379165649414, |
| "learning_rate": 7.233837739369462e-07, |
| "loss": 0.0057, |
| "step": 1255 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 513.7271451950073, |
| "epoch": 1.776313823907263, |
| "grad_norm": 0.03341628611087799, |
| "kl": 0.006855964660644531, |
| "learning_rate": 7.189759028501417e-07, |
| "loss": 0.0062, |
| "num_tokens": 296984393.0, |
| "reward": 0.06556919915601611, |
| "reward_std": 0.06311669771093875, |
| "rewards/pure_accuracy_reward_math": 0.06556919775903225, |
| "step": 1256 |
| }, |
| { |
| "clip_ratio": 0.0002122660096688378, |
| "epoch": 1.778225926921813, |
| "grad_norm": 0.03227659687399864, |
| "kl": 0.006803989410400391, |
| "learning_rate": 7.145792457660083e-07, |
| "loss": 0.0062, |
| "step": 1257 |
| }, |
| { |
| "clip_ratio": 0.00023682935608348998, |
| "epoch": 1.7801380299363627, |
| "grad_norm": 0.03206360712647438, |
| "kl": 0.006758213043212891, |
| "learning_rate": 7.101938303675674e-07, |
| "loss": 0.0062, |
| "step": 1258 |
| }, |
| { |
| "clip_ratio": 0.0002413284565250251, |
| "epoch": 1.7820501329509129, |
| "grad_norm": 0.031279318034648895, |
| "kl": 0.006762981414794922, |
| "learning_rate": 7.058196842670548e-07, |
| "loss": 0.0061, |
| "step": 1259 |
| }, |
| { |
| "clip_ratio": 0.0002680151189338176, |
| "epoch": 1.7839622359654626, |
| "grad_norm": 0.031049314886331558, |
| "kl": 0.006676197052001953, |
| "learning_rate": 7.014568350057516e-07, |
| "loss": 0.0061, |
| "step": 1260 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.2553224563599, |
| "epoch": 1.7858743389800127, |
| "grad_norm": 0.03635333850979805, |
| "kl": 0.007339000701904297, |
| "learning_rate": 6.971053100538116e-07, |
| "loss": 0.0066, |
| "num_tokens": 300622928.0, |
| "reward": 0.0711495568684768, |
| "reward_std": 0.07668221119092777, |
| "rewards/pure_accuracy_reward_math": 0.07114955512224697, |
| "step": 1261 |
| }, |
| { |
| "clip_ratio": 0.00025942773436327116, |
| "epoch": 1.7877864419945624, |
| "grad_norm": 0.03595859929919243, |
| "kl": 0.007373332977294922, |
| "learning_rate": 6.927651368100843e-07, |
| "loss": 0.0065, |
| "step": 1262 |
| }, |
| { |
| "clip_ratio": 0.00026420129074722354, |
| "epoch": 1.7896985450091125, |
| "grad_norm": 0.034778136759996414, |
| "kl": 0.00739288330078125, |
| "learning_rate": 6.884363426019444e-07, |
| "loss": 0.0065, |
| "step": 1263 |
| }, |
| { |
| "clip_ratio": 0.0002875854173112202, |
| "epoch": 1.7916106480236622, |
| "grad_norm": 0.035560280084609985, |
| "kl": 0.007449150085449219, |
| "learning_rate": 6.841189546851224e-07, |
| "loss": 0.0064, |
| "step": 1264 |
| }, |
| { |
| "clip_ratio": 0.00026737677507071567, |
| "epoch": 1.7935227510382123, |
| "grad_norm": 0.03407442197203636, |
| "kl": 0.007452964782714844, |
| "learning_rate": 6.79813000243528e-07, |
| "loss": 0.0064, |
| "step": 1265 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.543550491333, |
| "epoch": 1.795434854052762, |
| "grad_norm": 0.03908964619040489, |
| "kl": 0.008809566497802734, |
| "learning_rate": 6.755185063890818e-07, |
| "loss": 0.0074, |
| "num_tokens": 304236988.0, |
| "reward": 0.0747767890279647, |
| "reward_std": 0.07865536957979202, |
| "rewards/pure_accuracy_reward_math": 0.07477678745635785, |
| "step": 1266 |
| }, |
| { |
| "clip_ratio": 0.0002752643416670253, |
| "epoch": 1.797346957067312, |
| "grad_norm": 0.0380408875644207, |
| "kl": 0.00884389877319336, |
| "learning_rate": 6.71235500161545e-07, |
| "loss": 0.0074, |
| "step": 1267 |
| }, |
| { |
| "clip_ratio": 0.0002959408872698077, |
| "epoch": 1.7992590600818619, |
| "grad_norm": 0.03713267296552658, |
| "kl": 0.008931636810302734, |
| "learning_rate": 6.669640085283479e-07, |
| "loss": 0.0073, |
| "step": 1268 |
| }, |
| { |
| "clip_ratio": 0.0003134474755484007, |
| "epoch": 1.8011711630964118, |
| "grad_norm": 0.03684492036700249, |
| "kl": 0.008975982666015625, |
| "learning_rate": 6.627040583844199e-07, |
| "loss": 0.0073, |
| "step": 1269 |
| }, |
| { |
| "clip_ratio": 0.0003336208075666036, |
| "epoch": 1.8030832661109617, |
| "grad_norm": 0.0364052951335907, |
| "kl": 0.009007453918457031, |
| "learning_rate": 6.584556765520231e-07, |
| "loss": 0.0072, |
| "step": 1270 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.5468997955322, |
| "epoch": 1.8049953691255116, |
| "grad_norm": 0.03688374161720276, |
| "kl": 0.006972789764404297, |
| "learning_rate": 6.542188897805782e-07, |
| "loss": 0.0076, |
| "num_tokens": 307881200.0, |
| "reward": 0.06082589610014111, |
| "reward_std": 0.06925509008578956, |
| "rewards/pure_accuracy_reward_math": 0.06082589423749596, |
| "step": 1271 |
| }, |
| { |
| "clip_ratio": 0.0002535940801635661, |
| "epoch": 1.8069074721400615, |
| "grad_norm": 0.03543318435549736, |
| "kl": 0.006913661956787109, |
| "learning_rate": 6.499937247465002e-07, |
| "loss": 0.0076, |
| "step": 1272 |
| }, |
| { |
| "clip_ratio": 0.00029529011806062044, |
| "epoch": 1.8088195751546114, |
| "grad_norm": 0.034321434795856476, |
| "kl": 0.006764411926269531, |
| "learning_rate": 6.457802080530304e-07, |
| "loss": 0.0075, |
| "step": 1273 |
| }, |
| { |
| "clip_ratio": 0.00032198404306882367, |
| "epoch": 1.8107316781691614, |
| "grad_norm": 0.03342648968100548, |
| "kl": 0.006732940673828125, |
| "learning_rate": 6.415783662300662e-07, |
| "loss": 0.0075, |
| "step": 1274 |
| }, |
| { |
| "clip_ratio": 0.000381207836142039, |
| "epoch": 1.8126437811837113, |
| "grad_norm": 0.034588467329740524, |
| "kl": 0.006687164306640625, |
| "learning_rate": 6.373882257339964e-07, |
| "loss": 0.0074, |
| "step": 1275 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 528.7452836036682, |
| "epoch": 1.8145558841982612, |
| "grad_norm": 0.039650533348321915, |
| "kl": 0.012791156768798828, |
| "learning_rate": 6.33209812947532e-07, |
| "loss": 0.0068, |
| "num_tokens": 311509399.0, |
| "reward": 0.06919643239234574, |
| "reward_std": 0.07131457643117756, |
| "rewards/pure_accuracy_reward_math": 0.06919642988941632, |
| "step": 1276 |
| }, |
| { |
| "clip_ratio": 0.00028128568749252736, |
| "epoch": 1.816467987212811, |
| "grad_norm": 0.039305564016103745, |
| "kl": 0.012639522552490234, |
| "learning_rate": 6.290431541795456e-07, |
| "loss": 0.0068, |
| "step": 1277 |
| }, |
| { |
| "clip_ratio": 0.00027201296376233586, |
| "epoch": 1.818380090227361, |
| "grad_norm": 0.038404785096645355, |
| "kl": 0.012586116790771484, |
| "learning_rate": 6.248882756648988e-07, |
| "loss": 0.0067, |
| "step": 1278 |
| }, |
| { |
| "clip_ratio": 0.00027703067632955936, |
| "epoch": 1.820292193241911, |
| "grad_norm": 0.037614692002534866, |
| "kl": 0.01236581802368164, |
| "learning_rate": 6.207452035642814e-07, |
| "loss": 0.0066, |
| "step": 1279 |
| }, |
| { |
| "clip_ratio": 0.000309511864088563, |
| "epoch": 1.8222042962564609, |
| "grad_norm": 0.03737355023622513, |
| "kl": 0.012206554412841797, |
| "learning_rate": 6.166139639640454e-07, |
| "loss": 0.0065, |
| "step": 1280 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 526.473795413971, |
| "epoch": 1.8241163992710108, |
| "grad_norm": 0.03713076934218407, |
| "kl": 0.007002353668212891, |
| "learning_rate": 6.124945828760406e-07, |
| "loss": 0.0059, |
| "num_tokens": 315129533.0, |
| "reward": 0.06445312840514816, |
| "reward_std": 0.06921502435579896, |
| "rewards/pure_accuracy_reward_math": 0.0644531259604264, |
| "step": 1281 |
| }, |
| { |
| "clip_ratio": 0.00024346445911760384, |
| "epoch": 1.8260285022855607, |
| "grad_norm": 0.03588669002056122, |
| "kl": 0.006989955902099609, |
| "learning_rate": 6.083870862374513e-07, |
| "loss": 0.0059, |
| "step": 1282 |
| }, |
| { |
| "clip_ratio": 0.0002329723478737833, |
| "epoch": 1.8279406053001104, |
| "grad_norm": 0.03526683151721954, |
| "kl": 0.007010459899902344, |
| "learning_rate": 6.042914999106342e-07, |
| "loss": 0.0058, |
| "step": 1283 |
| }, |
| { |
| "clip_ratio": 0.00023291378442991117, |
| "epoch": 1.8298527083146605, |
| "grad_norm": 0.03384559601545334, |
| "kl": 0.007075786590576172, |
| "learning_rate": 6.002078496829514e-07, |
| "loss": 0.0058, |
| "step": 1284 |
| }, |
| { |
| "clip_ratio": 0.0002458733478647446, |
| "epoch": 1.8317648113292102, |
| "grad_norm": 0.03377237543463707, |
| "kl": 0.0071315765380859375, |
| "learning_rate": 5.961361612666139e-07, |
| "loss": 0.0057, |
| "step": 1285 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.0859618186951, |
| "epoch": 1.8336769143437603, |
| "grad_norm": 0.0914173573255539, |
| "kl": 0.012554645538330078, |
| "learning_rate": 5.920764602985141e-07, |
| "loss": 0.0058, |
| "num_tokens": 318747025.0, |
| "reward": 0.06612723506987095, |
| "reward_std": 0.06865079142153263, |
| "rewards/pure_accuracy_reward_math": 0.06612723355647177, |
| "step": 1286 |
| }, |
| { |
| "clip_ratio": 0.00025586230526641884, |
| "epoch": 1.83558901735831, |
| "grad_norm": 0.04225718230009079, |
| "kl": 0.010876655578613281, |
| "learning_rate": 5.88028772340068e-07, |
| "loss": 0.0057, |
| "step": 1287 |
| }, |
| { |
| "clip_ratio": 0.00024814432106268214, |
| "epoch": 1.8375011203728602, |
| "grad_norm": 0.03636258468031883, |
| "kl": 0.010531425476074219, |
| "learning_rate": 5.839931228770526e-07, |
| "loss": 0.0057, |
| "step": 1288 |
| }, |
| { |
| "clip_ratio": 0.0002984523198108491, |
| "epoch": 1.8394132233874099, |
| "grad_norm": 0.03610241040587425, |
| "kl": 0.010416984558105469, |
| "learning_rate": 5.799695373194461e-07, |
| "loss": 0.0056, |
| "step": 1289 |
| }, |
| { |
| "clip_ratio": 0.00032527196299270145, |
| "epoch": 1.84132532640196, |
| "grad_norm": 0.034912850707769394, |
| "kl": 0.010428428649902344, |
| "learning_rate": 5.759580410012691e-07, |
| "loss": 0.0055, |
| "step": 1290 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.4793767929077, |
| "epoch": 1.8432374294165097, |
| "grad_norm": 0.04220513626933098, |
| "kl": 0.009058475494384766, |
| "learning_rate": 5.719586591804222e-07, |
| "loss": 0.0071, |
| "num_tokens": 322345307.0, |
| "reward": 0.07366071786964312, |
| "reward_std": 0.07878176297526807, |
| "rewards/pure_accuracy_reward_math": 0.07366071542492136, |
| "step": 1291 |
| }, |
| { |
| "clip_ratio": 0.00030183524040694465, |
| "epoch": 1.8451495324310598, |
| "grad_norm": 0.03849344700574875, |
| "kl": 0.009106636047363281, |
| "learning_rate": 5.679714170385283e-07, |
| "loss": 0.0071, |
| "step": 1292 |
| }, |
| { |
| "clip_ratio": 0.00035880112773156725, |
| "epoch": 1.8470616354456095, |
| "grad_norm": 0.037096235901117325, |
| "kl": 0.009167194366455078, |
| "learning_rate": 5.63996339680776e-07, |
| "loss": 0.0071, |
| "step": 1293 |
| }, |
| { |
| "clip_ratio": 0.00040293739141361584, |
| "epoch": 1.8489737384601597, |
| "grad_norm": 0.03884498402476311, |
| "kl": 0.009192943572998047, |
| "learning_rate": 5.600334521357581e-07, |
| "loss": 0.007, |
| "step": 1294 |
| }, |
| { |
| "clip_ratio": 0.00038201194092835067, |
| "epoch": 1.8508858414747094, |
| "grad_norm": 0.03875093162059784, |
| "kl": 0.009291648864746094, |
| "learning_rate": 5.560827793553159e-07, |
| "loss": 0.0069, |
| "step": 1295 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 518.3301024436951, |
| "epoch": 1.8527979444892595, |
| "grad_norm": 0.04254430532455444, |
| "kl": 0.008441925048828125, |
| "learning_rate": 5.52144346214383e-07, |
| "loss": 0.0063, |
| "num_tokens": 325938766.0, |
| "reward": 0.07840402127476409, |
| "reward_std": 0.08084744628285989, |
| "rewards/pure_accuracy_reward_math": 0.07840401929570362, |
| "step": 1296 |
| }, |
| { |
| "clip_ratio": 0.0002986583057804637, |
| "epoch": 1.8547100475038092, |
| "grad_norm": 0.041676584631204605, |
| "kl": 0.008450508117675781, |
| "learning_rate": 5.482181775108278e-07, |
| "loss": 0.0062, |
| "step": 1297 |
| }, |
| { |
| "clip_ratio": 0.00031948441494478175, |
| "epoch": 1.8566221505183593, |
| "grad_norm": 0.03955300524830818, |
| "kl": 0.008507251739501953, |
| "learning_rate": 5.443042979652957e-07, |
| "loss": 0.0062, |
| "step": 1298 |
| }, |
| { |
| "clip_ratio": 0.0003085145480667961, |
| "epoch": 1.858534253532909, |
| "grad_norm": 0.03848061338067055, |
| "kl": 0.008501052856445312, |
| "learning_rate": 5.404027322210556e-07, |
| "loss": 0.0061, |
| "step": 1299 |
| }, |
| { |
| "clip_ratio": 0.0003855731235944404, |
| "epoch": 1.8604463565474592, |
| "grad_norm": 0.04076399654150009, |
| "kl": 0.00849771499633789, |
| "learning_rate": 5.365135048438438e-07, |
| "loss": 0.006, |
| "step": 1300 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.5170464515686, |
| "epoch": 1.8623584595620088, |
| "grad_norm": 0.14906181395053864, |
| "kl": 0.007767677307128906, |
| "learning_rate": 5.326366403217093e-07, |
| "loss": 0.0084, |
| "num_tokens": 329571311.0, |
| "reward": 0.07254464630386792, |
| "reward_std": 0.08418946416350082, |
| "rewards/pure_accuracy_reward_math": 0.07254464438301511, |
| "step": 1301 |
| }, |
| { |
| "clip_ratio": 0.00028383656763253384, |
| "epoch": 1.8642705625765588, |
| "grad_norm": 0.04550671949982643, |
| "kl": 0.008212089538574219, |
| "learning_rate": 5.287721630648615e-07, |
| "loss": 0.0083, |
| "step": 1302 |
| }, |
| { |
| "clip_ratio": 0.0003281467976989916, |
| "epoch": 1.8661826655911087, |
| "grad_norm": 0.05260877683758736, |
| "kl": 0.008829593658447266, |
| "learning_rate": 5.249200974055132e-07, |
| "loss": 0.0083, |
| "step": 1303 |
| }, |
| { |
| "clip_ratio": 0.00036754867960553383, |
| "epoch": 1.8680947686056586, |
| "grad_norm": 0.0511869452893734, |
| "kl": 0.008836746215820312, |
| "learning_rate": 5.210804675977299e-07, |
| "loss": 0.0082, |
| "step": 1304 |
| }, |
| { |
| "clip_ratio": 0.0004018283953541868, |
| "epoch": 1.8700068716202085, |
| "grad_norm": 0.044321924448013306, |
| "kl": 0.008379459381103516, |
| "learning_rate": 5.172532978172753e-07, |
| "loss": 0.0081, |
| "step": 1305 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 512.9788198471069, |
| "epoch": 1.8719189746347584, |
| "grad_norm": 0.04202428087592125, |
| "kl": 0.0076198577880859375, |
| "learning_rate": 5.134386121614615e-07, |
| "loss": 0.0072, |
| "num_tokens": 333143795.0, |
| "reward": 0.07421875317231752, |
| "reward_std": 0.07986396714113653, |
| "rewards/pure_accuracy_reward_math": 0.074218751717126, |
| "step": 1306 |
| }, |
| { |
| "clip_ratio": 0.00027569573836672134, |
| "epoch": 1.8738310776493083, |
| "grad_norm": 0.040443304926157, |
| "kl": 0.007631778717041016, |
| "learning_rate": 5.096364346489935e-07, |
| "loss": 0.0072, |
| "step": 1307 |
| }, |
| { |
| "clip_ratio": 0.00027392168607320855, |
| "epoch": 1.8757431806638583, |
| "grad_norm": 0.040238041430711746, |
| "kl": 0.007664203643798828, |
| "learning_rate": 5.058467892198241e-07, |
| "loss": 0.0071, |
| "step": 1308 |
| }, |
| { |
| "clip_ratio": 0.0003170029604007141, |
| "epoch": 1.8776552836784082, |
| "grad_norm": 0.039109617471694946, |
| "kl": 0.007664203643798828, |
| "learning_rate": 5.02069699734995e-07, |
| "loss": 0.007, |
| "step": 1309 |
| }, |
| { |
| "clip_ratio": 0.0003183572773082233, |
| "epoch": 1.879567386692958, |
| "grad_norm": 0.03724955767393112, |
| "kl": 0.007700443267822266, |
| "learning_rate": 4.983051899764946e-07, |
| "loss": 0.007, |
| "step": 1310 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 505.4592852592468, |
| "epoch": 1.881479489707508, |
| "grad_norm": 0.03964386135339737, |
| "kl": 0.007820606231689453, |
| "learning_rate": 4.945532836471026e-07, |
| "loss": 0.0074, |
| "num_tokens": 336685165.0, |
| "reward": 0.0848214327415917, |
| "reward_std": 0.07835631881607696, |
| "rewards/pure_accuracy_reward_math": 0.08482142965658568, |
| "step": 1311 |
| }, |
| { |
| "clip_ratio": 0.0002873320136700386, |
| "epoch": 1.883391592722058, |
| "grad_norm": 0.03871289640665054, |
| "kl": 0.007764339447021484, |
| "learning_rate": 4.908140043702426e-07, |
| "loss": 0.0074, |
| "step": 1312 |
| }, |
| { |
| "clip_ratio": 0.0003113469839775007, |
| "epoch": 1.8853036957366078, |
| "grad_norm": 0.03769771382212639, |
| "kl": 0.007766246795654297, |
| "learning_rate": 4.870873756898345e-07, |
| "loss": 0.0074, |
| "step": 1313 |
| }, |
| { |
| "clip_ratio": 0.00034381698696961394, |
| "epoch": 1.8872157987511577, |
| "grad_norm": 0.03724011033773422, |
| "kl": 0.007775783538818359, |
| "learning_rate": 4.833734210701435e-07, |
| "loss": 0.0073, |
| "step": 1314 |
| }, |
| { |
| "clip_ratio": 0.0003651243675335536, |
| "epoch": 1.8891279017657077, |
| "grad_norm": 0.03757576644420624, |
| "kl": 0.007784366607666016, |
| "learning_rate": 4.796721638956376e-07, |
| "loss": 0.0072, |
| "step": 1315 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.5703339576721, |
| "epoch": 1.8910400047802576, |
| "grad_norm": 0.03592124208807945, |
| "kl": 0.007517337799072266, |
| "learning_rate": 4.7598362747083293e-07, |
| "loss": 0.008, |
| "num_tokens": 340304225.0, |
| "reward": 0.06501116388244554, |
| "reward_std": 0.0762443722342141, |
| "rewards/pure_accuracy_reward_math": 0.06501116219442338, |
| "step": 1316 |
| }, |
| { |
| "clip_ratio": 0.00026663288446115985, |
| "epoch": 1.8929521077948075, |
| "grad_norm": 0.03529619425535202, |
| "kl": 0.007477283477783203, |
| "learning_rate": 4.7230783502015346e-07, |
| "loss": 0.008, |
| "step": 1317 |
| }, |
| { |
| "clip_ratio": 0.00025462434007295087, |
| "epoch": 1.8948642108093574, |
| "grad_norm": 0.03387421742081642, |
| "kl": 0.007337093353271484, |
| "learning_rate": 4.6864480968778103e-07, |
| "loss": 0.008, |
| "step": 1318 |
| }, |
| { |
| "clip_ratio": 0.00031681645646131074, |
| "epoch": 1.8967763138239073, |
| "grad_norm": 0.033014364540576935, |
| "kl": 0.007318019866943359, |
| "learning_rate": 4.649945745375109e-07, |
| "loss": 0.0079, |
| "step": 1319 |
| }, |
| { |
| "clip_ratio": 0.00037019279989181086, |
| "epoch": 1.898688416838457, |
| "grad_norm": 0.033140987157821655, |
| "kl": 0.007157325744628906, |
| "learning_rate": 4.613571525526081e-07, |
| "loss": 0.0078, |
| "step": 1320 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.3727917671204, |
| "epoch": 1.9006005198530072, |
| "grad_norm": 0.03997303172945976, |
| "kl": 0.007628440856933594, |
| "learning_rate": 4.577325666356586e-07, |
| "loss": 0.0118, |
| "num_tokens": 343915401.0, |
| "reward": 0.08816964740981348, |
| "reward_std": 0.08973595389397815, |
| "rewards/pure_accuracy_reward_math": 0.08816964426659979, |
| "step": 1321 |
| }, |
| { |
| "clip_ratio": 0.0003053776546835252, |
| "epoch": 1.9025126228675568, |
| "grad_norm": 0.039738208055496216, |
| "kl": 0.007574558258056641, |
| "learning_rate": 4.541208396084304e-07, |
| "loss": 0.0117, |
| "step": 1322 |
| }, |
| { |
| "clip_ratio": 0.00030029478972437573, |
| "epoch": 1.904424725882107, |
| "grad_norm": 0.038392502814531326, |
| "kl": 0.007514476776123047, |
| "learning_rate": 4.5052199421172475e-07, |
| "loss": 0.0117, |
| "step": 1323 |
| }, |
| { |
| "clip_ratio": 0.0003343055576010556, |
| "epoch": 1.9063368288966567, |
| "grad_norm": 0.037236347794532776, |
| "kl": 0.007477760314941406, |
| "learning_rate": 4.4693605310523636e-07, |
| "loss": 0.0116, |
| "step": 1324 |
| }, |
| { |
| "clip_ratio": 0.00032557199602933906, |
| "epoch": 1.9082489319112068, |
| "grad_norm": 0.03678731992840767, |
| "kl": 0.007478237152099609, |
| "learning_rate": 4.43363038867409e-07, |
| "loss": 0.0115, |
| "step": 1325 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 513.3047099113464, |
| "epoch": 1.9101610349257565, |
| "grad_norm": 0.11113768815994263, |
| "kl": 0.013922691345214844, |
| "learning_rate": 4.39802973995295e-07, |
| "loss": 0.0093, |
| "num_tokens": 347490901.0, |
| "reward": 0.09486607549479231, |
| "reward_std": 0.09372853260720149, |
| "rewards/pure_accuracy_reward_math": 0.09486607305007055, |
| "step": 1326 |
| }, |
| { |
| "clip_ratio": 0.00036943193325100765, |
| "epoch": 1.9120731379403066, |
| "grad_norm": 0.055216722190380096, |
| "kl": 0.013732433319091797, |
| "learning_rate": 4.362558809044107e-07, |
| "loss": 0.0093, |
| "step": 1327 |
| }, |
| { |
| "clip_ratio": 0.0004000666916681439, |
| "epoch": 1.9139852409548563, |
| "grad_norm": 0.045698132365942, |
| "kl": 0.013063907623291016, |
| "learning_rate": 4.327217819286e-07, |
| "loss": 0.0092, |
| "step": 1328 |
| }, |
| { |
| "clip_ratio": 0.0004443397794489101, |
| "epoch": 1.9158973439694065, |
| "grad_norm": 0.04273562505841255, |
| "kl": 0.012539863586425781, |
| "learning_rate": 4.292006993198888e-07, |
| "loss": 0.009, |
| "step": 1329 |
| }, |
| { |
| "clip_ratio": 0.0004470848766686686, |
| "epoch": 1.9178094469839562, |
| "grad_norm": 0.04232070967555046, |
| "kl": 0.012142658233642578, |
| "learning_rate": 4.2569265524834756e-07, |
| "loss": 0.0089, |
| "step": 1330 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 518.7550463676453, |
| "epoch": 1.9197215499985063, |
| "grad_norm": 0.03724661469459534, |
| "kl": 0.007449150085449219, |
| "learning_rate": 4.221976718019505e-07, |
| "loss": 0.007, |
| "num_tokens": 351086731.0, |
| "reward": 0.06919643189758062, |
| "reward_std": 0.07200520270271227, |
| "rewards/pure_accuracy_reward_math": 0.06919642974389717, |
| "step": 1331 |
| }, |
| { |
| "clip_ratio": 0.00027471570277270985, |
| "epoch": 1.921633653013056, |
| "grad_norm": 0.03599303960800171, |
| "kl": 0.007382869720458984, |
| "learning_rate": 4.187157709864392e-07, |
| "loss": 0.007, |
| "step": 1332 |
| }, |
| { |
| "clip_ratio": 0.0002737036326720954, |
| "epoch": 1.9235457560276061, |
| "grad_norm": 0.03614535927772522, |
| "kl": 0.007375240325927734, |
| "learning_rate": 4.152469747251794e-07, |
| "loss": 0.0069, |
| "step": 1333 |
| }, |
| { |
| "clip_ratio": 0.00030229948259830053, |
| "epoch": 1.9254578590421558, |
| "grad_norm": 0.03546711429953575, |
| "kl": 0.0072498321533203125, |
| "learning_rate": 4.117913048590283e-07, |
| "loss": 0.0069, |
| "step": 1334 |
| }, |
| { |
| "clip_ratio": 0.00030038867771509103, |
| "epoch": 1.927369962056706, |
| "grad_norm": 0.03401359170675278, |
| "kl": 0.007149219512939453, |
| "learning_rate": 4.0834878314619244e-07, |
| "loss": 0.0068, |
| "step": 1335 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 526.2182154655457, |
| "epoch": 1.9292820650712557, |
| "grad_norm": 0.04080551117658615, |
| "kl": 0.006867885589599609, |
| "learning_rate": 4.049194312620927e-07, |
| "loss": 0.0092, |
| "num_tokens": 354708525.0, |
| "reward": 0.07756696798605844, |
| "reward_std": 0.08467356563778594, |
| "rewards/pure_accuracy_reward_math": 0.07756696530850604, |
| "step": 1336 |
| }, |
| { |
| "clip_ratio": 0.0002796990767137686, |
| "epoch": 1.9311941680858056, |
| "grad_norm": 0.038895782083272934, |
| "kl": 0.006824970245361328, |
| "learning_rate": 4.015032707992286e-07, |
| "loss": 0.0092, |
| "step": 1337 |
| }, |
| { |
| "clip_ratio": 0.00032694752422912643, |
| "epoch": 1.9331062711003555, |
| "grad_norm": 0.03889061138033867, |
| "kl": 0.006866931915283203, |
| "learning_rate": 3.9810032326704106e-07, |
| "loss": 0.0091, |
| "step": 1338 |
| }, |
| { |
| "clip_ratio": 0.0003511786251237936, |
| "epoch": 1.9350183741149054, |
| "grad_norm": 0.03880919888615608, |
| "kl": 0.006947994232177734, |
| "learning_rate": 3.9471061009177693e-07, |
| "loss": 0.009, |
| "step": 1339 |
| }, |
| { |
| "clip_ratio": 0.000323922223401496, |
| "epoch": 1.9369304771294553, |
| "grad_norm": 0.036964643746614456, |
| "kl": 0.007033824920654297, |
| "learning_rate": 3.91334152616355e-07, |
| "loss": 0.0089, |
| "step": 1340 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.7076120376587, |
| "epoch": 1.9388425801440052, |
| "grad_norm": 0.04040682688355446, |
| "kl": 0.007448673248291016, |
| "learning_rate": 3.879709721002317e-07, |
| "loss": 0.0052, |
| "num_tokens": 358339045.0, |
| "reward": 0.07896205660654232, |
| "reward_std": 0.08278053888352588, |
| "rewards/pure_accuracy_reward_math": 0.07896205550059676, |
| "step": 1341 |
| }, |
| { |
| "clip_ratio": 0.00029579239503618737, |
| "epoch": 1.9407546831585551, |
| "grad_norm": 0.03910582885146141, |
| "kl": 0.007539272308349609, |
| "learning_rate": 3.8462108971926564e-07, |
| "loss": 0.0052, |
| "step": 1342 |
| }, |
| { |
| "clip_ratio": 0.0003078770084812277, |
| "epoch": 1.942666786173105, |
| "grad_norm": 0.03942732512950897, |
| "kl": 0.007628440856933594, |
| "learning_rate": 3.8128452656558623e-07, |
| "loss": 0.0051, |
| "step": 1343 |
| }, |
| { |
| "clip_ratio": 0.0003229538778555252, |
| "epoch": 1.944578889187655, |
| "grad_norm": 0.03747202083468437, |
| "kl": 0.007678031921386719, |
| "learning_rate": 3.779613036474583e-07, |
| "loss": 0.005, |
| "step": 1344 |
| }, |
| { |
| "clip_ratio": 0.000363169818285769, |
| "epoch": 1.946490992202205, |
| "grad_norm": 0.036778781563043594, |
| "kl": 0.0076923370361328125, |
| "learning_rate": 3.746514418891545e-07, |
| "loss": 0.0049, |
| "step": 1345 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 532.7960658073425, |
| "epoch": 1.9484030952167548, |
| "grad_norm": 0.040943268686532974, |
| "kl": 0.011704444885253906, |
| "learning_rate": 3.713549621308174e-07, |
| "loss": 0.005, |
| "num_tokens": 361980918.0, |
| "reward": 0.07059152092551813, |
| "reward_std": 0.07973137585213408, |
| "rewards/pure_accuracy_reward_math": 0.07059151900466532, |
| "step": 1346 |
| }, |
| { |
| "clip_ratio": 0.00029914512055029263, |
| "epoch": 1.9503151982313047, |
| "grad_norm": 0.04052672162652016, |
| "kl": 0.0114288330078125, |
| "learning_rate": 3.6807188512833406e-07, |
| "loss": 0.005, |
| "step": 1347 |
| }, |
| { |
| "clip_ratio": 0.000334167169853572, |
| "epoch": 1.9522273012458546, |
| "grad_norm": 0.04054692015051842, |
| "kl": 0.011135578155517578, |
| "learning_rate": 3.648022315532007e-07, |
| "loss": 0.0049, |
| "step": 1348 |
| }, |
| { |
| "clip_ratio": 0.00035840429575273447, |
| "epoch": 1.9541394042604046, |
| "grad_norm": 0.03996079042553902, |
| "kl": 0.010680675506591797, |
| "learning_rate": 3.615460219923955e-07, |
| "loss": 0.0048, |
| "step": 1349 |
| }, |
| { |
| "clip_ratio": 0.00034668986540964397, |
| "epoch": 1.9560515072749545, |
| "grad_norm": 0.037566084414720535, |
| "kl": 0.010373115539550781, |
| "learning_rate": 3.5830327694824777e-07, |
| "loss": 0.0047, |
| "step": 1350 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 534.6453948020935, |
| "epoch": 1.9579636102895044, |
| "grad_norm": 0.03812556713819504, |
| "kl": 0.007121086120605469, |
| "learning_rate": 3.5507401683830933e-07, |
| "loss": 0.0114, |
| "num_tokens": 365629991.0, |
| "reward": 0.07672991411527619, |
| "reward_std": 0.07831625349353999, |
| "rewards/pure_accuracy_reward_math": 0.07672991178696975, |
| "step": 1351 |
| }, |
| { |
| "clip_ratio": 0.0003128355612602718, |
| "epoch": 1.9598757133040543, |
| "grad_norm": 0.03631382808089256, |
| "kl": 0.007141590118408203, |
| "learning_rate": 3.518582619952257e-07, |
| "loss": 0.0114, |
| "step": 1352 |
| }, |
| { |
| "clip_ratio": 0.00033067399391484287, |
| "epoch": 1.9617878163186042, |
| "grad_norm": 0.03752359002828598, |
| "kl": 0.007140636444091797, |
| "learning_rate": 3.486560326666072e-07, |
| "loss": 0.0113, |
| "step": 1353 |
| }, |
| { |
| "clip_ratio": 0.00037038392605381887, |
| "epoch": 1.9636999193331541, |
| "grad_norm": 0.03724711388349533, |
| "kl": 0.007131099700927734, |
| "learning_rate": 3.4546734901490466e-07, |
| "loss": 0.0112, |
| "step": 1354 |
| }, |
| { |
| "clip_ratio": 0.00040464663743478013, |
| "epoch": 1.9656120223477038, |
| "grad_norm": 0.034875430166721344, |
| "kl": 0.007108211517333984, |
| "learning_rate": 3.42292231117278e-07, |
| "loss": 0.0112, |
| "step": 1355 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.9101786613464, |
| "epoch": 1.967524125362254, |
| "grad_norm": 0.04123640060424805, |
| "kl": 0.007243156433105469, |
| "learning_rate": 3.3913069896547217e-07, |
| "loss": 0.0069, |
| "num_tokens": 369229613.0, |
| "reward": 0.08007812878349796, |
| "reward_std": 0.085311732836999, |
| "rewards/pure_accuracy_reward_math": 0.0800781263387762, |
| "step": 1356 |
| }, |
| { |
| "clip_ratio": 0.00033138683619426956, |
| "epoch": 1.9694362283768037, |
| "grad_norm": 0.04048166796565056, |
| "kl": 0.007332801818847656, |
| "learning_rate": 3.3598277246569307e-07, |
| "loss": 0.0069, |
| "step": 1357 |
| }, |
| { |
| "clip_ratio": 0.0003668193609200898, |
| "epoch": 1.9713483313913538, |
| "grad_norm": 0.042313288897275925, |
| "kl": 0.007485866546630859, |
| "learning_rate": 3.3284847143847834e-07, |
| "loss": 0.0068, |
| "step": 1358 |
| }, |
| { |
| "clip_ratio": 0.0003713441701620468, |
| "epoch": 1.9732604344059035, |
| "grad_norm": 0.04199962690472603, |
| "kl": 0.007598400115966797, |
| "learning_rate": 3.2972781561857433e-07, |
| "loss": 0.0067, |
| "step": 1359 |
| }, |
| { |
| "clip_ratio": 0.0003367169608736731, |
| "epoch": 1.9751725374204536, |
| "grad_norm": 0.03874565288424492, |
| "kl": 0.007636547088623047, |
| "learning_rate": 3.266208246548136e-07, |
| "loss": 0.0066, |
| "step": 1360 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.4445023536682, |
| "epoch": 1.9770846404350033, |
| "grad_norm": 0.040357448160648346, |
| "kl": 0.007414817810058594, |
| "learning_rate": 3.2352751810998896e-07, |
| "loss": 0.0055, |
| "num_tokens": 372817046.0, |
| "reward": 0.08258928993018344, |
| "reward_std": 0.09080576250562444, |
| "rewards/pure_accuracy_reward_math": 0.08258928690338507, |
| "step": 1361 |
| }, |
| { |
| "clip_ratio": 0.00038423701278134104, |
| "epoch": 1.9789967434495535, |
| "grad_norm": 0.03990958258509636, |
| "kl": 0.007411479949951172, |
| "learning_rate": 3.2044791546072985e-07, |
| "loss": 0.0055, |
| "step": 1362 |
| }, |
| { |
| "clip_ratio": 0.00044172884827275993, |
| "epoch": 1.9809088464641031, |
| "grad_norm": 0.042212970554828644, |
| "kl": 0.007319450378417969, |
| "learning_rate": 3.173820360973823e-07, |
| "loss": 0.0054, |
| "step": 1363 |
| }, |
| { |
| "clip_ratio": 0.00042502668532051757, |
| "epoch": 1.9828209494786533, |
| "grad_norm": 0.03946436941623688, |
| "kl": 0.0072727203369140625, |
| "learning_rate": 3.1432989932388416e-07, |
| "loss": 0.0053, |
| "step": 1364 |
| }, |
| { |
| "clip_ratio": 0.00040032099315112646, |
| "epoch": 1.984733052493203, |
| "grad_norm": 0.03701746463775635, |
| "kl": 0.007288455963134766, |
| "learning_rate": 3.1129152435764473e-07, |
| "loss": 0.0052, |
| "step": 1365 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.9707279205322, |
| "epoch": 1.9866451555077531, |
| "grad_norm": 0.03677362576127052, |
| "kl": 0.00740814208984375, |
| "learning_rate": 3.0826693032942586e-07, |
| "loss": 0.008, |
| "num_tokens": 376414405.0, |
| "reward": 0.07087053926079534, |
| "reward_std": 0.07741290412377566, |
| "rewards/pure_accuracy_reward_math": 0.07087053710711189, |
| "step": 1366 |
| }, |
| { |
| "clip_ratio": 0.0002998853265978596, |
| "epoch": 1.9885572585223028, |
| "grad_norm": 0.03619634732604027, |
| "kl": 0.0074787139892578125, |
| "learning_rate": 3.0525613628321656e-07, |
| "loss": 0.0079, |
| "step": 1367 |
| }, |
| { |
| "clip_ratio": 0.00031987275491474065, |
| "epoch": 1.990469361536853, |
| "grad_norm": 0.03580261766910553, |
| "kl": 0.007512092590332031, |
| "learning_rate": 3.022591611761169e-07, |
| "loss": 0.0079, |
| "step": 1368 |
| }, |
| { |
| "clip_ratio": 0.00029055258056587263, |
| "epoch": 1.9923814645514026, |
| "grad_norm": 0.03512256592512131, |
| "kl": 0.007531166076660156, |
| "learning_rate": 2.9927602387821916e-07, |
| "loss": 0.0078, |
| "step": 1369 |
| }, |
| { |
| "clip_ratio": 0.0003325358438814874, |
| "epoch": 1.9942935675659528, |
| "grad_norm": 0.03404110670089722, |
| "kl": 0.007470130920410156, |
| "learning_rate": 2.963067431724856e-07, |
| "loss": 0.0077, |
| "step": 1370 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.95845079422, |
| "epoch": 2.0019121030145497, |
| "grad_norm": 0.03709035739302635, |
| "kl": 0.007386684417724609, |
| "learning_rate": 2.9335133775463266e-07, |
| "loss": 0.011, |
| "num_tokens": 380027444.0, |
| "reward": 0.07198661039001308, |
| "reward_std": 0.07208533387165517, |
| "rewards/pure_accuracy_reward_math": 0.07198660876019858, |
| "step": 1371 |
| }, |
| { |
| "clip_ratio": 0.0002751371110321088, |
| "epoch": 2.0038242060291, |
| "grad_norm": 0.03661485016345978, |
| "kl": 0.007431507110595703, |
| "learning_rate": 2.9040982623301264e-07, |
| "loss": 0.011, |
| "step": 1372 |
| }, |
| { |
| "clip_ratio": 0.0003175289227783651, |
| "epoch": 2.0057363090436495, |
| "grad_norm": 0.036799393594264984, |
| "kl": 0.007405281066894531, |
| "learning_rate": 2.874822271284977e-07, |
| "loss": 0.0109, |
| "step": 1373 |
| }, |
| { |
| "clip_ratio": 0.0003284543961399322, |
| "epoch": 2.0076484120581997, |
| "grad_norm": 0.036977026611566544, |
| "kl": 0.007386684417724609, |
| "learning_rate": 2.8456855887436074e-07, |
| "loss": 0.0108, |
| "step": 1374 |
| }, |
| { |
| "clip_ratio": 0.00032697250054525284, |
| "epoch": 2.0095605150727494, |
| "grad_norm": 0.03594314306974411, |
| "kl": 0.00739288330078125, |
| "learning_rate": 2.816688398161613e-07, |
| "loss": 0.0108, |
| "step": 1375 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 524.5270891189575, |
| "epoch": 2.0114726180872995, |
| "grad_norm": 15.976890563964844, |
| "kl": 0.4394536018371582, |
| "learning_rate": 2.7878308821162964e-07, |
| "loss": 0.0259, |
| "num_tokens": 383639505.0, |
| "reward": 0.08286830733413808, |
| "reward_std": 0.08972975501092151, |
| "rewards/pure_accuracy_reward_math": 0.08286830488941632, |
| "step": 1376 |
| }, |
| { |
| "clip_ratio": 0.0003084787746274742, |
| "epoch": 2.013384721101849, |
| "grad_norm": 1.2859545946121216, |
| "kl": 0.04446220397949219, |
| "learning_rate": 2.759113222305512e-07, |
| "loss": 0.0102, |
| "step": 1377 |
| }, |
| { |
| "clip_ratio": 0.00034848380650487343, |
| "epoch": 2.0152968241163993, |
| "grad_norm": 0.0618804506957531, |
| "kl": 0.009487152099609375, |
| "learning_rate": 2.730535599546524e-07, |
| "loss": 0.0087, |
| "step": 1378 |
| }, |
| { |
| "clip_ratio": 0.000346398171132023, |
| "epoch": 2.017208927130949, |
| "grad_norm": 0.039353594183921814, |
| "kl": 0.008243560791015625, |
| "learning_rate": 2.702098193774891e-07, |
| "loss": 0.0087, |
| "step": 1379 |
| }, |
| { |
| "clip_ratio": 0.000389314118024231, |
| "epoch": 2.019121030145499, |
| "grad_norm": 0.03626256063580513, |
| "kl": 0.0083465576171875, |
| "learning_rate": 2.6738011840432817e-07, |
| "loss": 0.0086, |
| "step": 1380 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 504.881441116333, |
| "epoch": 2.021033133160049, |
| "grad_norm": 0.03991848975419998, |
| "kl": 0.00807046890258789, |
| "learning_rate": 2.6456447485204014e-07, |
| "loss": 0.0078, |
| "num_tokens": 387180856.0, |
| "reward": 0.07700893218861893, |
| "reward_std": 0.0893906393321231, |
| "rewards/pure_accuracy_reward_math": 0.07700893026776612, |
| "step": 1381 |
| }, |
| { |
| "clip_ratio": 0.00029079897933570464, |
| "epoch": 2.022945236174599, |
| "grad_norm": 0.03955512493848801, |
| "kl": 0.008087635040283203, |
| "learning_rate": 2.617629064489838e-07, |
| "loss": 0.0078, |
| "step": 1382 |
| }, |
| { |
| "clip_ratio": 0.00034119405472665676, |
| "epoch": 2.0248573391891487, |
| "grad_norm": 0.04050750657916069, |
| "kl": 0.008031845092773438, |
| "learning_rate": 2.5897543083489544e-07, |
| "loss": 0.0077, |
| "step": 1383 |
| }, |
| { |
| "clip_ratio": 0.0003633832532159431, |
| "epoch": 2.026769442203699, |
| "grad_norm": 0.03760417178273201, |
| "kl": 0.007889270782470703, |
| "learning_rate": 2.562020655607772e-07, |
| "loss": 0.0076, |
| "step": 1384 |
| }, |
| { |
| "clip_ratio": 0.00040043183099669477, |
| "epoch": 2.0286815452182485, |
| "grad_norm": 0.036376822739839554, |
| "kl": 0.007742404937744141, |
| "learning_rate": 2.534428280887891e-07, |
| "loss": 0.0076, |
| "step": 1385 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 521.2332820892334, |
| "epoch": 2.0305936482327986, |
| "grad_norm": 0.03659322112798691, |
| "kl": 0.0079498291015625, |
| "learning_rate": 2.50697735792135e-07, |
| "loss": 0.0074, |
| "num_tokens": 390784592.0, |
| "reward": 0.0678013424621895, |
| "reward_std": 0.07990403228905052, |
| "rewards/pure_accuracy_reward_math": 0.06780134083237499, |
| "step": 1386 |
| }, |
| { |
| "clip_ratio": 0.0003029348101790674, |
| "epoch": 2.0325057512473483, |
| "grad_norm": 0.03603421524167061, |
| "kl": 0.0077915191650390625, |
| "learning_rate": 2.47966805954957e-07, |
| "loss": 0.0073, |
| "step": 1387 |
| }, |
| { |
| "clip_ratio": 0.0002788126068935526, |
| "epoch": 2.0344178542618985, |
| "grad_norm": 0.035584706813097, |
| "kl": 0.00768280029296875, |
| "learning_rate": 2.4525005577222373e-07, |
| "loss": 0.0073, |
| "step": 1388 |
| }, |
| { |
| "clip_ratio": 0.00033219700696918153, |
| "epoch": 2.036329957276448, |
| "grad_norm": 0.033913753926754, |
| "kl": 0.007656097412109375, |
| "learning_rate": 2.42547502349624e-07, |
| "loss": 0.0072, |
| "step": 1389 |
| }, |
| { |
| "clip_ratio": 0.00034793876449157324, |
| "epoch": 2.0382420602909983, |
| "grad_norm": 0.033490557223558426, |
| "kl": 0.007609367370605469, |
| "learning_rate": 2.398591627034588e-07, |
| "loss": 0.0072, |
| "step": 1390 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 534.8217334747314, |
| "epoch": 2.040154163305548, |
| "grad_norm": 0.04065319523215294, |
| "kl": 0.007349491119384766, |
| "learning_rate": 2.3718505376053246e-07, |
| "loss": 0.0094, |
| "num_tokens": 394433277.0, |
| "reward": 0.07589286056463607, |
| "reward_std": 0.09050671145087108, |
| "rewards/pure_accuracy_reward_math": 0.07589285823632963, |
| "step": 1391 |
| }, |
| { |
| "clip_ratio": 0.00032872594630362073, |
| "epoch": 2.042066266320098, |
| "grad_norm": 0.0390729084610939, |
| "kl": 0.007353305816650391, |
| "learning_rate": 2.345251923580491e-07, |
| "loss": 0.0094, |
| "step": 1392 |
| }, |
| { |
| "clip_ratio": 0.00038015836332760955, |
| "epoch": 2.043978369334648, |
| "grad_norm": 0.037973206490278244, |
| "kl": 0.007381916046142578, |
| "learning_rate": 2.3187959524350352e-07, |
| "loss": 0.0093, |
| "step": 1393 |
| }, |
| { |
| "clip_ratio": 0.00041672343576237836, |
| "epoch": 2.045890472349198, |
| "grad_norm": 0.037547629326581955, |
| "kl": 0.007441043853759766, |
| "learning_rate": 2.2924827907457841e-07, |
| "loss": 0.0092, |
| "step": 1394 |
| }, |
| { |
| "clip_ratio": 0.00047711057584365335, |
| "epoch": 2.0478025753637477, |
| "grad_norm": 0.037767618894577026, |
| "kl": 0.007452487945556641, |
| "learning_rate": 2.266312604190374e-07, |
| "loss": 0.0091, |
| "step": 1395 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.9163165092468, |
| "epoch": 2.049714678378298, |
| "grad_norm": 0.039165694266557693, |
| "kl": 0.007717609405517578, |
| "learning_rate": 2.2402855575462152e-07, |
| "loss": 0.0071, |
| "num_tokens": 398030605.0, |
| "reward": 0.07840402194415219, |
| "reward_std": 0.08072105259634554, |
| "rewards/pure_accuracy_reward_math": 0.07840401885914616, |
| "step": 1396 |
| }, |
| { |
| "clip_ratio": 0.0002864374472437703, |
| "epoch": 2.0516267813928475, |
| "grad_norm": 0.03918104246258736, |
| "kl": 0.007798194885253906, |
| "learning_rate": 2.2144018146894542e-07, |
| "loss": 0.007, |
| "step": 1397 |
| }, |
| { |
| "clip_ratio": 0.00028412381868747616, |
| "epoch": 2.0535388844073976, |
| "grad_norm": 0.03787809982895851, |
| "kl": 0.007855415344238281, |
| "learning_rate": 2.1886615385939502e-07, |
| "loss": 0.007, |
| "step": 1398 |
| }, |
| { |
| "clip_ratio": 0.0002802736350417945, |
| "epoch": 2.0554509874219473, |
| "grad_norm": 0.03685666248202324, |
| "kl": 0.007898807525634766, |
| "learning_rate": 2.1630648913302354e-07, |
| "loss": 0.0069, |
| "step": 1399 |
| }, |
| { |
| "clip_ratio": 0.0003048399971703475, |
| "epoch": 2.0573630904364975, |
| "grad_norm": 0.03653446584939957, |
| "kl": 0.0079193115234375, |
| "learning_rate": 2.1376120340645014e-07, |
| "loss": 0.0068, |
| "step": 1400 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.7120804786682, |
| "epoch": 2.059275193451047, |
| "grad_norm": 0.041400156915187836, |
| "kl": 0.0076904296875, |
| "learning_rate": 2.1123031270575827e-07, |
| "loss": 0.0112, |
| "num_tokens": 401639357.0, |
| "reward": 0.08398437922005542, |
| "reward_std": 0.08836089540272951, |
| "rewards/pure_accuracy_reward_math": 0.08398437665891834, |
| "step": 1401 |
| }, |
| { |
| "clip_ratio": 0.0003276587292475597, |
| "epoch": 2.0611872964655973, |
| "grad_norm": 0.04058953374624252, |
| "kl": 0.007676601409912109, |
| "learning_rate": 2.0871383296639487e-07, |
| "loss": 0.0112, |
| "step": 1402 |
| }, |
| { |
| "clip_ratio": 0.00033817819053183484, |
| "epoch": 2.063099399480147, |
| "grad_norm": 0.040160875767469406, |
| "kl": 0.007659435272216797, |
| "learning_rate": 2.062117800330693e-07, |
| "loss": 0.0112, |
| "step": 1403 |
| }, |
| { |
| "clip_ratio": 0.00034579052078242967, |
| "epoch": 2.065011502494697, |
| "grad_norm": 0.03876737132668495, |
| "kl": 0.007627964019775391, |
| "learning_rate": 2.0372416965965675e-07, |
| "loss": 0.0111, |
| "step": 1404 |
| }, |
| { |
| "clip_ratio": 0.00035969930786450277, |
| "epoch": 2.066923605509247, |
| "grad_norm": 0.03797266259789467, |
| "kl": 0.007703304290771484, |
| "learning_rate": 2.0125101750909315e-07, |
| "loss": 0.011, |
| "step": 1405 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 514.2500252723694, |
| "epoch": 2.068835708523797, |
| "grad_norm": 0.05333253741264343, |
| "kl": 0.010094165802001953, |
| "learning_rate": 1.9879233915328312e-07, |
| "loss": 0.0065, |
| "num_tokens": 405215041.0, |
| "reward": 0.08231027176952921, |
| "reward_std": 0.08208991179708391, |
| "rewards/pure_accuracy_reward_math": 0.08231026903376915, |
| "step": 1406 |
| }, |
| { |
| "clip_ratio": 0.0002884399551135175, |
| "epoch": 2.0707478115383466, |
| "grad_norm": 0.04066501557826996, |
| "kl": 0.009914398193359375, |
| "learning_rate": 1.9634815007299634e-07, |
| "loss": 0.0065, |
| "step": 1407 |
| }, |
| { |
| "clip_ratio": 0.0003325861029566113, |
| "epoch": 2.0726599145528963, |
| "grad_norm": 0.03939688578248024, |
| "kl": 0.00982666015625, |
| "learning_rate": 1.9391846565777418e-07, |
| "loss": 0.0064, |
| "step": 1408 |
| }, |
| { |
| "clip_ratio": 0.0003743518978467364, |
| "epoch": 2.0745720175674465, |
| "grad_norm": 0.03857440873980522, |
| "kl": 0.009755611419677734, |
| "learning_rate": 1.9150330120583012e-07, |
| "loss": 0.0063, |
| "step": 1409 |
| }, |
| { |
| "clip_ratio": 0.0004666026043196325, |
| "epoch": 2.076484120581996, |
| "grad_norm": 0.03952641412615776, |
| "kl": 0.0096588134765625, |
| "learning_rate": 1.891026719239547e-07, |
| "loss": 0.0062, |
| "step": 1410 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.8532605171204, |
| "epoch": 2.0783962235965463, |
| "grad_norm": 0.04142899066209793, |
| "kl": 0.008448123931884766, |
| "learning_rate": 1.8671659292742007e-07, |
| "loss": 0.0099, |
| "num_tokens": 408804459.0, |
| "reward": 0.08286830742144957, |
| "reward_std": 0.08260788215557113, |
| "rewards/pure_accuracy_reward_math": 0.08286830509314314, |
| "step": 1411 |
| }, |
| { |
| "clip_ratio": 0.0003487231184635675, |
| "epoch": 2.080308326611096, |
| "grad_norm": 0.040530916303396225, |
| "kl": 0.008367538452148438, |
| "learning_rate": 1.8434507923988375e-07, |
| "loss": 0.0099, |
| "step": 1412 |
| }, |
| { |
| "clip_ratio": 0.0003221970002869057, |
| "epoch": 2.082220429625646, |
| "grad_norm": 0.03941330686211586, |
| "kl": 0.008350849151611328, |
| "learning_rate": 1.8198814579329426e-07, |
| "loss": 0.0098, |
| "step": 1413 |
| }, |
| { |
| "clip_ratio": 0.00037204451541583694, |
| "epoch": 2.084132532640196, |
| "grad_norm": 0.03861032798886299, |
| "kl": 0.008304595947265625, |
| "learning_rate": 1.7964580742779847e-07, |
| "loss": 0.0097, |
| "step": 1414 |
| }, |
| { |
| "clip_ratio": 0.0003590778907209824, |
| "epoch": 2.086044635654746, |
| "grad_norm": 0.03945469483733177, |
| "kl": 0.008287906646728516, |
| "learning_rate": 1.7731807889164537e-07, |
| "loss": 0.0096, |
| "step": 1415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 529.592381477356, |
| "epoch": 2.0879567386692957, |
| "grad_norm": 0.03833872824907303, |
| "kl": 0.0077228546142578125, |
| "learning_rate": 1.7500497484109703e-07, |
| "loss": 0.0109, |
| "num_tokens": 412432506.0, |
| "reward": 0.07449777142028324, |
| "reward_std": 0.08200978167587891, |
| "rewards/pure_accuracy_reward_math": 0.07449776885914616, |
| "step": 1416 |
| }, |
| { |
| "clip_ratio": 0.0002795722035671133, |
| "epoch": 2.089868841683846, |
| "grad_norm": 0.03684116527438164, |
| "kl": 0.007727146148681641, |
| "learning_rate": 1.7270650984033245e-07, |
| "loss": 0.0108, |
| "step": 1417 |
| }, |
| { |
| "clip_ratio": 0.00033119657558700055, |
| "epoch": 2.0917809446983955, |
| "grad_norm": 0.03667665645480156, |
| "kl": 0.007739067077636719, |
| "learning_rate": 1.7042269836135882e-07, |
| "loss": 0.0108, |
| "step": 1418 |
| }, |
| { |
| "clip_ratio": 0.00036255177064958843, |
| "epoch": 2.0936930477129456, |
| "grad_norm": 0.037857044488191605, |
| "kl": 0.007757663726806641, |
| "learning_rate": 1.6815355478391886e-07, |
| "loss": 0.0107, |
| "step": 1419 |
| }, |
| { |
| "clip_ratio": 0.0003589615364489873, |
| "epoch": 2.0956051507274953, |
| "grad_norm": 0.0360855907201767, |
| "kl": 0.007729053497314453, |
| "learning_rate": 1.6589909339539968e-07, |
| "loss": 0.0106, |
| "step": 1420 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 523.7469544410706, |
| "epoch": 2.0975172537420455, |
| "grad_norm": 0.041348401457071304, |
| "kl": 0.007639408111572266, |
| "learning_rate": 1.6365932839074532e-07, |
| "loss": 0.0099, |
| "num_tokens": 416048915.0, |
| "reward": 0.07979911076836288, |
| "reward_std": 0.08175079576903954, |
| "rewards/pure_accuracy_reward_math": 0.07979910861467943, |
| "step": 1421 |
| }, |
| { |
| "clip_ratio": 0.00028084742956480113, |
| "epoch": 2.099429356756595, |
| "grad_norm": 0.03983917832374573, |
| "kl": 0.007691860198974609, |
| "learning_rate": 1.6143427387236455e-07, |
| "loss": 0.0099, |
| "step": 1422 |
| }, |
| { |
| "clip_ratio": 0.00032101355429858813, |
| "epoch": 2.1013414597711453, |
| "grad_norm": 0.04035898670554161, |
| "kl": 0.007829666137695312, |
| "learning_rate": 1.592239438500434e-07, |
| "loss": 0.0098, |
| "step": 1423 |
| }, |
| { |
| "clip_ratio": 0.00036129408920260175, |
| "epoch": 2.103253562785695, |
| "grad_norm": 0.03893222287297249, |
| "kl": 0.0079498291015625, |
| "learning_rate": 1.570283522408586e-07, |
| "loss": 0.0097, |
| "step": 1424 |
| }, |
| { |
| "clip_ratio": 0.0003233651194136655, |
| "epoch": 2.105165665800245, |
| "grad_norm": 0.03798089176416397, |
| "kl": 0.008071422576904297, |
| "learning_rate": 1.5484751286908655e-07, |
| "loss": 0.0097, |
| "step": 1425 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 515.3281455039978, |
| "epoch": 2.107077768814795, |
| "grad_norm": 0.04489213973283768, |
| "kl": 0.00823831558227539, |
| "learning_rate": 1.5268143946611802e-07, |
| "loss": 0.01, |
| "num_tokens": 419628171.0, |
| "reward": 0.07952009321888909, |
| "reward_std": 0.0892580482759513, |
| "rewards/pure_accuracy_reward_math": 0.07952009089058265, |
| "step": 1426 |
| }, |
| { |
| "clip_ratio": 0.0003507794546067089, |
| "epoch": 2.108989871829345, |
| "grad_norm": 0.04182901233434677, |
| "kl": 0.008199691772460938, |
| "learning_rate": 1.5053014567037171e-07, |
| "loss": 0.01, |
| "step": 1427 |
| }, |
| { |
| "clip_ratio": 0.0004634781105323782, |
| "epoch": 2.1109019748438946, |
| "grad_norm": 0.04111779108643532, |
| "kl": 0.008260250091552734, |
| "learning_rate": 1.483936450272097e-07, |
| "loss": 0.0099, |
| "step": 1428 |
| }, |
| { |
| "clip_ratio": 0.0005032591409417364, |
| "epoch": 2.1128140778584448, |
| "grad_norm": 0.04071485623717308, |
| "kl": 0.008274078369140625, |
| "learning_rate": 1.4627195098884856e-07, |
| "loss": 0.0098, |
| "step": 1429 |
| }, |
| { |
| "clip_ratio": 0.0005640338476382567, |
| "epoch": 2.1147261808729945, |
| "grad_norm": 0.041747044771909714, |
| "kl": 0.008271217346191406, |
| "learning_rate": 1.441650769142791e-07, |
| "loss": 0.0097, |
| "step": 1430 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.8217334747314, |
| "epoch": 2.1166382838875446, |
| "grad_norm": 0.04057304188609123, |
| "kl": 0.00798797607421875, |
| "learning_rate": 1.4207303606917856e-07, |
| "loss": 0.0057, |
| "num_tokens": 423255484.0, |
| "reward": 0.08761161076836288, |
| "reward_std": 0.09866452467394993, |
| "rewards/pure_accuracy_reward_math": 0.08761160855647177, |
| "step": 1431 |
| }, |
| { |
| "clip_ratio": 0.0003497144300581567, |
| "epoch": 2.1185503869020943, |
| "grad_norm": 0.03972388803958893, |
| "kl": 0.007953643798828125, |
| "learning_rate": 1.3999584162582874e-07, |
| "loss": 0.0057, |
| "step": 1432 |
| }, |
| { |
| "clip_ratio": 0.00037741022566706306, |
| "epoch": 2.1204624899166444, |
| "grad_norm": 0.03924018144607544, |
| "kl": 0.00795888900756836, |
| "learning_rate": 1.3793350666303328e-07, |
| "loss": 0.0056, |
| "step": 1433 |
| }, |
| { |
| "clip_ratio": 0.0003785647801350933, |
| "epoch": 2.122374592931194, |
| "grad_norm": 0.03913624957203865, |
| "kl": 0.007895946502685547, |
| "learning_rate": 1.3588604416603424e-07, |
| "loss": 0.0055, |
| "step": 1434 |
| }, |
| { |
| "clip_ratio": 0.0003937934675377619, |
| "epoch": 2.1242866959457443, |
| "grad_norm": 0.03699544072151184, |
| "kl": 0.00783538818359375, |
| "learning_rate": 1.3385346702643188e-07, |
| "loss": 0.0054, |
| "step": 1435 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 533.7888078689575, |
| "epoch": 2.126198798960294, |
| "grad_norm": 0.042676378041505814, |
| "kl": 0.010451793670654297, |
| "learning_rate": 1.3183578804210173e-07, |
| "loss": 0.0098, |
| "num_tokens": 426903267.0, |
| "reward": 0.07645089671132155, |
| "reward_std": 0.08488008996937424, |
| "rewards/pure_accuracy_reward_math": 0.07645089426659979, |
| "step": 1436 |
| }, |
| { |
| "clip_ratio": 0.00036263700505401175, |
| "epoch": 2.128110901974844, |
| "grad_norm": 0.03884616866707802, |
| "kl": 0.010242462158203125, |
| "learning_rate": 1.2983301991711578e-07, |
| "loss": 0.0098, |
| "step": 1437 |
| }, |
| { |
| "clip_ratio": 0.0003990789759313884, |
| "epoch": 2.130023004989394, |
| "grad_norm": 0.0399676114320755, |
| "kl": 0.01007843017578125, |
| "learning_rate": 1.278451752616608e-07, |
| "loss": 0.0097, |
| "step": 1438 |
| }, |
| { |
| "clip_ratio": 0.0004171350746560165, |
| "epoch": 2.131935108003944, |
| "grad_norm": 0.039714373648166656, |
| "kl": 0.010037422180175781, |
| "learning_rate": 1.258722665919604e-07, |
| "loss": 0.0097, |
| "step": 1439 |
| }, |
| { |
| "clip_ratio": 0.00039808801824392503, |
| "epoch": 2.1338472110184936, |
| "grad_norm": 0.03794709965586662, |
| "kl": 0.009942054748535156, |
| "learning_rate": 1.2391430633019452e-07, |
| "loss": 0.0096, |
| "step": 1440 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.7826709747314, |
| "epoch": 2.1357593140330433, |
| "grad_norm": 0.05131447687745094, |
| "kl": 0.00860595703125, |
| "learning_rate": 1.2197130680442399e-07, |
| "loss": 0.0073, |
| "num_tokens": 430520032.0, |
| "reward": 0.07282366428989917, |
| "reward_std": 0.0797313749208115, |
| "rewards/pure_accuracy_reward_math": 0.07282366172876209, |
| "step": 1441 |
| }, |
| { |
| "clip_ratio": 0.0003007381984616586, |
| "epoch": 2.1376714170475934, |
| "grad_norm": 0.03815394267439842, |
| "kl": 0.008358001708984375, |
| "learning_rate": 1.2004328024850938e-07, |
| "loss": 0.0073, |
| "step": 1442 |
| }, |
| { |
| "clip_ratio": 0.0003256684682355626, |
| "epoch": 2.139583520062143, |
| "grad_norm": 0.03841105103492737, |
| "kl": 0.008275985717773438, |
| "learning_rate": 1.1813023880203722e-07, |
| "loss": 0.0072, |
| "step": 1443 |
| }, |
| { |
| "clip_ratio": 0.00034418403180325186, |
| "epoch": 2.1414956230766933, |
| "grad_norm": 0.041511572897434235, |
| "kl": 0.008276939392089844, |
| "learning_rate": 1.1623219451024098e-07, |
| "loss": 0.0071, |
| "step": 1444 |
| }, |
| { |
| "clip_ratio": 0.00032526867431670325, |
| "epoch": 2.143407726091243, |
| "grad_norm": 0.03922862559556961, |
| "kl": 0.008294105529785156, |
| "learning_rate": 1.1434915932392682e-07, |
| "loss": 0.007, |
| "step": 1445 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 526.7310523986816, |
| "epoch": 2.145319829105793, |
| "grad_norm": 0.04134941101074219, |
| "kl": 0.008166313171386719, |
| "learning_rate": 1.1248114509939817e-07, |
| "loss": 0.0067, |
| "num_tokens": 434141592.0, |
| "reward": 0.08342634307336994, |
| "reward_std": 0.08578344061970711, |
| "rewards/pure_accuracy_reward_math": 0.08342634132714011, |
| "step": 1446 |
| }, |
| { |
| "clip_ratio": 0.00029539940015865795, |
| "epoch": 2.147231932120343, |
| "grad_norm": 0.04034848138689995, |
| "kl": 0.008122920989990234, |
| "learning_rate": 1.1062816359838024e-07, |
| "loss": 0.0066, |
| "step": 1447 |
| }, |
| { |
| "clip_ratio": 0.0003565281184592095, |
| "epoch": 2.149144035134893, |
| "grad_norm": 0.04018424078822136, |
| "kl": 0.00803232192993164, |
| "learning_rate": 1.0879022648794645e-07, |
| "loss": 0.0066, |
| "step": 1448 |
| }, |
| { |
| "clip_ratio": 0.0003515161848781645, |
| "epoch": 2.1510561381494426, |
| "grad_norm": 0.03917380049824715, |
| "kl": 0.007886886596679688, |
| "learning_rate": 1.0696734534044629e-07, |
| "loss": 0.0065, |
| "step": 1449 |
| }, |
| { |
| "clip_ratio": 0.0004228238227028669, |
| "epoch": 2.1529682411639928, |
| "grad_norm": 0.038036227226257324, |
| "kl": 0.00785064697265625, |
| "learning_rate": 1.0515953163342973e-07, |
| "loss": 0.0064, |
| "step": 1450 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 544.0078330039978, |
| "epoch": 2.1548803441785425, |
| "grad_norm": 0.03814779594540596, |
| "kl": 0.008002758026123047, |
| "learning_rate": 1.0336679674957716e-07, |
| "loss": 0.0113, |
| "num_tokens": 437824108.0, |
| "reward": 0.07533482514554635, |
| "reward_std": 0.07659588241949677, |
| "rewards/pure_accuracy_reward_math": 0.07533482287544757, |
| "step": 1451 |
| }, |
| { |
| "clip_ratio": 0.0002914705042371679, |
| "epoch": 2.1567924471930926, |
| "grad_norm": 0.03763413056731224, |
| "kl": 0.00798654556274414, |
| "learning_rate": 1.0158915197662628e-07, |
| "loss": 0.0113, |
| "step": 1452 |
| }, |
| { |
| "clip_ratio": 0.0002916823746659247, |
| "epoch": 2.1587045502076423, |
| "grad_norm": 0.036225125193595886, |
| "kl": 0.008030414581298828, |
| "learning_rate": 9.982660850730269e-08, |
| "loss": 0.0112, |
| "step": 1453 |
| }, |
| { |
| "clip_ratio": 0.0002708278207137482, |
| "epoch": 2.1606166532221924, |
| "grad_norm": 0.03529945760965347, |
| "kl": 0.00803375244140625, |
| "learning_rate": 9.807917743924838e-08, |
| "loss": 0.0112, |
| "step": 1454 |
| }, |
| { |
| "clip_ratio": 0.0002930295025862506, |
| "epoch": 2.162528756236742, |
| "grad_norm": 0.03426925837993622, |
| "kl": 0.007987022399902344, |
| "learning_rate": 9.634686977495089e-08, |
| "loss": 0.0111, |
| "step": 1455 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 517.6585068702698, |
| "epoch": 2.1644408592512923, |
| "grad_norm": 0.038425736129283905, |
| "kl": 0.008115291595458984, |
| "learning_rate": 9.462969642167613e-08, |
| "loss": 0.0052, |
| "num_tokens": 441407888.0, |
| "reward": 0.07617187869618647, |
| "reward_std": 0.0740246243076399, |
| "rewards/pure_accuracy_reward_math": 0.07617187630967237, |
| "step": 1456 |
| }, |
| { |
| "clip_ratio": 0.00023060813538222646, |
| "epoch": 2.166352962265842, |
| "grad_norm": 0.03851727396249771, |
| "kl": 0.008001327514648438, |
| "learning_rate": 9.292766819139847e-08, |
| "loss": 0.0052, |
| "step": 1457 |
| }, |
| { |
| "clip_ratio": 0.0002378168165932948, |
| "epoch": 2.168265065280392, |
| "grad_norm": 0.040155645459890366, |
| "kl": 0.007994651794433594, |
| "learning_rate": 9.12407958007322e-08, |
| "loss": 0.0051, |
| "step": 1458 |
| }, |
| { |
| "clip_ratio": 0.0002497726611068174, |
| "epoch": 2.170177168294942, |
| "grad_norm": 0.0425233468413353, |
| "kl": 0.007935047149658203, |
| "learning_rate": 8.956908987086538e-08, |
| "loss": 0.005, |
| "step": 1459 |
| }, |
| { |
| "clip_ratio": 0.00030142679486289126, |
| "epoch": 2.172089271309492, |
| "grad_norm": 0.03647738695144653, |
| "kl": 0.007966041564941406, |
| "learning_rate": 8.791256092749223e-08, |
| "loss": 0.0049, |
| "step": 1460 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.2968997955322, |
| "epoch": 2.1740013743240416, |
| "grad_norm": 0.22045741975307465, |
| "kl": 0.022356510162353516, |
| "learning_rate": 8.627121940074645e-08, |
| "loss": 0.0122, |
| "num_tokens": 445010628.0, |
| "reward": 0.08705357578583062, |
| "reward_std": 0.08814817463280633, |
| "rewards/pure_accuracy_reward_math": 0.08705357281723991, |
| "step": 1461 |
| }, |
| { |
| "clip_ratio": 0.00031046926528688346, |
| "epoch": 2.1759134773385918, |
| "grad_norm": 0.06329243630170822, |
| "kl": 0.015823841094970703, |
| "learning_rate": 8.464507562513657e-08, |
| "loss": 0.0119, |
| "step": 1462 |
| }, |
| { |
| "clip_ratio": 0.0003438202776351318, |
| "epoch": 2.1778255803531414, |
| "grad_norm": 0.05041000247001648, |
| "kl": 0.014271736145019531, |
| "learning_rate": 8.303413983948017e-08, |
| "loss": 0.0118, |
| "step": 1463 |
| }, |
| { |
| "clip_ratio": 0.0003563892260558532, |
| "epoch": 2.1797376833676916, |
| "grad_norm": 0.04660080000758171, |
| "kl": 0.013462543487548828, |
| "learning_rate": 8.143842218683862e-08, |
| "loss": 0.0117, |
| "step": 1464 |
| }, |
| { |
| "clip_ratio": 0.0004125210731444895, |
| "epoch": 2.1816497863822413, |
| "grad_norm": 0.04536700248718262, |
| "kl": 0.012927532196044922, |
| "learning_rate": 7.985793271445636e-08, |
| "loss": 0.0116, |
| "step": 1465 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 517.6127443313599, |
| "epoch": 2.1835618893967914, |
| "grad_norm": 0.08454474061727524, |
| "kl": 0.010744094848632812, |
| "learning_rate": 7.829268137369311e-08, |
| "loss": 0.0075, |
| "num_tokens": 448601372.0, |
| "reward": 0.0750558071595151, |
| "reward_std": 0.0813654173980467, |
| "rewards/pure_accuracy_reward_math": 0.07505580488941632, |
| "step": 1466 |
| }, |
| { |
| "clip_ratio": 0.00028517025145902153, |
| "epoch": 2.185473992411341, |
| "grad_norm": 0.04138394817709923, |
| "kl": 0.009669780731201172, |
| "learning_rate": 7.674267801996427e-08, |
| "loss": 0.0075, |
| "step": 1467 |
| }, |
| { |
| "clip_ratio": 0.00027802770790685827, |
| "epoch": 2.1873860954258912, |
| "grad_norm": 0.03745463490486145, |
| "kl": 0.009511947631835938, |
| "learning_rate": 7.52079324126792e-08, |
| "loss": 0.0074, |
| "step": 1468 |
| }, |
| { |
| "clip_ratio": 0.0003267590287805433, |
| "epoch": 2.189298198440441, |
| "grad_norm": 0.036841075867414474, |
| "kl": 0.00956106185913086, |
| "learning_rate": 7.368845421517779e-08, |
| "loss": 0.0073, |
| "step": 1469 |
| }, |
| { |
| "clip_ratio": 0.0003443693621534294, |
| "epoch": 2.191210301454991, |
| "grad_norm": 0.0362345427274704, |
| "kl": 0.009715557098388672, |
| "learning_rate": 7.21842529946698e-08, |
| "loss": 0.0072, |
| "step": 1470 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 499.83763551712036, |
| "epoch": 2.1931224044695408, |
| "grad_norm": 0.0431695282459259, |
| "kl": 0.008378028869628906, |
| "learning_rate": 7.0695338222177e-08, |
| "loss": 0.0093, |
| "num_tokens": 452124382.0, |
| "reward": 0.07756696839351207, |
| "reward_std": 0.08685944566968828, |
| "rewards/pure_accuracy_reward_math": 0.07756696530850604, |
| "step": 1471 |
| }, |
| { |
| "clip_ratio": 0.0003288618632950602, |
| "epoch": 2.195034507484091, |
| "grad_norm": 0.042445823550224304, |
| "kl": 0.008408546447753906, |
| "learning_rate": 6.922171927247062e-08, |
| "loss": 0.0092, |
| "step": 1472 |
| }, |
| { |
| "clip_ratio": 0.0003429904774066017, |
| "epoch": 2.1969466104986406, |
| "grad_norm": 0.04231419414281845, |
| "kl": 0.008434295654296875, |
| "learning_rate": 6.776340542401422e-08, |
| "loss": 0.0092, |
| "step": 1473 |
| }, |
| { |
| "clip_ratio": 0.00035230960349963425, |
| "epoch": 2.1988587135131903, |
| "grad_norm": 0.04162426292896271, |
| "kl": 0.008434295654296875, |
| "learning_rate": 6.632040585890398e-08, |
| "loss": 0.0091, |
| "step": 1474 |
| }, |
| { |
| "clip_ratio": 0.000348456743722636, |
| "epoch": 2.2007708165277404, |
| "grad_norm": 0.04009128361940384, |
| "kl": 0.008394718170166016, |
| "learning_rate": 6.489272966281269e-08, |
| "loss": 0.009, |
| "step": 1475 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 511.53015899658203, |
| "epoch": 2.2026829195422906, |
| "grad_norm": 0.03803718462586403, |
| "kl": 0.008605003356933594, |
| "learning_rate": 6.348038582493e-08, |
| "loss": 0.0064, |
| "num_tokens": 455697798.0, |
| "reward": 0.06863839633297175, |
| "reward_std": 0.0772402475704439, |
| "rewards/pure_accuracy_reward_math": 0.06863839423749596, |
| "step": 1476 |
| }, |
| { |
| "clip_ratio": 0.0002735381897878142, |
| "epoch": 2.2045950225568403, |
| "grad_norm": 0.036724258214235306, |
| "kl": 0.008575439453125, |
| "learning_rate": 6.208338323790891e-08, |
| "loss": 0.0064, |
| "step": 1477 |
| }, |
| { |
| "clip_ratio": 0.000271568493644736, |
| "epoch": 2.20650712557139, |
| "grad_norm": 0.03627302870154381, |
| "kl": 0.008494853973388672, |
| "learning_rate": 6.070173069780638e-08, |
| "loss": 0.0063, |
| "step": 1478 |
| }, |
| { |
| "clip_ratio": 0.0003129301562694309, |
| "epoch": 2.20841922858594, |
| "grad_norm": 0.035685960203409195, |
| "kl": 0.008512496948242188, |
| "learning_rate": 5.933543690403082e-08, |
| "loss": 0.0063, |
| "step": 1479 |
| }, |
| { |
| "clip_ratio": 0.0003575469975203305, |
| "epoch": 2.21033133160049, |
| "grad_norm": 0.03495527431368828, |
| "kl": 0.008492469787597656, |
| "learning_rate": 5.7984510459285215e-08, |
| "loss": 0.0062, |
| "step": 1480 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.403482913971, |
| "epoch": 2.21224343461504, |
| "grad_norm": 0.041989997029304504, |
| "kl": 0.008183956146240234, |
| "learning_rate": 5.6648959869514965e-08, |
| "loss": 0.0075, |
| "num_tokens": 459321180.0, |
| "reward": 0.07617187898722477, |
| "reward_std": 0.0817908609751612, |
| "rewards/pure_accuracy_reward_math": 0.07617187630967237, |
| "step": 1481 |
| }, |
| { |
| "clip_ratio": 0.0003129412224893713, |
| "epoch": 2.2141555376295896, |
| "grad_norm": 0.04108978435397148, |
| "kl": 0.00823974609375, |
| "learning_rate": 5.532879354385234e-08, |
| "loss": 0.0075, |
| "step": 1482 |
| }, |
| { |
| "clip_ratio": 0.0003202799926498301, |
| "epoch": 2.2160676406441397, |
| "grad_norm": 0.03990933671593666, |
| "kl": 0.00827646255493164, |
| "learning_rate": 5.4024019794565176e-08, |
| "loss": 0.0075, |
| "step": 1483 |
| }, |
| { |
| "clip_ratio": 0.0003925440155398974, |
| "epoch": 2.2179797436586894, |
| "grad_norm": 0.039193831384181976, |
| "kl": 0.008234977722167969, |
| "learning_rate": 5.273464683700352e-08, |
| "loss": 0.0074, |
| "step": 1484 |
| }, |
| { |
| "clip_ratio": 0.0004001183214654702, |
| "epoch": 2.2198918466732396, |
| "grad_norm": 0.039878588169813156, |
| "kl": 0.00826406478881836, |
| "learning_rate": 5.1460682789547526e-08, |
| "loss": 0.0073, |
| "step": 1485 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 531.470449924469, |
| "epoch": 2.2218039496877893, |
| "grad_norm": 0.04079683497548103, |
| "kl": 0.011513710021972656, |
| "learning_rate": 5.020213567355825e-08, |
| "loss": 0.0091, |
| "num_tokens": 462957626.0, |
| "reward": 0.06752232459257357, |
| "reward_std": 0.07320140459341928, |
| "rewards/pure_accuracy_reward_math": 0.0675223229045514, |
| "step": 1486 |
| }, |
| { |
| "clip_ratio": 0.0002717390548241383, |
| "epoch": 2.2237160527023394, |
| "grad_norm": 0.037311483174562454, |
| "kl": 0.011410713195800781, |
| "learning_rate": 4.8959013413324705e-08, |
| "loss": 0.009, |
| "step": 1487 |
| }, |
| { |
| "clip_ratio": 0.0002951391629721911, |
| "epoch": 2.225628155716889, |
| "grad_norm": 0.035728756338357925, |
| "kl": 0.011387348175048828, |
| "learning_rate": 4.773132383601664e-08, |
| "loss": 0.009, |
| "step": 1488 |
| }, |
| { |
| "clip_ratio": 0.00030970129540719427, |
| "epoch": 2.2275402587314392, |
| "grad_norm": 0.03630708530545235, |
| "kl": 0.011130332946777344, |
| "learning_rate": 4.6519074671631805e-08, |
| "loss": 0.0089, |
| "step": 1489 |
| }, |
| { |
| "clip_ratio": 0.00035198272149727927, |
| "epoch": 2.229452361745989, |
| "grad_norm": 0.035501569509506226, |
| "kl": 0.010982990264892578, |
| "learning_rate": 4.5322273552951265e-08, |
| "loss": 0.0088, |
| "step": 1490 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.0912661552429, |
| "epoch": 2.231364464760539, |
| "grad_norm": 0.039065275341272354, |
| "kl": 0.008381366729736328, |
| "learning_rate": 4.4140928015488085e-08, |
| "loss": 0.0067, |
| "num_tokens": 466540145.0, |
| "reward": 0.08007812951109372, |
| "reward_std": 0.07346039032563567, |
| "rewards/pure_accuracy_reward_math": 0.08007812619325705, |
| "step": 1491 |
| }, |
| { |
| "clip_ratio": 0.0002747246091985289, |
| "epoch": 2.2332765677750888, |
| "grad_norm": 0.03766880929470062, |
| "kl": 0.008387088775634766, |
| "learning_rate": 4.297504549744119e-08, |
| "loss": 0.0067, |
| "step": 1492 |
| }, |
| { |
| "clip_ratio": 0.0002486348788579562, |
| "epoch": 2.235188670789639, |
| "grad_norm": 0.03599947690963745, |
| "kl": 0.0084991455078125, |
| "learning_rate": 4.182463333964909e-08, |
| "loss": 0.0066, |
| "step": 1493 |
| }, |
| { |
| "clip_ratio": 0.0002674886795261955, |
| "epoch": 2.2371007738041886, |
| "grad_norm": 0.0361332893371582, |
| "kl": 0.008679389953613281, |
| "learning_rate": 4.068969878554263e-08, |
| "loss": 0.0066, |
| "step": 1494 |
| }, |
| { |
| "clip_ratio": 0.00031218544620514876, |
| "epoch": 2.2390128768187387, |
| "grad_norm": 0.035462211817502975, |
| "kl": 0.008719921112060547, |
| "learning_rate": 3.957024898110007e-08, |
| "loss": 0.0065, |
| "step": 1495 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 507.05945777893066, |
| "epoch": 2.2409249798332884, |
| "grad_norm": 0.10880274325609207, |
| "kl": 0.012134075164794922, |
| "learning_rate": 3.846629097480126e-08, |
| "loss": 0.0046, |
| "num_tokens": 470091662.0, |
| "reward": 0.07952009330620058, |
| "reward_std": 0.08660046098520979, |
| "rewards/pure_accuracy_reward_math": 0.0795200907450635, |
| "step": 1496 |
| }, |
| { |
| "clip_ratio": 0.00034633993402621854, |
| "epoch": 2.2428370828478386, |
| "grad_norm": 0.04444468766450882, |
| "kl": 0.010071754455566406, |
| "learning_rate": 3.737783171758408e-08, |
| "loss": 0.0045, |
| "step": 1497 |
| }, |
| { |
| "clip_ratio": 0.00040814166391101026, |
| "epoch": 2.2447491858623883, |
| "grad_norm": 0.050679393112659454, |
| "kl": 0.009745597839355469, |
| "learning_rate": 3.630487806280086e-08, |
| "loss": 0.0044, |
| "step": 1498 |
| }, |
| { |
| "clip_ratio": 0.00040935890626769833, |
| "epoch": 2.2466612888769384, |
| "grad_norm": 0.04249563813209534, |
| "kl": 0.009531974792480469, |
| "learning_rate": 3.524743676617426e-08, |
| "loss": 0.0044, |
| "step": 1499 |
| }, |
| { |
| "clip_ratio": 0.00041069585563491273, |
| "epoch": 2.248573391891488, |
| "grad_norm": 0.04013880342245102, |
| "kl": 0.009422779083251953, |
| "learning_rate": 3.42055144857556e-08, |
| "loss": 0.0042, |
| "step": 1500 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.4908156394958, |
| "epoch": 2.250485494906038, |
| "grad_norm": 0.04119328781962395, |
| "kl": 0.00858306884765625, |
| "learning_rate": 3.3179117781882154e-08, |
| "loss": 0.0064, |
| "num_tokens": 473729421.0, |
| "reward": 0.08175223629223183, |
| "reward_std": 0.080375739664305, |
| "rewards/pure_accuracy_reward_math": 0.08175223390571773, |
| "step": 1501 |
| }, |
| { |
| "clip_ratio": 0.00027040669908728887, |
| "epoch": 2.252397597920588, |
| "grad_norm": 0.03726639971137047, |
| "kl": 0.008556365966796875, |
| "learning_rate": 3.216825311713689e-08, |
| "loss": 0.0064, |
| "step": 1502 |
| }, |
| { |
| "clip_ratio": 0.0003022322244419229, |
| "epoch": 2.254309700935138, |
| "grad_norm": 0.03740008547902107, |
| "kl": 0.008624553680419922, |
| "learning_rate": 3.11729268563063e-08, |
| "loss": 0.0063, |
| "step": 1503 |
| }, |
| { |
| "clip_ratio": 0.0002972338604081415, |
| "epoch": 2.2562218039496877, |
| "grad_norm": 0.036019936203956604, |
| "kl": 0.008683204650878906, |
| "learning_rate": 3.019314526634232e-08, |
| "loss": 0.0062, |
| "step": 1504 |
| }, |
| { |
| "clip_ratio": 0.0003317092545103151, |
| "epoch": 2.258133906964238, |
| "grad_norm": 0.035242002457380295, |
| "kl": 0.008699893951416016, |
| "learning_rate": 2.922891451632076e-08, |
| "loss": 0.0062, |
| "step": 1505 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.8340096473694, |
| "epoch": 2.2600460099787876, |
| "grad_norm": 0.04786042869091034, |
| "kl": 0.0166015625, |
| "learning_rate": 2.8280240677403813e-08, |
| "loss": 0.0117, |
| "num_tokens": 477311002.0, |
| "reward": 0.08593750389991328, |
| "reward_std": 0.09509739134227857, |
| "rewards/pure_accuracy_reward_math": 0.08593750139698386, |
| "step": 1506 |
| }, |
| { |
| "clip_ratio": 0.0003771551589011324, |
| "epoch": 2.2619581129933373, |
| "grad_norm": 0.04542854428291321, |
| "kl": 0.016517162322998047, |
| "learning_rate": 2.7347129722801736e-08, |
| "loss": 0.0117, |
| "step": 1507 |
| }, |
| { |
| "clip_ratio": 0.00043879733209450933, |
| "epoch": 2.2638702160078874, |
| "grad_norm": 0.04336082562804222, |
| "kl": 0.016106605529785156, |
| "learning_rate": 2.6429587527734835e-08, |
| "loss": 0.0116, |
| "step": 1508 |
| }, |
| { |
| "clip_ratio": 0.0005006881825977416, |
| "epoch": 2.2657823190224375, |
| "grad_norm": 0.04397574067115784, |
| "kl": 0.015746116638183594, |
| "learning_rate": 2.5527619869396003e-08, |
| "loss": 0.0115, |
| "step": 1509 |
| }, |
| { |
| "clip_ratio": 0.0005348546662844456, |
| "epoch": 2.2676944220369872, |
| "grad_norm": 0.043936342000961304, |
| "kl": 0.015500068664550781, |
| "learning_rate": 2.464123242691574e-08, |
| "loss": 0.0114, |
| "step": 1510 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 526.8474016189575, |
| "epoch": 2.269606525051537, |
| "grad_norm": 0.04165401682257652, |
| "kl": 0.008256912231445312, |
| "learning_rate": 2.377043078132496e-08, |
| "loss": 0.0079, |
| "num_tokens": 480935151.0, |
| "reward": 0.08342634345171973, |
| "reward_std": 0.09024772583507001, |
| "rewards/pure_accuracy_reward_math": 0.08342634071595967, |
| "step": 1511 |
| }, |
| { |
| "clip_ratio": 0.0003286536882569635, |
| "epoch": 2.271518628066087, |
| "grad_norm": 0.04013460502028465, |
| "kl": 0.008354663848876953, |
| "learning_rate": 2.291522041552141e-08, |
| "loss": 0.0079, |
| "step": 1512 |
| }, |
| { |
| "clip_ratio": 0.00034448601985559435, |
| "epoch": 2.273430731080637, |
| "grad_norm": 0.03929148614406586, |
| "kl": 0.008509159088134766, |
| "learning_rate": 2.207560671423331e-08, |
| "loss": 0.0078, |
| "step": 1513 |
| }, |
| { |
| "clip_ratio": 0.00038580430322099346, |
| "epoch": 2.275342834095187, |
| "grad_norm": 0.04108521342277527, |
| "kl": 0.008730888366699219, |
| "learning_rate": 2.1251594963986876e-08, |
| "loss": 0.0077, |
| "step": 1514 |
| }, |
| { |
| "clip_ratio": 0.00038072799372912414, |
| "epoch": 2.2772549371097366, |
| "grad_norm": 0.038887783885002136, |
| "kl": 0.008725643157958984, |
| "learning_rate": 2.0443190353072185e-08, |
| "loss": 0.0076, |
| "step": 1515 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.4051609039307, |
| "epoch": 2.2791670401242867, |
| "grad_norm": 0.03783741220831871, |
| "kl": 0.008581161499023438, |
| "learning_rate": 1.9650397971510972e-08, |
| "loss": 0.0064, |
| "num_tokens": 484530587.0, |
| "reward": 0.08231027124566026, |
| "reward_std": 0.08037574036279693, |
| "rewards/pure_accuracy_reward_math": 0.08231026897556148, |
| "step": 1516 |
| }, |
| { |
| "clip_ratio": 0.0002746778108644321, |
| "epoch": 2.2810791431388364, |
| "grad_norm": 0.03765445947647095, |
| "kl": 0.008580207824707031, |
| "learning_rate": 1.8873222811024717e-08, |
| "loss": 0.0063, |
| "step": 1517 |
| }, |
| { |
| "clip_ratio": 0.00031986788579274616, |
| "epoch": 2.2829912461533866, |
| "grad_norm": 0.03684096038341522, |
| "kl": 0.008593082427978516, |
| "learning_rate": 1.8111669765003005e-08, |
| "loss": 0.0063, |
| "step": 1518 |
| }, |
| { |
| "clip_ratio": 0.0003354349921380617, |
| "epoch": 2.2849033491679362, |
| "grad_norm": 0.03599463030695915, |
| "kl": 0.008591175079345703, |
| "learning_rate": 1.73657436284716e-08, |
| "loss": 0.0062, |
| "step": 1519 |
| }, |
| { |
| "clip_ratio": 0.0003505910435706028, |
| "epoch": 2.2868154521824864, |
| "grad_norm": 0.035750966519117355, |
| "kl": 0.00874948501586914, |
| "learning_rate": 1.6635449098064972e-08, |
| "loss": 0.0061, |
| "step": 1520 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 521.2455606460571, |
| "epoch": 2.288727555197036, |
| "grad_norm": 0.03890154883265495, |
| "kl": 0.008922100067138672, |
| "learning_rate": 1.5920790771993822e-08, |
| "loss": 0.0078, |
| "num_tokens": 488136255.0, |
| "reward": 0.07952009289874695, |
| "reward_std": 0.07556614064378664, |
| "rewards/pure_accuracy_reward_math": 0.07952009068685584, |
| "step": 1521 |
| }, |
| { |
| "clip_ratio": 0.00024827225587387147, |
| "epoch": 2.290639658211586, |
| "grad_norm": 0.037810854613780975, |
| "kl": 0.008934974670410156, |
| "learning_rate": 1.5221773150017882e-08, |
| "loss": 0.0078, |
| "step": 1522 |
| }, |
| { |
| "clip_ratio": 0.0002384709360967463, |
| "epoch": 2.292551761226136, |
| "grad_norm": 0.0364384800195694, |
| "kl": 0.008936882019042969, |
| "learning_rate": 1.4538400633417049e-08, |
| "loss": 0.0077, |
| "step": 1523 |
| }, |
| { |
| "clip_ratio": 0.0002599185108635993, |
| "epoch": 2.294463864240686, |
| "grad_norm": 0.035106074064970016, |
| "kl": 0.008829116821289062, |
| "learning_rate": 1.387067752496335e-08, |
| "loss": 0.0076, |
| "step": 1524 |
| }, |
| { |
| "clip_ratio": 0.0003290796867077006, |
| "epoch": 2.2963759672552357, |
| "grad_norm": 0.03489363566040993, |
| "kl": 0.0086822509765625, |
| "learning_rate": 1.3218608028895131e-08, |
| "loss": 0.0076, |
| "step": 1525 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 517.0547122955322, |
| "epoch": 2.298288070269786, |
| "grad_norm": 0.040062014013528824, |
| "kl": 0.008834362030029297, |
| "learning_rate": 1.2582196250888745e-08, |
| "loss": 0.0071, |
| "num_tokens": 491722139.0, |
| "reward": 0.08621652179863304, |
| "reward_std": 0.08020308247068897, |
| "rewards/pure_accuracy_reward_math": 0.08621651906287298, |
| "step": 1526 |
| }, |
| { |
| "clip_ratio": 0.00031514769625573535, |
| "epoch": 2.3002001732843356, |
| "grad_norm": 0.03938477113842964, |
| "kl": 0.008733272552490234, |
| "learning_rate": 1.1961446198033855e-08, |
| "loss": 0.0071, |
| "step": 1527 |
| }, |
| { |
| "clip_ratio": 0.00030386562087869606, |
| "epoch": 2.3021122762988857, |
| "grad_norm": 0.03844742849469185, |
| "kl": 0.008654594421386719, |
| "learning_rate": 1.1356361778808167e-08, |
| "loss": 0.007, |
| "step": 1528 |
| }, |
| { |
| "clip_ratio": 0.00034510965764411594, |
| "epoch": 2.3040243793134354, |
| "grad_norm": 0.03755528852343559, |
| "kl": 0.00861358642578125, |
| "learning_rate": 1.076694680305218e-08, |
| "loss": 0.007, |
| "step": 1529 |
| }, |
| { |
| "clip_ratio": 0.00035207756366162357, |
| "epoch": 2.3059364823279855, |
| "grad_norm": 0.03696778416633606, |
| "kl": 0.008616447448730469, |
| "learning_rate": 1.0193204981946426e-08, |
| "loss": 0.0069, |
| "step": 1530 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 516.7249145507812, |
| "epoch": 2.3078485853425352, |
| "grad_norm": 0.045076508074998856, |
| "kl": 0.014521598815917969, |
| "learning_rate": 9.63513992798676e-09, |
| "loss": 0.0065, |
| "num_tokens": 495305537.0, |
| "reward": 0.07505580713041127, |
| "reward_std": 0.07844264624873176, |
| "rewards/pure_accuracy_reward_math": 0.07505580480210483, |
| "step": 1531 |
| }, |
| { |
| "clip_ratio": 0.0003054732096074986, |
| "epoch": 2.3097606883570854, |
| "grad_norm": 0.041828691959381104, |
| "kl": 0.01419973373413086, |
| "learning_rate": 9.092755154961886e-09, |
| "loss": 0.0065, |
| "step": 1532 |
| }, |
| { |
| "clip_ratio": 0.00030572324658351135, |
| "epoch": 2.311672791371635, |
| "grad_norm": 0.03949357569217682, |
| "kl": 0.013697624206542969, |
| "learning_rate": 8.566054077932262e-09, |
| "loss": 0.0064, |
| "step": 1533 |
| }, |
| { |
| "clip_ratio": 0.0003279060996987937, |
| "epoch": 2.313584894386185, |
| "grad_norm": 0.038545649498701096, |
| "kl": 0.01345968246459961, |
| "learning_rate": 8.055040013207061e-09, |
| "loss": 0.0063, |
| "step": 1534 |
| }, |
| { |
| "clip_ratio": 0.00033917763732915773, |
| "epoch": 2.315496997400735, |
| "grad_norm": 0.03716408833861351, |
| "kl": 0.01330709457397461, |
| "learning_rate": 7.559716178325016e-09, |
| "loss": 0.0062, |
| "step": 1535 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 519.2921552658081, |
| "epoch": 2.317409100415285, |
| "grad_norm": 0.041162386536598206, |
| "kl": 0.008297443389892578, |
| "learning_rate": 7.080085692032224e-09, |
| "loss": 0.0079, |
| "num_tokens": 498900584.0, |
| "reward": 0.08928571816068143, |
| "reward_std": 0.08428199036279693, |
| "rewards/pure_accuracy_reward_math": 0.08928571571595967, |
| "step": 1536 |
| }, |
| { |
| "clip_ratio": 0.00029752771973790004, |
| "epoch": 2.3193212034298347, |
| "grad_norm": 0.03933210298418999, |
| "kl": 0.008346080780029297, |
| "learning_rate": 6.616151574264374e-09, |
| "loss": 0.0079, |
| "step": 1537 |
| }, |
| { |
| "clip_ratio": 0.0003302163729017593, |
| "epoch": 2.321233306444385, |
| "grad_norm": 0.038146842271089554, |
| "kl": 0.008320331573486328, |
| "learning_rate": 6.1679167461262124e-09, |
| "loss": 0.0078, |
| "step": 1538 |
| }, |
| { |
| "clip_ratio": 0.0003326926421891585, |
| "epoch": 2.3231454094589346, |
| "grad_norm": 0.038072116672992706, |
| "kl": 0.008330345153808594, |
| "learning_rate": 5.735384029874336e-09, |
| "loss": 0.0077, |
| "step": 1539 |
| }, |
| { |
| "clip_ratio": 0.00038002995881925017, |
| "epoch": 2.3250575124734847, |
| "grad_norm": 0.037320397794246674, |
| "kl": 0.008296012878417969, |
| "learning_rate": 5.31855614889859e-09, |
| "loss": 0.0076, |
| "step": 1540 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.1487407684326, |
| "epoch": 2.3269696154880344, |
| "grad_norm": 0.03688493371009827, |
| "kl": 0.008476734161376953, |
| "learning_rate": 4.917435727704867e-09, |
| "loss": 0.0024, |
| "num_tokens": 502500281.0, |
| "reward": 0.0811942005821038, |
| "reward_std": 0.0787416979437694, |
| "rewards/pure_accuracy_reward_math": 0.08119419842842035, |
| "step": 1541 |
| }, |
| { |
| "clip_ratio": 0.00028201957394458077, |
| "epoch": 2.3288817185025845, |
| "grad_norm": 0.03607385605573654, |
| "kl": 0.008441448211669922, |
| "learning_rate": 4.53202529190011e-09, |
| "loss": 0.0023, |
| "step": 1542 |
| }, |
| { |
| "clip_ratio": 0.0002742231245633775, |
| "epoch": 2.330793821517134, |
| "grad_norm": 0.03572804853320122, |
| "kl": 0.00852060317993164, |
| "learning_rate": 4.162327268173727e-09, |
| "loss": 0.0023, |
| "step": 1543 |
| }, |
| { |
| "clip_ratio": 0.0003046261713848253, |
| "epoch": 2.332705924531684, |
| "grad_norm": 0.034965962171554565, |
| "kl": 0.00861501693725586, |
| "learning_rate": 3.80834398428509e-09, |
| "loss": 0.0022, |
| "step": 1544 |
| }, |
| { |
| "clip_ratio": 0.0003226917802976459, |
| "epoch": 2.334618027546234, |
| "grad_norm": 0.034803807735443115, |
| "kl": 0.008724212646484375, |
| "learning_rate": 3.470077669046612e-09, |
| "loss": 0.0021, |
| "step": 1545 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 538.0273699760437, |
| "epoch": 2.336530130560784, |
| "grad_norm": 0.034996818751096725, |
| "kl": 0.008575439453125, |
| "learning_rate": 3.147530452311809e-09, |
| "loss": 0.0064, |
| "num_tokens": 506159719.0, |
| "reward": 0.06891741408617236, |
| "reward_std": 0.07063014718005434, |
| "rewards/pure_accuracy_reward_math": 0.06891741210711189, |
| "step": 1546 |
| }, |
| { |
| "clip_ratio": 0.00023073077210256088, |
| "epoch": 2.338442233575334, |
| "grad_norm": 0.03347066789865494, |
| "kl": 0.008565902709960938, |
| "learning_rate": 2.8407043649597567e-09, |
| "loss": 0.0063, |
| "step": 1547 |
| }, |
| { |
| "clip_ratio": 0.000268154504112772, |
| "epoch": 2.3403543365898836, |
| "grad_norm": 0.03273630142211914, |
| "kl": 0.008545398712158203, |
| "learning_rate": 2.549601338883989e-09, |
| "loss": 0.0063, |
| "step": 1548 |
| }, |
| { |
| "clip_ratio": 0.00029292683666426456, |
| "epoch": 2.3422664396044337, |
| "grad_norm": 0.032376162707805634, |
| "kl": 0.008570671081542969, |
| "learning_rate": 2.2742232069794533e-09, |
| "loss": 0.0063, |
| "step": 1549 |
| }, |
| { |
| "clip_ratio": 0.0003443536306235728, |
| "epoch": 2.344178542618984, |
| "grad_norm": 0.031950000673532486, |
| "kl": 0.008484363555908203, |
| "learning_rate": 2.01457170313113e-09, |
| "loss": 0.0062, |
| "step": 1550 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 520.7207255363464, |
| "epoch": 2.3460906456335335, |
| "grad_norm": 0.04171088710427284, |
| "kl": 0.009114742279052734, |
| "learning_rate": 1.7706484622034837e-09, |
| "loss": 0.005, |
| "num_tokens": 509757966.0, |
| "reward": 0.07672991443541832, |
| "reward_std": 0.08149181143380702, |
| "rewards/pure_accuracy_reward_math": 0.07672991228173487, |
| "step": 1551 |
| }, |
| { |
| "clip_ratio": 0.0003305982788788242, |
| "epoch": 2.3480027486480832, |
| "grad_norm": 0.04123101010918617, |
| "kl": 0.009046554565429688, |
| "learning_rate": 1.5424550200293653e-09, |
| "loss": 0.005, |
| "step": 1552 |
| }, |
| { |
| "clip_ratio": 0.0003486324259256435, |
| "epoch": 2.3499148516626334, |
| "grad_norm": 0.039809513837099075, |
| "kl": 0.008966445922851562, |
| "learning_rate": 1.3299928134014039e-09, |
| "loss": 0.0049, |
| "step": 1553 |
| }, |
| { |
| "clip_ratio": 0.0003954665013452541, |
| "epoch": 2.351826954677183, |
| "grad_norm": 0.0393875353038311, |
| "kl": 0.008915901184082031, |
| "learning_rate": 1.1332631800620164e-09, |
| "loss": 0.0049, |
| "step": 1554 |
| }, |
| { |
| "clip_ratio": 0.0004334128346954458, |
| "epoch": 2.353739057691733, |
| "grad_norm": 0.03990260884165764, |
| "kl": 0.008862972259521484, |
| "learning_rate": 9.522673586956355e-10, |
| "loss": 0.0047, |
| "step": 1555 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 518.488025188446, |
| "epoch": 2.355651160706283, |
| "grad_norm": 0.04300679266452789, |
| "kl": 0.009171009063720703, |
| "learning_rate": 7.870064889206608e-10, |
| "loss": 0.0082, |
| "num_tokens": 513350767.0, |
| "reward": 0.07728794994181953, |
| "reward_std": 0.08290693227900192, |
| "rewards/pure_accuracy_reward_math": 0.07728794743889011, |
| "step": 1556 |
| }, |
| { |
| "clip_ratio": 0.000295089724772879, |
| "epoch": 2.357563263720833, |
| "grad_norm": 0.04144243150949478, |
| "kl": 0.009136676788330078, |
| "learning_rate": 6.374816112819648e-10, |
| "loss": 0.0082, |
| "step": 1557 |
| }, |
| { |
| "clip_ratio": 0.0003283331608940898, |
| "epoch": 2.3594753667353827, |
| "grad_norm": 0.039357006549835205, |
| "kl": 0.009202003479003906, |
| "learning_rate": 5.036936672447868e-10, |
| "loss": 0.0081, |
| "step": 1558 |
| }, |
| { |
| "clip_ratio": 0.00036647373104869985, |
| "epoch": 2.361387469749933, |
| "grad_norm": 0.03904441371560097, |
| "kl": 0.009307384490966797, |
| "learning_rate": 3.8564349918890356e-10, |
| "loss": 0.008, |
| "step": 1559 |
| }, |
| { |
| "clip_ratio": 0.0004084905730792343, |
| "epoch": 2.3632995727644825, |
| "grad_norm": 0.03901646286249161, |
| "kl": 0.00932168960571289, |
| "learning_rate": 2.833318504030791e-10, |
| "loss": 0.0079, |
| "step": 1560 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 527.474356174469, |
| "epoch": 2.3652116757790327, |
| "grad_norm": 5.391517162322998, |
| "kl": 0.0942845344543457, |
| "learning_rate": 1.9675936507979056e-10, |
| "loss": 0.0081, |
| "num_tokens": 516974751.0, |
| "reward": 0.06975446754950099, |
| "reward_std": 0.06989945453824475, |
| "rewards/pure_accuracy_reward_math": 0.06975446597789414, |
| "step": 1561 |
| }, |
| { |
| "clip_ratio": 0.0002886794856635788, |
| "epoch": 2.3671237787935824, |
| "grad_norm": 0.1764528900384903, |
| "kl": 0.013553619384765625, |
| "learning_rate": 1.2592658831245274e-10, |
| "loss": 0.0049, |
| "step": 1562 |
| }, |
| { |
| "clip_ratio": 0.00028670978349509824, |
| "epoch": 2.3690358818081325, |
| "grad_norm": 0.03846847265958786, |
| "kl": 0.009183406829833984, |
| "learning_rate": 7.083396609097737e-11, |
| "loss": 0.0047, |
| "step": 1563 |
| }, |
| { |
| "clip_ratio": 0.0002776476591748178, |
| "epoch": 2.370947984822682, |
| "grad_norm": 0.035545963793992996, |
| "kl": 0.008979320526123047, |
| "learning_rate": 3.148184529927489e-11, |
| "loss": 0.0046, |
| "step": 1564 |
| }, |
| { |
| "clip_ratio": 0.00032522391097700165, |
| "epoch": 2.3728600878372323, |
| "grad_norm": 0.1538141518831253, |
| "kl": 0.009156227111816406, |
| "learning_rate": 7.870473713589288e-12, |
| "loss": 0.0046, |
| "step": 1565 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 530.6135845184326, |
| "epoch": 2.374772190851782, |
| "grad_norm": 0.0368269719183445, |
| "kl": 0.008574485778808594, |
| "learning_rate": 0.0, |
| "loss": 0.0087, |
| "num_tokens": 520611370.0, |
| "reward": 0.07142857427243143, |
| "reward_std": 0.07900068280287087, |
| "rewards/pure_accuracy_reward_math": 0.07142857293365523, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.374772190851782, |
| "step": 1566, |
| "total_flos": 0.0, |
| "train_loss": 0.003398028112404372, |
| "train_runtime": 273585.6306, |
| "train_samples_per_second": 1.028, |
| "train_steps_per_second": 0.006 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1566, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|