| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.40973636026069477, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 606.2532775402069, |
| "epoch": 0.001638945441042779, |
| "grad_norm": 0.05986390635371208, |
| "kl": 0.0, |
| "learning_rate": 1.3661202185792351e-08, |
| "loss": 0.0057, |
| "num_tokens": 3348938.0, |
| "reward": 0.1718750048603397, |
| "reward_std": 0.09577879420248792, |
| "rewards/pure_accuracy_reward_math": 0.17187500078580342, |
| "step": 1 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "epoch": 0.003277890882085558, |
| "grad_norm": 0.05986390635371208, |
| "kl": 0.0, |
| "learning_rate": 2.7322404371584703e-08, |
| "loss": 0.0057, |
| "step": 2 |
| }, |
| { |
| "clip_ratio": 0.0006339755559565674, |
| "epoch": 0.004916836323128337, |
| "grad_norm": 0.05929790809750557, |
| "kl": 0.0005019009113311768, |
| "learning_rate": 4.098360655737705e-08, |
| "loss": 0.0057, |
| "step": 3 |
| }, |
| { |
| "clip_ratio": 0.0006407226928217824, |
| "epoch": 0.006555781764171116, |
| "grad_norm": 0.059925854206085205, |
| "kl": 0.0005110502243041992, |
| "learning_rate": 5.4644808743169406e-08, |
| "loss": 0.0057, |
| "step": 4 |
| }, |
| { |
| "clip_ratio": 0.0006387700201457847, |
| "epoch": 0.008194727205213895, |
| "grad_norm": 0.05939409136772156, |
| "kl": 0.0005159676074981689, |
| "learning_rate": 6.830601092896175e-08, |
| "loss": 0.0057, |
| "step": 5 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 612.4726753234863, |
| "epoch": 0.009833672646256675, |
| "grad_norm": 0.072689488530159, |
| "kl": 0.000512346625328064, |
| "learning_rate": 8.19672131147541e-08, |
| "loss": 0.0067, |
| "num_tokens": 6714854.0, |
| "reward": 0.16438802544143982, |
| "reward_std": 0.11541076033608988, |
| "rewards/pure_accuracy_reward_math": 0.16438802113407291, |
| "step": 6 |
| }, |
| { |
| "clip_ratio": 0.0007277115302031234, |
| "epoch": 0.011472618087299453, |
| "grad_norm": 0.07328997552394867, |
| "kl": 0.0005197674036026001, |
| "learning_rate": 9.562841530054645e-08, |
| "loss": 0.0068, |
| "step": 7 |
| }, |
| { |
| "clip_ratio": 0.0007614574305989663, |
| "epoch": 0.013111563528342233, |
| "grad_norm": 0.07325445115566254, |
| "kl": 0.0005202591419219971, |
| "learning_rate": 1.0928961748633881e-07, |
| "loss": 0.0068, |
| "step": 8 |
| }, |
| { |
| "clip_ratio": 0.0007783421593785533, |
| "epoch": 0.01475050896938501, |
| "grad_norm": 0.07128091156482697, |
| "kl": 0.000517427921295166, |
| "learning_rate": 1.2295081967213116e-07, |
| "loss": 0.0068, |
| "step": 9 |
| }, |
| { |
| "clip_ratio": 0.0007585194575767673, |
| "epoch": 0.01638945441042779, |
| "grad_norm": 0.07174714654684067, |
| "kl": 0.0005128979682922363, |
| "learning_rate": 1.366120218579235e-07, |
| "loss": 0.0068, |
| "step": 10 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 609.4596562385559, |
| "epoch": 0.018028399851470568, |
| "grad_norm": 0.060616616159677505, |
| "kl": 0.0005253106355667114, |
| "learning_rate": 1.5027322404371585e-07, |
| "loss": 0.0052, |
| "num_tokens": 10075962.0, |
| "reward": 0.17447917186655104, |
| "reward_std": 0.09832898661261424, |
| "rewards/pure_accuracy_reward_math": 0.17447916814126074, |
| "step": 11 |
| }, |
| { |
| "clip_ratio": 0.0006354124035397035, |
| "epoch": 0.01966734529251335, |
| "grad_norm": 0.05994507297873497, |
| "kl": 0.0005232691764831543, |
| "learning_rate": 1.639344262295082e-07, |
| "loss": 0.0053, |
| "step": 12 |
| }, |
| { |
| "clip_ratio": 0.0006359500578128063, |
| "epoch": 0.021306290733556128, |
| "grad_norm": 0.060422513633966446, |
| "kl": 0.0005258470773696899, |
| "learning_rate": 1.7759562841530054e-07, |
| "loss": 0.0053, |
| "step": 13 |
| }, |
| { |
| "clip_ratio": 0.0006202999380775509, |
| "epoch": 0.022945236174598906, |
| "grad_norm": 0.06020491570234299, |
| "kl": 0.000526919960975647, |
| "learning_rate": 1.912568306010929e-07, |
| "loss": 0.0053, |
| "step": 14 |
| }, |
| { |
| "clip_ratio": 0.0006456842476154634, |
| "epoch": 0.024584181615641687, |
| "grad_norm": 0.06016543507575989, |
| "kl": 0.0005295425653457642, |
| "learning_rate": 2.0491803278688524e-07, |
| "loss": 0.0053, |
| "step": 15 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 613.3219571113586, |
| "epoch": 0.026223127056684465, |
| "grad_norm": 0.06271925568580627, |
| "kl": 0.0005239248275756836, |
| "learning_rate": 2.1857923497267762e-07, |
| "loss": 0.0081, |
| "num_tokens": 13445671.0, |
| "reward": 0.1438802126212977, |
| "reward_std": 0.10509481013286859, |
| "rewards/pure_accuracy_reward_math": 0.1438802084303461, |
| "step": 16 |
| }, |
| { |
| "clip_ratio": 0.0007483757581212558, |
| "epoch": 0.027862072497727243, |
| "grad_norm": 0.06271728873252869, |
| "kl": 0.000528186559677124, |
| "learning_rate": 2.3224043715846998e-07, |
| "loss": 0.0081, |
| "step": 17 |
| }, |
| { |
| "clip_ratio": 0.0006768568357529148, |
| "epoch": 0.02950101793877002, |
| "grad_norm": 0.06163553521037102, |
| "kl": 0.0005240440368652344, |
| "learning_rate": 2.459016393442623e-07, |
| "loss": 0.0081, |
| "step": 18 |
| }, |
| { |
| "clip_ratio": 0.00073299726238929, |
| "epoch": 0.031139963379812802, |
| "grad_norm": 0.062258753925561905, |
| "kl": 0.000529751181602478, |
| "learning_rate": 2.595628415300547e-07, |
| "loss": 0.0081, |
| "step": 19 |
| }, |
| { |
| "clip_ratio": 0.0007049078883483162, |
| "epoch": 0.03277890882085558, |
| "grad_norm": 0.061678871512413025, |
| "kl": 0.0005273669958114624, |
| "learning_rate": 2.73224043715847e-07, |
| "loss": 0.0081, |
| "step": 20 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 610.4453291893005, |
| "epoch": 0.03441785426189836, |
| "grad_norm": 0.06878828257322311, |
| "kl": 0.0005384832620620728, |
| "learning_rate": 2.8688524590163937e-07, |
| "loss": 0.0051, |
| "num_tokens": 16799755.0, |
| "reward": 0.15136719125439413, |
| "reward_std": 0.10323517030337825, |
| "rewards/pure_accuracy_reward_math": 0.15136718822759576, |
| "step": 21 |
| }, |
| { |
| "clip_ratio": 0.0007487565382007233, |
| "epoch": 0.036056799702941136, |
| "grad_norm": 0.06954149156808853, |
| "kl": 0.0005507916212081909, |
| "learning_rate": 3.005464480874317e-07, |
| "loss": 0.0051, |
| "step": 22 |
| }, |
| { |
| "clip_ratio": 0.0007795561222110337, |
| "epoch": 0.03769574514398392, |
| "grad_norm": 0.06806771457195282, |
| "kl": 0.0005584806203842163, |
| "learning_rate": 3.142076502732241e-07, |
| "loss": 0.0051, |
| "step": 23 |
| }, |
| { |
| "clip_ratio": 0.0007387081783463145, |
| "epoch": 0.0393346905850267, |
| "grad_norm": 0.06814352422952652, |
| "kl": 0.0005674809217453003, |
| "learning_rate": 3.278688524590164e-07, |
| "loss": 0.0051, |
| "step": 24 |
| }, |
| { |
| "clip_ratio": 0.0007619177375772779, |
| "epoch": 0.040973636026069474, |
| "grad_norm": 0.06729913502931595, |
| "kl": 0.0005744844675064087, |
| "learning_rate": 3.415300546448088e-07, |
| "loss": 0.0051, |
| "step": 25 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 596.7343945503235, |
| "epoch": 0.042612581467112255, |
| "grad_norm": 0.06346436589956284, |
| "kl": 0.0006166845560073853, |
| "learning_rate": 3.551912568306011e-07, |
| "loss": 0.0038, |
| "num_tokens": 20112563.0, |
| "reward": 0.16373698358074762, |
| "reward_std": 0.09582553629297763, |
| "rewards/pure_accuracy_reward_math": 0.163736979739042, |
| "step": 26 |
| }, |
| { |
| "clip_ratio": 0.0006948200579017794, |
| "epoch": 0.04425152690815504, |
| "grad_norm": 0.06285525858402252, |
| "kl": 0.0006320923566818237, |
| "learning_rate": 3.6885245901639347e-07, |
| "loss": 0.0039, |
| "step": 27 |
| }, |
| { |
| "clip_ratio": 0.0006778589096256837, |
| "epoch": 0.04589047234919781, |
| "grad_norm": 0.06269308924674988, |
| "kl": 0.000654950737953186, |
| "learning_rate": 3.825136612021858e-07, |
| "loss": 0.0039, |
| "step": 28 |
| }, |
| { |
| "clip_ratio": 0.0006392495685076938, |
| "epoch": 0.04752941779024059, |
| "grad_norm": 0.06292663514614105, |
| "kl": 0.0006759315729141235, |
| "learning_rate": 3.961748633879782e-07, |
| "loss": 0.0039, |
| "step": 29 |
| }, |
| { |
| "clip_ratio": 0.000681757599068078, |
| "epoch": 0.049168363231283374, |
| "grad_norm": 0.06097942218184471, |
| "kl": 0.0007022321224212646, |
| "learning_rate": 4.0983606557377047e-07, |
| "loss": 0.0039, |
| "step": 30 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 606.3161022663116, |
| "epoch": 0.05080730867232615, |
| "grad_norm": 0.06853298097848892, |
| "kl": 0.0007193088531494141, |
| "learning_rate": 4.2349726775956286e-07, |
| "loss": 0.005, |
| "num_tokens": 23460802.0, |
| "reward": 0.15071615006309003, |
| "reward_std": 0.10664506914326921, |
| "rewards/pure_accuracy_reward_math": 0.1507161462213844, |
| "step": 31 |
| }, |
| { |
| "clip_ratio": 0.0007206783092215119, |
| "epoch": 0.05244625411336893, |
| "grad_norm": 0.06669250130653381, |
| "kl": 0.0007403194904327393, |
| "learning_rate": 4.3715846994535524e-07, |
| "loss": 0.005, |
| "step": 32 |
| }, |
| { |
| "clip_ratio": 0.0008033858404132843, |
| "epoch": 0.05408519955441171, |
| "grad_norm": 0.06685461103916168, |
| "kl": 0.0007804930210113525, |
| "learning_rate": 4.508196721311476e-07, |
| "loss": 0.005, |
| "step": 33 |
| }, |
| { |
| "clip_ratio": 0.0007623738173379024, |
| "epoch": 0.055724144995454486, |
| "grad_norm": 0.06673412770032883, |
| "kl": 0.0008253157138824463, |
| "learning_rate": 4.6448087431693996e-07, |
| "loss": 0.005, |
| "step": 34 |
| }, |
| { |
| "clip_ratio": 0.0007461598812597003, |
| "epoch": 0.05736309043649727, |
| "grad_norm": 0.06533104181289673, |
| "kl": 0.0008644461631774902, |
| "learning_rate": 4.781420765027322e-07, |
| "loss": 0.005, |
| "step": 35 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 597.9977385997772, |
| "epoch": 0.05900203587754004, |
| "grad_norm": 0.07363387197256088, |
| "kl": 0.00089988112449646, |
| "learning_rate": 4.918032786885246e-07, |
| "loss": 0.0081, |
| "num_tokens": 26781715.0, |
| "reward": 0.17936198392999358, |
| "reward_std": 0.1202162274857983, |
| "rewards/pure_accuracy_reward_math": 0.179361979739042, |
| "step": 36 |
| }, |
| { |
| "clip_ratio": 0.0008816556705255607, |
| "epoch": 0.06064098131858282, |
| "grad_norm": 0.06755447387695312, |
| "kl": 0.0009488761425018311, |
| "learning_rate": 5.05464480874317e-07, |
| "loss": 0.0081, |
| "step": 37 |
| }, |
| { |
| "clip_ratio": 0.0008573625917165373, |
| "epoch": 0.062279926759625605, |
| "grad_norm": 0.06729397177696228, |
| "kl": 0.0010100901126861572, |
| "learning_rate": 5.191256830601094e-07, |
| "loss": 0.0081, |
| "step": 38 |
| }, |
| { |
| "clip_ratio": 0.000872175712970602, |
| "epoch": 0.06391887220066839, |
| "grad_norm": 0.06972332298755646, |
| "kl": 0.0010748803615570068, |
| "learning_rate": 5.327868852459017e-07, |
| "loss": 0.0081, |
| "step": 39 |
| }, |
| { |
| "clip_ratio": 0.000930704369693558, |
| "epoch": 0.06555781764171116, |
| "grad_norm": 0.06739407032728195, |
| "kl": 0.0011384189128875732, |
| "learning_rate": 5.46448087431694e-07, |
| "loss": 0.0081, |
| "step": 40 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 606.660826921463, |
| "epoch": 0.06719676308275394, |
| "grad_norm": 0.05486822873353958, |
| "kl": 0.001088649034500122, |
| "learning_rate": 5.601092896174863e-07, |
| "loss": 0.0058, |
| "num_tokens": 30130177.0, |
| "reward": 0.14843750451109372, |
| "reward_std": 0.08996616111835465, |
| "rewards/pure_accuracy_reward_math": 0.14843749973806553, |
| "step": 41 |
| }, |
| { |
| "clip_ratio": 0.000587868101206368, |
| "epoch": 0.06883570852379672, |
| "grad_norm": 0.053968992084264755, |
| "kl": 0.0011524856090545654, |
| "learning_rate": 5.737704918032787e-07, |
| "loss": 0.0058, |
| "step": 42 |
| }, |
| { |
| "clip_ratio": 0.0005904338165692025, |
| "epoch": 0.0704746539648395, |
| "grad_norm": 0.05430474132299423, |
| "kl": 0.0012042820453643799, |
| "learning_rate": 5.874316939890711e-07, |
| "loss": 0.0058, |
| "step": 43 |
| }, |
| { |
| "clip_ratio": 0.0005757618986308444, |
| "epoch": 0.07211359940588227, |
| "grad_norm": 0.05444110184907913, |
| "kl": 0.0012355148792266846, |
| "learning_rate": 6.010928961748634e-07, |
| "loss": 0.0058, |
| "step": 44 |
| }, |
| { |
| "clip_ratio": 0.0006261014578967661, |
| "epoch": 0.07375254484692506, |
| "grad_norm": 0.054937466979026794, |
| "kl": 0.0012827813625335693, |
| "learning_rate": 6.147540983606558e-07, |
| "loss": 0.0058, |
| "step": 45 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 584.1201364994049, |
| "epoch": 0.07539149028796784, |
| "grad_norm": 0.06546950340270996, |
| "kl": 0.0014158189296722412, |
| "learning_rate": 6.284153005464482e-07, |
| "loss": 0.0043, |
| "num_tokens": 33407966.0, |
| "reward": 0.1715494836680591, |
| "reward_std": 0.11086069961311296, |
| "rewards/pure_accuracy_reward_math": 0.17154948017559946, |
| "step": 46 |
| }, |
| { |
| "clip_ratio": 0.0007843592767358132, |
| "epoch": 0.07703043572901061, |
| "grad_norm": 0.06173517182469368, |
| "kl": 0.0014501512050628662, |
| "learning_rate": 6.420765027322406e-07, |
| "loss": 0.0043, |
| "step": 47 |
| }, |
| { |
| "clip_ratio": 0.0008111927813843067, |
| "epoch": 0.0786693811700534, |
| "grad_norm": 0.06110456958413124, |
| "kl": 0.0014650523662567139, |
| "learning_rate": 6.557377049180328e-07, |
| "loss": 0.0043, |
| "step": 48 |
| }, |
| { |
| "clip_ratio": 0.0007597751833827715, |
| "epoch": 0.08030832661109617, |
| "grad_norm": 0.06199155002832413, |
| "kl": 0.0015124678611755371, |
| "learning_rate": 6.693989071038252e-07, |
| "loss": 0.0043, |
| "step": 49 |
| }, |
| { |
| "clip_ratio": 0.0007640034893938719, |
| "epoch": 0.08194727205213895, |
| "grad_norm": 0.06190052628517151, |
| "kl": 0.0015333890914916992, |
| "learning_rate": 6.830601092896176e-07, |
| "loss": 0.0043, |
| "step": 50 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 593.0820529460907, |
| "epoch": 0.08358621749318174, |
| "grad_norm": 0.06474039703607559, |
| "kl": 0.0014415383338928223, |
| "learning_rate": 6.967213114754098e-07, |
| "loss": 0.0076, |
| "num_tokens": 36714234.0, |
| "reward": 0.1923828188155312, |
| "reward_std": 0.1178674673428759, |
| "rewards/pure_accuracy_reward_math": 0.1923828122962732, |
| "step": 51 |
| }, |
| { |
| "clip_ratio": 0.000813577574433566, |
| "epoch": 0.08522516293422451, |
| "grad_norm": 0.06284686177968979, |
| "kl": 0.001471877098083496, |
| "learning_rate": 7.103825136612022e-07, |
| "loss": 0.0077, |
| "step": 52 |
| }, |
| { |
| "clip_ratio": 0.0007952848112040556, |
| "epoch": 0.08686410837526728, |
| "grad_norm": 0.0626569464802742, |
| "kl": 0.0014744699001312256, |
| "learning_rate": 7.240437158469946e-07, |
| "loss": 0.0076, |
| "step": 53 |
| }, |
| { |
| "clip_ratio": 0.000757519129024331, |
| "epoch": 0.08850305381631007, |
| "grad_norm": 0.06075895577669144, |
| "kl": 0.0014587044715881348, |
| "learning_rate": 7.377049180327869e-07, |
| "loss": 0.0076, |
| "step": 54 |
| }, |
| { |
| "clip_ratio": 0.0008223805086799985, |
| "epoch": 0.09014199925735285, |
| "grad_norm": 0.06047751381993294, |
| "kl": 0.0014570355415344238, |
| "learning_rate": 7.513661202185793e-07, |
| "loss": 0.0076, |
| "step": 55 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 595.8216323852539, |
| "epoch": 0.09178094469839562, |
| "grad_norm": 0.06398054957389832, |
| "kl": 0.00144881010055542, |
| "learning_rate": 7.650273224043716e-07, |
| "loss": 0.0086, |
| "num_tokens": 40026830.0, |
| "reward": 0.20247396413469687, |
| "reward_std": 0.11906883475603536, |
| "rewards/pure_accuracy_reward_math": 0.2024739590124227, |
| "step": 56 |
| }, |
| { |
| "clip_ratio": 0.00078221041519555, |
| "epoch": 0.09341989013943841, |
| "grad_norm": 0.06344633549451828, |
| "kl": 0.0014292001724243164, |
| "learning_rate": 7.78688524590164e-07, |
| "loss": 0.0087, |
| "step": 57 |
| }, |
| { |
| "clip_ratio": 0.0008090036571957171, |
| "epoch": 0.09505883558048119, |
| "grad_norm": 0.061615679413080215, |
| "kl": 0.0014474093914031982, |
| "learning_rate": 7.923497267759564e-07, |
| "loss": 0.0087, |
| "step": 58 |
| }, |
| { |
| "clip_ratio": 0.0008085054041657713, |
| "epoch": 0.09669778102152396, |
| "grad_norm": 0.06151620298624039, |
| "kl": 0.0014512240886688232, |
| "learning_rate": 8.060109289617488e-07, |
| "loss": 0.0086, |
| "step": 59 |
| }, |
| { |
| "clip_ratio": 0.000824362684852531, |
| "epoch": 0.09833672646256675, |
| "grad_norm": 0.06084871292114258, |
| "kl": 0.0014411509037017822, |
| "learning_rate": 8.196721311475409e-07, |
| "loss": 0.0086, |
| "step": 60 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 598.2584838867188, |
| "epoch": 0.09997567190360952, |
| "grad_norm": 0.06428408622741699, |
| "kl": 0.0015523433685302734, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 0.0082, |
| "num_tokens": 43356588.0, |
| "reward": 0.18912761070532724, |
| "reward_std": 0.11445756902685389, |
| "rewards/pure_accuracy_reward_math": 0.18912760430248454, |
| "step": 61 |
| }, |
| { |
| "clip_ratio": 0.0008156659468454563, |
| "epoch": 0.1016146173446523, |
| "grad_norm": 0.06184009462594986, |
| "kl": 0.001552283763885498, |
| "learning_rate": 8.469945355191257e-07, |
| "loss": 0.0082, |
| "step": 62 |
| }, |
| { |
| "clip_ratio": 0.0008079836062506729, |
| "epoch": 0.10325356278569509, |
| "grad_norm": 0.060980089008808136, |
| "kl": 0.001578688621520996, |
| "learning_rate": 8.606557377049181e-07, |
| "loss": 0.0082, |
| "step": 63 |
| }, |
| { |
| "clip_ratio": 0.000800917034325721, |
| "epoch": 0.10489250822673786, |
| "grad_norm": 0.061832476407289505, |
| "kl": 0.0016154646873474121, |
| "learning_rate": 8.743169398907105e-07, |
| "loss": 0.0082, |
| "step": 64 |
| }, |
| { |
| "clip_ratio": 0.0008089348676776353, |
| "epoch": 0.10653145366778063, |
| "grad_norm": 0.0595347136259079, |
| "kl": 0.0017150640487670898, |
| "learning_rate": 8.879781420765028e-07, |
| "loss": 0.0081, |
| "step": 65 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 602.9000825881958, |
| "epoch": 0.10817039910882342, |
| "grad_norm": 0.06361431628465652, |
| "kl": 0.001691579818725586, |
| "learning_rate": 9.016393442622952e-07, |
| "loss": 0.005, |
| "num_tokens": 46690213.0, |
| "reward": 0.18261719372821972, |
| "reward_std": 0.10855145199457183, |
| "rewards/pure_accuracy_reward_math": 0.18261718755820766, |
| "step": 66 |
| }, |
| { |
| "clip_ratio": 0.0007102687751512349, |
| "epoch": 0.1098093445498662, |
| "grad_norm": 0.06422943621873856, |
| "kl": 0.001762300729751587, |
| "learning_rate": 9.153005464480875e-07, |
| "loss": 0.005, |
| "step": 67 |
| }, |
| { |
| "clip_ratio": 0.0007208503458286941, |
| "epoch": 0.11144828999090897, |
| "grad_norm": 0.062008682638406754, |
| "kl": 0.0017663836479187012, |
| "learning_rate": 9.289617486338799e-07, |
| "loss": 0.005, |
| "step": 68 |
| }, |
| { |
| "clip_ratio": 0.0007175619265353816, |
| "epoch": 0.11308723543195176, |
| "grad_norm": 0.061343614012002945, |
| "kl": 0.001800447702407837, |
| "learning_rate": 9.426229508196721e-07, |
| "loss": 0.0049, |
| "step": 69 |
| }, |
| { |
| "clip_ratio": 0.0007331656333917635, |
| "epoch": 0.11472618087299453, |
| "grad_norm": 0.05962536856532097, |
| "kl": 0.001809924840927124, |
| "learning_rate": 9.562841530054645e-07, |
| "loss": 0.0049, |
| "step": 70 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 597.3851110935211, |
| "epoch": 0.11636512631403731, |
| "grad_norm": 0.07380504906177521, |
| "kl": 0.001956164836883545, |
| "learning_rate": 9.69945355191257e-07, |
| "loss": 0.0059, |
| "num_tokens": 50008096.0, |
| "reward": 0.18815104707027785, |
| "reward_std": 0.118169616907835, |
| "rewards/pure_accuracy_reward_math": 0.18815104159875773, |
| "step": 71 |
| }, |
| { |
| "clip_ratio": 0.0008107801862706765, |
| "epoch": 0.11800407175508008, |
| "grad_norm": 0.06983543187379837, |
| "kl": 0.0019207000732421875, |
| "learning_rate": 9.836065573770493e-07, |
| "loss": 0.0059, |
| "step": 72 |
| }, |
| { |
| "clip_ratio": 0.0008206042518850154, |
| "epoch": 0.11964301719612287, |
| "grad_norm": 0.06862860172986984, |
| "kl": 0.001914680004119873, |
| "learning_rate": 9.972677595628415e-07, |
| "loss": 0.0059, |
| "step": 73 |
| }, |
| { |
| "clip_ratio": 0.0008123442846681428, |
| "epoch": 0.12128196263716565, |
| "grad_norm": 0.06780818104743958, |
| "kl": 0.001929640769958496, |
| "learning_rate": 1.010928961748634e-06, |
| "loss": 0.0058, |
| "step": 74 |
| }, |
| { |
| "clip_ratio": 0.0008305984221124163, |
| "epoch": 0.12292090807820842, |
| "grad_norm": 0.06472048163414001, |
| "kl": 0.0019400715827941895, |
| "learning_rate": 1.0245901639344263e-06, |
| "loss": 0.0058, |
| "step": 75 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 599.2868046760559, |
| "epoch": 0.12455985351925121, |
| "grad_norm": 0.06543949246406555, |
| "kl": 0.0018826127052307129, |
| "learning_rate": 1.0382513661202188e-06, |
| "loss": 0.0094, |
| "num_tokens": 53333081.0, |
| "reward": 0.19205729727400467, |
| "reward_std": 0.12739214790053666, |
| "rewards/pure_accuracy_reward_math": 0.19205729191889986, |
| "step": 76 |
| }, |
| { |
| "clip_ratio": 0.0008518788816900269, |
| "epoch": 0.12619879896029398, |
| "grad_norm": 0.06384909898042679, |
| "kl": 0.0019139647483825684, |
| "learning_rate": 1.051912568306011e-06, |
| "loss": 0.0094, |
| "step": 77 |
| }, |
| { |
| "clip_ratio": 0.0008921786497353423, |
| "epoch": 0.12783774440133677, |
| "grad_norm": 0.06342752277851105, |
| "kl": 0.001939535140991211, |
| "learning_rate": 1.0655737704918034e-06, |
| "loss": 0.0094, |
| "step": 78 |
| }, |
| { |
| "clip_ratio": 0.0008912816550719072, |
| "epoch": 0.12947668984237953, |
| "grad_norm": 0.06367272883653641, |
| "kl": 0.0019831061363220215, |
| "learning_rate": 1.0792349726775956e-06, |
| "loss": 0.0093, |
| "step": 79 |
| }, |
| { |
| "clip_ratio": 0.0008512400360132233, |
| "epoch": 0.13111563528342232, |
| "grad_norm": 0.062457580119371414, |
| "kl": 0.0020416975021362305, |
| "learning_rate": 1.092896174863388e-06, |
| "loss": 0.0093, |
| "step": 80 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 600.7868010997772, |
| "epoch": 0.1327545807244651, |
| "grad_norm": 0.069603331387043, |
| "kl": 0.0021752119064331055, |
| "learning_rate": 1.1065573770491804e-06, |
| "loss": 0.0066, |
| "num_tokens": 56665038.0, |
| "reward": 0.19889323483221233, |
| "reward_std": 0.12287436821497977, |
| "rewards/pure_accuracy_reward_math": 0.19889323005918413, |
| "step": 81 |
| }, |
| { |
| "clip_ratio": 0.0009323512667833711, |
| "epoch": 0.13439352616550787, |
| "grad_norm": 0.06583772599697113, |
| "kl": 0.002191603183746338, |
| "learning_rate": 1.1202185792349727e-06, |
| "loss": 0.0066, |
| "step": 82 |
| }, |
| { |
| "clip_ratio": 0.0009406020556070871, |
| "epoch": 0.13603247160655066, |
| "grad_norm": 0.06439989805221558, |
| "kl": 0.00225830078125, |
| "learning_rate": 1.1338797814207652e-06, |
| "loss": 0.0066, |
| "step": 83 |
| }, |
| { |
| "clip_ratio": 0.0009481842756713377, |
| "epoch": 0.13767141704759345, |
| "grad_norm": 0.06453175097703934, |
| "kl": 0.0023380517959594727, |
| "learning_rate": 1.1475409836065575e-06, |
| "loss": 0.0065, |
| "step": 84 |
| }, |
| { |
| "clip_ratio": 0.00098607516224547, |
| "epoch": 0.1393103624886362, |
| "grad_norm": 0.06561443954706192, |
| "kl": 0.0024124979972839355, |
| "learning_rate": 1.16120218579235e-06, |
| "loss": 0.0065, |
| "step": 85 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 592.1575720310211, |
| "epoch": 0.140949307929679, |
| "grad_norm": 0.07061439007520676, |
| "kl": 0.002596259117126465, |
| "learning_rate": 1.1748633879781422e-06, |
| "loss": 0.0094, |
| "num_tokens": 59968450.0, |
| "reward": 0.2067057350941468, |
| "reward_std": 0.12307580112246796, |
| "rewards/pure_accuracy_reward_math": 0.20670572892413475, |
| "step": 86 |
| }, |
| { |
| "clip_ratio": 0.0007981168380410963, |
| "epoch": 0.14258825337072178, |
| "grad_norm": 0.0682518407702446, |
| "kl": 0.002633213996887207, |
| "learning_rate": 1.1885245901639345e-06, |
| "loss": 0.0094, |
| "step": 87 |
| }, |
| { |
| "clip_ratio": 0.0008212789625190453, |
| "epoch": 0.14422719881176455, |
| "grad_norm": 0.06932378560304642, |
| "kl": 0.0026621222496032715, |
| "learning_rate": 1.2021857923497268e-06, |
| "loss": 0.0094, |
| "step": 88 |
| }, |
| { |
| "clip_ratio": 0.0008140442066633113, |
| "epoch": 0.14586614425280733, |
| "grad_norm": 0.06654822826385498, |
| "kl": 0.002701401710510254, |
| "learning_rate": 1.215846994535519e-06, |
| "loss": 0.0093, |
| "step": 89 |
| }, |
| { |
| "clip_ratio": 0.0008207391882706361, |
| "epoch": 0.14750508969385012, |
| "grad_norm": 0.06492628902196884, |
| "kl": 0.0028305649757385254, |
| "learning_rate": 1.2295081967213116e-06, |
| "loss": 0.0092, |
| "step": 90 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 596.5735874176025, |
| "epoch": 0.14914403513489288, |
| "grad_norm": 0.06914262473583221, |
| "kl": 0.0026297569274902344, |
| "learning_rate": 1.2431693989071039e-06, |
| "loss": 0.0089, |
| "num_tokens": 63290872.0, |
| "reward": 0.19986979712848552, |
| "reward_std": 0.12688133475603536, |
| "rewards/pure_accuracy_reward_math": 0.19986979235545732, |
| "step": 91 |
| }, |
| { |
| "clip_ratio": 0.0008256697961996906, |
| "epoch": 0.15078298057593567, |
| "grad_norm": 0.06898585706949234, |
| "kl": 0.002676546573638916, |
| "learning_rate": 1.2568306010928963e-06, |
| "loss": 0.009, |
| "step": 92 |
| }, |
| { |
| "clip_ratio": 0.0008680100598894569, |
| "epoch": 0.15242192601697846, |
| "grad_norm": 0.06637588888406754, |
| "kl": 0.0026517510414123535, |
| "learning_rate": 1.2704918032786886e-06, |
| "loss": 0.0089, |
| "step": 93 |
| }, |
| { |
| "clip_ratio": 0.000874812991582985, |
| "epoch": 0.15406087145802122, |
| "grad_norm": 0.06262248754501343, |
| "kl": 0.0026916861534118652, |
| "learning_rate": 1.2841530054644811e-06, |
| "loss": 0.0089, |
| "step": 94 |
| }, |
| { |
| "clip_ratio": 0.0009557890944051906, |
| "epoch": 0.155699816899064, |
| "grad_norm": 0.0627315565943718, |
| "kl": 0.0027064085006713867, |
| "learning_rate": 1.2978142076502734e-06, |
| "loss": 0.0088, |
| "step": 95 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 609.2135615348816, |
| "epoch": 0.1573387623401068, |
| "grad_norm": 0.06779834628105164, |
| "kl": 0.002728700637817383, |
| "learning_rate": 1.3114754098360657e-06, |
| "loss": 0.0115, |
| "num_tokens": 66649152.0, |
| "reward": 0.20214844364090823, |
| "reward_std": 0.12382755958242342, |
| "rewards/pure_accuracy_reward_math": 0.2021484377037268, |
| "step": 96 |
| }, |
| { |
| "clip_ratio": 0.0008006048282709344, |
| "epoch": 0.15897770778114956, |
| "grad_norm": 0.06556153297424316, |
| "kl": 0.0027396678924560547, |
| "learning_rate": 1.3251366120218582e-06, |
| "loss": 0.0115, |
| "step": 97 |
| }, |
| { |
| "clip_ratio": 0.0008465125227985482, |
| "epoch": 0.16061665322219235, |
| "grad_norm": 0.06473369896411896, |
| "kl": 0.0027694106101989746, |
| "learning_rate": 1.3387978142076505e-06, |
| "loss": 0.0115, |
| "step": 98 |
| }, |
| { |
| "clip_ratio": 0.000838196794347823, |
| "epoch": 0.16225559866323513, |
| "grad_norm": 0.06346935033798218, |
| "kl": 0.002801954746246338, |
| "learning_rate": 1.352459016393443e-06, |
| "loss": 0.0114, |
| "step": 99 |
| }, |
| { |
| "clip_ratio": 0.000809030144978351, |
| "epoch": 0.1638945441042779, |
| "grad_norm": 0.061877407133579254, |
| "kl": 0.0028792619705200195, |
| "learning_rate": 1.3661202185792352e-06, |
| "loss": 0.0113, |
| "step": 100 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 596.9437062740326, |
| "epoch": 0.16553348954532068, |
| "grad_norm": 0.06968217343091965, |
| "kl": 0.003116130828857422, |
| "learning_rate": 1.3797814207650273e-06, |
| "loss": 0.0085, |
| "num_tokens": 69972491.0, |
| "reward": 0.20605469375732355, |
| "reward_std": 0.12006876862142235, |
| "rewards/pure_accuracy_reward_math": 0.20605468933354132, |
| "step": 101 |
| }, |
| { |
| "clip_ratio": 0.0008418516855499547, |
| "epoch": 0.16717243498636347, |
| "grad_norm": 0.07103519886732101, |
| "kl": 0.003129124641418457, |
| "learning_rate": 1.3934426229508196e-06, |
| "loss": 0.0086, |
| "step": 102 |
| }, |
| { |
| "clip_ratio": 0.000812729414064961, |
| "epoch": 0.16881138042740623, |
| "grad_norm": 0.06863201409578323, |
| "kl": 0.0031093955039978027, |
| "learning_rate": 1.407103825136612e-06, |
| "loss": 0.0085, |
| "step": 103 |
| }, |
| { |
| "clip_ratio": 0.0008020297700568335, |
| "epoch": 0.17045032586844902, |
| "grad_norm": 0.06707657128572464, |
| "kl": 0.003110051155090332, |
| "learning_rate": 1.4207650273224043e-06, |
| "loss": 0.0084, |
| "step": 104 |
| }, |
| { |
| "clip_ratio": 0.0008383456649880827, |
| "epoch": 0.1720892713094918, |
| "grad_norm": 0.06547861546278, |
| "kl": 0.0031203627586364746, |
| "learning_rate": 1.4344262295081968e-06, |
| "loss": 0.0083, |
| "step": 105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 594.2750873565674, |
| "epoch": 0.17372821675053457, |
| "grad_norm": 0.06509453058242798, |
| "kl": 0.003070056438446045, |
| "learning_rate": 1.4480874316939891e-06, |
| "loss": 0.0082, |
| "num_tokens": 73285196.0, |
| "reward": 0.21744792378740385, |
| "reward_std": 0.12147156818537042, |
| "rewards/pure_accuracy_reward_math": 0.21744791680248454, |
| "step": 106 |
| }, |
| { |
| "clip_ratio": 0.0008183834372630372, |
| "epoch": 0.17536716219157736, |
| "grad_norm": 0.06286683678627014, |
| "kl": 0.003090500831604004, |
| "learning_rate": 1.4617486338797814e-06, |
| "loss": 0.0082, |
| "step": 107 |
| }, |
| { |
| "clip_ratio": 0.0008020995180686441, |
| "epoch": 0.17700610763262015, |
| "grad_norm": 0.061473019421100616, |
| "kl": 0.003094911575317383, |
| "learning_rate": 1.4754098360655739e-06, |
| "loss": 0.0082, |
| "step": 108 |
| }, |
| { |
| "clip_ratio": 0.0008129155939968769, |
| "epoch": 0.1786450530736629, |
| "grad_norm": 0.06097801774740219, |
| "kl": 0.0031610727310180664, |
| "learning_rate": 1.4890710382513662e-06, |
| "loss": 0.0081, |
| "step": 109 |
| }, |
| { |
| "clip_ratio": 0.0008801428618880891, |
| "epoch": 0.1802839985147057, |
| "grad_norm": 0.06094435974955559, |
| "kl": 0.003253757953643799, |
| "learning_rate": 1.5027322404371587e-06, |
| "loss": 0.008, |
| "step": 110 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 591.4017164707184, |
| "epoch": 0.18192294395574848, |
| "grad_norm": 0.07037521153688431, |
| "kl": 0.0035175085067749023, |
| "learning_rate": 1.516393442622951e-06, |
| "loss": 0.0095, |
| "num_tokens": 76587766.0, |
| "reward": 0.23372396477498114, |
| "reward_std": 0.13153934240108356, |
| "rewards/pure_accuracy_reward_math": 0.2337239591870457, |
| "step": 111 |
| }, |
| { |
| "clip_ratio": 0.0009014522622123877, |
| "epoch": 0.18356188939679124, |
| "grad_norm": 0.06573645025491714, |
| "kl": 0.003545045852661133, |
| "learning_rate": 1.5300546448087432e-06, |
| "loss": 0.0096, |
| "step": 112 |
| }, |
| { |
| "clip_ratio": 0.0009236231287559349, |
| "epoch": 0.18520083483783403, |
| "grad_norm": 0.06465188413858414, |
| "kl": 0.00360715389251709, |
| "learning_rate": 1.5437158469945357e-06, |
| "loss": 0.0095, |
| "step": 113 |
| }, |
| { |
| "clip_ratio": 0.0009181838793210773, |
| "epoch": 0.18683978027887682, |
| "grad_norm": 0.06287030875682831, |
| "kl": 0.0036890506744384766, |
| "learning_rate": 1.557377049180328e-06, |
| "loss": 0.0094, |
| "step": 114 |
| }, |
| { |
| "clip_ratio": 0.0008825006539154856, |
| "epoch": 0.18847872571991958, |
| "grad_norm": 0.06144850701093674, |
| "kl": 0.003753662109375, |
| "learning_rate": 1.5710382513661205e-06, |
| "loss": 0.0093, |
| "step": 115 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 594.1959838867188, |
| "epoch": 0.19011767116096237, |
| "grad_norm": 0.06793060898780823, |
| "kl": 0.003781437873840332, |
| "learning_rate": 1.5846994535519128e-06, |
| "loss": 0.011, |
| "num_tokens": 79893856.0, |
| "reward": 0.21386719393194653, |
| "reward_std": 0.12512964301276952, |
| "rewards/pure_accuracy_reward_math": 0.2138671872962732, |
| "step": 116 |
| }, |
| { |
| "clip_ratio": 0.0008201918570875932, |
| "epoch": 0.19175661660200516, |
| "grad_norm": 0.06297077238559723, |
| "kl": 0.0038176774978637695, |
| "learning_rate": 1.5983606557377053e-06, |
| "loss": 0.011, |
| "step": 117 |
| }, |
| { |
| "clip_ratio": 0.0007795650292337086, |
| "epoch": 0.19339556204304792, |
| "grad_norm": 0.061727218329906464, |
| "kl": 0.003816843032836914, |
| "learning_rate": 1.6120218579234975e-06, |
| "loss": 0.011, |
| "step": 118 |
| }, |
| { |
| "clip_ratio": 0.0008128985705297964, |
| "epoch": 0.1950345074840907, |
| "grad_norm": 0.05955222249031067, |
| "kl": 0.003865480422973633, |
| "learning_rate": 1.6256830601092896e-06, |
| "loss": 0.0109, |
| "step": 119 |
| }, |
| { |
| "clip_ratio": 0.0008892948203538253, |
| "epoch": 0.1966734529251335, |
| "grad_norm": 0.05931426212191582, |
| "kl": 0.003886103630065918, |
| "learning_rate": 1.6393442622950819e-06, |
| "loss": 0.0108, |
| "step": 120 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 586.1058125495911, |
| "epoch": 0.19831239836617626, |
| "grad_norm": 0.07331722974777222, |
| "kl": 0.0038805007934570312, |
| "learning_rate": 1.6530054644808744e-06, |
| "loss": 0.0069, |
| "num_tokens": 83181421.0, |
| "reward": 0.2060546927677933, |
| "reward_std": 0.12338518165051937, |
| "rewards/pure_accuracy_reward_math": 0.20605468822759576, |
| "step": 121 |
| }, |
| { |
| "clip_ratio": 0.0008778467455385908, |
| "epoch": 0.19995134380721905, |
| "grad_norm": 0.06926850229501724, |
| "kl": 0.0038404464721679688, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.0069, |
| "step": 122 |
| }, |
| { |
| "clip_ratio": 0.0009394907406203856, |
| "epoch": 0.20159028924826183, |
| "grad_norm": 0.06973890960216522, |
| "kl": 0.003817915916442871, |
| "learning_rate": 1.6803278688524592e-06, |
| "loss": 0.0069, |
| "step": 123 |
| }, |
| { |
| "clip_ratio": 0.000969218867339805, |
| "epoch": 0.2032292346893046, |
| "grad_norm": 0.06822917610406876, |
| "kl": 0.0038552284240722656, |
| "learning_rate": 1.6939890710382514e-06, |
| "loss": 0.0068, |
| "step": 124 |
| }, |
| { |
| "clip_ratio": 0.0009342714683953091, |
| "epoch": 0.20486818013034738, |
| "grad_norm": 0.06682004034519196, |
| "kl": 0.003947019577026367, |
| "learning_rate": 1.7076502732240437e-06, |
| "loss": 0.0066, |
| "step": 125 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 589.0709819793701, |
| "epoch": 0.20650712557139017, |
| "grad_norm": 0.07198912650346756, |
| "kl": 0.003998160362243652, |
| "learning_rate": 1.7213114754098362e-06, |
| "loss": 0.0125, |
| "num_tokens": 86478355.0, |
| "reward": 0.22949219273868948, |
| "reward_std": 0.14352073048939928, |
| "rewards/pure_accuracy_reward_math": 0.22949218878056854, |
| "step": 126 |
| }, |
| { |
| "clip_ratio": 0.0010473157254864418, |
| "epoch": 0.20814607101243293, |
| "grad_norm": 0.07177633047103882, |
| "kl": 0.004043221473693848, |
| "learning_rate": 1.7349726775956285e-06, |
| "loss": 0.0125, |
| "step": 127 |
| }, |
| { |
| "clip_ratio": 0.0010181566002529507, |
| "epoch": 0.20978501645347572, |
| "grad_norm": 0.06755513697862625, |
| "kl": 0.0041484832763671875, |
| "learning_rate": 1.748633879781421e-06, |
| "loss": 0.0124, |
| "step": 128 |
| }, |
| { |
| "clip_ratio": 0.0010040162792392948, |
| "epoch": 0.2114239618945185, |
| "grad_norm": 0.06670001894235611, |
| "kl": 0.004278779029846191, |
| "learning_rate": 1.7622950819672133e-06, |
| "loss": 0.0123, |
| "step": 129 |
| }, |
| { |
| "clip_ratio": 0.0010213782433083907, |
| "epoch": 0.21306290733556127, |
| "grad_norm": 0.06810087710618973, |
| "kl": 0.004375338554382324, |
| "learning_rate": 1.7759562841530055e-06, |
| "loss": 0.0121, |
| "step": 130 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 585.0755393505096, |
| "epoch": 0.21470185277660406, |
| "grad_norm": 0.07637549191713333, |
| "kl": 0.004368305206298828, |
| "learning_rate": 1.789617486338798e-06, |
| "loss": 0.0075, |
| "num_tokens": 89764315.0, |
| "reward": 0.22656250739237294, |
| "reward_std": 0.14191649784334004, |
| "rewards/pure_accuracy_reward_math": 0.2265624997089617, |
| "step": 131 |
| }, |
| { |
| "clip_ratio": 0.0010003317277096357, |
| "epoch": 0.21634079821764685, |
| "grad_norm": 0.07960700243711472, |
| "kl": 0.0042803287506103516, |
| "learning_rate": 1.8032786885245903e-06, |
| "loss": 0.0075, |
| "step": 132 |
| }, |
| { |
| "clip_ratio": 0.0010098553934767551, |
| "epoch": 0.2179797436586896, |
| "grad_norm": 0.0741487368941307, |
| "kl": 0.004350185394287109, |
| "learning_rate": 1.8169398907103828e-06, |
| "loss": 0.0074, |
| "step": 133 |
| }, |
| { |
| "clip_ratio": 0.0010473617899151577, |
| "epoch": 0.2196186890997324, |
| "grad_norm": 0.07375472038984299, |
| "kl": 0.004483342170715332, |
| "learning_rate": 1.830601092896175e-06, |
| "loss": 0.0073, |
| "step": 134 |
| }, |
| { |
| "clip_ratio": 0.0010608058864818304, |
| "epoch": 0.22125763454077518, |
| "grad_norm": 0.06948796659708023, |
| "kl": 0.004660606384277344, |
| "learning_rate": 1.8442622950819674e-06, |
| "loss": 0.0071, |
| "step": 135 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 599.9541211128235, |
| "epoch": 0.22289657998181794, |
| "grad_norm": 0.06568682193756104, |
| "kl": 0.004492521286010742, |
| "learning_rate": 1.8579234972677599e-06, |
| "loss": 0.0076, |
| "num_tokens": 93086726.0, |
| "reward": 0.20638021410559304, |
| "reward_std": 0.11796818353468552, |
| "rewards/pure_accuracy_reward_math": 0.20638020828482695, |
| "step": 136 |
| }, |
| { |
| "clip_ratio": 0.0007427198539744495, |
| "epoch": 0.22453552542286073, |
| "grad_norm": 0.061326853930950165, |
| "kl": 0.004569292068481445, |
| "learning_rate": 1.8715846994535521e-06, |
| "loss": 0.0076, |
| "step": 137 |
| }, |
| { |
| "clip_ratio": 0.0007810102034682131, |
| "epoch": 0.22617447086390352, |
| "grad_norm": 0.06033333018422127, |
| "kl": 0.0046776533126831055, |
| "learning_rate": 1.8852459016393442e-06, |
| "loss": 0.0075, |
| "step": 138 |
| }, |
| { |
| "clip_ratio": 0.0007891726669413401, |
| "epoch": 0.22781341630494628, |
| "grad_norm": 0.057988133281469345, |
| "kl": 0.004709959030151367, |
| "learning_rate": 1.8989071038251367e-06, |
| "loss": 0.0074, |
| "step": 139 |
| }, |
| { |
| "clip_ratio": 0.00077407437288457, |
| "epoch": 0.22945236174598907, |
| "grad_norm": 0.055629778653383255, |
| "kl": 0.0047043561935424805, |
| "learning_rate": 1.912568306010929e-06, |
| "loss": 0.0073, |
| "step": 140 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 589.5273640155792, |
| "epoch": 0.23109130718703186, |
| "grad_norm": 0.08023487031459808, |
| "kl": 0.004754543304443359, |
| "learning_rate": 1.9262295081967215e-06, |
| "loss": 0.0061, |
| "num_tokens": 96377242.0, |
| "reward": 0.22167969349538907, |
| "reward_std": 0.11916955123888329, |
| "rewards/pure_accuracy_reward_math": 0.2216796882566996, |
| "step": 141 |
| }, |
| { |
| "clip_ratio": 0.0008194281034548112, |
| "epoch": 0.23273025262807462, |
| "grad_norm": 0.06729461997747421, |
| "kl": 0.004743695259094238, |
| "learning_rate": 1.939890710382514e-06, |
| "loss": 0.0061, |
| "step": 142 |
| }, |
| { |
| "clip_ratio": 0.0008319891088035547, |
| "epoch": 0.2343691980691174, |
| "grad_norm": 0.0685749426484108, |
| "kl": 0.004932522773742676, |
| "learning_rate": 1.953551912568306e-06, |
| "loss": 0.006, |
| "step": 143 |
| }, |
| { |
| "clip_ratio": 0.000810704066566359, |
| "epoch": 0.23600814351016017, |
| "grad_norm": 0.0689912959933281, |
| "kl": 0.005072951316833496, |
| "learning_rate": 1.9672131147540985e-06, |
| "loss": 0.0058, |
| "step": 144 |
| }, |
| { |
| "clip_ratio": 0.0008489251890750893, |
| "epoch": 0.23764708895120296, |
| "grad_norm": 0.06294326484203339, |
| "kl": 0.005017280578613281, |
| "learning_rate": 1.980874316939891e-06, |
| "loss": 0.0057, |
| "step": 145 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 600.1722221374512, |
| "epoch": 0.23928603439224574, |
| "grad_norm": 0.06795884668827057, |
| "kl": 0.005151629447937012, |
| "learning_rate": 1.994535519125683e-06, |
| "loss": 0.0096, |
| "num_tokens": 99709727.0, |
| "reward": 0.2047526100941468, |
| "reward_std": 0.1280827015871182, |
| "rewards/pure_accuracy_reward_math": 0.2047526053211186, |
| "step": 146 |
| }, |
| { |
| "clip_ratio": 0.0008647626258380114, |
| "epoch": 0.2409249798332885, |
| "grad_norm": 0.06390897184610367, |
| "kl": 0.0051021575927734375, |
| "learning_rate": 2.0081967213114756e-06, |
| "loss": 0.0096, |
| "step": 147 |
| }, |
| { |
| "clip_ratio": 0.0009481125781576338, |
| "epoch": 0.2425639252743313, |
| "grad_norm": 0.062446512281894684, |
| "kl": 0.005044102668762207, |
| "learning_rate": 2.021857923497268e-06, |
| "loss": 0.0095, |
| "step": 148 |
| }, |
| { |
| "clip_ratio": 0.0009975744914072493, |
| "epoch": 0.24420287071537408, |
| "grad_norm": 0.06106211990118027, |
| "kl": 0.00504612922668457, |
| "learning_rate": 2.03551912568306e-06, |
| "loss": 0.0093, |
| "step": 149 |
| }, |
| { |
| "clip_ratio": 0.0009842645606568112, |
| "epoch": 0.24584181615641684, |
| "grad_norm": 0.058460384607315063, |
| "kl": 0.005153179168701172, |
| "learning_rate": 2.0491803278688526e-06, |
| "loss": 0.0092, |
| "step": 150 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 599.0677282810211, |
| "epoch": 0.24748076159745963, |
| "grad_norm": 0.08131567388772964, |
| "kl": 0.005348920822143555, |
| "learning_rate": 2.062841530054645e-06, |
| "loss": 0.0096, |
| "num_tokens": 103039175.0, |
| "reward": 0.2106119856762234, |
| "reward_std": 0.14471486618276685, |
| "rewards/pure_accuracy_reward_math": 0.21061197892413475, |
| "step": 151 |
| }, |
| { |
| "clip_ratio": 0.0011485503518997575, |
| "epoch": 0.24911970703850242, |
| "grad_norm": 0.0808255672454834, |
| "kl": 0.0054149627685546875, |
| "learning_rate": 2.0765027322404376e-06, |
| "loss": 0.0096, |
| "step": 152 |
| }, |
| { |
| "clip_ratio": 0.0011561684764274105, |
| "epoch": 0.2507586524795452, |
| "grad_norm": 0.07708927989006042, |
| "kl": 0.005404829978942871, |
| "learning_rate": 2.0901639344262297e-06, |
| "loss": 0.0095, |
| "step": 153 |
| }, |
| { |
| "clip_ratio": 0.0011439574755058857, |
| "epoch": 0.25239759792058797, |
| "grad_norm": 0.07077940553426743, |
| "kl": 0.005424022674560547, |
| "learning_rate": 2.103825136612022e-06, |
| "loss": 0.0093, |
| "step": 154 |
| }, |
| { |
| "clip_ratio": 0.0011864712664646504, |
| "epoch": 0.25403654336163073, |
| "grad_norm": 0.07691214233636856, |
| "kl": 0.005586385726928711, |
| "learning_rate": 2.1174863387978147e-06, |
| "loss": 0.0091, |
| "step": 155 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 600.4765803813934, |
| "epoch": 0.25567548880267355, |
| "grad_norm": 0.06585235148668289, |
| "kl": 0.005746960639953613, |
| "learning_rate": 2.1311475409836067e-06, |
| "loss": 0.0081, |
| "num_tokens": 106367483.0, |
| "reward": 0.2112630266638007, |
| "reward_std": 0.11601505969883874, |
| "rewards/pure_accuracy_reward_math": 0.21126302142511122, |
| "step": 156 |
| }, |
| { |
| "clip_ratio": 0.0008758294210338136, |
| "epoch": 0.2573144342437163, |
| "grad_norm": 0.07339663803577423, |
| "kl": 0.005802512168884277, |
| "learning_rate": 2.144808743169399e-06, |
| "loss": 0.0081, |
| "step": 157 |
| }, |
| { |
| "clip_ratio": 0.0008576641474746793, |
| "epoch": 0.25895337968475907, |
| "grad_norm": 0.06242053955793381, |
| "kl": 0.005854010581970215, |
| "learning_rate": 2.1584699453551913e-06, |
| "loss": 0.008, |
| "step": 158 |
| }, |
| { |
| "clip_ratio": 0.0008841920518989355, |
| "epoch": 0.2605923251258019, |
| "grad_norm": 0.06326813995838165, |
| "kl": 0.0059430599212646484, |
| "learning_rate": 2.1721311475409838e-06, |
| "loss": 0.0078, |
| "step": 159 |
| }, |
| { |
| "clip_ratio": 0.0009176517396554118, |
| "epoch": 0.26223127056684464, |
| "grad_norm": 0.06189825013279915, |
| "kl": 0.005987405776977539, |
| "learning_rate": 2.185792349726776e-06, |
| "loss": 0.0077, |
| "step": 160 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 588.5446150302887, |
| "epoch": 0.2638702160078874, |
| "grad_norm": 0.06866484135389328, |
| "kl": 0.006237506866455078, |
| "learning_rate": 2.1994535519125683e-06, |
| "loss": 0.0078, |
| "num_tokens": 109663204.0, |
| "reward": 0.21061198558891192, |
| "reward_std": 0.12186720367753878, |
| "rewards/pure_accuracy_reward_math": 0.21061197930248454, |
| "step": 161 |
| }, |
| { |
| "clip_ratio": 0.0009436907939743833, |
| "epoch": 0.2655091614489302, |
| "grad_norm": 0.07263052463531494, |
| "kl": 0.0061321258544921875, |
| "learning_rate": 2.213114754098361e-06, |
| "loss": 0.0078, |
| "step": 162 |
| }, |
| { |
| "clip_ratio": 0.0009464566720680523, |
| "epoch": 0.267148106889973, |
| "grad_norm": 0.06491200625896454, |
| "kl": 0.006033658981323242, |
| "learning_rate": 2.2267759562841533e-06, |
| "loss": 0.0076, |
| "step": 163 |
| }, |
| { |
| "clip_ratio": 0.000968025926908922, |
| "epoch": 0.26878705233101574, |
| "grad_norm": 0.06358778476715088, |
| "kl": 0.006163120269775391, |
| "learning_rate": 2.2404371584699454e-06, |
| "loss": 0.0075, |
| "step": 164 |
| }, |
| { |
| "clip_ratio": 0.0009389551167942045, |
| "epoch": 0.27042599777205856, |
| "grad_norm": 0.06583644449710846, |
| "kl": 0.006374359130859375, |
| "learning_rate": 2.254098360655738e-06, |
| "loss": 0.0073, |
| "step": 165 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 594.6982605457306, |
| "epoch": 0.2720649432131013, |
| "grad_norm": 0.07151180505752563, |
| "kl": 0.006270289421081543, |
| "learning_rate": 2.2677595628415304e-06, |
| "loss": 0.0071, |
| "num_tokens": 112983493.0, |
| "reward": 0.21158854870009236, |
| "reward_std": 0.1309350436204113, |
| "rewards/pure_accuracy_reward_math": 0.21158854171517305, |
| "step": 166 |
| }, |
| { |
| "clip_ratio": 0.0010227661889530282, |
| "epoch": 0.2737038886541441, |
| "grad_norm": 0.08615773171186447, |
| "kl": 0.0064040422439575195, |
| "learning_rate": 2.2814207650273224e-06, |
| "loss": 0.0071, |
| "step": 167 |
| }, |
| { |
| "clip_ratio": 0.0009205440467212611, |
| "epoch": 0.2753428340951869, |
| "grad_norm": 0.06637667864561081, |
| "kl": 0.00621640682220459, |
| "learning_rate": 2.295081967213115e-06, |
| "loss": 0.0069, |
| "step": 168 |
| }, |
| { |
| "clip_ratio": 0.001003933336846785, |
| "epoch": 0.27698177953622966, |
| "grad_norm": 0.076202891767025, |
| "kl": 0.0062408447265625, |
| "learning_rate": 2.3087431693989074e-06, |
| "loss": 0.0068, |
| "step": 169 |
| }, |
| { |
| "clip_ratio": 0.0009103266062311377, |
| "epoch": 0.2786207249772724, |
| "grad_norm": 0.06154695898294449, |
| "kl": 0.006373286247253418, |
| "learning_rate": 2.3224043715847e-06, |
| "loss": 0.0065, |
| "step": 170 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 584.0752146244049, |
| "epoch": 0.28025967041831523, |
| "grad_norm": 0.06730078905820847, |
| "kl": 0.006638884544372559, |
| "learning_rate": 2.336065573770492e-06, |
| "loss": 0.0089, |
| "num_tokens": 116258180.0, |
| "reward": 0.220703131693881, |
| "reward_std": 0.12132410902995616, |
| "rewards/pure_accuracy_reward_math": 0.2207031263387762, |
| "step": 171 |
| }, |
| { |
| "clip_ratio": 0.001059339236917367, |
| "epoch": 0.281898615859358, |
| "grad_norm": 0.08054529875516891, |
| "kl": 0.0067511796951293945, |
| "learning_rate": 2.3497267759562845e-06, |
| "loss": 0.0089, |
| "step": 172 |
| }, |
| { |
| "clip_ratio": 0.0010770070745707017, |
| "epoch": 0.28353756130040075, |
| "grad_norm": 0.06891456246376038, |
| "kl": 0.006635904312133789, |
| "learning_rate": 2.363387978142077e-06, |
| "loss": 0.0088, |
| "step": 173 |
| }, |
| { |
| "clip_ratio": 0.0009533684936400277, |
| "epoch": 0.28517650674144357, |
| "grad_norm": 0.06477612257003784, |
| "kl": 0.006537199020385742, |
| "learning_rate": 2.377049180327869e-06, |
| "loss": 0.0086, |
| "step": 174 |
| }, |
| { |
| "clip_ratio": 0.0008389282212419857, |
| "epoch": 0.28681545218248633, |
| "grad_norm": 0.06404498219490051, |
| "kl": 0.006713271141052246, |
| "learning_rate": 2.390710382513661e-06, |
| "loss": 0.0084, |
| "step": 175 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 586.0224783420563, |
| "epoch": 0.2884543976235291, |
| "grad_norm": 0.07143088430166245, |
| "kl": 0.006848454475402832, |
| "learning_rate": 2.4043715846994536e-06, |
| "loss": 0.0081, |
| "num_tokens": 119549581.0, |
| "reward": 0.23144531846628524, |
| "reward_std": 0.11726316896965727, |
| "rewards/pure_accuracy_reward_math": 0.23144531299476512, |
| "step": 176 |
| }, |
| { |
| "clip_ratio": 0.0008353526282007806, |
| "epoch": 0.2900933430645719, |
| "grad_norm": 0.07284073531627655, |
| "kl": 0.006837129592895508, |
| "learning_rate": 2.418032786885246e-06, |
| "loss": 0.0081, |
| "step": 177 |
| }, |
| { |
| "clip_ratio": 0.0008791502111762384, |
| "epoch": 0.29173228850561467, |
| "grad_norm": 0.06452663242816925, |
| "kl": 0.006670117378234863, |
| "learning_rate": 2.431693989071038e-06, |
| "loss": 0.008, |
| "step": 178 |
| }, |
| { |
| "clip_ratio": 0.0009922128726884694, |
| "epoch": 0.29337123394665743, |
| "grad_norm": 0.07056602835655212, |
| "kl": 0.006812095642089844, |
| "learning_rate": 2.4453551912568307e-06, |
| "loss": 0.0078, |
| "step": 179 |
| }, |
| { |
| "clip_ratio": 0.0009285092224899927, |
| "epoch": 0.29501017938770024, |
| "grad_norm": 0.06236054748296738, |
| "kl": 0.0068634748458862305, |
| "learning_rate": 2.459016393442623e-06, |
| "loss": 0.0075, |
| "step": 180 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 587.9111535549164, |
| "epoch": 0.296649124828743, |
| "grad_norm": 0.07245080173015594, |
| "kl": 0.007324337959289551, |
| "learning_rate": 2.4726775956284156e-06, |
| "loss": 0.0063, |
| "num_tokens": 122841384.0, |
| "reward": 0.22916667381650768, |
| "reward_std": 0.12823739141458645, |
| "rewards/pure_accuracy_reward_math": 0.22916666624951176, |
| "step": 181 |
| }, |
| { |
| "clip_ratio": 0.0010925326015467363, |
| "epoch": 0.29828807026978577, |
| "grad_norm": 0.08096741139888763, |
| "kl": 0.007236003875732422, |
| "learning_rate": 2.4863387978142077e-06, |
| "loss": 0.0062, |
| "step": 182 |
| }, |
| { |
| "clip_ratio": 0.0010355811738236298, |
| "epoch": 0.2999270157108286, |
| "grad_norm": 0.06912072002887726, |
| "kl": 0.007112741470336914, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0061, |
| "step": 183 |
| }, |
| { |
| "clip_ratio": 0.0009683458151812374, |
| "epoch": 0.30156596115187134, |
| "grad_norm": 0.07461241632699966, |
| "kl": 0.007212400436401367, |
| "learning_rate": 2.5136612021857927e-06, |
| "loss": 0.0058, |
| "step": 184 |
| }, |
| { |
| "clip_ratio": 0.0009423685739875509, |
| "epoch": 0.3032049065929141, |
| "grad_norm": 0.0647897720336914, |
| "kl": 0.007313847541809082, |
| "learning_rate": 2.5273224043715848e-06, |
| "loss": 0.0055, |
| "step": 185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 583.5849809646606, |
| "epoch": 0.3048438520339569, |
| "grad_norm": 0.06853717565536499, |
| "kl": 0.007729768753051758, |
| "learning_rate": 2.5409836065573773e-06, |
| "loss": 0.0077, |
| "num_tokens": 126127813.0, |
| "reward": 0.20898438137373887, |
| "reward_std": 0.11270587670151144, |
| "rewards/pure_accuracy_reward_math": 0.2089843761350494, |
| "step": 186 |
| }, |
| { |
| "clip_ratio": 0.0010363689809764765, |
| "epoch": 0.3064827974749997, |
| "grad_norm": 0.07357639819383621, |
| "kl": 0.007730722427368164, |
| "learning_rate": 2.5546448087431697e-06, |
| "loss": 0.0076, |
| "step": 187 |
| }, |
| { |
| "clip_ratio": 0.0010397096725682786, |
| "epoch": 0.30812174291604244, |
| "grad_norm": 0.06807340681552887, |
| "kl": 0.007578372955322266, |
| "learning_rate": 2.5683060109289622e-06, |
| "loss": 0.0075, |
| "step": 188 |
| }, |
| { |
| "clip_ratio": 0.0007689736390830149, |
| "epoch": 0.30976068835708526, |
| "grad_norm": 0.06024845689535141, |
| "kl": 0.007673501968383789, |
| "learning_rate": 2.5819672131147543e-06, |
| "loss": 0.0072, |
| "step": 189 |
| }, |
| { |
| "clip_ratio": 0.0007949806515625824, |
| "epoch": 0.311399633798128, |
| "grad_norm": 0.06614933907985687, |
| "kl": 0.007935523986816406, |
| "learning_rate": 2.595628415300547e-06, |
| "loss": 0.007, |
| "step": 190 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 582.4375193119049, |
| "epoch": 0.3130385792391708, |
| "grad_norm": 0.07819633185863495, |
| "kl": 0.00816202163696289, |
| "learning_rate": 2.6092896174863393e-06, |
| "loss": 0.0046, |
| "num_tokens": 129404757.0, |
| "reward": 0.23046875756699592, |
| "reward_std": 0.12788849917706102, |
| "rewards/pure_accuracy_reward_math": 0.2304687495343387, |
| "step": 191 |
| }, |
| { |
| "clip_ratio": 0.0010027453071188575, |
| "epoch": 0.3146775246802136, |
| "grad_norm": 0.07076407223939896, |
| "kl": 0.007757902145385742, |
| "learning_rate": 2.6229508196721314e-06, |
| "loss": 0.0045, |
| "step": 192 |
| }, |
| { |
| "clip_ratio": 0.0011502429521215163, |
| "epoch": 0.31631647012125635, |
| "grad_norm": 0.06905192136764526, |
| "kl": 0.007544517517089844, |
| "learning_rate": 2.636612021857924e-06, |
| "loss": 0.0044, |
| "step": 193 |
| }, |
| { |
| "clip_ratio": 0.001169734060454175, |
| "epoch": 0.3179554155622991, |
| "grad_norm": 0.07402996718883514, |
| "kl": 0.007522106170654297, |
| "learning_rate": 2.6502732240437163e-06, |
| "loss": 0.0042, |
| "step": 194 |
| }, |
| { |
| "clip_ratio": 0.001001289329451538, |
| "epoch": 0.31959436100334193, |
| "grad_norm": 0.0615554116666317, |
| "kl": 0.007868766784667969, |
| "learning_rate": 2.6639344262295084e-06, |
| "loss": 0.0039, |
| "step": 195 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 598.659848690033, |
| "epoch": 0.3212333064443847, |
| "grad_norm": 0.07155327498912811, |
| "kl": 0.0077495574951171875, |
| "learning_rate": 2.677595628415301e-06, |
| "loss": 0.009, |
| "num_tokens": 132735652.0, |
| "reward": 0.21126302669290453, |
| "reward_std": 0.12602885958040133, |
| "rewards/pure_accuracy_reward_math": 0.21126302133779973, |
| "step": 196 |
| }, |
| { |
| "clip_ratio": 0.0009628182596088664, |
| "epoch": 0.32287225188542745, |
| "grad_norm": 0.07324164360761642, |
| "kl": 0.00766444206237793, |
| "learning_rate": 2.6912568306010934e-06, |
| "loss": 0.0089, |
| "step": 197 |
| }, |
| { |
| "clip_ratio": 0.001045915161398625, |
| "epoch": 0.32451119732647027, |
| "grad_norm": 0.07669375091791153, |
| "kl": 0.0074596405029296875, |
| "learning_rate": 2.704918032786886e-06, |
| "loss": 0.0087, |
| "step": 198 |
| }, |
| { |
| "clip_ratio": 0.0009246684501249547, |
| "epoch": 0.32615014276751303, |
| "grad_norm": 0.0650852844119072, |
| "kl": 0.0074880123138427734, |
| "learning_rate": 2.718579234972678e-06, |
| "loss": 0.0085, |
| "step": 199 |
| }, |
| { |
| "clip_ratio": 0.0009262548958304251, |
| "epoch": 0.3277890882085558, |
| "grad_norm": 0.0722322165966034, |
| "kl": 0.007855653762817383, |
| "learning_rate": 2.7322404371584705e-06, |
| "loss": 0.0082, |
| "step": 200 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 591.9775581359863, |
| "epoch": 0.3294280336495986, |
| "grad_norm": 0.07280802726745605, |
| "kl": 0.007916688919067383, |
| "learning_rate": 2.745901639344263e-06, |
| "loss": 0.0084, |
| "num_tokens": 136043335.0, |
| "reward": 0.2236328196595423, |
| "reward_std": 0.12912937795044854, |
| "rewards/pure_accuracy_reward_math": 0.22363281302386895, |
| "step": 201 |
| }, |
| { |
| "clip_ratio": 0.0010444082931826415, |
| "epoch": 0.33106697909064137, |
| "grad_norm": 0.0775647759437561, |
| "kl": 0.007770538330078125, |
| "learning_rate": 2.7595628415300546e-06, |
| "loss": 0.0083, |
| "step": 202 |
| }, |
| { |
| "clip_ratio": 0.0010056693769797675, |
| "epoch": 0.3327059245316841, |
| "grad_norm": 0.06984438002109528, |
| "kl": 0.0076978206634521484, |
| "learning_rate": 2.773224043715847e-06, |
| "loss": 0.0081, |
| "step": 203 |
| }, |
| { |
| "clip_ratio": 0.0010063842889849184, |
| "epoch": 0.33434486997272694, |
| "grad_norm": 0.07507704943418503, |
| "kl": 0.007877111434936523, |
| "learning_rate": 2.786885245901639e-06, |
| "loss": 0.0079, |
| "step": 204 |
| }, |
| { |
| "clip_ratio": 0.0010283672744435535, |
| "epoch": 0.3359838154137697, |
| "grad_norm": 0.07364527881145477, |
| "kl": 0.00825810432434082, |
| "learning_rate": 2.8005464480874316e-06, |
| "loss": 0.0076, |
| "step": 205 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 588.1670074462891, |
| "epoch": 0.33762276085481246, |
| "grad_norm": 0.06861822307109833, |
| "kl": 0.00839853286743164, |
| "learning_rate": 2.814207650273224e-06, |
| "loss": 0.0057, |
| "num_tokens": 139337308.0, |
| "reward": 0.2106119850941468, |
| "reward_std": 0.12027020112145692, |
| "rewards/pure_accuracy_reward_math": 0.21061198008828796, |
| "step": 206 |
| }, |
| { |
| "clip_ratio": 0.0010541207553558252, |
| "epoch": 0.3392617062958553, |
| "grad_norm": 0.08106576651334763, |
| "kl": 0.008537769317626953, |
| "learning_rate": 2.8278688524590166e-06, |
| "loss": 0.0057, |
| "step": 207 |
| }, |
| { |
| "clip_ratio": 0.0009489937833109252, |
| "epoch": 0.34090065173689804, |
| "grad_norm": 0.0691104531288147, |
| "kl": 0.008366107940673828, |
| "learning_rate": 2.8415300546448087e-06, |
| "loss": 0.0054, |
| "step": 208 |
| }, |
| { |
| "clip_ratio": 0.0009892520201901789, |
| "epoch": 0.3425395971779408, |
| "grad_norm": 0.06807916611433029, |
| "kl": 0.008470535278320312, |
| "learning_rate": 2.855191256830601e-06, |
| "loss": 0.0052, |
| "step": 209 |
| }, |
| { |
| "clip_ratio": 0.00096842655295859, |
| "epoch": 0.3441785426189836, |
| "grad_norm": 0.0654783844947815, |
| "kl": 0.008765220642089844, |
| "learning_rate": 2.8688524590163937e-06, |
| "loss": 0.0049, |
| "step": 210 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 590.6634306907654, |
| "epoch": 0.3458174880600264, |
| "grad_norm": 0.0704723373055458, |
| "kl": 0.008867502212524414, |
| "learning_rate": 2.8825136612021857e-06, |
| "loss": 0.0091, |
| "num_tokens": 142633758.0, |
| "reward": 0.21549479861278087, |
| "reward_std": 0.13379461748991162, |
| "rewards/pure_accuracy_reward_math": 0.21549479197710752, |
| "step": 211 |
| }, |
| { |
| "clip_ratio": 0.0011996210827192044, |
| "epoch": 0.34745643350106914, |
| "grad_norm": 0.08370186388492584, |
| "kl": 0.008816242218017578, |
| "learning_rate": 2.8961748633879782e-06, |
| "loss": 0.009, |
| "step": 212 |
| }, |
| { |
| "clip_ratio": 0.001070254641945212, |
| "epoch": 0.34909537894211196, |
| "grad_norm": 0.06448537111282349, |
| "kl": 0.008533716201782227, |
| "learning_rate": 2.9098360655737707e-06, |
| "loss": 0.0088, |
| "step": 213 |
| }, |
| { |
| "clip_ratio": 0.0011582542088603986, |
| "epoch": 0.3507343243831547, |
| "grad_norm": 0.07735106348991394, |
| "kl": 0.008788824081420898, |
| "learning_rate": 2.923497267759563e-06, |
| "loss": 0.0085, |
| "step": 214 |
| }, |
| { |
| "clip_ratio": 0.0010283683568559354, |
| "epoch": 0.3523732698241975, |
| "grad_norm": 0.06124194711446762, |
| "kl": 0.008962869644165039, |
| "learning_rate": 2.9371584699453553e-06, |
| "loss": 0.0082, |
| "step": 215 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 563.4902558326721, |
| "epoch": 0.3540122152652403, |
| "grad_norm": 0.07734435796737671, |
| "kl": 0.009832620620727539, |
| "learning_rate": 2.9508196721311478e-06, |
| "loss": 0.0061, |
| "num_tokens": 145848300.0, |
| "reward": 0.24381511058891192, |
| "reward_std": 0.13013654301175848, |
| "rewards/pure_accuracy_reward_math": 0.2438151046517305, |
| "step": 216 |
| }, |
| { |
| "clip_ratio": 0.0012246508512134824, |
| "epoch": 0.35565116070628305, |
| "grad_norm": 0.08686057478189468, |
| "kl": 0.009522438049316406, |
| "learning_rate": 2.9644808743169403e-06, |
| "loss": 0.0061, |
| "step": 217 |
| }, |
| { |
| "clip_ratio": 0.0011569151299681835, |
| "epoch": 0.3572901061473258, |
| "grad_norm": 0.07663314044475555, |
| "kl": 0.009255170822143555, |
| "learning_rate": 2.9781420765027323e-06, |
| "loss": 0.0058, |
| "step": 218 |
| }, |
| { |
| "clip_ratio": 0.0010811529527927632, |
| "epoch": 0.35892905158836863, |
| "grad_norm": 0.07616522163152695, |
| "kl": 0.009699821472167969, |
| "learning_rate": 2.991803278688525e-06, |
| "loss": 0.0055, |
| "step": 219 |
| }, |
| { |
| "clip_ratio": 0.0009544987469780608, |
| "epoch": 0.3605679970294114, |
| "grad_norm": 0.07570254802703857, |
| "kl": 0.010393381118774414, |
| "learning_rate": 3.0054644808743173e-06, |
| "loss": 0.0052, |
| "step": 220 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 576.4231963157654, |
| "epoch": 0.36220694247045415, |
| "grad_norm": 0.0710562989115715, |
| "kl": 0.009819984436035156, |
| "learning_rate": 3.0191256830601094e-06, |
| "loss": 0.008, |
| "num_tokens": 149101036.0, |
| "reward": 0.2226562555297278, |
| "reward_std": 0.12332397617865354, |
| "rewards/pure_accuracy_reward_math": 0.22265625168802217, |
| "step": 221 |
| }, |
| { |
| "clip_ratio": 0.0012251571324668475, |
| "epoch": 0.36384588791149697, |
| "grad_norm": 0.08233921229839325, |
| "kl": 0.0095062255859375, |
| "learning_rate": 3.032786885245902e-06, |
| "loss": 0.0079, |
| "step": 222 |
| }, |
| { |
| "clip_ratio": 0.001105058086977806, |
| "epoch": 0.36548483335253973, |
| "grad_norm": 0.07291049510240555, |
| "kl": 0.009292364120483398, |
| "learning_rate": 3.0464480874316944e-06, |
| "loss": 0.0076, |
| "step": 223 |
| }, |
| { |
| "clip_ratio": 0.0009599913582860609, |
| "epoch": 0.3671237787935825, |
| "grad_norm": 0.07015552371740341, |
| "kl": 0.009765148162841797, |
| "learning_rate": 3.0601092896174864e-06, |
| "loss": 0.0073, |
| "step": 224 |
| }, |
| { |
| "clip_ratio": 0.0009534798105050868, |
| "epoch": 0.3687627242346253, |
| "grad_norm": 0.07405047863721848, |
| "kl": 0.010376691818237305, |
| "learning_rate": 3.073770491803279e-06, |
| "loss": 0.007, |
| "step": 225 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 578.5188989639282, |
| "epoch": 0.37040166967566807, |
| "grad_norm": 0.0744408518075943, |
| "kl": 0.010227680206298828, |
| "learning_rate": 3.0874316939890714e-06, |
| "loss": 0.0094, |
| "num_tokens": 152364698.0, |
| "reward": 0.23632813163567334, |
| "reward_std": 0.126384983304888, |
| "rewards/pure_accuracy_reward_math": 0.236328125, |
| "step": 226 |
| }, |
| { |
| "clip_ratio": 0.0011350565871453, |
| "epoch": 0.3720406151167108, |
| "grad_norm": 0.09323269873857498, |
| "kl": 0.009792804718017578, |
| "learning_rate": 3.101092896174864e-06, |
| "loss": 0.0094, |
| "step": 227 |
| }, |
| { |
| "clip_ratio": 0.0009327100992777559, |
| "epoch": 0.37367956055775364, |
| "grad_norm": 0.07071880251169205, |
| "kl": 0.009824752807617188, |
| "learning_rate": 3.114754098360656e-06, |
| "loss": 0.0091, |
| "step": 228 |
| }, |
| { |
| "clip_ratio": 0.0010184358247897762, |
| "epoch": 0.3753185059987964, |
| "grad_norm": 0.07402479648590088, |
| "kl": 0.010513544082641602, |
| "learning_rate": 3.1284153005464485e-06, |
| "loss": 0.0088, |
| "step": 229 |
| }, |
| { |
| "clip_ratio": 0.0010424532179058588, |
| "epoch": 0.37695745143983916, |
| "grad_norm": 0.07837292551994324, |
| "kl": 0.010687112808227539, |
| "learning_rate": 3.142076502732241e-06, |
| "loss": 0.0085, |
| "step": 230 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 583.3717656135559, |
| "epoch": 0.378596396880882, |
| "grad_norm": 0.07237120717763901, |
| "kl": 0.010539531707763672, |
| "learning_rate": 3.155737704918033e-06, |
| "loss": 0.0093, |
| "num_tokens": 155643680.0, |
| "reward": 0.22591146369813941, |
| "reward_std": 0.13715054193744436, |
| "rewards/pure_accuracy_reward_math": 0.22591145869228058, |
| "step": 231 |
| }, |
| { |
| "clip_ratio": 0.001438524371224048, |
| "epoch": 0.38023534232192474, |
| "grad_norm": 0.45248183608055115, |
| "kl": 0.011214733123779297, |
| "learning_rate": 3.1693989071038255e-06, |
| "loss": 0.0093, |
| "step": 232 |
| }, |
| { |
| "clip_ratio": 0.001912088545395818, |
| "epoch": 0.3818742877629675, |
| "grad_norm": 0.11236479133367538, |
| "kl": 0.009836912155151367, |
| "learning_rate": 3.183060109289618e-06, |
| "loss": 0.0094, |
| "step": 233 |
| }, |
| { |
| "clip_ratio": 0.0011414756233989465, |
| "epoch": 0.3835132332040103, |
| "grad_norm": 0.07030442357063293, |
| "kl": 0.010227203369140625, |
| "learning_rate": 3.1967213114754105e-06, |
| "loss": 0.009, |
| "step": 234 |
| }, |
| { |
| "clip_ratio": 0.0015166988691817096, |
| "epoch": 0.3851521786450531, |
| "grad_norm": 0.10437261313199997, |
| "kl": 0.011615991592407227, |
| "learning_rate": 3.2103825136612026e-06, |
| "loss": 0.0088, |
| "step": 235 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 571.2672717571259, |
| "epoch": 0.38679112408609584, |
| "grad_norm": 0.0721583217382431, |
| "kl": 0.011221885681152344, |
| "learning_rate": 3.224043715846995e-06, |
| "loss": 0.0093, |
| "num_tokens": 158884749.0, |
| "reward": 0.21223958939663135, |
| "reward_std": 0.12057235097745433, |
| "rewards/pure_accuracy_reward_math": 0.2122395838086959, |
| "step": 236 |
| }, |
| { |
| "clip_ratio": 0.001226626716629653, |
| "epoch": 0.38843006952713865, |
| "grad_norm": 0.08837593346834183, |
| "kl": 0.010795831680297852, |
| "learning_rate": 3.2377049180327876e-06, |
| "loss": 0.0092, |
| "step": 237 |
| }, |
| { |
| "clip_ratio": 0.0012072520969468314, |
| "epoch": 0.3900690149681814, |
| "grad_norm": 0.08174102008342743, |
| "kl": 0.010251283645629883, |
| "learning_rate": 3.2513661202185792e-06, |
| "loss": 0.0089, |
| "step": 238 |
| }, |
| { |
| "clip_ratio": 0.0008923051470901555, |
| "epoch": 0.3917079604092242, |
| "grad_norm": 0.06714540719985962, |
| "kl": 0.010812044143676758, |
| "learning_rate": 3.2650273224043717e-06, |
| "loss": 0.0086, |
| "step": 239 |
| }, |
| { |
| "clip_ratio": 0.0008945376886231315, |
| "epoch": 0.393346905850267, |
| "grad_norm": 0.07600870728492737, |
| "kl": 0.011825799942016602, |
| "learning_rate": 3.2786885245901638e-06, |
| "loss": 0.0082, |
| "step": 240 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 579.9231960773468, |
| "epoch": 0.39498585129130975, |
| "grad_norm": 0.0788060799241066, |
| "kl": 0.011470317840576172, |
| "learning_rate": 3.2923497267759563e-06, |
| "loss": 0.008, |
| "num_tokens": 162151041.0, |
| "reward": 0.2262369857635349, |
| "reward_std": 0.1436142157181166, |
| "rewards/pure_accuracy_reward_math": 0.22623697842936963, |
| "step": 241 |
| }, |
| { |
| "clip_ratio": 0.001235522729416516, |
| "epoch": 0.3966247967323525, |
| "grad_norm": 0.08819200098514557, |
| "kl": 0.011005401611328125, |
| "learning_rate": 3.3060109289617488e-06, |
| "loss": 0.0079, |
| "step": 242 |
| }, |
| { |
| "clip_ratio": 0.0011237937412715837, |
| "epoch": 0.39826374217339533, |
| "grad_norm": 0.07336119562387466, |
| "kl": 0.010800600051879883, |
| "learning_rate": 3.3196721311475413e-06, |
| "loss": 0.0075, |
| "step": 243 |
| }, |
| { |
| "clip_ratio": 0.0010676182721454097, |
| "epoch": 0.3999026876144381, |
| "grad_norm": 0.07694102078676224, |
| "kl": 0.011488199234008789, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.0072, |
| "step": 244 |
| }, |
| { |
| "clip_ratio": 0.0011172947895374818, |
| "epoch": 0.40154163305548085, |
| "grad_norm": 0.08463244885206223, |
| "kl": 0.012181282043457031, |
| "learning_rate": 3.346994535519126e-06, |
| "loss": 0.0068, |
| "step": 245 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 582.9391458034515, |
| "epoch": 0.40318057849652367, |
| "grad_norm": 0.0712461844086647, |
| "kl": 0.01132655143737793, |
| "learning_rate": 3.3606557377049183e-06, |
| "loss": 0.0082, |
| "num_tokens": 165429094.0, |
| "reward": 0.24869792442768812, |
| "reward_std": 0.12578068423317745, |
| "rewards/pure_accuracy_reward_math": 0.24869791674427688, |
| "step": 246 |
| }, |
| { |
| "clip_ratio": 0.0011233826196530572, |
| "epoch": 0.40481952393756643, |
| "grad_norm": 0.07659593969583511, |
| "kl": 0.01063847541809082, |
| "learning_rate": 3.3743169398907104e-06, |
| "loss": 0.0081, |
| "step": 247 |
| }, |
| { |
| "clip_ratio": 0.0012855593090534967, |
| "epoch": 0.4064584693786092, |
| "grad_norm": 0.07479391992092133, |
| "kl": 0.010470390319824219, |
| "learning_rate": 3.387978142076503e-06, |
| "loss": 0.0078, |
| "step": 248 |
| }, |
| { |
| "clip_ratio": 0.0009941341145349725, |
| "epoch": 0.408097414819652, |
| "grad_norm": 0.06663769483566284, |
| "kl": 0.011182785034179688, |
| "learning_rate": 3.4016393442622954e-06, |
| "loss": 0.0075, |
| "step": 249 |
| }, |
| { |
| "clip_ratio": 0.0009338884319731733, |
| "epoch": 0.40973636026069477, |
| "grad_norm": 0.07455974817276001, |
| "kl": 0.011932849884033203, |
| "learning_rate": 3.4153005464480874e-06, |
| "loss": 0.0071, |
| "step": 250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 579.4368662834167, |
| "epoch": 0.001638945441042779, |
| "grad_norm": 0.06936674565076828, |
| "kl": 0.011360645294189453, |
| "learning_rate": 3.42896174863388e-06, |
| "loss": 0.0077, |
| "num_tokens": 3266558.0, |
| "reward": 0.23990886102546938, |
| "reward_std": 0.1189681178657338, |
| "rewards/pure_accuracy_reward_math": 0.239908854739042, |
| "step": 251 |
| }, |
| { |
| "clip_ratio": 0.0010949637241992605, |
| "epoch": 0.003277890882085558, |
| "grad_norm": 0.0754990503191948, |
| "kl": 0.010671854019165039, |
| "learning_rate": 3.4426229508196724e-06, |
| "loss": 0.0076, |
| "step": 252 |
| }, |
| { |
| "clip_ratio": 0.0011387738637722578, |
| "epoch": 0.004916836323128337, |
| "grad_norm": 0.07142341136932373, |
| "kl": 0.010357856750488281, |
| "learning_rate": 3.456284153005465e-06, |
| "loss": 0.0074, |
| "step": 253 |
| }, |
| { |
| "clip_ratio": 0.0008552854768026918, |
| "epoch": 0.006555781764171116, |
| "grad_norm": 0.0586932897567749, |
| "kl": 0.010814428329467773, |
| "learning_rate": 3.469945355191257e-06, |
| "loss": 0.007, |
| "step": 254 |
| }, |
| { |
| "clip_ratio": 0.0008318971481457993, |
| "epoch": 0.008194727205213895, |
| "grad_norm": 0.07276652008295059, |
| "kl": 0.011636495590209961, |
| "learning_rate": 3.4836065573770495e-06, |
| "loss": 0.0067, |
| "step": 255 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 589.9453327655792, |
| "epoch": 0.009833672646256675, |
| "grad_norm": 0.07384130358695984, |
| "kl": 0.011546134948730469, |
| "learning_rate": 3.497267759562842e-06, |
| "loss": 0.0092, |
| "num_tokens": 6563270.0, |
| "reward": 0.24088542279787362, |
| "reward_std": 0.13925835717236623, |
| "rewards/pure_accuracy_reward_math": 0.2408854168606922, |
| "step": 256 |
| }, |
| { |
| "clip_ratio": 0.000994754147995991, |
| "epoch": 0.011472618087299453, |
| "grad_norm": 0.07237172871828079, |
| "kl": 0.011071443557739258, |
| "learning_rate": 3.510928961748634e-06, |
| "loss": 0.0091, |
| "step": 257 |
| }, |
| { |
| "clip_ratio": 0.0009974641966437048, |
| "epoch": 0.013111563528342233, |
| "grad_norm": 0.0677863284945488, |
| "kl": 0.010922431945800781, |
| "learning_rate": 3.5245901639344265e-06, |
| "loss": 0.0088, |
| "step": 258 |
| }, |
| { |
| "clip_ratio": 0.0009937005115716602, |
| "epoch": 0.01475050896938501, |
| "grad_norm": 0.06459185481071472, |
| "kl": 0.01144552230834961, |
| "learning_rate": 3.538251366120219e-06, |
| "loss": 0.0084, |
| "step": 259 |
| }, |
| { |
| "clip_ratio": 0.0010135341441355195, |
| "epoch": 0.01638945441042779, |
| "grad_norm": 0.0639120563864708, |
| "kl": 0.01173710823059082, |
| "learning_rate": 3.551912568306011e-06, |
| "loss": 0.008, |
| "step": 260 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 589.9293835163116, |
| "epoch": 0.018028399851470568, |
| "grad_norm": 0.0708698108792305, |
| "kl": 0.011642932891845703, |
| "learning_rate": 3.5655737704918036e-06, |
| "loss": 0.0082, |
| "num_tokens": 9864381.0, |
| "reward": 0.2343750073632691, |
| "reward_std": 0.1295394750777632, |
| "rewards/pure_accuracy_reward_math": 0.23437499956344254, |
| "step": 261 |
| }, |
| { |
| "clip_ratio": 0.0011215963202744206, |
| "epoch": 0.01966734529251335, |
| "grad_norm": 0.06814540177583694, |
| "kl": 0.011007308959960938, |
| "learning_rate": 3.579234972677596e-06, |
| "loss": 0.0081, |
| "step": 262 |
| }, |
| { |
| "clip_ratio": 0.0012566405258667146, |
| "epoch": 0.021306290733556128, |
| "grad_norm": 0.07573528587818146, |
| "kl": 0.010967016220092773, |
| "learning_rate": 3.5928961748633886e-06, |
| "loss": 0.0079, |
| "step": 263 |
| }, |
| { |
| "clip_ratio": 0.0009570858208007849, |
| "epoch": 0.022945236174598906, |
| "grad_norm": 0.05915817990899086, |
| "kl": 0.011373281478881836, |
| "learning_rate": 3.6065573770491806e-06, |
| "loss": 0.0075, |
| "step": 264 |
| }, |
| { |
| "clip_ratio": 0.000911612167669773, |
| "epoch": 0.024584181615641687, |
| "grad_norm": 0.0663297101855278, |
| "kl": 0.012076139450073242, |
| "learning_rate": 3.620218579234973e-06, |
| "loss": 0.0071, |
| "step": 265 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 591.5680522918701, |
| "epoch": 0.026223127056684465, |
| "grad_norm": 0.11742489039897919, |
| "kl": 0.013937711715698242, |
| "learning_rate": 3.6338797814207656e-06, |
| "loss": 0.0079, |
| "num_tokens": 13167262.0, |
| "reward": 0.20540365105262026, |
| "reward_std": 0.12122339283814654, |
| "rewards/pure_accuracy_reward_math": 0.20540364709449932, |
| "step": 266 |
| }, |
| { |
| "clip_ratio": 0.0010878853252052068, |
| "epoch": 0.027862072497727243, |
| "grad_norm": 0.9664380550384521, |
| "kl": 0.011089324951171875, |
| "learning_rate": 3.6475409836065577e-06, |
| "loss": 0.0088, |
| "step": 267 |
| }, |
| { |
| "clip_ratio": 0.0013143416176717437, |
| "epoch": 0.02950101793877002, |
| "grad_norm": 0.17526276409626007, |
| "kl": 0.01159524917602539, |
| "learning_rate": 3.66120218579235e-06, |
| "loss": 0.0077, |
| "step": 268 |
| }, |
| { |
| "clip_ratio": 0.0010903547959060234, |
| "epoch": 0.031139963379812802, |
| "grad_norm": 2.172806739807129, |
| "kl": 0.04994964599609375, |
| "learning_rate": 3.6748633879781427e-06, |
| "loss": 0.0089, |
| "step": 269 |
| }, |
| { |
| "clip_ratio": 0.0011699927540576027, |
| "epoch": 0.03277890882085558, |
| "grad_norm": 3.1674540042877197, |
| "kl": 0.10472512245178223, |
| "learning_rate": 3.6885245901639347e-06, |
| "loss": 0.011, |
| "step": 270 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 602.3287951946259, |
| "epoch": 0.03441785426189836, |
| "grad_norm": 0.06581036746501923, |
| "kl": 0.011269092559814453, |
| "learning_rate": 3.7021857923497272e-06, |
| "loss": 0.009, |
| "num_tokens": 16496412.0, |
| "reward": 0.2086588600068353, |
| "reward_std": 0.12463329132879153, |
| "rewards/pure_accuracy_reward_math": 0.20865885500097647, |
| "step": 271 |
| }, |
| { |
| "clip_ratio": 0.0010178338038713264, |
| "epoch": 0.036056799702941136, |
| "grad_norm": 0.07616181671619415, |
| "kl": 0.0111236572265625, |
| "learning_rate": 3.7158469945355197e-06, |
| "loss": 0.009, |
| "step": 272 |
| }, |
| { |
| "clip_ratio": 0.0011148875312301243, |
| "epoch": 0.03769574514398392, |
| "grad_norm": 0.07324493676424026, |
| "kl": 0.010937929153442383, |
| "learning_rate": 3.729508196721312e-06, |
| "loss": 0.0088, |
| "step": 273 |
| }, |
| { |
| "clip_ratio": 0.0009064768914868182, |
| "epoch": 0.0393346905850267, |
| "grad_norm": 0.0614241324365139, |
| "kl": 0.011002779006958008, |
| "learning_rate": 3.7431693989071043e-06, |
| "loss": 0.0085, |
| "step": 274 |
| }, |
| { |
| "clip_ratio": 0.0008406615522176253, |
| "epoch": 0.040973636026069474, |
| "grad_norm": 0.0580308772623539, |
| "kl": 0.011270523071289062, |
| "learning_rate": 3.7568306010928963e-06, |
| "loss": 0.0081, |
| "step": 275 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 576.4791839122772, |
| "epoch": 0.042612581467112255, |
| "grad_norm": 0.07072403281927109, |
| "kl": 0.012294530868530273, |
| "learning_rate": 3.7704918032786884e-06, |
| "loss": 0.0069, |
| "num_tokens": 19746996.0, |
| "reward": 0.23209636090905406, |
| "reward_std": 0.13299611589172855, |
| "rewards/pure_accuracy_reward_math": 0.232096354739042, |
| "step": 276 |
| }, |
| { |
| "clip_ratio": 0.0008154532818025473, |
| "epoch": 0.04425152690815504, |
| "grad_norm": 0.06746868789196014, |
| "kl": 0.012226104736328125, |
| "learning_rate": 3.784153005464481e-06, |
| "loss": 0.0068, |
| "step": 277 |
| }, |
| { |
| "clip_ratio": 0.0009088635895295738, |
| "epoch": 0.04589047234919781, |
| "grad_norm": 0.062604621052742, |
| "kl": 0.012192249298095703, |
| "learning_rate": 3.7978142076502734e-06, |
| "loss": 0.0065, |
| "step": 278 |
| }, |
| { |
| "clip_ratio": 0.0009255672589461028, |
| "epoch": 0.04752941779024059, |
| "grad_norm": 0.06473197042942047, |
| "kl": 0.012347936630249023, |
| "learning_rate": 3.811475409836066e-06, |
| "loss": 0.0062, |
| "step": 279 |
| }, |
| { |
| "clip_ratio": 0.000926908637438828, |
| "epoch": 0.049168363231283374, |
| "grad_norm": 0.0617368146777153, |
| "kl": 0.012591838836669922, |
| "learning_rate": 3.825136612021858e-06, |
| "loss": 0.0058, |
| "step": 280 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 590.2998232841492, |
| "epoch": 0.05080730867232615, |
| "grad_norm": 0.0707988291978836, |
| "kl": 0.012273788452148438, |
| "learning_rate": 3.8387978142076504e-06, |
| "loss": 0.0051, |
| "num_tokens": 23046033.0, |
| "reward": 0.2106119856762234, |
| "reward_std": 0.12753237638389692, |
| "rewards/pure_accuracy_reward_math": 0.21061197997187264, |
| "step": 281 |
| }, |
| { |
| "clip_ratio": 0.0007957301904752967, |
| "epoch": 0.05244625411336893, |
| "grad_norm": 0.07150708138942719, |
| "kl": 0.012214422225952148, |
| "learning_rate": 3.852459016393443e-06, |
| "loss": 0.005, |
| "step": 282 |
| }, |
| { |
| "clip_ratio": 0.0008087110562655653, |
| "epoch": 0.05408519955441171, |
| "grad_norm": 0.06467320770025253, |
| "kl": 0.012126684188842773, |
| "learning_rate": 3.8661202185792354e-06, |
| "loss": 0.0047, |
| "step": 283 |
| }, |
| { |
| "clip_ratio": 0.0008826969724395894, |
| "epoch": 0.055724144995454486, |
| "grad_norm": 0.06448128819465637, |
| "kl": 0.012229204177856445, |
| "learning_rate": 3.879781420765028e-06, |
| "loss": 0.0043, |
| "step": 284 |
| }, |
| { |
| "clip_ratio": 0.000837775871445956, |
| "epoch": 0.05736309043649727, |
| "grad_norm": 0.05940267816185951, |
| "kl": 0.012416601181030273, |
| "learning_rate": 3.8934426229508196e-06, |
| "loss": 0.0039, |
| "step": 285 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 586.4531440734863, |
| "epoch": 0.05900203587754004, |
| "grad_norm": 0.09300405532121658, |
| "kl": 0.012730836868286133, |
| "learning_rate": 3.907103825136612e-06, |
| "loss": 0.0073, |
| "num_tokens": 26331481.0, |
| "reward": 0.24609375710133463, |
| "reward_std": 0.1337946176645346, |
| "rewards/pure_accuracy_reward_math": 0.24609375081490725, |
| "step": 286 |
| }, |
| { |
| "clip_ratio": 0.0007959069370144789, |
| "epoch": 0.06064098131858282, |
| "grad_norm": 0.07242298871278763, |
| "kl": 0.012778043746948242, |
| "learning_rate": 3.9207650273224046e-06, |
| "loss": 0.0071, |
| "step": 287 |
| }, |
| { |
| "clip_ratio": 0.0007729592513214811, |
| "epoch": 0.062279926759625605, |
| "grad_norm": 0.06439978629350662, |
| "kl": 0.012783050537109375, |
| "learning_rate": 3.934426229508197e-06, |
| "loss": 0.0068, |
| "step": 288 |
| }, |
| { |
| "clip_ratio": 0.0008412073416366184, |
| "epoch": 0.06391887220066839, |
| "grad_norm": 0.06673026084899902, |
| "kl": 0.012759208679199219, |
| "learning_rate": 3.9480874316939895e-06, |
| "loss": 0.0064, |
| "step": 289 |
| }, |
| { |
| "clip_ratio": 0.0008529388742317678, |
| "epoch": 0.06555781764171116, |
| "grad_norm": 0.06457261741161346, |
| "kl": 0.012978315353393555, |
| "learning_rate": 3.961748633879782e-06, |
| "loss": 0.006, |
| "step": 290 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 593.4179866313934, |
| "epoch": 0.06719676308275394, |
| "grad_norm": 0.061959490180015564, |
| "kl": 0.012590646743774414, |
| "learning_rate": 3.975409836065574e-06, |
| "loss": 0.0048, |
| "num_tokens": 29639261.0, |
| "reward": 0.19140625570435077, |
| "reward_std": 0.1178207247867249, |
| "rewards/pure_accuracy_reward_math": 0.19140625011641532, |
| "step": 291 |
| }, |
| { |
| "clip_ratio": 0.0007923052341993753, |
| "epoch": 0.06883570852379672, |
| "grad_norm": 0.06405281275510788, |
| "kl": 0.012594223022460938, |
| "learning_rate": 3.989071038251366e-06, |
| "loss": 0.0047, |
| "step": 292 |
| }, |
| { |
| "clip_ratio": 0.0008128494168886391, |
| "epoch": 0.0704746539648395, |
| "grad_norm": 0.05796763673424721, |
| "kl": 0.012372016906738281, |
| "learning_rate": 4.002732240437159e-06, |
| "loss": 0.0044, |
| "step": 293 |
| }, |
| { |
| "clip_ratio": 0.0008259461983470828, |
| "epoch": 0.07211359940588227, |
| "grad_norm": 0.05945519357919693, |
| "kl": 0.012368202209472656, |
| "learning_rate": 4.016393442622951e-06, |
| "loss": 0.0041, |
| "step": 294 |
| }, |
| { |
| "clip_ratio": 0.0008090365032558111, |
| "epoch": 0.07375254484692506, |
| "grad_norm": 0.05839954689145088, |
| "kl": 0.012590169906616211, |
| "learning_rate": 4.030054644808744e-06, |
| "loss": 0.0038, |
| "step": 295 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 578.3847825527191, |
| "epoch": 0.07539149028796784, |
| "grad_norm": 0.07029297947883606, |
| "kl": 0.013827800750732422, |
| "learning_rate": 4.043715846994536e-06, |
| "loss": 0.0079, |
| "num_tokens": 32899431.0, |
| "reward": 0.22656250576255843, |
| "reward_std": 0.1293920156895183, |
| "rewards/pure_accuracy_reward_math": 0.22656250040745363, |
| "step": 296 |
| }, |
| { |
| "clip_ratio": 0.0007177354492569066, |
| "epoch": 0.07703043572901061, |
| "grad_norm": 0.07095961272716522, |
| "kl": 0.013935565948486328, |
| "learning_rate": 4.057377049180329e-06, |
| "loss": 0.0078, |
| "step": 297 |
| }, |
| { |
| "clip_ratio": 0.0007291494763990158, |
| "epoch": 0.0786693811700534, |
| "grad_norm": 0.062031351029872894, |
| "kl": 0.01368570327758789, |
| "learning_rate": 4.07103825136612e-06, |
| "loss": 0.0075, |
| "step": 298 |
| }, |
| { |
| "clip_ratio": 0.0009114736896549402, |
| "epoch": 0.08030832661109617, |
| "grad_norm": 0.06610522419214249, |
| "kl": 0.01354837417602539, |
| "learning_rate": 4.084699453551913e-06, |
| "loss": 0.0072, |
| "step": 299 |
| }, |
| { |
| "clip_ratio": 0.0008185061662402404, |
| "epoch": 0.08194727205213895, |
| "grad_norm": 0.05733739957213402, |
| "kl": 0.013862133026123047, |
| "learning_rate": 4.098360655737705e-06, |
| "loss": 0.0068, |
| "step": 300 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.9811391830444, |
| "epoch": 0.08358621749318174, |
| "grad_norm": 0.07516755163669586, |
| "kl": 0.013712882995605469, |
| "learning_rate": 4.112021857923498e-06, |
| "loss": 0.0123, |
| "num_tokens": 36171597.0, |
| "reward": 0.24316406939760782, |
| "reward_std": 0.1390101815923117, |
| "rewards/pure_accuracy_reward_math": 0.24316406299476512, |
| "step": 301 |
| }, |
| { |
| "clip_ratio": 0.0008417515526843999, |
| "epoch": 0.08522516293422451, |
| "grad_norm": 0.07285764813423157, |
| "kl": 0.013661384582519531, |
| "learning_rate": 4.12568306010929e-06, |
| "loss": 0.0122, |
| "step": 302 |
| }, |
| { |
| "clip_ratio": 0.0010243687736419815, |
| "epoch": 0.08686410837526728, |
| "grad_norm": 0.06916587054729462, |
| "kl": 0.013316631317138672, |
| "learning_rate": 4.139344262295083e-06, |
| "loss": 0.0118, |
| "step": 303 |
| }, |
| { |
| "clip_ratio": 0.0010284557414479423, |
| "epoch": 0.08850305381631007, |
| "grad_norm": 0.06860698759555817, |
| "kl": 0.01330423355102539, |
| "learning_rate": 4.153005464480875e-06, |
| "loss": 0.0115, |
| "step": 304 |
| }, |
| { |
| "clip_ratio": 0.0009143141991216908, |
| "epoch": 0.09014199925735285, |
| "grad_norm": 0.06032150238752365, |
| "kl": 0.0137176513671875, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 0.011, |
| "step": 305 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 586.3743689060211, |
| "epoch": 0.09178094469839562, |
| "grad_norm": 0.07405151426792145, |
| "kl": 0.013857364654541016, |
| "learning_rate": 4.180327868852459e-06, |
| "loss": 0.011, |
| "num_tokens": 39455171.0, |
| "reward": 0.2532552155898884, |
| "reward_std": 0.14932613197015598, |
| "rewards/pure_accuracy_reward_math": 0.25325520941987634, |
| "step": 306 |
| }, |
| { |
| "clip_ratio": 0.0008296436263890428, |
| "epoch": 0.09341989013943841, |
| "grad_norm": 0.06666728854179382, |
| "kl": 0.013742923736572266, |
| "learning_rate": 4.193989071038252e-06, |
| "loss": 0.0109, |
| "step": 307 |
| }, |
| { |
| "clip_ratio": 0.0009970029186661122, |
| "epoch": 0.09505883558048119, |
| "grad_norm": 0.0645456612110138, |
| "kl": 0.013346672058105469, |
| "learning_rate": 4.207650273224044e-06, |
| "loss": 0.0106, |
| "step": 308 |
| }, |
| { |
| "clip_ratio": 0.001063040656163139, |
| "epoch": 0.09669778102152396, |
| "grad_norm": 0.061983004212379456, |
| "kl": 0.013351917266845703, |
| "learning_rate": 4.221311475409837e-06, |
| "loss": 0.0102, |
| "step": 309 |
| }, |
| { |
| "clip_ratio": 0.000925353787010863, |
| "epoch": 0.09833672646256675, |
| "grad_norm": 0.054489802569150925, |
| "kl": 0.013745784759521484, |
| "learning_rate": 4.234972677595629e-06, |
| "loss": 0.0098, |
| "step": 310 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 576.9381697177887, |
| "epoch": 0.09997567190360952, |
| "grad_norm": 0.07431261986494064, |
| "kl": 0.014829158782958984, |
| "learning_rate": 4.248633879781421e-06, |
| "loss": 0.0086, |
| "num_tokens": 42719433.0, |
| "reward": 0.23014323521056212, |
| "reward_std": 0.14185529336100444, |
| "rewards/pure_accuracy_reward_math": 0.23014322962262668, |
| "step": 311 |
| }, |
| { |
| "clip_ratio": 0.0008892922282370819, |
| "epoch": 0.1016146173446523, |
| "grad_norm": 0.0675373449921608, |
| "kl": 0.01420736312866211, |
| "learning_rate": 4.2622950819672135e-06, |
| "loss": 0.0084, |
| "step": 312 |
| }, |
| { |
| "clip_ratio": 0.0011247390369817367, |
| "epoch": 0.10325356278569509, |
| "grad_norm": 0.06642100214958191, |
| "kl": 0.013678550720214844, |
| "learning_rate": 4.275956284153006e-06, |
| "loss": 0.0081, |
| "step": 313 |
| }, |
| { |
| "clip_ratio": 0.001105548773011833, |
| "epoch": 0.10489250822673786, |
| "grad_norm": 0.06353385746479034, |
| "kl": 0.01375722885131836, |
| "learning_rate": 4.289617486338798e-06, |
| "loss": 0.0077, |
| "step": 314 |
| }, |
| { |
| "clip_ratio": 0.0008872896562479582, |
| "epoch": 0.10653145366778063, |
| "grad_norm": 0.0578172467648983, |
| "kl": 0.014369010925292969, |
| "learning_rate": 4.30327868852459e-06, |
| "loss": 0.0073, |
| "step": 315 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.5644719600677, |
| "epoch": 0.10817039910882342, |
| "grad_norm": 0.10828240215778351, |
| "kl": 0.014815330505371094, |
| "learning_rate": 4.316939890710383e-06, |
| "loss": 0.0079, |
| "num_tokens": 45987515.0, |
| "reward": 0.23828125750878826, |
| "reward_std": 0.1280899328412488, |
| "rewards/pure_accuracy_reward_math": 0.23828124959254637, |
| "step": 316 |
| }, |
| { |
| "clip_ratio": 0.0007055829685214121, |
| "epoch": 0.1098093445498662, |
| "grad_norm": 0.06897052377462387, |
| "kl": 0.014089107513427734, |
| "learning_rate": 4.330601092896175e-06, |
| "loss": 0.0077, |
| "step": 317 |
| }, |
| { |
| "clip_ratio": 0.0009552787060442824, |
| "epoch": 0.11144828999090897, |
| "grad_norm": 0.06946240365505219, |
| "kl": 0.013627052307128906, |
| "learning_rate": 4.3442622950819676e-06, |
| "loss": 0.0074, |
| "step": 318 |
| }, |
| { |
| "clip_ratio": 0.0009577763585184584, |
| "epoch": 0.11308723543195176, |
| "grad_norm": 0.06384962797164917, |
| "kl": 0.013594627380371094, |
| "learning_rate": 4.35792349726776e-06, |
| "loss": 0.007, |
| "step": 319 |
| }, |
| { |
| "clip_ratio": 0.0008583279737877092, |
| "epoch": 0.11472618087299453, |
| "grad_norm": 0.05853092297911644, |
| "kl": 0.014088630676269531, |
| "learning_rate": 4.371584699453552e-06, |
| "loss": 0.0066, |
| "step": 320 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 574.9983899593353, |
| "epoch": 0.11636512631403731, |
| "grad_norm": 0.07647594809532166, |
| "kl": 0.01479482650756836, |
| "learning_rate": 4.385245901639344e-06, |
| "loss": 0.0092, |
| "num_tokens": 49236626.0, |
| "reward": 0.22591146448394284, |
| "reward_std": 0.1385065988288261, |
| "rewards/pure_accuracy_reward_math": 0.2259114588960074, |
| "step": 321 |
| }, |
| { |
| "clip_ratio": 0.000817699462913879, |
| "epoch": 0.11800407175508008, |
| "grad_norm": 0.0680047944188118, |
| "kl": 0.014449596405029297, |
| "learning_rate": 4.398907103825137e-06, |
| "loss": 0.009, |
| "step": 322 |
| }, |
| { |
| "clip_ratio": 0.00102761085952352, |
| "epoch": 0.11964301719612287, |
| "grad_norm": 0.06830534338951111, |
| "kl": 0.01408243179321289, |
| "learning_rate": 4.412568306010929e-06, |
| "loss": 0.0087, |
| "step": 323 |
| }, |
| { |
| "clip_ratio": 0.0010830692142462794, |
| "epoch": 0.12128196263716565, |
| "grad_norm": 0.06523703783750534, |
| "kl": 0.01411581039428711, |
| "learning_rate": 4.426229508196722e-06, |
| "loss": 0.0083, |
| "step": 324 |
| }, |
| { |
| "clip_ratio": 0.0009552010853894899, |
| "epoch": 0.12292090807820842, |
| "grad_norm": 0.05952048301696777, |
| "kl": 0.01461029052734375, |
| "learning_rate": 4.439890710382514e-06, |
| "loss": 0.0078, |
| "step": 325 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 574.7955937385559, |
| "epoch": 0.12455985351925121, |
| "grad_norm": 0.07190460711717606, |
| "kl": 0.015045642852783203, |
| "learning_rate": 4.453551912568307e-06, |
| "loss": 0.0105, |
| "num_tokens": 52486374.0, |
| "reward": 0.2360026103851851, |
| "reward_std": 0.12637775152688846, |
| "rewards/pure_accuracy_reward_math": 0.2360026056121569, |
| "step": 326 |
| }, |
| { |
| "clip_ratio": 0.0006602608530101861, |
| "epoch": 0.12619879896029398, |
| "grad_norm": 0.06684302538633347, |
| "kl": 0.014774322509765625, |
| "learning_rate": 4.467213114754098e-06, |
| "loss": 0.0103, |
| "step": 327 |
| }, |
| { |
| "clip_ratio": 0.0008040992978521899, |
| "epoch": 0.12783774440133677, |
| "grad_norm": 0.06550217419862747, |
| "kl": 0.01435995101928711, |
| "learning_rate": 4.480874316939891e-06, |
| "loss": 0.01, |
| "step": 328 |
| }, |
| { |
| "clip_ratio": 0.0008306863429652367, |
| "epoch": 0.12947668984237953, |
| "grad_norm": 0.0616220086812973, |
| "kl": 0.014432430267333984, |
| "learning_rate": 4.494535519125683e-06, |
| "loss": 0.0096, |
| "step": 329 |
| }, |
| { |
| "clip_ratio": 0.0008213962388481377, |
| "epoch": 0.13111563528342232, |
| "grad_norm": 0.061259008944034576, |
| "kl": 0.014967918395996094, |
| "learning_rate": 4.508196721311476e-06, |
| "loss": 0.0092, |
| "step": 330 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 578.8222842216492, |
| "epoch": 0.1327545807244651, |
| "grad_norm": 0.07180505990982056, |
| "kl": 0.014912128448486328, |
| "learning_rate": 4.521857923497268e-06, |
| "loss": 0.0054, |
| "num_tokens": 55750856.0, |
| "reward": 0.22428386053070426, |
| "reward_std": 0.125935374526307, |
| "rewards/pure_accuracy_reward_math": 0.22428385610692203, |
| "step": 331 |
| }, |
| { |
| "clip_ratio": 0.0008310234973123443, |
| "epoch": 0.13439352616550787, |
| "grad_norm": 0.06554125249385834, |
| "kl": 0.014460086822509766, |
| "learning_rate": 4.535519125683061e-06, |
| "loss": 0.0052, |
| "step": 332 |
| }, |
| { |
| "clip_ratio": 0.0009444170593724266, |
| "epoch": 0.13603247160655066, |
| "grad_norm": 0.0650697648525238, |
| "kl": 0.014264106750488281, |
| "learning_rate": 4.549180327868853e-06, |
| "loss": 0.0049, |
| "step": 333 |
| }, |
| { |
| "clip_ratio": 0.0009593672889991467, |
| "epoch": 0.13767141704759345, |
| "grad_norm": 0.06275759637355804, |
| "kl": 0.014463424682617188, |
| "learning_rate": 4.562841530054645e-06, |
| "loss": 0.0045, |
| "step": 334 |
| }, |
| { |
| "clip_ratio": 0.0008741978416537677, |
| "epoch": 0.1393103624886362, |
| "grad_norm": 0.06349465250968933, |
| "kl": 0.015105247497558594, |
| "learning_rate": 4.576502732240437e-06, |
| "loss": 0.0041, |
| "step": 335 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.4983899593353, |
| "epoch": 0.140949307929679, |
| "grad_norm": 0.07185006141662598, |
| "kl": 0.015033245086669922, |
| "learning_rate": 4.59016393442623e-06, |
| "loss": 0.0092, |
| "num_tokens": 59021523.0, |
| "reward": 0.24804688125732355, |
| "reward_std": 0.12332397676073015, |
| "rewards/pure_accuracy_reward_math": 0.2480468761350494, |
| "step": 336 |
| }, |
| { |
| "clip_ratio": 0.0007917967001276338, |
| "epoch": 0.14258825337072178, |
| "grad_norm": 0.06418469548225403, |
| "kl": 0.014570236206054688, |
| "learning_rate": 4.603825136612022e-06, |
| "loss": 0.009, |
| "step": 337 |
| }, |
| { |
| "clip_ratio": 0.0011276908828676824, |
| "epoch": 0.14422719881176455, |
| "grad_norm": 0.06706573814153671, |
| "kl": 0.014203071594238281, |
| "learning_rate": 4.617486338797815e-06, |
| "loss": 0.0087, |
| "step": 338 |
| }, |
| { |
| "clip_ratio": 0.0010211615006028296, |
| "epoch": 0.14586614425280733, |
| "grad_norm": 0.06293198466300964, |
| "kl": 0.014473915100097656, |
| "learning_rate": 4.631147540983607e-06, |
| "loss": 0.0084, |
| "step": 339 |
| }, |
| { |
| "clip_ratio": 0.0008425270717680178, |
| "epoch": 0.14750508969385012, |
| "grad_norm": 0.058640848845243454, |
| "kl": 0.01514291763305664, |
| "learning_rate": 4.6448087431694e-06, |
| "loss": 0.008, |
| "step": 340 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.9489109516144, |
| "epoch": 0.14914403513489288, |
| "grad_norm": 0.08432208746671677, |
| "kl": 0.015330791473388672, |
| "learning_rate": 4.6584699453551915e-06, |
| "loss": 0.0085, |
| "num_tokens": 62299018.0, |
| "reward": 0.2382812559371814, |
| "reward_std": 0.14492353051900864, |
| "rewards/pure_accuracy_reward_math": 0.2382812504656613, |
| "step": 341 |
| }, |
| { |
| "clip_ratio": 0.000848330670521591, |
| "epoch": 0.15078298057593567, |
| "grad_norm": 0.06801754236221313, |
| "kl": 0.014659404754638672, |
| "learning_rate": 4.672131147540984e-06, |
| "loss": 0.0084, |
| "step": 342 |
| }, |
| { |
| "clip_ratio": 0.0010827027111872667, |
| "epoch": 0.15242192601697846, |
| "grad_norm": 0.06549172848463058, |
| "kl": 0.014311790466308594, |
| "learning_rate": 4.6857923497267765e-06, |
| "loss": 0.0081, |
| "step": 343 |
| }, |
| { |
| "clip_ratio": 0.0010740470830796767, |
| "epoch": 0.15406087145802122, |
| "grad_norm": 0.06515967845916748, |
| "kl": 0.01453399658203125, |
| "learning_rate": 4.699453551912569e-06, |
| "loss": 0.0077, |
| "step": 344 |
| }, |
| { |
| "clip_ratio": 0.0009672553374002746, |
| "epoch": 0.155699816899064, |
| "grad_norm": 0.0627971738576889, |
| "kl": 0.015265464782714844, |
| "learning_rate": 4.7131147540983615e-06, |
| "loss": 0.0072, |
| "step": 345 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 594.5420119762421, |
| "epoch": 0.1573387623401068, |
| "grad_norm": 0.0651373565196991, |
| "kl": 0.01506948471069336, |
| "learning_rate": 4.726775956284154e-06, |
| "loss": 0.0061, |
| "num_tokens": 65612227.0, |
| "reward": 0.22656250555883162, |
| "reward_std": 0.11576688283821568, |
| "rewards/pure_accuracy_reward_math": 0.2265625006693881, |
| "step": 346 |
| }, |
| { |
| "clip_ratio": 0.0006889042056741346, |
| "epoch": 0.15897770778114956, |
| "grad_norm": 0.05911775305867195, |
| "kl": 0.014788627624511719, |
| "learning_rate": 4.740437158469946e-06, |
| "loss": 0.006, |
| "step": 347 |
| }, |
| { |
| "clip_ratio": 0.0008533449156971074, |
| "epoch": 0.16061665322219235, |
| "grad_norm": 0.06107313930988312, |
| "kl": 0.014514446258544922, |
| "learning_rate": 4.754098360655738e-06, |
| "loss": 0.0058, |
| "step": 348 |
| }, |
| { |
| "clip_ratio": 0.0008506776480317058, |
| "epoch": 0.16225559866323513, |
| "grad_norm": 0.05840134993195534, |
| "kl": 0.014555931091308594, |
| "learning_rate": 4.767759562841531e-06, |
| "loss": 0.0054, |
| "step": 349 |
| }, |
| { |
| "clip_ratio": 0.0007001224406621986, |
| "epoch": 0.1638945441042779, |
| "grad_norm": 0.052340634167194366, |
| "kl": 0.014971256256103516, |
| "learning_rate": 4.781420765027322e-06, |
| "loss": 0.0051, |
| "step": 350 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 582.0423378944397, |
| "epoch": 0.16553348954532068, |
| "grad_norm": 0.07505487650632858, |
| "kl": 0.015714168548583984, |
| "learning_rate": 4.795081967213115e-06, |
| "loss": 0.01, |
| "num_tokens": 68889789.0, |
| "reward": 0.22949219317524694, |
| "reward_std": 0.13435217371443287, |
| "rewards/pure_accuracy_reward_math": 0.2294921897992026, |
| "step": 351 |
| }, |
| { |
| "clip_ratio": 0.0008628651630715467, |
| "epoch": 0.16717243498636347, |
| "grad_norm": 0.06674539297819138, |
| "kl": 0.015304088592529297, |
| "learning_rate": 4.808743169398907e-06, |
| "loss": 0.0098, |
| "step": 352 |
| }, |
| { |
| "clip_ratio": 0.001037854259834603, |
| "epoch": 0.16881138042740623, |
| "grad_norm": 0.07000827044248581, |
| "kl": 0.014843463897705078, |
| "learning_rate": 4.8224043715847e-06, |
| "loss": 0.0095, |
| "step": 353 |
| }, |
| { |
| "clip_ratio": 0.0010051641423842739, |
| "epoch": 0.17045032586844902, |
| "grad_norm": 0.06692034751176834, |
| "kl": 0.01481771469116211, |
| "learning_rate": 4.836065573770492e-06, |
| "loss": 0.0091, |
| "step": 354 |
| }, |
| { |
| "clip_ratio": 0.0009215611881927543, |
| "epoch": 0.1720892713094918, |
| "grad_norm": 0.05842750146985054, |
| "kl": 0.015254974365234375, |
| "learning_rate": 4.849726775956285e-06, |
| "loss": 0.0086, |
| "step": 355 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 580.9974174499512, |
| "epoch": 0.17372821675053457, |
| "grad_norm": 0.07343181222677231, |
| "kl": 0.015576362609863281, |
| "learning_rate": 4.863387978142076e-06, |
| "loss": 0.0104, |
| "num_tokens": 72161705.0, |
| "reward": 0.24218750657746568, |
| "reward_std": 0.12447860068641603, |
| "rewards/pure_accuracy_reward_math": 0.24218750098953024, |
| "step": 356 |
| }, |
| { |
| "clip_ratio": 0.000678558928370876, |
| "epoch": 0.17536716219157736, |
| "grad_norm": 0.06728224456310272, |
| "kl": 0.015001773834228516, |
| "learning_rate": 4.877049180327869e-06, |
| "loss": 0.0103, |
| "step": 357 |
| }, |
| { |
| "clip_ratio": 0.0009087121708262202, |
| "epoch": 0.17700610763262015, |
| "grad_norm": 0.06502145528793335, |
| "kl": 0.014545440673828125, |
| "learning_rate": 4.890710382513661e-06, |
| "loss": 0.0099, |
| "step": 358 |
| }, |
| { |
| "clip_ratio": 0.0008997945463420365, |
| "epoch": 0.1786450530736629, |
| "grad_norm": 0.06085266172885895, |
| "kl": 0.01470804214477539, |
| "learning_rate": 4.904371584699454e-06, |
| "loss": 0.0096, |
| "step": 359 |
| }, |
| { |
| "clip_ratio": 0.0008065323049777362, |
| "epoch": 0.1802839985147057, |
| "grad_norm": 0.05810590460896492, |
| "kl": 0.01529693603515625, |
| "learning_rate": 4.918032786885246e-06, |
| "loss": 0.0092, |
| "step": 360 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 574.3659031391144, |
| "epoch": 0.18192294395574848, |
| "grad_norm": 0.07119102030992508, |
| "kl": 0.015850543975830078, |
| "learning_rate": 4.931693989071039e-06, |
| "loss": 0.0088, |
| "num_tokens": 75411941.0, |
| "reward": 0.26106771623017266, |
| "reward_std": 0.12748563423519954, |
| "rewards/pure_accuracy_reward_math": 0.26106770982732996, |
| "step": 361 |
| }, |
| { |
| "clip_ratio": 0.0007838922517180436, |
| "epoch": 0.18356188939679124, |
| "grad_norm": 0.0668591633439064, |
| "kl": 0.015510082244873047, |
| "learning_rate": 4.945355191256831e-06, |
| "loss": 0.0086, |
| "step": 362 |
| }, |
| { |
| "clip_ratio": 0.0009197715589834843, |
| "epoch": 0.18520083483783403, |
| "grad_norm": 0.06584400683641434, |
| "kl": 0.015251636505126953, |
| "learning_rate": 4.959016393442623e-06, |
| "loss": 0.0083, |
| "step": 363 |
| }, |
| { |
| "clip_ratio": 0.0007934075148341435, |
| "epoch": 0.18683978027887682, |
| "grad_norm": 0.06021925061941147, |
| "kl": 0.015304088592529297, |
| "learning_rate": 4.9726775956284154e-06, |
| "loss": 0.0079, |
| "step": 364 |
| }, |
| { |
| "clip_ratio": 0.0008117128969615806, |
| "epoch": 0.18847872571991958, |
| "grad_norm": 0.054787032306194305, |
| "kl": 0.015666484832763672, |
| "learning_rate": 4.986338797814208e-06, |
| "loss": 0.0075, |
| "step": 365 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.3089377880096, |
| "epoch": 0.19011767116096237, |
| "grad_norm": 0.07014758884906769, |
| "kl": 0.01603412628173828, |
| "learning_rate": 5e-06, |
| "loss": 0.0102, |
| "num_tokens": 78678442.0, |
| "reward": 0.2386067773331888, |
| "reward_std": 0.1381109645590186, |
| "rewards/pure_accuracy_reward_math": 0.23860677116317675, |
| "step": 366 |
| }, |
| { |
| "clip_ratio": 0.0008581371083096201, |
| "epoch": 0.19175661660200516, |
| "grad_norm": 0.06626458466053009, |
| "kl": 0.015540599822998047, |
| "learning_rate": 4.9999942439118225e-06, |
| "loss": 0.01, |
| "step": 367 |
| }, |
| { |
| "clip_ratio": 0.0010326972058010142, |
| "epoch": 0.19339556204304792, |
| "grad_norm": 0.06585969030857086, |
| "kl": 0.015045166015625, |
| "learning_rate": 4.999976975673795e-06, |
| "loss": 0.0097, |
| "step": 368 |
| }, |
| { |
| "clip_ratio": 0.000947793353589077, |
| "epoch": 0.1950345074840907, |
| "grad_norm": 0.06269653141498566, |
| "kl": 0.015254497528076172, |
| "learning_rate": 4.999948195365436e-06, |
| "loss": 0.0092, |
| "step": 369 |
| }, |
| { |
| "clip_ratio": 0.0008362581023675375, |
| "epoch": 0.1966734529251335, |
| "grad_norm": 0.059586890041828156, |
| "kl": 0.01586627960205078, |
| "learning_rate": 4.9999079031192755e-06, |
| "loss": 0.0088, |
| "step": 370 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 568.2786619663239, |
| "epoch": 0.19831239836617626, |
| "grad_norm": 0.07526068389415741, |
| "kl": 0.016698837280273438, |
| "learning_rate": 4.999856099120852e-06, |
| "loss": 0.0081, |
| "num_tokens": 81911242.0, |
| "reward": 0.24251302849734202, |
| "reward_std": 0.126928077545017, |
| "rewards/pure_accuracy_reward_math": 0.24251302162883803, |
| "step": 371 |
| }, |
| { |
| "clip_ratio": 0.0009173538178401941, |
| "epoch": 0.19995134380721905, |
| "grad_norm": 0.06963901966810226, |
| "kl": 0.016336441040039062, |
| "learning_rate": 4.99979278360872e-06, |
| "loss": 0.008, |
| "step": 372 |
| }, |
| { |
| "clip_ratio": 0.0011110180128071079, |
| "epoch": 0.20159028924826183, |
| "grad_norm": 0.06961624324321747, |
| "kl": 0.015837669372558594, |
| "learning_rate": 4.999717956874435e-06, |
| "loss": 0.0076, |
| "step": 373 |
| }, |
| { |
| "clip_ratio": 0.0009433047086986335, |
| "epoch": 0.2032292346893046, |
| "grad_norm": 0.06556432694196701, |
| "kl": 0.01593923568725586, |
| "learning_rate": 4.9996316192625675e-06, |
| "loss": 0.0072, |
| "step": 374 |
| }, |
| { |
| "clip_ratio": 0.0008095512553154549, |
| "epoch": 0.20486818013034738, |
| "grad_norm": 0.06139687821269035, |
| "kl": 0.01659393310546875, |
| "learning_rate": 4.99953377117069e-06, |
| "loss": 0.0067, |
| "step": 375 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 573.026712179184, |
| "epoch": 0.20650712557139017, |
| "grad_norm": 0.07706455886363983, |
| "kl": 0.016510486602783203, |
| "learning_rate": 4.99942441304938e-06, |
| "loss": 0.0125, |
| "num_tokens": 85158888.0, |
| "reward": 0.2682291740202345, |
| "reward_std": 0.14631909935269505, |
| "rewards/pure_accuracy_reward_math": 0.2682291676173918, |
| "step": 376 |
| }, |
| { |
| "clip_ratio": 0.0009323210776983615, |
| "epoch": 0.20814607101243293, |
| "grad_norm": 0.07358861714601517, |
| "kl": 0.015882015228271484, |
| "learning_rate": 4.999303545402218e-06, |
| "loss": 0.0123, |
| "step": 377 |
| }, |
| { |
| "clip_ratio": 0.0011543770483513072, |
| "epoch": 0.20978501645347572, |
| "grad_norm": 0.06775986403226852, |
| "kl": 0.015225410461425781, |
| "learning_rate": 4.999171168785783e-06, |
| "loss": 0.012, |
| "step": 378 |
| }, |
| { |
| "clip_ratio": 0.0010423319904475647, |
| "epoch": 0.2114239618945185, |
| "grad_norm": 0.06506908684968948, |
| "kl": 0.01537466049194336, |
| "learning_rate": 4.999027283809653e-06, |
| "loss": 0.0116, |
| "step": 379 |
| }, |
| { |
| "clip_ratio": 0.0009310034193958927, |
| "epoch": 0.21306290733556127, |
| "grad_norm": 0.06132827699184418, |
| "kl": 0.01622772216796875, |
| "learning_rate": 4.9988718911364e-06, |
| "loss": 0.0111, |
| "step": 380 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 571.3756697177887, |
| "epoch": 0.21470185277660406, |
| "grad_norm": 0.0720067173242569, |
| "kl": 0.017137527465820312, |
| "learning_rate": 4.998704991481587e-06, |
| "loss": 0.0108, |
| "num_tokens": 88402762.0, |
| "reward": 0.24674479925306514, |
| "reward_std": 0.1313918832456693, |
| "rewards/pure_accuracy_reward_math": 0.24674479168606922, |
| "step": 381 |
| }, |
| { |
| "clip_ratio": 0.0007053178948126515, |
| "epoch": 0.21634079821764685, |
| "grad_norm": 0.06856828182935715, |
| "kl": 0.016861915588378906, |
| "learning_rate": 4.998526585613763e-06, |
| "loss": 0.0107, |
| "step": 382 |
| }, |
| { |
| "clip_ratio": 0.0009198855559588992, |
| "epoch": 0.2179797436586896, |
| "grad_norm": 0.06308390200138092, |
| "kl": 0.01618671417236328, |
| "learning_rate": 4.998336674354468e-06, |
| "loss": 0.0103, |
| "step": 383 |
| }, |
| { |
| "clip_ratio": 0.0009896415027697003, |
| "epoch": 0.2196186890997324, |
| "grad_norm": 0.059695471078157425, |
| "kl": 0.01616191864013672, |
| "learning_rate": 4.9981352585782154e-06, |
| "loss": 0.01, |
| "step": 384 |
| }, |
| { |
| "clip_ratio": 0.0009851711494093252, |
| "epoch": 0.22125763454077518, |
| "grad_norm": 0.06119159981608391, |
| "kl": 0.016726016998291016, |
| "learning_rate": 4.997922339212501e-06, |
| "loss": 0.0095, |
| "step": 385 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 582.7536017894745, |
| "epoch": 0.22289657998181794, |
| "grad_norm": 0.06853786110877991, |
| "kl": 0.01645803451538086, |
| "learning_rate": 4.997697917237789e-06, |
| "loss": 0.0092, |
| "num_tokens": 91672333.0, |
| "reward": 0.2298177152988501, |
| "reward_std": 0.1270827678963542, |
| "rewards/pure_accuracy_reward_math": 0.22981770866317675, |
| "step": 386 |
| }, |
| { |
| "clip_ratio": 0.0006475677645312317, |
| "epoch": 0.22453552542286073, |
| "grad_norm": 0.06568547338247299, |
| "kl": 0.016225337982177734, |
| "learning_rate": 4.997461993687514e-06, |
| "loss": 0.0091, |
| "step": 387 |
| }, |
| { |
| "clip_ratio": 0.000736436435545329, |
| "epoch": 0.22617447086390352, |
| "grad_norm": 0.06055685877799988, |
| "kl": 0.015880584716796875, |
| "learning_rate": 4.997214569648075e-06, |
| "loss": 0.0088, |
| "step": 388 |
| }, |
| { |
| "clip_ratio": 0.0007173336517780626, |
| "epoch": 0.22781341630494628, |
| "grad_norm": 0.054837051779031754, |
| "kl": 0.01586627960205078, |
| "learning_rate": 4.996955646258826e-06, |
| "loss": 0.0084, |
| "step": 389 |
| }, |
| { |
| "clip_ratio": 0.0007432895852730326, |
| "epoch": 0.22945236174598907, |
| "grad_norm": 0.05363443121314049, |
| "kl": 0.01619720458984375, |
| "learning_rate": 4.996685224712077e-06, |
| "loss": 0.008, |
| "step": 390 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 569.7708532810211, |
| "epoch": 0.23109130718703186, |
| "grad_norm": 0.08375601470470428, |
| "kl": 0.016201019287109375, |
| "learning_rate": 4.9964033062530825e-06, |
| "loss": 0.0067, |
| "num_tokens": 94902157.0, |
| "reward": 0.2470703189901542, |
| "reward_std": 0.12347866676282138, |
| "rewards/pure_accuracy_reward_math": 0.2470703137514647, |
| "step": 391 |
| }, |
| { |
| "clip_ratio": 0.0006189611340801093, |
| "epoch": 0.23273025262807462, |
| "grad_norm": 0.06883595138788223, |
| "kl": 0.016017436981201172, |
| "learning_rate": 4.996109892180041e-06, |
| "loss": 0.0065, |
| "step": 392 |
| }, |
| { |
| "clip_ratio": 0.0007823226997629718, |
| "epoch": 0.2343691980691174, |
| "grad_norm": 0.08916032314300537, |
| "kl": 0.01654815673828125, |
| "learning_rate": 4.995804983844088e-06, |
| "loss": 0.0062, |
| "step": 393 |
| }, |
| { |
| "clip_ratio": 0.0008161565754676303, |
| "epoch": 0.23600814351016017, |
| "grad_norm": 0.06091364100575447, |
| "kl": 0.015578985214233398, |
| "learning_rate": 4.995488582649286e-06, |
| "loss": 0.0058, |
| "step": 394 |
| }, |
| { |
| "clip_ratio": 0.0008045715469506831, |
| "epoch": 0.23764708895120296, |
| "grad_norm": 0.0608866885304451, |
| "kl": 0.015944957733154297, |
| "learning_rate": 4.99516069005262e-06, |
| "loss": 0.0054, |
| "step": 395 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 589.5065298080444, |
| "epoch": 0.23928603439224574, |
| "grad_norm": 0.07174283266067505, |
| "kl": 0.01667642593383789, |
| "learning_rate": 4.994821307563995e-06, |
| "loss": 0.0062, |
| "num_tokens": 98201877.0, |
| "reward": 0.2291666735545732, |
| "reward_std": 0.12557925208238885, |
| "rewards/pure_accuracy_reward_math": 0.22916666680248454, |
| "step": 396 |
| }, |
| { |
| "clip_ratio": 0.0007434075716901134, |
| "epoch": 0.2409249798332885, |
| "grad_norm": 0.06661787629127502, |
| "kl": 0.01587820053100586, |
| "learning_rate": 4.994470436746222e-06, |
| "loss": 0.0061, |
| "step": 397 |
| }, |
| { |
| "clip_ratio": 0.0009446046724406187, |
| "epoch": 0.2425639252743313, |
| "grad_norm": 0.06436329334974289, |
| "kl": 0.015263080596923828, |
| "learning_rate": 4.994108079215016e-06, |
| "loss": 0.0058, |
| "step": 398 |
| }, |
| { |
| "clip_ratio": 0.0009035653077944517, |
| "epoch": 0.24420287071537408, |
| "grad_norm": 0.05970580503344536, |
| "kl": 0.0152130126953125, |
| "learning_rate": 4.9937342366389875e-06, |
| "loss": 0.0054, |
| "step": 399 |
| }, |
| { |
| "clip_ratio": 0.000805649116728091, |
| "epoch": 0.24584181615641684, |
| "grad_norm": 0.05798059329390526, |
| "kl": 0.015795230865478516, |
| "learning_rate": 4.9933489107396324e-06, |
| "loss": 0.005, |
| "step": 400 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 585.6373908519745, |
| "epoch": 0.24748076159745963, |
| "grad_norm": 0.07425414025783539, |
| "kl": 0.016396045684814453, |
| "learning_rate": 4.992952103291327e-06, |
| "loss": 0.0062, |
| "num_tokens": 101490067.0, |
| "reward": 0.2379557362291962, |
| "reward_std": 0.14541265065781772, |
| "rewards/pure_accuracy_reward_math": 0.23795572970993817, |
| "step": 401 |
| }, |
| { |
| "clip_ratio": 0.0006993081486825758, |
| "epoch": 0.24911970703850242, |
| "grad_norm": 0.0683765783905983, |
| "kl": 0.016202926635742188, |
| "learning_rate": 4.992543816121317e-06, |
| "loss": 0.006, |
| "step": 402 |
| }, |
| { |
| "clip_ratio": 0.0007977800468097485, |
| "epoch": 0.2507586524795452, |
| "grad_norm": 0.06357114762067795, |
| "kl": 0.015718460083007812, |
| "learning_rate": 4.992124051109714e-06, |
| "loss": 0.0056, |
| "step": 403 |
| }, |
| { |
| "clip_ratio": 0.0009015119801460969, |
| "epoch": 0.25239759792058797, |
| "grad_norm": 0.06347363442182541, |
| "kl": 0.015771865844726562, |
| "learning_rate": 4.991692810189479e-06, |
| "loss": 0.0051, |
| "step": 404 |
| }, |
| { |
| "clip_ratio": 0.0008236684840881026, |
| "epoch": 0.25403654336163073, |
| "grad_norm": 0.058691952377557755, |
| "kl": 0.016323566436767578, |
| "learning_rate": 4.991250095346423e-06, |
| "loss": 0.0047, |
| "step": 405 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 583.5478718280792, |
| "epoch": 0.25567548880267355, |
| "grad_norm": 0.07196501642465591, |
| "kl": 0.01677703857421875, |
| "learning_rate": 4.990795908619189e-06, |
| "loss": 0.0083, |
| "num_tokens": 104766370.0, |
| "reward": 0.2216796949505806, |
| "reward_std": 0.12527710193535313, |
| "rewards/pure_accuracy_reward_math": 0.22167968738358468, |
| "step": 406 |
| }, |
| { |
| "clip_ratio": 0.0007391628066670819, |
| "epoch": 0.2573144342437163, |
| "grad_norm": 0.07041583210229874, |
| "kl": 0.016283512115478516, |
| "learning_rate": 4.990330252099249e-06, |
| "loss": 0.0081, |
| "step": 407 |
| }, |
| { |
| "clip_ratio": 0.0009407289188061441, |
| "epoch": 0.25895337968475907, |
| "grad_norm": 0.06628228724002838, |
| "kl": 0.015958786010742188, |
| "learning_rate": 4.98985312793089e-06, |
| "loss": 0.0078, |
| "step": 408 |
| }, |
| { |
| "clip_ratio": 0.0008640961274295478, |
| "epoch": 0.2605923251258019, |
| "grad_norm": 0.08439858257770538, |
| "kl": 0.01657581329345703, |
| "learning_rate": 4.989364538311209e-06, |
| "loss": 0.0074, |
| "step": 409 |
| }, |
| { |
| "clip_ratio": 0.0008074077862829654, |
| "epoch": 0.26223127056684464, |
| "grad_norm": 0.06573989987373352, |
| "kl": 0.016643524169921875, |
| "learning_rate": 4.988864485490096e-06, |
| "loss": 0.007, |
| "step": 410 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 577.0521020889282, |
| "epoch": 0.2638702160078874, |
| "grad_norm": 0.0664341077208519, |
| "kl": 0.017581939697265625, |
| "learning_rate": 4.988352971770229e-06, |
| "loss": 0.008, |
| "num_tokens": 108026786.0, |
| "reward": 0.22265625622821972, |
| "reward_std": 0.10564513533608988, |
| "rewards/pure_accuracy_reward_math": 0.2226562507566996, |
| "step": 411 |
| }, |
| { |
| "clip_ratio": 0.0005124467848531822, |
| "epoch": 0.2655091614489302, |
| "grad_norm": 0.058590181171894073, |
| "kl": 0.016813278198242188, |
| "learning_rate": 4.987829999507065e-06, |
| "loss": 0.0078, |
| "step": 412 |
| }, |
| { |
| "clip_ratio": 0.000724739855968437, |
| "epoch": 0.267148106889973, |
| "grad_norm": 0.058657143265008926, |
| "kl": 0.016211986541748047, |
| "learning_rate": 4.9872955711088215e-06, |
| "loss": 0.0076, |
| "step": 413 |
| }, |
| { |
| "clip_ratio": 0.0007133069941573922, |
| "epoch": 0.26878705233101574, |
| "grad_norm": 0.054359566420316696, |
| "kl": 0.01609182357788086, |
| "learning_rate": 4.9867496890364734e-06, |
| "loss": 0.0072, |
| "step": 414 |
| }, |
| { |
| "clip_ratio": 0.0006473830337654363, |
| "epoch": 0.27042599777205856, |
| "grad_norm": 0.0523286908864975, |
| "kl": 0.016422271728515625, |
| "learning_rate": 4.986192355803735e-06, |
| "loss": 0.0069, |
| "step": 415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 587.6103718280792, |
| "epoch": 0.2720649432131013, |
| "grad_norm": 0.06779833137989044, |
| "kl": 0.016475200653076172, |
| "learning_rate": 4.985623573977056e-06, |
| "loss": 0.0092, |
| "num_tokens": 111325301.0, |
| "reward": 0.23014323599636555, |
| "reward_std": 0.14115750859491527, |
| "rewards/pure_accuracy_reward_math": 0.23014322959352285, |
| "step": 416 |
| }, |
| { |
| "clip_ratio": 0.0006248025758850417, |
| "epoch": 0.2737038886541441, |
| "grad_norm": 0.06379402428865433, |
| "kl": 0.016280651092529297, |
| "learning_rate": 4.985043346175602e-06, |
| "loss": 0.009, |
| "step": 417 |
| }, |
| { |
| "clip_ratio": 0.000747511406416379, |
| "epoch": 0.2753428340951869, |
| "grad_norm": 0.060899555683135986, |
| "kl": 0.015888690948486328, |
| "learning_rate": 4.984451675071247e-06, |
| "loss": 0.0086, |
| "step": 418 |
| }, |
| { |
| "clip_ratio": 0.0007759029947465024, |
| "epoch": 0.27698177953622966, |
| "grad_norm": 0.059268273413181305, |
| "kl": 0.01577615737915039, |
| "learning_rate": 4.983848563388559e-06, |
| "loss": 0.0082, |
| "step": 419 |
| }, |
| { |
| "clip_ratio": 0.0007596586527824911, |
| "epoch": 0.2786207249772724, |
| "grad_norm": 0.05496392399072647, |
| "kl": 0.016138076782226562, |
| "learning_rate": 4.983234013904791e-06, |
| "loss": 0.0078, |
| "step": 420 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 575.2080256938934, |
| "epoch": 0.28025967041831523, |
| "grad_norm": 0.06906809657812119, |
| "kl": 0.016767501831054688, |
| "learning_rate": 4.9826080294498615e-06, |
| "loss": 0.0087, |
| "num_tokens": 114572748.0, |
| "reward": 0.2369791732635349, |
| "reward_std": 0.12557925086002797, |
| "rewards/pure_accuracy_reward_math": 0.2369791673263535, |
| "step": 421 |
| }, |
| { |
| "clip_ratio": 0.0006102707594664025, |
| "epoch": 0.281898615859358, |
| "grad_norm": 0.06671704351902008, |
| "kl": 0.01644277572631836, |
| "learning_rate": 4.98197061290635e-06, |
| "loss": 0.0085, |
| "step": 422 |
| }, |
| { |
| "clip_ratio": 0.0008330631824264856, |
| "epoch": 0.28353756130040075, |
| "grad_norm": 0.06136437505483627, |
| "kl": 0.016006946563720703, |
| "learning_rate": 4.981321767209477e-06, |
| "loss": 0.0082, |
| "step": 423 |
| }, |
| { |
| "clip_ratio": 0.0008570863296881726, |
| "epoch": 0.28517650674144357, |
| "grad_norm": 0.05813751742243767, |
| "kl": 0.015911102294921875, |
| "learning_rate": 4.980661495347092e-06, |
| "loss": 0.0078, |
| "step": 424 |
| }, |
| { |
| "clip_ratio": 0.0007138608199284135, |
| "epoch": 0.28681545218248633, |
| "grad_norm": 0.055987462401390076, |
| "kl": 0.016295433044433594, |
| "learning_rate": 4.979989800359661e-06, |
| "loss": 0.0074, |
| "step": 425 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 581.0205252170563, |
| "epoch": 0.2884543976235291, |
| "grad_norm": 0.07705598324537277, |
| "kl": 0.016697406768798828, |
| "learning_rate": 4.9793066853402535e-06, |
| "loss": 0.0104, |
| "num_tokens": 117848783.0, |
| "reward": 0.2561849042249378, |
| "reward_std": 0.13690236682305112, |
| "rewards/pure_accuracy_reward_math": 0.2561848958430346, |
| "step": 426 |
| }, |
| { |
| "clip_ratio": 0.0006357220343033987, |
| "epoch": 0.2900933430645719, |
| "grad_norm": 0.06893625855445862, |
| "kl": 0.016409873962402344, |
| "learning_rate": 4.978612153434527e-06, |
| "loss": 0.0102, |
| "step": 427 |
| }, |
| { |
| "clip_ratio": 0.0008826974139992672, |
| "epoch": 0.29173228850561467, |
| "grad_norm": 0.06487911939620972, |
| "kl": 0.01578235626220703, |
| "learning_rate": 4.977906207840708e-06, |
| "loss": 0.0099, |
| "step": 428 |
| }, |
| { |
| "clip_ratio": 0.0009138368169487876, |
| "epoch": 0.29337123394665743, |
| "grad_norm": 0.05983469635248184, |
| "kl": 0.015604972839355469, |
| "learning_rate": 4.9771888518095855e-06, |
| "loss": 0.0094, |
| "step": 429 |
| }, |
| { |
| "clip_ratio": 0.0008240164653443571, |
| "epoch": 0.29501017938770024, |
| "grad_norm": 0.05934643745422363, |
| "kl": 0.016060352325439453, |
| "learning_rate": 4.976460088644493e-06, |
| "loss": 0.009, |
| "step": 430 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 572.7008640766144, |
| "epoch": 0.296649124828743, |
| "grad_norm": 0.10878130793571472, |
| "kl": 0.01746988296508789, |
| "learning_rate": 4.9757199217012884e-06, |
| "loss": 0.012, |
| "num_tokens": 121093860.0, |
| "reward": 0.25097657056176104, |
| "reward_std": 0.13550679641775787, |
| "rewards/pure_accuracy_reward_math": 0.25097656264551915, |
| "step": 431 |
| }, |
| { |
| "clip_ratio": 0.00068632745751529, |
| "epoch": 0.29828807026978577, |
| "grad_norm": 0.0702020674943924, |
| "kl": 0.017047405242919922, |
| "learning_rate": 4.974968354388346e-06, |
| "loss": 0.0118, |
| "step": 432 |
| }, |
| { |
| "clip_ratio": 0.0008000754407930799, |
| "epoch": 0.2999270157108286, |
| "grad_norm": 0.06406186521053314, |
| "kl": 0.016495227813720703, |
| "learning_rate": 4.974205390166535e-06, |
| "loss": 0.0115, |
| "step": 433 |
| }, |
| { |
| "clip_ratio": 0.0008013431938707072, |
| "epoch": 0.30156596115187134, |
| "grad_norm": 4.406322956085205, |
| "kl": 0.020737171173095703, |
| "learning_rate": 4.973431032549207e-06, |
| "loss": 0.0112, |
| "step": 434 |
| }, |
| { |
| "clip_ratio": 0.0010278673380526016, |
| "epoch": 0.3032049065929141, |
| "grad_norm": 0.07802355289459229, |
| "kl": 0.016713619232177734, |
| "learning_rate": 4.9726452851021804e-06, |
| "loss": 0.0107, |
| "step": 435 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 571.912127494812, |
| "epoch": 0.3048438520339569, |
| "grad_norm": 0.07019820809364319, |
| "kl": 0.01775836944580078, |
| "learning_rate": 4.971848151443718e-06, |
| "loss": 0.0087, |
| "num_tokens": 124344430.0, |
| "reward": 0.234049486432923, |
| "reward_std": 0.11882065865211189, |
| "rewards/pure_accuracy_reward_math": 0.2340494789823424, |
| "step": 436 |
| }, |
| { |
| "clip_ratio": 0.0009248623491657781, |
| "epoch": 0.3064827974749997, |
| "grad_norm": 0.07924344390630722, |
| "kl": 0.017708301544189453, |
| "learning_rate": 4.9710396352445175e-06, |
| "loss": 0.0086, |
| "step": 437 |
| }, |
| { |
| "clip_ratio": 0.0008092627095379612, |
| "epoch": 0.30812174291604244, |
| "grad_norm": 0.06455735862255096, |
| "kl": 0.01685619354248047, |
| "learning_rate": 4.970219740227693e-06, |
| "loss": 0.0082, |
| "step": 438 |
| }, |
| { |
| "clip_ratio": 0.0008741315057250176, |
| "epoch": 0.30976068835708526, |
| "grad_norm": 0.0737844780087471, |
| "kl": 0.016612529754638672, |
| "learning_rate": 4.969388470168754e-06, |
| "loss": 0.0078, |
| "step": 439 |
| }, |
| { |
| "clip_ratio": 0.0006731455727049251, |
| "epoch": 0.311399633798128, |
| "grad_norm": 0.061306241899728775, |
| "kl": 0.01703643798828125, |
| "learning_rate": 4.96854582889559e-06, |
| "loss": 0.0074, |
| "step": 440 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 560.4661660194397, |
| "epoch": 0.3130385792391708, |
| "grad_norm": 0.07420651614665985, |
| "kl": 0.018575191497802734, |
| "learning_rate": 4.967691820288457e-06, |
| "loss": 0.0089, |
| "num_tokens": 127553878.0, |
| "reward": 0.24218750634463504, |
| "reward_std": 0.1318487230455503, |
| "rewards/pure_accuracy_reward_math": 0.24218749982537702, |
| "step": 441 |
| }, |
| { |
| "clip_ratio": 0.0007318889370253601, |
| "epoch": 0.3146775246802136, |
| "grad_norm": 0.0815000906586647, |
| "kl": 0.018791675567626953, |
| "learning_rate": 4.9668264482799535e-06, |
| "loss": 0.0087, |
| "step": 442 |
| }, |
| { |
| "clip_ratio": 0.0007262505477001469, |
| "epoch": 0.31631647012125635, |
| "grad_norm": 0.06461174786090851, |
| "kl": 0.01784658432006836, |
| "learning_rate": 4.965949716855006e-06, |
| "loss": 0.0083, |
| "step": 443 |
| }, |
| { |
| "clip_ratio": 0.001082696728190058, |
| "epoch": 0.3179554155622991, |
| "grad_norm": 0.0798153281211853, |
| "kl": 0.017561912536621094, |
| "learning_rate": 4.965061630050848e-06, |
| "loss": 0.0079, |
| "step": 444 |
| }, |
| { |
| "clip_ratio": 0.0007142422628589884, |
| "epoch": 0.31959436100334193, |
| "grad_norm": 0.05629098415374756, |
| "kl": 0.018102645874023438, |
| "learning_rate": 4.9641621919570045e-06, |
| "loss": 0.0074, |
| "step": 445 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 573.8131718635559, |
| "epoch": 0.3212333064443847, |
| "grad_norm": 0.07845748215913773, |
| "kl": 0.019147872924804688, |
| "learning_rate": 4.963251406715272e-06, |
| "loss": 0.0121, |
| "num_tokens": 130808444.0, |
| "reward": 0.23372396570630372, |
| "reward_std": 0.1444127168506384, |
| "rewards/pure_accuracy_reward_math": 0.23372395883779973, |
| "step": 446 |
| }, |
| { |
| "clip_ratio": 0.0008133034913271331, |
| "epoch": 0.32287225188542745, |
| "grad_norm": 0.0820281058549881, |
| "kl": 0.018957138061523438, |
| "learning_rate": 4.9623292785197e-06, |
| "loss": 0.012, |
| "step": 447 |
| }, |
| { |
| "clip_ratio": 0.0009565782518166088, |
| "epoch": 0.32451119732647027, |
| "grad_norm": 0.06929846853017807, |
| "kl": 0.01794910430908203, |
| "learning_rate": 4.961395811616567e-06, |
| "loss": 0.0115, |
| "step": 448 |
| }, |
| { |
| "clip_ratio": 0.0012002166286038118, |
| "epoch": 0.32615014276751303, |
| "grad_norm": 0.08353662490844727, |
| "kl": 0.017772197723388672, |
| "learning_rate": 4.960451010304368e-06, |
| "loss": 0.0111, |
| "step": 449 |
| }, |
| { |
| "clip_ratio": 0.0008637306386845012, |
| "epoch": 0.3277890882085558, |
| "grad_norm": 0.059568535536527634, |
| "kl": 0.01876544952392578, |
| "learning_rate": 4.959494878933792e-06, |
| "loss": 0.0105, |
| "step": 450 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 562.3017749786377, |
| "epoch": 0.3294280336495986, |
| "grad_norm": 0.08181121200323105, |
| "kl": 0.020171165466308594, |
| "learning_rate": 4.958527421907697e-06, |
| "loss": 0.0075, |
| "num_tokens": 134024963.0, |
| "reward": 0.2363281317811925, |
| "reward_std": 0.12392827571602538, |
| "rewards/pure_accuracy_reward_math": 0.23632812607684173, |
| "step": 451 |
| }, |
| { |
| "clip_ratio": 0.0007134260189332053, |
| "epoch": 0.33106697909064137, |
| "grad_norm": 0.17417100071907043, |
| "kl": 0.019116878509521484, |
| "learning_rate": 4.957548643681102e-06, |
| "loss": 0.0076, |
| "step": 452 |
| }, |
| { |
| "clip_ratio": 0.000948693925124644, |
| "epoch": 0.3327059245316841, |
| "grad_norm": 10.566765785217285, |
| "kl": 0.22874164581298828, |
| "learning_rate": 4.95655854876115e-06, |
| "loss": 0.0154, |
| "step": 453 |
| }, |
| { |
| "clip_ratio": 0.001509593688069799, |
| "epoch": 0.33434486997272694, |
| "grad_norm": 0.37215539813041687, |
| "kl": 0.024587154388427734, |
| "learning_rate": 4.955557141707102e-06, |
| "loss": 0.0071, |
| "step": 454 |
| }, |
| { |
| "clip_ratio": 0.0016867685367287777, |
| "epoch": 0.3359838154137697, |
| "grad_norm": 0.11960741132497787, |
| "kl": 0.018782615661621094, |
| "learning_rate": 4.954544427130308e-06, |
| "loss": 0.0071, |
| "step": 455 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 559.3082876205444, |
| "epoch": 0.33762276085481246, |
| "grad_norm": 0.07540658116340637, |
| "kl": 0.018846988677978516, |
| "learning_rate": 4.953520409694186e-06, |
| "loss": 0.0064, |
| "num_tokens": 137230282.0, |
| "reward": 0.23795573617098853, |
| "reward_std": 0.12567996798316017, |
| "rewards/pure_accuracy_reward_math": 0.2379557301173918, |
| "step": 456 |
| }, |
| { |
| "clip_ratio": 0.0007552293965318313, |
| "epoch": 0.3392617062958553, |
| "grad_norm": 0.08643142879009247, |
| "kl": 0.019680500030517578, |
| "learning_rate": 4.9524850941142045e-06, |
| "loss": 0.0063, |
| "step": 457 |
| }, |
| { |
| "clip_ratio": 0.0008370072589514166, |
| "epoch": 0.34090065173689804, |
| "grad_norm": 0.07104479521512985, |
| "kl": 0.01837015151977539, |
| "learning_rate": 4.951438485157858e-06, |
| "loss": 0.0059, |
| "step": 458 |
| }, |
| { |
| "clip_ratio": 0.0012107024614920192, |
| "epoch": 0.3425395971779408, |
| "grad_norm": 0.07779641449451447, |
| "kl": 0.017088890075683594, |
| "learning_rate": 4.950380587644645e-06, |
| "loss": 0.0055, |
| "step": 459 |
| }, |
| { |
| "clip_ratio": 0.001106619913571194, |
| "epoch": 0.3441785426189836, |
| "grad_norm": 0.07811883091926575, |
| "kl": 0.017291545867919922, |
| "learning_rate": 4.949311406446047e-06, |
| "loss": 0.005, |
| "step": 460 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 571.5631704330444, |
| "epoch": 0.3458174880600264, |
| "grad_norm": 0.07503899931907654, |
| "kl": 0.018761634826660156, |
| "learning_rate": 4.948230946485504e-06, |
| "loss": 0.0099, |
| "num_tokens": 140468056.0, |
| "reward": 0.23144531933940016, |
| "reward_std": 0.1391576409805566, |
| "rewards/pure_accuracy_reward_math": 0.2314453127037268, |
| "step": 461 |
| }, |
| { |
| "clip_ratio": 0.0005579163533582232, |
| "epoch": 0.34745643350106914, |
| "grad_norm": 0.07867737859487534, |
| "kl": 0.019515037536621094, |
| "learning_rate": 4.947139212738395e-06, |
| "loss": 0.0097, |
| "step": 462 |
| }, |
| { |
| "clip_ratio": 0.0005302657258994259, |
| "epoch": 0.34909537894211196, |
| "grad_norm": 0.06822054833173752, |
| "kl": 0.018963336944580078, |
| "learning_rate": 4.946036210232013e-06, |
| "loss": 0.0093, |
| "step": 463 |
| }, |
| { |
| "clip_ratio": 0.0007419603928155993, |
| "epoch": 0.3507343243831547, |
| "grad_norm": 0.06452897191047668, |
| "kl": 0.017910480499267578, |
| "learning_rate": 4.9449219440455406e-06, |
| "loss": 0.0089, |
| "step": 464 |
| }, |
| { |
| "clip_ratio": 0.0009656345523580967, |
| "epoch": 0.3523732698241975, |
| "grad_norm": 0.06394355744123459, |
| "kl": 0.017592430114746094, |
| "learning_rate": 4.94379641931003e-06, |
| "loss": 0.0084, |
| "step": 465 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 550.8063335418701, |
| "epoch": 0.3540122152652403, |
| "grad_norm": 0.07220590114593506, |
| "kl": 0.019676685333251953, |
| "learning_rate": 4.9426596412083775e-06, |
| "loss": 0.0073, |
| "num_tokens": 143643633.0, |
| "reward": 0.2643229244858958, |
| "reward_std": 0.129740908567328, |
| "rewards/pure_accuracy_reward_math": 0.26432291738456115, |
| "step": 466 |
| }, |
| { |
| "clip_ratio": 0.0004138159448530132, |
| "epoch": 0.35565116070628305, |
| "grad_norm": 0.06854696571826935, |
| "kl": 0.019529342651367188, |
| "learning_rate": 4.9415116149752975e-06, |
| "loss": 0.0071, |
| "step": 467 |
| }, |
| { |
| "clip_ratio": 0.0006204222647170354, |
| "epoch": 0.3572901061473258, |
| "grad_norm": 0.0629592314362526, |
| "kl": 0.01886892318725586, |
| "learning_rate": 4.940352345897304e-06, |
| "loss": 0.0068, |
| "step": 468 |
| }, |
| { |
| "clip_ratio": 0.0008853772396264503, |
| "epoch": 0.35892905158836863, |
| "grad_norm": 0.07459286600351334, |
| "kl": 0.018596172332763672, |
| "learning_rate": 4.93918183931268e-06, |
| "loss": 0.0064, |
| "step": 469 |
| }, |
| { |
| "clip_ratio": 0.0006601580334404389, |
| "epoch": 0.3605679970294114, |
| "grad_norm": 0.06547638773918152, |
| "kl": 0.019172191619873047, |
| "learning_rate": 4.938000100611456e-06, |
| "loss": 0.0059, |
| "step": 470 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 563.5420064926147, |
| "epoch": 0.36220694247045415, |
| "grad_norm": 0.07862798869609833, |
| "kl": 0.019073963165283203, |
| "learning_rate": 4.936807135235389e-06, |
| "loss": 0.0082, |
| "num_tokens": 146856798.0, |
| "reward": 0.24414063114090823, |
| "reward_std": 0.13599591748788953, |
| "rewards/pure_accuracy_reward_math": 0.2441406262514647, |
| "step": 471 |
| }, |
| { |
| "clip_ratio": 0.000502235996748368, |
| "epoch": 0.36384588791149697, |
| "grad_norm": 0.07364361733198166, |
| "kl": 0.018817424774169922, |
| "learning_rate": 4.935602948677925e-06, |
| "loss": 0.008, |
| "step": 472 |
| }, |
| { |
| "clip_ratio": 0.0008022733071584298, |
| "epoch": 0.36548483335253973, |
| "grad_norm": 0.0682106539607048, |
| "kl": 0.018253803253173828, |
| "learning_rate": 4.934387546484192e-06, |
| "loss": 0.0076, |
| "step": 473 |
| }, |
| { |
| "clip_ratio": 0.0009958607009821208, |
| "epoch": 0.3671237787935825, |
| "grad_norm": 0.06967198103666306, |
| "kl": 0.01818084716796875, |
| "learning_rate": 4.933160934250957e-06, |
| "loss": 0.0072, |
| "step": 474 |
| }, |
| { |
| "clip_ratio": 0.0007515698841871199, |
| "epoch": 0.3687627242346253, |
| "grad_norm": 0.05721515789628029, |
| "kl": 0.018873214721679688, |
| "learning_rate": 4.931923117626611e-06, |
| "loss": 0.0067, |
| "step": 475 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 563.7535984516144, |
| "epoch": 0.37040166967566807, |
| "grad_norm": 0.07566659152507782, |
| "kl": 0.019544124603271484, |
| "learning_rate": 4.93067410231114e-06, |
| "loss": 0.0064, |
| "num_tokens": 150075101.0, |
| "reward": 0.25325521553168073, |
| "reward_std": 0.13043869246030226, |
| "rewards/pure_accuracy_reward_math": 0.25325520912883803, |
| "step": 476 |
| }, |
| { |
| "clip_ratio": 0.0004966380013229355, |
| "epoch": 0.3720406151167108, |
| "grad_norm": 0.0666096955537796, |
| "kl": 0.019169330596923828, |
| "learning_rate": 4.929413894056098e-06, |
| "loss": 0.0062, |
| "step": 477 |
| }, |
| { |
| "clip_ratio": 0.0010111166515116565, |
| "epoch": 0.37367956055775364, |
| "grad_norm": 0.07089894264936447, |
| "kl": 0.01856708526611328, |
| "learning_rate": 4.928142498664579e-06, |
| "loss": 0.0059, |
| "step": 478 |
| }, |
| { |
| "clip_ratio": 0.0009684021274551924, |
| "epoch": 0.3753185059987964, |
| "grad_norm": 0.07048792392015457, |
| "kl": 0.0184478759765625, |
| "learning_rate": 4.926859921991196e-06, |
| "loss": 0.0054, |
| "step": 479 |
| }, |
| { |
| "clip_ratio": 0.0007222936578727968, |
| "epoch": 0.37695745143983916, |
| "grad_norm": 0.0751122385263443, |
| "kl": 0.01910114288330078, |
| "learning_rate": 4.925566169942048e-06, |
| "loss": 0.0049, |
| "step": 480 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 565.988950252533, |
| "epoch": 0.378596396880882, |
| "grad_norm": 0.07866821438074112, |
| "kl": 0.019292354583740234, |
| "learning_rate": 4.924261248474696e-06, |
| "loss": 0.0077, |
| "num_tokens": 153300683.0, |
| "reward": 0.24186198544339277, |
| "reward_std": 0.14245959254913032, |
| "rewards/pure_accuracy_reward_math": 0.2418619791569654, |
| "step": 481 |
| }, |
| { |
| "clip_ratio": 0.0005219346817284531, |
| "epoch": 0.38023534232192474, |
| "grad_norm": 0.07202895730733871, |
| "kl": 0.018962383270263672, |
| "learning_rate": 4.922945163598134e-06, |
| "loss": 0.0074, |
| "step": 482 |
| }, |
| { |
| "clip_ratio": 0.0007680588737457583, |
| "epoch": 0.3818742877629675, |
| "grad_norm": 0.06937456876039505, |
| "kl": 0.018546104431152344, |
| "learning_rate": 4.921617921372764e-06, |
| "loss": 0.0071, |
| "step": 483 |
| }, |
| { |
| "clip_ratio": 0.0008267570608495589, |
| "epoch": 0.3835132332040103, |
| "grad_norm": 0.06490996479988098, |
| "kl": 0.018600940704345703, |
| "learning_rate": 4.920279527910361e-06, |
| "loss": 0.0066, |
| "step": 484 |
| }, |
| { |
| "clip_ratio": 0.0007928038041882246, |
| "epoch": 0.3851521786450531, |
| "grad_norm": 0.06093154847621918, |
| "kl": 0.019063949584960938, |
| "learning_rate": 4.918929989374057e-06, |
| "loss": 0.006, |
| "step": 485 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 559.4954574108124, |
| "epoch": 0.38679112408609584, |
| "grad_norm": 0.07598863542079926, |
| "kl": 0.020077228546142578, |
| "learning_rate": 4.917569311978301e-06, |
| "loss": 0.0076, |
| "num_tokens": 156505589.0, |
| "reward": 0.23372396451304667, |
| "reward_std": 0.12227730004815385, |
| "rewards/pure_accuracy_reward_math": 0.23372395892511122, |
| "step": 486 |
| }, |
| { |
| "clip_ratio": 0.0005095607535281488, |
| "epoch": 0.38843006952713865, |
| "grad_norm": 0.06875687837600708, |
| "kl": 0.019557952880859375, |
| "learning_rate": 4.916197501988836e-06, |
| "loss": 0.0073, |
| "step": 487 |
| }, |
| { |
| "clip_ratio": 0.0009137496642779297, |
| "epoch": 0.3900690149681814, |
| "grad_norm": 0.0720105841755867, |
| "kl": 0.018970012664794922, |
| "learning_rate": 4.914814565722671e-06, |
| "loss": 0.007, |
| "step": 488 |
| }, |
| { |
| "clip_ratio": 0.000769516089917488, |
| "epoch": 0.3917079604092242, |
| "grad_norm": 0.06368213146924973, |
| "kl": 0.019055843353271484, |
| "learning_rate": 4.913420509548047e-06, |
| "loss": 0.0065, |
| "step": 489 |
| }, |
| { |
| "clip_ratio": 0.000659781087506417, |
| "epoch": 0.393346905850267, |
| "grad_norm": 0.06218770891427994, |
| "kl": 0.019764423370361328, |
| "learning_rate": 4.912015339884412e-06, |
| "loss": 0.006, |
| "step": 490 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 563.5508005619049, |
| "epoch": 0.39498585129130975, |
| "grad_norm": 0.07790997624397278, |
| "kl": 0.019940853118896484, |
| "learning_rate": 4.910599063202391e-06, |
| "loss": 0.0037, |
| "num_tokens": 159721585.0, |
| "reward": 0.25651042349636555, |
| "reward_std": 0.14637307275552303, |
| "rewards/pure_accuracy_reward_math": 0.25651041651144624, |
| "step": 491 |
| }, |
| { |
| "clip_ratio": 0.00052815272999851, |
| "epoch": 0.3966247967323525, |
| "grad_norm": 0.06944292038679123, |
| "kl": 0.019421100616455078, |
| "learning_rate": 4.9091716860237545e-06, |
| "loss": 0.0035, |
| "step": 492 |
| }, |
| { |
| "clip_ratio": 0.0008616803660288497, |
| "epoch": 0.39826374217339533, |
| "grad_norm": 0.0701906755566597, |
| "kl": 0.018842220306396484, |
| "learning_rate": 4.907733214921391e-06, |
| "loss": 0.0031, |
| "step": 493 |
| }, |
| { |
| "clip_ratio": 0.0009441319247116553, |
| "epoch": 0.3999026876144381, |
| "grad_norm": 0.06845781207084656, |
| "kl": 0.018959999084472656, |
| "learning_rate": 4.906283656519271e-06, |
| "loss": 0.0026, |
| "step": 494 |
| }, |
| { |
| "clip_ratio": 0.0007721009840224724, |
| "epoch": 0.40154163305548085, |
| "grad_norm": 0.06344389170408249, |
| "kl": 0.019802570343017578, |
| "learning_rate": 4.904823017492425e-06, |
| "loss": 0.002, |
| "step": 495 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 564.9492375850677, |
| "epoch": 0.40318057849652367, |
| "grad_norm": 0.07819291949272156, |
| "kl": 0.019987106323242188, |
| "learning_rate": 4.903351304566907e-06, |
| "loss": 0.0087, |
| "num_tokens": 162944373.0, |
| "reward": 0.2613932383537758, |
| "reward_std": 0.14731903292704374, |
| "rewards/pure_accuracy_reward_math": 0.2613932291569654, |
| "step": 496 |
| }, |
| { |
| "clip_ratio": 0.0005454795893342634, |
| "epoch": 0.40481952393756643, |
| "grad_norm": 0.0714847669005394, |
| "kl": 0.019627094268798828, |
| "learning_rate": 4.9018685245197625e-06, |
| "loss": 0.0084, |
| "step": 497 |
| }, |
| { |
| "clip_ratio": 0.0009138085124504869, |
| "epoch": 0.4064584693786092, |
| "grad_norm": 0.07778745144605637, |
| "kl": 0.019023895263671875, |
| "learning_rate": 4.900374684179005e-06, |
| "loss": 0.008, |
| "step": 498 |
| }, |
| { |
| "clip_ratio": 0.0008562761545363173, |
| "epoch": 0.408097414819652, |
| "grad_norm": 0.07164430618286133, |
| "kl": 0.01940298080444336, |
| "learning_rate": 4.898869790423573e-06, |
| "loss": 0.0075, |
| "step": 499 |
| }, |
| { |
| "clip_ratio": 0.0008455659439050578, |
| "epoch": 0.40973636026069477, |
| "grad_norm": 0.07439333200454712, |
| "kl": 0.01979207992553711, |
| "learning_rate": 4.897353850183308e-06, |
| "loss": 0.007, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.40973636026069477, |
| "step": 500, |
| "total_flos": 0.0, |
| "train_loss": 0.0, |
| "train_runtime": 6.6841, |
| "train_samples_per_second": 42069.596, |
| "train_steps_per_second": 54.756 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 366, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|