| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.07462338510330675, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 838.0089645385742, | |
| "epoch": 0.0001492467702066135, | |
| "grad_norm": 0.031563565135002136, | |
| "kl": 0.0, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 0.0, | |
| "num_tokens": 153987.0, | |
| "reward": 0.31626437886734493, | |
| "reward_std": 0.22882110241334885, | |
| "rewards/code_reward": 0.3127822265960276, | |
| "rewards/format_reward": 0.034821428707800806, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.000298493540413227, | |
| "grad_norm": 0.03154602274298668, | |
| "kl": 0.0, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0010276091998093762, | |
| "epoch": 0.00044774031061984047, | |
| "grad_norm": 0.030962640419602394, | |
| "kl": 0.00018510222434997559, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.0002, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0010089212846651208, | |
| "completion_length": 782.9911155700684, | |
| "epoch": 0.000596987080826454, | |
| "grad_norm": 0.03505287691950798, | |
| "kl": 0.00018844008445739746, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": -0.0, | |
| "num_tokens": 310016.0, | |
| "reward": 0.38136595563264564, | |
| "reward_std": 0.2664957612287253, | |
| "rewards/code_reward": 0.3755623744800687, | |
| "rewards/format_reward": 0.05803571408614516, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0011241662487009307, | |
| "epoch": 0.0007462338510330675, | |
| "grad_norm": 0.03434380516409874, | |
| "kl": 0.00018966197967529297, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0001, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0009702896441012854, | |
| "epoch": 0.0008954806212396809, | |
| "grad_norm": 0.033386170864105225, | |
| "kl": 0.0001881122589111328, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": -0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0009430443981273129, | |
| "completion_length": 886.3482513427734, | |
| "epoch": 0.0010447273914462945, | |
| "grad_norm": 0.03664536774158478, | |
| "kl": 0.0001901388168334961, | |
| "learning_rate": 2.3333333333333336e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 471944.0, | |
| "reward": 0.2568418255104916, | |
| "reward_std": 0.24231004383182153, | |
| "rewards/code_reward": 0.24987753484310815, | |
| "rewards/format_reward": 0.06964285799767822, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.001033175552947796, | |
| "epoch": 0.001193974161652908, | |
| "grad_norm": 0.032914530485868454, | |
| "kl": 0.0001964867115020752, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": -0.0, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0009873282078842749, | |
| "epoch": 0.0013432209318595214, | |
| "grad_norm": 0.03524191677570343, | |
| "kl": 0.00020045042037963867, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0012342520167294424, | |
| "completion_length": 792.6607513427734, | |
| "epoch": 0.001492467702066135, | |
| "grad_norm": 0.03604082018136978, | |
| "kl": 0.00021690130233764648, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 632563.0, | |
| "reward": 0.3381975887459703, | |
| "reward_std": 0.2540945124346763, | |
| "rewards/code_reward": 0.33221544651314616, | |
| "rewards/format_reward": 0.05982143012806773, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0011053664220526116, | |
| "epoch": 0.0016417144722727485, | |
| "grad_norm": 0.0362468957901001, | |
| "kl": 0.00021219253540039062, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "loss": -0.0001, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0011532589833223028, | |
| "epoch": 0.0017909612424793619, | |
| "grad_norm": 0.03517252579331398, | |
| "kl": 0.00022751092910766602, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0008506373051204719, | |
| "completion_length": 906.1696853637695, | |
| "epoch": 0.0019402080126859754, | |
| "grad_norm": 0.04052340239286423, | |
| "kl": 0.000179976224899292, | |
| "learning_rate": 4.333333333333334e-06, | |
| "loss": -0.0, | |
| "num_tokens": 806035.0, | |
| "reward": 0.16382310001790756, | |
| "reward_std": 0.20915061386767775, | |
| "rewards/code_reward": 0.16087666610837914, | |
| "rewards/format_reward": 0.029464285646099597, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0009509306000836659, | |
| "epoch": 0.002089454782892589, | |
| "grad_norm": 0.032756347209215164, | |
| "kl": 0.0001971125602722168, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": -0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.001035670431519975, | |
| "epoch": 0.0022387015530992023, | |
| "grad_norm": 0.04692872241139412, | |
| "kl": 0.00018927454948425293, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0007350842770392774, | |
| "completion_length": 834.8929023742676, | |
| "epoch": 0.002387948323305816, | |
| "grad_norm": 0.031541600823402405, | |
| "kl": 0.0002028346061706543, | |
| "learning_rate": 4.999952797253148e-06, | |
| "loss": -0.0, | |
| "num_tokens": 962081.0, | |
| "reward": 0.3517821817804361, | |
| "reward_std": 0.17664615268586203, | |
| "rewards/code_reward": 0.3481214540079236, | |
| "rewards/format_reward": 0.03660714376019314, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0006742932200722862, | |
| "epoch": 0.0025371950935124295, | |
| "grad_norm": 0.030405128374695778, | |
| "kl": 0.00020629167556762695, | |
| "learning_rate": 4.9998111909931225e-06, | |
| "loss": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0006983668281463906, | |
| "epoch": 0.002686441863719043, | |
| "grad_norm": 0.03497137501835823, | |
| "kl": 0.0002194046974182129, | |
| "learning_rate": 4.999575187161439e-06, | |
| "loss": -0.0002, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0012954147914570058, | |
| "completion_length": 804.232177734375, | |
| "epoch": 0.0028356886339256566, | |
| "grad_norm": 0.037687718868255615, | |
| "kl": 0.00023552775382995605, | |
| "learning_rate": 4.9992447956603455e-06, | |
| "loss": 0.0, | |
| "num_tokens": 1125333.0, | |
| "reward": 0.30283497480559163, | |
| "reward_std": 0.29476781317498535, | |
| "rewards/code_reward": 0.2947099723969586, | |
| "rewards/format_reward": 0.08125000062864274, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0011280627804808319, | |
| "epoch": 0.00298493540413227, | |
| "grad_norm": 0.03715205565094948, | |
| "kl": 0.0002586245536804199, | |
| "learning_rate": 4.998820030352409e-06, | |
| "loss": -0.0001, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.001265888700800133, | |
| "epoch": 0.0031341821743388833, | |
| "grad_norm": 0.03643012046813965, | |
| "kl": 0.0002815723419189453, | |
| "learning_rate": 4.998300909059929e-06, | |
| "loss": -0.0001, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0012057735548296478, | |
| "completion_length": 837.4375419616699, | |
| "epoch": 0.003283428944545497, | |
| "grad_norm": 0.04015280678868294, | |
| "kl": 0.00030815601348876953, | |
| "learning_rate": 4.997687453564198e-06, | |
| "loss": -0.0, | |
| "num_tokens": 1279382.0, | |
| "reward": 0.29113965947180986, | |
| "reward_std": 0.2496117369737476, | |
| "rewards/code_reward": 0.2873003660934046, | |
| "rewards/format_reward": 0.03839285811409354, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.001202213983560796, | |
| "epoch": 0.0034326757147521104, | |
| "grad_norm": 0.03490012139081955, | |
| "kl": 0.00031620264053344727, | |
| "learning_rate": 4.9969796896045775e-06, | |
| "loss": -0.0001, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0011879785979544977, | |
| "epoch": 0.0035819224849587238, | |
| "grad_norm": 0.03548375889658928, | |
| "kl": 0.0003438591957092285, | |
| "learning_rate": 4.996177646877426e-06, | |
| "loss": -0.0, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0008819098402454983, | |
| "completion_length": 791.4643211364746, | |
| "epoch": 0.0037311692551653375, | |
| "grad_norm": 0.037079837173223495, | |
| "kl": 0.00047838687896728516, | |
| "learning_rate": 4.995281359034851e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 1425573.0, | |
| "reward": 0.17420367311569862, | |
| "reward_std": 0.1679191511357203, | |
| "rewards/code_reward": 0.16581081453477964, | |
| "rewards/format_reward": 0.08392857149010524, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0008276363059849245, | |
| "epoch": 0.003880416025371951, | |
| "grad_norm": 0.03505370393395424, | |
| "kl": 0.0005742311477661133, | |
| "learning_rate": 4.994290863683296e-06, | |
| "loss": 0.0, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0009231225321855163, | |
| "epoch": 0.004029662795578565, | |
| "grad_norm": 0.03283383697271347, | |
| "kl": 0.0006587505340576172, | |
| "learning_rate": 4.99320620238196e-06, | |
| "loss": 0.0, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0013180975292925723, | |
| "completion_length": 812.6250381469727, | |
| "epoch": 0.004178909565785178, | |
| "grad_norm": 0.04038141295313835, | |
| "kl": 0.0008317828178405762, | |
| "learning_rate": 4.99202742064106e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 1579160.0, | |
| "reward": 0.4461335694650188, | |
| "reward_std": 0.2936822731862776, | |
| "rewards/code_reward": 0.4360442637989763, | |
| "rewards/format_reward": 0.10089285846333951, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.001309220588154858, | |
| "epoch": 0.004328156335991791, | |
| "grad_norm": 0.034344371408224106, | |
| "kl": 0.0011445283889770508, | |
| "learning_rate": 4.990754567919917e-06, | |
| "loss": -0.0001, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.00168330199085176, | |
| "epoch": 0.004477403106198405, | |
| "grad_norm": 0.037127457559108734, | |
| "kl": 0.0016373395919799805, | |
| "learning_rate": 4.989387697624881e-06, | |
| "loss": -0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0007538784138887422, | |
| "completion_length": 852.9196853637695, | |
| "epoch": 0.004626649876405018, | |
| "grad_norm": 0.03424752503633499, | |
| "kl": 0.0019218921661376953, | |
| "learning_rate": 4.987926867107095e-06, | |
| "loss": 0.0, | |
| "num_tokens": 1741264.0, | |
| "reward": 0.2446170602052007, | |
| "reward_std": 0.1605516797862947, | |
| "rewards/code_reward": 0.23408133629709482, | |
| "rewards/format_reward": 0.10535714169964194, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0009339907319372287, | |
| "epoch": 0.004775896646611632, | |
| "grad_norm": 0.03297942504286766, | |
| "kl": 0.002615690231323242, | |
| "learning_rate": 4.986372137660078e-06, | |
| "loss": 0.0001, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0010387174697825685, | |
| "epoch": 0.004925143416818246, | |
| "grad_norm": 0.03285828232765198, | |
| "kl": 0.0036895275115966797, | |
| "learning_rate": 4.984723574517165e-06, | |
| "loss": -0.0, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0010631408213157556, | |
| "completion_length": 897.4286155700684, | |
| "epoch": 0.005074390187024859, | |
| "grad_norm": 0.03713543340563774, | |
| "kl": 0.0033243894577026367, | |
| "learning_rate": 4.9829812468487655e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 1908976.0, | |
| "reward": 0.3391252807341516, | |
| "reward_std": 0.29133173869922757, | |
| "rewards/code_reward": 0.32323241684935056, | |
| "rewards/format_reward": 0.15892857313156128, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.001270327098609414, | |
| "epoch": 0.005223636957231472, | |
| "grad_norm": 0.04065223038196564, | |
| "kl": 0.004559874534606934, | |
| "learning_rate": 4.981145227759457e-06, | |
| "loss": 0.0001, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0015276970298145898, | |
| "epoch": 0.005372883727438086, | |
| "grad_norm": 0.037328362464904785, | |
| "kl": 0.006010532379150391, | |
| "learning_rate": 4.979215594284924e-06, | |
| "loss": 0.0001, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0008515377248841105, | |
| "completion_length": 948.6161041259766, | |
| "epoch": 0.005522130497644699, | |
| "grad_norm": 0.0387113131582737, | |
| "kl": 0.004285603761672974, | |
| "learning_rate": 4.977192427388722e-06, | |
| "loss": 0.0, | |
| "num_tokens": 2095140.0, | |
| "reward": 0.34807550528785214, | |
| "reward_std": 0.2688880347413942, | |
| "rewards/code_reward": 0.33495048189070076, | |
| "rewards/format_reward": 0.1312500003259629, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.000987556606560247, | |
| "epoch": 0.005671377267851313, | |
| "grad_norm": 0.03865530341863632, | |
| "kl": 0.004810154438018799, | |
| "learning_rate": 4.9750758119588824e-06, | |
| "loss": 0.0002, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0009448816053918563, | |
| "epoch": 0.0058206240380579265, | |
| "grad_norm": 0.041891518980264664, | |
| "kl": 0.005879461765289307, | |
| "learning_rate": 4.972865836804349e-06, | |
| "loss": 0.0001, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0010522838042561489, | |
| "completion_length": 944.3303852081299, | |
| "epoch": 0.00596987080826454, | |
| "grad_norm": 0.039550624787807465, | |
| "kl": 0.008145570755004883, | |
| "learning_rate": 4.970562594651254e-06, | |
| "loss": -0.0, | |
| "num_tokens": 2274125.0, | |
| "reward": 0.3804548046900891, | |
| "reward_std": 0.19460752099985257, | |
| "rewards/code_reward": 0.36295478582178475, | |
| "rewards/format_reward": 0.17499999917345122, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0010110271978192031, | |
| "epoch": 0.006119117578471153, | |
| "grad_norm": 0.040130678564310074, | |
| "kl": 0.008675038814544678, | |
| "learning_rate": 4.968166182139026e-06, | |
| "loss": -0.0, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0008353690973308403, | |
| "epoch": 0.006268364348677767, | |
| "grad_norm": 0.04057696834206581, | |
| "kl": 0.009891927242279053, | |
| "learning_rate": 4.9656766998163306e-06, | |
| "loss": -0.0001, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0012194093724247068, | |
| "completion_length": 896.2678871154785, | |
| "epoch": 0.006417611118884381, | |
| "grad_norm": 0.038462087512016296, | |
| "kl": 0.009711623191833496, | |
| "learning_rate": 4.963094252136865e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 2438179.0, | |
| "reward": 0.37517865585687105, | |
| "reward_std": 0.25164606305770576, | |
| "rewards/code_reward": 0.3574107780586928, | |
| "rewards/format_reward": 0.1776785693364218, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0012110178904549684, | |
| "epoch": 0.006566857889090994, | |
| "grad_norm": 0.03634024038910866, | |
| "kl": 0.010460138320922852, | |
| "learning_rate": 4.960418947454958e-06, | |
| "loss": 0.0001, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.001134861959144473, | |
| "epoch": 0.0067161046592976075, | |
| "grad_norm": 0.04107284173369408, | |
| "kl": 0.01217663288116455, | |
| "learning_rate": 4.957650898021038e-06, | |
| "loss": 0.0002, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0010649148716765922, | |
| "completion_length": 788.4018230438232, | |
| "epoch": 0.006865351429504221, | |
| "grad_norm": 0.0408879816532135, | |
| "kl": 0.016703248023986816, | |
| "learning_rate": 4.954790219976915e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 2592497.0, | |
| "reward": 0.17120053875260055, | |
| "reward_std": 0.1869892734248424, | |
| "rewards/code_reward": 0.15093267243355513, | |
| "rewards/format_reward": 0.20267857098951936, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0011155054035043577, | |
| "epoch": 0.007014598199710834, | |
| "grad_norm": 0.05487113445997238, | |
| "kl": 0.018317341804504395, | |
| "learning_rate": 4.95183703335091e-06, | |
| "loss": 0.0002, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0010340367061871802, | |
| "epoch": 0.0071638449699174475, | |
| "grad_norm": 0.04240145906805992, | |
| "kl": 0.021048426628112793, | |
| "learning_rate": 4.948791462052819e-06, | |
| "loss": 0.0001, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0011539331317180768, | |
| "completion_length": 805.6339588165283, | |
| "epoch": 0.007313091740124062, | |
| "grad_norm": 0.0386325865983963, | |
| "kl": 0.017290830612182617, | |
| "learning_rate": 4.945653633868716e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 2741962.0, | |
| "reward": 0.4239194593974389, | |
| "reward_std": 0.24492623138939962, | |
| "rewards/code_reward": 0.40409802555222996, | |
| "rewards/format_reward": 0.1982142855413258, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0009078447328647599, | |
| "epoch": 0.007462338510330675, | |
| "grad_norm": 0.03812170773744583, | |
| "kl": 0.016597270965576172, | |
| "learning_rate": 4.942423680455584e-06, | |
| "loss": 0.0002, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0010736608801380498, | |
| "epoch": 0.007611585280537288, | |
| "grad_norm": 0.03711409866809845, | |
| "kl": 0.015314102172851562, | |
| "learning_rate": 4.939101737335802e-06, | |
| "loss": 0.0001, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0010047486684925389, | |
| "completion_length": 914.5714664459229, | |
| "epoch": 0.007760832050743902, | |
| "grad_norm": 0.038698915392160416, | |
| "kl": 0.015361785888671875, | |
| "learning_rate": 4.935687943891447e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 2909543.0, | |
| "reward": 0.43717449135147035, | |
| "reward_std": 0.15000206953845918, | |
| "rewards/code_reward": 0.41494232891272986, | |
| "rewards/format_reward": 0.22232142696157098, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0009450064771954203, | |
| "epoch": 0.007910078820950515, | |
| "grad_norm": 0.03570646047592163, | |
| "kl": 0.017791748046875, | |
| "learning_rate": 4.932182443358458e-06, | |
| "loss": 0.0002, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.000910184779058909, | |
| "epoch": 0.00805932559115713, | |
| "grad_norm": 0.03823816031217575, | |
| "kl": 0.0196990966796875, | |
| "learning_rate": 4.928585382820616e-06, | |
| "loss": 0.0001, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0012172754632047145, | |
| "completion_length": 815.2500438690186, | |
| "epoch": 0.008208572361363742, | |
| "grad_norm": 0.0404551737010479, | |
| "kl": 0.02109670639038086, | |
| "learning_rate": 4.924896913203376e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 3056830.0, | |
| "reward": 0.4185403302544728, | |
| "reward_std": 0.25729377527022734, | |
| "rewards/code_reward": 0.38871889375150204, | |
| "rewards/format_reward": 0.2982142888940871, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.001118028167184093, | |
| "epoch": 0.008357819131570356, | |
| "grad_norm": 0.041043564677238464, | |
| "kl": 0.021697044372558594, | |
| "learning_rate": 4.921117189267535e-06, | |
| "loss": 0.0003, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.000911217556677002, | |
| "epoch": 0.00850706590177697, | |
| "grad_norm": 0.040621694177389145, | |
| "kl": 0.02174830436706543, | |
| "learning_rate": 4.917246369602742e-06, | |
| "loss": 0.0002, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0009711457878438523, | |
| "completion_length": 783.9732513427734, | |
| "epoch": 0.008656312671983583, | |
| "grad_norm": 0.04503508657217026, | |
| "kl": 0.015195846557617188, | |
| "learning_rate": 4.9132846166208355e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 3209882.0, | |
| "reward": 0.2656380059197545, | |
| "reward_std": 0.2678962650243193, | |
| "rewards/code_reward": 0.23849514660832938, | |
| "rewards/format_reward": 0.27142857061699033, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0008419685891567497, | |
| "epoch": 0.008805559442190197, | |
| "grad_norm": 0.04335927218198776, | |
| "kl": 0.015669822692871094, | |
| "learning_rate": 4.9092320965490365e-06, | |
| "loss": 0.0001, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0008221774314733921, | |
| "epoch": 0.00895480621239681, | |
| "grad_norm": 0.04567364230751991, | |
| "kl": 0.016145706176757812, | |
| "learning_rate": 4.905088979422971e-06, | |
| "loss": 0.0001, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0007586929878016235, | |
| "completion_length": 986.6250419616699, | |
| "epoch": 0.009104052982603424, | |
| "grad_norm": 0.03794924542307854, | |
| "kl": 0.012533783912658691, | |
| "learning_rate": 4.900855439079536e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 3396775.0, | |
| "reward": 0.2646394136536401, | |
| "reward_std": 0.17509737799991854, | |
| "rewards/code_reward": 0.24338940205052495, | |
| "rewards/format_reward": 0.21250000200234354, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0008951123400038341, | |
| "epoch": 0.009253299752810036, | |
| "grad_norm": 0.0516357384622097, | |
| "kl": 0.013607978820800781, | |
| "learning_rate": 4.8965316531496055e-06, | |
| "loss": 0.0001, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.001083579034457216, | |
| "epoch": 0.00940254652301665, | |
| "grad_norm": 0.040371041744947433, | |
| "kl": 0.015568733215332031, | |
| "learning_rate": 4.892117803050578e-06, | |
| "loss": 0.0001, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0008599141901868279, | |
| "completion_length": 867.6964645385742, | |
| "epoch": 0.009551793293223265, | |
| "grad_norm": 0.044309817254543304, | |
| "kl": 0.023859024047851562, | |
| "learning_rate": 4.887614073978761e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 3552029.0, | |
| "reward": 0.28711679979460314, | |
| "reward_std": 0.2155274103861302, | |
| "rewards/code_reward": 0.25979537097737193, | |
| "rewards/format_reward": 0.27321428805589676, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.000977037544544146, | |
| "epoch": 0.009701040063429877, | |
| "grad_norm": 0.043455298990011215, | |
| "kl": 0.02324676513671875, | |
| "learning_rate": 4.883020654901609e-06, | |
| "loss": 0.0004, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0009009606010295101, | |
| "epoch": 0.009850286833636491, | |
| "grad_norm": 0.04464162513613701, | |
| "kl": 0.02041912078857422, | |
| "learning_rate": 4.878337738549785e-06, | |
| "loss": 0.0002, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0010641216322255787, | |
| "completion_length": 672.2232532501221, | |
| "epoch": 0.009999533603843104, | |
| "grad_norm": 0.0937013179063797, | |
| "kl": 0.019114971160888672, | |
| "learning_rate": 4.873565521409082e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 3686797.0, | |
| "reward": 0.5510467411950231, | |
| "reward_std": 0.13944192277267575, | |
| "rewards/code_reward": 0.5136360162869096, | |
| "rewards/format_reward": 0.37410714593715966, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.000909985425096238, | |
| "epoch": 0.010148780374049718, | |
| "grad_norm": 0.040670786052942276, | |
| "kl": 0.018751859664916992, | |
| "learning_rate": 4.868704203712173e-06, | |
| "loss": 0.0003, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0008242175144914654, | |
| "epoch": 0.010298027144256332, | |
| "grad_norm": 0.04120355844497681, | |
| "kl": 0.018866777420043945, | |
| "learning_rate": 4.86375398943021e-06, | |
| "loss": 0.0001, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0009311497433373006, | |
| "completion_length": 895.2143325805664, | |
| "epoch": 0.010447273914462945, | |
| "grad_norm": 0.0390135832130909, | |
| "kl": 0.01908421516418457, | |
| "learning_rate": 4.858715086264274e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 3850404.0, | |
| "reward": 0.26960674731526524, | |
| "reward_std": 0.17457441904116422, | |
| "rewards/code_reward": 0.23549959086813033, | |
| "rewards/format_reward": 0.3410714268684387, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0009219656994901015, | |
| "epoch": 0.010596520684669559, | |
| "grad_norm": 0.041038449853658676, | |
| "kl": 0.01819467544555664, | |
| "learning_rate": 4.853587705636646e-06, | |
| "loss": 0.0002, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0008795039802862448, | |
| "epoch": 0.010745767454876171, | |
| "grad_norm": 0.03821442276239395, | |
| "kl": 0.017704248428344727, | |
| "learning_rate": 4.84837206268195e-06, | |
| "loss": 0.0001, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0012245200068718987, | |
| "completion_length": 813.1250343322754, | |
| "epoch": 0.010895014225082785, | |
| "grad_norm": 0.03698420152068138, | |
| "kl": 0.014069795608520508, | |
| "learning_rate": 4.8430683762381195e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 4010767.0, | |
| "reward": 0.45059246465098113, | |
| "reward_std": 0.2168489265604876, | |
| "rewards/code_reward": 0.41996744694188237, | |
| "rewards/format_reward": 0.306249993853271, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0011484919050417375, | |
| "epoch": 0.011044260995289398, | |
| "grad_norm": 0.035879626870155334, | |
| "kl": 0.012859582901000977, | |
| "learning_rate": 4.837676868837213e-06, | |
| "loss": 0.0002, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0010865089680009987, | |
| "epoch": 0.011193507765496012, | |
| "grad_norm": 0.03598187118768692, | |
| "kl": 0.013086557388305664, | |
| "learning_rate": 4.832197766696085e-06, | |
| "loss": 0.0002, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0011177412579854717, | |
| "completion_length": 851.6250400543213, | |
| "epoch": 0.011342754535702626, | |
| "grad_norm": 0.04002436622977257, | |
| "kl": 0.027965068817138672, | |
| "learning_rate": 4.826631299706887e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 4166006.0, | |
| "reward": 0.25119215465383604, | |
| "reward_std": 0.19108223204966635, | |
| "rewards/code_reward": 0.2239600099856034, | |
| "rewards/format_reward": 0.27232143259607255, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0009378863978781737, | |
| "epoch": 0.011492001305909239, | |
| "grad_norm": 0.03956011310219765, | |
| "kl": 0.027362942695617676, | |
| "learning_rate": 4.820977701427424e-06, | |
| "loss": 0.0003, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0010317562919226475, | |
| "epoch": 0.011641248076115853, | |
| "grad_norm": 0.040139809250831604, | |
| "kl": 0.029706120491027832, | |
| "learning_rate": 4.81523720907136e-06, | |
| "loss": 0.0002, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0010767242492875084, | |
| "completion_length": 840.508955001831, | |
| "epoch": 0.011790494846322466, | |
| "grad_norm": 0.04374147206544876, | |
| "kl": 0.013130903244018555, | |
| "learning_rate": 4.809410063498254e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 4338571.0, | |
| "reward": 0.4022022826829925, | |
| "reward_std": 0.25021205260418355, | |
| "rewards/code_reward": 0.37372013460844755, | |
| "rewards/format_reward": 0.28482142509892583, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0010619452350510983, | |
| "epoch": 0.01193974161652908, | |
| "grad_norm": 0.042508602142333984, | |
| "kl": 0.013546943664550781, | |
| "learning_rate": 4.8034965092034656e-06, | |
| "loss": 0.0001, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0010448531356814783, | |
| "epoch": 0.012088988386735694, | |
| "grad_norm": 0.04649941623210907, | |
| "kl": 0.01339411735534668, | |
| "learning_rate": 4.797496794307889e-06, | |
| "loss": -0.0, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0010828481463249773, | |
| "completion_length": 921.9821815490723, | |
| "epoch": 0.012238235156942306, | |
| "grad_norm": 0.04039299488067627, | |
| "kl": 0.018650054931640625, | |
| "learning_rate": 4.791411170547545e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 4510139.0, | |
| "reward": 0.3364243805408478, | |
| "reward_std": 0.30479453783482313, | |
| "rewards/code_reward": 0.31017436436377466, | |
| "rewards/format_reward": 0.26250000204890966, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0008476070252072532, | |
| "epoch": 0.01238748192714892, | |
| "grad_norm": 0.040362074971199036, | |
| "kl": 0.01748943328857422, | |
| "learning_rate": 4.785239893263017e-06, | |
| "loss": 0.0001, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.001010585496260319, | |
| "epoch": 0.012536728697355533, | |
| "grad_norm": 0.039599236100912094, | |
| "kl": 0.016507625579833984, | |
| "learning_rate": 4.778983221388742e-06, | |
| "loss": 0.0001, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0010595632738841232, | |
| "completion_length": 799.1607475280762, | |
| "epoch": 0.012685975467562147, | |
| "grad_norm": 0.041213374584913254, | |
| "kl": 0.016191959381103516, | |
| "learning_rate": 4.77264141744214e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 4668714.0, | |
| "reward": 0.4136724097770639, | |
| "reward_std": 0.23826690285932273, | |
| "rewards/code_reward": 0.38706526932219276, | |
| "rewards/format_reward": 0.26607142575085163, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0008205887170333881, | |
| "epoch": 0.012835222237768762, | |
| "grad_norm": 0.04150375723838806, | |
| "kl": 0.015417337417602539, | |
| "learning_rate": 4.766214747512603e-06, | |
| "loss": 0.0002, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0008396551329497015, | |
| "epoch": 0.012984469007975374, | |
| "grad_norm": 0.03998123109340668, | |
| "kl": 0.01720881462097168, | |
| "learning_rate": 4.759703481250331e-06, | |
| "loss": 0.0001, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0010611350435283384, | |
| "completion_length": 836.7500419616699, | |
| "epoch": 0.013133715778181988, | |
| "grad_norm": 0.03714749217033386, | |
| "kl": 0.0200653076171875, | |
| "learning_rate": 4.753107891855015e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 4824940.0, | |
| "reward": 0.4083272517891601, | |
| "reward_std": 0.22058491280768067, | |
| "rewards/code_reward": 0.38091653838637285, | |
| "rewards/format_reward": 0.2741071404889226, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0009375711833854439, | |
| "epoch": 0.0132829625483886, | |
| "grad_norm": 0.0368531309068203, | |
| "kl": 0.019817352294921875, | |
| "learning_rate": 4.746428256064375e-06, | |
| "loss": 0.0001, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0010657701477612136, | |
| "epoch": 0.013432209318595215, | |
| "grad_norm": 0.047454558312892914, | |
| "kl": 0.02184915542602539, | |
| "learning_rate": 4.7396648541425534e-06, | |
| "loss": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0010593721226541675, | |
| "completion_length": 861.2053928375244, | |
| "epoch": 0.013581456088801827, | |
| "grad_norm": 0.03824659064412117, | |
| "kl": 0.015380144119262695, | |
| "learning_rate": 4.732817969868348e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 4993061.0, | |
| "reward": 0.4410945254203398, | |
| "reward_std": 0.21284229948651046, | |
| "rewards/code_reward": 0.40627308818511665, | |
| "rewards/format_reward": 0.34821428428404033, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0010110190596606117, | |
| "epoch": 0.013730702859008442, | |
| "grad_norm": 0.040716975927352905, | |
| "kl": 0.015566825866699219, | |
| "learning_rate": 4.7258878905233095e-06, | |
| "loss": 0.0003, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0010006381999119185, | |
| "epoch": 0.013879949629215056, | |
| "grad_norm": 0.03767255321145058, | |
| "kl": 0.015938758850097656, | |
| "learning_rate": 4.718874906879688e-06, | |
| "loss": 0.0002, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0009179275557471556, | |
| "completion_length": 751.1339683532715, | |
| "epoch": 0.014029196399421668, | |
| "grad_norm": 0.042217355221509933, | |
| "kl": 0.022792816162109375, | |
| "learning_rate": 4.711779313188231e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 5139257.0, | |
| "reward": 0.40494063566438854, | |
| "reward_std": 0.1955110931303352, | |
| "rewards/code_reward": 0.3761906255967915, | |
| "rewards/format_reward": 0.2875000007916242, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0008862965933076339, | |
| "epoch": 0.014178443169628283, | |
| "grad_norm": 0.04299935698509216, | |
| "kl": 0.0221099853515625, | |
| "learning_rate": 4.70460140716584e-06, | |
| "loss": 0.0003, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0009494263367741951, | |
| "epoch": 0.014327689939834895, | |
| "grad_norm": 0.04077346622943878, | |
| "kl": 0.021993637084960938, | |
| "learning_rate": 4.697341489983076e-06, | |
| "loss": 0.0002, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0008807632484604255, | |
| "completion_length": 809.6428909301758, | |
| "epoch": 0.01447693671004151, | |
| "grad_norm": 0.040974438190460205, | |
| "kl": 0.02227783203125, | |
| "learning_rate": 4.6899998662515215e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 5283564.0, | |
| "reward": 0.4401598667027429, | |
| "reward_std": 0.3118339345091954, | |
| "rewards/code_reward": 0.4056062815361656, | |
| "rewards/format_reward": 0.34553571231663227, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0010328342650609557, | |
| "epoch": 0.014626183480248123, | |
| "grad_norm": 0.04546511545777321, | |
| "kl": 0.021175384521484375, | |
| "learning_rate": 4.682576844011007e-06, | |
| "loss": 0.0002, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0010472134017618373, | |
| "epoch": 0.014775430250454736, | |
| "grad_norm": 0.04181186109781265, | |
| "kl": 0.021422386169433594, | |
| "learning_rate": 4.675072734716678e-06, | |
| "loss": 0.0004, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio": 0.0009307468735642033, | |
| "completion_length": 739.9553871154785, | |
| "epoch": 0.01492467702066135, | |
| "grad_norm": 0.04855009913444519, | |
| "kl": 0.018518447875976562, | |
| "learning_rate": 4.667487853225931e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 5432463.0, | |
| "reward": 0.4610493981745094, | |
| "reward_std": 0.24288451514439657, | |
| "rewards/code_reward": 0.4116743914783001, | |
| "rewards/format_reward": 0.49375000409781933, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.001017617063553189, | |
| "epoch": 0.015073923790867963, | |
| "grad_norm": 0.0445452444255352, | |
| "kl": 0.020605087280273438, | |
| "learning_rate": 4.659822517785203e-06, | |
| "loss": 0.0003, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0009316829600720666, | |
| "epoch": 0.015223170561074577, | |
| "grad_norm": 0.04415549710392952, | |
| "kl": 0.022787094116210938, | |
| "learning_rate": 4.6520770500166165e-06, | |
| "loss": 0.0002, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0010779687218018807, | |
| "completion_length": 861.6339664459229, | |
| "epoch": 0.015372417331281191, | |
| "grad_norm": 0.03548543527722359, | |
| "kl": 0.02224445343017578, | |
| "learning_rate": 4.644251774904487e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 5597916.0, | |
| "reward": 0.12403731828089803, | |
| "reward_std": 0.16697289876174182, | |
| "rewards/code_reward": 0.09421588902478106, | |
| "rewards/format_reward": 0.2982142874971032, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0009617375380912563, | |
| "epoch": 0.015521664101487804, | |
| "grad_norm": 0.036081403493881226, | |
| "kl": 0.023712158203125, | |
| "learning_rate": 4.636347020781684e-06, | |
| "loss": 0.0003, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0011486706807772862, | |
| "epoch": 0.015670910871694418, | |
| "grad_norm": 0.03618353605270386, | |
| "kl": 0.024709224700927734, | |
| "learning_rate": 4.6283631193158605e-06, | |
| "loss": 0.0003, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0011008622332155937, | |
| "completion_length": 892.1518287658691, | |
| "epoch": 0.01582015764190103, | |
| "grad_norm": 0.04874338582158089, | |
| "kl": 0.011252880096435547, | |
| "learning_rate": 4.620300405495532e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 5773913.0, | |
| "reward": 0.32322888064663857, | |
| "reward_std": 0.19707379283499904, | |
| "rewards/code_reward": 0.28242531220894307, | |
| "rewards/format_reward": 0.4080357113853097, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0008924784815462772, | |
| "epoch": 0.015969404412107643, | |
| "grad_norm": 0.04063912108540535, | |
| "kl": 0.01187896728515625, | |
| "learning_rate": 4.612159217616022e-06, | |
| "loss": 0.0001, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0010843967938853893, | |
| "epoch": 0.01611865118231426, | |
| "grad_norm": 0.03972407057881355, | |
| "kl": 0.012783527374267578, | |
| "learning_rate": 4.603939897265268e-06, | |
| "loss": 0.0001, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0010632504654495278, | |
| "completion_length": 760.1875267028809, | |
| "epoch": 0.01626789795252087, | |
| "grad_norm": 0.04420414939522743, | |
| "kl": 0.03521156311035156, | |
| "learning_rate": 4.595642789309492e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 5917315.0, | |
| "reward": 0.5145110483281314, | |
| "reward_std": 0.2714524638140574, | |
| "rewards/code_reward": 0.4761181781068444, | |
| "rewards/format_reward": 0.3839285736903548, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio": 0.0009606969933884102, | |
| "epoch": 0.016417144722727484, | |
| "grad_norm": 0.04337799921631813, | |
| "kl": 0.03320503234863281, | |
| "learning_rate": 4.587268241878724e-06, | |
| "loss": 0.0003, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0013132303793099709, | |
| "epoch": 0.0165663914929341, | |
| "grad_norm": 0.03975389152765274, | |
| "kl": 0.030458450317382812, | |
| "learning_rate": 4.578816606352205e-06, | |
| "loss": 0.0004, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0009267619698221097, | |
| "completion_length": 730.0000305175781, | |
| "epoch": 0.016715638263140712, | |
| "grad_norm": 0.045510776340961456, | |
| "kl": 0.015173912048339844, | |
| "learning_rate": 4.570288237343632e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 6063748.0, | |
| "reward": 0.5225313189439476, | |
| "reward_std": 0.31967434333637357, | |
| "rewards/code_reward": 0.48538846569135785, | |
| "rewards/format_reward": 0.3714285744354129, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.001110001046981779, | |
| "epoch": 0.016864885033347325, | |
| "grad_norm": 0.04822126775979996, | |
| "kl": 0.014050483703613281, | |
| "learning_rate": 4.561683492686289e-06, | |
| "loss": 0.0003, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0010535481942497427, | |
| "epoch": 0.01701413180355394, | |
| "grad_norm": 0.047740690410137177, | |
| "kl": 0.013932228088378906, | |
| "learning_rate": 4.5530027334180285e-06, | |
| "loss": 0.0001, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0007933495671750279, | |
| "completion_length": 842.017894744873, | |
| "epoch": 0.017163378573760553, | |
| "grad_norm": 0.049595024436712265, | |
| "kl": 0.019130706787109375, | |
| "learning_rate": 4.544246323766122e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 6223518.0, | |
| "reward": 0.40766190411522985, | |
| "reward_std": 0.2016809419146739, | |
| "rewards/code_reward": 0.36694761738181114, | |
| "rewards/format_reward": 0.4071428570896387, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0009076765018107835, | |
| "epoch": 0.017312625343967165, | |
| "grad_norm": 0.055327292531728745, | |
| "kl": 0.02070760726928711, | |
| "learning_rate": 4.535414631131983e-06, | |
| "loss": 0.0003, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0008779259478615131, | |
| "epoch": 0.017461872114173778, | |
| "grad_norm": 0.05579111725091934, | |
| "kl": 0.022034168243408203, | |
| "learning_rate": 4.526508026075746e-06, | |
| "loss": 0.0001, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0013046737203694647, | |
| "completion_length": 891.5893230438232, | |
| "epoch": 0.017611118884380394, | |
| "grad_norm": 0.044059909880161285, | |
| "kl": 0.02873706817626953, | |
| "learning_rate": 4.517526882300721e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 6382309.0, | |
| "reward": 0.3803525387775153, | |
| "reward_std": 0.21154743927763775, | |
| "rewards/code_reward": 0.33945966558530927, | |
| "rewards/format_reward": 0.40892857499420643, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0011895602456206689, | |
| "epoch": 0.017760365654587006, | |
| "grad_norm": 0.05387920141220093, | |
| "kl": 0.02957630157470703, | |
| "learning_rate": 4.508471576637713e-06, | |
| "loss": 0.0004, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio": 0.0011620367440627888, | |
| "epoch": 0.01790961242479362, | |
| "grad_norm": 0.04833860695362091, | |
| "kl": 0.028537750244140625, | |
| "learning_rate": 4.499342489029211e-06, | |
| "loss": 0.0003, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0010427872803120408, | |
| "completion_length": 885.7411022186279, | |
| "epoch": 0.018058859195000235, | |
| "grad_norm": 0.04128396511077881, | |
| "kl": 0.01944446563720703, | |
| "learning_rate": 4.490140002513449e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 6544827.0, | |
| "reward": 0.37972693943447666, | |
| "reward_std": 0.23223779880208895, | |
| "rewards/code_reward": 0.3414233757648617, | |
| "rewards/format_reward": 0.3830357129336335, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0011303816172585357, | |
| "epoch": 0.018208105965206847, | |
| "grad_norm": 0.041573666036129, | |
| "kl": 0.02045726776123047, | |
| "learning_rate": 4.48086450320833e-06, | |
| "loss": 0.0002, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.001024394565320108, | |
| "epoch": 0.01835735273541346, | |
| "grad_norm": 0.05487101525068283, | |
| "kl": 0.020847320556640625, | |
| "learning_rate": 4.4715163802952266e-06, | |
| "loss": 0.0001, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0011558874903130345, | |
| "completion_length": 775.7053852081299, | |
| "epoch": 0.018506599505620072, | |
| "grad_norm": 0.04507587477564812, | |
| "kl": 0.030223846435546875, | |
| "learning_rate": 4.462096026002655e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 6686845.0, | |
| "reward": 0.4757464681752026, | |
| "reward_std": 0.18553807557327673, | |
| "rewards/code_reward": 0.433782160282135, | |
| "rewards/format_reward": 0.4196428628638387, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0010900183115154505, | |
| "epoch": 0.018655846275826688, | |
| "grad_norm": 0.042524147778749466, | |
| "kl": 0.033313751220703125, | |
| "learning_rate": 4.4526038355898144e-06, | |
| "loss": 0.0003, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0010025524697994115, | |
| "epoch": 0.0188050930460333, | |
| "grad_norm": 0.04530392214655876, | |
| "kl": 0.03578948974609375, | |
| "learning_rate": 4.4430402073300035e-06, | |
| "loss": 0.0003, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0009960982242773753, | |
| "completion_length": 835.267894744873, | |
| "epoch": 0.018954339816239913, | |
| "grad_norm": 0.04436438903212547, | |
| "kl": 0.025674819946289062, | |
| "learning_rate": 4.433405542493909e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 6840994.0, | |
| "reward": 0.5514912263024598, | |
| "reward_std": 0.24589035636745393, | |
| "rewards/code_reward": 0.5114019273314625, | |
| "rewards/format_reward": 0.40089286118745804, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0010956393562082667, | |
| "epoch": 0.01910358658644653, | |
| "grad_norm": 0.046535246074199677, | |
| "kl": 0.024074554443359375, | |
| "learning_rate": 4.4237002453327734e-06, | |
| "loss": 0.0003, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0011050422554035322, | |
| "epoch": 0.01925283335665314, | |
| "grad_norm": 0.04183583706617355, | |
| "kl": 0.023334503173828125, | |
| "learning_rate": 4.4139247230614245e-06, | |
| "loss": 0.0003, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio": 0.0010861948831006885, | |
| "completion_length": 800.0625343322754, | |
| "epoch": 0.019402080126859754, | |
| "grad_norm": 0.04727044329047203, | |
| "kl": 0.019486427307128906, | |
| "learning_rate": 4.404079385841201e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 6992950.0, | |
| "reward": 0.4348937713075429, | |
| "reward_std": 0.21684974804520607, | |
| "rewards/code_reward": 0.3872152113035554, | |
| "rewards/format_reward": 0.4767857138067484, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0010590591746222344, | |
| "epoch": 0.019551326897066366, | |
| "grad_norm": 0.04723493382334709, | |
| "kl": 0.019405364990234375, | |
| "learning_rate": 4.394164646762734e-06, | |
| "loss": 0.0001, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0010405187804281013, | |
| "epoch": 0.019700573667272982, | |
| "grad_norm": 0.04620596021413803, | |
| "kl": 0.020595550537109375, | |
| "learning_rate": 4.384180921828618e-06, | |
| "loss": 0.0001, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio": 0.0009282534629164729, | |
| "completion_length": 690.0268249511719, | |
| "epoch": 0.019849820437479595, | |
| "grad_norm": 0.048521529883146286, | |
| "kl": 0.022710800170898438, | |
| "learning_rate": 4.374128629935955e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 7130006.0, | |
| "reward": 0.45080209872685373, | |
| "reward_std": 0.2004464182537049, | |
| "rewards/code_reward": 0.40553424460813403, | |
| "rewards/format_reward": 0.45267857518047094, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio": 0.0010145993473997805, | |
| "epoch": 0.019999067207686207, | |
| "grad_norm": 0.04121008515357971, | |
| "kl": 0.023035049438476562, | |
| "learning_rate": 4.364008192858781e-06, | |
| "loss": 0.0002, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio": 0.0007326995892071864, | |
| "epoch": 0.020148313977892823, | |
| "grad_norm": 0.04002685472369194, | |
| "kl": 0.022815704345703125, | |
| "learning_rate": 4.353820035230366e-06, | |
| "loss": -0.0, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0012416261888574809, | |
| "completion_length": 797.4107418060303, | |
| "epoch": 0.020297560748099436, | |
| "grad_norm": 0.042961277067661285, | |
| "kl": 0.0173187255859375, | |
| "learning_rate": 4.3435645845254e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 7274532.0, | |
| "reward": 0.4839099827222526, | |
| "reward_std": 0.2480696743587032, | |
| "rewards/code_reward": 0.4350706939585507, | |
| "rewards/format_reward": 0.48839285923168063, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio": 0.0011591103339014808, | |
| "epoch": 0.020446807518306048, | |
| "grad_norm": 0.042345136404037476, | |
| "kl": 0.017218589782714844, | |
| "learning_rate": 4.333242271042054e-06, | |
| "loss": 0.0001, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio": 0.0010988163867295953, | |
| "epoch": 0.020596054288512664, | |
| "grad_norm": 0.03951758146286011, | |
| "kl": 0.01706218719482422, | |
| "learning_rate": 4.32285352788393e-06, | |
| "loss": -0.0, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio": 0.0011232689575990662, | |
| "completion_length": 855.330394744873, | |
| "epoch": 0.020745301058719277, | |
| "grad_norm": 0.0443883016705513, | |
| "kl": 0.024089813232421875, | |
| "learning_rate": 4.312398790941882e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 7437319.0, | |
| "reward": 0.4357127647381276, | |
| "reward_std": 0.2294543270545546, | |
| "rewards/code_reward": 0.3918734625913203, | |
| "rewards/format_reward": 0.4383928570896387, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio": 0.001039013011904899, | |
| "epoch": 0.02089454782892589, | |
| "grad_norm": 0.044901248067617416, | |
| "kl": 0.023738861083984375, | |
| "learning_rate": 4.301878498875735e-06, | |
| "loss": 0.0004, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0010252416741423076, | |
| "epoch": 0.0210437945991325, | |
| "grad_norm": 0.0455520935356617, | |
| "kl": 0.02365875244140625, | |
| "learning_rate": 4.291293093095873e-06, | |
| "loss": 0.0003, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio": 0.001033905820804648, | |
| "completion_length": 854.7143211364746, | |
| "epoch": 0.021193041369339118, | |
| "grad_norm": 0.04243411123752594, | |
| "kl": 0.03364276885986328, | |
| "learning_rate": 4.280643017744723e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 7613862.0, | |
| "reward": 0.2988048434490338, | |
| "reward_std": 0.20296698369202204, | |
| "rewards/code_reward": 0.25853697166166967, | |
| "rewards/format_reward": 0.40267857164144516, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio": 0.0012581654991663527, | |
| "epoch": 0.02134228813954573, | |
| "grad_norm": 0.04118216782808304, | |
| "kl": 0.033367156982421875, | |
| "learning_rate": 4.269928719678117e-06, | |
| "loss": 0.0004, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio": 0.001068375633622054, | |
| "epoch": 0.021491534909752343, | |
| "grad_norm": 0.039931606501340866, | |
| "kl": 0.03148937225341797, | |
| "learning_rate": 4.2591506484465426e-06, | |
| "loss": 0.0002, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio": 0.0013881738632335328, | |
| "completion_length": 809.4286079406738, | |
| "epoch": 0.02164078167995896, | |
| "grad_norm": 0.05351189151406288, | |
| "kl": 0.04827117919921875, | |
| "learning_rate": 4.248309256276283e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 7762821.0, | |
| "reward": 0.3853453987976536, | |
| "reward_std": 0.1850955591071397, | |
| "rewards/code_reward": 0.3422203725203872, | |
| "rewards/format_reward": 0.43125000037252903, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0014689411145809572, | |
| "epoch": 0.02179002845016557, | |
| "grad_norm": 0.05481509119272232, | |
| "kl": 0.04859161376953125, | |
| "learning_rate": 4.23740499805044e-06, | |
| "loss": 0.0006, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0015166042630880838, | |
| "epoch": 0.021939275220372183, | |
| "grad_norm": 0.047822095453739166, | |
| "kl": 0.039295196533203125, | |
| "learning_rate": 4.22643833128985e-06, | |
| "loss": 0.0005, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0014342169270094018, | |
| "completion_length": 730.4107513427734, | |
| "epoch": 0.022088521990578796, | |
| "grad_norm": 0.04696185141801834, | |
| "kl": 0.02483367919921875, | |
| "learning_rate": 4.215409716133885e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 7900536.0, | |
| "reward": 0.48524553480092436, | |
| "reward_std": 0.20289410289842635, | |
| "rewards/code_reward": 0.43283481095568277, | |
| "rewards/format_reward": 0.5241071395576, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0014147953188512474, | |
| "epoch": 0.022237768760785412, | |
| "grad_norm": 0.049281951040029526, | |
| "kl": 0.025539398193359375, | |
| "learning_rate": 4.204319615321151e-06, | |
| "loss": 0.0002, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio": 0.001617625761355157, | |
| "epoch": 0.022387015530992024, | |
| "grad_norm": 0.047861773520708084, | |
| "kl": 0.026336669921875, | |
| "learning_rate": 4.193168494170065e-06, | |
| "loss": 0.0003, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0011637477964541176, | |
| "completion_length": 847.5804023742676, | |
| "epoch": 0.022536262301198637, | |
| "grad_norm": 0.043306585401296616, | |
| "kl": 0.01751708984375, | |
| "learning_rate": 4.181956820559339e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 8062014.0, | |
| "reward": 0.4883292701561004, | |
| "reward_std": 0.24242727342061698, | |
| "rewards/code_reward": 0.4366328134201467, | |
| "rewards/format_reward": 0.5169642921537161, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.0012195148992759641, | |
| "epoch": 0.022685509071405253, | |
| "grad_norm": 0.04269765689969063, | |
| "kl": 0.018365859985351562, | |
| "learning_rate": 4.170685064908342e-06, | |
| "loss": 0.0001, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0012396183246892178, | |
| "epoch": 0.022834755841611865, | |
| "grad_norm": 0.061775047332048416, | |
| "kl": 0.018674850463867188, | |
| "learning_rate": 4.159353700157365e-06, | |
| "loss": 0.0001, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.0012902800481242593, | |
| "completion_length": 876.3750400543213, | |
| "epoch": 0.022984002611818478, | |
| "grad_norm": 0.04410018399357796, | |
| "kl": 0.02848052978515625, | |
| "learning_rate": 4.14796320174778e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 8221495.0, | |
| "reward": 0.5067510595545173, | |
| "reward_std": 0.1769608927424997, | |
| "rewards/code_reward": 0.45487602904904634, | |
| "rewards/format_reward": 0.5187500035390258, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.0012712476654996863, | |
| "epoch": 0.023133249382025094, | |
| "grad_norm": 0.042361412197351456, | |
| "kl": 0.02883148193359375, | |
| "learning_rate": 4.136514047602087e-06, | |
| "loss": 0.0003, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0012503136294981232, | |
| "epoch": 0.023282496152231706, | |
| "grad_norm": 0.04951699078083038, | |
| "kl": 0.030870437622070312, | |
| "learning_rate": 4.1250067181038635e-06, | |
| "loss": 0.0003, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.001006045729809557, | |
| "completion_length": 770.8571758270264, | |
| "epoch": 0.02343174292243832, | |
| "grad_norm": 0.19089701771736145, | |
| "kl": 0.158905029296875, | |
| "learning_rate": 4.113441696077608e-06, | |
| "loss": 0.0015, | |
| "num_tokens": 8361654.0, | |
| "reward": 0.5336465365253389, | |
| "reward_std": 0.22312293585855514, | |
| "rewards/code_reward": 0.4696286738035269, | |
| "rewards/format_reward": 0.6401785779744387, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.000968805668890127, | |
| "epoch": 0.02358098969264493, | |
| "grad_norm": 0.07467727363109589, | |
| "kl": 0.08309364318847656, | |
| "learning_rate": 4.101819466768484e-06, | |
| "loss": 0.0009, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.001212779263369157, | |
| "epoch": 0.023730236462851547, | |
| "grad_norm": 0.10417937487363815, | |
| "kl": 0.049536705017089844, | |
| "learning_rate": 4.0901405178219535e-06, | |
| "loss": 0.0007, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio": 0.0012088071471225703, | |
| "completion_length": 773.2321720123291, | |
| "epoch": 0.02387948323305816, | |
| "grad_norm": 0.0834612250328064, | |
| "kl": 0.07547187805175781, | |
| "learning_rate": 4.078405339263326e-06, | |
| "loss": 0.0008, | |
| "num_tokens": 8509170.0, | |
| "reward": 0.5869864404667169, | |
| "reward_std": 0.19239688021480106, | |
| "rewards/code_reward": 0.5358257133048028, | |
| "rewards/format_reward": 0.511607144959271, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.001222252394654788, | |
| "epoch": 0.024028730003264772, | |
| "grad_norm": 0.11993829160928726, | |
| "kl": 0.057216644287109375, | |
| "learning_rate": 4.06661442347719e-06, | |
| "loss": 0.0005, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.001276199891435681, | |
| "epoch": 0.024177976773471388, | |
| "grad_norm": 0.05552421510219574, | |
| "kl": 0.04888153076171875, | |
| "learning_rate": 4.054768265186758e-06, | |
| "loss": 0.0005, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.0010032035952463048, | |
| "completion_length": 790.5089588165283, | |
| "epoch": 0.024327223543678, | |
| "grad_norm": 0.1210794448852539, | |
| "kl": 0.03442573547363281, | |
| "learning_rate": 4.0428673614331036e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 8655443.0, | |
| "reward": 0.5846006067004055, | |
| "reward_std": 0.3078883128473535, | |
| "rewards/code_reward": 0.5278148755605798, | |
| "rewards/format_reward": 0.5678571437019855, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.0010885789324674988, | |
| "epoch": 0.024476470313884613, | |
| "grad_norm": 0.04959575831890106, | |
| "kl": 0.032726287841796875, | |
| "learning_rate": 4.030912211554316e-06, | |
| "loss": 0.0003, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.0011579382626223378, | |
| "epoch": 0.024625717084091225, | |
| "grad_norm": 0.0483749583363533, | |
| "kl": 0.030529022216796875, | |
| "learning_rate": 4.018903317164539e-06, | |
| "loss": 0.0003, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.001137841476520407, | |
| "completion_length": 954.4554138183594, | |
| "epoch": 0.02477496385429784, | |
| "grad_norm": 0.04242149367928505, | |
| "kl": 0.02735614776611328, | |
| "learning_rate": 4.006841182132932e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 8825811.0, | |
| "reward": 0.343663371168077, | |
| "reward_std": 0.21622509372537024, | |
| "rewards/code_reward": 0.2994669284671545, | |
| "rewards/format_reward": 0.4419642901048064, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.001333304300715099, | |
| "epoch": 0.024924210624504454, | |
| "grad_norm": 0.05318411439657211, | |
| "kl": 0.02469921112060547, | |
| "learning_rate": 3.9947263125625195e-06, | |
| "loss": 0.0004, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0014823580131633207, | |
| "epoch": 0.025073457394711066, | |
| "grad_norm": 0.04331374540925026, | |
| "kl": 0.023836135864257812, | |
| "learning_rate": 3.982559216768967e-06, | |
| "loss": 0.0003, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.001230707457580138, | |
| "completion_length": 1086.4554080963135, | |
| "epoch": 0.025222704164917682, | |
| "grad_norm": 0.043598975986242294, | |
| "kl": 0.014312744140625, | |
| "learning_rate": 3.970340405259245e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 9021743.0, | |
| "reward": 0.2815680430503562, | |
| "reward_std": 0.18689183273818344, | |
| "rewards/code_reward": 0.22210374718997627, | |
| "rewards/format_reward": 0.594642854295671, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio": 0.0012951591161254328, | |
| "epoch": 0.025371950935124295, | |
| "grad_norm": 0.045120351016521454, | |
| "kl": 0.014659881591796875, | |
| "learning_rate": 3.958070390710214e-06, | |
| "loss": 0.0001, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0013625593310280237, | |
| "epoch": 0.025521197705330907, | |
| "grad_norm": 0.042519036680459976, | |
| "kl": 0.014459609985351562, | |
| "learning_rate": 3.945749687947109e-06, | |
| "loss": 0.0002, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.001266109573407448, | |
| "completion_length": 758.3928909301758, | |
| "epoch": 0.025670444475537523, | |
| "grad_norm": 0.04733174666762352, | |
| "kl": 0.027559280395507812, | |
| "learning_rate": 3.933378813921942e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 9178762.0, | |
| "reward": 0.5201502540148795, | |
| "reward_std": 0.27701563900336623, | |
| "rewards/code_reward": 0.4503288194537163, | |
| "rewards/format_reward": 0.6982142943888903, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0013183873506932287, | |
| "epoch": 0.025819691245744136, | |
| "grad_norm": 0.043405089527368546, | |
| "kl": 0.028142929077148438, | |
| "learning_rate": 3.920958287691811e-06, | |
| "loss": 0.0003, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.0013744723346462706, | |
| "epoch": 0.025968938015950748, | |
| "grad_norm": 0.04960908368229866, | |
| "kl": 0.030193328857421875, | |
| "learning_rate": 3.908488630397121e-06, | |
| "loss": 0.0003, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0011061989516747417, | |
| "completion_length": 970.54469871521, | |
| "epoch": 0.02611818478615736, | |
| "grad_norm": 0.04485900327563286, | |
| "kl": 0.01617908477783203, | |
| "learning_rate": 3.8959703652397175e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 9359647.0, | |
| "reward": 0.34467554092407227, | |
| "reward_std": 0.18839130480773747, | |
| "rewards/code_reward": 0.2806576928123832, | |
| "rewards/format_reward": 0.6401785714551806, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.001184371045383159, | |
| "epoch": 0.026267431556363977, | |
| "grad_norm": 0.05172208696603775, | |
| "kl": 0.01747894287109375, | |
| "learning_rate": 3.883404017460935e-06, | |
| "loss": 0.0001, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.0011093282719230046, | |
| "epoch": 0.02641667832657059, | |
| "grad_norm": 0.0427677184343338, | |
| "kl": 0.019744873046875, | |
| "learning_rate": 3.870790114319559e-06, | |
| "loss": 0.0, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.001037665282638045, | |
| "completion_length": 806.4732513427734, | |
| "epoch": 0.0265659250967772, | |
| "grad_norm": 0.04383108392357826, | |
| "kl": 0.03477668762207031, | |
| "learning_rate": 3.858129185069701e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 9501968.0, | |
| "reward": 0.5641923944931477, | |
| "reward_std": 0.20727904001250863, | |
| "rewards/code_reward": 0.4969602590499562, | |
| "rewards/format_reward": 0.6723214294761419, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0013952661065559369, | |
| "epoch": 0.026715171866983817, | |
| "grad_norm": 0.04694554954767227, | |
| "kl": 0.036670684814453125, | |
| "learning_rate": 3.845421760938597e-06, | |
| "loss": 0.0004, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio": 0.0012856538778578397, | |
| "epoch": 0.02686441863719043, | |
| "grad_norm": 0.04348964989185333, | |
| "kl": 0.03646087646484375, | |
| "learning_rate": 3.832668375104312e-06, | |
| "loss": 0.0003, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0012388697778078495, | |
| "completion_length": 785.8928909301758, | |
| "epoch": 0.027013665407397042, | |
| "grad_norm": 0.04777670279145241, | |
| "kl": 0.034305572509765625, | |
| "learning_rate": 3.8198695626733725e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 9653849.0, | |
| "reward": 0.584011502796784, | |
| "reward_std": 0.22272017179057002, | |
| "rewards/code_reward": 0.49740435276180506, | |
| "rewards/format_reward": 0.8660714142024517, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.0014184717001626268, | |
| "epoch": 0.027162912177603655, | |
| "grad_norm": 0.06207079812884331, | |
| "kl": 0.032253265380859375, | |
| "learning_rate": 3.8070258606583156e-06, | |
| "loss": 0.0003, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0012104590714443475, | |
| "epoch": 0.02731215894781027, | |
| "grad_norm": 0.047974348068237305, | |
| "kl": 0.03397369384765625, | |
| "learning_rate": 3.7941378079551544e-06, | |
| "loss": 0.0003, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.0013559910148615018, | |
| "completion_length": 798.0179023742676, | |
| "epoch": 0.027461405718016883, | |
| "grad_norm": 0.0428900420665741, | |
| "kl": 0.028224945068359375, | |
| "learning_rate": 3.7812059453207677e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 9802783.0, | |
| "reward": 0.5101535907015204, | |
| "reward_std": 0.2436997564509511, | |
| "rewards/code_reward": 0.4328321506618522, | |
| "rewards/format_reward": 0.7732142838649452, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0014902301008987706, | |
| "epoch": 0.027610652488223496, | |
| "grad_norm": 0.04620678350329399, | |
| "kl": 0.0277099609375, | |
| "learning_rate": 3.768230815350213e-06, | |
| "loss": 0.0003, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0015550048210570822, | |
| "epoch": 0.02775989925843011, | |
| "grad_norm": 0.04612104594707489, | |
| "kl": 0.029872894287109375, | |
| "learning_rate": 3.7552129624539557e-06, | |
| "loss": 0.0003, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.00117877590128046, | |
| "completion_length": 909.267894744873, | |
| "epoch": 0.027909146028636724, | |
| "grad_norm": 0.047490544617176056, | |
| "kl": 0.035709381103515625, | |
| "learning_rate": 3.7421529328350316e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 9970498.0, | |
| "reward": 0.3525247387588024, | |
| "reward_std": 0.14782223524525762, | |
| "rewards/code_reward": 0.2900247216457501, | |
| "rewards/format_reward": 0.6250000037252903, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.0012039170615025796, | |
| "epoch": 0.028058392798843337, | |
| "grad_norm": 0.04097956046462059, | |
| "kl": 0.037628173828125, | |
| "learning_rate": 3.7290512744661274e-06, | |
| "loss": 0.0004, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.00120592799885344, | |
| "epoch": 0.028207639569049953, | |
| "grad_norm": 0.04101557284593582, | |
| "kl": 0.03668975830078125, | |
| "learning_rate": 3.715908537066589e-06, | |
| "loss": 0.0003, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio": 0.0012375264723232249, | |
| "completion_length": 972.2857627868652, | |
| "epoch": 0.028356886339256565, | |
| "grad_norm": 0.0460306815803051, | |
| "kl": 0.035683631896972656, | |
| "learning_rate": 3.7027252720793538e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 10154728.0, | |
| "reward": 0.4762301933951676, | |
| "reward_std": 0.2230017096735537, | |
| "rewards/code_reward": 0.4048016065498814, | |
| "rewards/format_reward": 0.7142857164144516, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0012021254078717902, | |
| "epoch": 0.028506133109463178, | |
| "grad_norm": 0.053284067660570145, | |
| "kl": 0.034501075744628906, | |
| "learning_rate": 3.689502032647817e-06, | |
| "loss": 0.0005, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.0013434358115773648, | |
| "epoch": 0.02865537987966979, | |
| "grad_norm": 0.044003624469041824, | |
| "kl": 0.031785011291503906, | |
| "learning_rate": 3.6762393735926245e-06, | |
| "loss": 0.0004, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.001241399731952697, | |
| "completion_length": 853.8482551574707, | |
| "epoch": 0.028804626649876406, | |
| "grad_norm": 0.04587972164154053, | |
| "kl": 0.0348663330078125, | |
| "learning_rate": 3.6629378513883852e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 10319365.0, | |
| "reward": 0.4278743101749569, | |
| "reward_std": 0.16044311877340078, | |
| "rewards/code_reward": 0.3440350177406799, | |
| "rewards/format_reward": 0.8383928369730711, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.0014909484089002945, | |
| "epoch": 0.02895387342008302, | |
| "grad_norm": 0.04408523812890053, | |
| "kl": 0.03650093078613281, | |
| "learning_rate": 3.6495980241403307e-06, | |
| "loss": 0.0004, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0011587896278797416, | |
| "epoch": 0.02910312019028963, | |
| "grad_norm": 0.044679079204797745, | |
| "kl": 0.037075042724609375, | |
| "learning_rate": 3.636220451560896e-06, | |
| "loss": 0.0003, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0008984732257886208, | |
| "completion_length": 807.0536079406738, | |
| "epoch": 0.029252366960496247, | |
| "grad_norm": 0.04803880304098129, | |
| "kl": 0.03685188293457031, | |
| "learning_rate": 3.622805694946235e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 10472860.0, | |
| "reward": 0.4805865343660116, | |
| "reward_std": 0.2713788175024092, | |
| "rewards/code_reward": 0.4047829551855102, | |
| "rewards/format_reward": 0.7580357203260064, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.000974786184997356, | |
| "epoch": 0.02940161373070286, | |
| "grad_norm": 0.0561777763068676, | |
| "kl": 0.0364837646484375, | |
| "learning_rate": 3.609354317152667e-06, | |
| "loss": 0.0003, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0010943580300590838, | |
| "epoch": 0.029550860500909472, | |
| "grad_norm": 0.04243557155132294, | |
| "kl": 0.03566169738769531, | |
| "learning_rate": 3.595866882573063e-06, | |
| "loss": 0.0003, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.001358222019916866, | |
| "completion_length": 625.1071681976318, | |
| "epoch": 0.029700107271116084, | |
| "grad_norm": 0.060445670038461685, | |
| "kl": 0.04364013671875, | |
| "learning_rate": 3.5823439571131675e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 10594252.0, | |
| "reward": 0.5816141716204584, | |
| "reward_std": 0.24522001342847943, | |
| "rewards/code_reward": 0.4959891536273062, | |
| "rewards/format_reward": 0.8562499936670065, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio": 0.0011922506128030363, | |
| "epoch": 0.0298493540413227, | |
| "grad_norm": 0.09876689314842224, | |
| "kl": 0.04461669921875, | |
| "learning_rate": 3.5687861081678477e-06, | |
| "loss": 0.0004, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0014354662271216512, | |
| "epoch": 0.029998600811529313, | |
| "grad_norm": 0.0558474026620388, | |
| "kl": 0.0430145263671875, | |
| "learning_rate": 3.555193904597291e-06, | |
| "loss": 0.0004, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio": 0.0016133483604789944, | |
| "completion_length": 778.0803890228271, | |
| "epoch": 0.030147847581735925, | |
| "grad_norm": 0.0491982102394104, | |
| "kl": 0.038715362548828125, | |
| "learning_rate": 3.541567916703138e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 10747848.0, | |
| "reward": 0.4137029910925776, | |
| "reward_std": 0.23209618078544736, | |
| "rewards/code_reward": 0.33709583431482315, | |
| "rewards/format_reward": 0.7660714257508516, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio": 0.0014115704216237646, | |
| "epoch": 0.03029709435194254, | |
| "grad_norm": 0.04640298709273338, | |
| "kl": 0.039661407470703125, | |
| "learning_rate": 3.5279087162045517e-06, | |
| "loss": 0.0005, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio": 0.001271567817639152, | |
| "epoch": 0.030446341122149154, | |
| "grad_norm": 0.044124212116003036, | |
| "kl": 0.042690277099609375, | |
| "learning_rate": 3.5142168762142265e-06, | |
| "loss": 0.0003, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio": 0.0009439818841201486, | |
| "completion_length": 891.4286041259766, | |
| "epoch": 0.030595587892355766, | |
| "grad_norm": 0.04894348606467247, | |
| "kl": 0.032591819763183594, | |
| "learning_rate": 3.500492971214347e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 10915987.0, | |
| "reward": 0.5286302748136222, | |
| "reward_std": 0.26153607084415853, | |
| "rewards/code_reward": 0.44514812077977695, | |
| "rewards/format_reward": 0.8348214142024517, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0010812356740643736, | |
| "epoch": 0.030744834662562382, | |
| "grad_norm": 0.04213985055685043, | |
| "kl": 0.033928871154785156, | |
| "learning_rate": 3.48673757703248e-06, | |
| "loss": 0.0004, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio": 0.0011472811147541506, | |
| "epoch": 0.030894081432768995, | |
| "grad_norm": 0.05198606848716736, | |
| "kl": 0.03620433807373047, | |
| "learning_rate": 3.472951270817418e-06, | |
| "loss": 0.0003, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio": 0.001109095293941209, | |
| "completion_length": 705.3571720123291, | |
| "epoch": 0.031043328202975607, | |
| "grad_norm": 0.04343515262007713, | |
| "kl": 0.0405426025390625, | |
| "learning_rate": 3.4591346310149578e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 11062166.0, | |
| "reward": 0.634864809922874, | |
| "reward_std": 0.21623162645846605, | |
| "rewards/code_reward": 0.5361148179508746, | |
| "rewards/format_reward": 0.9874999988824129, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio": 0.0008893829726730473, | |
| "epoch": 0.03119257497318222, | |
| "grad_norm": 0.04344526305794716, | |
| "kl": 0.039653778076171875, | |
| "learning_rate": 3.445288237343632e-06, | |
| "loss": 0.0003, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio": 0.0009056666758624488, | |
| "epoch": 0.031341821743388835, | |
| "grad_norm": 0.04229241609573364, | |
| "kl": 0.038776397705078125, | |
| "learning_rate": 3.4314126707703895e-06, | |
| "loss": 0.0002, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0015824516594875604, | |
| "completion_length": 766.2411098480225, | |
| "epoch": 0.03149106851359545, | |
| "grad_norm": 0.04466083645820618, | |
| "kl": 0.044189453125, | |
| "learning_rate": 3.4175085134862128e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 11210110.0, | |
| "reward": 0.6619070116430521, | |
| "reward_std": 0.2607162531930953, | |
| "rewards/code_reward": 0.5710141453891993, | |
| "rewards/format_reward": 0.9089285656809807, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio": 0.0015846481364860665, | |
| "epoch": 0.03164031528380206, | |
| "grad_norm": 0.044882841408252716, | |
| "kl": 0.0458831787109375, | |
| "learning_rate": 3.4035763488816953e-06, | |
| "loss": 0.0006, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio": 0.0014327153403428383, | |
| "epoch": 0.03178956205400867, | |
| "grad_norm": 0.0454876646399498, | |
| "kl": 0.04427337646484375, | |
| "learning_rate": 3.3896167615225594e-06, | |
| "loss": 0.0005, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio": 0.0010587101842247648, | |
| "completion_length": 918.2232494354248, | |
| "epoch": 0.031938808824215285, | |
| "grad_norm": 0.05162033438682556, | |
| "kl": 0.045897483825683594, | |
| "learning_rate": 3.375630337125133e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 11378128.0, | |
| "reward": 0.5001084273681045, | |
| "reward_std": 0.2754337468650192, | |
| "rewards/code_reward": 0.4088584103010362, | |
| "rewards/format_reward": 0.9124999912455678, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio": 0.0010744722167146392, | |
| "epoch": 0.032088055594421905, | |
| "grad_norm": 0.04861937463283539, | |
| "kl": 0.042781829833984375, | |
| "learning_rate": 3.361617662531772e-06, | |
| "loss": 0.0003, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.0014411782067327294, | |
| "epoch": 0.03223730236462852, | |
| "grad_norm": 0.04841092228889465, | |
| "kl": 0.04188394546508789, | |
| "learning_rate": 3.347579325686237e-06, | |
| "loss": 0.0002, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio": 0.001376153410092229, | |
| "completion_length": 905.0268306732178, | |
| "epoch": 0.03238654913483513, | |
| "grad_norm": 0.04793982580304146, | |
| "kl": 0.03996849060058594, | |
| "learning_rate": 3.333515915609027e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 11546320.0, | |
| "reward": 0.3640421375166625, | |
| "reward_std": 0.0876690277364105, | |
| "rewards/code_reward": 0.27386354363989085, | |
| "rewards/format_reward": 0.901785722002387, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio": 0.001170761366665829, | |
| "epoch": 0.03253579590504174, | |
| "grad_norm": 0.04501071572303772, | |
| "kl": 0.04147529602050781, | |
| "learning_rate": 3.3194280223726616e-06, | |
| "loss": 0.0006, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio": 0.0011851877379740472, | |
| "epoch": 0.032685042675248355, | |
| "grad_norm": 0.04706224054098129, | |
| "kl": 0.041217803955078125, | |
| "learning_rate": 3.305316237076927e-06, | |
| "loss": 0.0004, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio": 0.0012976112229807768, | |
| "completion_length": 814.6964626312256, | |
| "epoch": 0.03283428944545497, | |
| "grad_norm": 0.041682858020067215, | |
| "kl": 0.037784576416015625, | |
| "learning_rate": 3.291181151824071e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 11696695.0, | |
| "reward": 0.5471810144372284, | |
| "reward_std": 0.14302252768538892, | |
| "rewards/code_reward": 0.45718098066572566, | |
| "rewards/format_reward": 0.8999999836087227, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio": 0.0012087585309927817, | |
| "epoch": 0.03298353621566158, | |
| "grad_norm": 0.042474351823329926, | |
| "kl": 0.04036712646484375, | |
| "learning_rate": 3.27702335969396e-06, | |
| "loss": 0.0005, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio": 0.0015525895069004036, | |
| "epoch": 0.0331327829858682, | |
| "grad_norm": 0.041807934641838074, | |
| "kl": 0.040065765380859375, | |
| "learning_rate": 3.2628434547191985e-06, | |
| "loss": 0.0005, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio": 0.0013409430903266184, | |
| "completion_length": 868.7321815490723, | |
| "epoch": 0.03328202975607481, | |
| "grad_norm": 0.047505587339401245, | |
| "kl": 0.036075592041015625, | |
| "learning_rate": 3.2486420318601973e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 11861025.0, | |
| "reward": 0.3259413610212505, | |
| "reward_std": 0.23739346489310265, | |
| "rewards/code_reward": 0.23451278542052023, | |
| "rewards/format_reward": 0.9142857156693935, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio": 0.00125404930440709, | |
| "epoch": 0.033431276526281424, | |
| "grad_norm": 0.05282680317759514, | |
| "kl": 0.03722381591796875, | |
| "learning_rate": 3.2344196869802187e-06, | |
| "loss": 0.0005, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio": 0.0013416263791441452, | |
| "epoch": 0.033580523296488037, | |
| "grad_norm": 0.0429045669734478, | |
| "kl": 0.04016876220703125, | |
| "learning_rate": 3.2201770168203694e-06, | |
| "loss": 0.0005, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio": 0.0010339052696508588, | |
| "completion_length": 867.5000495910645, | |
| "epoch": 0.03372977006669465, | |
| "grad_norm": 0.04467302933335304, | |
| "kl": 0.04944610595703125, | |
| "learning_rate": 3.205914618974563e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 12023194.0, | |
| "reward": 0.30351110571064055, | |
| "reward_std": 0.16383928316645324, | |
| "rewards/code_reward": 0.20235038634564262, | |
| "rewards/format_reward": 1.0116071458905935, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio": 0.0010862592589546693, | |
| "epoch": 0.03387901683690126, | |
| "grad_norm": 0.043438930064439774, | |
| "kl": 0.04763984680175781, | |
| "learning_rate": 3.1916330918644496e-06, | |
| "loss": 0.0005, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio": 0.0011292613503428583, | |
| "epoch": 0.03402826360710788, | |
| "grad_norm": 0.04707575961947441, | |
| "kl": 0.047130584716796875, | |
| "learning_rate": 3.177333034714303e-06, | |
| "loss": 0.0004, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio": 0.0009916176568367518, | |
| "completion_length": 861.1518287658691, | |
| "epoch": 0.03417751037731449, | |
| "grad_norm": 0.11236114799976349, | |
| "kl": 0.043548583984375, | |
| "learning_rate": 3.1630150475258813e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 12187333.0, | |
| "reward": 0.6236218712292612, | |
| "reward_std": 0.2065372944343835, | |
| "rewards/code_reward": 0.5138004226610065, | |
| "rewards/format_reward": 1.0982142873108387, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio": 0.0009899907190629165, | |
| "epoch": 0.034326757147521106, | |
| "grad_norm": 0.07119898498058319, | |
| "kl": 0.043308258056640625, | |
| "learning_rate": 3.148679731053252e-06, | |
| "loss": 0.0005, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio": 0.0009169172490146593, | |
| "epoch": 0.03447600391772772, | |
| "grad_norm": 0.045384377241134644, | |
| "kl": 0.044551849365234375, | |
| "learning_rate": 3.1343276867775805e-06, | |
| "loss": 0.0003, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio": 0.001247574469743995, | |
| "completion_length": 836.8482475280762, | |
| "epoch": 0.03462525068793433, | |
| "grad_norm": 0.045782655477523804, | |
| "kl": 0.051326751708984375, | |
| "learning_rate": 3.1199595168819043e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 12342541.0, | |
| "reward": 0.5260538989678025, | |
| "reward_std": 0.2387256936635822, | |
| "rewards/code_reward": 0.4242681765317684, | |
| "rewards/format_reward": 1.0178571455180645, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio": 0.0010094128338096198, | |
| "epoch": 0.03477449745814094, | |
| "grad_norm": 0.04860462620854378, | |
| "kl": 0.051258087158203125, | |
| "learning_rate": 3.105575824225852e-06, | |
| "loss": 0.0004, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio": 0.001277090563235106, | |
| "epoch": 0.034923744228347556, | |
| "grad_norm": 0.04626248776912689, | |
| "kl": 0.052829742431640625, | |
| "learning_rate": 3.091177212320363e-06, | |
| "loss": 0.0004, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio": 0.0012918142674607225, | |
| "completion_length": 863.321455001831, | |
| "epoch": 0.035072990998554175, | |
| "grad_norm": 0.060307588428258896, | |
| "kl": 0.0425262451171875, | |
| "learning_rate": 3.0767642853023538e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 12496871.0, | |
| "reward": 0.5240239482372999, | |
| "reward_std": 0.1900554287713021, | |
| "rewards/code_reward": 0.4305418040603399, | |
| "rewards/format_reward": 0.9348214156925678, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio": 0.0013760660076513886, | |
| "epoch": 0.03522223776876079, | |
| "grad_norm": 0.04616798833012581, | |
| "kl": 0.040264129638671875, | |
| "learning_rate": 3.062337647909376e-06, | |
| "loss": 0.0003, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio": 0.001137408380600391, | |
| "epoch": 0.0353714845389674, | |
| "grad_norm": 0.04546603932976723, | |
| "kl": 0.040740966796875, | |
| "learning_rate": 3.04789790545424e-06, | |
| "loss": 0.0003, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio": 0.001182597396109486, | |
| "completion_length": 875.6161231994629, | |
| "epoch": 0.03552073130917401, | |
| "grad_norm": 0.04508831351995468, | |
| "kl": 0.0308074951171875, | |
| "learning_rate": 3.033445663799621e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 12661181.0, | |
| "reward": 0.5413230061531067, | |
| "reward_std": 0.23713362775743008, | |
| "rewards/code_reward": 0.44221584234037437, | |
| "rewards/format_reward": 0.9910714235156775, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio": 0.001146262224210659, | |
| "epoch": 0.035669978079380625, | |
| "grad_norm": 0.04483221843838692, | |
| "kl": 0.031106948852539062, | |
| "learning_rate": 3.018981529332633e-06, | |
| "loss": 0.0004, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio": 0.0010552481926424662, | |
| "epoch": 0.03581922484958724, | |
| "grad_norm": 0.04423677921295166, | |
| "kl": 0.030683517456054688, | |
| "learning_rate": 3.00450610893939e-06, | |
| "loss": 0.0003, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio": 0.0013090946631564293, | |
| "completion_length": 792.5536079406738, | |
| "epoch": 0.03596847161979385, | |
| "grad_norm": 0.047966547310352325, | |
| "kl": 0.03855133056640625, | |
| "learning_rate": 2.9900200099795396e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 12813969.0, | |
| "reward": 0.5236398167908192, | |
| "reward_std": 0.19648715062066913, | |
| "rewards/code_reward": 0.42238980112597346, | |
| "rewards/format_reward": 1.0124999918043613, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio": 0.0011606769148784224, | |
| "epoch": 0.03611771839000047, | |
| "grad_norm": 0.050562020391225815, | |
| "kl": 0.038921356201171875, | |
| "learning_rate": 2.9755238402607826e-06, | |
| "loss": 0.0005, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio": 0.0010873440842260607, | |
| "epoch": 0.03626696516020708, | |
| "grad_norm": 0.046209290623664856, | |
| "kl": 0.03893280029296875, | |
| "learning_rate": 2.961018208013367e-06, | |
| "loss": 0.0003, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio": 0.0011430384001869243, | |
| "completion_length": 645.9375267028809, | |
| "epoch": 0.036416211930413694, | |
| "grad_norm": 0.04914650693535805, | |
| "kl": 0.046295166015625, | |
| "learning_rate": 2.9465037218645694e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 12937253.0, | |
| "reward": 0.6496362583711743, | |
| "reward_std": 0.18762771505862474, | |
| "rewards/code_reward": 0.5519576612859964, | |
| "rewards/format_reward": 0.9767857156693935, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio": 0.0009474948765273439, | |
| "epoch": 0.03656545870062031, | |
| "grad_norm": 0.050520021468400955, | |
| "kl": 0.0462493896484375, | |
| "learning_rate": 2.9319809908131604e-06, | |
| "loss": 0.0003, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio": 0.0009283031004088116, | |
| "epoch": 0.03671470547082692, | |
| "grad_norm": 0.04778316617012024, | |
| "kl": 0.04526519775390625, | |
| "learning_rate": 2.917450624203847e-06, | |
| "loss": 0.0002, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio": 0.0011882597391377203, | |
| "completion_length": 697.1339626312256, | |
| "epoch": 0.03686395224103353, | |
| "grad_norm": 0.04777618125081062, | |
| "kl": 0.041156768798828125, | |
| "learning_rate": 2.9029132317017118e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 13075518.0, | |
| "reward": 0.6029732944443822, | |
| "reward_std": 0.20219829003326595, | |
| "rewards/code_reward": 0.5043125713709742, | |
| "rewards/format_reward": 0.9866071343421936, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio": 0.0011061100340157282, | |
| "epoch": 0.037013199011240144, | |
| "grad_norm": 0.046102263033390045, | |
| "kl": 0.042461395263671875, | |
| "learning_rate": 2.888369423266629e-06, | |
| "loss": 0.0003, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio": 0.0012012151892122347, | |
| "epoch": 0.037162445781446764, | |
| "grad_norm": 0.049948278814554214, | |
| "kl": 0.041042327880859375, | |
| "learning_rate": 2.8738198091276712e-06, | |
| "loss": 0.0003, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio": 0.0011589178429858293, | |
| "completion_length": 880.8750400543213, | |
| "epoch": 0.037311692551653376, | |
| "grad_norm": 0.046810079365968704, | |
| "kl": 0.0442352294921875, | |
| "learning_rate": 2.859264999757509e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 13245436.0, | |
| "reward": 0.47556353313848376, | |
| "reward_std": 0.2117939020972699, | |
| "rewards/code_reward": 0.3802063832990825, | |
| "rewards/format_reward": 0.9535714266821742, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio": 0.001155683581600897, | |
| "epoch": 0.03746093932185999, | |
| "grad_norm": 0.04762619733810425, | |
| "kl": 0.04388427734375, | |
| "learning_rate": 2.8447056058467928e-06, | |
| "loss": 0.0004, | |
| "step": 251 | |
| }, | |
| { | |
| "clip_ratio": 0.0012009528154521831, | |
| "epoch": 0.0376101860920666, | |
| "grad_norm": 0.046324875205755234, | |
| "kl": 0.0436859130859375, | |
| "learning_rate": 2.830142238278531e-06, | |
| "loss": 0.0004, | |
| "step": 252 | |
| }, | |
| { | |
| "clip_ratio": 0.0011499394786369521, | |
| "completion_length": 702.1071796417236, | |
| "epoch": 0.037759432862273214, | |
| "grad_norm": 0.05207349359989166, | |
| "kl": 0.047008514404296875, | |
| "learning_rate": 2.81557550810246e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 13379036.0, | |
| "reward": 0.5698134193662554, | |
| "reward_std": 0.18162817368283868, | |
| "rewards/code_reward": 0.47561697755008936, | |
| "rewards/format_reward": 0.9419642891734838, | |
| "step": 253 | |
| }, | |
| { | |
| "clip_ratio": 0.0009552986102789873, | |
| "epoch": 0.037908679632479826, | |
| "grad_norm": 0.05312632396817207, | |
| "kl": 0.0461883544921875, | |
| "learning_rate": 2.8010060265094026e-06, | |
| "loss": 0.0007, | |
| "step": 254 | |
| }, | |
| { | |
| "clip_ratio": 0.0012589068483066512, | |
| "epoch": 0.03805792640268644, | |
| "grad_norm": 0.05023624747991562, | |
| "kl": 0.045841217041015625, | |
| "learning_rate": 2.786434404805629e-06, | |
| "loss": 0.0004, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio": 0.0014080564960750053, | |
| "completion_length": 672.3393173217773, | |
| "epoch": 0.03820717317289306, | |
| "grad_norm": 0.0485769547522068, | |
| "kl": 0.04241943359375, | |
| "learning_rate": 2.771861254387199e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 13512438.0, | |
| "reward": 0.5117118041962385, | |
| "reward_std": 0.23872398328967392, | |
| "rewards/code_reward": 0.41510464530438185, | |
| "rewards/format_reward": 0.966071430593729, | |
| "step": 256 | |
| }, | |
| { | |
| "clip_ratio": 0.0011822391352325212, | |
| "epoch": 0.03835641994309967, | |
| "grad_norm": 0.050369687378406525, | |
| "kl": 0.04282379150390625, | |
| "learning_rate": 2.7572871867143204e-06, | |
| "loss": 0.0004, | |
| "step": 257 | |
| }, | |
| { | |
| "clip_ratio": 0.0014788927746849367, | |
| "epoch": 0.03850566671330628, | |
| "grad_norm": 0.04701628535985947, | |
| "kl": 0.04404449462890625, | |
| "learning_rate": 2.742712813285681e-06, | |
| "loss": 0.0004, | |
| "step": 258 | |
| }, | |
| { | |
| "clip_ratio": 0.0010453241848153993, | |
| "completion_length": 773.8303928375244, | |
| "epoch": 0.038654913483512895, | |
| "grad_norm": 0.04721568897366524, | |
| "kl": 0.039340972900390625, | |
| "learning_rate": 2.7281387456128017e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 13657739.0, | |
| "reward": 0.5111014764988795, | |
| "reward_std": 0.20084915030747652, | |
| "rewards/code_reward": 0.4129764676472405, | |
| "rewards/format_reward": 0.9812499936670065, | |
| "step": 259 | |
| }, | |
| { | |
| "clip_ratio": 0.001074462946235144, | |
| "epoch": 0.03880416025371951, | |
| "grad_norm": 0.04527387022972107, | |
| "kl": 0.0382232666015625, | |
| "learning_rate": 2.7135655951943716e-06, | |
| "loss": 0.0005, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio": 0.0010467221291037276, | |
| "epoch": 0.03895340702392612, | |
| "grad_norm": 0.04464754834771156, | |
| "kl": 0.036327362060546875, | |
| "learning_rate": 2.698993973490598e-06, | |
| "loss": 0.0004, | |
| "step": 261 | |
| }, | |
| { | |
| "clip_ratio": 0.0011872881314047845, | |
| "completion_length": 739.3036079406738, | |
| "epoch": 0.03910265379413273, | |
| "grad_norm": 0.04908748343586922, | |
| "kl": 0.056262969970703125, | |
| "learning_rate": 2.6844244918975416e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 13804031.0, | |
| "reward": 0.6070374413393438, | |
| "reward_std": 0.23497100081294775, | |
| "rewards/code_reward": 0.5173052740865387, | |
| "rewards/format_reward": 0.897321430966258, | |
| "step": 262 | |
| }, | |
| { | |
| "clip_ratio": 0.001177058060420677, | |
| "epoch": 0.03925190056433935, | |
| "grad_norm": 0.054567039012908936, | |
| "kl": 0.054866790771484375, | |
| "learning_rate": 2.66985776172147e-06, | |
| "loss": 0.0005, | |
| "step": 263 | |
| }, | |
| { | |
| "clip_ratio": 0.00118064760681591, | |
| "epoch": 0.039401147334545965, | |
| "grad_norm": 0.042729251086711884, | |
| "kl": 0.047893524169921875, | |
| "learning_rate": 2.6552943941532088e-06, | |
| "loss": 0.0004, | |
| "step": 264 | |
| }, | |
| { | |
| "clip_ratio": 0.0013072448928141966, | |
| "completion_length": 849.7143383026123, | |
| "epoch": 0.03955039410475258, | |
| "grad_norm": 0.04685011878609657, | |
| "kl": 0.03894233703613281, | |
| "learning_rate": 2.6407350002424927e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 13971159.0, | |
| "reward": 0.5141286924481392, | |
| "reward_std": 0.27967278216965497, | |
| "rewards/code_reward": 0.415021532215178, | |
| "rewards/format_reward": 0.991071417927742, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio": 0.001456160111047211, | |
| "epoch": 0.03969964087495919, | |
| "grad_norm": 0.0907537117600441, | |
| "kl": 0.036434173583984375, | |
| "learning_rate": 2.626180190872329e-06, | |
| "loss": 0.0005, | |
| "step": 266 | |
| }, | |
| { | |
| "clip_ratio": 0.0013232155906734988, | |
| "epoch": 0.0398488876451658, | |
| "grad_norm": 0.05118626356124878, | |
| "kl": 0.03912162780761719, | |
| "learning_rate": 2.611630576733372e-06, | |
| "loss": 0.0004, | |
| "step": 267 | |
| }, | |
| { | |
| "clip_ratio": 0.001450759154977277, | |
| "completion_length": 746.2321796417236, | |
| "epoch": 0.039998134415372415, | |
| "grad_norm": 0.05404108017683029, | |
| "kl": 0.039783477783203125, | |
| "learning_rate": 2.5970867682982885e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 14119900.0, | |
| "reward": 0.426578835118562, | |
| "reward_std": 0.2707485407590866, | |
| "rewards/code_reward": 0.33131096488796175, | |
| "rewards/format_reward": 0.9526785761117935, | |
| "step": 268 | |
| }, | |
| { | |
| "clip_ratio": 0.001243086520844372, | |
| "epoch": 0.040147381185579034, | |
| "grad_norm": 0.04573540762066841, | |
| "kl": 0.040866851806640625, | |
| "learning_rate": 2.582549375796154e-06, | |
| "loss": 0.0005, | |
| "step": 269 | |
| }, | |
| { | |
| "clip_ratio": 0.001273469748412026, | |
| "epoch": 0.04029662795578565, | |
| "grad_norm": 0.04555228725075722, | |
| "kl": 0.041530609130859375, | |
| "learning_rate": 2.568019009186841e-06, | |
| "loss": 0.0004, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio": 0.0010037624397227773, | |
| "completion_length": 683.7321643829346, | |
| "epoch": 0.04044587472599226, | |
| "grad_norm": 0.04558391869068146, | |
| "kl": 0.035755157470703125, | |
| "learning_rate": 2.5534962781354317e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 14254592.0, | |
| "reward": 0.7018171134404838, | |
| "reward_std": 0.1548383531626314, | |
| "rewards/code_reward": 0.6096742328445544, | |
| "rewards/format_reward": 0.9214285779744387, | |
| "step": 271 | |
| }, | |
| { | |
| "clip_ratio": 0.0009475577735429397, | |
| "epoch": 0.04059512149619887, | |
| "grad_norm": 0.04442001134157181, | |
| "kl": 0.03681182861328125, | |
| "learning_rate": 2.538981791986634e-06, | |
| "loss": 0.0004, | |
| "step": 272 | |
| }, | |
| { | |
| "clip_ratio": 0.0009967278165277094, | |
| "epoch": 0.040744368266405484, | |
| "grad_norm": 0.04307863861322403, | |
| "kl": 0.03729248046875, | |
| "learning_rate": 2.524476159739218e-06, | |
| "loss": 0.0004, | |
| "step": 273 | |
| }, | |
| { | |
| "clip_ratio": 0.0009341392360511236, | |
| "completion_length": 763.7232418060303, | |
| "epoch": 0.040893615036612097, | |
| "grad_norm": 0.04648442193865776, | |
| "kl": 0.036266326904296875, | |
| "learning_rate": 2.5099799900204607e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 14407973.0, | |
| "reward": 0.5817076284438372, | |
| "reward_std": 0.19802262401208282, | |
| "rewards/code_reward": 0.48893973045778694, | |
| "rewards/format_reward": 0.9276785720139742, | |
| "step": 274 | |
| }, | |
| { | |
| "clip_ratio": 0.0009563791154505452, | |
| "epoch": 0.04104286180681871, | |
| "grad_norm": 0.043009962886571884, | |
| "kl": 0.03775787353515625, | |
| "learning_rate": 2.4954938910606108e-06, | |
| "loss": 0.0003, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio": 0.0009447472584724892, | |
| "epoch": 0.04119210857702533, | |
| "grad_norm": 0.044043902307748795, | |
| "kl": 0.03899383544921875, | |
| "learning_rate": 2.481018470667368e-06, | |
| "loss": 0.0003, | |
| "step": 276 | |
| }, | |
| { | |
| "clip_ratio": 0.0009749470536917215, | |
| "completion_length": 751.8839569091797, | |
| "epoch": 0.04134135534723194, | |
| "grad_norm": 0.04722112417221069, | |
| "kl": 0.051502227783203125, | |
| "learning_rate": 2.4665543362003802e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 14552930.0, | |
| "reward": 0.463415396399796, | |
| "reward_std": 0.1340479573700577, | |
| "rewards/code_reward": 0.3710046644337126, | |
| "rewards/format_reward": 0.9241071259602904, | |
| "step": 277 | |
| }, | |
| { | |
| "clip_ratio": 0.0009320784265582915, | |
| "epoch": 0.04149060211743855, | |
| "grad_norm": 0.047061435878276825, | |
| "kl": 0.0539093017578125, | |
| "learning_rate": 2.4521020945457615e-06, | |
| "loss": 0.0005, | |
| "step": 278 | |
| }, | |
| { | |
| "clip_ratio": 0.0011352980673109414, | |
| "epoch": 0.041639848887645166, | |
| "grad_norm": 0.0768757089972496, | |
| "kl": 0.0503387451171875, | |
| "learning_rate": 2.4376623520906255e-06, | |
| "loss": 0.0004, | |
| "step": 279 | |
| }, | |
| { | |
| "clip_ratio": 0.0011972093489021063, | |
| "completion_length": 795.5089569091797, | |
| "epoch": 0.04178909565785178, | |
| "grad_norm": 0.04606209695339203, | |
| "kl": 0.06067657470703125, | |
| "learning_rate": 2.4232357146976478e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 14708779.0, | |
| "reward": 0.6148986229673028, | |
| "reward_std": 0.18130915402434766, | |
| "rewards/code_reward": 0.5234700404689647, | |
| "rewards/format_reward": 0.9142857044935226, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio": 0.0010662599033821607, | |
| "epoch": 0.04193834242805839, | |
| "grad_norm": 0.04503650590777397, | |
| "kl": 0.0595855712890625, | |
| "learning_rate": 2.408822787679637e-06, | |
| "loss": 0.0006, | |
| "step": 281 | |
| }, | |
| { | |
| "clip_ratio": 0.0012542046988528455, | |
| "epoch": 0.042087589198265, | |
| "grad_norm": 0.04570099711418152, | |
| "kl": 0.054462432861328125, | |
| "learning_rate": 2.3944241757741475e-06, | |
| "loss": 0.0005, | |
| "step": 282 | |
| }, | |
| { | |
| "clip_ratio": 0.0010910165510722436, | |
| "completion_length": 841.4821853637695, | |
| "epoch": 0.04223683596847162, | |
| "grad_norm": 0.043202780187129974, | |
| "kl": 0.04636383056640625, | |
| "learning_rate": 2.380040483118097e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 14872542.0, | |
| "reward": 0.47422279929742217, | |
| "reward_std": 0.21761185978539288, | |
| "rewards/code_reward": 0.3716334954369813, | |
| "rewards/format_reward": 1.0258928649127483, | |
| "step": 283 | |
| }, | |
| { | |
| "clip_ratio": 0.0011495619401102886, | |
| "epoch": 0.042386082738678235, | |
| "grad_norm": 0.042755380272865295, | |
| "kl": 0.044036865234375, | |
| "learning_rate": 2.365672313222419e-06, | |
| "loss": 0.0004, | |
| "step": 284 | |
| }, | |
| { | |
| "clip_ratio": 0.0013028225057496456, | |
| "epoch": 0.04253532950888485, | |
| "grad_norm": 0.05651061609387398, | |
| "kl": 0.042083740234375, | |
| "learning_rate": 2.351320268946749e-06, | |
| "loss": 0.0005, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio": 0.001140464471973246, | |
| "completion_length": 1019.5000534057617, | |
| "epoch": 0.04268457627909146, | |
| "grad_norm": 0.05123982951045036, | |
| "kl": 0.05125236511230469, | |
| "learning_rate": 2.336984952474119e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 15060254.0, | |
| "reward": 0.48235001834109426, | |
| "reward_std": 0.1879777810536325, | |
| "rewards/code_reward": 0.38779641879955307, | |
| "rewards/format_reward": 0.9455357119441032, | |
| "step": 286 | |
| }, | |
| { | |
| "clip_ratio": 0.0010813200133270584, | |
| "epoch": 0.04283382304929807, | |
| "grad_norm": 0.049375370144844055, | |
| "kl": 0.047840118408203125, | |
| "learning_rate": 2.322666965285697e-06, | |
| "loss": 0.0004, | |
| "step": 287 | |
| }, | |
| { | |
| "clip_ratio": 0.0010197247520409292, | |
| "epoch": 0.042983069819504685, | |
| "grad_norm": 0.047002602368593216, | |
| "kl": 0.04539680480957031, | |
| "learning_rate": 2.3083669081355507e-06, | |
| "loss": 0.0004, | |
| "step": 288 | |
| }, | |
| { | |
| "clip_ratio": 0.0010922325645879027, | |
| "completion_length": 860.9821758270264, | |
| "epoch": 0.0431323165897113, | |
| "grad_norm": 0.047084398567676544, | |
| "kl": 0.033111572265625, | |
| "learning_rate": 2.2940853810254377e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 15215862.0, | |
| "reward": 0.41257912619039416, | |
| "reward_std": 0.17679887195117772, | |
| "rewards/code_reward": 0.30829338620242197, | |
| "rewards/format_reward": 1.0428571440279484, | |
| "step": 289 | |
| }, | |
| { | |
| "clip_ratio": 0.0011078603311034385, | |
| "epoch": 0.04328156335991792, | |
| "grad_norm": 0.047457680106163025, | |
| "kl": 0.032649993896484375, | |
| "learning_rate": 2.2798229831796313e-06, | |
| "loss": 0.0004, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio": 0.0012763461973008816, | |
| "epoch": 0.04343081013012453, | |
| "grad_norm": 0.04568823054432869, | |
| "kl": 0.032649993896484375, | |
| "learning_rate": 2.2655803130197816e-06, | |
| "loss": 0.0003, | |
| "step": 291 | |
| }, | |
| { | |
| "clip_ratio": 0.0011839414237329038, | |
| "completion_length": 830.1964721679688, | |
| "epoch": 0.04358005690033114, | |
| "grad_norm": 0.04152772203087807, | |
| "kl": 0.038677215576171875, | |
| "learning_rate": 2.2513579681398034e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 15367448.0, | |
| "reward": 0.5385435288771987, | |
| "reward_std": 0.19832654343917966, | |
| "rewards/code_reward": 0.4406863655894995, | |
| "rewards/format_reward": 0.9785714186728001, | |
| "step": 292 | |
| }, | |
| { | |
| "clip_ratio": 0.0010050950450022356, | |
| "epoch": 0.043729303670537754, | |
| "grad_norm": 0.04242389649152756, | |
| "kl": 0.03912353515625, | |
| "learning_rate": 2.237156545280803e-06, | |
| "loss": 0.0004, | |
| "step": 293 | |
| }, | |
| { | |
| "clip_ratio": 0.001050207116350066, | |
| "epoch": 0.04387855044074437, | |
| "grad_norm": 0.04236888512969017, | |
| "kl": 0.039356231689453125, | |
| "learning_rate": 2.2229766403060403e-06, | |
| "loss": 0.0004, | |
| "step": 294 | |
| }, | |
| { | |
| "clip_ratio": 0.0011052291774831247, | |
| "completion_length": 796.7411079406738, | |
| "epoch": 0.04402779721095098, | |
| "grad_norm": 0.062591552734375, | |
| "kl": 0.059398651123046875, | |
| "learning_rate": 2.2088188481759305e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 15521069.0, | |
| "reward": 0.7143844980746508, | |
| "reward_std": 0.22826048522256315, | |
| "rewards/code_reward": 0.6077773505821824, | |
| "rewards/format_reward": 1.066071417182684, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio": 0.001126413505517121, | |
| "epoch": 0.04417704398115759, | |
| "grad_norm": 0.05377168208360672, | |
| "kl": 0.052120208740234375, | |
| "learning_rate": 2.194683762923073e-06, | |
| "loss": 0.0006, | |
| "step": 296 | |
| }, | |
| { | |
| "clip_ratio": 0.0010959034188999794, | |
| "epoch": 0.04432629075136421, | |
| "grad_norm": 0.049911994487047195, | |
| "kl": 0.0478057861328125, | |
| "learning_rate": 2.1805719776273387e-06, | |
| "loss": 0.0005, | |
| "step": 297 | |
| }, | |
| { | |
| "clip_ratio": 0.0011420191494835308, | |
| "completion_length": 801.7411098480225, | |
| "epoch": 0.044475537521570824, | |
| "grad_norm": 0.04711169749498367, | |
| "kl": 0.047809600830078125, | |
| "learning_rate": 2.166484084390974e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 15678309.0, | |
| "reward": 0.3837883761152625, | |
| "reward_std": 0.14018947025761008, | |
| "rewards/code_reward": 0.27164551382884383, | |
| "rewards/format_reward": 1.1214285604655743, | |
| "step": 298 | |
| }, | |
| { | |
| "clip_ratio": 0.0009830774752117577, | |
| "epoch": 0.044624784291777436, | |
| "grad_norm": 0.04708566516637802, | |
| "kl": 0.047992706298828125, | |
| "learning_rate": 2.1524206743137636e-06, | |
| "loss": 0.0004, | |
| "step": 299 | |
| }, | |
| { | |
| "clip_ratio": 0.000958900871410151, | |
| "epoch": 0.04477403106198405, | |
| "grad_norm": 0.045896608382463455, | |
| "kl": 0.049419403076171875, | |
| "learning_rate": 2.1383823374682287e-06, | |
| "loss": 0.0004, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio": 0.0012122261905460618, | |
| "completion_length": 859.7143230438232, | |
| "epoch": 0.04492327783219066, | |
| "grad_norm": 0.04249017313122749, | |
| "kl": 0.044986724853515625, | |
| "learning_rate": 2.124369662874868e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 15841559.0, | |
| "reward": 0.548895129468292, | |
| "reward_std": 0.24452911573462188, | |
| "rewards/code_reward": 0.4446094058221206, | |
| "rewards/format_reward": 1.0428571291267872, | |
| "step": 301 | |
| }, | |
| { | |
| "clip_ratio": 0.001091581678338116, | |
| "epoch": 0.045072524602397274, | |
| "grad_norm": 0.04369416460394859, | |
| "kl": 0.044300079345703125, | |
| "learning_rate": 2.110383238477441e-06, | |
| "loss": 0.0005, | |
| "step": 302 | |
| }, | |
| { | |
| "clip_ratio": 0.0011382698903616983, | |
| "epoch": 0.04522177137260389, | |
| "grad_norm": 0.041453346610069275, | |
| "kl": 0.04480743408203125, | |
| "learning_rate": 2.096423651118305e-06, | |
| "loss": 0.0004, | |
| "step": 303 | |
| }, | |
| { | |
| "clip_ratio": 0.0008476753027935047, | |
| "completion_length": 781.8482494354248, | |
| "epoch": 0.045371018142810506, | |
| "grad_norm": 0.045421402901411057, | |
| "kl": 0.04686737060546875, | |
| "learning_rate": 2.082491486513788e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 15989375.0, | |
| "reward": 0.6513705789111555, | |
| "reward_std": 0.23313539451919496, | |
| "rewards/code_reward": 0.5432455567643046, | |
| "rewards/format_reward": 1.0812499970197678, | |
| "step": 304 | |
| }, | |
| { | |
| "clip_ratio": 0.0011021571021956333, | |
| "epoch": 0.04552026491301712, | |
| "grad_norm": 0.04760179668664932, | |
| "kl": 0.045501708984375, | |
| "learning_rate": 2.0685873292296116e-06, | |
| "loss": 0.0005, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio": 0.0009927952487487346, | |
| "epoch": 0.04566951168322373, | |
| "grad_norm": 0.04543590918183327, | |
| "kl": 0.043498992919921875, | |
| "learning_rate": 2.054711762656369e-06, | |
| "loss": 0.0004, | |
| "step": 306 | |
| }, | |
| { | |
| "clip_ratio": 0.001191185787320137, | |
| "completion_length": 772.3750400543213, | |
| "epoch": 0.04581875845343034, | |
| "grad_norm": 0.04691977798938751, | |
| "kl": 0.03873443603515625, | |
| "learning_rate": 2.040865368985044e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 16132539.0, | |
| "reward": 0.5620117126964033, | |
| "reward_std": 0.23455260833725333, | |
| "rewards/code_reward": 0.4617438427376328, | |
| "rewards/format_reward": 1.0026785656809807, | |
| "step": 307 | |
| }, | |
| { | |
| "clip_ratio": 0.0011454644645709777, | |
| "epoch": 0.045968005223636955, | |
| "grad_norm": 0.045798543840646744, | |
| "kl": 0.03643035888671875, | |
| "learning_rate": 2.027048729182583e-06, | |
| "loss": 0.0005, | |
| "step": 308 | |
| }, | |
| { | |
| "clip_ratio": 0.0013067667568975594, | |
| "epoch": 0.04611725199384357, | |
| "grad_norm": 0.0446229986846447, | |
| "kl": 0.03519439697265625, | |
| "learning_rate": 2.0132624229675205e-06, | |
| "loss": 0.0003, | |
| "step": 309 | |
| }, | |
| { | |
| "clip_ratio": 0.0013392006876529194, | |
| "completion_length": 795.8036117553711, | |
| "epoch": 0.04626649876405019, | |
| "grad_norm": 0.044981542974710464, | |
| "kl": 0.045459747314453125, | |
| "learning_rate": 1.9995070287856546e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 16287462.0, | |
| "reward": 0.5069911074824631, | |
| "reward_std": 0.2646176088601351, | |
| "rewards/code_reward": 0.40404467517510056, | |
| "rewards/format_reward": 1.0294642690569162, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio": 0.0012751408075928339, | |
| "epoch": 0.0464157455342568, | |
| "grad_norm": 0.04428853467106819, | |
| "kl": 0.04509735107421875, | |
| "learning_rate": 1.985783123785774e-06, | |
| "loss": 0.0004, | |
| "step": 311 | |
| }, | |
| { | |
| "clip_ratio": 0.001256519486560137, | |
| "epoch": 0.04656499230446341, | |
| "grad_norm": 0.04464598372578621, | |
| "kl": 0.0438995361328125, | |
| "learning_rate": 1.9720912837954486e-06, | |
| "loss": 0.0004, | |
| "step": 312 | |
| }, | |
| { | |
| "clip_ratio": 0.0010024435077866656, | |
| "completion_length": 737.3661136627197, | |
| "epoch": 0.046714239074670025, | |
| "grad_norm": 0.0473940409719944, | |
| "kl": 0.0488128662109375, | |
| "learning_rate": 1.958432083296862e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 16426698.0, | |
| "reward": 0.577216680161655, | |
| "reward_std": 0.24476247746497393, | |
| "rewards/code_reward": 0.47391308448277414, | |
| "rewards/format_reward": 1.033035695552826, | |
| "step": 313 | |
| }, | |
| { | |
| "clip_ratio": 0.0009519284703856101, | |
| "epoch": 0.04686348584487664, | |
| "grad_norm": 0.046678487211465836, | |
| "kl": 0.04576873779296875, | |
| "learning_rate": 1.9448060954027093e-06, | |
| "loss": 0.0005, | |
| "step": 314 | |
| }, | |
| { | |
| "clip_ratio": 0.0010747394862846704, | |
| "epoch": 0.04701273261508325, | |
| "grad_norm": 0.05263395234942436, | |
| "kl": 0.04498291015625, | |
| "learning_rate": 1.931213891832153e-06, | |
| "loss": 0.0006, | |
| "step": 315 | |
| }, | |
| { | |
| "clip_ratio": 0.000990249405731447, | |
| "completion_length": 877.2232570648193, | |
| "epoch": 0.04716197938528986, | |
| "grad_norm": 0.07013804465532303, | |
| "kl": 0.0428314208984375, | |
| "learning_rate": 1.9176560428868336e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 16593190.0, | |
| "reward": 0.5014848634600639, | |
| "reward_std": 0.1569962182547897, | |
| "rewards/code_reward": 0.3947884114459157, | |
| "rewards/format_reward": 1.0669642873108387, | |
| "step": 316 | |
| }, | |
| { | |
| "clip_ratio": 0.001294584584684344, | |
| "epoch": 0.04731122615549648, | |
| "grad_norm": 0.04042747989296913, | |
| "kl": 0.042980194091796875, | |
| "learning_rate": 1.9041331174269373e-06, | |
| "loss": 0.0006, | |
| "step": 317 | |
| }, | |
| { | |
| "clip_ratio": 0.00103904002935451, | |
| "epoch": 0.047460472925703094, | |
| "grad_norm": 0.04211616516113281, | |
| "kl": 0.0416107177734375, | |
| "learning_rate": 1.8906456828473341e-06, | |
| "loss": 0.0005, | |
| "step": 318 | |
| }, | |
| { | |
| "clip_ratio": 0.0009875956375253736, | |
| "completion_length": 904.2768287658691, | |
| "epoch": 0.04760971969590971, | |
| "grad_norm": 0.05007445812225342, | |
| "kl": 0.040981292724609375, | |
| "learning_rate": 1.8771943050537656e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 16763902.0, | |
| "reward": 0.3916885075159371, | |
| "reward_std": 0.23931702435947955, | |
| "rewards/code_reward": 0.30088492622599006, | |
| "rewards/format_reward": 0.9080357123166323, | |
| "step": 319 | |
| }, | |
| { | |
| "clip_ratio": 0.0008391569135710597, | |
| "epoch": 0.04775896646611632, | |
| "grad_norm": 0.05043473839759827, | |
| "kl": 0.042842864990234375, | |
| "learning_rate": 1.8637795484391046e-06, | |
| "loss": 0.0004, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio": 0.0010355508493375964, | |
| "epoch": 0.04790821323632293, | |
| "grad_norm": 0.34376153349876404, | |
| "kl": 0.0391998291015625, | |
| "learning_rate": 1.8504019758596698e-06, | |
| "loss": 0.0004, | |
| "step": 321 | |
| }, | |
| { | |
| "clip_ratio": 0.0010621717938192887, | |
| "completion_length": 850.4196853637695, | |
| "epoch": 0.048057460006529544, | |
| "grad_norm": 0.04401549696922302, | |
| "kl": 0.035259246826171875, | |
| "learning_rate": 1.8370621486116163e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 16924081.0, | |
| "reward": 0.4363535214215517, | |
| "reward_std": 0.18411048781126738, | |
| "rewards/code_reward": 0.34376421920751454, | |
| "rewards/format_reward": 0.9258928671479225, | |
| "step": 322 | |
| }, | |
| { | |
| "clip_ratio": 0.001003089109872235, | |
| "epoch": 0.048206706776736157, | |
| "grad_norm": 0.04463668912649155, | |
| "kl": 0.036479949951171875, | |
| "learning_rate": 1.823760626407377e-06, | |
| "loss": 0.0003, | |
| "step": 323 | |
| }, | |
| { | |
| "clip_ratio": 0.0010362897501181578, | |
| "epoch": 0.048355953546942776, | |
| "grad_norm": 0.0432155467569828, | |
| "kl": 0.036895751953125, | |
| "learning_rate": 1.8104979673521838e-06, | |
| "loss": 0.0003, | |
| "step": 324 | |
| }, | |
| { | |
| "clip_ratio": 0.001312947628321126, | |
| "completion_length": 796.1696815490723, | |
| "epoch": 0.04850520031714939, | |
| "grad_norm": 0.04366760700941086, | |
| "kl": 0.03292655944824219, | |
| "learning_rate": 1.7972747279206482e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 17068475.0, | |
| "reward": 0.6358563541434705, | |
| "reward_std": 0.2135081202723086, | |
| "rewards/code_reward": 0.5282670482993126, | |
| "rewards/format_reward": 1.075892847031355, | |
| "step": 325 | |
| }, | |
| { | |
| "clip_ratio": 0.0013257294258437469, | |
| "epoch": 0.048654447087356, | |
| "grad_norm": 0.04495041444897652, | |
| "kl": 0.03363227844238281, | |
| "learning_rate": 1.7840914629334122e-06, | |
| "loss": 0.0004, | |
| "step": 326 | |
| }, | |
| { | |
| "clip_ratio": 0.0012908969647469348, | |
| "epoch": 0.04880369385756261, | |
| "grad_norm": 0.0467698760330677, | |
| "kl": 0.03423309326171875, | |
| "learning_rate": 1.7709487255338731e-06, | |
| "loss": 0.0003, | |
| "step": 327 | |
| }, | |
| { | |
| "clip_ratio": 0.0012681423613685183, | |
| "completion_length": 673.5893173217773, | |
| "epoch": 0.048952940627769226, | |
| "grad_norm": 0.046931758522987366, | |
| "kl": 0.037639617919921875, | |
| "learning_rate": 1.7578470671649684e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 17198167.0, | |
| "reward": 0.6159717463888228, | |
| "reward_std": 0.24445109069347382, | |
| "rewards/code_reward": 0.4978467100299895, | |
| "rewards/format_reward": 1.1812499985098839, | |
| "step": 328 | |
| }, | |
| { | |
| "clip_ratio": 0.0013088965934002772, | |
| "epoch": 0.04910218739797584, | |
| "grad_norm": 0.04621303454041481, | |
| "kl": 0.039310455322265625, | |
| "learning_rate": 1.744787037546045e-06, | |
| "loss": 0.0004, | |
| "step": 329 | |
| }, | |
| { | |
| "clip_ratio": 0.001249867600563448, | |
| "epoch": 0.04925143416818245, | |
| "grad_norm": 0.04626749828457832, | |
| "kl": 0.03778076171875, | |
| "learning_rate": 1.731769184649788e-06, | |
| "loss": 0.0003, | |
| "step": 330 | |
| }, | |
| { | |
| "clip_ratio": 0.0011540742816578131, | |
| "completion_length": 870.2857570648193, | |
| "epoch": 0.04940068093838907, | |
| "grad_norm": 0.04759405553340912, | |
| "kl": 0.030445098876953125, | |
| "learning_rate": 1.7187940546792325e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 17360529.0, | |
| "reward": 0.3948647300712764, | |
| "reward_std": 0.234670925186947, | |
| "rewards/code_reward": 0.3062932917528087, | |
| "rewards/format_reward": 0.8857142850756645, | |
| "step": 331 | |
| }, | |
| { | |
| "clip_ratio": 0.0012074910209776135, | |
| "epoch": 0.04954992770859568, | |
| "grad_norm": 0.044540390372276306, | |
| "kl": 0.03099822998046875, | |
| "learning_rate": 1.7058621920448465e-06, | |
| "loss": 0.0003, | |
| "step": 332 | |
| }, | |
| { | |
| "clip_ratio": 0.0011028643830286455, | |
| "epoch": 0.049699174478802295, | |
| "grad_norm": 0.05389461666345596, | |
| "kl": 0.031070709228515625, | |
| "learning_rate": 1.6929741393416855e-06, | |
| "loss": 0.0001, | |
| "step": 333 | |
| }, | |
| { | |
| "clip_ratio": 0.0012008915800834075, | |
| "completion_length": 834.9464569091797, | |
| "epoch": 0.04984842124900891, | |
| "grad_norm": 0.043137501925230026, | |
| "kl": 0.029438018798828125, | |
| "learning_rate": 1.6801304373266286e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 17520984.0, | |
| "reward": 0.6375483605079353, | |
| "reward_std": 0.24400542792864144, | |
| "rewards/code_reward": 0.5356733408989385, | |
| "rewards/format_reward": 1.0187499970197678, | |
| "step": 334 | |
| }, | |
| { | |
| "clip_ratio": 0.0010621378514770186, | |
| "epoch": 0.04999766801921552, | |
| "grad_norm": 0.04156143590807915, | |
| "kl": 0.030574798583984375, | |
| "learning_rate": 1.667331624895689e-06, | |
| "loss": 0.0004, | |
| "step": 335 | |
| }, | |
| { | |
| "clip_ratio": 0.0010844554426512332, | |
| "epoch": 0.05014691478942213, | |
| "grad_norm": 0.04887258633971214, | |
| "kl": 0.029483795166015625, | |
| "learning_rate": 1.6545782390614037e-06, | |
| "loss": 0.0003, | |
| "step": 336 | |
| }, | |
| { | |
| "clip_ratio": 0.0011277581979811657, | |
| "completion_length": 799.4732551574707, | |
| "epoch": 0.05029616155962875, | |
| "grad_norm": 0.04529580846428871, | |
| "kl": 0.03227424621582031, | |
| "learning_rate": 1.6418708149302992e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 17677928.0, | |
| "reward": 0.6606419142335653, | |
| "reward_std": 0.2389362787362188, | |
| "rewards/code_reward": 0.5465347599820234, | |
| "rewards/format_reward": 1.1410714201629162, | |
| "step": 337 | |
| }, | |
| { | |
| "clip_ratio": 0.0009562958011883893, | |
| "epoch": 0.050445408329835364, | |
| "grad_norm": 0.0461302287876606, | |
| "kl": 0.032329559326171875, | |
| "learning_rate": 1.6292098856804423e-06, | |
| "loss": 0.0004, | |
| "step": 338 | |
| }, | |
| { | |
| "clip_ratio": 0.00097316733081243, | |
| "epoch": 0.05059465510004198, | |
| "grad_norm": 0.04479542747139931, | |
| "kl": 0.033306121826171875, | |
| "learning_rate": 1.6165959825390661e-06, | |
| "loss": 0.0003, | |
| "step": 339 | |
| }, | |
| { | |
| "clip_ratio": 0.001129494600718317, | |
| "completion_length": 818.7946815490723, | |
| "epoch": 0.05074390187024859, | |
| "grad_norm": 0.04621543735265732, | |
| "kl": 0.0410919189453125, | |
| "learning_rate": 1.604029634760284e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 17832759.0, | |
| "reward": 0.4373923074454069, | |
| "reward_std": 0.1885270932689309, | |
| "rewards/code_reward": 0.34391014860011637, | |
| "rewards/format_reward": 0.9348214343190193, | |
| "step": 340 | |
| }, | |
| { | |
| "clip_ratio": 0.0011062995872634929, | |
| "epoch": 0.0508931486404552, | |
| "grad_norm": 0.047534842044115067, | |
| "kl": 0.042877197265625, | |
| "learning_rate": 1.59151136960288e-06, | |
| "loss": 0.0004, | |
| "step": 341 | |
| }, | |
| { | |
| "clip_ratio": 0.00121290785864403, | |
| "epoch": 0.051042395410661814, | |
| "grad_norm": 0.05018023028969765, | |
| "kl": 0.04319000244140625, | |
| "learning_rate": 1.5790417123081903e-06, | |
| "loss": 0.0005, | |
| "step": 342 | |
| }, | |
| { | |
| "clip_ratio": 0.0009951947176887188, | |
| "completion_length": 882.1518287658691, | |
| "epoch": 0.05119164218086843, | |
| "grad_norm": 0.046235859394073486, | |
| "kl": 0.03740882873535156, | |
| "learning_rate": 1.5666211860780583e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 18002085.0, | |
| "reward": 0.6279045841656625, | |
| "reward_std": 0.1975625678896904, | |
| "rewards/code_reward": 0.5160295752430102, | |
| "rewards/format_reward": 1.1187499947845936, | |
| "step": 343 | |
| }, | |
| { | |
| "clip_ratio": 0.0010207766827079467, | |
| "epoch": 0.051340888951075046, | |
| "grad_norm": 0.04691191017627716, | |
| "kl": 0.037837982177734375, | |
| "learning_rate": 1.5542503120528918e-06, | |
| "loss": 0.0006, | |
| "step": 344 | |
| }, | |
| { | |
| "clip_ratio": 0.0011089397885371, | |
| "epoch": 0.05149013572128166, | |
| "grad_norm": 0.047472018748521805, | |
| "kl": 0.039302825927734375, | |
| "learning_rate": 1.5419296092897866e-06, | |
| "loss": 0.0004, | |
| "step": 345 | |
| }, | |
| { | |
| "clip_ratio": 0.00114808726721094, | |
| "completion_length": 815.0714664459229, | |
| "epoch": 0.05163938249148827, | |
| "grad_norm": 0.04975217208266258, | |
| "kl": 0.047466278076171875, | |
| "learning_rate": 1.529659594740755e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 18154966.0, | |
| "reward": 0.4685569778084755, | |
| "reward_std": 0.21453512576408684, | |
| "rewards/code_reward": 0.35534268664196134, | |
| "rewards/format_reward": 1.1321428623050451, | |
| "step": 346 | |
| }, | |
| { | |
| "clip_ratio": 0.0011890214227605611, | |
| "epoch": 0.051788629261694884, | |
| "grad_norm": 0.05200071632862091, | |
| "kl": 0.04558563232421875, | |
| "learning_rate": 1.5174407832310338e-06, | |
| "loss": 0.0005, | |
| "step": 347 | |
| }, | |
| { | |
| "clip_ratio": 0.0012163433930254541, | |
| "epoch": 0.051937876031901496, | |
| "grad_norm": 0.05192115902900696, | |
| "kl": 0.0457916259765625, | |
| "learning_rate": 1.5052736874374815e-06, | |
| "loss": 0.0005, | |
| "step": 348 | |
| }, | |
| { | |
| "clip_ratio": 0.000893519554665545, | |
| "completion_length": 747.633960723877, | |
| "epoch": 0.05208712280210811, | |
| "grad_norm": 0.046751827001571655, | |
| "kl": 0.038177490234375, | |
| "learning_rate": 1.4931588178670695e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 18308764.0, | |
| "reward": 0.6700454382225871, | |
| "reward_std": 0.1688278007786721, | |
| "rewards/code_reward": 0.547991847153753, | |
| "rewards/format_reward": 1.2205356992781162, | |
| "step": 349 | |
| }, | |
| { | |
| "clip_ratio": 0.00095716763917153, | |
| "epoch": 0.05223636957231472, | |
| "grad_norm": 0.04715850204229355, | |
| "kl": 0.037807464599609375, | |
| "learning_rate": 1.4810966828354605e-06, | |
| "loss": 0.0005, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio": 0.0010263329295412404, | |
| "epoch": 0.05238561634252134, | |
| "grad_norm": 0.04827268794178963, | |
| "kl": 0.03871917724609375, | |
| "learning_rate": 1.469087788445684e-06, | |
| "loss": 0.0005, | |
| "step": 351 | |
| }, | |
| { | |
| "clip_ratio": 0.0011772955731430557, | |
| "completion_length": 743.0178833007812, | |
| "epoch": 0.05253486311272795, | |
| "grad_norm": 0.048253390938043594, | |
| "kl": 0.0508270263671875, | |
| "learning_rate": 1.4571326385668965e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 18452077.0, | |
| "reward": 0.7588480142876506, | |
| "reward_std": 0.16709625371731818, | |
| "rewards/code_reward": 0.6398301323351916, | |
| "rewards/format_reward": 1.1901785582304, | |
| "step": 352 | |
| }, | |
| { | |
| "clip_ratio": 0.0012071774326614104, | |
| "epoch": 0.052684109882934566, | |
| "grad_norm": 0.04751910641789436, | |
| "kl": 0.050937652587890625, | |
| "learning_rate": 1.4452317348132434e-06, | |
| "loss": 0.0006, | |
| "step": 353 | |
| }, | |
| { | |
| "clip_ratio": 0.001102846614230657, | |
| "epoch": 0.05283335665314118, | |
| "grad_norm": 0.04808581620454788, | |
| "kl": 0.051250457763671875, | |
| "learning_rate": 1.4333855765228104e-06, | |
| "loss": 0.0006, | |
| "step": 354 | |
| }, | |
| { | |
| "clip_ratio": 0.0012155727981735254, | |
| "completion_length": 709.8928928375244, | |
| "epoch": 0.05298260342334779, | |
| "grad_norm": 0.04265999048948288, | |
| "kl": 0.04134368896484375, | |
| "learning_rate": 1.421594660736675e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 18594676.0, | |
| "reward": 0.43674828251823783, | |
| "reward_std": 0.15885721985250711, | |
| "rewards/code_reward": 0.3227304055035347, | |
| "rewards/format_reward": 1.140178568661213, | |
| "step": 355 | |
| }, | |
| { | |
| "clip_ratio": 0.0011881247082783375, | |
| "epoch": 0.0531318501935544, | |
| "grad_norm": 0.055916447192430496, | |
| "kl": 0.042873382568359375, | |
| "learning_rate": 1.4098594821780476e-06, | |
| "loss": 0.0004, | |
| "step": 356 | |
| }, | |
| { | |
| "clip_ratio": 0.0013783813064947026, | |
| "epoch": 0.053281096963761015, | |
| "grad_norm": 0.0430854894220829, | |
| "kl": 0.0421905517578125, | |
| "learning_rate": 1.3981805332315174e-06, | |
| "loss": 0.0003, | |
| "step": 357 | |
| }, | |
| { | |
| "clip_ratio": 0.0013754095052718185, | |
| "completion_length": 803.4911060333252, | |
| "epoch": 0.053430343733967635, | |
| "grad_norm": 0.05348074436187744, | |
| "kl": 0.06780242919921875, | |
| "learning_rate": 1.3865583039223929e-06, | |
| "loss": 0.0008, | |
| "num_tokens": 18753834.0, | |
| "reward": 0.5694120684638619, | |
| "reward_std": 0.22131631802767515, | |
| "rewards/code_reward": 0.4547692039050162, | |
| "rewards/format_reward": 1.1464285608381033, | |
| "step": 358 | |
| }, | |
| { | |
| "clip_ratio": 0.0012931995715916855, | |
| "epoch": 0.05357959050417425, | |
| "grad_norm": 0.16054613888263702, | |
| "kl": 0.06353759765625, | |
| "learning_rate": 1.374993281896137e-06, | |
| "loss": 0.0006, | |
| "step": 359 | |
| }, | |
| { | |
| "clip_ratio": 0.0011385979851183947, | |
| "epoch": 0.05372883727438086, | |
| "grad_norm": 0.049783170223236084, | |
| "kl": 0.0624847412109375, | |
| "learning_rate": 1.3634859523979134e-06, | |
| "loss": 0.0006, | |
| "step": 360 | |
| }, | |
| { | |
| "clip_ratio": 0.0010799912734000827, | |
| "completion_length": 835.8393230438232, | |
| "epoch": 0.05387808404458747, | |
| "grad_norm": 0.047583021223545074, | |
| "kl": 0.037715911865234375, | |
| "learning_rate": 1.3520367982522208e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 18914935.0, | |
| "reward": 0.5109263020567596, | |
| "reward_std": 0.22399028996005654, | |
| "rewards/code_reward": 0.40164059144444764, | |
| "rewards/format_reward": 1.0928571410477161, | |
| "step": 361 | |
| }, | |
| { | |
| "clip_ratio": 0.0010377659255027538, | |
| "epoch": 0.054027330814794085, | |
| "grad_norm": 0.046828076243400574, | |
| "kl": 0.037921905517578125, | |
| "learning_rate": 1.3406462998426358e-06, | |
| "loss": 0.0003, | |
| "step": 362 | |
| }, | |
| { | |
| "clip_ratio": 0.0011691841591527918, | |
| "epoch": 0.0541765775850007, | |
| "grad_norm": 0.046514738351106644, | |
| "kl": 0.0371551513671875, | |
| "learning_rate": 1.3293149350916595e-06, | |
| "loss": 0.0004, | |
| "step": 363 | |
| }, | |
| { | |
| "clip_ratio": 0.0012077734872946166, | |
| "completion_length": 745.857177734375, | |
| "epoch": 0.05432582435520731, | |
| "grad_norm": 0.046239160001277924, | |
| "kl": 0.0434112548828125, | |
| "learning_rate": 1.3180431794406623e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 19068436.0, | |
| "reward": 0.6796648311428726, | |
| "reward_std": 0.1779340817593038, | |
| "rewards/code_reward": 0.5763612519949675, | |
| "rewards/format_reward": 1.033035721629858, | |
| "step": 364 | |
| }, | |
| { | |
| "clip_ratio": 0.0012792030256605358, | |
| "epoch": 0.05447507112541393, | |
| "grad_norm": 0.04772688075900078, | |
| "kl": 0.042186737060546875, | |
| "learning_rate": 1.3068315058299358e-06, | |
| "loss": 0.0004, | |
| "step": 365 | |
| }, | |
| { | |
| "clip_ratio": 0.001383075692501734, | |
| "epoch": 0.05462431789562054, | |
| "grad_norm": 0.04262588173151016, | |
| "kl": 0.04148101806640625, | |
| "learning_rate": 1.2956803846788503e-06, | |
| "loss": 0.0004, | |
| "step": 366 | |
| }, | |
| { | |
| "clip_ratio": 0.0012452776700229151, | |
| "completion_length": 809.1786212921143, | |
| "epoch": 0.054773564665827154, | |
| "grad_norm": 0.05037975311279297, | |
| "kl": 0.044666290283203125, | |
| "learning_rate": 1.284590283866116e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 19220370.0, | |
| "reward": 0.3905480457469821, | |
| "reward_std": 0.15142776281572878, | |
| "rewards/code_reward": 0.2817980256804731, | |
| "rewards/format_reward": 1.0874999910593033, | |
| "step": 367 | |
| }, | |
| { | |
| "clip_ratio": 0.0012702639169219765, | |
| "epoch": 0.05492281143603377, | |
| "grad_norm": 0.04314856976270676, | |
| "kl": 0.04655647277832031, | |
| "learning_rate": 1.2735616687101518e-06, | |
| "loss": 0.0006, | |
| "step": 368 | |
| }, | |
| { | |
| "clip_ratio": 0.0013624547063955106, | |
| "epoch": 0.05507205820624038, | |
| "grad_norm": 0.05023064464330673, | |
| "kl": 0.043704986572265625, | |
| "learning_rate": 1.2625950019495614e-06, | |
| "loss": 0.0006, | |
| "step": 369 | |
| }, | |
| { | |
| "clip_ratio": 0.0010240589144814294, | |
| "completion_length": 683.500020980835, | |
| "epoch": 0.05522130497644699, | |
| "grad_norm": 0.06126611679792404, | |
| "kl": 0.03558349609375, | |
| "learning_rate": 1.251690743723718e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 19360902.0, | |
| "reward": 0.8276477949693799, | |
| "reward_std": 0.21755714807659388, | |
| "rewards/code_reward": 0.7124692033976316, | |
| "rewards/format_reward": 1.1517857052385807, | |
| "step": 370 | |
| }, | |
| { | |
| "clip_ratio": 0.0010483075620868476, | |
| "epoch": 0.05537055174665361, | |
| "grad_norm": 0.07521291077136993, | |
| "kl": 0.035892486572265625, | |
| "learning_rate": 1.2408493515534581e-06, | |
| "loss": 0.0005, | |
| "step": 371 | |
| }, | |
| { | |
| "clip_ratio": 0.0009877505890472094, | |
| "epoch": 0.05551979851686022, | |
| "grad_norm": 0.061518505215644836, | |
| "kl": 0.036228179931640625, | |
| "learning_rate": 1.2300712803218834e-06, | |
| "loss": 0.0003, | |
| "step": 372 | |
| }, | |
| { | |
| "clip_ratio": 0.0015654707458452322, | |
| "completion_length": 786.1607456207275, | |
| "epoch": 0.055669045287066836, | |
| "grad_norm": 0.04797976464033127, | |
| "kl": 0.04172515869140625, | |
| "learning_rate": 1.2193569822552772e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 19512932.0, | |
| "reward": 0.6857732241041958, | |
| "reward_std": 0.22357454826124012, | |
| "rewards/code_reward": 0.5790767734870315, | |
| "rewards/format_reward": 1.0669642686843872, | |
| "step": 373 | |
| }, | |
| { | |
| "clip_ratio": 0.0013925148214184446, | |
| "epoch": 0.05581829205727345, | |
| "grad_norm": 0.047019653022289276, | |
| "kl": 0.04467010498046875, | |
| "learning_rate": 1.2087069069041268e-06, | |
| "loss": 0.0004, | |
| "step": 374 | |
| }, | |
| { | |
| "clip_ratio": 0.0014627039090555627, | |
| "epoch": 0.05596753882748006, | |
| "grad_norm": 0.04875313118100166, | |
| "kl": 0.042537689208984375, | |
| "learning_rate": 1.1981215011242654e-06, | |
| "loss": 0.0005, | |
| "step": 375 | |
| }, | |
| { | |
| "clip_ratio": 0.0012403775235725334, | |
| "completion_length": 799.3482398986816, | |
| "epoch": 0.05611678559768667, | |
| "grad_norm": 0.04291072115302086, | |
| "kl": 0.029933929443359375, | |
| "learning_rate": 1.1876012090581184e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 19668007.0, | |
| "reward": 0.4753888142295182, | |
| "reward_std": 0.16220126650296152, | |
| "rewards/code_reward": 0.3551209402503446, | |
| "rewards/format_reward": 1.202678557485342, | |
| "step": 376 | |
| }, | |
| { | |
| "clip_ratio": 0.001163258792985289, | |
| "epoch": 0.056266032367893286, | |
| "grad_norm": 0.042848821729421616, | |
| "kl": 0.031200408935546875, | |
| "learning_rate": 1.177146472116071e-06, | |
| "loss": 0.0004, | |
| "step": 377 | |
| }, | |
| { | |
| "clip_ratio": 0.0012621104797290172, | |
| "epoch": 0.056415279138099905, | |
| "grad_norm": 0.04289062321186066, | |
| "kl": 0.032794952392578125, | |
| "learning_rate": 1.1667577289579462e-06, | |
| "loss": 0.0003, | |
| "step": 378 | |
| }, | |
| { | |
| "clip_ratio": 0.0011387938520783791, | |
| "completion_length": 887.6250381469727, | |
| "epoch": 0.05656452590830652, | |
| "grad_norm": 0.04105460271239281, | |
| "kl": 0.030487060546875, | |
| "learning_rate": 1.1564354154746007e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 19833942.0, | |
| "reward": 0.42751474864780903, | |
| "reward_std": 0.2047222899273038, | |
| "rewards/code_reward": 0.31001472822390497, | |
| "rewards/format_reward": 1.1749999895691872, | |
| "step": 379 | |
| }, | |
| { | |
| "clip_ratio": 0.0010691416100598872, | |
| "epoch": 0.05671377267851313, | |
| "grad_norm": 0.040559619665145874, | |
| "kl": 0.029735565185546875, | |
| "learning_rate": 1.146179964769635e-06, | |
| "loss": 0.0001, | |
| "step": 380 | |
| }, | |
| { | |
| "clip_ratio": 0.0009864674502750859, | |
| "epoch": 0.05686301944871974, | |
| "grad_norm": 0.04219922423362732, | |
| "kl": 0.030120849609375, | |
| "learning_rate": 1.1359918071412195e-06, | |
| "loss": 0.0002, | |
| "step": 381 | |
| }, | |
| { | |
| "clip_ratio": 0.001063702917235787, | |
| "completion_length": 778.6339664459229, | |
| "epoch": 0.057012266218926355, | |
| "grad_norm": 0.040541067719459534, | |
| "kl": 0.038173675537109375, | |
| "learning_rate": 1.1258713700640456e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 19974552.0, | |
| "reward": 0.34642348857596517, | |
| "reward_std": 0.1312610807362944, | |
| "rewards/code_reward": 0.2391020506620407, | |
| "rewards/format_reward": 1.0732142701745033, | |
| "step": 382 | |
| }, | |
| { | |
| "clip_ratio": 0.001328528222074965, | |
| "epoch": 0.05716151298913297, | |
| "grad_norm": 0.042283330112695694, | |
| "kl": 0.038555145263671875, | |
| "learning_rate": 1.115819078171383e-06, | |
| "loss": 0.0004, | |
| "step": 383 | |
| }, | |
| { | |
| "clip_ratio": 0.001007061529890052, | |
| "epoch": 0.05731075975933958, | |
| "grad_norm": 0.043051622807979584, | |
| "kl": 0.037868499755859375, | |
| "learning_rate": 1.1058353532372667e-06, | |
| "loss": 0.0004, | |
| "step": 384 | |
| }, | |
| { | |
| "clip_ratio": 0.0009072511438716901, | |
| "completion_length": 844.9286155700684, | |
| "epoch": 0.0574600065295462, | |
| "grad_norm": 0.04384131357073784, | |
| "kl": 0.03057098388671875, | |
| "learning_rate": 1.0959206141587998e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 20133619.0, | |
| "reward": 0.5458137290552258, | |
| "reward_std": 0.11904705758206546, | |
| "rewards/code_reward": 0.43795658252201974, | |
| "rewards/format_reward": 1.0785714220255613, | |
| "step": 385 | |
| }, | |
| { | |
| "clip_ratio": 0.0010800588406709721, | |
| "epoch": 0.05760925329975281, | |
| "grad_norm": 0.04235073924064636, | |
| "kl": 0.03037261962890625, | |
| "learning_rate": 1.0860752769385766e-06, | |
| "loss": 0.0002, | |
| "step": 386 | |
| }, | |
| { | |
| "clip_ratio": 0.0009236693222192116, | |
| "epoch": 0.057758500069959424, | |
| "grad_norm": 0.04210828244686127, | |
| "kl": 0.030902862548828125, | |
| "learning_rate": 1.0762997546672279e-06, | |
| "loss": 0.0001, | |
| "step": 387 | |
| }, | |
| { | |
| "clip_ratio": 0.0011068970561609603, | |
| "completion_length": 752.0625419616699, | |
| "epoch": 0.05790774684016604, | |
| "grad_norm": 0.046190328896045685, | |
| "kl": 0.037837982177734375, | |
| "learning_rate": 1.0665944575060914e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 20281186.0, | |
| "reward": 0.6457045003771782, | |
| "reward_std": 0.2894785313401371, | |
| "rewards/code_reward": 0.5300794651557226, | |
| "rewards/format_reward": 1.1562499813735485, | |
| "step": 388 | |
| }, | |
| { | |
| "clip_ratio": 0.0010543619564487017, | |
| "epoch": 0.05805699361037265, | |
| "grad_norm": 0.04631761088967323, | |
| "kl": 0.0392608642578125, | |
| "learning_rate": 1.056959792669997e-06, | |
| "loss": 0.0006, | |
| "step": 389 | |
| }, | |
| { | |
| "clip_ratio": 0.0011255824747422594, | |
| "epoch": 0.05820624038057926, | |
| "grad_norm": 0.04669871926307678, | |
| "kl": 0.038478851318359375, | |
| "learning_rate": 1.0473961644101856e-06, | |
| "loss": 0.0005, | |
| "step": 390 | |
| }, | |
| { | |
| "clip_ratio": 0.0010092920128954574, | |
| "completion_length": 788.1250305175781, | |
| "epoch": 0.058355487150785874, | |
| "grad_norm": 0.04489421471953392, | |
| "kl": 0.03372001647949219, | |
| "learning_rate": 1.037903973997345e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 20439113.0, | |
| "reward": 0.7695088880136609, | |
| "reward_std": 0.2082495610229671, | |
| "rewards/code_reward": 0.6506695952266455, | |
| "rewards/format_reward": 1.1883928440511227, | |
| "step": 391 | |
| }, | |
| { | |
| "clip_ratio": 0.0010976425510307308, | |
| "epoch": 0.058504733920992494, | |
| "grad_norm": 0.04390792176127434, | |
| "kl": 0.03353691101074219, | |
| "learning_rate": 1.0284836197047737e-06, | |
| "loss": 0.0004, | |
| "step": 392 | |
| }, | |
| { | |
| "clip_ratio": 0.0009941325843101367, | |
| "epoch": 0.058653980691199106, | |
| "grad_norm": 0.041951049119234085, | |
| "kl": 0.034313201904296875, | |
| "learning_rate": 1.0191354967916712e-06, | |
| "loss": 0.0005, | |
| "step": 393 | |
| }, | |
| { | |
| "clip_ratio": 0.0011596001786529087, | |
| "completion_length": 790.3571701049805, | |
| "epoch": 0.05880322746140572, | |
| "grad_norm": 0.043895188719034195, | |
| "kl": 0.040134429931640625, | |
| "learning_rate": 1.0098599974865515e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 20595841.0, | |
| "reward": 0.529973822645843, | |
| "reward_std": 0.24959730240516365, | |
| "rewards/code_reward": 0.4122952348552644, | |
| "rewards/format_reward": 1.1767856981605291, | |
| "step": 394 | |
| }, | |
| { | |
| "clip_ratio": 0.0011324258230160922, | |
| "epoch": 0.05895247423161233, | |
| "grad_norm": 0.04367184266448021, | |
| "kl": 0.039226531982421875, | |
| "learning_rate": 1.0006575109707898e-06, | |
| "loss": 0.0004, | |
| "step": 395 | |
| }, | |
| { | |
| "clip_ratio": 0.0011229302381252637, | |
| "epoch": 0.059101721001818944, | |
| "grad_norm": 0.04425705596804619, | |
| "kl": 0.04046630859375, | |
| "learning_rate": 9.915284233622877e-07, | |
| "loss": 0.0004, | |
| "step": 396 | |
| }, | |
| { | |
| "clip_ratio": 0.0013727044042752823, | |
| "completion_length": 719.5178890228271, | |
| "epoch": 0.059250967772025556, | |
| "grad_norm": 0.04585633426904678, | |
| "kl": 0.048259735107421875, | |
| "learning_rate": 9.824731176992796e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 20738538.0, | |
| "reward": 0.4819835126399994, | |
| "reward_std": 0.1560515824239701, | |
| "rewards/code_reward": 0.36680491268998594, | |
| "rewards/format_reward": 1.1517857126891613, | |
| "step": 397 | |
| }, | |
| { | |
| "clip_ratio": 0.0012794259218935622, | |
| "epoch": 0.05940021454223217, | |
| "grad_norm": 0.04646995663642883, | |
| "kl": 0.04850006103515625, | |
| "learning_rate": 9.734919739242543e-07, | |
| "loss": 0.0005, | |
| "step": 398 | |
| }, | |
| { | |
| "clip_ratio": 0.001545508543131291, | |
| "epoch": 0.05954946131243879, | |
| "grad_norm": 0.04666193947196007, | |
| "kl": 0.048038482666015625, | |
| "learning_rate": 9.645853688680177e-07, | |
| "loss": 0.0005, | |
| "step": 399 | |
| }, | |
| { | |
| "clip_ratio": 0.0013309074929566123, | |
| "completion_length": 834.366117477417, | |
| "epoch": 0.0596987080826454, | |
| "grad_norm": 0.04417085275053978, | |
| "kl": 0.03300666809082031, | |
| "learning_rate": 9.557536762338786e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 20898270.0, | |
| "reward": 0.4664941602386534, | |
| "reward_std": 0.21694895438849926, | |
| "rewards/code_reward": 0.342833440809045, | |
| "rewards/format_reward": 1.236607126891613, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio": 0.0012776327512256103, | |
| "epoch": 0.05984795485285201, | |
| "grad_norm": 0.047289684414863586, | |
| "kl": 0.03195381164550781, | |
| "learning_rate": 9.46997266581973e-07, | |
| "loss": 0.0003, | |
| "step": 401 | |
| }, | |
| { | |
| "clip_ratio": 0.001053016601872514, | |
| "epoch": 0.059997201623058626, | |
| "grad_norm": 0.0457286536693573, | |
| "kl": 0.03333282470703125, | |
| "learning_rate": 9.383165073137115e-07, | |
| "loss": 0.0002, | |
| "step": 402 | |
| }, | |
| { | |
| "clip_ratio": 0.0011895241223101038, | |
| "completion_length": 810.7857532501221, | |
| "epoch": 0.06014644839326524, | |
| "grad_norm": 0.04430186375975609, | |
| "kl": 0.035400390625, | |
| "learning_rate": 9.297117626563687e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 21049887.0, | |
| "reward": 0.6196182537823915, | |
| "reward_std": 0.1633899782318622, | |
| "rewards/code_reward": 0.4986360985203646, | |
| "rewards/format_reward": 1.2098214030265808, | |
| "step": 403 | |
| }, | |
| { | |
| "clip_ratio": 0.0011378818035154836, | |
| "epoch": 0.06029569516347185, | |
| "grad_norm": 0.04314730688929558, | |
| "kl": 0.03586387634277344, | |
| "learning_rate": 9.211833936477957e-07, | |
| "loss": 0.0005, | |
| "step": 404 | |
| }, | |
| { | |
| "clip_ratio": 0.001224413937961799, | |
| "epoch": 0.06044494193367847, | |
| "grad_norm": 0.04237036406993866, | |
| "kl": 0.03537178039550781, | |
| "learning_rate": 9.127317581212753e-07, | |
| "loss": 0.0005, | |
| "step": 405 | |
| }, | |
| { | |
| "clip_ratio": 0.0011261911040492123, | |
| "completion_length": 826.6607475280762, | |
| "epoch": 0.06059418870388508, | |
| "grad_norm": 0.06861373782157898, | |
| "kl": 0.0305938720703125, | |
| "learning_rate": 9.043572106905084e-07, | |
| "loss": 0.0002, | |
| "num_tokens": 21215455.0, | |
| "reward": 0.694146909750998, | |
| "reward_std": 0.2476245230063796, | |
| "rewards/code_reward": 0.5715575968497433, | |
| "rewards/format_reward": 1.225892849266529, | |
| "step": 406 | |
| }, | |
| { | |
| "clip_ratio": 0.0011172464819537709, | |
| "epoch": 0.060743435474091695, | |
| "grad_norm": 0.049063630402088165, | |
| "kl": 0.0308380126953125, | |
| "learning_rate": 8.960601027347321e-07, | |
| "loss": 0.0004, | |
| "step": 407 | |
| }, | |
| { | |
| "clip_ratio": 0.0013254524001240497, | |
| "epoch": 0.06089268224429831, | |
| "grad_norm": 0.04738166183233261, | |
| "kl": 0.03113555908203125, | |
| "learning_rate": 8.878407823839788e-07, | |
| "loss": 0.0004, | |
| "step": 408 | |
| }, | |
| { | |
| "clip_ratio": 0.0012636855608434416, | |
| "completion_length": 779.5803909301758, | |
| "epoch": 0.06104192901450492, | |
| "grad_norm": 0.05109823867678642, | |
| "kl": 0.048473358154296875, | |
| "learning_rate": 8.796995945044689e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 21363731.0, | |
| "reward": 0.6684678364545107, | |
| "reward_std": 0.2355473709758371, | |
| "rewards/code_reward": 0.5479321139864624, | |
| "rewards/format_reward": 1.205357126891613, | |
| "step": 409 | |
| }, | |
| { | |
| "clip_ratio": 0.0010770953031169483, | |
| "epoch": 0.06119117578471153, | |
| "grad_norm": 0.05066639184951782, | |
| "kl": 0.049713134765625, | |
| "learning_rate": 8.716368806841405e-07, | |
| "loss": 0.0005, | |
| "step": 410 | |
| }, | |
| { | |
| "clip_ratio": 0.001110375031203148, | |
| "epoch": 0.061340422554918145, | |
| "grad_norm": 0.04778670892119408, | |
| "kl": 0.047908782958984375, | |
| "learning_rate": 8.636529792183171e-07, | |
| "loss": 0.0004, | |
| "step": 411 | |
| }, | |
| { | |
| "clip_ratio": 0.0012520548352767946, | |
| "completion_length": 919.785758972168, | |
| "epoch": 0.061489669325124764, | |
| "grad_norm": 0.041636690497398376, | |
| "kl": 0.036731719970703125, | |
| "learning_rate": 8.557482250955144e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 21526800.0, | |
| "reward": 0.4328823662362993, | |
| "reward_std": 0.2877360687125474, | |
| "rewards/code_reward": 0.32038235204527155, | |
| "rewards/format_reward": 1.1250000037252903, | |
| "step": 412 | |
| }, | |
| { | |
| "clip_ratio": 0.001170799987448845, | |
| "epoch": 0.06163891609533138, | |
| "grad_norm": 0.041270870715379715, | |
| "kl": 0.0362396240234375, | |
| "learning_rate": 8.479229499833844e-07, | |
| "loss": 0.0005, | |
| "step": 413 | |
| }, | |
| { | |
| "clip_ratio": 0.0013163069452275522, | |
| "epoch": 0.06178816286553799, | |
| "grad_norm": 0.04129687324166298, | |
| "kl": 0.036136627197265625, | |
| "learning_rate": 8.401774822147976e-07, | |
| "loss": 0.0006, | |
| "step": 414 | |
| }, | |
| { | |
| "clip_ratio": 0.0014189317753334763, | |
| "completion_length": 816.785758972168, | |
| "epoch": 0.0619374096357446, | |
| "grad_norm": 0.04516938328742981, | |
| "kl": 0.028072357177734375, | |
| "learning_rate": 8.325121467740695e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 21682897.0, | |
| "reward": 0.6884303884580731, | |
| "reward_std": 0.19168220832943916, | |
| "rewards/code_reward": 0.5636089474428445, | |
| "rewards/format_reward": 1.2482142746448517, | |
| "step": 415 | |
| }, | |
| { | |
| "clip_ratio": 0.0013228377811174141, | |
| "epoch": 0.062086656405951214, | |
| "grad_norm": 0.04481777176260948, | |
| "kl": 0.027347564697265625, | |
| "learning_rate": 8.249272652833226e-07, | |
| "loss": 0.0004, | |
| "step": 416 | |
| }, | |
| { | |
| "clip_ratio": 0.0012798961979569867, | |
| "epoch": 0.06223590317615783, | |
| "grad_norm": 0.045379120856523514, | |
| "kl": 0.02756500244140625, | |
| "learning_rate": 8.174231559889931e-07, | |
| "loss": 0.0004, | |
| "step": 417 | |
| }, | |
| { | |
| "clip_ratio": 0.0010374451630923431, | |
| "completion_length": 739.3125247955322, | |
| "epoch": 0.06238514994636444, | |
| "grad_norm": 0.046955034136772156, | |
| "kl": 0.04058074951171875, | |
| "learning_rate": 8.100001337484787e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 21827041.0, | |
| "reward": 0.6235413569957018, | |
| "reward_std": 0.20960151287727058, | |
| "rewards/code_reward": 0.5099699185811915, | |
| "rewards/format_reward": 1.135714277625084, | |
| "step": 418 | |
| }, | |
| { | |
| "clip_ratio": 0.0011014225965482183, | |
| "epoch": 0.06253439671657106, | |
| "grad_norm": 0.04679929092526436, | |
| "kl": 0.039306640625, | |
| "learning_rate": 8.026585100169251e-07, | |
| "loss": 0.0005, | |
| "step": 419 | |
| }, | |
| { | |
| "clip_ratio": 0.0010908065232797526, | |
| "epoch": 0.06268364348677767, | |
| "grad_norm": 0.047468509525060654, | |
| "kl": 0.04001617431640625, | |
| "learning_rate": 7.953985928341601e-07, | |
| "loss": 0.0005, | |
| "step": 420 | |
| }, | |
| { | |
| "clip_ratio": 0.001171671107840666, | |
| "completion_length": 773.3125324249268, | |
| "epoch": 0.06283289025698428, | |
| "grad_norm": 0.05091605335474014, | |
| "kl": 0.042324066162109375, | |
| "learning_rate": 7.882206868117693e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 21981755.0, | |
| "reward": 0.6787664843723178, | |
| "reward_std": 0.1951366146095097, | |
| "rewards/code_reward": 0.5552843157202005, | |
| "rewards/format_reward": 1.2348214238882065, | |
| "step": 421 | |
| }, | |
| { | |
| "clip_ratio": 0.000994445233118313, | |
| "epoch": 0.0629821370271909, | |
| "grad_norm": 0.05059166997671127, | |
| "kl": 0.042453765869140625, | |
| "learning_rate": 7.81125093120313e-07, | |
| "loss": 0.0006, | |
| "step": 422 | |
| }, | |
| { | |
| "clip_ratio": 0.001127321840613149, | |
| "epoch": 0.06313138379739751, | |
| "grad_norm": 0.05009785294532776, | |
| "kl": 0.042308807373046875, | |
| "learning_rate": 7.741121094766916e-07, | |
| "loss": 0.0006, | |
| "step": 423 | |
| }, | |
| { | |
| "clip_ratio": 0.00141268036350084, | |
| "completion_length": 723.9196796417236, | |
| "epoch": 0.06328063056760412, | |
| "grad_norm": 0.5321958065032959, | |
| "kl": 0.40248870849609375, | |
| "learning_rate": 7.671820301316532e-07, | |
| "loss": 0.0042, | |
| "num_tokens": 22127024.0, | |
| "reward": 0.6335166869685054, | |
| "reward_std": 0.18789594783447683, | |
| "rewards/code_reward": 0.5122666736133397, | |
| "rewards/format_reward": 1.2124999947845936, | |
| "step": 424 | |
| }, | |
| { | |
| "clip_ratio": 0.0016308118902088609, | |
| "epoch": 0.06342987733781073, | |
| "grad_norm": 0.3178289532661438, | |
| "kl": 0.2588348388671875, | |
| "learning_rate": 7.603351458574474e-07, | |
| "loss": 0.0027, | |
| "step": 425 | |
| }, | |
| { | |
| "clip_ratio": 0.0017113010744651547, | |
| "epoch": 0.06357912410801735, | |
| "grad_norm": 0.21873776614665985, | |
| "kl": 0.1898651123046875, | |
| "learning_rate": 7.535717439356255e-07, | |
| "loss": 0.002, | |
| "step": 426 | |
| }, | |
| { | |
| "clip_ratio": 0.001095123085178784, | |
| "completion_length": 865.1071853637695, | |
| "epoch": 0.06372837087822396, | |
| "grad_norm": 0.04840608686208725, | |
| "kl": 0.034221649169921875, | |
| "learning_rate": 7.46892108144986e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 22296716.0, | |
| "reward": 0.6035498650744557, | |
| "reward_std": 0.2631097093690187, | |
| "rewards/code_reward": 0.49417484272271395, | |
| "rewards/format_reward": 1.0937499850988388, | |
| "step": 427 | |
| }, | |
| { | |
| "clip_ratio": 0.0011084674079029355, | |
| "epoch": 0.06387761764843057, | |
| "grad_norm": 0.04596693068742752, | |
| "kl": 0.034698486328125, | |
| "learning_rate": 7.402965187496697e-07, | |
| "loss": 0.0004, | |
| "step": 428 | |
| }, | |
| { | |
| "clip_ratio": 0.0010237721726298332, | |
| "epoch": 0.0640268644186372, | |
| "grad_norm": 0.044551607221364975, | |
| "kl": 0.0347747802734375, | |
| "learning_rate": 7.337852524873974e-07, | |
| "loss": 0.0005, | |
| "step": 429 | |
| }, | |
| { | |
| "clip_ratio": 0.0009593770791980205, | |
| "completion_length": 859.4464588165283, | |
| "epoch": 0.06417611118884381, | |
| "grad_norm": 0.04016838222742081, | |
| "kl": 0.0345458984375, | |
| "learning_rate": 7.273585825578608e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 22461077.0, | |
| "reward": 0.581423498224467, | |
| "reward_std": 0.23203608253970742, | |
| "rewards/code_reward": 0.45811990328365937, | |
| "rewards/format_reward": 1.2330357059836388, | |
| "step": 430 | |
| }, | |
| { | |
| "clip_ratio": 0.000999740838778962, | |
| "epoch": 0.06432535795905042, | |
| "grad_norm": 0.03978991135954857, | |
| "kl": 0.03399658203125, | |
| "learning_rate": 7.21016778611259e-07, | |
| "loss": 0.0004, | |
| "step": 431 | |
| }, | |
| { | |
| "clip_ratio": 0.0010453351060277782, | |
| "epoch": 0.06447460472925703, | |
| "grad_norm": 0.03954523429274559, | |
| "kl": 0.034114837646484375, | |
| "learning_rate": 7.147601067369835e-07, | |
| "loss": 0.0005, | |
| "step": 432 | |
| }, | |
| { | |
| "clip_ratio": 0.0011874185729539022, | |
| "completion_length": 807.7143249511719, | |
| "epoch": 0.06462385149946365, | |
| "grad_norm": 0.04943690821528435, | |
| "kl": 0.03006744384765625, | |
| "learning_rate": 7.085888294524561e-07, | |
| "loss": 0.0003, | |
| "num_tokens": 22610992.0, | |
| "reward": 0.6454438250511885, | |
| "reward_std": 0.2097741151228547, | |
| "rewards/code_reward": 0.5236580954515375, | |
| "rewards/format_reward": 1.2178571298718452, | |
| "step": 433 | |
| }, | |
| { | |
| "clip_ratio": 0.0009378703916809172, | |
| "epoch": 0.06477309826967026, | |
| "grad_norm": 0.04660598561167717, | |
| "kl": 0.029407501220703125, | |
| "learning_rate": 7.025032056921117e-07, | |
| "loss": 0.0002, | |
| "step": 434 | |
| }, | |
| { | |
| "clip_ratio": 0.0011401341880628024, | |
| "epoch": 0.06492234503987687, | |
| "grad_norm": 0.04872843250632286, | |
| "kl": 0.02916717529296875, | |
| "learning_rate": 6.965034907965349e-07, | |
| "loss": 0.0003, | |
| "step": 435 | |
| }, | |
| { | |
| "clip_ratio": 0.0010988110407197382, | |
| "completion_length": 820.857177734375, | |
| "epoch": 0.06507159181008348, | |
| "grad_norm": 0.044269368052482605, | |
| "kl": 0.05198097229003906, | |
| "learning_rate": 6.905899365017462e-07, | |
| "loss": 0.0007, | |
| "num_tokens": 22758417.0, | |
| "reward": 0.621272899210453, | |
| "reward_std": 0.18449996295385063, | |
| "rewards/code_reward": 0.5032371644047089, | |
| "rewards/format_reward": 1.1803571358323097, | |
| "step": 436 | |
| }, | |
| { | |
| "clip_ratio": 0.001356448690785328, | |
| "epoch": 0.0652208385802901, | |
| "grad_norm": 0.044534239917993546, | |
| "kl": 0.052337646484375, | |
| "learning_rate": 6.847627909286409e-07, | |
| "loss": 0.0007, | |
| "step": 437 | |
| }, | |
| { | |
| "clip_ratio": 0.0009920232641889015, | |
| "epoch": 0.06537008535049671, | |
| "grad_norm": 0.05185743048787117, | |
| "kl": 0.0524444580078125, | |
| "learning_rate": 6.790222985725761e-07, | |
| "loss": 0.0004, | |
| "step": 438 | |
| }, | |
| { | |
| "clip_ratio": 0.0011128076639579376, | |
| "completion_length": 675.3750305175781, | |
| "epoch": 0.06551933212070332, | |
| "grad_norm": 0.04222201183438301, | |
| "kl": 0.042942047119140625, | |
| "learning_rate": 6.733687002931141e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 22890983.0, | |
| "reward": 0.5957542844116688, | |
| "reward_std": 0.13940688711591065, | |
| "rewards/code_reward": 0.472272110870108, | |
| "rewards/format_reward": 1.2348214201629162, | |
| "step": 439 | |
| }, | |
| { | |
| "clip_ratio": 0.0010995728334819432, | |
| "epoch": 0.06566857889090993, | |
| "grad_norm": 0.0406472273170948, | |
| "kl": 0.041461944580078125, | |
| "learning_rate": 6.678022333039158e-07, | |
| "loss": 0.0005, | |
| "step": 440 | |
| }, | |
| { | |
| "clip_ratio": 0.0010332062774978112, | |
| "epoch": 0.06581782566111655, | |
| "grad_norm": 0.04296508803963661, | |
| "kl": 0.04045867919921875, | |
| "learning_rate": 6.623231311627876e-07, | |
| "loss": 0.0005, | |
| "step": 441 | |
| }, | |
| { | |
| "clip_ratio": 0.0013383062214415986, | |
| "completion_length": 835.9732513427734, | |
| "epoch": 0.06596707243132316, | |
| "grad_norm": 0.05803956463932991, | |
| "kl": 0.047481536865234375, | |
| "learning_rate": 6.569316237618811e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 23046177.0, | |
| "reward": 0.6742162802256644, | |
| "reward_std": 0.21495943842455745, | |
| "rewards/code_reward": 0.5637698317877948, | |
| "rewards/format_reward": 1.1044642813503742, | |
| "step": 442 | |
| }, | |
| { | |
| "clip_ratio": 0.0012342822874416015, | |
| "epoch": 0.06611631920152979, | |
| "grad_norm": 0.04812246188521385, | |
| "kl": 0.046344757080078125, | |
| "learning_rate": 6.516279373180499e-07, | |
| "loss": 0.0003, | |
| "step": 443 | |
| }, | |
| { | |
| "clip_ratio": 0.0012974117453268263, | |
| "epoch": 0.0662655659717364, | |
| "grad_norm": 0.04777935892343521, | |
| "kl": 0.04587554931640625, | |
| "learning_rate": 6.464122943633543e-07, | |
| "loss": 0.0004, | |
| "step": 444 | |
| }, | |
| { | |
| "clip_ratio": 0.0012328889933996834, | |
| "completion_length": 776.0714588165283, | |
| "epoch": 0.06641481274194301, | |
| "grad_norm": 0.04433201625943184, | |
| "kl": 0.03641510009765625, | |
| "learning_rate": 6.412849137357271e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 23188446.0, | |
| "reward": 0.4833586225286126, | |
| "reward_std": 0.19566731923259795, | |
| "rewards/code_reward": 0.36916218511760235, | |
| "rewards/format_reward": 1.1419642679393291, | |
| "step": 445 | |
| }, | |
| { | |
| "clip_ratio": 0.0013573511714639608, | |
| "epoch": 0.06656405951214962, | |
| "grad_norm": 0.04462065547704697, | |
| "kl": 0.03658485412597656, | |
| "learning_rate": 6.3624601056979e-07, | |
| "loss": 0.0005, | |
| "step": 446 | |
| }, | |
| { | |
| "clip_ratio": 0.001113318128773244, | |
| "epoch": 0.06671330628235624, | |
| "grad_norm": 0.04434194415807724, | |
| "kl": 0.03620719909667969, | |
| "learning_rate": 6.312957962878278e-07, | |
| "loss": 0.0004, | |
| "step": 447 | |
| }, | |
| { | |
| "clip_ratio": 0.0012251644729985856, | |
| "completion_length": 870.0357532501221, | |
| "epoch": 0.06686255305256285, | |
| "grad_norm": 0.04125617817044258, | |
| "kl": 0.026210784912109375, | |
| "learning_rate": 6.264344785909181e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 23358459.0, | |
| "reward": 0.4485369832254946, | |
| "reward_std": 0.12637041695415974, | |
| "rewards/code_reward": 0.32514411299780477, | |
| "rewards/format_reward": 1.233928557485342, | |
| "step": 448 | |
| }, | |
| { | |
| "clip_ratio": 0.0009692438616184518, | |
| "epoch": 0.06701179982276946, | |
| "grad_norm": 0.03909595310688019, | |
| "kl": 0.026597976684570312, | |
| "learning_rate": 6.216622614502149e-07, | |
| "loss": 0.0003, | |
| "step": 449 | |
| }, | |
| { | |
| "clip_ratio": 0.001217871102198842, | |
| "epoch": 0.06716104659297607, | |
| "grad_norm": 0.03862709924578667, | |
| "kl": 0.025884628295898438, | |
| "learning_rate": 6.169793450983916e-07, | |
| "loss": 0.0004, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio": 0.0010045372582681011, | |
| "completion_length": 881.0982608795166, | |
| "epoch": 0.06731029336318269, | |
| "grad_norm": 0.04469547048211098, | |
| "kl": 0.0409698486328125, | |
| "learning_rate": 6.123859260212393e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 23516880.0, | |
| "reward": 0.5806995194870979, | |
| "reward_std": 0.16702931025065482, | |
| "rewards/code_reward": 0.4698066448618192, | |
| "rewards/format_reward": 1.1089285612106323, | |
| "step": 451 | |
| }, | |
| { | |
| "clip_ratio": 0.0008761170529396622, | |
| "epoch": 0.0674595401333893, | |
| "grad_norm": 0.04609547555446625, | |
| "kl": 0.04105377197265625, | |
| "learning_rate": 6.07882196949423e-07, | |
| "loss": 0.0004, | |
| "step": 452 | |
| }, | |
| { | |
| "clip_ratio": 0.0009610654269636143, | |
| "epoch": 0.06760878690359591, | |
| "grad_norm": 0.04481823369860649, | |
| "kl": 0.040618896484375, | |
| "learning_rate": 6.034683468503948e-07, | |
| "loss": 0.0004, | |
| "step": 453 | |
| }, | |
| { | |
| "clip_ratio": 0.000957371406002494, | |
| "completion_length": 678.1160984039307, | |
| "epoch": 0.06775803367380252, | |
| "grad_norm": 0.046332020312547684, | |
| "kl": 0.0386810302734375, | |
| "learning_rate": 5.991445609204641e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 23650313.0, | |
| "reward": 0.7209968403913081, | |
| "reward_std": 0.17456804146058857, | |
| "rewards/code_reward": 0.5993896879081149, | |
| "rewards/format_reward": 1.216071404516697, | |
| "step": 454 | |
| }, | |
| { | |
| "clip_ratio": 0.000984678908935166, | |
| "epoch": 0.06790728044400914, | |
| "grad_norm": 0.04585450142621994, | |
| "kl": 0.0377960205078125, | |
| "learning_rate": 5.949110205770292e-07, | |
| "loss": 0.0003, | |
| "step": 455 | |
| }, | |
| { | |
| "clip_ratio": 0.0011189606830157572, | |
| "epoch": 0.06805652721421576, | |
| "grad_norm": 0.07117211818695068, | |
| "kl": 0.037078857421875, | |
| "learning_rate": 5.90767903450964e-07, | |
| "loss": 0.0004, | |
| "step": 456 | |
| }, | |
| { | |
| "clip_ratio": 0.0012076381026417948, | |
| "completion_length": 843.410758972168, | |
| "epoch": 0.06820577398442237, | |
| "grad_norm": 0.043919820338487625, | |
| "kl": 0.03281402587890625, | |
| "learning_rate": 5.867153833791652e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 23817435.0, | |
| "reward": 0.7192922285757959, | |
| "reward_std": 0.18404422770254314, | |
| "rewards/code_reward": 0.5991136515513062, | |
| "rewards/format_reward": 1.2017856948077679, | |
| "step": 457 | |
| }, | |
| { | |
| "clip_ratio": 0.0012479067445383407, | |
| "epoch": 0.06835502075462899, | |
| "grad_norm": 0.044375985860824585, | |
| "kl": 0.032871246337890625, | |
| "learning_rate": 5.827536303972587e-07, | |
| "loss": 0.0003, | |
| "step": 458 | |
| }, | |
| { | |
| "clip_ratio": 0.0012894814190076431, | |
| "epoch": 0.0685042675248356, | |
| "grad_norm": 0.045444510877132416, | |
| "kl": 0.032073974609375, | |
| "learning_rate": 5.78882810732465e-07, | |
| "loss": 0.0004, | |
| "step": 459 | |
| }, | |
| { | |
| "clip_ratio": 0.0011302930452075088, | |
| "completion_length": 826.9107437133789, | |
| "epoch": 0.06865351429504221, | |
| "grad_norm": 0.046317800879478455, | |
| "kl": 0.033939361572265625, | |
| "learning_rate": 5.75103086796625e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 23968541.0, | |
| "reward": 0.5424030870199203, | |
| "reward_std": 0.21855923091061413, | |
| "rewards/code_reward": 0.4215994896367192, | |
| "rewards/format_reward": 1.208035696297884, | |
| "step": 460 | |
| }, | |
| { | |
| "clip_ratio": 0.001294198547839187, | |
| "epoch": 0.06880276106524882, | |
| "grad_norm": 0.043790195137262344, | |
| "kl": 0.03406524658203125, | |
| "learning_rate": 5.714146171793846e-07, | |
| "loss": 0.0005, | |
| "step": 461 | |
| }, | |
| { | |
| "clip_ratio": 0.0012374374346109107, | |
| "epoch": 0.06895200783545544, | |
| "grad_norm": 0.07143299281597137, | |
| "kl": 0.0339813232421875, | |
| "learning_rate": 5.678175566415422e-07, | |
| "loss": 0.0004, | |
| "step": 462 | |
| }, | |
| { | |
| "clip_ratio": 0.0014906306660122937, | |
| "completion_length": 904.3839683532715, | |
| "epoch": 0.06910125460566205, | |
| "grad_norm": 0.051049333065748215, | |
| "kl": 0.024951934814453125, | |
| "learning_rate": 5.643120561085528e-07, | |
| "loss": 0.0003, | |
| "num_tokens": 24133308.0, | |
| "reward": 0.5312421112321317, | |
| "reward_std": 0.27633541519753635, | |
| "rewards/code_reward": 0.41061710997018963, | |
| "rewards/format_reward": 1.2062499895691872, | |
| "step": 463 | |
| }, | |
| { | |
| "clip_ratio": 0.0014828559123998275, | |
| "epoch": 0.06925050137586866, | |
| "grad_norm": 0.04627595841884613, | |
| "kl": 0.02480316162109375, | |
| "learning_rate": 5.608982626641991e-07, | |
| "loss": 0.0003, | |
| "step": 464 | |
| }, | |
| { | |
| "clip_ratio": 0.0012723907530016731, | |
| "epoch": 0.06939974814607527, | |
| "grad_norm": 0.0477118119597435, | |
| "kl": 0.025279998779296875, | |
| "learning_rate": 5.575763195444166e-07, | |
| "loss": 0.0003, | |
| "step": 465 | |
| }, | |
| { | |
| "clip_ratio": 0.0012686359677900327, | |
| "completion_length": 834.9107551574707, | |
| "epoch": 0.06954899491628189, | |
| "grad_norm": 0.04587775841355324, | |
| "kl": 0.0385589599609375, | |
| "learning_rate": 5.543463661312847e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 24281201.0, | |
| "reward": 0.6464886767789721, | |
| "reward_std": 0.1595687207300216, | |
| "rewards/code_reward": 0.533274372224696, | |
| "rewards/format_reward": 1.132142849266529, | |
| "step": 466 | |
| }, | |
| { | |
| "clip_ratio": 0.0011379432999092387, | |
| "epoch": 0.0696982416864885, | |
| "grad_norm": 0.04559098929166794, | |
| "kl": 0.038360595703125, | |
| "learning_rate": 5.512085379471808e-07, | |
| "loss": 0.0005, | |
| "step": 467 | |
| }, | |
| { | |
| "clip_ratio": 0.0011667834351101192, | |
| "epoch": 0.06984748845669511, | |
| "grad_norm": 0.045429207384586334, | |
| "kl": 0.03958892822265625, | |
| "learning_rate": 5.481629666490903e-07, | |
| "loss": 0.0005, | |
| "step": 468 | |
| }, | |
| { | |
| "clip_ratio": 0.001148404139712511, | |
| "completion_length": 808.4821834564209, | |
| "epoch": 0.06999673522690172, | |
| "grad_norm": 0.04807772859930992, | |
| "kl": 0.03765106201171875, | |
| "learning_rate": 5.452097800230853e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 24434492.0, | |
| "reward": 0.4852910442277789, | |
| "reward_std": 0.24017929495312274, | |
| "rewards/code_reward": 0.3615410323254764, | |
| "rewards/format_reward": 1.2374999970197678, | |
| "step": 469 | |
| }, | |
| { | |
| "clip_ratio": 0.001201979286634014, | |
| "epoch": 0.07014598199710835, | |
| "grad_norm": 0.04833844304084778, | |
| "kl": 0.035778045654296875, | |
| "learning_rate": 5.423491019789623e-07, | |
| "loss": 0.0005, | |
| "step": 470 | |
| }, | |
| { | |
| "clip_ratio": 0.0010972645086440025, | |
| "epoch": 0.07029522876731496, | |
| "grad_norm": 0.04807697981595993, | |
| "kl": 0.035228729248046875, | |
| "learning_rate": 5.395810525450425e-07, | |
| "loss": 0.0005, | |
| "step": 471 | |
| }, | |
| { | |
| "clip_ratio": 0.0011596989388635848, | |
| "completion_length": 822.7946758270264, | |
| "epoch": 0.07044447553752158, | |
| "grad_norm": 0.047449301928281784, | |
| "kl": 0.033260345458984375, | |
| "learning_rate": 5.369057478631359e-07, | |
| "loss": 0.0003, | |
| "num_tokens": 24599963.0, | |
| "reward": 0.49419981660321355, | |
| "reward_std": 0.2545440453104675, | |
| "rewards/code_reward": 0.38009265484288335, | |
| "rewards/format_reward": 1.1410714071244001, | |
| "step": 472 | |
| }, | |
| { | |
| "clip_ratio": 0.001089409208361758, | |
| "epoch": 0.07059372230772819, | |
| "grad_norm": 0.045311130583286285, | |
| "kl": 0.03286552429199219, | |
| "learning_rate": 5.343233001836694e-07, | |
| "loss": 0.0003, | |
| "step": 473 | |
| }, | |
| { | |
| "clip_ratio": 0.0011317514581605792, | |
| "epoch": 0.0707429690779348, | |
| "grad_norm": 0.050007469952106476, | |
| "kl": 0.03356742858886719, | |
| "learning_rate": 5.318338178609754e-07, | |
| "loss": 0.0003, | |
| "step": 474 | |
| }, | |
| { | |
| "clip_ratio": 0.0010420075359434122, | |
| "completion_length": 740.9911155700684, | |
| "epoch": 0.07089221584814141, | |
| "grad_norm": 0.04966285079717636, | |
| "kl": 0.045757293701171875, | |
| "learning_rate": 5.294374053487459e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 24747529.0, | |
| "reward": 0.5183156300336123, | |
| "reward_std": 0.20554508129134774, | |
| "rewards/code_reward": 0.40831562504172325, | |
| "rewards/format_reward": 1.0999999940395355, | |
| "step": 475 | |
| }, | |
| { | |
| "clip_ratio": 0.001186353631055681, | |
| "epoch": 0.07104146261834803, | |
| "grad_norm": 0.04935722425580025, | |
| "kl": 0.046184539794921875, | |
| "learning_rate": 5.271341631956511e-07, | |
| "loss": 0.0006, | |
| "step": 476 | |
| }, | |
| { | |
| "clip_ratio": 0.001091830434233998, | |
| "epoch": 0.07119070938855464, | |
| "grad_norm": 0.04833516106009483, | |
| "kl": 0.04679107666015625, | |
| "learning_rate": 5.249241880411181e-07, | |
| "loss": 0.0005, | |
| "step": 477 | |
| }, | |
| { | |
| "clip_ratio": 0.0012747206210406148, | |
| "completion_length": 907.5357551574707, | |
| "epoch": 0.07133995615876125, | |
| "grad_norm": 0.04407478868961334, | |
| "kl": 0.029878616333007812, | |
| "learning_rate": 5.228075726112785e-07, | |
| "loss": 0.0003, | |
| "num_tokens": 24916653.0, | |
| "reward": 0.38216201262548566, | |
| "reward_std": 0.2679919386282563, | |
| "rewards/code_reward": 0.2559120004007127, | |
| "rewards/format_reward": 1.2624999769032001, | |
| "step": 478 | |
| }, | |
| { | |
| "clip_ratio": 0.0011643571451713797, | |
| "epoch": 0.07148920292896786, | |
| "grad_norm": 0.044017162173986435, | |
| "kl": 0.029003143310546875, | |
| "learning_rate": 5.207844057150768e-07, | |
| "loss": 0.0002, | |
| "step": 479 | |
| }, | |
| { | |
| "clip_ratio": 0.0011917059473489644, | |
| "epoch": 0.07163844969917448, | |
| "grad_norm": 0.04939346760511398, | |
| "kl": 0.029766082763671875, | |
| "learning_rate": 5.188547722405437e-07, | |
| "loss": 0.0002, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio": 0.0012131860949011752, | |
| "completion_length": 848.0000305175781, | |
| "epoch": 0.07178769646938109, | |
| "grad_norm": 0.04608817771077156, | |
| "kl": 0.0340423583984375, | |
| "learning_rate": 5.170187531512351e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 25079536.0, | |
| "reward": 0.5753255924209952, | |
| "reward_std": 0.2258681992534548, | |
| "rewards/code_reward": 0.4602362932055257, | |
| "rewards/format_reward": 1.1508928388357162, | |
| "step": 481 | |
| }, | |
| { | |
| "clip_ratio": 0.0014426689594984055, | |
| "epoch": 0.0719369432395877, | |
| "grad_norm": 0.04564032703638077, | |
| "kl": 0.03497314453125, | |
| "learning_rate": 5.152764254828348e-07, | |
| "loss": 0.0005, | |
| "step": 482 | |
| }, | |
| { | |
| "clip_ratio": 0.0012913148202642333, | |
| "epoch": 0.07208619000979431, | |
| "grad_norm": 0.04598577320575714, | |
| "kl": 0.034259796142578125, | |
| "learning_rate": 5.136278623399225e-07, | |
| "loss": 0.0004, | |
| "step": 483 | |
| }, | |
| { | |
| "clip_ratio": 0.0010959333521896042, | |
| "completion_length": 809.1339664459229, | |
| "epoch": 0.07223543678000094, | |
| "grad_norm": 0.04507589712738991, | |
| "kl": 0.028133392333984375, | |
| "learning_rate": 5.120731328929058e-07, | |
| "loss": 0.0002, | |
| "num_tokens": 25236876.0, | |
| "reward": 0.5080648576840758, | |
| "reward_std": 0.2463505994528532, | |
| "rewards/code_reward": 0.38181484618689865, | |
| "rewards/format_reward": 1.2624999918043613, | |
| "step": 484 | |
| }, | |
| { | |
| "clip_ratio": 0.0009193003133987077, | |
| "epoch": 0.07238468355020755, | |
| "grad_norm": 0.048073090612888336, | |
| "kl": 0.029356002807617188, | |
| "learning_rate": 5.106123023751187e-07, | |
| "loss": 0.0002, | |
| "step": 485 | |
| }, | |
| { | |
| "clip_ratio": 0.0010504390702408273, | |
| "epoch": 0.07253393032041416, | |
| "grad_norm": 0.07422798126935959, | |
| "kl": 0.028322219848632812, | |
| "learning_rate": 5.092454320800833e-07, | |
| "loss": 0.0002, | |
| "step": 486 | |
| }, | |
| { | |
| "clip_ratio": 0.0009964609798771562, | |
| "completion_length": 852.1696815490723, | |
| "epoch": 0.07268317709062078, | |
| "grad_norm": 0.04429541900753975, | |
| "kl": 0.030139923095703125, | |
| "learning_rate": 5.079725793589405e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 25399218.0, | |
| "reward": 0.631489341147244, | |
| "reward_std": 0.18184524704702199, | |
| "rewards/code_reward": 0.5168464637827128, | |
| "rewards/format_reward": 1.1464285738766193, | |
| "step": 487 | |
| }, | |
| { | |
| "clip_ratio": 0.0010598115204629721, | |
| "epoch": 0.07283242386082739, | |
| "grad_norm": 0.04628746956586838, | |
| "kl": 0.030010223388671875, | |
| "learning_rate": 5.067937976180407e-07, | |
| "loss": 0.0005, | |
| "step": 488 | |
| }, | |
| { | |
| "clip_ratio": 0.000984342819720041, | |
| "epoch": 0.072981670631034, | |
| "grad_norm": 0.044439591467380524, | |
| "kl": 0.030406951904296875, | |
| "learning_rate": 5.057091363167046e-07, | |
| "loss": 0.0002, | |
| "step": 489 | |
| }, | |
| { | |
| "clip_ratio": 0.001186176219562185, | |
| "completion_length": 868.383960723877, | |
| "epoch": 0.07313091740124061, | |
| "grad_norm": 0.05477144569158554, | |
| "kl": 0.03466987609863281, | |
| "learning_rate": 5.047186409651489e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 25559141.0, | |
| "reward": 0.543341277167201, | |
| "reward_std": 0.215598865179345, | |
| "rewards/code_reward": 0.428251969628036, | |
| "rewards/format_reward": 1.150892861187458, | |
| "step": 490 | |
| }, | |
| { | |
| "clip_ratio": 0.0009668781112850411, | |
| "epoch": 0.07328016417144723, | |
| "grad_norm": 0.04920962452888489, | |
| "kl": 0.035614013671875, | |
| "learning_rate": 5.038223531225742e-07, | |
| "loss": 0.0005, | |
| "step": 491 | |
| }, | |
| { | |
| "clip_ratio": 0.001169070330888644, | |
| "epoch": 0.07342941094165384, | |
| "grad_norm": 0.05034809932112694, | |
| "kl": 0.035518646240234375, | |
| "learning_rate": 5.030203103954232e-07, | |
| "loss": 0.0005, | |
| "step": 492 | |
| }, | |
| { | |
| "clip_ratio": 0.0012396883666951908, | |
| "completion_length": 859.9464721679688, | |
| "epoch": 0.07357865771186045, | |
| "grad_norm": 0.044976621866226196, | |
| "kl": 0.029693603515625, | |
| "learning_rate": 5.023125464358026e-07, | |
| "loss": 0.0002, | |
| "num_tokens": 25719918.0, | |
| "reward": 0.27847930788993835, | |
| "reward_std": 0.13773812353610992, | |
| "rewards/code_reward": 0.15937215336816735, | |
| "rewards/format_reward": 1.1910714134573936, | |
| "step": 493 | |
| }, | |
| { | |
| "clip_ratio": 0.001113424419600051, | |
| "epoch": 0.07372790448206706, | |
| "grad_norm": 0.044692691415548325, | |
| "kl": 0.0286712646484375, | |
| "learning_rate": 5.016990909400709e-07, | |
| "loss": 0.0002, | |
| "step": 494 | |
| }, | |
| { | |
| "clip_ratio": 0.0011506410692163627, | |
| "epoch": 0.07387715125227368, | |
| "grad_norm": 0.043276555836200714, | |
| "kl": 0.02989959716796875, | |
| "learning_rate": 5.011799696475915e-07, | |
| "loss": 0.0003, | |
| "step": 495 | |
| }, | |
| { | |
| "clip_ratio": 0.0011127546786156017, | |
| "completion_length": 859.4643325805664, | |
| "epoch": 0.07402639802248029, | |
| "grad_norm": 0.04166851192712784, | |
| "kl": 0.037921905517578125, | |
| "learning_rate": 5.007552043396547e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 25887123.0, | |
| "reward": 0.5738976923748851, | |
| "reward_std": 0.21996640338329598, | |
| "rewards/code_reward": 0.4523798130394425, | |
| "rewards/format_reward": 1.2151785753667355, | |
| "step": 496 | |
| }, | |
| { | |
| "clip_ratio": 0.0009825457909755642, | |
| "epoch": 0.07417564479268692, | |
| "grad_norm": 0.04101413115859032, | |
| "kl": 0.036708831787109375, | |
| "learning_rate": 5.004248128385618e-07, | |
| "loss": 0.0004, | |
| "step": 497 | |
| }, | |
| { | |
| "clip_ratio": 0.0010075290774693713, | |
| "epoch": 0.07432489156289353, | |
| "grad_norm": 0.04146130010485649, | |
| "kl": 0.03801918029785156, | |
| "learning_rate": 5.001888090068784e-07, | |
| "loss": 0.0005, | |
| "step": 498 | |
| }, | |
| { | |
| "clip_ratio": 0.0013720846254727803, | |
| "completion_length": 875.857177734375, | |
| "epoch": 0.07447413833310014, | |
| "grad_norm": 0.05116493999958038, | |
| "kl": 0.0377960205078125, | |
| "learning_rate": 5.000472027468528e-07, | |
| "loss": 0.0004, | |
| "num_tokens": 26055779.0, | |
| "reward": 0.5694272452965379, | |
| "reward_std": 0.2635777585674077, | |
| "rewards/code_reward": 0.45674864642933244, | |
| "rewards/format_reward": 1.1267857141792774, | |
| "step": 499 | |
| }, | |
| { | |
| "clip_ratio": 0.0013736705577684916, | |
| "epoch": 0.07462338510330675, | |
| "grad_norm": 0.055545829236507416, | |
| "kl": 0.038105010986328125, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 0.0005, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07462338510330675, | |
| "step": 500, | |
| "total_flos": 0.0, | |
| "train_loss": 0.0003596346975994038, | |
| "train_runtime": 186017.9741, | |
| "train_samples_per_second": 0.301, | |
| "train_steps_per_second": 0.003 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |