Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN,
"... is not valid JSON
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9993002099370188, | |
| "eval_steps": 500, | |
| "global_step": 357, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 629.7701110839844, | |
| "epoch": 0.0027991602519244225, | |
| "grad_norm": 0.6973966956138611, | |
| "kl": 0.0, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": -0.0, | |
| "reward": 0.2823660857975483, | |
| "reward_std": 0.362668514251709, | |
| "rewards/accuracy_reward": 0.14732143841683865, | |
| "rewards/format_reward": 0.026785715715959668, | |
| "rewards/tag_count_reward": 0.1082589328289032, | |
| "step": 1 | |
| }, | |
| { | |
| "completion_length": 643.1250305175781, | |
| "epoch": 0.005598320503848845, | |
| "grad_norm": 1.0619760751724243, | |
| "kl": 0.0, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 0.0, | |
| "reward": 0.273995541036129, | |
| "reward_std": 0.37962110340595245, | |
| "rewards/accuracy_reward": 0.09151785937137902, | |
| "rewards/format_reward": 0.0424107164144516, | |
| "rewards/tag_count_reward": 0.14006696827709675, | |
| "step": 2 | |
| }, | |
| { | |
| "completion_length": 638.2344055175781, | |
| "epoch": 0.008397480755773267, | |
| "grad_norm": 1.4537550210952759, | |
| "kl": 0.000217437744140625, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": -0.0, | |
| "reward": 0.21093751303851604, | |
| "reward_std": 0.34829047322273254, | |
| "rewards/accuracy_reward": 0.09151786309666932, | |
| "rewards/format_reward": 0.022321430267766118, | |
| "rewards/tag_count_reward": 0.09709821827709675, | |
| "step": 3 | |
| }, | |
| { | |
| "completion_length": 609.8281402587891, | |
| "epoch": 0.01119664100769769, | |
| "grad_norm": 0.9107578992843628, | |
| "kl": 0.00018262863159179688, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.0, | |
| "reward": 0.309709832072258, | |
| "reward_std": 0.36093416810035706, | |
| "rewards/accuracy_reward": 0.13839286309666932, | |
| "rewards/format_reward": 0.026785715948790312, | |
| "rewards/tag_count_reward": 0.1445312574505806, | |
| "step": 4 | |
| }, | |
| { | |
| "completion_length": 608.8281555175781, | |
| "epoch": 0.013995801259622114, | |
| "grad_norm": 0.997071385383606, | |
| "kl": 0.0002675056457519531, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": -0.0, | |
| "reward": 0.2036830447614193, | |
| "reward_std": 0.34512022137641907, | |
| "rewards/accuracy_reward": 0.06250000232830644, | |
| "rewards/format_reward": 0.024553573224693537, | |
| "rewards/tag_count_reward": 0.1166294701397419, | |
| "step": 5 | |
| }, | |
| { | |
| "completion_length": 607.8839416503906, | |
| "epoch": 0.016794961511546535, | |
| "grad_norm": 0.7406116724014282, | |
| "kl": 0.00133514404296875, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": -0.0, | |
| "reward": 0.2421875074505806, | |
| "reward_std": 0.368218295276165, | |
| "rewards/accuracy_reward": 0.09598214761354029, | |
| "rewards/format_reward": 0.022321429569274187, | |
| "rewards/tag_count_reward": 0.1238839365541935, | |
| "step": 6 | |
| }, | |
| { | |
| "completion_length": 599.4933319091797, | |
| "epoch": 0.01959412176347096, | |
| "grad_norm": 0.7837677597999573, | |
| "kl": 0.011138916015625, | |
| "learning_rate": 3.88888888888889e-06, | |
| "loss": -0.0, | |
| "reward": 0.3147321604192257, | |
| "reward_std": 0.38796762377023697, | |
| "rewards/accuracy_reward": 0.15178571944124997, | |
| "rewards/format_reward": 0.02232142980210483, | |
| "rewards/tag_count_reward": 0.1406250074505806, | |
| "step": 7 | |
| }, | |
| { | |
| "completion_length": 646.279052734375, | |
| "epoch": 0.02239328201539538, | |
| "grad_norm": 1.032494306564331, | |
| "kl": 0.0482177734375, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": -0.0, | |
| "reward": 0.2382812611758709, | |
| "reward_std": 0.4348388612270355, | |
| "rewards/accuracy_reward": 0.022321430034935474, | |
| "rewards/format_reward": 0.03571428754366934, | |
| "rewards/tag_count_reward": 0.1802455447614193, | |
| "step": 8 | |
| }, | |
| { | |
| "completion_length": 630.9799194335938, | |
| "epoch": 0.025192442267319804, | |
| "grad_norm": 0.912998378276825, | |
| "kl": 0.50537109375, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "reward": 0.301897332072258, | |
| "reward_std": 0.4177221581339836, | |
| "rewards/accuracy_reward": 0.10267857555299997, | |
| "rewards/format_reward": 0.033482144586741924, | |
| "rewards/tag_count_reward": 0.1657366119325161, | |
| "step": 9 | |
| }, | |
| { | |
| "completion_length": 638.5826110839844, | |
| "epoch": 0.02799160251924423, | |
| "grad_norm": 1.2475032806396484, | |
| "kl": 272.5, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.0, | |
| "reward": 0.3320312649011612, | |
| "reward_std": 0.43727361410856247, | |
| "rewards/accuracy_reward": 0.026785715948790312, | |
| "rewards/format_reward": 0.037946430034935474, | |
| "rewards/tag_count_reward": 0.2672991193830967, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 525.0491333007812, | |
| "epoch": 0.03079076277116865, | |
| "grad_norm": 0.9516815543174744, | |
| "kl": 4692.0, | |
| "learning_rate": 6.111111111111112e-06, | |
| "loss": -0.0, | |
| "reward": 0.5831473469734192, | |
| "reward_std": 0.4952840134501457, | |
| "rewards/accuracy_reward": 0.129464291036129, | |
| "rewards/format_reward": 0.058035717345774174, | |
| "rewards/tag_count_reward": 0.3956473395228386, | |
| "step": 11 | |
| }, | |
| { | |
| "completion_length": 554.5111999511719, | |
| "epoch": 0.03358992302309307, | |
| "grad_norm": 0.8821593523025513, | |
| "kl": 107712.0, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.0, | |
| "reward": 0.4581473395228386, | |
| "reward_std": 0.44926758110523224, | |
| "rewards/accuracy_reward": 0.05580357275903225, | |
| "rewards/format_reward": 0.04464285960420966, | |
| "rewards/tag_count_reward": 0.357700914144516, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 456.93082427978516, | |
| "epoch": 0.0363890832750175, | |
| "grad_norm": 1.2099668979644775, | |
| "kl": 34275328.0, | |
| "learning_rate": 7.222222222222223e-06, | |
| "loss": 0.0, | |
| "reward": 0.6450893208384514, | |
| "reward_std": 0.44861604273319244, | |
| "rewards/accuracy_reward": 0.09821429033763707, | |
| "rewards/format_reward": 0.029017858672887087, | |
| "rewards/tag_count_reward": 0.517857164144516, | |
| "step": 13 | |
| }, | |
| { | |
| "completion_length": 359.7901916503906, | |
| "epoch": 0.03918824352694192, | |
| "grad_norm": 1.3564475774765015, | |
| "kl": 1912602624.0, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": -0.0, | |
| "reward": 0.6975446790456772, | |
| "reward_std": 0.42205285280942917, | |
| "rewards/accuracy_reward": 0.03348214388824999, | |
| "rewards/format_reward": 0.0223214291036129, | |
| "rewards/tag_count_reward": 0.6417410969734192, | |
| "step": 14 | |
| }, | |
| { | |
| "completion_length": 250.95537185668945, | |
| "epoch": 0.04198740377886634, | |
| "grad_norm": 2.748863697052002, | |
| "kl": 121970360320.0, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0, | |
| "reward": 0.9246652126312256, | |
| "reward_std": 0.309246726334095, | |
| "rewards/accuracy_reward": 0.10491071967408061, | |
| "rewards/format_reward": 0.03125000186264515, | |
| "rewards/tag_count_reward": 0.788504496216774, | |
| "step": 15 | |
| }, | |
| { | |
| "completion_length": 179.02456283569336, | |
| "epoch": 0.04478656403079076, | |
| "grad_norm": 8.485617637634277, | |
| "kl": 1172190527488.0, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.0, | |
| "reward": 0.9799107611179352, | |
| "reward_std": 0.2687787376344204, | |
| "rewards/accuracy_reward": 0.1093750037252903, | |
| "rewards/format_reward": 0.03348214365541935, | |
| "rewards/tag_count_reward": 0.8370536118745804, | |
| "step": 16 | |
| }, | |
| { | |
| "completion_length": 140.1183090209961, | |
| "epoch": 0.04758572428271519, | |
| "grad_norm": 9.24393367767334, | |
| "kl": 9486509015040.0, | |
| "learning_rate": 9.444444444444445e-06, | |
| "loss": 0.0, | |
| "reward": 0.96261166036129, | |
| "reward_std": 0.2797929644584656, | |
| "rewards/accuracy_reward": 0.03125000139698386, | |
| "rewards/format_reward": 0.0580357164144516, | |
| "rewards/tag_count_reward": 0.8733259290456772, | |
| "step": 17 | |
| }, | |
| { | |
| "completion_length": 117.89286231994629, | |
| "epoch": 0.05038488453463961, | |
| "grad_norm": 23.932544708251953, | |
| "kl": 1.1927502138114048e+16, | |
| "learning_rate": 1e-05, | |
| "loss": -0.0, | |
| "reward": 0.9838170111179352, | |
| "reward_std": 0.2322257235646248, | |
| "rewards/accuracy_reward": 0.04017857322469354, | |
| "rewards/format_reward": 0.05357143096625805, | |
| "rewards/tag_count_reward": 0.8900670111179352, | |
| "step": 18 | |
| }, | |
| { | |
| "completion_length": 86.49553871154785, | |
| "epoch": 0.05318404478656403, | |
| "grad_norm": 46.19959259033203, | |
| "kl": 1.2350671618100848e+20, | |
| "learning_rate": 1.0555555555555557e-05, | |
| "loss": -0.0, | |
| "reward": 1.0697545111179352, | |
| "reward_std": 0.24332104623317719, | |
| "rewards/accuracy_reward": 0.0468750037252903, | |
| "rewards/format_reward": 0.10267857648432255, | |
| "rewards/tag_count_reward": 0.9202009290456772, | |
| "step": 19 | |
| }, | |
| { | |
| "completion_length": 76.75670051574707, | |
| "epoch": 0.05598320503848846, | |
| "grad_norm": 56.76476287841797, | |
| "kl": 3.6570670126129186e+22, | |
| "learning_rate": 1.1111111111111113e-05, | |
| "loss": 0.0, | |
| "reward": 1.1439732909202576, | |
| "reward_std": 0.2484389767050743, | |
| "rewards/accuracy_reward": 0.1116071492433548, | |
| "rewards/format_reward": 0.0959821492433548, | |
| "rewards/tag_count_reward": 0.9363839775323868, | |
| "step": 20 | |
| }, | |
| { | |
| "completion_length": 81.15625286102295, | |
| "epoch": 0.05878236529041288, | |
| "grad_norm": 72.71674346923828, | |
| "kl": 1.3916814025016844e+25, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.0, | |
| "reward": 1.1205357611179352, | |
| "reward_std": 0.3521122932434082, | |
| "rewards/accuracy_reward": 0.0446428582072258, | |
| "rewards/format_reward": 0.1875000074505806, | |
| "rewards/tag_count_reward": 0.88839291036129, | |
| "step": 21 | |
| }, | |
| { | |
| "completion_length": 82.05134105682373, | |
| "epoch": 0.0615815255423373, | |
| "grad_norm": 290.8921203613281, | |
| "kl": 1.9149384982695726e+27, | |
| "learning_rate": 1.2222222222222224e-05, | |
| "loss": -0.0, | |
| "reward": 1.315290242433548, | |
| "reward_std": 0.42858483642339706, | |
| "rewards/accuracy_reward": 0.08035714668221772, | |
| "rewards/format_reward": 0.4308035895228386, | |
| "rewards/tag_count_reward": 0.804129496216774, | |
| "step": 22 | |
| }, | |
| { | |
| "completion_length": 91.14732551574707, | |
| "epoch": 0.06438068579426172, | |
| "grad_norm": 397.04351806640625, | |
| "kl": 7.412165985221214e+28, | |
| "learning_rate": 1.2777777777777777e-05, | |
| "loss": -0.0, | |
| "reward": 1.479352742433548, | |
| "reward_std": 0.4619317427277565, | |
| "rewards/accuracy_reward": 0.082589291036129, | |
| "rewards/format_reward": 0.6339286118745804, | |
| "rewards/tag_count_reward": 0.7628348767757416, | |
| "step": 23 | |
| }, | |
| { | |
| "completion_length": 132.92634391784668, | |
| "epoch": 0.06717984604618614, | |
| "grad_norm": 211.87890625, | |
| "kl": 1.4108164314902722e+27, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": -0.0, | |
| "reward": 1.3069196939468384, | |
| "reward_std": 0.4795772433280945, | |
| "rewards/accuracy_reward": 0.03794643026776612, | |
| "rewards/format_reward": 0.5625000298023224, | |
| "rewards/tag_count_reward": 0.706473246216774, | |
| "step": 24 | |
| }, | |
| { | |
| "completion_length": 414.10717010498047, | |
| "epoch": 0.06997900629811056, | |
| "grad_norm": 275.24444580078125, | |
| "kl": 6.022578291123375e+29, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.0, | |
| "reward": 0.9899553954601288, | |
| "reward_std": 0.5238821133971214, | |
| "rewards/accuracy_reward": 0.0736607164144516, | |
| "rewards/format_reward": 0.4174107387661934, | |
| "rewards/tag_count_reward": 0.4988839477300644, | |
| "step": 25 | |
| }, | |
| { | |
| "completion_length": 391.50670623779297, | |
| "epoch": 0.072778166550035, | |
| "grad_norm": 199.7307891845703, | |
| "kl": 6.953063542251838e+32, | |
| "learning_rate": 1.4444444444444446e-05, | |
| "loss": -0.0, | |
| "reward": 0.957589328289032, | |
| "reward_std": 0.5497933924198151, | |
| "rewards/accuracy_reward": 0.07812500232830644, | |
| "rewards/format_reward": 0.3816964477300644, | |
| "rewards/tag_count_reward": 0.4977678805589676, | |
| "step": 26 | |
| }, | |
| { | |
| "completion_length": 485.42859649658203, | |
| "epoch": 0.07557732680195942, | |
| "grad_norm": NaN, | |
| "kl": Infinity, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": -0.0, | |
| "reward": 0.651227705180645, | |
| "reward_std": 0.41753336787223816, | |
| "rewards/accuracy_reward": 0.03794643026776612, | |
| "rewards/format_reward": 0.2299107238650322, | |
| "rewards/tag_count_reward": 0.3833705559372902, | |
| "step": 27 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.07837648705388384, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.555555555555556e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.08117564730580826, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6111111111111115e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 29 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.08397480755773268, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.0867739678096571, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7222222222222224e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 31 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.08957312806158152, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7777777777777777e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 32 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.09237228831350595, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 33 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.09517144856543037, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 34 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.0979706088173548, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.10076976906927922, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 36 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.10356892932120364, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9999521087449523e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 37 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.10636808957312806, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9998084395669537e-05, | |
| "loss": 0.0, | |
| "reward": 0.2142857238650322, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.2142857238650322, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 38 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.10916724982505248, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9995690062269985e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 39 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.11196641007697691, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9992338316586132e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.11476557032890133, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9988029479656596e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 41 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.11756473058082575, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9982763964192586e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 42 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.12036389083275018, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9976542274538394e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 43 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.1231630510846746, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9969365006623072e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 44 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.12596221133659902, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.996123284790336e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.12876137158852344, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9952146577297827e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 46 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.13156053184044786, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9942107065112286e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 47 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.13435969209237228, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9931115272956405e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 48 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.1371588523442967, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9919172253651637e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 49 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.13995801259622112, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9906279151130338e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.14275717284814557, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.989243720032624e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 51 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.14555633310007, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.987764772705613e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 52 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.1483554933519944, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9861912147892884e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 53 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.15115465360391883, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9845231970029774e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 54 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.15395381385584325, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.98276087911361e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 55 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.15675297410776767, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9809044299204173e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 56 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.1595521343596921, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.978954027238763e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 57 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.16235129461161651, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9769098578831113e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 58 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.16515045486354094, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.974772117649135e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 59 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.16794961511546536, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.972541011294959e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.17074877536738978, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9702167525215504e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 61 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.1735479356193142, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9677995639522482e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 62 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.17634709587123862, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9652896771114416e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 63 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.17914625612316304, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9626873324023915e-05, | |
| "loss": 0.0, | |
| "reward": 0.2500000149011612, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.2500000149011612, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 64 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.1819454163750875, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.959992779084207e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 65 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.1847445766270119, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9572062752479684e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 66 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.18754373687893633, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9543280877920073e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 67 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.19034289713086075, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9513584923963426e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 68 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.19314205738278517, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9482977734962753e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 69 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.1959412176347096, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.945146224255145e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.198740377886634, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9419041465362477e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 71 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.20153953813855843, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9385718508739263e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 72 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.20433869839048285, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9351496564438228e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 73 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.20713785864240727, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.93163789103231e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 74 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2099370188943317, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9280368910050943e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 75 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.21273617914625612, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9243470012749968e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 76 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.21553533939818054, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9205685752689178e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 77 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.21833449965010496, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9167019748939847e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 78 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.22113365990202938, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9127475705028864e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 79 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.22393282015395383, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.908705740858402e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.22673198040587825, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.9045768730971198e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 81 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.22953114065780267, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.900361362692358e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 82 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2323303009097271, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8960596134162845e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 83 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2351294611616515, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8916720373012425e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 84 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.23792862141357593, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.887199054600286e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.24072778166550035, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8826410937469256e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 86 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.24352694191742477, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8779985913140927e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 87 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2463261021693492, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.873271991972323e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 88 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2491252624212736, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8684617484471662e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 89 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.25192442267319803, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8635683214758213e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2547235829251225, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8585921797630064e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 91 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2575227431770469, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8535337999360655e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 92 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2603219034289713, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8483936664993152e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 93 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2631210636808957, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8431722717876383e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 94 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.26592022393282017, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.837870115919327e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 95 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.26871938418474456, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8324877067481782e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 96 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.271518544436669, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8270255598148542e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 97 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2743177046885934, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8214841982974975e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 98 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.27711686494051785, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.815864152961624e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 99 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.27991602519244224, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8101659621092832e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2827151854443667, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.804390171527497e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 101 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.28551434569629114, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.798537334435986e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 102 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.28831350594821553, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.792608011434178e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 103 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.29111266620014, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.786602770447513e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 104 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2939118264520644, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.780522186673046e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571529686451, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571529686451, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 105 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2967109867039888, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7743668425243547e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 106 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.2995101469559132, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.768137327575751e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 107 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.30230930720783766, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7618342385058147e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 108 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.30510846745976206, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7554581790402372e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 109 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3079076277116865, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.749009759893999e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3107067879636109, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7424895987128723e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 111 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.31350594821553535, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7358983200142608e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 112 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.31630510846745974, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7292365551273835e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 113 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3191042687193842, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7225049421328024e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 114 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3219034289713086, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7157041258013074e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 115 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.32470258922323303, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7088347575321575e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 116 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3275017494751575, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7018974952906885e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 117 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.33030090972708187, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6948930035452905e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 118 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3331000699790063, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.687821953203765e-05, | |
| "loss": 0.0, | |
| "reward": 0.1785714365541935, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1785714365541935, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 119 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3358992302309307, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.680685021549063e-05, | |
| "loss": 0.0, | |
| "reward": 0.1785714365541935, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1785714365541935, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.33869839048285516, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6734828921744127e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 121 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.34149755073477955, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6662162549178433e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 122 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.344296710986704, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.658885805796111e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 123 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3470958712386284, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.651492246938034e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 124 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.34989503149055284, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6440362865172373e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 125 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.35269419174247724, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.636518638684325e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 126 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3554933519944017, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.628940023498477e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 127 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3582925122463261, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.621301166858479e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 128 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3610916724982505, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.613602800433194e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 129 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.363890832750175, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6058456615914815e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.36668999300209937, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.598030493331572e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 131 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3694891532540238, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.590158044209897e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 132 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3722883135059482, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5822290682693944e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 133 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.37508747375787266, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.574244324967283e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 134 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.37788663400979705, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.566204579102317e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 135 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3806857942617215, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5581106007415382e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 136 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3834849545136459, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5499631651465086e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 137 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.38628411476557034, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5417630526990613e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 138 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.38908327501749473, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5335110488265497e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 139 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3918824352694192, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5252079439266179e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.3946815955213436, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5168545332914942e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 141 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.397480755773268, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5084516170318181e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 142 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4002799160251924, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 143 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.40307907627711687, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4915004917131345e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 144 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4058782365290413, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4829539062754597e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 145 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4086773967809657, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.474361062300381e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 146 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.41147655703289016, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4657227828320637e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 147 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.41427571728481455, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4570398952665982e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 148 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.417074877536739, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4483132312727501e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 149 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4198740377886634, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4395436267123017e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.42267319804058784, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4307319215599904e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 151 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.42547235829251223, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4218789598230536e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 152 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4282715185444367, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4129855894603885e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 153 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4310706787963611, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4040526623013317e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 154 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4338698390482855, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3950810339640689e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 155 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4366689993002099, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3860715637736817e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428656578064, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428656578064, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 156 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.43946815955213436, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3770251146798401e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 157 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.44226731980405876, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.367942553174145e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 158 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4450664800559832, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.358824749207136e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 159 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.44786564030790765, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3496725761049637e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.45066480055983205, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3404869104857405e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 161 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4534639608117565, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.331268632175576e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 162 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4562631210636809, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3220186241243063e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 163 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.45906228131560534, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.31273777232092e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 164 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.46186144156752973, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3034269657086993e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 165 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4646606018194542, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2940870961000725e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 166 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.46745976207137857, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2847190580911942e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 167 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.470258922323303, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.27532374897626e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 168 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4730580825752274, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2659020686615602e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 169 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.47585724282715186, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2564549195792842e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 170 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.47865640307907625, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2469832066010843e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 171 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4814555633310007, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.237487836951405e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 172 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4842547235829251, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2279697201205852e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 173 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.48705388383484954, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2184297677777463e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 174 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.489853044086774, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2088688936834705e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 175 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4926522043386984, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1992880136022766e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 176 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.49545136459062283, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1896880452149077e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 177 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.4982505248425472, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1800699080304333e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 178 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5010496850944717, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.170434523298175e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 179 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5038488453463961, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1607828139194683e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 180 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5066480055983205, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1511157043592642e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 181 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.509447165850245, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1414341205575817e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 182 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5122463261021694, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1317389898408188e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 183 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5150454863540938, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.122031240832932e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 184 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5178446466060181, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1123118033664877e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 185 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5206438068579426, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1025816083936036e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 186 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.523442967109867, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0928415878967781e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 187 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5262421273617914, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0830926747996225e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 188 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5290412876137159, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.073335802877504e-05, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 189 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5318404478656403, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0635719066681064e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 190 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5346396081175647, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.053801921381916e-05, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 191 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5374387683694891, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0440267828126478e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 192 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5402379286214136, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0342474272476108e-05, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 193 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.543037088873338, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0244647913780272e-05, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 194 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5458362491252624, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0146798122093167e-05, | |
| "loss": 0.0, | |
| "reward": 0.1785714402794838, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1785714402794838, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 195 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5486354093771868, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.004893426971345e-05, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 196 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5514345696291113, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.951065730286553e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 197 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5542337298810357, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.853201877906836e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 198 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5570328901329601, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.755352086219733e-06, | |
| "loss": 0.0, | |
| "reward": 0.1428571529686451, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571529686451, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 199 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5598320503848845, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.657525727523897e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.562631210636809, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.559732171873524e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 201 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5654303708887334, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.461980786180844e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 202 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5682295311406578, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.364280933318943e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 203 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5710286913925823, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.266641971224963e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 204 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5738278516445067, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.16907325200378e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 205 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5766270118964311, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.071584121032224e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 206 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5794261721483555, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.974183916063967e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 207 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.58222533240028, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.876881966335128e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 208 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5850244926522044, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.779687591670687e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 209 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5878236529041287, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.682610101591813e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 210 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5906228131560531, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.585658794424188e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 211 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5934219734079776, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.488842956407361e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 212 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.596221133659902, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.39217186080532e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 213 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.5990202939118264, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.295654767018254e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 214 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6018194541637508, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.19930091969567e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 215 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6046186144156753, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.103119547850924e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 216 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6074177746675997, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.00711986397724e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 217 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6102169349195241, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.911311063165298e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 218 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6130160951714486, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.815702322222539e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 219 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.615815255423373, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.720302798794153e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 220 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6186144156752974, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.6251216304859555e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 221 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6214135759272218, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.530167933989161e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 222 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6242127361791463, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.435450804207165e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 223 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6270118964310707, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.340979313384404e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 224 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6298110566829951, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.246762510237404e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 225 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6326102169349195, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.1528094190880625e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 226 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.635409377186844, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.059129038999282e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 227 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6382085374387684, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.965730342913011e-06, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 228 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6410076976906928, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.872622276790804e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 229 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6438068579426172, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.779813758756943e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 230 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6466060181945417, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.687313678244243e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 231 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6494051784464661, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.595130895142601e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 232 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6522043386983905, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.5032742389503676e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 233 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.655003498950315, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.411752507928643e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 234 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6578026592022393, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.3205744682585545e-06, | |
| "loss": 0.0, | |
| "reward": 0.1785714365541935, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1785714365541935, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 235 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6606018194541637, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.229748853201605e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 236 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6634009797060881, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.139284362263185e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 237 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6662001399580126, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.049189660359316e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 238 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.668999300209937, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.959473376986686e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 239 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6717984604618614, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.8701441053961185e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 240 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6745976207137858, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.781210401769466e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 241 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6773967809657103, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.692680784400102e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 242 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6801959412176347, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.604563732876989e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 243 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6829951014695591, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.516867687272504e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 244 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6857942617214835, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.429601047334022e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 245 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.688593421973408, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.342772171679364e-06, | |
| "loss": 0.0, | |
| "reward": 0.1785714402794838, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1785714402794838, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 246 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6913925822253324, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.256389376996192e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 247 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6941917424772568, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.17046093724541e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 248 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6969909027291813, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.084995082868658e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 249 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.6997900629811057, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.000000000000003e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 250 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7025892232330301, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.9154838296818246e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 251 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7053883834849545, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.831454667085059e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 252 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.708187543736879, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.747920560733825e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 253 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7109867039888034, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.664889511734509e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 254 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7137858642407278, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.58236947300939e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 255 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7165850244926522, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.500368348534918e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 256 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7193841847445767, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.418893992584624e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 257 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.722183344996501, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.33795420897683e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 258 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7249825052484254, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.257556750327176e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 259 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.72778166550035, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.17770931730606e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 260 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7305808257522743, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.098419557901036e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 261 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7333799860041987, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.019695066684285e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 262 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7361791462561231, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.9415433840851845e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 263 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7389783065080476, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.8639719956680624e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 264 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.741777466759972, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.7869883314152114e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 265 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7445766270118964, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.7105997650152326e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 266 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7473757872638208, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.6348136131567537e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 267 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7501749475157453, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.5596371348276325e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 268 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7529741077676697, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.485077530619664e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 269 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7557732680195941, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.4111419420388904e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 270 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7585724282715185, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.3378374508215704e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 271 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.761371588523443, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.2651710782558798e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 272 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7641707487753674, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.1931497845093753e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 273 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7669699090272918, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.121780467962353e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 274 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7697690692792163, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.0510699645470988e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 275 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7725682295311407, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.981025047093118e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 276 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7753673897830651, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.911652424678425e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 277 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7781665500349895, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.8429587419869288e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 278 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.780965710286914, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.77495057867198e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 279 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7837648705388384, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.7076344487261695e-06, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 280 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7865640307907628, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.6410167998573945e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 281 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7893631910426872, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.57510401287128e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 282 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7921623512946117, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.5099024010600136e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 283 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.794961511546536, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.445418209597632e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 284 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.7977606717984604, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.381657614941858e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 285 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8005598320503848, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.318626724242491e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 286 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8033589923023093, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.2563315747564575e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 287 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8061581525542337, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.1947781332695406e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 288 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8089573128061581, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.133972295524875e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 289 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8117564730580826, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.073919885658223e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 290 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.814555633310007, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.0146266556401405e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 291 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8173547935619314, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.956098284725031e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 292 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8201539538138558, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.898340378907172e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 293 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8229531140657803, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.8413584703837618e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 294 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8257522743177047, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7851580170250304e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 295 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8285514345696291, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.729744401851463e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 296 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8313505948215535, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6751229325182194e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 297 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.834149755073478, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.6212988408067354e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 298 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8369489153254024, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5682772821236192e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 299 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8397480755773268, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.516063335006851e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8425472358292512, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4646620006393497e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 301 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8453463960811757, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.4140782023699396e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 302 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8481455563331001, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3643167852417894e-06, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 303 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8509447165850245, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3153825155283395e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 304 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.853743876836949, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2672800802767715e-06, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 305 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8565430370888734, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2200140868590759e-06, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 306 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8593421973407978, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1735890625307466e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 307 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8621413575927221, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.128009453997142e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 308 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8649405178446467, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0832796269875757e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 309 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.867739678096571, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.0394038658371575e-06, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 310 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8705388383484954, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.963863730764222e-07, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 311 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8733379986004198, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.542312690288035e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 312 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8761371588523443, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.129425914159839e-07, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 313 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8789363191042687, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.725242949711376e-07, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 314 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8817354793561931, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.329802510601559e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 315 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8845346396081175, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.943142473108234e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 316 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.887333799860042, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.565299872500331e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 317 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8901329601119664, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.196310899490577e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 318 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8929321203638908, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.836210896769014e-07, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 319 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8957312806158153, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.485034355617748e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 320 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.8985304408677397, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.142814912607409e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 321 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9013296011196641, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.809585346375235e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 322 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9041287613715885, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.485377574485528e-07, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 323 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.906927921623513, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.17022265037247e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 324 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9097270818754374, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.864150760365771e-07, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 325 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9125262421273618, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.567191220799305e-07, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 326 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9153254023792862, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.2793724752031807e-07, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 327 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9181245626312107, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.000722091579301e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 328 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9209237228831351, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.731266759760854e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 329 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9237228831350595, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.471032288855869e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 330 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9265220433869839, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.2200436047752026e-07, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 331 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9293212036389084, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.978324747844996e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 332 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9321203638908327, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.745898870504116e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 333 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9349195241427571, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.5227882350865154e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 334 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9377186843946816, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.309014211688865e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 335 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.940517844646606, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.104597276123721e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 336 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9433170048985304, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.909557007958307e-07, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 337 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9461161651504548, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7239120886390347e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 338 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9489153254023793, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.5476802997022812e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 339 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9517144856543037, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.3808785210711606e-07, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 340 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9545136459062281, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.2235227294387085e-07, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 341 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9573128061581525, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.075627996737627e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 342 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.960111966410077, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 9.372084886966392e-08, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 343 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9629111266620014, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 8.082774634836754e-08, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 344 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9657102869139258, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 6.888472704359661e-08, | |
| "loss": 0.0, | |
| "reward": 0.0357142873108387, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0357142873108387, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 345 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9685094471658502, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 5.7892934887717746e-08, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 346 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9713086074177747, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.785342270217319e-08, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 347 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9741077676696991, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.8767152096641504e-08, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 348 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9769069279216235, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 3.063499337692788e-08, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 349 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.979706088173548, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 2.3457725461607518e-08, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 350 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9825052484254724, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.7236035807416397e-08, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 351 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9853044086773968, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.1970520343408398e-08, | |
| "loss": 0.0, | |
| "reward": 0.0714285746216774, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0714285746216774, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 352 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9881035689293212, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 7.661683413868748e-09, | |
| "loss": 0.0, | |
| "reward": 0.1428571492433548, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 353 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9909027291812457, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.309937730015978e-09, | |
| "loss": 0.0, | |
| "reward": 0.1785714365541935, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1785714365541935, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 354 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9937018894331701, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 1.915604330464671e-09, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 355 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9965010496850945, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 4.789125504778281e-10, | |
| "loss": 0.0, | |
| "reward": 0.1071428619325161, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.1071428619325161, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 356 | |
| }, | |
| { | |
| "completion_length": 1024.0, | |
| "epoch": 0.9993002099370188, | |
| "grad_norm": NaN, | |
| "kl": NaN, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.0, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9993002099370188, | |
| "step": 357, | |
| "total_flos": 0.0, | |
| "train_loss": -2.92552325735344e-11, | |
| "train_runtime": 25378.3621, | |
| "train_samples_per_second": 0.394, | |
| "train_steps_per_second": 0.014 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 357, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |