| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 36, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 3466.5059814453125, | |
| "epoch": 0.027972027972027972, | |
| "grad_norm": 0.8895487189292908, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "reward": 0.4657738283276558, | |
| "reward_std": 0.11145408265292645, | |
| "rewards/accuracy_reward": 0.08630952658131719, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.379464291036129, | |
| "step": 1 | |
| }, | |
| { | |
| "completion_length": 3675.1995239257812, | |
| "epoch": 0.055944055944055944, | |
| "grad_norm": 0.5455259680747986, | |
| "kl": 0.0, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "reward": 0.473214291036129, | |
| "reward_std": 0.16795706376433372, | |
| "rewards/accuracy_reward": 0.09523809934034944, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.377976194024086, | |
| "step": 2 | |
| }, | |
| { | |
| "completion_length": 3365.479248046875, | |
| "epoch": 0.08391608391608392, | |
| "grad_norm": 0.5860073566436768, | |
| "kl": 0.0010356903076171875, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0, | |
| "reward": 0.5974702388048172, | |
| "reward_std": 0.14405668526887894, | |
| "rewards/accuracy_reward": 0.15476190694607794, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4427083432674408, | |
| "step": 3 | |
| }, | |
| { | |
| "completion_length": 3294.0059814453125, | |
| "epoch": 0.11188811188811189, | |
| "grad_norm": 0.9189674854278564, | |
| "kl": 0.012176513671875, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.0005, | |
| "reward": 0.5416666716337204, | |
| "reward_std": 0.17101533710956573, | |
| "rewards/accuracy_reward": 0.10119047877378762, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.440476194024086, | |
| "step": 4 | |
| }, | |
| { | |
| "completion_length": 3521.1310424804688, | |
| "epoch": 0.13986013986013987, | |
| "grad_norm": 0.6414484977722168, | |
| "kl": 0.028106689453125, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0011, | |
| "reward": 0.5267857313156128, | |
| "reward_std": 0.13104644231498241, | |
| "rewards/accuracy_reward": 0.1428571492433548, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.383928582072258, | |
| "step": 5 | |
| }, | |
| { | |
| "completion_length": 3423.699462890625, | |
| "epoch": 0.16783216783216784, | |
| "grad_norm": 0.297514408826828, | |
| "kl": 0.07110595703125, | |
| "learning_rate": 1.995184726672197e-05, | |
| "loss": 0.0028, | |
| "reward": 0.5029762089252472, | |
| "reward_std": 0.21673721447587013, | |
| "rewards/accuracy_reward": 0.0922619067132473, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.410714291036129, | |
| "step": 6 | |
| }, | |
| { | |
| "completion_length": 3670.21435546875, | |
| "epoch": 0.1958041958041958, | |
| "grad_norm": 0.21352989971637726, | |
| "kl": 0.093505859375, | |
| "learning_rate": 1.9807852804032306e-05, | |
| "loss": 0.0037, | |
| "reward": 0.5223214328289032, | |
| "reward_std": 0.16733192279934883, | |
| "rewards/accuracy_reward": 0.1369047649204731, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.385416679084301, | |
| "step": 7 | |
| }, | |
| { | |
| "completion_length": 3450.9880981445312, | |
| "epoch": 0.22377622377622378, | |
| "grad_norm": 0.17398180067539215, | |
| "kl": 0.14306640625, | |
| "learning_rate": 1.956940335732209e-05, | |
| "loss": 0.0057, | |
| "reward": 0.547619067132473, | |
| "reward_std": 0.1912310989573598, | |
| "rewards/accuracy_reward": 0.14285714738070965, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4047619104385376, | |
| "step": 8 | |
| }, | |
| { | |
| "completion_length": 3476.6488647460938, | |
| "epoch": 0.2517482517482518, | |
| "grad_norm": 0.14767655730247498, | |
| "kl": 0.17724609375, | |
| "learning_rate": 1.9238795325112867e-05, | |
| "loss": 0.0071, | |
| "reward": 0.60863097012043, | |
| "reward_std": 0.13831812981516123, | |
| "rewards/accuracy_reward": 0.2023809552192688, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4062500074505806, | |
| "step": 9 | |
| }, | |
| { | |
| "completion_length": 3633.9048461914062, | |
| "epoch": 0.27972027972027974, | |
| "grad_norm": 0.16100460290908813, | |
| "kl": 0.22509765625, | |
| "learning_rate": 1.881921264348355e-05, | |
| "loss": 0.009, | |
| "reward": 0.5952381044626236, | |
| "reward_std": 0.15078665129840374, | |
| "rewards/accuracy_reward": 0.1934523843228817, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4017857238650322, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 3686.4376220703125, | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.17172181606292725, | |
| "kl": 0.275634765625, | |
| "learning_rate": 1.8314696123025456e-05, | |
| "loss": 0.011, | |
| "reward": 0.462053582072258, | |
| "reward_std": 0.09804809279739857, | |
| "rewards/accuracy_reward": 0.0892857164144516, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3727678656578064, | |
| "step": 11 | |
| }, | |
| { | |
| "completion_length": 3672.27392578125, | |
| "epoch": 0.3356643356643357, | |
| "grad_norm": 0.1871335655450821, | |
| "kl": 0.3203125, | |
| "learning_rate": 1.773010453362737e-05, | |
| "loss": 0.0128, | |
| "reward": 0.4568452462553978, | |
| "reward_std": 0.12699773162603378, | |
| "rewards/accuracy_reward": 0.08333333511836827, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3735119178891182, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 3601.2619018554688, | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.18339282274246216, | |
| "kl": 0.29345703125, | |
| "learning_rate": 1.7071067811865477e-05, | |
| "loss": 0.0117, | |
| "reward": 0.6927083358168602, | |
| "reward_std": 0.1693347617983818, | |
| "rewards/accuracy_reward": 0.3125000111758709, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3802083358168602, | |
| "step": 13 | |
| }, | |
| { | |
| "completion_length": 3655.6220703125, | |
| "epoch": 0.3916083916083916, | |
| "grad_norm": 0.2542259693145752, | |
| "kl": 0.31201171875, | |
| "learning_rate": 1.6343932841636455e-05, | |
| "loss": 0.0125, | |
| "reward": 0.7633928656578064, | |
| "reward_std": 0.2515065036714077, | |
| "rewards/accuracy_reward": 0.3958333469927311, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3675595223903656, | |
| "step": 14 | |
| }, | |
| { | |
| "completion_length": 1954.6220397949219, | |
| "epoch": 0.4195804195804196, | |
| "grad_norm": 64.4857177734375, | |
| "kl": 1.3046875, | |
| "learning_rate": 1.5555702330196024e-05, | |
| "loss": 0.0522, | |
| "reward": 0.712053582072258, | |
| "reward_std": 0.3067754730582237, | |
| "rewards/accuracy_reward": 0.2440476194024086, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4680059552192688, | |
| "step": 15 | |
| }, | |
| { | |
| "completion_length": 3941.5655517578125, | |
| "epoch": 0.44755244755244755, | |
| "grad_norm": 0.17128266394138336, | |
| "kl": 0.34619140625, | |
| "learning_rate": 1.4713967368259981e-05, | |
| "loss": 0.0138, | |
| "reward": 0.6443452462553978, | |
| "reward_std": 0.24465123564004898, | |
| "rewards/accuracy_reward": 0.3005952388048172, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3437500074505806, | |
| "step": 16 | |
| }, | |
| { | |
| "completion_length": 3827.9702758789062, | |
| "epoch": 0.4755244755244755, | |
| "grad_norm": 13.349394798278809, | |
| "kl": 0.435546875, | |
| "learning_rate": 1.3826834323650899e-05, | |
| "loss": 0.0174, | |
| "reward": 0.6183035746216774, | |
| "reward_std": 0.22436635196208954, | |
| "rewards/accuracy_reward": 0.27678572852164507, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3415178582072258, | |
| "step": 17 | |
| }, | |
| { | |
| "completion_length": 3816.27685546875, | |
| "epoch": 0.5034965034965035, | |
| "grad_norm": 2.260338544845581, | |
| "kl": 0.365234375, | |
| "learning_rate": 1.2902846772544625e-05, | |
| "loss": 0.0146, | |
| "reward": 0.6138392984867096, | |
| "reward_std": 0.21694295294582844, | |
| "rewards/accuracy_reward": 0.2678571483120322, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3459821492433548, | |
| "step": 18 | |
| }, | |
| { | |
| "completion_length": 3398.71142578125, | |
| "epoch": 0.5314685314685315, | |
| "grad_norm": 0.17236056923866272, | |
| "kl": 0.34814453125, | |
| "learning_rate": 1.1950903220161286e-05, | |
| "loss": 0.0139, | |
| "reward": 0.7968750149011612, | |
| "reward_std": 0.2352231778204441, | |
| "rewards/accuracy_reward": 0.4196428582072258, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3772321492433548, | |
| "step": 19 | |
| }, | |
| { | |
| "completion_length": 3874.4227294921875, | |
| "epoch": 0.5594405594405595, | |
| "grad_norm": 0.1826857030391693, | |
| "kl": 0.37646484375, | |
| "learning_rate": 1.098017140329561e-05, | |
| "loss": 0.015, | |
| "reward": 0.6227678656578064, | |
| "reward_std": 0.2774837426841259, | |
| "rewards/accuracy_reward": 0.2738095261156559, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3489583358168602, | |
| "step": 20 | |
| }, | |
| { | |
| "completion_length": 3617.1904907226562, | |
| "epoch": 0.5874125874125874, | |
| "grad_norm": 1.202433466911316, | |
| "kl": 0.333984375, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0134, | |
| "reward": 0.7008928805589676, | |
| "reward_std": 0.2767525836825371, | |
| "rewards/accuracy_reward": 0.3392857201397419, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3616071492433548, | |
| "step": 21 | |
| }, | |
| { | |
| "completion_length": 3817.994140625, | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 1.7046478986740112, | |
| "kl": 0.34375, | |
| "learning_rate": 9.019828596704394e-06, | |
| "loss": 0.0138, | |
| "reward": 0.654017873108387, | |
| "reward_std": 0.24345579743385315, | |
| "rewards/accuracy_reward": 0.29761905409395695, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3563988134264946, | |
| "step": 22 | |
| }, | |
| { | |
| "completion_length": 3474.8810424804688, | |
| "epoch": 0.6433566433566433, | |
| "grad_norm": 3.3308699131011963, | |
| "kl": 0.3984375, | |
| "learning_rate": 8.04909677983872e-06, | |
| "loss": 0.0159, | |
| "reward": 0.8549107313156128, | |
| "reward_std": 0.251787431538105, | |
| "rewards/accuracy_reward": 0.476190485060215, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3787202462553978, | |
| "step": 23 | |
| }, | |
| { | |
| "completion_length": 3292.071533203125, | |
| "epoch": 0.6713286713286714, | |
| "grad_norm": 0.8111428022384644, | |
| "kl": 0.34228515625, | |
| "learning_rate": 7.097153227455379e-06, | |
| "loss": 0.0137, | |
| "reward": 0.8050595372915268, | |
| "reward_std": 0.2745497487485409, | |
| "rewards/accuracy_reward": 0.416666679084301, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3883928656578064, | |
| "step": 24 | |
| }, | |
| { | |
| "completion_length": 3325.571533203125, | |
| "epoch": 0.6993006993006993, | |
| "grad_norm": 0.2051524817943573, | |
| "kl": 0.33544921875, | |
| "learning_rate": 6.173165676349103e-06, | |
| "loss": 0.0134, | |
| "reward": 0.8095238208770752, | |
| "reward_std": 0.2951691634953022, | |
| "rewards/accuracy_reward": 0.4166666716337204, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3928571492433548, | |
| "step": 25 | |
| }, | |
| { | |
| "completion_length": 3417.4406127929688, | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.1402244120836258, | |
| "kl": 0.3544921875, | |
| "learning_rate": 5.286032631740023e-06, | |
| "loss": 0.0142, | |
| "reward": 0.723958358168602, | |
| "reward_std": 0.15768472477793694, | |
| "rewards/accuracy_reward": 0.3720238171517849, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3519345298409462, | |
| "step": 26 | |
| }, | |
| { | |
| "completion_length": 2988.1697387695312, | |
| "epoch": 0.7552447552447552, | |
| "grad_norm": 0.15998531877994537, | |
| "kl": 0.33447265625, | |
| "learning_rate": 4.444297669803981e-06, | |
| "loss": 0.0134, | |
| "reward": 0.79613097012043, | |
| "reward_std": 0.17738395184278488, | |
| "rewards/accuracy_reward": 0.377976194024086, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.418154776096344, | |
| "step": 27 | |
| }, | |
| { | |
| "completion_length": 3657.1666870117188, | |
| "epoch": 0.7832167832167832, | |
| "grad_norm": 0.173067107796669, | |
| "kl": 0.361328125, | |
| "learning_rate": 3.6560671583635467e-06, | |
| "loss": 0.0145, | |
| "reward": 0.5111607238650322, | |
| "reward_std": 0.11157247237861156, | |
| "rewards/accuracy_reward": 0.14583333395421505, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3653273805975914, | |
| "step": 28 | |
| }, | |
| { | |
| "completion_length": 3481.666748046875, | |
| "epoch": 0.8111888111888111, | |
| "grad_norm": 0.18258774280548096, | |
| "kl": 0.31201171875, | |
| "learning_rate": 2.9289321881345257e-06, | |
| "loss": 0.0125, | |
| "reward": 0.7008928805589676, | |
| "reward_std": 0.20082013495266438, | |
| "rewards/accuracy_reward": 0.2976190522313118, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4032738208770752, | |
| "step": 29 | |
| }, | |
| { | |
| "completion_length": 3570.7173461914062, | |
| "epoch": 0.8391608391608392, | |
| "grad_norm": 4.07529354095459, | |
| "kl": 0.39990234375, | |
| "learning_rate": 2.26989546637263e-06, | |
| "loss": 0.016, | |
| "reward": 0.534226194024086, | |
| "reward_std": 0.15385975502431393, | |
| "rewards/accuracy_reward": 0.16071429196745157, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3735119104385376, | |
| "step": 30 | |
| }, | |
| { | |
| "completion_length": 3432.7529907226562, | |
| "epoch": 0.8671328671328671, | |
| "grad_norm": 0.19004768133163452, | |
| "kl": 0.3251953125, | |
| "learning_rate": 1.6853038769745466e-06, | |
| "loss": 0.013, | |
| "reward": 0.6971726417541504, | |
| "reward_std": 0.21272188052535057, | |
| "rewards/accuracy_reward": 0.2976190522313118, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.399553582072258, | |
| "step": 31 | |
| }, | |
| { | |
| "completion_length": 3168.482177734375, | |
| "epoch": 0.8951048951048951, | |
| "grad_norm": 0.175730898976326, | |
| "kl": 0.31787109375, | |
| "learning_rate": 1.1807873565164507e-06, | |
| "loss": 0.0127, | |
| "reward": 0.6726190447807312, | |
| "reward_std": 0.1167123094201088, | |
| "rewards/accuracy_reward": 0.2648809589445591, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4077381044626236, | |
| "step": 32 | |
| }, | |
| { | |
| "completion_length": 3504.7440795898438, | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.2087661623954773, | |
| "kl": 0.37939453125, | |
| "learning_rate": 7.612046748871327e-07, | |
| "loss": 0.0152, | |
| "reward": 0.674107164144516, | |
| "reward_std": 0.19641772098839283, | |
| "rewards/accuracy_reward": 0.28571428544819355, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3883928656578064, | |
| "step": 33 | |
| }, | |
| { | |
| "completion_length": 2965.9286499023438, | |
| "epoch": 0.951048951048951, | |
| "grad_norm": 0.19867944717407227, | |
| "kl": 0.3359375, | |
| "learning_rate": 4.305966426779118e-07, | |
| "loss": 0.0134, | |
| "reward": 0.7485119253396988, | |
| "reward_std": 0.204582080245018, | |
| "rewards/accuracy_reward": 0.3244047649204731, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4241071492433548, | |
| "step": 34 | |
| }, | |
| { | |
| "completion_length": 3074.5208740234375, | |
| "epoch": 0.9790209790209791, | |
| "grad_norm": 0.19133983552455902, | |
| "kl": 0.32958984375, | |
| "learning_rate": 1.921471959676957e-07, | |
| "loss": 0.0132, | |
| "reward": 0.7343750149011612, | |
| "reward_std": 0.15512866899371147, | |
| "rewards/accuracy_reward": 0.3214285783469677, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.4129464402794838, | |
| "step": 35 | |
| }, | |
| { | |
| "completion_length": 3781.7938639322915, | |
| "epoch": 1.0, | |
| "grad_norm": 0.19133983552455902, | |
| "kl": 0.392578125, | |
| "learning_rate": 4.815273327803183e-08, | |
| "loss": 0.0118, | |
| "reward": 0.4583333532015483, | |
| "reward_std": 0.11528908833861351, | |
| "rewards/accuracy_reward": 0.1071428582072258, | |
| "rewards/format_reward": 0.0, | |
| "rewards/tag_count_reward": 0.3511904776096344, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 36, | |
| "total_flos": 0.0, | |
| "train_loss": 0.011807907519011864, | |
| "train_runtime": 16456.6289, | |
| "train_samples_per_second": 0.061, | |
| "train_steps_per_second": 0.002 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 36, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |