| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 39, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 3301.4214477539062, | |
| "epoch": 0.02564102564102564, | |
| "grad_norm": 2.992680549621582, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "reward": 0.1030658227391541, | |
| "reward_std": 0.20713394414633512, | |
| "rewards/code_reward": 0.1030658227391541, | |
| "rewards/format_reward": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "completion_length": 3303.0679321289062, | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 1.2384196519851685, | |
| "kl": 0.0, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "reward": 0.09688444854691625, | |
| "reward_std": 0.16041716001927853, | |
| "rewards/code_reward": 0.0968844504095614, | |
| "rewards/format_reward": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "completion_length": 3189.9893188476562, | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 10.27621841430664, | |
| "kl": 0.004179954528808594, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0002, | |
| "reward": 0.10514286020770669, | |
| "reward_std": 0.2078455975279212, | |
| "rewards/code_reward": 0.10514286020770669, | |
| "rewards/format_reward": 0.0, | |
| "step": 3 | |
| }, | |
| { | |
| "completion_length": 3000.7999267578125, | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 2.5482373237609863, | |
| "kl": 0.0092010498046875, | |
| "learning_rate": 9.98378869844137e-06, | |
| "loss": 0.0004, | |
| "reward": 0.1636996939778328, | |
| "reward_std": 0.28437332436442375, | |
| "rewards/code_reward": 0.1636996977031231, | |
| "rewards/format_reward": 0.0, | |
| "step": 4 | |
| }, | |
| { | |
| "completion_length": 3725.6929321289062, | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 3.836555004119873, | |
| "kl": 0.0565185546875, | |
| "learning_rate": 9.935271596564688e-06, | |
| "loss": 0.0023, | |
| "reward": 0.1552258525043726, | |
| "reward_std": 0.20690688118338585, | |
| "rewards/code_reward": 0.1552258525043726, | |
| "rewards/format_reward": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "completion_length": 3449.982177734375, | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 0.9245039224624634, | |
| "kl": 0.153564453125, | |
| "learning_rate": 9.854798261200746e-06, | |
| "loss": 0.0061, | |
| "reward": 0.20291369408369064, | |
| "reward_std": 0.2591838911175728, | |
| "rewards/code_reward": 0.20291369780898094, | |
| "rewards/format_reward": 0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "completion_length": 2533.5928955078125, | |
| "epoch": 0.1794871794871795, | |
| "grad_norm": 4.267428874969482, | |
| "kl": 0.276611328125, | |
| "learning_rate": 9.74294850457488e-06, | |
| "loss": 0.0111, | |
| "reward": 0.09451580978929996, | |
| "reward_std": 0.19575391709804535, | |
| "rewards/code_reward": 0.09451580978929996, | |
| "rewards/format_reward": 0.0, | |
| "step": 7 | |
| }, | |
| { | |
| "completion_length": 3610.7678833007812, | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 0.9869590997695923, | |
| "kl": 0.216552734375, | |
| "learning_rate": 9.600528206746613e-06, | |
| "loss": 0.0087, | |
| "reward": 0.1328175999224186, | |
| "reward_std": 0.24701963737607002, | |
| "rewards/code_reward": 0.13246045634150505, | |
| "rewards/format_reward": 0.0035714285913854837, | |
| "step": 8 | |
| }, | |
| { | |
| "completion_length": 3664.2072143554688, | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 0.6975364685058594, | |
| "kl": 0.23388671875, | |
| "learning_rate": 9.428563509225348e-06, | |
| "loss": 0.0093, | |
| "reward": 0.123524846509099, | |
| "reward_std": 0.20174134522676468, | |
| "rewards/code_reward": 0.123524846509099, | |
| "rewards/format_reward": 0.0, | |
| "step": 9 | |
| }, | |
| { | |
| "completion_length": 3931.4750366210938, | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 0.7551573514938354, | |
| "kl": 0.20751953125, | |
| "learning_rate": 9.22829342159729e-06, | |
| "loss": 0.0083, | |
| "reward": 0.08102564234286547, | |
| "reward_std": 0.13897551037371159, | |
| "rewards/code_reward": 0.08102564420551062, | |
| "rewards/format_reward": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 4086.3500366210938, | |
| "epoch": 0.28205128205128205, | |
| "grad_norm": 0.3929460942745209, | |
| "kl": 0.201171875, | |
| "learning_rate": 9.001160894432979e-06, | |
| "loss": 0.008, | |
| "reward": 0.06611721590161324, | |
| "reward_std": 0.1253935508430004, | |
| "rewards/code_reward": 0.06611721590161324, | |
| "rewards/format_reward": 0.0, | |
| "step": 11 | |
| }, | |
| { | |
| "completion_length": 4031.3786010742188, | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.3564222455024719, | |
| "kl": 0.208984375, | |
| "learning_rate": 8.748802422795361e-06, | |
| "loss": 0.0084, | |
| "reward": 0.09780338406562805, | |
| "reward_std": 0.15440805070102215, | |
| "rewards/code_reward": 0.09780338034033775, | |
| "rewards/format_reward": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 4077.70361328125, | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.26154839992523193, | |
| "kl": 0.215576171875, | |
| "learning_rate": 8.473036255255368e-06, | |
| "loss": 0.0086, | |
| "reward": 0.07981190085411072, | |
| "reward_std": 0.1499769315123558, | |
| "rewards/code_reward": 0.07981190085411072, | |
| "rewards/format_reward": 0.0, | |
| "step": 13 | |
| }, | |
| { | |
| "completion_length": 3955.335693359375, | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 0.2279486507177353, | |
| "kl": 0.22412109375, | |
| "learning_rate": 8.175849293369292e-06, | |
| "loss": 0.009, | |
| "reward": 0.08388392464257777, | |
| "reward_std": 0.12758804112672806, | |
| "rewards/code_reward": 0.08388392464257777, | |
| "rewards/format_reward": 0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "completion_length": 3789.925048828125, | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.24723802506923676, | |
| "kl": 0.227294921875, | |
| "learning_rate": 7.859382776007544e-06, | |
| "loss": 0.0091, | |
| "reward": 0.11268522241152823, | |
| "reward_std": 0.18607539124786854, | |
| "rewards/code_reward": 0.11268522613681853, | |
| "rewards/format_reward": 0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "completion_length": 3539.6714477539062, | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 0.2956843376159668, | |
| "kl": 0.248779296875, | |
| "learning_rate": 7.52591685167953e-06, | |
| "loss": 0.01, | |
| "reward": 0.2600744627416134, | |
| "reward_std": 0.21227671578526497, | |
| "rewards/code_reward": 0.2600744664669037, | |
| "rewards/format_reward": 0.0, | |
| "step": 16 | |
| }, | |
| { | |
| "completion_length": 3560.6357421875, | |
| "epoch": 0.4358974358974359, | |
| "grad_norm": 0.3600841164588928, | |
| "kl": 0.24560546875, | |
| "learning_rate": 7.1778541500113895e-06, | |
| "loss": 0.0098, | |
| "reward": 0.1853646468371153, | |
| "reward_std": 0.23910802975296974, | |
| "rewards/code_reward": 0.1853646282106638, | |
| "rewards/format_reward": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "completion_length": 3478.9179077148438, | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 0.33355042338371277, | |
| "kl": 0.243896484375, | |
| "learning_rate": 6.817702470744477e-06, | |
| "loss": 0.0098, | |
| "reward": 0.1517396867275238, | |
| "reward_std": 0.1984270103275776, | |
| "rewards/code_reward": 0.15173968486487865, | |
| "rewards/format_reward": 0.0, | |
| "step": 18 | |
| }, | |
| { | |
| "completion_length": 3242.70361328125, | |
| "epoch": 0.48717948717948717, | |
| "grad_norm": 0.4373861849308014, | |
| "kl": 0.2568359375, | |
| "learning_rate": 6.448056714980768e-06, | |
| "loss": 0.0103, | |
| "reward": 0.25208037719130516, | |
| "reward_std": 0.2785281799733639, | |
| "rewards/code_reward": 0.25208036229014397, | |
| "rewards/format_reward": 0.0, | |
| "step": 19 | |
| }, | |
| { | |
| "completion_length": 3169.1214599609375, | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.44415220618247986, | |
| "kl": 0.2646484375, | |
| "learning_rate": 6.071580188860955e-06, | |
| "loss": 0.0106, | |
| "reward": 0.2603498362004757, | |
| "reward_std": 0.27874595671892166, | |
| "rewards/code_reward": 0.2603498287498951, | |
| "rewards/format_reward": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "completion_length": 2705.0072021484375, | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 0.4169292151927948, | |
| "kl": 0.283203125, | |
| "learning_rate": 5.690985414382668e-06, | |
| "loss": 0.0113, | |
| "reward": 0.29475487023591995, | |
| "reward_std": 0.29297249019145966, | |
| "rewards/code_reward": 0.29475487396121025, | |
| "rewards/format_reward": 0.0, | |
| "step": 21 | |
| }, | |
| { | |
| "completion_length": 2707.617919921875, | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 3693.0712890625, | |
| "kl": 29.77001953125, | |
| "learning_rate": 5.309014585617335e-06, | |
| "loss": 1.1875, | |
| "reward": 0.2492063995450735, | |
| "reward_std": 0.25761475786566734, | |
| "rewards/code_reward": 0.2492063995450735, | |
| "rewards/format_reward": 0.0, | |
| "step": 22 | |
| }, | |
| { | |
| "completion_length": 2344.096466064453, | |
| "epoch": 0.5897435897435898, | |
| "grad_norm": 0.38532546162605286, | |
| "kl": 0.29150390625, | |
| "learning_rate": 4.928419811139046e-06, | |
| "loss": 0.0117, | |
| "reward": 0.35850388184189796, | |
| "reward_std": 0.2855670154094696, | |
| "rewards/code_reward": 0.35850388184189796, | |
| "rewards/format_reward": 0.0, | |
| "step": 23 | |
| }, | |
| { | |
| "completion_length": 2637.9856567382812, | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.5114049315452576, | |
| "kl": 0.326171875, | |
| "learning_rate": 4.551943285019233e-06, | |
| "loss": 0.013, | |
| "reward": 0.28699367493391037, | |
| "reward_std": 0.2507447600364685, | |
| "rewards/code_reward": 0.28699367120862007, | |
| "rewards/format_reward": 0.0, | |
| "step": 24 | |
| }, | |
| { | |
| "completion_length": 2756.8214721679688, | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.483265221118927, | |
| "kl": 0.357421875, | |
| "learning_rate": 4.182297529255525e-06, | |
| "loss": 0.0143, | |
| "reward": 0.22337647899985313, | |
| "reward_std": 0.2525235004723072, | |
| "rewards/code_reward": 0.22337647527456284, | |
| "rewards/format_reward": 0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "completion_length": 2624.0642700195312, | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.44989556074142456, | |
| "kl": 0.36669921875, | |
| "learning_rate": 3.822145849988612e-06, | |
| "loss": 0.0147, | |
| "reward": 0.2586754858493805, | |
| "reward_std": 0.24251952394843102, | |
| "rewards/code_reward": 0.2586754783987999, | |
| "rewards/format_reward": 0.0, | |
| "step": 26 | |
| }, | |
| { | |
| "completion_length": 2454.8429260253906, | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.3866746425628662, | |
| "kl": 0.37109375, | |
| "learning_rate": 3.4740831483204696e-06, | |
| "loss": 0.0148, | |
| "reward": 0.2748478166759014, | |
| "reward_std": 0.21565091237425804, | |
| "rewards/code_reward": 0.2748478055000305, | |
| "rewards/format_reward": 0.0, | |
| "step": 27 | |
| }, | |
| { | |
| "completion_length": 2283.039306640625, | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 0.34683477878570557, | |
| "kl": 0.36376953125, | |
| "learning_rate": 3.1406172239924583e-06, | |
| "loss": 0.0145, | |
| "reward": 0.40325161814689636, | |
| "reward_std": 0.29274996370077133, | |
| "rewards/code_reward": 0.40325161069631577, | |
| "rewards/format_reward": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "completion_length": 2789.1107788085938, | |
| "epoch": 0.7435897435897436, | |
| "grad_norm": 0.6074942350387573, | |
| "kl": 0.45703125, | |
| "learning_rate": 2.8241507066307106e-06, | |
| "loss": 0.0183, | |
| "reward": 0.23605629801750183, | |
| "reward_std": 0.22556601464748383, | |
| "rewards/code_reward": 0.23605630174279213, | |
| "rewards/format_reward": 0.0, | |
| "step": 29 | |
| }, | |
| { | |
| "completion_length": 2618.4750366210938, | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.47094276547431946, | |
| "kl": 0.4296875, | |
| "learning_rate": 2.526963744744635e-06, | |
| "loss": 0.0172, | |
| "reward": 0.24552644789218903, | |
| "reward_std": 0.22016411647200584, | |
| "rewards/code_reward": 0.24552644416689873, | |
| "rewards/format_reward": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "completion_length": 2504.3535766601562, | |
| "epoch": 0.7948717948717948, | |
| "grad_norm": 0.460906058549881, | |
| "kl": 0.390625, | |
| "learning_rate": 2.2511975772046403e-06, | |
| "loss": 0.0156, | |
| "reward": 0.2590152435004711, | |
| "reward_std": 0.24991632625460625, | |
| "rewards/code_reward": 0.2590152509510517, | |
| "rewards/format_reward": 0.0, | |
| "step": 31 | |
| }, | |
| { | |
| "completion_length": 2634.0321655273438, | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 0.3826371729373932, | |
| "kl": 0.4052734375, | |
| "learning_rate": 1.9988391055670234e-06, | |
| "loss": 0.0162, | |
| "reward": 0.2098755855113268, | |
| "reward_std": 0.16958633810281754, | |
| "rewards/code_reward": 0.2098755929619074, | |
| "rewards/format_reward": 0.0, | |
| "step": 32 | |
| }, | |
| { | |
| "completion_length": 2563.767822265625, | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 0.40869322419166565, | |
| "kl": 0.39990234375, | |
| "learning_rate": 1.771706578402711e-06, | |
| "loss": 0.016, | |
| "reward": 0.298298716545105, | |
| "reward_std": 0.29596007615327835, | |
| "rewards/code_reward": 0.29829873889684677, | |
| "rewards/format_reward": 0.0, | |
| "step": 33 | |
| }, | |
| { | |
| "completion_length": 2526.94287109375, | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 0.45944005250930786, | |
| "kl": 0.43359375, | |
| "learning_rate": 1.5714364907746535e-06, | |
| "loss": 0.0173, | |
| "reward": 0.2921798676252365, | |
| "reward_std": 0.18791194818913937, | |
| "rewards/code_reward": 0.2921798825263977, | |
| "rewards/format_reward": 0.0, | |
| "step": 34 | |
| }, | |
| { | |
| "completion_length": 2652.6107177734375, | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.42779994010925293, | |
| "kl": 0.44091796875, | |
| "learning_rate": 1.399471793253389e-06, | |
| "loss": 0.0176, | |
| "reward": 0.20903684198856354, | |
| "reward_std": 0.23266056552529335, | |
| "rewards/code_reward": 0.20903684571385384, | |
| "rewards/format_reward": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "completion_length": 2777.0642700195312, | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.48290908336639404, | |
| "kl": 0.44189453125, | |
| "learning_rate": 1.257051495425121e-06, | |
| "loss": 0.0177, | |
| "reward": 0.07366262283176184, | |
| "reward_std": 0.12753414548933506, | |
| "rewards/code_reward": 0.07366262283176184, | |
| "rewards/format_reward": 0.0, | |
| "step": 36 | |
| }, | |
| { | |
| "completion_length": 2577.3999633789062, | |
| "epoch": 0.9487179487179487, | |
| "grad_norm": 0.41417568922042847, | |
| "kl": 0.43896484375, | |
| "learning_rate": 1.1452017387992552e-06, | |
| "loss": 0.0176, | |
| "reward": 0.12102217739447951, | |
| "reward_std": 0.10151251405477524, | |
| "rewards/code_reward": 0.12102217366918921, | |
| "rewards/format_reward": 0.0, | |
| "step": 37 | |
| }, | |
| { | |
| "completion_length": 2499.4678649902344, | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 0.3276176452636719, | |
| "kl": 0.4169921875, | |
| "learning_rate": 1.0647284034353122e-06, | |
| "loss": 0.0167, | |
| "reward": 0.16568297561025247, | |
| "reward_std": 0.10975269501795992, | |
| "rewards/code_reward": 0.16568297561025247, | |
| "rewards/format_reward": 0.0, | |
| "step": 38 | |
| }, | |
| { | |
| "completion_length": 2488.5069580078125, | |
| "epoch": 1.0, | |
| "grad_norm": 0.42517927289009094, | |
| "kl": 0.40771484375, | |
| "learning_rate": 1.0162113015586309e-06, | |
| "loss": 0.0163, | |
| "reward": 0.18103422969579697, | |
| "reward_std": 0.11617903341539204, | |
| "rewards/code_reward": 0.18103424459695816, | |
| "rewards/format_reward": 0.0, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 39, | |
| "total_flos": 0.0, | |
| "train_loss": 0.04123398642398541, | |
| "train_runtime": 18693.2835, | |
| "train_samples_per_second": 0.058, | |
| "train_steps_per_second": 0.002 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 39, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 10, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |