{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"eval_steps": 10,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio": 0.0,
"completion_length": 941.515625,
"epoch": 0.5,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 5e-08,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 1
},
{
"clip_ratio": 0.0,
"completion_length": 957.15625,
"epoch": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 2
},
{
"clip_ratio": 0.0,
"completion_length": 947.609375,
"epoch": 1.5,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.5e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 3
},
{
"clip_ratio": 0.0,
"completion_length": 905.96875,
"epoch": 2.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 4
},
{
"clip_ratio": 0.0,
"completion_length": 934.28125,
"epoch": 2.5,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.5e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 5
},
{
"clip_ratio": 0.0,
"completion_length": 842.84375,
"epoch": 3.0,
"grad_norm": 0.4104181280494549,
"kl": 0.0,
"learning_rate": 3e-07,
"loss": 0.0097,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 6
},
{
"clip_ratio": 0.0,
"completion_length": 1060.203125,
"epoch": 3.5,
"grad_norm": 0.006270841170194027,
"kl": 0.0004673004150390625,
"learning_rate": 3.5e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 7
},
{
"clip_ratio": 0.0,
"completion_length": 846.78125,
"epoch": 4.0,
"grad_norm": 0.18416362793200072,
"kl": 0.0004229545593261719,
"learning_rate": 4e-07,
"loss": 0.0438,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 8
},
{
"clip_ratio": 0.0,
"completion_length": 939.875,
"epoch": 4.5,
"grad_norm": 0.004467489660870973,
"kl": 0.00043773651123046875,
"learning_rate": 4.5e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 9
},
{
"epoch": 5.0,
"grad_norm": 0.003060223517037833,
"learning_rate": 5e-07,
"loss": 0.0,
"step": 10
},
{
"epoch": 5.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 914.8203125,
"eval_kl": 0.0004506111145019531,
"eval_loss": 0.0222869124263525,
"eval_reward": 0.0078125,
"eval_reward_std": 0.03125,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0078125,
"eval_runtime": 102.8292,
"eval_samples_per_second": 0.078,
"eval_steps_per_second": 0.01,
"step": 10
},
{
"clip_ratio": 0.0,
"completion_length": 931.875,
"epoch": 5.5,
"grad_norm": 0.0029719679687620396,
"kl": 0.0004215240478515625,
"learning_rate": 5.5e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 11
},
{
"clip_ratio": 0.0,
"completion_length": 840.71875,
"epoch": 6.0,
"grad_norm": 0.003151997120265335,
"kl": 0.00043010711669921875,
"learning_rate": 6e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 12
},
{
"clip_ratio": 0.0,
"completion_length": 1021.125,
"epoch": 6.5,
"grad_norm": 0.00293567226380183,
"kl": 0.00043010711669921875,
"learning_rate": 6.5e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 13
},
{
"clip_ratio": 0.0,
"completion_length": 901.53125,
"epoch": 7.0,
"grad_norm": 0.0029155273721110384,
"kl": 0.0004425048828125,
"learning_rate": 7e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 14
},
{
"clip_ratio": 0.0,
"completion_length": 882.046875,
"epoch": 7.5,
"grad_norm": 0.24400359215564857,
"kl": 0.00048542022705078125,
"learning_rate": 7.5e-07,
"loss": 0.0581,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 15
},
{
"clip_ratio": 0.0,
"completion_length": 957.84375,
"epoch": 8.0,
"grad_norm": 0.0030236272302122703,
"kl": 0.0004591941833496094,
"learning_rate": 8e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 16
},
{
"clip_ratio": 0.0,
"completion_length": 878.1875,
"epoch": 8.5,
"grad_norm": 0.005634063780769768,
"kl": 0.0004887580871582031,
"learning_rate": 8.499999999999999e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 17
},
{
"clip_ratio": 0.0,
"completion_length": 840.09375,
"epoch": 9.0,
"grad_norm": 0.0033459555512032678,
"kl": 0.0004935264587402344,
"learning_rate": 9e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 18
},
{
"clip_ratio": 0.0,
"completion_length": 949.34375,
"epoch": 9.5,
"grad_norm": 0.1871077676079831,
"kl": 0.00047397613525390625,
"learning_rate": 9.499999999999999e-07,
"loss": 0.0603,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 19
},
{
"epoch": 10.0,
"grad_norm": 0.48673171940966303,
"learning_rate": 1e-06,
"loss": 0.0174,
"step": 20
},
{
"epoch": 10.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 883.921875,
"eval_kl": 0.00046753883361816406,
"eval_loss": 1.7391932487953454e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 98.7699,
"eval_samples_per_second": 0.081,
"eval_steps_per_second": 0.01,
"step": 20
},
{
"clip_ratio": 0.0,
"completion_length": 912.484375,
"epoch": 10.5,
"grad_norm": 0.3321658665461902,
"kl": 0.0004425048828125,
"learning_rate": 9.99931462820376e-07,
"loss": 0.0141,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 21
},
{
"clip_ratio": 0.0,
"completion_length": 749.5625,
"epoch": 11.0,
"grad_norm": 0.003786135169019501,
"kl": 0.00048828125,
"learning_rate": 9.997258721585931e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 22
},
{
"clip_ratio": 0.0,
"completion_length": 927.234375,
"epoch": 11.5,
"grad_norm": 0.002638559729174169,
"kl": 0.0004210472106933594,
"learning_rate": 9.993832906395582e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 23
},
{
"clip_ratio": 0.0,
"completion_length": 847.6875,
"epoch": 12.0,
"grad_norm": 0.0037353511628820143,
"kl": 0.0004801750183105469,
"learning_rate": 9.989038226169207e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 24
},
{
"clip_ratio": 0.0,
"completion_length": 937.25,
"epoch": 12.5,
"grad_norm": 0.0041230093967703964,
"kl": 0.00046634674072265625,
"learning_rate": 9.982876141412855e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 25
},
{
"clip_ratio": 0.0,
"completion_length": 936.625,
"epoch": 13.0,
"grad_norm": 0.0048191989969733635,
"kl": 0.0005083084106445312,
"learning_rate": 9.975348529157229e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 26
},
{
"clip_ratio": 0.0,
"completion_length": 860.578125,
"epoch": 13.5,
"grad_norm": 0.004235845464067776,
"kl": 0.00048828125,
"learning_rate": 9.96645768238595e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 27
},
{
"clip_ratio": 0.0,
"completion_length": 842.3125,
"epoch": 14.0,
"grad_norm": 0.0035023722132941533,
"kl": 0.0004673004150390625,
"learning_rate": 9.956206309337066e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 28
},
{
"clip_ratio": 0.0,
"completion_length": 876.859375,
"epoch": 14.5,
"grad_norm": 0.0054294046901942426,
"kl": 0.0005173683166503906,
"learning_rate": 9.944597532678119e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 29
},
{
"epoch": 15.0,
"grad_norm": 0.0031965438782979136,
"learning_rate": 9.931634888554935e-07,
"loss": 0.0,
"step": 30
},
{
"epoch": 15.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 986.3515625,
"eval_kl": 0.0004820823669433594,
"eval_loss": 1.6995334590319544e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 108.5726,
"eval_samples_per_second": 0.074,
"eval_steps_per_second": 0.009,
"step": 30
},
{
"clip_ratio": 0.0,
"completion_length": 900.5234375,
"epoch": 15.5,
"grad_norm": 0.004036025288001792,
"kl": 0.000476837158203125,
"learning_rate": 9.917322325514487e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 31
},
{
"clip_ratio": 0.0,
"completion_length": 934.3125,
"epoch": 16.0,
"grad_norm": 0.006195144986033682,
"kl": 0.0006299018859863281,
"learning_rate": 9.901664203302124e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 32
},
{
"clip_ratio": 0.0,
"completion_length": 920.390625,
"epoch": 16.5,
"grad_norm": 0.0059456824839291,
"kl": 0.0005106925964355469,
"learning_rate": 9.88466529153356e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 33
},
{
"clip_ratio": 0.0,
"completion_length": 879.125,
"epoch": 17.0,
"grad_norm": 0.21435517887811947,
"kl": 0.000614166259765625,
"learning_rate": 9.866330768241983e-07,
"loss": 0.0454,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 34
},
{
"clip_ratio": 0.0,
"completion_length": 871.34375,
"epoch": 17.5,
"grad_norm": 0.0066464547863155045,
"kl": 0.0006079673767089844,
"learning_rate": 9.846666218300807e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 35
},
{
"clip_ratio": 0.0,
"completion_length": 802.6875,
"epoch": 18.0,
"grad_norm": 0.2319706301168723,
"kl": 0.0005307197570800781,
"learning_rate": 9.825677631722435e-07,
"loss": 0.0365,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 36
},
{
"clip_ratio": 0.0,
"completion_length": 898.46875,
"epoch": 18.5,
"grad_norm": 0.007249327675941041,
"kl": 0.0006723403930664062,
"learning_rate": 9.80337140183366e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 37
},
{
"clip_ratio": 0.0,
"completion_length": 902.125,
"epoch": 19.0,
"grad_norm": 0.007027408347733654,
"kl": 0.0005617141723632812,
"learning_rate": 9.779754323328192e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 38
},
{
"clip_ratio": 0.0,
"completion_length": 876.8125,
"epoch": 19.5,
"grad_norm": 0.24622076919830327,
"kl": 0.0005426406860351562,
"learning_rate": 9.754833590196926e-07,
"loss": 0.0332,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 39
},
{
"epoch": 20.0,
"grad_norm": 0.0061991261734385215,
"learning_rate": 9.728616793536587e-07,
"loss": 0.0,
"step": 40
},
{
"epoch": 20.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 896.96875,
"eval_kl": 0.0005970001220703125,
"eval_loss": 2.167217098758556e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 103.8756,
"eval_samples_per_second": 0.077,
"eval_steps_per_second": 0.01,
"step": 40
},
{
"clip_ratio": 0.0,
"completion_length": 826.8203125,
"epoch": 20.5,
"grad_norm": 0.36000165453483507,
"kl": 0.0005981922149658203,
"learning_rate": 9.701111919237408e-07,
"loss": 0.0126,
"reward": 0.0078125,
"reward_std": 0.03125,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0078125,
"step": 41
},
{
"clip_ratio": 0.0,
"completion_length": 848.5,
"epoch": 21.0,
"grad_norm": 0.010730892722707015,
"kl": 0.0006737709045410156,
"learning_rate": 9.672327345550543e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 42
},
{
"clip_ratio": 0.0,
"completion_length": 815.03125,
"epoch": 21.5,
"grad_norm": 0.4479793536674205,
"kl": 0.00072479248046875,
"learning_rate": 9.64227184053598e-07,
"loss": 0.0694,
"reward": 0.03125,
"reward_std": 0.125,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.03125,
"step": 43
},
{
"clip_ratio": 0.0,
"completion_length": 1039.59375,
"epoch": 22.0,
"grad_norm": 0.005097432717583717,
"kl": 0.0005826950073242188,
"learning_rate": 9.610954559391704e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 44
},
{
"clip_ratio": 0.0,
"completion_length": 913.5,
"epoch": 22.5,
"grad_norm": 0.005095664845686755,
"kl": 0.0005693435668945312,
"learning_rate": 9.578385041664925e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 45
},
{
"clip_ratio": 0.0,
"completion_length": 982.3125,
"epoch": 23.0,
"grad_norm": 0.008066078534943774,
"kl": 0.0007781982421875,
"learning_rate": 9.54457320834625e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 46
},
{
"clip_ratio": 0.0,
"completion_length": 940.015625,
"epoch": 23.5,
"grad_norm": 0.007637537595000865,
"kl": 0.0008058547973632812,
"learning_rate": 9.509529358847654e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 47
},
{
"clip_ratio": 0.0,
"completion_length": 968.125,
"epoch": 24.0,
"grad_norm": 0.3097871453627547,
"kl": 0.0007429122924804688,
"learning_rate": 9.473264167865171e-07,
"loss": 0.0144,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 48
},
{
"clip_ratio": 0.0,
"completion_length": 1062.859375,
"epoch": 24.5,
"grad_norm": 0.005902735175410845,
"kl": 0.0006656646728515625,
"learning_rate": 9.43578868212728e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 49
},
{
"epoch": 25.0,
"grad_norm": 0.007727395228257341,
"learning_rate": 9.397114317029974e-07,
"loss": 0.0,
"step": 50
},
{
"epoch": 25.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 896.96875,
"eval_kl": 0.000972747802734375,
"eval_loss": 2.9567998353741132e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 112.2758,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.009,
"step": 50
},
{
"clip_ratio": 0.0,
"completion_length": 945.0,
"epoch": 25.5,
"grad_norm": 0.008522169760744136,
"kl": 0.0008330345153808594,
"learning_rate": 9.357252853159505e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 51
},
{
"clip_ratio": 0.0,
"completion_length": 916.71875,
"epoch": 26.0,
"grad_norm": 0.007229054230337371,
"kl": 0.0007457733154296875,
"learning_rate": 9.316216432703916e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 52
},
{
"clip_ratio": 0.0,
"completion_length": 848.4375,
"epoch": 26.5,
"grad_norm": 0.009711666023450752,
"kl": 0.00109100341796875,
"learning_rate": 9.274017555754407e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 53
},
{
"clip_ratio": 0.0,
"completion_length": 1112.53125,
"epoch": 27.0,
"grad_norm": 0.007287564769621722,
"kl": 0.0007829666137695312,
"learning_rate": 9.230669076497687e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 54
},
{
"clip_ratio": 0.0,
"completion_length": 901.5625,
"epoch": 27.5,
"grad_norm": 0.010695472961060359,
"kl": 0.0011224746704101562,
"learning_rate": 9.186184199300463e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 55
},
{
"clip_ratio": 0.0,
"completion_length": 1000.40625,
"epoch": 28.0,
"grad_norm": 0.012234323967281642,
"kl": 0.0009059906005859375,
"learning_rate": 9.140576474687263e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 56
},
{
"clip_ratio": 0.0,
"completion_length": 1010.1875,
"epoch": 28.5,
"grad_norm": 0.009177302889350094,
"kl": 0.0009927749633789062,
"learning_rate": 9.093859795212817e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 57
},
{
"clip_ratio": 0.0,
"completion_length": 910.34375,
"epoch": 29.0,
"grad_norm": 0.011497131889061145,
"kl": 0.0014095306396484375,
"learning_rate": 9.046048391230247e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 58
},
{
"clip_ratio": 0.0,
"completion_length": 846.640625,
"epoch": 29.5,
"grad_norm": 0.009586334839630907,
"kl": 0.001155853271484375,
"learning_rate": 8.997156826556369e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 59
},
{
"epoch": 30.0,
"grad_norm": 0.01647829222650522,
"learning_rate": 8.9471999940354e-07,
"loss": 0.0,
"step": 60
},
{
"epoch": 30.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 907.8828125,
"eval_kl": 0.0010533332824707031,
"eval_loss": 0.021646613255143166,
"eval_reward": 0.0078125,
"eval_reward_std": 0.03125,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0078125,
"eval_runtime": 107.6329,
"eval_samples_per_second": 0.074,
"eval_steps_per_second": 0.009,
"step": 60
},
{
"clip_ratio": 0.0,
"completion_length": 920.4375,
"epoch": 30.5,
"grad_norm": 0.0168591367199892,
"kl": 0.0012812614440917969,
"learning_rate": 8.896193111002475e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 61
},
{
"clip_ratio": 0.0,
"completion_length": 970.28125,
"epoch": 31.0,
"grad_norm": 0.008710794047022944,
"kl": 0.0010786056518554688,
"learning_rate": 8.844151714648274e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 62
},
{
"clip_ratio": 0.0,
"completion_length": 957.59375,
"epoch": 31.5,
"grad_norm": 0.012243414806160322,
"kl": 0.0009698867797851562,
"learning_rate": 8.791091657286267e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 63
},
{
"clip_ratio": 0.0,
"completion_length": 855.59375,
"epoch": 32.0,
"grad_norm": 0.015193617397552813,
"kl": 0.001148223876953125,
"learning_rate": 8.737029101523929e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 64
},
{
"clip_ratio": 0.0,
"completion_length": 1009.546875,
"epoch": 32.5,
"grad_norm": 0.007369522915137431,
"kl": 0.0008792877197265625,
"learning_rate": 8.681980515339463e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 65
},
{
"clip_ratio": 0.0,
"completion_length": 868.84375,
"epoch": 33.0,
"grad_norm": 0.015494723417675436,
"kl": 0.0012235641479492188,
"learning_rate": 8.625962667065487e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 66
},
{
"clip_ratio": 0.0,
"completion_length": 877.046875,
"epoch": 33.5,
"grad_norm": 0.022596736280910343,
"kl": 0.0012197494506835938,
"learning_rate": 8.568992620281243e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 67
},
{
"clip_ratio": 0.0,
"completion_length": 1001.34375,
"epoch": 34.0,
"grad_norm": 0.005352216682577747,
"kl": 0.0008544921875,
"learning_rate": 8.511087728614862e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 68
},
{
"clip_ratio": 0.0,
"completion_length": 924.84375,
"epoch": 34.5,
"grad_norm": 0.005953464155512391,
"kl": 0.000751495361328125,
"learning_rate": 8.452265630457282e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 69
},
{
"epoch": 35.0,
"grad_norm": 0.016277308014848297,
"learning_rate": 8.392544243589427e-07,
"loss": 0.0,
"step": 70
},
{
"epoch": 35.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 864.765625,
"eval_kl": 0.00107574462890625,
"eval_loss": 3.6555644328473136e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 103.6058,
"eval_samples_per_second": 0.077,
"eval_steps_per_second": 0.01,
"step": 70
},
{
"clip_ratio": 0.0,
"completion_length": 888.2890625,
"epoch": 35.5,
"grad_norm": 0.00851896616755137,
"kl": 0.0011534690856933594,
"learning_rate": 8.331941759724268e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 71
},
{
"clip_ratio": 0.0,
"completion_length": 894.71875,
"epoch": 36.0,
"grad_norm": 0.007086845304726659,
"kl": 0.0008459091186523438,
"learning_rate": 8.270476638965461e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 72
},
{
"clip_ratio": 0.0,
"completion_length": 987.375,
"epoch": 36.5,
"grad_norm": 0.005738334037241088,
"kl": 0.0007448196411132812,
"learning_rate": 8.208167604184217e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 73
},
{
"clip_ratio": 0.0,
"completion_length": 812.09375,
"epoch": 37.0,
"grad_norm": 0.01060348535065019,
"kl": 0.00112152099609375,
"learning_rate": 8.145033635316128e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 74
},
{
"clip_ratio": 0.0,
"completion_length": 858.890625,
"epoch": 37.5,
"grad_norm": 0.25436864848634955,
"kl": 0.0011377334594726562,
"learning_rate": 8.081093963579707e-07,
"loss": 0.0323,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 75
},
{
"clip_ratio": 0.0,
"completion_length": 940.40625,
"epoch": 38.0,
"grad_norm": 0.006724100368173609,
"kl": 0.0008859634399414062,
"learning_rate": 8.01636806561836e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 76
},
{
"clip_ratio": 0.0,
"completion_length": 895.34375,
"epoch": 38.5,
"grad_norm": 0.008819033954111552,
"kl": 0.0010356903076171875,
"learning_rate": 7.950875657567621e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 77
},
{
"clip_ratio": 0.0,
"completion_length": 974.9375,
"epoch": 39.0,
"grad_norm": 0.013437649890405814,
"kl": 0.0010128021240234375,
"learning_rate": 7.884636689049422e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 78
},
{
"clip_ratio": 0.0,
"completion_length": 868.8125,
"epoch": 39.5,
"grad_norm": 0.36102678136071253,
"kl": 0.00128936767578125,
"learning_rate": 7.817671337095244e-07,
"loss": 0.0071,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 79
},
{
"epoch": 40.0,
"grad_norm": 0.005225754916645614,
"learning_rate": 7.75e-07,
"loss": 0.0,
"step": 80
},
{
"epoch": 40.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 863.53125,
"eval_kl": 0.0010199546813964844,
"eval_loss": 3.5483633837429807e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 93.6372,
"eval_samples_per_second": 0.085,
"eval_steps_per_second": 0.011,
"step": 80
},
{
"clip_ratio": 0.0,
"completion_length": 945.6015625,
"epoch": 40.5,
"grad_norm": 0.3853250090014461,
"kl": 0.0010247230529785156,
"learning_rate": 7.681643291108517e-07,
"loss": 0.0662,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 81
},
{
"clip_ratio": 0.0,
"completion_length": 928.375,
"epoch": 41.0,
"grad_norm": 0.005636824414465673,
"kl": 0.0008144378662109375,
"learning_rate": 7.612622032536507e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 82
},
{
"clip_ratio": 0.0,
"completion_length": 928.90625,
"epoch": 41.5,
"grad_norm": 0.009695421562075205,
"kl": 0.001068115234375,
"learning_rate": 7.54295724882796e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 83
},
{
"clip_ratio": 0.0,
"completion_length": 963.625,
"epoch": 42.0,
"grad_norm": 0.009278458906845803,
"kl": 0.0011796951293945312,
"learning_rate": 7.472670160550848e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 84
},
{
"clip_ratio": 0.0,
"completion_length": 971.390625,
"epoch": 42.5,
"grad_norm": 0.007871724093899331,
"kl": 0.0010309219360351562,
"learning_rate": 7.401782177833147e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 85
},
{
"clip_ratio": 0.0,
"completion_length": 665.90625,
"epoch": 43.0,
"grad_norm": 0.2548433246597271,
"kl": 0.0026493072509765625,
"learning_rate": 7.330314893841101e-07,
"loss": 0.0591,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 86
},
{
"clip_ratio": 0.0,
"completion_length": 832.6875,
"epoch": 43.5,
"grad_norm": 0.008654348640434956,
"kl": 0.0010986328125,
"learning_rate": 7.258290078201731e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 87
},
{
"clip_ratio": 0.0,
"completion_length": 971.375,
"epoch": 44.0,
"grad_norm": 0.23717723076741856,
"kl": 0.0013666152954101562,
"learning_rate": 7.185729670371604e-07,
"loss": 0.0375,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 88
},
{
"clip_ratio": 0.0,
"completion_length": 962.234375,
"epoch": 44.5,
"grad_norm": 0.015377158658563683,
"kl": 0.0014400482177734375,
"learning_rate": 7.11265577295385e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 89
},
{
"epoch": 45.0,
"grad_norm": 0.008932566857599763,
"learning_rate": 7.039090644965509e-07,
"loss": 0.0,
"step": 90
},
{
"epoch": 45.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 926.8125,
"eval_kl": 0.001495361328125,
"eval_loss": 5.114699524710886e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 99.6982,
"eval_samples_per_second": 0.08,
"eval_steps_per_second": 0.01,
"step": 90
},
{
"clip_ratio": 0.0,
"completion_length": 893.046875,
"epoch": 45.5,
"grad_norm": 0.010366882749113264,
"kl": 0.0011758804321289062,
"learning_rate": 6.965056695057204e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 91
},
{
"clip_ratio": 0.0,
"completion_length": 904.78125,
"epoch": 46.0,
"grad_norm": 0.02599396238517655,
"kl": 0.0017518997192382812,
"learning_rate": 6.890576474687263e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 92
},
{
"clip_ratio": 0.0,
"completion_length": 848.65625,
"epoch": 46.5,
"grad_norm": 0.5691502068122966,
"kl": 0.0011138916015625,
"learning_rate": 6.815672671252315e-07,
"loss": -0.1031,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 93
},
{
"clip_ratio": 0.0,
"completion_length": 896.9375,
"epoch": 47.0,
"grad_norm": 0.011737006406008286,
"kl": 0.00153350830078125,
"learning_rate": 6.740368101176495e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 94
},
{
"clip_ratio": 0.0,
"completion_length": 845.0625,
"epoch": 47.5,
"grad_norm": 0.387460398723794,
"kl": 0.0025997161865234375,
"learning_rate": 6.664685702961344e-07,
"loss": 0.1011,
"reward": 0.046875,
"reward_std": 0.14789125323295593,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.046875,
"step": 95
},
{
"clip_ratio": 0.0,
"completion_length": 825.6875,
"epoch": 48.0,
"grad_norm": 0.3148662082343754,
"kl": 0.0015659332275390625,
"learning_rate": 6.588648530198504e-07,
"loss": -0.0819,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 96
},
{
"clip_ratio": 0.0,
"completion_length": 836.265625,
"epoch": 48.5,
"grad_norm": 0.010907755471241348,
"kl": 0.0014362335205078125,
"learning_rate": 6.512279744547392e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 97
},
{
"clip_ratio": 0.0,
"completion_length": 875.25,
"epoch": 49.0,
"grad_norm": 0.012995813252816308,
"kl": 0.0014848709106445312,
"learning_rate": 6.435602608679916e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 98
},
{
"clip_ratio": 0.0,
"completion_length": 940.625,
"epoch": 49.5,
"grad_norm": 0.012426286814278077,
"kl": 0.00176239013671875,
"learning_rate": 6.358640479194451e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 99
},
{
"epoch": 50.0,
"grad_norm": 0.01574734631355814,
"learning_rate": 6.281416799501187e-07,
"loss": 0.0001,
"step": 100
},
{
"epoch": 50.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 877.0859375,
"eval_kl": 0.00176239013671875,
"eval_loss": 5.840706580784172e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 104.8754,
"eval_samples_per_second": 0.076,
"eval_steps_per_second": 0.01,
"step": 100
},
{
"clip_ratio": 0.0,
"completion_length": 893.6875,
"epoch": 50.5,
"grad_norm": 0.01032773418561725,
"kl": 0.0016312599182128906,
"learning_rate": 6.203955092681039e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 101
},
{
"clip_ratio": 0.0,
"completion_length": 849.125,
"epoch": 51.0,
"grad_norm": 0.015022844032421302,
"kl": 0.0020303726196289062,
"learning_rate": 6.126278954320294e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 102
},
{
"clip_ratio": 0.0,
"completion_length": 989.890625,
"epoch": 51.5,
"grad_norm": 0.01614919998047259,
"kl": 0.00213623046875,
"learning_rate": 6.048412045323164e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 103
},
{
"clip_ratio": 0.0,
"completion_length": 942.59375,
"epoch": 52.0,
"grad_norm": 0.010917168106263793,
"kl": 0.0014476776123046875,
"learning_rate": 5.97037808470444e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 104
},
{
"clip_ratio": 0.0,
"completion_length": 944.640625,
"epoch": 52.5,
"grad_norm": 0.26063398005499927,
"kl": 0.002063751220703125,
"learning_rate": 5.892200842364462e-07,
"loss": 0.0398,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 105
},
{
"clip_ratio": 0.0,
"completion_length": 859.4375,
"epoch": 53.0,
"grad_norm": 0.3786401255845219,
"kl": 0.0019893646240234375,
"learning_rate": 5.813904131848564e-07,
"loss": 0.0037,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 106
},
{
"clip_ratio": 0.0,
"completion_length": 982.65625,
"epoch": 53.5,
"grad_norm": 0.017106845969968518,
"kl": 0.0019397735595703125,
"learning_rate": 5.735511803093248e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 107
},
{
"clip_ratio": 0.0,
"completion_length": 764.34375,
"epoch": 54.0,
"grad_norm": 0.01473506964168249,
"kl": 0.0020160675048828125,
"learning_rate": 5.657047735161255e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 108
},
{
"clip_ratio": 0.0,
"completion_length": 908.171875,
"epoch": 54.5,
"grad_norm": 0.018754498587991306,
"kl": 0.0021953582763671875,
"learning_rate": 5.578535828967777e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 109
},
{
"epoch": 55.0,
"grad_norm": 0.010670548584562288,
"learning_rate": 5.5e-07,
"loss": 0.0001,
"step": 110
},
{
"epoch": 55.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 948.3125,
"eval_kl": 0.0020236968994140625,
"eval_loss": 6.496578134829178e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 100.0071,
"eval_samples_per_second": 0.08,
"eval_steps_per_second": 0.01,
"step": 110
},
{
"clip_ratio": 0.0,
"completion_length": 990.34375,
"epoch": 55.5,
"grad_norm": 0.015563623719699342,
"kl": 0.0019483566284179688,
"learning_rate": 5.421464171032224e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 111
},
{
"clip_ratio": 0.0,
"completion_length": 855.28125,
"epoch": 56.0,
"grad_norm": 0.008970140876741549,
"kl": 0.001338958740234375,
"learning_rate": 5.342952264838747e-07,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 112
},
{
"clip_ratio": 0.0,
"completion_length": 982.640625,
"epoch": 56.5,
"grad_norm": 0.0108924950539754,
"kl": 0.0016727447509765625,
"learning_rate": 5.264488196906752e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 113
},
{
"clip_ratio": 0.0,
"completion_length": 903.78125,
"epoch": 57.0,
"grad_norm": 0.2686051732486419,
"kl": 0.0024242401123046875,
"learning_rate": 5.186095868151436e-07,
"loss": 0.0391,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 114
},
{
"clip_ratio": 0.0,
"completion_length": 853.375,
"epoch": 57.5,
"grad_norm": 0.2831838787851241,
"kl": 0.0024967193603515625,
"learning_rate": 5.107799157635538e-07,
"loss": 0.0351,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 115
},
{
"clip_ratio": 0.0,
"completion_length": 917.34375,
"epoch": 58.0,
"grad_norm": 0.013530004501295191,
"kl": 0.001804351806640625,
"learning_rate": 5.02962191529556e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 116
},
{
"clip_ratio": 0.0,
"completion_length": 859.03125,
"epoch": 58.5,
"grad_norm": 0.44797396450129406,
"kl": 0.0030193328857421875,
"learning_rate": 4.951587954676837e-07,
"loss": -0.0663,
"reward": 0.03125,
"reward_std": 0.08539125323295593,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.03125,
"step": 117
},
{
"clip_ratio": 0.0,
"completion_length": 896.625,
"epoch": 59.0,
"grad_norm": 0.00998304849357523,
"kl": 0.0016765594482421875,
"learning_rate": 4.873721045679706e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 118
},
{
"clip_ratio": 0.0,
"completion_length": 954.359375,
"epoch": 59.5,
"grad_norm": 0.22962099915558717,
"kl": 0.0020122528076171875,
"learning_rate": 4.79604490731896e-07,
"loss": 0.0424,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 119
},
{
"epoch": 60.0,
"grad_norm": 0.02248418123929179,
"learning_rate": 4.7185832004988133e-07,
"loss": 0.0001,
"step": 120
},
{
"epoch": 60.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 960.3671875,
"eval_kl": 0.0021600723266601562,
"eval_loss": 7.169261516537517e-05,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 107.7429,
"eval_samples_per_second": 0.074,
"eval_steps_per_second": 0.009,
"step": 120
},
{
"clip_ratio": 0.0,
"completion_length": 837.0390625,
"epoch": 60.5,
"grad_norm": 0.017761869908686022,
"kl": 0.0027360916137695312,
"learning_rate": 4.641359520805548e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 121
},
{
"clip_ratio": 0.0,
"completion_length": 822.75,
"epoch": 61.0,
"grad_norm": 0.016374329005476508,
"kl": 0.00225830078125,
"learning_rate": 4.5643973913200837e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 122
},
{
"clip_ratio": 0.0,
"completion_length": 873.46875,
"epoch": 61.5,
"grad_norm": 0.22223040398437557,
"kl": 0.0031833648681640625,
"learning_rate": 4.4877202554526084e-07,
"loss": 0.0424,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 123
},
{
"clip_ratio": 0.0,
"completion_length": 1016.71875,
"epoch": 62.0,
"grad_norm": 0.01056012461452998,
"kl": 0.001789093017578125,
"learning_rate": 4.4113514698014953e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 124
},
{
"clip_ratio": 0.0,
"completion_length": 867.015625,
"epoch": 62.5,
"grad_norm": 0.012502277305124584,
"kl": 0.0022029876708984375,
"learning_rate": 4.3353142970386557e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 125
},
{
"clip_ratio": 0.0,
"completion_length": 1008.1875,
"epoch": 63.0,
"grad_norm": 0.017467667097429012,
"kl": 0.002529144287109375,
"learning_rate": 4.2596318988235037e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 126
},
{
"clip_ratio": 0.0,
"completion_length": 876.40625,
"epoch": 63.5,
"grad_norm": 0.27826851686759685,
"kl": 0.003124237060546875,
"learning_rate": 4.1843273287476854e-07,
"loss": 0.0208,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 127
},
{
"clip_ratio": 0.0,
"completion_length": 931.4375,
"epoch": 64.0,
"grad_norm": 0.010111725537803692,
"kl": 0.0016422271728515625,
"learning_rate": 4.1094235253127374e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 128
},
{
"clip_ratio": 0.0,
"completion_length": 940.890625,
"epoch": 64.5,
"grad_norm": 0.19504698473227727,
"kl": 0.003879547119140625,
"learning_rate": 4.034943304942796e-07,
"loss": 0.0002,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 129
},
{
"epoch": 65.0,
"grad_norm": 0.3594474052537692,
"learning_rate": 3.9609093550344907e-07,
"loss": -0.031,
"step": 130
},
{
"epoch": 65.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 872.6484375,
"eval_kl": 0.0025424957275390625,
"eval_loss": -0.011794866062700748,
"eval_reward": 0.015625,
"eval_reward_std": 0.042695626616477966,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.015625,
"eval_runtime": 95.6188,
"eval_samples_per_second": 0.084,
"eval_steps_per_second": 0.01,
"step": 130
},
{
"clip_ratio": 0.0,
"completion_length": 827.2578125,
"epoch": 65.5,
"grad_norm": 0.031146258604450788,
"kl": 0.002330780029296875,
"learning_rate": 3.8873442270461485e-07,
"loss": 0.0001,
"reward": 0.0078125,
"reward_std": 0.03125,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0078125,
"step": 131
},
{
"clip_ratio": 0.0,
"completion_length": 1005.28125,
"epoch": 66.0,
"grad_norm": 0.23823090391570464,
"kl": 0.00301361083984375,
"learning_rate": 3.8142703296283953e-07,
"loss": 0.0414,
"reward": 0.03125,
"reward_std": 0.08539125323295593,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.03125,
"step": 132
},
{
"clip_ratio": 0.0,
"completion_length": 965.671875,
"epoch": 66.5,
"grad_norm": 0.016282343372095484,
"kl": 0.0027065277099609375,
"learning_rate": 3.7417099217982686e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 133
},
{
"clip_ratio": 0.0,
"completion_length": 950.9375,
"epoch": 67.0,
"grad_norm": 0.37418953088110135,
"kl": 0.00205230712890625,
"learning_rate": 3.6696851061588994e-07,
"loss": 0.0608,
"reward": 0.03125,
"reward_std": 0.125,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.03125,
"step": 134
},
{
"clip_ratio": 0.0,
"completion_length": 911.234375,
"epoch": 67.5,
"grad_norm": 0.012255261965707507,
"kl": 0.0020465850830078125,
"learning_rate": 3.5982178221668533e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 135
},
{
"clip_ratio": 0.0,
"completion_length": 979.71875,
"epoch": 68.0,
"grad_norm": 0.26206279671926186,
"kl": 0.002727508544921875,
"learning_rate": 3.5273298394491515e-07,
"loss": 0.0389,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 136
},
{
"clip_ratio": 0.0,
"completion_length": 933.796875,
"epoch": 68.5,
"grad_norm": 0.01763374341802393,
"kl": 0.002559661865234375,
"learning_rate": 3.45704275117204e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 137
},
{
"clip_ratio": 0.0,
"completion_length": 905.125,
"epoch": 69.0,
"grad_norm": 0.3004009853199538,
"kl": 0.002872467041015625,
"learning_rate": 3.387377967463493e-07,
"loss": 0.0281,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 138
},
{
"clip_ratio": 0.0,
"completion_length": 951.5625,
"epoch": 69.5,
"grad_norm": 0.0130597651929469,
"kl": 0.0021648406982421875,
"learning_rate": 3.3183567088914833e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 139
},
{
"epoch": 70.0,
"grad_norm": 0.01647973523518642,
"learning_rate": 3.250000000000001e-07,
"loss": 0.0001,
"step": 140
},
{
"epoch": 70.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 888.59375,
"eval_kl": 0.0026531219482421875,
"eval_loss": 0.02596096135675907,
"eval_reward": 0.0078125,
"eval_reward_std": 0.03125,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0078125,
"eval_runtime": 98.3732,
"eval_samples_per_second": 0.081,
"eval_steps_per_second": 0.01,
"step": 140
},
{
"clip_ratio": 0.0,
"completion_length": 873.234375,
"epoch": 70.5,
"grad_norm": 0.2834208593274132,
"kl": 0.0033597946166992188,
"learning_rate": 3.182328662904756e-07,
"loss": 0.0572,
"reward": 0.015625,
"reward_std": 0.042695626616477966,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 141
},
{
"clip_ratio": 0.0,
"completion_length": 1037.5625,
"epoch": 71.0,
"grad_norm": 0.010414710134713713,
"kl": 0.0019664764404296875,
"learning_rate": 3.115363310950578e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 142
},
{
"clip_ratio": 0.0,
"completion_length": 931.96875,
"epoch": 71.5,
"grad_norm": 0.27576871169806716,
"kl": 0.0029125213623046875,
"learning_rate": 3.0491243424323783e-07,
"loss": 0.0371,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 143
},
{
"clip_ratio": 0.0,
"completion_length": 892.28125,
"epoch": 72.0,
"grad_norm": 0.3229311945497025,
"kl": 0.002956390380859375,
"learning_rate": 2.9836319343816397e-07,
"loss": -0.0668,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 144
},
{
"clip_ratio": 0.0,
"completion_length": 794.59375,
"epoch": 72.5,
"grad_norm": 0.24794430012534288,
"kl": 0.00383758544921875,
"learning_rate": 2.918906036420294e-07,
"loss": 0.0331,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 145
},
{
"clip_ratio": 0.0,
"completion_length": 893.875,
"epoch": 73.0,
"grad_norm": 0.018126291722898823,
"kl": 0.00266265869140625,
"learning_rate": 2.854966364683872e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 146
},
{
"clip_ratio": 0.0,
"completion_length": 954.375,
"epoch": 73.5,
"grad_norm": 0.2235013065843689,
"kl": 0.002880096435546875,
"learning_rate": 2.791832395815782e-07,
"loss": 0.0445,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 147
},
{
"clip_ratio": 0.0,
"completion_length": 804.0,
"epoch": 74.0,
"grad_norm": 0.3025438980959449,
"kl": 0.003345489501953125,
"learning_rate": 2.729523361034538e-07,
"loss": 0.0521,
"reward": 0.03125,
"reward_std": 0.08539125323295593,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.03125,
"step": 148
},
{
"clip_ratio": 0.0,
"completion_length": 971.578125,
"epoch": 74.5,
"grad_norm": 0.024475677293083028,
"kl": 0.0037384033203125,
"learning_rate": 2.6680582402757324e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 149
},
{
"epoch": 75.0,
"grad_norm": 0.012017701370876262,
"learning_rate": 2.6074557564105724e-07,
"loss": 0.0001,
"step": 150
},
{
"epoch": 75.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 862.15625,
"eval_kl": 0.0033111572265625,
"eval_loss": 0.02151690237224102,
"eval_reward": 0.0078125,
"eval_reward_std": 0.03125,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0078125,
"eval_runtime": 95.2607,
"eval_samples_per_second": 0.084,
"eval_steps_per_second": 0.01,
"step": 150
},
{
"clip_ratio": 0.0,
"completion_length": 1021.8515625,
"epoch": 75.5,
"grad_norm": 0.344266822655093,
"kl": 0.002719879150390625,
"learning_rate": 2.547734369542718e-07,
"loss": 0.0005,
"reward": 0.015625,
"reward_std": 0.042695626616477966,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 151
},
{
"clip_ratio": 0.0,
"completion_length": 840.5,
"epoch": 76.0,
"grad_norm": 0.23923411010062942,
"kl": 0.00366973876953125,
"learning_rate": 2.488912271385139e-07,
"loss": 0.042,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 152
},
{
"clip_ratio": 0.0,
"completion_length": 944.609375,
"epoch": 76.5,
"grad_norm": 0.016984013306757723,
"kl": 0.00287628173828125,
"learning_rate": 2.4310073797187573e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 153
},
{
"clip_ratio": 0.0,
"completion_length": 980.09375,
"epoch": 77.0,
"grad_norm": 0.013783286152037033,
"kl": 0.002635955810546875,
"learning_rate": 2.374037332934512e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 154
},
{
"clip_ratio": 0.0,
"completion_length": 877.71875,
"epoch": 77.5,
"grad_norm": 0.17935562272538372,
"kl": 0.003742218017578125,
"learning_rate": 2.3180194846605364e-07,
"loss": 0.0379,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 155
},
{
"clip_ratio": 0.0,
"completion_length": 982.125,
"epoch": 78.0,
"grad_norm": 0.22095835889243898,
"kl": 0.00313568115234375,
"learning_rate": 2.2629708984760706e-07,
"loss": 0.0429,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 156
},
{
"clip_ratio": 0.0,
"completion_length": 853.6875,
"epoch": 78.5,
"grad_norm": 0.014922010725132247,
"kl": 0.00323486328125,
"learning_rate": 2.2089083427137329e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 157
},
{
"clip_ratio": 0.0,
"completion_length": 725.46875,
"epoch": 79.0,
"grad_norm": 0.02027511748538313,
"kl": 0.003879547119140625,
"learning_rate": 2.1558482853517253e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 158
},
{
"clip_ratio": 0.0,
"completion_length": 792.5,
"epoch": 79.5,
"grad_norm": 0.27874692769469644,
"kl": 0.00482940673828125,
"learning_rate": 2.1038068889975259e-07,
"loss": 0.0339,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 159
},
{
"epoch": 80.0,
"grad_norm": 0.01202834608521387,
"learning_rate": 2.0528000059645995e-07,
"loss": 0.0001,
"step": 160
},
{
"epoch": 80.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 898.7109375,
"eval_kl": 0.0037212371826171875,
"eval_loss": -0.011074024252593517,
"eval_reward": 0.015625,
"eval_reward_std": 0.042695626616477966,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.015625,
"eval_runtime": 106.259,
"eval_samples_per_second": 0.075,
"eval_steps_per_second": 0.009,
"step": 160
},
{
"clip_ratio": 0.0,
"completion_length": 979.0859375,
"epoch": 80.5,
"grad_norm": 0.018630946038723055,
"kl": 0.002685546875,
"learning_rate": 2.0028431734436308e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 161
},
{
"clip_ratio": 0.0,
"completion_length": 854.25,
"epoch": 81.0,
"grad_norm": 0.020703125475339885,
"kl": 0.003864288330078125,
"learning_rate": 1.9539516087697517e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 162
},
{
"clip_ratio": 0.0,
"completion_length": 842.71875,
"epoch": 81.5,
"grad_norm": 0.028387308129338176,
"kl": 0.004302978515625,
"learning_rate": 1.9061402047871833e-07,
"loss": 0.0002,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 163
},
{
"clip_ratio": 0.0,
"completion_length": 834.75,
"epoch": 82.0,
"grad_norm": 0.01641578674675751,
"kl": 0.0029449462890625,
"learning_rate": 1.8594235253127372e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 164
},
{
"clip_ratio": 0.0,
"completion_length": 842.40625,
"epoch": 82.5,
"grad_norm": 0.021721834190437855,
"kl": 0.0041656494140625,
"learning_rate": 1.8138158006995363e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 165
},
{
"clip_ratio": 0.0,
"completion_length": 837.8125,
"epoch": 83.0,
"grad_norm": 0.01842609716407046,
"kl": 0.003582000732421875,
"learning_rate": 1.7693309235023127e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 166
},
{
"clip_ratio": 0.0,
"completion_length": 837.453125,
"epoch": 83.5,
"grad_norm": 0.27337633068496925,
"kl": 0.005832672119140625,
"learning_rate": 1.7259824442455923e-07,
"loss": 0.0532,
"reward": 0.03125,
"reward_std": 0.08539125323295593,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.03125,
"step": 167
},
{
"clip_ratio": 0.0,
"completion_length": 934.5625,
"epoch": 84.0,
"grad_norm": 0.012528179411346417,
"kl": 0.0025787353515625,
"learning_rate": 1.6837835672960831e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 168
},
{
"clip_ratio": 0.0,
"completion_length": 906.21875,
"epoch": 84.5,
"grad_norm": 0.016944597423777116,
"kl": 0.003490447998046875,
"learning_rate": 1.6427471468404952e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 169
},
{
"epoch": 85.0,
"grad_norm": 0.397851893169485,
"learning_rate": 1.6028856829700258e-07,
"loss": -0.057,
"step": 170
},
{
"epoch": 85.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 884.7109375,
"eval_kl": 0.0037250518798828125,
"eval_loss": 0.0001178277816507034,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 94.4429,
"eval_samples_per_second": 0.085,
"eval_steps_per_second": 0.011,
"step": 170
},
{
"clip_ratio": 0.0,
"completion_length": 889.3359375,
"epoch": 85.5,
"grad_norm": 0.0165553280450213,
"kl": 0.0038013458251953125,
"learning_rate": 1.5642113178727193e-07,
"loss": 0.0001,
"reward": 0.015625,
"reward_std": 0.042695626616477966,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 171
},
{
"clip_ratio": 0.0,
"completion_length": 931.34375,
"epoch": 86.0,
"grad_norm": 0.22070193047632167,
"kl": 0.00353240966796875,
"learning_rate": 1.5267358321348285e-07,
"loss": 0.0258,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 172
},
{
"clip_ratio": 0.0,
"completion_length": 902.5625,
"epoch": 86.5,
"grad_norm": 0.014212788161515211,
"kl": 0.002574920654296875,
"learning_rate": 1.4904706411523448e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 173
},
{
"clip_ratio": 0.0,
"completion_length": 734.3125,
"epoch": 87.0,
"grad_norm": 0.5334088299783731,
"kl": 0.005619049072265625,
"learning_rate": 1.4554267916537495e-07,
"loss": 0.0865,
"reward": 0.046875,
"reward_std": 0.1875,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.046875,
"step": 174
},
{
"clip_ratio": 0.0,
"completion_length": 846.8125,
"epoch": 87.5,
"grad_norm": 0.3350297851743498,
"kl": 0.005451202392578125,
"learning_rate": 1.4216149583350755e-07,
"loss": 0.0788,
"reward": 0.03125,
"reward_std": 0.125,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.03125,
"step": 175
},
{
"clip_ratio": 0.0,
"completion_length": 896.90625,
"epoch": 88.0,
"grad_norm": 0.012136250169886781,
"kl": 0.002605438232421875,
"learning_rate": 1.3890454406082956e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 176
},
{
"clip_ratio": 0.0,
"completion_length": 924.78125,
"epoch": 88.5,
"grad_norm": 0.020936369813142675,
"kl": 0.004558563232421875,
"learning_rate": 1.3577281594640182e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 177
},
{
"clip_ratio": 0.0,
"completion_length": 1008.28125,
"epoch": 89.0,
"grad_norm": 0.017396797408268067,
"kl": 0.003875732421875,
"learning_rate": 1.3276726544494571e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 178
},
{
"clip_ratio": 0.0,
"completion_length": 972.84375,
"epoch": 89.5,
"grad_norm": 0.46133339595558576,
"kl": 0.003589630126953125,
"learning_rate": 1.2988880807625927e-07,
"loss": -0.0749,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 179
},
{
"epoch": 90.0,
"grad_norm": 0.2749649880366191,
"learning_rate": 1.2713832064634125e-07,
"loss": 0.0315,
"step": 180
},
{
"epoch": 90.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 928.6796875,
"eval_kl": 0.0047626495361328125,
"eval_loss": 0.06055343151092529,
"eval_reward": 0.03125,
"eval_reward_std": 0.10519562661647797,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.03125,
"eval_runtime": 101.2345,
"eval_samples_per_second": 0.079,
"eval_steps_per_second": 0.01,
"step": 180
},
{
"clip_ratio": 0.0,
"completion_length": 887.515625,
"epoch": 90.5,
"grad_norm": 0.22634210714921138,
"kl": 0.004863739013671875,
"learning_rate": 1.2451664098030743e-07,
"loss": 0.0395,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 181
},
{
"clip_ratio": 0.0,
"completion_length": 975.1875,
"epoch": 91.0,
"grad_norm": 0.019843010782603646,
"kl": 0.0040130615234375,
"learning_rate": 1.220245676671809e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 182
},
{
"clip_ratio": 0.0,
"completion_length": 936.265625,
"epoch": 91.5,
"grad_norm": 0.35206959012280886,
"kl": 0.003910064697265625,
"learning_rate": 1.1966285981663407e-07,
"loss": 0.0049,
"reward": 0.046875,
"reward_std": 0.14789125323295593,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.046875,
"step": 183
},
{
"clip_ratio": 0.0,
"completion_length": 825.53125,
"epoch": 92.0,
"grad_norm": 0.3276867393127649,
"kl": 0.0047760009765625,
"learning_rate": 1.1743223682775649e-07,
"loss": 0.0324,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 184
},
{
"clip_ratio": 0.0,
"completion_length": 976.5,
"epoch": 92.5,
"grad_norm": 0.24825006369205177,
"kl": 0.004093170166015625,
"learning_rate": 1.1533337816991931e-07,
"loss": 0.0156,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 185
},
{
"clip_ratio": 0.0,
"completion_length": 803.0625,
"epoch": 93.0,
"grad_norm": 0.022316490692140336,
"kl": 0.0040130615234375,
"learning_rate": 1.1336692317580158e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 186
},
{
"clip_ratio": 0.0,
"completion_length": 825.28125,
"epoch": 93.5,
"grad_norm": 0.4846328870102803,
"kl": 0.00514984130859375,
"learning_rate": 1.1153347084664419e-07,
"loss": -0.0498,
"reward": 0.03125,
"reward_std": 0.125,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.03125,
"step": 187
},
{
"clip_ratio": 0.0,
"completion_length": 1054.5,
"epoch": 94.0,
"grad_norm": 0.22552887394065263,
"kl": 0.003173828125,
"learning_rate": 1.0983357966978745e-07,
"loss": 0.0259,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 188
},
{
"clip_ratio": 0.0,
"completion_length": 896.578125,
"epoch": 94.5,
"grad_norm": 0.28343113199408254,
"kl": 0.0047607421875,
"learning_rate": 1.0826776744855121e-07,
"loss": 0.0366,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 189
},
{
"epoch": 95.0,
"grad_norm": 0.018331093652178724,
"learning_rate": 1.068365111445064e-07,
"loss": 0.0001,
"step": 190
},
{
"epoch": 95.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 926.5390625,
"eval_kl": 0.004039764404296875,
"eval_loss": 0.0001301074807997793,
"eval_reward": 0.0,
"eval_reward_std": 0.0,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0,
"eval_runtime": 100.4368,
"eval_samples_per_second": 0.08,
"eval_steps_per_second": 0.01,
"step": 190
},
{
"clip_ratio": 0.0,
"completion_length": 902.8046875,
"epoch": 95.5,
"grad_norm": 0.27554893857164553,
"kl": 0.0040225982666015625,
"learning_rate": 1.0554024673218806e-07,
"loss": 0.052,
"reward": 0.0078125,
"reward_std": 0.03125,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0078125,
"step": 191
},
{
"clip_ratio": 0.0,
"completion_length": 753.625,
"epoch": 96.0,
"grad_norm": 0.4490555309724536,
"kl": 0.004604339599609375,
"learning_rate": 1.0437936906629334e-07,
"loss": -0.0745,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 192
},
{
"clip_ratio": 0.0,
"completion_length": 829.953125,
"epoch": 96.5,
"grad_norm": 0.02546191399674691,
"kl": 0.00514984130859375,
"learning_rate": 1.0335423176140511e-07,
"loss": 0.0002,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 193
},
{
"clip_ratio": 0.0,
"completion_length": 852.78125,
"epoch": 97.0,
"grad_norm": 0.01515669426337304,
"kl": 0.003253936767578125,
"learning_rate": 1.0246514708427701e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 194
},
{
"clip_ratio": 0.0,
"completion_length": 901.71875,
"epoch": 97.5,
"grad_norm": 0.01672130582287373,
"kl": 0.003467559814453125,
"learning_rate": 1.017123858587145e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 195
},
{
"clip_ratio": 0.0,
"completion_length": 904.375,
"epoch": 98.0,
"grad_norm": 0.021287930078995584,
"kl": 0.004909515380859375,
"learning_rate": 1.0109617738307911e-07,
"loss": 0.0002,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 196
},
{
"clip_ratio": 0.0,
"completion_length": 970.984375,
"epoch": 98.5,
"grad_norm": 0.024329146341496674,
"kl": 0.00463104248046875,
"learning_rate": 1.0061670936044178e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 197
},
{
"clip_ratio": 0.0,
"completion_length": 837.75,
"epoch": 99.0,
"grad_norm": 0.25418431822719284,
"kl": 0.004810333251953125,
"learning_rate": 1.002741278414069e-07,
"loss": 0.0424,
"reward": 0.015625,
"reward_std": 0.0625,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.015625,
"step": 198
},
{
"clip_ratio": 0.0,
"completion_length": 894.921875,
"epoch": 99.5,
"grad_norm": 0.015548877308025595,
"kl": 0.0036163330078125,
"learning_rate": 1.0006853717962393e-07,
"loss": 0.0001,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 199
},
{
"epoch": 100.0,
"grad_norm": 0.02199600718524651,
"learning_rate": 1e-07,
"loss": 0.0002,
"step": 200
},
{
"epoch": 100.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 909.0390625,
"eval_kl": 0.004390716552734375,
"eval_loss": 0.02110510692000389,
"eval_reward": 0.0078125,
"eval_reward_std": 0.03125,
"eval_rewards/accuracy_reward_staging": 0.0,
"eval_rewards/format_reward": 0.0,
"eval_rewards/format_reward_staging": 0.0078125,
"eval_runtime": 101.442,
"eval_samples_per_second": 0.079,
"eval_steps_per_second": 0.01,
"step": 200
},
{
"clip_ratio": 0.0,
"completion_length": 893.78125,
"epoch": 100.0,
"kl": 0.004932403564453125,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_staging": 0.0,
"rewards/format_reward": 0.0,
"rewards/format_reward_staging": 0.0,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.007099090418751075,
"train_runtime": 14240.9534,
"train_samples_per_second": 0.056,
"train_steps_per_second": 0.014
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 40,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}