| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9512485136741974, |
| "eval_steps": 500, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "completion_length": 414.0390625, |
| "epoch": 0.009512485136741973, |
| "grad_norm": 18.50773734319143, |
| "kl": 3.324449062347412e-05, |
| "learning_rate": 0.0, |
| "loss": 0.0, |
| "reward": 1.931640625, |
| "reward_std": 0.7540743527933955, |
| "rewards/accuracy_reward": 0.501953125, |
| "rewards/format_reward": 0.720703125, |
| "rewards/influence_reward": 0.318359375, |
| "rewards/len_reward": 0.390625, |
| "step": 1 |
| }, |
| { |
| "completion_length": 406.5244140625, |
| "epoch": 0.04756242568370987, |
| "grad_norm": 12.240397137360413, |
| "kl": 0.0038472674787044525, |
| "learning_rate": 1.818181818181818e-07, |
| "loss": 0.0002, |
| "reward": 2.06787109375, |
| "reward_std": 0.6699417636264116, |
| "rewards/accuracy_reward": 0.552734375, |
| "rewards/format_reward": 0.73681640625, |
| "rewards/influence_reward": 0.35595703125, |
| "rewards/len_reward": 0.42236328125, |
| "step": 5 |
| }, |
| { |
| "completion_length": 408.440625, |
| "epoch": 0.09512485136741974, |
| "grad_norm": 10.899643199929397, |
| "kl": 0.051489830017089844, |
| "learning_rate": 4.090909090909091e-07, |
| "loss": 0.0021, |
| "reward": 2.095703125, |
| "reward_std": 0.6716910980641841, |
| "rewards/accuracy_reward": 0.564453125, |
| "rewards/format_reward": 0.756640625, |
| "rewards/influence_reward": 0.368359375, |
| "rewards/len_reward": 0.40625, |
| "step": 10 |
| }, |
| { |
| "completion_length": 402.093359375, |
| "epoch": 0.1426872770511296, |
| "grad_norm": 56.657034116967125, |
| "kl": 0.9256591796875, |
| "learning_rate": 6.363636363636363e-07, |
| "loss": 0.037, |
| "reward": 2.055859375, |
| "reward_std": 0.6135061264038086, |
| "rewards/accuracy_reward": 0.537109375, |
| "rewards/format_reward": 0.77890625, |
| "rewards/influence_reward": 0.355859375, |
| "rewards/len_reward": 0.383984375, |
| "step": 15 |
| }, |
| { |
| "completion_length": 390.37109375, |
| "epoch": 0.1902497027348395, |
| "grad_norm": 4.269559046091982, |
| "kl": 2.8193359375, |
| "learning_rate": 8.636363636363636e-07, |
| "loss": 0.1128, |
| "reward": 2.1671875, |
| "reward_std": 0.5897074935957789, |
| "rewards/accuracy_reward": 0.56015625, |
| "rewards/format_reward": 0.815234375, |
| "rewards/influence_reward": 0.384765625, |
| "rewards/len_reward": 0.40703125, |
| "step": 20 |
| }, |
| { |
| "completion_length": 355.8265625, |
| "epoch": 0.23781212841854935, |
| "grad_norm": 5.36647948775656, |
| "kl": 2.3447265625, |
| "learning_rate": 9.99726628670463e-07, |
| "loss": 0.0938, |
| "reward": 2.289453125, |
| "reward_std": 0.5657233998179436, |
| "rewards/accuracy_reward": 0.54140625, |
| "rewards/format_reward": 0.87578125, |
| "rewards/influence_reward": 0.405859375, |
| "rewards/len_reward": 0.46640625, |
| "step": 25 |
| }, |
| { |
| "completion_length": 312.60859375, |
| "epoch": 0.2853745541022592, |
| "grad_norm": 93.06936011551063, |
| "kl": 3.04345703125, |
| "learning_rate": 9.966546331768192e-07, |
| "loss": 0.1218, |
| "reward": 2.478125, |
| "reward_std": 0.5446455283090472, |
| "rewards/accuracy_reward": 0.5625, |
| "rewards/format_reward": 0.953515625, |
| "rewards/influence_reward": 0.451171875, |
| "rewards/len_reward": 0.5109375, |
| "step": 30 |
| }, |
| { |
| "completion_length": 299.95390625, |
| "epoch": 0.3329369797859691, |
| "grad_norm": 4.81826375681457, |
| "kl": 2.7115234375, |
| "learning_rate": 9.901899829374047e-07, |
| "loss": 0.1085, |
| "reward": 2.5625, |
| "reward_std": 0.5693813040852547, |
| "rewards/accuracy_reward": 0.546875, |
| "rewards/format_reward": 0.962109375, |
| "rewards/influence_reward": 0.44296875, |
| "rewards/len_reward": 0.610546875, |
| "step": 35 |
| }, |
| { |
| "completion_length": 276.19296875, |
| "epoch": 0.380499405469679, |
| "grad_norm": 2.854100596289783, |
| "kl": 2.379248046875, |
| "learning_rate": 9.803768380684242e-07, |
| "loss": 0.0952, |
| "reward": 2.51484375, |
| "reward_std": 0.5233275255188345, |
| "rewards/accuracy_reward": 0.50859375, |
| "rewards/format_reward": 0.9640625, |
| "rewards/influence_reward": 0.4140625, |
| "rewards/len_reward": 0.628125, |
| "step": 40 |
| }, |
| { |
| "completion_length": 281.1796875, |
| "epoch": 0.4280618311533888, |
| "grad_norm": 3.461375360266424, |
| "kl": 2.206005859375, |
| "learning_rate": 9.672822322997304e-07, |
| "loss": 0.0882, |
| "reward": 2.471875, |
| "reward_std": 0.5379180932417512, |
| "rewards/accuracy_reward": 0.49296875, |
| "rewards/format_reward": 0.944140625, |
| "rewards/influence_reward": 0.3921875, |
| "rewards/len_reward": 0.642578125, |
| "step": 45 |
| }, |
| { |
| "completion_length": 297.3796875, |
| "epoch": 0.4756242568370987, |
| "grad_norm": 3.3937919439490454, |
| "kl": 2.230615234375, |
| "learning_rate": 9.509956150664795e-07, |
| "loss": 0.0892, |
| "reward": 2.546875, |
| "reward_std": 0.5505968105047941, |
| "rewards/accuracy_reward": 0.53125, |
| "rewards/format_reward": 0.923046875, |
| "rewards/influence_reward": 0.416015625, |
| "rewards/len_reward": 0.6765625, |
| "step": 50 |
| }, |
| { |
| "completion_length": 300.020703125, |
| "epoch": 0.5231866825208086, |
| "grad_norm": 4.255030544730029, |
| "kl": 2.759912109375, |
| "learning_rate": 9.316282404787869e-07, |
| "loss": 0.1104, |
| "reward": 2.500390625, |
| "reward_std": 0.5426989603787661, |
| "rewards/accuracy_reward": 0.522265625, |
| "rewards/format_reward": 0.933984375, |
| "rewards/influence_reward": 0.414453125, |
| "rewards/len_reward": 0.6296875, |
| "step": 55 |
| }, |
| { |
| "completion_length": 309.028515625, |
| "epoch": 0.5707491082045184, |
| "grad_norm": 2.734847983664629, |
| "kl": 2.937158203125, |
| "learning_rate": 9.093124073433462e-07, |
| "loss": 0.1175, |
| "reward": 2.381640625, |
| "reward_std": 0.5930636901408434, |
| "rewards/accuracy_reward": 0.46953125, |
| "rewards/format_reward": 0.93359375, |
| "rewards/influence_reward": 0.3640625, |
| "rewards/len_reward": 0.614453125, |
| "step": 60 |
| }, |
| { |
| "completion_length": 305.84296875, |
| "epoch": 0.6183115338882283, |
| "grad_norm": 4.019506444773187, |
| "kl": 3.4853515625, |
| "learning_rate": 8.842005554284295e-07, |
| "loss": 0.1394, |
| "reward": 2.45859375, |
| "reward_std": 0.560142171010375, |
| "rewards/accuracy_reward": 0.49609375, |
| "rewards/format_reward": 0.93671875, |
| "rewards/influence_reward": 0.396484375, |
| "rewards/len_reward": 0.629296875, |
| "step": 65 |
| }, |
| { |
| "completion_length": 305.07265625, |
| "epoch": 0.6658739595719382, |
| "grad_norm": 4.116522469679006, |
| "kl": 3.28466796875, |
| "learning_rate": 8.564642241456986e-07, |
| "loss": 0.1314, |
| "reward": 2.435546875, |
| "reward_std": 0.5443418994545937, |
| "rewards/accuracy_reward": 0.48515625, |
| "rewards/format_reward": 0.940625, |
| "rewards/influence_reward": 0.383203125, |
| "rewards/len_reward": 0.6265625, |
| "step": 70 |
| }, |
| { |
| "completion_length": 298.075390625, |
| "epoch": 0.713436385255648, |
| "grad_norm": 2.9394867850708772, |
| "kl": 3.50244140625, |
| "learning_rate": 8.262928807620843e-07, |
| "loss": 0.1401, |
| "reward": 2.416796875, |
| "reward_std": 0.5376573745161295, |
| "rewards/accuracy_reward": 0.48515625, |
| "rewards/format_reward": 0.950390625, |
| "rewards/influence_reward": 0.37890625, |
| "rewards/len_reward": 0.60234375, |
| "step": 75 |
| }, |
| { |
| "completion_length": 299.733984375, |
| "epoch": 0.760998810939358, |
| "grad_norm": 3.301846350005546, |
| "kl": 3.5205078125, |
| "learning_rate": 7.938926261462365e-07, |
| "loss": 0.1408, |
| "reward": 2.404296875, |
| "reward_std": 0.5362825602293014, |
| "rewards/accuracy_reward": 0.4703125, |
| "rewards/format_reward": 0.94765625, |
| "rewards/influence_reward": 0.3671875, |
| "rewards/len_reward": 0.619140625, |
| "step": 80 |
| }, |
| { |
| "completion_length": 308.55546875, |
| "epoch": 0.8085612366230678, |
| "grad_norm": 4.193561063299626, |
| "kl": 3.41328125, |
| "learning_rate": 7.594847868906076e-07, |
| "loss": 0.1365, |
| "reward": 2.408203125, |
| "reward_std": 0.535981552861631, |
| "rewards/accuracy_reward": 0.46796875, |
| "rewards/format_reward": 0.945703125, |
| "rewards/influence_reward": 0.365625, |
| "rewards/len_reward": 0.62890625, |
| "step": 85 |
| }, |
| { |
| "completion_length": 305.84140625, |
| "epoch": 0.8561236623067776, |
| "grad_norm": 20.941270697353453, |
| "kl": 3.77119140625, |
| "learning_rate": 7.233044034264033e-07, |
| "loss": 0.1509, |
| "reward": 2.408203125, |
| "reward_std": 0.5046488767489791, |
| "rewards/accuracy_reward": 0.475, |
| "rewards/format_reward": 0.957421875, |
| "rewards/influence_reward": 0.37890625, |
| "rewards/len_reward": 0.596875, |
| "step": 90 |
| }, |
| { |
| "completion_length": 318.782421875, |
| "epoch": 0.9036860879904876, |
| "grad_norm": 5.799431376928617, |
| "kl": 3.83603515625, |
| "learning_rate": 6.855986244591103e-07, |
| "loss": 0.1534, |
| "reward": 2.423046875, |
| "reward_std": 0.5394395122304558, |
| "rewards/accuracy_reward": 0.503515625, |
| "rewards/format_reward": 0.943359375, |
| "rewards/influence_reward": 0.3859375, |
| "rewards/len_reward": 0.590234375, |
| "step": 95 |
| }, |
| { |
| "completion_length": 318.328515625, |
| "epoch": 0.9512485136741974, |
| "grad_norm": 304.60647459145224, |
| "kl": 4.10732421875, |
| "learning_rate": 6.466250186922324e-07, |
| "loss": 0.1643, |
| "reward": 2.353125, |
| "reward_std": 0.5590785862877965, |
| "rewards/accuracy_reward": 0.461328125, |
| "rewards/format_reward": 0.94296875, |
| "rewards/influence_reward": 0.359765625, |
| "rewards/len_reward": 0.5890625, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 212, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|