canoe-modified-100steps / trainer_state.json
jaredfern's picture
Upload folder using huggingface_hub
1ed8555 verified
Raw
History Blame Contribute Delete
10.9 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9512485136741974,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 414.0390625,
"epoch": 0.009512485136741973,
"grad_norm": 18.50773734319143,
"kl": 3.324449062347412e-05,
"learning_rate": 0.0,
"loss": 0.0,
"reward": 1.931640625,
"reward_std": 0.7540743527933955,
"rewards/accuracy_reward": 0.501953125,
"rewards/format_reward": 0.720703125,
"rewards/influence_reward": 0.318359375,
"rewards/len_reward": 0.390625,
"step": 1
},
{
"completion_length": 406.5244140625,
"epoch": 0.04756242568370987,
"grad_norm": 12.240397137360413,
"kl": 0.0038472674787044525,
"learning_rate": 1.818181818181818e-07,
"loss": 0.0002,
"reward": 2.06787109375,
"reward_std": 0.6699417636264116,
"rewards/accuracy_reward": 0.552734375,
"rewards/format_reward": 0.73681640625,
"rewards/influence_reward": 0.35595703125,
"rewards/len_reward": 0.42236328125,
"step": 5
},
{
"completion_length": 408.440625,
"epoch": 0.09512485136741974,
"grad_norm": 10.899643199929397,
"kl": 0.051489830017089844,
"learning_rate": 4.090909090909091e-07,
"loss": 0.0021,
"reward": 2.095703125,
"reward_std": 0.6716910980641841,
"rewards/accuracy_reward": 0.564453125,
"rewards/format_reward": 0.756640625,
"rewards/influence_reward": 0.368359375,
"rewards/len_reward": 0.40625,
"step": 10
},
{
"completion_length": 402.093359375,
"epoch": 0.1426872770511296,
"grad_norm": 56.657034116967125,
"kl": 0.9256591796875,
"learning_rate": 6.363636363636363e-07,
"loss": 0.037,
"reward": 2.055859375,
"reward_std": 0.6135061264038086,
"rewards/accuracy_reward": 0.537109375,
"rewards/format_reward": 0.77890625,
"rewards/influence_reward": 0.355859375,
"rewards/len_reward": 0.383984375,
"step": 15
},
{
"completion_length": 390.37109375,
"epoch": 0.1902497027348395,
"grad_norm": 4.269559046091982,
"kl": 2.8193359375,
"learning_rate": 8.636363636363636e-07,
"loss": 0.1128,
"reward": 2.1671875,
"reward_std": 0.5897074935957789,
"rewards/accuracy_reward": 0.56015625,
"rewards/format_reward": 0.815234375,
"rewards/influence_reward": 0.384765625,
"rewards/len_reward": 0.40703125,
"step": 20
},
{
"completion_length": 355.8265625,
"epoch": 0.23781212841854935,
"grad_norm": 5.36647948775656,
"kl": 2.3447265625,
"learning_rate": 9.99726628670463e-07,
"loss": 0.0938,
"reward": 2.289453125,
"reward_std": 0.5657233998179436,
"rewards/accuracy_reward": 0.54140625,
"rewards/format_reward": 0.87578125,
"rewards/influence_reward": 0.405859375,
"rewards/len_reward": 0.46640625,
"step": 25
},
{
"completion_length": 312.60859375,
"epoch": 0.2853745541022592,
"grad_norm": 93.06936011551063,
"kl": 3.04345703125,
"learning_rate": 9.966546331768192e-07,
"loss": 0.1218,
"reward": 2.478125,
"reward_std": 0.5446455283090472,
"rewards/accuracy_reward": 0.5625,
"rewards/format_reward": 0.953515625,
"rewards/influence_reward": 0.451171875,
"rewards/len_reward": 0.5109375,
"step": 30
},
{
"completion_length": 299.95390625,
"epoch": 0.3329369797859691,
"grad_norm": 4.81826375681457,
"kl": 2.7115234375,
"learning_rate": 9.901899829374047e-07,
"loss": 0.1085,
"reward": 2.5625,
"reward_std": 0.5693813040852547,
"rewards/accuracy_reward": 0.546875,
"rewards/format_reward": 0.962109375,
"rewards/influence_reward": 0.44296875,
"rewards/len_reward": 0.610546875,
"step": 35
},
{
"completion_length": 276.19296875,
"epoch": 0.380499405469679,
"grad_norm": 2.854100596289783,
"kl": 2.379248046875,
"learning_rate": 9.803768380684242e-07,
"loss": 0.0952,
"reward": 2.51484375,
"reward_std": 0.5233275255188345,
"rewards/accuracy_reward": 0.50859375,
"rewards/format_reward": 0.9640625,
"rewards/influence_reward": 0.4140625,
"rewards/len_reward": 0.628125,
"step": 40
},
{
"completion_length": 281.1796875,
"epoch": 0.4280618311533888,
"grad_norm": 3.461375360266424,
"kl": 2.206005859375,
"learning_rate": 9.672822322997304e-07,
"loss": 0.0882,
"reward": 2.471875,
"reward_std": 0.5379180932417512,
"rewards/accuracy_reward": 0.49296875,
"rewards/format_reward": 0.944140625,
"rewards/influence_reward": 0.3921875,
"rewards/len_reward": 0.642578125,
"step": 45
},
{
"completion_length": 297.3796875,
"epoch": 0.4756242568370987,
"grad_norm": 3.3937919439490454,
"kl": 2.230615234375,
"learning_rate": 9.509956150664795e-07,
"loss": 0.0892,
"reward": 2.546875,
"reward_std": 0.5505968105047941,
"rewards/accuracy_reward": 0.53125,
"rewards/format_reward": 0.923046875,
"rewards/influence_reward": 0.416015625,
"rewards/len_reward": 0.6765625,
"step": 50
},
{
"completion_length": 300.020703125,
"epoch": 0.5231866825208086,
"grad_norm": 4.255030544730029,
"kl": 2.759912109375,
"learning_rate": 9.316282404787869e-07,
"loss": 0.1104,
"reward": 2.500390625,
"reward_std": 0.5426989603787661,
"rewards/accuracy_reward": 0.522265625,
"rewards/format_reward": 0.933984375,
"rewards/influence_reward": 0.414453125,
"rewards/len_reward": 0.6296875,
"step": 55
},
{
"completion_length": 309.028515625,
"epoch": 0.5707491082045184,
"grad_norm": 2.734847983664629,
"kl": 2.937158203125,
"learning_rate": 9.093124073433462e-07,
"loss": 0.1175,
"reward": 2.381640625,
"reward_std": 0.5930636901408434,
"rewards/accuracy_reward": 0.46953125,
"rewards/format_reward": 0.93359375,
"rewards/influence_reward": 0.3640625,
"rewards/len_reward": 0.614453125,
"step": 60
},
{
"completion_length": 305.84296875,
"epoch": 0.6183115338882283,
"grad_norm": 4.019506444773187,
"kl": 3.4853515625,
"learning_rate": 8.842005554284295e-07,
"loss": 0.1394,
"reward": 2.45859375,
"reward_std": 0.560142171010375,
"rewards/accuracy_reward": 0.49609375,
"rewards/format_reward": 0.93671875,
"rewards/influence_reward": 0.396484375,
"rewards/len_reward": 0.629296875,
"step": 65
},
{
"completion_length": 305.07265625,
"epoch": 0.6658739595719382,
"grad_norm": 4.116522469679006,
"kl": 3.28466796875,
"learning_rate": 8.564642241456986e-07,
"loss": 0.1314,
"reward": 2.435546875,
"reward_std": 0.5443418994545937,
"rewards/accuracy_reward": 0.48515625,
"rewards/format_reward": 0.940625,
"rewards/influence_reward": 0.383203125,
"rewards/len_reward": 0.6265625,
"step": 70
},
{
"completion_length": 298.075390625,
"epoch": 0.713436385255648,
"grad_norm": 2.9394867850708772,
"kl": 3.50244140625,
"learning_rate": 8.262928807620843e-07,
"loss": 0.1401,
"reward": 2.416796875,
"reward_std": 0.5376573745161295,
"rewards/accuracy_reward": 0.48515625,
"rewards/format_reward": 0.950390625,
"rewards/influence_reward": 0.37890625,
"rewards/len_reward": 0.60234375,
"step": 75
},
{
"completion_length": 299.733984375,
"epoch": 0.760998810939358,
"grad_norm": 3.301846350005546,
"kl": 3.5205078125,
"learning_rate": 7.938926261462365e-07,
"loss": 0.1408,
"reward": 2.404296875,
"reward_std": 0.5362825602293014,
"rewards/accuracy_reward": 0.4703125,
"rewards/format_reward": 0.94765625,
"rewards/influence_reward": 0.3671875,
"rewards/len_reward": 0.619140625,
"step": 80
},
{
"completion_length": 308.55546875,
"epoch": 0.8085612366230678,
"grad_norm": 4.193561063299626,
"kl": 3.41328125,
"learning_rate": 7.594847868906076e-07,
"loss": 0.1365,
"reward": 2.408203125,
"reward_std": 0.535981552861631,
"rewards/accuracy_reward": 0.46796875,
"rewards/format_reward": 0.945703125,
"rewards/influence_reward": 0.365625,
"rewards/len_reward": 0.62890625,
"step": 85
},
{
"completion_length": 305.84140625,
"epoch": 0.8561236623067776,
"grad_norm": 20.941270697353453,
"kl": 3.77119140625,
"learning_rate": 7.233044034264033e-07,
"loss": 0.1509,
"reward": 2.408203125,
"reward_std": 0.5046488767489791,
"rewards/accuracy_reward": 0.475,
"rewards/format_reward": 0.957421875,
"rewards/influence_reward": 0.37890625,
"rewards/len_reward": 0.596875,
"step": 90
},
{
"completion_length": 318.782421875,
"epoch": 0.9036860879904876,
"grad_norm": 5.799431376928617,
"kl": 3.83603515625,
"learning_rate": 6.855986244591103e-07,
"loss": 0.1534,
"reward": 2.423046875,
"reward_std": 0.5394395122304558,
"rewards/accuracy_reward": 0.503515625,
"rewards/format_reward": 0.943359375,
"rewards/influence_reward": 0.3859375,
"rewards/len_reward": 0.590234375,
"step": 95
},
{
"completion_length": 318.328515625,
"epoch": 0.9512485136741974,
"grad_norm": 304.60647459145224,
"kl": 4.10732421875,
"learning_rate": 6.466250186922324e-07,
"loss": 0.1643,
"reward": 2.353125,
"reward_std": 0.5590785862877965,
"rewards/accuracy_reward": 0.461328125,
"rewards/format_reward": 0.94296875,
"rewards/influence_reward": 0.359765625,
"rewards/len_reward": 0.5890625,
"step": 100
}
],
"logging_steps": 5,
"max_steps": 212,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}