LLM_main_LGSFT_150 / trainer_state.json
satoyutaka's picture
Upload 7 files
aa8de2a verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.14814814814814814,
"eval_steps": 500,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 1.2568040400743485,
"epoch": 0.009876543209876543,
"grad_norm": 0.926980197429657,
"learning_rate": 0.00012,
"loss": 1.9095975875854492,
"mean_token_accuracy": 0.5853493079543114,
"num_tokens": 23832.0,
"step": 10
},
{
"entropy": 1.5708743289113045,
"epoch": 0.019753086419753086,
"grad_norm": 0.46579790115356445,
"learning_rate": 0.00019956707906498044,
"loss": 1.5156055450439454,
"mean_token_accuracy": 0.6387933611869812,
"num_tokens": 46858.0,
"step": 20
},
{
"entropy": 1.5461681112647057,
"epoch": 0.02962962962962963,
"grad_norm": 0.42597582936286926,
"learning_rate": 0.00019473966425143292,
"loss": 1.2923666000366212,
"mean_token_accuracy": 0.6715767234563828,
"num_tokens": 72056.0,
"step": 30
},
{
"entropy": 1.1817909434437752,
"epoch": 0.03950617283950617,
"grad_norm": 0.5361228585243225,
"learning_rate": 0.0001848048096156426,
"loss": 1.0183001518249513,
"mean_token_accuracy": 0.7339986249804497,
"num_tokens": 93639.0,
"step": 40
},
{
"entropy": 0.8827707976102829,
"epoch": 0.04938271604938271,
"grad_norm": 0.49438047409057617,
"learning_rate": 0.0001702981057425662,
"loss": 0.8319701194763184,
"mean_token_accuracy": 0.7700131639838219,
"num_tokens": 118401.0,
"step": 50
},
{
"entropy": 0.748842728883028,
"epoch": 0.05925925925925926,
"grad_norm": 0.6138308644294739,
"learning_rate": 0.00015200161279292155,
"loss": 0.7784955024719238,
"mean_token_accuracy": 0.7956750705838204,
"num_tokens": 139540.0,
"step": 60
},
{
"entropy": 0.7260295443236828,
"epoch": 0.0691358024691358,
"grad_norm": 0.6582333445549011,
"learning_rate": 0.00013090169943749476,
"loss": 0.7784926891326904,
"mean_token_accuracy": 0.7997785583138466,
"num_tokens": 161194.0,
"step": 70
},
{
"entropy": 0.5300135292112828,
"epoch": 0.07901234567901234,
"grad_norm": 0.7576186656951904,
"learning_rate": 0.00010813586746678583,
"loss": 0.5675814628601075,
"mean_token_accuracy": 0.8377173274755478,
"num_tokens": 182672.0,
"step": 80
},
{
"entropy": 0.5887195959687233,
"epoch": 0.08888888888888889,
"grad_norm": 0.4873444437980652,
"learning_rate": 8.49314287750517e-05,
"loss": 0.6080675601959229,
"mean_token_accuracy": 0.8072204932570457,
"num_tokens": 209701.0,
"step": 90
},
{
"entropy": 0.5359628431499004,
"epoch": 0.09876543209876543,
"grad_norm": 0.545722484588623,
"learning_rate": 6.25393406584088e-05,
"loss": 0.5403496742248535,
"mean_token_accuracy": 0.834045910090208,
"num_tokens": 235023.0,
"step": 100
},
{
"entropy": 0.5043216332793236,
"epoch": 0.10864197530864197,
"grad_norm": 0.734391450881958,
"learning_rate": 4.216676638320135e-05,
"loss": 0.5571066856384277,
"mean_token_accuracy": 0.8353787913918496,
"num_tokens": 258948.0,
"step": 110
},
{
"entropy": 0.7284694246947765,
"epoch": 0.11851851851851852,
"grad_norm": 0.5130884647369385,
"learning_rate": 2.491199670185008e-05,
"loss": 0.6980243682861328,
"mean_token_accuracy": 0.7769531480967998,
"num_tokens": 286219.0,
"step": 120
},
{
"entropy": 0.6834345638751984,
"epoch": 0.12839506172839507,
"grad_norm": 0.4917851388454437,
"learning_rate": 1.1705240714107302e-05,
"loss": 0.6982964515686035,
"mean_token_accuracy": 0.7917674139142037,
"num_tokens": 309582.0,
"step": 130
},
{
"entropy": 0.6758594036102294,
"epoch": 0.1382716049382716,
"grad_norm": 1.1197789907455444,
"learning_rate": 3.2584780537136207e-06,
"loss": 0.6473836898803711,
"mean_token_accuracy": 0.7972797065973282,
"num_tokens": 332439.0,
"step": 140
},
{
"entropy": 0.5859654754400253,
"epoch": 0.14814814814814814,
"grad_norm": 0.7272841334342957,
"learning_rate": 2.7075882053828605e-08,
"loss": 0.6555652618408203,
"mean_token_accuracy": 0.8194831728935241,
"num_tokens": 355196.0,
"step": 150
}
],
"logging_steps": 10,
"max_steps": 150,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 30,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7756223621246976.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}