vlsp1 / trainer_state.json
DungND1107's picture
Upload 12 files
54ec91e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6777363605557438,
"eval_steps": 100,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03388681802778719,
"grad_norm": 0.2737605571746826,
"learning_rate": 9.6e-05,
"loss": 1.4843,
"step": 25
},
{
"epoch": 0.06777363605557438,
"grad_norm": 0.29454904794692993,
"learning_rate": 0.000196,
"loss": 1.3338,
"step": 50
},
{
"epoch": 0.10166045408336158,
"grad_norm": 0.4346487522125244,
"learning_rate": 0.00019663394109396915,
"loss": 1.21,
"step": 75
},
{
"epoch": 0.13554727211114875,
"grad_norm": 0.45564034581184387,
"learning_rate": 0.00019312762973352034,
"loss": 1.2042,
"step": 100
},
{
"epoch": 0.13554727211114875,
"eval_loss": 1.1878175735473633,
"eval_runtime": 596.4366,
"eval_samples_per_second": 8.796,
"eval_steps_per_second": 1.1,
"step": 100
},
{
"epoch": 0.16943409013893596,
"grad_norm": 0.3995042145252228,
"learning_rate": 0.00018962131837307154,
"loss": 1.1788,
"step": 125
},
{
"epoch": 0.20332090816672316,
"grad_norm": 0.4114416837692261,
"learning_rate": 0.00018611500701262273,
"loss": 1.1759,
"step": 150
},
{
"epoch": 0.23720772619451033,
"grad_norm": 0.4253525733947754,
"learning_rate": 0.00018260869565217392,
"loss": 1.1666,
"step": 175
},
{
"epoch": 0.2710945442222975,
"grad_norm": 0.3987375497817993,
"learning_rate": 0.00017910238429172512,
"loss": 1.1516,
"step": 200
},
{
"epoch": 0.2710945442222975,
"eval_loss": 1.1261389255523682,
"eval_runtime": 593.1768,
"eval_samples_per_second": 8.844,
"eval_steps_per_second": 1.106,
"step": 200
},
{
"epoch": 0.3049813622500847,
"grad_norm": 0.45106807351112366,
"learning_rate": 0.0001755960729312763,
"loss": 1.1174,
"step": 225
},
{
"epoch": 0.3388681802778719,
"grad_norm": 0.46433955430984497,
"learning_rate": 0.0001720897615708275,
"loss": 1.1032,
"step": 250
},
{
"epoch": 0.3727549983056591,
"grad_norm": 0.48211586475372314,
"learning_rate": 0.0001685834502103787,
"loss": 1.123,
"step": 275
},
{
"epoch": 0.4066418163334463,
"grad_norm": 0.4357963800430298,
"learning_rate": 0.0001650771388499299,
"loss": 1.094,
"step": 300
},
{
"epoch": 0.4066418163334463,
"eval_loss": 1.088568091392517,
"eval_runtime": 593.2602,
"eval_samples_per_second": 8.843,
"eval_steps_per_second": 1.106,
"step": 300
},
{
"epoch": 0.44052863436123346,
"grad_norm": 0.5000167489051819,
"learning_rate": 0.00016157082748948106,
"loss": 1.0956,
"step": 325
},
{
"epoch": 0.47441545238902066,
"grad_norm": 0.483510822057724,
"learning_rate": 0.00015806451612903225,
"loss": 1.0973,
"step": 350
},
{
"epoch": 0.5083022704168079,
"grad_norm": 0.6117793917655945,
"learning_rate": 0.00015455820476858344,
"loss": 1.1039,
"step": 375
},
{
"epoch": 0.542189088444595,
"grad_norm": 0.5380231142044067,
"learning_rate": 0.00015105189340813466,
"loss": 1.1156,
"step": 400
},
{
"epoch": 0.542189088444595,
"eval_loss": 1.0656226873397827,
"eval_runtime": 592.5891,
"eval_samples_per_second": 8.853,
"eval_steps_per_second": 1.107,
"step": 400
},
{
"epoch": 0.5760759064723823,
"grad_norm": 0.5856944918632507,
"learning_rate": 0.00014754558204768586,
"loss": 1.098,
"step": 425
},
{
"epoch": 0.6099627245001694,
"grad_norm": 0.43336766958236694,
"learning_rate": 0.00014403927068723705,
"loss": 1.0713,
"step": 450
},
{
"epoch": 0.6438495425279567,
"grad_norm": 0.4466949701309204,
"learning_rate": 0.00014053295932678822,
"loss": 1.0524,
"step": 475
},
{
"epoch": 0.6777363605557438,
"grad_norm": 0.6057330369949341,
"learning_rate": 0.0001370266479663394,
"loss": 1.0503,
"step": 500
},
{
"epoch": 0.6777363605557438,
"eval_loss": 1.0403542518615723,
"eval_runtime": 592.4391,
"eval_samples_per_second": 8.855,
"eval_steps_per_second": 1.107,
"step": 500
}
],
"logging_steps": 25,
"max_steps": 1476,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4299920826785792e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}