BackTrack_5e-6_SFT / trainer_state.json
zycheiheihei's picture
Upload folder using huggingface_hub
5d01997 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 276,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1092896174863388,
"grad_norm": 7.8006248254154436,
"learning_rate": 1.6071428571428574e-06,
"loss": 1.2223,
"step": 10
},
{
"epoch": 0.2185792349726776,
"grad_norm": 4.844388222783676,
"learning_rate": 3.3928571428571435e-06,
"loss": 1.0903,
"step": 20
},
{
"epoch": 0.32786885245901637,
"grad_norm": 3.0957110922340183,
"learning_rate": 4.999799414013322e-06,
"loss": 0.9481,
"step": 30
},
{
"epoch": 0.4371584699453552,
"grad_norm": 2.680281559117405,
"learning_rate": 4.975768018471877e-06,
"loss": 0.8858,
"step": 40
},
{
"epoch": 0.546448087431694,
"grad_norm": 2.7043491664434707,
"learning_rate": 4.912060841339536e-06,
"loss": 0.8931,
"step": 50
},
{
"epoch": 0.6557377049180327,
"grad_norm": 2.806617891201655,
"learning_rate": 4.809698831278217e-06,
"loss": 0.8647,
"step": 60
},
{
"epoch": 0.7650273224043715,
"grad_norm": 2.988877065894127,
"learning_rate": 4.670322405614621e-06,
"loss": 0.8276,
"step": 70
},
{
"epoch": 0.8743169398907104,
"grad_norm": 3.224849095250869,
"learning_rate": 4.4961651615930344e-06,
"loss": 0.8418,
"step": 80
},
{
"epoch": 0.9836065573770492,
"grad_norm": 2.8043508949300717,
"learning_rate": 4.290018081536807e-06,
"loss": 0.829,
"step": 90
},
{
"epoch": 1.0874316939890711,
"grad_norm": 3.097062688827004,
"learning_rate": 4.0551848055539345e-06,
"loss": 0.6056,
"step": 100
},
{
"epoch": 1.1967213114754098,
"grad_norm": 2.7435175097760536,
"learning_rate": 3.795428688570505e-06,
"loss": 0.5148,
"step": 110
},
{
"epoch": 1.3060109289617485,
"grad_norm": 2.8323835125718384,
"learning_rate": 3.514912490137268e-06,
"loss": 0.478,
"step": 120
},
{
"epoch": 1.4153005464480874,
"grad_norm": 2.8362148177995863,
"learning_rate": 3.2181316635191125e-06,
"loss": 0.4782,
"step": 130
},
{
"epoch": 1.5245901639344264,
"grad_norm": 2.6159666839429807,
"learning_rate": 2.909842313152888e-06,
"loss": 0.4866,
"step": 140
},
{
"epoch": 1.633879781420765,
"grad_norm": 2.6862167736013034,
"learning_rate": 2.5949849750018486e-06,
"loss": 0.4925,
"step": 150
},
{
"epoch": 1.7431693989071038,
"grad_norm": 2.9005042165979877,
"learning_rate": 2.27860544127575e-06,
"loss": 0.4672,
"step": 160
},
{
"epoch": 1.8524590163934427,
"grad_norm": 2.8760498712590588,
"learning_rate": 1.9657738983516227e-06,
"loss": 0.4581,
"step": 170
},
{
"epoch": 1.9617486338797814,
"grad_norm": 2.8992839199365017,
"learning_rate": 1.6615036737622574e-06,
"loss": 0.4614,
"step": 180
},
{
"epoch": 2.0655737704918034,
"grad_norm": 2.6886295405633924,
"learning_rate": 1.3706708943843822e-06,
"loss": 0.348,
"step": 190
},
{
"epoch": 2.1748633879781423,
"grad_norm": 2.841014116152583,
"learning_rate": 1.0979363433559892e-06,
"loss": 0.2581,
"step": 200
},
{
"epoch": 2.2841530054644807,
"grad_norm": 2.5452200095961905,
"learning_rate": 8.476707680161486e-07,
"loss": 0.2529,
"step": 210
},
{
"epoch": 2.3934426229508197,
"grad_norm": 2.7836057891970567,
"learning_rate": 6.238848358558439e-07,
"loss": 0.2509,
"step": 220
},
{
"epoch": 2.5027322404371586,
"grad_norm": 2.8036652568167026,
"learning_rate": 4.3016486098094667e-07,
"loss": 0.2387,
"step": 230
},
{
"epoch": 2.612021857923497,
"grad_norm": 2.5075873714323333,
"learning_rate": 2.696153311122704e-07,
"loss": 0.2508,
"step": 240
},
{
"epoch": 2.721311475409836,
"grad_norm": 2.5704796097332814,
"learning_rate": 1.448091561646628e-07,
"loss": 0.2736,
"step": 250
},
{
"epoch": 2.830601092896175,
"grad_norm": 2.639131043723551,
"learning_rate": 5.774643570378296e-08,
"loss": 0.273,
"step": 260
},
{
"epoch": 2.939890710382514,
"grad_norm": 2.499486520403044,
"learning_rate": 9.822406058697665e-09,
"loss": 0.2575,
"step": 270
},
{
"epoch": 3.0,
"step": 276,
"total_flos": 14707949568000.0,
"train_loss": 0.5563258748987446,
"train_runtime": 4077.0893,
"train_samples_per_second": 2.145,
"train_steps_per_second": 0.068
}
],
"logging_steps": 10,
"max_steps": 276,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 14707949568000.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}