tinyllama-salience / checkpoint-130 /trainer_state.json
thebnbrkr's picture
Upload folder using huggingface_hub
45bdf15 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3746397694524496,
"eval_steps": 10,
"global_step": 130,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01440922190201729,
"grad_norm": 0.888121485710144,
"learning_rate": 0.0002988472622478386,
"loss": 2.4115,
"step": 5
},
{
"epoch": 0.02881844380403458,
"grad_norm": 0.8970298767089844,
"learning_rate": 0.00029740634005763684,
"loss": 2.23,
"step": 10
},
{
"epoch": 0.02881844380403458,
"eval_loss": 2.165903091430664,
"eval_runtime": 1.8407,
"eval_samples_per_second": 84.207,
"eval_steps_per_second": 10.865,
"step": 10
},
{
"epoch": 0.043227665706051875,
"grad_norm": 1.078068494796753,
"learning_rate": 0.00029596541786743513,
"loss": 2.0155,
"step": 15
},
{
"epoch": 0.05763688760806916,
"grad_norm": 1.2069385051727295,
"learning_rate": 0.0002945244956772334,
"loss": 1.9346,
"step": 20
},
{
"epoch": 0.05763688760806916,
"eval_loss": 1.7943660020828247,
"eval_runtime": 1.7809,
"eval_samples_per_second": 87.035,
"eval_steps_per_second": 11.23,
"step": 20
},
{
"epoch": 0.07204610951008646,
"grad_norm": 1.3408102989196777,
"learning_rate": 0.0002930835734870317,
"loss": 1.7434,
"step": 25
},
{
"epoch": 0.08645533141210375,
"grad_norm": 1.4849472045898438,
"learning_rate": 0.00029164265129682994,
"loss": 1.5409,
"step": 30
},
{
"epoch": 0.08645533141210375,
"eval_loss": 1.5225533246994019,
"eval_runtime": 1.7657,
"eval_samples_per_second": 87.784,
"eval_steps_per_second": 11.327,
"step": 30
},
{
"epoch": 0.10086455331412104,
"grad_norm": 2.2489142417907715,
"learning_rate": 0.0002902017291066282,
"loss": 1.4702,
"step": 35
},
{
"epoch": 0.11527377521613832,
"grad_norm": 2.169492244720459,
"learning_rate": 0.00028876080691642647,
"loss": 1.398,
"step": 40
},
{
"epoch": 0.11527377521613832,
"eval_loss": 1.3084412813186646,
"eval_runtime": 1.7715,
"eval_samples_per_second": 87.495,
"eval_steps_per_second": 11.29,
"step": 40
},
{
"epoch": 0.12968299711815562,
"grad_norm": 2.665741205215454,
"learning_rate": 0.00028731988472622475,
"loss": 1.2839,
"step": 45
},
{
"epoch": 0.1440922190201729,
"grad_norm": 2.9897077083587646,
"learning_rate": 0.00028587896253602304,
"loss": 1.0893,
"step": 50
},
{
"epoch": 0.1440922190201729,
"eval_loss": 1.0865856409072876,
"eval_runtime": 1.7767,
"eval_samples_per_second": 87.241,
"eval_steps_per_second": 11.257,
"step": 50
},
{
"epoch": 0.1585014409221902,
"grad_norm": 2.3380894660949707,
"learning_rate": 0.0002844380403458213,
"loss": 1.0454,
"step": 55
},
{
"epoch": 0.1729106628242075,
"grad_norm": 2.571589469909668,
"learning_rate": 0.00028299711815561957,
"loss": 0.8605,
"step": 60
},
{
"epoch": 0.1729106628242075,
"eval_loss": 0.8586989641189575,
"eval_runtime": 1.7853,
"eval_samples_per_second": 86.822,
"eval_steps_per_second": 11.203,
"step": 60
},
{
"epoch": 0.1873198847262248,
"grad_norm": 3.0408358573913574,
"learning_rate": 0.00028155619596541786,
"loss": 0.8101,
"step": 65
},
{
"epoch": 0.2017291066282421,
"grad_norm": 2.4773292541503906,
"learning_rate": 0.0002801152737752161,
"loss": 0.7769,
"step": 70
},
{
"epoch": 0.2017291066282421,
"eval_loss": 0.6477732062339783,
"eval_runtime": 1.781,
"eval_samples_per_second": 87.029,
"eval_steps_per_second": 11.23,
"step": 70
},
{
"epoch": 0.21613832853025935,
"grad_norm": 2.5505621433258057,
"learning_rate": 0.0002786743515850144,
"loss": 0.6286,
"step": 75
},
{
"epoch": 0.23054755043227665,
"grad_norm": 2.5216686725616455,
"learning_rate": 0.00027723342939481267,
"loss": 0.5306,
"step": 80
},
{
"epoch": 0.23054755043227665,
"eval_loss": 0.4904349446296692,
"eval_runtime": 1.7712,
"eval_samples_per_second": 87.51,
"eval_steps_per_second": 11.292,
"step": 80
},
{
"epoch": 0.24495677233429394,
"grad_norm": 3.0844411849975586,
"learning_rate": 0.00027579250720461096,
"loss": 0.5331,
"step": 85
},
{
"epoch": 0.25936599423631124,
"grad_norm": 1.8952299356460571,
"learning_rate": 0.0002743515850144092,
"loss": 0.4093,
"step": 90
},
{
"epoch": 0.25936599423631124,
"eval_loss": 0.40096166729927063,
"eval_runtime": 1.773,
"eval_samples_per_second": 87.422,
"eval_steps_per_second": 11.28,
"step": 90
},
{
"epoch": 0.2737752161383285,
"grad_norm": 3.3445639610290527,
"learning_rate": 0.0002729106628242075,
"loss": 0.3654,
"step": 95
},
{
"epoch": 0.2881844380403458,
"grad_norm": 1.9506555795669556,
"learning_rate": 0.0002714697406340057,
"loss": 0.3458,
"step": 100
},
{
"epoch": 0.2881844380403458,
"eval_loss": 0.32525885105133057,
"eval_runtime": 1.7918,
"eval_samples_per_second": 86.503,
"eval_steps_per_second": 11.162,
"step": 100
},
{
"epoch": 0.3025936599423631,
"grad_norm": 1.9951375722885132,
"learning_rate": 0.000270028818443804,
"loss": 0.2672,
"step": 105
},
{
"epoch": 0.3170028818443804,
"grad_norm": 2.8618788719177246,
"learning_rate": 0.0002685878962536023,
"loss": 0.3316,
"step": 110
},
{
"epoch": 0.3170028818443804,
"eval_loss": 0.29092785716056824,
"eval_runtime": 1.7704,
"eval_samples_per_second": 87.549,
"eval_steps_per_second": 11.297,
"step": 110
},
{
"epoch": 0.3314121037463977,
"grad_norm": 2.436544179916382,
"learning_rate": 0.00026714697406340053,
"loss": 0.3176,
"step": 115
},
{
"epoch": 0.345821325648415,
"grad_norm": 1.1800215244293213,
"learning_rate": 0.0002657060518731988,
"loss": 0.2378,
"step": 120
},
{
"epoch": 0.345821325648415,
"eval_loss": 0.25983747839927673,
"eval_runtime": 1.7719,
"eval_samples_per_second": 87.476,
"eval_steps_per_second": 11.287,
"step": 120
},
{
"epoch": 0.36023054755043227,
"grad_norm": 1.0937371253967285,
"learning_rate": 0.0002642651296829971,
"loss": 0.2617,
"step": 125
},
{
"epoch": 0.3746397694524496,
"grad_norm": 1.5132169723510742,
"learning_rate": 0.0002628242074927954,
"loss": 0.2669,
"step": 130
},
{
"epoch": 0.3746397694524496,
"eval_loss": 0.2434806078672409,
"eval_runtime": 1.7851,
"eval_samples_per_second": 86.831,
"eval_steps_per_second": 11.204,
"step": 130
}
],
"logging_steps": 5,
"max_steps": 1041,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1654370819112960.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}