web-sample-filtered-len24k / trainer_state.json
yueqis's picture
Upload trainer_state.json with huggingface_hub
cf48185 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 1000,
"global_step": 287,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03484320557491289,
"grad_norm": 3.216470119539481,
"learning_rate": 6e-06,
"loss": 1.4213,
"step": 10
},
{
"epoch": 0.06968641114982578,
"grad_norm": 0.9358414619228234,
"learning_rate": 9.994664874011864e-06,
"loss": 0.8739,
"step": 20
},
{
"epoch": 0.10452961672473868,
"grad_norm": 0.762921970961328,
"learning_rate": 9.93477538444123e-06,
"loss": 0.7614,
"step": 30
},
{
"epoch": 0.13937282229965156,
"grad_norm": 0.5825132744489881,
"learning_rate": 9.809128215864096e-06,
"loss": 0.6971,
"step": 40
},
{
"epoch": 0.17421602787456447,
"grad_norm": 0.5779846846514067,
"learning_rate": 9.619397662556434e-06,
"loss": 0.6933,
"step": 50
},
{
"epoch": 0.20905923344947736,
"grad_norm": 0.6432877395688604,
"learning_rate": 9.368111953231849e-06,
"loss": 0.6534,
"step": 60
},
{
"epoch": 0.24390243902439024,
"grad_norm": 0.5161784198014567,
"learning_rate": 9.058619561473308e-06,
"loss": 0.6454,
"step": 70
},
{
"epoch": 0.2787456445993031,
"grad_norm": 0.5515024392970438,
"learning_rate": 8.695044586103297e-06,
"loss": 0.6467,
"step": 80
},
{
"epoch": 0.313588850174216,
"grad_norm": 0.5638319890229825,
"learning_rate": 8.282231796065215e-06,
"loss": 0.655,
"step": 90
},
{
"epoch": 0.34843205574912894,
"grad_norm": 0.5348182598237928,
"learning_rate": 7.82568207211296e-06,
"loss": 0.6496,
"step": 100
},
{
"epoch": 0.3832752613240418,
"grad_norm": 0.5584260715846722,
"learning_rate": 7.33147910557174e-06,
"loss": 0.6559,
"step": 110
},
{
"epoch": 0.4181184668989547,
"grad_norm": 0.522415331465173,
"learning_rate": 6.806208330935766e-06,
"loss": 0.6036,
"step": 120
},
{
"epoch": 0.4529616724738676,
"grad_norm": 0.6016439526998959,
"learning_rate": 6.2568691725555144e-06,
"loss": 0.6176,
"step": 130
},
{
"epoch": 0.4878048780487805,
"grad_norm": 0.564282507025487,
"learning_rate": 5.690781774759412e-06,
"loss": 0.6249,
"step": 140
},
{
"epoch": 0.5226480836236934,
"grad_norm": 0.6212522251481895,
"learning_rate": 5.115489458265006e-06,
"loss": 0.6282,
"step": 150
},
{
"epoch": 0.5574912891986062,
"grad_norm": 0.5367960373375557,
"learning_rate": 4.53865820268349e-06,
"loss": 0.6033,
"step": 160
},
{
"epoch": 0.5923344947735192,
"grad_norm": 0.5093764149323716,
"learning_rate": 3.967974494549803e-06,
"loss": 0.5936,
"step": 170
},
{
"epoch": 0.627177700348432,
"grad_norm": 0.552515840679091,
"learning_rate": 3.4110429020904924e-06,
"loss": 0.625,
"step": 180
},
{
"epoch": 0.662020905923345,
"grad_norm": 0.535450593688453,
"learning_rate": 2.8752847415828923e-06,
"loss": 0.6178,
"step": 190
},
{
"epoch": 0.6968641114982579,
"grad_norm": 0.5983891293222291,
"learning_rate": 2.3678391856132203e-06,
"loss": 0.5937,
"step": 200
},
{
"epoch": 0.7317073170731707,
"grad_norm": 0.5614553572127612,
"learning_rate": 1.8954681310021434e-06,
"loss": 0.609,
"step": 210
},
{
"epoch": 0.7665505226480837,
"grad_norm": 0.4869927011463621,
"learning_rate": 1.4644660940672628e-06,
"loss": 0.6036,
"step": 220
},
{
"epoch": 0.8013937282229965,
"grad_norm": 0.5359904941043641,
"learning_rate": 1.0805763339010329e-06,
"loss": 0.6067,
"step": 230
},
{
"epoch": 0.8362369337979094,
"grad_norm": 0.5351035133523918,
"learning_rate": 7.489143213519301e-07,
"loss": 0.6201,
"step": 240
},
{
"epoch": 0.8710801393728222,
"grad_norm": 0.546788838937359,
"learning_rate": 4.738995735125895e-07,
"loss": 0.6082,
"step": 250
},
{
"epoch": 0.9059233449477352,
"grad_norm": 0.5760609679486035,
"learning_rate": 2.5919676204517073e-07,
"loss": 0.6264,
"step": 260
},
{
"epoch": 0.9407665505226481,
"grad_norm": 0.47281891685039446,
"learning_rate": 1.0766688009695548e-07,
"loss": 0.5798,
"step": 270
},
{
"epoch": 0.975609756097561,
"grad_norm": 0.5289223385943652,
"learning_rate": 2.1329118524827662e-08,
"loss": 0.5909,
"step": 280
},
{
"epoch": 1.0,
"step": 287,
"total_flos": 1387071100944384.0,
"train_loss": 0.6654956722924102,
"train_runtime": 27782.0837,
"train_samples_per_second": 1.322,
"train_steps_per_second": 0.01
}
],
"logging_steps": 10,
"max_steps": 287,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1387071100944384.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}