NIOS_san_kin / trainer_state.json
Pretam's picture
Upload folder using huggingface_hub
833874a verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 9610,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5202913631633714,
"grad_norm": 1.687648057937622,
"learning_rate": 4.7403746097814776e-05,
"loss": 1.6767198486328125,
"step": 500
},
{
"epoch": 1.0405827263267429,
"grad_norm": 1.4969497919082642,
"learning_rate": 4.480228928199792e-05,
"loss": 1.27803369140625,
"step": 1000
},
{
"epoch": 1.5608740894901145,
"grad_norm": 1.6620995998382568,
"learning_rate": 4.220083246618106e-05,
"loss": 1.0915474853515625,
"step": 1500
},
{
"epoch": 2.0811654526534857,
"grad_norm": 1.6661252975463867,
"learning_rate": 3.959937565036421e-05,
"loss": 0.9975990600585938,
"step": 2000
},
{
"epoch": 2.6014568158168574,
"grad_norm": 1.3287609815597534,
"learning_rate": 3.6997918834547346e-05,
"loss": 0.9085905151367187,
"step": 2500
},
{
"epoch": 3.121748178980229,
"grad_norm": 1.5800344944000244,
"learning_rate": 3.439646201873049e-05,
"loss": 0.8760296020507813,
"step": 3000
},
{
"epoch": 3.6420395421436003,
"grad_norm": 1.364020586013794,
"learning_rate": 3.179500520291364e-05,
"loss": 0.8017211303710937,
"step": 3500
},
{
"epoch": 4.1623309053069715,
"grad_norm": 1.3175318241119385,
"learning_rate": 2.9193548387096776e-05,
"loss": 0.7779053344726562,
"step": 4000
},
{
"epoch": 4.682622268470343,
"grad_norm": 4.449261665344238,
"learning_rate": 2.659209157127992e-05,
"loss": 0.7223885498046875,
"step": 4500
},
{
"epoch": 5.202913631633715,
"grad_norm": 1.5468088388442993,
"learning_rate": 2.3990634755463058e-05,
"loss": 0.6987474365234375,
"step": 5000
},
{
"epoch": 5.723204994797086,
"grad_norm": 1.247883677482605,
"learning_rate": 2.13891779396462e-05,
"loss": 0.6626260986328125,
"step": 5500
},
{
"epoch": 6.243496357960458,
"grad_norm": 1.5333998203277588,
"learning_rate": 1.8787721123829346e-05,
"loss": 0.6409296875,
"step": 6000
},
{
"epoch": 6.76378772112383,
"grad_norm": 1.6951643228530884,
"learning_rate": 1.618626430801249e-05,
"loss": 0.6173243408203125,
"step": 6500
},
{
"epoch": 7.2840790842872005,
"grad_norm": 1.4317214488983154,
"learning_rate": 1.3584807492195631e-05,
"loss": 0.5941461791992187,
"step": 7000
},
{
"epoch": 7.804370447450572,
"grad_norm": 1.5687386989593506,
"learning_rate": 1.0983350676378774e-05,
"loss": 0.5830839233398437,
"step": 7500
},
{
"epoch": 8.324661810613943,
"grad_norm": 1.5302783250808716,
"learning_rate": 8.381893860561914e-06,
"loss": 0.5607491455078125,
"step": 8000
},
{
"epoch": 8.844953173777315,
"grad_norm": 1.4110559225082397,
"learning_rate": 5.780437044745058e-06,
"loss": 0.5564122924804688,
"step": 8500
},
{
"epoch": 9.365244536940686,
"grad_norm": 1.3692632913589478,
"learning_rate": 3.1789802289282e-06,
"loss": 0.54335546875,
"step": 9000
},
{
"epoch": 9.885535900104058,
"grad_norm": 1.0497645139694214,
"learning_rate": 5.775234131113424e-07,
"loss": 0.539821533203125,
"step": 9500
}
],
"logging_steps": 500,
"max_steps": 9610,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.218510905176064e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}