llama3-8b-SpringerSSAT / trainer_state.json
TRnlp's picture
Upload folder using huggingface_hub
3ea16ed verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1044,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.28735632183908044,
"grad_norm": 0.22320619225502014,
"learning_rate": 3.787896253602306e-05,
"loss": 4.3135,
"step": 100
},
{
"epoch": 0.5747126436781609,
"grad_norm": 0.24471743404865265,
"learning_rate": 3.557348703170029e-05,
"loss": 3.7517,
"step": 200
},
{
"epoch": 0.8620689655172413,
"grad_norm": 0.24077577888965607,
"learning_rate": 3.326801152737752e-05,
"loss": 3.7293,
"step": 300
},
{
"epoch": 1.0,
"eval_loss": 3.7024362087249756,
"eval_runtime": 234.2655,
"eval_samples_per_second": 1.481,
"eval_steps_per_second": 1.481,
"step": 348
},
{
"epoch": 1.1494252873563218,
"grad_norm": 0.21085461974143982,
"learning_rate": 3.096253602305476e-05,
"loss": 3.7261,
"step": 400
},
{
"epoch": 1.4367816091954024,
"grad_norm": 0.24059197306632996,
"learning_rate": 2.865706051873199e-05,
"loss": 3.7369,
"step": 500
},
{
"epoch": 1.7241379310344827,
"grad_norm": 0.251012921333313,
"learning_rate": 2.6351585014409226e-05,
"loss": 3.7541,
"step": 600
},
{
"epoch": 2.0,
"eval_loss": 3.6997647285461426,
"eval_runtime": 232.9377,
"eval_samples_per_second": 1.49,
"eval_steps_per_second": 1.49,
"step": 696
},
{
"epoch": 2.0114942528735633,
"grad_norm": 0.24482741951942444,
"learning_rate": 2.4046109510086458e-05,
"loss": 3.6996,
"step": 700
},
{
"epoch": 2.2988505747126435,
"grad_norm": 0.30800557136535645,
"learning_rate": 2.174063400576369e-05,
"loss": 3.7372,
"step": 800
},
{
"epoch": 2.586206896551724,
"grad_norm": 0.3067159056663513,
"learning_rate": 1.943515850144092e-05,
"loss": 3.6875,
"step": 900
},
{
"epoch": 2.873563218390805,
"grad_norm": 0.3820016086101532,
"learning_rate": 1.7129682997118157e-05,
"loss": 3.6998,
"step": 1000
},
{
"epoch": 3.0,
"eval_loss": 3.7038047313690186,
"eval_runtime": 232.8777,
"eval_samples_per_second": 1.49,
"eval_steps_per_second": 1.49,
"step": 1044
}
],
"logging_steps": 100,
"max_steps": 1740,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.9328482134510797e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}