Safetensors
English
BAIT-ModelZoo / models /id-0002 /model /trainer_state.json
NoahShen's picture
Add files using upload-large-folder tool
009ff12 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9384615384615387,
"eval_steps": 500,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"grad_norm": 0.2056211680173874,
"learning_rate": 0.0002,
"loss": 1.209,
"step": 10
},
{
"epoch": 0.38,
"grad_norm": 0.20694544911384583,
"learning_rate": 0.0002,
"loss": 1.1006,
"step": 20
},
{
"epoch": 0.57,
"grad_norm": 0.08869898319244385,
"learning_rate": 0.0002,
"loss": 0.9996,
"step": 30
},
{
"epoch": 0.76,
"grad_norm": 0.1591188907623291,
"learning_rate": 0.0002,
"loss": 0.9352,
"step": 40
},
{
"epoch": 0.95,
"grad_norm": 0.21296942234039307,
"learning_rate": 0.0002,
"loss": 0.9077,
"step": 50
},
{
"epoch": 0.98,
"eval_loss": 1.0426653623580933,
"eval_runtime": 5.957,
"eval_samples_per_second": 16.787,
"eval_steps_per_second": 0.336,
"step": 52
},
{
"epoch": 1.14,
"grad_norm": 0.09350233525037766,
"learning_rate": 0.0002,
"loss": 1.0513,
"step": 60
},
{
"epoch": 1.33,
"grad_norm": 0.10060329735279083,
"learning_rate": 0.0002,
"loss": 0.8996,
"step": 70
},
{
"epoch": 1.51,
"grad_norm": 0.08181482553482056,
"learning_rate": 0.0002,
"loss": 0.839,
"step": 80
},
{
"epoch": 1.7,
"grad_norm": 0.1657375991344452,
"learning_rate": 0.0002,
"loss": 0.8408,
"step": 90
},
{
"epoch": 1.89,
"grad_norm": 0.1462504267692566,
"learning_rate": 0.0002,
"loss": 0.9489,
"step": 100
},
{
"epoch": 1.99,
"eval_loss": 1.0188666582107544,
"eval_runtime": 5.5117,
"eval_samples_per_second": 18.143,
"eval_steps_per_second": 0.363,
"step": 105
},
{
"epoch": 2.08,
"grad_norm": 0.06421244889497757,
"learning_rate": 0.0002,
"loss": 0.9165,
"step": 110
},
{
"epoch": 2.27,
"grad_norm": 0.19528228044509888,
"learning_rate": 0.0002,
"loss": 0.7311,
"step": 120
},
{
"epoch": 2.46,
"grad_norm": 0.1915639042854309,
"learning_rate": 0.0002,
"loss": 0.7609,
"step": 130
},
{
"epoch": 2.65,
"grad_norm": 0.14902861416339874,
"learning_rate": 0.0002,
"loss": 0.8944,
"step": 140
},
{
"epoch": 2.84,
"grad_norm": 0.11538069695234299,
"learning_rate": 0.0002,
"loss": 0.8738,
"step": 150
},
{
"epoch": 2.99,
"eval_loss": 1.0385792255401611,
"eval_runtime": 5.6367,
"eval_samples_per_second": 17.741,
"eval_steps_per_second": 0.355,
"step": 158
},
{
"epoch": 3.03,
"grad_norm": 0.07593070715665817,
"learning_rate": 0.0002,
"loss": 0.6973,
"step": 160
},
{
"epoch": 3.22,
"grad_norm": 0.18991707265377045,
"learning_rate": 0.0002,
"loss": 0.6857,
"step": 170
},
{
"epoch": 3.41,
"grad_norm": 0.16755373775959015,
"learning_rate": 0.0002,
"loss": 0.8125,
"step": 180
},
{
"epoch": 3.6,
"grad_norm": 0.11783052235841751,
"learning_rate": 0.0002,
"loss": 0.7726,
"step": 190
},
{
"epoch": 3.79,
"grad_norm": 0.11990819126367569,
"learning_rate": 0.0002,
"loss": 0.6837,
"step": 200
},
{
"epoch": 3.94,
"eval_loss": 1.0604676008224487,
"eval_runtime": 1.1256,
"eval_samples_per_second": 88.843,
"eval_steps_per_second": 1.777,
"step": 208
}
],
"logging_steps": 10,
"max_steps": 208,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 250,
"total_flos": 1.4978204330287432e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}