{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.987452948557089,
"eval_steps": 500,
"global_step": 198,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"grad_norm": 0.07030480355024338,
"learning_rate": 0.0002,
"loss": 1.3367,
"step": 10
},
{
"epoch": 0.2,
"grad_norm": 0.07384990900754929,
"learning_rate": 0.0002,
"loss": 0.9811,
"step": 20
},
{
"epoch": 0.3,
"grad_norm": 0.046176519244909286,
"learning_rate": 0.0002,
"loss": 1.1,
"step": 30
},
{
"epoch": 0.4,
"grad_norm": 0.04807846248149872,
"learning_rate": 0.0002,
"loss": 1.0853,
"step": 40
},
{
"epoch": 0.5,
"grad_norm": 0.07028425484895706,
"learning_rate": 0.0002,
"loss": 0.8954,
"step": 50
},
{
"epoch": 0.6,
"grad_norm": 0.04952001944184303,
"learning_rate": 0.0002,
"loss": 1.1859,
"step": 60
},
{
"epoch": 0.7,
"grad_norm": 0.06458909064531326,
"learning_rate": 0.0002,
"loss": 0.8935,
"step": 70
},
{
"epoch": 0.8,
"grad_norm": 0.051869411021471024,
"learning_rate": 0.0002,
"loss": 1.0834,
"step": 80
},
{
"epoch": 0.9,
"grad_norm": 0.07632376253604889,
"learning_rate": 0.0002,
"loss": 1.0116,
"step": 90
},
{
"epoch": 0.99,
"eval_loss": 1.043639063835144,
"eval_runtime": 14.9586,
"eval_samples_per_second": 6.685,
"eval_steps_per_second": 0.267,
"step": 99
},
{
"epoch": 1.0,
"grad_norm": 0.07128819078207016,
"learning_rate": 0.0002,
"loss": 0.8683,
"step": 100
},
{
"epoch": 1.1,
"grad_norm": 0.04056414216756821,
"learning_rate": 0.0002,
"loss": 1.1677,
"step": 110
},
{
"epoch": 1.2,
"grad_norm": 0.06517702341079712,
"learning_rate": 0.0002,
"loss": 0.8393,
"step": 120
},
{
"epoch": 1.3,
"grad_norm": 0.05269412696361542,
"learning_rate": 0.0002,
"loss": 1.0144,
"step": 130
},
{
"epoch": 1.41,
"grad_norm": 0.06893514841794968,
"learning_rate": 0.0002,
"loss": 1.0169,
"step": 140
},
{
"epoch": 1.51,
"grad_norm": 0.047447849065065384,
"learning_rate": 0.0002,
"loss": 0.798,
"step": 150
},
{
"epoch": 1.61,
"grad_norm": 0.04566727951169014,
"learning_rate": 0.0002,
"loss": 1.1378,
"step": 160
},
{
"epoch": 1.71,
"grad_norm": 0.08533580601215363,
"learning_rate": 0.0002,
"loss": 0.8075,
"step": 170
},
{
"epoch": 1.81,
"grad_norm": 0.04863196611404419,
"learning_rate": 0.0002,
"loss": 1.0397,
"step": 180
},
{
"epoch": 1.91,
"grad_norm": 0.06892246007919312,
"learning_rate": 0.0002,
"loss": 0.9698,
"step": 190
},
{
"epoch": 1.99,
"eval_loss": 1.031528115272522,
"eval_runtime": 4.0808,
"eval_samples_per_second": 24.505,
"eval_steps_per_second": 0.98,
"step": 198
}
],
"logging_steps": 10,
"max_steps": 198,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 250,
"total_flos": 6.906134738176573e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}