Nix2.7 / trainer_state.json
ray0rf1re's picture
Upload 14 files
0258848 verified
{
"best_metric": 1.6555372476577759,
"best_model_checkpoint": "./nix2_7_checkpoints/checkpoint-100",
"epoch": 0.005781808983485708,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005781808983485708,
"grad_norm": 0.42287012934684753,
"learning_rate": 0.00018181818181818183,
"loss": 2.6194,
"step": 10
},
{
"epoch": 0.0011563617966971417,
"grad_norm": 0.440746545791626,
"learning_rate": 0.00019971129134476473,
"loss": 2.219,
"step": 20
},
{
"epoch": 0.0017345426950457123,
"grad_norm": 0.5202638506889343,
"learning_rate": 0.00019871542617383743,
"loss": 1.9173,
"step": 30
},
{
"epoch": 0.0023127235933942834,
"grad_norm": 0.33755865693092346,
"learning_rate": 0.000197015936819118,
"loss": 1.8698,
"step": 40
},
{
"epoch": 0.002890904491742854,
"grad_norm": 0.30062955617904663,
"learning_rate": 0.0001946249369071837,
"loss": 1.7915,
"step": 50
},
{
"epoch": 0.002890904491742854,
"eval_loss": 1.7323133945465088,
"eval_runtime": 48829.3841,
"eval_samples_per_second": 0.296,
"eval_steps_per_second": 0.296,
"step": 50
},
{
"epoch": 0.0034690853900914247,
"grad_norm": 0.3231092691421509,
"learning_rate": 0.00019155946901614702,
"loss": 1.6959,
"step": 60
},
{
"epoch": 0.004047266288439996,
"grad_norm": 0.30537882447242737,
"learning_rate": 0.00018784138319950398,
"loss": 1.7047,
"step": 70
},
{
"epoch": 0.004625447186788567,
"grad_norm": 0.26564449071884155,
"learning_rate": 0.00018349718124324076,
"loss": 1.7114,
"step": 80
},
{
"epoch": 0.005203628085137137,
"grad_norm": 0.369289368391037,
"learning_rate": 0.00017855782776630483,
"loss": 1.5732,
"step": 90
},
{
"epoch": 0.005781808983485708,
"grad_norm": 0.2947627902030945,
"learning_rate": 0.00017305852951087798,
"loss": 1.7628,
"step": 100
},
{
"epoch": 0.005781808983485708,
"eval_loss": 1.6555372476577759,
"eval_runtime": 44170.1173,
"eval_samples_per_second": 0.327,
"eval_steps_per_second": 0.327,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 383,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6531344184115200.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}