gte_ISB / trainer_state.json
spl4shedEdu's picture
Upload model checkpoint
7390532 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.7912484717522958,
"eval_steps": 7000,
"global_step": 63000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19902760797247732,
"grad_norm": 0.012659882195293903,
"learning_rate": 9.94314143567875e-06,
"loss": 0.0072,
"step": 7000
},
{
"epoch": 0.19902760797247732,
"eval_loss": 0.0020498675294220448,
"eval_runtime": 428.2581,
"eval_samples_per_second": 164.249,
"eval_steps_per_second": 20.532,
"step": 7000
},
{
"epoch": 0.39805521594495463,
"grad_norm": 0.000786342192441225,
"learning_rate": 8.900911431595243e-06,
"loss": 0.0019,
"step": 14000
},
{
"epoch": 0.39805521594495463,
"eval_loss": 0.0020361661445349455,
"eval_runtime": 426.3268,
"eval_samples_per_second": 164.993,
"eval_steps_per_second": 20.625,
"step": 14000
},
{
"epoch": 0.597082823917432,
"grad_norm": 0.7821819186210632,
"learning_rate": 7.795978327831047e-06,
"loss": 0.0034,
"step": 21000
},
{
"epoch": 0.597082823917432,
"eval_loss": 0.0016205289866775274,
"eval_runtime": 427.1096,
"eval_samples_per_second": 164.691,
"eval_steps_per_second": 20.587,
"step": 21000
},
{
"epoch": 0.7961104318899093,
"grad_norm": 0.000917082536034286,
"learning_rate": 6.69025542199125e-06,
"loss": 0.001,
"step": 28000
},
{
"epoch": 0.7961104318899093,
"eval_loss": 0.0012641858775168657,
"eval_runtime": 431.2051,
"eval_samples_per_second": 163.127,
"eval_steps_per_second": 20.392,
"step": 28000
},
{
"epoch": 0.9951380398623866,
"grad_norm": 0.00031811260851100087,
"learning_rate": 5.585006397396813e-06,
"loss": 0.0012,
"step": 35000
},
{
"epoch": 0.9951380398623866,
"eval_loss": 0.001008204068057239,
"eval_runtime": 429.1103,
"eval_samples_per_second": 163.923,
"eval_steps_per_second": 20.491,
"step": 35000
},
{
"epoch": 1.194165647834864,
"grad_norm": 0.0007407604134641588,
"learning_rate": 4.479441451972137e-06,
"loss": 0.0009,
"step": 42000
},
{
"epoch": 1.194165647834864,
"eval_loss": 0.0008274485589936376,
"eval_runtime": 430.5634,
"eval_samples_per_second": 163.37,
"eval_steps_per_second": 20.422,
"step": 42000
},
{
"epoch": 1.3931932558073412,
"grad_norm": 0.001614529057405889,
"learning_rate": 3.3740344669625798e-06,
"loss": 0.0005,
"step": 49000
},
{
"epoch": 1.3931932558073412,
"eval_loss": 0.0008763344376347959,
"eval_runtime": 432.4373,
"eval_samples_per_second": 162.662,
"eval_steps_per_second": 20.334,
"step": 49000
},
{
"epoch": 1.5922208637798185,
"grad_norm": 0.00013046746607869864,
"learning_rate": 2.2687854423681427e-06,
"loss": 0.0004,
"step": 56000
},
{
"epoch": 1.5922208637798185,
"eval_loss": 0.0007292362279258668,
"eval_runtime": 427.6302,
"eval_samples_per_second": 164.49,
"eval_steps_per_second": 20.562,
"step": 56000
},
{
"epoch": 1.7912484717522958,
"grad_norm": 0.00014433814794756472,
"learning_rate": 1.163536417773706e-06,
"loss": 0.0003,
"step": 63000
},
{
"epoch": 1.7912484717522958,
"eval_loss": 0.0007480237982235849,
"eval_runtime": 429.05,
"eval_samples_per_second": 163.946,
"eval_steps_per_second": 20.494,
"step": 63000
}
],
"logging_steps": 7000,
"max_steps": 70342,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 7000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}