bge-retrmoae-27M-e4 / trainer_state.json
mjaliz's picture
Upload folder using huggingface_hub
1d1d48b verified
{
"best_global_step": 847768,
"best_metric": 0.9335971474647522,
"best_model_checkpoint": "./mjaliz/product_titles_27M_bge-m3-retromae/checkpoint-847768",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 847768,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4718271980070019,
"grad_norm": 4.73534631729126,
"learning_rate": 1.8129818363857907e-05,
"loss": 1.4995,
"step": 100000
},
{
"epoch": 0.9436543960140038,
"grad_norm": 4.425976276397705,
"learning_rate": 1.6240726922386682e-05,
"loss": 1.2511,
"step": 200000
},
{
"epoch": 1.0,
"eval_loss": 1.126031756401062,
"eval_runtime": 8694.616,
"eval_samples_per_second": 650.033,
"eval_steps_per_second": 6.5,
"step": 211942
},
{
"epoch": 1.4154815940210057,
"grad_norm": 4.945573806762695,
"learning_rate": 1.4351635480915455e-05,
"loss": 1.1637,
"step": 300000
},
{
"epoch": 1.8873087920280076,
"grad_norm": 4.503483772277832,
"learning_rate": 1.246254403944423e-05,
"loss": 1.1104,
"step": 400000
},
{
"epoch": 2.0,
"eval_loss": 1.0256075859069824,
"eval_runtime": 8677.2104,
"eval_samples_per_second": 651.337,
"eval_steps_per_second": 6.513,
"step": 423884
},
{
"epoch": 2.3591359900350097,
"grad_norm": 3.8631985187530518,
"learning_rate": 1.0573452597973006e-05,
"loss": 1.0708,
"step": 500000
},
{
"epoch": 2.8309631880420114,
"grad_norm": 4.198337078094482,
"learning_rate": 8.684361156501781e-06,
"loss": 1.0412,
"step": 600000
},
{
"epoch": 3.0,
"eval_loss": 0.9685180187225342,
"eval_runtime": 8703.9306,
"eval_samples_per_second": 649.338,
"eval_steps_per_second": 6.493,
"step": 635826
},
{
"epoch": 3.3027903860490135,
"grad_norm": 3.789280414581299,
"learning_rate": 6.795269715030557e-06,
"loss": 1.0148,
"step": 700000
},
{
"epoch": 3.774617584056015,
"grad_norm": 3.9477617740631104,
"learning_rate": 4.906178273559332e-06,
"loss": 0.9943,
"step": 800000
},
{
"epoch": 4.0,
"eval_loss": 0.9335971474647522,
"eval_runtime": 8688.2062,
"eval_samples_per_second": 650.513,
"eval_steps_per_second": 6.505,
"step": 847768
}
],
"logging_steps": 100000,
"max_steps": 1059710,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.953539160600081e+19,
"train_batch_size": 100,
"trial_name": null,
"trial_params": null
}