| { |
| "best_global_step": 847768, |
| "best_metric": 0.9335971474647522, |
| "best_model_checkpoint": "./mjaliz/product_titles_27M_bge-m3-retromae/checkpoint-847768", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 847768, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.4718271980070019, |
| "grad_norm": 4.73534631729126, |
| "learning_rate": 1.8129818363857907e-05, |
| "loss": 1.4995, |
| "step": 100000 |
| }, |
| { |
| "epoch": 0.9436543960140038, |
| "grad_norm": 4.425976276397705, |
| "learning_rate": 1.6240726922386682e-05, |
| "loss": 1.2511, |
| "step": 200000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.126031756401062, |
| "eval_runtime": 8694.616, |
| "eval_samples_per_second": 650.033, |
| "eval_steps_per_second": 6.5, |
| "step": 211942 |
| }, |
| { |
| "epoch": 1.4154815940210057, |
| "grad_norm": 4.945573806762695, |
| "learning_rate": 1.4351635480915455e-05, |
| "loss": 1.1637, |
| "step": 300000 |
| }, |
| { |
| "epoch": 1.8873087920280076, |
| "grad_norm": 4.503483772277832, |
| "learning_rate": 1.246254403944423e-05, |
| "loss": 1.1104, |
| "step": 400000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 1.0256075859069824, |
| "eval_runtime": 8677.2104, |
| "eval_samples_per_second": 651.337, |
| "eval_steps_per_second": 6.513, |
| "step": 423884 |
| }, |
| { |
| "epoch": 2.3591359900350097, |
| "grad_norm": 3.8631985187530518, |
| "learning_rate": 1.0573452597973006e-05, |
| "loss": 1.0708, |
| "step": 500000 |
| }, |
| { |
| "epoch": 2.8309631880420114, |
| "grad_norm": 4.198337078094482, |
| "learning_rate": 8.684361156501781e-06, |
| "loss": 1.0412, |
| "step": 600000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.9685180187225342, |
| "eval_runtime": 8703.9306, |
| "eval_samples_per_second": 649.338, |
| "eval_steps_per_second": 6.493, |
| "step": 635826 |
| }, |
| { |
| "epoch": 3.3027903860490135, |
| "grad_norm": 3.789280414581299, |
| "learning_rate": 6.795269715030557e-06, |
| "loss": 1.0148, |
| "step": 700000 |
| }, |
| { |
| "epoch": 3.774617584056015, |
| "grad_norm": 3.9477617740631104, |
| "learning_rate": 4.906178273559332e-06, |
| "loss": 0.9943, |
| "step": 800000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.9335971474647522, |
| "eval_runtime": 8688.2062, |
| "eval_samples_per_second": 650.513, |
| "eval_steps_per_second": 6.505, |
| "step": 847768 |
| } |
| ], |
| "logging_steps": 100000, |
| "max_steps": 1059710, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.01 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.953539160600081e+19, |
| "train_batch_size": 100, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|