| { |
| "best_global_step": 1575, |
| "best_metric": 0.8452020201894255, |
| "best_model_checkpoint": "outputs/runs/sphobert/checkpoint-1575", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1575, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.9968253968253968, |
| "grad_norm": 4.4746994972229, |
| "learning_rate": 4.999432333543028e-06, |
| "loss": 0.4633, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8233950595593661, |
| "eval_f1": 0.7992430278973932, |
| "eval_loss": 0.4047755002975464, |
| "eval_precision": 0.8521274532019316, |
| "eval_recall": 0.7851048448779885, |
| "eval_runtime": 1.5348, |
| "eval_samples_per_second": 847.654, |
| "eval_steps_per_second": 26.713, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.9936507936507937, |
| "grad_norm": 2.1845901012420654, |
| "learning_rate": 4.996525669931999e-06, |
| "loss": 0.3461, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8420534804455533, |
| "eval_f1": 0.825405083598618, |
| "eval_loss": 0.34594863653182983, |
| "eval_precision": 0.8560844508304751, |
| "eval_recall": 0.813301930329429, |
| "eval_runtime": 1.5372, |
| "eval_samples_per_second": 846.329, |
| "eval_steps_per_second": 26.671, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.9904761904761905, |
| "grad_norm": 2.120676279067993, |
| "learning_rate": 4.991155236893945e-06, |
| "loss": 0.3145, |
| "step": 942 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.8524719772304016, |
| "eval_f1": 0.8422043732271984, |
| "eval_loss": 0.34118857979774475, |
| "eval_precision": 0.851065665145718, |
| "eval_recall": 0.8364035188605563, |
| "eval_runtime": 1.5397, |
| "eval_samples_per_second": 844.949, |
| "eval_steps_per_second": 26.628, |
| "step": 945 |
| }, |
| { |
| "epoch": 3.9873015873015873, |
| "grad_norm": 5.287633419036865, |
| "learning_rate": 4.983326334397891e-06, |
| "loss": 0.301, |
| "step": 1256 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.8538423697248674, |
| "eval_f1": 0.8395289220361154, |
| "eval_loss": 0.35064205527305603, |
| "eval_precision": 0.8658287567394525, |
| "eval_recall": 0.8279657242760721, |
| "eval_runtime": 1.5395, |
| "eval_samples_per_second": 845.053, |
| "eval_steps_per_second": 26.631, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.984126984126984, |
| "grad_norm": 6.363493919372559, |
| "learning_rate": 4.97304668862541e-06, |
| "loss": 0.2943, |
| "step": 1570 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8534207104958009, |
| "eval_f1": 0.8452020201894255, |
| "eval_loss": 0.32883313298225403, |
| "eval_precision": 0.8478162573880959, |
| "eval_recall": 0.8429894034548278, |
| "eval_runtime": 1.5385, |
| "eval_samples_per_second": 845.638, |
| "eval_steps_per_second": 26.65, |
| "step": 1575 |
| } |
| ], |
| "logging_steps": 314, |
| "max_steps": 31500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1645189704599040.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|