| { |
| "best_global_step": 486, |
| "best_metric": 0.8587628865979381, |
| "best_model_checkpoint": "./results/checkpoint-486", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1215, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.411522633744856, |
| "grad_norm": 3.3757410049438477, |
| "learning_rate": 9.185185185185186e-06, |
| "loss": 0.8845, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.823045267489712, |
| "grad_norm": 3.417144536972046, |
| "learning_rate": 8.362139917695474e-06, |
| "loss": 0.5844, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8237113402061855, |
| "eval_f1": 0.8244991682852733, |
| "eval_loss": 0.44459813833236694, |
| "eval_runtime": 2.448, |
| "eval_samples_per_second": 396.246, |
| "eval_steps_per_second": 24.919, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.2345679012345678, |
| "grad_norm": 5.602197647094727, |
| "learning_rate": 7.5390946502057615e-06, |
| "loss": 0.4285, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.646090534979424, |
| "grad_norm": 6.984129905700684, |
| "learning_rate": 6.71604938271605e-06, |
| "loss": 0.3645, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8587628865979381, |
| "eval_f1": 0.8582800447235293, |
| "eval_loss": 0.3806818127632141, |
| "eval_runtime": 2.4825, |
| "eval_samples_per_second": 390.733, |
| "eval_steps_per_second": 24.572, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.05761316872428, |
| "grad_norm": 8.748188018798828, |
| "learning_rate": 5.893004115226338e-06, |
| "loss": 0.3403, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.4691358024691357, |
| "grad_norm": 7.327649116516113, |
| "learning_rate": 5.069958847736626e-06, |
| "loss": 0.2679, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.8806584362139915, |
| "grad_norm": 11.227555274963379, |
| "learning_rate": 4.246913580246914e-06, |
| "loss": 0.2677, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.8577319587628865, |
| "eval_f1": 0.8591340461713324, |
| "eval_loss": 0.3954542577266693, |
| "eval_runtime": 2.4954, |
| "eval_samples_per_second": 388.715, |
| "eval_steps_per_second": 24.445, |
| "step": 729 |
| }, |
| { |
| "epoch": 3.292181069958848, |
| "grad_norm": 10.80851936340332, |
| "learning_rate": 3.423868312757202e-06, |
| "loss": 0.2197, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.7037037037037037, |
| "grad_norm": 8.145468711853027, |
| "learning_rate": 2.60082304526749e-06, |
| "loss": 0.2074, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.8577319587628865, |
| "eval_f1": 0.8574733463132496, |
| "eval_loss": 0.4115142524242401, |
| "eval_runtime": 2.4956, |
| "eval_samples_per_second": 388.69, |
| "eval_steps_per_second": 24.443, |
| "step": 972 |
| }, |
| { |
| "epoch": 4.11522633744856, |
| "grad_norm": 2.062761068344116, |
| "learning_rate": 1.777777777777778e-06, |
| "loss": 0.1788, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.526748971193416, |
| "grad_norm": 8.031317710876465, |
| "learning_rate": 9.54732510288066e-07, |
| "loss": 0.1802, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.938271604938271, |
| "grad_norm": 9.72697925567627, |
| "learning_rate": 1.3168724279835392e-07, |
| "loss": 0.1668, |
| "step": 1200 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.856701030927835, |
| "eval_f1": 0.8577030087970102, |
| "eval_loss": 0.4302367568016052, |
| "eval_runtime": 2.5068, |
| "eval_samples_per_second": 386.947, |
| "eval_steps_per_second": 24.334, |
| "step": 1215 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 1215, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 749706402995280.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|