| { |
| "best_metric": 0.727709949016571, |
| "best_model_checkpoint": "email-classifier-large/checkpoint-27", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 27, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 21.054508209228516, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 1.2646, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 14.41618537902832, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 1.2543, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 11.813241004943848, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 1.1985, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 10.183070182800293, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 1.0548, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 10.102678298950195, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 0.9208, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 9.924948692321777, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 1.0067, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 15.725824356079102, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.7678, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 13.782758712768555, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.7465, |
| "step": 8 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 70.53299713134766, |
| "learning_rate": 5e-05, |
| "loss": 1.3437, |
| "step": 9 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7058823529411765, |
| "eval_f1_macro": 0.27586206896551724, |
| "eval_f1_micro": 0.7058823529411765, |
| "eval_f1_weighted": 0.5841784989858012, |
| "eval_loss": 1.0482426881790161, |
| "eval_precision_macro": 0.23529411764705885, |
| "eval_precision_micro": 0.7058823529411765, |
| "eval_precision_weighted": 0.49826989619377166, |
| "eval_recall_macro": 0.3333333333333333, |
| "eval_recall_micro": 0.7058823529411765, |
| "eval_recall_weighted": 0.7058823529411765, |
| "eval_runtime": 9.661, |
| "eval_samples_per_second": 1.76, |
| "eval_steps_per_second": 0.207, |
| "step": 9 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 44.6027717590332, |
| "learning_rate": 4.938271604938271e-05, |
| "loss": 1.4167, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 8.45134449005127, |
| "learning_rate": 4.876543209876544e-05, |
| "loss": 0.8386, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 462.1361389160156, |
| "learning_rate": 4.814814814814815e-05, |
| "loss": 0.9442, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 11.159658432006836, |
| "learning_rate": 4.7530864197530866e-05, |
| "loss": 0.5886, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 8.21164608001709, |
| "learning_rate": 4.691358024691358e-05, |
| "loss": 0.7998, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 11.218172073364258, |
| "learning_rate": 4.62962962962963e-05, |
| "loss": 0.8271, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 8.036384582519531, |
| "learning_rate": 4.567901234567901e-05, |
| "loss": 0.4155, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 11.879496574401855, |
| "learning_rate": 4.506172839506173e-05, |
| "loss": 1.0315, |
| "step": 17 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 33.30108642578125, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.6819, |
| "step": 18 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7058823529411765, |
| "eval_f1_macro": 0.27586206896551724, |
| "eval_f1_micro": 0.7058823529411765, |
| "eval_f1_weighted": 0.5841784989858012, |
| "eval_loss": 0.7542034983634949, |
| "eval_precision_macro": 0.23529411764705885, |
| "eval_precision_micro": 0.7058823529411765, |
| "eval_precision_weighted": 0.49826989619377166, |
| "eval_recall_macro": 0.3333333333333333, |
| "eval_recall_micro": 0.7058823529411765, |
| "eval_recall_weighted": 0.7058823529411765, |
| "eval_runtime": 10.5136, |
| "eval_samples_per_second": 1.617, |
| "eval_steps_per_second": 0.19, |
| "step": 18 |
| }, |
| { |
| "epoch": 2.111111111111111, |
| "grad_norm": 70.35055541992188, |
| "learning_rate": 4.3827160493827164e-05, |
| "loss": 0.9715, |
| "step": 19 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 18.602529525756836, |
| "learning_rate": 4.3209876543209875e-05, |
| "loss": 1.0681, |
| "step": 20 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 11.670511245727539, |
| "learning_rate": 4.259259259259259e-05, |
| "loss": 0.9926, |
| "step": 21 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 11.727529525756836, |
| "learning_rate": 4.197530864197531e-05, |
| "loss": 0.4386, |
| "step": 22 |
| }, |
| { |
| "epoch": 2.5555555555555554, |
| "grad_norm": 24.286405563354492, |
| "learning_rate": 4.135802469135803e-05, |
| "loss": 0.8252, |
| "step": 23 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 10.522771835327148, |
| "learning_rate": 4.074074074074074e-05, |
| "loss": 0.6967, |
| "step": 24 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 10.512320518493652, |
| "learning_rate": 4.012345679012346e-05, |
| "loss": 0.6406, |
| "step": 25 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 13.10132122039795, |
| "learning_rate": 3.950617283950617e-05, |
| "loss": 0.7791, |
| "step": 26 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 32.59843826293945, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.5143, |
| "step": 27 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7647058823529411, |
| "eval_f1_macro": 0.5042735042735043, |
| "eval_f1_micro": 0.7647058823529411, |
| "eval_f1_weighted": 0.7149321266968326, |
| "eval_loss": 0.727709949016571, |
| "eval_precision_macro": 0.48412698412698413, |
| "eval_precision_micro": 0.7647058823529411, |
| "eval_precision_weighted": 0.6722689075630253, |
| "eval_recall_macro": 0.5277777777777778, |
| "eval_recall_micro": 0.7647058823529411, |
| "eval_recall_weighted": 0.7647058823529411, |
| "eval_runtime": 9.5294, |
| "eval_samples_per_second": 1.784, |
| "eval_steps_per_second": 0.21, |
| "step": 27 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 90, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.01 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 47528660118528.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|