| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 18.0, | |
| "eval_steps": 500, | |
| "global_step": 18, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.9770612716674805, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.3829, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_Macro F1": 0.3333333333333333, | |
| "eval_Macro Precision": 0.25, | |
| "eval_Macro Recall": 0.5, | |
| "eval_Micro F1": 0.5, | |
| "eval_Micro Precision": 0.5, | |
| "eval_Micro Recall": 0.5, | |
| "eval_Weighted F1": 0.3333333333333333, | |
| "eval_Weighted Precision": 0.25, | |
| "eval_Weighted Recall": 0.5, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.0583593845367432, | |
| "eval_runtime": 3.3884, | |
| "eval_samples_per_second": 1.181, | |
| "eval_steps_per_second": 0.295, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_Macro F1": 0.2, | |
| "eval_Macro Precision": 0.16666666666666666, | |
| "eval_Macro Recall": 0.25, | |
| "eval_Micro F1": 0.25, | |
| "eval_Micro Precision": 0.25, | |
| "eval_Micro Recall": 0.25, | |
| "eval_Weighted F1": 0.2, | |
| "eval_Weighted Precision": 0.16666666666666666, | |
| "eval_Weighted Recall": 0.25, | |
| "eval_accuracy": 0.25, | |
| "eval_loss": 1.287726879119873, | |
| "eval_runtime": 2.5164, | |
| "eval_samples_per_second": 1.59, | |
| "eval_steps_per_second": 0.397, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 2.298452138900757, | |
| "eval_runtime": 2.4805, | |
| "eval_samples_per_second": 1.613, | |
| "eval_steps_per_second": 0.403, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 2.4997897148132324, | |
| "eval_runtime": 2.3835, | |
| "eval_samples_per_second": 1.678, | |
| "eval_steps_per_second": 0.42, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 2.2229647636413574, | |
| "eval_runtime": 2.4413, | |
| "eval_samples_per_second": 1.638, | |
| "eval_steps_per_second": 0.41, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.9467241764068604, | |
| "eval_runtime": 2.3621, | |
| "eval_samples_per_second": 1.693, | |
| "eval_steps_per_second": 0.423, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.7201157808303833, | |
| "eval_runtime": 2.4064, | |
| "eval_samples_per_second": 1.662, | |
| "eval_steps_per_second": 0.416, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.3981068134307861, | |
| "learning_rate": 3.125e-05, | |
| "loss": 0.3628, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.5736441612243652, | |
| "eval_runtime": 2.5118, | |
| "eval_samples_per_second": 1.592, | |
| "eval_steps_per_second": 0.398, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.541231393814087, | |
| "eval_runtime": 2.5506, | |
| "eval_samples_per_second": 1.568, | |
| "eval_steps_per_second": 0.392, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.5483921766281128, | |
| "eval_runtime": 2.3993, | |
| "eval_samples_per_second": 1.667, | |
| "eval_steps_per_second": 0.417, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.5762369632720947, | |
| "eval_runtime": 2.3193, | |
| "eval_samples_per_second": 1.725, | |
| "eval_steps_per_second": 0.431, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.590673804283142, | |
| "eval_runtime": 2.4209, | |
| "eval_samples_per_second": 1.652, | |
| "eval_steps_per_second": 0.413, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.6231011152267456, | |
| "eval_runtime": 2.351, | |
| "eval_samples_per_second": 1.701, | |
| "eval_steps_per_second": 0.425, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.6461890935897827, | |
| "eval_runtime": 2.3174, | |
| "eval_samples_per_second": 1.726, | |
| "eval_steps_per_second": 0.432, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.6709625720977783, | |
| "eval_runtime": 2.6982, | |
| "eval_samples_per_second": 1.482, | |
| "eval_steps_per_second": 0.371, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 1.7213877439498901, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.3175, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.6882976293563843, | |
| "eval_runtime": 2.5084, | |
| "eval_samples_per_second": 1.595, | |
| "eval_steps_per_second": 0.399, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.6994037628173828, | |
| "eval_runtime": 2.6074, | |
| "eval_samples_per_second": 1.534, | |
| "eval_steps_per_second": 0.384, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_Macro F1": 0.0, | |
| "eval_Macro Precision": 0.0, | |
| "eval_Macro Recall": 0.0, | |
| "eval_Micro F1": 0.0, | |
| "eval_Micro Precision": 0.0, | |
| "eval_Micro Recall": 0.0, | |
| "eval_Weighted F1": 0.0, | |
| "eval_Weighted Precision": 0.0, | |
| "eval_Weighted Recall": 0.0, | |
| "eval_accuracy": 0.0, | |
| "eval_loss": 1.6985137462615967, | |
| "eval_runtime": 2.608, | |
| "eval_samples_per_second": 1.534, | |
| "eval_steps_per_second": 0.383, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "step": 18, | |
| "total_flos": 1.6740517517918208e+16, | |
| "train_loss": 0.3359870778189765, | |
| "train_runtime": 282.6155, | |
| "train_samples_per_second": 0.764, | |
| "train_steps_per_second": 0.064 | |
| } | |
| ], | |
| "logging_steps": 8, | |
| "max_steps": 18, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 18, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.6740517517918208e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |