{ "best_global_step": 60, "best_metric": 0.5941939949989319, "best_model_checkpoint": "tiny_bert_km_5_v2_mrpc/checkpoint-60", "epoch": 9.0, "eval_steps": 500, "global_step": 135, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.790999710559845, "learning_rate": 4.9e-05, "loss": 0.6277, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.6887254901960784, "eval_combined_score": 0.7491603462974394, "eval_f1": 0.8095952023988006, "eval_loss": 0.6063878536224365, "eval_runtime": 0.1348, "eval_samples_per_second": 3027.676, "eval_steps_per_second": 14.842, "step": 15 }, { "epoch": 2.0, "grad_norm": 1.333164095878601, "learning_rate": 4.8e-05, "loss": 0.5964, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.6764705882352942, "eval_combined_score": 0.73109243697479, "eval_f1": 0.7857142857142857, "eval_loss": 0.600712776184082, "eval_runtime": 0.1473, "eval_samples_per_second": 2770.333, "eval_steps_per_second": 13.58, "step": 30 }, { "epoch": 3.0, "grad_norm": 1.0239461660385132, "learning_rate": 4.7e-05, "loss": 0.5743, "step": 45 }, { "epoch": 3.0, "eval_accuracy": 0.6985294117647058, "eval_combined_score": 0.7570608078313784, "eval_f1": 0.815592203898051, "eval_loss": 0.6121370792388916, "eval_runtime": 0.1391, "eval_samples_per_second": 2933.075, "eval_steps_per_second": 14.378, "step": 45 }, { "epoch": 4.0, "grad_norm": 1.6916087865829468, "learning_rate": 4.600000000000001e-05, "loss": 0.5591, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.732774708868319, "eval_f1": 0.7817258883248731, "eval_loss": 0.5941939949989319, "eval_runtime": 0.1743, "eval_samples_per_second": 2341.276, "eval_steps_per_second": 11.477, "step": 60 }, { "epoch": 5.0, "grad_norm": 2.1864917278289795, "learning_rate": 4.5e-05, "loss": 0.5162, "step": 75 }, { "epoch": 5.0, "eval_accuracy": 0.6813725490196079, "eval_combined_score": 0.7358475648323846, "eval_f1": 0.7903225806451613, "eval_loss": 0.6068432331085205, "eval_runtime": 0.1684, "eval_samples_per_second": 2422.792, "eval_steps_per_second": 11.876, "step": 75 }, { "epoch": 6.0, "grad_norm": 2.5886662006378174, "learning_rate": 4.4000000000000006e-05, "loss": 0.4733, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.6544117647058824, "eval_combined_score": 0.6964081086980247, "eval_f1": 0.738404452690167, "eval_loss": 0.6536080837249756, "eval_runtime": 0.1316, "eval_samples_per_second": 3099.593, "eval_steps_per_second": 15.194, "step": 90 }, { "epoch": 7.0, "grad_norm": 3.5240910053253174, "learning_rate": 4.3e-05, "loss": 0.4261, "step": 105 }, { "epoch": 7.0, "eval_accuracy": 0.7058823529411765, "eval_combined_score": 0.7539312754804892, "eval_f1": 0.801980198019802, "eval_loss": 0.6742849946022034, "eval_runtime": 0.1506, "eval_samples_per_second": 2708.667, "eval_steps_per_second": 13.278, "step": 105 }, { "epoch": 8.0, "grad_norm": 4.345057010650635, "learning_rate": 4.2e-05, "loss": 0.3647, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.6813725490196079, "eval_combined_score": 0.7293849046467902, "eval_f1": 0.7773972602739726, "eval_loss": 0.7378984093666077, "eval_runtime": 0.1633, "eval_samples_per_second": 2498.228, "eval_steps_per_second": 12.246, "step": 120 }, { "epoch": 9.0, "grad_norm": 6.365061283111572, "learning_rate": 4.1e-05, "loss": 0.2976, "step": 135 }, { "epoch": 9.0, "eval_accuracy": 0.6299019607843137, "eval_combined_score": 0.679888368585359, "eval_f1": 0.7298747763864043, "eval_loss": 0.8383299112319946, "eval_runtime": 0.1418, "eval_samples_per_second": 2876.476, "eval_steps_per_second": 14.1, "step": 135 }, { "epoch": 9.0, "step": 135, "total_flos": 865692760633344.0, "train_loss": 0.4928195600156431, "train_runtime": 26.9972, "train_samples_per_second": 6793.288, "train_steps_per_second": 27.781 } ], "logging_steps": 1, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 865692760633344.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }