{ "best_metric": 0.5949084758758545, "best_model_checkpoint": "tiny_bert_rand_50_v1_mrpc/checkpoint-45", "epoch": 8.0, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.8691033124923706, "learning_rate": 4.9e-05, "loss": 0.6286, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.6862745098039216, "eval_combined_score": 0.7440660474716203, "eval_f1": 0.8018575851393189, "eval_loss": 0.604465901851654, "eval_runtime": 0.1601, "eval_samples_per_second": 2547.96, "eval_steps_per_second": 12.49, "step": 15 }, { "epoch": 2.0, "grad_norm": 1.3388460874557495, "learning_rate": 4.8e-05, "loss": 0.5948, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.6985294117647058, "eval_combined_score": 0.7536192937517152, "eval_f1": 0.8087091757387247, "eval_loss": 0.5950304865837097, "eval_runtime": 0.1669, "eval_samples_per_second": 2444.764, "eval_steps_per_second": 11.984, "step": 30 }, { "epoch": 3.0, "grad_norm": 1.4051553010940552, "learning_rate": 4.7e-05, "loss": 0.556, "step": 45 }, { "epoch": 3.0, "eval_accuracy": 0.6813725490196079, "eval_combined_score": 0.7375116713352008, "eval_f1": 0.7936507936507936, "eval_loss": 0.5949084758758545, "eval_runtime": 0.1984, "eval_samples_per_second": 2056.454, "eval_steps_per_second": 10.081, "step": 45 }, { "epoch": 4.0, "grad_norm": 1.9397245645523071, "learning_rate": 4.600000000000001e-05, "loss": 0.5107, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.7107843137254902, "eval_combined_score": 0.753316032295271, "eval_f1": 0.7958477508650519, "eval_loss": 0.6382691860198975, "eval_runtime": 0.1676, "eval_samples_per_second": 2433.718, "eval_steps_per_second": 11.93, "step": 60 }, { "epoch": 5.0, "grad_norm": 3.463055372238159, "learning_rate": 4.5e-05, "loss": 0.4193, "step": 75 }, { "epoch": 5.0, "eval_accuracy": 0.6495098039215687, "eval_combined_score": 0.6930790271909869, "eval_f1": 0.7366482504604052, "eval_loss": 0.6820002794265747, "eval_runtime": 0.1579, "eval_samples_per_second": 2584.681, "eval_steps_per_second": 12.67, "step": 75 }, { "epoch": 6.0, "grad_norm": 4.737268924713135, "learning_rate": 4.4000000000000006e-05, "loss": 0.3479, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.7034313725490197, "eval_combined_score": 0.7517156862745098, "eval_f1": 0.8, "eval_loss": 0.8077418208122253, "eval_runtime": 0.1937, "eval_samples_per_second": 2105.958, "eval_steps_per_second": 10.323, "step": 90 }, { "epoch": 7.0, "grad_norm": 4.261862277984619, "learning_rate": 4.3e-05, "loss": 0.2647, "step": 105 }, { "epoch": 7.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7316553544494722, "eval_f1": 0.7794871794871795, "eval_loss": 0.8842233419418335, "eval_runtime": 0.1783, "eval_samples_per_second": 2288.185, "eval_steps_per_second": 11.217, "step": 105 }, { "epoch": 8.0, "grad_norm": 5.626375198364258, "learning_rate": 4.2e-05, "loss": 0.1929, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.6813725490196079, "eval_combined_score": 0.7323529411764707, "eval_f1": 0.7833333333333333, "eval_loss": 1.042723536491394, "eval_runtime": 0.1576, "eval_samples_per_second": 2589.547, "eval_steps_per_second": 12.694, "step": 120 }, { "epoch": 8.0, "step": 120, "total_flos": 769504676118528.0, "train_loss": 0.43935337861378987, "train_runtime": 27.1726, "train_samples_per_second": 6749.449, "train_steps_per_second": 27.601 } ], "logging_steps": 1, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 769504676118528.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }