{ "best_metric": 0.0, "best_model_checkpoint": "outputs/bitfit/t5-base/mrpc/checkpoint-200", "epoch": 20.0, "global_step": 1160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 0.00029741379310344827, "loss": 2.2211, "step": 10 }, { "epoch": 0.34, "learning_rate": 0.0002948275862068965, "loss": 0.194, "step": 20 }, { "epoch": 0.52, "learning_rate": 0.0002922413793103448, "loss": 0.1031, "step": 30 }, { "epoch": 0.69, "learning_rate": 0.0002896551724137931, "loss": 0.121, "step": 40 }, { "epoch": 0.86, "learning_rate": 0.0002870689655172413, "loss": 0.1258, "step": 50 }, { "epoch": 1.03, "learning_rate": 0.0002844827586206896, "loss": 0.0988, "step": 60 }, { "epoch": 1.21, "learning_rate": 0.0002818965517241379, "loss": 0.0688, "step": 70 }, { "epoch": 1.38, "learning_rate": 0.0002793103448275862, "loss": 0.081, "step": 80 }, { "epoch": 1.55, "learning_rate": 0.0002767241379310345, "loss": 0.0719, "step": 90 }, { "epoch": 1.72, "learning_rate": 0.0002741379310344827, "loss": 0.0606, "step": 100 }, { "epoch": 1.72, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.157722607254982, "eval_runtime": 1.6787, "eval_samples_per_second": 121.522, "step": 100 }, { "epoch": 1.9, "learning_rate": 0.000271551724137931, "loss": 0.0726, "step": 110 }, { "epoch": 2.07, "learning_rate": 0.0002689655172413793, "loss": 0.0686, "step": 120 }, { "epoch": 2.24, "learning_rate": 0.0002663793103448276, "loss": 0.0445, "step": 130 }, { "epoch": 2.41, "learning_rate": 0.00026379310344827584, "loss": 0.0414, "step": 140 }, { "epoch": 2.59, "learning_rate": 0.00026120689655172413, "loss": 0.0431, "step": 150 }, { "epoch": 2.76, "learning_rate": 0.00025862068965517237, "loss": 0.0274, "step": 160 }, { "epoch": 2.93, "learning_rate": 0.00025603448275862066, "loss": 0.0399, "step": 170 }, { "epoch": 3.1, "learning_rate": 0.00025344827586206895, "loss": 0.032, "step": 180 }, { "epoch": 3.28, "learning_rate": 0.0002508620689655172, "loss": 0.0163, "step": 190 }, { "epoch": 3.45, "learning_rate": 0.0002482758620689655, "loss": 0.0196, "step": 200 }, { "epoch": 3.45, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.20416393876075745, "eval_runtime": 1.3232, "eval_samples_per_second": 154.176, "step": 200 }, { "epoch": 3.62, "learning_rate": 0.00024568965517241377, "loss": 0.0236, "step": 210 }, { "epoch": 3.79, "learning_rate": 0.00024310344827586203, "loss": 0.0354, "step": 220 }, { "epoch": 3.97, "learning_rate": 0.00024051724137931033, "loss": 0.0277, "step": 230 }, { "epoch": 4.14, "learning_rate": 0.00023793103448275862, "loss": 0.0194, "step": 240 }, { "epoch": 4.31, "learning_rate": 0.00023534482758620685, "loss": 0.0153, "step": 250 }, { "epoch": 4.48, "learning_rate": 0.00023275862068965515, "loss": 0.0129, "step": 260 }, { "epoch": 4.66, "learning_rate": 0.00023017241379310344, "loss": 0.0112, "step": 270 }, { "epoch": 4.83, "learning_rate": 0.0002275862068965517, "loss": 0.0083, "step": 280 }, { "epoch": 5.0, "learning_rate": 0.000225, "loss": 0.0167, "step": 290 }, { "epoch": 5.17, "learning_rate": 0.00022241379310344826, "loss": 0.0184, "step": 300 }, { "epoch": 5.17, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.2221006453037262, "eval_runtime": 1.4478, "eval_samples_per_second": 140.908, "step": 300 }, { "epoch": 5.34, "learning_rate": 0.00021982758620689652, "loss": 0.0053, "step": 310 }, { "epoch": 5.52, "learning_rate": 0.00021724137931034481, "loss": 0.0049, "step": 320 }, { "epoch": 5.69, "learning_rate": 0.0002146551724137931, "loss": 0.0096, "step": 330 }, { "epoch": 5.86, "learning_rate": 0.00021206896551724134, "loss": 0.008, "step": 340 }, { "epoch": 6.03, "learning_rate": 0.00020948275862068963, "loss": 0.0099, "step": 350 }, { "epoch": 6.21, "learning_rate": 0.00020689655172413793, "loss": 0.0128, "step": 360 }, { "epoch": 6.38, "learning_rate": 0.0002043103448275862, "loss": 0.0026, "step": 370 }, { "epoch": 6.55, "learning_rate": 0.00020172413793103448, "loss": 0.002, "step": 380 }, { "epoch": 6.72, "learning_rate": 0.00019913793103448275, "loss": 0.0062, "step": 390 }, { "epoch": 6.9, "learning_rate": 0.000196551724137931, "loss": 0.0099, "step": 400 }, { "epoch": 6.9, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.24928051233291626, "eval_runtime": 1.0803, "eval_samples_per_second": 188.836, "step": 400 }, { "epoch": 7.07, "learning_rate": 0.0001939655172413793, "loss": 0.0138, "step": 410 }, { "epoch": 7.24, "learning_rate": 0.0001913793103448276, "loss": 0.0016, "step": 420 }, { "epoch": 7.41, "learning_rate": 0.00018879310344827583, "loss": 0.0044, "step": 430 }, { "epoch": 7.59, "learning_rate": 0.00018620689655172412, "loss": 0.0039, "step": 440 }, { "epoch": 7.76, "learning_rate": 0.0001836206896551724, "loss": 0.0063, "step": 450 }, { "epoch": 7.93, "learning_rate": 0.00018103448275862068, "loss": 0.0076, "step": 460 }, { "epoch": 8.1, "learning_rate": 0.00017844827586206897, "loss": 0.005, "step": 470 }, { "epoch": 8.28, "learning_rate": 0.0001758620689655172, "loss": 0.0018, "step": 480 }, { "epoch": 8.45, "learning_rate": 0.0001732758620689655, "loss": 0.0033, "step": 490 }, { "epoch": 8.62, "learning_rate": 0.0001706896551724138, "loss": 0.0049, "step": 500 }, { "epoch": 8.62, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.26071295142173767, "eval_runtime": 1.6002, "eval_samples_per_second": 127.481, "step": 500 }, { "epoch": 8.79, "learning_rate": 0.00016810344827586203, "loss": 0.0027, "step": 510 }, { "epoch": 8.97, "learning_rate": 0.00016551724137931032, "loss": 0.0052, "step": 520 }, { "epoch": 9.14, "learning_rate": 0.0001629310344827586, "loss": 0.0003, "step": 530 }, { "epoch": 9.31, "learning_rate": 0.00016034482758620688, "loss": 0.0017, "step": 540 }, { "epoch": 9.48, "learning_rate": 0.00015775862068965517, "loss": 0.0016, "step": 550 }, { "epoch": 9.66, "learning_rate": 0.00015517241379310346, "loss": 0.0037, "step": 560 }, { "epoch": 9.83, "learning_rate": 0.0001525862068965517, "loss": 0.0008, "step": 570 }, { "epoch": 10.0, "learning_rate": 0.00015, "loss": 0.0065, "step": 580 }, { "epoch": 10.17, "learning_rate": 0.00014741379310344825, "loss": 0.0005, "step": 590 }, { "epoch": 10.34, "learning_rate": 0.00014482758620689654, "loss": 0.0012, "step": 600 }, { "epoch": 10.34, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.3229135274887085, "eval_runtime": 1.6354, "eval_samples_per_second": 124.74, "step": 600 }, { "epoch": 10.52, "learning_rate": 0.0001422413793103448, "loss": 0.0015, "step": 610 }, { "epoch": 10.69, "learning_rate": 0.0001396551724137931, "loss": 0.0031, "step": 620 }, { "epoch": 10.86, "learning_rate": 0.00013706896551724136, "loss": 0.009, "step": 630 }, { "epoch": 11.03, "learning_rate": 0.00013448275862068965, "loss": 0.0008, "step": 640 }, { "epoch": 11.21, "learning_rate": 0.00013189655172413792, "loss": 0.0002, "step": 650 }, { "epoch": 11.38, "learning_rate": 0.00012931034482758618, "loss": 0.0002, "step": 660 }, { "epoch": 11.55, "learning_rate": 0.00012672413793103447, "loss": 0.004, "step": 670 }, { "epoch": 11.72, "learning_rate": 0.00012413793103448274, "loss": 0.004, "step": 680 }, { "epoch": 11.9, "learning_rate": 0.00012155172413793102, "loss": 0.0005, "step": 690 }, { "epoch": 12.07, "learning_rate": 0.00011896551724137931, "loss": 0.003, "step": 700 }, { "epoch": 12.07, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.3359452784061432, "eval_runtime": 1.9034, "eval_samples_per_second": 107.177, "step": 700 }, { "epoch": 12.24, "learning_rate": 0.00011637931034482757, "loss": 0.0005, "step": 710 }, { "epoch": 12.41, "learning_rate": 0.00011379310344827585, "loss": 0.0002, "step": 720 }, { "epoch": 12.59, "learning_rate": 0.00011120689655172413, "loss": 0.0001, "step": 730 }, { "epoch": 12.76, "learning_rate": 0.00010862068965517241, "loss": 0.0024, "step": 740 }, { "epoch": 12.93, "learning_rate": 0.00010603448275862067, "loss": 0.0002, "step": 750 }, { "epoch": 13.1, "learning_rate": 0.00010344827586206896, "loss": 0.0003, "step": 760 }, { "epoch": 13.28, "learning_rate": 0.00010086206896551724, "loss": 0.0007, "step": 770 }, { "epoch": 13.45, "learning_rate": 9.82758620689655e-05, "loss": 0.0, "step": 780 }, { "epoch": 13.62, "learning_rate": 9.56896551724138e-05, "loss": 0.0001, "step": 790 }, { "epoch": 13.79, "learning_rate": 9.310344827586206e-05, "loss": 0.0003, "step": 800 }, { "epoch": 13.79, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.40144264698028564, "eval_runtime": 1.2399, "eval_samples_per_second": 164.525, "step": 800 }, { "epoch": 13.97, "learning_rate": 9.051724137931034e-05, "loss": 0.003, "step": 810 }, { "epoch": 14.14, "learning_rate": 8.79310344827586e-05, "loss": 0.0, "step": 820 }, { "epoch": 14.31, "learning_rate": 8.53448275862069e-05, "loss": 0.0004, "step": 830 }, { "epoch": 14.48, "learning_rate": 8.275862068965516e-05, "loss": 0.0001, "step": 840 }, { "epoch": 14.66, "learning_rate": 8.017241379310344e-05, "loss": 0.0001, "step": 850 }, { "epoch": 14.83, "learning_rate": 7.758620689655173e-05, "loss": 0.0008, "step": 860 }, { "epoch": 15.0, "learning_rate": 7.5e-05, "loss": 0.0001, "step": 870 }, { "epoch": 15.17, "learning_rate": 7.241379310344827e-05, "loss": 0.0001, "step": 880 }, { "epoch": 15.34, "learning_rate": 6.982758620689655e-05, "loss": 0.0018, "step": 890 }, { "epoch": 15.52, "learning_rate": 6.724137931034483e-05, "loss": 0.0001, "step": 900 }, { "epoch": 15.52, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.37248921394348145, "eval_runtime": 1.2617, "eval_samples_per_second": 161.691, "step": 900 }, { "epoch": 15.69, "learning_rate": 6.465517241379309e-05, "loss": 0.0, "step": 910 }, { "epoch": 15.86, "learning_rate": 6.206896551724137e-05, "loss": 0.0, "step": 920 }, { "epoch": 16.03, "learning_rate": 5.9482758620689654e-05, "loss": 0.0001, "step": 930 }, { "epoch": 16.21, "learning_rate": 5.6896551724137926e-05, "loss": 0.0007, "step": 940 }, { "epoch": 16.38, "learning_rate": 5.4310344827586204e-05, "loss": 0.0, "step": 950 }, { "epoch": 16.55, "learning_rate": 5.172413793103448e-05, "loss": 0.0, "step": 960 }, { "epoch": 16.72, "learning_rate": 4.913793103448275e-05, "loss": 0.0, "step": 970 }, { "epoch": 16.9, "learning_rate": 4.655172413793103e-05, "loss": 0.0001, "step": 980 }, { "epoch": 17.07, "learning_rate": 4.39655172413793e-05, "loss": 0.0, "step": 990 }, { "epoch": 17.24, "learning_rate": 4.137931034482758e-05, "loss": 0.0001, "step": 1000 }, { "epoch": 17.24, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.38377830386161804, "eval_runtime": 1.3412, "eval_samples_per_second": 152.099, "step": 1000 }, { "epoch": 17.41, "learning_rate": 3.8793103448275865e-05, "loss": 0.0, "step": 1010 }, { "epoch": 17.59, "learning_rate": 3.6206896551724136e-05, "loss": 0.0, "step": 1020 }, { "epoch": 17.76, "learning_rate": 3.3620689655172414e-05, "loss": 0.0001, "step": 1030 }, { "epoch": 17.93, "learning_rate": 3.1034482758620685e-05, "loss": 0.0004, "step": 1040 }, { "epoch": 18.1, "learning_rate": 2.8448275862068963e-05, "loss": 0.0, "step": 1050 }, { "epoch": 18.28, "learning_rate": 2.586206896551724e-05, "loss": 0.0001, "step": 1060 }, { "epoch": 18.45, "learning_rate": 2.3275862068965515e-05, "loss": 0.0002, "step": 1070 }, { "epoch": 18.62, "learning_rate": 2.068965517241379e-05, "loss": 0.0, "step": 1080 }, { "epoch": 18.79, "learning_rate": 1.8103448275862068e-05, "loss": 0.0, "step": 1090 }, { "epoch": 18.97, "learning_rate": 1.5517241379310342e-05, "loss": 0.0, "step": 1100 }, { "epoch": 18.97, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_f1": 0.0, "eval_loss": 0.4036322236061096, "eval_runtime": 1.3625, "eval_samples_per_second": 149.728, "step": 1100 }, { "epoch": 19.14, "learning_rate": 1.293103448275862e-05, "loss": 0.0, "step": 1110 }, { "epoch": 19.31, "learning_rate": 1.0344827586206895e-05, "loss": 0.0, "step": 1120 }, { "epoch": 19.48, "learning_rate": 7.758620689655171e-06, "loss": 0.0, "step": 1130 }, { "epoch": 19.66, "learning_rate": 5.1724137931034475e-06, "loss": 0.0, "step": 1140 }, { "epoch": 19.83, "learning_rate": 2.5862068965517237e-06, "loss": 0.0001, "step": 1150 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 0.0001, "step": 1160 }, { "epoch": 20.0, "step": 1160, "total_flos": 9652606012532736.0, "train_loss": 0.03386630766055595, "train_runtime": 583.9583, "train_samples_per_second": 125.625, "train_steps_per_second": 1.986 } ], "max_steps": 1160, "num_train_epochs": 20, "total_flos": 9652606012532736.0, "trial_name": null, "trial_params": null }