| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9943289224952743, | |
| "global_step": 1188, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5e-06, | |
| "loss": 1.6488, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1e-05, | |
| "loss": 1.6585, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.6373, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5933, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.4982, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3e-05, | |
| "loss": 1.3297, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.5e-05, | |
| "loss": 1.1515, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4e-05, | |
| "loss": 0.9944, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.8887, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8275, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.954044117647059e-05, | |
| "loss": 0.8152, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.908088235294118e-05, | |
| "loss": 0.7991, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.8621323529411765e-05, | |
| "loss": 0.7932, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.816176470588236e-05, | |
| "loss": 0.7928, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.7702205882352946e-05, | |
| "loss": 0.7968, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.7242647058823534e-05, | |
| "loss": 0.7744, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.678308823529412e-05, | |
| "loss": 0.7791, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.632352941176471e-05, | |
| "loss": 0.7736, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.5863970588235296e-05, | |
| "loss": 0.766, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.5404411764705883e-05, | |
| "loss": 0.7717, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.494485294117647e-05, | |
| "loss": 0.7638, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.448529411764706e-05, | |
| "loss": 0.768, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.4025735294117646e-05, | |
| "loss": 0.7573, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.356617647058824e-05, | |
| "loss": 0.7633, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.310661764705883e-05, | |
| "loss": 0.7562, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.2647058823529415e-05, | |
| "loss": 0.7576, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.21875e-05, | |
| "loss": 0.7516, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.172794117647059e-05, | |
| "loss": 0.7501, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.126838235294118e-05, | |
| "loss": 0.7601, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.0808823529411765e-05, | |
| "loss": 0.7502, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.034926470588236e-05, | |
| "loss": 0.7583, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.9889705882352946e-05, | |
| "loss": 0.756, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.943014705882353e-05, | |
| "loss": 0.7511, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.897058823529412e-05, | |
| "loss": 0.7434, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.851102941176471e-05, | |
| "loss": 0.7586, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.8051470588235296e-05, | |
| "loss": 0.7486, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.759191176470588e-05, | |
| "loss": 0.7499, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.713235294117647e-05, | |
| "loss": 0.7516, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.667279411764706e-05, | |
| "loss": 0.7451, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.6213235294117646e-05, | |
| "loss": 0.7491, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.575367647058824e-05, | |
| "loss": 0.7484, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": 0.7423, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.4834558823529415e-05, | |
| "loss": 0.7458, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.4375e-05, | |
| "loss": 0.7428, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.391544117647059e-05, | |
| "loss": 0.7423, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.345588235294118e-05, | |
| "loss": 0.7396, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.2996323529411764e-05, | |
| "loss": 0.7426, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.253676470588236e-05, | |
| "loss": 0.7421, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.2077205882352946e-05, | |
| "loss": 0.7377, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.161764705882353e-05, | |
| "loss": 0.7358, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.115808823529412e-05, | |
| "loss": 0.7346, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.069852941176471e-05, | |
| "loss": 0.7335, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.0238970588235292e-05, | |
| "loss": 0.7242, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.9779411764705883e-05, | |
| "loss": 0.7295, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.9319852941176474e-05, | |
| "loss": 0.7294, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.8860294117647058e-05, | |
| "loss": 0.7292, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.840073529411765e-05, | |
| "loss": 0.7238, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.7941176470588236e-05, | |
| "loss": 0.7383, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.7481617647058827e-05, | |
| "loss": 0.7363, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.702205882352941e-05, | |
| "loss": 0.7269, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.6562500000000002e-05, | |
| "loss": 0.7408, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6102941176470593e-05, | |
| "loss": 0.7341, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.5643382352941177e-05, | |
| "loss": 0.7372, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5183823529411764e-05, | |
| "loss": 0.7333, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.4724264705882355e-05, | |
| "loss": 0.7261, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.4264705882352942e-05, | |
| "loss": 0.7305, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.380514705882353e-05, | |
| "loss": 0.7339, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.334558823529412e-05, | |
| "loss": 0.7351, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.2886029411764705e-05, | |
| "loss": 0.7307, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.2426470588235296e-05, | |
| "loss": 0.7354, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.1966911764705883e-05, | |
| "loss": 0.7254, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.1507352941176474e-05, | |
| "loss": 0.7342, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.104779411764706e-05, | |
| "loss": 0.7236, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.058823529411765e-05, | |
| "loss": 0.7256, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.0128676470588236e-05, | |
| "loss": 0.7299, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.9669117647058824e-05, | |
| "loss": 0.7185, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9209558823529414e-05, | |
| "loss": 0.7333, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.7229, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.829044117647059e-05, | |
| "loss": 0.7239, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.7830882352941177e-05, | |
| "loss": 0.7271, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.7371323529411764e-05, | |
| "loss": 0.7201, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.6911764705882355e-05, | |
| "loss": 0.7228, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.6452205882352942e-05, | |
| "loss": 0.7337, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.599264705882353e-05, | |
| "loss": 0.7279, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5533088235294117e-05, | |
| "loss": 0.7283, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5073529411764706e-05, | |
| "loss": 0.714, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.4613970588235295e-05, | |
| "loss": 0.7185, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4154411764705883e-05, | |
| "loss": 0.7216, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.3694852941176472e-05, | |
| "loss": 0.7239, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.323529411764706e-05, | |
| "loss": 0.7309, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.2775735294117647e-05, | |
| "loss": 0.727, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.2316176470588236e-05, | |
| "loss": 0.7165, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.1856617647058823e-05, | |
| "loss": 0.723, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.1397058823529412e-05, | |
| "loss": 0.7166, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.09375e-05, | |
| "loss": 0.7178, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.0477941176470589e-05, | |
| "loss": 0.7094, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0018382352941178e-05, | |
| "loss": 0.7229, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.558823529411764e-06, | |
| "loss": 0.7116, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.099264705882353e-06, | |
| "loss": 0.7187, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.639705882352942e-06, | |
| "loss": 0.7103, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.18014705882353e-06, | |
| "loss": 0.7241, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.720588235294119e-06, | |
| "loss": 0.7336, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.261029411764707e-06, | |
| "loss": 0.7168, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.8014705882352935e-06, | |
| "loss": 0.7242, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.341911764705883e-06, | |
| "loss": 0.7199, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 0.725, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.422794117647059e-06, | |
| "loss": 0.7252, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.963235294117647e-06, | |
| "loss": 0.7183, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.503676470588236e-06, | |
| "loss": 0.7172, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.044117647058824e-06, | |
| "loss": 0.7195, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.584558823529412e-06, | |
| "loss": 0.7155, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.7209, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.6654411764705884e-06, | |
| "loss": 0.7112, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.2058823529411767e-06, | |
| "loss": 0.7105, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.7463235294117648e-06, | |
| "loss": 0.7217, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.286764705882353e-06, | |
| "loss": 0.7183, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.272058823529412e-07, | |
| "loss": 0.7143, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 3.6764705882352943e-07, | |
| "loss": 0.7126, | |
| "step": 1180 | |
| } | |
| ], | |
| "max_steps": 1188, | |
| "num_train_epochs": 3, | |
| "total_flos": 6.076984402892554e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |