{
  "best_metric": 0.7822743590634694,
  "best_model_checkpoint": "./checkpoints/clip-stage3pa-1024/checkpoint-3810",
  "epoch": 0.7486430844095078,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "learning_rate": 1.5e-06, "loss": 0.727, "step": 10},
    {"epoch": 0.0, "learning_rate": 3e-06, "loss": 0.6697, "step": 20},
    {"epoch": 0.01, "learning_rate": 4.5e-06, "loss": 0.6164, "step": 30},
    {"epoch": 0.01, "learning_rate": 6e-06, "loss": 0.5883, "step": 40},
    {"epoch": 0.01, "learning_rate": 7.5e-06, "loss": 0.5711, "step": 50},
    {"epoch": 0.01, "learning_rate": 9e-06, "loss": 0.5863, "step": 60},
    {"epoch": 0.01, "learning_rate": 1.05e-05, "loss": 0.5441, "step": 70},
    {"epoch": 0.01, "learning_rate": 1.2e-05, "loss": 0.5303, "step": 80},
    {"epoch": 0.02, "learning_rate": 1.3500000000000001e-05, "loss": 0.5436, "step": 90},
    {"epoch": 0.02, "learning_rate": 1.5e-05, "loss": 0.5507, "step": 100},
    {"epoch": 0.02, "learning_rate": 1.65e-05, "loss": 0.5534, "step": 110},
    {"epoch": 0.02, "learning_rate": 1.8e-05, "loss": 0.5384, "step": 120},
    {"epoch": 0.02, "learning_rate": 1.95e-05, "loss": 0.5533, "step": 130},
    {"epoch": 0.03, "learning_rate": 2.1e-05, "loss": 0.5475, "step": 140},
    {"epoch": 0.03, "learning_rate": 2.25e-05, "loss": 0.5482, "step": 150},
    {"epoch": 0.03, "learning_rate": 2.4e-05, "loss": 0.5099, "step": 160},
    {"epoch": 0.03, "learning_rate": 2.55e-05, "loss": 0.539, "step": 170},
    {"epoch": 0.03, "learning_rate": 2.7000000000000002e-05, "loss": 0.5176, "step": 180},
    {"epoch": 0.04, "learning_rate": 2.8499999999999998e-05, "loss": 0.526, "step": 190},
    {"epoch": 0.04, "learning_rate": 3e-05, "loss": 0.5276, "step": 200},
    {"epoch": 0.04, "learning_rate": 2.992105263157895e-05, "loss": 0.5077, "step": 210},
    {"epoch": 0.04, "learning_rate": 2.9842105263157894e-05, "loss": 0.5375, "step": 220},
    {"epoch": 0.04, "learning_rate": 2.9763157894736842e-05, "loss": 0.5411, "step": 230},
    {"epoch": 0.04, "learning_rate": 2.968421052631579e-05, "loss": 0.4877, "step": 240},
    {"epoch": 0.05, "learning_rate": 2.9605263157894735e-05, "loss": 0.5347, "step": 250},
    {"epoch": 0.05, "learning_rate": 2.9526315789473684e-05, "loss": 0.4839, "step": 260},
    {"epoch": 0.05, "learning_rate": 2.9447368421052635e-05, "loss": 0.4936, "step": 270},
    {"epoch": 0.05, "learning_rate": 2.936842105263158e-05, "loss": 0.4849, "step": 280},
    {"epoch": 0.05, "learning_rate": 2.928947368421053e-05, "loss": 0.4696, "step": 290},
    {"epoch": 0.06, "learning_rate": 2.9210526315789474e-05, "loss": 0.4872, "step": 300},
    {"epoch": 0.06, "learning_rate": 2.9131578947368422e-05, "loss": 0.4636, "step": 310},
    {"epoch": 0.06, "learning_rate": 2.905263157894737e-05, "loss": 0.4728, "step": 320},
    {"epoch": 0.06, "learning_rate": 2.8973684210526315e-05, "loss": 0.4743, "step": 330},
    {"epoch": 0.06, "learning_rate": 2.8894736842105263e-05, "loss": 0.4774, "step": 340},
    {"epoch": 0.07, "learning_rate": 2.8815789473684212e-05, "loss": 0.4372, "step": 350},
    {"epoch": 0.07, "learning_rate": 2.8736842105263157e-05, "loss": 0.4515, "step": 360},
    {"epoch": 0.07, "learning_rate": 2.8657894736842105e-05, "loss": 0.422, "step": 370},
    {"epoch": 0.07, "learning_rate": 2.8578947368421053e-05, "loss": 0.4505, "step": 380},
    {"epoch": 0.07, "learning_rate": 2.8499999999999998e-05, "loss": 0.4383, "step": 390},
    {"epoch": 0.07, "learning_rate": 2.8421052631578946e-05, "loss": 0.4601, "step": 400},
    {"epoch": 0.08, "learning_rate": 2.8342105263157898e-05, "loss": 0.4327, "step": 410},
    {"epoch": 0.08, "learning_rate": 2.8263157894736843e-05, "loss": 0.4182, "step": 420},
    {"epoch": 0.08, "learning_rate": 2.818421052631579e-05, "loss": 0.4421, "step": 430},
    {"epoch": 0.08, "learning_rate": 2.810526315789474e-05, "loss": 0.427, "step": 440},
    {"epoch": 0.08, "learning_rate": 2.8026315789473685e-05, "loss": 0.4397, "step": 450},
    {"epoch": 0.09, "learning_rate": 2.7947368421052633e-05, "loss": 0.4249, "step": 460},
    {"epoch": 0.09, "learning_rate": 2.786842105263158e-05, "loss": 0.432, "step": 470},
    {"epoch": 0.09, "learning_rate": 2.7789473684210526e-05, "loss": 0.4264, "step": 480},
    {"epoch": 0.09, "learning_rate": 2.7710526315789474e-05, "loss": 0.4325, "step": 490},
    {"epoch": 0.09, "learning_rate": 2.7631578947368423e-05, "loss": 0.4204, "step": 500},
    {"epoch": 0.1, "learning_rate": 2.7552631578947368e-05, "loss": 0.4246, "step": 510},
    {"epoch": 0.1, "learning_rate": 2.7473684210526316e-05, "loss": 0.429, "step": 520},
    {"epoch": 0.1, "learning_rate": 2.739473684210526e-05, "loss": 0.4003, "step": 530},
    {"epoch": 0.1, "learning_rate": 2.7315789473684213e-05, "loss": 0.3946, "step": 540},
    {"epoch": 0.1, "learning_rate": 2.723684210526316e-05, "loss": 0.4035, "step": 550},
    {"epoch": 0.1, "learning_rate": 2.7157894736842106e-05, "loss": 0.384, "step": 560},
    {"epoch": 0.11, "learning_rate": 2.7078947368421054e-05, "loss": 0.4095, "step": 570},
    {"epoch": 0.11, "learning_rate": 2.7000000000000002e-05, "loss": 0.386, "step": 580},
    {"epoch": 0.11, "learning_rate": 2.6921052631578947e-05, "loss": 0.374, "step": 590},
    {"epoch": 0.11, "learning_rate": 2.6842105263157896e-05, "loss": 0.4155, "step": 600},
    {"epoch": 0.11, "learning_rate": 2.6763157894736844e-05, "loss": 0.403, "step": 610},
    {"epoch": 0.12, "learning_rate": 2.668421052631579e-05, "loss": 0.3722, "step": 620},
    {"epoch": 0.12, "learning_rate": 2.6605263157894737e-05, "loss": 0.39, "step": 630},
    {"epoch": 0.12, "learning_rate": 2.6526315789473685e-05, "loss": 0.4041, "step": 640},
    {"epoch": 0.12, "learning_rate": 2.644736842105263e-05, "loss": 0.4011, "step": 650},
    {"epoch": 0.12, "learning_rate": 2.636842105263158e-05, "loss": 0.3887, "step": 660},
    {"epoch": 0.13, "learning_rate": 2.6289473684210527e-05, "loss": 0.3807, "step": 670},
    {"epoch": 0.13, "learning_rate": 2.6210526315789475e-05, "loss": 0.3762, "step": 680},
    {"epoch": 0.13, "learning_rate": 2.6131578947368424e-05, "loss": 0.3831, "step": 690},
    {"epoch": 0.13, "learning_rate": 2.605263157894737e-05, "loss": 0.3408, "step": 700},
    {"epoch": 0.13, "learning_rate": 2.5973684210526317e-05, "loss": 0.362, "step": 710},
    {"epoch": 0.13, "learning_rate": 2.5894736842105265e-05, "loss": 0.3606, "step": 720},
    {"epoch": 0.14, "learning_rate": 2.581578947368421e-05, "loss": 0.3475, "step": 730},
    {"epoch": 0.14, "learning_rate": 2.5736842105263158e-05, "loss": 0.3659, "step": 740},
    {"epoch": 0.14, "learning_rate": 2.5657894736842107e-05, "loss": 0.3391, "step": 750},
    {"epoch": 0.14, "learning_rate": 2.557894736842105e-05, "loss": 0.3744, "step": 760},
    {"epoch": 0.14, "learning_rate": 2.55e-05, "loss": 0.3609, "step": 770},
    {"epoch": 0.15, "learning_rate": 2.5421052631578948e-05, "loss": 0.3525, "step": 780},
    {"epoch": 0.15, "learning_rate": 2.5342105263157893e-05, "loss": 0.3527, "step": 790},
    {"epoch": 0.15, "learning_rate": 2.526315789473684e-05, "loss": 0.3566, "step": 800},
    {"epoch": 0.15, "learning_rate": 2.518421052631579e-05, "loss": 0.3327, "step": 810},
    {"epoch": 0.15, "learning_rate": 2.5105263157894738e-05, "loss": 0.3789, "step": 820},
    {"epoch": 0.16, "learning_rate": 2.5026315789473686e-05, "loss": 0.3415, "step": 830},
    {"epoch": 0.16, "learning_rate": 2.4947368421052635e-05, "loss": 0.3342, "step": 840},
    {"epoch": 0.16, "learning_rate": 2.486842105263158e-05, "loss": 0.3415, "step": 850},
    {"epoch": 0.16, "learning_rate": 2.4789473684210528e-05, "loss": 0.3562, "step": 860},
    {"epoch": 0.16, "learning_rate": 2.4710526315789476e-05, "loss": 0.3405, "step": 870},
    {"epoch": 0.16, "learning_rate": 2.463157894736842e-05, "loss": 0.368, "step": 880},
    {"epoch": 0.17, "learning_rate": 2.455263157894737e-05, "loss": 0.3547, "step": 890},
    {"epoch": 0.17, "learning_rate": 2.4473684210526318e-05, "loss": 0.3508, "step": 900},
    {"epoch": 0.17, "learning_rate": 2.4394736842105262e-05, "loss": 0.34, "step": 910},
    {"epoch": 0.17, "learning_rate": 2.431578947368421e-05, "loss": 0.3256, "step": 920},
    {"epoch": 0.17, "learning_rate": 2.4236842105263156e-05, "loss": 0.3515, "step": 930},
    {"epoch": 0.18, "learning_rate": 2.4157894736842104e-05, "loss": 0.316, "step": 940},
    {"epoch": 0.18, "learning_rate": 2.4078947368421056e-05, "loss": 0.3382, "step": 950},
    {"epoch": 0.18, "learning_rate": 2.4e-05, "loss": 0.3314, "step": 960},
    {"epoch": 0.18, "learning_rate": 2.392105263157895e-05, "loss": 0.3285, "step": 970},
    {"epoch": 0.18, "learning_rate": 2.3842105263157897e-05, "loss": 0.3472, "step": 980},
    {"epoch": 0.19, "learning_rate": 2.3763157894736842e-05, "loss": 0.3183, "step": 990},
    {"epoch": 0.19, "learning_rate": 2.368421052631579e-05, "loss": 0.332, "step": 1000},
    {"epoch": 0.19, "learning_rate": 2.360526315789474e-05, "loss": 0.314, "step": 1010},
    {"epoch": 0.19, "learning_rate": 2.3526315789473684e-05, "loss": 0.3127, "step": 1020},
    {"epoch": 0.19, "learning_rate": 2.3447368421052632e-05, "loss": 0.3483, "step": 1030},
    {"epoch": 0.19, "learning_rate": 2.336842105263158e-05, "loss": 0.3408, "step": 1040},
    {"epoch": 0.2, "learning_rate": 2.3289473684210525e-05, "loss": 0.3406, "step": 1050},
    {"epoch": 0.2, "learning_rate": 2.3210526315789473e-05, "loss": 0.3205, "step": 1060},
    {"epoch": 0.2, "learning_rate": 2.3131578947368422e-05, "loss": 0.3135, "step": 1070},
    {"epoch": 0.2, "learning_rate": 2.3052631578947367e-05, "loss": 0.3107, "step": 1080},
    {"epoch": 0.2, "learning_rate": 2.297368421052632e-05, "loss": 0.3079, "step": 1090},
    {"epoch": 0.21, "learning_rate": 2.2894736842105263e-05, "loss": 0.3141, "step": 1100},
    {"epoch": 0.21, "learning_rate": 2.281578947368421e-05, "loss": 0.3186, "step": 1110},
    {"epoch": 0.21, "learning_rate": 2.273684210526316e-05, "loss": 0.3254, "step": 1120},
    {"epoch": 0.21, "learning_rate": 2.2657894736842105e-05, "loss": 0.3219, "step": 1130},
    {"epoch": 0.21, "learning_rate": 2.2578947368421053e-05, "loss": 0.3106, "step": 1140},
    {"epoch": 0.22, "learning_rate": 2.25e-05, "loss": 0.3042, "step": 1150},
    {"epoch": 0.22, "learning_rate": 2.2421052631578946e-05, "loss": 0.3154, "step": 1160},
    {"epoch": 0.22, "learning_rate": 2.2342105263157895e-05, "loss": 0.3147, "step": 1170},
    {"epoch": 0.22, "learning_rate": 2.2263157894736843e-05, "loss": 0.2916, "step": 1180},
    {"epoch": 0.22, "learning_rate": 2.2184210526315788e-05, "loss": 0.3159, "step": 1190},
    {"epoch": 0.22, "learning_rate": 2.2105263157894736e-05, "loss": 0.3084, "step": 1200},
    {"epoch": 0.23, "learning_rate": 2.2026315789473684e-05, "loss": 0.3071, "step": 1210},
    {"epoch": 0.23, "learning_rate": 2.1947368421052633e-05, "loss": 0.312, "step": 1220},
    {"epoch": 0.23, "learning_rate": 2.186842105263158e-05, "loss": 0.3018, "step": 1230},
    {"epoch": 0.23, "learning_rate": 2.178947368421053e-05, "loss": 0.3195, "step": 1240},
    {"epoch": 0.23, "learning_rate": 2.1710526315789474e-05, "loss": 0.3115, "step": 1250},
    {"epoch": 0.24, "learning_rate": 2.1631578947368423e-05, "loss": 0.2827, "step": 1260},
    {"epoch": 0.24, "learning_rate": 2.155263157894737e-05, "loss": 0.3168, "step": 1270},
    {"epoch": 0.24, "learning_rate": 2.1473684210526316e-05, "loss": 0.3004, "step": 1280},
    {"epoch": 0.24, "learning_rate": 2.1394736842105264e-05, "loss": 0.3084, "step": 1290},
    {"epoch": 0.24, "learning_rate": 2.1315789473684212e-05, "loss": 0.3065, "step": 1300},
    {"epoch": 0.25, "learning_rate": 2.1236842105263157e-05, "loss": 0.318, "step": 1310},
    {"epoch": 0.25, "learning_rate": 2.1157894736842106e-05, "loss": 0.3035, "step": 1320},
    {"epoch": 0.25, "learning_rate": 2.107894736842105e-05, "loss": 0.2849, "step": 1330},
    {"epoch": 0.25, "learning_rate": 2.1e-05, "loss": 0.2861, "step": 1340},
    {"epoch": 0.25, "learning_rate": 2.0921052631578947e-05, "loss": 0.2852, "step": 1350},
    {"epoch": 0.25, "learning_rate": 2.0842105263157895e-05, "loss": 0.3204, "step": 1360},
    {"epoch": 0.26, "learning_rate": 2.0763157894736844e-05, "loss": 0.31, "step": 1370},
    {"epoch": 0.26, "learning_rate": 2.0684210526315792e-05, "loss": 0.2854, "step": 1380},
    {"epoch": 0.26, "learning_rate": 2.0605263157894737e-05, "loss": 0.2855, "step": 1390},
    {"epoch": 0.26, "learning_rate": 2.0526315789473685e-05, "loss": 0.3003, "step": 1400},
    {"epoch": 0.26, "learning_rate": 2.0447368421052634e-05, "loss": 0.2889, "step": 1410},
    {"epoch": 0.27, "learning_rate": 2.036842105263158e-05, "loss": 0.2834, "step": 1420},
    {"epoch": 0.27, "learning_rate": 2.0289473684210527e-05, "loss": 0.2696, "step": 1430},
    {"epoch": 0.27, "learning_rate": 2.0210526315789475e-05, "loss": 0.289, "step": 1440},
    {"epoch": 0.27, "learning_rate": 2.013157894736842e-05, "loss": 0.2851, "step": 1450},
    {"epoch": 0.27, "learning_rate": 2.0052631578947368e-05, "loss": 0.2903, "step": 1460},
    {"epoch": 0.28, "learning_rate": 1.9973684210526317e-05, "loss": 0.2742, "step": 1470},
    {"epoch": 0.28, "learning_rate": 1.989473684210526e-05, "loss": 0.2775, "step": 1480},
    {"epoch": 0.28, "learning_rate": 1.9815789473684213e-05, "loss": 0.2783, "step": 1490},
    {"epoch": 0.28, "learning_rate": 1.9736842105263158e-05, "loss": 0.2814, "step": 1500},
    {"epoch": 0.28, "learning_rate": 1.9657894736842106e-05, "loss": 0.2931, "step": 1510},
    {"epoch": 0.28, "learning_rate": 1.9578947368421055e-05, "loss": 0.2811, "step": 1520},
    {"epoch": 0.29, "learning_rate": 1.95e-05, "loss": 0.29, "step": 1530},
    {"epoch": 0.29, "learning_rate": 1.9421052631578948e-05, "loss": 0.2925, "step": 1540},
    {"epoch": 0.29, "learning_rate": 1.9342105263157896e-05, "loss": 0.2658, "step": 1550},
    {"epoch": 0.29, "learning_rate": 1.926315789473684e-05, "loss": 0.2959, "step": 1560},
    {"epoch": 0.29, "learning_rate": 1.918421052631579e-05, "loss": 0.2695, "step": 1570},
    {"epoch": 0.3, "learning_rate": 1.9105263157894738e-05, "loss": 0.2902, "step": 1580},
    {"epoch": 0.3, "learning_rate": 1.9026315789473683e-05, "loss": 0.2597, "step": 1590},
    {"epoch": 0.3, "learning_rate": 1.894736842105263e-05, "loss": 0.2745, "step": 1600},
    {"epoch": 0.3, "learning_rate": 1.886842105263158e-05, "loss": 0.2775, "step": 1610},
    {"epoch": 0.3, "learning_rate": 1.8789473684210524e-05, "loss": 0.2707, "step": 1620},
    {"epoch": 0.31, "learning_rate": 1.8710526315789476e-05, "loss": 0.2699, "step": 1630},
    {"epoch": 0.31, "learning_rate": 1.8631578947368424e-05, "loss": 0.2762, "step": 1640},
    {"epoch": 0.31, "learning_rate": 1.855263157894737e-05, "loss": 0.2539, "step": 1650},
    {"epoch": 0.31, "learning_rate": 1.8473684210526317e-05, "loss": 0.273, "step": 1660},
    {"epoch": 0.31, "learning_rate": 1.8394736842105266e-05, "loss": 0.244, "step": 1670},
    {"epoch": 0.31, "learning_rate": 1.831578947368421e-05, "loss": 0.2632, "step": 1680},
    {"epoch": 0.32, "learning_rate": 1.823684210526316e-05, "loss": 0.2756, "step": 1690},
    {"epoch": 0.32, "learning_rate": 1.8157894736842107e-05, "loss": 0.2555, "step": 1700},
    {"epoch": 0.32, "learning_rate": 1.8078947368421052e-05, "loss": 0.2863, "step": 1710},
    {"epoch": 0.32, "learning_rate": 1.8e-05, "loss": 0.2516, "step": 1720},
    {"epoch": 0.32, "learning_rate": 1.7921052631578945e-05, "loss": 0.2467, "step": 1730},
    {"epoch": 0.33, "learning_rate": 1.7842105263157894e-05, "loss": 0.2591, "step": 1740},
    {"epoch": 0.33, "learning_rate": 1.7763157894736842e-05, "loss": 0.2641, "step": 1750},
    {"epoch": 0.33, "learning_rate": 1.7684210526315787e-05, "loss": 0.2658, "step": 1760},
    {"epoch": 0.33, "learning_rate": 1.760526315789474e-05, "loss": 0.252, "step": 1770},
    {"epoch": 0.33, "learning_rate": 1.7526315789473687e-05, "loss": 0.2654, "step": 1780},
    {"epoch": 0.34, "learning_rate": 1.7447368421052632e-05, "loss": 0.2723, "step": 1790},
    {"epoch": 0.34, "learning_rate": 1.736842105263158e-05, "loss": 0.2716, "step": 1800},
    {"epoch": 0.34, "learning_rate": 1.728947368421053e-05, "loss": 0.2741, "step": 1810},
    {"epoch": 0.34, "learning_rate": 1.7210526315789473e-05, "loss": 0.2708, "step": 1820},
    {"epoch": 0.34, "learning_rate": 1.713157894736842e-05, "loss": 0.2484, "step": 1830},
    {"epoch": 0.34, "learning_rate": 1.705263157894737e-05, "loss": 0.2615, "step": 1840},
    {"epoch": 0.35, "learning_rate": 1.6973684210526315e-05, "loss": 0.2547, "step": 1850},
    {"epoch": 0.35, "learning_rate": 1.6894736842105263e-05, "loss": 0.264, "step": 1860},
    {"epoch": 0.35, "learning_rate": 1.681578947368421e-05, "loss": 0.2514, "step": 1870},
    {"epoch": 0.35, "learning_rate": 1.6736842105263156e-05, "loss": 0.2385, "step": 1880},
    {"epoch": 0.35, "learning_rate": 1.6657894736842105e-05, "loss": 0.2539, "step": 1890},
    {"epoch": 0.36, "learning_rate": 1.6578947368421053e-05, "loss": 0.24, "step": 1900},
    {"epoch": 0.36, "learning_rate": 1.65e-05, "loss": 0.2509, "step": 1910},
    {"epoch": 0.36, "learning_rate": 1.642105263157895e-05, "loss": 0.2573, "step": 1920},
    {"epoch": 0.36, "learning_rate": 1.6342105263157894e-05, "loss": 0.2427, "step": 1930},
    {"epoch": 0.36, "learning_rate": 1.6263157894736843e-05, "loss": 0.2572, "step": 1940},
    {"epoch": 0.36, "learning_rate": 1.618421052631579e-05, "loss": 0.2522, "step": 1950},
    {"epoch": 0.37, "learning_rate": 1.6105263157894736e-05, "loss": 0.2553, "step": 1960},
    {"epoch": 0.37, "learning_rate": 1.6026315789473684e-05, "loss": 0.2436, "step": 1970},
    {"epoch": 0.37, "learning_rate": 1.5947368421052633e-05, "loss": 0.2572, "step": 1980},
    {"epoch": 0.37, "learning_rate": 1.5868421052631578e-05, "loss": 0.2541, "step": 1990},
    {"epoch": 0.37, "learning_rate": 1.5789473684210526e-05, "loss": 0.2417, "step": 2000},
    {"epoch": 0.38, "learning_rate": 1.5710526315789474e-05, "loss": 0.2586, "step": 2010},
    {"epoch": 0.38, "learning_rate": 1.563157894736842e-05, "loss": 0.2383, "step": 2020},
    {"epoch": 0.38, "learning_rate": 1.5552631578947367e-05, "loss": 0.2497, "step": 2030},
    {"epoch": 0.38, "learning_rate": 1.547368421052632e-05, "loss": 0.2357, "step": 2040},
    {"epoch": 0.38, "learning_rate": 1.5394736842105264e-05, "loss": 0.26, "step": 2050},
    {"epoch": 0.39, "learning_rate": 1.5315789473684212e-05, "loss": 0.2442, "step": 2060},
    {"epoch": 0.39, "learning_rate": 1.5236842105263159e-05, "loss": 0.2269, "step": 2070},
    {"epoch": 0.39, "learning_rate": 1.5157894736842105e-05, "loss": 0.2419, "step": 2080},
    {"epoch": 0.39, "learning_rate": 1.5078947368421054e-05, "loss": 0.2442, "step": 2090},
    {"epoch": 0.39, "learning_rate": 1.5e-05, "loss": 0.2331, "step": 2100},
    {"epoch": 0.39, "learning_rate": 1.4921052631578947e-05, "loss": 0.2311, "step": 2110},
    {"epoch": 0.4, "learning_rate": 1.4842105263157895e-05, "loss": 0.2444, "step": 2120},
    {"epoch": 0.4, "learning_rate": 1.4763157894736842e-05, "loss": 0.2409, "step": 2130},
    {"epoch": 0.4, "learning_rate": 1.468421052631579e-05, "loss": 0.2365, "step": 2140},
    {"epoch": 0.4, "learning_rate": 1.4605263157894737e-05, "loss": 0.2517, "step": 2150},
    {"epoch": 0.4, "learning_rate": 1.4526315789473685e-05, "loss": 0.2562, "step": 2160},
    {"epoch": 0.41, "learning_rate": 1.4447368421052632e-05, "loss": 0.2233, "step": 2170},
    {"epoch": 0.41, "learning_rate": 1.4368421052631578e-05, "loss": 0.2496, "step": 2180},
    {"epoch": 0.41, "learning_rate": 1.4289473684210527e-05, "loss": 0.2452, "step": 2190},
    {"epoch": 0.41, "learning_rate": 1.4210526315789473e-05, "loss": 0.2374, "step": 2200},
    {"epoch": 0.41, "learning_rate": 1.4131578947368422e-05, "loss": 0.2233, "step": 2210},
    {"epoch": 0.42, "learning_rate": 1.405263157894737e-05, "loss": 0.2571, "step": 2220},
    {"epoch": 0.42, "learning_rate": 1.3973684210526316e-05, "loss": 0.2254, "step": 2230},
    {"epoch": 0.42, "learning_rate": 1.3894736842105263e-05, "loss": 0.2236, "step": 2240},
    {"epoch": 0.42, "learning_rate": 1.3815789473684211e-05, "loss": 0.2284, "step": 2250},
    {"epoch": 0.42, "learning_rate": 1.3736842105263158e-05, "loss": 0.232, "step": 2260},
    {"epoch": 0.42, "learning_rate": 1.3657894736842106e-05, "loss": 0.2415, "step": 2270},
    {"epoch": 0.43, "learning_rate": 1.3578947368421053e-05, "loss": 0.2244, "step": 2280},
    {"epoch": 0.43, "learning_rate": 1.3500000000000001e-05, "loss": 0.2338, "step": 2290},
    {"epoch": 0.43, "learning_rate": 1.3421052631578948e-05, "loss": 0.2382, "step": 2300},
    {"epoch": 0.43, "learning_rate": 1.3342105263157894e-05, "loss": 0.2349, "step": 2310},
    {"epoch": 0.43, "learning_rate": 1.3263157894736843e-05, "loss": 0.2267, "step": 2320},
    {"epoch": 0.44, "learning_rate": 1.318421052631579e-05, "loss": 0.2426, "step": 2330},
    {"epoch": 0.44, "learning_rate": 1.3105263157894738e-05, "loss": 0.238, "step": 2340},
    {"epoch": 0.44, "learning_rate": 1.3026315789473684e-05, "loss": 0.2332, "step": 2350},
    {"epoch": 0.44, "learning_rate": 1.2947368421052633e-05, "loss": 0.2332, "step": 2360},
    {"epoch": 0.44, "learning_rate": 1.2868421052631579e-05, "loss": 0.2477, "step": 2370},
    {"epoch": 0.45, "learning_rate": 1.2789473684210526e-05, "loss": 0.2176, "step": 2380},
    {"epoch": 0.45, "learning_rate": 1.2710526315789474e-05, "loss": 0.2177, "step": 2390},
    {"epoch": 0.45, "learning_rate": 1.263157894736842e-05, "loss": 0.2278, "step": 2400},
    {"epoch": 0.45, "learning_rate": 1.2552631578947369e-05, "loss": 0.2221, "step": 2410},
    {"epoch": 0.45, "learning_rate": 1.2473684210526317e-05, "loss": 0.2173, "step": 2420},
    {"epoch": 0.45, "learning_rate": 1.2394736842105264e-05, "loss": 0.2131, "step": 2430},
    {"epoch": 0.46, "learning_rate": 1.231578947368421e-05, "loss": 0.2346, "step": 2440},
    {"epoch": 0.46, "learning_rate": 1.2236842105263159e-05, "loss": 0.2301, "step": 2450},
    {"epoch": 0.46, "learning_rate": 1.2157894736842105e-05, "loss": 0.2173, "step": 2460},
    {"epoch": 0.46, "learning_rate": 1.2078947368421052e-05, "loss": 0.2297, "step": 2470},
    {"epoch": 0.46, "learning_rate": 1.2e-05, "loss": 0.2255, "step": 2480},
    {"epoch": 0.47, "learning_rate": 1.1921052631578949e-05, "loss": 0.2307, "step": 2490},
    {"epoch": 0.47, "learning_rate": 1.1842105263157895e-05, "loss": 0.222, "step": 2500},
    {"epoch": 0.47, "learning_rate": 1.1763157894736842e-05, "loss": 0.2319, "step": 2510},
    {"epoch": 0.47, "learning_rate": 1.168421052631579e-05, "loss": 0.228, "step": 2520},
    {"epoch": 0.47, "learning_rate": 1.1605263157894737e-05, "loss": 0.2267, "step": 2530},
    {"epoch": 0.48, "learning_rate": 1.1526315789473683e-05, "loss": 0.2163, "step": 2540},
    {"epoch": 0.48, "learning_rate": 1.1447368421052632e-05, "loss": 0.2167, "step": 2550},
    {"epoch": 0.48, "learning_rate": 1.136842105263158e-05, "loss": 0.1991, "step": 2560},
    {"epoch": 0.48, "learning_rate": 1.1289473684210527e-05, "loss": 0.2165, "step": 2570},
    {"epoch": 0.48, "learning_rate": 1.1210526315789473e-05, "loss": 0.2147, "step": 2580},
    {"epoch": 0.48, "learning_rate": 1.1131578947368421e-05, "loss": 0.2203, "step": 2590},
    {"epoch": 0.49, "learning_rate": 1.1052631578947368e-05, "loss": 0.2253, "step": 2600},
    {"epoch": 0.49, "learning_rate": 1.0973684210526316e-05, "loss": 0.2326, "step": 2610},
    {"epoch": 0.49, "learning_rate": 1.0894736842105265e-05, "loss": 0.221, "step": 2620},
    {"epoch": 0.49, "learning_rate": 1.0815789473684211e-05, "loss": 0.2233, "step": 2630},
    {"epoch": 0.49, "learning_rate": 1.0736842105263158e-05, "loss": 0.2199, "step": 2640},
    {"epoch": 0.5, "learning_rate": 1.0657894736842106e-05, "loss": 0.2077, "step": 2650},
    {"epoch": 0.5, "learning_rate": 1.0578947368421053e-05, "loss": 0.2108, "step": 2660},
    {"epoch": 0.5, "learning_rate": 1.05e-05, "loss": 0.2217, "step": 2670},
    {"epoch": 0.5, "learning_rate": 1.0421052631578948e-05, "loss": 0.2067, "step": 2680},
    {"epoch": 0.5, "learning_rate": 1.0342105263157896e-05, "loss": 0.2351, "step": 2690},
    {"epoch": 0.51, "learning_rate": 1.0263157894736843e-05, "loss": 0.1952, "step": 2700},
    {"epoch": 0.51, "learning_rate": 1.018421052631579e-05, "loss": 0.2259, "step": 2710},
    {"epoch": 0.51, "learning_rate": 1.0105263157894738e-05, "loss": 0.2074, "step": 2720},
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0026315789473684e-05, | |
| "loss": 0.1963, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 9.94736842105263e-06, | |
| "loss": 0.2193, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 9.868421052631579e-06, | |
| "loss": 0.2172, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.789473684210527e-06, | |
| "loss": 0.2278, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.710526315789474e-06, | |
| "loss": 0.1933, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.63157894736842e-06, | |
| "loss": 0.2177, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.552631578947369e-06, | |
| "loss": 0.204, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.473684210526315e-06, | |
| "loss": 0.2178, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.394736842105262e-06, | |
| "loss": 0.2012, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.315789473684212e-06, | |
| "loss": 0.2054, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.236842105263159e-06, | |
| "loss": 0.2067, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.157894736842105e-06, | |
| "loss": 0.2132, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.078947368421054e-06, | |
| "loss": 0.2081, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9e-06, | |
| "loss": 0.2033, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 8.921052631578947e-06, | |
| "loss": 0.2158, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 8.842105263157893e-06, | |
| "loss": 0.2043, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 8.763157894736843e-06, | |
| "loss": 0.1979, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 8.68421052631579e-06, | |
| "loss": 0.213, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 8.605263157894737e-06, | |
| "loss": 0.2042, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.526315789473685e-06, | |
| "loss": 0.2067, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.447368421052632e-06, | |
| "loss": 0.2052, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.368421052631578e-06, | |
| "loss": 0.2036, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.289473684210526e-06, | |
| "loss": 0.2224, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.210526315789475e-06, | |
| "loss": 0.2106, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.131578947368421e-06, | |
| "loss": 0.1956, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.052631578947368e-06, | |
| "loss": 0.1973, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 7.973684210526316e-06, | |
| "loss": 0.2103, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 7.894736842105263e-06, | |
| "loss": 0.2011, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 7.81578947368421e-06, | |
| "loss": 0.2085, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 7.73684210526316e-06, | |
| "loss": 0.2042, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 7.657894736842106e-06, | |
| "loss": 0.2123, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 7.578947368421053e-06, | |
| "loss": 0.1853, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.1943, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 7.421052631578948e-06, | |
| "loss": 0.2049, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 7.342105263157895e-06, | |
| "loss": 0.1986, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.2631578947368426e-06, | |
| "loss": 0.1912, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.184210526315789e-06, | |
| "loss": 0.2061, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.105263157894737e-06, | |
| "loss": 0.1968, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.026315789473685e-06, | |
| "loss": 0.1827, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 6.9473684210526315e-06, | |
| "loss": 0.2016, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 6.868421052631579e-06, | |
| "loss": 0.2212, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 6.7894736842105264e-06, | |
| "loss": 0.2108, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 6.710526315789474e-06, | |
| "loss": 0.2121, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 6.631578947368421e-06, | |
| "loss": 0.1888, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 6.552631578947369e-06, | |
| "loss": 0.2018, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.473684210526316e-06, | |
| "loss": 0.2049, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.394736842105263e-06, | |
| "loss": 0.2084, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.31578947368421e-06, | |
| "loss": 0.188, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.236842105263159e-06, | |
| "loss": 0.1863, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.157894736842105e-06, | |
| "loss": 0.2082, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.078947368421053e-06, | |
| "loss": 0.2047, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6e-06, | |
| "loss": 0.208, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 5.921052631578948e-06, | |
| "loss": 0.2067, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 5.842105263157895e-06, | |
| "loss": 0.2032, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 5.763157894736842e-06, | |
| "loss": 0.1891, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 5.68421052631579e-06, | |
| "loss": 0.176, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 5.605263157894737e-06, | |
| "loss": 0.2063, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 5.526315789473684e-06, | |
| "loss": 0.2006, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 5.447368421052632e-06, | |
| "loss": 0.2016, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 5.368421052631579e-06, | |
| "loss": 0.1875, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 5.289473684210526e-06, | |
| "loss": 0.1951, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 5.210526315789474e-06, | |
| "loss": 0.2017, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 5.131578947368421e-06, | |
| "loss": 0.2072, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 5.052631578947369e-06, | |
| "loss": 0.1919, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.973684210526315e-06, | |
| "loss": 0.2072, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.894736842105264e-06, | |
| "loss": 0.1955, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.81578947368421e-06, | |
| "loss": 0.2037, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.736842105263158e-06, | |
| "loss": 0.1891, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.657894736842106e-06, | |
| "loss": 0.1814, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.578947368421053e-06, | |
| "loss": 0.1737, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.5e-06, | |
| "loss": 0.2123, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.421052631578947e-06, | |
| "loss": 0.1984, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.342105263157895e-06, | |
| "loss": 0.1857, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.2631578947368425e-06, | |
| "loss": 0.1855, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.184210526315789e-06, | |
| "loss": 0.1993, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.105263157894737e-06, | |
| "loss": 0.1951, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.026315789473684e-06, | |
| "loss": 0.1976, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.9473684210526315e-06, | |
| "loss": 0.1848, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.86842105263158e-06, | |
| "loss": 0.2031, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.7894736842105264e-06, | |
| "loss": 0.1773, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.710526315789474e-06, | |
| "loss": 0.1955, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.6315789473684213e-06, | |
| "loss": 0.1836, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.5526315789473683e-06, | |
| "loss": 0.199, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.4736842105263158e-06, | |
| "loss": 0.1827, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.3947368421052632e-06, | |
| "loss": 0.1917, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.3157894736842107e-06, | |
| "loss": 0.1849, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.236842105263158e-06, | |
| "loss": 0.1913, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.157894736842105e-06, | |
| "loss": 0.1822, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.0789473684210526e-06, | |
| "loss": 0.2005, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3e-06, | |
| "loss": 0.1969, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.9210526315789475e-06, | |
| "loss": 0.184, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.842105263157895e-06, | |
| "loss": 0.1829, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.763157894736842e-06, | |
| "loss": 0.1856, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.6842105263157895e-06, | |
| "loss": 0.18, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.605263157894737e-06, | |
| "loss": 0.1794, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.5263157894736844e-06, | |
| "loss": 0.1892, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.447368421052632e-06, | |
| "loss": 0.1953, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.368421052631579e-06, | |
| "loss": 0.1996, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.2894736842105263e-06, | |
| "loss": 0.1781, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.2105263157894734e-06, | |
| "loss": 0.1884, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.1315789473684212e-06, | |
| "loss": 0.1829, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.0526315789473687e-06, | |
| "loss": 0.1853, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.9736842105263157e-06, | |
| "loss": 0.1999, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.8947368421052632e-06, | |
| "loss": 0.181, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.8157894736842106e-06, | |
| "loss": 0.1769, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.7368421052631579e-06, | |
| "loss": 0.1965, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.6578947368421053e-06, | |
| "loss": 0.1866, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.5789473684210526e-06, | |
| "loss": 0.1848, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.204, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4210526315789475e-06, | |
| "loss": 0.1807, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.3421052631578947e-06, | |
| "loss": 0.1858, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.2631578947368422e-06, | |
| "loss": 0.1953, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.1842105263157894e-06, | |
| "loss": 0.1793, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.1052631578947367e-06, | |
| "loss": 0.1987, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.0263157894736843e-06, | |
| "loss": 0.1857, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 9.473684210526316e-07, | |
| "loss": 0.1767, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.684210526315789e-07, | |
| "loss": 0.1784, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 7.894736842105263e-07, | |
| "loss": 0.1657, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 7.105263157894737e-07, | |
| "loss": 0.1801, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 6.315789473684211e-07, | |
| "loss": 0.1853, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 5.526315789473683e-07, | |
| "loss": 0.1843, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.736842105263158e-07, | |
| "loss": 0.2039, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.9473684210526315e-07, | |
| "loss": 0.2031, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.1578947368421055e-07, | |
| "loss": 0.1851, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.368421052631579e-07, | |
| "loss": 0.1765, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.5789473684210527e-07, | |
| "loss": 0.1724, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.894736842105264e-08, | |
| "loss": 0.1824, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0, | |
| "loss": 0.1716, | |
| "step": 4000 | |
| } | |
| ], | |
| "max_steps": 4000, | |
| "num_train_epochs": 1, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |