| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.2163565556036348, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "loss": 2.9132, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.9999999999999995e-05, | |
| "loss": 2.5796, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 8.999999999999999e-05, | |
| "loss": 2.3848, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00011999999999999999, | |
| "loss": 2.0514, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.000147, | |
| "loss": 1.9692, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00017699999999999997, | |
| "loss": 1.7979, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.000204, | |
| "loss": 1.7851, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.000234, | |
| "loss": 1.7217, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00025499999999999996, | |
| "loss": 1.3989, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.000285, | |
| "loss": 1.654, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00029996088657105604, | |
| "loss": 1.4094, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002998565840938722, | |
| "loss": 1.8543, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002997522816166884, | |
| "loss": 1.7782, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002996219035202086, | |
| "loss": 1.7068, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029951760104302477, | |
| "loss": 1.4387, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029940026075619296, | |
| "loss": 1.7555, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029928292046936116, | |
| "loss": 1.5488, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002991655801825293, | |
| "loss": 1.5043, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029906127770534547, | |
| "loss": 1.5463, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002989830508474576, | |
| "loss": 1.1729, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029887874837027377, | |
| "loss": 1.5994, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.000298748370273794, | |
| "loss": 1.6526, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002986179921773142, | |
| "loss": 5.4035, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002984876140808344, | |
| "loss": 7.2882, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002983572359843546, | |
| "loss": 7.2899, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002982268578878748, | |
| "loss": 7.5663, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.000298096479791395, | |
| "loss": 7.7473, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002979661016949152, | |
| "loss": 7.2212, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029783572359843544, | |
| "loss": 7.3973, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029770534550195566, | |
| "loss": 7.8749, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029757496740547587, | |
| "loss": 7.6257, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029744458930899603, | |
| "loss": 7.7121, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029731421121251625, | |
| "loss": 7.6982, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002971838331160365, | |
| "loss": 7.9487, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002970534550195567, | |
| "loss": 7.5477, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002969230769230769, | |
| "loss": 7.5343, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002967926988265971, | |
| "loss": 7.5523, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002966623207301173, | |
| "loss": 7.4696, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002965319426336375, | |
| "loss": 7.6125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029640156453715775, | |
| "loss": 7.4849, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029627118644067797, | |
| "loss": 7.9908, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029614080834419813, | |
| "loss": 7.7173, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029601043024771835, | |
| "loss": 7.7963, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029588005215123856, | |
| "loss": 7.9082, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002957496740547588, | |
| "loss": 7.673, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000295619295958279, | |
| "loss": 7.6576, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002954889178617992, | |
| "loss": 7.7349, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002953585397653194, | |
| "loss": 8.1483, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002952281616688396, | |
| "loss": 7.8854, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002950977835723598, | |
| "loss": 7.8698, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029496740547588, | |
| "loss": 7.9662, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029483702737940023, | |
| "loss": 7.7332, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029470664928292045, | |
| "loss": 7.9352, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029457627118644066, | |
| "loss": 7.8444, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002944458930899609, | |
| "loss": 7.9594, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029431551499348104, | |
| "loss": 7.6888, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002941851368970013, | |
| "loss": 7.9143, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029405475880052147, | |
| "loss": 7.7642, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002939243807040417, | |
| "loss": 7.6396, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002937940026075619, | |
| "loss": 7.6659, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002936636245110821, | |
| "loss": 7.9758, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029353324641460233, | |
| "loss": 7.7407, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029340286831812254, | |
| "loss": 7.9381, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029327249022164276, | |
| "loss": 8.0261, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002931421121251629, | |
| "loss": 7.7693, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029301173402868314, | |
| "loss": 7.9575, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029288135593220335, | |
| "loss": 7.6038, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029275097783572357, | |
| "loss": 7.7868, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002926205997392438, | |
| "loss": 7.9182, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000292490221642764, | |
| "loss": 7.8777, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002923598435462842, | |
| "loss": 7.915, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029222946544980443, | |
| "loss": 7.8993, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002920990873533246, | |
| "loss": 7.9536, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029196870925684486, | |
| "loss": 7.8386, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000291838331160365, | |
| "loss": 7.9146, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029170795306388524, | |
| "loss": 7.938, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029157757496740545, | |
| "loss": 7.9984, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029144719687092567, | |
| "loss": 7.6733, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002913168187744459, | |
| "loss": 7.8783, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002911864406779661, | |
| "loss": 8.1666, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002910560625814863, | |
| "loss": 7.9801, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029092568448500647, | |
| "loss": 8.0533, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002907953063885267, | |
| "loss": 8.0206, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002906649282920469, | |
| "loss": 7.7818, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002905345501955671, | |
| "loss": 8.021, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029040417209908733, | |
| "loss": 7.9063, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029027379400260755, | |
| "loss": 7.8719, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029014341590612776, | |
| "loss": 8.1405, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002900130378096479, | |
| "loss": 7.8046, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028988265971316814, | |
| "loss": 7.9293, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002897522816166884, | |
| "loss": 8.0217, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028962190352020857, | |
| "loss": 7.8202, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002894915254237288, | |
| "loss": 7.8705, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.000289361147327249, | |
| "loss": 7.9237, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002892307692307692, | |
| "loss": 8.1584, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002891003911342894, | |
| "loss": 8.0448, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028897001303780965, | |
| "loss": 8.0041, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028883963494132986, | |
| "loss": 7.9727, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028870925684485, | |
| "loss": 7.8942, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028857887874837024, | |
| "loss": 7.8319, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028844850065189046, | |
| "loss": 7.898, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028831812255541067, | |
| "loss": 7.899, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028818774445893083, | |
| "loss": 7.7232, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002880573663624511, | |
| "loss": 8.2552, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002879269882659713, | |
| "loss": 8.0233, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002877966101694915, | |
| "loss": 8.1904, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002876662320730117, | |
| "loss": 7.9744, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002875358539765319, | |
| "loss": 7.9458, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002874054758800521, | |
| "loss": 8.1225, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028727509778357234, | |
| "loss": 8.2103, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028714471968709255, | |
| "loss": 8.1877, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028701434159061277, | |
| "loss": 8.1925, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028688396349413293, | |
| "loss": 8.1942, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002867535853976532, | |
| "loss": 8.268, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028662320730117336, | |
| "loss": 8.0643, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002864928292046936, | |
| "loss": 8.1105, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002863624511082138, | |
| "loss": 8.3186, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.000286232073011734, | |
| "loss": 8.2267, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002861016949152542, | |
| "loss": 8.4379, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002859713168187744, | |
| "loss": 8.2195, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028584093872229465, | |
| "loss": 8.1754, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002857105606258148, | |
| "loss": 8.2161, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028558018252933503, | |
| "loss": 8.3426, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028544980443285525, | |
| "loss": 8.3226, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028531942633637546, | |
| "loss": 8.1448, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002851890482398957, | |
| "loss": 8.3116, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002850586701434159, | |
| "loss": 8.2342, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002849282920469361, | |
| "loss": 8.6083, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002847979139504563, | |
| "loss": 8.4491, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002846675358539765, | |
| "loss": 8.2441, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002845371577574967, | |
| "loss": 8.4184, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002844067796610169, | |
| "loss": 8.2803, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028427640156453713, | |
| "loss": 8.1456, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028414602346805734, | |
| "loss": 8.3464, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028401564537157756, | |
| "loss": 8.2956, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002838852672750978, | |
| "loss": 8.5188, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028375488917861794, | |
| "loss": 8.2586, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002836245110821382, | |
| "loss": 8.2999, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028349413298565837, | |
| "loss": 8.4213, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002833637548891786, | |
| "loss": 8.2921, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002832333767926988, | |
| "loss": 8.4024, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.000283102998696219, | |
| "loss": 8.4043, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028297262059973923, | |
| "loss": 8.3759, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028284224250325944, | |
| "loss": 8.3432, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028271186440677966, | |
| "loss": 8.3263, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002825814863102998, | |
| "loss": 8.348, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028245110821382004, | |
| "loss": 8.2902, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028232073011734025, | |
| "loss": 8.5699, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028219035202086047, | |
| "loss": 8.4766, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002820599739243807, | |
| "loss": 8.5017, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002819295958279009, | |
| "loss": 8.6927, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002817992177314211, | |
| "loss": 8.5978, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002816688396349413, | |
| "loss": 8.4803, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002815384615384615, | |
| "loss": 8.5869, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028140808344198176, | |
| "loss": 8.4203, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002812777053455019, | |
| "loss": 8.7451, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028114732724902213, | |
| "loss": 8.6679, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028101694915254235, | |
| "loss": 8.6733, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028088657105606257, | |
| "loss": 8.5751, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002807561929595828, | |
| "loss": 8.7441, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.000280625814863103, | |
| "loss": 8.6364, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002804954367666232, | |
| "loss": 8.3853, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028036505867014337, | |
| "loss": 8.6059, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002802346805736636, | |
| "loss": 8.6964, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002801043024771838, | |
| "loss": 8.4077, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.000279973924380704, | |
| "loss": 8.5408, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027984354628422423, | |
| "loss": 8.6068, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027971316818774445, | |
| "loss": 8.5852, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027958279009126466, | |
| "loss": 8.6365, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002794524119947848, | |
| "loss": 8.3437, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027932203389830504, | |
| "loss": 8.5662, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027919165580182526, | |
| "loss": 8.6039, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027906127770534547, | |
| "loss": 8.666, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002789308996088657, | |
| "loss": 8.5818, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002788005215123859, | |
| "loss": 8.4733, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002786701434159061, | |
| "loss": 8.6056, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002785397653194263, | |
| "loss": 8.7309, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027840938722294655, | |
| "loss": 8.4525, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027827900912646676, | |
| "loss": 8.5793, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002781486310299869, | |
| "loss": 8.4926, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027801825293350714, | |
| "loss": 8.5963, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027788787483702736, | |
| "loss": 8.5549, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027775749674054757, | |
| "loss": 8.7375, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027762711864406773, | |
| "loss": 8.3517, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000277496740547588, | |
| "loss": 8.6929, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002773663624511082, | |
| "loss": 8.4538, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002772359843546284, | |
| "loss": 8.7503, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002771056062581486, | |
| "loss": 8.6003, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002769752281616688, | |
| "loss": 8.6623, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000276844850065189, | |
| "loss": 8.6156, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027671447196870924, | |
| "loss": 8.6077, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027658409387222945, | |
| "loss": 8.6117, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027645371577574967, | |
| "loss": 8.6552, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027632333767926983, | |
| "loss": 8.6804, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002761929595827901, | |
| "loss": 8.8094, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027606258148631026, | |
| "loss": 8.6628, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002759322033898305, | |
| "loss": 8.5159, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002758018252933507, | |
| "loss": 8.396, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002756714471968709, | |
| "loss": 8.8074, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002755410691003911, | |
| "loss": 8.773, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002754106910039113, | |
| "loss": 8.7639, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027528031290743155, | |
| "loss": 8.4934, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002751499348109517, | |
| "loss": 8.5592, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027501955671447193, | |
| "loss": 8.5546, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027488917861799215, | |
| "loss": 8.5575, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027475880052151236, | |
| "loss": 8.6952, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002746284224250326, | |
| "loss": 8.6958, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002744980443285528, | |
| "loss": 8.7037, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.000274367666232073, | |
| "loss": 8.9301, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027423728813559317, | |
| "loss": 8.6533, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002741069100391134, | |
| "loss": 8.7503, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027397653194263365, | |
| "loss": 8.725, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002738461538461538, | |
| "loss": 8.5035, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027371577574967403, | |
| "loss": 8.802, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027358539765319424, | |
| "loss": 8.9058, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027345501955671446, | |
| "loss": 8.7056, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002733246414602347, | |
| "loss": 8.7894, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027319426336375484, | |
| "loss": 8.669, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002730638852672751, | |
| "loss": 8.708, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027293350717079527, | |
| "loss": 8.8639, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002728031290743155, | |
| "loss": 8.8779, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002726727509778357, | |
| "loss": 8.8422, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002725423728813559, | |
| "loss": 8.7745, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027241199478487613, | |
| "loss": 9.0292, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027228161668839634, | |
| "loss": 9.051, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027215123859191656, | |
| "loss": 9.0168, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002720208604954367, | |
| "loss": 9.1592, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027189048239895694, | |
| "loss": 9.1905, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027176010430247715, | |
| "loss": 9.216, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027162972620599737, | |
| "loss": 9.2634, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002714993481095176, | |
| "loss": 9.249, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002713689700130378, | |
| "loss": 9.3536, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000271238591916558, | |
| "loss": 9.4016, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002711082138200782, | |
| "loss": 9.2968, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002709778357235984, | |
| "loss": 9.4278, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027084745762711866, | |
| "loss": 9.4604, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002707170795306388, | |
| "loss": 9.5698, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027058670143415903, | |
| "loss": 9.4086, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027045632333767925, | |
| "loss": 9.5957, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027032594524119947, | |
| "loss": 9.4389, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002701955671447196, | |
| "loss": 9.6004, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002700651890482399, | |
| "loss": 9.6392, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002699348109517601, | |
| "loss": 9.5309, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026980443285528027, | |
| "loss": 9.6664, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002696740547588005, | |
| "loss": 9.4468, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002695436766623207, | |
| "loss": 9.6085, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002694132985658409, | |
| "loss": 9.5151, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026928292046936113, | |
| "loss": 9.6408, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026915254237288135, | |
| "loss": 9.6165, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026902216427640156, | |
| "loss": 9.7855, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002688917861799217, | |
| "loss": 9.6457, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026876140808344194, | |
| "loss": 9.7363, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026863102998696216, | |
| "loss": 9.8018, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026850065189048237, | |
| "loss": 9.7463, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002683702737940026, | |
| "loss": 9.8131, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002682398956975228, | |
| "loss": 9.7435, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000268109517601043, | |
| "loss": 9.9082, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002679791395045632, | |
| "loss": 9.9361, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026784876140808345, | |
| "loss": 9.7746, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002677183833116036, | |
| "loss": 9.8179, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002675880052151238, | |
| "loss": 9.7547, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026745762711864404, | |
| "loss": 9.884, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026732724902216426, | |
| "loss": 9.9553, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026719687092568447, | |
| "loss": 9.8555, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002670664928292047, | |
| "loss": 9.7841, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002669361147327249, | |
| "loss": 9.6766, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026680573663624506, | |
| "loss": 9.8294, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002666753585397653, | |
| "loss": 9.8066, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002665449804432855, | |
| "loss": 9.8608, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002664146023468057, | |
| "loss": 9.972, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002662842242503259, | |
| "loss": 9.9361, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026615384615384614, | |
| "loss": 10.0587, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026602346805736635, | |
| "loss": 9.9388, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026589308996088657, | |
| "loss": 9.9727, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026576271186440673, | |
| "loss": 9.9163, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000265632333767927, | |
| "loss": 9.8902, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026550195567144716, | |
| "loss": 9.885, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002653715775749674, | |
| "loss": 9.8856, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002652411994784876, | |
| "loss": 9.9419, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002651108213820078, | |
| "loss": 10.0771, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000264980443285528, | |
| "loss": 9.9506, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002648500651890482, | |
| "loss": 10.0638, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026471968709256845, | |
| "loss": 10.0016, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002645893089960886, | |
| "loss": 10.0485, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026445893089960883, | |
| "loss": 9.936, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026432855280312905, | |
| "loss": 10.0238, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026419817470664926, | |
| "loss": 10.0553, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002640677966101695, | |
| "loss": 10.0423, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002639374185136897, | |
| "loss": 9.9627, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002638070404172099, | |
| "loss": 10.0819, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026367666232073007, | |
| "loss": 9.9082, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002635462842242503, | |
| "loss": 10.0401, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026341590612777055, | |
| "loss": 9.9946, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002632855280312907, | |
| "loss": 10.0913, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026315514993481093, | |
| "loss": 10.1353, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026302477183833114, | |
| "loss": 10.0295, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026289439374185136, | |
| "loss": 10.0846, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002627640156453715, | |
| "loss": 10.0067, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026263363754889174, | |
| "loss": 10.0299, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.000262503259452412, | |
| "loss": 10.0556, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026237288135593217, | |
| "loss": 10.0429, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002622425032594524, | |
| "loss": 10.02, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002621121251629726, | |
| "loss": 10.0642, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002619817470664928, | |
| "loss": 10.1297, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026185136897001303, | |
| "loss": 10.0012, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026172099087353324, | |
| "loss": 9.9938, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026159061277705346, | |
| "loss": 10.0385, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002614602346805736, | |
| "loss": 10.0903, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026132985658409384, | |
| "loss": 10.1171, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026119947848761405, | |
| "loss": 10.1059, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026106910039113427, | |
| "loss": 10.0862, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002609387222946545, | |
| "loss": 9.9758, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002608083441981747, | |
| "loss": 9.897, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002606779661016949, | |
| "loss": 10.1126, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00026054758800521507, | |
| "loss": 9.9863, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002604172099087353, | |
| "loss": 10.0276, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002602868318122555, | |
| "loss": 10.0507, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002601564537157757, | |
| "loss": 10.0196, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00026002607561929593, | |
| "loss": 10.0326, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025989569752281615, | |
| "loss": 10.0175, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025976531942633636, | |
| "loss": 10.0518, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002596349413298565, | |
| "loss": 10.006, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002595045632333768, | |
| "loss": 10.0784, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.000259374185136897, | |
| "loss": 10.0334, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025924380704041717, | |
| "loss": 9.8772, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002591134289439374, | |
| "loss": 10.0744, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002589830508474576, | |
| "loss": 9.9749, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002588526727509778, | |
| "loss": 10.082, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025872229465449803, | |
| "loss": 9.979, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025859191655801825, | |
| "loss": 10.0619, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025846153846153846, | |
| "loss": 10.1153, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002583311603650586, | |
| "loss": 10.1584, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025820078226857884, | |
| "loss": 10.0608, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025807040417209906, | |
| "loss": 10.1515, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025794002607561927, | |
| "loss": 10.1096, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002578096479791395, | |
| "loss": 10.1689, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002576792698826597, | |
| "loss": 10.2237, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002575488917861799, | |
| "loss": 10.1274, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002574185136897001, | |
| "loss": 10.0753, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025728813559322035, | |
| "loss": 10.0884, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002571577574967405, | |
| "loss": 10.0873, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002570273794002607, | |
| "loss": 10.222, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025689700130378094, | |
| "loss": 10.1173, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025676662320730115, | |
| "loss": 10.0776, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025663624511082137, | |
| "loss": 10.1509, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002565058670143416, | |
| "loss": 10.1337, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002563754889178618, | |
| "loss": 10.2193, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025624511082138196, | |
| "loss": 10.1478, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002561147327249022, | |
| "loss": 10.2988, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002559843546284224, | |
| "loss": 10.1737, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002558539765319426, | |
| "loss": 10.1102, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002557235984354628, | |
| "loss": 10.1505, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025559322033898304, | |
| "loss": 10.1551, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025546284224250325, | |
| "loss": 10.1628, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002553324641460234, | |
| "loss": 10.227, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025520208604954363, | |
| "loss": 10.1379, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002550717079530639, | |
| "loss": 10.1795, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025494132985658406, | |
| "loss": 10.1938, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002548109517601043, | |
| "loss": 10.2941, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002546805736636245, | |
| "loss": 10.2581, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002545501955671447, | |
| "loss": 10.1737, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002544198174706649, | |
| "loss": 10.0775, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025428943937418514, | |
| "loss": 10.1794, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025415906127770535, | |
| "loss": 10.1894, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002540286831812255, | |
| "loss": 10.247, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025389830508474573, | |
| "loss": 10.2817, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025376792698826594, | |
| "loss": 10.2589, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025363754889178616, | |
| "loss": 10.232, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002535071707953064, | |
| "loss": 10.312, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002533767926988266, | |
| "loss": 10.1569, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002532464146023468, | |
| "loss": 10.1389, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025311603650586697, | |
| "loss": 10.2286, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002529856584093872, | |
| "loss": 10.2886, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002528552803129074, | |
| "loss": 10.2117, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002527249022164276, | |
| "loss": 10.1231, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025259452411994783, | |
| "loss": 10.2635, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025246414602346804, | |
| "loss": 10.1321, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025233376792698826, | |
| "loss": 10.269, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002522033898305084, | |
| "loss": 10.2287, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025207301173402864, | |
| "loss": 10.243, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002519426336375489, | |
| "loss": 10.1994, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025181225554106907, | |
| "loss": 10.3732, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002516818774445893, | |
| "loss": 10.2368, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002515514993481095, | |
| "loss": 10.2235, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002514211212516297, | |
| "loss": 10.2138, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002512907431551499, | |
| "loss": 10.1026, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025116036505867014, | |
| "loss": 10.2148, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025102998696219036, | |
| "loss": 10.328, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002508996088657105, | |
| "loss": 10.2609, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025076923076923073, | |
| "loss": 10.2178, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025063885267275095, | |
| "loss": 10.1765, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025050847457627117, | |
| "loss": 10.1472, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002503780964797914, | |
| "loss": 10.2004, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002502477183833116, | |
| "loss": 10.334, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002501173402868318, | |
| "loss": 10.3113, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024998696219035197, | |
| "loss": 10.29, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002498565840938722, | |
| "loss": 10.2834, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002497262059973924, | |
| "loss": 10.1185, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002495958279009126, | |
| "loss": 10.2562, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024946544980443283, | |
| "loss": 10.1536, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024933507170795305, | |
| "loss": 10.215, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024920469361147326, | |
| "loss": 10.2364, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002490743155149934, | |
| "loss": 10.2292, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002489439374185137, | |
| "loss": 10.2103, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024881355932203386, | |
| "loss": 10.2176, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024868318122555407, | |
| "loss": 10.2494, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002485528031290743, | |
| "loss": 10.2538, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002484224250325945, | |
| "loss": 10.1897, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002482920469361147, | |
| "loss": 10.2138, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024816166883963493, | |
| "loss": 10.2346, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024803129074315515, | |
| "loss": 10.2503, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002479009126466753, | |
| "loss": 10.1818, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002477705345501955, | |
| "loss": 10.252, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024764015645371574, | |
| "loss": 10.1853, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024750977835723596, | |
| "loss": 10.3086, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024737940026075617, | |
| "loss": 10.1909, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002472490221642764, | |
| "loss": 10.114, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002471186440677966, | |
| "loss": 10.284, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002469882659713168, | |
| "loss": 10.224, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.000246857887874837, | |
| "loss": 10.2025, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024672750977835725, | |
| "loss": 10.1293, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002465971316818774, | |
| "loss": 10.3302, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002464667535853976, | |
| "loss": 10.4794, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024633637548891784, | |
| "loss": 10.4658, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024620599739243805, | |
| "loss": 10.515, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024607561929595827, | |
| "loss": 10.4624, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002459452411994785, | |
| "loss": 10.5324, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002458148631029987, | |
| "loss": 10.5538, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024568448500651886, | |
| "loss": 10.6497, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002455541069100391, | |
| "loss": 10.8531, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002454237288135593, | |
| "loss": 10.8161, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002452933507170795, | |
| "loss": 10.636, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002451629726205997, | |
| "loss": 10.6457, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024503259452411994, | |
| "loss": 10.6817, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024490221642764015, | |
| "loss": 10.5572, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002447718383311603, | |
| "loss": 10.5855, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024464146023468053, | |
| "loss": 10.5547, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002445110821382008, | |
| "loss": 10.5638, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024438070404172096, | |
| "loss": 10.5428, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002442503259452412, | |
| "loss": 10.5823, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024411994784876136, | |
| "loss": 10.5564, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002439895697522816, | |
| "loss": 10.5738, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002438591916558018, | |
| "loss": 10.729, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.000243728813559322, | |
| "loss": 10.4963, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024359843546284223, | |
| "loss": 10.6297, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024346805736636241, | |
| "loss": 10.5185, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024333767926988266, | |
| "loss": 10.5761, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024320730117340284, | |
| "loss": 10.6883, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024307692307692306, | |
| "loss": 10.6906, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024294654498044328, | |
| "loss": 10.5998, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024281616688396346, | |
| "loss": 10.6782, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002426857887874837, | |
| "loss": 10.6462, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002425554106910039, | |
| "loss": 10.7396, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002424250325945241, | |
| "loss": 10.6588, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002422946544980443, | |
| "loss": 10.7514, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002421642764015645, | |
| "loss": 10.7364, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024203389830508473, | |
| "loss": 10.8158, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024190352020860492, | |
| "loss": 10.7492, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024177314211212516, | |
| "loss": 10.6646, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024164276401564535, | |
| "loss": 10.7252, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024151238591916556, | |
| "loss": 10.7439, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024138200782268575, | |
| "loss": 10.6968, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024125162972620597, | |
| "loss": 10.775, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002411212516297262, | |
| "loss": 10.7529, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002409908735332464, | |
| "loss": 10.7829, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002408604954367666, | |
| "loss": 10.7423, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002407301173402868, | |
| "loss": 10.8141, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024059973924380702, | |
| "loss": 10.7217, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024046936114732723, | |
| "loss": 10.7555, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024033898305084745, | |
| "loss": 10.8166, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024020860495436766, | |
| "loss": 10.7251, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024007822685788785, | |
| "loss": 10.7733, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00023994784876140807, | |
| "loss": 10.8039, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023981747066492825, | |
| "loss": 10.8321, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023968709256844847, | |
| "loss": 10.7657, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002395567144719687, | |
| "loss": 10.8044, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002394263363754889, | |
| "loss": 10.8451, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023929595827900911, | |
| "loss": 10.8282, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002391655801825293, | |
| "loss": 10.8957, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023903520208604952, | |
| "loss": 10.8406, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002389048239895697, | |
| "loss": 10.7582, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023877444589308995, | |
| "loss": 10.7831, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023864406779661016, | |
| "loss": 10.8734, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023851368970013035, | |
| "loss": 10.7978, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023838331160365057, | |
| "loss": 10.829, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023825293350717076, | |
| "loss": 10.8092, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.000238122555410691, | |
| "loss": 10.8564, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023799217731421121, | |
| "loss": 10.8201, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002378617992177314, | |
| "loss": 10.87, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023773142112125162, | |
| "loss": 10.849, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002376010430247718, | |
| "loss": 10.8149, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023747066492829202, | |
| "loss": 10.7457, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002373402868318122, | |
| "loss": 10.856, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023720990873533245, | |
| "loss": 10.8324, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023707953063885267, | |
| "loss": 10.8251, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023694915254237286, | |
| "loss": 10.8374, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023681877444589307, | |
| "loss": 10.8555, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023668839634941326, | |
| "loss": 10.8281, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002365580182529335, | |
| "loss": 10.8369, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002364276401564537, | |
| "loss": 10.8378, | |
| "step": 5000 | |
| } | |
| ], | |
| "max_steps": 23110, | |
| "num_train_epochs": 1, | |
| "total_flos": 7.846668409602048e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |