diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18661 @@ +{ + "best_metric": 0.32348209619522095, + "best_model_checkpoint": "/leonardo_work/IscrC_AGENT/PROFES2025/results/full_weighted_loss/oss_large/checkpoint-10333", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 30999, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.9998064453692055e-05, + "loss": 0.6702, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.9996128907384112e-05, + "loss": 0.6534, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.9994193361076166e-05, + "loss": 0.6504, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.999225781476822e-05, + "loss": 0.6176, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.9990322268460273e-05, + "loss": 0.5519, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1.998838672215233e-05, + "loss": 0.5601, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.9986451175844384e-05, + "loss": 0.6152, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.9984515629536437e-05, + "loss": 0.5033, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 1.9982580083228495e-05, + "loss": 0.515, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.9980644536920548e-05, + "loss": 0.6009, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.9978708990612602e-05, + "loss": 0.6259, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.9976773444304655e-05, + "loss": 0.5908, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.997483789799671e-05, + "loss": 0.504, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 1.9972902351688766e-05, + "loss": 0.6135, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.997096680538082e-05, + "loss": 0.5737, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 1.9969031259072877e-05, + "loss": 0.5727, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 1.996709571276493e-05, + "loss": 0.5528, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 1.9965160166456984e-05, + "loss": 0.5856, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 1.9963224620149038e-05, + "loss": 0.5259, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 1.996128907384109e-05, + "loss": 0.6569, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 1.995935352753315e-05, + "loss": 0.5854, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 1.9957417981225202e-05, + "loss": 0.5532, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 1.9955482434917256e-05, + "loss": 0.5784, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 1.9953546888609313e-05, + "loss": 0.4125, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 1.9951611342301366e-05, + "loss": 0.5534, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 1.9949675795993423e-05, + "loss": 0.4797, + "step": 260 + }, + { + "epoch": 0.03, + "learning_rate": 1.9947740249685474e-05, + "loss": 0.5552, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 1.994580470337753e-05, + "loss": 0.4894, + "step": 280 + }, + { + "epoch": 0.03, + "learning_rate": 1.9943869157069584e-05, + "loss": 0.5284, + "step": 290 + }, + { + "epoch": 0.03, + "learning_rate": 1.9941933610761638e-05, + "loss": 0.5146, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 1.9939998064453695e-05, + "loss": 0.5452, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 1.993806251814575e-05, + "loss": 0.4934, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 1.9936126971837802e-05, + "loss": 0.5338, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 1.993419142552986e-05, + "loss": 0.5827, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 1.9932255879221913e-05, + "loss": 0.5584, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 1.9930320332913966e-05, + "loss": 0.4878, + "step": 360 + }, + { + "epoch": 0.04, + "learning_rate": 1.992838478660602e-05, + "loss": 0.5613, + "step": 370 + }, + { + "epoch": 0.04, + "learning_rate": 1.9926449240298077e-05, + "loss": 0.5864, + "step": 380 + }, + { + "epoch": 0.04, + "learning_rate": 1.992451369399013e-05, + "loss": 0.5593, + "step": 390 + }, + { + "epoch": 0.04, + "learning_rate": 1.9922578147682184e-05, + "loss": 0.5047, + "step": 400 + }, + { + "epoch": 0.04, + "learning_rate": 1.992064260137424e-05, + "loss": 0.5795, + "step": 410 + }, + { + "epoch": 0.04, + "learning_rate": 1.991870705506629e-05, + "loss": 0.4874, + "step": 420 + }, + { + "epoch": 0.04, + "learning_rate": 1.991677150875835e-05, + "loss": 0.5935, + "step": 430 + }, + { + "epoch": 0.04, + "learning_rate": 1.9914835962450402e-05, + "loss": 0.4714, + "step": 440 + }, + { + "epoch": 0.04, + "learning_rate": 1.991290041614246e-05, + "loss": 0.4332, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 1.9910964869834513e-05, + "loss": 0.5486, + "step": 460 + }, + { + "epoch": 0.05, + "learning_rate": 1.9909029323526567e-05, + "loss": 0.5106, + "step": 470 + }, + { + "epoch": 0.05, + "learning_rate": 1.9907093777218624e-05, + "loss": 0.4095, + "step": 480 + }, + { + "epoch": 0.05, + "learning_rate": 1.9905158230910677e-05, + "loss": 0.5018, + "step": 490 + }, + { + "epoch": 0.05, + "learning_rate": 1.990322268460273e-05, + "loss": 0.4767, + "step": 500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9901287138294784e-05, + "loss": 0.5725, + "step": 510 + }, + { + "epoch": 0.05, + "learning_rate": 1.9899351591986838e-05, + "loss": 0.5087, + "step": 520 + }, + { + "epoch": 0.05, + "learning_rate": 1.9897416045678895e-05, + "loss": 0.5413, + "step": 530 + }, + { + "epoch": 0.05, + "learning_rate": 1.989548049937095e-05, + "loss": 0.5887, + "step": 540 + }, + { + "epoch": 0.05, + "learning_rate": 1.9893544953063006e-05, + "loss": 0.5282, + "step": 550 + }, + { + "epoch": 0.05, + "learning_rate": 1.989160940675506e-05, + "loss": 0.6246, + "step": 560 + }, + { + "epoch": 0.06, + "learning_rate": 1.9889673860447113e-05, + "loss": 0.4761, + "step": 570 + }, + { + "epoch": 0.06, + "learning_rate": 1.9887738314139167e-05, + "loss": 0.4526, + "step": 580 + }, + { + "epoch": 0.06, + "learning_rate": 1.988580276783122e-05, + "loss": 0.5858, + "step": 590 + }, + { + "epoch": 0.06, + "learning_rate": 1.9883867221523277e-05, + "loss": 0.4968, + "step": 600 + }, + { + "epoch": 0.06, + "learning_rate": 1.988193167521533e-05, + "loss": 0.5345, + "step": 610 + }, + { + "epoch": 0.06, + "learning_rate": 1.9879996128907385e-05, + "loss": 0.4835, + "step": 620 + }, + { + "epoch": 0.06, + "learning_rate": 1.987806058259944e-05, + "loss": 0.5663, + "step": 630 + }, + { + "epoch": 0.06, + "learning_rate": 1.9876125036291495e-05, + "loss": 0.4832, + "step": 640 + }, + { + "epoch": 0.06, + "learning_rate": 1.987418948998355e-05, + "loss": 0.5491, + "step": 650 + }, + { + "epoch": 0.06, + "learning_rate": 1.9872253943675603e-05, + "loss": 0.401, + "step": 660 + }, + { + "epoch": 0.06, + "learning_rate": 1.987031839736766e-05, + "loss": 0.4879, + "step": 670 + }, + { + "epoch": 0.07, + "learning_rate": 1.9868382851059713e-05, + "loss": 0.4662, + "step": 680 + }, + { + "epoch": 0.07, + "learning_rate": 1.9866447304751767e-05, + "loss": 0.4306, + "step": 690 + }, + { + "epoch": 0.07, + "learning_rate": 1.9864511758443824e-05, + "loss": 0.4819, + "step": 700 + }, + { + "epoch": 0.07, + "learning_rate": 1.9862576212135878e-05, + "loss": 0.6248, + "step": 710 + }, + { + "epoch": 0.07, + "learning_rate": 1.986064066582793e-05, + "loss": 0.4766, + "step": 720 + }, + { + "epoch": 0.07, + "learning_rate": 1.9858705119519985e-05, + "loss": 0.4401, + "step": 730 + }, + { + "epoch": 0.07, + "learning_rate": 1.9856769573212042e-05, + "loss": 0.5127, + "step": 740 + }, + { + "epoch": 0.07, + "learning_rate": 1.9854834026904095e-05, + "loss": 0.5708, + "step": 750 + }, + { + "epoch": 0.07, + "learning_rate": 1.985289848059615e-05, + "loss": 0.5533, + "step": 760 + }, + { + "epoch": 0.07, + "learning_rate": 1.9850962934288206e-05, + "loss": 0.5077, + "step": 770 + }, + { + "epoch": 0.08, + "learning_rate": 1.984902738798026e-05, + "loss": 0.5217, + "step": 780 + }, + { + "epoch": 0.08, + "learning_rate": 1.9847091841672313e-05, + "loss": 0.4703, + "step": 790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9845156295364367e-05, + "loss": 0.4868, + "step": 800 + }, + { + "epoch": 0.08, + "learning_rate": 1.984322074905642e-05, + "loss": 0.4672, + "step": 810 + }, + { + "epoch": 0.08, + "learning_rate": 1.9841285202748478e-05, + "loss": 0.3784, + "step": 820 + }, + { + "epoch": 0.08, + "learning_rate": 1.983934965644053e-05, + "loss": 0.5398, + "step": 830 + }, + { + "epoch": 0.08, + "learning_rate": 1.983741411013259e-05, + "loss": 0.4794, + "step": 840 + }, + { + "epoch": 0.08, + "learning_rate": 1.9835478563824642e-05, + "loss": 0.4084, + "step": 850 + }, + { + "epoch": 0.08, + "learning_rate": 1.9833543017516696e-05, + "loss": 0.471, + "step": 860 + }, + { + "epoch": 0.08, + "learning_rate": 1.983160747120875e-05, + "loss": 0.5172, + "step": 870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9829671924900803e-05, + "loss": 0.4677, + "step": 880 + }, + { + "epoch": 0.09, + "learning_rate": 1.982773637859286e-05, + "loss": 0.3877, + "step": 890 + }, + { + "epoch": 0.09, + "learning_rate": 1.9825800832284914e-05, + "loss": 0.5423, + "step": 900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9823865285976967e-05, + "loss": 0.4725, + "step": 910 + }, + { + "epoch": 0.09, + "learning_rate": 1.9821929739669024e-05, + "loss": 0.3293, + "step": 920 + }, + { + "epoch": 0.09, + "learning_rate": 1.9819994193361078e-05, + "loss": 0.4186, + "step": 930 + }, + { + "epoch": 0.09, + "learning_rate": 1.9818058647053135e-05, + "loss": 0.5641, + "step": 940 + }, + { + "epoch": 0.09, + "learning_rate": 1.9816123100745185e-05, + "loss": 0.4329, + "step": 950 + }, + { + "epoch": 0.09, + "learning_rate": 1.9814187554437242e-05, + "loss": 0.514, + "step": 960 + }, + { + "epoch": 0.09, + "learning_rate": 1.9812252008129296e-05, + "loss": 0.5057, + "step": 970 + }, + { + "epoch": 0.09, + "learning_rate": 1.981031646182135e-05, + "loss": 0.4797, + "step": 980 + }, + { + "epoch": 0.1, + "learning_rate": 1.9808380915513406e-05, + "loss": 0.4232, + "step": 990 + }, + { + "epoch": 0.1, + "learning_rate": 1.980644536920546e-05, + "loss": 0.5797, + "step": 1000 + }, + { + "epoch": 0.1, + "learning_rate": 1.9804509822897514e-05, + "loss": 0.5265, + "step": 1010 + }, + { + "epoch": 0.1, + "learning_rate": 1.980257427658957e-05, + "loss": 0.4998, + "step": 1020 + }, + { + "epoch": 0.1, + "learning_rate": 1.9800638730281624e-05, + "loss": 0.4603, + "step": 1030 + }, + { + "epoch": 0.1, + "learning_rate": 1.9798703183973678e-05, + "loss": 0.5379, + "step": 1040 + }, + { + "epoch": 0.1, + "learning_rate": 1.979676763766573e-05, + "loss": 0.4301, + "step": 1050 + }, + { + "epoch": 0.1, + "learning_rate": 1.979483209135779e-05, + "loss": 0.3812, + "step": 1060 + }, + { + "epoch": 0.1, + "learning_rate": 1.9792896545049842e-05, + "loss": 0.4182, + "step": 1070 + }, + { + "epoch": 0.1, + "learning_rate": 1.9790960998741896e-05, + "loss": 0.5267, + "step": 1080 + }, + { + "epoch": 0.11, + "learning_rate": 1.9789025452433953e-05, + "loss": 0.4483, + "step": 1090 + }, + { + "epoch": 0.11, + "learning_rate": 1.9787089906126003e-05, + "loss": 0.4548, + "step": 1100 + }, + { + "epoch": 0.11, + "learning_rate": 1.978515435981806e-05, + "loss": 0.5065, + "step": 1110 + }, + { + "epoch": 0.11, + "learning_rate": 1.9783218813510114e-05, + "loss": 0.4703, + "step": 1120 + }, + { + "epoch": 0.11, + "learning_rate": 1.978128326720217e-05, + "loss": 0.4476, + "step": 1130 + }, + { + "epoch": 0.11, + "learning_rate": 1.9779347720894225e-05, + "loss": 0.4187, + "step": 1140 + }, + { + "epoch": 0.11, + "learning_rate": 1.9777412174586278e-05, + "loss": 0.4101, + "step": 1150 + }, + { + "epoch": 0.11, + "learning_rate": 1.9775476628278335e-05, + "loss": 0.4369, + "step": 1160 + }, + { + "epoch": 0.11, + "learning_rate": 1.977354108197039e-05, + "loss": 0.4845, + "step": 1170 + }, + { + "epoch": 0.11, + "learning_rate": 1.9771605535662442e-05, + "loss": 0.463, + "step": 1180 + }, + { + "epoch": 0.12, + "learning_rate": 1.9769669989354496e-05, + "loss": 0.4338, + "step": 1190 + }, + { + "epoch": 0.12, + "learning_rate": 1.976773444304655e-05, + "loss": 0.3844, + "step": 1200 + }, + { + "epoch": 0.12, + "learning_rate": 1.9765798896738607e-05, + "loss": 0.5985, + "step": 1210 + }, + { + "epoch": 0.12, + "learning_rate": 1.976386335043066e-05, + "loss": 0.4553, + "step": 1220 + }, + { + "epoch": 0.12, + "learning_rate": 1.9761927804122717e-05, + "loss": 0.4168, + "step": 1230 + }, + { + "epoch": 0.12, + "learning_rate": 1.975999225781477e-05, + "loss": 0.4401, + "step": 1240 + }, + { + "epoch": 0.12, + "learning_rate": 1.9758056711506825e-05, + "loss": 0.4112, + "step": 1250 + }, + { + "epoch": 0.12, + "learning_rate": 1.975612116519888e-05, + "loss": 0.3185, + "step": 1260 + }, + { + "epoch": 0.12, + "learning_rate": 1.9754185618890932e-05, + "loss": 0.5604, + "step": 1270 + }, + { + "epoch": 0.12, + "learning_rate": 1.975225007258299e-05, + "loss": 0.4632, + "step": 1280 + }, + { + "epoch": 0.12, + "learning_rate": 1.9750314526275043e-05, + "loss": 0.3998, + "step": 1290 + }, + { + "epoch": 0.13, + "learning_rate": 1.9748378979967096e-05, + "loss": 0.4544, + "step": 1300 + }, + { + "epoch": 0.13, + "learning_rate": 1.9746443433659153e-05, + "loss": 0.49, + "step": 1310 + }, + { + "epoch": 0.13, + "learning_rate": 1.9744507887351207e-05, + "loss": 0.4104, + "step": 1320 + }, + { + "epoch": 0.13, + "learning_rate": 1.974257234104326e-05, + "loss": 0.48, + "step": 1330 + }, + { + "epoch": 0.13, + "learning_rate": 1.9740636794735314e-05, + "loss": 0.435, + "step": 1340 + }, + { + "epoch": 0.13, + "learning_rate": 1.973870124842737e-05, + "loss": 0.4436, + "step": 1350 + }, + { + "epoch": 0.13, + "learning_rate": 1.9736765702119425e-05, + "loss": 0.4434, + "step": 1360 + }, + { + "epoch": 0.13, + "learning_rate": 1.973483015581148e-05, + "loss": 0.4331, + "step": 1370 + }, + { + "epoch": 0.13, + "learning_rate": 1.9732894609503535e-05, + "loss": 0.3765, + "step": 1380 + }, + { + "epoch": 0.13, + "learning_rate": 1.973095906319559e-05, + "loss": 0.4924, + "step": 1390 + }, + { + "epoch": 0.14, + "learning_rate": 1.9729023516887643e-05, + "loss": 0.5318, + "step": 1400 + }, + { + "epoch": 0.14, + "learning_rate": 1.9727087970579696e-05, + "loss": 0.4324, + "step": 1410 + }, + { + "epoch": 0.14, + "learning_rate": 1.9725152424271753e-05, + "loss": 0.4705, + "step": 1420 + }, + { + "epoch": 0.14, + "learning_rate": 1.9723216877963807e-05, + "loss": 0.4485, + "step": 1430 + }, + { + "epoch": 0.14, + "learning_rate": 1.972128133165586e-05, + "loss": 0.4056, + "step": 1440 + }, + { + "epoch": 0.14, + "learning_rate": 1.9719345785347918e-05, + "loss": 0.335, + "step": 1450 + }, + { + "epoch": 0.14, + "learning_rate": 1.971741023903997e-05, + "loss": 0.4291, + "step": 1460 + }, + { + "epoch": 0.14, + "learning_rate": 1.9715474692732025e-05, + "loss": 0.3785, + "step": 1470 + }, + { + "epoch": 0.14, + "learning_rate": 1.971353914642408e-05, + "loss": 0.5012, + "step": 1480 + }, + { + "epoch": 0.14, + "learning_rate": 1.9711603600116132e-05, + "loss": 0.4799, + "step": 1490 + }, + { + "epoch": 0.15, + "learning_rate": 1.970966805380819e-05, + "loss": 0.5421, + "step": 1500 + }, + { + "epoch": 0.15, + "learning_rate": 1.9707732507500243e-05, + "loss": 0.4406, + "step": 1510 + }, + { + "epoch": 0.15, + "learning_rate": 1.97057969611923e-05, + "loss": 0.3503, + "step": 1520 + }, + { + "epoch": 0.15, + "learning_rate": 1.9703861414884354e-05, + "loss": 0.4586, + "step": 1530 + }, + { + "epoch": 0.15, + "learning_rate": 1.9701925868576407e-05, + "loss": 0.341, + "step": 1540 + }, + { + "epoch": 0.15, + "learning_rate": 1.9699990322268464e-05, + "loss": 0.4023, + "step": 1550 + }, + { + "epoch": 0.15, + "learning_rate": 1.9698054775960514e-05, + "loss": 0.3642, + "step": 1560 + }, + { + "epoch": 0.15, + "learning_rate": 1.969611922965257e-05, + "loss": 0.3413, + "step": 1570 + }, + { + "epoch": 0.15, + "learning_rate": 1.9694183683344625e-05, + "loss": 0.4084, + "step": 1580 + }, + { + "epoch": 0.15, + "learning_rate": 1.969224813703668e-05, + "loss": 0.4107, + "step": 1590 + }, + { + "epoch": 0.15, + "learning_rate": 1.9690312590728736e-05, + "loss": 0.3871, + "step": 1600 + }, + { + "epoch": 0.16, + "learning_rate": 1.968837704442079e-05, + "loss": 0.4353, + "step": 1610 + }, + { + "epoch": 0.16, + "learning_rate": 1.9686441498112846e-05, + "loss": 0.396, + "step": 1620 + }, + { + "epoch": 0.16, + "learning_rate": 1.9684505951804897e-05, + "loss": 0.4557, + "step": 1630 + }, + { + "epoch": 0.16, + "learning_rate": 1.9682570405496954e-05, + "loss": 0.4336, + "step": 1640 + }, + { + "epoch": 0.16, + "learning_rate": 1.9680634859189007e-05, + "loss": 0.3693, + "step": 1650 + }, + { + "epoch": 0.16, + "learning_rate": 1.967869931288106e-05, + "loss": 0.3659, + "step": 1660 + }, + { + "epoch": 0.16, + "learning_rate": 1.9676763766573118e-05, + "loss": 0.4539, + "step": 1670 + }, + { + "epoch": 0.16, + "learning_rate": 1.967482822026517e-05, + "loss": 0.6164, + "step": 1680 + }, + { + "epoch": 0.16, + "learning_rate": 1.9672892673957225e-05, + "loss": 0.4749, + "step": 1690 + }, + { + "epoch": 0.16, + "learning_rate": 1.9670957127649282e-05, + "loss": 0.4269, + "step": 1700 + }, + { + "epoch": 0.17, + "learning_rate": 1.9669021581341336e-05, + "loss": 0.5115, + "step": 1710 + }, + { + "epoch": 0.17, + "learning_rate": 1.966708603503339e-05, + "loss": 0.3295, + "step": 1720 + }, + { + "epoch": 0.17, + "learning_rate": 1.9665150488725443e-05, + "loss": 0.3294, + "step": 1730 + }, + { + "epoch": 0.17, + "learning_rate": 1.96632149424175e-05, + "loss": 0.3672, + "step": 1740 + }, + { + "epoch": 0.17, + "learning_rate": 1.9661279396109554e-05, + "loss": 0.4018, + "step": 1750 + }, + { + "epoch": 0.17, + "learning_rate": 1.9659343849801608e-05, + "loss": 0.4524, + "step": 1760 + }, + { + "epoch": 0.17, + "learning_rate": 1.9657408303493665e-05, + "loss": 0.3589, + "step": 1770 + }, + { + "epoch": 0.17, + "learning_rate": 1.9655472757185715e-05, + "loss": 0.4565, + "step": 1780 + }, + { + "epoch": 0.17, + "learning_rate": 1.9653537210877772e-05, + "loss": 0.4833, + "step": 1790 + }, + { + "epoch": 0.17, + "learning_rate": 1.9651601664569825e-05, + "loss": 0.3666, + "step": 1800 + }, + { + "epoch": 0.18, + "learning_rate": 1.9649666118261882e-05, + "loss": 0.4113, + "step": 1810 + }, + { + "epoch": 0.18, + "learning_rate": 1.9647730571953936e-05, + "loss": 0.3988, + "step": 1820 + }, + { + "epoch": 0.18, + "learning_rate": 1.964579502564599e-05, + "loss": 0.3072, + "step": 1830 + }, + { + "epoch": 0.18, + "learning_rate": 1.9643859479338047e-05, + "loss": 0.4404, + "step": 1840 + }, + { + "epoch": 0.18, + "learning_rate": 1.96419239330301e-05, + "loss": 0.3852, + "step": 1850 + }, + { + "epoch": 0.18, + "learning_rate": 1.9639988386722154e-05, + "loss": 0.3689, + "step": 1860 + }, + { + "epoch": 0.18, + "learning_rate": 1.9638052840414208e-05, + "loss": 0.2922, + "step": 1870 + }, + { + "epoch": 0.18, + "learning_rate": 1.963611729410626e-05, + "loss": 0.4405, + "step": 1880 + }, + { + "epoch": 0.18, + "learning_rate": 1.963418174779832e-05, + "loss": 0.4831, + "step": 1890 + }, + { + "epoch": 0.18, + "learning_rate": 1.9632246201490372e-05, + "loss": 0.3095, + "step": 1900 + }, + { + "epoch": 0.18, + "learning_rate": 1.963031065518243e-05, + "loss": 0.4492, + "step": 1910 + }, + { + "epoch": 0.19, + "learning_rate": 1.9628375108874483e-05, + "loss": 0.4074, + "step": 1920 + }, + { + "epoch": 0.19, + "learning_rate": 1.9626439562566536e-05, + "loss": 0.3934, + "step": 1930 + }, + { + "epoch": 0.19, + "learning_rate": 1.962450401625859e-05, + "loss": 0.5242, + "step": 1940 + }, + { + "epoch": 0.19, + "learning_rate": 1.9622568469950644e-05, + "loss": 0.4109, + "step": 1950 + }, + { + "epoch": 0.19, + "learning_rate": 1.96206329236427e-05, + "loss": 0.3953, + "step": 1960 + }, + { + "epoch": 0.19, + "learning_rate": 1.9618697377334754e-05, + "loss": 0.4802, + "step": 1970 + }, + { + "epoch": 0.19, + "learning_rate": 1.9616761831026808e-05, + "loss": 0.4173, + "step": 1980 + }, + { + "epoch": 0.19, + "learning_rate": 1.9614826284718865e-05, + "loss": 0.3493, + "step": 1990 + }, + { + "epoch": 0.19, + "learning_rate": 1.961289073841092e-05, + "loss": 0.4284, + "step": 2000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9610955192102972e-05, + "loss": 0.281, + "step": 2010 + }, + { + "epoch": 0.2, + "learning_rate": 1.9609019645795026e-05, + "loss": 0.4064, + "step": 2020 + }, + { + "epoch": 0.2, + "learning_rate": 1.9607084099487083e-05, + "loss": 0.4693, + "step": 2030 + }, + { + "epoch": 0.2, + "learning_rate": 1.9605148553179136e-05, + "loss": 0.3675, + "step": 2040 + }, + { + "epoch": 0.2, + "learning_rate": 1.960321300687119e-05, + "loss": 0.434, + "step": 2050 + }, + { + "epoch": 0.2, + "learning_rate": 1.9601277460563247e-05, + "loss": 0.4622, + "step": 2060 + }, + { + "epoch": 0.2, + "learning_rate": 1.95993419142553e-05, + "loss": 0.3378, + "step": 2070 + }, + { + "epoch": 0.2, + "learning_rate": 1.9597406367947354e-05, + "loss": 0.3537, + "step": 2080 + }, + { + "epoch": 0.2, + "learning_rate": 1.9595470821639408e-05, + "loss": 0.3734, + "step": 2090 + }, + { + "epoch": 0.2, + "learning_rate": 1.9593535275331465e-05, + "loss": 0.5988, + "step": 2100 + }, + { + "epoch": 0.2, + "learning_rate": 1.959159972902352e-05, + "loss": 0.3383, + "step": 2110 + }, + { + "epoch": 0.21, + "learning_rate": 1.9589664182715572e-05, + "loss": 0.3184, + "step": 2120 + }, + { + "epoch": 0.21, + "learning_rate": 1.958772863640763e-05, + "loss": 0.4947, + "step": 2130 + }, + { + "epoch": 0.21, + "learning_rate": 1.9585793090099683e-05, + "loss": 0.4162, + "step": 2140 + }, + { + "epoch": 0.21, + "learning_rate": 1.9583857543791737e-05, + "loss": 0.5044, + "step": 2150 + }, + { + "epoch": 0.21, + "learning_rate": 1.958192199748379e-05, + "loss": 0.363, + "step": 2160 + }, + { + "epoch": 0.21, + "learning_rate": 1.9579986451175844e-05, + "loss": 0.5765, + "step": 2170 + }, + { + "epoch": 0.21, + "learning_rate": 1.95780509048679e-05, + "loss": 0.3375, + "step": 2180 + }, + { + "epoch": 0.21, + "learning_rate": 1.9576115358559955e-05, + "loss": 0.3109, + "step": 2190 + }, + { + "epoch": 0.21, + "learning_rate": 1.957417981225201e-05, + "loss": 0.4322, + "step": 2200 + }, + { + "epoch": 0.21, + "learning_rate": 1.9572244265944065e-05, + "loss": 0.3918, + "step": 2210 + }, + { + "epoch": 0.21, + "learning_rate": 1.957030871963612e-05, + "loss": 0.5206, + "step": 2220 + }, + { + "epoch": 0.22, + "learning_rate": 1.9568373173328176e-05, + "loss": 0.395, + "step": 2230 + }, + { + "epoch": 0.22, + "learning_rate": 1.9566437627020226e-05, + "loss": 0.2894, + "step": 2240 + }, + { + "epoch": 0.22, + "learning_rate": 1.9564502080712283e-05, + "loss": 0.4633, + "step": 2250 + }, + { + "epoch": 0.22, + "learning_rate": 1.9562566534404337e-05, + "loss": 0.4803, + "step": 2260 + }, + { + "epoch": 0.22, + "learning_rate": 1.956063098809639e-05, + "loss": 0.4833, + "step": 2270 + }, + { + "epoch": 0.22, + "learning_rate": 1.9558695441788447e-05, + "loss": 0.3793, + "step": 2280 + }, + { + "epoch": 0.22, + "learning_rate": 1.95567598954805e-05, + "loss": 0.3934, + "step": 2290 + }, + { + "epoch": 0.22, + "learning_rate": 1.9554824349172558e-05, + "loss": 0.4539, + "step": 2300 + }, + { + "epoch": 0.22, + "learning_rate": 1.9552888802864608e-05, + "loss": 0.3877, + "step": 2310 + }, + { + "epoch": 0.22, + "learning_rate": 1.9550953256556665e-05, + "loss": 0.3713, + "step": 2320 + }, + { + "epoch": 0.23, + "learning_rate": 1.954901771024872e-05, + "loss": 0.4035, + "step": 2330 + }, + { + "epoch": 0.23, + "learning_rate": 1.9547082163940773e-05, + "loss": 0.3546, + "step": 2340 + }, + { + "epoch": 0.23, + "learning_rate": 1.954514661763283e-05, + "loss": 0.436, + "step": 2350 + }, + { + "epoch": 0.23, + "learning_rate": 1.9543211071324883e-05, + "loss": 0.5304, + "step": 2360 + }, + { + "epoch": 0.23, + "learning_rate": 1.9541275525016937e-05, + "loss": 0.331, + "step": 2370 + }, + { + "epoch": 0.23, + "learning_rate": 1.9539339978708994e-05, + "loss": 0.3991, + "step": 2380 + }, + { + "epoch": 0.23, + "learning_rate": 1.9537404432401048e-05, + "loss": 0.3438, + "step": 2390 + }, + { + "epoch": 0.23, + "learning_rate": 1.95354688860931e-05, + "loss": 0.2611, + "step": 2400 + }, + { + "epoch": 0.23, + "learning_rate": 1.9533533339785155e-05, + "loss": 0.4093, + "step": 2410 + }, + { + "epoch": 0.23, + "learning_rate": 1.9531597793477212e-05, + "loss": 0.4731, + "step": 2420 + }, + { + "epoch": 0.24, + "learning_rate": 1.9529662247169265e-05, + "loss": 0.4533, + "step": 2430 + }, + { + "epoch": 0.24, + "learning_rate": 1.952772670086132e-05, + "loss": 0.417, + "step": 2440 + }, + { + "epoch": 0.24, + "learning_rate": 1.9525791154553376e-05, + "loss": 0.4411, + "step": 2450 + }, + { + "epoch": 0.24, + "learning_rate": 1.9523855608245426e-05, + "loss": 0.3764, + "step": 2460 + }, + { + "epoch": 0.24, + "learning_rate": 1.9521920061937483e-05, + "loss": 0.3516, + "step": 2470 + }, + { + "epoch": 0.24, + "learning_rate": 1.9519984515629537e-05, + "loss": 0.4926, + "step": 2480 + }, + { + "epoch": 0.24, + "learning_rate": 1.9518048969321594e-05, + "loss": 0.3807, + "step": 2490 + }, + { + "epoch": 0.24, + "learning_rate": 1.9516113423013648e-05, + "loss": 0.4714, + "step": 2500 + }, + { + "epoch": 0.24, + "learning_rate": 1.95141778767057e-05, + "loss": 0.3999, + "step": 2510 + }, + { + "epoch": 0.24, + "learning_rate": 1.951224233039776e-05, + "loss": 0.4388, + "step": 2520 + }, + { + "epoch": 0.24, + "learning_rate": 1.9510306784089812e-05, + "loss": 0.298, + "step": 2530 + }, + { + "epoch": 0.25, + "learning_rate": 1.9508371237781866e-05, + "loss": 0.3143, + "step": 2540 + }, + { + "epoch": 0.25, + "learning_rate": 1.950643569147392e-05, + "loss": 0.444, + "step": 2550 + }, + { + "epoch": 0.25, + "learning_rate": 1.9504500145165973e-05, + "loss": 0.4594, + "step": 2560 + }, + { + "epoch": 0.25, + "learning_rate": 1.950256459885803e-05, + "loss": 0.3178, + "step": 2570 + }, + { + "epoch": 0.25, + "learning_rate": 1.9500629052550084e-05, + "loss": 0.505, + "step": 2580 + }, + { + "epoch": 0.25, + "learning_rate": 1.949869350624214e-05, + "loss": 0.3597, + "step": 2590 + }, + { + "epoch": 0.25, + "learning_rate": 1.9496757959934194e-05, + "loss": 0.3297, + "step": 2600 + }, + { + "epoch": 0.25, + "learning_rate": 1.9494822413626248e-05, + "loss": 0.3853, + "step": 2610 + }, + { + "epoch": 0.25, + "learning_rate": 1.94928868673183e-05, + "loss": 0.4641, + "step": 2620 + }, + { + "epoch": 0.25, + "learning_rate": 1.9490951321010355e-05, + "loss": 0.4039, + "step": 2630 + }, + { + "epoch": 0.26, + "learning_rate": 1.9489015774702412e-05, + "loss": 0.269, + "step": 2640 + }, + { + "epoch": 0.26, + "learning_rate": 1.9487080228394466e-05, + "loss": 0.4259, + "step": 2650 + }, + { + "epoch": 0.26, + "learning_rate": 1.948514468208652e-05, + "loss": 0.497, + "step": 2660 + }, + { + "epoch": 0.26, + "learning_rate": 1.9483209135778576e-05, + "loss": 0.4025, + "step": 2670 + }, + { + "epoch": 0.26, + "learning_rate": 1.948127358947063e-05, + "loss": 0.3003, + "step": 2680 + }, + { + "epoch": 0.26, + "learning_rate": 1.9479338043162684e-05, + "loss": 0.4762, + "step": 2690 + }, + { + "epoch": 0.26, + "learning_rate": 1.9477402496854737e-05, + "loss": 0.4207, + "step": 2700 + }, + { + "epoch": 0.26, + "learning_rate": 1.9475466950546794e-05, + "loss": 0.3517, + "step": 2710 + }, + { + "epoch": 0.26, + "learning_rate": 1.9473531404238848e-05, + "loss": 0.4658, + "step": 2720 + }, + { + "epoch": 0.26, + "learning_rate": 1.94715958579309e-05, + "loss": 0.3262, + "step": 2730 + }, + { + "epoch": 0.27, + "learning_rate": 1.946966031162296e-05, + "loss": 0.3176, + "step": 2740 + }, + { + "epoch": 0.27, + "learning_rate": 1.9467724765315012e-05, + "loss": 0.5095, + "step": 2750 + }, + { + "epoch": 0.27, + "learning_rate": 1.9465789219007066e-05, + "loss": 0.3552, + "step": 2760 + }, + { + "epoch": 0.27, + "learning_rate": 1.946385367269912e-05, + "loss": 0.3196, + "step": 2770 + }, + { + "epoch": 0.27, + "learning_rate": 1.9461918126391177e-05, + "loss": 0.3575, + "step": 2780 + }, + { + "epoch": 0.27, + "learning_rate": 1.945998258008323e-05, + "loss": 0.4509, + "step": 2790 + }, + { + "epoch": 0.27, + "learning_rate": 1.9458047033775284e-05, + "loss": 0.2998, + "step": 2800 + }, + { + "epoch": 0.27, + "learning_rate": 1.945611148746734e-05, + "loss": 0.4856, + "step": 2810 + }, + { + "epoch": 0.27, + "learning_rate": 1.9454175941159395e-05, + "loss": 0.454, + "step": 2820 + }, + { + "epoch": 0.27, + "learning_rate": 1.9452240394851448e-05, + "loss": 0.319, + "step": 2830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9450304848543502e-05, + "loss": 0.4077, + "step": 2840 + }, + { + "epoch": 0.28, + "learning_rate": 1.9448369302235555e-05, + "loss": 0.3323, + "step": 2850 + }, + { + "epoch": 0.28, + "learning_rate": 1.9446433755927612e-05, + "loss": 0.3237, + "step": 2860 + }, + { + "epoch": 0.28, + "learning_rate": 1.9444498209619666e-05, + "loss": 0.4422, + "step": 2870 + }, + { + "epoch": 0.28, + "learning_rate": 1.9442562663311723e-05, + "loss": 0.432, + "step": 2880 + }, + { + "epoch": 0.28, + "learning_rate": 1.9440627117003777e-05, + "loss": 0.3756, + "step": 2890 + }, + { + "epoch": 0.28, + "learning_rate": 1.943869157069583e-05, + "loss": 0.361, + "step": 2900 + }, + { + "epoch": 0.28, + "learning_rate": 1.9436756024387887e-05, + "loss": 0.245, + "step": 2910 + }, + { + "epoch": 0.28, + "learning_rate": 1.9434820478079938e-05, + "loss": 0.2705, + "step": 2920 + }, + { + "epoch": 0.28, + "learning_rate": 1.9432884931771995e-05, + "loss": 0.3702, + "step": 2930 + }, + { + "epoch": 0.28, + "learning_rate": 1.943094938546405e-05, + "loss": 0.4298, + "step": 2940 + }, + { + "epoch": 0.29, + "learning_rate": 1.9429013839156102e-05, + "loss": 0.4333, + "step": 2950 + }, + { + "epoch": 0.29, + "learning_rate": 1.942707829284816e-05, + "loss": 0.3286, + "step": 2960 + }, + { + "epoch": 0.29, + "learning_rate": 1.9425142746540213e-05, + "loss": 0.4895, + "step": 2970 + }, + { + "epoch": 0.29, + "learning_rate": 1.942320720023227e-05, + "loss": 0.3283, + "step": 2980 + }, + { + "epoch": 0.29, + "learning_rate": 1.942127165392432e-05, + "loss": 0.4404, + "step": 2990 + }, + { + "epoch": 0.29, + "learning_rate": 1.9419336107616377e-05, + "loss": 0.3959, + "step": 3000 + }, + { + "epoch": 0.29, + "learning_rate": 1.941740056130843e-05, + "loss": 0.4659, + "step": 3010 + }, + { + "epoch": 0.29, + "learning_rate": 1.9415465015000484e-05, + "loss": 0.3416, + "step": 3020 + }, + { + "epoch": 0.29, + "learning_rate": 1.941352946869254e-05, + "loss": 0.3768, + "step": 3030 + }, + { + "epoch": 0.29, + "learning_rate": 1.9411593922384595e-05, + "loss": 0.3742, + "step": 3040 + }, + { + "epoch": 0.3, + "learning_rate": 1.940965837607665e-05, + "loss": 0.3142, + "step": 3050 + }, + { + "epoch": 0.3, + "learning_rate": 1.9407722829768706e-05, + "loss": 0.3651, + "step": 3060 + }, + { + "epoch": 0.3, + "learning_rate": 1.940578728346076e-05, + "loss": 0.4814, + "step": 3070 + }, + { + "epoch": 0.3, + "learning_rate": 1.9403851737152813e-05, + "loss": 0.3414, + "step": 3080 + }, + { + "epoch": 0.3, + "learning_rate": 1.9401916190844866e-05, + "loss": 0.3209, + "step": 3090 + }, + { + "epoch": 0.3, + "learning_rate": 1.9399980644536923e-05, + "loss": 0.3836, + "step": 3100 + }, + { + "epoch": 0.3, + "learning_rate": 1.9398045098228977e-05, + "loss": 0.3541, + "step": 3110 + }, + { + "epoch": 0.3, + "learning_rate": 1.939610955192103e-05, + "loss": 0.3712, + "step": 3120 + }, + { + "epoch": 0.3, + "learning_rate": 1.9394174005613088e-05, + "loss": 0.3334, + "step": 3130 + }, + { + "epoch": 0.3, + "learning_rate": 1.9392238459305138e-05, + "loss": 0.3763, + "step": 3140 + }, + { + "epoch": 0.3, + "learning_rate": 1.9390302912997195e-05, + "loss": 0.3172, + "step": 3150 + }, + { + "epoch": 0.31, + "learning_rate": 1.938836736668925e-05, + "loss": 0.4085, + "step": 3160 + }, + { + "epoch": 0.31, + "learning_rate": 1.9386431820381306e-05, + "loss": 0.3642, + "step": 3170 + }, + { + "epoch": 0.31, + "learning_rate": 1.938449627407336e-05, + "loss": 0.3908, + "step": 3180 + }, + { + "epoch": 0.31, + "learning_rate": 1.9382560727765413e-05, + "loss": 0.4342, + "step": 3190 + }, + { + "epoch": 0.31, + "learning_rate": 1.938062518145747e-05, + "loss": 0.4296, + "step": 3200 + }, + { + "epoch": 0.31, + "learning_rate": 1.9378689635149524e-05, + "loss": 0.3587, + "step": 3210 + }, + { + "epoch": 0.31, + "learning_rate": 1.9376754088841577e-05, + "loss": 0.3221, + "step": 3220 + }, + { + "epoch": 0.31, + "learning_rate": 1.937481854253363e-05, + "loss": 0.3671, + "step": 3230 + }, + { + "epoch": 0.31, + "learning_rate": 1.9372882996225685e-05, + "loss": 0.2918, + "step": 3240 + }, + { + "epoch": 0.31, + "learning_rate": 1.937094744991774e-05, + "loss": 0.4048, + "step": 3250 + }, + { + "epoch": 0.32, + "learning_rate": 1.9369011903609795e-05, + "loss": 0.3566, + "step": 3260 + }, + { + "epoch": 0.32, + "learning_rate": 1.9367076357301852e-05, + "loss": 0.4537, + "step": 3270 + }, + { + "epoch": 0.32, + "learning_rate": 1.9365140810993906e-05, + "loss": 0.3226, + "step": 3280 + }, + { + "epoch": 0.32, + "learning_rate": 1.936320526468596e-05, + "loss": 0.4957, + "step": 3290 + }, + { + "epoch": 0.32, + "learning_rate": 1.9361269718378013e-05, + "loss": 0.3721, + "step": 3300 + }, + { + "epoch": 0.32, + "learning_rate": 1.9359334172070067e-05, + "loss": 0.3961, + "step": 3310 + }, + { + "epoch": 0.32, + "learning_rate": 1.9357398625762124e-05, + "loss": 0.4128, + "step": 3320 + }, + { + "epoch": 0.32, + "learning_rate": 1.9355463079454177e-05, + "loss": 0.268, + "step": 3330 + }, + { + "epoch": 0.32, + "learning_rate": 1.935352753314623e-05, + "loss": 0.3519, + "step": 3340 + }, + { + "epoch": 0.32, + "learning_rate": 1.9351591986838288e-05, + "loss": 0.329, + "step": 3350 + }, + { + "epoch": 0.33, + "learning_rate": 1.934965644053034e-05, + "loss": 0.5037, + "step": 3360 + }, + { + "epoch": 0.33, + "learning_rate": 1.9347720894222395e-05, + "loss": 0.3974, + "step": 3370 + }, + { + "epoch": 0.33, + "learning_rate": 1.934578534791445e-05, + "loss": 0.3695, + "step": 3380 + }, + { + "epoch": 0.33, + "learning_rate": 1.9343849801606506e-05, + "loss": 0.4009, + "step": 3390 + }, + { + "epoch": 0.33, + "learning_rate": 1.934191425529856e-05, + "loss": 0.3532, + "step": 3400 + }, + { + "epoch": 0.33, + "learning_rate": 1.9339978708990613e-05, + "loss": 0.3979, + "step": 3410 + }, + { + "epoch": 0.33, + "learning_rate": 1.933804316268267e-05, + "loss": 0.3243, + "step": 3420 + }, + { + "epoch": 0.33, + "learning_rate": 1.9336107616374724e-05, + "loss": 0.315, + "step": 3430 + }, + { + "epoch": 0.33, + "learning_rate": 1.9334172070066778e-05, + "loss": 0.4386, + "step": 3440 + }, + { + "epoch": 0.33, + "learning_rate": 1.933223652375883e-05, + "loss": 0.5655, + "step": 3450 + }, + { + "epoch": 0.33, + "learning_rate": 1.9330300977450888e-05, + "loss": 0.4439, + "step": 3460 + }, + { + "epoch": 0.34, + "learning_rate": 1.9328365431142942e-05, + "loss": 0.3392, + "step": 3470 + }, + { + "epoch": 0.34, + "learning_rate": 1.9326429884834995e-05, + "loss": 0.4329, + "step": 3480 + }, + { + "epoch": 0.34, + "learning_rate": 1.9324494338527053e-05, + "loss": 0.4039, + "step": 3490 + }, + { + "epoch": 0.34, + "learning_rate": 1.9322558792219106e-05, + "loss": 0.5243, + "step": 3500 + }, + { + "epoch": 0.34, + "learning_rate": 1.932062324591116e-05, + "loss": 0.3983, + "step": 3510 + }, + { + "epoch": 0.34, + "learning_rate": 1.9318687699603213e-05, + "loss": 0.3782, + "step": 3520 + }, + { + "epoch": 0.34, + "learning_rate": 1.9316752153295267e-05, + "loss": 0.4054, + "step": 3530 + }, + { + "epoch": 0.34, + "learning_rate": 1.9314816606987324e-05, + "loss": 0.4127, + "step": 3540 + }, + { + "epoch": 0.34, + "learning_rate": 1.9312881060679378e-05, + "loss": 0.3722, + "step": 3550 + }, + { + "epoch": 0.34, + "learning_rate": 1.9310945514371435e-05, + "loss": 0.551, + "step": 3560 + }, + { + "epoch": 0.35, + "learning_rate": 1.930900996806349e-05, + "loss": 0.2479, + "step": 3570 + }, + { + "epoch": 0.35, + "learning_rate": 1.9307074421755542e-05, + "loss": 0.5031, + "step": 3580 + }, + { + "epoch": 0.35, + "learning_rate": 1.93051388754476e-05, + "loss": 0.3735, + "step": 3590 + }, + { + "epoch": 0.35, + "learning_rate": 1.930320332913965e-05, + "loss": 0.2578, + "step": 3600 + }, + { + "epoch": 0.35, + "learning_rate": 1.9301267782831706e-05, + "loss": 0.3022, + "step": 3610 + }, + { + "epoch": 0.35, + "learning_rate": 1.929933223652376e-05, + "loss": 0.4937, + "step": 3620 + }, + { + "epoch": 0.35, + "learning_rate": 1.9297396690215814e-05, + "loss": 0.4229, + "step": 3630 + }, + { + "epoch": 0.35, + "learning_rate": 1.929546114390787e-05, + "loss": 0.3849, + "step": 3640 + }, + { + "epoch": 0.35, + "learning_rate": 1.9293525597599924e-05, + "loss": 0.3684, + "step": 3650 + }, + { + "epoch": 0.35, + "learning_rate": 1.929159005129198e-05, + "loss": 0.4077, + "step": 3660 + }, + { + "epoch": 0.36, + "learning_rate": 1.928965450498403e-05, + "loss": 0.4068, + "step": 3670 + }, + { + "epoch": 0.36, + "learning_rate": 1.928771895867609e-05, + "loss": 0.3478, + "step": 3680 + }, + { + "epoch": 0.36, + "learning_rate": 1.9285783412368142e-05, + "loss": 0.34, + "step": 3690 + }, + { + "epoch": 0.36, + "learning_rate": 1.9283847866060196e-05, + "loss": 0.3703, + "step": 3700 + }, + { + "epoch": 0.36, + "learning_rate": 1.9281912319752253e-05, + "loss": 0.4306, + "step": 3710 + }, + { + "epoch": 0.36, + "learning_rate": 1.9279976773444306e-05, + "loss": 0.2496, + "step": 3720 + }, + { + "epoch": 0.36, + "learning_rate": 1.927804122713636e-05, + "loss": 0.5016, + "step": 3730 + }, + { + "epoch": 0.36, + "learning_rate": 1.9276105680828417e-05, + "loss": 0.3561, + "step": 3740 + }, + { + "epoch": 0.36, + "learning_rate": 1.9274170134520467e-05, + "loss": 0.3425, + "step": 3750 + }, + { + "epoch": 0.36, + "learning_rate": 1.9272234588212524e-05, + "loss": 0.4288, + "step": 3760 + }, + { + "epoch": 0.36, + "learning_rate": 1.9270299041904578e-05, + "loss": 0.4374, + "step": 3770 + }, + { + "epoch": 0.37, + "learning_rate": 1.9268363495596635e-05, + "loss": 0.3769, + "step": 3780 + }, + { + "epoch": 0.37, + "learning_rate": 1.926642794928869e-05, + "loss": 0.4381, + "step": 3790 + }, + { + "epoch": 0.37, + "learning_rate": 1.9264492402980742e-05, + "loss": 0.2633, + "step": 3800 + }, + { + "epoch": 0.37, + "learning_rate": 1.92625568566728e-05, + "loss": 0.405, + "step": 3810 + }, + { + "epoch": 0.37, + "learning_rate": 1.9260621310364853e-05, + "loss": 0.2709, + "step": 3820 + }, + { + "epoch": 0.37, + "learning_rate": 1.9258685764056907e-05, + "loss": 0.5473, + "step": 3830 + }, + { + "epoch": 0.37, + "learning_rate": 1.925675021774896e-05, + "loss": 0.4159, + "step": 3840 + }, + { + "epoch": 0.37, + "learning_rate": 1.9254814671441014e-05, + "loss": 0.3858, + "step": 3850 + }, + { + "epoch": 0.37, + "learning_rate": 1.925287912513307e-05, + "loss": 0.4223, + "step": 3860 + }, + { + "epoch": 0.37, + "learning_rate": 1.9250943578825125e-05, + "loss": 0.3838, + "step": 3870 + }, + { + "epoch": 0.38, + "learning_rate": 1.924900803251718e-05, + "loss": 0.3273, + "step": 3880 + }, + { + "epoch": 0.38, + "learning_rate": 1.9247072486209235e-05, + "loss": 0.3045, + "step": 3890 + }, + { + "epoch": 0.38, + "learning_rate": 1.924513693990129e-05, + "loss": 0.3945, + "step": 3900 + }, + { + "epoch": 0.38, + "learning_rate": 1.9243201393593342e-05, + "loss": 0.3587, + "step": 3910 + }, + { + "epoch": 0.38, + "learning_rate": 1.9241265847285396e-05, + "loss": 0.3258, + "step": 3920 + }, + { + "epoch": 0.38, + "learning_rate": 1.9239330300977453e-05, + "loss": 0.4742, + "step": 3930 + }, + { + "epoch": 0.38, + "learning_rate": 1.9237394754669507e-05, + "loss": 0.3207, + "step": 3940 + }, + { + "epoch": 0.38, + "learning_rate": 1.9235459208361564e-05, + "loss": 0.3781, + "step": 3950 + }, + { + "epoch": 0.38, + "learning_rate": 1.9233523662053617e-05, + "loss": 0.4528, + "step": 3960 + }, + { + "epoch": 0.38, + "learning_rate": 1.923158811574567e-05, + "loss": 0.3518, + "step": 3970 + }, + { + "epoch": 0.39, + "learning_rate": 1.9229652569437725e-05, + "loss": 0.257, + "step": 3980 + }, + { + "epoch": 0.39, + "learning_rate": 1.922771702312978e-05, + "loss": 0.3939, + "step": 3990 + }, + { + "epoch": 0.39, + "learning_rate": 1.9225781476821835e-05, + "loss": 0.4746, + "step": 4000 + }, + { + "epoch": 0.39, + "learning_rate": 1.922384593051389e-05, + "loss": 0.3215, + "step": 4010 + }, + { + "epoch": 0.39, + "learning_rate": 1.9221910384205943e-05, + "loss": 0.3318, + "step": 4020 + }, + { + "epoch": 0.39, + "learning_rate": 1.9219974837898e-05, + "loss": 0.4439, + "step": 4030 + }, + { + "epoch": 0.39, + "learning_rate": 1.9218039291590053e-05, + "loss": 0.3374, + "step": 4040 + }, + { + "epoch": 0.39, + "learning_rate": 1.9216103745282107e-05, + "loss": 0.3451, + "step": 4050 + }, + { + "epoch": 0.39, + "learning_rate": 1.921416819897416e-05, + "loss": 0.3094, + "step": 4060 + }, + { + "epoch": 0.39, + "learning_rate": 1.9212232652666218e-05, + "loss": 0.2938, + "step": 4070 + }, + { + "epoch": 0.39, + "learning_rate": 1.921029710635827e-05, + "loss": 0.3519, + "step": 4080 + }, + { + "epoch": 0.4, + "learning_rate": 1.9208361560050325e-05, + "loss": 0.4223, + "step": 4090 + }, + { + "epoch": 0.4, + "learning_rate": 1.9206426013742382e-05, + "loss": 0.3957, + "step": 4100 + }, + { + "epoch": 0.4, + "learning_rate": 1.9204490467434436e-05, + "loss": 0.4627, + "step": 4110 + }, + { + "epoch": 0.4, + "learning_rate": 1.920255492112649e-05, + "loss": 0.3205, + "step": 4120 + }, + { + "epoch": 0.4, + "learning_rate": 1.9200619374818543e-05, + "loss": 0.3666, + "step": 4130 + }, + { + "epoch": 0.4, + "learning_rate": 1.9198683828510596e-05, + "loss": 0.3097, + "step": 4140 + }, + { + "epoch": 0.4, + "learning_rate": 1.9196748282202653e-05, + "loss": 0.5067, + "step": 4150 + }, + { + "epoch": 0.4, + "learning_rate": 1.9194812735894707e-05, + "loss": 0.3477, + "step": 4160 + }, + { + "epoch": 0.4, + "learning_rate": 1.9192877189586764e-05, + "loss": 0.4193, + "step": 4170 + }, + { + "epoch": 0.4, + "learning_rate": 1.9190941643278818e-05, + "loss": 0.2697, + "step": 4180 + }, + { + "epoch": 0.41, + "learning_rate": 1.918900609697087e-05, + "loss": 0.3975, + "step": 4190 + }, + { + "epoch": 0.41, + "learning_rate": 1.9187070550662925e-05, + "loss": 0.3699, + "step": 4200 + }, + { + "epoch": 0.41, + "learning_rate": 1.918513500435498e-05, + "loss": 0.3219, + "step": 4210 + }, + { + "epoch": 0.41, + "learning_rate": 1.9183199458047036e-05, + "loss": 0.4122, + "step": 4220 + }, + { + "epoch": 0.41, + "learning_rate": 1.918126391173909e-05, + "loss": 0.2784, + "step": 4230 + }, + { + "epoch": 0.41, + "learning_rate": 1.9179328365431143e-05, + "loss": 0.3583, + "step": 4240 + }, + { + "epoch": 0.41, + "learning_rate": 1.91773928191232e-05, + "loss": 0.3288, + "step": 4250 + }, + { + "epoch": 0.41, + "learning_rate": 1.9175457272815254e-05, + "loss": 0.4302, + "step": 4260 + }, + { + "epoch": 0.41, + "learning_rate": 1.917352172650731e-05, + "loss": 0.2683, + "step": 4270 + }, + { + "epoch": 0.41, + "learning_rate": 1.917158618019936e-05, + "loss": 0.3051, + "step": 4280 + }, + { + "epoch": 0.42, + "learning_rate": 1.9169650633891418e-05, + "loss": 0.4471, + "step": 4290 + }, + { + "epoch": 0.42, + "learning_rate": 1.916771508758347e-05, + "loss": 0.4197, + "step": 4300 + }, + { + "epoch": 0.42, + "learning_rate": 1.9165779541275525e-05, + "loss": 0.317, + "step": 4310 + }, + { + "epoch": 0.42, + "learning_rate": 1.9163843994967582e-05, + "loss": 0.4387, + "step": 4320 + }, + { + "epoch": 0.42, + "learning_rate": 1.9161908448659636e-05, + "loss": 0.3962, + "step": 4330 + }, + { + "epoch": 0.42, + "learning_rate": 1.9159972902351693e-05, + "loss": 0.4048, + "step": 4340 + }, + { + "epoch": 0.42, + "learning_rate": 1.9158037356043743e-05, + "loss": 0.2978, + "step": 4350 + }, + { + "epoch": 0.42, + "learning_rate": 1.91561018097358e-05, + "loss": 0.282, + "step": 4360 + }, + { + "epoch": 0.42, + "learning_rate": 1.9154166263427854e-05, + "loss": 0.4118, + "step": 4370 + }, + { + "epoch": 0.42, + "learning_rate": 1.9152230717119907e-05, + "loss": 0.4749, + "step": 4380 + }, + { + "epoch": 0.42, + "learning_rate": 1.9150295170811964e-05, + "loss": 0.3257, + "step": 4390 + }, + { + "epoch": 0.43, + "learning_rate": 1.9148359624504018e-05, + "loss": 0.262, + "step": 4400 + }, + { + "epoch": 0.43, + "learning_rate": 1.914642407819607e-05, + "loss": 0.4256, + "step": 4410 + }, + { + "epoch": 0.43, + "learning_rate": 1.914448853188813e-05, + "loss": 0.3689, + "step": 4420 + }, + { + "epoch": 0.43, + "learning_rate": 1.914255298558018e-05, + "loss": 0.348, + "step": 4430 + }, + { + "epoch": 0.43, + "learning_rate": 1.9140617439272236e-05, + "loss": 0.3387, + "step": 4440 + }, + { + "epoch": 0.43, + "learning_rate": 1.913868189296429e-05, + "loss": 0.3537, + "step": 4450 + }, + { + "epoch": 0.43, + "learning_rate": 1.9136746346656347e-05, + "loss": 0.397, + "step": 4460 + }, + { + "epoch": 0.43, + "learning_rate": 1.91348108003484e-05, + "loss": 0.4115, + "step": 4470 + }, + { + "epoch": 0.43, + "learning_rate": 1.9132875254040454e-05, + "loss": 0.2933, + "step": 4480 + }, + { + "epoch": 0.43, + "learning_rate": 1.913093970773251e-05, + "loss": 0.3539, + "step": 4490 + }, + { + "epoch": 0.44, + "learning_rate": 1.9129004161424565e-05, + "loss": 0.292, + "step": 4500 + }, + { + "epoch": 0.44, + "learning_rate": 1.9127068615116618e-05, + "loss": 0.4652, + "step": 4510 + }, + { + "epoch": 0.44, + "learning_rate": 1.9125133068808672e-05, + "loss": 0.4474, + "step": 4520 + }, + { + "epoch": 0.44, + "learning_rate": 1.9123197522500725e-05, + "loss": 0.4126, + "step": 4530 + }, + { + "epoch": 0.44, + "learning_rate": 1.9121261976192782e-05, + "loss": 0.4463, + "step": 4540 + }, + { + "epoch": 0.44, + "learning_rate": 1.9119326429884836e-05, + "loss": 0.3133, + "step": 4550 + }, + { + "epoch": 0.44, + "learning_rate": 1.9117390883576893e-05, + "loss": 0.3686, + "step": 4560 + }, + { + "epoch": 0.44, + "learning_rate": 1.9115455337268947e-05, + "loss": 0.363, + "step": 4570 + }, + { + "epoch": 0.44, + "learning_rate": 1.9113519790961e-05, + "loss": 0.4454, + "step": 4580 + }, + { + "epoch": 0.44, + "learning_rate": 1.9111584244653054e-05, + "loss": 0.3419, + "step": 4590 + }, + { + "epoch": 0.45, + "learning_rate": 1.9109648698345108e-05, + "loss": 0.3417, + "step": 4600 + }, + { + "epoch": 0.45, + "learning_rate": 1.9107713152037165e-05, + "loss": 0.4106, + "step": 4610 + }, + { + "epoch": 0.45, + "learning_rate": 1.910577760572922e-05, + "loss": 0.332, + "step": 4620 + }, + { + "epoch": 0.45, + "learning_rate": 1.9103842059421272e-05, + "loss": 0.4436, + "step": 4630 + }, + { + "epoch": 0.45, + "learning_rate": 1.910190651311333e-05, + "loss": 0.3219, + "step": 4640 + }, + { + "epoch": 0.45, + "learning_rate": 1.9099970966805383e-05, + "loss": 0.4151, + "step": 4650 + }, + { + "epoch": 0.45, + "learning_rate": 1.9098035420497436e-05, + "loss": 0.3221, + "step": 4660 + }, + { + "epoch": 0.45, + "learning_rate": 1.909609987418949e-05, + "loss": 0.367, + "step": 4670 + }, + { + "epoch": 0.45, + "learning_rate": 1.9094164327881547e-05, + "loss": 0.3652, + "step": 4680 + }, + { + "epoch": 0.45, + "learning_rate": 1.90922287815736e-05, + "loss": 0.4228, + "step": 4690 + }, + { + "epoch": 0.45, + "learning_rate": 1.9090293235265654e-05, + "loss": 0.3582, + "step": 4700 + }, + { + "epoch": 0.46, + "learning_rate": 1.908835768895771e-05, + "loss": 0.4286, + "step": 4710 + }, + { + "epoch": 0.46, + "learning_rate": 1.9086422142649765e-05, + "loss": 0.3196, + "step": 4720 + }, + { + "epoch": 0.46, + "learning_rate": 1.908448659634182e-05, + "loss": 0.3407, + "step": 4730 + }, + { + "epoch": 0.46, + "learning_rate": 1.9082551050033872e-05, + "loss": 0.3364, + "step": 4740 + }, + { + "epoch": 0.46, + "learning_rate": 1.908061550372593e-05, + "loss": 0.2977, + "step": 4750 + }, + { + "epoch": 0.46, + "learning_rate": 1.9078679957417983e-05, + "loss": 0.3058, + "step": 4760 + }, + { + "epoch": 0.46, + "learning_rate": 1.9076744411110036e-05, + "loss": 0.3094, + "step": 4770 + }, + { + "epoch": 0.46, + "learning_rate": 1.9074808864802093e-05, + "loss": 0.3791, + "step": 4780 + }, + { + "epoch": 0.46, + "learning_rate": 1.9072873318494147e-05, + "loss": 0.3833, + "step": 4790 + }, + { + "epoch": 0.46, + "learning_rate": 1.90709377721862e-05, + "loss": 0.4535, + "step": 4800 + }, + { + "epoch": 0.47, + "learning_rate": 1.9069002225878254e-05, + "loss": 0.323, + "step": 4810 + }, + { + "epoch": 0.47, + "learning_rate": 1.9067066679570308e-05, + "loss": 0.4877, + "step": 4820 + }, + { + "epoch": 0.47, + "learning_rate": 1.9065131133262365e-05, + "loss": 0.3678, + "step": 4830 + }, + { + "epoch": 0.47, + "learning_rate": 1.906319558695442e-05, + "loss": 0.3449, + "step": 4840 + }, + { + "epoch": 0.47, + "learning_rate": 1.9061260040646476e-05, + "loss": 0.3734, + "step": 4850 + }, + { + "epoch": 0.47, + "learning_rate": 1.905932449433853e-05, + "loss": 0.3421, + "step": 4860 + }, + { + "epoch": 0.47, + "learning_rate": 1.9057388948030583e-05, + "loss": 0.3678, + "step": 4870 + }, + { + "epoch": 0.47, + "learning_rate": 1.9055453401722637e-05, + "loss": 0.359, + "step": 4880 + }, + { + "epoch": 0.47, + "learning_rate": 1.905351785541469e-05, + "loss": 0.4503, + "step": 4890 + }, + { + "epoch": 0.47, + "learning_rate": 1.9051582309106747e-05, + "loss": 0.3667, + "step": 4900 + }, + { + "epoch": 0.48, + "learning_rate": 1.90496467627988e-05, + "loss": 0.3053, + "step": 4910 + }, + { + "epoch": 0.48, + "learning_rate": 1.9047711216490855e-05, + "loss": 0.3386, + "step": 4920 + }, + { + "epoch": 0.48, + "learning_rate": 1.904577567018291e-05, + "loss": 0.3816, + "step": 4930 + }, + { + "epoch": 0.48, + "learning_rate": 1.9043840123874965e-05, + "loss": 0.351, + "step": 4940 + }, + { + "epoch": 0.48, + "learning_rate": 1.9041904577567022e-05, + "loss": 0.3716, + "step": 4950 + }, + { + "epoch": 0.48, + "learning_rate": 1.9039969031259072e-05, + "loss": 0.285, + "step": 4960 + }, + { + "epoch": 0.48, + "learning_rate": 1.903803348495113e-05, + "loss": 0.2784, + "step": 4970 + }, + { + "epoch": 0.48, + "learning_rate": 1.9036097938643183e-05, + "loss": 0.4255, + "step": 4980 + }, + { + "epoch": 0.48, + "learning_rate": 1.9034162392335237e-05, + "loss": 0.399, + "step": 4990 + }, + { + "epoch": 0.48, + "learning_rate": 1.9032226846027294e-05, + "loss": 0.3096, + "step": 5000 + }, + { + "epoch": 0.48, + "learning_rate": 1.9030291299719347e-05, + "loss": 0.3515, + "step": 5010 + }, + { + "epoch": 0.49, + "learning_rate": 1.90283557534114e-05, + "loss": 0.3837, + "step": 5020 + }, + { + "epoch": 0.49, + "learning_rate": 1.9026420207103458e-05, + "loss": 0.2423, + "step": 5030 + }, + { + "epoch": 0.49, + "learning_rate": 1.9024484660795512e-05, + "loss": 0.3709, + "step": 5040 + }, + { + "epoch": 0.49, + "learning_rate": 1.9022549114487565e-05, + "loss": 0.423, + "step": 5050 + }, + { + "epoch": 0.49, + "learning_rate": 1.902061356817962e-05, + "loss": 0.3123, + "step": 5060 + }, + { + "epoch": 0.49, + "learning_rate": 1.9018678021871676e-05, + "loss": 0.3179, + "step": 5070 + }, + { + "epoch": 0.49, + "learning_rate": 1.901674247556373e-05, + "loss": 0.3199, + "step": 5080 + }, + { + "epoch": 0.49, + "learning_rate": 1.9014806929255783e-05, + "loss": 0.2427, + "step": 5090 + }, + { + "epoch": 0.49, + "learning_rate": 1.901287138294784e-05, + "loss": 0.3378, + "step": 5100 + }, + { + "epoch": 0.49, + "learning_rate": 1.901093583663989e-05, + "loss": 0.434, + "step": 5110 + }, + { + "epoch": 0.5, + "learning_rate": 1.9009000290331948e-05, + "loss": 0.3113, + "step": 5120 + }, + { + "epoch": 0.5, + "learning_rate": 1.9007064744024e-05, + "loss": 0.2909, + "step": 5130 + }, + { + "epoch": 0.5, + "learning_rate": 1.9005129197716058e-05, + "loss": 0.3994, + "step": 5140 + }, + { + "epoch": 0.5, + "learning_rate": 1.9003193651408112e-05, + "loss": 0.3406, + "step": 5150 + }, + { + "epoch": 0.5, + "learning_rate": 1.9001258105100166e-05, + "loss": 0.4876, + "step": 5160 + }, + { + "epoch": 0.5, + "learning_rate": 1.8999322558792223e-05, + "loss": 0.237, + "step": 5170 + }, + { + "epoch": 0.5, + "learning_rate": 1.8997387012484276e-05, + "loss": 0.3439, + "step": 5180 + }, + { + "epoch": 0.5, + "learning_rate": 1.899545146617633e-05, + "loss": 0.3168, + "step": 5190 + }, + { + "epoch": 0.5, + "learning_rate": 1.8993515919868383e-05, + "loss": 0.4082, + "step": 5200 + }, + { + "epoch": 0.5, + "learning_rate": 1.8991580373560437e-05, + "loss": 0.4286, + "step": 5210 + }, + { + "epoch": 0.51, + "learning_rate": 1.8989644827252494e-05, + "loss": 0.4109, + "step": 5220 + }, + { + "epoch": 0.51, + "learning_rate": 1.8987709280944548e-05, + "loss": 0.3401, + "step": 5230 + }, + { + "epoch": 0.51, + "learning_rate": 1.8985773734636605e-05, + "loss": 0.3369, + "step": 5240 + }, + { + "epoch": 0.51, + "learning_rate": 1.898383818832866e-05, + "loss": 0.4073, + "step": 5250 + }, + { + "epoch": 0.51, + "learning_rate": 1.8981902642020712e-05, + "loss": 0.4037, + "step": 5260 + }, + { + "epoch": 0.51, + "learning_rate": 1.8979967095712766e-05, + "loss": 0.4549, + "step": 5270 + }, + { + "epoch": 0.51, + "learning_rate": 1.897803154940482e-05, + "loss": 0.3165, + "step": 5280 + }, + { + "epoch": 0.51, + "learning_rate": 1.8976096003096876e-05, + "loss": 0.3346, + "step": 5290 + }, + { + "epoch": 0.51, + "learning_rate": 1.897416045678893e-05, + "loss": 0.4709, + "step": 5300 + }, + { + "epoch": 0.51, + "learning_rate": 1.8972224910480984e-05, + "loss": 0.3583, + "step": 5310 + }, + { + "epoch": 0.51, + "learning_rate": 1.897028936417304e-05, + "loss": 0.3309, + "step": 5320 + }, + { + "epoch": 0.52, + "learning_rate": 1.8968353817865094e-05, + "loss": 0.3607, + "step": 5330 + }, + { + "epoch": 0.52, + "learning_rate": 1.8966418271557148e-05, + "loss": 0.2926, + "step": 5340 + }, + { + "epoch": 0.52, + "learning_rate": 1.89644827252492e-05, + "loss": 0.2346, + "step": 5350 + }, + { + "epoch": 0.52, + "learning_rate": 1.896254717894126e-05, + "loss": 0.56, + "step": 5360 + }, + { + "epoch": 0.52, + "learning_rate": 1.8960611632633312e-05, + "loss": 0.3649, + "step": 5370 + }, + { + "epoch": 0.52, + "learning_rate": 1.8958676086325366e-05, + "loss": 0.2929, + "step": 5380 + }, + { + "epoch": 0.52, + "learning_rate": 1.8956740540017423e-05, + "loss": 0.3738, + "step": 5390 + }, + { + "epoch": 0.52, + "learning_rate": 1.8954804993709476e-05, + "loss": 0.4704, + "step": 5400 + }, + { + "epoch": 0.52, + "learning_rate": 1.895286944740153e-05, + "loss": 0.4452, + "step": 5410 + }, + { + "epoch": 0.52, + "learning_rate": 1.8950933901093584e-05, + "loss": 0.2478, + "step": 5420 + }, + { + "epoch": 0.53, + "learning_rate": 1.894899835478564e-05, + "loss": 0.2551, + "step": 5430 + }, + { + "epoch": 0.53, + "learning_rate": 1.8947062808477694e-05, + "loss": 0.4916, + "step": 5440 + }, + { + "epoch": 0.53, + "learning_rate": 1.8945127262169748e-05, + "loss": 0.2724, + "step": 5450 + }, + { + "epoch": 0.53, + "learning_rate": 1.8943191715861805e-05, + "loss": 0.3297, + "step": 5460 + }, + { + "epoch": 0.53, + "learning_rate": 1.894125616955386e-05, + "loss": 0.3322, + "step": 5470 + }, + { + "epoch": 0.53, + "learning_rate": 1.8939320623245912e-05, + "loss": 0.3209, + "step": 5480 + }, + { + "epoch": 0.53, + "learning_rate": 1.8937385076937966e-05, + "loss": 0.3994, + "step": 5490 + }, + { + "epoch": 0.53, + "learning_rate": 1.893544953063002e-05, + "loss": 0.3907, + "step": 5500 + }, + { + "epoch": 0.53, + "learning_rate": 1.8933513984322077e-05, + "loss": 0.2728, + "step": 5510 + }, + { + "epoch": 0.53, + "learning_rate": 1.893157843801413e-05, + "loss": 0.3397, + "step": 5520 + }, + { + "epoch": 0.54, + "learning_rate": 1.8929642891706187e-05, + "loss": 0.3714, + "step": 5530 + }, + { + "epoch": 0.54, + "learning_rate": 1.892770734539824e-05, + "loss": 0.2728, + "step": 5540 + }, + { + "epoch": 0.54, + "learning_rate": 1.8925771799090295e-05, + "loss": 0.3526, + "step": 5550 + }, + { + "epoch": 0.54, + "learning_rate": 1.8923836252782348e-05, + "loss": 0.4678, + "step": 5560 + }, + { + "epoch": 0.54, + "learning_rate": 1.8921900706474402e-05, + "loss": 0.2778, + "step": 5570 + }, + { + "epoch": 0.54, + "learning_rate": 1.891996516016646e-05, + "loss": 0.445, + "step": 5580 + }, + { + "epoch": 0.54, + "learning_rate": 1.8918029613858512e-05, + "loss": 0.3914, + "step": 5590 + }, + { + "epoch": 0.54, + "learning_rate": 1.8916094067550566e-05, + "loss": 0.3185, + "step": 5600 + }, + { + "epoch": 0.54, + "learning_rate": 1.8914158521242623e-05, + "loss": 0.3925, + "step": 5610 + }, + { + "epoch": 0.54, + "learning_rate": 1.8912222974934677e-05, + "loss": 0.3708, + "step": 5620 + }, + { + "epoch": 0.54, + "learning_rate": 1.8910287428626734e-05, + "loss": 0.3551, + "step": 5630 + }, + { + "epoch": 0.55, + "learning_rate": 1.8908351882318784e-05, + "loss": 0.2759, + "step": 5640 + }, + { + "epoch": 0.55, + "learning_rate": 1.890641633601084e-05, + "loss": 0.3111, + "step": 5650 + }, + { + "epoch": 0.55, + "learning_rate": 1.8904480789702895e-05, + "loss": 0.4198, + "step": 5660 + }, + { + "epoch": 0.55, + "learning_rate": 1.890254524339495e-05, + "loss": 0.277, + "step": 5670 + }, + { + "epoch": 0.55, + "learning_rate": 1.8900609697087005e-05, + "loss": 0.3026, + "step": 5680 + }, + { + "epoch": 0.55, + "learning_rate": 1.889867415077906e-05, + "loss": 0.2805, + "step": 5690 + }, + { + "epoch": 0.55, + "learning_rate": 1.8896738604471113e-05, + "loss": 0.3731, + "step": 5700 + }, + { + "epoch": 0.55, + "learning_rate": 1.889480305816317e-05, + "loss": 0.4065, + "step": 5710 + }, + { + "epoch": 0.55, + "learning_rate": 1.8892867511855223e-05, + "loss": 0.3705, + "step": 5720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8890931965547277e-05, + "loss": 0.3406, + "step": 5730 + }, + { + "epoch": 0.56, + "learning_rate": 1.888899641923933e-05, + "loss": 0.3375, + "step": 5740 + }, + { + "epoch": 0.56, + "learning_rate": 1.8887060872931388e-05, + "loss": 0.4255, + "step": 5750 + }, + { + "epoch": 0.56, + "learning_rate": 1.888512532662344e-05, + "loss": 0.3831, + "step": 5760 + }, + { + "epoch": 0.56, + "learning_rate": 1.8883189780315495e-05, + "loss": 0.2839, + "step": 5770 + }, + { + "epoch": 0.56, + "learning_rate": 1.8881254234007552e-05, + "loss": 0.5226, + "step": 5780 + }, + { + "epoch": 0.56, + "learning_rate": 1.8879318687699602e-05, + "loss": 0.3575, + "step": 5790 + }, + { + "epoch": 0.56, + "learning_rate": 1.887738314139166e-05, + "loss": 0.3221, + "step": 5800 + }, + { + "epoch": 0.56, + "learning_rate": 1.8875447595083713e-05, + "loss": 0.3982, + "step": 5810 + }, + { + "epoch": 0.56, + "learning_rate": 1.887351204877577e-05, + "loss": 0.2864, + "step": 5820 + }, + { + "epoch": 0.56, + "learning_rate": 1.8871576502467823e-05, + "loss": 0.3452, + "step": 5830 + }, + { + "epoch": 0.57, + "learning_rate": 1.8869640956159877e-05, + "loss": 0.3075, + "step": 5840 + }, + { + "epoch": 0.57, + "learning_rate": 1.8867705409851934e-05, + "loss": 0.2773, + "step": 5850 + }, + { + "epoch": 0.57, + "learning_rate": 1.8865769863543988e-05, + "loss": 0.3656, + "step": 5860 + }, + { + "epoch": 0.57, + "learning_rate": 1.886383431723604e-05, + "loss": 0.4547, + "step": 5870 + }, + { + "epoch": 0.57, + "learning_rate": 1.8861898770928095e-05, + "loss": 0.3325, + "step": 5880 + }, + { + "epoch": 0.57, + "learning_rate": 1.885996322462015e-05, + "loss": 0.3701, + "step": 5890 + }, + { + "epoch": 0.57, + "learning_rate": 1.8858027678312206e-05, + "loss": 0.3877, + "step": 5900 + }, + { + "epoch": 0.57, + "learning_rate": 1.885609213200426e-05, + "loss": 0.3257, + "step": 5910 + }, + { + "epoch": 0.57, + "learning_rate": 1.8854156585696316e-05, + "loss": 0.3113, + "step": 5920 + }, + { + "epoch": 0.57, + "learning_rate": 1.885222103938837e-05, + "loss": 0.3695, + "step": 5930 + }, + { + "epoch": 0.57, + "learning_rate": 1.8850285493080424e-05, + "loss": 0.3431, + "step": 5940 + }, + { + "epoch": 0.58, + "learning_rate": 1.8848349946772477e-05, + "loss": 0.3997, + "step": 5950 + }, + { + "epoch": 0.58, + "learning_rate": 1.884641440046453e-05, + "loss": 0.3135, + "step": 5960 + }, + { + "epoch": 0.58, + "learning_rate": 1.8844478854156588e-05, + "loss": 0.3042, + "step": 5970 + }, + { + "epoch": 0.58, + "learning_rate": 1.884254330784864e-05, + "loss": 0.3763, + "step": 5980 + }, + { + "epoch": 0.58, + "learning_rate": 1.8840607761540695e-05, + "loss": 0.3337, + "step": 5990 + }, + { + "epoch": 0.58, + "learning_rate": 1.8838672215232752e-05, + "loss": 0.4308, + "step": 6000 + }, + { + "epoch": 0.58, + "learning_rate": 1.8836736668924806e-05, + "loss": 0.3014, + "step": 6010 + }, + { + "epoch": 0.58, + "learning_rate": 1.883480112261686e-05, + "loss": 0.3657, + "step": 6020 + }, + { + "epoch": 0.58, + "learning_rate": 1.8832865576308913e-05, + "loss": 0.3828, + "step": 6030 + }, + { + "epoch": 0.58, + "learning_rate": 1.883093003000097e-05, + "loss": 0.4757, + "step": 6040 + }, + { + "epoch": 0.59, + "learning_rate": 1.8828994483693024e-05, + "loss": 0.422, + "step": 6050 + }, + { + "epoch": 0.59, + "learning_rate": 1.8827058937385077e-05, + "loss": 0.5819, + "step": 6060 + }, + { + "epoch": 0.59, + "learning_rate": 1.8825123391077134e-05, + "loss": 0.2975, + "step": 6070 + }, + { + "epoch": 0.59, + "learning_rate": 1.8823187844769188e-05, + "loss": 0.3541, + "step": 6080 + }, + { + "epoch": 0.59, + "learning_rate": 1.8821252298461242e-05, + "loss": 0.4085, + "step": 6090 + }, + { + "epoch": 0.59, + "learning_rate": 1.8819316752153295e-05, + "loss": 0.265, + "step": 6100 + }, + { + "epoch": 0.59, + "learning_rate": 1.8817381205845352e-05, + "loss": 0.4095, + "step": 6110 + }, + { + "epoch": 0.59, + "learning_rate": 1.8815445659537406e-05, + "loss": 0.3463, + "step": 6120 + }, + { + "epoch": 0.59, + "learning_rate": 1.881351011322946e-05, + "loss": 0.3064, + "step": 6130 + }, + { + "epoch": 0.59, + "learning_rate": 1.8811574566921517e-05, + "loss": 0.3216, + "step": 6140 + }, + { + "epoch": 0.6, + "learning_rate": 1.880963902061357e-05, + "loss": 0.4792, + "step": 6150 + }, + { + "epoch": 0.6, + "learning_rate": 1.8807703474305624e-05, + "loss": 0.33, + "step": 6160 + }, + { + "epoch": 0.6, + "learning_rate": 1.8805767927997678e-05, + "loss": 0.3185, + "step": 6170 + }, + { + "epoch": 0.6, + "learning_rate": 1.880383238168973e-05, + "loss": 0.2732, + "step": 6180 + }, + { + "epoch": 0.6, + "learning_rate": 1.8801896835381788e-05, + "loss": 0.3093, + "step": 6190 + }, + { + "epoch": 0.6, + "learning_rate": 1.8799961289073842e-05, + "loss": 0.352, + "step": 6200 + }, + { + "epoch": 0.6, + "learning_rate": 1.87980257427659e-05, + "loss": 0.5289, + "step": 6210 + }, + { + "epoch": 0.6, + "learning_rate": 1.8796090196457953e-05, + "loss": 0.316, + "step": 6220 + }, + { + "epoch": 0.6, + "learning_rate": 1.8794154650150006e-05, + "loss": 0.2334, + "step": 6230 + }, + { + "epoch": 0.6, + "learning_rate": 1.8792219103842063e-05, + "loss": 0.3564, + "step": 6240 + }, + { + "epoch": 0.6, + "learning_rate": 1.8790283557534113e-05, + "loss": 0.3761, + "step": 6250 + }, + { + "epoch": 0.61, + "learning_rate": 1.878834801122617e-05, + "loss": 0.2734, + "step": 6260 + }, + { + "epoch": 0.61, + "learning_rate": 1.8786412464918224e-05, + "loss": 0.3142, + "step": 6270 + }, + { + "epoch": 0.61, + "learning_rate": 1.8784476918610278e-05, + "loss": 0.3894, + "step": 6280 + }, + { + "epoch": 0.61, + "learning_rate": 1.8782541372302335e-05, + "loss": 0.3341, + "step": 6290 + }, + { + "epoch": 0.61, + "learning_rate": 1.878060582599439e-05, + "loss": 0.4785, + "step": 6300 + }, + { + "epoch": 0.61, + "learning_rate": 1.8778670279686445e-05, + "loss": 0.2977, + "step": 6310 + }, + { + "epoch": 0.61, + "learning_rate": 1.8776734733378496e-05, + "loss": 0.3926, + "step": 6320 + }, + { + "epoch": 0.61, + "learning_rate": 1.8774799187070553e-05, + "loss": 0.3539, + "step": 6330 + }, + { + "epoch": 0.61, + "learning_rate": 1.8772863640762606e-05, + "loss": 0.4025, + "step": 6340 + }, + { + "epoch": 0.61, + "learning_rate": 1.877092809445466e-05, + "loss": 0.3544, + "step": 6350 + }, + { + "epoch": 0.62, + "learning_rate": 1.8768992548146717e-05, + "loss": 0.2944, + "step": 6360 + }, + { + "epoch": 0.62, + "learning_rate": 1.876705700183877e-05, + "loss": 0.352, + "step": 6370 + }, + { + "epoch": 0.62, + "learning_rate": 1.8765121455530824e-05, + "loss": 0.5195, + "step": 6380 + }, + { + "epoch": 0.62, + "learning_rate": 1.876318590922288e-05, + "loss": 0.249, + "step": 6390 + }, + { + "epoch": 0.62, + "learning_rate": 1.8761250362914935e-05, + "loss": 0.2658, + "step": 6400 + }, + { + "epoch": 0.62, + "learning_rate": 1.875931481660699e-05, + "loss": 0.4948, + "step": 6410 + }, + { + "epoch": 0.62, + "learning_rate": 1.8757379270299042e-05, + "loss": 0.3107, + "step": 6420 + }, + { + "epoch": 0.62, + "learning_rate": 1.87554437239911e-05, + "loss": 0.3666, + "step": 6430 + }, + { + "epoch": 0.62, + "learning_rate": 1.8753508177683153e-05, + "loss": 0.4307, + "step": 6440 + }, + { + "epoch": 0.62, + "learning_rate": 1.8751572631375206e-05, + "loss": 0.3087, + "step": 6450 + }, + { + "epoch": 0.63, + "learning_rate": 1.8749637085067263e-05, + "loss": 0.2977, + "step": 6460 + }, + { + "epoch": 0.63, + "learning_rate": 1.8747701538759314e-05, + "loss": 0.3428, + "step": 6470 + }, + { + "epoch": 0.63, + "learning_rate": 1.874576599245137e-05, + "loss": 0.46, + "step": 6480 + }, + { + "epoch": 0.63, + "learning_rate": 1.8743830446143424e-05, + "loss": 0.4305, + "step": 6490 + }, + { + "epoch": 0.63, + "learning_rate": 1.874189489983548e-05, + "loss": 0.3785, + "step": 6500 + }, + { + "epoch": 0.63, + "learning_rate": 1.8739959353527535e-05, + "loss": 0.3144, + "step": 6510 + }, + { + "epoch": 0.63, + "learning_rate": 1.873802380721959e-05, + "loss": 0.4196, + "step": 6520 + }, + { + "epoch": 0.63, + "learning_rate": 1.8736088260911646e-05, + "loss": 0.4398, + "step": 6530 + }, + { + "epoch": 0.63, + "learning_rate": 1.87341527146037e-05, + "loss": 0.3285, + "step": 6540 + }, + { + "epoch": 0.63, + "learning_rate": 1.8732217168295753e-05, + "loss": 0.3091, + "step": 6550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8730281621987807e-05, + "loss": 0.4289, + "step": 6560 + }, + { + "epoch": 0.64, + "learning_rate": 1.872834607567986e-05, + "loss": 0.2996, + "step": 6570 + }, + { + "epoch": 0.64, + "learning_rate": 1.8726410529371917e-05, + "loss": 0.2093, + "step": 6580 + }, + { + "epoch": 0.64, + "learning_rate": 1.872447498306397e-05, + "loss": 0.4607, + "step": 6590 + }, + { + "epoch": 0.64, + "learning_rate": 1.8722539436756028e-05, + "loss": 0.3703, + "step": 6600 + }, + { + "epoch": 0.64, + "learning_rate": 1.872060389044808e-05, + "loss": 0.3473, + "step": 6610 + }, + { + "epoch": 0.64, + "learning_rate": 1.8718668344140135e-05, + "loss": 0.3047, + "step": 6620 + }, + { + "epoch": 0.64, + "learning_rate": 1.871673279783219e-05, + "loss": 0.401, + "step": 6630 + }, + { + "epoch": 0.64, + "learning_rate": 1.8714797251524242e-05, + "loss": 0.4218, + "step": 6640 + }, + { + "epoch": 0.64, + "learning_rate": 1.87128617052163e-05, + "loss": 0.2586, + "step": 6650 + }, + { + "epoch": 0.64, + "learning_rate": 1.8710926158908353e-05, + "loss": 0.2766, + "step": 6660 + }, + { + "epoch": 0.65, + "learning_rate": 1.8708990612600407e-05, + "loss": 0.2839, + "step": 6670 + }, + { + "epoch": 0.65, + "learning_rate": 1.8707055066292464e-05, + "loss": 0.3874, + "step": 6680 + }, + { + "epoch": 0.65, + "learning_rate": 1.8705119519984517e-05, + "loss": 0.408, + "step": 6690 + }, + { + "epoch": 0.65, + "learning_rate": 1.870318397367657e-05, + "loss": 0.3257, + "step": 6700 + }, + { + "epoch": 0.65, + "learning_rate": 1.8701248427368625e-05, + "loss": 0.2693, + "step": 6710 + }, + { + "epoch": 0.65, + "learning_rate": 1.8699312881060682e-05, + "loss": 0.471, + "step": 6720 + }, + { + "epoch": 0.65, + "learning_rate": 1.8697377334752735e-05, + "loss": 0.3936, + "step": 6730 + }, + { + "epoch": 0.65, + "learning_rate": 1.869544178844479e-05, + "loss": 0.3143, + "step": 6740 + }, + { + "epoch": 0.65, + "learning_rate": 1.8693506242136846e-05, + "loss": 0.3663, + "step": 6750 + }, + { + "epoch": 0.65, + "learning_rate": 1.86915706958289e-05, + "loss": 0.2528, + "step": 6760 + }, + { + "epoch": 0.66, + "learning_rate": 1.8689635149520953e-05, + "loss": 0.3174, + "step": 6770 + }, + { + "epoch": 0.66, + "learning_rate": 1.8687699603213007e-05, + "loss": 0.3816, + "step": 6780 + }, + { + "epoch": 0.66, + "learning_rate": 1.8685764056905064e-05, + "loss": 0.3816, + "step": 6790 + }, + { + "epoch": 0.66, + "learning_rate": 1.8683828510597118e-05, + "loss": 0.3833, + "step": 6800 + }, + { + "epoch": 0.66, + "learning_rate": 1.868189296428917e-05, + "loss": 0.3293, + "step": 6810 + }, + { + "epoch": 0.66, + "learning_rate": 1.8679957417981228e-05, + "loss": 0.3789, + "step": 6820 + }, + { + "epoch": 0.66, + "learning_rate": 1.8678021871673282e-05, + "loss": 0.3262, + "step": 6830 + }, + { + "epoch": 0.66, + "learning_rate": 1.8676086325365336e-05, + "loss": 0.3449, + "step": 6840 + }, + { + "epoch": 0.66, + "learning_rate": 1.867415077905739e-05, + "loss": 0.2956, + "step": 6850 + }, + { + "epoch": 0.66, + "learning_rate": 1.8672215232749443e-05, + "loss": 0.2883, + "step": 6860 + }, + { + "epoch": 0.66, + "learning_rate": 1.86702796864415e-05, + "loss": 0.3236, + "step": 6870 + }, + { + "epoch": 0.67, + "learning_rate": 1.8668344140133553e-05, + "loss": 0.5661, + "step": 6880 + }, + { + "epoch": 0.67, + "learning_rate": 1.866640859382561e-05, + "loss": 0.2544, + "step": 6890 + }, + { + "epoch": 0.67, + "learning_rate": 1.8664473047517664e-05, + "loss": 0.2934, + "step": 6900 + }, + { + "epoch": 0.67, + "learning_rate": 1.8662537501209718e-05, + "loss": 0.3362, + "step": 6910 + }, + { + "epoch": 0.67, + "learning_rate": 1.8660601954901775e-05, + "loss": 0.264, + "step": 6920 + }, + { + "epoch": 0.67, + "learning_rate": 1.8658666408593825e-05, + "loss": 0.2582, + "step": 6930 + }, + { + "epoch": 0.67, + "learning_rate": 1.8656730862285882e-05, + "loss": 0.3938, + "step": 6940 + }, + { + "epoch": 0.67, + "learning_rate": 1.8654795315977936e-05, + "loss": 0.3906, + "step": 6950 + }, + { + "epoch": 0.67, + "learning_rate": 1.865285976966999e-05, + "loss": 0.2991, + "step": 6960 + }, + { + "epoch": 0.67, + "learning_rate": 1.8650924223362046e-05, + "loss": 0.3517, + "step": 6970 + }, + { + "epoch": 0.68, + "learning_rate": 1.86489886770541e-05, + "loss": 0.4076, + "step": 6980 + }, + { + "epoch": 0.68, + "learning_rate": 1.8647053130746157e-05, + "loss": 0.2732, + "step": 6990 + }, + { + "epoch": 0.68, + "learning_rate": 1.8645117584438207e-05, + "loss": 0.3043, + "step": 7000 + }, + { + "epoch": 0.68, + "learning_rate": 1.8643182038130264e-05, + "loss": 0.3398, + "step": 7010 + }, + { + "epoch": 0.68, + "learning_rate": 1.8641246491822318e-05, + "loss": 0.2635, + "step": 7020 + }, + { + "epoch": 0.68, + "learning_rate": 1.863931094551437e-05, + "loss": 0.5238, + "step": 7030 + }, + { + "epoch": 0.68, + "learning_rate": 1.863737539920643e-05, + "loss": 0.3611, + "step": 7040 + }, + { + "epoch": 0.68, + "learning_rate": 1.8635439852898482e-05, + "loss": 0.4443, + "step": 7050 + }, + { + "epoch": 0.68, + "learning_rate": 1.8633504306590536e-05, + "loss": 0.2671, + "step": 7060 + }, + { + "epoch": 0.68, + "learning_rate": 1.8631568760282593e-05, + "loss": 0.4157, + "step": 7070 + }, + { + "epoch": 0.69, + "learning_rate": 1.8629633213974647e-05, + "loss": 0.4455, + "step": 7080 + }, + { + "epoch": 0.69, + "learning_rate": 1.86276976676667e-05, + "loss": 0.3122, + "step": 7090 + }, + { + "epoch": 0.69, + "learning_rate": 1.8625762121358754e-05, + "loss": 0.3113, + "step": 7100 + }, + { + "epoch": 0.69, + "learning_rate": 1.862382657505081e-05, + "loss": 0.4185, + "step": 7110 + }, + { + "epoch": 0.69, + "learning_rate": 1.8621891028742864e-05, + "loss": 0.5069, + "step": 7120 + }, + { + "epoch": 0.69, + "learning_rate": 1.8619955482434918e-05, + "loss": 0.288, + "step": 7130 + }, + { + "epoch": 0.69, + "learning_rate": 1.8618019936126975e-05, + "loss": 0.2011, + "step": 7140 + }, + { + "epoch": 0.69, + "learning_rate": 1.8616084389819025e-05, + "loss": 0.3601, + "step": 7150 + }, + { + "epoch": 0.69, + "learning_rate": 1.8614148843511082e-05, + "loss": 0.4025, + "step": 7160 + }, + { + "epoch": 0.69, + "learning_rate": 1.8612213297203136e-05, + "loss": 0.3757, + "step": 7170 + }, + { + "epoch": 0.69, + "learning_rate": 1.8610277750895193e-05, + "loss": 0.4149, + "step": 7180 + }, + { + "epoch": 0.7, + "learning_rate": 1.8608342204587247e-05, + "loss": 0.3174, + "step": 7190 + }, + { + "epoch": 0.7, + "learning_rate": 1.86064066582793e-05, + "loss": 0.3494, + "step": 7200 + }, + { + "epoch": 0.7, + "learning_rate": 1.8604471111971357e-05, + "loss": 0.3389, + "step": 7210 + }, + { + "epoch": 0.7, + "learning_rate": 1.860253556566341e-05, + "loss": 0.5023, + "step": 7220 + }, + { + "epoch": 0.7, + "learning_rate": 1.8600600019355465e-05, + "loss": 0.3073, + "step": 7230 + }, + { + "epoch": 0.7, + "learning_rate": 1.8598664473047518e-05, + "loss": 0.3632, + "step": 7240 + }, + { + "epoch": 0.7, + "learning_rate": 1.8596728926739572e-05, + "loss": 0.3306, + "step": 7250 + }, + { + "epoch": 0.7, + "learning_rate": 1.859479338043163e-05, + "loss": 0.4477, + "step": 7260 + }, + { + "epoch": 0.7, + "learning_rate": 1.8592857834123683e-05, + "loss": 0.3131, + "step": 7270 + }, + { + "epoch": 0.7, + "learning_rate": 1.859092228781574e-05, + "loss": 0.3356, + "step": 7280 + }, + { + "epoch": 0.71, + "learning_rate": 1.8588986741507793e-05, + "loss": 0.332, + "step": 7290 + }, + { + "epoch": 0.71, + "learning_rate": 1.8587051195199847e-05, + "loss": 0.406, + "step": 7300 + }, + { + "epoch": 0.71, + "learning_rate": 1.85851156488919e-05, + "loss": 0.2352, + "step": 7310 + }, + { + "epoch": 0.71, + "learning_rate": 1.8583180102583954e-05, + "loss": 0.3321, + "step": 7320 + }, + { + "epoch": 0.71, + "learning_rate": 1.858124455627601e-05, + "loss": 0.4071, + "step": 7330 + }, + { + "epoch": 0.71, + "learning_rate": 1.8579309009968065e-05, + "loss": 0.1675, + "step": 7340 + }, + { + "epoch": 0.71, + "learning_rate": 1.857737346366012e-05, + "loss": 0.5997, + "step": 7350 + }, + { + "epoch": 0.71, + "learning_rate": 1.8575437917352175e-05, + "loss": 0.4883, + "step": 7360 + }, + { + "epoch": 0.71, + "learning_rate": 1.857350237104423e-05, + "loss": 0.4592, + "step": 7370 + }, + { + "epoch": 0.71, + "learning_rate": 1.8571566824736283e-05, + "loss": 0.2885, + "step": 7380 + }, + { + "epoch": 0.72, + "learning_rate": 1.8569631278428336e-05, + "loss": 0.3584, + "step": 7390 + }, + { + "epoch": 0.72, + "learning_rate": 1.8567695732120393e-05, + "loss": 0.3204, + "step": 7400 + }, + { + "epoch": 0.72, + "learning_rate": 1.8565760185812447e-05, + "loss": 0.4171, + "step": 7410 + }, + { + "epoch": 0.72, + "learning_rate": 1.85638246395045e-05, + "loss": 0.2512, + "step": 7420 + }, + { + "epoch": 0.72, + "learning_rate": 1.8561889093196558e-05, + "loss": 0.4065, + "step": 7430 + }, + { + "epoch": 0.72, + "learning_rate": 1.855995354688861e-05, + "loss": 0.4318, + "step": 7440 + }, + { + "epoch": 0.72, + "learning_rate": 1.8558018000580665e-05, + "loss": 0.2588, + "step": 7450 + }, + { + "epoch": 0.72, + "learning_rate": 1.855608245427272e-05, + "loss": 0.2969, + "step": 7460 + }, + { + "epoch": 0.72, + "learning_rate": 1.8554146907964776e-05, + "loss": 0.3779, + "step": 7470 + }, + { + "epoch": 0.72, + "learning_rate": 1.855221136165683e-05, + "loss": 0.3335, + "step": 7480 + }, + { + "epoch": 0.72, + "learning_rate": 1.8550275815348883e-05, + "loss": 0.3099, + "step": 7490 + }, + { + "epoch": 0.73, + "learning_rate": 1.854834026904094e-05, + "loss": 0.4144, + "step": 7500 + }, + { + "epoch": 0.73, + "learning_rate": 1.8546404722732993e-05, + "loss": 0.2843, + "step": 7510 + }, + { + "epoch": 0.73, + "learning_rate": 1.8544469176425047e-05, + "loss": 0.3132, + "step": 7520 + }, + { + "epoch": 0.73, + "learning_rate": 1.85425336301171e-05, + "loss": 0.1861, + "step": 7530 + }, + { + "epoch": 0.73, + "learning_rate": 1.8540598083809154e-05, + "loss": 0.4092, + "step": 7540 + }, + { + "epoch": 0.73, + "learning_rate": 1.853866253750121e-05, + "loss": 0.3256, + "step": 7550 + }, + { + "epoch": 0.73, + "learning_rate": 1.8536726991193265e-05, + "loss": 0.347, + "step": 7560 + }, + { + "epoch": 0.73, + "learning_rate": 1.8534791444885322e-05, + "loss": 0.48, + "step": 7570 + }, + { + "epoch": 0.73, + "learning_rate": 1.8532855898577376e-05, + "loss": 0.2864, + "step": 7580 + }, + { + "epoch": 0.73, + "learning_rate": 1.853092035226943e-05, + "loss": 0.3129, + "step": 7590 + }, + { + "epoch": 0.74, + "learning_rate": 1.8528984805961486e-05, + "loss": 0.3525, + "step": 7600 + }, + { + "epoch": 0.74, + "learning_rate": 1.8527049259653537e-05, + "loss": 0.2666, + "step": 7610 + }, + { + "epoch": 0.74, + "learning_rate": 1.8525113713345594e-05, + "loss": 0.4155, + "step": 7620 + }, + { + "epoch": 0.74, + "learning_rate": 1.8523178167037647e-05, + "loss": 0.3725, + "step": 7630 + }, + { + "epoch": 0.74, + "learning_rate": 1.85212426207297e-05, + "loss": 0.3956, + "step": 7640 + }, + { + "epoch": 0.74, + "learning_rate": 1.8519307074421758e-05, + "loss": 0.3877, + "step": 7650 + }, + { + "epoch": 0.74, + "learning_rate": 1.851737152811381e-05, + "loss": 0.238, + "step": 7660 + }, + { + "epoch": 0.74, + "learning_rate": 1.851543598180587e-05, + "loss": 0.3421, + "step": 7670 + }, + { + "epoch": 0.74, + "learning_rate": 1.851350043549792e-05, + "loss": 0.3279, + "step": 7680 + }, + { + "epoch": 0.74, + "learning_rate": 1.8511564889189976e-05, + "loss": 0.3341, + "step": 7690 + }, + { + "epoch": 0.75, + "learning_rate": 1.850962934288203e-05, + "loss": 0.3034, + "step": 7700 + }, + { + "epoch": 0.75, + "learning_rate": 1.8507693796574083e-05, + "loss": 0.3053, + "step": 7710 + }, + { + "epoch": 0.75, + "learning_rate": 1.850575825026614e-05, + "loss": 0.2807, + "step": 7720 + }, + { + "epoch": 0.75, + "learning_rate": 1.8503822703958194e-05, + "loss": 0.3688, + "step": 7730 + }, + { + "epoch": 0.75, + "learning_rate": 1.8501887157650247e-05, + "loss": 0.3633, + "step": 7740 + }, + { + "epoch": 0.75, + "learning_rate": 1.8499951611342304e-05, + "loss": 0.3636, + "step": 7750 + }, + { + "epoch": 0.75, + "learning_rate": 1.8498016065034358e-05, + "loss": 0.3237, + "step": 7760 + }, + { + "epoch": 0.75, + "learning_rate": 1.8496080518726412e-05, + "loss": 0.2752, + "step": 7770 + }, + { + "epoch": 0.75, + "learning_rate": 1.8494144972418465e-05, + "loss": 0.3541, + "step": 7780 + }, + { + "epoch": 0.75, + "learning_rate": 1.8492209426110522e-05, + "loss": 0.2767, + "step": 7790 + }, + { + "epoch": 0.75, + "learning_rate": 1.8490273879802576e-05, + "loss": 0.4879, + "step": 7800 + }, + { + "epoch": 0.76, + "learning_rate": 1.848833833349463e-05, + "loss": 0.3322, + "step": 7810 + }, + { + "epoch": 0.76, + "learning_rate": 1.8486402787186687e-05, + "loss": 0.4919, + "step": 7820 + }, + { + "epoch": 0.76, + "learning_rate": 1.8484467240878737e-05, + "loss": 0.3178, + "step": 7830 + }, + { + "epoch": 0.76, + "learning_rate": 1.8482531694570794e-05, + "loss": 0.2615, + "step": 7840 + }, + { + "epoch": 0.76, + "learning_rate": 1.8480596148262848e-05, + "loss": 0.3312, + "step": 7850 + }, + { + "epoch": 0.76, + "learning_rate": 1.8478660601954905e-05, + "loss": 0.381, + "step": 7860 + }, + { + "epoch": 0.76, + "learning_rate": 1.8476725055646958e-05, + "loss": 0.5812, + "step": 7870 + }, + { + "epoch": 0.76, + "learning_rate": 1.8474789509339012e-05, + "loss": 0.2434, + "step": 7880 + }, + { + "epoch": 0.76, + "learning_rate": 1.847285396303107e-05, + "loss": 0.3828, + "step": 7890 + }, + { + "epoch": 0.76, + "learning_rate": 1.8470918416723123e-05, + "loss": 0.2616, + "step": 7900 + }, + { + "epoch": 0.77, + "learning_rate": 1.8468982870415176e-05, + "loss": 0.3708, + "step": 7910 + }, + { + "epoch": 0.77, + "learning_rate": 1.846704732410723e-05, + "loss": 0.2601, + "step": 7920 + }, + { + "epoch": 0.77, + "learning_rate": 1.8465111777799283e-05, + "loss": 0.3226, + "step": 7930 + }, + { + "epoch": 0.77, + "learning_rate": 1.846317623149134e-05, + "loss": 0.2885, + "step": 7940 + }, + { + "epoch": 0.77, + "learning_rate": 1.8461240685183394e-05, + "loss": 0.4451, + "step": 7950 + }, + { + "epoch": 0.77, + "learning_rate": 1.845930513887545e-05, + "loss": 0.3158, + "step": 7960 + }, + { + "epoch": 0.77, + "learning_rate": 1.8457369592567505e-05, + "loss": 0.4061, + "step": 7970 + }, + { + "epoch": 0.77, + "learning_rate": 1.845543404625956e-05, + "loss": 0.2168, + "step": 7980 + }, + { + "epoch": 0.77, + "learning_rate": 1.8453498499951612e-05, + "loss": 0.3073, + "step": 7990 + }, + { + "epoch": 0.77, + "learning_rate": 1.8451562953643666e-05, + "loss": 0.3378, + "step": 8000 + }, + { + "epoch": 0.78, + "learning_rate": 1.8449627407335723e-05, + "loss": 0.3039, + "step": 8010 + }, + { + "epoch": 0.78, + "learning_rate": 1.8447691861027776e-05, + "loss": 0.3899, + "step": 8020 + }, + { + "epoch": 0.78, + "learning_rate": 1.844575631471983e-05, + "loss": 0.2474, + "step": 8030 + }, + { + "epoch": 0.78, + "learning_rate": 1.8443820768411887e-05, + "loss": 0.33, + "step": 8040 + }, + { + "epoch": 0.78, + "learning_rate": 1.844188522210394e-05, + "loss": 0.3826, + "step": 8050 + }, + { + "epoch": 0.78, + "learning_rate": 1.8439949675795994e-05, + "loss": 0.303, + "step": 8060 + }, + { + "epoch": 0.78, + "learning_rate": 1.8438014129488048e-05, + "loss": 0.3189, + "step": 8070 + }, + { + "epoch": 0.78, + "learning_rate": 1.8436078583180105e-05, + "loss": 0.2922, + "step": 8080 + }, + { + "epoch": 0.78, + "learning_rate": 1.843414303687216e-05, + "loss": 0.2596, + "step": 8090 + }, + { + "epoch": 0.78, + "learning_rate": 1.8432207490564212e-05, + "loss": 0.5159, + "step": 8100 + }, + { + "epoch": 0.78, + "learning_rate": 1.843027194425627e-05, + "loss": 0.3181, + "step": 8110 + }, + { + "epoch": 0.79, + "learning_rate": 1.8428336397948323e-05, + "loss": 0.4271, + "step": 8120 + }, + { + "epoch": 0.79, + "learning_rate": 1.8426400851640376e-05, + "loss": 0.3453, + "step": 8130 + }, + { + "epoch": 0.79, + "learning_rate": 1.842446530533243e-05, + "loss": 0.2857, + "step": 8140 + }, + { + "epoch": 0.79, + "learning_rate": 1.8422529759024487e-05, + "loss": 0.2519, + "step": 8150 + }, + { + "epoch": 0.79, + "learning_rate": 1.842059421271654e-05, + "loss": 0.4198, + "step": 8160 + }, + { + "epoch": 0.79, + "learning_rate": 1.8418658666408594e-05, + "loss": 0.3964, + "step": 8170 + }, + { + "epoch": 0.79, + "learning_rate": 1.841672312010065e-05, + "loss": 0.3443, + "step": 8180 + }, + { + "epoch": 0.79, + "learning_rate": 1.8414787573792705e-05, + "loss": 0.2781, + "step": 8190 + }, + { + "epoch": 0.79, + "learning_rate": 1.841285202748476e-05, + "loss": 0.3547, + "step": 8200 + }, + { + "epoch": 0.79, + "learning_rate": 1.8410916481176812e-05, + "loss": 0.2769, + "step": 8210 + }, + { + "epoch": 0.8, + "learning_rate": 1.8408980934868866e-05, + "loss": 0.3398, + "step": 8220 + }, + { + "epoch": 0.8, + "learning_rate": 1.8407045388560923e-05, + "loss": 0.3051, + "step": 8230 + }, + { + "epoch": 0.8, + "learning_rate": 1.8405109842252977e-05, + "loss": 0.4598, + "step": 8240 + }, + { + "epoch": 0.8, + "learning_rate": 1.8403174295945034e-05, + "loss": 0.3766, + "step": 8250 + }, + { + "epoch": 0.8, + "learning_rate": 1.8401238749637087e-05, + "loss": 0.4321, + "step": 8260 + }, + { + "epoch": 0.8, + "learning_rate": 1.839930320332914e-05, + "loss": 0.3444, + "step": 8270 + }, + { + "epoch": 0.8, + "learning_rate": 1.8397367657021198e-05, + "loss": 0.2653, + "step": 8280 + }, + { + "epoch": 0.8, + "learning_rate": 1.8395432110713248e-05, + "loss": 0.3143, + "step": 8290 + }, + { + "epoch": 0.8, + "learning_rate": 1.8393496564405305e-05, + "loss": 0.4002, + "step": 8300 + }, + { + "epoch": 0.8, + "learning_rate": 1.839156101809736e-05, + "loss": 0.3208, + "step": 8310 + }, + { + "epoch": 0.81, + "learning_rate": 1.8389625471789413e-05, + "loss": 0.3039, + "step": 8320 + }, + { + "epoch": 0.81, + "learning_rate": 1.838768992548147e-05, + "loss": 0.386, + "step": 8330 + }, + { + "epoch": 0.81, + "learning_rate": 1.8385754379173523e-05, + "loss": 0.3154, + "step": 8340 + }, + { + "epoch": 0.81, + "learning_rate": 1.838381883286558e-05, + "loss": 0.303, + "step": 8350 + }, + { + "epoch": 0.81, + "learning_rate": 1.838188328655763e-05, + "loss": 0.3458, + "step": 8360 + }, + { + "epoch": 0.81, + "learning_rate": 1.8379947740249687e-05, + "loss": 0.2572, + "step": 8370 + }, + { + "epoch": 0.81, + "learning_rate": 1.837801219394174e-05, + "loss": 0.3554, + "step": 8380 + }, + { + "epoch": 0.81, + "learning_rate": 1.8376076647633795e-05, + "loss": 0.2646, + "step": 8390 + }, + { + "epoch": 0.81, + "learning_rate": 1.8374141101325852e-05, + "loss": 0.5278, + "step": 8400 + }, + { + "epoch": 0.81, + "learning_rate": 1.8372205555017905e-05, + "loss": 0.3005, + "step": 8410 + }, + { + "epoch": 0.81, + "learning_rate": 1.837027000870996e-05, + "loss": 0.3468, + "step": 8420 + }, + { + "epoch": 0.82, + "learning_rate": 1.8368334462402016e-05, + "loss": 0.3188, + "step": 8430 + }, + { + "epoch": 0.82, + "learning_rate": 1.836639891609407e-05, + "loss": 0.3798, + "step": 8440 + }, + { + "epoch": 0.82, + "learning_rate": 1.8364463369786123e-05, + "loss": 0.3706, + "step": 8450 + }, + { + "epoch": 0.82, + "learning_rate": 1.8362527823478177e-05, + "loss": 0.3007, + "step": 8460 + }, + { + "epoch": 0.82, + "learning_rate": 1.8360592277170234e-05, + "loss": 0.3033, + "step": 8470 + }, + { + "epoch": 0.82, + "learning_rate": 1.8358656730862288e-05, + "loss": 0.2932, + "step": 8480 + }, + { + "epoch": 0.82, + "learning_rate": 1.835672118455434e-05, + "loss": 0.407, + "step": 8490 + }, + { + "epoch": 0.82, + "learning_rate": 1.8354785638246398e-05, + "loss": 0.3566, + "step": 8500 + }, + { + "epoch": 0.82, + "learning_rate": 1.835285009193845e-05, + "loss": 0.3719, + "step": 8510 + }, + { + "epoch": 0.82, + "learning_rate": 1.8350914545630506e-05, + "loss": 0.4093, + "step": 8520 + }, + { + "epoch": 0.83, + "learning_rate": 1.834897899932256e-05, + "loss": 0.3197, + "step": 8530 + }, + { + "epoch": 0.83, + "learning_rate": 1.8347043453014616e-05, + "loss": 0.3003, + "step": 8540 + }, + { + "epoch": 0.83, + "learning_rate": 1.834510790670667e-05, + "loss": 0.212, + "step": 8550 + }, + { + "epoch": 0.83, + "learning_rate": 1.8343172360398723e-05, + "loss": 0.349, + "step": 8560 + }, + { + "epoch": 0.83, + "learning_rate": 1.834123681409078e-05, + "loss": 0.2925, + "step": 8570 + }, + { + "epoch": 0.83, + "learning_rate": 1.8339301267782834e-05, + "loss": 0.2735, + "step": 8580 + }, + { + "epoch": 0.83, + "learning_rate": 1.8337365721474888e-05, + "loss": 0.3169, + "step": 8590 + }, + { + "epoch": 0.83, + "learning_rate": 1.833543017516694e-05, + "loss": 0.3693, + "step": 8600 + }, + { + "epoch": 0.83, + "learning_rate": 1.8333494628858995e-05, + "loss": 0.3285, + "step": 8610 + }, + { + "epoch": 0.83, + "learning_rate": 1.8331559082551052e-05, + "loss": 0.3351, + "step": 8620 + }, + { + "epoch": 0.84, + "learning_rate": 1.8329623536243106e-05, + "loss": 0.265, + "step": 8630 + }, + { + "epoch": 0.84, + "learning_rate": 1.8327687989935163e-05, + "loss": 0.3356, + "step": 8640 + }, + { + "epoch": 0.84, + "learning_rate": 1.8325752443627216e-05, + "loss": 0.3967, + "step": 8650 + }, + { + "epoch": 0.84, + "learning_rate": 1.832381689731927e-05, + "loss": 0.362, + "step": 8660 + }, + { + "epoch": 0.84, + "learning_rate": 1.8321881351011324e-05, + "loss": 0.3239, + "step": 8670 + }, + { + "epoch": 0.84, + "learning_rate": 1.8319945804703377e-05, + "loss": 0.3021, + "step": 8680 + }, + { + "epoch": 0.84, + "learning_rate": 1.8318010258395434e-05, + "loss": 0.2433, + "step": 8690 + }, + { + "epoch": 0.84, + "learning_rate": 1.8316074712087488e-05, + "loss": 0.3874, + "step": 8700 + }, + { + "epoch": 0.84, + "learning_rate": 1.831413916577954e-05, + "loss": 0.3488, + "step": 8710 + }, + { + "epoch": 0.84, + "learning_rate": 1.83122036194716e-05, + "loss": 0.309, + "step": 8720 + }, + { + "epoch": 0.84, + "learning_rate": 1.8310268073163652e-05, + "loss": 0.2218, + "step": 8730 + }, + { + "epoch": 0.85, + "learning_rate": 1.8308332526855706e-05, + "loss": 0.3475, + "step": 8740 + }, + { + "epoch": 0.85, + "learning_rate": 1.830639698054776e-05, + "loss": 0.2832, + "step": 8750 + }, + { + "epoch": 0.85, + "learning_rate": 1.8304461434239817e-05, + "loss": 0.2744, + "step": 8760 + }, + { + "epoch": 0.85, + "learning_rate": 1.830252588793187e-05, + "loss": 0.3072, + "step": 8770 + }, + { + "epoch": 0.85, + "learning_rate": 1.8300590341623924e-05, + "loss": 0.3038, + "step": 8780 + }, + { + "epoch": 0.85, + "learning_rate": 1.829865479531598e-05, + "loss": 0.2906, + "step": 8790 + }, + { + "epoch": 0.85, + "learning_rate": 1.8296719249008034e-05, + "loss": 0.2102, + "step": 8800 + }, + { + "epoch": 0.85, + "learning_rate": 1.8294783702700088e-05, + "loss": 0.3616, + "step": 8810 + }, + { + "epoch": 0.85, + "learning_rate": 1.8292848156392142e-05, + "loss": 0.2323, + "step": 8820 + }, + { + "epoch": 0.85, + "learning_rate": 1.82909126100842e-05, + "loss": 0.3345, + "step": 8830 + }, + { + "epoch": 0.86, + "learning_rate": 1.8288977063776252e-05, + "loss": 0.567, + "step": 8840 + }, + { + "epoch": 0.86, + "learning_rate": 1.8287041517468306e-05, + "loss": 0.3922, + "step": 8850 + }, + { + "epoch": 0.86, + "learning_rate": 1.8285105971160363e-05, + "loss": 0.4524, + "step": 8860 + }, + { + "epoch": 0.86, + "learning_rate": 1.8283170424852417e-05, + "loss": 0.4013, + "step": 8870 + }, + { + "epoch": 0.86, + "learning_rate": 1.828123487854447e-05, + "loss": 0.3345, + "step": 8880 + }, + { + "epoch": 0.86, + "learning_rate": 1.8279299332236524e-05, + "loss": 0.4011, + "step": 8890 + }, + { + "epoch": 0.86, + "learning_rate": 1.8277363785928578e-05, + "loss": 0.3421, + "step": 8900 + }, + { + "epoch": 0.86, + "learning_rate": 1.8275428239620635e-05, + "loss": 0.3368, + "step": 8910 + }, + { + "epoch": 0.86, + "learning_rate": 1.8273492693312688e-05, + "loss": 0.1746, + "step": 8920 + }, + { + "epoch": 0.86, + "learning_rate": 1.8271557147004745e-05, + "loss": 0.2804, + "step": 8930 + }, + { + "epoch": 0.87, + "learning_rate": 1.82696216006968e-05, + "loss": 0.3459, + "step": 8940 + }, + { + "epoch": 0.87, + "learning_rate": 1.8267686054388853e-05, + "loss": 0.5029, + "step": 8950 + }, + { + "epoch": 0.87, + "learning_rate": 1.826575050808091e-05, + "loss": 0.3974, + "step": 8960 + }, + { + "epoch": 0.87, + "learning_rate": 1.826381496177296e-05, + "loss": 0.4006, + "step": 8970 + }, + { + "epoch": 0.87, + "learning_rate": 1.8261879415465017e-05, + "loss": 0.2823, + "step": 8980 + }, + { + "epoch": 0.87, + "learning_rate": 1.825994386915707e-05, + "loss": 0.3753, + "step": 8990 + }, + { + "epoch": 0.87, + "learning_rate": 1.8258008322849124e-05, + "loss": 0.4216, + "step": 9000 + }, + { + "epoch": 0.87, + "learning_rate": 1.825607277654118e-05, + "loss": 0.3845, + "step": 9010 + }, + { + "epoch": 0.87, + "learning_rate": 1.8254137230233235e-05, + "loss": 0.3455, + "step": 9020 + }, + { + "epoch": 0.87, + "learning_rate": 1.8252201683925292e-05, + "loss": 0.2668, + "step": 9030 + }, + { + "epoch": 0.87, + "learning_rate": 1.8250266137617342e-05, + "loss": 0.3399, + "step": 9040 + }, + { + "epoch": 0.88, + "learning_rate": 1.82483305913094e-05, + "loss": 0.3577, + "step": 9050 + }, + { + "epoch": 0.88, + "learning_rate": 1.8246395045001453e-05, + "loss": 0.2828, + "step": 9060 + }, + { + "epoch": 0.88, + "learning_rate": 1.8244459498693506e-05, + "loss": 0.3839, + "step": 9070 + }, + { + "epoch": 0.88, + "learning_rate": 1.8242523952385563e-05, + "loss": 0.2374, + "step": 9080 + }, + { + "epoch": 0.88, + "learning_rate": 1.8240588406077617e-05, + "loss": 0.2963, + "step": 9090 + }, + { + "epoch": 0.88, + "learning_rate": 1.823865285976967e-05, + "loss": 0.3409, + "step": 9100 + }, + { + "epoch": 0.88, + "learning_rate": 1.8236717313461728e-05, + "loss": 0.4284, + "step": 9110 + }, + { + "epoch": 0.88, + "learning_rate": 1.823478176715378e-05, + "loss": 0.3127, + "step": 9120 + }, + { + "epoch": 0.88, + "learning_rate": 1.8232846220845835e-05, + "loss": 0.4288, + "step": 9130 + }, + { + "epoch": 0.88, + "learning_rate": 1.823091067453789e-05, + "loss": 0.2945, + "step": 9140 + }, + { + "epoch": 0.89, + "learning_rate": 1.8228975128229946e-05, + "loss": 0.3732, + "step": 9150 + }, + { + "epoch": 0.89, + "learning_rate": 1.8227039581922e-05, + "loss": 0.3105, + "step": 9160 + }, + { + "epoch": 0.89, + "learning_rate": 1.8225104035614053e-05, + "loss": 0.252, + "step": 9170 + }, + { + "epoch": 0.89, + "learning_rate": 1.822316848930611e-05, + "loss": 0.2462, + "step": 9180 + }, + { + "epoch": 0.89, + "learning_rate": 1.8221232942998164e-05, + "loss": 0.3349, + "step": 9190 + }, + { + "epoch": 0.89, + "learning_rate": 1.8219297396690217e-05, + "loss": 0.411, + "step": 9200 + }, + { + "epoch": 0.89, + "learning_rate": 1.821736185038227e-05, + "loss": 0.3199, + "step": 9210 + }, + { + "epoch": 0.89, + "learning_rate": 1.8215426304074328e-05, + "loss": 0.3333, + "step": 9220 + }, + { + "epoch": 0.89, + "learning_rate": 1.821349075776638e-05, + "loss": 0.4106, + "step": 9230 + }, + { + "epoch": 0.89, + "learning_rate": 1.8211555211458435e-05, + "loss": 0.4452, + "step": 9240 + }, + { + "epoch": 0.9, + "learning_rate": 1.8209619665150492e-05, + "loss": 0.2888, + "step": 9250 + }, + { + "epoch": 0.9, + "learning_rate": 1.8207684118842546e-05, + "loss": 0.3203, + "step": 9260 + }, + { + "epoch": 0.9, + "learning_rate": 1.82057485725346e-05, + "loss": 0.2887, + "step": 9270 + }, + { + "epoch": 0.9, + "learning_rate": 1.8203813026226653e-05, + "loss": 0.4203, + "step": 9280 + }, + { + "epoch": 0.9, + "learning_rate": 1.8201877479918707e-05, + "loss": 0.3805, + "step": 9290 + }, + { + "epoch": 0.9, + "learning_rate": 1.8199941933610764e-05, + "loss": 0.3047, + "step": 9300 + }, + { + "epoch": 0.9, + "learning_rate": 1.8198006387302817e-05, + "loss": 0.3427, + "step": 9310 + }, + { + "epoch": 0.9, + "learning_rate": 1.8196070840994874e-05, + "loss": 0.4192, + "step": 9320 + }, + { + "epoch": 0.9, + "learning_rate": 1.8194135294686928e-05, + "loss": 0.2797, + "step": 9330 + }, + { + "epoch": 0.9, + "learning_rate": 1.819219974837898e-05, + "loss": 0.3262, + "step": 9340 + }, + { + "epoch": 0.9, + "learning_rate": 1.8190264202071035e-05, + "loss": 0.3345, + "step": 9350 + }, + { + "epoch": 0.91, + "learning_rate": 1.818832865576309e-05, + "loss": 0.2887, + "step": 9360 + }, + { + "epoch": 0.91, + "learning_rate": 1.8186393109455146e-05, + "loss": 0.4654, + "step": 9370 + }, + { + "epoch": 0.91, + "learning_rate": 1.81844575631472e-05, + "loss": 0.2379, + "step": 9380 + }, + { + "epoch": 0.91, + "learning_rate": 1.8182522016839253e-05, + "loss": 0.2199, + "step": 9390 + }, + { + "epoch": 0.91, + "learning_rate": 1.818058647053131e-05, + "loss": 0.4024, + "step": 9400 + }, + { + "epoch": 0.91, + "learning_rate": 1.8178650924223364e-05, + "loss": 0.3641, + "step": 9410 + }, + { + "epoch": 0.91, + "learning_rate": 1.8176715377915417e-05, + "loss": 0.3297, + "step": 9420 + }, + { + "epoch": 0.91, + "learning_rate": 1.817477983160747e-05, + "loss": 0.2559, + "step": 9430 + }, + { + "epoch": 0.91, + "learning_rate": 1.8172844285299528e-05, + "loss": 0.2178, + "step": 9440 + }, + { + "epoch": 0.91, + "learning_rate": 1.8170908738991582e-05, + "loss": 0.3624, + "step": 9450 + }, + { + "epoch": 0.92, + "learning_rate": 1.8168973192683635e-05, + "loss": 0.4301, + "step": 9460 + }, + { + "epoch": 0.92, + "learning_rate": 1.8167037646375692e-05, + "loss": 0.465, + "step": 9470 + }, + { + "epoch": 0.92, + "learning_rate": 1.8165102100067746e-05, + "loss": 0.2276, + "step": 9480 + }, + { + "epoch": 0.92, + "learning_rate": 1.81631665537598e-05, + "loss": 0.4161, + "step": 9490 + }, + { + "epoch": 0.92, + "learning_rate": 1.8161231007451853e-05, + "loss": 0.314, + "step": 9500 + }, + { + "epoch": 0.92, + "learning_rate": 1.815929546114391e-05, + "loss": 0.3701, + "step": 9510 + }, + { + "epoch": 0.92, + "learning_rate": 1.8157359914835964e-05, + "loss": 0.2331, + "step": 9520 + }, + { + "epoch": 0.92, + "learning_rate": 1.8155424368528018e-05, + "loss": 0.226, + "step": 9530 + }, + { + "epoch": 0.92, + "learning_rate": 1.8153488822220075e-05, + "loss": 0.2902, + "step": 9540 + }, + { + "epoch": 0.92, + "learning_rate": 1.8151553275912128e-05, + "loss": 0.2461, + "step": 9550 + }, + { + "epoch": 0.93, + "learning_rate": 1.8149617729604182e-05, + "loss": 0.3302, + "step": 9560 + }, + { + "epoch": 0.93, + "learning_rate": 1.8147682183296236e-05, + "loss": 0.3362, + "step": 9570 + }, + { + "epoch": 0.93, + "learning_rate": 1.814574663698829e-05, + "loss": 0.3117, + "step": 9580 + }, + { + "epoch": 0.93, + "learning_rate": 1.8143811090680346e-05, + "loss": 0.2561, + "step": 9590 + }, + { + "epoch": 0.93, + "learning_rate": 1.81418755443724e-05, + "loss": 0.2557, + "step": 9600 + }, + { + "epoch": 0.93, + "learning_rate": 1.8139939998064457e-05, + "loss": 0.4308, + "step": 9610 + }, + { + "epoch": 0.93, + "learning_rate": 1.813800445175651e-05, + "loss": 0.4339, + "step": 9620 + }, + { + "epoch": 0.93, + "learning_rate": 1.8136068905448564e-05, + "loss": 0.3919, + "step": 9630 + }, + { + "epoch": 0.93, + "learning_rate": 1.813413335914062e-05, + "loss": 0.2994, + "step": 9640 + }, + { + "epoch": 0.93, + "learning_rate": 1.813219781283267e-05, + "loss": 0.2443, + "step": 9650 + }, + { + "epoch": 0.93, + "learning_rate": 1.813026226652473e-05, + "loss": 0.4114, + "step": 9660 + }, + { + "epoch": 0.94, + "learning_rate": 1.8128326720216782e-05, + "loss": 0.4617, + "step": 9670 + }, + { + "epoch": 0.94, + "learning_rate": 1.8126391173908836e-05, + "loss": 0.272, + "step": 9680 + }, + { + "epoch": 0.94, + "learning_rate": 1.8124455627600893e-05, + "loss": 0.2754, + "step": 9690 + }, + { + "epoch": 0.94, + "learning_rate": 1.8122520081292946e-05, + "loss": 0.292, + "step": 9700 + }, + { + "epoch": 0.94, + "learning_rate": 1.8120584534985003e-05, + "loss": 0.3178, + "step": 9710 + }, + { + "epoch": 0.94, + "learning_rate": 1.8118648988677057e-05, + "loss": 0.4456, + "step": 9720 + }, + { + "epoch": 0.94, + "learning_rate": 1.811671344236911e-05, + "loss": 0.312, + "step": 9730 + }, + { + "epoch": 0.94, + "learning_rate": 1.8114777896061164e-05, + "loss": 0.2437, + "step": 9740 + }, + { + "epoch": 0.94, + "learning_rate": 1.8112842349753218e-05, + "loss": 0.3638, + "step": 9750 + }, + { + "epoch": 0.94, + "learning_rate": 1.8110906803445275e-05, + "loss": 0.3005, + "step": 9760 + }, + { + "epoch": 0.95, + "learning_rate": 1.810897125713733e-05, + "loss": 0.2665, + "step": 9770 + }, + { + "epoch": 0.95, + "learning_rate": 1.8107035710829382e-05, + "loss": 0.252, + "step": 9780 + }, + { + "epoch": 0.95, + "learning_rate": 1.810510016452144e-05, + "loss": 0.3701, + "step": 9790 + }, + { + "epoch": 0.95, + "learning_rate": 1.8103164618213493e-05, + "loss": 0.303, + "step": 9800 + }, + { + "epoch": 0.95, + "learning_rate": 1.8101229071905547e-05, + "loss": 0.2783, + "step": 9810 + }, + { + "epoch": 0.95, + "learning_rate": 1.80992935255976e-05, + "loss": 0.5224, + "step": 9820 + }, + { + "epoch": 0.95, + "learning_rate": 1.8097357979289657e-05, + "loss": 0.3384, + "step": 9830 + }, + { + "epoch": 0.95, + "learning_rate": 1.809542243298171e-05, + "loss": 0.2959, + "step": 9840 + }, + { + "epoch": 0.95, + "learning_rate": 1.8093486886673764e-05, + "loss": 0.4738, + "step": 9850 + }, + { + "epoch": 0.95, + "learning_rate": 1.809155134036582e-05, + "loss": 0.3816, + "step": 9860 + }, + { + "epoch": 0.96, + "learning_rate": 1.8089615794057875e-05, + "loss": 0.2889, + "step": 9870 + }, + { + "epoch": 0.96, + "learning_rate": 1.808768024774993e-05, + "loss": 0.252, + "step": 9880 + }, + { + "epoch": 0.96, + "learning_rate": 1.8085744701441982e-05, + "loss": 0.4023, + "step": 9890 + }, + { + "epoch": 0.96, + "learning_rate": 1.808380915513404e-05, + "loss": 0.2877, + "step": 9900 + }, + { + "epoch": 0.96, + "learning_rate": 1.8081873608826093e-05, + "loss": 0.4409, + "step": 9910 + }, + { + "epoch": 0.96, + "learning_rate": 1.8079938062518147e-05, + "loss": 0.4125, + "step": 9920 + }, + { + "epoch": 0.96, + "learning_rate": 1.8078002516210204e-05, + "loss": 0.3123, + "step": 9930 + }, + { + "epoch": 0.96, + "learning_rate": 1.8076066969902257e-05, + "loss": 0.381, + "step": 9940 + }, + { + "epoch": 0.96, + "learning_rate": 1.807413142359431e-05, + "loss": 0.1897, + "step": 9950 + }, + { + "epoch": 0.96, + "learning_rate": 1.8072195877286365e-05, + "loss": 0.4724, + "step": 9960 + }, + { + "epoch": 0.96, + "learning_rate": 1.8070260330978418e-05, + "loss": 0.361, + "step": 9970 + }, + { + "epoch": 0.97, + "learning_rate": 1.8068324784670475e-05, + "loss": 0.4062, + "step": 9980 + }, + { + "epoch": 0.97, + "learning_rate": 1.806638923836253e-05, + "loss": 0.3966, + "step": 9990 + }, + { + "epoch": 0.97, + "learning_rate": 1.8064453692054586e-05, + "loss": 0.2606, + "step": 10000 + }, + { + "epoch": 0.97, + "learning_rate": 1.806251814574664e-05, + "loss": 0.2878, + "step": 10010 + }, + { + "epoch": 0.97, + "learning_rate": 1.8060582599438693e-05, + "loss": 0.3085, + "step": 10020 + }, + { + "epoch": 0.97, + "learning_rate": 1.8058647053130747e-05, + "loss": 0.5354, + "step": 10030 + }, + { + "epoch": 0.97, + "learning_rate": 1.80567115068228e-05, + "loss": 0.3531, + "step": 10040 + }, + { + "epoch": 0.97, + "learning_rate": 1.8054775960514857e-05, + "loss": 0.2622, + "step": 10050 + }, + { + "epoch": 0.97, + "learning_rate": 1.805284041420691e-05, + "loss": 0.3848, + "step": 10060 + }, + { + "epoch": 0.97, + "learning_rate": 1.8050904867898965e-05, + "loss": 0.3465, + "step": 10070 + }, + { + "epoch": 0.98, + "learning_rate": 1.8048969321591022e-05, + "loss": 0.301, + "step": 10080 + }, + { + "epoch": 0.98, + "learning_rate": 1.8047033775283075e-05, + "loss": 0.4145, + "step": 10090 + }, + { + "epoch": 0.98, + "learning_rate": 1.804509822897513e-05, + "loss": 0.4173, + "step": 10100 + }, + { + "epoch": 0.98, + "learning_rate": 1.8043162682667183e-05, + "loss": 0.3967, + "step": 10110 + }, + { + "epoch": 0.98, + "learning_rate": 1.804122713635924e-05, + "loss": 0.3905, + "step": 10120 + }, + { + "epoch": 0.98, + "learning_rate": 1.8039291590051293e-05, + "loss": 0.255, + "step": 10130 + }, + { + "epoch": 0.98, + "learning_rate": 1.8037356043743347e-05, + "loss": 0.3156, + "step": 10140 + }, + { + "epoch": 0.98, + "learning_rate": 1.8035420497435404e-05, + "loss": 0.4377, + "step": 10150 + }, + { + "epoch": 0.98, + "learning_rate": 1.8033484951127458e-05, + "loss": 0.3761, + "step": 10160 + }, + { + "epoch": 0.98, + "learning_rate": 1.803154940481951e-05, + "loss": 0.3284, + "step": 10170 + }, + { + "epoch": 0.99, + "learning_rate": 1.8029613858511565e-05, + "loss": 0.3078, + "step": 10180 + }, + { + "epoch": 0.99, + "learning_rate": 1.8027678312203622e-05, + "loss": 0.2337, + "step": 10190 + }, + { + "epoch": 0.99, + "learning_rate": 1.8025742765895676e-05, + "loss": 0.3601, + "step": 10200 + }, + { + "epoch": 0.99, + "learning_rate": 1.802380721958773e-05, + "loss": 0.2557, + "step": 10210 + }, + { + "epoch": 0.99, + "learning_rate": 1.8021871673279786e-05, + "loss": 0.197, + "step": 10220 + }, + { + "epoch": 0.99, + "learning_rate": 1.801993612697184e-05, + "loss": 0.4232, + "step": 10230 + }, + { + "epoch": 0.99, + "learning_rate": 1.8018000580663894e-05, + "loss": 0.2984, + "step": 10240 + }, + { + "epoch": 0.99, + "learning_rate": 1.8016065034355947e-05, + "loss": 0.2304, + "step": 10250 + }, + { + "epoch": 0.99, + "learning_rate": 1.8014129488048e-05, + "loss": 0.3563, + "step": 10260 + }, + { + "epoch": 0.99, + "learning_rate": 1.8012193941740058e-05, + "loss": 0.3423, + "step": 10270 + }, + { + "epoch": 0.99, + "learning_rate": 1.801025839543211e-05, + "loss": 0.2777, + "step": 10280 + }, + { + "epoch": 1.0, + "learning_rate": 1.800832284912417e-05, + "loss": 0.4114, + "step": 10290 + }, + { + "epoch": 1.0, + "learning_rate": 1.8006387302816222e-05, + "loss": 0.2532, + "step": 10300 + }, + { + "epoch": 1.0, + "learning_rate": 1.8004451756508276e-05, + "loss": 0.5116, + "step": 10310 + }, + { + "epoch": 1.0, + "learning_rate": 1.8002516210200333e-05, + "loss": 0.3188, + "step": 10320 + }, + { + "epoch": 1.0, + "learning_rate": 1.8000580663892383e-05, + "loss": 0.4189, + "step": 10330 + }, + { + "epoch": 1.0, + "eval_FN": 633, + "eval_FP": 1997, + "eval_TN": 13762, + "eval_TP": 4273, + "eval_accuracy": 0.8727316719090249, + "eval_f1": 0.7646743020758769, + "eval_loss": 0.32348209619522095, + "eval_precision": 0.6814992025518342, + "eval_recall": 0.8709743171626579, + "eval_runtime": 142.0455, + "eval_samples_per_second": 145.482, + "eval_steps_per_second": 9.096, + "step": 10333 + }, + { + "epoch": 1.0, + "learning_rate": 1.799864511758444e-05, + "loss": 0.2483, + "step": 10340 + }, + { + "epoch": 1.0, + "learning_rate": 1.7996709571276494e-05, + "loss": 0.2275, + "step": 10350 + }, + { + "epoch": 1.0, + "learning_rate": 1.7994774024968547e-05, + "loss": 0.3128, + "step": 10360 + }, + { + "epoch": 1.0, + "learning_rate": 1.7992838478660604e-05, + "loss": 0.323, + "step": 10370 + }, + { + "epoch": 1.0, + "learning_rate": 1.7990902932352658e-05, + "loss": 0.21, + "step": 10380 + }, + { + "epoch": 1.01, + "learning_rate": 1.7988967386044715e-05, + "loss": 0.2717, + "step": 10390 + }, + { + "epoch": 1.01, + "learning_rate": 1.798703183973677e-05, + "loss": 0.2561, + "step": 10400 + }, + { + "epoch": 1.01, + "learning_rate": 1.7985096293428822e-05, + "loss": 0.3568, + "step": 10410 + }, + { + "epoch": 1.01, + "learning_rate": 1.7983160747120876e-05, + "loss": 0.3371, + "step": 10420 + }, + { + "epoch": 1.01, + "learning_rate": 1.798122520081293e-05, + "loss": 0.3075, + "step": 10430 + }, + { + "epoch": 1.01, + "learning_rate": 1.7979289654504987e-05, + "loss": 0.2404, + "step": 10440 + }, + { + "epoch": 1.01, + "learning_rate": 1.797735410819704e-05, + "loss": 0.2847, + "step": 10450 + }, + { + "epoch": 1.01, + "learning_rate": 1.7975418561889094e-05, + "loss": 0.2981, + "step": 10460 + }, + { + "epoch": 1.01, + "learning_rate": 1.797348301558115e-05, + "loss": 0.2101, + "step": 10470 + }, + { + "epoch": 1.01, + "learning_rate": 1.7971547469273204e-05, + "loss": 0.2906, + "step": 10480 + }, + { + "epoch": 1.02, + "learning_rate": 1.7969611922965258e-05, + "loss": 0.3312, + "step": 10490 + }, + { + "epoch": 1.02, + "learning_rate": 1.7967676376657312e-05, + "loss": 0.3293, + "step": 10500 + }, + { + "epoch": 1.02, + "learning_rate": 1.796574083034937e-05, + "loss": 0.4588, + "step": 10510 + }, + { + "epoch": 1.02, + "learning_rate": 1.7963805284041422e-05, + "loss": 0.1739, + "step": 10520 + }, + { + "epoch": 1.02, + "learning_rate": 1.7961869737733476e-05, + "loss": 0.2689, + "step": 10530 + }, + { + "epoch": 1.02, + "learning_rate": 1.7959934191425533e-05, + "loss": 0.3254, + "step": 10540 + }, + { + "epoch": 1.02, + "learning_rate": 1.7957998645117587e-05, + "loss": 0.4014, + "step": 10550 + }, + { + "epoch": 1.02, + "learning_rate": 1.795606309880964e-05, + "loss": 0.2781, + "step": 10560 + }, + { + "epoch": 1.02, + "learning_rate": 1.7954127552501694e-05, + "loss": 0.2659, + "step": 10570 + }, + { + "epoch": 1.02, + "learning_rate": 1.795219200619375e-05, + "loss": 0.2742, + "step": 10580 + }, + { + "epoch": 1.02, + "learning_rate": 1.7950256459885805e-05, + "loss": 0.3584, + "step": 10590 + }, + { + "epoch": 1.03, + "learning_rate": 1.7948320913577858e-05, + "loss": 0.2346, + "step": 10600 + }, + { + "epoch": 1.03, + "learning_rate": 1.7946385367269915e-05, + "loss": 0.4227, + "step": 10610 + }, + { + "epoch": 1.03, + "learning_rate": 1.794444982096197e-05, + "loss": 0.3183, + "step": 10620 + }, + { + "epoch": 1.03, + "learning_rate": 1.7942514274654023e-05, + "loss": 0.3454, + "step": 10630 + }, + { + "epoch": 1.03, + "learning_rate": 1.7940578728346076e-05, + "loss": 0.3053, + "step": 10640 + }, + { + "epoch": 1.03, + "learning_rate": 1.793864318203813e-05, + "loss": 0.357, + "step": 10650 + }, + { + "epoch": 1.03, + "learning_rate": 1.7936707635730187e-05, + "loss": 0.2759, + "step": 10660 + }, + { + "epoch": 1.03, + "learning_rate": 1.793477208942224e-05, + "loss": 0.2706, + "step": 10670 + }, + { + "epoch": 1.03, + "learning_rate": 1.7932836543114298e-05, + "loss": 0.2408, + "step": 10680 + }, + { + "epoch": 1.03, + "learning_rate": 1.793090099680635e-05, + "loss": 0.3548, + "step": 10690 + }, + { + "epoch": 1.04, + "learning_rate": 1.7928965450498405e-05, + "loss": 0.2619, + "step": 10700 + }, + { + "epoch": 1.04, + "learning_rate": 1.792702990419046e-05, + "loss": 0.301, + "step": 10710 + }, + { + "epoch": 1.04, + "learning_rate": 1.7925094357882512e-05, + "loss": 0.3667, + "step": 10720 + }, + { + "epoch": 1.04, + "learning_rate": 1.792315881157457e-05, + "loss": 0.3468, + "step": 10730 + }, + { + "epoch": 1.04, + "learning_rate": 1.7921223265266623e-05, + "loss": 0.2212, + "step": 10740 + }, + { + "epoch": 1.04, + "learning_rate": 1.7919287718958676e-05, + "loss": 0.2348, + "step": 10750 + }, + { + "epoch": 1.04, + "learning_rate": 1.7917352172650733e-05, + "loss": 0.3599, + "step": 10760 + }, + { + "epoch": 1.04, + "learning_rate": 1.7915416626342787e-05, + "loss": 0.3403, + "step": 10770 + }, + { + "epoch": 1.04, + "learning_rate": 1.791348108003484e-05, + "loss": 0.2977, + "step": 10780 + }, + { + "epoch": 1.04, + "learning_rate": 1.7911545533726894e-05, + "loss": 0.2981, + "step": 10790 + }, + { + "epoch": 1.05, + "learning_rate": 1.790960998741895e-05, + "loss": 0.2956, + "step": 10800 + }, + { + "epoch": 1.05, + "learning_rate": 1.7907674441111005e-05, + "loss": 0.2545, + "step": 10810 + }, + { + "epoch": 1.05, + "learning_rate": 1.790573889480306e-05, + "loss": 0.3093, + "step": 10820 + }, + { + "epoch": 1.05, + "learning_rate": 1.7903803348495116e-05, + "loss": 0.3307, + "step": 10830 + }, + { + "epoch": 1.05, + "learning_rate": 1.790186780218717e-05, + "loss": 0.2052, + "step": 10840 + }, + { + "epoch": 1.05, + "learning_rate": 1.7899932255879223e-05, + "loss": 0.3603, + "step": 10850 + }, + { + "epoch": 1.05, + "learning_rate": 1.7897996709571277e-05, + "loss": 0.4176, + "step": 10860 + }, + { + "epoch": 1.05, + "learning_rate": 1.7896061163263334e-05, + "loss": 0.3206, + "step": 10870 + }, + { + "epoch": 1.05, + "learning_rate": 1.7894125616955387e-05, + "loss": 0.2235, + "step": 10880 + }, + { + "epoch": 1.05, + "learning_rate": 1.789219007064744e-05, + "loss": 0.2051, + "step": 10890 + }, + { + "epoch": 1.05, + "learning_rate": 1.7890254524339498e-05, + "loss": 0.4007, + "step": 10900 + }, + { + "epoch": 1.06, + "learning_rate": 1.788831897803155e-05, + "loss": 0.4649, + "step": 10910 + }, + { + "epoch": 1.06, + "learning_rate": 1.7886383431723605e-05, + "loss": 0.2752, + "step": 10920 + }, + { + "epoch": 1.06, + "learning_rate": 1.7884447885415662e-05, + "loss": 0.2034, + "step": 10930 + }, + { + "epoch": 1.06, + "learning_rate": 1.7882512339107712e-05, + "loss": 0.2743, + "step": 10940 + }, + { + "epoch": 1.06, + "learning_rate": 1.788057679279977e-05, + "loss": 0.3253, + "step": 10950 + }, + { + "epoch": 1.06, + "learning_rate": 1.7878641246491823e-05, + "loss": 0.2611, + "step": 10960 + }, + { + "epoch": 1.06, + "learning_rate": 1.787670570018388e-05, + "loss": 0.3317, + "step": 10970 + }, + { + "epoch": 1.06, + "learning_rate": 1.7874770153875934e-05, + "loss": 0.3276, + "step": 10980 + }, + { + "epoch": 1.06, + "learning_rate": 1.7872834607567987e-05, + "loss": 0.2423, + "step": 10990 + }, + { + "epoch": 1.06, + "learning_rate": 1.7870899061260044e-05, + "loss": 0.2252, + "step": 11000 + }, + { + "epoch": 1.07, + "learning_rate": 1.7868963514952095e-05, + "loss": 0.1972, + "step": 11010 + }, + { + "epoch": 1.07, + "learning_rate": 1.786702796864415e-05, + "loss": 0.392, + "step": 11020 + }, + { + "epoch": 1.07, + "learning_rate": 1.7865092422336205e-05, + "loss": 0.6294, + "step": 11030 + }, + { + "epoch": 1.07, + "learning_rate": 1.786315687602826e-05, + "loss": 0.2671, + "step": 11040 + }, + { + "epoch": 1.07, + "learning_rate": 1.7861221329720316e-05, + "loss": 0.3807, + "step": 11050 + }, + { + "epoch": 1.07, + "learning_rate": 1.785928578341237e-05, + "loss": 0.375, + "step": 11060 + }, + { + "epoch": 1.07, + "learning_rate": 1.7857350237104427e-05, + "loss": 0.3174, + "step": 11070 + }, + { + "epoch": 1.07, + "learning_rate": 1.785541469079648e-05, + "loss": 0.2594, + "step": 11080 + }, + { + "epoch": 1.07, + "learning_rate": 1.7853479144488534e-05, + "loss": 0.2906, + "step": 11090 + }, + { + "epoch": 1.07, + "learning_rate": 1.7851543598180587e-05, + "loss": 0.3741, + "step": 11100 + }, + { + "epoch": 1.08, + "learning_rate": 1.784960805187264e-05, + "loss": 0.3104, + "step": 11110 + }, + { + "epoch": 1.08, + "learning_rate": 1.7847672505564698e-05, + "loss": 0.2734, + "step": 11120 + }, + { + "epoch": 1.08, + "learning_rate": 1.7845736959256752e-05, + "loss": 0.2159, + "step": 11130 + }, + { + "epoch": 1.08, + "learning_rate": 1.7843801412948805e-05, + "loss": 0.2701, + "step": 11140 + }, + { + "epoch": 1.08, + "learning_rate": 1.7841865866640862e-05, + "loss": 0.3687, + "step": 11150 + }, + { + "epoch": 1.08, + "learning_rate": 1.7839930320332913e-05, + "loss": 0.1689, + "step": 11160 + }, + { + "epoch": 1.08, + "learning_rate": 1.783799477402497e-05, + "loss": 0.4662, + "step": 11170 + }, + { + "epoch": 1.08, + "learning_rate": 1.7836059227717023e-05, + "loss": 0.1815, + "step": 11180 + }, + { + "epoch": 1.08, + "learning_rate": 1.783412368140908e-05, + "loss": 0.2901, + "step": 11190 + }, + { + "epoch": 1.08, + "learning_rate": 1.7832188135101134e-05, + "loss": 0.2974, + "step": 11200 + }, + { + "epoch": 1.08, + "learning_rate": 1.7830252588793188e-05, + "loss": 0.364, + "step": 11210 + }, + { + "epoch": 1.09, + "learning_rate": 1.7828317042485245e-05, + "loss": 0.2594, + "step": 11220 + }, + { + "epoch": 1.09, + "learning_rate": 1.78263814961773e-05, + "loss": 0.2335, + "step": 11230 + }, + { + "epoch": 1.09, + "learning_rate": 1.7824445949869352e-05, + "loss": 0.2905, + "step": 11240 + }, + { + "epoch": 1.09, + "learning_rate": 1.7822510403561406e-05, + "loss": 0.3135, + "step": 11250 + }, + { + "epoch": 1.09, + "learning_rate": 1.7820574857253463e-05, + "loss": 0.2641, + "step": 11260 + }, + { + "epoch": 1.09, + "learning_rate": 1.7818639310945516e-05, + "loss": 0.2023, + "step": 11270 + }, + { + "epoch": 1.09, + "learning_rate": 1.781670376463757e-05, + "loss": 0.4067, + "step": 11280 + }, + { + "epoch": 1.09, + "learning_rate": 1.7814768218329627e-05, + "loss": 0.3494, + "step": 11290 + }, + { + "epoch": 1.09, + "learning_rate": 1.781283267202168e-05, + "loss": 0.3323, + "step": 11300 + }, + { + "epoch": 1.09, + "learning_rate": 1.7810897125713734e-05, + "loss": 0.4988, + "step": 11310 + }, + { + "epoch": 1.1, + "learning_rate": 1.7808961579405788e-05, + "loss": 0.2556, + "step": 11320 + }, + { + "epoch": 1.1, + "learning_rate": 1.780702603309784e-05, + "loss": 0.2121, + "step": 11330 + }, + { + "epoch": 1.1, + "learning_rate": 1.78050904867899e-05, + "loss": 0.3893, + "step": 11340 + }, + { + "epoch": 1.1, + "learning_rate": 1.7803154940481952e-05, + "loss": 0.2478, + "step": 11350 + }, + { + "epoch": 1.1, + "learning_rate": 1.780121939417401e-05, + "loss": 0.3192, + "step": 11360 + }, + { + "epoch": 1.1, + "learning_rate": 1.7799283847866063e-05, + "loss": 0.3694, + "step": 11370 + }, + { + "epoch": 1.1, + "learning_rate": 1.7797348301558116e-05, + "loss": 0.3107, + "step": 11380 + }, + { + "epoch": 1.1, + "learning_rate": 1.779541275525017e-05, + "loss": 0.2937, + "step": 11390 + }, + { + "epoch": 1.1, + "learning_rate": 1.7793477208942224e-05, + "loss": 0.2507, + "step": 11400 + }, + { + "epoch": 1.1, + "learning_rate": 1.779154166263428e-05, + "loss": 0.2119, + "step": 11410 + }, + { + "epoch": 1.11, + "learning_rate": 1.7789606116326334e-05, + "loss": 0.3228, + "step": 11420 + }, + { + "epoch": 1.11, + "learning_rate": 1.7787670570018388e-05, + "loss": 0.3126, + "step": 11430 + }, + { + "epoch": 1.11, + "learning_rate": 1.7785735023710445e-05, + "loss": 0.3323, + "step": 11440 + }, + { + "epoch": 1.11, + "learning_rate": 1.77837994774025e-05, + "loss": 0.2664, + "step": 11450 + }, + { + "epoch": 1.11, + "learning_rate": 1.7781863931094556e-05, + "loss": 0.401, + "step": 11460 + }, + { + "epoch": 1.11, + "learning_rate": 1.7779928384786606e-05, + "loss": 0.249, + "step": 11470 + }, + { + "epoch": 1.11, + "learning_rate": 1.7777992838478663e-05, + "loss": 0.2923, + "step": 11480 + }, + { + "epoch": 1.11, + "learning_rate": 1.7776057292170717e-05, + "loss": 0.2577, + "step": 11490 + }, + { + "epoch": 1.11, + "learning_rate": 1.777412174586277e-05, + "loss": 0.3159, + "step": 11500 + }, + { + "epoch": 1.11, + "learning_rate": 1.7772186199554827e-05, + "loss": 0.2538, + "step": 11510 + }, + { + "epoch": 1.11, + "learning_rate": 1.777025065324688e-05, + "loss": 0.3503, + "step": 11520 + }, + { + "epoch": 1.12, + "learning_rate": 1.7768315106938934e-05, + "loss": 0.1787, + "step": 11530 + }, + { + "epoch": 1.12, + "learning_rate": 1.7766379560630988e-05, + "loss": 0.3458, + "step": 11540 + }, + { + "epoch": 1.12, + "learning_rate": 1.7764444014323042e-05, + "loss": 0.2278, + "step": 11550 + }, + { + "epoch": 1.12, + "learning_rate": 1.77625084680151e-05, + "loss": 0.3523, + "step": 11560 + }, + { + "epoch": 1.12, + "learning_rate": 1.7760572921707152e-05, + "loss": 0.3537, + "step": 11570 + }, + { + "epoch": 1.12, + "learning_rate": 1.775863737539921e-05, + "loss": 0.4065, + "step": 11580 + }, + { + "epoch": 1.12, + "learning_rate": 1.7756701829091263e-05, + "loss": 0.3426, + "step": 11590 + }, + { + "epoch": 1.12, + "learning_rate": 1.7754766282783317e-05, + "loss": 0.29, + "step": 11600 + }, + { + "epoch": 1.12, + "learning_rate": 1.7752830736475374e-05, + "loss": 0.3502, + "step": 11610 + }, + { + "epoch": 1.12, + "learning_rate": 1.7750895190167424e-05, + "loss": 0.3568, + "step": 11620 + }, + { + "epoch": 1.13, + "learning_rate": 1.774895964385948e-05, + "loss": 0.2594, + "step": 11630 + }, + { + "epoch": 1.13, + "learning_rate": 1.7747024097551535e-05, + "loss": 0.3507, + "step": 11640 + }, + { + "epoch": 1.13, + "learning_rate": 1.774508855124359e-05, + "loss": 0.3133, + "step": 11650 + }, + { + "epoch": 1.13, + "learning_rate": 1.7743153004935645e-05, + "loss": 0.2974, + "step": 11660 + }, + { + "epoch": 1.13, + "learning_rate": 1.77412174586277e-05, + "loss": 0.2756, + "step": 11670 + }, + { + "epoch": 1.13, + "learning_rate": 1.7739281912319756e-05, + "loss": 0.4259, + "step": 11680 + }, + { + "epoch": 1.13, + "learning_rate": 1.7737346366011806e-05, + "loss": 0.2921, + "step": 11690 + }, + { + "epoch": 1.13, + "learning_rate": 1.7735410819703863e-05, + "loss": 0.3598, + "step": 11700 + }, + { + "epoch": 1.13, + "learning_rate": 1.7733475273395917e-05, + "loss": 0.2676, + "step": 11710 + }, + { + "epoch": 1.13, + "learning_rate": 1.773153972708797e-05, + "loss": 0.4373, + "step": 11720 + }, + { + "epoch": 1.14, + "learning_rate": 1.7729604180780028e-05, + "loss": 0.3675, + "step": 11730 + }, + { + "epoch": 1.14, + "learning_rate": 1.772766863447208e-05, + "loss": 0.2378, + "step": 11740 + }, + { + "epoch": 1.14, + "learning_rate": 1.7725733088164138e-05, + "loss": 0.2922, + "step": 11750 + }, + { + "epoch": 1.14, + "learning_rate": 1.7723797541856192e-05, + "loss": 0.2991, + "step": 11760 + }, + { + "epoch": 1.14, + "learning_rate": 1.7721861995548245e-05, + "loss": 0.3666, + "step": 11770 + }, + { + "epoch": 1.14, + "learning_rate": 1.77199264492403e-05, + "loss": 0.1641, + "step": 11780 + }, + { + "epoch": 1.14, + "learning_rate": 1.7717990902932353e-05, + "loss": 0.4562, + "step": 11790 + }, + { + "epoch": 1.14, + "learning_rate": 1.771605535662441e-05, + "loss": 0.4683, + "step": 11800 + }, + { + "epoch": 1.14, + "learning_rate": 1.7714119810316463e-05, + "loss": 0.2242, + "step": 11810 + }, + { + "epoch": 1.14, + "learning_rate": 1.7712184264008517e-05, + "loss": 0.3182, + "step": 11820 + }, + { + "epoch": 1.14, + "learning_rate": 1.7710248717700574e-05, + "loss": 0.3053, + "step": 11830 + }, + { + "epoch": 1.15, + "learning_rate": 1.7708313171392624e-05, + "loss": 0.3082, + "step": 11840 + }, + { + "epoch": 1.15, + "learning_rate": 1.770637762508468e-05, + "loss": 0.2097, + "step": 11850 + }, + { + "epoch": 1.15, + "learning_rate": 1.7704442078776735e-05, + "loss": 0.1834, + "step": 11860 + }, + { + "epoch": 1.15, + "learning_rate": 1.7702506532468792e-05, + "loss": 0.4084, + "step": 11870 + }, + { + "epoch": 1.15, + "learning_rate": 1.7700570986160846e-05, + "loss": 0.3549, + "step": 11880 + }, + { + "epoch": 1.15, + "learning_rate": 1.76986354398529e-05, + "loss": 0.2612, + "step": 11890 + }, + { + "epoch": 1.15, + "learning_rate": 1.7696699893544956e-05, + "loss": 0.2147, + "step": 11900 + }, + { + "epoch": 1.15, + "learning_rate": 1.769476434723701e-05, + "loss": 0.2528, + "step": 11910 + }, + { + "epoch": 1.15, + "learning_rate": 1.7692828800929064e-05, + "loss": 0.2622, + "step": 11920 + }, + { + "epoch": 1.15, + "learning_rate": 1.7690893254621117e-05, + "loss": 0.3125, + "step": 11930 + }, + { + "epoch": 1.16, + "learning_rate": 1.768895770831317e-05, + "loss": 0.248, + "step": 11940 + }, + { + "epoch": 1.16, + "learning_rate": 1.7687022162005228e-05, + "loss": 0.2384, + "step": 11950 + }, + { + "epoch": 1.16, + "learning_rate": 1.768508661569728e-05, + "loss": 0.3284, + "step": 11960 + }, + { + "epoch": 1.16, + "learning_rate": 1.768315106938934e-05, + "loss": 0.1896, + "step": 11970 + }, + { + "epoch": 1.16, + "learning_rate": 1.7681215523081392e-05, + "loss": 0.5095, + "step": 11980 + }, + { + "epoch": 1.16, + "learning_rate": 1.7679279976773446e-05, + "loss": 0.2764, + "step": 11990 + }, + { + "epoch": 1.16, + "learning_rate": 1.76773444304655e-05, + "loss": 0.3324, + "step": 12000 + }, + { + "epoch": 1.16, + "learning_rate": 1.7675408884157553e-05, + "loss": 0.304, + "step": 12010 + }, + { + "epoch": 1.16, + "learning_rate": 1.767347333784961e-05, + "loss": 0.296, + "step": 12020 + }, + { + "epoch": 1.16, + "learning_rate": 1.7671537791541664e-05, + "loss": 0.3058, + "step": 12030 + }, + { + "epoch": 1.17, + "learning_rate": 1.766960224523372e-05, + "loss": 0.3414, + "step": 12040 + }, + { + "epoch": 1.17, + "learning_rate": 1.7667666698925774e-05, + "loss": 0.3343, + "step": 12050 + }, + { + "epoch": 1.17, + "learning_rate": 1.7665731152617828e-05, + "loss": 0.4341, + "step": 12060 + }, + { + "epoch": 1.17, + "learning_rate": 1.766379560630988e-05, + "loss": 0.2519, + "step": 12070 + }, + { + "epoch": 1.17, + "learning_rate": 1.7661860060001935e-05, + "loss": 0.3931, + "step": 12080 + }, + { + "epoch": 1.17, + "learning_rate": 1.7659924513693992e-05, + "loss": 0.3777, + "step": 12090 + }, + { + "epoch": 1.17, + "learning_rate": 1.7657988967386046e-05, + "loss": 0.1947, + "step": 12100 + }, + { + "epoch": 1.17, + "learning_rate": 1.76560534210781e-05, + "loss": 0.291, + "step": 12110 + }, + { + "epoch": 1.17, + "learning_rate": 1.7654117874770157e-05, + "loss": 0.4085, + "step": 12120 + }, + { + "epoch": 1.17, + "learning_rate": 1.765218232846221e-05, + "loss": 0.3697, + "step": 12130 + }, + { + "epoch": 1.17, + "learning_rate": 1.7650246782154267e-05, + "loss": 0.2979, + "step": 12140 + }, + { + "epoch": 1.18, + "learning_rate": 1.7648311235846317e-05, + "loss": 0.2956, + "step": 12150 + }, + { + "epoch": 1.18, + "learning_rate": 1.7646375689538375e-05, + "loss": 0.1438, + "step": 12160 + }, + { + "epoch": 1.18, + "learning_rate": 1.7644440143230428e-05, + "loss": 0.2426, + "step": 12170 + }, + { + "epoch": 1.18, + "learning_rate": 1.7642504596922482e-05, + "loss": 0.373, + "step": 12180 + }, + { + "epoch": 1.18, + "learning_rate": 1.764056905061454e-05, + "loss": 0.2025, + "step": 12190 + }, + { + "epoch": 1.18, + "learning_rate": 1.7638633504306592e-05, + "loss": 0.3099, + "step": 12200 + }, + { + "epoch": 1.18, + "learning_rate": 1.7636697957998646e-05, + "loss": 0.3478, + "step": 12210 + }, + { + "epoch": 1.18, + "learning_rate": 1.76347624116907e-05, + "loss": 0.2691, + "step": 12220 + }, + { + "epoch": 1.18, + "learning_rate": 1.7632826865382753e-05, + "loss": 0.3796, + "step": 12230 + }, + { + "epoch": 1.18, + "learning_rate": 1.763089131907481e-05, + "loss": 0.364, + "step": 12240 + }, + { + "epoch": 1.19, + "learning_rate": 1.7628955772766864e-05, + "loss": 0.2784, + "step": 12250 + }, + { + "epoch": 1.19, + "learning_rate": 1.762702022645892e-05, + "loss": 0.2691, + "step": 12260 + }, + { + "epoch": 1.19, + "learning_rate": 1.7625084680150975e-05, + "loss": 0.2386, + "step": 12270 + }, + { + "epoch": 1.19, + "learning_rate": 1.7623149133843028e-05, + "loss": 0.3177, + "step": 12280 + }, + { + "epoch": 1.19, + "learning_rate": 1.7621213587535085e-05, + "loss": 0.2799, + "step": 12290 + }, + { + "epoch": 1.19, + "learning_rate": 1.7619278041227136e-05, + "loss": 0.2812, + "step": 12300 + }, + { + "epoch": 1.19, + "learning_rate": 1.7617342494919193e-05, + "loss": 0.2085, + "step": 12310 + }, + { + "epoch": 1.19, + "learning_rate": 1.7615406948611246e-05, + "loss": 0.3404, + "step": 12320 + }, + { + "epoch": 1.19, + "learning_rate": 1.76134714023033e-05, + "loss": 0.2502, + "step": 12330 + }, + { + "epoch": 1.19, + "learning_rate": 1.7611535855995357e-05, + "loss": 0.2787, + "step": 12340 + }, + { + "epoch": 1.2, + "learning_rate": 1.760960030968741e-05, + "loss": 0.2544, + "step": 12350 + }, + { + "epoch": 1.2, + "learning_rate": 1.7607664763379468e-05, + "loss": 0.2554, + "step": 12360 + }, + { + "epoch": 1.2, + "learning_rate": 1.7605729217071518e-05, + "loss": 0.2304, + "step": 12370 + }, + { + "epoch": 1.2, + "learning_rate": 1.7603793670763575e-05, + "loss": 0.3398, + "step": 12380 + }, + { + "epoch": 1.2, + "learning_rate": 1.760185812445563e-05, + "loss": 0.4181, + "step": 12390 + }, + { + "epoch": 1.2, + "learning_rate": 1.7599922578147682e-05, + "loss": 0.3358, + "step": 12400 + }, + { + "epoch": 1.2, + "learning_rate": 1.759798703183974e-05, + "loss": 0.2375, + "step": 12410 + }, + { + "epoch": 1.2, + "learning_rate": 1.7596051485531793e-05, + "loss": 0.2598, + "step": 12420 + }, + { + "epoch": 1.2, + "learning_rate": 1.759411593922385e-05, + "loss": 0.2919, + "step": 12430 + }, + { + "epoch": 1.2, + "learning_rate": 1.7592180392915903e-05, + "loss": 0.2072, + "step": 12440 + }, + { + "epoch": 1.2, + "learning_rate": 1.7590244846607957e-05, + "loss": 0.2624, + "step": 12450 + }, + { + "epoch": 1.21, + "learning_rate": 1.758830930030001e-05, + "loss": 0.2373, + "step": 12460 + }, + { + "epoch": 1.21, + "learning_rate": 1.7586373753992064e-05, + "loss": 0.3185, + "step": 12470 + }, + { + "epoch": 1.21, + "learning_rate": 1.758443820768412e-05, + "loss": 0.5107, + "step": 12480 + }, + { + "epoch": 1.21, + "learning_rate": 1.7582502661376175e-05, + "loss": 0.341, + "step": 12490 + }, + { + "epoch": 1.21, + "learning_rate": 1.758056711506823e-05, + "loss": 0.2974, + "step": 12500 + }, + { + "epoch": 1.21, + "learning_rate": 1.7578631568760286e-05, + "loss": 0.2554, + "step": 12510 + }, + { + "epoch": 1.21, + "learning_rate": 1.7576696022452336e-05, + "loss": 0.2948, + "step": 12520 + }, + { + "epoch": 1.21, + "learning_rate": 1.7574760476144393e-05, + "loss": 0.3229, + "step": 12530 + }, + { + "epoch": 1.21, + "learning_rate": 1.7572824929836447e-05, + "loss": 0.214, + "step": 12540 + }, + { + "epoch": 1.21, + "learning_rate": 1.7570889383528504e-05, + "loss": 0.268, + "step": 12550 + }, + { + "epoch": 1.22, + "learning_rate": 1.7568953837220557e-05, + "loss": 0.4982, + "step": 12560 + }, + { + "epoch": 1.22, + "learning_rate": 1.756701829091261e-05, + "loss": 0.3273, + "step": 12570 + }, + { + "epoch": 1.22, + "learning_rate": 1.7565082744604668e-05, + "loss": 0.2519, + "step": 12580 + }, + { + "epoch": 1.22, + "learning_rate": 1.756314719829672e-05, + "loss": 0.2431, + "step": 12590 + }, + { + "epoch": 1.22, + "learning_rate": 1.7561211651988775e-05, + "loss": 0.4066, + "step": 12600 + }, + { + "epoch": 1.22, + "learning_rate": 1.755927610568083e-05, + "loss": 0.3407, + "step": 12610 + }, + { + "epoch": 1.22, + "learning_rate": 1.7557340559372882e-05, + "loss": 0.3044, + "step": 12620 + }, + { + "epoch": 1.22, + "learning_rate": 1.755540501306494e-05, + "loss": 0.3155, + "step": 12630 + }, + { + "epoch": 1.22, + "learning_rate": 1.7553469466756993e-05, + "loss": 0.2075, + "step": 12640 + }, + { + "epoch": 1.22, + "learning_rate": 1.755153392044905e-05, + "loss": 0.3167, + "step": 12650 + }, + { + "epoch": 1.23, + "learning_rate": 1.7549598374141104e-05, + "loss": 0.3173, + "step": 12660 + }, + { + "epoch": 1.23, + "learning_rate": 1.7547662827833157e-05, + "loss": 0.2738, + "step": 12670 + }, + { + "epoch": 1.23, + "learning_rate": 1.754572728152521e-05, + "loss": 0.3184, + "step": 12680 + }, + { + "epoch": 1.23, + "learning_rate": 1.7543791735217265e-05, + "loss": 0.2201, + "step": 12690 + }, + { + "epoch": 1.23, + "learning_rate": 1.754185618890932e-05, + "loss": 0.3613, + "step": 12700 + }, + { + "epoch": 1.23, + "learning_rate": 1.7539920642601375e-05, + "loss": 0.2429, + "step": 12710 + }, + { + "epoch": 1.23, + "learning_rate": 1.753798509629343e-05, + "loss": 0.2894, + "step": 12720 + }, + { + "epoch": 1.23, + "learning_rate": 1.7536049549985486e-05, + "loss": 0.2337, + "step": 12730 + }, + { + "epoch": 1.23, + "learning_rate": 1.753411400367754e-05, + "loss": 0.2556, + "step": 12740 + }, + { + "epoch": 1.23, + "learning_rate": 1.7532178457369593e-05, + "loss": 0.2638, + "step": 12750 + }, + { + "epoch": 1.23, + "learning_rate": 1.7530242911061647e-05, + "loss": 0.3512, + "step": 12760 + }, + { + "epoch": 1.24, + "learning_rate": 1.7528307364753704e-05, + "loss": 0.2178, + "step": 12770 + }, + { + "epoch": 1.24, + "learning_rate": 1.7526371818445758e-05, + "loss": 0.4166, + "step": 12780 + }, + { + "epoch": 1.24, + "learning_rate": 1.752443627213781e-05, + "loss": 0.242, + "step": 12790 + }, + { + "epoch": 1.24, + "learning_rate": 1.7522500725829868e-05, + "loss": 0.2775, + "step": 12800 + }, + { + "epoch": 1.24, + "learning_rate": 1.7520565179521922e-05, + "loss": 0.2914, + "step": 12810 + }, + { + "epoch": 1.24, + "learning_rate": 1.7518629633213975e-05, + "loss": 0.2436, + "step": 12820 + }, + { + "epoch": 1.24, + "learning_rate": 1.751669408690603e-05, + "loss": 0.2706, + "step": 12830 + }, + { + "epoch": 1.24, + "learning_rate": 1.7514758540598086e-05, + "loss": 0.3251, + "step": 12840 + }, + { + "epoch": 1.24, + "learning_rate": 1.751282299429014e-05, + "loss": 0.2894, + "step": 12850 + }, + { + "epoch": 1.24, + "learning_rate": 1.7510887447982193e-05, + "loss": 0.2491, + "step": 12860 + }, + { + "epoch": 1.25, + "learning_rate": 1.750895190167425e-05, + "loss": 0.4091, + "step": 12870 + }, + { + "epoch": 1.25, + "learning_rate": 1.7507016355366304e-05, + "loss": 0.2688, + "step": 12880 + }, + { + "epoch": 1.25, + "learning_rate": 1.7505080809058358e-05, + "loss": 0.2115, + "step": 12890 + }, + { + "epoch": 1.25, + "learning_rate": 1.750314526275041e-05, + "loss": 0.2996, + "step": 12900 + }, + { + "epoch": 1.25, + "learning_rate": 1.7501209716442465e-05, + "loss": 0.3004, + "step": 12910 + }, + { + "epoch": 1.25, + "learning_rate": 1.7499274170134522e-05, + "loss": 0.3488, + "step": 12920 + }, + { + "epoch": 1.25, + "learning_rate": 1.7497338623826576e-05, + "loss": 0.3649, + "step": 12930 + }, + { + "epoch": 1.25, + "learning_rate": 1.7495403077518633e-05, + "loss": 0.3626, + "step": 12940 + }, + { + "epoch": 1.25, + "learning_rate": 1.7493467531210686e-05, + "loss": 0.3553, + "step": 12950 + }, + { + "epoch": 1.25, + "learning_rate": 1.749153198490274e-05, + "loss": 0.3368, + "step": 12960 + }, + { + "epoch": 1.26, + "learning_rate": 1.7489596438594797e-05, + "loss": 0.3551, + "step": 12970 + }, + { + "epoch": 1.26, + "learning_rate": 1.7487660892286847e-05, + "loss": 0.4314, + "step": 12980 + }, + { + "epoch": 1.26, + "learning_rate": 1.7485725345978904e-05, + "loss": 0.2911, + "step": 12990 + }, + { + "epoch": 1.26, + "learning_rate": 1.7483789799670958e-05, + "loss": 0.2659, + "step": 13000 + }, + { + "epoch": 1.26, + "learning_rate": 1.748185425336301e-05, + "loss": 0.2418, + "step": 13010 + }, + { + "epoch": 1.26, + "learning_rate": 1.747991870705507e-05, + "loss": 0.3578, + "step": 13020 + }, + { + "epoch": 1.26, + "learning_rate": 1.7477983160747122e-05, + "loss": 0.313, + "step": 13030 + }, + { + "epoch": 1.26, + "learning_rate": 1.747604761443918e-05, + "loss": 0.2388, + "step": 13040 + }, + { + "epoch": 1.26, + "learning_rate": 1.747411206813123e-05, + "loss": 0.3395, + "step": 13050 + }, + { + "epoch": 1.26, + "learning_rate": 1.7472176521823286e-05, + "loss": 0.396, + "step": 13060 + }, + { + "epoch": 1.26, + "learning_rate": 1.747024097551534e-05, + "loss": 0.2968, + "step": 13070 + }, + { + "epoch": 1.27, + "learning_rate": 1.7468305429207394e-05, + "loss": 0.3117, + "step": 13080 + }, + { + "epoch": 1.27, + "learning_rate": 1.746636988289945e-05, + "loss": 0.2308, + "step": 13090 + }, + { + "epoch": 1.27, + "learning_rate": 1.7464434336591504e-05, + "loss": 0.3432, + "step": 13100 + }, + { + "epoch": 1.27, + "learning_rate": 1.7462498790283558e-05, + "loss": 0.2703, + "step": 13110 + }, + { + "epoch": 1.27, + "learning_rate": 1.7460563243975615e-05, + "loss": 0.2355, + "step": 13120 + }, + { + "epoch": 1.27, + "learning_rate": 1.745862769766767e-05, + "loss": 0.3138, + "step": 13130 + }, + { + "epoch": 1.27, + "learning_rate": 1.7456692151359722e-05, + "loss": 0.3389, + "step": 13140 + }, + { + "epoch": 1.27, + "learning_rate": 1.7454756605051776e-05, + "loss": 0.3225, + "step": 13150 + }, + { + "epoch": 1.27, + "learning_rate": 1.7452821058743833e-05, + "loss": 0.2878, + "step": 13160 + }, + { + "epoch": 1.27, + "learning_rate": 1.7450885512435887e-05, + "loss": 0.3208, + "step": 13170 + }, + { + "epoch": 1.28, + "learning_rate": 1.744894996612794e-05, + "loss": 0.3255, + "step": 13180 + }, + { + "epoch": 1.28, + "learning_rate": 1.7447014419819997e-05, + "loss": 0.1907, + "step": 13190 + }, + { + "epoch": 1.28, + "learning_rate": 1.7445078873512047e-05, + "loss": 0.2946, + "step": 13200 + }, + { + "epoch": 1.28, + "learning_rate": 1.7443143327204105e-05, + "loss": 0.2404, + "step": 13210 + }, + { + "epoch": 1.28, + "learning_rate": 1.7441207780896158e-05, + "loss": 0.3631, + "step": 13220 + }, + { + "epoch": 1.28, + "learning_rate": 1.7439272234588215e-05, + "loss": 0.307, + "step": 13230 + }, + { + "epoch": 1.28, + "learning_rate": 1.743733668828027e-05, + "loss": 0.2548, + "step": 13240 + }, + { + "epoch": 1.28, + "learning_rate": 1.7435401141972322e-05, + "loss": 0.4253, + "step": 13250 + }, + { + "epoch": 1.28, + "learning_rate": 1.743346559566438e-05, + "loss": 0.2876, + "step": 13260 + }, + { + "epoch": 1.28, + "learning_rate": 1.7431530049356433e-05, + "loss": 0.3833, + "step": 13270 + }, + { + "epoch": 1.29, + "learning_rate": 1.7429594503048487e-05, + "loss": 0.2182, + "step": 13280 + }, + { + "epoch": 1.29, + "learning_rate": 1.742765895674054e-05, + "loss": 0.2656, + "step": 13290 + }, + { + "epoch": 1.29, + "learning_rate": 1.7425723410432594e-05, + "loss": 0.4348, + "step": 13300 + }, + { + "epoch": 1.29, + "learning_rate": 1.742378786412465e-05, + "loss": 0.3498, + "step": 13310 + }, + { + "epoch": 1.29, + "learning_rate": 1.7421852317816705e-05, + "loss": 0.378, + "step": 13320 + }, + { + "epoch": 1.29, + "learning_rate": 1.741991677150876e-05, + "loss": 0.3365, + "step": 13330 + }, + { + "epoch": 1.29, + "learning_rate": 1.7417981225200815e-05, + "loss": 0.35, + "step": 13340 + }, + { + "epoch": 1.29, + "learning_rate": 1.741604567889287e-05, + "loss": 0.2568, + "step": 13350 + }, + { + "epoch": 1.29, + "learning_rate": 1.7414110132584923e-05, + "loss": 0.2551, + "step": 13360 + }, + { + "epoch": 1.29, + "learning_rate": 1.7412174586276976e-05, + "loss": 0.3466, + "step": 13370 + }, + { + "epoch": 1.29, + "learning_rate": 1.7410239039969033e-05, + "loss": 0.3855, + "step": 13380 + }, + { + "epoch": 1.3, + "learning_rate": 1.7408303493661087e-05, + "loss": 0.2287, + "step": 13390 + }, + { + "epoch": 1.3, + "learning_rate": 1.740636794735314e-05, + "loss": 0.1829, + "step": 13400 + }, + { + "epoch": 1.3, + "learning_rate": 1.7404432401045198e-05, + "loss": 0.2454, + "step": 13410 + }, + { + "epoch": 1.3, + "learning_rate": 1.740249685473725e-05, + "loss": 0.3818, + "step": 13420 + }, + { + "epoch": 1.3, + "learning_rate": 1.7400561308429305e-05, + "loss": 0.4368, + "step": 13430 + }, + { + "epoch": 1.3, + "learning_rate": 1.739862576212136e-05, + "loss": 0.2245, + "step": 13440 + }, + { + "epoch": 1.3, + "learning_rate": 1.7396690215813415e-05, + "loss": 0.3694, + "step": 13450 + }, + { + "epoch": 1.3, + "learning_rate": 1.739475466950547e-05, + "loss": 0.3407, + "step": 13460 + }, + { + "epoch": 1.3, + "learning_rate": 1.7392819123197523e-05, + "loss": 0.326, + "step": 13470 + }, + { + "epoch": 1.3, + "learning_rate": 1.739088357688958e-05, + "loss": 0.341, + "step": 13480 + }, + { + "epoch": 1.31, + "learning_rate": 1.7388948030581633e-05, + "loss": 0.2129, + "step": 13490 + }, + { + "epoch": 1.31, + "learning_rate": 1.7387012484273687e-05, + "loss": 0.1966, + "step": 13500 + }, + { + "epoch": 1.31, + "learning_rate": 1.738507693796574e-05, + "loss": 0.3588, + "step": 13510 + }, + { + "epoch": 1.31, + "learning_rate": 1.7383141391657798e-05, + "loss": 0.2129, + "step": 13520 + }, + { + "epoch": 1.31, + "learning_rate": 1.738120584534985e-05, + "loss": 0.2396, + "step": 13530 + }, + { + "epoch": 1.31, + "learning_rate": 1.7379270299041905e-05, + "loss": 0.2834, + "step": 13540 + }, + { + "epoch": 1.31, + "learning_rate": 1.7377334752733962e-05, + "loss": 0.2101, + "step": 13550 + }, + { + "epoch": 1.31, + "learning_rate": 1.7375399206426016e-05, + "loss": 0.2817, + "step": 13560 + }, + { + "epoch": 1.31, + "learning_rate": 1.737346366011807e-05, + "loss": 0.382, + "step": 13570 + }, + { + "epoch": 1.31, + "learning_rate": 1.7371528113810123e-05, + "loss": 0.2905, + "step": 13580 + }, + { + "epoch": 1.32, + "learning_rate": 1.7369592567502177e-05, + "loss": 0.3307, + "step": 13590 + }, + { + "epoch": 1.32, + "learning_rate": 1.7367657021194234e-05, + "loss": 0.2484, + "step": 13600 + }, + { + "epoch": 1.32, + "learning_rate": 1.7365721474886287e-05, + "loss": 0.2224, + "step": 13610 + }, + { + "epoch": 1.32, + "learning_rate": 1.7363785928578344e-05, + "loss": 0.2508, + "step": 13620 + }, + { + "epoch": 1.32, + "learning_rate": 1.7361850382270398e-05, + "loss": 0.3228, + "step": 13630 + }, + { + "epoch": 1.32, + "learning_rate": 1.735991483596245e-05, + "loss": 0.2452, + "step": 13640 + }, + { + "epoch": 1.32, + "learning_rate": 1.735797928965451e-05, + "loss": 0.1718, + "step": 13650 + }, + { + "epoch": 1.32, + "learning_rate": 1.735604374334656e-05, + "loss": 0.1921, + "step": 13660 + }, + { + "epoch": 1.32, + "learning_rate": 1.7354108197038616e-05, + "loss": 0.3054, + "step": 13670 + }, + { + "epoch": 1.32, + "learning_rate": 1.735217265073067e-05, + "loss": 0.3517, + "step": 13680 + }, + { + "epoch": 1.32, + "learning_rate": 1.7350237104422723e-05, + "loss": 0.251, + "step": 13690 + }, + { + "epoch": 1.33, + "learning_rate": 1.734830155811478e-05, + "loss": 0.2865, + "step": 13700 + }, + { + "epoch": 1.33, + "learning_rate": 1.7346366011806834e-05, + "loss": 0.4003, + "step": 13710 + }, + { + "epoch": 1.33, + "learning_rate": 1.734443046549889e-05, + "loss": 0.1633, + "step": 13720 + }, + { + "epoch": 1.33, + "learning_rate": 1.734249491919094e-05, + "loss": 0.322, + "step": 13730 + }, + { + "epoch": 1.33, + "learning_rate": 1.7340559372882998e-05, + "loss": 0.3617, + "step": 13740 + }, + { + "epoch": 1.33, + "learning_rate": 1.733862382657505e-05, + "loss": 0.3212, + "step": 13750 + }, + { + "epoch": 1.33, + "learning_rate": 1.7336688280267105e-05, + "loss": 0.3475, + "step": 13760 + }, + { + "epoch": 1.33, + "learning_rate": 1.7334752733959162e-05, + "loss": 0.3098, + "step": 13770 + }, + { + "epoch": 1.33, + "learning_rate": 1.7332817187651216e-05, + "loss": 0.1709, + "step": 13780 + }, + { + "epoch": 1.33, + "learning_rate": 1.733088164134327e-05, + "loss": 0.2931, + "step": 13790 + }, + { + "epoch": 1.34, + "learning_rate": 1.7328946095035327e-05, + "loss": 0.4214, + "step": 13800 + }, + { + "epoch": 1.34, + "learning_rate": 1.732701054872738e-05, + "loss": 0.3188, + "step": 13810 + }, + { + "epoch": 1.34, + "learning_rate": 1.7325075002419434e-05, + "loss": 0.3444, + "step": 13820 + }, + { + "epoch": 1.34, + "learning_rate": 1.7323139456111488e-05, + "loss": 0.2546, + "step": 13830 + }, + { + "epoch": 1.34, + "learning_rate": 1.7321203909803545e-05, + "loss": 0.3692, + "step": 13840 + }, + { + "epoch": 1.34, + "learning_rate": 1.7319268363495598e-05, + "loss": 0.2862, + "step": 13850 + }, + { + "epoch": 1.34, + "learning_rate": 1.7317332817187652e-05, + "loss": 0.1609, + "step": 13860 + }, + { + "epoch": 1.34, + "learning_rate": 1.731539727087971e-05, + "loss": 0.4133, + "step": 13870 + }, + { + "epoch": 1.34, + "learning_rate": 1.7313461724571762e-05, + "loss": 0.4843, + "step": 13880 + }, + { + "epoch": 1.34, + "learning_rate": 1.7311526178263816e-05, + "loss": 0.2882, + "step": 13890 + }, + { + "epoch": 1.35, + "learning_rate": 1.730959063195587e-05, + "loss": 0.2981, + "step": 13900 + }, + { + "epoch": 1.35, + "learning_rate": 1.7307655085647927e-05, + "loss": 0.2529, + "step": 13910 + }, + { + "epoch": 1.35, + "learning_rate": 1.730571953933998e-05, + "loss": 0.3123, + "step": 13920 + }, + { + "epoch": 1.35, + "learning_rate": 1.7303783993032034e-05, + "loss": 0.303, + "step": 13930 + }, + { + "epoch": 1.35, + "learning_rate": 1.730184844672409e-05, + "loss": 0.3345, + "step": 13940 + }, + { + "epoch": 1.35, + "learning_rate": 1.7299912900416145e-05, + "loss": 0.2852, + "step": 13950 + }, + { + "epoch": 1.35, + "learning_rate": 1.72979773541082e-05, + "loss": 0.3422, + "step": 13960 + }, + { + "epoch": 1.35, + "learning_rate": 1.7296041807800252e-05, + "loss": 0.2504, + "step": 13970 + }, + { + "epoch": 1.35, + "learning_rate": 1.7294106261492306e-05, + "loss": 0.2574, + "step": 13980 + }, + { + "epoch": 1.35, + "learning_rate": 1.7292170715184363e-05, + "loss": 0.2679, + "step": 13990 + }, + { + "epoch": 1.35, + "learning_rate": 1.7290235168876416e-05, + "loss": 0.2278, + "step": 14000 + }, + { + "epoch": 1.36, + "learning_rate": 1.7288299622568473e-05, + "loss": 0.2479, + "step": 14010 + }, + { + "epoch": 1.36, + "learning_rate": 1.7286364076260527e-05, + "loss": 0.3253, + "step": 14020 + }, + { + "epoch": 1.36, + "learning_rate": 1.728442852995258e-05, + "loss": 0.3506, + "step": 14030 + }, + { + "epoch": 1.36, + "learning_rate": 1.7282492983644634e-05, + "loss": 0.2396, + "step": 14040 + }, + { + "epoch": 1.36, + "learning_rate": 1.7280557437336688e-05, + "loss": 0.2969, + "step": 14050 + }, + { + "epoch": 1.36, + "learning_rate": 1.7278621891028745e-05, + "loss": 0.2032, + "step": 14060 + }, + { + "epoch": 1.36, + "learning_rate": 1.72766863447208e-05, + "loss": 0.2813, + "step": 14070 + }, + { + "epoch": 1.36, + "learning_rate": 1.7274750798412852e-05, + "loss": 0.3826, + "step": 14080 + }, + { + "epoch": 1.36, + "learning_rate": 1.727281525210491e-05, + "loss": 0.1839, + "step": 14090 + }, + { + "epoch": 1.36, + "learning_rate": 1.7270879705796963e-05, + "loss": 0.3368, + "step": 14100 + }, + { + "epoch": 1.37, + "learning_rate": 1.7268944159489016e-05, + "loss": 0.3362, + "step": 14110 + }, + { + "epoch": 1.37, + "learning_rate": 1.726700861318107e-05, + "loss": 0.306, + "step": 14120 + }, + { + "epoch": 1.37, + "learning_rate": 1.7265073066873127e-05, + "loss": 0.2922, + "step": 14130 + }, + { + "epoch": 1.37, + "learning_rate": 1.726313752056518e-05, + "loss": 0.2158, + "step": 14140 + }, + { + "epoch": 1.37, + "learning_rate": 1.7261201974257234e-05, + "loss": 0.4264, + "step": 14150 + }, + { + "epoch": 1.37, + "learning_rate": 1.725926642794929e-05, + "loss": 0.357, + "step": 14160 + }, + { + "epoch": 1.37, + "learning_rate": 1.7257330881641345e-05, + "loss": 0.3419, + "step": 14170 + }, + { + "epoch": 1.37, + "learning_rate": 1.72553953353334e-05, + "loss": 0.2832, + "step": 14180 + }, + { + "epoch": 1.37, + "learning_rate": 1.7253459789025452e-05, + "loss": 0.1968, + "step": 14190 + }, + { + "epoch": 1.37, + "learning_rate": 1.725152424271751e-05, + "loss": 0.3244, + "step": 14200 + }, + { + "epoch": 1.38, + "learning_rate": 1.7249588696409563e-05, + "loss": 0.3712, + "step": 14210 + }, + { + "epoch": 1.38, + "learning_rate": 1.7247653150101617e-05, + "loss": 0.2123, + "step": 14220 + }, + { + "epoch": 1.38, + "learning_rate": 1.7245717603793674e-05, + "loss": 0.1996, + "step": 14230 + }, + { + "epoch": 1.38, + "learning_rate": 1.7243782057485727e-05, + "loss": 0.2396, + "step": 14240 + }, + { + "epoch": 1.38, + "learning_rate": 1.724184651117778e-05, + "loss": 0.3362, + "step": 14250 + }, + { + "epoch": 1.38, + "learning_rate": 1.7239910964869835e-05, + "loss": 0.2388, + "step": 14260 + }, + { + "epoch": 1.38, + "learning_rate": 1.7237975418561888e-05, + "loss": 0.2132, + "step": 14270 + }, + { + "epoch": 1.38, + "learning_rate": 1.7236039872253945e-05, + "loss": 0.3061, + "step": 14280 + }, + { + "epoch": 1.38, + "learning_rate": 1.7234104325946e-05, + "loss": 0.4071, + "step": 14290 + }, + { + "epoch": 1.38, + "learning_rate": 1.7232168779638056e-05, + "loss": 0.3262, + "step": 14300 + }, + { + "epoch": 1.38, + "learning_rate": 1.723023323333011e-05, + "loss": 0.361, + "step": 14310 + }, + { + "epoch": 1.39, + "learning_rate": 1.7228297687022163e-05, + "loss": 0.253, + "step": 14320 + }, + { + "epoch": 1.39, + "learning_rate": 1.722636214071422e-05, + "loss": 0.3543, + "step": 14330 + }, + { + "epoch": 1.39, + "learning_rate": 1.722442659440627e-05, + "loss": 0.2824, + "step": 14340 + }, + { + "epoch": 1.39, + "learning_rate": 1.7222491048098327e-05, + "loss": 0.2834, + "step": 14350 + }, + { + "epoch": 1.39, + "learning_rate": 1.722055550179038e-05, + "loss": 0.2591, + "step": 14360 + }, + { + "epoch": 1.39, + "learning_rate": 1.7218619955482435e-05, + "loss": 0.2181, + "step": 14370 + }, + { + "epoch": 1.39, + "learning_rate": 1.721668440917449e-05, + "loss": 0.3592, + "step": 14380 + }, + { + "epoch": 1.39, + "learning_rate": 1.7214748862866545e-05, + "loss": 0.216, + "step": 14390 + }, + { + "epoch": 1.39, + "learning_rate": 1.7212813316558602e-05, + "loss": 0.2801, + "step": 14400 + }, + { + "epoch": 1.39, + "learning_rate": 1.7210877770250656e-05, + "loss": 0.4208, + "step": 14410 + }, + { + "epoch": 1.4, + "learning_rate": 1.720894222394271e-05, + "loss": 0.3146, + "step": 14420 + }, + { + "epoch": 1.4, + "learning_rate": 1.7207006677634763e-05, + "loss": 0.3148, + "step": 14430 + }, + { + "epoch": 1.4, + "learning_rate": 1.7205071131326817e-05, + "loss": 0.234, + "step": 14440 + }, + { + "epoch": 1.4, + "learning_rate": 1.7203135585018874e-05, + "loss": 0.2951, + "step": 14450 + }, + { + "epoch": 1.4, + "learning_rate": 1.7201200038710928e-05, + "loss": 0.2603, + "step": 14460 + }, + { + "epoch": 1.4, + "learning_rate": 1.719926449240298e-05, + "loss": 0.2924, + "step": 14470 + }, + { + "epoch": 1.4, + "learning_rate": 1.7197328946095038e-05, + "loss": 0.2818, + "step": 14480 + }, + { + "epoch": 1.4, + "learning_rate": 1.7195393399787092e-05, + "loss": 0.338, + "step": 14490 + }, + { + "epoch": 1.4, + "learning_rate": 1.7193457853479145e-05, + "loss": 0.2182, + "step": 14500 + }, + { + "epoch": 1.4, + "learning_rate": 1.71915223071712e-05, + "loss": 0.2248, + "step": 14510 + }, + { + "epoch": 1.41, + "learning_rate": 1.7189586760863256e-05, + "loss": 0.2366, + "step": 14520 + }, + { + "epoch": 1.41, + "learning_rate": 1.718765121455531e-05, + "loss": 0.3848, + "step": 14530 + }, + { + "epoch": 1.41, + "learning_rate": 1.7185715668247363e-05, + "loss": 0.3134, + "step": 14540 + }, + { + "epoch": 1.41, + "learning_rate": 1.718378012193942e-05, + "loss": 0.4722, + "step": 14550 + }, + { + "epoch": 1.41, + "learning_rate": 1.7181844575631474e-05, + "loss": 0.2455, + "step": 14560 + }, + { + "epoch": 1.41, + "learning_rate": 1.7179909029323528e-05, + "loss": 0.3423, + "step": 14570 + }, + { + "epoch": 1.41, + "learning_rate": 1.717797348301558e-05, + "loss": 0.305, + "step": 14580 + }, + { + "epoch": 1.41, + "learning_rate": 1.717603793670764e-05, + "loss": 0.2622, + "step": 14590 + }, + { + "epoch": 1.41, + "learning_rate": 1.7174102390399692e-05, + "loss": 0.3206, + "step": 14600 + }, + { + "epoch": 1.41, + "learning_rate": 1.7172166844091746e-05, + "loss": 0.3733, + "step": 14610 + }, + { + "epoch": 1.41, + "learning_rate": 1.7170231297783803e-05, + "loss": 0.3235, + "step": 14620 + }, + { + "epoch": 1.42, + "learning_rate": 1.7168295751475856e-05, + "loss": 0.2555, + "step": 14630 + }, + { + "epoch": 1.42, + "learning_rate": 1.716636020516791e-05, + "loss": 0.2367, + "step": 14640 + }, + { + "epoch": 1.42, + "learning_rate": 1.7164424658859964e-05, + "loss": 0.3549, + "step": 14650 + }, + { + "epoch": 1.42, + "learning_rate": 1.7162489112552017e-05, + "loss": 0.3309, + "step": 14660 + }, + { + "epoch": 1.42, + "learning_rate": 1.7160553566244074e-05, + "loss": 0.2453, + "step": 14670 + }, + { + "epoch": 1.42, + "learning_rate": 1.7158618019936128e-05, + "loss": 0.3493, + "step": 14680 + }, + { + "epoch": 1.42, + "learning_rate": 1.7156682473628185e-05, + "loss": 0.2987, + "step": 14690 + }, + { + "epoch": 1.42, + "learning_rate": 1.715474692732024e-05, + "loss": 0.3634, + "step": 14700 + }, + { + "epoch": 1.42, + "learning_rate": 1.7152811381012292e-05, + "loss": 0.3987, + "step": 14710 + }, + { + "epoch": 1.42, + "learning_rate": 1.7150875834704346e-05, + "loss": 0.2969, + "step": 14720 + }, + { + "epoch": 1.43, + "learning_rate": 1.71489402883964e-05, + "loss": 0.3346, + "step": 14730 + }, + { + "epoch": 1.43, + "learning_rate": 1.7147004742088456e-05, + "loss": 0.3005, + "step": 14740 + }, + { + "epoch": 1.43, + "learning_rate": 1.714506919578051e-05, + "loss": 0.3874, + "step": 14750 + }, + { + "epoch": 1.43, + "learning_rate": 1.7143133649472564e-05, + "loss": 0.191, + "step": 14760 + }, + { + "epoch": 1.43, + "learning_rate": 1.714119810316462e-05, + "loss": 0.4449, + "step": 14770 + }, + { + "epoch": 1.43, + "learning_rate": 1.7139262556856674e-05, + "loss": 0.3259, + "step": 14780 + }, + { + "epoch": 1.43, + "learning_rate": 1.7137327010548728e-05, + "loss": 0.1921, + "step": 14790 + }, + { + "epoch": 1.43, + "learning_rate": 1.713539146424078e-05, + "loss": 0.3625, + "step": 14800 + }, + { + "epoch": 1.43, + "learning_rate": 1.713345591793284e-05, + "loss": 0.3073, + "step": 14810 + }, + { + "epoch": 1.43, + "learning_rate": 1.7131520371624892e-05, + "loss": 0.3603, + "step": 14820 + }, + { + "epoch": 1.44, + "learning_rate": 1.7129584825316946e-05, + "loss": 0.1769, + "step": 14830 + }, + { + "epoch": 1.44, + "learning_rate": 1.7127649279009003e-05, + "loss": 0.2774, + "step": 14840 + }, + { + "epoch": 1.44, + "learning_rate": 1.7125713732701057e-05, + "loss": 0.3222, + "step": 14850 + }, + { + "epoch": 1.44, + "learning_rate": 1.712377818639311e-05, + "loss": 0.3113, + "step": 14860 + }, + { + "epoch": 1.44, + "learning_rate": 1.7121842640085164e-05, + "loss": 0.1536, + "step": 14870 + }, + { + "epoch": 1.44, + "learning_rate": 1.711990709377722e-05, + "loss": 0.3249, + "step": 14880 + }, + { + "epoch": 1.44, + "learning_rate": 1.7117971547469275e-05, + "loss": 0.3014, + "step": 14890 + }, + { + "epoch": 1.44, + "learning_rate": 1.7116036001161328e-05, + "loss": 0.3849, + "step": 14900 + }, + { + "epoch": 1.44, + "learning_rate": 1.7114100454853385e-05, + "loss": 0.3169, + "step": 14910 + }, + { + "epoch": 1.44, + "learning_rate": 1.711216490854544e-05, + "loss": 0.445, + "step": 14920 + }, + { + "epoch": 1.44, + "learning_rate": 1.7110229362237492e-05, + "loss": 0.1737, + "step": 14930 + }, + { + "epoch": 1.45, + "learning_rate": 1.7108293815929546e-05, + "loss": 0.4172, + "step": 14940 + }, + { + "epoch": 1.45, + "learning_rate": 1.71063582696216e-05, + "loss": 0.3779, + "step": 14950 + }, + { + "epoch": 1.45, + "learning_rate": 1.7104422723313657e-05, + "loss": 0.4008, + "step": 14960 + }, + { + "epoch": 1.45, + "learning_rate": 1.710248717700571e-05, + "loss": 0.339, + "step": 14970 + }, + { + "epoch": 1.45, + "learning_rate": 1.7100551630697767e-05, + "loss": 0.32, + "step": 14980 + }, + { + "epoch": 1.45, + "learning_rate": 1.709861608438982e-05, + "loss": 0.2163, + "step": 14990 + }, + { + "epoch": 1.45, + "learning_rate": 1.7096680538081875e-05, + "loss": 0.3445, + "step": 15000 + }, + { + "epoch": 1.45, + "learning_rate": 1.7094744991773932e-05, + "loss": 0.2182, + "step": 15010 + }, + { + "epoch": 1.45, + "learning_rate": 1.7092809445465982e-05, + "loss": 0.2312, + "step": 15020 + }, + { + "epoch": 1.45, + "learning_rate": 1.709087389915804e-05, + "loss": 0.4108, + "step": 15030 + }, + { + "epoch": 1.46, + "learning_rate": 1.7088938352850093e-05, + "loss": 0.401, + "step": 15040 + }, + { + "epoch": 1.46, + "learning_rate": 1.7087002806542146e-05, + "loss": 0.1518, + "step": 15050 + }, + { + "epoch": 1.46, + "learning_rate": 1.7085067260234203e-05, + "loss": 0.3935, + "step": 15060 + }, + { + "epoch": 1.46, + "learning_rate": 1.7083131713926257e-05, + "loss": 0.4534, + "step": 15070 + }, + { + "epoch": 1.46, + "learning_rate": 1.7081196167618314e-05, + "loss": 0.2556, + "step": 15080 + }, + { + "epoch": 1.46, + "learning_rate": 1.7079260621310368e-05, + "loss": 0.175, + "step": 15090 + }, + { + "epoch": 1.46, + "learning_rate": 1.707732507500242e-05, + "loss": 0.4635, + "step": 15100 + }, + { + "epoch": 1.46, + "learning_rate": 1.7075389528694475e-05, + "loss": 0.181, + "step": 15110 + }, + { + "epoch": 1.46, + "learning_rate": 1.707345398238653e-05, + "loss": 0.3349, + "step": 15120 + }, + { + "epoch": 1.46, + "learning_rate": 1.7071518436078586e-05, + "loss": 0.2822, + "step": 15130 + }, + { + "epoch": 1.47, + "learning_rate": 1.706958288977064e-05, + "loss": 0.2728, + "step": 15140 + }, + { + "epoch": 1.47, + "learning_rate": 1.7067647343462693e-05, + "loss": 0.1593, + "step": 15150 + }, + { + "epoch": 1.47, + "learning_rate": 1.706571179715475e-05, + "loss": 0.3836, + "step": 15160 + }, + { + "epoch": 1.47, + "learning_rate": 1.7063776250846803e-05, + "loss": 0.5097, + "step": 15170 + }, + { + "epoch": 1.47, + "learning_rate": 1.7061840704538857e-05, + "loss": 0.2625, + "step": 15180 + }, + { + "epoch": 1.47, + "learning_rate": 1.705990515823091e-05, + "loss": 0.3634, + "step": 15190 + }, + { + "epoch": 1.47, + "learning_rate": 1.7057969611922968e-05, + "loss": 0.3355, + "step": 15200 + }, + { + "epoch": 1.47, + "learning_rate": 1.705603406561502e-05, + "loss": 0.3, + "step": 15210 + }, + { + "epoch": 1.47, + "learning_rate": 1.7054098519307075e-05, + "loss": 0.4448, + "step": 15220 + }, + { + "epoch": 1.47, + "learning_rate": 1.7052162972999132e-05, + "loss": 0.3678, + "step": 15230 + }, + { + "epoch": 1.47, + "learning_rate": 1.7050227426691186e-05, + "loss": 0.3687, + "step": 15240 + }, + { + "epoch": 1.48, + "learning_rate": 1.704829188038324e-05, + "loss": 0.3271, + "step": 15250 + }, + { + "epoch": 1.48, + "learning_rate": 1.7046356334075293e-05, + "loss": 0.2735, + "step": 15260 + }, + { + "epoch": 1.48, + "learning_rate": 1.704442078776735e-05, + "loss": 0.1678, + "step": 15270 + }, + { + "epoch": 1.48, + "learning_rate": 1.7042485241459404e-05, + "loss": 0.2928, + "step": 15280 + }, + { + "epoch": 1.48, + "learning_rate": 1.7040549695151457e-05, + "loss": 0.3669, + "step": 15290 + }, + { + "epoch": 1.48, + "learning_rate": 1.7038614148843514e-05, + "loss": 0.2997, + "step": 15300 + }, + { + "epoch": 1.48, + "learning_rate": 1.7036678602535568e-05, + "loss": 0.2852, + "step": 15310 + }, + { + "epoch": 1.48, + "learning_rate": 1.703474305622762e-05, + "loss": 0.3684, + "step": 15320 + }, + { + "epoch": 1.48, + "learning_rate": 1.7032807509919675e-05, + "loss": 0.2905, + "step": 15330 + }, + { + "epoch": 1.48, + "learning_rate": 1.703087196361173e-05, + "loss": 0.2983, + "step": 15340 + }, + { + "epoch": 1.49, + "learning_rate": 1.7028936417303786e-05, + "loss": 0.3232, + "step": 15350 + }, + { + "epoch": 1.49, + "learning_rate": 1.702700087099584e-05, + "loss": 0.339, + "step": 15360 + }, + { + "epoch": 1.49, + "learning_rate": 1.7025065324687896e-05, + "loss": 0.3196, + "step": 15370 + }, + { + "epoch": 1.49, + "learning_rate": 1.702312977837995e-05, + "loss": 0.2618, + "step": 15380 + }, + { + "epoch": 1.49, + "learning_rate": 1.7021194232072004e-05, + "loss": 0.2814, + "step": 15390 + }, + { + "epoch": 1.49, + "learning_rate": 1.7019258685764057e-05, + "loss": 0.3145, + "step": 15400 + }, + { + "epoch": 1.49, + "learning_rate": 1.701732313945611e-05, + "loss": 0.3224, + "step": 15410 + }, + { + "epoch": 1.49, + "learning_rate": 1.7015387593148168e-05, + "loss": 0.4222, + "step": 15420 + }, + { + "epoch": 1.49, + "learning_rate": 1.701345204684022e-05, + "loss": 0.3898, + "step": 15430 + }, + { + "epoch": 1.49, + "learning_rate": 1.7011516500532275e-05, + "loss": 0.2236, + "step": 15440 + }, + { + "epoch": 1.5, + "learning_rate": 1.7009580954224332e-05, + "loss": 0.3493, + "step": 15450 + }, + { + "epoch": 1.5, + "learning_rate": 1.7007645407916386e-05, + "loss": 0.3444, + "step": 15460 + }, + { + "epoch": 1.5, + "learning_rate": 1.700570986160844e-05, + "loss": 0.3962, + "step": 15470 + }, + { + "epoch": 1.5, + "learning_rate": 1.7003774315300493e-05, + "loss": 0.1949, + "step": 15480 + }, + { + "epoch": 1.5, + "learning_rate": 1.700183876899255e-05, + "loss": 0.2249, + "step": 15490 + }, + { + "epoch": 1.5, + "learning_rate": 1.6999903222684604e-05, + "loss": 0.2345, + "step": 15500 + }, + { + "epoch": 1.5, + "learning_rate": 1.6997967676376658e-05, + "loss": 0.2555, + "step": 15510 + }, + { + "epoch": 1.5, + "learning_rate": 1.6996032130068715e-05, + "loss": 0.2762, + "step": 15520 + }, + { + "epoch": 1.5, + "learning_rate": 1.6994096583760768e-05, + "loss": 0.2083, + "step": 15530 + }, + { + "epoch": 1.5, + "learning_rate": 1.6992161037452822e-05, + "loss": 0.3651, + "step": 15540 + }, + { + "epoch": 1.5, + "learning_rate": 1.6990225491144875e-05, + "loss": 0.307, + "step": 15550 + }, + { + "epoch": 1.51, + "learning_rate": 1.6988289944836932e-05, + "loss": 0.5213, + "step": 15560 + }, + { + "epoch": 1.51, + "learning_rate": 1.6986354398528986e-05, + "loss": 0.3041, + "step": 15570 + }, + { + "epoch": 1.51, + "learning_rate": 1.698441885222104e-05, + "loss": 0.1966, + "step": 15580 + }, + { + "epoch": 1.51, + "learning_rate": 1.6982483305913097e-05, + "loss": 0.19, + "step": 15590 + }, + { + "epoch": 1.51, + "learning_rate": 1.698054775960515e-05, + "loss": 0.2828, + "step": 15600 + }, + { + "epoch": 1.51, + "learning_rate": 1.6978612213297204e-05, + "loss": 0.4134, + "step": 15610 + }, + { + "epoch": 1.51, + "learning_rate": 1.697667666698926e-05, + "loss": 0.4587, + "step": 15620 + }, + { + "epoch": 1.51, + "learning_rate": 1.697474112068131e-05, + "loss": 0.2614, + "step": 15630 + }, + { + "epoch": 1.51, + "learning_rate": 1.697280557437337e-05, + "loss": 0.227, + "step": 15640 + }, + { + "epoch": 1.51, + "learning_rate": 1.6970870028065422e-05, + "loss": 0.1846, + "step": 15650 + }, + { + "epoch": 1.52, + "learning_rate": 1.696893448175748e-05, + "loss": 0.1985, + "step": 15660 + }, + { + "epoch": 1.52, + "learning_rate": 1.6966998935449533e-05, + "loss": 0.3917, + "step": 15670 + }, + { + "epoch": 1.52, + "learning_rate": 1.6965063389141586e-05, + "loss": 0.3362, + "step": 15680 + }, + { + "epoch": 1.52, + "learning_rate": 1.6963127842833643e-05, + "loss": 0.2642, + "step": 15690 + }, + { + "epoch": 1.52, + "learning_rate": 1.6961192296525694e-05, + "loss": 0.2277, + "step": 15700 + }, + { + "epoch": 1.52, + "learning_rate": 1.695925675021775e-05, + "loss": 0.2511, + "step": 15710 + }, + { + "epoch": 1.52, + "learning_rate": 1.6957321203909804e-05, + "loss": 0.2165, + "step": 15720 + }, + { + "epoch": 1.52, + "learning_rate": 1.6955385657601858e-05, + "loss": 0.199, + "step": 15730 + }, + { + "epoch": 1.52, + "learning_rate": 1.6953450111293915e-05, + "loss": 0.4836, + "step": 15740 + }, + { + "epoch": 1.52, + "learning_rate": 1.695151456498597e-05, + "loss": 0.3889, + "step": 15750 + }, + { + "epoch": 1.53, + "learning_rate": 1.6949579018678026e-05, + "loss": 0.3399, + "step": 15760 + }, + { + "epoch": 1.53, + "learning_rate": 1.694764347237008e-05, + "loss": 0.3039, + "step": 15770 + }, + { + "epoch": 1.53, + "learning_rate": 1.6945707926062133e-05, + "loss": 0.2901, + "step": 15780 + }, + { + "epoch": 1.53, + "learning_rate": 1.6943772379754186e-05, + "loss": 0.3675, + "step": 15790 + }, + { + "epoch": 1.53, + "learning_rate": 1.694183683344624e-05, + "loss": 0.4347, + "step": 15800 + }, + { + "epoch": 1.53, + "learning_rate": 1.6939901287138297e-05, + "loss": 0.2878, + "step": 15810 + }, + { + "epoch": 1.53, + "learning_rate": 1.693796574083035e-05, + "loss": 0.3332, + "step": 15820 + }, + { + "epoch": 1.53, + "learning_rate": 1.6936030194522404e-05, + "loss": 0.3304, + "step": 15830 + }, + { + "epoch": 1.53, + "learning_rate": 1.693409464821446e-05, + "loss": 0.309, + "step": 15840 + }, + { + "epoch": 1.53, + "learning_rate": 1.6932159101906515e-05, + "loss": 0.192, + "step": 15850 + }, + { + "epoch": 1.53, + "learning_rate": 1.693022355559857e-05, + "loss": 0.3615, + "step": 15860 + }, + { + "epoch": 1.54, + "learning_rate": 1.6928288009290622e-05, + "loss": 0.3228, + "step": 15870 + }, + { + "epoch": 1.54, + "learning_rate": 1.692635246298268e-05, + "loss": 0.4439, + "step": 15880 + }, + { + "epoch": 1.54, + "learning_rate": 1.6924416916674733e-05, + "loss": 0.2694, + "step": 15890 + }, + { + "epoch": 1.54, + "learning_rate": 1.6922481370366787e-05, + "loss": 0.2832, + "step": 15900 + }, + { + "epoch": 1.54, + "learning_rate": 1.6920545824058844e-05, + "loss": 0.3126, + "step": 15910 + }, + { + "epoch": 1.54, + "learning_rate": 1.6918610277750897e-05, + "loss": 0.4237, + "step": 15920 + }, + { + "epoch": 1.54, + "learning_rate": 1.691667473144295e-05, + "loss": 0.3158, + "step": 15930 + }, + { + "epoch": 1.54, + "learning_rate": 1.6914739185135005e-05, + "loss": 0.3419, + "step": 15940 + }, + { + "epoch": 1.54, + "learning_rate": 1.691280363882706e-05, + "loss": 0.3258, + "step": 15950 + }, + { + "epoch": 1.54, + "learning_rate": 1.6910868092519115e-05, + "loss": 0.3537, + "step": 15960 + }, + { + "epoch": 1.55, + "learning_rate": 1.690893254621117e-05, + "loss": 0.336, + "step": 15970 + }, + { + "epoch": 1.55, + "learning_rate": 1.6906996999903226e-05, + "loss": 0.2936, + "step": 15980 + }, + { + "epoch": 1.55, + "learning_rate": 1.690506145359528e-05, + "loss": 0.4628, + "step": 15990 + }, + { + "epoch": 1.55, + "learning_rate": 1.6903125907287333e-05, + "loss": 0.161, + "step": 16000 + }, + { + "epoch": 1.55, + "learning_rate": 1.6901190360979387e-05, + "loss": 0.1847, + "step": 16010 + }, + { + "epoch": 1.55, + "learning_rate": 1.689925481467144e-05, + "loss": 0.3364, + "step": 16020 + }, + { + "epoch": 1.55, + "learning_rate": 1.6897319268363497e-05, + "loss": 0.1948, + "step": 16030 + }, + { + "epoch": 1.55, + "learning_rate": 1.689538372205555e-05, + "loss": 0.3878, + "step": 16040 + }, + { + "epoch": 1.55, + "learning_rate": 1.6893448175747608e-05, + "loss": 0.1859, + "step": 16050 + }, + { + "epoch": 1.55, + "learning_rate": 1.6891512629439662e-05, + "loss": 0.2988, + "step": 16060 + }, + { + "epoch": 1.56, + "learning_rate": 1.6889577083131715e-05, + "loss": 0.1777, + "step": 16070 + }, + { + "epoch": 1.56, + "learning_rate": 1.688764153682377e-05, + "loss": 0.3561, + "step": 16080 + }, + { + "epoch": 1.56, + "learning_rate": 1.6885705990515823e-05, + "loss": 0.2426, + "step": 16090 + }, + { + "epoch": 1.56, + "learning_rate": 1.688377044420788e-05, + "loss": 0.4224, + "step": 16100 + }, + { + "epoch": 1.56, + "learning_rate": 1.6881834897899933e-05, + "loss": 0.3223, + "step": 16110 + }, + { + "epoch": 1.56, + "learning_rate": 1.6879899351591987e-05, + "loss": 0.1873, + "step": 16120 + }, + { + "epoch": 1.56, + "learning_rate": 1.6877963805284044e-05, + "loss": 0.1635, + "step": 16130 + }, + { + "epoch": 1.56, + "learning_rate": 1.6876028258976098e-05, + "loss": 0.3436, + "step": 16140 + }, + { + "epoch": 1.56, + "learning_rate": 1.6874092712668155e-05, + "loss": 0.2926, + "step": 16150 + }, + { + "epoch": 1.56, + "learning_rate": 1.6872157166360205e-05, + "loss": 0.4817, + "step": 16160 + }, + { + "epoch": 1.56, + "learning_rate": 1.6870221620052262e-05, + "loss": 0.3496, + "step": 16170 + }, + { + "epoch": 1.57, + "learning_rate": 1.6868286073744315e-05, + "loss": 0.2869, + "step": 16180 + }, + { + "epoch": 1.57, + "learning_rate": 1.686635052743637e-05, + "loss": 0.2636, + "step": 16190 + }, + { + "epoch": 1.57, + "learning_rate": 1.6864414981128426e-05, + "loss": 0.2686, + "step": 16200 + }, + { + "epoch": 1.57, + "learning_rate": 1.686247943482048e-05, + "loss": 0.2625, + "step": 16210 + }, + { + "epoch": 1.57, + "learning_rate": 1.6860543888512533e-05, + "loss": 0.3205, + "step": 16220 + }, + { + "epoch": 1.57, + "learning_rate": 1.6858608342204587e-05, + "loss": 0.3099, + "step": 16230 + }, + { + "epoch": 1.57, + "learning_rate": 1.6856672795896644e-05, + "loss": 0.2928, + "step": 16240 + }, + { + "epoch": 1.57, + "learning_rate": 1.6854737249588698e-05, + "loss": 0.361, + "step": 16250 + }, + { + "epoch": 1.57, + "learning_rate": 1.685280170328075e-05, + "loss": 0.4134, + "step": 16260 + }, + { + "epoch": 1.57, + "learning_rate": 1.685086615697281e-05, + "loss": 0.1888, + "step": 16270 + }, + { + "epoch": 1.58, + "learning_rate": 1.6848930610664862e-05, + "loss": 0.3368, + "step": 16280 + }, + { + "epoch": 1.58, + "learning_rate": 1.6846995064356916e-05, + "loss": 0.2677, + "step": 16290 + }, + { + "epoch": 1.58, + "learning_rate": 1.6845059518048973e-05, + "loss": 0.3155, + "step": 16300 + }, + { + "epoch": 1.58, + "learning_rate": 1.6843123971741023e-05, + "loss": 0.2505, + "step": 16310 + }, + { + "epoch": 1.58, + "learning_rate": 1.684118842543308e-05, + "loss": 0.383, + "step": 16320 + }, + { + "epoch": 1.58, + "learning_rate": 1.6839252879125134e-05, + "loss": 0.1897, + "step": 16330 + }, + { + "epoch": 1.58, + "learning_rate": 1.683731733281719e-05, + "loss": 0.2652, + "step": 16340 + }, + { + "epoch": 1.58, + "learning_rate": 1.6835381786509244e-05, + "loss": 0.3664, + "step": 16350 + }, + { + "epoch": 1.58, + "learning_rate": 1.6833446240201298e-05, + "loss": 0.2596, + "step": 16360 + }, + { + "epoch": 1.58, + "learning_rate": 1.6831510693893355e-05, + "loss": 0.3717, + "step": 16370 + }, + { + "epoch": 1.59, + "learning_rate": 1.6829575147585405e-05, + "loss": 0.2709, + "step": 16380 + }, + { + "epoch": 1.59, + "learning_rate": 1.6827639601277462e-05, + "loss": 0.1962, + "step": 16390 + }, + { + "epoch": 1.59, + "learning_rate": 1.6825704054969516e-05, + "loss": 0.2585, + "step": 16400 + }, + { + "epoch": 1.59, + "learning_rate": 1.682376850866157e-05, + "loss": 0.2822, + "step": 16410 + }, + { + "epoch": 1.59, + "learning_rate": 1.6821832962353626e-05, + "loss": 0.225, + "step": 16420 + }, + { + "epoch": 1.59, + "learning_rate": 1.681989741604568e-05, + "loss": 0.3595, + "step": 16430 + }, + { + "epoch": 1.59, + "learning_rate": 1.6817961869737737e-05, + "loss": 0.4229, + "step": 16440 + }, + { + "epoch": 1.59, + "learning_rate": 1.681602632342979e-05, + "loss": 0.2663, + "step": 16450 + }, + { + "epoch": 1.59, + "learning_rate": 1.6814090777121844e-05, + "loss": 0.1621, + "step": 16460 + }, + { + "epoch": 1.59, + "learning_rate": 1.6812155230813898e-05, + "loss": 0.2684, + "step": 16470 + }, + { + "epoch": 1.59, + "learning_rate": 1.681021968450595e-05, + "loss": 0.2743, + "step": 16480 + }, + { + "epoch": 1.6, + "learning_rate": 1.680828413819801e-05, + "loss": 0.3048, + "step": 16490 + }, + { + "epoch": 1.6, + "learning_rate": 1.6806348591890062e-05, + "loss": 0.3265, + "step": 16500 + }, + { + "epoch": 1.6, + "learning_rate": 1.6804413045582116e-05, + "loss": 0.1646, + "step": 16510 + }, + { + "epoch": 1.6, + "learning_rate": 1.6802477499274173e-05, + "loss": 0.2915, + "step": 16520 + }, + { + "epoch": 1.6, + "learning_rate": 1.6800541952966227e-05, + "loss": 0.4261, + "step": 16530 + }, + { + "epoch": 1.6, + "learning_rate": 1.679860640665828e-05, + "loss": 0.2924, + "step": 16540 + }, + { + "epoch": 1.6, + "learning_rate": 1.6796670860350334e-05, + "loss": 0.3038, + "step": 16550 + }, + { + "epoch": 1.6, + "learning_rate": 1.679473531404239e-05, + "loss": 0.3289, + "step": 16560 + }, + { + "epoch": 1.6, + "learning_rate": 1.6792799767734445e-05, + "loss": 0.2898, + "step": 16570 + }, + { + "epoch": 1.6, + "learning_rate": 1.6790864221426498e-05, + "loss": 0.3192, + "step": 16580 + }, + { + "epoch": 1.61, + "learning_rate": 1.6788928675118555e-05, + "loss": 0.2594, + "step": 16590 + }, + { + "epoch": 1.61, + "learning_rate": 1.678699312881061e-05, + "loss": 0.2906, + "step": 16600 + }, + { + "epoch": 1.61, + "learning_rate": 1.6785057582502662e-05, + "loss": 0.2275, + "step": 16610 + }, + { + "epoch": 1.61, + "learning_rate": 1.6783122036194716e-05, + "loss": 0.2385, + "step": 16620 + }, + { + "epoch": 1.61, + "learning_rate": 1.6781186489886773e-05, + "loss": 0.3149, + "step": 16630 + }, + { + "epoch": 1.61, + "learning_rate": 1.6779250943578827e-05, + "loss": 0.3113, + "step": 16640 + }, + { + "epoch": 1.61, + "learning_rate": 1.677731539727088e-05, + "loss": 0.2499, + "step": 16650 + }, + { + "epoch": 1.61, + "learning_rate": 1.6775379850962937e-05, + "loss": 0.2521, + "step": 16660 + }, + { + "epoch": 1.61, + "learning_rate": 1.677344430465499e-05, + "loss": 0.4164, + "step": 16670 + }, + { + "epoch": 1.61, + "learning_rate": 1.6771508758347045e-05, + "loss": 0.2658, + "step": 16680 + }, + { + "epoch": 1.62, + "learning_rate": 1.67695732120391e-05, + "loss": 0.2284, + "step": 16690 + }, + { + "epoch": 1.62, + "learning_rate": 1.6767637665731152e-05, + "loss": 0.3776, + "step": 16700 + }, + { + "epoch": 1.62, + "learning_rate": 1.676570211942321e-05, + "loss": 0.1443, + "step": 16710 + }, + { + "epoch": 1.62, + "learning_rate": 1.6763766573115263e-05, + "loss": 0.4929, + "step": 16720 + }, + { + "epoch": 1.62, + "learning_rate": 1.676183102680732e-05, + "loss": 0.1865, + "step": 16730 + }, + { + "epoch": 1.62, + "learning_rate": 1.6759895480499373e-05, + "loss": 0.3168, + "step": 16740 + }, + { + "epoch": 1.62, + "learning_rate": 1.6757959934191427e-05, + "loss": 0.307, + "step": 16750 + }, + { + "epoch": 1.62, + "learning_rate": 1.675602438788348e-05, + "loss": 0.3092, + "step": 16760 + }, + { + "epoch": 1.62, + "learning_rate": 1.6754088841575534e-05, + "loss": 0.3512, + "step": 16770 + }, + { + "epoch": 1.62, + "learning_rate": 1.675215329526759e-05, + "loss": 0.2605, + "step": 16780 + }, + { + "epoch": 1.62, + "learning_rate": 1.6750217748959645e-05, + "loss": 0.4026, + "step": 16790 + }, + { + "epoch": 1.63, + "learning_rate": 1.67482822026517e-05, + "loss": 0.2115, + "step": 16800 + }, + { + "epoch": 1.63, + "learning_rate": 1.6746346656343756e-05, + "loss": 0.2831, + "step": 16810 + }, + { + "epoch": 1.63, + "learning_rate": 1.674441111003581e-05, + "loss": 0.2923, + "step": 16820 + }, + { + "epoch": 1.63, + "learning_rate": 1.6742475563727866e-05, + "loss": 0.2846, + "step": 16830 + }, + { + "epoch": 1.63, + "learning_rate": 1.6740540017419916e-05, + "loss": 0.2642, + "step": 16840 + }, + { + "epoch": 1.63, + "learning_rate": 1.6738604471111973e-05, + "loss": 0.2147, + "step": 16850 + }, + { + "epoch": 1.63, + "learning_rate": 1.6736668924804027e-05, + "loss": 0.3616, + "step": 16860 + }, + { + "epoch": 1.63, + "learning_rate": 1.673473337849608e-05, + "loss": 0.2808, + "step": 16870 + }, + { + "epoch": 1.63, + "learning_rate": 1.6732797832188138e-05, + "loss": 0.3273, + "step": 16880 + }, + { + "epoch": 1.63, + "learning_rate": 1.673086228588019e-05, + "loss": 0.4113, + "step": 16890 + }, + { + "epoch": 1.64, + "learning_rate": 1.6728926739572245e-05, + "loss": 0.2302, + "step": 16900 + }, + { + "epoch": 1.64, + "learning_rate": 1.67269911932643e-05, + "loss": 0.2401, + "step": 16910 + }, + { + "epoch": 1.64, + "learning_rate": 1.6725055646956356e-05, + "loss": 0.3552, + "step": 16920 + }, + { + "epoch": 1.64, + "learning_rate": 1.672312010064841e-05, + "loss": 0.2627, + "step": 16930 + }, + { + "epoch": 1.64, + "learning_rate": 1.6721184554340463e-05, + "loss": 0.2984, + "step": 16940 + }, + { + "epoch": 1.64, + "learning_rate": 1.671924900803252e-05, + "loss": 0.3124, + "step": 16950 + }, + { + "epoch": 1.64, + "learning_rate": 1.6717313461724574e-05, + "loss": 0.2215, + "step": 16960 + }, + { + "epoch": 1.64, + "learning_rate": 1.6715377915416627e-05, + "loss": 0.2988, + "step": 16970 + }, + { + "epoch": 1.64, + "learning_rate": 1.6713442369108684e-05, + "loss": 0.3898, + "step": 16980 + }, + { + "epoch": 1.64, + "learning_rate": 1.6711506822800735e-05, + "loss": 0.212, + "step": 16990 + }, + { + "epoch": 1.65, + "learning_rate": 1.670957127649279e-05, + "loss": 0.329, + "step": 17000 + }, + { + "epoch": 1.65, + "learning_rate": 1.6707635730184845e-05, + "loss": 0.3684, + "step": 17010 + }, + { + "epoch": 1.65, + "learning_rate": 1.6705700183876902e-05, + "loss": 0.2544, + "step": 17020 + }, + { + "epoch": 1.65, + "learning_rate": 1.6703764637568956e-05, + "loss": 0.2989, + "step": 17030 + }, + { + "epoch": 1.65, + "learning_rate": 1.670182909126101e-05, + "loss": 0.2157, + "step": 17040 + }, + { + "epoch": 1.65, + "learning_rate": 1.6699893544953067e-05, + "loss": 0.2282, + "step": 17050 + }, + { + "epoch": 1.65, + "learning_rate": 1.6697957998645117e-05, + "loss": 0.3462, + "step": 17060 + }, + { + "epoch": 1.65, + "learning_rate": 1.6696022452337174e-05, + "loss": 0.241, + "step": 17070 + }, + { + "epoch": 1.65, + "learning_rate": 1.6694086906029227e-05, + "loss": 0.3652, + "step": 17080 + }, + { + "epoch": 1.65, + "learning_rate": 1.669215135972128e-05, + "loss": 0.2195, + "step": 17090 + }, + { + "epoch": 1.65, + "learning_rate": 1.6690215813413338e-05, + "loss": 0.3775, + "step": 17100 + }, + { + "epoch": 1.66, + "learning_rate": 1.6688280267105392e-05, + "loss": 0.2928, + "step": 17110 + }, + { + "epoch": 1.66, + "learning_rate": 1.668634472079745e-05, + "loss": 0.3723, + "step": 17120 + }, + { + "epoch": 1.66, + "learning_rate": 1.6684409174489502e-05, + "loss": 0.2314, + "step": 17130 + }, + { + "epoch": 1.66, + "learning_rate": 1.6682473628181556e-05, + "loss": 0.2739, + "step": 17140 + }, + { + "epoch": 1.66, + "learning_rate": 1.668053808187361e-05, + "loss": 0.3677, + "step": 17150 + }, + { + "epoch": 1.66, + "learning_rate": 1.6678602535565663e-05, + "loss": 0.2582, + "step": 17160 + }, + { + "epoch": 1.66, + "learning_rate": 1.667666698925772e-05, + "loss": 0.2447, + "step": 17170 + }, + { + "epoch": 1.66, + "learning_rate": 1.6674731442949774e-05, + "loss": 0.2591, + "step": 17180 + }, + { + "epoch": 1.66, + "learning_rate": 1.6672795896641828e-05, + "loss": 0.3266, + "step": 17190 + }, + { + "epoch": 1.66, + "learning_rate": 1.6670860350333885e-05, + "loss": 0.2254, + "step": 17200 + }, + { + "epoch": 1.67, + "learning_rate": 1.6668924804025938e-05, + "loss": 0.2504, + "step": 17210 + }, + { + "epoch": 1.67, + "learning_rate": 1.6666989257717992e-05, + "loss": 0.3569, + "step": 17220 + }, + { + "epoch": 1.67, + "learning_rate": 1.6665053711410045e-05, + "loss": 0.4493, + "step": 17230 + }, + { + "epoch": 1.67, + "learning_rate": 1.6663118165102103e-05, + "loss": 0.3604, + "step": 17240 + }, + { + "epoch": 1.67, + "learning_rate": 1.6661182618794156e-05, + "loss": 0.3084, + "step": 17250 + }, + { + "epoch": 1.67, + "learning_rate": 1.665924707248621e-05, + "loss": 0.3298, + "step": 17260 + }, + { + "epoch": 1.67, + "learning_rate": 1.6657311526178267e-05, + "loss": 0.1678, + "step": 17270 + }, + { + "epoch": 1.67, + "learning_rate": 1.665537597987032e-05, + "loss": 0.2274, + "step": 17280 + }, + { + "epoch": 1.67, + "learning_rate": 1.6653440433562374e-05, + "loss": 0.4518, + "step": 17290 + }, + { + "epoch": 1.67, + "learning_rate": 1.6651504887254428e-05, + "loss": 0.3267, + "step": 17300 + }, + { + "epoch": 1.68, + "learning_rate": 1.6649569340946485e-05, + "loss": 0.3082, + "step": 17310 + }, + { + "epoch": 1.68, + "learning_rate": 1.664763379463854e-05, + "loss": 0.3486, + "step": 17320 + }, + { + "epoch": 1.68, + "learning_rate": 1.6645698248330592e-05, + "loss": 0.3719, + "step": 17330 + }, + { + "epoch": 1.68, + "learning_rate": 1.664376270202265e-05, + "loss": 0.2115, + "step": 17340 + }, + { + "epoch": 1.68, + "learning_rate": 1.6641827155714703e-05, + "loss": 0.1718, + "step": 17350 + }, + { + "epoch": 1.68, + "learning_rate": 1.6639891609406756e-05, + "loss": 0.1969, + "step": 17360 + }, + { + "epoch": 1.68, + "learning_rate": 1.663795606309881e-05, + "loss": 0.2087, + "step": 17370 + }, + { + "epoch": 1.68, + "learning_rate": 1.6636020516790864e-05, + "loss": 0.551, + "step": 17380 + }, + { + "epoch": 1.68, + "learning_rate": 1.663408497048292e-05, + "loss": 0.2694, + "step": 17390 + }, + { + "epoch": 1.68, + "learning_rate": 1.6632149424174974e-05, + "loss": 0.323, + "step": 17400 + }, + { + "epoch": 1.68, + "learning_rate": 1.663021387786703e-05, + "loss": 0.3427, + "step": 17410 + }, + { + "epoch": 1.69, + "learning_rate": 1.6628278331559085e-05, + "loss": 0.2691, + "step": 17420 + }, + { + "epoch": 1.69, + "learning_rate": 1.662634278525114e-05, + "loss": 0.2281, + "step": 17430 + }, + { + "epoch": 1.69, + "learning_rate": 1.6624407238943192e-05, + "loss": 0.2337, + "step": 17440 + }, + { + "epoch": 1.69, + "learning_rate": 1.6622471692635246e-05, + "loss": 0.3465, + "step": 17450 + }, + { + "epoch": 1.69, + "learning_rate": 1.6620536146327303e-05, + "loss": 0.2241, + "step": 17460 + }, + { + "epoch": 1.69, + "learning_rate": 1.6618600600019356e-05, + "loss": 0.4621, + "step": 17470 + }, + { + "epoch": 1.69, + "learning_rate": 1.661666505371141e-05, + "loss": 0.1698, + "step": 17480 + }, + { + "epoch": 1.69, + "learning_rate": 1.6614729507403467e-05, + "loss": 0.1191, + "step": 17490 + }, + { + "epoch": 1.69, + "learning_rate": 1.661279396109552e-05, + "loss": 0.2842, + "step": 17500 + }, + { + "epoch": 1.69, + "learning_rate": 1.6610858414787578e-05, + "loss": 0.3554, + "step": 17510 + }, + { + "epoch": 1.7, + "learning_rate": 1.6608922868479628e-05, + "loss": 0.5019, + "step": 17520 + }, + { + "epoch": 1.7, + "learning_rate": 1.6606987322171685e-05, + "loss": 0.3847, + "step": 17530 + }, + { + "epoch": 1.7, + "learning_rate": 1.660505177586374e-05, + "loss": 0.2181, + "step": 17540 + }, + { + "epoch": 1.7, + "learning_rate": 1.6603116229555792e-05, + "loss": 0.336, + "step": 17550 + }, + { + "epoch": 1.7, + "learning_rate": 1.660118068324785e-05, + "loss": 0.3242, + "step": 17560 + }, + { + "epoch": 1.7, + "learning_rate": 1.6599245136939903e-05, + "loss": 0.408, + "step": 17570 + }, + { + "epoch": 1.7, + "learning_rate": 1.6597309590631957e-05, + "loss": 0.3222, + "step": 17580 + }, + { + "epoch": 1.7, + "learning_rate": 1.659537404432401e-05, + "loss": 0.1738, + "step": 17590 + }, + { + "epoch": 1.7, + "learning_rate": 1.6593438498016067e-05, + "loss": 0.2368, + "step": 17600 + }, + { + "epoch": 1.7, + "learning_rate": 1.659150295170812e-05, + "loss": 0.3298, + "step": 17610 + }, + { + "epoch": 1.71, + "learning_rate": 1.6589567405400175e-05, + "loss": 0.4114, + "step": 17620 + }, + { + "epoch": 1.71, + "learning_rate": 1.658763185909223e-05, + "loss": 0.3732, + "step": 17630 + }, + { + "epoch": 1.71, + "learning_rate": 1.6585696312784285e-05, + "loss": 0.2467, + "step": 17640 + }, + { + "epoch": 1.71, + "learning_rate": 1.658376076647634e-05, + "loss": 0.2376, + "step": 17650 + }, + { + "epoch": 1.71, + "learning_rate": 1.6581825220168396e-05, + "loss": 0.1994, + "step": 17660 + }, + { + "epoch": 1.71, + "learning_rate": 1.6579889673860446e-05, + "loss": 0.2954, + "step": 17670 + }, + { + "epoch": 1.71, + "learning_rate": 1.6577954127552503e-05, + "loss": 0.282, + "step": 17680 + }, + { + "epoch": 1.71, + "learning_rate": 1.6576018581244557e-05, + "loss": 0.4242, + "step": 17690 + }, + { + "epoch": 1.71, + "learning_rate": 1.6574083034936614e-05, + "loss": 0.2746, + "step": 17700 + }, + { + "epoch": 1.71, + "learning_rate": 1.6572147488628667e-05, + "loss": 0.2311, + "step": 17710 + }, + { + "epoch": 1.71, + "learning_rate": 1.657021194232072e-05, + "loss": 0.239, + "step": 17720 + }, + { + "epoch": 1.72, + "learning_rate": 1.6568276396012778e-05, + "loss": 0.3641, + "step": 17730 + }, + { + "epoch": 1.72, + "learning_rate": 1.656634084970483e-05, + "loss": 0.3492, + "step": 17740 + }, + { + "epoch": 1.72, + "learning_rate": 1.6564405303396885e-05, + "loss": 0.3581, + "step": 17750 + }, + { + "epoch": 1.72, + "learning_rate": 1.656246975708894e-05, + "loss": 0.3805, + "step": 17760 + }, + { + "epoch": 1.72, + "learning_rate": 1.6560534210780993e-05, + "loss": 0.2485, + "step": 17770 + }, + { + "epoch": 1.72, + "learning_rate": 1.655859866447305e-05, + "loss": 0.3356, + "step": 17780 + }, + { + "epoch": 1.72, + "learning_rate": 1.6556663118165103e-05, + "loss": 0.3991, + "step": 17790 + }, + { + "epoch": 1.72, + "learning_rate": 1.655472757185716e-05, + "loss": 0.2662, + "step": 17800 + }, + { + "epoch": 1.72, + "learning_rate": 1.6552792025549214e-05, + "loss": 0.35, + "step": 17810 + }, + { + "epoch": 1.72, + "learning_rate": 1.6550856479241268e-05, + "loss": 0.2963, + "step": 17820 + }, + { + "epoch": 1.73, + "learning_rate": 1.654892093293332e-05, + "loss": 0.2075, + "step": 17830 + }, + { + "epoch": 1.73, + "learning_rate": 1.6546985386625375e-05, + "loss": 0.2336, + "step": 17840 + }, + { + "epoch": 1.73, + "learning_rate": 1.6545049840317432e-05, + "loss": 0.2172, + "step": 17850 + }, + { + "epoch": 1.73, + "learning_rate": 1.6543114294009486e-05, + "loss": 0.3327, + "step": 17860 + }, + { + "epoch": 1.73, + "learning_rate": 1.654117874770154e-05, + "loss": 0.2162, + "step": 17870 + }, + { + "epoch": 1.73, + "learning_rate": 1.6539243201393596e-05, + "loss": 0.3116, + "step": 17880 + }, + { + "epoch": 1.73, + "learning_rate": 1.653730765508565e-05, + "loss": 0.4425, + "step": 17890 + }, + { + "epoch": 1.73, + "learning_rate": 1.6535372108777703e-05, + "loss": 0.2908, + "step": 17900 + }, + { + "epoch": 1.73, + "learning_rate": 1.6533436562469757e-05, + "loss": 0.3909, + "step": 17910 + }, + { + "epoch": 1.73, + "learning_rate": 1.6531501016161814e-05, + "loss": 0.3242, + "step": 17920 + }, + { + "epoch": 1.74, + "learning_rate": 1.6529565469853868e-05, + "loss": 0.3519, + "step": 17930 + }, + { + "epoch": 1.74, + "learning_rate": 1.652762992354592e-05, + "loss": 0.2546, + "step": 17940 + }, + { + "epoch": 1.74, + "learning_rate": 1.652569437723798e-05, + "loss": 0.2197, + "step": 17950 + }, + { + "epoch": 1.74, + "learning_rate": 1.6523758830930032e-05, + "loss": 0.2583, + "step": 17960 + }, + { + "epoch": 1.74, + "learning_rate": 1.6521823284622086e-05, + "loss": 0.231, + "step": 17970 + }, + { + "epoch": 1.74, + "learning_rate": 1.651988773831414e-05, + "loss": 0.4129, + "step": 17980 + }, + { + "epoch": 1.74, + "learning_rate": 1.6517952192006196e-05, + "loss": 0.3499, + "step": 17990 + }, + { + "epoch": 1.74, + "learning_rate": 1.651601664569825e-05, + "loss": 0.2336, + "step": 18000 + }, + { + "epoch": 1.74, + "learning_rate": 1.6514081099390304e-05, + "loss": 0.3743, + "step": 18010 + }, + { + "epoch": 1.74, + "learning_rate": 1.651214555308236e-05, + "loss": 0.1599, + "step": 18020 + }, + { + "epoch": 1.74, + "learning_rate": 1.6510210006774414e-05, + "loss": 0.2085, + "step": 18030 + }, + { + "epoch": 1.75, + "learning_rate": 1.6508274460466468e-05, + "loss": 0.3065, + "step": 18040 + }, + { + "epoch": 1.75, + "learning_rate": 1.650633891415852e-05, + "loss": 0.2503, + "step": 18050 + }, + { + "epoch": 1.75, + "learning_rate": 1.6504403367850575e-05, + "loss": 0.3537, + "step": 18060 + }, + { + "epoch": 1.75, + "learning_rate": 1.6502467821542632e-05, + "loss": 0.3136, + "step": 18070 + }, + { + "epoch": 1.75, + "learning_rate": 1.6500532275234686e-05, + "loss": 0.3658, + "step": 18080 + }, + { + "epoch": 1.75, + "learning_rate": 1.6498596728926743e-05, + "loss": 0.2757, + "step": 18090 + }, + { + "epoch": 1.75, + "learning_rate": 1.6496661182618796e-05, + "loss": 0.3217, + "step": 18100 + }, + { + "epoch": 1.75, + "learning_rate": 1.649472563631085e-05, + "loss": 0.3167, + "step": 18110 + }, + { + "epoch": 1.75, + "learning_rate": 1.6492790090002904e-05, + "loss": 0.2666, + "step": 18120 + }, + { + "epoch": 1.75, + "learning_rate": 1.6490854543694957e-05, + "loss": 0.2854, + "step": 18130 + }, + { + "epoch": 1.76, + "learning_rate": 1.6488918997387014e-05, + "loss": 0.2103, + "step": 18140 + }, + { + "epoch": 1.76, + "learning_rate": 1.6486983451079068e-05, + "loss": 0.2305, + "step": 18150 + }, + { + "epoch": 1.76, + "learning_rate": 1.6485047904771122e-05, + "loss": 0.2785, + "step": 18160 + }, + { + "epoch": 1.76, + "learning_rate": 1.648311235846318e-05, + "loss": 0.2928, + "step": 18170 + }, + { + "epoch": 1.76, + "learning_rate": 1.6481176812155232e-05, + "loss": 0.2166, + "step": 18180 + }, + { + "epoch": 1.76, + "learning_rate": 1.647924126584729e-05, + "loss": 0.2716, + "step": 18190 + }, + { + "epoch": 1.76, + "learning_rate": 1.647730571953934e-05, + "loss": 0.2099, + "step": 18200 + }, + { + "epoch": 1.76, + "learning_rate": 1.6475370173231397e-05, + "loss": 0.3413, + "step": 18210 + }, + { + "epoch": 1.76, + "learning_rate": 1.647343462692345e-05, + "loss": 0.293, + "step": 18220 + }, + { + "epoch": 1.76, + "learning_rate": 1.6471499080615504e-05, + "loss": 0.2806, + "step": 18230 + }, + { + "epoch": 1.77, + "learning_rate": 1.646956353430756e-05, + "loss": 0.3088, + "step": 18240 + }, + { + "epoch": 1.77, + "learning_rate": 1.6467627987999615e-05, + "loss": 0.3929, + "step": 18250 + }, + { + "epoch": 1.77, + "learning_rate": 1.6465692441691668e-05, + "loss": 0.1898, + "step": 18260 + }, + { + "epoch": 1.77, + "learning_rate": 1.6463756895383722e-05, + "loss": 0.346, + "step": 18270 + }, + { + "epoch": 1.77, + "learning_rate": 1.646182134907578e-05, + "loss": 0.3438, + "step": 18280 + }, + { + "epoch": 1.77, + "learning_rate": 1.6459885802767833e-05, + "loss": 0.3033, + "step": 18290 + }, + { + "epoch": 1.77, + "learning_rate": 1.6457950256459886e-05, + "loss": 0.2333, + "step": 18300 + }, + { + "epoch": 1.77, + "learning_rate": 1.6456014710151943e-05, + "loss": 0.2983, + "step": 18310 + }, + { + "epoch": 1.77, + "learning_rate": 1.6454079163843997e-05, + "loss": 0.2193, + "step": 18320 + }, + { + "epoch": 1.77, + "learning_rate": 1.645214361753605e-05, + "loss": 0.3257, + "step": 18330 + }, + { + "epoch": 1.77, + "learning_rate": 1.6450208071228107e-05, + "loss": 0.1836, + "step": 18340 + }, + { + "epoch": 1.78, + "learning_rate": 1.6448272524920158e-05, + "loss": 0.4733, + "step": 18350 + }, + { + "epoch": 1.78, + "learning_rate": 1.6446336978612215e-05, + "loss": 0.314, + "step": 18360 + }, + { + "epoch": 1.78, + "learning_rate": 1.644440143230427e-05, + "loss": 0.2677, + "step": 18370 + }, + { + "epoch": 1.78, + "learning_rate": 1.6442465885996325e-05, + "loss": 0.335, + "step": 18380 + }, + { + "epoch": 1.78, + "learning_rate": 1.644053033968838e-05, + "loss": 0.2912, + "step": 18390 + }, + { + "epoch": 1.78, + "learning_rate": 1.6438594793380433e-05, + "loss": 0.347, + "step": 18400 + }, + { + "epoch": 1.78, + "learning_rate": 1.643665924707249e-05, + "loss": 0.2567, + "step": 18410 + }, + { + "epoch": 1.78, + "learning_rate": 1.643472370076454e-05, + "loss": 0.284, + "step": 18420 + }, + { + "epoch": 1.78, + "learning_rate": 1.6432788154456597e-05, + "loss": 0.3342, + "step": 18430 + }, + { + "epoch": 1.78, + "learning_rate": 1.643085260814865e-05, + "loss": 0.2778, + "step": 18440 + }, + { + "epoch": 1.79, + "learning_rate": 1.6428917061840704e-05, + "loss": 0.277, + "step": 18450 + }, + { + "epoch": 1.79, + "learning_rate": 1.642698151553276e-05, + "loss": 0.2798, + "step": 18460 + }, + { + "epoch": 1.79, + "learning_rate": 1.6425045969224815e-05, + "loss": 0.1895, + "step": 18470 + }, + { + "epoch": 1.79, + "learning_rate": 1.6423110422916872e-05, + "loss": 0.3073, + "step": 18480 + }, + { + "epoch": 1.79, + "learning_rate": 1.6421174876608926e-05, + "loss": 0.3007, + "step": 18490 + }, + { + "epoch": 1.79, + "learning_rate": 1.641923933030098e-05, + "loss": 0.2726, + "step": 18500 + }, + { + "epoch": 1.79, + "learning_rate": 1.6417303783993033e-05, + "loss": 0.336, + "step": 18510 + }, + { + "epoch": 1.79, + "learning_rate": 1.6415368237685086e-05, + "loss": 0.3537, + "step": 18520 + }, + { + "epoch": 1.79, + "learning_rate": 1.6413432691377143e-05, + "loss": 0.306, + "step": 18530 + }, + { + "epoch": 1.79, + "learning_rate": 1.6411497145069197e-05, + "loss": 0.3821, + "step": 18540 + }, + { + "epoch": 1.8, + "learning_rate": 1.640956159876125e-05, + "loss": 0.4145, + "step": 18550 + }, + { + "epoch": 1.8, + "learning_rate": 1.6407626052453308e-05, + "loss": 0.2732, + "step": 18560 + }, + { + "epoch": 1.8, + "learning_rate": 1.640569050614536e-05, + "loss": 0.151, + "step": 18570 + }, + { + "epoch": 1.8, + "learning_rate": 1.6403754959837415e-05, + "loss": 0.4677, + "step": 18580 + }, + { + "epoch": 1.8, + "learning_rate": 1.640181941352947e-05, + "loss": 0.3165, + "step": 18590 + }, + { + "epoch": 1.8, + "learning_rate": 1.6399883867221526e-05, + "loss": 0.321, + "step": 18600 + }, + { + "epoch": 1.8, + "learning_rate": 1.639794832091358e-05, + "loss": 0.2441, + "step": 18610 + }, + { + "epoch": 1.8, + "learning_rate": 1.6396012774605633e-05, + "loss": 0.3293, + "step": 18620 + }, + { + "epoch": 1.8, + "learning_rate": 1.639407722829769e-05, + "loss": 0.214, + "step": 18630 + }, + { + "epoch": 1.8, + "learning_rate": 1.6392141681989744e-05, + "loss": 0.2849, + "step": 18640 + }, + { + "epoch": 1.8, + "learning_rate": 1.6390206135681797e-05, + "loss": 0.2968, + "step": 18650 + }, + { + "epoch": 1.81, + "learning_rate": 1.638827058937385e-05, + "loss": 0.348, + "step": 18660 + }, + { + "epoch": 1.81, + "learning_rate": 1.6386335043065908e-05, + "loss": 0.3481, + "step": 18670 + }, + { + "epoch": 1.81, + "learning_rate": 1.638439949675796e-05, + "loss": 0.2479, + "step": 18680 + }, + { + "epoch": 1.81, + "learning_rate": 1.6382463950450015e-05, + "loss": 0.3407, + "step": 18690 + }, + { + "epoch": 1.81, + "learning_rate": 1.6380528404142072e-05, + "loss": 0.2994, + "step": 18700 + }, + { + "epoch": 1.81, + "learning_rate": 1.6378592857834126e-05, + "loss": 0.2279, + "step": 18710 + }, + { + "epoch": 1.81, + "learning_rate": 1.637665731152618e-05, + "loss": 0.3436, + "step": 18720 + }, + { + "epoch": 1.81, + "learning_rate": 1.6374721765218233e-05, + "loss": 0.2652, + "step": 18730 + }, + { + "epoch": 1.81, + "learning_rate": 1.6372786218910287e-05, + "loss": 0.2188, + "step": 18740 + }, + { + "epoch": 1.81, + "learning_rate": 1.6370850672602344e-05, + "loss": 0.1717, + "step": 18750 + }, + { + "epoch": 1.82, + "learning_rate": 1.6368915126294397e-05, + "loss": 0.2485, + "step": 18760 + }, + { + "epoch": 1.82, + "learning_rate": 1.6366979579986454e-05, + "loss": 0.2834, + "step": 18770 + }, + { + "epoch": 1.82, + "learning_rate": 1.6365044033678508e-05, + "loss": 0.4029, + "step": 18780 + }, + { + "epoch": 1.82, + "learning_rate": 1.6363108487370562e-05, + "loss": 0.3681, + "step": 18790 + }, + { + "epoch": 1.82, + "learning_rate": 1.6361172941062615e-05, + "loss": 0.2701, + "step": 18800 + }, + { + "epoch": 1.82, + "learning_rate": 1.635923739475467e-05, + "loss": 0.2573, + "step": 18810 + }, + { + "epoch": 1.82, + "learning_rate": 1.6357301848446726e-05, + "loss": 0.3545, + "step": 18820 + }, + { + "epoch": 1.82, + "learning_rate": 1.635536630213878e-05, + "loss": 0.4568, + "step": 18830 + }, + { + "epoch": 1.82, + "learning_rate": 1.6353430755830833e-05, + "loss": 0.2919, + "step": 18840 + }, + { + "epoch": 1.82, + "learning_rate": 1.635149520952289e-05, + "loss": 0.3349, + "step": 18850 + }, + { + "epoch": 1.83, + "learning_rate": 1.6349559663214944e-05, + "loss": 0.274, + "step": 18860 + }, + { + "epoch": 1.83, + "learning_rate": 1.6347624116907e-05, + "loss": 0.2792, + "step": 18870 + }, + { + "epoch": 1.83, + "learning_rate": 1.634568857059905e-05, + "loss": 0.4413, + "step": 18880 + }, + { + "epoch": 1.83, + "learning_rate": 1.6343753024291108e-05, + "loss": 0.3486, + "step": 18890 + }, + { + "epoch": 1.83, + "learning_rate": 1.6341817477983162e-05, + "loss": 0.1878, + "step": 18900 + }, + { + "epoch": 1.83, + "learning_rate": 1.6339881931675216e-05, + "loss": 0.2555, + "step": 18910 + }, + { + "epoch": 1.83, + "learning_rate": 1.6337946385367273e-05, + "loss": 0.4034, + "step": 18920 + }, + { + "epoch": 1.83, + "learning_rate": 1.6336010839059326e-05, + "loss": 0.2576, + "step": 18930 + }, + { + "epoch": 1.83, + "learning_rate": 1.633407529275138e-05, + "loss": 0.3104, + "step": 18940 + }, + { + "epoch": 1.83, + "learning_rate": 1.6332139746443433e-05, + "loss": 0.355, + "step": 18950 + }, + { + "epoch": 1.83, + "learning_rate": 1.633020420013549e-05, + "loss": 0.3288, + "step": 18960 + }, + { + "epoch": 1.84, + "learning_rate": 1.6328268653827544e-05, + "loss": 0.2096, + "step": 18970 + }, + { + "epoch": 1.84, + "learning_rate": 1.6326333107519598e-05, + "loss": 0.3618, + "step": 18980 + }, + { + "epoch": 1.84, + "learning_rate": 1.6324397561211655e-05, + "loss": 0.1729, + "step": 18990 + }, + { + "epoch": 1.84, + "learning_rate": 1.632246201490371e-05, + "loss": 0.2401, + "step": 19000 + }, + { + "epoch": 1.84, + "learning_rate": 1.6320526468595762e-05, + "loss": 0.1676, + "step": 19010 + }, + { + "epoch": 1.84, + "learning_rate": 1.631859092228782e-05, + "loss": 0.2947, + "step": 19020 + }, + { + "epoch": 1.84, + "learning_rate": 1.631665537597987e-05, + "loss": 0.3383, + "step": 19030 + }, + { + "epoch": 1.84, + "learning_rate": 1.6314719829671926e-05, + "loss": 0.3709, + "step": 19040 + }, + { + "epoch": 1.84, + "learning_rate": 1.631278428336398e-05, + "loss": 0.2779, + "step": 19050 + }, + { + "epoch": 1.84, + "learning_rate": 1.6310848737056037e-05, + "loss": 0.3207, + "step": 19060 + }, + { + "epoch": 1.85, + "learning_rate": 1.630891319074809e-05, + "loss": 0.157, + "step": 19070 + }, + { + "epoch": 1.85, + "learning_rate": 1.6306977644440144e-05, + "loss": 0.447, + "step": 19080 + }, + { + "epoch": 1.85, + "learning_rate": 1.63050420981322e-05, + "loss": 0.281, + "step": 19090 + }, + { + "epoch": 1.85, + "learning_rate": 1.6303106551824255e-05, + "loss": 0.3323, + "step": 19100 + }, + { + "epoch": 1.85, + "learning_rate": 1.630117100551631e-05, + "loss": 0.2207, + "step": 19110 + }, + { + "epoch": 1.85, + "learning_rate": 1.6299235459208362e-05, + "loss": 0.2224, + "step": 19120 + }, + { + "epoch": 1.85, + "learning_rate": 1.6297299912900416e-05, + "loss": 0.4151, + "step": 19130 + }, + { + "epoch": 1.85, + "learning_rate": 1.6295364366592473e-05, + "loss": 0.2612, + "step": 19140 + }, + { + "epoch": 1.85, + "learning_rate": 1.6293428820284526e-05, + "loss": 0.2603, + "step": 19150 + }, + { + "epoch": 1.85, + "learning_rate": 1.6291493273976584e-05, + "loss": 0.1827, + "step": 19160 + }, + { + "epoch": 1.86, + "learning_rate": 1.6289557727668637e-05, + "loss": 0.329, + "step": 19170 + }, + { + "epoch": 1.86, + "learning_rate": 1.628762218136069e-05, + "loss": 0.3778, + "step": 19180 + }, + { + "epoch": 1.86, + "learning_rate": 1.6285686635052744e-05, + "loss": 0.3084, + "step": 19190 + }, + { + "epoch": 1.86, + "learning_rate": 1.6283751088744798e-05, + "loss": 0.3448, + "step": 19200 + }, + { + "epoch": 1.86, + "learning_rate": 1.6281815542436855e-05, + "loss": 0.396, + "step": 19210 + }, + { + "epoch": 1.86, + "learning_rate": 1.627987999612891e-05, + "loss": 0.1541, + "step": 19220 + }, + { + "epoch": 1.86, + "learning_rate": 1.6277944449820962e-05, + "loss": 0.4176, + "step": 19230 + }, + { + "epoch": 1.86, + "learning_rate": 1.627600890351302e-05, + "loss": 0.2442, + "step": 19240 + }, + { + "epoch": 1.86, + "learning_rate": 1.6274073357205073e-05, + "loss": 0.4129, + "step": 19250 + }, + { + "epoch": 1.86, + "learning_rate": 1.6272137810897127e-05, + "loss": 0.2672, + "step": 19260 + }, + { + "epoch": 1.86, + "learning_rate": 1.627020226458918e-05, + "loss": 0.2013, + "step": 19270 + }, + { + "epoch": 1.87, + "learning_rate": 1.6268266718281237e-05, + "loss": 0.2055, + "step": 19280 + }, + { + "epoch": 1.87, + "learning_rate": 1.626633117197329e-05, + "loss": 0.3049, + "step": 19290 + }, + { + "epoch": 1.87, + "learning_rate": 1.6264395625665345e-05, + "loss": 0.2785, + "step": 19300 + }, + { + "epoch": 1.87, + "learning_rate": 1.62624600793574e-05, + "loss": 0.307, + "step": 19310 + }, + { + "epoch": 1.87, + "learning_rate": 1.6260524533049455e-05, + "loss": 0.3223, + "step": 19320 + }, + { + "epoch": 1.87, + "learning_rate": 1.625858898674151e-05, + "loss": 0.4327, + "step": 19330 + }, + { + "epoch": 1.87, + "learning_rate": 1.6256653440433563e-05, + "loss": 0.3987, + "step": 19340 + }, + { + "epoch": 1.87, + "learning_rate": 1.625471789412562e-05, + "loss": 0.3321, + "step": 19350 + }, + { + "epoch": 1.87, + "learning_rate": 1.6252782347817673e-05, + "loss": 0.3003, + "step": 19360 + }, + { + "epoch": 1.87, + "learning_rate": 1.6250846801509727e-05, + "loss": 0.2172, + "step": 19370 + }, + { + "epoch": 1.88, + "learning_rate": 1.6248911255201784e-05, + "loss": 0.3768, + "step": 19380 + }, + { + "epoch": 1.88, + "learning_rate": 1.6246975708893837e-05, + "loss": 0.2885, + "step": 19390 + }, + { + "epoch": 1.88, + "learning_rate": 1.624504016258589e-05, + "loss": 0.3331, + "step": 19400 + }, + { + "epoch": 1.88, + "learning_rate": 1.6243104616277945e-05, + "loss": 0.2489, + "step": 19410 + }, + { + "epoch": 1.88, + "learning_rate": 1.624116906997e-05, + "loss": 0.2379, + "step": 19420 + }, + { + "epoch": 1.88, + "learning_rate": 1.6239233523662055e-05, + "loss": 0.3037, + "step": 19430 + }, + { + "epoch": 1.88, + "learning_rate": 1.623729797735411e-05, + "loss": 0.3502, + "step": 19440 + }, + { + "epoch": 1.88, + "learning_rate": 1.6235362431046166e-05, + "loss": 0.3181, + "step": 19450 + }, + { + "epoch": 1.88, + "learning_rate": 1.623342688473822e-05, + "loss": 0.2894, + "step": 19460 + }, + { + "epoch": 1.88, + "learning_rate": 1.6231491338430273e-05, + "loss": 0.3452, + "step": 19470 + }, + { + "epoch": 1.89, + "learning_rate": 1.6229555792122327e-05, + "loss": 0.2964, + "step": 19480 + }, + { + "epoch": 1.89, + "learning_rate": 1.622762024581438e-05, + "loss": 0.333, + "step": 19490 + }, + { + "epoch": 1.89, + "learning_rate": 1.6225684699506438e-05, + "loss": 0.3493, + "step": 19500 + }, + { + "epoch": 1.89, + "learning_rate": 1.622374915319849e-05, + "loss": 0.4679, + "step": 19510 + }, + { + "epoch": 1.89, + "learning_rate": 1.6221813606890545e-05, + "loss": 0.3884, + "step": 19520 + }, + { + "epoch": 1.89, + "learning_rate": 1.6219878060582602e-05, + "loss": 0.4092, + "step": 19530 + }, + { + "epoch": 1.89, + "learning_rate": 1.6217942514274656e-05, + "loss": 0.2879, + "step": 19540 + }, + { + "epoch": 1.89, + "learning_rate": 1.6216006967966713e-05, + "loss": 0.2012, + "step": 19550 + }, + { + "epoch": 1.89, + "learning_rate": 1.6214071421658763e-05, + "loss": 0.244, + "step": 19560 + }, + { + "epoch": 1.89, + "learning_rate": 1.621213587535082e-05, + "loss": 0.3397, + "step": 19570 + }, + { + "epoch": 1.89, + "learning_rate": 1.6210200329042873e-05, + "loss": 0.2582, + "step": 19580 + }, + { + "epoch": 1.9, + "learning_rate": 1.6208264782734927e-05, + "loss": 0.2718, + "step": 19590 + }, + { + "epoch": 1.9, + "learning_rate": 1.6206329236426984e-05, + "loss": 0.2351, + "step": 19600 + }, + { + "epoch": 1.9, + "learning_rate": 1.6204393690119038e-05, + "loss": 0.2996, + "step": 19610 + }, + { + "epoch": 1.9, + "learning_rate": 1.620245814381109e-05, + "loss": 0.2787, + "step": 19620 + }, + { + "epoch": 1.9, + "learning_rate": 1.6200522597503145e-05, + "loss": 0.2997, + "step": 19630 + }, + { + "epoch": 1.9, + "learning_rate": 1.61985870511952e-05, + "loss": 0.2313, + "step": 19640 + }, + { + "epoch": 1.9, + "learning_rate": 1.6196651504887256e-05, + "loss": 0.3736, + "step": 19650 + }, + { + "epoch": 1.9, + "learning_rate": 1.619471595857931e-05, + "loss": 0.2657, + "step": 19660 + }, + { + "epoch": 1.9, + "learning_rate": 1.6192780412271366e-05, + "loss": 0.1712, + "step": 19670 + }, + { + "epoch": 1.9, + "learning_rate": 1.619084486596342e-05, + "loss": 0.3004, + "step": 19680 + }, + { + "epoch": 1.91, + "learning_rate": 1.6188909319655474e-05, + "loss": 0.4106, + "step": 19690 + }, + { + "epoch": 1.91, + "learning_rate": 1.618697377334753e-05, + "loss": 0.2653, + "step": 19700 + }, + { + "epoch": 1.91, + "learning_rate": 1.618503822703958e-05, + "loss": 0.2902, + "step": 19710 + }, + { + "epoch": 1.91, + "learning_rate": 1.6183102680731638e-05, + "loss": 0.5352, + "step": 19720 + }, + { + "epoch": 1.91, + "learning_rate": 1.618116713442369e-05, + "loss": 0.2977, + "step": 19730 + }, + { + "epoch": 1.91, + "learning_rate": 1.617923158811575e-05, + "loss": 0.2813, + "step": 19740 + }, + { + "epoch": 1.91, + "learning_rate": 1.6177296041807802e-05, + "loss": 0.28, + "step": 19750 + }, + { + "epoch": 1.91, + "learning_rate": 1.6175360495499856e-05, + "loss": 0.3155, + "step": 19760 + }, + { + "epoch": 1.91, + "learning_rate": 1.6173424949191913e-05, + "loss": 0.3484, + "step": 19770 + }, + { + "epoch": 1.91, + "learning_rate": 1.6171489402883967e-05, + "loss": 0.3432, + "step": 19780 + }, + { + "epoch": 1.92, + "learning_rate": 1.616955385657602e-05, + "loss": 0.2154, + "step": 19790 + }, + { + "epoch": 1.92, + "learning_rate": 1.6167618310268074e-05, + "loss": 0.1846, + "step": 19800 + }, + { + "epoch": 1.92, + "learning_rate": 1.6165682763960127e-05, + "loss": 0.2863, + "step": 19810 + }, + { + "epoch": 1.92, + "learning_rate": 1.6163747217652184e-05, + "loss": 0.2513, + "step": 19820 + }, + { + "epoch": 1.92, + "learning_rate": 1.6161811671344238e-05, + "loss": 0.2739, + "step": 19830 + }, + { + "epoch": 1.92, + "learning_rate": 1.6159876125036295e-05, + "loss": 0.1992, + "step": 19840 + }, + { + "epoch": 1.92, + "learning_rate": 1.615794057872835e-05, + "loss": 0.4048, + "step": 19850 + }, + { + "epoch": 1.92, + "learning_rate": 1.6156005032420402e-05, + "loss": 0.3042, + "step": 19860 + }, + { + "epoch": 1.92, + "learning_rate": 1.6154069486112456e-05, + "loss": 0.3278, + "step": 19870 + }, + { + "epoch": 1.92, + "learning_rate": 1.615213393980451e-05, + "loss": 0.1881, + "step": 19880 + }, + { + "epoch": 1.92, + "learning_rate": 1.6150198393496567e-05, + "loss": 0.3691, + "step": 19890 + }, + { + "epoch": 1.93, + "learning_rate": 1.614826284718862e-05, + "loss": 0.3856, + "step": 19900 + }, + { + "epoch": 1.93, + "learning_rate": 1.6146327300880674e-05, + "loss": 0.2138, + "step": 19910 + }, + { + "epoch": 1.93, + "learning_rate": 1.614439175457273e-05, + "loss": 0.1669, + "step": 19920 + }, + { + "epoch": 1.93, + "learning_rate": 1.6142456208264785e-05, + "loss": 0.2613, + "step": 19930 + }, + { + "epoch": 1.93, + "learning_rate": 1.6140520661956838e-05, + "loss": 0.37, + "step": 19940 + }, + { + "epoch": 1.93, + "learning_rate": 1.6138585115648892e-05, + "loss": 0.4984, + "step": 19950 + }, + { + "epoch": 1.93, + "learning_rate": 1.613664956934095e-05, + "loss": 0.3017, + "step": 19960 + }, + { + "epoch": 1.93, + "learning_rate": 1.6134714023033003e-05, + "loss": 0.3092, + "step": 19970 + }, + { + "epoch": 1.93, + "learning_rate": 1.6132778476725056e-05, + "loss": 0.3016, + "step": 19980 + }, + { + "epoch": 1.93, + "learning_rate": 1.6130842930417113e-05, + "loss": 0.4102, + "step": 19990 + }, + { + "epoch": 1.94, + "learning_rate": 1.6128907384109167e-05, + "loss": 0.3715, + "step": 20000 + }, + { + "epoch": 1.94, + "learning_rate": 1.612697183780122e-05, + "loss": 0.2694, + "step": 20010 + }, + { + "epoch": 1.94, + "learning_rate": 1.6125036291493274e-05, + "loss": 0.3026, + "step": 20020 + }, + { + "epoch": 1.94, + "learning_rate": 1.6123100745185328e-05, + "loss": 0.2734, + "step": 20030 + }, + { + "epoch": 1.94, + "learning_rate": 1.6121165198877385e-05, + "loss": 0.2112, + "step": 20040 + }, + { + "epoch": 1.94, + "learning_rate": 1.611922965256944e-05, + "loss": 0.4955, + "step": 20050 + }, + { + "epoch": 1.94, + "learning_rate": 1.6117294106261495e-05, + "loss": 0.304, + "step": 20060 + }, + { + "epoch": 1.94, + "learning_rate": 1.611535855995355e-05, + "loss": 0.328, + "step": 20070 + }, + { + "epoch": 1.94, + "learning_rate": 1.6113423013645603e-05, + "loss": 0.335, + "step": 20080 + }, + { + "epoch": 1.94, + "learning_rate": 1.6111487467337656e-05, + "loss": 0.2388, + "step": 20090 + }, + { + "epoch": 1.95, + "learning_rate": 1.610955192102971e-05, + "loss": 0.27, + "step": 20100 + }, + { + "epoch": 1.95, + "learning_rate": 1.6107616374721767e-05, + "loss": 0.4306, + "step": 20110 + }, + { + "epoch": 1.95, + "learning_rate": 1.610568082841382e-05, + "loss": 0.3143, + "step": 20120 + }, + { + "epoch": 1.95, + "learning_rate": 1.6103745282105874e-05, + "loss": 0.2758, + "step": 20130 + }, + { + "epoch": 1.95, + "learning_rate": 1.610180973579793e-05, + "loss": 0.2612, + "step": 20140 + }, + { + "epoch": 1.95, + "learning_rate": 1.6099874189489985e-05, + "loss": 0.2335, + "step": 20150 + }, + { + "epoch": 1.95, + "learning_rate": 1.609793864318204e-05, + "loss": 0.3537, + "step": 20160 + }, + { + "epoch": 1.95, + "learning_rate": 1.6096003096874092e-05, + "loss": 0.2725, + "step": 20170 + }, + { + "epoch": 1.95, + "learning_rate": 1.609406755056615e-05, + "loss": 0.2461, + "step": 20180 + }, + { + "epoch": 1.95, + "learning_rate": 1.6092132004258203e-05, + "loss": 0.3714, + "step": 20190 + }, + { + "epoch": 1.95, + "learning_rate": 1.6090196457950256e-05, + "loss": 0.2012, + "step": 20200 + }, + { + "epoch": 1.96, + "learning_rate": 1.6088260911642314e-05, + "loss": 0.281, + "step": 20210 + }, + { + "epoch": 1.96, + "learning_rate": 1.6086325365334367e-05, + "loss": 0.2717, + "step": 20220 + }, + { + "epoch": 1.96, + "learning_rate": 1.6084389819026424e-05, + "loss": 0.2419, + "step": 20230 + }, + { + "epoch": 1.96, + "learning_rate": 1.6082454272718474e-05, + "loss": 0.3028, + "step": 20240 + }, + { + "epoch": 1.96, + "learning_rate": 1.608051872641053e-05, + "loss": 0.3571, + "step": 20250 + }, + { + "epoch": 1.96, + "learning_rate": 1.6078583180102585e-05, + "loss": 0.3057, + "step": 20260 + }, + { + "epoch": 1.96, + "learning_rate": 1.607664763379464e-05, + "loss": 0.2074, + "step": 20270 + }, + { + "epoch": 1.96, + "learning_rate": 1.6074712087486696e-05, + "loss": 0.338, + "step": 20280 + }, + { + "epoch": 1.96, + "learning_rate": 1.607277654117875e-05, + "loss": 0.2173, + "step": 20290 + }, + { + "epoch": 1.96, + "learning_rate": 1.6070840994870803e-05, + "loss": 0.3407, + "step": 20300 + }, + { + "epoch": 1.97, + "learning_rate": 1.606890544856286e-05, + "loss": 0.2563, + "step": 20310 + }, + { + "epoch": 1.97, + "learning_rate": 1.606696990225491e-05, + "loss": 0.2561, + "step": 20320 + }, + { + "epoch": 1.97, + "learning_rate": 1.6065034355946967e-05, + "loss": 0.2578, + "step": 20330 + }, + { + "epoch": 1.97, + "learning_rate": 1.606309880963902e-05, + "loss": 0.2365, + "step": 20340 + }, + { + "epoch": 1.97, + "learning_rate": 1.6061163263331078e-05, + "loss": 0.2707, + "step": 20350 + }, + { + "epoch": 1.97, + "learning_rate": 1.605922771702313e-05, + "loss": 0.3317, + "step": 20360 + }, + { + "epoch": 1.97, + "learning_rate": 1.6057292170715185e-05, + "loss": 0.4035, + "step": 20370 + }, + { + "epoch": 1.97, + "learning_rate": 1.6055356624407242e-05, + "loss": 0.4422, + "step": 20380 + }, + { + "epoch": 1.97, + "learning_rate": 1.6053421078099293e-05, + "loss": 0.1566, + "step": 20390 + }, + { + "epoch": 1.97, + "learning_rate": 1.605148553179135e-05, + "loss": 0.2934, + "step": 20400 + }, + { + "epoch": 1.98, + "learning_rate": 1.6049549985483403e-05, + "loss": 0.2853, + "step": 20410 + }, + { + "epoch": 1.98, + "learning_rate": 1.6047614439175457e-05, + "loss": 0.1882, + "step": 20420 + }, + { + "epoch": 1.98, + "learning_rate": 1.6045678892867514e-05, + "loss": 0.3031, + "step": 20430 + }, + { + "epoch": 1.98, + "learning_rate": 1.6043743346559567e-05, + "loss": 0.2034, + "step": 20440 + }, + { + "epoch": 1.98, + "learning_rate": 1.6041807800251624e-05, + "loss": 0.3468, + "step": 20450 + }, + { + "epoch": 1.98, + "learning_rate": 1.6039872253943678e-05, + "loss": 0.4122, + "step": 20460 + }, + { + "epoch": 1.98, + "learning_rate": 1.6037936707635732e-05, + "loss": 0.2875, + "step": 20470 + }, + { + "epoch": 1.98, + "learning_rate": 1.6036001161327785e-05, + "loss": 0.2982, + "step": 20480 + }, + { + "epoch": 1.98, + "learning_rate": 1.603406561501984e-05, + "loss": 0.3036, + "step": 20490 + }, + { + "epoch": 1.98, + "learning_rate": 1.6032130068711896e-05, + "loss": 0.2092, + "step": 20500 + }, + { + "epoch": 1.98, + "learning_rate": 1.603019452240395e-05, + "loss": 0.2688, + "step": 20510 + }, + { + "epoch": 1.99, + "learning_rate": 1.6028258976096003e-05, + "loss": 0.3737, + "step": 20520 + }, + { + "epoch": 1.99, + "learning_rate": 1.602632342978806e-05, + "loss": 0.2973, + "step": 20530 + }, + { + "epoch": 1.99, + "learning_rate": 1.6024387883480114e-05, + "loss": 0.3122, + "step": 20540 + }, + { + "epoch": 1.99, + "learning_rate": 1.6022452337172168e-05, + "loss": 0.2608, + "step": 20550 + }, + { + "epoch": 1.99, + "learning_rate": 1.602051679086422e-05, + "loss": 0.3414, + "step": 20560 + }, + { + "epoch": 1.99, + "learning_rate": 1.6018581244556278e-05, + "loss": 0.2154, + "step": 20570 + }, + { + "epoch": 1.99, + "learning_rate": 1.6016645698248332e-05, + "loss": 0.33, + "step": 20580 + }, + { + "epoch": 1.99, + "learning_rate": 1.6014710151940386e-05, + "loss": 0.3083, + "step": 20590 + }, + { + "epoch": 1.99, + "learning_rate": 1.6012774605632443e-05, + "loss": 0.3268, + "step": 20600 + }, + { + "epoch": 1.99, + "learning_rate": 1.6010839059324496e-05, + "loss": 0.3215, + "step": 20610 + }, + { + "epoch": 2.0, + "learning_rate": 1.600890351301655e-05, + "loss": 0.3121, + "step": 20620 + }, + { + "epoch": 2.0, + "learning_rate": 1.6006967966708603e-05, + "loss": 0.4392, + "step": 20630 + }, + { + "epoch": 2.0, + "learning_rate": 1.600503242040066e-05, + "loss": 0.2084, + "step": 20640 + }, + { + "epoch": 2.0, + "learning_rate": 1.6003096874092714e-05, + "loss": 0.2343, + "step": 20650 + }, + { + "epoch": 2.0, + "learning_rate": 1.6001161327784768e-05, + "loss": 0.2889, + "step": 20660 + }, + { + "epoch": 2.0, + "eval_FN": 919, + "eval_FP": 1130, + "eval_TN": 14629, + "eval_TP": 3987, + "eval_accuracy": 0.9008468424872974, + "eval_f1": 0.7955701885662976, + "eval_loss": 0.34464871883392334, + "eval_precision": 0.7791674809458667, + "eval_recall": 0.8126783530370975, + "eval_runtime": 142.084, + "eval_samples_per_second": 145.442, + "eval_steps_per_second": 9.093, + "step": 20666 + }, + { + "epoch": 2.0, + "learning_rate": 1.5999225781476825e-05, + "loss": 0.3432, + "step": 20670 + }, + { + "epoch": 2.0, + "learning_rate": 1.599729023516888e-05, + "loss": 0.2342, + "step": 20680 + }, + { + "epoch": 2.0, + "learning_rate": 1.5995354688860932e-05, + "loss": 0.3926, + "step": 20690 + }, + { + "epoch": 2.0, + "learning_rate": 1.5993419142552986e-05, + "loss": 0.3598, + "step": 20700 + }, + { + "epoch": 2.0, + "learning_rate": 1.599148359624504e-05, + "loss": 0.2192, + "step": 20710 + }, + { + "epoch": 2.01, + "learning_rate": 1.5989548049937096e-05, + "loss": 0.2865, + "step": 20720 + }, + { + "epoch": 2.01, + "learning_rate": 1.598761250362915e-05, + "loss": 0.2612, + "step": 20730 + }, + { + "epoch": 2.01, + "learning_rate": 1.5985676957321207e-05, + "loss": 0.3038, + "step": 20740 + }, + { + "epoch": 2.01, + "learning_rate": 1.598374141101326e-05, + "loss": 0.2026, + "step": 20750 + }, + { + "epoch": 2.01, + "learning_rate": 1.5981805864705314e-05, + "loss": 0.3326, + "step": 20760 + }, + { + "epoch": 2.01, + "learning_rate": 1.5979870318397368e-05, + "loss": 0.2022, + "step": 20770 + }, + { + "epoch": 2.01, + "learning_rate": 1.597793477208942e-05, + "loss": 0.2445, + "step": 20780 + }, + { + "epoch": 2.01, + "learning_rate": 1.597599922578148e-05, + "loss": 0.2519, + "step": 20790 + }, + { + "epoch": 2.01, + "learning_rate": 1.5974063679473532e-05, + "loss": 0.2906, + "step": 20800 + }, + { + "epoch": 2.01, + "learning_rate": 1.5972128133165586e-05, + "loss": 0.2045, + "step": 20810 + }, + { + "epoch": 2.01, + "learning_rate": 1.5970192586857643e-05, + "loss": 0.346, + "step": 20820 + }, + { + "epoch": 2.02, + "learning_rate": 1.5968257040549697e-05, + "loss": 0.3019, + "step": 20830 + }, + { + "epoch": 2.02, + "learning_rate": 1.596632149424175e-05, + "loss": 0.2392, + "step": 20840 + }, + { + "epoch": 2.02, + "learning_rate": 1.5964385947933804e-05, + "loss": 0.2449, + "step": 20850 + }, + { + "epoch": 2.02, + "learning_rate": 1.596245040162586e-05, + "loss": 0.2073, + "step": 20860 + }, + { + "epoch": 2.02, + "learning_rate": 1.5960514855317914e-05, + "loss": 0.3339, + "step": 20870 + }, + { + "epoch": 2.02, + "learning_rate": 1.5958579309009968e-05, + "loss": 0.4166, + "step": 20880 + }, + { + "epoch": 2.02, + "learning_rate": 1.5956643762702025e-05, + "loss": 0.2809, + "step": 20890 + }, + { + "epoch": 2.02, + "learning_rate": 1.595470821639408e-05, + "loss": 0.1807, + "step": 20900 + }, + { + "epoch": 2.02, + "learning_rate": 1.5952772670086132e-05, + "loss": 0.2121, + "step": 20910 + }, + { + "epoch": 2.02, + "learning_rate": 1.5950837123778186e-05, + "loss": 0.1771, + "step": 20920 + }, + { + "epoch": 2.03, + "learning_rate": 1.5948901577470243e-05, + "loss": 0.2304, + "step": 20930 + }, + { + "epoch": 2.03, + "learning_rate": 1.5946966031162297e-05, + "loss": 0.2765, + "step": 20940 + }, + { + "epoch": 2.03, + "learning_rate": 1.594503048485435e-05, + "loss": 0.249, + "step": 20950 + }, + { + "epoch": 2.03, + "learning_rate": 1.5943094938546407e-05, + "loss": 0.2479, + "step": 20960 + }, + { + "epoch": 2.03, + "learning_rate": 1.594115939223846e-05, + "loss": 0.231, + "step": 20970 + }, + { + "epoch": 2.03, + "learning_rate": 1.5939223845930515e-05, + "loss": 0.3658, + "step": 20980 + }, + { + "epoch": 2.03, + "learning_rate": 1.593728829962257e-05, + "loss": 0.2716, + "step": 20990 + }, + { + "epoch": 2.03, + "learning_rate": 1.5935352753314622e-05, + "loss": 0.2607, + "step": 21000 + }, + { + "epoch": 2.03, + "learning_rate": 1.593341720700668e-05, + "loss": 0.3657, + "step": 21010 + }, + { + "epoch": 2.03, + "learning_rate": 1.5931481660698733e-05, + "loss": 0.2209, + "step": 21020 + }, + { + "epoch": 2.04, + "learning_rate": 1.592954611439079e-05, + "loss": 0.3332, + "step": 21030 + }, + { + "epoch": 2.04, + "learning_rate": 1.5927610568082843e-05, + "loss": 0.3315, + "step": 21040 + }, + { + "epoch": 2.04, + "learning_rate": 1.5925675021774897e-05, + "loss": 0.3444, + "step": 21050 + }, + { + "epoch": 2.04, + "learning_rate": 1.5923739475466954e-05, + "loss": 0.2668, + "step": 21060 + }, + { + "epoch": 2.04, + "learning_rate": 1.5921803929159004e-05, + "loss": 0.2203, + "step": 21070 + }, + { + "epoch": 2.04, + "learning_rate": 1.591986838285106e-05, + "loss": 0.2921, + "step": 21080 + }, + { + "epoch": 2.04, + "learning_rate": 1.5917932836543115e-05, + "loss": 0.3581, + "step": 21090 + }, + { + "epoch": 2.04, + "learning_rate": 1.591599729023517e-05, + "loss": 0.2976, + "step": 21100 + }, + { + "epoch": 2.04, + "learning_rate": 1.5914061743927225e-05, + "loss": 0.2713, + "step": 21110 + }, + { + "epoch": 2.04, + "learning_rate": 1.591212619761928e-05, + "loss": 0.2514, + "step": 21120 + }, + { + "epoch": 2.04, + "learning_rate": 1.5910190651311336e-05, + "loss": 0.1008, + "step": 21130 + }, + { + "epoch": 2.05, + "learning_rate": 1.590825510500339e-05, + "loss": 0.4273, + "step": 21140 + }, + { + "epoch": 2.05, + "learning_rate": 1.5906319558695443e-05, + "loss": 0.3706, + "step": 21150 + }, + { + "epoch": 2.05, + "learning_rate": 1.5904384012387497e-05, + "loss": 0.2325, + "step": 21160 + }, + { + "epoch": 2.05, + "learning_rate": 1.590244846607955e-05, + "loss": 0.3872, + "step": 21170 + }, + { + "epoch": 2.05, + "learning_rate": 1.5900512919771608e-05, + "loss": 0.2218, + "step": 21180 + }, + { + "epoch": 2.05, + "learning_rate": 1.589857737346366e-05, + "loss": 0.2195, + "step": 21190 + }, + { + "epoch": 2.05, + "learning_rate": 1.5896641827155715e-05, + "loss": 0.2542, + "step": 21200 + }, + { + "epoch": 2.05, + "learning_rate": 1.5894706280847772e-05, + "loss": 0.2097, + "step": 21210 + }, + { + "epoch": 2.05, + "learning_rate": 1.5892770734539826e-05, + "loss": 0.31, + "step": 21220 + }, + { + "epoch": 2.05, + "learning_rate": 1.589083518823188e-05, + "loss": 0.2332, + "step": 21230 + }, + { + "epoch": 2.06, + "learning_rate": 1.5888899641923933e-05, + "loss": 0.2174, + "step": 21240 + }, + { + "epoch": 2.06, + "learning_rate": 1.588696409561599e-05, + "loss": 0.2457, + "step": 21250 + }, + { + "epoch": 2.06, + "learning_rate": 1.5885028549308044e-05, + "loss": 0.2047, + "step": 21260 + }, + { + "epoch": 2.06, + "learning_rate": 1.5883093003000097e-05, + "loss": 0.2354, + "step": 21270 + }, + { + "epoch": 2.06, + "learning_rate": 1.5881157456692154e-05, + "loss": 0.4077, + "step": 21280 + }, + { + "epoch": 2.06, + "learning_rate": 1.5879221910384208e-05, + "loss": 0.4172, + "step": 21290 + }, + { + "epoch": 2.06, + "learning_rate": 1.587728636407626e-05, + "loss": 0.3534, + "step": 21300 + }, + { + "epoch": 2.06, + "learning_rate": 1.5875350817768315e-05, + "loss": 0.2571, + "step": 21310 + }, + { + "epoch": 2.06, + "learning_rate": 1.5873415271460372e-05, + "loss": 0.2064, + "step": 21320 + }, + { + "epoch": 2.06, + "learning_rate": 1.5871479725152426e-05, + "loss": 0.1528, + "step": 21330 + }, + { + "epoch": 2.07, + "learning_rate": 1.586954417884448e-05, + "loss": 0.2901, + "step": 21340 + }, + { + "epoch": 2.07, + "learning_rate": 1.5867608632536536e-05, + "loss": 0.4629, + "step": 21350 + }, + { + "epoch": 2.07, + "learning_rate": 1.586567308622859e-05, + "loss": 0.1366, + "step": 21360 + }, + { + "epoch": 2.07, + "learning_rate": 1.5863737539920644e-05, + "loss": 0.3023, + "step": 21370 + }, + { + "epoch": 2.07, + "learning_rate": 1.5861801993612697e-05, + "loss": 0.245, + "step": 21380 + }, + { + "epoch": 2.07, + "learning_rate": 1.585986644730475e-05, + "loss": 0.2692, + "step": 21390 + }, + { + "epoch": 2.07, + "learning_rate": 1.5857930900996808e-05, + "loss": 0.1231, + "step": 21400 + }, + { + "epoch": 2.07, + "learning_rate": 1.585599535468886e-05, + "loss": 0.3895, + "step": 21410 + }, + { + "epoch": 2.07, + "learning_rate": 1.585405980838092e-05, + "loss": 0.1967, + "step": 21420 + }, + { + "epoch": 2.07, + "learning_rate": 1.5852124262072972e-05, + "loss": 0.3142, + "step": 21430 + }, + { + "epoch": 2.07, + "learning_rate": 1.5850188715765026e-05, + "loss": 0.2888, + "step": 21440 + }, + { + "epoch": 2.08, + "learning_rate": 1.584825316945708e-05, + "loss": 0.4153, + "step": 21450 + }, + { + "epoch": 2.08, + "learning_rate": 1.5846317623149133e-05, + "loss": 0.2585, + "step": 21460 + }, + { + "epoch": 2.08, + "learning_rate": 1.584438207684119e-05, + "loss": 0.2564, + "step": 21470 + }, + { + "epoch": 2.08, + "learning_rate": 1.5842446530533244e-05, + "loss": 0.251, + "step": 21480 + }, + { + "epoch": 2.08, + "learning_rate": 1.5840510984225297e-05, + "loss": 0.1714, + "step": 21490 + }, + { + "epoch": 2.08, + "learning_rate": 1.5838575437917354e-05, + "loss": 0.4766, + "step": 21500 + }, + { + "epoch": 2.08, + "learning_rate": 1.5836639891609408e-05, + "loss": 0.2469, + "step": 21510 + }, + { + "epoch": 2.08, + "learning_rate": 1.5834704345301465e-05, + "loss": 0.2161, + "step": 21520 + }, + { + "epoch": 2.08, + "learning_rate": 1.5832768798993515e-05, + "loss": 0.2216, + "step": 21530 + }, + { + "epoch": 2.08, + "learning_rate": 1.5830833252685572e-05, + "loss": 0.2184, + "step": 21540 + }, + { + "epoch": 2.09, + "learning_rate": 1.5828897706377626e-05, + "loss": 0.2276, + "step": 21550 + }, + { + "epoch": 2.09, + "learning_rate": 1.582696216006968e-05, + "loss": 0.1708, + "step": 21560 + }, + { + "epoch": 2.09, + "learning_rate": 1.5825026613761737e-05, + "loss": 0.3663, + "step": 21570 + }, + { + "epoch": 2.09, + "learning_rate": 1.582309106745379e-05, + "loss": 0.4665, + "step": 21580 + }, + { + "epoch": 2.09, + "learning_rate": 1.5821155521145844e-05, + "loss": 0.1806, + "step": 21590 + }, + { + "epoch": 2.09, + "learning_rate": 1.5819219974837898e-05, + "loss": 0.2338, + "step": 21600 + }, + { + "epoch": 2.09, + "learning_rate": 1.5817284428529955e-05, + "loss": 0.2788, + "step": 21610 + }, + { + "epoch": 2.09, + "learning_rate": 1.5815348882222008e-05, + "loss": 0.203, + "step": 21620 + }, + { + "epoch": 2.09, + "learning_rate": 1.5813413335914062e-05, + "loss": 0.2149, + "step": 21630 + }, + { + "epoch": 2.09, + "learning_rate": 1.581147778960612e-05, + "loss": 0.2371, + "step": 21640 + }, + { + "epoch": 2.1, + "learning_rate": 1.5809542243298173e-05, + "loss": 0.5227, + "step": 21650 + }, + { + "epoch": 2.1, + "learning_rate": 1.5807606696990226e-05, + "loss": 0.1922, + "step": 21660 + }, + { + "epoch": 2.1, + "learning_rate": 1.5805671150682283e-05, + "loss": 0.256, + "step": 21670 + }, + { + "epoch": 2.1, + "learning_rate": 1.5803735604374333e-05, + "loss": 0.4526, + "step": 21680 + }, + { + "epoch": 2.1, + "learning_rate": 1.580180005806639e-05, + "loss": 0.2586, + "step": 21690 + }, + { + "epoch": 2.1, + "learning_rate": 1.5799864511758444e-05, + "loss": 0.1447, + "step": 21700 + }, + { + "epoch": 2.1, + "learning_rate": 1.57979289654505e-05, + "loss": 0.248, + "step": 21710 + }, + { + "epoch": 2.1, + "learning_rate": 1.5795993419142555e-05, + "loss": 0.3199, + "step": 21720 + }, + { + "epoch": 2.1, + "learning_rate": 1.579405787283461e-05, + "loss": 0.3233, + "step": 21730 + }, + { + "epoch": 2.1, + "learning_rate": 1.5792122326526665e-05, + "loss": 0.1917, + "step": 21740 + }, + { + "epoch": 2.1, + "learning_rate": 1.5790186780218716e-05, + "loss": 0.3776, + "step": 21750 + }, + { + "epoch": 2.11, + "learning_rate": 1.5788251233910773e-05, + "loss": 0.2117, + "step": 21760 + }, + { + "epoch": 2.11, + "learning_rate": 1.5786315687602826e-05, + "loss": 0.1987, + "step": 21770 + }, + { + "epoch": 2.11, + "learning_rate": 1.578438014129488e-05, + "loss": 0.3366, + "step": 21780 + }, + { + "epoch": 2.11, + "learning_rate": 1.5782444594986937e-05, + "loss": 0.2574, + "step": 21790 + }, + { + "epoch": 2.11, + "learning_rate": 1.578050904867899e-05, + "loss": 0.3858, + "step": 21800 + }, + { + "epoch": 2.11, + "learning_rate": 1.5778573502371048e-05, + "loss": 0.2742, + "step": 21810 + }, + { + "epoch": 2.11, + "learning_rate": 1.57766379560631e-05, + "loss": 0.264, + "step": 21820 + }, + { + "epoch": 2.11, + "learning_rate": 1.5774702409755155e-05, + "loss": 0.3125, + "step": 21830 + }, + { + "epoch": 2.11, + "learning_rate": 1.577276686344721e-05, + "loss": 0.321, + "step": 21840 + }, + { + "epoch": 2.11, + "learning_rate": 1.5770831317139262e-05, + "loss": 0.3371, + "step": 21850 + }, + { + "epoch": 2.12, + "learning_rate": 1.576889577083132e-05, + "loss": 0.1889, + "step": 21860 + }, + { + "epoch": 2.12, + "learning_rate": 1.5766960224523373e-05, + "loss": 0.2423, + "step": 21870 + }, + { + "epoch": 2.12, + "learning_rate": 1.5765024678215427e-05, + "loss": 0.1854, + "step": 21880 + }, + { + "epoch": 2.12, + "learning_rate": 1.5763089131907484e-05, + "loss": 0.2817, + "step": 21890 + }, + { + "epoch": 2.12, + "learning_rate": 1.5761153585599537e-05, + "loss": 0.1499, + "step": 21900 + }, + { + "epoch": 2.12, + "learning_rate": 1.575921803929159e-05, + "loss": 0.2647, + "step": 21910 + }, + { + "epoch": 2.12, + "learning_rate": 1.5757282492983644e-05, + "loss": 0.4387, + "step": 21920 + }, + { + "epoch": 2.12, + "learning_rate": 1.57553469466757e-05, + "loss": 0.226, + "step": 21930 + }, + { + "epoch": 2.12, + "learning_rate": 1.5753411400367755e-05, + "loss": 0.192, + "step": 21940 + }, + { + "epoch": 2.12, + "learning_rate": 1.575147585405981e-05, + "loss": 0.2184, + "step": 21950 + }, + { + "epoch": 2.13, + "learning_rate": 1.5749540307751866e-05, + "loss": 0.3275, + "step": 21960 + }, + { + "epoch": 2.13, + "learning_rate": 1.574760476144392e-05, + "loss": 0.336, + "step": 21970 + }, + { + "epoch": 2.13, + "learning_rate": 1.5745669215135973e-05, + "loss": 0.229, + "step": 21980 + }, + { + "epoch": 2.13, + "learning_rate": 1.5743733668828027e-05, + "loss": 0.2626, + "step": 21990 + }, + { + "epoch": 2.13, + "learning_rate": 1.5741798122520084e-05, + "loss": 0.193, + "step": 22000 + }, + { + "epoch": 2.13, + "learning_rate": 1.5739862576212137e-05, + "loss": 0.1983, + "step": 22010 + }, + { + "epoch": 2.13, + "learning_rate": 1.573792702990419e-05, + "loss": 0.2912, + "step": 22020 + }, + { + "epoch": 2.13, + "learning_rate": 1.5735991483596248e-05, + "loss": 0.1933, + "step": 22030 + }, + { + "epoch": 2.13, + "learning_rate": 1.57340559372883e-05, + "loss": 0.4074, + "step": 22040 + }, + { + "epoch": 2.13, + "learning_rate": 1.5732120390980355e-05, + "loss": 0.1966, + "step": 22050 + }, + { + "epoch": 2.13, + "learning_rate": 1.573018484467241e-05, + "loss": 0.2, + "step": 22060 + }, + { + "epoch": 2.14, + "learning_rate": 1.5728249298364463e-05, + "loss": 0.2004, + "step": 22070 + }, + { + "epoch": 2.14, + "learning_rate": 1.572631375205652e-05, + "loss": 0.2468, + "step": 22080 + }, + { + "epoch": 2.14, + "learning_rate": 1.5724378205748573e-05, + "loss": 0.1423, + "step": 22090 + }, + { + "epoch": 2.14, + "learning_rate": 1.572244265944063e-05, + "loss": 0.2409, + "step": 22100 + }, + { + "epoch": 2.14, + "learning_rate": 1.5720507113132684e-05, + "loss": 0.1016, + "step": 22110 + }, + { + "epoch": 2.14, + "learning_rate": 1.5718571566824737e-05, + "loss": 0.1909, + "step": 22120 + }, + { + "epoch": 2.14, + "learning_rate": 1.571663602051679e-05, + "loss": 0.2789, + "step": 22130 + }, + { + "epoch": 2.14, + "learning_rate": 1.5714700474208845e-05, + "loss": 0.3647, + "step": 22140 + }, + { + "epoch": 2.14, + "learning_rate": 1.5712764927900902e-05, + "loss": 0.361, + "step": 22150 + }, + { + "epoch": 2.14, + "learning_rate": 1.5710829381592955e-05, + "loss": 0.3072, + "step": 22160 + }, + { + "epoch": 2.15, + "learning_rate": 1.570889383528501e-05, + "loss": 0.3066, + "step": 22170 + }, + { + "epoch": 2.15, + "learning_rate": 1.5706958288977066e-05, + "loss": 0.2989, + "step": 22180 + }, + { + "epoch": 2.15, + "learning_rate": 1.570502274266912e-05, + "loss": 0.2243, + "step": 22190 + }, + { + "epoch": 2.15, + "learning_rate": 1.5703087196361177e-05, + "loss": 0.3565, + "step": 22200 + }, + { + "epoch": 2.15, + "learning_rate": 1.5701151650053227e-05, + "loss": 0.3425, + "step": 22210 + }, + { + "epoch": 2.15, + "learning_rate": 1.5699216103745284e-05, + "loss": 0.2412, + "step": 22220 + }, + { + "epoch": 2.15, + "learning_rate": 1.5697280557437338e-05, + "loss": 0.1828, + "step": 22230 + }, + { + "epoch": 2.15, + "learning_rate": 1.569534501112939e-05, + "loss": 0.3325, + "step": 22240 + }, + { + "epoch": 2.15, + "learning_rate": 1.5693409464821448e-05, + "loss": 0.2853, + "step": 22250 + }, + { + "epoch": 2.15, + "learning_rate": 1.5691473918513502e-05, + "loss": 0.16, + "step": 22260 + }, + { + "epoch": 2.16, + "learning_rate": 1.5689538372205556e-05, + "loss": 0.3891, + "step": 22270 + }, + { + "epoch": 2.16, + "learning_rate": 1.568760282589761e-05, + "loss": 0.3791, + "step": 22280 + }, + { + "epoch": 2.16, + "learning_rate": 1.5685667279589666e-05, + "loss": 0.3009, + "step": 22290 + }, + { + "epoch": 2.16, + "learning_rate": 1.568373173328172e-05, + "loss": 0.1741, + "step": 22300 + }, + { + "epoch": 2.16, + "learning_rate": 1.5681796186973774e-05, + "loss": 0.1853, + "step": 22310 + }, + { + "epoch": 2.16, + "learning_rate": 1.567986064066583e-05, + "loss": 0.2431, + "step": 22320 + }, + { + "epoch": 2.16, + "learning_rate": 1.5677925094357884e-05, + "loss": 0.2311, + "step": 22330 + }, + { + "epoch": 2.16, + "learning_rate": 1.5675989548049938e-05, + "loss": 0.2238, + "step": 22340 + }, + { + "epoch": 2.16, + "learning_rate": 1.5674054001741995e-05, + "loss": 0.3027, + "step": 22350 + }, + { + "epoch": 2.16, + "learning_rate": 1.5672118455434045e-05, + "loss": 0.2233, + "step": 22360 + }, + { + "epoch": 2.16, + "learning_rate": 1.5670182909126102e-05, + "loss": 0.2132, + "step": 22370 + }, + { + "epoch": 2.17, + "learning_rate": 1.5668247362818156e-05, + "loss": 0.2421, + "step": 22380 + }, + { + "epoch": 2.17, + "learning_rate": 1.5666311816510213e-05, + "loss": 0.2628, + "step": 22390 + }, + { + "epoch": 2.17, + "learning_rate": 1.5664376270202266e-05, + "loss": 0.3292, + "step": 22400 + }, + { + "epoch": 2.17, + "learning_rate": 1.566244072389432e-05, + "loss": 0.1986, + "step": 22410 + }, + { + "epoch": 2.17, + "learning_rate": 1.5660505177586377e-05, + "loss": 0.3264, + "step": 22420 + }, + { + "epoch": 2.17, + "learning_rate": 1.5658569631278427e-05, + "loss": 0.2737, + "step": 22430 + }, + { + "epoch": 2.17, + "learning_rate": 1.5656634084970484e-05, + "loss": 0.167, + "step": 22440 + }, + { + "epoch": 2.17, + "learning_rate": 1.5654698538662538e-05, + "loss": 0.3104, + "step": 22450 + }, + { + "epoch": 2.17, + "learning_rate": 1.565276299235459e-05, + "loss": 0.233, + "step": 22460 + }, + { + "epoch": 2.17, + "learning_rate": 1.565082744604665e-05, + "loss": 0.2926, + "step": 22470 + }, + { + "epoch": 2.18, + "learning_rate": 1.5648891899738702e-05, + "loss": 0.1775, + "step": 22480 + }, + { + "epoch": 2.18, + "learning_rate": 1.564695635343076e-05, + "loss": 0.3102, + "step": 22490 + }, + { + "epoch": 2.18, + "learning_rate": 1.5645020807122813e-05, + "loss": 0.2869, + "step": 22500 + }, + { + "epoch": 2.18, + "learning_rate": 1.5643085260814867e-05, + "loss": 0.3027, + "step": 22510 + }, + { + "epoch": 2.18, + "learning_rate": 1.564114971450692e-05, + "loss": 0.1698, + "step": 22520 + }, + { + "epoch": 2.18, + "learning_rate": 1.5639214168198974e-05, + "loss": 0.2865, + "step": 22530 + }, + { + "epoch": 2.18, + "learning_rate": 1.563727862189103e-05, + "loss": 0.4138, + "step": 22540 + }, + { + "epoch": 2.18, + "learning_rate": 1.5635343075583084e-05, + "loss": 0.2366, + "step": 22550 + }, + { + "epoch": 2.18, + "learning_rate": 1.5633407529275138e-05, + "loss": 0.1658, + "step": 22560 + }, + { + "epoch": 2.18, + "learning_rate": 1.5631471982967195e-05, + "loss": 0.3825, + "step": 22570 + }, + { + "epoch": 2.19, + "learning_rate": 1.562953643665925e-05, + "loss": 0.251, + "step": 22580 + }, + { + "epoch": 2.19, + "learning_rate": 1.5627600890351302e-05, + "loss": 0.2205, + "step": 22590 + }, + { + "epoch": 2.19, + "learning_rate": 1.5625665344043356e-05, + "loss": 0.2218, + "step": 22600 + }, + { + "epoch": 2.19, + "learning_rate": 1.5623729797735413e-05, + "loss": 0.3052, + "step": 22610 + }, + { + "epoch": 2.19, + "learning_rate": 1.5621794251427467e-05, + "loss": 0.3055, + "step": 22620 + }, + { + "epoch": 2.19, + "learning_rate": 1.561985870511952e-05, + "loss": 0.2835, + "step": 22630 + }, + { + "epoch": 2.19, + "learning_rate": 1.5617923158811577e-05, + "loss": 0.3201, + "step": 22640 + }, + { + "epoch": 2.19, + "learning_rate": 1.561598761250363e-05, + "loss": 0.2993, + "step": 22650 + }, + { + "epoch": 2.19, + "learning_rate": 1.5614052066195685e-05, + "loss": 0.2218, + "step": 22660 + }, + { + "epoch": 2.19, + "learning_rate": 1.5612116519887738e-05, + "loss": 0.1821, + "step": 22670 + }, + { + "epoch": 2.19, + "learning_rate": 1.5610180973579795e-05, + "loss": 0.3121, + "step": 22680 + }, + { + "epoch": 2.2, + "learning_rate": 1.560824542727185e-05, + "loss": 0.2111, + "step": 22690 + }, + { + "epoch": 2.2, + "learning_rate": 1.5606309880963903e-05, + "loss": 0.4287, + "step": 22700 + }, + { + "epoch": 2.2, + "learning_rate": 1.560437433465596e-05, + "loss": 0.2586, + "step": 22710 + }, + { + "epoch": 2.2, + "learning_rate": 1.5602438788348013e-05, + "loss": 0.3464, + "step": 22720 + }, + { + "epoch": 2.2, + "learning_rate": 1.5600503242040067e-05, + "loss": 0.2257, + "step": 22730 + }, + { + "epoch": 2.2, + "learning_rate": 1.559856769573212e-05, + "loss": 0.2017, + "step": 22740 + }, + { + "epoch": 2.2, + "learning_rate": 1.5596632149424174e-05, + "loss": 0.2797, + "step": 22750 + }, + { + "epoch": 2.2, + "learning_rate": 1.559469660311623e-05, + "loss": 0.2968, + "step": 22760 + }, + { + "epoch": 2.2, + "learning_rate": 1.5592761056808285e-05, + "loss": 0.1737, + "step": 22770 + }, + { + "epoch": 2.2, + "learning_rate": 1.5590825510500342e-05, + "loss": 0.2217, + "step": 22780 + }, + { + "epoch": 2.21, + "learning_rate": 1.5588889964192395e-05, + "loss": 0.2424, + "step": 22790 + }, + { + "epoch": 2.21, + "learning_rate": 1.558695441788445e-05, + "loss": 0.1722, + "step": 22800 + }, + { + "epoch": 2.21, + "learning_rate": 1.5585018871576503e-05, + "loss": 0.2643, + "step": 22810 + }, + { + "epoch": 2.21, + "learning_rate": 1.5583083325268556e-05, + "loss": 0.411, + "step": 22820 + }, + { + "epoch": 2.21, + "learning_rate": 1.5581147778960613e-05, + "loss": 0.2698, + "step": 22830 + }, + { + "epoch": 2.21, + "learning_rate": 1.5579212232652667e-05, + "loss": 0.2355, + "step": 22840 + }, + { + "epoch": 2.21, + "learning_rate": 1.557727668634472e-05, + "loss": 0.2695, + "step": 22850 + }, + { + "epoch": 2.21, + "learning_rate": 1.5575341140036778e-05, + "loss": 0.2977, + "step": 22860 + }, + { + "epoch": 2.21, + "learning_rate": 1.557340559372883e-05, + "loss": 0.245, + "step": 22870 + }, + { + "epoch": 2.21, + "learning_rate": 1.557147004742089e-05, + "loss": 0.24, + "step": 22880 + }, + { + "epoch": 2.22, + "learning_rate": 1.556953450111294e-05, + "loss": 0.2836, + "step": 22890 + }, + { + "epoch": 2.22, + "learning_rate": 1.5567598954804996e-05, + "loss": 0.2545, + "step": 22900 + }, + { + "epoch": 2.22, + "learning_rate": 1.556566340849705e-05, + "loss": 0.1615, + "step": 22910 + }, + { + "epoch": 2.22, + "learning_rate": 1.5563727862189103e-05, + "loss": 0.2826, + "step": 22920 + }, + { + "epoch": 2.22, + "learning_rate": 1.556179231588116e-05, + "loss": 0.2515, + "step": 22930 + }, + { + "epoch": 2.22, + "learning_rate": 1.5559856769573214e-05, + "loss": 0.4158, + "step": 22940 + }, + { + "epoch": 2.22, + "learning_rate": 1.5557921223265267e-05, + "loss": 0.4195, + "step": 22950 + }, + { + "epoch": 2.22, + "learning_rate": 1.555598567695732e-05, + "loss": 0.3761, + "step": 22960 + }, + { + "epoch": 2.22, + "learning_rate": 1.5554050130649378e-05, + "loss": 0.1775, + "step": 22970 + }, + { + "epoch": 2.22, + "learning_rate": 1.555211458434143e-05, + "loss": 0.2074, + "step": 22980 + }, + { + "epoch": 2.22, + "learning_rate": 1.5550179038033485e-05, + "loss": 0.1675, + "step": 22990 + }, + { + "epoch": 2.23, + "learning_rate": 1.5548243491725542e-05, + "loss": 0.1886, + "step": 23000 + }, + { + "epoch": 2.23, + "learning_rate": 1.5546307945417596e-05, + "loss": 0.2923, + "step": 23010 + }, + { + "epoch": 2.23, + "learning_rate": 1.554437239910965e-05, + "loss": 0.3487, + "step": 23020 + }, + { + "epoch": 2.23, + "learning_rate": 1.5542436852801706e-05, + "loss": 0.2838, + "step": 23030 + }, + { + "epoch": 2.23, + "learning_rate": 1.5540501306493757e-05, + "loss": 0.2118, + "step": 23040 + }, + { + "epoch": 2.23, + "learning_rate": 1.5538565760185814e-05, + "loss": 0.2534, + "step": 23050 + }, + { + "epoch": 2.23, + "learning_rate": 1.5536630213877867e-05, + "loss": 0.1665, + "step": 23060 + }, + { + "epoch": 2.23, + "learning_rate": 1.5534694667569924e-05, + "loss": 0.216, + "step": 23070 + }, + { + "epoch": 2.23, + "learning_rate": 1.5532759121261978e-05, + "loss": 0.222, + "step": 23080 + }, + { + "epoch": 2.23, + "learning_rate": 1.553082357495403e-05, + "loss": 0.3336, + "step": 23090 + }, + { + "epoch": 2.24, + "learning_rate": 1.552888802864609e-05, + "loss": 0.2988, + "step": 23100 + }, + { + "epoch": 2.24, + "learning_rate": 1.552695248233814e-05, + "loss": 0.2145, + "step": 23110 + }, + { + "epoch": 2.24, + "learning_rate": 1.5525016936030196e-05, + "loss": 0.3021, + "step": 23120 + }, + { + "epoch": 2.24, + "learning_rate": 1.552308138972225e-05, + "loss": 0.2743, + "step": 23130 + }, + { + "epoch": 2.24, + "learning_rate": 1.5521145843414303e-05, + "loss": 0.263, + "step": 23140 + }, + { + "epoch": 2.24, + "learning_rate": 1.551921029710636e-05, + "loss": 0.1799, + "step": 23150 + }, + { + "epoch": 2.24, + "learning_rate": 1.5517274750798414e-05, + "loss": 0.2998, + "step": 23160 + }, + { + "epoch": 2.24, + "learning_rate": 1.551533920449047e-05, + "loss": 0.3104, + "step": 23170 + }, + { + "epoch": 2.24, + "learning_rate": 1.5513403658182525e-05, + "loss": 0.3845, + "step": 23180 + }, + { + "epoch": 2.24, + "learning_rate": 1.5511468111874578e-05, + "loss": 0.2989, + "step": 23190 + }, + { + "epoch": 2.25, + "learning_rate": 1.5509532565566632e-05, + "loss": 0.1964, + "step": 23200 + }, + { + "epoch": 2.25, + "learning_rate": 1.5507597019258685e-05, + "loss": 0.2604, + "step": 23210 + }, + { + "epoch": 2.25, + "learning_rate": 1.5505661472950742e-05, + "loss": 0.2242, + "step": 23220 + }, + { + "epoch": 2.25, + "learning_rate": 1.5503725926642796e-05, + "loss": 0.2149, + "step": 23230 + }, + { + "epoch": 2.25, + "learning_rate": 1.550179038033485e-05, + "loss": 0.1885, + "step": 23240 + }, + { + "epoch": 2.25, + "learning_rate": 1.5499854834026907e-05, + "loss": 0.2021, + "step": 23250 + }, + { + "epoch": 2.25, + "learning_rate": 1.549791928771896e-05, + "loss": 0.1642, + "step": 23260 + }, + { + "epoch": 2.25, + "learning_rate": 1.5495983741411014e-05, + "loss": 0.3228, + "step": 23270 + }, + { + "epoch": 2.25, + "learning_rate": 1.5494048195103068e-05, + "loss": 0.2392, + "step": 23280 + }, + { + "epoch": 2.25, + "learning_rate": 1.5492112648795125e-05, + "loss": 0.1153, + "step": 23290 + }, + { + "epoch": 2.25, + "learning_rate": 1.5490177102487178e-05, + "loss": 0.2811, + "step": 23300 + }, + { + "epoch": 2.26, + "learning_rate": 1.5488241556179232e-05, + "loss": 0.2784, + "step": 23310 + }, + { + "epoch": 2.26, + "learning_rate": 1.548630600987129e-05, + "loss": 0.3352, + "step": 23320 + }, + { + "epoch": 2.26, + "learning_rate": 1.5484370463563343e-05, + "loss": 0.3005, + "step": 23330 + }, + { + "epoch": 2.26, + "learning_rate": 1.5482434917255396e-05, + "loss": 0.2148, + "step": 23340 + }, + { + "epoch": 2.26, + "learning_rate": 1.548049937094745e-05, + "loss": 0.2207, + "step": 23350 + }, + { + "epoch": 2.26, + "learning_rate": 1.5478563824639507e-05, + "loss": 0.2115, + "step": 23360 + }, + { + "epoch": 2.26, + "learning_rate": 1.547662827833156e-05, + "loss": 0.2036, + "step": 23370 + }, + { + "epoch": 2.26, + "learning_rate": 1.5474692732023614e-05, + "loss": 0.365, + "step": 23380 + }, + { + "epoch": 2.26, + "learning_rate": 1.547275718571567e-05, + "loss": 0.3763, + "step": 23390 + }, + { + "epoch": 2.26, + "learning_rate": 1.5470821639407725e-05, + "loss": 0.2203, + "step": 23400 + }, + { + "epoch": 2.27, + "learning_rate": 1.546888609309978e-05, + "loss": 0.2369, + "step": 23410 + }, + { + "epoch": 2.27, + "learning_rate": 1.5466950546791832e-05, + "loss": 0.2536, + "step": 23420 + }, + { + "epoch": 2.27, + "learning_rate": 1.5465015000483886e-05, + "loss": 0.3092, + "step": 23430 + }, + { + "epoch": 2.27, + "learning_rate": 1.5463079454175943e-05, + "loss": 0.2055, + "step": 23440 + }, + { + "epoch": 2.27, + "learning_rate": 1.5461143907867996e-05, + "loss": 0.208, + "step": 23450 + }, + { + "epoch": 2.27, + "learning_rate": 1.5459208361560053e-05, + "loss": 0.2115, + "step": 23460 + }, + { + "epoch": 2.27, + "learning_rate": 1.5457272815252107e-05, + "loss": 0.207, + "step": 23470 + }, + { + "epoch": 2.27, + "learning_rate": 1.545533726894416e-05, + "loss": 0.2334, + "step": 23480 + }, + { + "epoch": 2.27, + "learning_rate": 1.5453401722636214e-05, + "loss": 0.1667, + "step": 23490 + }, + { + "epoch": 2.27, + "learning_rate": 1.5451466176328268e-05, + "loss": 0.326, + "step": 23500 + }, + { + "epoch": 2.28, + "learning_rate": 1.5449530630020325e-05, + "loss": 0.3479, + "step": 23510 + }, + { + "epoch": 2.28, + "learning_rate": 1.544759508371238e-05, + "loss": 0.398, + "step": 23520 + }, + { + "epoch": 2.28, + "learning_rate": 1.5445659537404432e-05, + "loss": 0.2493, + "step": 23530 + }, + { + "epoch": 2.28, + "learning_rate": 1.544372399109649e-05, + "loss": 0.3138, + "step": 23540 + }, + { + "epoch": 2.28, + "learning_rate": 1.5441788444788543e-05, + "loss": 0.1349, + "step": 23550 + }, + { + "epoch": 2.28, + "learning_rate": 1.54398528984806e-05, + "loss": 0.2156, + "step": 23560 + }, + { + "epoch": 2.28, + "learning_rate": 1.543791735217265e-05, + "loss": 0.4263, + "step": 23570 + }, + { + "epoch": 2.28, + "learning_rate": 1.5435981805864707e-05, + "loss": 0.2646, + "step": 23580 + }, + { + "epoch": 2.28, + "learning_rate": 1.543404625955676e-05, + "loss": 0.1706, + "step": 23590 + }, + { + "epoch": 2.28, + "learning_rate": 1.5432110713248814e-05, + "loss": 0.2546, + "step": 23600 + }, + { + "epoch": 2.28, + "learning_rate": 1.543017516694087e-05, + "loss": 0.3396, + "step": 23610 + }, + { + "epoch": 2.29, + "learning_rate": 1.5428239620632925e-05, + "loss": 0.3997, + "step": 23620 + }, + { + "epoch": 2.29, + "learning_rate": 1.542630407432498e-05, + "loss": 0.2301, + "step": 23630 + }, + { + "epoch": 2.29, + "learning_rate": 1.5424368528017032e-05, + "loss": 0.2737, + "step": 23640 + }, + { + "epoch": 2.29, + "learning_rate": 1.542243298170909e-05, + "loss": 0.25, + "step": 23650 + }, + { + "epoch": 2.29, + "learning_rate": 1.5420497435401143e-05, + "loss": 0.4109, + "step": 23660 + }, + { + "epoch": 2.29, + "learning_rate": 1.5418561889093197e-05, + "loss": 0.2701, + "step": 23670 + }, + { + "epoch": 2.29, + "learning_rate": 1.5416626342785254e-05, + "loss": 0.2678, + "step": 23680 + }, + { + "epoch": 2.29, + "learning_rate": 1.5414690796477307e-05, + "loss": 0.2808, + "step": 23690 + }, + { + "epoch": 2.29, + "learning_rate": 1.541275525016936e-05, + "loss": 0.3414, + "step": 23700 + }, + { + "epoch": 2.29, + "learning_rate": 1.5410819703861418e-05, + "loss": 0.2359, + "step": 23710 + }, + { + "epoch": 2.3, + "learning_rate": 1.5408884157553468e-05, + "loss": 0.3002, + "step": 23720 + }, + { + "epoch": 2.3, + "learning_rate": 1.5406948611245525e-05, + "loss": 0.2926, + "step": 23730 + }, + { + "epoch": 2.3, + "learning_rate": 1.540501306493758e-05, + "loss": 0.3342, + "step": 23740 + }, + { + "epoch": 2.3, + "learning_rate": 1.5403077518629636e-05, + "loss": 0.2507, + "step": 23750 + }, + { + "epoch": 2.3, + "learning_rate": 1.540114197232169e-05, + "loss": 0.2502, + "step": 23760 + }, + { + "epoch": 2.3, + "learning_rate": 1.5399206426013743e-05, + "loss": 0.2024, + "step": 23770 + }, + { + "epoch": 2.3, + "learning_rate": 1.53972708797058e-05, + "loss": 0.3257, + "step": 23780 + }, + { + "epoch": 2.3, + "learning_rate": 1.5395335333397854e-05, + "loss": 0.2146, + "step": 23790 + }, + { + "epoch": 2.3, + "learning_rate": 1.5393399787089908e-05, + "loss": 0.2141, + "step": 23800 + }, + { + "epoch": 2.3, + "learning_rate": 1.539146424078196e-05, + "loss": 0.3064, + "step": 23810 + }, + { + "epoch": 2.31, + "learning_rate": 1.5389528694474015e-05, + "loss": 0.2994, + "step": 23820 + }, + { + "epoch": 2.31, + "learning_rate": 1.5387593148166072e-05, + "loss": 0.2311, + "step": 23830 + }, + { + "epoch": 2.31, + "learning_rate": 1.5385657601858125e-05, + "loss": 0.28, + "step": 23840 + }, + { + "epoch": 2.31, + "learning_rate": 1.5383722055550182e-05, + "loss": 0.24, + "step": 23850 + }, + { + "epoch": 2.31, + "learning_rate": 1.5381786509242236e-05, + "loss": 0.2524, + "step": 23860 + }, + { + "epoch": 2.31, + "learning_rate": 1.537985096293429e-05, + "loss": 0.3766, + "step": 23870 + }, + { + "epoch": 2.31, + "learning_rate": 1.5377915416626343e-05, + "loss": 0.3418, + "step": 23880 + }, + { + "epoch": 2.31, + "learning_rate": 1.5375979870318397e-05, + "loss": 0.2911, + "step": 23890 + }, + { + "epoch": 2.31, + "learning_rate": 1.5374044324010454e-05, + "loss": 0.1963, + "step": 23900 + }, + { + "epoch": 2.31, + "learning_rate": 1.5372108777702508e-05, + "loss": 0.2365, + "step": 23910 + }, + { + "epoch": 2.31, + "learning_rate": 1.537017323139456e-05, + "loss": 0.3218, + "step": 23920 + }, + { + "epoch": 2.32, + "learning_rate": 1.536823768508662e-05, + "loss": 0.3835, + "step": 23930 + }, + { + "epoch": 2.32, + "learning_rate": 1.5366302138778672e-05, + "loss": 0.1557, + "step": 23940 + }, + { + "epoch": 2.32, + "learning_rate": 1.5364366592470726e-05, + "loss": 0.2646, + "step": 23950 + }, + { + "epoch": 2.32, + "learning_rate": 1.536243104616278e-05, + "loss": 0.2171, + "step": 23960 + }, + { + "epoch": 2.32, + "learning_rate": 1.5360495499854836e-05, + "loss": 0.2523, + "step": 23970 + }, + { + "epoch": 2.32, + "learning_rate": 1.535855995354689e-05, + "loss": 0.2275, + "step": 23980 + }, + { + "epoch": 2.32, + "learning_rate": 1.5356624407238944e-05, + "loss": 0.1274, + "step": 23990 + }, + { + "epoch": 2.32, + "learning_rate": 1.5354688860931e-05, + "loss": 0.3385, + "step": 24000 + }, + { + "epoch": 2.32, + "learning_rate": 1.5352753314623054e-05, + "loss": 0.3724, + "step": 24010 + }, + { + "epoch": 2.32, + "learning_rate": 1.5350817768315108e-05, + "loss": 0.1828, + "step": 24020 + }, + { + "epoch": 2.33, + "learning_rate": 1.534888222200716e-05, + "loss": 0.2572, + "step": 24030 + }, + { + "epoch": 2.33, + "learning_rate": 1.534694667569922e-05, + "loss": 0.3118, + "step": 24040 + }, + { + "epoch": 2.33, + "learning_rate": 1.5345011129391272e-05, + "loss": 0.2422, + "step": 24050 + }, + { + "epoch": 2.33, + "learning_rate": 1.5343075583083326e-05, + "loss": 0.204, + "step": 24060 + }, + { + "epoch": 2.33, + "learning_rate": 1.5341140036775383e-05, + "loss": 0.219, + "step": 24070 + }, + { + "epoch": 2.33, + "learning_rate": 1.5339204490467436e-05, + "loss": 0.2625, + "step": 24080 + }, + { + "epoch": 2.33, + "learning_rate": 1.533726894415949e-05, + "loss": 0.1647, + "step": 24090 + }, + { + "epoch": 2.33, + "learning_rate": 1.5335333397851544e-05, + "loss": 0.2272, + "step": 24100 + }, + { + "epoch": 2.33, + "learning_rate": 1.5333397851543597e-05, + "loss": 0.1124, + "step": 24110 + }, + { + "epoch": 2.33, + "learning_rate": 1.5331462305235654e-05, + "loss": 0.2735, + "step": 24120 + }, + { + "epoch": 2.34, + "learning_rate": 1.5329526758927708e-05, + "loss": 0.346, + "step": 24130 + }, + { + "epoch": 2.34, + "learning_rate": 1.5327591212619765e-05, + "loss": 0.4084, + "step": 24140 + }, + { + "epoch": 2.34, + "learning_rate": 1.532565566631182e-05, + "loss": 0.2252, + "step": 24150 + }, + { + "epoch": 2.34, + "learning_rate": 1.5323720120003872e-05, + "loss": 0.2596, + "step": 24160 + }, + { + "epoch": 2.34, + "learning_rate": 1.5321784573695926e-05, + "loss": 0.213, + "step": 24170 + }, + { + "epoch": 2.34, + "learning_rate": 1.531984902738798e-05, + "loss": 0.2955, + "step": 24180 + }, + { + "epoch": 2.34, + "learning_rate": 1.5317913481080037e-05, + "loss": 0.4828, + "step": 24190 + }, + { + "epoch": 2.34, + "learning_rate": 1.531597793477209e-05, + "loss": 0.2915, + "step": 24200 + }, + { + "epoch": 2.34, + "learning_rate": 1.5314042388464144e-05, + "loss": 0.2197, + "step": 24210 + }, + { + "epoch": 2.34, + "learning_rate": 1.53121068421562e-05, + "loss": 0.1886, + "step": 24220 + }, + { + "epoch": 2.34, + "learning_rate": 1.5310171295848255e-05, + "loss": 0.2988, + "step": 24230 + }, + { + "epoch": 2.35, + "learning_rate": 1.530823574954031e-05, + "loss": 0.3611, + "step": 24240 + }, + { + "epoch": 2.35, + "learning_rate": 1.5306300203232362e-05, + "loss": 0.2374, + "step": 24250 + }, + { + "epoch": 2.35, + "learning_rate": 1.530436465692442e-05, + "loss": 0.2828, + "step": 24260 + }, + { + "epoch": 2.35, + "learning_rate": 1.5302429110616472e-05, + "loss": 0.3351, + "step": 24270 + }, + { + "epoch": 2.35, + "learning_rate": 1.5300493564308526e-05, + "loss": 0.2127, + "step": 24280 + }, + { + "epoch": 2.35, + "learning_rate": 1.5298558018000583e-05, + "loss": 0.1585, + "step": 24290 + }, + { + "epoch": 2.35, + "learning_rate": 1.5296622471692637e-05, + "loss": 0.2887, + "step": 24300 + }, + { + "epoch": 2.35, + "learning_rate": 1.529468692538469e-05, + "loss": 0.357, + "step": 24310 + }, + { + "epoch": 2.35, + "learning_rate": 1.5292751379076744e-05, + "loss": 0.3714, + "step": 24320 + }, + { + "epoch": 2.35, + "learning_rate": 1.52908158327688e-05, + "loss": 0.3306, + "step": 24330 + }, + { + "epoch": 2.36, + "learning_rate": 1.5288880286460855e-05, + "loss": 0.2368, + "step": 24340 + }, + { + "epoch": 2.36, + "learning_rate": 1.5286944740152908e-05, + "loss": 0.2029, + "step": 24350 + }, + { + "epoch": 2.36, + "learning_rate": 1.5285009193844965e-05, + "loss": 0.2222, + "step": 24360 + }, + { + "epoch": 2.36, + "learning_rate": 1.528307364753702e-05, + "loss": 0.272, + "step": 24370 + }, + { + "epoch": 2.36, + "learning_rate": 1.5281138101229073e-05, + "loss": 0.2652, + "step": 24380 + }, + { + "epoch": 2.36, + "learning_rate": 1.527920255492113e-05, + "loss": 0.2019, + "step": 24390 + }, + { + "epoch": 2.36, + "learning_rate": 1.527726700861318e-05, + "loss": 0.2738, + "step": 24400 + }, + { + "epoch": 2.36, + "learning_rate": 1.5275331462305237e-05, + "loss": 0.2367, + "step": 24410 + }, + { + "epoch": 2.36, + "learning_rate": 1.527339591599729e-05, + "loss": 0.2871, + "step": 24420 + }, + { + "epoch": 2.36, + "learning_rate": 1.5271460369689348e-05, + "loss": 0.2361, + "step": 24430 + }, + { + "epoch": 2.37, + "learning_rate": 1.52695248233814e-05, + "loss": 0.2566, + "step": 24440 + }, + { + "epoch": 2.37, + "learning_rate": 1.5267589277073455e-05, + "loss": 0.1745, + "step": 24450 + }, + { + "epoch": 2.37, + "learning_rate": 1.5265653730765512e-05, + "loss": 0.2593, + "step": 24460 + }, + { + "epoch": 2.37, + "learning_rate": 1.5263718184457565e-05, + "loss": 0.1936, + "step": 24470 + }, + { + "epoch": 2.37, + "learning_rate": 1.526178263814962e-05, + "loss": 0.1706, + "step": 24480 + }, + { + "epoch": 2.37, + "learning_rate": 1.5259847091841673e-05, + "loss": 0.6071, + "step": 24490 + }, + { + "epoch": 2.37, + "learning_rate": 1.5257911545533728e-05, + "loss": 0.2808, + "step": 24500 + }, + { + "epoch": 2.37, + "learning_rate": 1.5255975999225783e-05, + "loss": 0.3278, + "step": 24510 + }, + { + "epoch": 2.37, + "learning_rate": 1.5254040452917837e-05, + "loss": 0.2336, + "step": 24520 + }, + { + "epoch": 2.37, + "learning_rate": 1.5252104906609892e-05, + "loss": 0.2399, + "step": 24530 + }, + { + "epoch": 2.37, + "learning_rate": 1.5250169360301948e-05, + "loss": 0.2623, + "step": 24540 + }, + { + "epoch": 2.38, + "learning_rate": 1.5248233813994e-05, + "loss": 0.267, + "step": 24550 + }, + { + "epoch": 2.38, + "learning_rate": 1.5246298267686055e-05, + "loss": 0.1675, + "step": 24560 + }, + { + "epoch": 2.38, + "learning_rate": 1.524436272137811e-05, + "loss": 0.2988, + "step": 24570 + }, + { + "epoch": 2.38, + "learning_rate": 1.5242427175070164e-05, + "loss": 0.4235, + "step": 24580 + }, + { + "epoch": 2.38, + "learning_rate": 1.524049162876222e-05, + "loss": 0.3269, + "step": 24590 + }, + { + "epoch": 2.38, + "learning_rate": 1.5238556082454275e-05, + "loss": 0.309, + "step": 24600 + }, + { + "epoch": 2.38, + "learning_rate": 1.523662053614633e-05, + "loss": 0.3546, + "step": 24610 + }, + { + "epoch": 2.38, + "learning_rate": 1.5234684989838384e-05, + "loss": 0.2942, + "step": 24620 + }, + { + "epoch": 2.38, + "learning_rate": 1.5232749443530437e-05, + "loss": 0.2581, + "step": 24630 + }, + { + "epoch": 2.38, + "learning_rate": 1.5230813897222493e-05, + "loss": 0.2348, + "step": 24640 + }, + { + "epoch": 2.39, + "learning_rate": 1.5228878350914546e-05, + "loss": 0.2448, + "step": 24650 + }, + { + "epoch": 2.39, + "learning_rate": 1.5226942804606601e-05, + "loss": 0.3796, + "step": 24660 + }, + { + "epoch": 2.39, + "learning_rate": 1.5225007258298657e-05, + "loss": 0.2222, + "step": 24670 + }, + { + "epoch": 2.39, + "learning_rate": 1.522307171199071e-05, + "loss": 0.1959, + "step": 24680 + }, + { + "epoch": 2.39, + "learning_rate": 1.5221136165682766e-05, + "loss": 0.2233, + "step": 24690 + }, + { + "epoch": 2.39, + "learning_rate": 1.521920061937482e-05, + "loss": 0.2237, + "step": 24700 + }, + { + "epoch": 2.39, + "learning_rate": 1.5217265073066873e-05, + "loss": 0.228, + "step": 24710 + }, + { + "epoch": 2.39, + "learning_rate": 1.5215329526758928e-05, + "loss": 0.3361, + "step": 24720 + }, + { + "epoch": 2.39, + "learning_rate": 1.5213393980450984e-05, + "loss": 0.1928, + "step": 24730 + }, + { + "epoch": 2.39, + "learning_rate": 1.5211458434143039e-05, + "loss": 0.2597, + "step": 24740 + }, + { + "epoch": 2.4, + "learning_rate": 1.5209522887835093e-05, + "loss": 0.3797, + "step": 24750 + }, + { + "epoch": 2.4, + "learning_rate": 1.5207587341527148e-05, + "loss": 0.4445, + "step": 24760 + }, + { + "epoch": 2.4, + "learning_rate": 1.5205651795219203e-05, + "loss": 0.1985, + "step": 24770 + }, + { + "epoch": 2.4, + "learning_rate": 1.5203716248911255e-05, + "loss": 0.227, + "step": 24780 + }, + { + "epoch": 2.4, + "learning_rate": 1.520178070260331e-05, + "loss": 0.2051, + "step": 24790 + }, + { + "epoch": 2.4, + "learning_rate": 1.5199845156295366e-05, + "loss": 0.2848, + "step": 24800 + }, + { + "epoch": 2.4, + "learning_rate": 1.519790960998742e-05, + "loss": 0.268, + "step": 24810 + }, + { + "epoch": 2.4, + "learning_rate": 1.5195974063679475e-05, + "loss": 0.1352, + "step": 24820 + }, + { + "epoch": 2.4, + "learning_rate": 1.519403851737153e-05, + "loss": 0.3649, + "step": 24830 + }, + { + "epoch": 2.4, + "learning_rate": 1.5192102971063586e-05, + "loss": 0.3621, + "step": 24840 + }, + { + "epoch": 2.4, + "learning_rate": 1.5190167424755638e-05, + "loss": 0.2592, + "step": 24850 + }, + { + "epoch": 2.41, + "learning_rate": 1.5188231878447693e-05, + "loss": 0.1312, + "step": 24860 + }, + { + "epoch": 2.41, + "learning_rate": 1.5186296332139746e-05, + "loss": 0.1721, + "step": 24870 + }, + { + "epoch": 2.41, + "learning_rate": 1.5184360785831802e-05, + "loss": 0.1885, + "step": 24880 + }, + { + "epoch": 2.41, + "learning_rate": 1.5182425239523857e-05, + "loss": 0.3529, + "step": 24890 + }, + { + "epoch": 2.41, + "learning_rate": 1.5180489693215912e-05, + "loss": 0.2163, + "step": 24900 + }, + { + "epoch": 2.41, + "learning_rate": 1.5178554146907966e-05, + "loss": 0.2092, + "step": 24910 + }, + { + "epoch": 2.41, + "learning_rate": 1.5176618600600021e-05, + "loss": 0.4735, + "step": 24920 + }, + { + "epoch": 2.41, + "learning_rate": 1.5174683054292075e-05, + "loss": 0.3433, + "step": 24930 + }, + { + "epoch": 2.41, + "learning_rate": 1.5172747507984129e-05, + "loss": 0.2236, + "step": 24940 + }, + { + "epoch": 2.41, + "learning_rate": 1.5170811961676184e-05, + "loss": 0.2328, + "step": 24950 + }, + { + "epoch": 2.42, + "learning_rate": 1.516887641536824e-05, + "loss": 0.3398, + "step": 24960 + }, + { + "epoch": 2.42, + "learning_rate": 1.5166940869060293e-05, + "loss": 0.2548, + "step": 24970 + }, + { + "epoch": 2.42, + "learning_rate": 1.5165005322752348e-05, + "loss": 0.4283, + "step": 24980 + }, + { + "epoch": 2.42, + "learning_rate": 1.5163069776444404e-05, + "loss": 0.3228, + "step": 24990 + }, + { + "epoch": 2.42, + "learning_rate": 1.5161134230136459e-05, + "loss": 0.1594, + "step": 25000 + }, + { + "epoch": 2.42, + "learning_rate": 1.5159198683828511e-05, + "loss": 0.2234, + "step": 25010 + }, + { + "epoch": 2.42, + "learning_rate": 1.5157263137520566e-05, + "loss": 0.177, + "step": 25020 + }, + { + "epoch": 2.42, + "learning_rate": 1.5155327591212622e-05, + "loss": 0.3289, + "step": 25030 + }, + { + "epoch": 2.42, + "learning_rate": 1.5153392044904675e-05, + "loss": 0.1792, + "step": 25040 + }, + { + "epoch": 2.42, + "learning_rate": 1.515145649859673e-05, + "loss": 0.2679, + "step": 25050 + }, + { + "epoch": 2.43, + "learning_rate": 1.5149520952288786e-05, + "loss": 0.2904, + "step": 25060 + }, + { + "epoch": 2.43, + "learning_rate": 1.514758540598084e-05, + "loss": 0.1335, + "step": 25070 + }, + { + "epoch": 2.43, + "learning_rate": 1.5145649859672893e-05, + "loss": 0.2145, + "step": 25080 + }, + { + "epoch": 2.43, + "learning_rate": 1.5143714313364948e-05, + "loss": 0.3014, + "step": 25090 + }, + { + "epoch": 2.43, + "learning_rate": 1.5141778767057002e-05, + "loss": 0.3526, + "step": 25100 + }, + { + "epoch": 2.43, + "learning_rate": 1.5139843220749057e-05, + "loss": 0.2486, + "step": 25110 + }, + { + "epoch": 2.43, + "learning_rate": 1.5137907674441113e-05, + "loss": 0.2735, + "step": 25120 + }, + { + "epoch": 2.43, + "learning_rate": 1.5135972128133168e-05, + "loss": 0.1811, + "step": 25130 + }, + { + "epoch": 2.43, + "learning_rate": 1.5134036581825222e-05, + "loss": 0.5123, + "step": 25140 + }, + { + "epoch": 2.43, + "learning_rate": 1.5132101035517277e-05, + "loss": 0.2389, + "step": 25150 + }, + { + "epoch": 2.43, + "learning_rate": 1.5130165489209329e-05, + "loss": 0.2648, + "step": 25160 + }, + { + "epoch": 2.44, + "learning_rate": 1.5128229942901384e-05, + "loss": 0.3129, + "step": 25170 + }, + { + "epoch": 2.44, + "learning_rate": 1.512629439659344e-05, + "loss": 0.1738, + "step": 25180 + }, + { + "epoch": 2.44, + "learning_rate": 1.5124358850285495e-05, + "loss": 0.3011, + "step": 25190 + }, + { + "epoch": 2.44, + "learning_rate": 1.5122423303977549e-05, + "loss": 0.2271, + "step": 25200 + }, + { + "epoch": 2.44, + "learning_rate": 1.5120487757669604e-05, + "loss": 0.184, + "step": 25210 + }, + { + "epoch": 2.44, + "learning_rate": 1.511855221136166e-05, + "loss": 0.1571, + "step": 25220 + }, + { + "epoch": 2.44, + "learning_rate": 1.5116616665053711e-05, + "loss": 0.2989, + "step": 25230 + }, + { + "epoch": 2.44, + "learning_rate": 1.5114681118745767e-05, + "loss": 0.266, + "step": 25240 + }, + { + "epoch": 2.44, + "learning_rate": 1.5112745572437822e-05, + "loss": 0.2801, + "step": 25250 + }, + { + "epoch": 2.44, + "learning_rate": 1.5110810026129876e-05, + "loss": 0.3949, + "step": 25260 + }, + { + "epoch": 2.45, + "learning_rate": 1.5108874479821931e-05, + "loss": 0.1405, + "step": 25270 + }, + { + "epoch": 2.45, + "learning_rate": 1.5106938933513986e-05, + "loss": 0.1985, + "step": 25280 + }, + { + "epoch": 2.45, + "learning_rate": 1.5105003387206042e-05, + "loss": 0.2862, + "step": 25290 + }, + { + "epoch": 2.45, + "learning_rate": 1.5103067840898095e-05, + "loss": 0.3799, + "step": 25300 + }, + { + "epoch": 2.45, + "learning_rate": 1.5101132294590149e-05, + "loss": 0.1691, + "step": 25310 + }, + { + "epoch": 2.45, + "learning_rate": 1.5099196748282204e-05, + "loss": 0.4998, + "step": 25320 + }, + { + "epoch": 2.45, + "learning_rate": 1.5097261201974258e-05, + "loss": 0.4441, + "step": 25330 + }, + { + "epoch": 2.45, + "learning_rate": 1.5095325655666313e-05, + "loss": 0.438, + "step": 25340 + }, + { + "epoch": 2.45, + "learning_rate": 1.5093390109358368e-05, + "loss": 0.2481, + "step": 25350 + }, + { + "epoch": 2.45, + "learning_rate": 1.5091454563050422e-05, + "loss": 0.293, + "step": 25360 + }, + { + "epoch": 2.46, + "learning_rate": 1.5089519016742477e-05, + "loss": 0.1942, + "step": 25370 + }, + { + "epoch": 2.46, + "learning_rate": 1.5087583470434531e-05, + "loss": 0.2414, + "step": 25380 + }, + { + "epoch": 2.46, + "learning_rate": 1.5085647924126585e-05, + "loss": 0.1907, + "step": 25390 + }, + { + "epoch": 2.46, + "learning_rate": 1.508371237781864e-05, + "loss": 0.4879, + "step": 25400 + }, + { + "epoch": 2.46, + "learning_rate": 1.5081776831510695e-05, + "loss": 0.3317, + "step": 25410 + }, + { + "epoch": 2.46, + "learning_rate": 1.507984128520275e-05, + "loss": 0.3148, + "step": 25420 + }, + { + "epoch": 2.46, + "learning_rate": 1.5077905738894804e-05, + "loss": 0.2303, + "step": 25430 + }, + { + "epoch": 2.46, + "learning_rate": 1.507597019258686e-05, + "loss": 0.2472, + "step": 25440 + }, + { + "epoch": 2.46, + "learning_rate": 1.5074034646278915e-05, + "loss": 0.2147, + "step": 25450 + }, + { + "epoch": 2.46, + "learning_rate": 1.5072099099970967e-05, + "loss": 0.2488, + "step": 25460 + }, + { + "epoch": 2.46, + "learning_rate": 1.5070163553663022e-05, + "loss": 0.2994, + "step": 25470 + }, + { + "epoch": 2.47, + "learning_rate": 1.5068228007355078e-05, + "loss": 0.2812, + "step": 25480 + }, + { + "epoch": 2.47, + "learning_rate": 1.5066292461047131e-05, + "loss": 0.1946, + "step": 25490 + }, + { + "epoch": 2.47, + "learning_rate": 1.5064356914739187e-05, + "loss": 0.2467, + "step": 25500 + }, + { + "epoch": 2.47, + "learning_rate": 1.5062421368431242e-05, + "loss": 0.1528, + "step": 25510 + }, + { + "epoch": 2.47, + "learning_rate": 1.5060485822123297e-05, + "loss": 0.3006, + "step": 25520 + }, + { + "epoch": 2.47, + "learning_rate": 1.5058550275815349e-05, + "loss": 0.4677, + "step": 25530 + }, + { + "epoch": 2.47, + "learning_rate": 1.5056614729507404e-05, + "loss": 0.2474, + "step": 25540 + }, + { + "epoch": 2.47, + "learning_rate": 1.5054679183199458e-05, + "loss": 0.1248, + "step": 25550 + }, + { + "epoch": 2.47, + "learning_rate": 1.5052743636891513e-05, + "loss": 0.3798, + "step": 25560 + }, + { + "epoch": 2.47, + "learning_rate": 1.5050808090583569e-05, + "loss": 0.2847, + "step": 25570 + }, + { + "epoch": 2.48, + "learning_rate": 1.5048872544275624e-05, + "loss": 0.3379, + "step": 25580 + }, + { + "epoch": 2.48, + "learning_rate": 1.5046936997967678e-05, + "loss": 0.2291, + "step": 25590 + }, + { + "epoch": 2.48, + "learning_rate": 1.5045001451659733e-05, + "loss": 0.1923, + "step": 25600 + }, + { + "epoch": 2.48, + "learning_rate": 1.5043065905351785e-05, + "loss": 0.2408, + "step": 25610 + }, + { + "epoch": 2.48, + "learning_rate": 1.504113035904384e-05, + "loss": 0.1416, + "step": 25620 + }, + { + "epoch": 2.48, + "learning_rate": 1.5039194812735896e-05, + "loss": 0.2853, + "step": 25630 + }, + { + "epoch": 2.48, + "learning_rate": 1.5037259266427951e-05, + "loss": 0.2393, + "step": 25640 + }, + { + "epoch": 2.48, + "learning_rate": 1.5035323720120005e-05, + "loss": 0.267, + "step": 25650 + }, + { + "epoch": 2.48, + "learning_rate": 1.503338817381206e-05, + "loss": 0.1804, + "step": 25660 + }, + { + "epoch": 2.48, + "learning_rate": 1.5031452627504115e-05, + "loss": 0.2628, + "step": 25670 + }, + { + "epoch": 2.49, + "learning_rate": 1.502951708119617e-05, + "loss": 0.3025, + "step": 25680 + }, + { + "epoch": 2.49, + "learning_rate": 1.5027581534888223e-05, + "loss": 0.3175, + "step": 25690 + }, + { + "epoch": 2.49, + "learning_rate": 1.5025645988580278e-05, + "loss": 0.1822, + "step": 25700 + }, + { + "epoch": 2.49, + "learning_rate": 1.5023710442272331e-05, + "loss": 0.2743, + "step": 25710 + }, + { + "epoch": 2.49, + "learning_rate": 1.5021774895964387e-05, + "loss": 0.309, + "step": 25720 + }, + { + "epoch": 2.49, + "learning_rate": 1.5019839349656442e-05, + "loss": 0.2074, + "step": 25730 + }, + { + "epoch": 2.49, + "learning_rate": 1.5017903803348497e-05, + "loss": 0.1872, + "step": 25740 + }, + { + "epoch": 2.49, + "learning_rate": 1.5015968257040551e-05, + "loss": 0.1124, + "step": 25750 + }, + { + "epoch": 2.49, + "learning_rate": 1.5014032710732605e-05, + "loss": 0.2892, + "step": 25760 + }, + { + "epoch": 2.49, + "learning_rate": 1.501209716442466e-05, + "loss": 0.4778, + "step": 25770 + }, + { + "epoch": 2.49, + "learning_rate": 1.5010161618116714e-05, + "loss": 0.3698, + "step": 25780 + }, + { + "epoch": 2.5, + "learning_rate": 1.5008226071808769e-05, + "loss": 0.2932, + "step": 25790 + }, + { + "epoch": 2.5, + "learning_rate": 1.5006290525500824e-05, + "loss": 0.1962, + "step": 25800 + }, + { + "epoch": 2.5, + "learning_rate": 1.500435497919288e-05, + "loss": 0.2199, + "step": 25810 + }, + { + "epoch": 2.5, + "learning_rate": 1.5002419432884933e-05, + "loss": 0.2596, + "step": 25820 + }, + { + "epoch": 2.5, + "learning_rate": 1.5000483886576989e-05, + "loss": 0.2447, + "step": 25830 + }, + { + "epoch": 2.5, + "learning_rate": 1.499854834026904e-05, + "loss": 0.2165, + "step": 25840 + }, + { + "epoch": 2.5, + "learning_rate": 1.4996612793961096e-05, + "loss": 0.2877, + "step": 25850 + }, + { + "epoch": 2.5, + "learning_rate": 1.4994677247653151e-05, + "loss": 0.2111, + "step": 25860 + }, + { + "epoch": 2.5, + "learning_rate": 1.4992741701345207e-05, + "loss": 0.2909, + "step": 25870 + }, + { + "epoch": 2.5, + "learning_rate": 1.499080615503726e-05, + "loss": 0.1299, + "step": 25880 + }, + { + "epoch": 2.51, + "learning_rate": 1.4988870608729316e-05, + "loss": 0.2323, + "step": 25890 + }, + { + "epoch": 2.51, + "learning_rate": 1.4986935062421371e-05, + "loss": 0.3201, + "step": 25900 + }, + { + "epoch": 2.51, + "learning_rate": 1.4984999516113423e-05, + "loss": 0.3555, + "step": 25910 + }, + { + "epoch": 2.51, + "learning_rate": 1.4983063969805478e-05, + "loss": 0.236, + "step": 25920 + }, + { + "epoch": 2.51, + "learning_rate": 1.4981128423497533e-05, + "loss": 0.4017, + "step": 25930 + }, + { + "epoch": 2.51, + "learning_rate": 1.4979192877189587e-05, + "loss": 0.1974, + "step": 25940 + }, + { + "epoch": 2.51, + "learning_rate": 1.4977257330881642e-05, + "loss": 0.1356, + "step": 25950 + }, + { + "epoch": 2.51, + "learning_rate": 1.4975321784573698e-05, + "loss": 0.3001, + "step": 25960 + }, + { + "epoch": 2.51, + "learning_rate": 1.4973386238265753e-05, + "loss": 0.4177, + "step": 25970 + }, + { + "epoch": 2.51, + "learning_rate": 1.4971450691957807e-05, + "loss": 0.3657, + "step": 25980 + }, + { + "epoch": 2.52, + "learning_rate": 1.496951514564986e-05, + "loss": 0.292, + "step": 25990 + }, + { + "epoch": 2.52, + "learning_rate": 1.4967579599341914e-05, + "loss": 0.1794, + "step": 26000 + }, + { + "epoch": 2.52, + "learning_rate": 1.496564405303397e-05, + "loss": 0.2537, + "step": 26010 + }, + { + "epoch": 2.52, + "learning_rate": 1.4963708506726025e-05, + "loss": 0.1979, + "step": 26020 + }, + { + "epoch": 2.52, + "learning_rate": 1.496177296041808e-05, + "loss": 0.3007, + "step": 26030 + }, + { + "epoch": 2.52, + "learning_rate": 1.4959837414110134e-05, + "loss": 0.2129, + "step": 26040 + }, + { + "epoch": 2.52, + "learning_rate": 1.4957901867802189e-05, + "loss": 0.2475, + "step": 26050 + }, + { + "epoch": 2.52, + "learning_rate": 1.4955966321494243e-05, + "loss": 0.3807, + "step": 26060 + }, + { + "epoch": 2.52, + "learning_rate": 1.4954030775186296e-05, + "loss": 0.221, + "step": 26070 + }, + { + "epoch": 2.52, + "learning_rate": 1.4952095228878352e-05, + "loss": 0.2414, + "step": 26080 + }, + { + "epoch": 2.52, + "learning_rate": 1.4950159682570407e-05, + "loss": 0.1752, + "step": 26090 + }, + { + "epoch": 2.53, + "learning_rate": 1.494822413626246e-05, + "loss": 0.2884, + "step": 26100 + }, + { + "epoch": 2.53, + "learning_rate": 1.4946288589954516e-05, + "loss": 0.2661, + "step": 26110 + }, + { + "epoch": 2.53, + "learning_rate": 1.4944353043646571e-05, + "loss": 0.2591, + "step": 26120 + }, + { + "epoch": 2.53, + "learning_rate": 1.4942417497338627e-05, + "loss": 0.243, + "step": 26130 + }, + { + "epoch": 2.53, + "learning_rate": 1.4940481951030678e-05, + "loss": 0.2162, + "step": 26140 + }, + { + "epoch": 2.53, + "learning_rate": 1.4938546404722734e-05, + "loss": 0.3812, + "step": 26150 + }, + { + "epoch": 2.53, + "learning_rate": 1.4936610858414789e-05, + "loss": 0.287, + "step": 26160 + }, + { + "epoch": 2.53, + "learning_rate": 1.4934675312106843e-05, + "loss": 0.2226, + "step": 26170 + }, + { + "epoch": 2.53, + "learning_rate": 1.4932739765798898e-05, + "loss": 0.3723, + "step": 26180 + }, + { + "epoch": 2.53, + "learning_rate": 1.4930804219490953e-05, + "loss": 0.239, + "step": 26190 + }, + { + "epoch": 2.54, + "learning_rate": 1.4928868673183009e-05, + "loss": 0.2136, + "step": 26200 + }, + { + "epoch": 2.54, + "learning_rate": 1.4926933126875062e-05, + "loss": 0.2376, + "step": 26210 + }, + { + "epoch": 2.54, + "learning_rate": 1.4924997580567116e-05, + "loss": 0.2996, + "step": 26220 + }, + { + "epoch": 2.54, + "learning_rate": 1.492306203425917e-05, + "loss": 0.3334, + "step": 26230 + }, + { + "epoch": 2.54, + "learning_rate": 1.4921126487951225e-05, + "loss": 0.2724, + "step": 26240 + }, + { + "epoch": 2.54, + "learning_rate": 1.491919094164328e-05, + "loss": 0.1923, + "step": 26250 + }, + { + "epoch": 2.54, + "learning_rate": 1.4917255395335336e-05, + "loss": 0.2667, + "step": 26260 + }, + { + "epoch": 2.54, + "learning_rate": 1.491531984902739e-05, + "loss": 0.3354, + "step": 26270 + }, + { + "epoch": 2.54, + "learning_rate": 1.4913384302719445e-05, + "loss": 0.2806, + "step": 26280 + }, + { + "epoch": 2.54, + "learning_rate": 1.4911448756411497e-05, + "loss": 0.1809, + "step": 26290 + }, + { + "epoch": 2.55, + "learning_rate": 1.4909513210103552e-05, + "loss": 0.1758, + "step": 26300 + }, + { + "epoch": 2.55, + "learning_rate": 1.4907577663795607e-05, + "loss": 0.3002, + "step": 26310 + }, + { + "epoch": 2.55, + "learning_rate": 1.4905642117487663e-05, + "loss": 0.134, + "step": 26320 + }, + { + "epoch": 2.55, + "learning_rate": 1.4903706571179716e-05, + "loss": 0.2614, + "step": 26330 + }, + { + "epoch": 2.55, + "learning_rate": 1.4901771024871772e-05, + "loss": 0.1511, + "step": 26340 + }, + { + "epoch": 2.55, + "learning_rate": 1.4899835478563827e-05, + "loss": 0.3437, + "step": 26350 + }, + { + "epoch": 2.55, + "learning_rate": 1.4897899932255882e-05, + "loss": 0.2063, + "step": 26360 + }, + { + "epoch": 2.55, + "learning_rate": 1.4895964385947934e-05, + "loss": 0.2841, + "step": 26370 + }, + { + "epoch": 2.55, + "learning_rate": 1.489402883963999e-05, + "loss": 0.2034, + "step": 26380 + }, + { + "epoch": 2.55, + "learning_rate": 1.4892093293332043e-05, + "loss": 0.1939, + "step": 26390 + }, + { + "epoch": 2.55, + "learning_rate": 1.4890157747024098e-05, + "loss": 0.2049, + "step": 26400 + }, + { + "epoch": 2.56, + "learning_rate": 1.4888222200716154e-05, + "loss": 0.1779, + "step": 26410 + }, + { + "epoch": 2.56, + "learning_rate": 1.4886286654408209e-05, + "loss": 0.4042, + "step": 26420 + }, + { + "epoch": 2.56, + "learning_rate": 1.4884351108100263e-05, + "loss": 0.2894, + "step": 26430 + }, + { + "epoch": 2.56, + "learning_rate": 1.4882415561792316e-05, + "loss": 0.4123, + "step": 26440 + }, + { + "epoch": 2.56, + "learning_rate": 1.4880480015484372e-05, + "loss": 0.3456, + "step": 26450 + }, + { + "epoch": 2.56, + "learning_rate": 1.4878544469176425e-05, + "loss": 0.2407, + "step": 26460 + }, + { + "epoch": 2.56, + "learning_rate": 1.487660892286848e-05, + "loss": 0.3756, + "step": 26470 + }, + { + "epoch": 2.56, + "learning_rate": 1.4874673376560536e-05, + "loss": 0.3314, + "step": 26480 + }, + { + "epoch": 2.56, + "learning_rate": 1.487273783025259e-05, + "loss": 0.2056, + "step": 26490 + }, + { + "epoch": 2.56, + "learning_rate": 1.4870802283944645e-05, + "loss": 0.2872, + "step": 26500 + }, + { + "epoch": 2.57, + "learning_rate": 1.48688667376367e-05, + "loss": 0.2372, + "step": 26510 + }, + { + "epoch": 2.57, + "learning_rate": 1.4866931191328752e-05, + "loss": 0.2434, + "step": 26520 + }, + { + "epoch": 2.57, + "learning_rate": 1.4864995645020808e-05, + "loss": 0.2534, + "step": 26530 + }, + { + "epoch": 2.57, + "learning_rate": 1.4863060098712863e-05, + "loss": 0.354, + "step": 26540 + }, + { + "epoch": 2.57, + "learning_rate": 1.4861124552404918e-05, + "loss": 0.2381, + "step": 26550 + }, + { + "epoch": 2.57, + "learning_rate": 1.4859189006096972e-05, + "loss": 0.2053, + "step": 26560 + }, + { + "epoch": 2.57, + "learning_rate": 1.4857253459789027e-05, + "loss": 0.3853, + "step": 26570 + }, + { + "epoch": 2.57, + "learning_rate": 1.4855317913481082e-05, + "loss": 0.193, + "step": 26580 + }, + { + "epoch": 2.57, + "learning_rate": 1.4853382367173134e-05, + "loss": 0.1801, + "step": 26590 + }, + { + "epoch": 2.57, + "learning_rate": 1.485144682086519e-05, + "loss": 0.3922, + "step": 26600 + }, + { + "epoch": 2.58, + "learning_rate": 1.4849511274557245e-05, + "loss": 0.4124, + "step": 26610 + }, + { + "epoch": 2.58, + "learning_rate": 1.4847575728249299e-05, + "loss": 0.2806, + "step": 26620 + }, + { + "epoch": 2.58, + "learning_rate": 1.4845640181941354e-05, + "loss": 0.2537, + "step": 26630 + }, + { + "epoch": 2.58, + "learning_rate": 1.484370463563341e-05, + "loss": 0.2269, + "step": 26640 + }, + { + "epoch": 2.58, + "learning_rate": 1.4841769089325465e-05, + "loss": 0.2972, + "step": 26650 + }, + { + "epoch": 2.58, + "learning_rate": 1.4839833543017518e-05, + "loss": 0.2073, + "step": 26660 + }, + { + "epoch": 2.58, + "learning_rate": 1.4837897996709572e-05, + "loss": 0.2556, + "step": 26670 + }, + { + "epoch": 2.58, + "learning_rate": 1.4835962450401626e-05, + "loss": 0.1992, + "step": 26680 + }, + { + "epoch": 2.58, + "learning_rate": 1.4834026904093681e-05, + "loss": 0.2746, + "step": 26690 + }, + { + "epoch": 2.58, + "learning_rate": 1.4832091357785736e-05, + "loss": 0.2676, + "step": 26700 + }, + { + "epoch": 2.58, + "learning_rate": 1.4830155811477792e-05, + "loss": 0.2429, + "step": 26710 + }, + { + "epoch": 2.59, + "learning_rate": 1.4828220265169845e-05, + "loss": 0.3507, + "step": 26720 + }, + { + "epoch": 2.59, + "learning_rate": 1.48262847188619e-05, + "loss": 0.3182, + "step": 26730 + }, + { + "epoch": 2.59, + "learning_rate": 1.4824349172553956e-05, + "loss": 0.2764, + "step": 26740 + }, + { + "epoch": 2.59, + "learning_rate": 1.4822413626246008e-05, + "loss": 0.1826, + "step": 26750 + }, + { + "epoch": 2.59, + "learning_rate": 1.4820478079938063e-05, + "loss": 0.3486, + "step": 26760 + }, + { + "epoch": 2.59, + "learning_rate": 1.4818542533630119e-05, + "loss": 0.278, + "step": 26770 + }, + { + "epoch": 2.59, + "learning_rate": 1.4816606987322172e-05, + "loss": 0.3849, + "step": 26780 + }, + { + "epoch": 2.59, + "learning_rate": 1.4814671441014227e-05, + "loss": 0.1967, + "step": 26790 + }, + { + "epoch": 2.59, + "learning_rate": 1.4812735894706283e-05, + "loss": 0.2413, + "step": 26800 + }, + { + "epoch": 2.59, + "learning_rate": 1.4810800348398338e-05, + "loss": 0.2047, + "step": 26810 + }, + { + "epoch": 2.6, + "learning_rate": 1.480886480209039e-05, + "loss": 0.2476, + "step": 26820 + }, + { + "epoch": 2.6, + "learning_rate": 1.4806929255782445e-05, + "loss": 0.3956, + "step": 26830 + }, + { + "epoch": 2.6, + "learning_rate": 1.48049937094745e-05, + "loss": 0.2292, + "step": 26840 + }, + { + "epoch": 2.6, + "learning_rate": 1.4803058163166554e-05, + "loss": 0.3093, + "step": 26850 + }, + { + "epoch": 2.6, + "learning_rate": 1.480112261685861e-05, + "loss": 0.3712, + "step": 26860 + }, + { + "epoch": 2.6, + "learning_rate": 1.4799187070550665e-05, + "loss": 0.084, + "step": 26870 + }, + { + "epoch": 2.6, + "learning_rate": 1.4797251524242719e-05, + "loss": 0.2526, + "step": 26880 + }, + { + "epoch": 2.6, + "learning_rate": 1.4795315977934774e-05, + "loss": 0.3308, + "step": 26890 + }, + { + "epoch": 2.6, + "learning_rate": 1.4793380431626828e-05, + "loss": 0.182, + "step": 26900 + }, + { + "epoch": 2.6, + "learning_rate": 1.4791444885318881e-05, + "loss": 0.2719, + "step": 26910 + }, + { + "epoch": 2.61, + "learning_rate": 1.4789509339010937e-05, + "loss": 0.1014, + "step": 26920 + }, + { + "epoch": 2.61, + "learning_rate": 1.4787573792702992e-05, + "loss": 0.3962, + "step": 26930 + }, + { + "epoch": 2.61, + "learning_rate": 1.4785638246395047e-05, + "loss": 0.1958, + "step": 26940 + }, + { + "epoch": 2.61, + "learning_rate": 1.4783702700087101e-05, + "loss": 0.3108, + "step": 26950 + }, + { + "epoch": 2.61, + "learning_rate": 1.4781767153779156e-05, + "loss": 0.1816, + "step": 26960 + }, + { + "epoch": 2.61, + "learning_rate": 1.4779831607471208e-05, + "loss": 0.2616, + "step": 26970 + }, + { + "epoch": 2.61, + "learning_rate": 1.4777896061163263e-05, + "loss": 0.1886, + "step": 26980 + }, + { + "epoch": 2.61, + "learning_rate": 1.4775960514855319e-05, + "loss": 0.1593, + "step": 26990 + }, + { + "epoch": 2.61, + "learning_rate": 1.4774024968547374e-05, + "loss": 0.3204, + "step": 27000 + }, + { + "epoch": 2.61, + "learning_rate": 1.4772089422239428e-05, + "loss": 0.1555, + "step": 27010 + }, + { + "epoch": 2.61, + "learning_rate": 1.4770153875931483e-05, + "loss": 0.2386, + "step": 27020 + }, + { + "epoch": 2.62, + "learning_rate": 1.4768218329623538e-05, + "loss": 0.2418, + "step": 27030 + }, + { + "epoch": 2.62, + "learning_rate": 1.4766282783315594e-05, + "loss": 0.2748, + "step": 27040 + }, + { + "epoch": 2.62, + "learning_rate": 1.4764347237007646e-05, + "loss": 0.2214, + "step": 27050 + }, + { + "epoch": 2.62, + "learning_rate": 1.4762411690699701e-05, + "loss": 0.3179, + "step": 27060 + }, + { + "epoch": 2.62, + "learning_rate": 1.4760476144391755e-05, + "loss": 0.2726, + "step": 27070 + }, + { + "epoch": 2.62, + "learning_rate": 1.475854059808381e-05, + "loss": 0.2973, + "step": 27080 + }, + { + "epoch": 2.62, + "learning_rate": 1.4756605051775865e-05, + "loss": 0.1487, + "step": 27090 + }, + { + "epoch": 2.62, + "learning_rate": 1.475466950546792e-05, + "loss": 0.2203, + "step": 27100 + }, + { + "epoch": 2.62, + "learning_rate": 1.4752733959159974e-05, + "loss": 0.2318, + "step": 27110 + }, + { + "epoch": 2.62, + "learning_rate": 1.4750798412852028e-05, + "loss": 0.2669, + "step": 27120 + }, + { + "epoch": 2.63, + "learning_rate": 1.4748862866544083e-05, + "loss": 0.3329, + "step": 27130 + }, + { + "epoch": 2.63, + "learning_rate": 1.4746927320236137e-05, + "loss": 0.1712, + "step": 27140 + }, + { + "epoch": 2.63, + "learning_rate": 1.4744991773928192e-05, + "loss": 0.2331, + "step": 27150 + }, + { + "epoch": 2.63, + "learning_rate": 1.4743056227620248e-05, + "loss": 0.1854, + "step": 27160 + }, + { + "epoch": 2.63, + "learning_rate": 1.4741120681312301e-05, + "loss": 0.2261, + "step": 27170 + }, + { + "epoch": 2.63, + "learning_rate": 1.4739185135004357e-05, + "loss": 0.4182, + "step": 27180 + }, + { + "epoch": 2.63, + "learning_rate": 1.4737249588696412e-05, + "loss": 0.2584, + "step": 27190 + }, + { + "epoch": 2.63, + "learning_rate": 1.4735314042388464e-05, + "loss": 0.2698, + "step": 27200 + }, + { + "epoch": 2.63, + "learning_rate": 1.4733378496080519e-05, + "loss": 0.343, + "step": 27210 + }, + { + "epoch": 2.63, + "learning_rate": 1.4731442949772574e-05, + "loss": 0.1668, + "step": 27220 + }, + { + "epoch": 2.64, + "learning_rate": 1.472950740346463e-05, + "loss": 0.173, + "step": 27230 + }, + { + "epoch": 2.64, + "learning_rate": 1.4727571857156683e-05, + "loss": 0.185, + "step": 27240 + }, + { + "epoch": 2.64, + "learning_rate": 1.4725636310848739e-05, + "loss": 0.4081, + "step": 27250 + }, + { + "epoch": 2.64, + "learning_rate": 1.4723700764540794e-05, + "loss": 0.2937, + "step": 27260 + }, + { + "epoch": 2.64, + "learning_rate": 1.4721765218232846e-05, + "loss": 0.3289, + "step": 27270 + }, + { + "epoch": 2.64, + "learning_rate": 1.4719829671924901e-05, + "loss": 0.1939, + "step": 27280 + }, + { + "epoch": 2.64, + "learning_rate": 1.4717894125616957e-05, + "loss": 0.3251, + "step": 27290 + }, + { + "epoch": 2.64, + "learning_rate": 1.471595857930901e-05, + "loss": 0.1675, + "step": 27300 + }, + { + "epoch": 2.64, + "learning_rate": 1.4714023033001066e-05, + "loss": 0.2802, + "step": 27310 + }, + { + "epoch": 2.64, + "learning_rate": 1.4712087486693121e-05, + "loss": 0.2342, + "step": 27320 + }, + { + "epoch": 2.64, + "learning_rate": 1.4710151940385176e-05, + "loss": 0.2371, + "step": 27330 + }, + { + "epoch": 2.65, + "learning_rate": 1.470821639407723e-05, + "loss": 0.3819, + "step": 27340 + }, + { + "epoch": 2.65, + "learning_rate": 1.4706280847769284e-05, + "loss": 0.2079, + "step": 27350 + }, + { + "epoch": 2.65, + "learning_rate": 1.4704345301461337e-05, + "loss": 0.2505, + "step": 27360 + }, + { + "epoch": 2.65, + "learning_rate": 1.4702409755153393e-05, + "loss": 0.2532, + "step": 27370 + }, + { + "epoch": 2.65, + "learning_rate": 1.4700474208845448e-05, + "loss": 0.1377, + "step": 27380 + }, + { + "epoch": 2.65, + "learning_rate": 1.4698538662537503e-05, + "loss": 0.2639, + "step": 27390 + }, + { + "epoch": 2.65, + "learning_rate": 1.4696603116229557e-05, + "loss": 0.3481, + "step": 27400 + }, + { + "epoch": 2.65, + "learning_rate": 1.4694667569921612e-05, + "loss": 0.2866, + "step": 27410 + }, + { + "epoch": 2.65, + "learning_rate": 1.4692732023613668e-05, + "loss": 0.2265, + "step": 27420 + }, + { + "epoch": 2.65, + "learning_rate": 1.469079647730572e-05, + "loss": 0.2888, + "step": 27430 + }, + { + "epoch": 2.66, + "learning_rate": 1.4688860930997775e-05, + "loss": 0.274, + "step": 27440 + }, + { + "epoch": 2.66, + "learning_rate": 1.468692538468983e-05, + "loss": 0.4298, + "step": 27450 + }, + { + "epoch": 2.66, + "learning_rate": 1.4684989838381884e-05, + "loss": 0.2925, + "step": 27460 + }, + { + "epoch": 2.66, + "learning_rate": 1.4683054292073939e-05, + "loss": 0.1869, + "step": 27470 + }, + { + "epoch": 2.66, + "learning_rate": 1.4681118745765994e-05, + "loss": 0.1973, + "step": 27480 + }, + { + "epoch": 2.66, + "learning_rate": 1.467918319945805e-05, + "loss": 0.3509, + "step": 27490 + }, + { + "epoch": 2.66, + "learning_rate": 1.4677247653150102e-05, + "loss": 0.2807, + "step": 27500 + }, + { + "epoch": 2.66, + "learning_rate": 1.4675312106842157e-05, + "loss": 0.1575, + "step": 27510 + }, + { + "epoch": 2.66, + "learning_rate": 1.4673376560534212e-05, + "loss": 0.2494, + "step": 27520 + }, + { + "epoch": 2.66, + "learning_rate": 1.4671441014226266e-05, + "loss": 0.4497, + "step": 27530 + }, + { + "epoch": 2.67, + "learning_rate": 1.4669505467918321e-05, + "loss": 0.194, + "step": 27540 + }, + { + "epoch": 2.67, + "learning_rate": 1.4667569921610377e-05, + "loss": 0.2501, + "step": 27550 + }, + { + "epoch": 2.67, + "learning_rate": 1.466563437530243e-05, + "loss": 0.3324, + "step": 27560 + }, + { + "epoch": 2.67, + "learning_rate": 1.4663698828994486e-05, + "loss": 0.3145, + "step": 27570 + }, + { + "epoch": 2.67, + "learning_rate": 1.466176328268654e-05, + "loss": 0.1386, + "step": 27580 + }, + { + "epoch": 2.67, + "learning_rate": 1.4659827736378593e-05, + "loss": 0.3177, + "step": 27590 + }, + { + "epoch": 2.67, + "learning_rate": 1.4657892190070648e-05, + "loss": 0.3405, + "step": 27600 + }, + { + "epoch": 2.67, + "learning_rate": 1.4655956643762704e-05, + "loss": 0.145, + "step": 27610 + }, + { + "epoch": 2.67, + "learning_rate": 1.4654021097454759e-05, + "loss": 0.1704, + "step": 27620 + }, + { + "epoch": 2.67, + "learning_rate": 1.4652085551146812e-05, + "loss": 0.3639, + "step": 27630 + }, + { + "epoch": 2.67, + "learning_rate": 1.4650150004838868e-05, + "loss": 0.2982, + "step": 27640 + }, + { + "epoch": 2.68, + "learning_rate": 1.464821445853092e-05, + "loss": 0.155, + "step": 27650 + }, + { + "epoch": 2.68, + "learning_rate": 1.4646278912222975e-05, + "loss": 0.1992, + "step": 27660 + }, + { + "epoch": 2.68, + "learning_rate": 1.464434336591503e-05, + "loss": 0.3348, + "step": 27670 + }, + { + "epoch": 2.68, + "learning_rate": 1.4642407819607086e-05, + "loss": 0.355, + "step": 27680 + }, + { + "epoch": 2.68, + "learning_rate": 1.464047227329914e-05, + "loss": 0.1607, + "step": 27690 + }, + { + "epoch": 2.68, + "learning_rate": 1.4638536726991195e-05, + "loss": 0.3764, + "step": 27700 + }, + { + "epoch": 2.68, + "learning_rate": 1.463660118068325e-05, + "loss": 0.2203, + "step": 27710 + }, + { + "epoch": 2.68, + "learning_rate": 1.4634665634375305e-05, + "loss": 0.3231, + "step": 27720 + }, + { + "epoch": 2.68, + "learning_rate": 1.4632730088067357e-05, + "loss": 0.3265, + "step": 27730 + }, + { + "epoch": 2.68, + "learning_rate": 1.4630794541759413e-05, + "loss": 0.2545, + "step": 27740 + }, + { + "epoch": 2.69, + "learning_rate": 1.4628858995451466e-05, + "loss": 0.2414, + "step": 27750 + }, + { + "epoch": 2.69, + "learning_rate": 1.4626923449143522e-05, + "loss": 0.1135, + "step": 27760 + }, + { + "epoch": 2.69, + "learning_rate": 1.4624987902835577e-05, + "loss": 0.3786, + "step": 27770 + }, + { + "epoch": 2.69, + "learning_rate": 1.4623052356527632e-05, + "loss": 0.1919, + "step": 27780 + }, + { + "epoch": 2.69, + "learning_rate": 1.4621116810219686e-05, + "loss": 0.274, + "step": 27790 + }, + { + "epoch": 2.69, + "learning_rate": 1.461918126391174e-05, + "loss": 0.2828, + "step": 27800 + }, + { + "epoch": 2.69, + "learning_rate": 1.4617245717603795e-05, + "loss": 0.1811, + "step": 27810 + }, + { + "epoch": 2.69, + "learning_rate": 1.4615310171295849e-05, + "loss": 0.2046, + "step": 27820 + }, + { + "epoch": 2.69, + "learning_rate": 1.4613374624987904e-05, + "loss": 0.3058, + "step": 27830 + }, + { + "epoch": 2.69, + "learning_rate": 1.461143907867996e-05, + "loss": 0.25, + "step": 27840 + }, + { + "epoch": 2.7, + "learning_rate": 1.4609503532372013e-05, + "loss": 0.2109, + "step": 27850 + }, + { + "epoch": 2.7, + "learning_rate": 1.4607567986064068e-05, + "loss": 0.2716, + "step": 27860 + }, + { + "epoch": 2.7, + "learning_rate": 1.4605632439756123e-05, + "loss": 0.2419, + "step": 27870 + }, + { + "epoch": 2.7, + "learning_rate": 1.4603696893448175e-05, + "loss": 0.204, + "step": 27880 + }, + { + "epoch": 2.7, + "learning_rate": 1.460176134714023e-05, + "loss": 0.2511, + "step": 27890 + }, + { + "epoch": 2.7, + "learning_rate": 1.4599825800832286e-05, + "loss": 0.2218, + "step": 27900 + }, + { + "epoch": 2.7, + "learning_rate": 1.4597890254524341e-05, + "loss": 0.2769, + "step": 27910 + }, + { + "epoch": 2.7, + "learning_rate": 1.4595954708216395e-05, + "loss": 0.2541, + "step": 27920 + }, + { + "epoch": 2.7, + "learning_rate": 1.459401916190845e-05, + "loss": 0.2544, + "step": 27930 + }, + { + "epoch": 2.7, + "learning_rate": 1.4592083615600506e-05, + "loss": 0.3541, + "step": 27940 + }, + { + "epoch": 2.7, + "learning_rate": 1.459014806929256e-05, + "loss": 0.2134, + "step": 27950 + }, + { + "epoch": 2.71, + "learning_rate": 1.4588212522984613e-05, + "loss": 0.2875, + "step": 27960 + }, + { + "epoch": 2.71, + "learning_rate": 1.4586276976676668e-05, + "loss": 0.3241, + "step": 27970 + }, + { + "epoch": 2.71, + "learning_rate": 1.4584341430368722e-05, + "loss": 0.1871, + "step": 27980 + }, + { + "epoch": 2.71, + "learning_rate": 1.4582405884060777e-05, + "loss": 0.3527, + "step": 27990 + }, + { + "epoch": 2.71, + "learning_rate": 1.4580470337752833e-05, + "loss": 0.4144, + "step": 28000 + }, + { + "epoch": 2.71, + "learning_rate": 1.4578534791444888e-05, + "loss": 0.2121, + "step": 28010 + }, + { + "epoch": 2.71, + "learning_rate": 1.4576599245136942e-05, + "loss": 0.3028, + "step": 28020 + }, + { + "epoch": 2.71, + "learning_rate": 1.4574663698828995e-05, + "loss": 0.2307, + "step": 28030 + }, + { + "epoch": 2.71, + "learning_rate": 1.4572728152521049e-05, + "loss": 0.3901, + "step": 28040 + }, + { + "epoch": 2.71, + "learning_rate": 1.4570792606213104e-05, + "loss": 0.2299, + "step": 28050 + }, + { + "epoch": 2.72, + "learning_rate": 1.456885705990516e-05, + "loss": 0.2552, + "step": 28060 + }, + { + "epoch": 2.72, + "learning_rate": 1.4566921513597215e-05, + "loss": 0.1482, + "step": 28070 + }, + { + "epoch": 2.72, + "learning_rate": 1.4564985967289268e-05, + "loss": 0.252, + "step": 28080 + }, + { + "epoch": 2.72, + "learning_rate": 1.4563050420981324e-05, + "loss": 0.2199, + "step": 28090 + }, + { + "epoch": 2.72, + "learning_rate": 1.4561114874673379e-05, + "loss": 0.1231, + "step": 28100 + }, + { + "epoch": 2.72, + "learning_rate": 1.4559179328365431e-05, + "loss": 0.4297, + "step": 28110 + }, + { + "epoch": 2.72, + "learning_rate": 1.4557243782057486e-05, + "loss": 0.2647, + "step": 28120 + }, + { + "epoch": 2.72, + "learning_rate": 1.4555308235749542e-05, + "loss": 0.2182, + "step": 28130 + }, + { + "epoch": 2.72, + "learning_rate": 1.4553372689441595e-05, + "loss": 0.3366, + "step": 28140 + }, + { + "epoch": 2.72, + "learning_rate": 1.455143714313365e-05, + "loss": 0.2902, + "step": 28150 + }, + { + "epoch": 2.73, + "learning_rate": 1.4549501596825706e-05, + "loss": 0.3628, + "step": 28160 + }, + { + "epoch": 2.73, + "learning_rate": 1.4547566050517761e-05, + "loss": 0.1948, + "step": 28170 + }, + { + "epoch": 2.73, + "learning_rate": 1.4545630504209813e-05, + "loss": 0.1214, + "step": 28180 + }, + { + "epoch": 2.73, + "learning_rate": 1.4543694957901869e-05, + "loss": 0.2589, + "step": 28190 + }, + { + "epoch": 2.73, + "learning_rate": 1.4541759411593924e-05, + "loss": 0.2155, + "step": 28200 + }, + { + "epoch": 2.73, + "learning_rate": 1.4539823865285978e-05, + "loss": 0.2672, + "step": 28210 + }, + { + "epoch": 2.73, + "learning_rate": 1.4537888318978033e-05, + "loss": 0.2225, + "step": 28220 + }, + { + "epoch": 2.73, + "learning_rate": 1.4535952772670088e-05, + "loss": 0.2453, + "step": 28230 + }, + { + "epoch": 2.73, + "learning_rate": 1.4534017226362142e-05, + "loss": 0.5203, + "step": 28240 + }, + { + "epoch": 2.73, + "learning_rate": 1.4532081680054197e-05, + "loss": 0.2529, + "step": 28250 + }, + { + "epoch": 2.73, + "learning_rate": 1.453014613374625e-05, + "loss": 0.1855, + "step": 28260 + }, + { + "epoch": 2.74, + "learning_rate": 1.4528210587438304e-05, + "loss": 0.1696, + "step": 28270 + }, + { + "epoch": 2.74, + "learning_rate": 1.452627504113036e-05, + "loss": 0.2074, + "step": 28280 + }, + { + "epoch": 2.74, + "learning_rate": 1.4524339494822415e-05, + "loss": 0.1351, + "step": 28290 + }, + { + "epoch": 2.74, + "learning_rate": 1.452240394851447e-05, + "loss": 0.296, + "step": 28300 + }, + { + "epoch": 2.74, + "learning_rate": 1.4520468402206524e-05, + "loss": 0.3107, + "step": 28310 + }, + { + "epoch": 2.74, + "learning_rate": 1.451853285589858e-05, + "loss": 0.2044, + "step": 28320 + }, + { + "epoch": 2.74, + "learning_rate": 1.4516597309590631e-05, + "loss": 0.327, + "step": 28330 + }, + { + "epoch": 2.74, + "learning_rate": 1.4514661763282687e-05, + "loss": 0.2884, + "step": 28340 + }, + { + "epoch": 2.74, + "learning_rate": 1.4512726216974742e-05, + "loss": 0.2972, + "step": 28350 + }, + { + "epoch": 2.74, + "learning_rate": 1.4510790670666797e-05, + "loss": 0.1876, + "step": 28360 + }, + { + "epoch": 2.75, + "learning_rate": 1.4508855124358851e-05, + "loss": 0.2492, + "step": 28370 + }, + { + "epoch": 2.75, + "learning_rate": 1.4506919578050906e-05, + "loss": 0.3499, + "step": 28380 + }, + { + "epoch": 2.75, + "learning_rate": 1.4504984031742962e-05, + "loss": 0.2805, + "step": 28390 + }, + { + "epoch": 2.75, + "learning_rate": 1.4503048485435017e-05, + "loss": 0.2175, + "step": 28400 + }, + { + "epoch": 2.75, + "learning_rate": 1.4501112939127069e-05, + "loss": 0.2098, + "step": 28410 + }, + { + "epoch": 2.75, + "learning_rate": 1.4499177392819124e-05, + "loss": 0.2833, + "step": 28420 + }, + { + "epoch": 2.75, + "learning_rate": 1.4497241846511178e-05, + "loss": 0.3221, + "step": 28430 + }, + { + "epoch": 2.75, + "learning_rate": 1.4495306300203233e-05, + "loss": 0.2808, + "step": 28440 + }, + { + "epoch": 2.75, + "learning_rate": 1.4493370753895289e-05, + "loss": 0.219, + "step": 28450 + }, + { + "epoch": 2.75, + "learning_rate": 1.4491435207587344e-05, + "loss": 0.2589, + "step": 28460 + }, + { + "epoch": 2.76, + "learning_rate": 1.4489499661279397e-05, + "loss": 0.3433, + "step": 28470 + }, + { + "epoch": 2.76, + "learning_rate": 1.4487564114971453e-05, + "loss": 0.2038, + "step": 28480 + }, + { + "epoch": 2.76, + "learning_rate": 1.4485628568663506e-05, + "loss": 0.1694, + "step": 28490 + }, + { + "epoch": 2.76, + "learning_rate": 1.448369302235556e-05, + "loss": 0.2074, + "step": 28500 + }, + { + "epoch": 2.76, + "learning_rate": 1.4481757476047615e-05, + "loss": 0.1701, + "step": 28510 + }, + { + "epoch": 2.76, + "learning_rate": 1.447982192973967e-05, + "loss": 0.3573, + "step": 28520 + }, + { + "epoch": 2.76, + "learning_rate": 1.4477886383431724e-05, + "loss": 0.1362, + "step": 28530 + }, + { + "epoch": 2.76, + "learning_rate": 1.447595083712378e-05, + "loss": 0.2973, + "step": 28540 + }, + { + "epoch": 2.76, + "learning_rate": 1.4474015290815835e-05, + "loss": 0.2874, + "step": 28550 + }, + { + "epoch": 2.76, + "learning_rate": 1.4472079744507887e-05, + "loss": 0.184, + "step": 28560 + }, + { + "epoch": 2.76, + "learning_rate": 1.4470144198199942e-05, + "loss": 0.2943, + "step": 28570 + }, + { + "epoch": 2.77, + "learning_rate": 1.4468208651891998e-05, + "loss": 0.1801, + "step": 28580 + }, + { + "epoch": 2.77, + "learning_rate": 1.4466273105584053e-05, + "loss": 0.2355, + "step": 28590 + }, + { + "epoch": 2.77, + "learning_rate": 1.4464337559276107e-05, + "loss": 0.1263, + "step": 28600 + }, + { + "epoch": 2.77, + "learning_rate": 1.4462402012968162e-05, + "loss": 0.3104, + "step": 28610 + }, + { + "epoch": 2.77, + "learning_rate": 1.4460466466660217e-05, + "loss": 0.2239, + "step": 28620 + }, + { + "epoch": 2.77, + "learning_rate": 1.4458530920352271e-05, + "loss": 0.2605, + "step": 28630 + }, + { + "epoch": 2.77, + "learning_rate": 1.4456595374044325e-05, + "loss": 0.3171, + "step": 28640 + }, + { + "epoch": 2.77, + "learning_rate": 1.445465982773638e-05, + "loss": 0.3009, + "step": 28650 + }, + { + "epoch": 2.77, + "learning_rate": 1.4452724281428434e-05, + "loss": 0.1776, + "step": 28660 + }, + { + "epoch": 2.77, + "learning_rate": 1.4450788735120489e-05, + "loss": 0.3159, + "step": 28670 + }, + { + "epoch": 2.78, + "learning_rate": 1.4448853188812544e-05, + "loss": 0.2257, + "step": 28680 + }, + { + "epoch": 2.78, + "learning_rate": 1.44469176425046e-05, + "loss": 0.2769, + "step": 28690 + }, + { + "epoch": 2.78, + "learning_rate": 1.4444982096196653e-05, + "loss": 0.2937, + "step": 28700 + }, + { + "epoch": 2.78, + "learning_rate": 1.4443046549888707e-05, + "loss": 0.298, + "step": 28710 + }, + { + "epoch": 2.78, + "learning_rate": 1.444111100358076e-05, + "loss": 0.2963, + "step": 28720 + }, + { + "epoch": 2.78, + "learning_rate": 1.4439175457272816e-05, + "loss": 0.3083, + "step": 28730 + }, + { + "epoch": 2.78, + "learning_rate": 1.4437239910964871e-05, + "loss": 0.2638, + "step": 28740 + }, + { + "epoch": 2.78, + "learning_rate": 1.4435304364656926e-05, + "loss": 0.3045, + "step": 28750 + }, + { + "epoch": 2.78, + "learning_rate": 1.443336881834898e-05, + "loss": 0.2259, + "step": 28760 + }, + { + "epoch": 2.78, + "learning_rate": 1.4431433272041035e-05, + "loss": 0.1571, + "step": 28770 + }, + { + "epoch": 2.79, + "learning_rate": 1.442949772573309e-05, + "loss": 0.3628, + "step": 28780 + }, + { + "epoch": 2.79, + "learning_rate": 1.4427562179425143e-05, + "loss": 0.1869, + "step": 28790 + }, + { + "epoch": 2.79, + "learning_rate": 1.4425626633117198e-05, + "loss": 0.1748, + "step": 28800 + }, + { + "epoch": 2.79, + "learning_rate": 1.4423691086809253e-05, + "loss": 0.3537, + "step": 28810 + }, + { + "epoch": 2.79, + "learning_rate": 1.4421755540501307e-05, + "loss": 0.4392, + "step": 28820 + }, + { + "epoch": 2.79, + "learning_rate": 1.4419819994193362e-05, + "loss": 0.3535, + "step": 28830 + }, + { + "epoch": 2.79, + "learning_rate": 1.4417884447885418e-05, + "loss": 0.2025, + "step": 28840 + }, + { + "epoch": 2.79, + "learning_rate": 1.4415948901577473e-05, + "loss": 0.2439, + "step": 28850 + }, + { + "epoch": 2.79, + "learning_rate": 1.4414013355269525e-05, + "loss": 0.3039, + "step": 28860 + }, + { + "epoch": 2.79, + "learning_rate": 1.441207780896158e-05, + "loss": 0.2688, + "step": 28870 + }, + { + "epoch": 2.79, + "learning_rate": 1.4410142262653636e-05, + "loss": 0.3481, + "step": 28880 + }, + { + "epoch": 2.8, + "learning_rate": 1.440820671634569e-05, + "loss": 0.2812, + "step": 28890 + }, + { + "epoch": 2.8, + "learning_rate": 1.4406271170037744e-05, + "loss": 0.1184, + "step": 28900 + }, + { + "epoch": 2.8, + "learning_rate": 1.44043356237298e-05, + "loss": 0.2074, + "step": 28910 + }, + { + "epoch": 2.8, + "learning_rate": 1.4402400077421853e-05, + "loss": 0.1262, + "step": 28920 + }, + { + "epoch": 2.8, + "learning_rate": 1.4400464531113909e-05, + "loss": 0.2113, + "step": 28930 + }, + { + "epoch": 2.8, + "learning_rate": 1.4398528984805962e-05, + "loss": 0.1201, + "step": 28940 + }, + { + "epoch": 2.8, + "learning_rate": 1.4396593438498016e-05, + "loss": 0.2824, + "step": 28950 + }, + { + "epoch": 2.8, + "learning_rate": 1.4394657892190071e-05, + "loss": 0.321, + "step": 28960 + }, + { + "epoch": 2.8, + "learning_rate": 1.4392722345882127e-05, + "loss": 0.4536, + "step": 28970 + }, + { + "epoch": 2.8, + "learning_rate": 1.4390786799574182e-05, + "loss": 0.2425, + "step": 28980 + }, + { + "epoch": 2.81, + "learning_rate": 1.4388851253266236e-05, + "loss": 0.1847, + "step": 28990 + }, + { + "epoch": 2.81, + "learning_rate": 1.4386915706958291e-05, + "loss": 0.1989, + "step": 29000 + }, + { + "epoch": 2.81, + "learning_rate": 1.4384980160650343e-05, + "loss": 0.2785, + "step": 29010 + }, + { + "epoch": 2.81, + "learning_rate": 1.4383044614342398e-05, + "loss": 0.3678, + "step": 29020 + }, + { + "epoch": 2.81, + "learning_rate": 1.4381109068034454e-05, + "loss": 0.0756, + "step": 29030 + }, + { + "epoch": 2.81, + "learning_rate": 1.4379173521726509e-05, + "loss": 0.3214, + "step": 29040 + }, + { + "epoch": 2.81, + "learning_rate": 1.4377237975418563e-05, + "loss": 0.2607, + "step": 29050 + }, + { + "epoch": 2.81, + "learning_rate": 1.4375302429110618e-05, + "loss": 0.3181, + "step": 29060 + }, + { + "epoch": 2.81, + "learning_rate": 1.4373366882802673e-05, + "loss": 0.2503, + "step": 29070 + }, + { + "epoch": 2.81, + "learning_rate": 1.4371431336494729e-05, + "loss": 0.2693, + "step": 29080 + }, + { + "epoch": 2.82, + "learning_rate": 1.436949579018678e-05, + "loss": 0.3115, + "step": 29090 + }, + { + "epoch": 2.82, + "learning_rate": 1.4367560243878836e-05, + "loss": 0.2133, + "step": 29100 + }, + { + "epoch": 2.82, + "learning_rate": 1.436562469757089e-05, + "loss": 0.1673, + "step": 29110 + }, + { + "epoch": 2.82, + "learning_rate": 1.4363689151262945e-05, + "loss": 0.3611, + "step": 29120 + }, + { + "epoch": 2.82, + "learning_rate": 1.4361753604955e-05, + "loss": 0.2956, + "step": 29130 + }, + { + "epoch": 2.82, + "learning_rate": 1.4359818058647055e-05, + "loss": 0.3251, + "step": 29140 + }, + { + "epoch": 2.82, + "learning_rate": 1.4357882512339109e-05, + "loss": 0.3151, + "step": 29150 + }, + { + "epoch": 2.82, + "learning_rate": 1.4355946966031164e-05, + "loss": 0.2919, + "step": 29160 + }, + { + "epoch": 2.82, + "learning_rate": 1.4354011419723218e-05, + "loss": 0.216, + "step": 29170 + }, + { + "epoch": 2.82, + "learning_rate": 1.4352075873415272e-05, + "loss": 0.3654, + "step": 29180 + }, + { + "epoch": 2.82, + "learning_rate": 1.4350140327107327e-05, + "loss": 0.2067, + "step": 29190 + }, + { + "epoch": 2.83, + "learning_rate": 1.4348204780799382e-05, + "loss": 0.2331, + "step": 29200 + }, + { + "epoch": 2.83, + "learning_rate": 1.4346269234491436e-05, + "loss": 0.2959, + "step": 29210 + }, + { + "epoch": 2.83, + "learning_rate": 1.4344333688183491e-05, + "loss": 0.2683, + "step": 29220 + }, + { + "epoch": 2.83, + "learning_rate": 1.4342398141875547e-05, + "loss": 0.0799, + "step": 29230 + }, + { + "epoch": 2.83, + "learning_rate": 1.4340462595567599e-05, + "loss": 0.2815, + "step": 29240 + }, + { + "epoch": 2.83, + "learning_rate": 1.4338527049259654e-05, + "loss": 0.486, + "step": 29250 + }, + { + "epoch": 2.83, + "learning_rate": 1.433659150295171e-05, + "loss": 0.2631, + "step": 29260 + }, + { + "epoch": 2.83, + "learning_rate": 1.4334655956643765e-05, + "loss": 0.2519, + "step": 29270 + }, + { + "epoch": 2.83, + "learning_rate": 1.4332720410335818e-05, + "loss": 0.1783, + "step": 29280 + }, + { + "epoch": 2.83, + "learning_rate": 1.4330784864027874e-05, + "loss": 0.3135, + "step": 29290 + }, + { + "epoch": 2.84, + "learning_rate": 1.4328849317719929e-05, + "loss": 0.2005, + "step": 29300 + }, + { + "epoch": 2.84, + "learning_rate": 1.4326913771411983e-05, + "loss": 0.2708, + "step": 29310 + }, + { + "epoch": 2.84, + "learning_rate": 1.4324978225104036e-05, + "loss": 0.1997, + "step": 29320 + }, + { + "epoch": 2.84, + "learning_rate": 1.4323042678796091e-05, + "loss": 0.3888, + "step": 29330 + }, + { + "epoch": 2.84, + "learning_rate": 1.4321107132488145e-05, + "loss": 0.2877, + "step": 29340 + }, + { + "epoch": 2.84, + "learning_rate": 1.43191715861802e-05, + "loss": 0.1878, + "step": 29350 + }, + { + "epoch": 2.84, + "learning_rate": 1.4317236039872256e-05, + "loss": 0.2176, + "step": 29360 + }, + { + "epoch": 2.84, + "learning_rate": 1.4315300493564311e-05, + "loss": 0.2757, + "step": 29370 + }, + { + "epoch": 2.84, + "learning_rate": 1.4313364947256365e-05, + "loss": 0.1714, + "step": 29380 + }, + { + "epoch": 2.84, + "learning_rate": 1.4311429400948418e-05, + "loss": 0.1498, + "step": 29390 + }, + { + "epoch": 2.85, + "learning_rate": 1.4309493854640472e-05, + "loss": 0.2338, + "step": 29400 + }, + { + "epoch": 2.85, + "learning_rate": 1.4307558308332527e-05, + "loss": 0.2898, + "step": 29410 + }, + { + "epoch": 2.85, + "learning_rate": 1.4305622762024583e-05, + "loss": 0.2384, + "step": 29420 + }, + { + "epoch": 2.85, + "learning_rate": 1.4303687215716638e-05, + "loss": 0.3367, + "step": 29430 + }, + { + "epoch": 2.85, + "learning_rate": 1.4301751669408692e-05, + "loss": 0.2036, + "step": 29440 + }, + { + "epoch": 2.85, + "learning_rate": 1.4299816123100747e-05, + "loss": 0.2871, + "step": 29450 + }, + { + "epoch": 2.85, + "learning_rate": 1.4297880576792802e-05, + "loss": 0.2601, + "step": 29460 + }, + { + "epoch": 2.85, + "learning_rate": 1.4295945030484854e-05, + "loss": 0.2395, + "step": 29470 + }, + { + "epoch": 2.85, + "learning_rate": 1.429400948417691e-05, + "loss": 0.3363, + "step": 29480 + }, + { + "epoch": 2.85, + "learning_rate": 1.4292073937868965e-05, + "loss": 0.1838, + "step": 29490 + }, + { + "epoch": 2.85, + "learning_rate": 1.4290138391561019e-05, + "loss": 0.2444, + "step": 29500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4288202845253074e-05, + "loss": 0.3472, + "step": 29510 + }, + { + "epoch": 2.86, + "learning_rate": 1.428626729894513e-05, + "loss": 0.259, + "step": 29520 + }, + { + "epoch": 2.86, + "learning_rate": 1.4284331752637185e-05, + "loss": 0.281, + "step": 29530 + }, + { + "epoch": 2.86, + "learning_rate": 1.4282396206329236e-05, + "loss": 0.2478, + "step": 29540 + }, + { + "epoch": 2.86, + "learning_rate": 1.4280460660021292e-05, + "loss": 0.1996, + "step": 29550 + }, + { + "epoch": 2.86, + "learning_rate": 1.4278525113713345e-05, + "loss": 0.2458, + "step": 29560 + }, + { + "epoch": 2.86, + "learning_rate": 1.42765895674054e-05, + "loss": 0.2276, + "step": 29570 + }, + { + "epoch": 2.86, + "learning_rate": 1.4274654021097456e-05, + "loss": 0.2117, + "step": 29580 + }, + { + "epoch": 2.86, + "learning_rate": 1.4272718474789511e-05, + "loss": 0.3127, + "step": 29590 + }, + { + "epoch": 2.86, + "learning_rate": 1.4270782928481565e-05, + "loss": 0.2574, + "step": 29600 + }, + { + "epoch": 2.87, + "learning_rate": 1.426884738217362e-05, + "loss": 0.2272, + "step": 29610 + }, + { + "epoch": 2.87, + "learning_rate": 1.4266911835865674e-05, + "loss": 0.2762, + "step": 29620 + }, + { + "epoch": 2.87, + "learning_rate": 1.4264976289557728e-05, + "loss": 0.2168, + "step": 29630 + }, + { + "epoch": 2.87, + "learning_rate": 1.4263040743249783e-05, + "loss": 0.1974, + "step": 29640 + }, + { + "epoch": 2.87, + "learning_rate": 1.4261105196941838e-05, + "loss": 0.3304, + "step": 29650 + }, + { + "epoch": 2.87, + "learning_rate": 1.4259169650633894e-05, + "loss": 0.2852, + "step": 29660 + }, + { + "epoch": 2.87, + "learning_rate": 1.4257234104325947e-05, + "loss": 0.4177, + "step": 29670 + }, + { + "epoch": 2.87, + "learning_rate": 1.4255298558018003e-05, + "loss": 0.2137, + "step": 29680 + }, + { + "epoch": 2.87, + "learning_rate": 1.4253363011710058e-05, + "loss": 0.2305, + "step": 29690 + }, + { + "epoch": 2.87, + "learning_rate": 1.425142746540211e-05, + "loss": 0.3274, + "step": 29700 + }, + { + "epoch": 2.88, + "learning_rate": 1.4249491919094165e-05, + "loss": 0.2512, + "step": 29710 + }, + { + "epoch": 2.88, + "learning_rate": 1.424755637278622e-05, + "loss": 0.296, + "step": 29720 + }, + { + "epoch": 2.88, + "learning_rate": 1.4245620826478274e-05, + "loss": 0.3207, + "step": 29730 + }, + { + "epoch": 2.88, + "learning_rate": 1.424368528017033e-05, + "loss": 0.2745, + "step": 29740 + }, + { + "epoch": 2.88, + "learning_rate": 1.4241749733862385e-05, + "loss": 0.2397, + "step": 29750 + }, + { + "epoch": 2.88, + "learning_rate": 1.423981418755444e-05, + "loss": 0.2124, + "step": 29760 + }, + { + "epoch": 2.88, + "learning_rate": 1.4237878641246492e-05, + "loss": 0.2063, + "step": 29770 + }, + { + "epoch": 2.88, + "learning_rate": 1.4235943094938547e-05, + "loss": 0.2017, + "step": 29780 + }, + { + "epoch": 2.88, + "learning_rate": 1.4234007548630601e-05, + "loss": 0.3527, + "step": 29790 + }, + { + "epoch": 2.88, + "learning_rate": 1.4232072002322656e-05, + "loss": 0.2827, + "step": 29800 + }, + { + "epoch": 2.88, + "learning_rate": 1.4230136456014712e-05, + "loss": 0.2066, + "step": 29810 + }, + { + "epoch": 2.89, + "learning_rate": 1.4228200909706767e-05, + "loss": 0.2824, + "step": 29820 + }, + { + "epoch": 2.89, + "learning_rate": 1.422626536339882e-05, + "loss": 0.1978, + "step": 29830 + }, + { + "epoch": 2.89, + "learning_rate": 1.4224329817090876e-05, + "loss": 0.3146, + "step": 29840 + }, + { + "epoch": 2.89, + "learning_rate": 1.4222394270782928e-05, + "loss": 0.2401, + "step": 29850 + }, + { + "epoch": 2.89, + "learning_rate": 1.4220458724474983e-05, + "loss": 0.2976, + "step": 29860 + }, + { + "epoch": 2.89, + "learning_rate": 1.4218523178167039e-05, + "loss": 0.2992, + "step": 29870 + }, + { + "epoch": 2.89, + "learning_rate": 1.4216587631859094e-05, + "loss": 0.2115, + "step": 29880 + }, + { + "epoch": 2.89, + "learning_rate": 1.4214652085551148e-05, + "loss": 0.3979, + "step": 29890 + }, + { + "epoch": 2.89, + "learning_rate": 1.4212716539243203e-05, + "loss": 0.2222, + "step": 29900 + }, + { + "epoch": 2.89, + "learning_rate": 1.4210780992935258e-05, + "loss": 0.2731, + "step": 29910 + }, + { + "epoch": 2.9, + "learning_rate": 1.420884544662731e-05, + "loss": 0.2675, + "step": 29920 + }, + { + "epoch": 2.9, + "learning_rate": 1.4206909900319366e-05, + "loss": 0.2629, + "step": 29930 + }, + { + "epoch": 2.9, + "learning_rate": 1.420497435401142e-05, + "loss": 0.2326, + "step": 29940 + }, + { + "epoch": 2.9, + "learning_rate": 1.4203038807703474e-05, + "loss": 0.1312, + "step": 29950 + }, + { + "epoch": 2.9, + "learning_rate": 1.420110326139553e-05, + "loss": 0.1405, + "step": 29960 + }, + { + "epoch": 2.9, + "learning_rate": 1.4199167715087585e-05, + "loss": 0.213, + "step": 29970 + }, + { + "epoch": 2.9, + "learning_rate": 1.419723216877964e-05, + "loss": 0.2271, + "step": 29980 + }, + { + "epoch": 2.9, + "learning_rate": 1.4195296622471694e-05, + "loss": 0.3155, + "step": 29990 + }, + { + "epoch": 2.9, + "learning_rate": 1.4193361076163748e-05, + "loss": 0.3274, + "step": 30000 + }, + { + "epoch": 2.9, + "learning_rate": 1.4191425529855803e-05, + "loss": 0.2308, + "step": 30010 + }, + { + "epoch": 2.91, + "learning_rate": 1.4189489983547857e-05, + "loss": 0.2568, + "step": 30020 + }, + { + "epoch": 2.91, + "learning_rate": 1.4187554437239912e-05, + "loss": 0.2902, + "step": 30030 + }, + { + "epoch": 2.91, + "learning_rate": 1.4185618890931967e-05, + "loss": 0.2355, + "step": 30040 + }, + { + "epoch": 2.91, + "learning_rate": 1.4183683344624023e-05, + "loss": 0.2098, + "step": 30050 + }, + { + "epoch": 2.91, + "learning_rate": 1.4181747798316076e-05, + "loss": 0.2254, + "step": 30060 + }, + { + "epoch": 2.91, + "learning_rate": 1.417981225200813e-05, + "loss": 0.2631, + "step": 30070 + }, + { + "epoch": 2.91, + "learning_rate": 1.4177876705700184e-05, + "loss": 0.3234, + "step": 30080 + }, + { + "epoch": 2.91, + "learning_rate": 1.4175941159392239e-05, + "loss": 0.3809, + "step": 30090 + }, + { + "epoch": 2.91, + "learning_rate": 1.4174005613084294e-05, + "loss": 0.3574, + "step": 30100 + }, + { + "epoch": 2.91, + "learning_rate": 1.417207006677635e-05, + "loss": 0.2859, + "step": 30110 + }, + { + "epoch": 2.91, + "learning_rate": 1.4170134520468403e-05, + "loss": 0.1965, + "step": 30120 + }, + { + "epoch": 2.92, + "learning_rate": 1.4168198974160459e-05, + "loss": 0.242, + "step": 30130 + }, + { + "epoch": 2.92, + "learning_rate": 1.4166263427852514e-05, + "loss": 0.188, + "step": 30140 + }, + { + "epoch": 2.92, + "learning_rate": 1.4164327881544566e-05, + "loss": 0.3378, + "step": 30150 + }, + { + "epoch": 2.92, + "learning_rate": 1.4162392335236621e-05, + "loss": 0.1605, + "step": 30160 + }, + { + "epoch": 2.92, + "learning_rate": 1.4160456788928676e-05, + "loss": 0.2144, + "step": 30170 + }, + { + "epoch": 2.92, + "learning_rate": 1.415852124262073e-05, + "loss": 0.3108, + "step": 30180 + }, + { + "epoch": 2.92, + "learning_rate": 1.4156585696312785e-05, + "loss": 0.2123, + "step": 30190 + }, + { + "epoch": 2.92, + "learning_rate": 1.415465015000484e-05, + "loss": 0.4109, + "step": 30200 + }, + { + "epoch": 2.92, + "learning_rate": 1.4152714603696896e-05, + "loss": 0.2525, + "step": 30210 + }, + { + "epoch": 2.92, + "learning_rate": 1.4150779057388948e-05, + "loss": 0.2012, + "step": 30220 + }, + { + "epoch": 2.93, + "learning_rate": 1.4148843511081003e-05, + "loss": 0.2829, + "step": 30230 + }, + { + "epoch": 2.93, + "learning_rate": 1.4146907964773057e-05, + "loss": 0.1983, + "step": 30240 + }, + { + "epoch": 2.93, + "learning_rate": 1.4144972418465112e-05, + "loss": 0.332, + "step": 30250 + }, + { + "epoch": 2.93, + "learning_rate": 1.4143036872157168e-05, + "loss": 0.2593, + "step": 30260 + }, + { + "epoch": 2.93, + "learning_rate": 1.4141101325849223e-05, + "loss": 0.2094, + "step": 30270 + }, + { + "epoch": 2.93, + "learning_rate": 1.4139165779541277e-05, + "loss": 0.2982, + "step": 30280 + }, + { + "epoch": 2.93, + "learning_rate": 1.4137230233233332e-05, + "loss": 0.244, + "step": 30290 + }, + { + "epoch": 2.93, + "learning_rate": 1.4135294686925386e-05, + "loss": 0.3663, + "step": 30300 + }, + { + "epoch": 2.93, + "learning_rate": 1.413335914061744e-05, + "loss": 0.3346, + "step": 30310 + }, + { + "epoch": 2.93, + "learning_rate": 1.4131423594309495e-05, + "loss": 0.2247, + "step": 30320 + }, + { + "epoch": 2.94, + "learning_rate": 1.412948804800155e-05, + "loss": 0.25, + "step": 30330 + }, + { + "epoch": 2.94, + "learning_rate": 1.4127552501693604e-05, + "loss": 0.3604, + "step": 30340 + }, + { + "epoch": 2.94, + "learning_rate": 1.4125616955385659e-05, + "loss": 0.301, + "step": 30350 + }, + { + "epoch": 2.94, + "learning_rate": 1.4123681409077714e-05, + "loss": 0.2704, + "step": 30360 + }, + { + "epoch": 2.94, + "learning_rate": 1.412174586276977e-05, + "loss": 0.306, + "step": 30370 + }, + { + "epoch": 2.94, + "learning_rate": 1.4119810316461821e-05, + "loss": 0.3321, + "step": 30380 + }, + { + "epoch": 2.94, + "learning_rate": 1.4117874770153877e-05, + "loss": 0.1893, + "step": 30390 + }, + { + "epoch": 2.94, + "learning_rate": 1.4115939223845932e-05, + "loss": 0.2796, + "step": 30400 + }, + { + "epoch": 2.94, + "learning_rate": 1.4114003677537986e-05, + "loss": 0.2241, + "step": 30410 + }, + { + "epoch": 2.94, + "learning_rate": 1.4112068131230041e-05, + "loss": 0.3775, + "step": 30420 + }, + { + "epoch": 2.94, + "learning_rate": 1.4110132584922096e-05, + "loss": 0.1969, + "step": 30430 + }, + { + "epoch": 2.95, + "learning_rate": 1.4108197038614152e-05, + "loss": 0.434, + "step": 30440 + }, + { + "epoch": 2.95, + "learning_rate": 1.4106261492306204e-05, + "loss": 0.2823, + "step": 30450 + }, + { + "epoch": 2.95, + "learning_rate": 1.4104325945998259e-05, + "loss": 0.2502, + "step": 30460 + }, + { + "epoch": 2.95, + "learning_rate": 1.4102390399690313e-05, + "loss": 0.2992, + "step": 30470 + }, + { + "epoch": 2.95, + "learning_rate": 1.4100454853382368e-05, + "loss": 0.2202, + "step": 30480 + }, + { + "epoch": 2.95, + "learning_rate": 1.4098519307074423e-05, + "loss": 0.2645, + "step": 30490 + }, + { + "epoch": 2.95, + "learning_rate": 1.4096583760766479e-05, + "loss": 0.2621, + "step": 30500 + }, + { + "epoch": 2.95, + "learning_rate": 1.4094648214458532e-05, + "loss": 0.2782, + "step": 30510 + }, + { + "epoch": 2.95, + "learning_rate": 1.4092712668150588e-05, + "loss": 0.3818, + "step": 30520 + }, + { + "epoch": 2.95, + "learning_rate": 1.409077712184264e-05, + "loss": 0.1424, + "step": 30530 + }, + { + "epoch": 2.96, + "learning_rate": 1.4088841575534695e-05, + "loss": 0.2392, + "step": 30540 + }, + { + "epoch": 2.96, + "learning_rate": 1.408690602922675e-05, + "loss": 0.4666, + "step": 30550 + }, + { + "epoch": 2.96, + "learning_rate": 1.4084970482918806e-05, + "loss": 0.2837, + "step": 30560 + }, + { + "epoch": 2.96, + "learning_rate": 1.408303493661086e-05, + "loss": 0.1709, + "step": 30570 + }, + { + "epoch": 2.96, + "learning_rate": 1.4081099390302915e-05, + "loss": 0.1784, + "step": 30580 + }, + { + "epoch": 2.96, + "learning_rate": 1.407916384399497e-05, + "loss": 0.2981, + "step": 30590 + }, + { + "epoch": 2.96, + "learning_rate": 1.4077228297687022e-05, + "loss": 0.2701, + "step": 30600 + }, + { + "epoch": 2.96, + "learning_rate": 1.4075292751379077e-05, + "loss": 0.2692, + "step": 30610 + }, + { + "epoch": 2.96, + "learning_rate": 1.4073357205071132e-05, + "loss": 0.3596, + "step": 30620 + }, + { + "epoch": 2.96, + "learning_rate": 1.4071421658763186e-05, + "loss": 0.3847, + "step": 30630 + }, + { + "epoch": 2.97, + "learning_rate": 1.4069486112455241e-05, + "loss": 0.0671, + "step": 30640 + }, + { + "epoch": 2.97, + "learning_rate": 1.4067550566147297e-05, + "loss": 0.2907, + "step": 30650 + }, + { + "epoch": 2.97, + "learning_rate": 1.4065615019839352e-05, + "loss": 0.2712, + "step": 30660 + }, + { + "epoch": 2.97, + "learning_rate": 1.4063679473531406e-05, + "loss": 0.3645, + "step": 30670 + }, + { + "epoch": 2.97, + "learning_rate": 1.406174392722346e-05, + "loss": 0.2951, + "step": 30680 + }, + { + "epoch": 2.97, + "learning_rate": 1.4059808380915515e-05, + "loss": 0.2643, + "step": 30690 + }, + { + "epoch": 2.97, + "learning_rate": 1.4057872834607568e-05, + "loss": 0.3132, + "step": 30700 + }, + { + "epoch": 2.97, + "learning_rate": 1.4055937288299624e-05, + "loss": 0.5021, + "step": 30710 + }, + { + "epoch": 2.97, + "learning_rate": 1.4054001741991679e-05, + "loss": 0.2907, + "step": 30720 + }, + { + "epoch": 2.97, + "learning_rate": 1.4052066195683733e-05, + "loss": 0.2646, + "step": 30730 + }, + { + "epoch": 2.97, + "learning_rate": 1.4050130649375788e-05, + "loss": 0.3159, + "step": 30740 + }, + { + "epoch": 2.98, + "learning_rate": 1.4048195103067842e-05, + "loss": 0.2856, + "step": 30750 + }, + { + "epoch": 2.98, + "learning_rate": 1.4046259556759895e-05, + "loss": 0.2481, + "step": 30760 + }, + { + "epoch": 2.98, + "learning_rate": 1.404432401045195e-05, + "loss": 0.226, + "step": 30770 + }, + { + "epoch": 2.98, + "learning_rate": 1.4042388464144006e-05, + "loss": 0.3285, + "step": 30780 + }, + { + "epoch": 2.98, + "learning_rate": 1.4040452917836061e-05, + "loss": 0.1426, + "step": 30790 + }, + { + "epoch": 2.98, + "learning_rate": 1.4038517371528115e-05, + "loss": 0.1899, + "step": 30800 + }, + { + "epoch": 2.98, + "learning_rate": 1.403658182522017e-05, + "loss": 0.1949, + "step": 30810 + }, + { + "epoch": 2.98, + "learning_rate": 1.4034646278912225e-05, + "loss": 0.19, + "step": 30820 + }, + { + "epoch": 2.98, + "learning_rate": 1.4032710732604277e-05, + "loss": 0.2083, + "step": 30830 + }, + { + "epoch": 2.98, + "learning_rate": 1.4030775186296333e-05, + "loss": 0.4168, + "step": 30840 + }, + { + "epoch": 2.99, + "learning_rate": 1.4028839639988388e-05, + "loss": 0.2787, + "step": 30850 + }, + { + "epoch": 2.99, + "learning_rate": 1.4026904093680442e-05, + "loss": 0.1361, + "step": 30860 + }, + { + "epoch": 2.99, + "learning_rate": 1.4024968547372497e-05, + "loss": 0.2287, + "step": 30870 + }, + { + "epoch": 2.99, + "learning_rate": 1.4023033001064552e-05, + "loss": 0.2991, + "step": 30880 + }, + { + "epoch": 2.99, + "learning_rate": 1.4021097454756608e-05, + "loss": 0.3338, + "step": 30890 + }, + { + "epoch": 2.99, + "learning_rate": 1.4019161908448661e-05, + "loss": 0.3493, + "step": 30900 + }, + { + "epoch": 2.99, + "learning_rate": 1.4017226362140715e-05, + "loss": 0.4598, + "step": 30910 + }, + { + "epoch": 2.99, + "learning_rate": 1.4015290815832769e-05, + "loss": 0.2868, + "step": 30920 + }, + { + "epoch": 2.99, + "learning_rate": 1.4013355269524824e-05, + "loss": 0.1644, + "step": 30930 + }, + { + "epoch": 2.99, + "learning_rate": 1.401141972321688e-05, + "loss": 0.2678, + "step": 30940 + }, + { + "epoch": 3.0, + "learning_rate": 1.4009484176908935e-05, + "loss": 0.302, + "step": 30950 + }, + { + "epoch": 3.0, + "learning_rate": 1.4007548630600988e-05, + "loss": 0.2285, + "step": 30960 + }, + { + "epoch": 3.0, + "learning_rate": 1.4005613084293044e-05, + "loss": 0.2352, + "step": 30970 + }, + { + "epoch": 3.0, + "learning_rate": 1.4003677537985097e-05, + "loss": 0.2555, + "step": 30980 + }, + { + "epoch": 3.0, + "learning_rate": 1.400174199167715e-05, + "loss": 0.3025, + "step": 30990 + }, + { + "epoch": 3.0, + "eval_FN": 926, + "eval_FP": 965, + "eval_TN": 14794, + "eval_TP": 3980, + "eval_accuracy": 0.9084926203726107, + "eval_f1": 0.8080397929144248, + "eval_loss": 0.38119348883628845, + "eval_precision": 0.8048533872598584, + "eval_recall": 0.811251528740318, + "eval_runtime": 142.04, + "eval_samples_per_second": 145.487, + "eval_steps_per_second": 9.096, + "step": 30999 + } + ], + "logging_steps": 10, + "max_steps": 103330, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.3049019101684736e+17, + "trial_name": null, + "trial_params": null +}