{ "best_metric": 0.3440640866756439, "best_model_checkpoint": "/leonardo_work/IscrC_AGENT/PROFES2025/results/full_weighted_loss/industry/checkpoint-3217", "epoch": 8.0, "eval_steps": 500, "global_step": 25736, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9993783027665528e-05, "loss": 0.7575, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.9987566055331057e-05, "loss": 0.6873, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.9981349082996583e-05, "loss": 0.6609, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.997513211066211e-05, "loss": 0.5386, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.9968915138327635e-05, "loss": 0.7276, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.9962698165993164e-05, "loss": 0.6163, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.995648119365869e-05, "loss": 0.6299, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.9950264221324216e-05, "loss": 0.6082, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.9944047248989746e-05, "loss": 0.4975, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.993783027665527e-05, "loss": 0.5754, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.9931613304320797e-05, "loss": 0.5122, "step": 110 }, { "epoch": 0.04, "learning_rate": 1.9925396331986323e-05, "loss": 0.5197, "step": 120 }, { "epoch": 0.04, "learning_rate": 1.991917935965185e-05, "loss": 0.9086, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.991296238731738e-05, "loss": 0.6399, "step": 140 }, { "epoch": 0.05, "learning_rate": 1.9906745414982905e-05, "loss": 0.5776, "step": 150 }, { "epoch": 0.05, "learning_rate": 1.990052844264843e-05, "loss": 0.525, "step": 160 }, { "epoch": 0.05, "learning_rate": 1.9894311470313957e-05, "loss": 0.4488, "step": 170 }, { "epoch": 0.06, "learning_rate": 1.9888094497979486e-05, "loss": 0.4111, "step": 180 }, { "epoch": 0.06, "learning_rate": 1.9881877525645012e-05, "loss": 0.4372, "step": 190 }, { "epoch": 0.06, "learning_rate": 1.9875660553310538e-05, "loss": 0.5545, "step": 200 }, { "epoch": 0.07, "learning_rate": 1.9869443580976067e-05, "loss": 0.6182, "step": 210 }, { "epoch": 0.07, "learning_rate": 1.9863226608641593e-05, "loss": 0.3613, "step": 220 }, { "epoch": 0.07, "learning_rate": 1.985700963630712e-05, "loss": 0.5177, "step": 230 }, { "epoch": 0.07, "learning_rate": 1.9850792663972645e-05, "loss": 0.5406, "step": 240 }, { "epoch": 0.08, "learning_rate": 1.9844575691638175e-05, "loss": 0.5746, "step": 250 }, { "epoch": 0.08, "learning_rate": 1.98383587193037e-05, "loss": 0.652, "step": 260 }, { "epoch": 0.08, "learning_rate": 1.9832141746969227e-05, "loss": 0.4381, "step": 270 }, { "epoch": 0.09, "learning_rate": 1.9825924774634756e-05, "loss": 0.5538, "step": 280 }, { "epoch": 0.09, "learning_rate": 1.9819707802300282e-05, "loss": 0.4529, "step": 290 }, { "epoch": 0.09, "learning_rate": 1.9813490829965808e-05, "loss": 0.4394, "step": 300 }, { "epoch": 0.1, "learning_rate": 1.9807273857631334e-05, "loss": 0.6208, "step": 310 }, { "epoch": 0.1, "learning_rate": 1.980105688529686e-05, "loss": 0.5333, "step": 320 }, { "epoch": 0.1, "learning_rate": 1.979483991296239e-05, "loss": 0.4253, "step": 330 }, { "epoch": 0.11, "learning_rate": 1.9788622940627915e-05, "loss": 0.4599, "step": 340 }, { "epoch": 0.11, "learning_rate": 1.9782405968293445e-05, "loss": 0.4058, "step": 350 }, { "epoch": 0.11, "learning_rate": 1.977618899595897e-05, "loss": 0.3812, "step": 360 }, { "epoch": 0.12, "learning_rate": 1.9769972023624497e-05, "loss": 0.4906, "step": 370 }, { "epoch": 0.12, "learning_rate": 1.9763755051290022e-05, "loss": 0.3776, "step": 380 }, { "epoch": 0.12, "learning_rate": 1.975753807895555e-05, "loss": 0.6558, "step": 390 }, { "epoch": 0.12, "learning_rate": 1.9751321106621078e-05, "loss": 0.5279, "step": 400 }, { "epoch": 0.13, "learning_rate": 1.9745104134286604e-05, "loss": 0.3409, "step": 410 }, { "epoch": 0.13, "learning_rate": 1.9738887161952133e-05, "loss": 0.508, "step": 420 }, { "epoch": 0.13, "learning_rate": 1.973267018961766e-05, "loss": 0.3759, "step": 430 }, { "epoch": 0.14, "learning_rate": 1.9726453217283185e-05, "loss": 0.2954, "step": 440 }, { "epoch": 0.14, "learning_rate": 1.972023624494871e-05, "loss": 0.5206, "step": 450 }, { "epoch": 0.14, "learning_rate": 1.9714019272614237e-05, "loss": 0.4448, "step": 460 }, { "epoch": 0.15, "learning_rate": 1.9707802300279766e-05, "loss": 0.5867, "step": 470 }, { "epoch": 0.15, "learning_rate": 1.9701585327945292e-05, "loss": 0.4178, "step": 480 }, { "epoch": 0.15, "learning_rate": 1.969536835561082e-05, "loss": 0.4539, "step": 490 }, { "epoch": 0.16, "learning_rate": 1.9689151383276344e-05, "loss": 0.4397, "step": 500 }, { "epoch": 0.16, "learning_rate": 1.968293441094187e-05, "loss": 0.4183, "step": 510 }, { "epoch": 0.16, "learning_rate": 1.96767174386074e-05, "loss": 0.5655, "step": 520 }, { "epoch": 0.16, "learning_rate": 1.9670500466272926e-05, "loss": 0.4386, "step": 530 }, { "epoch": 0.17, "learning_rate": 1.9664283493938455e-05, "loss": 0.4113, "step": 540 }, { "epoch": 0.17, "learning_rate": 1.965806652160398e-05, "loss": 0.4765, "step": 550 }, { "epoch": 0.17, "learning_rate": 1.9651849549269507e-05, "loss": 0.4968, "step": 560 }, { "epoch": 0.18, "learning_rate": 1.9645632576935033e-05, "loss": 0.5439, "step": 570 }, { "epoch": 0.18, "learning_rate": 1.963941560460056e-05, "loss": 0.4017, "step": 580 }, { "epoch": 0.18, "learning_rate": 1.9633198632266088e-05, "loss": 0.3565, "step": 590 }, { "epoch": 0.19, "learning_rate": 1.9626981659931614e-05, "loss": 0.3725, "step": 600 }, { "epoch": 0.19, "learning_rate": 1.9620764687597144e-05, "loss": 0.2914, "step": 610 }, { "epoch": 0.19, "learning_rate": 1.961454771526267e-05, "loss": 0.6442, "step": 620 }, { "epoch": 0.2, "learning_rate": 1.9608330742928196e-05, "loss": 0.1772, "step": 630 }, { "epoch": 0.2, "learning_rate": 1.960211377059372e-05, "loss": 0.639, "step": 640 }, { "epoch": 0.2, "learning_rate": 1.9595896798259247e-05, "loss": 0.4817, "step": 650 }, { "epoch": 0.21, "learning_rate": 1.9589679825924777e-05, "loss": 0.3367, "step": 660 }, { "epoch": 0.21, "learning_rate": 1.9583462853590303e-05, "loss": 0.7029, "step": 670 }, { "epoch": 0.21, "learning_rate": 1.9577245881255832e-05, "loss": 0.304, "step": 680 }, { "epoch": 0.21, "learning_rate": 1.9571028908921358e-05, "loss": 0.3478, "step": 690 }, { "epoch": 0.22, "learning_rate": 1.9564811936586884e-05, "loss": 0.4608, "step": 700 }, { "epoch": 0.22, "learning_rate": 1.955859496425241e-05, "loss": 0.9605, "step": 710 }, { "epoch": 0.22, "learning_rate": 1.9552377991917936e-05, "loss": 0.4437, "step": 720 }, { "epoch": 0.23, "learning_rate": 1.9546161019583465e-05, "loss": 0.3467, "step": 730 }, { "epoch": 0.23, "learning_rate": 1.953994404724899e-05, "loss": 0.4077, "step": 740 }, { "epoch": 0.23, "learning_rate": 1.953372707491452e-05, "loss": 0.8425, "step": 750 }, { "epoch": 0.24, "learning_rate": 1.9527510102580047e-05, "loss": 0.4858, "step": 760 }, { "epoch": 0.24, "learning_rate": 1.9521293130245573e-05, "loss": 0.3518, "step": 770 }, { "epoch": 0.24, "learning_rate": 1.95150761579111e-05, "loss": 0.2888, "step": 780 }, { "epoch": 0.25, "learning_rate": 1.9508859185576625e-05, "loss": 0.5754, "step": 790 }, { "epoch": 0.25, "learning_rate": 1.9502642213242154e-05, "loss": 0.5077, "step": 800 }, { "epoch": 0.25, "learning_rate": 1.949642524090768e-05, "loss": 0.5145, "step": 810 }, { "epoch": 0.25, "learning_rate": 1.9490208268573206e-05, "loss": 0.4291, "step": 820 }, { "epoch": 0.26, "learning_rate": 1.9483991296238732e-05, "loss": 0.2612, "step": 830 }, { "epoch": 0.26, "learning_rate": 1.9477774323904258e-05, "loss": 0.4415, "step": 840 }, { "epoch": 0.26, "learning_rate": 1.9471557351569787e-05, "loss": 0.9061, "step": 850 }, { "epoch": 0.27, "learning_rate": 1.9465340379235313e-05, "loss": 0.8032, "step": 860 }, { "epoch": 0.27, "learning_rate": 1.9459123406900843e-05, "loss": 0.5346, "step": 870 }, { "epoch": 0.27, "learning_rate": 1.945290643456637e-05, "loss": 0.434, "step": 880 }, { "epoch": 0.28, "learning_rate": 1.9446689462231895e-05, "loss": 0.3796, "step": 890 }, { "epoch": 0.28, "learning_rate": 1.944047248989742e-05, "loss": 0.4585, "step": 900 }, { "epoch": 0.28, "learning_rate": 1.9434255517562946e-05, "loss": 0.3291, "step": 910 }, { "epoch": 0.29, "learning_rate": 1.9428038545228476e-05, "loss": 0.5967, "step": 920 }, { "epoch": 0.29, "learning_rate": 1.9421821572894002e-05, "loss": 0.4608, "step": 930 }, { "epoch": 0.29, "learning_rate": 1.941560460055953e-05, "loss": 0.402, "step": 940 }, { "epoch": 0.3, "learning_rate": 1.9409387628225057e-05, "loss": 0.4807, "step": 950 }, { "epoch": 0.3, "learning_rate": 1.9403170655890583e-05, "loss": 0.5985, "step": 960 }, { "epoch": 0.3, "learning_rate": 1.939695368355611e-05, "loss": 0.5173, "step": 970 }, { "epoch": 0.3, "learning_rate": 1.9390736711221635e-05, "loss": 0.5397, "step": 980 }, { "epoch": 0.31, "learning_rate": 1.9384519738887164e-05, "loss": 0.3691, "step": 990 }, { "epoch": 0.31, "learning_rate": 1.937830276655269e-05, "loss": 0.3125, "step": 1000 }, { "epoch": 0.31, "learning_rate": 1.937208579421822e-05, "loss": 0.4462, "step": 1010 }, { "epoch": 0.32, "learning_rate": 1.9365868821883746e-05, "loss": 0.168, "step": 1020 }, { "epoch": 0.32, "learning_rate": 1.935965184954927e-05, "loss": 0.4617, "step": 1030 }, { "epoch": 0.32, "learning_rate": 1.9353434877214798e-05, "loss": 0.8935, "step": 1040 }, { "epoch": 0.33, "learning_rate": 1.9347217904880324e-05, "loss": 0.3053, "step": 1050 }, { "epoch": 0.33, "learning_rate": 1.9341000932545853e-05, "loss": 0.6233, "step": 1060 }, { "epoch": 0.33, "learning_rate": 1.933478396021138e-05, "loss": 0.3158, "step": 1070 }, { "epoch": 0.34, "learning_rate": 1.9328566987876905e-05, "loss": 0.4162, "step": 1080 }, { "epoch": 0.34, "learning_rate": 1.9322350015542434e-05, "loss": 0.5361, "step": 1090 }, { "epoch": 0.34, "learning_rate": 1.9316133043207957e-05, "loss": 0.4649, "step": 1100 }, { "epoch": 0.35, "learning_rate": 1.9309916070873486e-05, "loss": 0.4029, "step": 1110 }, { "epoch": 0.35, "learning_rate": 1.9303699098539012e-05, "loss": 0.3657, "step": 1120 }, { "epoch": 0.35, "learning_rate": 1.929748212620454e-05, "loss": 0.4586, "step": 1130 }, { "epoch": 0.35, "learning_rate": 1.9291265153870068e-05, "loss": 0.2867, "step": 1140 }, { "epoch": 0.36, "learning_rate": 1.9285048181535594e-05, "loss": 0.4648, "step": 1150 }, { "epoch": 0.36, "learning_rate": 1.927883120920112e-05, "loss": 0.6251, "step": 1160 }, { "epoch": 0.36, "learning_rate": 1.9272614236866645e-05, "loss": 0.4625, "step": 1170 }, { "epoch": 0.37, "learning_rate": 1.9266397264532175e-05, "loss": 0.5932, "step": 1180 }, { "epoch": 0.37, "learning_rate": 1.92601802921977e-05, "loss": 0.1886, "step": 1190 }, { "epoch": 0.37, "learning_rate": 1.925396331986323e-05, "loss": 0.7285, "step": 1200 }, { "epoch": 0.38, "learning_rate": 1.9247746347528756e-05, "loss": 0.4582, "step": 1210 }, { "epoch": 0.38, "learning_rate": 1.9241529375194282e-05, "loss": 0.3365, "step": 1220 }, { "epoch": 0.38, "learning_rate": 1.9235312402859808e-05, "loss": 0.4612, "step": 1230 }, { "epoch": 0.39, "learning_rate": 1.9229095430525334e-05, "loss": 0.7369, "step": 1240 }, { "epoch": 0.39, "learning_rate": 1.9222878458190863e-05, "loss": 0.5078, "step": 1250 }, { "epoch": 0.39, "learning_rate": 1.921666148585639e-05, "loss": 0.3412, "step": 1260 }, { "epoch": 0.39, "learning_rate": 1.9210444513521915e-05, "loss": 0.254, "step": 1270 }, { "epoch": 0.4, "learning_rate": 1.9204227541187445e-05, "loss": 0.3788, "step": 1280 }, { "epoch": 0.4, "learning_rate": 1.919801056885297e-05, "loss": 0.4238, "step": 1290 }, { "epoch": 0.4, "learning_rate": 1.9191793596518497e-05, "loss": 0.4625, "step": 1300 }, { "epoch": 0.41, "learning_rate": 1.9185576624184023e-05, "loss": 0.3474, "step": 1310 }, { "epoch": 0.41, "learning_rate": 1.9179359651849552e-05, "loss": 0.4962, "step": 1320 }, { "epoch": 0.41, "learning_rate": 1.9173142679515078e-05, "loss": 0.6099, "step": 1330 }, { "epoch": 0.42, "learning_rate": 1.9166925707180604e-05, "loss": 0.5424, "step": 1340 }, { "epoch": 0.42, "learning_rate": 1.9160708734846133e-05, "loss": 0.4483, "step": 1350 }, { "epoch": 0.42, "learning_rate": 1.915449176251166e-05, "loss": 0.3166, "step": 1360 }, { "epoch": 0.43, "learning_rate": 1.9148274790177185e-05, "loss": 0.4457, "step": 1370 }, { "epoch": 0.43, "learning_rate": 1.914205781784271e-05, "loss": 0.2258, "step": 1380 }, { "epoch": 0.43, "learning_rate": 1.913584084550824e-05, "loss": 0.4095, "step": 1390 }, { "epoch": 0.44, "learning_rate": 1.9129623873173767e-05, "loss": 0.3182, "step": 1400 }, { "epoch": 0.44, "learning_rate": 1.9123406900839293e-05, "loss": 0.3641, "step": 1410 }, { "epoch": 0.44, "learning_rate": 1.911718992850482e-05, "loss": 0.5365, "step": 1420 }, { "epoch": 0.44, "learning_rate": 1.9110972956170344e-05, "loss": 0.5406, "step": 1430 }, { "epoch": 0.45, "learning_rate": 1.9104755983835874e-05, "loss": 0.3486, "step": 1440 }, { "epoch": 0.45, "learning_rate": 1.90985390115014e-05, "loss": 0.349, "step": 1450 }, { "epoch": 0.45, "learning_rate": 1.9092322039166926e-05, "loss": 0.8522, "step": 1460 }, { "epoch": 0.46, "learning_rate": 1.9086105066832455e-05, "loss": 0.4784, "step": 1470 }, { "epoch": 0.46, "learning_rate": 1.907988809449798e-05, "loss": 0.4132, "step": 1480 }, { "epoch": 0.46, "learning_rate": 1.9073671122163507e-05, "loss": 0.3754, "step": 1490 }, { "epoch": 0.47, "learning_rate": 1.9067454149829033e-05, "loss": 0.6593, "step": 1500 }, { "epoch": 0.47, "learning_rate": 1.9061237177494562e-05, "loss": 0.4253, "step": 1510 }, { "epoch": 0.47, "learning_rate": 1.905502020516009e-05, "loss": 0.2552, "step": 1520 }, { "epoch": 0.48, "learning_rate": 1.9048803232825614e-05, "loss": 0.567, "step": 1530 }, { "epoch": 0.48, "learning_rate": 1.9042586260491144e-05, "loss": 0.4915, "step": 1540 }, { "epoch": 0.48, "learning_rate": 1.903636928815667e-05, "loss": 0.2914, "step": 1550 }, { "epoch": 0.48, "learning_rate": 1.9030152315822196e-05, "loss": 0.2822, "step": 1560 }, { "epoch": 0.49, "learning_rate": 1.902393534348772e-05, "loss": 0.4481, "step": 1570 }, { "epoch": 0.49, "learning_rate": 1.901771837115325e-05, "loss": 0.567, "step": 1580 }, { "epoch": 0.49, "learning_rate": 1.9011501398818777e-05, "loss": 0.5775, "step": 1590 }, { "epoch": 0.5, "learning_rate": 1.9005284426484303e-05, "loss": 0.597, "step": 1600 }, { "epoch": 0.5, "learning_rate": 1.8999067454149832e-05, "loss": 0.7378, "step": 1610 }, { "epoch": 0.5, "learning_rate": 1.8992850481815358e-05, "loss": 0.1779, "step": 1620 }, { "epoch": 0.51, "learning_rate": 1.8986633509480884e-05, "loss": 0.4367, "step": 1630 }, { "epoch": 0.51, "learning_rate": 1.898041653714641e-05, "loss": 0.2326, "step": 1640 }, { "epoch": 0.51, "learning_rate": 1.8974199564811936e-05, "loss": 0.6031, "step": 1650 }, { "epoch": 0.52, "learning_rate": 1.8967982592477466e-05, "loss": 0.3038, "step": 1660 }, { "epoch": 0.52, "learning_rate": 1.896176562014299e-05, "loss": 0.5407, "step": 1670 }, { "epoch": 0.52, "learning_rate": 1.895554864780852e-05, "loss": 0.3094, "step": 1680 }, { "epoch": 0.53, "learning_rate": 1.8949331675474047e-05, "loss": 0.5171, "step": 1690 }, { "epoch": 0.53, "learning_rate": 1.8943114703139573e-05, "loss": 0.2899, "step": 1700 }, { "epoch": 0.53, "learning_rate": 1.89368977308051e-05, "loss": 0.4439, "step": 1710 }, { "epoch": 0.53, "learning_rate": 1.8930680758470625e-05, "loss": 0.371, "step": 1720 }, { "epoch": 0.54, "learning_rate": 1.8924463786136154e-05, "loss": 0.244, "step": 1730 }, { "epoch": 0.54, "learning_rate": 1.891824681380168e-05, "loss": 0.6614, "step": 1740 }, { "epoch": 0.54, "learning_rate": 1.8912029841467206e-05, "loss": 0.4736, "step": 1750 }, { "epoch": 0.55, "learning_rate": 1.8905812869132732e-05, "loss": 0.699, "step": 1760 }, { "epoch": 0.55, "learning_rate": 1.889959589679826e-05, "loss": 0.2715, "step": 1770 }, { "epoch": 0.55, "learning_rate": 1.8893378924463787e-05, "loss": 0.2254, "step": 1780 }, { "epoch": 0.56, "learning_rate": 1.8887161952129313e-05, "loss": 0.4621, "step": 1790 }, { "epoch": 0.56, "learning_rate": 1.8880944979794843e-05, "loss": 0.6659, "step": 1800 }, { "epoch": 0.56, "learning_rate": 1.887472800746037e-05, "loss": 0.3281, "step": 1810 }, { "epoch": 0.57, "learning_rate": 1.8868511035125895e-05, "loss": 0.4285, "step": 1820 }, { "epoch": 0.57, "learning_rate": 1.886229406279142e-05, "loss": 0.2292, "step": 1830 }, { "epoch": 0.57, "learning_rate": 1.885607709045695e-05, "loss": 0.4293, "step": 1840 }, { "epoch": 0.58, "learning_rate": 1.8849860118122476e-05, "loss": 0.1876, "step": 1850 }, { "epoch": 0.58, "learning_rate": 1.8843643145788002e-05, "loss": 0.6638, "step": 1860 }, { "epoch": 0.58, "learning_rate": 1.883742617345353e-05, "loss": 0.5305, "step": 1870 }, { "epoch": 0.58, "learning_rate": 1.8831209201119057e-05, "loss": 0.4143, "step": 1880 }, { "epoch": 0.59, "learning_rate": 1.8824992228784583e-05, "loss": 0.4704, "step": 1890 }, { "epoch": 0.59, "learning_rate": 1.881877525645011e-05, "loss": 0.2477, "step": 1900 }, { "epoch": 0.59, "learning_rate": 1.8812558284115635e-05, "loss": 0.3924, "step": 1910 }, { "epoch": 0.6, "learning_rate": 1.8806341311781165e-05, "loss": 0.2924, "step": 1920 }, { "epoch": 0.6, "learning_rate": 1.880012433944669e-05, "loss": 0.3638, "step": 1930 }, { "epoch": 0.6, "learning_rate": 1.879390736711222e-05, "loss": 0.316, "step": 1940 }, { "epoch": 0.61, "learning_rate": 1.8787690394777746e-05, "loss": 0.4057, "step": 1950 }, { "epoch": 0.61, "learning_rate": 1.8781473422443272e-05, "loss": 0.3047, "step": 1960 }, { "epoch": 0.61, "learning_rate": 1.8775256450108798e-05, "loss": 0.3886, "step": 1970 }, { "epoch": 0.62, "learning_rate": 1.8769039477774324e-05, "loss": 0.5982, "step": 1980 }, { "epoch": 0.62, "learning_rate": 1.8762822505439853e-05, "loss": 0.2463, "step": 1990 }, { "epoch": 0.62, "learning_rate": 1.875660553310538e-05, "loss": 0.4057, "step": 2000 }, { "epoch": 0.62, "learning_rate": 1.875038856077091e-05, "loss": 0.4902, "step": 2010 }, { "epoch": 0.63, "learning_rate": 1.8744171588436434e-05, "loss": 0.5878, "step": 2020 }, { "epoch": 0.63, "learning_rate": 1.873795461610196e-05, "loss": 0.5283, "step": 2030 }, { "epoch": 0.63, "learning_rate": 1.8731737643767486e-05, "loss": 0.2842, "step": 2040 }, { "epoch": 0.64, "learning_rate": 1.8725520671433012e-05, "loss": 0.4004, "step": 2050 }, { "epoch": 0.64, "learning_rate": 1.8719303699098542e-05, "loss": 0.526, "step": 2060 }, { "epoch": 0.64, "learning_rate": 1.8713086726764068e-05, "loss": 0.1671, "step": 2070 }, { "epoch": 0.65, "learning_rate": 1.8706869754429594e-05, "loss": 0.4908, "step": 2080 }, { "epoch": 0.65, "learning_rate": 1.870065278209512e-05, "loss": 0.8472, "step": 2090 }, { "epoch": 0.65, "learning_rate": 1.8694435809760646e-05, "loss": 0.4459, "step": 2100 }, { "epoch": 0.66, "learning_rate": 1.8688218837426175e-05, "loss": 0.4383, "step": 2110 }, { "epoch": 0.66, "learning_rate": 1.86820018650917e-05, "loss": 0.2796, "step": 2120 }, { "epoch": 0.66, "learning_rate": 1.867578489275723e-05, "loss": 0.4761, "step": 2130 }, { "epoch": 0.67, "learning_rate": 1.8669567920422756e-05, "loss": 0.3512, "step": 2140 }, { "epoch": 0.67, "learning_rate": 1.8663350948088282e-05, "loss": 0.3003, "step": 2150 }, { "epoch": 0.67, "learning_rate": 1.8657133975753808e-05, "loss": 0.5948, "step": 2160 }, { "epoch": 0.67, "learning_rate": 1.8650917003419334e-05, "loss": 0.2713, "step": 2170 }, { "epoch": 0.68, "learning_rate": 1.8644700031084864e-05, "loss": 0.3056, "step": 2180 }, { "epoch": 0.68, "learning_rate": 1.863848305875039e-05, "loss": 0.6851, "step": 2190 }, { "epoch": 0.68, "learning_rate": 1.863226608641592e-05, "loss": 0.5926, "step": 2200 }, { "epoch": 0.69, "learning_rate": 1.8626049114081445e-05, "loss": 0.456, "step": 2210 }, { "epoch": 0.69, "learning_rate": 1.861983214174697e-05, "loss": 0.4585, "step": 2220 }, { "epoch": 0.69, "learning_rate": 1.8613615169412497e-05, "loss": 0.4714, "step": 2230 }, { "epoch": 0.7, "learning_rate": 1.8607398197078023e-05, "loss": 0.3492, "step": 2240 }, { "epoch": 0.7, "learning_rate": 1.8601181224743552e-05, "loss": 0.4724, "step": 2250 }, { "epoch": 0.7, "learning_rate": 1.8594964252409078e-05, "loss": 0.4854, "step": 2260 }, { "epoch": 0.71, "learning_rate": 1.8588747280074607e-05, "loss": 0.2395, "step": 2270 }, { "epoch": 0.71, "learning_rate": 1.8582530307740133e-05, "loss": 0.1794, "step": 2280 }, { "epoch": 0.71, "learning_rate": 1.857631333540566e-05, "loss": 0.4426, "step": 2290 }, { "epoch": 0.71, "learning_rate": 1.8570096363071185e-05, "loss": 0.8412, "step": 2300 }, { "epoch": 0.72, "learning_rate": 1.856387939073671e-05, "loss": 0.3927, "step": 2310 }, { "epoch": 0.72, "learning_rate": 1.855766241840224e-05, "loss": 0.7283, "step": 2320 }, { "epoch": 0.72, "learning_rate": 1.8551445446067767e-05, "loss": 0.549, "step": 2330 }, { "epoch": 0.73, "learning_rate": 1.8545228473733293e-05, "loss": 0.7128, "step": 2340 }, { "epoch": 0.73, "learning_rate": 1.853901150139882e-05, "loss": 0.2015, "step": 2350 }, { "epoch": 0.73, "learning_rate": 1.8532794529064345e-05, "loss": 0.1989, "step": 2360 }, { "epoch": 0.74, "learning_rate": 1.8526577556729874e-05, "loss": 0.4558, "step": 2370 }, { "epoch": 0.74, "learning_rate": 1.85203605843954e-05, "loss": 0.4828, "step": 2380 }, { "epoch": 0.74, "learning_rate": 1.851414361206093e-05, "loss": 0.4083, "step": 2390 }, { "epoch": 0.75, "learning_rate": 1.8507926639726455e-05, "loss": 0.4005, "step": 2400 }, { "epoch": 0.75, "learning_rate": 1.850170966739198e-05, "loss": 0.4204, "step": 2410 }, { "epoch": 0.75, "learning_rate": 1.8495492695057507e-05, "loss": 0.2819, "step": 2420 }, { "epoch": 0.76, "learning_rate": 1.8489275722723033e-05, "loss": 0.3465, "step": 2430 }, { "epoch": 0.76, "learning_rate": 1.8483058750388563e-05, "loss": 0.5253, "step": 2440 }, { "epoch": 0.76, "learning_rate": 1.847684177805409e-05, "loss": 0.3901, "step": 2450 }, { "epoch": 0.76, "learning_rate": 1.8470624805719618e-05, "loss": 0.2426, "step": 2460 }, { "epoch": 0.77, "learning_rate": 1.8464407833385144e-05, "loss": 0.5381, "step": 2470 }, { "epoch": 0.77, "learning_rate": 1.845819086105067e-05, "loss": 0.4847, "step": 2480 }, { "epoch": 0.77, "learning_rate": 1.8451973888716196e-05, "loss": 0.4399, "step": 2490 }, { "epoch": 0.78, "learning_rate": 1.8445756916381722e-05, "loss": 0.6, "step": 2500 }, { "epoch": 0.78, "learning_rate": 1.843953994404725e-05, "loss": 0.5227, "step": 2510 }, { "epoch": 0.78, "learning_rate": 1.8433322971712777e-05, "loss": 0.4131, "step": 2520 }, { "epoch": 0.79, "learning_rate": 1.8427105999378307e-05, "loss": 0.3047, "step": 2530 }, { "epoch": 0.79, "learning_rate": 1.8420889027043832e-05, "loss": 0.5594, "step": 2540 }, { "epoch": 0.79, "learning_rate": 1.841467205470936e-05, "loss": 0.292, "step": 2550 }, { "epoch": 0.8, "learning_rate": 1.8408455082374884e-05, "loss": 0.2387, "step": 2560 }, { "epoch": 0.8, "learning_rate": 1.840223811004041e-05, "loss": 0.5498, "step": 2570 }, { "epoch": 0.8, "learning_rate": 1.839602113770594e-05, "loss": 0.7383, "step": 2580 }, { "epoch": 0.81, "learning_rate": 1.8389804165371466e-05, "loss": 0.3408, "step": 2590 }, { "epoch": 0.81, "learning_rate": 1.8383587193036995e-05, "loss": 0.3907, "step": 2600 }, { "epoch": 0.81, "learning_rate": 1.837737022070252e-05, "loss": 0.3869, "step": 2610 }, { "epoch": 0.81, "learning_rate": 1.8371153248368047e-05, "loss": 0.4767, "step": 2620 }, { "epoch": 0.82, "learning_rate": 1.8364936276033573e-05, "loss": 0.3034, "step": 2630 }, { "epoch": 0.82, "learning_rate": 1.83587193036991e-05, "loss": 0.1794, "step": 2640 }, { "epoch": 0.82, "learning_rate": 1.835250233136463e-05, "loss": 0.594, "step": 2650 }, { "epoch": 0.83, "learning_rate": 1.8346285359030154e-05, "loss": 0.6076, "step": 2660 }, { "epoch": 0.83, "learning_rate": 1.834006838669568e-05, "loss": 0.3665, "step": 2670 }, { "epoch": 0.83, "learning_rate": 1.8333851414361206e-05, "loss": 0.5942, "step": 2680 }, { "epoch": 0.84, "learning_rate": 1.8327634442026732e-05, "loss": 0.4287, "step": 2690 }, { "epoch": 0.84, "learning_rate": 1.832141746969226e-05, "loss": 0.4436, "step": 2700 }, { "epoch": 0.84, "learning_rate": 1.8315200497357788e-05, "loss": 0.3404, "step": 2710 }, { "epoch": 0.85, "learning_rate": 1.8308983525023317e-05, "loss": 0.3837, "step": 2720 }, { "epoch": 0.85, "learning_rate": 1.8302766552688843e-05, "loss": 0.5069, "step": 2730 }, { "epoch": 0.85, "learning_rate": 1.829654958035437e-05, "loss": 0.5334, "step": 2740 }, { "epoch": 0.85, "learning_rate": 1.8290332608019895e-05, "loss": 0.3057, "step": 2750 }, { "epoch": 0.86, "learning_rate": 1.828411563568542e-05, "loss": 0.5718, "step": 2760 }, { "epoch": 0.86, "learning_rate": 1.827789866335095e-05, "loss": 0.2325, "step": 2770 }, { "epoch": 0.86, "learning_rate": 1.8271681691016476e-05, "loss": 0.1665, "step": 2780 }, { "epoch": 0.87, "learning_rate": 1.8265464718682006e-05, "loss": 0.2424, "step": 2790 }, { "epoch": 0.87, "learning_rate": 1.825924774634753e-05, "loss": 0.3763, "step": 2800 }, { "epoch": 0.87, "learning_rate": 1.8253030774013057e-05, "loss": 0.4385, "step": 2810 }, { "epoch": 0.88, "learning_rate": 1.8246813801678583e-05, "loss": 0.6329, "step": 2820 }, { "epoch": 0.88, "learning_rate": 1.824059682934411e-05, "loss": 0.4746, "step": 2830 }, { "epoch": 0.88, "learning_rate": 1.823437985700964e-05, "loss": 0.1979, "step": 2840 }, { "epoch": 0.89, "learning_rate": 1.8228162884675165e-05, "loss": 0.4243, "step": 2850 }, { "epoch": 0.89, "learning_rate": 1.822194591234069e-05, "loss": 0.2281, "step": 2860 }, { "epoch": 0.89, "learning_rate": 1.821572894000622e-05, "loss": 0.3936, "step": 2870 }, { "epoch": 0.9, "learning_rate": 1.8209511967671746e-05, "loss": 0.1947, "step": 2880 }, { "epoch": 0.9, "learning_rate": 1.8203294995337272e-05, "loss": 0.5114, "step": 2890 }, { "epoch": 0.9, "learning_rate": 1.8197078023002798e-05, "loss": 0.5046, "step": 2900 }, { "epoch": 0.9, "learning_rate": 1.8190861050668327e-05, "loss": 0.247, "step": 2910 }, { "epoch": 0.91, "learning_rate": 1.8184644078333853e-05, "loss": 0.3748, "step": 2920 }, { "epoch": 0.91, "learning_rate": 1.817842710599938e-05, "loss": 0.9864, "step": 2930 }, { "epoch": 0.91, "learning_rate": 1.817221013366491e-05, "loss": 0.2974, "step": 2940 }, { "epoch": 0.92, "learning_rate": 1.8165993161330435e-05, "loss": 0.5786, "step": 2950 }, { "epoch": 0.92, "learning_rate": 1.815977618899596e-05, "loss": 0.4237, "step": 2960 }, { "epoch": 0.92, "learning_rate": 1.8153559216661487e-05, "loss": 0.1572, "step": 2970 }, { "epoch": 0.93, "learning_rate": 1.8147342244327016e-05, "loss": 0.1967, "step": 2980 }, { "epoch": 0.93, "learning_rate": 1.8141125271992542e-05, "loss": 0.7341, "step": 2990 }, { "epoch": 0.93, "learning_rate": 1.8134908299658068e-05, "loss": 0.5411, "step": 3000 }, { "epoch": 0.94, "learning_rate": 1.8128691327323594e-05, "loss": 0.3098, "step": 3010 }, { "epoch": 0.94, "learning_rate": 1.812247435498912e-05, "loss": 0.1625, "step": 3020 }, { "epoch": 0.94, "learning_rate": 1.811625738265465e-05, "loss": 0.5957, "step": 3030 }, { "epoch": 0.94, "learning_rate": 1.8110040410320175e-05, "loss": 0.4473, "step": 3040 }, { "epoch": 0.95, "learning_rate": 1.81038234379857e-05, "loss": 0.5714, "step": 3050 }, { "epoch": 0.95, "learning_rate": 1.809760646565123e-05, "loss": 0.3916, "step": 3060 }, { "epoch": 0.95, "learning_rate": 1.8091389493316756e-05, "loss": 0.3568, "step": 3070 }, { "epoch": 0.96, "learning_rate": 1.8085172520982282e-05, "loss": 0.3438, "step": 3080 }, { "epoch": 0.96, "learning_rate": 1.807895554864781e-05, "loss": 0.5051, "step": 3090 }, { "epoch": 0.96, "learning_rate": 1.8072738576313338e-05, "loss": 0.2685, "step": 3100 }, { "epoch": 0.97, "learning_rate": 1.8066521603978864e-05, "loss": 0.3455, "step": 3110 }, { "epoch": 0.97, "learning_rate": 1.806030463164439e-05, "loss": 0.2658, "step": 3120 }, { "epoch": 0.97, "learning_rate": 1.805408765930992e-05, "loss": 0.3369, "step": 3130 }, { "epoch": 0.98, "learning_rate": 1.8047870686975445e-05, "loss": 0.3938, "step": 3140 }, { "epoch": 0.98, "learning_rate": 1.804165371464097e-05, "loss": 0.2233, "step": 3150 }, { "epoch": 0.98, "learning_rate": 1.8035436742306497e-05, "loss": 0.4099, "step": 3160 }, { "epoch": 0.99, "learning_rate": 1.8029219769972026e-05, "loss": 0.2854, "step": 3170 }, { "epoch": 0.99, "learning_rate": 1.8023002797637552e-05, "loss": 0.4001, "step": 3180 }, { "epoch": 0.99, "learning_rate": 1.8016785825303078e-05, "loss": 0.2493, "step": 3190 }, { "epoch": 0.99, "learning_rate": 1.8010568852968608e-05, "loss": 0.3226, "step": 3200 }, { "epoch": 1.0, "learning_rate": 1.8004351880634134e-05, "loss": 0.3559, "step": 3210 }, { "epoch": 1.0, "eval_FN": 108, "eval_FP": 438, "eval_TN": 5400, "eval_TP": 487, "eval_accuracy": 0.9151251360174102, "eval_f1": 0.6407894736842105, "eval_loss": 0.3440640866756439, "eval_precision": 0.5264864864864864, "eval_recall": 0.8184873949579832, "eval_runtime": 44.4145, "eval_samples_per_second": 144.84, "eval_steps_per_second": 9.074, "step": 3217 }, { "epoch": 1.0, "learning_rate": 1.799813490829966e-05, "loss": 0.3172, "step": 3220 }, { "epoch": 1.0, "learning_rate": 1.7991917935965186e-05, "loss": 0.4144, "step": 3230 }, { "epoch": 1.01, "learning_rate": 1.798570096363071e-05, "loss": 0.3745, "step": 3240 }, { "epoch": 1.01, "learning_rate": 1.797948399129624e-05, "loss": 0.2266, "step": 3250 }, { "epoch": 1.01, "learning_rate": 1.7973267018961767e-05, "loss": 0.0925, "step": 3260 }, { "epoch": 1.02, "learning_rate": 1.7967050046627296e-05, "loss": 0.3744, "step": 3270 }, { "epoch": 1.02, "learning_rate": 1.7960833074292822e-05, "loss": 0.7328, "step": 3280 }, { "epoch": 1.02, "learning_rate": 1.7954616101958348e-05, "loss": 0.3063, "step": 3290 }, { "epoch": 1.03, "learning_rate": 1.7948399129623874e-05, "loss": 0.271, "step": 3300 }, { "epoch": 1.03, "learning_rate": 1.79421821572894e-05, "loss": 0.4482, "step": 3310 }, { "epoch": 1.03, "learning_rate": 1.793596518495493e-05, "loss": 0.2333, "step": 3320 }, { "epoch": 1.04, "learning_rate": 1.7929748212620455e-05, "loss": 0.1601, "step": 3330 }, { "epoch": 1.04, "learning_rate": 1.792353124028598e-05, "loss": 0.38, "step": 3340 }, { "epoch": 1.04, "learning_rate": 1.7917314267951507e-05, "loss": 0.34, "step": 3350 }, { "epoch": 1.04, "learning_rate": 1.7911097295617037e-05, "loss": 0.3739, "step": 3360 }, { "epoch": 1.05, "learning_rate": 1.7904880323282563e-05, "loss": 0.4018, "step": 3370 }, { "epoch": 1.05, "learning_rate": 1.789866335094809e-05, "loss": 0.4787, "step": 3380 }, { "epoch": 1.05, "learning_rate": 1.7892446378613618e-05, "loss": 0.4167, "step": 3390 }, { "epoch": 1.06, "learning_rate": 1.7886229406279144e-05, "loss": 0.3901, "step": 3400 }, { "epoch": 1.06, "learning_rate": 1.788001243394467e-05, "loss": 0.2639, "step": 3410 }, { "epoch": 1.06, "learning_rate": 1.7873795461610196e-05, "loss": 0.2631, "step": 3420 }, { "epoch": 1.07, "learning_rate": 1.7867578489275722e-05, "loss": 0.1786, "step": 3430 }, { "epoch": 1.07, "learning_rate": 1.786136151694125e-05, "loss": 0.659, "step": 3440 }, { "epoch": 1.07, "learning_rate": 1.7855144544606777e-05, "loss": 0.434, "step": 3450 }, { "epoch": 1.08, "learning_rate": 1.7848927572272307e-05, "loss": 0.855, "step": 3460 }, { "epoch": 1.08, "learning_rate": 1.7842710599937833e-05, "loss": 0.3303, "step": 3470 }, { "epoch": 1.08, "learning_rate": 1.783649362760336e-05, "loss": 0.2157, "step": 3480 }, { "epoch": 1.08, "learning_rate": 1.7830276655268885e-05, "loss": 0.5136, "step": 3490 }, { "epoch": 1.09, "learning_rate": 1.782405968293441e-05, "loss": 0.214, "step": 3500 }, { "epoch": 1.09, "learning_rate": 1.781784271059994e-05, "loss": 0.2925, "step": 3510 }, { "epoch": 1.09, "learning_rate": 1.7811625738265466e-05, "loss": 0.5102, "step": 3520 }, { "epoch": 1.1, "learning_rate": 1.7805408765930995e-05, "loss": 0.2953, "step": 3530 }, { "epoch": 1.1, "learning_rate": 1.779919179359652e-05, "loss": 0.2864, "step": 3540 }, { "epoch": 1.1, "learning_rate": 1.7792974821262047e-05, "loss": 0.3233, "step": 3550 }, { "epoch": 1.11, "learning_rate": 1.7786757848927573e-05, "loss": 0.2368, "step": 3560 }, { "epoch": 1.11, "learning_rate": 1.77805408765931e-05, "loss": 0.6382, "step": 3570 }, { "epoch": 1.11, "learning_rate": 1.777432390425863e-05, "loss": 0.291, "step": 3580 }, { "epoch": 1.12, "learning_rate": 1.7768106931924154e-05, "loss": 0.2641, "step": 3590 }, { "epoch": 1.12, "learning_rate": 1.776188995958968e-05, "loss": 0.5688, "step": 3600 }, { "epoch": 1.12, "learning_rate": 1.7755672987255206e-05, "loss": 0.2325, "step": 3610 }, { "epoch": 1.13, "learning_rate": 1.7749456014920732e-05, "loss": 0.5077, "step": 3620 }, { "epoch": 1.13, "learning_rate": 1.7743239042586262e-05, "loss": 0.1971, "step": 3630 }, { "epoch": 1.13, "learning_rate": 1.7737022070251788e-05, "loss": 0.3807, "step": 3640 }, { "epoch": 1.13, "learning_rate": 1.7730805097917317e-05, "loss": 0.2133, "step": 3650 }, { "epoch": 1.14, "learning_rate": 1.7724588125582843e-05, "loss": 0.2158, "step": 3660 }, { "epoch": 1.14, "learning_rate": 1.771837115324837e-05, "loss": 0.3204, "step": 3670 }, { "epoch": 1.14, "learning_rate": 1.7712154180913895e-05, "loss": 0.6203, "step": 3680 }, { "epoch": 1.15, "learning_rate": 1.770593720857942e-05, "loss": 0.3697, "step": 3690 }, { "epoch": 1.15, "learning_rate": 1.769972023624495e-05, "loss": 0.344, "step": 3700 }, { "epoch": 1.15, "learning_rate": 1.7693503263910476e-05, "loss": 0.433, "step": 3710 }, { "epoch": 1.16, "learning_rate": 1.7687286291576006e-05, "loss": 0.2273, "step": 3720 }, { "epoch": 1.16, "learning_rate": 1.768106931924153e-05, "loss": 0.369, "step": 3730 }, { "epoch": 1.16, "learning_rate": 1.7674852346907058e-05, "loss": 0.159, "step": 3740 }, { "epoch": 1.17, "learning_rate": 1.7668635374572584e-05, "loss": 0.2613, "step": 3750 }, { "epoch": 1.17, "learning_rate": 1.766241840223811e-05, "loss": 0.339, "step": 3760 }, { "epoch": 1.17, "learning_rate": 1.765620142990364e-05, "loss": 0.4203, "step": 3770 }, { "epoch": 1.18, "learning_rate": 1.7649984457569165e-05, "loss": 0.7068, "step": 3780 }, { "epoch": 1.18, "learning_rate": 1.7643767485234694e-05, "loss": 0.3105, "step": 3790 }, { "epoch": 1.18, "learning_rate": 1.763755051290022e-05, "loss": 0.2884, "step": 3800 }, { "epoch": 1.18, "learning_rate": 1.7631333540565746e-05, "loss": 0.1362, "step": 3810 }, { "epoch": 1.19, "learning_rate": 1.7625116568231272e-05, "loss": 0.2243, "step": 3820 }, { "epoch": 1.19, "learning_rate": 1.7618899595896798e-05, "loss": 0.473, "step": 3830 }, { "epoch": 1.19, "learning_rate": 1.7612682623562328e-05, "loss": 0.1008, "step": 3840 }, { "epoch": 1.2, "learning_rate": 1.7606465651227853e-05, "loss": 0.2593, "step": 3850 }, { "epoch": 1.2, "learning_rate": 1.7600248678893383e-05, "loss": 0.4994, "step": 3860 }, { "epoch": 1.2, "learning_rate": 1.759403170655891e-05, "loss": 0.4298, "step": 3870 }, { "epoch": 1.21, "learning_rate": 1.7587814734224435e-05, "loss": 0.3246, "step": 3880 }, { "epoch": 1.21, "learning_rate": 1.758159776188996e-05, "loss": 0.5039, "step": 3890 }, { "epoch": 1.21, "learning_rate": 1.7575380789555487e-05, "loss": 0.1574, "step": 3900 }, { "epoch": 1.22, "learning_rate": 1.7569163817221016e-05, "loss": 0.41, "step": 3910 }, { "epoch": 1.22, "learning_rate": 1.7562946844886542e-05, "loss": 0.2697, "step": 3920 }, { "epoch": 1.22, "learning_rate": 1.7556729872552068e-05, "loss": 0.4293, "step": 3930 }, { "epoch": 1.22, "learning_rate": 1.7550512900217594e-05, "loss": 0.453, "step": 3940 }, { "epoch": 1.23, "learning_rate": 1.754429592788312e-05, "loss": 0.2419, "step": 3950 }, { "epoch": 1.23, "learning_rate": 1.753807895554865e-05, "loss": 0.727, "step": 3960 }, { "epoch": 1.23, "learning_rate": 1.7531861983214175e-05, "loss": 0.3331, "step": 3970 }, { "epoch": 1.24, "learning_rate": 1.7525645010879705e-05, "loss": 0.2573, "step": 3980 }, { "epoch": 1.24, "learning_rate": 1.751942803854523e-05, "loss": 0.5971, "step": 3990 }, { "epoch": 1.24, "learning_rate": 1.7513211066210757e-05, "loss": 0.4708, "step": 4000 }, { "epoch": 1.25, "learning_rate": 1.7506994093876283e-05, "loss": 0.3775, "step": 4010 }, { "epoch": 1.25, "learning_rate": 1.750077712154181e-05, "loss": 0.3699, "step": 4020 }, { "epoch": 1.25, "learning_rate": 1.7494560149207338e-05, "loss": 0.3123, "step": 4030 }, { "epoch": 1.26, "learning_rate": 1.7488343176872864e-05, "loss": 0.2713, "step": 4040 }, { "epoch": 1.26, "learning_rate": 1.7482126204538393e-05, "loss": 0.4257, "step": 4050 }, { "epoch": 1.26, "learning_rate": 1.747590923220392e-05, "loss": 0.1502, "step": 4060 }, { "epoch": 1.27, "learning_rate": 1.7469692259869445e-05, "loss": 0.596, "step": 4070 }, { "epoch": 1.27, "learning_rate": 1.746347528753497e-05, "loss": 0.4836, "step": 4080 }, { "epoch": 1.27, "learning_rate": 1.7457258315200497e-05, "loss": 0.3498, "step": 4090 }, { "epoch": 1.27, "learning_rate": 1.7451041342866027e-05, "loss": 0.3909, "step": 4100 }, { "epoch": 1.28, "learning_rate": 1.7444824370531552e-05, "loss": 0.2435, "step": 4110 }, { "epoch": 1.28, "learning_rate": 1.7438607398197082e-05, "loss": 0.3234, "step": 4120 }, { "epoch": 1.28, "learning_rate": 1.7432390425862608e-05, "loss": 0.52, "step": 4130 }, { "epoch": 1.29, "learning_rate": 1.7426173453528134e-05, "loss": 0.2988, "step": 4140 }, { "epoch": 1.29, "learning_rate": 1.741995648119366e-05, "loss": 0.1544, "step": 4150 }, { "epoch": 1.29, "learning_rate": 1.7413739508859186e-05, "loss": 0.2521, "step": 4160 }, { "epoch": 1.3, "learning_rate": 1.7407522536524715e-05, "loss": 0.1565, "step": 4170 }, { "epoch": 1.3, "learning_rate": 1.740130556419024e-05, "loss": 0.3987, "step": 4180 }, { "epoch": 1.3, "learning_rate": 1.7395088591855767e-05, "loss": 0.3408, "step": 4190 }, { "epoch": 1.31, "learning_rate": 1.7388871619521296e-05, "loss": 0.3279, "step": 4200 }, { "epoch": 1.31, "learning_rate": 1.7382654647186822e-05, "loss": 0.2487, "step": 4210 }, { "epoch": 1.31, "learning_rate": 1.737643767485235e-05, "loss": 0.4678, "step": 4220 }, { "epoch": 1.31, "learning_rate": 1.7370220702517874e-05, "loss": 0.6319, "step": 4230 }, { "epoch": 1.32, "learning_rate": 1.7364003730183404e-05, "loss": 0.368, "step": 4240 }, { "epoch": 1.32, "learning_rate": 1.735778675784893e-05, "loss": 0.2935, "step": 4250 }, { "epoch": 1.32, "learning_rate": 1.7351569785514456e-05, "loss": 0.4319, "step": 4260 }, { "epoch": 1.33, "learning_rate": 1.734535281317998e-05, "loss": 0.2309, "step": 4270 }, { "epoch": 1.33, "learning_rate": 1.7339135840845508e-05, "loss": 0.4977, "step": 4280 }, { "epoch": 1.33, "learning_rate": 1.7332918868511037e-05, "loss": 0.3235, "step": 4290 }, { "epoch": 1.34, "learning_rate": 1.7326701896176563e-05, "loss": 0.3343, "step": 4300 }, { "epoch": 1.34, "learning_rate": 1.7320484923842092e-05, "loss": 0.3089, "step": 4310 }, { "epoch": 1.34, "learning_rate": 1.7314267951507618e-05, "loss": 0.2759, "step": 4320 }, { "epoch": 1.35, "learning_rate": 1.7308050979173144e-05, "loss": 0.4079, "step": 4330 }, { "epoch": 1.35, "learning_rate": 1.730183400683867e-05, "loss": 0.333, "step": 4340 }, { "epoch": 1.35, "learning_rate": 1.7295617034504196e-05, "loss": 0.1861, "step": 4350 }, { "epoch": 1.36, "learning_rate": 1.7289400062169726e-05, "loss": 0.5839, "step": 4360 }, { "epoch": 1.36, "learning_rate": 1.728318308983525e-05, "loss": 0.2576, "step": 4370 }, { "epoch": 1.36, "learning_rate": 1.7276966117500777e-05, "loss": 0.2217, "step": 4380 }, { "epoch": 1.36, "learning_rate": 1.7270749145166307e-05, "loss": 0.4484, "step": 4390 }, { "epoch": 1.37, "learning_rate": 1.7264532172831833e-05, "loss": 0.2982, "step": 4400 }, { "epoch": 1.37, "learning_rate": 1.725831520049736e-05, "loss": 0.1859, "step": 4410 }, { "epoch": 1.37, "learning_rate": 1.7252098228162885e-05, "loss": 0.3999, "step": 4420 }, { "epoch": 1.38, "learning_rate": 1.7245881255828414e-05, "loss": 0.2639, "step": 4430 }, { "epoch": 1.38, "learning_rate": 1.723966428349394e-05, "loss": 0.4321, "step": 4440 }, { "epoch": 1.38, "learning_rate": 1.7233447311159466e-05, "loss": 0.331, "step": 4450 }, { "epoch": 1.39, "learning_rate": 1.7227230338824995e-05, "loss": 0.1955, "step": 4460 }, { "epoch": 1.39, "learning_rate": 1.722101336649052e-05, "loss": 0.2845, "step": 4470 }, { "epoch": 1.39, "learning_rate": 1.7214796394156047e-05, "loss": 0.3319, "step": 4480 }, { "epoch": 1.4, "learning_rate": 1.7208579421821573e-05, "loss": 0.3861, "step": 4490 }, { "epoch": 1.4, "learning_rate": 1.7202362449487103e-05, "loss": 0.3879, "step": 4500 }, { "epoch": 1.4, "learning_rate": 1.719614547715263e-05, "loss": 0.4739, "step": 4510 }, { "epoch": 1.41, "learning_rate": 1.7189928504818155e-05, "loss": 0.3034, "step": 4520 }, { "epoch": 1.41, "learning_rate": 1.718371153248368e-05, "loss": 0.4008, "step": 4530 }, { "epoch": 1.41, "learning_rate": 1.7177494560149207e-05, "loss": 0.3119, "step": 4540 }, { "epoch": 1.41, "learning_rate": 1.7171277587814736e-05, "loss": 0.5193, "step": 4550 }, { "epoch": 1.42, "learning_rate": 1.7165060615480262e-05, "loss": 0.3931, "step": 4560 }, { "epoch": 1.42, "learning_rate": 1.7158843643145788e-05, "loss": 0.2968, "step": 4570 }, { "epoch": 1.42, "learning_rate": 1.7152626670811317e-05, "loss": 0.34, "step": 4580 }, { "epoch": 1.43, "learning_rate": 1.7146409698476843e-05, "loss": 0.2618, "step": 4590 }, { "epoch": 1.43, "learning_rate": 1.714019272614237e-05, "loss": 0.1246, "step": 4600 }, { "epoch": 1.43, "learning_rate": 1.7133975753807895e-05, "loss": 0.3312, "step": 4610 }, { "epoch": 1.44, "learning_rate": 1.7127758781473425e-05, "loss": 0.3757, "step": 4620 }, { "epoch": 1.44, "learning_rate": 1.712154180913895e-05, "loss": 0.3147, "step": 4630 }, { "epoch": 1.44, "learning_rate": 1.7115324836804476e-05, "loss": 0.3723, "step": 4640 }, { "epoch": 1.45, "learning_rate": 1.7109107864470006e-05, "loss": 0.2618, "step": 4650 }, { "epoch": 1.45, "learning_rate": 1.7102890892135532e-05, "loss": 0.64, "step": 4660 }, { "epoch": 1.45, "learning_rate": 1.7096673919801058e-05, "loss": 0.2262, "step": 4670 }, { "epoch": 1.45, "learning_rate": 1.7090456947466584e-05, "loss": 0.2451, "step": 4680 }, { "epoch": 1.46, "learning_rate": 1.7084239975132113e-05, "loss": 0.4224, "step": 4690 }, { "epoch": 1.46, "learning_rate": 1.707802300279764e-05, "loss": 0.3583, "step": 4700 }, { "epoch": 1.46, "learning_rate": 1.7071806030463165e-05, "loss": 0.2406, "step": 4710 }, { "epoch": 1.47, "learning_rate": 1.7065589058128694e-05, "loss": 0.1674, "step": 4720 }, { "epoch": 1.47, "learning_rate": 1.705937208579422e-05, "loss": 0.3906, "step": 4730 }, { "epoch": 1.47, "learning_rate": 1.7053155113459746e-05, "loss": 0.9655, "step": 4740 }, { "epoch": 1.48, "learning_rate": 1.7046938141125272e-05, "loss": 0.5447, "step": 4750 }, { "epoch": 1.48, "learning_rate": 1.70407211687908e-05, "loss": 0.1972, "step": 4760 }, { "epoch": 1.48, "learning_rate": 1.7034504196456328e-05, "loss": 0.1875, "step": 4770 }, { "epoch": 1.49, "learning_rate": 1.7028287224121854e-05, "loss": 0.096, "step": 4780 }, { "epoch": 1.49, "learning_rate": 1.7022070251787383e-05, "loss": 0.5695, "step": 4790 }, { "epoch": 1.49, "learning_rate": 1.701585327945291e-05, "loss": 0.5371, "step": 4800 }, { "epoch": 1.5, "learning_rate": 1.7009636307118435e-05, "loss": 0.7072, "step": 4810 }, { "epoch": 1.5, "learning_rate": 1.700341933478396e-05, "loss": 0.2661, "step": 4820 }, { "epoch": 1.5, "learning_rate": 1.6997202362449487e-05, "loss": 0.3033, "step": 4830 }, { "epoch": 1.5, "learning_rate": 1.6990985390115016e-05, "loss": 0.3402, "step": 4840 }, { "epoch": 1.51, "learning_rate": 1.6984768417780542e-05, "loss": 0.3117, "step": 4850 }, { "epoch": 1.51, "learning_rate": 1.6978551445446068e-05, "loss": 0.3658, "step": 4860 }, { "epoch": 1.51, "learning_rate": 1.6972334473111594e-05, "loss": 0.253, "step": 4870 }, { "epoch": 1.52, "learning_rate": 1.6966117500777124e-05, "loss": 0.468, "step": 4880 }, { "epoch": 1.52, "learning_rate": 1.695990052844265e-05, "loss": 0.1942, "step": 4890 }, { "epoch": 1.52, "learning_rate": 1.6953683556108175e-05, "loss": 0.4169, "step": 4900 }, { "epoch": 1.53, "learning_rate": 1.6947466583773705e-05, "loss": 0.4781, "step": 4910 }, { "epoch": 1.53, "learning_rate": 1.694124961143923e-05, "loss": 0.3038, "step": 4920 }, { "epoch": 1.53, "learning_rate": 1.6935032639104757e-05, "loss": 0.212, "step": 4930 }, { "epoch": 1.54, "learning_rate": 1.6928815666770283e-05, "loss": 0.1975, "step": 4940 }, { "epoch": 1.54, "learning_rate": 1.692259869443581e-05, "loss": 0.2721, "step": 4950 }, { "epoch": 1.54, "learning_rate": 1.6916381722101338e-05, "loss": 0.5172, "step": 4960 }, { "epoch": 1.54, "learning_rate": 1.6910164749766864e-05, "loss": 0.4359, "step": 4970 }, { "epoch": 1.55, "learning_rate": 1.6903947777432393e-05, "loss": 0.7202, "step": 4980 }, { "epoch": 1.55, "learning_rate": 1.689773080509792e-05, "loss": 0.2148, "step": 4990 }, { "epoch": 1.55, "learning_rate": 1.6891513832763445e-05, "loss": 0.3911, "step": 5000 }, { "epoch": 1.56, "learning_rate": 1.688529686042897e-05, "loss": 0.2344, "step": 5010 }, { "epoch": 1.56, "learning_rate": 1.6879079888094497e-05, "loss": 0.2388, "step": 5020 }, { "epoch": 1.56, "learning_rate": 1.6872862915760027e-05, "loss": 0.1307, "step": 5030 }, { "epoch": 1.57, "learning_rate": 1.6866645943425553e-05, "loss": 0.3176, "step": 5040 }, { "epoch": 1.57, "learning_rate": 1.6860428971091082e-05, "loss": 0.4418, "step": 5050 }, { "epoch": 1.57, "learning_rate": 1.6854211998756608e-05, "loss": 0.767, "step": 5060 }, { "epoch": 1.58, "learning_rate": 1.6847995026422134e-05, "loss": 0.7543, "step": 5070 }, { "epoch": 1.58, "learning_rate": 1.684177805408766e-05, "loss": 0.2955, "step": 5080 }, { "epoch": 1.58, "learning_rate": 1.6835561081753186e-05, "loss": 0.3047, "step": 5090 }, { "epoch": 1.59, "learning_rate": 1.6829344109418715e-05, "loss": 0.1824, "step": 5100 }, { "epoch": 1.59, "learning_rate": 1.682312713708424e-05, "loss": 0.4497, "step": 5110 }, { "epoch": 1.59, "learning_rate": 1.681691016474977e-05, "loss": 0.3614, "step": 5120 }, { "epoch": 1.59, "learning_rate": 1.6810693192415297e-05, "loss": 0.489, "step": 5130 }, { "epoch": 1.6, "learning_rate": 1.6804476220080823e-05, "loss": 0.3513, "step": 5140 }, { "epoch": 1.6, "learning_rate": 1.679825924774635e-05, "loss": 0.5847, "step": 5150 }, { "epoch": 1.6, "learning_rate": 1.6792042275411874e-05, "loss": 0.5013, "step": 5160 }, { "epoch": 1.61, "learning_rate": 1.6785825303077404e-05, "loss": 0.1718, "step": 5170 }, { "epoch": 1.61, "learning_rate": 1.677960833074293e-05, "loss": 0.4753, "step": 5180 }, { "epoch": 1.61, "learning_rate": 1.6773391358408456e-05, "loss": 0.2089, "step": 5190 }, { "epoch": 1.62, "learning_rate": 1.6767174386073982e-05, "loss": 0.2199, "step": 5200 }, { "epoch": 1.62, "learning_rate": 1.6760957413739508e-05, "loss": 0.4999, "step": 5210 }, { "epoch": 1.62, "learning_rate": 1.6754740441405037e-05, "loss": 0.1977, "step": 5220 }, { "epoch": 1.63, "learning_rate": 1.6748523469070563e-05, "loss": 0.4475, "step": 5230 }, { "epoch": 1.63, "learning_rate": 1.6742306496736092e-05, "loss": 0.5593, "step": 5240 }, { "epoch": 1.63, "learning_rate": 1.673608952440162e-05, "loss": 0.404, "step": 5250 }, { "epoch": 1.64, "learning_rate": 1.6729872552067144e-05, "loss": 0.2078, "step": 5260 }, { "epoch": 1.64, "learning_rate": 1.672365557973267e-05, "loss": 0.321, "step": 5270 }, { "epoch": 1.64, "learning_rate": 1.6717438607398196e-05, "loss": 0.5228, "step": 5280 }, { "epoch": 1.64, "learning_rate": 1.6711221635063726e-05, "loss": 0.6111, "step": 5290 }, { "epoch": 1.65, "learning_rate": 1.670500466272925e-05, "loss": 0.176, "step": 5300 }, { "epoch": 1.65, "learning_rate": 1.669878769039478e-05, "loss": 0.1933, "step": 5310 }, { "epoch": 1.65, "learning_rate": 1.6692570718060307e-05, "loss": 0.2059, "step": 5320 }, { "epoch": 1.66, "learning_rate": 1.6686353745725833e-05, "loss": 0.5611, "step": 5330 }, { "epoch": 1.66, "learning_rate": 1.668013677339136e-05, "loss": 0.332, "step": 5340 }, { "epoch": 1.66, "learning_rate": 1.6673919801056885e-05, "loss": 0.2727, "step": 5350 }, { "epoch": 1.67, "learning_rate": 1.6667702828722414e-05, "loss": 0.5182, "step": 5360 }, { "epoch": 1.67, "learning_rate": 1.666148585638794e-05, "loss": 0.441, "step": 5370 }, { "epoch": 1.67, "learning_rate": 1.665526888405347e-05, "loss": 0.3166, "step": 5380 }, { "epoch": 1.68, "learning_rate": 1.6649051911718996e-05, "loss": 0.2898, "step": 5390 }, { "epoch": 1.68, "learning_rate": 1.664283493938452e-05, "loss": 0.0949, "step": 5400 }, { "epoch": 1.68, "learning_rate": 1.6636617967050048e-05, "loss": 0.2398, "step": 5410 }, { "epoch": 1.68, "learning_rate": 1.6630400994715573e-05, "loss": 0.5997, "step": 5420 }, { "epoch": 1.69, "learning_rate": 1.6624184022381103e-05, "loss": 0.4768, "step": 5430 }, { "epoch": 1.69, "learning_rate": 1.661796705004663e-05, "loss": 0.653, "step": 5440 }, { "epoch": 1.69, "learning_rate": 1.6611750077712158e-05, "loss": 0.248, "step": 5450 }, { "epoch": 1.7, "learning_rate": 1.6605533105377684e-05, "loss": 0.3154, "step": 5460 }, { "epoch": 1.7, "learning_rate": 1.6599316133043207e-05, "loss": 0.3417, "step": 5470 }, { "epoch": 1.7, "learning_rate": 1.6593099160708736e-05, "loss": 0.1435, "step": 5480 }, { "epoch": 1.71, "learning_rate": 1.6586882188374262e-05, "loss": 0.401, "step": 5490 }, { "epoch": 1.71, "learning_rate": 1.658066521603979e-05, "loss": 0.7739, "step": 5500 }, { "epoch": 1.71, "learning_rate": 1.6574448243705317e-05, "loss": 0.4643, "step": 5510 }, { "epoch": 1.72, "learning_rate": 1.6568231271370843e-05, "loss": 0.2474, "step": 5520 }, { "epoch": 1.72, "learning_rate": 1.656201429903637e-05, "loss": 0.2742, "step": 5530 }, { "epoch": 1.72, "learning_rate": 1.6555797326701895e-05, "loss": 0.1744, "step": 5540 }, { "epoch": 1.73, "learning_rate": 1.6549580354367425e-05, "loss": 0.4731, "step": 5550 }, { "epoch": 1.73, "learning_rate": 1.654336338203295e-05, "loss": 0.2722, "step": 5560 }, { "epoch": 1.73, "learning_rate": 1.653714640969848e-05, "loss": 0.374, "step": 5570 }, { "epoch": 1.73, "learning_rate": 1.6530929437364006e-05, "loss": 0.4593, "step": 5580 }, { "epoch": 1.74, "learning_rate": 1.6524712465029532e-05, "loss": 0.2849, "step": 5590 }, { "epoch": 1.74, "learning_rate": 1.6518495492695058e-05, "loss": 0.1076, "step": 5600 }, { "epoch": 1.74, "learning_rate": 1.6512278520360584e-05, "loss": 0.2344, "step": 5610 }, { "epoch": 1.75, "learning_rate": 1.6506061548026113e-05, "loss": 0.279, "step": 5620 }, { "epoch": 1.75, "learning_rate": 1.649984457569164e-05, "loss": 0.3346, "step": 5630 }, { "epoch": 1.75, "learning_rate": 1.649362760335717e-05, "loss": 0.4858, "step": 5640 }, { "epoch": 1.76, "learning_rate": 1.6487410631022695e-05, "loss": 0.3394, "step": 5650 }, { "epoch": 1.76, "learning_rate": 1.648119365868822e-05, "loss": 0.1388, "step": 5660 }, { "epoch": 1.76, "learning_rate": 1.6474976686353747e-05, "loss": 0.5027, "step": 5670 }, { "epoch": 1.77, "learning_rate": 1.6468759714019272e-05, "loss": 0.5911, "step": 5680 }, { "epoch": 1.77, "learning_rate": 1.6462542741684802e-05, "loss": 0.3904, "step": 5690 }, { "epoch": 1.77, "learning_rate": 1.6456325769350328e-05, "loss": 0.223, "step": 5700 }, { "epoch": 1.77, "learning_rate": 1.6450108797015857e-05, "loss": 0.2265, "step": 5710 }, { "epoch": 1.78, "learning_rate": 1.6443891824681383e-05, "loss": 0.249, "step": 5720 }, { "epoch": 1.78, "learning_rate": 1.643767485234691e-05, "loss": 0.4434, "step": 5730 }, { "epoch": 1.78, "learning_rate": 1.6431457880012435e-05, "loss": 0.0947, "step": 5740 }, { "epoch": 1.79, "learning_rate": 1.642524090767796e-05, "loss": 0.5192, "step": 5750 }, { "epoch": 1.79, "learning_rate": 1.641902393534349e-05, "loss": 0.435, "step": 5760 }, { "epoch": 1.79, "learning_rate": 1.6412806963009016e-05, "loss": 0.2428, "step": 5770 }, { "epoch": 1.8, "learning_rate": 1.6406589990674542e-05, "loss": 0.4591, "step": 5780 }, { "epoch": 1.8, "learning_rate": 1.640037301834007e-05, "loss": 0.5817, "step": 5790 }, { "epoch": 1.8, "learning_rate": 1.6394156046005594e-05, "loss": 0.4646, "step": 5800 }, { "epoch": 1.81, "learning_rate": 1.6387939073671124e-05, "loss": 0.2491, "step": 5810 }, { "epoch": 1.81, "learning_rate": 1.638172210133665e-05, "loss": 0.5547, "step": 5820 }, { "epoch": 1.81, "learning_rate": 1.637550512900218e-05, "loss": 0.2285, "step": 5830 }, { "epoch": 1.82, "learning_rate": 1.6369288156667705e-05, "loss": 0.3101, "step": 5840 }, { "epoch": 1.82, "learning_rate": 1.636307118433323e-05, "loss": 0.2474, "step": 5850 }, { "epoch": 1.82, "learning_rate": 1.6356854211998757e-05, "loss": 0.3656, "step": 5860 }, { "epoch": 1.82, "learning_rate": 1.6350637239664283e-05, "loss": 0.3136, "step": 5870 }, { "epoch": 1.83, "learning_rate": 1.6344420267329812e-05, "loss": 0.6278, "step": 5880 }, { "epoch": 1.83, "learning_rate": 1.6338203294995338e-05, "loss": 0.2112, "step": 5890 }, { "epoch": 1.83, "learning_rate": 1.6331986322660868e-05, "loss": 0.3855, "step": 5900 }, { "epoch": 1.84, "learning_rate": 1.6325769350326394e-05, "loss": 0.4408, "step": 5910 }, { "epoch": 1.84, "learning_rate": 1.631955237799192e-05, "loss": 0.2703, "step": 5920 }, { "epoch": 1.84, "learning_rate": 1.6313335405657446e-05, "loss": 0.2687, "step": 5930 }, { "epoch": 1.85, "learning_rate": 1.630711843332297e-05, "loss": 0.2213, "step": 5940 }, { "epoch": 1.85, "learning_rate": 1.63009014609885e-05, "loss": 0.358, "step": 5950 }, { "epoch": 1.85, "learning_rate": 1.6294684488654027e-05, "loss": 0.5565, "step": 5960 }, { "epoch": 1.86, "learning_rate": 1.6288467516319553e-05, "loss": 0.4539, "step": 5970 }, { "epoch": 1.86, "learning_rate": 1.6282250543985082e-05, "loss": 0.482, "step": 5980 }, { "epoch": 1.86, "learning_rate": 1.6276033571650608e-05, "loss": 0.3592, "step": 5990 }, { "epoch": 1.87, "learning_rate": 1.6269816599316134e-05, "loss": 0.3426, "step": 6000 }, { "epoch": 1.87, "learning_rate": 1.626359962698166e-05, "loss": 0.202, "step": 6010 }, { "epoch": 1.87, "learning_rate": 1.625738265464719e-05, "loss": 0.1354, "step": 6020 }, { "epoch": 1.87, "learning_rate": 1.6251165682312715e-05, "loss": 0.4201, "step": 6030 }, { "epoch": 1.88, "learning_rate": 1.624494870997824e-05, "loss": 0.276, "step": 6040 }, { "epoch": 1.88, "learning_rate": 1.623873173764377e-05, "loss": 0.2912, "step": 6050 }, { "epoch": 1.88, "learning_rate": 1.6232514765309297e-05, "loss": 0.3827, "step": 6060 }, { "epoch": 1.89, "learning_rate": 1.6226297792974823e-05, "loss": 0.2343, "step": 6070 }, { "epoch": 1.89, "learning_rate": 1.622008082064035e-05, "loss": 0.3081, "step": 6080 }, { "epoch": 1.89, "learning_rate": 1.6213863848305878e-05, "loss": 0.2955, "step": 6090 }, { "epoch": 1.9, "learning_rate": 1.6207646875971404e-05, "loss": 0.3015, "step": 6100 }, { "epoch": 1.9, "learning_rate": 1.620142990363693e-05, "loss": 0.3354, "step": 6110 }, { "epoch": 1.9, "learning_rate": 1.6195212931302456e-05, "loss": 0.228, "step": 6120 }, { "epoch": 1.91, "learning_rate": 1.6188995958967982e-05, "loss": 0.4827, "step": 6130 }, { "epoch": 1.91, "learning_rate": 1.618277898663351e-05, "loss": 0.4419, "step": 6140 }, { "epoch": 1.91, "learning_rate": 1.6176562014299037e-05, "loss": 0.346, "step": 6150 }, { "epoch": 1.91, "learning_rate": 1.6170345041964563e-05, "loss": 0.4654, "step": 6160 }, { "epoch": 1.92, "learning_rate": 1.6164128069630093e-05, "loss": 0.2568, "step": 6170 }, { "epoch": 1.92, "learning_rate": 1.615791109729562e-05, "loss": 0.5036, "step": 6180 }, { "epoch": 1.92, "learning_rate": 1.6151694124961145e-05, "loss": 0.6418, "step": 6190 }, { "epoch": 1.93, "learning_rate": 1.614547715262667e-05, "loss": 0.2579, "step": 6200 }, { "epoch": 1.93, "learning_rate": 1.61392601802922e-05, "loss": 0.157, "step": 6210 }, { "epoch": 1.93, "learning_rate": 1.6133043207957726e-05, "loss": 0.4442, "step": 6220 }, { "epoch": 1.94, "learning_rate": 1.6126826235623252e-05, "loss": 0.6774, "step": 6230 }, { "epoch": 1.94, "learning_rate": 1.612060926328878e-05, "loss": 0.2862, "step": 6240 }, { "epoch": 1.94, "learning_rate": 1.6114392290954307e-05, "loss": 0.4529, "step": 6250 }, { "epoch": 1.95, "learning_rate": 1.6108175318619833e-05, "loss": 0.662, "step": 6260 }, { "epoch": 1.95, "learning_rate": 1.610195834628536e-05, "loss": 0.2948, "step": 6270 }, { "epoch": 1.95, "learning_rate": 1.609574137395089e-05, "loss": 0.3119, "step": 6280 }, { "epoch": 1.96, "learning_rate": 1.6089524401616414e-05, "loss": 0.2582, "step": 6290 }, { "epoch": 1.96, "learning_rate": 1.608330742928194e-05, "loss": 0.335, "step": 6300 }, { "epoch": 1.96, "learning_rate": 1.607709045694747e-05, "loss": 0.2673, "step": 6310 }, { "epoch": 1.96, "learning_rate": 1.6070873484612996e-05, "loss": 0.2459, "step": 6320 }, { "epoch": 1.97, "learning_rate": 1.6064656512278522e-05, "loss": 0.0646, "step": 6330 }, { "epoch": 1.97, "learning_rate": 1.6058439539944048e-05, "loss": 0.2593, "step": 6340 }, { "epoch": 1.97, "learning_rate": 1.6052222567609574e-05, "loss": 0.2062, "step": 6350 }, { "epoch": 1.98, "learning_rate": 1.6046005595275103e-05, "loss": 0.2524, "step": 6360 }, { "epoch": 1.98, "learning_rate": 1.603978862294063e-05, "loss": 0.4636, "step": 6370 }, { "epoch": 1.98, "learning_rate": 1.603357165060616e-05, "loss": 0.0851, "step": 6380 }, { "epoch": 1.99, "learning_rate": 1.6027354678271684e-05, "loss": 0.3375, "step": 6390 }, { "epoch": 1.99, "learning_rate": 1.602113770593721e-05, "loss": 0.5345, "step": 6400 }, { "epoch": 1.99, "learning_rate": 1.6014920733602736e-05, "loss": 0.5729, "step": 6410 }, { "epoch": 2.0, "learning_rate": 1.6008703761268262e-05, "loss": 0.2201, "step": 6420 }, { "epoch": 2.0, "learning_rate": 1.600248678893379e-05, "loss": 0.6726, "step": 6430 }, { "epoch": 2.0, "eval_FN": 133, "eval_FP": 324, "eval_TN": 5514, "eval_TP": 462, "eval_accuracy": 0.92896004974351, "eval_f1": 0.6690803765387401, "eval_loss": 0.40895432233810425, "eval_precision": 0.5877862595419847, "eval_recall": 0.7764705882352941, "eval_runtime": 44.4077, "eval_samples_per_second": 144.862, "eval_steps_per_second": 9.075, "step": 6434 }, { "epoch": 2.0, "learning_rate": 1.5996269816599318e-05, "loss": 0.1813, "step": 6440 }, { "epoch": 2.0, "learning_rate": 1.5990052844264844e-05, "loss": 0.5944, "step": 6450 }, { "epoch": 2.01, "learning_rate": 1.598383587193037e-05, "loss": 0.267, "step": 6460 }, { "epoch": 2.01, "learning_rate": 1.59776188995959e-05, "loss": 0.2258, "step": 6470 }, { "epoch": 2.01, "learning_rate": 1.5971401927261425e-05, "loss": 0.3102, "step": 6480 }, { "epoch": 2.02, "learning_rate": 1.596518495492695e-05, "loss": 0.4381, "step": 6490 }, { "epoch": 2.02, "learning_rate": 1.595896798259248e-05, "loss": 0.264, "step": 6500 }, { "epoch": 2.02, "learning_rate": 1.5952751010258006e-05, "loss": 0.5395, "step": 6510 }, { "epoch": 2.03, "learning_rate": 1.5946534037923532e-05, "loss": 0.2059, "step": 6520 }, { "epoch": 2.03, "learning_rate": 1.5940317065589058e-05, "loss": 0.2362, "step": 6530 }, { "epoch": 2.03, "learning_rate": 1.5934100093254584e-05, "loss": 0.1631, "step": 6540 }, { "epoch": 2.04, "learning_rate": 1.5927883120920113e-05, "loss": 0.4465, "step": 6550 }, { "epoch": 2.04, "learning_rate": 1.592166614858564e-05, "loss": 0.2475, "step": 6560 }, { "epoch": 2.04, "learning_rate": 1.591544917625117e-05, "loss": 0.0983, "step": 6570 }, { "epoch": 2.05, "learning_rate": 1.5909232203916695e-05, "loss": 0.3697, "step": 6580 }, { "epoch": 2.05, "learning_rate": 1.590301523158222e-05, "loss": 0.71, "step": 6590 }, { "epoch": 2.05, "learning_rate": 1.5896798259247747e-05, "loss": 0.2819, "step": 6600 }, { "epoch": 2.05, "learning_rate": 1.5890581286913273e-05, "loss": 0.1553, "step": 6610 }, { "epoch": 2.06, "learning_rate": 1.5884364314578802e-05, "loss": 0.2079, "step": 6620 }, { "epoch": 2.06, "learning_rate": 1.5878147342244328e-05, "loss": 0.7133, "step": 6630 }, { "epoch": 2.06, "learning_rate": 1.5871930369909857e-05, "loss": 0.3886, "step": 6640 }, { "epoch": 2.07, "learning_rate": 1.5865713397575383e-05, "loss": 0.2433, "step": 6650 }, { "epoch": 2.07, "learning_rate": 1.585949642524091e-05, "loss": 0.2231, "step": 6660 }, { "epoch": 2.07, "learning_rate": 1.5853279452906435e-05, "loss": 0.0896, "step": 6670 }, { "epoch": 2.08, "learning_rate": 1.584706248057196e-05, "loss": 0.2665, "step": 6680 }, { "epoch": 2.08, "learning_rate": 1.584084550823749e-05, "loss": 0.2281, "step": 6690 }, { "epoch": 2.08, "learning_rate": 1.5834628535903017e-05, "loss": 0.1765, "step": 6700 }, { "epoch": 2.09, "learning_rate": 1.5828411563568546e-05, "loss": 0.2732, "step": 6710 }, { "epoch": 2.09, "learning_rate": 1.582219459123407e-05, "loss": 0.1995, "step": 6720 }, { "epoch": 2.09, "learning_rate": 1.5815977618899594e-05, "loss": 0.1698, "step": 6730 }, { "epoch": 2.1, "learning_rate": 1.5809760646565124e-05, "loss": 0.2018, "step": 6740 }, { "epoch": 2.1, "learning_rate": 1.580354367423065e-05, "loss": 0.3361, "step": 6750 }, { "epoch": 2.1, "learning_rate": 1.579732670189618e-05, "loss": 0.2453, "step": 6760 }, { "epoch": 2.1, "learning_rate": 1.5791109729561705e-05, "loss": 0.2961, "step": 6770 }, { "epoch": 2.11, "learning_rate": 1.578489275722723e-05, "loss": 0.4528, "step": 6780 }, { "epoch": 2.11, "learning_rate": 1.5778675784892757e-05, "loss": 0.4045, "step": 6790 }, { "epoch": 2.11, "learning_rate": 1.5772458812558283e-05, "loss": 0.2962, "step": 6800 }, { "epoch": 2.12, "learning_rate": 1.5766241840223812e-05, "loss": 0.1578, "step": 6810 }, { "epoch": 2.12, "learning_rate": 1.576002486788934e-05, "loss": 0.248, "step": 6820 }, { "epoch": 2.12, "learning_rate": 1.5753807895554868e-05, "loss": 0.3148, "step": 6830 }, { "epoch": 2.13, "learning_rate": 1.5747590923220394e-05, "loss": 0.0852, "step": 6840 }, { "epoch": 2.13, "learning_rate": 1.574137395088592e-05, "loss": 0.195, "step": 6850 }, { "epoch": 2.13, "learning_rate": 1.5735156978551446e-05, "loss": 0.4466, "step": 6860 }, { "epoch": 2.14, "learning_rate": 1.572894000621697e-05, "loss": 0.2638, "step": 6870 }, { "epoch": 2.14, "learning_rate": 1.57227230338825e-05, "loss": 0.3344, "step": 6880 }, { "epoch": 2.14, "learning_rate": 1.5716506061548027e-05, "loss": 0.3095, "step": 6890 }, { "epoch": 2.14, "learning_rate": 1.5710289089213556e-05, "loss": 0.3434, "step": 6900 }, { "epoch": 2.15, "learning_rate": 1.5704072116879082e-05, "loss": 0.4114, "step": 6910 }, { "epoch": 2.15, "learning_rate": 1.5697855144544608e-05, "loss": 0.3126, "step": 6920 }, { "epoch": 2.15, "learning_rate": 1.5691638172210134e-05, "loss": 0.2724, "step": 6930 }, { "epoch": 2.16, "learning_rate": 1.568542119987566e-05, "loss": 0.1149, "step": 6940 }, { "epoch": 2.16, "learning_rate": 1.567920422754119e-05, "loss": 0.2532, "step": 6950 }, { "epoch": 2.16, "learning_rate": 1.5672987255206716e-05, "loss": 0.3052, "step": 6960 }, { "epoch": 2.17, "learning_rate": 1.5666770282872245e-05, "loss": 0.4397, "step": 6970 }, { "epoch": 2.17, "learning_rate": 1.566055331053777e-05, "loss": 0.2309, "step": 6980 }, { "epoch": 2.17, "learning_rate": 1.5654336338203297e-05, "loss": 0.1893, "step": 6990 }, { "epoch": 2.18, "learning_rate": 1.5648119365868823e-05, "loss": 0.2045, "step": 7000 }, { "epoch": 2.18, "learning_rate": 1.564190239353435e-05, "loss": 0.3222, "step": 7010 }, { "epoch": 2.18, "learning_rate": 1.5635685421199878e-05, "loss": 0.039, "step": 7020 }, { "epoch": 2.19, "learning_rate": 1.5629468448865404e-05, "loss": 0.2519, "step": 7030 }, { "epoch": 2.19, "learning_rate": 1.562325147653093e-05, "loss": 0.5661, "step": 7040 }, { "epoch": 2.19, "learning_rate": 1.5617034504196456e-05, "loss": 0.298, "step": 7050 }, { "epoch": 2.19, "learning_rate": 1.5610817531861982e-05, "loss": 0.2031, "step": 7060 }, { "epoch": 2.2, "learning_rate": 1.560460055952751e-05, "loss": 0.1769, "step": 7070 }, { "epoch": 2.2, "learning_rate": 1.5598383587193037e-05, "loss": 0.1547, "step": 7080 }, { "epoch": 2.2, "learning_rate": 1.5592166614858567e-05, "loss": 0.1361, "step": 7090 }, { "epoch": 2.21, "learning_rate": 1.5585949642524093e-05, "loss": 0.2575, "step": 7100 }, { "epoch": 2.21, "learning_rate": 1.557973267018962e-05, "loss": 0.3379, "step": 7110 }, { "epoch": 2.21, "learning_rate": 1.5573515697855145e-05, "loss": 0.2577, "step": 7120 }, { "epoch": 2.22, "learning_rate": 1.556729872552067e-05, "loss": 0.217, "step": 7130 }, { "epoch": 2.22, "learning_rate": 1.55610817531862e-05, "loss": 0.3033, "step": 7140 }, { "epoch": 2.22, "learning_rate": 1.5554864780851726e-05, "loss": 0.1165, "step": 7150 }, { "epoch": 2.23, "learning_rate": 1.5548647808517255e-05, "loss": 0.224, "step": 7160 }, { "epoch": 2.23, "learning_rate": 1.554243083618278e-05, "loss": 0.4493, "step": 7170 }, { "epoch": 2.23, "learning_rate": 1.5536213863848307e-05, "loss": 0.2279, "step": 7180 }, { "epoch": 2.24, "learning_rate": 1.5529996891513833e-05, "loss": 0.093, "step": 7190 }, { "epoch": 2.24, "learning_rate": 1.552377991917936e-05, "loss": 0.2848, "step": 7200 }, { "epoch": 2.24, "learning_rate": 1.551756294684489e-05, "loss": 0.4756, "step": 7210 }, { "epoch": 2.24, "learning_rate": 1.5511345974510415e-05, "loss": 0.1243, "step": 7220 }, { "epoch": 2.25, "learning_rate": 1.5505129002175944e-05, "loss": 0.3083, "step": 7230 }, { "epoch": 2.25, "learning_rate": 1.549891202984147e-05, "loss": 0.2611, "step": 7240 }, { "epoch": 2.25, "learning_rate": 1.5492695057506996e-05, "loss": 0.2058, "step": 7250 }, { "epoch": 2.26, "learning_rate": 1.5486478085172522e-05, "loss": 0.1417, "step": 7260 }, { "epoch": 2.26, "learning_rate": 1.5480261112838048e-05, "loss": 0.1198, "step": 7270 }, { "epoch": 2.26, "learning_rate": 1.5474044140503577e-05, "loss": 0.251, "step": 7280 }, { "epoch": 2.27, "learning_rate": 1.5467827168169103e-05, "loss": 0.398, "step": 7290 }, { "epoch": 2.27, "learning_rate": 1.546161019583463e-05, "loss": 0.0548, "step": 7300 }, { "epoch": 2.27, "learning_rate": 1.545539322350016e-05, "loss": 0.1959, "step": 7310 }, { "epoch": 2.28, "learning_rate": 1.5449176251165684e-05, "loss": 0.2703, "step": 7320 }, { "epoch": 2.28, "learning_rate": 1.544295927883121e-05, "loss": 0.3302, "step": 7330 }, { "epoch": 2.28, "learning_rate": 1.5436742306496736e-05, "loss": 0.1934, "step": 7340 }, { "epoch": 2.28, "learning_rate": 1.5430525334162266e-05, "loss": 0.3504, "step": 7350 }, { "epoch": 2.29, "learning_rate": 1.5424308361827792e-05, "loss": 0.4924, "step": 7360 }, { "epoch": 2.29, "learning_rate": 1.5418091389493318e-05, "loss": 0.6057, "step": 7370 }, { "epoch": 2.29, "learning_rate": 1.5411874417158844e-05, "loss": 0.5131, "step": 7380 }, { "epoch": 2.3, "learning_rate": 1.540565744482437e-05, "loss": 0.3049, "step": 7390 }, { "epoch": 2.3, "learning_rate": 1.53994404724899e-05, "loss": 0.2102, "step": 7400 }, { "epoch": 2.3, "learning_rate": 1.5393223500155425e-05, "loss": 0.3606, "step": 7410 }, { "epoch": 2.31, "learning_rate": 1.5387006527820954e-05, "loss": 0.3305, "step": 7420 }, { "epoch": 2.31, "learning_rate": 1.538078955548648e-05, "loss": 0.3558, "step": 7430 }, { "epoch": 2.31, "learning_rate": 1.5374572583152006e-05, "loss": 0.2355, "step": 7440 }, { "epoch": 2.32, "learning_rate": 1.5368355610817532e-05, "loss": 0.2332, "step": 7450 }, { "epoch": 2.32, "learning_rate": 1.5362138638483058e-05, "loss": 0.2847, "step": 7460 }, { "epoch": 2.32, "learning_rate": 1.5355921666148588e-05, "loss": 0.5049, "step": 7470 }, { "epoch": 2.33, "learning_rate": 1.5349704693814114e-05, "loss": 0.5843, "step": 7480 }, { "epoch": 2.33, "learning_rate": 1.534348772147964e-05, "loss": 0.105, "step": 7490 }, { "epoch": 2.33, "learning_rate": 1.533727074914517e-05, "loss": 0.4037, "step": 7500 }, { "epoch": 2.33, "learning_rate": 1.5331053776810695e-05, "loss": 0.0934, "step": 7510 }, { "epoch": 2.34, "learning_rate": 1.532483680447622e-05, "loss": 0.3423, "step": 7520 }, { "epoch": 2.34, "learning_rate": 1.5318619832141747e-05, "loss": 0.3233, "step": 7530 }, { "epoch": 2.34, "learning_rate": 1.5312402859807276e-05, "loss": 0.5253, "step": 7540 }, { "epoch": 2.35, "learning_rate": 1.5306185887472802e-05, "loss": 0.1933, "step": 7550 }, { "epoch": 2.35, "learning_rate": 1.5299968915138328e-05, "loss": 0.202, "step": 7560 }, { "epoch": 2.35, "learning_rate": 1.5293751942803858e-05, "loss": 0.1321, "step": 7570 }, { "epoch": 2.36, "learning_rate": 1.5287534970469383e-05, "loss": 0.3381, "step": 7580 }, { "epoch": 2.36, "learning_rate": 1.528131799813491e-05, "loss": 0.4625, "step": 7590 }, { "epoch": 2.36, "learning_rate": 1.5275101025800435e-05, "loss": 0.1714, "step": 7600 }, { "epoch": 2.37, "learning_rate": 1.5268884053465965e-05, "loss": 0.1265, "step": 7610 }, { "epoch": 2.37, "learning_rate": 1.526266708113149e-05, "loss": 0.3061, "step": 7620 }, { "epoch": 2.37, "learning_rate": 1.5256450108797017e-05, "loss": 0.1245, "step": 7630 }, { "epoch": 2.37, "learning_rate": 1.5250233136462544e-05, "loss": 0.3408, "step": 7640 }, { "epoch": 2.38, "learning_rate": 1.524401616412807e-05, "loss": 0.2358, "step": 7650 }, { "epoch": 2.38, "learning_rate": 1.5237799191793598e-05, "loss": 0.393, "step": 7660 }, { "epoch": 2.38, "learning_rate": 1.5231582219459124e-05, "loss": 0.2108, "step": 7670 }, { "epoch": 2.39, "learning_rate": 1.522536524712465e-05, "loss": 0.2221, "step": 7680 }, { "epoch": 2.39, "learning_rate": 1.521914827479018e-05, "loss": 0.2362, "step": 7690 }, { "epoch": 2.39, "learning_rate": 1.5212931302455705e-05, "loss": 0.2211, "step": 7700 }, { "epoch": 2.4, "learning_rate": 1.5206714330121233e-05, "loss": 0.1073, "step": 7710 }, { "epoch": 2.4, "learning_rate": 1.5200497357786759e-05, "loss": 0.1739, "step": 7720 }, { "epoch": 2.4, "learning_rate": 1.5194280385452287e-05, "loss": 0.2722, "step": 7730 }, { "epoch": 2.41, "learning_rate": 1.5188063413117813e-05, "loss": 0.256, "step": 7740 }, { "epoch": 2.41, "learning_rate": 1.5181846440783339e-05, "loss": 0.0998, "step": 7750 }, { "epoch": 2.41, "learning_rate": 1.5175629468448866e-05, "loss": 0.3814, "step": 7760 }, { "epoch": 2.42, "learning_rate": 1.5169412496114392e-05, "loss": 0.5612, "step": 7770 }, { "epoch": 2.42, "learning_rate": 1.5163195523779922e-05, "loss": 0.2457, "step": 7780 }, { "epoch": 2.42, "learning_rate": 1.5156978551445448e-05, "loss": 0.1488, "step": 7790 }, { "epoch": 2.42, "learning_rate": 1.5150761579110975e-05, "loss": 0.2677, "step": 7800 }, { "epoch": 2.43, "learning_rate": 1.5144544606776501e-05, "loss": 0.2545, "step": 7810 }, { "epoch": 2.43, "learning_rate": 1.5138327634442027e-05, "loss": 0.1366, "step": 7820 }, { "epoch": 2.43, "learning_rate": 1.5132110662107555e-05, "loss": 0.4033, "step": 7830 }, { "epoch": 2.44, "learning_rate": 1.512589368977308e-05, "loss": 0.1241, "step": 7840 }, { "epoch": 2.44, "learning_rate": 1.511967671743861e-05, "loss": 0.3157, "step": 7850 }, { "epoch": 2.44, "learning_rate": 1.5113459745104136e-05, "loss": 0.2749, "step": 7860 }, { "epoch": 2.45, "learning_rate": 1.5107242772769662e-05, "loss": 0.1839, "step": 7870 }, { "epoch": 2.45, "learning_rate": 1.510102580043519e-05, "loss": 0.1424, "step": 7880 }, { "epoch": 2.45, "learning_rate": 1.5094808828100716e-05, "loss": 0.3723, "step": 7890 }, { "epoch": 2.46, "learning_rate": 1.5088591855766243e-05, "loss": 0.2355, "step": 7900 }, { "epoch": 2.46, "learning_rate": 1.508237488343177e-05, "loss": 0.5928, "step": 7910 }, { "epoch": 2.46, "learning_rate": 1.5076157911097297e-05, "loss": 0.418, "step": 7920 }, { "epoch": 2.47, "learning_rate": 1.5069940938762823e-05, "loss": 0.1652, "step": 7930 }, { "epoch": 2.47, "learning_rate": 1.5063723966428349e-05, "loss": 0.4914, "step": 7940 }, { "epoch": 2.47, "learning_rate": 1.5057506994093878e-05, "loss": 0.1748, "step": 7950 }, { "epoch": 2.47, "learning_rate": 1.5051290021759404e-05, "loss": 0.193, "step": 7960 }, { "epoch": 2.48, "learning_rate": 1.5045073049424932e-05, "loss": 0.1842, "step": 7970 }, { "epoch": 2.48, "learning_rate": 1.5038856077090458e-05, "loss": 0.2649, "step": 7980 }, { "epoch": 2.48, "learning_rate": 1.5032639104755986e-05, "loss": 0.6074, "step": 7990 }, { "epoch": 2.49, "learning_rate": 1.5026422132421512e-05, "loss": 0.4128, "step": 8000 }, { "epoch": 2.49, "learning_rate": 1.5020205160087038e-05, "loss": 0.293, "step": 8010 }, { "epoch": 2.49, "learning_rate": 1.5013988187752567e-05, "loss": 0.046, "step": 8020 }, { "epoch": 2.5, "learning_rate": 1.5007771215418093e-05, "loss": 0.2649, "step": 8030 }, { "epoch": 2.5, "learning_rate": 1.500155424308362e-05, "loss": 0.1624, "step": 8040 }, { "epoch": 2.5, "learning_rate": 1.4995337270749147e-05, "loss": 0.4065, "step": 8050 }, { "epoch": 2.51, "learning_rate": 1.4989120298414673e-05, "loss": 0.2417, "step": 8060 }, { "epoch": 2.51, "learning_rate": 1.49829033260802e-05, "loss": 0.3557, "step": 8070 }, { "epoch": 2.51, "learning_rate": 1.4976686353745726e-05, "loss": 0.1446, "step": 8080 }, { "epoch": 2.51, "learning_rate": 1.4970469381411254e-05, "loss": 0.0976, "step": 8090 }, { "epoch": 2.52, "learning_rate": 1.496425240907678e-05, "loss": 0.1058, "step": 8100 }, { "epoch": 2.52, "learning_rate": 1.495803543674231e-05, "loss": 0.4058, "step": 8110 }, { "epoch": 2.52, "learning_rate": 1.4951818464407835e-05, "loss": 0.1838, "step": 8120 }, { "epoch": 2.53, "learning_rate": 1.4945601492073361e-05, "loss": 0.206, "step": 8130 }, { "epoch": 2.53, "learning_rate": 1.4939384519738889e-05, "loss": 0.2976, "step": 8140 }, { "epoch": 2.53, "learning_rate": 1.4933167547404415e-05, "loss": 0.3093, "step": 8150 }, { "epoch": 2.54, "learning_rate": 1.4926950575069942e-05, "loss": 0.3014, "step": 8160 }, { "epoch": 2.54, "learning_rate": 1.4920733602735468e-05, "loss": 0.2016, "step": 8170 }, { "epoch": 2.54, "learning_rate": 1.4914516630400998e-05, "loss": 0.1678, "step": 8180 }, { "epoch": 2.55, "learning_rate": 1.4908299658066524e-05, "loss": 0.3634, "step": 8190 }, { "epoch": 2.55, "learning_rate": 1.490208268573205e-05, "loss": 0.2211, "step": 8200 }, { "epoch": 2.55, "learning_rate": 1.4895865713397577e-05, "loss": 0.5769, "step": 8210 }, { "epoch": 2.56, "learning_rate": 1.4889648741063103e-05, "loss": 0.173, "step": 8220 }, { "epoch": 2.56, "learning_rate": 1.4883431768728631e-05, "loss": 0.4775, "step": 8230 }, { "epoch": 2.56, "learning_rate": 1.4877214796394157e-05, "loss": 0.2532, "step": 8240 }, { "epoch": 2.56, "learning_rate": 1.4870997824059683e-05, "loss": 0.2096, "step": 8250 }, { "epoch": 2.57, "learning_rate": 1.486478085172521e-05, "loss": 0.7672, "step": 8260 }, { "epoch": 2.57, "learning_rate": 1.4858563879390737e-05, "loss": 0.1929, "step": 8270 }, { "epoch": 2.57, "learning_rate": 1.4852346907056266e-05, "loss": 0.2444, "step": 8280 }, { "epoch": 2.58, "learning_rate": 1.4846129934721792e-05, "loss": 0.2461, "step": 8290 }, { "epoch": 2.58, "learning_rate": 1.483991296238732e-05, "loss": 0.4851, "step": 8300 }, { "epoch": 2.58, "learning_rate": 1.4833695990052846e-05, "loss": 0.2893, "step": 8310 }, { "epoch": 2.59, "learning_rate": 1.4827479017718372e-05, "loss": 0.3579, "step": 8320 }, { "epoch": 2.59, "learning_rate": 1.48212620453839e-05, "loss": 0.3941, "step": 8330 }, { "epoch": 2.59, "learning_rate": 1.4815045073049425e-05, "loss": 0.2144, "step": 8340 }, { "epoch": 2.6, "learning_rate": 1.4808828100714955e-05, "loss": 0.3253, "step": 8350 }, { "epoch": 2.6, "learning_rate": 1.4802611128380479e-05, "loss": 0.2182, "step": 8360 }, { "epoch": 2.6, "learning_rate": 1.4796394156046008e-05, "loss": 0.2215, "step": 8370 }, { "epoch": 2.6, "learning_rate": 1.4790177183711534e-05, "loss": 0.2123, "step": 8380 }, { "epoch": 2.61, "learning_rate": 1.478396021137706e-05, "loss": 0.1878, "step": 8390 }, { "epoch": 2.61, "learning_rate": 1.4777743239042588e-05, "loss": 0.2628, "step": 8400 }, { "epoch": 2.61, "learning_rate": 1.4771526266708114e-05, "loss": 0.2355, "step": 8410 }, { "epoch": 2.62, "learning_rate": 1.4765309294373641e-05, "loss": 0.0169, "step": 8420 }, { "epoch": 2.62, "learning_rate": 1.4759092322039167e-05, "loss": 0.515, "step": 8430 }, { "epoch": 2.62, "learning_rate": 1.4752875349704693e-05, "loss": 0.1051, "step": 8440 }, { "epoch": 2.63, "learning_rate": 1.4746658377370223e-05, "loss": 0.3318, "step": 8450 }, { "epoch": 2.63, "learning_rate": 1.4740441405035749e-05, "loss": 0.2981, "step": 8460 }, { "epoch": 2.63, "learning_rate": 1.4734224432701276e-05, "loss": 0.5206, "step": 8470 }, { "epoch": 2.64, "learning_rate": 1.4728007460366802e-05, "loss": 0.389, "step": 8480 }, { "epoch": 2.64, "learning_rate": 1.472179048803233e-05, "loss": 0.5688, "step": 8490 }, { "epoch": 2.64, "learning_rate": 1.4715573515697856e-05, "loss": 0.4642, "step": 8500 }, { "epoch": 2.65, "learning_rate": 1.4709356543363382e-05, "loss": 0.2803, "step": 8510 }, { "epoch": 2.65, "learning_rate": 1.470313957102891e-05, "loss": 0.201, "step": 8520 }, { "epoch": 2.65, "learning_rate": 1.4696922598694436e-05, "loss": 0.1867, "step": 8530 }, { "epoch": 2.65, "learning_rate": 1.4690705626359965e-05, "loss": 0.1225, "step": 8540 }, { "epoch": 2.66, "learning_rate": 1.4684488654025491e-05, "loss": 0.2769, "step": 8550 }, { "epoch": 2.66, "learning_rate": 1.4678271681691019e-05, "loss": 0.5945, "step": 8560 }, { "epoch": 2.66, "learning_rate": 1.4672054709356545e-05, "loss": 0.2414, "step": 8570 }, { "epoch": 2.67, "learning_rate": 1.466583773702207e-05, "loss": 0.4923, "step": 8580 }, { "epoch": 2.67, "learning_rate": 1.4659620764687598e-05, "loss": 0.7125, "step": 8590 }, { "epoch": 2.67, "learning_rate": 1.4653403792353124e-05, "loss": 0.3957, "step": 8600 }, { "epoch": 2.68, "learning_rate": 1.4647186820018654e-05, "loss": 0.393, "step": 8610 }, { "epoch": 2.68, "learning_rate": 1.464096984768418e-05, "loss": 0.2089, "step": 8620 }, { "epoch": 2.68, "learning_rate": 1.4634752875349705e-05, "loss": 0.1464, "step": 8630 }, { "epoch": 2.69, "learning_rate": 1.4628535903015233e-05, "loss": 0.4087, "step": 8640 }, { "epoch": 2.69, "learning_rate": 1.4622318930680759e-05, "loss": 0.1155, "step": 8650 }, { "epoch": 2.69, "learning_rate": 1.4616101958346287e-05, "loss": 0.4024, "step": 8660 }, { "epoch": 2.7, "learning_rate": 1.4609884986011813e-05, "loss": 0.5327, "step": 8670 }, { "epoch": 2.7, "learning_rate": 1.460366801367734e-05, "loss": 0.0675, "step": 8680 }, { "epoch": 2.7, "learning_rate": 1.4597451041342866e-05, "loss": 0.2656, "step": 8690 }, { "epoch": 2.7, "learning_rate": 1.4591234069008392e-05, "loss": 0.2497, "step": 8700 }, { "epoch": 2.71, "learning_rate": 1.4585017096673922e-05, "loss": 0.1034, "step": 8710 }, { "epoch": 2.71, "learning_rate": 1.4578800124339448e-05, "loss": 0.2965, "step": 8720 }, { "epoch": 2.71, "learning_rate": 1.4572583152004975e-05, "loss": 0.6297, "step": 8730 }, { "epoch": 2.72, "learning_rate": 1.4566366179670501e-05, "loss": 0.1339, "step": 8740 }, { "epoch": 2.72, "learning_rate": 1.4560149207336029e-05, "loss": 0.2689, "step": 8750 }, { "epoch": 2.72, "learning_rate": 1.4553932235001555e-05, "loss": 0.1944, "step": 8760 }, { "epoch": 2.73, "learning_rate": 1.4547715262667081e-05, "loss": 0.4323, "step": 8770 }, { "epoch": 2.73, "learning_rate": 1.454149829033261e-05, "loss": 0.3554, "step": 8780 }, { "epoch": 2.73, "learning_rate": 1.4535281317998136e-05, "loss": 0.5084, "step": 8790 }, { "epoch": 2.74, "learning_rate": 1.4529064345663664e-05, "loss": 0.0596, "step": 8800 }, { "epoch": 2.74, "learning_rate": 1.452284737332919e-05, "loss": 0.3594, "step": 8810 }, { "epoch": 2.74, "learning_rate": 1.4516630400994716e-05, "loss": 0.2719, "step": 8820 }, { "epoch": 2.74, "learning_rate": 1.4510413428660244e-05, "loss": 0.336, "step": 8830 }, { "epoch": 2.75, "learning_rate": 1.450419645632577e-05, "loss": 0.078, "step": 8840 }, { "epoch": 2.75, "learning_rate": 1.4497979483991297e-05, "loss": 0.3088, "step": 8850 }, { "epoch": 2.75, "learning_rate": 1.4491762511656823e-05, "loss": 0.4609, "step": 8860 }, { "epoch": 2.76, "learning_rate": 1.4485545539322353e-05, "loss": 0.2405, "step": 8870 }, { "epoch": 2.76, "learning_rate": 1.4479328566987879e-05, "loss": 0.187, "step": 8880 }, { "epoch": 2.76, "learning_rate": 1.4473111594653404e-05, "loss": 0.0885, "step": 8890 }, { "epoch": 2.77, "learning_rate": 1.4466894622318932e-05, "loss": 0.232, "step": 8900 }, { "epoch": 2.77, "learning_rate": 1.4460677649984458e-05, "loss": 0.4734, "step": 8910 }, { "epoch": 2.77, "learning_rate": 1.4454460677649986e-05, "loss": 0.5779, "step": 8920 }, { "epoch": 2.78, "learning_rate": 1.4448243705315512e-05, "loss": 0.2045, "step": 8930 }, { "epoch": 2.78, "learning_rate": 1.4442026732981041e-05, "loss": 0.1847, "step": 8940 }, { "epoch": 2.78, "learning_rate": 1.4435809760646567e-05, "loss": 0.3292, "step": 8950 }, { "epoch": 2.79, "learning_rate": 1.4429592788312093e-05, "loss": 0.5205, "step": 8960 }, { "epoch": 2.79, "learning_rate": 1.442337581597762e-05, "loss": 0.5215, "step": 8970 }, { "epoch": 2.79, "learning_rate": 1.4417158843643147e-05, "loss": 0.3223, "step": 8980 }, { "epoch": 2.79, "learning_rate": 1.4410941871308674e-05, "loss": 0.1323, "step": 8990 }, { "epoch": 2.8, "learning_rate": 1.44047248989742e-05, "loss": 0.1586, "step": 9000 }, { "epoch": 2.8, "learning_rate": 1.4398507926639728e-05, "loss": 0.1128, "step": 9010 }, { "epoch": 2.8, "learning_rate": 1.4392290954305254e-05, "loss": 0.3051, "step": 9020 }, { "epoch": 2.81, "learning_rate": 1.438607398197078e-05, "loss": 0.5403, "step": 9030 }, { "epoch": 2.81, "learning_rate": 1.437985700963631e-05, "loss": 0.4989, "step": 9040 }, { "epoch": 2.81, "learning_rate": 1.4373640037301835e-05, "loss": 0.3093, "step": 9050 }, { "epoch": 2.82, "learning_rate": 1.4367423064967363e-05, "loss": 0.1946, "step": 9060 }, { "epoch": 2.82, "learning_rate": 1.4361206092632889e-05, "loss": 0.396, "step": 9070 }, { "epoch": 2.82, "learning_rate": 1.4354989120298415e-05, "loss": 0.0911, "step": 9080 }, { "epoch": 2.83, "learning_rate": 1.4348772147963943e-05, "loss": 0.1242, "step": 9090 }, { "epoch": 2.83, "learning_rate": 1.4342555175629469e-05, "loss": 0.5148, "step": 9100 }, { "epoch": 2.83, "learning_rate": 1.4336338203294998e-05, "loss": 0.4557, "step": 9110 }, { "epoch": 2.83, "learning_rate": 1.4330121230960524e-05, "loss": 0.2277, "step": 9120 }, { "epoch": 2.84, "learning_rate": 1.4323904258626052e-05, "loss": 0.1162, "step": 9130 }, { "epoch": 2.84, "learning_rate": 1.4317687286291578e-05, "loss": 0.3561, "step": 9140 }, { "epoch": 2.84, "learning_rate": 1.4311470313957103e-05, "loss": 0.1412, "step": 9150 }, { "epoch": 2.85, "learning_rate": 1.4305253341622631e-05, "loss": 0.0861, "step": 9160 }, { "epoch": 2.85, "learning_rate": 1.4299036369288157e-05, "loss": 0.6723, "step": 9170 }, { "epoch": 2.85, "learning_rate": 1.4292819396953685e-05, "loss": 0.3171, "step": 9180 }, { "epoch": 2.86, "learning_rate": 1.428660242461921e-05, "loss": 0.2048, "step": 9190 }, { "epoch": 2.86, "learning_rate": 1.428038545228474e-05, "loss": 0.2901, "step": 9200 }, { "epoch": 2.86, "learning_rate": 1.4274168479950266e-05, "loss": 0.2042, "step": 9210 }, { "epoch": 2.87, "learning_rate": 1.4267951507615792e-05, "loss": 0.2675, "step": 9220 }, { "epoch": 2.87, "learning_rate": 1.426173453528132e-05, "loss": 0.482, "step": 9230 }, { "epoch": 2.87, "learning_rate": 1.4255517562946846e-05, "loss": 0.3418, "step": 9240 }, { "epoch": 2.88, "learning_rate": 1.4249300590612373e-05, "loss": 0.3779, "step": 9250 }, { "epoch": 2.88, "learning_rate": 1.42430836182779e-05, "loss": 0.3139, "step": 9260 }, { "epoch": 2.88, "learning_rate": 1.4236866645943425e-05, "loss": 0.3661, "step": 9270 }, { "epoch": 2.88, "learning_rate": 1.4230649673608955e-05, "loss": 0.0626, "step": 9280 }, { "epoch": 2.89, "learning_rate": 1.422443270127448e-05, "loss": 0.1333, "step": 9290 }, { "epoch": 2.89, "learning_rate": 1.4218215728940008e-05, "loss": 0.918, "step": 9300 }, { "epoch": 2.89, "learning_rate": 1.4211998756605534e-05, "loss": 0.2612, "step": 9310 }, { "epoch": 2.9, "learning_rate": 1.4205781784271062e-05, "loss": 0.1778, "step": 9320 }, { "epoch": 2.9, "learning_rate": 1.4199564811936588e-05, "loss": 0.3315, "step": 9330 }, { "epoch": 2.9, "learning_rate": 1.4193347839602114e-05, "loss": 0.1285, "step": 9340 }, { "epoch": 2.91, "learning_rate": 1.4187130867267642e-05, "loss": 0.1464, "step": 9350 }, { "epoch": 2.91, "learning_rate": 1.4180913894933168e-05, "loss": 0.1455, "step": 9360 }, { "epoch": 2.91, "learning_rate": 1.4174696922598697e-05, "loss": 0.263, "step": 9370 }, { "epoch": 2.92, "learning_rate": 1.4168479950264223e-05, "loss": 0.0908, "step": 9380 }, { "epoch": 2.92, "learning_rate": 1.416226297792975e-05, "loss": 0.8039, "step": 9390 }, { "epoch": 2.92, "learning_rate": 1.4156046005595277e-05, "loss": 0.3856, "step": 9400 }, { "epoch": 2.93, "learning_rate": 1.4149829033260803e-05, "loss": 0.5976, "step": 9410 }, { "epoch": 2.93, "learning_rate": 1.414361206092633e-05, "loss": 0.3404, "step": 9420 }, { "epoch": 2.93, "learning_rate": 1.4137395088591856e-05, "loss": 0.3789, "step": 9430 }, { "epoch": 2.93, "learning_rate": 1.4131178116257386e-05, "loss": 0.3083, "step": 9440 }, { "epoch": 2.94, "learning_rate": 1.412496114392291e-05, "loss": 0.4234, "step": 9450 }, { "epoch": 2.94, "learning_rate": 1.4118744171588436e-05, "loss": 0.3099, "step": 9460 }, { "epoch": 2.94, "learning_rate": 1.4112527199253965e-05, "loss": 0.3293, "step": 9470 }, { "epoch": 2.95, "learning_rate": 1.4106310226919491e-05, "loss": 0.2896, "step": 9480 }, { "epoch": 2.95, "learning_rate": 1.4100093254585019e-05, "loss": 0.2453, "step": 9490 }, { "epoch": 2.95, "learning_rate": 1.4093876282250545e-05, "loss": 0.209, "step": 9500 }, { "epoch": 2.96, "learning_rate": 1.4087659309916072e-05, "loss": 0.4094, "step": 9510 }, { "epoch": 2.96, "learning_rate": 1.4081442337581598e-05, "loss": 0.1609, "step": 9520 }, { "epoch": 2.96, "learning_rate": 1.4075225365247124e-05, "loss": 0.4668, "step": 9530 }, { "epoch": 2.97, "learning_rate": 1.4069008392912654e-05, "loss": 0.4928, "step": 9540 }, { "epoch": 2.97, "learning_rate": 1.406279142057818e-05, "loss": 0.3651, "step": 9550 }, { "epoch": 2.97, "learning_rate": 1.4056574448243707e-05, "loss": 0.2327, "step": 9560 }, { "epoch": 2.97, "learning_rate": 1.4050357475909233e-05, "loss": 0.2613, "step": 9570 }, { "epoch": 2.98, "learning_rate": 1.4044140503574761e-05, "loss": 0.3626, "step": 9580 }, { "epoch": 2.98, "learning_rate": 1.4037923531240287e-05, "loss": 0.163, "step": 9590 }, { "epoch": 2.98, "learning_rate": 1.4031706558905813e-05, "loss": 0.2331, "step": 9600 }, { "epoch": 2.99, "learning_rate": 1.402548958657134e-05, "loss": 0.2725, "step": 9610 }, { "epoch": 2.99, "learning_rate": 1.4019272614236867e-05, "loss": 0.3495, "step": 9620 }, { "epoch": 2.99, "learning_rate": 1.4013055641902396e-05, "loss": 0.1099, "step": 9630 }, { "epoch": 3.0, "learning_rate": 1.4006838669567922e-05, "loss": 0.4501, "step": 9640 }, { "epoch": 3.0, "learning_rate": 1.4000621697233448e-05, "loss": 0.1059, "step": 9650 }, { "epoch": 3.0, "eval_FN": 144, "eval_FP": 283, "eval_TN": 5555, "eval_TP": 451, "eval_accuracy": 0.9336235038084875, "eval_f1": 0.6787057938299473, "eval_loss": 0.47829240560531616, "eval_precision": 0.614441416893733, "eval_recall": 0.7579831932773109, "eval_runtime": 44.395, "eval_samples_per_second": 144.904, "eval_steps_per_second": 9.078, "step": 9651 }, { "epoch": 3.0, "learning_rate": 1.3994404724898976e-05, "loss": 0.1695, "step": 9660 }, { "epoch": 3.01, "learning_rate": 1.3988187752564502e-05, "loss": 0.0399, "step": 9670 }, { "epoch": 3.01, "learning_rate": 1.398197078023003e-05, "loss": 0.4499, "step": 9680 }, { "epoch": 3.01, "learning_rate": 1.3975753807895555e-05, "loss": 0.101, "step": 9690 }, { "epoch": 3.02, "learning_rate": 1.3969536835561085e-05, "loss": 0.3483, "step": 9700 }, { "epoch": 3.02, "learning_rate": 1.396331986322661e-05, "loss": 0.2001, "step": 9710 }, { "epoch": 3.02, "learning_rate": 1.3957102890892136e-05, "loss": 0.2035, "step": 9720 }, { "epoch": 3.02, "learning_rate": 1.3950885918557664e-05, "loss": 0.1652, "step": 9730 }, { "epoch": 3.03, "learning_rate": 1.394466894622319e-05, "loss": 0.2004, "step": 9740 }, { "epoch": 3.03, "learning_rate": 1.3938451973888718e-05, "loss": 0.1753, "step": 9750 }, { "epoch": 3.03, "learning_rate": 1.3932235001554244e-05, "loss": 0.1319, "step": 9760 }, { "epoch": 3.04, "learning_rate": 1.3926018029219771e-05, "loss": 0.0901, "step": 9770 }, { "epoch": 3.04, "learning_rate": 1.3919801056885297e-05, "loss": 0.1735, "step": 9780 }, { "epoch": 3.04, "learning_rate": 1.3913584084550823e-05, "loss": 0.2184, "step": 9790 }, { "epoch": 3.05, "learning_rate": 1.3907367112216353e-05, "loss": 0.18, "step": 9800 }, { "epoch": 3.05, "learning_rate": 1.3901150139881879e-05, "loss": 0.3369, "step": 9810 }, { "epoch": 3.05, "learning_rate": 1.3894933167547406e-05, "loss": 0.193, "step": 9820 }, { "epoch": 3.06, "learning_rate": 1.3888716195212932e-05, "loss": 0.0918, "step": 9830 }, { "epoch": 3.06, "learning_rate": 1.3882499222878458e-05, "loss": 0.1725, "step": 9840 }, { "epoch": 3.06, "learning_rate": 1.3876282250543986e-05, "loss": 0.1871, "step": 9850 }, { "epoch": 3.06, "learning_rate": 1.3870065278209512e-05, "loss": 0.6128, "step": 9860 }, { "epoch": 3.07, "learning_rate": 1.3863848305875041e-05, "loss": 0.1781, "step": 9870 }, { "epoch": 3.07, "learning_rate": 1.3857631333540567e-05, "loss": 0.1296, "step": 9880 }, { "epoch": 3.07, "learning_rate": 1.3851414361206095e-05, "loss": 0.1552, "step": 9890 }, { "epoch": 3.08, "learning_rate": 1.3845197388871621e-05, "loss": 0.1407, "step": 9900 }, { "epoch": 3.08, "learning_rate": 1.3838980416537147e-05, "loss": 0.2887, "step": 9910 }, { "epoch": 3.08, "learning_rate": 1.3832763444202675e-05, "loss": 0.2668, "step": 9920 }, { "epoch": 3.09, "learning_rate": 1.38265464718682e-05, "loss": 0.2638, "step": 9930 }, { "epoch": 3.09, "learning_rate": 1.3820329499533728e-05, "loss": 0.1416, "step": 9940 }, { "epoch": 3.09, "learning_rate": 1.3814112527199254e-05, "loss": 0.2272, "step": 9950 }, { "epoch": 3.1, "learning_rate": 1.3807895554864784e-05, "loss": 0.2862, "step": 9960 }, { "epoch": 3.1, "learning_rate": 1.380167858253031e-05, "loss": 0.2373, "step": 9970 }, { "epoch": 3.1, "learning_rate": 1.3795461610195835e-05, "loss": 0.2433, "step": 9980 }, { "epoch": 3.11, "learning_rate": 1.3789244637861363e-05, "loss": 0.3189, "step": 9990 }, { "epoch": 3.11, "learning_rate": 1.3783027665526889e-05, "loss": 0.0804, "step": 10000 }, { "epoch": 3.11, "learning_rate": 1.3776810693192417e-05, "loss": 0.2988, "step": 10010 }, { "epoch": 3.11, "learning_rate": 1.3770593720857943e-05, "loss": 0.4699, "step": 10020 }, { "epoch": 3.12, "learning_rate": 1.3764376748523469e-05, "loss": 0.3478, "step": 10030 }, { "epoch": 3.12, "learning_rate": 1.3758159776188998e-05, "loss": 0.0632, "step": 10040 }, { "epoch": 3.12, "learning_rate": 1.3751942803854524e-05, "loss": 0.052, "step": 10050 }, { "epoch": 3.13, "learning_rate": 1.3745725831520052e-05, "loss": 0.3428, "step": 10060 }, { "epoch": 3.13, "learning_rate": 1.3739508859185578e-05, "loss": 0.0544, "step": 10070 }, { "epoch": 3.13, "learning_rate": 1.3733291886851105e-05, "loss": 0.2726, "step": 10080 }, { "epoch": 3.14, "learning_rate": 1.3727074914516631e-05, "loss": 0.3439, "step": 10090 }, { "epoch": 3.14, "learning_rate": 1.3720857942182157e-05, "loss": 0.1644, "step": 10100 }, { "epoch": 3.14, "learning_rate": 1.3714640969847685e-05, "loss": 0.467, "step": 10110 }, { "epoch": 3.15, "learning_rate": 1.3708423997513211e-05, "loss": 0.4198, "step": 10120 }, { "epoch": 3.15, "learning_rate": 1.370220702517874e-05, "loss": 0.1596, "step": 10130 }, { "epoch": 3.15, "learning_rate": 1.3695990052844266e-05, "loss": 0.3598, "step": 10140 }, { "epoch": 3.16, "learning_rate": 1.3689773080509794e-05, "loss": 0.3038, "step": 10150 }, { "epoch": 3.16, "learning_rate": 1.368355610817532e-05, "loss": 0.1258, "step": 10160 }, { "epoch": 3.16, "learning_rate": 1.3677339135840846e-05, "loss": 0.1037, "step": 10170 }, { "epoch": 3.16, "learning_rate": 1.3671122163506374e-05, "loss": 0.1501, "step": 10180 }, { "epoch": 3.17, "learning_rate": 1.36649051911719e-05, "loss": 0.3464, "step": 10190 }, { "epoch": 3.17, "learning_rate": 1.3658688218837429e-05, "loss": 0.0708, "step": 10200 }, { "epoch": 3.17, "learning_rate": 1.3652471246502955e-05, "loss": 0.1894, "step": 10210 }, { "epoch": 3.18, "learning_rate": 1.364625427416848e-05, "loss": 0.1111, "step": 10220 }, { "epoch": 3.18, "learning_rate": 1.3640037301834008e-05, "loss": 0.2831, "step": 10230 }, { "epoch": 3.18, "learning_rate": 1.3633820329499534e-05, "loss": 0.401, "step": 10240 }, { "epoch": 3.19, "learning_rate": 1.3627603357165062e-05, "loss": 0.1406, "step": 10250 }, { "epoch": 3.19, "learning_rate": 1.3621386384830588e-05, "loss": 0.4137, "step": 10260 }, { "epoch": 3.19, "learning_rate": 1.3615169412496116e-05, "loss": 0.3772, "step": 10270 }, { "epoch": 3.2, "learning_rate": 1.3608952440161642e-05, "loss": 0.0874, "step": 10280 }, { "epoch": 3.2, "learning_rate": 1.3602735467827168e-05, "loss": 0.1234, "step": 10290 }, { "epoch": 3.2, "learning_rate": 1.3596518495492697e-05, "loss": 0.129, "step": 10300 }, { "epoch": 3.2, "learning_rate": 1.3590301523158223e-05, "loss": 0.3009, "step": 10310 }, { "epoch": 3.21, "learning_rate": 1.358408455082375e-05, "loss": 0.186, "step": 10320 }, { "epoch": 3.21, "learning_rate": 1.3577867578489277e-05, "loss": 0.299, "step": 10330 }, { "epoch": 3.21, "learning_rate": 1.3571650606154804e-05, "loss": 0.0743, "step": 10340 }, { "epoch": 3.22, "learning_rate": 1.356543363382033e-05, "loss": 0.1053, "step": 10350 }, { "epoch": 3.22, "learning_rate": 1.3559216661485856e-05, "loss": 0.2852, "step": 10360 }, { "epoch": 3.22, "learning_rate": 1.3552999689151386e-05, "loss": 0.1107, "step": 10370 }, { "epoch": 3.23, "learning_rate": 1.3546782716816912e-05, "loss": 0.4914, "step": 10380 }, { "epoch": 3.23, "learning_rate": 1.354056574448244e-05, "loss": 0.3408, "step": 10390 }, { "epoch": 3.23, "learning_rate": 1.3534348772147965e-05, "loss": 0.1656, "step": 10400 }, { "epoch": 3.24, "learning_rate": 1.3528131799813491e-05, "loss": 0.3507, "step": 10410 }, { "epoch": 3.24, "learning_rate": 1.3521914827479019e-05, "loss": 0.1035, "step": 10420 }, { "epoch": 3.24, "learning_rate": 1.3515697855144545e-05, "loss": 0.4505, "step": 10430 }, { "epoch": 3.25, "learning_rate": 1.3509480882810073e-05, "loss": 0.1121, "step": 10440 }, { "epoch": 3.25, "learning_rate": 1.3503263910475599e-05, "loss": 0.2113, "step": 10450 }, { "epoch": 3.25, "learning_rate": 1.3497046938141128e-05, "loss": 0.0716, "step": 10460 }, { "epoch": 3.25, "learning_rate": 1.3490829965806654e-05, "loss": 0.0947, "step": 10470 }, { "epoch": 3.26, "learning_rate": 1.348461299347218e-05, "loss": 0.1207, "step": 10480 }, { "epoch": 3.26, "learning_rate": 1.3478396021137708e-05, "loss": 0.2489, "step": 10490 }, { "epoch": 3.26, "learning_rate": 1.3472179048803233e-05, "loss": 0.4056, "step": 10500 }, { "epoch": 3.27, "learning_rate": 1.3465962076468761e-05, "loss": 0.2958, "step": 10510 }, { "epoch": 3.27, "learning_rate": 1.3459745104134287e-05, "loss": 0.2287, "step": 10520 }, { "epoch": 3.27, "learning_rate": 1.3453528131799816e-05, "loss": 0.23, "step": 10530 }, { "epoch": 3.28, "learning_rate": 1.3447311159465342e-05, "loss": 0.1685, "step": 10540 }, { "epoch": 3.28, "learning_rate": 1.3441094187130867e-05, "loss": 0.3205, "step": 10550 }, { "epoch": 3.28, "learning_rate": 1.3434877214796396e-05, "loss": 0.2607, "step": 10560 }, { "epoch": 3.29, "learning_rate": 1.3428660242461922e-05, "loss": 0.2542, "step": 10570 }, { "epoch": 3.29, "learning_rate": 1.342244327012745e-05, "loss": 0.427, "step": 10580 }, { "epoch": 3.29, "learning_rate": 1.3416226297792976e-05, "loss": 0.4038, "step": 10590 }, { "epoch": 3.29, "learning_rate": 1.3410009325458502e-05, "loss": 0.186, "step": 10600 }, { "epoch": 3.3, "learning_rate": 1.340379235312403e-05, "loss": 0.0288, "step": 10610 }, { "epoch": 3.3, "learning_rate": 1.3397575380789555e-05, "loss": 0.0446, "step": 10620 }, { "epoch": 3.3, "learning_rate": 1.3391358408455085e-05, "loss": 0.4573, "step": 10630 }, { "epoch": 3.31, "learning_rate": 1.338514143612061e-05, "loss": 0.2653, "step": 10640 }, { "epoch": 3.31, "learning_rate": 1.3378924463786138e-05, "loss": 0.1938, "step": 10650 }, { "epoch": 3.31, "learning_rate": 1.3372707491451664e-05, "loss": 0.0699, "step": 10660 }, { "epoch": 3.32, "learning_rate": 1.336649051911719e-05, "loss": 0.2902, "step": 10670 }, { "epoch": 3.32, "learning_rate": 1.3360273546782718e-05, "loss": 0.4206, "step": 10680 }, { "epoch": 3.32, "learning_rate": 1.3354056574448244e-05, "loss": 0.1198, "step": 10690 }, { "epoch": 3.33, "learning_rate": 1.3347839602113772e-05, "loss": 0.1313, "step": 10700 }, { "epoch": 3.33, "learning_rate": 1.3341622629779298e-05, "loss": 0.247, "step": 10710 }, { "epoch": 3.33, "learning_rate": 1.3335405657444827e-05, "loss": 0.3756, "step": 10720 }, { "epoch": 3.34, "learning_rate": 1.3329188685110353e-05, "loss": 0.203, "step": 10730 }, { "epoch": 3.34, "learning_rate": 1.3322971712775879e-05, "loss": 0.2616, "step": 10740 }, { "epoch": 3.34, "learning_rate": 1.3316754740441407e-05, "loss": 0.0738, "step": 10750 }, { "epoch": 3.34, "learning_rate": 1.3310537768106932e-05, "loss": 0.163, "step": 10760 }, { "epoch": 3.35, "learning_rate": 1.330432079577246e-05, "loss": 0.2713, "step": 10770 }, { "epoch": 3.35, "learning_rate": 1.3298103823437986e-05, "loss": 0.292, "step": 10780 }, { "epoch": 3.35, "learning_rate": 1.3291886851103512e-05, "loss": 0.1849, "step": 10790 }, { "epoch": 3.36, "learning_rate": 1.3285669878769041e-05, "loss": 0.0513, "step": 10800 }, { "epoch": 3.36, "learning_rate": 1.3279452906434567e-05, "loss": 0.4804, "step": 10810 }, { "epoch": 3.36, "learning_rate": 1.3273235934100095e-05, "loss": 0.265, "step": 10820 }, { "epoch": 3.37, "learning_rate": 1.3267018961765621e-05, "loss": 0.4705, "step": 10830 }, { "epoch": 3.37, "learning_rate": 1.3260801989431149e-05, "loss": 0.1628, "step": 10840 }, { "epoch": 3.37, "learning_rate": 1.3254585017096675e-05, "loss": 0.3126, "step": 10850 }, { "epoch": 3.38, "learning_rate": 1.32483680447622e-05, "loss": 0.0714, "step": 10860 }, { "epoch": 3.38, "learning_rate": 1.3242151072427728e-05, "loss": 0.3594, "step": 10870 }, { "epoch": 3.38, "learning_rate": 1.3235934100093254e-05, "loss": 0.2706, "step": 10880 }, { "epoch": 3.39, "learning_rate": 1.3229717127758784e-05, "loss": 0.33, "step": 10890 }, { "epoch": 3.39, "learning_rate": 1.322350015542431e-05, "loss": 0.479, "step": 10900 }, { "epoch": 3.39, "learning_rate": 1.3217283183089837e-05, "loss": 0.1239, "step": 10910 }, { "epoch": 3.39, "learning_rate": 1.3211066210755363e-05, "loss": 0.0787, "step": 10920 }, { "epoch": 3.4, "learning_rate": 1.320484923842089e-05, "loss": 0.3674, "step": 10930 }, { "epoch": 3.4, "learning_rate": 1.3198632266086417e-05, "loss": 0.3918, "step": 10940 }, { "epoch": 3.4, "learning_rate": 1.3192415293751943e-05, "loss": 0.2931, "step": 10950 }, { "epoch": 3.41, "learning_rate": 1.3186198321417472e-05, "loss": 0.325, "step": 10960 }, { "epoch": 3.41, "learning_rate": 1.3179981349082998e-05, "loss": 0.226, "step": 10970 }, { "epoch": 3.41, "learning_rate": 1.3173764376748524e-05, "loss": 0.34, "step": 10980 }, { "epoch": 3.42, "learning_rate": 1.3167547404414052e-05, "loss": 0.2898, "step": 10990 }, { "epoch": 3.42, "learning_rate": 1.3161330432079578e-05, "loss": 0.2087, "step": 11000 }, { "epoch": 3.42, "learning_rate": 1.3155113459745106e-05, "loss": 0.3827, "step": 11010 }, { "epoch": 3.43, "learning_rate": 1.3148896487410631e-05, "loss": 0.4799, "step": 11020 }, { "epoch": 3.43, "learning_rate": 1.314267951507616e-05, "loss": 0.4644, "step": 11030 }, { "epoch": 3.43, "learning_rate": 1.3136462542741685e-05, "loss": 0.101, "step": 11040 }, { "epoch": 3.43, "learning_rate": 1.3130245570407211e-05, "loss": 0.3563, "step": 11050 }, { "epoch": 3.44, "learning_rate": 1.312402859807274e-05, "loss": 0.1481, "step": 11060 }, { "epoch": 3.44, "learning_rate": 1.3117811625738266e-05, "loss": 0.1838, "step": 11070 }, { "epoch": 3.44, "learning_rate": 1.3111594653403794e-05, "loss": 0.4464, "step": 11080 }, { "epoch": 3.45, "learning_rate": 1.310537768106932e-05, "loss": 0.3611, "step": 11090 }, { "epoch": 3.45, "learning_rate": 1.3099160708734848e-05, "loss": 0.5252, "step": 11100 }, { "epoch": 3.45, "learning_rate": 1.3092943736400374e-05, "loss": 0.2721, "step": 11110 }, { "epoch": 3.46, "learning_rate": 1.30867267640659e-05, "loss": 0.1512, "step": 11120 }, { "epoch": 3.46, "learning_rate": 1.3080509791731429e-05, "loss": 0.1801, "step": 11130 }, { "epoch": 3.46, "learning_rate": 1.3074292819396955e-05, "loss": 0.1763, "step": 11140 }, { "epoch": 3.47, "learning_rate": 1.3068075847062483e-05, "loss": 0.1246, "step": 11150 }, { "epoch": 3.47, "learning_rate": 1.3061858874728009e-05, "loss": 0.628, "step": 11160 }, { "epoch": 3.47, "learning_rate": 1.3055641902393535e-05, "loss": 0.3906, "step": 11170 }, { "epoch": 3.48, "learning_rate": 1.3049424930059062e-05, "loss": 0.1695, "step": 11180 }, { "epoch": 3.48, "learning_rate": 1.3043207957724588e-05, "loss": 0.1287, "step": 11190 }, { "epoch": 3.48, "learning_rate": 1.3036990985390116e-05, "loss": 0.4652, "step": 11200 }, { "epoch": 3.48, "learning_rate": 1.3030774013055642e-05, "loss": 0.4175, "step": 11210 }, { "epoch": 3.49, "learning_rate": 1.3024557040721171e-05, "loss": 0.2244, "step": 11220 }, { "epoch": 3.49, "learning_rate": 1.3018340068386697e-05, "loss": 0.3057, "step": 11230 }, { "epoch": 3.49, "learning_rate": 1.3012123096052223e-05, "loss": 0.1368, "step": 11240 }, { "epoch": 3.5, "learning_rate": 1.3005906123717751e-05, "loss": 0.2388, "step": 11250 }, { "epoch": 3.5, "learning_rate": 1.2999689151383277e-05, "loss": 0.3094, "step": 11260 }, { "epoch": 3.5, "learning_rate": 1.2993472179048805e-05, "loss": 0.1878, "step": 11270 }, { "epoch": 3.51, "learning_rate": 1.298725520671433e-05, "loss": 0.3477, "step": 11280 }, { "epoch": 3.51, "learning_rate": 1.298103823437986e-05, "loss": 0.2089, "step": 11290 }, { "epoch": 3.51, "learning_rate": 1.2974821262045386e-05, "loss": 0.1621, "step": 11300 }, { "epoch": 3.52, "learning_rate": 1.2968604289710912e-05, "loss": 0.4381, "step": 11310 }, { "epoch": 3.52, "learning_rate": 1.296238731737644e-05, "loss": 0.1185, "step": 11320 }, { "epoch": 3.52, "learning_rate": 1.2956170345041965e-05, "loss": 0.1553, "step": 11330 }, { "epoch": 3.53, "learning_rate": 1.2949953372707493e-05, "loss": 0.4034, "step": 11340 }, { "epoch": 3.53, "learning_rate": 1.2943736400373019e-05, "loss": 0.2365, "step": 11350 }, { "epoch": 3.53, "learning_rate": 1.2937519428038545e-05, "loss": 0.2278, "step": 11360 }, { "epoch": 3.53, "learning_rate": 1.2931302455704073e-05, "loss": 0.0748, "step": 11370 }, { "epoch": 3.54, "learning_rate": 1.2925085483369599e-05, "loss": 0.5629, "step": 11380 }, { "epoch": 3.54, "learning_rate": 1.2918868511035128e-05, "loss": 0.2102, "step": 11390 }, { "epoch": 3.54, "learning_rate": 1.2912651538700654e-05, "loss": 0.1398, "step": 11400 }, { "epoch": 3.55, "learning_rate": 1.2906434566366182e-05, "loss": 0.1704, "step": 11410 }, { "epoch": 3.55, "learning_rate": 1.2900217594031708e-05, "loss": 0.3045, "step": 11420 }, { "epoch": 3.55, "learning_rate": 1.2894000621697234e-05, "loss": 0.1977, "step": 11430 }, { "epoch": 3.56, "learning_rate": 1.2887783649362761e-05, "loss": 0.1811, "step": 11440 }, { "epoch": 3.56, "learning_rate": 1.2881566677028287e-05, "loss": 0.639, "step": 11450 }, { "epoch": 3.56, "learning_rate": 1.2875349704693817e-05, "loss": 0.3437, "step": 11460 }, { "epoch": 3.57, "learning_rate": 1.2869132732359343e-05, "loss": 0.1104, "step": 11470 }, { "epoch": 3.57, "learning_rate": 1.286291576002487e-05, "loss": 0.1637, "step": 11480 }, { "epoch": 3.57, "learning_rate": 1.2856698787690396e-05, "loss": 0.1458, "step": 11490 }, { "epoch": 3.57, "learning_rate": 1.2850481815355922e-05, "loss": 0.3571, "step": 11500 }, { "epoch": 3.58, "learning_rate": 1.284426484302145e-05, "loss": 0.3072, "step": 11510 }, { "epoch": 3.58, "learning_rate": 1.2838047870686976e-05, "loss": 0.0665, "step": 11520 }, { "epoch": 3.58, "learning_rate": 1.2831830898352504e-05, "loss": 0.3456, "step": 11530 }, { "epoch": 3.59, "learning_rate": 1.282561392601803e-05, "loss": 0.3762, "step": 11540 }, { "epoch": 3.59, "learning_rate": 1.2819396953683555e-05, "loss": 0.2215, "step": 11550 }, { "epoch": 3.59, "learning_rate": 1.2813179981349085e-05, "loss": 0.1204, "step": 11560 }, { "epoch": 3.6, "learning_rate": 1.280696300901461e-05, "loss": 0.6023, "step": 11570 }, { "epoch": 3.6, "learning_rate": 1.2800746036680138e-05, "loss": 0.2735, "step": 11580 }, { "epoch": 3.6, "learning_rate": 1.2794529064345664e-05, "loss": 0.242, "step": 11590 }, { "epoch": 3.61, "learning_rate": 1.2788312092011192e-05, "loss": 0.3233, "step": 11600 }, { "epoch": 3.61, "learning_rate": 1.2782095119676718e-05, "loss": 0.3557, "step": 11610 }, { "epoch": 3.61, "learning_rate": 1.2775878147342244e-05, "loss": 0.0234, "step": 11620 }, { "epoch": 3.62, "learning_rate": 1.2769661175007773e-05, "loss": 0.1923, "step": 11630 }, { "epoch": 3.62, "learning_rate": 1.2763444202673298e-05, "loss": 0.1089, "step": 11640 }, { "epoch": 3.62, "learning_rate": 1.2757227230338827e-05, "loss": 0.0397, "step": 11650 }, { "epoch": 3.62, "learning_rate": 1.2751010258004353e-05, "loss": 0.1789, "step": 11660 }, { "epoch": 3.63, "learning_rate": 1.274479328566988e-05, "loss": 0.5434, "step": 11670 }, { "epoch": 3.63, "learning_rate": 1.2738576313335407e-05, "loss": 0.2577, "step": 11680 }, { "epoch": 3.63, "learning_rate": 1.2732359341000933e-05, "loss": 0.3615, "step": 11690 }, { "epoch": 3.64, "learning_rate": 1.272614236866646e-05, "loss": 0.3055, "step": 11700 }, { "epoch": 3.64, "learning_rate": 1.2719925396331986e-05, "loss": 0.241, "step": 11710 }, { "epoch": 3.64, "learning_rate": 1.2713708423997516e-05, "loss": 0.0669, "step": 11720 }, { "epoch": 3.65, "learning_rate": 1.2707491451663042e-05, "loss": 0.3327, "step": 11730 }, { "epoch": 3.65, "learning_rate": 1.2701274479328568e-05, "loss": 0.2171, "step": 11740 }, { "epoch": 3.65, "learning_rate": 1.2695057506994095e-05, "loss": 0.2021, "step": 11750 }, { "epoch": 3.66, "learning_rate": 1.2688840534659621e-05, "loss": 0.0428, "step": 11760 }, { "epoch": 3.66, "learning_rate": 1.2682623562325149e-05, "loss": 0.3206, "step": 11770 }, { "epoch": 3.66, "learning_rate": 1.2676406589990675e-05, "loss": 0.0889, "step": 11780 }, { "epoch": 3.66, "learning_rate": 1.2670189617656203e-05, "loss": 0.2863, "step": 11790 }, { "epoch": 3.67, "learning_rate": 1.2663972645321729e-05, "loss": 0.3675, "step": 11800 }, { "epoch": 3.67, "learning_rate": 1.2657755672987254e-05, "loss": 0.3858, "step": 11810 }, { "epoch": 3.67, "learning_rate": 1.2651538700652784e-05, "loss": 0.1112, "step": 11820 }, { "epoch": 3.68, "learning_rate": 1.264532172831831e-05, "loss": 0.1559, "step": 11830 }, { "epoch": 3.68, "learning_rate": 1.2639104755983837e-05, "loss": 0.3497, "step": 11840 }, { "epoch": 3.68, "learning_rate": 1.2632887783649363e-05, "loss": 0.1573, "step": 11850 }, { "epoch": 3.69, "learning_rate": 1.2626670811314891e-05, "loss": 0.093, "step": 11860 }, { "epoch": 3.69, "learning_rate": 1.2620453838980417e-05, "loss": 0.1684, "step": 11870 }, { "epoch": 3.69, "learning_rate": 1.2614236866645943e-05, "loss": 0.0455, "step": 11880 }, { "epoch": 3.7, "learning_rate": 1.2608019894311472e-05, "loss": 0.1253, "step": 11890 }, { "epoch": 3.7, "learning_rate": 1.2601802921976998e-05, "loss": 0.447, "step": 11900 }, { "epoch": 3.7, "learning_rate": 1.2595585949642526e-05, "loss": 0.4179, "step": 11910 }, { "epoch": 3.71, "learning_rate": 1.2589368977308052e-05, "loss": 0.3656, "step": 11920 }, { "epoch": 3.71, "learning_rate": 1.2583152004973578e-05, "loss": 0.2111, "step": 11930 }, { "epoch": 3.71, "learning_rate": 1.2576935032639106e-05, "loss": 0.1691, "step": 11940 }, { "epoch": 3.71, "learning_rate": 1.2570718060304632e-05, "loss": 0.7607, "step": 11950 }, { "epoch": 3.72, "learning_rate": 1.256450108797016e-05, "loss": 0.3015, "step": 11960 }, { "epoch": 3.72, "learning_rate": 1.2558284115635685e-05, "loss": 0.4223, "step": 11970 }, { "epoch": 3.72, "learning_rate": 1.2552067143301215e-05, "loss": 0.1905, "step": 11980 }, { "epoch": 3.73, "learning_rate": 1.254585017096674e-05, "loss": 0.1699, "step": 11990 }, { "epoch": 3.73, "learning_rate": 1.2539633198632267e-05, "loss": 0.2083, "step": 12000 }, { "epoch": 3.73, "learning_rate": 1.2533416226297794e-05, "loss": 0.0929, "step": 12010 }, { "epoch": 3.74, "learning_rate": 1.252719925396332e-05, "loss": 0.2584, "step": 12020 }, { "epoch": 3.74, "learning_rate": 1.2520982281628848e-05, "loss": 0.2039, "step": 12030 }, { "epoch": 3.74, "learning_rate": 1.2514765309294374e-05, "loss": 0.2027, "step": 12040 }, { "epoch": 3.75, "learning_rate": 1.2508548336959903e-05, "loss": 0.2366, "step": 12050 }, { "epoch": 3.75, "learning_rate": 1.250233136462543e-05, "loss": 0.5365, "step": 12060 }, { "epoch": 3.75, "learning_rate": 1.2496114392290955e-05, "loss": 0.3077, "step": 12070 }, { "epoch": 3.76, "learning_rate": 1.2489897419956483e-05, "loss": 0.2155, "step": 12080 }, { "epoch": 3.76, "learning_rate": 1.2483680447622009e-05, "loss": 0.2113, "step": 12090 }, { "epoch": 3.76, "learning_rate": 1.2477463475287536e-05, "loss": 0.2136, "step": 12100 }, { "epoch": 3.76, "learning_rate": 1.2471246502953062e-05, "loss": 0.0969, "step": 12110 }, { "epoch": 3.77, "learning_rate": 1.2465029530618588e-05, "loss": 0.2433, "step": 12120 }, { "epoch": 3.77, "learning_rate": 1.2458812558284116e-05, "loss": 0.1784, "step": 12130 }, { "epoch": 3.77, "learning_rate": 1.2452595585949642e-05, "loss": 0.2601, "step": 12140 }, { "epoch": 3.78, "learning_rate": 1.2446378613615171e-05, "loss": 0.2638, "step": 12150 }, { "epoch": 3.78, "learning_rate": 1.2440161641280697e-05, "loss": 0.048, "step": 12160 }, { "epoch": 3.78, "learning_rate": 1.2433944668946225e-05, "loss": 0.0262, "step": 12170 }, { "epoch": 3.79, "learning_rate": 1.2427727696611751e-05, "loss": 0.2895, "step": 12180 }, { "epoch": 3.79, "learning_rate": 1.2421510724277277e-05, "loss": 0.2474, "step": 12190 }, { "epoch": 3.79, "learning_rate": 1.2415293751942805e-05, "loss": 0.2303, "step": 12200 }, { "epoch": 3.8, "learning_rate": 1.240907677960833e-05, "loss": 0.1839, "step": 12210 }, { "epoch": 3.8, "learning_rate": 1.240285980727386e-05, "loss": 0.1499, "step": 12220 }, { "epoch": 3.8, "learning_rate": 1.2396642834939386e-05, "loss": 0.4019, "step": 12230 }, { "epoch": 3.8, "learning_rate": 1.2390425862604914e-05, "loss": 0.3382, "step": 12240 }, { "epoch": 3.81, "learning_rate": 1.238420889027044e-05, "loss": 0.2376, "step": 12250 }, { "epoch": 3.81, "learning_rate": 1.2377991917935966e-05, "loss": 0.3484, "step": 12260 }, { "epoch": 3.81, "learning_rate": 1.2371774945601493e-05, "loss": 0.1413, "step": 12270 }, { "epoch": 3.82, "learning_rate": 1.236555797326702e-05, "loss": 0.0398, "step": 12280 }, { "epoch": 3.82, "learning_rate": 1.2359341000932547e-05, "loss": 0.2335, "step": 12290 }, { "epoch": 3.82, "learning_rate": 1.2353124028598073e-05, "loss": 0.1055, "step": 12300 }, { "epoch": 3.83, "learning_rate": 1.2346907056263599e-05, "loss": 0.2313, "step": 12310 }, { "epoch": 3.83, "learning_rate": 1.2340690083929128e-05, "loss": 0.2399, "step": 12320 }, { "epoch": 3.83, "learning_rate": 1.2334473111594654e-05, "loss": 0.3317, "step": 12330 }, { "epoch": 3.84, "learning_rate": 1.2328256139260182e-05, "loss": 0.343, "step": 12340 }, { "epoch": 3.84, "learning_rate": 1.2322039166925708e-05, "loss": 0.6656, "step": 12350 }, { "epoch": 3.84, "learning_rate": 1.2315822194591235e-05, "loss": 0.0789, "step": 12360 }, { "epoch": 3.85, "learning_rate": 1.2309605222256761e-05, "loss": 0.2907, "step": 12370 }, { "epoch": 3.85, "learning_rate": 1.2303388249922287e-05, "loss": 0.1718, "step": 12380 }, { "epoch": 3.85, "learning_rate": 1.2297171277587817e-05, "loss": 0.4092, "step": 12390 }, { "epoch": 3.85, "learning_rate": 1.2290954305253343e-05, "loss": 0.3268, "step": 12400 }, { "epoch": 3.86, "learning_rate": 1.228473733291887e-05, "loss": 0.3505, "step": 12410 }, { "epoch": 3.86, "learning_rate": 1.2278520360584396e-05, "loss": 0.1433, "step": 12420 }, { "epoch": 3.86, "learning_rate": 1.2272303388249924e-05, "loss": 0.2909, "step": 12430 }, { "epoch": 3.87, "learning_rate": 1.226608641591545e-05, "loss": 0.0909, "step": 12440 }, { "epoch": 3.87, "learning_rate": 1.2259869443580976e-05, "loss": 0.2433, "step": 12450 }, { "epoch": 3.87, "learning_rate": 1.2253652471246504e-05, "loss": 0.1653, "step": 12460 }, { "epoch": 3.88, "learning_rate": 1.224743549891203e-05, "loss": 0.2764, "step": 12470 }, { "epoch": 3.88, "learning_rate": 1.2241218526577559e-05, "loss": 0.3052, "step": 12480 }, { "epoch": 3.88, "learning_rate": 1.2235001554243085e-05, "loss": 0.1314, "step": 12490 }, { "epoch": 3.89, "learning_rate": 1.2228784581908611e-05, "loss": 0.0864, "step": 12500 }, { "epoch": 3.89, "learning_rate": 1.2222567609574139e-05, "loss": 0.1472, "step": 12510 }, { "epoch": 3.89, "learning_rate": 1.2216350637239665e-05, "loss": 0.5788, "step": 12520 }, { "epoch": 3.89, "learning_rate": 1.2210133664905192e-05, "loss": 0.3923, "step": 12530 }, { "epoch": 3.9, "learning_rate": 1.2203916692570718e-05, "loss": 0.1374, "step": 12540 }, { "epoch": 3.9, "learning_rate": 1.2197699720236248e-05, "loss": 0.4062, "step": 12550 }, { "epoch": 3.9, "learning_rate": 1.2191482747901774e-05, "loss": 0.1282, "step": 12560 }, { "epoch": 3.91, "learning_rate": 1.21852657755673e-05, "loss": 0.1485, "step": 12570 }, { "epoch": 3.91, "learning_rate": 1.2179048803232827e-05, "loss": 0.4367, "step": 12580 }, { "epoch": 3.91, "learning_rate": 1.2172831830898353e-05, "loss": 0.4437, "step": 12590 }, { "epoch": 3.92, "learning_rate": 1.2166614858563881e-05, "loss": 0.2701, "step": 12600 }, { "epoch": 3.92, "learning_rate": 1.2160397886229407e-05, "loss": 0.0815, "step": 12610 }, { "epoch": 3.92, "learning_rate": 1.2154180913894934e-05, "loss": 0.2629, "step": 12620 }, { "epoch": 3.93, "learning_rate": 1.214796394156046e-05, "loss": 0.3021, "step": 12630 }, { "epoch": 3.93, "learning_rate": 1.2141746969225986e-05, "loss": 0.2312, "step": 12640 }, { "epoch": 3.93, "learning_rate": 1.2135529996891516e-05, "loss": 0.269, "step": 12650 }, { "epoch": 3.94, "learning_rate": 1.2129313024557042e-05, "loss": 0.2711, "step": 12660 }, { "epoch": 3.94, "learning_rate": 1.212309605222257e-05, "loss": 0.1497, "step": 12670 }, { "epoch": 3.94, "learning_rate": 1.2116879079888095e-05, "loss": 0.1703, "step": 12680 }, { "epoch": 3.94, "learning_rate": 1.2110662107553623e-05, "loss": 0.2349, "step": 12690 }, { "epoch": 3.95, "learning_rate": 1.2104445135219149e-05, "loss": 0.4041, "step": 12700 }, { "epoch": 3.95, "learning_rate": 1.2098228162884675e-05, "loss": 0.3426, "step": 12710 }, { "epoch": 3.95, "learning_rate": 1.2092011190550204e-05, "loss": 0.4043, "step": 12720 }, { "epoch": 3.96, "learning_rate": 1.2085794218215729e-05, "loss": 0.2039, "step": 12730 }, { "epoch": 3.96, "learning_rate": 1.2079577245881258e-05, "loss": 0.1788, "step": 12740 }, { "epoch": 3.96, "learning_rate": 1.2073360273546784e-05, "loss": 0.0802, "step": 12750 }, { "epoch": 3.97, "learning_rate": 1.206714330121231e-05, "loss": 0.2813, "step": 12760 }, { "epoch": 3.97, "learning_rate": 1.2060926328877838e-05, "loss": 0.4239, "step": 12770 }, { "epoch": 3.97, "learning_rate": 1.2054709356543364e-05, "loss": 0.4052, "step": 12780 }, { "epoch": 3.98, "learning_rate": 1.2048492384208891e-05, "loss": 0.1665, "step": 12790 }, { "epoch": 3.98, "learning_rate": 1.2042275411874417e-05, "loss": 0.2048, "step": 12800 }, { "epoch": 3.98, "learning_rate": 1.2036058439539947e-05, "loss": 0.1749, "step": 12810 }, { "epoch": 3.99, "learning_rate": 1.2029841467205473e-05, "loss": 0.212, "step": 12820 }, { "epoch": 3.99, "learning_rate": 1.2023624494870999e-05, "loss": 0.3038, "step": 12830 }, { "epoch": 3.99, "learning_rate": 1.2017407522536526e-05, "loss": 0.1094, "step": 12840 }, { "epoch": 3.99, "learning_rate": 1.2011190550202052e-05, "loss": 0.267, "step": 12850 }, { "epoch": 4.0, "learning_rate": 1.200497357786758e-05, "loss": 0.4164, "step": 12860 }, { "epoch": 4.0, "eval_FN": 198, "eval_FP": 182, "eval_TN": 5656, "eval_TP": 397, "eval_accuracy": 0.9409295818436189, "eval_f1": 0.676320272572402, "eval_loss": 0.7678781747817993, "eval_precision": 0.6856649395509499, "eval_recall": 0.6672268907563025, "eval_runtime": 44.4142, "eval_samples_per_second": 144.841, "eval_steps_per_second": 9.074, "step": 12868 }, { "epoch": 4.0, "learning_rate": 1.1998756605533106e-05, "loss": 0.0697, "step": 12870 }, { "epoch": 4.0, "learning_rate": 1.1992539633198634e-05, "loss": 0.0332, "step": 12880 }, { "epoch": 4.01, "learning_rate": 1.198632266086416e-05, "loss": 0.1663, "step": 12890 }, { "epoch": 4.01, "learning_rate": 1.1980105688529685e-05, "loss": 0.4234, "step": 12900 }, { "epoch": 4.01, "learning_rate": 1.1973888716195215e-05, "loss": 0.316, "step": 12910 }, { "epoch": 4.02, "learning_rate": 1.196767174386074e-05, "loss": 0.455, "step": 12920 }, { "epoch": 4.02, "learning_rate": 1.1961454771526268e-05, "loss": 0.3975, "step": 12930 }, { "epoch": 4.02, "learning_rate": 1.1955237799191794e-05, "loss": 0.2174, "step": 12940 }, { "epoch": 4.03, "learning_rate": 1.194902082685732e-05, "loss": 0.1736, "step": 12950 }, { "epoch": 4.03, "learning_rate": 1.1942803854522848e-05, "loss": 0.1438, "step": 12960 }, { "epoch": 4.03, "learning_rate": 1.1936586882188374e-05, "loss": 0.0973, "step": 12970 }, { "epoch": 4.03, "learning_rate": 1.1930369909853903e-05, "loss": 0.2812, "step": 12980 }, { "epoch": 4.04, "learning_rate": 1.192415293751943e-05, "loss": 0.2437, "step": 12990 }, { "epoch": 4.04, "learning_rate": 1.1917935965184957e-05, "loss": 0.1823, "step": 13000 }, { "epoch": 4.04, "learning_rate": 1.1911718992850483e-05, "loss": 0.284, "step": 13010 }, { "epoch": 4.05, "learning_rate": 1.1905502020516009e-05, "loss": 0.0874, "step": 13020 }, { "epoch": 4.05, "learning_rate": 1.1899285048181537e-05, "loss": 0.0613, "step": 13030 }, { "epoch": 4.05, "learning_rate": 1.1893068075847063e-05, "loss": 0.155, "step": 13040 }, { "epoch": 4.06, "learning_rate": 1.188685110351259e-05, "loss": 0.3861, "step": 13050 }, { "epoch": 4.06, "learning_rate": 1.1880634131178116e-05, "loss": 0.0577, "step": 13060 }, { "epoch": 4.06, "learning_rate": 1.1874417158843646e-05, "loss": 0.3828, "step": 13070 }, { "epoch": 4.07, "learning_rate": 1.1868200186509172e-05, "loss": 0.3331, "step": 13080 }, { "epoch": 4.07, "learning_rate": 1.1861983214174698e-05, "loss": 0.1994, "step": 13090 }, { "epoch": 4.07, "learning_rate": 1.1855766241840225e-05, "loss": 0.0815, "step": 13100 }, { "epoch": 4.08, "learning_rate": 1.1849549269505751e-05, "loss": 0.0863, "step": 13110 }, { "epoch": 4.08, "learning_rate": 1.1843332297171279e-05, "loss": 0.1045, "step": 13120 }, { "epoch": 4.08, "learning_rate": 1.1837115324836805e-05, "loss": 0.264, "step": 13130 }, { "epoch": 4.08, "learning_rate": 1.183089835250233e-05, "loss": 0.0652, "step": 13140 }, { "epoch": 4.09, "learning_rate": 1.182468138016786e-05, "loss": 0.3928, "step": 13150 }, { "epoch": 4.09, "learning_rate": 1.1818464407833386e-05, "loss": 0.1215, "step": 13160 }, { "epoch": 4.09, "learning_rate": 1.1812247435498914e-05, "loss": 0.1099, "step": 13170 }, { "epoch": 4.1, "learning_rate": 1.180603046316444e-05, "loss": 0.0471, "step": 13180 }, { "epoch": 4.1, "learning_rate": 1.1799813490829967e-05, "loss": 0.1557, "step": 13190 }, { "epoch": 4.1, "learning_rate": 1.1793596518495493e-05, "loss": 0.1764, "step": 13200 }, { "epoch": 4.11, "learning_rate": 1.178737954616102e-05, "loss": 0.2766, "step": 13210 }, { "epoch": 4.11, "learning_rate": 1.1781162573826547e-05, "loss": 0.298, "step": 13220 }, { "epoch": 4.11, "learning_rate": 1.1774945601492073e-05, "loss": 0.1068, "step": 13230 }, { "epoch": 4.12, "learning_rate": 1.1768728629157602e-05, "loss": 0.0706, "step": 13240 }, { "epoch": 4.12, "learning_rate": 1.1762511656823128e-05, "loss": 0.1876, "step": 13250 }, { "epoch": 4.12, "learning_rate": 1.1756294684488656e-05, "loss": 0.1687, "step": 13260 }, { "epoch": 4.12, "learning_rate": 1.1750077712154182e-05, "loss": 0.0385, "step": 13270 }, { "epoch": 4.13, "learning_rate": 1.1743860739819708e-05, "loss": 0.4703, "step": 13280 }, { "epoch": 4.13, "learning_rate": 1.1737643767485236e-05, "loss": 0.2523, "step": 13290 }, { "epoch": 4.13, "learning_rate": 1.1731426795150762e-05, "loss": 0.2721, "step": 13300 }, { "epoch": 4.14, "learning_rate": 1.1725209822816291e-05, "loss": 0.2307, "step": 13310 }, { "epoch": 4.14, "learning_rate": 1.1718992850481817e-05, "loss": 0.2081, "step": 13320 }, { "epoch": 4.14, "learning_rate": 1.1712775878147343e-05, "loss": 0.0253, "step": 13330 }, { "epoch": 4.15, "learning_rate": 1.170655890581287e-05, "loss": 0.3896, "step": 13340 }, { "epoch": 4.15, "learning_rate": 1.1700341933478397e-05, "loss": 0.0416, "step": 13350 }, { "epoch": 4.15, "learning_rate": 1.1694124961143924e-05, "loss": 0.2369, "step": 13360 }, { "epoch": 4.16, "learning_rate": 1.168790798880945e-05, "loss": 0.2022, "step": 13370 }, { "epoch": 4.16, "learning_rate": 1.1681691016474978e-05, "loss": 0.1457, "step": 13380 }, { "epoch": 4.16, "learning_rate": 1.1675474044140504e-05, "loss": 0.1095, "step": 13390 }, { "epoch": 4.17, "learning_rate": 1.166925707180603e-05, "loss": 0.1327, "step": 13400 }, { "epoch": 4.17, "learning_rate": 1.166304009947156e-05, "loss": 0.2395, "step": 13410 }, { "epoch": 4.17, "learning_rate": 1.1656823127137085e-05, "loss": 0.1049, "step": 13420 }, { "epoch": 4.17, "learning_rate": 1.1650606154802613e-05, "loss": 0.1818, "step": 13430 }, { "epoch": 4.18, "learning_rate": 1.1644389182468139e-05, "loss": 0.0363, "step": 13440 }, { "epoch": 4.18, "learning_rate": 1.1638172210133666e-05, "loss": 0.2254, "step": 13450 }, { "epoch": 4.18, "learning_rate": 1.1631955237799192e-05, "loss": 0.0346, "step": 13460 }, { "epoch": 4.19, "learning_rate": 1.1625738265464718e-05, "loss": 0.1547, "step": 13470 }, { "epoch": 4.19, "learning_rate": 1.1619521293130248e-05, "loss": 0.4807, "step": 13480 }, { "epoch": 4.19, "learning_rate": 1.1613304320795774e-05, "loss": 0.0447, "step": 13490 }, { "epoch": 4.2, "learning_rate": 1.1607087348461301e-05, "loss": 0.4171, "step": 13500 }, { "epoch": 4.2, "learning_rate": 1.1600870376126827e-05, "loss": 0.0847, "step": 13510 }, { "epoch": 4.2, "learning_rate": 1.1594653403792353e-05, "loss": 0.361, "step": 13520 }, { "epoch": 4.21, "learning_rate": 1.1588436431457881e-05, "loss": 0.0742, "step": 13530 }, { "epoch": 4.21, "learning_rate": 1.1582219459123407e-05, "loss": 0.0806, "step": 13540 }, { "epoch": 4.21, "learning_rate": 1.1576002486788935e-05, "loss": 0.1596, "step": 13550 }, { "epoch": 4.22, "learning_rate": 1.156978551445446e-05, "loss": 0.2392, "step": 13560 }, { "epoch": 4.22, "learning_rate": 1.156356854211999e-05, "loss": 0.5657, "step": 13570 }, { "epoch": 4.22, "learning_rate": 1.1557351569785516e-05, "loss": 0.1469, "step": 13580 }, { "epoch": 4.22, "learning_rate": 1.1551134597451042e-05, "loss": 0.0822, "step": 13590 }, { "epoch": 4.23, "learning_rate": 1.154491762511657e-05, "loss": 0.1543, "step": 13600 }, { "epoch": 4.23, "learning_rate": 1.1538700652782096e-05, "loss": 0.2168, "step": 13610 }, { "epoch": 4.23, "learning_rate": 1.1532483680447623e-05, "loss": 0.1708, "step": 13620 }, { "epoch": 4.24, "learning_rate": 1.152626670811315e-05, "loss": 0.1115, "step": 13630 }, { "epoch": 4.24, "learning_rate": 1.1520049735778679e-05, "loss": 0.4101, "step": 13640 }, { "epoch": 4.24, "learning_rate": 1.1513832763444205e-05, "loss": 0.01, "step": 13650 }, { "epoch": 4.25, "learning_rate": 1.150761579110973e-05, "loss": 0.1413, "step": 13660 }, { "epoch": 4.25, "learning_rate": 1.1501398818775258e-05, "loss": 0.4731, "step": 13670 }, { "epoch": 4.25, "learning_rate": 1.1495181846440784e-05, "loss": 0.1604, "step": 13680 }, { "epoch": 4.26, "learning_rate": 1.1488964874106312e-05, "loss": 0.0459, "step": 13690 }, { "epoch": 4.26, "learning_rate": 1.1482747901771838e-05, "loss": 0.2892, "step": 13700 }, { "epoch": 4.26, "learning_rate": 1.1476530929437364e-05, "loss": 0.2933, "step": 13710 }, { "epoch": 4.26, "learning_rate": 1.1470313957102891e-05, "loss": 0.205, "step": 13720 }, { "epoch": 4.27, "learning_rate": 1.1464096984768417e-05, "loss": 0.203, "step": 13730 }, { "epoch": 4.27, "learning_rate": 1.1457880012433947e-05, "loss": 0.0712, "step": 13740 }, { "epoch": 4.27, "learning_rate": 1.1451663040099473e-05, "loss": 0.0629, "step": 13750 }, { "epoch": 4.28, "learning_rate": 1.1445446067765e-05, "loss": 0.079, "step": 13760 }, { "epoch": 4.28, "learning_rate": 1.1439229095430526e-05, "loss": 0.2771, "step": 13770 }, { "epoch": 4.28, "learning_rate": 1.1433012123096052e-05, "loss": 0.2103, "step": 13780 }, { "epoch": 4.29, "learning_rate": 1.142679515076158e-05, "loss": 0.1326, "step": 13790 }, { "epoch": 4.29, "learning_rate": 1.1420578178427106e-05, "loss": 0.0983, "step": 13800 }, { "epoch": 4.29, "learning_rate": 1.1414361206092635e-05, "loss": 0.2378, "step": 13810 }, { "epoch": 4.3, "learning_rate": 1.1408144233758161e-05, "loss": 0.2939, "step": 13820 }, { "epoch": 4.3, "learning_rate": 1.1401927261423689e-05, "loss": 0.2476, "step": 13830 }, { "epoch": 4.3, "learning_rate": 1.1395710289089215e-05, "loss": 0.1325, "step": 13840 }, { "epoch": 4.31, "learning_rate": 1.1389493316754741e-05, "loss": 0.1444, "step": 13850 }, { "epoch": 4.31, "learning_rate": 1.1383276344420269e-05, "loss": 0.195, "step": 13860 }, { "epoch": 4.31, "learning_rate": 1.1377059372085795e-05, "loss": 0.332, "step": 13870 }, { "epoch": 4.31, "learning_rate": 1.1370842399751322e-05, "loss": 0.1987, "step": 13880 }, { "epoch": 4.32, "learning_rate": 1.1364625427416848e-05, "loss": 0.0942, "step": 13890 }, { "epoch": 4.32, "learning_rate": 1.1358408455082374e-05, "loss": 0.3098, "step": 13900 }, { "epoch": 4.32, "learning_rate": 1.1352191482747904e-05, "loss": 0.095, "step": 13910 }, { "epoch": 4.33, "learning_rate": 1.134597451041343e-05, "loss": 0.1755, "step": 13920 }, { "epoch": 4.33, "learning_rate": 1.1339757538078957e-05, "loss": 0.1479, "step": 13930 }, { "epoch": 4.33, "learning_rate": 1.1333540565744483e-05, "loss": 0.2255, "step": 13940 }, { "epoch": 4.34, "learning_rate": 1.132732359341001e-05, "loss": 0.1301, "step": 13950 }, { "epoch": 4.34, "learning_rate": 1.1321106621075537e-05, "loss": 0.351, "step": 13960 }, { "epoch": 4.34, "learning_rate": 1.1314889648741063e-05, "loss": 0.1671, "step": 13970 }, { "epoch": 4.35, "learning_rate": 1.130867267640659e-05, "loss": 0.4936, "step": 13980 }, { "epoch": 4.35, "learning_rate": 1.1302455704072116e-05, "loss": 0.1421, "step": 13990 }, { "epoch": 4.35, "learning_rate": 1.1296238731737646e-05, "loss": 0.1174, "step": 14000 }, { "epoch": 4.35, "learning_rate": 1.1290021759403172e-05, "loss": 0.2405, "step": 14010 }, { "epoch": 4.36, "learning_rate": 1.12838047870687e-05, "loss": 0.2212, "step": 14020 }, { "epoch": 4.36, "learning_rate": 1.1277587814734225e-05, "loss": 0.1937, "step": 14030 }, { "epoch": 4.36, "learning_rate": 1.1271370842399751e-05, "loss": 0.0475, "step": 14040 }, { "epoch": 4.37, "learning_rate": 1.1265153870065279e-05, "loss": 0.1279, "step": 14050 }, { "epoch": 4.37, "learning_rate": 1.1258936897730805e-05, "loss": 0.2849, "step": 14060 }, { "epoch": 4.37, "learning_rate": 1.1252719925396334e-05, "loss": 0.1228, "step": 14070 }, { "epoch": 4.38, "learning_rate": 1.124650295306186e-05, "loss": 0.0637, "step": 14080 }, { "epoch": 4.38, "learning_rate": 1.1240285980727386e-05, "loss": 0.173, "step": 14090 }, { "epoch": 4.38, "learning_rate": 1.1234069008392914e-05, "loss": 0.1891, "step": 14100 }, { "epoch": 4.39, "learning_rate": 1.122785203605844e-05, "loss": 0.107, "step": 14110 }, { "epoch": 4.39, "learning_rate": 1.1221635063723968e-05, "loss": 0.1972, "step": 14120 }, { "epoch": 4.39, "learning_rate": 1.1215418091389494e-05, "loss": 0.333, "step": 14130 }, { "epoch": 4.4, "learning_rate": 1.1209201119055021e-05, "loss": 0.2497, "step": 14140 }, { "epoch": 4.4, "learning_rate": 1.1202984146720547e-05, "loss": 0.218, "step": 14150 }, { "epoch": 4.4, "learning_rate": 1.1196767174386073e-05, "loss": 0.1593, "step": 14160 }, { "epoch": 4.4, "learning_rate": 1.1190550202051603e-05, "loss": 0.1582, "step": 14170 }, { "epoch": 4.41, "learning_rate": 1.1184333229717129e-05, "loss": 0.2563, "step": 14180 }, { "epoch": 4.41, "learning_rate": 1.1178116257382656e-05, "loss": 0.0804, "step": 14190 }, { "epoch": 4.41, "learning_rate": 1.1171899285048182e-05, "loss": 0.2503, "step": 14200 }, { "epoch": 4.42, "learning_rate": 1.116568231271371e-05, "loss": 0.326, "step": 14210 }, { "epoch": 4.42, "learning_rate": 1.1159465340379236e-05, "loss": 0.1326, "step": 14220 }, { "epoch": 4.42, "learning_rate": 1.1153248368044762e-05, "loss": 0.0479, "step": 14230 }, { "epoch": 4.43, "learning_rate": 1.1147031395710291e-05, "loss": 0.2754, "step": 14240 }, { "epoch": 4.43, "learning_rate": 1.1140814423375817e-05, "loss": 0.233, "step": 14250 }, { "epoch": 4.43, "learning_rate": 1.1134597451041345e-05, "loss": 0.6015, "step": 14260 }, { "epoch": 4.44, "learning_rate": 1.112838047870687e-05, "loss": 0.1645, "step": 14270 }, { "epoch": 4.44, "learning_rate": 1.1122163506372397e-05, "loss": 0.1909, "step": 14280 }, { "epoch": 4.44, "learning_rate": 1.1115946534037924e-05, "loss": 0.0351, "step": 14290 }, { "epoch": 4.45, "learning_rate": 1.110972956170345e-05, "loss": 0.0845, "step": 14300 }, { "epoch": 4.45, "learning_rate": 1.1103512589368978e-05, "loss": 0.1054, "step": 14310 }, { "epoch": 4.45, "learning_rate": 1.1097295617034504e-05, "loss": 0.0377, "step": 14320 }, { "epoch": 4.45, "learning_rate": 1.1091078644700033e-05, "loss": 0.3583, "step": 14330 }, { "epoch": 4.46, "learning_rate": 1.108486167236556e-05, "loss": 0.3554, "step": 14340 }, { "epoch": 4.46, "learning_rate": 1.1078644700031085e-05, "loss": 0.2484, "step": 14350 }, { "epoch": 4.46, "learning_rate": 1.1072427727696613e-05, "loss": 0.344, "step": 14360 }, { "epoch": 4.47, "learning_rate": 1.1066210755362139e-05, "loss": 0.4054, "step": 14370 }, { "epoch": 4.47, "learning_rate": 1.1059993783027667e-05, "loss": 0.1823, "step": 14380 }, { "epoch": 4.47, "learning_rate": 1.1053776810693193e-05, "loss": 0.1774, "step": 14390 }, { "epoch": 4.48, "learning_rate": 1.1047559838358722e-05, "loss": 0.2772, "step": 14400 }, { "epoch": 4.48, "learning_rate": 1.1041342866024248e-05, "loss": 0.2062, "step": 14410 }, { "epoch": 4.48, "learning_rate": 1.1035125893689774e-05, "loss": 0.181, "step": 14420 }, { "epoch": 4.49, "learning_rate": 1.1028908921355302e-05, "loss": 0.2523, "step": 14430 }, { "epoch": 4.49, "learning_rate": 1.1022691949020828e-05, "loss": 0.2015, "step": 14440 }, { "epoch": 4.49, "learning_rate": 1.1016474976686355e-05, "loss": 0.1307, "step": 14450 }, { "epoch": 4.49, "learning_rate": 1.1010258004351881e-05, "loss": 0.0494, "step": 14460 }, { "epoch": 4.5, "learning_rate": 1.1004041032017407e-05, "loss": 0.2663, "step": 14470 }, { "epoch": 4.5, "learning_rate": 1.0997824059682935e-05, "loss": 0.3703, "step": 14480 }, { "epoch": 4.5, "learning_rate": 1.099160708734846e-05, "loss": 0.3694, "step": 14490 }, { "epoch": 4.51, "learning_rate": 1.098539011501399e-05, "loss": 0.3697, "step": 14500 }, { "epoch": 4.51, "learning_rate": 1.0979173142679516e-05, "loss": 0.1527, "step": 14510 }, { "epoch": 4.51, "learning_rate": 1.0972956170345044e-05, "loss": 0.1925, "step": 14520 }, { "epoch": 4.52, "learning_rate": 1.096673919801057e-05, "loss": 0.2651, "step": 14530 }, { "epoch": 4.52, "learning_rate": 1.0960522225676096e-05, "loss": 0.4505, "step": 14540 }, { "epoch": 4.52, "learning_rate": 1.0954305253341623e-05, "loss": 0.1967, "step": 14550 }, { "epoch": 4.53, "learning_rate": 1.094808828100715e-05, "loss": 0.0751, "step": 14560 }, { "epoch": 4.53, "learning_rate": 1.0941871308672679e-05, "loss": 0.2361, "step": 14570 }, { "epoch": 4.53, "learning_rate": 1.0935654336338205e-05, "loss": 0.3438, "step": 14580 }, { "epoch": 4.54, "learning_rate": 1.0929437364003732e-05, "loss": 0.0055, "step": 14590 }, { "epoch": 4.54, "learning_rate": 1.0923220391669258e-05, "loss": 0.08, "step": 14600 }, { "epoch": 4.54, "learning_rate": 1.0917003419334784e-05, "loss": 0.6141, "step": 14610 }, { "epoch": 4.54, "learning_rate": 1.0910786447000312e-05, "loss": 0.1514, "step": 14620 }, { "epoch": 4.55, "learning_rate": 1.0904569474665838e-05, "loss": 0.2335, "step": 14630 }, { "epoch": 4.55, "learning_rate": 1.0898352502331366e-05, "loss": 0.2048, "step": 14640 }, { "epoch": 4.55, "learning_rate": 1.0892135529996892e-05, "loss": 0.1486, "step": 14650 }, { "epoch": 4.56, "learning_rate": 1.0885918557662418e-05, "loss": 0.0982, "step": 14660 }, { "epoch": 4.56, "learning_rate": 1.0879701585327947e-05, "loss": 0.1434, "step": 14670 }, { "epoch": 4.56, "learning_rate": 1.0873484612993473e-05, "loss": 0.1638, "step": 14680 }, { "epoch": 4.57, "learning_rate": 1.0867267640659e-05, "loss": 0.057, "step": 14690 }, { "epoch": 4.57, "learning_rate": 1.0861050668324527e-05, "loss": 0.1214, "step": 14700 }, { "epoch": 4.57, "learning_rate": 1.0854833695990054e-05, "loss": 0.0779, "step": 14710 }, { "epoch": 4.58, "learning_rate": 1.084861672365558e-05, "loss": 0.164, "step": 14720 }, { "epoch": 4.58, "learning_rate": 1.0842399751321106e-05, "loss": 0.2971, "step": 14730 }, { "epoch": 4.58, "learning_rate": 1.0836182778986636e-05, "loss": 0.1459, "step": 14740 }, { "epoch": 4.59, "learning_rate": 1.0829965806652161e-05, "loss": 0.2309, "step": 14750 }, { "epoch": 4.59, "learning_rate": 1.082374883431769e-05, "loss": 0.1827, "step": 14760 }, { "epoch": 4.59, "learning_rate": 1.0817531861983215e-05, "loss": 0.023, "step": 14770 }, { "epoch": 4.59, "learning_rate": 1.0811314889648743e-05, "loss": 0.344, "step": 14780 }, { "epoch": 4.6, "learning_rate": 1.0805097917314269e-05, "loss": 0.2736, "step": 14790 }, { "epoch": 4.6, "learning_rate": 1.0798880944979795e-05, "loss": 0.0741, "step": 14800 }, { "epoch": 4.6, "learning_rate": 1.0792663972645322e-05, "loss": 0.1375, "step": 14810 }, { "epoch": 4.61, "learning_rate": 1.0786447000310848e-05, "loss": 0.1171, "step": 14820 }, { "epoch": 4.61, "learning_rate": 1.0780230027976378e-05, "loss": 0.1351, "step": 14830 }, { "epoch": 4.61, "learning_rate": 1.0774013055641904e-05, "loss": 0.0197, "step": 14840 }, { "epoch": 4.62, "learning_rate": 1.076779608330743e-05, "loss": 0.2296, "step": 14850 }, { "epoch": 4.62, "learning_rate": 1.0761579110972957e-05, "loss": 0.1091, "step": 14860 }, { "epoch": 4.62, "learning_rate": 1.0755362138638483e-05, "loss": 0.0704, "step": 14870 }, { "epoch": 4.63, "learning_rate": 1.0749145166304011e-05, "loss": 0.2176, "step": 14880 }, { "epoch": 4.63, "learning_rate": 1.0742928193969537e-05, "loss": 0.0544, "step": 14890 }, { "epoch": 4.63, "learning_rate": 1.0736711221635066e-05, "loss": 0.1585, "step": 14900 }, { "epoch": 4.63, "learning_rate": 1.0730494249300592e-05, "loss": 0.3868, "step": 14910 }, { "epoch": 4.64, "learning_rate": 1.0724277276966117e-05, "loss": 0.2578, "step": 14920 }, { "epoch": 4.64, "learning_rate": 1.0718060304631646e-05, "loss": 0.3199, "step": 14930 }, { "epoch": 4.64, "learning_rate": 1.0711843332297172e-05, "loss": 0.1713, "step": 14940 }, { "epoch": 4.65, "learning_rate": 1.07056263599627e-05, "loss": 0.1219, "step": 14950 }, { "epoch": 4.65, "learning_rate": 1.0699409387628226e-05, "loss": 0.2251, "step": 14960 }, { "epoch": 4.65, "learning_rate": 1.0693192415293753e-05, "loss": 0.1775, "step": 14970 }, { "epoch": 4.66, "learning_rate": 1.068697544295928e-05, "loss": 0.3147, "step": 14980 }, { "epoch": 4.66, "learning_rate": 1.0680758470624805e-05, "loss": 0.1009, "step": 14990 }, { "epoch": 4.66, "learning_rate": 1.0674541498290335e-05, "loss": 0.0908, "step": 15000 }, { "epoch": 4.67, "learning_rate": 1.066832452595586e-05, "loss": 0.1097, "step": 15010 }, { "epoch": 4.67, "learning_rate": 1.0662107553621388e-05, "loss": 0.1495, "step": 15020 }, { "epoch": 4.67, "learning_rate": 1.0655890581286914e-05, "loss": 0.1819, "step": 15030 }, { "epoch": 4.68, "learning_rate": 1.064967360895244e-05, "loss": 0.4953, "step": 15040 }, { "epoch": 4.68, "learning_rate": 1.0643456636617968e-05, "loss": 0.1262, "step": 15050 }, { "epoch": 4.68, "learning_rate": 1.0637239664283494e-05, "loss": 0.1225, "step": 15060 }, { "epoch": 4.68, "learning_rate": 1.0631022691949021e-05, "loss": 0.1417, "step": 15070 }, { "epoch": 4.69, "learning_rate": 1.0624805719614547e-05, "loss": 0.1715, "step": 15080 }, { "epoch": 4.69, "learning_rate": 1.0618588747280077e-05, "loss": 0.1668, "step": 15090 }, { "epoch": 4.69, "learning_rate": 1.0612371774945603e-05, "loss": 0.3905, "step": 15100 }, { "epoch": 4.7, "learning_rate": 1.0606154802611129e-05, "loss": 0.2686, "step": 15110 }, { "epoch": 4.7, "learning_rate": 1.0599937830276656e-05, "loss": 0.2845, "step": 15120 }, { "epoch": 4.7, "learning_rate": 1.0593720857942182e-05, "loss": 0.0782, "step": 15130 }, { "epoch": 4.71, "learning_rate": 1.058750388560771e-05, "loss": 0.2167, "step": 15140 }, { "epoch": 4.71, "learning_rate": 1.0581286913273236e-05, "loss": 0.1672, "step": 15150 }, { "epoch": 4.71, "learning_rate": 1.0575069940938765e-05, "loss": 0.2721, "step": 15160 }, { "epoch": 4.72, "learning_rate": 1.0568852968604291e-05, "loss": 0.1396, "step": 15170 }, { "epoch": 4.72, "learning_rate": 1.0562635996269817e-05, "loss": 0.2705, "step": 15180 }, { "epoch": 4.72, "learning_rate": 1.0556419023935345e-05, "loss": 0.6809, "step": 15190 }, { "epoch": 4.72, "learning_rate": 1.0550202051600871e-05, "loss": 0.0547, "step": 15200 }, { "epoch": 4.73, "learning_rate": 1.0543985079266399e-05, "loss": 0.0948, "step": 15210 }, { "epoch": 4.73, "learning_rate": 1.0537768106931925e-05, "loss": 0.3179, "step": 15220 }, { "epoch": 4.73, "learning_rate": 1.053155113459745e-05, "loss": 0.33, "step": 15230 }, { "epoch": 4.74, "learning_rate": 1.0525334162262978e-05, "loss": 0.0634, "step": 15240 }, { "epoch": 4.74, "learning_rate": 1.0519117189928504e-05, "loss": 0.1467, "step": 15250 }, { "epoch": 4.74, "learning_rate": 1.0512900217594034e-05, "loss": 0.0249, "step": 15260 }, { "epoch": 4.75, "learning_rate": 1.050668324525956e-05, "loss": 0.3551, "step": 15270 }, { "epoch": 4.75, "learning_rate": 1.0500466272925087e-05, "loss": 0.1054, "step": 15280 }, { "epoch": 4.75, "learning_rate": 1.0494249300590613e-05, "loss": 0.0376, "step": 15290 }, { "epoch": 4.76, "learning_rate": 1.0488032328256139e-05, "loss": 0.3304, "step": 15300 }, { "epoch": 4.76, "learning_rate": 1.0481815355921667e-05, "loss": 0.1248, "step": 15310 }, { "epoch": 4.76, "learning_rate": 1.0475598383587193e-05, "loss": 0.0378, "step": 15320 }, { "epoch": 4.77, "learning_rate": 1.0469381411252722e-05, "loss": 0.0362, "step": 15330 }, { "epoch": 4.77, "learning_rate": 1.0463164438918248e-05, "loss": 0.3587, "step": 15340 }, { "epoch": 4.77, "learning_rate": 1.0456947466583776e-05, "loss": 0.2291, "step": 15350 }, { "epoch": 4.77, "learning_rate": 1.0450730494249302e-05, "loss": 0.187, "step": 15360 }, { "epoch": 4.78, "learning_rate": 1.0444513521914828e-05, "loss": 0.0454, "step": 15370 }, { "epoch": 4.78, "learning_rate": 1.0438296549580355e-05, "loss": 0.0579, "step": 15380 }, { "epoch": 4.78, "learning_rate": 1.0432079577245881e-05, "loss": 0.1578, "step": 15390 }, { "epoch": 4.79, "learning_rate": 1.0425862604911409e-05, "loss": 0.3389, "step": 15400 }, { "epoch": 4.79, "learning_rate": 1.0419645632576935e-05, "loss": 0.0403, "step": 15410 }, { "epoch": 4.79, "learning_rate": 1.0413428660242461e-05, "loss": 0.0446, "step": 15420 }, { "epoch": 4.8, "learning_rate": 1.040721168790799e-05, "loss": 0.1665, "step": 15430 }, { "epoch": 4.8, "learning_rate": 1.0400994715573516e-05, "loss": 0.1594, "step": 15440 }, { "epoch": 4.8, "learning_rate": 1.0394777743239044e-05, "loss": 0.1117, "step": 15450 }, { "epoch": 4.81, "learning_rate": 1.038856077090457e-05, "loss": 0.0784, "step": 15460 }, { "epoch": 4.81, "learning_rate": 1.0382343798570098e-05, "loss": 0.1794, "step": 15470 }, { "epoch": 4.81, "learning_rate": 1.0376126826235624e-05, "loss": 0.2039, "step": 15480 }, { "epoch": 4.82, "learning_rate": 1.036990985390115e-05, "loss": 0.0662, "step": 15490 }, { "epoch": 4.82, "learning_rate": 1.0363692881566679e-05, "loss": 0.1552, "step": 15500 }, { "epoch": 4.82, "learning_rate": 1.0357475909232205e-05, "loss": 0.4566, "step": 15510 }, { "epoch": 4.82, "learning_rate": 1.0351258936897733e-05, "loss": 0.1223, "step": 15520 }, { "epoch": 4.83, "learning_rate": 1.0345041964563259e-05, "loss": 0.3628, "step": 15530 }, { "epoch": 4.83, "learning_rate": 1.0338824992228786e-05, "loss": 0.0988, "step": 15540 }, { "epoch": 4.83, "learning_rate": 1.0332608019894312e-05, "loss": 0.1514, "step": 15550 }, { "epoch": 4.84, "learning_rate": 1.0326391047559838e-05, "loss": 0.0371, "step": 15560 }, { "epoch": 4.84, "learning_rate": 1.0320174075225366e-05, "loss": 0.0295, "step": 15570 }, { "epoch": 4.84, "learning_rate": 1.0313957102890892e-05, "loss": 0.1228, "step": 15580 }, { "epoch": 4.85, "learning_rate": 1.0307740130556421e-05, "loss": 0.313, "step": 15590 }, { "epoch": 4.85, "learning_rate": 1.0301523158221947e-05, "loss": 0.5623, "step": 15600 }, { "epoch": 4.85, "learning_rate": 1.0295306185887473e-05, "loss": 0.6124, "step": 15610 }, { "epoch": 4.86, "learning_rate": 1.0289089213553e-05, "loss": 0.0452, "step": 15620 }, { "epoch": 4.86, "learning_rate": 1.0282872241218527e-05, "loss": 0.1714, "step": 15630 }, { "epoch": 4.86, "learning_rate": 1.0276655268884054e-05, "loss": 0.4826, "step": 15640 }, { "epoch": 4.86, "learning_rate": 1.027043829654958e-05, "loss": 0.2113, "step": 15650 }, { "epoch": 4.87, "learning_rate": 1.026422132421511e-05, "loss": 0.1387, "step": 15660 }, { "epoch": 4.87, "learning_rate": 1.0258004351880636e-05, "loss": 0.3188, "step": 15670 }, { "epoch": 4.87, "learning_rate": 1.0251787379546162e-05, "loss": 0.0985, "step": 15680 }, { "epoch": 4.88, "learning_rate": 1.024557040721169e-05, "loss": 0.1439, "step": 15690 }, { "epoch": 4.88, "learning_rate": 1.0239353434877215e-05, "loss": 0.5059, "step": 15700 }, { "epoch": 4.88, "learning_rate": 1.0233136462542743e-05, "loss": 0.395, "step": 15710 }, { "epoch": 4.89, "learning_rate": 1.0226919490208269e-05, "loss": 0.1847, "step": 15720 }, { "epoch": 4.89, "learning_rate": 1.0220702517873797e-05, "loss": 0.1671, "step": 15730 }, { "epoch": 4.89, "learning_rate": 1.0214485545539323e-05, "loss": 0.1357, "step": 15740 }, { "epoch": 4.9, "learning_rate": 1.0208268573204849e-05, "loss": 0.2598, "step": 15750 }, { "epoch": 4.9, "learning_rate": 1.0202051600870378e-05, "loss": 0.3265, "step": 15760 }, { "epoch": 4.9, "learning_rate": 1.0195834628535904e-05, "loss": 0.078, "step": 15770 }, { "epoch": 4.91, "learning_rate": 1.0189617656201432e-05, "loss": 0.1509, "step": 15780 }, { "epoch": 4.91, "learning_rate": 1.0183400683866958e-05, "loss": 0.104, "step": 15790 }, { "epoch": 4.91, "learning_rate": 1.0177183711532483e-05, "loss": 0.3213, "step": 15800 }, { "epoch": 4.91, "learning_rate": 1.0170966739198011e-05, "loss": 0.0669, "step": 15810 }, { "epoch": 4.92, "learning_rate": 1.0164749766863537e-05, "loss": 0.2279, "step": 15820 }, { "epoch": 4.92, "learning_rate": 1.0158532794529066e-05, "loss": 0.35, "step": 15830 }, { "epoch": 4.92, "learning_rate": 1.0152315822194592e-05, "loss": 0.2682, "step": 15840 }, { "epoch": 4.93, "learning_rate": 1.014609884986012e-05, "loss": 0.3653, "step": 15850 }, { "epoch": 4.93, "learning_rate": 1.0139881877525646e-05, "loss": 0.1506, "step": 15860 }, { "epoch": 4.93, "learning_rate": 1.0133664905191172e-05, "loss": 0.0043, "step": 15870 }, { "epoch": 4.94, "learning_rate": 1.01274479328567e-05, "loss": 0.0527, "step": 15880 }, { "epoch": 4.94, "learning_rate": 1.0121230960522226e-05, "loss": 0.308, "step": 15890 }, { "epoch": 4.94, "learning_rate": 1.0115013988187753e-05, "loss": 0.2959, "step": 15900 }, { "epoch": 4.95, "learning_rate": 1.010879701585328e-05, "loss": 0.1213, "step": 15910 }, { "epoch": 4.95, "learning_rate": 1.0102580043518809e-05, "loss": 0.1919, "step": 15920 }, { "epoch": 4.95, "learning_rate": 1.0096363071184335e-05, "loss": 0.1923, "step": 15930 }, { "epoch": 4.95, "learning_rate": 1.009014609884986e-05, "loss": 0.3001, "step": 15940 }, { "epoch": 4.96, "learning_rate": 1.0083929126515388e-05, "loss": 0.1401, "step": 15950 }, { "epoch": 4.96, "learning_rate": 1.0077712154180914e-05, "loss": 0.1386, "step": 15960 }, { "epoch": 4.96, "learning_rate": 1.0071495181846442e-05, "loss": 0.0486, "step": 15970 }, { "epoch": 4.97, "learning_rate": 1.0065278209511968e-05, "loss": 0.2297, "step": 15980 }, { "epoch": 4.97, "learning_rate": 1.0059061237177494e-05, "loss": 0.0337, "step": 15990 }, { "epoch": 4.97, "learning_rate": 1.0052844264843023e-05, "loss": 0.3035, "step": 16000 }, { "epoch": 4.98, "learning_rate": 1.004662729250855e-05, "loss": 0.4053, "step": 16010 }, { "epoch": 4.98, "learning_rate": 1.0040410320174077e-05, "loss": 0.1901, "step": 16020 }, { "epoch": 4.98, "learning_rate": 1.0034193347839603e-05, "loss": 0.2025, "step": 16030 }, { "epoch": 4.99, "learning_rate": 1.002797637550513e-05, "loss": 0.1589, "step": 16040 }, { "epoch": 4.99, "learning_rate": 1.0021759403170657e-05, "loss": 0.3132, "step": 16050 }, { "epoch": 4.99, "learning_rate": 1.0015542430836182e-05, "loss": 0.0418, "step": 16060 }, { "epoch": 5.0, "learning_rate": 1.000932545850171e-05, "loss": 0.064, "step": 16070 }, { "epoch": 5.0, "learning_rate": 1.0003108486167236e-05, "loss": 0.0853, "step": 16080 }, { "epoch": 5.0, "eval_FN": 250, "eval_FP": 116, "eval_TN": 5722, "eval_TP": 345, "eval_accuracy": 0.943105860407275, "eval_f1": 0.6534090909090909, "eval_loss": 0.9880599975585938, "eval_precision": 0.7483731019522777, "eval_recall": 0.5798319327731093, "eval_runtime": 44.4207, "eval_samples_per_second": 144.82, "eval_steps_per_second": 9.072, "step": 16085 }, { "epoch": 5.0, "learning_rate": 9.996891513832764e-06, "loss": 0.1996, "step": 16090 }, { "epoch": 5.0, "learning_rate": 9.990674541498291e-06, "loss": 0.1847, "step": 16100 }, { "epoch": 5.01, "learning_rate": 9.984457569163817e-06, "loss": 0.2766, "step": 16110 }, { "epoch": 5.01, "learning_rate": 9.978240596829345e-06, "loss": 0.1721, "step": 16120 }, { "epoch": 5.01, "learning_rate": 9.972023624494873e-06, "loss": 0.0814, "step": 16130 }, { "epoch": 5.02, "learning_rate": 9.965806652160399e-06, "loss": 0.156, "step": 16140 }, { "epoch": 5.02, "learning_rate": 9.959589679825925e-06, "loss": 0.128, "step": 16150 }, { "epoch": 5.02, "learning_rate": 9.953372707491452e-06, "loss": 0.103, "step": 16160 }, { "epoch": 5.03, "learning_rate": 9.947155735156978e-06, "loss": 0.0405, "step": 16170 }, { "epoch": 5.03, "learning_rate": 9.940938762822506e-06, "loss": 0.1787, "step": 16180 }, { "epoch": 5.03, "learning_rate": 9.934721790488034e-06, "loss": 0.0356, "step": 16190 }, { "epoch": 5.04, "learning_rate": 9.92850481815356e-06, "loss": 0.0245, "step": 16200 }, { "epoch": 5.04, "learning_rate": 9.922287845819087e-06, "loss": 0.0771, "step": 16210 }, { "epoch": 5.04, "learning_rate": 9.916070873484613e-06, "loss": 0.2119, "step": 16220 }, { "epoch": 5.05, "learning_rate": 9.909853901150141e-06, "loss": 0.0306, "step": 16230 }, { "epoch": 5.05, "learning_rate": 9.903636928815667e-06, "loss": 0.0032, "step": 16240 }, { "epoch": 5.05, "learning_rate": 9.897419956481195e-06, "loss": 0.0148, "step": 16250 }, { "epoch": 5.05, "learning_rate": 9.891202984146722e-06, "loss": 0.16, "step": 16260 }, { "epoch": 5.06, "learning_rate": 9.884986011812248e-06, "loss": 0.2585, "step": 16270 }, { "epoch": 5.06, "learning_rate": 9.878769039477774e-06, "loss": 0.1428, "step": 16280 }, { "epoch": 5.06, "learning_rate": 9.872552067143302e-06, "loss": 0.0008, "step": 16290 }, { "epoch": 5.07, "learning_rate": 9.86633509480883e-06, "loss": 0.0012, "step": 16300 }, { "epoch": 5.07, "learning_rate": 9.860118122474356e-06, "loss": 0.1881, "step": 16310 }, { "epoch": 5.07, "learning_rate": 9.853901150139883e-06, "loss": 0.1853, "step": 16320 }, { "epoch": 5.08, "learning_rate": 9.84768417780541e-06, "loss": 0.1356, "step": 16330 }, { "epoch": 5.08, "learning_rate": 9.841467205470935e-06, "loss": 0.2801, "step": 16340 }, { "epoch": 5.08, "learning_rate": 9.835250233136463e-06, "loss": 0.0261, "step": 16350 }, { "epoch": 5.09, "learning_rate": 9.82903326080199e-06, "loss": 0.2845, "step": 16360 }, { "epoch": 5.09, "learning_rate": 9.822816288467516e-06, "loss": 0.4936, "step": 16370 }, { "epoch": 5.09, "learning_rate": 9.816599316133044e-06, "loss": 0.4942, "step": 16380 }, { "epoch": 5.09, "learning_rate": 9.810382343798572e-06, "loss": 0.0029, "step": 16390 }, { "epoch": 5.1, "learning_rate": 9.804165371464098e-06, "loss": 0.0652, "step": 16400 }, { "epoch": 5.1, "learning_rate": 9.797948399129624e-06, "loss": 0.0062, "step": 16410 }, { "epoch": 5.1, "learning_rate": 9.791731426795151e-06, "loss": 0.2251, "step": 16420 }, { "epoch": 5.11, "learning_rate": 9.785514454460679e-06, "loss": 0.2069, "step": 16430 }, { "epoch": 5.11, "learning_rate": 9.779297482126205e-06, "loss": 0.1236, "step": 16440 }, { "epoch": 5.11, "learning_rate": 9.773080509791733e-06, "loss": 0.1309, "step": 16450 }, { "epoch": 5.12, "learning_rate": 9.76686353745726e-06, "loss": 0.0173, "step": 16460 }, { "epoch": 5.12, "learning_rate": 9.760646565122786e-06, "loss": 0.3559, "step": 16470 }, { "epoch": 5.12, "learning_rate": 9.754429592788312e-06, "loss": 0.1861, "step": 16480 }, { "epoch": 5.13, "learning_rate": 9.74821262045384e-06, "loss": 0.1748, "step": 16490 }, { "epoch": 5.13, "learning_rate": 9.741995648119366e-06, "loss": 0.0768, "step": 16500 }, { "epoch": 5.13, "learning_rate": 9.735778675784894e-06, "loss": 0.0147, "step": 16510 }, { "epoch": 5.14, "learning_rate": 9.729561703450421e-06, "loss": 0.0273, "step": 16520 }, { "epoch": 5.14, "learning_rate": 9.723344731115947e-06, "loss": 0.3576, "step": 16530 }, { "epoch": 5.14, "learning_rate": 9.717127758781473e-06, "loss": 0.3045, "step": 16540 }, { "epoch": 5.14, "learning_rate": 9.710910786447001e-06, "loss": 0.1483, "step": 16550 }, { "epoch": 5.15, "learning_rate": 9.704693814112529e-06, "loss": 0.35, "step": 16560 }, { "epoch": 5.15, "learning_rate": 9.698476841778055e-06, "loss": 0.4302, "step": 16570 }, { "epoch": 5.15, "learning_rate": 9.692259869443582e-06, "loss": 0.1962, "step": 16580 }, { "epoch": 5.16, "learning_rate": 9.68604289710911e-06, "loss": 0.1141, "step": 16590 }, { "epoch": 5.16, "learning_rate": 9.679825924774636e-06, "loss": 0.1806, "step": 16600 }, { "epoch": 5.16, "learning_rate": 9.673608952440162e-06, "loss": 0.2477, "step": 16610 }, { "epoch": 5.17, "learning_rate": 9.66739198010569e-06, "loss": 0.2753, "step": 16620 }, { "epoch": 5.17, "learning_rate": 9.661175007771217e-06, "loss": 0.0625, "step": 16630 }, { "epoch": 5.17, "learning_rate": 9.654958035436743e-06, "loss": 0.0175, "step": 16640 }, { "epoch": 5.18, "learning_rate": 9.64874106310227e-06, "loss": 0.1935, "step": 16650 }, { "epoch": 5.18, "learning_rate": 9.642524090767797e-06, "loss": 0.1595, "step": 16660 }, { "epoch": 5.18, "learning_rate": 9.636307118433323e-06, "loss": 0.3425, "step": 16670 }, { "epoch": 5.18, "learning_rate": 9.63009014609885e-06, "loss": 0.1552, "step": 16680 }, { "epoch": 5.19, "learning_rate": 9.623873173764378e-06, "loss": 0.192, "step": 16690 }, { "epoch": 5.19, "learning_rate": 9.617656201429904e-06, "loss": 0.1186, "step": 16700 }, { "epoch": 5.19, "learning_rate": 9.611439229095432e-06, "loss": 0.029, "step": 16710 }, { "epoch": 5.2, "learning_rate": 9.605222256760958e-06, "loss": 0.0967, "step": 16720 }, { "epoch": 5.2, "learning_rate": 9.599005284426485e-06, "loss": 0.2278, "step": 16730 }, { "epoch": 5.2, "learning_rate": 9.592788312092011e-06, "loss": 0.2421, "step": 16740 }, { "epoch": 5.21, "learning_rate": 9.586571339757539e-06, "loss": 0.0684, "step": 16750 }, { "epoch": 5.21, "learning_rate": 9.580354367423067e-06, "loss": 0.1937, "step": 16760 }, { "epoch": 5.21, "learning_rate": 9.574137395088593e-06, "loss": 0.4271, "step": 16770 }, { "epoch": 5.22, "learning_rate": 9.56792042275412e-06, "loss": 0.3906, "step": 16780 }, { "epoch": 5.22, "learning_rate": 9.561703450419646e-06, "loss": 0.1584, "step": 16790 }, { "epoch": 5.22, "learning_rate": 9.555486478085172e-06, "loss": 0.004, "step": 16800 }, { "epoch": 5.23, "learning_rate": 9.5492695057507e-06, "loss": 0.2255, "step": 16810 }, { "epoch": 5.23, "learning_rate": 9.543052533416228e-06, "loss": 0.1232, "step": 16820 }, { "epoch": 5.23, "learning_rate": 9.536835561081754e-06, "loss": 0.3239, "step": 16830 }, { "epoch": 5.23, "learning_rate": 9.530618588747281e-06, "loss": 0.0254, "step": 16840 }, { "epoch": 5.24, "learning_rate": 9.524401616412807e-06, "loss": 0.1513, "step": 16850 }, { "epoch": 5.24, "learning_rate": 9.518184644078335e-06, "loss": 0.1301, "step": 16860 }, { "epoch": 5.24, "learning_rate": 9.51196767174386e-06, "loss": 0.0733, "step": 16870 }, { "epoch": 5.25, "learning_rate": 9.505750699409388e-06, "loss": 0.0945, "step": 16880 }, { "epoch": 5.25, "learning_rate": 9.499533727074916e-06, "loss": 0.0299, "step": 16890 }, { "epoch": 5.25, "learning_rate": 9.493316754740442e-06, "loss": 0.4906, "step": 16900 }, { "epoch": 5.26, "learning_rate": 9.487099782405968e-06, "loss": 0.1798, "step": 16910 }, { "epoch": 5.26, "learning_rate": 9.480882810071496e-06, "loss": 0.1694, "step": 16920 }, { "epoch": 5.26, "learning_rate": 9.474665837737023e-06, "loss": 0.2012, "step": 16930 }, { "epoch": 5.27, "learning_rate": 9.46844886540255e-06, "loss": 0.1458, "step": 16940 }, { "epoch": 5.27, "learning_rate": 9.462231893068077e-06, "loss": 0.2052, "step": 16950 }, { "epoch": 5.27, "learning_rate": 9.456014920733603e-06, "loss": 0.1166, "step": 16960 }, { "epoch": 5.28, "learning_rate": 9.44979794839913e-06, "loss": 0.0334, "step": 16970 }, { "epoch": 5.28, "learning_rate": 9.443580976064657e-06, "loss": 0.0132, "step": 16980 }, { "epoch": 5.28, "learning_rate": 9.437364003730184e-06, "loss": 0.0673, "step": 16990 }, { "epoch": 5.28, "learning_rate": 9.43114703139571e-06, "loss": 0.0284, "step": 17000 }, { "epoch": 5.29, "learning_rate": 9.424930059061238e-06, "loss": 0.0369, "step": 17010 }, { "epoch": 5.29, "learning_rate": 9.418713086726766e-06, "loss": 0.3045, "step": 17020 }, { "epoch": 5.29, "learning_rate": 9.412496114392292e-06, "loss": 0.0355, "step": 17030 }, { "epoch": 5.3, "learning_rate": 9.406279142057818e-06, "loss": 0.0742, "step": 17040 }, { "epoch": 5.3, "learning_rate": 9.400062169723345e-06, "loss": 0.0335, "step": 17050 }, { "epoch": 5.3, "learning_rate": 9.393845197388873e-06, "loss": 0.0451, "step": 17060 }, { "epoch": 5.31, "learning_rate": 9.387628225054399e-06, "loss": 0.1433, "step": 17070 }, { "epoch": 5.31, "learning_rate": 9.381411252719927e-06, "loss": 0.0408, "step": 17080 }, { "epoch": 5.31, "learning_rate": 9.375194280385454e-06, "loss": 0.1294, "step": 17090 }, { "epoch": 5.32, "learning_rate": 9.36897730805098e-06, "loss": 0.0129, "step": 17100 }, { "epoch": 5.32, "learning_rate": 9.362760335716506e-06, "loss": 0.1728, "step": 17110 }, { "epoch": 5.32, "learning_rate": 9.356543363382034e-06, "loss": 0.3637, "step": 17120 }, { "epoch": 5.32, "learning_rate": 9.35032639104756e-06, "loss": 0.5885, "step": 17130 }, { "epoch": 5.33, "learning_rate": 9.344109418713087e-06, "loss": 0.0231, "step": 17140 }, { "epoch": 5.33, "learning_rate": 9.337892446378615e-06, "loss": 0.2379, "step": 17150 }, { "epoch": 5.33, "learning_rate": 9.331675474044141e-06, "loss": 0.1588, "step": 17160 }, { "epoch": 5.34, "learning_rate": 9.325458501709667e-06, "loss": 0.1515, "step": 17170 }, { "epoch": 5.34, "learning_rate": 9.319241529375195e-06, "loss": 0.3044, "step": 17180 }, { "epoch": 5.34, "learning_rate": 9.313024557040722e-06, "loss": 0.0964, "step": 17190 }, { "epoch": 5.35, "learning_rate": 9.306807584706248e-06, "loss": 0.0045, "step": 17200 }, { "epoch": 5.35, "learning_rate": 9.300590612371776e-06, "loss": 0.0206, "step": 17210 }, { "epoch": 5.35, "learning_rate": 9.294373640037304e-06, "loss": 0.1028, "step": 17220 }, { "epoch": 5.36, "learning_rate": 9.28815666770283e-06, "loss": 0.0332, "step": 17230 }, { "epoch": 5.36, "learning_rate": 9.281939695368356e-06, "loss": 0.2089, "step": 17240 }, { "epoch": 5.36, "learning_rate": 9.275722723033883e-06, "loss": 0.1048, "step": 17250 }, { "epoch": 5.37, "learning_rate": 9.26950575069941e-06, "loss": 0.2592, "step": 17260 }, { "epoch": 5.37, "learning_rate": 9.263288778364937e-06, "loss": 0.0227, "step": 17270 }, { "epoch": 5.37, "learning_rate": 9.257071806030465e-06, "loss": 0.0056, "step": 17280 }, { "epoch": 5.37, "learning_rate": 9.25085483369599e-06, "loss": 0.0828, "step": 17290 }, { "epoch": 5.38, "learning_rate": 9.244637861361517e-06, "loss": 0.1368, "step": 17300 }, { "epoch": 5.38, "learning_rate": 9.238420889027044e-06, "loss": 0.0829, "step": 17310 }, { "epoch": 5.38, "learning_rate": 9.232203916692572e-06, "loss": 0.0266, "step": 17320 }, { "epoch": 5.39, "learning_rate": 9.225986944358098e-06, "loss": 0.3122, "step": 17330 }, { "epoch": 5.39, "learning_rate": 9.219769972023626e-06, "loss": 0.2547, "step": 17340 }, { "epoch": 5.39, "learning_rate": 9.213552999689153e-06, "loss": 0.2353, "step": 17350 }, { "epoch": 5.4, "learning_rate": 9.20733602735468e-06, "loss": 0.0044, "step": 17360 }, { "epoch": 5.4, "learning_rate": 9.201119055020205e-06, "loss": 0.0361, "step": 17370 }, { "epoch": 5.4, "learning_rate": 9.194902082685733e-06, "loss": 0.0564, "step": 17380 }, { "epoch": 5.41, "learning_rate": 9.18868511035126e-06, "loss": 0.036, "step": 17390 }, { "epoch": 5.41, "learning_rate": 9.182468138016787e-06, "loss": 0.6358, "step": 17400 }, { "epoch": 5.41, "learning_rate": 9.176251165682314e-06, "loss": 0.2087, "step": 17410 }, { "epoch": 5.41, "learning_rate": 9.17003419334784e-06, "loss": 0.0467, "step": 17420 }, { "epoch": 5.42, "learning_rate": 9.163817221013366e-06, "loss": 0.2258, "step": 17430 }, { "epoch": 5.42, "learning_rate": 9.157600248678894e-06, "loss": 0.2394, "step": 17440 }, { "epoch": 5.42, "learning_rate": 9.151383276344421e-06, "loss": 0.3819, "step": 17450 }, { "epoch": 5.43, "learning_rate": 9.145166304009947e-06, "loss": 0.0361, "step": 17460 }, { "epoch": 5.43, "learning_rate": 9.138949331675475e-06, "loss": 0.0513, "step": 17470 }, { "epoch": 5.43, "learning_rate": 9.132732359341003e-06, "loss": 0.1065, "step": 17480 }, { "epoch": 5.44, "learning_rate": 9.126515387006529e-06, "loss": 0.1309, "step": 17490 }, { "epoch": 5.44, "learning_rate": 9.120298414672055e-06, "loss": 0.0627, "step": 17500 }, { "epoch": 5.44, "learning_rate": 9.114081442337582e-06, "loss": 0.1886, "step": 17510 }, { "epoch": 5.45, "learning_rate": 9.10786447000311e-06, "loss": 0.1295, "step": 17520 }, { "epoch": 5.45, "learning_rate": 9.101647497668636e-06, "loss": 0.332, "step": 17530 }, { "epoch": 5.45, "learning_rate": 9.095430525334164e-06, "loss": 0.135, "step": 17540 }, { "epoch": 5.46, "learning_rate": 9.08921355299969e-06, "loss": 0.0935, "step": 17550 }, { "epoch": 5.46, "learning_rate": 9.082996580665217e-06, "loss": 0.1083, "step": 17560 }, { "epoch": 5.46, "learning_rate": 9.076779608330743e-06, "loss": 0.2611, "step": 17570 }, { "epoch": 5.46, "learning_rate": 9.070562635996271e-06, "loss": 0.011, "step": 17580 }, { "epoch": 5.47, "learning_rate": 9.064345663661797e-06, "loss": 0.1248, "step": 17590 }, { "epoch": 5.47, "learning_rate": 9.058128691327325e-06, "loss": 0.2558, "step": 17600 }, { "epoch": 5.47, "learning_rate": 9.05191171899285e-06, "loss": 0.3142, "step": 17610 }, { "epoch": 5.48, "learning_rate": 9.045694746658378e-06, "loss": 0.1625, "step": 17620 }, { "epoch": 5.48, "learning_rate": 9.039477774323904e-06, "loss": 0.009, "step": 17630 }, { "epoch": 5.48, "learning_rate": 9.033260801989432e-06, "loss": 0.0225, "step": 17640 }, { "epoch": 5.49, "learning_rate": 9.02704382965496e-06, "loss": 0.0796, "step": 17650 }, { "epoch": 5.49, "learning_rate": 9.020826857320486e-06, "loss": 0.3994, "step": 17660 }, { "epoch": 5.49, "learning_rate": 9.014609884986013e-06, "loss": 0.2557, "step": 17670 }, { "epoch": 5.5, "learning_rate": 9.008392912651539e-06, "loss": 0.1174, "step": 17680 }, { "epoch": 5.5, "learning_rate": 9.002175940317067e-06, "loss": 0.2074, "step": 17690 }, { "epoch": 5.5, "learning_rate": 8.995958967982593e-06, "loss": 0.029, "step": 17700 }, { "epoch": 5.51, "learning_rate": 8.98974199564812e-06, "loss": 0.0289, "step": 17710 }, { "epoch": 5.51, "learning_rate": 8.983525023313648e-06, "loss": 0.0171, "step": 17720 }, { "epoch": 5.51, "learning_rate": 8.977308050979174e-06, "loss": 0.3363, "step": 17730 }, { "epoch": 5.51, "learning_rate": 8.9710910786447e-06, "loss": 0.1829, "step": 17740 }, { "epoch": 5.52, "learning_rate": 8.964874106310228e-06, "loss": 0.1529, "step": 17750 }, { "epoch": 5.52, "learning_rate": 8.958657133975754e-06, "loss": 0.1915, "step": 17760 }, { "epoch": 5.52, "learning_rate": 8.952440161641281e-06, "loss": 0.1069, "step": 17770 }, { "epoch": 5.53, "learning_rate": 8.946223189306809e-06, "loss": 0.179, "step": 17780 }, { "epoch": 5.53, "learning_rate": 8.940006216972335e-06, "loss": 0.1423, "step": 17790 }, { "epoch": 5.53, "learning_rate": 8.933789244637861e-06, "loss": 0.0009, "step": 17800 }, { "epoch": 5.54, "learning_rate": 8.927572272303389e-06, "loss": 0.1598, "step": 17810 }, { "epoch": 5.54, "learning_rate": 8.921355299968916e-06, "loss": 0.1449, "step": 17820 }, { "epoch": 5.54, "learning_rate": 8.915138327634442e-06, "loss": 0.2372, "step": 17830 }, { "epoch": 5.55, "learning_rate": 8.90892135529997e-06, "loss": 0.2562, "step": 17840 }, { "epoch": 5.55, "learning_rate": 8.902704382965498e-06, "loss": 0.2307, "step": 17850 }, { "epoch": 5.55, "learning_rate": 8.896487410631024e-06, "loss": 0.1481, "step": 17860 }, { "epoch": 5.55, "learning_rate": 8.89027043829655e-06, "loss": 0.0777, "step": 17870 }, { "epoch": 5.56, "learning_rate": 8.884053465962077e-06, "loss": 0.1308, "step": 17880 }, { "epoch": 5.56, "learning_rate": 8.877836493627603e-06, "loss": 0.3788, "step": 17890 }, { "epoch": 5.56, "learning_rate": 8.871619521293131e-06, "loss": 0.0144, "step": 17900 }, { "epoch": 5.57, "learning_rate": 8.865402548958659e-06, "loss": 0.1533, "step": 17910 }, { "epoch": 5.57, "learning_rate": 8.859185576624185e-06, "loss": 0.1611, "step": 17920 }, { "epoch": 5.57, "learning_rate": 8.85296860428971e-06, "loss": 0.2121, "step": 17930 }, { "epoch": 5.58, "learning_rate": 8.846751631955238e-06, "loss": 0.0879, "step": 17940 }, { "epoch": 5.58, "learning_rate": 8.840534659620766e-06, "loss": 0.1588, "step": 17950 }, { "epoch": 5.58, "learning_rate": 8.834317687286292e-06, "loss": 0.2518, "step": 17960 }, { "epoch": 5.59, "learning_rate": 8.82810071495182e-06, "loss": 0.0837, "step": 17970 }, { "epoch": 5.59, "learning_rate": 8.821883742617347e-06, "loss": 0.0778, "step": 17980 }, { "epoch": 5.59, "learning_rate": 8.815666770282873e-06, "loss": 0.1928, "step": 17990 }, { "epoch": 5.6, "learning_rate": 8.809449797948399e-06, "loss": 0.1427, "step": 18000 }, { "epoch": 5.6, "learning_rate": 8.803232825613927e-06, "loss": 0.0103, "step": 18010 }, { "epoch": 5.6, "learning_rate": 8.797015853279454e-06, "loss": 0.2247, "step": 18020 }, { "epoch": 5.6, "learning_rate": 8.79079888094498e-06, "loss": 0.1576, "step": 18030 }, { "epoch": 5.61, "learning_rate": 8.784581908610508e-06, "loss": 0.0263, "step": 18040 }, { "epoch": 5.61, "learning_rate": 8.778364936276034e-06, "loss": 0.1789, "step": 18050 }, { "epoch": 5.61, "learning_rate": 8.77214796394156e-06, "loss": 0.5078, "step": 18060 }, { "epoch": 5.62, "learning_rate": 8.765930991607088e-06, "loss": 0.1215, "step": 18070 }, { "epoch": 5.62, "learning_rate": 8.759714019272615e-06, "loss": 0.1755, "step": 18080 }, { "epoch": 5.62, "learning_rate": 8.753497046938141e-06, "loss": 0.5965, "step": 18090 }, { "epoch": 5.63, "learning_rate": 8.747280074603669e-06, "loss": 0.0632, "step": 18100 }, { "epoch": 5.63, "learning_rate": 8.741063102269197e-06, "loss": 0.1776, "step": 18110 }, { "epoch": 5.63, "learning_rate": 8.734846129934723e-06, "loss": 0.1918, "step": 18120 }, { "epoch": 5.64, "learning_rate": 8.728629157600249e-06, "loss": 0.2732, "step": 18130 }, { "epoch": 5.64, "learning_rate": 8.722412185265776e-06, "loss": 0.055, "step": 18140 }, { "epoch": 5.64, "learning_rate": 8.716195212931304e-06, "loss": 0.0419, "step": 18150 }, { "epoch": 5.65, "learning_rate": 8.70997824059683e-06, "loss": 0.0814, "step": 18160 }, { "epoch": 5.65, "learning_rate": 8.703761268262358e-06, "loss": 0.0369, "step": 18170 }, { "epoch": 5.65, "learning_rate": 8.697544295927884e-06, "loss": 0.0433, "step": 18180 }, { "epoch": 5.65, "learning_rate": 8.691327323593411e-06, "loss": 0.2104, "step": 18190 }, { "epoch": 5.66, "learning_rate": 8.685110351258937e-06, "loss": 0.0259, "step": 18200 }, { "epoch": 5.66, "learning_rate": 8.678893378924465e-06, "loss": 0.2278, "step": 18210 }, { "epoch": 5.66, "learning_rate": 8.67267640658999e-06, "loss": 0.0381, "step": 18220 }, { "epoch": 5.67, "learning_rate": 8.666459434255518e-06, "loss": 0.0396, "step": 18230 }, { "epoch": 5.67, "learning_rate": 8.660242461921046e-06, "loss": 0.1365, "step": 18240 }, { "epoch": 5.67, "learning_rate": 8.654025489586572e-06, "loss": 0.0183, "step": 18250 }, { "epoch": 5.68, "learning_rate": 8.647808517252098e-06, "loss": 0.1861, "step": 18260 }, { "epoch": 5.68, "learning_rate": 8.641591544917626e-06, "loss": 0.0223, "step": 18270 }, { "epoch": 5.68, "learning_rate": 8.635374572583153e-06, "loss": 0.3287, "step": 18280 }, { "epoch": 5.69, "learning_rate": 8.62915760024868e-06, "loss": 0.0323, "step": 18290 }, { "epoch": 5.69, "learning_rate": 8.622940627914207e-06, "loss": 0.165, "step": 18300 }, { "epoch": 5.69, "learning_rate": 8.616723655579733e-06, "loss": 0.2995, "step": 18310 }, { "epoch": 5.69, "learning_rate": 8.61050668324526e-06, "loss": 0.2903, "step": 18320 }, { "epoch": 5.7, "learning_rate": 8.604289710910787e-06, "loss": 0.114, "step": 18330 }, { "epoch": 5.7, "learning_rate": 8.598072738576314e-06, "loss": 0.3919, "step": 18340 }, { "epoch": 5.7, "learning_rate": 8.59185576624184e-06, "loss": 0.3792, "step": 18350 }, { "epoch": 5.71, "learning_rate": 8.585638793907368e-06, "loss": 0.0281, "step": 18360 }, { "epoch": 5.71, "learning_rate": 8.579421821572894e-06, "loss": 0.0272, "step": 18370 }, { "epoch": 5.71, "learning_rate": 8.573204849238422e-06, "loss": 0.0594, "step": 18380 }, { "epoch": 5.72, "learning_rate": 8.566987876903948e-06, "loss": 0.0844, "step": 18390 }, { "epoch": 5.72, "learning_rate": 8.560770904569475e-06, "loss": 0.4306, "step": 18400 }, { "epoch": 5.72, "learning_rate": 8.554553932235003e-06, "loss": 0.0512, "step": 18410 }, { "epoch": 5.73, "learning_rate": 8.548336959900529e-06, "loss": 0.0829, "step": 18420 }, { "epoch": 5.73, "learning_rate": 8.542119987566057e-06, "loss": 0.3637, "step": 18430 }, { "epoch": 5.73, "learning_rate": 8.535903015231583e-06, "loss": 0.1193, "step": 18440 }, { "epoch": 5.74, "learning_rate": 8.52968604289711e-06, "loss": 0.2678, "step": 18450 }, { "epoch": 5.74, "learning_rate": 8.523469070562636e-06, "loss": 0.0122, "step": 18460 }, { "epoch": 5.74, "learning_rate": 8.517252098228164e-06, "loss": 0.1299, "step": 18470 }, { "epoch": 5.74, "learning_rate": 8.511035125893692e-06, "loss": 0.0853, "step": 18480 }, { "epoch": 5.75, "learning_rate": 8.504818153559217e-06, "loss": 0.428, "step": 18490 }, { "epoch": 5.75, "learning_rate": 8.498601181224743e-06, "loss": 0.2814, "step": 18500 }, { "epoch": 5.75, "learning_rate": 8.492384208890271e-06, "loss": 0.0189, "step": 18510 }, { "epoch": 5.76, "learning_rate": 8.486167236555797e-06, "loss": 0.2316, "step": 18520 }, { "epoch": 5.76, "learning_rate": 8.479950264221325e-06, "loss": 0.2579, "step": 18530 }, { "epoch": 5.76, "learning_rate": 8.473733291886852e-06, "loss": 0.3395, "step": 18540 }, { "epoch": 5.77, "learning_rate": 8.467516319552378e-06, "loss": 0.0031, "step": 18550 }, { "epoch": 5.77, "learning_rate": 8.461299347217904e-06, "loss": 0.1407, "step": 18560 }, { "epoch": 5.77, "learning_rate": 8.455082374883432e-06, "loss": 0.008, "step": 18570 }, { "epoch": 5.78, "learning_rate": 8.44886540254896e-06, "loss": 0.223, "step": 18580 }, { "epoch": 5.78, "learning_rate": 8.442648430214486e-06, "loss": 0.0314, "step": 18590 }, { "epoch": 5.78, "learning_rate": 8.436431457880013e-06, "loss": 0.0044, "step": 18600 }, { "epoch": 5.78, "learning_rate": 8.430214485545541e-06, "loss": 0.392, "step": 18610 }, { "epoch": 5.79, "learning_rate": 8.423997513211067e-06, "loss": 0.3784, "step": 18620 }, { "epoch": 5.79, "learning_rate": 8.417780540876593e-06, "loss": 0.0366, "step": 18630 }, { "epoch": 5.79, "learning_rate": 8.41156356854212e-06, "loss": 0.0679, "step": 18640 }, { "epoch": 5.8, "learning_rate": 8.405346596207648e-06, "loss": 0.2425, "step": 18650 }, { "epoch": 5.8, "learning_rate": 8.399129623873174e-06, "loss": 0.1187, "step": 18660 }, { "epoch": 5.8, "learning_rate": 8.392912651538702e-06, "loss": 0.0043, "step": 18670 }, { "epoch": 5.81, "learning_rate": 8.386695679204228e-06, "loss": 0.0256, "step": 18680 }, { "epoch": 5.81, "learning_rate": 8.380478706869754e-06, "loss": 0.1835, "step": 18690 }, { "epoch": 5.81, "learning_rate": 8.374261734535282e-06, "loss": 0.1276, "step": 18700 }, { "epoch": 5.82, "learning_rate": 8.36804476220081e-06, "loss": 0.0205, "step": 18710 }, { "epoch": 5.82, "learning_rate": 8.361827789866335e-06, "loss": 0.1155, "step": 18720 }, { "epoch": 5.82, "learning_rate": 8.355610817531863e-06, "loss": 0.0595, "step": 18730 }, { "epoch": 5.83, "learning_rate": 8.34939384519739e-06, "loss": 0.2409, "step": 18740 }, { "epoch": 5.83, "learning_rate": 8.343176872862916e-06, "loss": 0.2841, "step": 18750 }, { "epoch": 5.83, "learning_rate": 8.336959900528442e-06, "loss": 0.3091, "step": 18760 }, { "epoch": 5.83, "learning_rate": 8.33074292819397e-06, "loss": 0.0909, "step": 18770 }, { "epoch": 5.84, "learning_rate": 8.324525955859498e-06, "loss": 0.0709, "step": 18780 }, { "epoch": 5.84, "learning_rate": 8.318308983525024e-06, "loss": 0.1632, "step": 18790 }, { "epoch": 5.84, "learning_rate": 8.312092011190551e-06, "loss": 0.0284, "step": 18800 }, { "epoch": 5.85, "learning_rate": 8.305875038856079e-06, "loss": 0.1078, "step": 18810 }, { "epoch": 5.85, "learning_rate": 8.299658066521603e-06, "loss": 0.0929, "step": 18820 }, { "epoch": 5.85, "learning_rate": 8.293441094187131e-06, "loss": 0.2583, "step": 18830 }, { "epoch": 5.86, "learning_rate": 8.287224121852659e-06, "loss": 0.1079, "step": 18840 }, { "epoch": 5.86, "learning_rate": 8.281007149518185e-06, "loss": 0.1171, "step": 18850 }, { "epoch": 5.86, "learning_rate": 8.274790177183712e-06, "loss": 0.1216, "step": 18860 }, { "epoch": 5.87, "learning_rate": 8.26857320484924e-06, "loss": 0.162, "step": 18870 }, { "epoch": 5.87, "learning_rate": 8.262356232514766e-06, "loss": 0.0225, "step": 18880 }, { "epoch": 5.87, "learning_rate": 8.256139260180292e-06, "loss": 0.0318, "step": 18890 }, { "epoch": 5.88, "learning_rate": 8.24992228784582e-06, "loss": 0.0777, "step": 18900 }, { "epoch": 5.88, "learning_rate": 8.243705315511347e-06, "loss": 0.0034, "step": 18910 }, { "epoch": 5.88, "learning_rate": 8.237488343176873e-06, "loss": 0.1377, "step": 18920 }, { "epoch": 5.88, "learning_rate": 8.231271370842401e-06, "loss": 0.15, "step": 18930 }, { "epoch": 5.89, "learning_rate": 8.225054398507929e-06, "loss": 0.0588, "step": 18940 }, { "epoch": 5.89, "learning_rate": 8.218837426173455e-06, "loss": 0.2125, "step": 18950 }, { "epoch": 5.89, "learning_rate": 8.21262045383898e-06, "loss": 0.1453, "step": 18960 }, { "epoch": 5.9, "learning_rate": 8.206403481504508e-06, "loss": 0.0699, "step": 18970 }, { "epoch": 5.9, "learning_rate": 8.200186509170034e-06, "loss": 0.2421, "step": 18980 }, { "epoch": 5.9, "learning_rate": 8.193969536835562e-06, "loss": 0.0263, "step": 18990 }, { "epoch": 5.91, "learning_rate": 8.18775256450109e-06, "loss": 0.1157, "step": 19000 }, { "epoch": 5.91, "learning_rate": 8.181535592166615e-06, "loss": 0.2661, "step": 19010 }, { "epoch": 5.91, "learning_rate": 8.175318619832141e-06, "loss": 0.42, "step": 19020 }, { "epoch": 5.92, "learning_rate": 8.169101647497669e-06, "loss": 0.1129, "step": 19030 }, { "epoch": 5.92, "learning_rate": 8.162884675163197e-06, "loss": 0.0328, "step": 19040 }, { "epoch": 5.92, "learning_rate": 8.156667702828723e-06, "loss": 0.0587, "step": 19050 }, { "epoch": 5.92, "learning_rate": 8.15045073049425e-06, "loss": 0.0066, "step": 19060 }, { "epoch": 5.93, "learning_rate": 8.144233758159776e-06, "loss": 0.0602, "step": 19070 }, { "epoch": 5.93, "learning_rate": 8.138016785825304e-06, "loss": 0.0578, "step": 19080 }, { "epoch": 5.93, "learning_rate": 8.13179981349083e-06, "loss": 0.1213, "step": 19090 }, { "epoch": 5.94, "learning_rate": 8.125582841156358e-06, "loss": 0.2255, "step": 19100 }, { "epoch": 5.94, "learning_rate": 8.119365868821885e-06, "loss": 0.4006, "step": 19110 }, { "epoch": 5.94, "learning_rate": 8.113148896487411e-06, "loss": 0.0375, "step": 19120 }, { "epoch": 5.95, "learning_rate": 8.106931924152939e-06, "loss": 0.015, "step": 19130 }, { "epoch": 5.95, "learning_rate": 8.100714951818465e-06, "loss": 0.6229, "step": 19140 }, { "epoch": 5.95, "learning_rate": 8.094497979483991e-06, "loss": 0.3473, "step": 19150 }, { "epoch": 5.96, "learning_rate": 8.088281007149519e-06, "loss": 0.1422, "step": 19160 }, { "epoch": 5.96, "learning_rate": 8.082064034815046e-06, "loss": 0.0285, "step": 19170 }, { "epoch": 5.96, "learning_rate": 8.075847062480572e-06, "loss": 0.4498, "step": 19180 }, { "epoch": 5.97, "learning_rate": 8.0696300901461e-06, "loss": 0.1943, "step": 19190 }, { "epoch": 5.97, "learning_rate": 8.063413117811626e-06, "loss": 0.3661, "step": 19200 }, { "epoch": 5.97, "learning_rate": 8.057196145477154e-06, "loss": 0.0055, "step": 19210 }, { "epoch": 5.97, "learning_rate": 8.05097917314268e-06, "loss": 0.0171, "step": 19220 }, { "epoch": 5.98, "learning_rate": 8.044762200808207e-06, "loss": 0.3318, "step": 19230 }, { "epoch": 5.98, "learning_rate": 8.038545228473735e-06, "loss": 0.315, "step": 19240 }, { "epoch": 5.98, "learning_rate": 8.032328256139261e-06, "loss": 0.1649, "step": 19250 }, { "epoch": 5.99, "learning_rate": 8.026111283804787e-06, "loss": 0.1597, "step": 19260 }, { "epoch": 5.99, "learning_rate": 8.019894311470314e-06, "loss": 0.4136, "step": 19270 }, { "epoch": 5.99, "learning_rate": 8.013677339135842e-06, "loss": 0.1459, "step": 19280 }, { "epoch": 6.0, "learning_rate": 8.007460366801368e-06, "loss": 0.3557, "step": 19290 }, { "epoch": 6.0, "learning_rate": 8.001243394466896e-06, "loss": 0.3842, "step": 19300 }, { "epoch": 6.0, "eval_FN": 196, "eval_FP": 180, "eval_TN": 5658, "eval_TP": 399, "eval_accuracy": 0.9415513757189492, "eval_f1": 0.6797274275979556, "eval_loss": 0.6800940632820129, "eval_precision": 0.689119170984456, "eval_recall": 0.6705882352941176, "eval_runtime": 44.3406, "eval_samples_per_second": 145.082, "eval_steps_per_second": 9.089, "step": 19302 }, { "epoch": 6.0, "learning_rate": 7.995026422132422e-06, "loss": 0.1237, "step": 19310 }, { "epoch": 6.01, "learning_rate": 7.98880944979795e-06, "loss": 0.01, "step": 19320 }, { "epoch": 6.01, "learning_rate": 7.982592477463475e-06, "loss": 0.0402, "step": 19330 }, { "epoch": 6.01, "learning_rate": 7.976375505129003e-06, "loss": 0.1677, "step": 19340 }, { "epoch": 6.01, "learning_rate": 7.970158532794529e-06, "loss": 0.0085, "step": 19350 }, { "epoch": 6.02, "learning_rate": 7.963941560460057e-06, "loss": 0.0267, "step": 19360 }, { "epoch": 6.02, "learning_rate": 7.957724588125584e-06, "loss": 0.132, "step": 19370 }, { "epoch": 6.02, "learning_rate": 7.95150761579111e-06, "loss": 0.2591, "step": 19380 }, { "epoch": 6.03, "learning_rate": 7.945290643456636e-06, "loss": 0.0052, "step": 19390 }, { "epoch": 6.03, "learning_rate": 7.939073671122164e-06, "loss": 0.0878, "step": 19400 }, { "epoch": 6.03, "learning_rate": 7.932856698787692e-06, "loss": 0.2492, "step": 19410 }, { "epoch": 6.04, "learning_rate": 7.926639726453218e-06, "loss": 0.1132, "step": 19420 }, { "epoch": 6.04, "learning_rate": 7.920422754118745e-06, "loss": 0.0334, "step": 19430 }, { "epoch": 6.04, "learning_rate": 7.914205781784273e-06, "loss": 0.0478, "step": 19440 }, { "epoch": 6.05, "learning_rate": 7.907988809449797e-06, "loss": 0.0957, "step": 19450 }, { "epoch": 6.05, "learning_rate": 7.901771837115325e-06, "loss": 0.0913, "step": 19460 }, { "epoch": 6.05, "learning_rate": 7.895554864780853e-06, "loss": 0.0021, "step": 19470 }, { "epoch": 6.06, "learning_rate": 7.889337892446379e-06, "loss": 0.0023, "step": 19480 }, { "epoch": 6.06, "learning_rate": 7.883120920111906e-06, "loss": 0.1434, "step": 19490 }, { "epoch": 6.06, "learning_rate": 7.876903947777434e-06, "loss": 0.019, "step": 19500 }, { "epoch": 6.06, "learning_rate": 7.87068697544296e-06, "loss": 0.2301, "step": 19510 }, { "epoch": 6.07, "learning_rate": 7.864470003108486e-06, "loss": 0.1909, "step": 19520 }, { "epoch": 6.07, "learning_rate": 7.858253030774013e-06, "loss": 0.0014, "step": 19530 }, { "epoch": 6.07, "learning_rate": 7.852036058439541e-06, "loss": 0.08, "step": 19540 }, { "epoch": 6.08, "learning_rate": 7.845819086105067e-06, "loss": 0.1974, "step": 19550 }, { "epoch": 6.08, "learning_rate": 7.839602113770595e-06, "loss": 0.0011, "step": 19560 }, { "epoch": 6.08, "learning_rate": 7.833385141436122e-06, "loss": 0.0442, "step": 19570 }, { "epoch": 6.09, "learning_rate": 7.827168169101648e-06, "loss": 0.0028, "step": 19580 }, { "epoch": 6.09, "learning_rate": 7.820951196767174e-06, "loss": 0.0209, "step": 19590 }, { "epoch": 6.09, "learning_rate": 7.814734224432702e-06, "loss": 0.0712, "step": 19600 }, { "epoch": 6.1, "learning_rate": 7.808517252098228e-06, "loss": 0.1995, "step": 19610 }, { "epoch": 6.1, "learning_rate": 7.802300279763756e-06, "loss": 0.0362, "step": 19620 }, { "epoch": 6.1, "learning_rate": 7.796083307429283e-06, "loss": 0.0047, "step": 19630 }, { "epoch": 6.11, "learning_rate": 7.78986633509481e-06, "loss": 0.0319, "step": 19640 }, { "epoch": 6.11, "learning_rate": 7.783649362760335e-06, "loss": 0.2405, "step": 19650 }, { "epoch": 6.11, "learning_rate": 7.777432390425863e-06, "loss": 0.1061, "step": 19660 }, { "epoch": 6.11, "learning_rate": 7.77121541809139e-06, "loss": 0.3312, "step": 19670 }, { "epoch": 6.12, "learning_rate": 7.764998445756917e-06, "loss": 0.2061, "step": 19680 }, { "epoch": 6.12, "learning_rate": 7.758781473422444e-06, "loss": 0.0228, "step": 19690 }, { "epoch": 6.12, "learning_rate": 7.752564501087972e-06, "loss": 0.0767, "step": 19700 }, { "epoch": 6.13, "learning_rate": 7.746347528753498e-06, "loss": 0.0303, "step": 19710 }, { "epoch": 6.13, "learning_rate": 7.740130556419024e-06, "loss": 0.0024, "step": 19720 }, { "epoch": 6.13, "learning_rate": 7.733913584084552e-06, "loss": 0.0339, "step": 19730 }, { "epoch": 6.14, "learning_rate": 7.72769661175008e-06, "loss": 0.115, "step": 19740 }, { "epoch": 6.14, "learning_rate": 7.721479639415605e-06, "loss": 0.2795, "step": 19750 }, { "epoch": 6.14, "learning_rate": 7.715262667081133e-06, "loss": 0.3219, "step": 19760 }, { "epoch": 6.15, "learning_rate": 7.709045694746659e-06, "loss": 0.2385, "step": 19770 }, { "epoch": 6.15, "learning_rate": 7.702828722412185e-06, "loss": 0.0405, "step": 19780 }, { "epoch": 6.15, "learning_rate": 7.696611750077713e-06, "loss": 0.0323, "step": 19790 }, { "epoch": 6.15, "learning_rate": 7.69039477774324e-06, "loss": 0.0013, "step": 19800 }, { "epoch": 6.16, "learning_rate": 7.684177805408766e-06, "loss": 0.0189, "step": 19810 }, { "epoch": 6.16, "learning_rate": 7.677960833074294e-06, "loss": 0.0307, "step": 19820 }, { "epoch": 6.16, "learning_rate": 7.67174386073982e-06, "loss": 0.2394, "step": 19830 }, { "epoch": 6.17, "learning_rate": 7.665526888405347e-06, "loss": 0.2788, "step": 19840 }, { "epoch": 6.17, "learning_rate": 7.659309916070873e-06, "loss": 0.1, "step": 19850 }, { "epoch": 6.17, "learning_rate": 7.653092943736401e-06, "loss": 0.2363, "step": 19860 }, { "epoch": 6.18, "learning_rate": 7.646875971401929e-06, "loss": 0.0434, "step": 19870 }, { "epoch": 6.18, "learning_rate": 7.640658999067455e-06, "loss": 0.1789, "step": 19880 }, { "epoch": 6.18, "learning_rate": 7.634442026732982e-06, "loss": 0.4241, "step": 19890 }, { "epoch": 6.19, "learning_rate": 7.628225054398508e-06, "loss": 0.045, "step": 19900 }, { "epoch": 6.19, "learning_rate": 7.622008082064035e-06, "loss": 0.029, "step": 19910 }, { "epoch": 6.19, "learning_rate": 7.615791109729562e-06, "loss": 0.2798, "step": 19920 }, { "epoch": 6.2, "learning_rate": 7.60957413739509e-06, "loss": 0.0019, "step": 19930 }, { "epoch": 6.2, "learning_rate": 7.6033571650606165e-06, "loss": 0.0618, "step": 19940 }, { "epoch": 6.2, "learning_rate": 7.597140192726143e-06, "loss": 0.0026, "step": 19950 }, { "epoch": 6.2, "learning_rate": 7.590923220391669e-06, "loss": 0.0133, "step": 19960 }, { "epoch": 6.21, "learning_rate": 7.584706248057196e-06, "loss": 0.2087, "step": 19970 }, { "epoch": 6.21, "learning_rate": 7.578489275722724e-06, "loss": 0.1152, "step": 19980 }, { "epoch": 6.21, "learning_rate": 7.572272303388251e-06, "loss": 0.1641, "step": 19990 }, { "epoch": 6.22, "learning_rate": 7.566055331053777e-06, "loss": 0.0163, "step": 20000 }, { "epoch": 6.22, "learning_rate": 7.559838358719305e-06, "loss": 0.0894, "step": 20010 }, { "epoch": 6.22, "learning_rate": 7.553621386384831e-06, "loss": 0.0871, "step": 20020 }, { "epoch": 6.23, "learning_rate": 7.547404414050358e-06, "loss": 0.4862, "step": 20030 }, { "epoch": 6.23, "learning_rate": 7.541187441715885e-06, "loss": 0.2347, "step": 20040 }, { "epoch": 6.23, "learning_rate": 7.5349704693814115e-06, "loss": 0.3966, "step": 20050 }, { "epoch": 6.24, "learning_rate": 7.528753497046939e-06, "loss": 0.0112, "step": 20060 }, { "epoch": 6.24, "learning_rate": 7.522536524712466e-06, "loss": 0.0011, "step": 20070 }, { "epoch": 6.24, "learning_rate": 7.516319552377993e-06, "loss": 0.1654, "step": 20080 }, { "epoch": 6.24, "learning_rate": 7.510102580043519e-06, "loss": 0.3142, "step": 20090 }, { "epoch": 6.25, "learning_rate": 7.5038856077090465e-06, "loss": 0.0885, "step": 20100 }, { "epoch": 6.25, "learning_rate": 7.497668635374573e-06, "loss": 0.08, "step": 20110 }, { "epoch": 6.25, "learning_rate": 7.4914516630401e-06, "loss": 0.1023, "step": 20120 }, { "epoch": 6.26, "learning_rate": 7.485234690705627e-06, "loss": 0.0021, "step": 20130 }, { "epoch": 6.26, "learning_rate": 7.479017718371155e-06, "loss": 0.0468, "step": 20140 }, { "epoch": 6.26, "learning_rate": 7.4728007460366806e-06, "loss": 0.2262, "step": 20150 }, { "epoch": 6.27, "learning_rate": 7.466583773702207e-06, "loss": 0.0007, "step": 20160 }, { "epoch": 6.27, "learning_rate": 7.460366801367734e-06, "loss": 0.0953, "step": 20170 }, { "epoch": 6.27, "learning_rate": 7.454149829033262e-06, "loss": 0.035, "step": 20180 }, { "epoch": 6.28, "learning_rate": 7.447932856698789e-06, "loss": 0.0586, "step": 20190 }, { "epoch": 6.28, "learning_rate": 7.4417158843643155e-06, "loss": 0.0007, "step": 20200 }, { "epoch": 6.28, "learning_rate": 7.4354989120298415e-06, "loss": 0.0717, "step": 20210 }, { "epoch": 6.29, "learning_rate": 7.429281939695368e-06, "loss": 0.4193, "step": 20220 }, { "epoch": 6.29, "learning_rate": 7.423064967360896e-06, "loss": 0.0928, "step": 20230 }, { "epoch": 6.29, "learning_rate": 7.416847995026423e-06, "loss": 0.1723, "step": 20240 }, { "epoch": 6.29, "learning_rate": 7.41063102269195e-06, "loss": 0.0466, "step": 20250 }, { "epoch": 6.3, "learning_rate": 7.404414050357477e-06, "loss": 0.1184, "step": 20260 }, { "epoch": 6.3, "learning_rate": 7.398197078023004e-06, "loss": 0.0008, "step": 20270 }, { "epoch": 6.3, "learning_rate": 7.39198010568853e-06, "loss": 0.146, "step": 20280 }, { "epoch": 6.31, "learning_rate": 7.385763133354057e-06, "loss": 0.1072, "step": 20290 }, { "epoch": 6.31, "learning_rate": 7.379546161019584e-06, "loss": 0.0042, "step": 20300 }, { "epoch": 6.31, "learning_rate": 7.373329188685111e-06, "loss": 0.1742, "step": 20310 }, { "epoch": 6.32, "learning_rate": 7.367112216350638e-06, "loss": 0.246, "step": 20320 }, { "epoch": 6.32, "learning_rate": 7.360895244016165e-06, "loss": 0.0011, "step": 20330 }, { "epoch": 6.32, "learning_rate": 7.354678271681691e-06, "loss": 0.0445, "step": 20340 }, { "epoch": 6.33, "learning_rate": 7.348461299347218e-06, "loss": 0.232, "step": 20350 }, { "epoch": 6.33, "learning_rate": 7.3422443270127455e-06, "loss": 0.0251, "step": 20360 }, { "epoch": 6.33, "learning_rate": 7.336027354678272e-06, "loss": 0.2451, "step": 20370 }, { "epoch": 6.34, "learning_rate": 7.329810382343799e-06, "loss": 0.0976, "step": 20380 }, { "epoch": 6.34, "learning_rate": 7.323593410009327e-06, "loss": 0.0471, "step": 20390 }, { "epoch": 6.34, "learning_rate": 7.317376437674853e-06, "loss": 0.2047, "step": 20400 }, { "epoch": 6.34, "learning_rate": 7.3111594653403796e-06, "loss": 0.1967, "step": 20410 }, { "epoch": 6.35, "learning_rate": 7.304942493005906e-06, "loss": 0.0649, "step": 20420 }, { "epoch": 6.35, "learning_rate": 7.298725520671433e-06, "loss": 0.1436, "step": 20430 }, { "epoch": 6.35, "learning_rate": 7.292508548336961e-06, "loss": 0.003, "step": 20440 }, { "epoch": 6.36, "learning_rate": 7.286291576002488e-06, "loss": 0.093, "step": 20450 }, { "epoch": 6.36, "learning_rate": 7.2800746036680145e-06, "loss": 0.1909, "step": 20460 }, { "epoch": 6.36, "learning_rate": 7.2738576313335405e-06, "loss": 0.0148, "step": 20470 }, { "epoch": 6.37, "learning_rate": 7.267640658999068e-06, "loss": 0.0278, "step": 20480 }, { "epoch": 6.37, "learning_rate": 7.261423686664595e-06, "loss": 0.0788, "step": 20490 }, { "epoch": 6.37, "learning_rate": 7.255206714330122e-06, "loss": 0.1672, "step": 20500 }, { "epoch": 6.38, "learning_rate": 7.248989741995649e-06, "loss": 0.0552, "step": 20510 }, { "epoch": 6.38, "learning_rate": 7.242772769661176e-06, "loss": 0.0017, "step": 20520 }, { "epoch": 6.38, "learning_rate": 7.236555797326702e-06, "loss": 0.2225, "step": 20530 }, { "epoch": 6.38, "learning_rate": 7.230338824992229e-06, "loss": 0.0024, "step": 20540 }, { "epoch": 6.39, "learning_rate": 7.224121852657756e-06, "loss": 0.0777, "step": 20550 }, { "epoch": 6.39, "learning_rate": 7.2179048803232836e-06, "loss": 0.1763, "step": 20560 }, { "epoch": 6.39, "learning_rate": 7.21168790798881e-06, "loss": 0.0469, "step": 20570 }, { "epoch": 6.4, "learning_rate": 7.205470935654337e-06, "loss": 0.1108, "step": 20580 }, { "epoch": 6.4, "learning_rate": 7.199253963319864e-06, "loss": 0.3798, "step": 20590 }, { "epoch": 6.4, "learning_rate": 7.19303699098539e-06, "loss": 0.1529, "step": 20600 }, { "epoch": 6.41, "learning_rate": 7.186820018650918e-06, "loss": 0.0678, "step": 20610 }, { "epoch": 6.41, "learning_rate": 7.1806030463164445e-06, "loss": 0.0604, "step": 20620 }, { "epoch": 6.41, "learning_rate": 7.174386073981971e-06, "loss": 0.0578, "step": 20630 }, { "epoch": 6.42, "learning_rate": 7.168169101647499e-06, "loss": 0.0507, "step": 20640 }, { "epoch": 6.42, "learning_rate": 7.161952129313026e-06, "loss": 0.149, "step": 20650 }, { "epoch": 6.42, "learning_rate": 7.155735156978552e-06, "loss": 0.0734, "step": 20660 }, { "epoch": 6.43, "learning_rate": 7.1495181846440786e-06, "loss": 0.2317, "step": 20670 }, { "epoch": 6.43, "learning_rate": 7.143301212309605e-06, "loss": 0.0005, "step": 20680 }, { "epoch": 6.43, "learning_rate": 7.137084239975133e-06, "loss": 0.1794, "step": 20690 }, { "epoch": 6.43, "learning_rate": 7.13086726764066e-06, "loss": 0.0007, "step": 20700 }, { "epoch": 6.44, "learning_rate": 7.124650295306187e-06, "loss": 0.0102, "step": 20710 }, { "epoch": 6.44, "learning_rate": 7.118433322971713e-06, "loss": 0.0004, "step": 20720 }, { "epoch": 6.44, "learning_rate": 7.11221635063724e-06, "loss": 0.0077, "step": 20730 }, { "epoch": 6.45, "learning_rate": 7.105999378302767e-06, "loss": 0.0262, "step": 20740 }, { "epoch": 6.45, "learning_rate": 7.099782405968294e-06, "loss": 0.0065, "step": 20750 }, { "epoch": 6.45, "learning_rate": 7.093565433633821e-06, "loss": 0.0976, "step": 20760 }, { "epoch": 6.46, "learning_rate": 7.0873484612993485e-06, "loss": 0.3319, "step": 20770 }, { "epoch": 6.46, "learning_rate": 7.081131488964875e-06, "loss": 0.0222, "step": 20780 }, { "epoch": 6.46, "learning_rate": 7.074914516630401e-06, "loss": 0.1475, "step": 20790 }, { "epoch": 6.47, "learning_rate": 7.068697544295928e-06, "loss": 0.1559, "step": 20800 }, { "epoch": 6.47, "learning_rate": 7.062480571961455e-06, "loss": 0.525, "step": 20810 }, { "epoch": 6.47, "learning_rate": 7.0562635996269826e-06, "loss": 0.0286, "step": 20820 }, { "epoch": 6.47, "learning_rate": 7.050046627292509e-06, "loss": 0.001, "step": 20830 }, { "epoch": 6.48, "learning_rate": 7.043829654958036e-06, "loss": 0.0994, "step": 20840 }, { "epoch": 6.48, "learning_rate": 7.037612682623562e-06, "loss": 0.1891, "step": 20850 }, { "epoch": 6.48, "learning_rate": 7.03139571028909e-06, "loss": 0.0863, "step": 20860 }, { "epoch": 6.49, "learning_rate": 7.025178737954617e-06, "loss": 0.0428, "step": 20870 }, { "epoch": 6.49, "learning_rate": 7.0189617656201435e-06, "loss": 0.167, "step": 20880 }, { "epoch": 6.49, "learning_rate": 7.01274479328567e-06, "loss": 0.2686, "step": 20890 }, { "epoch": 6.5, "learning_rate": 7.006527820951198e-06, "loss": 0.2093, "step": 20900 }, { "epoch": 6.5, "learning_rate": 7.000310848616724e-06, "loss": 0.0952, "step": 20910 }, { "epoch": 6.5, "learning_rate": 6.994093876282251e-06, "loss": 0.1495, "step": 20920 }, { "epoch": 6.51, "learning_rate": 6.9878769039477776e-06, "loss": 0.1064, "step": 20930 }, { "epoch": 6.51, "learning_rate": 6.981659931613305e-06, "loss": 0.1556, "step": 20940 }, { "epoch": 6.51, "learning_rate": 6.975442959278832e-06, "loss": 0.053, "step": 20950 }, { "epoch": 6.52, "learning_rate": 6.969225986944359e-06, "loss": 0.1025, "step": 20960 }, { "epoch": 6.52, "learning_rate": 6.963009014609886e-06, "loss": 0.0012, "step": 20970 }, { "epoch": 6.52, "learning_rate": 6.956792042275412e-06, "loss": 0.0014, "step": 20980 }, { "epoch": 6.52, "learning_rate": 6.950575069940939e-06, "loss": 0.0638, "step": 20990 }, { "epoch": 6.53, "learning_rate": 6.944358097606466e-06, "loss": 0.4054, "step": 21000 }, { "epoch": 6.53, "learning_rate": 6.938141125271993e-06, "loss": 0.2212, "step": 21010 }, { "epoch": 6.53, "learning_rate": 6.931924152937521e-06, "loss": 0.1891, "step": 21020 }, { "epoch": 6.54, "learning_rate": 6.9257071806030475e-06, "loss": 0.053, "step": 21030 }, { "epoch": 6.54, "learning_rate": 6.9194902082685734e-06, "loss": 0.005, "step": 21040 }, { "epoch": 6.54, "learning_rate": 6.9132732359341e-06, "loss": 0.2786, "step": 21050 }, { "epoch": 6.55, "learning_rate": 6.907056263599627e-06, "loss": 0.1471, "step": 21060 }, { "epoch": 6.55, "learning_rate": 6.900839291265155e-06, "loss": 0.1543, "step": 21070 }, { "epoch": 6.55, "learning_rate": 6.8946223189306816e-06, "loss": 0.0009, "step": 21080 }, { "epoch": 6.56, "learning_rate": 6.888405346596208e-06, "loss": 0.3172, "step": 21090 }, { "epoch": 6.56, "learning_rate": 6.882188374261734e-06, "loss": 0.2497, "step": 21100 }, { "epoch": 6.56, "learning_rate": 6.875971401927262e-06, "loss": 0.1785, "step": 21110 }, { "epoch": 6.57, "learning_rate": 6.869754429592789e-06, "loss": 0.279, "step": 21120 }, { "epoch": 6.57, "learning_rate": 6.863537457258316e-06, "loss": 0.0466, "step": 21130 }, { "epoch": 6.57, "learning_rate": 6.8573204849238425e-06, "loss": 0.0961, "step": 21140 }, { "epoch": 6.57, "learning_rate": 6.85110351258937e-06, "loss": 0.0361, "step": 21150 }, { "epoch": 6.58, "learning_rate": 6.844886540254897e-06, "loss": 0.1862, "step": 21160 }, { "epoch": 6.58, "learning_rate": 6.838669567920423e-06, "loss": 0.2254, "step": 21170 }, { "epoch": 6.58, "learning_rate": 6.83245259558595e-06, "loss": 0.1482, "step": 21180 }, { "epoch": 6.59, "learning_rate": 6.8262356232514774e-06, "loss": 0.1681, "step": 21190 }, { "epoch": 6.59, "learning_rate": 6.820018650917004e-06, "loss": 0.0535, "step": 21200 }, { "epoch": 6.59, "learning_rate": 6.813801678582531e-06, "loss": 0.1077, "step": 21210 }, { "epoch": 6.6, "learning_rate": 6.807584706248058e-06, "loss": 0.1047, "step": 21220 }, { "epoch": 6.6, "learning_rate": 6.801367733913584e-06, "loss": 0.0146, "step": 21230 }, { "epoch": 6.6, "learning_rate": 6.7951507615791115e-06, "loss": 0.0444, "step": 21240 }, { "epoch": 6.61, "learning_rate": 6.788933789244638e-06, "loss": 0.0011, "step": 21250 }, { "epoch": 6.61, "learning_rate": 6.782716816910165e-06, "loss": 0.4839, "step": 21260 }, { "epoch": 6.61, "learning_rate": 6.776499844575693e-06, "loss": 0.3018, "step": 21270 }, { "epoch": 6.61, "learning_rate": 6.77028287224122e-06, "loss": 0.0653, "step": 21280 }, { "epoch": 6.62, "learning_rate": 6.764065899906746e-06, "loss": 0.108, "step": 21290 }, { "epoch": 6.62, "learning_rate": 6.7578489275722724e-06, "loss": 0.4575, "step": 21300 }, { "epoch": 6.62, "learning_rate": 6.751631955237799e-06, "loss": 0.0596, "step": 21310 }, { "epoch": 6.63, "learning_rate": 6.745414982903327e-06, "loss": 0.014, "step": 21320 }, { "epoch": 6.63, "learning_rate": 6.739198010568854e-06, "loss": 0.0884, "step": 21330 }, { "epoch": 6.63, "learning_rate": 6.7329810382343806e-06, "loss": 0.0562, "step": 21340 }, { "epoch": 6.64, "learning_rate": 6.726764065899908e-06, "loss": 0.0035, "step": 21350 }, { "epoch": 6.64, "learning_rate": 6.720547093565433e-06, "loss": 0.0546, "step": 21360 }, { "epoch": 6.64, "learning_rate": 6.714330121230961e-06, "loss": 0.0537, "step": 21370 }, { "epoch": 6.65, "learning_rate": 6.708113148896488e-06, "loss": 0.366, "step": 21380 }, { "epoch": 6.65, "learning_rate": 6.701896176562015e-06, "loss": 0.001, "step": 21390 }, { "epoch": 6.65, "learning_rate": 6.695679204227542e-06, "loss": 0.0203, "step": 21400 }, { "epoch": 6.66, "learning_rate": 6.689462231893069e-06, "loss": 0.1647, "step": 21410 }, { "epoch": 6.66, "learning_rate": 6.683245259558595e-06, "loss": 0.0472, "step": 21420 }, { "epoch": 6.66, "learning_rate": 6.677028287224122e-06, "loss": 0.0619, "step": 21430 }, { "epoch": 6.66, "learning_rate": 6.670811314889649e-06, "loss": 0.2294, "step": 21440 }, { "epoch": 6.67, "learning_rate": 6.6645943425551764e-06, "loss": 0.2064, "step": 21450 }, { "epoch": 6.67, "learning_rate": 6.658377370220703e-06, "loss": 0.0983, "step": 21460 }, { "epoch": 6.67, "learning_rate": 6.65216039788623e-06, "loss": 0.0509, "step": 21470 }, { "epoch": 6.68, "learning_rate": 6.645943425551756e-06, "loss": 0.1302, "step": 21480 }, { "epoch": 6.68, "learning_rate": 6.639726453217284e-06, "loss": 0.0056, "step": 21490 }, { "epoch": 6.68, "learning_rate": 6.6335094808828105e-06, "loss": 0.0168, "step": 21500 }, { "epoch": 6.69, "learning_rate": 6.627292508548337e-06, "loss": 0.0249, "step": 21510 }, { "epoch": 6.69, "learning_rate": 6.621075536213864e-06, "loss": 0.0058, "step": 21520 }, { "epoch": 6.69, "learning_rate": 6.614858563879392e-06, "loss": 0.2765, "step": 21530 }, { "epoch": 6.7, "learning_rate": 6.608641591544919e-06, "loss": 0.1089, "step": 21540 }, { "epoch": 6.7, "learning_rate": 6.602424619210445e-06, "loss": 0.0062, "step": 21550 }, { "epoch": 6.7, "learning_rate": 6.5962076468759715e-06, "loss": 0.0009, "step": 21560 }, { "epoch": 6.71, "learning_rate": 6.589990674541499e-06, "loss": 0.007, "step": 21570 }, { "epoch": 6.71, "learning_rate": 6.583773702207026e-06, "loss": 0.0005, "step": 21580 }, { "epoch": 6.71, "learning_rate": 6.577556729872553e-06, "loss": 0.269, "step": 21590 }, { "epoch": 6.71, "learning_rate": 6.57133975753808e-06, "loss": 0.0003, "step": 21600 }, { "epoch": 6.72, "learning_rate": 6.5651227852036055e-06, "loss": 0.0973, "step": 21610 }, { "epoch": 6.72, "learning_rate": 6.558905812869133e-06, "loss": 0.0348, "step": 21620 }, { "epoch": 6.72, "learning_rate": 6.55268884053466e-06, "loss": 0.2431, "step": 21630 }, { "epoch": 6.73, "learning_rate": 6.546471868200187e-06, "loss": 0.1872, "step": 21640 }, { "epoch": 6.73, "learning_rate": 6.5402548958657145e-06, "loss": 0.3659, "step": 21650 }, { "epoch": 6.73, "learning_rate": 6.534037923531241e-06, "loss": 0.0018, "step": 21660 }, { "epoch": 6.74, "learning_rate": 6.527820951196767e-06, "loss": 0.1754, "step": 21670 }, { "epoch": 6.74, "learning_rate": 6.521603978862294e-06, "loss": 0.0009, "step": 21680 }, { "epoch": 6.74, "learning_rate": 6.515387006527821e-06, "loss": 0.4334, "step": 21690 }, { "epoch": 6.75, "learning_rate": 6.509170034193349e-06, "loss": 0.2115, "step": 21700 }, { "epoch": 6.75, "learning_rate": 6.5029530618588754e-06, "loss": 0.1654, "step": 21710 }, { "epoch": 6.75, "learning_rate": 6.496736089524402e-06, "loss": 0.1438, "step": 21720 }, { "epoch": 6.75, "learning_rate": 6.49051911718993e-06, "loss": 0.0265, "step": 21730 }, { "epoch": 6.76, "learning_rate": 6.484302144855456e-06, "loss": 0.0414, "step": 21740 }, { "epoch": 6.76, "learning_rate": 6.478085172520983e-06, "loss": 0.035, "step": 21750 }, { "epoch": 6.76, "learning_rate": 6.4718682001865095e-06, "loss": 0.0357, "step": 21760 }, { "epoch": 6.77, "learning_rate": 6.465651227852036e-06, "loss": 0.0529, "step": 21770 }, { "epoch": 6.77, "learning_rate": 6.459434255517564e-06, "loss": 0.0035, "step": 21780 }, { "epoch": 6.77, "learning_rate": 6.453217283183091e-06, "loss": 0.1319, "step": 21790 }, { "epoch": 6.78, "learning_rate": 6.447000310848617e-06, "loss": 0.1727, "step": 21800 }, { "epoch": 6.78, "learning_rate": 6.440783338514144e-06, "loss": 0.15, "step": 21810 }, { "epoch": 6.78, "learning_rate": 6.434566366179671e-06, "loss": 0.0498, "step": 21820 }, { "epoch": 6.79, "learning_rate": 6.428349393845198e-06, "loss": 0.2771, "step": 21830 }, { "epoch": 6.79, "learning_rate": 6.422132421510725e-06, "loss": 0.19, "step": 21840 }, { "epoch": 6.79, "learning_rate": 6.415915449176252e-06, "loss": 0.0013, "step": 21850 }, { "epoch": 6.8, "learning_rate": 6.409698476841778e-06, "loss": 0.0379, "step": 21860 }, { "epoch": 6.8, "learning_rate": 6.403481504507305e-06, "loss": 0.1717, "step": 21870 }, { "epoch": 6.8, "learning_rate": 6.397264532172832e-06, "loss": 0.1892, "step": 21880 }, { "epoch": 6.8, "learning_rate": 6.391047559838359e-06, "loss": 0.0006, "step": 21890 }, { "epoch": 6.81, "learning_rate": 6.384830587503887e-06, "loss": 0.1294, "step": 21900 }, { "epoch": 6.81, "learning_rate": 6.3786136151694135e-06, "loss": 0.0263, "step": 21910 }, { "epoch": 6.81, "learning_rate": 6.37239664283494e-06, "loss": 0.231, "step": 21920 }, { "epoch": 6.82, "learning_rate": 6.366179670500466e-06, "loss": 0.0032, "step": 21930 }, { "epoch": 6.82, "learning_rate": 6.359962698165993e-06, "loss": 0.1391, "step": 21940 }, { "epoch": 6.82, "learning_rate": 6.353745725831521e-06, "loss": 0.3617, "step": 21950 }, { "epoch": 6.83, "learning_rate": 6.347528753497048e-06, "loss": 0.0422, "step": 21960 }, { "epoch": 6.83, "learning_rate": 6.3413117811625744e-06, "loss": 0.1119, "step": 21970 }, { "epoch": 6.83, "learning_rate": 6.335094808828101e-06, "loss": 0.038, "step": 21980 }, { "epoch": 6.84, "learning_rate": 6.328877836493627e-06, "loss": 0.1391, "step": 21990 }, { "epoch": 6.84, "learning_rate": 6.322660864159155e-06, "loss": 0.1876, "step": 22000 }, { "epoch": 6.84, "learning_rate": 6.316443891824682e-06, "loss": 0.1733, "step": 22010 }, { "epoch": 6.84, "learning_rate": 6.3102269194902085e-06, "loss": 0.3532, "step": 22020 }, { "epoch": 6.85, "learning_rate": 6.304009947155736e-06, "loss": 0.2385, "step": 22030 }, { "epoch": 6.85, "learning_rate": 6.297792974821263e-06, "loss": 0.1208, "step": 22040 }, { "epoch": 6.85, "learning_rate": 6.291576002486789e-06, "loss": 0.1948, "step": 22050 }, { "epoch": 6.86, "learning_rate": 6.285359030152316e-06, "loss": 0.1735, "step": 22060 }, { "epoch": 6.86, "learning_rate": 6.279142057817843e-06, "loss": 0.1548, "step": 22070 }, { "epoch": 6.86, "learning_rate": 6.27292508548337e-06, "loss": 0.035, "step": 22080 }, { "epoch": 6.87, "learning_rate": 6.266708113148897e-06, "loss": 0.3599, "step": 22090 }, { "epoch": 6.87, "learning_rate": 6.260491140814424e-06, "loss": 0.1619, "step": 22100 }, { "epoch": 6.87, "learning_rate": 6.254274168479952e-06, "loss": 0.1142, "step": 22110 }, { "epoch": 6.88, "learning_rate": 6.248057196145478e-06, "loss": 0.0334, "step": 22120 }, { "epoch": 6.88, "learning_rate": 6.241840223811004e-06, "loss": 0.0292, "step": 22130 }, { "epoch": 6.88, "learning_rate": 6.235623251476531e-06, "loss": 0.0072, "step": 22140 }, { "epoch": 6.89, "learning_rate": 6.229406279142058e-06, "loss": 0.1903, "step": 22150 }, { "epoch": 6.89, "learning_rate": 6.223189306807586e-06, "loss": 0.0154, "step": 22160 }, { "epoch": 6.89, "learning_rate": 6.2169723344731125e-06, "loss": 0.255, "step": 22170 }, { "epoch": 6.89, "learning_rate": 6.2107553621386385e-06, "loss": 0.0007, "step": 22180 }, { "epoch": 6.9, "learning_rate": 6.204538389804165e-06, "loss": 0.2569, "step": 22190 }, { "epoch": 6.9, "learning_rate": 6.198321417469693e-06, "loss": 0.0613, "step": 22200 }, { "epoch": 6.9, "learning_rate": 6.19210444513522e-06, "loss": 0.2947, "step": 22210 }, { "epoch": 6.91, "learning_rate": 6.185887472800747e-06, "loss": 0.1595, "step": 22220 }, { "epoch": 6.91, "learning_rate": 6.1796705004662735e-06, "loss": 0.1727, "step": 22230 }, { "epoch": 6.91, "learning_rate": 6.1734535281317994e-06, "loss": 0.1465, "step": 22240 }, { "epoch": 6.92, "learning_rate": 6.167236555797327e-06, "loss": 0.0167, "step": 22250 }, { "epoch": 6.92, "learning_rate": 6.161019583462854e-06, "loss": 0.0509, "step": 22260 }, { "epoch": 6.92, "learning_rate": 6.154802611128381e-06, "loss": 0.0667, "step": 22270 }, { "epoch": 6.93, "learning_rate": 6.148585638793908e-06, "loss": 0.1629, "step": 22280 }, { "epoch": 6.93, "learning_rate": 6.142368666459435e-06, "loss": 0.1347, "step": 22290 }, { "epoch": 6.93, "learning_rate": 6.136151694124962e-06, "loss": 0.0883, "step": 22300 }, { "epoch": 6.94, "learning_rate": 6.129934721790488e-06, "loss": 0.1033, "step": 22310 }, { "epoch": 6.94, "learning_rate": 6.123717749456015e-06, "loss": 0.0583, "step": 22320 }, { "epoch": 6.94, "learning_rate": 6.1175007771215425e-06, "loss": 0.0007, "step": 22330 }, { "epoch": 6.94, "learning_rate": 6.111283804787069e-06, "loss": 0.1526, "step": 22340 }, { "epoch": 6.95, "learning_rate": 6.105066832452596e-06, "loss": 0.351, "step": 22350 }, { "epoch": 6.95, "learning_rate": 6.098849860118124e-06, "loss": 0.0284, "step": 22360 }, { "epoch": 6.95, "learning_rate": 6.09263288778365e-06, "loss": 0.1839, "step": 22370 }, { "epoch": 6.96, "learning_rate": 6.086415915449177e-06, "loss": 0.1883, "step": 22380 }, { "epoch": 6.96, "learning_rate": 6.080198943114703e-06, "loss": 0.1793, "step": 22390 }, { "epoch": 6.96, "learning_rate": 6.07398197078023e-06, "loss": 0.0148, "step": 22400 }, { "epoch": 6.97, "learning_rate": 6.067764998445758e-06, "loss": 0.1451, "step": 22410 }, { "epoch": 6.97, "learning_rate": 6.061548026111285e-06, "loss": 0.0315, "step": 22420 }, { "epoch": 6.97, "learning_rate": 6.0553310537768115e-06, "loss": 0.3256, "step": 22430 }, { "epoch": 6.98, "learning_rate": 6.0491140814423375e-06, "loss": 0.092, "step": 22440 }, { "epoch": 6.98, "learning_rate": 6.042897109107864e-06, "loss": 0.0253, "step": 22450 }, { "epoch": 6.98, "learning_rate": 6.036680136773392e-06, "loss": 0.2111, "step": 22460 }, { "epoch": 6.98, "learning_rate": 6.030463164438919e-06, "loss": 0.1551, "step": 22470 }, { "epoch": 6.99, "learning_rate": 6.024246192104446e-06, "loss": 0.1043, "step": 22480 }, { "epoch": 6.99, "learning_rate": 6.018029219769973e-06, "loss": 0.0022, "step": 22490 }, { "epoch": 6.99, "learning_rate": 6.011812247435499e-06, "loss": 0.1919, "step": 22500 }, { "epoch": 7.0, "learning_rate": 6.005595275101026e-06, "loss": 0.0045, "step": 22510 }, { "epoch": 7.0, "eval_FN": 238, "eval_FP": 135, "eval_TN": 5703, "eval_TP": 357, "eval_accuracy": 0.9420177211254469, "eval_f1": 0.6568537258509659, "eval_loss": 1.1675221920013428, "eval_precision": 0.725609756097561, "eval_recall": 0.6, "eval_runtime": 44.358, "eval_samples_per_second": 145.024, "eval_steps_per_second": 9.085, "step": 22519 }, { "epoch": 7.0, "learning_rate": 5.999378302766553e-06, "loss": 0.2457, "step": 22520 }, { "epoch": 7.0, "learning_rate": 5.99316133043208e-06, "loss": 0.3892, "step": 22530 }, { "epoch": 7.01, "learning_rate": 5.986944358097607e-06, "loss": 0.0011, "step": 22540 }, { "epoch": 7.01, "learning_rate": 5.980727385763134e-06, "loss": 0.0062, "step": 22550 }, { "epoch": 7.01, "learning_rate": 5.97451041342866e-06, "loss": 0.1354, "step": 22560 }, { "epoch": 7.02, "learning_rate": 5.968293441094187e-06, "loss": 0.0044, "step": 22570 }, { "epoch": 7.02, "learning_rate": 5.962076468759715e-06, "loss": 0.0046, "step": 22580 }, { "epoch": 7.02, "learning_rate": 5.9558594964252415e-06, "loss": 0.0528, "step": 22590 }, { "epoch": 7.03, "learning_rate": 5.949642524090768e-06, "loss": 0.0003, "step": 22600 }, { "epoch": 7.03, "learning_rate": 5.943425551756295e-06, "loss": 0.0282, "step": 22610 }, { "epoch": 7.03, "learning_rate": 5.937208579421823e-06, "loss": 0.0203, "step": 22620 }, { "epoch": 7.03, "learning_rate": 5.930991607087349e-06, "loss": 0.0811, "step": 22630 }, { "epoch": 7.04, "learning_rate": 5.924774634752876e-06, "loss": 0.0568, "step": 22640 }, { "epoch": 7.04, "learning_rate": 5.918557662418402e-06, "loss": 0.2998, "step": 22650 }, { "epoch": 7.04, "learning_rate": 5.91234069008393e-06, "loss": 0.0586, "step": 22660 }, { "epoch": 7.05, "learning_rate": 5.906123717749457e-06, "loss": 0.2537, "step": 22670 }, { "epoch": 7.05, "learning_rate": 5.899906745414984e-06, "loss": 0.0175, "step": 22680 }, { "epoch": 7.05, "learning_rate": 5.89368977308051e-06, "loss": 0.1408, "step": 22690 }, { "epoch": 7.06, "learning_rate": 5.8874728007460365e-06, "loss": 0.1958, "step": 22700 }, { "epoch": 7.06, "learning_rate": 5.881255828411564e-06, "loss": 0.0418, "step": 22710 }, { "epoch": 7.06, "learning_rate": 5.875038856077091e-06, "loss": 0.0006, "step": 22720 }, { "epoch": 7.07, "learning_rate": 5.868821883742618e-06, "loss": 0.0324, "step": 22730 }, { "epoch": 7.07, "learning_rate": 5.8626049114081455e-06, "loss": 0.1274, "step": 22740 }, { "epoch": 7.07, "learning_rate": 5.8563879390736715e-06, "loss": 0.1015, "step": 22750 }, { "epoch": 7.07, "learning_rate": 5.850170966739198e-06, "loss": 0.0141, "step": 22760 }, { "epoch": 7.08, "learning_rate": 5.843953994404725e-06, "loss": 0.0005, "step": 22770 }, { "epoch": 7.08, "learning_rate": 5.837737022070252e-06, "loss": 0.0101, "step": 22780 }, { "epoch": 7.08, "learning_rate": 5.83152004973578e-06, "loss": 0.0093, "step": 22790 }, { "epoch": 7.09, "learning_rate": 5.825303077401306e-06, "loss": 0.0382, "step": 22800 }, { "epoch": 7.09, "learning_rate": 5.819086105066833e-06, "loss": 0.0391, "step": 22810 }, { "epoch": 7.09, "learning_rate": 5.812869132732359e-06, "loss": 0.0762, "step": 22820 }, { "epoch": 7.1, "learning_rate": 5.806652160397887e-06, "loss": 0.0059, "step": 22830 }, { "epoch": 7.1, "learning_rate": 5.800435188063414e-06, "loss": 0.1107, "step": 22840 }, { "epoch": 7.1, "learning_rate": 5.7942182157289405e-06, "loss": 0.002, "step": 22850 }, { "epoch": 7.11, "learning_rate": 5.788001243394467e-06, "loss": 0.0003, "step": 22860 }, { "epoch": 7.11, "learning_rate": 5.781784271059995e-06, "loss": 0.0004, "step": 22870 }, { "epoch": 7.11, "learning_rate": 5.775567298725521e-06, "loss": 0.075, "step": 22880 }, { "epoch": 7.12, "learning_rate": 5.769350326391048e-06, "loss": 0.2989, "step": 22890 }, { "epoch": 7.12, "learning_rate": 5.763133354056575e-06, "loss": 0.0219, "step": 22900 }, { "epoch": 7.12, "learning_rate": 5.756916381722102e-06, "loss": 0.0195, "step": 22910 }, { "epoch": 7.12, "learning_rate": 5.750699409387629e-06, "loss": 0.3066, "step": 22920 }, { "epoch": 7.13, "learning_rate": 5.744482437053156e-06, "loss": 0.0183, "step": 22930 }, { "epoch": 7.13, "learning_rate": 5.738265464718682e-06, "loss": 0.1172, "step": 22940 }, { "epoch": 7.13, "learning_rate": 5.732048492384209e-06, "loss": 0.1853, "step": 22950 }, { "epoch": 7.14, "learning_rate": 5.725831520049736e-06, "loss": 0.1041, "step": 22960 }, { "epoch": 7.14, "learning_rate": 5.719614547715263e-06, "loss": 0.0026, "step": 22970 }, { "epoch": 7.14, "learning_rate": 5.71339757538079e-06, "loss": 0.0446, "step": 22980 }, { "epoch": 7.15, "learning_rate": 5.707180603046318e-06, "loss": 0.0004, "step": 22990 }, { "epoch": 7.15, "learning_rate": 5.7009636307118445e-06, "loss": 0.3444, "step": 23000 }, { "epoch": 7.15, "learning_rate": 5.6947466583773705e-06, "loss": 0.0004, "step": 23010 }, { "epoch": 7.16, "learning_rate": 5.688529686042897e-06, "loss": 0.0378, "step": 23020 }, { "epoch": 7.16, "learning_rate": 5.682312713708424e-06, "loss": 0.0008, "step": 23030 }, { "epoch": 7.16, "learning_rate": 5.676095741373952e-06, "loss": 0.1005, "step": 23040 }, { "epoch": 7.17, "learning_rate": 5.669878769039479e-06, "loss": 0.0002, "step": 23050 }, { "epoch": 7.17, "learning_rate": 5.663661796705005e-06, "loss": 0.0513, "step": 23060 }, { "epoch": 7.17, "learning_rate": 5.657444824370531e-06, "loss": 0.2705, "step": 23070 }, { "epoch": 7.17, "learning_rate": 5.651227852036058e-06, "loss": 0.1089, "step": 23080 }, { "epoch": 7.18, "learning_rate": 5.645010879701586e-06, "loss": 0.0006, "step": 23090 }, { "epoch": 7.18, "learning_rate": 5.638793907367113e-06, "loss": 0.497, "step": 23100 }, { "epoch": 7.18, "learning_rate": 5.6325769350326395e-06, "loss": 0.0039, "step": 23110 }, { "epoch": 7.19, "learning_rate": 5.626359962698167e-06, "loss": 0.2305, "step": 23120 }, { "epoch": 7.19, "learning_rate": 5.620142990363693e-06, "loss": 0.0186, "step": 23130 }, { "epoch": 7.19, "learning_rate": 5.61392601802922e-06, "loss": 0.0979, "step": 23140 }, { "epoch": 7.2, "learning_rate": 5.607709045694747e-06, "loss": 0.0563, "step": 23150 }, { "epoch": 7.2, "learning_rate": 5.601492073360274e-06, "loss": 0.0652, "step": 23160 }, { "epoch": 7.2, "learning_rate": 5.595275101025801e-06, "loss": 0.0163, "step": 23170 }, { "epoch": 7.21, "learning_rate": 5.589058128691328e-06, "loss": 0.0314, "step": 23180 }, { "epoch": 7.21, "learning_rate": 5.582841156356855e-06, "loss": 0.0813, "step": 23190 }, { "epoch": 7.21, "learning_rate": 5.576624184022381e-06, "loss": 0.1408, "step": 23200 }, { "epoch": 7.21, "learning_rate": 5.5704072116879086e-06, "loss": 0.0009, "step": 23210 }, { "epoch": 7.22, "learning_rate": 5.564190239353435e-06, "loss": 0.0445, "step": 23220 }, { "epoch": 7.22, "learning_rate": 5.557973267018962e-06, "loss": 0.0004, "step": 23230 }, { "epoch": 7.22, "learning_rate": 5.551756294684489e-06, "loss": 0.2347, "step": 23240 }, { "epoch": 7.23, "learning_rate": 5.545539322350017e-06, "loss": 0.1462, "step": 23250 }, { "epoch": 7.23, "learning_rate": 5.539322350015543e-06, "loss": 0.175, "step": 23260 }, { "epoch": 7.23, "learning_rate": 5.5331053776810695e-06, "loss": 0.1525, "step": 23270 }, { "epoch": 7.24, "learning_rate": 5.526888405346596e-06, "loss": 0.1627, "step": 23280 }, { "epoch": 7.24, "learning_rate": 5.520671433012124e-06, "loss": 0.2301, "step": 23290 }, { "epoch": 7.24, "learning_rate": 5.514454460677651e-06, "loss": 0.2114, "step": 23300 }, { "epoch": 7.25, "learning_rate": 5.508237488343178e-06, "loss": 0.3338, "step": 23310 }, { "epoch": 7.25, "learning_rate": 5.502020516008704e-06, "loss": 0.1036, "step": 23320 }, { "epoch": 7.25, "learning_rate": 5.49580354367423e-06, "loss": 0.001, "step": 23330 }, { "epoch": 7.26, "learning_rate": 5.489586571339758e-06, "loss": 0.0003, "step": 23340 }, { "epoch": 7.26, "learning_rate": 5.483369599005285e-06, "loss": 0.0268, "step": 23350 }, { "epoch": 7.26, "learning_rate": 5.477152626670812e-06, "loss": 0.057, "step": 23360 }, { "epoch": 7.26, "learning_rate": 5.470935654336339e-06, "loss": 0.272, "step": 23370 }, { "epoch": 7.27, "learning_rate": 5.464718682001866e-06, "loss": 0.0392, "step": 23380 }, { "epoch": 7.27, "learning_rate": 5.458501709667392e-06, "loss": 0.0016, "step": 23390 }, { "epoch": 7.27, "learning_rate": 5.452284737332919e-06, "loss": 0.046, "step": 23400 }, { "epoch": 7.28, "learning_rate": 5.446067764998446e-06, "loss": 0.1123, "step": 23410 }, { "epoch": 7.28, "learning_rate": 5.4398507926639735e-06, "loss": 0.0004, "step": 23420 }, { "epoch": 7.28, "learning_rate": 5.4336338203295e-06, "loss": 0.0363, "step": 23430 }, { "epoch": 7.29, "learning_rate": 5.427416847995027e-06, "loss": 0.0171, "step": 23440 }, { "epoch": 7.29, "learning_rate": 5.421199875660553e-06, "loss": 0.0006, "step": 23450 }, { "epoch": 7.29, "learning_rate": 5.414982903326081e-06, "loss": 0.1606, "step": 23460 }, { "epoch": 7.3, "learning_rate": 5.4087659309916076e-06, "loss": 0.1159, "step": 23470 }, { "epoch": 7.3, "learning_rate": 5.402548958657134e-06, "loss": 0.246, "step": 23480 }, { "epoch": 7.3, "learning_rate": 5.396331986322661e-06, "loss": 0.0162, "step": 23490 }, { "epoch": 7.3, "learning_rate": 5.390115013988189e-06, "loss": 0.0003, "step": 23500 }, { "epoch": 7.31, "learning_rate": 5.383898041653715e-06, "loss": 0.0321, "step": 23510 }, { "epoch": 7.31, "learning_rate": 5.377681069319242e-06, "loss": 0.0249, "step": 23520 }, { "epoch": 7.31, "learning_rate": 5.3714640969847685e-06, "loss": 0.0764, "step": 23530 }, { "epoch": 7.32, "learning_rate": 5.365247124650296e-06, "loss": 0.2977, "step": 23540 }, { "epoch": 7.32, "learning_rate": 5.359030152315823e-06, "loss": 0.0595, "step": 23550 }, { "epoch": 7.32, "learning_rate": 5.35281317998135e-06, "loss": 0.3185, "step": 23560 }, { "epoch": 7.33, "learning_rate": 5.346596207646877e-06, "loss": 0.1189, "step": 23570 }, { "epoch": 7.33, "learning_rate": 5.340379235312403e-06, "loss": 0.0759, "step": 23580 }, { "epoch": 7.33, "learning_rate": 5.33416226297793e-06, "loss": 0.0891, "step": 23590 }, { "epoch": 7.34, "learning_rate": 5.327945290643457e-06, "loss": 0.1439, "step": 23600 }, { "epoch": 7.34, "learning_rate": 5.321728318308984e-06, "loss": 0.0314, "step": 23610 }, { "epoch": 7.34, "learning_rate": 5.315511345974511e-06, "loss": 0.0999, "step": 23620 }, { "epoch": 7.35, "learning_rate": 5.309294373640038e-06, "loss": 0.0073, "step": 23630 }, { "epoch": 7.35, "learning_rate": 5.303077401305564e-06, "loss": 0.0174, "step": 23640 }, { "epoch": 7.35, "learning_rate": 5.296860428971091e-06, "loss": 0.13, "step": 23650 }, { "epoch": 7.35, "learning_rate": 5.290643456636618e-06, "loss": 0.0601, "step": 23660 }, { "epoch": 7.36, "learning_rate": 5.284426484302146e-06, "loss": 0.1138, "step": 23670 }, { "epoch": 7.36, "learning_rate": 5.2782095119676725e-06, "loss": 0.2189, "step": 23680 }, { "epoch": 7.36, "learning_rate": 5.271992539633199e-06, "loss": 0.0004, "step": 23690 }, { "epoch": 7.37, "learning_rate": 5.265775567298725e-06, "loss": 0.0392, "step": 23700 }, { "epoch": 7.37, "learning_rate": 5.259558594964252e-06, "loss": 0.0365, "step": 23710 }, { "epoch": 7.37, "learning_rate": 5.25334162262978e-06, "loss": 0.0006, "step": 23720 }, { "epoch": 7.38, "learning_rate": 5.2471246502953066e-06, "loss": 0.0053, "step": 23730 }, { "epoch": 7.38, "learning_rate": 5.240907677960833e-06, "loss": 0.0951, "step": 23740 }, { "epoch": 7.38, "learning_rate": 5.234690705626361e-06, "loss": 0.2771, "step": 23750 }, { "epoch": 7.39, "learning_rate": 5.228473733291888e-06, "loss": 0.0195, "step": 23760 }, { "epoch": 7.39, "learning_rate": 5.222256760957414e-06, "loss": 0.0855, "step": 23770 }, { "epoch": 7.39, "learning_rate": 5.216039788622941e-06, "loss": 0.0273, "step": 23780 }, { "epoch": 7.4, "learning_rate": 5.2098228162884675e-06, "loss": 0.1438, "step": 23790 }, { "epoch": 7.4, "learning_rate": 5.203605843953995e-06, "loss": 0.0082, "step": 23800 }, { "epoch": 7.4, "learning_rate": 5.197388871619522e-06, "loss": 0.2015, "step": 23810 }, { "epoch": 7.4, "learning_rate": 5.191171899285049e-06, "loss": 0.1341, "step": 23820 }, { "epoch": 7.41, "learning_rate": 5.184954926950575e-06, "loss": 0.0598, "step": 23830 }, { "epoch": 7.41, "learning_rate": 5.1787379546161024e-06, "loss": 0.2663, "step": 23840 }, { "epoch": 7.41, "learning_rate": 5.172520982281629e-06, "loss": 0.1438, "step": 23850 }, { "epoch": 7.42, "learning_rate": 5.166304009947156e-06, "loss": 0.0114, "step": 23860 }, { "epoch": 7.42, "learning_rate": 5.160087037612683e-06, "loss": 0.0007, "step": 23870 }, { "epoch": 7.42, "learning_rate": 5.1538700652782106e-06, "loss": 0.0297, "step": 23880 }, { "epoch": 7.43, "learning_rate": 5.1476530929437365e-06, "loss": 0.18, "step": 23890 }, { "epoch": 7.43, "learning_rate": 5.141436120609263e-06, "loss": 0.0017, "step": 23900 }, { "epoch": 7.43, "learning_rate": 5.13521914827479e-06, "loss": 0.0142, "step": 23910 }, { "epoch": 7.44, "learning_rate": 5.129002175940318e-06, "loss": 0.1999, "step": 23920 }, { "epoch": 7.44, "learning_rate": 5.122785203605845e-06, "loss": 0.0816, "step": 23930 }, { "epoch": 7.44, "learning_rate": 5.1165682312713715e-06, "loss": 0.1761, "step": 23940 }, { "epoch": 7.44, "learning_rate": 5.110351258936898e-06, "loss": 0.0006, "step": 23950 }, { "epoch": 7.45, "learning_rate": 5.104134286602424e-06, "loss": 0.1862, "step": 23960 }, { "epoch": 7.45, "learning_rate": 5.097917314267952e-06, "loss": 0.0107, "step": 23970 }, { "epoch": 7.45, "learning_rate": 5.091700341933479e-06, "loss": 0.0228, "step": 23980 }, { "epoch": 7.46, "learning_rate": 5.085483369599006e-06, "loss": 0.0252, "step": 23990 }, { "epoch": 7.46, "learning_rate": 5.079266397264533e-06, "loss": 0.0006, "step": 24000 }, { "epoch": 7.46, "learning_rate": 5.07304942493006e-06, "loss": 0.2767, "step": 24010 }, { "epoch": 7.47, "learning_rate": 5.066832452595586e-06, "loss": 0.0014, "step": 24020 }, { "epoch": 7.47, "learning_rate": 5.060615480261113e-06, "loss": 0.0774, "step": 24030 }, { "epoch": 7.47, "learning_rate": 5.05439850792664e-06, "loss": 0.0025, "step": 24040 }, { "epoch": 7.48, "learning_rate": 5.048181535592167e-06, "loss": 0.0215, "step": 24050 }, { "epoch": 7.48, "learning_rate": 5.041964563257694e-06, "loss": 0.0298, "step": 24060 }, { "epoch": 7.48, "learning_rate": 5.035747590923221e-06, "loss": 0.0149, "step": 24070 }, { "epoch": 7.49, "learning_rate": 5.029530618588747e-06, "loss": 0.0015, "step": 24080 }, { "epoch": 7.49, "learning_rate": 5.023313646254275e-06, "loss": 0.0233, "step": 24090 }, { "epoch": 7.49, "learning_rate": 5.0170966739198014e-06, "loss": 0.0299, "step": 24100 }, { "epoch": 7.49, "learning_rate": 5.010879701585328e-06, "loss": 0.2481, "step": 24110 }, { "epoch": 7.5, "learning_rate": 5.004662729250855e-06, "loss": 0.1062, "step": 24120 }, { "epoch": 7.5, "learning_rate": 4.998445756916382e-06, "loss": 0.0021, "step": 24130 }, { "epoch": 7.5, "learning_rate": 4.992228784581909e-06, "loss": 0.1935, "step": 24140 }, { "epoch": 7.51, "learning_rate": 4.986011812247436e-06, "loss": 0.0435, "step": 24150 }, { "epoch": 7.51, "learning_rate": 4.979794839912962e-06, "loss": 0.0429, "step": 24160 }, { "epoch": 7.51, "learning_rate": 4.973577867578489e-06, "loss": 0.2341, "step": 24170 }, { "epoch": 7.52, "learning_rate": 4.967360895244017e-06, "loss": 0.172, "step": 24180 }, { "epoch": 7.52, "learning_rate": 4.961143922909544e-06, "loss": 0.0006, "step": 24190 }, { "epoch": 7.52, "learning_rate": 4.9549269505750705e-06, "loss": 0.0486, "step": 24200 }, { "epoch": 7.53, "learning_rate": 4.948709978240597e-06, "loss": 0.0641, "step": 24210 }, { "epoch": 7.53, "learning_rate": 4.942493005906124e-06, "loss": 0.1244, "step": 24220 }, { "epoch": 7.53, "learning_rate": 4.936276033571651e-06, "loss": 0.0417, "step": 24230 }, { "epoch": 7.53, "learning_rate": 4.930059061237178e-06, "loss": 0.0006, "step": 24240 }, { "epoch": 7.54, "learning_rate": 4.923842088902705e-06, "loss": 0.066, "step": 24250 }, { "epoch": 7.54, "learning_rate": 4.917625116568231e-06, "loss": 0.1115, "step": 24260 }, { "epoch": 7.54, "learning_rate": 4.911408144233758e-06, "loss": 0.054, "step": 24270 }, { "epoch": 7.55, "learning_rate": 4.905191171899286e-06, "loss": 0.0236, "step": 24280 }, { "epoch": 7.55, "learning_rate": 4.898974199564812e-06, "loss": 0.0031, "step": 24290 }, { "epoch": 7.55, "learning_rate": 4.8927572272303395e-06, "loss": 0.0716, "step": 24300 }, { "epoch": 7.56, "learning_rate": 4.886540254895866e-06, "loss": 0.0773, "step": 24310 }, { "epoch": 7.56, "learning_rate": 4.880323282561393e-06, "loss": 0.0348, "step": 24320 }, { "epoch": 7.56, "learning_rate": 4.87410631022692e-06, "loss": 0.1266, "step": 24330 }, { "epoch": 7.57, "learning_rate": 4.867889337892447e-06, "loss": 0.0006, "step": 24340 }, { "epoch": 7.57, "learning_rate": 4.861672365557974e-06, "loss": 0.0513, "step": 24350 }, { "epoch": 7.57, "learning_rate": 4.8554553932235004e-06, "loss": 0.0017, "step": 24360 }, { "epoch": 7.58, "learning_rate": 4.849238420889027e-06, "loss": 0.1322, "step": 24370 }, { "epoch": 7.58, "learning_rate": 4.843021448554555e-06, "loss": 0.0005, "step": 24380 }, { "epoch": 7.58, "learning_rate": 4.836804476220081e-06, "loss": 0.0012, "step": 24390 }, { "epoch": 7.58, "learning_rate": 4.8305875038856086e-06, "loss": 0.056, "step": 24400 }, { "epoch": 7.59, "learning_rate": 4.824370531551135e-06, "loss": 0.0046, "step": 24410 }, { "epoch": 7.59, "learning_rate": 4.818153559216661e-06, "loss": 0.0005, "step": 24420 }, { "epoch": 7.59, "learning_rate": 4.811936586882189e-06, "loss": 0.1583, "step": 24430 }, { "epoch": 7.6, "learning_rate": 4.805719614547716e-06, "loss": 0.506, "step": 24440 }, { "epoch": 7.6, "learning_rate": 4.799502642213243e-06, "loss": 0.0168, "step": 24450 }, { "epoch": 7.6, "learning_rate": 4.7932856698787695e-06, "loss": 0.0157, "step": 24460 }, { "epoch": 7.61, "learning_rate": 4.787068697544296e-06, "loss": 0.0691, "step": 24470 }, { "epoch": 7.61, "learning_rate": 4.780851725209823e-06, "loss": 0.0206, "step": 24480 }, { "epoch": 7.61, "learning_rate": 4.77463475287535e-06, "loss": 0.0476, "step": 24490 }, { "epoch": 7.62, "learning_rate": 4.768417780540877e-06, "loss": 0.2296, "step": 24500 }, { "epoch": 7.62, "learning_rate": 4.762200808206404e-06, "loss": 0.0147, "step": 24510 }, { "epoch": 7.62, "learning_rate": 4.75598383587193e-06, "loss": 0.0256, "step": 24520 }, { "epoch": 7.63, "learning_rate": 4.749766863537458e-06, "loss": 0.1028, "step": 24530 }, { "epoch": 7.63, "learning_rate": 4.743549891202984e-06, "loss": 0.1326, "step": 24540 }, { "epoch": 7.63, "learning_rate": 4.737332918868512e-06, "loss": 0.0591, "step": 24550 }, { "epoch": 7.63, "learning_rate": 4.7311159465340385e-06, "loss": 0.0003, "step": 24560 }, { "epoch": 7.64, "learning_rate": 4.724898974199565e-06, "loss": 0.0484, "step": 24570 }, { "epoch": 7.64, "learning_rate": 4.718682001865092e-06, "loss": 0.1529, "step": 24580 }, { "epoch": 7.64, "learning_rate": 4.712465029530619e-06, "loss": 0.094, "step": 24590 }, { "epoch": 7.65, "learning_rate": 4.706248057196146e-06, "loss": 0.0229, "step": 24600 }, { "epoch": 7.65, "learning_rate": 4.700031084861673e-06, "loss": 0.1827, "step": 24610 }, { "epoch": 7.65, "learning_rate": 4.6938141125271995e-06, "loss": 0.0367, "step": 24620 }, { "epoch": 7.66, "learning_rate": 4.687597140192727e-06, "loss": 0.0217, "step": 24630 }, { "epoch": 7.66, "learning_rate": 4.681380167858253e-06, "loss": 0.1107, "step": 24640 }, { "epoch": 7.66, "learning_rate": 4.67516319552378e-06, "loss": 0.0007, "step": 24650 }, { "epoch": 7.67, "learning_rate": 4.668946223189308e-06, "loss": 0.0037, "step": 24660 }, { "epoch": 7.67, "learning_rate": 4.6627292508548336e-06, "loss": 0.19, "step": 24670 }, { "epoch": 7.67, "learning_rate": 4.656512278520361e-06, "loss": 0.0005, "step": 24680 }, { "epoch": 7.67, "learning_rate": 4.650295306185888e-06, "loss": 0.0845, "step": 24690 }, { "epoch": 7.68, "learning_rate": 4.644078333851415e-06, "loss": 0.0006, "step": 24700 }, { "epoch": 7.68, "learning_rate": 4.637861361516942e-06, "loss": 0.0002, "step": 24710 }, { "epoch": 7.68, "learning_rate": 4.6316443891824685e-06, "loss": 0.0008, "step": 24720 }, { "epoch": 7.69, "learning_rate": 4.625427416847995e-06, "loss": 0.1211, "step": 24730 }, { "epoch": 7.69, "learning_rate": 4.619210444513522e-06, "loss": 0.0144, "step": 24740 }, { "epoch": 7.69, "learning_rate": 4.612993472179049e-06, "loss": 0.0895, "step": 24750 }, { "epoch": 7.7, "learning_rate": 4.606776499844577e-06, "loss": 0.1078, "step": 24760 }, { "epoch": 7.7, "learning_rate": 4.600559527510103e-06, "loss": 0.0274, "step": 24770 }, { "epoch": 7.7, "learning_rate": 4.59434255517563e-06, "loss": 0.1192, "step": 24780 }, { "epoch": 7.71, "learning_rate": 4.588125582841157e-06, "loss": 0.1959, "step": 24790 }, { "epoch": 7.71, "learning_rate": 4.581908610506683e-06, "loss": 0.104, "step": 24800 }, { "epoch": 7.71, "learning_rate": 4.575691638172211e-06, "loss": 0.0382, "step": 24810 }, { "epoch": 7.72, "learning_rate": 4.5694746658377375e-06, "loss": 0.0004, "step": 24820 }, { "epoch": 7.72, "learning_rate": 4.563257693503264e-06, "loss": 0.0293, "step": 24830 }, { "epoch": 7.72, "learning_rate": 4.557040721168791e-06, "loss": 0.0023, "step": 24840 }, { "epoch": 7.72, "learning_rate": 4.550823748834318e-06, "loss": 0.0427, "step": 24850 }, { "epoch": 7.73, "learning_rate": 4.544606776499845e-06, "loss": 0.4363, "step": 24860 }, { "epoch": 7.73, "learning_rate": 4.538389804165372e-06, "loss": 0.0056, "step": 24870 }, { "epoch": 7.73, "learning_rate": 4.5321728318308985e-06, "loss": 0.0818, "step": 24880 }, { "epoch": 7.74, "learning_rate": 4.525955859496425e-06, "loss": 0.0511, "step": 24890 }, { "epoch": 7.74, "learning_rate": 4.519738887161952e-06, "loss": 0.0152, "step": 24900 }, { "epoch": 7.74, "learning_rate": 4.51352191482748e-06, "loss": 0.0705, "step": 24910 }, { "epoch": 7.75, "learning_rate": 4.507304942493007e-06, "loss": 0.3246, "step": 24920 }, { "epoch": 7.75, "learning_rate": 4.501087970158533e-06, "loss": 0.178, "step": 24930 }, { "epoch": 7.75, "learning_rate": 4.49487099782406e-06, "loss": 0.059, "step": 24940 }, { "epoch": 7.76, "learning_rate": 4.488654025489587e-06, "loss": 0.0074, "step": 24950 }, { "epoch": 7.76, "learning_rate": 4.482437053155114e-06, "loss": 0.0004, "step": 24960 }, { "epoch": 7.76, "learning_rate": 4.476220080820641e-06, "loss": 0.0472, "step": 24970 }, { "epoch": 7.76, "learning_rate": 4.4700031084861675e-06, "loss": 0.2187, "step": 24980 }, { "epoch": 7.77, "learning_rate": 4.463786136151694e-06, "loss": 0.2007, "step": 24990 }, { "epoch": 7.77, "learning_rate": 4.457569163817221e-06, "loss": 0.0561, "step": 25000 }, { "epoch": 7.77, "learning_rate": 4.451352191482749e-06, "loss": 0.0656, "step": 25010 }, { "epoch": 7.78, "learning_rate": 4.445135219148275e-06, "loss": 0.0007, "step": 25020 }, { "epoch": 7.78, "learning_rate": 4.438918246813802e-06, "loss": 0.0319, "step": 25030 }, { "epoch": 7.78, "learning_rate": 4.432701274479329e-06, "loss": 0.01, "step": 25040 }, { "epoch": 7.79, "learning_rate": 4.426484302144855e-06, "loss": 0.0208, "step": 25050 }, { "epoch": 7.79, "learning_rate": 4.420267329810383e-06, "loss": 0.2806, "step": 25060 }, { "epoch": 7.79, "learning_rate": 4.41405035747591e-06, "loss": 0.021, "step": 25070 }, { "epoch": 7.8, "learning_rate": 4.4078333851414366e-06, "loss": 0.0005, "step": 25080 }, { "epoch": 7.8, "learning_rate": 4.401616412806963e-06, "loss": 0.0802, "step": 25090 }, { "epoch": 7.8, "learning_rate": 4.39539944047249e-06, "loss": 0.1971, "step": 25100 }, { "epoch": 7.81, "learning_rate": 4.389182468138017e-06, "loss": 0.2067, "step": 25110 }, { "epoch": 7.81, "learning_rate": 4.382965495803544e-06, "loss": 0.0005, "step": 25120 }, { "epoch": 7.81, "learning_rate": 4.376748523469071e-06, "loss": 0.1916, "step": 25130 }, { "epoch": 7.81, "learning_rate": 4.370531551134598e-06, "loss": 0.0811, "step": 25140 }, { "epoch": 7.82, "learning_rate": 4.364314578800124e-06, "loss": 0.26, "step": 25150 }, { "epoch": 7.82, "learning_rate": 4.358097606465652e-06, "loss": 0.0002, "step": 25160 }, { "epoch": 7.82, "learning_rate": 4.351880634131179e-06, "loss": 0.0391, "step": 25170 }, { "epoch": 7.83, "learning_rate": 4.345663661796706e-06, "loss": 0.164, "step": 25180 }, { "epoch": 7.83, "learning_rate": 4.339446689462232e-06, "loss": 0.1431, "step": 25190 }, { "epoch": 7.83, "learning_rate": 4.333229717127759e-06, "loss": 0.024, "step": 25200 }, { "epoch": 7.84, "learning_rate": 4.327012744793286e-06, "loss": 0.0978, "step": 25210 }, { "epoch": 7.84, "learning_rate": 4.320795772458813e-06, "loss": 0.036, "step": 25220 }, { "epoch": 7.84, "learning_rate": 4.31457880012434e-06, "loss": 0.0745, "step": 25230 }, { "epoch": 7.85, "learning_rate": 4.3083618277898665e-06, "loss": 0.0008, "step": 25240 }, { "epoch": 7.85, "learning_rate": 4.302144855455393e-06, "loss": 0.2, "step": 25250 }, { "epoch": 7.85, "learning_rate": 4.29592788312092e-06, "loss": 0.1571, "step": 25260 }, { "epoch": 7.86, "learning_rate": 4.289710910786447e-06, "loss": 0.1211, "step": 25270 }, { "epoch": 7.86, "learning_rate": 4.283493938451974e-06, "loss": 0.0004, "step": 25280 }, { "epoch": 7.86, "learning_rate": 4.2772769661175015e-06, "loss": 0.1529, "step": 25290 }, { "epoch": 7.86, "learning_rate": 4.271059993783028e-06, "loss": 0.0401, "step": 25300 }, { "epoch": 7.87, "learning_rate": 4.264843021448555e-06, "loss": 0.145, "step": 25310 }, { "epoch": 7.87, "learning_rate": 4.258626049114082e-06, "loss": 0.042, "step": 25320 }, { "epoch": 7.87, "learning_rate": 4.252409076779609e-06, "loss": 0.1009, "step": 25330 }, { "epoch": 7.88, "learning_rate": 4.2461921044451356e-06, "loss": 0.0518, "step": 25340 }, { "epoch": 7.88, "learning_rate": 4.239975132110662e-06, "loss": 0.0011, "step": 25350 }, { "epoch": 7.88, "learning_rate": 4.233758159776189e-06, "loss": 0.0152, "step": 25360 }, { "epoch": 7.89, "learning_rate": 4.227541187441716e-06, "loss": 0.0003, "step": 25370 }, { "epoch": 7.89, "learning_rate": 4.221324215107243e-06, "loss": 0.1573, "step": 25380 }, { "epoch": 7.89, "learning_rate": 4.2151072427727705e-06, "loss": 0.0319, "step": 25390 }, { "epoch": 7.9, "learning_rate": 4.2088902704382965e-06, "loss": 0.2, "step": 25400 }, { "epoch": 7.9, "learning_rate": 4.202673298103824e-06, "loss": 0.171, "step": 25410 }, { "epoch": 7.9, "learning_rate": 4.196456325769351e-06, "loss": 0.0294, "step": 25420 }, { "epoch": 7.9, "learning_rate": 4.190239353434877e-06, "loss": 0.0459, "step": 25430 }, { "epoch": 7.91, "learning_rate": 4.184022381100405e-06, "loss": 0.0518, "step": 25440 }, { "epoch": 7.91, "learning_rate": 4.177805408765931e-06, "loss": 0.0171, "step": 25450 }, { "epoch": 7.91, "learning_rate": 4.171588436431458e-06, "loss": 0.0108, "step": 25460 }, { "epoch": 7.92, "learning_rate": 4.165371464096985e-06, "loss": 0.0019, "step": 25470 }, { "epoch": 7.92, "learning_rate": 4.159154491762512e-06, "loss": 0.0005, "step": 25480 }, { "epoch": 7.92, "learning_rate": 4.1529375194280395e-06, "loss": 0.0273, "step": 25490 }, { "epoch": 7.93, "learning_rate": 4.1467205470935655e-06, "loss": 0.0004, "step": 25500 }, { "epoch": 7.93, "learning_rate": 4.140503574759092e-06, "loss": 0.0118, "step": 25510 }, { "epoch": 7.93, "learning_rate": 4.13428660242462e-06, "loss": 0.0449, "step": 25520 }, { "epoch": 7.94, "learning_rate": 4.128069630090146e-06, "loss": 0.1352, "step": 25530 }, { "epoch": 7.94, "learning_rate": 4.121852657755674e-06, "loss": 0.2736, "step": 25540 }, { "epoch": 7.94, "learning_rate": 4.1156356854212005e-06, "loss": 0.5018, "step": 25550 }, { "epoch": 7.95, "learning_rate": 4.109418713086727e-06, "loss": 0.1876, "step": 25560 }, { "epoch": 7.95, "learning_rate": 4.103201740752254e-06, "loss": 0.0264, "step": 25570 }, { "epoch": 7.95, "learning_rate": 4.096984768417781e-06, "loss": 0.0005, "step": 25580 }, { "epoch": 7.95, "learning_rate": 4.090767796083308e-06, "loss": 0.0003, "step": 25590 }, { "epoch": 7.96, "learning_rate": 4.0845508237488346e-06, "loss": 0.0648, "step": 25600 }, { "epoch": 7.96, "learning_rate": 4.078333851414361e-06, "loss": 0.0723, "step": 25610 }, { "epoch": 7.96, "learning_rate": 4.072116879079888e-06, "loss": 0.0006, "step": 25620 }, { "epoch": 7.97, "learning_rate": 4.065899906745415e-06, "loss": 0.0004, "step": 25630 }, { "epoch": 7.97, "learning_rate": 4.059682934410943e-06, "loss": 0.5763, "step": 25640 }, { "epoch": 7.97, "learning_rate": 4.0534659620764695e-06, "loss": 0.2705, "step": 25650 }, { "epoch": 7.98, "learning_rate": 4.0472489897419955e-06, "loss": 0.1302, "step": 25660 }, { "epoch": 7.98, "learning_rate": 4.041032017407523e-06, "loss": 0.0417, "step": 25670 }, { "epoch": 7.98, "learning_rate": 4.03481504507305e-06, "loss": 0.0262, "step": 25680 }, { "epoch": 7.99, "learning_rate": 4.028598072738577e-06, "loss": 0.0005, "step": 25690 }, { "epoch": 7.99, "learning_rate": 4.022381100404104e-06, "loss": 0.019, "step": 25700 }, { "epoch": 7.99, "learning_rate": 4.0161641280696304e-06, "loss": 0.0303, "step": 25710 }, { "epoch": 8.0, "learning_rate": 4.009947155735157e-06, "loss": 0.0013, "step": 25720 }, { "epoch": 8.0, "learning_rate": 4.003730183400684e-06, "loss": 0.1316, "step": 25730 }, { "epoch": 8.0, "eval_FN": 204, "eval_FP": 147, "eval_TN": 5691, "eval_TP": 391, "eval_accuracy": 0.9454375874397637, "eval_f1": 0.6902030008826124, "eval_loss": 1.17657470703125, "eval_precision": 0.7267657992565055, "eval_recall": 0.6571428571428571, "eval_runtime": 44.3493, "eval_samples_per_second": 145.053, "eval_steps_per_second": 9.087, "step": 25736 } ], "logging_steps": 10, "max_steps": 32170, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.0833018860126208e+17, "trial_name": null, "trial_params": null }