guitarGPT2 / trainer_log_history.jsonl
editigerun's picture
Upload 8 files
11b4600
{"epoch": 0.02, "learning_rate": 2.9999999999999997e-05, "loss": 0.7034, "step": 10}
{"epoch": 0.04, "learning_rate": 5.9999999999999995e-05, "loss": 0.7056, "step": 20}
{"epoch": 0.06, "learning_rate": 8.999999999999999e-05, "loss": 0.6413, "step": 30}
{"epoch": 0.09, "learning_rate": 0.00011999999999999999, "loss": 0.574, "step": 40}
{"epoch": 0.11, "learning_rate": 0.00015, "loss": 0.5307, "step": 50}
{"epoch": 0.13, "learning_rate": 0.00017999999999999998, "loss": 0.4797, "step": 60}
{"epoch": 0.15, "learning_rate": 0.00020999999999999998, "loss": 0.4554, "step": 70}
{"epoch": 0.17, "learning_rate": 0.00023999999999999998, "loss": 0.4219, "step": 80}
{"epoch": 0.19, "learning_rate": 0.00027, "loss": 0.4163, "step": 90}
{"epoch": 0.21, "learning_rate": 0.0003, "loss": 0.4183, "step": 100}
{"epoch": 0.24, "learning_rate": 0.00029987068965517237, "loss": 0.3947, "step": 110}
{"epoch": 0.26, "learning_rate": 0.0002997413793103448, "loss": 0.413, "step": 120}
{"epoch": 0.28, "learning_rate": 0.0002996120689655172, "loss": 0.4028, "step": 130}
{"epoch": 0.3, "learning_rate": 0.00029948275862068965, "loss": 0.3807, "step": 140}
{"epoch": 0.32, "learning_rate": 0.00029935344827586205, "loss": 0.3982, "step": 150}
{"epoch": 0.34, "learning_rate": 0.00029922413793103444, "loss": 0.3907, "step": 160}
{"epoch": 0.36, "learning_rate": 0.0002990948275862069, "loss": 0.391, "step": 170}
{"epoch": 0.39, "learning_rate": 0.0002989655172413793, "loss": 0.3807, "step": 180}
{"epoch": 0.41, "learning_rate": 0.0002988362068965517, "loss": 0.3735, "step": 190}
{"epoch": 0.43, "learning_rate": 0.0002987068965517241, "loss": 0.4056, "step": 200}
{"epoch": 0.45, "learning_rate": 0.0002985775862068965, "loss": 0.3732, "step": 210}
{"epoch": 0.47, "learning_rate": 0.00029844827586206896, "loss": 0.3873, "step": 220}
{"epoch": 0.49, "learning_rate": 0.00029831896551724135, "loss": 0.3421, "step": 230}
{"epoch": 0.52, "learning_rate": 0.00029818965517241375, "loss": 0.3708, "step": 240}
{"epoch": 0.54, "learning_rate": 0.0002980603448275862, "loss": 0.371, "step": 250}
{"epoch": 0.56, "learning_rate": 0.0002979310344827586, "loss": 0.3747, "step": 260}
{"epoch": 0.58, "learning_rate": 0.00029780172413793103, "loss": 0.3608, "step": 270}
{"epoch": 0.6, "learning_rate": 0.0002976724137931034, "loss": 0.3559, "step": 280}
{"epoch": 0.62, "learning_rate": 0.0002975431034482758, "loss": 0.3838, "step": 290}
{"epoch": 0.64, "learning_rate": 0.00029741379310344827, "loss": 0.3587, "step": 300}
{"epoch": 0.64, "eval_loss": 0.36829814314842224, "eval_runtime": 676.778, "eval_samples_per_second": 3.398, "eval_steps_per_second": 0.426, "step": 300}
{"epoch": 0.67, "learning_rate": 0.00029728448275862066, "loss": 0.3585, "step": 310}
{"epoch": 0.69, "learning_rate": 0.00029715517241379305, "loss": 0.3536, "step": 320}
{"epoch": 0.71, "learning_rate": 0.0002970258620689655, "loss": 0.3721, "step": 330}
{"epoch": 0.73, "learning_rate": 0.0002968965517241379, "loss": 0.37, "step": 340}
{"epoch": 0.75, "learning_rate": 0.00029676724137931034, "loss": 0.3738, "step": 350}
{"epoch": 0.77, "learning_rate": 0.00029663793103448273, "loss": 0.3542, "step": 360}
{"epoch": 0.79, "learning_rate": 0.0002965086206896552, "loss": 0.378, "step": 370}
{"epoch": 0.82, "learning_rate": 0.00029637931034482757, "loss": 0.3443, "step": 380}
{"epoch": 0.84, "learning_rate": 0.00029624999999999996, "loss": 0.3484, "step": 390}
{"epoch": 0.86, "learning_rate": 0.00029612068965517236, "loss": 0.35, "step": 400}
{"epoch": 0.88, "learning_rate": 0.0002959913793103448, "loss": 0.3622, "step": 410}
{"epoch": 0.9, "learning_rate": 0.0002958620689655172, "loss": 0.3479, "step": 420}
{"epoch": 0.92, "learning_rate": 0.00029573275862068964, "loss": 0.3535, "step": 430}
{"epoch": 0.94, "learning_rate": 0.00029560344827586204, "loss": 0.3547, "step": 440}
{"epoch": 0.97, "learning_rate": 0.0002954741379310345, "loss": 0.365, "step": 450}
{"epoch": 0.99, "learning_rate": 0.0002953448275862069, "loss": 0.3292, "step": 460}
{"epoch": 1.01, "learning_rate": 0.00029521551724137927, "loss": 0.3377, "step": 470}
{"epoch": 1.03, "learning_rate": 0.0002950862068965517, "loss": 0.349, "step": 480}
{"epoch": 1.05, "learning_rate": 0.0002949568965517241, "loss": 0.3638, "step": 490}
{"epoch": 1.07, "learning_rate": 0.0002948275862068965, "loss": 0.3488, "step": 500}
{"epoch": 1.09, "learning_rate": 0.00029469827586206895, "loss": 0.3438, "step": 510}
{"epoch": 1.12, "learning_rate": 0.00029456896551724134, "loss": 0.3367, "step": 520}
{"epoch": 1.14, "learning_rate": 0.0002944396551724138, "loss": 0.364, "step": 530}
{"epoch": 1.16, "learning_rate": 0.0002943103448275862, "loss": 0.3557, "step": 540}
{"epoch": 1.18, "learning_rate": 0.00029418103448275863, "loss": 0.3513, "step": 550}
{"epoch": 1.2, "learning_rate": 0.000294051724137931, "loss": 0.3519, "step": 560}
{"epoch": 1.22, "learning_rate": 0.0002939224137931034, "loss": 0.341, "step": 570}
{"epoch": 1.24, "learning_rate": 0.0002937931034482758, "loss": 0.3404, "step": 580}
{"epoch": 1.27, "learning_rate": 0.00029366379310344826, "loss": 0.3381, "step": 590}
{"epoch": 1.29, "learning_rate": 0.00029353448275862065, "loss": 0.347, "step": 600}
{"epoch": 1.29, "eval_loss": 0.3440234959125519, "eval_runtime": 676.8128, "eval_samples_per_second": 3.398, "eval_steps_per_second": 0.426, "step": 600}
{"epoch": 1.31, "learning_rate": 0.0002934051724137931, "loss": 0.3377, "step": 610}
{"epoch": 1.33, "learning_rate": 0.0002932758620689655, "loss": 0.3573, "step": 620}
{"epoch": 1.35, "learning_rate": 0.00029314655172413794, "loss": 0.3264, "step": 630}
{"epoch": 1.37, "learning_rate": 0.00029301724137931033, "loss": 0.3458, "step": 640}
{"epoch": 1.39, "learning_rate": 0.0002928879310344827, "loss": 0.3432, "step": 650}
{"epoch": 1.42, "learning_rate": 0.0002927586206896551, "loss": 0.3321, "step": 660}
{"epoch": 1.44, "learning_rate": 0.00029262931034482756, "loss": 0.3396, "step": 670}
{"epoch": 1.46, "learning_rate": 0.00029249999999999995, "loss": 0.3314, "step": 680}
{"epoch": 1.48, "learning_rate": 0.0002923706896551724, "loss": 0.3389, "step": 690}
{"epoch": 1.5, "learning_rate": 0.0002922413793103448, "loss": 0.3194, "step": 700}
{"epoch": 1.52, "learning_rate": 0.00029211206896551724, "loss": 0.3379, "step": 710}
{"epoch": 1.55, "learning_rate": 0.00029198275862068963, "loss": 0.3546, "step": 720}
{"epoch": 1.57, "learning_rate": 0.0002918534482758621, "loss": 0.3146, "step": 730}
{"epoch": 1.59, "learning_rate": 0.0002917241379310344, "loss": 0.3381, "step": 740}
{"epoch": 1.61, "learning_rate": 0.00029159482758620687, "loss": 0.3165, "step": 750}
{"epoch": 1.63, "learning_rate": 0.00029146551724137926, "loss": 0.3372, "step": 760}
{"epoch": 1.65, "learning_rate": 0.0002913362068965517, "loss": 0.3452, "step": 770}
{"epoch": 1.67, "learning_rate": 0.0002912068965517241, "loss": 0.3549, "step": 780}
{"epoch": 1.7, "learning_rate": 0.00029107758620689655, "loss": 0.3149, "step": 790}
{"epoch": 1.72, "learning_rate": 0.00029094827586206894, "loss": 0.3278, "step": 800}
{"epoch": 1.74, "learning_rate": 0.0002908189655172414, "loss": 0.3432, "step": 810}
{"epoch": 1.76, "learning_rate": 0.0002906896551724138, "loss": 0.3475, "step": 820}
{"epoch": 1.78, "learning_rate": 0.00029056034482758617, "loss": 0.3271, "step": 830}
{"epoch": 1.8, "learning_rate": 0.00029043103448275857, "loss": 0.3298, "step": 840}
{"epoch": 1.82, "learning_rate": 0.000290301724137931, "loss": 0.3247, "step": 850}
{"epoch": 1.85, "learning_rate": 0.0002901724137931034, "loss": 0.324, "step": 860}
{"epoch": 1.87, "learning_rate": 0.00029004310344827585, "loss": 0.3015, "step": 870}
{"epoch": 1.89, "learning_rate": 0.00028991379310344825, "loss": 0.322, "step": 880}
{"epoch": 1.91, "learning_rate": 0.0002897844827586207, "loss": 0.3127, "step": 890}
{"epoch": 1.93, "learning_rate": 0.0002896551724137931, "loss": 0.3205, "step": 900}
{"epoch": 1.93, "eval_loss": 0.33089256286621094, "eval_runtime": 676.8619, "eval_samples_per_second": 3.398, "eval_steps_per_second": 0.425, "step": 900}
{"epoch": 1.95, "learning_rate": 0.00028952586206896553, "loss": 0.3216, "step": 910}
{"epoch": 1.97, "learning_rate": 0.00028939655172413787, "loss": 0.3227, "step": 920}
{"epoch": 2.0, "learning_rate": 0.0002892672413793103, "loss": 0.3126, "step": 930}
{"epoch": 2.02, "learning_rate": 0.0002891379310344827, "loss": 0.3202, "step": 940}
{"epoch": 2.04, "learning_rate": 0.00028900862068965516, "loss": 0.3162, "step": 950}
{"epoch": 2.06, "learning_rate": 0.00028887931034482755, "loss": 0.331, "step": 960}
{"epoch": 2.08, "learning_rate": 0.00028875, "loss": 0.3201, "step": 970}
{"epoch": 2.1, "learning_rate": 0.0002886206896551724, "loss": 0.3291, "step": 980}
{"epoch": 2.12, "learning_rate": 0.00028849137931034484, "loss": 0.318, "step": 990}
{"epoch": 2.15, "learning_rate": 0.0002883620689655172, "loss": 0.3331, "step": 1000}
{"epoch": 2.17, "learning_rate": 0.0002882327586206896, "loss": 0.3236, "step": 1010}
{"epoch": 2.19, "learning_rate": 0.000288103448275862, "loss": 0.3201, "step": 1020}
{"epoch": 2.21, "learning_rate": 0.00028797413793103446, "loss": 0.314, "step": 1030}
{"epoch": 2.23, "learning_rate": 0.00028784482758620686, "loss": 0.3027, "step": 1040}
{"epoch": 2.25, "learning_rate": 0.0002877155172413793, "loss": 0.3326, "step": 1050}
{"epoch": 2.27, "learning_rate": 0.0002875862068965517, "loss": 0.3305, "step": 1060}
{"epoch": 2.3, "learning_rate": 0.00028745689655172414, "loss": 0.315, "step": 1070}
{"epoch": 2.32, "learning_rate": 0.00028732758620689654, "loss": 0.3193, "step": 1080}
{"epoch": 2.34, "learning_rate": 0.00028719827586206893, "loss": 0.3239, "step": 1090}
{"epoch": 2.36, "learning_rate": 0.0002870689655172413, "loss": 0.3075, "step": 1100}
{"epoch": 2.38, "learning_rate": 0.00028693965517241377, "loss": 0.3204, "step": 1110}
{"epoch": 2.4, "learning_rate": 0.00028681034482758616, "loss": 0.3104, "step": 1120}
{"epoch": 2.42, "learning_rate": 0.0002866810344827586, "loss": 0.3393, "step": 1130}
{"epoch": 2.45, "learning_rate": 0.000286551724137931, "loss": 0.3235, "step": 1140}
{"epoch": 2.47, "learning_rate": 0.00028642241379310345, "loss": 0.322, "step": 1150}
{"epoch": 2.49, "learning_rate": 0.00028629310344827584, "loss": 0.3176, "step": 1160}
{"epoch": 2.51, "learning_rate": 0.00028616379310344824, "loss": 0.308, "step": 1170}
{"epoch": 2.53, "learning_rate": 0.00028603448275862063, "loss": 0.3223, "step": 1180}
{"epoch": 2.55, "learning_rate": 0.0002859051724137931, "loss": 0.321, "step": 1190}
{"epoch": 2.58, "learning_rate": 0.00028577586206896547, "loss": 0.3088, "step": 1200}
{"epoch": 2.58, "eval_loss": 0.3221810758113861, "eval_runtime": 679.1093, "eval_samples_per_second": 3.387, "eval_steps_per_second": 0.424, "step": 1200}
{"epoch": 2.6, "learning_rate": 0.0002856465517241379, "loss": 0.3218, "step": 1210}
{"epoch": 2.62, "learning_rate": 0.0002855172413793103, "loss": 0.2987, "step": 1220}
{"epoch": 2.64, "learning_rate": 0.00028538793103448276, "loss": 0.3135, "step": 1230}
{"epoch": 2.66, "learning_rate": 0.00028525862068965515, "loss": 0.296, "step": 1240}
{"epoch": 2.68, "learning_rate": 0.0002851293103448276, "loss": 0.3006, "step": 1250}
{"epoch": 2.7, "learning_rate": 0.000285, "loss": 0.3192, "step": 1260}
{"epoch": 2.73, "learning_rate": 0.0002848706896551724, "loss": 0.3041, "step": 1270}
{"epoch": 2.75, "learning_rate": 0.0002847413793103448, "loss": 0.3106, "step": 1280}
{"epoch": 2.77, "learning_rate": 0.0002846120689655172, "loss": 0.3193, "step": 1290}
{"epoch": 2.79, "learning_rate": 0.0002844827586206896, "loss": 0.3111, "step": 1300}
{"epoch": 2.81, "learning_rate": 0.00028435344827586206, "loss": 0.3187, "step": 1310}
{"epoch": 2.83, "learning_rate": 0.00028422413793103445, "loss": 0.3125, "step": 1320}
{"epoch": 2.85, "learning_rate": 0.0002840948275862069, "loss": 0.3117, "step": 1330}
{"epoch": 2.88, "learning_rate": 0.0002839655172413793, "loss": 0.316, "step": 1340}
{"epoch": 2.9, "learning_rate": 0.0002838362068965517, "loss": 0.3213, "step": 1350}
{"epoch": 2.92, "learning_rate": 0.0002837068965517241, "loss": 0.3168, "step": 1360}
{"epoch": 2.94, "learning_rate": 0.00028357758620689653, "loss": 0.3107, "step": 1370}
{"epoch": 2.96, "learning_rate": 0.0002834482758620689, "loss": 0.2958, "step": 1380}
{"epoch": 2.98, "learning_rate": 0.00028331896551724137, "loss": 0.3153, "step": 1390}
{"epoch": 3.0, "learning_rate": 0.00028318965517241376, "loss": 0.3131, "step": 1400}
{"epoch": 3.03, "learning_rate": 0.0002830603448275862, "loss": 0.2915, "step": 1410}
{"epoch": 3.05, "learning_rate": 0.0002829310344827586, "loss": 0.3039, "step": 1420}
{"epoch": 3.07, "learning_rate": 0.000282801724137931, "loss": 0.3074, "step": 1430}
{"epoch": 3.09, "learning_rate": 0.00028267241379310344, "loss": 0.3062, "step": 1440}
{"epoch": 3.11, "learning_rate": 0.00028254310344827583, "loss": 0.3005, "step": 1450}
{"epoch": 3.13, "learning_rate": 0.0002824137931034482, "loss": 0.3038, "step": 1460}
{"epoch": 3.15, "learning_rate": 0.0002822844827586207, "loss": 0.3078, "step": 1470}
{"epoch": 3.18, "learning_rate": 0.00028215517241379307, "loss": 0.3118, "step": 1480}
{"epoch": 3.2, "learning_rate": 0.0002820258620689655, "loss": 0.286, "step": 1490}
{"epoch": 3.22, "learning_rate": 0.0002818965517241379, "loss": 0.2966, "step": 1500}
{"epoch": 3.22, "eval_loss": 0.3142649531364441, "eval_runtime": 678.9797, "eval_samples_per_second": 3.387, "eval_steps_per_second": 0.424, "step": 1500}
{"epoch": 3.24, "learning_rate": 0.0002817672413793103, "loss": 0.3101, "step": 1510}
{"epoch": 3.26, "learning_rate": 0.00028163793103448275, "loss": 0.2922, "step": 1520}
{"epoch": 3.28, "learning_rate": 0.00028150862068965514, "loss": 0.3128, "step": 1530}
{"epoch": 3.3, "learning_rate": 0.00028137931034482753, "loss": 0.2973, "step": 1540}
{"epoch": 3.33, "learning_rate": 0.00028125, "loss": 0.2921, "step": 1550}
{"epoch": 3.35, "learning_rate": 0.00028112068965517237, "loss": 0.3094, "step": 1560}
{"epoch": 3.37, "learning_rate": 0.0002809913793103448, "loss": 0.2876, "step": 1570}
{"epoch": 3.39, "learning_rate": 0.0002808620689655172, "loss": 0.3019, "step": 1580}
{"epoch": 3.41, "learning_rate": 0.00028073275862068966, "loss": 0.3127, "step": 1590}
{"epoch": 3.43, "learning_rate": 0.00028060344827586205, "loss": 0.3078, "step": 1600}
{"epoch": 3.45, "learning_rate": 0.00028047413793103444, "loss": 0.2967, "step": 1610}
{"epoch": 3.48, "learning_rate": 0.0002803448275862069, "loss": 0.2984, "step": 1620}
{"epoch": 3.5, "learning_rate": 0.0002802155172413793, "loss": 0.2886, "step": 1630}
{"epoch": 3.52, "learning_rate": 0.0002800862068965517, "loss": 0.3113, "step": 1640}
{"epoch": 3.54, "learning_rate": 0.0002799568965517241, "loss": 0.3091, "step": 1650}
{"epoch": 3.56, "learning_rate": 0.0002798275862068965, "loss": 0.2969, "step": 1660}
{"epoch": 3.58, "learning_rate": 0.00027969827586206896, "loss": 0.2985, "step": 1670}
{"epoch": 3.61, "learning_rate": 0.00027956896551724136, "loss": 0.2863, "step": 1680}
{"epoch": 3.63, "learning_rate": 0.00027943965517241375, "loss": 0.2922, "step": 1690}
{"epoch": 3.65, "learning_rate": 0.0002793103448275862, "loss": 0.2753, "step": 1700}
{"epoch": 3.67, "learning_rate": 0.0002791810344827586, "loss": 0.3047, "step": 1710}
{"epoch": 3.69, "learning_rate": 0.000279051724137931, "loss": 0.309, "step": 1720}
{"epoch": 3.71, "learning_rate": 0.00027892241379310343, "loss": 0.3058, "step": 1730}
{"epoch": 3.73, "learning_rate": 0.0002787931034482758, "loss": 0.3171, "step": 1740}
{"epoch": 3.76, "learning_rate": 0.00027866379310344827, "loss": 0.2826, "step": 1750}
{"epoch": 3.78, "learning_rate": 0.00027853448275862066, "loss": 0.3029, "step": 1760}
{"epoch": 3.8, "learning_rate": 0.00027840517241379306, "loss": 0.297, "step": 1770}
{"epoch": 3.82, "learning_rate": 0.0002782758620689655, "loss": 0.3007, "step": 1780}
{"epoch": 3.84, "learning_rate": 0.0002781465517241379, "loss": 0.3096, "step": 1790}
{"epoch": 3.86, "learning_rate": 0.00027801724137931034, "loss": 0.3013, "step": 1800}
{"epoch": 3.86, "eval_loss": 0.3079967796802521, "eval_runtime": 678.0738, "eval_samples_per_second": 3.392, "eval_steps_per_second": 0.425, "step": 1800}
{"epoch": 3.88, "learning_rate": 0.00027788793103448274, "loss": 0.2907, "step": 1810}
{"epoch": 3.91, "learning_rate": 0.00027775862068965513, "loss": 0.2792, "step": 1820}
{"epoch": 3.93, "learning_rate": 0.0002776293103448276, "loss": 0.2911, "step": 1830}
{"epoch": 3.95, "learning_rate": 0.00027749999999999997, "loss": 0.2841, "step": 1840}
{"epoch": 3.97, "learning_rate": 0.00027737068965517236, "loss": 0.3033, "step": 1850}
{"epoch": 3.99, "learning_rate": 0.0002772413793103448, "loss": 0.2975, "step": 1860}
{"epoch": 4.01, "learning_rate": 0.0002771120689655172, "loss": 0.2823, "step": 1870}
{"epoch": 4.03, "learning_rate": 0.00027698275862068965, "loss": 0.2907, "step": 1880}
{"epoch": 4.06, "learning_rate": 0.00027685344827586204, "loss": 0.2747, "step": 1890}
{"epoch": 4.08, "learning_rate": 0.0002767241379310345, "loss": 0.2823, "step": 1900}
{"epoch": 4.1, "learning_rate": 0.0002765948275862069, "loss": 0.3116, "step": 1910}
{"epoch": 4.12, "learning_rate": 0.0002764655172413793, "loss": 0.3064, "step": 1920}
{"epoch": 4.14, "learning_rate": 0.0002763362068965517, "loss": 0.2753, "step": 1930}
{"epoch": 4.16, "learning_rate": 0.0002762068965517241, "loss": 0.2974, "step": 1940}
{"epoch": 4.18, "learning_rate": 0.0002760775862068965, "loss": 0.2912, "step": 1950}
{"epoch": 4.21, "learning_rate": 0.00027594827586206895, "loss": 0.2968, "step": 1960}
{"epoch": 4.23, "learning_rate": 0.00027581896551724135, "loss": 0.2768, "step": 1970}
{"epoch": 4.25, "learning_rate": 0.0002756896551724138, "loss": 0.3027, "step": 1980}
{"epoch": 4.27, "learning_rate": 0.0002755603448275862, "loss": 0.2825, "step": 1990}
{"epoch": 4.29, "learning_rate": 0.0002754310344827586, "loss": 0.2946, "step": 2000}
{"epoch": 4.31, "learning_rate": 0.00027530172413793103, "loss": 0.2913, "step": 2010}
{"epoch": 4.33, "learning_rate": 0.0002751724137931034, "loss": 0.282, "step": 2020}
{"epoch": 4.36, "learning_rate": 0.0002750431034482758, "loss": 0.2866, "step": 2030}
{"epoch": 4.38, "learning_rate": 0.00027491379310344826, "loss": 0.2857, "step": 2040}
{"epoch": 4.4, "learning_rate": 0.00027478448275862065, "loss": 0.2883, "step": 2050}
{"epoch": 4.42, "learning_rate": 0.0002746551724137931, "loss": 0.2891, "step": 2060}
{"epoch": 4.44, "learning_rate": 0.0002745258620689655, "loss": 0.2908, "step": 2070}
{"epoch": 4.46, "learning_rate": 0.00027439655172413794, "loss": 0.2808, "step": 2080}
{"epoch": 4.48, "learning_rate": 0.00027426724137931033, "loss": 0.2848, "step": 2090}
{"epoch": 4.51, "learning_rate": 0.0002741379310344827, "loss": 0.2944, "step": 2100}
{"epoch": 4.51, "eval_loss": 0.3032459020614624, "eval_runtime": 676.6943, "eval_samples_per_second": 3.399, "eval_steps_per_second": 0.426, "step": 2100}
{"epoch": 4.53, "learning_rate": 0.0002740086206896551, "loss": 0.2835, "step": 2110}
{"epoch": 4.55, "learning_rate": 0.00027387931034482757, "loss": 0.292, "step": 2120}
{"epoch": 4.57, "learning_rate": 0.00027374999999999996, "loss": 0.2992, "step": 2130}
{"epoch": 4.59, "learning_rate": 0.0002736206896551724, "loss": 0.2874, "step": 2140}
{"epoch": 4.61, "learning_rate": 0.0002734913793103448, "loss": 0.2827, "step": 2150}
{"epoch": 4.64, "learning_rate": 0.00027336206896551725, "loss": 0.2887, "step": 2160}
{"epoch": 4.66, "learning_rate": 0.00027323275862068964, "loss": 0.2824, "step": 2170}
{"epoch": 4.68, "learning_rate": 0.00027310344827586203, "loss": 0.3099, "step": 2180}
{"epoch": 4.7, "learning_rate": 0.0002729741379310344, "loss": 0.2931, "step": 2190}
{"epoch": 4.72, "learning_rate": 0.00027284482758620687, "loss": 0.2983, "step": 2200}
{"epoch": 4.74, "learning_rate": 0.00027271551724137926, "loss": 0.2816, "step": 2210}
{"epoch": 4.76, "learning_rate": 0.0002725862068965517, "loss": 0.2888, "step": 2220}
{"epoch": 4.79, "learning_rate": 0.0002724568965517241, "loss": 0.2769, "step": 2230}
{"epoch": 4.81, "learning_rate": 0.00027232758620689655, "loss": 0.2824, "step": 2240}
{"epoch": 4.83, "learning_rate": 0.00027219827586206894, "loss": 0.2823, "step": 2250}
{"epoch": 4.85, "learning_rate": 0.0002720689655172414, "loss": 0.2934, "step": 2260}
{"epoch": 4.87, "learning_rate": 0.0002719396551724138, "loss": 0.2856, "step": 2270}
{"epoch": 4.89, "learning_rate": 0.0002718103448275862, "loss": 0.2867, "step": 2280}
{"epoch": 4.91, "learning_rate": 0.00027168103448275857, "loss": 0.2873, "step": 2290}
{"epoch": 4.94, "learning_rate": 0.000271551724137931, "loss": 0.2857, "step": 2300}
{"epoch": 4.96, "learning_rate": 0.0002714224137931034, "loss": 0.2749, "step": 2310}
{"epoch": 4.98, "learning_rate": 0.00027129310344827586, "loss": 0.2775, "step": 2320}
{"epoch": 5.0, "learning_rate": 0.00027116379310344825, "loss": 0.2721, "step": 2330}
{"epoch": 5.02, "learning_rate": 0.0002710344827586207, "loss": 0.2682, "step": 2340}
{"epoch": 5.04, "learning_rate": 0.0002709051724137931, "loss": 0.2676, "step": 2350}
{"epoch": 5.06, "learning_rate": 0.0002707758620689655, "loss": 0.2752, "step": 2360}
{"epoch": 5.09, "learning_rate": 0.0002706465517241379, "loss": 0.2742, "step": 2370}
{"epoch": 5.11, "learning_rate": 0.0002705172413793103, "loss": 0.2784, "step": 2380}
{"epoch": 5.13, "learning_rate": 0.0002703879310344827, "loss": 0.2739, "step": 2390}
{"epoch": 5.15, "learning_rate": 0.00027025862068965516, "loss": 0.2874, "step": 2400}
{"epoch": 5.15, "eval_loss": 0.2981237769126892, "eval_runtime": 678.281, "eval_samples_per_second": 3.391, "eval_steps_per_second": 0.425, "step": 2400}
{"epoch": 5.17, "learning_rate": 0.00027012931034482756, "loss": 0.2797, "step": 2410}
{"epoch": 5.19, "learning_rate": 0.00027, "loss": 0.294, "step": 2420}
{"epoch": 5.21, "learning_rate": 0.0002698706896551724, "loss": 0.2776, "step": 2430}
{"epoch": 5.24, "learning_rate": 0.00026974137931034484, "loss": 0.2727, "step": 2440}
{"epoch": 5.26, "learning_rate": 0.0002696120689655172, "loss": 0.2719, "step": 2450}
{"epoch": 5.28, "learning_rate": 0.00026948275862068963, "loss": 0.2664, "step": 2460}
{"epoch": 5.3, "learning_rate": 0.000269353448275862, "loss": 0.2915, "step": 2470}
{"epoch": 5.32, "learning_rate": 0.00026922413793103447, "loss": 0.2562, "step": 2480}
{"epoch": 5.34, "learning_rate": 0.00026909482758620686, "loss": 0.264, "step": 2490}
{"epoch": 5.36, "learning_rate": 0.0002689655172413793, "loss": 0.2758, "step": 2500}
{"epoch": 5.39, "learning_rate": 0.0002688362068965517, "loss": 0.2764, "step": 2510}
{"epoch": 5.41, "learning_rate": 0.00026870689655172415, "loss": 0.2697, "step": 2520}
{"epoch": 5.43, "learning_rate": 0.00026857758620689654, "loss": 0.2795, "step": 2530}
{"epoch": 5.45, "learning_rate": 0.00026844827586206893, "loss": 0.2863, "step": 2540}
{"epoch": 5.47, "learning_rate": 0.00026831896551724133, "loss": 0.2807, "step": 2550}
{"epoch": 5.49, "learning_rate": 0.0002681896551724138, "loss": 0.272, "step": 2560}
{"epoch": 5.52, "learning_rate": 0.00026806034482758617, "loss": 0.2522, "step": 2570}
{"epoch": 5.54, "learning_rate": 0.0002679310344827586, "loss": 0.2706, "step": 2580}
{"epoch": 5.56, "learning_rate": 0.000267801724137931, "loss": 0.2689, "step": 2590}
{"epoch": 5.58, "learning_rate": 0.00026767241379310345, "loss": 0.2607, "step": 2600}
{"epoch": 5.6, "learning_rate": 0.00026754310344827585, "loss": 0.2543, "step": 2610}
{"epoch": 5.62, "learning_rate": 0.00026741379310344824, "loss": 0.288, "step": 2620}
{"epoch": 5.64, "learning_rate": 0.00026728448275862063, "loss": 0.2698, "step": 2630}
{"epoch": 5.67, "learning_rate": 0.0002671551724137931, "loss": 0.2629, "step": 2640}
{"epoch": 5.69, "learning_rate": 0.0002670258620689655, "loss": 0.2798, "step": 2650}
{"epoch": 5.71, "learning_rate": 0.0002668965517241379, "loss": 0.2804, "step": 2660}
{"epoch": 5.73, "learning_rate": 0.0002667672413793103, "loss": 0.2824, "step": 2670}
{"epoch": 5.75, "learning_rate": 0.00026663793103448276, "loss": 0.2797, "step": 2680}
{"epoch": 5.77, "learning_rate": 0.00026650862068965515, "loss": 0.2605, "step": 2690}
{"epoch": 5.79, "learning_rate": 0.0002663793103448276, "loss": 0.2766, "step": 2700}
{"epoch": 5.79, "eval_loss": 0.292289137840271, "eval_runtime": 676.8375, "eval_samples_per_second": 3.398, "eval_steps_per_second": 0.426, "step": 2700}
{"epoch": 5.82, "learning_rate": 0.00026624999999999994, "loss": 0.2546, "step": 2710}
{"epoch": 5.84, "learning_rate": 0.0002661206896551724, "loss": 0.2561, "step": 2720}
{"epoch": 5.86, "learning_rate": 0.0002659913793103448, "loss": 0.2653, "step": 2730}
{"epoch": 5.88, "learning_rate": 0.0002658620689655172, "loss": 0.2647, "step": 2740}
{"epoch": 5.9, "learning_rate": 0.0002657327586206896, "loss": 0.2678, "step": 2750}
{"epoch": 5.92, "learning_rate": 0.00026560344827586207, "loss": 0.2672, "step": 2760}
{"epoch": 5.94, "learning_rate": 0.00026547413793103446, "loss": 0.2639, "step": 2770}
{"epoch": 5.97, "learning_rate": 0.0002653448275862069, "loss": 0.2529, "step": 2780}
{"epoch": 5.99, "learning_rate": 0.00026521551724137925, "loss": 0.2789, "step": 2790}
{"epoch": 6.01, "learning_rate": 0.0002650862068965517, "loss": 0.2753, "step": 2800}
{"epoch": 6.03, "learning_rate": 0.0002649568965517241, "loss": 0.2579, "step": 2810}
{"epoch": 6.05, "learning_rate": 0.00026482758620689653, "loss": 0.2534, "step": 2820}
{"epoch": 6.07, "learning_rate": 0.0002646982758620689, "loss": 0.2653, "step": 2830}
{"epoch": 6.09, "learning_rate": 0.00026456896551724137, "loss": 0.2669, "step": 2840}
{"epoch": 6.12, "learning_rate": 0.00026443965517241377, "loss": 0.2464, "step": 2850}
{"epoch": 6.14, "learning_rate": 0.0002643103448275862, "loss": 0.2466, "step": 2860}
{"epoch": 6.16, "learning_rate": 0.0002641810344827586, "loss": 0.2494, "step": 2870}
{"epoch": 6.18, "learning_rate": 0.000264051724137931, "loss": 0.2532, "step": 2880}
{"epoch": 6.2, "learning_rate": 0.0002639224137931034, "loss": 0.2513, "step": 2890}
{"epoch": 6.22, "learning_rate": 0.00026379310344827584, "loss": 0.2577, "step": 2900}
{"epoch": 6.24, "learning_rate": 0.00026366379310344823, "loss": 0.2594, "step": 2910}
{"epoch": 6.27, "learning_rate": 0.0002635344827586207, "loss": 0.2507, "step": 2920}
{"epoch": 6.29, "learning_rate": 0.00026340517241379307, "loss": 0.2716, "step": 2930}
{"epoch": 6.31, "learning_rate": 0.0002632758620689655, "loss": 0.2697, "step": 2940}
{"epoch": 6.33, "learning_rate": 0.0002631465517241379, "loss": 0.2516, "step": 2950}
{"epoch": 6.35, "learning_rate": 0.0002630172413793103, "loss": 0.2691, "step": 2960}
{"epoch": 6.37, "learning_rate": 0.00026288793103448275, "loss": 0.2675, "step": 2970}
{"epoch": 6.39, "learning_rate": 0.00026275862068965514, "loss": 0.2628, "step": 2980}
{"epoch": 6.42, "learning_rate": 0.00026262931034482754, "loss": 0.2627, "step": 2990}
{"epoch": 6.44, "learning_rate": 0.0002625, "loss": 0.2603, "step": 3000}
{"epoch": 6.44, "eval_loss": 0.2889377772808075, "eval_runtime": 697.121, "eval_samples_per_second": 3.299, "eval_steps_per_second": 0.413, "step": 3000}
{"epoch": 6.46, "learning_rate": 0.0002623706896551724, "loss": 0.2323, "step": 3010}
{"epoch": 6.48, "learning_rate": 0.0002622413793103448, "loss": 0.2548, "step": 3020}
{"epoch": 6.5, "learning_rate": 0.0002621120689655172, "loss": 0.2591, "step": 3030}
{"epoch": 6.52, "learning_rate": 0.00026198275862068966, "loss": 0.2568, "step": 3040}
{"epoch": 6.55, "learning_rate": 0.00026185344827586206, "loss": 0.2473, "step": 3050}
{"epoch": 6.57, "learning_rate": 0.00026172413793103445, "loss": 0.2512, "step": 3060}
{"epoch": 6.59, "learning_rate": 0.00026159482758620684, "loss": 0.2635, "step": 3070}
{"epoch": 6.61, "learning_rate": 0.0002614655172413793, "loss": 0.2509, "step": 3080}
{"epoch": 6.63, "learning_rate": 0.0002613362068965517, "loss": 0.2648, "step": 3090}
{"epoch": 6.65, "learning_rate": 0.00026120689655172413, "loss": 0.2544, "step": 3100}
{"epoch": 6.67, "learning_rate": 0.0002610775862068965, "loss": 0.26, "step": 3110}
{"epoch": 6.7, "learning_rate": 0.00026094827586206897, "loss": 0.2611, "step": 3120}
{"epoch": 6.72, "learning_rate": 0.00026081896551724136, "loss": 0.2719, "step": 3130}
{"epoch": 6.74, "learning_rate": 0.00026068965517241376, "loss": 0.2795, "step": 3140}
{"epoch": 6.76, "learning_rate": 0.0002605603448275862, "loss": 0.2605, "step": 3150}
{"epoch": 6.78, "learning_rate": 0.0002604310344827586, "loss": 0.2669, "step": 3160}
{"epoch": 6.8, "learning_rate": 0.000260301724137931, "loss": 0.2407, "step": 3170}
{"epoch": 6.82, "learning_rate": 0.00026017241379310344, "loss": 0.2578, "step": 3180}
{"epoch": 6.85, "learning_rate": 0.00026004310344827583, "loss": 0.2535, "step": 3190}
{"epoch": 6.87, "learning_rate": 0.0002599137931034483, "loss": 0.2655, "step": 3200}
{"epoch": 6.89, "learning_rate": 0.00025978448275862067, "loss": 0.27, "step": 3210}
{"epoch": 6.91, "learning_rate": 0.00025965517241379306, "loss": 0.2757, "step": 3220}
{"epoch": 6.93, "learning_rate": 0.0002595258620689655, "loss": 0.2615, "step": 3230}
{"epoch": 6.95, "learning_rate": 0.0002593965517241379, "loss": 0.2728, "step": 3240}
{"epoch": 6.97, "learning_rate": 0.0002592672413793103, "loss": 0.2576, "step": 3250}
{"epoch": 7.0, "learning_rate": 0.00025913793103448274, "loss": 0.2554, "step": 3260}
{"epoch": 7.02, "learning_rate": 0.00025900862068965513, "loss": 0.2266, "step": 3270}
{"epoch": 7.04, "learning_rate": 0.0002588793103448276, "loss": 0.2371, "step": 3280}
{"epoch": 7.06, "learning_rate": 0.00025875, "loss": 0.2471, "step": 3290}
{"epoch": 7.08, "learning_rate": 0.00025862068965517237, "loss": 0.2477, "step": 3300}
{"epoch": 7.08, "eval_loss": 0.2846280038356781, "eval_runtime": 696.1493, "eval_samples_per_second": 3.304, "eval_steps_per_second": 0.414, "step": 3300}
{"epoch": 7.1, "learning_rate": 0.0002584913793103448, "loss": 0.241, "step": 3310}
{"epoch": 7.12, "learning_rate": 0.0002583620689655172, "loss": 0.2466, "step": 3320}
{"epoch": 7.15, "learning_rate": 0.00025823275862068965, "loss": 0.2477, "step": 3330}
{"epoch": 7.17, "learning_rate": 0.00025810344827586205, "loss": 0.2338, "step": 3340}
{"epoch": 7.19, "learning_rate": 0.00025797413793103444, "loss": 0.2496, "step": 3350}
{"epoch": 7.21, "learning_rate": 0.0002578448275862069, "loss": 0.247, "step": 3360}
{"epoch": 7.23, "learning_rate": 0.0002577155172413793, "loss": 0.2441, "step": 3370}
{"epoch": 7.25, "learning_rate": 0.0002575862068965517, "loss": 0.241, "step": 3380}
{"epoch": 7.27, "learning_rate": 0.0002574568965517241, "loss": 0.2464, "step": 3390}
{"epoch": 7.3, "learning_rate": 0.0002573275862068965, "loss": 0.2487, "step": 3400}
{"epoch": 7.32, "learning_rate": 0.00025719827586206896, "loss": 0.2499, "step": 3410}
{"epoch": 7.34, "learning_rate": 0.00025706896551724135, "loss": 0.2315, "step": 3420}
{"epoch": 7.36, "learning_rate": 0.00025693965517241375, "loss": 0.2453, "step": 3430}
{"epoch": 7.38, "learning_rate": 0.0002568103448275862, "loss": 0.2436, "step": 3440}
{"epoch": 7.4, "learning_rate": 0.0002566810344827586, "loss": 0.2632, "step": 3450}
{"epoch": 7.42, "learning_rate": 0.00025655172413793103, "loss": 0.2435, "step": 3460}
{"epoch": 7.45, "learning_rate": 0.0002564224137931034, "loss": 0.2447, "step": 3470}
{"epoch": 7.47, "learning_rate": 0.0002562931034482758, "loss": 0.2292, "step": 3480}
{"epoch": 7.49, "learning_rate": 0.00025616379310344827, "loss": 0.2432, "step": 3490}
{"epoch": 7.51, "learning_rate": 0.00025603448275862066, "loss": 0.2491, "step": 3500}
{"epoch": 7.53, "learning_rate": 0.0002559051724137931, "loss": 0.2618, "step": 3510}
{"epoch": 7.55, "learning_rate": 0.0002557758620689655, "loss": 0.2465, "step": 3520}
{"epoch": 7.58, "learning_rate": 0.0002556465517241379, "loss": 0.2377, "step": 3530}
{"epoch": 7.6, "learning_rate": 0.00025551724137931034, "loss": 0.2529, "step": 3540}
{"epoch": 7.62, "learning_rate": 0.00025538793103448273, "loss": 0.2377, "step": 3550}
{"epoch": 7.64, "learning_rate": 0.0002552586206896551, "loss": 0.2479, "step": 3560}
{"epoch": 7.66, "learning_rate": 0.00025512931034482757, "loss": 0.2459, "step": 3570}
{"epoch": 7.68, "learning_rate": 0.00025499999999999996, "loss": 0.2248, "step": 3580}
{"epoch": 7.7, "learning_rate": 0.0002548706896551724, "loss": 0.2528, "step": 3590}
{"epoch": 7.73, "learning_rate": 0.0002547413793103448, "loss": 0.2517, "step": 3600}
{"epoch": 7.73, "eval_loss": 0.2782333195209503, "eval_runtime": 697.1594, "eval_samples_per_second": 3.299, "eval_steps_per_second": 0.413, "step": 3600}
{"epoch": 7.75, "learning_rate": 0.00025461206896551725, "loss": 0.2463, "step": 3610}
{"epoch": 7.77, "learning_rate": 0.00025448275862068964, "loss": 0.2488, "step": 3620}
{"epoch": 7.79, "learning_rate": 0.00025435344827586204, "loss": 0.2433, "step": 3630}
{"epoch": 7.81, "learning_rate": 0.00025422413793103443, "loss": 0.2435, "step": 3640}
{"epoch": 7.83, "learning_rate": 0.0002540948275862069, "loss": 0.2518, "step": 3650}
{"epoch": 7.85, "learning_rate": 0.00025396551724137927, "loss": 0.2242, "step": 3660}
{"epoch": 7.88, "learning_rate": 0.0002538362068965517, "loss": 0.2347, "step": 3670}
{"epoch": 7.9, "learning_rate": 0.0002537068965517241, "loss": 0.255, "step": 3680}
{"epoch": 7.92, "learning_rate": 0.00025357758620689656, "loss": 0.2581, "step": 3690}
{"epoch": 7.94, "learning_rate": 0.00025344827586206895, "loss": 0.2508, "step": 3700}
{"epoch": 7.96, "learning_rate": 0.00025331896551724134, "loss": 0.2435, "step": 3710}
{"epoch": 7.98, "learning_rate": 0.0002531896551724138, "loss": 0.2538, "step": 3720}
{"epoch": 8.0, "learning_rate": 0.0002530603448275862, "loss": 0.2446, "step": 3730}
{"epoch": 8.03, "learning_rate": 0.0002529310344827586, "loss": 0.2321, "step": 3740}
{"epoch": 8.05, "learning_rate": 0.000252801724137931, "loss": 0.2252, "step": 3750}
{"epoch": 8.07, "learning_rate": 0.0002526724137931034, "loss": 0.2237, "step": 3760}
{"epoch": 8.09, "learning_rate": 0.00025254310344827586, "loss": 0.2128, "step": 3770}
{"epoch": 8.11, "learning_rate": 0.00025241379310344826, "loss": 0.233, "step": 3780}
{"epoch": 8.13, "learning_rate": 0.0002522844827586207, "loss": 0.25, "step": 3790}
{"epoch": 8.15, "learning_rate": 0.0002521551724137931, "loss": 0.2375, "step": 3800}
{"epoch": 8.18, "learning_rate": 0.0002520258620689655, "loss": 0.2222, "step": 3810}
{"epoch": 8.2, "learning_rate": 0.0002518965517241379, "loss": 0.2345, "step": 3820}
{"epoch": 8.22, "learning_rate": 0.00025176724137931033, "loss": 0.2341, "step": 3830}
{"epoch": 8.24, "learning_rate": 0.0002516379310344827, "loss": 0.218, "step": 3840}
{"epoch": 8.26, "learning_rate": 0.00025150862068965517, "loss": 0.2404, "step": 3850}
{"epoch": 8.28, "learning_rate": 0.00025137931034482756, "loss": 0.224, "step": 3860}
{"epoch": 8.3, "learning_rate": 0.00025125, "loss": 0.231, "step": 3870}
{"epoch": 8.33, "learning_rate": 0.0002511206896551724, "loss": 0.2366, "step": 3880}
{"epoch": 8.35, "learning_rate": 0.0002509913793103448, "loss": 0.2283, "step": 3890}
{"epoch": 8.37, "learning_rate": 0.0002508620689655172, "loss": 0.2409, "step": 3900}
{"epoch": 8.37, "eval_loss": 0.27595165371894836, "eval_runtime": 696.4728, "eval_samples_per_second": 3.302, "eval_steps_per_second": 0.414, "step": 3900}
{"epoch": 8.39, "learning_rate": 0.00025073275862068963, "loss": 0.237, "step": 3910}
{"epoch": 8.41, "learning_rate": 0.00025060344827586203, "loss": 0.2325, "step": 3920}
{"epoch": 8.43, "learning_rate": 0.0002504741379310345, "loss": 0.2418, "step": 3930}
{"epoch": 8.45, "learning_rate": 0.00025034482758620687, "loss": 0.2277, "step": 3940}
{"epoch": 8.48, "learning_rate": 0.0002502155172413793, "loss": 0.227, "step": 3950}
{"epoch": 8.5, "learning_rate": 0.0002500862068965517, "loss": 0.235, "step": 3960}
{"epoch": 8.52, "learning_rate": 0.00024995689655172415, "loss": 0.2322, "step": 3970}
{"epoch": 8.54, "learning_rate": 0.00024982758620689655, "loss": 0.231, "step": 3980}
{"epoch": 8.56, "learning_rate": 0.00024969827586206894, "loss": 0.2249, "step": 3990}
{"epoch": 8.58, "learning_rate": 0.00024956896551724133, "loss": 0.213, "step": 4000}
{"epoch": 8.61, "learning_rate": 0.0002494396551724138, "loss": 0.2199, "step": 4010}
{"epoch": 8.63, "learning_rate": 0.0002493103448275862, "loss": 0.2232, "step": 4020}
{"epoch": 8.65, "learning_rate": 0.0002491810344827586, "loss": 0.2248, "step": 4030}
{"epoch": 8.67, "learning_rate": 0.000249051724137931, "loss": 0.2211, "step": 4040}
{"epoch": 8.69, "learning_rate": 0.00024892241379310346, "loss": 0.2295, "step": 4050}
{"epoch": 8.71, "learning_rate": 0.00024879310344827585, "loss": 0.2374, "step": 4060}
{"epoch": 8.73, "learning_rate": 0.00024866379310344825, "loss": 0.2287, "step": 4070}
{"epoch": 8.76, "learning_rate": 0.00024853448275862064, "loss": 0.2402, "step": 4080}
{"epoch": 8.78, "learning_rate": 0.0002484051724137931, "loss": 0.2258, "step": 4090}
{"epoch": 8.8, "learning_rate": 0.0002482758620689655, "loss": 0.2312, "step": 4100}
{"epoch": 8.82, "learning_rate": 0.0002481465517241379, "loss": 0.2474, "step": 4110}
{"epoch": 8.84, "learning_rate": 0.0002480172413793103, "loss": 0.2429, "step": 4120}
{"epoch": 8.86, "learning_rate": 0.00024788793103448277, "loss": 0.2287, "step": 4130}
{"epoch": 8.88, "learning_rate": 0.00024775862068965516, "loss": 0.2269, "step": 4140}
{"epoch": 8.91, "learning_rate": 0.0002476293103448276, "loss": 0.2428, "step": 4150}
{"epoch": 8.93, "learning_rate": 0.00024749999999999994, "loss": 0.2282, "step": 4160}
{"epoch": 8.95, "learning_rate": 0.0002473706896551724, "loss": 0.2236, "step": 4170}
{"epoch": 8.97, "learning_rate": 0.0002472413793103448, "loss": 0.2284, "step": 4180}
{"epoch": 8.99, "learning_rate": 0.00024711206896551723, "loss": 0.2365, "step": 4190}
{"epoch": 9.01, "learning_rate": 0.0002469827586206896, "loss": 0.2369, "step": 4200}
{"epoch": 9.01, "eval_loss": 0.2714526355266571, "eval_runtime": 696.7746, "eval_samples_per_second": 3.301, "eval_steps_per_second": 0.413, "step": 4200}
{"epoch": 9.03, "learning_rate": 0.00024685344827586207, "loss": 0.2092, "step": 4210}
{"epoch": 9.06, "learning_rate": 0.00024672413793103446, "loss": 0.2239, "step": 4220}
{"epoch": 9.08, "learning_rate": 0.0002465948275862069, "loss": 0.2132, "step": 4230}
{"epoch": 9.1, "learning_rate": 0.00024646551724137925, "loss": 0.1977, "step": 4240}
{"epoch": 9.12, "learning_rate": 0.0002463362068965517, "loss": 0.2083, "step": 4250}
{"epoch": 9.14, "learning_rate": 0.0002462068965517241, "loss": 0.2238, "step": 4260}
{"epoch": 9.16, "learning_rate": 0.00024607758620689654, "loss": 0.2004, "step": 4270}
{"epoch": 9.18, "learning_rate": 0.00024594827586206893, "loss": 0.2157, "step": 4280}
{"epoch": 9.21, "learning_rate": 0.0002458189655172414, "loss": 0.2144, "step": 4290}
{"epoch": 9.23, "learning_rate": 0.00024568965517241377, "loss": 0.2082, "step": 4300}
{"epoch": 9.25, "learning_rate": 0.0002455603448275862, "loss": 0.2243, "step": 4310}
{"epoch": 9.27, "learning_rate": 0.0002454310344827586, "loss": 0.2091, "step": 4320}
{"epoch": 9.29, "learning_rate": 0.000245301724137931, "loss": 0.2147, "step": 4330}
{"epoch": 9.31, "learning_rate": 0.0002451724137931034, "loss": 0.2214, "step": 4340}
{"epoch": 9.33, "learning_rate": 0.00024504310344827584, "loss": 0.233, "step": 4350}
{"epoch": 9.36, "learning_rate": 0.00024491379310344824, "loss": 0.2233, "step": 4360}
{"epoch": 9.38, "learning_rate": 0.0002447844827586207, "loss": 0.2096, "step": 4370}
{"epoch": 9.4, "learning_rate": 0.0002446551724137931, "loss": 0.2266, "step": 4380}
{"epoch": 9.42, "learning_rate": 0.0002445258620689655, "loss": 0.2252, "step": 4390}
{"epoch": 9.44, "learning_rate": 0.0002443965517241379, "loss": 0.2189, "step": 4400}
{"epoch": 9.46, "learning_rate": 0.0002442672413793103, "loss": 0.2122, "step": 4410}
{"epoch": 9.48, "learning_rate": 0.00024413793103448273, "loss": 0.2241, "step": 4420}
{"epoch": 9.51, "learning_rate": 0.00024400862068965515, "loss": 0.2246, "step": 4430}
{"epoch": 9.53, "learning_rate": 0.00024387931034482757, "loss": 0.2305, "step": 4440}
{"epoch": 9.55, "learning_rate": 0.00024375, "loss": 0.215, "step": 4450}
{"epoch": 9.57, "learning_rate": 0.0002436206896551724, "loss": 0.2259, "step": 4460}
{"epoch": 9.59, "learning_rate": 0.0002434913793103448, "loss": 0.2256, "step": 4470}
{"epoch": 9.61, "learning_rate": 0.00024336206896551722, "loss": 0.2202, "step": 4480}
{"epoch": 9.64, "learning_rate": 0.00024323275862068964, "loss": 0.209, "step": 4490}
{"epoch": 9.66, "learning_rate": 0.00024310344827586203, "loss": 0.2151, "step": 4500}
{"epoch": 9.66, "eval_loss": 0.267652302980423, "eval_runtime": 696.7098, "eval_samples_per_second": 3.301, "eval_steps_per_second": 0.413, "step": 4500}
{"epoch": 9.68, "learning_rate": 0.00024297413793103445, "loss": 0.2142, "step": 4510}
{"epoch": 9.7, "learning_rate": 0.00024284482758620687, "loss": 0.2209, "step": 4520}
{"epoch": 9.72, "learning_rate": 0.0002427155172413793, "loss": 0.2168, "step": 4530}
{"epoch": 9.74, "learning_rate": 0.00024258620689655171, "loss": 0.2014, "step": 4540}
{"epoch": 9.76, "learning_rate": 0.00024245689655172413, "loss": 0.2208, "step": 4550}
{"epoch": 9.79, "learning_rate": 0.00024232758620689653, "loss": 0.2216, "step": 4560}
{"epoch": 9.81, "learning_rate": 0.00024219827586206895, "loss": 0.22, "step": 4570}
{"epoch": 9.83, "learning_rate": 0.00024206896551724134, "loss": 0.236, "step": 4580}
{"epoch": 9.85, "learning_rate": 0.00024193965517241376, "loss": 0.2205, "step": 4590}
{"epoch": 9.87, "learning_rate": 0.00024181034482758618, "loss": 0.2111, "step": 4600}
{"epoch": 9.89, "learning_rate": 0.0002416810344827586, "loss": 0.2107, "step": 4610}
{"epoch": 9.91, "learning_rate": 0.00024155172413793102, "loss": 0.2156, "step": 4620}
{"epoch": 9.94, "learning_rate": 0.00024142241379310344, "loss": 0.2224, "step": 4630}
{"epoch": 9.96, "learning_rate": 0.00024129310344827586, "loss": 0.2261, "step": 4640}
{"epoch": 9.98, "learning_rate": 0.00024116379310344825, "loss": 0.2371, "step": 4650}
{"epoch": 10.0, "learning_rate": 0.00024103448275862067, "loss": 0.2169, "step": 4660}
{"epoch": 10.02, "learning_rate": 0.00024090517241379307, "loss": 0.2099, "step": 4670}
{"epoch": 10.04, "learning_rate": 0.00024077586206896549, "loss": 0.1993, "step": 4680}
{"epoch": 10.06, "learning_rate": 0.0002406465517241379, "loss": 0.2091, "step": 4690}
{"epoch": 10.09, "learning_rate": 0.00024051724137931033, "loss": 0.2007, "step": 4700}
{"epoch": 10.11, "learning_rate": 0.00024038793103448275, "loss": 0.1941, "step": 4710}
{"epoch": 10.13, "learning_rate": 0.00024025862068965517, "loss": 0.2048, "step": 4720}
{"epoch": 10.15, "learning_rate": 0.00024012931034482759, "loss": 0.195, "step": 4730}
{"epoch": 10.17, "learning_rate": 0.00023999999999999998, "loss": 0.2154, "step": 4740}
{"epoch": 10.19, "learning_rate": 0.00023987068965517237, "loss": 0.2071, "step": 4750}
{"epoch": 10.21, "learning_rate": 0.0002397413793103448, "loss": 0.2151, "step": 4760}
{"epoch": 10.24, "learning_rate": 0.0002396120689655172, "loss": 0.1928, "step": 4770}
{"epoch": 10.26, "learning_rate": 0.00023948275862068963, "loss": 0.2161, "step": 4780}
{"epoch": 10.28, "learning_rate": 0.00023935344827586205, "loss": 0.1951, "step": 4790}
{"epoch": 10.3, "learning_rate": 0.00023922413793103447, "loss": 0.2064, "step": 4800}
{"epoch": 10.3, "eval_loss": 0.2673773169517517, "eval_runtime": 694.4441, "eval_samples_per_second": 3.312, "eval_steps_per_second": 0.415, "step": 4800}
{"epoch": 10.32, "learning_rate": 0.0002390948275862069, "loss": 0.1987, "step": 4810}
{"epoch": 10.34, "learning_rate": 0.0002389655172413793, "loss": 0.1996, "step": 4820}
{"epoch": 10.36, "learning_rate": 0.00023883620689655173, "loss": 0.1964, "step": 4830}
{"epoch": 10.39, "learning_rate": 0.0002387068965517241, "loss": 0.2069, "step": 4840}
{"epoch": 10.41, "learning_rate": 0.00023857758620689652, "loss": 0.2126, "step": 4850}
{"epoch": 10.43, "learning_rate": 0.00023844827586206894, "loss": 0.2194, "step": 4860}
{"epoch": 10.45, "learning_rate": 0.00023831896551724136, "loss": 0.2165, "step": 4870}
{"epoch": 10.47, "learning_rate": 0.00023818965517241378, "loss": 0.2031, "step": 4880}
{"epoch": 10.49, "learning_rate": 0.0002380603448275862, "loss": 0.2023, "step": 4890}
{"epoch": 10.52, "learning_rate": 0.00023793103448275862, "loss": 0.2235, "step": 4900}
{"epoch": 10.54, "learning_rate": 0.00023780172413793104, "loss": 0.2144, "step": 4910}
{"epoch": 10.56, "learning_rate": 0.0002376724137931034, "loss": 0.202, "step": 4920}
{"epoch": 10.58, "learning_rate": 0.00023754310344827582, "loss": 0.2016, "step": 4930}
{"epoch": 10.6, "learning_rate": 0.00023741379310344824, "loss": 0.1998, "step": 4940}
{"epoch": 10.62, "learning_rate": 0.00023728448275862066, "loss": 0.2135, "step": 4950}
{"epoch": 10.64, "learning_rate": 0.00023715517241379308, "loss": 0.2106, "step": 4960}
{"epoch": 10.67, "learning_rate": 0.0002370258620689655, "loss": 0.2158, "step": 4970}
{"epoch": 10.69, "learning_rate": 0.00023689655172413792, "loss": 0.206, "step": 4980}
{"epoch": 10.71, "learning_rate": 0.00023676724137931034, "loss": 0.2224, "step": 4990}
{"epoch": 10.73, "learning_rate": 0.00023663793103448276, "loss": 0.2159, "step": 5000}
{"epoch": 10.75, "learning_rate": 0.00023650862068965513, "loss": 0.1939, "step": 5010}
{"epoch": 10.77, "learning_rate": 0.00023637931034482755, "loss": 0.208, "step": 5020}
{"epoch": 10.79, "learning_rate": 0.00023624999999999997, "loss": 0.1979, "step": 5030}
{"epoch": 10.82, "learning_rate": 0.0002361206896551724, "loss": 0.203, "step": 5040}
{"epoch": 10.84, "learning_rate": 0.0002359913793103448, "loss": 0.1821, "step": 5050}
{"epoch": 10.86, "learning_rate": 0.00023586206896551723, "loss": 0.2111, "step": 5060}
{"epoch": 10.88, "learning_rate": 0.00023573275862068965, "loss": 0.2065, "step": 5070}
{"epoch": 10.9, "learning_rate": 0.00023560344827586207, "loss": 0.2195, "step": 5080}
{"epoch": 10.92, "learning_rate": 0.00023547413793103443, "loss": 0.2047, "step": 5090}
{"epoch": 10.94, "learning_rate": 0.00023534482758620685, "loss": 0.2156, "step": 5100}
{"epoch": 10.94, "eval_loss": 0.2624925971031189, "eval_runtime": 695.4942, "eval_samples_per_second": 3.307, "eval_steps_per_second": 0.414, "step": 5100}
{"epoch": 10.97, "learning_rate": 0.00023521551724137927, "loss": 0.1963, "step": 5110}
{"epoch": 10.99, "learning_rate": 0.0002350862068965517, "loss": 0.2057, "step": 5120}
{"epoch": 11.01, "learning_rate": 0.00023495689655172411, "loss": 0.2016, "step": 5130}
{"epoch": 11.03, "learning_rate": 0.00023482758620689653, "loss": 0.1746, "step": 5140}
{"epoch": 11.05, "learning_rate": 0.00023469827586206895, "loss": 0.1906, "step": 5150}
{"epoch": 11.07, "learning_rate": 0.00023456896551724137, "loss": 0.2023, "step": 5160}
{"epoch": 11.09, "learning_rate": 0.0002344396551724138, "loss": 0.1901, "step": 5170}
{"epoch": 11.12, "learning_rate": 0.00023431034482758616, "loss": 0.198, "step": 5180}
{"epoch": 11.14, "learning_rate": 0.00023418103448275858, "loss": 0.2133, "step": 5190}
{"epoch": 11.16, "learning_rate": 0.000234051724137931, "loss": 0.1953, "step": 5200}
{"epoch": 11.18, "learning_rate": 0.00023392241379310342, "loss": 0.201, "step": 5210}
{"epoch": 11.2, "learning_rate": 0.00023379310344827584, "loss": 0.1881, "step": 5220}
{"epoch": 11.22, "learning_rate": 0.00023366379310344826, "loss": 0.2097, "step": 5230}
{"epoch": 11.24, "learning_rate": 0.00023353448275862068, "loss": 0.1912, "step": 5240}
{"epoch": 11.27, "learning_rate": 0.0002334051724137931, "loss": 0.1907, "step": 5250}
{"epoch": 11.29, "learning_rate": 0.00023327586206896552, "loss": 0.1999, "step": 5260}
{"epoch": 11.31, "learning_rate": 0.00023314655172413789, "loss": 0.194, "step": 5270}
{"epoch": 11.33, "learning_rate": 0.0002330172413793103, "loss": 0.181, "step": 5280}
{"epoch": 11.35, "learning_rate": 0.00023288793103448273, "loss": 0.1823, "step": 5290}
{"epoch": 11.37, "learning_rate": 0.00023275862068965515, "loss": 0.1928, "step": 5300}
{"epoch": 11.39, "learning_rate": 0.00023262931034482757, "loss": 0.2028, "step": 5310}
{"epoch": 11.42, "learning_rate": 0.00023249999999999999, "loss": 0.2061, "step": 5320}
{"epoch": 11.44, "learning_rate": 0.0002323706896551724, "loss": 0.2075, "step": 5330}
{"epoch": 11.46, "learning_rate": 0.00023224137931034483, "loss": 0.1822, "step": 5340}
{"epoch": 11.48, "learning_rate": 0.00023211206896551722, "loss": 0.1974, "step": 5350}
{"epoch": 11.5, "learning_rate": 0.0002319827586206896, "loss": 0.1843, "step": 5360}
{"epoch": 11.52, "learning_rate": 0.00023185344827586203, "loss": 0.2105, "step": 5370}
{"epoch": 11.55, "learning_rate": 0.00023172413793103445, "loss": 0.1977, "step": 5380}
{"epoch": 11.57, "learning_rate": 0.00023159482758620687, "loss": 0.1923, "step": 5390}
{"epoch": 11.59, "learning_rate": 0.0002314655172413793, "loss": 0.1871, "step": 5400}
{"epoch": 11.59, "eval_loss": 0.26061421632766724, "eval_runtime": 697.8127, "eval_samples_per_second": 3.296, "eval_steps_per_second": 0.413, "step": 5400}
{"epoch": 11.61, "learning_rate": 0.0002313362068965517, "loss": 0.1952, "step": 5410}
{"epoch": 11.63, "learning_rate": 0.00023120689655172413, "loss": 0.1876, "step": 5420}
{"epoch": 11.65, "learning_rate": 0.00023107758620689655, "loss": 0.181, "step": 5430}
{"epoch": 11.67, "learning_rate": 0.00023094827586206895, "loss": 0.2049, "step": 5440}
{"epoch": 11.7, "learning_rate": 0.00023081896551724134, "loss": 0.1924, "step": 5450}
{"epoch": 11.72, "learning_rate": 0.00023068965517241376, "loss": 0.1945, "step": 5460}
{"epoch": 11.74, "learning_rate": 0.00023056034482758618, "loss": 0.2041, "step": 5470}
{"epoch": 11.76, "learning_rate": 0.0002304310344827586, "loss": 0.2127, "step": 5480}
{"epoch": 11.78, "learning_rate": 0.00023030172413793102, "loss": 0.2006, "step": 5490}
{"epoch": 11.8, "learning_rate": 0.00023017241379310344, "loss": 0.1861, "step": 5500}
{"epoch": 11.82, "learning_rate": 0.00023004310344827586, "loss": 0.195, "step": 5510}
{"epoch": 11.85, "learning_rate": 0.00022991379310344825, "loss": 0.1761, "step": 5520}
{"epoch": 11.87, "learning_rate": 0.00022978448275862067, "loss": 0.2019, "step": 5530}
{"epoch": 11.89, "learning_rate": 0.00022965517241379306, "loss": 0.194, "step": 5540}
{"epoch": 11.91, "learning_rate": 0.00022952586206896548, "loss": 0.2097, "step": 5550}
{"epoch": 11.93, "learning_rate": 0.0002293965517241379, "loss": 0.1994, "step": 5560}
{"epoch": 11.95, "learning_rate": 0.00022926724137931032, "loss": 0.1912, "step": 5570}
{"epoch": 11.97, "learning_rate": 0.00022913793103448274, "loss": 0.1982, "step": 5580}
{"epoch": 12.0, "learning_rate": 0.00022900862068965516, "loss": 0.2096, "step": 5590}
{"epoch": 12.02, "learning_rate": 0.00022887931034482758, "loss": 0.1864, "step": 5600}
{"epoch": 12.04, "learning_rate": 0.00022874999999999998, "loss": 0.1852, "step": 5610}
{"epoch": 12.06, "learning_rate": 0.0002286206896551724, "loss": 0.1758, "step": 5620}
{"epoch": 12.08, "learning_rate": 0.0002284913793103448, "loss": 0.1845, "step": 5630}
{"epoch": 12.1, "learning_rate": 0.0002283620689655172, "loss": 0.1699, "step": 5640}
{"epoch": 12.12, "learning_rate": 0.00022823275862068963, "loss": 0.1843, "step": 5650}
{"epoch": 12.15, "learning_rate": 0.00022810344827586205, "loss": 0.1836, "step": 5660}
{"epoch": 12.17, "learning_rate": 0.00022797413793103447, "loss": 0.1804, "step": 5670}
{"epoch": 12.19, "learning_rate": 0.0002278448275862069, "loss": 0.169, "step": 5680}
{"epoch": 12.21, "learning_rate": 0.00022771551724137928, "loss": 0.1882, "step": 5690}
{"epoch": 12.23, "learning_rate": 0.0002275862068965517, "loss": 0.1849, "step": 5700}
{"epoch": 12.23, "eval_loss": 0.25843313336372375, "eval_runtime": 692.9994, "eval_samples_per_second": 3.319, "eval_steps_per_second": 0.416, "step": 5700}
{"epoch": 12.25, "learning_rate": 0.00022745689655172412, "loss": 0.1851, "step": 5710}
{"epoch": 12.27, "learning_rate": 0.00022732758620689652, "loss": 0.1736, "step": 5720}
{"epoch": 12.3, "learning_rate": 0.00022719827586206894, "loss": 0.1854, "step": 5730}
{"epoch": 12.32, "learning_rate": 0.00022706896551724136, "loss": 0.169, "step": 5740}
{"epoch": 12.34, "learning_rate": 0.00022693965517241378, "loss": 0.1715, "step": 5750}
{"epoch": 12.36, "learning_rate": 0.0002268103448275862, "loss": 0.1883, "step": 5760}
{"epoch": 12.38, "learning_rate": 0.00022668103448275862, "loss": 0.1848, "step": 5770}
{"epoch": 12.4, "learning_rate": 0.000226551724137931, "loss": 0.2009, "step": 5780}
{"epoch": 12.42, "learning_rate": 0.00022642241379310343, "loss": 0.1845, "step": 5790}
{"epoch": 12.45, "learning_rate": 0.00022629310344827585, "loss": 0.1808, "step": 5800}
{"epoch": 12.47, "learning_rate": 0.00022616379310344824, "loss": 0.1946, "step": 5810}
{"epoch": 12.49, "learning_rate": 0.00022603448275862066, "loss": 0.179, "step": 5820}
{"epoch": 12.51, "learning_rate": 0.00022590517241379308, "loss": 0.1908, "step": 5830}
{"epoch": 12.53, "learning_rate": 0.0002257758620689655, "loss": 0.1677, "step": 5840}
{"epoch": 12.55, "learning_rate": 0.00022564655172413792, "loss": 0.1872, "step": 5850}
{"epoch": 12.58, "learning_rate": 0.00022551724137931031, "loss": 0.1771, "step": 5860}
{"epoch": 12.6, "learning_rate": 0.00022538793103448273, "loss": 0.1729, "step": 5870}
{"epoch": 12.62, "learning_rate": 0.00022525862068965515, "loss": 0.1847, "step": 5880}
{"epoch": 12.64, "learning_rate": 0.00022512931034482757, "loss": 0.1833, "step": 5890}
{"epoch": 12.66, "learning_rate": 0.000225, "loss": 0.1783, "step": 5900}
{"epoch": 12.68, "learning_rate": 0.0002248706896551724, "loss": 0.1912, "step": 5910}
{"epoch": 12.7, "learning_rate": 0.0002247413793103448, "loss": 0.1652, "step": 5920}
{"epoch": 12.73, "learning_rate": 0.00022461206896551723, "loss": 0.1796, "step": 5930}
{"epoch": 12.75, "learning_rate": 0.00022448275862068965, "loss": 0.1819, "step": 5940}
{"epoch": 12.77, "learning_rate": 0.00022435344827586204, "loss": 0.1849, "step": 5950}
{"epoch": 12.79, "learning_rate": 0.00022422413793103446, "loss": 0.1799, "step": 5960}
{"epoch": 12.81, "learning_rate": 0.00022409482758620688, "loss": 0.1895, "step": 5970}
{"epoch": 12.83, "learning_rate": 0.0002239655172413793, "loss": 0.1834, "step": 5980}
{"epoch": 12.85, "learning_rate": 0.00022383620689655172, "loss": 0.1888, "step": 5990}
{"epoch": 12.88, "learning_rate": 0.0002237068965517241, "loss": 0.1908, "step": 6000}
{"epoch": 12.88, "eval_loss": 0.25369590520858765, "eval_runtime": 702.5811, "eval_samples_per_second": 3.274, "eval_steps_per_second": 0.41, "step": 6000}
{"epoch": 12.9, "learning_rate": 0.00022357758620689653, "loss": 0.1805, "step": 6010}
{"epoch": 12.92, "learning_rate": 0.00022344827586206895, "loss": 0.1754, "step": 6020}
{"epoch": 12.94, "learning_rate": 0.00022331896551724135, "loss": 0.1816, "step": 6030}
{"epoch": 12.96, "learning_rate": 0.00022318965517241377, "loss": 0.182, "step": 6040}
{"epoch": 12.98, "learning_rate": 0.00022306034482758619, "loss": 0.1808, "step": 6050}
{"epoch": 13.0, "learning_rate": 0.0002229310344827586, "loss": 0.1851, "step": 6060}
{"epoch": 13.03, "learning_rate": 0.00022280172413793103, "loss": 0.1728, "step": 6070}
{"epoch": 13.05, "learning_rate": 0.00022267241379310345, "loss": 0.1704, "step": 6080}
{"epoch": 13.07, "learning_rate": 0.00022254310344827584, "loss": 0.1723, "step": 6090}
{"epoch": 13.09, "learning_rate": 0.00022241379310344826, "loss": 0.1768, "step": 6100}
{"epoch": 13.11, "learning_rate": 0.00022228448275862068, "loss": 0.1752, "step": 6110}
{"epoch": 13.13, "learning_rate": 0.00022215517241379307, "loss": 0.1812, "step": 6120}
{"epoch": 13.15, "learning_rate": 0.0002220258620689655, "loss": 0.1809, "step": 6130}
{"epoch": 13.18, "learning_rate": 0.0002218965517241379, "loss": 0.1744, "step": 6140}
{"epoch": 13.2, "learning_rate": 0.00022176724137931033, "loss": 0.1765, "step": 6150}
{"epoch": 13.22, "learning_rate": 0.00022163793103448275, "loss": 0.1867, "step": 6160}
{"epoch": 13.24, "learning_rate": 0.00022150862068965517, "loss": 0.1785, "step": 6170}
{"epoch": 13.26, "learning_rate": 0.00022137931034482756, "loss": 0.1776, "step": 6180}
{"epoch": 13.28, "learning_rate": 0.00022124999999999998, "loss": 0.1709, "step": 6190}
{"epoch": 13.3, "learning_rate": 0.00022112068965517238, "loss": 0.1743, "step": 6200}
{"epoch": 13.33, "learning_rate": 0.0002209913793103448, "loss": 0.188, "step": 6210}
{"epoch": 13.35, "learning_rate": 0.00022086206896551722, "loss": 0.1724, "step": 6220}
{"epoch": 13.37, "learning_rate": 0.00022073275862068964, "loss": 0.189, "step": 6230}
{"epoch": 13.39, "learning_rate": 0.00022060344827586206, "loss": 0.1759, "step": 6240}
{"epoch": 13.41, "learning_rate": 0.00022047413793103448, "loss": 0.1852, "step": 6250}
{"epoch": 13.43, "learning_rate": 0.0002203448275862069, "loss": 0.1881, "step": 6260}
{"epoch": 13.45, "learning_rate": 0.0002202155172413793, "loss": 0.1715, "step": 6270}
{"epoch": 13.48, "learning_rate": 0.0002200862068965517, "loss": 0.1663, "step": 6280}
{"epoch": 13.5, "learning_rate": 0.0002199568965517241, "loss": 0.1735, "step": 6290}
{"epoch": 13.52, "learning_rate": 0.00021982758620689652, "loss": 0.1779, "step": 6300}
{"epoch": 13.52, "eval_loss": 0.2541274428367615, "eval_runtime": 702.7934, "eval_samples_per_second": 3.273, "eval_steps_per_second": 0.41, "step": 6300}
{"epoch": 13.54, "learning_rate": 0.00021969827586206894, "loss": 0.169, "step": 6310}
{"epoch": 13.56, "learning_rate": 0.00021956896551724136, "loss": 0.1739, "step": 6320}
{"epoch": 13.58, "learning_rate": 0.00021943965517241378, "loss": 0.1799, "step": 6330}
{"epoch": 13.61, "learning_rate": 0.0002193103448275862, "loss": 0.1724, "step": 6340}
{"epoch": 13.63, "learning_rate": 0.00021918103448275862, "loss": 0.183, "step": 6350}
{"epoch": 13.65, "learning_rate": 0.00021905172413793102, "loss": 0.1722, "step": 6360}
{"epoch": 13.67, "learning_rate": 0.0002189224137931034, "loss": 0.1711, "step": 6370}
{"epoch": 13.69, "learning_rate": 0.00021879310344827583, "loss": 0.1682, "step": 6380}
{"epoch": 13.71, "learning_rate": 0.00021866379310344825, "loss": 0.1718, "step": 6390}
{"epoch": 13.73, "learning_rate": 0.00021853448275862067, "loss": 0.1858, "step": 6400}
{"epoch": 13.76, "learning_rate": 0.0002184051724137931, "loss": 0.1746, "step": 6410}
{"epoch": 13.78, "learning_rate": 0.0002182758620689655, "loss": 0.1685, "step": 6420}
{"epoch": 13.8, "learning_rate": 0.00021814655172413793, "loss": 0.166, "step": 6430}
{"epoch": 13.82, "learning_rate": 0.00021801724137931035, "loss": 0.1813, "step": 6440}
{"epoch": 13.84, "learning_rate": 0.00021788793103448274, "loss": 0.1879, "step": 6450}
{"epoch": 13.86, "learning_rate": 0.00021775862068965513, "loss": 0.18, "step": 6460}
{"epoch": 13.88, "learning_rate": 0.00021762931034482755, "loss": 0.175, "step": 6470}
{"epoch": 13.91, "learning_rate": 0.00021749999999999997, "loss": 0.1763, "step": 6480}
{"epoch": 13.93, "learning_rate": 0.0002173706896551724, "loss": 0.1757, "step": 6490}
{"epoch": 13.95, "learning_rate": 0.00021724137931034481, "loss": 0.1699, "step": 6500}
{"epoch": 13.97, "learning_rate": 0.00021711206896551723, "loss": 0.1912, "step": 6510}
{"epoch": 13.99, "learning_rate": 0.00021698275862068965, "loss": 0.1873, "step": 6520}
{"epoch": 14.01, "learning_rate": 0.00021685344827586207, "loss": 0.181, "step": 6530}
{"epoch": 14.03, "learning_rate": 0.00021672413793103444, "loss": 0.1639, "step": 6540}
{"epoch": 14.06, "learning_rate": 0.00021659482758620686, "loss": 0.1635, "step": 6550}
{"epoch": 14.08, "learning_rate": 0.00021646551724137928, "loss": 0.1701, "step": 6560}
{"epoch": 14.1, "learning_rate": 0.0002163362068965517, "loss": 0.1679, "step": 6570}
{"epoch": 14.12, "learning_rate": 0.00021620689655172412, "loss": 0.1491, "step": 6580}
{"epoch": 14.14, "learning_rate": 0.00021607758620689654, "loss": 0.1556, "step": 6590}
{"epoch": 14.16, "learning_rate": 0.00021594827586206896, "loss": 0.1688, "step": 6600}
{"epoch": 14.16, "eval_loss": 0.25178754329681396, "eval_runtime": 701.9482, "eval_samples_per_second": 3.277, "eval_steps_per_second": 0.41, "step": 6600}
{"epoch": 14.18, "learning_rate": 0.00021581896551724138, "loss": 0.1708, "step": 6610}
{"epoch": 14.21, "learning_rate": 0.0002156896551724138, "loss": 0.162, "step": 6620}
{"epoch": 14.23, "learning_rate": 0.00021556034482758617, "loss": 0.153, "step": 6630}
{"epoch": 14.25, "learning_rate": 0.00021543103448275859, "loss": 0.1552, "step": 6640}
{"epoch": 14.27, "learning_rate": 0.000215301724137931, "loss": 0.1685, "step": 6650}
{"epoch": 14.29, "learning_rate": 0.00021517241379310343, "loss": 0.1716, "step": 6660}
{"epoch": 14.31, "learning_rate": 0.00021504310344827585, "loss": 0.1685, "step": 6670}
{"epoch": 14.33, "learning_rate": 0.00021491379310344827, "loss": 0.1709, "step": 6680}
{"epoch": 14.36, "learning_rate": 0.00021478448275862069, "loss": 0.1819, "step": 6690}
{"epoch": 14.38, "learning_rate": 0.0002146551724137931, "loss": 0.1629, "step": 6700}
{"epoch": 14.4, "learning_rate": 0.00021452586206896553, "loss": 0.1647, "step": 6710}
{"epoch": 14.42, "learning_rate": 0.0002143965517241379, "loss": 0.165, "step": 6720}
{"epoch": 14.44, "learning_rate": 0.0002142672413793103, "loss": 0.1676, "step": 6730}
{"epoch": 14.46, "learning_rate": 0.00021413793103448273, "loss": 0.1686, "step": 6740}
{"epoch": 14.48, "learning_rate": 0.00021400862068965515, "loss": 0.1636, "step": 6750}
{"epoch": 14.51, "learning_rate": 0.00021387931034482757, "loss": 0.1815, "step": 6760}
{"epoch": 14.53, "learning_rate": 0.00021375, "loss": 0.1663, "step": 6770}
{"epoch": 14.55, "learning_rate": 0.0002136206896551724, "loss": 0.1627, "step": 6780}
{"epoch": 14.57, "learning_rate": 0.00021349137931034483, "loss": 0.1696, "step": 6790}
{"epoch": 14.59, "learning_rate": 0.0002133620689655172, "loss": 0.1688, "step": 6800}
{"epoch": 14.61, "learning_rate": 0.00021323275862068962, "loss": 0.1767, "step": 6810}
{"epoch": 14.64, "learning_rate": 0.00021310344827586204, "loss": 0.1775, "step": 6820}
{"epoch": 14.66, "learning_rate": 0.00021297413793103446, "loss": 0.1761, "step": 6830}
{"epoch": 14.68, "learning_rate": 0.00021284482758620688, "loss": 0.173, "step": 6840}
{"epoch": 14.7, "learning_rate": 0.0002127155172413793, "loss": 0.1685, "step": 6850}
{"epoch": 14.72, "learning_rate": 0.00021258620689655172, "loss": 0.18, "step": 6860}
{"epoch": 14.74, "learning_rate": 0.00021245689655172414, "loss": 0.1693, "step": 6870}
{"epoch": 14.76, "learning_rate": 0.00021232758620689656, "loss": 0.1698, "step": 6880}
{"epoch": 14.79, "learning_rate": 0.00021219827586206892, "loss": 0.1693, "step": 6890}
{"epoch": 14.81, "learning_rate": 0.00021206896551724134, "loss": 0.1776, "step": 6900}
{"epoch": 14.81, "eval_loss": 0.24745148420333862, "eval_runtime": 664.4524, "eval_samples_per_second": 3.461, "eval_steps_per_second": 0.433, "step": 6900}
{"epoch": 14.83, "learning_rate": 0.00021193965517241376, "loss": 0.1721, "step": 6910}
{"epoch": 14.85, "learning_rate": 0.00021181034482758618, "loss": 0.1668, "step": 6920}
{"epoch": 14.87, "learning_rate": 0.0002116810344827586, "loss": 0.1688, "step": 6930}
{"epoch": 14.89, "learning_rate": 0.00021155172413793102, "loss": 0.1877, "step": 6940}
{"epoch": 14.91, "learning_rate": 0.00021142241379310344, "loss": 0.1643, "step": 6950}
{"epoch": 14.94, "learning_rate": 0.00021129310344827586, "loss": 0.1651, "step": 6960}
{"epoch": 14.96, "learning_rate": 0.00021116379310344823, "loss": 0.1672, "step": 6970}
{"epoch": 14.98, "learning_rate": 0.00021103448275862065, "loss": 0.1697, "step": 6980}
{"epoch": 15.0, "learning_rate": 0.00021090517241379307, "loss": 0.1619, "step": 6990}
{"epoch": 15.02, "learning_rate": 0.0002107758620689655, "loss": 0.1643, "step": 7000}
{"epoch": 15.04, "learning_rate": 0.0002106465517241379, "loss": 0.1576, "step": 7010}
{"epoch": 15.06, "learning_rate": 0.00021051724137931033, "loss": 0.1561, "step": 7020}
{"epoch": 15.09, "learning_rate": 0.00021038793103448275, "loss": 0.1548, "step": 7030}
{"epoch": 15.11, "learning_rate": 0.00021025862068965517, "loss": 0.149, "step": 7040}
{"epoch": 15.13, "learning_rate": 0.0002101293103448276, "loss": 0.1555, "step": 7050}
{"epoch": 15.15, "learning_rate": 0.00020999999999999998, "loss": 0.1436, "step": 7060}
{"epoch": 15.17, "learning_rate": 0.00020987068965517237, "loss": 0.1544, "step": 7070}
{"epoch": 15.19, "learning_rate": 0.0002097413793103448, "loss": 0.1596, "step": 7080}
{"epoch": 15.21, "learning_rate": 0.00020961206896551721, "loss": 0.1535, "step": 7090}
{"epoch": 15.24, "learning_rate": 0.00020948275862068963, "loss": 0.145, "step": 7100}
{"epoch": 15.26, "learning_rate": 0.00020935344827586205, "loss": 0.1606, "step": 7110}
{"epoch": 15.28, "learning_rate": 0.00020922413793103447, "loss": 0.1526, "step": 7120}
{"epoch": 15.3, "learning_rate": 0.0002090948275862069, "loss": 0.1652, "step": 7130}
{"epoch": 15.32, "learning_rate": 0.0002089655172413793, "loss": 0.1654, "step": 7140}
{"epoch": 15.34, "learning_rate": 0.0002088362068965517, "loss": 0.1564, "step": 7150}
{"epoch": 15.36, "learning_rate": 0.0002087068965517241, "loss": 0.1594, "step": 7160}
{"epoch": 15.39, "learning_rate": 0.00020857758620689652, "loss": 0.1665, "step": 7170}
{"epoch": 15.41, "learning_rate": 0.00020844827586206894, "loss": 0.1598, "step": 7180}
{"epoch": 15.43, "learning_rate": 0.00020831896551724136, "loss": 0.1597, "step": 7190}
{"epoch": 15.45, "learning_rate": 0.00020818965517241378, "loss": 0.1645, "step": 7200}
{"epoch": 15.45, "eval_loss": 0.24765853583812714, "eval_runtime": 666.9815, "eval_samples_per_second": 3.448, "eval_steps_per_second": 0.432, "step": 7200}
{"epoch": 15.47, "learning_rate": 0.0002080603448275862, "loss": 0.1484, "step": 7210}
{"epoch": 15.49, "learning_rate": 0.00020793103448275862, "loss": 0.1556, "step": 7220}
{"epoch": 15.52, "learning_rate": 0.000207801724137931, "loss": 0.1516, "step": 7230}
{"epoch": 15.54, "learning_rate": 0.00020767241379310343, "loss": 0.1528, "step": 7240}
{"epoch": 15.56, "learning_rate": 0.00020754310344827583, "loss": 0.1648, "step": 7250}
{"epoch": 15.58, "learning_rate": 0.00020741379310344825, "loss": 0.1543, "step": 7260}
{"epoch": 15.6, "learning_rate": 0.00020728448275862067, "loss": 0.1504, "step": 7270}
{"epoch": 15.62, "learning_rate": 0.00020715517241379309, "loss": 0.1441, "step": 7280}
{"epoch": 15.64, "learning_rate": 0.0002070258620689655, "loss": 0.162, "step": 7290}
{"epoch": 15.67, "learning_rate": 0.00020689655172413793, "loss": 0.1444, "step": 7300}
{"epoch": 15.69, "learning_rate": 0.00020676724137931032, "loss": 0.1478, "step": 7310}
{"epoch": 15.71, "learning_rate": 0.00020663793103448274, "loss": 0.1672, "step": 7320}
{"epoch": 15.73, "learning_rate": 0.00020650862068965516, "loss": 0.155, "step": 7330}
{"epoch": 15.75, "learning_rate": 0.00020637931034482755, "loss": 0.1546, "step": 7340}
{"epoch": 15.77, "learning_rate": 0.00020624999999999997, "loss": 0.1654, "step": 7350}
{"epoch": 15.79, "learning_rate": 0.0002061206896551724, "loss": 0.1686, "step": 7360}
{"epoch": 15.82, "learning_rate": 0.0002059913793103448, "loss": 0.1592, "step": 7370}
{"epoch": 15.84, "learning_rate": 0.00020586206896551723, "loss": 0.1664, "step": 7380}
{"epoch": 15.86, "learning_rate": 0.00020573275862068965, "loss": 0.1559, "step": 7390}
{"epoch": 15.88, "learning_rate": 0.00020560344827586204, "loss": 0.1663, "step": 7400}
{"epoch": 15.9, "learning_rate": 0.00020547413793103446, "loss": 0.1539, "step": 7410}
{"epoch": 15.92, "learning_rate": 0.00020534482758620688, "loss": 0.1556, "step": 7420}
{"epoch": 15.94, "learning_rate": 0.00020521551724137928, "loss": 0.1671, "step": 7430}
{"epoch": 15.97, "learning_rate": 0.0002050862068965517, "loss": 0.1516, "step": 7440}
{"epoch": 15.99, "learning_rate": 0.00020495689655172412, "loss": 0.1608, "step": 7450}
{"epoch": 16.01, "learning_rate": 0.00020482758620689654, "loss": 0.1714, "step": 7460}
{"epoch": 16.03, "learning_rate": 0.00020469827586206896, "loss": 0.1403, "step": 7470}
{"epoch": 16.05, "learning_rate": 0.00020456896551724135, "loss": 0.142, "step": 7480}
{"epoch": 16.07, "learning_rate": 0.00020443965517241377, "loss": 0.1429, "step": 7490}
{"epoch": 16.09, "learning_rate": 0.0002043103448275862, "loss": 0.1586, "step": 7500}
{"epoch": 16.09, "eval_loss": 0.24779050052165985, "eval_runtime": 701.4097, "eval_samples_per_second": 3.279, "eval_steps_per_second": 0.411, "step": 7500}
{"epoch": 16.12, "learning_rate": 0.0002041810344827586, "loss": 0.1495, "step": 7510}
{"epoch": 16.14, "learning_rate": 0.000204051724137931, "loss": 0.141, "step": 7520}
{"epoch": 16.16, "learning_rate": 0.00020392241379310342, "loss": 0.1535, "step": 7530}
{"epoch": 16.18, "learning_rate": 0.00020379310344827584, "loss": 0.1507, "step": 7540}
{"epoch": 16.2, "learning_rate": 0.00020366379310344826, "loss": 0.153, "step": 7550}
{"epoch": 16.22, "learning_rate": 0.00020353448275862068, "loss": 0.1403, "step": 7560}
{"epoch": 16.24, "learning_rate": 0.00020340517241379308, "loss": 0.1719, "step": 7570}
{"epoch": 16.27, "learning_rate": 0.0002032758620689655, "loss": 0.1544, "step": 7580}
{"epoch": 16.29, "learning_rate": 0.00020314655172413792, "loss": 0.1517, "step": 7590}
{"epoch": 16.31, "learning_rate": 0.00020301724137931034, "loss": 0.1559, "step": 7600}
{"epoch": 16.33, "learning_rate": 0.00020288793103448273, "loss": 0.1535, "step": 7610}
{"epoch": 16.35, "learning_rate": 0.00020275862068965515, "loss": 0.1444, "step": 7620}
{"epoch": 16.37, "learning_rate": 0.00020262931034482757, "loss": 0.1718, "step": 7630}
{"epoch": 16.39, "learning_rate": 0.0002025, "loss": 0.1462, "step": 7640}
{"epoch": 16.42, "learning_rate": 0.00020237068965517238, "loss": 0.1567, "step": 7650}
{"epoch": 16.44, "learning_rate": 0.0002022413793103448, "loss": 0.1608, "step": 7660}
{"epoch": 16.46, "learning_rate": 0.00020211206896551722, "loss": 0.1713, "step": 7670}
{"epoch": 16.48, "learning_rate": 0.00020198275862068964, "loss": 0.1564, "step": 7680}
{"epoch": 16.5, "learning_rate": 0.00020185344827586206, "loss": 0.1566, "step": 7690}
{"epoch": 16.52, "learning_rate": 0.00020172413793103448, "loss": 0.1384, "step": 7700}
{"epoch": 16.55, "learning_rate": 0.00020159482758620687, "loss": 0.1522, "step": 7710}
{"epoch": 16.57, "learning_rate": 0.0002014655172413793, "loss": 0.1599, "step": 7720}
{"epoch": 16.59, "learning_rate": 0.00020133620689655171, "loss": 0.1548, "step": 7730}
{"epoch": 16.61, "learning_rate": 0.0002012068965517241, "loss": 0.154, "step": 7740}
{"epoch": 16.63, "learning_rate": 0.00020107758620689653, "loss": 0.1502, "step": 7750}
{"epoch": 16.65, "learning_rate": 0.00020094827586206895, "loss": 0.1554, "step": 7760}
{"epoch": 16.67, "learning_rate": 0.00020081896551724137, "loss": 0.1432, "step": 7770}
{"epoch": 16.7, "learning_rate": 0.0002006896551724138, "loss": 0.1644, "step": 7780}
{"epoch": 16.72, "learning_rate": 0.0002005603448275862, "loss": 0.1601, "step": 7790}
{"epoch": 16.74, "learning_rate": 0.0002004310344827586, "loss": 0.1434, "step": 7800}
{"epoch": 16.74, "eval_loss": 0.24495865404605865, "eval_runtime": 700.6557, "eval_samples_per_second": 3.283, "eval_steps_per_second": 0.411, "step": 7800}
{"epoch": 16.76, "learning_rate": 0.00020030172413793102, "loss": 0.1649, "step": 7810}
{"epoch": 16.78, "learning_rate": 0.0002001724137931034, "loss": 0.156, "step": 7820}
{"epoch": 16.8, "learning_rate": 0.00020004310344827583, "loss": 0.1596, "step": 7830}
{"epoch": 16.82, "learning_rate": 0.00019991379310344825, "loss": 0.1562, "step": 7840}
{"epoch": 16.85, "learning_rate": 0.00019978448275862067, "loss": 0.1445, "step": 7850}
{"epoch": 16.87, "learning_rate": 0.0001996551724137931, "loss": 0.165, "step": 7860}
{"epoch": 16.89, "learning_rate": 0.0001995258620689655, "loss": 0.1407, "step": 7870}
{"epoch": 16.91, "learning_rate": 0.00019939655172413793, "loss": 0.1613, "step": 7880}
{"epoch": 16.93, "learning_rate": 0.00019926724137931033, "loss": 0.1726, "step": 7890}
{"epoch": 16.95, "learning_rate": 0.00019913793103448275, "loss": 0.163, "step": 7900}
{"epoch": 16.97, "learning_rate": 0.00019900862068965514, "loss": 0.1601, "step": 7910}
{"epoch": 17.0, "learning_rate": 0.00019887931034482756, "loss": 0.1667, "step": 7920}
{"epoch": 17.02, "learning_rate": 0.00019874999999999998, "loss": 0.1466, "step": 7930}
{"epoch": 17.04, "learning_rate": 0.0001986206896551724, "loss": 0.1312, "step": 7940}
{"epoch": 17.06, "learning_rate": 0.00019849137931034482, "loss": 0.1368, "step": 7950}
{"epoch": 17.08, "learning_rate": 0.00019836206896551724, "loss": 0.1357, "step": 7960}
{"epoch": 17.1, "learning_rate": 0.00019823275862068966, "loss": 0.1432, "step": 7970}
{"epoch": 17.12, "learning_rate": 0.00019810344827586205, "loss": 0.1354, "step": 7980}
{"epoch": 17.15, "learning_rate": 0.00019797413793103444, "loss": 0.1546, "step": 7990}
{"epoch": 17.17, "learning_rate": 0.00019784482758620686, "loss": 0.1307, "step": 8000}
{"epoch": 17.19, "learning_rate": 0.00019771551724137928, "loss": 0.1379, "step": 8010}
{"epoch": 17.21, "learning_rate": 0.0001975862068965517, "loss": 0.1458, "step": 8020}
{"epoch": 17.23, "learning_rate": 0.00019745689655172412, "loss": 0.1464, "step": 8030}
{"epoch": 17.25, "learning_rate": 0.00019732758620689654, "loss": 0.1579, "step": 8040}
{"epoch": 17.27, "learning_rate": 0.00019719827586206896, "loss": 0.1436, "step": 8050}
{"epoch": 17.3, "learning_rate": 0.00019706896551724138, "loss": 0.1681, "step": 8060}
{"epoch": 17.32, "learning_rate": 0.00019693965517241378, "loss": 0.1513, "step": 8070}
{"epoch": 17.34, "learning_rate": 0.00019681034482758617, "loss": 0.1521, "step": 8080}
{"epoch": 17.36, "learning_rate": 0.0001966810344827586, "loss": 0.1356, "step": 8090}
{"epoch": 17.38, "learning_rate": 0.000196551724137931, "loss": 0.1468, "step": 8100}
{"epoch": 17.38, "eval_loss": 0.24360989034175873, "eval_runtime": 701.7588, "eval_samples_per_second": 3.277, "eval_steps_per_second": 0.41, "step": 8100}
{"epoch": 17.4, "learning_rate": 0.00019642241379310343, "loss": 0.1523, "step": 8110}
{"epoch": 17.42, "learning_rate": 0.00019629310344827585, "loss": 0.1335, "step": 8120}
{"epoch": 17.45, "learning_rate": 0.00019616379310344827, "loss": 0.1431, "step": 8130}
{"epoch": 17.47, "learning_rate": 0.0001960344827586207, "loss": 0.152, "step": 8140}
{"epoch": 17.49, "learning_rate": 0.0001959051724137931, "loss": 0.1443, "step": 8150}
{"epoch": 17.51, "learning_rate": 0.0001957758620689655, "loss": 0.1586, "step": 8160}
{"epoch": 17.53, "learning_rate": 0.0001956465517241379, "loss": 0.1423, "step": 8170}
{"epoch": 17.55, "learning_rate": 0.00019551724137931032, "loss": 0.1314, "step": 8180}
{"epoch": 17.58, "learning_rate": 0.00019538793103448274, "loss": 0.1356, "step": 8190}
{"epoch": 17.6, "learning_rate": 0.00019525862068965516, "loss": 0.1508, "step": 8200}
{"epoch": 17.62, "learning_rate": 0.00019512931034482758, "loss": 0.1655, "step": 8210}
{"epoch": 17.64, "learning_rate": 0.000195, "loss": 0.1443, "step": 8220}
{"epoch": 17.66, "learning_rate": 0.00019487068965517242, "loss": 0.1518, "step": 8230}
{"epoch": 17.68, "learning_rate": 0.00019474137931034484, "loss": 0.1586, "step": 8240}
{"epoch": 17.7, "learning_rate": 0.0001946120689655172, "loss": 0.1459, "step": 8250}
{"epoch": 17.73, "learning_rate": 0.00019448275862068962, "loss": 0.1466, "step": 8260}
{"epoch": 17.75, "learning_rate": 0.00019435344827586204, "loss": 0.1336, "step": 8270}
{"epoch": 17.77, "learning_rate": 0.00019422413793103446, "loss": 0.1522, "step": 8280}
{"epoch": 17.79, "learning_rate": 0.00019409482758620688, "loss": 0.1589, "step": 8290}
{"epoch": 17.81, "learning_rate": 0.0001939655172413793, "loss": 0.1497, "step": 8300}
{"epoch": 17.83, "learning_rate": 0.00019383620689655172, "loss": 0.1363, "step": 8310}
{"epoch": 17.85, "learning_rate": 0.00019370689655172414, "loss": 0.1529, "step": 8320}
{"epoch": 17.88, "learning_rate": 0.00019357758620689656, "loss": 0.1609, "step": 8330}
{"epoch": 17.9, "learning_rate": 0.00019344827586206893, "loss": 0.1446, "step": 8340}
{"epoch": 17.92, "learning_rate": 0.00019331896551724135, "loss": 0.1655, "step": 8350}
{"epoch": 17.94, "learning_rate": 0.00019318965517241377, "loss": 0.1559, "step": 8360}
{"epoch": 17.96, "learning_rate": 0.0001930603448275862, "loss": 0.1672, "step": 8370}
{"epoch": 17.98, "learning_rate": 0.0001929310344827586, "loss": 0.1556, "step": 8380}
{"epoch": 18.0, "learning_rate": 0.00019280172413793103, "loss": 0.1601, "step": 8390}
{"epoch": 18.03, "learning_rate": 0.00019267241379310345, "loss": 0.1474, "step": 8400}
{"epoch": 18.03, "eval_loss": 0.24331876635551453, "eval_runtime": 702.0821, "eval_samples_per_second": 3.276, "eval_steps_per_second": 0.41, "step": 8400}
{"epoch": 18.05, "learning_rate": 0.00019254310344827587, "loss": 0.1252, "step": 8410}
{"epoch": 18.07, "learning_rate": 0.00019241379310344823, "loss": 0.1316, "step": 8420}
{"epoch": 18.09, "learning_rate": 0.00019228448275862065, "loss": 0.1449, "step": 8430}
{"epoch": 18.11, "learning_rate": 0.00019215517241379307, "loss": 0.1487, "step": 8440}
{"epoch": 18.13, "learning_rate": 0.0001920258620689655, "loss": 0.1426, "step": 8450}
{"epoch": 18.15, "learning_rate": 0.00019189655172413791, "loss": 0.1359, "step": 8460}
{"epoch": 18.18, "learning_rate": 0.00019176724137931033, "loss": 0.1491, "step": 8470}
{"epoch": 18.2, "learning_rate": 0.00019163793103448275, "loss": 0.1315, "step": 8480}
{"epoch": 18.22, "learning_rate": 0.00019150862068965517, "loss": 0.1515, "step": 8490}
{"epoch": 18.24, "learning_rate": 0.0001913793103448276, "loss": 0.1311, "step": 8500}
{"epoch": 18.26, "learning_rate": 0.00019124999999999996, "loss": 0.1258, "step": 8510}
{"epoch": 18.28, "learning_rate": 0.00019112068965517238, "loss": 0.1348, "step": 8520}
{"epoch": 18.3, "learning_rate": 0.0001909913793103448, "loss": 0.1434, "step": 8530}
{"epoch": 18.33, "learning_rate": 0.00019086206896551722, "loss": 0.1474, "step": 8540}
{"epoch": 18.35, "learning_rate": 0.00019073275862068964, "loss": 0.1474, "step": 8550}
{"epoch": 18.37, "learning_rate": 0.00019060344827586206, "loss": 0.1537, "step": 8560}
{"epoch": 18.39, "learning_rate": 0.00019047413793103448, "loss": 0.1423, "step": 8570}
{"epoch": 18.41, "learning_rate": 0.0001903448275862069, "loss": 0.1582, "step": 8580}
{"epoch": 18.43, "learning_rate": 0.00019021551724137927, "loss": 0.135, "step": 8590}
{"epoch": 18.45, "learning_rate": 0.00019008620689655169, "loss": 0.137, "step": 8600}
{"epoch": 18.48, "learning_rate": 0.0001899568965517241, "loss": 0.1336, "step": 8610}
{"epoch": 18.5, "learning_rate": 0.00018982758620689653, "loss": 0.1551, "step": 8620}
{"epoch": 18.52, "learning_rate": 0.00018969827586206895, "loss": 0.1365, "step": 8630}
{"epoch": 18.54, "learning_rate": 0.00018956896551724137, "loss": 0.1297, "step": 8640}
{"epoch": 18.56, "learning_rate": 0.00018943965517241379, "loss": 0.144, "step": 8650}
{"epoch": 18.58, "learning_rate": 0.0001893103448275862, "loss": 0.1445, "step": 8660}
{"epoch": 18.61, "learning_rate": 0.00018918103448275863, "loss": 0.1405, "step": 8670}
{"epoch": 18.63, "learning_rate": 0.000189051724137931, "loss": 0.1427, "step": 8680}
{"epoch": 18.65, "learning_rate": 0.0001889224137931034, "loss": 0.1319, "step": 8690}
{"epoch": 18.67, "learning_rate": 0.00018879310344827583, "loss": 0.1425, "step": 8700}
{"epoch": 18.67, "eval_loss": 0.2412930428981781, "eval_runtime": 699.0175, "eval_samples_per_second": 3.29, "eval_steps_per_second": 0.412, "step": 8700}
{"epoch": 18.69, "learning_rate": 0.00018866379310344825, "loss": 0.146, "step": 8710}
{"epoch": 18.71, "learning_rate": 0.00018853448275862067, "loss": 0.1516, "step": 8720}
{"epoch": 18.73, "learning_rate": 0.0001884051724137931, "loss": 0.1346, "step": 8730}
{"epoch": 18.76, "learning_rate": 0.0001882758620689655, "loss": 0.14, "step": 8740}
{"epoch": 18.78, "learning_rate": 0.00018814655172413793, "loss": 0.1342, "step": 8750}
{"epoch": 18.8, "learning_rate": 0.00018801724137931032, "loss": 0.1529, "step": 8760}
{"epoch": 18.82, "learning_rate": 0.00018788793103448274, "loss": 0.1355, "step": 8770}
{"epoch": 18.84, "learning_rate": 0.00018775862068965514, "loss": 0.1333, "step": 8780}
{"epoch": 18.86, "learning_rate": 0.00018762931034482756, "loss": 0.1501, "step": 8790}
{"epoch": 18.88, "learning_rate": 0.00018749999999999998, "loss": 0.1513, "step": 8800}
{"epoch": 18.91, "learning_rate": 0.0001873706896551724, "loss": 0.1459, "step": 8810}
{"epoch": 18.93, "learning_rate": 0.00018724137931034482, "loss": 0.1342, "step": 8820}
{"epoch": 18.95, "learning_rate": 0.00018711206896551724, "loss": 0.1409, "step": 8830}
{"epoch": 18.97, "learning_rate": 0.00018698275862068966, "loss": 0.1373, "step": 8840}
{"epoch": 18.99, "learning_rate": 0.00018685344827586205, "loss": 0.1442, "step": 8850}
{"epoch": 19.01, "learning_rate": 0.00018672413793103447, "loss": 0.1359, "step": 8860}
{"epoch": 19.03, "learning_rate": 0.00018659482758620686, "loss": 0.1371, "step": 8870}
{"epoch": 19.06, "learning_rate": 0.00018646551724137928, "loss": 0.124, "step": 8880}
{"epoch": 19.08, "learning_rate": 0.0001863362068965517, "loss": 0.1314, "step": 8890}
{"epoch": 19.1, "learning_rate": 0.00018620689655172412, "loss": 0.1369, "step": 8900}
{"epoch": 19.12, "learning_rate": 0.00018607758620689654, "loss": 0.1416, "step": 8910}
{"epoch": 19.14, "learning_rate": 0.00018594827586206896, "loss": 0.1432, "step": 8920}
{"epoch": 19.16, "learning_rate": 0.00018581896551724136, "loss": 0.1359, "step": 8930}
{"epoch": 19.18, "learning_rate": 0.00018568965517241378, "loss": 0.1262, "step": 8940}
{"epoch": 19.21, "learning_rate": 0.0001855603448275862, "loss": 0.1113, "step": 8950}
{"epoch": 19.23, "learning_rate": 0.0001854310344827586, "loss": 0.1287, "step": 8960}
{"epoch": 19.25, "learning_rate": 0.000185301724137931, "loss": 0.1329, "step": 8970}
{"epoch": 19.27, "learning_rate": 0.00018517241379310343, "loss": 0.1245, "step": 8980}
{"epoch": 19.29, "learning_rate": 0.00018504310344827585, "loss": 0.1259, "step": 8990}
{"epoch": 19.31, "learning_rate": 0.00018491379310344827, "loss": 0.1346, "step": 9000}
{"epoch": 19.31, "eval_loss": 0.24312053620815277, "eval_runtime": 698.4404, "eval_samples_per_second": 3.293, "eval_steps_per_second": 0.412, "step": 9000}
{"epoch": 19.33, "learning_rate": 0.0001847844827586207, "loss": 0.1292, "step": 9010}
{"epoch": 19.36, "learning_rate": 0.00018465517241379308, "loss": 0.1301, "step": 9020}
{"epoch": 19.38, "learning_rate": 0.0001845258620689655, "loss": 0.1402, "step": 9030}
{"epoch": 19.4, "learning_rate": 0.00018439655172413792, "loss": 0.1383, "step": 9040}
{"epoch": 19.42, "learning_rate": 0.00018426724137931031, "loss": 0.1343, "step": 9050}
{"epoch": 19.44, "learning_rate": 0.00018413793103448273, "loss": 0.1299, "step": 9060}
{"epoch": 19.46, "learning_rate": 0.00018400862068965515, "loss": 0.1326, "step": 9070}
{"epoch": 19.48, "learning_rate": 0.00018387931034482757, "loss": 0.1439, "step": 9080}
{"epoch": 19.51, "learning_rate": 0.00018375, "loss": 0.1357, "step": 9090}
{"epoch": 19.53, "learning_rate": 0.0001836206896551724, "loss": 0.1316, "step": 9100}
{"epoch": 19.55, "learning_rate": 0.0001834913793103448, "loss": 0.1471, "step": 9110}
{"epoch": 19.57, "learning_rate": 0.00018336206896551723, "loss": 0.137, "step": 9120}
{"epoch": 19.59, "learning_rate": 0.00018323275862068965, "loss": 0.1302, "step": 9130}
{"epoch": 19.61, "learning_rate": 0.00018310344827586204, "loss": 0.146, "step": 9140}
{"epoch": 19.64, "learning_rate": 0.00018297413793103446, "loss": 0.1427, "step": 9150}
{"epoch": 19.66, "learning_rate": 0.00018284482758620688, "loss": 0.1431, "step": 9160}
{"epoch": 19.68, "learning_rate": 0.0001827155172413793, "loss": 0.1535, "step": 9170}
{"epoch": 19.7, "learning_rate": 0.00018258620689655172, "loss": 0.1573, "step": 9180}
{"epoch": 19.72, "learning_rate": 0.0001824568965517241, "loss": 0.1412, "step": 9190}
{"epoch": 19.74, "learning_rate": 0.00018232758620689653, "loss": 0.1377, "step": 9200}
{"epoch": 19.76, "learning_rate": 0.00018219827586206895, "loss": 0.137, "step": 9210}
{"epoch": 19.79, "learning_rate": 0.00018206896551724137, "loss": 0.1385, "step": 9220}
{"epoch": 19.81, "learning_rate": 0.00018193965517241377, "loss": 0.1224, "step": 9230}
{"epoch": 19.83, "learning_rate": 0.00018181034482758619, "loss": 0.1443, "step": 9240}
{"epoch": 19.85, "learning_rate": 0.0001816810344827586, "loss": 0.1326, "step": 9250}
{"epoch": 19.87, "learning_rate": 0.00018155172413793103, "loss": 0.1249, "step": 9260}
{"epoch": 19.89, "learning_rate": 0.00018142241379310342, "loss": 0.1447, "step": 9270}
{"epoch": 19.91, "learning_rate": 0.00018129310344827584, "loss": 0.1464, "step": 9280}
{"epoch": 19.94, "learning_rate": 0.00018116379310344826, "loss": 0.1273, "step": 9290}
{"epoch": 19.96, "learning_rate": 0.00018103448275862068, "loss": 0.1485, "step": 9300}
{"epoch": 19.96, "eval_loss": 0.240739107131958, "eval_runtime": 698.1438, "eval_samples_per_second": 3.294, "eval_steps_per_second": 0.413, "step": 9300}
{"epoch": 19.98, "learning_rate": 0.0001809051724137931, "loss": 0.1484, "step": 9310}
{"epoch": 20.0, "learning_rate": 0.0001807758620689655, "loss": 0.1398, "step": 9320}
{"epoch": 20.02, "learning_rate": 0.0001806465517241379, "loss": 0.1239, "step": 9330}
{"epoch": 20.04, "learning_rate": 0.00018051724137931033, "loss": 0.1296, "step": 9340}
{"epoch": 20.06, "learning_rate": 0.00018038793103448275, "loss": 0.122, "step": 9350}
{"epoch": 20.09, "learning_rate": 0.00018025862068965514, "loss": 0.1353, "step": 9360}
{"epoch": 20.11, "learning_rate": 0.00018012931034482756, "loss": 0.1152, "step": 9370}
{"epoch": 20.13, "learning_rate": 0.00017999999999999998, "loss": 0.1367, "step": 9380}
{"epoch": 20.15, "learning_rate": 0.0001798706896551724, "loss": 0.1367, "step": 9390}
{"epoch": 20.17, "learning_rate": 0.00017974137931034482, "loss": 0.1274, "step": 9400}
{"epoch": 20.19, "learning_rate": 0.00017961206896551724, "loss": 0.1285, "step": 9410}
{"epoch": 20.21, "learning_rate": 0.00017948275862068964, "loss": 0.1322, "step": 9420}
{"epoch": 20.24, "learning_rate": 0.00017935344827586206, "loss": 0.1453, "step": 9430}
{"epoch": 20.26, "learning_rate": 0.00017922413793103445, "loss": 0.1247, "step": 9440}
{"epoch": 20.28, "learning_rate": 0.00017909482758620687, "loss": 0.126, "step": 9450}
{"epoch": 20.3, "learning_rate": 0.0001789655172413793, "loss": 0.1279, "step": 9460}
{"epoch": 20.32, "learning_rate": 0.0001788362068965517, "loss": 0.1296, "step": 9470}
{"epoch": 20.34, "learning_rate": 0.00017870689655172413, "loss": 0.1399, "step": 9480}
{"epoch": 20.36, "learning_rate": 0.00017857758620689655, "loss": 0.1395, "step": 9490}
{"epoch": 20.39, "learning_rate": 0.00017844827586206897, "loss": 0.1307, "step": 9500}
{"epoch": 20.41, "learning_rate": 0.00017831896551724136, "loss": 0.1346, "step": 9510}
{"epoch": 20.43, "learning_rate": 0.00017818965517241378, "loss": 0.1197, "step": 9520}
{"epoch": 20.45, "learning_rate": 0.00017806034482758618, "loss": 0.1264, "step": 9530}
{"epoch": 20.47, "learning_rate": 0.0001779310344827586, "loss": 0.1307, "step": 9540}
{"epoch": 20.49, "learning_rate": 0.00017780172413793102, "loss": 0.1241, "step": 9550}
{"epoch": 20.52, "learning_rate": 0.00017767241379310344, "loss": 0.142, "step": 9560}
{"epoch": 20.54, "learning_rate": 0.00017754310344827586, "loss": 0.1208, "step": 9570}
{"epoch": 20.56, "learning_rate": 0.00017741379310344828, "loss": 0.1388, "step": 9580}
{"epoch": 20.58, "learning_rate": 0.0001772844827586207, "loss": 0.1401, "step": 9590}
{"epoch": 20.6, "learning_rate": 0.0001771551724137931, "loss": 0.1369, "step": 9600}
{"epoch": 20.6, "eval_loss": 0.2398538887500763, "eval_runtime": 728.8049, "eval_samples_per_second": 3.156, "eval_steps_per_second": 0.395, "step": 9600}
{"epoch": 19.86, "learning_rate": 0.00010214979195561719, "loss": 0.166, "step": 9610}
{"epoch": 19.88, "learning_rate": 0.00010194174757281553, "loss": 0.166, "step": 9620}
{"epoch": 19.9, "learning_rate": 0.00010173370319001386, "loss": 0.1616, "step": 9630}
{"epoch": 19.92, "learning_rate": 0.00010154646324549237, "loss": 0.1703, "step": 9640}
{"epoch": 19.94, "learning_rate": 0.0001013384188626907, "loss": 0.1638, "step": 9650}
{"epoch": 19.96, "learning_rate": 0.00010113037447988902, "loss": 0.1641, "step": 9660}
{"epoch": 19.98, "learning_rate": 0.00010092233009708737, "loss": 0.1628, "step": 9670}
{"epoch": 20.0, "learning_rate": 0.0001007142857142857, "loss": 0.1576, "step": 9680}
{"epoch": 20.02, "learning_rate": 0.00010050624133148403, "loss": 0.1587, "step": 9690}
{"epoch": 20.04, "learning_rate": 0.00010029819694868237, "loss": 0.1625, "step": 9700}
{"epoch": 20.06, "learning_rate": 0.0001000901525658807, "loss": 0.1456, "step": 9710}
{"epoch": 20.08, "learning_rate": 9.988210818307904e-05, "loss": 0.1658, "step": 9720}
{"epoch": 20.1, "learning_rate": 9.967406380027738e-05, "loss": 0.1501, "step": 9730}
{"epoch": 20.12, "learning_rate": 9.946601941747571e-05, "loss": 0.1608, "step": 9740}
{"epoch": 20.14, "learning_rate": 9.925797503467404e-05, "loss": 0.1573, "step": 9750}
{"epoch": 20.17, "learning_rate": 9.904993065187239e-05, "loss": 0.1685, "step": 9760}
{"epoch": 20.19, "learning_rate": 9.884188626907072e-05, "loss": 0.1479, "step": 9770}
{"epoch": 20.21, "learning_rate": 9.863384188626906e-05, "loss": 0.1452, "step": 9780}
{"epoch": 20.23, "learning_rate": 9.842579750346739e-05, "loss": 0.1647, "step": 9790}
{"epoch": 20.25, "learning_rate": 9.821775312066572e-05, "loss": 0.1735, "step": 9800}
{"epoch": 20.27, "learning_rate": 9.800970873786407e-05, "loss": 0.1359, "step": 9810}
{"epoch": 20.29, "learning_rate": 9.78016643550624e-05, "loss": 0.1716, "step": 9820}
{"epoch": 20.31, "learning_rate": 9.759361997226073e-05, "loss": 0.162, "step": 9830}
{"epoch": 20.33, "learning_rate": 9.738557558945907e-05, "loss": 0.1539, "step": 9840}
{"epoch": 20.35, "learning_rate": 9.71775312066574e-05, "loss": 0.1608, "step": 9850}
{"epoch": 20.37, "learning_rate": 9.696948682385574e-05, "loss": 0.1459, "step": 9860}
{"epoch": 20.39, "learning_rate": 9.676144244105408e-05, "loss": 0.1717, "step": 9870}
{"epoch": 20.41, "learning_rate": 9.655339805825241e-05, "loss": 0.1604, "step": 9880}
{"epoch": 20.43, "learning_rate": 9.634535367545074e-05, "loss": 0.1616, "step": 9890}
{"epoch": 20.45, "learning_rate": 9.613730929264909e-05, "loss": 0.1675, "step": 9900}
{"epoch": 20.45, "eval_loss": 0.15455523133277893, "eval_runtime": 743.7243, "eval_samples_per_second": 3.093, "eval_steps_per_second": 0.387, "step": 9900}
{"epoch": 20.48, "learning_rate": 9.592926490984742e-05, "loss": 0.1781, "step": 9910}
{"epoch": 20.5, "learning_rate": 9.572122052704575e-05, "loss": 0.1589, "step": 9920}
{"epoch": 20.52, "learning_rate": 9.55131761442441e-05, "loss": 0.1523, "step": 9930}
{"epoch": 20.54, "learning_rate": 9.530513176144242e-05, "loss": 0.1637, "step": 9940}
{"epoch": 20.56, "learning_rate": 9.509708737864077e-05, "loss": 0.172, "step": 9950}
{"epoch": 20.58, "learning_rate": 9.48890429958391e-05, "loss": 0.1535, "step": 9960}
{"epoch": 20.6, "learning_rate": 9.468099861303743e-05, "loss": 0.164, "step": 9970}
{"epoch": 20.62, "learning_rate": 9.447295423023578e-05, "loss": 0.1677, "step": 9980}
{"epoch": 20.64, "learning_rate": 9.42649098474341e-05, "loss": 0.1477, "step": 9990}
{"epoch": 20.66, "learning_rate": 9.405686546463244e-05, "loss": 0.1668, "step": 10000}
{"epoch": 20.68, "learning_rate": 9.384882108183078e-05, "loss": 0.1716, "step": 10010}
{"epoch": 20.7, "learning_rate": 9.364077669902911e-05, "loss": 0.1449, "step": 10020}
{"epoch": 20.72, "learning_rate": 9.343273231622744e-05, "loss": 0.155, "step": 10030}
{"epoch": 20.74, "learning_rate": 9.322468793342579e-05, "loss": 0.168, "step": 10040}
{"epoch": 20.76, "learning_rate": 9.301664355062412e-05, "loss": 0.1534, "step": 10050}
{"epoch": 20.79, "learning_rate": 9.280859916782245e-05, "loss": 0.1643, "step": 10060}
{"epoch": 20.81, "learning_rate": 9.26005547850208e-05, "loss": 0.1521, "step": 10070}
{"epoch": 20.83, "learning_rate": 9.239251040221913e-05, "loss": 0.1633, "step": 10080}
{"epoch": 20.85, "learning_rate": 9.218446601941747e-05, "loss": 0.171, "step": 10090}
{"epoch": 20.87, "learning_rate": 9.19764216366158e-05, "loss": 0.1595, "step": 10100}
{"epoch": 20.89, "learning_rate": 9.176837725381413e-05, "loss": 0.1702, "step": 10110}
{"epoch": 20.91, "learning_rate": 9.156033287101248e-05, "loss": 0.1668, "step": 10120}
{"epoch": 20.93, "learning_rate": 9.135228848821081e-05, "loss": 0.154, "step": 10130}
{"epoch": 20.95, "learning_rate": 9.114424410540914e-05, "loss": 0.1453, "step": 10140}
{"epoch": 20.97, "learning_rate": 9.093619972260748e-05, "loss": 0.1741, "step": 10150}
{"epoch": 20.99, "learning_rate": 9.072815533980581e-05, "loss": 0.1411, "step": 10160}
{"epoch": 21.01, "learning_rate": 9.052011095700414e-05, "loss": 0.1547, "step": 10170}
{"epoch": 21.03, "learning_rate": 9.031206657420249e-05, "loss": 0.1463, "step": 10180}
{"epoch": 21.05, "learning_rate": 9.010402219140082e-05, "loss": 0.1493, "step": 10190}
{"epoch": 21.07, "learning_rate": 8.989597780859915e-05, "loss": 0.1367, "step": 10200}
{"epoch": 21.07, "eval_loss": 0.15381494164466858, "eval_runtime": 740.9624, "eval_samples_per_second": 3.104, "eval_steps_per_second": 0.389, "step": 10200}
{"epoch": 21.1, "learning_rate": 8.96879334257975e-05, "loss": 0.1416, "step": 10210}
{"epoch": 21.12, "learning_rate": 8.947988904299583e-05, "loss": 0.1515, "step": 10220}
{"epoch": 21.14, "learning_rate": 8.927184466019416e-05, "loss": 0.1462, "step": 10230}
{"epoch": 21.16, "learning_rate": 8.90638002773925e-05, "loss": 0.1494, "step": 10240}
{"epoch": 21.18, "learning_rate": 8.885575589459083e-05, "loss": 0.1455, "step": 10250}
{"epoch": 21.2, "learning_rate": 8.864771151178918e-05, "loss": 0.1469, "step": 10260}
{"epoch": 21.22, "learning_rate": 8.843966712898751e-05, "loss": 0.1534, "step": 10270}
{"epoch": 21.24, "learning_rate": 8.823162274618584e-05, "loss": 0.149, "step": 10280}
{"epoch": 21.26, "learning_rate": 8.802357836338418e-05, "loss": 0.147, "step": 10290}
{"epoch": 21.28, "learning_rate": 8.781553398058251e-05, "loss": 0.1572, "step": 10300}
{"epoch": 21.3, "learning_rate": 8.760748959778085e-05, "loss": 0.1568, "step": 10310}
{"epoch": 21.32, "learning_rate": 8.739944521497919e-05, "loss": 0.1356, "step": 10320}
{"epoch": 21.34, "learning_rate": 8.719140083217752e-05, "loss": 0.1561, "step": 10330}
{"epoch": 21.36, "learning_rate": 8.698335644937585e-05, "loss": 0.1552, "step": 10340}
{"epoch": 21.38, "learning_rate": 8.67753120665742e-05, "loss": 0.1303, "step": 10350}
{"epoch": 21.4, "learning_rate": 8.656726768377253e-05, "loss": 0.1426, "step": 10360}
{"epoch": 21.43, "learning_rate": 8.635922330097086e-05, "loss": 0.1508, "step": 10370}
{"epoch": 21.45, "learning_rate": 8.61511789181692e-05, "loss": 0.1579, "step": 10380}
{"epoch": 21.47, "learning_rate": 8.594313453536753e-05, "loss": 0.158, "step": 10390}
{"epoch": 21.49, "learning_rate": 8.573509015256588e-05, "loss": 0.1384, "step": 10400}
{"epoch": 21.51, "learning_rate": 8.552704576976421e-05, "loss": 0.144, "step": 10410}
{"epoch": 21.53, "learning_rate": 8.531900138696254e-05, "loss": 0.1507, "step": 10420}
{"epoch": 21.55, "learning_rate": 8.511095700416088e-05, "loss": 0.1306, "step": 10430}
{"epoch": 21.57, "learning_rate": 8.490291262135922e-05, "loss": 0.1431, "step": 10440}
{"epoch": 21.59, "learning_rate": 8.469486823855755e-05, "loss": 0.1594, "step": 10450}
{"epoch": 21.61, "learning_rate": 8.448682385575589e-05, "loss": 0.1491, "step": 10460}
{"epoch": 21.63, "learning_rate": 8.427877947295422e-05, "loss": 0.1569, "step": 10470}
{"epoch": 21.65, "learning_rate": 8.407073509015255e-05, "loss": 0.1555, "step": 10480}
{"epoch": 21.67, "learning_rate": 8.38626907073509e-05, "loss": 0.151, "step": 10490}
{"epoch": 21.69, "learning_rate": 8.365464632454923e-05, "loss": 0.151, "step": 10500}
{"epoch": 21.69, "eval_loss": 0.1540122926235199, "eval_runtime": 735.6052, "eval_samples_per_second": 3.127, "eval_steps_per_second": 0.392, "step": 10500}
{"epoch": 21.71, "learning_rate": 8.344660194174756e-05, "loss": 0.1636, "step": 10510}
{"epoch": 21.74, "learning_rate": 8.32385575589459e-05, "loss": 0.1541, "step": 10520}
{"epoch": 21.76, "learning_rate": 8.303051317614423e-05, "loss": 0.151, "step": 10530}
{"epoch": 21.78, "learning_rate": 8.282246879334257e-05, "loss": 0.15, "step": 10540}
{"epoch": 21.8, "learning_rate": 8.261442441054091e-05, "loss": 0.1657, "step": 10550}
{"epoch": 21.82, "learning_rate": 8.240638002773924e-05, "loss": 0.1579, "step": 10560}
{"epoch": 21.84, "learning_rate": 8.219833564493759e-05, "loss": 0.1518, "step": 10570}
{"epoch": 21.86, "learning_rate": 8.199029126213592e-05, "loss": 0.1618, "step": 10580}
{"epoch": 21.88, "learning_rate": 8.178224687933425e-05, "loss": 0.1429, "step": 10590}
{"epoch": 21.9, "learning_rate": 8.157420249653259e-05, "loss": 0.1567, "step": 10600}
{"epoch": 21.92, "learning_rate": 8.136615811373092e-05, "loss": 0.1651, "step": 10610}
{"epoch": 21.94, "learning_rate": 8.115811373092925e-05, "loss": 0.1444, "step": 10620}
{"epoch": 21.96, "learning_rate": 8.09500693481276e-05, "loss": 0.1577, "step": 10630}
{"epoch": 21.98, "learning_rate": 8.074202496532593e-05, "loss": 0.1619, "step": 10640}
{"epoch": 22.0, "learning_rate": 8.053398058252426e-05, "loss": 0.1604, "step": 10650}
{"epoch": 22.02, "learning_rate": 8.03259361997226e-05, "loss": 0.134, "step": 10660}
{"epoch": 22.05, "learning_rate": 8.011789181692094e-05, "loss": 0.136, "step": 10670}
{"epoch": 22.07, "learning_rate": 7.990984743411927e-05, "loss": 0.1361, "step": 10680}
{"epoch": 22.09, "learning_rate": 7.970180305131761e-05, "loss": 0.1408, "step": 10690}
{"epoch": 22.11, "learning_rate": 7.949375866851594e-05, "loss": 0.1569, "step": 10700}
{"epoch": 22.13, "learning_rate": 7.928571428571429e-05, "loss": 0.1358, "step": 10710}
{"epoch": 22.15, "learning_rate": 7.907766990291262e-05, "loss": 0.1465, "step": 10720}
{"epoch": 22.17, "learning_rate": 7.886962552011095e-05, "loss": 0.1525, "step": 10730}
{"epoch": 22.19, "learning_rate": 7.866158113730929e-05, "loss": 0.1475, "step": 10740}
{"epoch": 22.21, "learning_rate": 7.845353675450762e-05, "loss": 0.1459, "step": 10750}
{"epoch": 22.23, "learning_rate": 7.824549237170595e-05, "loss": 0.1284, "step": 10760}
{"epoch": 22.25, "learning_rate": 7.80374479889043e-05, "loss": 0.1454, "step": 10770}
{"epoch": 22.27, "learning_rate": 7.782940360610263e-05, "loss": 0.1446, "step": 10780}
{"epoch": 22.29, "learning_rate": 7.762135922330096e-05, "loss": 0.1226, "step": 10790}
{"epoch": 22.31, "learning_rate": 7.74133148404993e-05, "loss": 0.1578, "step": 10800}
{"epoch": 22.31, "eval_loss": 0.15434832870960236, "eval_runtime": 702.64, "eval_samples_per_second": 3.273, "eval_steps_per_second": 0.41, "step": 10800}
{"epoch": 22.33, "learning_rate": 7.720527045769764e-05, "loss": 0.1344, "step": 10810}
{"epoch": 22.36, "learning_rate": 7.699722607489597e-05, "loss": 0.1439, "step": 10820}
{"epoch": 22.38, "learning_rate": 7.678918169209431e-05, "loss": 0.1488, "step": 10830}
{"epoch": 22.4, "learning_rate": 7.658113730929264e-05, "loss": 0.1349, "step": 10840}
{"epoch": 22.42, "learning_rate": 7.637309292649099e-05, "loss": 0.1383, "step": 10850}
{"epoch": 22.44, "learning_rate": 7.616504854368932e-05, "loss": 0.1632, "step": 10860}
{"epoch": 22.46, "learning_rate": 7.595700416088765e-05, "loss": 0.1475, "step": 10870}
{"epoch": 22.48, "learning_rate": 7.5748959778086e-05, "loss": 0.1422, "step": 10880}
{"epoch": 22.5, "learning_rate": 7.554091539528432e-05, "loss": 0.1278, "step": 10890}
{"epoch": 22.52, "learning_rate": 7.533287101248266e-05, "loss": 0.1516, "step": 10900}
{"epoch": 22.54, "learning_rate": 7.5124826629681e-05, "loss": 0.1319, "step": 10910}
{"epoch": 22.56, "learning_rate": 7.491678224687933e-05, "loss": 0.1384, "step": 10920}
{"epoch": 22.58, "learning_rate": 7.470873786407766e-05, "loss": 0.1406, "step": 10930}
{"epoch": 22.6, "learning_rate": 7.4500693481276e-05, "loss": 0.1507, "step": 10940}
{"epoch": 22.62, "learning_rate": 7.429264909847434e-05, "loss": 0.1422, "step": 10950}
{"epoch": 22.64, "learning_rate": 7.408460471567267e-05, "loss": 0.1207, "step": 10960}
{"epoch": 22.67, "learning_rate": 7.387656033287101e-05, "loss": 0.1567, "step": 10970}
{"epoch": 22.69, "learning_rate": 7.366851595006934e-05, "loss": 0.1491, "step": 10980}
{"epoch": 22.71, "learning_rate": 7.346047156726767e-05, "loss": 0.1488, "step": 10990}
{"epoch": 22.73, "learning_rate": 7.325242718446602e-05, "loss": 0.1335, "step": 11000}
{"epoch": 22.75, "learning_rate": 7.304438280166435e-05, "loss": 0.1365, "step": 11010}
{"epoch": 22.77, "learning_rate": 7.283633841886268e-05, "loss": 0.1308, "step": 11020}
{"epoch": 22.79, "learning_rate": 7.262829403606101e-05, "loss": 0.1382, "step": 11030}
{"epoch": 22.81, "learning_rate": 7.242024965325936e-05, "loss": 0.1569, "step": 11040}
{"epoch": 22.83, "learning_rate": 7.221220527045769e-05, "loss": 0.152, "step": 11050}
{"epoch": 22.85, "learning_rate": 7.200416088765602e-05, "loss": 0.1397, "step": 11060}
{"epoch": 22.87, "learning_rate": 7.179611650485436e-05, "loss": 0.1477, "step": 11070}
{"epoch": 22.89, "learning_rate": 7.15880721220527e-05, "loss": 0.1468, "step": 11080}
{"epoch": 22.91, "learning_rate": 7.138002773925103e-05, "loss": 0.1516, "step": 11090}
{"epoch": 22.93, "learning_rate": 7.117198335644937e-05, "loss": 0.1452, "step": 11100}
{"epoch": 22.93, "eval_loss": 0.1549062728881836, "eval_runtime": 746.6366, "eval_samples_per_second": 3.08, "eval_steps_per_second": 0.386, "step": 11100}
{"epoch": 22.95, "learning_rate": 7.09639389736477e-05, "loss": 0.1647, "step": 11110}
{"epoch": 22.98, "learning_rate": 7.075589459084603e-05, "loss": 0.1606, "step": 11120}
{"epoch": 23.0, "learning_rate": 7.054785020804438e-05, "loss": 0.1401, "step": 11130}
{"epoch": 23.02, "learning_rate": 7.033980582524271e-05, "loss": 0.133, "step": 11140}
{"epoch": 23.04, "learning_rate": 7.013176144244105e-05, "loss": 0.1403, "step": 11150}
{"epoch": 23.06, "learning_rate": 6.992371705963938e-05, "loss": 0.1425, "step": 11160}
{"epoch": 23.08, "learning_rate": 6.971567267683771e-05, "loss": 0.1218, "step": 11170}
{"epoch": 23.1, "learning_rate": 6.950762829403606e-05, "loss": 0.1421, "step": 11180}
{"epoch": 23.12, "learning_rate": 6.929958391123439e-05, "loss": 0.1313, "step": 11190}
{"epoch": 23.14, "learning_rate": 6.909153952843272e-05, "loss": 0.1318, "step": 11200}
{"epoch": 23.16, "learning_rate": 6.888349514563106e-05, "loss": 0.1473, "step": 11210}
{"epoch": 23.18, "learning_rate": 6.86754507628294e-05, "loss": 0.1438, "step": 11220}
{"epoch": 23.2, "learning_rate": 6.846740638002773e-05, "loss": 0.132, "step": 11230}
{"epoch": 23.22, "learning_rate": 6.825936199722607e-05, "loss": 0.1359, "step": 11240}
{"epoch": 23.24, "learning_rate": 6.80513176144244e-05, "loss": 0.1293, "step": 11250}
{"epoch": 23.26, "learning_rate": 6.784327323162273e-05, "loss": 0.1446, "step": 11260}
{"epoch": 23.29, "learning_rate": 6.763522884882108e-05, "loss": 0.1396, "step": 11270}
{"epoch": 23.31, "learning_rate": 6.742718446601941e-05, "loss": 0.1356, "step": 11280}
{"epoch": 23.33, "learning_rate": 6.721914008321775e-05, "loss": 0.1373, "step": 11290}
{"epoch": 23.35, "learning_rate": 6.701109570041608e-05, "loss": 0.1393, "step": 11300}
{"epoch": 23.37, "learning_rate": 6.680305131761441e-05, "loss": 0.1414, "step": 11310}
{"epoch": 23.39, "learning_rate": 6.659500693481276e-05, "loss": 0.1362, "step": 11320}
{"epoch": 23.41, "learning_rate": 6.638696255201109e-05, "loss": 0.1426, "step": 11330}
{"epoch": 23.43, "learning_rate": 6.617891816920942e-05, "loss": 0.1256, "step": 11340}
{"epoch": 23.45, "learning_rate": 6.597087378640777e-05, "loss": 0.143, "step": 11350}
{"epoch": 23.47, "learning_rate": 6.57628294036061e-05, "loss": 0.1344, "step": 11360}
{"epoch": 23.49, "learning_rate": 6.555478502080443e-05, "loss": 0.1317, "step": 11370}
{"epoch": 23.51, "learning_rate": 6.534674063800277e-05, "loss": 0.1276, "step": 11380}
{"epoch": 23.53, "learning_rate": 6.51386962552011e-05, "loss": 0.1466, "step": 11390}
{"epoch": 23.55, "learning_rate": 6.493065187239943e-05, "loss": 0.1427, "step": 11400}
{"epoch": 23.55, "eval_loss": 0.1545754373073578, "eval_runtime": 732.5279, "eval_samples_per_second": 3.14, "eval_steps_per_second": 0.393, "step": 11400}
{"epoch": 23.57, "learning_rate": 6.472260748959778e-05, "loss": 0.1445, "step": 11410}
{"epoch": 23.6, "learning_rate": 6.451456310679611e-05, "loss": 0.1339, "step": 11420}
{"epoch": 23.62, "learning_rate": 6.430651872399444e-05, "loss": 0.1383, "step": 11430}
{"epoch": 23.64, "learning_rate": 6.409847434119278e-05, "loss": 0.1387, "step": 11440}
{"epoch": 23.66, "learning_rate": 6.389042995839112e-05, "loss": 0.1362, "step": 11450}
{"epoch": 23.68, "learning_rate": 6.368238557558946e-05, "loss": 0.1467, "step": 11460}
{"epoch": 23.7, "learning_rate": 6.347434119278779e-05, "loss": 0.1401, "step": 11470}
{"epoch": 23.72, "learning_rate": 6.326629680998612e-05, "loss": 0.1439, "step": 11480}
{"epoch": 23.74, "learning_rate": 6.305825242718447e-05, "loss": 0.1465, "step": 11490}
{"epoch": 23.76, "learning_rate": 6.28502080443828e-05, "loss": 0.1594, "step": 11500}
{"epoch": 23.78, "learning_rate": 6.264216366158113e-05, "loss": 0.1508, "step": 11510}
{"epoch": 23.8, "learning_rate": 6.243411927877947e-05, "loss": 0.1474, "step": 11520}
{"epoch": 23.82, "learning_rate": 6.22260748959778e-05, "loss": 0.1485, "step": 11530}
{"epoch": 23.84, "learning_rate": 6.201803051317613e-05, "loss": 0.1403, "step": 11540}
{"epoch": 23.86, "learning_rate": 6.180998613037448e-05, "loss": 0.1346, "step": 11550}
{"epoch": 23.88, "learning_rate": 6.160194174757281e-05, "loss": 0.1393, "step": 11560}
{"epoch": 23.9, "learning_rate": 6.139389736477114e-05, "loss": 0.124, "step": 11570}
{"epoch": 23.93, "learning_rate": 6.118585298196949e-05, "loss": 0.1674, "step": 11580}
{"epoch": 23.95, "learning_rate": 6.0977808599167816e-05, "loss": 0.1447, "step": 11590}
{"epoch": 23.97, "learning_rate": 6.0769764216366154e-05, "loss": 0.1496, "step": 11600}
{"epoch": 23.99, "learning_rate": 6.056171983356449e-05, "loss": 0.1442, "step": 11610}
{"epoch": 24.01, "learning_rate": 6.035367545076282e-05, "loss": 0.135, "step": 11620}
{"epoch": 24.03, "learning_rate": 6.014563106796116e-05, "loss": 0.1387, "step": 11630}
{"epoch": 24.05, "learning_rate": 5.99375866851595e-05, "loss": 0.1264, "step": 11640}
{"epoch": 24.07, "learning_rate": 5.9729542302357836e-05, "loss": 0.1334, "step": 11650}
{"epoch": 24.09, "learning_rate": 5.952149791955617e-05, "loss": 0.1366, "step": 11660}
{"epoch": 24.11, "learning_rate": 5.9313453536754504e-05, "loss": 0.115, "step": 11670}
{"epoch": 24.13, "learning_rate": 5.910540915395284e-05, "loss": 0.1389, "step": 11680}
{"epoch": 24.15, "learning_rate": 5.889736477115117e-05, "loss": 0.1311, "step": 11690}
{"epoch": 24.17, "learning_rate": 5.868932038834951e-05, "loss": 0.1277, "step": 11700}
{"epoch": 24.17, "eval_loss": 0.15491804480552673, "eval_runtime": 734.8657, "eval_samples_per_second": 3.13, "eval_steps_per_second": 0.392, "step": 11700}
{"epoch": 24.19, "learning_rate": 5.848127600554785e-05, "loss": 0.129, "step": 11710}
{"epoch": 24.21, "learning_rate": 5.8273231622746186e-05, "loss": 0.1401, "step": 11720}
{"epoch": 24.24, "learning_rate": 5.806518723994452e-05, "loss": 0.1375, "step": 11730}
{"epoch": 24.26, "learning_rate": 5.7857142857142855e-05, "loss": 0.1431, "step": 11740}
{"epoch": 24.28, "learning_rate": 5.764909847434119e-05, "loss": 0.1307, "step": 11750}
{"epoch": 24.3, "learning_rate": 5.7441054091539524e-05, "loss": 0.1383, "step": 11760}
{"epoch": 24.32, "learning_rate": 5.723300970873786e-05, "loss": 0.146, "step": 11770}
{"epoch": 24.34, "learning_rate": 5.70249653259362e-05, "loss": 0.1313, "step": 11780}
{"epoch": 24.36, "learning_rate": 5.681692094313454e-05, "loss": 0.1451, "step": 11790}
{"epoch": 24.38, "learning_rate": 5.660887656033287e-05, "loss": 0.1299, "step": 11800}
{"epoch": 24.4, "learning_rate": 5.6400832177531205e-05, "loss": 0.1528, "step": 11810}
{"epoch": 24.42, "learning_rate": 5.619278779472954e-05, "loss": 0.1372, "step": 11820}
{"epoch": 24.44, "learning_rate": 5.600554785020804e-05, "loss": 0.1308, "step": 11830}
{"epoch": 24.46, "learning_rate": 5.579750346740637e-05, "loss": 0.1309, "step": 11840}
{"epoch": 24.48, "learning_rate": 5.558945908460471e-05, "loss": 0.1435, "step": 11850}
{"epoch": 24.5, "learning_rate": 5.5381414701803046e-05, "loss": 0.1425, "step": 11860}
{"epoch": 24.52, "learning_rate": 5.5173370319001384e-05, "loss": 0.1244, "step": 11870}
{"epoch": 24.55, "learning_rate": 5.4965325936199715e-05, "loss": 0.1392, "step": 11880}
{"epoch": 24.57, "learning_rate": 5.475728155339805e-05, "loss": 0.1456, "step": 11890}
{"epoch": 24.59, "learning_rate": 5.454923717059639e-05, "loss": 0.1332, "step": 11900}
{"epoch": 24.61, "learning_rate": 5.434119278779472e-05, "loss": 0.1266, "step": 11910}
{"epoch": 24.63, "learning_rate": 5.413314840499306e-05, "loss": 0.1348, "step": 11920}
{"epoch": 24.65, "learning_rate": 5.3925104022191397e-05, "loss": 0.1417, "step": 11930}
{"epoch": 24.67, "learning_rate": 5.3717059639389734e-05, "loss": 0.1398, "step": 11940}
{"epoch": 24.69, "learning_rate": 5.3509015256588065e-05, "loss": 0.1297, "step": 11950}
{"epoch": 24.71, "learning_rate": 5.33009708737864e-05, "loss": 0.1261, "step": 11960}
{"epoch": 24.73, "learning_rate": 5.309292649098474e-05, "loss": 0.1324, "step": 11970}
{"epoch": 24.75, "learning_rate": 5.288488210818307e-05, "loss": 0.1384, "step": 11980}
{"epoch": 24.77, "learning_rate": 5.267683772538141e-05, "loss": 0.1391, "step": 11990}
{"epoch": 24.79, "learning_rate": 5.246879334257975e-05, "loss": 0.1222, "step": 12000}
{"epoch": 24.79, "eval_loss": 0.1546047329902649, "eval_runtime": 734.1057, "eval_samples_per_second": 3.133, "eval_steps_per_second": 0.392, "step": 12000}
{"epoch": 24.81, "learning_rate": 5.2260748959778085e-05, "loss": 0.1246, "step": 12010}
{"epoch": 24.83, "learning_rate": 5.2052704576976416e-05, "loss": 0.1295, "step": 12020}
{"epoch": 24.86, "learning_rate": 5.1844660194174753e-05, "loss": 0.1282, "step": 12030}
{"epoch": 24.88, "learning_rate": 5.163661581137309e-05, "loss": 0.1285, "step": 12040}
{"epoch": 24.9, "learning_rate": 5.142857142857142e-05, "loss": 0.138, "step": 12050}
{"epoch": 24.92, "learning_rate": 5.122052704576976e-05, "loss": 0.1384, "step": 12060}
{"epoch": 24.94, "learning_rate": 5.10124826629681e-05, "loss": 0.1366, "step": 12070}
{"epoch": 24.96, "learning_rate": 5.080443828016643e-05, "loss": 0.12, "step": 12080}
{"epoch": 24.98, "learning_rate": 5.0596393897364766e-05, "loss": 0.1395, "step": 12090}
{"epoch": 25.0, "learning_rate": 5.0388349514563104e-05, "loss": 0.1222, "step": 12100}
{"epoch": 25.02, "learning_rate": 5.018030513176144e-05, "loss": 0.126, "step": 12110}
{"epoch": 25.04, "learning_rate": 4.997226074895977e-05, "loss": 0.1285, "step": 12120}
{"epoch": 25.06, "learning_rate": 4.976421636615811e-05, "loss": 0.1314, "step": 12130}
{"epoch": 25.08, "learning_rate": 4.955617198335645e-05, "loss": 0.1304, "step": 12140}
{"epoch": 25.1, "learning_rate": 4.934812760055478e-05, "loss": 0.1358, "step": 12150}
{"epoch": 25.12, "learning_rate": 4.914008321775312e-05, "loss": 0.1388, "step": 12160}
{"epoch": 25.14, "learning_rate": 4.8932038834951454e-05, "loss": 0.1244, "step": 12170}
{"epoch": 25.17, "learning_rate": 4.872399445214979e-05, "loss": 0.1293, "step": 12180}
{"epoch": 25.19, "learning_rate": 4.851595006934812e-05, "loss": 0.1241, "step": 12190}
{"epoch": 25.21, "learning_rate": 4.830790568654646e-05, "loss": 0.1243, "step": 12200}
{"epoch": 25.23, "learning_rate": 4.80998613037448e-05, "loss": 0.1381, "step": 12210}
{"epoch": 25.25, "learning_rate": 4.789181692094313e-05, "loss": 0.1395, "step": 12220}
{"epoch": 25.27, "learning_rate": 4.768377253814147e-05, "loss": 0.1273, "step": 12230}
{"epoch": 25.29, "learning_rate": 4.7475728155339805e-05, "loss": 0.1308, "step": 12240}
{"epoch": 25.31, "learning_rate": 4.726768377253814e-05, "loss": 0.1323, "step": 12250}
{"epoch": 25.33, "learning_rate": 4.7059639389736474e-05, "loss": 0.1295, "step": 12260}
{"epoch": 25.35, "learning_rate": 4.685159500693481e-05, "loss": 0.1202, "step": 12270}
{"epoch": 25.37, "learning_rate": 4.664355062413315e-05, "loss": 0.1378, "step": 12280}
{"epoch": 25.39, "learning_rate": 4.643550624133148e-05, "loss": 0.1256, "step": 12290}
{"epoch": 25.41, "learning_rate": 4.622746185852982e-05, "loss": 0.127, "step": 12300}
{"epoch": 25.41, "eval_loss": 0.15560182929039001, "eval_runtime": 743.9907, "eval_samples_per_second": 3.091, "eval_steps_per_second": 0.387, "step": 12300}
{"epoch": 25.43, "learning_rate": 4.6019417475728155e-05, "loss": 0.129, "step": 12310}
{"epoch": 25.45, "learning_rate": 4.581137309292649e-05, "loss": 0.1335, "step": 12320}
{"epoch": 25.48, "learning_rate": 4.5603328710124824e-05, "loss": 0.14, "step": 12330}
{"epoch": 25.5, "learning_rate": 4.539528432732316e-05, "loss": 0.1263, "step": 12340}
{"epoch": 25.52, "learning_rate": 4.51872399445215e-05, "loss": 0.1318, "step": 12350}
{"epoch": 25.54, "learning_rate": 4.497919556171983e-05, "loss": 0.1367, "step": 12360}
{"epoch": 25.56, "learning_rate": 4.477115117891817e-05, "loss": 0.1464, "step": 12370}
{"epoch": 25.58, "learning_rate": 4.4563106796116506e-05, "loss": 0.1166, "step": 12380}
{"epoch": 25.6, "learning_rate": 4.435506241331484e-05, "loss": 0.1388, "step": 12390}
{"epoch": 25.62, "learning_rate": 4.4147018030513175e-05, "loss": 0.1287, "step": 12400}
{"epoch": 25.64, "learning_rate": 4.393897364771151e-05, "loss": 0.1276, "step": 12410}
{"epoch": 25.66, "learning_rate": 4.373092926490985e-05, "loss": 0.1162, "step": 12420}
{"epoch": 25.68, "learning_rate": 4.352288488210818e-05, "loss": 0.1439, "step": 12430}
{"epoch": 25.7, "learning_rate": 4.331484049930652e-05, "loss": 0.138, "step": 12440}
{"epoch": 25.72, "learning_rate": 4.310679611650485e-05, "loss": 0.151, "step": 12450}
{"epoch": 25.74, "learning_rate": 4.289875173370318e-05, "loss": 0.1252, "step": 12460}
{"epoch": 25.76, "learning_rate": 4.269070735090152e-05, "loss": 0.1254, "step": 12470}
{"epoch": 25.79, "learning_rate": 4.2482662968099856e-05, "loss": 0.1349, "step": 12480}
{"epoch": 25.81, "learning_rate": 4.227461858529819e-05, "loss": 0.1435, "step": 12490}
{"epoch": 25.83, "learning_rate": 4.2066574202496525e-05, "loss": 0.1224, "step": 12500}
{"epoch": 25.85, "learning_rate": 4.185852981969486e-05, "loss": 0.1256, "step": 12510}
{"epoch": 25.87, "learning_rate": 4.1650485436893193e-05, "loss": 0.143, "step": 12520}
{"epoch": 25.89, "learning_rate": 4.144244105409153e-05, "loss": 0.1389, "step": 12530}
{"epoch": 25.91, "learning_rate": 4.123439667128987e-05, "loss": 0.1428, "step": 12540}
{"epoch": 25.93, "learning_rate": 4.1026352288488207e-05, "loss": 0.1389, "step": 12550}
{"epoch": 25.95, "learning_rate": 4.081830790568654e-05, "loss": 0.1255, "step": 12560}
{"epoch": 25.97, "learning_rate": 4.0610263522884875e-05, "loss": 0.1388, "step": 12570}
{"epoch": 25.99, "learning_rate": 4.040221914008321e-05, "loss": 0.1401, "step": 12580}
{"epoch": 26.01, "learning_rate": 4.0194174757281544e-05, "loss": 0.1267, "step": 12590}
{"epoch": 26.03, "learning_rate": 3.998613037447988e-05, "loss": 0.1181, "step": 12600}
{"epoch": 26.03, "eval_loss": 0.15446408092975616, "eval_runtime": 739.9772, "eval_samples_per_second": 3.108, "eval_steps_per_second": 0.389, "step": 12600}
{"epoch": 26.05, "learning_rate": 3.977808599167822e-05, "loss": 0.1359, "step": 12610}
{"epoch": 26.07, "learning_rate": 3.957004160887656e-05, "loss": 0.1248, "step": 12620}
{"epoch": 26.1, "learning_rate": 3.936199722607489e-05, "loss": 0.11, "step": 12630}
{"epoch": 26.12, "learning_rate": 3.9153952843273226e-05, "loss": 0.1261, "step": 12640}
{"epoch": 26.14, "learning_rate": 3.8945908460471563e-05, "loss": 0.1211, "step": 12650}
{"epoch": 26.16, "learning_rate": 3.8737864077669894e-05, "loss": 0.1319, "step": 12660}
{"epoch": 26.18, "learning_rate": 3.852981969486823e-05, "loss": 0.1243, "step": 12670}
{"epoch": 26.2, "learning_rate": 3.832177531206657e-05, "loss": 0.131, "step": 12680}
{"epoch": 26.22, "learning_rate": 3.811373092926491e-05, "loss": 0.1162, "step": 12690}
{"epoch": 26.24, "learning_rate": 3.790568654646324e-05, "loss": 0.1249, "step": 12700}
{"epoch": 26.26, "learning_rate": 3.7697642163661576e-05, "loss": 0.1288, "step": 12710}
{"epoch": 26.28, "learning_rate": 3.7489597780859914e-05, "loss": 0.1196, "step": 12720}
{"epoch": 26.3, "learning_rate": 3.7281553398058245e-05, "loss": 0.1372, "step": 12730}
{"epoch": 26.32, "learning_rate": 3.707350901525658e-05, "loss": 0.1197, "step": 12740}
{"epoch": 26.34, "learning_rate": 3.686546463245492e-05, "loss": 0.1211, "step": 12750}
{"epoch": 26.36, "learning_rate": 3.665742024965326e-05, "loss": 0.1299, "step": 12760}
{"epoch": 26.38, "learning_rate": 3.644937586685159e-05, "loss": 0.1124, "step": 12770}
{"epoch": 26.4, "learning_rate": 3.624133148404993e-05, "loss": 0.1331, "step": 12780}
{"epoch": 26.43, "learning_rate": 3.6033287101248264e-05, "loss": 0.1274, "step": 12790}
{"epoch": 26.45, "learning_rate": 3.5825242718446595e-05, "loss": 0.1349, "step": 12800}
{"epoch": 26.47, "learning_rate": 3.561719833564493e-05, "loss": 0.1294, "step": 12810}
{"epoch": 26.49, "learning_rate": 3.540915395284327e-05, "loss": 0.1361, "step": 12820}
{"epoch": 26.51, "learning_rate": 3.52011095700416e-05, "loss": 0.1321, "step": 12830}
{"epoch": 26.53, "learning_rate": 3.499306518723994e-05, "loss": 0.1135, "step": 12840}
{"epoch": 26.55, "learning_rate": 3.478502080443828e-05, "loss": 0.1187, "step": 12850}
{"epoch": 26.57, "learning_rate": 3.4576976421636615e-05, "loss": 0.1249, "step": 12860}
{"epoch": 26.59, "learning_rate": 3.4368932038834946e-05, "loss": 0.1234, "step": 12870}
{"epoch": 26.61, "learning_rate": 3.4160887656033284e-05, "loss": 0.1307, "step": 12880}
{"epoch": 26.63, "learning_rate": 3.395284327323162e-05, "loss": 0.1333, "step": 12890}
{"epoch": 26.65, "learning_rate": 3.374479889042995e-05, "loss": 0.1339, "step": 12900}
{"epoch": 26.65, "eval_loss": 0.15420517325401306, "eval_runtime": 741.7248, "eval_samples_per_second": 3.101, "eval_steps_per_second": 0.388, "step": 12900}
{"epoch": 26.67, "learning_rate": 3.353675450762829e-05, "loss": 0.1321, "step": 12910}
{"epoch": 26.69, "learning_rate": 3.332871012482663e-05, "loss": 0.1372, "step": 12920}
{"epoch": 26.71, "learning_rate": 3.3120665742024965e-05, "loss": 0.1404, "step": 12930}
{"epoch": 26.74, "learning_rate": 3.2912621359223296e-05, "loss": 0.1258, "step": 12940}
{"epoch": 26.76, "learning_rate": 3.2704576976421634e-05, "loss": 0.1367, "step": 12950}
{"epoch": 26.78, "learning_rate": 3.249653259361997e-05, "loss": 0.1296, "step": 12960}
{"epoch": 26.8, "learning_rate": 3.22884882108183e-05, "loss": 0.1213, "step": 12970}
{"epoch": 26.82, "learning_rate": 3.208044382801664e-05, "loss": 0.1332, "step": 12980}
{"epoch": 26.84, "learning_rate": 3.187239944521498e-05, "loss": 0.1148, "step": 12990}
{"epoch": 26.86, "learning_rate": 3.1664355062413316e-05, "loss": 0.1402, "step": 13000}
{"epoch": 26.88, "learning_rate": 3.145631067961165e-05, "loss": 0.1463, "step": 13010}
{"epoch": 26.9, "learning_rate": 3.1248266296809985e-05, "loss": 0.1384, "step": 13020}
{"epoch": 26.92, "learning_rate": 3.104022191400832e-05, "loss": 0.1362, "step": 13030}
{"epoch": 26.94, "learning_rate": 3.083217753120665e-05, "loss": 0.1237, "step": 13040}
{"epoch": 26.96, "learning_rate": 3.062413314840499e-05, "loss": 0.13, "step": 13050}
{"epoch": 26.98, "learning_rate": 3.041608876560333e-05, "loss": 0.1351, "step": 13060}
{"epoch": 27.0, "learning_rate": 3.0208044382801663e-05, "loss": 0.1468, "step": 13070}
{"epoch": 27.02, "learning_rate": 2.9999999999999997e-05, "loss": 0.1158, "step": 13080}
{"epoch": 27.05, "learning_rate": 2.9791955617198335e-05, "loss": 0.1382, "step": 13090}
{"epoch": 27.07, "learning_rate": 2.958391123439667e-05, "loss": 0.1244, "step": 13100}
{"epoch": 27.09, "learning_rate": 2.9375866851595007e-05, "loss": 0.1417, "step": 13110}
{"epoch": 27.11, "learning_rate": 2.916782246879334e-05, "loss": 0.1292, "step": 13120}
{"epoch": 27.13, "learning_rate": 2.8959778085991672e-05, "loss": 0.1314, "step": 13130}
{"epoch": 27.15, "learning_rate": 2.875173370319001e-05, "loss": 0.1299, "step": 13140}
{"epoch": 27.17, "learning_rate": 2.8543689320388344e-05, "loss": 0.1293, "step": 13150}
{"epoch": 27.19, "learning_rate": 2.8335644937586682e-05, "loss": 0.1075, "step": 13160}
{"epoch": 27.21, "learning_rate": 2.8127600554785017e-05, "loss": 0.1288, "step": 13170}
{"epoch": 27.23, "learning_rate": 2.791955617198335e-05, "loss": 0.1344, "step": 13180}
{"epoch": 27.25, "learning_rate": 2.771151178918169e-05, "loss": 0.1386, "step": 13190}
{"epoch": 27.27, "learning_rate": 2.7503467406380023e-05, "loss": 0.1234, "step": 13200}
{"epoch": 27.27, "eval_loss": 0.15529084205627441, "eval_runtime": 744.3068, "eval_samples_per_second": 3.09, "eval_steps_per_second": 0.387, "step": 13200}
{"loss": 0.1409, "learning_rate": 2.729542302357836e-05, "epoch": 27.29, "step": 13210}
{"loss": 0.13, "learning_rate": 2.7087378640776695e-05, "epoch": 27.31, "step": 13220}
{"loss": 0.1326, "learning_rate": 2.6879334257975033e-05, "epoch": 27.33, "step": 13230}
{"loss": 0.131, "learning_rate": 2.6671289875173367e-05, "epoch": 27.36, "step": 13240}
{"loss": 0.107, "learning_rate": 2.64632454923717e-05, "epoch": 27.38, "step": 13250}
{"loss": 0.1364, "learning_rate": 2.625520110957004e-05, "epoch": 27.4, "step": 13260}
{"loss": 0.1239, "learning_rate": 2.6047156726768373e-05, "epoch": 27.42, "step": 13270}
{"loss": 0.1303, "learning_rate": 2.583911234396671e-05, "epoch": 27.44, "step": 13280}
{"loss": 0.1176, "learning_rate": 2.5631067961165045e-05, "epoch": 27.46, "step": 13290}
{"loss": 0.1224, "learning_rate": 2.542302357836338e-05, "epoch": 27.48, "step": 13300}
{"loss": 0.1105, "learning_rate": 2.5214979195561718e-05, "epoch": 27.5, "step": 13310}
{"loss": 0.1188, "learning_rate": 2.5006934812760052e-05, "epoch": 27.52, "step": 13320}
{"loss": 0.1168, "learning_rate": 2.479889042995839e-05, "epoch": 27.54, "step": 13330}
{"loss": 0.1256, "learning_rate": 2.4590846047156724e-05, "epoch": 27.56, "step": 13340}
{"loss": 0.1292, "learning_rate": 2.438280166435506e-05, "epoch": 27.58, "step": 13350}
{"loss": 0.1306, "learning_rate": 2.4174757281553396e-05, "epoch": 27.6, "step": 13360}
{"loss": 0.1264, "learning_rate": 2.396671289875173e-05, "epoch": 27.62, "step": 13370}
{"loss": 0.1282, "learning_rate": 2.3758668515950068e-05, "epoch": 27.64, "step": 13380}
{"loss": 0.1276, "learning_rate": 2.3550624133148402e-05, "epoch": 27.67, "step": 13390}
{"loss": 0.1242, "learning_rate": 2.334257975034674e-05, "epoch": 27.69, "step": 13400}
{"loss": 0.1276, "learning_rate": 2.3134535367545074e-05, "epoch": 27.71, "step": 13410}
{"loss": 0.1169, "learning_rate": 2.2926490984743412e-05, "epoch": 27.73, "step": 13420}
{"loss": 0.1309, "learning_rate": 2.2718446601941746e-05, "epoch": 27.75, "step": 13430}
{"loss": 0.1265, "learning_rate": 2.251040221914008e-05, "epoch": 27.77, "step": 13440}
{"loss": 0.1241, "learning_rate": 2.230235783633842e-05, "epoch": 27.79, "step": 13450}
{"loss": 0.125, "learning_rate": 2.2094313453536753e-05, "epoch": 27.81, "step": 13460}
{"loss": 0.1277, "learning_rate": 2.188626907073509e-05, "epoch": 27.83, "step": 13470}
{"loss": 0.1118, "learning_rate": 2.1678224687933425e-05, "epoch": 27.85, "step": 13480}
{"loss": 0.1305, "learning_rate": 2.147018030513176e-05, "epoch": 27.87, "step": 13490}
{"loss": 0.1394, "learning_rate": 2.1262135922330097e-05, "epoch": 27.89, "step": 13500}
{"eval_loss": 0.15495647490024567, "eval_runtime": 702.9085, "eval_samples_per_second": 3.272, "eval_steps_per_second": 0.41, "epoch": 27.89, "step": 13500}
{"loss": 0.1242, "learning_rate": 2.105409153952843e-05, "epoch": 27.91, "step": 13510}
{"loss": 0.1271, "learning_rate": 2.084604715672677e-05, "epoch": 27.93, "step": 13520}
{"loss": 0.1242, "learning_rate": 2.0638002773925103e-05, "epoch": 27.95, "step": 13530}
{"loss": 0.123, "learning_rate": 2.042995839112344e-05, "epoch": 27.98, "step": 13540}
{"loss": 0.126, "learning_rate": 2.0221914008321775e-05, "epoch": 28.0, "step": 13550}
{"loss": 0.1161, "learning_rate": 2.001386962552011e-05, "epoch": 28.02, "step": 13560}
{"loss": 0.1198, "learning_rate": 1.9805825242718447e-05, "epoch": 28.04, "step": 13570}
{"loss": 0.1312, "learning_rate": 1.959778085991678e-05, "epoch": 28.06, "step": 13580}
{"loss": 0.1193, "learning_rate": 1.9389736477115113e-05, "epoch": 28.08, "step": 13590}
{"loss": 0.1216, "learning_rate": 1.918169209431345e-05, "epoch": 28.1, "step": 13600}
{"loss": 0.1201, "learning_rate": 1.8973647711511785e-05, "epoch": 28.12, "step": 13610}
{"loss": 0.1127, "learning_rate": 1.8765603328710123e-05, "epoch": 28.14, "step": 13620}
{"loss": 0.1292, "learning_rate": 1.855755894590846e-05, "epoch": 28.16, "step": 13630}
{"loss": 0.1239, "learning_rate": 1.8349514563106795e-05, "epoch": 28.18, "step": 13640}
{"loss": 0.136, "learning_rate": 1.8141470180305132e-05, "epoch": 28.2, "step": 13650}
{"loss": 0.1365, "learning_rate": 1.7933425797503467e-05, "epoch": 28.22, "step": 13660}
{"loss": 0.1241, "learning_rate": 1.77253814147018e-05, "epoch": 28.24, "step": 13670}
{"loss": 0.1161, "learning_rate": 1.751733703190014e-05, "epoch": 28.26, "step": 13680}
{"loss": 0.1262, "learning_rate": 1.7309292649098473e-05, "epoch": 28.29, "step": 13690}
{"loss": 0.124, "learning_rate": 1.7101248266296807e-05, "epoch": 28.31, "step": 13700}
{"loss": 0.1321, "learning_rate": 1.689320388349514e-05, "epoch": 28.33, "step": 13710}
{"loss": 0.1234, "learning_rate": 1.668515950069348e-05, "epoch": 28.35, "step": 13720}
{"loss": 0.1118, "learning_rate": 1.6477115117891814e-05, "epoch": 28.37, "step": 13730}
{"loss": 0.1212, "learning_rate": 1.626907073509015e-05, "epoch": 28.39, "step": 13740}
{"loss": 0.1352, "learning_rate": 1.6061026352288486e-05, "epoch": 28.41, "step": 13750}
{"loss": 0.1297, "learning_rate": 1.5852981969486824e-05, "epoch": 28.43, "step": 13760}
{"loss": 0.1422, "learning_rate": 1.5644937586685158e-05, "epoch": 28.45, "step": 13770}
{"loss": 0.1205, "learning_rate": 1.5436893203883492e-05, "epoch": 28.47, "step": 13780}
{"loss": 0.1318, "learning_rate": 1.522884882108183e-05, "epoch": 28.49, "step": 13790}
{"loss": 0.1276, "learning_rate": 1.5020804438280166e-05, "epoch": 28.51, "step": 13800}
{"eval_loss": 0.15447427332401276, "eval_runtime": 724.2999, "eval_samples_per_second": 3.175, "eval_steps_per_second": 0.398, "epoch": 28.51, "step": 13800}
{"loss": 0.1159, "learning_rate": 1.48127600554785e-05, "epoch": 28.53, "step": 13810}
{"loss": 0.1116, "learning_rate": 1.4604715672676836e-05, "epoch": 28.55, "step": 13820}
{"loss": 0.1303, "learning_rate": 1.4396671289875172e-05, "epoch": 28.57, "step": 13830}
{"loss": 0.1283, "learning_rate": 1.4188626907073508e-05, "epoch": 28.6, "step": 13840}
{"loss": 0.1153, "learning_rate": 1.3980582524271844e-05, "epoch": 28.62, "step": 13850}
{"loss": 0.1136, "learning_rate": 1.377253814147018e-05, "epoch": 28.64, "step": 13860}
{"loss": 0.128, "learning_rate": 1.3564493758668515e-05, "epoch": 28.66, "step": 13870}
{"loss": 0.1243, "learning_rate": 1.335644937586685e-05, "epoch": 28.68, "step": 13880}
{"loss": 0.1179, "learning_rate": 1.3148404993065187e-05, "epoch": 28.7, "step": 13890}
{"loss": 0.1235, "learning_rate": 1.2940360610263523e-05, "epoch": 28.72, "step": 13900}
{"loss": 0.1088, "learning_rate": 1.2732316227461859e-05, "epoch": 28.74, "step": 13910}
{"loss": 0.1141, "learning_rate": 1.2524271844660191e-05, "epoch": 28.76, "step": 13920}
{"loss": 0.1253, "learning_rate": 1.2316227461858528e-05, "epoch": 28.78, "step": 13930}
{"loss": 0.1213, "learning_rate": 1.2108183079056864e-05, "epoch": 28.8, "step": 13940}
{"loss": 0.1221, "learning_rate": 1.19001386962552e-05, "epoch": 28.82, "step": 13950}
{"loss": 0.1364, "learning_rate": 1.1692094313453536e-05, "epoch": 28.84, "step": 13960}
{"loss": 0.1252, "learning_rate": 1.1484049930651872e-05, "epoch": 28.86, "step": 13970}
{"loss": 0.1181, "learning_rate": 1.1276005547850206e-05, "epoch": 28.88, "step": 13980}
{"loss": 0.123, "learning_rate": 1.1067961165048542e-05, "epoch": 28.9, "step": 13990}
{"loss": 0.12, "learning_rate": 1.0859916782246878e-05, "epoch": 28.93, "step": 14000}
{"loss": 0.1121, "learning_rate": 1.0651872399445214e-05, "epoch": 28.95, "step": 14010}
{"loss": 0.1218, "learning_rate": 1.044382801664355e-05, "epoch": 28.97, "step": 14020}
{"loss": 0.1262, "learning_rate": 1.0235783633841886e-05, "epoch": 28.99, "step": 14030}
{"loss": 0.1144, "learning_rate": 1.002773925104022e-05, "epoch": 29.01, "step": 14040}
{"loss": 0.1307, "learning_rate": 9.819694868238556e-06, "epoch": 29.03, "step": 14050}
{"loss": 0.1149, "learning_rate": 9.611650485436892e-06, "epoch": 29.05, "step": 14060}
{"loss": 0.1245, "learning_rate": 9.403606102635229e-06, "epoch": 29.07, "step": 14070}
{"loss": 0.1334, "learning_rate": 9.195561719833565e-06, "epoch": 29.09, "step": 14080}
{"loss": 0.138, "learning_rate": 8.987517337031899e-06, "epoch": 29.11, "step": 14090}
{"loss": 0.1182, "learning_rate": 8.779472954230235e-06, "epoch": 29.13, "step": 14100}
{"eval_loss": 0.1546429991722107, "eval_runtime": 723.3859, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.398, "epoch": 29.13, "step": 14100}
{"loss": 0.1132, "learning_rate": 8.571428571428571e-06, "epoch": 29.15, "step": 14110}
{"loss": 0.1084, "learning_rate": 8.363384188626907e-06, "epoch": 29.17, "step": 14120}
{"loss": 0.1267, "learning_rate": 8.155339805825241e-06, "epoch": 29.19, "step": 14130}
{"loss": 0.1333, "learning_rate": 7.947295423023577e-06, "epoch": 29.21, "step": 14140}
{"loss": 0.1251, "learning_rate": 7.739251040221913e-06, "epoch": 29.24, "step": 14150}
{"loss": 0.127, "learning_rate": 7.531206657420249e-06, "epoch": 29.26, "step": 14160}
{"loss": 0.1334, "learning_rate": 7.3231622746185845e-06, "epoch": 29.28, "step": 14170}
{"loss": 0.1196, "learning_rate": 7.1151178918169206e-06, "epoch": 29.3, "step": 14180}
{"loss": 0.1141, "learning_rate": 6.907073509015257e-06, "epoch": 29.32, "step": 14190}
{"loss": 0.1059, "learning_rate": 6.699029126213591e-06, "epoch": 29.34, "step": 14200}
{"loss": 0.1096, "learning_rate": 6.490984743411927e-06, "epoch": 29.36, "step": 14210}
{"loss": 0.111, "learning_rate": 6.282940360610263e-06, "epoch": 29.38, "step": 14220}
{"loss": 0.1167, "learning_rate": 6.074895977808598e-06, "epoch": 29.4, "step": 14230}
{"loss": 0.1232, "learning_rate": 5.866851595006934e-06, "epoch": 29.42, "step": 14240}
{"loss": 0.1396, "learning_rate": 5.65880721220527e-06, "epoch": 29.44, "step": 14250}
{"loss": 0.1161, "learning_rate": 5.450762829403605e-06, "epoch": 29.46, "step": 14260}
{"loss": 0.1085, "learning_rate": 5.2427184466019414e-06, "epoch": 29.48, "step": 14270}
{"loss": 0.1156, "learning_rate": 5.0346740638002775e-06, "epoch": 29.5, "step": 14280}
{"loss": 0.1256, "learning_rate": 4.826629680998613e-06, "epoch": 29.52, "step": 14290}
{"loss": 0.1172, "learning_rate": 4.618585298196948e-06, "epoch": 29.55, "step": 14300}
{"loss": 0.1209, "learning_rate": 4.410540915395284e-06, "epoch": 29.57, "step": 14310}
{"loss": 0.1168, "learning_rate": 4.20249653259362e-06, "epoch": 29.59, "step": 14320}
{"loss": 0.1218, "learning_rate": 3.994452149791955e-06, "epoch": 29.61, "step": 14330}
{"loss": 0.1253, "learning_rate": 3.7864077669902907e-06, "epoch": 29.63, "step": 14340}
{"loss": 0.1148, "learning_rate": 3.5783633841886263e-06, "epoch": 29.65, "step": 14350}
{"loss": 0.1408, "learning_rate": 3.3703190013869623e-06, "epoch": 29.67, "step": 14360}
{"loss": 0.1345, "learning_rate": 3.162274618585298e-06, "epoch": 29.69, "step": 14370}
{"loss": 0.1227, "learning_rate": 2.954230235783634e-06, "epoch": 29.71, "step": 14380}
{"loss": 0.1183, "learning_rate": 2.7461858529819695e-06, "epoch": 29.73, "step": 14390}
{"loss": 0.1285, "learning_rate": 2.5381414701803047e-06, "epoch": 29.75, "step": 14400}
{"eval_loss": 0.15436199307441711, "eval_runtime": 723.9259, "eval_samples_per_second": 3.177, "eval_steps_per_second": 0.398, "epoch": 29.75, "step": 14400}
{"loss": 0.1208, "learning_rate": 2.3300970873786403e-06, "epoch": 29.77, "step": 14410}
{"loss": 0.1064, "learning_rate": 2.1220527045769763e-06, "epoch": 29.79, "step": 14420}
{"loss": 0.1308, "learning_rate": 1.914008321775312e-06, "epoch": 29.81, "step": 14430}
{"loss": 0.1169, "learning_rate": 1.7059639389736475e-06, "epoch": 29.83, "step": 14440}
{"loss": 0.1198, "learning_rate": 1.4979195561719831e-06, "epoch": 29.86, "step": 14450}
{"loss": 0.1269, "learning_rate": 1.289875173370319e-06, "epoch": 29.88, "step": 14460}
{"loss": 0.1106, "learning_rate": 1.0818307905686546e-06, "epoch": 29.9, "step": 14470}
{"loss": 0.0991, "learning_rate": 8.737864077669903e-07, "epoch": 29.92, "step": 14480}
{"loss": 0.1176, "learning_rate": 6.657420249653259e-07, "epoch": 29.94, "step": 14490}
{"loss": 0.1178, "learning_rate": 4.5769764216366153e-07, "epoch": 29.96, "step": 14500}
{"loss": 0.1247, "learning_rate": 2.496532593619972e-07, "epoch": 29.98, "step": 14510}
{"loss": 0.1365, "learning_rate": 4.160887656033287e-08, "epoch": 30.0, "step": 14520}
{"train_runtime": 27742.3797, "train_samples_per_second": 10.466, "train_steps_per_second": 0.523, "total_flos": 1.8860263271314883e+19, "train_loss": 0.011189159681153363, "epoch": 30.0, "step": 14520}