diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,29276 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9984012789768184, + "global_step": 47500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.0000000000000003e-07, + "loss": 20.3727, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-07, + "loss": 22.0558, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.2000000000000002e-06, + "loss": 21.7109, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.6000000000000001e-06, + "loss": 20.6611, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 22.4204, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 2.4000000000000003e-06, + "loss": 19.938, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 2.8000000000000003e-06, + "loss": 17.4213, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 3.2000000000000003e-06, + "loss": 20.1383, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 3.6000000000000003e-06, + "loss": 19.8127, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 23.1777, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.4e-06, + "loss": 23.3547, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.800000000000001e-06, + "loss": 16.6238, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 5.2e-06, + "loss": 17.8069, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 5.600000000000001e-06, + "loss": 20.2351, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 21.9262, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 6.4000000000000006e-06, + "loss": 18.2762, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 6.800000000000001e-06, + "loss": 20.3464, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 7.2000000000000005e-06, + "loss": 19.9402, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 7.600000000000001e-06, + "loss": 18.0535, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 24.1097, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 8.400000000000001e-06, + "loss": 22.9605, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 8.8e-06, + "loss": 18.8428, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 9.200000000000002e-06, + "loss": 13.8694, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 9.600000000000001e-06, + "loss": 17.6931, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 1e-05, + "loss": 20.0871, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 1.04e-05, + "loss": 15.7877, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 1.0800000000000002e-05, + "loss": 13.5578, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 1.1200000000000001e-05, + "loss": 17.1961, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 1.16e-05, + "loss": 14.2598, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 12.4614, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 1.2400000000000002e-05, + "loss": 11.931, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 1.2800000000000001e-05, + "loss": 13.7064, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 1.3200000000000002e-05, + "loss": 14.322, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 1.3600000000000002e-05, + "loss": 12.1773, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 1.4e-05, + "loss": 15.734, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 1.4400000000000001e-05, + "loss": 15.678, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 1.48e-05, + "loss": 13.6815, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 1.5200000000000002e-05, + "loss": 13.4937, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 1.5600000000000003e-05, + "loss": 11.4397, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 1.6000000000000003e-05, + "loss": 13.4706, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 1.64e-05, + "loss": 12.5004, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 1.6800000000000002e-05, + "loss": 11.8415, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 1.72e-05, + "loss": 16.3443, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 1.76e-05, + "loss": 12.4893, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 1.8e-05, + "loss": 8.1513, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 1.8400000000000003e-05, + "loss": 11.0122, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 1.88e-05, + "loss": 9.984, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 1.9200000000000003e-05, + "loss": 9.837, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 1.9600000000000002e-05, + "loss": 8.5917, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 2e-05, + "loss": 8.5767, + "step": 500 + }, + { + "epoch": 0.02, + "eval_loss": 3.7170910835266113, + "eval_runtime": 244.7625, + "eval_samples_per_second": 10.226, + "eval_steps_per_second": 5.115, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9995748118542458e-05, + "loss": 7.3945, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.999149623708491e-05, + "loss": 7.7318, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.9987244355627367e-05, + "loss": 6.6012, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.9982992474169823e-05, + "loss": 7.4308, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 1.9978740592712276e-05, + "loss": 6.8742, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 1.9974488711254732e-05, + "loss": 6.6656, + "step": 560 + }, + { + "epoch": 0.02, + "learning_rate": 1.9970236829797188e-05, + "loss": 6.815, + "step": 570 + }, + { + "epoch": 0.02, + "learning_rate": 1.9965984948339644e-05, + "loss": 6.5222, + "step": 580 + }, + { + "epoch": 0.02, + "learning_rate": 1.9961733066882097e-05, + "loss": 7.4163, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.9957481185424553e-05, + "loss": 6.5741, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.995322930396701e-05, + "loss": 7.1728, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.9948977422509462e-05, + "loss": 5.768, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.9944725541051918e-05, + "loss": 6.0008, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.9940473659594374e-05, + "loss": 5.5226, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.9936221778136827e-05, + "loss": 5.7387, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.9931969896679283e-05, + "loss": 5.1679, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.9927718015221736e-05, + "loss": 5.0973, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.9923466133764192e-05, + "loss": 5.1589, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.9919214252306648e-05, + "loss": 5.1914, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.99149623708491e-05, + "loss": 4.5417, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.9910710489391557e-05, + "loss": 5.7502, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.9906458607934013e-05, + "loss": 4.8608, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.9902206726476466e-05, + "loss": 5.4553, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.9897954845018922e-05, + "loss": 4.9113, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.9893702963561375e-05, + "loss": 4.6249, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 1.988945108210383e-05, + "loss": 4.3366, + "step": 760 + }, + { + "epoch": 0.03, + "learning_rate": 1.9885199200646287e-05, + "loss": 4.7683, + "step": 770 + }, + { + "epoch": 0.03, + "learning_rate": 1.9880947319188743e-05, + "loss": 4.2559, + "step": 780 + }, + { + "epoch": 0.03, + "learning_rate": 1.9876695437731196e-05, + "loss": 4.6332, + "step": 790 + }, + { + "epoch": 0.03, + "learning_rate": 1.9872443556273652e-05, + "loss": 4.6606, + "step": 800 + }, + { + "epoch": 0.03, + "learning_rate": 1.986819167481611e-05, + "loss": 4.4119, + "step": 810 + }, + { + "epoch": 0.03, + "learning_rate": 1.986393979335856e-05, + "loss": 5.0149, + "step": 820 + }, + { + "epoch": 0.03, + "learning_rate": 1.9859687911901017e-05, + "loss": 4.6104, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.9855436030443474e-05, + "loss": 4.1719, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.9851184148985926e-05, + "loss": 4.5308, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.9846932267528382e-05, + "loss": 4.1654, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.984268038607084e-05, + "loss": 5.0468, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.9838428504613295e-05, + "loss": 4.0722, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.9834176623155748e-05, + "loss": 3.9921, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.9829924741698204e-05, + "loss": 4.235, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.982567286024066e-05, + "loss": 4.0093, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.9821420978783113e-05, + "loss": 4.496, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.981716909732557e-05, + "loss": 4.3413, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.9812917215868025e-05, + "loss": 4.7558, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.9808665334410478e-05, + "loss": 4.7494, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9804413452952934e-05, + "loss": 4.213, + "step": 960 + }, + { + "epoch": 0.04, + "learning_rate": 1.980016157149539e-05, + "loss": 4.8979, + "step": 970 + }, + { + "epoch": 0.04, + "learning_rate": 1.9795909690037846e-05, + "loss": 4.2093, + "step": 980 + }, + { + "epoch": 0.04, + "learning_rate": 1.97916578085803e-05, + "loss": 4.4593, + "step": 990 + }, + { + "epoch": 0.04, + "learning_rate": 1.9787405927122755e-05, + "loss": 3.8047, + "step": 1000 + }, + { + "epoch": 0.04, + "eval_loss": 2.6360342502593994, + "eval_runtime": 306.5779, + "eval_samples_per_second": 8.164, + "eval_steps_per_second": 4.084, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9783154045665208e-05, + "loss": 3.9264, + "step": 1010 + }, + { + "epoch": 0.04, + "learning_rate": 1.9778902164207664e-05, + "loss": 4.5501, + "step": 1020 + }, + { + "epoch": 0.04, + "learning_rate": 1.977465028275012e-05, + "loss": 4.2464, + "step": 1030 + }, + { + "epoch": 0.04, + "learning_rate": 1.9770398401292573e-05, + "loss": 3.8889, + "step": 1040 + }, + { + "epoch": 0.04, + "learning_rate": 1.976614651983503e-05, + "loss": 4.1145, + "step": 1050 + }, + { + "epoch": 0.04, + "learning_rate": 1.9761894638377482e-05, + "loss": 3.9731, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9757642756919938e-05, + "loss": 3.9231, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.9753390875462394e-05, + "loss": 5.0785, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.9749138994004847e-05, + "loss": 4.017, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.9744887112547303e-05, + "loss": 3.7215, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.974063523108976e-05, + "loss": 3.976, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.9736383349632212e-05, + "loss": 3.7487, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.9732131468174668e-05, + "loss": 4.1519, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.9727879586717124e-05, + "loss": 3.9152, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.9723627705259577e-05, + "loss": 3.4579, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.9719375823802033e-05, + "loss": 3.4721, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.971512394234449e-05, + "loss": 3.7831, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.9710872060886942e-05, + "loss": 3.5758, + "step": 1180 + }, + { + "epoch": 0.05, + "learning_rate": 1.9706620179429398e-05, + "loss": 4.974, + "step": 1190 + }, + { + "epoch": 0.05, + "learning_rate": 1.9702368297971854e-05, + "loss": 3.5293, + "step": 1200 + }, + { + "epoch": 0.05, + "learning_rate": 1.969811641651431e-05, + "loss": 4.3557, + "step": 1210 + }, + { + "epoch": 0.05, + "learning_rate": 1.9693864535056763e-05, + "loss": 3.6766, + "step": 1220 + }, + { + "epoch": 0.05, + "learning_rate": 1.968961265359922e-05, + "loss": 3.6694, + "step": 1230 + }, + { + "epoch": 0.05, + "learning_rate": 1.9685360772141676e-05, + "loss": 3.9718, + "step": 1240 + }, + { + "epoch": 0.05, + "learning_rate": 1.968110889068413e-05, + "loss": 3.8476, + "step": 1250 + }, + { + "epoch": 0.05, + "learning_rate": 1.9676857009226585e-05, + "loss": 3.825, + "step": 1260 + }, + { + "epoch": 0.05, + "learning_rate": 1.967260512776904e-05, + "loss": 3.9017, + "step": 1270 + }, + { + "epoch": 0.05, + "learning_rate": 1.9668353246311493e-05, + "loss": 3.8978, + "step": 1280 + }, + { + "epoch": 0.05, + "learning_rate": 1.966410136485395e-05, + "loss": 3.8765, + "step": 1290 + }, + { + "epoch": 0.05, + "learning_rate": 1.9659849483396406e-05, + "loss": 3.7227, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.9655597601938862e-05, + "loss": 3.3455, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.9651345720481315e-05, + "loss": 3.4121, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.964709383902377e-05, + "loss": 3.3375, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.9642841957566227e-05, + "loss": 3.8211, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.963859007610868e-05, + "loss": 3.2857, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.9634338194651136e-05, + "loss": 3.7018, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.9630086313193592e-05, + "loss": 3.5457, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.9625834431736045e-05, + "loss": 3.8864, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.96215825502785e-05, + "loss": 3.8284, + "step": 1390 + }, + { + "epoch": 0.06, + "learning_rate": 1.9617330668820954e-05, + "loss": 3.3557, + "step": 1400 + }, + { + "epoch": 0.06, + "learning_rate": 1.961307878736341e-05, + "loss": 3.2405, + "step": 1410 + }, + { + "epoch": 0.06, + "learning_rate": 1.9608826905905866e-05, + "loss": 3.9348, + "step": 1420 + }, + { + "epoch": 0.06, + "learning_rate": 1.960457502444832e-05, + "loss": 3.3789, + "step": 1430 + }, + { + "epoch": 0.06, + "learning_rate": 1.9600323142990775e-05, + "loss": 3.2882, + "step": 1440 + }, + { + "epoch": 0.06, + "learning_rate": 1.9596071261533228e-05, + "loss": 3.4028, + "step": 1450 + }, + { + "epoch": 0.06, + "learning_rate": 1.9591819380075684e-05, + "loss": 3.5537, + "step": 1460 + }, + { + "epoch": 0.06, + "learning_rate": 1.958756749861814e-05, + "loss": 3.3862, + "step": 1470 + }, + { + "epoch": 0.06, + "learning_rate": 1.9583315617160593e-05, + "loss": 3.3672, + "step": 1480 + }, + { + "epoch": 0.06, + "learning_rate": 1.957906373570305e-05, + "loss": 3.0539, + "step": 1490 + }, + { + "epoch": 0.06, + "learning_rate": 1.9574811854245505e-05, + "loss": 3.2658, + "step": 1500 + }, + { + "epoch": 0.06, + "eval_loss": 2.444038152694702, + "eval_runtime": 325.0737, + "eval_samples_per_second": 7.7, + "eval_steps_per_second": 3.851, + "step": 1500 + }, + { + "epoch": 0.06, + "learning_rate": 1.957055997278796e-05, + "loss": 3.7892, + "step": 1510 + }, + { + "epoch": 0.06, + "learning_rate": 1.9566308091330414e-05, + "loss": 3.0661, + "step": 1520 + }, + { + "epoch": 0.06, + "learning_rate": 1.956205620987287e-05, + "loss": 3.7278, + "step": 1530 + }, + { + "epoch": 0.06, + "learning_rate": 1.9557804328415326e-05, + "loss": 3.6798, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.955355244695778e-05, + "loss": 3.8645, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.9549300565500235e-05, + "loss": 3.3384, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.954504868404269e-05, + "loss": 3.3859, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.9540796802585144e-05, + "loss": 3.1837, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.95365449211276e-05, + "loss": 3.6801, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.9532293039670056e-05, + "loss": 3.3226, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.952804115821251e-05, + "loss": 3.9958, + "step": 1610 + }, + { + "epoch": 0.07, + "learning_rate": 1.9523789276754965e-05, + "loss": 3.3704, + "step": 1620 + }, + { + "epoch": 0.07, + "learning_rate": 1.951953739529742e-05, + "loss": 3.1825, + "step": 1630 + }, + { + "epoch": 0.07, + "learning_rate": 1.9515285513839878e-05, + "loss": 3.3603, + "step": 1640 + }, + { + "epoch": 0.07, + "learning_rate": 1.951103363238233e-05, + "loss": 3.3135, + "step": 1650 + }, + { + "epoch": 0.07, + "learning_rate": 1.9506781750924787e-05, + "loss": 3.2238, + "step": 1660 + }, + { + "epoch": 0.07, + "learning_rate": 1.9502529869467243e-05, + "loss": 3.2581, + "step": 1670 + }, + { + "epoch": 0.07, + "learning_rate": 1.9498277988009695e-05, + "loss": 3.1484, + "step": 1680 + }, + { + "epoch": 0.07, + "learning_rate": 1.949402610655215e-05, + "loss": 3.2461, + "step": 1690 + }, + { + "epoch": 0.07, + "learning_rate": 1.9489774225094608e-05, + "loss": 3.4481, + "step": 1700 + }, + { + "epoch": 0.07, + "learning_rate": 1.948552234363706e-05, + "loss": 3.5146, + "step": 1710 + }, + { + "epoch": 0.07, + "learning_rate": 1.9481270462179517e-05, + "loss": 3.23, + "step": 1720 + }, + { + "epoch": 0.07, + "learning_rate": 1.9477018580721973e-05, + "loss": 3.7308, + "step": 1730 + }, + { + "epoch": 0.07, + "learning_rate": 1.9472766699264426e-05, + "loss": 3.8861, + "step": 1740 + }, + { + "epoch": 0.07, + "learning_rate": 1.9468514817806882e-05, + "loss": 3.1529, + "step": 1750 + }, + { + "epoch": 0.07, + "learning_rate": 1.9464262936349338e-05, + "loss": 3.2984, + "step": 1760 + }, + { + "epoch": 0.07, + "learning_rate": 1.946001105489179e-05, + "loss": 3.5839, + "step": 1770 + }, + { + "epoch": 0.07, + "learning_rate": 1.9455759173434247e-05, + "loss": 3.6126, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.94515072919767e-05, + "loss": 3.2796, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9447255410519156e-05, + "loss": 3.3423, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.9443003529061612e-05, + "loss": 3.3523, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.9438751647604065e-05, + "loss": 3.2899, + "step": 1820 + }, + { + "epoch": 0.08, + "learning_rate": 1.943449976614652e-05, + "loss": 3.4348, + "step": 1830 + }, + { + "epoch": 0.08, + "learning_rate": 1.9430247884688977e-05, + "loss": 3.2762, + "step": 1840 + }, + { + "epoch": 0.08, + "learning_rate": 1.942599600323143e-05, + "loss": 3.2729, + "step": 1850 + }, + { + "epoch": 0.08, + "learning_rate": 1.9421744121773886e-05, + "loss": 3.0372, + "step": 1860 + }, + { + "epoch": 0.08, + "learning_rate": 1.9417492240316342e-05, + "loss": 3.2345, + "step": 1870 + }, + { + "epoch": 0.08, + "learning_rate": 1.9413240358858795e-05, + "loss": 3.4763, + "step": 1880 + }, + { + "epoch": 0.08, + "learning_rate": 1.940898847740125e-05, + "loss": 3.0048, + "step": 1890 + }, + { + "epoch": 0.08, + "learning_rate": 1.9404736595943707e-05, + "loss": 3.0538, + "step": 1900 + }, + { + "epoch": 0.08, + "learning_rate": 1.940048471448616e-05, + "loss": 2.8958, + "step": 1910 + }, + { + "epoch": 0.08, + "learning_rate": 1.9396232833028616e-05, + "loss": 3.5233, + "step": 1920 + }, + { + "epoch": 0.08, + "learning_rate": 1.9391980951571072e-05, + "loss": 3.3356, + "step": 1930 + }, + { + "epoch": 0.08, + "learning_rate": 1.938772907011353e-05, + "loss": 3.295, + "step": 1940 + }, + { + "epoch": 0.08, + "learning_rate": 1.938347718865598e-05, + "loss": 3.837, + "step": 1950 + }, + { + "epoch": 0.08, + "learning_rate": 1.9379225307198437e-05, + "loss": 3.4193, + "step": 1960 + }, + { + "epoch": 0.08, + "learning_rate": 1.9374973425740893e-05, + "loss": 3.148, + "step": 1970 + }, + { + "epoch": 0.08, + "learning_rate": 1.9370721544283346e-05, + "loss": 2.9353, + "step": 1980 + }, + { + "epoch": 0.08, + "learning_rate": 1.9366469662825802e-05, + "loss": 3.6072, + "step": 1990 + }, + { + "epoch": 0.08, + "learning_rate": 1.936221778136826e-05, + "loss": 3.3311, + "step": 2000 + }, + { + "epoch": 0.08, + "eval_loss": 2.323817729949951, + "eval_runtime": 333.611, + "eval_samples_per_second": 7.503, + "eval_steps_per_second": 3.753, + "step": 2000 + }, + { + "epoch": 0.08, + "learning_rate": 1.935796589991071e-05, + "loss": 3.2877, + "step": 2010 + }, + { + "epoch": 0.08, + "learning_rate": 1.9353714018453167e-05, + "loss": 3.2228, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.9349462136995624e-05, + "loss": 3.3077, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.9345210255538076e-05, + "loss": 3.5034, + "step": 2040 + }, + { + "epoch": 0.09, + "learning_rate": 1.9340958374080532e-05, + "loss": 3.548, + "step": 2050 + }, + { + "epoch": 0.09, + "learning_rate": 1.933670649262299e-05, + "loss": 3.4721, + "step": 2060 + }, + { + "epoch": 0.09, + "learning_rate": 1.9332454611165445e-05, + "loss": 3.6428, + "step": 2070 + }, + { + "epoch": 0.09, + "learning_rate": 1.9328202729707898e-05, + "loss": 3.2712, + "step": 2080 + }, + { + "epoch": 0.09, + "learning_rate": 1.9323950848250354e-05, + "loss": 3.0507, + "step": 2090 + }, + { + "epoch": 0.09, + "learning_rate": 1.9319698966792806e-05, + "loss": 2.9622, + "step": 2100 + }, + { + "epoch": 0.09, + "learning_rate": 1.9315447085335263e-05, + "loss": 3.0459, + "step": 2110 + }, + { + "epoch": 0.09, + "learning_rate": 1.931119520387772e-05, + "loss": 3.1112, + "step": 2120 + }, + { + "epoch": 0.09, + "learning_rate": 1.930694332242017e-05, + "loss": 2.7483, + "step": 2130 + }, + { + "epoch": 0.09, + "learning_rate": 1.9302691440962628e-05, + "loss": 2.857, + "step": 2140 + }, + { + "epoch": 0.09, + "learning_rate": 1.9298439559505084e-05, + "loss": 3.1156, + "step": 2150 + }, + { + "epoch": 0.09, + "learning_rate": 1.9294187678047537e-05, + "loss": 3.0197, + "step": 2160 + }, + { + "epoch": 0.09, + "learning_rate": 1.9289935796589993e-05, + "loss": 3.4195, + "step": 2170 + }, + { + "epoch": 0.09, + "learning_rate": 1.9285683915132446e-05, + "loss": 3.0336, + "step": 2180 + }, + { + "epoch": 0.09, + "learning_rate": 1.92814320336749e-05, + "loss": 3.3716, + "step": 2190 + }, + { + "epoch": 0.09, + "learning_rate": 1.9277180152217358e-05, + "loss": 3.0866, + "step": 2200 + }, + { + "epoch": 0.09, + "learning_rate": 1.927292827075981e-05, + "loss": 3.2992, + "step": 2210 + }, + { + "epoch": 0.09, + "learning_rate": 1.9268676389302267e-05, + "loss": 3.4868, + "step": 2220 + }, + { + "epoch": 0.09, + "learning_rate": 1.9264424507844723e-05, + "loss": 3.4946, + "step": 2230 + }, + { + "epoch": 0.09, + "learning_rate": 1.9260172626387176e-05, + "loss": 3.2567, + "step": 2240 + }, + { + "epoch": 0.09, + "learning_rate": 1.9255920744929632e-05, + "loss": 3.1213, + "step": 2250 + }, + { + "epoch": 0.1, + "learning_rate": 1.9251668863472088e-05, + "loss": 3.3634, + "step": 2260 + }, + { + "epoch": 0.1, + "learning_rate": 1.9247416982014544e-05, + "loss": 3.3944, + "step": 2270 + }, + { + "epoch": 0.1, + "learning_rate": 1.9243165100556997e-05, + "loss": 3.3117, + "step": 2280 + }, + { + "epoch": 0.1, + "learning_rate": 1.9238913219099453e-05, + "loss": 3.4853, + "step": 2290 + }, + { + "epoch": 0.1, + "learning_rate": 1.923466133764191e-05, + "loss": 2.935, + "step": 2300 + }, + { + "epoch": 0.1, + "learning_rate": 1.9230409456184362e-05, + "loss": 2.8551, + "step": 2310 + }, + { + "epoch": 0.1, + "learning_rate": 1.9226157574726818e-05, + "loss": 3.8506, + "step": 2320 + }, + { + "epoch": 0.1, + "learning_rate": 1.9221905693269274e-05, + "loss": 2.854, + "step": 2330 + }, + { + "epoch": 0.1, + "learning_rate": 1.9217653811811727e-05, + "loss": 3.1607, + "step": 2340 + }, + { + "epoch": 0.1, + "learning_rate": 1.9213401930354183e-05, + "loss": 3.4625, + "step": 2350 + }, + { + "epoch": 0.1, + "learning_rate": 1.920915004889664e-05, + "loss": 2.5954, + "step": 2360 + }, + { + "epoch": 0.1, + "learning_rate": 1.9204898167439095e-05, + "loss": 3.1057, + "step": 2370 + }, + { + "epoch": 0.1, + "learning_rate": 1.9200646285981548e-05, + "loss": 2.9618, + "step": 2380 + }, + { + "epoch": 0.1, + "learning_rate": 1.9196394404524004e-05, + "loss": 2.9407, + "step": 2390 + }, + { + "epoch": 0.1, + "learning_rate": 1.919214252306646e-05, + "loss": 2.7634, + "step": 2400 + }, + { + "epoch": 0.1, + "learning_rate": 1.9187890641608913e-05, + "loss": 2.6354, + "step": 2410 + }, + { + "epoch": 0.1, + "learning_rate": 1.918363876015137e-05, + "loss": 3.3772, + "step": 2420 + }, + { + "epoch": 0.1, + "learning_rate": 1.9179386878693826e-05, + "loss": 3.1235, + "step": 2430 + }, + { + "epoch": 0.1, + "learning_rate": 1.917513499723628e-05, + "loss": 2.9871, + "step": 2440 + }, + { + "epoch": 0.1, + "learning_rate": 1.9170883115778735e-05, + "loss": 3.3303, + "step": 2450 + }, + { + "epoch": 0.1, + "learning_rate": 1.916663123432119e-05, + "loss": 3.1218, + "step": 2460 + }, + { + "epoch": 0.1, + "learning_rate": 1.9162379352863643e-05, + "loss": 3.142, + "step": 2470 + }, + { + "epoch": 0.1, + "learning_rate": 1.91581274714061e-05, + "loss": 3.0239, + "step": 2480 + }, + { + "epoch": 0.1, + "learning_rate": 1.9153875589948552e-05, + "loss": 2.8272, + "step": 2490 + }, + { + "epoch": 0.11, + "learning_rate": 1.914962370849101e-05, + "loss": 3.5088, + "step": 2500 + }, + { + "epoch": 0.11, + "eval_loss": 2.284396171569824, + "eval_runtime": 180.2968, + "eval_samples_per_second": 13.883, + "eval_steps_per_second": 6.944, + "step": 2500 + }, + { + "epoch": 0.11, + "learning_rate": 1.9145371827033465e-05, + "loss": 3.3707, + "step": 2510 + }, + { + "epoch": 0.11, + "learning_rate": 1.9141119945575917e-05, + "loss": 3.2181, + "step": 2520 + }, + { + "epoch": 0.11, + "learning_rate": 1.9136868064118374e-05, + "loss": 3.0043, + "step": 2530 + }, + { + "epoch": 0.11, + "learning_rate": 1.913261618266083e-05, + "loss": 2.6138, + "step": 2540 + }, + { + "epoch": 0.11, + "learning_rate": 1.9128364301203282e-05, + "loss": 3.474, + "step": 2550 + }, + { + "epoch": 0.11, + "learning_rate": 1.912411241974574e-05, + "loss": 2.9508, + "step": 2560 + }, + { + "epoch": 0.11, + "learning_rate": 1.911986053828819e-05, + "loss": 3.0993, + "step": 2570 + }, + { + "epoch": 0.11, + "learning_rate": 1.9115608656830648e-05, + "loss": 2.964, + "step": 2580 + }, + { + "epoch": 0.11, + "learning_rate": 1.9111356775373104e-05, + "loss": 3.2431, + "step": 2590 + }, + { + "epoch": 0.11, + "learning_rate": 1.910710489391556e-05, + "loss": 3.5088, + "step": 2600 + }, + { + "epoch": 0.11, + "learning_rate": 1.9102853012458013e-05, + "loss": 2.9043, + "step": 2610 + }, + { + "epoch": 0.11, + "learning_rate": 1.909860113100047e-05, + "loss": 2.7854, + "step": 2620 + }, + { + "epoch": 0.11, + "learning_rate": 1.9094349249542925e-05, + "loss": 2.6917, + "step": 2630 + }, + { + "epoch": 0.11, + "learning_rate": 1.9090097368085378e-05, + "loss": 2.8437, + "step": 2640 + }, + { + "epoch": 0.11, + "learning_rate": 1.9085845486627834e-05, + "loss": 2.9471, + "step": 2650 + }, + { + "epoch": 0.11, + "learning_rate": 1.908159360517029e-05, + "loss": 3.3191, + "step": 2660 + }, + { + "epoch": 0.11, + "learning_rate": 1.9077341723712743e-05, + "loss": 3.2082, + "step": 2670 + }, + { + "epoch": 0.11, + "learning_rate": 1.90730898422552e-05, + "loss": 2.9716, + "step": 2680 + }, + { + "epoch": 0.11, + "learning_rate": 1.9068837960797655e-05, + "loss": 2.7102, + "step": 2690 + }, + { + "epoch": 0.11, + "learning_rate": 1.906458607934011e-05, + "loss": 3.2996, + "step": 2700 + }, + { + "epoch": 0.11, + "learning_rate": 1.9060334197882564e-05, + "loss": 3.1837, + "step": 2710 + }, + { + "epoch": 0.11, + "learning_rate": 1.905608231642502e-05, + "loss": 3.2329, + "step": 2720 + }, + { + "epoch": 0.11, + "learning_rate": 1.9051830434967476e-05, + "loss": 2.9437, + "step": 2730 + }, + { + "epoch": 0.12, + "learning_rate": 1.904757855350993e-05, + "loss": 2.7768, + "step": 2740 + }, + { + "epoch": 0.12, + "learning_rate": 1.9043326672052385e-05, + "loss": 3.008, + "step": 2750 + }, + { + "epoch": 0.12, + "learning_rate": 1.903907479059484e-05, + "loss": 3.2435, + "step": 2760 + }, + { + "epoch": 0.12, + "learning_rate": 1.9034822909137294e-05, + "loss": 3.1503, + "step": 2770 + }, + { + "epoch": 0.12, + "learning_rate": 1.903057102767975e-05, + "loss": 2.9688, + "step": 2780 + }, + { + "epoch": 0.12, + "learning_rate": 1.9026319146222206e-05, + "loss": 2.7462, + "step": 2790 + }, + { + "epoch": 0.12, + "learning_rate": 1.9022067264764663e-05, + "loss": 3.0445, + "step": 2800 + }, + { + "epoch": 0.12, + "learning_rate": 1.9017815383307115e-05, + "loss": 2.9424, + "step": 2810 + }, + { + "epoch": 0.12, + "learning_rate": 1.901356350184957e-05, + "loss": 3.3523, + "step": 2820 + }, + { + "epoch": 0.12, + "learning_rate": 1.9009311620392024e-05, + "loss": 2.9877, + "step": 2830 + }, + { + "epoch": 0.12, + "learning_rate": 1.900505973893448e-05, + "loss": 2.8386, + "step": 2840 + }, + { + "epoch": 0.12, + "learning_rate": 1.9000807857476937e-05, + "loss": 2.9439, + "step": 2850 + }, + { + "epoch": 0.12, + "learning_rate": 1.899655597601939e-05, + "loss": 3.1248, + "step": 2860 + }, + { + "epoch": 0.12, + "learning_rate": 1.8992304094561845e-05, + "loss": 2.6066, + "step": 2870 + }, + { + "epoch": 0.12, + "learning_rate": 1.8988052213104298e-05, + "loss": 2.7073, + "step": 2880 + }, + { + "epoch": 0.12, + "learning_rate": 1.8983800331646754e-05, + "loss": 2.8504, + "step": 2890 + }, + { + "epoch": 0.12, + "learning_rate": 1.897954845018921e-05, + "loss": 2.951, + "step": 2900 + }, + { + "epoch": 0.12, + "learning_rate": 1.8975296568731663e-05, + "loss": 3.1729, + "step": 2910 + }, + { + "epoch": 0.12, + "learning_rate": 1.897104468727412e-05, + "loss": 2.8276, + "step": 2920 + }, + { + "epoch": 0.12, + "learning_rate": 1.8966792805816576e-05, + "loss": 3.0242, + "step": 2930 + }, + { + "epoch": 0.12, + "learning_rate": 1.896254092435903e-05, + "loss": 2.8324, + "step": 2940 + }, + { + "epoch": 0.12, + "learning_rate": 1.8958289042901485e-05, + "loss": 3.336, + "step": 2950 + }, + { + "epoch": 0.12, + "learning_rate": 1.895403716144394e-05, + "loss": 2.609, + "step": 2960 + }, + { + "epoch": 0.12, + "learning_rate": 1.8949785279986393e-05, + "loss": 3.0654, + "step": 2970 + }, + { + "epoch": 0.13, + "learning_rate": 1.894553339852885e-05, + "loss": 2.9748, + "step": 2980 + }, + { + "epoch": 0.13, + "learning_rate": 1.8941281517071306e-05, + "loss": 3.0587, + "step": 2990 + }, + { + "epoch": 0.13, + "learning_rate": 1.8937029635613762e-05, + "loss": 2.8801, + "step": 3000 + }, + { + "epoch": 0.13, + "eval_loss": 2.220201015472412, + "eval_runtime": 157.7813, + "eval_samples_per_second": 15.864, + "eval_steps_per_second": 7.935, + "step": 3000 + }, + { + "epoch": 0.13, + "learning_rate": 1.8932777754156215e-05, + "loss": 3.1024, + "step": 3010 + }, + { + "epoch": 0.13, + "learning_rate": 1.892852587269867e-05, + "loss": 2.9528, + "step": 3020 + }, + { + "epoch": 0.13, + "learning_rate": 1.8924273991241127e-05, + "loss": 3.0732, + "step": 3030 + }, + { + "epoch": 0.13, + "learning_rate": 1.892002210978358e-05, + "loss": 2.8469, + "step": 3040 + }, + { + "epoch": 0.13, + "learning_rate": 1.8915770228326036e-05, + "loss": 2.6897, + "step": 3050 + }, + { + "epoch": 0.13, + "learning_rate": 1.8911518346868492e-05, + "loss": 3.291, + "step": 3060 + }, + { + "epoch": 0.13, + "learning_rate": 1.8907266465410945e-05, + "loss": 2.6824, + "step": 3070 + }, + { + "epoch": 0.13, + "learning_rate": 1.89030145839534e-05, + "loss": 3.202, + "step": 3080 + }, + { + "epoch": 0.13, + "learning_rate": 1.8898762702495857e-05, + "loss": 2.5288, + "step": 3090 + }, + { + "epoch": 0.13, + "learning_rate": 1.889451082103831e-05, + "loss": 3.4173, + "step": 3100 + }, + { + "epoch": 0.13, + "learning_rate": 1.8890258939580766e-05, + "loss": 3.4438, + "step": 3110 + }, + { + "epoch": 0.13, + "learning_rate": 1.8886007058123222e-05, + "loss": 2.9537, + "step": 3120 + }, + { + "epoch": 0.13, + "learning_rate": 1.888175517666568e-05, + "loss": 2.8619, + "step": 3130 + }, + { + "epoch": 0.13, + "learning_rate": 1.887750329520813e-05, + "loss": 3.2763, + "step": 3140 + }, + { + "epoch": 0.13, + "learning_rate": 1.8873251413750587e-05, + "loss": 3.1419, + "step": 3150 + }, + { + "epoch": 0.13, + "learning_rate": 1.8868999532293043e-05, + "loss": 3.1411, + "step": 3160 + }, + { + "epoch": 0.13, + "learning_rate": 1.8864747650835496e-05, + "loss": 2.9325, + "step": 3170 + }, + { + "epoch": 0.13, + "learning_rate": 1.8860495769377952e-05, + "loss": 3.1641, + "step": 3180 + }, + { + "epoch": 0.13, + "learning_rate": 1.885624388792041e-05, + "loss": 2.8487, + "step": 3190 + }, + { + "epoch": 0.13, + "learning_rate": 1.885199200646286e-05, + "loss": 2.8405, + "step": 3200 + }, + { + "epoch": 0.14, + "learning_rate": 1.8847740125005317e-05, + "loss": 2.6751, + "step": 3210 + }, + { + "epoch": 0.14, + "learning_rate": 1.884348824354777e-05, + "loss": 3.2453, + "step": 3220 + }, + { + "epoch": 0.14, + "learning_rate": 1.8839236362090226e-05, + "loss": 3.1109, + "step": 3230 + }, + { + "epoch": 0.14, + "learning_rate": 1.8834984480632682e-05, + "loss": 3.1686, + "step": 3240 + }, + { + "epoch": 0.14, + "learning_rate": 1.8830732599175135e-05, + "loss": 3.1336, + "step": 3250 + }, + { + "epoch": 0.14, + "learning_rate": 1.882648071771759e-05, + "loss": 3.0892, + "step": 3260 + }, + { + "epoch": 0.14, + "learning_rate": 1.8822228836260044e-05, + "loss": 2.9393, + "step": 3270 + }, + { + "epoch": 0.14, + "learning_rate": 1.88179769548025e-05, + "loss": 2.5524, + "step": 3280 + }, + { + "epoch": 0.14, + "learning_rate": 1.8813725073344956e-05, + "loss": 3.1743, + "step": 3290 + }, + { + "epoch": 0.14, + "learning_rate": 1.880947319188741e-05, + "loss": 2.9226, + "step": 3300 + }, + { + "epoch": 0.14, + "learning_rate": 1.8805221310429865e-05, + "loss": 2.862, + "step": 3310 + }, + { + "epoch": 0.14, + "learning_rate": 1.880096942897232e-05, + "loss": 2.6228, + "step": 3320 + }, + { + "epoch": 0.14, + "learning_rate": 1.8796717547514778e-05, + "loss": 3.1708, + "step": 3330 + }, + { + "epoch": 0.14, + "learning_rate": 1.879246566605723e-05, + "loss": 2.6939, + "step": 3340 + }, + { + "epoch": 0.14, + "learning_rate": 1.8788213784599687e-05, + "loss": 3.0765, + "step": 3350 + }, + { + "epoch": 0.14, + "learning_rate": 1.8783961903142143e-05, + "loss": 2.9789, + "step": 3360 + }, + { + "epoch": 0.14, + "learning_rate": 1.8779710021684596e-05, + "loss": 2.7487, + "step": 3370 + }, + { + "epoch": 0.14, + "learning_rate": 1.877545814022705e-05, + "loss": 2.4453, + "step": 3380 + }, + { + "epoch": 0.14, + "learning_rate": 1.8771206258769508e-05, + "loss": 3.0888, + "step": 3390 + }, + { + "epoch": 0.14, + "learning_rate": 1.876695437731196e-05, + "loss": 2.6603, + "step": 3400 + }, + { + "epoch": 0.14, + "learning_rate": 1.8762702495854417e-05, + "loss": 2.9912, + "step": 3410 + }, + { + "epoch": 0.14, + "learning_rate": 1.8758450614396873e-05, + "loss": 3.1868, + "step": 3420 + }, + { + "epoch": 0.14, + "learning_rate": 1.875419873293933e-05, + "loss": 3.0776, + "step": 3430 + }, + { + "epoch": 0.14, + "learning_rate": 1.8749946851481782e-05, + "loss": 2.9323, + "step": 3440 + }, + { + "epoch": 0.15, + "learning_rate": 1.8745694970024238e-05, + "loss": 3.0786, + "step": 3450 + }, + { + "epoch": 0.15, + "learning_rate": 1.8741443088566694e-05, + "loss": 2.7427, + "step": 3460 + }, + { + "epoch": 0.15, + "learning_rate": 1.8737191207109147e-05, + "loss": 2.6738, + "step": 3470 + }, + { + "epoch": 0.15, + "learning_rate": 1.8732939325651603e-05, + "loss": 2.7113, + "step": 3480 + }, + { + "epoch": 0.15, + "learning_rate": 1.872868744419406e-05, + "loss": 2.9205, + "step": 3490 + }, + { + "epoch": 0.15, + "learning_rate": 1.8724435562736512e-05, + "loss": 3.002, + "step": 3500 + }, + { + "epoch": 0.15, + "eval_loss": 2.175995349884033, + "eval_runtime": 157.4547, + "eval_samples_per_second": 15.897, + "eval_steps_per_second": 7.951, + "step": 3500 + }, + { + "epoch": 0.15, + "learning_rate": 1.8720183681278968e-05, + "loss": 2.3474, + "step": 3510 + }, + { + "epoch": 0.15, + "learning_rate": 1.8715931799821424e-05, + "loss": 2.8694, + "step": 3520 + }, + { + "epoch": 0.15, + "learning_rate": 1.8711679918363877e-05, + "loss": 2.6717, + "step": 3530 + }, + { + "epoch": 0.15, + "learning_rate": 1.8707428036906333e-05, + "loss": 2.9325, + "step": 3540 + }, + { + "epoch": 0.15, + "learning_rate": 1.870317615544879e-05, + "loss": 3.0843, + "step": 3550 + }, + { + "epoch": 0.15, + "learning_rate": 1.8698924273991242e-05, + "loss": 3.1572, + "step": 3560 + }, + { + "epoch": 0.15, + "learning_rate": 1.8694672392533698e-05, + "loss": 2.3833, + "step": 3570 + }, + { + "epoch": 0.15, + "learning_rate": 1.8690420511076154e-05, + "loss": 3.0576, + "step": 3580 + }, + { + "epoch": 0.15, + "learning_rate": 1.8686168629618607e-05, + "loss": 2.7256, + "step": 3590 + }, + { + "epoch": 0.15, + "learning_rate": 1.8681916748161063e-05, + "loss": 2.8938, + "step": 3600 + }, + { + "epoch": 0.15, + "learning_rate": 1.8677664866703516e-05, + "loss": 3.5121, + "step": 3610 + }, + { + "epoch": 0.15, + "learning_rate": 1.8673412985245972e-05, + "loss": 3.0321, + "step": 3620 + }, + { + "epoch": 0.15, + "learning_rate": 1.866916110378843e-05, + "loss": 2.7772, + "step": 3630 + }, + { + "epoch": 0.15, + "learning_rate": 1.866490922233088e-05, + "loss": 2.4553, + "step": 3640 + }, + { + "epoch": 0.15, + "learning_rate": 1.8660657340873337e-05, + "loss": 2.3899, + "step": 3650 + }, + { + "epoch": 0.15, + "learning_rate": 1.8656405459415793e-05, + "loss": 2.8928, + "step": 3660 + }, + { + "epoch": 0.15, + "learning_rate": 1.8652153577958246e-05, + "loss": 2.5838, + "step": 3670 + }, + { + "epoch": 0.15, + "learning_rate": 1.8647901696500702e-05, + "loss": 2.8532, + "step": 3680 + }, + { + "epoch": 0.16, + "learning_rate": 1.864364981504316e-05, + "loss": 2.6418, + "step": 3690 + }, + { + "epoch": 0.16, + "learning_rate": 1.863939793358561e-05, + "loss": 2.7067, + "step": 3700 + }, + { + "epoch": 0.16, + "learning_rate": 1.8635146052128067e-05, + "loss": 2.8495, + "step": 3710 + }, + { + "epoch": 0.16, + "learning_rate": 1.8630894170670524e-05, + "loss": 2.57, + "step": 3720 + }, + { + "epoch": 0.16, + "learning_rate": 1.8626642289212976e-05, + "loss": 2.8358, + "step": 3730 + }, + { + "epoch": 0.16, + "learning_rate": 1.8622390407755433e-05, + "loss": 2.5213, + "step": 3740 + }, + { + "epoch": 0.16, + "learning_rate": 1.861813852629789e-05, + "loss": 2.6923, + "step": 3750 + }, + { + "epoch": 0.16, + "learning_rate": 1.8613886644840345e-05, + "loss": 2.7872, + "step": 3760 + }, + { + "epoch": 0.16, + "learning_rate": 1.8609634763382798e-05, + "loss": 3.0025, + "step": 3770 + }, + { + "epoch": 0.16, + "learning_rate": 1.8605382881925254e-05, + "loss": 3.0608, + "step": 3780 + }, + { + "epoch": 0.16, + "learning_rate": 1.860113100046771e-05, + "loss": 2.9327, + "step": 3790 + }, + { + "epoch": 0.16, + "learning_rate": 1.8596879119010163e-05, + "loss": 2.6853, + "step": 3800 + }, + { + "epoch": 0.16, + "learning_rate": 1.859262723755262e-05, + "loss": 3.0209, + "step": 3810 + }, + { + "epoch": 0.16, + "learning_rate": 1.8588375356095075e-05, + "loss": 3.1764, + "step": 3820 + }, + { + "epoch": 0.16, + "learning_rate": 1.8584123474637528e-05, + "loss": 2.6595, + "step": 3830 + }, + { + "epoch": 0.16, + "learning_rate": 1.8579871593179984e-05, + "loss": 3.1195, + "step": 3840 + }, + { + "epoch": 0.16, + "learning_rate": 1.857561971172244e-05, + "loss": 2.4837, + "step": 3850 + }, + { + "epoch": 0.16, + "learning_rate": 1.8571367830264896e-05, + "loss": 2.9376, + "step": 3860 + }, + { + "epoch": 0.16, + "learning_rate": 1.856711594880735e-05, + "loss": 2.7025, + "step": 3870 + }, + { + "epoch": 0.16, + "learning_rate": 1.8562864067349805e-05, + "loss": 2.7312, + "step": 3880 + }, + { + "epoch": 0.16, + "learning_rate": 1.855861218589226e-05, + "loss": 2.7804, + "step": 3890 + }, + { + "epoch": 0.16, + "learning_rate": 1.8554360304434714e-05, + "loss": 2.6139, + "step": 3900 + }, + { + "epoch": 0.16, + "learning_rate": 1.855010842297717e-05, + "loss": 2.4627, + "step": 3910 + }, + { + "epoch": 0.16, + "learning_rate": 1.8545856541519623e-05, + "loss": 3.0103, + "step": 3920 + }, + { + "epoch": 0.17, + "learning_rate": 1.854160466006208e-05, + "loss": 3.1759, + "step": 3930 + }, + { + "epoch": 0.17, + "learning_rate": 1.8537352778604535e-05, + "loss": 2.3429, + "step": 3940 + }, + { + "epoch": 0.17, + "learning_rate": 1.8533100897146988e-05, + "loss": 2.5575, + "step": 3950 + }, + { + "epoch": 0.17, + "learning_rate": 1.8528849015689444e-05, + "loss": 2.6588, + "step": 3960 + }, + { + "epoch": 0.17, + "learning_rate": 1.85245971342319e-05, + "loss": 2.3116, + "step": 3970 + }, + { + "epoch": 0.17, + "learning_rate": 1.8520345252774353e-05, + "loss": 2.6692, + "step": 3980 + }, + { + "epoch": 0.17, + "learning_rate": 1.851609337131681e-05, + "loss": 2.4895, + "step": 3990 + }, + { + "epoch": 0.17, + "learning_rate": 1.8511841489859262e-05, + "loss": 2.5881, + "step": 4000 + }, + { + "epoch": 0.17, + "eval_loss": 2.1984293460845947, + "eval_runtime": 157.7819, + "eval_samples_per_second": 15.864, + "eval_steps_per_second": 7.935, + "step": 4000 + }, + { + "epoch": 0.17, + "learning_rate": 1.8507589608401718e-05, + "loss": 3.1104, + "step": 4010 + }, + { + "epoch": 0.17, + "learning_rate": 1.8503337726944174e-05, + "loss": 2.9546, + "step": 4020 + }, + { + "epoch": 0.17, + "learning_rate": 1.8499085845486627e-05, + "loss": 2.5194, + "step": 4030 + }, + { + "epoch": 0.17, + "learning_rate": 1.8494833964029083e-05, + "loss": 2.501, + "step": 4040 + }, + { + "epoch": 0.17, + "learning_rate": 1.849058208257154e-05, + "loss": 2.9073, + "step": 4050 + }, + { + "epoch": 0.17, + "learning_rate": 1.8486330201113992e-05, + "loss": 2.5472, + "step": 4060 + }, + { + "epoch": 0.17, + "learning_rate": 1.8482078319656448e-05, + "loss": 3.0469, + "step": 4070 + }, + { + "epoch": 0.17, + "learning_rate": 1.8477826438198904e-05, + "loss": 2.54, + "step": 4080 + }, + { + "epoch": 0.17, + "learning_rate": 1.847357455674136e-05, + "loss": 2.5786, + "step": 4090 + }, + { + "epoch": 0.17, + "learning_rate": 1.8469322675283813e-05, + "loss": 3.0802, + "step": 4100 + }, + { + "epoch": 0.17, + "learning_rate": 1.846507079382627e-05, + "loss": 2.5621, + "step": 4110 + }, + { + "epoch": 0.17, + "learning_rate": 1.8460818912368726e-05, + "loss": 2.7328, + "step": 4120 + }, + { + "epoch": 0.17, + "learning_rate": 1.845656703091118e-05, + "loss": 3.0167, + "step": 4130 + }, + { + "epoch": 0.17, + "learning_rate": 1.8452315149453635e-05, + "loss": 2.6309, + "step": 4140 + }, + { + "epoch": 0.17, + "learning_rate": 1.844806326799609e-05, + "loss": 2.6624, + "step": 4150 + }, + { + "epoch": 0.18, + "learning_rate": 1.8443811386538543e-05, + "loss": 2.439, + "step": 4160 + }, + { + "epoch": 0.18, + "learning_rate": 1.8439559505081e-05, + "loss": 3.1749, + "step": 4170 + }, + { + "epoch": 0.18, + "learning_rate": 1.8435307623623456e-05, + "loss": 3.0303, + "step": 4180 + }, + { + "epoch": 0.18, + "learning_rate": 1.8431055742165912e-05, + "loss": 2.8716, + "step": 4190 + }, + { + "epoch": 0.18, + "learning_rate": 1.8426803860708365e-05, + "loss": 2.6538, + "step": 4200 + }, + { + "epoch": 0.18, + "learning_rate": 1.842255197925082e-05, + "loss": 2.8078, + "step": 4210 + }, + { + "epoch": 0.18, + "learning_rate": 1.8418300097793277e-05, + "loss": 2.5088, + "step": 4220 + }, + { + "epoch": 0.18, + "learning_rate": 1.841404821633573e-05, + "loss": 2.9373, + "step": 4230 + }, + { + "epoch": 0.18, + "learning_rate": 1.8409796334878186e-05, + "loss": 3.0147, + "step": 4240 + }, + { + "epoch": 0.18, + "learning_rate": 1.8405544453420642e-05, + "loss": 3.4054, + "step": 4250 + }, + { + "epoch": 0.18, + "learning_rate": 1.8401292571963095e-05, + "loss": 3.2158, + "step": 4260 + }, + { + "epoch": 0.18, + "learning_rate": 1.839704069050555e-05, + "loss": 2.3821, + "step": 4270 + }, + { + "epoch": 0.18, + "learning_rate": 1.8392788809048007e-05, + "loss": 2.7904, + "step": 4280 + }, + { + "epoch": 0.18, + "learning_rate": 1.838853692759046e-05, + "loss": 2.9567, + "step": 4290 + }, + { + "epoch": 0.18, + "learning_rate": 1.8384285046132916e-05, + "loss": 2.2942, + "step": 4300 + }, + { + "epoch": 0.18, + "learning_rate": 1.838003316467537e-05, + "loss": 2.965, + "step": 4310 + }, + { + "epoch": 0.18, + "learning_rate": 1.8375781283217825e-05, + "loss": 2.6916, + "step": 4320 + }, + { + "epoch": 0.18, + "learning_rate": 1.837152940176028e-05, + "loss": 2.5174, + "step": 4330 + }, + { + "epoch": 0.18, + "learning_rate": 1.8367277520302734e-05, + "loss": 2.5812, + "step": 4340 + }, + { + "epoch": 0.18, + "learning_rate": 1.836302563884519e-05, + "loss": 2.8423, + "step": 4350 + }, + { + "epoch": 0.18, + "learning_rate": 1.8358773757387646e-05, + "loss": 2.5655, + "step": 4360 + }, + { + "epoch": 0.18, + "learning_rate": 1.83545218759301e-05, + "loss": 2.3715, + "step": 4370 + }, + { + "epoch": 0.18, + "learning_rate": 1.8350269994472555e-05, + "loss": 2.8404, + "step": 4380 + }, + { + "epoch": 0.18, + "learning_rate": 1.834601811301501e-05, + "loss": 2.9515, + "step": 4390 + }, + { + "epoch": 0.19, + "learning_rate": 1.8341766231557464e-05, + "loss": 2.5119, + "step": 4400 + }, + { + "epoch": 0.19, + "learning_rate": 1.833751435009992e-05, + "loss": 2.5983, + "step": 4410 + }, + { + "epoch": 0.19, + "learning_rate": 1.8333262468642376e-05, + "loss": 2.7768, + "step": 4420 + }, + { + "epoch": 0.19, + "learning_rate": 1.832901058718483e-05, + "loss": 2.9894, + "step": 4430 + }, + { + "epoch": 0.19, + "learning_rate": 1.8324758705727285e-05, + "loss": 2.5563, + "step": 4440 + }, + { + "epoch": 0.19, + "learning_rate": 1.832050682426974e-05, + "loss": 2.8879, + "step": 4450 + }, + { + "epoch": 0.19, + "learning_rate": 1.8316254942812194e-05, + "loss": 2.6981, + "step": 4460 + }, + { + "epoch": 0.19, + "learning_rate": 1.831200306135465e-05, + "loss": 3.0622, + "step": 4470 + }, + { + "epoch": 0.19, + "learning_rate": 1.8307751179897106e-05, + "loss": 2.883, + "step": 4480 + }, + { + "epoch": 0.19, + "learning_rate": 1.8303499298439563e-05, + "loss": 2.9416, + "step": 4490 + }, + { + "epoch": 0.19, + "learning_rate": 1.8299247416982015e-05, + "loss": 2.8995, + "step": 4500 + }, + { + "epoch": 0.19, + "eval_loss": 2.1155664920806885, + "eval_runtime": 157.6562, + "eval_samples_per_second": 15.876, + "eval_steps_per_second": 7.941, + "step": 4500 + }, + { + "epoch": 0.19, + "learning_rate": 1.829499553552447e-05, + "loss": 3.3596, + "step": 4510 + }, + { + "epoch": 0.19, + "learning_rate": 1.8290743654066928e-05, + "loss": 2.5666, + "step": 4520 + }, + { + "epoch": 0.19, + "learning_rate": 1.828649177260938e-05, + "loss": 2.7884, + "step": 4530 + }, + { + "epoch": 0.19, + "learning_rate": 1.8282239891151837e-05, + "loss": 2.5327, + "step": 4540 + }, + { + "epoch": 0.19, + "learning_rate": 1.8277988009694293e-05, + "loss": 2.4704, + "step": 4550 + }, + { + "epoch": 0.19, + "learning_rate": 1.8273736128236746e-05, + "loss": 3.302, + "step": 4560 + }, + { + "epoch": 0.19, + "learning_rate": 1.82694842467792e-05, + "loss": 2.9727, + "step": 4570 + }, + { + "epoch": 0.19, + "learning_rate": 1.8265232365321658e-05, + "loss": 2.4977, + "step": 4580 + }, + { + "epoch": 0.19, + "learning_rate": 1.826098048386411e-05, + "loss": 2.5478, + "step": 4590 + }, + { + "epoch": 0.19, + "learning_rate": 1.8256728602406567e-05, + "loss": 3.0426, + "step": 4600 + }, + { + "epoch": 0.19, + "learning_rate": 1.8252476720949023e-05, + "loss": 2.6851, + "step": 4610 + }, + { + "epoch": 0.19, + "learning_rate": 1.824822483949148e-05, + "loss": 2.7621, + "step": 4620 + }, + { + "epoch": 0.19, + "learning_rate": 1.8243972958033932e-05, + "loss": 2.8677, + "step": 4630 + }, + { + "epoch": 0.2, + "learning_rate": 1.8239721076576388e-05, + "loss": 2.7745, + "step": 4640 + }, + { + "epoch": 0.2, + "learning_rate": 1.823546919511884e-05, + "loss": 2.7964, + "step": 4650 + }, + { + "epoch": 0.2, + "learning_rate": 1.8231217313661297e-05, + "loss": 2.7733, + "step": 4660 + }, + { + "epoch": 0.2, + "learning_rate": 1.8226965432203753e-05, + "loss": 2.7332, + "step": 4670 + }, + { + "epoch": 0.2, + "learning_rate": 1.8222713550746206e-05, + "loss": 2.8351, + "step": 4680 + }, + { + "epoch": 0.2, + "learning_rate": 1.8218461669288662e-05, + "loss": 2.9184, + "step": 4690 + }, + { + "epoch": 0.2, + "learning_rate": 1.8214209787831115e-05, + "loss": 2.775, + "step": 4700 + }, + { + "epoch": 0.2, + "learning_rate": 1.820995790637357e-05, + "loss": 2.6668, + "step": 4710 + }, + { + "epoch": 0.2, + "learning_rate": 1.8205706024916027e-05, + "loss": 2.5477, + "step": 4720 + }, + { + "epoch": 0.2, + "learning_rate": 1.820145414345848e-05, + "loss": 2.5597, + "step": 4730 + }, + { + "epoch": 0.2, + "learning_rate": 1.8197202262000936e-05, + "loss": 2.6637, + "step": 4740 + }, + { + "epoch": 0.2, + "learning_rate": 1.8192950380543392e-05, + "loss": 2.5096, + "step": 4750 + }, + { + "epoch": 0.2, + "learning_rate": 1.8188698499085845e-05, + "loss": 2.9088, + "step": 4760 + }, + { + "epoch": 0.2, + "learning_rate": 1.81844466176283e-05, + "loss": 3.2694, + "step": 4770 + }, + { + "epoch": 0.2, + "learning_rate": 1.8180194736170757e-05, + "loss": 2.3419, + "step": 4780 + }, + { + "epoch": 0.2, + "learning_rate": 1.817594285471321e-05, + "loss": 2.3274, + "step": 4790 + }, + { + "epoch": 0.2, + "learning_rate": 1.8171690973255666e-05, + "loss": 2.5307, + "step": 4800 + }, + { + "epoch": 0.2, + "learning_rate": 1.8167439091798122e-05, + "loss": 2.7317, + "step": 4810 + }, + { + "epoch": 0.2, + "learning_rate": 1.816318721034058e-05, + "loss": 2.4159, + "step": 4820 + }, + { + "epoch": 0.2, + "learning_rate": 1.815893532888303e-05, + "loss": 3.0825, + "step": 4830 + }, + { + "epoch": 0.2, + "learning_rate": 1.8154683447425487e-05, + "loss": 2.4835, + "step": 4840 + }, + { + "epoch": 0.2, + "learning_rate": 1.8150431565967943e-05, + "loss": 2.3891, + "step": 4850 + }, + { + "epoch": 0.2, + "learning_rate": 1.8146179684510396e-05, + "loss": 2.445, + "step": 4860 + }, + { + "epoch": 0.2, + "learning_rate": 1.8141927803052852e-05, + "loss": 2.457, + "step": 4870 + }, + { + "epoch": 0.21, + "learning_rate": 1.813767592159531e-05, + "loss": 2.534, + "step": 4880 + }, + { + "epoch": 0.21, + "learning_rate": 1.813342404013776e-05, + "loss": 2.7876, + "step": 4890 + }, + { + "epoch": 0.21, + "learning_rate": 1.8129172158680217e-05, + "loss": 2.847, + "step": 4900 + }, + { + "epoch": 0.21, + "learning_rate": 1.8124920277222674e-05, + "loss": 2.7351, + "step": 4910 + }, + { + "epoch": 0.21, + "learning_rate": 1.812066839576513e-05, + "loss": 2.5956, + "step": 4920 + }, + { + "epoch": 0.21, + "learning_rate": 1.8116416514307583e-05, + "loss": 2.9122, + "step": 4930 + }, + { + "epoch": 0.21, + "learning_rate": 1.811216463285004e-05, + "loss": 2.2897, + "step": 4940 + }, + { + "epoch": 0.21, + "learning_rate": 1.8107912751392495e-05, + "loss": 2.7125, + "step": 4950 + }, + { + "epoch": 0.21, + "learning_rate": 1.8103660869934948e-05, + "loss": 2.6486, + "step": 4960 + }, + { + "epoch": 0.21, + "learning_rate": 1.8099408988477404e-05, + "loss": 2.5903, + "step": 4970 + }, + { + "epoch": 0.21, + "learning_rate": 1.809515710701986e-05, + "loss": 2.3824, + "step": 4980 + }, + { + "epoch": 0.21, + "learning_rate": 1.8090905225562313e-05, + "loss": 2.87, + "step": 4990 + }, + { + "epoch": 0.21, + "learning_rate": 1.808665334410477e-05, + "loss": 2.8354, + "step": 5000 + }, + { + "epoch": 0.21, + "eval_loss": 2.103600025177002, + "eval_runtime": 158.2313, + "eval_samples_per_second": 15.819, + "eval_steps_per_second": 7.912, + "step": 5000 + }, + { + "epoch": 0.21, + "learning_rate": 1.8082401462647225e-05, + "loss": 2.6096, + "step": 5010 + }, + { + "epoch": 0.21, + "learning_rate": 1.8078149581189678e-05, + "loss": 2.8438, + "step": 5020 + }, + { + "epoch": 0.21, + "learning_rate": 1.8073897699732134e-05, + "loss": 2.5016, + "step": 5030 + }, + { + "epoch": 0.21, + "learning_rate": 1.8069645818274587e-05, + "loss": 2.7681, + "step": 5040 + }, + { + "epoch": 0.21, + "learning_rate": 1.8065393936817043e-05, + "loss": 2.9139, + "step": 5050 + }, + { + "epoch": 0.21, + "learning_rate": 1.80611420553595e-05, + "loss": 2.7467, + "step": 5060 + }, + { + "epoch": 0.21, + "learning_rate": 1.8056890173901952e-05, + "loss": 2.5214, + "step": 5070 + }, + { + "epoch": 0.21, + "learning_rate": 1.8052638292444408e-05, + "loss": 2.5733, + "step": 5080 + }, + { + "epoch": 0.21, + "learning_rate": 1.804838641098686e-05, + "loss": 2.6025, + "step": 5090 + }, + { + "epoch": 0.21, + "learning_rate": 1.8044134529529317e-05, + "loss": 3.0755, + "step": 5100 + }, + { + "epoch": 0.21, + "learning_rate": 1.8039882648071773e-05, + "loss": 2.4955, + "step": 5110 + }, + { + "epoch": 0.22, + "learning_rate": 1.8035630766614226e-05, + "loss": 2.347, + "step": 5120 + }, + { + "epoch": 0.22, + "learning_rate": 1.8031378885156682e-05, + "loss": 2.984, + "step": 5130 + }, + { + "epoch": 0.22, + "learning_rate": 1.8027127003699138e-05, + "loss": 2.4085, + "step": 5140 + }, + { + "epoch": 0.22, + "learning_rate": 1.8022875122241594e-05, + "loss": 2.6538, + "step": 5150 + }, + { + "epoch": 0.22, + "learning_rate": 1.8018623240784047e-05, + "loss": 2.5329, + "step": 5160 + }, + { + "epoch": 0.22, + "learning_rate": 1.8014371359326503e-05, + "loss": 2.6974, + "step": 5170 + }, + { + "epoch": 0.22, + "learning_rate": 1.801011947786896e-05, + "loss": 2.6907, + "step": 5180 + }, + { + "epoch": 0.22, + "learning_rate": 1.8005867596411412e-05, + "loss": 3.0843, + "step": 5190 + }, + { + "epoch": 0.22, + "learning_rate": 1.8001615714953868e-05, + "loss": 2.7841, + "step": 5200 + }, + { + "epoch": 0.22, + "learning_rate": 1.7997363833496324e-05, + "loss": 2.7677, + "step": 5210 + }, + { + "epoch": 0.22, + "learning_rate": 1.7993111952038777e-05, + "loss": 2.4248, + "step": 5220 + }, + { + "epoch": 0.22, + "learning_rate": 1.7988860070581233e-05, + "loss": 2.6094, + "step": 5230 + }, + { + "epoch": 0.22, + "learning_rate": 1.798460818912369e-05, + "loss": 2.8525, + "step": 5240 + }, + { + "epoch": 0.22, + "learning_rate": 1.7980356307666146e-05, + "loss": 3.099, + "step": 5250 + }, + { + "epoch": 0.22, + "learning_rate": 1.7976104426208598e-05, + "loss": 2.2656, + "step": 5260 + }, + { + "epoch": 0.22, + "learning_rate": 1.7971852544751054e-05, + "loss": 2.7402, + "step": 5270 + }, + { + "epoch": 0.22, + "learning_rate": 1.796760066329351e-05, + "loss": 2.9918, + "step": 5280 + }, + { + "epoch": 0.22, + "learning_rate": 1.7963348781835963e-05, + "loss": 2.81, + "step": 5290 + }, + { + "epoch": 0.22, + "learning_rate": 1.795909690037842e-05, + "loss": 2.5187, + "step": 5300 + }, + { + "epoch": 0.22, + "learning_rate": 1.7954845018920876e-05, + "loss": 2.6014, + "step": 5310 + }, + { + "epoch": 0.22, + "learning_rate": 1.795059313746333e-05, + "loss": 3.0447, + "step": 5320 + }, + { + "epoch": 0.22, + "learning_rate": 1.7946341256005785e-05, + "loss": 3.066, + "step": 5330 + }, + { + "epoch": 0.22, + "learning_rate": 1.794208937454824e-05, + "loss": 2.7345, + "step": 5340 + }, + { + "epoch": 0.23, + "learning_rate": 1.7937837493090693e-05, + "loss": 2.7044, + "step": 5350 + }, + { + "epoch": 0.23, + "learning_rate": 1.793358561163315e-05, + "loss": 2.748, + "step": 5360 + }, + { + "epoch": 0.23, + "learning_rate": 1.7929333730175606e-05, + "loss": 2.5782, + "step": 5370 + }, + { + "epoch": 0.23, + "learning_rate": 1.792508184871806e-05, + "loss": 2.9447, + "step": 5380 + }, + { + "epoch": 0.23, + "learning_rate": 1.7920829967260515e-05, + "loss": 2.8424, + "step": 5390 + }, + { + "epoch": 0.23, + "learning_rate": 1.791657808580297e-05, + "loss": 2.8229, + "step": 5400 + }, + { + "epoch": 0.23, + "learning_rate": 1.7912326204345424e-05, + "loss": 2.6956, + "step": 5410 + }, + { + "epoch": 0.23, + "learning_rate": 1.790807432288788e-05, + "loss": 2.7828, + "step": 5420 + }, + { + "epoch": 0.23, + "learning_rate": 1.7903822441430333e-05, + "loss": 2.9823, + "step": 5430 + }, + { + "epoch": 0.23, + "learning_rate": 1.789957055997279e-05, + "loss": 3.0025, + "step": 5440 + }, + { + "epoch": 0.23, + "learning_rate": 1.7895318678515245e-05, + "loss": 2.6832, + "step": 5450 + }, + { + "epoch": 0.23, + "learning_rate": 1.7891066797057698e-05, + "loss": 2.6522, + "step": 5460 + }, + { + "epoch": 0.23, + "learning_rate": 1.7886814915600154e-05, + "loss": 2.4215, + "step": 5470 + }, + { + "epoch": 0.23, + "learning_rate": 1.788256303414261e-05, + "loss": 2.8462, + "step": 5480 + }, + { + "epoch": 0.23, + "learning_rate": 1.7878311152685063e-05, + "loss": 2.2992, + "step": 5490 + }, + { + "epoch": 0.23, + "learning_rate": 1.787405927122752e-05, + "loss": 2.7561, + "step": 5500 + }, + { + "epoch": 0.23, + "eval_loss": 2.066274404525757, + "eval_runtime": 175.5176, + "eval_samples_per_second": 14.261, + "eval_steps_per_second": 7.133, + "step": 5500 + }, + { + "epoch": 0.23, + "learning_rate": 1.7869807389769975e-05, + "loss": 2.6396, + "step": 5510 + }, + { + "epoch": 0.23, + "learning_rate": 1.7865555508312428e-05, + "loss": 2.6427, + "step": 5520 + }, + { + "epoch": 0.23, + "learning_rate": 1.7861303626854884e-05, + "loss": 3.0599, + "step": 5530 + }, + { + "epoch": 0.23, + "learning_rate": 1.785705174539734e-05, + "loss": 2.7358, + "step": 5540 + }, + { + "epoch": 0.23, + "learning_rate": 1.7852799863939796e-05, + "loss": 2.8105, + "step": 5550 + }, + { + "epoch": 0.23, + "learning_rate": 1.784854798248225e-05, + "loss": 2.6185, + "step": 5560 + }, + { + "epoch": 0.23, + "learning_rate": 1.7844296101024705e-05, + "loss": 2.4536, + "step": 5570 + }, + { + "epoch": 0.23, + "learning_rate": 1.784004421956716e-05, + "loss": 2.9464, + "step": 5580 + }, + { + "epoch": 0.24, + "learning_rate": 1.7835792338109614e-05, + "loss": 2.4645, + "step": 5590 + }, + { + "epoch": 0.24, + "learning_rate": 1.783154045665207e-05, + "loss": 2.913, + "step": 5600 + }, + { + "epoch": 0.24, + "learning_rate": 1.7827288575194526e-05, + "loss": 2.6963, + "step": 5610 + }, + { + "epoch": 0.24, + "learning_rate": 1.782303669373698e-05, + "loss": 2.5597, + "step": 5620 + }, + { + "epoch": 0.24, + "learning_rate": 1.7818784812279435e-05, + "loss": 2.9756, + "step": 5630 + }, + { + "epoch": 0.24, + "learning_rate": 1.781453293082189e-05, + "loss": 2.9978, + "step": 5640 + }, + { + "epoch": 0.24, + "learning_rate": 1.7810281049364344e-05, + "loss": 2.6279, + "step": 5650 + }, + { + "epoch": 0.24, + "learning_rate": 1.78060291679068e-05, + "loss": 2.6256, + "step": 5660 + }, + { + "epoch": 0.24, + "learning_rate": 1.7801777286449256e-05, + "loss": 2.6303, + "step": 5670 + }, + { + "epoch": 0.24, + "learning_rate": 1.7797525404991713e-05, + "loss": 2.6303, + "step": 5680 + }, + { + "epoch": 0.24, + "learning_rate": 1.7793273523534165e-05, + "loss": 3.2514, + "step": 5690 + }, + { + "epoch": 0.24, + "learning_rate": 1.778902164207662e-05, + "loss": 2.5286, + "step": 5700 + }, + { + "epoch": 0.24, + "learning_rate": 1.7784769760619078e-05, + "loss": 2.751, + "step": 5710 + }, + { + "epoch": 0.24, + "learning_rate": 1.778051787916153e-05, + "loss": 2.4097, + "step": 5720 + }, + { + "epoch": 0.24, + "learning_rate": 1.7776265997703987e-05, + "loss": 2.733, + "step": 5730 + }, + { + "epoch": 0.24, + "learning_rate": 1.777201411624644e-05, + "loss": 2.4291, + "step": 5740 + }, + { + "epoch": 0.24, + "learning_rate": 1.7767762234788896e-05, + "loss": 2.1838, + "step": 5750 + }, + { + "epoch": 0.24, + "learning_rate": 1.776351035333135e-05, + "loss": 2.5028, + "step": 5760 + }, + { + "epoch": 0.24, + "learning_rate": 1.7759258471873804e-05, + "loss": 2.741, + "step": 5770 + }, + { + "epoch": 0.24, + "learning_rate": 1.775500659041626e-05, + "loss": 2.5766, + "step": 5780 + }, + { + "epoch": 0.24, + "learning_rate": 1.7750754708958717e-05, + "loss": 3.1094, + "step": 5790 + }, + { + "epoch": 0.24, + "learning_rate": 1.774650282750117e-05, + "loss": 2.4569, + "step": 5800 + }, + { + "epoch": 0.24, + "learning_rate": 1.7742250946043626e-05, + "loss": 2.9686, + "step": 5810 + }, + { + "epoch": 0.24, + "learning_rate": 1.773799906458608e-05, + "loss": 2.9987, + "step": 5820 + }, + { + "epoch": 0.25, + "learning_rate": 1.7733747183128535e-05, + "loss": 2.411, + "step": 5830 + }, + { + "epoch": 0.25, + "learning_rate": 1.772949530167099e-05, + "loss": 2.8807, + "step": 5840 + }, + { + "epoch": 0.25, + "learning_rate": 1.7725243420213444e-05, + "loss": 2.774, + "step": 5850 + }, + { + "epoch": 0.25, + "learning_rate": 1.77209915387559e-05, + "loss": 2.6391, + "step": 5860 + }, + { + "epoch": 0.25, + "learning_rate": 1.7716739657298356e-05, + "loss": 2.375, + "step": 5870 + }, + { + "epoch": 0.25, + "learning_rate": 1.7712487775840812e-05, + "loss": 2.4736, + "step": 5880 + }, + { + "epoch": 0.25, + "learning_rate": 1.7708235894383265e-05, + "loss": 2.9305, + "step": 5890 + }, + { + "epoch": 0.25, + "learning_rate": 1.770398401292572e-05, + "loss": 2.6085, + "step": 5900 + }, + { + "epoch": 0.25, + "learning_rate": 1.7699732131468177e-05, + "loss": 2.6392, + "step": 5910 + }, + { + "epoch": 0.25, + "learning_rate": 1.769548025001063e-05, + "loss": 2.0514, + "step": 5920 + }, + { + "epoch": 0.25, + "learning_rate": 1.7691228368553086e-05, + "loss": 2.3497, + "step": 5930 + }, + { + "epoch": 0.25, + "learning_rate": 1.7686976487095542e-05, + "loss": 2.774, + "step": 5940 + }, + { + "epoch": 0.25, + "learning_rate": 1.7682724605637995e-05, + "loss": 3.2576, + "step": 5950 + }, + { + "epoch": 0.25, + "learning_rate": 1.767847272418045e-05, + "loss": 2.362, + "step": 5960 + }, + { + "epoch": 0.25, + "learning_rate": 1.7674220842722907e-05, + "loss": 2.0393, + "step": 5970 + }, + { + "epoch": 0.25, + "learning_rate": 1.7669968961265363e-05, + "loss": 2.8308, + "step": 5980 + }, + { + "epoch": 0.25, + "learning_rate": 1.7665717079807816e-05, + "loss": 2.5397, + "step": 5990 + }, + { + "epoch": 0.25, + "learning_rate": 1.7661465198350272e-05, + "loss": 2.685, + "step": 6000 + }, + { + "epoch": 0.25, + "eval_loss": 2.057528257369995, + "eval_runtime": 175.9189, + "eval_samples_per_second": 14.228, + "eval_steps_per_second": 7.117, + "step": 6000 + }, + { + "epoch": 0.25, + "learning_rate": 1.765721331689273e-05, + "loss": 2.747, + "step": 6010 + }, + { + "epoch": 0.25, + "learning_rate": 1.765296143543518e-05, + "loss": 2.8322, + "step": 6020 + }, + { + "epoch": 0.25, + "learning_rate": 1.7648709553977637e-05, + "loss": 2.3471, + "step": 6030 + }, + { + "epoch": 0.25, + "learning_rate": 1.7644457672520093e-05, + "loss": 2.8333, + "step": 6040 + }, + { + "epoch": 0.25, + "learning_rate": 1.7640205791062546e-05, + "loss": 3.0349, + "step": 6050 + }, + { + "epoch": 0.25, + "learning_rate": 1.7635953909605002e-05, + "loss": 2.3392, + "step": 6060 + }, + { + "epoch": 0.26, + "learning_rate": 1.763170202814746e-05, + "loss": 3.0696, + "step": 6070 + }, + { + "epoch": 0.26, + "learning_rate": 1.762745014668991e-05, + "loss": 2.8775, + "step": 6080 + }, + { + "epoch": 0.26, + "learning_rate": 1.7623198265232367e-05, + "loss": 3.0682, + "step": 6090 + }, + { + "epoch": 0.26, + "learning_rate": 1.7618946383774824e-05, + "loss": 2.7299, + "step": 6100 + }, + { + "epoch": 0.26, + "learning_rate": 1.7614694502317276e-05, + "loss": 2.4431, + "step": 6110 + }, + { + "epoch": 0.26, + "learning_rate": 1.7610442620859733e-05, + "loss": 2.4789, + "step": 6120 + }, + { + "epoch": 0.26, + "learning_rate": 1.7606190739402185e-05, + "loss": 3.0541, + "step": 6130 + }, + { + "epoch": 0.26, + "learning_rate": 1.760193885794464e-05, + "loss": 2.7687, + "step": 6140 + }, + { + "epoch": 0.26, + "learning_rate": 1.7597686976487098e-05, + "loss": 2.5623, + "step": 6150 + }, + { + "epoch": 0.26, + "learning_rate": 1.759343509502955e-05, + "loss": 2.5698, + "step": 6160 + }, + { + "epoch": 0.26, + "learning_rate": 1.7589183213572007e-05, + "loss": 3.0957, + "step": 6170 + }, + { + "epoch": 0.26, + "learning_rate": 1.7584931332114463e-05, + "loss": 3.0593, + "step": 6180 + }, + { + "epoch": 0.26, + "learning_rate": 1.7580679450656915e-05, + "loss": 2.4348, + "step": 6190 + }, + { + "epoch": 0.26, + "learning_rate": 1.757642756919937e-05, + "loss": 2.7781, + "step": 6200 + }, + { + "epoch": 0.26, + "learning_rate": 1.7572175687741828e-05, + "loss": 2.6811, + "step": 6210 + }, + { + "epoch": 0.26, + "learning_rate": 1.756792380628428e-05, + "loss": 2.0182, + "step": 6220 + }, + { + "epoch": 0.26, + "learning_rate": 1.7563671924826737e-05, + "loss": 3.3047, + "step": 6230 + }, + { + "epoch": 0.26, + "learning_rate": 1.7559420043369193e-05, + "loss": 3.0397, + "step": 6240 + }, + { + "epoch": 0.26, + "learning_rate": 1.7555168161911646e-05, + "loss": 2.6576, + "step": 6250 + }, + { + "epoch": 0.26, + "learning_rate": 1.7550916280454102e-05, + "loss": 2.9801, + "step": 6260 + }, + { + "epoch": 0.26, + "learning_rate": 1.7546664398996558e-05, + "loss": 2.2294, + "step": 6270 + }, + { + "epoch": 0.26, + "learning_rate": 1.754241251753901e-05, + "loss": 2.9889, + "step": 6280 + }, + { + "epoch": 0.26, + "learning_rate": 1.7538160636081467e-05, + "loss": 2.264, + "step": 6290 + }, + { + "epoch": 0.27, + "learning_rate": 1.7533908754623923e-05, + "loss": 2.8822, + "step": 6300 + }, + { + "epoch": 0.27, + "learning_rate": 1.752965687316638e-05, + "loss": 3.1016, + "step": 6310 + }, + { + "epoch": 0.27, + "learning_rate": 1.7525404991708832e-05, + "loss": 2.3495, + "step": 6320 + }, + { + "epoch": 0.27, + "learning_rate": 1.7521153110251288e-05, + "loss": 2.54, + "step": 6330 + }, + { + "epoch": 0.27, + "learning_rate": 1.7516901228793744e-05, + "loss": 2.4479, + "step": 6340 + }, + { + "epoch": 0.27, + "learning_rate": 1.7512649347336197e-05, + "loss": 2.154, + "step": 6350 + }, + { + "epoch": 0.27, + "learning_rate": 1.7508397465878653e-05, + "loss": 2.6394, + "step": 6360 + }, + { + "epoch": 0.27, + "learning_rate": 1.750414558442111e-05, + "loss": 3.0602, + "step": 6370 + }, + { + "epoch": 0.27, + "learning_rate": 1.7499893702963562e-05, + "loss": 2.1946, + "step": 6380 + }, + { + "epoch": 0.27, + "learning_rate": 1.7495641821506018e-05, + "loss": 2.7575, + "step": 6390 + }, + { + "epoch": 0.27, + "learning_rate": 1.7491389940048474e-05, + "loss": 2.4044, + "step": 6400 + }, + { + "epoch": 0.27, + "learning_rate": 1.748713805859093e-05, + "loss": 2.5772, + "step": 6410 + }, + { + "epoch": 0.27, + "learning_rate": 1.7482886177133383e-05, + "loss": 2.7697, + "step": 6420 + }, + { + "epoch": 0.27, + "learning_rate": 1.747863429567584e-05, + "loss": 2.5757, + "step": 6430 + }, + { + "epoch": 0.27, + "learning_rate": 1.7474382414218296e-05, + "loss": 2.6727, + "step": 6440 + }, + { + "epoch": 0.27, + "learning_rate": 1.7470130532760748e-05, + "loss": 2.8515, + "step": 6450 + }, + { + "epoch": 0.27, + "learning_rate": 1.7465878651303204e-05, + "loss": 2.0677, + "step": 6460 + }, + { + "epoch": 0.27, + "learning_rate": 1.7461626769845657e-05, + "loss": 2.3963, + "step": 6470 + }, + { + "epoch": 0.27, + "learning_rate": 1.7457374888388113e-05, + "loss": 2.6843, + "step": 6480 + }, + { + "epoch": 0.27, + "learning_rate": 1.745312300693057e-05, + "loss": 2.6221, + "step": 6490 + }, + { + "epoch": 0.27, + "learning_rate": 1.7448871125473022e-05, + "loss": 2.4269, + "step": 6500 + }, + { + "epoch": 0.27, + "eval_loss": 2.064359664916992, + "eval_runtime": 174.2548, + "eval_samples_per_second": 14.364, + "eval_steps_per_second": 7.185, + "step": 6500 + }, + { + "epoch": 0.27, + "learning_rate": 1.744461924401548e-05, + "loss": 2.4325, + "step": 6510 + }, + { + "epoch": 0.27, + "learning_rate": 1.744036736255793e-05, + "loss": 2.9225, + "step": 6520 + }, + { + "epoch": 0.27, + "learning_rate": 1.7436115481100387e-05, + "loss": 2.3408, + "step": 6530 + }, + { + "epoch": 0.28, + "learning_rate": 1.7431863599642843e-05, + "loss": 2.4789, + "step": 6540 + }, + { + "epoch": 0.28, + "learning_rate": 1.7427611718185296e-05, + "loss": 2.8185, + "step": 6550 + }, + { + "epoch": 0.28, + "learning_rate": 1.7423359836727752e-05, + "loss": 2.8567, + "step": 6560 + }, + { + "epoch": 0.28, + "learning_rate": 1.741910795527021e-05, + "loss": 5.9013, + "step": 6570 + }, + { + "epoch": 0.28, + "learning_rate": 1.741485607381266e-05, + "loss": 2.8576, + "step": 6580 + }, + { + "epoch": 0.28, + "learning_rate": 1.7410604192355117e-05, + "loss": 2.1402, + "step": 6590 + }, + { + "epoch": 0.28, + "learning_rate": 1.7406352310897574e-05, + "loss": 2.3285, + "step": 6600 + }, + { + "epoch": 0.28, + "learning_rate": 1.7402100429440026e-05, + "loss": 2.6474, + "step": 6610 + }, + { + "epoch": 0.28, + "learning_rate": 1.7397848547982483e-05, + "loss": 2.8244, + "step": 6620 + }, + { + "epoch": 0.28, + "learning_rate": 1.739359666652494e-05, + "loss": 2.8179, + "step": 6630 + }, + { + "epoch": 0.28, + "learning_rate": 1.7389344785067395e-05, + "loss": 2.7042, + "step": 6640 + }, + { + "epoch": 0.28, + "learning_rate": 1.7385092903609848e-05, + "loss": 2.3928, + "step": 6650 + }, + { + "epoch": 0.28, + "learning_rate": 1.7380841022152304e-05, + "loss": 2.4957, + "step": 6660 + }, + { + "epoch": 0.28, + "learning_rate": 1.737658914069476e-05, + "loss": 2.7624, + "step": 6670 + }, + { + "epoch": 0.28, + "learning_rate": 1.7372337259237213e-05, + "loss": 2.8772, + "step": 6680 + }, + { + "epoch": 0.28, + "learning_rate": 1.736808537777967e-05, + "loss": 2.9064, + "step": 6690 + }, + { + "epoch": 0.28, + "learning_rate": 1.7363833496322125e-05, + "loss": 2.9803, + "step": 6700 + }, + { + "epoch": 0.28, + "learning_rate": 1.7359581614864578e-05, + "loss": 2.3177, + "step": 6710 + }, + { + "epoch": 0.28, + "learning_rate": 1.7355329733407034e-05, + "loss": 2.6294, + "step": 6720 + }, + { + "epoch": 0.28, + "learning_rate": 1.735107785194949e-05, + "loss": 2.4746, + "step": 6730 + }, + { + "epoch": 0.28, + "learning_rate": 1.7346825970491946e-05, + "loss": 1.9463, + "step": 6740 + }, + { + "epoch": 0.28, + "learning_rate": 1.73425740890344e-05, + "loss": 2.6038, + "step": 6750 + }, + { + "epoch": 0.28, + "learning_rate": 1.7338322207576855e-05, + "loss": 2.3591, + "step": 6760 + }, + { + "epoch": 0.28, + "learning_rate": 1.733407032611931e-05, + "loss": 2.5867, + "step": 6770 + }, + { + "epoch": 0.29, + "learning_rate": 1.7329818444661764e-05, + "loss": 2.3575, + "step": 6780 + }, + { + "epoch": 0.29, + "learning_rate": 1.732556656320422e-05, + "loss": 2.5036, + "step": 6790 + }, + { + "epoch": 0.29, + "learning_rate": 1.7321314681746676e-05, + "loss": 2.3801, + "step": 6800 + }, + { + "epoch": 0.29, + "learning_rate": 1.731706280028913e-05, + "loss": 2.2803, + "step": 6810 + }, + { + "epoch": 0.29, + "learning_rate": 1.7312810918831585e-05, + "loss": 2.5604, + "step": 6820 + }, + { + "epoch": 0.29, + "learning_rate": 1.730855903737404e-05, + "loss": 2.3178, + "step": 6830 + }, + { + "epoch": 0.29, + "learning_rate": 1.7304307155916494e-05, + "loss": 2.466, + "step": 6840 + }, + { + "epoch": 0.29, + "learning_rate": 1.730005527445895e-05, + "loss": 2.7137, + "step": 6850 + }, + { + "epoch": 0.29, + "learning_rate": 1.7295803393001403e-05, + "loss": 2.4662, + "step": 6860 + }, + { + "epoch": 0.29, + "learning_rate": 1.729155151154386e-05, + "loss": 2.5953, + "step": 6870 + }, + { + "epoch": 0.29, + "learning_rate": 1.7287299630086315e-05, + "loss": 2.8784, + "step": 6880 + }, + { + "epoch": 0.29, + "learning_rate": 1.7283047748628768e-05, + "loss": 2.4881, + "step": 6890 + }, + { + "epoch": 0.29, + "learning_rate": 1.7278795867171224e-05, + "loss": 2.57, + "step": 6900 + }, + { + "epoch": 0.29, + "learning_rate": 1.7274543985713677e-05, + "loss": 3.0467, + "step": 6910 + }, + { + "epoch": 0.29, + "learning_rate": 1.7270292104256133e-05, + "loss": 2.1957, + "step": 6920 + }, + { + "epoch": 0.29, + "learning_rate": 1.726604022279859e-05, + "loss": 2.5759, + "step": 6930 + }, + { + "epoch": 0.29, + "learning_rate": 1.7261788341341046e-05, + "loss": 2.3922, + "step": 6940 + }, + { + "epoch": 0.29, + "learning_rate": 1.7257536459883498e-05, + "loss": 2.7761, + "step": 6950 + }, + { + "epoch": 0.29, + "learning_rate": 1.7253284578425954e-05, + "loss": 2.3974, + "step": 6960 + }, + { + "epoch": 0.29, + "learning_rate": 1.724903269696841e-05, + "loss": 2.8827, + "step": 6970 + }, + { + "epoch": 0.29, + "learning_rate": 1.7244780815510863e-05, + "loss": 2.5402, + "step": 6980 + }, + { + "epoch": 0.29, + "learning_rate": 1.724052893405332e-05, + "loss": 2.4587, + "step": 6990 + }, + { + "epoch": 0.29, + "learning_rate": 1.7236277052595776e-05, + "loss": 2.3543, + "step": 7000 + }, + { + "epoch": 0.29, + "eval_loss": 2.0410194396972656, + "eval_runtime": 208.3814, + "eval_samples_per_second": 12.012, + "eval_steps_per_second": 6.008, + "step": 7000 + }, + { + "epoch": 0.29, + "learning_rate": 1.723202517113823e-05, + "loss": 2.6713, + "step": 7010 + }, + { + "epoch": 0.3, + "learning_rate": 1.7227773289680685e-05, + "loss": 2.7262, + "step": 7020 + }, + { + "epoch": 0.3, + "learning_rate": 1.722352140822314e-05, + "loss": 2.8134, + "step": 7030 + }, + { + "epoch": 0.3, + "learning_rate": 1.7219269526765597e-05, + "loss": 2.3668, + "step": 7040 + }, + { + "epoch": 0.3, + "learning_rate": 1.721501764530805e-05, + "loss": 2.6703, + "step": 7050 + }, + { + "epoch": 0.3, + "learning_rate": 1.7210765763850506e-05, + "loss": 2.7111, + "step": 7060 + }, + { + "epoch": 0.3, + "learning_rate": 1.7206513882392962e-05, + "loss": 2.4478, + "step": 7070 + }, + { + "epoch": 0.3, + "learning_rate": 1.7202262000935415e-05, + "loss": 2.1211, + "step": 7080 + }, + { + "epoch": 0.3, + "learning_rate": 1.719801011947787e-05, + "loss": 2.6789, + "step": 7090 + }, + { + "epoch": 0.3, + "learning_rate": 1.7193758238020327e-05, + "loss": 2.5602, + "step": 7100 + }, + { + "epoch": 0.3, + "learning_rate": 1.718950635656278e-05, + "loss": 3.0027, + "step": 7110 + }, + { + "epoch": 0.3, + "learning_rate": 1.7185254475105236e-05, + "loss": 2.3461, + "step": 7120 + }, + { + "epoch": 0.3, + "learning_rate": 1.7181002593647692e-05, + "loss": 2.7754, + "step": 7130 + }, + { + "epoch": 0.3, + "learning_rate": 1.7176750712190145e-05, + "loss": 2.5173, + "step": 7140 + }, + { + "epoch": 0.3, + "learning_rate": 1.71724988307326e-05, + "loss": 2.3433, + "step": 7150 + }, + { + "epoch": 0.3, + "learning_rate": 1.7168246949275057e-05, + "loss": 2.2877, + "step": 7160 + }, + { + "epoch": 0.3, + "learning_rate": 1.716399506781751e-05, + "loss": 2.5632, + "step": 7170 + }, + { + "epoch": 0.3, + "learning_rate": 1.7159743186359966e-05, + "loss": 2.6671, + "step": 7180 + }, + { + "epoch": 0.3, + "learning_rate": 1.7155491304902422e-05, + "loss": 2.5462, + "step": 7190 + }, + { + "epoch": 0.3, + "learning_rate": 1.7151239423444875e-05, + "loss": 2.7029, + "step": 7200 + }, + { + "epoch": 0.3, + "learning_rate": 1.714698754198733e-05, + "loss": 2.1722, + "step": 7210 + }, + { + "epoch": 0.3, + "learning_rate": 1.7142735660529787e-05, + "loss": 2.6146, + "step": 7220 + }, + { + "epoch": 0.3, + "learning_rate": 1.713848377907224e-05, + "loss": 2.695, + "step": 7230 + }, + { + "epoch": 0.3, + "learning_rate": 1.7134231897614696e-05, + "loss": 2.782, + "step": 7240 + }, + { + "epoch": 0.31, + "learning_rate": 1.712998001615715e-05, + "loss": 2.7765, + "step": 7250 + }, + { + "epoch": 0.31, + "learning_rate": 1.7125728134699605e-05, + "loss": 2.8255, + "step": 7260 + }, + { + "epoch": 0.31, + "learning_rate": 1.712147625324206e-05, + "loss": 2.662, + "step": 7270 + }, + { + "epoch": 0.31, + "learning_rate": 1.7117224371784514e-05, + "loss": 2.9787, + "step": 7280 + }, + { + "epoch": 0.31, + "learning_rate": 1.711297249032697e-05, + "loss": 2.7982, + "step": 7290 + }, + { + "epoch": 0.31, + "learning_rate": 1.7108720608869426e-05, + "loss": 2.363, + "step": 7300 + }, + { + "epoch": 0.31, + "learning_rate": 1.710446872741188e-05, + "loss": 2.5674, + "step": 7310 + }, + { + "epoch": 0.31, + "learning_rate": 1.7100216845954335e-05, + "loss": 2.5751, + "step": 7320 + }, + { + "epoch": 0.31, + "learning_rate": 1.709596496449679e-05, + "loss": 2.8134, + "step": 7330 + }, + { + "epoch": 0.31, + "learning_rate": 1.7091713083039244e-05, + "loss": 2.2201, + "step": 7340 + }, + { + "epoch": 0.31, + "learning_rate": 1.70874612015817e-05, + "loss": 2.2872, + "step": 7350 + }, + { + "epoch": 0.31, + "learning_rate": 1.7083209320124157e-05, + "loss": 2.5633, + "step": 7360 + }, + { + "epoch": 0.31, + "learning_rate": 1.7078957438666613e-05, + "loss": 2.7733, + "step": 7370 + }, + { + "epoch": 0.31, + "learning_rate": 1.7074705557209065e-05, + "loss": 2.8384, + "step": 7380 + }, + { + "epoch": 0.31, + "learning_rate": 1.707045367575152e-05, + "loss": 2.5711, + "step": 7390 + }, + { + "epoch": 0.31, + "learning_rate": 1.7066201794293978e-05, + "loss": 2.4719, + "step": 7400 + }, + { + "epoch": 0.31, + "learning_rate": 1.706194991283643e-05, + "loss": 2.2026, + "step": 7410 + }, + { + "epoch": 0.31, + "learning_rate": 1.7057698031378887e-05, + "loss": 2.845, + "step": 7420 + }, + { + "epoch": 0.31, + "learning_rate": 1.7053446149921343e-05, + "loss": 2.4645, + "step": 7430 + }, + { + "epoch": 0.31, + "learning_rate": 1.7049194268463796e-05, + "loss": 2.6133, + "step": 7440 + }, + { + "epoch": 0.31, + "learning_rate": 1.7044942387006252e-05, + "loss": 2.2499, + "step": 7450 + }, + { + "epoch": 0.31, + "learning_rate": 1.7040690505548708e-05, + "loss": 2.8703, + "step": 7460 + }, + { + "epoch": 0.31, + "learning_rate": 1.7036438624091164e-05, + "loss": 2.6793, + "step": 7470 + }, + { + "epoch": 0.31, + "learning_rate": 1.7032186742633617e-05, + "loss": 2.7365, + "step": 7480 + }, + { + "epoch": 0.32, + "learning_rate": 1.7027934861176073e-05, + "loss": 2.6857, + "step": 7490 + }, + { + "epoch": 0.32, + "learning_rate": 1.702368297971853e-05, + "loss": 2.5896, + "step": 7500 + }, + { + "epoch": 0.32, + "eval_loss": 2.008735179901123, + "eval_runtime": 320.0336, + "eval_samples_per_second": 7.821, + "eval_steps_per_second": 3.912, + "step": 7500 + }, + { + "epoch": 0.32, + "learning_rate": 1.7019431098260982e-05, + "loss": 2.4888, + "step": 7510 + }, + { + "epoch": 0.32, + "learning_rate": 1.7015179216803438e-05, + "loss": 2.4542, + "step": 7520 + }, + { + "epoch": 0.32, + "learning_rate": 1.7010927335345894e-05, + "loss": 2.6477, + "step": 7530 + }, + { + "epoch": 0.32, + "learning_rate": 1.7006675453888347e-05, + "loss": 2.9978, + "step": 7540 + }, + { + "epoch": 0.32, + "learning_rate": 1.7002423572430803e-05, + "loss": 2.6587, + "step": 7550 + }, + { + "epoch": 0.32, + "learning_rate": 1.6998171690973256e-05, + "loss": 2.8396, + "step": 7560 + }, + { + "epoch": 0.32, + "learning_rate": 1.6993919809515712e-05, + "loss": 2.4553, + "step": 7570 + }, + { + "epoch": 0.32, + "learning_rate": 1.6989667928058168e-05, + "loss": 2.534, + "step": 7580 + }, + { + "epoch": 0.32, + "learning_rate": 1.698541604660062e-05, + "loss": 2.2884, + "step": 7590 + }, + { + "epoch": 0.32, + "learning_rate": 1.6981164165143077e-05, + "loss": 2.4381, + "step": 7600 + }, + { + "epoch": 0.32, + "learning_rate": 1.6976912283685533e-05, + "loss": 2.7952, + "step": 7610 + }, + { + "epoch": 0.32, + "learning_rate": 1.6972660402227986e-05, + "loss": 2.6889, + "step": 7620 + }, + { + "epoch": 0.32, + "learning_rate": 1.6968408520770442e-05, + "loss": 2.1633, + "step": 7630 + }, + { + "epoch": 0.32, + "learning_rate": 1.6964156639312895e-05, + "loss": 2.5299, + "step": 7640 + }, + { + "epoch": 0.32, + "learning_rate": 1.695990475785535e-05, + "loss": 2.7168, + "step": 7650 + }, + { + "epoch": 0.32, + "learning_rate": 1.6955652876397807e-05, + "loss": 2.0173, + "step": 7660 + }, + { + "epoch": 0.32, + "learning_rate": 1.695140099494026e-05, + "loss": 2.6909, + "step": 7670 + }, + { + "epoch": 0.32, + "learning_rate": 1.6947149113482716e-05, + "loss": 2.5593, + "step": 7680 + }, + { + "epoch": 0.32, + "learning_rate": 1.6942897232025172e-05, + "loss": 2.5208, + "step": 7690 + }, + { + "epoch": 0.32, + "learning_rate": 1.693864535056763e-05, + "loss": 2.2565, + "step": 7700 + }, + { + "epoch": 0.32, + "learning_rate": 1.693439346911008e-05, + "loss": 2.5508, + "step": 7710 + }, + { + "epoch": 0.32, + "learning_rate": 1.6930141587652537e-05, + "loss": 2.5953, + "step": 7720 + }, + { + "epoch": 0.33, + "learning_rate": 1.6925889706194993e-05, + "loss": 2.8496, + "step": 7730 + }, + { + "epoch": 0.33, + "learning_rate": 1.6921637824737446e-05, + "loss": 2.5317, + "step": 7740 + }, + { + "epoch": 0.33, + "learning_rate": 1.6917385943279902e-05, + "loss": 2.7538, + "step": 7750 + }, + { + "epoch": 0.33, + "learning_rate": 1.691313406182236e-05, + "loss": 2.4385, + "step": 7760 + }, + { + "epoch": 0.33, + "learning_rate": 1.690888218036481e-05, + "loss": 2.5723, + "step": 7770 + }, + { + "epoch": 0.33, + "learning_rate": 1.6904630298907267e-05, + "loss": 2.6366, + "step": 7780 + }, + { + "epoch": 0.33, + "learning_rate": 1.6900378417449724e-05, + "loss": 2.937, + "step": 7790 + }, + { + "epoch": 0.33, + "learning_rate": 1.689612653599218e-05, + "loss": 2.8732, + "step": 7800 + }, + { + "epoch": 0.33, + "learning_rate": 1.6891874654534633e-05, + "loss": 2.6569, + "step": 7810 + }, + { + "epoch": 0.33, + "learning_rate": 1.688762277307709e-05, + "loss": 2.6205, + "step": 7820 + }, + { + "epoch": 0.33, + "learning_rate": 1.6883370891619545e-05, + "loss": 2.6153, + "step": 7830 + }, + { + "epoch": 0.33, + "learning_rate": 1.6879119010161998e-05, + "loss": 2.3836, + "step": 7840 + }, + { + "epoch": 0.33, + "learning_rate": 1.6874867128704454e-05, + "loss": 2.1343, + "step": 7850 + }, + { + "epoch": 0.33, + "learning_rate": 1.687061524724691e-05, + "loss": 2.1818, + "step": 7860 + }, + { + "epoch": 0.33, + "learning_rate": 1.6866363365789363e-05, + "loss": 2.4748, + "step": 7870 + }, + { + "epoch": 0.33, + "learning_rate": 1.686211148433182e-05, + "loss": 2.3274, + "step": 7880 + }, + { + "epoch": 0.33, + "learning_rate": 1.6857859602874275e-05, + "loss": 3.206, + "step": 7890 + }, + { + "epoch": 0.33, + "learning_rate": 1.6853607721416728e-05, + "loss": 2.2356, + "step": 7900 + }, + { + "epoch": 0.33, + "learning_rate": 1.6849355839959184e-05, + "loss": 2.7072, + "step": 7910 + }, + { + "epoch": 0.33, + "learning_rate": 1.684510395850164e-05, + "loss": 2.6011, + "step": 7920 + }, + { + "epoch": 0.33, + "learning_rate": 1.6840852077044093e-05, + "loss": 2.3916, + "step": 7930 + }, + { + "epoch": 0.33, + "learning_rate": 1.683660019558655e-05, + "loss": 2.1616, + "step": 7940 + }, + { + "epoch": 0.33, + "learning_rate": 1.6832348314129002e-05, + "loss": 2.4811, + "step": 7950 + }, + { + "epoch": 0.33, + "learning_rate": 1.6828096432671458e-05, + "loss": 2.5085, + "step": 7960 + }, + { + "epoch": 0.34, + "learning_rate": 1.6823844551213914e-05, + "loss": 2.4976, + "step": 7970 + }, + { + "epoch": 0.34, + "learning_rate": 1.6819592669756367e-05, + "loss": 2.4867, + "step": 7980 + }, + { + "epoch": 0.34, + "learning_rate": 1.6815340788298823e-05, + "loss": 2.5794, + "step": 7990 + }, + { + "epoch": 0.34, + "learning_rate": 1.681108890684128e-05, + "loss": 2.6095, + "step": 8000 + }, + { + "epoch": 0.34, + "eval_loss": 2.00496768951416, + "eval_runtime": 337.7379, + "eval_samples_per_second": 7.411, + "eval_steps_per_second": 3.707, + "step": 8000 + }, + { + "epoch": 0.34, + "learning_rate": 1.6806837025383732e-05, + "loss": 2.1561, + "step": 8010 + }, + { + "epoch": 0.34, + "learning_rate": 1.6802585143926188e-05, + "loss": 2.4532, + "step": 8020 + }, + { + "epoch": 0.34, + "learning_rate": 1.6798333262468644e-05, + "loss": 2.2522, + "step": 8030 + }, + { + "epoch": 0.34, + "learning_rate": 1.6794081381011097e-05, + "loss": 2.3841, + "step": 8040 + }, + { + "epoch": 0.34, + "learning_rate": 1.6789829499553553e-05, + "loss": 2.3188, + "step": 8050 + }, + { + "epoch": 0.34, + "learning_rate": 1.678557761809601e-05, + "loss": 2.5529, + "step": 8060 + }, + { + "epoch": 0.34, + "learning_rate": 1.6781325736638462e-05, + "loss": 3.0713, + "step": 8070 + }, + { + "epoch": 0.34, + "learning_rate": 1.6777073855180918e-05, + "loss": 2.6333, + "step": 8080 + }, + { + "epoch": 0.34, + "learning_rate": 1.6772821973723374e-05, + "loss": 2.4813, + "step": 8090 + }, + { + "epoch": 0.34, + "learning_rate": 1.676857009226583e-05, + "loss": 2.3983, + "step": 8100 + }, + { + "epoch": 0.34, + "learning_rate": 1.6764318210808283e-05, + "loss": 2.2578, + "step": 8110 + }, + { + "epoch": 0.34, + "learning_rate": 1.676006632935074e-05, + "loss": 2.7892, + "step": 8120 + }, + { + "epoch": 0.34, + "learning_rate": 1.6755814447893196e-05, + "loss": 2.6201, + "step": 8130 + }, + { + "epoch": 0.34, + "learning_rate": 1.675156256643565e-05, + "loss": 2.3545, + "step": 8140 + }, + { + "epoch": 0.34, + "learning_rate": 1.6747310684978104e-05, + "loss": 2.475, + "step": 8150 + }, + { + "epoch": 0.34, + "learning_rate": 1.674305880352056e-05, + "loss": 2.7158, + "step": 8160 + }, + { + "epoch": 0.34, + "learning_rate": 1.6738806922063013e-05, + "loss": 2.1479, + "step": 8170 + }, + { + "epoch": 0.34, + "learning_rate": 1.673455504060547e-05, + "loss": 2.6363, + "step": 8180 + }, + { + "epoch": 0.34, + "learning_rate": 1.6730303159147926e-05, + "loss": 2.0257, + "step": 8190 + }, + { + "epoch": 0.34, + "learning_rate": 1.672605127769038e-05, + "loss": 2.2188, + "step": 8200 + }, + { + "epoch": 0.35, + "learning_rate": 1.6721799396232835e-05, + "loss": 2.8021, + "step": 8210 + }, + { + "epoch": 0.35, + "learning_rate": 1.671754751477529e-05, + "loss": 2.7151, + "step": 8220 + }, + { + "epoch": 0.35, + "learning_rate": 1.6713295633317747e-05, + "loss": 2.3472, + "step": 8230 + }, + { + "epoch": 0.35, + "learning_rate": 1.67090437518602e-05, + "loss": 2.5458, + "step": 8240 + }, + { + "epoch": 0.35, + "learning_rate": 1.6704791870402656e-05, + "loss": 2.3873, + "step": 8250 + }, + { + "epoch": 0.35, + "learning_rate": 1.6700539988945112e-05, + "loss": 2.4865, + "step": 8260 + }, + { + "epoch": 0.35, + "learning_rate": 1.6696288107487565e-05, + "loss": 2.7881, + "step": 8270 + }, + { + "epoch": 0.35, + "learning_rate": 1.669203622603002e-05, + "loss": 2.3015, + "step": 8280 + }, + { + "epoch": 0.35, + "learning_rate": 1.6687784344572474e-05, + "loss": 2.5846, + "step": 8290 + }, + { + "epoch": 0.35, + "learning_rate": 1.668353246311493e-05, + "loss": 2.5504, + "step": 8300 + }, + { + "epoch": 0.35, + "learning_rate": 1.6679280581657386e-05, + "loss": 2.3062, + "step": 8310 + }, + { + "epoch": 0.35, + "learning_rate": 1.667502870019984e-05, + "loss": 2.7249, + "step": 8320 + }, + { + "epoch": 0.35, + "learning_rate": 1.6670776818742295e-05, + "loss": 2.4261, + "step": 8330 + }, + { + "epoch": 0.35, + "learning_rate": 1.6666524937284748e-05, + "loss": 2.3477, + "step": 8340 + }, + { + "epoch": 0.35, + "learning_rate": 1.6662273055827204e-05, + "loss": 2.2926, + "step": 8350 + }, + { + "epoch": 0.35, + "learning_rate": 1.665802117436966e-05, + "loss": 2.4989, + "step": 8360 + }, + { + "epoch": 0.35, + "learning_rate": 1.6653769292912113e-05, + "loss": 2.6045, + "step": 8370 + }, + { + "epoch": 0.35, + "learning_rate": 1.664951741145457e-05, + "loss": 2.5208, + "step": 8380 + }, + { + "epoch": 0.35, + "learning_rate": 1.6645265529997025e-05, + "loss": 2.3486, + "step": 8390 + }, + { + "epoch": 0.35, + "learning_rate": 1.6641013648539478e-05, + "loss": 2.5779, + "step": 8400 + }, + { + "epoch": 0.35, + "learning_rate": 1.6636761767081934e-05, + "loss": 2.395, + "step": 8410 + }, + { + "epoch": 0.35, + "learning_rate": 1.663250988562439e-05, + "loss": 2.2631, + "step": 8420 + }, + { + "epoch": 0.35, + "learning_rate": 1.6628258004166846e-05, + "loss": 2.5388, + "step": 8430 + }, + { + "epoch": 0.36, + "learning_rate": 1.66240061227093e-05, + "loss": 2.5898, + "step": 8440 + }, + { + "epoch": 0.36, + "learning_rate": 1.6619754241251755e-05, + "loss": 2.5621, + "step": 8450 + }, + { + "epoch": 0.36, + "learning_rate": 1.661550235979421e-05, + "loss": 2.1611, + "step": 8460 + }, + { + "epoch": 0.36, + "learning_rate": 1.6611250478336664e-05, + "loss": 2.3162, + "step": 8470 + }, + { + "epoch": 0.36, + "learning_rate": 1.660699859687912e-05, + "loss": 2.9968, + "step": 8480 + }, + { + "epoch": 0.36, + "learning_rate": 1.6602746715421576e-05, + "loss": 2.318, + "step": 8490 + }, + { + "epoch": 0.36, + "learning_rate": 1.659849483396403e-05, + "loss": 3.1466, + "step": 8500 + }, + { + "epoch": 0.36, + "eval_loss": 1.9874720573425293, + "eval_runtime": 335.1236, + "eval_samples_per_second": 7.469, + "eval_steps_per_second": 3.736, + "step": 8500 + }, + { + "epoch": 0.36, + "learning_rate": 1.6594242952506485e-05, + "loss": 2.4721, + "step": 8510 + }, + { + "epoch": 0.36, + "learning_rate": 1.658999107104894e-05, + "loss": 2.2058, + "step": 8520 + }, + { + "epoch": 0.36, + "learning_rate": 1.6585739189591398e-05, + "loss": 2.9404, + "step": 8530 + }, + { + "epoch": 0.36, + "learning_rate": 1.658148730813385e-05, + "loss": 2.6556, + "step": 8540 + }, + { + "epoch": 0.36, + "learning_rate": 1.6577235426676307e-05, + "loss": 2.3895, + "step": 8550 + }, + { + "epoch": 0.36, + "learning_rate": 1.6572983545218763e-05, + "loss": 2.4395, + "step": 8560 + }, + { + "epoch": 0.36, + "learning_rate": 1.6568731663761215e-05, + "loss": 2.4652, + "step": 8570 + }, + { + "epoch": 0.36, + "learning_rate": 1.656447978230367e-05, + "loss": 2.3942, + "step": 8580 + }, + { + "epoch": 0.36, + "learning_rate": 1.6560227900846128e-05, + "loss": 2.5459, + "step": 8590 + }, + { + "epoch": 0.36, + "learning_rate": 1.655597601938858e-05, + "loss": 2.2158, + "step": 8600 + }, + { + "epoch": 0.36, + "learning_rate": 1.6551724137931037e-05, + "loss": 2.7693, + "step": 8610 + }, + { + "epoch": 0.36, + "learning_rate": 1.6547472256473493e-05, + "loss": 2.6083, + "step": 8620 + }, + { + "epoch": 0.36, + "learning_rate": 1.6543220375015946e-05, + "loss": 2.6171, + "step": 8630 + }, + { + "epoch": 0.36, + "learning_rate": 1.6538968493558402e-05, + "loss": 1.9984, + "step": 8640 + }, + { + "epoch": 0.36, + "learning_rate": 1.6534716612100858e-05, + "loss": 2.5385, + "step": 8650 + }, + { + "epoch": 0.36, + "learning_rate": 1.653046473064331e-05, + "loss": 2.9385, + "step": 8660 + }, + { + "epoch": 0.36, + "learning_rate": 1.6526212849185767e-05, + "loss": 2.5199, + "step": 8670 + }, + { + "epoch": 0.37, + "learning_rate": 1.652196096772822e-05, + "loss": 2.1399, + "step": 8680 + }, + { + "epoch": 0.37, + "learning_rate": 1.6517709086270676e-05, + "loss": 2.5872, + "step": 8690 + }, + { + "epoch": 0.37, + "learning_rate": 1.6513457204813132e-05, + "loss": 2.5678, + "step": 8700 + }, + { + "epoch": 0.37, + "learning_rate": 1.6509205323355585e-05, + "loss": 2.1971, + "step": 8710 + }, + { + "epoch": 0.37, + "learning_rate": 1.650495344189804e-05, + "loss": 2.379, + "step": 8720 + }, + { + "epoch": 0.37, + "learning_rate": 1.6500701560440494e-05, + "loss": 2.2224, + "step": 8730 + }, + { + "epoch": 0.37, + "learning_rate": 1.649644967898295e-05, + "loss": 2.7368, + "step": 8740 + }, + { + "epoch": 0.37, + "learning_rate": 1.6492197797525406e-05, + "loss": 2.9536, + "step": 8750 + }, + { + "epoch": 0.37, + "learning_rate": 1.6487945916067862e-05, + "loss": 2.1969, + "step": 8760 + }, + { + "epoch": 0.37, + "learning_rate": 1.6483694034610315e-05, + "loss": 2.2591, + "step": 8770 + }, + { + "epoch": 0.37, + "learning_rate": 1.647944215315277e-05, + "loss": 2.3555, + "step": 8780 + }, + { + "epoch": 0.37, + "learning_rate": 1.6475190271695227e-05, + "loss": 2.1909, + "step": 8790 + }, + { + "epoch": 0.37, + "learning_rate": 1.647093839023768e-05, + "loss": 2.6798, + "step": 8800 + }, + { + "epoch": 0.37, + "learning_rate": 1.6466686508780136e-05, + "loss": 2.8922, + "step": 8810 + }, + { + "epoch": 0.37, + "learning_rate": 1.6462434627322592e-05, + "loss": 2.4089, + "step": 8820 + }, + { + "epoch": 0.37, + "learning_rate": 1.6458182745865045e-05, + "loss": 2.3367, + "step": 8830 + }, + { + "epoch": 0.37, + "learning_rate": 1.64539308644075e-05, + "loss": 2.1407, + "step": 8840 + }, + { + "epoch": 0.37, + "learning_rate": 1.6449678982949957e-05, + "loss": 2.7404, + "step": 8850 + }, + { + "epoch": 0.37, + "learning_rate": 1.6445427101492413e-05, + "loss": 2.4875, + "step": 8860 + }, + { + "epoch": 0.37, + "learning_rate": 1.6441175220034866e-05, + "loss": 3.2661, + "step": 8870 + }, + { + "epoch": 0.37, + "learning_rate": 1.6436923338577322e-05, + "loss": 2.4685, + "step": 8880 + }, + { + "epoch": 0.37, + "learning_rate": 1.643267145711978e-05, + "loss": 2.9441, + "step": 8890 + }, + { + "epoch": 0.37, + "learning_rate": 1.642841957566223e-05, + "loss": 2.4203, + "step": 8900 + }, + { + "epoch": 0.37, + "learning_rate": 1.6424167694204687e-05, + "loss": 2.5944, + "step": 8910 + }, + { + "epoch": 0.38, + "learning_rate": 1.6419915812747143e-05, + "loss": 2.1158, + "step": 8920 + }, + { + "epoch": 0.38, + "learning_rate": 1.6415663931289596e-05, + "loss": 2.8647, + "step": 8930 + }, + { + "epoch": 0.38, + "learning_rate": 1.6411412049832052e-05, + "loss": 2.5355, + "step": 8940 + }, + { + "epoch": 0.38, + "learning_rate": 1.640716016837451e-05, + "loss": 2.6662, + "step": 8950 + }, + { + "epoch": 0.38, + "learning_rate": 1.6402908286916965e-05, + "loss": 2.2165, + "step": 8960 + }, + { + "epoch": 0.38, + "learning_rate": 1.6398656405459417e-05, + "loss": 2.1348, + "step": 8970 + }, + { + "epoch": 0.38, + "learning_rate": 1.6394404524001874e-05, + "loss": 2.1628, + "step": 8980 + }, + { + "epoch": 0.38, + "learning_rate": 1.6390152642544326e-05, + "loss": 2.3188, + "step": 8990 + }, + { + "epoch": 0.38, + "learning_rate": 1.6385900761086783e-05, + "loss": 2.8703, + "step": 9000 + }, + { + "epoch": 0.38, + "eval_loss": 1.9859079122543335, + "eval_runtime": 337.3358, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 3.711, + "step": 9000 + }, + { + "epoch": 0.38, + "learning_rate": 1.638164887962924e-05, + "loss": 2.5798, + "step": 9010 + }, + { + "epoch": 0.38, + "learning_rate": 1.637739699817169e-05, + "loss": 2.5057, + "step": 9020 + }, + { + "epoch": 0.38, + "learning_rate": 1.6373145116714148e-05, + "loss": 2.5603, + "step": 9030 + }, + { + "epoch": 0.38, + "learning_rate": 1.6368893235256604e-05, + "loss": 2.5391, + "step": 9040 + }, + { + "epoch": 0.38, + "learning_rate": 1.6364641353799057e-05, + "loss": 1.8705, + "step": 9050 + }, + { + "epoch": 0.38, + "learning_rate": 1.6360389472341513e-05, + "loss": 2.1202, + "step": 9060 + }, + { + "epoch": 0.38, + "learning_rate": 1.6356137590883965e-05, + "loss": 2.8342, + "step": 9070 + }, + { + "epoch": 0.38, + "learning_rate": 1.635188570942642e-05, + "loss": 2.7267, + "step": 9080 + }, + { + "epoch": 0.38, + "learning_rate": 1.6347633827968878e-05, + "loss": 2.2638, + "step": 9090 + }, + { + "epoch": 0.38, + "learning_rate": 1.634338194651133e-05, + "loss": 2.9132, + "step": 9100 + }, + { + "epoch": 0.38, + "learning_rate": 1.6339130065053787e-05, + "loss": 2.5094, + "step": 9110 + }, + { + "epoch": 0.38, + "learning_rate": 1.6334878183596243e-05, + "loss": 2.6133, + "step": 9120 + }, + { + "epoch": 0.38, + "learning_rate": 1.6330626302138696e-05, + "loss": 2.5961, + "step": 9130 + }, + { + "epoch": 0.38, + "learning_rate": 1.6326374420681152e-05, + "loss": 2.2942, + "step": 9140 + }, + { + "epoch": 0.38, + "learning_rate": 1.6322122539223608e-05, + "loss": 2.6119, + "step": 9150 + }, + { + "epoch": 0.39, + "learning_rate": 1.631787065776606e-05, + "loss": 2.2025, + "step": 9160 + }, + { + "epoch": 0.39, + "learning_rate": 1.6313618776308517e-05, + "loss": 2.6507, + "step": 9170 + }, + { + "epoch": 0.39, + "learning_rate": 1.6309366894850973e-05, + "loss": 2.395, + "step": 9180 + }, + { + "epoch": 0.39, + "learning_rate": 1.630511501339343e-05, + "loss": 2.738, + "step": 9190 + }, + { + "epoch": 0.39, + "learning_rate": 1.6300863131935882e-05, + "loss": 2.2349, + "step": 9200 + }, + { + "epoch": 0.39, + "learning_rate": 1.6296611250478338e-05, + "loss": 2.5223, + "step": 9210 + }, + { + "epoch": 0.39, + "learning_rate": 1.6292359369020794e-05, + "loss": 2.4301, + "step": 9220 + }, + { + "epoch": 0.39, + "learning_rate": 1.6288107487563247e-05, + "loss": 2.5581, + "step": 9230 + }, + { + "epoch": 0.39, + "learning_rate": 1.6283855606105703e-05, + "loss": 2.7029, + "step": 9240 + }, + { + "epoch": 0.39, + "learning_rate": 1.627960372464816e-05, + "loss": 2.4213, + "step": 9250 + }, + { + "epoch": 0.39, + "learning_rate": 1.6275351843190612e-05, + "loss": 2.2374, + "step": 9260 + }, + { + "epoch": 0.39, + "learning_rate": 1.6271099961733068e-05, + "loss": 2.6529, + "step": 9270 + }, + { + "epoch": 0.39, + "learning_rate": 1.6266848080275524e-05, + "loss": 2.0457, + "step": 9280 + }, + { + "epoch": 0.39, + "learning_rate": 1.626259619881798e-05, + "loss": 2.2516, + "step": 9290 + }, + { + "epoch": 0.39, + "learning_rate": 1.6258344317360433e-05, + "loss": 2.4958, + "step": 9300 + }, + { + "epoch": 0.39, + "learning_rate": 1.625409243590289e-05, + "loss": 2.3028, + "step": 9310 + }, + { + "epoch": 0.39, + "learning_rate": 1.6249840554445346e-05, + "loss": 2.623, + "step": 9320 + }, + { + "epoch": 0.39, + "learning_rate": 1.62455886729878e-05, + "loss": 2.1896, + "step": 9330 + }, + { + "epoch": 0.39, + "learning_rate": 1.6241336791530254e-05, + "loss": 3.0736, + "step": 9340 + }, + { + "epoch": 0.39, + "learning_rate": 1.623708491007271e-05, + "loss": 2.4666, + "step": 9350 + }, + { + "epoch": 0.39, + "learning_rate": 1.6232833028615163e-05, + "loss": 2.7424, + "step": 9360 + }, + { + "epoch": 0.39, + "learning_rate": 1.622858114715762e-05, + "loss": 1.9853, + "step": 9370 + }, + { + "epoch": 0.39, + "learning_rate": 1.6224329265700072e-05, + "loss": 2.8972, + "step": 9380 + }, + { + "epoch": 0.4, + "learning_rate": 1.622007738424253e-05, + "loss": 2.6863, + "step": 9390 + }, + { + "epoch": 0.4, + "learning_rate": 1.6215825502784985e-05, + "loss": 1.75, + "step": 9400 + }, + { + "epoch": 0.4, + "learning_rate": 1.6211573621327437e-05, + "loss": 2.4478, + "step": 9410 + }, + { + "epoch": 0.4, + "learning_rate": 1.6207321739869894e-05, + "loss": 3.1368, + "step": 9420 + }, + { + "epoch": 0.4, + "learning_rate": 1.620306985841235e-05, + "loss": 2.3129, + "step": 9430 + }, + { + "epoch": 0.4, + "learning_rate": 1.6198817976954802e-05, + "loss": 2.7234, + "step": 9440 + }, + { + "epoch": 0.4, + "learning_rate": 1.619456609549726e-05, + "loss": 2.6612, + "step": 9450 + }, + { + "epoch": 0.4, + "learning_rate": 1.619031421403971e-05, + "loss": 2.3061, + "step": 9460 + }, + { + "epoch": 0.4, + "learning_rate": 1.6186062332582168e-05, + "loss": 2.1242, + "step": 9470 + }, + { + "epoch": 0.4, + "learning_rate": 1.6181810451124624e-05, + "loss": 2.5048, + "step": 9480 + }, + { + "epoch": 0.4, + "learning_rate": 1.617755856966708e-05, + "loss": 2.4544, + "step": 9490 + }, + { + "epoch": 0.4, + "learning_rate": 1.6173306688209533e-05, + "loss": 2.3058, + "step": 9500 + }, + { + "epoch": 0.4, + "eval_loss": 1.9763813018798828, + "eval_runtime": 157.87, + "eval_samples_per_second": 15.855, + "eval_steps_per_second": 7.931, + "step": 9500 + }, + { + "epoch": 0.4, + "learning_rate": 1.616905480675199e-05, + "loss": 2.5569, + "step": 9510 + }, + { + "epoch": 0.4, + "learning_rate": 1.6164802925294445e-05, + "loss": 1.7934, + "step": 9520 + }, + { + "epoch": 0.4, + "learning_rate": 1.6160551043836898e-05, + "loss": 2.4943, + "step": 9530 + }, + { + "epoch": 0.4, + "learning_rate": 1.6156299162379354e-05, + "loss": 2.9005, + "step": 9540 + }, + { + "epoch": 0.4, + "learning_rate": 1.615204728092181e-05, + "loss": 2.1819, + "step": 9550 + }, + { + "epoch": 0.4, + "learning_rate": 1.6147795399464263e-05, + "loss": 2.3916, + "step": 9560 + }, + { + "epoch": 0.4, + "learning_rate": 1.614354351800672e-05, + "loss": 2.6763, + "step": 9570 + }, + { + "epoch": 0.4, + "learning_rate": 1.6139291636549175e-05, + "loss": 2.6101, + "step": 9580 + }, + { + "epoch": 0.4, + "learning_rate": 1.613503975509163e-05, + "loss": 2.5131, + "step": 9590 + }, + { + "epoch": 0.4, + "learning_rate": 1.6130787873634084e-05, + "loss": 2.9305, + "step": 9600 + }, + { + "epoch": 0.4, + "learning_rate": 1.612653599217654e-05, + "loss": 2.1208, + "step": 9610 + }, + { + "epoch": 0.4, + "learning_rate": 1.6122284110718996e-05, + "loss": 2.4165, + "step": 9620 + }, + { + "epoch": 0.41, + "learning_rate": 1.611803222926145e-05, + "loss": 2.5368, + "step": 9630 + }, + { + "epoch": 0.41, + "learning_rate": 1.6113780347803905e-05, + "loss": 2.8788, + "step": 9640 + }, + { + "epoch": 0.41, + "learning_rate": 1.610952846634636e-05, + "loss": 2.4815, + "step": 9650 + }, + { + "epoch": 0.41, + "learning_rate": 1.6105276584888814e-05, + "loss": 2.8152, + "step": 9660 + }, + { + "epoch": 0.41, + "learning_rate": 1.610102470343127e-05, + "loss": 2.0329, + "step": 9670 + }, + { + "epoch": 0.41, + "learning_rate": 1.6096772821973726e-05, + "loss": 2.9021, + "step": 9680 + }, + { + "epoch": 0.41, + "learning_rate": 1.609252094051618e-05, + "loss": 2.8002, + "step": 9690 + }, + { + "epoch": 0.41, + "learning_rate": 1.6088269059058635e-05, + "loss": 2.4334, + "step": 9700 + }, + { + "epoch": 0.41, + "learning_rate": 1.608401717760109e-05, + "loss": 2.4941, + "step": 9710 + }, + { + "epoch": 0.41, + "learning_rate": 1.6079765296143544e-05, + "loss": 2.2755, + "step": 9720 + }, + { + "epoch": 0.41, + "learning_rate": 1.6075513414686e-05, + "loss": 2.052, + "step": 9730 + }, + { + "epoch": 0.41, + "learning_rate": 1.6071261533228457e-05, + "loss": 2.4636, + "step": 9740 + }, + { + "epoch": 0.41, + "learning_rate": 1.606700965177091e-05, + "loss": 2.3624, + "step": 9750 + }, + { + "epoch": 0.41, + "learning_rate": 1.6062757770313365e-05, + "loss": 2.3953, + "step": 9760 + }, + { + "epoch": 0.41, + "learning_rate": 1.6058505888855818e-05, + "loss": 2.2087, + "step": 9770 + }, + { + "epoch": 0.41, + "learning_rate": 1.6054254007398274e-05, + "loss": 1.9505, + "step": 9780 + }, + { + "epoch": 0.41, + "learning_rate": 1.605000212594073e-05, + "loss": 2.5386, + "step": 9790 + }, + { + "epoch": 0.41, + "learning_rate": 1.6045750244483183e-05, + "loss": 2.4996, + "step": 9800 + }, + { + "epoch": 0.41, + "learning_rate": 1.604149836302564e-05, + "loss": 2.4536, + "step": 9810 + }, + { + "epoch": 0.41, + "learning_rate": 1.6037246481568096e-05, + "loss": 1.847, + "step": 9820 + }, + { + "epoch": 0.41, + "learning_rate": 1.603299460011055e-05, + "loss": 2.4618, + "step": 9830 + }, + { + "epoch": 0.41, + "learning_rate": 1.6028742718653004e-05, + "loss": 2.4738, + "step": 9840 + }, + { + "epoch": 0.41, + "learning_rate": 1.602449083719546e-05, + "loss": 2.1588, + "step": 9850 + }, + { + "epoch": 0.41, + "learning_rate": 1.6020238955737913e-05, + "loss": 2.3344, + "step": 9860 + }, + { + "epoch": 0.42, + "learning_rate": 1.601598707428037e-05, + "loss": 2.5213, + "step": 9870 + }, + { + "epoch": 0.42, + "learning_rate": 1.6011735192822826e-05, + "loss": 2.374, + "step": 9880 + }, + { + "epoch": 0.42, + "learning_rate": 1.600748331136528e-05, + "loss": 2.6436, + "step": 9890 + }, + { + "epoch": 0.42, + "learning_rate": 1.6003231429907735e-05, + "loss": 2.1786, + "step": 9900 + }, + { + "epoch": 0.42, + "learning_rate": 1.599897954845019e-05, + "loss": 2.7025, + "step": 9910 + }, + { + "epoch": 0.42, + "learning_rate": 1.5994727666992647e-05, + "loss": 2.4774, + "step": 9920 + }, + { + "epoch": 0.42, + "learning_rate": 1.59904757855351e-05, + "loss": 2.2819, + "step": 9930 + }, + { + "epoch": 0.42, + "learning_rate": 1.5986223904077556e-05, + "loss": 2.1652, + "step": 9940 + }, + { + "epoch": 0.42, + "learning_rate": 1.5981972022620012e-05, + "loss": 2.084, + "step": 9950 + }, + { + "epoch": 0.42, + "learning_rate": 1.5977720141162465e-05, + "loss": 2.4612, + "step": 9960 + }, + { + "epoch": 0.42, + "learning_rate": 1.597346825970492e-05, + "loss": 2.0647, + "step": 9970 + }, + { + "epoch": 0.42, + "learning_rate": 1.5969216378247377e-05, + "loss": 2.8856, + "step": 9980 + }, + { + "epoch": 0.42, + "learning_rate": 1.596496449678983e-05, + "loss": 2.4716, + "step": 9990 + }, + { + "epoch": 0.42, + "learning_rate": 1.5960712615332286e-05, + "loss": 2.6674, + "step": 10000 + }, + { + "epoch": 0.42, + "eval_loss": 1.9638748168945312, + "eval_runtime": 157.7497, + "eval_samples_per_second": 15.867, + "eval_steps_per_second": 7.937, + "step": 10000 + }, + { + "epoch": 0.42, + "learning_rate": 1.5956460733874742e-05, + "loss": 2.8031, + "step": 10010 + }, + { + "epoch": 0.42, + "learning_rate": 1.5952208852417198e-05, + "loss": 2.4077, + "step": 10020 + }, + { + "epoch": 0.42, + "learning_rate": 1.594795697095965e-05, + "loss": 2.1363, + "step": 10030 + }, + { + "epoch": 0.42, + "learning_rate": 1.5943705089502107e-05, + "loss": 2.5921, + "step": 10040 + }, + { + "epoch": 0.42, + "learning_rate": 1.5939453208044563e-05, + "loss": 2.0634, + "step": 10050 + }, + { + "epoch": 0.42, + "learning_rate": 1.5935201326587016e-05, + "loss": 2.7191, + "step": 10060 + }, + { + "epoch": 0.42, + "learning_rate": 1.5930949445129472e-05, + "loss": 2.2552, + "step": 10070 + }, + { + "epoch": 0.42, + "learning_rate": 1.592669756367193e-05, + "loss": 2.503, + "step": 10080 + }, + { + "epoch": 0.42, + "learning_rate": 1.592244568221438e-05, + "loss": 2.2036, + "step": 10090 + }, + { + "epoch": 0.42, + "learning_rate": 1.5918193800756837e-05, + "loss": 2.637, + "step": 10100 + }, + { + "epoch": 0.43, + "learning_rate": 1.591394191929929e-05, + "loss": 2.2863, + "step": 10110 + }, + { + "epoch": 0.43, + "learning_rate": 1.5909690037841746e-05, + "loss": 2.3317, + "step": 10120 + }, + { + "epoch": 0.43, + "learning_rate": 1.5905438156384202e-05, + "loss": 2.2109, + "step": 10130 + }, + { + "epoch": 0.43, + "learning_rate": 1.5901186274926655e-05, + "loss": 2.5821, + "step": 10140 + }, + { + "epoch": 0.43, + "learning_rate": 1.589693439346911e-05, + "loss": 2.5003, + "step": 10150 + }, + { + "epoch": 0.43, + "learning_rate": 1.5892682512011564e-05, + "loss": 2.5828, + "step": 10160 + }, + { + "epoch": 0.43, + "learning_rate": 1.588843063055402e-05, + "loss": 2.2188, + "step": 10170 + }, + { + "epoch": 0.43, + "learning_rate": 1.5884178749096476e-05, + "loss": 2.1588, + "step": 10180 + }, + { + "epoch": 0.43, + "learning_rate": 1.587992686763893e-05, + "loss": 2.7165, + "step": 10190 + }, + { + "epoch": 0.43, + "learning_rate": 1.5875674986181385e-05, + "loss": 2.4099, + "step": 10200 + }, + { + "epoch": 0.43, + "learning_rate": 1.587142310472384e-05, + "loss": 2.1347, + "step": 10210 + }, + { + "epoch": 0.43, + "learning_rate": 1.5867171223266294e-05, + "loss": 2.3912, + "step": 10220 + }, + { + "epoch": 0.43, + "learning_rate": 1.586291934180875e-05, + "loss": 2.3647, + "step": 10230 + }, + { + "epoch": 0.43, + "learning_rate": 1.5858667460351207e-05, + "loss": 2.6857, + "step": 10240 + }, + { + "epoch": 0.43, + "learning_rate": 1.5854415578893663e-05, + "loss": 2.5615, + "step": 10250 + }, + { + "epoch": 0.43, + "learning_rate": 1.5850163697436115e-05, + "loss": 2.8766, + "step": 10260 + }, + { + "epoch": 0.43, + "learning_rate": 1.584591181597857e-05, + "loss": 2.1335, + "step": 10270 + }, + { + "epoch": 0.43, + "learning_rate": 1.5841659934521028e-05, + "loss": 2.3573, + "step": 10280 + }, + { + "epoch": 0.43, + "learning_rate": 1.583740805306348e-05, + "loss": 2.3425, + "step": 10290 + }, + { + "epoch": 0.43, + "learning_rate": 1.5833156171605937e-05, + "loss": 2.3004, + "step": 10300 + }, + { + "epoch": 0.43, + "learning_rate": 1.5828904290148393e-05, + "loss": 2.429, + "step": 10310 + }, + { + "epoch": 0.43, + "learning_rate": 1.5824652408690846e-05, + "loss": 2.3504, + "step": 10320 + }, + { + "epoch": 0.43, + "learning_rate": 1.5820400527233302e-05, + "loss": 2.4716, + "step": 10330 + }, + { + "epoch": 0.44, + "learning_rate": 1.5816148645775758e-05, + "loss": 2.3105, + "step": 10340 + }, + { + "epoch": 0.44, + "learning_rate": 1.5811896764318214e-05, + "loss": 2.1203, + "step": 10350 + }, + { + "epoch": 0.44, + "learning_rate": 1.5807644882860667e-05, + "loss": 2.7303, + "step": 10360 + }, + { + "epoch": 0.44, + "learning_rate": 1.5803393001403123e-05, + "loss": 2.4012, + "step": 10370 + }, + { + "epoch": 0.44, + "learning_rate": 1.579914111994558e-05, + "loss": 2.5295, + "step": 10380 + }, + { + "epoch": 0.44, + "learning_rate": 1.5794889238488032e-05, + "loss": 2.82, + "step": 10390 + }, + { + "epoch": 0.44, + "learning_rate": 1.5790637357030488e-05, + "loss": 2.279, + "step": 10400 + }, + { + "epoch": 0.44, + "learning_rate": 1.5786385475572944e-05, + "loss": 1.7126, + "step": 10410 + }, + { + "epoch": 0.44, + "learning_rate": 1.5782133594115397e-05, + "loss": 2.3496, + "step": 10420 + }, + { + "epoch": 0.44, + "learning_rate": 1.5777881712657853e-05, + "loss": 2.4562, + "step": 10430 + }, + { + "epoch": 0.44, + "learning_rate": 1.577362983120031e-05, + "loss": 2.485, + "step": 10440 + }, + { + "epoch": 0.44, + "learning_rate": 1.5769377949742762e-05, + "loss": 2.0319, + "step": 10450 + }, + { + "epoch": 0.44, + "learning_rate": 1.5765126068285218e-05, + "loss": 2.8354, + "step": 10460 + }, + { + "epoch": 0.44, + "learning_rate": 1.5760874186827674e-05, + "loss": 1.943, + "step": 10470 + }, + { + "epoch": 0.44, + "learning_rate": 1.5756622305370127e-05, + "loss": 2.2612, + "step": 10480 + }, + { + "epoch": 0.44, + "learning_rate": 1.5752370423912583e-05, + "loss": 2.523, + "step": 10490 + }, + { + "epoch": 0.44, + "learning_rate": 1.5748118542455036e-05, + "loss": 2.8775, + "step": 10500 + }, + { + "epoch": 0.44, + "eval_loss": 1.934218406677246, + "eval_runtime": 157.7398, + "eval_samples_per_second": 15.868, + "eval_steps_per_second": 7.937, + "step": 10500 + }, + { + "epoch": 0.44, + "learning_rate": 1.5743866660997492e-05, + "loss": 2.356, + "step": 10510 + }, + { + "epoch": 0.44, + "learning_rate": 1.573961477953995e-05, + "loss": 2.6021, + "step": 10520 + }, + { + "epoch": 0.44, + "learning_rate": 1.57353628980824e-05, + "loss": 2.2362, + "step": 10530 + }, + { + "epoch": 0.44, + "learning_rate": 1.5731111016624857e-05, + "loss": 2.6383, + "step": 10540 + }, + { + "epoch": 0.44, + "learning_rate": 1.5726859135167313e-05, + "loss": 2.407, + "step": 10550 + }, + { + "epoch": 0.44, + "learning_rate": 1.5722607253709766e-05, + "loss": 2.8033, + "step": 10560 + }, + { + "epoch": 0.44, + "learning_rate": 1.5718355372252222e-05, + "loss": 2.2644, + "step": 10570 + }, + { + "epoch": 0.45, + "learning_rate": 1.571410349079468e-05, + "loss": 3.0515, + "step": 10580 + }, + { + "epoch": 0.45, + "learning_rate": 1.570985160933713e-05, + "loss": 2.0456, + "step": 10590 + }, + { + "epoch": 0.45, + "learning_rate": 1.5705599727879587e-05, + "loss": 2.1008, + "step": 10600 + }, + { + "epoch": 0.45, + "learning_rate": 1.5701347846422044e-05, + "loss": 2.6307, + "step": 10610 + }, + { + "epoch": 0.45, + "learning_rate": 1.5697095964964496e-05, + "loss": 2.4413, + "step": 10620 + }, + { + "epoch": 0.45, + "learning_rate": 1.5692844083506952e-05, + "loss": 2.7403, + "step": 10630 + }, + { + "epoch": 0.45, + "learning_rate": 1.568859220204941e-05, + "loss": 2.5489, + "step": 10640 + }, + { + "epoch": 0.45, + "learning_rate": 1.5684340320591865e-05, + "loss": 2.6111, + "step": 10650 + }, + { + "epoch": 0.45, + "learning_rate": 1.5680088439134318e-05, + "loss": 2.4437, + "step": 10660 + }, + { + "epoch": 0.45, + "learning_rate": 1.5675836557676774e-05, + "loss": 2.334, + "step": 10670 + }, + { + "epoch": 0.45, + "learning_rate": 1.567158467621923e-05, + "loss": 2.208, + "step": 10680 + }, + { + "epoch": 0.45, + "learning_rate": 1.5667332794761683e-05, + "loss": 2.502, + "step": 10690 + }, + { + "epoch": 0.45, + "learning_rate": 1.566308091330414e-05, + "loss": 2.6235, + "step": 10700 + }, + { + "epoch": 0.45, + "learning_rate": 1.5658829031846595e-05, + "loss": 2.4196, + "step": 10710 + }, + { + "epoch": 0.45, + "learning_rate": 1.5654577150389048e-05, + "loss": 2.6142, + "step": 10720 + }, + { + "epoch": 0.45, + "learning_rate": 1.5650325268931504e-05, + "loss": 2.6184, + "step": 10730 + }, + { + "epoch": 0.45, + "learning_rate": 1.564607338747396e-05, + "loss": 2.2517, + "step": 10740 + }, + { + "epoch": 0.45, + "learning_rate": 1.5641821506016413e-05, + "loss": 2.8515, + "step": 10750 + }, + { + "epoch": 0.45, + "learning_rate": 1.563756962455887e-05, + "loss": 2.4117, + "step": 10760 + }, + { + "epoch": 0.45, + "learning_rate": 1.5633317743101325e-05, + "loss": 2.2778, + "step": 10770 + }, + { + "epoch": 0.45, + "learning_rate": 1.562906586164378e-05, + "loss": 2.7605, + "step": 10780 + }, + { + "epoch": 0.45, + "learning_rate": 1.5624813980186234e-05, + "loss": 2.4827, + "step": 10790 + }, + { + "epoch": 0.45, + "learning_rate": 1.562056209872869e-05, + "loss": 2.3697, + "step": 10800 + }, + { + "epoch": 0.45, + "learning_rate": 1.5616310217271143e-05, + "loss": 2.8352, + "step": 10810 + }, + { + "epoch": 0.46, + "learning_rate": 1.56120583358136e-05, + "loss": 2.1289, + "step": 10820 + }, + { + "epoch": 0.46, + "learning_rate": 1.5607806454356055e-05, + "loss": 2.8038, + "step": 10830 + }, + { + "epoch": 0.46, + "learning_rate": 1.5603554572898508e-05, + "loss": 2.6752, + "step": 10840 + }, + { + "epoch": 0.46, + "learning_rate": 1.5599302691440964e-05, + "loss": 2.4184, + "step": 10850 + }, + { + "epoch": 0.46, + "learning_rate": 1.559505080998342e-05, + "loss": 2.7613, + "step": 10860 + }, + { + "epoch": 0.46, + "learning_rate": 1.5590798928525873e-05, + "loss": 1.7401, + "step": 10870 + }, + { + "epoch": 0.46, + "learning_rate": 1.558654704706833e-05, + "loss": 2.1031, + "step": 10880 + }, + { + "epoch": 0.46, + "learning_rate": 1.5582295165610782e-05, + "loss": 2.1724, + "step": 10890 + }, + { + "epoch": 0.46, + "learning_rate": 1.5578043284153238e-05, + "loss": 2.3654, + "step": 10900 + }, + { + "epoch": 0.46, + "learning_rate": 1.5573791402695694e-05, + "loss": 1.8411, + "step": 10910 + }, + { + "epoch": 0.46, + "learning_rate": 1.5569539521238147e-05, + "loss": 2.3196, + "step": 10920 + }, + { + "epoch": 0.46, + "learning_rate": 1.5565287639780603e-05, + "loss": 2.9633, + "step": 10930 + }, + { + "epoch": 0.46, + "learning_rate": 1.556103575832306e-05, + "loss": 2.9868, + "step": 10940 + }, + { + "epoch": 0.46, + "learning_rate": 1.5556783876865512e-05, + "loss": 2.6125, + "step": 10950 + }, + { + "epoch": 0.46, + "learning_rate": 1.5552531995407968e-05, + "loss": 2.3021, + "step": 10960 + }, + { + "epoch": 0.46, + "learning_rate": 1.5548280113950424e-05, + "loss": 2.2658, + "step": 10970 + }, + { + "epoch": 0.46, + "learning_rate": 1.554402823249288e-05, + "loss": 2.5044, + "step": 10980 + }, + { + "epoch": 0.46, + "learning_rate": 1.5539776351035333e-05, + "loss": 2.8643, + "step": 10990 + }, + { + "epoch": 0.46, + "learning_rate": 1.553552446957779e-05, + "loss": 2.5047, + "step": 11000 + }, + { + "epoch": 0.46, + "eval_loss": 1.9329371452331543, + "eval_runtime": 157.6887, + "eval_samples_per_second": 15.873, + "eval_steps_per_second": 7.94, + "step": 11000 + }, + { + "epoch": 0.46, + "learning_rate": 1.5531272588120246e-05, + "loss": 2.3317, + "step": 11010 + }, + { + "epoch": 0.46, + "learning_rate": 1.55270207066627e-05, + "loss": 2.6039, + "step": 11020 + }, + { + "epoch": 0.46, + "learning_rate": 1.5522768825205155e-05, + "loss": 2.8393, + "step": 11030 + }, + { + "epoch": 0.46, + "learning_rate": 1.551851694374761e-05, + "loss": 2.6933, + "step": 11040 + }, + { + "epoch": 0.46, + "learning_rate": 1.5514265062290063e-05, + "loss": 2.1474, + "step": 11050 + }, + { + "epoch": 0.47, + "learning_rate": 1.551001318083252e-05, + "loss": 2.6101, + "step": 11060 + }, + { + "epoch": 0.47, + "learning_rate": 1.5505761299374976e-05, + "loss": 2.3679, + "step": 11070 + }, + { + "epoch": 0.47, + "learning_rate": 1.5501509417917432e-05, + "loss": 1.9772, + "step": 11080 + }, + { + "epoch": 0.47, + "learning_rate": 1.5497257536459885e-05, + "loss": 2.5859, + "step": 11090 + }, + { + "epoch": 0.47, + "learning_rate": 1.549300565500234e-05, + "loss": 1.9622, + "step": 11100 + }, + { + "epoch": 0.47, + "learning_rate": 1.5488753773544797e-05, + "loss": 2.3536, + "step": 11110 + }, + { + "epoch": 0.47, + "learning_rate": 1.548450189208725e-05, + "loss": 2.4914, + "step": 11120 + }, + { + "epoch": 0.47, + "learning_rate": 1.5480250010629706e-05, + "loss": 2.5306, + "step": 11130 + }, + { + "epoch": 0.47, + "learning_rate": 1.5475998129172162e-05, + "loss": 2.2826, + "step": 11140 + }, + { + "epoch": 0.47, + "learning_rate": 1.5471746247714615e-05, + "loss": 2.4863, + "step": 11150 + }, + { + "epoch": 0.47, + "learning_rate": 1.546749436625707e-05, + "loss": 2.7049, + "step": 11160 + }, + { + "epoch": 0.47, + "learning_rate": 1.5463242484799527e-05, + "loss": 1.9233, + "step": 11170 + }, + { + "epoch": 0.47, + "learning_rate": 1.545899060334198e-05, + "loss": 1.9265, + "step": 11180 + }, + { + "epoch": 0.47, + "learning_rate": 1.5454738721884436e-05, + "loss": 2.3964, + "step": 11190 + }, + { + "epoch": 0.47, + "learning_rate": 1.545048684042689e-05, + "loss": 2.4938, + "step": 11200 + }, + { + "epoch": 0.47, + "learning_rate": 1.5446234958969345e-05, + "loss": 2.3325, + "step": 11210 + }, + { + "epoch": 0.47, + "learning_rate": 1.54419830775118e-05, + "loss": 2.0072, + "step": 11220 + }, + { + "epoch": 0.47, + "learning_rate": 1.5437731196054254e-05, + "loss": 2.2178, + "step": 11230 + }, + { + "epoch": 0.47, + "learning_rate": 1.543347931459671e-05, + "loss": 2.2869, + "step": 11240 + }, + { + "epoch": 0.47, + "learning_rate": 1.5429227433139166e-05, + "loss": 2.2809, + "step": 11250 + }, + { + "epoch": 0.47, + "learning_rate": 1.542497555168162e-05, + "loss": 2.0549, + "step": 11260 + }, + { + "epoch": 0.47, + "learning_rate": 1.5420723670224075e-05, + "loss": 2.5829, + "step": 11270 + }, + { + "epoch": 0.47, + "learning_rate": 1.5416471788766528e-05, + "loss": 2.6291, + "step": 11280 + }, + { + "epoch": 0.47, + "learning_rate": 1.5412219907308984e-05, + "loss": 2.5527, + "step": 11290 + }, + { + "epoch": 0.48, + "learning_rate": 1.540796802585144e-05, + "loss": 2.4998, + "step": 11300 + }, + { + "epoch": 0.48, + "learning_rate": 1.5403716144393896e-05, + "loss": 2.2437, + "step": 11310 + }, + { + "epoch": 0.48, + "learning_rate": 1.539946426293635e-05, + "loss": 2.8028, + "step": 11320 + }, + { + "epoch": 0.48, + "learning_rate": 1.5395212381478805e-05, + "loss": 2.146, + "step": 11330 + }, + { + "epoch": 0.48, + "learning_rate": 1.539096050002126e-05, + "loss": 2.3451, + "step": 11340 + }, + { + "epoch": 0.48, + "learning_rate": 1.5386708618563714e-05, + "loss": 2.5306, + "step": 11350 + }, + { + "epoch": 0.48, + "learning_rate": 1.538245673710617e-05, + "loss": 2.472, + "step": 11360 + }, + { + "epoch": 0.48, + "learning_rate": 1.5378204855648626e-05, + "loss": 2.3911, + "step": 11370 + }, + { + "epoch": 0.48, + "learning_rate": 1.537395297419108e-05, + "loss": 2.4329, + "step": 11380 + }, + { + "epoch": 0.48, + "learning_rate": 1.5369701092733535e-05, + "loss": 2.5001, + "step": 11390 + }, + { + "epoch": 0.48, + "learning_rate": 1.536544921127599e-05, + "loss": 2.8697, + "step": 11400 + }, + { + "epoch": 0.48, + "learning_rate": 1.5361197329818448e-05, + "loss": 2.2511, + "step": 11410 + }, + { + "epoch": 0.48, + "learning_rate": 1.53569454483609e-05, + "loss": 2.6034, + "step": 11420 + }, + { + "epoch": 0.48, + "learning_rate": 1.5352693566903357e-05, + "loss": 2.6464, + "step": 11430 + }, + { + "epoch": 0.48, + "learning_rate": 1.5348441685445813e-05, + "loss": 2.543, + "step": 11440 + }, + { + "epoch": 0.48, + "learning_rate": 1.5344189803988265e-05, + "loss": 2.3194, + "step": 11450 + }, + { + "epoch": 0.48, + "learning_rate": 1.533993792253072e-05, + "loss": 2.2008, + "step": 11460 + }, + { + "epoch": 0.48, + "learning_rate": 1.5335686041073178e-05, + "loss": 2.3158, + "step": 11470 + }, + { + "epoch": 0.48, + "learning_rate": 1.533143415961563e-05, + "loss": 2.4198, + "step": 11480 + }, + { + "epoch": 0.48, + "learning_rate": 1.5327182278158087e-05, + "loss": 2.2958, + "step": 11490 + }, + { + "epoch": 0.48, + "learning_rate": 1.5322930396700543e-05, + "loss": 2.5799, + "step": 11500 + }, + { + "epoch": 0.48, + "eval_loss": 1.9254064559936523, + "eval_runtime": 162.9964, + "eval_samples_per_second": 15.356, + "eval_steps_per_second": 7.681, + "step": 11500 + }, + { + "epoch": 0.48, + "learning_rate": 1.5318678515243e-05, + "loss": 2.7565, + "step": 11510 + }, + { + "epoch": 0.48, + "learning_rate": 1.5314426633785452e-05, + "loss": 2.1682, + "step": 11520 + }, + { + "epoch": 0.49, + "learning_rate": 1.5310174752327908e-05, + "loss": 2.5621, + "step": 11530 + }, + { + "epoch": 0.49, + "learning_rate": 1.530592287087036e-05, + "loss": 2.3396, + "step": 11540 + }, + { + "epoch": 0.49, + "learning_rate": 1.5301670989412817e-05, + "loss": 2.2505, + "step": 11550 + }, + { + "epoch": 0.49, + "learning_rate": 1.5297419107955273e-05, + "loss": 2.0222, + "step": 11560 + }, + { + "epoch": 0.49, + "learning_rate": 1.5293167226497726e-05, + "loss": 2.9315, + "step": 11570 + }, + { + "epoch": 0.49, + "learning_rate": 1.5288915345040182e-05, + "loss": 2.4993, + "step": 11580 + }, + { + "epoch": 0.49, + "learning_rate": 1.5284663463582635e-05, + "loss": 3.0336, + "step": 11590 + }, + { + "epoch": 0.49, + "learning_rate": 1.528041158212509e-05, + "loss": 2.3143, + "step": 11600 + }, + { + "epoch": 0.49, + "learning_rate": 1.5276159700667547e-05, + "loss": 2.3467, + "step": 11610 + }, + { + "epoch": 0.49, + "learning_rate": 1.527190781921e-05, + "loss": 2.0148, + "step": 11620 + }, + { + "epoch": 0.49, + "learning_rate": 1.5267655937752456e-05, + "loss": 2.5407, + "step": 11630 + }, + { + "epoch": 0.49, + "learning_rate": 1.5263404056294912e-05, + "loss": 2.3283, + "step": 11640 + }, + { + "epoch": 0.49, + "learning_rate": 1.5259152174837365e-05, + "loss": 2.1069, + "step": 11650 + }, + { + "epoch": 0.49, + "learning_rate": 1.5254900293379823e-05, + "loss": 2.3977, + "step": 11660 + }, + { + "epoch": 0.49, + "learning_rate": 1.5250648411922277e-05, + "loss": 2.3978, + "step": 11670 + }, + { + "epoch": 0.49, + "learning_rate": 1.5246396530464732e-05, + "loss": 2.2315, + "step": 11680 + }, + { + "epoch": 0.49, + "learning_rate": 1.5242144649007186e-05, + "loss": 2.5454, + "step": 11690 + }, + { + "epoch": 0.49, + "learning_rate": 1.5237892767549642e-05, + "loss": 2.266, + "step": 11700 + }, + { + "epoch": 0.49, + "learning_rate": 1.5233640886092097e-05, + "loss": 2.2491, + "step": 11710 + }, + { + "epoch": 0.49, + "learning_rate": 1.5229389004634551e-05, + "loss": 2.0303, + "step": 11720 + }, + { + "epoch": 0.49, + "learning_rate": 1.5225137123177007e-05, + "loss": 2.3941, + "step": 11730 + }, + { + "epoch": 0.49, + "learning_rate": 1.5220885241719463e-05, + "loss": 2.4527, + "step": 11740 + }, + { + "epoch": 0.49, + "learning_rate": 1.5216633360261916e-05, + "loss": 2.1319, + "step": 11750 + }, + { + "epoch": 0.49, + "learning_rate": 1.5212381478804372e-05, + "loss": 1.9728, + "step": 11760 + }, + { + "epoch": 0.5, + "learning_rate": 1.5208129597346828e-05, + "loss": 2.3851, + "step": 11770 + }, + { + "epoch": 0.5, + "learning_rate": 1.5203877715889281e-05, + "loss": 2.5249, + "step": 11780 + }, + { + "epoch": 0.5, + "learning_rate": 1.5199625834431737e-05, + "loss": 2.318, + "step": 11790 + }, + { + "epoch": 0.5, + "learning_rate": 1.5195373952974194e-05, + "loss": 2.3987, + "step": 11800 + }, + { + "epoch": 0.5, + "learning_rate": 1.5191122071516646e-05, + "loss": 2.2828, + "step": 11810 + }, + { + "epoch": 0.5, + "learning_rate": 1.5186870190059102e-05, + "loss": 2.2173, + "step": 11820 + }, + { + "epoch": 0.5, + "learning_rate": 1.5182618308601557e-05, + "loss": 2.3421, + "step": 11830 + }, + { + "epoch": 0.5, + "learning_rate": 1.5178366427144013e-05, + "loss": 2.3685, + "step": 11840 + }, + { + "epoch": 0.5, + "learning_rate": 1.5174114545686468e-05, + "loss": 2.3534, + "step": 11850 + }, + { + "epoch": 0.5, + "learning_rate": 1.5169862664228922e-05, + "loss": 2.2374, + "step": 11860 + }, + { + "epoch": 0.5, + "learning_rate": 1.5165610782771378e-05, + "loss": 2.2038, + "step": 11870 + }, + { + "epoch": 0.5, + "learning_rate": 1.5161358901313833e-05, + "loss": 2.2161, + "step": 11880 + }, + { + "epoch": 0.5, + "learning_rate": 1.5157107019856287e-05, + "loss": 2.6806, + "step": 11890 + }, + { + "epoch": 0.5, + "learning_rate": 1.5152855138398743e-05, + "loss": 2.3153, + "step": 11900 + }, + { + "epoch": 0.5, + "learning_rate": 1.5148603256941196e-05, + "loss": 2.0646, + "step": 11910 + }, + { + "epoch": 0.5, + "learning_rate": 1.5144351375483652e-05, + "loss": 2.3844, + "step": 11920 + }, + { + "epoch": 0.5, + "learning_rate": 1.5140099494026108e-05, + "loss": 2.5267, + "step": 11930 + }, + { + "epoch": 0.5, + "learning_rate": 1.5135847612568564e-05, + "loss": 2.3379, + "step": 11940 + }, + { + "epoch": 0.5, + "learning_rate": 1.5131595731111017e-05, + "loss": 2.2537, + "step": 11950 + }, + { + "epoch": 0.5, + "learning_rate": 1.5127343849653473e-05, + "loss": 2.0048, + "step": 11960 + }, + { + "epoch": 0.5, + "learning_rate": 1.512309196819593e-05, + "loss": 2.5609, + "step": 11970 + }, + { + "epoch": 0.5, + "learning_rate": 1.5118840086738382e-05, + "loss": 2.4389, + "step": 11980 + }, + { + "epoch": 0.5, + "learning_rate": 1.5114588205280838e-05, + "loss": 2.3636, + "step": 11990 + }, + { + "epoch": 0.5, + "learning_rate": 1.5110336323823293e-05, + "loss": 2.4979, + "step": 12000 + }, + { + "epoch": 0.5, + "eval_loss": 1.908052921295166, + "eval_runtime": 175.5855, + "eval_samples_per_second": 14.255, + "eval_steps_per_second": 7.13, + "step": 12000 + }, + { + "epoch": 0.51, + "learning_rate": 1.5106084442365747e-05, + "loss": 2.5816, + "step": 12010 + }, + { + "epoch": 0.51, + "learning_rate": 1.5101832560908203e-05, + "loss": 2.7254, + "step": 12020 + }, + { + "epoch": 0.51, + "learning_rate": 1.5097580679450658e-05, + "loss": 2.1259, + "step": 12030 + }, + { + "epoch": 0.51, + "learning_rate": 1.5093328797993114e-05, + "loss": 2.6026, + "step": 12040 + }, + { + "epoch": 0.51, + "learning_rate": 1.5089076916535569e-05, + "loss": 2.2787, + "step": 12050 + }, + { + "epoch": 0.51, + "learning_rate": 1.5084825035078023e-05, + "loss": 2.3396, + "step": 12060 + }, + { + "epoch": 0.51, + "learning_rate": 1.508057315362048e-05, + "loss": 2.4013, + "step": 12070 + }, + { + "epoch": 0.51, + "learning_rate": 1.5076321272162932e-05, + "loss": 2.5999, + "step": 12080 + }, + { + "epoch": 0.51, + "learning_rate": 1.5072069390705388e-05, + "loss": 1.7368, + "step": 12090 + }, + { + "epoch": 0.51, + "learning_rate": 1.5067817509247844e-05, + "loss": 2.3508, + "step": 12100 + }, + { + "epoch": 0.51, + "learning_rate": 1.5063565627790297e-05, + "loss": 2.0358, + "step": 12110 + }, + { + "epoch": 0.51, + "learning_rate": 1.5059313746332753e-05, + "loss": 2.4323, + "step": 12120 + }, + { + "epoch": 0.51, + "learning_rate": 1.505506186487521e-05, + "loss": 2.298, + "step": 12130 + }, + { + "epoch": 0.51, + "learning_rate": 1.5050809983417665e-05, + "loss": 2.1689, + "step": 12140 + }, + { + "epoch": 0.51, + "learning_rate": 1.5046558101960118e-05, + "loss": 2.3162, + "step": 12150 + }, + { + "epoch": 0.51, + "learning_rate": 1.5042306220502574e-05, + "loss": 2.467, + "step": 12160 + }, + { + "epoch": 0.51, + "learning_rate": 1.5038054339045029e-05, + "loss": 2.0638, + "step": 12170 + }, + { + "epoch": 0.51, + "learning_rate": 1.5033802457587483e-05, + "loss": 2.399, + "step": 12180 + }, + { + "epoch": 0.51, + "learning_rate": 1.502955057612994e-05, + "loss": 2.4608, + "step": 12190 + }, + { + "epoch": 0.51, + "learning_rate": 1.5025298694672394e-05, + "loss": 2.2544, + "step": 12200 + }, + { + "epoch": 0.51, + "learning_rate": 1.5021046813214848e-05, + "loss": 2.4616, + "step": 12210 + }, + { + "epoch": 0.51, + "learning_rate": 1.5016794931757303e-05, + "loss": 2.8345, + "step": 12220 + }, + { + "epoch": 0.51, + "learning_rate": 1.5012543050299759e-05, + "loss": 1.9567, + "step": 12230 + }, + { + "epoch": 0.51, + "learning_rate": 1.5008291168842213e-05, + "loss": 2.3264, + "step": 12240 + }, + { + "epoch": 0.52, + "learning_rate": 1.5004039287384668e-05, + "loss": 2.3828, + "step": 12250 + }, + { + "epoch": 0.52, + "learning_rate": 1.4999787405927124e-05, + "loss": 2.2728, + "step": 12260 + }, + { + "epoch": 0.52, + "learning_rate": 1.499553552446958e-05, + "loss": 2.6114, + "step": 12270 + }, + { + "epoch": 0.52, + "learning_rate": 1.4991283643012033e-05, + "loss": 2.6562, + "step": 12280 + }, + { + "epoch": 0.52, + "learning_rate": 1.4987031761554489e-05, + "loss": 2.5292, + "step": 12290 + }, + { + "epoch": 0.52, + "learning_rate": 1.4982779880096945e-05, + "loss": 1.8669, + "step": 12300 + }, + { + "epoch": 0.52, + "learning_rate": 1.4978527998639398e-05, + "loss": 2.4771, + "step": 12310 + }, + { + "epoch": 0.52, + "learning_rate": 1.4974276117181854e-05, + "loss": 1.9845, + "step": 12320 + }, + { + "epoch": 0.52, + "learning_rate": 1.497002423572431e-05, + "loss": 2.1029, + "step": 12330 + }, + { + "epoch": 0.52, + "learning_rate": 1.4965772354266763e-05, + "loss": 2.3275, + "step": 12340 + }, + { + "epoch": 0.52, + "learning_rate": 1.496152047280922e-05, + "loss": 2.2364, + "step": 12350 + }, + { + "epoch": 0.52, + "learning_rate": 1.4957268591351675e-05, + "loss": 2.2305, + "step": 12360 + }, + { + "epoch": 0.52, + "learning_rate": 1.495301670989413e-05, + "loss": 1.9404, + "step": 12370 + }, + { + "epoch": 0.52, + "learning_rate": 1.4948764828436584e-05, + "loss": 2.4992, + "step": 12380 + }, + { + "epoch": 0.52, + "learning_rate": 1.4944512946979039e-05, + "loss": 2.3739, + "step": 12390 + }, + { + "epoch": 0.52, + "learning_rate": 1.4940261065521495e-05, + "loss": 2.1145, + "step": 12400 + }, + { + "epoch": 0.52, + "learning_rate": 1.493600918406395e-05, + "loss": 2.4534, + "step": 12410 + }, + { + "epoch": 0.52, + "learning_rate": 1.4931757302606404e-05, + "loss": 2.2821, + "step": 12420 + }, + { + "epoch": 0.52, + "learning_rate": 1.492750542114886e-05, + "loss": 2.3779, + "step": 12430 + }, + { + "epoch": 0.52, + "learning_rate": 1.4923253539691314e-05, + "loss": 2.7704, + "step": 12440 + }, + { + "epoch": 0.52, + "learning_rate": 1.4919001658233769e-05, + "loss": 2.2016, + "step": 12450 + }, + { + "epoch": 0.52, + "learning_rate": 1.4914749776776225e-05, + "loss": 2.4465, + "step": 12460 + }, + { + "epoch": 0.52, + "learning_rate": 1.4910497895318681e-05, + "loss": 2.2369, + "step": 12470 + }, + { + "epoch": 0.53, + "learning_rate": 1.4906246013861134e-05, + "loss": 2.1973, + "step": 12480 + }, + { + "epoch": 0.53, + "learning_rate": 1.490199413240359e-05, + "loss": 2.2326, + "step": 12490 + }, + { + "epoch": 0.53, + "learning_rate": 1.4897742250946046e-05, + "loss": 2.8084, + "step": 12500 + }, + { + "epoch": 0.53, + "eval_loss": 1.8825452327728271, + "eval_runtime": 175.6636, + "eval_samples_per_second": 14.249, + "eval_steps_per_second": 7.127, + "step": 12500 + }, + { + "epoch": 0.53, + "learning_rate": 1.4893490369488499e-05, + "loss": 2.4276, + "step": 12510 + }, + { + "epoch": 0.53, + "learning_rate": 1.4889238488030955e-05, + "loss": 2.6502, + "step": 12520 + }, + { + "epoch": 0.53, + "learning_rate": 1.4884986606573411e-05, + "loss": 2.1759, + "step": 12530 + }, + { + "epoch": 0.53, + "learning_rate": 1.4880734725115864e-05, + "loss": 2.8552, + "step": 12540 + }, + { + "epoch": 0.53, + "learning_rate": 1.487648284365832e-05, + "loss": 1.9784, + "step": 12550 + }, + { + "epoch": 0.53, + "learning_rate": 1.4872230962200775e-05, + "loss": 2.3719, + "step": 12560 + }, + { + "epoch": 0.53, + "learning_rate": 1.4867979080743231e-05, + "loss": 2.5856, + "step": 12570 + }, + { + "epoch": 0.53, + "learning_rate": 1.4863727199285685e-05, + "loss": 1.9766, + "step": 12580 + }, + { + "epoch": 0.53, + "learning_rate": 1.485947531782814e-05, + "loss": 2.7214, + "step": 12590 + }, + { + "epoch": 0.53, + "learning_rate": 1.4855223436370596e-05, + "loss": 2.6672, + "step": 12600 + }, + { + "epoch": 0.53, + "learning_rate": 1.4850971554913049e-05, + "loss": 2.4406, + "step": 12610 + }, + { + "epoch": 0.53, + "learning_rate": 1.4846719673455505e-05, + "loss": 2.1624, + "step": 12620 + }, + { + "epoch": 0.53, + "learning_rate": 1.4842467791997961e-05, + "loss": 2.4437, + "step": 12630 + }, + { + "epoch": 0.53, + "learning_rate": 1.4838215910540414e-05, + "loss": 2.6915, + "step": 12640 + }, + { + "epoch": 0.53, + "learning_rate": 1.483396402908287e-05, + "loss": 2.0104, + "step": 12650 + }, + { + "epoch": 0.53, + "learning_rate": 1.4829712147625326e-05, + "loss": 2.6162, + "step": 12660 + }, + { + "epoch": 0.53, + "learning_rate": 1.4825460266167782e-05, + "loss": 2.4617, + "step": 12670 + }, + { + "epoch": 0.53, + "learning_rate": 1.4821208384710235e-05, + "loss": 2.0574, + "step": 12680 + }, + { + "epoch": 0.53, + "learning_rate": 1.4816956503252691e-05, + "loss": 2.5159, + "step": 12690 + }, + { + "epoch": 0.53, + "learning_rate": 1.4812704621795146e-05, + "loss": 2.6508, + "step": 12700 + }, + { + "epoch": 0.53, + "learning_rate": 1.48084527403376e-05, + "loss": 2.7016, + "step": 12710 + }, + { + "epoch": 0.54, + "learning_rate": 1.4804200858880056e-05, + "loss": 2.598, + "step": 12720 + }, + { + "epoch": 0.54, + "learning_rate": 1.479994897742251e-05, + "loss": 2.2087, + "step": 12730 + }, + { + "epoch": 0.54, + "learning_rate": 1.4795697095964965e-05, + "loss": 1.8749, + "step": 12740 + }, + { + "epoch": 0.54, + "learning_rate": 1.4791445214507421e-05, + "loss": 2.2771, + "step": 12750 + }, + { + "epoch": 0.54, + "learning_rate": 1.4787193333049876e-05, + "loss": 2.6843, + "step": 12760 + }, + { + "epoch": 0.54, + "learning_rate": 1.478294145159233e-05, + "loss": 2.4945, + "step": 12770 + }, + { + "epoch": 0.54, + "learning_rate": 1.4778689570134785e-05, + "loss": 2.8105, + "step": 12780 + }, + { + "epoch": 0.54, + "learning_rate": 1.477443768867724e-05, + "loss": 2.5913, + "step": 12790 + }, + { + "epoch": 0.54, + "learning_rate": 1.4770185807219697e-05, + "loss": 2.0956, + "step": 12800 + }, + { + "epoch": 0.54, + "learning_rate": 1.476593392576215e-05, + "loss": 2.7534, + "step": 12810 + }, + { + "epoch": 0.54, + "learning_rate": 1.4761682044304606e-05, + "loss": 2.4345, + "step": 12820 + }, + { + "epoch": 0.54, + "learning_rate": 1.4757430162847062e-05, + "loss": 2.3806, + "step": 12830 + }, + { + "epoch": 0.54, + "learning_rate": 1.4753178281389515e-05, + "loss": 1.8217, + "step": 12840 + }, + { + "epoch": 0.54, + "learning_rate": 1.4748926399931971e-05, + "loss": 2.4636, + "step": 12850 + }, + { + "epoch": 0.54, + "learning_rate": 1.4744674518474427e-05, + "loss": 2.8308, + "step": 12860 + }, + { + "epoch": 0.54, + "learning_rate": 1.474042263701688e-05, + "loss": 2.1157, + "step": 12870 + }, + { + "epoch": 0.54, + "learning_rate": 1.4736170755559336e-05, + "loss": 2.5942, + "step": 12880 + }, + { + "epoch": 0.54, + "learning_rate": 1.4731918874101792e-05, + "loss": 2.4219, + "step": 12890 + }, + { + "epoch": 0.54, + "learning_rate": 1.4727666992644247e-05, + "loss": 2.3702, + "step": 12900 + }, + { + "epoch": 0.54, + "learning_rate": 1.4723415111186701e-05, + "loss": 2.4293, + "step": 12910 + }, + { + "epoch": 0.54, + "learning_rate": 1.4719163229729157e-05, + "loss": 2.1792, + "step": 12920 + }, + { + "epoch": 0.54, + "learning_rate": 1.4714911348271612e-05, + "loss": 2.4994, + "step": 12930 + }, + { + "epoch": 0.54, + "learning_rate": 1.4710659466814066e-05, + "loss": 2.3339, + "step": 12940 + }, + { + "epoch": 0.54, + "learning_rate": 1.470640758535652e-05, + "loss": 2.5192, + "step": 12950 + }, + { + "epoch": 0.55, + "learning_rate": 1.4702155703898977e-05, + "loss": 2.268, + "step": 12960 + }, + { + "epoch": 0.55, + "learning_rate": 1.4697903822441431e-05, + "loss": 2.4811, + "step": 12970 + }, + { + "epoch": 0.55, + "learning_rate": 1.4693651940983886e-05, + "loss": 2.2386, + "step": 12980 + }, + { + "epoch": 0.55, + "learning_rate": 1.4689400059526342e-05, + "loss": 2.6465, + "step": 12990 + }, + { + "epoch": 0.55, + "learning_rate": 1.4685148178068798e-05, + "loss": 2.6341, + "step": 13000 + }, + { + "epoch": 0.55, + "eval_loss": 1.882176160812378, + "eval_runtime": 175.5149, + "eval_samples_per_second": 14.261, + "eval_steps_per_second": 7.133, + "step": 13000 + }, + { + "epoch": 0.55, + "learning_rate": 1.468089629661125e-05, + "loss": 2.2379, + "step": 13010 + }, + { + "epoch": 0.55, + "learning_rate": 1.4676644415153707e-05, + "loss": 2.446, + "step": 13020 + }, + { + "epoch": 0.55, + "learning_rate": 1.4672392533696163e-05, + "loss": 2.3322, + "step": 13030 + }, + { + "epoch": 0.55, + "learning_rate": 1.4668140652238616e-05, + "loss": 2.205, + "step": 13040 + }, + { + "epoch": 0.55, + "learning_rate": 1.4663888770781072e-05, + "loss": 2.3751, + "step": 13050 + }, + { + "epoch": 0.55, + "learning_rate": 1.4659636889323528e-05, + "loss": 2.529, + "step": 13060 + }, + { + "epoch": 0.55, + "learning_rate": 1.4655385007865981e-05, + "loss": 2.4666, + "step": 13070 + }, + { + "epoch": 0.55, + "learning_rate": 1.4651133126408437e-05, + "loss": 2.6353, + "step": 13080 + }, + { + "epoch": 0.55, + "learning_rate": 1.4646881244950893e-05, + "loss": 2.2018, + "step": 13090 + }, + { + "epoch": 0.55, + "learning_rate": 1.4642629363493348e-05, + "loss": 2.2065, + "step": 13100 + }, + { + "epoch": 0.55, + "learning_rate": 1.4638377482035802e-05, + "loss": 2.1817, + "step": 13110 + }, + { + "epoch": 0.55, + "learning_rate": 1.4634125600578257e-05, + "loss": 2.5871, + "step": 13120 + }, + { + "epoch": 0.55, + "learning_rate": 1.4629873719120713e-05, + "loss": 2.6006, + "step": 13130 + }, + { + "epoch": 0.55, + "learning_rate": 1.4625621837663167e-05, + "loss": 2.4155, + "step": 13140 + }, + { + "epoch": 0.55, + "learning_rate": 1.4621369956205622e-05, + "loss": 2.7159, + "step": 13150 + }, + { + "epoch": 0.55, + "learning_rate": 1.4617118074748078e-05, + "loss": 1.9283, + "step": 13160 + }, + { + "epoch": 0.55, + "learning_rate": 1.461286619329053e-05, + "loss": 2.2639, + "step": 13170 + }, + { + "epoch": 0.55, + "learning_rate": 1.4608614311832987e-05, + "loss": 2.7927, + "step": 13180 + }, + { + "epoch": 0.55, + "learning_rate": 1.4604362430375443e-05, + "loss": 2.4052, + "step": 13190 + }, + { + "epoch": 0.56, + "learning_rate": 1.4600110548917896e-05, + "loss": 2.4822, + "step": 13200 + }, + { + "epoch": 0.56, + "learning_rate": 1.4595858667460352e-05, + "loss": 2.7162, + "step": 13210 + }, + { + "epoch": 0.56, + "learning_rate": 1.4591606786002808e-05, + "loss": 2.9996, + "step": 13220 + }, + { + "epoch": 0.56, + "learning_rate": 1.4587354904545264e-05, + "loss": 1.9623, + "step": 13230 + }, + { + "epoch": 0.56, + "learning_rate": 1.4583103023087717e-05, + "loss": 2.4969, + "step": 13240 + }, + { + "epoch": 0.56, + "learning_rate": 1.4578851141630173e-05, + "loss": 2.3253, + "step": 13250 + }, + { + "epoch": 0.56, + "learning_rate": 1.4574599260172627e-05, + "loss": 2.2383, + "step": 13260 + }, + { + "epoch": 0.56, + "learning_rate": 1.4570347378715082e-05, + "loss": 2.1804, + "step": 13270 + }, + { + "epoch": 0.56, + "learning_rate": 1.4566095497257538e-05, + "loss": 2.4405, + "step": 13280 + }, + { + "epoch": 0.56, + "learning_rate": 1.4561843615799993e-05, + "loss": 2.0605, + "step": 13290 + }, + { + "epoch": 0.56, + "learning_rate": 1.4557591734342447e-05, + "loss": 2.4252, + "step": 13300 + }, + { + "epoch": 0.56, + "learning_rate": 1.4553339852884903e-05, + "loss": 2.3363, + "step": 13310 + }, + { + "epoch": 0.56, + "learning_rate": 1.4549087971427358e-05, + "loss": 2.5494, + "step": 13320 + }, + { + "epoch": 0.56, + "learning_rate": 1.4544836089969814e-05, + "loss": 2.2146, + "step": 13330 + }, + { + "epoch": 0.56, + "learning_rate": 1.4540584208512267e-05, + "loss": 2.1387, + "step": 13340 + }, + { + "epoch": 0.56, + "learning_rate": 1.4536332327054723e-05, + "loss": 2.5365, + "step": 13350 + }, + { + "epoch": 0.56, + "learning_rate": 1.4532080445597179e-05, + "loss": 2.2978, + "step": 13360 + }, + { + "epoch": 0.56, + "learning_rate": 1.4527828564139632e-05, + "loss": 2.3413, + "step": 13370 + }, + { + "epoch": 0.56, + "learning_rate": 1.4523576682682088e-05, + "loss": 1.9884, + "step": 13380 + }, + { + "epoch": 0.56, + "learning_rate": 1.4519324801224544e-05, + "loss": 2.0978, + "step": 13390 + }, + { + "epoch": 0.56, + "learning_rate": 1.4515072919766997e-05, + "loss": 1.9556, + "step": 13400 + }, + { + "epoch": 0.56, + "learning_rate": 1.4510821038309453e-05, + "loss": 2.4953, + "step": 13410 + }, + { + "epoch": 0.56, + "learning_rate": 1.4506569156851909e-05, + "loss": 2.1745, + "step": 13420 + }, + { + "epoch": 0.57, + "learning_rate": 1.4502317275394363e-05, + "loss": 2.0909, + "step": 13430 + }, + { + "epoch": 0.57, + "learning_rate": 1.4498065393936818e-05, + "loss": 2.2396, + "step": 13440 + }, + { + "epoch": 0.57, + "learning_rate": 1.4493813512479274e-05, + "loss": 2.2915, + "step": 13450 + }, + { + "epoch": 0.57, + "learning_rate": 1.4489561631021728e-05, + "loss": 2.2874, + "step": 13460 + }, + { + "epoch": 0.57, + "learning_rate": 1.4485309749564183e-05, + "loss": 2.7083, + "step": 13470 + }, + { + "epoch": 0.57, + "learning_rate": 1.4481057868106639e-05, + "loss": 2.1006, + "step": 13480 + }, + { + "epoch": 0.57, + "learning_rate": 1.4476805986649094e-05, + "loss": 1.7699, + "step": 13490 + }, + { + "epoch": 0.57, + "learning_rate": 1.4472554105191548e-05, + "loss": 2.0629, + "step": 13500 + }, + { + "epoch": 0.57, + "eval_loss": 1.8929394483566284, + "eval_runtime": 175.8173, + "eval_samples_per_second": 14.236, + "eval_steps_per_second": 7.121, + "step": 13500 + }, + { + "epoch": 0.57, + "learning_rate": 1.4468302223734002e-05, + "loss": 2.5065, + "step": 13510 + }, + { + "epoch": 0.57, + "learning_rate": 1.4464050342276459e-05, + "loss": 2.4348, + "step": 13520 + }, + { + "epoch": 0.57, + "learning_rate": 1.4459798460818915e-05, + "loss": 2.2692, + "step": 13530 + }, + { + "epoch": 0.57, + "learning_rate": 1.4455546579361368e-05, + "loss": 2.1182, + "step": 13540 + }, + { + "epoch": 0.57, + "learning_rate": 1.4451294697903824e-05, + "loss": 2.4457, + "step": 13550 + }, + { + "epoch": 0.57, + "learning_rate": 1.444704281644628e-05, + "loss": 2.9266, + "step": 13560 + }, + { + "epoch": 0.57, + "learning_rate": 1.4442790934988733e-05, + "loss": 2.1608, + "step": 13570 + }, + { + "epoch": 0.57, + "learning_rate": 1.4438539053531189e-05, + "loss": 2.1326, + "step": 13580 + }, + { + "epoch": 0.57, + "learning_rate": 1.4434287172073645e-05, + "loss": 1.7725, + "step": 13590 + }, + { + "epoch": 0.57, + "learning_rate": 1.4430035290616098e-05, + "loss": 2.1611, + "step": 13600 + }, + { + "epoch": 0.57, + "learning_rate": 1.4425783409158554e-05, + "loss": 2.2394, + "step": 13610 + }, + { + "epoch": 0.57, + "learning_rate": 1.442153152770101e-05, + "loss": 2.3544, + "step": 13620 + }, + { + "epoch": 0.57, + "learning_rate": 1.4417279646243464e-05, + "loss": 2.0564, + "step": 13630 + }, + { + "epoch": 0.57, + "learning_rate": 1.4413027764785919e-05, + "loss": 2.8817, + "step": 13640 + }, + { + "epoch": 0.57, + "learning_rate": 1.4408775883328373e-05, + "loss": 2.2314, + "step": 13650 + }, + { + "epoch": 0.57, + "learning_rate": 1.440452400187083e-05, + "loss": 2.2732, + "step": 13660 + }, + { + "epoch": 0.58, + "learning_rate": 1.4400272120413284e-05, + "loss": 2.1687, + "step": 13670 + }, + { + "epoch": 0.58, + "learning_rate": 1.4396020238955738e-05, + "loss": 2.3604, + "step": 13680 + }, + { + "epoch": 0.58, + "learning_rate": 1.4391768357498195e-05, + "loss": 2.3441, + "step": 13690 + }, + { + "epoch": 0.58, + "learning_rate": 1.4387516476040649e-05, + "loss": 2.2658, + "step": 13700 + }, + { + "epoch": 0.58, + "learning_rate": 1.4383264594583104e-05, + "loss": 2.542, + "step": 13710 + }, + { + "epoch": 0.58, + "learning_rate": 1.437901271312556e-05, + "loss": 2.4687, + "step": 13720 + }, + { + "epoch": 0.58, + "learning_rate": 1.4374760831668012e-05, + "loss": 2.4955, + "step": 13730 + }, + { + "epoch": 0.58, + "learning_rate": 1.4370508950210469e-05, + "loss": 2.3333, + "step": 13740 + }, + { + "epoch": 0.58, + "learning_rate": 1.4366257068752925e-05, + "loss": 2.249, + "step": 13750 + }, + { + "epoch": 0.58, + "learning_rate": 1.4362005187295381e-05, + "loss": 2.4311, + "step": 13760 + }, + { + "epoch": 0.58, + "learning_rate": 1.4357753305837834e-05, + "loss": 2.3719, + "step": 13770 + }, + { + "epoch": 0.58, + "learning_rate": 1.435350142438029e-05, + "loss": 2.1542, + "step": 13780 + }, + { + "epoch": 0.58, + "learning_rate": 1.4349249542922746e-05, + "loss": 2.3959, + "step": 13790 + }, + { + "epoch": 0.58, + "learning_rate": 1.4344997661465199e-05, + "loss": 2.0495, + "step": 13800 + }, + { + "epoch": 0.58, + "learning_rate": 1.4340745780007655e-05, + "loss": 2.0001, + "step": 13810 + }, + { + "epoch": 0.58, + "learning_rate": 1.433649389855011e-05, + "loss": 2.6268, + "step": 13820 + }, + { + "epoch": 0.58, + "learning_rate": 1.4332242017092564e-05, + "loss": 2.2777, + "step": 13830 + }, + { + "epoch": 0.58, + "learning_rate": 1.432799013563502e-05, + "loss": 2.0227, + "step": 13840 + }, + { + "epoch": 0.58, + "learning_rate": 1.4323738254177474e-05, + "loss": 1.8101, + "step": 13850 + }, + { + "epoch": 0.58, + "learning_rate": 1.431948637271993e-05, + "loss": 1.9943, + "step": 13860 + }, + { + "epoch": 0.58, + "learning_rate": 1.4315234491262385e-05, + "loss": 2.2595, + "step": 13870 + }, + { + "epoch": 0.58, + "learning_rate": 1.431098260980484e-05, + "loss": 2.376, + "step": 13880 + }, + { + "epoch": 0.58, + "learning_rate": 1.4306730728347296e-05, + "loss": 2.3874, + "step": 13890 + }, + { + "epoch": 0.58, + "learning_rate": 1.4302478846889748e-05, + "loss": 2.1329, + "step": 13900 + }, + { + "epoch": 0.59, + "learning_rate": 1.4298226965432205e-05, + "loss": 2.3208, + "step": 13910 + }, + { + "epoch": 0.59, + "learning_rate": 1.429397508397466e-05, + "loss": 2.4588, + "step": 13920 + }, + { + "epoch": 0.59, + "learning_rate": 1.4289723202517113e-05, + "loss": 2.8068, + "step": 13930 + }, + { + "epoch": 0.59, + "learning_rate": 1.428547132105957e-05, + "loss": 2.4027, + "step": 13940 + }, + { + "epoch": 0.59, + "learning_rate": 1.4281219439602026e-05, + "loss": 2.4779, + "step": 13950 + }, + { + "epoch": 0.59, + "learning_rate": 1.4276967558144482e-05, + "loss": 2.403, + "step": 13960 + }, + { + "epoch": 0.59, + "learning_rate": 1.4272715676686935e-05, + "loss": 2.1801, + "step": 13970 + }, + { + "epoch": 0.59, + "learning_rate": 1.426846379522939e-05, + "loss": 1.9641, + "step": 13980 + }, + { + "epoch": 0.59, + "learning_rate": 1.4264211913771845e-05, + "loss": 2.4286, + "step": 13990 + }, + { + "epoch": 0.59, + "learning_rate": 1.42599600323143e-05, + "loss": 2.701, + "step": 14000 + }, + { + "epoch": 0.59, + "eval_loss": 1.856201410293579, + "eval_runtime": 175.7531, + "eval_samples_per_second": 14.242, + "eval_steps_per_second": 7.124, + "step": 14000 + }, + { + "epoch": 0.59, + "learning_rate": 1.4255708150856756e-05, + "loss": 2.5715, + "step": 14010 + }, + { + "epoch": 0.59, + "learning_rate": 1.425145626939921e-05, + "loss": 2.6098, + "step": 14020 + }, + { + "epoch": 0.59, + "learning_rate": 1.4247204387941665e-05, + "loss": 2.6931, + "step": 14030 + }, + { + "epoch": 0.59, + "learning_rate": 1.424295250648412e-05, + "loss": 2.5217, + "step": 14040 + }, + { + "epoch": 0.59, + "learning_rate": 1.4238700625026575e-05, + "loss": 2.2983, + "step": 14050 + }, + { + "epoch": 0.59, + "learning_rate": 1.4234448743569032e-05, + "loss": 1.8287, + "step": 14060 + }, + { + "epoch": 0.59, + "learning_rate": 1.4230196862111484e-05, + "loss": 2.5353, + "step": 14070 + }, + { + "epoch": 0.59, + "learning_rate": 1.422594498065394e-05, + "loss": 1.932, + "step": 14080 + }, + { + "epoch": 0.59, + "learning_rate": 1.4221693099196397e-05, + "loss": 2.2853, + "step": 14090 + }, + { + "epoch": 0.59, + "learning_rate": 1.421744121773885e-05, + "loss": 1.8722, + "step": 14100 + }, + { + "epoch": 0.59, + "learning_rate": 1.4213189336281306e-05, + "loss": 2.2378, + "step": 14110 + }, + { + "epoch": 0.59, + "learning_rate": 1.4208937454823762e-05, + "loss": 2.3306, + "step": 14120 + }, + { + "epoch": 0.59, + "learning_rate": 1.4204685573366214e-05, + "loss": 2.4661, + "step": 14130 + }, + { + "epoch": 0.59, + "learning_rate": 1.420043369190867e-05, + "loss": 2.5235, + "step": 14140 + }, + { + "epoch": 0.6, + "learning_rate": 1.4196181810451127e-05, + "loss": 2.7158, + "step": 14150 + }, + { + "epoch": 0.6, + "learning_rate": 1.4191929928993581e-05, + "loss": 2.104, + "step": 14160 + }, + { + "epoch": 0.6, + "learning_rate": 1.4187678047536036e-05, + "loss": 2.3975, + "step": 14170 + }, + { + "epoch": 0.6, + "learning_rate": 1.4183426166078492e-05, + "loss": 1.9535, + "step": 14180 + }, + { + "epoch": 0.6, + "learning_rate": 1.4179174284620946e-05, + "loss": 2.3345, + "step": 14190 + }, + { + "epoch": 0.6, + "learning_rate": 1.41749224031634e-05, + "loss": 2.3982, + "step": 14200 + }, + { + "epoch": 0.6, + "learning_rate": 1.4170670521705855e-05, + "loss": 2.6212, + "step": 14210 + }, + { + "epoch": 0.6, + "learning_rate": 1.4166418640248311e-05, + "loss": 2.0249, + "step": 14220 + }, + { + "epoch": 0.6, + "learning_rate": 1.4162166758790766e-05, + "loss": 2.366, + "step": 14230 + }, + { + "epoch": 0.6, + "learning_rate": 1.415791487733322e-05, + "loss": 2.2266, + "step": 14240 + }, + { + "epoch": 0.6, + "learning_rate": 1.4153662995875676e-05, + "loss": 2.6209, + "step": 14250 + }, + { + "epoch": 0.6, + "learning_rate": 1.4149411114418131e-05, + "loss": 2.3738, + "step": 14260 + }, + { + "epoch": 0.6, + "learning_rate": 1.4145159232960585e-05, + "loss": 2.2413, + "step": 14270 + }, + { + "epoch": 0.6, + "learning_rate": 1.4140907351503042e-05, + "loss": 2.3953, + "step": 14280 + }, + { + "epoch": 0.6, + "learning_rate": 1.4136655470045498e-05, + "loss": 1.8551, + "step": 14290 + }, + { + "epoch": 0.6, + "learning_rate": 1.413240358858795e-05, + "loss": 2.2475, + "step": 14300 + }, + { + "epoch": 0.6, + "learning_rate": 1.4128151707130407e-05, + "loss": 2.5065, + "step": 14310 + }, + { + "epoch": 0.6, + "learning_rate": 1.4123899825672863e-05, + "loss": 2.3926, + "step": 14320 + }, + { + "epoch": 0.6, + "learning_rate": 1.4119647944215316e-05, + "loss": 2.8505, + "step": 14330 + }, + { + "epoch": 0.6, + "learning_rate": 1.4115396062757772e-05, + "loss": 2.351, + "step": 14340 + }, + { + "epoch": 0.6, + "learning_rate": 1.4111144181300228e-05, + "loss": 2.5845, + "step": 14350 + }, + { + "epoch": 0.6, + "learning_rate": 1.410689229984268e-05, + "loss": 2.4993, + "step": 14360 + }, + { + "epoch": 0.6, + "learning_rate": 1.4102640418385137e-05, + "loss": 2.5347, + "step": 14370 + }, + { + "epoch": 0.6, + "learning_rate": 1.4098388536927591e-05, + "loss": 2.4627, + "step": 14380 + }, + { + "epoch": 0.61, + "learning_rate": 1.4094136655470047e-05, + "loss": 2.4872, + "step": 14390 + }, + { + "epoch": 0.61, + "learning_rate": 1.4089884774012502e-05, + "loss": 2.0532, + "step": 14400 + }, + { + "epoch": 0.61, + "learning_rate": 1.4085632892554956e-05, + "loss": 2.5615, + "step": 14410 + }, + { + "epoch": 0.61, + "learning_rate": 1.4081381011097412e-05, + "loss": 2.0314, + "step": 14420 + }, + { + "epoch": 0.61, + "learning_rate": 1.4077129129639865e-05, + "loss": 2.3463, + "step": 14430 + }, + { + "epoch": 0.61, + "learning_rate": 1.4072877248182321e-05, + "loss": 2.3824, + "step": 14440 + }, + { + "epoch": 0.61, + "learning_rate": 1.4068625366724777e-05, + "loss": 2.4601, + "step": 14450 + }, + { + "epoch": 0.61, + "learning_rate": 1.406437348526723e-05, + "loss": 2.4652, + "step": 14460 + }, + { + "epoch": 0.61, + "learning_rate": 1.4060121603809686e-05, + "loss": 2.0304, + "step": 14470 + }, + { + "epoch": 0.61, + "learning_rate": 1.4055869722352143e-05, + "loss": 2.5522, + "step": 14480 + }, + { + "epoch": 0.61, + "learning_rate": 1.4051617840894599e-05, + "loss": 2.0558, + "step": 14490 + }, + { + "epoch": 0.61, + "learning_rate": 1.4047365959437051e-05, + "loss": 2.4705, + "step": 14500 + }, + { + "epoch": 0.61, + "eval_loss": 1.872928261756897, + "eval_runtime": 175.8168, + "eval_samples_per_second": 14.236, + "eval_steps_per_second": 7.121, + "step": 14500 + }, + { + "epoch": 0.61, + "learning_rate": 1.4043114077979508e-05, + "loss": 2.9471, + "step": 14510 + }, + { + "epoch": 0.61, + "learning_rate": 1.4038862196521962e-05, + "loss": 2.2038, + "step": 14520 + }, + { + "epoch": 0.61, + "learning_rate": 1.4034610315064417e-05, + "loss": 2.2991, + "step": 14530 + }, + { + "epoch": 0.61, + "learning_rate": 1.4030358433606873e-05, + "loss": 2.4386, + "step": 14540 + }, + { + "epoch": 0.61, + "learning_rate": 1.4026106552149327e-05, + "loss": 2.4721, + "step": 14550 + }, + { + "epoch": 0.61, + "learning_rate": 1.4021854670691782e-05, + "loss": 2.31, + "step": 14560 + }, + { + "epoch": 0.61, + "learning_rate": 1.4017602789234238e-05, + "loss": 2.3514, + "step": 14570 + }, + { + "epoch": 0.61, + "learning_rate": 1.4013350907776692e-05, + "loss": 2.4382, + "step": 14580 + }, + { + "epoch": 0.61, + "learning_rate": 1.4009099026319148e-05, + "loss": 1.9629, + "step": 14590 + }, + { + "epoch": 0.61, + "learning_rate": 1.4004847144861601e-05, + "loss": 2.5185, + "step": 14600 + }, + { + "epoch": 0.61, + "learning_rate": 1.4000595263404057e-05, + "loss": 2.4013, + "step": 14610 + }, + { + "epoch": 0.62, + "learning_rate": 1.3996343381946513e-05, + "loss": 2.2089, + "step": 14620 + }, + { + "epoch": 0.62, + "learning_rate": 1.3992091500488966e-05, + "loss": 2.7305, + "step": 14630 + }, + { + "epoch": 0.62, + "learning_rate": 1.3987839619031422e-05, + "loss": 2.0156, + "step": 14640 + }, + { + "epoch": 0.62, + "learning_rate": 1.3983587737573879e-05, + "loss": 2.512, + "step": 14650 + }, + { + "epoch": 0.62, + "learning_rate": 1.3979335856116331e-05, + "loss": 2.8554, + "step": 14660 + }, + { + "epoch": 0.62, + "learning_rate": 1.3975083974658787e-05, + "loss": 2.9559, + "step": 14670 + }, + { + "epoch": 0.62, + "learning_rate": 1.3970832093201244e-05, + "loss": 1.9561, + "step": 14680 + }, + { + "epoch": 0.62, + "learning_rate": 1.3966580211743698e-05, + "loss": 2.2716, + "step": 14690 + }, + { + "epoch": 0.62, + "learning_rate": 1.3962328330286152e-05, + "loss": 1.9613, + "step": 14700 + }, + { + "epoch": 0.62, + "learning_rate": 1.3958076448828609e-05, + "loss": 2.7032, + "step": 14710 + }, + { + "epoch": 0.62, + "learning_rate": 1.3953824567371063e-05, + "loss": 2.0244, + "step": 14720 + }, + { + "epoch": 0.62, + "learning_rate": 1.3949572685913518e-05, + "loss": 2.2966, + "step": 14730 + }, + { + "epoch": 0.62, + "learning_rate": 1.3945320804455974e-05, + "loss": 2.0398, + "step": 14740 + }, + { + "epoch": 0.62, + "learning_rate": 1.3941068922998428e-05, + "loss": 2.3359, + "step": 14750 + }, + { + "epoch": 0.62, + "learning_rate": 1.3936817041540883e-05, + "loss": 2.3542, + "step": 14760 + }, + { + "epoch": 0.62, + "learning_rate": 1.3932565160083337e-05, + "loss": 2.6888, + "step": 14770 + }, + { + "epoch": 0.62, + "learning_rate": 1.3928313278625793e-05, + "loss": 2.1765, + "step": 14780 + }, + { + "epoch": 0.62, + "learning_rate": 1.3924061397168248e-05, + "loss": 2.5522, + "step": 14790 + }, + { + "epoch": 0.62, + "learning_rate": 1.3919809515710702e-05, + "loss": 1.9513, + "step": 14800 + }, + { + "epoch": 0.62, + "learning_rate": 1.3915557634253158e-05, + "loss": 1.9658, + "step": 14810 + }, + { + "epoch": 0.62, + "learning_rate": 1.3911305752795614e-05, + "loss": 1.8836, + "step": 14820 + }, + { + "epoch": 0.62, + "learning_rate": 1.3907053871338067e-05, + "loss": 2.4389, + "step": 14830 + }, + { + "epoch": 0.62, + "learning_rate": 1.3902801989880523e-05, + "loss": 2.0884, + "step": 14840 + }, + { + "epoch": 0.62, + "learning_rate": 1.389855010842298e-05, + "loss": 2.33, + "step": 14850 + }, + { + "epoch": 0.63, + "learning_rate": 1.3894298226965432e-05, + "loss": 2.5639, + "step": 14860 + }, + { + "epoch": 0.63, + "learning_rate": 1.3890046345507888e-05, + "loss": 2.5959, + "step": 14870 + }, + { + "epoch": 0.63, + "learning_rate": 1.3885794464050345e-05, + "loss": 2.5403, + "step": 14880 + }, + { + "epoch": 0.63, + "learning_rate": 1.3881542582592797e-05, + "loss": 2.5303, + "step": 14890 + }, + { + "epoch": 0.63, + "learning_rate": 1.3877290701135254e-05, + "loss": 1.7927, + "step": 14900 + }, + { + "epoch": 0.63, + "learning_rate": 1.387303881967771e-05, + "loss": 1.9845, + "step": 14910 + }, + { + "epoch": 0.63, + "learning_rate": 1.3868786938220164e-05, + "loss": 2.5558, + "step": 14920 + }, + { + "epoch": 0.63, + "learning_rate": 1.3864535056762619e-05, + "loss": 2.8331, + "step": 14930 + }, + { + "epoch": 0.63, + "learning_rate": 1.3860283175305073e-05, + "loss": 2.6268, + "step": 14940 + }, + { + "epoch": 0.63, + "learning_rate": 1.385603129384753e-05, + "loss": 2.0546, + "step": 14950 + }, + { + "epoch": 0.63, + "learning_rate": 1.3851779412389984e-05, + "loss": 2.0332, + "step": 14960 + }, + { + "epoch": 0.63, + "learning_rate": 1.3847527530932438e-05, + "loss": 1.941, + "step": 14970 + }, + { + "epoch": 0.63, + "learning_rate": 1.3843275649474894e-05, + "loss": 2.4304, + "step": 14980 + }, + { + "epoch": 0.63, + "learning_rate": 1.3839023768017347e-05, + "loss": 2.3973, + "step": 14990 + }, + { + "epoch": 0.63, + "learning_rate": 1.3834771886559803e-05, + "loss": 1.8707, + "step": 15000 + }, + { + "epoch": 0.63, + "eval_loss": 1.883968710899353, + "eval_runtime": 174.809, + "eval_samples_per_second": 14.318, + "eval_steps_per_second": 7.162, + "step": 15000 + }, + { + "epoch": 0.63, + "learning_rate": 1.383052000510226e-05, + "loss": 2.941, + "step": 15010 + }, + { + "epoch": 0.63, + "learning_rate": 1.3826268123644715e-05, + "loss": 2.4149, + "step": 15020 + }, + { + "epoch": 0.63, + "learning_rate": 1.3822016242187168e-05, + "loss": 2.3082, + "step": 15030 + }, + { + "epoch": 0.63, + "learning_rate": 1.3817764360729624e-05, + "loss": 2.2591, + "step": 15040 + }, + { + "epoch": 0.63, + "learning_rate": 1.381351247927208e-05, + "loss": 2.2677, + "step": 15050 + }, + { + "epoch": 0.63, + "learning_rate": 1.3809260597814533e-05, + "loss": 2.216, + "step": 15060 + }, + { + "epoch": 0.63, + "learning_rate": 1.380500871635699e-05, + "loss": 2.0113, + "step": 15070 + }, + { + "epoch": 0.63, + "learning_rate": 1.3800756834899444e-05, + "loss": 2.2585, + "step": 15080 + }, + { + "epoch": 0.63, + "learning_rate": 1.3796504953441898e-05, + "loss": 2.2651, + "step": 15090 + }, + { + "epoch": 0.64, + "learning_rate": 1.3792253071984355e-05, + "loss": 2.4062, + "step": 15100 + }, + { + "epoch": 0.64, + "learning_rate": 1.3788001190526809e-05, + "loss": 2.7675, + "step": 15110 + }, + { + "epoch": 0.64, + "learning_rate": 1.3783749309069265e-05, + "loss": 2.1533, + "step": 15120 + }, + { + "epoch": 0.64, + "learning_rate": 1.377949742761172e-05, + "loss": 2.1812, + "step": 15130 + }, + { + "epoch": 0.64, + "learning_rate": 1.3775245546154174e-05, + "loss": 2.3384, + "step": 15140 + }, + { + "epoch": 0.64, + "learning_rate": 1.377099366469663e-05, + "loss": 2.4825, + "step": 15150 + }, + { + "epoch": 0.64, + "learning_rate": 1.3766741783239083e-05, + "loss": 2.3008, + "step": 15160 + }, + { + "epoch": 0.64, + "learning_rate": 1.3762489901781539e-05, + "loss": 2.5847, + "step": 15170 + }, + { + "epoch": 0.64, + "learning_rate": 1.3758238020323995e-05, + "loss": 2.0014, + "step": 15180 + }, + { + "epoch": 0.64, + "learning_rate": 1.3753986138866448e-05, + "loss": 2.3731, + "step": 15190 + }, + { + "epoch": 0.64, + "learning_rate": 1.3749734257408904e-05, + "loss": 2.5206, + "step": 15200 + }, + { + "epoch": 0.64, + "learning_rate": 1.374548237595136e-05, + "loss": 2.1148, + "step": 15210 + }, + { + "epoch": 0.64, + "learning_rate": 1.3741230494493817e-05, + "loss": 2.2672, + "step": 15220 + }, + { + "epoch": 0.64, + "learning_rate": 1.373697861303627e-05, + "loss": 2.6911, + "step": 15230 + }, + { + "epoch": 0.64, + "learning_rate": 1.3732726731578725e-05, + "loss": 2.2124, + "step": 15240 + }, + { + "epoch": 0.64, + "learning_rate": 1.372847485012118e-05, + "loss": 2.2615, + "step": 15250 + }, + { + "epoch": 0.64, + "learning_rate": 1.3724222968663634e-05, + "loss": 1.7735, + "step": 15260 + }, + { + "epoch": 0.64, + "learning_rate": 1.371997108720609e-05, + "loss": 2.3346, + "step": 15270 + }, + { + "epoch": 0.64, + "learning_rate": 1.3715719205748545e-05, + "loss": 2.4171, + "step": 15280 + }, + { + "epoch": 0.64, + "learning_rate": 1.3711467324291e-05, + "loss": 2.5776, + "step": 15290 + }, + { + "epoch": 0.64, + "learning_rate": 1.3707215442833456e-05, + "loss": 2.3896, + "step": 15300 + }, + { + "epoch": 0.64, + "learning_rate": 1.370296356137591e-05, + "loss": 2.488, + "step": 15310 + }, + { + "epoch": 0.64, + "learning_rate": 1.3698711679918364e-05, + "loss": 2.0988, + "step": 15320 + }, + { + "epoch": 0.64, + "learning_rate": 1.3694459798460819e-05, + "loss": 2.3117, + "step": 15330 + }, + { + "epoch": 0.65, + "learning_rate": 1.3690207917003275e-05, + "loss": 2.2531, + "step": 15340 + }, + { + "epoch": 0.65, + "learning_rate": 1.3685956035545731e-05, + "loss": 2.2214, + "step": 15350 + }, + { + "epoch": 0.65, + "learning_rate": 1.3681704154088184e-05, + "loss": 2.3255, + "step": 15360 + }, + { + "epoch": 0.65, + "learning_rate": 1.367745227263064e-05, + "loss": 2.5794, + "step": 15370 + }, + { + "epoch": 0.65, + "learning_rate": 1.3673200391173096e-05, + "loss": 2.2217, + "step": 15380 + }, + { + "epoch": 0.65, + "learning_rate": 1.3668948509715549e-05, + "loss": 2.5223, + "step": 15390 + }, + { + "epoch": 0.65, + "learning_rate": 1.3664696628258005e-05, + "loss": 2.2301, + "step": 15400 + }, + { + "epoch": 0.65, + "learning_rate": 1.3660444746800461e-05, + "loss": 2.6519, + "step": 15410 + }, + { + "epoch": 0.65, + "learning_rate": 1.3656192865342914e-05, + "loss": 2.2755, + "step": 15420 + }, + { + "epoch": 0.65, + "learning_rate": 1.365194098388537e-05, + "loss": 2.2666, + "step": 15430 + }, + { + "epoch": 0.65, + "learning_rate": 1.3647689102427826e-05, + "loss": 2.3709, + "step": 15440 + }, + { + "epoch": 0.65, + "learning_rate": 1.3643437220970281e-05, + "loss": 2.6039, + "step": 15450 + }, + { + "epoch": 0.65, + "learning_rate": 1.3639185339512735e-05, + "loss": 2.5073, + "step": 15460 + }, + { + "epoch": 0.65, + "learning_rate": 1.363493345805519e-05, + "loss": 2.1521, + "step": 15470 + }, + { + "epoch": 0.65, + "learning_rate": 1.3630681576597646e-05, + "loss": 2.7038, + "step": 15480 + }, + { + "epoch": 0.65, + "learning_rate": 1.36264296951401e-05, + "loss": 2.6435, + "step": 15490 + }, + { + "epoch": 0.65, + "learning_rate": 1.3622177813682555e-05, + "loss": 1.9067, + "step": 15500 + }, + { + "epoch": 0.65, + "eval_loss": 1.8592156171798706, + "eval_runtime": 175.7012, + "eval_samples_per_second": 14.246, + "eval_steps_per_second": 7.126, + "step": 15500 + }, + { + "epoch": 0.65, + "learning_rate": 1.3617925932225011e-05, + "loss": 2.1339, + "step": 15510 + }, + { + "epoch": 0.65, + "learning_rate": 1.3613674050767466e-05, + "loss": 2.3421, + "step": 15520 + }, + { + "epoch": 0.65, + "learning_rate": 1.360942216930992e-05, + "loss": 1.9724, + "step": 15530 + }, + { + "epoch": 0.65, + "learning_rate": 1.3605170287852376e-05, + "loss": 2.4298, + "step": 15540 + }, + { + "epoch": 0.65, + "learning_rate": 1.3600918406394832e-05, + "loss": 2.6905, + "step": 15550 + }, + { + "epoch": 0.65, + "learning_rate": 1.3596666524937285e-05, + "loss": 2.1393, + "step": 15560 + }, + { + "epoch": 0.66, + "learning_rate": 1.3592414643479741e-05, + "loss": 2.4432, + "step": 15570 + }, + { + "epoch": 0.66, + "learning_rate": 1.3588162762022197e-05, + "loss": 2.36, + "step": 15580 + }, + { + "epoch": 0.66, + "learning_rate": 1.358391088056465e-05, + "loss": 2.3451, + "step": 15590 + }, + { + "epoch": 0.66, + "learning_rate": 1.3579658999107106e-05, + "loss": 2.3377, + "step": 15600 + }, + { + "epoch": 0.66, + "learning_rate": 1.3575407117649562e-05, + "loss": 2.1833, + "step": 15610 + }, + { + "epoch": 0.66, + "learning_rate": 1.3571155236192015e-05, + "loss": 2.0854, + "step": 15620 + }, + { + "epoch": 0.66, + "learning_rate": 1.3566903354734471e-05, + "loss": 2.6914, + "step": 15630 + }, + { + "epoch": 0.66, + "learning_rate": 1.3562651473276926e-05, + "loss": 2.4138, + "step": 15640 + }, + { + "epoch": 0.66, + "learning_rate": 1.3558399591819382e-05, + "loss": 2.6276, + "step": 15650 + }, + { + "epoch": 0.66, + "learning_rate": 1.3554147710361836e-05, + "loss": 2.2723, + "step": 15660 + }, + { + "epoch": 0.66, + "learning_rate": 1.3549895828904291e-05, + "loss": 1.9168, + "step": 15670 + }, + { + "epoch": 0.66, + "learning_rate": 1.3545643947446747e-05, + "loss": 2.207, + "step": 15680 + }, + { + "epoch": 0.66, + "learning_rate": 1.3541392065989201e-05, + "loss": 2.1042, + "step": 15690 + }, + { + "epoch": 0.66, + "learning_rate": 1.3537140184531656e-05, + "loss": 2.593, + "step": 15700 + }, + { + "epoch": 0.66, + "learning_rate": 1.3532888303074112e-05, + "loss": 1.8281, + "step": 15710 + }, + { + "epoch": 0.66, + "learning_rate": 1.3528636421616565e-05, + "loss": 2.3192, + "step": 15720 + }, + { + "epoch": 0.66, + "learning_rate": 1.3524384540159021e-05, + "loss": 2.4385, + "step": 15730 + }, + { + "epoch": 0.66, + "learning_rate": 1.3520132658701477e-05, + "loss": 2.2035, + "step": 15740 + }, + { + "epoch": 0.66, + "learning_rate": 1.351588077724393e-05, + "loss": 2.3731, + "step": 15750 + }, + { + "epoch": 0.66, + "learning_rate": 1.3511628895786386e-05, + "loss": 2.3403, + "step": 15760 + }, + { + "epoch": 0.66, + "learning_rate": 1.3507377014328842e-05, + "loss": 1.8895, + "step": 15770 + }, + { + "epoch": 0.66, + "learning_rate": 1.3503125132871298e-05, + "loss": 1.9967, + "step": 15780 + }, + { + "epoch": 0.66, + "learning_rate": 1.3498873251413751e-05, + "loss": 2.3283, + "step": 15790 + }, + { + "epoch": 0.66, + "learning_rate": 1.3494621369956207e-05, + "loss": 2.2754, + "step": 15800 + }, + { + "epoch": 0.67, + "learning_rate": 1.3490369488498662e-05, + "loss": 2.3961, + "step": 15810 + }, + { + "epoch": 0.67, + "learning_rate": 1.3486117607041116e-05, + "loss": 2.1002, + "step": 15820 + }, + { + "epoch": 0.67, + "learning_rate": 1.3481865725583572e-05, + "loss": 2.339, + "step": 15830 + }, + { + "epoch": 0.67, + "learning_rate": 1.3477613844126027e-05, + "loss": 2.7073, + "step": 15840 + }, + { + "epoch": 0.67, + "learning_rate": 1.3473361962668481e-05, + "loss": 2.6434, + "step": 15850 + }, + { + "epoch": 0.67, + "learning_rate": 1.3469110081210936e-05, + "loss": 2.0592, + "step": 15860 + }, + { + "epoch": 0.67, + "learning_rate": 1.3464858199753392e-05, + "loss": 2.3421, + "step": 15870 + }, + { + "epoch": 0.67, + "learning_rate": 1.3460606318295848e-05, + "loss": 2.53, + "step": 15880 + }, + { + "epoch": 0.67, + "learning_rate": 1.34563544368383e-05, + "loss": 1.9826, + "step": 15890 + }, + { + "epoch": 0.67, + "learning_rate": 1.3452102555380757e-05, + "loss": 2.6527, + "step": 15900 + }, + { + "epoch": 0.67, + "learning_rate": 1.3447850673923213e-05, + "loss": 2.0403, + "step": 15910 + }, + { + "epoch": 0.67, + "learning_rate": 1.3443598792465666e-05, + "loss": 2.3132, + "step": 15920 + }, + { + "epoch": 0.67, + "learning_rate": 1.3439346911008122e-05, + "loss": 2.2089, + "step": 15930 + }, + { + "epoch": 0.67, + "learning_rate": 1.3435095029550578e-05, + "loss": 2.6101, + "step": 15940 + }, + { + "epoch": 0.67, + "learning_rate": 1.3430843148093031e-05, + "loss": 2.5606, + "step": 15950 + }, + { + "epoch": 0.67, + "learning_rate": 1.3426591266635487e-05, + "loss": 2.819, + "step": 15960 + }, + { + "epoch": 0.67, + "learning_rate": 1.3422339385177943e-05, + "loss": 2.1818, + "step": 15970 + }, + { + "epoch": 0.67, + "learning_rate": 1.3418087503720398e-05, + "loss": 2.4026, + "step": 15980 + }, + { + "epoch": 0.67, + "learning_rate": 1.3413835622262852e-05, + "loss": 2.4839, + "step": 15990 + }, + { + "epoch": 0.67, + "learning_rate": 1.3409583740805308e-05, + "loss": 2.6198, + "step": 16000 + }, + { + "epoch": 0.67, + "eval_loss": 1.8519717454910278, + "eval_runtime": 176.1518, + "eval_samples_per_second": 14.209, + "eval_steps_per_second": 7.108, + "step": 16000 + }, + { + "epoch": 0.67, + "learning_rate": 1.3405331859347763e-05, + "loss": 2.5192, + "step": 16010 + }, + { + "epoch": 0.67, + "learning_rate": 1.3401079977890217e-05, + "loss": 2.1524, + "step": 16020 + }, + { + "epoch": 0.67, + "learning_rate": 1.3396828096432672e-05, + "loss": 2.0034, + "step": 16030 + }, + { + "epoch": 0.67, + "learning_rate": 1.3392576214975128e-05, + "loss": 1.9832, + "step": 16040 + }, + { + "epoch": 0.68, + "learning_rate": 1.3388324333517582e-05, + "loss": 2.32, + "step": 16050 + }, + { + "epoch": 0.68, + "learning_rate": 1.3384072452060037e-05, + "loss": 2.4981, + "step": 16060 + }, + { + "epoch": 0.68, + "learning_rate": 1.3379820570602493e-05, + "loss": 2.0976, + "step": 16070 + }, + { + "epoch": 0.68, + "learning_rate": 1.3375568689144949e-05, + "loss": 2.4842, + "step": 16080 + }, + { + "epoch": 0.68, + "learning_rate": 1.3371316807687402e-05, + "loss": 2.2168, + "step": 16090 + }, + { + "epoch": 0.68, + "learning_rate": 1.3367064926229858e-05, + "loss": 2.4269, + "step": 16100 + }, + { + "epoch": 0.68, + "learning_rate": 1.3362813044772314e-05, + "loss": 2.3498, + "step": 16110 + }, + { + "epoch": 0.68, + "learning_rate": 1.3358561163314767e-05, + "loss": 2.5344, + "step": 16120 + }, + { + "epoch": 0.68, + "learning_rate": 1.3354309281857223e-05, + "loss": 2.1391, + "step": 16130 + }, + { + "epoch": 0.68, + "learning_rate": 1.335005740039968e-05, + "loss": 2.1381, + "step": 16140 + }, + { + "epoch": 0.68, + "learning_rate": 1.3345805518942132e-05, + "loss": 1.8247, + "step": 16150 + }, + { + "epoch": 0.68, + "learning_rate": 1.3341553637484588e-05, + "loss": 2.121, + "step": 16160 + }, + { + "epoch": 0.68, + "learning_rate": 1.3337301756027044e-05, + "loss": 1.4757, + "step": 16170 + }, + { + "epoch": 0.68, + "learning_rate": 1.3333049874569499e-05, + "loss": 2.389, + "step": 16180 + }, + { + "epoch": 0.68, + "learning_rate": 1.3328797993111953e-05, + "loss": 2.8002, + "step": 16190 + }, + { + "epoch": 0.68, + "learning_rate": 1.3324546111654408e-05, + "loss": 2.3572, + "step": 16200 + }, + { + "epoch": 0.68, + "learning_rate": 1.3320294230196864e-05, + "loss": 2.1279, + "step": 16210 + }, + { + "epoch": 0.68, + "learning_rate": 1.3316042348739318e-05, + "loss": 2.2369, + "step": 16220 + }, + { + "epoch": 0.68, + "learning_rate": 1.3311790467281773e-05, + "loss": 2.3375, + "step": 16230 + }, + { + "epoch": 0.68, + "learning_rate": 1.3307538585824229e-05, + "loss": 2.2948, + "step": 16240 + }, + { + "epoch": 0.68, + "learning_rate": 1.3303286704366682e-05, + "loss": 1.8357, + "step": 16250 + }, + { + "epoch": 0.68, + "learning_rate": 1.3299034822909138e-05, + "loss": 2.1718, + "step": 16260 + }, + { + "epoch": 0.68, + "learning_rate": 1.3294782941451594e-05, + "loss": 2.1868, + "step": 16270 + }, + { + "epoch": 0.68, + "learning_rate": 1.3290531059994047e-05, + "loss": 2.158, + "step": 16280 + }, + { + "epoch": 0.69, + "learning_rate": 1.3286279178536503e-05, + "loss": 2.0758, + "step": 16290 + }, + { + "epoch": 0.69, + "learning_rate": 1.3282027297078959e-05, + "loss": 2.027, + "step": 16300 + }, + { + "epoch": 0.69, + "learning_rate": 1.3277775415621415e-05, + "loss": 2.3991, + "step": 16310 + }, + { + "epoch": 0.69, + "learning_rate": 1.3273523534163868e-05, + "loss": 2.4214, + "step": 16320 + }, + { + "epoch": 0.69, + "learning_rate": 1.3269271652706324e-05, + "loss": 2.2119, + "step": 16330 + }, + { + "epoch": 0.69, + "learning_rate": 1.3265019771248779e-05, + "loss": 2.4542, + "step": 16340 + }, + { + "epoch": 0.69, + "learning_rate": 1.3260767889791233e-05, + "loss": 1.8715, + "step": 16350 + }, + { + "epoch": 0.69, + "learning_rate": 1.3256516008333689e-05, + "loss": 2.2014, + "step": 16360 + }, + { + "epoch": 0.69, + "learning_rate": 1.3252264126876144e-05, + "loss": 2.7697, + "step": 16370 + }, + { + "epoch": 0.69, + "learning_rate": 1.3248012245418598e-05, + "loss": 2.712, + "step": 16380 + }, + { + "epoch": 0.69, + "learning_rate": 1.3243760363961054e-05, + "loss": 2.3395, + "step": 16390 + }, + { + "epoch": 0.69, + "learning_rate": 1.3239508482503509e-05, + "loss": 2.0136, + "step": 16400 + }, + { + "epoch": 0.69, + "learning_rate": 1.3235256601045965e-05, + "loss": 2.5494, + "step": 16410 + }, + { + "epoch": 0.69, + "learning_rate": 1.3231004719588418e-05, + "loss": 2.2552, + "step": 16420 + }, + { + "epoch": 0.69, + "learning_rate": 1.3226752838130874e-05, + "loss": 1.9919, + "step": 16430 + }, + { + "epoch": 0.69, + "learning_rate": 1.322250095667333e-05, + "loss": 2.3593, + "step": 16440 + }, + { + "epoch": 0.69, + "learning_rate": 1.3218249075215783e-05, + "loss": 2.2038, + "step": 16450 + }, + { + "epoch": 0.69, + "learning_rate": 1.3213997193758239e-05, + "loss": 2.053, + "step": 16460 + }, + { + "epoch": 0.69, + "learning_rate": 1.3209745312300695e-05, + "loss": 2.644, + "step": 16470 + }, + { + "epoch": 0.69, + "learning_rate": 1.3205493430843148e-05, + "loss": 2.0581, + "step": 16480 + }, + { + "epoch": 0.69, + "learning_rate": 1.3201241549385604e-05, + "loss": 2.167, + "step": 16490 + }, + { + "epoch": 0.69, + "learning_rate": 1.319698966792806e-05, + "loss": 2.1646, + "step": 16500 + }, + { + "epoch": 0.69, + "eval_loss": 1.8592852354049683, + "eval_runtime": 175.6489, + "eval_samples_per_second": 14.25, + "eval_steps_per_second": 7.128, + "step": 16500 + }, + { + "epoch": 0.69, + "learning_rate": 1.3192737786470514e-05, + "loss": 2.2407, + "step": 16510 + }, + { + "epoch": 0.7, + "learning_rate": 1.3188485905012969e-05, + "loss": 2.7319, + "step": 16520 + }, + { + "epoch": 0.7, + "learning_rate": 1.3184234023555425e-05, + "loss": 2.0932, + "step": 16530 + }, + { + "epoch": 0.7, + "learning_rate": 1.317998214209788e-05, + "loss": 2.2724, + "step": 16540 + }, + { + "epoch": 0.7, + "learning_rate": 1.3175730260640334e-05, + "loss": 2.4698, + "step": 16550 + }, + { + "epoch": 0.7, + "learning_rate": 1.317147837918279e-05, + "loss": 2.039, + "step": 16560 + }, + { + "epoch": 0.7, + "learning_rate": 1.3167226497725245e-05, + "loss": 2.1941, + "step": 16570 + }, + { + "epoch": 0.7, + "learning_rate": 1.3162974616267699e-05, + "loss": 2.3781, + "step": 16580 + }, + { + "epoch": 0.7, + "learning_rate": 1.3158722734810154e-05, + "loss": 2.2378, + "step": 16590 + }, + { + "epoch": 0.7, + "learning_rate": 1.315447085335261e-05, + "loss": 2.2077, + "step": 16600 + }, + { + "epoch": 0.7, + "learning_rate": 1.3150218971895066e-05, + "loss": 2.5321, + "step": 16610 + }, + { + "epoch": 0.7, + "learning_rate": 1.3145967090437519e-05, + "loss": 1.9455, + "step": 16620 + }, + { + "epoch": 0.7, + "learning_rate": 1.3141715208979975e-05, + "loss": 2.1657, + "step": 16630 + }, + { + "epoch": 0.7, + "learning_rate": 1.3137463327522431e-05, + "loss": 2.3863, + "step": 16640 + }, + { + "epoch": 0.7, + "learning_rate": 1.3133211446064884e-05, + "loss": 2.2692, + "step": 16650 + }, + { + "epoch": 0.7, + "learning_rate": 1.312895956460734e-05, + "loss": 1.9171, + "step": 16660 + }, + { + "epoch": 0.7, + "learning_rate": 1.3124707683149796e-05, + "loss": 2.3214, + "step": 16670 + }, + { + "epoch": 0.7, + "learning_rate": 1.3120455801692249e-05, + "loss": 2.3556, + "step": 16680 + }, + { + "epoch": 0.7, + "learning_rate": 1.3116203920234705e-05, + "loss": 2.1717, + "step": 16690 + }, + { + "epoch": 0.7, + "learning_rate": 1.3111952038777161e-05, + "loss": 2.0603, + "step": 16700 + }, + { + "epoch": 0.7, + "learning_rate": 1.3107700157319616e-05, + "loss": 2.3345, + "step": 16710 + }, + { + "epoch": 0.7, + "learning_rate": 1.310344827586207e-05, + "loss": 2.1033, + "step": 16720 + }, + { + "epoch": 0.7, + "learning_rate": 1.3099196394404526e-05, + "loss": 1.9156, + "step": 16730 + }, + { + "epoch": 0.7, + "learning_rate": 1.309494451294698e-05, + "loss": 1.9722, + "step": 16740 + }, + { + "epoch": 0.7, + "learning_rate": 1.3090692631489435e-05, + "loss": 2.5184, + "step": 16750 + }, + { + "epoch": 0.71, + "learning_rate": 1.308644075003189e-05, + "loss": 2.248, + "step": 16760 + }, + { + "epoch": 0.71, + "learning_rate": 1.3082188868574346e-05, + "loss": 2.654, + "step": 16770 + }, + { + "epoch": 0.71, + "learning_rate": 1.30779369871168e-05, + "loss": 1.7152, + "step": 16780 + }, + { + "epoch": 0.71, + "learning_rate": 1.3073685105659255e-05, + "loss": 2.1579, + "step": 16790 + }, + { + "epoch": 0.71, + "learning_rate": 1.306943322420171e-05, + "loss": 2.4243, + "step": 16800 + }, + { + "epoch": 0.71, + "learning_rate": 1.3065181342744163e-05, + "loss": 1.8675, + "step": 16810 + }, + { + "epoch": 0.71, + "learning_rate": 1.306092946128662e-05, + "loss": 1.998, + "step": 16820 + }, + { + "epoch": 0.71, + "learning_rate": 1.3056677579829076e-05, + "loss": 2.5531, + "step": 16830 + }, + { + "epoch": 0.71, + "learning_rate": 1.3052425698371532e-05, + "loss": 2.0088, + "step": 16840 + }, + { + "epoch": 0.71, + "learning_rate": 1.3048173816913985e-05, + "loss": 2.0293, + "step": 16850 + }, + { + "epoch": 0.71, + "learning_rate": 1.3043921935456441e-05, + "loss": 2.1525, + "step": 16860 + }, + { + "epoch": 0.71, + "learning_rate": 1.3039670053998897e-05, + "loss": 2.0561, + "step": 16870 + }, + { + "epoch": 0.71, + "learning_rate": 1.303541817254135e-05, + "loss": 2.2735, + "step": 16880 + }, + { + "epoch": 0.71, + "learning_rate": 1.3031166291083806e-05, + "loss": 2.3812, + "step": 16890 + }, + { + "epoch": 0.71, + "learning_rate": 1.302691440962626e-05, + "loss": 1.9527, + "step": 16900 + }, + { + "epoch": 0.71, + "learning_rate": 1.3022662528168715e-05, + "loss": 1.9467, + "step": 16910 + }, + { + "epoch": 0.71, + "learning_rate": 1.3018410646711171e-05, + "loss": 2.245, + "step": 16920 + }, + { + "epoch": 0.71, + "learning_rate": 1.3014158765253625e-05, + "loss": 1.9354, + "step": 16930 + }, + { + "epoch": 0.71, + "learning_rate": 1.3009906883796082e-05, + "loss": 2.4325, + "step": 16940 + }, + { + "epoch": 0.71, + "learning_rate": 1.3005655002338536e-05, + "loss": 2.4116, + "step": 16950 + }, + { + "epoch": 0.71, + "learning_rate": 1.300140312088099e-05, + "loss": 2.5103, + "step": 16960 + }, + { + "epoch": 0.71, + "learning_rate": 1.2997151239423447e-05, + "loss": 2.2477, + "step": 16970 + }, + { + "epoch": 0.71, + "learning_rate": 1.29928993579659e-05, + "loss": 2.0229, + "step": 16980 + }, + { + "epoch": 0.71, + "learning_rate": 1.2988647476508356e-05, + "loss": 2.4175, + "step": 16990 + }, + { + "epoch": 0.72, + "learning_rate": 1.2984395595050812e-05, + "loss": 2.0822, + "step": 17000 + }, + { + "epoch": 0.72, + "eval_loss": 1.8584306240081787, + "eval_runtime": 175.881, + "eval_samples_per_second": 14.231, + "eval_steps_per_second": 7.118, + "step": 17000 + }, + { + "epoch": 0.72, + "learning_rate": 1.2980143713593265e-05, + "loss": 1.9894, + "step": 17010 + }, + { + "epoch": 0.72, + "learning_rate": 1.297589183213572e-05, + "loss": 2.0724, + "step": 17020 + }, + { + "epoch": 0.72, + "learning_rate": 1.2971639950678177e-05, + "loss": 1.8834, + "step": 17030 + }, + { + "epoch": 0.72, + "learning_rate": 1.2967388069220633e-05, + "loss": 2.3956, + "step": 17040 + }, + { + "epoch": 0.72, + "learning_rate": 1.2963136187763086e-05, + "loss": 2.1201, + "step": 17050 + }, + { + "epoch": 0.72, + "learning_rate": 1.2958884306305542e-05, + "loss": 2.2739, + "step": 17060 + }, + { + "epoch": 0.72, + "learning_rate": 1.2954632424847996e-05, + "loss": 2.5214, + "step": 17070 + }, + { + "epoch": 0.72, + "learning_rate": 1.295038054339045e-05, + "loss": 2.524, + "step": 17080 + }, + { + "epoch": 0.72, + "learning_rate": 1.2946128661932907e-05, + "loss": 2.0987, + "step": 17090 + }, + { + "epoch": 0.72, + "learning_rate": 1.2941876780475361e-05, + "loss": 2.6096, + "step": 17100 + }, + { + "epoch": 0.72, + "learning_rate": 1.2937624899017816e-05, + "loss": 2.0223, + "step": 17110 + }, + { + "epoch": 0.72, + "learning_rate": 1.2933373017560272e-05, + "loss": 2.1142, + "step": 17120 + }, + { + "epoch": 0.72, + "learning_rate": 1.2929121136102726e-05, + "loss": 2.5442, + "step": 17130 + }, + { + "epoch": 0.72, + "learning_rate": 1.2924869254645183e-05, + "loss": 2.6321, + "step": 17140 + }, + { + "epoch": 0.72, + "learning_rate": 1.2920617373187635e-05, + "loss": 2.2613, + "step": 17150 + }, + { + "epoch": 0.72, + "learning_rate": 1.2916365491730092e-05, + "loss": 2.1481, + "step": 17160 + }, + { + "epoch": 0.72, + "learning_rate": 1.2912113610272548e-05, + "loss": 2.2593, + "step": 17170 + }, + { + "epoch": 0.72, + "learning_rate": 1.2907861728815e-05, + "loss": 2.3222, + "step": 17180 + }, + { + "epoch": 0.72, + "learning_rate": 1.2903609847357457e-05, + "loss": 2.1073, + "step": 17190 + }, + { + "epoch": 0.72, + "learning_rate": 1.2899357965899913e-05, + "loss": 2.5689, + "step": 17200 + }, + { + "epoch": 0.72, + "learning_rate": 1.2895106084442366e-05, + "loss": 2.3708, + "step": 17210 + }, + { + "epoch": 0.72, + "learning_rate": 1.2890854202984822e-05, + "loss": 2.4703, + "step": 17220 + }, + { + "epoch": 0.72, + "learning_rate": 1.2886602321527278e-05, + "loss": 2.4671, + "step": 17230 + }, + { + "epoch": 0.73, + "learning_rate": 1.2882350440069732e-05, + "loss": 2.3027, + "step": 17240 + }, + { + "epoch": 0.73, + "learning_rate": 1.2878098558612187e-05, + "loss": 2.0178, + "step": 17250 + }, + { + "epoch": 0.73, + "learning_rate": 1.2873846677154643e-05, + "loss": 2.0933, + "step": 17260 + }, + { + "epoch": 0.73, + "learning_rate": 1.2869594795697097e-05, + "loss": 2.7651, + "step": 17270 + }, + { + "epoch": 0.73, + "learning_rate": 1.2865342914239552e-05, + "loss": 2.5752, + "step": 17280 + }, + { + "epoch": 0.73, + "learning_rate": 1.2861091032782006e-05, + "loss": 2.0839, + "step": 17290 + }, + { + "epoch": 0.73, + "learning_rate": 1.2856839151324462e-05, + "loss": 2.5019, + "step": 17300 + }, + { + "epoch": 0.73, + "learning_rate": 1.2852587269866917e-05, + "loss": 2.5049, + "step": 17310 + }, + { + "epoch": 0.73, + "learning_rate": 1.2848335388409371e-05, + "loss": 2.2519, + "step": 17320 + }, + { + "epoch": 0.73, + "learning_rate": 1.2844083506951828e-05, + "loss": 2.8233, + "step": 17330 + }, + { + "epoch": 0.73, + "learning_rate": 1.2839831625494282e-05, + "loss": 2.3101, + "step": 17340 + }, + { + "epoch": 0.73, + "learning_rate": 1.2835579744036736e-05, + "loss": 2.232, + "step": 17350 + }, + { + "epoch": 0.73, + "learning_rate": 1.2831327862579193e-05, + "loss": 2.0227, + "step": 17360 + }, + { + "epoch": 0.73, + "learning_rate": 1.2827075981121649e-05, + "loss": 2.5267, + "step": 17370 + }, + { + "epoch": 0.73, + "learning_rate": 1.2822824099664102e-05, + "loss": 2.5093, + "step": 17380 + }, + { + "epoch": 0.73, + "learning_rate": 1.2818572218206558e-05, + "loss": 1.6604, + "step": 17390 + }, + { + "epoch": 0.73, + "learning_rate": 1.2814320336749014e-05, + "loss": 2.7735, + "step": 17400 + }, + { + "epoch": 0.73, + "learning_rate": 1.2810068455291467e-05, + "loss": 1.9258, + "step": 17410 + }, + { + "epoch": 0.73, + "learning_rate": 1.2805816573833923e-05, + "loss": 2.1722, + "step": 17420 + }, + { + "epoch": 0.73, + "learning_rate": 1.2801564692376379e-05, + "loss": 2.4616, + "step": 17430 + }, + { + "epoch": 0.73, + "learning_rate": 1.2797312810918832e-05, + "loss": 2.2542, + "step": 17440 + }, + { + "epoch": 0.73, + "learning_rate": 1.2793060929461288e-05, + "loss": 1.9519, + "step": 17450 + }, + { + "epoch": 0.73, + "learning_rate": 1.2788809048003742e-05, + "loss": 2.3988, + "step": 17460 + }, + { + "epoch": 0.73, + "learning_rate": 1.2784557166546198e-05, + "loss": 2.1753, + "step": 17470 + }, + { + "epoch": 0.74, + "learning_rate": 1.2780305285088653e-05, + "loss": 2.139, + "step": 17480 + }, + { + "epoch": 0.74, + "learning_rate": 1.2776053403631107e-05, + "loss": 2.3725, + "step": 17490 + }, + { + "epoch": 0.74, + "learning_rate": 1.2771801522173563e-05, + "loss": 2.574, + "step": 17500 + }, + { + "epoch": 0.74, + "eval_loss": 1.8318504095077515, + "eval_runtime": 175.9495, + "eval_samples_per_second": 14.226, + "eval_steps_per_second": 7.116, + "step": 17500 + }, + { + "epoch": 0.74, + "learning_rate": 1.2767549640716018e-05, + "loss": 2.6461, + "step": 17510 + }, + { + "epoch": 0.74, + "learning_rate": 1.2763297759258472e-05, + "loss": 2.26, + "step": 17520 + }, + { + "epoch": 0.74, + "learning_rate": 1.2759045877800929e-05, + "loss": 1.8639, + "step": 17530 + }, + { + "epoch": 0.74, + "learning_rate": 1.2754793996343381e-05, + "loss": 2.3485, + "step": 17540 + }, + { + "epoch": 0.74, + "learning_rate": 1.2750542114885837e-05, + "loss": 2.3243, + "step": 17550 + }, + { + "epoch": 0.74, + "learning_rate": 1.2746290233428294e-05, + "loss": 2.317, + "step": 17560 + }, + { + "epoch": 0.74, + "learning_rate": 1.274203835197075e-05, + "loss": 2.4058, + "step": 17570 + }, + { + "epoch": 0.74, + "learning_rate": 1.2737786470513203e-05, + "loss": 2.0532, + "step": 17580 + }, + { + "epoch": 0.74, + "learning_rate": 1.2733534589055659e-05, + "loss": 2.863, + "step": 17590 + }, + { + "epoch": 0.74, + "learning_rate": 1.2729282707598115e-05, + "loss": 1.7981, + "step": 17600 + }, + { + "epoch": 0.74, + "learning_rate": 1.2725030826140568e-05, + "loss": 2.1557, + "step": 17610 + }, + { + "epoch": 0.74, + "learning_rate": 1.2720778944683024e-05, + "loss": 2.5958, + "step": 17620 + }, + { + "epoch": 0.74, + "learning_rate": 1.2716527063225478e-05, + "loss": 2.1916, + "step": 17630 + }, + { + "epoch": 0.74, + "learning_rate": 1.2712275181767933e-05, + "loss": 2.1137, + "step": 17640 + }, + { + "epoch": 0.74, + "learning_rate": 1.2708023300310389e-05, + "loss": 2.4346, + "step": 17650 + }, + { + "epoch": 0.74, + "learning_rate": 1.2703771418852843e-05, + "loss": 2.2496, + "step": 17660 + }, + { + "epoch": 0.74, + "learning_rate": 1.26995195373953e-05, + "loss": 2.2978, + "step": 17670 + }, + { + "epoch": 0.74, + "learning_rate": 1.2695267655937752e-05, + "loss": 2.3618, + "step": 17680 + }, + { + "epoch": 0.74, + "learning_rate": 1.2691015774480208e-05, + "loss": 2.6477, + "step": 17690 + }, + { + "epoch": 0.74, + "learning_rate": 1.2686763893022665e-05, + "loss": 2.3733, + "step": 17700 + }, + { + "epoch": 0.75, + "learning_rate": 1.2682512011565117e-05, + "loss": 1.8352, + "step": 17710 + }, + { + "epoch": 0.75, + "learning_rate": 1.2678260130107573e-05, + "loss": 1.903, + "step": 17720 + }, + { + "epoch": 0.75, + "learning_rate": 1.267400824865003e-05, + "loss": 2.4592, + "step": 17730 + }, + { + "epoch": 0.75, + "learning_rate": 1.2669756367192482e-05, + "loss": 2.0751, + "step": 17740 + }, + { + "epoch": 0.75, + "learning_rate": 1.2665504485734938e-05, + "loss": 2.1852, + "step": 17750 + }, + { + "epoch": 0.75, + "learning_rate": 1.2661252604277395e-05, + "loss": 1.8026, + "step": 17760 + }, + { + "epoch": 0.75, + "learning_rate": 1.2657000722819849e-05, + "loss": 2.2377, + "step": 17770 + }, + { + "epoch": 0.75, + "learning_rate": 1.2652748841362304e-05, + "loss": 2.654, + "step": 17780 + }, + { + "epoch": 0.75, + "learning_rate": 1.264849695990476e-05, + "loss": 1.941, + "step": 17790 + }, + { + "epoch": 0.75, + "learning_rate": 1.2644245078447214e-05, + "loss": 2.1042, + "step": 17800 + }, + { + "epoch": 0.75, + "learning_rate": 1.2639993196989669e-05, + "loss": 2.4165, + "step": 17810 + }, + { + "epoch": 0.75, + "learning_rate": 1.2635741315532125e-05, + "loss": 2.0932, + "step": 17820 + }, + { + "epoch": 0.75, + "learning_rate": 1.263148943407458e-05, + "loss": 2.3665, + "step": 17830 + }, + { + "epoch": 0.75, + "learning_rate": 1.2627237552617034e-05, + "loss": 2.376, + "step": 17840 + }, + { + "epoch": 0.75, + "learning_rate": 1.2622985671159488e-05, + "loss": 2.4151, + "step": 17850 + }, + { + "epoch": 0.75, + "learning_rate": 1.2618733789701944e-05, + "loss": 2.1847, + "step": 17860 + }, + { + "epoch": 0.75, + "learning_rate": 1.2614481908244399e-05, + "loss": 2.3342, + "step": 17870 + }, + { + "epoch": 0.75, + "learning_rate": 1.2610230026786853e-05, + "loss": 2.1517, + "step": 17880 + }, + { + "epoch": 0.75, + "learning_rate": 1.260597814532931e-05, + "loss": 2.0448, + "step": 17890 + }, + { + "epoch": 0.75, + "learning_rate": 1.2601726263871766e-05, + "loss": 2.2522, + "step": 17900 + }, + { + "epoch": 0.75, + "learning_rate": 1.2597474382414218e-05, + "loss": 2.1466, + "step": 17910 + }, + { + "epoch": 0.75, + "learning_rate": 1.2593222500956674e-05, + "loss": 2.3031, + "step": 17920 + }, + { + "epoch": 0.75, + "learning_rate": 1.258897061949913e-05, + "loss": 2.1069, + "step": 17930 + }, + { + "epoch": 0.75, + "learning_rate": 1.2584718738041583e-05, + "loss": 2.1193, + "step": 17940 + }, + { + "epoch": 0.76, + "learning_rate": 1.258046685658404e-05, + "loss": 1.9281, + "step": 17950 + }, + { + "epoch": 0.76, + "learning_rate": 1.2576214975126496e-05, + "loss": 1.7614, + "step": 17960 + }, + { + "epoch": 0.76, + "learning_rate": 1.2571963093668948e-05, + "loss": 1.954, + "step": 17970 + }, + { + "epoch": 0.76, + "learning_rate": 1.2567711212211405e-05, + "loss": 2.1048, + "step": 17980 + }, + { + "epoch": 0.76, + "learning_rate": 1.256345933075386e-05, + "loss": 2.3576, + "step": 17990 + }, + { + "epoch": 0.76, + "learning_rate": 1.2559207449296315e-05, + "loss": 2.5629, + "step": 18000 + }, + { + "epoch": 0.76, + "eval_loss": 1.8561279773712158, + "eval_runtime": 176.0578, + "eval_samples_per_second": 14.217, + "eval_steps_per_second": 7.111, + "step": 18000 + }, + { + "epoch": 0.76, + "learning_rate": 1.255495556783877e-05, + "loss": 2.1933, + "step": 18010 + }, + { + "epoch": 0.76, + "learning_rate": 1.2550703686381224e-05, + "loss": 2.3697, + "step": 18020 + }, + { + "epoch": 0.76, + "learning_rate": 1.254645180492368e-05, + "loss": 2.3221, + "step": 18030 + }, + { + "epoch": 0.76, + "learning_rate": 1.2542199923466135e-05, + "loss": 2.3338, + "step": 18040 + }, + { + "epoch": 0.76, + "learning_rate": 1.253794804200859e-05, + "loss": 2.5145, + "step": 18050 + }, + { + "epoch": 0.76, + "learning_rate": 1.2533696160551045e-05, + "loss": 2.0809, + "step": 18060 + }, + { + "epoch": 0.76, + "learning_rate": 1.2529444279093498e-05, + "loss": 2.5053, + "step": 18070 + }, + { + "epoch": 0.76, + "learning_rate": 1.2525192397635954e-05, + "loss": 1.9817, + "step": 18080 + }, + { + "epoch": 0.76, + "learning_rate": 1.252094051617841e-05, + "loss": 2.1424, + "step": 18090 + }, + { + "epoch": 0.76, + "learning_rate": 1.2516688634720867e-05, + "loss": 2.3833, + "step": 18100 + }, + { + "epoch": 0.76, + "learning_rate": 1.251243675326332e-05, + "loss": 2.2463, + "step": 18110 + }, + { + "epoch": 0.76, + "learning_rate": 1.2508184871805775e-05, + "loss": 2.4337, + "step": 18120 + }, + { + "epoch": 0.76, + "learning_rate": 1.2503932990348232e-05, + "loss": 2.3263, + "step": 18130 + }, + { + "epoch": 0.76, + "learning_rate": 1.2499681108890684e-05, + "loss": 2.4375, + "step": 18140 + }, + { + "epoch": 0.76, + "learning_rate": 1.249542922743314e-05, + "loss": 1.8182, + "step": 18150 + }, + { + "epoch": 0.76, + "learning_rate": 1.2491177345975595e-05, + "loss": 2.3109, + "step": 18160 + }, + { + "epoch": 0.76, + "learning_rate": 1.248692546451805e-05, + "loss": 2.5621, + "step": 18170 + }, + { + "epoch": 0.76, + "learning_rate": 1.2482673583060506e-05, + "loss": 2.2866, + "step": 18180 + }, + { + "epoch": 0.77, + "learning_rate": 1.247842170160296e-05, + "loss": 2.0402, + "step": 18190 + }, + { + "epoch": 0.77, + "learning_rate": 1.2474169820145416e-05, + "loss": 1.9998, + "step": 18200 + }, + { + "epoch": 0.77, + "learning_rate": 1.246991793868787e-05, + "loss": 1.9806, + "step": 18210 + }, + { + "epoch": 0.77, + "learning_rate": 1.2465666057230325e-05, + "loss": 2.3714, + "step": 18220 + }, + { + "epoch": 0.77, + "learning_rate": 1.2461414175772781e-05, + "loss": 2.431, + "step": 18230 + }, + { + "epoch": 0.77, + "learning_rate": 1.2457162294315234e-05, + "loss": 2.3278, + "step": 18240 + }, + { + "epoch": 0.77, + "learning_rate": 1.245291041285769e-05, + "loss": 2.4033, + "step": 18250 + }, + { + "epoch": 0.77, + "learning_rate": 1.2448658531400146e-05, + "loss": 2.2226, + "step": 18260 + }, + { + "epoch": 0.77, + "learning_rate": 1.2444406649942599e-05, + "loss": 2.2681, + "step": 18270 + }, + { + "epoch": 0.77, + "learning_rate": 1.2440154768485055e-05, + "loss": 2.4701, + "step": 18280 + }, + { + "epoch": 0.77, + "learning_rate": 1.2435902887027511e-05, + "loss": 2.611, + "step": 18290 + }, + { + "epoch": 0.77, + "learning_rate": 1.2431651005569964e-05, + "loss": 1.9993, + "step": 18300 + }, + { + "epoch": 0.77, + "learning_rate": 1.242739912411242e-05, + "loss": 1.9156, + "step": 18310 + }, + { + "epoch": 0.77, + "learning_rate": 1.2423147242654876e-05, + "loss": 2.6014, + "step": 18320 + }, + { + "epoch": 0.77, + "learning_rate": 1.2418895361197331e-05, + "loss": 2.1876, + "step": 18330 + }, + { + "epoch": 0.77, + "learning_rate": 1.2414643479739785e-05, + "loss": 2.2175, + "step": 18340 + }, + { + "epoch": 0.77, + "learning_rate": 1.2410391598282242e-05, + "loss": 2.2158, + "step": 18350 + }, + { + "epoch": 0.77, + "learning_rate": 1.2406139716824696e-05, + "loss": 2.3834, + "step": 18360 + }, + { + "epoch": 0.77, + "learning_rate": 1.240188783536715e-05, + "loss": 1.9719, + "step": 18370 + }, + { + "epoch": 0.77, + "learning_rate": 1.2397635953909607e-05, + "loss": 2.4343, + "step": 18380 + }, + { + "epoch": 0.77, + "learning_rate": 1.2393384072452061e-05, + "loss": 2.2525, + "step": 18390 + }, + { + "epoch": 0.77, + "learning_rate": 1.2389132190994516e-05, + "loss": 2.2265, + "step": 18400 + }, + { + "epoch": 0.77, + "learning_rate": 1.238488030953697e-05, + "loss": 2.1699, + "step": 18410 + }, + { + "epoch": 0.77, + "learning_rate": 1.2380628428079426e-05, + "loss": 2.5775, + "step": 18420 + }, + { + "epoch": 0.78, + "learning_rate": 1.2376376546621882e-05, + "loss": 2.1577, + "step": 18430 + }, + { + "epoch": 0.78, + "learning_rate": 1.2372124665164335e-05, + "loss": 2.1021, + "step": 18440 + }, + { + "epoch": 0.78, + "learning_rate": 1.2367872783706791e-05, + "loss": 2.3303, + "step": 18450 + }, + { + "epoch": 0.78, + "learning_rate": 1.2363620902249247e-05, + "loss": 2.2932, + "step": 18460 + }, + { + "epoch": 0.78, + "learning_rate": 1.23593690207917e-05, + "loss": 2.7011, + "step": 18470 + }, + { + "epoch": 0.78, + "learning_rate": 1.2355117139334156e-05, + "loss": 2.1523, + "step": 18480 + }, + { + "epoch": 0.78, + "learning_rate": 1.2350865257876612e-05, + "loss": 2.3008, + "step": 18490 + }, + { + "epoch": 0.78, + "learning_rate": 1.2346613376419065e-05, + "loss": 1.9752, + "step": 18500 + }, + { + "epoch": 0.78, + "eval_loss": 1.8424828052520752, + "eval_runtime": 174.3309, + "eval_samples_per_second": 14.358, + "eval_steps_per_second": 7.182, + "step": 18500 + }, + { + "epoch": 0.78, + "learning_rate": 1.2342361494961521e-05, + "loss": 2.284, + "step": 18510 + }, + { + "epoch": 0.78, + "learning_rate": 1.2338109613503978e-05, + "loss": 2.1015, + "step": 18520 + }, + { + "epoch": 0.78, + "learning_rate": 1.2333857732046432e-05, + "loss": 2.2809, + "step": 18530 + }, + { + "epoch": 0.78, + "learning_rate": 1.2329605850588886e-05, + "loss": 2.3151, + "step": 18540 + }, + { + "epoch": 0.78, + "learning_rate": 1.2325353969131341e-05, + "loss": 1.662, + "step": 18550 + }, + { + "epoch": 0.78, + "learning_rate": 1.2321102087673797e-05, + "loss": 1.9858, + "step": 18560 + }, + { + "epoch": 0.78, + "learning_rate": 1.2316850206216252e-05, + "loss": 2.5695, + "step": 18570 + }, + { + "epoch": 0.78, + "learning_rate": 1.2312598324758706e-05, + "loss": 2.8181, + "step": 18580 + }, + { + "epoch": 0.78, + "learning_rate": 1.2308346443301162e-05, + "loss": 2.388, + "step": 18590 + }, + { + "epoch": 0.78, + "learning_rate": 1.2304094561843617e-05, + "loss": 2.2133, + "step": 18600 + }, + { + "epoch": 0.78, + "learning_rate": 1.2299842680386071e-05, + "loss": 2.3169, + "step": 18610 + }, + { + "epoch": 0.78, + "learning_rate": 1.2295590798928527e-05, + "loss": 1.9527, + "step": 18620 + }, + { + "epoch": 0.78, + "learning_rate": 1.2291338917470983e-05, + "loss": 2.2607, + "step": 18630 + }, + { + "epoch": 0.78, + "learning_rate": 1.2287087036013436e-05, + "loss": 2.5624, + "step": 18640 + }, + { + "epoch": 0.78, + "learning_rate": 1.2282835154555892e-05, + "loss": 2.2322, + "step": 18650 + }, + { + "epoch": 0.79, + "learning_rate": 1.2278583273098348e-05, + "loss": 2.1425, + "step": 18660 + }, + { + "epoch": 0.79, + "learning_rate": 1.2274331391640801e-05, + "loss": 1.9878, + "step": 18670 + }, + { + "epoch": 0.79, + "learning_rate": 1.2270079510183257e-05, + "loss": 2.6583, + "step": 18680 + }, + { + "epoch": 0.79, + "learning_rate": 1.2265827628725713e-05, + "loss": 2.3085, + "step": 18690 + }, + { + "epoch": 0.79, + "learning_rate": 1.2261575747268166e-05, + "loss": 2.4383, + "step": 18700 + }, + { + "epoch": 0.79, + "learning_rate": 1.2257323865810622e-05, + "loss": 2.8947, + "step": 18710 + }, + { + "epoch": 0.79, + "learning_rate": 1.2253071984353077e-05, + "loss": 2.5398, + "step": 18720 + }, + { + "epoch": 0.79, + "learning_rate": 1.2248820102895533e-05, + "loss": 2.4291, + "step": 18730 + }, + { + "epoch": 0.79, + "learning_rate": 1.2244568221437987e-05, + "loss": 2.5805, + "step": 18740 + }, + { + "epoch": 0.79, + "learning_rate": 1.2240316339980442e-05, + "loss": 2.2642, + "step": 18750 + }, + { + "epoch": 0.79, + "learning_rate": 1.2236064458522898e-05, + "loss": 2.3821, + "step": 18760 + }, + { + "epoch": 0.79, + "learning_rate": 1.2231812577065353e-05, + "loss": 1.8382, + "step": 18770 + }, + { + "epoch": 0.79, + "learning_rate": 1.2227560695607807e-05, + "loss": 1.8444, + "step": 18780 + }, + { + "epoch": 0.79, + "learning_rate": 1.2223308814150263e-05, + "loss": 2.4173, + "step": 18790 + }, + { + "epoch": 0.79, + "learning_rate": 1.2219056932692716e-05, + "loss": 2.3083, + "step": 18800 + }, + { + "epoch": 0.79, + "learning_rate": 1.2214805051235172e-05, + "loss": 2.6496, + "step": 18810 + }, + { + "epoch": 0.79, + "learning_rate": 1.2210553169777628e-05, + "loss": 1.9028, + "step": 18820 + }, + { + "epoch": 0.79, + "learning_rate": 1.2206301288320081e-05, + "loss": 2.0677, + "step": 18830 + }, + { + "epoch": 0.79, + "learning_rate": 1.2202049406862537e-05, + "loss": 2.0836, + "step": 18840 + }, + { + "epoch": 0.79, + "learning_rate": 1.2197797525404993e-05, + "loss": 2.4567, + "step": 18850 + }, + { + "epoch": 0.79, + "learning_rate": 1.219354564394745e-05, + "loss": 2.4262, + "step": 18860 + }, + { + "epoch": 0.79, + "learning_rate": 1.2189293762489902e-05, + "loss": 2.5371, + "step": 18870 + }, + { + "epoch": 0.79, + "learning_rate": 1.2185041881032358e-05, + "loss": 2.4472, + "step": 18880 + }, + { + "epoch": 0.79, + "learning_rate": 1.2180789999574813e-05, + "loss": 2.341, + "step": 18890 + }, + { + "epoch": 0.8, + "learning_rate": 1.2176538118117267e-05, + "loss": 2.3559, + "step": 18900 + }, + { + "epoch": 0.8, + "learning_rate": 1.2172286236659723e-05, + "loss": 1.8539, + "step": 18910 + }, + { + "epoch": 0.8, + "learning_rate": 1.2168034355202178e-05, + "loss": 2.2824, + "step": 18920 + }, + { + "epoch": 0.8, + "learning_rate": 1.2163782473744632e-05, + "loss": 1.9494, + "step": 18930 + }, + { + "epoch": 0.8, + "learning_rate": 1.2159530592287088e-05, + "loss": 2.4157, + "step": 18940 + }, + { + "epoch": 0.8, + "learning_rate": 1.2155278710829543e-05, + "loss": 2.6123, + "step": 18950 + }, + { + "epoch": 0.8, + "learning_rate": 1.2151026829371999e-05, + "loss": 2.6423, + "step": 18960 + }, + { + "epoch": 0.8, + "learning_rate": 1.2146774947914452e-05, + "loss": 1.8365, + "step": 18970 + }, + { + "epoch": 0.8, + "learning_rate": 1.2142523066456908e-05, + "loss": 2.1457, + "step": 18980 + }, + { + "epoch": 0.8, + "learning_rate": 1.2138271184999364e-05, + "loss": 1.8823, + "step": 18990 + }, + { + "epoch": 0.8, + "learning_rate": 1.2134019303541817e-05, + "loss": 1.9448, + "step": 19000 + }, + { + "epoch": 0.8, + "eval_loss": 1.8287702798843384, + "eval_runtime": 175.7832, + "eval_samples_per_second": 14.239, + "eval_steps_per_second": 7.122, + "step": 19000 + }, + { + "epoch": 0.8, + "learning_rate": 1.2129767422084273e-05, + "loss": 2.3223, + "step": 19010 + }, + { + "epoch": 0.8, + "learning_rate": 1.212551554062673e-05, + "loss": 2.3445, + "step": 19020 + }, + { + "epoch": 0.8, + "learning_rate": 1.2121263659169182e-05, + "loss": 2.1137, + "step": 19030 + }, + { + "epoch": 0.8, + "learning_rate": 1.2117011777711638e-05, + "loss": 2.4059, + "step": 19040 + }, + { + "epoch": 0.8, + "learning_rate": 1.2112759896254094e-05, + "loss": 2.6698, + "step": 19050 + }, + { + "epoch": 0.8, + "learning_rate": 1.2108508014796549e-05, + "loss": 2.4193, + "step": 19060 + }, + { + "epoch": 0.8, + "learning_rate": 1.2104256133339003e-05, + "loss": 1.9789, + "step": 19070 + }, + { + "epoch": 0.8, + "learning_rate": 1.210000425188146e-05, + "loss": 2.1658, + "step": 19080 + }, + { + "epoch": 0.8, + "learning_rate": 1.2095752370423914e-05, + "loss": 2.1202, + "step": 19090 + }, + { + "epoch": 0.8, + "learning_rate": 1.2091500488966368e-05, + "loss": 2.3003, + "step": 19100 + }, + { + "epoch": 0.8, + "learning_rate": 1.2087248607508823e-05, + "loss": 2.6162, + "step": 19110 + }, + { + "epoch": 0.8, + "learning_rate": 1.2082996726051279e-05, + "loss": 2.3541, + "step": 19120 + }, + { + "epoch": 0.8, + "learning_rate": 1.2078744844593733e-05, + "loss": 2.4186, + "step": 19130 + }, + { + "epoch": 0.81, + "learning_rate": 1.2074492963136188e-05, + "loss": 2.9454, + "step": 19140 + }, + { + "epoch": 0.81, + "learning_rate": 1.2070241081678644e-05, + "loss": 2.0989, + "step": 19150 + }, + { + "epoch": 0.81, + "learning_rate": 1.20659892002211e-05, + "loss": 2.4327, + "step": 19160 + }, + { + "epoch": 0.81, + "learning_rate": 1.2061737318763553e-05, + "loss": 2.6236, + "step": 19170 + }, + { + "epoch": 0.81, + "learning_rate": 1.2057485437306009e-05, + "loss": 2.5432, + "step": 19180 + }, + { + "epoch": 0.81, + "learning_rate": 1.2053233555848465e-05, + "loss": 2.1143, + "step": 19190 + }, + { + "epoch": 0.81, + "learning_rate": 1.2048981674390918e-05, + "loss": 2.2297, + "step": 19200 + }, + { + "epoch": 0.81, + "learning_rate": 1.2044729792933374e-05, + "loss": 2.3105, + "step": 19210 + }, + { + "epoch": 0.81, + "learning_rate": 1.204047791147583e-05, + "loss": 2.1033, + "step": 19220 + }, + { + "epoch": 0.81, + "learning_rate": 1.2036226030018283e-05, + "loss": 1.9961, + "step": 19230 + }, + { + "epoch": 0.81, + "learning_rate": 1.203197414856074e-05, + "loss": 1.9142, + "step": 19240 + }, + { + "epoch": 0.81, + "learning_rate": 1.2027722267103195e-05, + "loss": 2.2007, + "step": 19250 + }, + { + "epoch": 0.81, + "learning_rate": 1.202347038564565e-05, + "loss": 2.5775, + "step": 19260 + }, + { + "epoch": 0.81, + "learning_rate": 1.2019218504188104e-05, + "loss": 1.6784, + "step": 19270 + }, + { + "epoch": 0.81, + "learning_rate": 1.2014966622730559e-05, + "loss": 1.8458, + "step": 19280 + }, + { + "epoch": 0.81, + "learning_rate": 1.2010714741273015e-05, + "loss": 2.3964, + "step": 19290 + }, + { + "epoch": 0.81, + "learning_rate": 1.200646285981547e-05, + "loss": 2.0427, + "step": 19300 + }, + { + "epoch": 0.81, + "learning_rate": 1.2002210978357924e-05, + "loss": 1.8841, + "step": 19310 + }, + { + "epoch": 0.81, + "learning_rate": 1.199795909690038e-05, + "loss": 2.3461, + "step": 19320 + }, + { + "epoch": 0.81, + "learning_rate": 1.1993707215442834e-05, + "loss": 2.5327, + "step": 19330 + }, + { + "epoch": 0.81, + "learning_rate": 1.1989455333985289e-05, + "loss": 2.0839, + "step": 19340 + }, + { + "epoch": 0.81, + "learning_rate": 1.1985203452527745e-05, + "loss": 2.2541, + "step": 19350 + }, + { + "epoch": 0.81, + "learning_rate": 1.1980951571070198e-05, + "loss": 2.2845, + "step": 19360 + }, + { + "epoch": 0.81, + "learning_rate": 1.1976699689612654e-05, + "loss": 2.4602, + "step": 19370 + }, + { + "epoch": 0.82, + "learning_rate": 1.197244780815511e-05, + "loss": 1.7821, + "step": 19380 + }, + { + "epoch": 0.82, + "learning_rate": 1.1968195926697566e-05, + "loss": 2.02, + "step": 19390 + }, + { + "epoch": 0.82, + "learning_rate": 1.1963944045240019e-05, + "loss": 1.92, + "step": 19400 + }, + { + "epoch": 0.82, + "learning_rate": 1.1959692163782475e-05, + "loss": 1.9421, + "step": 19410 + }, + { + "epoch": 0.82, + "learning_rate": 1.1955440282324931e-05, + "loss": 2.3449, + "step": 19420 + }, + { + "epoch": 0.82, + "learning_rate": 1.1951188400867384e-05, + "loss": 2.1307, + "step": 19430 + }, + { + "epoch": 0.82, + "learning_rate": 1.194693651940984e-05, + "loss": 2.0286, + "step": 19440 + }, + { + "epoch": 0.82, + "learning_rate": 1.1942684637952295e-05, + "loss": 2.3532, + "step": 19450 + }, + { + "epoch": 0.82, + "learning_rate": 1.1938432756494749e-05, + "loss": 1.7903, + "step": 19460 + }, + { + "epoch": 0.82, + "learning_rate": 1.1934180875037205e-05, + "loss": 2.1869, + "step": 19470 + }, + { + "epoch": 0.82, + "learning_rate": 1.192992899357966e-05, + "loss": 2.1925, + "step": 19480 + }, + { + "epoch": 0.82, + "learning_rate": 1.1925677112122116e-05, + "loss": 2.3387, + "step": 19490 + }, + { + "epoch": 0.82, + "learning_rate": 1.1921425230664569e-05, + "loss": 2.1027, + "step": 19500 + }, + { + "epoch": 0.82, + "eval_loss": 1.8289752006530762, + "eval_runtime": 175.8843, + "eval_samples_per_second": 14.231, + "eval_steps_per_second": 7.118, + "step": 19500 + }, + { + "epoch": 0.82, + "learning_rate": 1.1917173349207025e-05, + "loss": 2.2901, + "step": 19510 + }, + { + "epoch": 0.82, + "learning_rate": 1.1912921467749481e-05, + "loss": 2.0443, + "step": 19520 + }, + { + "epoch": 0.82, + "learning_rate": 1.1908669586291934e-05, + "loss": 2.1688, + "step": 19530 + }, + { + "epoch": 0.82, + "learning_rate": 1.190441770483439e-05, + "loss": 2.1367, + "step": 19540 + }, + { + "epoch": 0.82, + "learning_rate": 1.1900165823376846e-05, + "loss": 2.14, + "step": 19550 + }, + { + "epoch": 0.82, + "learning_rate": 1.1895913941919299e-05, + "loss": 2.0942, + "step": 19560 + }, + { + "epoch": 0.82, + "learning_rate": 1.1891662060461755e-05, + "loss": 2.3131, + "step": 19570 + }, + { + "epoch": 0.82, + "learning_rate": 1.1887410179004211e-05, + "loss": 1.9678, + "step": 19580 + }, + { + "epoch": 0.82, + "learning_rate": 1.1883158297546666e-05, + "loss": 2.3818, + "step": 19590 + }, + { + "epoch": 0.82, + "learning_rate": 1.187890641608912e-05, + "loss": 2.1617, + "step": 19600 + }, + { + "epoch": 0.83, + "learning_rate": 1.1874654534631576e-05, + "loss": 2.0647, + "step": 19610 + }, + { + "epoch": 0.83, + "learning_rate": 1.187040265317403e-05, + "loss": 2.1498, + "step": 19620 + }, + { + "epoch": 0.83, + "learning_rate": 1.1866150771716485e-05, + "loss": 2.09, + "step": 19630 + }, + { + "epoch": 0.83, + "learning_rate": 1.1861898890258941e-05, + "loss": 2.3031, + "step": 19640 + }, + { + "epoch": 0.83, + "learning_rate": 1.1857647008801396e-05, + "loss": 1.7862, + "step": 19650 + }, + { + "epoch": 0.83, + "learning_rate": 1.185339512734385e-05, + "loss": 2.3439, + "step": 19660 + }, + { + "epoch": 0.83, + "learning_rate": 1.1849143245886305e-05, + "loss": 2.2409, + "step": 19670 + }, + { + "epoch": 0.83, + "learning_rate": 1.184489136442876e-05, + "loss": 1.838, + "step": 19680 + }, + { + "epoch": 0.83, + "learning_rate": 1.1840639482971217e-05, + "loss": 2.435, + "step": 19690 + }, + { + "epoch": 0.83, + "learning_rate": 1.183638760151367e-05, + "loss": 1.8176, + "step": 19700 + }, + { + "epoch": 0.83, + "learning_rate": 1.1832135720056126e-05, + "loss": 2.5323, + "step": 19710 + }, + { + "epoch": 0.83, + "learning_rate": 1.1827883838598582e-05, + "loss": 2.392, + "step": 19720 + }, + { + "epoch": 0.83, + "learning_rate": 1.1823631957141035e-05, + "loss": 2.6848, + "step": 19730 + }, + { + "epoch": 0.83, + "learning_rate": 1.1819380075683491e-05, + "loss": 2.3956, + "step": 19740 + }, + { + "epoch": 0.83, + "learning_rate": 1.1815128194225947e-05, + "loss": 2.0737, + "step": 19750 + }, + { + "epoch": 0.83, + "learning_rate": 1.18108763127684e-05, + "loss": 2.4459, + "step": 19760 + }, + { + "epoch": 0.83, + "learning_rate": 1.1806624431310856e-05, + "loss": 1.9749, + "step": 19770 + }, + { + "epoch": 0.83, + "learning_rate": 1.1802372549853312e-05, + "loss": 2.01, + "step": 19780 + }, + { + "epoch": 0.83, + "learning_rate": 1.1798120668395767e-05, + "loss": 1.8578, + "step": 19790 + }, + { + "epoch": 0.83, + "learning_rate": 1.1793868786938221e-05, + "loss": 2.1742, + "step": 19800 + }, + { + "epoch": 0.83, + "learning_rate": 1.1789616905480677e-05, + "loss": 2.3556, + "step": 19810 + }, + { + "epoch": 0.83, + "learning_rate": 1.1785365024023132e-05, + "loss": 2.2053, + "step": 19820 + }, + { + "epoch": 0.83, + "learning_rate": 1.1781113142565586e-05, + "loss": 2.1769, + "step": 19830 + }, + { + "epoch": 0.83, + "learning_rate": 1.177686126110804e-05, + "loss": 2.2273, + "step": 19840 + }, + { + "epoch": 0.84, + "learning_rate": 1.1772609379650497e-05, + "loss": 2.3378, + "step": 19850 + }, + { + "epoch": 0.84, + "learning_rate": 1.1768357498192951e-05, + "loss": 2.3385, + "step": 19860 + }, + { + "epoch": 0.84, + "learning_rate": 1.1764105616735406e-05, + "loss": 2.1603, + "step": 19870 + }, + { + "epoch": 0.84, + "learning_rate": 1.1759853735277862e-05, + "loss": 2.0272, + "step": 19880 + }, + { + "epoch": 0.84, + "learning_rate": 1.1755601853820315e-05, + "loss": 2.0062, + "step": 19890 + }, + { + "epoch": 0.84, + "learning_rate": 1.175134997236277e-05, + "loss": 2.406, + "step": 19900 + }, + { + "epoch": 0.84, + "learning_rate": 1.1747098090905227e-05, + "loss": 2.4801, + "step": 19910 + }, + { + "epoch": 0.84, + "learning_rate": 1.1742846209447683e-05, + "loss": 1.9527, + "step": 19920 + }, + { + "epoch": 0.84, + "learning_rate": 1.1738594327990136e-05, + "loss": 2.2372, + "step": 19930 + }, + { + "epoch": 0.84, + "learning_rate": 1.1734342446532592e-05, + "loss": 2.4285, + "step": 19940 + }, + { + "epoch": 0.84, + "learning_rate": 1.1730090565075048e-05, + "loss": 2.214, + "step": 19950 + }, + { + "epoch": 0.84, + "learning_rate": 1.1725838683617501e-05, + "loss": 2.6894, + "step": 19960 + }, + { + "epoch": 0.84, + "learning_rate": 1.1721586802159957e-05, + "loss": 2.2481, + "step": 19970 + }, + { + "epoch": 0.84, + "learning_rate": 1.1717334920702411e-05, + "loss": 2.3957, + "step": 19980 + }, + { + "epoch": 0.84, + "learning_rate": 1.1713083039244866e-05, + "loss": 2.1499, + "step": 19990 + }, + { + "epoch": 0.84, + "learning_rate": 1.1708831157787322e-05, + "loss": 2.3084, + "step": 20000 + }, + { + "epoch": 0.84, + "eval_loss": 1.8395805358886719, + "eval_runtime": 175.6854, + "eval_samples_per_second": 14.247, + "eval_steps_per_second": 7.126, + "step": 20000 + }, + { + "epoch": 0.84, + "learning_rate": 1.1704579276329777e-05, + "loss": 2.11, + "step": 20010 + }, + { + "epoch": 0.84, + "learning_rate": 1.1700327394872233e-05, + "loss": 1.8904, + "step": 20020 + }, + { + "epoch": 0.84, + "learning_rate": 1.1696075513414687e-05, + "loss": 2.3283, + "step": 20030 + }, + { + "epoch": 0.84, + "learning_rate": 1.1691823631957142e-05, + "loss": 1.8581, + "step": 20040 + }, + { + "epoch": 0.84, + "learning_rate": 1.1687571750499598e-05, + "loss": 2.0448, + "step": 20050 + }, + { + "epoch": 0.84, + "learning_rate": 1.168331986904205e-05, + "loss": 2.0497, + "step": 20060 + }, + { + "epoch": 0.84, + "learning_rate": 1.1679067987584507e-05, + "loss": 2.1895, + "step": 20070 + }, + { + "epoch": 0.84, + "learning_rate": 1.1674816106126963e-05, + "loss": 1.7949, + "step": 20080 + }, + { + "epoch": 0.85, + "learning_rate": 1.1670564224669416e-05, + "loss": 2.0302, + "step": 20090 + }, + { + "epoch": 0.85, + "learning_rate": 1.1666312343211872e-05, + "loss": 2.1369, + "step": 20100 + }, + { + "epoch": 0.85, + "learning_rate": 1.1662060461754328e-05, + "loss": 2.1922, + "step": 20110 + }, + { + "epoch": 0.85, + "learning_rate": 1.1657808580296784e-05, + "loss": 2.4506, + "step": 20120 + }, + { + "epoch": 0.85, + "learning_rate": 1.1653556698839237e-05, + "loss": 2.4267, + "step": 20130 + }, + { + "epoch": 0.85, + "learning_rate": 1.1649304817381693e-05, + "loss": 1.9311, + "step": 20140 + }, + { + "epoch": 0.85, + "learning_rate": 1.1645052935924147e-05, + "loss": 2.3057, + "step": 20150 + }, + { + "epoch": 0.85, + "learning_rate": 1.1640801054466602e-05, + "loss": 1.9138, + "step": 20160 + }, + { + "epoch": 0.85, + "learning_rate": 1.1636549173009058e-05, + "loss": 2.2328, + "step": 20170 + }, + { + "epoch": 0.85, + "learning_rate": 1.1632297291551512e-05, + "loss": 2.2676, + "step": 20180 + }, + { + "epoch": 0.85, + "learning_rate": 1.1628045410093967e-05, + "loss": 2.4848, + "step": 20190 + }, + { + "epoch": 0.85, + "learning_rate": 1.1623793528636423e-05, + "loss": 2.147, + "step": 20200 + }, + { + "epoch": 0.85, + "learning_rate": 1.1619541647178878e-05, + "loss": 2.1419, + "step": 20210 + }, + { + "epoch": 0.85, + "learning_rate": 1.1615289765721334e-05, + "loss": 2.0516, + "step": 20220 + }, + { + "epoch": 0.85, + "learning_rate": 1.1611037884263786e-05, + "loss": 1.8975, + "step": 20230 + }, + { + "epoch": 0.85, + "learning_rate": 1.1606786002806243e-05, + "loss": 2.4029, + "step": 20240 + }, + { + "epoch": 0.85, + "learning_rate": 1.1602534121348699e-05, + "loss": 2.2535, + "step": 20250 + }, + { + "epoch": 0.85, + "learning_rate": 1.1598282239891152e-05, + "loss": 2.0156, + "step": 20260 + }, + { + "epoch": 0.85, + "learning_rate": 1.1594030358433608e-05, + "loss": 2.8164, + "step": 20270 + }, + { + "epoch": 0.85, + "learning_rate": 1.1589778476976064e-05, + "loss": 2.3693, + "step": 20280 + }, + { + "epoch": 0.85, + "learning_rate": 1.1585526595518517e-05, + "loss": 1.9898, + "step": 20290 + }, + { + "epoch": 0.85, + "learning_rate": 1.1581274714060973e-05, + "loss": 2.8203, + "step": 20300 + }, + { + "epoch": 0.85, + "learning_rate": 1.1577022832603429e-05, + "loss": 2.4059, + "step": 20310 + }, + { + "epoch": 0.85, + "learning_rate": 1.1572770951145882e-05, + "loss": 2.1781, + "step": 20320 + }, + { + "epoch": 0.86, + "learning_rate": 1.1568519069688338e-05, + "loss": 2.2044, + "step": 20330 + }, + { + "epoch": 0.86, + "learning_rate": 1.1564267188230794e-05, + "loss": 2.264, + "step": 20340 + }, + { + "epoch": 0.86, + "learning_rate": 1.1560015306773248e-05, + "loss": 2.4177, + "step": 20350 + }, + { + "epoch": 0.86, + "learning_rate": 1.1555763425315703e-05, + "loss": 1.8219, + "step": 20360 + }, + { + "epoch": 0.86, + "learning_rate": 1.1551511543858157e-05, + "loss": 2.5629, + "step": 20370 + }, + { + "epoch": 0.86, + "learning_rate": 1.1547259662400614e-05, + "loss": 2.4924, + "step": 20380 + }, + { + "epoch": 0.86, + "learning_rate": 1.1543007780943068e-05, + "loss": 2.3696, + "step": 20390 + }, + { + "epoch": 0.86, + "learning_rate": 1.1538755899485522e-05, + "loss": 2.1506, + "step": 20400 + }, + { + "epoch": 0.86, + "learning_rate": 1.1534504018027979e-05, + "loss": 2.3529, + "step": 20410 + }, + { + "epoch": 0.86, + "learning_rate": 1.1530252136570433e-05, + "loss": 2.3756, + "step": 20420 + }, + { + "epoch": 0.86, + "learning_rate": 1.1526000255112887e-05, + "loss": 2.5483, + "step": 20430 + }, + { + "epoch": 0.86, + "learning_rate": 1.1521748373655344e-05, + "loss": 2.2056, + "step": 20440 + }, + { + "epoch": 0.86, + "learning_rate": 1.15174964921978e-05, + "loss": 2.2388, + "step": 20450 + }, + { + "epoch": 0.86, + "learning_rate": 1.1513244610740253e-05, + "loss": 1.8584, + "step": 20460 + }, + { + "epoch": 0.86, + "learning_rate": 1.1508992729282709e-05, + "loss": 2.2237, + "step": 20470 + }, + { + "epoch": 0.86, + "learning_rate": 1.1504740847825165e-05, + "loss": 1.9827, + "step": 20480 + }, + { + "epoch": 0.86, + "learning_rate": 1.1500488966367618e-05, + "loss": 2.5274, + "step": 20490 + }, + { + "epoch": 0.86, + "learning_rate": 1.1496237084910074e-05, + "loss": 1.9706, + "step": 20500 + }, + { + "epoch": 0.86, + "eval_loss": 1.834281086921692, + "eval_runtime": 175.8828, + "eval_samples_per_second": 14.231, + "eval_steps_per_second": 7.118, + "step": 20500 + }, + { + "epoch": 0.86, + "learning_rate": 1.149198520345253e-05, + "loss": 2.2801, + "step": 20510 + }, + { + "epoch": 0.86, + "learning_rate": 1.1487733321994983e-05, + "loss": 2.4809, + "step": 20520 + }, + { + "epoch": 0.86, + "learning_rate": 1.1483481440537439e-05, + "loss": 2.5073, + "step": 20530 + }, + { + "epoch": 0.86, + "learning_rate": 1.1479229559079893e-05, + "loss": 1.6704, + "step": 20540 + }, + { + "epoch": 0.86, + "learning_rate": 1.147497767762235e-05, + "loss": 2.1681, + "step": 20550 + }, + { + "epoch": 0.86, + "learning_rate": 1.1470725796164804e-05, + "loss": 2.3059, + "step": 20560 + }, + { + "epoch": 0.87, + "learning_rate": 1.1466473914707258e-05, + "loss": 2.4117, + "step": 20570 + }, + { + "epoch": 0.87, + "learning_rate": 1.1462222033249715e-05, + "loss": 2.028, + "step": 20580 + }, + { + "epoch": 0.87, + "learning_rate": 1.1457970151792169e-05, + "loss": 2.3308, + "step": 20590 + }, + { + "epoch": 0.87, + "learning_rate": 1.1453718270334623e-05, + "loss": 2.0801, + "step": 20600 + }, + { + "epoch": 0.87, + "learning_rate": 1.144946638887708e-05, + "loss": 2.0363, + "step": 20610 + }, + { + "epoch": 0.87, + "learning_rate": 1.1445214507419532e-05, + "loss": 2.3273, + "step": 20620 + }, + { + "epoch": 0.87, + "learning_rate": 1.1440962625961989e-05, + "loss": 2.7378, + "step": 20630 + }, + { + "epoch": 0.87, + "learning_rate": 1.1436710744504445e-05, + "loss": 2.8054, + "step": 20640 + }, + { + "epoch": 0.87, + "learning_rate": 1.14324588630469e-05, + "loss": 2.0178, + "step": 20650 + }, + { + "epoch": 0.87, + "learning_rate": 1.1428206981589354e-05, + "loss": 2.4087, + "step": 20660 + }, + { + "epoch": 0.87, + "learning_rate": 1.142395510013181e-05, + "loss": 2.0017, + "step": 20670 + }, + { + "epoch": 0.87, + "learning_rate": 1.1419703218674266e-05, + "loss": 2.5347, + "step": 20680 + }, + { + "epoch": 0.87, + "learning_rate": 1.1415451337216719e-05, + "loss": 2.5313, + "step": 20690 + }, + { + "epoch": 0.87, + "learning_rate": 1.1411199455759175e-05, + "loss": 2.4035, + "step": 20700 + }, + { + "epoch": 0.87, + "learning_rate": 1.140694757430163e-05, + "loss": 2.2844, + "step": 20710 + }, + { + "epoch": 0.87, + "learning_rate": 1.1402695692844084e-05, + "loss": 2.917, + "step": 20720 + }, + { + "epoch": 0.87, + "learning_rate": 1.139844381138654e-05, + "loss": 2.602, + "step": 20730 + }, + { + "epoch": 0.87, + "learning_rate": 1.1394191929928994e-05, + "loss": 2.2953, + "step": 20740 + }, + { + "epoch": 0.87, + "learning_rate": 1.138994004847145e-05, + "loss": 2.0735, + "step": 20750 + }, + { + "epoch": 0.87, + "learning_rate": 1.1385688167013905e-05, + "loss": 2.2651, + "step": 20760 + }, + { + "epoch": 0.87, + "learning_rate": 1.138143628555636e-05, + "loss": 2.5932, + "step": 20770 + }, + { + "epoch": 0.87, + "learning_rate": 1.1377184404098816e-05, + "loss": 2.3563, + "step": 20780 + }, + { + "epoch": 0.87, + "learning_rate": 1.1372932522641268e-05, + "loss": 2.2868, + "step": 20790 + }, + { + "epoch": 0.88, + "learning_rate": 1.1368680641183724e-05, + "loss": 2.1799, + "step": 20800 + }, + { + "epoch": 0.88, + "learning_rate": 1.136442875972618e-05, + "loss": 2.3529, + "step": 20810 + }, + { + "epoch": 0.88, + "learning_rate": 1.1360176878268633e-05, + "loss": 2.4422, + "step": 20820 + }, + { + "epoch": 0.88, + "learning_rate": 1.135592499681109e-05, + "loss": 2.3017, + "step": 20830 + }, + { + "epoch": 0.88, + "learning_rate": 1.1351673115353546e-05, + "loss": 2.6883, + "step": 20840 + }, + { + "epoch": 0.88, + "learning_rate": 1.1347421233895998e-05, + "loss": 2.2134, + "step": 20850 + }, + { + "epoch": 0.88, + "learning_rate": 1.1343169352438455e-05, + "loss": 2.8651, + "step": 20860 + }, + { + "epoch": 0.88, + "learning_rate": 1.133891747098091e-05, + "loss": 1.7896, + "step": 20870 + }, + { + "epoch": 0.88, + "learning_rate": 1.1334665589523365e-05, + "loss": 1.8225, + "step": 20880 + }, + { + "epoch": 0.88, + "learning_rate": 1.133041370806582e-05, + "loss": 2.0254, + "step": 20890 + }, + { + "epoch": 0.88, + "learning_rate": 1.1326161826608276e-05, + "loss": 2.4365, + "step": 20900 + }, + { + "epoch": 0.88, + "learning_rate": 1.132190994515073e-05, + "loss": 2.5786, + "step": 20910 + }, + { + "epoch": 0.88, + "learning_rate": 1.1317658063693185e-05, + "loss": 2.1062, + "step": 20920 + }, + { + "epoch": 0.88, + "learning_rate": 1.131340618223564e-05, + "loss": 2.3608, + "step": 20930 + }, + { + "epoch": 0.88, + "learning_rate": 1.1309154300778095e-05, + "loss": 2.5084, + "step": 20940 + }, + { + "epoch": 0.88, + "learning_rate": 1.130490241932055e-05, + "loss": 2.4546, + "step": 20950 + }, + { + "epoch": 0.88, + "learning_rate": 1.1300650537863004e-05, + "loss": 2.3638, + "step": 20960 + }, + { + "epoch": 0.88, + "learning_rate": 1.129639865640546e-05, + "loss": 2.2632, + "step": 20970 + }, + { + "epoch": 0.88, + "learning_rate": 1.1292146774947917e-05, + "loss": 2.2695, + "step": 20980 + }, + { + "epoch": 0.88, + "learning_rate": 1.128789489349037e-05, + "loss": 2.5105, + "step": 20990 + }, + { + "epoch": 0.88, + "learning_rate": 1.1283643012032826e-05, + "loss": 2.3075, + "step": 21000 + }, + { + "epoch": 0.88, + "eval_loss": 1.822985053062439, + "eval_runtime": 175.9116, + "eval_samples_per_second": 14.229, + "eval_steps_per_second": 7.117, + "step": 21000 + }, + { + "epoch": 0.88, + "learning_rate": 1.1279391130575282e-05, + "loss": 1.9912, + "step": 21010 + }, + { + "epoch": 0.88, + "learning_rate": 1.1275139249117734e-05, + "loss": 2.5211, + "step": 21020 + }, + { + "epoch": 0.88, + "learning_rate": 1.127088736766019e-05, + "loss": 2.2959, + "step": 21030 + }, + { + "epoch": 0.89, + "learning_rate": 1.1266635486202647e-05, + "loss": 2.1357, + "step": 21040 + }, + { + "epoch": 0.89, + "learning_rate": 1.12623836047451e-05, + "loss": 2.093, + "step": 21050 + }, + { + "epoch": 0.89, + "learning_rate": 1.1258131723287556e-05, + "loss": 2.3973, + "step": 21060 + }, + { + "epoch": 0.89, + "learning_rate": 1.1253879841830012e-05, + "loss": 2.2446, + "step": 21070 + }, + { + "epoch": 0.89, + "learning_rate": 1.1249627960372466e-05, + "loss": 2.3044, + "step": 21080 + }, + { + "epoch": 0.89, + "learning_rate": 1.124537607891492e-05, + "loss": 1.7142, + "step": 21090 + }, + { + "epoch": 0.89, + "learning_rate": 1.1241124197457375e-05, + "loss": 2.2407, + "step": 21100 + }, + { + "epoch": 0.89, + "learning_rate": 1.1236872315999831e-05, + "loss": 2.0207, + "step": 21110 + }, + { + "epoch": 0.89, + "learning_rate": 1.1232620434542286e-05, + "loss": 2.6111, + "step": 21120 + }, + { + "epoch": 0.89, + "learning_rate": 1.122836855308474e-05, + "loss": 2.5053, + "step": 21130 + }, + { + "epoch": 0.89, + "learning_rate": 1.1224116671627196e-05, + "loss": 1.7139, + "step": 21140 + }, + { + "epoch": 0.89, + "learning_rate": 1.1219864790169651e-05, + "loss": 2.1259, + "step": 21150 + }, + { + "epoch": 0.89, + "learning_rate": 1.1215612908712105e-05, + "loss": 2.6204, + "step": 21160 + }, + { + "epoch": 0.89, + "learning_rate": 1.1211361027254561e-05, + "loss": 2.0562, + "step": 21170 + }, + { + "epoch": 0.89, + "learning_rate": 1.1207109145797018e-05, + "loss": 2.081, + "step": 21180 + }, + { + "epoch": 0.89, + "learning_rate": 1.120285726433947e-05, + "loss": 2.3546, + "step": 21190 + }, + { + "epoch": 0.89, + "learning_rate": 1.1198605382881927e-05, + "loss": 2.3827, + "step": 21200 + }, + { + "epoch": 0.89, + "learning_rate": 1.1194353501424383e-05, + "loss": 2.1586, + "step": 21210 + }, + { + "epoch": 0.89, + "learning_rate": 1.1190101619966835e-05, + "loss": 2.1258, + "step": 21220 + }, + { + "epoch": 0.89, + "learning_rate": 1.1185849738509292e-05, + "loss": 2.5523, + "step": 21230 + }, + { + "epoch": 0.89, + "learning_rate": 1.1181597857051748e-05, + "loss": 2.3021, + "step": 21240 + }, + { + "epoch": 0.89, + "learning_rate": 1.11773459755942e-05, + "loss": 2.0766, + "step": 21250 + }, + { + "epoch": 0.89, + "learning_rate": 1.1173094094136657e-05, + "loss": 2.3272, + "step": 21260 + }, + { + "epoch": 0.89, + "learning_rate": 1.1168842212679111e-05, + "loss": 2.0162, + "step": 21270 + }, + { + "epoch": 0.9, + "learning_rate": 1.1164590331221567e-05, + "loss": 2.2479, + "step": 21280 + }, + { + "epoch": 0.9, + "learning_rate": 1.1160338449764022e-05, + "loss": 2.1886, + "step": 21290 + }, + { + "epoch": 0.9, + "learning_rate": 1.1156086568306476e-05, + "loss": 1.9603, + "step": 21300 + }, + { + "epoch": 0.9, + "learning_rate": 1.1151834686848932e-05, + "loss": 2.5595, + "step": 21310 + }, + { + "epoch": 0.9, + "learning_rate": 1.1147582805391385e-05, + "loss": 2.602, + "step": 21320 + }, + { + "epoch": 0.9, + "learning_rate": 1.1143330923933841e-05, + "loss": 2.0311, + "step": 21330 + }, + { + "epoch": 0.9, + "learning_rate": 1.1139079042476297e-05, + "loss": 2.3421, + "step": 21340 + }, + { + "epoch": 0.9, + "learning_rate": 1.113482716101875e-05, + "loss": 2.2651, + "step": 21350 + }, + { + "epoch": 0.9, + "learning_rate": 1.1130575279561206e-05, + "loss": 2.3257, + "step": 21360 + }, + { + "epoch": 0.9, + "learning_rate": 1.1126323398103662e-05, + "loss": 2.0084, + "step": 21370 + }, + { + "epoch": 0.9, + "learning_rate": 1.1122071516646115e-05, + "loss": 2.7653, + "step": 21380 + }, + { + "epoch": 0.9, + "learning_rate": 1.1117819635188571e-05, + "loss": 2.2977, + "step": 21390 + }, + { + "epoch": 0.9, + "learning_rate": 1.1113567753731028e-05, + "loss": 2.0105, + "step": 21400 + }, + { + "epoch": 0.9, + "learning_rate": 1.1109315872273482e-05, + "loss": 2.3345, + "step": 21410 + }, + { + "epoch": 0.9, + "learning_rate": 1.1105063990815936e-05, + "loss": 2.4214, + "step": 21420 + }, + { + "epoch": 0.9, + "learning_rate": 1.1100812109358393e-05, + "loss": 2.5472, + "step": 21430 + }, + { + "epoch": 0.9, + "learning_rate": 1.1096560227900847e-05, + "loss": 2.2811, + "step": 21440 + }, + { + "epoch": 0.9, + "learning_rate": 1.1092308346443302e-05, + "loss": 2.3565, + "step": 21450 + }, + { + "epoch": 0.9, + "learning_rate": 1.1088056464985758e-05, + "loss": 2.026, + "step": 21460 + }, + { + "epoch": 0.9, + "learning_rate": 1.1083804583528212e-05, + "loss": 1.721, + "step": 21470 + }, + { + "epoch": 0.9, + "learning_rate": 1.1079552702070667e-05, + "loss": 2.2634, + "step": 21480 + }, + { + "epoch": 0.9, + "learning_rate": 1.1075300820613121e-05, + "loss": 2.2773, + "step": 21490 + }, + { + "epoch": 0.9, + "learning_rate": 1.1071048939155577e-05, + "loss": 1.7214, + "step": 21500 + }, + { + "epoch": 0.9, + "eval_loss": 1.824805498123169, + "eval_runtime": 174.3778, + "eval_samples_per_second": 14.354, + "eval_steps_per_second": 7.18, + "step": 21500 + }, + { + "epoch": 0.9, + "learning_rate": 1.1066797057698033e-05, + "loss": 2.6856, + "step": 21510 + }, + { + "epoch": 0.91, + "learning_rate": 1.1062545176240486e-05, + "loss": 2.0154, + "step": 21520 + }, + { + "epoch": 0.91, + "learning_rate": 1.1058293294782942e-05, + "loss": 1.9537, + "step": 21530 + }, + { + "epoch": 0.91, + "learning_rate": 1.1054041413325398e-05, + "loss": 2.0085, + "step": 21540 + }, + { + "epoch": 0.91, + "learning_rate": 1.1049789531867851e-05, + "loss": 2.3019, + "step": 21550 + }, + { + "epoch": 0.91, + "learning_rate": 1.1045537650410307e-05, + "loss": 2.0823, + "step": 21560 + }, + { + "epoch": 0.91, + "learning_rate": 1.1041285768952764e-05, + "loss": 2.0807, + "step": 21570 + }, + { + "epoch": 0.91, + "learning_rate": 1.1037033887495216e-05, + "loss": 2.4829, + "step": 21580 + }, + { + "epoch": 0.91, + "learning_rate": 1.1032782006037672e-05, + "loss": 2.1712, + "step": 21590 + }, + { + "epoch": 0.91, + "learning_rate": 1.1028530124580129e-05, + "loss": 2.5054, + "step": 21600 + }, + { + "epoch": 0.91, + "learning_rate": 1.1024278243122583e-05, + "loss": 2.1296, + "step": 21610 + }, + { + "epoch": 0.91, + "learning_rate": 1.1020026361665038e-05, + "loss": 2.0915, + "step": 21620 + }, + { + "epoch": 0.91, + "learning_rate": 1.1015774480207494e-05, + "loss": 2.5908, + "step": 21630 + }, + { + "epoch": 0.91, + "learning_rate": 1.1011522598749948e-05, + "loss": 1.7967, + "step": 21640 + }, + { + "epoch": 0.91, + "learning_rate": 1.1007270717292403e-05, + "loss": 1.9879, + "step": 21650 + }, + { + "epoch": 0.91, + "learning_rate": 1.1003018835834857e-05, + "loss": 2.0687, + "step": 21660 + }, + { + "epoch": 0.91, + "learning_rate": 1.0998766954377313e-05, + "loss": 2.6076, + "step": 21670 + }, + { + "epoch": 0.91, + "learning_rate": 1.0994515072919768e-05, + "loss": 2.8764, + "step": 21680 + }, + { + "epoch": 0.91, + "learning_rate": 1.0990263191462222e-05, + "loss": 1.9984, + "step": 21690 + }, + { + "epoch": 0.91, + "learning_rate": 1.0986011310004678e-05, + "loss": 2.6241, + "step": 21700 + }, + { + "epoch": 0.91, + "learning_rate": 1.0981759428547134e-05, + "loss": 2.3973, + "step": 21710 + }, + { + "epoch": 0.91, + "learning_rate": 1.0977507547089587e-05, + "loss": 2.7101, + "step": 21720 + }, + { + "epoch": 0.91, + "learning_rate": 1.0973255665632043e-05, + "loss": 2.1444, + "step": 21730 + }, + { + "epoch": 0.91, + "learning_rate": 1.09690037841745e-05, + "loss": 2.1706, + "step": 21740 + }, + { + "epoch": 0.92, + "learning_rate": 1.0964751902716952e-05, + "loss": 1.6332, + "step": 21750 + }, + { + "epoch": 0.92, + "learning_rate": 1.0960500021259408e-05, + "loss": 1.9248, + "step": 21760 + }, + { + "epoch": 0.92, + "learning_rate": 1.0956248139801865e-05, + "loss": 2.0422, + "step": 21770 + }, + { + "epoch": 0.92, + "learning_rate": 1.0951996258344317e-05, + "loss": 2.0232, + "step": 21780 + }, + { + "epoch": 0.92, + "learning_rate": 1.0947744376886773e-05, + "loss": 2.0951, + "step": 21790 + }, + { + "epoch": 0.92, + "learning_rate": 1.0943492495429228e-05, + "loss": 2.21, + "step": 21800 + }, + { + "epoch": 0.92, + "learning_rate": 1.0939240613971684e-05, + "loss": 2.1676, + "step": 21810 + }, + { + "epoch": 0.92, + "learning_rate": 1.0934988732514139e-05, + "loss": 2.6876, + "step": 21820 + }, + { + "epoch": 0.92, + "learning_rate": 1.0930736851056593e-05, + "loss": 2.5208, + "step": 21830 + }, + { + "epoch": 0.92, + "learning_rate": 1.0926484969599049e-05, + "loss": 2.076, + "step": 21840 + }, + { + "epoch": 0.92, + "learning_rate": 1.0922233088141504e-05, + "loss": 2.2026, + "step": 21850 + }, + { + "epoch": 0.92, + "learning_rate": 1.0917981206683958e-05, + "loss": 2.3495, + "step": 21860 + }, + { + "epoch": 0.92, + "learning_rate": 1.0913729325226414e-05, + "loss": 2.1546, + "step": 21870 + }, + { + "epoch": 0.92, + "learning_rate": 1.0909477443768867e-05, + "loss": 2.0528, + "step": 21880 + }, + { + "epoch": 0.92, + "learning_rate": 1.0905225562311323e-05, + "loss": 2.0197, + "step": 21890 + }, + { + "epoch": 0.92, + "learning_rate": 1.090097368085378e-05, + "loss": 2.2806, + "step": 21900 + }, + { + "epoch": 0.92, + "learning_rate": 1.0896721799396232e-05, + "loss": 1.7185, + "step": 21910 + }, + { + "epoch": 0.92, + "learning_rate": 1.0892469917938688e-05, + "loss": 1.7569, + "step": 21920 + }, + { + "epoch": 0.92, + "learning_rate": 1.0888218036481144e-05, + "loss": 2.1597, + "step": 21930 + }, + { + "epoch": 0.92, + "learning_rate": 1.08839661550236e-05, + "loss": 2.5093, + "step": 21940 + }, + { + "epoch": 0.92, + "learning_rate": 1.0879714273566053e-05, + "loss": 2.3191, + "step": 21950 + }, + { + "epoch": 0.92, + "learning_rate": 1.087546239210851e-05, + "loss": 2.1572, + "step": 21960 + }, + { + "epoch": 0.92, + "learning_rate": 1.0871210510650964e-05, + "loss": 2.4078, + "step": 21970 + }, + { + "epoch": 0.92, + "learning_rate": 1.0866958629193418e-05, + "loss": 2.315, + "step": 21980 + }, + { + "epoch": 0.93, + "learning_rate": 1.0862706747735874e-05, + "loss": 2.5181, + "step": 21990 + }, + { + "epoch": 0.93, + "learning_rate": 1.0858454866278329e-05, + "loss": 1.9297, + "step": 22000 + }, + { + "epoch": 0.93, + "eval_loss": 1.8141272068023682, + "eval_runtime": 174.4359, + "eval_samples_per_second": 14.349, + "eval_steps_per_second": 7.177, + "step": 22000 + }, + { + "epoch": 0.93, + "learning_rate": 1.0854202984820783e-05, + "loss": 1.8454, + "step": 22010 + }, + { + "epoch": 0.93, + "learning_rate": 1.084995110336324e-05, + "loss": 2.1891, + "step": 22020 + }, + { + "epoch": 0.93, + "learning_rate": 1.0845699221905694e-05, + "loss": 1.814, + "step": 22030 + }, + { + "epoch": 0.93, + "learning_rate": 1.084144734044815e-05, + "loss": 1.9533, + "step": 22040 + }, + { + "epoch": 0.93, + "learning_rate": 1.0837195458990603e-05, + "loss": 2.4355, + "step": 22050 + }, + { + "epoch": 0.93, + "learning_rate": 1.0832943577533059e-05, + "loss": 1.9416, + "step": 22060 + }, + { + "epoch": 0.93, + "learning_rate": 1.0828691696075515e-05, + "loss": 2.0757, + "step": 22070 + }, + { + "epoch": 0.93, + "learning_rate": 1.0824439814617968e-05, + "loss": 1.9605, + "step": 22080 + }, + { + "epoch": 0.93, + "learning_rate": 1.0820187933160424e-05, + "loss": 2.1604, + "step": 22090 + }, + { + "epoch": 0.93, + "learning_rate": 1.081593605170288e-05, + "loss": 1.9458, + "step": 22100 + }, + { + "epoch": 0.93, + "learning_rate": 1.0811684170245333e-05, + "loss": 2.3094, + "step": 22110 + }, + { + "epoch": 0.93, + "learning_rate": 1.080743228878779e-05, + "loss": 2.7009, + "step": 22120 + }, + { + "epoch": 0.93, + "learning_rate": 1.0803180407330245e-05, + "loss": 2.1032, + "step": 22130 + }, + { + "epoch": 0.93, + "learning_rate": 1.07989285258727e-05, + "loss": 2.4193, + "step": 22140 + }, + { + "epoch": 0.93, + "learning_rate": 1.0794676644415154e-05, + "loss": 1.9241, + "step": 22150 + }, + { + "epoch": 0.93, + "learning_rate": 1.079042476295761e-05, + "loss": 2.2416, + "step": 22160 + }, + { + "epoch": 0.93, + "learning_rate": 1.0786172881500065e-05, + "loss": 2.2766, + "step": 22170 + }, + { + "epoch": 0.93, + "learning_rate": 1.078192100004252e-05, + "loss": 2.7925, + "step": 22180 + }, + { + "epoch": 0.93, + "learning_rate": 1.0777669118584974e-05, + "loss": 2.0584, + "step": 22190 + }, + { + "epoch": 0.93, + "learning_rate": 1.077341723712743e-05, + "loss": 1.7813, + "step": 22200 + }, + { + "epoch": 0.93, + "learning_rate": 1.0769165355669884e-05, + "loss": 1.6141, + "step": 22210 + }, + { + "epoch": 0.93, + "learning_rate": 1.0764913474212339e-05, + "loss": 2.2464, + "step": 22220 + }, + { + "epoch": 0.94, + "learning_rate": 1.0760661592754795e-05, + "loss": 2.0246, + "step": 22230 + }, + { + "epoch": 0.94, + "learning_rate": 1.0756409711297251e-05, + "loss": 2.0217, + "step": 22240 + }, + { + "epoch": 0.94, + "learning_rate": 1.0752157829839704e-05, + "loss": 2.8785, + "step": 22250 + }, + { + "epoch": 0.94, + "learning_rate": 1.074790594838216e-05, + "loss": 2.5433, + "step": 22260 + }, + { + "epoch": 0.94, + "learning_rate": 1.0743654066924616e-05, + "loss": 2.0261, + "step": 22270 + }, + { + "epoch": 0.94, + "learning_rate": 1.0739402185467069e-05, + "loss": 2.4837, + "step": 22280 + }, + { + "epoch": 0.94, + "learning_rate": 1.0735150304009525e-05, + "loss": 2.1878, + "step": 22290 + }, + { + "epoch": 0.94, + "learning_rate": 1.0730898422551981e-05, + "loss": 2.0769, + "step": 22300 + }, + { + "epoch": 0.94, + "learning_rate": 1.0726646541094434e-05, + "loss": 1.976, + "step": 22310 + }, + { + "epoch": 0.94, + "learning_rate": 1.072239465963689e-05, + "loss": 2.3659, + "step": 22320 + }, + { + "epoch": 0.94, + "learning_rate": 1.0718142778179346e-05, + "loss": 1.9599, + "step": 22330 + }, + { + "epoch": 0.94, + "learning_rate": 1.0713890896721801e-05, + "loss": 1.8481, + "step": 22340 + }, + { + "epoch": 0.94, + "learning_rate": 1.0709639015264255e-05, + "loss": 2.7933, + "step": 22350 + }, + { + "epoch": 0.94, + "learning_rate": 1.070538713380671e-05, + "loss": 2.5314, + "step": 22360 + }, + { + "epoch": 0.94, + "learning_rate": 1.0701135252349166e-05, + "loss": 2.1009, + "step": 22370 + }, + { + "epoch": 0.94, + "learning_rate": 1.069688337089162e-05, + "loss": 2.3417, + "step": 22380 + }, + { + "epoch": 0.94, + "learning_rate": 1.0692631489434075e-05, + "loss": 1.9509, + "step": 22390 + }, + { + "epoch": 0.94, + "learning_rate": 1.0688379607976531e-05, + "loss": 2.1662, + "step": 22400 + }, + { + "epoch": 0.94, + "learning_rate": 1.0684127726518985e-05, + "loss": 2.0892, + "step": 22410 + }, + { + "epoch": 0.94, + "learning_rate": 1.067987584506144e-05, + "loss": 2.1241, + "step": 22420 + }, + { + "epoch": 0.94, + "learning_rate": 1.0675623963603896e-05, + "loss": 2.0205, + "step": 22430 + }, + { + "epoch": 0.94, + "learning_rate": 1.0671372082146349e-05, + "loss": 1.8768, + "step": 22440 + }, + { + "epoch": 0.94, + "learning_rate": 1.0667120200688805e-05, + "loss": 2.3859, + "step": 22450 + }, + { + "epoch": 0.94, + "learning_rate": 1.0662868319231261e-05, + "loss": 2.0095, + "step": 22460 + }, + { + "epoch": 0.95, + "learning_rate": 1.0658616437773717e-05, + "loss": 2.0586, + "step": 22470 + }, + { + "epoch": 0.95, + "learning_rate": 1.065436455631617e-05, + "loss": 2.232, + "step": 22480 + }, + { + "epoch": 0.95, + "learning_rate": 1.0650112674858626e-05, + "loss": 2.09, + "step": 22490 + }, + { + "epoch": 0.95, + "learning_rate": 1.0645860793401082e-05, + "loss": 2.412, + "step": 22500 + }, + { + "epoch": 0.95, + "eval_loss": 1.8309388160705566, + "eval_runtime": 175.8105, + "eval_samples_per_second": 14.237, + "eval_steps_per_second": 7.121, + "step": 22500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0641608911943535e-05, + "loss": 2.3572, + "step": 22510 + }, + { + "epoch": 0.95, + "learning_rate": 1.0637357030485991e-05, + "loss": 2.1156, + "step": 22520 + }, + { + "epoch": 0.95, + "learning_rate": 1.0633105149028446e-05, + "loss": 2.0792, + "step": 22530 + }, + { + "epoch": 0.95, + "learning_rate": 1.06288532675709e-05, + "loss": 2.026, + "step": 22540 + }, + { + "epoch": 0.95, + "learning_rate": 1.0624601386113356e-05, + "loss": 2.1075, + "step": 22550 + }, + { + "epoch": 0.95, + "learning_rate": 1.062034950465581e-05, + "loss": 2.1914, + "step": 22560 + }, + { + "epoch": 0.95, + "learning_rate": 1.0616097623198267e-05, + "loss": 2.3427, + "step": 22570 + }, + { + "epoch": 0.95, + "learning_rate": 1.0611845741740721e-05, + "loss": 2.2625, + "step": 22580 + }, + { + "epoch": 0.95, + "learning_rate": 1.0607593860283176e-05, + "loss": 2.2144, + "step": 22590 + }, + { + "epoch": 0.95, + "learning_rate": 1.0603341978825632e-05, + "loss": 2.6625, + "step": 22600 + }, + { + "epoch": 0.95, + "learning_rate": 1.0599090097368085e-05, + "loss": 2.0088, + "step": 22610 + }, + { + "epoch": 0.95, + "learning_rate": 1.0594838215910541e-05, + "loss": 2.1113, + "step": 22620 + }, + { + "epoch": 0.95, + "learning_rate": 1.0590586334452997e-05, + "loss": 2.2633, + "step": 22630 + }, + { + "epoch": 0.95, + "learning_rate": 1.058633445299545e-05, + "loss": 2.4962, + "step": 22640 + }, + { + "epoch": 0.95, + "learning_rate": 1.0582082571537906e-05, + "loss": 2.1041, + "step": 22650 + }, + { + "epoch": 0.95, + "learning_rate": 1.0577830690080362e-05, + "loss": 2.0901, + "step": 22660 + }, + { + "epoch": 0.95, + "learning_rate": 1.0573578808622818e-05, + "loss": 1.8119, + "step": 22670 + }, + { + "epoch": 0.95, + "learning_rate": 1.0569326927165271e-05, + "loss": 2.5824, + "step": 22680 + }, + { + "epoch": 0.95, + "learning_rate": 1.0565075045707727e-05, + "loss": 2.2369, + "step": 22690 + }, + { + "epoch": 0.96, + "learning_rate": 1.0560823164250182e-05, + "loss": 1.9169, + "step": 22700 + }, + { + "epoch": 0.96, + "learning_rate": 1.0556571282792636e-05, + "loss": 2.4513, + "step": 22710 + }, + { + "epoch": 0.96, + "learning_rate": 1.0552319401335092e-05, + "loss": 2.5086, + "step": 22720 + }, + { + "epoch": 0.96, + "learning_rate": 1.0548067519877547e-05, + "loss": 2.3318, + "step": 22730 + }, + { + "epoch": 0.96, + "learning_rate": 1.0543815638420001e-05, + "loss": 2.0951, + "step": 22740 + }, + { + "epoch": 0.96, + "learning_rate": 1.0539563756962456e-05, + "loss": 2.194, + "step": 22750 + }, + { + "epoch": 0.96, + "learning_rate": 1.0535311875504912e-05, + "loss": 2.0352, + "step": 22760 + }, + { + "epoch": 0.96, + "learning_rate": 1.0531059994047368e-05, + "loss": 2.3134, + "step": 22770 + }, + { + "epoch": 0.96, + "learning_rate": 1.052680811258982e-05, + "loss": 2.3509, + "step": 22780 + }, + { + "epoch": 0.96, + "learning_rate": 1.0522556231132277e-05, + "loss": 2.2912, + "step": 22790 + }, + { + "epoch": 0.96, + "learning_rate": 1.0518304349674733e-05, + "loss": 1.8441, + "step": 22800 + }, + { + "epoch": 0.96, + "learning_rate": 1.0514052468217186e-05, + "loss": 2.019, + "step": 22810 + }, + { + "epoch": 0.96, + "learning_rate": 1.0509800586759642e-05, + "loss": 1.9165, + "step": 22820 + }, + { + "epoch": 0.96, + "learning_rate": 1.0505548705302098e-05, + "loss": 1.957, + "step": 22830 + }, + { + "epoch": 0.96, + "learning_rate": 1.0501296823844551e-05, + "loss": 2.5048, + "step": 22840 + }, + { + "epoch": 0.96, + "learning_rate": 1.0497044942387007e-05, + "loss": 2.2552, + "step": 22850 + }, + { + "epoch": 0.96, + "learning_rate": 1.0492793060929463e-05, + "loss": 1.9776, + "step": 22860 + }, + { + "epoch": 0.96, + "learning_rate": 1.0488541179471916e-05, + "loss": 2.4988, + "step": 22870 + }, + { + "epoch": 0.96, + "learning_rate": 1.0484289298014372e-05, + "loss": 2.5497, + "step": 22880 + }, + { + "epoch": 0.96, + "learning_rate": 1.0480037416556828e-05, + "loss": 2.0912, + "step": 22890 + }, + { + "epoch": 0.96, + "learning_rate": 1.0475785535099283e-05, + "loss": 2.238, + "step": 22900 + }, + { + "epoch": 0.96, + "learning_rate": 1.0471533653641737e-05, + "loss": 2.6684, + "step": 22910 + }, + { + "epoch": 0.96, + "learning_rate": 1.0467281772184192e-05, + "loss": 1.9461, + "step": 22920 + }, + { + "epoch": 0.96, + "learning_rate": 1.0463029890726648e-05, + "loss": 1.8546, + "step": 22930 + }, + { + "epoch": 0.97, + "learning_rate": 1.0458778009269102e-05, + "loss": 2.3796, + "step": 22940 + }, + { + "epoch": 0.97, + "learning_rate": 1.0454526127811557e-05, + "loss": 2.4635, + "step": 22950 + }, + { + "epoch": 0.97, + "learning_rate": 1.0450274246354013e-05, + "loss": 2.6029, + "step": 22960 + }, + { + "epoch": 0.97, + "learning_rate": 1.0446022364896467e-05, + "loss": 2.3095, + "step": 22970 + }, + { + "epoch": 0.97, + "learning_rate": 1.0441770483438922e-05, + "loss": 2.0818, + "step": 22980 + }, + { + "epoch": 0.97, + "learning_rate": 1.0437518601981378e-05, + "loss": 2.0784, + "step": 22990 + }, + { + "epoch": 0.97, + "learning_rate": 1.0433266720523834e-05, + "loss": 2.4203, + "step": 23000 + }, + { + "epoch": 0.97, + "eval_loss": 1.7879308462142944, + "eval_runtime": 175.9943, + "eval_samples_per_second": 14.222, + "eval_steps_per_second": 7.114, + "step": 23000 + }, + { + "epoch": 0.97, + "learning_rate": 1.0429014839066287e-05, + "loss": 2.2036, + "step": 23010 + }, + { + "epoch": 0.97, + "learning_rate": 1.0424762957608743e-05, + "loss": 2.3537, + "step": 23020 + }, + { + "epoch": 0.97, + "learning_rate": 1.0420511076151199e-05, + "loss": 2.2634, + "step": 23030 + }, + { + "epoch": 0.97, + "learning_rate": 1.0416259194693652e-05, + "loss": 2.4369, + "step": 23040 + }, + { + "epoch": 0.97, + "learning_rate": 1.0412007313236108e-05, + "loss": 2.0881, + "step": 23050 + }, + { + "epoch": 0.97, + "learning_rate": 1.0407755431778564e-05, + "loss": 2.0322, + "step": 23060 + }, + { + "epoch": 0.97, + "learning_rate": 1.0403503550321017e-05, + "loss": 2.1656, + "step": 23070 + }, + { + "epoch": 0.97, + "learning_rate": 1.0399251668863473e-05, + "loss": 2.8787, + "step": 23080 + }, + { + "epoch": 0.97, + "learning_rate": 1.0394999787405928e-05, + "loss": 2.1997, + "step": 23090 + }, + { + "epoch": 0.97, + "learning_rate": 1.0390747905948384e-05, + "loss": 2.1858, + "step": 23100 + }, + { + "epoch": 0.97, + "learning_rate": 1.0386496024490838e-05, + "loss": 2.5543, + "step": 23110 + }, + { + "epoch": 0.97, + "learning_rate": 1.0382244143033293e-05, + "loss": 2.6911, + "step": 23120 + }, + { + "epoch": 0.97, + "learning_rate": 1.0377992261575749e-05, + "loss": 1.7698, + "step": 23130 + }, + { + "epoch": 0.97, + "learning_rate": 1.0373740380118202e-05, + "loss": 2.2497, + "step": 23140 + }, + { + "epoch": 0.97, + "learning_rate": 1.0369488498660658e-05, + "loss": 2.3248, + "step": 23150 + }, + { + "epoch": 0.97, + "learning_rate": 1.0365236617203114e-05, + "loss": 2.0568, + "step": 23160 + }, + { + "epoch": 0.97, + "learning_rate": 1.0360984735745567e-05, + "loss": 2.2153, + "step": 23170 + }, + { + "epoch": 0.98, + "learning_rate": 1.0356732854288023e-05, + "loss": 2.6964, + "step": 23180 + }, + { + "epoch": 0.98, + "learning_rate": 1.0352480972830479e-05, + "loss": 2.2131, + "step": 23190 + }, + { + "epoch": 0.98, + "learning_rate": 1.0348229091372935e-05, + "loss": 2.6161, + "step": 23200 + }, + { + "epoch": 0.98, + "learning_rate": 1.0343977209915388e-05, + "loss": 2.4675, + "step": 23210 + }, + { + "epoch": 0.98, + "learning_rate": 1.0339725328457844e-05, + "loss": 2.3536, + "step": 23220 + }, + { + "epoch": 0.98, + "learning_rate": 1.0335473447000298e-05, + "loss": 2.7163, + "step": 23230 + }, + { + "epoch": 0.98, + "learning_rate": 1.0331221565542753e-05, + "loss": 2.2453, + "step": 23240 + }, + { + "epoch": 0.98, + "learning_rate": 1.0326969684085209e-05, + "loss": 2.085, + "step": 23250 + }, + { + "epoch": 0.98, + "learning_rate": 1.0322717802627664e-05, + "loss": 2.059, + "step": 23260 + }, + { + "epoch": 0.98, + "learning_rate": 1.0318465921170118e-05, + "loss": 2.5816, + "step": 23270 + }, + { + "epoch": 0.98, + "learning_rate": 1.0314214039712574e-05, + "loss": 2.396, + "step": 23280 + }, + { + "epoch": 0.98, + "learning_rate": 1.0309962158255029e-05, + "loss": 1.5413, + "step": 23290 + }, + { + "epoch": 0.98, + "learning_rate": 1.0305710276797485e-05, + "loss": 2.3084, + "step": 23300 + }, + { + "epoch": 0.98, + "learning_rate": 1.0301458395339938e-05, + "loss": 2.0634, + "step": 23310 + }, + { + "epoch": 0.98, + "learning_rate": 1.0297206513882394e-05, + "loss": 2.2418, + "step": 23320 + }, + { + "epoch": 0.98, + "learning_rate": 1.029295463242485e-05, + "loss": 2.1639, + "step": 23330 + }, + { + "epoch": 0.98, + "learning_rate": 1.0288702750967303e-05, + "loss": 2.8353, + "step": 23340 + }, + { + "epoch": 0.98, + "learning_rate": 1.0284450869509759e-05, + "loss": 2.2332, + "step": 23350 + }, + { + "epoch": 0.98, + "learning_rate": 1.0280198988052215e-05, + "loss": 2.1627, + "step": 23360 + }, + { + "epoch": 0.98, + "learning_rate": 1.0275947106594668e-05, + "loss": 2.2034, + "step": 23370 + }, + { + "epoch": 0.98, + "learning_rate": 1.0271695225137124e-05, + "loss": 2.5933, + "step": 23380 + }, + { + "epoch": 0.98, + "learning_rate": 1.026744334367958e-05, + "loss": 2.4328, + "step": 23390 + }, + { + "epoch": 0.98, + "learning_rate": 1.0263191462222033e-05, + "loss": 2.6858, + "step": 23400 + }, + { + "epoch": 0.98, + "learning_rate": 1.0258939580764489e-05, + "loss": 2.2375, + "step": 23410 + }, + { + "epoch": 0.99, + "learning_rate": 1.0254687699306945e-05, + "loss": 1.9781, + "step": 23420 + }, + { + "epoch": 0.99, + "learning_rate": 1.02504358178494e-05, + "loss": 2.4411, + "step": 23430 + }, + { + "epoch": 0.99, + "learning_rate": 1.0246183936391854e-05, + "loss": 2.1322, + "step": 23440 + }, + { + "epoch": 0.99, + "learning_rate": 1.024193205493431e-05, + "loss": 2.2164, + "step": 23450 + }, + { + "epoch": 0.99, + "learning_rate": 1.0237680173476765e-05, + "loss": 2.2055, + "step": 23460 + }, + { + "epoch": 0.99, + "learning_rate": 1.0233428292019219e-05, + "loss": 2.1434, + "step": 23470 + }, + { + "epoch": 0.99, + "learning_rate": 1.0229176410561673e-05, + "loss": 1.8297, + "step": 23480 + }, + { + "epoch": 0.99, + "learning_rate": 1.022492452910413e-05, + "loss": 2.2009, + "step": 23490 + }, + { + "epoch": 0.99, + "learning_rate": 1.0220672647646584e-05, + "loss": 1.8702, + "step": 23500 + }, + { + "epoch": 0.99, + "eval_loss": 1.7928341627120972, + "eval_runtime": 175.9978, + "eval_samples_per_second": 14.222, + "eval_steps_per_second": 7.114, + "step": 23500 + }, + { + "epoch": 0.99, + "learning_rate": 1.0216420766189039e-05, + "loss": 1.8704, + "step": 23510 + }, + { + "epoch": 0.99, + "learning_rate": 1.0212168884731495e-05, + "loss": 2.3736, + "step": 23520 + }, + { + "epoch": 0.99, + "learning_rate": 1.0207917003273951e-05, + "loss": 2.6456, + "step": 23530 + }, + { + "epoch": 0.99, + "learning_rate": 1.0203665121816404e-05, + "loss": 2.1232, + "step": 23540 + }, + { + "epoch": 0.99, + "learning_rate": 1.019941324035886e-05, + "loss": 2.342, + "step": 23550 + }, + { + "epoch": 0.99, + "learning_rate": 1.0195161358901316e-05, + "loss": 2.4895, + "step": 23560 + }, + { + "epoch": 0.99, + "learning_rate": 1.0190909477443769e-05, + "loss": 2.0022, + "step": 23570 + }, + { + "epoch": 0.99, + "learning_rate": 1.0186657595986225e-05, + "loss": 1.9872, + "step": 23580 + }, + { + "epoch": 0.99, + "learning_rate": 1.0182405714528681e-05, + "loss": 2.2341, + "step": 23590 + }, + { + "epoch": 0.99, + "learning_rate": 1.0178153833071134e-05, + "loss": 2.2823, + "step": 23600 + }, + { + "epoch": 0.99, + "learning_rate": 1.017390195161359e-05, + "loss": 1.7177, + "step": 23610 + }, + { + "epoch": 0.99, + "learning_rate": 1.0169650070156044e-05, + "loss": 2.5706, + "step": 23620 + }, + { + "epoch": 0.99, + "learning_rate": 1.01653981886985e-05, + "loss": 2.6527, + "step": 23630 + }, + { + "epoch": 0.99, + "learning_rate": 1.0161146307240955e-05, + "loss": 2.2877, + "step": 23640 + }, + { + "epoch": 0.99, + "learning_rate": 1.015689442578341e-05, + "loss": 2.2885, + "step": 23650 + }, + { + "epoch": 1.0, + "learning_rate": 1.0152642544325866e-05, + "loss": 1.9024, + "step": 23660 + }, + { + "epoch": 1.0, + "learning_rate": 1.014839066286832e-05, + "loss": 2.1591, + "step": 23670 + }, + { + "epoch": 1.0, + "learning_rate": 1.0144138781410775e-05, + "loss": 2.5355, + "step": 23680 + }, + { + "epoch": 1.0, + "learning_rate": 1.013988689995323e-05, + "loss": 2.4954, + "step": 23690 + }, + { + "epoch": 1.0, + "learning_rate": 1.0135635018495683e-05, + "loss": 2.1401, + "step": 23700 + }, + { + "epoch": 1.0, + "learning_rate": 1.013138313703814e-05, + "loss": 2.05, + "step": 23710 + }, + { + "epoch": 1.0, + "learning_rate": 1.0127131255580596e-05, + "loss": 2.4109, + "step": 23720 + }, + { + "epoch": 1.0, + "learning_rate": 1.0122879374123052e-05, + "loss": 2.0181, + "step": 23730 + }, + { + "epoch": 1.0, + "learning_rate": 1.0118627492665505e-05, + "loss": 2.3981, + "step": 23740 + }, + { + "epoch": 1.0, + "learning_rate": 1.011437561120796e-05, + "loss": 2.0846, + "step": 23750 + }, + { + "epoch": 1.0, + "learning_rate": 1.0110123729750417e-05, + "loss": 2.0017, + "step": 23760 + }, + { + "epoch": 1.0, + "learning_rate": 1.010587184829287e-05, + "loss": 2.1176, + "step": 23770 + }, + { + "epoch": 1.0, + "learning_rate": 1.0101619966835326e-05, + "loss": 2.0457, + "step": 23780 + }, + { + "epoch": 1.0, + "learning_rate": 1.009736808537778e-05, + "loss": 2.139, + "step": 23790 + }, + { + "epoch": 1.0, + "learning_rate": 1.0093116203920235e-05, + "loss": 1.7997, + "step": 23800 + }, + { + "epoch": 1.0, + "learning_rate": 1.0088864322462691e-05, + "loss": 2.1645, + "step": 23810 + }, + { + "epoch": 1.0, + "learning_rate": 1.0084612441005145e-05, + "loss": 1.9419, + "step": 23820 + }, + { + "epoch": 1.0, + "learning_rate": 1.0080360559547602e-05, + "loss": 2.497, + "step": 23830 + }, + { + "epoch": 1.0, + "learning_rate": 1.0076108678090056e-05, + "loss": 2.4095, + "step": 23840 + }, + { + "epoch": 1.0, + "learning_rate": 1.007185679663251e-05, + "loss": 2.2074, + "step": 23850 + }, + { + "epoch": 1.0, + "learning_rate": 1.0067604915174967e-05, + "loss": 2.2012, + "step": 23860 + }, + { + "epoch": 1.0, + "learning_rate": 1.006335303371742e-05, + "loss": 2.4029, + "step": 23870 + }, + { + "epoch": 1.0, + "learning_rate": 1.0059101152259876e-05, + "loss": 2.0716, + "step": 23880 + }, + { + "epoch": 1.01, + "learning_rate": 1.0054849270802332e-05, + "loss": 2.1061, + "step": 23890 + }, + { + "epoch": 1.01, + "learning_rate": 1.0050597389344784e-05, + "loss": 2.175, + "step": 23900 + }, + { + "epoch": 1.01, + "learning_rate": 1.004634550788724e-05, + "loss": 2.2063, + "step": 23910 + }, + { + "epoch": 1.01, + "learning_rate": 1.0042093626429697e-05, + "loss": 2.1733, + "step": 23920 + }, + { + "epoch": 1.01, + "learning_rate": 1.003784174497215e-05, + "loss": 2.1249, + "step": 23930 + }, + { + "epoch": 1.01, + "learning_rate": 1.0033589863514606e-05, + "loss": 2.0847, + "step": 23940 + }, + { + "epoch": 1.01, + "learning_rate": 1.0029337982057062e-05, + "loss": 1.8854, + "step": 23950 + }, + { + "epoch": 1.01, + "learning_rate": 1.0025086100599516e-05, + "loss": 2.2463, + "step": 23960 + }, + { + "epoch": 1.01, + "learning_rate": 1.002083421914197e-05, + "loss": 1.8417, + "step": 23970 + }, + { + "epoch": 1.01, + "learning_rate": 1.0016582337684427e-05, + "loss": 1.9418, + "step": 23980 + }, + { + "epoch": 1.01, + "learning_rate": 1.0012330456226881e-05, + "loss": 2.2803, + "step": 23990 + }, + { + "epoch": 1.01, + "learning_rate": 1.0008078574769336e-05, + "loss": 2.0295, + "step": 24000 + }, + { + "epoch": 1.01, + "eval_loss": 1.7961026430130005, + "eval_runtime": 176.058, + "eval_samples_per_second": 14.217, + "eval_steps_per_second": 7.111, + "step": 24000 + }, + { + "epoch": 1.01, + "learning_rate": 1.000382669331179e-05, + "loss": 2.1783, + "step": 24010 + }, + { + "epoch": 1.01, + "learning_rate": 9.999574811854246e-06, + "loss": 1.8572, + "step": 24020 + }, + { + "epoch": 1.01, + "learning_rate": 9.995322930396701e-06, + "loss": 2.477, + "step": 24030 + }, + { + "epoch": 1.01, + "learning_rate": 9.991071048939155e-06, + "loss": 2.0883, + "step": 24040 + }, + { + "epoch": 1.01, + "learning_rate": 9.986819167481612e-06, + "loss": 1.9533, + "step": 24050 + }, + { + "epoch": 1.01, + "learning_rate": 9.982567286024066e-06, + "loss": 1.791, + "step": 24060 + }, + { + "epoch": 1.01, + "learning_rate": 9.978315404566522e-06, + "loss": 2.0543, + "step": 24070 + }, + { + "epoch": 1.01, + "learning_rate": 9.974063523108977e-06, + "loss": 2.035, + "step": 24080 + }, + { + "epoch": 1.01, + "learning_rate": 9.969811641651431e-06, + "loss": 1.9221, + "step": 24090 + }, + { + "epoch": 1.01, + "learning_rate": 9.965559760193887e-06, + "loss": 2.1826, + "step": 24100 + }, + { + "epoch": 1.01, + "learning_rate": 9.961307878736342e-06, + "loss": 2.0506, + "step": 24110 + }, + { + "epoch": 1.01, + "learning_rate": 9.957055997278798e-06, + "loss": 1.9634, + "step": 24120 + }, + { + "epoch": 1.02, + "learning_rate": 9.952804115821252e-06, + "loss": 1.8171, + "step": 24130 + }, + { + "epoch": 1.02, + "learning_rate": 9.948552234363707e-06, + "loss": 2.3548, + "step": 24140 + }, + { + "epoch": 1.02, + "learning_rate": 9.944300352906163e-06, + "loss": 2.1878, + "step": 24150 + }, + { + "epoch": 1.02, + "learning_rate": 9.940048471448617e-06, + "loss": 1.7794, + "step": 24160 + }, + { + "epoch": 1.02, + "learning_rate": 9.935796589991072e-06, + "loss": 2.1026, + "step": 24170 + }, + { + "epoch": 1.02, + "learning_rate": 9.931544708533526e-06, + "loss": 2.7091, + "step": 24180 + }, + { + "epoch": 1.02, + "learning_rate": 9.927292827075982e-06, + "loss": 2.2819, + "step": 24190 + }, + { + "epoch": 1.02, + "learning_rate": 9.923040945618437e-06, + "loss": 1.9126, + "step": 24200 + }, + { + "epoch": 1.02, + "learning_rate": 9.918789064160891e-06, + "loss": 2.0997, + "step": 24210 + }, + { + "epoch": 1.02, + "learning_rate": 9.914537182703346e-06, + "loss": 1.7042, + "step": 24220 + }, + { + "epoch": 1.02, + "learning_rate": 9.910285301245802e-06, + "loss": 1.9185, + "step": 24230 + }, + { + "epoch": 1.02, + "learning_rate": 9.906033419788256e-06, + "loss": 2.2732, + "step": 24240 + }, + { + "epoch": 1.02, + "learning_rate": 9.901781538330713e-06, + "loss": 2.1674, + "step": 24250 + }, + { + "epoch": 1.02, + "learning_rate": 9.897529656873167e-06, + "loss": 1.549, + "step": 24260 + }, + { + "epoch": 1.02, + "learning_rate": 9.893277775415621e-06, + "loss": 2.4823, + "step": 24270 + }, + { + "epoch": 1.02, + "learning_rate": 9.889025893958078e-06, + "loss": 2.7062, + "step": 24280 + }, + { + "epoch": 1.02, + "learning_rate": 9.884774012500532e-06, + "loss": 2.7915, + "step": 24290 + }, + { + "epoch": 1.02, + "learning_rate": 9.880522131042988e-06, + "loss": 1.9146, + "step": 24300 + }, + { + "epoch": 1.02, + "learning_rate": 9.876270249585443e-06, + "loss": 1.7695, + "step": 24310 + }, + { + "epoch": 1.02, + "learning_rate": 9.872018368127897e-06, + "loss": 1.8908, + "step": 24320 + }, + { + "epoch": 1.02, + "learning_rate": 9.867766486670353e-06, + "loss": 2.155, + "step": 24330 + }, + { + "epoch": 1.02, + "learning_rate": 9.863514605212808e-06, + "loss": 2.1778, + "step": 24340 + }, + { + "epoch": 1.02, + "learning_rate": 9.859262723755262e-06, + "loss": 2.388, + "step": 24350 + }, + { + "epoch": 1.02, + "learning_rate": 9.855010842297718e-06, + "loss": 2.188, + "step": 24360 + }, + { + "epoch": 1.03, + "learning_rate": 9.850758960840173e-06, + "loss": 2.4158, + "step": 24370 + }, + { + "epoch": 1.03, + "learning_rate": 9.846507079382627e-06, + "loss": 2.0438, + "step": 24380 + }, + { + "epoch": 1.03, + "learning_rate": 9.842255197925082e-06, + "loss": 2.3838, + "step": 24390 + }, + { + "epoch": 1.03, + "learning_rate": 9.838003316467538e-06, + "loss": 1.9735, + "step": 24400 + }, + { + "epoch": 1.03, + "learning_rate": 9.833751435009992e-06, + "loss": 1.5923, + "step": 24410 + }, + { + "epoch": 1.03, + "learning_rate": 9.829499553552447e-06, + "loss": 2.4302, + "step": 24420 + }, + { + "epoch": 1.03, + "learning_rate": 9.825247672094903e-06, + "loss": 2.009, + "step": 24430 + }, + { + "epoch": 1.03, + "learning_rate": 9.820995790637357e-06, + "loss": 2.1147, + "step": 24440 + }, + { + "epoch": 1.03, + "learning_rate": 9.816743909179814e-06, + "loss": 2.5158, + "step": 24450 + }, + { + "epoch": 1.03, + "learning_rate": 9.812492027722268e-06, + "loss": 1.8816, + "step": 24460 + }, + { + "epoch": 1.03, + "learning_rate": 9.808240146264722e-06, + "loss": 1.9242, + "step": 24470 + }, + { + "epoch": 1.03, + "learning_rate": 9.803988264807179e-06, + "loss": 2.4337, + "step": 24480 + }, + { + "epoch": 1.03, + "learning_rate": 9.799736383349633e-06, + "loss": 2.4206, + "step": 24490 + }, + { + "epoch": 1.03, + "learning_rate": 9.79548450189209e-06, + "loss": 2.4817, + "step": 24500 + }, + { + "epoch": 1.03, + "eval_loss": 1.7808321714401245, + "eval_runtime": 175.8437, + "eval_samples_per_second": 14.234, + "eval_steps_per_second": 7.12, + "step": 24500 + }, + { + "epoch": 1.03, + "learning_rate": 9.791232620434544e-06, + "loss": 2.0811, + "step": 24510 + }, + { + "epoch": 1.03, + "learning_rate": 9.786980738976998e-06, + "loss": 2.0437, + "step": 24520 + }, + { + "epoch": 1.03, + "learning_rate": 9.782728857519453e-06, + "loss": 2.5538, + "step": 24530 + }, + { + "epoch": 1.03, + "learning_rate": 9.778476976061909e-06, + "loss": 2.0296, + "step": 24540 + }, + { + "epoch": 1.03, + "learning_rate": 9.774225094604363e-06, + "loss": 2.3742, + "step": 24550 + }, + { + "epoch": 1.03, + "learning_rate": 9.769973213146818e-06, + "loss": 1.8981, + "step": 24560 + }, + { + "epoch": 1.03, + "learning_rate": 9.765721331689272e-06, + "loss": 1.7186, + "step": 24570 + }, + { + "epoch": 1.03, + "learning_rate": 9.761469450231728e-06, + "loss": 2.207, + "step": 24580 + }, + { + "epoch": 1.03, + "learning_rate": 9.757217568774183e-06, + "loss": 2.148, + "step": 24590 + }, + { + "epoch": 1.03, + "learning_rate": 9.752965687316639e-06, + "loss": 2.3233, + "step": 24600 + }, + { + "epoch": 1.04, + "learning_rate": 9.748713805859093e-06, + "loss": 2.3885, + "step": 24610 + }, + { + "epoch": 1.04, + "learning_rate": 9.744461924401548e-06, + "loss": 2.1731, + "step": 24620 + }, + { + "epoch": 1.04, + "learning_rate": 9.740210042944004e-06, + "loss": 2.1489, + "step": 24630 + }, + { + "epoch": 1.04, + "learning_rate": 9.735958161486458e-06, + "loss": 2.2787, + "step": 24640 + }, + { + "epoch": 1.04, + "learning_rate": 9.731706280028913e-06, + "loss": 2.5155, + "step": 24650 + }, + { + "epoch": 1.04, + "learning_rate": 9.727454398571369e-06, + "loss": 2.5187, + "step": 24660 + }, + { + "epoch": 1.04, + "learning_rate": 9.723202517113823e-06, + "loss": 1.9613, + "step": 24670 + }, + { + "epoch": 1.04, + "learning_rate": 9.71895063565628e-06, + "loss": 2.1069, + "step": 24680 + }, + { + "epoch": 1.04, + "learning_rate": 9.714698754198734e-06, + "loss": 1.8688, + "step": 24690 + }, + { + "epoch": 1.04, + "learning_rate": 9.710446872741189e-06, + "loss": 2.0668, + "step": 24700 + }, + { + "epoch": 1.04, + "learning_rate": 9.706194991283645e-06, + "loss": 3.0117, + "step": 24710 + }, + { + "epoch": 1.04, + "learning_rate": 9.7019431098261e-06, + "loss": 1.9196, + "step": 24720 + }, + { + "epoch": 1.04, + "learning_rate": 9.697691228368554e-06, + "loss": 2.1805, + "step": 24730 + }, + { + "epoch": 1.04, + "learning_rate": 9.693439346911008e-06, + "loss": 2.3032, + "step": 24740 + }, + { + "epoch": 1.04, + "learning_rate": 9.689187465453464e-06, + "loss": 2.1187, + "step": 24750 + }, + { + "epoch": 1.04, + "learning_rate": 9.684935583995919e-06, + "loss": 2.5357, + "step": 24760 + }, + { + "epoch": 1.04, + "learning_rate": 9.680683702538373e-06, + "loss": 1.7142, + "step": 24770 + }, + { + "epoch": 1.04, + "learning_rate": 9.67643182108083e-06, + "loss": 2.1176, + "step": 24780 + }, + { + "epoch": 1.04, + "learning_rate": 9.672179939623284e-06, + "loss": 2.0409, + "step": 24790 + }, + { + "epoch": 1.04, + "learning_rate": 9.667928058165738e-06, + "loss": 2.0488, + "step": 24800 + }, + { + "epoch": 1.04, + "learning_rate": 9.663676176708194e-06, + "loss": 2.1542, + "step": 24810 + }, + { + "epoch": 1.04, + "learning_rate": 9.659424295250649e-06, + "loss": 2.1647, + "step": 24820 + }, + { + "epoch": 1.04, + "learning_rate": 9.655172413793105e-06, + "loss": 2.4398, + "step": 24830 + }, + { + "epoch": 1.05, + "learning_rate": 9.65092053233556e-06, + "loss": 1.9817, + "step": 24840 + }, + { + "epoch": 1.05, + "learning_rate": 9.646668650878014e-06, + "loss": 2.2528, + "step": 24850 + }, + { + "epoch": 1.05, + "learning_rate": 9.64241676942047e-06, + "loss": 2.4099, + "step": 24860 + }, + { + "epoch": 1.05, + "learning_rate": 9.638164887962925e-06, + "loss": 2.1171, + "step": 24870 + }, + { + "epoch": 1.05, + "learning_rate": 9.63391300650538e-06, + "loss": 2.5138, + "step": 24880 + }, + { + "epoch": 1.05, + "learning_rate": 9.629661125047835e-06, + "loss": 2.2478, + "step": 24890 + }, + { + "epoch": 1.05, + "learning_rate": 9.62540924359029e-06, + "loss": 2.1804, + "step": 24900 + }, + { + "epoch": 1.05, + "learning_rate": 9.621157362132744e-06, + "loss": 2.1283, + "step": 24910 + }, + { + "epoch": 1.05, + "learning_rate": 9.616905480675199e-06, + "loss": 2.4359, + "step": 24920 + }, + { + "epoch": 1.05, + "learning_rate": 9.612653599217655e-06, + "loss": 2.3146, + "step": 24930 + }, + { + "epoch": 1.05, + "learning_rate": 9.608401717760109e-06, + "loss": 2.1418, + "step": 24940 + }, + { + "epoch": 1.05, + "learning_rate": 9.604149836302564e-06, + "loss": 2.1131, + "step": 24950 + }, + { + "epoch": 1.05, + "learning_rate": 9.59989795484502e-06, + "loss": 1.7208, + "step": 24960 + }, + { + "epoch": 1.05, + "learning_rate": 9.595646073387474e-06, + "loss": 1.9904, + "step": 24970 + }, + { + "epoch": 1.05, + "learning_rate": 9.59139419192993e-06, + "loss": 1.8274, + "step": 24980 + }, + { + "epoch": 1.05, + "learning_rate": 9.587142310472385e-06, + "loss": 2.1403, + "step": 24990 + }, + { + "epoch": 1.05, + "learning_rate": 9.58289042901484e-06, + "loss": 2.1498, + "step": 25000 + }, + { + "epoch": 1.05, + "eval_loss": 1.8270052671432495, + "eval_runtime": 175.899, + "eval_samples_per_second": 14.23, + "eval_steps_per_second": 7.118, + "step": 25000 + }, + { + "epoch": 1.05, + "learning_rate": 9.578638547557295e-06, + "loss": 2.5489, + "step": 25010 + }, + { + "epoch": 1.05, + "learning_rate": 9.57438666609975e-06, + "loss": 2.049, + "step": 25020 + }, + { + "epoch": 1.05, + "learning_rate": 9.570134784642206e-06, + "loss": 2.0372, + "step": 25030 + }, + { + "epoch": 1.05, + "learning_rate": 9.56588290318466e-06, + "loss": 1.9352, + "step": 25040 + }, + { + "epoch": 1.05, + "learning_rate": 9.561631021727115e-06, + "loss": 2.8194, + "step": 25050 + }, + { + "epoch": 1.05, + "learning_rate": 9.557379140269571e-06, + "loss": 2.0632, + "step": 25060 + }, + { + "epoch": 1.05, + "learning_rate": 9.553127258812026e-06, + "loss": 2.1373, + "step": 25070 + }, + { + "epoch": 1.06, + "learning_rate": 9.54887537735448e-06, + "loss": 1.5917, + "step": 25080 + }, + { + "epoch": 1.06, + "learning_rate": 9.544623495896934e-06, + "loss": 1.9999, + "step": 25090 + }, + { + "epoch": 1.06, + "learning_rate": 9.54037161443939e-06, + "loss": 2.4615, + "step": 25100 + }, + { + "epoch": 1.06, + "learning_rate": 9.536119732981845e-06, + "loss": 1.9341, + "step": 25110 + }, + { + "epoch": 1.06, + "learning_rate": 9.5318678515243e-06, + "loss": 2.0574, + "step": 25120 + }, + { + "epoch": 1.06, + "learning_rate": 9.527615970066756e-06, + "loss": 2.2517, + "step": 25130 + }, + { + "epoch": 1.06, + "learning_rate": 9.52336408860921e-06, + "loss": 2.8717, + "step": 25140 + }, + { + "epoch": 1.06, + "learning_rate": 9.519112207151665e-06, + "loss": 1.9216, + "step": 25150 + }, + { + "epoch": 1.06, + "learning_rate": 9.51486032569412e-06, + "loss": 1.7792, + "step": 25160 + }, + { + "epoch": 1.06, + "learning_rate": 9.510608444236575e-06, + "loss": 2.2475, + "step": 25170 + }, + { + "epoch": 1.06, + "learning_rate": 9.50635656277903e-06, + "loss": 1.8762, + "step": 25180 + }, + { + "epoch": 1.06, + "learning_rate": 9.502104681321486e-06, + "loss": 2.123, + "step": 25190 + }, + { + "epoch": 1.06, + "learning_rate": 9.49785279986394e-06, + "loss": 2.3417, + "step": 25200 + }, + { + "epoch": 1.06, + "learning_rate": 9.493600918406396e-06, + "loss": 1.6224, + "step": 25210 + }, + { + "epoch": 1.06, + "learning_rate": 9.489349036948851e-06, + "loss": 2.2181, + "step": 25220 + }, + { + "epoch": 1.06, + "learning_rate": 9.485097155491305e-06, + "loss": 2.2656, + "step": 25230 + }, + { + "epoch": 1.06, + "learning_rate": 9.480845274033762e-06, + "loss": 2.4773, + "step": 25240 + }, + { + "epoch": 1.06, + "learning_rate": 9.476593392576216e-06, + "loss": 5.3294, + "step": 25250 + }, + { + "epoch": 1.06, + "learning_rate": 9.47234151111867e-06, + "loss": 2.719, + "step": 25260 + }, + { + "epoch": 1.06, + "learning_rate": 9.468089629661127e-06, + "loss": 2.0256, + "step": 25270 + }, + { + "epoch": 1.06, + "learning_rate": 9.463837748203581e-06, + "loss": 2.7081, + "step": 25280 + }, + { + "epoch": 1.06, + "learning_rate": 9.459585866746035e-06, + "loss": 2.7167, + "step": 25290 + }, + { + "epoch": 1.06, + "learning_rate": 9.45533398528849e-06, + "loss": 2.3192, + "step": 25300 + }, + { + "epoch": 1.06, + "learning_rate": 9.451082103830946e-06, + "loss": 2.0319, + "step": 25310 + }, + { + "epoch": 1.07, + "learning_rate": 9.4468302223734e-06, + "loss": 2.1389, + "step": 25320 + }, + { + "epoch": 1.07, + "learning_rate": 9.442578340915855e-06, + "loss": 2.1409, + "step": 25330 + }, + { + "epoch": 1.07, + "learning_rate": 9.438326459458311e-06, + "loss": 2.0399, + "step": 25340 + }, + { + "epoch": 1.07, + "learning_rate": 9.434074578000766e-06, + "loss": 1.9943, + "step": 25350 + }, + { + "epoch": 1.07, + "learning_rate": 9.429822696543222e-06, + "loss": 1.9661, + "step": 25360 + }, + { + "epoch": 1.07, + "learning_rate": 9.425570815085676e-06, + "loss": 2.0869, + "step": 25370 + }, + { + "epoch": 1.07, + "learning_rate": 9.42131893362813e-06, + "loss": 2.0276, + "step": 25380 + }, + { + "epoch": 1.07, + "learning_rate": 9.417067052170587e-06, + "loss": 2.4021, + "step": 25390 + }, + { + "epoch": 1.07, + "learning_rate": 9.412815170713041e-06, + "loss": 2.0104, + "step": 25400 + }, + { + "epoch": 1.07, + "learning_rate": 9.408563289255497e-06, + "loss": 2.2962, + "step": 25410 + }, + { + "epoch": 1.07, + "learning_rate": 9.404311407797952e-06, + "loss": 2.1656, + "step": 25420 + }, + { + "epoch": 1.07, + "learning_rate": 9.400059526340406e-06, + "loss": 2.1771, + "step": 25430 + }, + { + "epoch": 1.07, + "learning_rate": 9.39580764488286e-06, + "loss": 2.49, + "step": 25440 + }, + { + "epoch": 1.07, + "learning_rate": 9.391555763425317e-06, + "loss": 2.2185, + "step": 25450 + }, + { + "epoch": 1.07, + "learning_rate": 9.387303881967771e-06, + "loss": 2.1741, + "step": 25460 + }, + { + "epoch": 1.07, + "learning_rate": 9.383052000510226e-06, + "loss": 1.9142, + "step": 25470 + }, + { + "epoch": 1.07, + "learning_rate": 9.37880011905268e-06, + "loss": 2.1593, + "step": 25480 + }, + { + "epoch": 1.07, + "learning_rate": 9.374548237595137e-06, + "loss": 2.6488, + "step": 25490 + }, + { + "epoch": 1.07, + "learning_rate": 9.370296356137591e-06, + "loss": 1.5643, + "step": 25500 + }, + { + "epoch": 1.07, + "eval_loss": 1.8130146265029907, + "eval_runtime": 174.1071, + "eval_samples_per_second": 14.376, + "eval_steps_per_second": 7.191, + "step": 25500 + }, + { + "epoch": 1.07, + "learning_rate": 9.366044474680047e-06, + "loss": 2.4988, + "step": 25510 + }, + { + "epoch": 1.07, + "learning_rate": 9.361792593222502e-06, + "loss": 1.8701, + "step": 25520 + }, + { + "epoch": 1.07, + "learning_rate": 9.357540711764956e-06, + "loss": 1.9622, + "step": 25530 + }, + { + "epoch": 1.07, + "learning_rate": 9.353288830307412e-06, + "loss": 2.2419, + "step": 25540 + }, + { + "epoch": 1.07, + "learning_rate": 9.349036948849867e-06, + "loss": 2.479, + "step": 25550 + }, + { + "epoch": 1.08, + "learning_rate": 9.344785067392323e-06, + "loss": 2.3276, + "step": 25560 + }, + { + "epoch": 1.08, + "learning_rate": 9.340533185934777e-06, + "loss": 1.6181, + "step": 25570 + }, + { + "epoch": 1.08, + "learning_rate": 9.336281304477232e-06, + "loss": 2.2207, + "step": 25580 + }, + { + "epoch": 1.08, + "learning_rate": 9.332029423019688e-06, + "loss": 2.4773, + "step": 25590 + }, + { + "epoch": 1.08, + "learning_rate": 9.327777541562142e-06, + "loss": 2.1505, + "step": 25600 + }, + { + "epoch": 1.08, + "learning_rate": 9.323525660104597e-06, + "loss": 1.9943, + "step": 25610 + }, + { + "epoch": 1.08, + "learning_rate": 9.319273778647053e-06, + "loss": 2.2931, + "step": 25620 + }, + { + "epoch": 1.08, + "learning_rate": 9.315021897189507e-06, + "loss": 1.637, + "step": 25630 + }, + { + "epoch": 1.08, + "learning_rate": 9.310770015731962e-06, + "loss": 1.9188, + "step": 25640 + }, + { + "epoch": 1.08, + "learning_rate": 9.306518134274416e-06, + "loss": 1.9153, + "step": 25650 + }, + { + "epoch": 1.08, + "learning_rate": 9.302266252816872e-06, + "loss": 2.1277, + "step": 25660 + }, + { + "epoch": 1.08, + "learning_rate": 9.298014371359327e-06, + "loss": 2.6407, + "step": 25670 + }, + { + "epoch": 1.08, + "learning_rate": 9.293762489901781e-06, + "loss": 2.0261, + "step": 25680 + }, + { + "epoch": 1.08, + "learning_rate": 9.289510608444238e-06, + "loss": 1.9497, + "step": 25690 + }, + { + "epoch": 1.08, + "learning_rate": 9.285258726986692e-06, + "loss": 2.6189, + "step": 25700 + }, + { + "epoch": 1.08, + "learning_rate": 9.281006845529146e-06, + "loss": 1.9009, + "step": 25710 + }, + { + "epoch": 1.08, + "learning_rate": 9.276754964071603e-06, + "loss": 2.254, + "step": 25720 + }, + { + "epoch": 1.08, + "learning_rate": 9.272503082614057e-06, + "loss": 2.2595, + "step": 25730 + }, + { + "epoch": 1.08, + "learning_rate": 9.268251201156513e-06, + "loss": 2.3933, + "step": 25740 + }, + { + "epoch": 1.08, + "learning_rate": 9.263999319698968e-06, + "loss": 2.318, + "step": 25750 + }, + { + "epoch": 1.08, + "learning_rate": 9.259747438241422e-06, + "loss": 2.0637, + "step": 25760 + }, + { + "epoch": 1.08, + "learning_rate": 9.255495556783878e-06, + "loss": 2.4044, + "step": 25770 + }, + { + "epoch": 1.08, + "learning_rate": 9.251243675326333e-06, + "loss": 2.54, + "step": 25780 + }, + { + "epoch": 1.09, + "learning_rate": 9.246991793868789e-06, + "loss": 2.075, + "step": 25790 + }, + { + "epoch": 1.09, + "learning_rate": 9.242739912411243e-06, + "loss": 2.491, + "step": 25800 + }, + { + "epoch": 1.09, + "learning_rate": 9.238488030953698e-06, + "loss": 1.9432, + "step": 25810 + }, + { + "epoch": 1.09, + "learning_rate": 9.234236149496152e-06, + "loss": 2.0367, + "step": 25820 + }, + { + "epoch": 1.09, + "learning_rate": 9.229984268038607e-06, + "loss": 2.1673, + "step": 25830 + }, + { + "epoch": 1.09, + "learning_rate": 9.225732386581063e-06, + "loss": 2.1345, + "step": 25840 + }, + { + "epoch": 1.09, + "learning_rate": 9.221480505123517e-06, + "loss": 2.0569, + "step": 25850 + }, + { + "epoch": 1.09, + "learning_rate": 9.217228623665972e-06, + "loss": 1.986, + "step": 25860 + }, + { + "epoch": 1.09, + "learning_rate": 9.212976742208428e-06, + "loss": 2.3173, + "step": 25870 + }, + { + "epoch": 1.09, + "learning_rate": 9.208724860750882e-06, + "loss": 2.5024, + "step": 25880 + }, + { + "epoch": 1.09, + "learning_rate": 9.204472979293339e-06, + "loss": 2.3327, + "step": 25890 + }, + { + "epoch": 1.09, + "learning_rate": 9.200221097835793e-06, + "loss": 1.8746, + "step": 25900 + }, + { + "epoch": 1.09, + "learning_rate": 9.195969216378247e-06, + "loss": 2.4627, + "step": 25910 + }, + { + "epoch": 1.09, + "learning_rate": 9.191717334920704e-06, + "loss": 1.8956, + "step": 25920 + }, + { + "epoch": 1.09, + "learning_rate": 9.187465453463158e-06, + "loss": 1.9613, + "step": 25930 + }, + { + "epoch": 1.09, + "learning_rate": 9.183213572005614e-06, + "loss": 1.7535, + "step": 25940 + }, + { + "epoch": 1.09, + "learning_rate": 9.178961690548069e-06, + "loss": 2.539, + "step": 25950 + }, + { + "epoch": 1.09, + "learning_rate": 9.174709809090523e-06, + "loss": 1.6916, + "step": 25960 + }, + { + "epoch": 1.09, + "learning_rate": 9.17045792763298e-06, + "loss": 2.0812, + "step": 25970 + }, + { + "epoch": 1.09, + "learning_rate": 9.166206046175434e-06, + "loss": 2.3501, + "step": 25980 + }, + { + "epoch": 1.09, + "learning_rate": 9.161954164717888e-06, + "loss": 2.1016, + "step": 25990 + }, + { + "epoch": 1.09, + "learning_rate": 9.157702283260343e-06, + "loss": 2.1681, + "step": 26000 + }, + { + "epoch": 1.09, + "eval_loss": 1.8029661178588867, + "eval_runtime": 175.9282, + "eval_samples_per_second": 14.227, + "eval_steps_per_second": 7.117, + "step": 26000 + }, + { + "epoch": 1.09, + "learning_rate": 9.153450401802799e-06, + "loss": 2.029, + "step": 26010 + }, + { + "epoch": 1.09, + "learning_rate": 9.149198520345253e-06, + "loss": 2.1774, + "step": 26020 + }, + { + "epoch": 1.1, + "learning_rate": 9.144946638887708e-06, + "loss": 2.2816, + "step": 26030 + }, + { + "epoch": 1.1, + "learning_rate": 9.140694757430164e-06, + "loss": 2.2293, + "step": 26040 + }, + { + "epoch": 1.1, + "learning_rate": 9.136442875972618e-06, + "loss": 2.6347, + "step": 26050 + }, + { + "epoch": 1.1, + "learning_rate": 9.132190994515073e-06, + "loss": 2.0672, + "step": 26060 + }, + { + "epoch": 1.1, + "learning_rate": 9.127939113057529e-06, + "loss": 2.284, + "step": 26070 + }, + { + "epoch": 1.1, + "learning_rate": 9.123687231599983e-06, + "loss": 2.3167, + "step": 26080 + }, + { + "epoch": 1.1, + "learning_rate": 9.11943535014244e-06, + "loss": 1.5823, + "step": 26090 + }, + { + "epoch": 1.1, + "learning_rate": 9.115183468684894e-06, + "loss": 2.4238, + "step": 26100 + }, + { + "epoch": 1.1, + "learning_rate": 9.110931587227349e-06, + "loss": 2.1433, + "step": 26110 + }, + { + "epoch": 1.1, + "learning_rate": 9.106679705769805e-06, + "loss": 1.8668, + "step": 26120 + }, + { + "epoch": 1.1, + "learning_rate": 9.102427824312259e-06, + "loss": 2.5338, + "step": 26130 + }, + { + "epoch": 1.1, + "learning_rate": 9.098175942854715e-06, + "loss": 1.9807, + "step": 26140 + }, + { + "epoch": 1.1, + "learning_rate": 9.09392406139717e-06, + "loss": 2.1115, + "step": 26150 + }, + { + "epoch": 1.1, + "learning_rate": 9.089672179939624e-06, + "loss": 1.9726, + "step": 26160 + }, + { + "epoch": 1.1, + "learning_rate": 9.085420298482079e-06, + "loss": 2.0894, + "step": 26170 + }, + { + "epoch": 1.1, + "learning_rate": 9.081168417024535e-06, + "loss": 2.2522, + "step": 26180 + }, + { + "epoch": 1.1, + "learning_rate": 9.07691653556699e-06, + "loss": 2.2052, + "step": 26190 + }, + { + "epoch": 1.1, + "learning_rate": 9.072664654109444e-06, + "loss": 2.1807, + "step": 26200 + }, + { + "epoch": 1.1, + "learning_rate": 9.068412772651898e-06, + "loss": 1.9953, + "step": 26210 + }, + { + "epoch": 1.1, + "learning_rate": 9.064160891194354e-06, + "loss": 2.5388, + "step": 26220 + }, + { + "epoch": 1.1, + "learning_rate": 9.059909009736809e-06, + "loss": 2.431, + "step": 26230 + }, + { + "epoch": 1.1, + "learning_rate": 9.055657128279263e-06, + "loss": 2.4511, + "step": 26240 + }, + { + "epoch": 1.1, + "learning_rate": 9.05140524682172e-06, + "loss": 2.5282, + "step": 26250 + }, + { + "epoch": 1.1, + "learning_rate": 9.047153365364174e-06, + "loss": 2.3258, + "step": 26260 + }, + { + "epoch": 1.11, + "learning_rate": 9.04290148390663e-06, + "loss": 2.2077, + "step": 26270 + }, + { + "epoch": 1.11, + "learning_rate": 9.038649602449084e-06, + "loss": 2.1033, + "step": 26280 + }, + { + "epoch": 1.11, + "learning_rate": 9.034397720991539e-06, + "loss": 2.064, + "step": 26290 + }, + { + "epoch": 1.11, + "learning_rate": 9.030145839533995e-06, + "loss": 2.1307, + "step": 26300 + }, + { + "epoch": 1.11, + "learning_rate": 9.02589395807645e-06, + "loss": 2.1043, + "step": 26310 + }, + { + "epoch": 1.11, + "learning_rate": 9.021642076618906e-06, + "loss": 1.842, + "step": 26320 + }, + { + "epoch": 1.11, + "learning_rate": 9.01739019516136e-06, + "loss": 1.8477, + "step": 26330 + }, + { + "epoch": 1.11, + "learning_rate": 9.013138313703815e-06, + "loss": 1.968, + "step": 26340 + }, + { + "epoch": 1.11, + "learning_rate": 9.008886432246269e-06, + "loss": 2.2406, + "step": 26350 + }, + { + "epoch": 1.11, + "learning_rate": 9.004634550788725e-06, + "loss": 2.3495, + "step": 26360 + }, + { + "epoch": 1.11, + "learning_rate": 9.00038266933118e-06, + "loss": 2.318, + "step": 26370 + }, + { + "epoch": 1.11, + "learning_rate": 8.996130787873634e-06, + "loss": 1.9728, + "step": 26380 + }, + { + "epoch": 1.11, + "learning_rate": 8.991878906416089e-06, + "loss": 2.7301, + "step": 26390 + }, + { + "epoch": 1.11, + "learning_rate": 8.987627024958545e-06, + "loss": 2.3417, + "step": 26400 + }, + { + "epoch": 1.11, + "learning_rate": 8.983375143501e-06, + "loss": 2.547, + "step": 26410 + }, + { + "epoch": 1.11, + "learning_rate": 8.979123262043455e-06, + "loss": 2.2439, + "step": 26420 + }, + { + "epoch": 1.11, + "learning_rate": 8.97487138058591e-06, + "loss": 2.1761, + "step": 26430 + }, + { + "epoch": 1.11, + "learning_rate": 8.970619499128364e-06, + "loss": 1.8553, + "step": 26440 + }, + { + "epoch": 1.11, + "learning_rate": 8.96636761767082e-06, + "loss": 2.4548, + "step": 26450 + }, + { + "epoch": 1.11, + "learning_rate": 8.962115736213275e-06, + "loss": 1.7375, + "step": 26460 + }, + { + "epoch": 1.11, + "learning_rate": 8.957863854755731e-06, + "loss": 2.3438, + "step": 26470 + }, + { + "epoch": 1.11, + "learning_rate": 8.953611973298186e-06, + "loss": 2.4082, + "step": 26480 + }, + { + "epoch": 1.11, + "learning_rate": 8.94936009184064e-06, + "loss": 2.2813, + "step": 26490 + }, + { + "epoch": 1.11, + "learning_rate": 8.945108210383096e-06, + "loss": 2.1074, + "step": 26500 + }, + { + "epoch": 1.11, + "eval_loss": 1.7794862985610962, + "eval_runtime": 175.953, + "eval_samples_per_second": 14.225, + "eval_steps_per_second": 7.116, + "step": 26500 + }, + { + "epoch": 1.12, + "learning_rate": 8.94085632892555e-06, + "loss": 2.3367, + "step": 26510 + }, + { + "epoch": 1.12, + "learning_rate": 8.936604447468005e-06, + "loss": 2.0808, + "step": 26520 + }, + { + "epoch": 1.12, + "learning_rate": 8.932352566010461e-06, + "loss": 2.0238, + "step": 26530 + }, + { + "epoch": 1.12, + "learning_rate": 8.928100684552916e-06, + "loss": 1.9315, + "step": 26540 + }, + { + "epoch": 1.12, + "learning_rate": 8.92384880309537e-06, + "loss": 2.1414, + "step": 26550 + }, + { + "epoch": 1.12, + "learning_rate": 8.919596921637825e-06, + "loss": 2.2046, + "step": 26560 + }, + { + "epoch": 1.12, + "learning_rate": 8.91534504018028e-06, + "loss": 1.559, + "step": 26570 + }, + { + "epoch": 1.12, + "learning_rate": 8.911093158722735e-06, + "loss": 2.3783, + "step": 26580 + }, + { + "epoch": 1.12, + "learning_rate": 8.90684127726519e-06, + "loss": 2.259, + "step": 26590 + }, + { + "epoch": 1.12, + "learning_rate": 8.902589395807646e-06, + "loss": 2.2287, + "step": 26600 + }, + { + "epoch": 1.12, + "learning_rate": 8.8983375143501e-06, + "loss": 2.2234, + "step": 26610 + }, + { + "epoch": 1.12, + "learning_rate": 8.894085632892556e-06, + "loss": 2.3311, + "step": 26620 + }, + { + "epoch": 1.12, + "learning_rate": 8.889833751435011e-06, + "loss": 2.4397, + "step": 26630 + }, + { + "epoch": 1.12, + "learning_rate": 8.885581869977465e-06, + "loss": 2.0832, + "step": 26640 + }, + { + "epoch": 1.12, + "learning_rate": 8.881329988519921e-06, + "loss": 1.9653, + "step": 26650 + }, + { + "epoch": 1.12, + "learning_rate": 8.877078107062376e-06, + "loss": 1.7654, + "step": 26660 + }, + { + "epoch": 1.12, + "learning_rate": 8.87282622560483e-06, + "loss": 2.2557, + "step": 26670 + }, + { + "epoch": 1.12, + "learning_rate": 8.868574344147287e-06, + "loss": 2.0998, + "step": 26680 + }, + { + "epoch": 1.12, + "learning_rate": 8.864322462689741e-06, + "loss": 1.5496, + "step": 26690 + }, + { + "epoch": 1.12, + "learning_rate": 8.860070581232197e-06, + "loss": 2.0313, + "step": 26700 + }, + { + "epoch": 1.12, + "learning_rate": 8.855818699774652e-06, + "loss": 2.3958, + "step": 26710 + }, + { + "epoch": 1.12, + "learning_rate": 8.851566818317106e-06, + "loss": 2.2315, + "step": 26720 + }, + { + "epoch": 1.12, + "learning_rate": 8.84731493685956e-06, + "loss": 2.4052, + "step": 26730 + }, + { + "epoch": 1.12, + "learning_rate": 8.843063055402015e-06, + "loss": 2.1748, + "step": 26740 + }, + { + "epoch": 1.13, + "learning_rate": 8.838811173944471e-06, + "loss": 2.2979, + "step": 26750 + }, + { + "epoch": 1.13, + "learning_rate": 8.834559292486926e-06, + "loss": 2.0609, + "step": 26760 + }, + { + "epoch": 1.13, + "learning_rate": 8.83030741102938e-06, + "loss": 2.4294, + "step": 26770 + }, + { + "epoch": 1.13, + "learning_rate": 8.826055529571836e-06, + "loss": 2.4692, + "step": 26780 + }, + { + "epoch": 1.13, + "learning_rate": 8.82180364811429e-06, + "loss": 2.3065, + "step": 26790 + }, + { + "epoch": 1.13, + "learning_rate": 8.817551766656747e-06, + "loss": 1.8414, + "step": 26800 + }, + { + "epoch": 1.13, + "learning_rate": 8.813299885199201e-06, + "loss": 1.9798, + "step": 26810 + }, + { + "epoch": 1.13, + "learning_rate": 8.809048003741656e-06, + "loss": 2.6128, + "step": 26820 + }, + { + "epoch": 1.13, + "learning_rate": 8.804796122284112e-06, + "loss": 2.4113, + "step": 26830 + }, + { + "epoch": 1.13, + "learning_rate": 8.800544240826566e-06, + "loss": 2.3281, + "step": 26840 + }, + { + "epoch": 1.13, + "learning_rate": 8.796292359369022e-06, + "loss": 2.0856, + "step": 26850 + }, + { + "epoch": 1.13, + "learning_rate": 8.792040477911477e-06, + "loss": 1.9102, + "step": 26860 + }, + { + "epoch": 1.13, + "learning_rate": 8.787788596453931e-06, + "loss": 1.8436, + "step": 26870 + }, + { + "epoch": 1.13, + "learning_rate": 8.783536714996388e-06, + "loss": 2.187, + "step": 26880 + }, + { + "epoch": 1.13, + "learning_rate": 8.779284833538842e-06, + "loss": 2.4518, + "step": 26890 + }, + { + "epoch": 1.13, + "learning_rate": 8.775032952081296e-06, + "loss": 2.7198, + "step": 26900 + }, + { + "epoch": 1.13, + "learning_rate": 8.770781070623751e-06, + "loss": 2.3112, + "step": 26910 + }, + { + "epoch": 1.13, + "learning_rate": 8.766529189166207e-06, + "loss": 2.0266, + "step": 26920 + }, + { + "epoch": 1.13, + "learning_rate": 8.762277307708662e-06, + "loss": 2.4087, + "step": 26930 + }, + { + "epoch": 1.13, + "learning_rate": 8.758025426251116e-06, + "loss": 1.793, + "step": 26940 + }, + { + "epoch": 1.13, + "learning_rate": 8.753773544793572e-06, + "loss": 1.9159, + "step": 26950 + }, + { + "epoch": 1.13, + "learning_rate": 8.749521663336027e-06, + "loss": 2.0469, + "step": 26960 + }, + { + "epoch": 1.13, + "learning_rate": 8.745269781878481e-06, + "loss": 2.1949, + "step": 26970 + }, + { + "epoch": 1.14, + "learning_rate": 8.741017900420937e-06, + "loss": 2.1446, + "step": 26980 + }, + { + "epoch": 1.14, + "learning_rate": 8.736766018963392e-06, + "loss": 2.1481, + "step": 26990 + }, + { + "epoch": 1.14, + "learning_rate": 8.732514137505848e-06, + "loss": 2.3402, + "step": 27000 + }, + { + "epoch": 1.14, + "eval_loss": 1.7784305810928345, + "eval_runtime": 175.9346, + "eval_samples_per_second": 14.227, + "eval_steps_per_second": 7.116, + "step": 27000 + }, + { + "epoch": 1.14, + "learning_rate": 8.728262256048302e-06, + "loss": 1.9918, + "step": 27010 + }, + { + "epoch": 1.14, + "learning_rate": 8.724010374590757e-06, + "loss": 1.9544, + "step": 27020 + }, + { + "epoch": 1.14, + "learning_rate": 8.719758493133213e-06, + "loss": 2.1023, + "step": 27030 + }, + { + "epoch": 1.14, + "learning_rate": 8.715506611675667e-06, + "loss": 2.3112, + "step": 27040 + }, + { + "epoch": 1.14, + "learning_rate": 8.711254730218124e-06, + "loss": 2.2315, + "step": 27050 + }, + { + "epoch": 1.14, + "learning_rate": 8.707002848760578e-06, + "loss": 2.2551, + "step": 27060 + }, + { + "epoch": 1.14, + "learning_rate": 8.702750967303032e-06, + "loss": 1.783, + "step": 27070 + }, + { + "epoch": 1.14, + "learning_rate": 8.698499085845487e-06, + "loss": 2.2734, + "step": 27080 + }, + { + "epoch": 1.14, + "learning_rate": 8.694247204387943e-06, + "loss": 2.235, + "step": 27090 + }, + { + "epoch": 1.14, + "learning_rate": 8.689995322930397e-06, + "loss": 2.0812, + "step": 27100 + }, + { + "epoch": 1.14, + "learning_rate": 8.685743441472852e-06, + "loss": 2.3111, + "step": 27110 + }, + { + "epoch": 1.14, + "learning_rate": 8.681491560015306e-06, + "loss": 1.9091, + "step": 27120 + }, + { + "epoch": 1.14, + "learning_rate": 8.677239678557763e-06, + "loss": 2.1717, + "step": 27130 + }, + { + "epoch": 1.14, + "learning_rate": 8.672987797100217e-06, + "loss": 1.6394, + "step": 27140 + }, + { + "epoch": 1.14, + "learning_rate": 8.668735915642673e-06, + "loss": 2.1665, + "step": 27150 + }, + { + "epoch": 1.14, + "learning_rate": 8.664484034185128e-06, + "loss": 2.6547, + "step": 27160 + }, + { + "epoch": 1.14, + "learning_rate": 8.660232152727582e-06, + "loss": 2.5331, + "step": 27170 + }, + { + "epoch": 1.14, + "learning_rate": 8.655980271270038e-06, + "loss": 2.1644, + "step": 27180 + }, + { + "epoch": 1.14, + "learning_rate": 8.651728389812493e-06, + "loss": 2.5835, + "step": 27190 + }, + { + "epoch": 1.14, + "learning_rate": 8.647476508354947e-06, + "loss": 1.8968, + "step": 27200 + }, + { + "epoch": 1.14, + "learning_rate": 8.643224626897403e-06, + "loss": 2.1733, + "step": 27210 + }, + { + "epoch": 1.15, + "learning_rate": 8.638972745439858e-06, + "loss": 2.2962, + "step": 27220 + }, + { + "epoch": 1.15, + "learning_rate": 8.634720863982314e-06, + "loss": 1.7098, + "step": 27230 + }, + { + "epoch": 1.15, + "learning_rate": 8.630468982524768e-06, + "loss": 2.4761, + "step": 27240 + }, + { + "epoch": 1.15, + "learning_rate": 8.626217101067223e-06, + "loss": 1.9359, + "step": 27250 + }, + { + "epoch": 1.15, + "learning_rate": 8.621965219609677e-06, + "loss": 1.9284, + "step": 27260 + }, + { + "epoch": 1.15, + "learning_rate": 8.617713338152133e-06, + "loss": 1.934, + "step": 27270 + }, + { + "epoch": 1.15, + "learning_rate": 8.613461456694588e-06, + "loss": 2.2519, + "step": 27280 + }, + { + "epoch": 1.15, + "learning_rate": 8.609209575237042e-06, + "loss": 2.2796, + "step": 27290 + }, + { + "epoch": 1.15, + "learning_rate": 8.604957693779497e-06, + "loss": 2.0495, + "step": 27300 + }, + { + "epoch": 1.15, + "learning_rate": 8.600705812321953e-06, + "loss": 1.8942, + "step": 27310 + }, + { + "epoch": 1.15, + "learning_rate": 8.596453930864407e-06, + "loss": 1.9881, + "step": 27320 + }, + { + "epoch": 1.15, + "learning_rate": 8.592202049406864e-06, + "loss": 2.444, + "step": 27330 + }, + { + "epoch": 1.15, + "learning_rate": 8.587950167949318e-06, + "loss": 2.2539, + "step": 27340 + }, + { + "epoch": 1.15, + "learning_rate": 8.583698286491773e-06, + "loss": 1.6349, + "step": 27350 + }, + { + "epoch": 1.15, + "learning_rate": 8.579446405034229e-06, + "loss": 1.8034, + "step": 27360 + }, + { + "epoch": 1.15, + "learning_rate": 8.575194523576683e-06, + "loss": 1.714, + "step": 27370 + }, + { + "epoch": 1.15, + "learning_rate": 8.57094264211914e-06, + "loss": 1.8281, + "step": 27380 + }, + { + "epoch": 1.15, + "learning_rate": 8.566690760661594e-06, + "loss": 1.9654, + "step": 27390 + }, + { + "epoch": 1.15, + "learning_rate": 8.562438879204048e-06, + "loss": 1.8929, + "step": 27400 + }, + { + "epoch": 1.15, + "learning_rate": 8.558186997746504e-06, + "loss": 1.9911, + "step": 27410 + }, + { + "epoch": 1.15, + "learning_rate": 8.553935116288959e-06, + "loss": 2.025, + "step": 27420 + }, + { + "epoch": 1.15, + "learning_rate": 8.549683234831413e-06, + "loss": 2.3029, + "step": 27430 + }, + { + "epoch": 1.15, + "learning_rate": 8.54543135337387e-06, + "loss": 2.0765, + "step": 27440 + }, + { + "epoch": 1.15, + "learning_rate": 8.541179471916324e-06, + "loss": 2.17, + "step": 27450 + }, + { + "epoch": 1.16, + "learning_rate": 8.536927590458778e-06, + "loss": 2.0451, + "step": 27460 + }, + { + "epoch": 1.16, + "learning_rate": 8.532675709001233e-06, + "loss": 2.2552, + "step": 27470 + }, + { + "epoch": 1.16, + "learning_rate": 8.528423827543689e-06, + "loss": 2.0923, + "step": 27480 + }, + { + "epoch": 1.16, + "learning_rate": 8.524171946086143e-06, + "loss": 2.2716, + "step": 27490 + }, + { + "epoch": 1.16, + "learning_rate": 8.519920064628598e-06, + "loss": 1.8374, + "step": 27500 + }, + { + "epoch": 1.16, + "eval_loss": 1.7960634231567383, + "eval_runtime": 175.5325, + "eval_samples_per_second": 14.259, + "eval_steps_per_second": 7.133, + "step": 27500 + }, + { + "epoch": 1.16, + "learning_rate": 8.515668183171054e-06, + "loss": 1.9933, + "step": 27510 + }, + { + "epoch": 1.16, + "learning_rate": 8.511416301713508e-06, + "loss": 2.3631, + "step": 27520 + }, + { + "epoch": 1.16, + "learning_rate": 8.507164420255965e-06, + "loss": 2.209, + "step": 27530 + }, + { + "epoch": 1.16, + "learning_rate": 8.502912538798419e-06, + "loss": 2.4285, + "step": 27540 + }, + { + "epoch": 1.16, + "learning_rate": 8.498660657340874e-06, + "loss": 2.2684, + "step": 27550 + }, + { + "epoch": 1.16, + "learning_rate": 8.49440877588333e-06, + "loss": 2.0219, + "step": 27560 + }, + { + "epoch": 1.16, + "learning_rate": 8.490156894425784e-06, + "loss": 1.8807, + "step": 27570 + }, + { + "epoch": 1.16, + "learning_rate": 8.48590501296824e-06, + "loss": 2.6944, + "step": 27580 + }, + { + "epoch": 1.16, + "learning_rate": 8.481653131510695e-06, + "loss": 2.3637, + "step": 27590 + }, + { + "epoch": 1.16, + "learning_rate": 8.47740125005315e-06, + "loss": 2.2071, + "step": 27600 + }, + { + "epoch": 1.16, + "learning_rate": 8.473149368595605e-06, + "loss": 2.0359, + "step": 27610 + }, + { + "epoch": 1.16, + "learning_rate": 8.46889748713806e-06, + "loss": 2.1155, + "step": 27620 + }, + { + "epoch": 1.16, + "learning_rate": 8.464645605680514e-06, + "loss": 2.3826, + "step": 27630 + }, + { + "epoch": 1.16, + "learning_rate": 8.460393724222969e-06, + "loss": 1.8784, + "step": 27640 + }, + { + "epoch": 1.16, + "learning_rate": 8.456141842765423e-06, + "loss": 2.3723, + "step": 27650 + }, + { + "epoch": 1.16, + "learning_rate": 8.45188996130788e-06, + "loss": 1.7547, + "step": 27660 + }, + { + "epoch": 1.16, + "learning_rate": 8.447638079850334e-06, + "loss": 1.9922, + "step": 27670 + }, + { + "epoch": 1.16, + "learning_rate": 8.443386198392788e-06, + "loss": 2.2056, + "step": 27680 + }, + { + "epoch": 1.16, + "learning_rate": 8.439134316935244e-06, + "loss": 2.4756, + "step": 27690 + }, + { + "epoch": 1.17, + "learning_rate": 8.434882435477699e-06, + "loss": 1.7505, + "step": 27700 + }, + { + "epoch": 1.17, + "learning_rate": 8.430630554020155e-06, + "loss": 2.1735, + "step": 27710 + }, + { + "epoch": 1.17, + "learning_rate": 8.42637867256261e-06, + "loss": 2.3311, + "step": 27720 + }, + { + "epoch": 1.17, + "learning_rate": 8.422126791105064e-06, + "loss": 2.1869, + "step": 27730 + }, + { + "epoch": 1.17, + "learning_rate": 8.41787490964752e-06, + "loss": 1.7889, + "step": 27740 + }, + { + "epoch": 1.17, + "learning_rate": 8.413623028189975e-06, + "loss": 2.1902, + "step": 27750 + }, + { + "epoch": 1.17, + "learning_rate": 8.40937114673243e-06, + "loss": 2.3086, + "step": 27760 + }, + { + "epoch": 1.17, + "learning_rate": 8.405119265274885e-06, + "loss": 2.6292, + "step": 27770 + }, + { + "epoch": 1.17, + "learning_rate": 8.40086738381734e-06, + "loss": 2.3843, + "step": 27780 + }, + { + "epoch": 1.17, + "learning_rate": 8.396615502359796e-06, + "loss": 1.9254, + "step": 27790 + }, + { + "epoch": 1.17, + "learning_rate": 8.39236362090225e-06, + "loss": 1.703, + "step": 27800 + }, + { + "epoch": 1.17, + "learning_rate": 8.388111739444705e-06, + "loss": 1.6527, + "step": 27810 + }, + { + "epoch": 1.17, + "learning_rate": 8.38385985798716e-06, + "loss": 2.2103, + "step": 27820 + }, + { + "epoch": 1.17, + "learning_rate": 8.379607976529615e-06, + "loss": 2.1751, + "step": 27830 + }, + { + "epoch": 1.17, + "learning_rate": 8.37535609507207e-06, + "loss": 2.2004, + "step": 27840 + }, + { + "epoch": 1.17, + "learning_rate": 8.371104213614524e-06, + "loss": 1.9038, + "step": 27850 + }, + { + "epoch": 1.17, + "learning_rate": 8.36685233215698e-06, + "loss": 2.2364, + "step": 27860 + }, + { + "epoch": 1.17, + "learning_rate": 8.362600450699435e-06, + "loss": 1.9736, + "step": 27870 + }, + { + "epoch": 1.17, + "learning_rate": 8.35834856924189e-06, + "loss": 1.9249, + "step": 27880 + }, + { + "epoch": 1.17, + "learning_rate": 8.354096687784345e-06, + "loss": 1.7786, + "step": 27890 + }, + { + "epoch": 1.17, + "learning_rate": 8.3498448063268e-06, + "loss": 2.1232, + "step": 27900 + }, + { + "epoch": 1.17, + "learning_rate": 8.345592924869256e-06, + "loss": 2.3536, + "step": 27910 + }, + { + "epoch": 1.17, + "learning_rate": 8.34134104341171e-06, + "loss": 2.2933, + "step": 27920 + }, + { + "epoch": 1.18, + "learning_rate": 8.337089161954165e-06, + "loss": 1.7072, + "step": 27930 + }, + { + "epoch": 1.18, + "learning_rate": 8.332837280496621e-06, + "loss": 2.2741, + "step": 27940 + }, + { + "epoch": 1.18, + "learning_rate": 8.328585399039076e-06, + "loss": 2.356, + "step": 27950 + }, + { + "epoch": 1.18, + "learning_rate": 8.324333517581532e-06, + "loss": 2.1711, + "step": 27960 + }, + { + "epoch": 1.18, + "learning_rate": 8.320081636123986e-06, + "loss": 1.9022, + "step": 27970 + }, + { + "epoch": 1.18, + "learning_rate": 8.31582975466644e-06, + "loss": 2.5043, + "step": 27980 + }, + { + "epoch": 1.18, + "learning_rate": 8.311577873208895e-06, + "loss": 2.411, + "step": 27990 + }, + { + "epoch": 1.18, + "learning_rate": 8.307325991751351e-06, + "loss": 1.9705, + "step": 28000 + }, + { + "epoch": 1.18, + "eval_loss": 1.7835320234298706, + "eval_runtime": 175.7479, + "eval_samples_per_second": 14.242, + "eval_steps_per_second": 7.124, + "step": 28000 + }, + { + "epoch": 1.18, + "learning_rate": 8.303074110293806e-06, + "loss": 2.3563, + "step": 28010 + }, + { + "epoch": 1.18, + "learning_rate": 8.29882222883626e-06, + "loss": 2.5579, + "step": 28020 + }, + { + "epoch": 1.18, + "learning_rate": 8.294570347378715e-06, + "loss": 2.2202, + "step": 28030 + }, + { + "epoch": 1.18, + "learning_rate": 8.29031846592117e-06, + "loss": 2.2095, + "step": 28040 + }, + { + "epoch": 1.18, + "learning_rate": 8.286066584463625e-06, + "loss": 2.0755, + "step": 28050 + }, + { + "epoch": 1.18, + "learning_rate": 8.281814703006081e-06, + "loss": 2.3718, + "step": 28060 + }, + { + "epoch": 1.18, + "learning_rate": 8.277562821548536e-06, + "loss": 2.4411, + "step": 28070 + }, + { + "epoch": 1.18, + "learning_rate": 8.27331094009099e-06, + "loss": 2.5163, + "step": 28080 + }, + { + "epoch": 1.18, + "learning_rate": 8.269059058633446e-06, + "loss": 2.1317, + "step": 28090 + }, + { + "epoch": 1.18, + "learning_rate": 8.264807177175901e-06, + "loss": 2.2966, + "step": 28100 + }, + { + "epoch": 1.18, + "learning_rate": 8.260555295718357e-06, + "loss": 2.1478, + "step": 28110 + }, + { + "epoch": 1.18, + "learning_rate": 8.256303414260812e-06, + "loss": 2.1012, + "step": 28120 + }, + { + "epoch": 1.18, + "learning_rate": 8.252051532803266e-06, + "loss": 1.8812, + "step": 28130 + }, + { + "epoch": 1.18, + "learning_rate": 8.247799651345722e-06, + "loss": 1.7168, + "step": 28140 + }, + { + "epoch": 1.18, + "learning_rate": 8.243547769888177e-06, + "loss": 2.199, + "step": 28150 + }, + { + "epoch": 1.18, + "learning_rate": 8.239295888430631e-06, + "loss": 2.252, + "step": 28160 + }, + { + "epoch": 1.19, + "learning_rate": 8.235044006973086e-06, + "loss": 1.89, + "step": 28170 + }, + { + "epoch": 1.19, + "learning_rate": 8.230792125515542e-06, + "loss": 2.1628, + "step": 28180 + }, + { + "epoch": 1.19, + "learning_rate": 8.226540244057996e-06, + "loss": 2.1145, + "step": 28190 + }, + { + "epoch": 1.19, + "learning_rate": 8.22228836260045e-06, + "loss": 2.167, + "step": 28200 + }, + { + "epoch": 1.19, + "learning_rate": 8.218036481142905e-06, + "loss": 2.5227, + "step": 28210 + }, + { + "epoch": 1.19, + "learning_rate": 8.213784599685361e-06, + "loss": 2.1437, + "step": 28220 + }, + { + "epoch": 1.19, + "learning_rate": 8.209532718227816e-06, + "loss": 2.0659, + "step": 28230 + }, + { + "epoch": 1.19, + "learning_rate": 8.205280836770272e-06, + "loss": 2.4093, + "step": 28240 + }, + { + "epoch": 1.19, + "learning_rate": 8.201028955312726e-06, + "loss": 2.0918, + "step": 28250 + }, + { + "epoch": 1.19, + "learning_rate": 8.19677707385518e-06, + "loss": 2.1433, + "step": 28260 + }, + { + "epoch": 1.19, + "learning_rate": 8.192525192397637e-06, + "loss": 2.0139, + "step": 28270 + }, + { + "epoch": 1.19, + "learning_rate": 8.188273310940091e-06, + "loss": 2.1022, + "step": 28280 + }, + { + "epoch": 1.19, + "learning_rate": 8.184021429482548e-06, + "loss": 2.0868, + "step": 28290 + }, + { + "epoch": 1.19, + "learning_rate": 8.179769548025002e-06, + "loss": 1.7078, + "step": 28300 + }, + { + "epoch": 1.19, + "learning_rate": 8.175517666567456e-06, + "loss": 1.9889, + "step": 28310 + }, + { + "epoch": 1.19, + "learning_rate": 8.171265785109913e-06, + "loss": 2.3366, + "step": 28320 + }, + { + "epoch": 1.19, + "learning_rate": 8.167013903652367e-06, + "loss": 2.2599, + "step": 28330 + }, + { + "epoch": 1.19, + "learning_rate": 8.162762022194821e-06, + "loss": 2.2115, + "step": 28340 + }, + { + "epoch": 1.19, + "learning_rate": 8.158510140737278e-06, + "loss": 1.867, + "step": 28350 + }, + { + "epoch": 1.19, + "learning_rate": 8.154258259279732e-06, + "loss": 1.7084, + "step": 28360 + }, + { + "epoch": 1.19, + "learning_rate": 8.150006377822187e-06, + "loss": 2.4791, + "step": 28370 + }, + { + "epoch": 1.19, + "learning_rate": 8.145754496364641e-06, + "loss": 1.8911, + "step": 28380 + }, + { + "epoch": 1.19, + "learning_rate": 8.141502614907097e-06, + "loss": 2.0083, + "step": 28390 + }, + { + "epoch": 1.19, + "learning_rate": 8.137250733449552e-06, + "loss": 2.436, + "step": 28400 + }, + { + "epoch": 1.2, + "learning_rate": 8.132998851992006e-06, + "loss": 2.5692, + "step": 28410 + }, + { + "epoch": 1.2, + "learning_rate": 8.128746970534462e-06, + "loss": 1.878, + "step": 28420 + }, + { + "epoch": 1.2, + "learning_rate": 8.124495089076917e-06, + "loss": 1.6927, + "step": 28430 + }, + { + "epoch": 1.2, + "learning_rate": 8.120243207619373e-06, + "loss": 2.2977, + "step": 28440 + }, + { + "epoch": 1.2, + "learning_rate": 8.115991326161827e-06, + "loss": 2.2699, + "step": 28450 + }, + { + "epoch": 1.2, + "learning_rate": 8.111739444704282e-06, + "loss": 1.844, + "step": 28460 + }, + { + "epoch": 1.2, + "learning_rate": 8.107487563246738e-06, + "loss": 2.162, + "step": 28470 + }, + { + "epoch": 1.2, + "learning_rate": 8.103235681789192e-06, + "loss": 2.3989, + "step": 28480 + }, + { + "epoch": 1.2, + "learning_rate": 8.098983800331649e-06, + "loss": 1.8529, + "step": 28490 + }, + { + "epoch": 1.2, + "learning_rate": 8.094731918874103e-06, + "loss": 2.0539, + "step": 28500 + }, + { + "epoch": 1.2, + "eval_loss": 1.7728729248046875, + "eval_runtime": 176.2414, + "eval_samples_per_second": 14.202, + "eval_steps_per_second": 7.104, + "step": 28500 + }, + { + "epoch": 1.2, + "learning_rate": 8.090480037416557e-06, + "loss": 1.6733, + "step": 28510 + }, + { + "epoch": 1.2, + "learning_rate": 8.086228155959014e-06, + "loss": 2.2849, + "step": 28520 + }, + { + "epoch": 1.2, + "learning_rate": 8.081976274501468e-06, + "loss": 2.5538, + "step": 28530 + }, + { + "epoch": 1.2, + "learning_rate": 8.077724393043923e-06, + "loss": 2.4862, + "step": 28540 + }, + { + "epoch": 1.2, + "learning_rate": 8.073472511586377e-06, + "loss": 2.3368, + "step": 28550 + }, + { + "epoch": 1.2, + "learning_rate": 8.069220630128831e-06, + "loss": 2.6631, + "step": 28560 + }, + { + "epoch": 1.2, + "learning_rate": 8.064968748671288e-06, + "loss": 2.1342, + "step": 28570 + }, + { + "epoch": 1.2, + "learning_rate": 8.060716867213742e-06, + "loss": 2.0555, + "step": 28580 + }, + { + "epoch": 1.2, + "learning_rate": 8.056464985756198e-06, + "loss": 2.3854, + "step": 28590 + }, + { + "epoch": 1.2, + "learning_rate": 8.052213104298653e-06, + "loss": 2.4276, + "step": 28600 + }, + { + "epoch": 1.2, + "learning_rate": 8.047961222841107e-06, + "loss": 2.2421, + "step": 28610 + }, + { + "epoch": 1.2, + "learning_rate": 8.043709341383563e-06, + "loss": 1.6778, + "step": 28620 + }, + { + "epoch": 1.2, + "learning_rate": 8.039457459926018e-06, + "loss": 2.1492, + "step": 28630 + }, + { + "epoch": 1.2, + "learning_rate": 8.035205578468474e-06, + "loss": 1.6066, + "step": 28640 + }, + { + "epoch": 1.21, + "learning_rate": 8.030953697010928e-06, + "loss": 2.134, + "step": 28650 + }, + { + "epoch": 1.21, + "learning_rate": 8.026701815553383e-06, + "loss": 1.6651, + "step": 28660 + }, + { + "epoch": 1.21, + "learning_rate": 8.022449934095839e-06, + "loss": 2.3821, + "step": 28670 + }, + { + "epoch": 1.21, + "learning_rate": 8.018198052638293e-06, + "loss": 1.8024, + "step": 28680 + }, + { + "epoch": 1.21, + "learning_rate": 8.013946171180748e-06, + "loss": 2.3084, + "step": 28690 + }, + { + "epoch": 1.21, + "learning_rate": 8.009694289723204e-06, + "loss": 2.1194, + "step": 28700 + }, + { + "epoch": 1.21, + "learning_rate": 8.005442408265658e-06, + "loss": 1.9815, + "step": 28710 + }, + { + "epoch": 1.21, + "learning_rate": 8.001190526808113e-06, + "loss": 1.8908, + "step": 28720 + }, + { + "epoch": 1.21, + "learning_rate": 7.996938645350567e-06, + "loss": 2.0239, + "step": 28730 + }, + { + "epoch": 1.21, + "learning_rate": 7.992686763893024e-06, + "loss": 2.3303, + "step": 28740 + }, + { + "epoch": 1.21, + "learning_rate": 7.988434882435478e-06, + "loss": 1.8689, + "step": 28750 + }, + { + "epoch": 1.21, + "learning_rate": 7.984183000977932e-06, + "loss": 2.2181, + "step": 28760 + }, + { + "epoch": 1.21, + "learning_rate": 7.979931119520389e-06, + "loss": 2.3179, + "step": 28770 + }, + { + "epoch": 1.21, + "learning_rate": 7.975679238062843e-06, + "loss": 2.0093, + "step": 28780 + }, + { + "epoch": 1.21, + "learning_rate": 7.971427356605298e-06, + "loss": 2.3236, + "step": 28790 + }, + { + "epoch": 1.21, + "learning_rate": 7.967175475147754e-06, + "loss": 2.1852, + "step": 28800 + }, + { + "epoch": 1.21, + "learning_rate": 7.962923593690208e-06, + "loss": 2.062, + "step": 28810 + }, + { + "epoch": 1.21, + "learning_rate": 7.958671712232664e-06, + "loss": 2.6228, + "step": 28820 + }, + { + "epoch": 1.21, + "learning_rate": 7.954419830775119e-06, + "loss": 2.2448, + "step": 28830 + }, + { + "epoch": 1.21, + "learning_rate": 7.950167949317573e-06, + "loss": 2.3091, + "step": 28840 + }, + { + "epoch": 1.21, + "learning_rate": 7.94591606786003e-06, + "loss": 2.1589, + "step": 28850 + }, + { + "epoch": 1.21, + "learning_rate": 7.941664186402484e-06, + "loss": 1.9526, + "step": 28860 + }, + { + "epoch": 1.21, + "learning_rate": 7.93741230494494e-06, + "loss": 2.165, + "step": 28870 + }, + { + "epoch": 1.22, + "learning_rate": 7.933160423487394e-06, + "loss": 2.0298, + "step": 28880 + }, + { + "epoch": 1.22, + "learning_rate": 7.928908542029849e-06, + "loss": 2.2625, + "step": 28890 + }, + { + "epoch": 1.22, + "learning_rate": 7.924656660572303e-06, + "loss": 2.1089, + "step": 28900 + }, + { + "epoch": 1.22, + "learning_rate": 7.92040477911476e-06, + "loss": 2.4739, + "step": 28910 + }, + { + "epoch": 1.22, + "learning_rate": 7.916152897657214e-06, + "loss": 1.879, + "step": 28920 + }, + { + "epoch": 1.22, + "learning_rate": 7.911901016199668e-06, + "loss": 2.0006, + "step": 28930 + }, + { + "epoch": 1.22, + "learning_rate": 7.907649134742123e-06, + "loss": 2.1949, + "step": 28940 + }, + { + "epoch": 1.22, + "learning_rate": 7.903397253284579e-06, + "loss": 2.3032, + "step": 28950 + }, + { + "epoch": 1.22, + "learning_rate": 7.899145371827033e-06, + "loss": 2.0933, + "step": 28960 + }, + { + "epoch": 1.22, + "learning_rate": 7.89489349036949e-06, + "loss": 1.83, + "step": 28970 + }, + { + "epoch": 1.22, + "learning_rate": 7.890641608911944e-06, + "loss": 2.0929, + "step": 28980 + }, + { + "epoch": 1.22, + "learning_rate": 7.886389727454399e-06, + "loss": 2.0977, + "step": 28990 + }, + { + "epoch": 1.22, + "learning_rate": 7.882137845996855e-06, + "loss": 2.0148, + "step": 29000 + }, + { + "epoch": 1.22, + "eval_loss": 1.784656047821045, + "eval_runtime": 174.4708, + "eval_samples_per_second": 14.346, + "eval_steps_per_second": 7.176, + "step": 29000 + }, + { + "epoch": 1.22, + "learning_rate": 7.87788596453931e-06, + "loss": 2.2318, + "step": 29010 + }, + { + "epoch": 1.22, + "learning_rate": 7.873634083081765e-06, + "loss": 2.0887, + "step": 29020 + }, + { + "epoch": 1.22, + "learning_rate": 7.86938220162422e-06, + "loss": 2.6538, + "step": 29030 + }, + { + "epoch": 1.22, + "learning_rate": 7.865130320166674e-06, + "loss": 2.1592, + "step": 29040 + }, + { + "epoch": 1.22, + "learning_rate": 7.86087843870913e-06, + "loss": 2.2841, + "step": 29050 + }, + { + "epoch": 1.22, + "learning_rate": 7.856626557251585e-06, + "loss": 2.4645, + "step": 29060 + }, + { + "epoch": 1.22, + "learning_rate": 7.85237467579404e-06, + "loss": 2.272, + "step": 29070 + }, + { + "epoch": 1.22, + "learning_rate": 7.848122794336494e-06, + "loss": 2.0791, + "step": 29080 + }, + { + "epoch": 1.22, + "learning_rate": 7.84387091287895e-06, + "loss": 2.5805, + "step": 29090 + }, + { + "epoch": 1.22, + "learning_rate": 7.839619031421404e-06, + "loss": 2.1916, + "step": 29100 + }, + { + "epoch": 1.22, + "learning_rate": 7.835367149963859e-06, + "loss": 1.6838, + "step": 29110 + }, + { + "epoch": 1.23, + "learning_rate": 7.831115268506315e-06, + "loss": 2.1771, + "step": 29120 + }, + { + "epoch": 1.23, + "learning_rate": 7.82686338704877e-06, + "loss": 2.0263, + "step": 29130 + }, + { + "epoch": 1.23, + "learning_rate": 7.822611505591224e-06, + "loss": 2.1806, + "step": 29140 + }, + { + "epoch": 1.23, + "learning_rate": 7.81835962413368e-06, + "loss": 2.3806, + "step": 29150 + }, + { + "epoch": 1.23, + "learning_rate": 7.814107742676135e-06, + "loss": 2.0126, + "step": 29160 + }, + { + "epoch": 1.23, + "learning_rate": 7.80985586121859e-06, + "loss": 1.817, + "step": 29170 + }, + { + "epoch": 1.23, + "learning_rate": 7.805603979761045e-06, + "loss": 2.336, + "step": 29180 + }, + { + "epoch": 1.23, + "learning_rate": 7.8013520983035e-06, + "loss": 2.2979, + "step": 29190 + }, + { + "epoch": 1.23, + "learning_rate": 7.797100216845956e-06, + "loss": 2.3, + "step": 29200 + }, + { + "epoch": 1.23, + "learning_rate": 7.79284833538841e-06, + "loss": 1.6785, + "step": 29210 + }, + { + "epoch": 1.23, + "learning_rate": 7.788596453930865e-06, + "loss": 2.1275, + "step": 29220 + }, + { + "epoch": 1.23, + "learning_rate": 7.78434457247332e-06, + "loss": 2.4896, + "step": 29230 + }, + { + "epoch": 1.23, + "learning_rate": 7.780092691015775e-06, + "loss": 2.3844, + "step": 29240 + }, + { + "epoch": 1.23, + "learning_rate": 7.77584080955823e-06, + "loss": 2.2897, + "step": 29250 + }, + { + "epoch": 1.23, + "learning_rate": 7.771588928100686e-06, + "loss": 1.7411, + "step": 29260 + }, + { + "epoch": 1.23, + "learning_rate": 7.76733704664314e-06, + "loss": 2.4193, + "step": 29270 + }, + { + "epoch": 1.23, + "learning_rate": 7.763085165185595e-06, + "loss": 2.0962, + "step": 29280 + }, + { + "epoch": 1.23, + "learning_rate": 7.75883328372805e-06, + "loss": 2.1699, + "step": 29290 + }, + { + "epoch": 1.23, + "learning_rate": 7.754581402270505e-06, + "loss": 2.2598, + "step": 29300 + }, + { + "epoch": 1.23, + "learning_rate": 7.75032952081296e-06, + "loss": 2.0741, + "step": 29310 + }, + { + "epoch": 1.23, + "learning_rate": 7.746077639355414e-06, + "loss": 2.0311, + "step": 29320 + }, + { + "epoch": 1.23, + "learning_rate": 7.74182575789787e-06, + "loss": 2.4243, + "step": 29330 + }, + { + "epoch": 1.23, + "learning_rate": 7.737573876440325e-06, + "loss": 2.3444, + "step": 29340 + }, + { + "epoch": 1.23, + "learning_rate": 7.733321994982781e-06, + "loss": 2.1163, + "step": 29350 + }, + { + "epoch": 1.24, + "learning_rate": 7.729070113525236e-06, + "loss": 2.3206, + "step": 29360 + }, + { + "epoch": 1.24, + "learning_rate": 7.72481823206769e-06, + "loss": 1.7765, + "step": 29370 + }, + { + "epoch": 1.24, + "learning_rate": 7.720566350610146e-06, + "loss": 2.348, + "step": 29380 + }, + { + "epoch": 1.24, + "learning_rate": 7.7163144691526e-06, + "loss": 2.2471, + "step": 29390 + }, + { + "epoch": 1.24, + "learning_rate": 7.712062587695057e-06, + "loss": 2.2791, + "step": 29400 + }, + { + "epoch": 1.24, + "learning_rate": 7.707810706237511e-06, + "loss": 1.6969, + "step": 29410 + }, + { + "epoch": 1.24, + "learning_rate": 7.703558824779966e-06, + "loss": 2.3942, + "step": 29420 + }, + { + "epoch": 1.24, + "learning_rate": 7.699306943322422e-06, + "loss": 1.9153, + "step": 29430 + }, + { + "epoch": 1.24, + "learning_rate": 7.695055061864876e-06, + "loss": 2.3807, + "step": 29440 + }, + { + "epoch": 1.24, + "learning_rate": 7.69080318040733e-06, + "loss": 2.1779, + "step": 29450 + }, + { + "epoch": 1.24, + "learning_rate": 7.686551298949785e-06, + "loss": 1.9697, + "step": 29460 + }, + { + "epoch": 1.24, + "learning_rate": 7.68229941749224e-06, + "loss": 2.3212, + "step": 29470 + }, + { + "epoch": 1.24, + "learning_rate": 7.678047536034696e-06, + "loss": 1.9044, + "step": 29480 + }, + { + "epoch": 1.24, + "learning_rate": 7.67379565457715e-06, + "loss": 2.4155, + "step": 29490 + }, + { + "epoch": 1.24, + "learning_rate": 7.669543773119606e-06, + "loss": 1.9734, + "step": 29500 + }, + { + "epoch": 1.24, + "eval_loss": 1.762258768081665, + "eval_runtime": 175.5265, + "eval_samples_per_second": 14.26, + "eval_steps_per_second": 7.133, + "step": 29500 + }, + { + "epoch": 1.24, + "learning_rate": 7.665291891662061e-06, + "loss": 2.1499, + "step": 29510 + }, + { + "epoch": 1.24, + "learning_rate": 7.661040010204515e-06, + "loss": 2.0907, + "step": 29520 + }, + { + "epoch": 1.24, + "learning_rate": 7.656788128746971e-06, + "loss": 1.8271, + "step": 29530 + }, + { + "epoch": 1.24, + "learning_rate": 7.652536247289426e-06, + "loss": 2.0215, + "step": 29540 + }, + { + "epoch": 1.24, + "learning_rate": 7.648284365831882e-06, + "loss": 2.1363, + "step": 29550 + }, + { + "epoch": 1.24, + "learning_rate": 7.644032484374337e-06, + "loss": 2.2156, + "step": 29560 + }, + { + "epoch": 1.24, + "learning_rate": 7.639780602916791e-06, + "loss": 1.9902, + "step": 29570 + }, + { + "epoch": 1.24, + "learning_rate": 7.635528721459247e-06, + "loss": 1.8233, + "step": 29580 + }, + { + "epoch": 1.24, + "learning_rate": 7.631276840001702e-06, + "loss": 1.9334, + "step": 29590 + }, + { + "epoch": 1.25, + "learning_rate": 7.627024958544157e-06, + "loss": 2.4853, + "step": 29600 + }, + { + "epoch": 1.25, + "learning_rate": 7.622773077086611e-06, + "loss": 2.425, + "step": 29610 + }, + { + "epoch": 1.25, + "learning_rate": 7.618521195629066e-06, + "loss": 2.1252, + "step": 29620 + }, + { + "epoch": 1.25, + "learning_rate": 7.614269314171522e-06, + "loss": 2.0679, + "step": 29630 + }, + { + "epoch": 1.25, + "learning_rate": 7.6100174327139765e-06, + "loss": 2.2087, + "step": 29640 + }, + { + "epoch": 1.25, + "learning_rate": 7.605765551256432e-06, + "loss": 2.0734, + "step": 29650 + }, + { + "epoch": 1.25, + "learning_rate": 7.601513669798887e-06, + "loss": 2.2149, + "step": 29660 + }, + { + "epoch": 1.25, + "learning_rate": 7.5972617883413415e-06, + "loss": 2.3035, + "step": 29670 + }, + { + "epoch": 1.25, + "learning_rate": 7.593009906883797e-06, + "loss": 2.2851, + "step": 29680 + }, + { + "epoch": 1.25, + "learning_rate": 7.588758025426251e-06, + "loss": 2.4831, + "step": 29690 + }, + { + "epoch": 1.25, + "learning_rate": 7.5845061439687075e-06, + "loss": 2.1056, + "step": 29700 + }, + { + "epoch": 1.25, + "learning_rate": 7.580254262511162e-06, + "loss": 1.8801, + "step": 29710 + }, + { + "epoch": 1.25, + "learning_rate": 7.576002381053616e-06, + "loss": 2.2619, + "step": 29720 + }, + { + "epoch": 1.25, + "learning_rate": 7.5717504995960725e-06, + "loss": 2.1201, + "step": 29730 + }, + { + "epoch": 1.25, + "learning_rate": 7.567498618138527e-06, + "loss": 2.043, + "step": 29740 + }, + { + "epoch": 1.25, + "learning_rate": 7.5632467366809814e-06, + "loss": 1.9556, + "step": 29750 + }, + { + "epoch": 1.25, + "learning_rate": 7.558994855223437e-06, + "loss": 2.2447, + "step": 29760 + }, + { + "epoch": 1.25, + "learning_rate": 7.554742973765892e-06, + "loss": 1.8696, + "step": 29770 + }, + { + "epoch": 1.25, + "learning_rate": 7.550491092308347e-06, + "loss": 2.0987, + "step": 29780 + }, + { + "epoch": 1.25, + "learning_rate": 7.546239210850802e-06, + "loss": 2.1271, + "step": 29790 + }, + { + "epoch": 1.25, + "learning_rate": 7.541987329393256e-06, + "loss": 2.0724, + "step": 29800 + }, + { + "epoch": 1.25, + "learning_rate": 7.5377354479357124e-06, + "loss": 2.1989, + "step": 29810 + }, + { + "epoch": 1.25, + "learning_rate": 7.533483566478167e-06, + "loss": 2.011, + "step": 29820 + }, + { + "epoch": 1.25, + "learning_rate": 7.529231685020623e-06, + "loss": 2.1491, + "step": 29830 + }, + { + "epoch": 1.26, + "learning_rate": 7.5249798035630775e-06, + "loss": 2.0264, + "step": 29840 + }, + { + "epoch": 1.26, + "learning_rate": 7.520727922105532e-06, + "loss": 2.0702, + "step": 29850 + }, + { + "epoch": 1.26, + "learning_rate": 7.516476040647987e-06, + "loss": 2.1978, + "step": 29860 + }, + { + "epoch": 1.26, + "learning_rate": 7.512224159190442e-06, + "loss": 2.2314, + "step": 29870 + }, + { + "epoch": 1.26, + "learning_rate": 7.507972277732898e-06, + "loss": 2.3425, + "step": 29880 + }, + { + "epoch": 1.26, + "learning_rate": 7.503720396275352e-06, + "loss": 1.9546, + "step": 29890 + }, + { + "epoch": 1.26, + "learning_rate": 7.499468514817807e-06, + "loss": 2.7283, + "step": 29900 + }, + { + "epoch": 1.26, + "learning_rate": 7.495216633360263e-06, + "loss": 2.3941, + "step": 29910 + }, + { + "epoch": 1.26, + "learning_rate": 7.490964751902717e-06, + "loss": 1.8176, + "step": 29920 + }, + { + "epoch": 1.26, + "learning_rate": 7.486712870445173e-06, + "loss": 2.1529, + "step": 29930 + }, + { + "epoch": 1.26, + "learning_rate": 7.482460988987628e-06, + "loss": 2.0351, + "step": 29940 + }, + { + "epoch": 1.26, + "learning_rate": 7.4782091075300825e-06, + "loss": 1.8016, + "step": 29950 + }, + { + "epoch": 1.26, + "learning_rate": 7.473957226072538e-06, + "loss": 1.6143, + "step": 29960 + }, + { + "epoch": 1.26, + "learning_rate": 7.469705344614992e-06, + "loss": 1.9063, + "step": 29970 + }, + { + "epoch": 1.26, + "learning_rate": 7.465453463157448e-06, + "loss": 2.4023, + "step": 29980 + }, + { + "epoch": 1.26, + "learning_rate": 7.461201581699903e-06, + "loss": 1.9606, + "step": 29990 + }, + { + "epoch": 1.26, + "learning_rate": 7.456949700242357e-06, + "loss": 2.2853, + "step": 30000 + }, + { + "epoch": 1.26, + "eval_loss": 1.768416404724121, + "eval_runtime": 175.6228, + "eval_samples_per_second": 14.252, + "eval_steps_per_second": 7.129, + "step": 30000 + }, + { + "epoch": 1.26, + "learning_rate": 7.4526978187848135e-06, + "loss": 1.8119, + "step": 30010 + }, + { + "epoch": 1.26, + "learning_rate": 7.448445937327268e-06, + "loss": 1.8843, + "step": 30020 + }, + { + "epoch": 1.26, + "learning_rate": 7.444194055869723e-06, + "loss": 2.4566, + "step": 30030 + }, + { + "epoch": 1.26, + "learning_rate": 7.439942174412178e-06, + "loss": 1.6584, + "step": 30040 + }, + { + "epoch": 1.26, + "learning_rate": 7.435690292954633e-06, + "loss": 2.5253, + "step": 30050 + }, + { + "epoch": 1.26, + "learning_rate": 7.431438411497088e-06, + "loss": 2.1786, + "step": 30060 + }, + { + "epoch": 1.27, + "learning_rate": 7.427186530039543e-06, + "loss": 2.2506, + "step": 30070 + }, + { + "epoch": 1.27, + "learning_rate": 7.422934648581999e-06, + "loss": 2.1316, + "step": 30080 + }, + { + "epoch": 1.27, + "learning_rate": 7.418682767124453e-06, + "loss": 2.0686, + "step": 30090 + }, + { + "epoch": 1.27, + "learning_rate": 7.414430885666908e-06, + "loss": 1.7463, + "step": 30100 + }, + { + "epoch": 1.27, + "learning_rate": 7.410179004209363e-06, + "loss": 2.2856, + "step": 30110 + }, + { + "epoch": 1.27, + "learning_rate": 7.405927122751818e-06, + "loss": 1.9305, + "step": 30120 + }, + { + "epoch": 1.27, + "learning_rate": 7.401675241294274e-06, + "loss": 1.7928, + "step": 30130 + }, + { + "epoch": 1.27, + "learning_rate": 7.397423359836728e-06, + "loss": 2.4415, + "step": 30140 + }, + { + "epoch": 1.27, + "learning_rate": 7.393171478379183e-06, + "loss": 2.2208, + "step": 30150 + }, + { + "epoch": 1.27, + "learning_rate": 7.388919596921639e-06, + "loss": 2.0496, + "step": 30160 + }, + { + "epoch": 1.27, + "learning_rate": 7.384667715464093e-06, + "loss": 2.1804, + "step": 30170 + }, + { + "epoch": 1.27, + "learning_rate": 7.380415834006549e-06, + "loss": 2.5044, + "step": 30180 + }, + { + "epoch": 1.27, + "learning_rate": 7.376163952549004e-06, + "loss": 2.0653, + "step": 30190 + }, + { + "epoch": 1.27, + "learning_rate": 7.371912071091458e-06, + "loss": 2.1761, + "step": 30200 + }, + { + "epoch": 1.27, + "learning_rate": 7.367660189633914e-06, + "loss": 2.1186, + "step": 30210 + }, + { + "epoch": 1.27, + "learning_rate": 7.363408308176369e-06, + "loss": 2.1766, + "step": 30220 + }, + { + "epoch": 1.27, + "learning_rate": 7.359156426718823e-06, + "loss": 2.4063, + "step": 30230 + }, + { + "epoch": 1.27, + "learning_rate": 7.354904545261279e-06, + "loss": 2.0167, + "step": 30240 + }, + { + "epoch": 1.27, + "learning_rate": 7.350652663803733e-06, + "loss": 2.4051, + "step": 30250 + }, + { + "epoch": 1.27, + "learning_rate": 7.346400782346189e-06, + "loss": 2.0316, + "step": 30260 + }, + { + "epoch": 1.27, + "learning_rate": 7.342148900888644e-06, + "loss": 1.8998, + "step": 30270 + }, + { + "epoch": 1.27, + "learning_rate": 7.337897019431098e-06, + "loss": 2.305, + "step": 30280 + }, + { + "epoch": 1.27, + "learning_rate": 7.333645137973554e-06, + "loss": 2.0012, + "step": 30290 + }, + { + "epoch": 1.27, + "learning_rate": 7.329393256516009e-06, + "loss": 2.2966, + "step": 30300 + }, + { + "epoch": 1.28, + "learning_rate": 7.325141375058464e-06, + "loss": 2.3181, + "step": 30310 + }, + { + "epoch": 1.28, + "learning_rate": 7.320889493600919e-06, + "loss": 2.2078, + "step": 30320 + }, + { + "epoch": 1.28, + "learning_rate": 7.316637612143374e-06, + "loss": 2.0477, + "step": 30330 + }, + { + "epoch": 1.28, + "learning_rate": 7.312385730685829e-06, + "loss": 2.0226, + "step": 30340 + }, + { + "epoch": 1.28, + "learning_rate": 7.308133849228284e-06, + "loss": 2.1437, + "step": 30350 + }, + { + "epoch": 1.28, + "learning_rate": 7.30388196777074e-06, + "loss": 2.5629, + "step": 30360 + }, + { + "epoch": 1.28, + "learning_rate": 7.299630086313194e-06, + "loss": 2.4503, + "step": 30370 + }, + { + "epoch": 1.28, + "learning_rate": 7.295378204855649e-06, + "loss": 1.9968, + "step": 30380 + }, + { + "epoch": 1.28, + "learning_rate": 7.291126323398104e-06, + "loss": 2.0472, + "step": 30390 + }, + { + "epoch": 1.28, + "learning_rate": 7.286874441940559e-06, + "loss": 2.2337, + "step": 30400 + }, + { + "epoch": 1.28, + "learning_rate": 7.282622560483015e-06, + "loss": 1.8869, + "step": 30410 + }, + { + "epoch": 1.28, + "learning_rate": 7.278370679025469e-06, + "loss": 2.1899, + "step": 30420 + }, + { + "epoch": 1.28, + "learning_rate": 7.2741187975679236e-06, + "loss": 1.478, + "step": 30430 + }, + { + "epoch": 1.28, + "learning_rate": 7.26986691611038e-06, + "loss": 2.2113, + "step": 30440 + }, + { + "epoch": 1.28, + "learning_rate": 7.265615034652834e-06, + "loss": 1.8112, + "step": 30450 + }, + { + "epoch": 1.28, + "learning_rate": 7.26136315319529e-06, + "loss": 2.0767, + "step": 30460 + }, + { + "epoch": 1.28, + "learning_rate": 7.257111271737745e-06, + "loss": 2.2814, + "step": 30470 + }, + { + "epoch": 1.28, + "learning_rate": 7.252859390280199e-06, + "loss": 2.4961, + "step": 30480 + }, + { + "epoch": 1.28, + "learning_rate": 7.2486075088226546e-06, + "loss": 2.1519, + "step": 30490 + }, + { + "epoch": 1.28, + "learning_rate": 7.24435562736511e-06, + "loss": 2.2354, + "step": 30500 + }, + { + "epoch": 1.28, + "eval_loss": 1.7716997861862183, + "eval_runtime": 175.8883, + "eval_samples_per_second": 14.231, + "eval_steps_per_second": 7.118, + "step": 30500 + }, + { + "epoch": 1.28, + "learning_rate": 7.240103745907565e-06, + "loss": 2.3328, + "step": 30510 + }, + { + "epoch": 1.28, + "learning_rate": 7.23585186445002e-06, + "loss": 1.8702, + "step": 30520 + }, + { + "epoch": 1.28, + "learning_rate": 7.231599982992474e-06, + "loss": 2.2167, + "step": 30530 + }, + { + "epoch": 1.28, + "learning_rate": 7.22734810153493e-06, + "loss": 1.9658, + "step": 30540 + }, + { + "epoch": 1.29, + "learning_rate": 7.223096220077385e-06, + "loss": 1.9015, + "step": 30550 + }, + { + "epoch": 1.29, + "learning_rate": 7.21884433861984e-06, + "loss": 1.6753, + "step": 30560 + }, + { + "epoch": 1.29, + "learning_rate": 7.214592457162295e-06, + "loss": 2.3337, + "step": 30570 + }, + { + "epoch": 1.29, + "learning_rate": 7.21034057570475e-06, + "loss": 2.136, + "step": 30580 + }, + { + "epoch": 1.29, + "learning_rate": 7.206088694247205e-06, + "loss": 2.0738, + "step": 30590 + }, + { + "epoch": 1.29, + "learning_rate": 7.2018368127896595e-06, + "loss": 2.1366, + "step": 30600 + }, + { + "epoch": 1.29, + "learning_rate": 7.197584931332116e-06, + "loss": 2.1618, + "step": 30610 + }, + { + "epoch": 1.29, + "learning_rate": 7.19333304987457e-06, + "loss": 1.9825, + "step": 30620 + }, + { + "epoch": 1.29, + "learning_rate": 7.189081168417025e-06, + "loss": 1.8846, + "step": 30630 + }, + { + "epoch": 1.29, + "learning_rate": 7.184829286959481e-06, + "loss": 2.3258, + "step": 30640 + }, + { + "epoch": 1.29, + "learning_rate": 7.180577405501935e-06, + "loss": 2.2125, + "step": 30650 + }, + { + "epoch": 1.29, + "learning_rate": 7.1763255240443905e-06, + "loss": 1.7932, + "step": 30660 + }, + { + "epoch": 1.29, + "learning_rate": 7.172073642586845e-06, + "loss": 2.1522, + "step": 30670 + }, + { + "epoch": 1.29, + "learning_rate": 7.1678217611293e-06, + "loss": 2.0979, + "step": 30680 + }, + { + "epoch": 1.29, + "learning_rate": 7.163569879671756e-06, + "loss": 1.6993, + "step": 30690 + }, + { + "epoch": 1.29, + "learning_rate": 7.15931799821421e-06, + "loss": 2.257, + "step": 30700 + }, + { + "epoch": 1.29, + "learning_rate": 7.155066116756666e-06, + "loss": 2.1497, + "step": 30710 + }, + { + "epoch": 1.29, + "learning_rate": 7.150814235299121e-06, + "loss": 2.1923, + "step": 30720 + }, + { + "epoch": 1.29, + "learning_rate": 7.146562353841575e-06, + "loss": 1.9727, + "step": 30730 + }, + { + "epoch": 1.29, + "learning_rate": 7.142310472384031e-06, + "loss": 2.3352, + "step": 30740 + }, + { + "epoch": 1.29, + "learning_rate": 7.138058590926486e-06, + "loss": 1.7339, + "step": 30750 + }, + { + "epoch": 1.29, + "learning_rate": 7.13380670946894e-06, + "loss": 1.9481, + "step": 30760 + }, + { + "epoch": 1.29, + "learning_rate": 7.1295548280113955e-06, + "loss": 1.8164, + "step": 30770 + }, + { + "epoch": 1.29, + "learning_rate": 7.12530294655385e-06, + "loss": 1.7699, + "step": 30780 + }, + { + "epoch": 1.3, + "learning_rate": 7.121051065096306e-06, + "loss": 1.7728, + "step": 30790 + }, + { + "epoch": 1.3, + "learning_rate": 7.1167991836387606e-06, + "loss": 2.3271, + "step": 30800 + }, + { + "epoch": 1.3, + "learning_rate": 7.112547302181215e-06, + "loss": 2.4951, + "step": 30810 + }, + { + "epoch": 1.3, + "learning_rate": 7.108295420723671e-06, + "loss": 2.127, + "step": 30820 + }, + { + "epoch": 1.3, + "learning_rate": 7.104043539266126e-06, + "loss": 2.3694, + "step": 30830 + }, + { + "epoch": 1.3, + "learning_rate": 7.099791657808581e-06, + "loss": 2.6008, + "step": 30840 + }, + { + "epoch": 1.3, + "learning_rate": 7.095539776351036e-06, + "loss": 2.1604, + "step": 30850 + }, + { + "epoch": 1.3, + "learning_rate": 7.091287894893491e-06, + "loss": 2.303, + "step": 30860 + }, + { + "epoch": 1.3, + "learning_rate": 7.087036013435946e-06, + "loss": 1.8119, + "step": 30870 + }, + { + "epoch": 1.3, + "learning_rate": 7.0827841319784005e-06, + "loss": 2.0222, + "step": 30880 + }, + { + "epoch": 1.3, + "learning_rate": 7.078532250520857e-06, + "loss": 2.493, + "step": 30890 + }, + { + "epoch": 1.3, + "learning_rate": 7.074280369063311e-06, + "loss": 1.7992, + "step": 30900 + }, + { + "epoch": 1.3, + "learning_rate": 7.0700284876057655e-06, + "loss": 2.3118, + "step": 30910 + }, + { + "epoch": 1.3, + "learning_rate": 7.065776606148222e-06, + "loss": 2.258, + "step": 30920 + }, + { + "epoch": 1.3, + "learning_rate": 7.061524724690676e-06, + "loss": 2.1641, + "step": 30930 + }, + { + "epoch": 1.3, + "learning_rate": 7.0572728432331314e-06, + "loss": 1.9602, + "step": 30940 + }, + { + "epoch": 1.3, + "learning_rate": 7.053020961775586e-06, + "loss": 2.3728, + "step": 30950 + }, + { + "epoch": 1.3, + "learning_rate": 7.048769080318041e-06, + "loss": 2.2433, + "step": 30960 + }, + { + "epoch": 1.3, + "learning_rate": 7.0445171988604965e-06, + "loss": 2.1174, + "step": 30970 + }, + { + "epoch": 1.3, + "learning_rate": 7.040265317402951e-06, + "loss": 2.2292, + "step": 30980 + }, + { + "epoch": 1.3, + "learning_rate": 7.036013435945407e-06, + "loss": 2.113, + "step": 30990 + }, + { + "epoch": 1.3, + "learning_rate": 7.031761554487862e-06, + "loss": 2.2651, + "step": 31000 + }, + { + "epoch": 1.3, + "eval_loss": 1.7629889249801636, + "eval_runtime": 175.7029, + "eval_samples_per_second": 14.246, + "eval_steps_per_second": 7.126, + "step": 31000 + }, + { + "epoch": 1.3, + "learning_rate": 7.027509673030316e-06, + "loss": 2.0244, + "step": 31010 + }, + { + "epoch": 1.31, + "learning_rate": 7.023257791572771e-06, + "loss": 2.4088, + "step": 31020 + }, + { + "epoch": 1.31, + "learning_rate": 7.019005910115227e-06, + "loss": 1.9128, + "step": 31030 + }, + { + "epoch": 1.31, + "learning_rate": 7.014754028657682e-06, + "loss": 2.1604, + "step": 31040 + }, + { + "epoch": 1.31, + "learning_rate": 7.010502147200136e-06, + "loss": 1.9239, + "step": 31050 + }, + { + "epoch": 1.31, + "learning_rate": 7.006250265742591e-06, + "loss": 1.816, + "step": 31060 + }, + { + "epoch": 1.31, + "learning_rate": 7.001998384285047e-06, + "loss": 2.4882, + "step": 31070 + }, + { + "epoch": 1.31, + "learning_rate": 6.9977465028275015e-06, + "loss": 2.2132, + "step": 31080 + }, + { + "epoch": 1.31, + "learning_rate": 6.993494621369958e-06, + "loss": 2.2542, + "step": 31090 + }, + { + "epoch": 1.31, + "learning_rate": 6.989242739912412e-06, + "loss": 1.9381, + "step": 31100 + }, + { + "epoch": 1.31, + "learning_rate": 6.9849908584548666e-06, + "loss": 2.0868, + "step": 31110 + }, + { + "epoch": 1.31, + "learning_rate": 6.980738976997322e-06, + "loss": 2.2442, + "step": 31120 + }, + { + "epoch": 1.31, + "learning_rate": 6.976487095539777e-06, + "loss": 2.2274, + "step": 31130 + }, + { + "epoch": 1.31, + "learning_rate": 6.9722352140822325e-06, + "loss": 2.1612, + "step": 31140 + }, + { + "epoch": 1.31, + "learning_rate": 6.967983332624687e-06, + "loss": 2.375, + "step": 31150 + }, + { + "epoch": 1.31, + "learning_rate": 6.963731451167141e-06, + "loss": 2.2004, + "step": 31160 + }, + { + "epoch": 1.31, + "learning_rate": 6.9594795697095975e-06, + "loss": 2.187, + "step": 31170 + }, + { + "epoch": 1.31, + "learning_rate": 6.955227688252052e-06, + "loss": 2.2825, + "step": 31180 + }, + { + "epoch": 1.31, + "learning_rate": 6.950975806794507e-06, + "loss": 2.3688, + "step": 31190 + }, + { + "epoch": 1.31, + "learning_rate": 6.946723925336963e-06, + "loss": 2.1461, + "step": 31200 + }, + { + "epoch": 1.31, + "learning_rate": 6.942472043879417e-06, + "loss": 1.8028, + "step": 31210 + }, + { + "epoch": 1.31, + "learning_rate": 6.938220162421872e-06, + "loss": 2.0668, + "step": 31220 + }, + { + "epoch": 1.31, + "learning_rate": 6.933968280964327e-06, + "loss": 1.7507, + "step": 31230 + }, + { + "epoch": 1.31, + "learning_rate": 6.929716399506782e-06, + "loss": 2.0785, + "step": 31240 + }, + { + "epoch": 1.31, + "learning_rate": 6.9254645180492374e-06, + "loss": 2.0424, + "step": 31250 + }, + { + "epoch": 1.32, + "learning_rate": 6.921212636591692e-06, + "loss": 2.3862, + "step": 31260 + }, + { + "epoch": 1.32, + "learning_rate": 6.916960755134148e-06, + "loss": 2.0227, + "step": 31270 + }, + { + "epoch": 1.32, + "learning_rate": 6.9127088736766025e-06, + "loss": 2.5728, + "step": 31280 + }, + { + "epoch": 1.32, + "learning_rate": 6.908456992219057e-06, + "loss": 2.1277, + "step": 31290 + }, + { + "epoch": 1.32, + "learning_rate": 6.904205110761512e-06, + "loss": 1.9434, + "step": 31300 + }, + { + "epoch": 1.32, + "learning_rate": 6.899953229303968e-06, + "loss": 2.2722, + "step": 31310 + }, + { + "epoch": 1.32, + "learning_rate": 6.895701347846423e-06, + "loss": 1.9561, + "step": 31320 + }, + { + "epoch": 1.32, + "learning_rate": 6.891449466388877e-06, + "loss": 1.9599, + "step": 31330 + }, + { + "epoch": 1.32, + "learning_rate": 6.887197584931332e-06, + "loss": 2.0738, + "step": 31340 + }, + { + "epoch": 1.32, + "learning_rate": 6.882945703473788e-06, + "loss": 1.8086, + "step": 31350 + }, + { + "epoch": 1.32, + "learning_rate": 6.878693822016242e-06, + "loss": 1.7633, + "step": 31360 + }, + { + "epoch": 1.32, + "learning_rate": 6.8744419405586986e-06, + "loss": 1.924, + "step": 31370 + }, + { + "epoch": 1.32, + "learning_rate": 6.870190059101153e-06, + "loss": 2.0792, + "step": 31380 + }, + { + "epoch": 1.32, + "learning_rate": 6.8659381776436075e-06, + "loss": 2.3389, + "step": 31390 + }, + { + "epoch": 1.32, + "learning_rate": 6.861686296186063e-06, + "loss": 2.4297, + "step": 31400 + }, + { + "epoch": 1.32, + "learning_rate": 6.857434414728518e-06, + "loss": 1.7596, + "step": 31410 + }, + { + "epoch": 1.32, + "learning_rate": 6.853182533270973e-06, + "loss": 2.2129, + "step": 31420 + }, + { + "epoch": 1.32, + "learning_rate": 6.848930651813428e-06, + "loss": 2.4238, + "step": 31430 + }, + { + "epoch": 1.32, + "learning_rate": 6.844678770355882e-06, + "loss": 2.1243, + "step": 31440 + }, + { + "epoch": 1.32, + "learning_rate": 6.8404268888983385e-06, + "loss": 2.0298, + "step": 31450 + }, + { + "epoch": 1.32, + "learning_rate": 6.836175007440793e-06, + "loss": 1.9073, + "step": 31460 + }, + { + "epoch": 1.32, + "learning_rate": 6.831923125983248e-06, + "loss": 2.0428, + "step": 31470 + }, + { + "epoch": 1.32, + "learning_rate": 6.8276712445257035e-06, + "loss": 2.3143, + "step": 31480 + }, + { + "epoch": 1.32, + "learning_rate": 6.823419363068158e-06, + "loss": 1.8345, + "step": 31490 + }, + { + "epoch": 1.33, + "learning_rate": 6.819167481610613e-06, + "loss": 2.4983, + "step": 31500 + }, + { + "epoch": 1.33, + "eval_loss": 1.7589433193206787, + "eval_runtime": 175.8885, + "eval_samples_per_second": 14.231, + "eval_steps_per_second": 7.118, + "step": 31500 + }, + { + "epoch": 1.33, + "learning_rate": 6.814915600153068e-06, + "loss": 2.1111, + "step": 31510 + }, + { + "epoch": 1.33, + "learning_rate": 6.810663718695524e-06, + "loss": 1.6782, + "step": 31520 + }, + { + "epoch": 1.33, + "learning_rate": 6.806411837237978e-06, + "loss": 2.6629, + "step": 31530 + }, + { + "epoch": 1.33, + "learning_rate": 6.802159955780433e-06, + "loss": 2.2417, + "step": 31540 + }, + { + "epoch": 1.33, + "learning_rate": 6.797908074322889e-06, + "loss": 2.1622, + "step": 31550 + }, + { + "epoch": 1.33, + "learning_rate": 6.7936561928653434e-06, + "loss": 2.2477, + "step": 31560 + }, + { + "epoch": 1.33, + "learning_rate": 6.789404311407799e-06, + "loss": 1.5379, + "step": 31570 + }, + { + "epoch": 1.33, + "learning_rate": 6.785152429950253e-06, + "loss": 1.9817, + "step": 31580 + }, + { + "epoch": 1.33, + "learning_rate": 6.7809005484927085e-06, + "loss": 2.1176, + "step": 31590 + }, + { + "epoch": 1.33, + "learning_rate": 6.776648667035164e-06, + "loss": 2.0681, + "step": 31600 + }, + { + "epoch": 1.33, + "learning_rate": 6.772396785577618e-06, + "loss": 1.9443, + "step": 31610 + }, + { + "epoch": 1.33, + "learning_rate": 6.7681449041200744e-06, + "loss": 2.6853, + "step": 31620 + }, + { + "epoch": 1.33, + "learning_rate": 6.763893022662529e-06, + "loss": 2.517, + "step": 31630 + }, + { + "epoch": 1.33, + "learning_rate": 6.759641141204983e-06, + "loss": 1.8634, + "step": 31640 + }, + { + "epoch": 1.33, + "learning_rate": 6.7553892597474395e-06, + "loss": 1.9171, + "step": 31650 + }, + { + "epoch": 1.33, + "learning_rate": 6.751137378289894e-06, + "loss": 1.9872, + "step": 31660 + }, + { + "epoch": 1.33, + "learning_rate": 6.746885496832349e-06, + "loss": 2.1077, + "step": 31670 + }, + { + "epoch": 1.33, + "learning_rate": 6.742633615374804e-06, + "loss": 2.1553, + "step": 31680 + }, + { + "epoch": 1.33, + "learning_rate": 6.738381733917258e-06, + "loss": 2.2523, + "step": 31690 + }, + { + "epoch": 1.33, + "learning_rate": 6.734129852459714e-06, + "loss": 2.293, + "step": 31700 + }, + { + "epoch": 1.33, + "learning_rate": 6.729877971002169e-06, + "loss": 2.3029, + "step": 31710 + }, + { + "epoch": 1.33, + "learning_rate": 6.725626089544625e-06, + "loss": 1.7122, + "step": 31720 + }, + { + "epoch": 1.33, + "learning_rate": 6.721374208087079e-06, + "loss": 2.3072, + "step": 31730 + }, + { + "epoch": 1.34, + "learning_rate": 6.717122326629534e-06, + "loss": 2.0192, + "step": 31740 + }, + { + "epoch": 1.34, + "learning_rate": 6.712870445171989e-06, + "loss": 2.2539, + "step": 31750 + }, + { + "epoch": 1.34, + "learning_rate": 6.7086185637144445e-06, + "loss": 2.0044, + "step": 31760 + }, + { + "epoch": 1.34, + "learning_rate": 6.704366682256899e-06, + "loss": 2.2183, + "step": 31770 + }, + { + "epoch": 1.34, + "learning_rate": 6.700114800799354e-06, + "loss": 1.8438, + "step": 31780 + }, + { + "epoch": 1.34, + "learning_rate": 6.695862919341809e-06, + "loss": 2.1252, + "step": 31790 + }, + { + "epoch": 1.34, + "learning_rate": 6.691611037884265e-06, + "loss": 1.9709, + "step": 31800 + }, + { + "epoch": 1.34, + "learning_rate": 6.687359156426719e-06, + "loss": 2.1972, + "step": 31810 + }, + { + "epoch": 1.34, + "learning_rate": 6.683107274969174e-06, + "loss": 2.5023, + "step": 31820 + }, + { + "epoch": 1.34, + "learning_rate": 6.67885539351163e-06, + "loss": 2.1028, + "step": 31830 + }, + { + "epoch": 1.34, + "learning_rate": 6.674603512054084e-06, + "loss": 1.8368, + "step": 31840 + }, + { + "epoch": 1.34, + "learning_rate": 6.67035163059654e-06, + "loss": 1.742, + "step": 31850 + }, + { + "epoch": 1.34, + "learning_rate": 6.666099749138994e-06, + "loss": 2.1388, + "step": 31860 + }, + { + "epoch": 1.34, + "learning_rate": 6.6618478676814494e-06, + "loss": 1.8245, + "step": 31870 + }, + { + "epoch": 1.34, + "learning_rate": 6.657595986223905e-06, + "loss": 2.1489, + "step": 31880 + }, + { + "epoch": 1.34, + "learning_rate": 6.653344104766359e-06, + "loss": 2.0893, + "step": 31890 + }, + { + "epoch": 1.34, + "learning_rate": 6.649092223308815e-06, + "loss": 2.1849, + "step": 31900 + }, + { + "epoch": 1.34, + "learning_rate": 6.64484034185127e-06, + "loss": 2.0908, + "step": 31910 + }, + { + "epoch": 1.34, + "learning_rate": 6.640588460393724e-06, + "loss": 1.8421, + "step": 31920 + }, + { + "epoch": 1.34, + "learning_rate": 6.63633657893618e-06, + "loss": 2.0227, + "step": 31930 + }, + { + "epoch": 1.34, + "learning_rate": 6.632084697478635e-06, + "loss": 2.4361, + "step": 31940 + }, + { + "epoch": 1.34, + "learning_rate": 6.62783281602109e-06, + "loss": 2.0913, + "step": 31950 + }, + { + "epoch": 1.34, + "learning_rate": 6.623580934563545e-06, + "loss": 2.2489, + "step": 31960 + }, + { + "epoch": 1.35, + "learning_rate": 6.619329053105999e-06, + "loss": 2.0853, + "step": 31970 + }, + { + "epoch": 1.35, + "learning_rate": 6.615077171648455e-06, + "loss": 1.5733, + "step": 31980 + }, + { + "epoch": 1.35, + "learning_rate": 6.61082529019091e-06, + "loss": 2.4172, + "step": 31990 + }, + { + "epoch": 1.35, + "learning_rate": 6.606573408733366e-06, + "loss": 1.9044, + "step": 32000 + }, + { + "epoch": 1.35, + "eval_loss": 1.7534154653549194, + "eval_runtime": 175.7062, + "eval_samples_per_second": 14.245, + "eval_steps_per_second": 7.126, + "step": 32000 + }, + { + "epoch": 1.35, + "learning_rate": 6.60232152727582e-06, + "loss": 1.9327, + "step": 32010 + }, + { + "epoch": 1.35, + "learning_rate": 6.598069645818275e-06, + "loss": 2.2584, + "step": 32020 + }, + { + "epoch": 1.35, + "learning_rate": 6.59381776436073e-06, + "loss": 2.2586, + "step": 32030 + }, + { + "epoch": 1.35, + "learning_rate": 6.589565882903185e-06, + "loss": 1.9329, + "step": 32040 + }, + { + "epoch": 1.35, + "learning_rate": 6.585314001445641e-06, + "loss": 1.9196, + "step": 32050 + }, + { + "epoch": 1.35, + "learning_rate": 6.581062119988095e-06, + "loss": 1.9311, + "step": 32060 + }, + { + "epoch": 1.35, + "learning_rate": 6.57681023853055e-06, + "loss": 2.0028, + "step": 32070 + }, + { + "epoch": 1.35, + "learning_rate": 6.572558357073006e-06, + "loss": 2.0391, + "step": 32080 + }, + { + "epoch": 1.35, + "learning_rate": 6.56830647561546e-06, + "loss": 2.1205, + "step": 32090 + }, + { + "epoch": 1.35, + "learning_rate": 6.5640545941579155e-06, + "loss": 1.9866, + "step": 32100 + }, + { + "epoch": 1.35, + "learning_rate": 6.559802712700371e-06, + "loss": 2.078, + "step": 32110 + }, + { + "epoch": 1.35, + "learning_rate": 6.555550831242825e-06, + "loss": 2.1073, + "step": 32120 + }, + { + "epoch": 1.35, + "learning_rate": 6.551298949785281e-06, + "loss": 2.1966, + "step": 32130 + }, + { + "epoch": 1.35, + "learning_rate": 6.547047068327735e-06, + "loss": 2.0649, + "step": 32140 + }, + { + "epoch": 1.35, + "learning_rate": 6.542795186870191e-06, + "loss": 2.1104, + "step": 32150 + }, + { + "epoch": 1.35, + "learning_rate": 6.538543305412646e-06, + "loss": 2.4335, + "step": 32160 + }, + { + "epoch": 1.35, + "learning_rate": 6.5342914239551e-06, + "loss": 1.6751, + "step": 32170 + }, + { + "epoch": 1.35, + "learning_rate": 6.530039542497556e-06, + "loss": 2.1906, + "step": 32180 + }, + { + "epoch": 1.35, + "learning_rate": 6.525787661040011e-06, + "loss": 1.8149, + "step": 32190 + }, + { + "epoch": 1.35, + "learning_rate": 6.521535779582466e-06, + "loss": 2.2962, + "step": 32200 + }, + { + "epoch": 1.36, + "learning_rate": 6.5172838981249205e-06, + "loss": 2.3522, + "step": 32210 + }, + { + "epoch": 1.36, + "learning_rate": 6.513032016667376e-06, + "loss": 1.9007, + "step": 32220 + }, + { + "epoch": 1.36, + "learning_rate": 6.508780135209831e-06, + "loss": 1.5771, + "step": 32230 + }, + { + "epoch": 1.36, + "learning_rate": 6.5045282537522856e-06, + "loss": 1.8895, + "step": 32240 + }, + { + "epoch": 1.36, + "learning_rate": 6.50027637229474e-06, + "loss": 2.3177, + "step": 32250 + }, + { + "epoch": 1.36, + "learning_rate": 6.496024490837196e-06, + "loss": 1.8772, + "step": 32260 + }, + { + "epoch": 1.36, + "learning_rate": 6.491772609379651e-06, + "loss": 1.78, + "step": 32270 + }, + { + "epoch": 1.36, + "learning_rate": 6.487520727922107e-06, + "loss": 2.3023, + "step": 32280 + }, + { + "epoch": 1.36, + "learning_rate": 6.483268846464561e-06, + "loss": 2.0458, + "step": 32290 + }, + { + "epoch": 1.36, + "learning_rate": 6.479016965007016e-06, + "loss": 1.8302, + "step": 32300 + }, + { + "epoch": 1.36, + "learning_rate": 6.474765083549471e-06, + "loss": 1.8139, + "step": 32310 + }, + { + "epoch": 1.36, + "learning_rate": 6.470513202091926e-06, + "loss": 2.1669, + "step": 32320 + }, + { + "epoch": 1.36, + "learning_rate": 6.466261320634382e-06, + "loss": 2.1452, + "step": 32330 + }, + { + "epoch": 1.36, + "learning_rate": 6.462009439176836e-06, + "loss": 2.1107, + "step": 32340 + }, + { + "epoch": 1.36, + "learning_rate": 6.4577575577192905e-06, + "loss": 1.9855, + "step": 32350 + }, + { + "epoch": 1.36, + "learning_rate": 6.453505676261747e-06, + "loss": 2.0678, + "step": 32360 + }, + { + "epoch": 1.36, + "learning_rate": 6.449253794804201e-06, + "loss": 1.7296, + "step": 32370 + }, + { + "epoch": 1.36, + "learning_rate": 6.4450019133466565e-06, + "loss": 1.9016, + "step": 32380 + }, + { + "epoch": 1.36, + "learning_rate": 6.440750031889112e-06, + "loss": 2.5799, + "step": 32390 + }, + { + "epoch": 1.36, + "learning_rate": 6.436498150431566e-06, + "loss": 2.2696, + "step": 32400 + }, + { + "epoch": 1.36, + "learning_rate": 6.4322462689740215e-06, + "loss": 2.1848, + "step": 32410 + }, + { + "epoch": 1.36, + "learning_rate": 6.427994387516476e-06, + "loss": 2.1398, + "step": 32420 + }, + { + "epoch": 1.36, + "learning_rate": 6.423742506058932e-06, + "loss": 1.9232, + "step": 32430 + }, + { + "epoch": 1.36, + "learning_rate": 6.419490624601387e-06, + "loss": 2.2663, + "step": 32440 + }, + { + "epoch": 1.37, + "learning_rate": 6.415238743143841e-06, + "loss": 2.1383, + "step": 32450 + }, + { + "epoch": 1.37, + "learning_rate": 6.410986861686297e-06, + "loss": 2.0458, + "step": 32460 + }, + { + "epoch": 1.37, + "learning_rate": 6.406734980228752e-06, + "loss": 1.9462, + "step": 32470 + }, + { + "epoch": 1.37, + "learning_rate": 6.402483098771207e-06, + "loss": 2.4849, + "step": 32480 + }, + { + "epoch": 1.37, + "learning_rate": 6.3982312173136614e-06, + "loss": 2.114, + "step": 32490 + }, + { + "epoch": 1.37, + "learning_rate": 6.393979335856117e-06, + "loss": 2.0785, + "step": 32500 + }, + { + "epoch": 1.37, + "eval_loss": 1.7712970972061157, + "eval_runtime": 174.1828, + "eval_samples_per_second": 14.37, + "eval_steps_per_second": 7.188, + "step": 32500 + }, + { + "epoch": 1.37, + "learning_rate": 6.389727454398572e-06, + "loss": 2.2324, + "step": 32510 + }, + { + "epoch": 1.37, + "learning_rate": 6.3854755729410265e-06, + "loss": 2.4942, + "step": 32520 + }, + { + "epoch": 1.37, + "learning_rate": 6.381223691483483e-06, + "loss": 2.2043, + "step": 32530 + }, + { + "epoch": 1.37, + "learning_rate": 6.376971810025937e-06, + "loss": 2.3819, + "step": 32540 + }, + { + "epoch": 1.37, + "learning_rate": 6.3727199285683916e-06, + "loss": 2.0652, + "step": 32550 + }, + { + "epoch": 1.37, + "learning_rate": 6.368468047110848e-06, + "loss": 2.3862, + "step": 32560 + }, + { + "epoch": 1.37, + "learning_rate": 6.364216165653302e-06, + "loss": 2.0991, + "step": 32570 + }, + { + "epoch": 1.37, + "learning_rate": 6.3599642841957575e-06, + "loss": 2.0504, + "step": 32580 + }, + { + "epoch": 1.37, + "learning_rate": 6.355712402738212e-06, + "loss": 1.8913, + "step": 32590 + }, + { + "epoch": 1.37, + "learning_rate": 6.351460521280666e-06, + "loss": 1.8265, + "step": 32600 + }, + { + "epoch": 1.37, + "learning_rate": 6.3472086398231226e-06, + "loss": 1.9062, + "step": 32610 + }, + { + "epoch": 1.37, + "learning_rate": 6.342956758365577e-06, + "loss": 1.8239, + "step": 32620 + }, + { + "epoch": 1.37, + "learning_rate": 6.338704876908033e-06, + "loss": 1.8398, + "step": 32630 + }, + { + "epoch": 1.37, + "learning_rate": 6.334452995450488e-06, + "loss": 2.1208, + "step": 32640 + }, + { + "epoch": 1.37, + "learning_rate": 6.330201113992942e-06, + "loss": 2.2409, + "step": 32650 + }, + { + "epoch": 1.37, + "learning_rate": 6.325949232535397e-06, + "loss": 1.9637, + "step": 32660 + }, + { + "epoch": 1.37, + "learning_rate": 6.321697351077853e-06, + "loss": 1.8296, + "step": 32670 + }, + { + "epoch": 1.37, + "learning_rate": 6.317445469620308e-06, + "loss": 2.0955, + "step": 32680 + }, + { + "epoch": 1.38, + "learning_rate": 6.3131935881627625e-06, + "loss": 1.8445, + "step": 32690 + }, + { + "epoch": 1.38, + "learning_rate": 6.308941706705217e-06, + "loss": 2.1954, + "step": 32700 + }, + { + "epoch": 1.38, + "learning_rate": 6.304689825247673e-06, + "loss": 1.9115, + "step": 32710 + }, + { + "epoch": 1.38, + "learning_rate": 6.3004379437901275e-06, + "loss": 1.792, + "step": 32720 + }, + { + "epoch": 1.38, + "learning_rate": 6.296186062332583e-06, + "loss": 2.1596, + "step": 32730 + }, + { + "epoch": 1.38, + "learning_rate": 6.291934180875038e-06, + "loss": 1.7065, + "step": 32740 + }, + { + "epoch": 1.38, + "learning_rate": 6.287682299417493e-06, + "loss": 1.9132, + "step": 32750 + }, + { + "epoch": 1.38, + "learning_rate": 6.283430417959948e-06, + "loss": 2.6364, + "step": 32760 + }, + { + "epoch": 1.38, + "learning_rate": 6.279178536502402e-06, + "loss": 2.143, + "step": 32770 + }, + { + "epoch": 1.38, + "learning_rate": 6.274926655044858e-06, + "loss": 2.4071, + "step": 32780 + }, + { + "epoch": 1.38, + "learning_rate": 6.270674773587313e-06, + "loss": 2.0815, + "step": 32790 + }, + { + "epoch": 1.38, + "learning_rate": 6.2664228921297674e-06, + "loss": 2.3196, + "step": 32800 + }, + { + "epoch": 1.38, + "learning_rate": 6.262171010672224e-06, + "loss": 2.3816, + "step": 32810 + }, + { + "epoch": 1.38, + "learning_rate": 6.257919129214678e-06, + "loss": 2.175, + "step": 32820 + }, + { + "epoch": 1.38, + "learning_rate": 6.2536672477571325e-06, + "loss": 1.9625, + "step": 32830 + }, + { + "epoch": 1.38, + "learning_rate": 6.249415366299588e-06, + "loss": 1.8776, + "step": 32840 + }, + { + "epoch": 1.38, + "learning_rate": 6.245163484842043e-06, + "loss": 2.041, + "step": 32850 + }, + { + "epoch": 1.38, + "learning_rate": 6.240911603384498e-06, + "loss": 2.3219, + "step": 32860 + }, + { + "epoch": 1.38, + "learning_rate": 6.236659721926953e-06, + "loss": 2.2202, + "step": 32870 + }, + { + "epoch": 1.38, + "learning_rate": 6.232407840469407e-06, + "loss": 2.2712, + "step": 32880 + }, + { + "epoch": 1.38, + "learning_rate": 6.2281559590118635e-06, + "loss": 1.8479, + "step": 32890 + }, + { + "epoch": 1.38, + "learning_rate": 6.223904077554318e-06, + "loss": 2.4935, + "step": 32900 + }, + { + "epoch": 1.38, + "learning_rate": 6.219652196096774e-06, + "loss": 2.167, + "step": 32910 + }, + { + "epoch": 1.38, + "learning_rate": 6.2154003146392286e-06, + "loss": 1.871, + "step": 32920 + }, + { + "epoch": 1.39, + "learning_rate": 6.211148433181683e-06, + "loss": 1.8298, + "step": 32930 + }, + { + "epoch": 1.39, + "learning_rate": 6.206896551724138e-06, + "loss": 2.3033, + "step": 32940 + }, + { + "epoch": 1.39, + "learning_rate": 6.202644670266594e-06, + "loss": 2.0429, + "step": 32950 + }, + { + "epoch": 1.39, + "learning_rate": 6.198392788809049e-06, + "loss": 1.8533, + "step": 32960 + }, + { + "epoch": 1.39, + "learning_rate": 6.194140907351503e-06, + "loss": 1.7284, + "step": 32970 + }, + { + "epoch": 1.39, + "learning_rate": 6.189889025893958e-06, + "loss": 2.4582, + "step": 32980 + }, + { + "epoch": 1.39, + "learning_rate": 6.185637144436414e-06, + "loss": 1.625, + "step": 32990 + }, + { + "epoch": 1.39, + "learning_rate": 6.1813852629788685e-06, + "loss": 1.9835, + "step": 33000 + }, + { + "epoch": 1.39, + "eval_loss": 1.7666312456130981, + "eval_runtime": 175.9092, + "eval_samples_per_second": 14.229, + "eval_steps_per_second": 7.117, + "step": 33000 + }, + { + "epoch": 1.39, + "learning_rate": 6.177133381521324e-06, + "loss": 2.0395, + "step": 33010 + }, + { + "epoch": 1.39, + "learning_rate": 6.172881500063779e-06, + "loss": 1.8279, + "step": 33020 + }, + { + "epoch": 1.39, + "learning_rate": 6.1686296186062335e-06, + "loss": 2.1629, + "step": 33030 + }, + { + "epoch": 1.39, + "learning_rate": 6.164377737148689e-06, + "loss": 2.4225, + "step": 33040 + }, + { + "epoch": 1.39, + "learning_rate": 6.160125855691143e-06, + "loss": 1.9997, + "step": 33050 + }, + { + "epoch": 1.39, + "learning_rate": 6.1558739742335994e-06, + "loss": 2.0743, + "step": 33060 + }, + { + "epoch": 1.39, + "learning_rate": 6.151622092776054e-06, + "loss": 1.9704, + "step": 33070 + }, + { + "epoch": 1.39, + "learning_rate": 6.147370211318508e-06, + "loss": 2.1134, + "step": 33080 + }, + { + "epoch": 1.39, + "learning_rate": 6.1431183298609645e-06, + "loss": 1.9697, + "step": 33090 + }, + { + "epoch": 1.39, + "learning_rate": 6.138866448403419e-06, + "loss": 2.3882, + "step": 33100 + }, + { + "epoch": 1.39, + "learning_rate": 6.134614566945874e-06, + "loss": 2.5031, + "step": 33110 + }, + { + "epoch": 1.39, + "learning_rate": 6.130362685488329e-06, + "loss": 1.9838, + "step": 33120 + }, + { + "epoch": 1.39, + "learning_rate": 6.126110804030784e-06, + "loss": 2.5724, + "step": 33130 + }, + { + "epoch": 1.39, + "learning_rate": 6.121858922573239e-06, + "loss": 2.175, + "step": 33140 + }, + { + "epoch": 1.39, + "learning_rate": 6.117607041115694e-06, + "loss": 1.6917, + "step": 33150 + }, + { + "epoch": 1.4, + "learning_rate": 6.11335515965815e-06, + "loss": 2.0187, + "step": 33160 + }, + { + "epoch": 1.4, + "learning_rate": 6.109103278200604e-06, + "loss": 2.1692, + "step": 33170 + }, + { + "epoch": 1.4, + "learning_rate": 6.104851396743059e-06, + "loss": 2.4369, + "step": 33180 + }, + { + "epoch": 1.4, + "learning_rate": 6.100599515285515e-06, + "loss": 1.7183, + "step": 33190 + }, + { + "epoch": 1.4, + "learning_rate": 6.0963476338279695e-06, + "loss": 1.6395, + "step": 33200 + }, + { + "epoch": 1.4, + "learning_rate": 6.092095752370425e-06, + "loss": 2.3003, + "step": 33210 + }, + { + "epoch": 1.4, + "learning_rate": 6.087843870912879e-06, + "loss": 2.0762, + "step": 33220 + }, + { + "epoch": 1.4, + "learning_rate": 6.0835919894553346e-06, + "loss": 2.5726, + "step": 33230 + }, + { + "epoch": 1.4, + "learning_rate": 6.07934010799779e-06, + "loss": 2.1681, + "step": 33240 + }, + { + "epoch": 1.4, + "learning_rate": 6.075088226540244e-06, + "loss": 2.3166, + "step": 33250 + }, + { + "epoch": 1.4, + "learning_rate": 6.0708363450827005e-06, + "loss": 2.4197, + "step": 33260 + }, + { + "epoch": 1.4, + "learning_rate": 6.066584463625155e-06, + "loss": 2.1967, + "step": 33270 + }, + { + "epoch": 1.4, + "learning_rate": 6.062332582167609e-06, + "loss": 2.246, + "step": 33280 + }, + { + "epoch": 1.4, + "learning_rate": 6.058080700710065e-06, + "loss": 2.095, + "step": 33290 + }, + { + "epoch": 1.4, + "learning_rate": 6.05382881925252e-06, + "loss": 2.4741, + "step": 33300 + }, + { + "epoch": 1.4, + "learning_rate": 6.0495769377949745e-06, + "loss": 1.7434, + "step": 33310 + }, + { + "epoch": 1.4, + "learning_rate": 6.04532505633743e-06, + "loss": 2.0141, + "step": 33320 + }, + { + "epoch": 1.4, + "learning_rate": 6.041073174879884e-06, + "loss": 2.2197, + "step": 33330 + }, + { + "epoch": 1.4, + "learning_rate": 6.03682129342234e-06, + "loss": 2.3397, + "step": 33340 + }, + { + "epoch": 1.4, + "learning_rate": 6.032569411964795e-06, + "loss": 2.1607, + "step": 33350 + }, + { + "epoch": 1.4, + "learning_rate": 6.028317530507249e-06, + "loss": 2.1818, + "step": 33360 + }, + { + "epoch": 1.4, + "learning_rate": 6.0240656490497054e-06, + "loss": 2.0139, + "step": 33370 + }, + { + "epoch": 1.4, + "learning_rate": 6.01981376759216e-06, + "loss": 2.3018, + "step": 33380 + }, + { + "epoch": 1.4, + "learning_rate": 6.015561886134615e-06, + "loss": 2.0767, + "step": 33390 + }, + { + "epoch": 1.41, + "learning_rate": 6.01131000467707e-06, + "loss": 2.0438, + "step": 33400 + }, + { + "epoch": 1.41, + "learning_rate": 6.007058123219525e-06, + "loss": 2.016, + "step": 33410 + }, + { + "epoch": 1.41, + "learning_rate": 6.00280624176198e-06, + "loss": 1.6808, + "step": 33420 + }, + { + "epoch": 1.41, + "learning_rate": 5.998554360304435e-06, + "loss": 1.8655, + "step": 33430 + }, + { + "epoch": 1.41, + "learning_rate": 5.994302478846891e-06, + "loss": 1.9778, + "step": 33440 + }, + { + "epoch": 1.41, + "learning_rate": 5.990050597389345e-06, + "loss": 2.5145, + "step": 33450 + }, + { + "epoch": 1.41, + "learning_rate": 5.9857987159318e-06, + "loss": 2.1583, + "step": 33460 + }, + { + "epoch": 1.41, + "learning_rate": 5.981546834474256e-06, + "loss": 2.1911, + "step": 33470 + }, + { + "epoch": 1.41, + "learning_rate": 5.97729495301671e-06, + "loss": 2.1878, + "step": 33480 + }, + { + "epoch": 1.41, + "learning_rate": 5.973043071559166e-06, + "loss": 1.7057, + "step": 33490 + }, + { + "epoch": 1.41, + "learning_rate": 5.96879119010162e-06, + "loss": 2.5451, + "step": 33500 + }, + { + "epoch": 1.41, + "eval_loss": 1.7640694379806519, + "eval_runtime": 175.7004, + "eval_samples_per_second": 14.246, + "eval_steps_per_second": 7.126, + "step": 33500 + }, + { + "epoch": 1.41, + "learning_rate": 5.964539308644075e-06, + "loss": 1.8074, + "step": 33510 + }, + { + "epoch": 1.41, + "learning_rate": 5.960287427186531e-06, + "loss": 2.3821, + "step": 33520 + }, + { + "epoch": 1.41, + "learning_rate": 5.956035545728985e-06, + "loss": 2.203, + "step": 33530 + }, + { + "epoch": 1.41, + "learning_rate": 5.951783664271441e-06, + "loss": 2.1969, + "step": 33540 + }, + { + "epoch": 1.41, + "learning_rate": 5.947531782813896e-06, + "loss": 2.5106, + "step": 33550 + }, + { + "epoch": 1.41, + "learning_rate": 5.94327990135635e-06, + "loss": 2.1026, + "step": 33560 + }, + { + "epoch": 1.41, + "learning_rate": 5.939028019898806e-06, + "loss": 1.931, + "step": 33570 + }, + { + "epoch": 1.41, + "learning_rate": 5.934776138441261e-06, + "loss": 2.279, + "step": 33580 + }, + { + "epoch": 1.41, + "learning_rate": 5.930524256983716e-06, + "loss": 2.0317, + "step": 33590 + }, + { + "epoch": 1.41, + "learning_rate": 5.926272375526171e-06, + "loss": 2.4391, + "step": 33600 + }, + { + "epoch": 1.41, + "learning_rate": 5.922020494068625e-06, + "loss": 2.2738, + "step": 33610 + }, + { + "epoch": 1.41, + "learning_rate": 5.917768612611081e-06, + "loss": 2.0268, + "step": 33620 + }, + { + "epoch": 1.41, + "learning_rate": 5.913516731153536e-06, + "loss": 2.2811, + "step": 33630 + }, + { + "epoch": 1.42, + "learning_rate": 5.909264849695991e-06, + "loss": 1.6955, + "step": 33640 + }, + { + "epoch": 1.42, + "learning_rate": 5.905012968238446e-06, + "loss": 1.9845, + "step": 33650 + }, + { + "epoch": 1.42, + "learning_rate": 5.900761086780901e-06, + "loss": 2.0359, + "step": 33660 + }, + { + "epoch": 1.42, + "learning_rate": 5.896509205323356e-06, + "loss": 2.185, + "step": 33670 + }, + { + "epoch": 1.42, + "learning_rate": 5.892257323865811e-06, + "loss": 2.0223, + "step": 33680 + }, + { + "epoch": 1.42, + "learning_rate": 5.888005442408267e-06, + "loss": 1.8995, + "step": 33690 + }, + { + "epoch": 1.42, + "learning_rate": 5.883753560950721e-06, + "loss": 2.2747, + "step": 33700 + }, + { + "epoch": 1.42, + "learning_rate": 5.879501679493176e-06, + "loss": 2.0273, + "step": 33710 + }, + { + "epoch": 1.42, + "learning_rate": 5.875249798035632e-06, + "loss": 1.6985, + "step": 33720 + }, + { + "epoch": 1.42, + "learning_rate": 5.870997916578086e-06, + "loss": 1.9237, + "step": 33730 + }, + { + "epoch": 1.42, + "learning_rate": 5.866746035120542e-06, + "loss": 2.2394, + "step": 33740 + }, + { + "epoch": 1.42, + "learning_rate": 5.862494153662996e-06, + "loss": 2.4825, + "step": 33750 + }, + { + "epoch": 1.42, + "learning_rate": 5.858242272205451e-06, + "loss": 2.0602, + "step": 33760 + }, + { + "epoch": 1.42, + "learning_rate": 5.853990390747907e-06, + "loss": 2.2383, + "step": 33770 + }, + { + "epoch": 1.42, + "learning_rate": 5.849738509290361e-06, + "loss": 2.3586, + "step": 33780 + }, + { + "epoch": 1.42, + "learning_rate": 5.8454866278328156e-06, + "loss": 1.9765, + "step": 33790 + }, + { + "epoch": 1.42, + "learning_rate": 5.841234746375272e-06, + "loss": 2.0437, + "step": 33800 + }, + { + "epoch": 1.42, + "learning_rate": 5.836982864917726e-06, + "loss": 1.6747, + "step": 33810 + }, + { + "epoch": 1.42, + "learning_rate": 5.832730983460182e-06, + "loss": 2.1325, + "step": 33820 + }, + { + "epoch": 1.42, + "learning_rate": 5.828479102002637e-06, + "loss": 2.1148, + "step": 33830 + }, + { + "epoch": 1.42, + "learning_rate": 5.824227220545091e-06, + "loss": 1.809, + "step": 33840 + }, + { + "epoch": 1.42, + "learning_rate": 5.8199753390875466e-06, + "loss": 2.4527, + "step": 33850 + }, + { + "epoch": 1.42, + "learning_rate": 5.815723457630002e-06, + "loss": 2.1952, + "step": 33860 + }, + { + "epoch": 1.42, + "learning_rate": 5.811471576172457e-06, + "loss": 1.9291, + "step": 33870 + }, + { + "epoch": 1.43, + "learning_rate": 5.807219694714912e-06, + "loss": 2.3615, + "step": 33880 + }, + { + "epoch": 1.43, + "learning_rate": 5.802967813257366e-06, + "loss": 2.2254, + "step": 33890 + }, + { + "epoch": 1.43, + "learning_rate": 5.798715931799822e-06, + "loss": 1.9964, + "step": 33900 + }, + { + "epoch": 1.43, + "learning_rate": 5.794464050342277e-06, + "loss": 2.2679, + "step": 33910 + }, + { + "epoch": 1.43, + "learning_rate": 5.790212168884732e-06, + "loss": 2.2388, + "step": 33920 + }, + { + "epoch": 1.43, + "learning_rate": 5.785960287427187e-06, + "loss": 1.9743, + "step": 33930 + }, + { + "epoch": 1.43, + "learning_rate": 5.781708405969642e-06, + "loss": 2.4368, + "step": 33940 + }, + { + "epoch": 1.43, + "learning_rate": 5.777456524512097e-06, + "loss": 1.5072, + "step": 33950 + }, + { + "epoch": 1.43, + "learning_rate": 5.7732046430545515e-06, + "loss": 2.0864, + "step": 33960 + }, + { + "epoch": 1.43, + "learning_rate": 5.768952761597008e-06, + "loss": 2.4148, + "step": 33970 + }, + { + "epoch": 1.43, + "learning_rate": 5.764700880139462e-06, + "loss": 1.8377, + "step": 33980 + }, + { + "epoch": 1.43, + "learning_rate": 5.760448998681917e-06, + "loss": 2.3857, + "step": 33990 + }, + { + "epoch": 1.43, + "learning_rate": 5.756197117224373e-06, + "loss": 2.2245, + "step": 34000 + }, + { + "epoch": 1.43, + "eval_loss": 1.7518954277038574, + "eval_runtime": 175.8609, + "eval_samples_per_second": 14.233, + "eval_steps_per_second": 7.119, + "step": 34000 + }, + { + "epoch": 1.43, + "learning_rate": 5.751945235766827e-06, + "loss": 1.9569, + "step": 34010 + }, + { + "epoch": 1.43, + "learning_rate": 5.7476933543092825e-06, + "loss": 2.3212, + "step": 34020 + }, + { + "epoch": 1.43, + "learning_rate": 5.743441472851737e-06, + "loss": 1.8699, + "step": 34030 + }, + { + "epoch": 1.43, + "learning_rate": 5.739189591394192e-06, + "loss": 2.2364, + "step": 34040 + }, + { + "epoch": 1.43, + "learning_rate": 5.734937709936648e-06, + "loss": 1.7592, + "step": 34050 + }, + { + "epoch": 1.43, + "learning_rate": 5.730685828479102e-06, + "loss": 2.54, + "step": 34060 + }, + { + "epoch": 1.43, + "learning_rate": 5.726433947021558e-06, + "loss": 1.9775, + "step": 34070 + }, + { + "epoch": 1.43, + "learning_rate": 5.722182065564013e-06, + "loss": 1.7832, + "step": 34080 + }, + { + "epoch": 1.43, + "learning_rate": 5.717930184106467e-06, + "loss": 1.8524, + "step": 34090 + }, + { + "epoch": 1.43, + "learning_rate": 5.713678302648923e-06, + "loss": 2.1769, + "step": 34100 + }, + { + "epoch": 1.44, + "learning_rate": 5.709426421191378e-06, + "loss": 1.97, + "step": 34110 + }, + { + "epoch": 1.44, + "learning_rate": 5.705174539733833e-06, + "loss": 2.0002, + "step": 34120 + }, + { + "epoch": 1.44, + "learning_rate": 5.7009226582762875e-06, + "loss": 2.2234, + "step": 34130 + }, + { + "epoch": 1.44, + "learning_rate": 5.696670776818743e-06, + "loss": 2.2216, + "step": 34140 + }, + { + "epoch": 1.44, + "learning_rate": 5.692418895361198e-06, + "loss": 2.0222, + "step": 34150 + }, + { + "epoch": 1.44, + "learning_rate": 5.6881670139036526e-06, + "loss": 2.3931, + "step": 34160 + }, + { + "epoch": 1.44, + "learning_rate": 5.683915132446109e-06, + "loss": 2.0872, + "step": 34170 + }, + { + "epoch": 1.44, + "learning_rate": 5.679663250988563e-06, + "loss": 1.8038, + "step": 34180 + }, + { + "epoch": 1.44, + "learning_rate": 5.675411369531018e-06, + "loss": 1.7638, + "step": 34190 + }, + { + "epoch": 1.44, + "learning_rate": 5.671159488073473e-06, + "loss": 1.9803, + "step": 34200 + }, + { + "epoch": 1.44, + "learning_rate": 5.666907606615928e-06, + "loss": 2.4514, + "step": 34210 + }, + { + "epoch": 1.44, + "learning_rate": 5.6626557251583835e-06, + "loss": 2.0363, + "step": 34220 + }, + { + "epoch": 1.44, + "learning_rate": 5.658403843700838e-06, + "loss": 2.0239, + "step": 34230 + }, + { + "epoch": 1.44, + "learning_rate": 5.6541519622432925e-06, + "loss": 2.4211, + "step": 34240 + }, + { + "epoch": 1.44, + "learning_rate": 5.649900080785749e-06, + "loss": 1.9727, + "step": 34250 + }, + { + "epoch": 1.44, + "learning_rate": 5.645648199328203e-06, + "loss": 2.3525, + "step": 34260 + }, + { + "epoch": 1.44, + "learning_rate": 5.641396317870658e-06, + "loss": 2.1225, + "step": 34270 + }, + { + "epoch": 1.44, + "learning_rate": 5.637144436413114e-06, + "loss": 1.8737, + "step": 34280 + }, + { + "epoch": 1.44, + "learning_rate": 5.632892554955568e-06, + "loss": 2.1636, + "step": 34290 + }, + { + "epoch": 1.44, + "learning_rate": 5.6286406734980234e-06, + "loss": 2.3626, + "step": 34300 + }, + { + "epoch": 1.44, + "learning_rate": 5.624388792040478e-06, + "loss": 1.9545, + "step": 34310 + }, + { + "epoch": 1.44, + "learning_rate": 5.620136910582933e-06, + "loss": 2.0651, + "step": 34320 + }, + { + "epoch": 1.44, + "learning_rate": 5.6158850291253885e-06, + "loss": 1.6739, + "step": 34330 + }, + { + "epoch": 1.44, + "learning_rate": 5.611633147667843e-06, + "loss": 2.0615, + "step": 34340 + }, + { + "epoch": 1.45, + "learning_rate": 5.607381266210299e-06, + "loss": 1.6146, + "step": 34350 + }, + { + "epoch": 1.45, + "learning_rate": 5.603129384752754e-06, + "loss": 2.1332, + "step": 34360 + }, + { + "epoch": 1.45, + "learning_rate": 5.598877503295208e-06, + "loss": 1.9554, + "step": 34370 + }, + { + "epoch": 1.45, + "learning_rate": 5.594625621837664e-06, + "loss": 2.0376, + "step": 34380 + }, + { + "epoch": 1.45, + "learning_rate": 5.590373740380119e-06, + "loss": 2.5308, + "step": 34390 + }, + { + "epoch": 1.45, + "learning_rate": 5.586121858922574e-06, + "loss": 2.0548, + "step": 34400 + }, + { + "epoch": 1.45, + "learning_rate": 5.581869977465028e-06, + "loss": 1.9477, + "step": 34410 + }, + { + "epoch": 1.45, + "learning_rate": 5.577618096007483e-06, + "loss": 2.1934, + "step": 34420 + }, + { + "epoch": 1.45, + "learning_rate": 5.573366214549939e-06, + "loss": 2.3392, + "step": 34430 + }, + { + "epoch": 1.45, + "learning_rate": 5.5691143330923935e-06, + "loss": 2.3634, + "step": 34440 + }, + { + "epoch": 1.45, + "learning_rate": 5.56486245163485e-06, + "loss": 1.9666, + "step": 34450 + }, + { + "epoch": 1.45, + "learning_rate": 5.560610570177304e-06, + "loss": 2.2298, + "step": 34460 + }, + { + "epoch": 1.45, + "learning_rate": 5.5563586887197585e-06, + "loss": 1.9168, + "step": 34470 + }, + { + "epoch": 1.45, + "learning_rate": 5.552106807262214e-06, + "loss": 1.6325, + "step": 34480 + }, + { + "epoch": 1.45, + "learning_rate": 5.547854925804669e-06, + "loss": 2.262, + "step": 34490 + }, + { + "epoch": 1.45, + "learning_rate": 5.5436030443471245e-06, + "loss": 1.6855, + "step": 34500 + }, + { + "epoch": 1.45, + "eval_loss": 1.7510865926742554, + "eval_runtime": 176.082, + "eval_samples_per_second": 14.215, + "eval_steps_per_second": 7.11, + "step": 34500 + }, + { + "epoch": 1.45, + "learning_rate": 5.539351162889579e-06, + "loss": 2.4015, + "step": 34510 + }, + { + "epoch": 1.45, + "learning_rate": 5.535099281432033e-06, + "loss": 2.3955, + "step": 34520 + }, + { + "epoch": 1.45, + "learning_rate": 5.5308473999744895e-06, + "loss": 2.1583, + "step": 34530 + }, + { + "epoch": 1.45, + "learning_rate": 5.526595518516944e-06, + "loss": 2.2425, + "step": 34540 + }, + { + "epoch": 1.45, + "learning_rate": 5.522343637059399e-06, + "loss": 2.2618, + "step": 34550 + }, + { + "epoch": 1.45, + "learning_rate": 5.518091755601855e-06, + "loss": 1.7956, + "step": 34560 + }, + { + "epoch": 1.45, + "learning_rate": 5.513839874144309e-06, + "loss": 2.1824, + "step": 34570 + }, + { + "epoch": 1.45, + "learning_rate": 5.509587992686764e-06, + "loss": 2.0588, + "step": 34580 + }, + { + "epoch": 1.46, + "learning_rate": 5.505336111229219e-06, + "loss": 2.1407, + "step": 34590 + }, + { + "epoch": 1.46, + "learning_rate": 5.501084229771675e-06, + "loss": 1.9659, + "step": 34600 + }, + { + "epoch": 1.46, + "learning_rate": 5.4968323483141294e-06, + "loss": 2.2308, + "step": 34610 + }, + { + "epoch": 1.46, + "learning_rate": 5.492580466856584e-06, + "loss": 2.0814, + "step": 34620 + }, + { + "epoch": 1.46, + "learning_rate": 5.48832858539904e-06, + "loss": 2.0365, + "step": 34630 + }, + { + "epoch": 1.46, + "learning_rate": 5.4840767039414945e-06, + "loss": 2.7061, + "step": 34640 + }, + { + "epoch": 1.46, + "learning_rate": 5.47982482248395e-06, + "loss": 2.011, + "step": 34650 + }, + { + "epoch": 1.46, + "learning_rate": 5.475572941026404e-06, + "loss": 2.0643, + "step": 34660 + }, + { + "epoch": 1.46, + "learning_rate": 5.4713210595688596e-06, + "loss": 1.9561, + "step": 34670 + }, + { + "epoch": 1.46, + "learning_rate": 5.467069178111315e-06, + "loss": 2.2013, + "step": 34680 + }, + { + "epoch": 1.46, + "learning_rate": 5.462817296653769e-06, + "loss": 2.221, + "step": 34690 + }, + { + "epoch": 1.46, + "learning_rate": 5.4585654151962255e-06, + "loss": 2.2442, + "step": 34700 + }, + { + "epoch": 1.46, + "learning_rate": 5.45431353373868e-06, + "loss": 2.303, + "step": 34710 + }, + { + "epoch": 1.46, + "learning_rate": 5.450061652281134e-06, + "loss": 2.4457, + "step": 34720 + }, + { + "epoch": 1.46, + "learning_rate": 5.4458097708235906e-06, + "loss": 2.1348, + "step": 34730 + }, + { + "epoch": 1.46, + "learning_rate": 5.441557889366045e-06, + "loss": 2.097, + "step": 34740 + }, + { + "epoch": 1.46, + "learning_rate": 5.4373060079085e-06, + "loss": 2.2935, + "step": 34750 + }, + { + "epoch": 1.46, + "learning_rate": 5.433054126450955e-06, + "loss": 2.3818, + "step": 34760 + }, + { + "epoch": 1.46, + "learning_rate": 5.42880224499341e-06, + "loss": 2.4383, + "step": 34770 + }, + { + "epoch": 1.46, + "learning_rate": 5.424550363535865e-06, + "loss": 2.2461, + "step": 34780 + }, + { + "epoch": 1.46, + "learning_rate": 5.42029848207832e-06, + "loss": 2.4632, + "step": 34790 + }, + { + "epoch": 1.46, + "learning_rate": 5.416046600620774e-06, + "loss": 2.1457, + "step": 34800 + }, + { + "epoch": 1.46, + "learning_rate": 5.4117947191632305e-06, + "loss": 2.0426, + "step": 34810 + }, + { + "epoch": 1.46, + "learning_rate": 5.407542837705685e-06, + "loss": 2.1143, + "step": 34820 + }, + { + "epoch": 1.47, + "learning_rate": 5.40329095624814e-06, + "loss": 2.0542, + "step": 34830 + }, + { + "epoch": 1.47, + "learning_rate": 5.3990390747905955e-06, + "loss": 1.601, + "step": 34840 + }, + { + "epoch": 1.47, + "learning_rate": 5.39478719333305e-06, + "loss": 1.88, + "step": 34850 + }, + { + "epoch": 1.47, + "learning_rate": 5.390535311875505e-06, + "loss": 1.8141, + "step": 34860 + }, + { + "epoch": 1.47, + "learning_rate": 5.38628343041796e-06, + "loss": 1.9591, + "step": 34870 + }, + { + "epoch": 1.47, + "learning_rate": 5.382031548960416e-06, + "loss": 2.2129, + "step": 34880 + }, + { + "epoch": 1.47, + "learning_rate": 5.37777966750287e-06, + "loss": 2.1054, + "step": 34890 + }, + { + "epoch": 1.47, + "learning_rate": 5.373527786045325e-06, + "loss": 2.0846, + "step": 34900 + }, + { + "epoch": 1.47, + "learning_rate": 5.369275904587781e-06, + "loss": 2.2735, + "step": 34910 + }, + { + "epoch": 1.47, + "learning_rate": 5.3650240231302354e-06, + "loss": 1.8749, + "step": 34920 + }, + { + "epoch": 1.47, + "learning_rate": 5.360772141672691e-06, + "loss": 2.1951, + "step": 34930 + }, + { + "epoch": 1.47, + "learning_rate": 5.356520260215145e-06, + "loss": 2.1941, + "step": 34940 + }, + { + "epoch": 1.47, + "learning_rate": 5.3522683787576005e-06, + "loss": 2.3601, + "step": 34950 + }, + { + "epoch": 1.47, + "learning_rate": 5.348016497300056e-06, + "loss": 1.3729, + "step": 34960 + }, + { + "epoch": 1.47, + "learning_rate": 5.34376461584251e-06, + "loss": 2.1726, + "step": 34970 + }, + { + "epoch": 1.47, + "learning_rate": 5.339512734384966e-06, + "loss": 1.9625, + "step": 34980 + }, + { + "epoch": 1.47, + "learning_rate": 5.335260852927421e-06, + "loss": 1.9156, + "step": 34990 + }, + { + "epoch": 1.47, + "learning_rate": 5.331008971469875e-06, + "loss": 2.1513, + "step": 35000 + }, + { + "epoch": 1.47, + "eval_loss": 1.7616750001907349, + "eval_runtime": 175.2829, + "eval_samples_per_second": 14.28, + "eval_steps_per_second": 7.143, + "step": 35000 + }, + { + "epoch": 1.47, + "learning_rate": 5.3267570900123315e-06, + "loss": 2.07, + "step": 35010 + }, + { + "epoch": 1.47, + "learning_rate": 5.322505208554786e-06, + "loss": 2.4348, + "step": 35020 + }, + { + "epoch": 1.47, + "learning_rate": 5.318253327097241e-06, + "loss": 2.5164, + "step": 35030 + }, + { + "epoch": 1.47, + "learning_rate": 5.314001445639696e-06, + "loss": 2.2986, + "step": 35040 + }, + { + "epoch": 1.47, + "learning_rate": 5.309749564182151e-06, + "loss": 2.2002, + "step": 35050 + }, + { + "epoch": 1.48, + "learning_rate": 5.305497682724606e-06, + "loss": 2.5061, + "step": 35060 + }, + { + "epoch": 1.48, + "learning_rate": 5.301245801267061e-06, + "loss": 2.4472, + "step": 35070 + }, + { + "epoch": 1.48, + "learning_rate": 5.296993919809517e-06, + "loss": 2.0103, + "step": 35080 + }, + { + "epoch": 1.48, + "learning_rate": 5.292742038351971e-06, + "loss": 2.0856, + "step": 35090 + }, + { + "epoch": 1.48, + "learning_rate": 5.288490156894426e-06, + "loss": 2.0458, + "step": 35100 + }, + { + "epoch": 1.48, + "learning_rate": 5.284238275436881e-06, + "loss": 2.1446, + "step": 35110 + }, + { + "epoch": 1.48, + "learning_rate": 5.2799863939793365e-06, + "loss": 1.7831, + "step": 35120 + }, + { + "epoch": 1.48, + "learning_rate": 5.275734512521792e-06, + "loss": 2.1475, + "step": 35130 + }, + { + "epoch": 1.48, + "learning_rate": 5.271482631064246e-06, + "loss": 2.0982, + "step": 35140 + }, + { + "epoch": 1.48, + "learning_rate": 5.267230749606701e-06, + "loss": 1.8416, + "step": 35150 + }, + { + "epoch": 1.48, + "learning_rate": 5.262978868149157e-06, + "loss": 1.9284, + "step": 35160 + }, + { + "epoch": 1.48, + "learning_rate": 5.258726986691611e-06, + "loss": 2.3603, + "step": 35170 + }, + { + "epoch": 1.48, + "learning_rate": 5.254475105234067e-06, + "loss": 2.4798, + "step": 35180 + }, + { + "epoch": 1.48, + "learning_rate": 5.250223223776522e-06, + "loss": 2.2247, + "step": 35190 + }, + { + "epoch": 1.48, + "learning_rate": 5.245971342318976e-06, + "loss": 2.3189, + "step": 35200 + }, + { + "epoch": 1.48, + "learning_rate": 5.241719460861432e-06, + "loss": 2.1824, + "step": 35210 + }, + { + "epoch": 1.48, + "learning_rate": 5.237467579403886e-06, + "loss": 2.3432, + "step": 35220 + }, + { + "epoch": 1.48, + "learning_rate": 5.233215697946342e-06, + "loss": 2.0543, + "step": 35230 + }, + { + "epoch": 1.48, + "learning_rate": 5.228963816488797e-06, + "loss": 1.8353, + "step": 35240 + }, + { + "epoch": 1.48, + "learning_rate": 5.224711935031251e-06, + "loss": 2.5007, + "step": 35250 + }, + { + "epoch": 1.48, + "learning_rate": 5.220460053573707e-06, + "loss": 2.0003, + "step": 35260 + }, + { + "epoch": 1.48, + "learning_rate": 5.216208172116162e-06, + "loss": 2.3392, + "step": 35270 + }, + { + "epoch": 1.48, + "learning_rate": 5.211956290658617e-06, + "loss": 2.0539, + "step": 35280 + }, + { + "epoch": 1.48, + "learning_rate": 5.207704409201072e-06, + "loss": 2.1073, + "step": 35290 + }, + { + "epoch": 1.49, + "learning_rate": 5.203452527743527e-06, + "loss": 2.4152, + "step": 35300 + }, + { + "epoch": 1.49, + "learning_rate": 5.199200646285982e-06, + "loss": 1.3909, + "step": 35310 + }, + { + "epoch": 1.49, + "learning_rate": 5.194948764828437e-06, + "loss": 2.1223, + "step": 35320 + }, + { + "epoch": 1.49, + "learning_rate": 5.190696883370891e-06, + "loss": 2.0514, + "step": 35330 + }, + { + "epoch": 1.49, + "learning_rate": 5.186445001913347e-06, + "loss": 2.0323, + "step": 35340 + }, + { + "epoch": 1.49, + "learning_rate": 5.182193120455802e-06, + "loss": 2.138, + "step": 35350 + }, + { + "epoch": 1.49, + "learning_rate": 5.177941238998258e-06, + "loss": 2.0379, + "step": 35360 + }, + { + "epoch": 1.49, + "learning_rate": 5.173689357540712e-06, + "loss": 2.1295, + "step": 35370 + }, + { + "epoch": 1.49, + "learning_rate": 5.169437476083167e-06, + "loss": 1.7035, + "step": 35380 + }, + { + "epoch": 1.49, + "learning_rate": 5.165185594625622e-06, + "loss": 2.0688, + "step": 35390 + }, + { + "epoch": 1.49, + "learning_rate": 5.160933713168077e-06, + "loss": 2.0947, + "step": 35400 + }, + { + "epoch": 1.49, + "learning_rate": 5.156681831710533e-06, + "loss": 1.8796, + "step": 35410 + }, + { + "epoch": 1.49, + "learning_rate": 5.152429950252987e-06, + "loss": 2.1555, + "step": 35420 + }, + { + "epoch": 1.49, + "learning_rate": 5.148178068795442e-06, + "loss": 2.1799, + "step": 35430 + }, + { + "epoch": 1.49, + "learning_rate": 5.143926187337898e-06, + "loss": 2.5292, + "step": 35440 + }, + { + "epoch": 1.49, + "learning_rate": 5.139674305880352e-06, + "loss": 2.3218, + "step": 35450 + }, + { + "epoch": 1.49, + "learning_rate": 5.1354224244228075e-06, + "loss": 1.4921, + "step": 35460 + }, + { + "epoch": 1.49, + "learning_rate": 5.131170542965263e-06, + "loss": 2.1965, + "step": 35470 + }, + { + "epoch": 1.49, + "learning_rate": 5.126918661507717e-06, + "loss": 2.4084, + "step": 35480 + }, + { + "epoch": 1.49, + "learning_rate": 5.122666780050173e-06, + "loss": 2.4196, + "step": 35490 + }, + { + "epoch": 1.49, + "learning_rate": 5.118414898592627e-06, + "loss": 1.9576, + "step": 35500 + }, + { + "epoch": 1.49, + "eval_loss": 1.747657299041748, + "eval_runtime": 176.1258, + "eval_samples_per_second": 14.211, + "eval_steps_per_second": 7.109, + "step": 35500 + }, + { + "epoch": 1.49, + "learning_rate": 5.114163017135083e-06, + "loss": 2.3121, + "step": 35510 + }, + { + "epoch": 1.49, + "learning_rate": 5.109911135677538e-06, + "loss": 2.4393, + "step": 35520 + }, + { + "epoch": 1.49, + "learning_rate": 5.105659254219992e-06, + "loss": 2.4803, + "step": 35530 + }, + { + "epoch": 1.5, + "learning_rate": 5.101407372762448e-06, + "loss": 2.0533, + "step": 35540 + }, + { + "epoch": 1.5, + "learning_rate": 5.097155491304903e-06, + "loss": 2.0527, + "step": 35550 + }, + { + "epoch": 1.5, + "learning_rate": 5.092903609847358e-06, + "loss": 2.2304, + "step": 35560 + }, + { + "epoch": 1.5, + "learning_rate": 5.0886517283898125e-06, + "loss": 2.1561, + "step": 35570 + }, + { + "epoch": 1.5, + "learning_rate": 5.084399846932268e-06, + "loss": 2.2062, + "step": 35580 + }, + { + "epoch": 1.5, + "learning_rate": 5.080147965474723e-06, + "loss": 2.0185, + "step": 35590 + }, + { + "epoch": 1.5, + "learning_rate": 5.0758960840171776e-06, + "loss": 2.4212, + "step": 35600 + }, + { + "epoch": 1.5, + "learning_rate": 5.071644202559634e-06, + "loss": 2.05, + "step": 35610 + }, + { + "epoch": 1.5, + "learning_rate": 5.067392321102088e-06, + "loss": 1.958, + "step": 35620 + }, + { + "epoch": 1.5, + "learning_rate": 5.063140439644543e-06, + "loss": 2.0309, + "step": 35630 + }, + { + "epoch": 1.5, + "learning_rate": 5.058888558186999e-06, + "loss": 1.5439, + "step": 35640 + }, + { + "epoch": 1.5, + "learning_rate": 5.054636676729453e-06, + "loss": 2.072, + "step": 35650 + }, + { + "epoch": 1.5, + "learning_rate": 5.0503847952719086e-06, + "loss": 2.22, + "step": 35660 + }, + { + "epoch": 1.5, + "learning_rate": 5.046132913814363e-06, + "loss": 1.9343, + "step": 35670 + }, + { + "epoch": 1.5, + "learning_rate": 5.041881032356818e-06, + "loss": 2.2888, + "step": 35680 + }, + { + "epoch": 1.5, + "learning_rate": 5.037629150899274e-06, + "loss": 1.9664, + "step": 35690 + }, + { + "epoch": 1.5, + "learning_rate": 5.033377269441728e-06, + "loss": 2.4235, + "step": 35700 + }, + { + "epoch": 1.5, + "learning_rate": 5.029125387984184e-06, + "loss": 2.2587, + "step": 35710 + }, + { + "epoch": 1.5, + "learning_rate": 5.024873506526639e-06, + "loss": 2.1917, + "step": 35720 + }, + { + "epoch": 1.5, + "learning_rate": 5.020621625069093e-06, + "loss": 2.0016, + "step": 35730 + }, + { + "epoch": 1.5, + "learning_rate": 5.0163697436115485e-06, + "loss": 2.4665, + "step": 35740 + }, + { + "epoch": 1.5, + "learning_rate": 5.012117862154004e-06, + "loss": 2.1793, + "step": 35750 + }, + { + "epoch": 1.5, + "learning_rate": 5.007865980696459e-06, + "loss": 2.0033, + "step": 35760 + }, + { + "epoch": 1.5, + "learning_rate": 5.0036140992389135e-06, + "loss": 1.8352, + "step": 35770 + }, + { + "epoch": 1.51, + "learning_rate": 4.999362217781369e-06, + "loss": 2.1996, + "step": 35780 + }, + { + "epoch": 1.51, + "learning_rate": 4.995110336323823e-06, + "loss": 1.9625, + "step": 35790 + }, + { + "epoch": 1.51, + "learning_rate": 4.990858454866279e-06, + "loss": 1.7395, + "step": 35800 + }, + { + "epoch": 1.51, + "learning_rate": 4.986606573408734e-06, + "loss": 2.5111, + "step": 35810 + }, + { + "epoch": 1.51, + "learning_rate": 4.982354691951189e-06, + "loss": 2.0511, + "step": 35820 + }, + { + "epoch": 1.51, + "learning_rate": 4.978102810493644e-06, + "loss": 2.3589, + "step": 35830 + }, + { + "epoch": 1.51, + "learning_rate": 4.973850929036099e-06, + "loss": 1.9662, + "step": 35840 + }, + { + "epoch": 1.51, + "learning_rate": 4.9695990475785534e-06, + "loss": 1.9488, + "step": 35850 + }, + { + "epoch": 1.51, + "learning_rate": 4.965347166121009e-06, + "loss": 2.0706, + "step": 35860 + }, + { + "epoch": 1.51, + "learning_rate": 4.961095284663464e-06, + "loss": 2.1325, + "step": 35870 + }, + { + "epoch": 1.51, + "learning_rate": 4.956843403205919e-06, + "loss": 1.6996, + "step": 35880 + }, + { + "epoch": 1.51, + "learning_rate": 4.952591521748374e-06, + "loss": 2.2667, + "step": 35890 + }, + { + "epoch": 1.51, + "learning_rate": 4.948339640290829e-06, + "loss": 1.6537, + "step": 35900 + }, + { + "epoch": 1.51, + "learning_rate": 4.944087758833284e-06, + "loss": 2.1472, + "step": 35910 + }, + { + "epoch": 1.51, + "learning_rate": 4.93983587737574e-06, + "loss": 1.527, + "step": 35920 + }, + { + "epoch": 1.51, + "learning_rate": 4.935583995918194e-06, + "loss": 2.4018, + "step": 35930 + }, + { + "epoch": 1.51, + "learning_rate": 4.9313321144606495e-06, + "loss": 2.0018, + "step": 35940 + }, + { + "epoch": 1.51, + "learning_rate": 4.927080233003104e-06, + "loss": 1.7936, + "step": 35950 + }, + { + "epoch": 1.51, + "learning_rate": 4.922828351545559e-06, + "loss": 2.4642, + "step": 35960 + }, + { + "epoch": 1.51, + "learning_rate": 4.9185764700880146e-06, + "loss": 2.4036, + "step": 35970 + }, + { + "epoch": 1.51, + "learning_rate": 4.914324588630469e-06, + "loss": 1.8644, + "step": 35980 + }, + { + "epoch": 1.51, + "learning_rate": 4.910072707172924e-06, + "loss": 2.0751, + "step": 35990 + }, + { + "epoch": 1.51, + "learning_rate": 4.90582082571538e-06, + "loss": 2.0943, + "step": 36000 + }, + { + "epoch": 1.51, + "eval_loss": 1.7490991353988647, + "eval_runtime": 175.4591, + "eval_samples_per_second": 14.265, + "eval_steps_per_second": 7.136, + "step": 36000 + }, + { + "epoch": 1.51, + "learning_rate": 4.901568944257835e-06, + "loss": 2.0113, + "step": 36010 + }, + { + "epoch": 1.52, + "learning_rate": 4.897317062800289e-06, + "loss": 1.8348, + "step": 36020 + }, + { + "epoch": 1.52, + "learning_rate": 4.893065181342745e-06, + "loss": 2.6659, + "step": 36030 + }, + { + "epoch": 1.52, + "learning_rate": 4.888813299885199e-06, + "loss": 2.4203, + "step": 36040 + }, + { + "epoch": 1.52, + "learning_rate": 4.8845614184276545e-06, + "loss": 2.3977, + "step": 36050 + }, + { + "epoch": 1.52, + "learning_rate": 4.88030953697011e-06, + "loss": 2.0347, + "step": 36060 + }, + { + "epoch": 1.52, + "learning_rate": 4.876057655512565e-06, + "loss": 1.9492, + "step": 36070 + }, + { + "epoch": 1.52, + "learning_rate": 4.8718057740550195e-06, + "loss": 1.8611, + "step": 36080 + }, + { + "epoch": 1.52, + "learning_rate": 4.867553892597475e-06, + "loss": 1.6959, + "step": 36090 + }, + { + "epoch": 1.52, + "learning_rate": 4.86330201113993e-06, + "loss": 1.7844, + "step": 36100 + }, + { + "epoch": 1.52, + "learning_rate": 4.859050129682385e-06, + "loss": 2.132, + "step": 36110 + }, + { + "epoch": 1.52, + "learning_rate": 4.85479824822484e-06, + "loss": 2.2562, + "step": 36120 + }, + { + "epoch": 1.52, + "learning_rate": 4.850546366767294e-06, + "loss": 1.8299, + "step": 36130 + }, + { + "epoch": 1.52, + "learning_rate": 4.84629448530975e-06, + "loss": 2.1926, + "step": 36140 + }, + { + "epoch": 1.52, + "learning_rate": 4.842042603852205e-06, + "loss": 1.9032, + "step": 36150 + }, + { + "epoch": 1.52, + "learning_rate": 4.83779072239466e-06, + "loss": 2.3041, + "step": 36160 + }, + { + "epoch": 1.52, + "learning_rate": 4.833538840937116e-06, + "loss": 2.111, + "step": 36170 + }, + { + "epoch": 1.52, + "learning_rate": 4.82928695947957e-06, + "loss": 2.187, + "step": 36180 + }, + { + "epoch": 1.52, + "learning_rate": 4.825035078022025e-06, + "loss": 2.0693, + "step": 36190 + }, + { + "epoch": 1.52, + "learning_rate": 4.820783196564481e-06, + "loss": 1.9847, + "step": 36200 + }, + { + "epoch": 1.52, + "learning_rate": 4.816531315106935e-06, + "loss": 2.1018, + "step": 36210 + }, + { + "epoch": 1.52, + "learning_rate": 4.8122794336493896e-06, + "loss": 2.1058, + "step": 36220 + }, + { + "epoch": 1.52, + "learning_rate": 4.808027552191845e-06, + "loss": 1.9707, + "step": 36230 + }, + { + "epoch": 1.52, + "learning_rate": 4.8037756707343e-06, + "loss": 1.8293, + "step": 36240 + }, + { + "epoch": 1.53, + "learning_rate": 4.7995237892767555e-06, + "loss": 2.3156, + "step": 36250 + }, + { + "epoch": 1.53, + "learning_rate": 4.795271907819211e-06, + "loss": 1.685, + "step": 36260 + }, + { + "epoch": 1.53, + "learning_rate": 4.791020026361665e-06, + "loss": 1.6848, + "step": 36270 + }, + { + "epoch": 1.53, + "learning_rate": 4.7867681449041206e-06, + "loss": 1.7966, + "step": 36280 + }, + { + "epoch": 1.53, + "learning_rate": 4.782516263446576e-06, + "loss": 2.0305, + "step": 36290 + }, + { + "epoch": 1.53, + "learning_rate": 4.77826438198903e-06, + "loss": 1.9382, + "step": 36300 + }, + { + "epoch": 1.53, + "learning_rate": 4.774012500531486e-06, + "loss": 2.0167, + "step": 36310 + }, + { + "epoch": 1.53, + "learning_rate": 4.76976061907394e-06, + "loss": 1.9864, + "step": 36320 + }, + { + "epoch": 1.53, + "learning_rate": 4.765508737616395e-06, + "loss": 2.4167, + "step": 36330 + }, + { + "epoch": 1.53, + "learning_rate": 4.761256856158851e-06, + "loss": 2.4879, + "step": 36340 + }, + { + "epoch": 1.53, + "learning_rate": 4.757004974701306e-06, + "loss": 1.9834, + "step": 36350 + }, + { + "epoch": 1.53, + "learning_rate": 4.752753093243761e-06, + "loss": 2.2833, + "step": 36360 + }, + { + "epoch": 1.53, + "learning_rate": 4.748501211786216e-06, + "loss": 1.9415, + "step": 36370 + }, + { + "epoch": 1.53, + "learning_rate": 4.744249330328671e-06, + "loss": 2.3043, + "step": 36380 + }, + { + "epoch": 1.53, + "learning_rate": 4.7399974488711255e-06, + "loss": 1.8226, + "step": 36390 + }, + { + "epoch": 1.53, + "learning_rate": 4.735745567413581e-06, + "loss": 2.1448, + "step": 36400 + }, + { + "epoch": 1.53, + "learning_rate": 4.731493685956036e-06, + "loss": 2.5569, + "step": 36410 + }, + { + "epoch": 1.53, + "learning_rate": 4.727241804498491e-06, + "loss": 2.1507, + "step": 36420 + }, + { + "epoch": 1.53, + "learning_rate": 4.722989923040946e-06, + "loss": 1.9932, + "step": 36430 + }, + { + "epoch": 1.53, + "learning_rate": 4.718738041583401e-06, + "loss": 2.0433, + "step": 36440 + }, + { + "epoch": 1.53, + "learning_rate": 4.7144861601258565e-06, + "loss": 2.4259, + "step": 36450 + }, + { + "epoch": 1.53, + "learning_rate": 4.710234278668311e-06, + "loss": 2.389, + "step": 36460 + }, + { + "epoch": 1.53, + "learning_rate": 4.705982397210766e-06, + "loss": 2.0994, + "step": 36470 + }, + { + "epoch": 1.53, + "learning_rate": 4.701730515753221e-06, + "loss": 2.5288, + "step": 36480 + }, + { + "epoch": 1.54, + "learning_rate": 4.697478634295676e-06, + "loss": 2.4069, + "step": 36490 + }, + { + "epoch": 1.54, + "learning_rate": 4.693226752838131e-06, + "loss": 1.669, + "step": 36500 + }, + { + "epoch": 1.54, + "eval_loss": 1.754565954208374, + "eval_runtime": 175.7316, + "eval_samples_per_second": 14.243, + "eval_steps_per_second": 7.125, + "step": 36500 + }, + { + "epoch": 1.54, + "learning_rate": 4.688974871380586e-06, + "loss": 2.0982, + "step": 36510 + }, + { + "epoch": 1.54, + "learning_rate": 4.684722989923041e-06, + "loss": 2.4972, + "step": 36520 + }, + { + "epoch": 1.54, + "learning_rate": 4.680471108465496e-06, + "loss": 2.5963, + "step": 36530 + }, + { + "epoch": 1.54, + "learning_rate": 4.676219227007952e-06, + "loss": 2.0617, + "step": 36540 + }, + { + "epoch": 1.54, + "learning_rate": 4.671967345550407e-06, + "loss": 1.9341, + "step": 36550 + }, + { + "epoch": 1.54, + "learning_rate": 4.6677154640928615e-06, + "loss": 2.0048, + "step": 36560 + }, + { + "epoch": 1.54, + "learning_rate": 4.663463582635317e-06, + "loss": 2.2699, + "step": 36570 + }, + { + "epoch": 1.54, + "learning_rate": 4.659211701177771e-06, + "loss": 1.9895, + "step": 36580 + }, + { + "epoch": 1.54, + "learning_rate": 4.6549598197202265e-06, + "loss": 2.3447, + "step": 36590 + }, + { + "epoch": 1.54, + "learning_rate": 4.650707938262682e-06, + "loss": 1.969, + "step": 36600 + }, + { + "epoch": 1.54, + "learning_rate": 4.646456056805136e-06, + "loss": 2.2418, + "step": 36610 + }, + { + "epoch": 1.54, + "learning_rate": 4.642204175347592e-06, + "loss": 2.2551, + "step": 36620 + }, + { + "epoch": 1.54, + "learning_rate": 4.637952293890047e-06, + "loss": 1.6052, + "step": 36630 + }, + { + "epoch": 1.54, + "learning_rate": 4.633700412432502e-06, + "loss": 1.8645, + "step": 36640 + }, + { + "epoch": 1.54, + "learning_rate": 4.629448530974957e-06, + "loss": 2.3318, + "step": 36650 + }, + { + "epoch": 1.54, + "learning_rate": 4.625196649517412e-06, + "loss": 2.1373, + "step": 36660 + }, + { + "epoch": 1.54, + "learning_rate": 4.6209447680598665e-06, + "loss": 2.3078, + "step": 36670 + }, + { + "epoch": 1.54, + "learning_rate": 4.616692886602322e-06, + "loss": 2.3501, + "step": 36680 + }, + { + "epoch": 1.54, + "learning_rate": 4.612441005144777e-06, + "loss": 1.6993, + "step": 36690 + }, + { + "epoch": 1.54, + "learning_rate": 4.6081891236872315e-06, + "loss": 1.8396, + "step": 36700 + }, + { + "epoch": 1.54, + "learning_rate": 4.603937242229687e-06, + "loss": 2.2297, + "step": 36710 + }, + { + "epoch": 1.54, + "learning_rate": 4.599685360772142e-06, + "loss": 1.9559, + "step": 36720 + }, + { + "epoch": 1.55, + "learning_rate": 4.5954334793145974e-06, + "loss": 2.153, + "step": 36730 + }, + { + "epoch": 1.55, + "learning_rate": 4.591181597857052e-06, + "loss": 2.2837, + "step": 36740 + }, + { + "epoch": 1.55, + "learning_rate": 4.586929716399507e-06, + "loss": 1.6944, + "step": 36750 + }, + { + "epoch": 1.55, + "learning_rate": 4.582677834941962e-06, + "loss": 1.879, + "step": 36760 + }, + { + "epoch": 1.55, + "learning_rate": 4.578425953484417e-06, + "loss": 1.9212, + "step": 36770 + }, + { + "epoch": 1.55, + "learning_rate": 4.574174072026872e-06, + "loss": 2.3385, + "step": 36780 + }, + { + "epoch": 1.55, + "learning_rate": 4.5699221905693276e-06, + "loss": 2.0329, + "step": 36790 + }, + { + "epoch": 1.55, + "learning_rate": 4.565670309111782e-06, + "loss": 2.0217, + "step": 36800 + }, + { + "epoch": 1.55, + "learning_rate": 4.561418427654237e-06, + "loss": 2.2787, + "step": 36810 + }, + { + "epoch": 1.55, + "learning_rate": 4.557166546196693e-06, + "loss": 1.9281, + "step": 36820 + }, + { + "epoch": 1.55, + "learning_rate": 4.552914664739148e-06, + "loss": 2.0044, + "step": 36830 + }, + { + "epoch": 1.55, + "learning_rate": 4.548662783281602e-06, + "loss": 2.0486, + "step": 36840 + }, + { + "epoch": 1.55, + "learning_rate": 4.544410901824058e-06, + "loss": 2.3855, + "step": 36850 + }, + { + "epoch": 1.55, + "learning_rate": 4.540159020366512e-06, + "loss": 1.7565, + "step": 36860 + }, + { + "epoch": 1.55, + "learning_rate": 4.5359071389089675e-06, + "loss": 2.4534, + "step": 36870 + }, + { + "epoch": 1.55, + "learning_rate": 4.531655257451423e-06, + "loss": 1.9945, + "step": 36880 + }, + { + "epoch": 1.55, + "learning_rate": 4.527403375993878e-06, + "loss": 2.3056, + "step": 36890 + }, + { + "epoch": 1.55, + "learning_rate": 4.5231514945363325e-06, + "loss": 2.261, + "step": 36900 + }, + { + "epoch": 1.55, + "learning_rate": 4.518899613078788e-06, + "loss": 1.9996, + "step": 36910 + }, + { + "epoch": 1.55, + "learning_rate": 4.514647731621243e-06, + "loss": 2.1015, + "step": 36920 + }, + { + "epoch": 1.55, + "learning_rate": 4.510395850163698e-06, + "loss": 2.1077, + "step": 36930 + }, + { + "epoch": 1.55, + "learning_rate": 4.506143968706153e-06, + "loss": 1.9946, + "step": 36940 + }, + { + "epoch": 1.55, + "learning_rate": 4.501892087248607e-06, + "loss": 2.2828, + "step": 36950 + }, + { + "epoch": 1.55, + "learning_rate": 4.497640205791063e-06, + "loss": 2.2453, + "step": 36960 + }, + { + "epoch": 1.56, + "learning_rate": 4.493388324333518e-06, + "loss": 2.032, + "step": 36970 + }, + { + "epoch": 1.56, + "learning_rate": 4.489136442875973e-06, + "loss": 2.162, + "step": 36980 + }, + { + "epoch": 1.56, + "learning_rate": 4.484884561418428e-06, + "loss": 2.1001, + "step": 36990 + }, + { + "epoch": 1.56, + "learning_rate": 4.480632679960883e-06, + "loss": 2.1434, + "step": 37000 + }, + { + "epoch": 1.56, + "eval_loss": 1.7376418113708496, + "eval_runtime": 176.1379, + "eval_samples_per_second": 14.21, + "eval_steps_per_second": 7.108, + "step": 37000 + }, + { + "epoch": 1.56, + "learning_rate": 4.476380798503338e-06, + "loss": 2.0959, + "step": 37010 + }, + { + "epoch": 1.56, + "learning_rate": 4.472128917045793e-06, + "loss": 2.4057, + "step": 37020 + }, + { + "epoch": 1.56, + "learning_rate": 4.467877035588248e-06, + "loss": 2.2582, + "step": 37030 + }, + { + "epoch": 1.56, + "learning_rate": 4.463625154130703e-06, + "loss": 1.7448, + "step": 37040 + }, + { + "epoch": 1.56, + "learning_rate": 4.459373272673158e-06, + "loss": 2.0855, + "step": 37050 + }, + { + "epoch": 1.56, + "learning_rate": 4.455121391215613e-06, + "loss": 2.2708, + "step": 37060 + }, + { + "epoch": 1.56, + "learning_rate": 4.4508695097580685e-06, + "loss": 2.0224, + "step": 37070 + }, + { + "epoch": 1.56, + "learning_rate": 4.446617628300524e-06, + "loss": 2.2547, + "step": 37080 + }, + { + "epoch": 1.56, + "learning_rate": 4.442365746842978e-06, + "loss": 1.9236, + "step": 37090 + }, + { + "epoch": 1.56, + "learning_rate": 4.4381138653854336e-06, + "loss": 1.7799, + "step": 37100 + }, + { + "epoch": 1.56, + "learning_rate": 4.433861983927889e-06, + "loss": 2.1988, + "step": 37110 + }, + { + "epoch": 1.56, + "learning_rate": 4.429610102470343e-06, + "loss": 2.3449, + "step": 37120 + }, + { + "epoch": 1.56, + "learning_rate": 4.425358221012799e-06, + "loss": 2.2117, + "step": 37130 + }, + { + "epoch": 1.56, + "learning_rate": 4.421106339555253e-06, + "loss": 2.1477, + "step": 37140 + }, + { + "epoch": 1.56, + "learning_rate": 4.416854458097708e-06, + "loss": 2.2606, + "step": 37150 + }, + { + "epoch": 1.56, + "learning_rate": 4.412602576640164e-06, + "loss": 1.779, + "step": 37160 + }, + { + "epoch": 1.56, + "learning_rate": 4.408350695182619e-06, + "loss": 1.9888, + "step": 37170 + }, + { + "epoch": 1.56, + "learning_rate": 4.404098813725074e-06, + "loss": 2.2328, + "step": 37180 + }, + { + "epoch": 1.56, + "learning_rate": 4.399846932267529e-06, + "loss": 2.0528, + "step": 37190 + }, + { + "epoch": 1.57, + "learning_rate": 4.395595050809984e-06, + "loss": 1.7668, + "step": 37200 + }, + { + "epoch": 1.57, + "learning_rate": 4.3913431693524385e-06, + "loss": 1.8782, + "step": 37210 + }, + { + "epoch": 1.57, + "learning_rate": 4.387091287894894e-06, + "loss": 2.1357, + "step": 37220 + }, + { + "epoch": 1.57, + "learning_rate": 4.382839406437348e-06, + "loss": 2.2905, + "step": 37230 + }, + { + "epoch": 1.57, + "learning_rate": 4.378587524979804e-06, + "loss": 2.0164, + "step": 37240 + }, + { + "epoch": 1.57, + "learning_rate": 4.374335643522259e-06, + "loss": 2.2011, + "step": 37250 + }, + { + "epoch": 1.57, + "learning_rate": 4.370083762064714e-06, + "loss": 2.1939, + "step": 37260 + }, + { + "epoch": 1.57, + "learning_rate": 4.3658318806071695e-06, + "loss": 1.9964, + "step": 37270 + }, + { + "epoch": 1.57, + "learning_rate": 4.361579999149624e-06, + "loss": 1.7766, + "step": 37280 + }, + { + "epoch": 1.57, + "learning_rate": 4.357328117692079e-06, + "loss": 2.1014, + "step": 37290 + }, + { + "epoch": 1.57, + "learning_rate": 4.353076236234534e-06, + "loss": 2.0096, + "step": 37300 + }, + { + "epoch": 1.57, + "learning_rate": 4.348824354776989e-06, + "loss": 2.3916, + "step": 37310 + }, + { + "epoch": 1.57, + "learning_rate": 4.344572473319444e-06, + "loss": 2.0797, + "step": 37320 + }, + { + "epoch": 1.57, + "learning_rate": 4.340320591861899e-06, + "loss": 2.3945, + "step": 37330 + }, + { + "epoch": 1.57, + "learning_rate": 4.336068710404354e-06, + "loss": 1.627, + "step": 37340 + }, + { + "epoch": 1.57, + "learning_rate": 4.3318168289468094e-06, + "loss": 2.3023, + "step": 37350 + }, + { + "epoch": 1.57, + "learning_rate": 4.327564947489265e-06, + "loss": 1.9037, + "step": 37360 + }, + { + "epoch": 1.57, + "learning_rate": 4.32331306603172e-06, + "loss": 1.7088, + "step": 37370 + }, + { + "epoch": 1.57, + "learning_rate": 4.3190611845741745e-06, + "loss": 1.7849, + "step": 37380 + }, + { + "epoch": 1.57, + "learning_rate": 4.314809303116629e-06, + "loss": 1.8289, + "step": 37390 + }, + { + "epoch": 1.57, + "learning_rate": 4.310557421659084e-06, + "loss": 2.3683, + "step": 37400 + }, + { + "epoch": 1.57, + "learning_rate": 4.3063055402015396e-06, + "loss": 2.1933, + "step": 37410 + }, + { + "epoch": 1.57, + "learning_rate": 4.302053658743995e-06, + "loss": 2.2794, + "step": 37420 + }, + { + "epoch": 1.57, + "learning_rate": 4.297801777286449e-06, + "loss": 2.0994, + "step": 37430 + }, + { + "epoch": 1.58, + "learning_rate": 4.293549895828905e-06, + "loss": 2.2796, + "step": 37440 + }, + { + "epoch": 1.58, + "learning_rate": 4.28929801437136e-06, + "loss": 1.8149, + "step": 37450 + }, + { + "epoch": 1.58, + "learning_rate": 4.285046132913815e-06, + "loss": 2.1684, + "step": 37460 + }, + { + "epoch": 1.58, + "learning_rate": 4.28079425145627e-06, + "loss": 1.8682, + "step": 37470 + }, + { + "epoch": 1.58, + "learning_rate": 4.276542369998725e-06, + "loss": 1.9193, + "step": 37480 + }, + { + "epoch": 1.58, + "learning_rate": 4.2722904885411795e-06, + "loss": 2.0892, + "step": 37490 + }, + { + "epoch": 1.58, + "learning_rate": 4.268038607083635e-06, + "loss": 2.1553, + "step": 37500 + }, + { + "epoch": 1.58, + "eval_loss": 1.7463552951812744, + "eval_runtime": 176.0244, + "eval_samples_per_second": 14.22, + "eval_steps_per_second": 7.113, + "step": 37500 + }, + { + "epoch": 1.58, + "learning_rate": 4.26378672562609e-06, + "loss": 1.6898, + "step": 37510 + }, + { + "epoch": 1.58, + "learning_rate": 4.2595348441685445e-06, + "loss": 2.1169, + "step": 37520 + }, + { + "epoch": 1.58, + "learning_rate": 4.255282962711e-06, + "loss": 2.3435, + "step": 37530 + }, + { + "epoch": 1.58, + "learning_rate": 4.251031081253455e-06, + "loss": 2.0221, + "step": 37540 + }, + { + "epoch": 1.58, + "learning_rate": 4.2467791997959105e-06, + "loss": 1.7718, + "step": 37550 + }, + { + "epoch": 1.58, + "learning_rate": 4.242527318338365e-06, + "loss": 2.4137, + "step": 37560 + }, + { + "epoch": 1.58, + "learning_rate": 4.23827543688082e-06, + "loss": 1.5029, + "step": 37570 + }, + { + "epoch": 1.58, + "learning_rate": 4.234023555423275e-06, + "loss": 2.308, + "step": 37580 + }, + { + "epoch": 1.58, + "learning_rate": 4.22977167396573e-06, + "loss": 2.1589, + "step": 37590 + }, + { + "epoch": 1.58, + "learning_rate": 4.225519792508185e-06, + "loss": 2.1282, + "step": 37600 + }, + { + "epoch": 1.58, + "learning_rate": 4.221267911050641e-06, + "loss": 2.0603, + "step": 37610 + }, + { + "epoch": 1.58, + "learning_rate": 4.217016029593095e-06, + "loss": 2.3884, + "step": 37620 + }, + { + "epoch": 1.58, + "learning_rate": 4.21276414813555e-06, + "loss": 2.3481, + "step": 37630 + }, + { + "epoch": 1.58, + "learning_rate": 4.208512266678006e-06, + "loss": 2.3606, + "step": 37640 + }, + { + "epoch": 1.58, + "learning_rate": 4.20426038522046e-06, + "loss": 2.076, + "step": 37650 + }, + { + "epoch": 1.58, + "learning_rate": 4.2000085037629154e-06, + "loss": 1.9552, + "step": 37660 + }, + { + "epoch": 1.58, + "learning_rate": 4.19575662230537e-06, + "loss": 2.5605, + "step": 37670 + }, + { + "epoch": 1.59, + "learning_rate": 4.191504740847825e-06, + "loss": 2.4783, + "step": 37680 + }, + { + "epoch": 1.59, + "learning_rate": 4.1872528593902805e-06, + "loss": 1.8744, + "step": 37690 + }, + { + "epoch": 1.59, + "learning_rate": 4.183000977932736e-06, + "loss": 1.8266, + "step": 37700 + }, + { + "epoch": 1.59, + "learning_rate": 4.17874909647519e-06, + "loss": 1.784, + "step": 37710 + }, + { + "epoch": 1.59, + "learning_rate": 4.1744972150176456e-06, + "loss": 1.855, + "step": 37720 + }, + { + "epoch": 1.59, + "learning_rate": 4.170245333560101e-06, + "loss": 1.9852, + "step": 37730 + }, + { + "epoch": 1.59, + "learning_rate": 4.165993452102556e-06, + "loss": 1.9374, + "step": 37740 + }, + { + "epoch": 1.59, + "learning_rate": 4.161741570645011e-06, + "loss": 2.0234, + "step": 37750 + }, + { + "epoch": 1.59, + "learning_rate": 4.157489689187466e-06, + "loss": 1.7482, + "step": 37760 + }, + { + "epoch": 1.59, + "learning_rate": 4.15323780772992e-06, + "loss": 2.1098, + "step": 37770 + }, + { + "epoch": 1.59, + "learning_rate": 4.148985926272376e-06, + "loss": 2.4678, + "step": 37780 + }, + { + "epoch": 1.59, + "learning_rate": 4.144734044814831e-06, + "loss": 2.0931, + "step": 37790 + }, + { + "epoch": 1.59, + "learning_rate": 4.140482163357286e-06, + "loss": 2.2251, + "step": 37800 + }, + { + "epoch": 1.59, + "learning_rate": 4.136230281899741e-06, + "loss": 2.319, + "step": 37810 + }, + { + "epoch": 1.59, + "learning_rate": 4.131978400442196e-06, + "loss": 2.2545, + "step": 37820 + }, + { + "epoch": 1.59, + "learning_rate": 4.127726518984651e-06, + "loss": 2.139, + "step": 37830 + }, + { + "epoch": 1.59, + "learning_rate": 4.123474637527106e-06, + "loss": 2.2262, + "step": 37840 + }, + { + "epoch": 1.59, + "learning_rate": 4.119222756069561e-06, + "loss": 2.0406, + "step": 37850 + }, + { + "epoch": 1.59, + "learning_rate": 4.114970874612016e-06, + "loss": 1.7894, + "step": 37860 + }, + { + "epoch": 1.59, + "learning_rate": 4.110718993154471e-06, + "loss": 2.4756, + "step": 37870 + }, + { + "epoch": 1.59, + "learning_rate": 4.106467111696926e-06, + "loss": 2.2345, + "step": 37880 + }, + { + "epoch": 1.59, + "learning_rate": 4.1022152302393815e-06, + "loss": 2.3746, + "step": 37890 + }, + { + "epoch": 1.59, + "learning_rate": 4.097963348781837e-06, + "loss": 1.7428, + "step": 37900 + }, + { + "epoch": 1.59, + "learning_rate": 4.093711467324291e-06, + "loss": 2.3988, + "step": 37910 + }, + { + "epoch": 1.6, + "learning_rate": 4.089459585866747e-06, + "loss": 1.9973, + "step": 37920 + }, + { + "epoch": 1.6, + "learning_rate": 4.085207704409201e-06, + "loss": 2.0614, + "step": 37930 + }, + { + "epoch": 1.6, + "learning_rate": 4.080955822951656e-06, + "loss": 1.8093, + "step": 37940 + }, + { + "epoch": 1.6, + "learning_rate": 4.076703941494112e-06, + "loss": 2.2133, + "step": 37950 + }, + { + "epoch": 1.6, + "learning_rate": 4.072452060036566e-06, + "loss": 1.9063, + "step": 37960 + }, + { + "epoch": 1.6, + "learning_rate": 4.0682001785790214e-06, + "loss": 2.6421, + "step": 37970 + }, + { + "epoch": 1.6, + "learning_rate": 4.063948297121477e-06, + "loss": 2.2117, + "step": 37980 + }, + { + "epoch": 1.6, + "learning_rate": 4.059696415663932e-06, + "loss": 2.1171, + "step": 37990 + }, + { + "epoch": 1.6, + "learning_rate": 4.0554445342063865e-06, + "loss": 2.06, + "step": 38000 + }, + { + "epoch": 1.6, + "eval_loss": 1.745969533920288, + "eval_runtime": 175.921, + "eval_samples_per_second": 14.228, + "eval_steps_per_second": 7.117, + "step": 38000 + }, + { + "epoch": 1.6, + "learning_rate": 4.051192652748842e-06, + "loss": 1.6237, + "step": 38010 + }, + { + "epoch": 1.6, + "learning_rate": 4.046940771291297e-06, + "loss": 2.3635, + "step": 38020 + }, + { + "epoch": 1.6, + "learning_rate": 4.0426888898337516e-06, + "loss": 2.0552, + "step": 38030 + }, + { + "epoch": 1.6, + "learning_rate": 4.038437008376207e-06, + "loss": 2.4618, + "step": 38040 + }, + { + "epoch": 1.6, + "learning_rate": 4.034185126918661e-06, + "loss": 2.0934, + "step": 38050 + }, + { + "epoch": 1.6, + "learning_rate": 4.029933245461117e-06, + "loss": 1.8939, + "step": 38060 + }, + { + "epoch": 1.6, + "learning_rate": 4.025681364003572e-06, + "loss": 1.8378, + "step": 38070 + }, + { + "epoch": 1.6, + "learning_rate": 4.021429482546027e-06, + "loss": 1.5897, + "step": 38080 + }, + { + "epoch": 1.6, + "learning_rate": 4.0171776010884826e-06, + "loss": 2.397, + "step": 38090 + }, + { + "epoch": 1.6, + "learning_rate": 4.012925719630937e-06, + "loss": 2.1118, + "step": 38100 + }, + { + "epoch": 1.6, + "learning_rate": 4.008673838173392e-06, + "loss": 2.2938, + "step": 38110 + }, + { + "epoch": 1.6, + "learning_rate": 4.004421956715847e-06, + "loss": 2.3173, + "step": 38120 + }, + { + "epoch": 1.6, + "learning_rate": 4.000170075258302e-06, + "loss": 2.3025, + "step": 38130 + }, + { + "epoch": 1.6, + "learning_rate": 3.995918193800757e-06, + "loss": 2.0702, + "step": 38140 + }, + { + "epoch": 1.61, + "learning_rate": 3.991666312343212e-06, + "loss": 2.1073, + "step": 38150 + }, + { + "epoch": 1.61, + "learning_rate": 3.987414430885667e-06, + "loss": 2.7411, + "step": 38160 + }, + { + "epoch": 1.61, + "learning_rate": 3.9831625494281225e-06, + "loss": 2.1455, + "step": 38170 + }, + { + "epoch": 1.61, + "learning_rate": 3.978910667970578e-06, + "loss": 1.9867, + "step": 38180 + }, + { + "epoch": 1.61, + "learning_rate": 3.974658786513032e-06, + "loss": 2.1154, + "step": 38190 + }, + { + "epoch": 1.61, + "learning_rate": 3.9704069050554875e-06, + "loss": 2.1478, + "step": 38200 + }, + { + "epoch": 1.61, + "learning_rate": 3.966155023597942e-06, + "loss": 1.744, + "step": 38210 + }, + { + "epoch": 1.61, + "learning_rate": 3.961903142140397e-06, + "loss": 2.3651, + "step": 38220 + }, + { + "epoch": 1.61, + "learning_rate": 3.957651260682853e-06, + "loss": 2.4052, + "step": 38230 + }, + { + "epoch": 1.61, + "learning_rate": 3.953399379225307e-06, + "loss": 1.8449, + "step": 38240 + }, + { + "epoch": 1.61, + "learning_rate": 3.949147497767762e-06, + "loss": 1.8959, + "step": 38250 + }, + { + "epoch": 1.61, + "learning_rate": 3.944895616310218e-06, + "loss": 1.8913, + "step": 38260 + }, + { + "epoch": 1.61, + "learning_rate": 3.940643734852673e-06, + "loss": 2.0492, + "step": 38270 + }, + { + "epoch": 1.61, + "learning_rate": 3.936391853395128e-06, + "loss": 2.2655, + "step": 38280 + }, + { + "epoch": 1.61, + "learning_rate": 3.932139971937583e-06, + "loss": 2.3627, + "step": 38290 + }, + { + "epoch": 1.61, + "learning_rate": 3.927888090480037e-06, + "loss": 2.2161, + "step": 38300 + }, + { + "epoch": 1.61, + "learning_rate": 3.9236362090224925e-06, + "loss": 2.0692, + "step": 38310 + }, + { + "epoch": 1.61, + "learning_rate": 3.919384327564948e-06, + "loss": 1.6689, + "step": 38320 + }, + { + "epoch": 1.61, + "learning_rate": 3.915132446107403e-06, + "loss": 2.0331, + "step": 38330 + }, + { + "epoch": 1.61, + "learning_rate": 3.9108805646498576e-06, + "loss": 2.0686, + "step": 38340 + }, + { + "epoch": 1.61, + "learning_rate": 3.906628683192313e-06, + "loss": 2.5842, + "step": 38350 + }, + { + "epoch": 1.61, + "learning_rate": 3.902376801734768e-06, + "loss": 2.0441, + "step": 38360 + }, + { + "epoch": 1.61, + "learning_rate": 3.8981249202772235e-06, + "loss": 2.0406, + "step": 38370 + }, + { + "epoch": 1.61, + "learning_rate": 3.893873038819678e-06, + "loss": 2.2905, + "step": 38380 + }, + { + "epoch": 1.62, + "learning_rate": 3.889621157362133e-06, + "loss": 1.9497, + "step": 38390 + }, + { + "epoch": 1.62, + "learning_rate": 3.885369275904588e-06, + "loss": 1.7564, + "step": 38400 + }, + { + "epoch": 1.62, + "learning_rate": 3.881117394447043e-06, + "loss": 1.895, + "step": 38410 + }, + { + "epoch": 1.62, + "learning_rate": 3.876865512989498e-06, + "loss": 2.4167, + "step": 38420 + }, + { + "epoch": 1.62, + "learning_rate": 3.872613631531954e-06, + "loss": 2.4465, + "step": 38430 + }, + { + "epoch": 1.62, + "learning_rate": 3.868361750074408e-06, + "loss": 2.1046, + "step": 38440 + }, + { + "epoch": 1.62, + "learning_rate": 3.864109868616863e-06, + "loss": 2.1757, + "step": 38450 + }, + { + "epoch": 1.62, + "learning_rate": 3.859857987159319e-06, + "loss": 2.6139, + "step": 38460 + }, + { + "epoch": 1.62, + "learning_rate": 3.855606105701773e-06, + "loss": 2.2124, + "step": 38470 + }, + { + "epoch": 1.62, + "learning_rate": 3.8513542242442285e-06, + "loss": 2.0222, + "step": 38480 + }, + { + "epoch": 1.62, + "learning_rate": 3.847102342786683e-06, + "loss": 2.1389, + "step": 38490 + }, + { + "epoch": 1.62, + "learning_rate": 3.842850461329138e-06, + "loss": 2.2977, + "step": 38500 + }, + { + "epoch": 1.62, + "eval_loss": 1.7398837804794312, + "eval_runtime": 174.2372, + "eval_samples_per_second": 14.365, + "eval_steps_per_second": 7.186, + "step": 38500 + }, + { + "epoch": 1.62, + "learning_rate": 3.8385985798715935e-06, + "loss": 2.4658, + "step": 38510 + }, + { + "epoch": 1.62, + "learning_rate": 3.834346698414049e-06, + "loss": 1.8469, + "step": 38520 + }, + { + "epoch": 1.62, + "learning_rate": 3.830094816956503e-06, + "loss": 2.4372, + "step": 38530 + }, + { + "epoch": 1.62, + "learning_rate": 3.825842935498959e-06, + "loss": 1.6964, + "step": 38540 + }, + { + "epoch": 1.62, + "learning_rate": 3.821591054041414e-06, + "loss": 1.8705, + "step": 38550 + }, + { + "epoch": 1.62, + "learning_rate": 3.817339172583868e-06, + "loss": 2.2879, + "step": 38560 + }, + { + "epoch": 1.62, + "learning_rate": 3.813087291126324e-06, + "loss": 2.1464, + "step": 38570 + }, + { + "epoch": 1.62, + "learning_rate": 3.8088354096687785e-06, + "loss": 1.8558, + "step": 38580 + }, + { + "epoch": 1.62, + "learning_rate": 3.804583528211234e-06, + "loss": 2.0456, + "step": 38590 + }, + { + "epoch": 1.62, + "learning_rate": 3.8003316467536887e-06, + "loss": 2.2801, + "step": 38600 + }, + { + "epoch": 1.62, + "learning_rate": 3.796079765296144e-06, + "loss": 1.9854, + "step": 38610 + }, + { + "epoch": 1.62, + "learning_rate": 3.791827883838599e-06, + "loss": 2.5356, + "step": 38620 + }, + { + "epoch": 1.63, + "learning_rate": 3.787576002381054e-06, + "loss": 2.319, + "step": 38630 + }, + { + "epoch": 1.63, + "learning_rate": 3.7833241209235087e-06, + "loss": 1.9153, + "step": 38640 + }, + { + "epoch": 1.63, + "learning_rate": 3.779072239465964e-06, + "loss": 1.8595, + "step": 38650 + }, + { + "epoch": 1.63, + "learning_rate": 3.7748203580084193e-06, + "loss": 2.1707, + "step": 38660 + }, + { + "epoch": 1.63, + "learning_rate": 3.770568476550874e-06, + "loss": 1.9023, + "step": 38670 + }, + { + "epoch": 1.63, + "learning_rate": 3.766316595093329e-06, + "loss": 2.3358, + "step": 38680 + }, + { + "epoch": 1.63, + "learning_rate": 3.762064713635784e-06, + "loss": 2.0008, + "step": 38690 + }, + { + "epoch": 1.63, + "learning_rate": 3.7578128321782392e-06, + "loss": 1.6161, + "step": 38700 + }, + { + "epoch": 1.63, + "learning_rate": 3.7535609507206945e-06, + "loss": 1.9317, + "step": 38710 + }, + { + "epoch": 1.63, + "learning_rate": 3.7493090692631494e-06, + "loss": 1.9281, + "step": 38720 + }, + { + "epoch": 1.63, + "learning_rate": 3.7450571878056043e-06, + "loss": 2.2011, + "step": 38730 + }, + { + "epoch": 1.63, + "learning_rate": 3.740805306348059e-06, + "loss": 2.0454, + "step": 38740 + }, + { + "epoch": 1.63, + "learning_rate": 3.7365534248905145e-06, + "loss": 2.2612, + "step": 38750 + }, + { + "epoch": 1.63, + "learning_rate": 3.7323015434329694e-06, + "loss": 2.2228, + "step": 38760 + }, + { + "epoch": 1.63, + "learning_rate": 3.7280496619754243e-06, + "loss": 2.2572, + "step": 38770 + }, + { + "epoch": 1.63, + "learning_rate": 3.723797780517879e-06, + "loss": 1.7262, + "step": 38780 + }, + { + "epoch": 1.63, + "learning_rate": 3.7195458990603345e-06, + "loss": 1.8567, + "step": 38790 + }, + { + "epoch": 1.63, + "learning_rate": 3.7152940176027898e-06, + "loss": 2.0451, + "step": 38800 + }, + { + "epoch": 1.63, + "learning_rate": 3.7110421361452446e-06, + "loss": 2.3198, + "step": 38810 + }, + { + "epoch": 1.63, + "learning_rate": 3.7067902546876995e-06, + "loss": 2.1443, + "step": 38820 + }, + { + "epoch": 1.63, + "learning_rate": 3.7025383732301544e-06, + "loss": 1.7047, + "step": 38830 + }, + { + "epoch": 1.63, + "learning_rate": 3.6982864917726097e-06, + "loss": 2.2287, + "step": 38840 + }, + { + "epoch": 1.63, + "learning_rate": 3.694034610315065e-06, + "loss": 2.5186, + "step": 38850 + }, + { + "epoch": 1.63, + "learning_rate": 3.68978272885752e-06, + "loss": 1.7028, + "step": 38860 + }, + { + "epoch": 1.64, + "learning_rate": 3.6855308473999748e-06, + "loss": 2.2628, + "step": 38870 + }, + { + "epoch": 1.64, + "learning_rate": 3.6812789659424297e-06, + "loss": 2.1352, + "step": 38880 + }, + { + "epoch": 1.64, + "learning_rate": 3.677027084484885e-06, + "loss": 2.2716, + "step": 38890 + }, + { + "epoch": 1.64, + "learning_rate": 3.67277520302734e-06, + "loss": 2.4243, + "step": 38900 + }, + { + "epoch": 1.64, + "learning_rate": 3.668523321569795e-06, + "loss": 2.1119, + "step": 38910 + }, + { + "epoch": 1.64, + "learning_rate": 3.6642714401122496e-06, + "loss": 1.5971, + "step": 38920 + }, + { + "epoch": 1.64, + "learning_rate": 3.660019558654705e-06, + "loss": 2.2422, + "step": 38930 + }, + { + "epoch": 1.64, + "learning_rate": 3.6557676771971602e-06, + "loss": 2.1503, + "step": 38940 + }, + { + "epoch": 1.64, + "learning_rate": 3.651515795739615e-06, + "loss": 2.3962, + "step": 38950 + }, + { + "epoch": 1.64, + "learning_rate": 3.6472639142820704e-06, + "loss": 1.8617, + "step": 38960 + }, + { + "epoch": 1.64, + "learning_rate": 3.643012032824525e-06, + "loss": 1.9732, + "step": 38970 + }, + { + "epoch": 1.64, + "learning_rate": 3.63876015136698e-06, + "loss": 2.1593, + "step": 38980 + }, + { + "epoch": 1.64, + "learning_rate": 3.6345082699094355e-06, + "loss": 1.8509, + "step": 38990 + }, + { + "epoch": 1.64, + "learning_rate": 3.6302563884518904e-06, + "loss": 2.1659, + "step": 39000 + }, + { + "epoch": 1.64, + "eval_loss": 1.7403854131698608, + "eval_runtime": 175.8224, + "eval_samples_per_second": 14.236, + "eval_steps_per_second": 7.121, + "step": 39000 + }, + { + "epoch": 1.64, + "learning_rate": 3.626004506994345e-06, + "loss": 2.571, + "step": 39010 + }, + { + "epoch": 1.64, + "learning_rate": 3.6217526255368e-06, + "loss": 2.0228, + "step": 39020 + }, + { + "epoch": 1.64, + "learning_rate": 3.6175007440792554e-06, + "loss": 2.1392, + "step": 39030 + }, + { + "epoch": 1.64, + "learning_rate": 3.6132488626217103e-06, + "loss": 2.2451, + "step": 39040 + }, + { + "epoch": 1.64, + "learning_rate": 3.6089969811641656e-06, + "loss": 2.3339, + "step": 39050 + }, + { + "epoch": 1.64, + "learning_rate": 3.60474509970662e-06, + "loss": 2.0396, + "step": 39060 + }, + { + "epoch": 1.64, + "learning_rate": 3.6004932182490754e-06, + "loss": 2.571, + "step": 39070 + }, + { + "epoch": 1.64, + "learning_rate": 3.5962413367915307e-06, + "loss": 2.0907, + "step": 39080 + }, + { + "epoch": 1.64, + "learning_rate": 3.5919894553339856e-06, + "loss": 2.1145, + "step": 39090 + }, + { + "epoch": 1.64, + "learning_rate": 3.587737573876441e-06, + "loss": 1.8426, + "step": 39100 + }, + { + "epoch": 1.65, + "learning_rate": 3.5834856924188953e-06, + "loss": 1.8662, + "step": 39110 + }, + { + "epoch": 1.65, + "learning_rate": 3.5792338109613506e-06, + "loss": 1.8386, + "step": 39120 + }, + { + "epoch": 1.65, + "learning_rate": 3.574981929503806e-06, + "loss": 1.9898, + "step": 39130 + }, + { + "epoch": 1.65, + "learning_rate": 3.570730048046261e-06, + "loss": 1.7737, + "step": 39140 + }, + { + "epoch": 1.65, + "learning_rate": 3.566478166588716e-06, + "loss": 2.8594, + "step": 39150 + }, + { + "epoch": 1.65, + "learning_rate": 3.5622262851311706e-06, + "loss": 2.5668, + "step": 39160 + }, + { + "epoch": 1.65, + "learning_rate": 3.557974403673626e-06, + "loss": 2.1744, + "step": 39170 + }, + { + "epoch": 1.65, + "learning_rate": 3.5537225222160808e-06, + "loss": 2.2878, + "step": 39180 + }, + { + "epoch": 1.65, + "learning_rate": 3.549470640758536e-06, + "loss": 1.7849, + "step": 39190 + }, + { + "epoch": 1.65, + "learning_rate": 3.5452187593009914e-06, + "loss": 1.9586, + "step": 39200 + }, + { + "epoch": 1.65, + "learning_rate": 3.540966877843446e-06, + "loss": 2.1255, + "step": 39210 + }, + { + "epoch": 1.65, + "learning_rate": 3.536714996385901e-06, + "loss": 2.091, + "step": 39220 + }, + { + "epoch": 1.65, + "learning_rate": 3.532463114928356e-06, + "loss": 2.0458, + "step": 39230 + }, + { + "epoch": 1.65, + "learning_rate": 3.5282112334708113e-06, + "loss": 2.0761, + "step": 39240 + }, + { + "epoch": 1.65, + "learning_rate": 3.523959352013266e-06, + "loss": 2.0046, + "step": 39250 + }, + { + "epoch": 1.65, + "learning_rate": 3.519707470555721e-06, + "loss": 2.2256, + "step": 39260 + }, + { + "epoch": 1.65, + "learning_rate": 3.515455589098176e-06, + "loss": 2.3956, + "step": 39270 + }, + { + "epoch": 1.65, + "learning_rate": 3.5112037076406313e-06, + "loss": 1.9632, + "step": 39280 + }, + { + "epoch": 1.65, + "learning_rate": 3.5069518261830866e-06, + "loss": 1.9887, + "step": 39290 + }, + { + "epoch": 1.65, + "learning_rate": 3.502699944725541e-06, + "loss": 2.1451, + "step": 39300 + }, + { + "epoch": 1.65, + "learning_rate": 3.4984480632679964e-06, + "loss": 2.2727, + "step": 39310 + }, + { + "epoch": 1.65, + "learning_rate": 3.4941961818104512e-06, + "loss": 1.9085, + "step": 39320 + }, + { + "epoch": 1.65, + "learning_rate": 3.4899443003529065e-06, + "loss": 2.1372, + "step": 39330 + }, + { + "epoch": 1.66, + "learning_rate": 3.485692418895362e-06, + "loss": 2.0505, + "step": 39340 + }, + { + "epoch": 1.66, + "learning_rate": 3.4814405374378163e-06, + "loss": 2.2831, + "step": 39350 + }, + { + "epoch": 1.66, + "learning_rate": 3.4771886559802716e-06, + "loss": 1.6421, + "step": 39360 + }, + { + "epoch": 1.66, + "learning_rate": 3.4729367745227265e-06, + "loss": 1.5879, + "step": 39370 + }, + { + "epoch": 1.66, + "learning_rate": 3.468684893065182e-06, + "loss": 2.1306, + "step": 39380 + }, + { + "epoch": 1.66, + "learning_rate": 3.464433011607637e-06, + "loss": 2.0623, + "step": 39390 + }, + { + "epoch": 1.66, + "learning_rate": 3.4601811301500916e-06, + "loss": 1.9673, + "step": 39400 + }, + { + "epoch": 1.66, + "learning_rate": 3.4559292486925464e-06, + "loss": 1.9179, + "step": 39410 + }, + { + "epoch": 1.66, + "learning_rate": 3.4516773672350018e-06, + "loss": 2.3303, + "step": 39420 + }, + { + "epoch": 1.66, + "learning_rate": 3.447425485777457e-06, + "loss": 1.8826, + "step": 39430 + }, + { + "epoch": 1.66, + "learning_rate": 3.443173604319912e-06, + "loss": 2.3108, + "step": 39440 + }, + { + "epoch": 1.66, + "learning_rate": 3.438921722862367e-06, + "loss": 2.4321, + "step": 39450 + }, + { + "epoch": 1.66, + "learning_rate": 3.4346698414048217e-06, + "loss": 2.2305, + "step": 39460 + }, + { + "epoch": 1.66, + "learning_rate": 3.430417959947277e-06, + "loss": 2.3232, + "step": 39470 + }, + { + "epoch": 1.66, + "learning_rate": 3.4261660784897323e-06, + "loss": 2.3438, + "step": 39480 + }, + { + "epoch": 1.66, + "learning_rate": 3.4219141970321868e-06, + "loss": 2.0287, + "step": 39490 + }, + { + "epoch": 1.66, + "learning_rate": 3.417662315574642e-06, + "loss": 1.6394, + "step": 39500 + }, + { + "epoch": 1.66, + "eval_loss": 1.739558219909668, + "eval_runtime": 176.2216, + "eval_samples_per_second": 14.204, + "eval_steps_per_second": 7.105, + "step": 39500 + }, + { + "epoch": 1.66, + "learning_rate": 3.413410434117097e-06, + "loss": 1.9423, + "step": 39510 + }, + { + "epoch": 1.66, + "learning_rate": 3.4091585526595523e-06, + "loss": 2.2243, + "step": 39520 + }, + { + "epoch": 1.66, + "learning_rate": 3.404906671202007e-06, + "loss": 2.1207, + "step": 39530 + }, + { + "epoch": 1.66, + "learning_rate": 3.400654789744462e-06, + "loss": 2.2572, + "step": 39540 + }, + { + "epoch": 1.66, + "learning_rate": 3.396402908286917e-06, + "loss": 2.3871, + "step": 39550 + }, + { + "epoch": 1.66, + "learning_rate": 3.3921510268293722e-06, + "loss": 2.1014, + "step": 39560 + }, + { + "epoch": 1.66, + "learning_rate": 3.3878991453718275e-06, + "loss": 2.235, + "step": 39570 + }, + { + "epoch": 1.67, + "learning_rate": 3.3836472639142824e-06, + "loss": 2.0478, + "step": 39580 + }, + { + "epoch": 1.67, + "learning_rate": 3.3793953824567373e-06, + "loss": 2.2684, + "step": 39590 + }, + { + "epoch": 1.67, + "learning_rate": 3.375143500999192e-06, + "loss": 2.2871, + "step": 39600 + }, + { + "epoch": 1.67, + "learning_rate": 3.3708916195416475e-06, + "loss": 2.3627, + "step": 39610 + }, + { + "epoch": 1.67, + "learning_rate": 3.3666397380841028e-06, + "loss": 2.2261, + "step": 39620 + }, + { + "epoch": 1.67, + "learning_rate": 3.3623878566265577e-06, + "loss": 1.8954, + "step": 39630 + }, + { + "epoch": 1.67, + "learning_rate": 3.3581359751690125e-06, + "loss": 1.9103, + "step": 39640 + }, + { + "epoch": 1.67, + "learning_rate": 3.3538840937114674e-06, + "loss": 2.0318, + "step": 39650 + }, + { + "epoch": 1.67, + "learning_rate": 3.3496322122539227e-06, + "loss": 2.5308, + "step": 39660 + }, + { + "epoch": 1.67, + "learning_rate": 3.3453803307963776e-06, + "loss": 2.1057, + "step": 39670 + }, + { + "epoch": 1.67, + "learning_rate": 3.341128449338833e-06, + "loss": 1.9694, + "step": 39680 + }, + { + "epoch": 1.67, + "learning_rate": 3.3368765678812874e-06, + "loss": 2.1225, + "step": 39690 + }, + { + "epoch": 1.67, + "learning_rate": 3.3326246864237427e-06, + "loss": 1.6829, + "step": 39700 + }, + { + "epoch": 1.67, + "learning_rate": 3.328372804966198e-06, + "loss": 1.8024, + "step": 39710 + }, + { + "epoch": 1.67, + "learning_rate": 3.324120923508653e-06, + "loss": 2.2156, + "step": 39720 + }, + { + "epoch": 1.67, + "learning_rate": 3.319869042051108e-06, + "loss": 2.145, + "step": 39730 + }, + { + "epoch": 1.67, + "learning_rate": 3.3156171605935626e-06, + "loss": 1.945, + "step": 39740 + }, + { + "epoch": 1.67, + "learning_rate": 3.311365279136018e-06, + "loss": 1.4955, + "step": 39750 + }, + { + "epoch": 1.67, + "learning_rate": 3.3071133976784732e-06, + "loss": 1.5375, + "step": 39760 + }, + { + "epoch": 1.67, + "learning_rate": 3.302861516220928e-06, + "loss": 1.9554, + "step": 39770 + }, + { + "epoch": 1.67, + "learning_rate": 3.298609634763383e-06, + "loss": 1.4847, + "step": 39780 + }, + { + "epoch": 1.67, + "learning_rate": 3.294357753305838e-06, + "loss": 1.9126, + "step": 39790 + }, + { + "epoch": 1.67, + "learning_rate": 3.290105871848293e-06, + "loss": 2.4683, + "step": 39800 + }, + { + "epoch": 1.67, + "learning_rate": 3.285853990390748e-06, + "loss": 2.5539, + "step": 39810 + }, + { + "epoch": 1.68, + "learning_rate": 3.2816021089332034e-06, + "loss": 2.2314, + "step": 39820 + }, + { + "epoch": 1.68, + "learning_rate": 3.277350227475658e-06, + "loss": 2.4671, + "step": 39830 + }, + { + "epoch": 1.68, + "learning_rate": 3.273098346018113e-06, + "loss": 2.0932, + "step": 39840 + }, + { + "epoch": 1.68, + "learning_rate": 3.2688464645605685e-06, + "loss": 1.9059, + "step": 39850 + }, + { + "epoch": 1.68, + "learning_rate": 3.2645945831030233e-06, + "loss": 1.9942, + "step": 39860 + }, + { + "epoch": 1.68, + "learning_rate": 3.2603427016454786e-06, + "loss": 2.345, + "step": 39870 + }, + { + "epoch": 1.68, + "learning_rate": 3.256090820187933e-06, + "loss": 1.9271, + "step": 39880 + }, + { + "epoch": 1.68, + "learning_rate": 3.2518389387303884e-06, + "loss": 2.1968, + "step": 39890 + }, + { + "epoch": 1.68, + "learning_rate": 3.2475870572728437e-06, + "loss": 2.1401, + "step": 39900 + }, + { + "epoch": 1.68, + "learning_rate": 3.2433351758152986e-06, + "loss": 1.9934, + "step": 39910 + }, + { + "epoch": 1.68, + "learning_rate": 3.239083294357754e-06, + "loss": 1.9551, + "step": 39920 + }, + { + "epoch": 1.68, + "learning_rate": 3.2348314129002084e-06, + "loss": 1.7779, + "step": 39930 + }, + { + "epoch": 1.68, + "learning_rate": 3.2305795314426637e-06, + "loss": 2.2155, + "step": 39940 + }, + { + "epoch": 1.68, + "learning_rate": 3.2263276499851185e-06, + "loss": 2.4332, + "step": 39950 + }, + { + "epoch": 1.68, + "learning_rate": 3.222075768527574e-06, + "loss": 2.0219, + "step": 39960 + }, + { + "epoch": 1.68, + "learning_rate": 3.217823887070029e-06, + "loss": 1.7372, + "step": 39970 + }, + { + "epoch": 1.68, + "learning_rate": 3.2135720056124836e-06, + "loss": 2.3289, + "step": 39980 + }, + { + "epoch": 1.68, + "learning_rate": 3.209320124154939e-06, + "loss": 2.1965, + "step": 39990 + }, + { + "epoch": 1.68, + "learning_rate": 3.205068242697394e-06, + "loss": 2.1019, + "step": 40000 + }, + { + "epoch": 1.68, + "eval_loss": 1.736899495124817, + "eval_runtime": 176.1277, + "eval_samples_per_second": 14.211, + "eval_steps_per_second": 7.108, + "step": 40000 + }, + { + "epoch": 1.68, + "learning_rate": 3.200816361239849e-06, + "loss": 2.2945, + "step": 40010 + }, + { + "epoch": 1.68, + "learning_rate": 3.1965644797823036e-06, + "loss": 1.9417, + "step": 40020 + }, + { + "epoch": 1.68, + "learning_rate": 3.192312598324759e-06, + "loss": 2.1428, + "step": 40030 + }, + { + "epoch": 1.68, + "learning_rate": 3.188060716867214e-06, + "loss": 2.4317, + "step": 40040 + }, + { + "epoch": 1.68, + "learning_rate": 3.183808835409669e-06, + "loss": 2.1994, + "step": 40050 + }, + { + "epoch": 1.69, + "learning_rate": 3.1795569539521244e-06, + "loss": 1.6647, + "step": 40060 + }, + { + "epoch": 1.69, + "learning_rate": 3.175305072494579e-06, + "loss": 2.3434, + "step": 40070 + }, + { + "epoch": 1.69, + "learning_rate": 3.171053191037034e-06, + "loss": 1.9678, + "step": 40080 + }, + { + "epoch": 1.69, + "learning_rate": 3.166801309579489e-06, + "loss": 2.0424, + "step": 40090 + }, + { + "epoch": 1.69, + "learning_rate": 3.1625494281219443e-06, + "loss": 1.8569, + "step": 40100 + }, + { + "epoch": 1.69, + "learning_rate": 3.1582975466643996e-06, + "loss": 1.929, + "step": 40110 + }, + { + "epoch": 1.69, + "learning_rate": 3.154045665206854e-06, + "loss": 1.7804, + "step": 40120 + }, + { + "epoch": 1.69, + "learning_rate": 3.1497937837493094e-06, + "loss": 2.2012, + "step": 40130 + }, + { + "epoch": 1.69, + "learning_rate": 3.1455419022917643e-06, + "loss": 2.3599, + "step": 40140 + }, + { + "epoch": 1.69, + "learning_rate": 3.1412900208342196e-06, + "loss": 2.2751, + "step": 40150 + }, + { + "epoch": 1.69, + "learning_rate": 3.137038139376675e-06, + "loss": 2.0306, + "step": 40160 + }, + { + "epoch": 1.69, + "learning_rate": 3.1327862579191293e-06, + "loss": 1.9452, + "step": 40170 + }, + { + "epoch": 1.69, + "learning_rate": 3.1285343764615842e-06, + "loss": 2.5462, + "step": 40180 + }, + { + "epoch": 1.69, + "learning_rate": 3.1242824950040395e-06, + "loss": 1.9403, + "step": 40190 + }, + { + "epoch": 1.69, + "learning_rate": 3.120030613546495e-06, + "loss": 1.5562, + "step": 40200 + }, + { + "epoch": 1.69, + "learning_rate": 3.1157787320889497e-06, + "loss": 1.988, + "step": 40210 + }, + { + "epoch": 1.69, + "learning_rate": 3.1115268506314046e-06, + "loss": 1.9707, + "step": 40220 + }, + { + "epoch": 1.69, + "learning_rate": 3.1072749691738595e-06, + "loss": 2.1308, + "step": 40230 + }, + { + "epoch": 1.69, + "learning_rate": 3.1030230877163148e-06, + "loss": 2.4308, + "step": 40240 + }, + { + "epoch": 1.69, + "learning_rate": 3.09877120625877e-06, + "loss": 2.0105, + "step": 40250 + }, + { + "epoch": 1.69, + "learning_rate": 3.0945193248012245e-06, + "loss": 1.8751, + "step": 40260 + }, + { + "epoch": 1.69, + "learning_rate": 3.09026744334368e-06, + "loss": 1.9968, + "step": 40270 + }, + { + "epoch": 1.69, + "learning_rate": 3.0860155618861347e-06, + "loss": 1.909, + "step": 40280 + }, + { + "epoch": 1.7, + "learning_rate": 3.08176368042859e-06, + "loss": 1.3458, + "step": 40290 + }, + { + "epoch": 1.7, + "learning_rate": 3.0775117989710453e-06, + "loss": 2.4988, + "step": 40300 + }, + { + "epoch": 1.7, + "learning_rate": 3.0732599175135e-06, + "loss": 2.0061, + "step": 40310 + }, + { + "epoch": 1.7, + "learning_rate": 3.0690080360559547e-06, + "loss": 2.3653, + "step": 40320 + }, + { + "epoch": 1.7, + "learning_rate": 3.06475615459841e-06, + "loss": 2.0347, + "step": 40330 + }, + { + "epoch": 1.7, + "learning_rate": 3.0605042731408653e-06, + "loss": 2.1352, + "step": 40340 + }, + { + "epoch": 1.7, + "learning_rate": 3.05625239168332e-06, + "loss": 2.0918, + "step": 40350 + }, + { + "epoch": 1.7, + "learning_rate": 3.052000510225775e-06, + "loss": 1.8342, + "step": 40360 + }, + { + "epoch": 1.7, + "learning_rate": 3.04774862876823e-06, + "loss": 2.0742, + "step": 40370 + }, + { + "epoch": 1.7, + "learning_rate": 3.0434967473106852e-06, + "loss": 2.4816, + "step": 40380 + }, + { + "epoch": 1.7, + "learning_rate": 3.0392448658531405e-06, + "loss": 2.3585, + "step": 40390 + }, + { + "epoch": 1.7, + "learning_rate": 3.0349929843955954e-06, + "loss": 2.0589, + "step": 40400 + }, + { + "epoch": 1.7, + "learning_rate": 3.0307411029380503e-06, + "loss": 1.9231, + "step": 40410 + }, + { + "epoch": 1.7, + "learning_rate": 3.026489221480505e-06, + "loss": 1.6204, + "step": 40420 + }, + { + "epoch": 1.7, + "learning_rate": 3.0222373400229605e-06, + "loss": 1.6995, + "step": 40430 + }, + { + "epoch": 1.7, + "learning_rate": 3.0179854585654154e-06, + "loss": 2.1029, + "step": 40440 + }, + { + "epoch": 1.7, + "learning_rate": 3.0137335771078707e-06, + "loss": 2.1059, + "step": 40450 + }, + { + "epoch": 1.7, + "learning_rate": 3.009481695650325e-06, + "loss": 2.1145, + "step": 40460 + }, + { + "epoch": 1.7, + "learning_rate": 3.0052298141927804e-06, + "loss": 1.5657, + "step": 40470 + }, + { + "epoch": 1.7, + "learning_rate": 3.0009779327352358e-06, + "loss": 1.6706, + "step": 40480 + }, + { + "epoch": 1.7, + "learning_rate": 2.9967260512776906e-06, + "loss": 2.0865, + "step": 40490 + }, + { + "epoch": 1.7, + "learning_rate": 2.992474169820146e-06, + "loss": 1.6778, + "step": 40500 + }, + { + "epoch": 1.7, + "eval_loss": 1.747696876525879, + "eval_runtime": 175.7241, + "eval_samples_per_second": 14.244, + "eval_steps_per_second": 7.125, + "step": 40500 + }, + { + "epoch": 1.7, + "learning_rate": 2.9882222883626004e-06, + "loss": 2.2589, + "step": 40510 + }, + { + "epoch": 1.7, + "learning_rate": 2.9839704069050557e-06, + "loss": 1.9951, + "step": 40520 + }, + { + "epoch": 1.71, + "learning_rate": 2.979718525447511e-06, + "loss": 2.075, + "step": 40530 + }, + { + "epoch": 1.71, + "learning_rate": 2.975466643989966e-06, + "loss": 2.4767, + "step": 40540 + }, + { + "epoch": 1.71, + "learning_rate": 2.9712147625324208e-06, + "loss": 2.0628, + "step": 40550 + }, + { + "epoch": 1.71, + "learning_rate": 2.9669628810748757e-06, + "loss": 2.1439, + "step": 40560 + }, + { + "epoch": 1.71, + "learning_rate": 2.962710999617331e-06, + "loss": 2.5834, + "step": 40570 + }, + { + "epoch": 1.71, + "learning_rate": 2.958459118159786e-06, + "loss": 1.9482, + "step": 40580 + }, + { + "epoch": 1.71, + "learning_rate": 2.954207236702241e-06, + "loss": 2.0323, + "step": 40590 + }, + { + "epoch": 1.71, + "learning_rate": 2.9499553552446956e-06, + "loss": 1.833, + "step": 40600 + }, + { + "epoch": 1.71, + "learning_rate": 2.945703473787151e-06, + "loss": 1.7037, + "step": 40610 + }, + { + "epoch": 1.71, + "learning_rate": 2.9414515923296062e-06, + "loss": 2.0919, + "step": 40620 + }, + { + "epoch": 1.71, + "learning_rate": 2.937199710872061e-06, + "loss": 2.0546, + "step": 40630 + }, + { + "epoch": 1.71, + "learning_rate": 2.9329478294145164e-06, + "loss": 2.1635, + "step": 40640 + }, + { + "epoch": 1.71, + "learning_rate": 2.928695947956971e-06, + "loss": 2.1947, + "step": 40650 + }, + { + "epoch": 1.71, + "learning_rate": 2.924444066499426e-06, + "loss": 2.3327, + "step": 40660 + }, + { + "epoch": 1.71, + "learning_rate": 2.9201921850418815e-06, + "loss": 1.9751, + "step": 40670 + }, + { + "epoch": 1.71, + "learning_rate": 2.9159403035843364e-06, + "loss": 2.0792, + "step": 40680 + }, + { + "epoch": 1.71, + "learning_rate": 2.9116884221267917e-06, + "loss": 2.0507, + "step": 40690 + }, + { + "epoch": 1.71, + "learning_rate": 2.907436540669246e-06, + "loss": 2.0128, + "step": 40700 + }, + { + "epoch": 1.71, + "learning_rate": 2.9031846592117014e-06, + "loss": 2.0428, + "step": 40710 + }, + { + "epoch": 1.71, + "learning_rate": 2.8989327777541563e-06, + "loss": 2.24, + "step": 40720 + }, + { + "epoch": 1.71, + "learning_rate": 2.8946808962966116e-06, + "loss": 2.4284, + "step": 40730 + }, + { + "epoch": 1.71, + "learning_rate": 2.890429014839067e-06, + "loss": 2.2595, + "step": 40740 + }, + { + "epoch": 1.71, + "learning_rate": 2.8861771333815214e-06, + "loss": 1.9509, + "step": 40750 + }, + { + "epoch": 1.71, + "learning_rate": 2.8819252519239767e-06, + "loss": 1.7006, + "step": 40760 + }, + { + "epoch": 1.72, + "learning_rate": 2.8776733704664316e-06, + "loss": 2.0867, + "step": 40770 + }, + { + "epoch": 1.72, + "learning_rate": 2.873421489008887e-06, + "loss": 1.9262, + "step": 40780 + }, + { + "epoch": 1.72, + "learning_rate": 2.8691696075513413e-06, + "loss": 1.9245, + "step": 40790 + }, + { + "epoch": 1.72, + "learning_rate": 2.8649177260937966e-06, + "loss": 2.0081, + "step": 40800 + }, + { + "epoch": 1.72, + "learning_rate": 2.860665844636252e-06, + "loss": 2.3462, + "step": 40810 + }, + { + "epoch": 1.72, + "learning_rate": 2.856413963178707e-06, + "loss": 1.8247, + "step": 40820 + }, + { + "epoch": 1.72, + "learning_rate": 2.852162081721162e-06, + "loss": 2.1083, + "step": 40830 + }, + { + "epoch": 1.72, + "learning_rate": 2.8479102002636166e-06, + "loss": 2.024, + "step": 40840 + }, + { + "epoch": 1.72, + "learning_rate": 2.843658318806072e-06, + "loss": 2.2802, + "step": 40850 + }, + { + "epoch": 1.72, + "learning_rate": 2.8394064373485268e-06, + "loss": 2.3956, + "step": 40860 + }, + { + "epoch": 1.72, + "learning_rate": 2.835154555890982e-06, + "loss": 2.0755, + "step": 40870 + }, + { + "epoch": 1.72, + "learning_rate": 2.8309026744334374e-06, + "loss": 2.1163, + "step": 40880 + }, + { + "epoch": 1.72, + "learning_rate": 2.826650792975892e-06, + "loss": 2.0023, + "step": 40890 + }, + { + "epoch": 1.72, + "learning_rate": 2.822398911518347e-06, + "loss": 2.0573, + "step": 40900 + }, + { + "epoch": 1.72, + "learning_rate": 2.818147030060802e-06, + "loss": 1.7237, + "step": 40910 + }, + { + "epoch": 1.72, + "learning_rate": 2.8138951486032573e-06, + "loss": 2.2315, + "step": 40920 + }, + { + "epoch": 1.72, + "learning_rate": 2.8096432671457126e-06, + "loss": 2.4632, + "step": 40930 + }, + { + "epoch": 1.72, + "learning_rate": 2.805391385688167e-06, + "loss": 2.3754, + "step": 40940 + }, + { + "epoch": 1.72, + "learning_rate": 2.8011395042306224e-06, + "loss": 1.8312, + "step": 40950 + }, + { + "epoch": 1.72, + "learning_rate": 2.7968876227730773e-06, + "loss": 2.0822, + "step": 40960 + }, + { + "epoch": 1.72, + "learning_rate": 2.7926357413155326e-06, + "loss": 1.9008, + "step": 40970 + }, + { + "epoch": 1.72, + "learning_rate": 2.7883838598579875e-06, + "loss": 1.6843, + "step": 40980 + }, + { + "epoch": 1.72, + "learning_rate": 2.7841319784004424e-06, + "loss": 2.1599, + "step": 40990 + }, + { + "epoch": 1.72, + "learning_rate": 2.7798800969428972e-06, + "loss": 2.0825, + "step": 41000 + }, + { + "epoch": 1.72, + "eval_loss": 1.7327312231063843, + "eval_runtime": 175.8219, + "eval_samples_per_second": 14.236, + "eval_steps_per_second": 7.121, + "step": 41000 + }, + { + "epoch": 1.73, + "learning_rate": 2.7756282154853525e-06, + "loss": 1.9554, + "step": 41010 + }, + { + "epoch": 1.73, + "learning_rate": 2.771376334027808e-06, + "loss": 2.1843, + "step": 41020 + }, + { + "epoch": 1.73, + "learning_rate": 2.7671244525702623e-06, + "loss": 2.2919, + "step": 41030 + }, + { + "epoch": 1.73, + "learning_rate": 2.7628725711127176e-06, + "loss": 2.1009, + "step": 41040 + }, + { + "epoch": 1.73, + "learning_rate": 2.7586206896551725e-06, + "loss": 1.9775, + "step": 41050 + }, + { + "epoch": 1.73, + "learning_rate": 2.754368808197628e-06, + "loss": 2.2389, + "step": 41060 + }, + { + "epoch": 1.73, + "learning_rate": 2.750116926740083e-06, + "loss": 2.0114, + "step": 41070 + }, + { + "epoch": 1.73, + "learning_rate": 2.7458650452825376e-06, + "loss": 2.124, + "step": 41080 + }, + { + "epoch": 1.73, + "learning_rate": 2.7416131638249924e-06, + "loss": 2.2319, + "step": 41090 + }, + { + "epoch": 1.73, + "learning_rate": 2.7373612823674478e-06, + "loss": 2.1945, + "step": 41100 + }, + { + "epoch": 1.73, + "learning_rate": 2.733109400909903e-06, + "loss": 2.0162, + "step": 41110 + }, + { + "epoch": 1.73, + "learning_rate": 2.728857519452358e-06, + "loss": 2.0864, + "step": 41120 + }, + { + "epoch": 1.73, + "learning_rate": 2.724605637994813e-06, + "loss": 2.4334, + "step": 41130 + }, + { + "epoch": 1.73, + "learning_rate": 2.7203537565372677e-06, + "loss": 2.4696, + "step": 41140 + }, + { + "epoch": 1.73, + "learning_rate": 2.716101875079723e-06, + "loss": 2.141, + "step": 41150 + }, + { + "epoch": 1.73, + "learning_rate": 2.7118499936221783e-06, + "loss": 2.658, + "step": 41160 + }, + { + "epoch": 1.73, + "learning_rate": 2.707598112164633e-06, + "loss": 2.2342, + "step": 41170 + }, + { + "epoch": 1.73, + "learning_rate": 2.703346230707088e-06, + "loss": 1.8181, + "step": 41180 + }, + { + "epoch": 1.73, + "learning_rate": 2.699094349249543e-06, + "loss": 2.1224, + "step": 41190 + }, + { + "epoch": 1.73, + "learning_rate": 2.6948424677919983e-06, + "loss": 2.8241, + "step": 41200 + }, + { + "epoch": 1.73, + "learning_rate": 2.6905905863344536e-06, + "loss": 2.0384, + "step": 41210 + }, + { + "epoch": 1.73, + "learning_rate": 2.6863387048769085e-06, + "loss": 2.2898, + "step": 41220 + }, + { + "epoch": 1.73, + "learning_rate": 2.682086823419363e-06, + "loss": 2.2362, + "step": 41230 + }, + { + "epoch": 1.74, + "learning_rate": 2.6778349419618182e-06, + "loss": 2.3073, + "step": 41240 + }, + { + "epoch": 1.74, + "learning_rate": 2.6735830605042735e-06, + "loss": 2.0308, + "step": 41250 + }, + { + "epoch": 1.74, + "learning_rate": 2.6693311790467284e-06, + "loss": 2.4844, + "step": 41260 + }, + { + "epoch": 1.74, + "learning_rate": 2.6650792975891833e-06, + "loss": 2.058, + "step": 41270 + }, + { + "epoch": 1.74, + "learning_rate": 2.660827416131638e-06, + "loss": 1.623, + "step": 41280 + }, + { + "epoch": 1.74, + "learning_rate": 2.6565755346740935e-06, + "loss": 2.1991, + "step": 41290 + }, + { + "epoch": 1.74, + "learning_rate": 2.6523236532165488e-06, + "loss": 2.2852, + "step": 41300 + }, + { + "epoch": 1.74, + "learning_rate": 2.6480717717590037e-06, + "loss": 2.4844, + "step": 41310 + }, + { + "epoch": 1.74, + "learning_rate": 2.6438198903014585e-06, + "loss": 1.9889, + "step": 41320 + }, + { + "epoch": 1.74, + "learning_rate": 2.6395680088439134e-06, + "loss": 2.1942, + "step": 41330 + }, + { + "epoch": 1.74, + "learning_rate": 2.6353161273863687e-06, + "loss": 2.0585, + "step": 41340 + }, + { + "epoch": 1.74, + "learning_rate": 2.6310642459288236e-06, + "loss": 2.1481, + "step": 41350 + }, + { + "epoch": 1.74, + "learning_rate": 2.626812364471279e-06, + "loss": 2.0574, + "step": 41360 + }, + { + "epoch": 1.74, + "learning_rate": 2.6225604830137334e-06, + "loss": 1.9496, + "step": 41370 + }, + { + "epoch": 1.74, + "learning_rate": 2.6183086015561887e-06, + "loss": 2.2692, + "step": 41380 + }, + { + "epoch": 1.74, + "learning_rate": 2.614056720098644e-06, + "loss": 1.6283, + "step": 41390 + }, + { + "epoch": 1.74, + "learning_rate": 2.609804838641099e-06, + "loss": 1.812, + "step": 41400 + }, + { + "epoch": 1.74, + "learning_rate": 2.605552957183554e-06, + "loss": 2.2804, + "step": 41410 + }, + { + "epoch": 1.74, + "learning_rate": 2.6013010757260086e-06, + "loss": 1.9433, + "step": 41420 + }, + { + "epoch": 1.74, + "learning_rate": 2.597049194268464e-06, + "loss": 2.2863, + "step": 41430 + }, + { + "epoch": 1.74, + "learning_rate": 2.5927973128109192e-06, + "loss": 1.9486, + "step": 41440 + }, + { + "epoch": 1.74, + "learning_rate": 2.588545431353374e-06, + "loss": 1.8636, + "step": 41450 + }, + { + "epoch": 1.74, + "learning_rate": 2.5842935498958294e-06, + "loss": 2.1634, + "step": 41460 + }, + { + "epoch": 1.74, + "learning_rate": 2.580041668438284e-06, + "loss": 2.0405, + "step": 41470 + }, + { + "epoch": 1.75, + "learning_rate": 2.575789786980739e-06, + "loss": 1.8633, + "step": 41480 + }, + { + "epoch": 1.75, + "learning_rate": 2.571537905523194e-06, + "loss": 1.7699, + "step": 41490 + }, + { + "epoch": 1.75, + "learning_rate": 2.5672860240656494e-06, + "loss": 2.5621, + "step": 41500 + }, + { + "epoch": 1.75, + "eval_loss": 1.7315673828125, + "eval_runtime": 175.8241, + "eval_samples_per_second": 14.236, + "eval_steps_per_second": 7.121, + "step": 41500 + }, + { + "epoch": 1.75, + "learning_rate": 2.5630341426081047e-06, + "loss": 2.2221, + "step": 41510 + }, + { + "epoch": 1.75, + "learning_rate": 2.558782261150559e-06, + "loss": 2.1501, + "step": 41520 + }, + { + "epoch": 1.75, + "learning_rate": 2.5545303796930144e-06, + "loss": 2.3411, + "step": 41530 + }, + { + "epoch": 1.75, + "learning_rate": 2.5502784982354693e-06, + "loss": 2.1032, + "step": 41540 + }, + { + "epoch": 1.75, + "learning_rate": 2.5460266167779246e-06, + "loss": 1.8093, + "step": 41550 + }, + { + "epoch": 1.75, + "learning_rate": 2.541774735320379e-06, + "loss": 2.1871, + "step": 41560 + }, + { + "epoch": 1.75, + "learning_rate": 2.5375228538628344e-06, + "loss": 2.5805, + "step": 41570 + }, + { + "epoch": 1.75, + "learning_rate": 2.5332709724052897e-06, + "loss": 2.0588, + "step": 41580 + }, + { + "epoch": 1.75, + "learning_rate": 2.5290190909477446e-06, + "loss": 2.0337, + "step": 41590 + }, + { + "epoch": 1.75, + "learning_rate": 2.5247672094902e-06, + "loss": 1.6492, + "step": 41600 + }, + { + "epoch": 1.75, + "learning_rate": 2.5205153280326544e-06, + "loss": 2.1416, + "step": 41610 + }, + { + "epoch": 1.75, + "learning_rate": 2.5162634465751097e-06, + "loss": 2.5335, + "step": 41620 + }, + { + "epoch": 1.75, + "learning_rate": 2.5120115651175645e-06, + "loss": 1.8925, + "step": 41630 + }, + { + "epoch": 1.75, + "learning_rate": 2.50775968366002e-06, + "loss": 1.8441, + "step": 41640 + }, + { + "epoch": 1.75, + "learning_rate": 2.503507802202475e-06, + "loss": 2.11, + "step": 41650 + }, + { + "epoch": 1.75, + "learning_rate": 2.49925592074493e-06, + "loss": 2.0362, + "step": 41660 + }, + { + "epoch": 1.75, + "learning_rate": 2.495004039287385e-06, + "loss": 1.9229, + "step": 41670 + }, + { + "epoch": 1.75, + "learning_rate": 2.49075215782984e-06, + "loss": 2.3089, + "step": 41680 + }, + { + "epoch": 1.75, + "learning_rate": 2.486500276372295e-06, + "loss": 2.028, + "step": 41690 + }, + { + "epoch": 1.75, + "learning_rate": 2.48224839491475e-06, + "loss": 2.1496, + "step": 41700 + }, + { + "epoch": 1.75, + "learning_rate": 2.4779965134572053e-06, + "loss": 1.9989, + "step": 41710 + }, + { + "epoch": 1.76, + "learning_rate": 2.47374463199966e-06, + "loss": 1.616, + "step": 41720 + }, + { + "epoch": 1.76, + "learning_rate": 2.469492750542115e-06, + "loss": 1.8583, + "step": 41730 + }, + { + "epoch": 1.76, + "learning_rate": 2.46524086908457e-06, + "loss": 2.1548, + "step": 41740 + }, + { + "epoch": 1.76, + "learning_rate": 2.4609889876270252e-06, + "loss": 1.9858, + "step": 41750 + }, + { + "epoch": 1.76, + "learning_rate": 2.45673710616948e-06, + "loss": 2.3754, + "step": 41760 + }, + { + "epoch": 1.76, + "learning_rate": 2.452485224711935e-06, + "loss": 2.2034, + "step": 41770 + }, + { + "epoch": 1.76, + "learning_rate": 2.4482333432543903e-06, + "loss": 2.0934, + "step": 41780 + }, + { + "epoch": 1.76, + "learning_rate": 2.443981461796845e-06, + "loss": 1.7186, + "step": 41790 + }, + { + "epoch": 1.76, + "learning_rate": 2.4397295803393005e-06, + "loss": 2.0101, + "step": 41800 + }, + { + "epoch": 1.76, + "learning_rate": 2.4354776988817554e-06, + "loss": 2.2067, + "step": 41810 + }, + { + "epoch": 1.76, + "learning_rate": 2.4312258174242103e-06, + "loss": 2.0457, + "step": 41820 + }, + { + "epoch": 1.76, + "learning_rate": 2.4269739359666656e-06, + "loss": 2.1525, + "step": 41830 + }, + { + "epoch": 1.76, + "learning_rate": 2.4227220545091204e-06, + "loss": 2.1126, + "step": 41840 + }, + { + "epoch": 1.76, + "learning_rate": 2.4184701730515758e-06, + "loss": 1.9887, + "step": 41850 + }, + { + "epoch": 1.76, + "learning_rate": 2.4142182915940306e-06, + "loss": 2.3211, + "step": 41860 + }, + { + "epoch": 1.76, + "learning_rate": 2.4099664101364855e-06, + "loss": 1.9687, + "step": 41870 + }, + { + "epoch": 1.76, + "learning_rate": 2.405714528678941e-06, + "loss": 1.8773, + "step": 41880 + }, + { + "epoch": 1.76, + "learning_rate": 2.4014626472213957e-06, + "loss": 1.8474, + "step": 41890 + }, + { + "epoch": 1.76, + "learning_rate": 2.3972107657638506e-06, + "loss": 1.8167, + "step": 41900 + }, + { + "epoch": 1.76, + "learning_rate": 2.3929588843063055e-06, + "loss": 2.4848, + "step": 41910 + }, + { + "epoch": 1.76, + "learning_rate": 2.3887070028487608e-06, + "loss": 2.096, + "step": 41920 + }, + { + "epoch": 1.76, + "learning_rate": 2.384455121391216e-06, + "loss": 1.7895, + "step": 41930 + }, + { + "epoch": 1.76, + "learning_rate": 2.380203239933671e-06, + "loss": 2.124, + "step": 41940 + }, + { + "epoch": 1.76, + "learning_rate": 2.375951358476126e-06, + "loss": 1.7622, + "step": 41950 + }, + { + "epoch": 1.77, + "learning_rate": 2.3716994770185807e-06, + "loss": 2.5117, + "step": 41960 + }, + { + "epoch": 1.77, + "learning_rate": 2.367447595561036e-06, + "loss": 1.7705, + "step": 41970 + }, + { + "epoch": 1.77, + "learning_rate": 2.363195714103491e-06, + "loss": 2.0807, + "step": 41980 + }, + { + "epoch": 1.77, + "learning_rate": 2.3589438326459462e-06, + "loss": 2.247, + "step": 41990 + }, + { + "epoch": 1.77, + "learning_rate": 2.354691951188401e-06, + "loss": 2.1884, + "step": 42000 + }, + { + "epoch": 1.77, + "eval_loss": 1.7364351749420166, + "eval_runtime": 174.6812, + "eval_samples_per_second": 14.329, + "eval_steps_per_second": 7.167, + "step": 42000 + }, + { + "epoch": 1.77, + "learning_rate": 2.350440069730856e-06, + "loss": 2.1428, + "step": 42010 + }, + { + "epoch": 1.77, + "learning_rate": 2.3461881882733113e-06, + "loss": 2.2284, + "step": 42020 + }, + { + "epoch": 1.77, + "learning_rate": 2.341936306815766e-06, + "loss": 1.9592, + "step": 42030 + }, + { + "epoch": 1.77, + "learning_rate": 2.337684425358221e-06, + "loss": 1.9115, + "step": 42040 + }, + { + "epoch": 1.77, + "learning_rate": 2.3334325439006764e-06, + "loss": 2.2041, + "step": 42050 + }, + { + "epoch": 1.77, + "learning_rate": 2.3291806624431312e-06, + "loss": 1.9779, + "step": 42060 + }, + { + "epoch": 1.77, + "learning_rate": 2.3249287809855865e-06, + "loss": 1.9501, + "step": 42070 + }, + { + "epoch": 1.77, + "learning_rate": 2.3206768995280414e-06, + "loss": 1.9092, + "step": 42080 + }, + { + "epoch": 1.77, + "learning_rate": 2.3164250180704963e-06, + "loss": 2.0939, + "step": 42090 + }, + { + "epoch": 1.77, + "learning_rate": 2.312173136612951e-06, + "loss": 1.8454, + "step": 42100 + }, + { + "epoch": 1.77, + "learning_rate": 2.3079212551554065e-06, + "loss": 1.9993, + "step": 42110 + }, + { + "epoch": 1.77, + "learning_rate": 2.303669373697862e-06, + "loss": 2.185, + "step": 42120 + }, + { + "epoch": 1.77, + "learning_rate": 2.2994174922403163e-06, + "loss": 2.3047, + "step": 42130 + }, + { + "epoch": 1.77, + "learning_rate": 2.2951656107827716e-06, + "loss": 2.1145, + "step": 42140 + }, + { + "epoch": 1.77, + "learning_rate": 2.2909137293252264e-06, + "loss": 1.759, + "step": 42150 + }, + { + "epoch": 1.77, + "learning_rate": 2.2866618478676818e-06, + "loss": 2.3151, + "step": 42160 + }, + { + "epoch": 1.77, + "learning_rate": 2.2824099664101366e-06, + "loss": 2.107, + "step": 42170 + }, + { + "epoch": 1.77, + "learning_rate": 2.2781580849525915e-06, + "loss": 1.9285, + "step": 42180 + }, + { + "epoch": 1.78, + "learning_rate": 2.273906203495047e-06, + "loss": 1.7268, + "step": 42190 + }, + { + "epoch": 1.78, + "learning_rate": 2.2696543220375017e-06, + "loss": 2.0042, + "step": 42200 + }, + { + "epoch": 1.78, + "learning_rate": 2.265402440579957e-06, + "loss": 1.8875, + "step": 42210 + }, + { + "epoch": 1.78, + "learning_rate": 2.261150559122412e-06, + "loss": 2.0278, + "step": 42220 + }, + { + "epoch": 1.78, + "learning_rate": 2.2568986776648668e-06, + "loss": 2.0565, + "step": 42230 + }, + { + "epoch": 1.78, + "learning_rate": 2.252646796207322e-06, + "loss": 2.0557, + "step": 42240 + }, + { + "epoch": 1.78, + "learning_rate": 2.248394914749777e-06, + "loss": 2.3697, + "step": 42250 + }, + { + "epoch": 1.78, + "learning_rate": 2.244143033292232e-06, + "loss": 2.1159, + "step": 42260 + }, + { + "epoch": 1.78, + "learning_rate": 2.2398911518346867e-06, + "loss": 2.259, + "step": 42270 + }, + { + "epoch": 1.78, + "learning_rate": 2.235639270377142e-06, + "loss": 2.234, + "step": 42280 + }, + { + "epoch": 1.78, + "learning_rate": 2.2313873889195973e-06, + "loss": 1.9082, + "step": 42290 + }, + { + "epoch": 1.78, + "learning_rate": 2.2271355074620522e-06, + "loss": 1.8677, + "step": 42300 + }, + { + "epoch": 1.78, + "learning_rate": 2.222883626004507e-06, + "loss": 1.9178, + "step": 42310 + }, + { + "epoch": 1.78, + "learning_rate": 2.218631744546962e-06, + "loss": 2.0711, + "step": 42320 + }, + { + "epoch": 1.78, + "learning_rate": 2.2143798630894173e-06, + "loss": 1.9575, + "step": 42330 + }, + { + "epoch": 1.78, + "learning_rate": 2.2101279816318726e-06, + "loss": 2.1129, + "step": 42340 + }, + { + "epoch": 1.78, + "learning_rate": 2.2058761001743275e-06, + "loss": 2.1437, + "step": 42350 + }, + { + "epoch": 1.78, + "learning_rate": 2.2016242187167824e-06, + "loss": 2.0994, + "step": 42360 + }, + { + "epoch": 1.78, + "learning_rate": 2.1973723372592372e-06, + "loss": 1.968, + "step": 42370 + }, + { + "epoch": 1.78, + "learning_rate": 2.1931204558016925e-06, + "loss": 2.584, + "step": 42380 + }, + { + "epoch": 1.78, + "learning_rate": 2.1888685743441474e-06, + "loss": 2.273, + "step": 42390 + }, + { + "epoch": 1.78, + "learning_rate": 2.1846166928866023e-06, + "loss": 1.9261, + "step": 42400 + }, + { + "epoch": 1.78, + "learning_rate": 2.1803648114290576e-06, + "loss": 2.1038, + "step": 42410 + }, + { + "epoch": 1.78, + "learning_rate": 2.1761129299715125e-06, + "loss": 1.7815, + "step": 42420 + }, + { + "epoch": 1.79, + "learning_rate": 2.171861048513968e-06, + "loss": 1.9469, + "step": 42430 + }, + { + "epoch": 1.79, + "learning_rate": 2.1676091670564227e-06, + "loss": 1.9804, + "step": 42440 + }, + { + "epoch": 1.79, + "learning_rate": 2.1633572855988776e-06, + "loss": 1.8273, + "step": 42450 + }, + { + "epoch": 1.79, + "learning_rate": 2.159105404141333e-06, + "loss": 2.06, + "step": 42460 + }, + { + "epoch": 1.79, + "learning_rate": 2.1548535226837877e-06, + "loss": 2.397, + "step": 42470 + }, + { + "epoch": 1.79, + "learning_rate": 2.150601641226243e-06, + "loss": 2.0802, + "step": 42480 + }, + { + "epoch": 1.79, + "learning_rate": 2.146349759768698e-06, + "loss": 1.9114, + "step": 42490 + }, + { + "epoch": 1.79, + "learning_rate": 2.142097878311153e-06, + "loss": 1.9195, + "step": 42500 + }, + { + "epoch": 1.79, + "eval_loss": 1.7350587844848633, + "eval_runtime": 175.8431, + "eval_samples_per_second": 14.234, + "eval_steps_per_second": 7.12, + "step": 42500 + }, + { + "epoch": 1.79, + "learning_rate": 2.1378459968536077e-06, + "loss": 1.8898, + "step": 42510 + }, + { + "epoch": 1.79, + "learning_rate": 2.133594115396063e-06, + "loss": 1.8898, + "step": 42520 + }, + { + "epoch": 1.79, + "learning_rate": 2.129342233938518e-06, + "loss": 1.8507, + "step": 42530 + }, + { + "epoch": 1.79, + "learning_rate": 2.1250903524809728e-06, + "loss": 2.0565, + "step": 42540 + }, + { + "epoch": 1.79, + "learning_rate": 2.120838471023428e-06, + "loss": 2.2139, + "step": 42550 + }, + { + "epoch": 1.79, + "learning_rate": 2.116586589565883e-06, + "loss": 2.3074, + "step": 42560 + }, + { + "epoch": 1.79, + "learning_rate": 2.1123347081083383e-06, + "loss": 1.7307, + "step": 42570 + }, + { + "epoch": 1.79, + "learning_rate": 2.108082826650793e-06, + "loss": 2.0316, + "step": 42580 + }, + { + "epoch": 1.79, + "learning_rate": 2.103830945193248e-06, + "loss": 2.7731, + "step": 42590 + }, + { + "epoch": 1.79, + "learning_rate": 2.0995790637357033e-06, + "loss": 1.7271, + "step": 42600 + }, + { + "epoch": 1.79, + "learning_rate": 2.0953271822781582e-06, + "loss": 2.1797, + "step": 42610 + }, + { + "epoch": 1.79, + "learning_rate": 2.0910753008206135e-06, + "loss": 1.8662, + "step": 42620 + }, + { + "epoch": 1.79, + "learning_rate": 2.0868234193630684e-06, + "loss": 2.6484, + "step": 42630 + }, + { + "epoch": 1.79, + "learning_rate": 2.0825715379055233e-06, + "loss": 2.1295, + "step": 42640 + }, + { + "epoch": 1.79, + "learning_rate": 2.0783196564479786e-06, + "loss": 1.6942, + "step": 42650 + }, + { + "epoch": 1.79, + "learning_rate": 2.0740677749904335e-06, + "loss": 2.3459, + "step": 42660 + }, + { + "epoch": 1.8, + "learning_rate": 2.0698158935328884e-06, + "loss": 2.0255, + "step": 42670 + }, + { + "epoch": 1.8, + "learning_rate": 2.0655640120753432e-06, + "loss": 2.4654, + "step": 42680 + }, + { + "epoch": 1.8, + "learning_rate": 2.0613121306177985e-06, + "loss": 1.888, + "step": 42690 + }, + { + "epoch": 1.8, + "learning_rate": 2.057060249160254e-06, + "loss": 1.9747, + "step": 42700 + }, + { + "epoch": 1.8, + "learning_rate": 2.0528083677027087e-06, + "loss": 1.6326, + "step": 42710 + }, + { + "epoch": 1.8, + "learning_rate": 2.0485564862451636e-06, + "loss": 1.6464, + "step": 42720 + }, + { + "epoch": 1.8, + "learning_rate": 2.0443046047876185e-06, + "loss": 2.2843, + "step": 42730 + }, + { + "epoch": 1.8, + "learning_rate": 2.040052723330074e-06, + "loss": 2.2483, + "step": 42740 + }, + { + "epoch": 1.8, + "learning_rate": 2.0358008418725287e-06, + "loss": 2.1342, + "step": 42750 + }, + { + "epoch": 1.8, + "learning_rate": 2.031548960414984e-06, + "loss": 1.8196, + "step": 42760 + }, + { + "epoch": 1.8, + "learning_rate": 2.027297078957439e-06, + "loss": 2.1734, + "step": 42770 + }, + { + "epoch": 1.8, + "learning_rate": 2.0230451974998937e-06, + "loss": 1.8203, + "step": 42780 + }, + { + "epoch": 1.8, + "learning_rate": 2.018793316042349e-06, + "loss": 1.8205, + "step": 42790 + }, + { + "epoch": 1.8, + "learning_rate": 2.014541434584804e-06, + "loss": 1.9341, + "step": 42800 + }, + { + "epoch": 1.8, + "learning_rate": 2.010289553127259e-06, + "loss": 1.7776, + "step": 42810 + }, + { + "epoch": 1.8, + "learning_rate": 2.006037671669714e-06, + "loss": 2.1631, + "step": 42820 + }, + { + "epoch": 1.8, + "learning_rate": 2.001785790212169e-06, + "loss": 2.3703, + "step": 42830 + }, + { + "epoch": 1.8, + "learning_rate": 1.9975339087546243e-06, + "loss": 2.4234, + "step": 42840 + }, + { + "epoch": 1.8, + "learning_rate": 1.993282027297079e-06, + "loss": 2.3221, + "step": 42850 + }, + { + "epoch": 1.8, + "learning_rate": 1.989030145839534e-06, + "loss": 1.8707, + "step": 42860 + }, + { + "epoch": 1.8, + "learning_rate": 1.984778264381989e-06, + "loss": 1.8046, + "step": 42870 + }, + { + "epoch": 1.8, + "learning_rate": 1.9805263829244443e-06, + "loss": 1.9571, + "step": 42880 + }, + { + "epoch": 1.8, + "learning_rate": 1.9762745014668996e-06, + "loss": 2.2652, + "step": 42890 + }, + { + "epoch": 1.8, + "learning_rate": 1.9720226200093544e-06, + "loss": 2.3992, + "step": 42900 + }, + { + "epoch": 1.81, + "learning_rate": 1.9677707385518093e-06, + "loss": 1.7401, + "step": 42910 + }, + { + "epoch": 1.81, + "learning_rate": 1.963518857094264e-06, + "loss": 2.2501, + "step": 42920 + }, + { + "epoch": 1.81, + "learning_rate": 1.9592669756367195e-06, + "loss": 1.6796, + "step": 42930 + }, + { + "epoch": 1.81, + "learning_rate": 1.9550150941791744e-06, + "loss": 2.0322, + "step": 42940 + }, + { + "epoch": 1.81, + "learning_rate": 1.9507632127216293e-06, + "loss": 1.8914, + "step": 42950 + }, + { + "epoch": 1.81, + "learning_rate": 1.9465113312640846e-06, + "loss": 2.4243, + "step": 42960 + }, + { + "epoch": 1.81, + "learning_rate": 1.9422594498065395e-06, + "loss": 2.4589, + "step": 42970 + }, + { + "epoch": 1.81, + "learning_rate": 1.9380075683489948e-06, + "loss": 2.1706, + "step": 42980 + }, + { + "epoch": 1.81, + "learning_rate": 1.9337556868914497e-06, + "loss": 1.8146, + "step": 42990 + }, + { + "epoch": 1.81, + "learning_rate": 1.9295038054339045e-06, + "loss": 2.1325, + "step": 43000 + }, + { + "epoch": 1.81, + "eval_loss": 1.7335894107818604, + "eval_runtime": 176.1718, + "eval_samples_per_second": 14.208, + "eval_steps_per_second": 7.107, + "step": 43000 + }, + { + "epoch": 1.81, + "learning_rate": 1.92525192397636e-06, + "loss": 2.1366, + "step": 43010 + }, + { + "epoch": 1.81, + "learning_rate": 1.9210000425188147e-06, + "loss": 2.2995, + "step": 43020 + }, + { + "epoch": 1.81, + "learning_rate": 1.91674816106127e-06, + "loss": 2.0707, + "step": 43030 + }, + { + "epoch": 1.81, + "learning_rate": 1.9124962796037245e-06, + "loss": 2.3193, + "step": 43040 + }, + { + "epoch": 1.81, + "learning_rate": 1.90824439814618e-06, + "loss": 2.2661, + "step": 43050 + }, + { + "epoch": 1.81, + "learning_rate": 1.903992516688635e-06, + "loss": 2.4724, + "step": 43060 + }, + { + "epoch": 1.81, + "learning_rate": 1.8997406352310898e-06, + "loss": 2.1197, + "step": 43070 + }, + { + "epoch": 1.81, + "learning_rate": 1.895488753773545e-06, + "loss": 2.4334, + "step": 43080 + }, + { + "epoch": 1.81, + "learning_rate": 1.891236872316e-06, + "loss": 2.0957, + "step": 43090 + }, + { + "epoch": 1.81, + "learning_rate": 1.886984990858455e-06, + "loss": 2.2178, + "step": 43100 + }, + { + "epoch": 1.81, + "learning_rate": 1.88273310940091e-06, + "loss": 2.0521, + "step": 43110 + }, + { + "epoch": 1.81, + "learning_rate": 1.878481227943365e-06, + "loss": 2.0047, + "step": 43120 + }, + { + "epoch": 1.81, + "learning_rate": 1.8742293464858201e-06, + "loss": 2.359, + "step": 43130 + }, + { + "epoch": 1.81, + "learning_rate": 1.869977465028275e-06, + "loss": 2.0012, + "step": 43140 + }, + { + "epoch": 1.82, + "learning_rate": 1.8657255835707303e-06, + "loss": 1.5969, + "step": 43150 + }, + { + "epoch": 1.82, + "learning_rate": 1.8614737021131852e-06, + "loss": 2.1817, + "step": 43160 + }, + { + "epoch": 1.82, + "learning_rate": 1.8572218206556403e-06, + "loss": 2.6578, + "step": 43170 + }, + { + "epoch": 1.82, + "learning_rate": 1.8529699391980954e-06, + "loss": 2.5675, + "step": 43180 + }, + { + "epoch": 1.82, + "learning_rate": 1.8487180577405503e-06, + "loss": 2.0409, + "step": 43190 + }, + { + "epoch": 1.82, + "learning_rate": 1.8444661762830054e-06, + "loss": 1.6609, + "step": 43200 + }, + { + "epoch": 1.82, + "learning_rate": 1.8402142948254602e-06, + "loss": 2.0456, + "step": 43210 + }, + { + "epoch": 1.82, + "learning_rate": 1.8359624133679155e-06, + "loss": 1.9981, + "step": 43220 + }, + { + "epoch": 1.82, + "learning_rate": 1.8317105319103706e-06, + "loss": 2.3557, + "step": 43230 + }, + { + "epoch": 1.82, + "learning_rate": 1.8274586504528255e-06, + "loss": 1.8564, + "step": 43240 + }, + { + "epoch": 1.82, + "learning_rate": 1.8232067689952806e-06, + "loss": 2.1589, + "step": 43250 + }, + { + "epoch": 1.82, + "learning_rate": 1.8189548875377355e-06, + "loss": 2.1792, + "step": 43260 + }, + { + "epoch": 1.82, + "learning_rate": 1.8147030060801906e-06, + "loss": 2.0653, + "step": 43270 + }, + { + "epoch": 1.82, + "learning_rate": 1.8104511246226455e-06, + "loss": 2.2793, + "step": 43280 + }, + { + "epoch": 1.82, + "learning_rate": 1.8061992431651008e-06, + "loss": 1.7779, + "step": 43290 + }, + { + "epoch": 1.82, + "learning_rate": 1.8019473617075559e-06, + "loss": 2.2637, + "step": 43300 + }, + { + "epoch": 1.82, + "learning_rate": 1.7976954802500107e-06, + "loss": 1.8934, + "step": 43310 + }, + { + "epoch": 1.82, + "learning_rate": 1.7934435987924658e-06, + "loss": 1.9671, + "step": 43320 + }, + { + "epoch": 1.82, + "learning_rate": 1.7891917173349207e-06, + "loss": 1.7957, + "step": 43330 + }, + { + "epoch": 1.82, + "learning_rate": 1.7849398358773758e-06, + "loss": 2.1045, + "step": 43340 + }, + { + "epoch": 1.82, + "learning_rate": 1.7806879544198311e-06, + "loss": 2.2057, + "step": 43350 + }, + { + "epoch": 1.82, + "learning_rate": 1.776436072962286e-06, + "loss": 2.4658, + "step": 43360 + }, + { + "epoch": 1.82, + "learning_rate": 1.772184191504741e-06, + "loss": 2.2629, + "step": 43370 + }, + { + "epoch": 1.83, + "learning_rate": 1.767932310047196e-06, + "loss": 2.2685, + "step": 43380 + }, + { + "epoch": 1.83, + "learning_rate": 1.763680428589651e-06, + "loss": 1.9794, + "step": 43390 + }, + { + "epoch": 1.83, + "learning_rate": 1.759428547132106e-06, + "loss": 2.2, + "step": 43400 + }, + { + "epoch": 1.83, + "learning_rate": 1.755176665674561e-06, + "loss": 2.2813, + "step": 43410 + }, + { + "epoch": 1.83, + "learning_rate": 1.7509247842170164e-06, + "loss": 2.2738, + "step": 43420 + }, + { + "epoch": 1.83, + "learning_rate": 1.7466729027594712e-06, + "loss": 1.9699, + "step": 43430 + }, + { + "epoch": 1.83, + "learning_rate": 1.7424210213019263e-06, + "loss": 1.7798, + "step": 43440 + }, + { + "epoch": 1.83, + "learning_rate": 1.7381691398443812e-06, + "loss": 2.2218, + "step": 43450 + }, + { + "epoch": 1.83, + "learning_rate": 1.7339172583868363e-06, + "loss": 2.3333, + "step": 43460 + }, + { + "epoch": 1.83, + "learning_rate": 1.7296653769292914e-06, + "loss": 2.2299, + "step": 43470 + }, + { + "epoch": 1.83, + "learning_rate": 1.7254134954717463e-06, + "loss": 2.229, + "step": 43480 + }, + { + "epoch": 1.83, + "learning_rate": 1.7211616140142016e-06, + "loss": 2.1926, + "step": 43490 + }, + { + "epoch": 1.83, + "learning_rate": 1.7169097325566565e-06, + "loss": 1.9533, + "step": 43500 + }, + { + "epoch": 1.83, + "eval_loss": 1.737502098083496, + "eval_runtime": 175.9057, + "eval_samples_per_second": 14.229, + "eval_steps_per_second": 7.117, + "step": 43500 + }, + { + "epoch": 1.83, + "learning_rate": 1.7126578510991116e-06, + "loss": 1.6707, + "step": 43510 + }, + { + "epoch": 1.83, + "learning_rate": 1.7084059696415664e-06, + "loss": 1.8694, + "step": 43520 + }, + { + "epoch": 1.83, + "learning_rate": 1.7041540881840215e-06, + "loss": 2.3288, + "step": 43530 + }, + { + "epoch": 1.83, + "learning_rate": 1.6999022067264766e-06, + "loss": 2.1223, + "step": 43540 + }, + { + "epoch": 1.83, + "learning_rate": 1.6956503252689315e-06, + "loss": 1.8674, + "step": 43550 + }, + { + "epoch": 1.83, + "learning_rate": 1.6913984438113868e-06, + "loss": 2.1238, + "step": 43560 + }, + { + "epoch": 1.83, + "learning_rate": 1.6871465623538417e-06, + "loss": 1.9917, + "step": 43570 + }, + { + "epoch": 1.83, + "learning_rate": 1.6828946808962968e-06, + "loss": 2.1347, + "step": 43580 + }, + { + "epoch": 1.83, + "learning_rate": 1.6786427994387519e-06, + "loss": 1.9695, + "step": 43590 + }, + { + "epoch": 1.83, + "learning_rate": 1.6743909179812068e-06, + "loss": 2.4367, + "step": 43600 + }, + { + "epoch": 1.83, + "learning_rate": 1.6701390365236619e-06, + "loss": 2.082, + "step": 43610 + }, + { + "epoch": 1.84, + "learning_rate": 1.6658871550661167e-06, + "loss": 2.1691, + "step": 43620 + }, + { + "epoch": 1.84, + "learning_rate": 1.661635273608572e-06, + "loss": 2.0205, + "step": 43630 + }, + { + "epoch": 1.84, + "learning_rate": 1.657383392151027e-06, + "loss": 1.9344, + "step": 43640 + }, + { + "epoch": 1.84, + "learning_rate": 1.653131510693482e-06, + "loss": 1.7473, + "step": 43650 + }, + { + "epoch": 1.84, + "learning_rate": 1.6488796292359371e-06, + "loss": 2.46, + "step": 43660 + }, + { + "epoch": 1.84, + "learning_rate": 1.644627747778392e-06, + "loss": 1.9346, + "step": 43670 + }, + { + "epoch": 1.84, + "learning_rate": 1.640375866320847e-06, + "loss": 1.8243, + "step": 43680 + }, + { + "epoch": 1.84, + "learning_rate": 1.636123984863302e-06, + "loss": 2.0834, + "step": 43690 + }, + { + "epoch": 1.84, + "learning_rate": 1.6318721034057573e-06, + "loss": 2.1517, + "step": 43700 + }, + { + "epoch": 1.84, + "learning_rate": 1.6276202219482124e-06, + "loss": 2.3432, + "step": 43710 + }, + { + "epoch": 1.84, + "learning_rate": 1.6233683404906673e-06, + "loss": 2.3242, + "step": 43720 + }, + { + "epoch": 1.84, + "learning_rate": 1.6191164590331224e-06, + "loss": 2.154, + "step": 43730 + }, + { + "epoch": 1.84, + "learning_rate": 1.6148645775755772e-06, + "loss": 2.5902, + "step": 43740 + }, + { + "epoch": 1.84, + "learning_rate": 1.6106126961180323e-06, + "loss": 2.1923, + "step": 43750 + }, + { + "epoch": 1.84, + "learning_rate": 1.6063608146604872e-06, + "loss": 2.0517, + "step": 43760 + }, + { + "epoch": 1.84, + "learning_rate": 1.6021089332029425e-06, + "loss": 1.5443, + "step": 43770 + }, + { + "epoch": 1.84, + "learning_rate": 1.5978570517453976e-06, + "loss": 2.1229, + "step": 43780 + }, + { + "epoch": 1.84, + "learning_rate": 1.5936051702878525e-06, + "loss": 2.3246, + "step": 43790 + }, + { + "epoch": 1.84, + "learning_rate": 1.5893532888303076e-06, + "loss": 2.3142, + "step": 43800 + }, + { + "epoch": 1.84, + "learning_rate": 1.5851014073727625e-06, + "loss": 2.4578, + "step": 43810 + }, + { + "epoch": 1.84, + "learning_rate": 1.5808495259152176e-06, + "loss": 1.636, + "step": 43820 + }, + { + "epoch": 1.84, + "learning_rate": 1.5765976444576729e-06, + "loss": 1.7894, + "step": 43830 + }, + { + "epoch": 1.84, + "learning_rate": 1.5723457630001277e-06, + "loss": 2.3135, + "step": 43840 + }, + { + "epoch": 1.84, + "learning_rate": 1.5680938815425828e-06, + "loss": 2.2809, + "step": 43850 + }, + { + "epoch": 1.85, + "learning_rate": 1.5638420000850377e-06, + "loss": 2.3676, + "step": 43860 + }, + { + "epoch": 1.85, + "learning_rate": 1.5595901186274928e-06, + "loss": 2.0304, + "step": 43870 + }, + { + "epoch": 1.85, + "learning_rate": 1.5553382371699477e-06, + "loss": 2.2491, + "step": 43880 + }, + { + "epoch": 1.85, + "learning_rate": 1.5510863557124028e-06, + "loss": 2.1846, + "step": 43890 + }, + { + "epoch": 1.85, + "learning_rate": 1.546834474254858e-06, + "loss": 1.8177, + "step": 43900 + }, + { + "epoch": 1.85, + "learning_rate": 1.5425825927973128e-06, + "loss": 2.2895, + "step": 43910 + }, + { + "epoch": 1.85, + "learning_rate": 1.538330711339768e-06, + "loss": 1.8667, + "step": 43920 + }, + { + "epoch": 1.85, + "learning_rate": 1.534078829882223e-06, + "loss": 2.2535, + "step": 43930 + }, + { + "epoch": 1.85, + "learning_rate": 1.529826948424678e-06, + "loss": 2.3201, + "step": 43940 + }, + { + "epoch": 1.85, + "learning_rate": 1.5255750669671331e-06, + "loss": 1.79, + "step": 43950 + }, + { + "epoch": 1.85, + "learning_rate": 1.521323185509588e-06, + "loss": 2.1128, + "step": 43960 + }, + { + "epoch": 1.85, + "learning_rate": 1.5170713040520433e-06, + "loss": 1.9392, + "step": 43970 + }, + { + "epoch": 1.85, + "learning_rate": 1.512819422594498e-06, + "loss": 1.7095, + "step": 43980 + }, + { + "epoch": 1.85, + "learning_rate": 1.5085675411369533e-06, + "loss": 1.665, + "step": 43990 + }, + { + "epoch": 1.85, + "learning_rate": 1.5043156596794082e-06, + "loss": 2.4102, + "step": 44000 + }, + { + "epoch": 1.85, + "eval_loss": 1.737618327140808, + "eval_runtime": 176.0317, + "eval_samples_per_second": 14.219, + "eval_steps_per_second": 7.112, + "step": 44000 + }, + { + "epoch": 1.85, + "learning_rate": 1.5000637782218633e-06, + "loss": 1.961, + "step": 44010 + }, + { + "epoch": 1.85, + "learning_rate": 1.4958118967643184e-06, + "loss": 1.9114, + "step": 44020 + }, + { + "epoch": 1.85, + "learning_rate": 1.4915600153067733e-06, + "loss": 2.2823, + "step": 44030 + }, + { + "epoch": 1.85, + "learning_rate": 1.4873081338492283e-06, + "loss": 2.166, + "step": 44040 + }, + { + "epoch": 1.85, + "learning_rate": 1.4830562523916832e-06, + "loss": 2.2117, + "step": 44050 + }, + { + "epoch": 1.85, + "learning_rate": 1.4788043709341385e-06, + "loss": 2.1801, + "step": 44060 + }, + { + "epoch": 1.85, + "learning_rate": 1.4745524894765936e-06, + "loss": 2.0113, + "step": 44070 + }, + { + "epoch": 1.85, + "learning_rate": 1.4703006080190485e-06, + "loss": 1.9797, + "step": 44080 + }, + { + "epoch": 1.85, + "learning_rate": 1.4660487265615036e-06, + "loss": 2.0611, + "step": 44090 + }, + { + "epoch": 1.86, + "learning_rate": 1.4617968451039585e-06, + "loss": 2.1161, + "step": 44100 + }, + { + "epoch": 1.86, + "learning_rate": 1.4575449636464136e-06, + "loss": 1.682, + "step": 44110 + }, + { + "epoch": 1.86, + "learning_rate": 1.4532930821888689e-06, + "loss": 2.1025, + "step": 44120 + }, + { + "epoch": 1.86, + "learning_rate": 1.4490412007313238e-06, + "loss": 1.86, + "step": 44130 + }, + { + "epoch": 1.86, + "learning_rate": 1.4447893192737789e-06, + "loss": 1.8424, + "step": 44140 + }, + { + "epoch": 1.86, + "learning_rate": 1.4405374378162337e-06, + "loss": 1.8724, + "step": 44150 + }, + { + "epoch": 1.86, + "learning_rate": 1.4362855563586888e-06, + "loss": 2.3283, + "step": 44160 + }, + { + "epoch": 1.86, + "learning_rate": 1.4320336749011437e-06, + "loss": 1.9669, + "step": 44170 + }, + { + "epoch": 1.86, + "learning_rate": 1.4277817934435988e-06, + "loss": 2.5114, + "step": 44180 + }, + { + "epoch": 1.86, + "learning_rate": 1.4235299119860541e-06, + "loss": 2.3339, + "step": 44190 + }, + { + "epoch": 1.86, + "learning_rate": 1.419278030528509e-06, + "loss": 2.0356, + "step": 44200 + }, + { + "epoch": 1.86, + "learning_rate": 1.415026149070964e-06, + "loss": 2.1621, + "step": 44210 + }, + { + "epoch": 1.86, + "learning_rate": 1.410774267613419e-06, + "loss": 1.8599, + "step": 44220 + }, + { + "epoch": 1.86, + "learning_rate": 1.406522386155874e-06, + "loss": 1.8576, + "step": 44230 + }, + { + "epoch": 1.86, + "learning_rate": 1.4022705046983292e-06, + "loss": 2.0705, + "step": 44240 + }, + { + "epoch": 1.86, + "learning_rate": 1.398018623240784e-06, + "loss": 1.9939, + "step": 44250 + }, + { + "epoch": 1.86, + "learning_rate": 1.3937667417832394e-06, + "loss": 2.3555, + "step": 44260 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895148603256942e-06, + "loss": 2.4827, + "step": 44270 + }, + { + "epoch": 1.86, + "learning_rate": 1.3852629788681493e-06, + "loss": 2.4176, + "step": 44280 + }, + { + "epoch": 1.86, + "learning_rate": 1.3810110974106042e-06, + "loss": 2.4387, + "step": 44290 + }, + { + "epoch": 1.86, + "learning_rate": 1.3767592159530593e-06, + "loss": 2.0681, + "step": 44300 + }, + { + "epoch": 1.86, + "learning_rate": 1.3725073344955144e-06, + "loss": 1.8416, + "step": 44310 + }, + { + "epoch": 1.86, + "learning_rate": 1.3682554530379693e-06, + "loss": 2.4489, + "step": 44320 + }, + { + "epoch": 1.87, + "learning_rate": 1.3640035715804246e-06, + "loss": 2.2564, + "step": 44330 + }, + { + "epoch": 1.87, + "learning_rate": 1.3597516901228795e-06, + "loss": 1.8832, + "step": 44340 + }, + { + "epoch": 1.87, + "learning_rate": 1.3554998086653346e-06, + "loss": 2.1685, + "step": 44350 + }, + { + "epoch": 1.87, + "learning_rate": 1.3512479272077897e-06, + "loss": 2.0292, + "step": 44360 + }, + { + "epoch": 1.87, + "learning_rate": 1.3469960457502445e-06, + "loss": 1.9095, + "step": 44370 + }, + { + "epoch": 1.87, + "learning_rate": 1.3427441642926996e-06, + "loss": 2.0641, + "step": 44380 + }, + { + "epoch": 1.87, + "learning_rate": 1.3384922828351545e-06, + "loss": 2.5147, + "step": 44390 + }, + { + "epoch": 1.87, + "learning_rate": 1.3342404013776098e-06, + "loss": 2.2525, + "step": 44400 + }, + { + "epoch": 1.87, + "learning_rate": 1.3299885199200647e-06, + "loss": 2.2836, + "step": 44410 + }, + { + "epoch": 1.87, + "learning_rate": 1.3257366384625198e-06, + "loss": 2.2603, + "step": 44420 + }, + { + "epoch": 1.87, + "learning_rate": 1.3214847570049749e-06, + "loss": 1.9764, + "step": 44430 + }, + { + "epoch": 1.87, + "learning_rate": 1.3172328755474298e-06, + "loss": 2.1242, + "step": 44440 + }, + { + "epoch": 1.87, + "learning_rate": 1.3129809940898849e-06, + "loss": 2.5716, + "step": 44450 + }, + { + "epoch": 1.87, + "learning_rate": 1.3087291126323397e-06, + "loss": 2.1799, + "step": 44460 + }, + { + "epoch": 1.87, + "learning_rate": 1.304477231174795e-06, + "loss": 1.9316, + "step": 44470 + }, + { + "epoch": 1.87, + "learning_rate": 1.3002253497172501e-06, + "loss": 2.0683, + "step": 44480 + }, + { + "epoch": 1.87, + "learning_rate": 1.295973468259705e-06, + "loss": 2.4333, + "step": 44490 + }, + { + "epoch": 1.87, + "learning_rate": 1.2917215868021601e-06, + "loss": 2.1635, + "step": 44500 + }, + { + "epoch": 1.87, + "eval_loss": 1.731579303741455, + "eval_runtime": 175.8675, + "eval_samples_per_second": 14.232, + "eval_steps_per_second": 7.119, + "step": 44500 + }, + { + "epoch": 1.87, + "learning_rate": 1.287469705344615e-06, + "loss": 2.051, + "step": 44510 + }, + { + "epoch": 1.87, + "learning_rate": 1.28321782388707e-06, + "loss": 2.4778, + "step": 44520 + }, + { + "epoch": 1.87, + "learning_rate": 1.278965942429525e-06, + "loss": 2.21, + "step": 44530 + }, + { + "epoch": 1.87, + "learning_rate": 1.2747140609719803e-06, + "loss": 2.3338, + "step": 44540 + }, + { + "epoch": 1.87, + "learning_rate": 1.2704621795144354e-06, + "loss": 1.7422, + "step": 44550 + }, + { + "epoch": 1.87, + "learning_rate": 1.2662102980568903e-06, + "loss": 1.9787, + "step": 44560 + }, + { + "epoch": 1.88, + "learning_rate": 1.2619584165993453e-06, + "loss": 1.9784, + "step": 44570 + }, + { + "epoch": 1.88, + "learning_rate": 1.2577065351418002e-06, + "loss": 2.1287, + "step": 44580 + }, + { + "epoch": 1.88, + "learning_rate": 1.2534546536842553e-06, + "loss": 2.0922, + "step": 44590 + }, + { + "epoch": 1.88, + "learning_rate": 1.2492027722267104e-06, + "loss": 2.2547, + "step": 44600 + }, + { + "epoch": 1.88, + "learning_rate": 1.2449508907691655e-06, + "loss": 1.8577, + "step": 44610 + }, + { + "epoch": 1.88, + "learning_rate": 1.2406990093116204e-06, + "loss": 1.823, + "step": 44620 + }, + { + "epoch": 1.88, + "learning_rate": 1.2364471278540755e-06, + "loss": 1.8993, + "step": 44630 + }, + { + "epoch": 1.88, + "learning_rate": 1.2321952463965306e-06, + "loss": 1.6693, + "step": 44640 + }, + { + "epoch": 1.88, + "learning_rate": 1.2279433649389857e-06, + "loss": 1.5397, + "step": 44650 + }, + { + "epoch": 1.88, + "learning_rate": 1.2236914834814406e-06, + "loss": 2.1911, + "step": 44660 + }, + { + "epoch": 1.88, + "learning_rate": 1.2194396020238957e-06, + "loss": 2.1, + "step": 44670 + }, + { + "epoch": 1.88, + "learning_rate": 1.2151877205663507e-06, + "loss": 1.9287, + "step": 44680 + }, + { + "epoch": 1.88, + "learning_rate": 1.2109358391088058e-06, + "loss": 2.3998, + "step": 44690 + }, + { + "epoch": 1.88, + "learning_rate": 1.2066839576512607e-06, + "loss": 2.2794, + "step": 44700 + }, + { + "epoch": 1.88, + "learning_rate": 1.2024320761937158e-06, + "loss": 2.3223, + "step": 44710 + }, + { + "epoch": 1.88, + "learning_rate": 1.198180194736171e-06, + "loss": 2.4034, + "step": 44720 + }, + { + "epoch": 1.88, + "learning_rate": 1.1939283132786258e-06, + "loss": 2.3119, + "step": 44730 + }, + { + "epoch": 1.88, + "learning_rate": 1.1896764318210809e-06, + "loss": 2.0402, + "step": 44740 + }, + { + "epoch": 1.88, + "learning_rate": 1.185424550363536e-06, + "loss": 1.5494, + "step": 44750 + }, + { + "epoch": 1.88, + "learning_rate": 1.181172668905991e-06, + "loss": 1.6709, + "step": 44760 + }, + { + "epoch": 1.88, + "learning_rate": 1.176920787448446e-06, + "loss": 1.8675, + "step": 44770 + }, + { + "epoch": 1.88, + "learning_rate": 1.172668905990901e-06, + "loss": 2.3726, + "step": 44780 + }, + { + "epoch": 1.88, + "learning_rate": 1.1684170245333561e-06, + "loss": 2.1301, + "step": 44790 + }, + { + "epoch": 1.88, + "learning_rate": 1.164165143075811e-06, + "loss": 2.1072, + "step": 44800 + }, + { + "epoch": 1.89, + "learning_rate": 1.1599132616182663e-06, + "loss": 1.9413, + "step": 44810 + }, + { + "epoch": 1.89, + "learning_rate": 1.1556613801607212e-06, + "loss": 1.8215, + "step": 44820 + }, + { + "epoch": 1.89, + "learning_rate": 1.1514094987031763e-06, + "loss": 2.2847, + "step": 44830 + }, + { + "epoch": 1.89, + "learning_rate": 1.1471576172456312e-06, + "loss": 1.9615, + "step": 44840 + }, + { + "epoch": 1.89, + "learning_rate": 1.1429057357880863e-06, + "loss": 2.334, + "step": 44850 + }, + { + "epoch": 1.89, + "learning_rate": 1.1386538543305414e-06, + "loss": 2.1077, + "step": 44860 + }, + { + "epoch": 1.89, + "learning_rate": 1.1344019728729965e-06, + "loss": 1.8033, + "step": 44870 + }, + { + "epoch": 1.89, + "learning_rate": 1.1301500914154516e-06, + "loss": 2.0681, + "step": 44880 + }, + { + "epoch": 1.89, + "learning_rate": 1.1258982099579064e-06, + "loss": 2.0213, + "step": 44890 + }, + { + "epoch": 1.89, + "learning_rate": 1.1216463285003615e-06, + "loss": 1.8618, + "step": 44900 + }, + { + "epoch": 1.89, + "learning_rate": 1.1173944470428164e-06, + "loss": 1.9152, + "step": 44910 + }, + { + "epoch": 1.89, + "learning_rate": 1.1131425655852715e-06, + "loss": 2.1678, + "step": 44920 + }, + { + "epoch": 1.89, + "learning_rate": 1.1088906841277266e-06, + "loss": 2.3216, + "step": 44930 + }, + { + "epoch": 1.89, + "learning_rate": 1.1046388026701817e-06, + "loss": 2.428, + "step": 44940 + }, + { + "epoch": 1.89, + "learning_rate": 1.1003869212126366e-06, + "loss": 2.1514, + "step": 44950 + }, + { + "epoch": 1.89, + "learning_rate": 1.0961350397550917e-06, + "loss": 1.9048, + "step": 44960 + }, + { + "epoch": 1.89, + "learning_rate": 1.0918831582975468e-06, + "loss": 2.1308, + "step": 44970 + }, + { + "epoch": 1.89, + "learning_rate": 1.0876312768400016e-06, + "loss": 1.7374, + "step": 44980 + }, + { + "epoch": 1.89, + "learning_rate": 1.083379395382457e-06, + "loss": 2.0971, + "step": 44990 + }, + { + "epoch": 1.89, + "learning_rate": 1.0791275139249118e-06, + "loss": 2.0447, + "step": 45000 + }, + { + "epoch": 1.89, + "eval_loss": 1.7354670763015747, + "eval_runtime": 175.9119, + "eval_samples_per_second": 14.229, + "eval_steps_per_second": 7.117, + "step": 45000 + }, + { + "epoch": 1.89, + "learning_rate": 1.074875632467367e-06, + "loss": 1.6593, + "step": 45010 + }, + { + "epoch": 1.89, + "learning_rate": 1.0706237510098218e-06, + "loss": 1.9934, + "step": 45020 + }, + { + "epoch": 1.89, + "learning_rate": 1.066371869552277e-06, + "loss": 2.028, + "step": 45030 + }, + { + "epoch": 1.89, + "learning_rate": 1.062119988094732e-06, + "loss": 1.8805, + "step": 45040 + }, + { + "epoch": 1.9, + "learning_rate": 1.057868106637187e-06, + "loss": 2.2899, + "step": 45050 + }, + { + "epoch": 1.9, + "learning_rate": 1.0536162251796422e-06, + "loss": 2.0075, + "step": 45060 + }, + { + "epoch": 1.9, + "learning_rate": 1.049364343722097e-06, + "loss": 1.8379, + "step": 45070 + }, + { + "epoch": 1.9, + "learning_rate": 1.0451124622645522e-06, + "loss": 2.3873, + "step": 45080 + }, + { + "epoch": 1.9, + "learning_rate": 1.040860580807007e-06, + "loss": 2.2744, + "step": 45090 + }, + { + "epoch": 1.9, + "learning_rate": 1.0366086993494623e-06, + "loss": 1.6716, + "step": 45100 + }, + { + "epoch": 1.9, + "learning_rate": 1.0323568178919172e-06, + "loss": 2.1744, + "step": 45110 + }, + { + "epoch": 1.9, + "learning_rate": 1.0281049364343723e-06, + "loss": 2.097, + "step": 45120 + }, + { + "epoch": 1.9, + "learning_rate": 1.0238530549768274e-06, + "loss": 2.1333, + "step": 45130 + }, + { + "epoch": 1.9, + "learning_rate": 1.0196011735192823e-06, + "loss": 1.8945, + "step": 45140 + }, + { + "epoch": 1.9, + "learning_rate": 1.0153492920617374e-06, + "loss": 2.2689, + "step": 45150 + }, + { + "epoch": 1.9, + "learning_rate": 1.0110974106041925e-06, + "loss": 1.9336, + "step": 45160 + }, + { + "epoch": 1.9, + "learning_rate": 1.0068455291466476e-06, + "loss": 2.1484, + "step": 45170 + }, + { + "epoch": 1.9, + "learning_rate": 1.0025936476891025e-06, + "loss": 1.8955, + "step": 45180 + }, + { + "epoch": 1.9, + "learning_rate": 9.983417662315576e-07, + "loss": 1.9074, + "step": 45190 + }, + { + "epoch": 1.9, + "learning_rate": 9.940898847740127e-07, + "loss": 1.9534, + "step": 45200 + }, + { + "epoch": 1.9, + "learning_rate": 9.898380033164675e-07, + "loss": 2.0347, + "step": 45210 + }, + { + "epoch": 1.9, + "learning_rate": 9.855861218589226e-07, + "loss": 1.9925, + "step": 45220 + }, + { + "epoch": 1.9, + "learning_rate": 9.813342404013777e-07, + "loss": 1.9415, + "step": 45230 + }, + { + "epoch": 1.9, + "learning_rate": 9.770823589438328e-07, + "loss": 2.0953, + "step": 45240 + }, + { + "epoch": 1.9, + "learning_rate": 9.728304774862877e-07, + "loss": 1.947, + "step": 45250 + }, + { + "epoch": 1.9, + "learning_rate": 9.685785960287428e-07, + "loss": 1.7225, + "step": 45260 + }, + { + "epoch": 1.9, + "learning_rate": 9.643267145711979e-07, + "loss": 2.1531, + "step": 45270 + }, + { + "epoch": 1.91, + "learning_rate": 9.60074833113653e-07, + "loss": 2.3571, + "step": 45280 + }, + { + "epoch": 1.91, + "learning_rate": 9.558229516561079e-07, + "loss": 1.992, + "step": 45290 + }, + { + "epoch": 1.91, + "learning_rate": 9.51571070198563e-07, + "loss": 1.9213, + "step": 45300 + }, + { + "epoch": 1.91, + "learning_rate": 9.473191887410179e-07, + "loss": 1.6985, + "step": 45310 + }, + { + "epoch": 1.91, + "learning_rate": 9.43067307283473e-07, + "loss": 2.0781, + "step": 45320 + }, + { + "epoch": 1.91, + "learning_rate": 9.38815425825928e-07, + "loss": 2.4186, + "step": 45330 + }, + { + "epoch": 1.91, + "learning_rate": 9.345635443683831e-07, + "loss": 1.806, + "step": 45340 + }, + { + "epoch": 1.91, + "learning_rate": 9.303116629108382e-07, + "loss": 2.1777, + "step": 45350 + }, + { + "epoch": 1.91, + "learning_rate": 9.260597814532932e-07, + "loss": 2.0575, + "step": 45360 + }, + { + "epoch": 1.91, + "learning_rate": 9.218078999957482e-07, + "loss": 2.0456, + "step": 45370 + }, + { + "epoch": 1.91, + "learning_rate": 9.175560185382032e-07, + "loss": 2.2842, + "step": 45380 + }, + { + "epoch": 1.91, + "learning_rate": 9.133041370806582e-07, + "loss": 2.0703, + "step": 45390 + }, + { + "epoch": 1.91, + "learning_rate": 9.090522556231134e-07, + "loss": 2.0194, + "step": 45400 + }, + { + "epoch": 1.91, + "learning_rate": 9.048003741655683e-07, + "loss": 2.5022, + "step": 45410 + }, + { + "epoch": 1.91, + "learning_rate": 9.005484927080233e-07, + "loss": 1.9706, + "step": 45420 + }, + { + "epoch": 1.91, + "learning_rate": 8.962966112504784e-07, + "loss": 2.0497, + "step": 45430 + }, + { + "epoch": 1.91, + "learning_rate": 8.920447297929334e-07, + "loss": 2.2135, + "step": 45440 + }, + { + "epoch": 1.91, + "learning_rate": 8.877928483353884e-07, + "loss": 2.0119, + "step": 45450 + }, + { + "epoch": 1.91, + "learning_rate": 8.835409668778436e-07, + "loss": 1.981, + "step": 45460 + }, + { + "epoch": 1.91, + "learning_rate": 8.792890854202986e-07, + "loss": 2.1487, + "step": 45470 + }, + { + "epoch": 1.91, + "learning_rate": 8.750372039627536e-07, + "loss": 2.5634, + "step": 45480 + }, + { + "epoch": 1.91, + "learning_rate": 8.707853225052086e-07, + "loss": 2.3092, + "step": 45490 + }, + { + "epoch": 1.91, + "learning_rate": 8.665334410476637e-07, + "loss": 2.1664, + "step": 45500 + }, + { + "epoch": 1.91, + "eval_loss": 1.7321257591247559, + "eval_runtime": 174.5157, + "eval_samples_per_second": 14.343, + "eval_steps_per_second": 7.174, + "step": 45500 + }, + { + "epoch": 1.91, + "learning_rate": 8.622815595901186e-07, + "loss": 1.8467, + "step": 45510 + }, + { + "epoch": 1.92, + "learning_rate": 8.580296781325737e-07, + "loss": 1.9513, + "step": 45520 + }, + { + "epoch": 1.92, + "learning_rate": 8.537777966750288e-07, + "loss": 1.8185, + "step": 45530 + }, + { + "epoch": 1.92, + "learning_rate": 8.495259152174838e-07, + "loss": 2.3288, + "step": 45540 + }, + { + "epoch": 1.92, + "learning_rate": 8.452740337599388e-07, + "loss": 1.966, + "step": 45550 + }, + { + "epoch": 1.92, + "learning_rate": 8.410221523023938e-07, + "loss": 2.037, + "step": 45560 + }, + { + "epoch": 1.92, + "learning_rate": 8.367702708448489e-07, + "loss": 1.9471, + "step": 45570 + }, + { + "epoch": 1.92, + "learning_rate": 8.32518389387304e-07, + "loss": 2.0111, + "step": 45580 + }, + { + "epoch": 1.92, + "learning_rate": 8.28266507929759e-07, + "loss": 1.9612, + "step": 45590 + }, + { + "epoch": 1.92, + "learning_rate": 8.240146264722141e-07, + "loss": 1.5161, + "step": 45600 + }, + { + "epoch": 1.92, + "learning_rate": 8.197627450146691e-07, + "loss": 1.9682, + "step": 45610 + }, + { + "epoch": 1.92, + "learning_rate": 8.15510863557124e-07, + "loss": 2.0455, + "step": 45620 + }, + { + "epoch": 1.92, + "learning_rate": 8.11258982099579e-07, + "loss": 1.9825, + "step": 45630 + }, + { + "epoch": 1.92, + "learning_rate": 8.070071006420342e-07, + "loss": 1.9404, + "step": 45640 + }, + { + "epoch": 1.92, + "learning_rate": 8.027552191844892e-07, + "loss": 1.8732, + "step": 45650 + }, + { + "epoch": 1.92, + "learning_rate": 7.985033377269442e-07, + "loss": 2.3782, + "step": 45660 + }, + { + "epoch": 1.92, + "learning_rate": 7.942514562693993e-07, + "loss": 1.8175, + "step": 45670 + }, + { + "epoch": 1.92, + "learning_rate": 7.899995748118543e-07, + "loss": 2.3118, + "step": 45680 + }, + { + "epoch": 1.92, + "learning_rate": 7.857476933543093e-07, + "loss": 2.1101, + "step": 45690 + }, + { + "epoch": 1.92, + "learning_rate": 7.814958118967645e-07, + "loss": 1.8881, + "step": 45700 + }, + { + "epoch": 1.92, + "learning_rate": 7.772439304392195e-07, + "loss": 1.8899, + "step": 45710 + }, + { + "epoch": 1.92, + "learning_rate": 7.729920489816745e-07, + "loss": 2.336, + "step": 45720 + }, + { + "epoch": 1.92, + "learning_rate": 7.687401675241294e-07, + "loss": 2.294, + "step": 45730 + }, + { + "epoch": 1.92, + "learning_rate": 7.644882860665845e-07, + "loss": 2.6725, + "step": 45740 + }, + { + "epoch": 1.92, + "learning_rate": 7.602364046090395e-07, + "loss": 2.3393, + "step": 45750 + }, + { + "epoch": 1.93, + "learning_rate": 7.559845231514946e-07, + "loss": 2.2539, + "step": 45760 + }, + { + "epoch": 1.93, + "learning_rate": 7.517326416939497e-07, + "loss": 1.9912, + "step": 45770 + }, + { + "epoch": 1.93, + "learning_rate": 7.474807602364047e-07, + "loss": 2.3202, + "step": 45780 + }, + { + "epoch": 1.93, + "learning_rate": 7.432288787788597e-07, + "loss": 1.7479, + "step": 45790 + }, + { + "epoch": 1.93, + "learning_rate": 7.389769973213147e-07, + "loss": 2.1202, + "step": 45800 + }, + { + "epoch": 1.93, + "learning_rate": 7.347251158637697e-07, + "loss": 1.2499, + "step": 45810 + }, + { + "epoch": 1.93, + "learning_rate": 7.304732344062249e-07, + "loss": 1.9322, + "step": 45820 + }, + { + "epoch": 1.93, + "learning_rate": 7.262213529486798e-07, + "loss": 2.1149, + "step": 45830 + }, + { + "epoch": 1.93, + "learning_rate": 7.219694714911348e-07, + "loss": 1.8979, + "step": 45840 + }, + { + "epoch": 1.93, + "learning_rate": 7.177175900335899e-07, + "loss": 2.0963, + "step": 45850 + }, + { + "epoch": 1.93, + "learning_rate": 7.134657085760449e-07, + "loss": 2.3248, + "step": 45860 + }, + { + "epoch": 1.93, + "learning_rate": 7.092138271184999e-07, + "loss": 2.2861, + "step": 45870 + }, + { + "epoch": 1.93, + "learning_rate": 7.049619456609551e-07, + "loss": 2.2305, + "step": 45880 + }, + { + "epoch": 1.93, + "learning_rate": 7.007100642034101e-07, + "loss": 1.9268, + "step": 45890 + }, + { + "epoch": 1.93, + "learning_rate": 6.964581827458651e-07, + "loss": 2.0687, + "step": 45900 + }, + { + "epoch": 1.93, + "learning_rate": 6.922063012883201e-07, + "loss": 2.2125, + "step": 45910 + }, + { + "epoch": 1.93, + "learning_rate": 6.879544198307752e-07, + "loss": 2.8913, + "step": 45920 + }, + { + "epoch": 1.93, + "learning_rate": 6.837025383732303e-07, + "loss": 1.9888, + "step": 45930 + }, + { + "epoch": 1.93, + "learning_rate": 6.794506569156852e-07, + "loss": 2.1703, + "step": 45940 + }, + { + "epoch": 1.93, + "learning_rate": 6.751987754581403e-07, + "loss": 2.3161, + "step": 45950 + }, + { + "epoch": 1.93, + "learning_rate": 6.709468940005953e-07, + "loss": 2.4341, + "step": 45960 + }, + { + "epoch": 1.93, + "learning_rate": 6.666950125430503e-07, + "loss": 2.1793, + "step": 45970 + }, + { + "epoch": 1.93, + "learning_rate": 6.624431310855053e-07, + "loss": 2.4448, + "step": 45980 + }, + { + "epoch": 1.93, + "learning_rate": 6.581912496279605e-07, + "loss": 2.4227, + "step": 45990 + }, + { + "epoch": 1.94, + "learning_rate": 6.539393681704155e-07, + "loss": 2.159, + "step": 46000 + }, + { + "epoch": 1.94, + "eval_loss": 1.7307124137878418, + "eval_runtime": 176.3216, + "eval_samples_per_second": 14.196, + "eval_steps_per_second": 7.101, + "step": 46000 + }, + { + "epoch": 1.94, + "learning_rate": 6.496874867128705e-07, + "loss": 2.0888, + "step": 46010 + }, + { + "epoch": 1.94, + "learning_rate": 6.454356052553256e-07, + "loss": 2.3548, + "step": 46020 + }, + { + "epoch": 1.94, + "learning_rate": 6.411837237977806e-07, + "loss": 2.0837, + "step": 46030 + }, + { + "epoch": 1.94, + "learning_rate": 6.369318423402355e-07, + "loss": 2.129, + "step": 46040 + }, + { + "epoch": 1.94, + "learning_rate": 6.326799608826907e-07, + "loss": 2.1136, + "step": 46050 + }, + { + "epoch": 1.94, + "learning_rate": 6.284280794251457e-07, + "loss": 1.82, + "step": 46060 + }, + { + "epoch": 1.94, + "learning_rate": 6.241761979676007e-07, + "loss": 2.4332, + "step": 46070 + }, + { + "epoch": 1.94, + "learning_rate": 6.199243165100557e-07, + "loss": 2.4913, + "step": 46080 + }, + { + "epoch": 1.94, + "learning_rate": 6.156724350525108e-07, + "loss": 2.1268, + "step": 46090 + }, + { + "epoch": 1.94, + "learning_rate": 6.114205535949658e-07, + "loss": 1.8976, + "step": 46100 + }, + { + "epoch": 1.94, + "learning_rate": 6.071686721374209e-07, + "loss": 2.7105, + "step": 46110 + }, + { + "epoch": 1.94, + "learning_rate": 6.029167906798759e-07, + "loss": 1.9377, + "step": 46120 + }, + { + "epoch": 1.94, + "learning_rate": 5.98664909222331e-07, + "loss": 1.6299, + "step": 46130 + }, + { + "epoch": 1.94, + "learning_rate": 5.94413027764786e-07, + "loss": 2.3748, + "step": 46140 + }, + { + "epoch": 1.94, + "learning_rate": 5.901611463072409e-07, + "loss": 1.6503, + "step": 46150 + }, + { + "epoch": 1.94, + "learning_rate": 5.85909264849696e-07, + "loss": 1.9663, + "step": 46160 + }, + { + "epoch": 1.94, + "learning_rate": 5.81657383392151e-07, + "loss": 2.0319, + "step": 46170 + }, + { + "epoch": 1.94, + "learning_rate": 5.774055019346061e-07, + "loss": 1.6007, + "step": 46180 + }, + { + "epoch": 1.94, + "learning_rate": 5.731536204770612e-07, + "loss": 1.4921, + "step": 46190 + }, + { + "epoch": 1.94, + "learning_rate": 5.689017390195162e-07, + "loss": 2.2399, + "step": 46200 + }, + { + "epoch": 1.94, + "learning_rate": 5.646498575619712e-07, + "loss": 2.7385, + "step": 46210 + }, + { + "epoch": 1.94, + "learning_rate": 5.603979761044263e-07, + "loss": 2.3728, + "step": 46220 + }, + { + "epoch": 1.94, + "learning_rate": 5.561460946468813e-07, + "loss": 2.0037, + "step": 46230 + }, + { + "epoch": 1.95, + "learning_rate": 5.518942131893363e-07, + "loss": 1.7057, + "step": 46240 + }, + { + "epoch": 1.95, + "learning_rate": 5.476423317317913e-07, + "loss": 2.0428, + "step": 46250 + }, + { + "epoch": 1.95, + "learning_rate": 5.433904502742464e-07, + "loss": 2.0758, + "step": 46260 + }, + { + "epoch": 1.95, + "learning_rate": 5.391385688167014e-07, + "loss": 1.8275, + "step": 46270 + }, + { + "epoch": 1.95, + "learning_rate": 5.348866873591565e-07, + "loss": 1.7757, + "step": 46280 + }, + { + "epoch": 1.95, + "learning_rate": 5.306348059016115e-07, + "loss": 1.9977, + "step": 46290 + }, + { + "epoch": 1.95, + "learning_rate": 5.263829244440665e-07, + "loss": 1.8494, + "step": 46300 + }, + { + "epoch": 1.95, + "learning_rate": 5.221310429865216e-07, + "loss": 2.1142, + "step": 46310 + }, + { + "epoch": 1.95, + "learning_rate": 5.178791615289766e-07, + "loss": 2.307, + "step": 46320 + }, + { + "epoch": 1.95, + "learning_rate": 5.136272800714316e-07, + "loss": 2.566, + "step": 46330 + }, + { + "epoch": 1.95, + "learning_rate": 5.093753986138867e-07, + "loss": 2.0823, + "step": 46340 + }, + { + "epoch": 1.95, + "learning_rate": 5.051235171563418e-07, + "loss": 1.5522, + "step": 46350 + }, + { + "epoch": 1.95, + "learning_rate": 5.008716356987967e-07, + "loss": 1.9832, + "step": 46360 + }, + { + "epoch": 1.95, + "learning_rate": 4.966197542412518e-07, + "loss": 1.8085, + "step": 46370 + }, + { + "epoch": 1.95, + "learning_rate": 4.923678727837068e-07, + "loss": 2.0295, + "step": 46380 + }, + { + "epoch": 1.95, + "learning_rate": 4.881159913261619e-07, + "loss": 2.2643, + "step": 46390 + }, + { + "epoch": 1.95, + "learning_rate": 4.838641098686169e-07, + "loss": 1.7472, + "step": 46400 + }, + { + "epoch": 1.95, + "learning_rate": 4.796122284110719e-07, + "loss": 2.0468, + "step": 46410 + }, + { + "epoch": 1.95, + "learning_rate": 4.75360346953527e-07, + "loss": 2.3911, + "step": 46420 + }, + { + "epoch": 1.95, + "learning_rate": 4.7110846549598203e-07, + "loss": 2.1216, + "step": 46430 + }, + { + "epoch": 1.95, + "learning_rate": 4.66856584038437e-07, + "loss": 2.4274, + "step": 46440 + }, + { + "epoch": 1.95, + "learning_rate": 4.626047025808921e-07, + "loss": 1.8198, + "step": 46450 + }, + { + "epoch": 1.95, + "learning_rate": 4.5835282112334715e-07, + "loss": 1.9714, + "step": 46460 + }, + { + "epoch": 1.96, + "learning_rate": 4.5410093966580214e-07, + "loss": 1.5653, + "step": 46470 + }, + { + "epoch": 1.96, + "learning_rate": 4.4984905820825723e-07, + "loss": 1.8752, + "step": 46480 + }, + { + "epoch": 1.96, + "learning_rate": 4.455971767507122e-07, + "loss": 1.9234, + "step": 46490 + }, + { + "epoch": 1.96, + "learning_rate": 4.4134529529316726e-07, + "loss": 1.8075, + "step": 46500 + }, + { + "epoch": 1.96, + "eval_loss": 1.730423927307129, + "eval_runtime": 175.8476, + "eval_samples_per_second": 14.234, + "eval_steps_per_second": 7.12, + "step": 46500 + }, + { + "epoch": 1.96, + "learning_rate": 4.3709341383562235e-07, + "loss": 1.9106, + "step": 46510 + }, + { + "epoch": 1.96, + "learning_rate": 4.3284153237807734e-07, + "loss": 2.4398, + "step": 46520 + }, + { + "epoch": 1.96, + "learning_rate": 4.285896509205324e-07, + "loss": 2.4179, + "step": 46530 + }, + { + "epoch": 1.96, + "learning_rate": 4.243377694629874e-07, + "loss": 1.7337, + "step": 46540 + }, + { + "epoch": 1.96, + "learning_rate": 4.2008588800544246e-07, + "loss": 1.85, + "step": 46550 + }, + { + "epoch": 1.96, + "learning_rate": 4.1583400654789745e-07, + "loss": 2.0444, + "step": 46560 + }, + { + "epoch": 1.96, + "learning_rate": 4.1158212509035254e-07, + "loss": 2.1128, + "step": 46570 + }, + { + "epoch": 1.96, + "learning_rate": 4.0733024363280753e-07, + "loss": 1.9479, + "step": 46580 + }, + { + "epoch": 1.96, + "learning_rate": 4.0307836217526257e-07, + "loss": 1.8774, + "step": 46590 + }, + { + "epoch": 1.96, + "learning_rate": 3.9882648071771767e-07, + "loss": 2.3917, + "step": 46600 + }, + { + "epoch": 1.96, + "learning_rate": 3.9457459926017265e-07, + "loss": 2.0014, + "step": 46610 + }, + { + "epoch": 1.96, + "learning_rate": 3.903227178026277e-07, + "loss": 2.0376, + "step": 46620 + }, + { + "epoch": 1.96, + "learning_rate": 3.8607083634508274e-07, + "loss": 1.7728, + "step": 46630 + }, + { + "epoch": 1.96, + "learning_rate": 3.818189548875378e-07, + "loss": 2.0113, + "step": 46640 + }, + { + "epoch": 1.96, + "learning_rate": 3.7756707342999276e-07, + "loss": 1.9953, + "step": 46650 + }, + { + "epoch": 1.96, + "learning_rate": 3.7331519197244786e-07, + "loss": 2.2275, + "step": 46660 + }, + { + "epoch": 1.96, + "learning_rate": 3.690633105149029e-07, + "loss": 1.6948, + "step": 46670 + }, + { + "epoch": 1.96, + "learning_rate": 3.648114290573579e-07, + "loss": 2.005, + "step": 46680 + }, + { + "epoch": 1.96, + "learning_rate": 3.60559547599813e-07, + "loss": 1.8244, + "step": 46690 + }, + { + "epoch": 1.96, + "learning_rate": 3.5630766614226797e-07, + "loss": 2.1882, + "step": 46700 + }, + { + "epoch": 1.97, + "learning_rate": 3.52055784684723e-07, + "loss": 1.4587, + "step": 46710 + }, + { + "epoch": 1.97, + "learning_rate": 3.478039032271781e-07, + "loss": 2.0443, + "step": 46720 + }, + { + "epoch": 1.97, + "learning_rate": 3.435520217696331e-07, + "loss": 2.1665, + "step": 46730 + }, + { + "epoch": 1.97, + "learning_rate": 3.3930014031208813e-07, + "loss": 1.856, + "step": 46740 + }, + { + "epoch": 1.97, + "learning_rate": 3.3504825885454317e-07, + "loss": 2.052, + "step": 46750 + }, + { + "epoch": 1.97, + "learning_rate": 3.307963773969982e-07, + "loss": 2.127, + "step": 46760 + }, + { + "epoch": 1.97, + "learning_rate": 3.265444959394532e-07, + "loss": 1.7498, + "step": 46770 + }, + { + "epoch": 1.97, + "learning_rate": 3.222926144819083e-07, + "loss": 2.1656, + "step": 46780 + }, + { + "epoch": 1.97, + "learning_rate": 3.180407330243633e-07, + "loss": 2.2154, + "step": 46790 + }, + { + "epoch": 1.97, + "learning_rate": 3.137888515668183e-07, + "loss": 2.3625, + "step": 46800 + }, + { + "epoch": 1.97, + "learning_rate": 3.0953697010927336e-07, + "loss": 1.5446, + "step": 46810 + }, + { + "epoch": 1.97, + "learning_rate": 3.052850886517284e-07, + "loss": 2.333, + "step": 46820 + }, + { + "epoch": 1.97, + "learning_rate": 3.0103320719418344e-07, + "loss": 2.1273, + "step": 46830 + }, + { + "epoch": 1.97, + "learning_rate": 2.967813257366385e-07, + "loss": 2.0625, + "step": 46840 + }, + { + "epoch": 1.97, + "learning_rate": 2.925294442790935e-07, + "loss": 1.7183, + "step": 46850 + }, + { + "epoch": 1.97, + "learning_rate": 2.8827756282154857e-07, + "loss": 1.9478, + "step": 46860 + }, + { + "epoch": 1.97, + "learning_rate": 2.840256813640036e-07, + "loss": 1.7617, + "step": 46870 + }, + { + "epoch": 1.97, + "learning_rate": 2.7977379990645865e-07, + "loss": 1.9488, + "step": 46880 + }, + { + "epoch": 1.97, + "learning_rate": 2.755219184489137e-07, + "loss": 2.2433, + "step": 46890 + }, + { + "epoch": 1.97, + "learning_rate": 2.712700369913687e-07, + "loss": 1.9074, + "step": 46900 + }, + { + "epoch": 1.97, + "learning_rate": 2.670181555338237e-07, + "loss": 1.9099, + "step": 46910 + }, + { + "epoch": 1.97, + "learning_rate": 2.6276627407627876e-07, + "loss": 1.9294, + "step": 46920 + }, + { + "epoch": 1.97, + "learning_rate": 2.585143926187338e-07, + "loss": 1.9785, + "step": 46930 + }, + { + "epoch": 1.97, + "learning_rate": 2.5426251116118884e-07, + "loss": 2.2433, + "step": 46940 + }, + { + "epoch": 1.98, + "learning_rate": 2.500106297036439e-07, + "loss": 2.3595, + "step": 46950 + }, + { + "epoch": 1.98, + "learning_rate": 2.457587482460989e-07, + "loss": 2.0593, + "step": 46960 + }, + { + "epoch": 1.98, + "learning_rate": 2.4150686678855396e-07, + "loss": 1.6623, + "step": 46970 + }, + { + "epoch": 1.98, + "learning_rate": 2.37254985331009e-07, + "loss": 2.1463, + "step": 46980 + }, + { + "epoch": 1.98, + "learning_rate": 2.3300310387346402e-07, + "loss": 1.8958, + "step": 46990 + }, + { + "epoch": 1.98, + "learning_rate": 2.2875122241591906e-07, + "loss": 1.7826, + "step": 47000 + }, + { + "epoch": 1.98, + "eval_loss": 1.7335072755813599, + "eval_runtime": 175.9239, + "eval_samples_per_second": 14.228, + "eval_steps_per_second": 7.117, + "step": 47000 + }, + { + "epoch": 1.98, + "learning_rate": 2.244993409583741e-07, + "loss": 1.9725, + "step": 47010 + }, + { + "epoch": 1.98, + "learning_rate": 2.202474595008291e-07, + "loss": 2.3016, + "step": 47020 + }, + { + "epoch": 1.98, + "learning_rate": 2.1599557804328415e-07, + "loss": 2.0188, + "step": 47030 + }, + { + "epoch": 1.98, + "learning_rate": 2.1174369658573922e-07, + "loss": 1.9837, + "step": 47040 + }, + { + "epoch": 1.98, + "learning_rate": 2.0749181512819426e-07, + "loss": 2.1594, + "step": 47050 + }, + { + "epoch": 1.98, + "learning_rate": 2.0323993367064928e-07, + "loss": 2.0787, + "step": 47060 + }, + { + "epoch": 1.98, + "learning_rate": 1.9898805221310432e-07, + "loss": 2.0541, + "step": 47070 + }, + { + "epoch": 1.98, + "learning_rate": 1.9473617075555936e-07, + "loss": 1.8778, + "step": 47080 + }, + { + "epoch": 1.98, + "learning_rate": 1.9048428929801437e-07, + "loss": 2.6506, + "step": 47090 + }, + { + "epoch": 1.98, + "learning_rate": 1.8623240784046944e-07, + "loss": 2.3363, + "step": 47100 + }, + { + "epoch": 1.98, + "learning_rate": 1.8198052638292448e-07, + "loss": 1.9014, + "step": 47110 + }, + { + "epoch": 1.98, + "learning_rate": 1.777286449253795e-07, + "loss": 1.8956, + "step": 47120 + }, + { + "epoch": 1.98, + "learning_rate": 1.7347676346783453e-07, + "loss": 2.1171, + "step": 47130 + }, + { + "epoch": 1.98, + "learning_rate": 1.6922488201028957e-07, + "loss": 2.0703, + "step": 47140 + }, + { + "epoch": 1.98, + "learning_rate": 1.649730005527446e-07, + "loss": 1.8094, + "step": 47150 + }, + { + "epoch": 1.98, + "learning_rate": 1.6072111909519963e-07, + "loss": 2.2072, + "step": 47160 + }, + { + "epoch": 1.98, + "learning_rate": 1.564692376376547e-07, + "loss": 2.6089, + "step": 47170 + }, + { + "epoch": 1.98, + "learning_rate": 1.522173561801097e-07, + "loss": 2.0519, + "step": 47180 + }, + { + "epoch": 1.99, + "learning_rate": 1.4796547472256475e-07, + "loss": 2.1111, + "step": 47190 + }, + { + "epoch": 1.99, + "learning_rate": 1.4371359326501977e-07, + "loss": 1.8261, + "step": 47200 + }, + { + "epoch": 1.99, + "learning_rate": 1.3946171180747483e-07, + "loss": 1.8875, + "step": 47210 + }, + { + "epoch": 1.99, + "learning_rate": 1.3520983034992985e-07, + "loss": 2.3299, + "step": 47220 + }, + { + "epoch": 1.99, + "learning_rate": 1.309579488923849e-07, + "loss": 1.7025, + "step": 47230 + }, + { + "epoch": 1.99, + "learning_rate": 1.2670606743483993e-07, + "loss": 2.2977, + "step": 47240 + }, + { + "epoch": 1.99, + "learning_rate": 1.2245418597729497e-07, + "loss": 2.1292, + "step": 47250 + }, + { + "epoch": 1.99, + "learning_rate": 1.1820230451975e-07, + "loss": 2.1523, + "step": 47260 + }, + { + "epoch": 1.99, + "learning_rate": 1.1395042306220504e-07, + "loss": 1.7311, + "step": 47270 + }, + { + "epoch": 1.99, + "learning_rate": 1.0969854160466007e-07, + "loss": 2.1331, + "step": 47280 + }, + { + "epoch": 1.99, + "learning_rate": 1.054466601471151e-07, + "loss": 1.7428, + "step": 47290 + }, + { + "epoch": 1.99, + "learning_rate": 1.0119477868957015e-07, + "loss": 2.2247, + "step": 47300 + }, + { + "epoch": 1.99, + "learning_rate": 9.694289723202517e-08, + "loss": 2.0562, + "step": 47310 + }, + { + "epoch": 1.99, + "learning_rate": 9.26910157744802e-08, + "loss": 1.9015, + "step": 47320 + }, + { + "epoch": 1.99, + "learning_rate": 8.843913431693526e-08, + "loss": 2.1303, + "step": 47330 + }, + { + "epoch": 1.99, + "learning_rate": 8.418725285939028e-08, + "loss": 1.8816, + "step": 47340 + }, + { + "epoch": 1.99, + "learning_rate": 7.993537140184531e-08, + "loss": 2.2489, + "step": 47350 + }, + { + "epoch": 1.99, + "learning_rate": 7.568348994430036e-08, + "loss": 2.5636, + "step": 47360 + }, + { + "epoch": 1.99, + "learning_rate": 7.143160848675539e-08, + "loss": 2.5362, + "step": 47370 + }, + { + "epoch": 1.99, + "learning_rate": 6.717972702921043e-08, + "loss": 2.2663, + "step": 47380 + }, + { + "epoch": 1.99, + "learning_rate": 6.292784557166547e-08, + "loss": 2.4488, + "step": 47390 + }, + { + "epoch": 1.99, + "learning_rate": 5.86759641141205e-08, + "loss": 1.5922, + "step": 47400 + }, + { + "epoch": 1.99, + "learning_rate": 5.4424082656575536e-08, + "loss": 2.1635, + "step": 47410 + }, + { + "epoch": 2.0, + "learning_rate": 5.0172201199030576e-08, + "loss": 1.5336, + "step": 47420 + }, + { + "epoch": 2.0, + "learning_rate": 4.5920319741485604e-08, + "loss": 1.998, + "step": 47430 + }, + { + "epoch": 2.0, + "learning_rate": 4.1668438283940644e-08, + "loss": 2.2229, + "step": 47440 + }, + { + "epoch": 2.0, + "learning_rate": 3.7416556826395685e-08, + "loss": 1.8672, + "step": 47450 + }, + { + "epoch": 2.0, + "learning_rate": 3.316467536885072e-08, + "loss": 1.8159, + "step": 47460 + }, + { + "epoch": 2.0, + "learning_rate": 2.8912793911305757e-08, + "loss": 1.5926, + "step": 47470 + }, + { + "epoch": 2.0, + "learning_rate": 2.466091245376079e-08, + "loss": 1.6906, + "step": 47480 + }, + { + "epoch": 2.0, + "learning_rate": 2.0409030996215825e-08, + "loss": 2.1867, + "step": 47490 + }, + { + "epoch": 2.0, + "learning_rate": 1.6157149538670862e-08, + "loss": 1.9429, + "step": 47500 + }, + { + "epoch": 2.0, + "eval_loss": 1.7336406707763672, + "eval_runtime": 175.8157, + "eval_samples_per_second": 14.236, + "eval_steps_per_second": 7.121, + "step": 47500 + } + ], + "max_steps": 47538, + "num_train_epochs": 2, + "total_flos": 3.021220380205056e+16, + "trial_name": null, + "trial_params": null +}