diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 3.897013328984666, - "global_step": 6500000, + "epoch": 4.796324097211897, + "global_step": 8000000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -78526,11 +78526,18131 @@ "eval_samples_per_second": 477.914, "eval_steps_per_second": 79.653, "step": 6500000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8750590690312677e-05, + "loss": 1.1265, + "step": 6500500 + }, + { + "epoch": 3.9, + "learning_rate": 2.8748490724752114e-05, + "loss": 1.1418, + "step": 6501000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8746390759191547e-05, + "loss": 1.1576, + "step": 6501500 + }, + { + "epoch": 3.9, + "learning_rate": 2.8744294993562104e-05, + "loss": 1.1346, + "step": 6502000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8742195028001537e-05, + "loss": 1.1479, + "step": 6502500 + }, + { + "epoch": 3.9, + "learning_rate": 2.8740095062440974e-05, + "loss": 1.127, + "step": 6503000 + }, + { + "epoch": 3.9, + "learning_rate": 2.873799509688041e-05, + "loss": 1.179, + "step": 6503500 + }, + { + "epoch": 3.9, + "learning_rate": 2.8735895131319845e-05, + "loss": 1.1507, + "step": 6504000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8733799365690398e-05, + "loss": 1.1546, + "step": 6504500 + }, + { + "epoch": 3.9, + "learning_rate": 2.8731699400129835e-05, + "loss": 1.1458, + "step": 6505000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8729599434569272e-05, + "loss": 1.1623, + "step": 6505500 + }, + { + "epoch": 3.9, + "learning_rate": 2.8727499469008705e-05, + "loss": 1.1579, + "step": 6506000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8725399503448142e-05, + "loss": 1.1621, + "step": 6506500 + }, + { + "epoch": 3.9, + "learning_rate": 2.8723303737818696e-05, + "loss": 1.1139, + "step": 6507000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8721203772258133e-05, + "loss": 1.1145, + "step": 6507500 + }, + { + "epoch": 3.9, + "learning_rate": 2.871910380669757e-05, + "loss": 1.1221, + "step": 6508000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8717003841137003e-05, + "loss": 1.1303, + "step": 6508500 + }, + { + "epoch": 3.9, + "learning_rate": 2.871490387557644e-05, + "loss": 1.1757, + "step": 6509000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8712803910015877e-05, + "loss": 1.1302, + "step": 6509500 + }, + { + "epoch": 3.9, + "learning_rate": 2.871070394445531e-05, + "loss": 1.1413, + "step": 6510000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8708608178825867e-05, + "loss": 1.1361, + "step": 6510500 + }, + { + "epoch": 3.9, + "learning_rate": 2.87065082132653e-05, + "loss": 1.135, + "step": 6511000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8704408247704737e-05, + "loss": 1.1599, + "step": 6511500 + }, + { + "epoch": 3.9, + "learning_rate": 2.8702308282144174e-05, + "loss": 1.1331, + "step": 6512000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8700208316583608e-05, + "loss": 1.1425, + "step": 6512500 + }, + { + "epoch": 3.9, + "learning_rate": 2.869811255095416e-05, + "loss": 1.1182, + "step": 6513000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8696012585393598e-05, + "loss": 1.1179, + "step": 6513500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8693912619833035e-05, + "loss": 1.1591, + "step": 6514000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8691812654272472e-05, + "loss": 1.1661, + "step": 6514500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8689712688711905e-05, + "loss": 1.1541, + "step": 6515000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8687612723151342e-05, + "loss": 1.1357, + "step": 6515500 + }, + { + "epoch": 3.91, + "learning_rate": 2.868551275759078e-05, + "loss": 1.1656, + "step": 6516000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8683416991961332e-05, + "loss": 1.1305, + "step": 6516500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8681317026400766e-05, + "loss": 1.1531, + "step": 6517000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8679217060840203e-05, + "loss": 1.1646, + "step": 6517500 + }, + { + "epoch": 3.91, + "learning_rate": 2.867711709527964e-05, + "loss": 1.119, + "step": 6518000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8675017129719073e-05, + "loss": 1.1151, + "step": 6518500 + }, + { + "epoch": 3.91, + "learning_rate": 2.867291716415851e-05, + "loss": 1.1619, + "step": 6519000 + }, + { + "epoch": 3.91, + "learning_rate": 2.867081719859794e-05, + "loss": 1.1333, + "step": 6519500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8668717233037377e-05, + "loss": 1.1357, + "step": 6520000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8666621467407937e-05, + "loss": 1.1619, + "step": 6520500 + }, + { + "epoch": 3.91, + "learning_rate": 2.866452150184737e-05, + "loss": 1.1516, + "step": 6521000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8662425736217928e-05, + "loss": 1.1531, + "step": 6521500 + }, + { + "epoch": 3.91, + "learning_rate": 2.866032577065736e-05, + "loss": 1.155, + "step": 6522000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8658225805096798e-05, + "loss": 1.1458, + "step": 6522500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8656125839536235e-05, + "loss": 1.1442, + "step": 6523000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8654025873975668e-05, + "loss": 1.1605, + "step": 6523500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8651925908415098e-05, + "loss": 1.1747, + "step": 6524000 + }, + { + "epoch": 3.91, + "learning_rate": 2.864983014278566e-05, + "loss": 1.1515, + "step": 6524500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8647730177225095e-05, + "loss": 1.1339, + "step": 6525000 + }, + { + "epoch": 3.91, + "learning_rate": 2.864563441159565e-05, + "loss": 1.1529, + "step": 6525500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8643534446035086e-05, + "loss": 1.1247, + "step": 6526000 + }, + { + "epoch": 3.91, + "learning_rate": 2.864143448047452e-05, + "loss": 1.1317, + "step": 6526500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8639334514913956e-05, + "loss": 1.1487, + "step": 6527000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8637234549353393e-05, + "loss": 1.1228, + "step": 6527500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8635134583792826e-05, + "loss": 1.1575, + "step": 6528000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8633034618232263e-05, + "loss": 1.1244, + "step": 6528500 + }, + { + "epoch": 3.91, + "learning_rate": 2.8630934652671693e-05, + "loss": 1.1857, + "step": 6529000 + }, + { + "epoch": 3.91, + "learning_rate": 2.8628838887042254e-05, + "loss": 1.1311, + "step": 6529500 + }, + { + "epoch": 3.91, + "learning_rate": 2.862673892148169e-05, + "loss": 1.1563, + "step": 6530000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8624643155852244e-05, + "loss": 1.1278, + "step": 6530500 + }, + { + "epoch": 3.92, + "learning_rate": 2.8622543190291678e-05, + "loss": 1.1403, + "step": 6531000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8620443224731114e-05, + "loss": 1.1502, + "step": 6531500 + }, + { + "epoch": 3.92, + "learning_rate": 2.861834325917055e-05, + "loss": 1.0933, + "step": 6532000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8616243293609985e-05, + "loss": 1.1212, + "step": 6532500 + }, + { + "epoch": 3.92, + "learning_rate": 2.861414332804942e-05, + "loss": 1.1327, + "step": 6533000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8612043362488852e-05, + "loss": 1.1203, + "step": 6533500 + }, + { + "epoch": 3.92, + "learning_rate": 2.860994339692829e-05, + "loss": 1.1624, + "step": 6534000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8607843431367722e-05, + "loss": 1.1682, + "step": 6534500 + }, + { + "epoch": 3.92, + "learning_rate": 2.860574346580716e-05, + "loss": 1.1483, + "step": 6535000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8603643500246596e-05, + "loss": 1.1605, + "step": 6535500 + }, + { + "epoch": 3.92, + "learning_rate": 2.860154353468603e-05, + "loss": 1.1192, + "step": 6536000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8599447769056586e-05, + "loss": 1.1167, + "step": 6536500 + }, + { + "epoch": 3.92, + "learning_rate": 2.859734780349602e-05, + "loss": 1.1704, + "step": 6537000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8595247837935456e-05, + "loss": 1.1449, + "step": 6537500 + }, + { + "epoch": 3.92, + "learning_rate": 2.8593147872374893e-05, + "loss": 1.1307, + "step": 6538000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8591047906814327e-05, + "loss": 1.1684, + "step": 6538500 + }, + { + "epoch": 3.92, + "learning_rate": 2.8588947941253764e-05, + "loss": 1.1343, + "step": 6539000 + }, + { + "epoch": 3.92, + "learning_rate": 2.85868479756932e-05, + "loss": 1.1515, + "step": 6539500 + }, + { + "epoch": 3.92, + "learning_rate": 2.8584748010132634e-05, + "loss": 1.1379, + "step": 6540000 + }, + { + "epoch": 3.92, + "learning_rate": 2.858265224450319e-05, + "loss": 1.1502, + "step": 6540500 + }, + { + "epoch": 3.92, + "learning_rate": 2.8580556478873744e-05, + "loss": 1.1715, + "step": 6541000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8578456513313178e-05, + "loss": 1.1301, + "step": 6541500 + }, + { + "epoch": 3.92, + "learning_rate": 2.8576356547752615e-05, + "loss": 1.1212, + "step": 6542000 + }, + { + "epoch": 3.92, + "learning_rate": 2.857425658219205e-05, + "loss": 1.1383, + "step": 6542500 + }, + { + "epoch": 3.92, + "learning_rate": 2.8572156616631485e-05, + "loss": 1.1494, + "step": 6543000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8570056651070922e-05, + "loss": 1.0922, + "step": 6543500 + }, + { + "epoch": 3.92, + "learning_rate": 2.856795668551036e-05, + "loss": 1.1376, + "step": 6544000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8565856719949792e-05, + "loss": 1.1256, + "step": 6544500 + }, + { + "epoch": 3.92, + "learning_rate": 2.856375675438923e-05, + "loss": 1.1655, + "step": 6545000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8561660988759783e-05, + "loss": 1.1351, + "step": 6545500 + }, + { + "epoch": 3.92, + "learning_rate": 2.855956102319922e-05, + "loss": 1.1422, + "step": 6546000 + }, + { + "epoch": 3.92, + "learning_rate": 2.8557461057638656e-05, + "loss": 1.1466, + "step": 6546500 + }, + { + "epoch": 3.93, + "learning_rate": 2.855536109207809e-05, + "loss": 1.1292, + "step": 6547000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8553261126517527e-05, + "loss": 1.1679, + "step": 6547500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8551161160956963e-05, + "loss": 1.1309, + "step": 6548000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8549065395327517e-05, + "loss": 1.1794, + "step": 6548500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8546965429766954e-05, + "loss": 1.1424, + "step": 6549000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8544865464206387e-05, + "loss": 1.1279, + "step": 6549500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8542765498645824e-05, + "loss": 1.1205, + "step": 6550000 + }, + { + "epoch": 3.93, + "learning_rate": 2.854066553308526e-05, + "loss": 1.1501, + "step": 6550500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8538569767455815e-05, + "loss": 1.1525, + "step": 6551000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8536469801895248e-05, + "loss": 1.1488, + "step": 6551500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8534369836334685e-05, + "loss": 1.1394, + "step": 6552000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8532269870774122e-05, + "loss": 1.1221, + "step": 6552500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8530169905213555e-05, + "loss": 1.1207, + "step": 6553000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8528069939652985e-05, + "loss": 1.129, + "step": 6553500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8525974174023546e-05, + "loss": 1.1655, + "step": 6554000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8523874208462983e-05, + "loss": 1.145, + "step": 6554500 + }, + { + "epoch": 3.93, + "learning_rate": 2.852177424290242e-05, + "loss": 1.1447, + "step": 6555000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8519674277341853e-05, + "loss": 1.1401, + "step": 6555500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8517574311781283e-05, + "loss": 1.1545, + "step": 6556000 + }, + { + "epoch": 3.93, + "learning_rate": 2.851547434622072e-05, + "loss": 1.1027, + "step": 6556500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8513374380660157e-05, + "loss": 1.1132, + "step": 6557000 + }, + { + "epoch": 3.93, + "learning_rate": 2.851127441509959e-05, + "loss": 1.1295, + "step": 6557500 + }, + { + "epoch": 3.93, + "learning_rate": 2.850917864947015e-05, + "loss": 1.1303, + "step": 6558000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8507082883840704e-05, + "loss": 1.1643, + "step": 6558500 + }, + { + "epoch": 3.93, + "learning_rate": 2.850498291828014e-05, + "loss": 1.1344, + "step": 6559000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8502882952719578e-05, + "loss": 1.1642, + "step": 6559500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8500782987159015e-05, + "loss": 1.1263, + "step": 6560000 + }, + { + "epoch": 3.93, + "learning_rate": 2.849868302159844e-05, + "loss": 1.1592, + "step": 6560500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8496583056037878e-05, + "loss": 1.1624, + "step": 6561000 + }, + { + "epoch": 3.93, + "learning_rate": 2.8494483090477315e-05, + "loss": 1.1383, + "step": 6561500 + }, + { + "epoch": 3.93, + "learning_rate": 2.8492387324847875e-05, + "loss": 1.1196, + "step": 6562000 + }, + { + "epoch": 3.93, + "learning_rate": 2.849028735928731e-05, + "loss": 1.1474, + "step": 6562500 + }, + { + "epoch": 3.93, + "learning_rate": 2.848818739372674e-05, + "loss": 1.1447, + "step": 6563000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8486087428166176e-05, + "loss": 1.1722, + "step": 6563500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8483987462605612e-05, + "loss": 1.1221, + "step": 6564000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8481887497045046e-05, + "loss": 1.1389, + "step": 6564500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8479791731415606e-05, + "loss": 1.1421, + "step": 6565000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8477691765855036e-05, + "loss": 1.1445, + "step": 6565500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8475591800294473e-05, + "loss": 1.1419, + "step": 6566000 + }, + { + "epoch": 3.94, + "learning_rate": 2.847349183473391e-05, + "loss": 1.1388, + "step": 6566500 + }, + { + "epoch": 3.94, + "learning_rate": 2.847139606910447e-05, + "loss": 1.1422, + "step": 6567000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8469296103543904e-05, + "loss": 1.1452, + "step": 6567500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8467196137983334e-05, + "loss": 1.136, + "step": 6568000 + }, + { + "epoch": 3.94, + "learning_rate": 2.846509617242277e-05, + "loss": 1.1411, + "step": 6568500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8462996206862204e-05, + "loss": 1.1512, + "step": 6569000 + }, + { + "epoch": 3.94, + "learning_rate": 2.846089624130164e-05, + "loss": 1.1579, + "step": 6569500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8458796275741078e-05, + "loss": 1.1598, + "step": 6570000 + }, + { + "epoch": 3.94, + "learning_rate": 2.845669631018051e-05, + "loss": 1.1416, + "step": 6570500 + }, + { + "epoch": 3.94, + "learning_rate": 2.845460054455107e-05, + "loss": 1.1398, + "step": 6571000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8452500578990502e-05, + "loss": 1.1411, + "step": 6571500 + }, + { + "epoch": 3.94, + "learning_rate": 2.845040061342994e-05, + "loss": 1.1808, + "step": 6572000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8448300647869375e-05, + "loss": 1.1606, + "step": 6572500 + }, + { + "epoch": 3.94, + "learning_rate": 2.844620488223993e-05, + "loss": 1.1672, + "step": 6573000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8444104916679366e-05, + "loss": 1.1308, + "step": 6573500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8442009151049926e-05, + "loss": 1.1324, + "step": 6574000 + }, + { + "epoch": 3.94, + "learning_rate": 2.843990918548936e-05, + "loss": 1.1399, + "step": 6574500 + }, + { + "epoch": 3.94, + "learning_rate": 2.843780921992879e-05, + "loss": 1.1661, + "step": 6575000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8435709254368227e-05, + "loss": 1.1364, + "step": 6575500 + }, + { + "epoch": 3.94, + "learning_rate": 2.843360928880766e-05, + "loss": 1.1257, + "step": 6576000 + }, + { + "epoch": 3.94, + "learning_rate": 2.843151352317822e-05, + "loss": 1.136, + "step": 6576500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8429413557617657e-05, + "loss": 1.1531, + "step": 6577000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8427313592057087e-05, + "loss": 1.1784, + "step": 6577500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8425213626496524e-05, + "loss": 1.1312, + "step": 6578000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8423117860867085e-05, + "loss": 1.1468, + "step": 6578500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8421017895306518e-05, + "loss": 1.1303, + "step": 6579000 + }, + { + "epoch": 3.94, + "learning_rate": 2.8418917929745948e-05, + "loss": 1.1404, + "step": 6579500 + }, + { + "epoch": 3.94, + "learning_rate": 2.8416817964185385e-05, + "loss": 1.1437, + "step": 6580000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8414717998624822e-05, + "loss": 1.1471, + "step": 6580500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8412618033064255e-05, + "loss": 1.1351, + "step": 6581000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8410518067503692e-05, + "loss": 1.1305, + "step": 6581500 + }, + { + "epoch": 3.95, + "learning_rate": 2.840841810194313e-05, + "loss": 1.1592, + "step": 6582000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8406322336313683e-05, + "loss": 1.1355, + "step": 6582500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8404222370753116e-05, + "loss": 1.1395, + "step": 6583000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8402122405192553e-05, + "loss": 1.1338, + "step": 6583500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8400026639563113e-05, + "loss": 1.1247, + "step": 6584000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8397926674002543e-05, + "loss": 1.1598, + "step": 6584500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8395830908373104e-05, + "loss": 1.1406, + "step": 6585000 + }, + { + "epoch": 3.95, + "learning_rate": 2.839373094281254e-05, + "loss": 1.1325, + "step": 6585500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8391630977251974e-05, + "loss": 1.1666, + "step": 6586000 + }, + { + "epoch": 3.95, + "learning_rate": 2.838953101169141e-05, + "loss": 1.0994, + "step": 6586500 + }, + { + "epoch": 3.95, + "learning_rate": 2.838743104613084e-05, + "loss": 1.1249, + "step": 6587000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8385331080570278e-05, + "loss": 1.1261, + "step": 6587500 + }, + { + "epoch": 3.95, + "learning_rate": 2.838323111500971e-05, + "loss": 1.134, + "step": 6588000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8381131149449148e-05, + "loss": 1.1502, + "step": 6588500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8379031183888585e-05, + "loss": 1.1713, + "step": 6589000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8376931218328018e-05, + "loss": 1.149, + "step": 6589500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8374831252767455e-05, + "loss": 1.1434, + "step": 6590000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8372731287206892e-05, + "loss": 1.1035, + "step": 6590500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8370635521577446e-05, + "loss": 1.1335, + "step": 6591000 + }, + { + "epoch": 3.95, + "learning_rate": 2.836853555601688e-05, + "loss": 1.1326, + "step": 6591500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8366439790387436e-05, + "loss": 1.1416, + "step": 6592000 + }, + { + "epoch": 3.95, + "learning_rate": 2.836433982482687e-05, + "loss": 1.1375, + "step": 6592500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8362239859266306e-05, + "loss": 1.1277, + "step": 6593000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8360139893705743e-05, + "loss": 1.1489, + "step": 6593500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8358044128076297e-05, + "loss": 1.1477, + "step": 6594000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8355944162515734e-05, + "loss": 1.129, + "step": 6594500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8353844196955167e-05, + "loss": 1.1445, + "step": 6595000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8351744231394604e-05, + "loss": 1.1469, + "step": 6595500 + }, + { + "epoch": 3.95, + "learning_rate": 2.8349648465765164e-05, + "loss": 1.1493, + "step": 6596000 + }, + { + "epoch": 3.95, + "learning_rate": 2.8347548500204594e-05, + "loss": 1.1268, + "step": 6596500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8345448534644028e-05, + "loss": 1.1436, + "step": 6597000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8343348569083465e-05, + "loss": 1.1636, + "step": 6597500 + }, + { + "epoch": 3.96, + "learning_rate": 2.83412486035229e-05, + "loss": 1.1557, + "step": 6598000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8339148637962335e-05, + "loss": 1.1582, + "step": 6598500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8337052872332892e-05, + "loss": 1.1278, + "step": 6599000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8334952906772325e-05, + "loss": 1.1254, + "step": 6599500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8332852941211762e-05, + "loss": 1.1299, + "step": 6600000 + }, + { + "epoch": 3.96, + "eval_loss": 1.1053975820541382, + "eval_runtime": 1136.7471, + "eval_samples_per_second": 463.357, + "eval_steps_per_second": 77.227, + "step": 6600000 + }, + { + "epoch": 3.96, + "learning_rate": 2.83307529756512e-05, + "loss": 1.1166, + "step": 6600500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8328657210021753e-05, + "loss": 1.1778, + "step": 6601000 + }, + { + "epoch": 3.96, + "learning_rate": 2.832655724446119e-05, + "loss": 1.1451, + "step": 6601500 + }, + { + "epoch": 3.96, + "learning_rate": 2.832446147883175e-05, + "loss": 1.132, + "step": 6602000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8322361513271183e-05, + "loss": 1.1175, + "step": 6602500 + }, + { + "epoch": 3.96, + "learning_rate": 2.832026154771062e-05, + "loss": 1.1149, + "step": 6603000 + }, + { + "epoch": 3.96, + "learning_rate": 2.831816158215005e-05, + "loss": 1.1919, + "step": 6603500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8316061616589484e-05, + "loss": 1.1429, + "step": 6604000 + }, + { + "epoch": 3.96, + "learning_rate": 2.831396165102892e-05, + "loss": 1.1324, + "step": 6604500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8311861685468357e-05, + "loss": 1.1427, + "step": 6605000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8309761719907794e-05, + "loss": 1.1462, + "step": 6605500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8307661754347228e-05, + "loss": 1.1253, + "step": 6606000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8305561788786664e-05, + "loss": 1.1617, + "step": 6606500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8303466023157218e-05, + "loss": 1.113, + "step": 6607000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8301366057596655e-05, + "loss": 1.1298, + "step": 6607500 + }, + { + "epoch": 3.96, + "learning_rate": 2.829926609203609e-05, + "loss": 1.1618, + "step": 6608000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8297166126475525e-05, + "loss": 1.1417, + "step": 6608500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8295066160914962e-05, + "loss": 1.1328, + "step": 6609000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8292966195354395e-05, + "loss": 1.1584, + "step": 6609500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8290866229793832e-05, + "loss": 1.1354, + "step": 6610000 + }, + { + "epoch": 3.96, + "learning_rate": 2.8288770464164386e-05, + "loss": 1.1325, + "step": 6610500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8286670498603823e-05, + "loss": 1.1364, + "step": 6611000 + }, + { + "epoch": 3.96, + "learning_rate": 2.828457053304326e-05, + "loss": 1.1489, + "step": 6611500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8282470567482693e-05, + "loss": 1.1213, + "step": 6612000 + }, + { + "epoch": 3.96, + "learning_rate": 2.828037060192213e-05, + "loss": 1.1831, + "step": 6612500 + }, + { + "epoch": 3.96, + "learning_rate": 2.8278274836292683e-05, + "loss": 1.1451, + "step": 6613000 + }, + { + "epoch": 3.97, + "learning_rate": 2.827617487073212e-05, + "loss": 1.1673, + "step": 6613500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8274074905171557e-05, + "loss": 1.1529, + "step": 6614000 + }, + { + "epoch": 3.97, + "learning_rate": 2.827197493961099e-05, + "loss": 1.1351, + "step": 6614500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8269874974050427e-05, + "loss": 1.1315, + "step": 6615000 + }, + { + "epoch": 3.97, + "learning_rate": 2.826777920842098e-05, + "loss": 1.1342, + "step": 6615500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8265679242860418e-05, + "loss": 1.1648, + "step": 6616000 + }, + { + "epoch": 3.97, + "learning_rate": 2.826357927729985e-05, + "loss": 1.1257, + "step": 6616500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8261479311739288e-05, + "loss": 1.1424, + "step": 6617000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8259383546109842e-05, + "loss": 1.1509, + "step": 6617500 + }, + { + "epoch": 3.97, + "learning_rate": 2.825728358054928e-05, + "loss": 1.1564, + "step": 6618000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8255183614988715e-05, + "loss": 1.1612, + "step": 6618500 + }, + { + "epoch": 3.97, + "learning_rate": 2.825308364942815e-05, + "loss": 1.1354, + "step": 6619000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8250983683867586e-05, + "loss": 1.1359, + "step": 6619500 + }, + { + "epoch": 3.97, + "learning_rate": 2.824889211816926e-05, + "loss": 1.1657, + "step": 6620000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8246792152608693e-05, + "loss": 1.1439, + "step": 6620500 + }, + { + "epoch": 3.97, + "learning_rate": 2.824469218704813e-05, + "loss": 1.1338, + "step": 6621000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8242592221487567e-05, + "loss": 1.1486, + "step": 6621500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8240492255927e-05, + "loss": 1.1352, + "step": 6622000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8238392290366437e-05, + "loss": 1.1408, + "step": 6622500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8236292324805874e-05, + "loss": 1.1077, + "step": 6623000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8234192359245307e-05, + "loss": 1.1398, + "step": 6623500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8232092393684744e-05, + "loss": 1.1576, + "step": 6624000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8229996628055298e-05, + "loss": 1.1296, + "step": 6624500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8227896662494735e-05, + "loss": 1.1579, + "step": 6625000 + }, + { + "epoch": 3.97, + "learning_rate": 2.822579669693417e-05, + "loss": 1.1228, + "step": 6625500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8223696731373605e-05, + "loss": 1.1247, + "step": 6626000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8221600965744162e-05, + "loss": 1.1206, + "step": 6626500 + }, + { + "epoch": 3.97, + "learning_rate": 2.8219501000183595e-05, + "loss": 1.1666, + "step": 6627000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8217401034623032e-05, + "loss": 1.1292, + "step": 6627500 + }, + { + "epoch": 3.97, + "learning_rate": 2.821530106906247e-05, + "loss": 1.1444, + "step": 6628000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8213201103501902e-05, + "loss": 1.1176, + "step": 6628500 + }, + { + "epoch": 3.97, + "learning_rate": 2.821110113794134e-05, + "loss": 1.1175, + "step": 6629000 + }, + { + "epoch": 3.97, + "learning_rate": 2.8209005372311893e-05, + "loss": 1.155, + "step": 6629500 + }, + { + "epoch": 3.97, + "learning_rate": 2.820690540675133e-05, + "loss": 1.1742, + "step": 6630000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8204809641121883e-05, + "loss": 1.1485, + "step": 6630500 + }, + { + "epoch": 3.98, + "learning_rate": 2.820270967556132e-05, + "loss": 1.1496, + "step": 6631000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8200609710000754e-05, + "loss": 1.1309, + "step": 6631500 + }, + { + "epoch": 3.98, + "learning_rate": 2.819850974444019e-05, + "loss": 1.1193, + "step": 6632000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8196409778879627e-05, + "loss": 1.1329, + "step": 6632500 + }, + { + "epoch": 3.98, + "learning_rate": 2.819430981331906e-05, + "loss": 1.1419, + "step": 6633000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8192209847758498e-05, + "loss": 1.1545, + "step": 6633500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8190109882197934e-05, + "loss": 1.1379, + "step": 6634000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8188009916637368e-05, + "loss": 1.1372, + "step": 6634500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8185914151007925e-05, + "loss": 1.1271, + "step": 6635000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8183814185447358e-05, + "loss": 1.1563, + "step": 6635500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8181714219886795e-05, + "loss": 1.1735, + "step": 6636000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8179614254326232e-05, + "loss": 1.1352, + "step": 6636500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8177522688627902e-05, + "loss": 1.1551, + "step": 6637000 + }, + { + "epoch": 3.98, + "learning_rate": 2.817542272306734e-05, + "loss": 1.1317, + "step": 6637500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8173322757506776e-05, + "loss": 1.118, + "step": 6638000 + }, + { + "epoch": 3.98, + "learning_rate": 2.817122279194621e-05, + "loss": 1.1318, + "step": 6638500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8169122826385646e-05, + "loss": 1.146, + "step": 6639000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8167022860825083e-05, + "loss": 1.1461, + "step": 6639500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8164922895264517e-05, + "loss": 1.1404, + "step": 6640000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8162822929703953e-05, + "loss": 1.1597, + "step": 6640500 + }, + { + "epoch": 3.98, + "learning_rate": 2.816072296414339e-05, + "loss": 1.1558, + "step": 6641000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8158622998582824e-05, + "loss": 1.1395, + "step": 6641500 + }, + { + "epoch": 3.98, + "learning_rate": 2.815652303302226e-05, + "loss": 1.1533, + "step": 6642000 + }, + { + "epoch": 3.98, + "learning_rate": 2.815442306746169e-05, + "loss": 1.1513, + "step": 6642500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8152323101901127e-05, + "loss": 1.1391, + "step": 6643000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8150227336271688e-05, + "loss": 1.1492, + "step": 6643500 + }, + { + "epoch": 3.98, + "learning_rate": 2.814812737071112e-05, + "loss": 1.1401, + "step": 6644000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8146027405150558e-05, + "loss": 1.1278, + "step": 6644500 + }, + { + "epoch": 3.98, + "learning_rate": 2.8143927439589988e-05, + "loss": 1.1623, + "step": 6645000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8141827474029425e-05, + "loss": 1.1539, + "step": 6645500 + }, + { + "epoch": 3.98, + "learning_rate": 2.813972750846886e-05, + "loss": 1.1516, + "step": 6646000 + }, + { + "epoch": 3.98, + "learning_rate": 2.8137627542908295e-05, + "loss": 1.1657, + "step": 6646500 + }, + { + "epoch": 3.99, + "learning_rate": 2.813553177727885e-05, + "loss": 1.1493, + "step": 6647000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8133431811718286e-05, + "loss": 1.1608, + "step": 6647500 + }, + { + "epoch": 3.99, + "learning_rate": 2.813133184615772e-05, + "loss": 1.1212, + "step": 6648000 + }, + { + "epoch": 3.99, + "learning_rate": 2.812923608052828e-05, + "loss": 1.1432, + "step": 6648500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8127136114967716e-05, + "loss": 1.1756, + "step": 6649000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8125036149407147e-05, + "loss": 1.1559, + "step": 6649500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8122936183846583e-05, + "loss": 1.175, + "step": 6650000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8120836218286017e-05, + "loss": 1.1538, + "step": 6650500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8118736252725454e-05, + "loss": 1.1343, + "step": 6651000 + }, + { + "epoch": 3.99, + "learning_rate": 2.811663628716489e-05, + "loss": 1.1303, + "step": 6651500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8114536321604324e-05, + "loss": 1.1432, + "step": 6652000 + }, + { + "epoch": 3.99, + "learning_rate": 2.811243635604376e-05, + "loss": 1.1276, + "step": 6652500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8110340590414314e-05, + "loss": 1.1386, + "step": 6653000 + }, + { + "epoch": 3.99, + "learning_rate": 2.810824062485375e-05, + "loss": 1.1416, + "step": 6653500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8106140659293188e-05, + "loss": 1.1282, + "step": 6654000 + }, + { + "epoch": 3.99, + "learning_rate": 2.810404069373262e-05, + "loss": 1.1437, + "step": 6654500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8101944928103175e-05, + "loss": 1.1523, + "step": 6655000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8099844962542612e-05, + "loss": 1.1614, + "step": 6655500 + }, + { + "epoch": 3.99, + "learning_rate": 2.809774499698205e-05, + "loss": 1.1564, + "step": 6656000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8095645031421482e-05, + "loss": 1.1576, + "step": 6656500 + }, + { + "epoch": 3.99, + "learning_rate": 2.809354926579204e-05, + "loss": 1.1441, + "step": 6657000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8091449300231473e-05, + "loss": 1.1578, + "step": 6657500 + }, + { + "epoch": 3.99, + "learning_rate": 2.808934933467091e-05, + "loss": 1.143, + "step": 6658000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8087249369110346e-05, + "loss": 1.1553, + "step": 6658500 + }, + { + "epoch": 3.99, + "learning_rate": 2.808514940354978e-05, + "loss": 1.138, + "step": 6659000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8083049437989217e-05, + "loss": 1.1453, + "step": 6659500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8080949472428654e-05, + "loss": 1.1214, + "step": 6660000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8078849506868087e-05, + "loss": 1.1574, + "step": 6660500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8076753741238644e-05, + "loss": 1.1698, + "step": 6661000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8074657975609198e-05, + "loss": 1.1365, + "step": 6661500 + }, + { + "epoch": 3.99, + "learning_rate": 2.807255801004863e-05, + "loss": 1.1467, + "step": 6662000 + }, + { + "epoch": 3.99, + "learning_rate": 2.8070458044488068e-05, + "loss": 1.1199, + "step": 6662500 + }, + { + "epoch": 3.99, + "learning_rate": 2.8068358078927505e-05, + "loss": 1.1397, + "step": 6663000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8066258113366938e-05, + "loss": 1.1153, + "step": 6663500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8064158147806375e-05, + "loss": 1.1376, + "step": 6664000 + }, + { + "epoch": 4.0, + "learning_rate": 2.806206238217693e-05, + "loss": 1.1499, + "step": 6664500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8059962416616365e-05, + "loss": 1.1314, + "step": 6665000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8057862451055802e-05, + "loss": 1.1478, + "step": 6665500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8055762485495236e-05, + "loss": 1.1365, + "step": 6666000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8053666719865793e-05, + "loss": 1.1097, + "step": 6666500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8051566754305226e-05, + "loss": 1.1546, + "step": 6667000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8049466788744663e-05, + "loss": 1.1474, + "step": 6667500 + }, + { + "epoch": 4.0, + "learning_rate": 2.80473668231841e-05, + "loss": 1.1392, + "step": 6668000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8045271057554653e-05, + "loss": 1.1135, + "step": 6668500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8043175291925214e-05, + "loss": 1.1623, + "step": 6669000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8041075326364647e-05, + "loss": 1.166, + "step": 6669500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8038975360804084e-05, + "loss": 1.1476, + "step": 6670000 + }, + { + "epoch": 4.0, + "learning_rate": 2.803687539524352e-05, + "loss": 1.1547, + "step": 6670500 + }, + { + "epoch": 4.0, + "learning_rate": 2.803477542968295e-05, + "loss": 1.1494, + "step": 6671000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8032675464122384e-05, + "loss": 1.1527, + "step": 6671500 + }, + { + "epoch": 4.0, + "learning_rate": 2.803057549856182e-05, + "loss": 1.1182, + "step": 6672000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8028475533001258e-05, + "loss": 1.0951, + "step": 6672500 + }, + { + "epoch": 4.0, + "learning_rate": 2.802637976737182e-05, + "loss": 1.123, + "step": 6673000 + }, + { + "epoch": 4.0, + "learning_rate": 2.802427980181125e-05, + "loss": 1.0784, + "step": 6673500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8022179836250682e-05, + "loss": 1.0887, + "step": 6674000 + }, + { + "epoch": 4.0, + "learning_rate": 2.802007987069012e-05, + "loss": 1.0797, + "step": 6674500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8017979905129556e-05, + "loss": 1.0999, + "step": 6675000 + }, + { + "epoch": 4.0, + "learning_rate": 2.801587993956899e-05, + "loss": 1.0957, + "step": 6675500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8013784173939543e-05, + "loss": 1.102, + "step": 6676000 + }, + { + "epoch": 4.0, + "learning_rate": 2.801168420837898e-05, + "loss": 1.0994, + "step": 6676500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8009584242818416e-05, + "loss": 1.1061, + "step": 6677000 + }, + { + "epoch": 4.0, + "learning_rate": 2.8007484277257853e-05, + "loss": 1.0902, + "step": 6677500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8005384311697287e-05, + "loss": 1.0907, + "step": 6678000 + }, + { + "epoch": 4.0, + "learning_rate": 2.800328854606784e-05, + "loss": 1.1171, + "step": 6678500 + }, + { + "epoch": 4.0, + "learning_rate": 2.8001188580507277e-05, + "loss": 1.1177, + "step": 6679000 + }, + { + "epoch": 4.0, + "learning_rate": 2.7999088614946714e-05, + "loss": 1.0836, + "step": 6679500 + }, + { + "epoch": 4.0, + "learning_rate": 2.7996988649386147e-05, + "loss": 1.1335, + "step": 6680000 + }, + { + "epoch": 4.01, + "learning_rate": 2.7994888683825584e-05, + "loss": 1.1234, + "step": 6680500 + }, + { + "epoch": 4.01, + "learning_rate": 2.799278871826502e-05, + "loss": 1.1053, + "step": 6681000 + }, + { + "epoch": 4.01, + "learning_rate": 2.7990692952635578e-05, + "loss": 1.1265, + "step": 6681500 + }, + { + "epoch": 4.01, + "learning_rate": 2.798859298707501e-05, + "loss": 1.0843, + "step": 6682000 + }, + { + "epoch": 4.01, + "learning_rate": 2.7986493021514445e-05, + "loss": 1.1397, + "step": 6682500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7984393055953882e-05, + "loss": 1.1023, + "step": 6683000 + }, + { + "epoch": 4.01, + "learning_rate": 2.798229729032444e-05, + "loss": 1.0721, + "step": 6683500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7980197324763876e-05, + "loss": 1.1035, + "step": 6684000 + }, + { + "epoch": 4.01, + "learning_rate": 2.797809735920331e-05, + "loss": 1.1101, + "step": 6684500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7975997393642743e-05, + "loss": 1.0797, + "step": 6685000 + }, + { + "epoch": 4.01, + "learning_rate": 2.797389742808218e-05, + "loss": 1.115, + "step": 6685500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7971797462521616e-05, + "loss": 1.08, + "step": 6686000 + }, + { + "epoch": 4.01, + "learning_rate": 2.796969749696105e-05, + "loss": 1.081, + "step": 6686500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7967597531400483e-05, + "loss": 1.1315, + "step": 6687000 + }, + { + "epoch": 4.01, + "learning_rate": 2.796550176577104e-05, + "loss": 1.105, + "step": 6687500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7963406000141597e-05, + "loss": 1.1179, + "step": 6688000 + }, + { + "epoch": 4.01, + "learning_rate": 2.7961306034581034e-05, + "loss": 1.107, + "step": 6688500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7959206069020467e-05, + "loss": 1.0812, + "step": 6689000 + }, + { + "epoch": 4.01, + "learning_rate": 2.79571061034599e-05, + "loss": 1.0981, + "step": 6689500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7955006137899338e-05, + "loss": 1.1077, + "step": 6690000 + }, + { + "epoch": 4.01, + "learning_rate": 2.7952910372269895e-05, + "loss": 1.116, + "step": 6690500 + }, + { + "epoch": 4.01, + "learning_rate": 2.795081040670933e-05, + "loss": 1.0949, + "step": 6691000 + }, + { + "epoch": 4.01, + "learning_rate": 2.7948710441148765e-05, + "loss": 1.0958, + "step": 6691500 + }, + { + "epoch": 4.01, + "learning_rate": 2.79466104755882e-05, + "loss": 1.0903, + "step": 6692000 + }, + { + "epoch": 4.01, + "learning_rate": 2.7944510510027635e-05, + "loss": 1.0907, + "step": 6692500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7942410544467072e-05, + "loss": 1.1169, + "step": 6693000 + }, + { + "epoch": 4.01, + "learning_rate": 2.794031477883763e-05, + "loss": 1.0921, + "step": 6693500 + }, + { + "epoch": 4.01, + "learning_rate": 2.793821481327706e-05, + "loss": 1.0921, + "step": 6694000 + }, + { + "epoch": 4.01, + "learning_rate": 2.7936114847716496e-05, + "loss": 1.0842, + "step": 6694500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7934014882155933e-05, + "loss": 1.0857, + "step": 6695000 + }, + { + "epoch": 4.01, + "learning_rate": 2.793191911652649e-05, + "loss": 1.0997, + "step": 6695500 + }, + { + "epoch": 4.01, + "learning_rate": 2.7929823350897043e-05, + "loss": 1.0893, + "step": 6696000 + }, + { + "epoch": 4.01, + "learning_rate": 2.792772338533648e-05, + "loss": 1.1014, + "step": 6696500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7925623419775914e-05, + "loss": 1.0807, + "step": 6697000 + }, + { + "epoch": 4.02, + "learning_rate": 2.792352345421535e-05, + "loss": 1.0693, + "step": 6697500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7921423488654787e-05, + "loss": 1.0768, + "step": 6698000 + }, + { + "epoch": 4.02, + "learning_rate": 2.791932352309422e-05, + "loss": 1.0837, + "step": 6698500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7917223557533654e-05, + "loss": 1.089, + "step": 6699000 + }, + { + "epoch": 4.02, + "learning_rate": 2.791512359197309e-05, + "loss": 1.1103, + "step": 6699500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7913023626412528e-05, + "loss": 1.0976, + "step": 6700000 + }, + { + "epoch": 4.02, + "eval_loss": 1.1042001247406006, + "eval_runtime": 1105.5887, + "eval_samples_per_second": 476.416, + "eval_steps_per_second": 79.403, + "step": 6700000 + }, + { + "epoch": 4.02, + "learning_rate": 2.791092366085196e-05, + "loss": 1.0699, + "step": 6700500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7908827895222515e-05, + "loss": 1.1071, + "step": 6701000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7906727929661952e-05, + "loss": 1.1017, + "step": 6701500 + }, + { + "epoch": 4.02, + "learning_rate": 2.790462796410139e-05, + "loss": 1.0971, + "step": 6702000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7902532198471946e-05, + "loss": 1.1278, + "step": 6702500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7900432232911383e-05, + "loss": 1.0875, + "step": 6703000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7898332267350813e-05, + "loss": 1.077, + "step": 6703500 + }, + { + "epoch": 4.02, + "learning_rate": 2.789623230179025e-05, + "loss": 1.1148, + "step": 6704000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7894132336229686e-05, + "loss": 1.1277, + "step": 6704500 + }, + { + "epoch": 4.02, + "learning_rate": 2.789203237066912e-05, + "loss": 1.0689, + "step": 6705000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7889932405108557e-05, + "loss": 1.1066, + "step": 6705500 + }, + { + "epoch": 4.02, + "learning_rate": 2.788783243954799e-05, + "loss": 1.1246, + "step": 6706000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7885736673918547e-05, + "loss": 1.0982, + "step": 6706500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7883636708357984e-05, + "loss": 1.1093, + "step": 6707000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7881536742797417e-05, + "loss": 1.1031, + "step": 6707500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7879436777236854e-05, + "loss": 1.1149, + "step": 6708000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7877336811676288e-05, + "loss": 1.0958, + "step": 6708500 + }, + { + "epoch": 4.02, + "learning_rate": 2.787523684611572e-05, + "loss": 1.1406, + "step": 6709000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7873136880555158e-05, + "loss": 1.0926, + "step": 6709500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7871041114925715e-05, + "loss": 1.1037, + "step": 6710000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7868941149365152e-05, + "loss": 1.0921, + "step": 6710500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7866841183804585e-05, + "loss": 1.0839, + "step": 6711000 + }, + { + "epoch": 4.02, + "learning_rate": 2.786474121824402e-05, + "loss": 1.0922, + "step": 6711500 + }, + { + "epoch": 4.02, + "learning_rate": 2.7862641252683456e-05, + "loss": 1.0899, + "step": 6712000 + }, + { + "epoch": 4.02, + "learning_rate": 2.7860541287122892e-05, + "loss": 1.1145, + "step": 6712500 + }, + { + "epoch": 4.02, + "learning_rate": 2.785844552149345e-05, + "loss": 1.1082, + "step": 6713000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7856349755864003e-05, + "loss": 1.1, + "step": 6713500 + }, + { + "epoch": 4.03, + "learning_rate": 2.785424979030344e-05, + "loss": 1.1247, + "step": 6714000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7852149824742873e-05, + "loss": 1.1087, + "step": 6714500 + }, + { + "epoch": 4.03, + "learning_rate": 2.785004985918231e-05, + "loss": 1.1108, + "step": 6715000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7847949893621744e-05, + "loss": 1.1321, + "step": 6715500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7845849928061177e-05, + "loss": 1.0895, + "step": 6716000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7843754162431737e-05, + "loss": 1.0997, + "step": 6716500 + }, + { + "epoch": 4.03, + "learning_rate": 2.784165419687117e-05, + "loss": 1.1038, + "step": 6717000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7839554231310608e-05, + "loss": 1.0815, + "step": 6717500 + }, + { + "epoch": 4.03, + "learning_rate": 2.783745426575004e-05, + "loss": 1.1015, + "step": 6718000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7835354300189475e-05, + "loss": 1.088, + "step": 6718500 + }, + { + "epoch": 4.03, + "learning_rate": 2.783325433462891e-05, + "loss": 1.1224, + "step": 6719000 + }, + { + "epoch": 4.03, + "learning_rate": 2.783115436906835e-05, + "loss": 1.1163, + "step": 6719500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7829054403507782e-05, + "loss": 1.1105, + "step": 6720000 + }, + { + "epoch": 4.03, + "learning_rate": 2.782695443794722e-05, + "loss": 1.1135, + "step": 6720500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7824854472386652e-05, + "loss": 1.0769, + "step": 6721000 + }, + { + "epoch": 4.03, + "learning_rate": 2.782275870675721e-05, + "loss": 1.1109, + "step": 6721500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7820658741196646e-05, + "loss": 1.0998, + "step": 6722000 + }, + { + "epoch": 4.03, + "learning_rate": 2.781855877563608e-05, + "loss": 1.136, + "step": 6722500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7816458810075513e-05, + "loss": 1.1141, + "step": 6723000 + }, + { + "epoch": 4.03, + "learning_rate": 2.781435884451495e-05, + "loss": 1.1206, + "step": 6723500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7812263078885507e-05, + "loss": 1.1066, + "step": 6724000 + }, + { + "epoch": 4.03, + "learning_rate": 2.781016311332494e-05, + "loss": 1.098, + "step": 6724500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7808063147764377e-05, + "loss": 1.1055, + "step": 6725000 + }, + { + "epoch": 4.03, + "learning_rate": 2.780596738213493e-05, + "loss": 1.0986, + "step": 6725500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7803867416574367e-05, + "loss": 1.1165, + "step": 6726000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7801767451013804e-05, + "loss": 1.1311, + "step": 6726500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7799667485453238e-05, + "loss": 1.0955, + "step": 6727000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7797567519892675e-05, + "loss": 1.0762, + "step": 6727500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7795467554332108e-05, + "loss": 1.0958, + "step": 6728000 + }, + { + "epoch": 4.03, + "learning_rate": 2.779336758877154e-05, + "loss": 1.1248, + "step": 6728500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7791267623210978e-05, + "loss": 1.1025, + "step": 6729000 + }, + { + "epoch": 4.03, + "learning_rate": 2.7789171857581535e-05, + "loss": 1.1038, + "step": 6729500 + }, + { + "epoch": 4.03, + "learning_rate": 2.7787071892020972e-05, + "loss": 1.1074, + "step": 6730000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7784971926460406e-05, + "loss": 1.1046, + "step": 6730500 + }, + { + "epoch": 4.04, + "learning_rate": 2.778287196089984e-05, + "loss": 1.1062, + "step": 6731000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7780771995339276e-05, + "loss": 1.1035, + "step": 6731500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7778672029778713e-05, + "loss": 1.0792, + "step": 6732000 + }, + { + "epoch": 4.04, + "learning_rate": 2.777657626414927e-05, + "loss": 1.0923, + "step": 6732500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7774476298588703e-05, + "loss": 1.0809, + "step": 6733000 + }, + { + "epoch": 4.04, + "learning_rate": 2.777238053295926e-05, + "loss": 1.1036, + "step": 6733500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7770280567398694e-05, + "loss": 1.1067, + "step": 6734000 + }, + { + "epoch": 4.04, + "learning_rate": 2.776818060183813e-05, + "loss": 1.1032, + "step": 6734500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7766080636277564e-05, + "loss": 1.1346, + "step": 6735000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7763980670716997e-05, + "loss": 1.1225, + "step": 6735500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7761880705156434e-05, + "loss": 1.1248, + "step": 6736000 + }, + { + "epoch": 4.04, + "learning_rate": 2.775978073959587e-05, + "loss": 1.0935, + "step": 6736500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7757680774035308e-05, + "loss": 1.1185, + "step": 6737000 + }, + { + "epoch": 4.04, + "learning_rate": 2.775558500840586e-05, + "loss": 1.1204, + "step": 6737500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7753485042845295e-05, + "loss": 1.0994, + "step": 6738000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7751385077284732e-05, + "loss": 1.1057, + "step": 6738500 + }, + { + "epoch": 4.04, + "learning_rate": 2.774928511172417e-05, + "loss": 1.081, + "step": 6739000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7747185146163602e-05, + "loss": 1.0747, + "step": 6739500 + }, + { + "epoch": 4.04, + "learning_rate": 2.774508938053416e-05, + "loss": 1.106, + "step": 6740000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7742989414973592e-05, + "loss": 1.1109, + "step": 6740500 + }, + { + "epoch": 4.04, + "learning_rate": 2.774088944941303e-05, + "loss": 1.0931, + "step": 6741000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7738789483852466e-05, + "loss": 1.0992, + "step": 6741500 + }, + { + "epoch": 4.04, + "learning_rate": 2.77366895182919e-05, + "loss": 1.1114, + "step": 6742000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7734589552731333e-05, + "loss": 1.0996, + "step": 6742500 + }, + { + "epoch": 4.04, + "learning_rate": 2.773248958717077e-05, + "loss": 1.1265, + "step": 6743000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7730393821541327e-05, + "loss": 1.1023, + "step": 6743500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7728293855980764e-05, + "loss": 1.1232, + "step": 6744000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7726198090351317e-05, + "loss": 1.1105, + "step": 6744500 + }, + { + "epoch": 4.04, + "learning_rate": 2.772409812479075e-05, + "loss": 1.0796, + "step": 6745000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7721998159230188e-05, + "loss": 1.0974, + "step": 6745500 + }, + { + "epoch": 4.04, + "learning_rate": 2.7719898193669624e-05, + "loss": 1.1317, + "step": 6746000 + }, + { + "epoch": 4.04, + "learning_rate": 2.7717798228109058e-05, + "loss": 1.0828, + "step": 6746500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7715698262548495e-05, + "loss": 1.1073, + "step": 6747000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7713598296987928e-05, + "loss": 1.1082, + "step": 6747500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7711498331427365e-05, + "loss": 1.1336, + "step": 6748000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7709402565797922e-05, + "loss": 1.1136, + "step": 6748500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7707302600237356e-05, + "loss": 1.0775, + "step": 6749000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7705202634676792e-05, + "loss": 1.0865, + "step": 6749500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7703102669116226e-05, + "loss": 1.0845, + "step": 6750000 + }, + { + "epoch": 4.05, + "learning_rate": 2.770100270355566e-05, + "loss": 1.1213, + "step": 6750500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7698902737995096e-05, + "loss": 1.1024, + "step": 6751000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7696802772434533e-05, + "loss": 1.0939, + "step": 6751500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7694702806873966e-05, + "loss": 1.0909, + "step": 6752000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7692607041244523e-05, + "loss": 1.101, + "step": 6752500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7690507075683957e-05, + "loss": 1.1097, + "step": 6753000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7688411310054514e-05, + "loss": 1.1101, + "step": 6753500 + }, + { + "epoch": 4.05, + "learning_rate": 2.768631554442507e-05, + "loss": 1.1193, + "step": 6754000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7684215578864504e-05, + "loss": 1.1049, + "step": 6754500 + }, + { + "epoch": 4.05, + "learning_rate": 2.768211561330394e-05, + "loss": 1.1058, + "step": 6755000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7680015647743378e-05, + "loss": 1.0939, + "step": 6755500 + }, + { + "epoch": 4.05, + "learning_rate": 2.767791568218281e-05, + "loss": 1.1267, + "step": 6756000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7675815716622248e-05, + "loss": 1.1316, + "step": 6756500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7673715751061682e-05, + "loss": 1.1128, + "step": 6757000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7671615785501115e-05, + "loss": 1.1058, + "step": 6757500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7669515819940552e-05, + "loss": 1.1003, + "step": 6758000 + }, + { + "epoch": 4.05, + "learning_rate": 2.766742005431111e-05, + "loss": 1.1204, + "step": 6758500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7665320088750546e-05, + "loss": 1.1024, + "step": 6759000 + }, + { + "epoch": 4.05, + "learning_rate": 2.766322012318998e-05, + "loss": 1.1095, + "step": 6759500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7661120157629413e-05, + "loss": 1.1191, + "step": 6760000 + }, + { + "epoch": 4.05, + "learning_rate": 2.765902019206885e-05, + "loss": 1.109, + "step": 6760500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7656924426439407e-05, + "loss": 1.1154, + "step": 6761000 + }, + { + "epoch": 4.05, + "learning_rate": 2.765482446087884e-05, + "loss": 1.0895, + "step": 6761500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7652724495318277e-05, + "loss": 1.1, + "step": 6762000 + }, + { + "epoch": 4.05, + "learning_rate": 2.765062452975771e-05, + "loss": 1.1222, + "step": 6762500 + }, + { + "epoch": 4.05, + "learning_rate": 2.7648528764128267e-05, + "loss": 1.0936, + "step": 6763000 + }, + { + "epoch": 4.05, + "learning_rate": 2.7646428798567704e-05, + "loss": 1.0933, + "step": 6763500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7644328833007138e-05, + "loss": 1.1006, + "step": 6764000 + }, + { + "epoch": 4.06, + "learning_rate": 2.764222886744657e-05, + "loss": 1.1088, + "step": 6764500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7640128901886008e-05, + "loss": 1.1005, + "step": 6765000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7638028936325445e-05, + "loss": 1.1131, + "step": 6765500 + }, + { + "epoch": 4.06, + "learning_rate": 2.763592897076488e-05, + "loss": 1.0527, + "step": 6766000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7633833205135435e-05, + "loss": 1.1177, + "step": 6766500 + }, + { + "epoch": 4.06, + "learning_rate": 2.763173323957487e-05, + "loss": 1.0913, + "step": 6767000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7629633274014305e-05, + "loss": 1.0798, + "step": 6767500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7627533308453742e-05, + "loss": 1.0966, + "step": 6768000 + }, + { + "epoch": 4.06, + "learning_rate": 2.76254375428243e-05, + "loss": 1.1047, + "step": 6768500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7623337577263733e-05, + "loss": 1.1143, + "step": 6769000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7621237611703166e-05, + "loss": 1.1149, + "step": 6769500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7619137646142603e-05, + "loss": 1.0979, + "step": 6770000 + }, + { + "epoch": 4.06, + "learning_rate": 2.761704608044428e-05, + "loss": 1.0826, + "step": 6770500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7614946114883714e-05, + "loss": 1.12, + "step": 6771000 + }, + { + "epoch": 4.06, + "learning_rate": 2.761284614932315e-05, + "loss": 1.1176, + "step": 6771500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7610746183762587e-05, + "loss": 1.1035, + "step": 6772000 + }, + { + "epoch": 4.06, + "learning_rate": 2.760864621820202e-05, + "loss": 1.1009, + "step": 6772500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7606546252641458e-05, + "loss": 1.0949, + "step": 6773000 + }, + { + "epoch": 4.06, + "learning_rate": 2.760444628708089e-05, + "loss": 1.1017, + "step": 6773500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7602346321520324e-05, + "loss": 1.106, + "step": 6774000 + }, + { + "epoch": 4.06, + "learning_rate": 2.760025055589088e-05, + "loss": 1.1099, + "step": 6774500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7598150590330318e-05, + "loss": 1.1, + "step": 6775000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7596050624769755e-05, + "loss": 1.0991, + "step": 6775500 + }, + { + "epoch": 4.06, + "learning_rate": 2.759395065920919e-05, + "loss": 1.1099, + "step": 6776000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7591850693648622e-05, + "loss": 1.1094, + "step": 6776500 + }, + { + "epoch": 4.06, + "learning_rate": 2.758975072808806e-05, + "loss": 1.1087, + "step": 6777000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7587650762527496e-05, + "loss": 1.1071, + "step": 6777500 + }, + { + "epoch": 4.06, + "learning_rate": 2.758555079696693e-05, + "loss": 1.1125, + "step": 6778000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7583455031337483e-05, + "loss": 1.1117, + "step": 6778500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7581359265708043e-05, + "loss": 1.1064, + "step": 6779000 + }, + { + "epoch": 4.06, + "learning_rate": 2.7579259300147477e-05, + "loss": 1.0645, + "step": 6779500 + }, + { + "epoch": 4.06, + "learning_rate": 2.7577159334586913e-05, + "loss": 1.0815, + "step": 6780000 + }, + { + "epoch": 4.07, + "learning_rate": 2.757505936902635e-05, + "loss": 1.1059, + "step": 6780500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7572963603396904e-05, + "loss": 1.1156, + "step": 6781000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7570863637836337e-05, + "loss": 1.1028, + "step": 6781500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7568763672275774e-05, + "loss": 1.1111, + "step": 6782000 + }, + { + "epoch": 4.07, + "learning_rate": 2.756666370671521e-05, + "loss": 1.1037, + "step": 6782500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7564563741154644e-05, + "loss": 1.0984, + "step": 6783000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7562463775594078e-05, + "loss": 1.0954, + "step": 6783500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7560363810033515e-05, + "loss": 1.1251, + "step": 6784000 + }, + { + "epoch": 4.07, + "learning_rate": 2.755826384447295e-05, + "loss": 1.1067, + "step": 6784500 + }, + { + "epoch": 4.07, + "learning_rate": 2.755616807884351e-05, + "loss": 1.1245, + "step": 6785000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7554072313214062e-05, + "loss": 1.1046, + "step": 6785500 + }, + { + "epoch": 4.07, + "learning_rate": 2.75519723476535e-05, + "loss": 1.0868, + "step": 6786000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7549872382092932e-05, + "loss": 1.1265, + "step": 6786500 + }, + { + "epoch": 4.07, + "learning_rate": 2.754777241653237e-05, + "loss": 1.1063, + "step": 6787000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7545672450971806e-05, + "loss": 1.1009, + "step": 6787500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7543572485411236e-05, + "loss": 1.1147, + "step": 6788000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7541472519850673e-05, + "loss": 1.0998, + "step": 6788500 + }, + { + "epoch": 4.07, + "learning_rate": 2.753937255429011e-05, + "loss": 1.0933, + "step": 6789000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7537272588729543e-05, + "loss": 1.0982, + "step": 6789500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7535176823100104e-05, + "loss": 1.1222, + "step": 6790000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7533076857539534e-05, + "loss": 1.0852, + "step": 6790500 + }, + { + "epoch": 4.07, + "learning_rate": 2.753097689197897e-05, + "loss": 1.0952, + "step": 6791000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7528876926418408e-05, + "loss": 1.108, + "step": 6791500 + }, + { + "epoch": 4.07, + "learning_rate": 2.752677696085784e-05, + "loss": 1.0951, + "step": 6792000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7524681195228398e-05, + "loss": 1.1052, + "step": 6792500 + }, + { + "epoch": 4.07, + "learning_rate": 2.752258122966783e-05, + "loss": 1.0918, + "step": 6793000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7520481264107268e-05, + "loss": 1.1251, + "step": 6793500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7518381298546705e-05, + "loss": 1.0912, + "step": 6794000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7516285532917262e-05, + "loss": 1.1171, + "step": 6794500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7514189767287816e-05, + "loss": 1.1125, + "step": 6795000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7512089801727252e-05, + "loss": 1.1045, + "step": 6795500 + }, + { + "epoch": 4.07, + "learning_rate": 2.7509989836166686e-05, + "loss": 1.1078, + "step": 6796000 + }, + { + "epoch": 4.07, + "learning_rate": 2.7507889870606123e-05, + "loss": 1.1118, + "step": 6796500 + }, + { + "epoch": 4.08, + "learning_rate": 2.750578990504556e-05, + "loss": 1.1053, + "step": 6797000 + }, + { + "epoch": 4.08, + "learning_rate": 2.750368993948499e-05, + "loss": 1.1427, + "step": 6797500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7501589973924427e-05, + "loss": 1.1063, + "step": 6798000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7499490008363863e-05, + "loss": 1.1371, + "step": 6798500 + }, + { + "epoch": 4.08, + "learning_rate": 2.749739424273442e-05, + "loss": 1.0789, + "step": 6799000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7495294277173854e-05, + "loss": 1.1126, + "step": 6799500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7493194311613287e-05, + "loss": 1.0887, + "step": 6800000 + }, + { + "epoch": 4.08, + "eval_loss": 1.1015839576721191, + "eval_runtime": 1110.8058, + "eval_samples_per_second": 474.178, + "eval_steps_per_second": 79.03, + "step": 6800000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7491094346052724e-05, + "loss": 1.1064, + "step": 6800500 + }, + { + "epoch": 4.08, + "learning_rate": 2.748899858042328e-05, + "loss": 1.1013, + "step": 6801000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7486898614862718e-05, + "loss": 1.1268, + "step": 6801500 + }, + { + "epoch": 4.08, + "learning_rate": 2.748479864930215e-05, + "loss": 1.0941, + "step": 6802000 + }, + { + "epoch": 4.08, + "learning_rate": 2.748270288367271e-05, + "loss": 1.124, + "step": 6802500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7480602918112142e-05, + "loss": 1.117, + "step": 6803000 + }, + { + "epoch": 4.08, + "learning_rate": 2.747850295255158e-05, + "loss": 1.0988, + "step": 6803500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7476402986991016e-05, + "loss": 1.1171, + "step": 6804000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7474303021430446e-05, + "loss": 1.103, + "step": 6804500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7472203055869882e-05, + "loss": 1.113, + "step": 6805000 + }, + { + "epoch": 4.08, + "learning_rate": 2.747010309030932e-05, + "loss": 1.1066, + "step": 6805500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7468003124748753e-05, + "loss": 1.0861, + "step": 6806000 + }, + { + "epoch": 4.08, + "learning_rate": 2.746590315918819e-05, + "loss": 1.0916, + "step": 6806500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7463807393558743e-05, + "loss": 1.0984, + "step": 6807000 + }, + { + "epoch": 4.08, + "learning_rate": 2.746170742799818e-05, + "loss": 1.1021, + "step": 6807500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7459607462437617e-05, + "loss": 1.1295, + "step": 6808000 + }, + { + "epoch": 4.08, + "learning_rate": 2.745750749687705e-05, + "loss": 1.092, + "step": 6808500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7455407531316487e-05, + "loss": 1.1074, + "step": 6809000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7453307565755924e-05, + "loss": 1.0971, + "step": 6809500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7451211800126478e-05, + "loss": 1.119, + "step": 6810000 + }, + { + "epoch": 4.08, + "learning_rate": 2.744911183456591e-05, + "loss": 1.0841, + "step": 6810500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7447011869005348e-05, + "loss": 1.0983, + "step": 6811000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7444911903444785e-05, + "loss": 1.1035, + "step": 6811500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7442811937884218e-05, + "loss": 1.119, + "step": 6812000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7440716172254775e-05, + "loss": 1.0848, + "step": 6812500 + }, + { + "epoch": 4.08, + "learning_rate": 2.7438620406625332e-05, + "loss": 1.1094, + "step": 6813000 + }, + { + "epoch": 4.08, + "learning_rate": 2.7436520441064766e-05, + "loss": 1.1133, + "step": 6813500 + }, + { + "epoch": 4.09, + "learning_rate": 2.74344204755042e-05, + "loss": 1.1134, + "step": 6814000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7432320509943636e-05, + "loss": 1.1203, + "step": 6814500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7430224744314193e-05, + "loss": 1.1206, + "step": 6815000 + }, + { + "epoch": 4.09, + "learning_rate": 2.742812477875363e-05, + "loss": 1.0886, + "step": 6815500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7426024813193063e-05, + "loss": 1.1356, + "step": 6816000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7423924847632497e-05, + "loss": 1.1121, + "step": 6816500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7421824882071933e-05, + "loss": 1.1172, + "step": 6817000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7419724916511367e-05, + "loss": 1.1113, + "step": 6817500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7417624950950804e-05, + "loss": 1.1084, + "step": 6818000 + }, + { + "epoch": 4.09, + "learning_rate": 2.741552498539024e-05, + "loss": 1.1143, + "step": 6818500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7413429219760794e-05, + "loss": 1.1158, + "step": 6819000 + }, + { + "epoch": 4.09, + "learning_rate": 2.741132925420023e-05, + "loss": 1.0951, + "step": 6819500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7409229288639664e-05, + "loss": 1.1055, + "step": 6820000 + }, + { + "epoch": 4.09, + "learning_rate": 2.74071293230791e-05, + "loss": 1.1292, + "step": 6820500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7405029357518538e-05, + "loss": 1.1109, + "step": 6821000 + }, + { + "epoch": 4.09, + "learning_rate": 2.740292939195797e-05, + "loss": 1.1227, + "step": 6821500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7400829426397405e-05, + "loss": 1.0926, + "step": 6822000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7398729460836842e-05, + "loss": 1.1248, + "step": 6822500 + }, + { + "epoch": 4.09, + "learning_rate": 2.739662949527628e-05, + "loss": 1.0952, + "step": 6823000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7394529529715712e-05, + "loss": 1.129, + "step": 6823500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7392433764086266e-05, + "loss": 1.1085, + "step": 6824000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7390333798525703e-05, + "loss": 1.1205, + "step": 6824500 + }, + { + "epoch": 4.09, + "learning_rate": 2.738823383296514e-05, + "loss": 1.0958, + "step": 6825000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7386133867404573e-05, + "loss": 1.1103, + "step": 6825500 + }, + { + "epoch": 4.09, + "learning_rate": 2.738403390184401e-05, + "loss": 1.0888, + "step": 6826000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7381938136214563e-05, + "loss": 1.1221, + "step": 6826500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7379838170654e-05, + "loss": 1.092, + "step": 6827000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7377738205093437e-05, + "loss": 1.114, + "step": 6827500 + }, + { + "epoch": 4.09, + "learning_rate": 2.737563823953287e-05, + "loss": 1.0996, + "step": 6828000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7373538273972307e-05, + "loss": 1.1398, + "step": 6828500 + }, + { + "epoch": 4.09, + "learning_rate": 2.737144250834286e-05, + "loss": 1.1142, + "step": 6829000 + }, + { + "epoch": 4.09, + "learning_rate": 2.7369342542782298e-05, + "loss": 1.1082, + "step": 6829500 + }, + { + "epoch": 4.09, + "learning_rate": 2.7367242577221735e-05, + "loss": 1.1167, + "step": 6830000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7365142611661168e-05, + "loss": 1.1084, + "step": 6830500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7363042646100605e-05, + "loss": 1.1295, + "step": 6831000 + }, + { + "epoch": 4.1, + "learning_rate": 2.736094268054004e-05, + "loss": 1.1329, + "step": 6831500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7358846914910595e-05, + "loss": 1.0933, + "step": 6832000 + }, + { + "epoch": 4.1, + "learning_rate": 2.735674694935003e-05, + "loss": 1.1253, + "step": 6832500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7354646983789466e-05, + "loss": 1.1145, + "step": 6833000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7352547018228903e-05, + "loss": 1.0771, + "step": 6833500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7350447052668336e-05, + "loss": 1.1167, + "step": 6834000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7348351287038893e-05, + "loss": 1.1228, + "step": 6834500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7346251321478326e-05, + "loss": 1.0675, + "step": 6835000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7344151355917763e-05, + "loss": 1.1444, + "step": 6835500 + }, + { + "epoch": 4.1, + "learning_rate": 2.73420513903572e-05, + "loss": 1.1145, + "step": 6836000 + }, + { + "epoch": 4.1, + "learning_rate": 2.733995142479663e-05, + "loss": 1.1516, + "step": 6836500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7337851459236067e-05, + "loss": 1.1141, + "step": 6837000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7335751493675504e-05, + "loss": 1.1055, + "step": 6837500 + }, + { + "epoch": 4.1, + "learning_rate": 2.733365572804606e-05, + "loss": 1.1067, + "step": 6838000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7331555762485498e-05, + "loss": 1.1488, + "step": 6838500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7329455796924928e-05, + "loss": 1.1261, + "step": 6839000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7327355831364365e-05, + "loss": 1.1072, + "step": 6839500 + }, + { + "epoch": 4.1, + "learning_rate": 2.73252558658038e-05, + "loss": 1.1064, + "step": 6840000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7323155900243235e-05, + "loss": 1.1369, + "step": 6840500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7321060134613792e-05, + "loss": 1.1436, + "step": 6841000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7318960169053225e-05, + "loss": 1.0874, + "step": 6841500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7316864403423782e-05, + "loss": 1.0803, + "step": 6842000 + }, + { + "epoch": 4.1, + "learning_rate": 2.731476443786322e-05, + "loss": 1.1165, + "step": 6842500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7312664472302656e-05, + "loss": 1.1123, + "step": 6843000 + }, + { + "epoch": 4.1, + "learning_rate": 2.7310564506742086e-05, + "loss": 1.1061, + "step": 6843500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7308464541181523e-05, + "loss": 1.1123, + "step": 6844000 + }, + { + "epoch": 4.1, + "learning_rate": 2.730636457562096e-05, + "loss": 1.1017, + "step": 6844500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7304264610060397e-05, + "loss": 1.1115, + "step": 6845000 + }, + { + "epoch": 4.1, + "learning_rate": 2.730216464449983e-05, + "loss": 1.126, + "step": 6845500 + }, + { + "epoch": 4.1, + "learning_rate": 2.7300068878870384e-05, + "loss": 1.1314, + "step": 6846000 + }, + { + "epoch": 4.1, + "learning_rate": 2.729796891330982e-05, + "loss": 1.1182, + "step": 6846500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7295868947749257e-05, + "loss": 1.111, + "step": 6847000 + }, + { + "epoch": 4.11, + "learning_rate": 2.729376898218869e-05, + "loss": 1.1493, + "step": 6847500 + }, + { + "epoch": 4.11, + "learning_rate": 2.729167321655925e-05, + "loss": 1.118, + "step": 6848000 + }, + { + "epoch": 4.11, + "learning_rate": 2.728957325099868e-05, + "loss": 1.1342, + "step": 6848500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7287473285438118e-05, + "loss": 1.1174, + "step": 6849000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7285373319877555e-05, + "loss": 1.0999, + "step": 6849500 + }, + { + "epoch": 4.11, + "learning_rate": 2.728327335431699e-05, + "loss": 1.1207, + "step": 6850000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7281177588687545e-05, + "loss": 1.1054, + "step": 6850500 + }, + { + "epoch": 4.11, + "learning_rate": 2.727907762312698e-05, + "loss": 1.1054, + "step": 6851000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7276977657566416e-05, + "loss": 1.1014, + "step": 6851500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7274877692005852e-05, + "loss": 1.1035, + "step": 6852000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7272777726445286e-05, + "loss": 1.1178, + "step": 6852500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7270677760884723e-05, + "loss": 1.114, + "step": 6853000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7268581995255276e-05, + "loss": 1.0994, + "step": 6853500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7266482029694713e-05, + "loss": 1.0971, + "step": 6854000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7264382064134147e-05, + "loss": 1.1231, + "step": 6854500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7262286298504707e-05, + "loss": 1.0951, + "step": 6855000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7260186332944137e-05, + "loss": 1.1121, + "step": 6855500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7258086367383574e-05, + "loss": 1.1483, + "step": 6856000 + }, + { + "epoch": 4.11, + "learning_rate": 2.725598640182301e-05, + "loss": 1.1194, + "step": 6856500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7253886436262444e-05, + "loss": 1.1017, + "step": 6857000 + }, + { + "epoch": 4.11, + "learning_rate": 2.725178647070188e-05, + "loss": 1.0907, + "step": 6857500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7249686505141315e-05, + "loss": 1.1076, + "step": 6858000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7247586539580748e-05, + "loss": 1.1086, + "step": 6858500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7245486574020185e-05, + "loss": 1.1159, + "step": 6859000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7243386608459622e-05, + "loss": 1.1063, + "step": 6859500 + }, + { + "epoch": 4.11, + "learning_rate": 2.724129084283018e-05, + "loss": 1.1143, + "step": 6860000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7239190877269612e-05, + "loss": 1.1094, + "step": 6860500 + }, + { + "epoch": 4.11, + "learning_rate": 2.7237090911709046e-05, + "loss": 1.0927, + "step": 6861000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7234990946148482e-05, + "loss": 1.1082, + "step": 6861500 + }, + { + "epoch": 4.11, + "learning_rate": 2.723289518051904e-05, + "loss": 1.0989, + "step": 6862000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7230795214958476e-05, + "loss": 1.0945, + "step": 6862500 + }, + { + "epoch": 4.11, + "learning_rate": 2.722869524939791e-05, + "loss": 1.1073, + "step": 6863000 + }, + { + "epoch": 4.11, + "learning_rate": 2.7226595283837343e-05, + "loss": 1.1123, + "step": 6863500 + }, + { + "epoch": 4.12, + "learning_rate": 2.722449531827678e-05, + "loss": 1.103, + "step": 6864000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7222399552647337e-05, + "loss": 1.0981, + "step": 6864500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7220299587086774e-05, + "loss": 1.1409, + "step": 6865000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7218199621526204e-05, + "loss": 1.1019, + "step": 6865500 + }, + { + "epoch": 4.12, + "learning_rate": 2.721609965596564e-05, + "loss": 1.1166, + "step": 6866000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7213999690405078e-05, + "loss": 1.1385, + "step": 6866500 + }, + { + "epoch": 4.12, + "learning_rate": 2.721189972484451e-05, + "loss": 1.1397, + "step": 6867000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7209803959215068e-05, + "loss": 1.104, + "step": 6867500 + }, + { + "epoch": 4.12, + "learning_rate": 2.72077039936545e-05, + "loss": 1.1103, + "step": 6868000 + }, + { + "epoch": 4.12, + "learning_rate": 2.720560402809394e-05, + "loss": 1.1064, + "step": 6868500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7203504062533375e-05, + "loss": 1.1136, + "step": 6869000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7201408296903932e-05, + "loss": 1.11, + "step": 6869500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7199308331343366e-05, + "loss": 1.0982, + "step": 6870000 + }, + { + "epoch": 4.12, + "learning_rate": 2.71972083657828e-05, + "loss": 1.1084, + "step": 6870500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7195108400222236e-05, + "loss": 1.0734, + "step": 6871000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7193008434661673e-05, + "loss": 1.1411, + "step": 6871500 + }, + { + "epoch": 4.12, + "learning_rate": 2.719091266903223e-05, + "loss": 1.1074, + "step": 6872000 + }, + { + "epoch": 4.12, + "learning_rate": 2.718881270347166e-05, + "loss": 1.1156, + "step": 6872500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7186712737911097e-05, + "loss": 1.1208, + "step": 6873000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7184612772350533e-05, + "loss": 1.1012, + "step": 6873500 + }, + { + "epoch": 4.12, + "learning_rate": 2.718251700672109e-05, + "loss": 1.1078, + "step": 6874000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7180417041160527e-05, + "loss": 1.1246, + "step": 6874500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7178317075599957e-05, + "loss": 1.0907, + "step": 6875000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7176217110039394e-05, + "loss": 1.1062, + "step": 6875500 + }, + { + "epoch": 4.12, + "learning_rate": 2.717411714447883e-05, + "loss": 1.1102, + "step": 6876000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7172017178918265e-05, + "loss": 1.0909, + "step": 6876500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7169921413288825e-05, + "loss": 1.1136, + "step": 6877000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7167821447728255e-05, + "loss": 1.135, + "step": 6877500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7165721482167692e-05, + "loss": 1.0908, + "step": 6878000 + }, + { + "epoch": 4.12, + "learning_rate": 2.716362151660713e-05, + "loss": 1.1129, + "step": 6878500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7161525750977686e-05, + "loss": 1.116, + "step": 6879000 + }, + { + "epoch": 4.12, + "learning_rate": 2.7159425785417116e-05, + "loss": 1.1111, + "step": 6879500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7157325819856553e-05, + "loss": 1.1365, + "step": 6880000 + }, + { + "epoch": 4.13, + "learning_rate": 2.715522585429599e-05, + "loss": 1.1188, + "step": 6880500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7153125888735426e-05, + "loss": 1.0812, + "step": 6881000 + }, + { + "epoch": 4.13, + "learning_rate": 2.715102592317486e-05, + "loss": 1.1011, + "step": 6881500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7148925957614297e-05, + "loss": 1.0899, + "step": 6882000 + }, + { + "epoch": 4.13, + "learning_rate": 2.714682599205373e-05, + "loss": 1.0958, + "step": 6882500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7144726026493163e-05, + "loss": 1.1016, + "step": 6883000 + }, + { + "epoch": 4.13, + "learning_rate": 2.714263026086372e-05, + "loss": 1.1112, + "step": 6883500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7140530295303157e-05, + "loss": 1.0992, + "step": 6884000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7138430329742594e-05, + "loss": 1.1072, + "step": 6884500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7136330364182028e-05, + "loss": 1.1256, + "step": 6885000 + }, + { + "epoch": 4.13, + "learning_rate": 2.713423039862146e-05, + "loss": 1.1247, + "step": 6885500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7132130433060898e-05, + "loss": 1.1175, + "step": 6886000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7130034667431455e-05, + "loss": 1.1009, + "step": 6886500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7127934701870888e-05, + "loss": 1.0995, + "step": 6887000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7125834736310322e-05, + "loss": 1.1152, + "step": 6887500 + }, + { + "epoch": 4.13, + "learning_rate": 2.712373477074976e-05, + "loss": 1.0998, + "step": 6888000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7121639005120316e-05, + "loss": 1.1194, + "step": 6888500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7119539039559752e-05, + "loss": 1.0983, + "step": 6889000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7117439073999186e-05, + "loss": 1.1041, + "step": 6889500 + }, + { + "epoch": 4.13, + "learning_rate": 2.711533910843862e-05, + "loss": 1.0946, + "step": 6890000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7113239142878056e-05, + "loss": 1.1038, + "step": 6890500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7111147577179737e-05, + "loss": 1.101, + "step": 6891000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7109047611619167e-05, + "loss": 1.1217, + "step": 6891500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7106947646058604e-05, + "loss": 1.1252, + "step": 6892000 + }, + { + "epoch": 4.13, + "learning_rate": 2.710484768049804e-05, + "loss": 1.1203, + "step": 6892500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7102747714937474e-05, + "loss": 1.1031, + "step": 6893000 + }, + { + "epoch": 4.13, + "learning_rate": 2.710064774937691e-05, + "loss": 1.0958, + "step": 6893500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7098547783816348e-05, + "loss": 1.1095, + "step": 6894000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7096447818255778e-05, + "loss": 1.1263, + "step": 6894500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7094347852695214e-05, + "loss": 1.1408, + "step": 6895000 + }, + { + "epoch": 4.13, + "learning_rate": 2.709224788713465e-05, + "loss": 1.1096, + "step": 6895500 + }, + { + "epoch": 4.13, + "learning_rate": 2.7090152121505208e-05, + "loss": 1.1094, + "step": 6896000 + }, + { + "epoch": 4.13, + "learning_rate": 2.7088052155944642e-05, + "loss": 1.1285, + "step": 6896500 + }, + { + "epoch": 4.14, + "learning_rate": 2.7085952190384075e-05, + "loss": 1.1069, + "step": 6897000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7083852224823512e-05, + "loss": 1.0934, + "step": 6897500 + }, + { + "epoch": 4.14, + "learning_rate": 2.708175225926295e-05, + "loss": 1.1032, + "step": 6898000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7079652293702382e-05, + "loss": 1.1052, + "step": 6898500 + }, + { + "epoch": 4.14, + "learning_rate": 2.707755652807294e-05, + "loss": 1.0881, + "step": 6899000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7075456562512373e-05, + "loss": 1.1221, + "step": 6899500 + }, + { + "epoch": 4.14, + "learning_rate": 2.707335659695181e-05, + "loss": 1.0958, + "step": 6900000 + }, + { + "epoch": 4.14, + "eval_loss": 1.0961161851882935, + "eval_runtime": 1104.919, + "eval_samples_per_second": 476.705, + "eval_steps_per_second": 79.451, + "step": 6900000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7071256631391246e-05, + "loss": 1.1103, + "step": 6900500 + }, + { + "epoch": 4.14, + "learning_rate": 2.706915666583068e-05, + "loss": 1.1092, + "step": 6901000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7067056700270117e-05, + "loss": 1.1185, + "step": 6901500 + }, + { + "epoch": 4.14, + "learning_rate": 2.706495673470955e-05, + "loss": 1.1211, + "step": 6902000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7062856769148984e-05, + "loss": 1.1344, + "step": 6902500 + }, + { + "epoch": 4.14, + "learning_rate": 2.706076100351954e-05, + "loss": 1.1008, + "step": 6903000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7058661037958977e-05, + "loss": 1.1185, + "step": 6903500 + }, + { + "epoch": 4.14, + "learning_rate": 2.705656107239841e-05, + "loss": 1.1351, + "step": 6904000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7054461106837848e-05, + "loss": 1.1097, + "step": 6904500 + }, + { + "epoch": 4.14, + "learning_rate": 2.705236114127728e-05, + "loss": 1.1174, + "step": 6905000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7050265375647838e-05, + "loss": 1.1179, + "step": 6905500 + }, + { + "epoch": 4.14, + "learning_rate": 2.7048165410087275e-05, + "loss": 1.1133, + "step": 6906000 + }, + { + "epoch": 4.14, + "learning_rate": 2.704606544452671e-05, + "loss": 1.0956, + "step": 6906500 + }, + { + "epoch": 4.14, + "learning_rate": 2.7043965478966145e-05, + "loss": 1.0875, + "step": 6907000 + }, + { + "epoch": 4.14, + "learning_rate": 2.704186551340558e-05, + "loss": 1.1104, + "step": 6907500 + }, + { + "epoch": 4.14, + "learning_rate": 2.7039769747776136e-05, + "loss": 1.0812, + "step": 6908000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7037669782215573e-05, + "loss": 1.1348, + "step": 6908500 + }, + { + "epoch": 4.14, + "learning_rate": 2.7035569816655006e-05, + "loss": 1.0865, + "step": 6909000 + }, + { + "epoch": 4.14, + "learning_rate": 2.703346985109444e-05, + "loss": 1.1028, + "step": 6909500 + }, + { + "epoch": 4.14, + "learning_rate": 2.7031374085465e-05, + "loss": 1.1179, + "step": 6910000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7029274119904433e-05, + "loss": 1.1212, + "step": 6910500 + }, + { + "epoch": 4.14, + "learning_rate": 2.702717415434387e-05, + "loss": 1.1247, + "step": 6911000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7025074188783304e-05, + "loss": 1.1272, + "step": 6911500 + }, + { + "epoch": 4.14, + "learning_rate": 2.7022974223222737e-05, + "loss": 1.1097, + "step": 6912000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7020878457593294e-05, + "loss": 1.1299, + "step": 6912500 + }, + { + "epoch": 4.14, + "learning_rate": 2.701877849203273e-05, + "loss": 1.0996, + "step": 6913000 + }, + { + "epoch": 4.14, + "learning_rate": 2.7016678526472168e-05, + "loss": 1.1152, + "step": 6913500 + }, + { + "epoch": 4.15, + "learning_rate": 2.70145785609116e-05, + "loss": 1.1205, + "step": 6914000 + }, + { + "epoch": 4.15, + "learning_rate": 2.7012482795282158e-05, + "loss": 1.1038, + "step": 6914500 + }, + { + "epoch": 4.15, + "learning_rate": 2.701038282972159e-05, + "loss": 1.1285, + "step": 6915000 + }, + { + "epoch": 4.15, + "learning_rate": 2.700828286416103e-05, + "loss": 1.1126, + "step": 6915500 + }, + { + "epoch": 4.15, + "learning_rate": 2.7006182898600462e-05, + "loss": 1.1208, + "step": 6916000 + }, + { + "epoch": 4.15, + "learning_rate": 2.7004082933039895e-05, + "loss": 1.1091, + "step": 6916500 + }, + { + "epoch": 4.15, + "learning_rate": 2.7001982967479332e-05, + "loss": 1.0976, + "step": 6917000 + }, + { + "epoch": 4.15, + "learning_rate": 2.699988300191877e-05, + "loss": 1.0749, + "step": 6917500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6997783036358203e-05, + "loss": 1.1015, + "step": 6918000 + }, + { + "epoch": 4.15, + "learning_rate": 2.699568727072876e-05, + "loss": 1.1121, + "step": 6918500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6993587305168193e-05, + "loss": 1.0831, + "step": 6919000 + }, + { + "epoch": 4.15, + "learning_rate": 2.699148733960763e-05, + "loss": 1.1387, + "step": 6919500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6989387374047067e-05, + "loss": 1.0743, + "step": 6920000 + }, + { + "epoch": 4.15, + "learning_rate": 2.69872874084865e-05, + "loss": 1.1261, + "step": 6920500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6985191642857057e-05, + "loss": 1.1023, + "step": 6921000 + }, + { + "epoch": 4.15, + "learning_rate": 2.698309167729649e-05, + "loss": 1.0984, + "step": 6921500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6980991711735927e-05, + "loss": 1.1264, + "step": 6922000 + }, + { + "epoch": 4.15, + "learning_rate": 2.6978891746175364e-05, + "loss": 1.096, + "step": 6922500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6976791780614798e-05, + "loss": 1.0982, + "step": 6923000 + }, + { + "epoch": 4.15, + "learning_rate": 2.697469181505423e-05, + "loss": 1.1053, + "step": 6923500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6972591849493668e-05, + "loss": 1.0959, + "step": 6924000 + }, + { + "epoch": 4.15, + "learning_rate": 2.6970496083864225e-05, + "loss": 1.0996, + "step": 6924500 + }, + { + "epoch": 4.15, + "learning_rate": 2.696839611830366e-05, + "loss": 1.1039, + "step": 6925000 + }, + { + "epoch": 4.15, + "learning_rate": 2.6966296152743095e-05, + "loss": 1.1087, + "step": 6925500 + }, + { + "epoch": 4.15, + "learning_rate": 2.696419618718253e-05, + "loss": 1.1099, + "step": 6926000 + }, + { + "epoch": 4.15, + "learning_rate": 2.6962100421553086e-05, + "loss": 1.1337, + "step": 6926500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6960004655923643e-05, + "loss": 1.1069, + "step": 6927000 + }, + { + "epoch": 4.15, + "learning_rate": 2.695790469036308e-05, + "loss": 1.1308, + "step": 6927500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6955804724802513e-05, + "loss": 1.1099, + "step": 6928000 + }, + { + "epoch": 4.15, + "learning_rate": 2.6953704759241946e-05, + "loss": 1.1014, + "step": 6928500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6951604793681383e-05, + "loss": 1.1114, + "step": 6929000 + }, + { + "epoch": 4.15, + "learning_rate": 2.694950482812082e-05, + "loss": 1.1025, + "step": 6929500 + }, + { + "epoch": 4.15, + "learning_rate": 2.6947404862560254e-05, + "loss": 1.1079, + "step": 6930000 + }, + { + "epoch": 4.16, + "learning_rate": 2.694530489699969e-05, + "loss": 1.1161, + "step": 6930500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6943204931439124e-05, + "loss": 1.0865, + "step": 6931000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6941104965878557e-05, + "loss": 1.1052, + "step": 6931500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6939005000317994e-05, + "loss": 1.1014, + "step": 6932000 + }, + { + "epoch": 4.16, + "learning_rate": 2.693690503475743e-05, + "loss": 1.1237, + "step": 6932500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6934809269127985e-05, + "loss": 1.1129, + "step": 6933000 + }, + { + "epoch": 4.16, + "learning_rate": 2.693271350349854e-05, + "loss": 1.1313, + "step": 6933500 + }, + { + "epoch": 4.16, + "learning_rate": 2.693061353793798e-05, + "loss": 1.1039, + "step": 6934000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6928513572377412e-05, + "loss": 1.1039, + "step": 6934500 + }, + { + "epoch": 4.16, + "learning_rate": 2.692641360681685e-05, + "loss": 1.106, + "step": 6935000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6924313641256282e-05, + "loss": 1.0976, + "step": 6935500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6922213675695716e-05, + "loss": 1.0889, + "step": 6936000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6920113710135153e-05, + "loss": 1.0729, + "step": 6936500 + }, + { + "epoch": 4.16, + "learning_rate": 2.691801374457459e-05, + "loss": 1.1357, + "step": 6937000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6915917978945146e-05, + "loss": 1.1413, + "step": 6937500 + }, + { + "epoch": 4.16, + "learning_rate": 2.691381801338458e-05, + "loss": 1.1438, + "step": 6938000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6911718047824013e-05, + "loss": 1.1087, + "step": 6938500 + }, + { + "epoch": 4.16, + "learning_rate": 2.690961808226345e-05, + "loss": 1.0966, + "step": 6939000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6907522316634007e-05, + "loss": 1.0965, + "step": 6939500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6905422351073444e-05, + "loss": 1.092, + "step": 6940000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6903322385512877e-05, + "loss": 1.1321, + "step": 6940500 + }, + { + "epoch": 4.16, + "learning_rate": 2.690122241995231e-05, + "loss": 1.0838, + "step": 6941000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6899122454391748e-05, + "loss": 1.114, + "step": 6941500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6897026688762305e-05, + "loss": 1.1222, + "step": 6942000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6894926723201738e-05, + "loss": 1.1192, + "step": 6942500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6892826757641175e-05, + "loss": 1.091, + "step": 6943000 + }, + { + "epoch": 4.16, + "learning_rate": 2.689072679208061e-05, + "loss": 1.1177, + "step": 6943500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6888631026451165e-05, + "loss": 1.1127, + "step": 6944000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6886531060890602e-05, + "loss": 1.1064, + "step": 6944500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6884431095330036e-05, + "loss": 1.1155, + "step": 6945000 + }, + { + "epoch": 4.16, + "learning_rate": 2.688233112976947e-05, + "loss": 1.0787, + "step": 6945500 + }, + { + "epoch": 4.16, + "learning_rate": 2.6880231164208906e-05, + "loss": 1.1253, + "step": 6946000 + }, + { + "epoch": 4.16, + "learning_rate": 2.6878135398579463e-05, + "loss": 1.1048, + "step": 6946500 + }, + { + "epoch": 4.17, + "learning_rate": 2.68760354330189e-05, + "loss": 1.1152, + "step": 6947000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6873935467458333e-05, + "loss": 1.1106, + "step": 6947500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6871835501897767e-05, + "loss": 1.1225, + "step": 6948000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6869735536337204e-05, + "loss": 1.1242, + "step": 6948500 + }, + { + "epoch": 4.17, + "learning_rate": 2.686763557077664e-05, + "loss": 1.0918, + "step": 6949000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6865535605216074e-05, + "loss": 1.077, + "step": 6949500 + }, + { + "epoch": 4.17, + "learning_rate": 2.686343983958663e-05, + "loss": 1.1159, + "step": 6950000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6861339874026064e-05, + "loss": 1.1412, + "step": 6950500 + }, + { + "epoch": 4.17, + "learning_rate": 2.68592399084655e-05, + "loss": 1.1384, + "step": 6951000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6857139942904938e-05, + "loss": 1.0889, + "step": 6951500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6855044177275495e-05, + "loss": 1.133, + "step": 6952000 + }, + { + "epoch": 4.17, + "learning_rate": 2.685294841164605e-05, + "loss": 1.0989, + "step": 6952500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6850848446085485e-05, + "loss": 1.1525, + "step": 6953000 + }, + { + "epoch": 4.17, + "learning_rate": 2.684874848052492e-05, + "loss": 1.0835, + "step": 6953500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6846648514964356e-05, + "loss": 1.1168, + "step": 6954000 + }, + { + "epoch": 4.17, + "learning_rate": 2.684454854940379e-05, + "loss": 1.1041, + "step": 6954500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6842448583843223e-05, + "loss": 1.1204, + "step": 6955000 + }, + { + "epoch": 4.17, + "learning_rate": 2.684034861828266e-05, + "loss": 1.1064, + "step": 6955500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6838248652722096e-05, + "loss": 1.1138, + "step": 6956000 + }, + { + "epoch": 4.17, + "learning_rate": 2.683614868716153e-05, + "loss": 1.1074, + "step": 6956500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6834048721600967e-05, + "loss": 1.1012, + "step": 6957000 + }, + { + "epoch": 4.17, + "learning_rate": 2.683195295597152e-05, + "loss": 1.1006, + "step": 6957500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6829852990410957e-05, + "loss": 1.133, + "step": 6958000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6827753024850394e-05, + "loss": 1.1041, + "step": 6958500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6825653059289827e-05, + "loss": 1.1361, + "step": 6959000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6823553093729264e-05, + "loss": 1.0986, + "step": 6959500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6821453128168698e-05, + "loss": 1.1062, + "step": 6960000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6819361562470375e-05, + "loss": 1.1284, + "step": 6960500 + }, + { + "epoch": 4.17, + "learning_rate": 2.681726159690981e-05, + "loss": 1.1295, + "step": 6961000 + }, + { + "epoch": 4.17, + "learning_rate": 2.681516163134925e-05, + "loss": 1.0945, + "step": 6961500 + }, + { + "epoch": 4.17, + "learning_rate": 2.681306166578868e-05, + "loss": 1.1237, + "step": 6962000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6810961700228115e-05, + "loss": 1.1375, + "step": 6962500 + }, + { + "epoch": 4.17, + "learning_rate": 2.6808861734667552e-05, + "loss": 1.1245, + "step": 6963000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6806761769106986e-05, + "loss": 1.1076, + "step": 6963500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6804661803546422e-05, + "loss": 1.1001, + "step": 6964000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6802566037916976e-05, + "loss": 1.0939, + "step": 6964500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6800466072356413e-05, + "loss": 1.1451, + "step": 6965000 + }, + { + "epoch": 4.18, + "learning_rate": 2.679837030672697e-05, + "loss": 1.1219, + "step": 6965500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6796270341166407e-05, + "loss": 1.0967, + "step": 6966000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6794170375605837e-05, + "loss": 1.116, + "step": 6966500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6792070410045274e-05, + "loss": 1.1049, + "step": 6967000 + }, + { + "epoch": 4.18, + "learning_rate": 2.678997044448471e-05, + "loss": 1.1038, + "step": 6967500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6787870478924144e-05, + "loss": 1.1135, + "step": 6968000 + }, + { + "epoch": 4.18, + "learning_rate": 2.678577051336358e-05, + "loss": 1.0843, + "step": 6968500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6783670547803018e-05, + "loss": 1.1373, + "step": 6969000 + }, + { + "epoch": 4.18, + "learning_rate": 2.678157478217357e-05, + "loss": 1.1125, + "step": 6969500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6779474816613008e-05, + "loss": 1.1101, + "step": 6970000 + }, + { + "epoch": 4.18, + "learning_rate": 2.677737485105244e-05, + "loss": 1.0993, + "step": 6970500 + }, + { + "epoch": 4.18, + "learning_rate": 2.677527488549188e-05, + "loss": 1.077, + "step": 6971000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6773174919931312e-05, + "loss": 1.1182, + "step": 6971500 + }, + { + "epoch": 4.18, + "learning_rate": 2.677107495437075e-05, + "loss": 1.0932, + "step": 6972000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6768979188741306e-05, + "loss": 1.1211, + "step": 6972500 + }, + { + "epoch": 4.18, + "learning_rate": 2.676687922318074e-05, + "loss": 1.1, + "step": 6973000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6764783457551293e-05, + "loss": 1.1017, + "step": 6973500 + }, + { + "epoch": 4.18, + "learning_rate": 2.676268349199073e-05, + "loss": 1.1147, + "step": 6974000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6760583526430166e-05, + "loss": 1.1049, + "step": 6974500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6758483560869603e-05, + "loss": 1.1317, + "step": 6975000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6756383595309037e-05, + "loss": 1.1029, + "step": 6975500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6754283629748473e-05, + "loss": 1.1064, + "step": 6976000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6752183664187907e-05, + "loss": 1.0892, + "step": 6976500 + }, + { + "epoch": 4.18, + "learning_rate": 2.675008369862734e-05, + "loss": 1.1043, + "step": 6977000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6747987932997897e-05, + "loss": 1.1199, + "step": 6977500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6745887967437334e-05, + "loss": 1.1132, + "step": 6978000 + }, + { + "epoch": 4.18, + "learning_rate": 2.674378800187677e-05, + "loss": 1.0926, + "step": 6978500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6741688036316205e-05, + "loss": 1.1303, + "step": 6979000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6739588070755638e-05, + "loss": 1.1084, + "step": 6979500 + }, + { + "epoch": 4.18, + "learning_rate": 2.6737488105195075e-05, + "loss": 1.1373, + "step": 6980000 + }, + { + "epoch": 4.19, + "learning_rate": 2.673538813963451e-05, + "loss": 1.1211, + "step": 6980500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6733288174073945e-05, + "loss": 1.1337, + "step": 6981000 + }, + { + "epoch": 4.19, + "learning_rate": 2.67311924084445e-05, + "loss": 1.1146, + "step": 6981500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6729092442883936e-05, + "loss": 1.1014, + "step": 6982000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6726992477323372e-05, + "loss": 1.1166, + "step": 6982500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6724892511762806e-05, + "loss": 1.121, + "step": 6983000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6722796746133363e-05, + "loss": 1.1255, + "step": 6983500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6720696780572796e-05, + "loss": 1.0823, + "step": 6984000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6718596815012233e-05, + "loss": 1.1042, + "step": 6984500 + }, + { + "epoch": 4.19, + "learning_rate": 2.671649684945167e-05, + "loss": 1.1647, + "step": 6985000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6714401083822227e-05, + "loss": 1.1135, + "step": 6985500 + }, + { + "epoch": 4.19, + "learning_rate": 2.671230111826166e-05, + "loss": 1.1113, + "step": 6986000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6710201152701094e-05, + "loss": 1.1195, + "step": 6986500 + }, + { + "epoch": 4.19, + "learning_rate": 2.670810118714053e-05, + "loss": 1.1024, + "step": 6987000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6706001221579968e-05, + "loss": 1.0875, + "step": 6987500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6703905455950525e-05, + "loss": 1.114, + "step": 6988000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6701809690321078e-05, + "loss": 1.1141, + "step": 6988500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6699709724760515e-05, + "loss": 1.1075, + "step": 6989000 + }, + { + "epoch": 4.19, + "learning_rate": 2.669760975919995e-05, + "loss": 1.1169, + "step": 6989500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6695509793639385e-05, + "loss": 1.1158, + "step": 6990000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6693409828078822e-05, + "loss": 1.1042, + "step": 6990500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6691309862518252e-05, + "loss": 1.1042, + "step": 6991000 + }, + { + "epoch": 4.19, + "learning_rate": 2.668920989695769e-05, + "loss": 1.1256, + "step": 6991500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6687109931397126e-05, + "loss": 1.0965, + "step": 6992000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6685014165767683e-05, + "loss": 1.1359, + "step": 6992500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6682914200207116e-05, + "loss": 1.1192, + "step": 6993000 + }, + { + "epoch": 4.19, + "learning_rate": 2.668081423464655e-05, + "loss": 1.0981, + "step": 6993500 + }, + { + "epoch": 4.19, + "learning_rate": 2.6678714269085987e-05, + "loss": 1.1194, + "step": 6994000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6676618503456544e-05, + "loss": 1.0847, + "step": 6994500 + }, + { + "epoch": 4.19, + "learning_rate": 2.667451853789598e-05, + "loss": 1.0929, + "step": 6995000 + }, + { + "epoch": 4.19, + "learning_rate": 2.667241857233541e-05, + "loss": 1.13, + "step": 6995500 + }, + { + "epoch": 4.19, + "learning_rate": 2.667032280670597e-05, + "loss": 1.104, + "step": 6996000 + }, + { + "epoch": 4.19, + "learning_rate": 2.6668222841145404e-05, + "loss": 1.1254, + "step": 6996500 + }, + { + "epoch": 4.19, + "learning_rate": 2.666612287558484e-05, + "loss": 1.0985, + "step": 6997000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6664022910024278e-05, + "loss": 1.1023, + "step": 6997500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6661922944463708e-05, + "loss": 1.0989, + "step": 6998000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6659822978903145e-05, + "loss": 1.1212, + "step": 6998500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6657723013342582e-05, + "loss": 1.1182, + "step": 6999000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6655623047782015e-05, + "loss": 1.1241, + "step": 6999500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6653523082221452e-05, + "loss": 1.13, + "step": 7000000 + }, + { + "epoch": 4.2, + "eval_loss": 1.0938860177993774, + "eval_runtime": 1101.4526, + "eval_samples_per_second": 478.205, + "eval_steps_per_second": 79.701, + "step": 7000000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6651427316592006e-05, + "loss": 1.101, + "step": 7000500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6649327351031442e-05, + "loss": 1.1237, + "step": 7001000 + }, + { + "epoch": 4.2, + "learning_rate": 2.664722738547088e-05, + "loss": 1.1115, + "step": 7001500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6645127419910313e-05, + "loss": 1.0917, + "step": 7002000 + }, + { + "epoch": 4.2, + "learning_rate": 2.664302745434975e-05, + "loss": 1.086, + "step": 7002500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6640927488789183e-05, + "loss": 1.1106, + "step": 7003000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6638827523228617e-05, + "loss": 1.0988, + "step": 7003500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6636731757599173e-05, + "loss": 1.0975, + "step": 7004000 + }, + { + "epoch": 4.2, + "learning_rate": 2.663463179203861e-05, + "loss": 1.1179, + "step": 7004500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6632531826478047e-05, + "loss": 1.1007, + "step": 7005000 + }, + { + "epoch": 4.2, + "learning_rate": 2.663043186091748e-05, + "loss": 1.0985, + "step": 7005500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6628331895356914e-05, + "loss": 1.1329, + "step": 7006000 + }, + { + "epoch": 4.2, + "learning_rate": 2.662623192979635e-05, + "loss": 1.157, + "step": 7006500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6624136164166908e-05, + "loss": 1.1197, + "step": 7007000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6622036198606345e-05, + "loss": 1.1024, + "step": 7007500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6619936233045778e-05, + "loss": 1.1162, + "step": 7008000 + }, + { + "epoch": 4.2, + "learning_rate": 2.661783626748521e-05, + "loss": 1.0849, + "step": 7008500 + }, + { + "epoch": 4.2, + "learning_rate": 2.661574050185577e-05, + "loss": 1.0909, + "step": 7009000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6613640536295205e-05, + "loss": 1.1323, + "step": 7009500 + }, + { + "epoch": 4.2, + "learning_rate": 2.661154057073464e-05, + "loss": 1.1141, + "step": 7010000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6609440605174072e-05, + "loss": 1.1436, + "step": 7010500 + }, + { + "epoch": 4.2, + "learning_rate": 2.660734063961351e-05, + "loss": 1.1168, + "step": 7011000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6605244873984066e-05, + "loss": 1.0884, + "step": 7011500 + }, + { + "epoch": 4.2, + "learning_rate": 2.6603144908423503e-05, + "loss": 1.1199, + "step": 7012000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6601044942862937e-05, + "loss": 1.1099, + "step": 7012500 + }, + { + "epoch": 4.2, + "learning_rate": 2.659894497730237e-05, + "loss": 1.1009, + "step": 7013000 + }, + { + "epoch": 4.2, + "learning_rate": 2.6596845011741807e-05, + "loss": 1.1321, + "step": 7013500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6594749246112364e-05, + "loss": 1.1133, + "step": 7014000 + }, + { + "epoch": 4.21, + "learning_rate": 2.65926492805518e-05, + "loss": 1.1185, + "step": 7014500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6590549314991234e-05, + "loss": 1.1212, + "step": 7015000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6588449349430668e-05, + "loss": 1.1139, + "step": 7015500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6586349383870104e-05, + "loss": 1.0749, + "step": 7016000 + }, + { + "epoch": 4.21, + "learning_rate": 2.658425361824066e-05, + "loss": 1.112, + "step": 7016500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6582153652680098e-05, + "loss": 1.1256, + "step": 7017000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6580053687119528e-05, + "loss": 1.1163, + "step": 7017500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6577953721558965e-05, + "loss": 1.1055, + "step": 7018000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6575862155860646e-05, + "loss": 1.1125, + "step": 7018500 + }, + { + "epoch": 4.21, + "learning_rate": 2.657376219030008e-05, + "loss": 1.1142, + "step": 7019000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6571662224739513e-05, + "loss": 1.1012, + "step": 7019500 + }, + { + "epoch": 4.21, + "learning_rate": 2.656956225917895e-05, + "loss": 1.1292, + "step": 7020000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6567462293618383e-05, + "loss": 1.1048, + "step": 7020500 + }, + { + "epoch": 4.21, + "learning_rate": 2.656536232805782e-05, + "loss": 1.1029, + "step": 7021000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6563262362497257e-05, + "loss": 1.0997, + "step": 7021500 + }, + { + "epoch": 4.21, + "learning_rate": 2.656116239693669e-05, + "loss": 1.1123, + "step": 7022000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6559062431376123e-05, + "loss": 1.1295, + "step": 7022500 + }, + { + "epoch": 4.21, + "learning_rate": 2.655696246581556e-05, + "loss": 1.093, + "step": 7023000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6554862500254997e-05, + "loss": 1.1142, + "step": 7023500 + }, + { + "epoch": 4.21, + "learning_rate": 2.655276253469443e-05, + "loss": 1.1111, + "step": 7024000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6550666769064984e-05, + "loss": 1.0865, + "step": 7024500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6548571003435545e-05, + "loss": 1.1096, + "step": 7025000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6546471037874978e-05, + "loss": 1.1359, + "step": 7025500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6544371072314415e-05, + "loss": 1.1082, + "step": 7026000 + }, + { + "epoch": 4.21, + "learning_rate": 2.654227110675385e-05, + "loss": 1.117, + "step": 7026500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6540171141193282e-05, + "loss": 1.0906, + "step": 7027000 + }, + { + "epoch": 4.21, + "learning_rate": 2.653807117563272e-05, + "loss": 1.0989, + "step": 7027500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6535971210072155e-05, + "loss": 1.1188, + "step": 7028000 + }, + { + "epoch": 4.21, + "learning_rate": 2.6533875444442712e-05, + "loss": 1.1074, + "step": 7028500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6531775478882146e-05, + "loss": 1.1005, + "step": 7029000 + }, + { + "epoch": 4.21, + "learning_rate": 2.652967551332158e-05, + "loss": 1.1237, + "step": 7029500 + }, + { + "epoch": 4.21, + "learning_rate": 2.6527575547761016e-05, + "loss": 1.1104, + "step": 7030000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6525475582200453e-05, + "loss": 1.1383, + "step": 7030500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6523375616639886e-05, + "loss": 1.0952, + "step": 7031000 + }, + { + "epoch": 4.22, + "learning_rate": 2.652127985101044e-05, + "loss": 1.0915, + "step": 7031500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6519179885449877e-05, + "loss": 1.1039, + "step": 7032000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6517079919889314e-05, + "loss": 1.0953, + "step": 7032500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6514979954328747e-05, + "loss": 1.0999, + "step": 7033000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6512879988768184e-05, + "loss": 1.1367, + "step": 7033500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6510784223138738e-05, + "loss": 1.137, + "step": 7034000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6508684257578174e-05, + "loss": 1.0915, + "step": 7034500 + }, + { + "epoch": 4.22, + "learning_rate": 2.650658429201761e-05, + "loss": 1.1547, + "step": 7035000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6504484326457045e-05, + "loss": 1.0978, + "step": 7035500 + }, + { + "epoch": 4.22, + "learning_rate": 2.650238436089648e-05, + "loss": 1.1061, + "step": 7036000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6500288595267035e-05, + "loss": 1.106, + "step": 7036500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6498188629706472e-05, + "loss": 1.1006, + "step": 7037000 + }, + { + "epoch": 4.22, + "learning_rate": 2.649608866414591e-05, + "loss": 1.1028, + "step": 7037500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6493988698585342e-05, + "loss": 1.1134, + "step": 7038000 + }, + { + "epoch": 4.22, + "learning_rate": 2.649188873302478e-05, + "loss": 1.1447, + "step": 7038500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6489792967395333e-05, + "loss": 1.1097, + "step": 7039000 + }, + { + "epoch": 4.22, + "learning_rate": 2.648769300183477e-05, + "loss": 1.1143, + "step": 7039500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6485593036274203e-05, + "loss": 1.1055, + "step": 7040000 + }, + { + "epoch": 4.22, + "learning_rate": 2.648349307071364e-05, + "loss": 1.1337, + "step": 7040500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6481393105153077e-05, + "loss": 1.1181, + "step": 7041000 + }, + { + "epoch": 4.22, + "learning_rate": 2.647929313959251e-05, + "loss": 1.1208, + "step": 7041500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6477197373963067e-05, + "loss": 1.1011, + "step": 7042000 + }, + { + "epoch": 4.22, + "learning_rate": 2.64750974084025e-05, + "loss": 1.1149, + "step": 7042500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6472997442841938e-05, + "loss": 1.1013, + "step": 7043000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6470897477281374e-05, + "loss": 1.1273, + "step": 7043500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6468797511720808e-05, + "loss": 1.0871, + "step": 7044000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6466701746091365e-05, + "loss": 1.1071, + "step": 7044500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6464601780530798e-05, + "loss": 1.1004, + "step": 7045000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6462501814970235e-05, + "loss": 1.1006, + "step": 7045500 + }, + { + "epoch": 4.22, + "learning_rate": 2.6460401849409672e-05, + "loss": 1.1462, + "step": 7046000 + }, + { + "epoch": 4.22, + "learning_rate": 2.6458301883849102e-05, + "loss": 1.1046, + "step": 7046500 + }, + { + "epoch": 4.22, + "learning_rate": 2.645620191828854e-05, + "loss": 1.088, + "step": 7047000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6454101952727976e-05, + "loss": 1.1157, + "step": 7047500 + }, + { + "epoch": 4.23, + "learning_rate": 2.645200198716741e-05, + "loss": 1.1236, + "step": 7048000 + }, + { + "epoch": 4.23, + "learning_rate": 2.644990622153797e-05, + "loss": 1.1064, + "step": 7048500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6447810455908523e-05, + "loss": 1.1195, + "step": 7049000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6445710490347957e-05, + "loss": 1.1082, + "step": 7049500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6443610524787393e-05, + "loss": 1.108, + "step": 7050000 + }, + { + "epoch": 4.23, + "learning_rate": 2.644151055922683e-05, + "loss": 1.1198, + "step": 7050500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6439410593666264e-05, + "loss": 1.1003, + "step": 7051000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6437310628105697e-05, + "loss": 1.1257, + "step": 7051500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6435210662545134e-05, + "loss": 1.1128, + "step": 7052000 + }, + { + "epoch": 4.23, + "learning_rate": 2.643311489691569e-05, + "loss": 1.1, + "step": 7052500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6431014931355128e-05, + "loss": 1.1066, + "step": 7053000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6428914965794558e-05, + "loss": 1.1095, + "step": 7053500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6426815000233995e-05, + "loss": 1.1109, + "step": 7054000 + }, + { + "epoch": 4.23, + "learning_rate": 2.642471503467343e-05, + "loss": 1.1325, + "step": 7054500 + }, + { + "epoch": 4.23, + "learning_rate": 2.642261926904399e-05, + "loss": 1.1038, + "step": 7055000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6420519303483425e-05, + "loss": 1.1486, + "step": 7055500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6418419337922855e-05, + "loss": 1.1164, + "step": 7056000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6416319372362292e-05, + "loss": 1.117, + "step": 7056500 + }, + { + "epoch": 4.23, + "learning_rate": 2.641421940680173e-05, + "loss": 1.107, + "step": 7057000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6412119441241163e-05, + "loss": 1.1392, + "step": 7057500 + }, + { + "epoch": 4.23, + "learning_rate": 2.64100194756806e-05, + "loss": 1.13, + "step": 7058000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6407919510120033e-05, + "loss": 1.0785, + "step": 7058500 + }, + { + "epoch": 4.23, + "learning_rate": 2.640582374449059e-05, + "loss": 1.0909, + "step": 7059000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6403727978861147e-05, + "loss": 1.1188, + "step": 7059500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6401628013300584e-05, + "loss": 1.1243, + "step": 7060000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6399528047740014e-05, + "loss": 1.1184, + "step": 7060500 + }, + { + "epoch": 4.23, + "learning_rate": 2.639742808217945e-05, + "loss": 1.1242, + "step": 7061000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6395328116618887e-05, + "loss": 1.0821, + "step": 7061500 + }, + { + "epoch": 4.23, + "learning_rate": 2.639322815105832e-05, + "loss": 1.1058, + "step": 7062000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6391128185497758e-05, + "loss": 1.1045, + "step": 7062500 + }, + { + "epoch": 4.23, + "learning_rate": 2.6389028219937195e-05, + "loss": 1.1053, + "step": 7063000 + }, + { + "epoch": 4.23, + "learning_rate": 2.6386932454307748e-05, + "loss": 1.1296, + "step": 7063500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6384832488747185e-05, + "loss": 1.1119, + "step": 7064000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6382736723117742e-05, + "loss": 1.1264, + "step": 7064500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6380636757557175e-05, + "loss": 1.1321, + "step": 7065000 + }, + { + "epoch": 4.24, + "learning_rate": 2.637853679199661e-05, + "loss": 1.1282, + "step": 7065500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6376436826436046e-05, + "loss": 1.095, + "step": 7066000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6374336860875483e-05, + "loss": 1.1067, + "step": 7066500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6372236895314916e-05, + "loss": 1.126, + "step": 7067000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6370136929754353e-05, + "loss": 1.1246, + "step": 7067500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6368036964193786e-05, + "loss": 1.0811, + "step": 7068000 + }, + { + "epoch": 4.24, + "learning_rate": 2.636593699863322e-05, + "loss": 1.129, + "step": 7068500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6363837033072657e-05, + "loss": 1.1047, + "step": 7069000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6361737067512094e-05, + "loss": 1.1224, + "step": 7069500 + }, + { + "epoch": 4.24, + "learning_rate": 2.635964130188265e-05, + "loss": 1.1175, + "step": 7070000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6357541336322084e-05, + "loss": 1.0896, + "step": 7070500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6355441370761517e-05, + "loss": 1.1021, + "step": 7071000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6353341405200954e-05, + "loss": 1.111, + "step": 7071500 + }, + { + "epoch": 4.24, + "learning_rate": 2.635124143964039e-05, + "loss": 1.1155, + "step": 7072000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6349145674010948e-05, + "loss": 1.1367, + "step": 7072500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6347045708450378e-05, + "loss": 1.109, + "step": 7073000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6344945742889815e-05, + "loss": 1.1194, + "step": 7073500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6342845777329252e-05, + "loss": 1.1214, + "step": 7074000 + }, + { + "epoch": 4.24, + "learning_rate": 2.634074581176869e-05, + "loss": 1.1346, + "step": 7074500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6338645846208122e-05, + "loss": 1.1052, + "step": 7075000 + }, + { + "epoch": 4.24, + "learning_rate": 2.6336550080578676e-05, + "loss": 1.1135, + "step": 7075500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6334450115018113e-05, + "loss": 1.1425, + "step": 7076000 + }, + { + "epoch": 4.24, + "learning_rate": 2.633235014945755e-05, + "loss": 1.1029, + "step": 7076500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6330250183896983e-05, + "loss": 1.0937, + "step": 7077000 + }, + { + "epoch": 4.24, + "learning_rate": 2.632815441826754e-05, + "loss": 1.1109, + "step": 7077500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6326054452706973e-05, + "loss": 1.1003, + "step": 7078000 + }, + { + "epoch": 4.24, + "learning_rate": 2.632395448714641e-05, + "loss": 1.1171, + "step": 7078500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6321854521585847e-05, + "loss": 1.1365, + "step": 7079000 + }, + { + "epoch": 4.24, + "learning_rate": 2.631975455602528e-05, + "loss": 1.1311, + "step": 7079500 + }, + { + "epoch": 4.24, + "learning_rate": 2.6317658790395837e-05, + "loss": 1.1228, + "step": 7080000 + }, + { + "epoch": 4.25, + "learning_rate": 2.631555882483527e-05, + "loss": 1.1068, + "step": 7080500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6313458859274708e-05, + "loss": 1.1118, + "step": 7081000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6311358893714145e-05, + "loss": 1.1303, + "step": 7081500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6309258928153578e-05, + "loss": 1.1148, + "step": 7082000 + }, + { + "epoch": 4.25, + "learning_rate": 2.630716316252413e-05, + "loss": 1.1257, + "step": 7082500 + }, + { + "epoch": 4.25, + "learning_rate": 2.630506319696357e-05, + "loss": 1.0981, + "step": 7083000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6302963231403005e-05, + "loss": 1.1329, + "step": 7083500 + }, + { + "epoch": 4.25, + "learning_rate": 2.630086326584244e-05, + "loss": 1.0933, + "step": 7084000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6298767500213e-05, + "loss": 1.104, + "step": 7084500 + }, + { + "epoch": 4.25, + "learning_rate": 2.629666753465243e-05, + "loss": 1.1413, + "step": 7085000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6294567569091866e-05, + "loss": 1.1227, + "step": 7085500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6292467603531303e-05, + "loss": 1.0863, + "step": 7086000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6290367637970736e-05, + "loss": 1.1186, + "step": 7086500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6288271872341293e-05, + "loss": 1.1185, + "step": 7087000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6286171906780727e-05, + "loss": 1.1399, + "step": 7087500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6284071941220164e-05, + "loss": 1.0998, + "step": 7088000 + }, + { + "epoch": 4.25, + "learning_rate": 2.628197617559072e-05, + "loss": 1.1086, + "step": 7088500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6279876210030157e-05, + "loss": 1.1031, + "step": 7089000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6277776244469587e-05, + "loss": 1.1038, + "step": 7089500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6275676278909024e-05, + "loss": 1.1304, + "step": 7090000 + }, + { + "epoch": 4.25, + "learning_rate": 2.627358051327958e-05, + "loss": 1.0953, + "step": 7090500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6271480547719018e-05, + "loss": 1.1455, + "step": 7091000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6269380582158455e-05, + "loss": 1.091, + "step": 7091500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6267280616597885e-05, + "loss": 1.0988, + "step": 7092000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6265180651037322e-05, + "loss": 1.0949, + "step": 7092500 + }, + { + "epoch": 4.25, + "learning_rate": 2.626308068547676e-05, + "loss": 1.1101, + "step": 7093000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6260980719916192e-05, + "loss": 1.1452, + "step": 7093500 + }, + { + "epoch": 4.25, + "learning_rate": 2.625888075435563e-05, + "loss": 1.0996, + "step": 7094000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6256780788795066e-05, + "loss": 1.1038, + "step": 7094500 + }, + { + "epoch": 4.25, + "learning_rate": 2.6254680823234496e-05, + "loss": 1.1048, + "step": 7095000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6252580857673933e-05, + "loss": 1.1012, + "step": 7095500 + }, + { + "epoch": 4.25, + "learning_rate": 2.625048509204449e-05, + "loss": 1.0886, + "step": 7096000 + }, + { + "epoch": 4.25, + "learning_rate": 2.6248385126483927e-05, + "loss": 1.1351, + "step": 7096500 + }, + { + "epoch": 4.25, + "learning_rate": 2.624628516092336e-05, + "loss": 1.142, + "step": 7097000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6244185195362794e-05, + "loss": 1.1158, + "step": 7097500 + }, + { + "epoch": 4.26, + "learning_rate": 2.624208522980223e-05, + "loss": 1.134, + "step": 7098000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6239989464172787e-05, + "loss": 1.1476, + "step": 7098500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6237889498612224e-05, + "loss": 1.1154, + "step": 7099000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6235789533051658e-05, + "loss": 1.1034, + "step": 7099500 + }, + { + "epoch": 4.26, + "learning_rate": 2.623368956749109e-05, + "loss": 1.0942, + "step": 7100000 + }, + { + "epoch": 4.26, + "eval_loss": 1.094136357307434, + "eval_runtime": 1103.2512, + "eval_samples_per_second": 477.425, + "eval_steps_per_second": 79.571, + "step": 7100000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6231589601930528e-05, + "loss": 1.1401, + "step": 7100500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6229493836301085e-05, + "loss": 1.1107, + "step": 7101000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6227393870740522e-05, + "loss": 1.1045, + "step": 7101500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6225293905179952e-05, + "loss": 1.1181, + "step": 7102000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6223198139550512e-05, + "loss": 1.1187, + "step": 7102500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6221098173989946e-05, + "loss": 1.129, + "step": 7103000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6218998208429382e-05, + "loss": 1.1183, + "step": 7103500 + }, + { + "epoch": 4.26, + "learning_rate": 2.621689824286882e-05, + "loss": 1.1201, + "step": 7104000 + }, + { + "epoch": 4.26, + "learning_rate": 2.621479827730825e-05, + "loss": 1.1255, + "step": 7104500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6212698311747686e-05, + "loss": 1.1056, + "step": 7105000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6210598346187123e-05, + "loss": 1.1132, + "step": 7105500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6208498380626557e-05, + "loss": 1.1331, + "step": 7106000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6206398415065993e-05, + "loss": 1.133, + "step": 7106500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6204302649436547e-05, + "loss": 1.1054, + "step": 7107000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6202202683875984e-05, + "loss": 1.1208, + "step": 7107500 + }, + { + "epoch": 4.26, + "learning_rate": 2.620010271831542e-05, + "loss": 1.1063, + "step": 7108000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6198002752754854e-05, + "loss": 1.0986, + "step": 7108500 + }, + { + "epoch": 4.26, + "learning_rate": 2.619591118705653e-05, + "loss": 1.1417, + "step": 7109000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6193811221495968e-05, + "loss": 1.0861, + "step": 7109500 + }, + { + "epoch": 4.26, + "learning_rate": 2.61917112559354e-05, + "loss": 1.101, + "step": 7110000 + }, + { + "epoch": 4.26, + "learning_rate": 2.618961129037484e-05, + "loss": 1.1249, + "step": 7110500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6187511324814275e-05, + "loss": 1.0863, + "step": 7111000 + }, + { + "epoch": 4.26, + "learning_rate": 2.6185411359253705e-05, + "loss": 1.1143, + "step": 7111500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6183311393693142e-05, + "loss": 1.0801, + "step": 7112000 + }, + { + "epoch": 4.26, + "learning_rate": 2.618121142813258e-05, + "loss": 1.1378, + "step": 7112500 + }, + { + "epoch": 4.26, + "learning_rate": 2.6179111462572012e-05, + "loss": 1.1244, + "step": 7113000 + }, + { + "epoch": 4.26, + "learning_rate": 2.617701149701145e-05, + "loss": 1.1158, + "step": 7113500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6174911531450883e-05, + "loss": 1.1029, + "step": 7114000 + }, + { + "epoch": 4.27, + "learning_rate": 2.617281156589032e-05, + "loss": 1.1233, + "step": 7114500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6170715800260877e-05, + "loss": 1.0978, + "step": 7115000 + }, + { + "epoch": 4.27, + "learning_rate": 2.6168620034631434e-05, + "loss": 1.0864, + "step": 7115500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6166520069070867e-05, + "loss": 1.1017, + "step": 7116000 + }, + { + "epoch": 4.27, + "learning_rate": 2.61644201035103e-05, + "loss": 1.1097, + "step": 7116500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6162320137949737e-05, + "loss": 1.1009, + "step": 7117000 + }, + { + "epoch": 4.27, + "learning_rate": 2.6160220172389174e-05, + "loss": 1.1077, + "step": 7117500 + }, + { + "epoch": 4.27, + "learning_rate": 2.615812440675973e-05, + "loss": 1.1103, + "step": 7118000 + }, + { + "epoch": 4.27, + "learning_rate": 2.615602444119916e-05, + "loss": 1.084, + "step": 7118500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6153924475638598e-05, + "loss": 1.1226, + "step": 7119000 + }, + { + "epoch": 4.27, + "learning_rate": 2.6151824510078035e-05, + "loss": 1.1173, + "step": 7119500 + }, + { + "epoch": 4.27, + "learning_rate": 2.614972454451747e-05, + "loss": 1.1262, + "step": 7120000 + }, + { + "epoch": 4.27, + "learning_rate": 2.6147624578956905e-05, + "loss": 1.112, + "step": 7120500 + }, + { + "epoch": 4.27, + "learning_rate": 2.614552881332746e-05, + "loss": 1.1112, + "step": 7121000 + }, + { + "epoch": 4.27, + "learning_rate": 2.6143428847766896e-05, + "loss": 1.1291, + "step": 7121500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6141328882206332e-05, + "loss": 1.1086, + "step": 7122000 + }, + { + "epoch": 4.27, + "learning_rate": 2.6139228916645766e-05, + "loss": 1.1182, + "step": 7122500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6137128951085203e-05, + "loss": 1.1111, + "step": 7123000 + }, + { + "epoch": 4.27, + "learning_rate": 2.613502898552464e-05, + "loss": 1.1239, + "step": 7123500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6132933219895193e-05, + "loss": 1.1135, + "step": 7124000 + }, + { + "epoch": 4.27, + "learning_rate": 2.613083325433463e-05, + "loss": 1.1116, + "step": 7124500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6128733288774063e-05, + "loss": 1.1286, + "step": 7125000 + }, + { + "epoch": 4.27, + "learning_rate": 2.61266333232135e-05, + "loss": 1.1, + "step": 7125500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6124537557584054e-05, + "loss": 1.1007, + "step": 7126000 + }, + { + "epoch": 4.27, + "learning_rate": 2.612243759202349e-05, + "loss": 1.1421, + "step": 7126500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6120337626462924e-05, + "loss": 1.121, + "step": 7127000 + }, + { + "epoch": 4.27, + "learning_rate": 2.611823766090236e-05, + "loss": 1.0977, + "step": 7127500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6116137695341798e-05, + "loss": 1.1336, + "step": 7128000 + }, + { + "epoch": 4.27, + "learning_rate": 2.611404192971235e-05, + "loss": 1.1251, + "step": 7128500 + }, + { + "epoch": 4.27, + "learning_rate": 2.6111941964151788e-05, + "loss": 1.1242, + "step": 7129000 + }, + { + "epoch": 4.27, + "learning_rate": 2.6109841998591222e-05, + "loss": 1.1024, + "step": 7129500 + }, + { + "epoch": 4.27, + "learning_rate": 2.610774203303066e-05, + "loss": 1.0996, + "step": 7130000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6105642067470095e-05, + "loss": 1.0866, + "step": 7130500 + }, + { + "epoch": 4.28, + "learning_rate": 2.610354630184065e-05, + "loss": 1.1058, + "step": 7131000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6101446336280086e-05, + "loss": 1.1442, + "step": 7131500 + }, + { + "epoch": 4.28, + "learning_rate": 2.609934637071952e-05, + "loss": 1.1203, + "step": 7132000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6097246405158956e-05, + "loss": 1.1005, + "step": 7132500 + }, + { + "epoch": 4.28, + "learning_rate": 2.609515063952951e-05, + "loss": 1.1182, + "step": 7133000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6093050673968947e-05, + "loss": 1.1318, + "step": 7133500 + }, + { + "epoch": 4.28, + "learning_rate": 2.609095070840838e-05, + "loss": 1.0991, + "step": 7134000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6088850742847817e-05, + "loss": 1.1225, + "step": 7134500 + }, + { + "epoch": 4.28, + "learning_rate": 2.6086750777287254e-05, + "loss": 1.1256, + "step": 7135000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6084659211588927e-05, + "loss": 1.0988, + "step": 7135500 + }, + { + "epoch": 4.28, + "learning_rate": 2.6082559246028364e-05, + "loss": 1.121, + "step": 7136000 + }, + { + "epoch": 4.28, + "learning_rate": 2.60804592804678e-05, + "loss": 1.1223, + "step": 7136500 + }, + { + "epoch": 4.28, + "learning_rate": 2.6078359314907235e-05, + "loss": 1.0989, + "step": 7137000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6076259349346668e-05, + "loss": 1.1025, + "step": 7137500 + }, + { + "epoch": 4.28, + "learning_rate": 2.6074159383786105e-05, + "loss": 1.0884, + "step": 7138000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6072059418225542e-05, + "loss": 1.1295, + "step": 7138500 + }, + { + "epoch": 4.28, + "learning_rate": 2.6069959452664975e-05, + "loss": 1.1365, + "step": 7139000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6067859487104412e-05, + "loss": 1.1073, + "step": 7139500 + }, + { + "epoch": 4.28, + "learning_rate": 2.6065763721474966e-05, + "loss": 1.1087, + "step": 7140000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6063663755914402e-05, + "loss": 1.1231, + "step": 7140500 + }, + { + "epoch": 4.28, + "learning_rate": 2.6061563790353836e-05, + "loss": 1.1097, + "step": 7141000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6059463824793273e-05, + "loss": 1.1146, + "step": 7141500 + }, + { + "epoch": 4.28, + "learning_rate": 2.605736385923271e-05, + "loss": 1.1213, + "step": 7142000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6055268093603263e-05, + "loss": 1.1343, + "step": 7142500 + }, + { + "epoch": 4.28, + "learning_rate": 2.60531681280427e-05, + "loss": 1.0961, + "step": 7143000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6051068162482134e-05, + "loss": 1.0908, + "step": 7143500 + }, + { + "epoch": 4.28, + "learning_rate": 2.604896819692157e-05, + "loss": 1.1058, + "step": 7144000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6046868231361007e-05, + "loss": 1.1137, + "step": 7144500 + }, + { + "epoch": 4.28, + "learning_rate": 2.604477246573156e-05, + "loss": 1.104, + "step": 7145000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6042672500170998e-05, + "loss": 1.1273, + "step": 7145500 + }, + { + "epoch": 4.28, + "learning_rate": 2.604057253461043e-05, + "loss": 1.0898, + "step": 7146000 + }, + { + "epoch": 4.28, + "learning_rate": 2.6038472569049868e-05, + "loss": 1.1389, + "step": 7146500 + }, + { + "epoch": 4.28, + "learning_rate": 2.603637680342042e-05, + "loss": 1.0843, + "step": 7147000 + }, + { + "epoch": 4.29, + "learning_rate": 2.603427683785986e-05, + "loss": 1.1115, + "step": 7147500 + }, + { + "epoch": 4.29, + "learning_rate": 2.6032176872299292e-05, + "loss": 1.1248, + "step": 7148000 + }, + { + "epoch": 4.29, + "learning_rate": 2.603007690673873e-05, + "loss": 1.1257, + "step": 7148500 + }, + { + "epoch": 4.29, + "learning_rate": 2.6027976941178166e-05, + "loss": 1.0989, + "step": 7149000 + }, + { + "epoch": 4.29, + "learning_rate": 2.602588117554872e-05, + "loss": 1.1205, + "step": 7149500 + }, + { + "epoch": 4.29, + "learning_rate": 2.6023781209988156e-05, + "loss": 1.1033, + "step": 7150000 + }, + { + "epoch": 4.29, + "learning_rate": 2.6021685444358713e-05, + "loss": 1.1167, + "step": 7150500 + }, + { + "epoch": 4.29, + "learning_rate": 2.6019585478798146e-05, + "loss": 1.131, + "step": 7151000 + }, + { + "epoch": 4.29, + "learning_rate": 2.6017485513237583e-05, + "loss": 1.1109, + "step": 7151500 + }, + { + "epoch": 4.29, + "learning_rate": 2.6015385547677017e-05, + "loss": 1.1322, + "step": 7152000 + }, + { + "epoch": 4.29, + "learning_rate": 2.6013285582116454e-05, + "loss": 1.0956, + "step": 7152500 + }, + { + "epoch": 4.29, + "learning_rate": 2.6011185616555887e-05, + "loss": 1.1021, + "step": 7153000 + }, + { + "epoch": 4.29, + "learning_rate": 2.6009085650995324e-05, + "loss": 1.1128, + "step": 7153500 + }, + { + "epoch": 4.29, + "learning_rate": 2.600698568543476e-05, + "loss": 1.1478, + "step": 7154000 + }, + { + "epoch": 4.29, + "learning_rate": 2.6004894119736434e-05, + "loss": 1.1263, + "step": 7154500 + }, + { + "epoch": 4.29, + "learning_rate": 2.600279415417587e-05, + "loss": 1.0941, + "step": 7155000 + }, + { + "epoch": 4.29, + "learning_rate": 2.6000694188615308e-05, + "loss": 1.1104, + "step": 7155500 + }, + { + "epoch": 4.29, + "learning_rate": 2.599859422305474e-05, + "loss": 1.0944, + "step": 7156000 + }, + { + "epoch": 4.29, + "learning_rate": 2.599649425749418e-05, + "loss": 1.127, + "step": 7156500 + }, + { + "epoch": 4.29, + "learning_rate": 2.5994394291933612e-05, + "loss": 1.1318, + "step": 7157000 + }, + { + "epoch": 4.29, + "learning_rate": 2.5992294326373045e-05, + "loss": 1.1257, + "step": 7157500 + }, + { + "epoch": 4.29, + "learning_rate": 2.5990194360812482e-05, + "loss": 1.0964, + "step": 7158000 + }, + { + "epoch": 4.29, + "learning_rate": 2.598809439525192e-05, + "loss": 1.1121, + "step": 7158500 + }, + { + "epoch": 4.29, + "learning_rate": 2.5985994429691352e-05, + "loss": 1.1153, + "step": 7159000 + }, + { + "epoch": 4.29, + "learning_rate": 2.598389866406191e-05, + "loss": 1.1282, + "step": 7159500 + }, + { + "epoch": 4.29, + "learning_rate": 2.5981798698501343e-05, + "loss": 1.1259, + "step": 7160000 + }, + { + "epoch": 4.29, + "learning_rate": 2.597969873294078e-05, + "loss": 1.1077, + "step": 7160500 + }, + { + "epoch": 4.29, + "learning_rate": 2.5977598767380217e-05, + "loss": 1.1383, + "step": 7161000 + }, + { + "epoch": 4.29, + "learning_rate": 2.597549880181965e-05, + "loss": 1.1084, + "step": 7161500 + }, + { + "epoch": 4.29, + "learning_rate": 2.5973398836259083e-05, + "loss": 1.1256, + "step": 7162000 + }, + { + "epoch": 4.29, + "learning_rate": 2.597129887069852e-05, + "loss": 1.1252, + "step": 7162500 + }, + { + "epoch": 4.29, + "learning_rate": 2.5969203105069077e-05, + "loss": 1.1227, + "step": 7163000 + }, + { + "epoch": 4.29, + "learning_rate": 2.5967103139508514e-05, + "loss": 1.1211, + "step": 7163500 + }, + { + "epoch": 4.3, + "learning_rate": 2.5965003173947948e-05, + "loss": 1.1389, + "step": 7164000 + }, + { + "epoch": 4.3, + "learning_rate": 2.596290320838738e-05, + "loss": 1.1343, + "step": 7164500 + }, + { + "epoch": 4.3, + "learning_rate": 2.5960803242826818e-05, + "loss": 1.0913, + "step": 7165000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5958707477197375e-05, + "loss": 1.0989, + "step": 7165500 + }, + { + "epoch": 4.3, + "learning_rate": 2.595660751163681e-05, + "loss": 1.11, + "step": 7166000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5954507546076242e-05, + "loss": 1.1088, + "step": 7166500 + }, + { + "epoch": 4.3, + "learning_rate": 2.595240758051568e-05, + "loss": 1.0806, + "step": 7167000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5950307614955115e-05, + "loss": 1.1309, + "step": 7167500 + }, + { + "epoch": 4.3, + "learning_rate": 2.594820764939455e-05, + "loss": 1.0935, + "step": 7168000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5946107683833986e-05, + "loss": 1.1224, + "step": 7168500 + }, + { + "epoch": 4.3, + "learning_rate": 2.594401191820454e-05, + "loss": 1.1231, + "step": 7169000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5941916152575096e-05, + "loss": 1.1247, + "step": 7169500 + }, + { + "epoch": 4.3, + "learning_rate": 2.5939816187014533e-05, + "loss": 1.1049, + "step": 7170000 + }, + { + "epoch": 4.3, + "learning_rate": 2.593771622145397e-05, + "loss": 1.0785, + "step": 7170500 + }, + { + "epoch": 4.3, + "learning_rate": 2.5935616255893403e-05, + "loss": 1.1153, + "step": 7171000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5933516290332837e-05, + "loss": 1.0975, + "step": 7171500 + }, + { + "epoch": 4.3, + "learning_rate": 2.5931416324772274e-05, + "loss": 1.1043, + "step": 7172000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5929316359211707e-05, + "loss": 1.1045, + "step": 7172500 + }, + { + "epoch": 4.3, + "learning_rate": 2.5927216393651144e-05, + "loss": 1.1053, + "step": 7173000 + }, + { + "epoch": 4.3, + "learning_rate": 2.592511642809058e-05, + "loss": 1.1296, + "step": 7173500 + }, + { + "epoch": 4.3, + "learning_rate": 2.592301646253001e-05, + "loss": 1.1214, + "step": 7174000 + }, + { + "epoch": 4.3, + "learning_rate": 2.592092069690057e-05, + "loss": 1.1488, + "step": 7174500 + }, + { + "epoch": 4.3, + "learning_rate": 2.5918820731340005e-05, + "loss": 1.1132, + "step": 7175000 + }, + { + "epoch": 4.3, + "learning_rate": 2.591672076577944e-05, + "loss": 1.125, + "step": 7175500 + }, + { + "epoch": 4.3, + "learning_rate": 2.591462080021888e-05, + "loss": 1.1143, + "step": 7176000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5912525034589432e-05, + "loss": 1.1227, + "step": 7176500 + }, + { + "epoch": 4.3, + "learning_rate": 2.5910425069028866e-05, + "loss": 1.0856, + "step": 7177000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5908325103468302e-05, + "loss": 1.0883, + "step": 7177500 + }, + { + "epoch": 4.3, + "learning_rate": 2.590622513790774e-05, + "loss": 1.1035, + "step": 7178000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5904129372278293e-05, + "loss": 1.1121, + "step": 7178500 + }, + { + "epoch": 4.3, + "learning_rate": 2.590202940671773e-05, + "loss": 1.1316, + "step": 7179000 + }, + { + "epoch": 4.3, + "learning_rate": 2.5899929441157163e-05, + "loss": 1.1198, + "step": 7179500 + }, + { + "epoch": 4.3, + "learning_rate": 2.58978294755966e-05, + "loss": 1.1203, + "step": 7180000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5895729510036037e-05, + "loss": 1.1103, + "step": 7180500 + }, + { + "epoch": 4.31, + "learning_rate": 2.589363374440659e-05, + "loss": 1.1084, + "step": 7181000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5891533778846027e-05, + "loss": 1.1404, + "step": 7181500 + }, + { + "epoch": 4.31, + "learning_rate": 2.588943381328546e-05, + "loss": 1.0821, + "step": 7182000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5887333847724898e-05, + "loss": 1.1035, + "step": 7182500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5885233882164334e-05, + "loss": 1.1023, + "step": 7183000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5883133916603764e-05, + "loss": 1.1039, + "step": 7183500 + }, + { + "epoch": 4.31, + "learning_rate": 2.58810339510432e-05, + "loss": 1.1137, + "step": 7184000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5878938185413758e-05, + "loss": 1.1212, + "step": 7184500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5876838219853195e-05, + "loss": 1.0997, + "step": 7185000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5874738254292632e-05, + "loss": 1.0874, + "step": 7185500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5872638288732062e-05, + "loss": 1.139, + "step": 7186000 + }, + { + "epoch": 4.31, + "learning_rate": 2.58705383231715e-05, + "loss": 1.1018, + "step": 7186500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5868438357610936e-05, + "loss": 1.1336, + "step": 7187000 + }, + { + "epoch": 4.31, + "learning_rate": 2.586633839205037e-05, + "loss": 1.1226, + "step": 7187500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5864242626420926e-05, + "loss": 1.1334, + "step": 7188000 + }, + { + "epoch": 4.31, + "learning_rate": 2.586214266086036e-05, + "loss": 1.1223, + "step": 7188500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5860042695299796e-05, + "loss": 1.1171, + "step": 7189000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5857942729739233e-05, + "loss": 1.1174, + "step": 7189500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5855842764178667e-05, + "loss": 1.1035, + "step": 7190000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5853746998549224e-05, + "loss": 1.1172, + "step": 7190500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5851647032988657e-05, + "loss": 1.1113, + "step": 7191000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5849547067428094e-05, + "loss": 1.1217, + "step": 7191500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5847447101867527e-05, + "loss": 1.1204, + "step": 7192000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5845347136306964e-05, + "loss": 1.1265, + "step": 7192500 + }, + { + "epoch": 4.31, + "learning_rate": 2.584325137067752e-05, + "loss": 1.0782, + "step": 7193000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5841151405116955e-05, + "loss": 1.1423, + "step": 7193500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5839055639487512e-05, + "loss": 1.1023, + "step": 7194000 + }, + { + "epoch": 4.31, + "learning_rate": 2.583695567392695e-05, + "loss": 1.1203, + "step": 7194500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5834855708366382e-05, + "loss": 1.0985, + "step": 7195000 + }, + { + "epoch": 4.31, + "learning_rate": 2.5832755742805815e-05, + "loss": 1.1226, + "step": 7195500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5830655777245252e-05, + "loss": 1.1571, + "step": 7196000 + }, + { + "epoch": 4.31, + "learning_rate": 2.582855581168469e-05, + "loss": 1.1139, + "step": 7196500 + }, + { + "epoch": 4.31, + "learning_rate": 2.5826455846124123e-05, + "loss": 1.0913, + "step": 7197000 + }, + { + "epoch": 4.32, + "learning_rate": 2.582435588056356e-05, + "loss": 1.0955, + "step": 7197500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5822260114934113e-05, + "loss": 1.0899, + "step": 7198000 + }, + { + "epoch": 4.32, + "learning_rate": 2.582016014937355e-05, + "loss": 1.1048, + "step": 7198500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5818060183812983e-05, + "loss": 1.1176, + "step": 7199000 + }, + { + "epoch": 4.32, + "learning_rate": 2.581596021825242e-05, + "loss": 1.1354, + "step": 7199500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5813860252691857e-05, + "loss": 1.1147, + "step": 7200000 + }, + { + "epoch": 4.32, + "eval_loss": 1.08935546875, + "eval_runtime": 1105.7679, + "eval_samples_per_second": 476.339, + "eval_steps_per_second": 79.39, + "step": 7200000 + }, + { + "epoch": 4.32, + "learning_rate": 2.581176448706241e-05, + "loss": 1.0939, + "step": 7200500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5809664521501847e-05, + "loss": 1.0863, + "step": 7201000 + }, + { + "epoch": 4.32, + "learning_rate": 2.580756455594128e-05, + "loss": 1.1042, + "step": 7201500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5805464590380718e-05, + "loss": 1.1076, + "step": 7202000 + }, + { + "epoch": 4.32, + "learning_rate": 2.5803364624820155e-05, + "loss": 1.1468, + "step": 7202500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5801268859190708e-05, + "loss": 1.1102, + "step": 7203000 + }, + { + "epoch": 4.32, + "learning_rate": 2.5799168893630145e-05, + "loss": 1.1118, + "step": 7203500 + }, + { + "epoch": 4.32, + "learning_rate": 2.579706892806958e-05, + "loss": 1.0949, + "step": 7204000 + }, + { + "epoch": 4.32, + "learning_rate": 2.5794968962509015e-05, + "loss": 1.0806, + "step": 7204500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5792868996948452e-05, + "loss": 1.1055, + "step": 7205000 + }, + { + "epoch": 4.32, + "learning_rate": 2.5790769031387882e-05, + "loss": 1.1324, + "step": 7205500 + }, + { + "epoch": 4.32, + "learning_rate": 2.578867326575844e-05, + "loss": 1.0903, + "step": 7206000 + }, + { + "epoch": 4.32, + "learning_rate": 2.5786573300197876e-05, + "loss": 1.0703, + "step": 7206500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5784473334637313e-05, + "loss": 1.0941, + "step": 7207000 + }, + { + "epoch": 4.32, + "learning_rate": 2.578237336907675e-05, + "loss": 1.1246, + "step": 7207500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5780277603447303e-05, + "loss": 1.1361, + "step": 7208000 + }, + { + "epoch": 4.32, + "learning_rate": 2.5778177637886737e-05, + "loss": 1.1168, + "step": 7208500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5776077672326174e-05, + "loss": 1.1109, + "step": 7209000 + }, + { + "epoch": 4.32, + "learning_rate": 2.577397770676561e-05, + "loss": 1.1167, + "step": 7209500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5771881941136164e-05, + "loss": 1.0936, + "step": 7210000 + }, + { + "epoch": 4.32, + "learning_rate": 2.57697819755756e-05, + "loss": 1.102, + "step": 7210500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5767686209946158e-05, + "loss": 1.1357, + "step": 7211000 + }, + { + "epoch": 4.32, + "learning_rate": 2.576558624438559e-05, + "loss": 1.1216, + "step": 7211500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5763486278825028e-05, + "loss": 1.1042, + "step": 7212000 + }, + { + "epoch": 4.32, + "learning_rate": 2.576138631326446e-05, + "loss": 1.104, + "step": 7212500 + }, + { + "epoch": 4.32, + "learning_rate": 2.5759286347703895e-05, + "loss": 1.1095, + "step": 7213000 + }, + { + "epoch": 4.32, + "learning_rate": 2.5757186382143332e-05, + "loss": 1.1179, + "step": 7213500 + }, + { + "epoch": 4.33, + "learning_rate": 2.575508641658277e-05, + "loss": 1.1132, + "step": 7214000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5752986451022206e-05, + "loss": 1.1242, + "step": 7214500 + }, + { + "epoch": 4.33, + "learning_rate": 2.575089068539276e-05, + "loss": 1.0941, + "step": 7215000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5748790719832193e-05, + "loss": 1.1283, + "step": 7215500 + }, + { + "epoch": 4.33, + "learning_rate": 2.574669495420275e-05, + "loss": 1.0878, + "step": 7216000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5744594988642186e-05, + "loss": 1.0854, + "step": 7216500 + }, + { + "epoch": 4.33, + "learning_rate": 2.574249502308162e-05, + "loss": 1.1257, + "step": 7217000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5740395057521057e-05, + "loss": 1.1298, + "step": 7217500 + }, + { + "epoch": 4.33, + "learning_rate": 2.573829509196049e-05, + "loss": 1.1046, + "step": 7218000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5736195126399927e-05, + "loss": 1.1189, + "step": 7218500 + }, + { + "epoch": 4.33, + "learning_rate": 2.5734095160839364e-05, + "loss": 1.1321, + "step": 7219000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5731995195278797e-05, + "loss": 1.1022, + "step": 7219500 + }, + { + "epoch": 4.33, + "learning_rate": 2.572989522971823e-05, + "loss": 1.0668, + "step": 7220000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5727795264157668e-05, + "loss": 1.1225, + "step": 7220500 + }, + { + "epoch": 4.33, + "learning_rate": 2.5725703698459345e-05, + "loss": 1.121, + "step": 7221000 + }, + { + "epoch": 4.33, + "learning_rate": 2.572360373289878e-05, + "loss": 1.0907, + "step": 7221500 + }, + { + "epoch": 4.33, + "learning_rate": 2.5721503767338215e-05, + "loss": 1.129, + "step": 7222000 + }, + { + "epoch": 4.33, + "learning_rate": 2.571940380177765e-05, + "loss": 1.091, + "step": 7222500 + }, + { + "epoch": 4.33, + "learning_rate": 2.5717308036148206e-05, + "loss": 1.1178, + "step": 7223000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5715208070587642e-05, + "loss": 1.099, + "step": 7223500 + }, + { + "epoch": 4.33, + "learning_rate": 2.571310810502708e-05, + "loss": 1.1031, + "step": 7224000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5711008139466513e-05, + "loss": 1.0976, + "step": 7224500 + }, + { + "epoch": 4.33, + "learning_rate": 2.5708908173905946e-05, + "loss": 1.1221, + "step": 7225000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5706808208345383e-05, + "loss": 1.1084, + "step": 7225500 + }, + { + "epoch": 4.33, + "learning_rate": 2.570470824278482e-05, + "loss": 1.1165, + "step": 7226000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5702608277224253e-05, + "loss": 1.1199, + "step": 7226500 + }, + { + "epoch": 4.33, + "learning_rate": 2.5700508311663687e-05, + "loss": 1.1324, + "step": 7227000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5698412546034244e-05, + "loss": 1.0912, + "step": 7227500 + }, + { + "epoch": 4.33, + "learning_rate": 2.569631258047368e-05, + "loss": 1.1159, + "step": 7228000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5694212614913117e-05, + "loss": 1.1319, + "step": 7228500 + }, + { + "epoch": 4.33, + "learning_rate": 2.569211264935255e-05, + "loss": 1.1102, + "step": 7229000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5690016883723104e-05, + "loss": 1.1082, + "step": 7229500 + }, + { + "epoch": 4.33, + "learning_rate": 2.568791691816254e-05, + "loss": 1.1164, + "step": 7230000 + }, + { + "epoch": 4.33, + "learning_rate": 2.5685816952601978e-05, + "loss": 1.0967, + "step": 7230500 + }, + { + "epoch": 4.34, + "learning_rate": 2.568371698704141e-05, + "loss": 1.1223, + "step": 7231000 + }, + { + "epoch": 4.34, + "learning_rate": 2.568162122141197e-05, + "loss": 1.1179, + "step": 7231500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5679521255851402e-05, + "loss": 1.0998, + "step": 7232000 + }, + { + "epoch": 4.34, + "learning_rate": 2.567742129029084e-05, + "loss": 1.1066, + "step": 7232500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5675321324730276e-05, + "loss": 1.1033, + "step": 7233000 + }, + { + "epoch": 4.34, + "learning_rate": 2.567322135916971e-05, + "loss": 1.1222, + "step": 7233500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5671125593540266e-05, + "loss": 1.0864, + "step": 7234000 + }, + { + "epoch": 4.34, + "learning_rate": 2.56690256279797e-05, + "loss": 1.0872, + "step": 7234500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5666925662419136e-05, + "loss": 1.1062, + "step": 7235000 + }, + { + "epoch": 4.34, + "learning_rate": 2.5664825696858573e-05, + "loss": 1.1076, + "step": 7235500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5662725731298007e-05, + "loss": 1.1113, + "step": 7236000 + }, + { + "epoch": 4.34, + "learning_rate": 2.566062576573744e-05, + "loss": 1.1073, + "step": 7236500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5658525800176877e-05, + "loss": 1.0825, + "step": 7237000 + }, + { + "epoch": 4.34, + "learning_rate": 2.565642583461631e-05, + "loss": 1.0923, + "step": 7237500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5654330068986867e-05, + "loss": 1.0792, + "step": 7238000 + }, + { + "epoch": 4.34, + "learning_rate": 2.5652230103426304e-05, + "loss": 1.1145, + "step": 7238500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5650134337796858e-05, + "loss": 1.0902, + "step": 7239000 + }, + { + "epoch": 4.34, + "learning_rate": 2.5648034372236295e-05, + "loss": 1.0608, + "step": 7239500 + }, + { + "epoch": 4.34, + "learning_rate": 2.564593440667573e-05, + "loss": 1.1357, + "step": 7240000 + }, + { + "epoch": 4.34, + "learning_rate": 2.5643834441115165e-05, + "loss": 1.0829, + "step": 7240500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5641734475554602e-05, + "loss": 1.0832, + "step": 7241000 + }, + { + "epoch": 4.34, + "learning_rate": 2.5639634509994035e-05, + "loss": 1.1002, + "step": 7241500 + }, + { + "epoch": 4.34, + "learning_rate": 2.563753454443347e-05, + "loss": 1.121, + "step": 7242000 + }, + { + "epoch": 4.34, + "learning_rate": 2.563543877880403e-05, + "loss": 1.1202, + "step": 7242500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5633338813243463e-05, + "loss": 1.1058, + "step": 7243000 + }, + { + "epoch": 4.34, + "learning_rate": 2.5631238847682896e-05, + "loss": 1.0994, + "step": 7243500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5629138882122333e-05, + "loss": 1.1311, + "step": 7244000 + }, + { + "epoch": 4.34, + "learning_rate": 2.5627038916561766e-05, + "loss": 1.1013, + "step": 7244500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5624943150932323e-05, + "loss": 1.0978, + "step": 7245000 + }, + { + "epoch": 4.34, + "learning_rate": 2.562284318537176e-05, + "loss": 1.0832, + "step": 7245500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5620743219811194e-05, + "loss": 1.1127, + "step": 7246000 + }, + { + "epoch": 4.34, + "learning_rate": 2.561864325425063e-05, + "loss": 1.1001, + "step": 7246500 + }, + { + "epoch": 4.34, + "learning_rate": 2.5616543288690064e-05, + "loss": 1.1032, + "step": 7247000 + }, + { + "epoch": 4.35, + "learning_rate": 2.56144433231295e-05, + "loss": 1.1024, + "step": 7247500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5612343357568938e-05, + "loss": 1.1298, + "step": 7248000 + }, + { + "epoch": 4.35, + "learning_rate": 2.561024759193949e-05, + "loss": 1.0967, + "step": 7248500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5608147626378925e-05, + "loss": 1.0992, + "step": 7249000 + }, + { + "epoch": 4.35, + "learning_rate": 2.560604766081836e-05, + "loss": 1.114, + "step": 7249500 + }, + { + "epoch": 4.35, + "learning_rate": 2.56039476952578e-05, + "loss": 1.1092, + "step": 7250000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5601847729697235e-05, + "loss": 1.1117, + "step": 7250500 + }, + { + "epoch": 4.35, + "learning_rate": 2.559975196406779e-05, + "loss": 1.1126, + "step": 7251000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5597651998507222e-05, + "loss": 1.1178, + "step": 7251500 + }, + { + "epoch": 4.35, + "learning_rate": 2.559555203294666e-05, + "loss": 1.1109, + "step": 7252000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5593452067386096e-05, + "loss": 1.1, + "step": 7252500 + }, + { + "epoch": 4.35, + "learning_rate": 2.559135210182553e-05, + "loss": 1.1209, + "step": 7253000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5589256336196086e-05, + "loss": 1.1151, + "step": 7253500 + }, + { + "epoch": 4.35, + "learning_rate": 2.558715637063552e-05, + "loss": 1.108, + "step": 7254000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5585056405074957e-05, + "loss": 1.1244, + "step": 7254500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5582960639445514e-05, + "loss": 1.1013, + "step": 7255000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5580860673884947e-05, + "loss": 1.1193, + "step": 7255500 + }, + { + "epoch": 4.35, + "learning_rate": 2.557876070832438e-05, + "loss": 1.1175, + "step": 7256000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5576660742763817e-05, + "loss": 1.0846, + "step": 7256500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5574560777203254e-05, + "loss": 1.1029, + "step": 7257000 + }, + { + "epoch": 4.35, + "learning_rate": 2.557246081164269e-05, + "loss": 1.1286, + "step": 7257500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5570360846082125e-05, + "loss": 1.1204, + "step": 7258000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5568260880521558e-05, + "loss": 1.1105, + "step": 7258500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5566160914960995e-05, + "loss": 1.0979, + "step": 7259000 + }, + { + "epoch": 4.35, + "learning_rate": 2.556406094940043e-05, + "loss": 1.1041, + "step": 7259500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5561960983839865e-05, + "loss": 1.1305, + "step": 7260000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5559861018279302e-05, + "loss": 1.0881, + "step": 7260500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5557765252649856e-05, + "loss": 1.0929, + "step": 7261000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5555669487020413e-05, + "loss": 1.1151, + "step": 7261500 + }, + { + "epoch": 4.35, + "learning_rate": 2.555356952145985e-05, + "loss": 1.1284, + "step": 7262000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5551469555899283e-05, + "loss": 1.1008, + "step": 7262500 + }, + { + "epoch": 4.35, + "learning_rate": 2.5549369590338716e-05, + "loss": 1.1324, + "step": 7263000 + }, + { + "epoch": 4.35, + "learning_rate": 2.5547273824709273e-05, + "loss": 1.1112, + "step": 7263500 + }, + { + "epoch": 4.36, + "learning_rate": 2.554517385914871e-05, + "loss": 1.0945, + "step": 7264000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5543073893588147e-05, + "loss": 1.101, + "step": 7264500 + }, + { + "epoch": 4.36, + "learning_rate": 2.554097392802758e-05, + "loss": 1.1016, + "step": 7265000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5538878162398134e-05, + "loss": 1.1243, + "step": 7265500 + }, + { + "epoch": 4.36, + "learning_rate": 2.553677819683757e-05, + "loss": 1.1311, + "step": 7266000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5534678231277008e-05, + "loss": 1.0794, + "step": 7266500 + }, + { + "epoch": 4.36, + "learning_rate": 2.553257826571644e-05, + "loss": 1.144, + "step": 7267000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5530478300155878e-05, + "loss": 1.1156, + "step": 7267500 + }, + { + "epoch": 4.36, + "learning_rate": 2.552837833459531e-05, + "loss": 1.1113, + "step": 7268000 + }, + { + "epoch": 4.36, + "learning_rate": 2.552627836903475e-05, + "loss": 1.1087, + "step": 7268500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5524182603405305e-05, + "loss": 1.1068, + "step": 7269000 + }, + { + "epoch": 4.36, + "learning_rate": 2.552208263784474e-05, + "loss": 1.0995, + "step": 7269500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5519982672284176e-05, + "loss": 1.1182, + "step": 7270000 + }, + { + "epoch": 4.36, + "learning_rate": 2.551788270672361e-05, + "loss": 1.112, + "step": 7270500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5515786941094166e-05, + "loss": 1.1108, + "step": 7271000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5513686975533603e-05, + "loss": 1.0922, + "step": 7271500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5511587009973036e-05, + "loss": 1.126, + "step": 7272000 + }, + { + "epoch": 4.36, + "learning_rate": 2.550948704441247e-05, + "loss": 1.129, + "step": 7272500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5507387078851907e-05, + "loss": 1.1068, + "step": 7273000 + }, + { + "epoch": 4.36, + "learning_rate": 2.550528711329134e-05, + "loss": 1.0905, + "step": 7273500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5503187147730777e-05, + "loss": 1.0995, + "step": 7274000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5501087182170214e-05, + "loss": 1.1117, + "step": 7274500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5498987216609647e-05, + "loss": 1.0853, + "step": 7275000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5496891450980204e-05, + "loss": 1.1162, + "step": 7275500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5494791485419638e-05, + "loss": 1.0862, + "step": 7276000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5492691519859075e-05, + "loss": 1.1542, + "step": 7276500 + }, + { + "epoch": 4.36, + "learning_rate": 2.549059155429851e-05, + "loss": 1.1012, + "step": 7277000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5488495788669065e-05, + "loss": 1.1075, + "step": 7277500 + }, + { + "epoch": 4.36, + "learning_rate": 2.54863958231085e-05, + "loss": 1.1146, + "step": 7278000 + }, + { + "epoch": 4.36, + "learning_rate": 2.5484295857547935e-05, + "loss": 1.1371, + "step": 7278500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5482195891987372e-05, + "loss": 1.1027, + "step": 7279000 + }, + { + "epoch": 4.36, + "learning_rate": 2.548009592642681e-05, + "loss": 1.1161, + "step": 7279500 + }, + { + "epoch": 4.36, + "learning_rate": 2.5478000160797363e-05, + "loss": 1.1137, + "step": 7280000 + }, + { + "epoch": 4.36, + "learning_rate": 2.547590439516792e-05, + "loss": 1.1091, + "step": 7280500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5473804429607353e-05, + "loss": 1.1058, + "step": 7281000 + }, + { + "epoch": 4.37, + "learning_rate": 2.547170446404679e-05, + "loss": 1.0876, + "step": 7281500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5469604498486223e-05, + "loss": 1.1165, + "step": 7282000 + }, + { + "epoch": 4.37, + "learning_rate": 2.546750453292566e-05, + "loss": 1.0986, + "step": 7282500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5465404567365094e-05, + "loss": 1.102, + "step": 7283000 + }, + { + "epoch": 4.37, + "learning_rate": 2.546330460180453e-05, + "loss": 1.1318, + "step": 7283500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5461204636243967e-05, + "loss": 1.1011, + "step": 7284000 + }, + { + "epoch": 4.37, + "learning_rate": 2.545910887061452e-05, + "loss": 1.1232, + "step": 7284500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5457008905053954e-05, + "loss": 1.1069, + "step": 7285000 + }, + { + "epoch": 4.37, + "learning_rate": 2.545490893949339e-05, + "loss": 1.1214, + "step": 7285500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5452808973932828e-05, + "loss": 1.0934, + "step": 7286000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5450713208303385e-05, + "loss": 1.114, + "step": 7286500 + }, + { + "epoch": 4.37, + "learning_rate": 2.544861324274282e-05, + "loss": 1.0969, + "step": 7287000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5446513277182252e-05, + "loss": 1.1192, + "step": 7287500 + }, + { + "epoch": 4.37, + "learning_rate": 2.544441331162169e-05, + "loss": 1.116, + "step": 7288000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5442313346061126e-05, + "loss": 1.0839, + "step": 7288500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5440217580431683e-05, + "loss": 1.1075, + "step": 7289000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5438117614871116e-05, + "loss": 1.0826, + "step": 7289500 + }, + { + "epoch": 4.37, + "learning_rate": 2.543601764931055e-05, + "loss": 1.1168, + "step": 7290000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5433917683749986e-05, + "loss": 1.0868, + "step": 7290500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5431817718189423e-05, + "loss": 1.1098, + "step": 7291000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5429721952559977e-05, + "loss": 1.118, + "step": 7291500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5427621986999414e-05, + "loss": 1.1099, + "step": 7292000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5425522021438847e-05, + "loss": 1.1078, + "step": 7292500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5423426255809404e-05, + "loss": 1.1029, + "step": 7293000 + }, + { + "epoch": 4.37, + "learning_rate": 2.542132629024884e-05, + "loss": 1.1181, + "step": 7293500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5419226324688274e-05, + "loss": 1.1287, + "step": 7294000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5417126359127708e-05, + "loss": 1.0935, + "step": 7294500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5415026393567145e-05, + "loss": 1.1032, + "step": 7295000 + }, + { + "epoch": 4.37, + "learning_rate": 2.541292642800658e-05, + "loss": 1.1156, + "step": 7295500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5410826462446015e-05, + "loss": 1.0906, + "step": 7296000 + }, + { + "epoch": 4.37, + "learning_rate": 2.5408726496885452e-05, + "loss": 1.1007, + "step": 7296500 + }, + { + "epoch": 4.37, + "learning_rate": 2.5406630731256005e-05, + "loss": 1.1069, + "step": 7297000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5404530765695442e-05, + "loss": 1.0979, + "step": 7297500 + }, + { + "epoch": 4.38, + "learning_rate": 2.540243080013488e-05, + "loss": 1.1156, + "step": 7298000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5400330834574312e-05, + "loss": 1.0998, + "step": 7298500 + }, + { + "epoch": 4.38, + "learning_rate": 2.539823086901375e-05, + "loss": 1.0841, + "step": 7299000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5396135103384303e-05, + "loss": 1.1056, + "step": 7299500 + }, + { + "epoch": 4.38, + "learning_rate": 2.539403513782374e-05, + "loss": 1.1086, + "step": 7300000 + }, + { + "epoch": 4.38, + "eval_loss": 1.0867536067962646, + "eval_runtime": 1112.0821, + "eval_samples_per_second": 473.634, + "eval_steps_per_second": 78.939, + "step": 7300000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5391935172263177e-05, + "loss": 1.1027, + "step": 7300500 + }, + { + "epoch": 4.38, + "learning_rate": 2.538983520670261e-05, + "loss": 1.1292, + "step": 7301000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5387735241142043e-05, + "loss": 1.0909, + "step": 7301500 + }, + { + "epoch": 4.38, + "learning_rate": 2.53856394755126e-05, + "loss": 1.1298, + "step": 7302000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5383539509952037e-05, + "loss": 1.1205, + "step": 7302500 + }, + { + "epoch": 4.38, + "learning_rate": 2.538143954439147e-05, + "loss": 1.111, + "step": 7303000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5379339578830908e-05, + "loss": 1.1242, + "step": 7303500 + }, + { + "epoch": 4.38, + "learning_rate": 2.537724381320146e-05, + "loss": 1.111, + "step": 7304000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5375143847640898e-05, + "loss": 1.1297, + "step": 7304500 + }, + { + "epoch": 4.38, + "learning_rate": 2.5373043882080335e-05, + "loss": 1.1054, + "step": 7305000 + }, + { + "epoch": 4.38, + "learning_rate": 2.537094391651977e-05, + "loss": 1.0879, + "step": 7305500 + }, + { + "epoch": 4.38, + "learning_rate": 2.5368843950959205e-05, + "loss": 1.1185, + "step": 7306000 + }, + { + "epoch": 4.38, + "learning_rate": 2.536674818532976e-05, + "loss": 1.1193, + "step": 7306500 + }, + { + "epoch": 4.38, + "learning_rate": 2.5364648219769196e-05, + "loss": 1.1073, + "step": 7307000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5362548254208632e-05, + "loss": 1.1517, + "step": 7307500 + }, + { + "epoch": 4.38, + "learning_rate": 2.5360448288648066e-05, + "loss": 1.1299, + "step": 7308000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5358348323087503e-05, + "loss": 1.1319, + "step": 7308500 + }, + { + "epoch": 4.38, + "learning_rate": 2.5356252557458056e-05, + "loss": 1.1471, + "step": 7309000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5354152591897493e-05, + "loss": 1.125, + "step": 7309500 + }, + { + "epoch": 4.38, + "learning_rate": 2.5352052626336927e-05, + "loss": 1.0912, + "step": 7310000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5349952660776363e-05, + "loss": 1.127, + "step": 7310500 + }, + { + "epoch": 4.38, + "learning_rate": 2.5347856895146917e-05, + "loss": 1.1254, + "step": 7311000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5345756929586354e-05, + "loss": 1.1055, + "step": 7311500 + }, + { + "epoch": 4.38, + "learning_rate": 2.534366116395691e-05, + "loss": 1.1177, + "step": 7312000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5341561198396348e-05, + "loss": 1.109, + "step": 7312500 + }, + { + "epoch": 4.38, + "learning_rate": 2.533946123283578e-05, + "loss": 1.1251, + "step": 7313000 + }, + { + "epoch": 4.38, + "learning_rate": 2.5337361267275215e-05, + "loss": 1.1033, + "step": 7313500 + }, + { + "epoch": 4.39, + "learning_rate": 2.533526130171465e-05, + "loss": 1.1078, + "step": 7314000 + }, + { + "epoch": 4.39, + "learning_rate": 2.533316133615409e-05, + "loss": 1.0762, + "step": 7314500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5331061370593522e-05, + "loss": 1.1059, + "step": 7315000 + }, + { + "epoch": 4.39, + "learning_rate": 2.532896140503296e-05, + "loss": 1.0803, + "step": 7315500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5326861439472392e-05, + "loss": 1.109, + "step": 7316000 + }, + { + "epoch": 4.39, + "learning_rate": 2.532476987377407e-05, + "loss": 1.102, + "step": 7316500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5322669908213506e-05, + "loss": 1.1054, + "step": 7317000 + }, + { + "epoch": 4.39, + "learning_rate": 2.5320569942652943e-05, + "loss": 1.1003, + "step": 7317500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5318469977092373e-05, + "loss": 1.1035, + "step": 7318000 + }, + { + "epoch": 4.39, + "learning_rate": 2.531637001153181e-05, + "loss": 1.1, + "step": 7318500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5314270045971247e-05, + "loss": 1.1141, + "step": 7319000 + }, + { + "epoch": 4.39, + "learning_rate": 2.531217008041068e-05, + "loss": 1.1169, + "step": 7319500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5310070114850117e-05, + "loss": 1.1102, + "step": 7320000 + }, + { + "epoch": 4.39, + "learning_rate": 2.530797434922067e-05, + "loss": 1.1213, + "step": 7320500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5305874383660107e-05, + "loss": 1.1191, + "step": 7321000 + }, + { + "epoch": 4.39, + "learning_rate": 2.5303774418099544e-05, + "loss": 1.1212, + "step": 7321500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5301674452538978e-05, + "loss": 1.1067, + "step": 7322000 + }, + { + "epoch": 4.39, + "learning_rate": 2.529957868690953e-05, + "loss": 1.088, + "step": 7322500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5297478721348968e-05, + "loss": 1.1512, + "step": 7323000 + }, + { + "epoch": 4.39, + "learning_rate": 2.5295378755788405e-05, + "loss": 1.1175, + "step": 7323500 + }, + { + "epoch": 4.39, + "learning_rate": 2.529327879022784e-05, + "loss": 1.1244, + "step": 7324000 + }, + { + "epoch": 4.39, + "learning_rate": 2.5291178824667275e-05, + "loss": 1.0932, + "step": 7324500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5289078859106712e-05, + "loss": 1.076, + "step": 7325000 + }, + { + "epoch": 4.39, + "learning_rate": 2.5286978893546146e-05, + "loss": 1.119, + "step": 7325500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5284883127916703e-05, + "loss": 1.1184, + "step": 7326000 + }, + { + "epoch": 4.39, + "learning_rate": 2.5282783162356136e-05, + "loss": 1.1155, + "step": 7326500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5280683196795573e-05, + "loss": 1.112, + "step": 7327000 + }, + { + "epoch": 4.39, + "learning_rate": 2.527858323123501e-05, + "loss": 1.0981, + "step": 7327500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5276483265674443e-05, + "loss": 1.1161, + "step": 7328000 + }, + { + "epoch": 4.39, + "learning_rate": 2.5274383300113877e-05, + "loss": 1.1013, + "step": 7328500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5272283334553313e-05, + "loss": 1.0909, + "step": 7329000 + }, + { + "epoch": 4.39, + "learning_rate": 2.527018336899275e-05, + "loss": 1.0878, + "step": 7329500 + }, + { + "epoch": 4.39, + "learning_rate": 2.5268083403432184e-05, + "loss": 1.1267, + "step": 7330000 + }, + { + "epoch": 4.39, + "learning_rate": 2.5265987637802737e-05, + "loss": 1.094, + "step": 7330500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5263887672242174e-05, + "loss": 1.1051, + "step": 7331000 + }, + { + "epoch": 4.4, + "learning_rate": 2.526178770668161e-05, + "loss": 1.1039, + "step": 7331500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5259687741121044e-05, + "loss": 1.1003, + "step": 7332000 + }, + { + "epoch": 4.4, + "learning_rate": 2.525758777556048e-05, + "loss": 1.1103, + "step": 7332500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5255492009931035e-05, + "loss": 1.1141, + "step": 7333000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5253392044370472e-05, + "loss": 1.0989, + "step": 7333500 + }, + { + "epoch": 4.4, + "learning_rate": 2.525129207880991e-05, + "loss": 1.1163, + "step": 7334000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5249192113249342e-05, + "loss": 1.1062, + "step": 7334500 + }, + { + "epoch": 4.4, + "learning_rate": 2.52470963476199e-05, + "loss": 1.1212, + "step": 7335000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5244996382059332e-05, + "loss": 1.1168, + "step": 7335500 + }, + { + "epoch": 4.4, + "learning_rate": 2.524289641649877e-05, + "loss": 1.1292, + "step": 7336000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5240796450938206e-05, + "loss": 1.1312, + "step": 7336500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5238700685308763e-05, + "loss": 1.0961, + "step": 7337000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5236600719748193e-05, + "loss": 1.1217, + "step": 7337500 + }, + { + "epoch": 4.4, + "learning_rate": 2.523450075418763e-05, + "loss": 1.1153, + "step": 7338000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5232400788627067e-05, + "loss": 1.0897, + "step": 7338500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5230305022997624e-05, + "loss": 1.113, + "step": 7339000 + }, + { + "epoch": 4.4, + "learning_rate": 2.522820505743706e-05, + "loss": 1.1044, + "step": 7339500 + }, + { + "epoch": 4.4, + "learning_rate": 2.522610509187649e-05, + "loss": 1.1138, + "step": 7340000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5224005126315928e-05, + "loss": 1.1104, + "step": 7340500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5221905160755364e-05, + "loss": 1.1072, + "step": 7341000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5219805195194798e-05, + "loss": 1.103, + "step": 7341500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5217709429565355e-05, + "loss": 1.0897, + "step": 7342000 + }, + { + "epoch": 4.4, + "learning_rate": 2.521560946400479e-05, + "loss": 1.1209, + "step": 7342500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5213509498444225e-05, + "loss": 1.1061, + "step": 7343000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5211409532883662e-05, + "loss": 1.0803, + "step": 7343500 + }, + { + "epoch": 4.4, + "learning_rate": 2.520931376725422e-05, + "loss": 1.1124, + "step": 7344000 + }, + { + "epoch": 4.4, + "learning_rate": 2.520721380169365e-05, + "loss": 1.1188, + "step": 7344500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5205113836133086e-05, + "loss": 1.1097, + "step": 7345000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5203013870572523e-05, + "loss": 1.1151, + "step": 7345500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5200913905011956e-05, + "loss": 1.109, + "step": 7346000 + }, + { + "epoch": 4.4, + "learning_rate": 2.5198818139382517e-05, + "loss": 1.1158, + "step": 7346500 + }, + { + "epoch": 4.4, + "learning_rate": 2.5196718173821947e-05, + "loss": 1.0797, + "step": 7347000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5194618208261383e-05, + "loss": 1.0864, + "step": 7347500 + }, + { + "epoch": 4.41, + "learning_rate": 2.519251824270082e-05, + "loss": 1.082, + "step": 7348000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5190418277140254e-05, + "loss": 1.1029, + "step": 7348500 + }, + { + "epoch": 4.41, + "learning_rate": 2.518832251151081e-05, + "loss": 1.1087, + "step": 7349000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5186226745881368e-05, + "loss": 1.1211, + "step": 7349500 + }, + { + "epoch": 4.41, + "learning_rate": 2.51841267803208e-05, + "loss": 1.0941, + "step": 7350000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5182026814760238e-05, + "loss": 1.1183, + "step": 7350500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5179926849199675e-05, + "loss": 1.1007, + "step": 7351000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5177826883639105e-05, + "loss": 1.1182, + "step": 7351500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5175726918078542e-05, + "loss": 1.1318, + "step": 7352000 + }, + { + "epoch": 4.41, + "learning_rate": 2.517362695251798e-05, + "loss": 1.0863, + "step": 7352500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5171526986957412e-05, + "loss": 1.0963, + "step": 7353000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5169431221327972e-05, + "loss": 1.0835, + "step": 7353500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5167331255767403e-05, + "loss": 1.1111, + "step": 7354000 + }, + { + "epoch": 4.41, + "learning_rate": 2.516523129020684e-05, + "loss": 1.115, + "step": 7354500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5163131324646276e-05, + "loss": 1.1063, + "step": 7355000 + }, + { + "epoch": 4.41, + "learning_rate": 2.516103135908571e-05, + "loss": 1.0978, + "step": 7355500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5158931393525147e-05, + "loss": 1.0936, + "step": 7356000 + }, + { + "epoch": 4.41, + "learning_rate": 2.51568356278957e-05, + "loss": 1.103, + "step": 7356500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5154735662335137e-05, + "loss": 1.1147, + "step": 7357000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5152635696774574e-05, + "loss": 1.1123, + "step": 7357500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5150535731214007e-05, + "loss": 1.0867, + "step": 7358000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5148435765653444e-05, + "loss": 1.1195, + "step": 7358500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5146340000023998e-05, + "loss": 1.1041, + "step": 7359000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5144240034463435e-05, + "loss": 1.1151, + "step": 7359500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5142140068902868e-05, + "loss": 1.0806, + "step": 7360000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5140040103342305e-05, + "loss": 1.1394, + "step": 7360500 + }, + { + "epoch": 4.41, + "learning_rate": 2.513794433771286e-05, + "loss": 1.1145, + "step": 7361000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5135844372152295e-05, + "loss": 1.0833, + "step": 7361500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5133744406591732e-05, + "loss": 1.1202, + "step": 7362000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5131644441031166e-05, + "loss": 1.125, + "step": 7362500 + }, + { + "epoch": 4.41, + "learning_rate": 2.5129544475470602e-05, + "loss": 1.1085, + "step": 7363000 + }, + { + "epoch": 4.41, + "learning_rate": 2.5127448709841156e-05, + "loss": 1.1054, + "step": 7363500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5125348744280593e-05, + "loss": 1.1118, + "step": 7364000 + }, + { + "epoch": 4.42, + "learning_rate": 2.512324877872003e-05, + "loss": 1.1111, + "step": 7364500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5121148813159463e-05, + "loss": 1.1166, + "step": 7365000 + }, + { + "epoch": 4.42, + "learning_rate": 2.511905304753002e-05, + "loss": 1.1017, + "step": 7365500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5116953081969454e-05, + "loss": 1.112, + "step": 7366000 + }, + { + "epoch": 4.42, + "learning_rate": 2.511485311640889e-05, + "loss": 1.1192, + "step": 7366500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5112753150848327e-05, + "loss": 1.0884, + "step": 7367000 + }, + { + "epoch": 4.42, + "learning_rate": 2.511065318528776e-05, + "loss": 1.1157, + "step": 7367500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5108553219727198e-05, + "loss": 1.0887, + "step": 7368000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5106453254166634e-05, + "loss": 1.0926, + "step": 7368500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5104357488537188e-05, + "loss": 1.1522, + "step": 7369000 + }, + { + "epoch": 4.42, + "learning_rate": 2.510225752297662e-05, + "loss": 1.1342, + "step": 7369500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5100157557416058e-05, + "loss": 1.1415, + "step": 7370000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5098057591855495e-05, + "loss": 1.1161, + "step": 7370500 + }, + { + "epoch": 4.42, + "learning_rate": 2.509596182622605e-05, + "loss": 1.1189, + "step": 7371000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5093861860665486e-05, + "loss": 1.1043, + "step": 7371500 + }, + { + "epoch": 4.42, + "learning_rate": 2.509176189510492e-05, + "loss": 1.1184, + "step": 7372000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5089661929544356e-05, + "loss": 1.1045, + "step": 7372500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5087561963983793e-05, + "loss": 1.0795, + "step": 7373000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5085466198354346e-05, + "loss": 1.0863, + "step": 7373500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5083370432724903e-05, + "loss": 1.1085, + "step": 7374000 + }, + { + "epoch": 4.42, + "learning_rate": 2.508127046716434e-05, + "loss": 1.0946, + "step": 7374500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5079170501603774e-05, + "loss": 1.0934, + "step": 7375000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5077070536043207e-05, + "loss": 1.0952, + "step": 7375500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5074970570482644e-05, + "loss": 1.0987, + "step": 7376000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5072870604922077e-05, + "loss": 1.1103, + "step": 7376500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5070770639361514e-05, + "loss": 1.0898, + "step": 7377000 + }, + { + "epoch": 4.42, + "learning_rate": 2.506867067380095e-05, + "loss": 1.0979, + "step": 7377500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5066574908171505e-05, + "loss": 1.1216, + "step": 7378000 + }, + { + "epoch": 4.42, + "learning_rate": 2.506447494261094e-05, + "loss": 1.093, + "step": 7378500 + }, + { + "epoch": 4.42, + "learning_rate": 2.5062374977050375e-05, + "loss": 1.1252, + "step": 7379000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5060275011489812e-05, + "loss": 1.1201, + "step": 7379500 + }, + { + "epoch": 4.42, + "learning_rate": 2.505817504592925e-05, + "loss": 1.0984, + "step": 7380000 + }, + { + "epoch": 4.42, + "learning_rate": 2.5056079280299802e-05, + "loss": 1.1139, + "step": 7380500 + }, + { + "epoch": 4.43, + "learning_rate": 2.505397931473924e-05, + "loss": 1.1305, + "step": 7381000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5051879349178672e-05, + "loss": 1.1204, + "step": 7381500 + }, + { + "epoch": 4.43, + "learning_rate": 2.504978358354923e-05, + "loss": 1.0999, + "step": 7382000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5047683617988663e-05, + "loss": 1.1226, + "step": 7382500 + }, + { + "epoch": 4.43, + "learning_rate": 2.50455836524281e-05, + "loss": 1.0772, + "step": 7383000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5043483686867533e-05, + "loss": 1.1295, + "step": 7383500 + }, + { + "epoch": 4.43, + "learning_rate": 2.504138372130697e-05, + "loss": 1.1331, + "step": 7384000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5039283755746407e-05, + "loss": 1.1122, + "step": 7384500 + }, + { + "epoch": 4.43, + "learning_rate": 2.503718379018584e-05, + "loss": 1.1019, + "step": 7385000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5035083824625274e-05, + "loss": 1.1132, + "step": 7385500 + }, + { + "epoch": 4.43, + "learning_rate": 2.503298805899583e-05, + "loss": 1.111, + "step": 7386000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5030888093435268e-05, + "loss": 1.1012, + "step": 7386500 + }, + { + "epoch": 4.43, + "learning_rate": 2.5028788127874704e-05, + "loss": 1.1527, + "step": 7387000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5026688162314138e-05, + "loss": 1.0956, + "step": 7387500 + }, + { + "epoch": 4.43, + "learning_rate": 2.502458819675357e-05, + "loss": 1.1088, + "step": 7388000 + }, + { + "epoch": 4.43, + "learning_rate": 2.502249243112413e-05, + "loss": 1.0897, + "step": 7388500 + }, + { + "epoch": 4.43, + "learning_rate": 2.5020392465563565e-05, + "loss": 1.1157, + "step": 7389000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5018292500003002e-05, + "loss": 1.1173, + "step": 7389500 + }, + { + "epoch": 4.43, + "learning_rate": 2.5016192534442432e-05, + "loss": 1.097, + "step": 7390000 + }, + { + "epoch": 4.43, + "learning_rate": 2.501409676881299e-05, + "loss": 1.1103, + "step": 7390500 + }, + { + "epoch": 4.43, + "learning_rate": 2.5011996803252426e-05, + "loss": 1.101, + "step": 7391000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5009896837691863e-05, + "loss": 1.1038, + "step": 7391500 + }, + { + "epoch": 4.43, + "learning_rate": 2.5007796872131296e-05, + "loss": 1.1001, + "step": 7392000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5005701106501853e-05, + "loss": 1.1277, + "step": 7392500 + }, + { + "epoch": 4.43, + "learning_rate": 2.5003601140941287e-05, + "loss": 1.0846, + "step": 7393000 + }, + { + "epoch": 4.43, + "learning_rate": 2.5001501175380723e-05, + "loss": 1.0849, + "step": 7393500 + }, + { + "epoch": 4.43, + "learning_rate": 2.499940120982016e-05, + "loss": 1.0944, + "step": 7394000 + }, + { + "epoch": 4.43, + "learning_rate": 2.4997301244259594e-05, + "loss": 1.1146, + "step": 7394500 + }, + { + "epoch": 4.43, + "learning_rate": 2.499520547863015e-05, + "loss": 1.1269, + "step": 7395000 + }, + { + "epoch": 4.43, + "learning_rate": 2.4993105513069584e-05, + "loss": 1.1046, + "step": 7395500 + }, + { + "epoch": 4.43, + "learning_rate": 2.499100554750902e-05, + "loss": 1.1168, + "step": 7396000 + }, + { + "epoch": 4.43, + "learning_rate": 2.4988905581948458e-05, + "loss": 1.0968, + "step": 7396500 + }, + { + "epoch": 4.43, + "learning_rate": 2.498680561638789e-05, + "loss": 1.0907, + "step": 7397000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4984709850758445e-05, + "loss": 1.1242, + "step": 7397500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4982609885197882e-05, + "loss": 1.1166, + "step": 7398000 + }, + { + "epoch": 4.44, + "learning_rate": 2.498050991963732e-05, + "loss": 1.1002, + "step": 7398500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4978409954076752e-05, + "loss": 1.1077, + "step": 7399000 + }, + { + "epoch": 4.44, + "learning_rate": 2.497631418844731e-05, + "loss": 1.1191, + "step": 7399500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4974214222886743e-05, + "loss": 1.0956, + "step": 7400000 + }, + { + "epoch": 4.44, + "eval_loss": 1.0819727182388306, + "eval_runtime": 1101.2934, + "eval_samples_per_second": 478.274, + "eval_steps_per_second": 79.713, + "step": 7400000 + }, + { + "epoch": 4.44, + "learning_rate": 2.497211425732618e-05, + "loss": 1.1036, + "step": 7400500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4970014291765616e-05, + "loss": 1.0922, + "step": 7401000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4967918526136173e-05, + "loss": 1.1333, + "step": 7401500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4965818560575607e-05, + "loss": 1.1308, + "step": 7402000 + }, + { + "epoch": 4.44, + "learning_rate": 2.496371859501504e-05, + "loss": 1.1089, + "step": 7402500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4961618629454477e-05, + "loss": 1.0969, + "step": 7403000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4959518663893914e-05, + "loss": 1.1031, + "step": 7403500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4957422898264467e-05, + "loss": 1.115, + "step": 7404000 + }, + { + "epoch": 4.44, + "learning_rate": 2.49553229327039e-05, + "loss": 1.1123, + "step": 7404500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4953222967143338e-05, + "loss": 1.1056, + "step": 7405000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4951123001582775e-05, + "loss": 1.0957, + "step": 7405500 + }, + { + "epoch": 4.44, + "learning_rate": 2.494902303602221e-05, + "loss": 1.1064, + "step": 7406000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4946927270392765e-05, + "loss": 1.1157, + "step": 7406500 + }, + { + "epoch": 4.44, + "learning_rate": 2.49448273048322e-05, + "loss": 1.1099, + "step": 7407000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4942727339271635e-05, + "loss": 1.0938, + "step": 7407500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4940627373711072e-05, + "loss": 1.0929, + "step": 7408000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4938527408150506e-05, + "loss": 1.0948, + "step": 7408500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4936427442589942e-05, + "loss": 1.1184, + "step": 7409000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4934331676960496e-05, + "loss": 1.124, + "step": 7409500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4932231711399933e-05, + "loss": 1.1297, + "step": 7410000 + }, + { + "epoch": 4.44, + "learning_rate": 2.493013174583937e-05, + "loss": 1.1004, + "step": 7410500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4928031780278803e-05, + "loss": 1.0813, + "step": 7411000 + }, + { + "epoch": 4.44, + "learning_rate": 2.4925936014649357e-05, + "loss": 1.0981, + "step": 7411500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4923836049088794e-05, + "loss": 1.0896, + "step": 7412000 + }, + { + "epoch": 4.44, + "learning_rate": 2.492173608352823e-05, + "loss": 1.1012, + "step": 7412500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4919636117967667e-05, + "loss": 1.1075, + "step": 7413000 + }, + { + "epoch": 4.44, + "learning_rate": 2.49175361524071e-05, + "loss": 1.1104, + "step": 7413500 + }, + { + "epoch": 4.44, + "learning_rate": 2.4915436186846534e-05, + "loss": 1.1289, + "step": 7414000 + }, + { + "epoch": 4.45, + "learning_rate": 2.491333622128597e-05, + "loss": 1.1326, + "step": 7414500 + }, + { + "epoch": 4.45, + "learning_rate": 2.4911240455656528e-05, + "loss": 1.1019, + "step": 7415000 + }, + { + "epoch": 4.45, + "learning_rate": 2.490914049009596e-05, + "loss": 1.1398, + "step": 7415500 + }, + { + "epoch": 4.45, + "learning_rate": 2.4907040524535398e-05, + "loss": 1.1104, + "step": 7416000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4904940558974832e-05, + "loss": 1.0763, + "step": 7416500 + }, + { + "epoch": 4.45, + "learning_rate": 2.490284059341427e-05, + "loss": 1.1, + "step": 7417000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4900744827784826e-05, + "loss": 1.0805, + "step": 7417500 + }, + { + "epoch": 4.45, + "learning_rate": 2.489864486222426e-05, + "loss": 1.0974, + "step": 7418000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4896549096594813e-05, + "loss": 1.067, + "step": 7418500 + }, + { + "epoch": 4.45, + "learning_rate": 2.489444913103425e-05, + "loss": 1.087, + "step": 7419000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4892349165473686e-05, + "loss": 1.1075, + "step": 7419500 + }, + { + "epoch": 4.45, + "learning_rate": 2.4890249199913123e-05, + "loss": 1.1312, + "step": 7420000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4888149234352557e-05, + "loss": 1.1226, + "step": 7420500 + }, + { + "epoch": 4.45, + "learning_rate": 2.488604926879199e-05, + "loss": 1.1121, + "step": 7421000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4883949303231427e-05, + "loss": 1.0908, + "step": 7421500 + }, + { + "epoch": 4.45, + "learning_rate": 2.488184933767086e-05, + "loss": 1.1004, + "step": 7422000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4879749372110297e-05, + "loss": 1.1191, + "step": 7422500 + }, + { + "epoch": 4.45, + "learning_rate": 2.4877649406549734e-05, + "loss": 1.1159, + "step": 7423000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4875549440989167e-05, + "loss": 1.1039, + "step": 7423500 + }, + { + "epoch": 4.45, + "learning_rate": 2.48734494754286e-05, + "loss": 1.101, + "step": 7424000 + }, + { + "epoch": 4.45, + "learning_rate": 2.487135790973028e-05, + "loss": 1.1022, + "step": 7424500 + }, + { + "epoch": 4.45, + "learning_rate": 2.4869257944169715e-05, + "loss": 1.0754, + "step": 7425000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4867157978609152e-05, + "loss": 1.1119, + "step": 7425500 + }, + { + "epoch": 4.45, + "learning_rate": 2.4865058013048585e-05, + "loss": 1.1031, + "step": 7426000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4862962247419142e-05, + "loss": 1.123, + "step": 7426500 + }, + { + "epoch": 4.45, + "learning_rate": 2.486086228185858e-05, + "loss": 1.0838, + "step": 7427000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4858762316298012e-05, + "loss": 1.1071, + "step": 7427500 + }, + { + "epoch": 4.45, + "learning_rate": 2.485666235073745e-05, + "loss": 1.103, + "step": 7428000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4854566585108003e-05, + "loss": 1.1402, + "step": 7428500 + }, + { + "epoch": 4.45, + "learning_rate": 2.485246661954744e-05, + "loss": 1.123, + "step": 7429000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4850366653986873e-05, + "loss": 1.1218, + "step": 7429500 + }, + { + "epoch": 4.45, + "learning_rate": 2.484826668842631e-05, + "loss": 1.1101, + "step": 7430000 + }, + { + "epoch": 4.45, + "learning_rate": 2.4846170922796864e-05, + "loss": 1.1123, + "step": 7430500 + }, + { + "epoch": 4.46, + "learning_rate": 2.48440709572363e-05, + "loss": 1.1118, + "step": 7431000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4841970991675737e-05, + "loss": 1.1152, + "step": 7431500 + }, + { + "epoch": 4.46, + "learning_rate": 2.483987102611517e-05, + "loss": 1.1393, + "step": 7432000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4837771060554608e-05, + "loss": 1.1377, + "step": 7432500 + }, + { + "epoch": 4.46, + "learning_rate": 2.483567529492516e-05, + "loss": 1.1108, + "step": 7433000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4833575329364598e-05, + "loss": 1.0918, + "step": 7433500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4831475363804035e-05, + "loss": 1.1175, + "step": 7434000 + }, + { + "epoch": 4.46, + "learning_rate": 2.482937539824347e-05, + "loss": 1.107, + "step": 7434500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4827275432682905e-05, + "loss": 1.1054, + "step": 7435000 + }, + { + "epoch": 4.46, + "learning_rate": 2.482517966705346e-05, + "loss": 1.1446, + "step": 7435500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4823079701492896e-05, + "loss": 1.0919, + "step": 7436000 + }, + { + "epoch": 4.46, + "learning_rate": 2.482097973593233e-05, + "loss": 1.0988, + "step": 7436500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4818879770371766e-05, + "loss": 1.0911, + "step": 7437000 + }, + { + "epoch": 4.46, + "learning_rate": 2.481678400474232e-05, + "loss": 1.1106, + "step": 7437500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4814684039181756e-05, + "loss": 1.0979, + "step": 7438000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4812584073621193e-05, + "loss": 1.0853, + "step": 7438500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4810484108060627e-05, + "loss": 1.082, + "step": 7439000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4808384142500063e-05, + "loss": 1.1189, + "step": 7439500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4806288376870617e-05, + "loss": 1.1123, + "step": 7440000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4804188411310054e-05, + "loss": 1.114, + "step": 7440500 + }, + { + "epoch": 4.46, + "learning_rate": 2.480208844574949e-05, + "loss": 1.0859, + "step": 7441000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4799988480188924e-05, + "loss": 1.1041, + "step": 7441500 + }, + { + "epoch": 4.46, + "learning_rate": 2.479788851462836e-05, + "loss": 1.1015, + "step": 7442000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4795792748998915e-05, + "loss": 1.084, + "step": 7442500 + }, + { + "epoch": 4.46, + "learning_rate": 2.479369278343835e-05, + "loss": 1.0923, + "step": 7443000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4791592817877785e-05, + "loss": 1.1248, + "step": 7443500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4789492852317222e-05, + "loss": 1.0863, + "step": 7444000 + }, + { + "epoch": 4.46, + "learning_rate": 2.4787397086687775e-05, + "loss": 1.1038, + "step": 7444500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4785297121127212e-05, + "loss": 1.095, + "step": 7445000 + }, + { + "epoch": 4.46, + "learning_rate": 2.478319715556665e-05, + "loss": 1.0976, + "step": 7445500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4781097190006083e-05, + "loss": 1.1227, + "step": 7446000 + }, + { + "epoch": 4.46, + "learning_rate": 2.477899722444552e-05, + "loss": 1.0986, + "step": 7446500 + }, + { + "epoch": 4.46, + "learning_rate": 2.4776897258884956e-05, + "loss": 1.0902, + "step": 7447000 + }, + { + "epoch": 4.47, + "learning_rate": 2.477480149325551e-05, + "loss": 1.0904, + "step": 7447500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4772701527694947e-05, + "loss": 1.0893, + "step": 7448000 + }, + { + "epoch": 4.47, + "learning_rate": 2.477060156213438e-05, + "loss": 1.1108, + "step": 7448500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4768501596573817e-05, + "loss": 1.0984, + "step": 7449000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4766401631013254e-05, + "loss": 1.1063, + "step": 7449500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4764305865383807e-05, + "loss": 1.1217, + "step": 7450000 + }, + { + "epoch": 4.47, + "learning_rate": 2.476220589982324e-05, + "loss": 1.1006, + "step": 7450500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4760105934262678e-05, + "loss": 1.0982, + "step": 7451000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4758005968702115e-05, + "loss": 1.1141, + "step": 7451500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4755906003141548e-05, + "loss": 1.1104, + "step": 7452000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4753810237512105e-05, + "loss": 1.1056, + "step": 7452500 + }, + { + "epoch": 4.47, + "learning_rate": 2.475171027195154e-05, + "loss": 1.1154, + "step": 7453000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4749610306390975e-05, + "loss": 1.1071, + "step": 7453500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4747510340830412e-05, + "loss": 1.1272, + "step": 7454000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4745414575200966e-05, + "loss": 1.1293, + "step": 7454500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4743314609640403e-05, + "loss": 1.0934, + "step": 7455000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4741214644079836e-05, + "loss": 1.135, + "step": 7455500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4739114678519273e-05, + "loss": 1.0855, + "step": 7456000 + }, + { + "epoch": 4.47, + "learning_rate": 2.473701471295871e-05, + "loss": 1.1227, + "step": 7456500 + }, + { + "epoch": 4.47, + "learning_rate": 2.473491474739814e-05, + "loss": 1.1052, + "step": 7457000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4732814781837577e-05, + "loss": 1.1244, + "step": 7457500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4730719016208134e-05, + "loss": 1.1011, + "step": 7458000 + }, + { + "epoch": 4.47, + "learning_rate": 2.472861905064757e-05, + "loss": 1.1125, + "step": 7458500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4726519085087007e-05, + "loss": 1.0989, + "step": 7459000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4724419119526437e-05, + "loss": 1.1215, + "step": 7459500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4722319153965874e-05, + "loss": 1.0873, + "step": 7460000 + }, + { + "epoch": 4.47, + "learning_rate": 2.472022338833643e-05, + "loss": 1.1106, + "step": 7460500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4718123422775868e-05, + "loss": 1.096, + "step": 7461000 + }, + { + "epoch": 4.47, + "learning_rate": 2.47160234572153e-05, + "loss": 1.0873, + "step": 7461500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4713923491654735e-05, + "loss": 1.1038, + "step": 7462000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4711823526094172e-05, + "loss": 1.0957, + "step": 7462500 + }, + { + "epoch": 4.47, + "learning_rate": 2.470972776046473e-05, + "loss": 1.0892, + "step": 7463000 + }, + { + "epoch": 4.47, + "learning_rate": 2.4707627794904166e-05, + "loss": 1.1252, + "step": 7463500 + }, + { + "epoch": 4.47, + "learning_rate": 2.4705527829343596e-05, + "loss": 1.1212, + "step": 7464000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4703427863783032e-05, + "loss": 1.1256, + "step": 7464500 + }, + { + "epoch": 4.48, + "learning_rate": 2.470132789822247e-05, + "loss": 1.1171, + "step": 7465000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4699227932661903e-05, + "loss": 1.0859, + "step": 7465500 + }, + { + "epoch": 4.48, + "learning_rate": 2.469712796710134e-05, + "loss": 1.1185, + "step": 7466000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4695028001540776e-05, + "loss": 1.1007, + "step": 7466500 + }, + { + "epoch": 4.48, + "learning_rate": 2.469293223591133e-05, + "loss": 1.0856, + "step": 7467000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4690832270350767e-05, + "loss": 1.0976, + "step": 7467500 + }, + { + "epoch": 4.48, + "learning_rate": 2.4688736504721324e-05, + "loss": 1.1136, + "step": 7468000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4686636539160757e-05, + "loss": 1.1122, + "step": 7468500 + }, + { + "epoch": 4.48, + "learning_rate": 2.468453657360019e-05, + "loss": 1.1314, + "step": 7469000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4682436608039628e-05, + "loss": 1.1105, + "step": 7469500 + }, + { + "epoch": 4.48, + "learning_rate": 2.4680336642479064e-05, + "loss": 1.129, + "step": 7470000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4678236676918498e-05, + "loss": 1.1043, + "step": 7470500 + }, + { + "epoch": 4.48, + "learning_rate": 2.4676136711357935e-05, + "loss": 1.0941, + "step": 7471000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4674036745797368e-05, + "loss": 1.1017, + "step": 7471500 + }, + { + "epoch": 4.48, + "learning_rate": 2.4671940980167925e-05, + "loss": 1.11, + "step": 7472000 + }, + { + "epoch": 4.48, + "learning_rate": 2.466984101460736e-05, + "loss": 1.0979, + "step": 7472500 + }, + { + "epoch": 4.48, + "learning_rate": 2.4667741049046795e-05, + "loss": 1.1045, + "step": 7473000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4665641083486232e-05, + "loss": 1.0886, + "step": 7473500 + }, + { + "epoch": 4.48, + "learning_rate": 2.4663545317856786e-05, + "loss": 1.0895, + "step": 7474000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4661445352296223e-05, + "loss": 1.1256, + "step": 7474500 + }, + { + "epoch": 4.48, + "learning_rate": 2.4659345386735656e-05, + "loss": 1.0896, + "step": 7475000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4657245421175093e-05, + "loss": 1.1118, + "step": 7475500 + }, + { + "epoch": 4.48, + "learning_rate": 2.465514545561453e-05, + "loss": 1.1279, + "step": 7476000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4653049689985083e-05, + "loss": 1.0868, + "step": 7476500 + }, + { + "epoch": 4.48, + "learning_rate": 2.465094972442452e-05, + "loss": 1.1001, + "step": 7477000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4648849758863954e-05, + "loss": 1.1387, + "step": 7477500 + }, + { + "epoch": 4.48, + "learning_rate": 2.464674979330339e-05, + "loss": 1.1008, + "step": 7478000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4644649827742827e-05, + "loss": 1.1051, + "step": 7478500 + }, + { + "epoch": 4.48, + "learning_rate": 2.46425582620445e-05, + "loss": 1.1224, + "step": 7479000 + }, + { + "epoch": 4.48, + "learning_rate": 2.4640458296483938e-05, + "loss": 1.0942, + "step": 7479500 + }, + { + "epoch": 4.48, + "learning_rate": 2.4638358330923375e-05, + "loss": 1.1131, + "step": 7480000 + }, + { + "epoch": 4.48, + "learning_rate": 2.463625836536281e-05, + "loss": 1.106, + "step": 7480500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4634158399802242e-05, + "loss": 1.1317, + "step": 7481000 + }, + { + "epoch": 4.49, + "learning_rate": 2.463205843424168e-05, + "loss": 1.1177, + "step": 7481500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4629958468681112e-05, + "loss": 1.115, + "step": 7482000 + }, + { + "epoch": 4.49, + "learning_rate": 2.462785850312055e-05, + "loss": 1.1143, + "step": 7482500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4625758537559986e-05, + "loss": 1.1109, + "step": 7483000 + }, + { + "epoch": 4.49, + "learning_rate": 2.462366277193054e-05, + "loss": 1.1177, + "step": 7483500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4621562806369976e-05, + "loss": 1.115, + "step": 7484000 + }, + { + "epoch": 4.49, + "learning_rate": 2.461946284080941e-05, + "loss": 1.1244, + "step": 7484500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4617362875248847e-05, + "loss": 1.1165, + "step": 7485000 + }, + { + "epoch": 4.49, + "learning_rate": 2.46152671096194e-05, + "loss": 1.1331, + "step": 7485500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4613167144058837e-05, + "loss": 1.0722, + "step": 7486000 + }, + { + "epoch": 4.49, + "learning_rate": 2.461106717849827e-05, + "loss": 1.0999, + "step": 7486500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4608967212937707e-05, + "loss": 1.1252, + "step": 7487000 + }, + { + "epoch": 4.49, + "learning_rate": 2.4606867247377144e-05, + "loss": 1.0954, + "step": 7487500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4604771481747698e-05, + "loss": 1.1145, + "step": 7488000 + }, + { + "epoch": 4.49, + "learning_rate": 2.4602671516187135e-05, + "loss": 1.0745, + "step": 7488500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4600571550626568e-05, + "loss": 1.12, + "step": 7489000 + }, + { + "epoch": 4.49, + "learning_rate": 2.4598471585066005e-05, + "loss": 1.0918, + "step": 7489500 + }, + { + "epoch": 4.49, + "learning_rate": 2.459637161950544e-05, + "loss": 1.0847, + "step": 7490000 + }, + { + "epoch": 4.49, + "learning_rate": 2.4594275853875995e-05, + "loss": 1.095, + "step": 7490500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4592175888315432e-05, + "loss": 1.1301, + "step": 7491000 + }, + { + "epoch": 4.49, + "learning_rate": 2.4590075922754866e-05, + "loss": 1.1277, + "step": 7491500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4587975957194302e-05, + "loss": 1.1111, + "step": 7492000 + }, + { + "epoch": 4.49, + "learning_rate": 2.458587599163374e-05, + "loss": 1.1133, + "step": 7492500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4583780226004293e-05, + "loss": 1.091, + "step": 7493000 + }, + { + "epoch": 4.49, + "learning_rate": 2.4581680260443726e-05, + "loss": 1.114, + "step": 7493500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4579580294883163e-05, + "loss": 1.1266, + "step": 7494000 + }, + { + "epoch": 4.49, + "learning_rate": 2.45774803293226e-05, + "loss": 1.093, + "step": 7494500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4575380363762037e-05, + "loss": 1.1002, + "step": 7495000 + }, + { + "epoch": 4.49, + "learning_rate": 2.457328459813259e-05, + "loss": 1.1107, + "step": 7495500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4571184632572024e-05, + "loss": 1.077, + "step": 7496000 + }, + { + "epoch": 4.49, + "learning_rate": 2.456908466701146e-05, + "loss": 1.0986, + "step": 7496500 + }, + { + "epoch": 4.49, + "learning_rate": 2.4566984701450898e-05, + "loss": 1.0944, + "step": 7497000 + }, + { + "epoch": 4.5, + "learning_rate": 2.456488473589033e-05, + "loss": 1.1146, + "step": 7497500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4562788970260888e-05, + "loss": 1.1162, + "step": 7498000 + }, + { + "epoch": 4.5, + "learning_rate": 2.456068900470032e-05, + "loss": 1.0997, + "step": 7498500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4558589039139758e-05, + "loss": 1.1123, + "step": 7499000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4556489073579195e-05, + "loss": 1.1413, + "step": 7499500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4554389108018625e-05, + "loss": 1.1256, + "step": 7500000 + }, + { + "epoch": 4.5, + "eval_loss": 1.0793567895889282, + "eval_runtime": 1105.9813, + "eval_samples_per_second": 476.247, + "eval_steps_per_second": 79.375, + "step": 7500000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4552293342389186e-05, + "loss": 1.0892, + "step": 7500500 + }, + { + "epoch": 4.5, + "learning_rate": 2.455019337682862e-05, + "loss": 1.1003, + "step": 7501000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4548093411268056e-05, + "loss": 1.0527, + "step": 7501500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4545993445707493e-05, + "loss": 1.1102, + "step": 7502000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4543893480146923e-05, + "loss": 1.1144, + "step": 7502500 + }, + { + "epoch": 4.5, + "learning_rate": 2.454179351458636e-05, + "loss": 1.0879, + "step": 7503000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4539693549025796e-05, + "loss": 1.0898, + "step": 7503500 + }, + { + "epoch": 4.5, + "learning_rate": 2.453759358346523e-05, + "loss": 1.1116, + "step": 7504000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4535502017766907e-05, + "loss": 1.1066, + "step": 7504500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4533402052206344e-05, + "loss": 1.0779, + "step": 7505000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4531302086645777e-05, + "loss": 1.0772, + "step": 7505500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4529202121085214e-05, + "loss": 1.1103, + "step": 7506000 + }, + { + "epoch": 4.5, + "learning_rate": 2.452710215552465e-05, + "loss": 1.1026, + "step": 7506500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4525006389895205e-05, + "loss": 1.1216, + "step": 7507000 + }, + { + "epoch": 4.5, + "learning_rate": 2.452290642433464e-05, + "loss": 1.083, + "step": 7507500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4520806458774075e-05, + "loss": 1.1449, + "step": 7508000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4518706493213512e-05, + "loss": 1.1183, + "step": 7508500 + }, + { + "epoch": 4.5, + "learning_rate": 2.451660652765295e-05, + "loss": 1.1037, + "step": 7509000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4514506562092382e-05, + "loss": 1.102, + "step": 7509500 + }, + { + "epoch": 4.5, + "learning_rate": 2.4512410796462936e-05, + "loss": 1.0942, + "step": 7510000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4510310830902372e-05, + "loss": 1.0821, + "step": 7510500 + }, + { + "epoch": 4.5, + "learning_rate": 2.450821086534181e-05, + "loss": 1.1434, + "step": 7511000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4506110899781243e-05, + "loss": 1.1118, + "step": 7511500 + }, + { + "epoch": 4.5, + "learning_rate": 2.45040151341518e-05, + "loss": 1.1333, + "step": 7512000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4501915168591233e-05, + "loss": 1.0913, + "step": 7512500 + }, + { + "epoch": 4.5, + "learning_rate": 2.449981520303067e-05, + "loss": 1.1104, + "step": 7513000 + }, + { + "epoch": 4.5, + "learning_rate": 2.4497715237470107e-05, + "loss": 1.1215, + "step": 7513500 + }, + { + "epoch": 4.5, + "learning_rate": 2.449561527190954e-05, + "loss": 1.1285, + "step": 7514000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4493519506280097e-05, + "loss": 1.0964, + "step": 7514500 + }, + { + "epoch": 4.51, + "learning_rate": 2.449141954071953e-05, + "loss": 1.0949, + "step": 7515000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4489319575158968e-05, + "loss": 1.0909, + "step": 7515500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4487219609598404e-05, + "loss": 1.1189, + "step": 7516000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4485119644037838e-05, + "loss": 1.1029, + "step": 7516500 + }, + { + "epoch": 4.51, + "learning_rate": 2.448302387840839e-05, + "loss": 1.1232, + "step": 7517000 + }, + { + "epoch": 4.51, + "learning_rate": 2.448092391284783e-05, + "loss": 1.1275, + "step": 7517500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4478823947287265e-05, + "loss": 1.1189, + "step": 7518000 + }, + { + "epoch": 4.51, + "learning_rate": 2.44767239817267e-05, + "loss": 1.1089, + "step": 7518500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4474624016166135e-05, + "loss": 1.0983, + "step": 7519000 + }, + { + "epoch": 4.51, + "learning_rate": 2.447252825053669e-05, + "loss": 1.1265, + "step": 7519500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4470428284976126e-05, + "loss": 1.1083, + "step": 7520000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4468328319415563e-05, + "loss": 1.1332, + "step": 7520500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4466228353854996e-05, + "loss": 1.1438, + "step": 7521000 + }, + { + "epoch": 4.51, + "learning_rate": 2.446412838829443e-05, + "loss": 1.1111, + "step": 7521500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4462032622664987e-05, + "loss": 1.086, + "step": 7522000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4459932657104423e-05, + "loss": 1.0774, + "step": 7522500 + }, + { + "epoch": 4.51, + "learning_rate": 2.445783269154386e-05, + "loss": 1.0884, + "step": 7523000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4455732725983294e-05, + "loss": 1.1285, + "step": 7523500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4453632760422727e-05, + "loss": 1.1096, + "step": 7524000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4451532794862164e-05, + "loss": 1.1442, + "step": 7524500 + }, + { + "epoch": 4.51, + "learning_rate": 2.444943702923272e-05, + "loss": 1.0941, + "step": 7525000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4447337063672155e-05, + "loss": 1.0897, + "step": 7525500 + }, + { + "epoch": 4.51, + "learning_rate": 2.444523709811159e-05, + "loss": 1.107, + "step": 7526000 + }, + { + "epoch": 4.51, + "learning_rate": 2.4443137132551025e-05, + "loss": 1.1121, + "step": 7526500 + }, + { + "epoch": 4.51, + "learning_rate": 2.444103716699046e-05, + "loss": 1.1284, + "step": 7527000 + }, + { + "epoch": 4.51, + "learning_rate": 2.443894140136102e-05, + "loss": 1.1107, + "step": 7527500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4436841435800452e-05, + "loss": 1.1365, + "step": 7528000 + }, + { + "epoch": 4.51, + "learning_rate": 2.443474147023989e-05, + "loss": 1.1035, + "step": 7528500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4432641504679322e-05, + "loss": 1.0991, + "step": 7529000 + }, + { + "epoch": 4.51, + "learning_rate": 2.443054573904988e-05, + "loss": 1.1189, + "step": 7529500 + }, + { + "epoch": 4.51, + "learning_rate": 2.4428445773489316e-05, + "loss": 1.0918, + "step": 7530000 + }, + { + "epoch": 4.51, + "learning_rate": 2.442634580792875e-05, + "loss": 1.0939, + "step": 7530500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4424245842368183e-05, + "loss": 1.1103, + "step": 7531000 + }, + { + "epoch": 4.52, + "learning_rate": 2.442215007673874e-05, + "loss": 1.1208, + "step": 7531500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4420050111178177e-05, + "loss": 1.087, + "step": 7532000 + }, + { + "epoch": 4.52, + "learning_rate": 2.441795014561761e-05, + "loss": 1.1015, + "step": 7532500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4415850180057047e-05, + "loss": 1.1084, + "step": 7533000 + }, + { + "epoch": 4.52, + "learning_rate": 2.441375021449648e-05, + "loss": 1.0994, + "step": 7533500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4411654448867038e-05, + "loss": 1.0786, + "step": 7534000 + }, + { + "epoch": 4.52, + "learning_rate": 2.4409554483306475e-05, + "loss": 1.1432, + "step": 7534500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4407454517745908e-05, + "loss": 1.0933, + "step": 7535000 + }, + { + "epoch": 4.52, + "learning_rate": 2.4405354552185345e-05, + "loss": 1.1178, + "step": 7535500 + }, + { + "epoch": 4.52, + "learning_rate": 2.44032587865559e-05, + "loss": 1.0849, + "step": 7536000 + }, + { + "epoch": 4.52, + "learning_rate": 2.4401158820995335e-05, + "loss": 1.0914, + "step": 7536500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4399058855434772e-05, + "loss": 1.0789, + "step": 7537000 + }, + { + "epoch": 4.52, + "learning_rate": 2.4396958889874206e-05, + "loss": 1.1101, + "step": 7537500 + }, + { + "epoch": 4.52, + "learning_rate": 2.439486312424476e-05, + "loss": 1.0976, + "step": 7538000 + }, + { + "epoch": 4.52, + "learning_rate": 2.4392763158684196e-05, + "loss": 1.0849, + "step": 7538500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4390663193123633e-05, + "loss": 1.1032, + "step": 7539000 + }, + { + "epoch": 4.52, + "learning_rate": 2.438856322756307e-05, + "loss": 1.0919, + "step": 7539500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4386463262002503e-05, + "loss": 1.1009, + "step": 7540000 + }, + { + "epoch": 4.52, + "learning_rate": 2.4384367496373057e-05, + "loss": 1.1129, + "step": 7540500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4382267530812494e-05, + "loss": 1.1247, + "step": 7541000 + }, + { + "epoch": 4.52, + "learning_rate": 2.438016756525193e-05, + "loss": 1.0723, + "step": 7541500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4378067599691364e-05, + "loss": 1.1061, + "step": 7542000 + }, + { + "epoch": 4.52, + "learning_rate": 2.43759676341308e-05, + "loss": 1.0612, + "step": 7542500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4373871868501354e-05, + "loss": 1.0764, + "step": 7543000 + }, + { + "epoch": 4.52, + "learning_rate": 2.437177190294079e-05, + "loss": 1.08, + "step": 7543500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4369671937380228e-05, + "loss": 1.0818, + "step": 7544000 + }, + { + "epoch": 4.52, + "learning_rate": 2.436757197181966e-05, + "loss": 1.1193, + "step": 7544500 + }, + { + "epoch": 4.52, + "learning_rate": 2.4365472006259098e-05, + "loss": 1.1479, + "step": 7545000 + }, + { + "epoch": 4.52, + "learning_rate": 2.4363376240629652e-05, + "loss": 1.0995, + "step": 7545500 + }, + { + "epoch": 4.52, + "learning_rate": 2.436127627506909e-05, + "loss": 1.0784, + "step": 7546000 + }, + { + "epoch": 4.52, + "learning_rate": 2.4359176309508526e-05, + "loss": 1.1243, + "step": 7546500 + }, + { + "epoch": 4.52, + "learning_rate": 2.435707634394796e-05, + "loss": 1.1285, + "step": 7547000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4354980578318513e-05, + "loss": 1.111, + "step": 7547500 + }, + { + "epoch": 4.53, + "learning_rate": 2.435288061275795e-05, + "loss": 1.1061, + "step": 7548000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4350780647197386e-05, + "loss": 1.0756, + "step": 7548500 + }, + { + "epoch": 4.53, + "learning_rate": 2.434868068163682e-05, + "loss": 1.092, + "step": 7549000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4346580716076257e-05, + "loss": 1.0855, + "step": 7549500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4344480750515693e-05, + "loss": 1.0927, + "step": 7550000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4342384984886247e-05, + "loss": 1.1195, + "step": 7550500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4340285019325684e-05, + "loss": 1.0914, + "step": 7551000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4338185053765117e-05, + "loss": 1.1037, + "step": 7551500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4336089288135674e-05, + "loss": 1.1008, + "step": 7552000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4333989322575108e-05, + "loss": 1.1019, + "step": 7552500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4331889357014545e-05, + "loss": 1.0973, + "step": 7553000 + }, + { + "epoch": 4.53, + "learning_rate": 2.432978939145398e-05, + "loss": 1.1238, + "step": 7553500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4327689425893415e-05, + "loss": 1.1075, + "step": 7554000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4325589460332852e-05, + "loss": 1.0849, + "step": 7554500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4323489494772285e-05, + "loss": 1.0997, + "step": 7555000 + }, + { + "epoch": 4.53, + "learning_rate": 2.432138952921172e-05, + "loss": 1.0869, + "step": 7555500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4319293763582276e-05, + "loss": 1.1038, + "step": 7556000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4317193798021712e-05, + "loss": 1.0906, + "step": 7556500 + }, + { + "epoch": 4.53, + "learning_rate": 2.431509383246115e-05, + "loss": 1.1084, + "step": 7557000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4312993866900583e-05, + "loss": 1.0985, + "step": 7557500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4310893901340016e-05, + "loss": 1.0999, + "step": 7558000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4308798135710573e-05, + "loss": 1.0758, + "step": 7558500 + }, + { + "epoch": 4.53, + "learning_rate": 2.430669817015001e-05, + "loss": 1.097, + "step": 7559000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4304598204589447e-05, + "loss": 1.1278, + "step": 7559500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4302498239028877e-05, + "loss": 1.1105, + "step": 7560000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4300402473399437e-05, + "loss": 1.1217, + "step": 7560500 + }, + { + "epoch": 4.53, + "learning_rate": 2.429830250783887e-05, + "loss": 1.0959, + "step": 7561000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4296202542278308e-05, + "loss": 1.1122, + "step": 7561500 + }, + { + "epoch": 4.53, + "learning_rate": 2.429410257671774e-05, + "loss": 1.0971, + "step": 7562000 + }, + { + "epoch": 4.53, + "learning_rate": 2.4292006811088298e-05, + "loss": 1.085, + "step": 7562500 + }, + { + "epoch": 4.53, + "learning_rate": 2.428990684552773e-05, + "loss": 1.1138, + "step": 7563000 + }, + { + "epoch": 4.53, + "learning_rate": 2.428780687996717e-05, + "loss": 1.1101, + "step": 7563500 + }, + { + "epoch": 4.53, + "learning_rate": 2.4285706914406605e-05, + "loss": 1.1163, + "step": 7564000 + }, + { + "epoch": 4.54, + "learning_rate": 2.428361114877716e-05, + "loss": 1.1202, + "step": 7564500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4281511183216596e-05, + "loss": 1.1014, + "step": 7565000 + }, + { + "epoch": 4.54, + "learning_rate": 2.427941121765603e-05, + "loss": 1.1144, + "step": 7565500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4277311252095466e-05, + "loss": 1.1142, + "step": 7566000 + }, + { + "epoch": 4.54, + "learning_rate": 2.4275211286534903e-05, + "loss": 1.1179, + "step": 7566500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4273115520905456e-05, + "loss": 1.1083, + "step": 7567000 + }, + { + "epoch": 4.54, + "learning_rate": 2.4271015555344893e-05, + "loss": 1.0817, + "step": 7567500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4268915589784327e-05, + "loss": 1.0842, + "step": 7568000 + }, + { + "epoch": 4.54, + "learning_rate": 2.4266815624223763e-05, + "loss": 1.1119, + "step": 7568500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4264719858594317e-05, + "loss": 1.0803, + "step": 7569000 + }, + { + "epoch": 4.54, + "learning_rate": 2.4262619893033754e-05, + "loss": 1.1045, + "step": 7569500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4260519927473187e-05, + "loss": 1.1214, + "step": 7570000 + }, + { + "epoch": 4.54, + "learning_rate": 2.4258419961912624e-05, + "loss": 1.1063, + "step": 7570500 + }, + { + "epoch": 4.54, + "learning_rate": 2.425632419628318e-05, + "loss": 1.1017, + "step": 7571000 + }, + { + "epoch": 4.54, + "learning_rate": 2.4254224230722615e-05, + "loss": 1.0812, + "step": 7571500 + }, + { + "epoch": 4.54, + "learning_rate": 2.425212426516205e-05, + "loss": 1.105, + "step": 7572000 + }, + { + "epoch": 4.54, + "learning_rate": 2.4250024299601485e-05, + "loss": 1.0837, + "step": 7572500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4247924334040922e-05, + "loss": 1.1381, + "step": 7573000 + }, + { + "epoch": 4.54, + "learning_rate": 2.424582856841148e-05, + "loss": 1.0667, + "step": 7573500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4243728602850912e-05, + "loss": 1.0963, + "step": 7574000 + }, + { + "epoch": 4.54, + "learning_rate": 2.424162863729035e-05, + "loss": 1.083, + "step": 7574500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4239528671729783e-05, + "loss": 1.1171, + "step": 7575000 + }, + { + "epoch": 4.54, + "learning_rate": 2.423742870616922e-05, + "loss": 1.1175, + "step": 7575500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4235332940539773e-05, + "loss": 1.1183, + "step": 7576000 + }, + { + "epoch": 4.54, + "learning_rate": 2.423323297497921e-05, + "loss": 1.1109, + "step": 7576500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4231133009418643e-05, + "loss": 1.112, + "step": 7577000 + }, + { + "epoch": 4.54, + "learning_rate": 2.422903304385808e-05, + "loss": 1.1086, + "step": 7577500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4226937278228637e-05, + "loss": 1.1171, + "step": 7578000 + }, + { + "epoch": 4.54, + "learning_rate": 2.422483731266807e-05, + "loss": 1.1091, + "step": 7578500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4222737347107507e-05, + "loss": 1.118, + "step": 7579000 + }, + { + "epoch": 4.54, + "learning_rate": 2.422063738154694e-05, + "loss": 1.1, + "step": 7579500 + }, + { + "epoch": 4.54, + "learning_rate": 2.4218537415986378e-05, + "loss": 1.1009, + "step": 7580000 + }, + { + "epoch": 4.54, + "learning_rate": 2.4216441650356935e-05, + "loss": 1.1003, + "step": 7580500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4214341684796368e-05, + "loss": 1.107, + "step": 7581000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4212241719235805e-05, + "loss": 1.1255, + "step": 7581500 + }, + { + "epoch": 4.55, + "learning_rate": 2.421014175367524e-05, + "loss": 1.1116, + "step": 7582000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4208041788114675e-05, + "loss": 1.0843, + "step": 7582500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4205946022485232e-05, + "loss": 1.0833, + "step": 7583000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4203846056924666e-05, + "loss": 1.1285, + "step": 7583500 + }, + { + "epoch": 4.55, + "learning_rate": 2.42017460913641e-05, + "loss": 1.0883, + "step": 7584000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4199646125803536e-05, + "loss": 1.1226, + "step": 7584500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4197550360174093e-05, + "loss": 1.1064, + "step": 7585000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4195450394613526e-05, + "loss": 1.0894, + "step": 7585500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4193350429052963e-05, + "loss": 1.1134, + "step": 7586000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4191250463492397e-05, + "loss": 1.0994, + "step": 7586500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4189154697862954e-05, + "loss": 1.1104, + "step": 7587000 + }, + { + "epoch": 4.55, + "learning_rate": 2.418705473230239e-05, + "loss": 1.087, + "step": 7587500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4184954766741824e-05, + "loss": 1.0944, + "step": 7588000 + }, + { + "epoch": 4.55, + "learning_rate": 2.418285480118126e-05, + "loss": 1.0971, + "step": 7588500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4180754835620694e-05, + "loss": 1.1001, + "step": 7589000 + }, + { + "epoch": 4.55, + "learning_rate": 2.417865906999125e-05, + "loss": 1.1005, + "step": 7589500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4176559104430688e-05, + "loss": 1.1126, + "step": 7590000 + }, + { + "epoch": 4.55, + "learning_rate": 2.417445913887012e-05, + "loss": 1.0998, + "step": 7590500 + }, + { + "epoch": 4.55, + "learning_rate": 2.417235917330956e-05, + "loss": 1.1247, + "step": 7591000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4170263407680115e-05, + "loss": 1.111, + "step": 7591500 + }, + { + "epoch": 4.55, + "learning_rate": 2.416816344211955e-05, + "loss": 1.1059, + "step": 7592000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4166063476558986e-05, + "loss": 1.1063, + "step": 7592500 + }, + { + "epoch": 4.55, + "learning_rate": 2.416396351099842e-05, + "loss": 1.1181, + "step": 7593000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4161867745368976e-05, + "loss": 1.1112, + "step": 7593500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4159767779808413e-05, + "loss": 1.095, + "step": 7594000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4157667814247846e-05, + "loss": 1.0822, + "step": 7594500 + }, + { + "epoch": 4.55, + "learning_rate": 2.415556784868728e-05, + "loss": 1.1052, + "step": 7595000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4153467883126717e-05, + "loss": 1.1003, + "step": 7595500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4151372117497274e-05, + "loss": 1.112, + "step": 7596000 + }, + { + "epoch": 4.55, + "learning_rate": 2.4149272151936707e-05, + "loss": 1.1064, + "step": 7596500 + }, + { + "epoch": 4.55, + "learning_rate": 2.4147172186376144e-05, + "loss": 1.111, + "step": 7597000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4145072220815577e-05, + "loss": 1.1167, + "step": 7597500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4142976455186134e-05, + "loss": 1.09, + "step": 7598000 + }, + { + "epoch": 4.56, + "learning_rate": 2.414087648962557e-05, + "loss": 1.1465, + "step": 7598500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4138776524065005e-05, + "loss": 1.1239, + "step": 7599000 + }, + { + "epoch": 4.56, + "learning_rate": 2.413667655850444e-05, + "loss": 1.1056, + "step": 7599500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4134580792874995e-05, + "loss": 1.0662, + "step": 7600000 + }, + { + "epoch": 4.56, + "eval_loss": 1.0789257287979126, + "eval_runtime": 1114.5617, + "eval_samples_per_second": 472.58, + "eval_steps_per_second": 78.764, + "step": 7600000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4132480827314432e-05, + "loss": 1.0794, + "step": 7600500 + }, + { + "epoch": 4.56, + "learning_rate": 2.413038086175387e-05, + "loss": 1.103, + "step": 7601000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4128280896193302e-05, + "loss": 1.0822, + "step": 7601500 + }, + { + "epoch": 4.56, + "learning_rate": 2.412618093063274e-05, + "loss": 1.1107, + "step": 7602000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4124080965072173e-05, + "loss": 1.0978, + "step": 7602500 + }, + { + "epoch": 4.56, + "learning_rate": 2.412198519944273e-05, + "loss": 1.1279, + "step": 7603000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4119885233882163e-05, + "loss": 1.1161, + "step": 7603500 + }, + { + "epoch": 4.56, + "learning_rate": 2.41177852683216e-05, + "loss": 1.0895, + "step": 7604000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4115685302761033e-05, + "loss": 1.1347, + "step": 7604500 + }, + { + "epoch": 4.56, + "learning_rate": 2.411358533720047e-05, + "loss": 1.0934, + "step": 7605000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4111489571571027e-05, + "loss": 1.0992, + "step": 7605500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4109393805941584e-05, + "loss": 1.1092, + "step": 7606000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4107293840381018e-05, + "loss": 1.0997, + "step": 7606500 + }, + { + "epoch": 4.56, + "learning_rate": 2.410519387482045e-05, + "loss": 1.1397, + "step": 7607000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4103093909259888e-05, + "loss": 1.1217, + "step": 7607500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4100993943699325e-05, + "loss": 1.1463, + "step": 7608000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4098893978138758e-05, + "loss": 1.103, + "step": 7608500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4096794012578195e-05, + "loss": 1.0786, + "step": 7609000 + }, + { + "epoch": 4.56, + "learning_rate": 2.409469404701763e-05, + "loss": 1.1256, + "step": 7609500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4092598281388185e-05, + "loss": 1.1128, + "step": 7610000 + }, + { + "epoch": 4.56, + "learning_rate": 2.409049831582762e-05, + "loss": 1.1193, + "step": 7610500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4088398350267056e-05, + "loss": 1.1281, + "step": 7611000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4086298384706493e-05, + "loss": 1.094, + "step": 7611500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4084198419145926e-05, + "loss": 1.1079, + "step": 7612000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4082102653516483e-05, + "loss": 1.1037, + "step": 7612500 + }, + { + "epoch": 4.56, + "learning_rate": 2.4080002687955916e-05, + "loss": 1.082, + "step": 7613000 + }, + { + "epoch": 4.56, + "learning_rate": 2.4077902722395353e-05, + "loss": 1.0814, + "step": 7613500 + }, + { + "epoch": 4.56, + "learning_rate": 2.407580275683479e-05, + "loss": 1.1052, + "step": 7614000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4073706991205344e-05, + "loss": 1.1107, + "step": 7614500 + }, + { + "epoch": 4.57, + "learning_rate": 2.407160702564478e-05, + "loss": 1.0944, + "step": 7615000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4069507060084214e-05, + "loss": 1.094, + "step": 7615500 + }, + { + "epoch": 4.57, + "learning_rate": 2.406740709452365e-05, + "loss": 1.1018, + "step": 7616000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4065307128963084e-05, + "loss": 1.0919, + "step": 7616500 + }, + { + "epoch": 4.57, + "learning_rate": 2.406321136333364e-05, + "loss": 1.0749, + "step": 7617000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4061111397773075e-05, + "loss": 1.1053, + "step": 7617500 + }, + { + "epoch": 4.57, + "learning_rate": 2.405901143221251e-05, + "loss": 1.1237, + "step": 7618000 + }, + { + "epoch": 4.57, + "learning_rate": 2.405691146665195e-05, + "loss": 1.1117, + "step": 7618500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4054815701022502e-05, + "loss": 1.1017, + "step": 7619000 + }, + { + "epoch": 4.57, + "learning_rate": 2.405271573546194e-05, + "loss": 1.1321, + "step": 7619500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4050615769901372e-05, + "loss": 1.1014, + "step": 7620000 + }, + { + "epoch": 4.57, + "learning_rate": 2.404851580434081e-05, + "loss": 1.1297, + "step": 7620500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4046415838780246e-05, + "loss": 1.1029, + "step": 7621000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4044315873219676e-05, + "loss": 1.1113, + "step": 7621500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4042215907659113e-05, + "loss": 1.0915, + "step": 7622000 + }, + { + "epoch": 4.57, + "learning_rate": 2.404012014202967e-05, + "loss": 1.0937, + "step": 7622500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4038020176469107e-05, + "loss": 1.0851, + "step": 7623000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4035920210908544e-05, + "loss": 1.1007, + "step": 7623500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4033820245347974e-05, + "loss": 1.1045, + "step": 7624000 + }, + { + "epoch": 4.57, + "learning_rate": 2.403172447971853e-05, + "loss": 1.1125, + "step": 7624500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4029624514157967e-05, + "loss": 1.1316, + "step": 7625000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4027524548597404e-05, + "loss": 1.1186, + "step": 7625500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4025424583036838e-05, + "loss": 1.1098, + "step": 7626000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4023328817407395e-05, + "loss": 1.1061, + "step": 7626500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4021228851846828e-05, + "loss": 1.1125, + "step": 7627000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4019128886286265e-05, + "loss": 1.1225, + "step": 7627500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4017028920725702e-05, + "loss": 1.082, + "step": 7628000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4014928955165132e-05, + "loss": 1.1111, + "step": 7628500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4012833189535692e-05, + "loss": 1.1013, + "step": 7629000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4010733223975126e-05, + "loss": 1.1021, + "step": 7629500 + }, + { + "epoch": 4.57, + "learning_rate": 2.4008637458345683e-05, + "loss": 1.0972, + "step": 7630000 + }, + { + "epoch": 4.57, + "learning_rate": 2.4006537492785116e-05, + "loss": 1.1151, + "step": 7630500 + }, + { + "epoch": 4.58, + "learning_rate": 2.4004437527224553e-05, + "loss": 1.0718, + "step": 7631000 + }, + { + "epoch": 4.58, + "learning_rate": 2.4002337561663987e-05, + "loss": 1.1087, + "step": 7631500 + }, + { + "epoch": 4.58, + "learning_rate": 2.4000237596103423e-05, + "loss": 1.0991, + "step": 7632000 + }, + { + "epoch": 4.58, + "learning_rate": 2.399813763054286e-05, + "loss": 1.093, + "step": 7632500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3996037664982297e-05, + "loss": 1.0991, + "step": 7633000 + }, + { + "epoch": 4.58, + "learning_rate": 2.399394189935285e-05, + "loss": 1.1045, + "step": 7633500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3991841933792284e-05, + "loss": 1.0938, + "step": 7634000 + }, + { + "epoch": 4.58, + "learning_rate": 2.398974196823172e-05, + "loss": 1.0882, + "step": 7634500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3987642002671158e-05, + "loss": 1.0933, + "step": 7635000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3985542037110588e-05, + "loss": 1.1075, + "step": 7635500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3983442071550025e-05, + "loss": 1.1114, + "step": 7636000 + }, + { + "epoch": 4.58, + "learning_rate": 2.398134630592058e-05, + "loss": 1.1046, + "step": 7636500 + }, + { + "epoch": 4.58, + "learning_rate": 2.397924634036002e-05, + "loss": 1.111, + "step": 7637000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3977146374799455e-05, + "loss": 1.0939, + "step": 7637500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3975046409238885e-05, + "loss": 1.1377, + "step": 7638000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3972946443678322e-05, + "loss": 1.0888, + "step": 7638500 + }, + { + "epoch": 4.58, + "learning_rate": 2.397084647811776e-05, + "loss": 1.1298, + "step": 7639000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3968750712488316e-05, + "loss": 1.1001, + "step": 7639500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3966650746927753e-05, + "loss": 1.0923, + "step": 7640000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3964550781367183e-05, + "loss": 1.1002, + "step": 7640500 + }, + { + "epoch": 4.58, + "learning_rate": 2.396245081580662e-05, + "loss": 1.1146, + "step": 7641000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3960350850246057e-05, + "loss": 1.099, + "step": 7641500 + }, + { + "epoch": 4.58, + "learning_rate": 2.395825088468549e-05, + "loss": 1.1138, + "step": 7642000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3956150919124927e-05, + "loss": 1.093, + "step": 7642500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3954050953564364e-05, + "loss": 1.1131, + "step": 7643000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3951955187934917e-05, + "loss": 1.0896, + "step": 7643500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3949855222374354e-05, + "loss": 1.1075, + "step": 7644000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3947755256813788e-05, + "loss": 1.078, + "step": 7644500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3945655291253225e-05, + "loss": 1.0913, + "step": 7645000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3943555325692658e-05, + "loss": 1.087, + "step": 7645500 + }, + { + "epoch": 4.58, + "learning_rate": 2.394145536013209e-05, + "loss": 1.0893, + "step": 7646000 + }, + { + "epoch": 4.58, + "learning_rate": 2.393935959450265e-05, + "loss": 1.1255, + "step": 7646500 + }, + { + "epoch": 4.58, + "learning_rate": 2.3937259628942085e-05, + "loss": 1.0901, + "step": 7647000 + }, + { + "epoch": 4.58, + "learning_rate": 2.3935159663381522e-05, + "loss": 1.0986, + "step": 7647500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3933059697820956e-05, + "loss": 1.1044, + "step": 7648000 + }, + { + "epoch": 4.59, + "learning_rate": 2.393095973226039e-05, + "loss": 1.0931, + "step": 7648500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3928863966630946e-05, + "loss": 1.0822, + "step": 7649000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3926764001070383e-05, + "loss": 1.0819, + "step": 7649500 + }, + { + "epoch": 4.59, + "learning_rate": 2.392466403550982e-05, + "loss": 1.1352, + "step": 7650000 + }, + { + "epoch": 4.59, + "learning_rate": 2.392256406994925e-05, + "loss": 1.074, + "step": 7650500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3920464104388687e-05, + "loss": 1.1207, + "step": 7651000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3918368338759244e-05, + "loss": 1.0744, + "step": 7651500 + }, + { + "epoch": 4.59, + "learning_rate": 2.391626837319868e-05, + "loss": 1.1079, + "step": 7652000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3914168407638117e-05, + "loss": 1.0641, + "step": 7652500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3912068442077547e-05, + "loss": 1.1141, + "step": 7653000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3909968476516984e-05, + "loss": 1.102, + "step": 7653500 + }, + { + "epoch": 4.59, + "learning_rate": 2.390787271088754e-05, + "loss": 1.1043, + "step": 7654000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3905772745326978e-05, + "loss": 1.0804, + "step": 7654500 + }, + { + "epoch": 4.59, + "learning_rate": 2.390367277976641e-05, + "loss": 1.1104, + "step": 7655000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3901572814205845e-05, + "loss": 1.1126, + "step": 7655500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3899477048576402e-05, + "loss": 1.1246, + "step": 7656000 + }, + { + "epoch": 4.59, + "learning_rate": 2.389737708301584e-05, + "loss": 1.0872, + "step": 7656500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3895277117455276e-05, + "loss": 1.072, + "step": 7657000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3893177151894706e-05, + "loss": 1.1222, + "step": 7657500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3891077186334143e-05, + "loss": 1.0944, + "step": 7658000 + }, + { + "epoch": 4.59, + "learning_rate": 2.38889814207047e-05, + "loss": 1.1152, + "step": 7658500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3886881455144136e-05, + "loss": 1.1287, + "step": 7659000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3884781489583573e-05, + "loss": 1.0982, + "step": 7659500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3882685723954127e-05, + "loss": 1.0966, + "step": 7660000 + }, + { + "epoch": 4.59, + "learning_rate": 2.388058575839356e-05, + "loss": 1.0934, + "step": 7660500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3878485792832997e-05, + "loss": 1.1113, + "step": 7661000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3876385827272434e-05, + "loss": 1.1002, + "step": 7661500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3874285861711867e-05, + "loss": 1.1064, + "step": 7662000 + }, + { + "epoch": 4.59, + "learning_rate": 2.38721858961513e-05, + "loss": 1.0966, + "step": 7662500 + }, + { + "epoch": 4.59, + "learning_rate": 2.3870085930590738e-05, + "loss": 1.0796, + "step": 7663000 + }, + { + "epoch": 4.59, + "learning_rate": 2.3867985965030175e-05, + "loss": 1.112, + "step": 7663500 + }, + { + "epoch": 4.59, + "learning_rate": 2.386589019940073e-05, + "loss": 1.0933, + "step": 7664000 + }, + { + "epoch": 4.6, + "learning_rate": 2.386379023384016e-05, + "loss": 1.0954, + "step": 7664500 + }, + { + "epoch": 4.6, + "learning_rate": 2.38616902682796e-05, + "loss": 1.1018, + "step": 7665000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3859590302719035e-05, + "loss": 1.099, + "step": 7665500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3857490337158472e-05, + "loss": 1.1089, + "step": 7666000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3855390371597906e-05, + "loss": 1.1102, + "step": 7666500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3853290406037342e-05, + "loss": 1.0946, + "step": 7667000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3851190440476776e-05, + "loss": 1.0937, + "step": 7667500 + }, + { + "epoch": 4.6, + "learning_rate": 2.384909047491621e-05, + "loss": 1.1065, + "step": 7668000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3846990509355646e-05, + "loss": 1.1072, + "step": 7668500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3844894743726203e-05, + "loss": 1.0872, + "step": 7669000 + }, + { + "epoch": 4.6, + "learning_rate": 2.384279477816564e-05, + "loss": 1.1156, + "step": 7669500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3840694812605073e-05, + "loss": 1.1115, + "step": 7670000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3838594847044507e-05, + "loss": 1.0662, + "step": 7670500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3836499081415064e-05, + "loss": 1.1013, + "step": 7671000 + }, + { + "epoch": 4.6, + "learning_rate": 2.38343991158545e-05, + "loss": 1.0918, + "step": 7671500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3832299150293934e-05, + "loss": 1.0846, + "step": 7672000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3830199184733368e-05, + "loss": 1.112, + "step": 7672500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3828103419103928e-05, + "loss": 1.107, + "step": 7673000 + }, + { + "epoch": 4.6, + "learning_rate": 2.382600345354336e-05, + "loss": 1.1193, + "step": 7673500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3823903487982798e-05, + "loss": 1.0736, + "step": 7674000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3821803522422232e-05, + "loss": 1.0843, + "step": 7674500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3819703556861665e-05, + "loss": 1.0961, + "step": 7675000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3817603591301102e-05, + "loss": 1.0881, + "step": 7675500 + }, + { + "epoch": 4.6, + "learning_rate": 2.381550782567166e-05, + "loss": 1.117, + "step": 7676000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3813407860111096e-05, + "loss": 1.1087, + "step": 7676500 + }, + { + "epoch": 4.6, + "learning_rate": 2.381130789455053e-05, + "loss": 1.0821, + "step": 7677000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3809212128921086e-05, + "loss": 1.0965, + "step": 7677500 + }, + { + "epoch": 4.6, + "learning_rate": 2.380711216336052e-05, + "loss": 1.0925, + "step": 7678000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3805012197799957e-05, + "loss": 1.0834, + "step": 7678500 + }, + { + "epoch": 4.6, + "learning_rate": 2.3802912232239393e-05, + "loss": 1.1042, + "step": 7679000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3800812266678823e-05, + "loss": 1.0787, + "step": 7679500 + }, + { + "epoch": 4.6, + "learning_rate": 2.379871230111826e-05, + "loss": 1.1067, + "step": 7680000 + }, + { + "epoch": 4.6, + "learning_rate": 2.3796612335557697e-05, + "loss": 1.0992, + "step": 7680500 + }, + { + "epoch": 4.61, + "learning_rate": 2.379451236999713e-05, + "loss": 1.0889, + "step": 7681000 + }, + { + "epoch": 4.61, + "learning_rate": 2.379241660436769e-05, + "loss": 1.089, + "step": 7681500 + }, + { + "epoch": 4.61, + "learning_rate": 2.379031663880712e-05, + "loss": 1.0948, + "step": 7682000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3788216673246558e-05, + "loss": 1.1199, + "step": 7682500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3786116707685995e-05, + "loss": 1.1059, + "step": 7683000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3784016742125428e-05, + "loss": 1.071, + "step": 7683500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3781920976495985e-05, + "loss": 1.094, + "step": 7684000 + }, + { + "epoch": 4.61, + "learning_rate": 2.377982101093542e-05, + "loss": 1.1005, + "step": 7684500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3777721045374855e-05, + "loss": 1.0749, + "step": 7685000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3775621079814292e-05, + "loss": 1.1404, + "step": 7685500 + }, + { + "epoch": 4.61, + "learning_rate": 2.377352531418485e-05, + "loss": 1.1056, + "step": 7686000 + }, + { + "epoch": 4.61, + "learning_rate": 2.377142534862428e-05, + "loss": 1.0792, + "step": 7686500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3769325383063716e-05, + "loss": 1.0858, + "step": 7687000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3767225417503153e-05, + "loss": 1.1124, + "step": 7687500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3765125451942587e-05, + "loss": 1.0951, + "step": 7688000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3763029686313147e-05, + "loss": 1.1255, + "step": 7688500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3760929720752577e-05, + "loss": 1.1209, + "step": 7689000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3758829755192014e-05, + "loss": 1.0718, + "step": 7689500 + }, + { + "epoch": 4.61, + "learning_rate": 2.375672978963145e-05, + "loss": 1.1181, + "step": 7690000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3754634024002008e-05, + "loss": 1.1185, + "step": 7690500 + }, + { + "epoch": 4.61, + "learning_rate": 2.375253405844144e-05, + "loss": 1.1074, + "step": 7691000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3750434092880875e-05, + "loss": 1.1004, + "step": 7691500 + }, + { + "epoch": 4.61, + "learning_rate": 2.374833412732031e-05, + "loss": 1.0975, + "step": 7692000 + }, + { + "epoch": 4.61, + "learning_rate": 2.374623836169087e-05, + "loss": 1.1074, + "step": 7692500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3744138396130305e-05, + "loss": 1.0871, + "step": 7693000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3742038430569735e-05, + "loss": 1.0955, + "step": 7693500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3739938465009172e-05, + "loss": 1.1169, + "step": 7694000 + }, + { + "epoch": 4.61, + "learning_rate": 2.373784269937973e-05, + "loss": 1.1148, + "step": 7694500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3735742733819166e-05, + "loss": 1.1137, + "step": 7695000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3733642768258603e-05, + "loss": 1.065, + "step": 7695500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3731542802698033e-05, + "loss": 1.0832, + "step": 7696000 + }, + { + "epoch": 4.61, + "learning_rate": 2.372944283713747e-05, + "loss": 1.0696, + "step": 7696500 + }, + { + "epoch": 4.61, + "learning_rate": 2.3727347071508027e-05, + "loss": 1.1001, + "step": 7697000 + }, + { + "epoch": 4.61, + "learning_rate": 2.3725247105947463e-05, + "loss": 1.0998, + "step": 7697500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3723147140386897e-05, + "loss": 1.0862, + "step": 7698000 + }, + { + "epoch": 4.62, + "learning_rate": 2.372104717482633e-05, + "loss": 1.105, + "step": 7698500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3718947209265767e-05, + "loss": 1.1149, + "step": 7699000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3716851443636324e-05, + "loss": 1.1281, + "step": 7699500 + }, + { + "epoch": 4.62, + "learning_rate": 2.371475147807576e-05, + "loss": 1.1235, + "step": 7700000 + }, + { + "epoch": 4.62, + "eval_loss": 1.0742263793945312, + "eval_runtime": 1107.8327, + "eval_samples_per_second": 475.451, + "eval_steps_per_second": 79.242, + "step": 7700000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3712651512515195e-05, + "loss": 1.107, + "step": 7700500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3710551546954628e-05, + "loss": 1.0942, + "step": 7701000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3708451581394065e-05, + "loss": 1.0996, + "step": 7701500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3706355815764622e-05, + "loss": 1.1009, + "step": 7702000 + }, + { + "epoch": 4.62, + "learning_rate": 2.370425585020406e-05, + "loss": 1.0983, + "step": 7702500 + }, + { + "epoch": 4.62, + "learning_rate": 2.370215588464349e-05, + "loss": 1.1076, + "step": 7703000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3700055919082926e-05, + "loss": 1.0944, + "step": 7703500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3697960153453483e-05, + "loss": 1.109, + "step": 7704000 + }, + { + "epoch": 4.62, + "learning_rate": 2.369586018789292e-05, + "loss": 1.0914, + "step": 7704500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3693760222332356e-05, + "loss": 1.1143, + "step": 7705000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3691660256771786e-05, + "loss": 1.0804, + "step": 7705500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3689564491142343e-05, + "loss": 1.1056, + "step": 7706000 + }, + { + "epoch": 4.62, + "learning_rate": 2.368746452558178e-05, + "loss": 1.0825, + "step": 7706500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3685364560021217e-05, + "loss": 1.0701, + "step": 7707000 + }, + { + "epoch": 4.62, + "learning_rate": 2.368326459446065e-05, + "loss": 1.1058, + "step": 7707500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3681168828831207e-05, + "loss": 1.1037, + "step": 7708000 + }, + { + "epoch": 4.62, + "learning_rate": 2.367906886327064e-05, + "loss": 1.1141, + "step": 7708500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3676973097641198e-05, + "loss": 1.1344, + "step": 7709000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3674873132080635e-05, + "loss": 1.1121, + "step": 7709500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3672773166520068e-05, + "loss": 1.0875, + "step": 7710000 + }, + { + "epoch": 4.62, + "learning_rate": 2.36706732009595e-05, + "loss": 1.1022, + "step": 7710500 + }, + { + "epoch": 4.62, + "learning_rate": 2.366857323539894e-05, + "loss": 1.0852, + "step": 7711000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3666473269838375e-05, + "loss": 1.0864, + "step": 7711500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3664373304277812e-05, + "loss": 1.1243, + "step": 7712000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3662273338717246e-05, + "loss": 1.0857, + "step": 7712500 + }, + { + "epoch": 4.62, + "learning_rate": 2.36601775730878e-05, + "loss": 1.1199, + "step": 7713000 + }, + { + "epoch": 4.62, + "learning_rate": 2.3658077607527236e-05, + "loss": 1.1077, + "step": 7713500 + }, + { + "epoch": 4.62, + "learning_rate": 2.3655977641966673e-05, + "loss": 1.1182, + "step": 7714000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3653877676406106e-05, + "loss": 1.0874, + "step": 7714500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3651781910776663e-05, + "loss": 1.1044, + "step": 7715000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3649681945216097e-05, + "loss": 1.098, + "step": 7715500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3647581979655534e-05, + "loss": 1.0991, + "step": 7716000 + }, + { + "epoch": 4.63, + "learning_rate": 2.364548201409497e-05, + "loss": 1.0922, + "step": 7716500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3643382048534404e-05, + "loss": 1.0879, + "step": 7717000 + }, + { + "epoch": 4.63, + "learning_rate": 2.364128628290496e-05, + "loss": 1.1028, + "step": 7717500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3639186317344394e-05, + "loss": 1.1196, + "step": 7718000 + }, + { + "epoch": 4.63, + "learning_rate": 2.363708635178383e-05, + "loss": 1.0566, + "step": 7718500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3634986386223268e-05, + "loss": 1.0786, + "step": 7719000 + }, + { + "epoch": 4.63, + "learning_rate": 2.36328864206627e-05, + "loss": 1.1049, + "step": 7719500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3630790655033255e-05, + "loss": 1.096, + "step": 7720000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3628690689472692e-05, + "loss": 1.1126, + "step": 7720500 + }, + { + "epoch": 4.63, + "learning_rate": 2.362659072391213e-05, + "loss": 1.09, + "step": 7721000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3624490758351562e-05, + "loss": 1.0929, + "step": 7721500 + }, + { + "epoch": 4.63, + "learning_rate": 2.362239499272212e-05, + "loss": 1.133, + "step": 7722000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3620295027161553e-05, + "loss": 1.1035, + "step": 7722500 + }, + { + "epoch": 4.63, + "learning_rate": 2.361819506160099e-05, + "loss": 1.1105, + "step": 7723000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3616095096040426e-05, + "loss": 1.1105, + "step": 7723500 + }, + { + "epoch": 4.63, + "learning_rate": 2.361399513047986e-05, + "loss": 1.1194, + "step": 7724000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3611899364850417e-05, + "loss": 1.1006, + "step": 7724500 + }, + { + "epoch": 4.63, + "learning_rate": 2.360979939928985e-05, + "loss": 1.0837, + "step": 7725000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3607699433729287e-05, + "loss": 1.1068, + "step": 7725500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3605599468168724e-05, + "loss": 1.1314, + "step": 7726000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3603499502608157e-05, + "loss": 1.0977, + "step": 7726500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3601407936909834e-05, + "loss": 1.1217, + "step": 7727000 + }, + { + "epoch": 4.63, + "learning_rate": 2.359930797134927e-05, + "loss": 1.1109, + "step": 7727500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3597208005788705e-05, + "loss": 1.0711, + "step": 7728000 + }, + { + "epoch": 4.63, + "learning_rate": 2.359510804022814e-05, + "loss": 1.0763, + "step": 7728500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3593008074667575e-05, + "loss": 1.0967, + "step": 7729000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3590912309038132e-05, + "loss": 1.1026, + "step": 7729500 + }, + { + "epoch": 4.63, + "learning_rate": 2.3588812343477565e-05, + "loss": 1.073, + "step": 7730000 + }, + { + "epoch": 4.63, + "learning_rate": 2.3586712377917002e-05, + "loss": 1.1141, + "step": 7730500 + }, + { + "epoch": 4.64, + "learning_rate": 2.358461241235644e-05, + "loss": 1.1, + "step": 7731000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3582512446795873e-05, + "loss": 1.0817, + "step": 7731500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3580412481235306e-05, + "loss": 1.1017, + "step": 7732000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3578312515674743e-05, + "loss": 1.1101, + "step": 7732500 + }, + { + "epoch": 4.64, + "learning_rate": 2.357621255011418e-05, + "loss": 1.0934, + "step": 7733000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3574112584553613e-05, + "loss": 1.0851, + "step": 7733500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3572016818924167e-05, + "loss": 1.1208, + "step": 7734000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3569916853363604e-05, + "loss": 1.1371, + "step": 7734500 + }, + { + "epoch": 4.64, + "learning_rate": 2.356781688780304e-05, + "loss": 1.1125, + "step": 7735000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3565716922242474e-05, + "loss": 1.0814, + "step": 7735500 + }, + { + "epoch": 4.64, + "learning_rate": 2.356362115661303e-05, + "loss": 1.0998, + "step": 7736000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3561521191052464e-05, + "loss": 1.1076, + "step": 7736500 + }, + { + "epoch": 4.64, + "learning_rate": 2.35594212254919e-05, + "loss": 1.1151, + "step": 7737000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3557321259931338e-05, + "loss": 1.104, + "step": 7737500 + }, + { + "epoch": 4.64, + "learning_rate": 2.355522129437077e-05, + "loss": 1.1038, + "step": 7738000 + }, + { + "epoch": 4.64, + "learning_rate": 2.355312132881021e-05, + "loss": 1.1034, + "step": 7738500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3551021363249642e-05, + "loss": 1.1056, + "step": 7739000 + }, + { + "epoch": 4.64, + "learning_rate": 2.35489255976202e-05, + "loss": 1.1084, + "step": 7739500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3546825632059636e-05, + "loss": 1.0766, + "step": 7740000 + }, + { + "epoch": 4.64, + "learning_rate": 2.354472566649907e-05, + "loss": 1.0962, + "step": 7740500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3542625700938506e-05, + "loss": 1.1266, + "step": 7741000 + }, + { + "epoch": 4.64, + "learning_rate": 2.354052993530906e-05, + "loss": 1.0921, + "step": 7741500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3538434169679616e-05, + "loss": 1.1217, + "step": 7742000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3536334204119053e-05, + "loss": 1.0665, + "step": 7742500 + }, + { + "epoch": 4.64, + "learning_rate": 2.353423423855849e-05, + "loss": 1.1176, + "step": 7743000 + }, + { + "epoch": 4.64, + "learning_rate": 2.353213427299792e-05, + "loss": 1.1188, + "step": 7743500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3530034307437357e-05, + "loss": 1.0633, + "step": 7744000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3527934341876794e-05, + "loss": 1.1089, + "step": 7744500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3525834376316227e-05, + "loss": 1.0982, + "step": 7745000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3523734410755664e-05, + "loss": 1.1132, + "step": 7745500 + }, + { + "epoch": 4.64, + "learning_rate": 2.3521638645126218e-05, + "loss": 1.0915, + "step": 7746000 + }, + { + "epoch": 4.64, + "learning_rate": 2.3519542879496775e-05, + "loss": 1.1118, + "step": 7746500 + }, + { + "epoch": 4.64, + "learning_rate": 2.351744291393621e-05, + "loss": 1.0535, + "step": 7747000 + }, + { + "epoch": 4.64, + "learning_rate": 2.351534294837565e-05, + "loss": 1.0695, + "step": 7747500 + }, + { + "epoch": 4.65, + "learning_rate": 2.351324298281508e-05, + "loss": 1.0902, + "step": 7748000 + }, + { + "epoch": 4.65, + "learning_rate": 2.3511143017254515e-05, + "loss": 1.0862, + "step": 7748500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3509043051693952e-05, + "loss": 1.0994, + "step": 7749000 + }, + { + "epoch": 4.65, + "learning_rate": 2.3506943086133386e-05, + "loss": 1.0921, + "step": 7749500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3504843120572823e-05, + "loss": 1.0997, + "step": 7750000 + }, + { + "epoch": 4.65, + "learning_rate": 2.350274315501226e-05, + "loss": 1.1014, + "step": 7750500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3500643189451693e-05, + "loss": 1.1046, + "step": 7751000 + }, + { + "epoch": 4.65, + "learning_rate": 2.349854742382225e-05, + "loss": 1.1123, + "step": 7751500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3496447458261683e-05, + "loss": 1.0987, + "step": 7752000 + }, + { + "epoch": 4.65, + "learning_rate": 2.349434749270112e-05, + "loss": 1.0887, + "step": 7752500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3492247527140557e-05, + "loss": 1.1014, + "step": 7753000 + }, + { + "epoch": 4.65, + "learning_rate": 2.349015176151111e-05, + "loss": 1.089, + "step": 7753500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3488051795950547e-05, + "loss": 1.1011, + "step": 7754000 + }, + { + "epoch": 4.65, + "learning_rate": 2.348595183038998e-05, + "loss": 1.1018, + "step": 7754500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3483851864829418e-05, + "loss": 1.1081, + "step": 7755000 + }, + { + "epoch": 4.65, + "learning_rate": 2.348175189926885e-05, + "loss": 1.096, + "step": 7755500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3479656133639408e-05, + "loss": 1.1115, + "step": 7756000 + }, + { + "epoch": 4.65, + "learning_rate": 2.3477556168078845e-05, + "loss": 1.0748, + "step": 7756500 + }, + { + "epoch": 4.65, + "learning_rate": 2.347545620251828e-05, + "loss": 1.1013, + "step": 7757000 + }, + { + "epoch": 4.65, + "learning_rate": 2.3473356236957715e-05, + "loss": 1.1052, + "step": 7757500 + }, + { + "epoch": 4.65, + "learning_rate": 2.347125627139715e-05, + "loss": 1.1033, + "step": 7758000 + }, + { + "epoch": 4.65, + "learning_rate": 2.3469160505767706e-05, + "loss": 1.1068, + "step": 7758500 + }, + { + "epoch": 4.65, + "learning_rate": 2.346706054020714e-05, + "loss": 1.0854, + "step": 7759000 + }, + { + "epoch": 4.65, + "learning_rate": 2.3464960574646576e-05, + "loss": 1.0882, + "step": 7759500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3462860609086013e-05, + "loss": 1.0838, + "step": 7760000 + }, + { + "epoch": 4.65, + "learning_rate": 2.3460760643525446e-05, + "loss": 1.0753, + "step": 7760500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3458664877896003e-05, + "loss": 1.1202, + "step": 7761000 + }, + { + "epoch": 4.65, + "learning_rate": 2.3456564912335437e-05, + "loss": 1.1069, + "step": 7761500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3454464946774874e-05, + "loss": 1.1012, + "step": 7762000 + }, + { + "epoch": 4.65, + "learning_rate": 2.345236498121431e-05, + "loss": 1.1181, + "step": 7762500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3450273415515984e-05, + "loss": 1.092, + "step": 7763000 + }, + { + "epoch": 4.65, + "learning_rate": 2.344817344995542e-05, + "loss": 1.1371, + "step": 7763500 + }, + { + "epoch": 4.65, + "learning_rate": 2.3446073484394858e-05, + "loss": 1.1011, + "step": 7764000 + }, + { + "epoch": 4.66, + "learning_rate": 2.344397351883429e-05, + "loss": 1.0994, + "step": 7764500 + }, + { + "epoch": 4.66, + "learning_rate": 2.3441873553273725e-05, + "loss": 1.0915, + "step": 7765000 + }, + { + "epoch": 4.66, + "learning_rate": 2.343977358771316e-05, + "loss": 1.128, + "step": 7765500 + }, + { + "epoch": 4.66, + "learning_rate": 2.343767782208372e-05, + "loss": 1.1048, + "step": 7766000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3435577856523155e-05, + "loss": 1.0974, + "step": 7766500 + }, + { + "epoch": 4.66, + "learning_rate": 2.3433477890962585e-05, + "loss": 1.1101, + "step": 7767000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3431377925402022e-05, + "loss": 1.1102, + "step": 7767500 + }, + { + "epoch": 4.66, + "learning_rate": 2.342927795984146e-05, + "loss": 1.1088, + "step": 7768000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3427177994280893e-05, + "loss": 1.0915, + "step": 7768500 + }, + { + "epoch": 4.66, + "learning_rate": 2.342507802872033e-05, + "loss": 1.1053, + "step": 7769000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3422978063159766e-05, + "loss": 1.1144, + "step": 7769500 + }, + { + "epoch": 4.66, + "learning_rate": 2.342088229753032e-05, + "loss": 1.1015, + "step": 7770000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3418782331969757e-05, + "loss": 1.0935, + "step": 7770500 + }, + { + "epoch": 4.66, + "learning_rate": 2.341668236640919e-05, + "loss": 1.0877, + "step": 7771000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3414582400848627e-05, + "loss": 1.1105, + "step": 7771500 + }, + { + "epoch": 4.66, + "learning_rate": 2.341248663521918e-05, + "loss": 1.1163, + "step": 7772000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3410386669658617e-05, + "loss": 1.0767, + "step": 7772500 + }, + { + "epoch": 4.66, + "learning_rate": 2.340828670409805e-05, + "loss": 1.0923, + "step": 7773000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3406186738537488e-05, + "loss": 1.1018, + "step": 7773500 + }, + { + "epoch": 4.66, + "learning_rate": 2.3404086772976925e-05, + "loss": 1.1022, + "step": 7774000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3401991007347478e-05, + "loss": 1.1133, + "step": 7774500 + }, + { + "epoch": 4.66, + "learning_rate": 2.3399891041786915e-05, + "loss": 1.1084, + "step": 7775000 + }, + { + "epoch": 4.66, + "learning_rate": 2.339779107622635e-05, + "loss": 1.0933, + "step": 7775500 + }, + { + "epoch": 4.66, + "learning_rate": 2.3395691110665785e-05, + "loss": 1.0815, + "step": 7776000 + }, + { + "epoch": 4.66, + "learning_rate": 2.339359534503634e-05, + "loss": 1.1086, + "step": 7776500 + }, + { + "epoch": 4.66, + "learning_rate": 2.3391495379475776e-05, + "loss": 1.108, + "step": 7777000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3389395413915213e-05, + "loss": 1.1067, + "step": 7777500 + }, + { + "epoch": 4.66, + "learning_rate": 2.338729964828577e-05, + "loss": 1.1222, + "step": 7778000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3385199682725203e-05, + "loss": 1.1028, + "step": 7778500 + }, + { + "epoch": 4.66, + "learning_rate": 2.3383099717164636e-05, + "loss": 1.0836, + "step": 7779000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3380999751604073e-05, + "loss": 1.08, + "step": 7779500 + }, + { + "epoch": 4.66, + "learning_rate": 2.3378899786043507e-05, + "loss": 1.116, + "step": 7780000 + }, + { + "epoch": 4.66, + "learning_rate": 2.3376799820482944e-05, + "loss": 1.0752, + "step": 7780500 + }, + { + "epoch": 4.67, + "learning_rate": 2.337469985492238e-05, + "loss": 1.112, + "step": 7781000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3372599889361814e-05, + "loss": 1.1124, + "step": 7781500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3370499923801247e-05, + "loss": 1.1059, + "step": 7782000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3368404158171804e-05, + "loss": 1.0877, + "step": 7782500 + }, + { + "epoch": 4.67, + "learning_rate": 2.336630419261124e-05, + "loss": 1.0937, + "step": 7783000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3364204227050678e-05, + "loss": 1.0958, + "step": 7783500 + }, + { + "epoch": 4.67, + "learning_rate": 2.336210426149011e-05, + "loss": 1.1025, + "step": 7784000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3360004295929545e-05, + "loss": 1.0849, + "step": 7784500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3357908530300102e-05, + "loss": 1.1066, + "step": 7785000 + }, + { + "epoch": 4.67, + "learning_rate": 2.335580856473954e-05, + "loss": 1.1063, + "step": 7785500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3353708599178976e-05, + "loss": 1.1081, + "step": 7786000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3351608633618406e-05, + "loss": 1.1038, + "step": 7786500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3349508668057843e-05, + "loss": 1.0991, + "step": 7787000 + }, + { + "epoch": 4.67, + "learning_rate": 2.33474129024284e-05, + "loss": 1.1048, + "step": 7787500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3345312936867836e-05, + "loss": 1.114, + "step": 7788000 + }, + { + "epoch": 4.67, + "learning_rate": 2.334321297130727e-05, + "loss": 1.0909, + "step": 7788500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3341113005746703e-05, + "loss": 1.0969, + "step": 7789000 + }, + { + "epoch": 4.67, + "learning_rate": 2.333901304018614e-05, + "loss": 1.0743, + "step": 7789500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3336917274556697e-05, + "loss": 1.0909, + "step": 7790000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3334817308996134e-05, + "loss": 1.1108, + "step": 7790500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3332717343435567e-05, + "loss": 1.087, + "step": 7791000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3330617377875e-05, + "loss": 1.0918, + "step": 7791500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3328521612245558e-05, + "loss": 1.1109, + "step": 7792000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3326425846616115e-05, + "loss": 1.0906, + "step": 7792500 + }, + { + "epoch": 4.67, + "learning_rate": 2.332432588105555e-05, + "loss": 1.1065, + "step": 7793000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3322225915494985e-05, + "loss": 1.1029, + "step": 7793500 + }, + { + "epoch": 4.67, + "learning_rate": 2.332012594993442e-05, + "loss": 1.1019, + "step": 7794000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3318025984373855e-05, + "loss": 1.1024, + "step": 7794500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3315926018813292e-05, + "loss": 1.0885, + "step": 7795000 + }, + { + "epoch": 4.67, + "learning_rate": 2.331382605325273e-05, + "loss": 1.1206, + "step": 7795500 + }, + { + "epoch": 4.67, + "learning_rate": 2.331172608769216e-05, + "loss": 1.1094, + "step": 7796000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3309626122131596e-05, + "loss": 1.0872, + "step": 7796500 + }, + { + "epoch": 4.67, + "learning_rate": 2.3307526156571033e-05, + "loss": 1.0838, + "step": 7797000 + }, + { + "epoch": 4.67, + "learning_rate": 2.3305426191010466e-05, + "loss": 1.1017, + "step": 7797500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3303326225449903e-05, + "loss": 1.1119, + "step": 7798000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3301230459820457e-05, + "loss": 1.0664, + "step": 7798500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3299134694191014e-05, + "loss": 1.1187, + "step": 7799000 + }, + { + "epoch": 4.68, + "learning_rate": 2.329703472863045e-05, + "loss": 1.1178, + "step": 7799500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3294934763069887e-05, + "loss": 1.1132, + "step": 7800000 + }, + { + "epoch": 4.68, + "eval_loss": 1.069945216178894, + "eval_runtime": 1102.5508, + "eval_samples_per_second": 477.729, + "eval_steps_per_second": 79.622, + "step": 7800000 + }, + { + "epoch": 4.68, + "learning_rate": 2.329283479750932e-05, + "loss": 1.0898, + "step": 7800500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3290739031879874e-05, + "loss": 1.0913, + "step": 7801000 + }, + { + "epoch": 4.68, + "learning_rate": 2.328863906631931e-05, + "loss": 1.0606, + "step": 7801500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3286539100758748e-05, + "loss": 1.0769, + "step": 7802000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3284443335129305e-05, + "loss": 1.0986, + "step": 7802500 + }, + { + "epoch": 4.68, + "learning_rate": 2.328234336956874e-05, + "loss": 1.0797, + "step": 7803000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3280243404008172e-05, + "loss": 1.116, + "step": 7803500 + }, + { + "epoch": 4.68, + "learning_rate": 2.327814343844761e-05, + "loss": 1.0902, + "step": 7804000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3276043472887046e-05, + "loss": 1.087, + "step": 7804500 + }, + { + "epoch": 4.68, + "learning_rate": 2.327394350732648e-05, + "loss": 1.1168, + "step": 7805000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3271843541765913e-05, + "loss": 1.117, + "step": 7805500 + }, + { + "epoch": 4.68, + "learning_rate": 2.326974357620535e-05, + "loss": 1.1115, + "step": 7806000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3267647810575906e-05, + "loss": 1.095, + "step": 7806500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3265547845015343e-05, + "loss": 1.1224, + "step": 7807000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3263447879454777e-05, + "loss": 1.1193, + "step": 7807500 + }, + { + "epoch": 4.68, + "learning_rate": 2.326134791389421e-05, + "loss": 1.1108, + "step": 7808000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3259247948333647e-05, + "loss": 1.1047, + "step": 7808500 + }, + { + "epoch": 4.68, + "learning_rate": 2.325714798277308e-05, + "loss": 1.1293, + "step": 7809000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3255048017212517e-05, + "loss": 1.0908, + "step": 7809500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3252952251583074e-05, + "loss": 1.0694, + "step": 7810000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3250852286022508e-05, + "loss": 1.095, + "step": 7810500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3248752320461945e-05, + "loss": 1.1226, + "step": 7811000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3246652354901378e-05, + "loss": 1.0723, + "step": 7811500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3244556589271935e-05, + "loss": 1.1044, + "step": 7812000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3242456623711372e-05, + "loss": 1.0751, + "step": 7812500 + }, + { + "epoch": 4.68, + "learning_rate": 2.3240356658150805e-05, + "loss": 1.0957, + "step": 7813000 + }, + { + "epoch": 4.68, + "learning_rate": 2.3238256692590242e-05, + "loss": 1.102, + "step": 7813500 + }, + { + "epoch": 4.68, + "learning_rate": 2.32361609269608e-05, + "loss": 1.1081, + "step": 7814000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3234060961400233e-05, + "loss": 1.0671, + "step": 7814500 + }, + { + "epoch": 4.69, + "learning_rate": 2.323196099583967e-05, + "loss": 1.0773, + "step": 7815000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3229865230210223e-05, + "loss": 1.093, + "step": 7815500 + }, + { + "epoch": 4.69, + "learning_rate": 2.322776526464966e-05, + "loss": 1.0839, + "step": 7816000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3225665299089097e-05, + "loss": 1.0982, + "step": 7816500 + }, + { + "epoch": 4.69, + "learning_rate": 2.322356533352853e-05, + "loss": 1.1077, + "step": 7817000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3221465367967964e-05, + "loss": 1.0843, + "step": 7817500 + }, + { + "epoch": 4.69, + "learning_rate": 2.32193654024074e-05, + "loss": 1.112, + "step": 7818000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3217265436846834e-05, + "loss": 1.0835, + "step": 7818500 + }, + { + "epoch": 4.69, + "learning_rate": 2.321516547128627e-05, + "loss": 1.113, + "step": 7819000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3213065505725708e-05, + "loss": 1.1277, + "step": 7819500 + }, + { + "epoch": 4.69, + "learning_rate": 2.321096554016514e-05, + "loss": 1.0933, + "step": 7820000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3208865574604575e-05, + "loss": 1.0944, + "step": 7820500 + }, + { + "epoch": 4.69, + "learning_rate": 2.320676560904401e-05, + "loss": 1.0954, + "step": 7821000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3204665643483445e-05, + "loss": 1.0768, + "step": 7821500 + }, + { + "epoch": 4.69, + "learning_rate": 2.320256567792288e-05, + "loss": 1.0982, + "step": 7822000 + }, + { + "epoch": 4.69, + "learning_rate": 2.320046991229344e-05, + "loss": 1.1071, + "step": 7822500 + }, + { + "epoch": 4.69, + "learning_rate": 2.3198369946732872e-05, + "loss": 1.0855, + "step": 7823000 + }, + { + "epoch": 4.69, + "learning_rate": 2.319626998117231e-05, + "loss": 1.1133, + "step": 7823500 + }, + { + "epoch": 4.69, + "learning_rate": 2.3194170015611742e-05, + "loss": 1.0733, + "step": 7824000 + }, + { + "epoch": 4.69, + "learning_rate": 2.31920742499823e-05, + "loss": 1.0916, + "step": 7824500 + }, + { + "epoch": 4.69, + "learning_rate": 2.3189974284421733e-05, + "loss": 1.1108, + "step": 7825000 + }, + { + "epoch": 4.69, + "learning_rate": 2.318787431886117e-05, + "loss": 1.0844, + "step": 7825500 + }, + { + "epoch": 4.69, + "learning_rate": 2.3185774353300607e-05, + "loss": 1.0787, + "step": 7826000 + }, + { + "epoch": 4.69, + "learning_rate": 2.318367438774004e-05, + "loss": 1.0802, + "step": 7826500 + }, + { + "epoch": 4.69, + "learning_rate": 2.3181578622110597e-05, + "loss": 1.0959, + "step": 7827000 + }, + { + "epoch": 4.69, + "learning_rate": 2.317947865655003e-05, + "loss": 1.1133, + "step": 7827500 + }, + { + "epoch": 4.69, + "learning_rate": 2.3177378690989467e-05, + "loss": 1.1144, + "step": 7828000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3175278725428904e-05, + "loss": 1.0862, + "step": 7828500 + }, + { + "epoch": 4.69, + "learning_rate": 2.317318295979946e-05, + "loss": 1.0918, + "step": 7829000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3171082994238895e-05, + "loss": 1.1053, + "step": 7829500 + }, + { + "epoch": 4.69, + "learning_rate": 2.3168983028678328e-05, + "loss": 1.0743, + "step": 7830000 + }, + { + "epoch": 4.69, + "learning_rate": 2.3166883063117765e-05, + "loss": 1.0979, + "step": 7830500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3164783097557198e-05, + "loss": 1.1216, + "step": 7831000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3162683131996635e-05, + "loss": 1.1136, + "step": 7831500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3160587366367192e-05, + "loss": 1.086, + "step": 7832000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3158487400806626e-05, + "loss": 1.1217, + "step": 7832500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3156387435246062e-05, + "loss": 1.081, + "step": 7833000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3154287469685496e-05, + "loss": 1.0924, + "step": 7833500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3152191704056053e-05, + "loss": 1.1041, + "step": 7834000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3150091738495486e-05, + "loss": 1.1054, + "step": 7834500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3147991772934923e-05, + "loss": 1.1053, + "step": 7835000 + }, + { + "epoch": 4.7, + "learning_rate": 2.314589180737436e-05, + "loss": 1.0919, + "step": 7835500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3143796041744917e-05, + "loss": 1.0865, + "step": 7836000 + }, + { + "epoch": 4.7, + "learning_rate": 2.314169607618435e-05, + "loss": 1.1106, + "step": 7836500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3139596110623784e-05, + "loss": 1.0942, + "step": 7837000 + }, + { + "epoch": 4.7, + "learning_rate": 2.313749614506322e-05, + "loss": 1.109, + "step": 7837500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3135396179502654e-05, + "loss": 1.1093, + "step": 7838000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3133300413873215e-05, + "loss": 1.0909, + "step": 7838500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3131200448312648e-05, + "loss": 1.1015, + "step": 7839000 + }, + { + "epoch": 4.7, + "learning_rate": 2.312910048275208e-05, + "loss": 1.101, + "step": 7839500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3127000517191518e-05, + "loss": 1.1144, + "step": 7840000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3124904751562075e-05, + "loss": 1.1133, + "step": 7840500 + }, + { + "epoch": 4.7, + "learning_rate": 2.312280478600151e-05, + "loss": 1.0728, + "step": 7841000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3120704820440946e-05, + "loss": 1.1133, + "step": 7841500 + }, + { + "epoch": 4.7, + "learning_rate": 2.311860485488038e-05, + "loss": 1.0902, + "step": 7842000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3116509089250936e-05, + "loss": 1.0797, + "step": 7842500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3114409123690373e-05, + "loss": 1.0949, + "step": 7843000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3112309158129806e-05, + "loss": 1.068, + "step": 7843500 + }, + { + "epoch": 4.7, + "learning_rate": 2.311020919256924e-05, + "loss": 1.1045, + "step": 7844000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3108109227008677e-05, + "loss": 1.1103, + "step": 7844500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3106013461379234e-05, + "loss": 1.1023, + "step": 7845000 + }, + { + "epoch": 4.7, + "learning_rate": 2.310391349581867e-05, + "loss": 1.0808, + "step": 7845500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3101813530258104e-05, + "loss": 1.0947, + "step": 7846000 + }, + { + "epoch": 4.7, + "learning_rate": 2.3099713564697537e-05, + "loss": 1.0832, + "step": 7846500 + }, + { + "epoch": 4.7, + "learning_rate": 2.3097617799068094e-05, + "loss": 1.104, + "step": 7847000 + }, + { + "epoch": 4.7, + "learning_rate": 2.309551783350753e-05, + "loss": 1.102, + "step": 7847500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3093417867946965e-05, + "loss": 1.0923, + "step": 7848000 + }, + { + "epoch": 4.71, + "learning_rate": 2.30913179023864e-05, + "loss": 1.0813, + "step": 7848500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3089222136756955e-05, + "loss": 1.0966, + "step": 7849000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3087122171196392e-05, + "loss": 1.0718, + "step": 7849500 + }, + { + "epoch": 4.71, + "learning_rate": 2.308502220563583e-05, + "loss": 1.1009, + "step": 7850000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3082922240075262e-05, + "loss": 1.1032, + "step": 7850500 + }, + { + "epoch": 4.71, + "learning_rate": 2.30808222745147e-05, + "loss": 1.1121, + "step": 7851000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3078722308954132e-05, + "loss": 1.1063, + "step": 7851500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3076622343393566e-05, + "loss": 1.102, + "step": 7852000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3074522377833003e-05, + "loss": 1.0842, + "step": 7852500 + }, + { + "epoch": 4.71, + "learning_rate": 2.307242661220356e-05, + "loss": 1.0787, + "step": 7853000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3070326646642997e-05, + "loss": 1.0838, + "step": 7853500 + }, + { + "epoch": 4.71, + "learning_rate": 2.306822668108243e-05, + "loss": 1.0896, + "step": 7854000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3066126715521863e-05, + "loss": 1.0963, + "step": 7854500 + }, + { + "epoch": 4.71, + "learning_rate": 2.30640267499613e-05, + "loss": 1.1017, + "step": 7855000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3061930984331857e-05, + "loss": 1.0925, + "step": 7855500 + }, + { + "epoch": 4.71, + "learning_rate": 2.305983101877129e-05, + "loss": 1.0872, + "step": 7856000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3057731053210728e-05, + "loss": 1.1192, + "step": 7856500 + }, + { + "epoch": 4.71, + "learning_rate": 2.305563108765016e-05, + "loss": 1.0918, + "step": 7857000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3053535322020718e-05, + "loss": 1.1229, + "step": 7857500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3051435356460155e-05, + "loss": 1.0879, + "step": 7858000 + }, + { + "epoch": 4.71, + "learning_rate": 2.304933539089959e-05, + "loss": 1.0876, + "step": 7858500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3047235425339022e-05, + "loss": 1.0742, + "step": 7859000 + }, + { + "epoch": 4.71, + "learning_rate": 2.304513545977846e-05, + "loss": 1.0897, + "step": 7859500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3043035494217895e-05, + "loss": 1.0635, + "step": 7860000 + }, + { + "epoch": 4.71, + "learning_rate": 2.304093552865733e-05, + "loss": 1.0995, + "step": 7860500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3038835563096766e-05, + "loss": 1.1026, + "step": 7861000 + }, + { + "epoch": 4.71, + "learning_rate": 2.303673979746732e-05, + "loss": 1.0956, + "step": 7861500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3034639831906756e-05, + "loss": 1.076, + "step": 7862000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3032539866346193e-05, + "loss": 1.1067, + "step": 7862500 + }, + { + "epoch": 4.71, + "learning_rate": 2.3030439900785627e-05, + "loss": 1.0939, + "step": 7863000 + }, + { + "epoch": 4.71, + "learning_rate": 2.3028344135156183e-05, + "loss": 1.0732, + "step": 7863500 + }, + { + "epoch": 4.71, + "learning_rate": 2.302624836952674e-05, + "loss": 1.0891, + "step": 7864000 + }, + { + "epoch": 4.72, + "learning_rate": 2.3024148403966174e-05, + "loss": 1.111, + "step": 7864500 + }, + { + "epoch": 4.72, + "learning_rate": 2.302204843840561e-05, + "loss": 1.091, + "step": 7865000 + }, + { + "epoch": 4.72, + "learning_rate": 2.3019948472845044e-05, + "loss": 1.0763, + "step": 7865500 + }, + { + "epoch": 4.72, + "learning_rate": 2.3017848507284478e-05, + "loss": 1.1218, + "step": 7866000 + }, + { + "epoch": 4.72, + "learning_rate": 2.3015748541723915e-05, + "loss": 1.0819, + "step": 7866500 + }, + { + "epoch": 4.72, + "learning_rate": 2.301364857616335e-05, + "loss": 1.0858, + "step": 7867000 + }, + { + "epoch": 4.72, + "learning_rate": 2.3011548610602788e-05, + "loss": 1.1106, + "step": 7867500 + }, + { + "epoch": 4.72, + "learning_rate": 2.3009452844973342e-05, + "loss": 1.1212, + "step": 7868000 + }, + { + "epoch": 4.72, + "learning_rate": 2.3007352879412775e-05, + "loss": 1.0921, + "step": 7868500 + }, + { + "epoch": 4.72, + "learning_rate": 2.3005252913852212e-05, + "loss": 1.1165, + "step": 7869000 + }, + { + "epoch": 4.72, + "learning_rate": 2.300315294829165e-05, + "loss": 1.1072, + "step": 7869500 + }, + { + "epoch": 4.72, + "learning_rate": 2.3001052982731082e-05, + "loss": 1.111, + "step": 7870000 + }, + { + "epoch": 4.72, + "learning_rate": 2.299895301717052e-05, + "loss": 1.0833, + "step": 7870500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2996857251541073e-05, + "loss": 1.0979, + "step": 7871000 + }, + { + "epoch": 4.72, + "learning_rate": 2.299475728598051e-05, + "loss": 1.1083, + "step": 7871500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2992657320419947e-05, + "loss": 1.0819, + "step": 7872000 + }, + { + "epoch": 4.72, + "learning_rate": 2.299055735485938e-05, + "loss": 1.0938, + "step": 7872500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2988461589229934e-05, + "loss": 1.1035, + "step": 7873000 + }, + { + "epoch": 4.72, + "learning_rate": 2.298636162366937e-05, + "loss": 1.0785, + "step": 7873500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2984261658108807e-05, + "loss": 1.1128, + "step": 7874000 + }, + { + "epoch": 4.72, + "learning_rate": 2.2982161692548244e-05, + "loss": 1.1008, + "step": 7874500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2980065926918798e-05, + "loss": 1.0896, + "step": 7875000 + }, + { + "epoch": 4.72, + "learning_rate": 2.297796596135823e-05, + "loss": 1.1069, + "step": 7875500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2975865995797668e-05, + "loss": 1.1111, + "step": 7876000 + }, + { + "epoch": 4.72, + "learning_rate": 2.2973766030237105e-05, + "loss": 1.0973, + "step": 7876500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2971666064676538e-05, + "loss": 1.0976, + "step": 7877000 + }, + { + "epoch": 4.72, + "learning_rate": 2.2969566099115975e-05, + "loss": 1.0722, + "step": 7877500 + }, + { + "epoch": 4.72, + "learning_rate": 2.296747033348653e-05, + "loss": 1.0866, + "step": 7878000 + }, + { + "epoch": 4.72, + "learning_rate": 2.2965370367925966e-05, + "loss": 1.0856, + "step": 7878500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2963270402365402e-05, + "loss": 1.0937, + "step": 7879000 + }, + { + "epoch": 4.72, + "learning_rate": 2.2961170436804836e-05, + "loss": 1.0908, + "step": 7879500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2959070471244273e-05, + "loss": 1.0793, + "step": 7880000 + }, + { + "epoch": 4.72, + "learning_rate": 2.2956974705614826e-05, + "loss": 1.1142, + "step": 7880500 + }, + { + "epoch": 4.72, + "learning_rate": 2.2954874740054263e-05, + "loss": 1.0919, + "step": 7881000 + }, + { + "epoch": 4.73, + "learning_rate": 2.29527747744937e-05, + "loss": 1.1087, + "step": 7881500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2950674808933133e-05, + "loss": 1.0959, + "step": 7882000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2948579043303687e-05, + "loss": 1.0954, + "step": 7882500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2946479077743124e-05, + "loss": 1.0812, + "step": 7883000 + }, + { + "epoch": 4.73, + "learning_rate": 2.294437911218256e-05, + "loss": 1.0753, + "step": 7883500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2942279146621994e-05, + "loss": 1.1152, + "step": 7884000 + }, + { + "epoch": 4.73, + "learning_rate": 2.294017918106143e-05, + "loss": 1.0856, + "step": 7884500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2938083415431985e-05, + "loss": 1.1096, + "step": 7885000 + }, + { + "epoch": 4.73, + "learning_rate": 2.293598344987142e-05, + "loss": 1.0988, + "step": 7885500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2933883484310858e-05, + "loss": 1.0961, + "step": 7886000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2931783518750292e-05, + "loss": 1.0966, + "step": 7886500 + }, + { + "epoch": 4.73, + "learning_rate": 2.292968355318973e-05, + "loss": 1.0901, + "step": 7887000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2927587787560282e-05, + "loss": 1.0854, + "step": 7887500 + }, + { + "epoch": 4.73, + "learning_rate": 2.292548782199972e-05, + "loss": 1.0875, + "step": 7888000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2923387856439156e-05, + "loss": 1.1086, + "step": 7888500 + }, + { + "epoch": 4.73, + "learning_rate": 2.292128789087859e-05, + "loss": 1.1031, + "step": 7889000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2919187925318026e-05, + "loss": 1.101, + "step": 7889500 + }, + { + "epoch": 4.73, + "learning_rate": 2.291708795975746e-05, + "loss": 1.0981, + "step": 7890000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2914987994196893e-05, + "loss": 1.0891, + "step": 7890500 + }, + { + "epoch": 4.73, + "learning_rate": 2.291288802863633e-05, + "loss": 1.1273, + "step": 7891000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2910792263006887e-05, + "loss": 1.1176, + "step": 7891500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2908692297446324e-05, + "loss": 1.0817, + "step": 7892000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2906592331885757e-05, + "loss": 1.0928, + "step": 7892500 + }, + { + "epoch": 4.73, + "learning_rate": 2.290449236632519e-05, + "loss": 1.1001, + "step": 7893000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2902396600695748e-05, + "loss": 1.0843, + "step": 7893500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2900296635135184e-05, + "loss": 1.0936, + "step": 7894000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2898196669574618e-05, + "loss": 1.096, + "step": 7894500 + }, + { + "epoch": 4.73, + "learning_rate": 2.289609670401405e-05, + "loss": 1.1054, + "step": 7895000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2894000938384612e-05, + "loss": 1.0833, + "step": 7895500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2891900972824045e-05, + "loss": 1.0944, + "step": 7896000 + }, + { + "epoch": 4.73, + "learning_rate": 2.2889801007263482e-05, + "loss": 1.1057, + "step": 7896500 + }, + { + "epoch": 4.73, + "learning_rate": 2.2887701041702915e-05, + "loss": 1.1072, + "step": 7897000 + }, + { + "epoch": 4.73, + "learning_rate": 2.288560107614235e-05, + "loss": 1.0852, + "step": 7897500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2883505310512906e-05, + "loss": 1.0901, + "step": 7898000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2881405344952343e-05, + "loss": 1.1064, + "step": 7898500 + }, + { + "epoch": 4.74, + "learning_rate": 2.287930537939178e-05, + "loss": 1.1182, + "step": 7899000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2877205413831213e-05, + "loss": 1.0951, + "step": 7899500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2875105448270647e-05, + "loss": 1.0874, + "step": 7900000 + }, + { + "epoch": 4.74, + "eval_loss": 1.0676285028457642, + "eval_runtime": 1100.8847, + "eval_samples_per_second": 478.452, + "eval_steps_per_second": 79.742, + "step": 7900000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2873013882572327e-05, + "loss": 1.1437, + "step": 7900500 + }, + { + "epoch": 4.74, + "learning_rate": 2.287091391701176e-05, + "loss": 1.1005, + "step": 7901000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2868813951451194e-05, + "loss": 1.0894, + "step": 7901500 + }, + { + "epoch": 4.74, + "learning_rate": 2.286671398589063e-05, + "loss": 1.123, + "step": 7902000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2864614020330068e-05, + "loss": 1.0945, + "step": 7902500 + }, + { + "epoch": 4.74, + "learning_rate": 2.28625140547695e-05, + "loss": 1.111, + "step": 7903000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2860414089208938e-05, + "loss": 1.1178, + "step": 7903500 + }, + { + "epoch": 4.74, + "learning_rate": 2.285831412364837e-05, + "loss": 1.0933, + "step": 7904000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2856214158087805e-05, + "loss": 1.0671, + "step": 7904500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2854118392458362e-05, + "loss": 1.0718, + "step": 7905000 + }, + { + "epoch": 4.74, + "learning_rate": 2.28520184268978e-05, + "loss": 1.1045, + "step": 7905500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2849918461337235e-05, + "loss": 1.106, + "step": 7906000 + }, + { + "epoch": 4.74, + "learning_rate": 2.284781849577667e-05, + "loss": 1.1085, + "step": 7906500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2845722730147226e-05, + "loss": 1.1229, + "step": 7907000 + }, + { + "epoch": 4.74, + "learning_rate": 2.284362276458666e-05, + "loss": 1.1038, + "step": 7907500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2841522799026096e-05, + "loss": 1.0665, + "step": 7908000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2839422833465533e-05, + "loss": 1.0862, + "step": 7908500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2837327067836087e-05, + "loss": 1.0953, + "step": 7909000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2835227102275523e-05, + "loss": 1.0891, + "step": 7909500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2833127136714957e-05, + "loss": 1.0798, + "step": 7910000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2831027171154394e-05, + "loss": 1.0823, + "step": 7910500 + }, + { + "epoch": 4.74, + "learning_rate": 2.282892720559383e-05, + "loss": 1.094, + "step": 7911000 + }, + { + "epoch": 4.74, + "learning_rate": 2.282682724003326e-05, + "loss": 1.0886, + "step": 7911500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2824727274472698e-05, + "loss": 1.0869, + "step": 7912000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2822631508843255e-05, + "loss": 1.0965, + "step": 7912500 + }, + { + "epoch": 4.74, + "learning_rate": 2.282053154328269e-05, + "loss": 1.1041, + "step": 7913000 + }, + { + "epoch": 4.74, + "learning_rate": 2.2818431577722125e-05, + "loss": 1.1056, + "step": 7913500 + }, + { + "epoch": 4.74, + "learning_rate": 2.2816331612161558e-05, + "loss": 1.108, + "step": 7914000 + }, + { + "epoch": 4.75, + "learning_rate": 2.281424004646324e-05, + "loss": 1.0883, + "step": 7914500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2812140080902672e-05, + "loss": 1.0984, + "step": 7915000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2810040115342106e-05, + "loss": 1.0865, + "step": 7915500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2807940149781543e-05, + "loss": 1.1085, + "step": 7916000 + }, + { + "epoch": 4.75, + "learning_rate": 2.280584018422098e-05, + "loss": 1.1246, + "step": 7916500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2803740218660413e-05, + "loss": 1.046, + "step": 7917000 + }, + { + "epoch": 4.75, + "learning_rate": 2.280164025309985e-05, + "loss": 1.0921, + "step": 7917500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2799540287539287e-05, + "loss": 1.0978, + "step": 7918000 + }, + { + "epoch": 4.75, + "learning_rate": 2.279744452190984e-05, + "loss": 1.1057, + "step": 7918500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2795344556349274e-05, + "loss": 1.1132, + "step": 7919000 + }, + { + "epoch": 4.75, + "learning_rate": 2.279324459078871e-05, + "loss": 1.1156, + "step": 7919500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2791144625228147e-05, + "loss": 1.077, + "step": 7920000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2789044659667584e-05, + "loss": 1.0697, + "step": 7920500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2786948894038138e-05, + "loss": 1.0933, + "step": 7921000 + }, + { + "epoch": 4.75, + "learning_rate": 2.278484892847757e-05, + "loss": 1.1319, + "step": 7921500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2782748962917008e-05, + "loss": 1.1139, + "step": 7922000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2780648997356445e-05, + "loss": 1.1121, + "step": 7922500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2778553231727e-05, + "loss": 1.1149, + "step": 7923000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2776453266166435e-05, + "loss": 1.0856, + "step": 7923500 + }, + { + "epoch": 4.75, + "learning_rate": 2.277435330060587e-05, + "loss": 1.0778, + "step": 7924000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2772253335045306e-05, + "loss": 1.0712, + "step": 7924500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2770157569415863e-05, + "loss": 1.0722, + "step": 7925000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2768057603855296e-05, + "loss": 1.069, + "step": 7925500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2765957638294733e-05, + "loss": 1.101, + "step": 7926000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2763857672734166e-05, + "loss": 1.1014, + "step": 7926500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2761761907104723e-05, + "loss": 1.1294, + "step": 7927000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2759661941544157e-05, + "loss": 1.075, + "step": 7927500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2757561975983594e-05, + "loss": 1.0867, + "step": 7928000 + }, + { + "epoch": 4.75, + "learning_rate": 2.2755462010423027e-05, + "loss": 1.0678, + "step": 7928500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2753362044862464e-05, + "loss": 1.0928, + "step": 7929000 + }, + { + "epoch": 4.75, + "learning_rate": 2.275126627923302e-05, + "loss": 1.1144, + "step": 7929500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2749166313672454e-05, + "loss": 1.0863, + "step": 7930000 + }, + { + "epoch": 4.75, + "learning_rate": 2.274706634811189e-05, + "loss": 1.1062, + "step": 7930500 + }, + { + "epoch": 4.75, + "learning_rate": 2.2744966382551325e-05, + "loss": 1.0822, + "step": 7931000 + }, + { + "epoch": 4.76, + "learning_rate": 2.274287061692188e-05, + "loss": 1.0906, + "step": 7931500 + }, + { + "epoch": 4.76, + "learning_rate": 2.274077065136132e-05, + "loss": 1.0809, + "step": 7932000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2738670685800752e-05, + "loss": 1.1033, + "step": 7932500 + }, + { + "epoch": 4.76, + "learning_rate": 2.273657072024019e-05, + "loss": 1.124, + "step": 7933000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2734470754679622e-05, + "loss": 1.0919, + "step": 7933500 + }, + { + "epoch": 4.76, + "learning_rate": 2.2732379188981303e-05, + "loss": 1.0688, + "step": 7934000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2730279223420733e-05, + "loss": 1.0913, + "step": 7934500 + }, + { + "epoch": 4.76, + "learning_rate": 2.272817925786017e-05, + "loss": 1.0848, + "step": 7935000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2726079292299606e-05, + "loss": 1.1064, + "step": 7935500 + }, + { + "epoch": 4.76, + "learning_rate": 2.2723979326739043e-05, + "loss": 1.0878, + "step": 7936000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2721879361178477e-05, + "loss": 1.0949, + "step": 7936500 + }, + { + "epoch": 4.76, + "learning_rate": 2.271977939561791e-05, + "loss": 1.0975, + "step": 7937000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2717679430057347e-05, + "loss": 1.1134, + "step": 7937500 + }, + { + "epoch": 4.76, + "learning_rate": 2.2715583664427904e-05, + "loss": 1.0759, + "step": 7938000 + }, + { + "epoch": 4.76, + "learning_rate": 2.271348789879846e-05, + "loss": 1.1072, + "step": 7938500 + }, + { + "epoch": 4.76, + "learning_rate": 2.2711387933237894e-05, + "loss": 1.0911, + "step": 7939000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2709287967677328e-05, + "loss": 1.1049, + "step": 7939500 + }, + { + "epoch": 4.76, + "learning_rate": 2.2707188002116765e-05, + "loss": 1.0849, + "step": 7940000 + }, + { + "epoch": 4.76, + "learning_rate": 2.27050880365562e-05, + "loss": 1.092, + "step": 7940500 + }, + { + "epoch": 4.76, + "learning_rate": 2.2702988070995635e-05, + "loss": 1.1116, + "step": 7941000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2700888105435072e-05, + "loss": 1.1286, + "step": 7941500 + }, + { + "epoch": 4.76, + "learning_rate": 2.2698788139874505e-05, + "loss": 1.0736, + "step": 7942000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2696692374245062e-05, + "loss": 1.0872, + "step": 7942500 + }, + { + "epoch": 4.76, + "learning_rate": 2.26945924086845e-05, + "loss": 1.0819, + "step": 7943000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2692492443123933e-05, + "loss": 1.104, + "step": 7943500 + }, + { + "epoch": 4.76, + "learning_rate": 2.269039247756337e-05, + "loss": 1.0834, + "step": 7944000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2688292512002803e-05, + "loss": 1.1109, + "step": 7944500 + }, + { + "epoch": 4.76, + "learning_rate": 2.2686192546442236e-05, + "loss": 1.0966, + "step": 7945000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2684096780812793e-05, + "loss": 1.0763, + "step": 7945500 + }, + { + "epoch": 4.76, + "learning_rate": 2.268199681525223e-05, + "loss": 1.107, + "step": 7946000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2679896849691664e-05, + "loss": 1.0639, + "step": 7946500 + }, + { + "epoch": 4.76, + "learning_rate": 2.267780108406222e-05, + "loss": 1.0816, + "step": 7947000 + }, + { + "epoch": 4.76, + "learning_rate": 2.2675701118501657e-05, + "loss": 1.0802, + "step": 7947500 + }, + { + "epoch": 4.77, + "learning_rate": 2.267360115294109e-05, + "loss": 1.0719, + "step": 7948000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2671501187380528e-05, + "loss": 1.1044, + "step": 7948500 + }, + { + "epoch": 4.77, + "learning_rate": 2.266940122181996e-05, + "loss": 1.1055, + "step": 7949000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2667301256259395e-05, + "loss": 1.1048, + "step": 7949500 + }, + { + "epoch": 4.77, + "learning_rate": 2.266520129069883e-05, + "loss": 1.1016, + "step": 7950000 + }, + { + "epoch": 4.77, + "learning_rate": 2.266310132513827e-05, + "loss": 1.0991, + "step": 7950500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2661005559508825e-05, + "loss": 1.0981, + "step": 7951000 + }, + { + "epoch": 4.77, + "learning_rate": 2.265890559394826e-05, + "loss": 1.1127, + "step": 7951500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2656805628387692e-05, + "loss": 1.0729, + "step": 7952000 + }, + { + "epoch": 4.77, + "learning_rate": 2.265470566282713e-05, + "loss": 1.084, + "step": 7952500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2652605697266566e-05, + "loss": 1.0756, + "step": 7953000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2650505731706e-05, + "loss": 1.096, + "step": 7953500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2648405766145436e-05, + "loss": 1.0728, + "step": 7954000 + }, + { + "epoch": 4.77, + "learning_rate": 2.264631000051599e-05, + "loss": 1.081, + "step": 7954500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2644210034955427e-05, + "loss": 1.1107, + "step": 7955000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2642110069394863e-05, + "loss": 1.0962, + "step": 7955500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2640010103834297e-05, + "loss": 1.0979, + "step": 7956000 + }, + { + "epoch": 4.77, + "learning_rate": 2.263791013827373e-05, + "loss": 1.0821, + "step": 7956500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2635814372644287e-05, + "loss": 1.0911, + "step": 7957000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2633714407083724e-05, + "loss": 1.1026, + "step": 7957500 + }, + { + "epoch": 4.77, + "learning_rate": 2.263161864145428e-05, + "loss": 1.074, + "step": 7958000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2629518675893715e-05, + "loss": 1.1007, + "step": 7958500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2627418710333148e-05, + "loss": 1.0889, + "step": 7959000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2625318744772585e-05, + "loss": 1.0927, + "step": 7959500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2623218779212022e-05, + "loss": 1.085, + "step": 7960000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2621118813651455e-05, + "loss": 1.0788, + "step": 7960500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2619018848090892e-05, + "loss": 1.0884, + "step": 7961000 + }, + { + "epoch": 4.77, + "learning_rate": 2.2616918882530326e-05, + "loss": 1.103, + "step": 7961500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2614823116900883e-05, + "loss": 1.0903, + "step": 7962000 + }, + { + "epoch": 4.77, + "learning_rate": 2.261272315134032e-05, + "loss": 1.0827, + "step": 7962500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2610623185779753e-05, + "loss": 1.0897, + "step": 7963000 + }, + { + "epoch": 4.77, + "learning_rate": 2.260852322021919e-05, + "loss": 1.1329, + "step": 7963500 + }, + { + "epoch": 4.77, + "learning_rate": 2.2606423254658623e-05, + "loss": 1.0954, + "step": 7964000 + }, + { + "epoch": 4.78, + "learning_rate": 2.260432748902918e-05, + "loss": 1.0985, + "step": 7964500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2602227523468617e-05, + "loss": 1.0713, + "step": 7965000 + }, + { + "epoch": 4.78, + "learning_rate": 2.260012755790805e-05, + "loss": 1.0904, + "step": 7965500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2598027592347484e-05, + "loss": 1.1413, + "step": 7966000 + }, + { + "epoch": 4.78, + "learning_rate": 2.259593602664916e-05, + "loss": 1.103, + "step": 7966500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2593836061088598e-05, + "loss": 1.061, + "step": 7967000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2591736095528035e-05, + "loss": 1.0954, + "step": 7967500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2589636129967468e-05, + "loss": 1.0896, + "step": 7968000 + }, + { + "epoch": 4.78, + "learning_rate": 2.25875361644069e-05, + "loss": 1.1173, + "step": 7968500 + }, + { + "epoch": 4.78, + "learning_rate": 2.258543619884634e-05, + "loss": 1.0622, + "step": 7969000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2583336233285775e-05, + "loss": 1.1156, + "step": 7969500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2581240467656332e-05, + "loss": 1.0745, + "step": 7970000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2579140502095762e-05, + "loss": 1.1007, + "step": 7970500 + }, + { + "epoch": 4.78, + "learning_rate": 2.25770405365352e-05, + "loss": 1.0821, + "step": 7971000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2574940570974636e-05, + "loss": 1.0737, + "step": 7971500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2572840605414073e-05, + "loss": 1.1015, + "step": 7972000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2570740639853506e-05, + "loss": 1.0993, + "step": 7972500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2568640674292943e-05, + "loss": 1.1087, + "step": 7973000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2566540708732377e-05, + "loss": 1.1032, + "step": 7973500 + }, + { + "epoch": 4.78, + "learning_rate": 2.256444074317181e-05, + "loss": 1.0769, + "step": 7974000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2562340777611247e-05, + "loss": 1.0854, + "step": 7974500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2560245011981804e-05, + "loss": 1.065, + "step": 7975000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2558145046421237e-05, + "loss": 1.0904, + "step": 7975500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2556045080860674e-05, + "loss": 1.0972, + "step": 7976000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2553945115300108e-05, + "loss": 1.0865, + "step": 7976500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2551845149739544e-05, + "loss": 1.0852, + "step": 7977000 + }, + { + "epoch": 4.78, + "learning_rate": 2.25497493841101e-05, + "loss": 1.0787, + "step": 7977500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2547649418549535e-05, + "loss": 1.1129, + "step": 7978000 + }, + { + "epoch": 4.78, + "learning_rate": 2.254554945298897e-05, + "loss": 1.0747, + "step": 7978500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2543449487428405e-05, + "loss": 1.0788, + "step": 7979000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2541349521867842e-05, + "loss": 1.1049, + "step": 7979500 + }, + { + "epoch": 4.78, + "learning_rate": 2.25392537562384e-05, + "loss": 1.1188, + "step": 7980000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2537153790677832e-05, + "loss": 1.0949, + "step": 7980500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2535053825117266e-05, + "loss": 1.0846, + "step": 7981000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2532953859556703e-05, + "loss": 1.101, + "step": 7981500 + }, + { + "epoch": 4.79, + "learning_rate": 2.253085389399614e-05, + "loss": 1.1128, + "step": 7982000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2528758128366697e-05, + "loss": 1.1057, + "step": 7982500 + }, + { + "epoch": 4.79, + "learning_rate": 2.252665816280613e-05, + "loss": 1.1035, + "step": 7983000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2524558197245563e-05, + "loss": 1.0907, + "step": 7983500 + }, + { + "epoch": 4.79, + "learning_rate": 2.2522458231685e-05, + "loss": 1.0898, + "step": 7984000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2520362466055557e-05, + "loss": 1.0599, + "step": 7984500 + }, + { + "epoch": 4.79, + "learning_rate": 2.251826250049499e-05, + "loss": 1.0886, + "step": 7985000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2516162534934424e-05, + "loss": 1.1069, + "step": 7985500 + }, + { + "epoch": 4.79, + "learning_rate": 2.251406256937386e-05, + "loss": 1.1, + "step": 7986000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2511962603813298e-05, + "loss": 1.1138, + "step": 7986500 + }, + { + "epoch": 4.79, + "learning_rate": 2.2509866838183855e-05, + "loss": 1.0822, + "step": 7987000 + }, + { + "epoch": 4.79, + "learning_rate": 2.250776687262329e-05, + "loss": 1.0993, + "step": 7987500 + }, + { + "epoch": 4.79, + "learning_rate": 2.2505666907062722e-05, + "loss": 1.089, + "step": 7988000 + }, + { + "epoch": 4.79, + "learning_rate": 2.250356694150216e-05, + "loss": 1.1093, + "step": 7988500 + }, + { + "epoch": 4.79, + "learning_rate": 2.2501466975941595e-05, + "loss": 1.0576, + "step": 7989000 + }, + { + "epoch": 4.79, + "learning_rate": 2.249937541024327e-05, + "loss": 1.1065, + "step": 7989500 + }, + { + "epoch": 4.79, + "learning_rate": 2.2497275444682706e-05, + "loss": 1.0719, + "step": 7990000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2495175479122143e-05, + "loss": 1.0914, + "step": 7990500 + }, + { + "epoch": 4.79, + "learning_rate": 2.2493075513561576e-05, + "loss": 1.0964, + "step": 7991000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2490975548001013e-05, + "loss": 1.0738, + "step": 7991500 + }, + { + "epoch": 4.79, + "learning_rate": 2.248887558244045e-05, + "loss": 1.0782, + "step": 7992000 + }, + { + "epoch": 4.79, + "learning_rate": 2.248677561687988e-05, + "loss": 1.0911, + "step": 7992500 + }, + { + "epoch": 4.79, + "learning_rate": 2.2484675651319317e-05, + "loss": 1.0885, + "step": 7993000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2482575685758754e-05, + "loss": 1.1287, + "step": 7993500 + }, + { + "epoch": 4.79, + "learning_rate": 2.248047992012931e-05, + "loss": 1.087, + "step": 7994000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2478384154499864e-05, + "loss": 1.0979, + "step": 7994500 + }, + { + "epoch": 4.79, + "learning_rate": 2.24762841889393e-05, + "loss": 1.1095, + "step": 7995000 + }, + { + "epoch": 4.79, + "learning_rate": 2.2474184223378735e-05, + "loss": 1.067, + "step": 7995500 + }, + { + "epoch": 4.79, + "learning_rate": 2.247208425781817e-05, + "loss": 1.0771, + "step": 7996000 + }, + { + "epoch": 4.79, + "learning_rate": 2.246998429225761e-05, + "loss": 1.0995, + "step": 7996500 + }, + { + "epoch": 4.79, + "learning_rate": 2.2467888526628162e-05, + "loss": 1.0885, + "step": 7997000 + }, + { + "epoch": 4.79, + "learning_rate": 2.24657885610676e-05, + "loss": 1.091, + "step": 7997500 + }, + { + "epoch": 4.8, + "learning_rate": 2.2463688595507032e-05, + "loss": 1.1111, + "step": 7998000 + }, + { + "epoch": 4.8, + "learning_rate": 2.246158862994647e-05, + "loss": 1.1194, + "step": 7998500 + }, + { + "epoch": 4.8, + "learning_rate": 2.2459488664385906e-05, + "loss": 1.1093, + "step": 7999000 + }, + { + "epoch": 4.8, + "learning_rate": 2.2457388698825336e-05, + "loss": 1.0779, + "step": 7999500 + }, + { + "epoch": 4.8, + "learning_rate": 2.2455288733264773e-05, + "loss": 1.0806, + "step": 8000000 + }, + { + "epoch": 4.8, + "eval_loss": 1.0654122829437256, + "eval_runtime": 1096.7784, + "eval_samples_per_second": 480.243, + "eval_steps_per_second": 80.041, + "step": 8000000 } ], "max_steps": 13343552, "num_train_epochs": 8, - "total_flos": 3.442361330450166e+18, + "total_flos": 4.236358805426442e+18, "trial_name": null, "trial_params": null }