| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.866571018651363, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.028694404591104734, |
| "grad_norm": 6.9535064697265625, |
| "learning_rate": 2.9993209101500904e-05, |
| "loss": 0.8011, |
| "num_input_tokens_seen": 26464, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05738880918220947, |
| "grad_norm": 3.7668867111206055, |
| "learning_rate": 2.997284255484393e-05, |
| "loss": 0.1799, |
| "num_input_tokens_seen": 53568, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08608321377331421, |
| "grad_norm": 5.130955219268799, |
| "learning_rate": 2.9938918800982563e-05, |
| "loss": 0.1418, |
| "num_input_tokens_seen": 79840, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.11477761836441894, |
| "grad_norm": 5.7834391593933105, |
| "learning_rate": 2.9891468556286034e-05, |
| "loss": 0.1428, |
| "num_input_tokens_seen": 106848, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.14347202295552366, |
| "grad_norm": 3.7895705699920654, |
| "learning_rate": 2.983053478472707e-05, |
| "loss": 0.1297, |
| "num_input_tokens_seen": 133600, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.17216642754662842, |
| "grad_norm": 2.2129366397857666, |
| "learning_rate": 2.975617265898004e-05, |
| "loss": 0.1392, |
| "num_input_tokens_seen": 159872, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.20086083213773315, |
| "grad_norm": 5.727760314941406, |
| "learning_rate": 2.9668449510464707e-05, |
| "loss": 0.1418, |
| "num_input_tokens_seen": 186912, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22955523672883787, |
| "grad_norm": 7.682604789733887, |
| "learning_rate": 2.9567444768380745e-05, |
| "loss": 0.1336, |
| "num_input_tokens_seen": 213696, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2582496413199426, |
| "grad_norm": 5.216188430786133, |
| "learning_rate": 2.9453249887788343e-05, |
| "loss": 0.0996, |
| "num_input_tokens_seen": 240736, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.28694404591104733, |
| "grad_norm": 4.260756015777588, |
| "learning_rate": 2.9325968266799934e-05, |
| "loss": 0.1102, |
| "num_input_tokens_seen": 266816, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.31563845050215206, |
| "grad_norm": 1.7236586809158325, |
| "learning_rate": 2.918571515295803e-05, |
| "loss": 0.1443, |
| "num_input_tokens_seen": 293504, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.34433285509325684, |
| "grad_norm": 3.35003399848938, |
| "learning_rate": 2.9032617538884018e-05, |
| "loss": 0.1224, |
| "num_input_tokens_seen": 319872, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.37302725968436157, |
| "grad_norm": 3.347268581390381, |
| "learning_rate": 2.8866814047292232e-05, |
| "loss": 0.1678, |
| "num_input_tokens_seen": 346816, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4017216642754663, |
| "grad_norm": 3.474947929382324, |
| "learning_rate": 2.8688454805473647e-05, |
| "loss": 0.1154, |
| "num_input_tokens_seen": 374304, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.430416068866571, |
| "grad_norm": 6.668752670288086, |
| "learning_rate": 2.84977013093626e-05, |
| "loss": 0.1285, |
| "num_input_tokens_seen": 401568, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.45911047345767575, |
| "grad_norm": 4.752432346343994, |
| "learning_rate": 2.8294726277309815e-05, |
| "loss": 0.1144, |
| "num_input_tokens_seen": 428384, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4878048780487805, |
| "grad_norm": 3.637117624282837, |
| "learning_rate": 2.8079713493694024e-05, |
| "loss": 0.0875, |
| "num_input_tokens_seen": 454880, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5164992826398852, |
| "grad_norm": 3.157396078109741, |
| "learning_rate": 2.7852857642513838e-05, |
| "loss": 0.151, |
| "num_input_tokens_seen": 481856, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5451936872309899, |
| "grad_norm": 2.444923162460327, |
| "learning_rate": 2.7614364131110498e-05, |
| "loss": 0.1153, |
| "num_input_tokens_seen": 508192, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5738880918220947, |
| "grad_norm": 3.9938931465148926, |
| "learning_rate": 2.7364448904181152e-05, |
| "loss": 0.0971, |
| "num_input_tokens_seen": 534240, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6025824964131994, |
| "grad_norm": 2.390864133834839, |
| "learning_rate": 2.7103338248251055e-05, |
| "loss": 0.1035, |
| "num_input_tokens_seen": 561664, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6312769010043041, |
| "grad_norm": 4.270871639251709, |
| "learning_rate": 2.6831268586781746e-05, |
| "loss": 0.0908, |
| "num_input_tokens_seen": 588128, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6599713055954088, |
| "grad_norm": 2.367370367050171, |
| "learning_rate": 2.6548486266100645e-05, |
| "loss": 0.1216, |
| "num_input_tokens_seen": 614432, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6886657101865137, |
| "grad_norm": 2.0463204383850098, |
| "learning_rate": 2.6255247332346036e-05, |
| "loss": 0.0952, |
| "num_input_tokens_seen": 640832, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7173601147776184, |
| "grad_norm": 3.1408205032348633, |
| "learning_rate": 2.5951817299629266e-05, |
| "loss": 0.1073, |
| "num_input_tokens_seen": 668448, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7460545193687231, |
| "grad_norm": 3.5036535263061523, |
| "learning_rate": 2.5638470909624166e-05, |
| "loss": 0.1048, |
| "num_input_tokens_seen": 695008, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7747489239598279, |
| "grad_norm": 2.891324996948242, |
| "learning_rate": 2.531549188280135e-05, |
| "loss": 0.1015, |
| "num_input_tokens_seen": 722208, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8034433285509326, |
| "grad_norm": 1.6332054138183594, |
| "learning_rate": 2.498317266153262e-05, |
| "loss": 0.0815, |
| "num_input_tokens_seen": 749216, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8321377331420373, |
| "grad_norm": 4.443371295928955, |
| "learning_rate": 2.464181414529809e-05, |
| "loss": 0.0843, |
| "num_input_tokens_seen": 776000, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.860832137733142, |
| "grad_norm": 3.0447335243225098, |
| "learning_rate": 2.4291725418235848e-05, |
| "loss": 0.1321, |
| "num_input_tokens_seen": 802560, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8895265423242468, |
| "grad_norm": 3.728531837463379, |
| "learning_rate": 2.3933223469280704e-05, |
| "loss": 0.0868, |
| "num_input_tokens_seen": 828704, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9182209469153515, |
| "grad_norm": 3.703484296798706, |
| "learning_rate": 2.3566632905145604e-05, |
| "loss": 0.1012, |
| "num_input_tokens_seen": 855616, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9469153515064562, |
| "grad_norm": 3.728098154067993, |
| "learning_rate": 2.3192285656405456e-05, |
| "loss": 0.1139, |
| "num_input_tokens_seen": 882400, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.975609756097561, |
| "grad_norm": 1.163082480430603, |
| "learning_rate": 2.2810520676949537e-05, |
| "loss": 0.1004, |
| "num_input_tokens_seen": 909760, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0028694404591105, |
| "grad_norm": 3.1779849529266357, |
| "learning_rate": 2.2421683637074648e-05, |
| "loss": 0.1131, |
| "num_input_tokens_seen": 934112, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0315638450502151, |
| "grad_norm": 1.9103726148605347, |
| "learning_rate": 2.2026126610496852e-05, |
| "loss": 0.0896, |
| "num_input_tokens_seen": 960320, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.06025824964132, |
| "grad_norm": 2.4402358531951904, |
| "learning_rate": 2.1624207755565232e-05, |
| "loss": 0.0629, |
| "num_input_tokens_seen": 986688, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.0889526542324246, |
| "grad_norm": 3.0046820640563965, |
| "learning_rate": 2.121629099096628e-05, |
| "loss": 0.0658, |
| "num_input_tokens_seen": 1013088, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1176470588235294, |
| "grad_norm": 2.693220376968384, |
| "learning_rate": 2.0802745666212592e-05, |
| "loss": 0.0777, |
| "num_input_tokens_seen": 1039808, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.146341463414634, |
| "grad_norm": 2.451016664505005, |
| "learning_rate": 2.0383946227214188e-05, |
| "loss": 0.0908, |
| "num_input_tokens_seen": 1066144, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.175035868005739, |
| "grad_norm": 0.8696689605712891, |
| "learning_rate": 1.9960271877235306e-05, |
| "loss": 0.0731, |
| "num_input_tokens_seen": 1092576, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.2037302725968435, |
| "grad_norm": 2.7256174087524414, |
| "learning_rate": 1.953210623354359e-05, |
| "loss": 0.0661, |
| "num_input_tokens_seen": 1119104, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.2324246771879483, |
| "grad_norm": 2.751459836959839, |
| "learning_rate": 1.909983698006266e-05, |
| "loss": 0.0391, |
| "num_input_tokens_seen": 1145376, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.2611190817790532, |
| "grad_norm": 3.420569658279419, |
| "learning_rate": 1.8663855516342468e-05, |
| "loss": 0.0594, |
| "num_input_tokens_seen": 1172736, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2898134863701578, |
| "grad_norm": 3.2319304943084717, |
| "learning_rate": 1.8224556603165363e-05, |
| "loss": 0.0492, |
| "num_input_tokens_seen": 1199296, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3185078909612624, |
| "grad_norm": 6.221270561218262, |
| "learning_rate": 1.7782338005108694e-05, |
| "loss": 0.0465, |
| "num_input_tokens_seen": 1226368, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.3472022955523673, |
| "grad_norm": 1.9661167860031128, |
| "learning_rate": 1.733760013038765e-05, |
| "loss": 0.1173, |
| "num_input_tokens_seen": 1252352, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.375896700143472, |
| "grad_norm": 2.8439626693725586, |
| "learning_rate": 1.689074566830434e-05, |
| "loss": 0.0721, |
| "num_input_tokens_seen": 1278912, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.4045911047345767, |
| "grad_norm": 2.0552496910095215, |
| "learning_rate": 1.6442179224631558e-05, |
| "loss": 0.061, |
| "num_input_tokens_seen": 1306176, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.4332855093256816, |
| "grad_norm": 2.505808115005493, |
| "learning_rate": 1.5992306955261175e-05, |
| "loss": 0.0599, |
| "num_input_tokens_seen": 1332608, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4619799139167862, |
| "grad_norm": 1.2224159240722656, |
| "learning_rate": 1.5541536198449044e-05, |
| "loss": 0.0721, |
| "num_input_tokens_seen": 1359520, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.490674318507891, |
| "grad_norm": 1.0898300409317017, |
| "learning_rate": 1.5090275105989284e-05, |
| "loss": 0.0595, |
| "num_input_tokens_seen": 1386368, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.5193687230989958, |
| "grad_norm": 1.6561291217803955, |
| "learning_rate": 1.463893227365195e-05, |
| "loss": 0.0519, |
| "num_input_tokens_seen": 1412992, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.5480631276901005, |
| "grad_norm": 3.09395694732666, |
| "learning_rate": 1.4187916371218739e-05, |
| "loss": 0.0752, |
| "num_input_tokens_seen": 1439616, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.576757532281205, |
| "grad_norm": 3.0223190784454346, |
| "learning_rate": 1.3737635772451642e-05, |
| "loss": 0.0895, |
| "num_input_tokens_seen": 1466464, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.60545193687231, |
| "grad_norm": 2.7507283687591553, |
| "learning_rate": 1.328849818532963e-05, |
| "loss": 0.1118, |
| "num_input_tokens_seen": 1493440, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.6341463414634148, |
| "grad_norm": 1.4776769876480103, |
| "learning_rate": 1.2840910282888211e-05, |
| "loss": 0.0917, |
| "num_input_tokens_seen": 1520256, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.6628407460545194, |
| "grad_norm": 0.9096710681915283, |
| "learning_rate": 1.2395277334996045e-05, |
| "loss": 0.0797, |
| "num_input_tokens_seen": 1547712, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.691535150645624, |
| "grad_norm": 3.431025266647339, |
| "learning_rate": 1.1952002841402057e-05, |
| "loss": 0.0609, |
| "num_input_tokens_seen": 1574528, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.7202295552367288, |
| "grad_norm": 5.7561726570129395, |
| "learning_rate": 1.1511488166385349e-05, |
| "loss": 0.0602, |
| "num_input_tokens_seen": 1600768, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7489239598278337, |
| "grad_norm": 2.8678972721099854, |
| "learning_rate": 1.107413217533863e-05, |
| "loss": 0.0714, |
| "num_input_tokens_seen": 1627744, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.7776183644189383, |
| "grad_norm": 3.1199018955230713, |
| "learning_rate": 1.0640330873614336e-05, |
| "loss": 0.0559, |
| "num_input_tokens_seen": 1654784, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.806312769010043, |
| "grad_norm": 4.480892181396484, |
| "learning_rate": 1.0210477047960303e-05, |
| "loss": 0.1277, |
| "num_input_tokens_seen": 1680928, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.8350071736011477, |
| "grad_norm": 3.975166082382202, |
| "learning_rate": 9.78495991086979e-06, |
| "loss": 0.0817, |
| "num_input_tokens_seen": 1707232, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.8637015781922526, |
| "grad_norm": 1.2731448411941528, |
| "learning_rate": 9.364164748167806e-06, |
| "loss": 0.073, |
| "num_input_tokens_seen": 1733568, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.8923959827833574, |
| "grad_norm": 2.2199535369873047, |
| "learning_rate": 8.948472570152874e-06, |
| "loss": 0.0637, |
| "num_input_tokens_seen": 1760608, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.921090387374462, |
| "grad_norm": 2.6249306201934814, |
| "learning_rate": 8.538259766610019e-06, |
| "loss": 0.0512, |
| "num_input_tokens_seen": 1788064, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.9497847919655666, |
| "grad_norm": 4.65223503112793, |
| "learning_rate": 8.133897766007499e-06, |
| "loss": 0.0608, |
| "num_input_tokens_seen": 1814688, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.9784791965566715, |
| "grad_norm": 6.477709770202637, |
| "learning_rate": 7.735752699185711e-06, |
| "loss": 0.0596, |
| "num_input_tokens_seen": 1841056, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.005738880918221, |
| "grad_norm": 1.0149333477020264, |
| "learning_rate": 7.344185067842878e-06, |
| "loss": 0.059, |
| "num_input_tokens_seen": 1865472, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.034433285509326, |
| "grad_norm": 1.824137806892395, |
| "learning_rate": 6.959549418117669e-06, |
| "loss": 0.0411, |
| "num_input_tokens_seen": 1891968, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.0631276901004303, |
| "grad_norm": 1.539084553718567, |
| "learning_rate": 6.582194019564266e-06, |
| "loss": 0.0296, |
| "num_input_tokens_seen": 1918912, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.091822094691535, |
| "grad_norm": 3.0269973278045654, |
| "learning_rate": 6.2124605498106336e-06, |
| "loss": 0.0433, |
| "num_input_tokens_seen": 1945312, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.12051649928264, |
| "grad_norm": 1.6702455282211304, |
| "learning_rate": 5.850683785185409e-06, |
| "loss": 0.0283, |
| "num_input_tokens_seen": 1971520, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.149210903873745, |
| "grad_norm": 2.5078916549682617, |
| "learning_rate": 5.497191297593647e-06, |
| "loss": 0.0419, |
| "num_input_tokens_seen": 1998528, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.177905308464849, |
| "grad_norm": 0.1790330410003662, |
| "learning_rate": 5.1523031579157994e-06, |
| "loss": 0.0601, |
| "num_input_tokens_seen": 2025088, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.206599713055954, |
| "grad_norm": 2.8430678844451904, |
| "learning_rate": 4.816331646198556e-06, |
| "loss": 0.0368, |
| "num_input_tokens_seen": 2052032, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.235294117647059, |
| "grad_norm": 2.523798704147339, |
| "learning_rate": 4.4895809688998655e-06, |
| "loss": 0.0474, |
| "num_input_tokens_seen": 2079104, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.2639885222381637, |
| "grad_norm": 4.507711410522461, |
| "learning_rate": 4.172346983444269e-06, |
| "loss": 0.0498, |
| "num_input_tokens_seen": 2106496, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.292682926829268, |
| "grad_norm": 5.649550914764404, |
| "learning_rate": 3.864916930337852e-06, |
| "loss": 0.0389, |
| "num_input_tokens_seen": 2133248, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.321377331420373, |
| "grad_norm": 2.364619255065918, |
| "learning_rate": 3.567569173085455e-06, |
| "loss": 0.0275, |
| "num_input_tokens_seen": 2159840, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.350071736011478, |
| "grad_norm": 2.0380613803863525, |
| "learning_rate": 3.2805729461455307e-06, |
| "loss": 0.0269, |
| "num_input_tokens_seen": 2186528, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.3787661406025826, |
| "grad_norm": 3.2729814052581787, |
| "learning_rate": 3.0041881111509783e-06, |
| "loss": 0.0586, |
| "num_input_tokens_seen": 2213344, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.407460545193687, |
| "grad_norm": 4.258326053619385, |
| "learning_rate": 2.7386649216166233e-06, |
| "loss": 0.046, |
| "num_input_tokens_seen": 2239648, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.436154949784792, |
| "grad_norm": 3.044335126876831, |
| "learning_rate": 2.484243796346367e-06, |
| "loss": 0.0414, |
| "num_input_tokens_seen": 2267008, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.4648493543758967, |
| "grad_norm": 1.9635220766067505, |
| "learning_rate": 2.241155101745242e-06, |
| "loss": 0.0375, |
| "num_input_tokens_seen": 2293664, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.4935437589670015, |
| "grad_norm": 0.2657749652862549, |
| "learning_rate": 2.0096189432334194e-06, |
| "loss": 0.0541, |
| "num_input_tokens_seen": 2320032, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.5222381635581064, |
| "grad_norm": 2.3013086318969727, |
| "learning_rate": 1.7898449659510841e-06, |
| "loss": 0.0324, |
| "num_input_tokens_seen": 2346848, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.5509325681492108, |
| "grad_norm": 3.4938905239105225, |
| "learning_rate": 1.5820321649345582e-06, |
| "loss": 0.0704, |
| "num_input_tokens_seen": 2373408, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.5796269727403156, |
| "grad_norm": 1.1432446241378784, |
| "learning_rate": 1.3863687049356465e-06, |
| "loss": 0.0376, |
| "num_input_tokens_seen": 2400192, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.6083213773314204, |
| "grad_norm": 3.4845967292785645, |
| "learning_rate": 1.2030317500472572e-06, |
| "loss": 0.0377, |
| "num_input_tokens_seen": 2426720, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.637015781922525, |
| "grad_norm": 0.9145282506942749, |
| "learning_rate": 1.0321873032896328e-06, |
| "loss": 0.0459, |
| "num_input_tokens_seen": 2453504, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.6657101865136297, |
| "grad_norm": 6.571470260620117, |
| "learning_rate": 8.7399005630238e-07, |
| "loss": 0.0352, |
| "num_input_tokens_seen": 2480064, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.6944045911047345, |
| "grad_norm": 1.0509833097457886, |
| "learning_rate": 7.285832492784456e-07, |
| "loss": 0.0308, |
| "num_input_tokens_seen": 2507904, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.7230989956958394, |
| "grad_norm": 1.271236777305603, |
| "learning_rate": 5.960985412668457e-07, |
| "loss": 0.0359, |
| "num_input_tokens_seen": 2534624, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.751793400286944, |
| "grad_norm": 4.02854061126709, |
| "learning_rate": 4.766558909615504e-07, |
| "loss": 0.0251, |
| "num_input_tokens_seen": 2560384, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.7804878048780486, |
| "grad_norm": 2.239231824874878, |
| "learning_rate": 3.703634480845175e-07, |
| "loss": 0.0278, |
| "num_input_tokens_seen": 2586976, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.8091822094691534, |
| "grad_norm": 1.5159451961517334, |
| "learning_rate": 2.7731745546118295e-07, |
| "loss": 0.0337, |
| "num_input_tokens_seen": 2613568, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.8378766140602583, |
| "grad_norm": 2.9302163124084473, |
| "learning_rate": 1.9760216187710788e-07, |
| "loss": 0.0418, |
| "num_input_tokens_seen": 2640128, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.866571018651363, |
| "grad_norm": 3.014836311340332, |
| "learning_rate": 1.3128974579462771e-07, |
| "loss": 0.042, |
| "num_input_tokens_seen": 2666880, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1044, |
| "num_input_tokens_seen": 2666880, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1416557001900032e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|