| { |
| "best_global_step": null, |
| "best_metric": 0.4795108139514923, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 30000, |
| "global_step": 65896, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0015175427947068108, |
| "grad_norm": 56.41612243652344, |
| "learning_rate": 1.5022761760242794e-07, |
| "loss": 8.3409, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0030350855894136215, |
| "grad_norm": 18.706037521362305, |
| "learning_rate": 3.019726858877087e-07, |
| "loss": 5.1846, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.004552628384120432, |
| "grad_norm": 3.0564115047454834, |
| "learning_rate": 4.537177541729894e-07, |
| "loss": 1.1549, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.006070171178827243, |
| "grad_norm": 1.5621784925460815, |
| "learning_rate": 6.054628224582701e-07, |
| "loss": 0.335, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.007587713973534054, |
| "grad_norm": 1.4899357557296753, |
| "learning_rate": 7.57207890743551e-07, |
| "loss": 0.244, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.009105256768240864, |
| "grad_norm": 2.1066386699676514, |
| "learning_rate": 9.089529590288317e-07, |
| "loss": 0.2106, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.010622799562947675, |
| "grad_norm": 1.2894740104675293, |
| "learning_rate": 1.0606980273141124e-06, |
| "loss": 0.1825, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.012140342357654486, |
| "grad_norm": 2.292886972427368, |
| "learning_rate": 1.212443095599393e-06, |
| "loss": 0.1729, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.013657885152361297, |
| "grad_norm": 0.8042752146720886, |
| "learning_rate": 1.3641881638846738e-06, |
| "loss": 0.1696, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.015175427947068108, |
| "grad_norm": 1.586381435394287, |
| "learning_rate": 1.5159332321699546e-06, |
| "loss": 0.1612, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.016692970741774917, |
| "grad_norm": 2.209632635116577, |
| "learning_rate": 1.6676783004552353e-06, |
| "loss": 0.1693, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.018210513536481728, |
| "grad_norm": 1.8002041578292847, |
| "learning_rate": 1.819423368740516e-06, |
| "loss": 0.1649, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.01972805633118854, |
| "grad_norm": 0.7994608879089355, |
| "learning_rate": 1.971168437025797e-06, |
| "loss": 0.1666, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.02124559912589535, |
| "grad_norm": 0.8511660099029541, |
| "learning_rate": 2.1229135053110773e-06, |
| "loss": 0.1671, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.02276314192060216, |
| "grad_norm": 1.5392524003982544, |
| "learning_rate": 2.274658573596358e-06, |
| "loss": 0.1573, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.024280684715308972, |
| "grad_norm": 1.7585276365280151, |
| "learning_rate": 2.426403641881639e-06, |
| "loss": 0.1589, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.025798227510015783, |
| "grad_norm": 1.419176697731018, |
| "learning_rate": 2.57814871016692e-06, |
| "loss": 0.155, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.027315770304722594, |
| "grad_norm": 1.071205973625183, |
| "learning_rate": 2.729893778452201e-06, |
| "loss": 0.1616, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.028833313099429405, |
| "grad_norm": 1.5084614753723145, |
| "learning_rate": 2.8816388467374813e-06, |
| "loss": 0.1639, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.030350855894136216, |
| "grad_norm": 0.5268077254295349, |
| "learning_rate": 3.0333839150227617e-06, |
| "loss": 0.1531, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03186839868884302, |
| "grad_norm": 1.1704555749893188, |
| "learning_rate": 3.185128983308043e-06, |
| "loss": 0.1637, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.033385941483549834, |
| "grad_norm": 8.939970970153809, |
| "learning_rate": 3.3368740515933235e-06, |
| "loss": 0.1569, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.034903484278256645, |
| "grad_norm": 0.9303669929504395, |
| "learning_rate": 3.488619119878604e-06, |
| "loss": 0.1561, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.036421027072963456, |
| "grad_norm": 1.3250867128372192, |
| "learning_rate": 3.6403641881638852e-06, |
| "loss": 0.157, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.03793856986767027, |
| "grad_norm": 0.671481192111969, |
| "learning_rate": 3.7921092564491657e-06, |
| "loss": 0.1539, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.03945611266237708, |
| "grad_norm": 1.0448344945907593, |
| "learning_rate": 3.9438543247344466e-06, |
| "loss": 0.1513, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.04097365545708389, |
| "grad_norm": 0.7382568717002869, |
| "learning_rate": 4.0955993930197274e-06, |
| "loss": 0.1515, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.0424911982517907, |
| "grad_norm": 0.9559106230735779, |
| "learning_rate": 4.247344461305008e-06, |
| "loss": 0.1579, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.04400874104649751, |
| "grad_norm": 1.2689694166183472, |
| "learning_rate": 4.399089529590288e-06, |
| "loss": 0.1544, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.04552628384120432, |
| "grad_norm": 1.0517489910125732, |
| "learning_rate": 4.55083459787557e-06, |
| "loss": 0.1558, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.04704382663591113, |
| "grad_norm": 0.9564648270606995, |
| "learning_rate": 4.70257966616085e-06, |
| "loss": 0.1593, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.048561369430617944, |
| "grad_norm": 5.6809797286987305, |
| "learning_rate": 4.854324734446131e-06, |
| "loss": 0.1584, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.050078912225324755, |
| "grad_norm": 0.6552232503890991, |
| "learning_rate": 5.006069802731411e-06, |
| "loss": 0.1547, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.051596455020031566, |
| "grad_norm": 1.9638985395431519, |
| "learning_rate": 5.157814871016692e-06, |
| "loss": 0.1518, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.05311399781473838, |
| "grad_norm": 1.733556866645813, |
| "learning_rate": 5.309559939301974e-06, |
| "loss": 0.1608, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.05463154060944519, |
| "grad_norm": 2.305605173110962, |
| "learning_rate": 5.4613050075872545e-06, |
| "loss": 0.1567, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.056149083404152, |
| "grad_norm": 1.0986473560333252, |
| "learning_rate": 5.6130500758725345e-06, |
| "loss": 0.1621, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.05766662619885881, |
| "grad_norm": 1.204899549484253, |
| "learning_rate": 5.764795144157815e-06, |
| "loss": 0.1612, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.05918416899356562, |
| "grad_norm": 2.068692684173584, |
| "learning_rate": 5.916540212443096e-06, |
| "loss": 0.1612, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.06070171178827243, |
| "grad_norm": 1.4351601600646973, |
| "learning_rate": 6.068285280728376e-06, |
| "loss": 0.1601, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.06221925458297924, |
| "grad_norm": 0.6356476545333862, |
| "learning_rate": 6.220030349013657e-06, |
| "loss": 0.1626, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.06373679737768605, |
| "grad_norm": 1.5154367685317993, |
| "learning_rate": 6.371775417298939e-06, |
| "loss": 0.1613, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.06525434017239286, |
| "grad_norm": 3.8945472240448, |
| "learning_rate": 6.52352048558422e-06, |
| "loss": 0.1595, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.06677188296709967, |
| "grad_norm": 3.9455080032348633, |
| "learning_rate": 6.6752655538695e-06, |
| "loss": 0.1573, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.06828942576180648, |
| "grad_norm": 0.725102961063385, |
| "learning_rate": 6.827010622154781e-06, |
| "loss": 0.1589, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.06980696855651329, |
| "grad_norm": 0.5262890458106995, |
| "learning_rate": 6.978755690440061e-06, |
| "loss": 0.1612, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.0713245113512201, |
| "grad_norm": 0.8161805272102356, |
| "learning_rate": 7.130500758725342e-06, |
| "loss": 0.1571, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.07284205414592691, |
| "grad_norm": 0.5373992323875427, |
| "learning_rate": 7.2822458270106225e-06, |
| "loss": 0.1623, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.07435959694063372, |
| "grad_norm": 0.9811096787452698, |
| "learning_rate": 7.433990895295904e-06, |
| "loss": 0.159, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.07587713973534053, |
| "grad_norm": 0.9945287108421326, |
| "learning_rate": 7.585735963581184e-06, |
| "loss": 0.1514, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.07739468253004735, |
| "grad_norm": 0.566162645816803, |
| "learning_rate": 7.737481031866465e-06, |
| "loss": 0.1606, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.07891222532475416, |
| "grad_norm": 0.9396035075187683, |
| "learning_rate": 7.889226100151746e-06, |
| "loss": 0.1553, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.08042976811946097, |
| "grad_norm": 2.3921587467193604, |
| "learning_rate": 8.040971168437027e-06, |
| "loss": 0.1578, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.08194731091416778, |
| "grad_norm": 0.6013012528419495, |
| "learning_rate": 8.192716236722306e-06, |
| "loss": 0.1638, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.08346485370887459, |
| "grad_norm": 0.69361412525177, |
| "learning_rate": 8.344461305007589e-06, |
| "loss": 0.1607, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.0849823965035814, |
| "grad_norm": 10.008782386779785, |
| "learning_rate": 8.49620637329287e-06, |
| "loss": 0.1623, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.08649993929828821, |
| "grad_norm": 7.383462429046631, |
| "learning_rate": 8.64795144157815e-06, |
| "loss": 0.1609, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.08801748209299502, |
| "grad_norm": 0.6986634135246277, |
| "learning_rate": 8.79969650986343e-06, |
| "loss": 0.1631, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.08953502488770183, |
| "grad_norm": 0.8260175585746765, |
| "learning_rate": 8.95144157814871e-06, |
| "loss": 0.1641, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.09105256768240864, |
| "grad_norm": 1.675830364227295, |
| "learning_rate": 9.103186646433991e-06, |
| "loss": 0.1638, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.09257011047711546, |
| "grad_norm": 2.092184543609619, |
| "learning_rate": 9.254931714719272e-06, |
| "loss": 0.169, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.09408765327182227, |
| "grad_norm": 0.577240526676178, |
| "learning_rate": 9.406676783004553e-06, |
| "loss": 0.1621, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.09560519606652908, |
| "grad_norm": 2.742009162902832, |
| "learning_rate": 9.558421851289834e-06, |
| "loss": 0.1615, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.09712273886123589, |
| "grad_norm": 0.755526602268219, |
| "learning_rate": 9.710166919575115e-06, |
| "loss": 0.1657, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.0986402816559427, |
| "grad_norm": 0.8038458824157715, |
| "learning_rate": 9.861911987860396e-06, |
| "loss": 0.1636, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.10015782445064951, |
| "grad_norm": 0.8791661858558655, |
| "learning_rate": 9.99999943176563e-06, |
| "loss": 0.1645, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.10167536724535632, |
| "grad_norm": 0.9931176900863647, |
| "learning_rate": 9.999916652173913e-06, |
| "loss": 0.1727, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.10319291004006313, |
| "grad_norm": 0.6823338866233826, |
| "learning_rate": 9.999693570463897e-06, |
| "loss": 0.1689, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.10471045283476994, |
| "grad_norm": 0.7916462421417236, |
| "learning_rate": 9.999330192895455e-06, |
| "loss": 0.1637, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.10622799562947675, |
| "grad_norm": 0.6410049796104431, |
| "learning_rate": 9.998826529665285e-06, |
| "loss": 0.1731, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.10774553842418357, |
| "grad_norm": 1.0823462009429932, |
| "learning_rate": 9.998182594906624e-06, |
| "loss": 0.1673, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.10926308121889038, |
| "grad_norm": 1.3753924369812012, |
| "learning_rate": 9.997398406688858e-06, |
| "loss": 0.1625, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.11078062401359719, |
| "grad_norm": 2.6931698322296143, |
| "learning_rate": 9.996473987017008e-06, |
| "loss": 0.167, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.112298166808304, |
| "grad_norm": 1.7609068155288696, |
| "learning_rate": 9.995409361831112e-06, |
| "loss": 0.1645, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.11381570960301081, |
| "grad_norm": 0.9486920833587646, |
| "learning_rate": 9.994204561005502e-06, |
| "loss": 0.1663, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.11533325239771762, |
| "grad_norm": 1.1171340942382812, |
| "learning_rate": 9.992859618347963e-06, |
| "loss": 0.165, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.11685079519242443, |
| "grad_norm": 1.054910659790039, |
| "learning_rate": 9.991374571598786e-06, |
| "loss": 0.1645, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.11836833798713124, |
| "grad_norm": 1.4396705627441406, |
| "learning_rate": 9.989749462429707e-06, |
| "loss": 0.1674, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.11988588078183805, |
| "grad_norm": 4.53713321685791, |
| "learning_rate": 9.987984336442738e-06, |
| "loss": 0.1621, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.12140342357654486, |
| "grad_norm": 1.5164715051651, |
| "learning_rate": 9.986079243168885e-06, |
| "loss": 0.1658, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.12292096637125167, |
| "grad_norm": 0.6809989213943481, |
| "learning_rate": 9.984034236066764e-06, |
| "loss": 0.168, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.12443850916595849, |
| "grad_norm": 1.363065481185913, |
| "learning_rate": 9.981849372521101e-06, |
| "loss": 0.1611, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.12595605196066528, |
| "grad_norm": 1.713124394416809, |
| "learning_rate": 9.979524713841111e-06, |
| "loss": 0.1592, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.1274735947553721, |
| "grad_norm": 3.671640157699585, |
| "learning_rate": 9.97706032525879e-06, |
| "loss": 0.1787, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.1289911375500789, |
| "grad_norm": 0.6882109045982361, |
| "learning_rate": 9.97445627592708e-06, |
| "loss": 0.1663, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.13050868034478572, |
| "grad_norm": 0.4362512230873108, |
| "learning_rate": 9.971712638917924e-06, |
| "loss": 0.1629, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.13202622313949253, |
| "grad_norm": 0.7280552983283997, |
| "learning_rate": 9.968829491220221e-06, |
| "loss": 0.1667, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.13354376593419934, |
| "grad_norm": 0.7983876466751099, |
| "learning_rate": 9.965806913737671e-06, |
| "loss": 0.1656, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.13506130872890615, |
| "grad_norm": 0.5041903853416443, |
| "learning_rate": 9.962644991286487e-06, |
| "loss": 0.1669, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.13657885152361296, |
| "grad_norm": 4.93981409072876, |
| "learning_rate": 9.959343812593037e-06, |
| "loss": 0.1672, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.13809639431831977, |
| "grad_norm": 1.671231746673584, |
| "learning_rate": 9.955903470291331e-06, |
| "loss": 0.1737, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.13961393711302658, |
| "grad_norm": 1.0303494930267334, |
| "learning_rate": 9.952324060920446e-06, |
| "loss": 0.173, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.1411314799077334, |
| "grad_norm": 1.154575228691101, |
| "learning_rate": 9.948605684921799e-06, |
| "loss": 0.1704, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.1426490227024402, |
| "grad_norm": 2.287119150161743, |
| "learning_rate": 9.944748446636334e-06, |
| "loss": 0.1644, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.14416656549714701, |
| "grad_norm": 0.7774553298950195, |
| "learning_rate": 9.940752454301597e-06, |
| "loss": 0.1714, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.14568410829185383, |
| "grad_norm": 0.48550257086753845, |
| "learning_rate": 9.936617820048692e-06, |
| "loss": 0.1615, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.14720165108656064, |
| "grad_norm": 0.9614207744598389, |
| "learning_rate": 9.932344659899146e-06, |
| "loss": 0.1674, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.14871919388126745, |
| "grad_norm": 0.6777101755142212, |
| "learning_rate": 9.927933093761638e-06, |
| "loss": 0.1704, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.15023673667597426, |
| "grad_norm": 1.3050462007522583, |
| "learning_rate": 9.923383245428651e-06, |
| "loss": 0.161, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.15175427947068107, |
| "grad_norm": 0.702738344669342, |
| "learning_rate": 9.91869524257298e-06, |
| "loss": 0.1684, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.15327182226538788, |
| "grad_norm": 0.5779732465744019, |
| "learning_rate": 9.91386921674417e-06, |
| "loss": 0.1681, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.1547893650600947, |
| "grad_norm": 1.1301337480545044, |
| "learning_rate": 9.9089053033648e-06, |
| "loss": 0.1715, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.1563069078548015, |
| "grad_norm": 0.9821518063545227, |
| "learning_rate": 9.903803641726713e-06, |
| "loss": 0.1747, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.1578244506495083, |
| "grad_norm": 2.881585121154785, |
| "learning_rate": 9.898564374987075e-06, |
| "loss": 0.163, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.15934199344421512, |
| "grad_norm": 1.2632317543029785, |
| "learning_rate": 9.893187650164384e-06, |
| "loss": 0.1677, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.16085953623892194, |
| "grad_norm": 0.5543649792671204, |
| "learning_rate": 9.887673618134333e-06, |
| "loss": 0.164, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.16237707903362875, |
| "grad_norm": 0.6111209988594055, |
| "learning_rate": 9.882022433625574e-06, |
| "loss": 0.1584, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.16389462182833556, |
| "grad_norm": 1.4198076725006104, |
| "learning_rate": 9.876234255215383e-06, |
| "loss": 0.1699, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.16541216462304237, |
| "grad_norm": 0.710670530796051, |
| "learning_rate": 9.870309245325206e-06, |
| "loss": 0.1638, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.16692970741774918, |
| "grad_norm": 0.851134181022644, |
| "learning_rate": 9.864247570216102e-06, |
| "loss": 0.1709, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.168447250212456, |
| "grad_norm": 2.1078672409057617, |
| "learning_rate": 9.858049399984076e-06, |
| "loss": 0.1621, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.1699647930071628, |
| "grad_norm": 1.062860369682312, |
| "learning_rate": 9.851714908555313e-06, |
| "loss": 0.1675, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.1714823358018696, |
| "grad_norm": 0.6492528319358826, |
| "learning_rate": 9.845244273681287e-06, |
| "loss": 0.1663, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.17299987859657642, |
| "grad_norm": 0.5542171001434326, |
| "learning_rate": 9.838637676933782e-06, |
| "loss": 0.1616, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.17451742139128323, |
| "grad_norm": 1.5355699062347412, |
| "learning_rate": 9.831895303699792e-06, |
| "loss": 0.171, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.17603496418599004, |
| "grad_norm": 0.8221864104270935, |
| "learning_rate": 9.82501734317632e-06, |
| "loss": 0.1671, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.17755250698069686, |
| "grad_norm": 9.875476837158203, |
| "learning_rate": 9.818003988365068e-06, |
| "loss": 0.1668, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.17907004977540367, |
| "grad_norm": 1.923878788948059, |
| "learning_rate": 9.810855436067027e-06, |
| "loss": 0.1743, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.18058759257011048, |
| "grad_norm": 1.0459812879562378, |
| "learning_rate": 9.803571886876943e-06, |
| "loss": 0.1718, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.1821051353648173, |
| "grad_norm": 1.3900260925292969, |
| "learning_rate": 9.7961535451777e-06, |
| "loss": 0.1706, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.1836226781595241, |
| "grad_norm": 0.52808678150177, |
| "learning_rate": 9.788600619134582e-06, |
| "loss": 0.1704, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.1851402209542309, |
| "grad_norm": 3.0196077823638916, |
| "learning_rate": 9.780913320689425e-06, |
| "loss": 0.1723, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.18665776374893772, |
| "grad_norm": 0.9358514547348022, |
| "learning_rate": 9.773091865554673e-06, |
| "loss": 0.1627, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.18817530654364453, |
| "grad_norm": 0.5178400874137878, |
| "learning_rate": 9.765136473207335e-06, |
| "loss": 0.1669, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.18969284933835134, |
| "grad_norm": 0.7377560138702393, |
| "learning_rate": 9.757047366882807e-06, |
| "loss": 0.1691, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.19121039213305815, |
| "grad_norm": 0.4387303292751312, |
| "learning_rate": 9.748824773568626e-06, |
| "loss": 0.1676, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.19272793492776497, |
| "grad_norm": 0.7734019756317139, |
| "learning_rate": 9.740468923998088e-06, |
| "loss": 0.166, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.19424547772247178, |
| "grad_norm": 5.757879257202148, |
| "learning_rate": 9.731980052643782e-06, |
| "loss": 0.1619, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.1957630205171786, |
| "grad_norm": 2.1326065063476562, |
| "learning_rate": 9.723358397711004e-06, |
| "loss": 0.1646, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.1972805633118854, |
| "grad_norm": 2.209416151046753, |
| "learning_rate": 9.71460420113108e-06, |
| "loss": 0.1695, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.1987981061065922, |
| "grad_norm": 2.3190994262695312, |
| "learning_rate": 9.705717708554567e-06, |
| "loss": 0.1668, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.20031564890129902, |
| "grad_norm": 1.0803442001342773, |
| "learning_rate": 9.69669916934437e-06, |
| "loss": 0.1644, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.20183319169600583, |
| "grad_norm": 1.2388124465942383, |
| "learning_rate": 9.687548836568736e-06, |
| "loss": 0.1688, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.20335073449071264, |
| "grad_norm": 3.6386444568634033, |
| "learning_rate": 9.678266966994163e-06, |
| "loss": 0.1616, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.20486827728541945, |
| "grad_norm": 0.5421572327613831, |
| "learning_rate": 9.668853821078184e-06, |
| "loss": 0.1668, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.20638582008012626, |
| "grad_norm": 0.970447838306427, |
| "learning_rate": 9.659309662962061e-06, |
| "loss": 0.168, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.20790336287483308, |
| "grad_norm": 0.7634482383728027, |
| "learning_rate": 9.649634760463383e-06, |
| "loss": 0.1631, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.2094209056695399, |
| "grad_norm": 0.7025083899497986, |
| "learning_rate": 9.639829385068538e-06, |
| "loss": 0.1607, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.2109384484642467, |
| "grad_norm": 0.6824074983596802, |
| "learning_rate": 9.6298938119251e-06, |
| "loss": 0.1611, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.2124559912589535, |
| "grad_norm": 0.5447623133659363, |
| "learning_rate": 9.619828319834105e-06, |
| "loss": 0.173, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.21397353405366032, |
| "grad_norm": 0.7672458291053772, |
| "learning_rate": 9.609633191242239e-06, |
| "loss": 0.1731, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.21549107684836713, |
| "grad_norm": 4.314092636108398, |
| "learning_rate": 9.599308712233895e-06, |
| "loss": 0.1681, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.21700861964307394, |
| "grad_norm": 0.6085025072097778, |
| "learning_rate": 9.588855172523157e-06, |
| "loss": 0.1721, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.21852616243778075, |
| "grad_norm": 2.6311752796173096, |
| "learning_rate": 9.578272865445671e-06, |
| "loss": 0.1637, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.22004370523248756, |
| "grad_norm": 0.9000397324562073, |
| "learning_rate": 9.567562087950403e-06, |
| "loss": 0.1656, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.22156124802719437, |
| "grad_norm": 0.8496785163879395, |
| "learning_rate": 9.55672314059132e-06, |
| "loss": 0.1688, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.22307879082190119, |
| "grad_norm": 1.8966537714004517, |
| "learning_rate": 9.545756327518947e-06, |
| "loss": 0.1721, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.224596333616608, |
| "grad_norm": 0.5740467309951782, |
| "learning_rate": 9.534661956471834e-06, |
| "loss": 0.162, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.2261138764113148, |
| "grad_norm": 0.6003543138504028, |
| "learning_rate": 9.523440338767922e-06, |
| "loss": 0.1747, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.22763141920602162, |
| "grad_norm": 2.5062880516052246, |
| "learning_rate": 9.512091789295807e-06, |
| "loss": 0.1693, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.22914896200072843, |
| "grad_norm": 0.5050596594810486, |
| "learning_rate": 9.500616626505906e-06, |
| "loss": 0.1648, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.23066650479543524, |
| "grad_norm": 0.9980542659759521, |
| "learning_rate": 9.489015172401511e-06, |
| "loss": 0.1665, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.23218404759014205, |
| "grad_norm": 0.49514228105545044, |
| "learning_rate": 9.477287752529772e-06, |
| "loss": 0.1648, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.23370159038484886, |
| "grad_norm": 0.6534783244132996, |
| "learning_rate": 9.46543469597254e-06, |
| "loss": 0.1676, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.23521913317955567, |
| "grad_norm": 1.3305740356445312, |
| "learning_rate": 9.45345633533715e-06, |
| "loss": 0.1747, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.23673667597426248, |
| "grad_norm": 1.0036684274673462, |
| "learning_rate": 9.44135300674708e-06, |
| "loss": 0.1719, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.2382542187689693, |
| "grad_norm": 1.052393913269043, |
| "learning_rate": 9.429125049832518e-06, |
| "loss": 0.1702, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.2397717615636761, |
| "grad_norm": 1.700551986694336, |
| "learning_rate": 9.416772807720835e-06, |
| "loss": 0.1642, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.24128930435838292, |
| "grad_norm": 1.1149799823760986, |
| "learning_rate": 9.404296627026959e-06, |
| "loss": 0.1707, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.24280684715308973, |
| "grad_norm": 0.7962595820426941, |
| "learning_rate": 9.391696857843638e-06, |
| "loss": 0.1688, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.24432438994779654, |
| "grad_norm": 2.124986171722412, |
| "learning_rate": 9.378973853731627e-06, |
| "loss": 0.1584, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.24584193274250335, |
| "grad_norm": 2.773843288421631, |
| "learning_rate": 9.366127971709764e-06, |
| "loss": 0.168, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.24735947553721016, |
| "grad_norm": 0.8750647306442261, |
| "learning_rate": 9.353159572244953e-06, |
| "loss": 0.1677, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.24887701833191697, |
| "grad_norm": 1.1571807861328125, |
| "learning_rate": 9.340069019242038e-06, |
| "loss": 0.1729, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.2503945611266238, |
| "grad_norm": 0.78291255235672, |
| "learning_rate": 9.326856680033609e-06, |
| "loss": 0.1678, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.25191210392133057, |
| "grad_norm": 4.446779727935791, |
| "learning_rate": 9.313522925369678e-06, |
| "loss": 0.1672, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.2534296467160374, |
| "grad_norm": 0.4619388282299042, |
| "learning_rate": 9.300068129407292e-06, |
| "loss": 0.1663, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.2549471895107442, |
| "grad_norm": 0.7868841886520386, |
| "learning_rate": 9.286492669700016e-06, |
| "loss": 0.1681, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.256464732305451, |
| "grad_norm": 0.615048885345459, |
| "learning_rate": 9.272796927187353e-06, |
| "loss": 0.1686, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.2579822751001578, |
| "grad_norm": 0.76714688539505, |
| "learning_rate": 9.258981286184046e-06, |
| "loss": 0.1646, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.25949981789486465, |
| "grad_norm": 1.5852404832839966, |
| "learning_rate": 9.245046134369295e-06, |
| "loss": 0.1663, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.26101736068957143, |
| "grad_norm": 0.47872257232666016, |
| "learning_rate": 9.230991862775884e-06, |
| "loss": 0.1667, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.26253490348427827, |
| "grad_norm": 0.9261009097099304, |
| "learning_rate": 9.216818865779203e-06, |
| "loss": 0.1687, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.26405244627898505, |
| "grad_norm": 1.3889875411987305, |
| "learning_rate": 9.20252754108618e-06, |
| "loss": 0.1663, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.2655699890736919, |
| "grad_norm": 1.199637532234192, |
| "learning_rate": 9.188118289724127e-06, |
| "loss": 0.1561, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.2670875318683987, |
| "grad_norm": 2.0619025230407715, |
| "learning_rate": 9.17359151602948e-06, |
| "loss": 0.1658, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.2686050746631055, |
| "grad_norm": 0.5356110334396362, |
| "learning_rate": 9.158947627636462e-06, |
| "loss": 0.1579, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.2701226174578123, |
| "grad_norm": 0.7900151014328003, |
| "learning_rate": 9.144187035465631e-06, |
| "loss": 0.1696, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.27164016025251914, |
| "grad_norm": 0.6425641179084778, |
| "learning_rate": 9.129310153712365e-06, |
| "loss": 0.1702, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.2731577030472259, |
| "grad_norm": 0.46129781007766724, |
| "learning_rate": 9.114317399835225e-06, |
| "loss": 0.1662, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.27467524584193276, |
| "grad_norm": 0.8664289116859436, |
| "learning_rate": 9.099209194544248e-06, |
| "loss": 0.1646, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.27619278863663954, |
| "grad_norm": 2.408888339996338, |
| "learning_rate": 9.083985961789148e-06, |
| "loss": 0.1705, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.2777103314313464, |
| "grad_norm": 0.7840184569358826, |
| "learning_rate": 9.0686481287474e-06, |
| "loss": 0.1671, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.27922787422605316, |
| "grad_norm": 1.1906856298446655, |
| "learning_rate": 9.053196125812276e-06, |
| "loss": 0.1666, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.28074541702076, |
| "grad_norm": 0.7326360940933228, |
| "learning_rate": 9.037630386580752e-06, |
| "loss": 0.1694, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.2822629598154668, |
| "grad_norm": 1.0893489122390747, |
| "learning_rate": 9.021951347841344e-06, |
| "loss": 0.1643, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.2837805026101736, |
| "grad_norm": 0.863768994808197, |
| "learning_rate": 9.006159449561859e-06, |
| "loss": 0.1685, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.2852980454048804, |
| "grad_norm": 0.8099831938743591, |
| "learning_rate": 8.990255134877037e-06, |
| "loss": 0.1674, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.28681558819958725, |
| "grad_norm": 0.7958328723907471, |
| "learning_rate": 8.974238850076128e-06, |
| "loss": 0.1654, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.28833313099429403, |
| "grad_norm": 0.6013241410255432, |
| "learning_rate": 8.95811104459036e-06, |
| "loss": 0.1688, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.28985067378900087, |
| "grad_norm": 0.7762428522109985, |
| "learning_rate": 8.941872170980333e-06, |
| "loss": 0.1652, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.29136821658370765, |
| "grad_norm": 0.7196159958839417, |
| "learning_rate": 8.925522684923311e-06, |
| "loss": 0.1716, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.2928857593784145, |
| "grad_norm": 0.737194836139679, |
| "learning_rate": 8.909063045200454e-06, |
| "loss": 0.1534, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.2944033021731213, |
| "grad_norm": 0.6643932461738586, |
| "learning_rate": 8.892493713683918e-06, |
| "loss": 0.1689, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.2959208449678281, |
| "grad_norm": 0.5714944005012512, |
| "learning_rate": 8.875815155323923e-06, |
| "loss": 0.1698, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.2974383877625349, |
| "grad_norm": 0.9755032658576965, |
| "learning_rate": 8.85902783813568e-06, |
| "loss": 0.1688, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.29895593055724173, |
| "grad_norm": 0.7520804405212402, |
| "learning_rate": 8.842132233186272e-06, |
| "loss": 0.1678, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.3004734733519485, |
| "grad_norm": 1.14603853225708, |
| "learning_rate": 8.825128814581439e-06, |
| "loss": 0.1705, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.30199101614665536, |
| "grad_norm": 1.068724274635315, |
| "learning_rate": 8.808018059452264e-06, |
| "loss": 0.1694, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.30350855894136214, |
| "grad_norm": 1.168589472770691, |
| "learning_rate": 8.790800447941786e-06, |
| "loss": 0.1672, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.305026101736069, |
| "grad_norm": 0.6969729065895081, |
| "learning_rate": 8.773476463191533e-06, |
| "loss": 0.1626, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.30654364453077576, |
| "grad_norm": 0.7738513946533203, |
| "learning_rate": 8.756046591327963e-06, |
| "loss": 0.1665, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.3080611873254826, |
| "grad_norm": 0.5603029131889343, |
| "learning_rate": 8.738511321448815e-06, |
| "loss": 0.1724, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.3095787301201894, |
| "grad_norm": 0.8661625981330872, |
| "learning_rate": 8.720871145609394e-06, |
| "loss": 0.1675, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.3110962729148962, |
| "grad_norm": 1.929918885231018, |
| "learning_rate": 8.70312655880876e-06, |
| "loss": 0.163, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.312613815709603, |
| "grad_norm": 0.903670608997345, |
| "learning_rate": 8.685278058975832e-06, |
| "loss": 0.1675, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.31413135850430984, |
| "grad_norm": 0.876111626625061, |
| "learning_rate": 8.667326146955431e-06, |
| "loss": 0.1722, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.3156489012990166, |
| "grad_norm": 0.5705169439315796, |
| "learning_rate": 8.649271326494209e-06, |
| "loss": 0.1605, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.31716644409372347, |
| "grad_norm": 0.7466210722923279, |
| "learning_rate": 8.631114104226523e-06, |
| "loss": 0.165, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.31868398688843025, |
| "grad_norm": 0.6578019261360168, |
| "learning_rate": 8.612854989660215e-06, |
| "loss": 0.1665, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.3202015296831371, |
| "grad_norm": 0.5048807263374329, |
| "learning_rate": 8.594494495162317e-06, |
| "loss": 0.1638, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.32171907247784387, |
| "grad_norm": 0.7712035179138184, |
| "learning_rate": 8.576033135944674e-06, |
| "loss": 0.1671, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.3232366152725507, |
| "grad_norm": 0.49530839920043945, |
| "learning_rate": 8.557471430049476e-06, |
| "loss": 0.1648, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.3247541580672575, |
| "grad_norm": 1.7516402006149292, |
| "learning_rate": 8.538809898334743e-06, |
| "loss": 0.1682, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.32627170086196433, |
| "grad_norm": 0.9786944389343262, |
| "learning_rate": 8.520049064459687e-06, |
| "loss": 0.1674, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.3277892436566711, |
| "grad_norm": 0.8049986362457275, |
| "learning_rate": 8.50118945487003e-06, |
| "loss": 0.1584, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.32930678645137795, |
| "grad_norm": 1.1162434816360474, |
| "learning_rate": 8.482231598783231e-06, |
| "loss": 0.1638, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.33082432924608474, |
| "grad_norm": 1.419999361038208, |
| "learning_rate": 8.463176028173632e-06, |
| "loss": 0.16, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.3323418720407916, |
| "grad_norm": 0.6773690581321716, |
| "learning_rate": 8.444023277757527e-06, |
| "loss": 0.162, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.33385941483549836, |
| "grad_norm": 0.5430874824523926, |
| "learning_rate": 8.424773884978169e-06, |
| "loss": 0.1581, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.3353769576302052, |
| "grad_norm": 1.547601580619812, |
| "learning_rate": 8.405428389990678e-06, |
| "loss": 0.1635, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.336894500424912, |
| "grad_norm": 2.0339860916137695, |
| "learning_rate": 8.385987335646889e-06, |
| "loss": 0.1725, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.3384120432196188, |
| "grad_norm": 1.0022940635681152, |
| "learning_rate": 8.366451267480114e-06, |
| "loss": 0.1634, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.3399295860143256, |
| "grad_norm": 2.0224385261535645, |
| "learning_rate": 8.346820733689845e-06, |
| "loss": 0.1657, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.34144712880903244, |
| "grad_norm": 0.5869084000587463, |
| "learning_rate": 8.327096285126356e-06, |
| "loss": 0.1696, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.3429646716037392, |
| "grad_norm": 0.8519582152366638, |
| "learning_rate": 8.307278475275258e-06, |
| "loss": 0.1627, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.34448221439844606, |
| "grad_norm": 0.4737469255924225, |
| "learning_rate": 8.287367860241961e-06, |
| "loss": 0.1669, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.34599975719315285, |
| "grad_norm": 1.4706870317459106, |
| "learning_rate": 8.267364998736073e-06, |
| "loss": 0.1681, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.3475172999878597, |
| "grad_norm": 0.866769015789032, |
| "learning_rate": 8.247270452055718e-06, |
| "loss": 0.16, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.34903484278256647, |
| "grad_norm": 1.338789939880371, |
| "learning_rate": 8.227084784071786e-06, |
| "loss": 0.1616, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.3505523855772733, |
| "grad_norm": 1.5837043523788452, |
| "learning_rate": 8.206808561212119e-06, |
| "loss": 0.1626, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.3520699283719801, |
| "grad_norm": 2.0691418647766113, |
| "learning_rate": 8.1864423524456e-06, |
| "loss": 0.168, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.3535874711666869, |
| "grad_norm": 0.9734016060829163, |
| "learning_rate": 8.165986729266207e-06, |
| "loss": 0.1643, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.3551050139613937, |
| "grad_norm": 0.6484026312828064, |
| "learning_rate": 8.14544226567696e-06, |
| "loss": 0.1642, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.3566225567561005, |
| "grad_norm": 0.8267654180526733, |
| "learning_rate": 8.124809538173816e-06, |
| "loss": 0.1702, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.35814009955080733, |
| "grad_norm": 1.2854251861572266, |
| "learning_rate": 8.104089125729509e-06, |
| "loss": 0.1714, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.3596576423455141, |
| "grad_norm": 1.147830605506897, |
| "learning_rate": 8.083281609777278e-06, |
| "loss": 0.1622, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.36117518514022096, |
| "grad_norm": 7.396162509918213, |
| "learning_rate": 8.06238757419457e-06, |
| "loss": 0.1746, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.36269272793492774, |
| "grad_norm": 0.7266018390655518, |
| "learning_rate": 8.041407605286647e-06, |
| "loss": 0.1623, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.3642102707296346, |
| "grad_norm": 0.3472922444343567, |
| "learning_rate": 8.020342291770143e-06, |
| "loss": 0.16, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.36572781352434136, |
| "grad_norm": 0.5582528114318848, |
| "learning_rate": 7.99919222475653e-06, |
| "loss": 0.1632, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.3672453563190482, |
| "grad_norm": 1.1052249670028687, |
| "learning_rate": 7.977957997735541e-06, |
| "loss": 0.1628, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.368762899113755, |
| "grad_norm": 0.760474443435669, |
| "learning_rate": 7.956640206558517e-06, |
| "loss": 0.1673, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.3702804419084618, |
| "grad_norm": 0.8195217251777649, |
| "learning_rate": 7.935239449421684e-06, |
| "loss": 0.1665, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.3717979847031686, |
| "grad_norm": 1.5612919330596924, |
| "learning_rate": 7.913756326849359e-06, |
| "loss": 0.1685, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.37331552749787544, |
| "grad_norm": 1.6288279294967651, |
| "learning_rate": 7.892191441677115e-06, |
| "loss": 0.1527, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.3748330702925822, |
| "grad_norm": 0.48679786920547485, |
| "learning_rate": 7.870545399034853e-06, |
| "loss": 0.1608, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.37635061308728907, |
| "grad_norm": 0.5854870676994324, |
| "learning_rate": 7.848818806329825e-06, |
| "loss": 0.1638, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.37786815588199585, |
| "grad_norm": 1.408368468284607, |
| "learning_rate": 7.82701227322959e-06, |
| "loss": 0.1616, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.3793856986767027, |
| "grad_norm": 0.739921510219574, |
| "learning_rate": 7.805126411644907e-06, |
| "loss": 0.1608, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.38090324147140947, |
| "grad_norm": 0.7832688093185425, |
| "learning_rate": 7.78316183571256e-06, |
| "loss": 0.1675, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.3824207842661163, |
| "grad_norm": 1.3807283639907837, |
| "learning_rate": 7.761119161778129e-06, |
| "loss": 0.1639, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.3839383270608231, |
| "grad_norm": 3.3557193279266357, |
| "learning_rate": 7.738999008378695e-06, |
| "loss": 0.1696, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.38545586985552993, |
| "grad_norm": 1.0659217834472656, |
| "learning_rate": 7.71680199622548e-06, |
| "loss": 0.1661, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.3869734126502367, |
| "grad_norm": 1.3830986022949219, |
| "learning_rate": 7.694528748186432e-06, |
| "loss": 0.1564, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.38849095544494355, |
| "grad_norm": 0.6899144053459167, |
| "learning_rate": 7.672179889268748e-06, |
| "loss": 0.1693, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.39000849823965034, |
| "grad_norm": 0.9374479055404663, |
| "learning_rate": 7.649756046601327e-06, |
| "loss": 0.1668, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.3915260410343572, |
| "grad_norm": 0.6372075080871582, |
| "learning_rate": 7.627257849417188e-06, |
| "loss": 0.1597, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.39304358382906396, |
| "grad_norm": 0.5880036354064941, |
| "learning_rate": 7.604685929035798e-06, |
| "loss": 0.162, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.3945611266237708, |
| "grad_norm": 0.8676182627677917, |
| "learning_rate": 7.582040918845362e-06, |
| "loss": 0.1676, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.3960786694184776, |
| "grad_norm": 0.9364919066429138, |
| "learning_rate": 7.559323454285055e-06, |
| "loss": 0.1597, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.3975962122131844, |
| "grad_norm": 0.6055238842964172, |
| "learning_rate": 7.53653417282718e-06, |
| "loss": 0.1593, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.3991137550078912, |
| "grad_norm": 1.3126447200775146, |
| "learning_rate": 7.513673713959293e-06, |
| "loss": 0.1678, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.40063129780259804, |
| "grad_norm": 0.8760477900505066, |
| "learning_rate": 7.490742719166248e-06, |
| "loss": 0.1669, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.4021488405973048, |
| "grad_norm": 0.8734946846961975, |
| "learning_rate": 7.467741831912199e-06, |
| "loss": 0.1672, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.40366638339201166, |
| "grad_norm": 0.9851352572441101, |
| "learning_rate": 7.444671697622544e-06, |
| "loss": 0.1692, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.40518392618671845, |
| "grad_norm": 1.0815508365631104, |
| "learning_rate": 7.42153296366582e-06, |
| "loss": 0.1682, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.4067014689814253, |
| "grad_norm": 0.7282068133354187, |
| "learning_rate": 7.398326279335525e-06, |
| "loss": 0.1571, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.40821901177613207, |
| "grad_norm": 1.295764684677124, |
| "learning_rate": 7.37505229583191e-06, |
| "loss": 0.1635, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.4097365545708389, |
| "grad_norm": 0.6792795062065125, |
| "learning_rate": 7.351711666243699e-06, |
| "loss": 0.1622, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.4112540973655457, |
| "grad_norm": 0.6477678418159485, |
| "learning_rate": 7.328305045529764e-06, |
| "loss": 0.1632, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.41277164016025253, |
| "grad_norm": 0.544188380241394, |
| "learning_rate": 7.304833090500749e-06, |
| "loss": 0.1687, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.4142891829549593, |
| "grad_norm": 0.7626290917396545, |
| "learning_rate": 7.281296459800634e-06, |
| "loss": 0.1623, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.41580672574966615, |
| "grad_norm": 0.6300278902053833, |
| "learning_rate": 7.257695813888257e-06, |
| "loss": 0.164, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.41732426854437293, |
| "grad_norm": 0.5807547569274902, |
| "learning_rate": 7.2340318150187825e-06, |
| "loss": 0.155, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.4188418113390798, |
| "grad_norm": 0.816822350025177, |
| "learning_rate": 7.210305127225112e-06, |
| "loss": 0.1626, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.42035935413378656, |
| "grad_norm": 0.9591571092605591, |
| "learning_rate": 7.186516416299255e-06, |
| "loss": 0.1672, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.4218768969284934, |
| "grad_norm": 0.5725838541984558, |
| "learning_rate": 7.162666349773647e-06, |
| "loss": 0.1613, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.4233944397232002, |
| "grad_norm": 0.8374956846237183, |
| "learning_rate": 7.138755596902415e-06, |
| "loss": 0.1686, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.424911982517907, |
| "grad_norm": 0.8108429908752441, |
| "learning_rate": 7.1147848286425995e-06, |
| "loss": 0.1657, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.4264295253126138, |
| "grad_norm": 0.5929946899414062, |
| "learning_rate": 7.090754717635325e-06, |
| "loss": 0.1595, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.42794706810732064, |
| "grad_norm": 0.7109673023223877, |
| "learning_rate": 7.066665938186926e-06, |
| "loss": 0.1606, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.4294646109020274, |
| "grad_norm": 1.7905889749526978, |
| "learning_rate": 7.04251916625003e-06, |
| "loss": 0.1724, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.43098215369673426, |
| "grad_norm": 0.7144661545753479, |
| "learning_rate": 7.018315079404584e-06, |
| "loss": 0.1628, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.43249969649144104, |
| "grad_norm": 3.669461965560913, |
| "learning_rate": 6.994054356838835e-06, |
| "loss": 0.1596, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.4340172392861479, |
| "grad_norm": 0.5240535140037537, |
| "learning_rate": 6.969737679330291e-06, |
| "loss": 0.1599, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.43553478208085467, |
| "grad_norm": 0.728648841381073, |
| "learning_rate": 6.945365729226594e-06, |
| "loss": 0.1659, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.4370523248755615, |
| "grad_norm": 1.0085320472717285, |
| "learning_rate": 6.920939190426392e-06, |
| "loss": 0.1618, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.4385698676702683, |
| "grad_norm": 0.7288528084754944, |
| "learning_rate": 6.89645874836014e-06, |
| "loss": 0.1629, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.4400874104649751, |
| "grad_norm": 0.5646519660949707, |
| "learning_rate": 6.871925089970861e-06, |
| "loss": 0.1641, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.4416049532596819, |
| "grad_norm": 3.171855926513672, |
| "learning_rate": 6.847338903694882e-06, |
| "loss": 0.1657, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.44312249605438875, |
| "grad_norm": 0.8458850979804993, |
| "learning_rate": 6.8227008794425055e-06, |
| "loss": 0.1638, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.44464003884909553, |
| "grad_norm": 0.879671037197113, |
| "learning_rate": 6.798011708578655e-06, |
| "loss": 0.1587, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.44615758164380237, |
| "grad_norm": 2.345825433731079, |
| "learning_rate": 6.773272083903475e-06, |
| "loss": 0.1654, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.44767512443850915, |
| "grad_norm": 0.6839026212692261, |
| "learning_rate": 6.748482699632884e-06, |
| "loss": 0.1659, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.449192667233216, |
| "grad_norm": 0.6057868003845215, |
| "learning_rate": 6.723644251379106e-06, |
| "loss": 0.1658, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.4507102100279228, |
| "grad_norm": 0.9070055484771729, |
| "learning_rate": 6.698757436131138e-06, |
| "loss": 0.1594, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.4522277528226296, |
| "grad_norm": 0.7737216353416443, |
| "learning_rate": 6.673822952235201e-06, |
| "loss": 0.1661, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.4537452956173364, |
| "grad_norm": 0.5271407961845398, |
| "learning_rate": 6.648841499375143e-06, |
| "loss": 0.1613, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.45526283841204324, |
| "grad_norm": 0.6933364868164062, |
| "learning_rate": 6.623813778552796e-06, |
| "loss": 0.1657, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.45526283841204324, |
| "eval_loss": 0.5074921250343323, |
| "eval_runtime": 989.6205, |
| "eval_samples_per_second": 50.321, |
| "eval_steps_per_second": 6.29, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.45678038120675, |
| "grad_norm": 0.5691549181938171, |
| "learning_rate": 6.59874049206832e-06, |
| "loss": 0.162, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.45829792400145686, |
| "grad_norm": 1.0941545963287354, |
| "learning_rate": 6.573622343500482e-06, |
| "loss": 0.1696, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.45981546679616364, |
| "grad_norm": 2.0004982948303223, |
| "learning_rate": 6.548460037686925e-06, |
| "loss": 0.1633, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.4613330095908705, |
| "grad_norm": 0.568308413028717, |
| "learning_rate": 6.5232542807043765e-06, |
| "loss": 0.1569, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.46285055238557726, |
| "grad_norm": 1.2990856170654297, |
| "learning_rate": 6.498005779848848e-06, |
| "loss": 0.1583, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.4643680951802841, |
| "grad_norm": 1.7516120672225952, |
| "learning_rate": 6.472715243615781e-06, |
| "loss": 0.1572, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.4658856379749909, |
| "grad_norm": 0.8424046635627747, |
| "learning_rate": 6.4473833816801675e-06, |
| "loss": 0.1599, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.4674031807696977, |
| "grad_norm": 0.7119935750961304, |
| "learning_rate": 6.422010904876634e-06, |
| "loss": 0.1607, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.4689207235644045, |
| "grad_norm": 0.7812116742134094, |
| "learning_rate": 6.396598525179495e-06, |
| "loss": 0.1653, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.47043826635911135, |
| "grad_norm": 1.2674237489700317, |
| "learning_rate": 6.371146955682781e-06, |
| "loss": 0.163, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.47195580915381813, |
| "grad_norm": 0.7816241979598999, |
| "learning_rate": 6.34565691058022e-06, |
| "loss": 0.1575, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.47347335194852497, |
| "grad_norm": 0.6579769849777222, |
| "learning_rate": 6.320129105145198e-06, |
| "loss": 0.1647, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.47499089474323175, |
| "grad_norm": 0.8202933073043823, |
| "learning_rate": 6.294564255710695e-06, |
| "loss": 0.1583, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.4765084375379386, |
| "grad_norm": 0.5198040008544922, |
| "learning_rate": 6.26896307964917e-06, |
| "loss": 0.1598, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.4780259803326454, |
| "grad_norm": 0.5005636215209961, |
| "learning_rate": 6.243326295352451e-06, |
| "loss": 0.1536, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.4795435231273522, |
| "grad_norm": 1.1730496883392334, |
| "learning_rate": 6.217654622211553e-06, |
| "loss": 0.1701, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.481061065922059, |
| "grad_norm": 0.9473150372505188, |
| "learning_rate": 6.191948780596511e-06, |
| "loss": 0.1586, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.48257860871676583, |
| "grad_norm": 1.1389187574386597, |
| "learning_rate": 6.166209491836157e-06, |
| "loss": 0.1567, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.4840961515114726, |
| "grad_norm": 0.5175455808639526, |
| "learning_rate": 6.140437478197876e-06, |
| "loss": 0.1665, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.48561369430617946, |
| "grad_norm": 0.5194241404533386, |
| "learning_rate": 6.114633462867344e-06, |
| "loss": 0.1654, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.48713123710088624, |
| "grad_norm": 0.6536809802055359, |
| "learning_rate": 6.088798169928236e-06, |
| "loss": 0.1591, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.4886487798955931, |
| "grad_norm": 0.6603504419326782, |
| "learning_rate": 6.0629323243419006e-06, |
| "loss": 0.1638, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.49016632269029986, |
| "grad_norm": 1.4609029293060303, |
| "learning_rate": 6.037036651927022e-06, |
| "loss": 0.1639, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.4916838654850067, |
| "grad_norm": 0.5603197813034058, |
| "learning_rate": 6.011111879339252e-06, |
| "loss": 0.1651, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.4932014082797135, |
| "grad_norm": 7.655277252197266, |
| "learning_rate": 5.98515873405082e-06, |
| "loss": 0.1621, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.4947189510744203, |
| "grad_norm": 0.5782756209373474, |
| "learning_rate": 5.959177944330118e-06, |
| "loss": 0.1594, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.4962364938691271, |
| "grad_norm": 1.0311505794525146, |
| "learning_rate": 5.933170239221266e-06, |
| "loss": 0.1586, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.49775403666383394, |
| "grad_norm": 3.3592989444732666, |
| "learning_rate": 5.907136348523651e-06, |
| "loss": 0.1674, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.4992715794585407, |
| "grad_norm": 0.6535949110984802, |
| "learning_rate": 5.8810770027714544e-06, |
| "loss": 0.1548, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.5007891222532476, |
| "grad_norm": 3.045614004135132, |
| "learning_rate": 5.8549929332131494e-06, |
| "loss": 0.1602, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.5023066650479544, |
| "grad_norm": 0.8914518356323242, |
| "learning_rate": 5.828884871790977e-06, |
| "loss": 0.1587, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.5038242078426611, |
| "grad_norm": 1.4585373401641846, |
| "learning_rate": 5.802753551120417e-06, |
| "loss": 0.1643, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.505341750637368, |
| "grad_norm": 1.471450686454773, |
| "learning_rate": 5.77659970446962e-06, |
| "loss": 0.1686, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.5068592934320748, |
| "grad_norm": 2.6323044300079346, |
| "learning_rate": 5.750424065738837e-06, |
| "loss": 0.1596, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.5083768362267816, |
| "grad_norm": 0.884983241558075, |
| "learning_rate": 5.724227369439823e-06, |
| "loss": 0.163, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.5098943790214884, |
| "grad_norm": 0.4667279124259949, |
| "learning_rate": 5.69801035067523e-06, |
| "loss": 0.1597, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.5114119218161952, |
| "grad_norm": 0.9215657114982605, |
| "learning_rate": 5.671773745117977e-06, |
| "loss": 0.1609, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.512929464610902, |
| "grad_norm": 0.6155304908752441, |
| "learning_rate": 5.6455182889906e-06, |
| "loss": 0.1579, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.5144470074056089, |
| "grad_norm": 0.7917832732200623, |
| "learning_rate": 5.619244719044605e-06, |
| "loss": 0.1598, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.5159645502003156, |
| "grad_norm": 0.7764760255813599, |
| "learning_rate": 5.5929537725397845e-06, |
| "loss": 0.1576, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.5174820929950225, |
| "grad_norm": 0.3823162019252777, |
| "learning_rate": 5.566646187223535e-06, |
| "loss": 0.1625, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.5189996357897293, |
| "grad_norm": 0.46521395444869995, |
| "learning_rate": 5.5403227013101515e-06, |
| "loss": 0.1647, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.5205171785844361, |
| "grad_norm": 0.7488669157028198, |
| "learning_rate": 5.513984053460112e-06, |
| "loss": 0.16, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.5220347213791429, |
| "grad_norm": 0.6630042195320129, |
| "learning_rate": 5.4876309827593554e-06, |
| "loss": 0.1632, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.5235522641738497, |
| "grad_norm": 1.024338960647583, |
| "learning_rate": 5.461264228698537e-06, |
| "loss": 0.1542, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.5250698069685565, |
| "grad_norm": 1.1433038711547852, |
| "learning_rate": 5.434884531152281e-06, |
| "loss": 0.1572, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.5265873497632634, |
| "grad_norm": 0.4922288656234741, |
| "learning_rate": 5.408492630358414e-06, |
| "loss": 0.1672, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.5281048925579701, |
| "grad_norm": 1.3577494621276855, |
| "learning_rate": 5.3820892668972005e-06, |
| "loss": 0.1591, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.529622435352677, |
| "grad_norm": 1.1060514450073242, |
| "learning_rate": 5.355675181670554e-06, |
| "loss": 0.164, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.5311399781473838, |
| "grad_norm": 0.9221227765083313, |
| "learning_rate": 5.329251115881253e-06, |
| "loss": 0.1698, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.5326575209420905, |
| "grad_norm": 0.5531550049781799, |
| "learning_rate": 5.3028178110121395e-06, |
| "loss": 0.1629, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.5341750637367974, |
| "grad_norm": 1.188730001449585, |
| "learning_rate": 5.276376008805309e-06, |
| "loss": 0.1589, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.5356926065315042, |
| "grad_norm": 1.3050966262817383, |
| "learning_rate": 5.249926451241305e-06, |
| "loss": 0.1704, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.537210149326211, |
| "grad_norm": 0.5301448106765747, |
| "learning_rate": 5.2234698805182885e-06, |
| "loss": 0.1624, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.5387276921209178, |
| "grad_norm": 2.257366180419922, |
| "learning_rate": 5.1970070390312184e-06, |
| "loss": 0.1575, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.5402452349156246, |
| "grad_norm": 0.47665533423423767, |
| "learning_rate": 5.1705386693510175e-06, |
| "loss": 0.1579, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.5417627777103314, |
| "grad_norm": 0.8027414083480835, |
| "learning_rate": 5.144065514203731e-06, |
| "loss": 0.1555, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.5432803205050383, |
| "grad_norm": 0.7599822878837585, |
| "learning_rate": 5.117588316449694e-06, |
| "loss": 0.1584, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.544797863299745, |
| "grad_norm": 1.1928893327713013, |
| "learning_rate": 5.091107819062676e-06, |
| "loss": 0.1623, |
| "step": 35900 |
| }, |
| { |
| "epoch": 0.5463154060944518, |
| "grad_norm": 0.7790924906730652, |
| "learning_rate": 5.06462476510904e-06, |
| "loss": 0.1604, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.5478329488891587, |
| "grad_norm": 0.8471243381500244, |
| "learning_rate": 5.038139897726886e-06, |
| "loss": 0.1632, |
| "step": 36100 |
| }, |
| { |
| "epoch": 0.5493504916838655, |
| "grad_norm": 2.127570390701294, |
| "learning_rate": 5.011653960105204e-06, |
| "loss": 0.1567, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.5508680344785722, |
| "grad_norm": 0.45868635177612305, |
| "learning_rate": 4.985167695463012e-06, |
| "loss": 0.1561, |
| "step": 36300 |
| }, |
| { |
| "epoch": 0.5523855772732791, |
| "grad_norm": 0.9626381993293762, |
| "learning_rate": 4.958681847028508e-06, |
| "loss": 0.1589, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.5539031200679859, |
| "grad_norm": 0.9326303005218506, |
| "learning_rate": 4.932197158018208e-06, |
| "loss": 0.1606, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.5554206628626928, |
| "grad_norm": 0.5278806090354919, |
| "learning_rate": 4.9057143716160945e-06, |
| "loss": 0.1631, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.5569382056573995, |
| "grad_norm": 0.6460352540016174, |
| "learning_rate": 4.879234230952764e-06, |
| "loss": 0.1603, |
| "step": 36700 |
| }, |
| { |
| "epoch": 0.5584557484521063, |
| "grad_norm": 0.8054344058036804, |
| "learning_rate": 4.8527574790845635e-06, |
| "loss": 0.1582, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.5599732912468132, |
| "grad_norm": 1.1287479400634766, |
| "learning_rate": 4.826284858972757e-06, |
| "loss": 0.1584, |
| "step": 36900 |
| }, |
| { |
| "epoch": 0.56149083404152, |
| "grad_norm": 0.7059003710746765, |
| "learning_rate": 4.7998171134626595e-06, |
| "loss": 0.1564, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.5630083768362267, |
| "grad_norm": 0.6124716401100159, |
| "learning_rate": 4.7733549852628085e-06, |
| "loss": 0.1607, |
| "step": 37100 |
| }, |
| { |
| "epoch": 0.5645259196309336, |
| "grad_norm": 0.9231120944023132, |
| "learning_rate": 4.746899216924106e-06, |
| "loss": 0.1646, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.5660434624256404, |
| "grad_norm": 1.342411994934082, |
| "learning_rate": 4.720450550818996e-06, |
| "loss": 0.1539, |
| "step": 37300 |
| }, |
| { |
| "epoch": 0.5675610052203472, |
| "grad_norm": 1.3646472692489624, |
| "learning_rate": 4.694009729120626e-06, |
| "loss": 0.158, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.569078548015054, |
| "grad_norm": 1.1396809816360474, |
| "learning_rate": 4.667577493782025e-06, |
| "loss": 0.1564, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.5705960908097608, |
| "grad_norm": 0.6573958992958069, |
| "learning_rate": 4.641154586515277e-06, |
| "loss": 0.1578, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.5721136336044677, |
| "grad_norm": 0.7357528805732727, |
| "learning_rate": 4.614741748770714e-06, |
| "loss": 0.1597, |
| "step": 37700 |
| }, |
| { |
| "epoch": 0.5736311763991745, |
| "grad_norm": 0.7496752142906189, |
| "learning_rate": 4.588339721716109e-06, |
| "loss": 0.154, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.5751487191938812, |
| "grad_norm": 0.8587321639060974, |
| "learning_rate": 4.561949246215875e-06, |
| "loss": 0.1574, |
| "step": 37900 |
| }, |
| { |
| "epoch": 0.5766662619885881, |
| "grad_norm": 0.512344241142273, |
| "learning_rate": 4.535571062810281e-06, |
| "loss": 0.1591, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.5781838047832949, |
| "grad_norm": 0.6572905778884888, |
| "learning_rate": 4.509205911694666e-06, |
| "loss": 0.1614, |
| "step": 38100 |
| }, |
| { |
| "epoch": 0.5797013475780017, |
| "grad_norm": 0.7880915999412537, |
| "learning_rate": 4.482854532698675e-06, |
| "loss": 0.1626, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.5812188903727085, |
| "grad_norm": 0.6261263489723206, |
| "learning_rate": 4.456517665265491e-06, |
| "loss": 0.1606, |
| "step": 38300 |
| }, |
| { |
| "epoch": 0.5827364331674153, |
| "grad_norm": 2.0507938861846924, |
| "learning_rate": 4.430196048431093e-06, |
| "loss": 0.1566, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.5842539759621221, |
| "grad_norm": 1.5023155212402344, |
| "learning_rate": 4.403890420803511e-06, |
| "loss": 0.1613, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.585771518756829, |
| "grad_norm": 0.5439951419830322, |
| "learning_rate": 4.377601520542107e-06, |
| "loss": 0.1555, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.5872890615515357, |
| "grad_norm": 2.4225058555603027, |
| "learning_rate": 4.3513300853368565e-06, |
| "loss": 0.1556, |
| "step": 38700 |
| }, |
| { |
| "epoch": 0.5888066043462425, |
| "grad_norm": 1.1448410749435425, |
| "learning_rate": 4.32507685238765e-06, |
| "loss": 0.1544, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.5903241471409494, |
| "grad_norm": 0.8714995384216309, |
| "learning_rate": 4.298842558383609e-06, |
| "loss": 0.1583, |
| "step": 38900 |
| }, |
| { |
| "epoch": 0.5918416899356562, |
| "grad_norm": 0.4775989055633545, |
| "learning_rate": 4.272627939482406e-06, |
| "loss": 0.1575, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.593359232730363, |
| "grad_norm": 0.8010126948356628, |
| "learning_rate": 4.2464337312896185e-06, |
| "loss": 0.1588, |
| "step": 39100 |
| }, |
| { |
| "epoch": 0.5948767755250698, |
| "grad_norm": 0.7952308654785156, |
| "learning_rate": 4.220260668838076e-06, |
| "loss": 0.1583, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.5963943183197766, |
| "grad_norm": 0.5506075024604797, |
| "learning_rate": 4.194109486567242e-06, |
| "loss": 0.1582, |
| "step": 39300 |
| }, |
| { |
| "epoch": 0.5979118611144835, |
| "grad_norm": 0.8976244926452637, |
| "learning_rate": 4.167980918302605e-06, |
| "loss": 0.1564, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.5994294039091902, |
| "grad_norm": 0.7584324479103088, |
| "learning_rate": 4.141875697235081e-06, |
| "loss": 0.1588, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.600946946703897, |
| "grad_norm": 0.5679917335510254, |
| "learning_rate": 4.115794555900443e-06, |
| "loss": 0.1603, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.6024644894986039, |
| "grad_norm": 2.806525945663452, |
| "learning_rate": 4.089738226158768e-06, |
| "loss": 0.1612, |
| "step": 39700 |
| }, |
| { |
| "epoch": 0.6039820322933107, |
| "grad_norm": 0.5877603888511658, |
| "learning_rate": 4.063707439173894e-06, |
| "loss": 0.1583, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.6054995750880174, |
| "grad_norm": 0.8282864093780518, |
| "learning_rate": 4.0377029253929104e-06, |
| "loss": 0.1528, |
| "step": 39900 |
| }, |
| { |
| "epoch": 0.6070171178827243, |
| "grad_norm": 0.6813582181930542, |
| "learning_rate": 4.011725414525653e-06, |
| "loss": 0.1528, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.6085346606774311, |
| "grad_norm": 0.43593981862068176, |
| "learning_rate": 3.985775635524234e-06, |
| "loss": 0.1546, |
| "step": 40100 |
| }, |
| { |
| "epoch": 0.610052203472138, |
| "grad_norm": 0.5289904475212097, |
| "learning_rate": 3.959854316562584e-06, |
| "loss": 0.1523, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.6115697462668447, |
| "grad_norm": 0.6840155720710754, |
| "learning_rate": 3.933962185016021e-06, |
| "loss": 0.1621, |
| "step": 40300 |
| }, |
| { |
| "epoch": 0.6130872890615515, |
| "grad_norm": 0.8754764795303345, |
| "learning_rate": 3.908099967440838e-06, |
| "loss": 0.1577, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.6146048318562584, |
| "grad_norm": 0.7703122496604919, |
| "learning_rate": 3.882268389553912e-06, |
| "loss": 0.1574, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.6161223746509652, |
| "grad_norm": 0.6455814838409424, |
| "learning_rate": 3.856468176212345e-06, |
| "loss": 0.1573, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.6176399174456719, |
| "grad_norm": 5.197359085083008, |
| "learning_rate": 3.830700051393125e-06, |
| "loss": 0.1495, |
| "step": 40700 |
| }, |
| { |
| "epoch": 0.6191574602403788, |
| "grad_norm": 1.4284406900405884, |
| "learning_rate": 3.804964738172803e-06, |
| "loss": 0.1565, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.6206750030350856, |
| "grad_norm": 0.9033112525939941, |
| "learning_rate": 3.7792629587072086e-06, |
| "loss": 0.1641, |
| "step": 40900 |
| }, |
| { |
| "epoch": 0.6221925458297924, |
| "grad_norm": 1.4378776550292969, |
| "learning_rate": 3.753595434211187e-06, |
| "loss": 0.1572, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.6237100886244992, |
| "grad_norm": 1.1654125452041626, |
| "learning_rate": 3.7279628849383526e-06, |
| "loss": 0.1533, |
| "step": 41100 |
| }, |
| { |
| "epoch": 0.625227631419206, |
| "grad_norm": 1.5666859149932861, |
| "learning_rate": 3.702366030160891e-06, |
| "loss": 0.1571, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.6267451742139128, |
| "grad_norm": 1.6572262048721313, |
| "learning_rate": 3.6768055881493616e-06, |
| "loss": 0.1546, |
| "step": 41300 |
| }, |
| { |
| "epoch": 0.6282627170086197, |
| "grad_norm": 0.7941910028457642, |
| "learning_rate": 3.651282276152556e-06, |
| "loss": 0.1569, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.6297802598033264, |
| "grad_norm": 2.4592742919921875, |
| "learning_rate": 3.6257968103773567e-06, |
| "loss": 0.1548, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.6312978025980333, |
| "grad_norm": 0.6559963822364807, |
| "learning_rate": 3.6003499059686564e-06, |
| "loss": 0.1533, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.6328153453927401, |
| "grad_norm": 0.561519980430603, |
| "learning_rate": 3.574942276989273e-06, |
| "loss": 0.1621, |
| "step": 41700 |
| }, |
| { |
| "epoch": 0.6343328881874469, |
| "grad_norm": 0.48774096369743347, |
| "learning_rate": 3.5495746363999255e-06, |
| "loss": 0.153, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.6358504309821537, |
| "grad_norm": 0.5202430486679077, |
| "learning_rate": 3.524247696039223e-06, |
| "loss": 0.154, |
| "step": 41900 |
| }, |
| { |
| "epoch": 0.6373679737768605, |
| "grad_norm": 2.0533859729766846, |
| "learning_rate": 3.498962166603688e-06, |
| "loss": 0.1609, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.6388855165715673, |
| "grad_norm": 0.495179146528244, |
| "learning_rate": 3.4737187576278175e-06, |
| "loss": 0.1528, |
| "step": 42100 |
| }, |
| { |
| "epoch": 0.6404030593662742, |
| "grad_norm": 0.5103787779808044, |
| "learning_rate": 3.4485181774641697e-06, |
| "loss": 0.164, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.6419206021609809, |
| "grad_norm": 0.9691652059555054, |
| "learning_rate": 3.4233611332634874e-06, |
| "loss": 0.1578, |
| "step": 42300 |
| }, |
| { |
| "epoch": 0.6434381449556877, |
| "grad_norm": 0.7118510603904724, |
| "learning_rate": 3.3982483309548574e-06, |
| "loss": 0.1544, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.6449556877503946, |
| "grad_norm": 1.5103263854980469, |
| "learning_rate": 3.3731804752258988e-06, |
| "loss": 0.1564, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.6464732305451014, |
| "grad_norm": 0.7205672860145569, |
| "learning_rate": 3.348158269502989e-06, |
| "loss": 0.1519, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.6479907733398081, |
| "grad_norm": 1.0135701894760132, |
| "learning_rate": 3.323182415931525e-06, |
| "loss": 0.158, |
| "step": 42700 |
| }, |
| { |
| "epoch": 0.649508316134515, |
| "grad_norm": 2.973532199859619, |
| "learning_rate": 3.2982536153562238e-06, |
| "loss": 0.1604, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.6510258589292218, |
| "grad_norm": 1.3079688549041748, |
| "learning_rate": 3.2733725673014514e-06, |
| "loss": 0.1594, |
| "step": 42900 |
| }, |
| { |
| "epoch": 0.6525434017239287, |
| "grad_norm": 0.47416195273399353, |
| "learning_rate": 3.2485399699515936e-06, |
| "loss": 0.1567, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.6540609445186354, |
| "grad_norm": 0.927847146987915, |
| "learning_rate": 3.223756520131471e-06, |
| "loss": 0.1505, |
| "step": 43100 |
| }, |
| { |
| "epoch": 0.6555784873133422, |
| "grad_norm": 0.8857322335243225, |
| "learning_rate": 3.1990229132867755e-06, |
| "loss": 0.1566, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.6570960301080491, |
| "grad_norm": 1.3829877376556396, |
| "learning_rate": 3.174339843464567e-06, |
| "loss": 0.1593, |
| "step": 43300 |
| }, |
| { |
| "epoch": 0.6586135729027559, |
| "grad_norm": 1.6694592237472534, |
| "learning_rate": 3.1497080032937832e-06, |
| "loss": 0.1592, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.6601311156974626, |
| "grad_norm": 0.580848753452301, |
| "learning_rate": 3.1251280839658215e-06, |
| "loss": 0.1516, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.6616486584921695, |
| "grad_norm": 0.8469058275222778, |
| "learning_rate": 3.1006007752151247e-06, |
| "loss": 0.1559, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.6631662012868763, |
| "grad_norm": 1.1796588897705078, |
| "learning_rate": 3.076126765299844e-06, |
| "loss": 0.1578, |
| "step": 43700 |
| }, |
| { |
| "epoch": 0.6646837440815831, |
| "grad_norm": 1.4025541543960571, |
| "learning_rate": 3.0517067409825115e-06, |
| "loss": 0.1594, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.6662012868762899, |
| "grad_norm": 0.6020201444625854, |
| "learning_rate": 3.027341387510781e-06, |
| "loss": 0.1522, |
| "step": 43900 |
| }, |
| { |
| "epoch": 0.6677188296709967, |
| "grad_norm": 0.46768826246261597, |
| "learning_rate": 3.0030313885981876e-06, |
| "loss": 0.1557, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.6692363724657036, |
| "grad_norm": 0.6836826801300049, |
| "learning_rate": 2.978777426404975e-06, |
| "loss": 0.1576, |
| "step": 44100 |
| }, |
| { |
| "epoch": 0.6707539152604104, |
| "grad_norm": 0.8283591270446777, |
| "learning_rate": 2.9545801815189403e-06, |
| "loss": 0.1563, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.6722714580551171, |
| "grad_norm": 0.5825768113136292, |
| "learning_rate": 2.930440332936345e-06, |
| "loss": 0.1586, |
| "step": 44300 |
| }, |
| { |
| "epoch": 0.673789000849824, |
| "grad_norm": 0.7869064211845398, |
| "learning_rate": 2.9063585580428586e-06, |
| "loss": 0.1555, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.6753065436445308, |
| "grad_norm": 0.5700305104255676, |
| "learning_rate": 2.8823355325945545e-06, |
| "loss": 0.1574, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.6768240864392376, |
| "grad_norm": 0.5473443865776062, |
| "learning_rate": 2.8583719306989386e-06, |
| "loss": 0.1546, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.6783416292339444, |
| "grad_norm": 0.9259124398231506, |
| "learning_rate": 2.834468424796044e-06, |
| "loss": 0.1533, |
| "step": 44700 |
| }, |
| { |
| "epoch": 0.6798591720286512, |
| "grad_norm": 2.3020691871643066, |
| "learning_rate": 2.8106256856395536e-06, |
| "loss": 0.1602, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.681376714823358, |
| "grad_norm": 0.6738184094429016, |
| "learning_rate": 2.78684438227798e-06, |
| "loss": 0.1603, |
| "step": 44900 |
| }, |
| { |
| "epoch": 0.6828942576180649, |
| "grad_norm": 0.4569184482097626, |
| "learning_rate": 2.763125182035898e-06, |
| "loss": 0.1497, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.6844118004127716, |
| "grad_norm": 0.7541574835777283, |
| "learning_rate": 2.7394687504952065e-06, |
| "loss": 0.1574, |
| "step": 45100 |
| }, |
| { |
| "epoch": 0.6859293432074784, |
| "grad_norm": 1.6138263940811157, |
| "learning_rate": 2.7158757514764674e-06, |
| "loss": 0.1587, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.6874468860021853, |
| "grad_norm": 1.5819748640060425, |
| "learning_rate": 2.692346847020259e-06, |
| "loss": 0.1544, |
| "step": 45300 |
| }, |
| { |
| "epoch": 0.6889644287968921, |
| "grad_norm": 1.57101309299469, |
| "learning_rate": 2.66888269736862e-06, |
| "loss": 0.1527, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.6904819715915989, |
| "grad_norm": 0.48757970333099365, |
| "learning_rate": 2.645483960946501e-06, |
| "loss": 0.154, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.6919995143863057, |
| "grad_norm": 0.48652932047843933, |
| "learning_rate": 2.622151294343308e-06, |
| "loss": 0.1528, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.6935170571810125, |
| "grad_norm": 0.6868948936462402, |
| "learning_rate": 2.5988853522944626e-06, |
| "loss": 0.1606, |
| "step": 45700 |
| }, |
| { |
| "epoch": 0.6950345999757194, |
| "grad_norm": 0.7019301056861877, |
| "learning_rate": 2.575686787663041e-06, |
| "loss": 0.1534, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.6965521427704261, |
| "grad_norm": 0.9775063991546631, |
| "learning_rate": 2.552556251421443e-06, |
| "loss": 0.1518, |
| "step": 45900 |
| }, |
| { |
| "epoch": 0.6980696855651329, |
| "grad_norm": 0.6645476818084717, |
| "learning_rate": 2.529494392633138e-06, |
| "loss": 0.1473, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.6995872283598398, |
| "grad_norm": 2.778902053833008, |
| "learning_rate": 2.506501858434439e-06, |
| "loss": 0.145, |
| "step": 46100 |
| }, |
| { |
| "epoch": 0.7011047711545466, |
| "grad_norm": 1.3645416498184204, |
| "learning_rate": 2.483579294016355e-06, |
| "loss": 0.1535, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.7026223139492533, |
| "grad_norm": 0.9883469343185425, |
| "learning_rate": 2.4607273426064725e-06, |
| "loss": 0.1538, |
| "step": 46300 |
| }, |
| { |
| "epoch": 0.7041398567439602, |
| "grad_norm": 0.7800496816635132, |
| "learning_rate": 2.4379466454509236e-06, |
| "loss": 0.1532, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.705657399538667, |
| "grad_norm": 0.6158055067062378, |
| "learning_rate": 2.4152378417963733e-06, |
| "loss": 0.1551, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.7071749423333737, |
| "grad_norm": 0.6619333028793335, |
| "learning_rate": 2.3926015688721e-06, |
| "loss": 0.1573, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.7086924851280806, |
| "grad_norm": 0.5126205086708069, |
| "learning_rate": 2.3700384618720973e-06, |
| "loss": 0.1469, |
| "step": 46700 |
| }, |
| { |
| "epoch": 0.7102100279227874, |
| "grad_norm": 0.6815778613090515, |
| "learning_rate": 2.3475491539372596e-06, |
| "loss": 0.1548, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.7117275707174943, |
| "grad_norm": 0.8624967336654663, |
| "learning_rate": 2.325134276137619e-06, |
| "loss": 0.1563, |
| "step": 46900 |
| }, |
| { |
| "epoch": 0.713245113512201, |
| "grad_norm": 0.5875204205513, |
| "learning_rate": 2.3027944574546237e-06, |
| "loss": 0.1541, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.7147626563069078, |
| "grad_norm": 0.5968221426010132, |
| "learning_rate": 2.2805303247635035e-06, |
| "loss": 0.1503, |
| "step": 47100 |
| }, |
| { |
| "epoch": 0.7162801991016147, |
| "grad_norm": 1.5649338960647583, |
| "learning_rate": 2.258342502815665e-06, |
| "loss": 0.1483, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.7177977418963215, |
| "grad_norm": 0.739321231842041, |
| "learning_rate": 2.2362316142211755e-06, |
| "loss": 0.1527, |
| "step": 47300 |
| }, |
| { |
| "epoch": 0.7193152846910282, |
| "grad_norm": 1.2006664276123047, |
| "learning_rate": 2.2141982794312737e-06, |
| "loss": 0.1586, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.7208328274857351, |
| "grad_norm": 0.6313862204551697, |
| "learning_rate": 2.19224311672098e-06, |
| "loss": 0.1582, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.7223503702804419, |
| "grad_norm": 1.3812549114227295, |
| "learning_rate": 2.170366742171727e-06, |
| "loss": 0.1535, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.7238679130751488, |
| "grad_norm": 0.39481043815612793, |
| "learning_rate": 2.148569769654089e-06, |
| "loss": 0.1522, |
| "step": 47700 |
| }, |
| { |
| "epoch": 0.7253854558698555, |
| "grad_norm": 1.3398104906082153, |
| "learning_rate": 2.1268528108105424e-06, |
| "loss": 0.1564, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.7269029986645623, |
| "grad_norm": 0.962664783000946, |
| "learning_rate": 2.105216475038314e-06, |
| "loss": 0.1505, |
| "step": 47900 |
| }, |
| { |
| "epoch": 0.7284205414592692, |
| "grad_norm": 0.7913764119148254, |
| "learning_rate": 2.0836613694722696e-06, |
| "loss": 0.1532, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.729938084253976, |
| "grad_norm": 1.8171051740646362, |
| "learning_rate": 2.0621880989678895e-06, |
| "loss": 0.156, |
| "step": 48100 |
| }, |
| { |
| "epoch": 0.7314556270486827, |
| "grad_norm": 1.8144663572311401, |
| "learning_rate": 2.0407972660842824e-06, |
| "loss": 0.1517, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.7329731698433896, |
| "grad_norm": 0.5789530277252197, |
| "learning_rate": 2.0194894710672908e-06, |
| "loss": 0.15, |
| "step": 48300 |
| }, |
| { |
| "epoch": 0.7344907126380964, |
| "grad_norm": 2.0675876140594482, |
| "learning_rate": 1.998265311832634e-06, |
| "loss": 0.1489, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.7360082554328032, |
| "grad_norm": 2.8520278930664062, |
| "learning_rate": 1.9771253839491423e-06, |
| "loss": 0.1546, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.73752579822751, |
| "grad_norm": 0.9103041291236877, |
| "learning_rate": 1.956070280622036e-06, |
| "loss": 0.1559, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.7390433410222168, |
| "grad_norm": 0.4860074520111084, |
| "learning_rate": 1.9351005926762808e-06, |
| "loss": 0.1548, |
| "step": 48700 |
| }, |
| { |
| "epoch": 0.7405608838169236, |
| "grad_norm": 0.9042688012123108, |
| "learning_rate": 1.9142169085400175e-06, |
| "loss": 0.1568, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.7420784266116305, |
| "grad_norm": 1.0387529134750366, |
| "learning_rate": 1.8934198142280357e-06, |
| "loss": 0.1534, |
| "step": 48900 |
| }, |
| { |
| "epoch": 0.7435959694063372, |
| "grad_norm": 0.46772363781929016, |
| "learning_rate": 1.8727098933253435e-06, |
| "loss": 0.1512, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.745113512201044, |
| "grad_norm": 1.0915402173995972, |
| "learning_rate": 1.8520877269707804e-06, |
| "loss": 0.1541, |
| "step": 49100 |
| }, |
| { |
| "epoch": 0.7466310549957509, |
| "grad_norm": 0.6211321949958801, |
| "learning_rate": 1.8315538938407195e-06, |
| "loss": 0.1553, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.7481485977904577, |
| "grad_norm": 0.9021114110946655, |
| "learning_rate": 1.8111089701328205e-06, |
| "loss": 0.1439, |
| "step": 49300 |
| }, |
| { |
| "epoch": 0.7496661405851645, |
| "grad_norm": 0.821042537689209, |
| "learning_rate": 1.7907535295498702e-06, |
| "loss": 0.1525, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.7511836833798713, |
| "grad_norm": 0.9626114368438721, |
| "learning_rate": 1.770488143283674e-06, |
| "loss": 0.1498, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.7527012261745781, |
| "grad_norm": 0.6211876273155212, |
| "learning_rate": 1.7503133799990384e-06, |
| "loss": 0.1549, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.754218768969285, |
| "grad_norm": 1.3714808225631714, |
| "learning_rate": 1.7302298058178025e-06, |
| "loss": 0.1557, |
| "step": 49700 |
| }, |
| { |
| "epoch": 0.7557363117639917, |
| "grad_norm": 0.7492864727973938, |
| "learning_rate": 1.7102379843029643e-06, |
| "loss": 0.1619, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.7572538545586985, |
| "grad_norm": 0.6472452282905579, |
| "learning_rate": 1.690338476442852e-06, |
| "loss": 0.1569, |
| "step": 49900 |
| }, |
| { |
| "epoch": 0.7587713973534054, |
| "grad_norm": 1.0946698188781738, |
| "learning_rate": 1.6705318406353999e-06, |
| "loss": 0.1505, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.7602889401481122, |
| "grad_norm": 0.5974524021148682, |
| "learning_rate": 1.6508186326724607e-06, |
| "loss": 0.1519, |
| "step": 50100 |
| }, |
| { |
| "epoch": 0.7618064829428189, |
| "grad_norm": 0.8393105864524841, |
| "learning_rate": 1.6311994057242259e-06, |
| "loss": 0.1561, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.7633240257375258, |
| "grad_norm": 3.725569248199463, |
| "learning_rate": 1.6116747103236902e-06, |
| "loss": 0.1539, |
| "step": 50300 |
| }, |
| { |
| "epoch": 0.7648415685322326, |
| "grad_norm": 0.8107349872589111, |
| "learning_rate": 1.5922450943512136e-06, |
| "loss": 0.1534, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.7663591113269395, |
| "grad_norm": 1.49680757522583, |
| "learning_rate": 1.572911103019139e-06, |
| "loss": 0.1568, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.7678766541216462, |
| "grad_norm": 0.8140336275100708, |
| "learning_rate": 1.5536732788564963e-06, |
| "loss": 0.1525, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.769394196916353, |
| "grad_norm": 0.5300224423408508, |
| "learning_rate": 1.5345321616937841e-06, |
| "loss": 0.1565, |
| "step": 50700 |
| }, |
| { |
| "epoch": 0.7709117397110599, |
| "grad_norm": 1.5161974430084229, |
| "learning_rate": 1.5154882886478095e-06, |
| "loss": 0.1541, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.7724292825057667, |
| "grad_norm": 0.9870403409004211, |
| "learning_rate": 1.496542194106629e-06, |
| "loss": 0.1466, |
| "step": 50900 |
| }, |
| { |
| "epoch": 0.7739468253004734, |
| "grad_norm": 1.346680760383606, |
| "learning_rate": 1.4776944097145413e-06, |
| "loss": 0.1552, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.7754643680951803, |
| "grad_norm": 0.9406745433807373, |
| "learning_rate": 1.4589454643571816e-06, |
| "loss": 0.148, |
| "step": 51100 |
| }, |
| { |
| "epoch": 0.7769819108898871, |
| "grad_norm": 0.8680304288864136, |
| "learning_rate": 1.4402958841466664e-06, |
| "loss": 0.1529, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.778499453684594, |
| "grad_norm": 1.0303786993026733, |
| "learning_rate": 1.4217461924068438e-06, |
| "loss": 0.152, |
| "step": 51300 |
| }, |
| { |
| "epoch": 0.7800169964793007, |
| "grad_norm": 0.7590554356575012, |
| "learning_rate": 1.4032969096585968e-06, |
| "loss": 0.1523, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.7815345392740075, |
| "grad_norm": 0.8220491409301758, |
| "learning_rate": 1.3849485536052488e-06, |
| "loss": 0.1485, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.7830520820687144, |
| "grad_norm": 0.7371506690979004, |
| "learning_rate": 1.3667016391180231e-06, |
| "loss": 0.1526, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.7845696248634212, |
| "grad_norm": 0.9485886693000793, |
| "learning_rate": 1.3485566782216097e-06, |
| "loss": 0.1508, |
| "step": 51700 |
| }, |
| { |
| "epoch": 0.7860871676581279, |
| "grad_norm": 0.862425684928894, |
| "learning_rate": 1.3305141800797827e-06, |
| "loss": 0.1552, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.7876047104528348, |
| "grad_norm": 1.46304190158844, |
| "learning_rate": 1.3125746509811266e-06, |
| "loss": 0.1556, |
| "step": 51900 |
| }, |
| { |
| "epoch": 0.7891222532475416, |
| "grad_norm": 0.8005903959274292, |
| "learning_rate": 1.2947385943248165e-06, |
| "loss": 0.1497, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.7906397960422484, |
| "grad_norm": 0.5657956004142761, |
| "learning_rate": 1.2770065106065043e-06, |
| "loss": 0.1529, |
| "step": 52100 |
| }, |
| { |
| "epoch": 0.7921573388369552, |
| "grad_norm": 0.7030004858970642, |
| "learning_rate": 1.2593788974042636e-06, |
| "loss": 0.1495, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.793674881631662, |
| "grad_norm": 0.7087785601615906, |
| "learning_rate": 1.2418562493646374e-06, |
| "loss": 0.1579, |
| "step": 52300 |
| }, |
| { |
| "epoch": 0.7951924244263688, |
| "grad_norm": 0.6476522088050842, |
| "learning_rate": 1.2244390581887478e-06, |
| "loss": 0.1597, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.7967099672210757, |
| "grad_norm": 2.871662139892578, |
| "learning_rate": 1.2071278126185042e-06, |
| "loss": 0.1546, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.7982275100157824, |
| "grad_norm": 0.6594822406768799, |
| "learning_rate": 1.1899229984228922e-06, |
| "loss": 0.1516, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.7997450528104892, |
| "grad_norm": 0.890384316444397, |
| "learning_rate": 1.1728250983843308e-06, |
| "loss": 0.1527, |
| "step": 52700 |
| }, |
| { |
| "epoch": 0.8012625956051961, |
| "grad_norm": 0.6064486503601074, |
| "learning_rate": 1.1558345922851404e-06, |
| "loss": 0.1497, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.8027801383999029, |
| "grad_norm": 0.6611379384994507, |
| "learning_rate": 1.138951956894065e-06, |
| "loss": 0.1519, |
| "step": 52900 |
| }, |
| { |
| "epoch": 0.8042976811946096, |
| "grad_norm": 0.7390156984329224, |
| "learning_rate": 1.122177665952906e-06, |
| "loss": 0.1512, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.8058152239893165, |
| "grad_norm": 1.1560479402542114, |
| "learning_rate": 1.1055121901632165e-06, |
| "loss": 0.1561, |
| "step": 53100 |
| }, |
| { |
| "epoch": 0.8073327667840233, |
| "grad_norm": 1.7963082790374756, |
| "learning_rate": 1.0889559971731073e-06, |
| "loss": 0.1522, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.8088503095787302, |
| "grad_norm": 0.9894265532493591, |
| "learning_rate": 1.0725095515641088e-06, |
| "loss": 0.1512, |
| "step": 53300 |
| }, |
| { |
| "epoch": 0.8103678523734369, |
| "grad_norm": 1.401088833808899, |
| "learning_rate": 1.0561733148381475e-06, |
| "loss": 0.1565, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.8118853951681437, |
| "grad_norm": 0.6930254697799683, |
| "learning_rate": 1.0399477454045875e-06, |
| "loss": 0.1584, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.8134029379628506, |
| "grad_norm": 0.8673112392425537, |
| "learning_rate": 1.023833298567372e-06, |
| "loss": 0.1543, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.8149204807575574, |
| "grad_norm": 1.2038730382919312, |
| "learning_rate": 1.0078304265122425e-06, |
| "loss": 0.1503, |
| "step": 53700 |
| }, |
| { |
| "epoch": 0.8164380235522641, |
| "grad_norm": 0.9704771637916565, |
| "learning_rate": 9.919395782940561e-07, |
| "loss": 0.1518, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.817955566346971, |
| "grad_norm": 1.151596188545227, |
| "learning_rate": 9.761611998241766e-07, |
| "loss": 0.151, |
| "step": 53900 |
| }, |
| { |
| "epoch": 0.8194731091416778, |
| "grad_norm": 0.46842506527900696, |
| "learning_rate": 9.604957338579724e-07, |
| "loss": 0.1516, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.8209906519363847, |
| "grad_norm": 2.784548044204712, |
| "learning_rate": 9.449436199823797e-07, |
| "loss": 0.1505, |
| "step": 54100 |
| }, |
| { |
| "epoch": 0.8225081947310914, |
| "grad_norm": 1.2736271619796753, |
| "learning_rate": 9.295052946035804e-07, |
| "loss": 0.1511, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.8240257375257982, |
| "grad_norm": 1.5416733026504517, |
| "learning_rate": 9.141811909347454e-07, |
| "loss": 0.1499, |
| "step": 54300 |
| }, |
| { |
| "epoch": 0.8255432803205051, |
| "grad_norm": 0.6435291767120361, |
| "learning_rate": 8.98971738983882e-07, |
| "loss": 0.1556, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.8270608231152119, |
| "grad_norm": 0.45951247215270996, |
| "learning_rate": 8.838773655417731e-07, |
| "loss": 0.1516, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.8285783659099186, |
| "grad_norm": 2.424105167388916, |
| "learning_rate": 8.688984941699907e-07, |
| "loss": 0.1528, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.8300959087046255, |
| "grad_norm": 3.5320215225219727, |
| "learning_rate": 8.540355451890204e-07, |
| "loss": 0.1467, |
| "step": 54700 |
| }, |
| { |
| "epoch": 0.8316134514993323, |
| "grad_norm": 0.8505892753601074, |
| "learning_rate": 8.392889356664563e-07, |
| "loss": 0.1442, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.8331309942940391, |
| "grad_norm": 0.7711630463600159, |
| "learning_rate": 8.246590794053111e-07, |
| "loss": 0.1539, |
| "step": 54900 |
| }, |
| { |
| "epoch": 0.8346485370887459, |
| "grad_norm": 1.0841100215911865, |
| "learning_rate": 8.101463869323889e-07, |
| "loss": 0.1522, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.8361660798834527, |
| "grad_norm": 0.7660624980926514, |
| "learning_rate": 7.957512654867805e-07, |
| "loss": 0.1462, |
| "step": 55100 |
| }, |
| { |
| "epoch": 0.8376836226781595, |
| "grad_norm": 1.2569801807403564, |
| "learning_rate": 7.81474119008424e-07, |
| "loss": 0.153, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.8392011654728664, |
| "grad_norm": 0.9163670539855957, |
| "learning_rate": 7.673153481267781e-07, |
| "loss": 0.1513, |
| "step": 55300 |
| }, |
| { |
| "epoch": 0.8407187082675731, |
| "grad_norm": 1.1001312732696533, |
| "learning_rate": 7.532753501495732e-07, |
| "loss": 0.1528, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.84223625106228, |
| "grad_norm": 1.1752309799194336, |
| "learning_rate": 7.393545190516704e-07, |
| "loss": 0.1495, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.8437537938569868, |
| "grad_norm": 1.5676056146621704, |
| "learning_rate": 7.255532454639968e-07, |
| "loss": 0.1524, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.8452713366516936, |
| "grad_norm": 2.6412742137908936, |
| "learning_rate": 7.118719166625953e-07, |
| "loss": 0.1534, |
| "step": 55700 |
| }, |
| { |
| "epoch": 0.8467888794464004, |
| "grad_norm": 0.74490886926651, |
| "learning_rate": 6.983109165577451e-07, |
| "loss": 0.1497, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.8483064222411072, |
| "grad_norm": 0.660461962223053, |
| "learning_rate": 6.848706256832e-07, |
| "loss": 0.1527, |
| "step": 55900 |
| }, |
| { |
| "epoch": 0.849823965035814, |
| "grad_norm": 1.0362908840179443, |
| "learning_rate": 6.715514211855007e-07, |
| "loss": 0.156, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.8513415078305209, |
| "grad_norm": 1.0476360321044922, |
| "learning_rate": 6.583536768134008e-07, |
| "loss": 0.1532, |
| "step": 56100 |
| }, |
| { |
| "epoch": 0.8528590506252276, |
| "grad_norm": 0.7230373620986938, |
| "learning_rate": 6.452777629073698e-07, |
| "loss": 0.1513, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.8543765934199344, |
| "grad_norm": 0.875400960445404, |
| "learning_rate": 6.3232404638921e-07, |
| "loss": 0.1509, |
| "step": 56300 |
| }, |
| { |
| "epoch": 0.8558941362146413, |
| "grad_norm": 0.6312329769134521, |
| "learning_rate": 6.194928907517534e-07, |
| "loss": 0.1518, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.8574116790093481, |
| "grad_norm": 0.7422769069671631, |
| "learning_rate": 6.067846560486646e-07, |
| "loss": 0.1437, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.8589292218040548, |
| "grad_norm": 0.7523478269577026, |
| "learning_rate": 5.941996988843385e-07, |
| "loss": 0.149, |
| "step": 56600 |
| }, |
| { |
| "epoch": 0.8604467645987617, |
| "grad_norm": 0.9320465326309204, |
| "learning_rate": 5.817383724038906e-07, |
| "loss": 0.1526, |
| "step": 56700 |
| }, |
| { |
| "epoch": 0.8619643073934685, |
| "grad_norm": 0.7207502126693726, |
| "learning_rate": 5.694010262832522e-07, |
| "loss": 0.1538, |
| "step": 56800 |
| }, |
| { |
| "epoch": 0.8634818501881754, |
| "grad_norm": 4.081971645355225, |
| "learning_rate": 5.571880067193514e-07, |
| "loss": 0.1485, |
| "step": 56900 |
| }, |
| { |
| "epoch": 0.8649993929828821, |
| "grad_norm": 0.9618120789527893, |
| "learning_rate": 5.450996564204053e-07, |
| "loss": 0.1503, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.8665169357775889, |
| "grad_norm": 12.091854095458984, |
| "learning_rate": 5.331363145962981e-07, |
| "loss": 0.1441, |
| "step": 57100 |
| }, |
| { |
| "epoch": 0.8680344785722958, |
| "grad_norm": 1.4775971174240112, |
| "learning_rate": 5.212983169490671e-07, |
| "loss": 0.1517, |
| "step": 57200 |
| }, |
| { |
| "epoch": 0.8695520213670026, |
| "grad_norm": 0.7318031787872314, |
| "learning_rate": 5.095859956634774e-07, |
| "loss": 0.1469, |
| "step": 57300 |
| }, |
| { |
| "epoch": 0.8710695641617093, |
| "grad_norm": 0.4980830252170563, |
| "learning_rate": 4.97999679397706e-07, |
| "loss": 0.1564, |
| "step": 57400 |
| }, |
| { |
| "epoch": 0.8725871069564162, |
| "grad_norm": 1.0416113138198853, |
| "learning_rate": 4.865396932741151e-07, |
| "loss": 0.1514, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.874104649751123, |
| "grad_norm": 1.5563331842422485, |
| "learning_rate": 4.7520635887013164e-07, |
| "loss": 0.1536, |
| "step": 57600 |
| }, |
| { |
| "epoch": 0.8756221925458298, |
| "grad_norm": 0.7019147872924805, |
| "learning_rate": 4.639999942092205e-07, |
| "loss": 0.1563, |
| "step": 57700 |
| }, |
| { |
| "epoch": 0.8771397353405366, |
| "grad_norm": 10.20484447479248, |
| "learning_rate": 4.5292091375196524e-07, |
| "loss": 0.1538, |
| "step": 57800 |
| }, |
| { |
| "epoch": 0.8786572781352434, |
| "grad_norm": 6.894394397735596, |
| "learning_rate": 4.4196942838723834e-07, |
| "loss": 0.1498, |
| "step": 57900 |
| }, |
| { |
| "epoch": 0.8801748209299503, |
| "grad_norm": 1.0151070356369019, |
| "learning_rate": 4.311458454234829e-07, |
| "loss": 0.1491, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.881692363724657, |
| "grad_norm": 15.238383293151855, |
| "learning_rate": 4.2045046858008367e-07, |
| "loss": 0.1529, |
| "step": 58100 |
| }, |
| { |
| "epoch": 0.8832099065193638, |
| "grad_norm": 0.8131313323974609, |
| "learning_rate": 4.098835979788507e-07, |
| "loss": 0.1509, |
| "step": 58200 |
| }, |
| { |
| "epoch": 0.8847274493140707, |
| "grad_norm": 2.1564486026763916, |
| "learning_rate": 3.9944553013559153e-07, |
| "loss": 0.1598, |
| "step": 58300 |
| }, |
| { |
| "epoch": 0.8862449921087775, |
| "grad_norm": 1.2616732120513916, |
| "learning_rate": 3.891365579517936e-07, |
| "loss": 0.1538, |
| "step": 58400 |
| }, |
| { |
| "epoch": 0.8877625349034842, |
| "grad_norm": 0.682653546333313, |
| "learning_rate": 3.7895697070640835e-07, |
| "loss": 0.1505, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.8892800776981911, |
| "grad_norm": 0.6538369059562683, |
| "learning_rate": 3.6890705404772575e-07, |
| "loss": 0.1527, |
| "step": 58600 |
| }, |
| { |
| "epoch": 0.8907976204928979, |
| "grad_norm": 0.8717949986457825, |
| "learning_rate": 3.5898708998536866e-07, |
| "loss": 0.1518, |
| "step": 58700 |
| }, |
| { |
| "epoch": 0.8923151632876047, |
| "grad_norm": 0.9392913579940796, |
| "learning_rate": 3.491973568823692e-07, |
| "loss": 0.1537, |
| "step": 58800 |
| }, |
| { |
| "epoch": 0.8938327060823115, |
| "grad_norm": 0.7152376770973206, |
| "learning_rate": 3.395381294473665e-07, |
| "loss": 0.152, |
| "step": 58900 |
| }, |
| { |
| "epoch": 0.8953502488770183, |
| "grad_norm": 1.1813652515411377, |
| "learning_rate": 3.3000967872689135e-07, |
| "loss": 0.1521, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.8968677916717251, |
| "grad_norm": 0.9120343327522278, |
| "learning_rate": 3.206122720977667e-07, |
| "loss": 0.1574, |
| "step": 59100 |
| }, |
| { |
| "epoch": 0.898385334466432, |
| "grad_norm": 3.3989994525909424, |
| "learning_rate": 3.1134617325959795e-07, |
| "loss": 0.1524, |
| "step": 59200 |
| }, |
| { |
| "epoch": 0.8999028772611387, |
| "grad_norm": 1.1465574502944946, |
| "learning_rate": 3.022116422273802e-07, |
| "loss": 0.1508, |
| "step": 59300 |
| }, |
| { |
| "epoch": 0.9014204200558456, |
| "grad_norm": 1.940820574760437, |
| "learning_rate": 2.9320893532419515e-07, |
| "loss": 0.1496, |
| "step": 59400 |
| }, |
| { |
| "epoch": 0.9029379628505524, |
| "grad_norm": 1.1829354763031006, |
| "learning_rate": 2.8433830517402505e-07, |
| "loss": 0.1471, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.9044555056452592, |
| "grad_norm": 1.006872534751892, |
| "learning_rate": 2.7560000069465856e-07, |
| "loss": 0.1461, |
| "step": 59600 |
| }, |
| { |
| "epoch": 0.905973048439966, |
| "grad_norm": 1.5081512928009033, |
| "learning_rate": 2.6699426709071e-07, |
| "loss": 0.1444, |
| "step": 59700 |
| }, |
| { |
| "epoch": 0.9074905912346728, |
| "grad_norm": 0.7259085178375244, |
| "learning_rate": 2.585213458467339e-07, |
| "loss": 0.1454, |
| "step": 59800 |
| }, |
| { |
| "epoch": 0.9090081340293796, |
| "grad_norm": 0.7794526815414429, |
| "learning_rate": 2.501814747204551e-07, |
| "loss": 0.1483, |
| "step": 59900 |
| }, |
| { |
| "epoch": 0.9105256768240865, |
| "grad_norm": 1.0124095678329468, |
| "learning_rate": 2.4197488773609004e-07, |
| "loss": 0.1499, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.9105256768240865, |
| "eval_loss": 0.4795108139514923, |
| "eval_runtime": 988.4632, |
| "eval_samples_per_second": 50.38, |
| "eval_steps_per_second": 6.298, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.9120432196187932, |
| "grad_norm": 0.6119298934936523, |
| "learning_rate": 2.3390181517778665e-07, |
| "loss": 0.1459, |
| "step": 60100 |
| }, |
| { |
| "epoch": 0.9135607624135, |
| "grad_norm": 0.997187614440918, |
| "learning_rate": 2.2596248358315699e-07, |
| "loss": 0.1525, |
| "step": 60200 |
| }, |
| { |
| "epoch": 0.9150783052082069, |
| "grad_norm": 0.8470273017883301, |
| "learning_rate": 2.1815711573692222e-07, |
| "loss": 0.1485, |
| "step": 60300 |
| }, |
| { |
| "epoch": 0.9165958480029137, |
| "grad_norm": 29.62990951538086, |
| "learning_rate": 2.104859306646623e-07, |
| "loss": 0.1497, |
| "step": 60400 |
| }, |
| { |
| "epoch": 0.9181133907976204, |
| "grad_norm": 2.7757768630981445, |
| "learning_rate": 2.0294914362666895e-07, |
| "loss": 0.1453, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.9196309335923273, |
| "grad_norm": 1.834951400756836, |
| "learning_rate": 1.955469661119047e-07, |
| "loss": 0.1486, |
| "step": 60600 |
| }, |
| { |
| "epoch": 0.9211484763870341, |
| "grad_norm": 1.0161601305007935, |
| "learning_rate": 1.8827960583206906e-07, |
| "loss": 0.1478, |
| "step": 60700 |
| }, |
| { |
| "epoch": 0.922666019181741, |
| "grad_norm": 0.5595516562461853, |
| "learning_rate": 1.8114726671576988e-07, |
| "loss": 0.1499, |
| "step": 60800 |
| }, |
| { |
| "epoch": 0.9241835619764477, |
| "grad_norm": 1.4370942115783691, |
| "learning_rate": 1.7415014890280024e-07, |
| "loss": 0.1532, |
| "step": 60900 |
| }, |
| { |
| "epoch": 0.9257011047711545, |
| "grad_norm": 1.0193774700164795, |
| "learning_rate": 1.6728844873852402e-07, |
| "loss": 0.1533, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.9272186475658614, |
| "grad_norm": 1.3867748975753784, |
| "learning_rate": 1.6056235876836413e-07, |
| "loss": 0.1527, |
| "step": 61100 |
| }, |
| { |
| "epoch": 0.9287361903605682, |
| "grad_norm": 0.9046171307563782, |
| "learning_rate": 1.5397206773240136e-07, |
| "loss": 0.1462, |
| "step": 61200 |
| }, |
| { |
| "epoch": 0.9302537331552749, |
| "grad_norm": 0.798395037651062, |
| "learning_rate": 1.4751776056007583e-07, |
| "loss": 0.155, |
| "step": 61300 |
| }, |
| { |
| "epoch": 0.9317712759499818, |
| "grad_norm": 1.0858103036880493, |
| "learning_rate": 1.4119961836500218e-07, |
| "loss": 0.151, |
| "step": 61400 |
| }, |
| { |
| "epoch": 0.9332888187446886, |
| "grad_norm": 0.8098335266113281, |
| "learning_rate": 1.3501781843988038e-07, |
| "loss": 0.1438, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.9348063615393954, |
| "grad_norm": 1.0074046850204468, |
| "learning_rate": 1.2897253425152855e-07, |
| "loss": 0.1452, |
| "step": 61600 |
| }, |
| { |
| "epoch": 0.9363239043341022, |
| "grad_norm": 1.212449312210083, |
| "learning_rate": 1.230639354360086e-07, |
| "loss": 0.1464, |
| "step": 61700 |
| }, |
| { |
| "epoch": 0.937841447128809, |
| "grad_norm": 0.6493209600448608, |
| "learning_rate": 1.1729218779387208e-07, |
| "loss": 0.1487, |
| "step": 61800 |
| }, |
| { |
| "epoch": 0.9393589899235159, |
| "grad_norm": 0.5616501569747925, |
| "learning_rate": 1.1165745328550081e-07, |
| "loss": 0.1537, |
| "step": 61900 |
| }, |
| { |
| "epoch": 0.9408765327182227, |
| "grad_norm": 0.7600206732749939, |
| "learning_rate": 1.0615989002657034e-07, |
| "loss": 0.1532, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.9423940755129294, |
| "grad_norm": 0.9349325895309448, |
| "learning_rate": 1.0079965228360411e-07, |
| "loss": 0.1486, |
| "step": 62100 |
| }, |
| { |
| "epoch": 0.9439116183076363, |
| "grad_norm": 1.0201034545898438, |
| "learning_rate": 9.557689046965302e-08, |
| "loss": 0.1524, |
| "step": 62200 |
| }, |
| { |
| "epoch": 0.9454291611023431, |
| "grad_norm": 0.7782607078552246, |
| "learning_rate": 9.049175114006825e-08, |
| "loss": 0.1523, |
| "step": 62300 |
| }, |
| { |
| "epoch": 0.9469467038970499, |
| "grad_norm": 1.189127802848816, |
| "learning_rate": 8.55443769883929e-08, |
| "loss": 0.1591, |
| "step": 62400 |
| }, |
| { |
| "epoch": 0.9484642466917567, |
| "grad_norm": 0.599915623664856, |
| "learning_rate": 8.07349068423563e-08, |
| "loss": 0.1541, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.9499817894864635, |
| "grad_norm": 1.3700199127197266, |
| "learning_rate": 7.606347565997652e-08, |
| "loss": 0.148, |
| "step": 62600 |
| }, |
| { |
| "epoch": 0.9514993322811703, |
| "grad_norm": 0.8257864117622375, |
| "learning_rate": 7.153021452577846e-08, |
| "loss": 0.1462, |
| "step": 62700 |
| }, |
| { |
| "epoch": 0.9530168750758772, |
| "grad_norm": 0.75310218334198, |
| "learning_rate": 6.713525064710958e-08, |
| "loss": 0.1481, |
| "step": 62800 |
| }, |
| { |
| "epoch": 0.9545344178705839, |
| "grad_norm": 1.1317468881607056, |
| "learning_rate": 6.287870735057488e-08, |
| "loss": 0.1492, |
| "step": 62900 |
| }, |
| { |
| "epoch": 0.9560519606652907, |
| "grad_norm": 0.7714105248451233, |
| "learning_rate": 5.8760704078572593e-08, |
| "loss": 0.1506, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.9575695034599976, |
| "grad_norm": 1.6061201095581055, |
| "learning_rate": 5.478135638594617e-08, |
| "loss": 0.1503, |
| "step": 63100 |
| }, |
| { |
| "epoch": 0.9590870462547044, |
| "grad_norm": 0.6467359066009521, |
| "learning_rate": 5.094077593673863e-08, |
| "loss": 0.1448, |
| "step": 63200 |
| }, |
| { |
| "epoch": 0.9606045890494112, |
| "grad_norm": 0.6276798844337463, |
| "learning_rate": 4.723907050106169e-08, |
| "loss": 0.1508, |
| "step": 63300 |
| }, |
| { |
| "epoch": 0.962122131844118, |
| "grad_norm": 0.886906087398529, |
| "learning_rate": 4.3676343952068765e-08, |
| "loss": 0.1412, |
| "step": 63400 |
| }, |
| { |
| "epoch": 0.9636396746388248, |
| "grad_norm": 0.833092212677002, |
| "learning_rate": 4.0252696263043956e-08, |
| "loss": 0.1562, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.9651572174335317, |
| "grad_norm": 1.2917169332504272, |
| "learning_rate": 3.696822350459206e-08, |
| "loss": 0.1514, |
| "step": 63600 |
| }, |
| { |
| "epoch": 0.9666747602282384, |
| "grad_norm": 1.8088849782943726, |
| "learning_rate": 3.382301784194686e-08, |
| "loss": 0.1483, |
| "step": 63700 |
| }, |
| { |
| "epoch": 0.9681923030229452, |
| "grad_norm": 2.2121670246124268, |
| "learning_rate": 3.0817167532383727e-08, |
| "loss": 0.1434, |
| "step": 63800 |
| }, |
| { |
| "epoch": 0.9697098458176521, |
| "grad_norm": 0.5262467861175537, |
| "learning_rate": 2.795075692274052e-08, |
| "loss": 0.1514, |
| "step": 63900 |
| }, |
| { |
| "epoch": 0.9712273886123589, |
| "grad_norm": 0.9518747925758362, |
| "learning_rate": 2.5223866447055544e-08, |
| "loss": 0.1475, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.9727449314070656, |
| "grad_norm": 0.6502547264099121, |
| "learning_rate": 2.2636572624304964e-08, |
| "loss": 0.1463, |
| "step": 64100 |
| }, |
| { |
| "epoch": 0.9742624742017725, |
| "grad_norm": 1.0486894845962524, |
| "learning_rate": 2.018894805626115e-08, |
| "loss": 0.1543, |
| "step": 64200 |
| }, |
| { |
| "epoch": 0.9757800169964793, |
| "grad_norm": 0.6395145654678345, |
| "learning_rate": 1.788106142545043e-08, |
| "loss": 0.1543, |
| "step": 64300 |
| }, |
| { |
| "epoch": 0.9772975597911862, |
| "grad_norm": 4.918582916259766, |
| "learning_rate": 1.5712977493229088e-08, |
| "loss": 0.1473, |
| "step": 64400 |
| }, |
| { |
| "epoch": 0.9788151025858929, |
| "grad_norm": 0.7501472234725952, |
| "learning_rate": 1.3684757097965351e-08, |
| "loss": 0.1473, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.9803326453805997, |
| "grad_norm": 4.3455491065979, |
| "learning_rate": 1.179645715333133e-08, |
| "loss": 0.1506, |
| "step": 64600 |
| }, |
| { |
| "epoch": 0.9818501881753066, |
| "grad_norm": 1.2910078763961792, |
| "learning_rate": 1.004813064670651e-08, |
| "loss": 0.1474, |
| "step": 64700 |
| }, |
| { |
| "epoch": 0.9833677309700134, |
| "grad_norm": 2.1404385566711426, |
| "learning_rate": 8.439826637691162e-09, |
| "loss": 0.1517, |
| "step": 64800 |
| }, |
| { |
| "epoch": 0.9848852737647201, |
| "grad_norm": 1.020157814025879, |
| "learning_rate": 6.971590256729666e-09, |
| "loss": 0.1517, |
| "step": 64900 |
| }, |
| { |
| "epoch": 0.986402816559427, |
| "grad_norm": 0.48167684674263, |
| "learning_rate": 5.643462703843749e-09, |
| "loss": 0.1505, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.9879203593541338, |
| "grad_norm": 0.8583424091339111, |
| "learning_rate": 4.455481247476745e-09, |
| "loss": 0.1554, |
| "step": 65100 |
| }, |
| { |
| "epoch": 0.9894379021488406, |
| "grad_norm": 0.6753373742103577, |
| "learning_rate": 3.407679223446647e-09, |
| "loss": 0.1524, |
| "step": 65200 |
| }, |
| { |
| "epoch": 0.9909554449435474, |
| "grad_norm": 0.7973353266716003, |
| "learning_rate": 2.5000860340124167e-09, |
| "loss": 0.1569, |
| "step": 65300 |
| }, |
| { |
| "epoch": 0.9924729877382542, |
| "grad_norm": 3.2762601375579834, |
| "learning_rate": 1.7327271470479746e-09, |
| "loss": 0.1558, |
| "step": 65400 |
| }, |
| { |
| "epoch": 0.993990530532961, |
| "grad_norm": 1.0592360496520996, |
| "learning_rate": 1.1056240953283281e-09, |
| "loss": 0.1491, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.9955080733276679, |
| "grad_norm": 1.079147458076477, |
| "learning_rate": 6.18794475923945e-10, |
| "loss": 0.1515, |
| "step": 65600 |
| }, |
| { |
| "epoch": 0.9970256161223746, |
| "grad_norm": 0.4301016628742218, |
| "learning_rate": 2.722519497072584e-10, |
| "loss": 0.1533, |
| "step": 65700 |
| }, |
| { |
| "epoch": 0.9985431589170815, |
| "grad_norm": 3.838508367538452, |
| "learning_rate": 6.600624097075071e-11, |
| "loss": 0.1514, |
| "step": 65800 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 65896, |
| "total_flos": 1.1742044747616118e+20, |
| "train_loss": 0.1813333111942806, |
| "train_runtime": 211973.7233, |
| "train_samples_per_second": 9.948, |
| "train_steps_per_second": 0.311 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 65896, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1742044747616118e+20, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|