| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.988593155893536, |
| "eval_steps": 500, |
| "global_step": 1970, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0025348542458808617, |
| "grad_norm": 1.1835554838180542, |
| "learning_rate": 0.0, |
| "loss": 2.7162, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005069708491761723, |
| "grad_norm": 1.1406067609786987, |
| "learning_rate": 4e-05, |
| "loss": 2.7021, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0076045627376425855, |
| "grad_norm": 1.1929512023925781, |
| "learning_rate": 8e-05, |
| "loss": 2.5728, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.010139416983523447, |
| "grad_norm": 1.523325800895691, |
| "learning_rate": 0.00012, |
| "loss": 2.5825, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.012674271229404309, |
| "grad_norm": 1.712708592414856, |
| "learning_rate": 0.00016, |
| "loss": 2.1986, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.015209125475285171, |
| "grad_norm": 1.263485312461853, |
| "learning_rate": 0.0002, |
| "loss": 2.1478, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.017743979721166033, |
| "grad_norm": 1.2837083339691162, |
| "learning_rate": 0.00019989821882951655, |
| "loss": 2.2153, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.020278833967046894, |
| "grad_norm": 1.0831111669540405, |
| "learning_rate": 0.0001997964376590331, |
| "loss": 1.9272, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.022813688212927757, |
| "grad_norm": 0.7921498417854309, |
| "learning_rate": 0.00019969465648854963, |
| "loss": 1.4929, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.025348542458808618, |
| "grad_norm": 0.9243067502975464, |
| "learning_rate": 0.00019959287531806617, |
| "loss": 1.4312, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02788339670468948, |
| "grad_norm": 1.2378944158554077, |
| "learning_rate": 0.0001994910941475827, |
| "loss": 1.1605, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.030418250950570342, |
| "grad_norm": 1.401106834411621, |
| "learning_rate": 0.00019938931297709925, |
| "loss": 1.0236, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.032953105196451206, |
| "grad_norm": 1.0503413677215576, |
| "learning_rate": 0.00019928753180661578, |
| "loss": 0.8441, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.035487959442332066, |
| "grad_norm": 0.928716778755188, |
| "learning_rate": 0.00019918575063613232, |
| "loss": 0.8098, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03802281368821293, |
| "grad_norm": 0.6546494364738464, |
| "learning_rate": 0.00019908396946564886, |
| "loss": 0.5083, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04055766793409379, |
| "grad_norm": 0.8399775624275208, |
| "learning_rate": 0.0001989821882951654, |
| "loss": 0.5798, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.043092522179974654, |
| "grad_norm": 0.6111662983894348, |
| "learning_rate": 0.00019888040712468194, |
| "loss": 0.471, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.045627376425855515, |
| "grad_norm": 0.6786199808120728, |
| "learning_rate": 0.00019877862595419848, |
| "loss": 0.5124, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.048162230671736375, |
| "grad_norm": 0.7001961469650269, |
| "learning_rate": 0.00019867684478371502, |
| "loss": 0.5764, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.050697084917617236, |
| "grad_norm": 0.5670634508132935, |
| "learning_rate": 0.00019857506361323156, |
| "loss": 0.5595, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.053231939163498096, |
| "grad_norm": 0.6825580596923828, |
| "learning_rate": 0.0001984732824427481, |
| "loss": 0.6601, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05576679340937896, |
| "grad_norm": 0.5777536630630493, |
| "learning_rate": 0.00019837150127226464, |
| "loss": 0.6232, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.058301647655259824, |
| "grad_norm": 0.7791958451271057, |
| "learning_rate": 0.00019826972010178118, |
| "loss": 0.4741, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.060836501901140684, |
| "grad_norm": 0.7647196054458618, |
| "learning_rate": 0.00019816793893129772, |
| "loss": 0.574, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.06337135614702155, |
| "grad_norm": 0.6175855398178101, |
| "learning_rate": 0.00019806615776081426, |
| "loss": 0.6792, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06590621039290241, |
| "grad_norm": 0.7071298360824585, |
| "learning_rate": 0.0001979643765903308, |
| "loss": 0.6333, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06844106463878327, |
| "grad_norm": 0.7675352692604065, |
| "learning_rate": 0.00019786259541984734, |
| "loss": 0.5004, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07097591888466413, |
| "grad_norm": 0.6224766969680786, |
| "learning_rate": 0.00019776081424936387, |
| "loss": 0.5649, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.07351077313054499, |
| "grad_norm": 0.6023550629615784, |
| "learning_rate": 0.00019765903307888041, |
| "loss": 0.4004, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.07604562737642585, |
| "grad_norm": 0.6253474354743958, |
| "learning_rate": 0.00019755725190839695, |
| "loss": 0.548, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07858048162230671, |
| "grad_norm": 0.43560266494750977, |
| "learning_rate": 0.00019745547073791352, |
| "loss": 0.4721, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.08111533586818757, |
| "grad_norm": 0.6321932077407837, |
| "learning_rate": 0.00019735368956743003, |
| "loss": 0.4671, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.08365019011406843, |
| "grad_norm": 0.41977155208587646, |
| "learning_rate": 0.00019725190839694657, |
| "loss": 0.3716, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.08618504435994931, |
| "grad_norm": 0.4449223279953003, |
| "learning_rate": 0.0001971501272264631, |
| "loss": 0.6045, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.08871989860583017, |
| "grad_norm": 0.5593668222427368, |
| "learning_rate": 0.00019704834605597965, |
| "loss": 0.3789, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.09125475285171103, |
| "grad_norm": 0.4293775260448456, |
| "learning_rate": 0.0001969465648854962, |
| "loss": 0.3834, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.09378960709759189, |
| "grad_norm": 0.49535441398620605, |
| "learning_rate": 0.00019684478371501273, |
| "loss": 0.5504, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.09632446134347275, |
| "grad_norm": 0.4620949625968933, |
| "learning_rate": 0.00019674300254452927, |
| "loss": 0.3212, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.09885931558935361, |
| "grad_norm": 0.46665605902671814, |
| "learning_rate": 0.0001966412213740458, |
| "loss": 0.4868, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.10139416983523447, |
| "grad_norm": 0.4120428264141083, |
| "learning_rate": 0.00019653944020356235, |
| "loss": 0.4926, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10392902408111533, |
| "grad_norm": 0.41570335626602173, |
| "learning_rate": 0.00019643765903307889, |
| "loss": 0.5068, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.10646387832699619, |
| "grad_norm": 0.4141896665096283, |
| "learning_rate": 0.00019633587786259542, |
| "loss": 0.4064, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.10899873257287707, |
| "grad_norm": 0.3192928433418274, |
| "learning_rate": 0.00019623409669211196, |
| "loss": 0.4581, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.11153358681875793, |
| "grad_norm": 0.4188425838947296, |
| "learning_rate": 0.00019613231552162853, |
| "loss": 0.371, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.11406844106463879, |
| "grad_norm": 0.3750368654727936, |
| "learning_rate": 0.00019603053435114504, |
| "loss": 0.3728, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.11660329531051965, |
| "grad_norm": 0.5102046728134155, |
| "learning_rate": 0.00019592875318066158, |
| "loss": 0.357, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.11913814955640051, |
| "grad_norm": 0.4143039882183075, |
| "learning_rate": 0.00019582697201017812, |
| "loss": 0.4373, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.12167300380228137, |
| "grad_norm": 0.42558473348617554, |
| "learning_rate": 0.00019572519083969466, |
| "loss": 0.5877, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.12420785804816223, |
| "grad_norm": 0.35768038034439087, |
| "learning_rate": 0.0001956234096692112, |
| "loss": 0.3326, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1267427122940431, |
| "grad_norm": 0.32826319336891174, |
| "learning_rate": 0.00019552162849872774, |
| "loss": 0.3521, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12927756653992395, |
| "grad_norm": 0.3507271409034729, |
| "learning_rate": 0.00019541984732824428, |
| "loss": 0.4157, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.13181242078580482, |
| "grad_norm": 0.5069169402122498, |
| "learning_rate": 0.00019531806615776082, |
| "loss": 0.4453, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.13434727503168567, |
| "grad_norm": 0.4759957492351532, |
| "learning_rate": 0.00019521628498727736, |
| "loss": 0.5131, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.13688212927756654, |
| "grad_norm": 0.4045158326625824, |
| "learning_rate": 0.0001951145038167939, |
| "loss": 0.3927, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1394169835234474, |
| "grad_norm": 0.49629393219947815, |
| "learning_rate": 0.00019501272264631046, |
| "loss": 0.4708, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.14195183776932827, |
| "grad_norm": 0.3735599219799042, |
| "learning_rate": 0.00019491094147582698, |
| "loss": 0.4076, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1444866920152091, |
| "grad_norm": 0.4713466763496399, |
| "learning_rate": 0.00019480916030534354, |
| "loss": 0.4187, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.14702154626108999, |
| "grad_norm": 0.6454377770423889, |
| "learning_rate": 0.00019470737913486005, |
| "loss": 0.4032, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.14955640050697086, |
| "grad_norm": 0.39378786087036133, |
| "learning_rate": 0.00019460559796437662, |
| "loss": 0.3508, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1520912547528517, |
| "grad_norm": 0.3768695592880249, |
| "learning_rate": 0.00019450381679389313, |
| "loss": 0.3129, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15462610899873258, |
| "grad_norm": 0.4250476062297821, |
| "learning_rate": 0.00019440203562340967, |
| "loss": 0.3426, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.15716096324461343, |
| "grad_norm": 0.3653964698314667, |
| "learning_rate": 0.0001943002544529262, |
| "loss": 0.3339, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1596958174904943, |
| "grad_norm": 0.4973353445529938, |
| "learning_rate": 0.00019419847328244275, |
| "loss": 0.4759, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.16223067173637515, |
| "grad_norm": 0.41738295555114746, |
| "learning_rate": 0.0001940966921119593, |
| "loss": 0.3809, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.16476552598225602, |
| "grad_norm": 0.42326119542121887, |
| "learning_rate": 0.00019399491094147583, |
| "loss": 0.3399, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.16730038022813687, |
| "grad_norm": 0.4244116246700287, |
| "learning_rate": 0.00019389312977099237, |
| "loss": 0.4085, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.16983523447401774, |
| "grad_norm": 0.40235379338264465, |
| "learning_rate": 0.0001937913486005089, |
| "loss": 0.3016, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.17237008871989862, |
| "grad_norm": 0.3983120322227478, |
| "learning_rate": 0.00019368956743002547, |
| "loss": 0.5101, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.17490494296577946, |
| "grad_norm": 0.4857071042060852, |
| "learning_rate": 0.00019358778625954199, |
| "loss": 0.3131, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.17743979721166034, |
| "grad_norm": 0.5238108038902283, |
| "learning_rate": 0.00019348600508905855, |
| "loss": 0.5841, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.17997465145754118, |
| "grad_norm": 0.5322052240371704, |
| "learning_rate": 0.00019338422391857506, |
| "loss": 0.3895, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.18250950570342206, |
| "grad_norm": 0.4643409252166748, |
| "learning_rate": 0.00019328244274809163, |
| "loss": 0.364, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1850443599493029, |
| "grad_norm": 0.36517271399497986, |
| "learning_rate": 0.00019318066157760814, |
| "loss": 0.4092, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.18757921419518378, |
| "grad_norm": 0.49409031867980957, |
| "learning_rate": 0.00019307888040712468, |
| "loss": 0.3359, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.19011406844106463, |
| "grad_norm": 0.44665688276290894, |
| "learning_rate": 0.00019297709923664122, |
| "loss": 0.3275, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1926489226869455, |
| "grad_norm": 0.353208065032959, |
| "learning_rate": 0.00019287531806615776, |
| "loss": 0.3396, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.19518377693282637, |
| "grad_norm": 0.4061962366104126, |
| "learning_rate": 0.0001927735368956743, |
| "loss": 0.4658, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.19771863117870722, |
| "grad_norm": 0.4785591959953308, |
| "learning_rate": 0.00019267175572519084, |
| "loss": 0.4705, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2002534854245881, |
| "grad_norm": 0.44644224643707275, |
| "learning_rate": 0.00019256997455470738, |
| "loss": 0.3573, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.20278833967046894, |
| "grad_norm": 0.4554955065250397, |
| "learning_rate": 0.00019246819338422392, |
| "loss": 0.3822, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.20532319391634982, |
| "grad_norm": 0.4537349343299866, |
| "learning_rate": 0.00019236641221374049, |
| "loss": 0.5222, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.20785804816223066, |
| "grad_norm": 0.32820987701416016, |
| "learning_rate": 0.000192264631043257, |
| "loss": 0.3185, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.21039290240811154, |
| "grad_norm": 0.39827391505241394, |
| "learning_rate": 0.00019216284987277356, |
| "loss": 0.3693, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.21292775665399238, |
| "grad_norm": 0.4188093841075897, |
| "learning_rate": 0.00019206106870229008, |
| "loss": 0.4168, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.21546261089987326, |
| "grad_norm": 0.4770517349243164, |
| "learning_rate": 0.00019195928753180664, |
| "loss": 0.4113, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.21799746514575413, |
| "grad_norm": 0.346224844455719, |
| "learning_rate": 0.00019185750636132315, |
| "loss": 0.4238, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.22053231939163498, |
| "grad_norm": 0.37398770451545715, |
| "learning_rate": 0.00019175572519083972, |
| "loss": 0.4285, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.22306717363751585, |
| "grad_norm": 0.35467982292175293, |
| "learning_rate": 0.00019165394402035623, |
| "loss": 0.3201, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2256020278833967, |
| "grad_norm": 0.3411659002304077, |
| "learning_rate": 0.00019155216284987277, |
| "loss": 0.3428, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.22813688212927757, |
| "grad_norm": 0.4002087712287903, |
| "learning_rate": 0.0001914503816793893, |
| "loss": 0.5375, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.23067173637515842, |
| "grad_norm": 0.4339190423488617, |
| "learning_rate": 0.00019134860050890585, |
| "loss": 0.3355, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2332065906210393, |
| "grad_norm": 0.43449410796165466, |
| "learning_rate": 0.00019124681933842242, |
| "loss": 0.4355, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.23574144486692014, |
| "grad_norm": 0.4565323293209076, |
| "learning_rate": 0.00019114503816793893, |
| "loss": 0.3178, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.23827629911280102, |
| "grad_norm": 0.46309894323349, |
| "learning_rate": 0.0001910432569974555, |
| "loss": 0.3308, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.24081115335868186, |
| "grad_norm": 0.3554096817970276, |
| "learning_rate": 0.000190941475826972, |
| "loss": 0.3358, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.24334600760456274, |
| "grad_norm": 0.39129987359046936, |
| "learning_rate": 0.00019083969465648857, |
| "loss": 0.3988, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2458808618504436, |
| "grad_norm": 0.4193456470966339, |
| "learning_rate": 0.0001907379134860051, |
| "loss": 0.4064, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.24841571609632446, |
| "grad_norm": 0.39571425318717957, |
| "learning_rate": 0.00019063613231552165, |
| "loss": 0.3213, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2509505703422053, |
| "grad_norm": 0.48566195368766785, |
| "learning_rate": 0.00019053435114503817, |
| "loss": 0.3505, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2534854245880862, |
| "grad_norm": 0.43266433477401733, |
| "learning_rate": 0.00019043256997455473, |
| "loss": 0.3579, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.25602027883396705, |
| "grad_norm": 0.31110769510269165, |
| "learning_rate": 0.00019033078880407124, |
| "loss": 0.2832, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2585551330798479, |
| "grad_norm": 0.40166690945625305, |
| "learning_rate": 0.00019022900763358778, |
| "loss": 0.2964, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.26108998732572875, |
| "grad_norm": 0.554072380065918, |
| "learning_rate": 0.00019012722646310432, |
| "loss": 0.3661, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.26362484157160965, |
| "grad_norm": 0.45009374618530273, |
| "learning_rate": 0.00019002544529262086, |
| "loss": 0.3812, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2661596958174905, |
| "grad_norm": 0.48349273204803467, |
| "learning_rate": 0.00018992366412213743, |
| "loss": 0.4183, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.26869455006337134, |
| "grad_norm": 0.4157555103302002, |
| "learning_rate": 0.00018982188295165394, |
| "loss": 0.2962, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.27122940430925224, |
| "grad_norm": 0.3300265073776245, |
| "learning_rate": 0.0001897201017811705, |
| "loss": 0.3351, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.2737642585551331, |
| "grad_norm": 0.3690893054008484, |
| "learning_rate": 0.00018961832061068702, |
| "loss": 0.3251, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.27629911280101394, |
| "grad_norm": 0.49013710021972656, |
| "learning_rate": 0.00018951653944020359, |
| "loss": 0.4757, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2788339670468948, |
| "grad_norm": 0.4416143000125885, |
| "learning_rate": 0.0001894147582697201, |
| "loss": 0.4421, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2813688212927757, |
| "grad_norm": 0.3613321781158447, |
| "learning_rate": 0.00018931297709923666, |
| "loss": 0.3475, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.28390367553865653, |
| "grad_norm": 0.45548489689826965, |
| "learning_rate": 0.00018921119592875318, |
| "loss": 0.3587, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2864385297845374, |
| "grad_norm": 0.49439120292663574, |
| "learning_rate": 0.00018910941475826974, |
| "loss": 0.4017, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.2889733840304182, |
| "grad_norm": 0.35214680433273315, |
| "learning_rate": 0.00018900763358778626, |
| "loss": 0.2645, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.2915082382762991, |
| "grad_norm": 0.5512099266052246, |
| "learning_rate": 0.00018890585241730282, |
| "loss": 0.3736, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.29404309252217997, |
| "grad_norm": 0.4146886467933655, |
| "learning_rate": 0.00018880407124681936, |
| "loss": 0.3361, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.2965779467680608, |
| "grad_norm": 0.42954355478286743, |
| "learning_rate": 0.00018870229007633587, |
| "loss": 0.3841, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2991128010139417, |
| "grad_norm": 0.47189798951148987, |
| "learning_rate": 0.00018860050890585244, |
| "loss": 0.3591, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.30164765525982257, |
| "grad_norm": 0.5082337260246277, |
| "learning_rate": 0.00018849872773536895, |
| "loss": 0.4249, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3041825095057034, |
| "grad_norm": 0.4005051255226135, |
| "learning_rate": 0.00018839694656488552, |
| "loss": 0.4433, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.30671736375158426, |
| "grad_norm": 0.4730987250804901, |
| "learning_rate": 0.00018829516539440203, |
| "loss": 0.3575, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.30925221799746516, |
| "grad_norm": 0.5227373242378235, |
| "learning_rate": 0.0001881933842239186, |
| "loss": 0.3511, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.311787072243346, |
| "grad_norm": 0.3693684935569763, |
| "learning_rate": 0.0001880916030534351, |
| "loss": 0.3097, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.31432192648922685, |
| "grad_norm": 0.45321500301361084, |
| "learning_rate": 0.00018798982188295168, |
| "loss": 0.4464, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.31685678073510776, |
| "grad_norm": 0.3797638714313507, |
| "learning_rate": 0.0001878880407124682, |
| "loss": 0.328, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3193916349809886, |
| "grad_norm": 0.3996891975402832, |
| "learning_rate": 0.00018778625954198475, |
| "loss": 0.28, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.32192648922686945, |
| "grad_norm": 0.3931027352809906, |
| "learning_rate": 0.00018768447837150127, |
| "loss": 0.2439, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3244613434727503, |
| "grad_norm": 0.4259742200374603, |
| "learning_rate": 0.00018758269720101783, |
| "loss": 0.3068, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.3269961977186312, |
| "grad_norm": 0.4267159402370453, |
| "learning_rate": 0.00018748091603053437, |
| "loss": 0.3405, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.32953105196451205, |
| "grad_norm": 0.41900908946990967, |
| "learning_rate": 0.0001873791348600509, |
| "loss": 0.327, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3320659062103929, |
| "grad_norm": 0.436499685049057, |
| "learning_rate": 0.00018727735368956745, |
| "loss": 0.5089, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.33460076045627374, |
| "grad_norm": 0.43961402773857117, |
| "learning_rate": 0.00018717557251908396, |
| "loss": 0.339, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.33713561470215464, |
| "grad_norm": 0.45645856857299805, |
| "learning_rate": 0.00018707379134860053, |
| "loss": 0.3738, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3396704689480355, |
| "grad_norm": 0.36948803067207336, |
| "learning_rate": 0.00018697201017811704, |
| "loss": 0.2777, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.34220532319391633, |
| "grad_norm": 0.32040536403656006, |
| "learning_rate": 0.0001868702290076336, |
| "loss": 0.3679, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.34474017743979724, |
| "grad_norm": 0.37474381923675537, |
| "learning_rate": 0.00018676844783715012, |
| "loss": 0.4282, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3472750316856781, |
| "grad_norm": 0.4243752360343933, |
| "learning_rate": 0.0001866666666666667, |
| "loss": 0.533, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.34980988593155893, |
| "grad_norm": 0.39162227511405945, |
| "learning_rate": 0.0001865648854961832, |
| "loss": 0.2989, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3523447401774398, |
| "grad_norm": 0.3585897386074066, |
| "learning_rate": 0.00018646310432569977, |
| "loss": 0.3368, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3548795944233207, |
| "grad_norm": 0.39330482482910156, |
| "learning_rate": 0.00018636132315521628, |
| "loss": 0.4904, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3574144486692015, |
| "grad_norm": 0.3404198884963989, |
| "learning_rate": 0.00018625954198473284, |
| "loss": 0.2684, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.35994930291508237, |
| "grad_norm": 0.34813976287841797, |
| "learning_rate": 0.00018615776081424938, |
| "loss": 0.2988, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.36248415716096327, |
| "grad_norm": 0.4100090265274048, |
| "learning_rate": 0.00018605597964376592, |
| "loss": 0.3325, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3650190114068441, |
| "grad_norm": 0.2897261083126068, |
| "learning_rate": 0.00018595419847328246, |
| "loss": 0.2487, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.36755386565272496, |
| "grad_norm": 0.43023669719696045, |
| "learning_rate": 0.00018585241730279897, |
| "loss": 0.4875, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3700887198986058, |
| "grad_norm": 0.39708128571510315, |
| "learning_rate": 0.00018575063613231554, |
| "loss": 0.3742, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3726235741444867, |
| "grad_norm": 0.4191845953464508, |
| "learning_rate": 0.00018564885496183205, |
| "loss": 0.3253, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.37515842839036756, |
| "grad_norm": 0.3373403549194336, |
| "learning_rate": 0.00018554707379134862, |
| "loss": 0.2636, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.3776932826362484, |
| "grad_norm": 0.3522009551525116, |
| "learning_rate": 0.00018544529262086513, |
| "loss": 0.2413, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.38022813688212925, |
| "grad_norm": 0.4140997529029846, |
| "learning_rate": 0.0001853435114503817, |
| "loss": 0.3663, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.38276299112801015, |
| "grad_norm": 0.3986112177371979, |
| "learning_rate": 0.0001852417302798982, |
| "loss": 0.276, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.385297845373891, |
| "grad_norm": 0.46847087144851685, |
| "learning_rate": 0.00018513994910941478, |
| "loss": 0.3369, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.38783269961977185, |
| "grad_norm": 0.43623679876327515, |
| "learning_rate": 0.00018503816793893132, |
| "loss": 0.37, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.39036755386565275, |
| "grad_norm": 0.4128822684288025, |
| "learning_rate": 0.00018493638676844785, |
| "loss": 0.3763, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3929024081115336, |
| "grad_norm": 0.3352810740470886, |
| "learning_rate": 0.0001848346055979644, |
| "loss": 0.2446, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.39543726235741444, |
| "grad_norm": 0.580634355545044, |
| "learning_rate": 0.00018473282442748093, |
| "loss": 0.3691, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3979721166032953, |
| "grad_norm": 0.452499657869339, |
| "learning_rate": 0.00018463104325699747, |
| "loss": 0.4361, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.4005069708491762, |
| "grad_norm": 0.4160007834434509, |
| "learning_rate": 0.000184529262086514, |
| "loss": 0.4003, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.40304182509505704, |
| "grad_norm": 0.3049513101577759, |
| "learning_rate": 0.00018442748091603055, |
| "loss": 0.2167, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.4055766793409379, |
| "grad_norm": 0.38912078738212585, |
| "learning_rate": 0.00018432569974554706, |
| "loss": 0.2766, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.40811153358681873, |
| "grad_norm": 0.4433249831199646, |
| "learning_rate": 0.00018422391857506363, |
| "loss": 0.3331, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.41064638783269963, |
| "grad_norm": 0.36410561203956604, |
| "learning_rate": 0.00018412213740458014, |
| "loss": 0.2719, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.4131812420785805, |
| "grad_norm": 0.47044846415519714, |
| "learning_rate": 0.0001840203562340967, |
| "loss": 0.3602, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.4157160963244613, |
| "grad_norm": 0.38755008578300476, |
| "learning_rate": 0.00018391857506361322, |
| "loss": 0.2815, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.41825095057034223, |
| "grad_norm": 0.39241930842399597, |
| "learning_rate": 0.0001838167938931298, |
| "loss": 0.3642, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4207858048162231, |
| "grad_norm": 0.37138187885284424, |
| "learning_rate": 0.00018371501272264633, |
| "loss": 0.267, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.4233206590621039, |
| "grad_norm": 0.4508083462715149, |
| "learning_rate": 0.00018361323155216287, |
| "loss": 0.4093, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.42585551330798477, |
| "grad_norm": 0.4390806257724762, |
| "learning_rate": 0.0001835114503816794, |
| "loss": 0.424, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.42839036755386567, |
| "grad_norm": 0.4640062153339386, |
| "learning_rate": 0.00018340966921119594, |
| "loss": 0.4065, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.4309252217997465, |
| "grad_norm": 0.37822040915489197, |
| "learning_rate": 0.00018330788804071248, |
| "loss": 0.2854, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.43346007604562736, |
| "grad_norm": 0.3658731281757355, |
| "learning_rate": 0.00018320610687022902, |
| "loss": 0.2826, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.43599493029150826, |
| "grad_norm": 0.4271928369998932, |
| "learning_rate": 0.00018310432569974556, |
| "loss": 0.4538, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.4385297845373891, |
| "grad_norm": 0.33550775051116943, |
| "learning_rate": 0.00018300254452926207, |
| "loss": 0.3015, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.44106463878326996, |
| "grad_norm": 0.5374005436897278, |
| "learning_rate": 0.00018290076335877864, |
| "loss": 0.2771, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.4435994930291508, |
| "grad_norm": 0.4630737602710724, |
| "learning_rate": 0.00018279898218829515, |
| "loss": 0.3786, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4461343472750317, |
| "grad_norm": 0.4163656234741211, |
| "learning_rate": 0.00018269720101781172, |
| "loss": 0.3224, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.44866920152091255, |
| "grad_norm": 0.43972182273864746, |
| "learning_rate": 0.00018259541984732826, |
| "loss": 0.4192, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.4512040557667934, |
| "grad_norm": 0.4114130437374115, |
| "learning_rate": 0.0001824936386768448, |
| "loss": 0.2979, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.45373891001267425, |
| "grad_norm": 0.5002878308296204, |
| "learning_rate": 0.00018239185750636134, |
| "loss": 0.3339, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.45627376425855515, |
| "grad_norm": 0.42383208870887756, |
| "learning_rate": 0.00018229007633587788, |
| "loss": 0.2958, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.458808618504436, |
| "grad_norm": 0.3234981894493103, |
| "learning_rate": 0.00018218829516539442, |
| "loss": 0.2215, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.46134347275031684, |
| "grad_norm": 0.33356910943984985, |
| "learning_rate": 0.00018208651399491096, |
| "loss": 0.3017, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.46387832699619774, |
| "grad_norm": 0.442376047372818, |
| "learning_rate": 0.0001819847328244275, |
| "loss": 0.2751, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4664131812420786, |
| "grad_norm": 0.4563845992088318, |
| "learning_rate": 0.00018188295165394403, |
| "loss": 0.3001, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.46894803548795944, |
| "grad_norm": 0.3957296907901764, |
| "learning_rate": 0.00018178117048346057, |
| "loss": 0.3864, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4714828897338403, |
| "grad_norm": 0.32932132482528687, |
| "learning_rate": 0.0001816793893129771, |
| "loss": 0.2528, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.4740177439797212, |
| "grad_norm": 0.3960365951061249, |
| "learning_rate": 0.00018157760814249365, |
| "loss": 0.3975, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.47655259822560203, |
| "grad_norm": 0.38450995087623596, |
| "learning_rate": 0.00018147582697201016, |
| "loss": 0.2552, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4790874524714829, |
| "grad_norm": 0.4259994626045227, |
| "learning_rate": 0.00018137404580152673, |
| "loss": 0.3, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4816223067173637, |
| "grad_norm": 0.4965859055519104, |
| "learning_rate": 0.00018127226463104327, |
| "loss": 0.3099, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4841571609632446, |
| "grad_norm": 0.38229548931121826, |
| "learning_rate": 0.0001811704834605598, |
| "loss": 0.3799, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.4866920152091255, |
| "grad_norm": 0.4622017741203308, |
| "learning_rate": 0.00018106870229007635, |
| "loss": 0.4815, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4892268694550063, |
| "grad_norm": 0.3207991123199463, |
| "learning_rate": 0.0001809669211195929, |
| "loss": 0.2534, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.4917617237008872, |
| "grad_norm": 0.3322354555130005, |
| "learning_rate": 0.00018086513994910943, |
| "loss": 0.2331, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.49429657794676807, |
| "grad_norm": 0.35752132534980774, |
| "learning_rate": 0.00018076335877862597, |
| "loss": 0.3621, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.4968314321926489, |
| "grad_norm": 0.2801353633403778, |
| "learning_rate": 0.0001806615776081425, |
| "loss": 0.2198, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.49936628643852976, |
| "grad_norm": 0.5065000057220459, |
| "learning_rate": 0.00018055979643765905, |
| "loss": 0.3806, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.5019011406844106, |
| "grad_norm": 0.4308508336544037, |
| "learning_rate": 0.00018045801526717558, |
| "loss": 0.4028, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5044359949302915, |
| "grad_norm": 0.5432320833206177, |
| "learning_rate": 0.00018035623409669212, |
| "loss": 0.506, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5069708491761724, |
| "grad_norm": 0.37079155445098877, |
| "learning_rate": 0.00018025445292620866, |
| "loss": 0.2242, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5095057034220533, |
| "grad_norm": 0.3533012568950653, |
| "learning_rate": 0.00018015267175572518, |
| "loss": 0.3462, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5120405576679341, |
| "grad_norm": 0.37727662920951843, |
| "learning_rate": 0.00018005089058524174, |
| "loss": 0.2421, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.514575411913815, |
| "grad_norm": 0.42737269401550293, |
| "learning_rate": 0.00017994910941475828, |
| "loss": 0.3338, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5171102661596958, |
| "grad_norm": 0.41085687279701233, |
| "learning_rate": 0.00017984732824427482, |
| "loss": 0.4233, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5196451204055766, |
| "grad_norm": 0.4871644675731659, |
| "learning_rate": 0.00017974554707379136, |
| "loss": 0.3504, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5221799746514575, |
| "grad_norm": 0.308347225189209, |
| "learning_rate": 0.0001796437659033079, |
| "loss": 0.27, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5247148288973384, |
| "grad_norm": 0.31587716937065125, |
| "learning_rate": 0.00017954198473282444, |
| "loss": 0.3161, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5272496831432193, |
| "grad_norm": 0.471392959356308, |
| "learning_rate": 0.00017944020356234098, |
| "loss": 0.3758, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5297845373891001, |
| "grad_norm": 0.33414778113365173, |
| "learning_rate": 0.00017933842239185752, |
| "loss": 0.3095, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.532319391634981, |
| "grad_norm": 0.26553916931152344, |
| "learning_rate": 0.00017923664122137406, |
| "loss": 0.232, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5348542458808618, |
| "grad_norm": 0.27914223074913025, |
| "learning_rate": 0.0001791348600508906, |
| "loss": 0.2438, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.5373891001267427, |
| "grad_norm": 0.36625003814697266, |
| "learning_rate": 0.00017903307888040713, |
| "loss": 0.2479, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.5399239543726235, |
| "grad_norm": 0.3876325488090515, |
| "learning_rate": 0.00017893129770992367, |
| "loss": 0.3428, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.5424588086185045, |
| "grad_norm": 0.5402606129646301, |
| "learning_rate": 0.0001788295165394402, |
| "loss": 0.394, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.5449936628643853, |
| "grad_norm": 0.4023256301879883, |
| "learning_rate": 0.00017872773536895675, |
| "loss": 0.3348, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5475285171102662, |
| "grad_norm": 0.4440263509750366, |
| "learning_rate": 0.0001786259541984733, |
| "loss": 0.3001, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.550063371356147, |
| "grad_norm": 0.39178457856178284, |
| "learning_rate": 0.00017852417302798983, |
| "loss": 0.2561, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.5525982256020279, |
| "grad_norm": 0.5261508226394653, |
| "learning_rate": 0.00017842239185750637, |
| "loss": 0.4583, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.5551330798479087, |
| "grad_norm": 0.3981377184391022, |
| "learning_rate": 0.0001783206106870229, |
| "loss": 0.265, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5576679340937896, |
| "grad_norm": 0.3689790666103363, |
| "learning_rate": 0.00017821882951653945, |
| "loss": 0.3965, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5602027883396705, |
| "grad_norm": 0.38442498445510864, |
| "learning_rate": 0.000178117048346056, |
| "loss": 0.268, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5627376425855514, |
| "grad_norm": 0.3051845133304596, |
| "learning_rate": 0.00017801526717557253, |
| "loss": 0.2362, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5652724968314322, |
| "grad_norm": 0.41551336646080017, |
| "learning_rate": 0.00017791348600508907, |
| "loss": 0.3428, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5678073510773131, |
| "grad_norm": 0.2885109484195709, |
| "learning_rate": 0.0001778117048346056, |
| "loss": 0.2328, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5703422053231939, |
| "grad_norm": 0.48813045024871826, |
| "learning_rate": 0.00017770992366412215, |
| "loss": 0.3502, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5728770595690748, |
| "grad_norm": 0.4413661062717438, |
| "learning_rate": 0.00017760814249363869, |
| "loss": 0.2687, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5754119138149556, |
| "grad_norm": 0.422799289226532, |
| "learning_rate": 0.00017750636132315522, |
| "loss": 0.4776, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5779467680608364, |
| "grad_norm": 0.39486098289489746, |
| "learning_rate": 0.00017740458015267176, |
| "loss": 0.3551, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5804816223067174, |
| "grad_norm": 0.366207480430603, |
| "learning_rate": 0.0001773027989821883, |
| "loss": 0.2639, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5830164765525983, |
| "grad_norm": 0.334626704454422, |
| "learning_rate": 0.00017720101781170484, |
| "loss": 0.2407, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5855513307984791, |
| "grad_norm": 0.5580838918685913, |
| "learning_rate": 0.00017709923664122138, |
| "loss": 0.3856, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5880861850443599, |
| "grad_norm": 0.3495747148990631, |
| "learning_rate": 0.00017699745547073792, |
| "loss": 0.3113, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5906210392902408, |
| "grad_norm": 0.38515543937683105, |
| "learning_rate": 0.00017689567430025446, |
| "loss": 0.3765, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5931558935361216, |
| "grad_norm": 0.43240851163864136, |
| "learning_rate": 0.000176793893129771, |
| "loss": 0.3094, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5956907477820025, |
| "grad_norm": 0.42353445291519165, |
| "learning_rate": 0.00017669211195928754, |
| "loss": 0.2992, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5982256020278834, |
| "grad_norm": 0.42463192343711853, |
| "learning_rate": 0.00017659033078880408, |
| "loss": 0.2486, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6007604562737643, |
| "grad_norm": 0.4749039113521576, |
| "learning_rate": 0.00017648854961832062, |
| "loss": 0.3742, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6032953105196451, |
| "grad_norm": 0.5651363730430603, |
| "learning_rate": 0.00017638676844783716, |
| "loss": 0.3079, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.605830164765526, |
| "grad_norm": 0.34195011854171753, |
| "learning_rate": 0.0001762849872773537, |
| "loss": 0.3236, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6083650190114068, |
| "grad_norm": 0.5522583723068237, |
| "learning_rate": 0.00017618320610687024, |
| "loss": 0.3026, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6108998732572877, |
| "grad_norm": 0.41445448994636536, |
| "learning_rate": 0.00017608142493638677, |
| "loss": 0.32, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.6134347275031685, |
| "grad_norm": 0.5023159384727478, |
| "learning_rate": 0.00017597964376590331, |
| "loss": 0.2658, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6159695817490495, |
| "grad_norm": 0.39539164304733276, |
| "learning_rate": 0.00017587786259541985, |
| "loss": 0.2687, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.6185044359949303, |
| "grad_norm": 0.3105890154838562, |
| "learning_rate": 0.0001757760814249364, |
| "loss": 0.2224, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.6210392902408112, |
| "grad_norm": 0.3665928840637207, |
| "learning_rate": 0.00017567430025445293, |
| "loss": 0.3101, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.623574144486692, |
| "grad_norm": 0.28569111227989197, |
| "learning_rate": 0.00017557251908396947, |
| "loss": 0.2316, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6261089987325729, |
| "grad_norm": 0.24598725140094757, |
| "learning_rate": 0.000175470737913486, |
| "loss": 0.2314, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.6286438529784537, |
| "grad_norm": 0.4301004111766815, |
| "learning_rate": 0.00017536895674300255, |
| "loss": 0.2606, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.6311787072243346, |
| "grad_norm": 0.36598455905914307, |
| "learning_rate": 0.0001752671755725191, |
| "loss": 0.2243, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.6337135614702155, |
| "grad_norm": 0.31714677810668945, |
| "learning_rate": 0.00017516539440203563, |
| "loss": 0.2561, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6362484157160964, |
| "grad_norm": 0.5131182670593262, |
| "learning_rate": 0.0001750636132315522, |
| "loss": 0.3216, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.6387832699619772, |
| "grad_norm": 0.4067549407482147, |
| "learning_rate": 0.0001749618320610687, |
| "loss": 0.3032, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.641318124207858, |
| "grad_norm": 0.6457440853118896, |
| "learning_rate": 0.00017486005089058525, |
| "loss": 0.349, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.6438529784537389, |
| "grad_norm": 0.3759848177433014, |
| "learning_rate": 0.00017475826972010179, |
| "loss": 0.2974, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.6463878326996197, |
| "grad_norm": 0.40348076820373535, |
| "learning_rate": 0.00017465648854961833, |
| "loss": 0.2781, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.6489226869455006, |
| "grad_norm": 0.2639053463935852, |
| "learning_rate": 0.00017455470737913486, |
| "loss": 0.2413, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.6514575411913816, |
| "grad_norm": 0.4014027416706085, |
| "learning_rate": 0.0001744529262086514, |
| "loss": 0.2878, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.6539923954372624, |
| "grad_norm": 0.4871384799480438, |
| "learning_rate": 0.00017435114503816794, |
| "loss": 0.2527, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.6565272496831432, |
| "grad_norm": 0.28687578439712524, |
| "learning_rate": 0.00017424936386768448, |
| "loss": 0.2233, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.6590621039290241, |
| "grad_norm": 0.36948761343955994, |
| "learning_rate": 0.00017414758269720102, |
| "loss": 0.3007, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6615969581749049, |
| "grad_norm": 0.6034134030342102, |
| "learning_rate": 0.00017404580152671756, |
| "loss": 0.3054, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.6641318124207858, |
| "grad_norm": 0.3481515645980835, |
| "learning_rate": 0.0001739440203562341, |
| "loss": 0.2388, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.3772611916065216, |
| "learning_rate": 0.00017384223918575064, |
| "loss": 0.317, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6692015209125475, |
| "grad_norm": 0.4693986177444458, |
| "learning_rate": 0.0001737404580152672, |
| "loss": 0.3441, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6717363751584284, |
| "grad_norm": 0.38484400510787964, |
| "learning_rate": 0.00017363867684478372, |
| "loss": 0.2637, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6742712294043093, |
| "grad_norm": 0.3638555407524109, |
| "learning_rate": 0.00017353689567430026, |
| "loss": 0.2695, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6768060836501901, |
| "grad_norm": 0.36848586797714233, |
| "learning_rate": 0.0001734351145038168, |
| "loss": 0.3149, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.679340937896071, |
| "grad_norm": 0.31740638613700867, |
| "learning_rate": 0.00017333333333333334, |
| "loss": 0.3049, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6818757921419518, |
| "grad_norm": 0.41415438055992126, |
| "learning_rate": 0.00017323155216284988, |
| "loss": 0.231, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6844106463878327, |
| "grad_norm": 0.41449829936027527, |
| "learning_rate": 0.00017312977099236641, |
| "loss": 0.3344, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6869455006337135, |
| "grad_norm": 0.30683189630508423, |
| "learning_rate": 0.00017302798982188295, |
| "loss": 0.283, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6894803548795945, |
| "grad_norm": 0.29896244406700134, |
| "learning_rate": 0.0001729262086513995, |
| "loss": 0.2363, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6920152091254753, |
| "grad_norm": 0.44181492924690247, |
| "learning_rate": 0.00017282442748091603, |
| "loss": 0.3439, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6945500633713562, |
| "grad_norm": 0.43460434675216675, |
| "learning_rate": 0.00017272264631043257, |
| "loss": 0.3004, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.697084917617237, |
| "grad_norm": 0.40781405568122864, |
| "learning_rate": 0.00017262086513994914, |
| "loss": 0.2554, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6996197718631179, |
| "grad_norm": 0.39359861612319946, |
| "learning_rate": 0.00017251908396946565, |
| "loss": 0.3094, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.7021546261089987, |
| "grad_norm": 0.4507496953010559, |
| "learning_rate": 0.00017241730279898222, |
| "loss": 0.2985, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.7046894803548795, |
| "grad_norm": 0.4513093829154968, |
| "learning_rate": 0.00017231552162849873, |
| "loss": 0.4, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7072243346007605, |
| "grad_norm": 0.3133571147918701, |
| "learning_rate": 0.0001722137404580153, |
| "loss": 0.2241, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7097591888466414, |
| "grad_norm": 0.36957162618637085, |
| "learning_rate": 0.0001721119592875318, |
| "loss": 0.2461, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7122940430925222, |
| "grad_norm": 0.4224545955657959, |
| "learning_rate": 0.00017201017811704835, |
| "loss": 0.3178, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.714828897338403, |
| "grad_norm": 0.4696861207485199, |
| "learning_rate": 0.0001719083969465649, |
| "loss": 0.3911, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.7173637515842839, |
| "grad_norm": 0.44058746099472046, |
| "learning_rate": 0.00017180661577608143, |
| "loss": 0.3169, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.7198986058301647, |
| "grad_norm": 0.32616788148880005, |
| "learning_rate": 0.00017170483460559797, |
| "loss": 0.2441, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.7224334600760456, |
| "grad_norm": 0.3941279649734497, |
| "learning_rate": 0.0001716030534351145, |
| "loss": 0.3433, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7249683143219265, |
| "grad_norm": 0.3746216297149658, |
| "learning_rate": 0.00017150127226463104, |
| "loss": 0.3993, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.7275031685678074, |
| "grad_norm": 0.3758716881275177, |
| "learning_rate": 0.00017139949109414758, |
| "loss": 0.3139, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.7300380228136882, |
| "grad_norm": 0.35631927847862244, |
| "learning_rate": 0.00017129770992366415, |
| "loss": 0.2316, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.7325728770595691, |
| "grad_norm": 0.48128026723861694, |
| "learning_rate": 0.00017119592875318066, |
| "loss": 0.3306, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.7351077313054499, |
| "grad_norm": 0.3464122414588928, |
| "learning_rate": 0.00017109414758269723, |
| "loss": 0.3148, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7376425855513308, |
| "grad_norm": 0.3772057294845581, |
| "learning_rate": 0.00017099236641221374, |
| "loss": 0.274, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.7401774397972116, |
| "grad_norm": 0.2896706759929657, |
| "learning_rate": 0.0001708905852417303, |
| "loss": 0.2275, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.7427122940430925, |
| "grad_norm": 0.48482832312583923, |
| "learning_rate": 0.00017078880407124682, |
| "loss": 0.2913, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.7452471482889734, |
| "grad_norm": 0.3086034655570984, |
| "learning_rate": 0.00017068702290076336, |
| "loss": 0.2453, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.7477820025348543, |
| "grad_norm": 0.42840075492858887, |
| "learning_rate": 0.0001705852417302799, |
| "loss": 0.352, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.7503168567807351, |
| "grad_norm": 0.4574609398841858, |
| "learning_rate": 0.00017048346055979644, |
| "loss": 0.3698, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.752851711026616, |
| "grad_norm": 0.4295889735221863, |
| "learning_rate": 0.00017038167938931298, |
| "loss": 0.3341, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.7553865652724968, |
| "grad_norm": 0.46036672592163086, |
| "learning_rate": 0.00017027989821882952, |
| "loss": 0.3175, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.7579214195183777, |
| "grad_norm": 0.45897790789604187, |
| "learning_rate": 0.00017017811704834608, |
| "loss": 0.31, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.7604562737642585, |
| "grad_norm": 0.2966432273387909, |
| "learning_rate": 0.0001700763358778626, |
| "loss": 0.2439, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7629911280101395, |
| "grad_norm": 0.32714638113975525, |
| "learning_rate": 0.00016997455470737916, |
| "loss": 0.2653, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.7655259822560203, |
| "grad_norm": 0.32264646887779236, |
| "learning_rate": 0.00016987277353689567, |
| "loss": 0.2728, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.7680608365019012, |
| "grad_norm": 0.4073767066001892, |
| "learning_rate": 0.00016977099236641224, |
| "loss": 0.3501, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.770595690747782, |
| "grad_norm": 0.5493949055671692, |
| "learning_rate": 0.00016966921119592875, |
| "loss": 0.3212, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.7731305449936628, |
| "grad_norm": 0.335705429315567, |
| "learning_rate": 0.00016956743002544532, |
| "loss": 0.299, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.7756653992395437, |
| "grad_norm": 0.32758405804634094, |
| "learning_rate": 0.00016946564885496183, |
| "loss": 0.2547, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.7782002534854245, |
| "grad_norm": 0.32411983609199524, |
| "learning_rate": 0.0001693638676844784, |
| "loss": 0.2593, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.7807351077313055, |
| "grad_norm": 0.5713444352149963, |
| "learning_rate": 0.0001692620865139949, |
| "loss": 0.3661, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.7832699619771863, |
| "grad_norm": 0.3287065327167511, |
| "learning_rate": 0.00016916030534351145, |
| "loss": 0.2559, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7858048162230672, |
| "grad_norm": 0.3499440550804138, |
| "learning_rate": 0.000169058524173028, |
| "loss": 0.3489, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.788339670468948, |
| "grad_norm": 0.259787917137146, |
| "learning_rate": 0.00016895674300254453, |
| "loss": 0.2451, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7908745247148289, |
| "grad_norm": 0.3902716338634491, |
| "learning_rate": 0.0001688549618320611, |
| "loss": 0.2821, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.7934093789607097, |
| "grad_norm": 0.4061296582221985, |
| "learning_rate": 0.0001687531806615776, |
| "loss": 0.4289, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7959442332065906, |
| "grad_norm": 0.3062605857849121, |
| "learning_rate": 0.00016865139949109417, |
| "loss": 0.2489, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.7984790874524715, |
| "grad_norm": 0.36886945366859436, |
| "learning_rate": 0.00016854961832061068, |
| "loss": 0.4049, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8010139416983524, |
| "grad_norm": 0.25828975439071655, |
| "learning_rate": 0.00016844783715012725, |
| "loss": 0.238, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.8035487959442332, |
| "grad_norm": 0.39747142791748047, |
| "learning_rate": 0.00016834605597964376, |
| "loss": 0.3928, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.8060836501901141, |
| "grad_norm": 0.3884779214859009, |
| "learning_rate": 0.00016824427480916033, |
| "loss": 0.2881, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.8086185044359949, |
| "grad_norm": 0.3687349855899811, |
| "learning_rate": 0.00016814249363867684, |
| "loss": 0.3662, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.8111533586818758, |
| "grad_norm": 0.3631541132926941, |
| "learning_rate": 0.0001680407124681934, |
| "loss": 0.2657, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8136882129277566, |
| "grad_norm": 0.3174535930156708, |
| "learning_rate": 0.00016793893129770992, |
| "loss": 0.2636, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.8162230671736375, |
| "grad_norm": 0.44168904423713684, |
| "learning_rate": 0.00016783715012722646, |
| "loss": 0.2882, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.8187579214195184, |
| "grad_norm": 0.370685875415802, |
| "learning_rate": 0.000167735368956743, |
| "loss": 0.3228, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.8212927756653993, |
| "grad_norm": 0.3001299798488617, |
| "learning_rate": 0.00016763358778625954, |
| "loss": 0.2256, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.8238276299112801, |
| "grad_norm": 0.37992653250694275, |
| "learning_rate": 0.0001675318066157761, |
| "loss": 0.2633, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.826362484157161, |
| "grad_norm": 0.4739125072956085, |
| "learning_rate": 0.00016743002544529262, |
| "loss": 0.3044, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.8288973384030418, |
| "grad_norm": 0.36424344778060913, |
| "learning_rate": 0.00016732824427480918, |
| "loss": 0.3311, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.8314321926489227, |
| "grad_norm": 0.4474777579307556, |
| "learning_rate": 0.0001672264631043257, |
| "loss": 0.4099, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.8339670468948035, |
| "grad_norm": 0.4337301552295685, |
| "learning_rate": 0.00016712468193384226, |
| "loss": 0.3567, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.8365019011406845, |
| "grad_norm": 0.37666353583335876, |
| "learning_rate": 0.00016702290076335877, |
| "loss": 0.3079, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8390367553865653, |
| "grad_norm": 0.36810433864593506, |
| "learning_rate": 0.00016692111959287534, |
| "loss": 0.414, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.8415716096324461, |
| "grad_norm": 0.3914581537246704, |
| "learning_rate": 0.00016681933842239185, |
| "loss": 0.2807, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.844106463878327, |
| "grad_norm": 0.3891938626766205, |
| "learning_rate": 0.00016671755725190842, |
| "loss": 0.3101, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.8466413181242078, |
| "grad_norm": 0.4397302269935608, |
| "learning_rate": 0.00016661577608142493, |
| "loss": 0.2659, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.8491761723700887, |
| "grad_norm": 0.3152853846549988, |
| "learning_rate": 0.0001665139949109415, |
| "loss": 0.308, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8517110266159695, |
| "grad_norm": 0.2894272208213806, |
| "learning_rate": 0.00016641221374045804, |
| "loss": 0.2675, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.8542458808618505, |
| "grad_norm": 0.27995947003364563, |
| "learning_rate": 0.00016631043256997455, |
| "loss": 0.2603, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.8567807351077313, |
| "grad_norm": 0.42209070920944214, |
| "learning_rate": 0.00016620865139949112, |
| "loss": 0.3417, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.8593155893536122, |
| "grad_norm": 0.3781871795654297, |
| "learning_rate": 0.00016610687022900763, |
| "loss": 0.3441, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.861850443599493, |
| "grad_norm": 0.3438952565193176, |
| "learning_rate": 0.0001660050890585242, |
| "loss": 0.2249, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8643852978453739, |
| "grad_norm": 0.32164961099624634, |
| "learning_rate": 0.0001659033078880407, |
| "loss": 0.2472, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.8669201520912547, |
| "grad_norm": 0.3517252504825592, |
| "learning_rate": 0.00016580152671755727, |
| "loss": 0.2434, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.8694550063371356, |
| "grad_norm": 0.29841092228889465, |
| "learning_rate": 0.00016569974554707378, |
| "loss": 0.2536, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.8719898605830165, |
| "grad_norm": 0.3351423144340515, |
| "learning_rate": 0.00016559796437659035, |
| "loss": 0.2501, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.8745247148288974, |
| "grad_norm": 0.3979301154613495, |
| "learning_rate": 0.00016549618320610686, |
| "loss": 0.2358, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.8770595690747782, |
| "grad_norm": 0.3859489858150482, |
| "learning_rate": 0.00016539440203562343, |
| "loss": 0.2675, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.8795944233206591, |
| "grad_norm": 0.3836475908756256, |
| "learning_rate": 0.00016529262086513994, |
| "loss": 0.2179, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.8821292775665399, |
| "grad_norm": 0.3986142575740814, |
| "learning_rate": 0.0001651908396946565, |
| "loss": 0.2599, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.8846641318124208, |
| "grad_norm": 0.4105628430843353, |
| "learning_rate": 0.00016508905852417305, |
| "loss": 0.242, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.8871989860583016, |
| "grad_norm": 0.34334608912467957, |
| "learning_rate": 0.00016498727735368956, |
| "loss": 0.2771, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8897338403041825, |
| "grad_norm": 0.3412443995475769, |
| "learning_rate": 0.00016488549618320613, |
| "loss": 0.2289, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.8922686945500634, |
| "grad_norm": 0.3596668541431427, |
| "learning_rate": 0.00016478371501272264, |
| "loss": 0.2253, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.8948035487959443, |
| "grad_norm": 0.43112802505493164, |
| "learning_rate": 0.0001646819338422392, |
| "loss": 0.3116, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8973384030418251, |
| "grad_norm": 0.4306243062019348, |
| "learning_rate": 0.00016458015267175572, |
| "loss": 0.3099, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.899873257287706, |
| "grad_norm": 0.2773829996585846, |
| "learning_rate": 0.00016447837150127228, |
| "loss": 0.2765, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.9024081115335868, |
| "grad_norm": 0.5014198422431946, |
| "learning_rate": 0.0001643765903307888, |
| "loss": 0.302, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.9049429657794676, |
| "grad_norm": 0.4376792013645172, |
| "learning_rate": 0.00016427480916030536, |
| "loss": 0.2967, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.9074778200253485, |
| "grad_norm": 0.34460946917533875, |
| "learning_rate": 0.00016417302798982187, |
| "loss": 0.3678, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.9100126742712294, |
| "grad_norm": 0.23346909880638123, |
| "learning_rate": 0.00016407124681933844, |
| "loss": 0.2409, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.9125475285171103, |
| "grad_norm": 0.35633108019828796, |
| "learning_rate": 0.00016396946564885498, |
| "loss": 0.3555, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9150823827629911, |
| "grad_norm": 0.26780250668525696, |
| "learning_rate": 0.00016386768447837152, |
| "loss": 0.2543, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.917617237008872, |
| "grad_norm": 0.34583303332328796, |
| "learning_rate": 0.00016376590330788806, |
| "loss": 0.2444, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.9201520912547528, |
| "grad_norm": 0.38331279158592224, |
| "learning_rate": 0.0001636641221374046, |
| "loss": 0.3549, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.9226869455006337, |
| "grad_norm": 0.37290483713150024, |
| "learning_rate": 0.00016356234096692114, |
| "loss": 0.3311, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.9252217997465145, |
| "grad_norm": 0.406568318605423, |
| "learning_rate": 0.00016346055979643765, |
| "loss": 0.2774, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.9277566539923955, |
| "grad_norm": 0.35498303174972534, |
| "learning_rate": 0.00016335877862595422, |
| "loss": 0.2121, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.9302915082382763, |
| "grad_norm": 0.3682021498680115, |
| "learning_rate": 0.00016325699745547073, |
| "loss": 0.2648, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.9328263624841572, |
| "grad_norm": 0.37826359272003174, |
| "learning_rate": 0.0001631552162849873, |
| "loss": 0.2214, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.935361216730038, |
| "grad_norm": 0.4018029570579529, |
| "learning_rate": 0.0001630534351145038, |
| "loss": 0.2291, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.9378960709759189, |
| "grad_norm": 0.4628411531448364, |
| "learning_rate": 0.00016295165394402037, |
| "loss": 0.3486, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9404309252217997, |
| "grad_norm": 0.5615106821060181, |
| "learning_rate": 0.00016284987277353689, |
| "loss": 0.3281, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.9429657794676806, |
| "grad_norm": 0.40337833762168884, |
| "learning_rate": 0.00016274809160305345, |
| "loss": 0.22, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.9455006337135615, |
| "grad_norm": 0.4247727692127228, |
| "learning_rate": 0.00016264631043257, |
| "loss": 0.2801, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.9480354879594424, |
| "grad_norm": 0.28746598958969116, |
| "learning_rate": 0.00016254452926208653, |
| "loss": 0.2349, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.9505703422053232, |
| "grad_norm": 0.3654968738555908, |
| "learning_rate": 0.00016244274809160307, |
| "loss": 0.2696, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.9531051964512041, |
| "grad_norm": 0.3999825417995453, |
| "learning_rate": 0.0001623409669211196, |
| "loss": 0.4228, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.9556400506970849, |
| "grad_norm": 0.3065613806247711, |
| "learning_rate": 0.00016223918575063615, |
| "loss": 0.2505, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.9581749049429658, |
| "grad_norm": 0.3503481149673462, |
| "learning_rate": 0.0001621374045801527, |
| "loss": 0.2953, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.9607097591888466, |
| "grad_norm": 0.28918176889419556, |
| "learning_rate": 0.00016203562340966923, |
| "loss": 0.2454, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.9632446134347274, |
| "grad_norm": 0.3047085404396057, |
| "learning_rate": 0.00016193384223918574, |
| "loss": 0.2639, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9657794676806084, |
| "grad_norm": 0.3775922358036041, |
| "learning_rate": 0.0001618320610687023, |
| "loss": 0.3787, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.9683143219264893, |
| "grad_norm": 0.32147660851478577, |
| "learning_rate": 0.00016173027989821882, |
| "loss": 0.2273, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.9708491761723701, |
| "grad_norm": 0.355747252702713, |
| "learning_rate": 0.00016162849872773538, |
| "loss": 0.2805, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.973384030418251, |
| "grad_norm": 0.2670198082923889, |
| "learning_rate": 0.0001615267175572519, |
| "loss": 0.2393, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.9759188846641318, |
| "grad_norm": 0.3395114839076996, |
| "learning_rate": 0.00016142493638676846, |
| "loss": 0.2893, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.9784537389100126, |
| "grad_norm": 0.3189052641391754, |
| "learning_rate": 0.000161323155216285, |
| "loss": 0.2442, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.9809885931558935, |
| "grad_norm": 0.49379605054855347, |
| "learning_rate": 0.00016122137404580154, |
| "loss": 0.3126, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.9835234474017744, |
| "grad_norm": 0.2787371575832367, |
| "learning_rate": 0.00016111959287531808, |
| "loss": 0.2329, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.9860583016476553, |
| "grad_norm": 0.3559485673904419, |
| "learning_rate": 0.00016101781170483462, |
| "loss": 0.335, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.9885931558935361, |
| "grad_norm": 0.43041396141052246, |
| "learning_rate": 0.00016091603053435116, |
| "loss": 0.3069, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.991128010139417, |
| "grad_norm": 0.3231935203075409, |
| "learning_rate": 0.0001608142493638677, |
| "loss": 0.2354, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.9936628643852978, |
| "grad_norm": 0.3676549792289734, |
| "learning_rate": 0.00016071246819338424, |
| "loss": 0.2958, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.9961977186311787, |
| "grad_norm": 0.37902191281318665, |
| "learning_rate": 0.00016061068702290075, |
| "loss": 0.2792, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.9987325728770595, |
| "grad_norm": 0.47126442193984985, |
| "learning_rate": 0.00016050890585241732, |
| "loss": 0.4871, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.4303727447986603, |
| "learning_rate": 0.00016040712468193383, |
| "loss": 0.2121, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.002534854245881, |
| "grad_norm": 0.3156070411205292, |
| "learning_rate": 0.0001603053435114504, |
| "loss": 0.2528, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.0050697084917617, |
| "grad_norm": 0.3030865788459778, |
| "learning_rate": 0.00016020356234096693, |
| "loss": 0.2029, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.0076045627376427, |
| "grad_norm": 0.2900277376174927, |
| "learning_rate": 0.00016010178117048347, |
| "loss": 0.2192, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.0101394169835234, |
| "grad_norm": 0.4288582503795624, |
| "learning_rate": 0.00016, |
| "loss": 0.308, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.0126742712294043, |
| "grad_norm": 0.3376273214817047, |
| "learning_rate": 0.00015989821882951655, |
| "loss": 0.2569, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.015209125475285, |
| "grad_norm": 0.39375385642051697, |
| "learning_rate": 0.0001597964376590331, |
| "loss": 0.2104, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.017743979721166, |
| "grad_norm": 0.2907378077507019, |
| "learning_rate": 0.00015969465648854963, |
| "loss": 0.2057, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.020278833967047, |
| "grad_norm": 0.3524622917175293, |
| "learning_rate": 0.00015959287531806617, |
| "loss": 0.2296, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.0228136882129277, |
| "grad_norm": 0.36487293243408203, |
| "learning_rate": 0.0001594910941475827, |
| "loss": 0.2133, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.0253485424588087, |
| "grad_norm": 0.4489257335662842, |
| "learning_rate": 0.00015938931297709925, |
| "loss": 0.2162, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0278833967046894, |
| "grad_norm": 0.41142696142196655, |
| "learning_rate": 0.0001592875318066158, |
| "loss": 0.2383, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.0304182509505704, |
| "grad_norm": 0.3364538848400116, |
| "learning_rate": 0.00015918575063613233, |
| "loss": 0.2077, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.0329531051964511, |
| "grad_norm": 0.576775312423706, |
| "learning_rate": 0.00015908396946564884, |
| "loss": 0.2435, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.035487959442332, |
| "grad_norm": 0.6190880537033081, |
| "learning_rate": 0.0001589821882951654, |
| "loss": 0.252, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.038022813688213, |
| "grad_norm": 0.4943700432777405, |
| "learning_rate": 0.00015888040712468195, |
| "loss": 0.3275, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0405576679340938, |
| "grad_norm": 0.3160712420940399, |
| "learning_rate": 0.00015877862595419848, |
| "loss": 0.217, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.0430925221799747, |
| "grad_norm": 0.34546172618865967, |
| "learning_rate": 0.00015867684478371502, |
| "loss": 0.2509, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.0456273764258555, |
| "grad_norm": 0.3498256802558899, |
| "learning_rate": 0.00015857506361323156, |
| "loss": 0.2376, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.0481622306717364, |
| "grad_norm": 0.29526984691619873, |
| "learning_rate": 0.0001584732824427481, |
| "loss": 0.2305, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.0506970849176172, |
| "grad_norm": 0.30113956332206726, |
| "learning_rate": 0.00015837150127226464, |
| "loss": 0.2205, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.053231939163498, |
| "grad_norm": 0.4007863402366638, |
| "learning_rate": 0.00015826972010178118, |
| "loss": 0.2407, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.055766793409379, |
| "grad_norm": 0.2594064176082611, |
| "learning_rate": 0.00015816793893129772, |
| "loss": 0.1923, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.0583016476552598, |
| "grad_norm": 0.23412476480007172, |
| "learning_rate": 0.00015806615776081426, |
| "loss": 0.2158, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.0608365019011408, |
| "grad_norm": 0.397443562746048, |
| "learning_rate": 0.0001579643765903308, |
| "loss": 0.3666, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.0633713561470215, |
| "grad_norm": 0.3756926655769348, |
| "learning_rate": 0.00015786259541984734, |
| "loss": 0.2081, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0659062103929025, |
| "grad_norm": 0.5698515772819519, |
| "learning_rate": 0.00015776081424936388, |
| "loss": 0.2265, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.0684410646387832, |
| "grad_norm": 0.3608737289905548, |
| "learning_rate": 0.00015765903307888042, |
| "loss": 0.3821, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.0709759188846641, |
| "grad_norm": 0.4109106957912445, |
| "learning_rate": 0.00015755725190839696, |
| "loss": 0.3484, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.073510773130545, |
| "grad_norm": 0.38270992040634155, |
| "learning_rate": 0.0001574554707379135, |
| "loss": 0.2365, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.0760456273764258, |
| "grad_norm": 0.2857488989830017, |
| "learning_rate": 0.00015735368956743004, |
| "loss": 0.263, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.0785804816223068, |
| "grad_norm": 0.25236523151397705, |
| "learning_rate": 0.00015725190839694657, |
| "loss": 0.2216, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.0811153358681875, |
| "grad_norm": 0.40370991826057434, |
| "learning_rate": 0.00015715012722646311, |
| "loss": 0.3711, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.0836501901140685, |
| "grad_norm": 0.2624306380748749, |
| "learning_rate": 0.00015704834605597965, |
| "loss": 0.2082, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.0861850443599492, |
| "grad_norm": 0.4375905692577362, |
| "learning_rate": 0.0001569465648854962, |
| "loss": 0.3474, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.0887198986058302, |
| "grad_norm": 0.3287188410758972, |
| "learning_rate": 0.00015684478371501273, |
| "loss": 0.3097, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.091254752851711, |
| "grad_norm": 0.2669587731361389, |
| "learning_rate": 0.00015674300254452927, |
| "loss": 0.229, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.0937896070975919, |
| "grad_norm": 0.28192129731178284, |
| "learning_rate": 0.0001566412213740458, |
| "loss": 0.2226, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.0963244613434728, |
| "grad_norm": 0.30673590302467346, |
| "learning_rate": 0.00015653944020356235, |
| "loss": 0.2331, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.0988593155893536, |
| "grad_norm": 0.34343135356903076, |
| "learning_rate": 0.0001564376590330789, |
| "loss": 0.2567, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.1013941698352345, |
| "grad_norm": 0.4853306710720062, |
| "learning_rate": 0.00015633587786259543, |
| "loss": 0.3688, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.1039290240811153, |
| "grad_norm": 0.42215099930763245, |
| "learning_rate": 0.00015623409669211197, |
| "loss": 0.3465, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.1064638783269962, |
| "grad_norm": 0.5882295370101929, |
| "learning_rate": 0.0001561323155216285, |
| "loss": 0.4502, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.1089987325728772, |
| "grad_norm": 0.44578316807746887, |
| "learning_rate": 0.00015603053435114505, |
| "loss": 0.3345, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.111533586818758, |
| "grad_norm": 0.366653174161911, |
| "learning_rate": 0.00015592875318066159, |
| "loss": 0.2111, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.1140684410646389, |
| "grad_norm": 0.4964495003223419, |
| "learning_rate": 0.00015582697201017812, |
| "loss": 0.2731, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.1166032953105196, |
| "grad_norm": 0.3171039819717407, |
| "learning_rate": 0.00015572519083969466, |
| "loss": 0.2148, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.1191381495564006, |
| "grad_norm": 0.3483026921749115, |
| "learning_rate": 0.0001556234096692112, |
| "loss": 0.2481, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.1216730038022813, |
| "grad_norm": 0.37379321455955505, |
| "learning_rate": 0.00015552162849872774, |
| "loss": 0.3292, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.1242078580481623, |
| "grad_norm": 0.32108721137046814, |
| "learning_rate": 0.00015541984732824428, |
| "loss": 0.3363, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.126742712294043, |
| "grad_norm": 0.3879946768283844, |
| "learning_rate": 0.00015531806615776082, |
| "loss": 0.2891, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.129277566539924, |
| "grad_norm": 0.2334345281124115, |
| "learning_rate": 0.00015521628498727736, |
| "loss": 0.2183, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.131812420785805, |
| "grad_norm": 0.274795264005661, |
| "learning_rate": 0.0001551145038167939, |
| "loss": 0.2002, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.1343472750316856, |
| "grad_norm": 0.45602667331695557, |
| "learning_rate": 0.00015501272264631044, |
| "loss": 0.3282, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.1368821292775666, |
| "grad_norm": 0.25433096289634705, |
| "learning_rate": 0.00015491094147582698, |
| "loss": 0.2195, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.1394169835234473, |
| "grad_norm": 0.3606742024421692, |
| "learning_rate": 0.00015480916030534352, |
| "loss": 0.244, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1419518377693283, |
| "grad_norm": 0.3597625494003296, |
| "learning_rate": 0.00015470737913486006, |
| "loss": 0.2117, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.144486692015209, |
| "grad_norm": 0.32967302203178406, |
| "learning_rate": 0.0001546055979643766, |
| "loss": 0.2662, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.14702154626109, |
| "grad_norm": 0.32538869976997375, |
| "learning_rate": 0.00015450381679389314, |
| "loss": 0.2439, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.149556400506971, |
| "grad_norm": 0.36263129115104675, |
| "learning_rate": 0.00015440203562340968, |
| "loss": 0.2688, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.1520912547528517, |
| "grad_norm": 0.4200229346752167, |
| "learning_rate": 0.00015430025445292621, |
| "loss": 0.3201, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.1546261089987326, |
| "grad_norm": 0.35889115929603577, |
| "learning_rate": 0.00015419847328244275, |
| "loss": 0.2584, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.1571609632446134, |
| "grad_norm": 0.36060044169425964, |
| "learning_rate": 0.0001540966921119593, |
| "loss": 0.2496, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.1596958174904943, |
| "grad_norm": 0.3046696186065674, |
| "learning_rate": 0.00015399491094147583, |
| "loss": 0.2102, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.162230671736375, |
| "grad_norm": 0.4576256275177002, |
| "learning_rate": 0.00015389312977099237, |
| "loss": 0.3594, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.164765525982256, |
| "grad_norm": 0.3436565697193146, |
| "learning_rate": 0.0001537913486005089, |
| "loss": 0.2289, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.167300380228137, |
| "grad_norm": 0.4197808802127838, |
| "learning_rate": 0.00015368956743002545, |
| "loss": 0.2863, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.1698352344740177, |
| "grad_norm": 0.3584151566028595, |
| "learning_rate": 0.000153587786259542, |
| "loss": 0.2797, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.1723700887198987, |
| "grad_norm": 0.29760056734085083, |
| "learning_rate": 0.00015348600508905853, |
| "loss": 0.212, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.1749049429657794, |
| "grad_norm": 0.3856862485408783, |
| "learning_rate": 0.00015338422391857507, |
| "loss": 0.2986, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.1774397972116604, |
| "grad_norm": 0.42522993683815, |
| "learning_rate": 0.0001532824427480916, |
| "loss": 0.2869, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.179974651457541, |
| "grad_norm": 0.33221253752708435, |
| "learning_rate": 0.00015318066157760815, |
| "loss": 0.2236, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.182509505703422, |
| "grad_norm": 0.35414496064186096, |
| "learning_rate": 0.00015307888040712469, |
| "loss": 0.2658, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.1850443599493028, |
| "grad_norm": 0.41883930563926697, |
| "learning_rate": 0.00015297709923664123, |
| "loss": 0.3939, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.1875792141951838, |
| "grad_norm": 0.3070299029350281, |
| "learning_rate": 0.00015287531806615776, |
| "loss": 0.2208, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.1901140684410647, |
| "grad_norm": 0.30749714374542236, |
| "learning_rate": 0.0001527735368956743, |
| "loss": 0.242, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.1926489226869454, |
| "grad_norm": 0.2579677104949951, |
| "learning_rate": 0.00015267175572519084, |
| "loss": 0.2435, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.1951837769328264, |
| "grad_norm": 0.46220460534095764, |
| "learning_rate": 0.00015256997455470738, |
| "loss": 0.2803, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.1977186311787071, |
| "grad_norm": 0.3824957609176636, |
| "learning_rate": 0.00015246819338422392, |
| "loss": 0.3143, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.200253485424588, |
| "grad_norm": 0.3049899637699127, |
| "learning_rate": 0.00015236641221374046, |
| "loss": 0.2231, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.202788339670469, |
| "grad_norm": 0.4378805458545685, |
| "learning_rate": 0.000152264631043257, |
| "loss": 0.2041, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.2053231939163498, |
| "grad_norm": 0.3902495801448822, |
| "learning_rate": 0.00015216284987277354, |
| "loss": 0.3055, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.2078580481622307, |
| "grad_norm": 0.3150664269924164, |
| "learning_rate": 0.00015206106870229008, |
| "loss": 0.2222, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.2103929024081115, |
| "grad_norm": 0.3551795184612274, |
| "learning_rate": 0.00015195928753180662, |
| "loss": 0.2304, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.2129277566539924, |
| "grad_norm": 0.35522422194480896, |
| "learning_rate": 0.00015185750636132316, |
| "loss": 0.2636, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.2154626108998732, |
| "grad_norm": 0.35261449217796326, |
| "learning_rate": 0.0001517557251908397, |
| "loss": 0.2743, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2179974651457541, |
| "grad_norm": 0.4755167067050934, |
| "learning_rate": 0.00015165394402035624, |
| "loss": 0.321, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.2205323193916349, |
| "grad_norm": 0.36083585023880005, |
| "learning_rate": 0.0001515521628498728, |
| "loss": 0.2549, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.2230671736375158, |
| "grad_norm": 0.3213503956794739, |
| "learning_rate": 0.00015145038167938932, |
| "loss": 0.2685, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.2256020278833968, |
| "grad_norm": 0.29988422989845276, |
| "learning_rate": 0.00015134860050890588, |
| "loss": 0.3253, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.2281368821292775, |
| "grad_norm": 0.3549601435661316, |
| "learning_rate": 0.0001512468193384224, |
| "loss": 0.2574, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.2306717363751585, |
| "grad_norm": 0.33347830176353455, |
| "learning_rate": 0.00015114503816793893, |
| "loss": 0.3408, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.2332065906210392, |
| "grad_norm": 0.2988692820072174, |
| "learning_rate": 0.00015104325699745547, |
| "loss": 0.2583, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.2357414448669202, |
| "grad_norm": 0.2710984945297241, |
| "learning_rate": 0.000150941475826972, |
| "loss": 0.2708, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.2382762991128011, |
| "grad_norm": 0.28278592228889465, |
| "learning_rate": 0.00015083969465648855, |
| "loss": 0.2345, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.2408111533586819, |
| "grad_norm": 0.31838810443878174, |
| "learning_rate": 0.0001507379134860051, |
| "loss": 0.2193, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2433460076045628, |
| "grad_norm": 0.31196919083595276, |
| "learning_rate": 0.00015063613231552163, |
| "loss": 0.2334, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.2458808618504436, |
| "grad_norm": 0.3953218460083008, |
| "learning_rate": 0.00015053435114503817, |
| "loss": 0.2716, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.2484157160963245, |
| "grad_norm": 0.4814457297325134, |
| "learning_rate": 0.0001504325699745547, |
| "loss": 0.2847, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.2509505703422052, |
| "grad_norm": 0.5870761275291443, |
| "learning_rate": 0.00015033078880407125, |
| "loss": 0.3685, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.2534854245880862, |
| "grad_norm": 0.30315646529197693, |
| "learning_rate": 0.00015022900763358781, |
| "loss": 0.2112, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.256020278833967, |
| "grad_norm": 0.4358583390712738, |
| "learning_rate": 0.00015012722646310433, |
| "loss": 0.279, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.258555133079848, |
| "grad_norm": 0.3699369728565216, |
| "learning_rate": 0.0001500254452926209, |
| "loss": 0.2941, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.2610899873257289, |
| "grad_norm": 0.338522344827652, |
| "learning_rate": 0.0001499236641221374, |
| "loss": 0.273, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.2636248415716096, |
| "grad_norm": 0.29661208391189575, |
| "learning_rate": 0.00014982188295165397, |
| "loss": 0.23, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.2661596958174905, |
| "grad_norm": 0.4247685968875885, |
| "learning_rate": 0.00014972010178117048, |
| "loss": 0.3112, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2686945500633713, |
| "grad_norm": 0.44488340616226196, |
| "learning_rate": 0.00014961832061068702, |
| "loss": 0.3796, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.2712294043092522, |
| "grad_norm": 0.30672356486320496, |
| "learning_rate": 0.00014951653944020356, |
| "loss": 0.2222, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.2737642585551332, |
| "grad_norm": 0.3291172981262207, |
| "learning_rate": 0.0001494147582697201, |
| "loss": 0.2177, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.276299112801014, |
| "grad_norm": 0.4180152118206024, |
| "learning_rate": 0.00014931297709923664, |
| "loss": 0.3673, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.2788339670468947, |
| "grad_norm": 0.41350388526916504, |
| "learning_rate": 0.00014921119592875318, |
| "loss": 0.2544, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.2813688212927756, |
| "grad_norm": 0.3517690598964691, |
| "learning_rate": 0.00014910941475826972, |
| "loss": 0.2139, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.2839036755386566, |
| "grad_norm": 0.4273949861526489, |
| "learning_rate": 0.00014900763358778626, |
| "loss": 0.255, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.2864385297845373, |
| "grad_norm": 0.3510381877422333, |
| "learning_rate": 0.00014890585241730283, |
| "loss": 0.2503, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.2889733840304183, |
| "grad_norm": 0.4069119393825531, |
| "learning_rate": 0.00014880407124681934, |
| "loss": 0.3267, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.291508238276299, |
| "grad_norm": 0.6244072318077087, |
| "learning_rate": 0.0001487022900763359, |
| "loss": 0.2519, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.29404309252218, |
| "grad_norm": 0.473450630903244, |
| "learning_rate": 0.00014860050890585242, |
| "loss": 0.3093, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.296577946768061, |
| "grad_norm": 0.3139822781085968, |
| "learning_rate": 0.00014849872773536898, |
| "loss": 0.2396, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.2991128010139417, |
| "grad_norm": 0.23700624704360962, |
| "learning_rate": 0.0001483969465648855, |
| "loss": 0.1945, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.3016476552598226, |
| "grad_norm": 0.42849189043045044, |
| "learning_rate": 0.00014829516539440203, |
| "loss": 0.2275, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.3041825095057034, |
| "grad_norm": 0.4083426296710968, |
| "learning_rate": 0.00014819338422391857, |
| "loss": 0.3626, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.3067173637515843, |
| "grad_norm": 0.4541410207748413, |
| "learning_rate": 0.0001480916030534351, |
| "loss": 0.3102, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.3092522179974653, |
| "grad_norm": 0.6483343839645386, |
| "learning_rate": 0.00014798982188295165, |
| "loss": 0.3427, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.311787072243346, |
| "grad_norm": 0.3928525447845459, |
| "learning_rate": 0.0001478880407124682, |
| "loss": 0.3155, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.3143219264892267, |
| "grad_norm": 0.319035142660141, |
| "learning_rate": 0.00014778625954198476, |
| "loss": 0.2555, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.3168567807351077, |
| "grad_norm": 0.2855183780193329, |
| "learning_rate": 0.00014768447837150127, |
| "loss": 0.2115, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.3193916349809887, |
| "grad_norm": 0.3499714136123657, |
| "learning_rate": 0.00014758269720101784, |
| "loss": 0.254, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.3219264892268694, |
| "grad_norm": 0.40895748138427734, |
| "learning_rate": 0.00014748091603053435, |
| "loss": 0.2975, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.3244613434727504, |
| "grad_norm": 0.30614539980888367, |
| "learning_rate": 0.00014737913486005091, |
| "loss": 0.2584, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.326996197718631, |
| "grad_norm": 0.2832574248313904, |
| "learning_rate": 0.00014727735368956743, |
| "loss": 0.2259, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.329531051964512, |
| "grad_norm": 0.3444589674472809, |
| "learning_rate": 0.000147175572519084, |
| "loss": 0.2608, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.332065906210393, |
| "grad_norm": 0.35170844197273254, |
| "learning_rate": 0.0001470737913486005, |
| "loss": 0.3019, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.3346007604562737, |
| "grad_norm": 0.46164563298225403, |
| "learning_rate": 0.00014697201017811707, |
| "loss": 0.2024, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.3371356147021547, |
| "grad_norm": 0.2369971126317978, |
| "learning_rate": 0.00014687022900763358, |
| "loss": 0.1967, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.3396704689480354, |
| "grad_norm": 0.43180060386657715, |
| "learning_rate": 0.00014676844783715012, |
| "loss": 0.2415, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.3422053231939164, |
| "grad_norm": 0.3531292676925659, |
| "learning_rate": 0.00014666666666666666, |
| "loss": 0.2283, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3447401774397973, |
| "grad_norm": 0.49374547600746155, |
| "learning_rate": 0.0001465648854961832, |
| "loss": 0.3025, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.347275031685678, |
| "grad_norm": 0.4822668731212616, |
| "learning_rate": 0.00014646310432569977, |
| "loss": 0.3498, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.3498098859315588, |
| "grad_norm": 0.4463392496109009, |
| "learning_rate": 0.00014636132315521628, |
| "loss": 0.2186, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.3523447401774398, |
| "grad_norm": 0.40042299032211304, |
| "learning_rate": 0.00014625954198473285, |
| "loss": 0.2316, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.3548795944233207, |
| "grad_norm": 0.41266927123069763, |
| "learning_rate": 0.00014615776081424936, |
| "loss": 0.2324, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.3574144486692015, |
| "grad_norm": 0.46208152174949646, |
| "learning_rate": 0.00014605597964376593, |
| "loss": 0.2261, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.3599493029150824, |
| "grad_norm": 0.38895705342292786, |
| "learning_rate": 0.00014595419847328244, |
| "loss": 0.2732, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.3624841571609632, |
| "grad_norm": 0.4489743113517761, |
| "learning_rate": 0.000145852417302799, |
| "loss": 0.3197, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.3650190114068441, |
| "grad_norm": 0.25082916021347046, |
| "learning_rate": 0.00014575063613231552, |
| "loss": 0.2096, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.367553865652725, |
| "grad_norm": 0.3681942820549011, |
| "learning_rate": 0.00014564885496183208, |
| "loss": 0.2496, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.3700887198986058, |
| "grad_norm": 0.30986878275871277, |
| "learning_rate": 0.0001455470737913486, |
| "loss": 0.2244, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.3726235741444868, |
| "grad_norm": 0.42349961400032043, |
| "learning_rate": 0.00014544529262086513, |
| "loss": 0.2315, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.3751584283903675, |
| "grad_norm": 0.29656872153282166, |
| "learning_rate": 0.00014534351145038167, |
| "loss": 0.2458, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.3776932826362485, |
| "grad_norm": 0.4033924341201782, |
| "learning_rate": 0.0001452417302798982, |
| "loss": 0.3506, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.3802281368821292, |
| "grad_norm": 0.3998583257198334, |
| "learning_rate": 0.00014513994910941478, |
| "loss": 0.3108, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.3827629911280102, |
| "grad_norm": 0.3335135281085968, |
| "learning_rate": 0.0001450381679389313, |
| "loss": 0.2816, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.385297845373891, |
| "grad_norm": 0.39304816722869873, |
| "learning_rate": 0.00014493638676844786, |
| "loss": 0.3968, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.3878326996197718, |
| "grad_norm": 0.34913384914398193, |
| "learning_rate": 0.00014483460559796437, |
| "loss": 0.2653, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.3903675538656528, |
| "grad_norm": 0.3312399387359619, |
| "learning_rate": 0.00014473282442748094, |
| "loss": 0.2629, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.3929024081115335, |
| "grad_norm": 0.31613558530807495, |
| "learning_rate": 0.00014463104325699745, |
| "loss": 0.2033, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.3954372623574145, |
| "grad_norm": 0.2872864603996277, |
| "learning_rate": 0.00014452926208651402, |
| "loss": 0.2097, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.3979721166032952, |
| "grad_norm": 0.24432098865509033, |
| "learning_rate": 0.00014442748091603053, |
| "loss": 0.2172, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.4005069708491762, |
| "grad_norm": 0.31649062037467957, |
| "learning_rate": 0.0001443256997455471, |
| "loss": 0.2255, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.4030418250950571, |
| "grad_norm": 0.2483261376619339, |
| "learning_rate": 0.0001442239185750636, |
| "loss": 0.1856, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.4055766793409379, |
| "grad_norm": 0.437757670879364, |
| "learning_rate": 0.00014412213740458017, |
| "loss": 0.2713, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.4081115335868186, |
| "grad_norm": 0.43551307916641235, |
| "learning_rate": 0.0001440203562340967, |
| "loss": 0.2654, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.4106463878326996, |
| "grad_norm": 0.5781947374343872, |
| "learning_rate": 0.00014391857506361322, |
| "loss": 0.3242, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.4131812420785805, |
| "grad_norm": 0.3809725344181061, |
| "learning_rate": 0.0001438167938931298, |
| "loss": 0.2176, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.4157160963244613, |
| "grad_norm": 0.38208654522895813, |
| "learning_rate": 0.0001437150127226463, |
| "loss": 0.2043, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.4182509505703422, |
| "grad_norm": 0.39930659532546997, |
| "learning_rate": 0.00014361323155216287, |
| "loss": 0.2914, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.420785804816223, |
| "grad_norm": 0.3019846975803375, |
| "learning_rate": 0.00014351145038167938, |
| "loss": 0.2037, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.423320659062104, |
| "grad_norm": 0.4549913704395294, |
| "learning_rate": 0.00014340966921119595, |
| "loss": 0.2308, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.4258555133079849, |
| "grad_norm": 0.38887929916381836, |
| "learning_rate": 0.00014330788804071246, |
| "loss": 0.2339, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.4283903675538656, |
| "grad_norm": 0.3481290340423584, |
| "learning_rate": 0.00014320610687022903, |
| "loss": 0.2206, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.4309252217997466, |
| "grad_norm": 0.46603840589523315, |
| "learning_rate": 0.00014310432569974554, |
| "loss": 0.3006, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.4334600760456273, |
| "grad_norm": 0.3586963713169098, |
| "learning_rate": 0.0001430025445292621, |
| "loss": 0.2646, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.4359949302915083, |
| "grad_norm": 0.3106522560119629, |
| "learning_rate": 0.00014290076335877862, |
| "loss": 0.2725, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.4385297845373892, |
| "grad_norm": 0.48086050152778625, |
| "learning_rate": 0.00014279898218829518, |
| "loss": 0.3007, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.44106463878327, |
| "grad_norm": 0.44636330008506775, |
| "learning_rate": 0.00014269720101781172, |
| "loss": 0.3755, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.4435994930291507, |
| "grad_norm": 0.3114064633846283, |
| "learning_rate": 0.00014259541984732824, |
| "loss": 0.2606, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4461343472750317, |
| "grad_norm": 0.358394593000412, |
| "learning_rate": 0.0001424936386768448, |
| "loss": 0.27, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.4486692015209126, |
| "grad_norm": 0.3568032681941986, |
| "learning_rate": 0.00014239185750636131, |
| "loss": 0.2767, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.4512040557667933, |
| "grad_norm": 0.4407200515270233, |
| "learning_rate": 0.00014229007633587788, |
| "loss": 0.3786, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.4537389100126743, |
| "grad_norm": 0.4096840023994446, |
| "learning_rate": 0.0001421882951653944, |
| "loss": 0.3199, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.456273764258555, |
| "grad_norm": 0.3343110680580139, |
| "learning_rate": 0.00014208651399491096, |
| "loss": 0.2538, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.458808618504436, |
| "grad_norm": 0.27782517671585083, |
| "learning_rate": 0.00014198473282442747, |
| "loss": 0.2179, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.461343472750317, |
| "grad_norm": 0.2901310920715332, |
| "learning_rate": 0.00014188295165394404, |
| "loss": 0.2552, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.4638783269961977, |
| "grad_norm": 0.3634903132915497, |
| "learning_rate": 0.00014178117048346055, |
| "loss": 0.257, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.4664131812420786, |
| "grad_norm": 0.37307262420654297, |
| "learning_rate": 0.00014167938931297712, |
| "loss": 0.254, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.4689480354879594, |
| "grad_norm": 0.27726346254348755, |
| "learning_rate": 0.00014157760814249366, |
| "loss": 0.1938, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.4714828897338403, |
| "grad_norm": 0.3364371657371521, |
| "learning_rate": 0.0001414758269720102, |
| "loss": 0.2094, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.4740177439797213, |
| "grad_norm": 0.4418800473213196, |
| "learning_rate": 0.00014137404580152673, |
| "loss": 0.3243, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.476552598225602, |
| "grad_norm": 0.42042022943496704, |
| "learning_rate": 0.00014127226463104327, |
| "loss": 0.2333, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.4790874524714828, |
| "grad_norm": 0.36881470680236816, |
| "learning_rate": 0.0001411704834605598, |
| "loss": 0.2513, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.4816223067173637, |
| "grad_norm": 0.4009782671928406, |
| "learning_rate": 0.00014106870229007632, |
| "loss": 0.3085, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.4841571609632447, |
| "grad_norm": 0.43179744482040405, |
| "learning_rate": 0.0001409669211195929, |
| "loss": 0.3189, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.4866920152091254, |
| "grad_norm": 0.3721300959587097, |
| "learning_rate": 0.0001408651399491094, |
| "loss": 0.2318, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.4892268694550064, |
| "grad_norm": 0.3875066339969635, |
| "learning_rate": 0.00014076335877862597, |
| "loss": 0.2753, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.491761723700887, |
| "grad_norm": 0.35223937034606934, |
| "learning_rate": 0.00014066157760814248, |
| "loss": 0.2257, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.494296577946768, |
| "grad_norm": 0.30979710817337036, |
| "learning_rate": 0.00014055979643765905, |
| "loss": 0.2149, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.496831432192649, |
| "grad_norm": 0.23923753201961517, |
| "learning_rate": 0.00014045801526717556, |
| "loss": 0.1911, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.4993662864385298, |
| "grad_norm": 0.40893304347991943, |
| "learning_rate": 0.00014035623409669213, |
| "loss": 0.2756, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.5019011406844105, |
| "grad_norm": 0.2659086585044861, |
| "learning_rate": 0.00014025445292620867, |
| "loss": 0.2154, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.5044359949302915, |
| "grad_norm": 0.30749884247779846, |
| "learning_rate": 0.0001401526717557252, |
| "loss": 0.2184, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.5069708491761724, |
| "grad_norm": 0.3892879784107208, |
| "learning_rate": 0.00014005089058524175, |
| "loss": 0.2849, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.5095057034220534, |
| "grad_norm": 0.5041462779045105, |
| "learning_rate": 0.00013994910941475828, |
| "loss": 0.2551, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.512040557667934, |
| "grad_norm": 0.4143123924732208, |
| "learning_rate": 0.00013984732824427482, |
| "loss": 0.2485, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.5145754119138148, |
| "grad_norm": 0.5315548181533813, |
| "learning_rate": 0.00013974554707379136, |
| "loss": 0.3242, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.5171102661596958, |
| "grad_norm": 0.28680169582366943, |
| "learning_rate": 0.0001396437659033079, |
| "loss": 0.227, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.5196451204055768, |
| "grad_norm": 0.3015950620174408, |
| "learning_rate": 0.00013954198473282441, |
| "loss": 0.2122, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5221799746514575, |
| "grad_norm": 0.30785971879959106, |
| "learning_rate": 0.00013944020356234098, |
| "loss": 0.2194, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.5247148288973384, |
| "grad_norm": 0.3596206605434418, |
| "learning_rate": 0.0001393384223918575, |
| "loss": 0.2574, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.5272496831432192, |
| "grad_norm": 0.18499840795993805, |
| "learning_rate": 0.00013923664122137406, |
| "loss": 0.1944, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.5297845373891001, |
| "grad_norm": 0.4346081614494324, |
| "learning_rate": 0.00013913486005089057, |
| "loss": 0.3187, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.532319391634981, |
| "grad_norm": 0.46154457330703735, |
| "learning_rate": 0.00013903307888040714, |
| "loss": 0.3149, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.5348542458808618, |
| "grad_norm": 0.3444209098815918, |
| "learning_rate": 0.00013893129770992368, |
| "loss": 0.2801, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.5373891001267426, |
| "grad_norm": 0.550620436668396, |
| "learning_rate": 0.00013882951653944022, |
| "loss": 0.3038, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.5399239543726235, |
| "grad_norm": 0.36603689193725586, |
| "learning_rate": 0.00013872773536895676, |
| "loss": 0.3224, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.5424588086185045, |
| "grad_norm": 0.213638037443161, |
| "learning_rate": 0.0001386259541984733, |
| "loss": 0.2081, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.5449936628643854, |
| "grad_norm": 0.34508904814720154, |
| "learning_rate": 0.00013852417302798983, |
| "loss": 0.2474, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.5475285171102662, |
| "grad_norm": 0.42072099447250366, |
| "learning_rate": 0.00013842239185750637, |
| "loss": 0.3049, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.550063371356147, |
| "grad_norm": 0.3760271966457367, |
| "learning_rate": 0.0001383206106870229, |
| "loss": 0.2499, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.5525982256020279, |
| "grad_norm": 0.24040678143501282, |
| "learning_rate": 0.00013821882951653943, |
| "loss": 0.2134, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.5551330798479088, |
| "grad_norm": 0.458035945892334, |
| "learning_rate": 0.000138117048346056, |
| "loss": 0.3375, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.5576679340937896, |
| "grad_norm": 0.30446937680244446, |
| "learning_rate": 0.0001380152671755725, |
| "loss": 0.2252, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.5602027883396705, |
| "grad_norm": 0.3036455810070038, |
| "learning_rate": 0.00013791348600508907, |
| "loss": 0.2095, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.5627376425855513, |
| "grad_norm": 0.4190979301929474, |
| "learning_rate": 0.0001378117048346056, |
| "loss": 0.2932, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.5652724968314322, |
| "grad_norm": 0.27648523449897766, |
| "learning_rate": 0.00013770992366412215, |
| "loss": 0.2133, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.5678073510773132, |
| "grad_norm": 0.28326693177223206, |
| "learning_rate": 0.0001376081424936387, |
| "loss": 0.2087, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.570342205323194, |
| "grad_norm": 0.3020143508911133, |
| "learning_rate": 0.00013750636132315523, |
| "loss": 0.2321, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.5728770595690746, |
| "grad_norm": 0.3246900141239166, |
| "learning_rate": 0.00013740458015267177, |
| "loss": 0.2121, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.5754119138149556, |
| "grad_norm": 0.3806106448173523, |
| "learning_rate": 0.0001373027989821883, |
| "loss": 0.2856, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.5779467680608366, |
| "grad_norm": 0.3568238317966461, |
| "learning_rate": 0.00013720101781170485, |
| "loss": 0.2579, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.5804816223067175, |
| "grad_norm": 0.45590534806251526, |
| "learning_rate": 0.00013709923664122139, |
| "loss": 0.2059, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.5830164765525983, |
| "grad_norm": 0.41996893286705017, |
| "learning_rate": 0.00013699745547073792, |
| "loss": 0.2154, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.585551330798479, |
| "grad_norm": 0.5142170190811157, |
| "learning_rate": 0.00013689567430025446, |
| "loss": 0.2708, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.58808618504436, |
| "grad_norm": 0.36335933208465576, |
| "learning_rate": 0.000136793893129771, |
| "loss": 0.2501, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.590621039290241, |
| "grad_norm": 0.3186666667461395, |
| "learning_rate": 0.00013669211195928752, |
| "loss": 0.2227, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.5931558935361216, |
| "grad_norm": 0.29709601402282715, |
| "learning_rate": 0.00013659033078880408, |
| "loss": 0.2265, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.5956907477820024, |
| "grad_norm": 0.2891612648963928, |
| "learning_rate": 0.00013648854961832062, |
| "loss": 0.2298, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.5982256020278833, |
| "grad_norm": 0.2191978096961975, |
| "learning_rate": 0.00013638676844783716, |
| "loss": 0.2049, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.6007604562737643, |
| "grad_norm": 0.37781399488449097, |
| "learning_rate": 0.0001362849872773537, |
| "loss": 0.3664, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.6032953105196452, |
| "grad_norm": 0.3082154393196106, |
| "learning_rate": 0.00013618320610687024, |
| "loss": 0.2063, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.605830164765526, |
| "grad_norm": 0.318317711353302, |
| "learning_rate": 0.00013608142493638678, |
| "loss": 0.2085, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.6083650190114067, |
| "grad_norm": 0.45566102862358093, |
| "learning_rate": 0.00013597964376590332, |
| "loss": 0.2876, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.6108998732572877, |
| "grad_norm": 0.3186021149158478, |
| "learning_rate": 0.00013587786259541986, |
| "loss": 0.2704, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.6134347275031686, |
| "grad_norm": 0.28905680775642395, |
| "learning_rate": 0.0001357760814249364, |
| "loss": 0.209, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.6159695817490496, |
| "grad_norm": 0.23341360688209534, |
| "learning_rate": 0.00013567430025445294, |
| "loss": 0.1835, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.6185044359949303, |
| "grad_norm": 0.336247056722641, |
| "learning_rate": 0.00013557251908396947, |
| "loss": 0.2547, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.621039290240811, |
| "grad_norm": 0.3736225366592407, |
| "learning_rate": 0.00013547073791348601, |
| "loss": 0.3053, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.623574144486692, |
| "grad_norm": 0.3983825743198395, |
| "learning_rate": 0.00013536895674300255, |
| "loss": 0.2395, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.626108998732573, |
| "grad_norm": 0.35913559794425964, |
| "learning_rate": 0.0001352671755725191, |
| "loss": 0.2918, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.6286438529784537, |
| "grad_norm": 0.2984326183795929, |
| "learning_rate": 0.00013516539440203563, |
| "loss": 0.2148, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.6311787072243344, |
| "grad_norm": 0.3113880753517151, |
| "learning_rate": 0.00013506361323155217, |
| "loss": 0.2044, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.6337135614702154, |
| "grad_norm": 0.5340004563331604, |
| "learning_rate": 0.0001349618320610687, |
| "loss": 0.3234, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.6362484157160964, |
| "grad_norm": 0.38927194476127625, |
| "learning_rate": 0.00013486005089058525, |
| "loss": 0.2866, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.6387832699619773, |
| "grad_norm": 0.38895881175994873, |
| "learning_rate": 0.0001347582697201018, |
| "loss": 0.2324, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.641318124207858, |
| "grad_norm": 0.41959917545318604, |
| "learning_rate": 0.00013465648854961833, |
| "loss": 0.2666, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.6438529784537388, |
| "grad_norm": 0.4299626648426056, |
| "learning_rate": 0.00013455470737913487, |
| "loss": 0.2905, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.6463878326996197, |
| "grad_norm": 0.4236285090446472, |
| "learning_rate": 0.0001344529262086514, |
| "loss": 0.292, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.6489226869455007, |
| "grad_norm": 0.8049849271774292, |
| "learning_rate": 0.00013435114503816795, |
| "loss": 0.2351, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.6514575411913817, |
| "grad_norm": 0.3420075476169586, |
| "learning_rate": 0.00013424936386768449, |
| "loss": 0.2355, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.6539923954372624, |
| "grad_norm": 0.3632122874259949, |
| "learning_rate": 0.00013414758269720103, |
| "loss": 0.2377, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.6565272496831431, |
| "grad_norm": 0.27961722016334534, |
| "learning_rate": 0.00013404580152671756, |
| "loss": 0.2299, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.659062103929024, |
| "grad_norm": 0.3043057918548584, |
| "learning_rate": 0.0001339440203562341, |
| "loss": 0.2321, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.661596958174905, |
| "grad_norm": 0.3421036899089813, |
| "learning_rate": 0.00013384223918575064, |
| "loss": 0.2492, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.6641318124207858, |
| "grad_norm": 0.39606526494026184, |
| "learning_rate": 0.00013374045801526718, |
| "loss": 0.3401, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.35081973671913147, |
| "learning_rate": 0.00013363867684478372, |
| "loss": 0.2175, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.6692015209125475, |
| "grad_norm": 0.420175701379776, |
| "learning_rate": 0.00013353689567430026, |
| "loss": 0.2813, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.6717363751584284, |
| "grad_norm": 0.24181438982486725, |
| "learning_rate": 0.0001334351145038168, |
| "loss": 0.219, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.6742712294043094, |
| "grad_norm": 0.6243584752082825, |
| "learning_rate": 0.00013333333333333334, |
| "loss": 0.3087, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.6768060836501901, |
| "grad_norm": 0.4036748707294464, |
| "learning_rate": 0.00013323155216284988, |
| "loss": 0.251, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.6793409378960709, |
| "grad_norm": 0.39555415511131287, |
| "learning_rate": 0.00013312977099236642, |
| "loss": 0.3279, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.6818757921419518, |
| "grad_norm": 0.4018571674823761, |
| "learning_rate": 0.00013302798982188296, |
| "loss": 0.2337, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.6844106463878328, |
| "grad_norm": 0.36354130506515503, |
| "learning_rate": 0.0001329262086513995, |
| "loss": 0.2503, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.6869455006337135, |
| "grad_norm": 0.32249706983566284, |
| "learning_rate": 0.00013282442748091604, |
| "loss": 0.27, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.6894803548795945, |
| "grad_norm": 0.33560654520988464, |
| "learning_rate": 0.00013272264631043258, |
| "loss": 0.203, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.6920152091254752, |
| "grad_norm": 0.39997267723083496, |
| "learning_rate": 0.00013262086513994911, |
| "loss": 0.2662, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.6945500633713562, |
| "grad_norm": 0.6739961504936218, |
| "learning_rate": 0.00013251908396946565, |
| "loss": 0.2803, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.6970849176172371, |
| "grad_norm": 0.5863606929779053, |
| "learning_rate": 0.0001324173027989822, |
| "loss": 0.351, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.6996197718631179, |
| "grad_norm": 0.4408819079399109, |
| "learning_rate": 0.00013231552162849873, |
| "loss": 0.1814, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.7021546261089986, |
| "grad_norm": 0.3341253697872162, |
| "learning_rate": 0.00013221374045801527, |
| "loss": 0.2156, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.7046894803548795, |
| "grad_norm": 0.3035176992416382, |
| "learning_rate": 0.0001321119592875318, |
| "loss": 0.2308, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.7072243346007605, |
| "grad_norm": 0.4395483136177063, |
| "learning_rate": 0.00013201017811704835, |
| "loss": 0.3418, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.7097591888466415, |
| "grad_norm": 0.22972792387008667, |
| "learning_rate": 0.0001319083969465649, |
| "loss": 0.1873, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.7122940430925222, |
| "grad_norm": 0.47378918528556824, |
| "learning_rate": 0.00013180661577608143, |
| "loss": 0.2514, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.714828897338403, |
| "grad_norm": 0.3947070240974426, |
| "learning_rate": 0.00013170483460559797, |
| "loss": 0.2289, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.717363751584284, |
| "grad_norm": 0.3789718747138977, |
| "learning_rate": 0.0001316030534351145, |
| "loss": 0.2476, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.7198986058301649, |
| "grad_norm": 0.4904823899269104, |
| "learning_rate": 0.00013150127226463105, |
| "loss": 0.2163, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.7224334600760456, |
| "grad_norm": 0.3285132646560669, |
| "learning_rate": 0.0001313994910941476, |
| "loss": 0.2786, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.7249683143219265, |
| "grad_norm": 0.4326847493648529, |
| "learning_rate": 0.00013129770992366413, |
| "loss": 0.2409, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.7275031685678073, |
| "grad_norm": 0.3819947838783264, |
| "learning_rate": 0.00013119592875318067, |
| "loss": 0.2076, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.7300380228136882, |
| "grad_norm": 0.4046533703804016, |
| "learning_rate": 0.0001310941475826972, |
| "loss": 0.2717, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.7325728770595692, |
| "grad_norm": 0.34681758284568787, |
| "learning_rate": 0.00013099236641221374, |
| "loss": 0.2389, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.73510773130545, |
| "grad_norm": 0.35155028104782104, |
| "learning_rate": 0.00013089058524173028, |
| "loss": 0.2407, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.7376425855513307, |
| "grad_norm": 0.3306678533554077, |
| "learning_rate": 0.00013078880407124682, |
| "loss": 0.2767, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.7401774397972116, |
| "grad_norm": 0.27715572714805603, |
| "learning_rate": 0.00013068702290076336, |
| "loss": 0.1955, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.7427122940430926, |
| "grad_norm": 0.3591010272502899, |
| "learning_rate": 0.0001305852417302799, |
| "loss": 0.2269, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.7452471482889735, |
| "grad_norm": 0.39104408025741577, |
| "learning_rate": 0.00013048346055979644, |
| "loss": 0.2392, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.7477820025348543, |
| "grad_norm": 0.44545605778694153, |
| "learning_rate": 0.00013038167938931298, |
| "loss": 0.2823, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.750316856780735, |
| "grad_norm": 0.29502785205841064, |
| "learning_rate": 0.00013027989821882952, |
| "loss": 0.1899, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.752851711026616, |
| "grad_norm": 0.40423381328582764, |
| "learning_rate": 0.00013017811704834606, |
| "loss": 0.2069, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.755386565272497, |
| "grad_norm": 0.38649502396583557, |
| "learning_rate": 0.0001300763358778626, |
| "loss": 0.1938, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.7579214195183777, |
| "grad_norm": 0.40014389157295227, |
| "learning_rate": 0.00012997455470737914, |
| "loss": 0.2825, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.7604562737642584, |
| "grad_norm": 0.4783387780189514, |
| "learning_rate": 0.00012987277353689568, |
| "loss": 0.2629, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.7629911280101394, |
| "grad_norm": 0.4938651919364929, |
| "learning_rate": 0.00012977099236641222, |
| "loss": 0.2976, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.7655259822560203, |
| "grad_norm": 0.32507607340812683, |
| "learning_rate": 0.00012966921119592875, |
| "loss": 0.2097, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.7680608365019013, |
| "grad_norm": 0.31158536672592163, |
| "learning_rate": 0.0001295674300254453, |
| "loss": 0.223, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.770595690747782, |
| "grad_norm": 0.5594013333320618, |
| "learning_rate": 0.00012946564885496183, |
| "loss": 0.3523, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.7731305449936627, |
| "grad_norm": 0.5820282697677612, |
| "learning_rate": 0.00012936386768447837, |
| "loss": 0.3181, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7756653992395437, |
| "grad_norm": 0.3635233938694, |
| "learning_rate": 0.0001292620865139949, |
| "loss": 0.2387, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.7782002534854247, |
| "grad_norm": 0.3195054531097412, |
| "learning_rate": 0.00012916030534351148, |
| "loss": 0.2046, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.7807351077313056, |
| "grad_norm": 0.3483947217464447, |
| "learning_rate": 0.000129058524173028, |
| "loss": 0.2576, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.7832699619771863, |
| "grad_norm": 0.3419065475463867, |
| "learning_rate": 0.00012895674300254456, |
| "loss": 0.2361, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.785804816223067, |
| "grad_norm": 0.3142557442188263, |
| "learning_rate": 0.00012885496183206107, |
| "loss": 0.2172, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.788339670468948, |
| "grad_norm": 0.3502836227416992, |
| "learning_rate": 0.0001287531806615776, |
| "loss": 0.2621, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.790874524714829, |
| "grad_norm": 0.37896937131881714, |
| "learning_rate": 0.00012865139949109415, |
| "loss": 0.2374, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.7934093789607097, |
| "grad_norm": 0.3880506455898285, |
| "learning_rate": 0.0001285496183206107, |
| "loss": 0.2862, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.7959442332065905, |
| "grad_norm": 0.2648681700229645, |
| "learning_rate": 0.00012844783715012723, |
| "loss": 0.206, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.7984790874524714, |
| "grad_norm": 0.25072911381721497, |
| "learning_rate": 0.00012834605597964377, |
| "loss": 0.2123, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.8010139416983524, |
| "grad_norm": 0.3076663315296173, |
| "learning_rate": 0.0001282442748091603, |
| "loss": 0.2983, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.8035487959442333, |
| "grad_norm": 0.4219549000263214, |
| "learning_rate": 0.00012814249363867684, |
| "loss": 0.2213, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.806083650190114, |
| "grad_norm": 0.2831745445728302, |
| "learning_rate": 0.00012804071246819338, |
| "loss": 0.2062, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.8086185044359948, |
| "grad_norm": 0.4014468491077423, |
| "learning_rate": 0.00012793893129770992, |
| "loss": 0.2945, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.8111533586818758, |
| "grad_norm": 0.2980962097644806, |
| "learning_rate": 0.0001278371501272265, |
| "loss": 0.2179, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.8136882129277567, |
| "grad_norm": 0.2338070124387741, |
| "learning_rate": 0.000127735368956743, |
| "loss": 0.1664, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.8162230671736375, |
| "grad_norm": 0.6155439615249634, |
| "learning_rate": 0.00012763358778625957, |
| "loss": 0.3429, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.8187579214195184, |
| "grad_norm": 0.46969589591026306, |
| "learning_rate": 0.00012753180661577608, |
| "loss": 0.2584, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.8212927756653992, |
| "grad_norm": 0.5578194260597229, |
| "learning_rate": 0.00012743002544529265, |
| "loss": 0.2695, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.8238276299112801, |
| "grad_norm": 0.34903043508529663, |
| "learning_rate": 0.00012732824427480916, |
| "loss": 0.2119, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.826362484157161, |
| "grad_norm": 0.3990432322025299, |
| "learning_rate": 0.0001272264631043257, |
| "loss": 0.2487, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.8288973384030418, |
| "grad_norm": 0.3382611572742462, |
| "learning_rate": 0.00012712468193384224, |
| "loss": 0.2313, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.8314321926489225, |
| "grad_norm": 0.30938395857810974, |
| "learning_rate": 0.00012702290076335878, |
| "loss": 0.2113, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.8339670468948035, |
| "grad_norm": 0.39266690611839294, |
| "learning_rate": 0.00012692111959287532, |
| "loss": 0.2609, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.8365019011406845, |
| "grad_norm": 0.4396655261516571, |
| "learning_rate": 0.00012681933842239186, |
| "loss": 0.2518, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.8390367553865654, |
| "grad_norm": 0.4134500324726105, |
| "learning_rate": 0.0001267175572519084, |
| "loss": 0.3317, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.8415716096324461, |
| "grad_norm": 0.29644638299942017, |
| "learning_rate": 0.00012661577608142493, |
| "loss": 0.1912, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.8441064638783269, |
| "grad_norm": 0.3661201596260071, |
| "learning_rate": 0.0001265139949109415, |
| "loss": 0.2911, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.8466413181242078, |
| "grad_norm": 0.4504169225692749, |
| "learning_rate": 0.000126412213740458, |
| "loss": 0.3409, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.8491761723700888, |
| "grad_norm": 0.28516069054603577, |
| "learning_rate": 0.00012631043256997458, |
| "loss": 0.254, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.8517110266159695, |
| "grad_norm": 0.33754590153694153, |
| "learning_rate": 0.0001262086513994911, |
| "loss": 0.2275, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.8542458808618505, |
| "grad_norm": 0.26562589406967163, |
| "learning_rate": 0.00012610687022900766, |
| "loss": 0.1979, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.8567807351077312, |
| "grad_norm": 0.3081592321395874, |
| "learning_rate": 0.00012600508905852417, |
| "loss": 0.2099, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.8593155893536122, |
| "grad_norm": 0.34866124391555786, |
| "learning_rate": 0.0001259033078880407, |
| "loss": 0.3038, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.8618504435994931, |
| "grad_norm": 0.2867881953716278, |
| "learning_rate": 0.00012580152671755725, |
| "loss": 0.2225, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.8643852978453739, |
| "grad_norm": 0.2374526560306549, |
| "learning_rate": 0.0001256997455470738, |
| "loss": 0.1945, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.8669201520912546, |
| "grad_norm": 0.3072168827056885, |
| "learning_rate": 0.00012559796437659033, |
| "loss": 0.2135, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.8694550063371356, |
| "grad_norm": 0.36897239089012146, |
| "learning_rate": 0.00012549618320610687, |
| "loss": 0.3225, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.8719898605830165, |
| "grad_norm": 0.3114832937717438, |
| "learning_rate": 0.00012539440203562343, |
| "loss": 0.2064, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.8745247148288975, |
| "grad_norm": 0.40082940459251404, |
| "learning_rate": 0.00012529262086513995, |
| "loss": 0.2145, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.8770595690747782, |
| "grad_norm": 0.28362375497817993, |
| "learning_rate": 0.0001251908396946565, |
| "loss": 0.2044, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.879594423320659, |
| "grad_norm": 0.2738857567310333, |
| "learning_rate": 0.00012508905852417302, |
| "loss": 0.1852, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.88212927756654, |
| "grad_norm": 0.37283095717430115, |
| "learning_rate": 0.0001249872773536896, |
| "loss": 0.248, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.8846641318124209, |
| "grad_norm": 0.3065252900123596, |
| "learning_rate": 0.0001248854961832061, |
| "loss": 0.2028, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.8871989860583016, |
| "grad_norm": 0.2891787588596344, |
| "learning_rate": 0.00012478371501272267, |
| "loss": 0.1977, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.8897338403041823, |
| "grad_norm": 0.5002029538154602, |
| "learning_rate": 0.00012468193384223918, |
| "loss": 0.2731, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.8922686945500633, |
| "grad_norm": 0.34734681248664856, |
| "learning_rate": 0.00012458015267175575, |
| "loss": 0.2236, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.8948035487959443, |
| "grad_norm": 0.4372716248035431, |
| "learning_rate": 0.00012447837150127226, |
| "loss": 0.3787, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.8973384030418252, |
| "grad_norm": 0.41203773021698, |
| "learning_rate": 0.0001243765903307888, |
| "loss": 0.2385, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.899873257287706, |
| "grad_norm": 0.28231269121170044, |
| "learning_rate": 0.00012427480916030534, |
| "loss": 0.1966, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.9024081115335867, |
| "grad_norm": 0.3689015209674835, |
| "learning_rate": 0.00012417302798982188, |
| "loss": 0.2266, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.9049429657794676, |
| "grad_norm": 0.35862621665000916, |
| "learning_rate": 0.00012407124681933844, |
| "loss": 0.2226, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.9074778200253486, |
| "grad_norm": 0.27552056312561035, |
| "learning_rate": 0.00012396946564885496, |
| "loss": 0.2049, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.9100126742712296, |
| "grad_norm": 0.3665705919265747, |
| "learning_rate": 0.00012386768447837152, |
| "loss": 0.2262, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.9125475285171103, |
| "grad_norm": 0.37812677025794983, |
| "learning_rate": 0.00012376590330788803, |
| "loss": 0.2561, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.915082382762991, |
| "grad_norm": 0.34638741612434387, |
| "learning_rate": 0.0001236641221374046, |
| "loss": 0.2152, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.917617237008872, |
| "grad_norm": 0.3499183654785156, |
| "learning_rate": 0.00012356234096692111, |
| "loss": 0.2823, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.920152091254753, |
| "grad_norm": 0.3274863362312317, |
| "learning_rate": 0.00012346055979643768, |
| "loss": 0.202, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.9226869455006337, |
| "grad_norm": 0.4568060338497162, |
| "learning_rate": 0.0001233587786259542, |
| "loss": 0.3531, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.9252217997465144, |
| "grad_norm": 0.3351891040802002, |
| "learning_rate": 0.00012325699745547076, |
| "loss": 0.3491, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.9277566539923954, |
| "grad_norm": 0.3045225739479065, |
| "learning_rate": 0.00012315521628498727, |
| "loss": 0.2412, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.9302915082382763, |
| "grad_norm": 0.4453962445259094, |
| "learning_rate": 0.0001230534351145038, |
| "loss": 0.485, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.9328263624841573, |
| "grad_norm": 0.4568649232387543, |
| "learning_rate": 0.00012295165394402038, |
| "loss": 0.4203, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.935361216730038, |
| "grad_norm": 0.33376067876815796, |
| "learning_rate": 0.0001228498727735369, |
| "loss": 0.2287, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.9378960709759188, |
| "grad_norm": 0.2670106887817383, |
| "learning_rate": 0.00012274809160305346, |
| "loss": 0.2265, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.9404309252217997, |
| "grad_norm": 0.25930914282798767, |
| "learning_rate": 0.00012264631043256997, |
| "loss": 0.2661, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.9429657794676807, |
| "grad_norm": 0.22364859282970428, |
| "learning_rate": 0.00012254452926208653, |
| "loss": 0.1938, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.9455006337135616, |
| "grad_norm": 0.4107860028743744, |
| "learning_rate": 0.00012244274809160305, |
| "loss": 0.3227, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.9480354879594424, |
| "grad_norm": 0.24454613029956818, |
| "learning_rate": 0.0001223409669211196, |
| "loss": 0.2813, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.950570342205323, |
| "grad_norm": 0.28310418128967285, |
| "learning_rate": 0.00012223918575063612, |
| "loss": 0.2065, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.953105196451204, |
| "grad_norm": 0.28080177307128906, |
| "learning_rate": 0.0001221374045801527, |
| "loss": 0.1941, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.955640050697085, |
| "grad_norm": 0.365400105714798, |
| "learning_rate": 0.0001220356234096692, |
| "loss": 0.2657, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.9581749049429658, |
| "grad_norm": 0.3115444779396057, |
| "learning_rate": 0.00012193384223918576, |
| "loss": 0.2117, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.9607097591888465, |
| "grad_norm": 0.30900898575782776, |
| "learning_rate": 0.00012183206106870228, |
| "loss": 0.2563, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.9632446134347274, |
| "grad_norm": 0.341789573431015, |
| "learning_rate": 0.00012173027989821883, |
| "loss": 0.2396, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.9657794676806084, |
| "grad_norm": 0.39556756615638733, |
| "learning_rate": 0.00012162849872773539, |
| "loss": 0.2203, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.9683143219264894, |
| "grad_norm": 0.4282820224761963, |
| "learning_rate": 0.00012152671755725191, |
| "loss": 0.2476, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.97084917617237, |
| "grad_norm": 0.3683648109436035, |
| "learning_rate": 0.00012142493638676847, |
| "loss": 0.2414, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.9733840304182508, |
| "grad_norm": 0.19751296937465668, |
| "learning_rate": 0.00012132315521628499, |
| "loss": 0.1622, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.9759188846641318, |
| "grad_norm": 0.4522268772125244, |
| "learning_rate": 0.00012122137404580154, |
| "loss": 0.3372, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.9784537389100127, |
| "grad_norm": 0.3386411666870117, |
| "learning_rate": 0.00012111959287531807, |
| "loss": 0.1966, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.9809885931558935, |
| "grad_norm": 0.3266599178314209, |
| "learning_rate": 0.00012101781170483461, |
| "loss": 0.2507, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.9835234474017744, |
| "grad_norm": 0.395271897315979, |
| "learning_rate": 0.00012091603053435115, |
| "loss": 0.2626, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.9860583016476552, |
| "grad_norm": 0.23269407451152802, |
| "learning_rate": 0.00012081424936386769, |
| "loss": 0.1806, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.9885931558935361, |
| "grad_norm": 0.3929823040962219, |
| "learning_rate": 0.00012071246819338421, |
| "loss": 0.2912, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.991128010139417, |
| "grad_norm": 0.2597116529941559, |
| "learning_rate": 0.00012061068702290077, |
| "loss": 0.1918, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.9936628643852978, |
| "grad_norm": 0.44690757989883423, |
| "learning_rate": 0.00012050890585241729, |
| "loss": 0.2644, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.9961977186311786, |
| "grad_norm": 0.4133460819721222, |
| "learning_rate": 0.00012040712468193385, |
| "loss": 0.2541, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.9987325728770595, |
| "grad_norm": 0.33399301767349243, |
| "learning_rate": 0.0001203053435114504, |
| "loss": 0.2778, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.6268282532691956, |
| "learning_rate": 0.00012020356234096692, |
| "loss": 0.3105, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.002534854245881, |
| "grad_norm": 0.38419365882873535, |
| "learning_rate": 0.00012010178117048348, |
| "loss": 0.2352, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.005069708491762, |
| "grad_norm": 0.30469566583633423, |
| "learning_rate": 0.00012, |
| "loss": 0.2011, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.0076045627376424, |
| "grad_norm": 0.36411482095718384, |
| "learning_rate": 0.00011989821882951656, |
| "loss": 0.2324, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.0101394169835234, |
| "grad_norm": 0.40986311435699463, |
| "learning_rate": 0.00011979643765903308, |
| "loss": 0.2217, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.0126742712294043, |
| "grad_norm": 0.46682968735694885, |
| "learning_rate": 0.00011969465648854963, |
| "loss": 0.2688, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.0152091254752853, |
| "grad_norm": 0.31846344470977783, |
| "learning_rate": 0.00011959287531806616, |
| "loss": 0.1984, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.017743979721166, |
| "grad_norm": 0.48346126079559326, |
| "learning_rate": 0.0001194910941475827, |
| "loss": 0.2404, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.0202788339670468, |
| "grad_norm": 0.5090253949165344, |
| "learning_rate": 0.00011938931297709924, |
| "loss": 0.2363, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.0228136882129277, |
| "grad_norm": 0.4886679947376251, |
| "learning_rate": 0.00011928753180661578, |
| "loss": 0.2656, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.0253485424588087, |
| "grad_norm": 0.5652650594711304, |
| "learning_rate": 0.00011918575063613233, |
| "loss": 0.2444, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.0278833967046896, |
| "grad_norm": 0.7158893346786499, |
| "learning_rate": 0.00011908396946564886, |
| "loss": 0.2362, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.03041825095057, |
| "grad_norm": 0.5168672800064087, |
| "learning_rate": 0.00011898218829516541, |
| "loss": 0.2067, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.032953105196451, |
| "grad_norm": 0.7243991494178772, |
| "learning_rate": 0.00011888040712468194, |
| "loss": 0.2458, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.035487959442332, |
| "grad_norm": 0.4199936091899872, |
| "learning_rate": 0.00011877862595419849, |
| "loss": 0.2009, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.038022813688213, |
| "grad_norm": 0.41791805624961853, |
| "learning_rate": 0.00011867684478371501, |
| "loss": 0.2325, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.040557667934094, |
| "grad_norm": 0.6389465928077698, |
| "learning_rate": 0.00011857506361323157, |
| "loss": 0.2636, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.0430925221799745, |
| "grad_norm": 0.6254114508628845, |
| "learning_rate": 0.00011847328244274809, |
| "loss": 0.2292, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.0456273764258555, |
| "grad_norm": 0.8436942100524902, |
| "learning_rate": 0.00011837150127226465, |
| "loss": 0.2913, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.0481622306717364, |
| "grad_norm": 0.42698097229003906, |
| "learning_rate": 0.00011826972010178117, |
| "loss": 0.2107, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.0506970849176174, |
| "grad_norm": 0.432607501745224, |
| "learning_rate": 0.00011816793893129771, |
| "loss": 0.1851, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.053231939163498, |
| "grad_norm": 0.48241573572158813, |
| "learning_rate": 0.00011806615776081425, |
| "loss": 0.2333, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.055766793409379, |
| "grad_norm": 0.3920150101184845, |
| "learning_rate": 0.00011796437659033079, |
| "loss": 0.2256, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.05830164765526, |
| "grad_norm": 0.3601329028606415, |
| "learning_rate": 0.00011786259541984734, |
| "loss": 0.2428, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.0608365019011408, |
| "grad_norm": 0.428524911403656, |
| "learning_rate": 0.00011776081424936387, |
| "loss": 0.3109, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.0633713561470217, |
| "grad_norm": 0.22846737504005432, |
| "learning_rate": 0.00011765903307888042, |
| "loss": 0.1715, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.0659062103929022, |
| "grad_norm": 0.3656214475631714, |
| "learning_rate": 0.00011755725190839695, |
| "loss": 0.2211, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.068441064638783, |
| "grad_norm": 0.2633965015411377, |
| "learning_rate": 0.0001174554707379135, |
| "loss": 0.1933, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.070975918884664, |
| "grad_norm": 0.4318942129611969, |
| "learning_rate": 0.00011735368956743003, |
| "loss": 0.2829, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.073510773130545, |
| "grad_norm": 0.2643216848373413, |
| "learning_rate": 0.00011725190839694658, |
| "loss": 0.1938, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.076045627376426, |
| "grad_norm": 0.4560074508190155, |
| "learning_rate": 0.0001171501272264631, |
| "loss": 0.3017, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.0785804816223066, |
| "grad_norm": 0.380374550819397, |
| "learning_rate": 0.00011704834605597966, |
| "loss": 0.2141, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.0811153358681875, |
| "grad_norm": 0.321417897939682, |
| "learning_rate": 0.00011694656488549618, |
| "loss": 0.2058, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.0836501901140685, |
| "grad_norm": 0.350496768951416, |
| "learning_rate": 0.00011684478371501274, |
| "loss": 0.1761, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.0861850443599494, |
| "grad_norm": 0.35794898867607117, |
| "learning_rate": 0.00011674300254452927, |
| "loss": 0.2016, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.08871989860583, |
| "grad_norm": 0.37890860438346863, |
| "learning_rate": 0.0001166412213740458, |
| "loss": 0.253, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.091254752851711, |
| "grad_norm": 0.41833457350730896, |
| "learning_rate": 0.00011653944020356235, |
| "loss": 0.2012, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.093789607097592, |
| "grad_norm": 0.49572086334228516, |
| "learning_rate": 0.00011643765903307888, |
| "loss": 0.214, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.096324461343473, |
| "grad_norm": 0.44266751408576965, |
| "learning_rate": 0.00011633587786259543, |
| "loss": 0.2496, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.098859315589354, |
| "grad_norm": 0.7018102407455444, |
| "learning_rate": 0.00011623409669211196, |
| "loss": 0.3996, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.1013941698352343, |
| "grad_norm": 0.42781826853752136, |
| "learning_rate": 0.00011613231552162851, |
| "loss": 0.2325, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.1039290240811153, |
| "grad_norm": 0.35814788937568665, |
| "learning_rate": 0.00011603053435114504, |
| "loss": 0.2003, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.106463878326996, |
| "grad_norm": 0.2381380945444107, |
| "learning_rate": 0.00011592875318066159, |
| "loss": 0.1791, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.108998732572877, |
| "grad_norm": 0.3152197003364563, |
| "learning_rate": 0.00011582697201017811, |
| "loss": 0.1802, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.111533586818758, |
| "grad_norm": 0.3493264615535736, |
| "learning_rate": 0.00011572519083969467, |
| "loss": 0.173, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.1140684410646386, |
| "grad_norm": 0.339036762714386, |
| "learning_rate": 0.0001156234096692112, |
| "loss": 0.1875, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.1166032953105196, |
| "grad_norm": 0.3622972369194031, |
| "learning_rate": 0.00011552162849872775, |
| "loss": 0.1892, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.1191381495564006, |
| "grad_norm": 0.7021862268447876, |
| "learning_rate": 0.00011541984732824429, |
| "loss": 0.272, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.1216730038022815, |
| "grad_norm": 0.4027453064918518, |
| "learning_rate": 0.00011531806615776081, |
| "loss": 0.2296, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.124207858048162, |
| "grad_norm": 0.3509223163127899, |
| "learning_rate": 0.00011521628498727736, |
| "loss": 0.1812, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.126742712294043, |
| "grad_norm": 0.4156752824783325, |
| "learning_rate": 0.00011511450381679389, |
| "loss": 0.2444, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.129277566539924, |
| "grad_norm": 0.3596971035003662, |
| "learning_rate": 0.00011501272264631044, |
| "loss": 0.1944, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.131812420785805, |
| "grad_norm": 0.4088239371776581, |
| "learning_rate": 0.00011491094147582697, |
| "loss": 0.1892, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.134347275031686, |
| "grad_norm": 0.3603368103504181, |
| "learning_rate": 0.00011480916030534352, |
| "loss": 0.1955, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.1368821292775664, |
| "grad_norm": 0.3702489733695984, |
| "learning_rate": 0.00011470737913486005, |
| "loss": 0.2401, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.1394169835234473, |
| "grad_norm": 0.427312433719635, |
| "learning_rate": 0.0001146055979643766, |
| "loss": 0.2097, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.1419518377693283, |
| "grad_norm": 0.34239426255226135, |
| "learning_rate": 0.00011450381679389313, |
| "loss": 0.2055, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.1444866920152093, |
| "grad_norm": 0.522627055644989, |
| "learning_rate": 0.00011440203562340968, |
| "loss": 0.2206, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.14702154626109, |
| "grad_norm": 0.5005999207496643, |
| "learning_rate": 0.0001143002544529262, |
| "loss": 0.2187, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.1495564005069707, |
| "grad_norm": 0.4834093451499939, |
| "learning_rate": 0.00011419847328244276, |
| "loss": 0.2616, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.1520912547528517, |
| "grad_norm": 0.3305776119232178, |
| "learning_rate": 0.0001140966921119593, |
| "loss": 0.2193, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.1546261089987326, |
| "grad_norm": 0.3691657781600952, |
| "learning_rate": 0.00011399491094147584, |
| "loss": 0.2343, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.1571609632446136, |
| "grad_norm": 0.4711242914199829, |
| "learning_rate": 0.00011389312977099238, |
| "loss": 0.2961, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.159695817490494, |
| "grad_norm": 0.4091726839542389, |
| "learning_rate": 0.0001137913486005089, |
| "loss": 0.2735, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.162230671736375, |
| "grad_norm": 0.28634020686149597, |
| "learning_rate": 0.00011368956743002545, |
| "loss": 0.2026, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.164765525982256, |
| "grad_norm": 0.3120497763156891, |
| "learning_rate": 0.00011358778625954198, |
| "loss": 0.1826, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.167300380228137, |
| "grad_norm": 0.3803773522377014, |
| "learning_rate": 0.00011348600508905853, |
| "loss": 0.2206, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.169835234474018, |
| "grad_norm": 0.4069412648677826, |
| "learning_rate": 0.00011338422391857506, |
| "loss": 0.23, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.1723700887198985, |
| "grad_norm": 0.31032097339630127, |
| "learning_rate": 0.00011328244274809161, |
| "loss": 0.1774, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.1749049429657794, |
| "grad_norm": 0.3429819941520691, |
| "learning_rate": 0.00011318066157760814, |
| "loss": 0.207, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.1774397972116604, |
| "grad_norm": 0.32155394554138184, |
| "learning_rate": 0.00011307888040712469, |
| "loss": 0.1817, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.1799746514575413, |
| "grad_norm": 0.3859189450740814, |
| "learning_rate": 0.00011297709923664124, |
| "loss": 0.205, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.182509505703422, |
| "grad_norm": 0.33794042468070984, |
| "learning_rate": 0.00011287531806615777, |
| "loss": 0.2002, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.185044359949303, |
| "grad_norm": 0.38762131333351135, |
| "learning_rate": 0.00011277353689567431, |
| "loss": 0.206, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.1875792141951838, |
| "grad_norm": 0.35734203457832336, |
| "learning_rate": 0.00011267175572519085, |
| "loss": 0.2332, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.1901140684410647, |
| "grad_norm": 0.32456931471824646, |
| "learning_rate": 0.00011256997455470739, |
| "loss": 0.1873, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.1926489226869457, |
| "grad_norm": 0.5198532938957214, |
| "learning_rate": 0.00011246819338422391, |
| "loss": 0.2408, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.195183776932826, |
| "grad_norm": 0.3863469362258911, |
| "learning_rate": 0.00011236641221374046, |
| "loss": 0.1778, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.197718631178707, |
| "grad_norm": 0.39902037382125854, |
| "learning_rate": 0.00011226463104325699, |
| "loss": 0.1982, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.200253485424588, |
| "grad_norm": 0.3974783718585968, |
| "learning_rate": 0.00011216284987277354, |
| "loss": 0.2157, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.202788339670469, |
| "grad_norm": 0.33785662055015564, |
| "learning_rate": 0.00011206106870229007, |
| "loss": 0.2152, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.20532319391635, |
| "grad_norm": 0.4233367145061493, |
| "learning_rate": 0.00011195928753180662, |
| "loss": 0.2992, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.2078580481622305, |
| "grad_norm": 0.37665534019470215, |
| "learning_rate": 0.00011185750636132315, |
| "loss": 0.2273, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.2103929024081115, |
| "grad_norm": 0.3841243088245392, |
| "learning_rate": 0.0001117557251908397, |
| "loss": 0.1991, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.2129277566539924, |
| "grad_norm": 0.3544892966747284, |
| "learning_rate": 0.00011165394402035625, |
| "loss": 0.2098, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.2154626108998734, |
| "grad_norm": 0.43662142753601074, |
| "learning_rate": 0.00011155216284987278, |
| "loss": 0.2411, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.2179974651457544, |
| "grad_norm": 0.3305199146270752, |
| "learning_rate": 0.00011145038167938933, |
| "loss": 0.1803, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.220532319391635, |
| "grad_norm": 0.34674328565597534, |
| "learning_rate": 0.00011134860050890586, |
| "loss": 0.2206, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.223067173637516, |
| "grad_norm": 0.39985305070877075, |
| "learning_rate": 0.0001112468193384224, |
| "loss": 0.2951, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.225602027883397, |
| "grad_norm": 0.36231693625450134, |
| "learning_rate": 0.00011114503816793894, |
| "loss": 0.2601, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.2281368821292777, |
| "grad_norm": 0.4199659526348114, |
| "learning_rate": 0.00011104325699745548, |
| "loss": 0.2719, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.2306717363751583, |
| "grad_norm": 0.3472574055194855, |
| "learning_rate": 0.000110941475826972, |
| "loss": 0.2437, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.233206590621039, |
| "grad_norm": 0.2765200436115265, |
| "learning_rate": 0.00011083969465648855, |
| "loss": 0.1983, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.23574144486692, |
| "grad_norm": 0.4466260075569153, |
| "learning_rate": 0.00011073791348600508, |
| "loss": 0.2323, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.238276299112801, |
| "grad_norm": 0.43661364912986755, |
| "learning_rate": 0.00011063613231552163, |
| "loss": 0.2957, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.240811153358682, |
| "grad_norm": 0.3262166976928711, |
| "learning_rate": 0.00011053435114503819, |
| "loss": 0.195, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.2433460076045626, |
| "grad_norm": 0.5085666179656982, |
| "learning_rate": 0.00011043256997455471, |
| "loss": 0.3349, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.2458808618504436, |
| "grad_norm": 0.46551409363746643, |
| "learning_rate": 0.00011033078880407126, |
| "loss": 0.3318, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.2484157160963245, |
| "grad_norm": 0.425530344247818, |
| "learning_rate": 0.00011022900763358779, |
| "loss": 0.2857, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.2509505703422055, |
| "grad_norm": 0.3377918601036072, |
| "learning_rate": 0.00011012722646310434, |
| "loss": 0.2215, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.253485424588086, |
| "grad_norm": 0.3491476774215698, |
| "learning_rate": 0.00011002544529262087, |
| "loss": 0.2471, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.256020278833967, |
| "grad_norm": 0.3779531419277191, |
| "learning_rate": 0.00010992366412213742, |
| "loss": 0.1984, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.258555133079848, |
| "grad_norm": 0.425077885389328, |
| "learning_rate": 0.00010982188295165395, |
| "loss": 0.2535, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.261089987325729, |
| "grad_norm": 0.40296900272369385, |
| "learning_rate": 0.00010972010178117049, |
| "loss": 0.1955, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.26362484157161, |
| "grad_norm": 0.4394761919975281, |
| "learning_rate": 0.00010961832061068703, |
| "loss": 0.2638, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.2661596958174903, |
| "grad_norm": 0.4743111729621887, |
| "learning_rate": 0.00010951653944020357, |
| "loss": 0.1932, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.2686945500633713, |
| "grad_norm": 0.5121330618858337, |
| "learning_rate": 0.00010941475826972009, |
| "loss": 0.2541, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.2712294043092522, |
| "grad_norm": 0.2810382544994354, |
| "learning_rate": 0.00010931297709923664, |
| "loss": 0.1884, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.273764258555133, |
| "grad_norm": 0.3637334108352661, |
| "learning_rate": 0.0001092111959287532, |
| "loss": 0.2208, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.2762991128010137, |
| "grad_norm": 0.4116186201572418, |
| "learning_rate": 0.00010910941475826972, |
| "loss": 0.1898, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.2788339670468947, |
| "grad_norm": 0.4166296720504761, |
| "learning_rate": 0.00010900763358778628, |
| "loss": 0.2399, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.2813688212927756, |
| "grad_norm": 0.5998784303665161, |
| "learning_rate": 0.0001089058524173028, |
| "loss": 0.2926, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.2839036755386566, |
| "grad_norm": 0.6252371668815613, |
| "learning_rate": 0.00010880407124681935, |
| "loss": 0.2392, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.2864385297845375, |
| "grad_norm": 0.4495537579059601, |
| "learning_rate": 0.00010870229007633588, |
| "loss": 0.2142, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.288973384030418, |
| "grad_norm": 0.5659827589988708, |
| "learning_rate": 0.00010860050890585243, |
| "loss": 0.2993, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.291508238276299, |
| "grad_norm": 0.4290786385536194, |
| "learning_rate": 0.00010849872773536896, |
| "loss": 0.3127, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.29404309252218, |
| "grad_norm": 0.3835826516151428, |
| "learning_rate": 0.0001083969465648855, |
| "loss": 0.1927, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.296577946768061, |
| "grad_norm": 0.4915788769721985, |
| "learning_rate": 0.00010829516539440204, |
| "loss": 0.2553, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.299112801013942, |
| "grad_norm": 0.42122524976730347, |
| "learning_rate": 0.00010819338422391858, |
| "loss": 0.2133, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.3016476552598224, |
| "grad_norm": 0.3904586732387543, |
| "learning_rate": 0.0001080916030534351, |
| "loss": 0.2064, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.3041825095057034, |
| "grad_norm": 0.3680777847766876, |
| "learning_rate": 0.00010798982188295166, |
| "loss": 0.1989, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.3067173637515843, |
| "grad_norm": 0.44054466485977173, |
| "learning_rate": 0.00010788804071246821, |
| "loss": 0.2386, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.3092522179974653, |
| "grad_norm": 0.28730717301368713, |
| "learning_rate": 0.00010778625954198473, |
| "loss": 0.175, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.3117870722433462, |
| "grad_norm": 0.4209315776824951, |
| "learning_rate": 0.00010768447837150129, |
| "loss": 0.2197, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.3143219264892267, |
| "grad_norm": 0.41457393765449524, |
| "learning_rate": 0.00010758269720101781, |
| "loss": 0.202, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.3168567807351077, |
| "grad_norm": 0.40807071328163147, |
| "learning_rate": 0.00010748091603053437, |
| "loss": 0.3087, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.3193916349809887, |
| "grad_norm": 0.42118731141090393, |
| "learning_rate": 0.00010737913486005089, |
| "loss": 0.2269, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.3219264892268696, |
| "grad_norm": 0.3436257541179657, |
| "learning_rate": 0.00010727735368956744, |
| "loss": 0.1987, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.32446134347275, |
| "grad_norm": 0.3721463978290558, |
| "learning_rate": 0.00010717557251908397, |
| "loss": 0.2081, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.326996197718631, |
| "grad_norm": 0.45050719380378723, |
| "learning_rate": 0.00010707379134860052, |
| "loss": 0.2199, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.329531051964512, |
| "grad_norm": 0.42665717005729675, |
| "learning_rate": 0.00010697201017811705, |
| "loss": 0.2176, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.332065906210393, |
| "grad_norm": 0.35217922925949097, |
| "learning_rate": 0.00010687022900763359, |
| "loss": 0.1915, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.334600760456274, |
| "grad_norm": 0.5407602190971375, |
| "learning_rate": 0.00010676844783715014, |
| "loss": 0.2309, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.3371356147021545, |
| "grad_norm": 0.6984291076660156, |
| "learning_rate": 0.00010666666666666667, |
| "loss": 0.2779, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.3396704689480354, |
| "grad_norm": 0.5333911776542664, |
| "learning_rate": 0.00010656488549618322, |
| "loss": 0.2659, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.3422053231939164, |
| "grad_norm": 0.5130952596664429, |
| "learning_rate": 0.00010646310432569974, |
| "loss": 0.315, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.3447401774397973, |
| "grad_norm": 0.3874262869358063, |
| "learning_rate": 0.0001063613231552163, |
| "loss": 0.294, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.347275031685678, |
| "grad_norm": 0.37864431738853455, |
| "learning_rate": 0.00010625954198473282, |
| "loss": 0.1894, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.349809885931559, |
| "grad_norm": 0.406448632478714, |
| "learning_rate": 0.00010615776081424938, |
| "loss": 0.1913, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.3523447401774398, |
| "grad_norm": 0.4278213381767273, |
| "learning_rate": 0.0001060559796437659, |
| "loss": 0.2136, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.3548795944233207, |
| "grad_norm": 0.3853738009929657, |
| "learning_rate": 0.00010595419847328246, |
| "loss": 0.213, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.3574144486692017, |
| "grad_norm": 0.3785664737224579, |
| "learning_rate": 0.00010585241730279898, |
| "loss": 0.22, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.359949302915082, |
| "grad_norm": 0.5863676071166992, |
| "learning_rate": 0.00010575063613231553, |
| "loss": 0.2305, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.362484157160963, |
| "grad_norm": 0.36629414558410645, |
| "learning_rate": 0.00010564885496183206, |
| "loss": 0.2041, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.365019011406844, |
| "grad_norm": 0.44699156284332275, |
| "learning_rate": 0.0001055470737913486, |
| "loss": 0.2763, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.367553865652725, |
| "grad_norm": 0.4775685667991638, |
| "learning_rate": 0.00010544529262086515, |
| "loss": 0.2779, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.3700887198986056, |
| "grad_norm": 0.3192265033721924, |
| "learning_rate": 0.00010534351145038168, |
| "loss": 0.1861, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.3726235741444865, |
| "grad_norm": 0.3589562177658081, |
| "learning_rate": 0.00010524173027989823, |
| "loss": 0.2266, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.3751584283903675, |
| "grad_norm": 0.36193573474884033, |
| "learning_rate": 0.00010513994910941476, |
| "loss": 0.2105, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.3776932826362485, |
| "grad_norm": 0.4141902029514313, |
| "learning_rate": 0.00010503816793893131, |
| "loss": 0.2676, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.3802281368821294, |
| "grad_norm": 0.3118525445461273, |
| "learning_rate": 0.00010493638676844783, |
| "loss": 0.1941, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.3827629911280104, |
| "grad_norm": 0.3232119679450989, |
| "learning_rate": 0.00010483460559796439, |
| "loss": 0.2065, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.385297845373891, |
| "grad_norm": 0.30440258979797363, |
| "learning_rate": 0.00010473282442748091, |
| "loss": 0.1834, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.387832699619772, |
| "grad_norm": 0.5841143131256104, |
| "learning_rate": 0.00010463104325699747, |
| "loss": 0.3785, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.390367553865653, |
| "grad_norm": 0.31851619482040405, |
| "learning_rate": 0.00010452926208651399, |
| "loss": 0.1798, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.3929024081115338, |
| "grad_norm": 0.3820517361164093, |
| "learning_rate": 0.00010442748091603054, |
| "loss": 0.2376, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.3954372623574143, |
| "grad_norm": 0.4379272758960724, |
| "learning_rate": 0.00010432569974554708, |
| "loss": 0.2356, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.3979721166032952, |
| "grad_norm": 0.3120323419570923, |
| "learning_rate": 0.00010422391857506362, |
| "loss": 0.1936, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.400506970849176, |
| "grad_norm": 0.3143107295036316, |
| "learning_rate": 0.00010412213740458016, |
| "loss": 0.184, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.403041825095057, |
| "grad_norm": 0.44618573784828186, |
| "learning_rate": 0.00010402035623409669, |
| "loss": 0.2468, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.405576679340938, |
| "grad_norm": 0.3838117718696594, |
| "learning_rate": 0.00010391857506361324, |
| "loss": 0.2276, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.4081115335868186, |
| "grad_norm": 0.3427219092845917, |
| "learning_rate": 0.00010381679389312977, |
| "loss": 0.2169, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.4106463878326996, |
| "grad_norm": 0.3738270699977875, |
| "learning_rate": 0.00010371501272264632, |
| "loss": 0.2447, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.4131812420785805, |
| "grad_norm": 0.33645015954971313, |
| "learning_rate": 0.00010361323155216285, |
| "loss": 0.1939, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.4157160963244615, |
| "grad_norm": 0.45420047640800476, |
| "learning_rate": 0.0001035114503816794, |
| "loss": 0.242, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.418250950570342, |
| "grad_norm": 0.47141382098197937, |
| "learning_rate": 0.00010340966921119592, |
| "loss": 0.2923, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.420785804816223, |
| "grad_norm": 0.42177528142929077, |
| "learning_rate": 0.00010330788804071248, |
| "loss": 0.2827, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.423320659062104, |
| "grad_norm": 0.409502774477005, |
| "learning_rate": 0.000103206106870229, |
| "loss": 0.2016, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.425855513307985, |
| "grad_norm": 0.47684770822525024, |
| "learning_rate": 0.00010310432569974556, |
| "loss": 0.2093, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.428390367553866, |
| "grad_norm": 0.3357095718383789, |
| "learning_rate": 0.0001030025445292621, |
| "loss": 0.1744, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.4309252217997463, |
| "grad_norm": 0.4120575487613678, |
| "learning_rate": 0.00010290076335877863, |
| "loss": 0.214, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.4334600760456273, |
| "grad_norm": 0.5090222954750061, |
| "learning_rate": 0.00010279898218829517, |
| "loss": 0.2427, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.4359949302915083, |
| "grad_norm": 0.4142550528049469, |
| "learning_rate": 0.0001026972010178117, |
| "loss": 0.2412, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.4385297845373892, |
| "grad_norm": 0.3446972966194153, |
| "learning_rate": 0.00010259541984732825, |
| "loss": 0.1952, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.4410646387832697, |
| "grad_norm": 0.37858110666275024, |
| "learning_rate": 0.00010249363867684478, |
| "loss": 0.1964, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.4435994930291507, |
| "grad_norm": 0.3989041745662689, |
| "learning_rate": 0.00010239185750636133, |
| "loss": 0.2115, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.4461343472750317, |
| "grad_norm": 0.3948146402835846, |
| "learning_rate": 0.00010229007633587786, |
| "loss": 0.2067, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.4486692015209126, |
| "grad_norm": 0.3683820068836212, |
| "learning_rate": 0.00010218829516539441, |
| "loss": 0.1881, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.4512040557667936, |
| "grad_norm": 0.36742380261421204, |
| "learning_rate": 0.00010208651399491094, |
| "loss": 0.2302, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.453738910012674, |
| "grad_norm": 0.32195988297462463, |
| "learning_rate": 0.00010198473282442749, |
| "loss": 0.1994, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.456273764258555, |
| "grad_norm": 0.42296963930130005, |
| "learning_rate": 0.00010188295165394401, |
| "loss": 0.2657, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.458808618504436, |
| "grad_norm": 0.3555774688720703, |
| "learning_rate": 0.00010178117048346057, |
| "loss": 0.1812, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.461343472750317, |
| "grad_norm": 0.6991668343544006, |
| "learning_rate": 0.00010167938931297712, |
| "loss": 0.4318, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.463878326996198, |
| "grad_norm": 0.4290355443954468, |
| "learning_rate": 0.00010157760814249365, |
| "loss": 0.1856, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.4664131812420784, |
| "grad_norm": 0.3479045331478119, |
| "learning_rate": 0.00010147582697201018, |
| "loss": 0.1844, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.4689480354879594, |
| "grad_norm": 0.3862701952457428, |
| "learning_rate": 0.00010137404580152672, |
| "loss": 0.2108, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.4714828897338403, |
| "grad_norm": 0.34411442279815674, |
| "learning_rate": 0.00010127226463104326, |
| "loss": 0.1851, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.4740177439797213, |
| "grad_norm": 0.2434609979391098, |
| "learning_rate": 0.00010117048346055979, |
| "loss": 0.1757, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.4765525982256023, |
| "grad_norm": 0.3341599106788635, |
| "learning_rate": 0.00010106870229007634, |
| "loss": 0.1879, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.4790874524714828, |
| "grad_norm": 0.27678003907203674, |
| "learning_rate": 0.00010096692111959287, |
| "loss": 0.1943, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.4816223067173637, |
| "grad_norm": 0.2388005256652832, |
| "learning_rate": 0.00010086513994910942, |
| "loss": 0.1804, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.4841571609632447, |
| "grad_norm": 0.5265661478042603, |
| "learning_rate": 0.00010076335877862595, |
| "loss": 0.2813, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.4866920152091256, |
| "grad_norm": 0.337007075548172, |
| "learning_rate": 0.0001006615776081425, |
| "loss": 0.1976, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.489226869455006, |
| "grad_norm": 0.42700427770614624, |
| "learning_rate": 0.00010055979643765905, |
| "loss": 0.2031, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.491761723700887, |
| "grad_norm": 0.3900333642959595, |
| "learning_rate": 0.00010045801526717558, |
| "loss": 0.2178, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.494296577946768, |
| "grad_norm": 0.45332932472229004, |
| "learning_rate": 0.00010035623409669213, |
| "loss": 0.2537, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.496831432192649, |
| "grad_norm": 0.30331265926361084, |
| "learning_rate": 0.00010025445292620866, |
| "loss": 0.2074, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.49936628643853, |
| "grad_norm": 0.3379949927330017, |
| "learning_rate": 0.0001001526717557252, |
| "loss": 0.1768, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.5019011406844105, |
| "grad_norm": 0.40859973430633545, |
| "learning_rate": 0.00010005089058524174, |
| "loss": 0.1984, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.5044359949302915, |
| "grad_norm": 0.3993757963180542, |
| "learning_rate": 9.994910941475827e-05, |
| "loss": 0.2162, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.5069708491761724, |
| "grad_norm": 0.5887713432312012, |
| "learning_rate": 9.984732824427481e-05, |
| "loss": 0.2806, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.5095057034220534, |
| "grad_norm": 0.3590678572654724, |
| "learning_rate": 9.974554707379135e-05, |
| "loss": 0.2045, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.512040557667934, |
| "grad_norm": 0.3090289831161499, |
| "learning_rate": 9.964376590330789e-05, |
| "loss": 0.2151, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.514575411913815, |
| "grad_norm": 0.42125657200813293, |
| "learning_rate": 9.954198473282443e-05, |
| "loss": 0.2277, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.517110266159696, |
| "grad_norm": 0.3213401734828949, |
| "learning_rate": 9.944020356234097e-05, |
| "loss": 0.1927, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.5196451204055768, |
| "grad_norm": 0.4558688998222351, |
| "learning_rate": 9.933842239185751e-05, |
| "loss": 0.2418, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.5221799746514577, |
| "grad_norm": 0.5181113481521606, |
| "learning_rate": 9.923664122137405e-05, |
| "loss": 0.2955, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.5247148288973387, |
| "grad_norm": 0.409424751996994, |
| "learning_rate": 9.913486005089059e-05, |
| "loss": 0.226, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.527249683143219, |
| "grad_norm": 0.44536876678466797, |
| "learning_rate": 9.903307888040713e-05, |
| "loss": 0.2412, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.5297845373891, |
| "grad_norm": 0.5028473734855652, |
| "learning_rate": 9.893129770992367e-05, |
| "loss": 0.2658, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.532319391634981, |
| "grad_norm": 0.3157128691673279, |
| "learning_rate": 9.882951653944021e-05, |
| "loss": 0.1939, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.5348542458808616, |
| "grad_norm": 0.3184659481048584, |
| "learning_rate": 9.872773536895676e-05, |
| "loss": 0.2113, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.5373891001267426, |
| "grad_norm": 0.5658953785896301, |
| "learning_rate": 9.862595419847329e-05, |
| "loss": 0.2641, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.5399239543726235, |
| "grad_norm": 0.5306189060211182, |
| "learning_rate": 9.852417302798982e-05, |
| "loss": 0.2495, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.5424588086185045, |
| "grad_norm": 0.5272448062896729, |
| "learning_rate": 9.842239185750636e-05, |
| "loss": 0.2212, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.5449936628643854, |
| "grad_norm": 0.3216992914676666, |
| "learning_rate": 9.83206106870229e-05, |
| "loss": 0.2284, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.5475285171102664, |
| "grad_norm": 0.3573670983314514, |
| "learning_rate": 9.821882951653944e-05, |
| "loss": 0.2568, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.550063371356147, |
| "grad_norm": 0.4088655710220337, |
| "learning_rate": 9.811704834605598e-05, |
| "loss": 0.2033, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.552598225602028, |
| "grad_norm": 0.33729737997055054, |
| "learning_rate": 9.801526717557252e-05, |
| "loss": 0.1843, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.555133079847909, |
| "grad_norm": 0.3298558294773102, |
| "learning_rate": 9.791348600508906e-05, |
| "loss": 0.193, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.5576679340937893, |
| "grad_norm": 0.33454427123069763, |
| "learning_rate": 9.78117048346056e-05, |
| "loss": 0.1823, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.5602027883396703, |
| "grad_norm": 0.3466435670852661, |
| "learning_rate": 9.770992366412214e-05, |
| "loss": 0.2204, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.5627376425855513, |
| "grad_norm": 0.3551004230976105, |
| "learning_rate": 9.760814249363868e-05, |
| "loss": 0.2027, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.565272496831432, |
| "grad_norm": 0.4317062795162201, |
| "learning_rate": 9.750636132315523e-05, |
| "loss": 0.2099, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.567807351077313, |
| "grad_norm": 0.5695217847824097, |
| "learning_rate": 9.740458015267177e-05, |
| "loss": 0.2547, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.570342205323194, |
| "grad_norm": 0.4523742198944092, |
| "learning_rate": 9.730279898218831e-05, |
| "loss": 0.2501, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.5728770595690746, |
| "grad_norm": 0.3191470503807068, |
| "learning_rate": 9.720101781170484e-05, |
| "loss": 0.1918, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.5754119138149556, |
| "grad_norm": 0.36234062910079956, |
| "learning_rate": 9.709923664122138e-05, |
| "loss": 0.2081, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.5779467680608366, |
| "grad_norm": 0.42196425795555115, |
| "learning_rate": 9.699745547073791e-05, |
| "loss": 0.2801, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.5804816223067175, |
| "grad_norm": 0.3382538855075836, |
| "learning_rate": 9.689567430025445e-05, |
| "loss": 0.221, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.583016476552598, |
| "grad_norm": 0.5736209750175476, |
| "learning_rate": 9.679389312977099e-05, |
| "loss": 0.2684, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.585551330798479, |
| "grad_norm": 0.4692763686180115, |
| "learning_rate": 9.669211195928753e-05, |
| "loss": 0.244, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.58808618504436, |
| "grad_norm": 0.4888627827167511, |
| "learning_rate": 9.659033078880407e-05, |
| "loss": 0.2493, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.590621039290241, |
| "grad_norm": 0.29745686054229736, |
| "learning_rate": 9.648854961832061e-05, |
| "loss": 0.1757, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.593155893536122, |
| "grad_norm": 0.476639062166214, |
| "learning_rate": 9.638676844783715e-05, |
| "loss": 0.2031, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.5956907477820024, |
| "grad_norm": 0.4214845895767212, |
| "learning_rate": 9.628498727735369e-05, |
| "loss": 0.2588, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.5982256020278833, |
| "grad_norm": 0.3036046326160431, |
| "learning_rate": 9.618320610687024e-05, |
| "loss": 0.2031, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.6007604562737643, |
| "grad_norm": 0.7941879630088806, |
| "learning_rate": 9.608142493638678e-05, |
| "loss": 0.2096, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.6032953105196452, |
| "grad_norm": 0.36381933093070984, |
| "learning_rate": 9.597964376590332e-05, |
| "loss": 0.2102, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.6058301647655258, |
| "grad_norm": 0.3213381767272949, |
| "learning_rate": 9.587786259541986e-05, |
| "loss": 0.1884, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.6083650190114067, |
| "grad_norm": 0.38559427857398987, |
| "learning_rate": 9.577608142493639e-05, |
| "loss": 0.2229, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.6108998732572877, |
| "grad_norm": 0.4000662863254547, |
| "learning_rate": 9.567430025445293e-05, |
| "loss": 0.198, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.6134347275031686, |
| "grad_norm": 0.3635396659374237, |
| "learning_rate": 9.557251908396946e-05, |
| "loss": 0.2267, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.6159695817490496, |
| "grad_norm": 0.31810763478279114, |
| "learning_rate": 9.5470737913486e-05, |
| "loss": 0.1691, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.6185044359949305, |
| "grad_norm": 0.29606062173843384, |
| "learning_rate": 9.536895674300254e-05, |
| "loss": 0.1834, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.621039290240811, |
| "grad_norm": 0.3528769612312317, |
| "learning_rate": 9.526717557251908e-05, |
| "loss": 0.2086, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.623574144486692, |
| "grad_norm": 0.4795662760734558, |
| "learning_rate": 9.516539440203562e-05, |
| "loss": 0.2429, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.626108998732573, |
| "grad_norm": 0.4627299904823303, |
| "learning_rate": 9.506361323155216e-05, |
| "loss": 0.1956, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.6286438529784535, |
| "grad_norm": 0.3330387473106384, |
| "learning_rate": 9.496183206106871e-05, |
| "loss": 0.1891, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.6311787072243344, |
| "grad_norm": 0.4265390634536743, |
| "learning_rate": 9.486005089058525e-05, |
| "loss": 0.2086, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.6337135614702154, |
| "grad_norm": 0.37214142084121704, |
| "learning_rate": 9.475826972010179e-05, |
| "loss": 0.2321, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.6362484157160964, |
| "grad_norm": 0.4183201491832733, |
| "learning_rate": 9.465648854961833e-05, |
| "loss": 0.2029, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.6387832699619773, |
| "grad_norm": 0.5688794851303101, |
| "learning_rate": 9.455470737913487e-05, |
| "loss": 0.2481, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.6413181242078583, |
| "grad_norm": 0.38355833292007446, |
| "learning_rate": 9.445292620865141e-05, |
| "loss": 0.1989, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.643852978453739, |
| "grad_norm": 0.4998534023761749, |
| "learning_rate": 9.435114503816794e-05, |
| "loss": 0.2272, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.6463878326996197, |
| "grad_norm": 0.2796792685985565, |
| "learning_rate": 9.424936386768448e-05, |
| "loss": 0.1694, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.6489226869455007, |
| "grad_norm": 0.30551543831825256, |
| "learning_rate": 9.414758269720102e-05, |
| "loss": 0.1782, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.6514575411913817, |
| "grad_norm": 0.3933429718017578, |
| "learning_rate": 9.404580152671755e-05, |
| "loss": 0.272, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.653992395437262, |
| "grad_norm": 0.3543720841407776, |
| "learning_rate": 9.39440203562341e-05, |
| "loss": 0.2271, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.656527249683143, |
| "grad_norm": 0.2716831564903259, |
| "learning_rate": 9.384223918575063e-05, |
| "loss": 0.1898, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.659062103929024, |
| "grad_norm": 0.3037743866443634, |
| "learning_rate": 9.374045801526719e-05, |
| "loss": 0.1911, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.661596958174905, |
| "grad_norm": 0.4390093982219696, |
| "learning_rate": 9.363867684478373e-05, |
| "loss": 0.2369, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.664131812420786, |
| "grad_norm": 0.3383953273296356, |
| "learning_rate": 9.353689567430026e-05, |
| "loss": 0.2519, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.28227975964546204, |
| "learning_rate": 9.34351145038168e-05, |
| "loss": 0.1926, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.6692015209125475, |
| "grad_norm": 0.33451253175735474, |
| "learning_rate": 9.333333333333334e-05, |
| "loss": 0.1864, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.6717363751584284, |
| "grad_norm": 0.4116145372390747, |
| "learning_rate": 9.323155216284988e-05, |
| "loss": 0.2462, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.6742712294043094, |
| "grad_norm": 0.43822887539863586, |
| "learning_rate": 9.312977099236642e-05, |
| "loss": 0.2014, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.67680608365019, |
| "grad_norm": 0.4394984841346741, |
| "learning_rate": 9.302798982188296e-05, |
| "loss": 0.2378, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.679340937896071, |
| "grad_norm": 0.4073251783847809, |
| "learning_rate": 9.292620865139949e-05, |
| "loss": 0.2711, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.681875792141952, |
| "grad_norm": 0.3316657841205597, |
| "learning_rate": 9.282442748091603e-05, |
| "loss": 0.214, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.6844106463878328, |
| "grad_norm": 0.2994216978549957, |
| "learning_rate": 9.272264631043257e-05, |
| "loss": 0.1838, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.6869455006337137, |
| "grad_norm": 0.5388765335083008, |
| "learning_rate": 9.26208651399491e-05, |
| "loss": 0.277, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.6894803548795947, |
| "grad_norm": 0.3714945912361145, |
| "learning_rate": 9.251908396946566e-05, |
| "loss": 0.2428, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.692015209125475, |
| "grad_norm": 0.32202383875846863, |
| "learning_rate": 9.24173027989822e-05, |
| "loss": 0.2063, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.694550063371356, |
| "grad_norm": 0.4116881191730499, |
| "learning_rate": 9.231552162849874e-05, |
| "loss": 0.2661, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.697084917617237, |
| "grad_norm": 0.36626386642456055, |
| "learning_rate": 9.221374045801528e-05, |
| "loss": 0.2897, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.6996197718631176, |
| "grad_norm": 0.33859655261039734, |
| "learning_rate": 9.211195928753181e-05, |
| "loss": 0.1959, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.7021546261089986, |
| "grad_norm": 0.38263705372810364, |
| "learning_rate": 9.201017811704835e-05, |
| "loss": 0.2827, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.7046894803548795, |
| "grad_norm": 0.3557961583137512, |
| "learning_rate": 9.19083969465649e-05, |
| "loss": 0.176, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.7072243346007605, |
| "grad_norm": 0.35334861278533936, |
| "learning_rate": 9.180661577608143e-05, |
| "loss": 0.2183, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.7097591888466415, |
| "grad_norm": 0.4672026038169861, |
| "learning_rate": 9.170483460559797e-05, |
| "loss": 0.2715, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.7122940430925224, |
| "grad_norm": 0.41585099697113037, |
| "learning_rate": 9.160305343511451e-05, |
| "loss": 0.1912, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.714828897338403, |
| "grad_norm": 0.54674232006073, |
| "learning_rate": 9.150127226463104e-05, |
| "loss": 0.2493, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.717363751584284, |
| "grad_norm": 0.30595988035202026, |
| "learning_rate": 9.139949109414758e-05, |
| "loss": 0.1843, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.719898605830165, |
| "grad_norm": 0.3521415889263153, |
| "learning_rate": 9.129770992366413e-05, |
| "loss": 0.2047, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.7224334600760454, |
| "grad_norm": 0.47393590211868286, |
| "learning_rate": 9.119592875318067e-05, |
| "loss": 0.3398, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.7249683143219263, |
| "grad_norm": 0.4672793745994568, |
| "learning_rate": 9.109414758269721e-05, |
| "loss": 0.3569, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.7275031685678073, |
| "grad_norm": 0.41231435537338257, |
| "learning_rate": 9.099236641221375e-05, |
| "loss": 0.2323, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.7300380228136882, |
| "grad_norm": 0.36700156331062317, |
| "learning_rate": 9.089058524173029e-05, |
| "loss": 0.2023, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.732572877059569, |
| "grad_norm": 0.32198184728622437, |
| "learning_rate": 9.078880407124683e-05, |
| "loss": 0.1814, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.73510773130545, |
| "grad_norm": 0.46826303005218506, |
| "learning_rate": 9.068702290076337e-05, |
| "loss": 0.2216, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.7376425855513307, |
| "grad_norm": 0.3026100695133209, |
| "learning_rate": 9.05852417302799e-05, |
| "loss": 0.1826, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.7401774397972116, |
| "grad_norm": 0.2897210717201233, |
| "learning_rate": 9.048346055979644e-05, |
| "loss": 0.1853, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.7427122940430926, |
| "grad_norm": 0.296286940574646, |
| "learning_rate": 9.038167938931298e-05, |
| "loss": 0.1776, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.7452471482889735, |
| "grad_norm": 0.374600887298584, |
| "learning_rate": 9.027989821882952e-05, |
| "loss": 0.2031, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.747782002534854, |
| "grad_norm": 0.5333495140075684, |
| "learning_rate": 9.017811704834606e-05, |
| "loss": 0.2798, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.750316856780735, |
| "grad_norm": 0.43342864513397217, |
| "learning_rate": 9.007633587786259e-05, |
| "loss": 0.2063, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.752851711026616, |
| "grad_norm": 0.5283639430999756, |
| "learning_rate": 8.997455470737914e-05, |
| "loss": 0.25, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.755386565272497, |
| "grad_norm": 0.556190013885498, |
| "learning_rate": 8.987277353689568e-05, |
| "loss": 0.2044, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.757921419518378, |
| "grad_norm": 0.35083258152008057, |
| "learning_rate": 8.977099236641222e-05, |
| "loss": 0.188, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.7604562737642584, |
| "grad_norm": 0.42917102575302124, |
| "learning_rate": 8.966921119592876e-05, |
| "loss": 0.2511, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.7629911280101394, |
| "grad_norm": 0.5665780305862427, |
| "learning_rate": 8.95674300254453e-05, |
| "loss": 0.3307, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.7655259822560203, |
| "grad_norm": 0.40193435549736023, |
| "learning_rate": 8.946564885496184e-05, |
| "loss": 0.2453, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.7680608365019013, |
| "grad_norm": 0.46344733238220215, |
| "learning_rate": 8.936386768447838e-05, |
| "loss": 0.2096, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.770595690747782, |
| "grad_norm": 0.4600921869277954, |
| "learning_rate": 8.926208651399492e-05, |
| "loss": 0.2161, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.7731305449936627, |
| "grad_norm": 0.46053385734558105, |
| "learning_rate": 8.916030534351145e-05, |
| "loss": 0.2369, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.7756653992395437, |
| "grad_norm": 0.45449280738830566, |
| "learning_rate": 8.9058524173028e-05, |
| "loss": 0.2344, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.7782002534854247, |
| "grad_norm": 0.39411383867263794, |
| "learning_rate": 8.895674300254453e-05, |
| "loss": 0.2082, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.7807351077313056, |
| "grad_norm": 0.38967519998550415, |
| "learning_rate": 8.885496183206107e-05, |
| "loss": 0.2264, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.7832699619771866, |
| "grad_norm": 0.3357069194316864, |
| "learning_rate": 8.875318066157761e-05, |
| "loss": 0.1896, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.785804816223067, |
| "grad_norm": 0.4941220283508301, |
| "learning_rate": 8.865139949109415e-05, |
| "loss": 0.3003, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.788339670468948, |
| "grad_norm": 0.3897833526134491, |
| "learning_rate": 8.854961832061069e-05, |
| "loss": 0.1907, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.790874524714829, |
| "grad_norm": 0.4247800409793854, |
| "learning_rate": 8.844783715012723e-05, |
| "loss": 0.1843, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.7934093789607095, |
| "grad_norm": 0.46850237250328064, |
| "learning_rate": 8.834605597964377e-05, |
| "loss": 0.2501, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.7959442332065905, |
| "grad_norm": 0.4753093421459198, |
| "learning_rate": 8.824427480916031e-05, |
| "loss": 0.2277, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.7984790874524714, |
| "grad_norm": 0.3235141932964325, |
| "learning_rate": 8.814249363867685e-05, |
| "loss": 0.1817, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.8010139416983524, |
| "grad_norm": 0.48403674364089966, |
| "learning_rate": 8.804071246819339e-05, |
| "loss": 0.2278, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.8035487959442333, |
| "grad_norm": 0.30417025089263916, |
| "learning_rate": 8.793893129770993e-05, |
| "loss": 0.1867, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.8060836501901143, |
| "grad_norm": 0.30289140343666077, |
| "learning_rate": 8.783715012722647e-05, |
| "loss": 0.1898, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.808618504435995, |
| "grad_norm": 0.47156116366386414, |
| "learning_rate": 8.7735368956743e-05, |
| "loss": 0.2381, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.8111533586818758, |
| "grad_norm": 0.4420924186706543, |
| "learning_rate": 8.763358778625954e-05, |
| "loss": 0.251, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.8136882129277567, |
| "grad_norm": 0.42235851287841797, |
| "learning_rate": 8.75318066157761e-05, |
| "loss": 0.2007, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.8162230671736372, |
| "grad_norm": 0.40069061517715454, |
| "learning_rate": 8.743002544529262e-05, |
| "loss": 0.2052, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.818757921419518, |
| "grad_norm": 0.5213333368301392, |
| "learning_rate": 8.732824427480916e-05, |
| "loss": 0.2236, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.821292775665399, |
| "grad_norm": 0.3919121026992798, |
| "learning_rate": 8.72264631043257e-05, |
| "loss": 0.2338, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.82382762991128, |
| "grad_norm": 0.4295049011707306, |
| "learning_rate": 8.712468193384224e-05, |
| "loss": 0.2713, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.826362484157161, |
| "grad_norm": 0.25834596157073975, |
| "learning_rate": 8.702290076335878e-05, |
| "loss": 0.1701, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.828897338403042, |
| "grad_norm": 0.36217084527015686, |
| "learning_rate": 8.692111959287532e-05, |
| "loss": 0.1963, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.8314321926489225, |
| "grad_norm": 0.39089757204055786, |
| "learning_rate": 8.681933842239186e-05, |
| "loss": 0.186, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.8339670468948035, |
| "grad_norm": 0.45900896191596985, |
| "learning_rate": 8.67175572519084e-05, |
| "loss": 0.22, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.8365019011406845, |
| "grad_norm": 0.2946614623069763, |
| "learning_rate": 8.661577608142494e-05, |
| "loss": 0.1771, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.8390367553865654, |
| "grad_norm": 0.4160090982913971, |
| "learning_rate": 8.651399491094148e-05, |
| "loss": 0.2083, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.841571609632446, |
| "grad_norm": 0.43507587909698486, |
| "learning_rate": 8.641221374045802e-05, |
| "loss": 0.2595, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.844106463878327, |
| "grad_norm": 0.449813574552536, |
| "learning_rate": 8.631043256997457e-05, |
| "loss": 0.2982, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.846641318124208, |
| "grad_norm": 0.33715054392814636, |
| "learning_rate": 8.620865139949111e-05, |
| "loss": 0.1851, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.849176172370089, |
| "grad_norm": 0.4767422676086426, |
| "learning_rate": 8.610687022900765e-05, |
| "loss": 0.2865, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.8517110266159698, |
| "grad_norm": 0.4232870042324066, |
| "learning_rate": 8.600508905852417e-05, |
| "loss": 0.2355, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.8542458808618507, |
| "grad_norm": 0.286565363407135, |
| "learning_rate": 8.590330788804071e-05, |
| "loss": 0.188, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.8567807351077312, |
| "grad_norm": 0.304606169462204, |
| "learning_rate": 8.580152671755725e-05, |
| "loss": 0.2367, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.859315589353612, |
| "grad_norm": 0.4730917811393738, |
| "learning_rate": 8.569974554707379e-05, |
| "loss": 0.2925, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.861850443599493, |
| "grad_norm": 0.348651647567749, |
| "learning_rate": 8.559796437659033e-05, |
| "loss": 0.242, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.8643852978453737, |
| "grad_norm": 0.31156882643699646, |
| "learning_rate": 8.549618320610687e-05, |
| "loss": 0.1865, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.8669201520912546, |
| "grad_norm": 0.4416813254356384, |
| "learning_rate": 8.539440203562341e-05, |
| "loss": 0.311, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.8694550063371356, |
| "grad_norm": 0.2997666895389557, |
| "learning_rate": 8.529262086513995e-05, |
| "loss": 0.1956, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.8719898605830165, |
| "grad_norm": 0.30020904541015625, |
| "learning_rate": 8.519083969465649e-05, |
| "loss": 0.206, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.8745247148288975, |
| "grad_norm": 0.4457029104232788, |
| "learning_rate": 8.508905852417304e-05, |
| "loss": 0.2422, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.8770595690747784, |
| "grad_norm": 0.3519587218761444, |
| "learning_rate": 8.498727735368958e-05, |
| "loss": 0.2277, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.879594423320659, |
| "grad_norm": 0.3482111394405365, |
| "learning_rate": 8.488549618320612e-05, |
| "loss": 0.1981, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.88212927756654, |
| "grad_norm": 0.31978392601013184, |
| "learning_rate": 8.478371501272266e-05, |
| "loss": 0.1849, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.884664131812421, |
| "grad_norm": 0.2380414754152298, |
| "learning_rate": 8.46819338422392e-05, |
| "loss": 0.1619, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.8871989860583014, |
| "grad_norm": 0.25577735900878906, |
| "learning_rate": 8.458015267175572e-05, |
| "loss": 0.1594, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.8897338403041823, |
| "grad_norm": 0.36093661189079285, |
| "learning_rate": 8.447837150127226e-05, |
| "loss": 0.1937, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.8922686945500633, |
| "grad_norm": 0.3542689085006714, |
| "learning_rate": 8.43765903307888e-05, |
| "loss": 0.2219, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.8948035487959443, |
| "grad_norm": 0.3966139853000641, |
| "learning_rate": 8.427480916030534e-05, |
| "loss": 0.2427, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.897338403041825, |
| "grad_norm": 0.3684738278388977, |
| "learning_rate": 8.417302798982188e-05, |
| "loss": 0.2093, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.899873257287706, |
| "grad_norm": 0.430477499961853, |
| "learning_rate": 8.407124681933842e-05, |
| "loss": 0.2266, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.9024081115335867, |
| "grad_norm": 0.32896652817726135, |
| "learning_rate": 8.396946564885496e-05, |
| "loss": 0.2447, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.9049429657794676, |
| "grad_norm": 0.45568832755088806, |
| "learning_rate": 8.38676844783715e-05, |
| "loss": 0.2251, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.9074778200253486, |
| "grad_norm": 0.48290732502937317, |
| "learning_rate": 8.376590330788805e-05, |
| "loss": 0.2471, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.9100126742712296, |
| "grad_norm": 0.40795937180519104, |
| "learning_rate": 8.366412213740459e-05, |
| "loss": 0.2031, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.91254752851711, |
| "grad_norm": 0.362835168838501, |
| "learning_rate": 8.356234096692113e-05, |
| "loss": 0.1991, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.915082382762991, |
| "grad_norm": 0.38601744174957275, |
| "learning_rate": 8.346055979643767e-05, |
| "loss": 0.1821, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.917617237008872, |
| "grad_norm": 0.2641182541847229, |
| "learning_rate": 8.335877862595421e-05, |
| "loss": 0.16, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.920152091254753, |
| "grad_norm": 0.5600478053092957, |
| "learning_rate": 8.325699745547075e-05, |
| "loss": 0.2476, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.922686945500634, |
| "grad_norm": 0.3873019516468048, |
| "learning_rate": 8.315521628498727e-05, |
| "loss": 0.2264, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.9252217997465144, |
| "grad_norm": 0.2946743667125702, |
| "learning_rate": 8.305343511450381e-05, |
| "loss": 0.1776, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.9277566539923954, |
| "grad_norm": 0.3886416554450989, |
| "learning_rate": 8.295165394402035e-05, |
| "loss": 0.2123, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.9302915082382763, |
| "grad_norm": 0.39706671237945557, |
| "learning_rate": 8.284987277353689e-05, |
| "loss": 0.2319, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.9328263624841573, |
| "grad_norm": 0.30693602561950684, |
| "learning_rate": 8.274809160305343e-05, |
| "loss": 0.1939, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.935361216730038, |
| "grad_norm": 0.37277474999427795, |
| "learning_rate": 8.264631043256997e-05, |
| "loss": 0.2194, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.9378960709759188, |
| "grad_norm": 0.442508727312088, |
| "learning_rate": 8.254452926208652e-05, |
| "loss": 0.2142, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.9404309252217997, |
| "grad_norm": 0.275898814201355, |
| "learning_rate": 8.244274809160306e-05, |
| "loss": 0.1791, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.9429657794676807, |
| "grad_norm": 0.4033918082714081, |
| "learning_rate": 8.23409669211196e-05, |
| "loss": 0.295, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.9455006337135616, |
| "grad_norm": 0.46713244915008545, |
| "learning_rate": 8.223918575063614e-05, |
| "loss": 0.2662, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.9480354879594426, |
| "grad_norm": 0.37975406646728516, |
| "learning_rate": 8.213740458015268e-05, |
| "loss": 0.1915, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.950570342205323, |
| "grad_norm": 0.31382545828819275, |
| "learning_rate": 8.203562340966922e-05, |
| "loss": 0.1793, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.953105196451204, |
| "grad_norm": 0.42415499687194824, |
| "learning_rate": 8.193384223918576e-05, |
| "loss": 0.2375, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.955640050697085, |
| "grad_norm": 0.4227803647518158, |
| "learning_rate": 8.18320610687023e-05, |
| "loss": 0.213, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.9581749049429655, |
| "grad_norm": 0.3395853638648987, |
| "learning_rate": 8.173027989821882e-05, |
| "loss": 0.1942, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.9607097591888465, |
| "grad_norm": 0.4627746641635895, |
| "learning_rate": 8.162849872773536e-05, |
| "loss": 0.2266, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.9632446134347274, |
| "grad_norm": 0.36325398087501526, |
| "learning_rate": 8.15267175572519e-05, |
| "loss": 0.2176, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.9657794676806084, |
| "grad_norm": 0.4188767671585083, |
| "learning_rate": 8.142493638676844e-05, |
| "loss": 0.1992, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.9683143219264894, |
| "grad_norm": 0.3149709403514862, |
| "learning_rate": 8.1323155216285e-05, |
| "loss": 0.1829, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.9708491761723703, |
| "grad_norm": 0.26542145013809204, |
| "learning_rate": 8.122137404580153e-05, |
| "loss": 0.1801, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.973384030418251, |
| "grad_norm": 0.28748998045921326, |
| "learning_rate": 8.111959287531807e-05, |
| "loss": 0.1764, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.975918884664132, |
| "grad_norm": 0.3103797733783722, |
| "learning_rate": 8.101781170483461e-05, |
| "loss": 0.2047, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.9784537389100127, |
| "grad_norm": 0.3357256054878235, |
| "learning_rate": 8.091603053435115e-05, |
| "loss": 0.2303, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.9809885931558933, |
| "grad_norm": 0.4399915933609009, |
| "learning_rate": 8.081424936386769e-05, |
| "loss": 0.2423, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.983523447401774, |
| "grad_norm": 0.3486070930957794, |
| "learning_rate": 8.071246819338423e-05, |
| "loss": 0.19, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.986058301647655, |
| "grad_norm": 0.33286648988723755, |
| "learning_rate": 8.061068702290077e-05, |
| "loss": 0.1788, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.988593155893536, |
| "grad_norm": 0.2841028571128845, |
| "learning_rate": 8.050890585241731e-05, |
| "loss": 0.167, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.991128010139417, |
| "grad_norm": 0.44933149218559265, |
| "learning_rate": 8.040712468193385e-05, |
| "loss": 0.3098, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.993662864385298, |
| "grad_norm": 0.2849741280078888, |
| "learning_rate": 8.030534351145038e-05, |
| "loss": 0.1896, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.9961977186311786, |
| "grad_norm": 0.39720216393470764, |
| "learning_rate": 8.020356234096691e-05, |
| "loss": 0.2426, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.9987325728770595, |
| "grad_norm": 0.3838231563568115, |
| "learning_rate": 8.010178117048347e-05, |
| "loss": 0.2194, |
| "step": 1184 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.6684709787368774, |
| "learning_rate": 8e-05, |
| "loss": 0.2783, |
| "step": 1185 |
| }, |
| { |
| "epoch": 3.002534854245881, |
| "grad_norm": 0.44380757212638855, |
| "learning_rate": 7.989821882951655e-05, |
| "loss": 0.2938, |
| "step": 1186 |
| }, |
| { |
| "epoch": 3.005069708491762, |
| "grad_norm": 0.4787996709346771, |
| "learning_rate": 7.979643765903309e-05, |
| "loss": 0.2998, |
| "step": 1187 |
| }, |
| { |
| "epoch": 3.0076045627376424, |
| "grad_norm": 0.36355340480804443, |
| "learning_rate": 7.969465648854962e-05, |
| "loss": 0.1555, |
| "step": 1188 |
| }, |
| { |
| "epoch": 3.0101394169835234, |
| "grad_norm": 0.37890535593032837, |
| "learning_rate": 7.959287531806616e-05, |
| "loss": 0.1743, |
| "step": 1189 |
| }, |
| { |
| "epoch": 3.0126742712294043, |
| "grad_norm": 0.4317542612552643, |
| "learning_rate": 7.94910941475827e-05, |
| "loss": 0.1891, |
| "step": 1190 |
| }, |
| { |
| "epoch": 3.0152091254752853, |
| "grad_norm": 0.3477863669395447, |
| "learning_rate": 7.938931297709924e-05, |
| "loss": 0.1576, |
| "step": 1191 |
| }, |
| { |
| "epoch": 3.017743979721166, |
| "grad_norm": 0.414050817489624, |
| "learning_rate": 7.928753180661578e-05, |
| "loss": 0.2014, |
| "step": 1192 |
| }, |
| { |
| "epoch": 3.0202788339670468, |
| "grad_norm": 0.3596842288970947, |
| "learning_rate": 7.918575063613232e-05, |
| "loss": 0.1482, |
| "step": 1193 |
| }, |
| { |
| "epoch": 3.0228136882129277, |
| "grad_norm": 0.49169921875, |
| "learning_rate": 7.908396946564886e-05, |
| "loss": 0.1686, |
| "step": 1194 |
| }, |
| { |
| "epoch": 3.0253485424588087, |
| "grad_norm": 0.44806674122810364, |
| "learning_rate": 7.89821882951654e-05, |
| "loss": 0.2044, |
| "step": 1195 |
| }, |
| { |
| "epoch": 3.0278833967046896, |
| "grad_norm": 0.43101197481155396, |
| "learning_rate": 7.888040712468194e-05, |
| "loss": 0.1911, |
| "step": 1196 |
| }, |
| { |
| "epoch": 3.03041825095057, |
| "grad_norm": 0.5595632195472717, |
| "learning_rate": 7.877862595419848e-05, |
| "loss": 0.1823, |
| "step": 1197 |
| }, |
| { |
| "epoch": 3.032953105196451, |
| "grad_norm": 0.5024780035018921, |
| "learning_rate": 7.867684478371502e-05, |
| "loss": 0.1789, |
| "step": 1198 |
| }, |
| { |
| "epoch": 3.035487959442332, |
| "grad_norm": 0.4227488934993744, |
| "learning_rate": 7.857506361323156e-05, |
| "loss": 0.1539, |
| "step": 1199 |
| }, |
| { |
| "epoch": 3.038022813688213, |
| "grad_norm": 0.43486127257347107, |
| "learning_rate": 7.84732824427481e-05, |
| "loss": 0.1577, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.040557667934094, |
| "grad_norm": 0.47951167821884155, |
| "learning_rate": 7.837150127226464e-05, |
| "loss": 0.1975, |
| "step": 1201 |
| }, |
| { |
| "epoch": 3.0430925221799745, |
| "grad_norm": 0.4223075211048126, |
| "learning_rate": 7.826972010178117e-05, |
| "loss": 0.1719, |
| "step": 1202 |
| }, |
| { |
| "epoch": 3.0456273764258555, |
| "grad_norm": 0.6699900031089783, |
| "learning_rate": 7.816793893129771e-05, |
| "loss": 0.2139, |
| "step": 1203 |
| }, |
| { |
| "epoch": 3.0481622306717364, |
| "grad_norm": 0.6038373708724976, |
| "learning_rate": 7.806615776081425e-05, |
| "loss": 0.2163, |
| "step": 1204 |
| }, |
| { |
| "epoch": 3.0506970849176174, |
| "grad_norm": 0.530208945274353, |
| "learning_rate": 7.796437659033079e-05, |
| "loss": 0.1482, |
| "step": 1205 |
| }, |
| { |
| "epoch": 3.053231939163498, |
| "grad_norm": 0.6380701661109924, |
| "learning_rate": 7.786259541984733e-05, |
| "loss": 0.2191, |
| "step": 1206 |
| }, |
| { |
| "epoch": 3.055766793409379, |
| "grad_norm": 0.6455860137939453, |
| "learning_rate": 7.776081424936387e-05, |
| "loss": 0.1812, |
| "step": 1207 |
| }, |
| { |
| "epoch": 3.05830164765526, |
| "grad_norm": 0.5198556184768677, |
| "learning_rate": 7.765903307888041e-05, |
| "loss": 0.1602, |
| "step": 1208 |
| }, |
| { |
| "epoch": 3.0608365019011408, |
| "grad_norm": 0.4842750132083893, |
| "learning_rate": 7.755725190839695e-05, |
| "loss": 0.1739, |
| "step": 1209 |
| }, |
| { |
| "epoch": 3.0633713561470217, |
| "grad_norm": 0.6345165371894836, |
| "learning_rate": 7.745547073791349e-05, |
| "loss": 0.1841, |
| "step": 1210 |
| }, |
| { |
| "epoch": 3.0659062103929022, |
| "grad_norm": 0.551673173904419, |
| "learning_rate": 7.735368956743003e-05, |
| "loss": 0.1755, |
| "step": 1211 |
| }, |
| { |
| "epoch": 3.068441064638783, |
| "grad_norm": 0.5332705974578857, |
| "learning_rate": 7.725190839694657e-05, |
| "loss": 0.2175, |
| "step": 1212 |
| }, |
| { |
| "epoch": 3.070975918884664, |
| "grad_norm": 0.6630911231040955, |
| "learning_rate": 7.715012722646311e-05, |
| "loss": 0.2868, |
| "step": 1213 |
| }, |
| { |
| "epoch": 3.073510773130545, |
| "grad_norm": 0.42508792877197266, |
| "learning_rate": 7.704834605597965e-05, |
| "loss": 0.1811, |
| "step": 1214 |
| }, |
| { |
| "epoch": 3.076045627376426, |
| "grad_norm": 0.504231870174408, |
| "learning_rate": 7.694656488549619e-05, |
| "loss": 0.1765, |
| "step": 1215 |
| }, |
| { |
| "epoch": 3.0785804816223066, |
| "grad_norm": 0.39370813965797424, |
| "learning_rate": 7.684478371501273e-05, |
| "loss": 0.1739, |
| "step": 1216 |
| }, |
| { |
| "epoch": 3.0811153358681875, |
| "grad_norm": 0.5411176085472107, |
| "learning_rate": 7.674300254452926e-05, |
| "loss": 0.2015, |
| "step": 1217 |
| }, |
| { |
| "epoch": 3.0836501901140685, |
| "grad_norm": 0.58034348487854, |
| "learning_rate": 7.66412213740458e-05, |
| "loss": 0.2293, |
| "step": 1218 |
| }, |
| { |
| "epoch": 3.0861850443599494, |
| "grad_norm": 0.48355352878570557, |
| "learning_rate": 7.653944020356234e-05, |
| "loss": 0.1858, |
| "step": 1219 |
| }, |
| { |
| "epoch": 3.08871989860583, |
| "grad_norm": 0.3532313406467438, |
| "learning_rate": 7.643765903307888e-05, |
| "loss": 0.1689, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.091254752851711, |
| "grad_norm": 0.36245197057724, |
| "learning_rate": 7.633587786259542e-05, |
| "loss": 0.1744, |
| "step": 1221 |
| }, |
| { |
| "epoch": 3.093789607097592, |
| "grad_norm": 0.4752829372882843, |
| "learning_rate": 7.623409669211196e-05, |
| "loss": 0.1733, |
| "step": 1222 |
| }, |
| { |
| "epoch": 3.096324461343473, |
| "grad_norm": 0.3701539933681488, |
| "learning_rate": 7.61323155216285e-05, |
| "loss": 0.158, |
| "step": 1223 |
| }, |
| { |
| "epoch": 3.098859315589354, |
| "grad_norm": 0.45548319816589355, |
| "learning_rate": 7.603053435114504e-05, |
| "loss": 0.1822, |
| "step": 1224 |
| }, |
| { |
| "epoch": 3.1013941698352343, |
| "grad_norm": 0.376499205827713, |
| "learning_rate": 7.592875318066158e-05, |
| "loss": 0.1613, |
| "step": 1225 |
| }, |
| { |
| "epoch": 3.1039290240811153, |
| "grad_norm": 0.4430786967277527, |
| "learning_rate": 7.582697201017812e-05, |
| "loss": 0.1691, |
| "step": 1226 |
| }, |
| { |
| "epoch": 3.106463878326996, |
| "grad_norm": 0.44311538338661194, |
| "learning_rate": 7.572519083969466e-05, |
| "loss": 0.1853, |
| "step": 1227 |
| }, |
| { |
| "epoch": 3.108998732572877, |
| "grad_norm": 0.5815149545669556, |
| "learning_rate": 7.56234096692112e-05, |
| "loss": 0.2039, |
| "step": 1228 |
| }, |
| { |
| "epoch": 3.111533586818758, |
| "grad_norm": 0.5101373195648193, |
| "learning_rate": 7.552162849872774e-05, |
| "loss": 0.2022, |
| "step": 1229 |
| }, |
| { |
| "epoch": 3.1140684410646386, |
| "grad_norm": 0.6038093566894531, |
| "learning_rate": 7.541984732824428e-05, |
| "loss": 0.1859, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.1166032953105196, |
| "grad_norm": 0.5133914351463318, |
| "learning_rate": 7.531806615776081e-05, |
| "loss": 0.1626, |
| "step": 1231 |
| }, |
| { |
| "epoch": 3.1191381495564006, |
| "grad_norm": 0.40495821833610535, |
| "learning_rate": 7.521628498727735e-05, |
| "loss": 0.1739, |
| "step": 1232 |
| }, |
| { |
| "epoch": 3.1216730038022815, |
| "grad_norm": 0.6585063934326172, |
| "learning_rate": 7.511450381679391e-05, |
| "loss": 0.2402, |
| "step": 1233 |
| }, |
| { |
| "epoch": 3.124207858048162, |
| "grad_norm": 0.45598068833351135, |
| "learning_rate": 7.501272264631045e-05, |
| "loss": 0.1632, |
| "step": 1234 |
| }, |
| { |
| "epoch": 3.126742712294043, |
| "grad_norm": 0.42114904522895813, |
| "learning_rate": 7.491094147582699e-05, |
| "loss": 0.1638, |
| "step": 1235 |
| }, |
| { |
| "epoch": 3.129277566539924, |
| "grad_norm": 0.443198561668396, |
| "learning_rate": 7.480916030534351e-05, |
| "loss": 0.2148, |
| "step": 1236 |
| }, |
| { |
| "epoch": 3.131812420785805, |
| "grad_norm": 0.5573143362998962, |
| "learning_rate": 7.470737913486005e-05, |
| "loss": 0.2219, |
| "step": 1237 |
| }, |
| { |
| "epoch": 3.134347275031686, |
| "grad_norm": 0.6023311614990234, |
| "learning_rate": 7.460559796437659e-05, |
| "loss": 0.1987, |
| "step": 1238 |
| }, |
| { |
| "epoch": 3.1368821292775664, |
| "grad_norm": 0.5282934904098511, |
| "learning_rate": 7.450381679389313e-05, |
| "loss": 0.2377, |
| "step": 1239 |
| }, |
| { |
| "epoch": 3.1394169835234473, |
| "grad_norm": 0.49694669246673584, |
| "learning_rate": 7.440203562340967e-05, |
| "loss": 0.1804, |
| "step": 1240 |
| }, |
| { |
| "epoch": 3.1419518377693283, |
| "grad_norm": 0.43045276403427124, |
| "learning_rate": 7.430025445292621e-05, |
| "loss": 0.1635, |
| "step": 1241 |
| }, |
| { |
| "epoch": 3.1444866920152093, |
| "grad_norm": 0.4798453152179718, |
| "learning_rate": 7.419847328244275e-05, |
| "loss": 0.1696, |
| "step": 1242 |
| }, |
| { |
| "epoch": 3.14702154626109, |
| "grad_norm": 0.5173293352127075, |
| "learning_rate": 7.409669211195929e-05, |
| "loss": 0.1802, |
| "step": 1243 |
| }, |
| { |
| "epoch": 3.1495564005069707, |
| "grad_norm": 0.5398945808410645, |
| "learning_rate": 7.399491094147583e-05, |
| "loss": 0.1949, |
| "step": 1244 |
| }, |
| { |
| "epoch": 3.1520912547528517, |
| "grad_norm": 0.5297830700874329, |
| "learning_rate": 7.389312977099238e-05, |
| "loss": 0.1987, |
| "step": 1245 |
| }, |
| { |
| "epoch": 3.1546261089987326, |
| "grad_norm": 0.5320866703987122, |
| "learning_rate": 7.379134860050892e-05, |
| "loss": 0.1715, |
| "step": 1246 |
| }, |
| { |
| "epoch": 3.1571609632446136, |
| "grad_norm": 0.6132882833480835, |
| "learning_rate": 7.368956743002546e-05, |
| "loss": 0.3204, |
| "step": 1247 |
| }, |
| { |
| "epoch": 3.159695817490494, |
| "grad_norm": 0.4120640158653259, |
| "learning_rate": 7.3587786259542e-05, |
| "loss": 0.157, |
| "step": 1248 |
| }, |
| { |
| "epoch": 3.162230671736375, |
| "grad_norm": 0.6765384674072266, |
| "learning_rate": 7.348600508905854e-05, |
| "loss": 0.2186, |
| "step": 1249 |
| }, |
| { |
| "epoch": 3.164765525982256, |
| "grad_norm": 0.6318830847740173, |
| "learning_rate": 7.338422391857506e-05, |
| "loss": 0.2189, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.167300380228137, |
| "grad_norm": 0.508305013179779, |
| "learning_rate": 7.32824427480916e-05, |
| "loss": 0.1962, |
| "step": 1251 |
| }, |
| { |
| "epoch": 3.169835234474018, |
| "grad_norm": 0.603520393371582, |
| "learning_rate": 7.318066157760814e-05, |
| "loss": 0.2615, |
| "step": 1252 |
| }, |
| { |
| "epoch": 3.1723700887198985, |
| "grad_norm": 0.7639157176017761, |
| "learning_rate": 7.307888040712468e-05, |
| "loss": 0.2982, |
| "step": 1253 |
| }, |
| { |
| "epoch": 3.1749049429657794, |
| "grad_norm": 0.5995659232139587, |
| "learning_rate": 7.297709923664122e-05, |
| "loss": 0.2206, |
| "step": 1254 |
| }, |
| { |
| "epoch": 3.1774397972116604, |
| "grad_norm": 0.6512479186058044, |
| "learning_rate": 7.287531806615776e-05, |
| "loss": 0.2065, |
| "step": 1255 |
| }, |
| { |
| "epoch": 3.1799746514575413, |
| "grad_norm": 0.4128544330596924, |
| "learning_rate": 7.27735368956743e-05, |
| "loss": 0.1589, |
| "step": 1256 |
| }, |
| { |
| "epoch": 3.182509505703422, |
| "grad_norm": 0.5341802835464478, |
| "learning_rate": 7.267175572519084e-05, |
| "loss": 0.1812, |
| "step": 1257 |
| }, |
| { |
| "epoch": 3.185044359949303, |
| "grad_norm": 0.38032597303390503, |
| "learning_rate": 7.256997455470739e-05, |
| "loss": 0.1773, |
| "step": 1258 |
| }, |
| { |
| "epoch": 3.1875792141951838, |
| "grad_norm": 0.5732728838920593, |
| "learning_rate": 7.246819338422393e-05, |
| "loss": 0.2047, |
| "step": 1259 |
| }, |
| { |
| "epoch": 3.1901140684410647, |
| "grad_norm": 0.47396236658096313, |
| "learning_rate": 7.236641221374047e-05, |
| "loss": 0.2095, |
| "step": 1260 |
| }, |
| { |
| "epoch": 3.1926489226869457, |
| "grad_norm": 0.4764629304409027, |
| "learning_rate": 7.226463104325701e-05, |
| "loss": 0.1802, |
| "step": 1261 |
| }, |
| { |
| "epoch": 3.195183776932826, |
| "grad_norm": 0.5802401304244995, |
| "learning_rate": 7.216284987277355e-05, |
| "loss": 0.1821, |
| "step": 1262 |
| }, |
| { |
| "epoch": 3.197718631178707, |
| "grad_norm": 0.47988972067832947, |
| "learning_rate": 7.206106870229009e-05, |
| "loss": 0.163, |
| "step": 1263 |
| }, |
| { |
| "epoch": 3.200253485424588, |
| "grad_norm": 0.48500359058380127, |
| "learning_rate": 7.195928753180661e-05, |
| "loss": 0.1739, |
| "step": 1264 |
| }, |
| { |
| "epoch": 3.202788339670469, |
| "grad_norm": 0.7479031682014465, |
| "learning_rate": 7.185750636132315e-05, |
| "loss": 0.2646, |
| "step": 1265 |
| }, |
| { |
| "epoch": 3.20532319391635, |
| "grad_norm": 0.48695701360702515, |
| "learning_rate": 7.175572519083969e-05, |
| "loss": 0.1822, |
| "step": 1266 |
| }, |
| { |
| "epoch": 3.2078580481622305, |
| "grad_norm": 0.712354838848114, |
| "learning_rate": 7.165394402035623e-05, |
| "loss": 0.1827, |
| "step": 1267 |
| }, |
| { |
| "epoch": 3.2103929024081115, |
| "grad_norm": 0.4304606020450592, |
| "learning_rate": 7.155216284987277e-05, |
| "loss": 0.1759, |
| "step": 1268 |
| }, |
| { |
| "epoch": 3.2129277566539924, |
| "grad_norm": 0.44741392135620117, |
| "learning_rate": 7.145038167938931e-05, |
| "loss": 0.1979, |
| "step": 1269 |
| }, |
| { |
| "epoch": 3.2154626108998734, |
| "grad_norm": 0.3691045045852661, |
| "learning_rate": 7.134860050890586e-05, |
| "loss": 0.1575, |
| "step": 1270 |
| }, |
| { |
| "epoch": 3.2179974651457544, |
| "grad_norm": 0.4908023476600647, |
| "learning_rate": 7.12468193384224e-05, |
| "loss": 0.1854, |
| "step": 1271 |
| }, |
| { |
| "epoch": 3.220532319391635, |
| "grad_norm": 0.3953510820865631, |
| "learning_rate": 7.114503816793894e-05, |
| "loss": 0.1821, |
| "step": 1272 |
| }, |
| { |
| "epoch": 3.223067173637516, |
| "grad_norm": 0.35227248072624207, |
| "learning_rate": 7.104325699745548e-05, |
| "loss": 0.173, |
| "step": 1273 |
| }, |
| { |
| "epoch": 3.225602027883397, |
| "grad_norm": 0.41285187005996704, |
| "learning_rate": 7.094147582697202e-05, |
| "loss": 0.1708, |
| "step": 1274 |
| }, |
| { |
| "epoch": 3.2281368821292777, |
| "grad_norm": 0.5076828002929688, |
| "learning_rate": 7.083969465648856e-05, |
| "loss": 0.2128, |
| "step": 1275 |
| }, |
| { |
| "epoch": 3.2306717363751583, |
| "grad_norm": 0.5385151505470276, |
| "learning_rate": 7.07379134860051e-05, |
| "loss": 0.2181, |
| "step": 1276 |
| }, |
| { |
| "epoch": 3.233206590621039, |
| "grad_norm": 0.4620850086212158, |
| "learning_rate": 7.063613231552164e-05, |
| "loss": 0.212, |
| "step": 1277 |
| }, |
| { |
| "epoch": 3.23574144486692, |
| "grad_norm": 0.6768701672554016, |
| "learning_rate": 7.053435114503816e-05, |
| "loss": 0.2704, |
| "step": 1278 |
| }, |
| { |
| "epoch": 3.238276299112801, |
| "grad_norm": 0.43216967582702637, |
| "learning_rate": 7.04325699745547e-05, |
| "loss": 0.1633, |
| "step": 1279 |
| }, |
| { |
| "epoch": 3.240811153358682, |
| "grad_norm": 0.3756103813648224, |
| "learning_rate": 7.033078880407124e-05, |
| "loss": 0.1767, |
| "step": 1280 |
| }, |
| { |
| "epoch": 3.2433460076045626, |
| "grad_norm": 0.612819254398346, |
| "learning_rate": 7.022900763358778e-05, |
| "loss": 0.2563, |
| "step": 1281 |
| }, |
| { |
| "epoch": 3.2458808618504436, |
| "grad_norm": 0.5477813482284546, |
| "learning_rate": 7.012722646310433e-05, |
| "loss": 0.2053, |
| "step": 1282 |
| }, |
| { |
| "epoch": 3.2484157160963245, |
| "grad_norm": 0.3412390351295471, |
| "learning_rate": 7.002544529262087e-05, |
| "loss": 0.1506, |
| "step": 1283 |
| }, |
| { |
| "epoch": 3.2509505703422055, |
| "grad_norm": 0.34337860345840454, |
| "learning_rate": 6.992366412213741e-05, |
| "loss": 0.1612, |
| "step": 1284 |
| }, |
| { |
| "epoch": 3.253485424588086, |
| "grad_norm": 0.37943509221076965, |
| "learning_rate": 6.982188295165395e-05, |
| "loss": 0.168, |
| "step": 1285 |
| }, |
| { |
| "epoch": 3.256020278833967, |
| "grad_norm": 0.6030418872833252, |
| "learning_rate": 6.972010178117049e-05, |
| "loss": 0.2146, |
| "step": 1286 |
| }, |
| { |
| "epoch": 3.258555133079848, |
| "grad_norm": 0.34367507696151733, |
| "learning_rate": 6.961832061068703e-05, |
| "loss": 0.1726, |
| "step": 1287 |
| }, |
| { |
| "epoch": 3.261089987325729, |
| "grad_norm": 0.3952295780181885, |
| "learning_rate": 6.951653944020357e-05, |
| "loss": 0.1754, |
| "step": 1288 |
| }, |
| { |
| "epoch": 3.26362484157161, |
| "grad_norm": 0.5151681900024414, |
| "learning_rate": 6.941475826972011e-05, |
| "loss": 0.1849, |
| "step": 1289 |
| }, |
| { |
| "epoch": 3.2661596958174903, |
| "grad_norm": 0.496988445520401, |
| "learning_rate": 6.931297709923665e-05, |
| "loss": 0.1938, |
| "step": 1290 |
| }, |
| { |
| "epoch": 3.2686945500633713, |
| "grad_norm": 0.45343711972236633, |
| "learning_rate": 6.921119592875319e-05, |
| "loss": 0.1845, |
| "step": 1291 |
| }, |
| { |
| "epoch": 3.2712294043092522, |
| "grad_norm": 0.5323635935783386, |
| "learning_rate": 6.910941475826971e-05, |
| "loss": 0.177, |
| "step": 1292 |
| }, |
| { |
| "epoch": 3.273764258555133, |
| "grad_norm": 0.39680036902427673, |
| "learning_rate": 6.900763358778625e-05, |
| "loss": 0.1843, |
| "step": 1293 |
| }, |
| { |
| "epoch": 3.2762991128010137, |
| "grad_norm": 0.4767110049724579, |
| "learning_rate": 6.89058524173028e-05, |
| "loss": 0.2103, |
| "step": 1294 |
| }, |
| { |
| "epoch": 3.2788339670468947, |
| "grad_norm": 0.5565052032470703, |
| "learning_rate": 6.880407124681934e-05, |
| "loss": 0.2185, |
| "step": 1295 |
| }, |
| { |
| "epoch": 3.2813688212927756, |
| "grad_norm": 0.5472534894943237, |
| "learning_rate": 6.870229007633588e-05, |
| "loss": 0.2237, |
| "step": 1296 |
| }, |
| { |
| "epoch": 3.2839036755386566, |
| "grad_norm": 0.632560133934021, |
| "learning_rate": 6.860050890585242e-05, |
| "loss": 0.2213, |
| "step": 1297 |
| }, |
| { |
| "epoch": 3.2864385297845375, |
| "grad_norm": 0.5626386404037476, |
| "learning_rate": 6.849872773536896e-05, |
| "loss": 0.2324, |
| "step": 1298 |
| }, |
| { |
| "epoch": 3.288973384030418, |
| "grad_norm": 0.5527671575546265, |
| "learning_rate": 6.83969465648855e-05, |
| "loss": 0.227, |
| "step": 1299 |
| }, |
| { |
| "epoch": 3.291508238276299, |
| "grad_norm": 0.6093178391456604, |
| "learning_rate": 6.829516539440204e-05, |
| "loss": 0.2368, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.29404309252218, |
| "grad_norm": 0.3845243453979492, |
| "learning_rate": 6.819338422391858e-05, |
| "loss": 0.1804, |
| "step": 1301 |
| }, |
| { |
| "epoch": 3.296577946768061, |
| "grad_norm": 0.6384890079498291, |
| "learning_rate": 6.809160305343512e-05, |
| "loss": 0.2598, |
| "step": 1302 |
| }, |
| { |
| "epoch": 3.299112801013942, |
| "grad_norm": 0.5135822892189026, |
| "learning_rate": 6.798982188295166e-05, |
| "loss": 0.2142, |
| "step": 1303 |
| }, |
| { |
| "epoch": 3.3016476552598224, |
| "grad_norm": 0.4996071457862854, |
| "learning_rate": 6.78880407124682e-05, |
| "loss": 0.2107, |
| "step": 1304 |
| }, |
| { |
| "epoch": 3.3041825095057034, |
| "grad_norm": 0.31445005536079407, |
| "learning_rate": 6.778625954198474e-05, |
| "loss": 0.1764, |
| "step": 1305 |
| }, |
| { |
| "epoch": 3.3067173637515843, |
| "grad_norm": 0.544301450252533, |
| "learning_rate": 6.768447837150128e-05, |
| "loss": 0.2856, |
| "step": 1306 |
| }, |
| { |
| "epoch": 3.3092522179974653, |
| "grad_norm": 0.5029551982879639, |
| "learning_rate": 6.758269720101782e-05, |
| "loss": 0.2374, |
| "step": 1307 |
| }, |
| { |
| "epoch": 3.3117870722433462, |
| "grad_norm": 0.3769523799419403, |
| "learning_rate": 6.748091603053436e-05, |
| "loss": 0.1853, |
| "step": 1308 |
| }, |
| { |
| "epoch": 3.3143219264892267, |
| "grad_norm": 0.3540287911891937, |
| "learning_rate": 6.73791348600509e-05, |
| "loss": 0.193, |
| "step": 1309 |
| }, |
| { |
| "epoch": 3.3168567807351077, |
| "grad_norm": 0.42674198746681213, |
| "learning_rate": 6.727735368956743e-05, |
| "loss": 0.1953, |
| "step": 1310 |
| }, |
| { |
| "epoch": 3.3193916349809887, |
| "grad_norm": 0.5152068138122559, |
| "learning_rate": 6.717557251908397e-05, |
| "loss": 0.1871, |
| "step": 1311 |
| }, |
| { |
| "epoch": 3.3219264892268696, |
| "grad_norm": 0.48964372277259827, |
| "learning_rate": 6.707379134860051e-05, |
| "loss": 0.2142, |
| "step": 1312 |
| }, |
| { |
| "epoch": 3.32446134347275, |
| "grad_norm": 0.5390191674232483, |
| "learning_rate": 6.697201017811705e-05, |
| "loss": 0.1764, |
| "step": 1313 |
| }, |
| { |
| "epoch": 3.326996197718631, |
| "grad_norm": 0.3849482238292694, |
| "learning_rate": 6.687022900763359e-05, |
| "loss": 0.1681, |
| "step": 1314 |
| }, |
| { |
| "epoch": 3.329531051964512, |
| "grad_norm": 0.36165010929107666, |
| "learning_rate": 6.676844783715013e-05, |
| "loss": 0.148, |
| "step": 1315 |
| }, |
| { |
| "epoch": 3.332065906210393, |
| "grad_norm": 0.47739362716674805, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 0.1748, |
| "step": 1316 |
| }, |
| { |
| "epoch": 3.334600760456274, |
| "grad_norm": 0.41228094696998596, |
| "learning_rate": 6.656488549618321e-05, |
| "loss": 0.2006, |
| "step": 1317 |
| }, |
| { |
| "epoch": 3.3371356147021545, |
| "grad_norm": 0.43494951725006104, |
| "learning_rate": 6.646310432569975e-05, |
| "loss": 0.1821, |
| "step": 1318 |
| }, |
| { |
| "epoch": 3.3396704689480354, |
| "grad_norm": 0.5502039194107056, |
| "learning_rate": 6.636132315521629e-05, |
| "loss": 0.208, |
| "step": 1319 |
| }, |
| { |
| "epoch": 3.3422053231939164, |
| "grad_norm": 0.5151738524436951, |
| "learning_rate": 6.625954198473283e-05, |
| "loss": 0.2304, |
| "step": 1320 |
| }, |
| { |
| "epoch": 3.3447401774397973, |
| "grad_norm": 0.3866114914417267, |
| "learning_rate": 6.615776081424937e-05, |
| "loss": 0.1738, |
| "step": 1321 |
| }, |
| { |
| "epoch": 3.347275031685678, |
| "grad_norm": 0.5542702674865723, |
| "learning_rate": 6.60559796437659e-05, |
| "loss": 0.1885, |
| "step": 1322 |
| }, |
| { |
| "epoch": 3.349809885931559, |
| "grad_norm": 0.5107680559158325, |
| "learning_rate": 6.595419847328245e-05, |
| "loss": 0.1856, |
| "step": 1323 |
| }, |
| { |
| "epoch": 3.3523447401774398, |
| "grad_norm": 0.8266568183898926, |
| "learning_rate": 6.585241730279898e-05, |
| "loss": 0.2826, |
| "step": 1324 |
| }, |
| { |
| "epoch": 3.3548795944233207, |
| "grad_norm": 0.45209088921546936, |
| "learning_rate": 6.575063613231552e-05, |
| "loss": 0.1519, |
| "step": 1325 |
| }, |
| { |
| "epoch": 3.3574144486692017, |
| "grad_norm": 0.4708397388458252, |
| "learning_rate": 6.564885496183206e-05, |
| "loss": 0.1834, |
| "step": 1326 |
| }, |
| { |
| "epoch": 3.359949302915082, |
| "grad_norm": 0.39958736300468445, |
| "learning_rate": 6.55470737913486e-05, |
| "loss": 0.1444, |
| "step": 1327 |
| }, |
| { |
| "epoch": 3.362484157160963, |
| "grad_norm": 0.5764468312263489, |
| "learning_rate": 6.544529262086514e-05, |
| "loss": 0.2024, |
| "step": 1328 |
| }, |
| { |
| "epoch": 3.365019011406844, |
| "grad_norm": 0.4573269188404083, |
| "learning_rate": 6.534351145038168e-05, |
| "loss": 0.1857, |
| "step": 1329 |
| }, |
| { |
| "epoch": 3.367553865652725, |
| "grad_norm": 0.598423957824707, |
| "learning_rate": 6.524173027989822e-05, |
| "loss": 0.2206, |
| "step": 1330 |
| }, |
| { |
| "epoch": 3.3700887198986056, |
| "grad_norm": 0.5643012523651123, |
| "learning_rate": 6.513994910941476e-05, |
| "loss": 0.157, |
| "step": 1331 |
| }, |
| { |
| "epoch": 3.3726235741444865, |
| "grad_norm": 0.6568096876144409, |
| "learning_rate": 6.50381679389313e-05, |
| "loss": 0.2588, |
| "step": 1332 |
| }, |
| { |
| "epoch": 3.3751584283903675, |
| "grad_norm": 0.6552339792251587, |
| "learning_rate": 6.493638676844784e-05, |
| "loss": 0.2032, |
| "step": 1333 |
| }, |
| { |
| "epoch": 3.3776932826362485, |
| "grad_norm": 0.5274556279182434, |
| "learning_rate": 6.483460559796438e-05, |
| "loss": 0.1877, |
| "step": 1334 |
| }, |
| { |
| "epoch": 3.3802281368821294, |
| "grad_norm": 0.43894869089126587, |
| "learning_rate": 6.473282442748092e-05, |
| "loss": 0.155, |
| "step": 1335 |
| }, |
| { |
| "epoch": 3.3827629911280104, |
| "grad_norm": 0.6116171479225159, |
| "learning_rate": 6.463104325699746e-05, |
| "loss": 0.2978, |
| "step": 1336 |
| }, |
| { |
| "epoch": 3.385297845373891, |
| "grad_norm": 0.4588301479816437, |
| "learning_rate": 6.4529262086514e-05, |
| "loss": 0.1765, |
| "step": 1337 |
| }, |
| { |
| "epoch": 3.387832699619772, |
| "grad_norm": 0.4299813508987427, |
| "learning_rate": 6.442748091603053e-05, |
| "loss": 0.1725, |
| "step": 1338 |
| }, |
| { |
| "epoch": 3.390367553865653, |
| "grad_norm": 0.4996776580810547, |
| "learning_rate": 6.432569974554707e-05, |
| "loss": 0.1815, |
| "step": 1339 |
| }, |
| { |
| "epoch": 3.3929024081115338, |
| "grad_norm": 0.42195963859558105, |
| "learning_rate": 6.422391857506361e-05, |
| "loss": 0.1544, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.3954372623574143, |
| "grad_norm": 0.3918668031692505, |
| "learning_rate": 6.412213740458015e-05, |
| "loss": 0.1677, |
| "step": 1341 |
| }, |
| { |
| "epoch": 3.3979721166032952, |
| "grad_norm": 0.5436106324195862, |
| "learning_rate": 6.402035623409669e-05, |
| "loss": 0.2624, |
| "step": 1342 |
| }, |
| { |
| "epoch": 3.400506970849176, |
| "grad_norm": 0.5056617856025696, |
| "learning_rate": 6.391857506361324e-05, |
| "loss": 0.1735, |
| "step": 1343 |
| }, |
| { |
| "epoch": 3.403041825095057, |
| "grad_norm": 0.497035950422287, |
| "learning_rate": 6.381679389312978e-05, |
| "loss": 0.192, |
| "step": 1344 |
| }, |
| { |
| "epoch": 3.405576679340938, |
| "grad_norm": 0.4464019238948822, |
| "learning_rate": 6.371501272264632e-05, |
| "loss": 0.165, |
| "step": 1345 |
| }, |
| { |
| "epoch": 3.4081115335868186, |
| "grad_norm": 0.3940610885620117, |
| "learning_rate": 6.361323155216285e-05, |
| "loss": 0.1698, |
| "step": 1346 |
| }, |
| { |
| "epoch": 3.4106463878326996, |
| "grad_norm": 0.34197869896888733, |
| "learning_rate": 6.351145038167939e-05, |
| "loss": 0.1676, |
| "step": 1347 |
| }, |
| { |
| "epoch": 3.4131812420785805, |
| "grad_norm": 0.5477511286735535, |
| "learning_rate": 6.340966921119593e-05, |
| "loss": 0.2913, |
| "step": 1348 |
| }, |
| { |
| "epoch": 3.4157160963244615, |
| "grad_norm": 0.47384947538375854, |
| "learning_rate": 6.330788804071247e-05, |
| "loss": 0.1807, |
| "step": 1349 |
| }, |
| { |
| "epoch": 3.418250950570342, |
| "grad_norm": 0.4805784821510315, |
| "learning_rate": 6.3206106870229e-05, |
| "loss": 0.1844, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.420785804816223, |
| "grad_norm": 0.4914521276950836, |
| "learning_rate": 6.310432569974555e-05, |
| "loss": 0.21, |
| "step": 1351 |
| }, |
| { |
| "epoch": 3.423320659062104, |
| "grad_norm": 0.42754796147346497, |
| "learning_rate": 6.300254452926209e-05, |
| "loss": 0.2003, |
| "step": 1352 |
| }, |
| { |
| "epoch": 3.425855513307985, |
| "grad_norm": 0.5367889404296875, |
| "learning_rate": 6.290076335877862e-05, |
| "loss": 0.2126, |
| "step": 1353 |
| }, |
| { |
| "epoch": 3.428390367553866, |
| "grad_norm": 0.5015621781349182, |
| "learning_rate": 6.279898218829516e-05, |
| "loss": 0.176, |
| "step": 1354 |
| }, |
| { |
| "epoch": 3.4309252217997463, |
| "grad_norm": 0.4498123228549957, |
| "learning_rate": 6.269720101781172e-05, |
| "loss": 0.1963, |
| "step": 1355 |
| }, |
| { |
| "epoch": 3.4334600760456273, |
| "grad_norm": 0.4548507034778595, |
| "learning_rate": 6.259541984732826e-05, |
| "loss": 0.185, |
| "step": 1356 |
| }, |
| { |
| "epoch": 3.4359949302915083, |
| "grad_norm": 0.5188789963722229, |
| "learning_rate": 6.24936386768448e-05, |
| "loss": 0.2152, |
| "step": 1357 |
| }, |
| { |
| "epoch": 3.4385297845373892, |
| "grad_norm": 0.5717540979385376, |
| "learning_rate": 6.239185750636133e-05, |
| "loss": 0.2541, |
| "step": 1358 |
| }, |
| { |
| "epoch": 3.4410646387832697, |
| "grad_norm": 0.43195176124572754, |
| "learning_rate": 6.229007633587787e-05, |
| "loss": 0.1841, |
| "step": 1359 |
| }, |
| { |
| "epoch": 3.4435994930291507, |
| "grad_norm": 0.8148223161697388, |
| "learning_rate": 6.21882951653944e-05, |
| "loss": 0.1903, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.4461343472750317, |
| "grad_norm": 0.39928868412971497, |
| "learning_rate": 6.208651399491094e-05, |
| "loss": 0.1551, |
| "step": 1361 |
| }, |
| { |
| "epoch": 3.4486692015209126, |
| "grad_norm": 0.8072621822357178, |
| "learning_rate": 6.198473282442748e-05, |
| "loss": 0.1973, |
| "step": 1362 |
| }, |
| { |
| "epoch": 3.4512040557667936, |
| "grad_norm": 0.6420927047729492, |
| "learning_rate": 6.188295165394402e-05, |
| "loss": 0.2304, |
| "step": 1363 |
| }, |
| { |
| "epoch": 3.453738910012674, |
| "grad_norm": 0.4896611273288727, |
| "learning_rate": 6.178117048346056e-05, |
| "loss": 0.1968, |
| "step": 1364 |
| }, |
| { |
| "epoch": 3.456273764258555, |
| "grad_norm": 0.5518379211425781, |
| "learning_rate": 6.16793893129771e-05, |
| "loss": 0.2136, |
| "step": 1365 |
| }, |
| { |
| "epoch": 3.458808618504436, |
| "grad_norm": 0.35489922761917114, |
| "learning_rate": 6.157760814249364e-05, |
| "loss": 0.1735, |
| "step": 1366 |
| }, |
| { |
| "epoch": 3.461343472750317, |
| "grad_norm": 0.3575512766838074, |
| "learning_rate": 6.147582697201019e-05, |
| "loss": 0.1704, |
| "step": 1367 |
| }, |
| { |
| "epoch": 3.463878326996198, |
| "grad_norm": 0.46745261549949646, |
| "learning_rate": 6.137404580152673e-05, |
| "loss": 0.1702, |
| "step": 1368 |
| }, |
| { |
| "epoch": 3.4664131812420784, |
| "grad_norm": 0.39378833770751953, |
| "learning_rate": 6.127226463104327e-05, |
| "loss": 0.1512, |
| "step": 1369 |
| }, |
| { |
| "epoch": 3.4689480354879594, |
| "grad_norm": 0.5645838975906372, |
| "learning_rate": 6.11704834605598e-05, |
| "loss": 0.2053, |
| "step": 1370 |
| }, |
| { |
| "epoch": 3.4714828897338403, |
| "grad_norm": 0.3613208830356598, |
| "learning_rate": 6.106870229007635e-05, |
| "loss": 0.1749, |
| "step": 1371 |
| }, |
| { |
| "epoch": 3.4740177439797213, |
| "grad_norm": 0.573124349117279, |
| "learning_rate": 6.096692111959288e-05, |
| "loss": 0.2229, |
| "step": 1372 |
| }, |
| { |
| "epoch": 3.4765525982256023, |
| "grad_norm": 0.43110212683677673, |
| "learning_rate": 6.086513994910942e-05, |
| "loss": 0.2082, |
| "step": 1373 |
| }, |
| { |
| "epoch": 3.4790874524714828, |
| "grad_norm": 0.6268284320831299, |
| "learning_rate": 6.076335877862596e-05, |
| "loss": 0.2826, |
| "step": 1374 |
| }, |
| { |
| "epoch": 3.4816223067173637, |
| "grad_norm": 0.5699491500854492, |
| "learning_rate": 6.0661577608142496e-05, |
| "loss": 0.2373, |
| "step": 1375 |
| }, |
| { |
| "epoch": 3.4841571609632447, |
| "grad_norm": 0.451548308134079, |
| "learning_rate": 6.0559796437659035e-05, |
| "loss": 0.1782, |
| "step": 1376 |
| }, |
| { |
| "epoch": 3.4866920152091256, |
| "grad_norm": 0.44955211877822876, |
| "learning_rate": 6.0458015267175575e-05, |
| "loss": 0.1896, |
| "step": 1377 |
| }, |
| { |
| "epoch": 3.489226869455006, |
| "grad_norm": 0.44076019525527954, |
| "learning_rate": 6.035623409669211e-05, |
| "loss": 0.1854, |
| "step": 1378 |
| }, |
| { |
| "epoch": 3.491761723700887, |
| "grad_norm": 0.8012815117835999, |
| "learning_rate": 6.0254452926208646e-05, |
| "loss": 0.2067, |
| "step": 1379 |
| }, |
| { |
| "epoch": 3.494296577946768, |
| "grad_norm": 0.5558981895446777, |
| "learning_rate": 6.01526717557252e-05, |
| "loss": 0.1913, |
| "step": 1380 |
| }, |
| { |
| "epoch": 3.496831432192649, |
| "grad_norm": 0.42501258850097656, |
| "learning_rate": 6.005089058524174e-05, |
| "loss": 0.1781, |
| "step": 1381 |
| }, |
| { |
| "epoch": 3.49936628643853, |
| "grad_norm": 0.3618164658546448, |
| "learning_rate": 5.994910941475828e-05, |
| "loss": 0.1472, |
| "step": 1382 |
| }, |
| { |
| "epoch": 3.5019011406844105, |
| "grad_norm": 0.5384409427642822, |
| "learning_rate": 5.984732824427482e-05, |
| "loss": 0.2063, |
| "step": 1383 |
| }, |
| { |
| "epoch": 3.5044359949302915, |
| "grad_norm": 0.5103084444999695, |
| "learning_rate": 5.974554707379135e-05, |
| "loss": 0.1737, |
| "step": 1384 |
| }, |
| { |
| "epoch": 3.5069708491761724, |
| "grad_norm": 0.37908968329429626, |
| "learning_rate": 5.964376590330789e-05, |
| "loss": 0.1599, |
| "step": 1385 |
| }, |
| { |
| "epoch": 3.5095057034220534, |
| "grad_norm": 0.5049726963043213, |
| "learning_rate": 5.954198473282443e-05, |
| "loss": 0.1891, |
| "step": 1386 |
| }, |
| { |
| "epoch": 3.512040557667934, |
| "grad_norm": 0.4436114430427551, |
| "learning_rate": 5.944020356234097e-05, |
| "loss": 0.1667, |
| "step": 1387 |
| }, |
| { |
| "epoch": 3.514575411913815, |
| "grad_norm": 0.6733534336090088, |
| "learning_rate": 5.933842239185751e-05, |
| "loss": 0.2714, |
| "step": 1388 |
| }, |
| { |
| "epoch": 3.517110266159696, |
| "grad_norm": 0.7258228659629822, |
| "learning_rate": 5.9236641221374046e-05, |
| "loss": 0.258, |
| "step": 1389 |
| }, |
| { |
| "epoch": 3.5196451204055768, |
| "grad_norm": 0.6425923705101013, |
| "learning_rate": 5.9134860050890586e-05, |
| "loss": 0.1791, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.5221799746514577, |
| "grad_norm": 0.45786988735198975, |
| "learning_rate": 5.9033078880407125e-05, |
| "loss": 0.1989, |
| "step": 1391 |
| }, |
| { |
| "epoch": 3.5247148288973387, |
| "grad_norm": 0.43258994817733765, |
| "learning_rate": 5.893129770992367e-05, |
| "loss": 0.166, |
| "step": 1392 |
| }, |
| { |
| "epoch": 3.527249683143219, |
| "grad_norm": 0.36486050486564636, |
| "learning_rate": 5.882951653944021e-05, |
| "loss": 0.1634, |
| "step": 1393 |
| }, |
| { |
| "epoch": 3.5297845373891, |
| "grad_norm": 0.5883339047431946, |
| "learning_rate": 5.872773536895675e-05, |
| "loss": 0.2236, |
| "step": 1394 |
| }, |
| { |
| "epoch": 3.532319391634981, |
| "grad_norm": 0.6296584010124207, |
| "learning_rate": 5.862595419847329e-05, |
| "loss": 0.1866, |
| "step": 1395 |
| }, |
| { |
| "epoch": 3.5348542458808616, |
| "grad_norm": 0.4262075126171112, |
| "learning_rate": 5.852417302798983e-05, |
| "loss": 0.1707, |
| "step": 1396 |
| }, |
| { |
| "epoch": 3.5373891001267426, |
| "grad_norm": 0.459573894739151, |
| "learning_rate": 5.842239185750637e-05, |
| "loss": 0.1654, |
| "step": 1397 |
| }, |
| { |
| "epoch": 3.5399239543726235, |
| "grad_norm": 0.47115570306777954, |
| "learning_rate": 5.83206106870229e-05, |
| "loss": 0.1936, |
| "step": 1398 |
| }, |
| { |
| "epoch": 3.5424588086185045, |
| "grad_norm": 0.41362589597702026, |
| "learning_rate": 5.821882951653944e-05, |
| "loss": 0.1897, |
| "step": 1399 |
| }, |
| { |
| "epoch": 3.5449936628643854, |
| "grad_norm": 0.4314422607421875, |
| "learning_rate": 5.811704834605598e-05, |
| "loss": 0.172, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.5475285171102664, |
| "grad_norm": 0.48116129636764526, |
| "learning_rate": 5.801526717557252e-05, |
| "loss": 0.1721, |
| "step": 1401 |
| }, |
| { |
| "epoch": 3.550063371356147, |
| "grad_norm": 0.3902725279331207, |
| "learning_rate": 5.791348600508906e-05, |
| "loss": 0.1886, |
| "step": 1402 |
| }, |
| { |
| "epoch": 3.552598225602028, |
| "grad_norm": 0.37996864318847656, |
| "learning_rate": 5.78117048346056e-05, |
| "loss": 0.1705, |
| "step": 1403 |
| }, |
| { |
| "epoch": 3.555133079847909, |
| "grad_norm": 0.589279294013977, |
| "learning_rate": 5.770992366412214e-05, |
| "loss": 0.1848, |
| "step": 1404 |
| }, |
| { |
| "epoch": 3.5576679340937893, |
| "grad_norm": 0.4233790636062622, |
| "learning_rate": 5.760814249363868e-05, |
| "loss": 0.18, |
| "step": 1405 |
| }, |
| { |
| "epoch": 3.5602027883396703, |
| "grad_norm": 0.3760955333709717, |
| "learning_rate": 5.750636132315522e-05, |
| "loss": 0.1743, |
| "step": 1406 |
| }, |
| { |
| "epoch": 3.5627376425855513, |
| "grad_norm": 0.552793562412262, |
| "learning_rate": 5.740458015267176e-05, |
| "loss": 0.2315, |
| "step": 1407 |
| }, |
| { |
| "epoch": 3.565272496831432, |
| "grad_norm": 0.5440211892127991, |
| "learning_rate": 5.73027989821883e-05, |
| "loss": 0.186, |
| "step": 1408 |
| }, |
| { |
| "epoch": 3.567807351077313, |
| "grad_norm": 0.5183967351913452, |
| "learning_rate": 5.720101781170484e-05, |
| "loss": 0.1626, |
| "step": 1409 |
| }, |
| { |
| "epoch": 3.570342205323194, |
| "grad_norm": 0.47962069511413574, |
| "learning_rate": 5.709923664122138e-05, |
| "loss": 0.1813, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.5728770595690746, |
| "grad_norm": 0.8065668940544128, |
| "learning_rate": 5.699745547073792e-05, |
| "loss": 0.2537, |
| "step": 1411 |
| }, |
| { |
| "epoch": 3.5754119138149556, |
| "grad_norm": 0.46018585562705994, |
| "learning_rate": 5.689567430025445e-05, |
| "loss": 0.1756, |
| "step": 1412 |
| }, |
| { |
| "epoch": 3.5779467680608366, |
| "grad_norm": 0.5229590535163879, |
| "learning_rate": 5.679389312977099e-05, |
| "loss": 0.1873, |
| "step": 1413 |
| }, |
| { |
| "epoch": 3.5804816223067175, |
| "grad_norm": 0.510209321975708, |
| "learning_rate": 5.669211195928753e-05, |
| "loss": 0.167, |
| "step": 1414 |
| }, |
| { |
| "epoch": 3.583016476552598, |
| "grad_norm": 0.4264031648635864, |
| "learning_rate": 5.659033078880407e-05, |
| "loss": 0.1705, |
| "step": 1415 |
| }, |
| { |
| "epoch": 3.585551330798479, |
| "grad_norm": 0.6208323240280151, |
| "learning_rate": 5.648854961832062e-05, |
| "loss": 0.2268, |
| "step": 1416 |
| }, |
| { |
| "epoch": 3.58808618504436, |
| "grad_norm": 0.3730670213699341, |
| "learning_rate": 5.6386768447837154e-05, |
| "loss": 0.1676, |
| "step": 1417 |
| }, |
| { |
| "epoch": 3.590621039290241, |
| "grad_norm": 0.52936190366745, |
| "learning_rate": 5.628498727735369e-05, |
| "loss": 0.2055, |
| "step": 1418 |
| }, |
| { |
| "epoch": 3.593155893536122, |
| "grad_norm": 0.44800981879234314, |
| "learning_rate": 5.618320610687023e-05, |
| "loss": 0.1782, |
| "step": 1419 |
| }, |
| { |
| "epoch": 3.5956907477820024, |
| "grad_norm": 0.37429654598236084, |
| "learning_rate": 5.608142493638677e-05, |
| "loss": 0.1566, |
| "step": 1420 |
| }, |
| { |
| "epoch": 3.5982256020278833, |
| "grad_norm": 0.5618942975997925, |
| "learning_rate": 5.597964376590331e-05, |
| "loss": 0.2249, |
| "step": 1421 |
| }, |
| { |
| "epoch": 3.6007604562737643, |
| "grad_norm": 0.6893648505210876, |
| "learning_rate": 5.587786259541985e-05, |
| "loss": 0.2104, |
| "step": 1422 |
| }, |
| { |
| "epoch": 3.6032953105196452, |
| "grad_norm": 0.4185943603515625, |
| "learning_rate": 5.577608142493639e-05, |
| "loss": 0.1729, |
| "step": 1423 |
| }, |
| { |
| "epoch": 3.6058301647655258, |
| "grad_norm": 0.46326011419296265, |
| "learning_rate": 5.567430025445293e-05, |
| "loss": 0.1888, |
| "step": 1424 |
| }, |
| { |
| "epoch": 3.6083650190114067, |
| "grad_norm": 0.4564262628555298, |
| "learning_rate": 5.557251908396947e-05, |
| "loss": 0.1957, |
| "step": 1425 |
| }, |
| { |
| "epoch": 3.6108998732572877, |
| "grad_norm": 0.654411256313324, |
| "learning_rate": 5.5470737913486e-05, |
| "loss": 0.2101, |
| "step": 1426 |
| }, |
| { |
| "epoch": 3.6134347275031686, |
| "grad_norm": 0.4059501886367798, |
| "learning_rate": 5.536895674300254e-05, |
| "loss": 0.1638, |
| "step": 1427 |
| }, |
| { |
| "epoch": 3.6159695817490496, |
| "grad_norm": 0.4155724346637726, |
| "learning_rate": 5.526717557251909e-05, |
| "loss": 0.1799, |
| "step": 1428 |
| }, |
| { |
| "epoch": 3.6185044359949305, |
| "grad_norm": 0.4041290581226349, |
| "learning_rate": 5.516539440203563e-05, |
| "loss": 0.1755, |
| "step": 1429 |
| }, |
| { |
| "epoch": 3.621039290240811, |
| "grad_norm": 0.3458746373653412, |
| "learning_rate": 5.506361323155217e-05, |
| "loss": 0.1474, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.623574144486692, |
| "grad_norm": 0.5046303272247314, |
| "learning_rate": 5.496183206106871e-05, |
| "loss": 0.2554, |
| "step": 1431 |
| }, |
| { |
| "epoch": 3.626108998732573, |
| "grad_norm": 0.4284549951553345, |
| "learning_rate": 5.4860050890585244e-05, |
| "loss": 0.1855, |
| "step": 1432 |
| }, |
| { |
| "epoch": 3.6286438529784535, |
| "grad_norm": 0.5116839408874512, |
| "learning_rate": 5.475826972010178e-05, |
| "loss": 0.1777, |
| "step": 1433 |
| }, |
| { |
| "epoch": 3.6311787072243344, |
| "grad_norm": 0.4303711950778961, |
| "learning_rate": 5.465648854961832e-05, |
| "loss": 0.1792, |
| "step": 1434 |
| }, |
| { |
| "epoch": 3.6337135614702154, |
| "grad_norm": 0.4602053463459015, |
| "learning_rate": 5.455470737913486e-05, |
| "loss": 0.1716, |
| "step": 1435 |
| }, |
| { |
| "epoch": 3.6362484157160964, |
| "grad_norm": 0.47606271505355835, |
| "learning_rate": 5.44529262086514e-05, |
| "loss": 0.2063, |
| "step": 1436 |
| }, |
| { |
| "epoch": 3.6387832699619773, |
| "grad_norm": 0.5861607193946838, |
| "learning_rate": 5.435114503816794e-05, |
| "loss": 0.2133, |
| "step": 1437 |
| }, |
| { |
| "epoch": 3.6413181242078583, |
| "grad_norm": 0.42663708329200745, |
| "learning_rate": 5.424936386768448e-05, |
| "loss": 0.1662, |
| "step": 1438 |
| }, |
| { |
| "epoch": 3.643852978453739, |
| "grad_norm": 0.6255937218666077, |
| "learning_rate": 5.414758269720102e-05, |
| "loss": 0.1875, |
| "step": 1439 |
| }, |
| { |
| "epoch": 3.6463878326996197, |
| "grad_norm": 0.5422307252883911, |
| "learning_rate": 5.404580152671755e-05, |
| "loss": 0.1624, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.6489226869455007, |
| "grad_norm": 0.540477991104126, |
| "learning_rate": 5.3944020356234104e-05, |
| "loss": 0.2489, |
| "step": 1441 |
| }, |
| { |
| "epoch": 3.6514575411913817, |
| "grad_norm": 0.5656100511550903, |
| "learning_rate": 5.3842239185750643e-05, |
| "loss": 0.2289, |
| "step": 1442 |
| }, |
| { |
| "epoch": 3.653992395437262, |
| "grad_norm": 0.5202456712722778, |
| "learning_rate": 5.374045801526718e-05, |
| "loss": 0.23, |
| "step": 1443 |
| }, |
| { |
| "epoch": 3.656527249683143, |
| "grad_norm": 0.5069813132286072, |
| "learning_rate": 5.363867684478372e-05, |
| "loss": 0.1845, |
| "step": 1444 |
| }, |
| { |
| "epoch": 3.659062103929024, |
| "grad_norm": 0.5711066126823425, |
| "learning_rate": 5.353689567430026e-05, |
| "loss": 0.2076, |
| "step": 1445 |
| }, |
| { |
| "epoch": 3.661596958174905, |
| "grad_norm": 0.5115897059440613, |
| "learning_rate": 5.3435114503816794e-05, |
| "loss": 0.1696, |
| "step": 1446 |
| }, |
| { |
| "epoch": 3.664131812420786, |
| "grad_norm": 0.6119818687438965, |
| "learning_rate": 5.333333333333333e-05, |
| "loss": 0.1905, |
| "step": 1447 |
| }, |
| { |
| "epoch": 3.6666666666666665, |
| "grad_norm": 0.7333729863166809, |
| "learning_rate": 5.323155216284987e-05, |
| "loss": 0.2208, |
| "step": 1448 |
| }, |
| { |
| "epoch": 3.6692015209125475, |
| "grad_norm": 0.5657917857170105, |
| "learning_rate": 5.312977099236641e-05, |
| "loss": 0.218, |
| "step": 1449 |
| }, |
| { |
| "epoch": 3.6717363751584284, |
| "grad_norm": 0.5568459033966064, |
| "learning_rate": 5.302798982188295e-05, |
| "loss": 0.1957, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.6742712294043094, |
| "grad_norm": 0.40060222148895264, |
| "learning_rate": 5.292620865139949e-05, |
| "loss": 0.1634, |
| "step": 1451 |
| }, |
| { |
| "epoch": 3.67680608365019, |
| "grad_norm": 0.5395296216011047, |
| "learning_rate": 5.282442748091603e-05, |
| "loss": 0.2284, |
| "step": 1452 |
| }, |
| { |
| "epoch": 3.679340937896071, |
| "grad_norm": 0.395298570394516, |
| "learning_rate": 5.2722646310432576e-05, |
| "loss": 0.1717, |
| "step": 1453 |
| }, |
| { |
| "epoch": 3.681875792141952, |
| "grad_norm": 0.4693946838378906, |
| "learning_rate": 5.2620865139949115e-05, |
| "loss": 0.1719, |
| "step": 1454 |
| }, |
| { |
| "epoch": 3.6844106463878328, |
| "grad_norm": 0.5206104516983032, |
| "learning_rate": 5.2519083969465654e-05, |
| "loss": 0.2158, |
| "step": 1455 |
| }, |
| { |
| "epoch": 3.6869455006337137, |
| "grad_norm": 0.5576691031455994, |
| "learning_rate": 5.2417302798982194e-05, |
| "loss": 0.2031, |
| "step": 1456 |
| }, |
| { |
| "epoch": 3.6894803548795947, |
| "grad_norm": 0.5826637148857117, |
| "learning_rate": 5.231552162849873e-05, |
| "loss": 0.2785, |
| "step": 1457 |
| }, |
| { |
| "epoch": 3.692015209125475, |
| "grad_norm": 0.5928865075111389, |
| "learning_rate": 5.221374045801527e-05, |
| "loss": 0.1765, |
| "step": 1458 |
| }, |
| { |
| "epoch": 3.694550063371356, |
| "grad_norm": 0.5932832956314087, |
| "learning_rate": 5.211195928753181e-05, |
| "loss": 0.1767, |
| "step": 1459 |
| }, |
| { |
| "epoch": 3.697084917617237, |
| "grad_norm": 0.4178262948989868, |
| "learning_rate": 5.2010178117048344e-05, |
| "loss": 0.1636, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.6996197718631176, |
| "grad_norm": 0.6029627919197083, |
| "learning_rate": 5.1908396946564884e-05, |
| "loss": 0.2086, |
| "step": 1461 |
| }, |
| { |
| "epoch": 3.7021546261089986, |
| "grad_norm": 0.48641863465309143, |
| "learning_rate": 5.180661577608142e-05, |
| "loss": 0.1613, |
| "step": 1462 |
| }, |
| { |
| "epoch": 3.7046894803548795, |
| "grad_norm": 0.40176740288734436, |
| "learning_rate": 5.170483460559796e-05, |
| "loss": 0.1647, |
| "step": 1463 |
| }, |
| { |
| "epoch": 3.7072243346007605, |
| "grad_norm": 0.42600035667419434, |
| "learning_rate": 5.16030534351145e-05, |
| "loss": 0.1818, |
| "step": 1464 |
| }, |
| { |
| "epoch": 3.7097591888466415, |
| "grad_norm": 0.48061972856521606, |
| "learning_rate": 5.150127226463105e-05, |
| "loss": 0.187, |
| "step": 1465 |
| }, |
| { |
| "epoch": 3.7122940430925224, |
| "grad_norm": 0.4085710346698761, |
| "learning_rate": 5.139949109414759e-05, |
| "loss": 0.1562, |
| "step": 1466 |
| }, |
| { |
| "epoch": 3.714828897338403, |
| "grad_norm": 0.4378439486026764, |
| "learning_rate": 5.1297709923664126e-05, |
| "loss": 0.1723, |
| "step": 1467 |
| }, |
| { |
| "epoch": 3.717363751584284, |
| "grad_norm": 0.5806863307952881, |
| "learning_rate": 5.1195928753180665e-05, |
| "loss": 0.2069, |
| "step": 1468 |
| }, |
| { |
| "epoch": 3.719898605830165, |
| "grad_norm": 0.4711120128631592, |
| "learning_rate": 5.1094147582697205e-05, |
| "loss": 0.1851, |
| "step": 1469 |
| }, |
| { |
| "epoch": 3.7224334600760454, |
| "grad_norm": 0.47227099537849426, |
| "learning_rate": 5.0992366412213744e-05, |
| "loss": 0.1885, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.7249683143219263, |
| "grad_norm": 0.4405531585216522, |
| "learning_rate": 5.0890585241730283e-05, |
| "loss": 0.1662, |
| "step": 1471 |
| }, |
| { |
| "epoch": 3.7275031685678073, |
| "grad_norm": 0.5168079733848572, |
| "learning_rate": 5.078880407124682e-05, |
| "loss": 0.2002, |
| "step": 1472 |
| }, |
| { |
| "epoch": 3.7300380228136882, |
| "grad_norm": 0.3839830160140991, |
| "learning_rate": 5.068702290076336e-05, |
| "loss": 0.168, |
| "step": 1473 |
| }, |
| { |
| "epoch": 3.732572877059569, |
| "grad_norm": 0.338012158870697, |
| "learning_rate": 5.0585241730279895e-05, |
| "loss": 0.1596, |
| "step": 1474 |
| }, |
| { |
| "epoch": 3.73510773130545, |
| "grad_norm": 0.5466023087501526, |
| "learning_rate": 5.0483460559796434e-05, |
| "loss": 0.2379, |
| "step": 1475 |
| }, |
| { |
| "epoch": 3.7376425855513307, |
| "grad_norm": 0.44543328881263733, |
| "learning_rate": 5.038167938931297e-05, |
| "loss": 0.1778, |
| "step": 1476 |
| }, |
| { |
| "epoch": 3.7401774397972116, |
| "grad_norm": 0.4166903793811798, |
| "learning_rate": 5.0279898218829526e-05, |
| "loss": 0.1554, |
| "step": 1477 |
| }, |
| { |
| "epoch": 3.7427122940430926, |
| "grad_norm": 0.3806212544441223, |
| "learning_rate": 5.0178117048346065e-05, |
| "loss": 0.1648, |
| "step": 1478 |
| }, |
| { |
| "epoch": 3.7452471482889735, |
| "grad_norm": 0.5990723967552185, |
| "learning_rate": 5.00763358778626e-05, |
| "loss": 0.2348, |
| "step": 1479 |
| }, |
| { |
| "epoch": 3.747782002534854, |
| "grad_norm": 0.715096116065979, |
| "learning_rate": 4.997455470737914e-05, |
| "loss": 0.2201, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.750316856780735, |
| "grad_norm": 0.6297019124031067, |
| "learning_rate": 4.9872773536895677e-05, |
| "loss": 0.2398, |
| "step": 1481 |
| }, |
| { |
| "epoch": 3.752851711026616, |
| "grad_norm": 0.6131380200386047, |
| "learning_rate": 4.9770992366412216e-05, |
| "loss": 0.2128, |
| "step": 1482 |
| }, |
| { |
| "epoch": 3.755386565272497, |
| "grad_norm": 0.5018277764320374, |
| "learning_rate": 4.9669211195928755e-05, |
| "loss": 0.1913, |
| "step": 1483 |
| }, |
| { |
| "epoch": 3.757921419518378, |
| "grad_norm": 0.516939103603363, |
| "learning_rate": 4.9567430025445294e-05, |
| "loss": 0.1958, |
| "step": 1484 |
| }, |
| { |
| "epoch": 3.7604562737642584, |
| "grad_norm": 0.4485652446746826, |
| "learning_rate": 4.9465648854961834e-05, |
| "loss": 0.1678, |
| "step": 1485 |
| }, |
| { |
| "epoch": 3.7629911280101394, |
| "grad_norm": 0.6227991580963135, |
| "learning_rate": 4.936386768447838e-05, |
| "loss": 0.2403, |
| "step": 1486 |
| }, |
| { |
| "epoch": 3.7655259822560203, |
| "grad_norm": 0.42331916093826294, |
| "learning_rate": 4.926208651399491e-05, |
| "loss": 0.1673, |
| "step": 1487 |
| }, |
| { |
| "epoch": 3.7680608365019013, |
| "grad_norm": 0.5072351098060608, |
| "learning_rate": 4.916030534351145e-05, |
| "loss": 0.204, |
| "step": 1488 |
| }, |
| { |
| "epoch": 3.770595690747782, |
| "grad_norm": 0.445578008890152, |
| "learning_rate": 4.905852417302799e-05, |
| "loss": 0.1908, |
| "step": 1489 |
| }, |
| { |
| "epoch": 3.7731305449936627, |
| "grad_norm": 0.49046698212623596, |
| "learning_rate": 4.895674300254453e-05, |
| "loss": 0.1615, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.7756653992395437, |
| "grad_norm": 0.37768882513046265, |
| "learning_rate": 4.885496183206107e-05, |
| "loss": 0.1604, |
| "step": 1491 |
| }, |
| { |
| "epoch": 3.7782002534854247, |
| "grad_norm": 0.38343289494514465, |
| "learning_rate": 4.8753180661577616e-05, |
| "loss": 0.1709, |
| "step": 1492 |
| }, |
| { |
| "epoch": 3.7807351077313056, |
| "grad_norm": 0.4102202355861664, |
| "learning_rate": 4.8651399491094155e-05, |
| "loss": 0.1629, |
| "step": 1493 |
| }, |
| { |
| "epoch": 3.7832699619771866, |
| "grad_norm": 0.4545007050037384, |
| "learning_rate": 4.854961832061069e-05, |
| "loss": 0.1709, |
| "step": 1494 |
| }, |
| { |
| "epoch": 3.785804816223067, |
| "grad_norm": 0.48300206661224365, |
| "learning_rate": 4.844783715012723e-05, |
| "loss": 0.2211, |
| "step": 1495 |
| }, |
| { |
| "epoch": 3.788339670468948, |
| "grad_norm": 0.5301868319511414, |
| "learning_rate": 4.8346055979643766e-05, |
| "loss": 0.2053, |
| "step": 1496 |
| }, |
| { |
| "epoch": 3.790874524714829, |
| "grad_norm": 0.48716598749160767, |
| "learning_rate": 4.8244274809160306e-05, |
| "loss": 0.2392, |
| "step": 1497 |
| }, |
| { |
| "epoch": 3.7934093789607095, |
| "grad_norm": 0.6201879978179932, |
| "learning_rate": 4.8142493638676845e-05, |
| "loss": 0.2267, |
| "step": 1498 |
| }, |
| { |
| "epoch": 3.7959442332065905, |
| "grad_norm": 0.46254560351371765, |
| "learning_rate": 4.804071246819339e-05, |
| "loss": 0.1824, |
| "step": 1499 |
| }, |
| { |
| "epoch": 3.7984790874524714, |
| "grad_norm": 0.6153382658958435, |
| "learning_rate": 4.793893129770993e-05, |
| "loss": 0.2095, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.8010139416983524, |
| "grad_norm": 0.6054911613464355, |
| "learning_rate": 4.783715012722646e-05, |
| "loss": 0.2291, |
| "step": 1501 |
| }, |
| { |
| "epoch": 3.8035487959442333, |
| "grad_norm": 0.3899902403354645, |
| "learning_rate": 4.7735368956743e-05, |
| "loss": 0.1507, |
| "step": 1502 |
| }, |
| { |
| "epoch": 3.8060836501901143, |
| "grad_norm": 0.4634632170200348, |
| "learning_rate": 4.763358778625954e-05, |
| "loss": 0.1436, |
| "step": 1503 |
| }, |
| { |
| "epoch": 3.808618504435995, |
| "grad_norm": 0.6829271912574768, |
| "learning_rate": 4.753180661577608e-05, |
| "loss": 0.2611, |
| "step": 1504 |
| }, |
| { |
| "epoch": 3.8111533586818758, |
| "grad_norm": 0.553393542766571, |
| "learning_rate": 4.743002544529263e-05, |
| "loss": 0.1862, |
| "step": 1505 |
| }, |
| { |
| "epoch": 3.8136882129277567, |
| "grad_norm": 0.4285520315170288, |
| "learning_rate": 4.7328244274809166e-05, |
| "loss": 0.1522, |
| "step": 1506 |
| }, |
| { |
| "epoch": 3.8162230671736372, |
| "grad_norm": 0.5505307912826538, |
| "learning_rate": 4.7226463104325705e-05, |
| "loss": 0.2056, |
| "step": 1507 |
| }, |
| { |
| "epoch": 3.818757921419518, |
| "grad_norm": 0.635071873664856, |
| "learning_rate": 4.712468193384224e-05, |
| "loss": 0.1899, |
| "step": 1508 |
| }, |
| { |
| "epoch": 3.821292775665399, |
| "grad_norm": 0.4297153353691101, |
| "learning_rate": 4.702290076335878e-05, |
| "loss": 0.1632, |
| "step": 1509 |
| }, |
| { |
| "epoch": 3.82382762991128, |
| "grad_norm": 0.5538508892059326, |
| "learning_rate": 4.6921119592875317e-05, |
| "loss": 0.1965, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.826362484157161, |
| "grad_norm": 0.6736975908279419, |
| "learning_rate": 4.681933842239186e-05, |
| "loss": 0.2334, |
| "step": 1511 |
| }, |
| { |
| "epoch": 3.828897338403042, |
| "grad_norm": 0.49381881952285767, |
| "learning_rate": 4.67175572519084e-05, |
| "loss": 0.2074, |
| "step": 1512 |
| }, |
| { |
| "epoch": 3.8314321926489225, |
| "grad_norm": 0.4285455346107483, |
| "learning_rate": 4.661577608142494e-05, |
| "loss": 0.176, |
| "step": 1513 |
| }, |
| { |
| "epoch": 3.8339670468948035, |
| "grad_norm": 0.5771308541297913, |
| "learning_rate": 4.651399491094148e-05, |
| "loss": 0.229, |
| "step": 1514 |
| }, |
| { |
| "epoch": 3.8365019011406845, |
| "grad_norm": 0.4749429225921631, |
| "learning_rate": 4.641221374045801e-05, |
| "loss": 0.1968, |
| "step": 1515 |
| }, |
| { |
| "epoch": 3.8390367553865654, |
| "grad_norm": 0.48094430565834045, |
| "learning_rate": 4.631043256997455e-05, |
| "loss": 0.1982, |
| "step": 1516 |
| }, |
| { |
| "epoch": 3.841571609632446, |
| "grad_norm": 0.49878042936325073, |
| "learning_rate": 4.62086513994911e-05, |
| "loss": 0.1552, |
| "step": 1517 |
| }, |
| { |
| "epoch": 3.844106463878327, |
| "grad_norm": 0.4872034192085266, |
| "learning_rate": 4.610687022900764e-05, |
| "loss": 0.1808, |
| "step": 1518 |
| }, |
| { |
| "epoch": 3.846641318124208, |
| "grad_norm": 0.4905577600002289, |
| "learning_rate": 4.600508905852418e-05, |
| "loss": 0.1703, |
| "step": 1519 |
| }, |
| { |
| "epoch": 3.849176172370089, |
| "grad_norm": 0.49980783462524414, |
| "learning_rate": 4.5903307888040716e-05, |
| "loss": 0.1727, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.8517110266159698, |
| "grad_norm": 0.5426180958747864, |
| "learning_rate": 4.5801526717557256e-05, |
| "loss": 0.2192, |
| "step": 1521 |
| }, |
| { |
| "epoch": 3.8542458808618507, |
| "grad_norm": 0.6399853825569153, |
| "learning_rate": 4.569974554707379e-05, |
| "loss": 0.2387, |
| "step": 1522 |
| }, |
| { |
| "epoch": 3.8567807351077312, |
| "grad_norm": 0.5311464667320251, |
| "learning_rate": 4.5597964376590334e-05, |
| "loss": 0.1976, |
| "step": 1523 |
| }, |
| { |
| "epoch": 3.859315589353612, |
| "grad_norm": 0.5433202981948853, |
| "learning_rate": 4.5496183206106874e-05, |
| "loss": 0.1916, |
| "step": 1524 |
| }, |
| { |
| "epoch": 3.861850443599493, |
| "grad_norm": 0.4024597704410553, |
| "learning_rate": 4.539440203562341e-05, |
| "loss": 0.1643, |
| "step": 1525 |
| }, |
| { |
| "epoch": 3.8643852978453737, |
| "grad_norm": 0.347566157579422, |
| "learning_rate": 4.529262086513995e-05, |
| "loss": 0.1676, |
| "step": 1526 |
| }, |
| { |
| "epoch": 3.8669201520912546, |
| "grad_norm": 0.45405861735343933, |
| "learning_rate": 4.519083969465649e-05, |
| "loss": 0.1963, |
| "step": 1527 |
| }, |
| { |
| "epoch": 3.8694550063371356, |
| "grad_norm": 0.6430472731590271, |
| "learning_rate": 4.508905852417303e-05, |
| "loss": 0.2322, |
| "step": 1528 |
| }, |
| { |
| "epoch": 3.8719898605830165, |
| "grad_norm": 0.4391939043998718, |
| "learning_rate": 4.498727735368957e-05, |
| "loss": 0.1871, |
| "step": 1529 |
| }, |
| { |
| "epoch": 3.8745247148288975, |
| "grad_norm": 0.47301623225212097, |
| "learning_rate": 4.488549618320611e-05, |
| "loss": 0.1549, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.8770595690747784, |
| "grad_norm": 0.4237573742866516, |
| "learning_rate": 4.478371501272265e-05, |
| "loss": 0.1548, |
| "step": 1531 |
| }, |
| { |
| "epoch": 3.879594423320659, |
| "grad_norm": 0.5859849452972412, |
| "learning_rate": 4.468193384223919e-05, |
| "loss": 0.2023, |
| "step": 1532 |
| }, |
| { |
| "epoch": 3.88212927756654, |
| "grad_norm": 0.45050573348999023, |
| "learning_rate": 4.458015267175573e-05, |
| "loss": 0.165, |
| "step": 1533 |
| }, |
| { |
| "epoch": 3.884664131812421, |
| "grad_norm": 0.5347339510917664, |
| "learning_rate": 4.447837150127227e-05, |
| "loss": 0.1854, |
| "step": 1534 |
| }, |
| { |
| "epoch": 3.8871989860583014, |
| "grad_norm": 0.375836580991745, |
| "learning_rate": 4.4376590330788806e-05, |
| "loss": 0.152, |
| "step": 1535 |
| }, |
| { |
| "epoch": 3.8897338403041823, |
| "grad_norm": 0.5403718948364258, |
| "learning_rate": 4.4274809160305345e-05, |
| "loss": 0.2065, |
| "step": 1536 |
| }, |
| { |
| "epoch": 3.8922686945500633, |
| "grad_norm": 0.5624736547470093, |
| "learning_rate": 4.4173027989821885e-05, |
| "loss": 0.1857, |
| "step": 1537 |
| }, |
| { |
| "epoch": 3.8948035487959443, |
| "grad_norm": 0.5971560478210449, |
| "learning_rate": 4.4071246819338424e-05, |
| "loss": 0.1928, |
| "step": 1538 |
| }, |
| { |
| "epoch": 3.897338403041825, |
| "grad_norm": 0.5225517153739929, |
| "learning_rate": 4.396946564885496e-05, |
| "loss": 0.2054, |
| "step": 1539 |
| }, |
| { |
| "epoch": 3.899873257287706, |
| "grad_norm": 0.47341519594192505, |
| "learning_rate": 4.38676844783715e-05, |
| "loss": 0.1786, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.9024081115335867, |
| "grad_norm": 0.3734676241874695, |
| "learning_rate": 4.376590330788805e-05, |
| "loss": 0.1447, |
| "step": 1541 |
| }, |
| { |
| "epoch": 3.9049429657794676, |
| "grad_norm": 0.5003755688667297, |
| "learning_rate": 4.366412213740458e-05, |
| "loss": 0.1734, |
| "step": 1542 |
| }, |
| { |
| "epoch": 3.9074778200253486, |
| "grad_norm": 0.41165000200271606, |
| "learning_rate": 4.356234096692112e-05, |
| "loss": 0.172, |
| "step": 1543 |
| }, |
| { |
| "epoch": 3.9100126742712296, |
| "grad_norm": 0.45096197724342346, |
| "learning_rate": 4.346055979643766e-05, |
| "loss": 0.1726, |
| "step": 1544 |
| }, |
| { |
| "epoch": 3.91254752851711, |
| "grad_norm": 0.5445842146873474, |
| "learning_rate": 4.33587786259542e-05, |
| "loss": 0.206, |
| "step": 1545 |
| }, |
| { |
| "epoch": 3.915082382762991, |
| "grad_norm": 0.5139321088790894, |
| "learning_rate": 4.325699745547074e-05, |
| "loss": 0.1803, |
| "step": 1546 |
| }, |
| { |
| "epoch": 3.917617237008872, |
| "grad_norm": 0.5652433633804321, |
| "learning_rate": 4.3155216284987285e-05, |
| "loss": 0.2051, |
| "step": 1547 |
| }, |
| { |
| "epoch": 3.920152091254753, |
| "grad_norm": 0.38091734051704407, |
| "learning_rate": 4.3053435114503824e-05, |
| "loss": 0.1541, |
| "step": 1548 |
| }, |
| { |
| "epoch": 3.922686945500634, |
| "grad_norm": 0.3614705801010132, |
| "learning_rate": 4.2951653944020356e-05, |
| "loss": 0.147, |
| "step": 1549 |
| }, |
| { |
| "epoch": 3.9252217997465144, |
| "grad_norm": 0.4551761746406555, |
| "learning_rate": 4.2849872773536896e-05, |
| "loss": 0.1685, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.9277566539923954, |
| "grad_norm": 0.5226624011993408, |
| "learning_rate": 4.2748091603053435e-05, |
| "loss": 0.1727, |
| "step": 1551 |
| }, |
| { |
| "epoch": 3.9302915082382763, |
| "grad_norm": 0.3541867136955261, |
| "learning_rate": 4.2646310432569974e-05, |
| "loss": 0.1488, |
| "step": 1552 |
| }, |
| { |
| "epoch": 3.9328263624841573, |
| "grad_norm": 0.4599204659461975, |
| "learning_rate": 4.254452926208652e-05, |
| "loss": 0.1536, |
| "step": 1553 |
| }, |
| { |
| "epoch": 3.935361216730038, |
| "grad_norm": 0.45082637667655945, |
| "learning_rate": 4.244274809160306e-05, |
| "loss": 0.1671, |
| "step": 1554 |
| }, |
| { |
| "epoch": 3.9378960709759188, |
| "grad_norm": 0.6053276658058167, |
| "learning_rate": 4.23409669211196e-05, |
| "loss": 0.2043, |
| "step": 1555 |
| }, |
| { |
| "epoch": 3.9404309252217997, |
| "grad_norm": 0.506443440914154, |
| "learning_rate": 4.223918575063613e-05, |
| "loss": 0.1893, |
| "step": 1556 |
| }, |
| { |
| "epoch": 3.9429657794676807, |
| "grad_norm": 0.6029784679412842, |
| "learning_rate": 4.213740458015267e-05, |
| "loss": 0.201, |
| "step": 1557 |
| }, |
| { |
| "epoch": 3.9455006337135616, |
| "grad_norm": 0.3993350863456726, |
| "learning_rate": 4.203562340966921e-05, |
| "loss": 0.1637, |
| "step": 1558 |
| }, |
| { |
| "epoch": 3.9480354879594426, |
| "grad_norm": 0.5887712836265564, |
| "learning_rate": 4.193384223918575e-05, |
| "loss": 0.2207, |
| "step": 1559 |
| }, |
| { |
| "epoch": 3.950570342205323, |
| "grad_norm": 0.5538966059684753, |
| "learning_rate": 4.1832061068702296e-05, |
| "loss": 0.1674, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.953105196451204, |
| "grad_norm": 0.4831174910068512, |
| "learning_rate": 4.1730279898218835e-05, |
| "loss": 0.1694, |
| "step": 1561 |
| }, |
| { |
| "epoch": 3.955640050697085, |
| "grad_norm": 0.39700761437416077, |
| "learning_rate": 4.1628498727735374e-05, |
| "loss": 0.1695, |
| "step": 1562 |
| }, |
| { |
| "epoch": 3.9581749049429655, |
| "grad_norm": 0.5388202667236328, |
| "learning_rate": 4.152671755725191e-05, |
| "loss": 0.1769, |
| "step": 1563 |
| }, |
| { |
| "epoch": 3.9607097591888465, |
| "grad_norm": 0.5717085599899292, |
| "learning_rate": 4.1424936386768446e-05, |
| "loss": 0.2602, |
| "step": 1564 |
| }, |
| { |
| "epoch": 3.9632446134347274, |
| "grad_norm": 0.4135623872280121, |
| "learning_rate": 4.1323155216284985e-05, |
| "loss": 0.1512, |
| "step": 1565 |
| }, |
| { |
| "epoch": 3.9657794676806084, |
| "grad_norm": 0.478411465883255, |
| "learning_rate": 4.122137404580153e-05, |
| "loss": 0.1967, |
| "step": 1566 |
| }, |
| { |
| "epoch": 3.9683143219264894, |
| "grad_norm": 0.4836915135383606, |
| "learning_rate": 4.111959287531807e-05, |
| "loss": 0.2297, |
| "step": 1567 |
| }, |
| { |
| "epoch": 3.9708491761723703, |
| "grad_norm": 0.6355355978012085, |
| "learning_rate": 4.101781170483461e-05, |
| "loss": 0.2291, |
| "step": 1568 |
| }, |
| { |
| "epoch": 3.973384030418251, |
| "grad_norm": 0.42811089754104614, |
| "learning_rate": 4.091603053435115e-05, |
| "loss": 0.1518, |
| "step": 1569 |
| }, |
| { |
| "epoch": 3.975918884664132, |
| "grad_norm": 0.5778828859329224, |
| "learning_rate": 4.081424936386768e-05, |
| "loss": 0.1638, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.9784537389100127, |
| "grad_norm": 0.4650358259677887, |
| "learning_rate": 4.071246819338422e-05, |
| "loss": 0.1658, |
| "step": 1571 |
| }, |
| { |
| "epoch": 3.9809885931558933, |
| "grad_norm": 0.5939072966575623, |
| "learning_rate": 4.061068702290077e-05, |
| "loss": 0.2276, |
| "step": 1572 |
| }, |
| { |
| "epoch": 3.983523447401774, |
| "grad_norm": 0.5296881794929504, |
| "learning_rate": 4.050890585241731e-05, |
| "loss": 0.1895, |
| "step": 1573 |
| }, |
| { |
| "epoch": 3.986058301647655, |
| "grad_norm": 0.4479645788669586, |
| "learning_rate": 4.0407124681933846e-05, |
| "loss": 0.168, |
| "step": 1574 |
| }, |
| { |
| "epoch": 3.988593155893536, |
| "grad_norm": 0.6041486859321594, |
| "learning_rate": 4.0305343511450385e-05, |
| "loss": 0.2225, |
| "step": 1575 |
| }, |
| { |
| "epoch": 3.991128010139417, |
| "grad_norm": 1.0764771699905396, |
| "learning_rate": 4.0203562340966925e-05, |
| "loss": 0.1736, |
| "step": 1576 |
| }, |
| { |
| "epoch": 3.993662864385298, |
| "grad_norm": 0.4830266535282135, |
| "learning_rate": 4.010178117048346e-05, |
| "loss": 0.2017, |
| "step": 1577 |
| }, |
| { |
| "epoch": 3.9961977186311786, |
| "grad_norm": 0.4032004773616791, |
| "learning_rate": 4e-05, |
| "loss": 0.1723, |
| "step": 1578 |
| }, |
| { |
| "epoch": 3.9987325728770595, |
| "grad_norm": 0.4441380798816681, |
| "learning_rate": 3.989821882951654e-05, |
| "loss": 0.1714, |
| "step": 1579 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.673060953617096, |
| "learning_rate": 3.979643765903308e-05, |
| "loss": 0.1651, |
| "step": 1580 |
| }, |
| { |
| "epoch": 4.002534854245881, |
| "grad_norm": 0.5185714960098267, |
| "learning_rate": 3.969465648854962e-05, |
| "loss": 0.1877, |
| "step": 1581 |
| }, |
| { |
| "epoch": 4.005069708491762, |
| "grad_norm": 0.4302978217601776, |
| "learning_rate": 3.959287531806616e-05, |
| "loss": 0.1575, |
| "step": 1582 |
| }, |
| { |
| "epoch": 4.007604562737643, |
| "grad_norm": 0.45982813835144043, |
| "learning_rate": 3.94910941475827e-05, |
| "loss": 0.1615, |
| "step": 1583 |
| }, |
| { |
| "epoch": 4.010139416983524, |
| "grad_norm": 0.4118313789367676, |
| "learning_rate": 3.938931297709924e-05, |
| "loss": 0.1508, |
| "step": 1584 |
| }, |
| { |
| "epoch": 4.012674271229404, |
| "grad_norm": 0.6039855480194092, |
| "learning_rate": 3.928753180661578e-05, |
| "loss": 0.1782, |
| "step": 1585 |
| }, |
| { |
| "epoch": 4.015209125475285, |
| "grad_norm": 0.4311355948448181, |
| "learning_rate": 3.918575063613232e-05, |
| "loss": 0.1488, |
| "step": 1586 |
| }, |
| { |
| "epoch": 4.017743979721166, |
| "grad_norm": 0.7398537993431091, |
| "learning_rate": 3.908396946564886e-05, |
| "loss": 0.1879, |
| "step": 1587 |
| }, |
| { |
| "epoch": 4.020278833967047, |
| "grad_norm": 0.37064164876937866, |
| "learning_rate": 3.8982188295165396e-05, |
| "loss": 0.1257, |
| "step": 1588 |
| }, |
| { |
| "epoch": 4.022813688212928, |
| "grad_norm": 0.46931344270706177, |
| "learning_rate": 3.8880407124681936e-05, |
| "loss": 0.1579, |
| "step": 1589 |
| }, |
| { |
| "epoch": 4.025348542458809, |
| "grad_norm": 0.4544156789779663, |
| "learning_rate": 3.8778625954198475e-05, |
| "loss": 0.134, |
| "step": 1590 |
| }, |
| { |
| "epoch": 4.02788339670469, |
| "grad_norm": 0.5562132000923157, |
| "learning_rate": 3.8676844783715014e-05, |
| "loss": 0.1488, |
| "step": 1591 |
| }, |
| { |
| "epoch": 4.030418250950571, |
| "grad_norm": 0.5679481625556946, |
| "learning_rate": 3.8575063613231554e-05, |
| "loss": 0.1322, |
| "step": 1592 |
| }, |
| { |
| "epoch": 4.032953105196452, |
| "grad_norm": 0.6101714372634888, |
| "learning_rate": 3.847328244274809e-05, |
| "loss": 0.1534, |
| "step": 1593 |
| }, |
| { |
| "epoch": 4.035487959442332, |
| "grad_norm": 0.8060622215270996, |
| "learning_rate": 3.837150127226463e-05, |
| "loss": 0.1986, |
| "step": 1594 |
| }, |
| { |
| "epoch": 4.038022813688213, |
| "grad_norm": 0.5501425266265869, |
| "learning_rate": 3.826972010178117e-05, |
| "loss": 0.1444, |
| "step": 1595 |
| }, |
| { |
| "epoch": 4.0405576679340935, |
| "grad_norm": 0.5117461085319519, |
| "learning_rate": 3.816793893129771e-05, |
| "loss": 0.1259, |
| "step": 1596 |
| }, |
| { |
| "epoch": 4.0430925221799745, |
| "grad_norm": 0.571770429611206, |
| "learning_rate": 3.806615776081425e-05, |
| "loss": 0.1413, |
| "step": 1597 |
| }, |
| { |
| "epoch": 4.0456273764258555, |
| "grad_norm": 0.7756439447402954, |
| "learning_rate": 3.796437659033079e-05, |
| "loss": 0.1874, |
| "step": 1598 |
| }, |
| { |
| "epoch": 4.048162230671736, |
| "grad_norm": 0.6393389701843262, |
| "learning_rate": 3.786259541984733e-05, |
| "loss": 0.1226, |
| "step": 1599 |
| }, |
| { |
| "epoch": 4.050697084917617, |
| "grad_norm": 0.7177454233169556, |
| "learning_rate": 3.776081424936387e-05, |
| "loss": 0.1382, |
| "step": 1600 |
| }, |
| { |
| "epoch": 4.053231939163498, |
| "grad_norm": 0.6561391353607178, |
| "learning_rate": 3.765903307888041e-05, |
| "loss": 0.1557, |
| "step": 1601 |
| }, |
| { |
| "epoch": 4.055766793409379, |
| "grad_norm": 0.8319444060325623, |
| "learning_rate": 3.7557251908396954e-05, |
| "loss": 0.1608, |
| "step": 1602 |
| }, |
| { |
| "epoch": 4.05830164765526, |
| "grad_norm": 0.7468693852424622, |
| "learning_rate": 3.745547073791349e-05, |
| "loss": 0.1442, |
| "step": 1603 |
| }, |
| { |
| "epoch": 4.06083650190114, |
| "grad_norm": 0.623657763004303, |
| "learning_rate": 3.7353689567430025e-05, |
| "loss": 0.1395, |
| "step": 1604 |
| }, |
| { |
| "epoch": 4.063371356147021, |
| "grad_norm": 0.5870152115821838, |
| "learning_rate": 3.7251908396946565e-05, |
| "loss": 0.1322, |
| "step": 1605 |
| }, |
| { |
| "epoch": 4.065906210392902, |
| "grad_norm": 0.6840811371803284, |
| "learning_rate": 3.7150127226463104e-05, |
| "loss": 0.132, |
| "step": 1606 |
| }, |
| { |
| "epoch": 4.068441064638783, |
| "grad_norm": 0.6177504658699036, |
| "learning_rate": 3.704834605597964e-05, |
| "loss": 0.1265, |
| "step": 1607 |
| }, |
| { |
| "epoch": 4.070975918884664, |
| "grad_norm": 0.6908831000328064, |
| "learning_rate": 3.694656488549619e-05, |
| "loss": 0.1593, |
| "step": 1608 |
| }, |
| { |
| "epoch": 4.073510773130545, |
| "grad_norm": 0.787434458732605, |
| "learning_rate": 3.684478371501273e-05, |
| "loss": 0.1184, |
| "step": 1609 |
| }, |
| { |
| "epoch": 4.076045627376426, |
| "grad_norm": 0.8011195063591003, |
| "learning_rate": 3.674300254452927e-05, |
| "loss": 0.1341, |
| "step": 1610 |
| }, |
| { |
| "epoch": 4.078580481622307, |
| "grad_norm": 0.5523831248283386, |
| "learning_rate": 3.66412213740458e-05, |
| "loss": 0.1283, |
| "step": 1611 |
| }, |
| { |
| "epoch": 4.081115335868188, |
| "grad_norm": 0.6396963596343994, |
| "learning_rate": 3.653944020356234e-05, |
| "loss": 0.1424, |
| "step": 1612 |
| }, |
| { |
| "epoch": 4.083650190114068, |
| "grad_norm": 0.7471883893013, |
| "learning_rate": 3.643765903307888e-05, |
| "loss": 0.1627, |
| "step": 1613 |
| }, |
| { |
| "epoch": 4.086185044359949, |
| "grad_norm": 0.5498061776161194, |
| "learning_rate": 3.633587786259542e-05, |
| "loss": 0.1478, |
| "step": 1614 |
| }, |
| { |
| "epoch": 4.08871989860583, |
| "grad_norm": 0.6853391528129578, |
| "learning_rate": 3.6234096692111965e-05, |
| "loss": 0.1588, |
| "step": 1615 |
| }, |
| { |
| "epoch": 4.091254752851711, |
| "grad_norm": 0.6638361811637878, |
| "learning_rate": 3.6132315521628504e-05, |
| "loss": 0.1695, |
| "step": 1616 |
| }, |
| { |
| "epoch": 4.093789607097592, |
| "grad_norm": 0.6155263781547546, |
| "learning_rate": 3.603053435114504e-05, |
| "loss": 0.1355, |
| "step": 1617 |
| }, |
| { |
| "epoch": 4.096324461343473, |
| "grad_norm": 0.574590265750885, |
| "learning_rate": 3.5928753180661576e-05, |
| "loss": 0.1498, |
| "step": 1618 |
| }, |
| { |
| "epoch": 4.098859315589354, |
| "grad_norm": 0.5972251296043396, |
| "learning_rate": 3.5826972010178115e-05, |
| "loss": 0.1684, |
| "step": 1619 |
| }, |
| { |
| "epoch": 4.101394169835235, |
| "grad_norm": 0.668618381023407, |
| "learning_rate": 3.5725190839694654e-05, |
| "loss": 0.1377, |
| "step": 1620 |
| }, |
| { |
| "epoch": 4.103929024081116, |
| "grad_norm": 0.6238232851028442, |
| "learning_rate": 3.56234096692112e-05, |
| "loss": 0.2025, |
| "step": 1621 |
| }, |
| { |
| "epoch": 4.106463878326996, |
| "grad_norm": 0.9182467460632324, |
| "learning_rate": 3.552162849872774e-05, |
| "loss": 0.1539, |
| "step": 1622 |
| }, |
| { |
| "epoch": 4.108998732572877, |
| "grad_norm": 0.6368919014930725, |
| "learning_rate": 3.541984732824428e-05, |
| "loss": 0.1421, |
| "step": 1623 |
| }, |
| { |
| "epoch": 4.111533586818758, |
| "grad_norm": 0.7871132493019104, |
| "learning_rate": 3.531806615776082e-05, |
| "loss": 0.1482, |
| "step": 1624 |
| }, |
| { |
| "epoch": 4.114068441064639, |
| "grad_norm": 0.7697343230247498, |
| "learning_rate": 3.521628498727735e-05, |
| "loss": 0.1607, |
| "step": 1625 |
| }, |
| { |
| "epoch": 4.11660329531052, |
| "grad_norm": 0.5805296897888184, |
| "learning_rate": 3.511450381679389e-05, |
| "loss": 0.1497, |
| "step": 1626 |
| }, |
| { |
| "epoch": 4.119138149556401, |
| "grad_norm": 0.6484183073043823, |
| "learning_rate": 3.5012722646310436e-05, |
| "loss": 0.1827, |
| "step": 1627 |
| }, |
| { |
| "epoch": 4.1216730038022815, |
| "grad_norm": 1.0351064205169678, |
| "learning_rate": 3.4910941475826976e-05, |
| "loss": 0.2331, |
| "step": 1628 |
| }, |
| { |
| "epoch": 4.1242078580481625, |
| "grad_norm": 0.620452344417572, |
| "learning_rate": 3.4809160305343515e-05, |
| "loss": 0.1516, |
| "step": 1629 |
| }, |
| { |
| "epoch": 4.126742712294043, |
| "grad_norm": 0.6269112229347229, |
| "learning_rate": 3.4707379134860054e-05, |
| "loss": 0.1322, |
| "step": 1630 |
| }, |
| { |
| "epoch": 4.129277566539924, |
| "grad_norm": 0.7780957221984863, |
| "learning_rate": 3.4605597964376594e-05, |
| "loss": 0.1974, |
| "step": 1631 |
| }, |
| { |
| "epoch": 4.1318124207858045, |
| "grad_norm": 0.6183624267578125, |
| "learning_rate": 3.4503816793893126e-05, |
| "loss": 0.1423, |
| "step": 1632 |
| }, |
| { |
| "epoch": 4.134347275031685, |
| "grad_norm": 0.715943455696106, |
| "learning_rate": 3.440203562340967e-05, |
| "loss": 0.1422, |
| "step": 1633 |
| }, |
| { |
| "epoch": 4.136882129277566, |
| "grad_norm": 0.6383997201919556, |
| "learning_rate": 3.430025445292621e-05, |
| "loss": 0.1566, |
| "step": 1634 |
| }, |
| { |
| "epoch": 4.139416983523447, |
| "grad_norm": 0.6354379653930664, |
| "learning_rate": 3.419847328244275e-05, |
| "loss": 0.14, |
| "step": 1635 |
| }, |
| { |
| "epoch": 4.141951837769328, |
| "grad_norm": 0.5692049264907837, |
| "learning_rate": 3.409669211195929e-05, |
| "loss": 0.1315, |
| "step": 1636 |
| }, |
| { |
| "epoch": 4.144486692015209, |
| "grad_norm": 0.5286855697631836, |
| "learning_rate": 3.399491094147583e-05, |
| "loss": 0.119, |
| "step": 1637 |
| }, |
| { |
| "epoch": 4.14702154626109, |
| "grad_norm": 0.6007808446884155, |
| "learning_rate": 3.389312977099237e-05, |
| "loss": 0.1368, |
| "step": 1638 |
| }, |
| { |
| "epoch": 4.149556400506971, |
| "grad_norm": 0.8727791905403137, |
| "learning_rate": 3.379134860050891e-05, |
| "loss": 0.1635, |
| "step": 1639 |
| }, |
| { |
| "epoch": 4.152091254752852, |
| "grad_norm": 0.7203207015991211, |
| "learning_rate": 3.368956743002545e-05, |
| "loss": 0.1668, |
| "step": 1640 |
| }, |
| { |
| "epoch": 4.154626108998732, |
| "grad_norm": 0.7178492546081543, |
| "learning_rate": 3.358778625954199e-05, |
| "loss": 0.1601, |
| "step": 1641 |
| }, |
| { |
| "epoch": 4.157160963244613, |
| "grad_norm": 0.6133365035057068, |
| "learning_rate": 3.3486005089058526e-05, |
| "loss": 0.1438, |
| "step": 1642 |
| }, |
| { |
| "epoch": 4.159695817490494, |
| "grad_norm": 0.690122127532959, |
| "learning_rate": 3.3384223918575065e-05, |
| "loss": 0.1592, |
| "step": 1643 |
| }, |
| { |
| "epoch": 4.162230671736375, |
| "grad_norm": 0.5469484925270081, |
| "learning_rate": 3.3282442748091605e-05, |
| "loss": 0.1499, |
| "step": 1644 |
| }, |
| { |
| "epoch": 4.164765525982256, |
| "grad_norm": 0.7380850911140442, |
| "learning_rate": 3.3180661577608144e-05, |
| "loss": 0.1724, |
| "step": 1645 |
| }, |
| { |
| "epoch": 4.167300380228137, |
| "grad_norm": 0.6949165463447571, |
| "learning_rate": 3.307888040712468e-05, |
| "loss": 0.1642, |
| "step": 1646 |
| }, |
| { |
| "epoch": 4.169835234474018, |
| "grad_norm": 0.6445840001106262, |
| "learning_rate": 3.297709923664122e-05, |
| "loss": 0.1576, |
| "step": 1647 |
| }, |
| { |
| "epoch": 4.172370088719899, |
| "grad_norm": 0.577178418636322, |
| "learning_rate": 3.287531806615776e-05, |
| "loss": 0.1482, |
| "step": 1648 |
| }, |
| { |
| "epoch": 4.17490494296578, |
| "grad_norm": 0.5232000350952148, |
| "learning_rate": 3.27735368956743e-05, |
| "loss": 0.1385, |
| "step": 1649 |
| }, |
| { |
| "epoch": 4.17743979721166, |
| "grad_norm": 0.8429796695709229, |
| "learning_rate": 3.267175572519084e-05, |
| "loss": 0.2456, |
| "step": 1650 |
| }, |
| { |
| "epoch": 4.179974651457541, |
| "grad_norm": 0.5647293925285339, |
| "learning_rate": 3.256997455470738e-05, |
| "loss": 0.1482, |
| "step": 1651 |
| }, |
| { |
| "epoch": 4.182509505703422, |
| "grad_norm": 0.7679947018623352, |
| "learning_rate": 3.246819338422392e-05, |
| "loss": 0.1705, |
| "step": 1652 |
| }, |
| { |
| "epoch": 4.185044359949303, |
| "grad_norm": 0.7913497686386108, |
| "learning_rate": 3.236641221374046e-05, |
| "loss": 0.2133, |
| "step": 1653 |
| }, |
| { |
| "epoch": 4.187579214195184, |
| "grad_norm": 0.5105036497116089, |
| "learning_rate": 3.2264631043257e-05, |
| "loss": 0.1335, |
| "step": 1654 |
| }, |
| { |
| "epoch": 4.190114068441065, |
| "grad_norm": 0.6503207087516785, |
| "learning_rate": 3.216284987277354e-05, |
| "loss": 0.1872, |
| "step": 1655 |
| }, |
| { |
| "epoch": 4.192648922686946, |
| "grad_norm": 0.9579104781150818, |
| "learning_rate": 3.2061068702290076e-05, |
| "loss": 0.1985, |
| "step": 1656 |
| }, |
| { |
| "epoch": 4.195183776932827, |
| "grad_norm": 0.5334345698356628, |
| "learning_rate": 3.195928753180662e-05, |
| "loss": 0.137, |
| "step": 1657 |
| }, |
| { |
| "epoch": 4.197718631178708, |
| "grad_norm": 0.7031605243682861, |
| "learning_rate": 3.185750636132316e-05, |
| "loss": 0.1574, |
| "step": 1658 |
| }, |
| { |
| "epoch": 4.200253485424588, |
| "grad_norm": 0.6237590909004211, |
| "learning_rate": 3.1755725190839694e-05, |
| "loss": 0.1686, |
| "step": 1659 |
| }, |
| { |
| "epoch": 4.202788339670469, |
| "grad_norm": 0.827680230140686, |
| "learning_rate": 3.1653944020356234e-05, |
| "loss": 0.1765, |
| "step": 1660 |
| }, |
| { |
| "epoch": 4.20532319391635, |
| "grad_norm": 0.6170578002929688, |
| "learning_rate": 3.155216284987277e-05, |
| "loss": 0.1699, |
| "step": 1661 |
| }, |
| { |
| "epoch": 4.2078580481622305, |
| "grad_norm": 0.600803017616272, |
| "learning_rate": 3.145038167938931e-05, |
| "loss": 0.1345, |
| "step": 1662 |
| }, |
| { |
| "epoch": 4.2103929024081115, |
| "grad_norm": 0.5505921840667725, |
| "learning_rate": 3.134860050890586e-05, |
| "loss": 0.1418, |
| "step": 1663 |
| }, |
| { |
| "epoch": 4.212927756653992, |
| "grad_norm": 0.5893916487693787, |
| "learning_rate": 3.12468193384224e-05, |
| "loss": 0.1414, |
| "step": 1664 |
| }, |
| { |
| "epoch": 4.215462610899873, |
| "grad_norm": 0.7622592449188232, |
| "learning_rate": 3.114503816793894e-05, |
| "loss": 0.1568, |
| "step": 1665 |
| }, |
| { |
| "epoch": 4.217997465145754, |
| "grad_norm": 0.6462287306785583, |
| "learning_rate": 3.104325699745547e-05, |
| "loss": 0.1641, |
| "step": 1666 |
| }, |
| { |
| "epoch": 4.220532319391635, |
| "grad_norm": 0.4971311092376709, |
| "learning_rate": 3.094147582697201e-05, |
| "loss": 0.1276, |
| "step": 1667 |
| }, |
| { |
| "epoch": 4.223067173637516, |
| "grad_norm": 0.7270475029945374, |
| "learning_rate": 3.083969465648855e-05, |
| "loss": 0.1603, |
| "step": 1668 |
| }, |
| { |
| "epoch": 4.225602027883396, |
| "grad_norm": 0.5765766501426697, |
| "learning_rate": 3.0737913486005094e-05, |
| "loss": 0.1341, |
| "step": 1669 |
| }, |
| { |
| "epoch": 4.228136882129277, |
| "grad_norm": 0.577694296836853, |
| "learning_rate": 3.0636132315521633e-05, |
| "loss": 0.1415, |
| "step": 1670 |
| }, |
| { |
| "epoch": 4.230671736375158, |
| "grad_norm": 0.6085098385810852, |
| "learning_rate": 3.053435114503817e-05, |
| "loss": 0.1359, |
| "step": 1671 |
| }, |
| { |
| "epoch": 4.233206590621039, |
| "grad_norm": 0.6224119663238525, |
| "learning_rate": 3.043256997455471e-05, |
| "loss": 0.1494, |
| "step": 1672 |
| }, |
| { |
| "epoch": 4.23574144486692, |
| "grad_norm": 0.4535973072052002, |
| "learning_rate": 3.0330788804071248e-05, |
| "loss": 0.1415, |
| "step": 1673 |
| }, |
| { |
| "epoch": 4.238276299112801, |
| "grad_norm": 0.6283777356147766, |
| "learning_rate": 3.0229007633587787e-05, |
| "loss": 0.1569, |
| "step": 1674 |
| }, |
| { |
| "epoch": 4.240811153358682, |
| "grad_norm": 0.6005566120147705, |
| "learning_rate": 3.0127226463104323e-05, |
| "loss": 0.1385, |
| "step": 1675 |
| }, |
| { |
| "epoch": 4.243346007604563, |
| "grad_norm": 0.6437854766845703, |
| "learning_rate": 3.002544529262087e-05, |
| "loss": 0.1584, |
| "step": 1676 |
| }, |
| { |
| "epoch": 4.245880861850444, |
| "grad_norm": 0.5184986591339111, |
| "learning_rate": 2.992366412213741e-05, |
| "loss": 0.1384, |
| "step": 1677 |
| }, |
| { |
| "epoch": 4.248415716096324, |
| "grad_norm": 0.5969160199165344, |
| "learning_rate": 2.9821882951653945e-05, |
| "loss": 0.1609, |
| "step": 1678 |
| }, |
| { |
| "epoch": 4.250950570342205, |
| "grad_norm": 0.85272616147995, |
| "learning_rate": 2.9720101781170484e-05, |
| "loss": 0.178, |
| "step": 1679 |
| }, |
| { |
| "epoch": 4.253485424588086, |
| "grad_norm": 0.5351912379264832, |
| "learning_rate": 2.9618320610687023e-05, |
| "loss": 0.1465, |
| "step": 1680 |
| }, |
| { |
| "epoch": 4.256020278833967, |
| "grad_norm": 0.5821883678436279, |
| "learning_rate": 2.9516539440203562e-05, |
| "loss": 0.135, |
| "step": 1681 |
| }, |
| { |
| "epoch": 4.258555133079848, |
| "grad_norm": 0.5453548431396484, |
| "learning_rate": 2.9414758269720105e-05, |
| "loss": 0.1287, |
| "step": 1682 |
| }, |
| { |
| "epoch": 4.261089987325729, |
| "grad_norm": 0.6280243396759033, |
| "learning_rate": 2.9312977099236644e-05, |
| "loss": 0.152, |
| "step": 1683 |
| }, |
| { |
| "epoch": 4.26362484157161, |
| "grad_norm": 0.5709437131881714, |
| "learning_rate": 2.9211195928753184e-05, |
| "loss": 0.1487, |
| "step": 1684 |
| }, |
| { |
| "epoch": 4.266159695817491, |
| "grad_norm": 0.4667048752307892, |
| "learning_rate": 2.910941475826972e-05, |
| "loss": 0.129, |
| "step": 1685 |
| }, |
| { |
| "epoch": 4.268694550063372, |
| "grad_norm": 0.5744767189025879, |
| "learning_rate": 2.900763358778626e-05, |
| "loss": 0.1668, |
| "step": 1686 |
| }, |
| { |
| "epoch": 4.271229404309253, |
| "grad_norm": 0.552631139755249, |
| "learning_rate": 2.89058524173028e-05, |
| "loss": 0.128, |
| "step": 1687 |
| }, |
| { |
| "epoch": 4.273764258555133, |
| "grad_norm": 0.46616679430007935, |
| "learning_rate": 2.880407124681934e-05, |
| "loss": 0.1168, |
| "step": 1688 |
| }, |
| { |
| "epoch": 4.276299112801014, |
| "grad_norm": 0.7842658758163452, |
| "learning_rate": 2.870229007633588e-05, |
| "loss": 0.1617, |
| "step": 1689 |
| }, |
| { |
| "epoch": 4.278833967046895, |
| "grad_norm": 0.5530945062637329, |
| "learning_rate": 2.860050890585242e-05, |
| "loss": 0.1619, |
| "step": 1690 |
| }, |
| { |
| "epoch": 4.281368821292776, |
| "grad_norm": 0.9341786503791809, |
| "learning_rate": 2.849872773536896e-05, |
| "loss": 0.231, |
| "step": 1691 |
| }, |
| { |
| "epoch": 4.283903675538657, |
| "grad_norm": 0.8043704032897949, |
| "learning_rate": 2.8396946564885495e-05, |
| "loss": 0.1826, |
| "step": 1692 |
| }, |
| { |
| "epoch": 4.2864385297845375, |
| "grad_norm": 0.4446638524532318, |
| "learning_rate": 2.8295165394402034e-05, |
| "loss": 0.1413, |
| "step": 1693 |
| }, |
| { |
| "epoch": 4.2889733840304185, |
| "grad_norm": 0.6845833659172058, |
| "learning_rate": 2.8193384223918577e-05, |
| "loss": 0.1577, |
| "step": 1694 |
| }, |
| { |
| "epoch": 4.2915082382762995, |
| "grad_norm": 0.6702572107315063, |
| "learning_rate": 2.8091603053435116e-05, |
| "loss": 0.1714, |
| "step": 1695 |
| }, |
| { |
| "epoch": 4.29404309252218, |
| "grad_norm": 0.6405001282691956, |
| "learning_rate": 2.7989821882951656e-05, |
| "loss": 0.1527, |
| "step": 1696 |
| }, |
| { |
| "epoch": 4.2965779467680605, |
| "grad_norm": 0.6155828833580017, |
| "learning_rate": 2.7888040712468195e-05, |
| "loss": 0.1471, |
| "step": 1697 |
| }, |
| { |
| "epoch": 4.299112801013941, |
| "grad_norm": 0.5606924295425415, |
| "learning_rate": 2.7786259541984734e-05, |
| "loss": 0.1331, |
| "step": 1698 |
| }, |
| { |
| "epoch": 4.301647655259822, |
| "grad_norm": 0.7498462200164795, |
| "learning_rate": 2.768447837150127e-05, |
| "loss": 0.1713, |
| "step": 1699 |
| }, |
| { |
| "epoch": 4.304182509505703, |
| "grad_norm": 0.6262723803520203, |
| "learning_rate": 2.7582697201017816e-05, |
| "loss": 0.1585, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.306717363751584, |
| "grad_norm": 0.6729116439819336, |
| "learning_rate": 2.7480916030534355e-05, |
| "loss": 0.1347, |
| "step": 1701 |
| }, |
| { |
| "epoch": 4.309252217997465, |
| "grad_norm": 0.7870539426803589, |
| "learning_rate": 2.737913486005089e-05, |
| "loss": 0.1512, |
| "step": 1702 |
| }, |
| { |
| "epoch": 4.311787072243346, |
| "grad_norm": 0.4943903684616089, |
| "learning_rate": 2.727735368956743e-05, |
| "loss": 0.1274, |
| "step": 1703 |
| }, |
| { |
| "epoch": 4.314321926489227, |
| "grad_norm": 0.4763108193874359, |
| "learning_rate": 2.717557251908397e-05, |
| "loss": 0.1228, |
| "step": 1704 |
| }, |
| { |
| "epoch": 4.316856780735108, |
| "grad_norm": 0.6400578618049622, |
| "learning_rate": 2.707379134860051e-05, |
| "loss": 0.1558, |
| "step": 1705 |
| }, |
| { |
| "epoch": 4.319391634980988, |
| "grad_norm": 0.5445212125778198, |
| "learning_rate": 2.6972010178117052e-05, |
| "loss": 0.1328, |
| "step": 1706 |
| }, |
| { |
| "epoch": 4.321926489226869, |
| "grad_norm": 0.6329374313354492, |
| "learning_rate": 2.687022900763359e-05, |
| "loss": 0.1615, |
| "step": 1707 |
| }, |
| { |
| "epoch": 4.32446134347275, |
| "grad_norm": 0.5299343466758728, |
| "learning_rate": 2.676844783715013e-05, |
| "loss": 0.122, |
| "step": 1708 |
| }, |
| { |
| "epoch": 4.326996197718631, |
| "grad_norm": 0.6486507058143616, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.1553, |
| "step": 1709 |
| }, |
| { |
| "epoch": 4.329531051964512, |
| "grad_norm": 0.6306889653205872, |
| "learning_rate": 2.6564885496183206e-05, |
| "loss": 0.1638, |
| "step": 1710 |
| }, |
| { |
| "epoch": 4.332065906210393, |
| "grad_norm": 0.6417018175125122, |
| "learning_rate": 2.6463104325699745e-05, |
| "loss": 0.1404, |
| "step": 1711 |
| }, |
| { |
| "epoch": 4.334600760456274, |
| "grad_norm": 0.7283552289009094, |
| "learning_rate": 2.6361323155216288e-05, |
| "loss": 0.1837, |
| "step": 1712 |
| }, |
| { |
| "epoch": 4.337135614702155, |
| "grad_norm": 0.7142099142074585, |
| "learning_rate": 2.6259541984732827e-05, |
| "loss": 0.1535, |
| "step": 1713 |
| }, |
| { |
| "epoch": 4.339670468948036, |
| "grad_norm": 0.6059632897377014, |
| "learning_rate": 2.6157760814249367e-05, |
| "loss": 0.1551, |
| "step": 1714 |
| }, |
| { |
| "epoch": 4.342205323193916, |
| "grad_norm": 0.6492133140563965, |
| "learning_rate": 2.6055979643765906e-05, |
| "loss": 0.1413, |
| "step": 1715 |
| }, |
| { |
| "epoch": 4.344740177439797, |
| "grad_norm": 0.7166099548339844, |
| "learning_rate": 2.5954198473282442e-05, |
| "loss": 0.1534, |
| "step": 1716 |
| }, |
| { |
| "epoch": 4.347275031685678, |
| "grad_norm": 0.6357300877571106, |
| "learning_rate": 2.585241730279898e-05, |
| "loss": 0.1445, |
| "step": 1717 |
| }, |
| { |
| "epoch": 4.349809885931559, |
| "grad_norm": 0.6684461236000061, |
| "learning_rate": 2.5750636132315524e-05, |
| "loss": 0.1469, |
| "step": 1718 |
| }, |
| { |
| "epoch": 4.35234474017744, |
| "grad_norm": 0.7808713912963867, |
| "learning_rate": 2.5648854961832063e-05, |
| "loss": 0.1892, |
| "step": 1719 |
| }, |
| { |
| "epoch": 4.354879594423321, |
| "grad_norm": 0.6660336852073669, |
| "learning_rate": 2.5547073791348602e-05, |
| "loss": 0.1545, |
| "step": 1720 |
| }, |
| { |
| "epoch": 4.357414448669202, |
| "grad_norm": 0.7266603112220764, |
| "learning_rate": 2.5445292620865142e-05, |
| "loss": 0.1346, |
| "step": 1721 |
| }, |
| { |
| "epoch": 4.359949302915083, |
| "grad_norm": 0.5710493326187134, |
| "learning_rate": 2.534351145038168e-05, |
| "loss": 0.1199, |
| "step": 1722 |
| }, |
| { |
| "epoch": 4.362484157160964, |
| "grad_norm": 0.6178765296936035, |
| "learning_rate": 2.5241730279898217e-05, |
| "loss": 0.1416, |
| "step": 1723 |
| }, |
| { |
| "epoch": 4.365019011406844, |
| "grad_norm": 0.5881832242012024, |
| "learning_rate": 2.5139949109414763e-05, |
| "loss": 0.1389, |
| "step": 1724 |
| }, |
| { |
| "epoch": 4.367553865652725, |
| "grad_norm": 0.5589767694473267, |
| "learning_rate": 2.50381679389313e-05, |
| "loss": 0.1356, |
| "step": 1725 |
| }, |
| { |
| "epoch": 4.370088719898606, |
| "grad_norm": 0.611072301864624, |
| "learning_rate": 2.4936386768447838e-05, |
| "loss": 0.1618, |
| "step": 1726 |
| }, |
| { |
| "epoch": 4.3726235741444865, |
| "grad_norm": 1.0045723915100098, |
| "learning_rate": 2.4834605597964378e-05, |
| "loss": 0.2004, |
| "step": 1727 |
| }, |
| { |
| "epoch": 4.3751584283903675, |
| "grad_norm": 1.0154621601104736, |
| "learning_rate": 2.4732824427480917e-05, |
| "loss": 0.1593, |
| "step": 1728 |
| }, |
| { |
| "epoch": 4.3776932826362485, |
| "grad_norm": 0.7933842539787292, |
| "learning_rate": 2.4631043256997456e-05, |
| "loss": 0.183, |
| "step": 1729 |
| }, |
| { |
| "epoch": 4.380228136882129, |
| "grad_norm": 0.8141732811927795, |
| "learning_rate": 2.4529262086513996e-05, |
| "loss": 0.1412, |
| "step": 1730 |
| }, |
| { |
| "epoch": 4.38276299112801, |
| "grad_norm": 0.6575155854225159, |
| "learning_rate": 2.4427480916030535e-05, |
| "loss": 0.1592, |
| "step": 1731 |
| }, |
| { |
| "epoch": 4.385297845373891, |
| "grad_norm": 0.7710108757019043, |
| "learning_rate": 2.4325699745547078e-05, |
| "loss": 0.2306, |
| "step": 1732 |
| }, |
| { |
| "epoch": 4.387832699619771, |
| "grad_norm": 0.6438276767730713, |
| "learning_rate": 2.4223918575063613e-05, |
| "loss": 0.143, |
| "step": 1733 |
| }, |
| { |
| "epoch": 4.390367553865652, |
| "grad_norm": 0.7019467949867249, |
| "learning_rate": 2.4122137404580153e-05, |
| "loss": 0.1641, |
| "step": 1734 |
| }, |
| { |
| "epoch": 4.392902408111533, |
| "grad_norm": 0.598584771156311, |
| "learning_rate": 2.4020356234096695e-05, |
| "loss": 0.1456, |
| "step": 1735 |
| }, |
| { |
| "epoch": 4.395437262357414, |
| "grad_norm": 0.6024305820465088, |
| "learning_rate": 2.391857506361323e-05, |
| "loss": 0.1287, |
| "step": 1736 |
| }, |
| { |
| "epoch": 4.397972116603295, |
| "grad_norm": 0.8446558713912964, |
| "learning_rate": 2.381679389312977e-05, |
| "loss": 0.1705, |
| "step": 1737 |
| }, |
| { |
| "epoch": 4.400506970849176, |
| "grad_norm": 0.5697831511497498, |
| "learning_rate": 2.3715012722646313e-05, |
| "loss": 0.1386, |
| "step": 1738 |
| }, |
| { |
| "epoch": 4.403041825095057, |
| "grad_norm": 0.6655327677726746, |
| "learning_rate": 2.3613231552162853e-05, |
| "loss": 0.186, |
| "step": 1739 |
| }, |
| { |
| "epoch": 4.405576679340938, |
| "grad_norm": 1.1001065969467163, |
| "learning_rate": 2.351145038167939e-05, |
| "loss": 0.2531, |
| "step": 1740 |
| }, |
| { |
| "epoch": 4.408111533586819, |
| "grad_norm": 0.5302372574806213, |
| "learning_rate": 2.340966921119593e-05, |
| "loss": 0.1342, |
| "step": 1741 |
| }, |
| { |
| "epoch": 4.4106463878327, |
| "grad_norm": 0.6450605392456055, |
| "learning_rate": 2.330788804071247e-05, |
| "loss": 0.1499, |
| "step": 1742 |
| }, |
| { |
| "epoch": 4.41318124207858, |
| "grad_norm": 0.5733135342597961, |
| "learning_rate": 2.3206106870229007e-05, |
| "loss": 0.166, |
| "step": 1743 |
| }, |
| { |
| "epoch": 4.415716096324461, |
| "grad_norm": 0.609865665435791, |
| "learning_rate": 2.310432569974555e-05, |
| "loss": 0.1306, |
| "step": 1744 |
| }, |
| { |
| "epoch": 4.418250950570342, |
| "grad_norm": 0.5957082509994507, |
| "learning_rate": 2.300254452926209e-05, |
| "loss": 0.1309, |
| "step": 1745 |
| }, |
| { |
| "epoch": 4.420785804816223, |
| "grad_norm": 0.5951780080795288, |
| "learning_rate": 2.2900763358778628e-05, |
| "loss": 0.1366, |
| "step": 1746 |
| }, |
| { |
| "epoch": 4.423320659062104, |
| "grad_norm": 0.7225191593170166, |
| "learning_rate": 2.2798982188295167e-05, |
| "loss": 0.1825, |
| "step": 1747 |
| }, |
| { |
| "epoch": 4.425855513307985, |
| "grad_norm": 0.6427996158599854, |
| "learning_rate": 2.2697201017811707e-05, |
| "loss": 0.1326, |
| "step": 1748 |
| }, |
| { |
| "epoch": 4.428390367553866, |
| "grad_norm": 0.49267786741256714, |
| "learning_rate": 2.2595419847328246e-05, |
| "loss": 0.1367, |
| "step": 1749 |
| }, |
| { |
| "epoch": 4.430925221799747, |
| "grad_norm": 0.5365452766418457, |
| "learning_rate": 2.2493638676844785e-05, |
| "loss": 0.1456, |
| "step": 1750 |
| }, |
| { |
| "epoch": 4.433460076045628, |
| "grad_norm": 0.65265291929245, |
| "learning_rate": 2.2391857506361324e-05, |
| "loss": 0.1379, |
| "step": 1751 |
| }, |
| { |
| "epoch": 4.435994930291509, |
| "grad_norm": 0.5401502847671509, |
| "learning_rate": 2.2290076335877864e-05, |
| "loss": 0.1293, |
| "step": 1752 |
| }, |
| { |
| "epoch": 4.438529784537389, |
| "grad_norm": 0.6832171678543091, |
| "learning_rate": 2.2188295165394403e-05, |
| "loss": 0.1448, |
| "step": 1753 |
| }, |
| { |
| "epoch": 4.44106463878327, |
| "grad_norm": 0.8080681562423706, |
| "learning_rate": 2.2086513994910942e-05, |
| "loss": 0.1832, |
| "step": 1754 |
| }, |
| { |
| "epoch": 4.443599493029151, |
| "grad_norm": 0.6201688051223755, |
| "learning_rate": 2.198473282442748e-05, |
| "loss": 0.159, |
| "step": 1755 |
| }, |
| { |
| "epoch": 4.446134347275032, |
| "grad_norm": 0.8549275994300842, |
| "learning_rate": 2.1882951653944024e-05, |
| "loss": 0.2103, |
| "step": 1756 |
| }, |
| { |
| "epoch": 4.448669201520913, |
| "grad_norm": 0.5879942178726196, |
| "learning_rate": 2.178117048346056e-05, |
| "loss": 0.1524, |
| "step": 1757 |
| }, |
| { |
| "epoch": 4.451204055766794, |
| "grad_norm": 0.6592312455177307, |
| "learning_rate": 2.16793893129771e-05, |
| "loss": 0.1535, |
| "step": 1758 |
| }, |
| { |
| "epoch": 4.4537389100126745, |
| "grad_norm": 0.6493979096412659, |
| "learning_rate": 2.1577608142493642e-05, |
| "loss": 0.1451, |
| "step": 1759 |
| }, |
| { |
| "epoch": 4.4562737642585555, |
| "grad_norm": 0.7973134517669678, |
| "learning_rate": 2.1475826972010178e-05, |
| "loss": 0.1519, |
| "step": 1760 |
| }, |
| { |
| "epoch": 4.458808618504436, |
| "grad_norm": 0.7703438401222229, |
| "learning_rate": 2.1374045801526718e-05, |
| "loss": 0.1653, |
| "step": 1761 |
| }, |
| { |
| "epoch": 4.4613434727503165, |
| "grad_norm": 1.0013222694396973, |
| "learning_rate": 2.127226463104326e-05, |
| "loss": 0.2064, |
| "step": 1762 |
| }, |
| { |
| "epoch": 4.4638783269961975, |
| "grad_norm": 0.7007017135620117, |
| "learning_rate": 2.11704834605598e-05, |
| "loss": 0.1401, |
| "step": 1763 |
| }, |
| { |
| "epoch": 4.466413181242078, |
| "grad_norm": 0.5366234183311462, |
| "learning_rate": 2.1068702290076335e-05, |
| "loss": 0.1389, |
| "step": 1764 |
| }, |
| { |
| "epoch": 4.468948035487959, |
| "grad_norm": 0.7167120575904846, |
| "learning_rate": 2.0966921119592875e-05, |
| "loss": 0.1817, |
| "step": 1765 |
| }, |
| { |
| "epoch": 4.47148288973384, |
| "grad_norm": 0.7901313900947571, |
| "learning_rate": 2.0865139949109417e-05, |
| "loss": 0.1817, |
| "step": 1766 |
| }, |
| { |
| "epoch": 4.474017743979721, |
| "grad_norm": 0.6681633591651917, |
| "learning_rate": 2.0763358778625953e-05, |
| "loss": 0.1458, |
| "step": 1767 |
| }, |
| { |
| "epoch": 4.476552598225602, |
| "grad_norm": 0.5067597031593323, |
| "learning_rate": 2.0661577608142493e-05, |
| "loss": 0.1301, |
| "step": 1768 |
| }, |
| { |
| "epoch": 4.479087452471483, |
| "grad_norm": 0.6582893133163452, |
| "learning_rate": 2.0559796437659035e-05, |
| "loss": 0.1576, |
| "step": 1769 |
| }, |
| { |
| "epoch": 4.481622306717364, |
| "grad_norm": 0.6628451943397522, |
| "learning_rate": 2.0458015267175575e-05, |
| "loss": 0.168, |
| "step": 1770 |
| }, |
| { |
| "epoch": 4.484157160963244, |
| "grad_norm": 0.5435721278190613, |
| "learning_rate": 2.035623409669211e-05, |
| "loss": 0.1476, |
| "step": 1771 |
| }, |
| { |
| "epoch": 4.486692015209125, |
| "grad_norm": 0.6182110905647278, |
| "learning_rate": 2.0254452926208653e-05, |
| "loss": 0.1441, |
| "step": 1772 |
| }, |
| { |
| "epoch": 4.489226869455006, |
| "grad_norm": 0.9246516823768616, |
| "learning_rate": 2.0152671755725193e-05, |
| "loss": 0.1747, |
| "step": 1773 |
| }, |
| { |
| "epoch": 4.491761723700887, |
| "grad_norm": 0.5967719554901123, |
| "learning_rate": 2.005089058524173e-05, |
| "loss": 0.1461, |
| "step": 1774 |
| }, |
| { |
| "epoch": 4.494296577946768, |
| "grad_norm": 0.5998682379722595, |
| "learning_rate": 1.994910941475827e-05, |
| "loss": 0.1276, |
| "step": 1775 |
| }, |
| { |
| "epoch": 4.496831432192649, |
| "grad_norm": 0.6168457865715027, |
| "learning_rate": 1.984732824427481e-05, |
| "loss": 0.1407, |
| "step": 1776 |
| }, |
| { |
| "epoch": 4.49936628643853, |
| "grad_norm": 0.6580602526664734, |
| "learning_rate": 1.974554707379135e-05, |
| "loss": 0.149, |
| "step": 1777 |
| }, |
| { |
| "epoch": 4.501901140684411, |
| "grad_norm": 0.5117031335830688, |
| "learning_rate": 1.964376590330789e-05, |
| "loss": 0.1397, |
| "step": 1778 |
| }, |
| { |
| "epoch": 4.504435994930292, |
| "grad_norm": 0.4603317975997925, |
| "learning_rate": 1.954198473282443e-05, |
| "loss": 0.1211, |
| "step": 1779 |
| }, |
| { |
| "epoch": 4.506970849176172, |
| "grad_norm": 0.5981631278991699, |
| "learning_rate": 1.9440203562340968e-05, |
| "loss": 0.1371, |
| "step": 1780 |
| }, |
| { |
| "epoch": 4.509505703422053, |
| "grad_norm": 0.6693590879440308, |
| "learning_rate": 1.9338422391857507e-05, |
| "loss": 0.1495, |
| "step": 1781 |
| }, |
| { |
| "epoch": 4.512040557667934, |
| "grad_norm": 0.5286784172058105, |
| "learning_rate": 1.9236641221374046e-05, |
| "loss": 0.1304, |
| "step": 1782 |
| }, |
| { |
| "epoch": 4.514575411913815, |
| "grad_norm": 0.7040352821350098, |
| "learning_rate": 1.9134860050890586e-05, |
| "loss": 0.1584, |
| "step": 1783 |
| }, |
| { |
| "epoch": 4.517110266159696, |
| "grad_norm": 0.6396339535713196, |
| "learning_rate": 1.9033078880407125e-05, |
| "loss": 0.1529, |
| "step": 1784 |
| }, |
| { |
| "epoch": 4.519645120405577, |
| "grad_norm": 0.6708245873451233, |
| "learning_rate": 1.8931297709923664e-05, |
| "loss": 0.1477, |
| "step": 1785 |
| }, |
| { |
| "epoch": 4.522179974651458, |
| "grad_norm": 0.6562108993530273, |
| "learning_rate": 1.8829516539440204e-05, |
| "loss": 0.1499, |
| "step": 1786 |
| }, |
| { |
| "epoch": 4.524714828897339, |
| "grad_norm": 0.5181876420974731, |
| "learning_rate": 1.8727735368956746e-05, |
| "loss": 0.1398, |
| "step": 1787 |
| }, |
| { |
| "epoch": 4.52724968314322, |
| "grad_norm": 0.5952017307281494, |
| "learning_rate": 1.8625954198473282e-05, |
| "loss": 0.1438, |
| "step": 1788 |
| }, |
| { |
| "epoch": 4.5297845373891, |
| "grad_norm": 0.6668636202812195, |
| "learning_rate": 1.852417302798982e-05, |
| "loss": 0.1805, |
| "step": 1789 |
| }, |
| { |
| "epoch": 4.532319391634981, |
| "grad_norm": 0.5433321595191956, |
| "learning_rate": 1.8422391857506364e-05, |
| "loss": 0.1397, |
| "step": 1790 |
| }, |
| { |
| "epoch": 4.534854245880862, |
| "grad_norm": 0.5353025197982788, |
| "learning_rate": 1.83206106870229e-05, |
| "loss": 0.1419, |
| "step": 1791 |
| }, |
| { |
| "epoch": 4.537389100126743, |
| "grad_norm": 0.6123271584510803, |
| "learning_rate": 1.821882951653944e-05, |
| "loss": 0.1493, |
| "step": 1792 |
| }, |
| { |
| "epoch": 4.5399239543726235, |
| "grad_norm": 0.6581493616104126, |
| "learning_rate": 1.8117048346055982e-05, |
| "loss": 0.1467, |
| "step": 1793 |
| }, |
| { |
| "epoch": 4.5424588086185045, |
| "grad_norm": 0.5537798404693604, |
| "learning_rate": 1.801526717557252e-05, |
| "loss": 0.1467, |
| "step": 1794 |
| }, |
| { |
| "epoch": 4.544993662864385, |
| "grad_norm": 0.7163582444190979, |
| "learning_rate": 1.7913486005089058e-05, |
| "loss": 0.1736, |
| "step": 1795 |
| }, |
| { |
| "epoch": 4.547528517110266, |
| "grad_norm": 0.694922149181366, |
| "learning_rate": 1.78117048346056e-05, |
| "loss": 0.1516, |
| "step": 1796 |
| }, |
| { |
| "epoch": 4.550063371356147, |
| "grad_norm": 0.7119778394699097, |
| "learning_rate": 1.770992366412214e-05, |
| "loss": 0.1899, |
| "step": 1797 |
| }, |
| { |
| "epoch": 4.552598225602027, |
| "grad_norm": 0.7570186853408813, |
| "learning_rate": 1.7608142493638675e-05, |
| "loss": 0.1951, |
| "step": 1798 |
| }, |
| { |
| "epoch": 4.555133079847908, |
| "grad_norm": 0.6789132356643677, |
| "learning_rate": 1.7506361323155218e-05, |
| "loss": 0.1475, |
| "step": 1799 |
| }, |
| { |
| "epoch": 4.557667934093789, |
| "grad_norm": 0.5750378966331482, |
| "learning_rate": 1.7404580152671757e-05, |
| "loss": 0.1431, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.56020278833967, |
| "grad_norm": 0.6066502332687378, |
| "learning_rate": 1.7302798982188297e-05, |
| "loss": 0.16, |
| "step": 1801 |
| }, |
| { |
| "epoch": 4.562737642585551, |
| "grad_norm": 0.5730226039886475, |
| "learning_rate": 1.7201017811704836e-05, |
| "loss": 0.1455, |
| "step": 1802 |
| }, |
| { |
| "epoch": 4.565272496831432, |
| "grad_norm": 0.5752687454223633, |
| "learning_rate": 1.7099236641221375e-05, |
| "loss": 0.1281, |
| "step": 1803 |
| }, |
| { |
| "epoch": 4.567807351077313, |
| "grad_norm": 0.5497205853462219, |
| "learning_rate": 1.6997455470737915e-05, |
| "loss": 0.1431, |
| "step": 1804 |
| }, |
| { |
| "epoch": 4.570342205323194, |
| "grad_norm": 0.7738269567489624, |
| "learning_rate": 1.6895674300254454e-05, |
| "loss": 0.1523, |
| "step": 1805 |
| }, |
| { |
| "epoch": 4.572877059569075, |
| "grad_norm": 0.5750918388366699, |
| "learning_rate": 1.6793893129770993e-05, |
| "loss": 0.1466, |
| "step": 1806 |
| }, |
| { |
| "epoch": 4.575411913814955, |
| "grad_norm": 0.5575040578842163, |
| "learning_rate": 1.6692111959287533e-05, |
| "loss": 0.1267, |
| "step": 1807 |
| }, |
| { |
| "epoch": 4.577946768060836, |
| "grad_norm": 0.509616494178772, |
| "learning_rate": 1.6590330788804072e-05, |
| "loss": 0.1434, |
| "step": 1808 |
| }, |
| { |
| "epoch": 4.580481622306717, |
| "grad_norm": 0.643009603023529, |
| "learning_rate": 1.648854961832061e-05, |
| "loss": 0.136, |
| "step": 1809 |
| }, |
| { |
| "epoch": 4.583016476552598, |
| "grad_norm": 0.5133553743362427, |
| "learning_rate": 1.638676844783715e-05, |
| "loss": 0.1223, |
| "step": 1810 |
| }, |
| { |
| "epoch": 4.585551330798479, |
| "grad_norm": 0.7505659461021423, |
| "learning_rate": 1.628498727735369e-05, |
| "loss": 0.1607, |
| "step": 1811 |
| }, |
| { |
| "epoch": 4.58808618504436, |
| "grad_norm": 0.6981300711631775, |
| "learning_rate": 1.618320610687023e-05, |
| "loss": 0.1525, |
| "step": 1812 |
| }, |
| { |
| "epoch": 4.590621039290241, |
| "grad_norm": 0.4981435537338257, |
| "learning_rate": 1.608142493638677e-05, |
| "loss": 0.1236, |
| "step": 1813 |
| }, |
| { |
| "epoch": 4.593155893536122, |
| "grad_norm": 0.6467440724372864, |
| "learning_rate": 1.597964376590331e-05, |
| "loss": 0.153, |
| "step": 1814 |
| }, |
| { |
| "epoch": 4.595690747782003, |
| "grad_norm": 0.6843181848526001, |
| "learning_rate": 1.5877862595419847e-05, |
| "loss": 0.1604, |
| "step": 1815 |
| }, |
| { |
| "epoch": 4.598225602027884, |
| "grad_norm": 0.49898776412010193, |
| "learning_rate": 1.5776081424936386e-05, |
| "loss": 0.1165, |
| "step": 1816 |
| }, |
| { |
| "epoch": 4.600760456273765, |
| "grad_norm": 0.6252351403236389, |
| "learning_rate": 1.567430025445293e-05, |
| "loss": 0.1228, |
| "step": 1817 |
| }, |
| { |
| "epoch": 4.603295310519645, |
| "grad_norm": 0.5452350974082947, |
| "learning_rate": 1.557251908396947e-05, |
| "loss": 0.1245, |
| "step": 1818 |
| }, |
| { |
| "epoch": 4.605830164765526, |
| "grad_norm": 0.6847854852676392, |
| "learning_rate": 1.5470737913486004e-05, |
| "loss": 0.1462, |
| "step": 1819 |
| }, |
| { |
| "epoch": 4.608365019011407, |
| "grad_norm": 0.49941131472587585, |
| "learning_rate": 1.5368956743002547e-05, |
| "loss": 0.1268, |
| "step": 1820 |
| }, |
| { |
| "epoch": 4.610899873257288, |
| "grad_norm": 0.581243097782135, |
| "learning_rate": 1.5267175572519086e-05, |
| "loss": 0.1296, |
| "step": 1821 |
| }, |
| { |
| "epoch": 4.613434727503169, |
| "grad_norm": 0.8345553874969482, |
| "learning_rate": 1.5165394402035624e-05, |
| "loss": 0.1307, |
| "step": 1822 |
| }, |
| { |
| "epoch": 4.61596958174905, |
| "grad_norm": 0.6534408926963806, |
| "learning_rate": 1.5063613231552162e-05, |
| "loss": 0.1446, |
| "step": 1823 |
| }, |
| { |
| "epoch": 4.6185044359949305, |
| "grad_norm": 0.7743064165115356, |
| "learning_rate": 1.4961832061068704e-05, |
| "loss": 0.2027, |
| "step": 1824 |
| }, |
| { |
| "epoch": 4.6210392902408115, |
| "grad_norm": 0.6709569096565247, |
| "learning_rate": 1.4860050890585242e-05, |
| "loss": 0.1427, |
| "step": 1825 |
| }, |
| { |
| "epoch": 4.6235741444866925, |
| "grad_norm": 0.6598264575004578, |
| "learning_rate": 1.4758269720101781e-05, |
| "loss": 0.1399, |
| "step": 1826 |
| }, |
| { |
| "epoch": 4.6261089987325725, |
| "grad_norm": 0.49041053652763367, |
| "learning_rate": 1.4656488549618322e-05, |
| "loss": 0.133, |
| "step": 1827 |
| }, |
| { |
| "epoch": 4.6286438529784535, |
| "grad_norm": 0.6697686910629272, |
| "learning_rate": 1.455470737913486e-05, |
| "loss": 0.1735, |
| "step": 1828 |
| }, |
| { |
| "epoch": 4.6311787072243344, |
| "grad_norm": 0.5481597781181335, |
| "learning_rate": 1.44529262086514e-05, |
| "loss": 0.1244, |
| "step": 1829 |
| }, |
| { |
| "epoch": 4.633713561470215, |
| "grad_norm": 0.6251161694526672, |
| "learning_rate": 1.435114503816794e-05, |
| "loss": 0.1436, |
| "step": 1830 |
| }, |
| { |
| "epoch": 4.636248415716096, |
| "grad_norm": 0.7515272498130798, |
| "learning_rate": 1.424936386768448e-05, |
| "loss": 0.1493, |
| "step": 1831 |
| }, |
| { |
| "epoch": 4.638783269961977, |
| "grad_norm": 0.8478451371192932, |
| "learning_rate": 1.4147582697201017e-05, |
| "loss": 0.1519, |
| "step": 1832 |
| }, |
| { |
| "epoch": 4.641318124207858, |
| "grad_norm": 0.5417062640190125, |
| "learning_rate": 1.4045801526717558e-05, |
| "loss": 0.1318, |
| "step": 1833 |
| }, |
| { |
| "epoch": 4.643852978453739, |
| "grad_norm": 0.6493893265724182, |
| "learning_rate": 1.3944020356234097e-05, |
| "loss": 0.1546, |
| "step": 1834 |
| }, |
| { |
| "epoch": 4.64638783269962, |
| "grad_norm": 0.8475616574287415, |
| "learning_rate": 1.3842239185750635e-05, |
| "loss": 0.172, |
| "step": 1835 |
| }, |
| { |
| "epoch": 4.6489226869455, |
| "grad_norm": 0.5484082698822021, |
| "learning_rate": 1.3740458015267178e-05, |
| "loss": 0.1203, |
| "step": 1836 |
| }, |
| { |
| "epoch": 4.651457541191381, |
| "grad_norm": 0.6533843874931335, |
| "learning_rate": 1.3638676844783715e-05, |
| "loss": 0.1501, |
| "step": 1837 |
| }, |
| { |
| "epoch": 4.653992395437262, |
| "grad_norm": 0.7521854043006897, |
| "learning_rate": 1.3536895674300255e-05, |
| "loss": 0.1955, |
| "step": 1838 |
| }, |
| { |
| "epoch": 4.656527249683143, |
| "grad_norm": 0.6500900983810425, |
| "learning_rate": 1.3435114503816796e-05, |
| "loss": 0.14, |
| "step": 1839 |
| }, |
| { |
| "epoch": 4.659062103929024, |
| "grad_norm": 0.7133599519729614, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.1707, |
| "step": 1840 |
| }, |
| { |
| "epoch": 4.661596958174905, |
| "grad_norm": 0.7065775394439697, |
| "learning_rate": 1.3231552162849873e-05, |
| "loss": 0.144, |
| "step": 1841 |
| }, |
| { |
| "epoch": 4.664131812420786, |
| "grad_norm": 0.7716514468193054, |
| "learning_rate": 1.3129770992366414e-05, |
| "loss": 0.1792, |
| "step": 1842 |
| }, |
| { |
| "epoch": 4.666666666666667, |
| "grad_norm": 0.9312828779220581, |
| "learning_rate": 1.3027989821882953e-05, |
| "loss": 0.2139, |
| "step": 1843 |
| }, |
| { |
| "epoch": 4.669201520912548, |
| "grad_norm": 0.5163487792015076, |
| "learning_rate": 1.292620865139949e-05, |
| "loss": 0.139, |
| "step": 1844 |
| }, |
| { |
| "epoch": 4.671736375158428, |
| "grad_norm": 0.7424818277359009, |
| "learning_rate": 1.2824427480916032e-05, |
| "loss": 0.1533, |
| "step": 1845 |
| }, |
| { |
| "epoch": 4.674271229404309, |
| "grad_norm": 0.5935065150260925, |
| "learning_rate": 1.2722646310432571e-05, |
| "loss": 0.1319, |
| "step": 1846 |
| }, |
| { |
| "epoch": 4.67680608365019, |
| "grad_norm": 0.7372322678565979, |
| "learning_rate": 1.2620865139949108e-05, |
| "loss": 0.1832, |
| "step": 1847 |
| }, |
| { |
| "epoch": 4.679340937896071, |
| "grad_norm": 0.5936238765716553, |
| "learning_rate": 1.251908396946565e-05, |
| "loss": 0.1357, |
| "step": 1848 |
| }, |
| { |
| "epoch": 4.681875792141952, |
| "grad_norm": 0.6689032316207886, |
| "learning_rate": 1.2417302798982189e-05, |
| "loss": 0.1709, |
| "step": 1849 |
| }, |
| { |
| "epoch": 4.684410646387833, |
| "grad_norm": 0.6519850492477417, |
| "learning_rate": 1.2315521628498728e-05, |
| "loss": 0.1438, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.686945500633714, |
| "grad_norm": 0.5853939056396484, |
| "learning_rate": 1.2213740458015267e-05, |
| "loss": 0.134, |
| "step": 1851 |
| }, |
| { |
| "epoch": 4.689480354879595, |
| "grad_norm": 0.5059859752655029, |
| "learning_rate": 1.2111959287531807e-05, |
| "loss": 0.1088, |
| "step": 1852 |
| }, |
| { |
| "epoch": 4.692015209125476, |
| "grad_norm": 0.6989784240722656, |
| "learning_rate": 1.2010178117048348e-05, |
| "loss": 0.1527, |
| "step": 1853 |
| }, |
| { |
| "epoch": 4.694550063371356, |
| "grad_norm": 0.5851006507873535, |
| "learning_rate": 1.1908396946564885e-05, |
| "loss": 0.143, |
| "step": 1854 |
| }, |
| { |
| "epoch": 4.697084917617237, |
| "grad_norm": 0.5606602430343628, |
| "learning_rate": 1.1806615776081426e-05, |
| "loss": 0.1288, |
| "step": 1855 |
| }, |
| { |
| "epoch": 4.699619771863118, |
| "grad_norm": 0.6175526976585388, |
| "learning_rate": 1.1704834605597966e-05, |
| "loss": 0.1564, |
| "step": 1856 |
| }, |
| { |
| "epoch": 4.702154626108999, |
| "grad_norm": 0.5776654481887817, |
| "learning_rate": 1.1603053435114503e-05, |
| "loss": 0.1323, |
| "step": 1857 |
| }, |
| { |
| "epoch": 4.7046894803548795, |
| "grad_norm": 0.5664159059524536, |
| "learning_rate": 1.1501272264631044e-05, |
| "loss": 0.1371, |
| "step": 1858 |
| }, |
| { |
| "epoch": 4.7072243346007605, |
| "grad_norm": 0.7187889218330383, |
| "learning_rate": 1.1399491094147584e-05, |
| "loss": 0.1476, |
| "step": 1859 |
| }, |
| { |
| "epoch": 4.7097591888466415, |
| "grad_norm": 0.5795005559921265, |
| "learning_rate": 1.1297709923664123e-05, |
| "loss": 0.1373, |
| "step": 1860 |
| }, |
| { |
| "epoch": 4.712294043092522, |
| "grad_norm": 0.5491251945495605, |
| "learning_rate": 1.1195928753180662e-05, |
| "loss": 0.1192, |
| "step": 1861 |
| }, |
| { |
| "epoch": 4.714828897338403, |
| "grad_norm": 0.4715762734413147, |
| "learning_rate": 1.1094147582697202e-05, |
| "loss": 0.1106, |
| "step": 1862 |
| }, |
| { |
| "epoch": 4.7173637515842834, |
| "grad_norm": 0.6300286054611206, |
| "learning_rate": 1.099236641221374e-05, |
| "loss": 0.138, |
| "step": 1863 |
| }, |
| { |
| "epoch": 4.719898605830164, |
| "grad_norm": 0.7265313267707825, |
| "learning_rate": 1.089058524173028e-05, |
| "loss": 0.2246, |
| "step": 1864 |
| }, |
| { |
| "epoch": 4.722433460076045, |
| "grad_norm": 0.7080928087234497, |
| "learning_rate": 1.0788804071246821e-05, |
| "loss": 0.1335, |
| "step": 1865 |
| }, |
| { |
| "epoch": 4.724968314321926, |
| "grad_norm": 0.605714738368988, |
| "learning_rate": 1.0687022900763359e-05, |
| "loss": 0.1412, |
| "step": 1866 |
| }, |
| { |
| "epoch": 4.727503168567807, |
| "grad_norm": 0.6648192405700684, |
| "learning_rate": 1.05852417302799e-05, |
| "loss": 0.1648, |
| "step": 1867 |
| }, |
| { |
| "epoch": 4.730038022813688, |
| "grad_norm": 0.6057281494140625, |
| "learning_rate": 1.0483460559796437e-05, |
| "loss": 0.1266, |
| "step": 1868 |
| }, |
| { |
| "epoch": 4.732572877059569, |
| "grad_norm": 0.6135514974594116, |
| "learning_rate": 1.0381679389312977e-05, |
| "loss": 0.1457, |
| "step": 1869 |
| }, |
| { |
| "epoch": 4.73510773130545, |
| "grad_norm": 0.6599459052085876, |
| "learning_rate": 1.0279898218829518e-05, |
| "loss": 0.1558, |
| "step": 1870 |
| }, |
| { |
| "epoch": 4.737642585551331, |
| "grad_norm": 0.5975873470306396, |
| "learning_rate": 1.0178117048346055e-05, |
| "loss": 0.134, |
| "step": 1871 |
| }, |
| { |
| "epoch": 4.740177439797211, |
| "grad_norm": 0.6581792235374451, |
| "learning_rate": 1.0076335877862596e-05, |
| "loss": 0.1463, |
| "step": 1872 |
| }, |
| { |
| "epoch": 4.742712294043092, |
| "grad_norm": 0.5627064108848572, |
| "learning_rate": 9.974554707379136e-06, |
| "loss": 0.1238, |
| "step": 1873 |
| }, |
| { |
| "epoch": 4.745247148288973, |
| "grad_norm": 0.6461361050605774, |
| "learning_rate": 9.872773536895675e-06, |
| "loss": 0.1621, |
| "step": 1874 |
| }, |
| { |
| "epoch": 4.747782002534854, |
| "grad_norm": 0.5615333914756775, |
| "learning_rate": 9.770992366412214e-06, |
| "loss": 0.1387, |
| "step": 1875 |
| }, |
| { |
| "epoch": 4.750316856780735, |
| "grad_norm": 0.6830117702484131, |
| "learning_rate": 9.669211195928754e-06, |
| "loss": 0.1397, |
| "step": 1876 |
| }, |
| { |
| "epoch": 4.752851711026616, |
| "grad_norm": 0.731072187423706, |
| "learning_rate": 9.567430025445293e-06, |
| "loss": 0.1508, |
| "step": 1877 |
| }, |
| { |
| "epoch": 4.755386565272497, |
| "grad_norm": 0.7469286918640137, |
| "learning_rate": 9.465648854961832e-06, |
| "loss": 0.1944, |
| "step": 1878 |
| }, |
| { |
| "epoch": 4.757921419518378, |
| "grad_norm": 0.700532078742981, |
| "learning_rate": 9.363867684478373e-06, |
| "loss": 0.1697, |
| "step": 1879 |
| }, |
| { |
| "epoch": 4.760456273764259, |
| "grad_norm": 0.7140323519706726, |
| "learning_rate": 9.26208651399491e-06, |
| "loss": 0.1597, |
| "step": 1880 |
| }, |
| { |
| "epoch": 4.76299112801014, |
| "grad_norm": 0.6711133718490601, |
| "learning_rate": 9.16030534351145e-06, |
| "loss": 0.1731, |
| "step": 1881 |
| }, |
| { |
| "epoch": 4.765525982256021, |
| "grad_norm": 0.43002957105636597, |
| "learning_rate": 9.058524173027991e-06, |
| "loss": 0.1181, |
| "step": 1882 |
| }, |
| { |
| "epoch": 4.768060836501901, |
| "grad_norm": 0.669159471988678, |
| "learning_rate": 8.956743002544529e-06, |
| "loss": 0.1578, |
| "step": 1883 |
| }, |
| { |
| "epoch": 4.770595690747782, |
| "grad_norm": 0.5030307769775391, |
| "learning_rate": 8.85496183206107e-06, |
| "loss": 0.1213, |
| "step": 1884 |
| }, |
| { |
| "epoch": 4.773130544993663, |
| "grad_norm": 0.7841615080833435, |
| "learning_rate": 8.753180661577609e-06, |
| "loss": 0.1619, |
| "step": 1885 |
| }, |
| { |
| "epoch": 4.775665399239544, |
| "grad_norm": 0.5570418834686279, |
| "learning_rate": 8.651399491094148e-06, |
| "loss": 0.1308, |
| "step": 1886 |
| }, |
| { |
| "epoch": 4.778200253485425, |
| "grad_norm": 0.6690031886100769, |
| "learning_rate": 8.549618320610688e-06, |
| "loss": 0.1413, |
| "step": 1887 |
| }, |
| { |
| "epoch": 4.780735107731306, |
| "grad_norm": 0.524140477180481, |
| "learning_rate": 8.447837150127227e-06, |
| "loss": 0.1354, |
| "step": 1888 |
| }, |
| { |
| "epoch": 4.783269961977187, |
| "grad_norm": 0.5612379908561707, |
| "learning_rate": 8.346055979643766e-06, |
| "loss": 0.1375, |
| "step": 1889 |
| }, |
| { |
| "epoch": 4.7858048162230675, |
| "grad_norm": 0.851925790309906, |
| "learning_rate": 8.244274809160306e-06, |
| "loss": 0.1783, |
| "step": 1890 |
| }, |
| { |
| "epoch": 4.7883396704689485, |
| "grad_norm": 0.8507834672927856, |
| "learning_rate": 8.142493638676845e-06, |
| "loss": 0.1743, |
| "step": 1891 |
| }, |
| { |
| "epoch": 4.7908745247148286, |
| "grad_norm": 0.8136033415794373, |
| "learning_rate": 8.040712468193384e-06, |
| "loss": 0.1381, |
| "step": 1892 |
| }, |
| { |
| "epoch": 4.7934093789607095, |
| "grad_norm": 0.7247329354286194, |
| "learning_rate": 7.938931297709924e-06, |
| "loss": 0.1793, |
| "step": 1893 |
| }, |
| { |
| "epoch": 4.7959442332065905, |
| "grad_norm": 0.5494823455810547, |
| "learning_rate": 7.837150127226465e-06, |
| "loss": 0.1231, |
| "step": 1894 |
| }, |
| { |
| "epoch": 4.798479087452471, |
| "grad_norm": 0.6107218861579895, |
| "learning_rate": 7.735368956743002e-06, |
| "loss": 0.1358, |
| "step": 1895 |
| }, |
| { |
| "epoch": 4.801013941698352, |
| "grad_norm": 0.6297575235366821, |
| "learning_rate": 7.633587786259543e-06, |
| "loss": 0.1699, |
| "step": 1896 |
| }, |
| { |
| "epoch": 4.803548795944233, |
| "grad_norm": 0.8669266700744629, |
| "learning_rate": 7.531806615776081e-06, |
| "loss": 0.1982, |
| "step": 1897 |
| }, |
| { |
| "epoch": 4.806083650190114, |
| "grad_norm": 0.583975076675415, |
| "learning_rate": 7.430025445292621e-06, |
| "loss": 0.1517, |
| "step": 1898 |
| }, |
| { |
| "epoch": 4.808618504435995, |
| "grad_norm": 0.6059403419494629, |
| "learning_rate": 7.328244274809161e-06, |
| "loss": 0.138, |
| "step": 1899 |
| }, |
| { |
| "epoch": 4.811153358681876, |
| "grad_norm": 1.0802148580551147, |
| "learning_rate": 7.2264631043257e-06, |
| "loss": 0.1677, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.813688212927756, |
| "grad_norm": 0.5637528300285339, |
| "learning_rate": 7.12468193384224e-06, |
| "loss": 0.1517, |
| "step": 1901 |
| }, |
| { |
| "epoch": 4.816223067173637, |
| "grad_norm": 0.6925719976425171, |
| "learning_rate": 7.022900763358779e-06, |
| "loss": 0.1636, |
| "step": 1902 |
| }, |
| { |
| "epoch": 4.818757921419518, |
| "grad_norm": 0.6529707908630371, |
| "learning_rate": 6.9211195928753175e-06, |
| "loss": 0.1587, |
| "step": 1903 |
| }, |
| { |
| "epoch": 4.821292775665399, |
| "grad_norm": 1.1477290391921997, |
| "learning_rate": 6.819338422391858e-06, |
| "loss": 0.1655, |
| "step": 1904 |
| }, |
| { |
| "epoch": 4.82382762991128, |
| "grad_norm": 0.7867985367774963, |
| "learning_rate": 6.717557251908398e-06, |
| "loss": 0.1955, |
| "step": 1905 |
| }, |
| { |
| "epoch": 4.826362484157161, |
| "grad_norm": 0.617871105670929, |
| "learning_rate": 6.615776081424936e-06, |
| "loss": 0.1554, |
| "step": 1906 |
| }, |
| { |
| "epoch": 4.828897338403042, |
| "grad_norm": 0.5985192656517029, |
| "learning_rate": 6.5139949109414765e-06, |
| "loss": 0.1484, |
| "step": 1907 |
| }, |
| { |
| "epoch": 4.831432192648923, |
| "grad_norm": 0.6069400310516357, |
| "learning_rate": 6.412213740458016e-06, |
| "loss": 0.1326, |
| "step": 1908 |
| }, |
| { |
| "epoch": 4.833967046894804, |
| "grad_norm": 0.9009010195732117, |
| "learning_rate": 6.310432569974554e-06, |
| "loss": 0.1999, |
| "step": 1909 |
| }, |
| { |
| "epoch": 4.836501901140684, |
| "grad_norm": 0.5913792848587036, |
| "learning_rate": 6.208651399491094e-06, |
| "loss": 0.1381, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.839036755386565, |
| "grad_norm": 0.5730859637260437, |
| "learning_rate": 6.106870229007634e-06, |
| "loss": 0.1346, |
| "step": 1911 |
| }, |
| { |
| "epoch": 4.841571609632446, |
| "grad_norm": 0.6579172611236572, |
| "learning_rate": 6.005089058524174e-06, |
| "loss": 0.1572, |
| "step": 1912 |
| }, |
| { |
| "epoch": 4.844106463878327, |
| "grad_norm": 0.5854265093803406, |
| "learning_rate": 5.903307888040713e-06, |
| "loss": 0.1359, |
| "step": 1913 |
| }, |
| { |
| "epoch": 4.846641318124208, |
| "grad_norm": 0.7668277025222778, |
| "learning_rate": 5.801526717557252e-06, |
| "loss": 0.1728, |
| "step": 1914 |
| }, |
| { |
| "epoch": 4.849176172370089, |
| "grad_norm": 0.8092861175537109, |
| "learning_rate": 5.699745547073792e-06, |
| "loss": 0.1741, |
| "step": 1915 |
| }, |
| { |
| "epoch": 4.85171102661597, |
| "grad_norm": 0.6868001818656921, |
| "learning_rate": 5.597964376590331e-06, |
| "loss": 0.1604, |
| "step": 1916 |
| }, |
| { |
| "epoch": 4.854245880861851, |
| "grad_norm": 0.6506228446960449, |
| "learning_rate": 5.49618320610687e-06, |
| "loss": 0.1459, |
| "step": 1917 |
| }, |
| { |
| "epoch": 4.856780735107732, |
| "grad_norm": 0.6033440232276917, |
| "learning_rate": 5.394402035623411e-06, |
| "loss": 0.1435, |
| "step": 1918 |
| }, |
| { |
| "epoch": 4.859315589353612, |
| "grad_norm": 0.7446348071098328, |
| "learning_rate": 5.29262086513995e-06, |
| "loss": 0.165, |
| "step": 1919 |
| }, |
| { |
| "epoch": 4.861850443599493, |
| "grad_norm": 0.5380656123161316, |
| "learning_rate": 5.190839694656488e-06, |
| "loss": 0.1504, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.864385297845374, |
| "grad_norm": 0.6752755641937256, |
| "learning_rate": 5.089058524173028e-06, |
| "loss": 0.1616, |
| "step": 1921 |
| }, |
| { |
| "epoch": 4.866920152091255, |
| "grad_norm": 0.6897322535514832, |
| "learning_rate": 4.987277353689568e-06, |
| "loss": 0.1409, |
| "step": 1922 |
| }, |
| { |
| "epoch": 4.869455006337136, |
| "grad_norm": 0.5405673980712891, |
| "learning_rate": 4.885496183206107e-06, |
| "loss": 0.1215, |
| "step": 1923 |
| }, |
| { |
| "epoch": 4.8719898605830165, |
| "grad_norm": 0.6921371221542358, |
| "learning_rate": 4.7837150127226464e-06, |
| "loss": 0.1554, |
| "step": 1924 |
| }, |
| { |
| "epoch": 4.8745247148288975, |
| "grad_norm": 0.6672477722167969, |
| "learning_rate": 4.681933842239187e-06, |
| "loss": 0.1685, |
| "step": 1925 |
| }, |
| { |
| "epoch": 4.8770595690747784, |
| "grad_norm": 0.5887411236763, |
| "learning_rate": 4.580152671755725e-06, |
| "loss": 0.1495, |
| "step": 1926 |
| }, |
| { |
| "epoch": 4.879594423320659, |
| "grad_norm": 0.8119281530380249, |
| "learning_rate": 4.478371501272264e-06, |
| "loss": 0.1778, |
| "step": 1927 |
| }, |
| { |
| "epoch": 4.8821292775665395, |
| "grad_norm": 0.6423155665397644, |
| "learning_rate": 4.3765903307888045e-06, |
| "loss": 0.1532, |
| "step": 1928 |
| }, |
| { |
| "epoch": 4.88466413181242, |
| "grad_norm": 0.576859712600708, |
| "learning_rate": 4.274809160305344e-06, |
| "loss": 0.1474, |
| "step": 1929 |
| }, |
| { |
| "epoch": 4.887198986058301, |
| "grad_norm": 0.668792188167572, |
| "learning_rate": 4.173027989821883e-06, |
| "loss": 0.1583, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.889733840304182, |
| "grad_norm": 0.727428138256073, |
| "learning_rate": 4.0712468193384225e-06, |
| "loss": 0.1759, |
| "step": 1931 |
| }, |
| { |
| "epoch": 4.892268694550063, |
| "grad_norm": 0.7260742783546448, |
| "learning_rate": 3.969465648854962e-06, |
| "loss": 0.1665, |
| "step": 1932 |
| }, |
| { |
| "epoch": 4.894803548795944, |
| "grad_norm": 0.6192269921302795, |
| "learning_rate": 3.867684478371501e-06, |
| "loss": 0.1377, |
| "step": 1933 |
| }, |
| { |
| "epoch": 4.897338403041825, |
| "grad_norm": 0.7672135233879089, |
| "learning_rate": 3.7659033078880404e-06, |
| "loss": 0.1696, |
| "step": 1934 |
| }, |
| { |
| "epoch": 4.899873257287706, |
| "grad_norm": 0.5162369012832642, |
| "learning_rate": 3.6641221374045806e-06, |
| "loss": 0.1384, |
| "step": 1935 |
| }, |
| { |
| "epoch": 4.902408111533587, |
| "grad_norm": 0.6594913601875305, |
| "learning_rate": 3.56234096692112e-06, |
| "loss": 0.1714, |
| "step": 1936 |
| }, |
| { |
| "epoch": 4.904942965779467, |
| "grad_norm": 0.7748851776123047, |
| "learning_rate": 3.4605597964376588e-06, |
| "loss": 0.2014, |
| "step": 1937 |
| }, |
| { |
| "epoch": 4.907477820025348, |
| "grad_norm": 0.6400601267814636, |
| "learning_rate": 3.358778625954199e-06, |
| "loss": 0.1522, |
| "step": 1938 |
| }, |
| { |
| "epoch": 4.910012674271229, |
| "grad_norm": 0.5443174839019775, |
| "learning_rate": 3.2569974554707382e-06, |
| "loss": 0.1276, |
| "step": 1939 |
| }, |
| { |
| "epoch": 4.91254752851711, |
| "grad_norm": 0.6544225811958313, |
| "learning_rate": 3.155216284987277e-06, |
| "loss": 0.1441, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.915082382762991, |
| "grad_norm": 0.6579450368881226, |
| "learning_rate": 3.053435114503817e-06, |
| "loss": 0.1688, |
| "step": 1941 |
| }, |
| { |
| "epoch": 4.917617237008872, |
| "grad_norm": 0.594393253326416, |
| "learning_rate": 2.9516539440203566e-06, |
| "loss": 0.1586, |
| "step": 1942 |
| }, |
| { |
| "epoch": 4.920152091254753, |
| "grad_norm": 0.6417977213859558, |
| "learning_rate": 2.849872773536896e-06, |
| "loss": 0.1389, |
| "step": 1943 |
| }, |
| { |
| "epoch": 4.922686945500634, |
| "grad_norm": 0.5247513055801392, |
| "learning_rate": 2.748091603053435e-06, |
| "loss": 0.1282, |
| "step": 1944 |
| }, |
| { |
| "epoch": 4.925221799746515, |
| "grad_norm": 0.6372106075286865, |
| "learning_rate": 2.646310432569975e-06, |
| "loss": 0.1391, |
| "step": 1945 |
| }, |
| { |
| "epoch": 4.927756653992396, |
| "grad_norm": 0.5967155694961548, |
| "learning_rate": 2.544529262086514e-06, |
| "loss": 0.1358, |
| "step": 1946 |
| }, |
| { |
| "epoch": 4.930291508238277, |
| "grad_norm": 0.6050627827644348, |
| "learning_rate": 2.4427480916030536e-06, |
| "loss": 0.1449, |
| "step": 1947 |
| }, |
| { |
| "epoch": 4.932826362484157, |
| "grad_norm": 0.7595526576042175, |
| "learning_rate": 2.3409669211195933e-06, |
| "loss": 0.1838, |
| "step": 1948 |
| }, |
| { |
| "epoch": 4.935361216730038, |
| "grad_norm": 0.7220463156700134, |
| "learning_rate": 2.239185750636132e-06, |
| "loss": 0.1695, |
| "step": 1949 |
| }, |
| { |
| "epoch": 4.937896070975919, |
| "grad_norm": 0.4891555905342102, |
| "learning_rate": 2.137404580152672e-06, |
| "loss": 0.1394, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.9404309252218, |
| "grad_norm": 0.5262938141822815, |
| "learning_rate": 2.0356234096692112e-06, |
| "loss": 0.1452, |
| "step": 1951 |
| }, |
| { |
| "epoch": 4.942965779467681, |
| "grad_norm": 0.7193884253501892, |
| "learning_rate": 1.9338422391857505e-06, |
| "loss": 0.176, |
| "step": 1952 |
| }, |
| { |
| "epoch": 4.945500633713562, |
| "grad_norm": 0.7117200493812561, |
| "learning_rate": 1.8320610687022903e-06, |
| "loss": 0.1697, |
| "step": 1953 |
| }, |
| { |
| "epoch": 4.948035487959443, |
| "grad_norm": 0.7884610891342163, |
| "learning_rate": 1.7302798982188294e-06, |
| "loss": 0.1864, |
| "step": 1954 |
| }, |
| { |
| "epoch": 4.9505703422053235, |
| "grad_norm": 0.8606098890304565, |
| "learning_rate": 1.6284987277353691e-06, |
| "loss": 0.1568, |
| "step": 1955 |
| }, |
| { |
| "epoch": 4.9531051964512045, |
| "grad_norm": 0.5030885338783264, |
| "learning_rate": 1.5267175572519084e-06, |
| "loss": 0.1306, |
| "step": 1956 |
| }, |
| { |
| "epoch": 4.955640050697085, |
| "grad_norm": 0.5155559182167053, |
| "learning_rate": 1.424936386768448e-06, |
| "loss": 0.1311, |
| "step": 1957 |
| }, |
| { |
| "epoch": 4.9581749049429655, |
| "grad_norm": 0.4945980906486511, |
| "learning_rate": 1.3231552162849875e-06, |
| "loss": 0.1212, |
| "step": 1958 |
| }, |
| { |
| "epoch": 4.9607097591888465, |
| "grad_norm": 0.79302978515625, |
| "learning_rate": 1.2213740458015268e-06, |
| "loss": 0.1763, |
| "step": 1959 |
| }, |
| { |
| "epoch": 4.9632446134347274, |
| "grad_norm": 0.6397921442985535, |
| "learning_rate": 1.119592875318066e-06, |
| "loss": 0.1416, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.965779467680608, |
| "grad_norm": 0.6680799722671509, |
| "learning_rate": 1.0178117048346056e-06, |
| "loss": 0.1519, |
| "step": 1961 |
| }, |
| { |
| "epoch": 4.968314321926489, |
| "grad_norm": 0.5919336080551147, |
| "learning_rate": 9.160305343511451e-07, |
| "loss": 0.16, |
| "step": 1962 |
| }, |
| { |
| "epoch": 4.97084917617237, |
| "grad_norm": 0.5929127335548401, |
| "learning_rate": 8.142493638676846e-07, |
| "loss": 0.143, |
| "step": 1963 |
| }, |
| { |
| "epoch": 4.973384030418251, |
| "grad_norm": 0.5678686499595642, |
| "learning_rate": 7.12468193384224e-07, |
| "loss": 0.1236, |
| "step": 1964 |
| }, |
| { |
| "epoch": 4.975918884664132, |
| "grad_norm": 0.5478057861328125, |
| "learning_rate": 6.106870229007634e-07, |
| "loss": 0.1407, |
| "step": 1965 |
| }, |
| { |
| "epoch": 4.978453738910012, |
| "grad_norm": 0.6003939509391785, |
| "learning_rate": 5.089058524173028e-07, |
| "loss": 0.1315, |
| "step": 1966 |
| }, |
| { |
| "epoch": 4.980988593155893, |
| "grad_norm": 0.5943416357040405, |
| "learning_rate": 4.071246819338423e-07, |
| "loss": 0.1451, |
| "step": 1967 |
| }, |
| { |
| "epoch": 4.983523447401774, |
| "grad_norm": 0.5419045090675354, |
| "learning_rate": 3.053435114503817e-07, |
| "loss": 0.1338, |
| "step": 1968 |
| }, |
| { |
| "epoch": 4.986058301647655, |
| "grad_norm": 0.5665134787559509, |
| "learning_rate": 2.0356234096692114e-07, |
| "loss": 0.1347, |
| "step": 1969 |
| }, |
| { |
| "epoch": 4.988593155893536, |
| "grad_norm": 0.5646002292633057, |
| "learning_rate": 1.0178117048346057e-07, |
| "loss": 0.1352, |
| "step": 1970 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1970, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5558390987853286e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|