| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 100.0, |
| "eval_steps": 20000, |
| "global_step": 309400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03232062055591468, |
| "grad_norm": 13.095431327819824, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 4.2463, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06464124111182935, |
| "grad_norm": 27.387174606323242, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 4.0764, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09696186166774402, |
| "grad_norm": 8.1353759765625, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 4.0007, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1292824822236587, |
| "grad_norm": 11.949505805969238, |
| "learning_rate": 3.99e-05, |
| "loss": 3.8759, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16160310277957338, |
| "grad_norm": 10.47620677947998, |
| "learning_rate": 4.99e-05, |
| "loss": 3.6782, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19392372333548805, |
| "grad_norm": 18.38490104675293, |
| "learning_rate": 5.9900000000000006e-05, |
| "loss": 3.491, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22624434389140272, |
| "grad_norm": 53.37411117553711, |
| "learning_rate": 6.99e-05, |
| "loss": 3.2757, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2585649644473174, |
| "grad_norm": 17.150442123413086, |
| "learning_rate": 7.99e-05, |
| "loss": 3.1093, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2908855850032321, |
| "grad_norm": 5.678936004638672, |
| "learning_rate": 8.989999999999999e-05, |
| "loss": 3.022, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32320620555914675, |
| "grad_norm": 6.282465934753418, |
| "learning_rate": 9.99e-05, |
| "loss": 2.957, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3555268261150614, |
| "grad_norm": 5.60117244720459, |
| "learning_rate": 0.0001099, |
| "loss": 2.8868, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3878474466709761, |
| "grad_norm": 4.572321891784668, |
| "learning_rate": 0.00011990000000000001, |
| "loss": 2.8396, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 3.3965022563934326, |
| "learning_rate": 0.00012989999999999999, |
| "loss": 2.7941, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.45248868778280543, |
| "grad_norm": 16.95421028137207, |
| "learning_rate": 0.0001399, |
| "loss": 2.7427, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4848093083387201, |
| "grad_norm": 6.816357612609863, |
| "learning_rate": 0.0001499, |
| "loss": 2.7324, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5171299288946348, |
| "grad_norm": 2.1214568614959717, |
| "learning_rate": 0.00015989999999999998, |
| "loss": 2.7426, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 3.358969211578369, |
| "learning_rate": 0.0001699, |
| "loss": 2.715, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5817711700064642, |
| "grad_norm": 2.701083183288574, |
| "learning_rate": 0.0001799, |
| "loss": 2.6622, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6140917905623788, |
| "grad_norm": 1.8063291311264038, |
| "learning_rate": 0.0001899, |
| "loss": 2.6827, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6464124111182935, |
| "grad_norm": 1.8204689025878906, |
| "learning_rate": 0.0001999, |
| "loss": 2.6492, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6787330316742082, |
| "grad_norm": 1.6437947750091553, |
| "learning_rate": 0.0002099, |
| "loss": 2.6523, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7110536522301228, |
| "grad_norm": 2.960686445236206, |
| "learning_rate": 0.0002199, |
| "loss": 2.6393, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7433742727860375, |
| "grad_norm": 1.3150275945663452, |
| "learning_rate": 0.0002299, |
| "loss": 2.5826, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7756948933419522, |
| "grad_norm": 2.5215647220611572, |
| "learning_rate": 0.0002399, |
| "loss": 2.612, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8080155138978669, |
| "grad_norm": 1.298682451248169, |
| "learning_rate": 0.0002499, |
| "loss": 2.5858, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 1.9009894132614136, |
| "learning_rate": 0.00025990000000000003, |
| "loss": 2.6145, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8726567550096962, |
| "grad_norm": 1.924782395362854, |
| "learning_rate": 0.0002699, |
| "loss": 2.5587, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9049773755656109, |
| "grad_norm": 1.8015832901000977, |
| "learning_rate": 0.0002799, |
| "loss": 2.554, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9372979961215255, |
| "grad_norm": 1.7930667400360107, |
| "learning_rate": 0.0002899, |
| "loss": 2.5686, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9696186166774402, |
| "grad_norm": 1.5759750604629517, |
| "learning_rate": 0.0002999, |
| "loss": 2.54, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0019392372333549, |
| "grad_norm": 1.3244950771331787, |
| "learning_rate": 0.0003099, |
| "loss": 2.5637, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0342598577892697, |
| "grad_norm": 1.9222184419631958, |
| "learning_rate": 0.0003199, |
| "loss": 2.5038, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0665804783451842, |
| "grad_norm": 1.4104559421539307, |
| "learning_rate": 0.00032990000000000005, |
| "loss": 2.5322, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.098901098901099, |
| "grad_norm": 2.111445188522339, |
| "learning_rate": 0.00033989999999999997, |
| "loss": 2.4966, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.1312217194570136, |
| "grad_norm": 1.793367624282837, |
| "learning_rate": 0.0003499, |
| "loss": 2.5059, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1635423400129283, |
| "grad_norm": 1.2840224504470825, |
| "learning_rate": 0.0003599, |
| "loss": 2.4945, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.195862960568843, |
| "grad_norm": 1.9581950902938843, |
| "learning_rate": 0.0003699, |
| "loss": 2.4998, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.2281835811247577, |
| "grad_norm": 1.221644639968872, |
| "learning_rate": 0.0003799, |
| "loss": 2.518, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.2605042016806722, |
| "grad_norm": 1.7324455976486206, |
| "learning_rate": 0.00038990000000000004, |
| "loss": 2.5482, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.292824822236587, |
| "grad_norm": 3.273299217224121, |
| "learning_rate": 0.00039989999999999996, |
| "loss": 2.509, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3251454427925016, |
| "grad_norm": 1.7108508348464966, |
| "learning_rate": 0.0004099, |
| "loss": 2.514, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.3574660633484164, |
| "grad_norm": 1.50692617893219, |
| "learning_rate": 0.0004199, |
| "loss": 2.5092, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.389786683904331, |
| "grad_norm": 1.5326952934265137, |
| "learning_rate": 0.0004299, |
| "loss": 2.5145, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.4221073044602457, |
| "grad_norm": 1.7152796983718872, |
| "learning_rate": 0.0004399, |
| "loss": 2.5145, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.4544279250161603, |
| "grad_norm": 1.7978283166885376, |
| "learning_rate": 0.00044990000000000004, |
| "loss": 2.515, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.486748545572075, |
| "grad_norm": 0.933148205280304, |
| "learning_rate": 0.0004599, |
| "loss": 2.5079, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.5190691661279896, |
| "grad_norm": 1.4725111722946167, |
| "learning_rate": 0.0004699, |
| "loss": 2.501, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.5513897866839044, |
| "grad_norm": 1.3290444612503052, |
| "learning_rate": 0.0004799, |
| "loss": 2.4955, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5837104072398192, |
| "grad_norm": 1.7420613765716553, |
| "learning_rate": 0.0004899, |
| "loss": 2.4826, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.6160310277957337, |
| "grad_norm": 2.0405023097991943, |
| "learning_rate": 0.0004999000000000001, |
| "loss": 2.5143, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6483516483516483, |
| "grad_norm": 3.256430149078369, |
| "learning_rate": 0.0005099, |
| "loss": 2.505, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.680672268907563, |
| "grad_norm": 1.9231915473937988, |
| "learning_rate": 0.0005199, |
| "loss": 2.4882, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.7129928894634778, |
| "grad_norm": 1.8035612106323242, |
| "learning_rate": 0.0005299, |
| "loss": 2.47, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.7453135100193924, |
| "grad_norm": 1.3306243419647217, |
| "learning_rate": 0.0005399000000000001, |
| "loss": 2.5006, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.777634130575307, |
| "grad_norm": 1.123727560043335, |
| "learning_rate": 0.0005499000000000001, |
| "loss": 2.4856, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.8099547511312217, |
| "grad_norm": 1.8601796627044678, |
| "learning_rate": 0.0005599, |
| "loss": 2.4775, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.8422753716871365, |
| "grad_norm": 1.7166662216186523, |
| "learning_rate": 0.0005698999999999999, |
| "loss": 2.4893, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.874595992243051, |
| "grad_norm": 1.2359657287597656, |
| "learning_rate": 0.0005799, |
| "loss": 2.505, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.9069166127989656, |
| "grad_norm": 1.1526943445205688, |
| "learning_rate": 0.0005899, |
| "loss": 2.4919, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.9392372333548804, |
| "grad_norm": 1.1822352409362793, |
| "learning_rate": 0.0005999, |
| "loss": 2.5048, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.9715578539107952, |
| "grad_norm": 1.2245287895202637, |
| "learning_rate": 0.0006099, |
| "loss": 2.482, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.0038784744667097, |
| "grad_norm": 1.1707814931869507, |
| "learning_rate": 0.0006199, |
| "loss": 2.5006, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.0361990950226243, |
| "grad_norm": 1.1745480298995972, |
| "learning_rate": 0.0006299000000000001, |
| "loss": 2.4224, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.0685197155785393, |
| "grad_norm": 1.0892423391342163, |
| "learning_rate": 0.0006399, |
| "loss": 2.4111, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.100840336134454, |
| "grad_norm": 1.217278242111206, |
| "learning_rate": 0.0006499, |
| "loss": 2.4131, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.1331609566903684, |
| "grad_norm": 1.2329050302505493, |
| "learning_rate": 0.0006599, |
| "loss": 2.4312, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.165481577246283, |
| "grad_norm": 1.2498464584350586, |
| "learning_rate": 0.0006699000000000001, |
| "loss": 2.4321, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.197802197802198, |
| "grad_norm": 1.7037843465805054, |
| "learning_rate": 0.0006799, |
| "loss": 2.3998, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.2301228183581125, |
| "grad_norm": 1.5184770822525024, |
| "learning_rate": 0.0006899, |
| "loss": 2.4153, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.262443438914027, |
| "grad_norm": 1.695165991783142, |
| "learning_rate": 0.0006998999999999999, |
| "loss": 2.4218, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2947640594699417, |
| "grad_norm": 1.3623040914535522, |
| "learning_rate": 0.0007099, |
| "loss": 2.4172, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.3270846800258567, |
| "grad_norm": 1.1840554475784302, |
| "learning_rate": 0.0007199, |
| "loss": 2.4598, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.3594053005817712, |
| "grad_norm": 1.2055315971374512, |
| "learning_rate": 0.0007299, |
| "loss": 2.4048, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.391725921137686, |
| "grad_norm": 1.4990323781967163, |
| "learning_rate": 0.0007399, |
| "loss": 2.4364, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.4240465416936003, |
| "grad_norm": 1.1268545389175415, |
| "learning_rate": 0.0007499000000000001, |
| "loss": 2.4543, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4563671622495153, |
| "grad_norm": 1.3047798871994019, |
| "learning_rate": 0.0007599, |
| "loss": 2.4456, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.48868778280543, |
| "grad_norm": 1.170345425605774, |
| "learning_rate": 0.0007699, |
| "loss": 2.4349, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.5210084033613445, |
| "grad_norm": 0.9993893504142761, |
| "learning_rate": 0.0007799, |
| "loss": 2.3841, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.553329023917259, |
| "grad_norm": 0.9757175445556641, |
| "learning_rate": 0.0007899000000000001, |
| "loss": 2.399, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.585649644473174, |
| "grad_norm": 1.2529906034469604, |
| "learning_rate": 0.0007999000000000001, |
| "loss": 2.4359, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.6179702650290886, |
| "grad_norm": 1.183002233505249, |
| "learning_rate": 0.0008099, |
| "loss": 2.4184, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.650290885585003, |
| "grad_norm": 1.2000353336334229, |
| "learning_rate": 0.0008198999999999999, |
| "loss": 2.4102, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.682611506140918, |
| "grad_norm": 1.0429084300994873, |
| "learning_rate": 0.0008299, |
| "loss": 2.4061, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.7149321266968327, |
| "grad_norm": 1.2502565383911133, |
| "learning_rate": 0.0008399, |
| "loss": 2.4636, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.7472527472527473, |
| "grad_norm": 1.2110387086868286, |
| "learning_rate": 0.0008499, |
| "loss": 2.4175, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.779573367808662, |
| "grad_norm": 1.1316890716552734, |
| "learning_rate": 0.0008599, |
| "loss": 2.4055, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.8118939883645764, |
| "grad_norm": 1.0335315465927124, |
| "learning_rate": 0.0008699000000000001, |
| "loss": 2.4026, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.8442146089204914, |
| "grad_norm": 1.1183189153671265, |
| "learning_rate": 0.0008799000000000001, |
| "loss": 2.4466, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.876535229476406, |
| "grad_norm": 1.2400306463241577, |
| "learning_rate": 0.0008899, |
| "loss": 2.4484, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.9088558500323205, |
| "grad_norm": 1.0672909021377563, |
| "learning_rate": 0.0008999, |
| "loss": 2.3913, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 1.250365138053894, |
| "learning_rate": 0.0009099, |
| "loss": 2.4352, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.97349709114415, |
| "grad_norm": 1.1961923837661743, |
| "learning_rate": 0.0009199000000000001, |
| "loss": 2.438, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.0058177117000646, |
| "grad_norm": 1.6543998718261719, |
| "learning_rate": 0.0009299, |
| "loss": 2.4589, |
| "step": 9300 |
| }, |
| { |
| "epoch": 3.038138332255979, |
| "grad_norm": 1.4493199586868286, |
| "learning_rate": 0.0009399, |
| "loss": 2.3203, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.070458952811894, |
| "grad_norm": 1.0506430864334106, |
| "learning_rate": 0.0009498999999999999, |
| "loss": 2.3324, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.1027795733678087, |
| "grad_norm": 1.1909950971603394, |
| "learning_rate": 0.0009599, |
| "loss": 2.3315, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.1351001939237233, |
| "grad_norm": 1.7181323766708374, |
| "learning_rate": 0.0009699, |
| "loss": 2.3593, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.167420814479638, |
| "grad_norm": 1.3096699714660645, |
| "learning_rate": 0.0009799, |
| "loss": 2.3457, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.199741435035553, |
| "grad_norm": 1.4763398170471191, |
| "learning_rate": 0.0009899, |
| "loss": 2.3232, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.2320620555914674, |
| "grad_norm": 1.4502891302108765, |
| "learning_rate": 0.0009999, |
| "loss": 2.3432, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.264382676147382, |
| "grad_norm": 1.4951964616775513, |
| "learning_rate": 0.001, |
| "loss": 2.3599, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.2967032967032965, |
| "grad_norm": 1.3563085794448853, |
| "learning_rate": 0.001, |
| "loss": 2.3509, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.3290239172592115, |
| "grad_norm": 1.1576285362243652, |
| "learning_rate": 0.001, |
| "loss": 2.3666, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.361344537815126, |
| "grad_norm": 1.0529580116271973, |
| "learning_rate": 0.001, |
| "loss": 2.3372, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.3936651583710407, |
| "grad_norm": 1.1564232110977173, |
| "learning_rate": 0.001, |
| "loss": 2.3488, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.425985778926955, |
| "grad_norm": 1.1305166482925415, |
| "learning_rate": 0.001, |
| "loss": 2.3936, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.45830639948287, |
| "grad_norm": 1.464630126953125, |
| "learning_rate": 0.001, |
| "loss": 2.3706, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.490627020038785, |
| "grad_norm": 1.1218030452728271, |
| "learning_rate": 0.001, |
| "loss": 2.4045, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.5229476405946993, |
| "grad_norm": 1.1466821432113647, |
| "learning_rate": 0.001, |
| "loss": 2.3568, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.555268261150614, |
| "grad_norm": 0.9143931269645691, |
| "learning_rate": 0.001, |
| "loss": 2.395, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.587588881706529, |
| "grad_norm": 1.290088176727295, |
| "learning_rate": 0.001, |
| "loss": 2.3372, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.6199095022624435, |
| "grad_norm": 1.271133542060852, |
| "learning_rate": 0.001, |
| "loss": 2.3731, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.652230122818358, |
| "grad_norm": 2.0801732540130615, |
| "learning_rate": 0.001, |
| "loss": 2.3895, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.684550743374273, |
| "grad_norm": 1.0689164400100708, |
| "learning_rate": 0.001, |
| "loss": 2.3635, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.7168713639301876, |
| "grad_norm": 1.1849030256271362, |
| "learning_rate": 0.001, |
| "loss": 2.3524, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.749191984486102, |
| "grad_norm": 1.3510334491729736, |
| "learning_rate": 0.001, |
| "loss": 2.3691, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.7815126050420167, |
| "grad_norm": 0.9980648159980774, |
| "learning_rate": 0.001, |
| "loss": 2.3792, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.8138332255979313, |
| "grad_norm": 1.0921436548233032, |
| "learning_rate": 0.001, |
| "loss": 2.3765, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 1.3819752931594849, |
| "learning_rate": 0.001, |
| "loss": 2.359, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.878474466709761, |
| "grad_norm": 1.0975066423416138, |
| "learning_rate": 0.001, |
| "loss": 2.365, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.9107950872656754, |
| "grad_norm": 1.1619728803634644, |
| "learning_rate": 0.001, |
| "loss": 2.3778, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.9431157078215904, |
| "grad_norm": 1.3813674449920654, |
| "learning_rate": 0.001, |
| "loss": 2.3733, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.975436328377505, |
| "grad_norm": 1.6252094507217407, |
| "learning_rate": 0.001, |
| "loss": 2.3926, |
| "step": 12300 |
| }, |
| { |
| "epoch": 4.0077569489334195, |
| "grad_norm": 1.207340121269226, |
| "learning_rate": 0.001, |
| "loss": 2.3659, |
| "step": 12400 |
| }, |
| { |
| "epoch": 4.040077569489334, |
| "grad_norm": 1.151745319366455, |
| "learning_rate": 0.001, |
| "loss": 2.2764, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.072398190045249, |
| "grad_norm": 1.115214467048645, |
| "learning_rate": 0.001, |
| "loss": 2.2529, |
| "step": 12600 |
| }, |
| { |
| "epoch": 4.104718810601163, |
| "grad_norm": 0.8362165689468384, |
| "learning_rate": 0.001, |
| "loss": 2.286, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.137039431157079, |
| "grad_norm": 1.1398061513900757, |
| "learning_rate": 0.001, |
| "loss": 2.2718, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.169360051712993, |
| "grad_norm": 1.1849474906921387, |
| "learning_rate": 0.001, |
| "loss": 2.2881, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.201680672268908, |
| "grad_norm": 0.7654844522476196, |
| "learning_rate": 0.001, |
| "loss": 2.2998, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.234001292824822, |
| "grad_norm": 0.9867515563964844, |
| "learning_rate": 0.001, |
| "loss": 2.2835, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.266321913380737, |
| "grad_norm": 1.1559300422668457, |
| "learning_rate": 0.001, |
| "loss": 2.2413, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.298642533936651, |
| "grad_norm": 0.9872420430183411, |
| "learning_rate": 0.001, |
| "loss": 2.2769, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.330963154492566, |
| "grad_norm": 1.1229034662246704, |
| "learning_rate": 0.001, |
| "loss": 2.2509, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.3632837750484805, |
| "grad_norm": 1.0585553646087646, |
| "learning_rate": 0.001, |
| "loss": 2.3067, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.395604395604396, |
| "grad_norm": 1.0657356977462769, |
| "learning_rate": 0.001, |
| "loss": 2.2857, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.4279250161603105, |
| "grad_norm": 1.3249043226242065, |
| "learning_rate": 0.001, |
| "loss": 2.3003, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.460245636716225, |
| "grad_norm": 1.142127275466919, |
| "learning_rate": 0.001, |
| "loss": 2.3132, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.49256625727214, |
| "grad_norm": 1.1387176513671875, |
| "learning_rate": 0.001, |
| "loss": 2.3045, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.524886877828054, |
| "grad_norm": 1.2085492610931396, |
| "learning_rate": 0.001, |
| "loss": 2.2948, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.557207498383969, |
| "grad_norm": 0.9948108792304993, |
| "learning_rate": 0.001, |
| "loss": 2.2626, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.589528118939883, |
| "grad_norm": 1.3945204019546509, |
| "learning_rate": 0.001, |
| "loss": 2.2821, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.621848739495798, |
| "grad_norm": 1.0511122941970825, |
| "learning_rate": 0.001, |
| "loss": 2.2967, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.654169360051713, |
| "grad_norm": 0.8755561113357544, |
| "learning_rate": 0.001, |
| "loss": 2.2845, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.686489980607628, |
| "grad_norm": 0.8116604685783386, |
| "learning_rate": 0.001, |
| "loss": 2.3187, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.7188106011635425, |
| "grad_norm": 1.3444281816482544, |
| "learning_rate": 0.001, |
| "loss": 2.3013, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.751131221719457, |
| "grad_norm": 1.0668421983718872, |
| "learning_rate": 0.001, |
| "loss": 2.2815, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.783451842275372, |
| "grad_norm": 1.055733561515808, |
| "learning_rate": 0.001, |
| "loss": 2.3033, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.815772462831286, |
| "grad_norm": 0.9193568825721741, |
| "learning_rate": 0.001, |
| "loss": 2.2927, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.848093083387201, |
| "grad_norm": 1.006926417350769, |
| "learning_rate": 0.001, |
| "loss": 2.2657, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.880413703943116, |
| "grad_norm": 0.8166502714157104, |
| "learning_rate": 0.001, |
| "loss": 2.276, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.912734324499031, |
| "grad_norm": 1.0166735649108887, |
| "learning_rate": 0.001, |
| "loss": 2.2926, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.945054945054945, |
| "grad_norm": 1.075845718383789, |
| "learning_rate": 0.001, |
| "loss": 2.2833, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.97737556561086, |
| "grad_norm": 0.8848145604133606, |
| "learning_rate": 0.001, |
| "loss": 2.3029, |
| "step": 15400 |
| }, |
| { |
| "epoch": 5.009696186166774, |
| "grad_norm": 0.8544314503669739, |
| "learning_rate": 0.001, |
| "loss": 2.2695, |
| "step": 15500 |
| }, |
| { |
| "epoch": 5.042016806722689, |
| "grad_norm": 1.4420232772827148, |
| "learning_rate": 0.001, |
| "loss": 2.1842, |
| "step": 15600 |
| }, |
| { |
| "epoch": 5.0743374272786035, |
| "grad_norm": 1.000048041343689, |
| "learning_rate": 0.001, |
| "loss": 2.1832, |
| "step": 15700 |
| }, |
| { |
| "epoch": 5.106658047834518, |
| "grad_norm": 1.0307416915893555, |
| "learning_rate": 0.001, |
| "loss": 2.2021, |
| "step": 15800 |
| }, |
| { |
| "epoch": 5.1389786683904335, |
| "grad_norm": 1.2275282144546509, |
| "learning_rate": 0.001, |
| "loss": 2.1989, |
| "step": 15900 |
| }, |
| { |
| "epoch": 5.171299288946348, |
| "grad_norm": 1.0649629831314087, |
| "learning_rate": 0.001, |
| "loss": 2.2266, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.203619909502263, |
| "grad_norm": 1.2316499948501587, |
| "learning_rate": 0.001, |
| "loss": 2.1996, |
| "step": 16100 |
| }, |
| { |
| "epoch": 5.235940530058177, |
| "grad_norm": 1.3741744756698608, |
| "learning_rate": 0.001, |
| "loss": 2.1916, |
| "step": 16200 |
| }, |
| { |
| "epoch": 5.268261150614092, |
| "grad_norm": 1.0122957229614258, |
| "learning_rate": 0.001, |
| "loss": 2.2525, |
| "step": 16300 |
| }, |
| { |
| "epoch": 5.300581771170006, |
| "grad_norm": 0.8483036160469055, |
| "learning_rate": 0.001, |
| "loss": 2.184, |
| "step": 16400 |
| }, |
| { |
| "epoch": 5.332902391725921, |
| "grad_norm": 0.8776037096977234, |
| "learning_rate": 0.001, |
| "loss": 2.2341, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.365223012281835, |
| "grad_norm": 0.9180140495300293, |
| "learning_rate": 0.001, |
| "loss": 2.2265, |
| "step": 16600 |
| }, |
| { |
| "epoch": 5.397543632837751, |
| "grad_norm": 1.063555359840393, |
| "learning_rate": 0.001, |
| "loss": 2.1839, |
| "step": 16700 |
| }, |
| { |
| "epoch": 5.429864253393665, |
| "grad_norm": 1.938733458518982, |
| "learning_rate": 0.001, |
| "loss": 2.2229, |
| "step": 16800 |
| }, |
| { |
| "epoch": 5.46218487394958, |
| "grad_norm": 1.2461786270141602, |
| "learning_rate": 0.001, |
| "loss": 2.2305, |
| "step": 16900 |
| }, |
| { |
| "epoch": 5.4945054945054945, |
| "grad_norm": 0.8778449296951294, |
| "learning_rate": 0.001, |
| "loss": 2.2053, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.526826115061409, |
| "grad_norm": 0.9430673718452454, |
| "learning_rate": 0.001, |
| "loss": 2.2403, |
| "step": 17100 |
| }, |
| { |
| "epoch": 5.559146735617324, |
| "grad_norm": 0.7641558051109314, |
| "learning_rate": 0.001, |
| "loss": 2.2149, |
| "step": 17200 |
| }, |
| { |
| "epoch": 5.591467356173238, |
| "grad_norm": 1.0826328992843628, |
| "learning_rate": 0.001, |
| "loss": 2.2042, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.623787976729153, |
| "grad_norm": 1.050527811050415, |
| "learning_rate": 0.001, |
| "loss": 2.2511, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.656108597285068, |
| "grad_norm": 0.9223970174789429, |
| "learning_rate": 0.001, |
| "loss": 2.2315, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.688429217840983, |
| "grad_norm": 1.0374590158462524, |
| "learning_rate": 0.001, |
| "loss": 2.243, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.720749838396897, |
| "grad_norm": 1.022735834121704, |
| "learning_rate": 0.001, |
| "loss": 2.2103, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.753070458952812, |
| "grad_norm": 1.0346524715423584, |
| "learning_rate": 0.001, |
| "loss": 2.2325, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.785391079508726, |
| "grad_norm": 0.9487394094467163, |
| "learning_rate": 0.001, |
| "loss": 2.2501, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.817711700064641, |
| "grad_norm": 0.923183798789978, |
| "learning_rate": 0.001, |
| "loss": 2.2162, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.850032320620556, |
| "grad_norm": 1.3014050722122192, |
| "learning_rate": 0.001, |
| "loss": 2.2293, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 1.249762773513794, |
| "learning_rate": 0.001, |
| "loss": 2.2131, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.914673561732386, |
| "grad_norm": 0.8731359243392944, |
| "learning_rate": 0.001, |
| "loss": 2.253, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.9469941822883, |
| "grad_norm": 0.896523118019104, |
| "learning_rate": 0.001, |
| "loss": 2.265, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.979314802844215, |
| "grad_norm": 0.9707561731338501, |
| "learning_rate": 0.001, |
| "loss": 2.2056, |
| "step": 18500 |
| }, |
| { |
| "epoch": 6.011635423400129, |
| "grad_norm": 1.1334419250488281, |
| "learning_rate": 0.001, |
| "loss": 2.1646, |
| "step": 18600 |
| }, |
| { |
| "epoch": 6.043956043956044, |
| "grad_norm": 1.1426136493682861, |
| "learning_rate": 0.001, |
| "loss": 2.1307, |
| "step": 18700 |
| }, |
| { |
| "epoch": 6.076276664511958, |
| "grad_norm": 1.0500696897506714, |
| "learning_rate": 0.001, |
| "loss": 2.1347, |
| "step": 18800 |
| }, |
| { |
| "epoch": 6.108597285067873, |
| "grad_norm": 0.8252379894256592, |
| "learning_rate": 0.001, |
| "loss": 2.1297, |
| "step": 18900 |
| }, |
| { |
| "epoch": 6.140917905623788, |
| "grad_norm": 1.1823996305465698, |
| "learning_rate": 0.001, |
| "loss": 2.1171, |
| "step": 19000 |
| }, |
| { |
| "epoch": 6.173238526179703, |
| "grad_norm": 0.8585313558578491, |
| "learning_rate": 0.001, |
| "loss": 2.1368, |
| "step": 19100 |
| }, |
| { |
| "epoch": 6.2055591467356175, |
| "grad_norm": 0.9864341616630554, |
| "learning_rate": 0.001, |
| "loss": 2.145, |
| "step": 19200 |
| }, |
| { |
| "epoch": 6.237879767291532, |
| "grad_norm": 1.1134485006332397, |
| "learning_rate": 0.001, |
| "loss": 2.1667, |
| "step": 19300 |
| }, |
| { |
| "epoch": 6.270200387847447, |
| "grad_norm": 1.1856287717819214, |
| "learning_rate": 0.001, |
| "loss": 2.1582, |
| "step": 19400 |
| }, |
| { |
| "epoch": 6.302521008403361, |
| "grad_norm": 1.1392621994018555, |
| "learning_rate": 0.001, |
| "loss": 2.165, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.334841628959276, |
| "grad_norm": 0.9821904897689819, |
| "learning_rate": 0.001, |
| "loss": 2.1773, |
| "step": 19600 |
| }, |
| { |
| "epoch": 6.36716224951519, |
| "grad_norm": 1.0998677015304565, |
| "learning_rate": 0.001, |
| "loss": 2.1583, |
| "step": 19700 |
| }, |
| { |
| "epoch": 6.399482870071106, |
| "grad_norm": 1.2467516660690308, |
| "learning_rate": 0.001, |
| "loss": 2.1471, |
| "step": 19800 |
| }, |
| { |
| "epoch": 6.43180349062702, |
| "grad_norm": 1.0288869142532349, |
| "learning_rate": 0.001, |
| "loss": 2.1588, |
| "step": 19900 |
| }, |
| { |
| "epoch": 6.464124111182935, |
| "grad_norm": 0.7433337569236755, |
| "learning_rate": 0.001, |
| "loss": 2.1435, |
| "step": 20000 |
| }, |
| { |
| "epoch": 6.496444731738849, |
| "grad_norm": 0.9702023863792419, |
| "learning_rate": 0.001, |
| "loss": 2.1831, |
| "step": 20100 |
| }, |
| { |
| "epoch": 6.528765352294764, |
| "grad_norm": 1.1337813138961792, |
| "learning_rate": 0.001, |
| "loss": 2.1695, |
| "step": 20200 |
| }, |
| { |
| "epoch": 6.5610859728506785, |
| "grad_norm": 1.0064531564712524, |
| "learning_rate": 0.001, |
| "loss": 2.1348, |
| "step": 20300 |
| }, |
| { |
| "epoch": 6.593406593406593, |
| "grad_norm": 1.3694676160812378, |
| "learning_rate": 0.001, |
| "loss": 2.1895, |
| "step": 20400 |
| }, |
| { |
| "epoch": 6.625727213962508, |
| "grad_norm": 0.8968884348869324, |
| "learning_rate": 0.001, |
| "loss": 2.1538, |
| "step": 20500 |
| }, |
| { |
| "epoch": 6.658047834518423, |
| "grad_norm": 1.3198908567428589, |
| "learning_rate": 0.001, |
| "loss": 2.1787, |
| "step": 20600 |
| }, |
| { |
| "epoch": 6.690368455074338, |
| "grad_norm": 1.086137294769287, |
| "learning_rate": 0.001, |
| "loss": 2.1851, |
| "step": 20700 |
| }, |
| { |
| "epoch": 6.722689075630252, |
| "grad_norm": 0.8597367405891418, |
| "learning_rate": 0.001, |
| "loss": 2.1799, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.755009696186167, |
| "grad_norm": 1.2422113418579102, |
| "learning_rate": 0.001, |
| "loss": 2.1767, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.787330316742081, |
| "grad_norm": 0.9910422563552856, |
| "learning_rate": 0.001, |
| "loss": 2.1606, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.819650937297996, |
| "grad_norm": 1.27292799949646, |
| "learning_rate": 0.001, |
| "loss": 2.1678, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.85197155785391, |
| "grad_norm": 0.9695398807525635, |
| "learning_rate": 0.001, |
| "loss": 2.182, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.884292178409826, |
| "grad_norm": 1.1529979705810547, |
| "learning_rate": 0.001, |
| "loss": 2.1706, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.91661279896574, |
| "grad_norm": 1.3126720190048218, |
| "learning_rate": 0.001, |
| "loss": 2.1573, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.948933419521655, |
| "grad_norm": 0.9190894961357117, |
| "learning_rate": 0.001, |
| "loss": 2.1563, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.98125404007757, |
| "grad_norm": 1.3218880891799927, |
| "learning_rate": 0.001, |
| "loss": 2.1972, |
| "step": 21600 |
| }, |
| { |
| "epoch": 7.013574660633484, |
| "grad_norm": 0.8746786117553711, |
| "learning_rate": 0.001, |
| "loss": 2.1198, |
| "step": 21700 |
| }, |
| { |
| "epoch": 7.045895281189399, |
| "grad_norm": 0.9623684287071228, |
| "learning_rate": 0.001, |
| "loss": 2.0991, |
| "step": 21800 |
| }, |
| { |
| "epoch": 7.078215901745313, |
| "grad_norm": 0.9384899735450745, |
| "learning_rate": 0.001, |
| "loss": 2.0808, |
| "step": 21900 |
| }, |
| { |
| "epoch": 7.110536522301228, |
| "grad_norm": 1.1331701278686523, |
| "learning_rate": 0.001, |
| "loss": 2.0982, |
| "step": 22000 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 1.1500688791275024, |
| "learning_rate": 0.001, |
| "loss": 2.0825, |
| "step": 22100 |
| }, |
| { |
| "epoch": 7.175177763413058, |
| "grad_norm": 1.047314167022705, |
| "learning_rate": 0.001, |
| "loss": 2.0834, |
| "step": 22200 |
| }, |
| { |
| "epoch": 7.207498383968972, |
| "grad_norm": 1.0547418594360352, |
| "learning_rate": 0.001, |
| "loss": 2.071, |
| "step": 22300 |
| }, |
| { |
| "epoch": 7.239819004524887, |
| "grad_norm": 1.3998901844024658, |
| "learning_rate": 0.001, |
| "loss": 2.1047, |
| "step": 22400 |
| }, |
| { |
| "epoch": 7.2721396250808015, |
| "grad_norm": 0.9676983952522278, |
| "learning_rate": 0.001, |
| "loss": 2.0935, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.304460245636716, |
| "grad_norm": 0.9831491708755493, |
| "learning_rate": 0.001, |
| "loss": 2.0842, |
| "step": 22600 |
| }, |
| { |
| "epoch": 7.336780866192631, |
| "grad_norm": 0.971725344657898, |
| "learning_rate": 0.001, |
| "loss": 2.1248, |
| "step": 22700 |
| }, |
| { |
| "epoch": 7.369101486748546, |
| "grad_norm": 0.9703531861305237, |
| "learning_rate": 0.001, |
| "loss": 2.0816, |
| "step": 22800 |
| }, |
| { |
| "epoch": 7.401422107304461, |
| "grad_norm": 0.8289443850517273, |
| "learning_rate": 0.001, |
| "loss": 2.0674, |
| "step": 22900 |
| }, |
| { |
| "epoch": 7.433742727860375, |
| "grad_norm": 1.0580024719238281, |
| "learning_rate": 0.001, |
| "loss": 2.1269, |
| "step": 23000 |
| }, |
| { |
| "epoch": 7.46606334841629, |
| "grad_norm": 1.3143529891967773, |
| "learning_rate": 0.001, |
| "loss": 2.1089, |
| "step": 23100 |
| }, |
| { |
| "epoch": 7.498383968972204, |
| "grad_norm": 1.0106147527694702, |
| "learning_rate": 0.001, |
| "loss": 2.1267, |
| "step": 23200 |
| }, |
| { |
| "epoch": 7.530704589528119, |
| "grad_norm": 1.0150158405303955, |
| "learning_rate": 0.001, |
| "loss": 2.1166, |
| "step": 23300 |
| }, |
| { |
| "epoch": 7.563025210084033, |
| "grad_norm": 1.2520421743392944, |
| "learning_rate": 0.001, |
| "loss": 2.1166, |
| "step": 23400 |
| }, |
| { |
| "epoch": 7.595345830639948, |
| "grad_norm": 1.198043704032898, |
| "learning_rate": 0.001, |
| "loss": 2.1213, |
| "step": 23500 |
| }, |
| { |
| "epoch": 7.6276664511958625, |
| "grad_norm": 1.2207905054092407, |
| "learning_rate": 0.001, |
| "loss": 2.1108, |
| "step": 23600 |
| }, |
| { |
| "epoch": 7.659987071751778, |
| "grad_norm": 0.9647412300109863, |
| "learning_rate": 0.001, |
| "loss": 2.1268, |
| "step": 23700 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 0.9915140271186829, |
| "learning_rate": 0.001, |
| "loss": 2.1438, |
| "step": 23800 |
| }, |
| { |
| "epoch": 7.724628312863607, |
| "grad_norm": 1.1954071521759033, |
| "learning_rate": 0.001, |
| "loss": 2.0931, |
| "step": 23900 |
| }, |
| { |
| "epoch": 7.756948933419522, |
| "grad_norm": 0.9521498084068298, |
| "learning_rate": 0.001, |
| "loss": 2.1314, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.789269553975436, |
| "grad_norm": 0.9276489615440369, |
| "learning_rate": 0.001, |
| "loss": 2.107, |
| "step": 24100 |
| }, |
| { |
| "epoch": 7.821590174531351, |
| "grad_norm": 1.0093352794647217, |
| "learning_rate": 0.001, |
| "loss": 2.1245, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.853910795087265, |
| "grad_norm": 0.842779278755188, |
| "learning_rate": 0.001, |
| "loss": 2.1334, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.886231415643181, |
| "grad_norm": 1.007067322731018, |
| "learning_rate": 0.001, |
| "loss": 2.1178, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.918552036199095, |
| "grad_norm": 1.259667158126831, |
| "learning_rate": 0.001, |
| "loss": 2.1335, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.95087265675501, |
| "grad_norm": 1.6199828386306763, |
| "learning_rate": 0.001, |
| "loss": 2.1201, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.983193277310924, |
| "grad_norm": 1.0307115316390991, |
| "learning_rate": 0.001, |
| "loss": 2.1404, |
| "step": 24700 |
| }, |
| { |
| "epoch": 8.015513897866839, |
| "grad_norm": 3.100094795227051, |
| "learning_rate": 0.001, |
| "loss": 2.0168, |
| "step": 24800 |
| }, |
| { |
| "epoch": 8.047834518422754, |
| "grad_norm": 1.230669379234314, |
| "learning_rate": 0.001, |
| "loss": 2.0227, |
| "step": 24900 |
| }, |
| { |
| "epoch": 8.080155138978668, |
| "grad_norm": 1.1248514652252197, |
| "learning_rate": 0.001, |
| "loss": 2.0109, |
| "step": 25000 |
| }, |
| { |
| "epoch": 8.112475759534583, |
| "grad_norm": 1.1911290884017944, |
| "learning_rate": 0.001, |
| "loss": 2.0383, |
| "step": 25100 |
| }, |
| { |
| "epoch": 8.144796380090497, |
| "grad_norm": 0.9947310090065002, |
| "learning_rate": 0.001, |
| "loss": 2.0398, |
| "step": 25200 |
| }, |
| { |
| "epoch": 8.177117000646412, |
| "grad_norm": 1.1442896127700806, |
| "learning_rate": 0.001, |
| "loss": 2.0483, |
| "step": 25300 |
| }, |
| { |
| "epoch": 8.209437621202326, |
| "grad_norm": 0.847846508026123, |
| "learning_rate": 0.001, |
| "loss": 2.0151, |
| "step": 25400 |
| }, |
| { |
| "epoch": 8.241758241758241, |
| "grad_norm": 1.1538875102996826, |
| "learning_rate": 0.001, |
| "loss": 2.0274, |
| "step": 25500 |
| }, |
| { |
| "epoch": 8.274078862314157, |
| "grad_norm": 0.9806484580039978, |
| "learning_rate": 0.001, |
| "loss": 2.0477, |
| "step": 25600 |
| }, |
| { |
| "epoch": 8.306399482870072, |
| "grad_norm": 0.9989823698997498, |
| "learning_rate": 0.001, |
| "loss": 2.0475, |
| "step": 25700 |
| }, |
| { |
| "epoch": 8.338720103425986, |
| "grad_norm": 1.151330828666687, |
| "learning_rate": 0.001, |
| "loss": 2.0621, |
| "step": 25800 |
| }, |
| { |
| "epoch": 8.371040723981901, |
| "grad_norm": 1.0853585004806519, |
| "learning_rate": 0.001, |
| "loss": 2.0313, |
| "step": 25900 |
| }, |
| { |
| "epoch": 8.403361344537815, |
| "grad_norm": 1.1173166036605835, |
| "learning_rate": 0.001, |
| "loss": 2.0538, |
| "step": 26000 |
| }, |
| { |
| "epoch": 8.43568196509373, |
| "grad_norm": 1.0314850807189941, |
| "learning_rate": 0.001, |
| "loss": 2.0368, |
| "step": 26100 |
| }, |
| { |
| "epoch": 8.468002585649645, |
| "grad_norm": 1.2047348022460938, |
| "learning_rate": 0.001, |
| "loss": 2.0552, |
| "step": 26200 |
| }, |
| { |
| "epoch": 8.50032320620556, |
| "grad_norm": 0.9672854542732239, |
| "learning_rate": 0.001, |
| "loss": 2.1015, |
| "step": 26300 |
| }, |
| { |
| "epoch": 8.532643826761474, |
| "grad_norm": 0.9105637073516846, |
| "learning_rate": 0.001, |
| "loss": 2.0828, |
| "step": 26400 |
| }, |
| { |
| "epoch": 8.564964447317388, |
| "grad_norm": 1.086616039276123, |
| "learning_rate": 0.001, |
| "loss": 2.0289, |
| "step": 26500 |
| }, |
| { |
| "epoch": 8.597285067873303, |
| "grad_norm": 0.9836768507957458, |
| "learning_rate": 0.001, |
| "loss": 2.0851, |
| "step": 26600 |
| }, |
| { |
| "epoch": 8.629605688429217, |
| "grad_norm": 1.150235891342163, |
| "learning_rate": 0.001, |
| "loss": 2.0542, |
| "step": 26700 |
| }, |
| { |
| "epoch": 8.661926308985132, |
| "grad_norm": 1.1072510480880737, |
| "learning_rate": 0.001, |
| "loss": 2.0797, |
| "step": 26800 |
| }, |
| { |
| "epoch": 8.694246929541046, |
| "grad_norm": 1.2430179119110107, |
| "learning_rate": 0.001, |
| "loss": 2.0976, |
| "step": 26900 |
| }, |
| { |
| "epoch": 8.726567550096961, |
| "grad_norm": 0.9480435848236084, |
| "learning_rate": 0.001, |
| "loss": 2.0835, |
| "step": 27000 |
| }, |
| { |
| "epoch": 8.758888170652877, |
| "grad_norm": 1.1376099586486816, |
| "learning_rate": 0.001, |
| "loss": 2.0695, |
| "step": 27100 |
| }, |
| { |
| "epoch": 8.791208791208792, |
| "grad_norm": 1.1211824417114258, |
| "learning_rate": 0.001, |
| "loss": 2.0724, |
| "step": 27200 |
| }, |
| { |
| "epoch": 8.823529411764707, |
| "grad_norm": 0.8404349684715271, |
| "learning_rate": 0.001, |
| "loss": 2.0693, |
| "step": 27300 |
| }, |
| { |
| "epoch": 8.855850032320621, |
| "grad_norm": 1.0358692407608032, |
| "learning_rate": 0.001, |
| "loss": 2.0874, |
| "step": 27400 |
| }, |
| { |
| "epoch": 8.888170652876536, |
| "grad_norm": 0.7218955755233765, |
| "learning_rate": 0.001, |
| "loss": 2.0887, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.92049127343245, |
| "grad_norm": 1.412729024887085, |
| "learning_rate": 0.001, |
| "loss": 2.0919, |
| "step": 27600 |
| }, |
| { |
| "epoch": 8.952811893988365, |
| "grad_norm": 0.8630658984184265, |
| "learning_rate": 0.001, |
| "loss": 2.1076, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.98513251454428, |
| "grad_norm": 0.9641337990760803, |
| "learning_rate": 0.001, |
| "loss": 2.0819, |
| "step": 27800 |
| }, |
| { |
| "epoch": 9.017453135100194, |
| "grad_norm": 1.5362216234207153, |
| "learning_rate": 0.001, |
| "loss": 1.9746, |
| "step": 27900 |
| }, |
| { |
| "epoch": 9.049773755656108, |
| "grad_norm": 1.55039381980896, |
| "learning_rate": 0.001, |
| "loss": 1.9523, |
| "step": 28000 |
| }, |
| { |
| "epoch": 9.082094376212023, |
| "grad_norm": 1.211641550064087, |
| "learning_rate": 0.001, |
| "loss": 1.9809, |
| "step": 28100 |
| }, |
| { |
| "epoch": 9.114414996767938, |
| "grad_norm": 1.4977613687515259, |
| "learning_rate": 0.001, |
| "loss": 1.9573, |
| "step": 28200 |
| }, |
| { |
| "epoch": 9.146735617323852, |
| "grad_norm": 1.6715325117111206, |
| "learning_rate": 0.001, |
| "loss": 2.0006, |
| "step": 28300 |
| }, |
| { |
| "epoch": 9.179056237879767, |
| "grad_norm": 1.270294189453125, |
| "learning_rate": 0.001, |
| "loss": 1.9826, |
| "step": 28400 |
| }, |
| { |
| "epoch": 9.211376858435681, |
| "grad_norm": 1.3731482028961182, |
| "learning_rate": 0.001, |
| "loss": 1.9976, |
| "step": 28500 |
| }, |
| { |
| "epoch": 9.243697478991596, |
| "grad_norm": 1.2195913791656494, |
| "learning_rate": 0.001, |
| "loss": 1.9656, |
| "step": 28600 |
| }, |
| { |
| "epoch": 9.276018099547512, |
| "grad_norm": 1.3323032855987549, |
| "learning_rate": 0.001, |
| "loss": 2.0285, |
| "step": 28700 |
| }, |
| { |
| "epoch": 9.308338720103427, |
| "grad_norm": 1.1946659088134766, |
| "learning_rate": 0.001, |
| "loss": 1.9931, |
| "step": 28800 |
| }, |
| { |
| "epoch": 9.340659340659341, |
| "grad_norm": 1.231381893157959, |
| "learning_rate": 0.001, |
| "loss": 1.9987, |
| "step": 28900 |
| }, |
| { |
| "epoch": 9.372979961215256, |
| "grad_norm": 1.3914852142333984, |
| "learning_rate": 0.001, |
| "loss": 2.0162, |
| "step": 29000 |
| }, |
| { |
| "epoch": 9.40530058177117, |
| "grad_norm": 1.6865482330322266, |
| "learning_rate": 0.001, |
| "loss": 1.9987, |
| "step": 29100 |
| }, |
| { |
| "epoch": 9.437621202327085, |
| "grad_norm": 1.3092128038406372, |
| "learning_rate": 0.001, |
| "loss": 2.0095, |
| "step": 29200 |
| }, |
| { |
| "epoch": 9.469941822883, |
| "grad_norm": 1.674773097038269, |
| "learning_rate": 0.001, |
| "loss": 2.0127, |
| "step": 29300 |
| }, |
| { |
| "epoch": 9.502262443438914, |
| "grad_norm": 1.4838887453079224, |
| "learning_rate": 0.001, |
| "loss": 2.0138, |
| "step": 29400 |
| }, |
| { |
| "epoch": 9.534583063994829, |
| "grad_norm": 1.4192705154418945, |
| "learning_rate": 0.001, |
| "loss": 2.0291, |
| "step": 29500 |
| }, |
| { |
| "epoch": 9.566903684550743, |
| "grad_norm": 1.3291783332824707, |
| "learning_rate": 0.001, |
| "loss": 2.0384, |
| "step": 29600 |
| }, |
| { |
| "epoch": 9.599224305106658, |
| "grad_norm": 1.399062156677246, |
| "learning_rate": 0.001, |
| "loss": 2.0315, |
| "step": 29700 |
| }, |
| { |
| "epoch": 9.631544925662572, |
| "grad_norm": 1.213178038597107, |
| "learning_rate": 0.001, |
| "loss": 2.0544, |
| "step": 29800 |
| }, |
| { |
| "epoch": 9.663865546218487, |
| "grad_norm": 1.2992242574691772, |
| "learning_rate": 0.001, |
| "loss": 2.0378, |
| "step": 29900 |
| }, |
| { |
| "epoch": 9.696186166774401, |
| "grad_norm": 1.3114560842514038, |
| "learning_rate": 0.001, |
| "loss": 2.0445, |
| "step": 30000 |
| }, |
| { |
| "epoch": 9.728506787330316, |
| "grad_norm": 1.130974531173706, |
| "learning_rate": 0.001, |
| "loss": 2.0377, |
| "step": 30100 |
| }, |
| { |
| "epoch": 9.760827407886232, |
| "grad_norm": 1.3303745985031128, |
| "learning_rate": 0.001, |
| "loss": 2.0535, |
| "step": 30200 |
| }, |
| { |
| "epoch": 9.793148028442147, |
| "grad_norm": 1.8423625230789185, |
| "learning_rate": 0.001, |
| "loss": 2.0334, |
| "step": 30300 |
| }, |
| { |
| "epoch": 9.825468648998061, |
| "grad_norm": 1.193260669708252, |
| "learning_rate": 0.001, |
| "loss": 2.026, |
| "step": 30400 |
| }, |
| { |
| "epoch": 9.857789269553976, |
| "grad_norm": 1.9798153638839722, |
| "learning_rate": 0.001, |
| "loss": 2.0475, |
| "step": 30500 |
| }, |
| { |
| "epoch": 9.89010989010989, |
| "grad_norm": 1.1298526525497437, |
| "learning_rate": 0.001, |
| "loss": 2.044, |
| "step": 30600 |
| }, |
| { |
| "epoch": 9.922430510665805, |
| "grad_norm": 1.5975474119186401, |
| "learning_rate": 0.001, |
| "loss": 2.0417, |
| "step": 30700 |
| }, |
| { |
| "epoch": 9.95475113122172, |
| "grad_norm": 1.2891868352890015, |
| "learning_rate": 0.001, |
| "loss": 2.0445, |
| "step": 30800 |
| }, |
| { |
| "epoch": 9.987071751777634, |
| "grad_norm": 1.7390249967575073, |
| "learning_rate": 0.001, |
| "loss": 2.0436, |
| "step": 30900 |
| }, |
| { |
| "epoch": 10.019392372333549, |
| "grad_norm": 0.9371219277381897, |
| "learning_rate": 0.001, |
| "loss": 1.9984, |
| "step": 31000 |
| }, |
| { |
| "epoch": 10.051712992889463, |
| "grad_norm": 1.240126132965088, |
| "learning_rate": 0.001, |
| "loss": 1.9411, |
| "step": 31100 |
| }, |
| { |
| "epoch": 10.084033613445378, |
| "grad_norm": 1.129967451095581, |
| "learning_rate": 0.001, |
| "loss": 1.9458, |
| "step": 31200 |
| }, |
| { |
| "epoch": 10.116354234001292, |
| "grad_norm": 1.056111216545105, |
| "learning_rate": 0.001, |
| "loss": 1.951, |
| "step": 31300 |
| }, |
| { |
| "epoch": 10.148674854557207, |
| "grad_norm": 1.1332019567489624, |
| "learning_rate": 0.001, |
| "loss": 1.9352, |
| "step": 31400 |
| }, |
| { |
| "epoch": 10.180995475113122, |
| "grad_norm": 1.170355200767517, |
| "learning_rate": 0.001, |
| "loss": 1.9528, |
| "step": 31500 |
| }, |
| { |
| "epoch": 10.213316095669036, |
| "grad_norm": 1.212427020072937, |
| "learning_rate": 0.001, |
| "loss": 1.9544, |
| "step": 31600 |
| }, |
| { |
| "epoch": 10.24563671622495, |
| "grad_norm": 1.2170082330703735, |
| "learning_rate": 0.001, |
| "loss": 1.9603, |
| "step": 31700 |
| }, |
| { |
| "epoch": 10.277957336780867, |
| "grad_norm": 1.2054773569107056, |
| "learning_rate": 0.001, |
| "loss": 1.9755, |
| "step": 31800 |
| }, |
| { |
| "epoch": 10.310277957336782, |
| "grad_norm": 1.0994369983673096, |
| "learning_rate": 0.001, |
| "loss": 1.9709, |
| "step": 31900 |
| }, |
| { |
| "epoch": 10.342598577892696, |
| "grad_norm": 1.1936355829238892, |
| "learning_rate": 0.001, |
| "loss": 1.9493, |
| "step": 32000 |
| }, |
| { |
| "epoch": 10.37491919844861, |
| "grad_norm": 1.029075026512146, |
| "learning_rate": 0.001, |
| "loss": 1.9632, |
| "step": 32100 |
| }, |
| { |
| "epoch": 10.407239819004525, |
| "grad_norm": 0.9728875756263733, |
| "learning_rate": 0.001, |
| "loss": 1.9951, |
| "step": 32200 |
| }, |
| { |
| "epoch": 10.43956043956044, |
| "grad_norm": 1.2093101739883423, |
| "learning_rate": 0.001, |
| "loss": 1.9654, |
| "step": 32300 |
| }, |
| { |
| "epoch": 10.471881060116354, |
| "grad_norm": 1.241963267326355, |
| "learning_rate": 0.001, |
| "loss": 1.9874, |
| "step": 32400 |
| }, |
| { |
| "epoch": 10.504201680672269, |
| "grad_norm": 1.0996415615081787, |
| "learning_rate": 0.001, |
| "loss": 1.9809, |
| "step": 32500 |
| }, |
| { |
| "epoch": 10.536522301228183, |
| "grad_norm": 1.0012050867080688, |
| "learning_rate": 0.001, |
| "loss": 1.9745, |
| "step": 32600 |
| }, |
| { |
| "epoch": 10.568842921784098, |
| "grad_norm": 1.1308503150939941, |
| "learning_rate": 0.001, |
| "loss": 1.9829, |
| "step": 32700 |
| }, |
| { |
| "epoch": 10.601163542340013, |
| "grad_norm": 1.2286540269851685, |
| "learning_rate": 0.001, |
| "loss": 1.9493, |
| "step": 32800 |
| }, |
| { |
| "epoch": 10.633484162895927, |
| "grad_norm": 0.9340251684188843, |
| "learning_rate": 0.001, |
| "loss": 2.0062, |
| "step": 32900 |
| }, |
| { |
| "epoch": 10.665804783451842, |
| "grad_norm": 1.0742608308792114, |
| "learning_rate": 0.001, |
| "loss": 1.974, |
| "step": 33000 |
| }, |
| { |
| "epoch": 10.698125404007756, |
| "grad_norm": 1.1647969484329224, |
| "learning_rate": 0.001, |
| "loss": 1.9751, |
| "step": 33100 |
| }, |
| { |
| "epoch": 10.73044602456367, |
| "grad_norm": 1.2428866624832153, |
| "learning_rate": 0.001, |
| "loss": 2.0049, |
| "step": 33200 |
| }, |
| { |
| "epoch": 10.762766645119587, |
| "grad_norm": 1.4069253206253052, |
| "learning_rate": 0.001, |
| "loss": 2.0109, |
| "step": 33300 |
| }, |
| { |
| "epoch": 10.795087265675502, |
| "grad_norm": 1.2430641651153564, |
| "learning_rate": 0.001, |
| "loss": 1.9871, |
| "step": 33400 |
| }, |
| { |
| "epoch": 10.827407886231416, |
| "grad_norm": 1.2892264127731323, |
| "learning_rate": 0.001, |
| "loss": 2.0063, |
| "step": 33500 |
| }, |
| { |
| "epoch": 10.85972850678733, |
| "grad_norm": 1.044710397720337, |
| "learning_rate": 0.001, |
| "loss": 2.0003, |
| "step": 33600 |
| }, |
| { |
| "epoch": 10.892049127343245, |
| "grad_norm": 1.239604115486145, |
| "learning_rate": 0.001, |
| "loss": 1.9966, |
| "step": 33700 |
| }, |
| { |
| "epoch": 10.92436974789916, |
| "grad_norm": 1.3418306112289429, |
| "learning_rate": 0.001, |
| "loss": 1.9852, |
| "step": 33800 |
| }, |
| { |
| "epoch": 10.956690368455074, |
| "grad_norm": 1.0788873434066772, |
| "learning_rate": 0.001, |
| "loss": 1.9987, |
| "step": 33900 |
| }, |
| { |
| "epoch": 10.989010989010989, |
| "grad_norm": 1.139272928237915, |
| "learning_rate": 0.001, |
| "loss": 2.0007, |
| "step": 34000 |
| }, |
| { |
| "epoch": 11.021331609566904, |
| "grad_norm": 1.0919997692108154, |
| "learning_rate": 0.001, |
| "loss": 1.9835, |
| "step": 34100 |
| }, |
| { |
| "epoch": 11.053652230122818, |
| "grad_norm": 1.0319784879684448, |
| "learning_rate": 0.001, |
| "loss": 1.8933, |
| "step": 34200 |
| }, |
| { |
| "epoch": 11.085972850678733, |
| "grad_norm": 1.3085379600524902, |
| "learning_rate": 0.001, |
| "loss": 1.9088, |
| "step": 34300 |
| }, |
| { |
| "epoch": 11.118293471234647, |
| "grad_norm": 1.3764021396636963, |
| "learning_rate": 0.001, |
| "loss": 1.912, |
| "step": 34400 |
| }, |
| { |
| "epoch": 11.150614091790562, |
| "grad_norm": 1.1798242330551147, |
| "learning_rate": 0.001, |
| "loss": 1.9078, |
| "step": 34500 |
| }, |
| { |
| "epoch": 11.182934712346476, |
| "grad_norm": 1.325455904006958, |
| "learning_rate": 0.001, |
| "loss": 1.9255, |
| "step": 34600 |
| }, |
| { |
| "epoch": 11.215255332902391, |
| "grad_norm": 1.0184882879257202, |
| "learning_rate": 0.001, |
| "loss": 1.9248, |
| "step": 34700 |
| }, |
| { |
| "epoch": 11.247575953458306, |
| "grad_norm": 0.9781094193458557, |
| "learning_rate": 0.001, |
| "loss": 1.922, |
| "step": 34800 |
| }, |
| { |
| "epoch": 11.279896574014222, |
| "grad_norm": 1.0457218885421753, |
| "learning_rate": 0.001, |
| "loss": 1.8872, |
| "step": 34900 |
| }, |
| { |
| "epoch": 11.312217194570136, |
| "grad_norm": 1.1817752122879028, |
| "learning_rate": 0.001, |
| "loss": 1.9091, |
| "step": 35000 |
| }, |
| { |
| "epoch": 11.344537815126051, |
| "grad_norm": 1.059022307395935, |
| "learning_rate": 0.001, |
| "loss": 1.9213, |
| "step": 35100 |
| }, |
| { |
| "epoch": 11.376858435681966, |
| "grad_norm": 1.2279465198516846, |
| "learning_rate": 0.001, |
| "loss": 1.9173, |
| "step": 35200 |
| }, |
| { |
| "epoch": 11.40917905623788, |
| "grad_norm": 1.0559117794036865, |
| "learning_rate": 0.001, |
| "loss": 1.9462, |
| "step": 35300 |
| }, |
| { |
| "epoch": 11.441499676793795, |
| "grad_norm": 1.155202031135559, |
| "learning_rate": 0.001, |
| "loss": 1.9219, |
| "step": 35400 |
| }, |
| { |
| "epoch": 11.47382029734971, |
| "grad_norm": 0.9445791244506836, |
| "learning_rate": 0.001, |
| "loss": 1.9351, |
| "step": 35500 |
| }, |
| { |
| "epoch": 11.506140917905624, |
| "grad_norm": 1.164797306060791, |
| "learning_rate": 0.001, |
| "loss": 1.9704, |
| "step": 35600 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 1.1770981550216675, |
| "learning_rate": 0.001, |
| "loss": 1.9473, |
| "step": 35700 |
| }, |
| { |
| "epoch": 11.570782159017453, |
| "grad_norm": 1.2111883163452148, |
| "learning_rate": 0.001, |
| "loss": 1.9358, |
| "step": 35800 |
| }, |
| { |
| "epoch": 11.603102779573367, |
| "grad_norm": 1.2625328302383423, |
| "learning_rate": 0.001, |
| "loss": 1.9276, |
| "step": 35900 |
| }, |
| { |
| "epoch": 11.635423400129282, |
| "grad_norm": 1.070054054260254, |
| "learning_rate": 0.001, |
| "loss": 1.9501, |
| "step": 36000 |
| }, |
| { |
| "epoch": 11.667744020685197, |
| "grad_norm": 1.6539897918701172, |
| "learning_rate": 0.001, |
| "loss": 1.9679, |
| "step": 36100 |
| }, |
| { |
| "epoch": 11.700064641241111, |
| "grad_norm": 0.9244352579116821, |
| "learning_rate": 0.001, |
| "loss": 1.9429, |
| "step": 36200 |
| }, |
| { |
| "epoch": 11.732385261797026, |
| "grad_norm": 1.3357621431350708, |
| "learning_rate": 0.001, |
| "loss": 1.9378, |
| "step": 36300 |
| }, |
| { |
| "epoch": 11.764705882352942, |
| "grad_norm": 1.4329630136489868, |
| "learning_rate": 0.001, |
| "loss": 1.9545, |
| "step": 36400 |
| }, |
| { |
| "epoch": 11.797026502908857, |
| "grad_norm": 1.078933835029602, |
| "learning_rate": 0.001, |
| "loss": 1.9275, |
| "step": 36500 |
| }, |
| { |
| "epoch": 11.829347123464771, |
| "grad_norm": 0.9876687526702881, |
| "learning_rate": 0.001, |
| "loss": 1.9787, |
| "step": 36600 |
| }, |
| { |
| "epoch": 11.861667744020686, |
| "grad_norm": 1.1422828435897827, |
| "learning_rate": 0.001, |
| "loss": 1.9797, |
| "step": 36700 |
| }, |
| { |
| "epoch": 11.8939883645766, |
| "grad_norm": 1.062204360961914, |
| "learning_rate": 0.001, |
| "loss": 1.9526, |
| "step": 36800 |
| }, |
| { |
| "epoch": 11.926308985132515, |
| "grad_norm": 1.3464689254760742, |
| "learning_rate": 0.001, |
| "loss": 1.9804, |
| "step": 36900 |
| }, |
| { |
| "epoch": 11.95862960568843, |
| "grad_norm": 0.9646903276443481, |
| "learning_rate": 0.001, |
| "loss": 1.9566, |
| "step": 37000 |
| }, |
| { |
| "epoch": 11.990950226244344, |
| "grad_norm": 1.1506516933441162, |
| "learning_rate": 0.001, |
| "loss": 1.9728, |
| "step": 37100 |
| }, |
| { |
| "epoch": 12.023270846800258, |
| "grad_norm": 1.369059681892395, |
| "learning_rate": 0.001, |
| "loss": 1.8877, |
| "step": 37200 |
| }, |
| { |
| "epoch": 12.055591467356173, |
| "grad_norm": 1.1158785820007324, |
| "learning_rate": 0.001, |
| "loss": 1.8268, |
| "step": 37300 |
| }, |
| { |
| "epoch": 12.087912087912088, |
| "grad_norm": 1.0913095474243164, |
| "learning_rate": 0.001, |
| "loss": 1.8475, |
| "step": 37400 |
| }, |
| { |
| "epoch": 12.120232708468002, |
| "grad_norm": 1.241456151008606, |
| "learning_rate": 0.001, |
| "loss": 1.8694, |
| "step": 37500 |
| }, |
| { |
| "epoch": 12.152553329023917, |
| "grad_norm": 1.1001811027526855, |
| "learning_rate": 0.001, |
| "loss": 1.8699, |
| "step": 37600 |
| }, |
| { |
| "epoch": 12.184873949579831, |
| "grad_norm": 1.1026263236999512, |
| "learning_rate": 0.001, |
| "loss": 1.8862, |
| "step": 37700 |
| }, |
| { |
| "epoch": 12.217194570135746, |
| "grad_norm": 1.1104148626327515, |
| "learning_rate": 0.001, |
| "loss": 1.885, |
| "step": 37800 |
| }, |
| { |
| "epoch": 12.24951519069166, |
| "grad_norm": 1.0062155723571777, |
| "learning_rate": 0.001, |
| "loss": 1.8719, |
| "step": 37900 |
| }, |
| { |
| "epoch": 12.281835811247577, |
| "grad_norm": 1.2362042665481567, |
| "learning_rate": 0.001, |
| "loss": 1.8963, |
| "step": 38000 |
| }, |
| { |
| "epoch": 12.314156431803491, |
| "grad_norm": 1.0042673349380493, |
| "learning_rate": 0.001, |
| "loss": 1.9027, |
| "step": 38100 |
| }, |
| { |
| "epoch": 12.346477052359406, |
| "grad_norm": 1.2307231426239014, |
| "learning_rate": 0.001, |
| "loss": 1.8827, |
| "step": 38200 |
| }, |
| { |
| "epoch": 12.37879767291532, |
| "grad_norm": 0.9624270796775818, |
| "learning_rate": 0.001, |
| "loss": 1.8745, |
| "step": 38300 |
| }, |
| { |
| "epoch": 12.411118293471235, |
| "grad_norm": 0.9763580560684204, |
| "learning_rate": 0.001, |
| "loss": 1.878, |
| "step": 38400 |
| }, |
| { |
| "epoch": 12.44343891402715, |
| "grad_norm": 1.371140480041504, |
| "learning_rate": 0.001, |
| "loss": 1.9079, |
| "step": 38500 |
| }, |
| { |
| "epoch": 12.475759534583064, |
| "grad_norm": 1.0613393783569336, |
| "learning_rate": 0.001, |
| "loss": 1.8867, |
| "step": 38600 |
| }, |
| { |
| "epoch": 12.508080155138979, |
| "grad_norm": 1.1260520219802856, |
| "learning_rate": 0.001, |
| "loss": 1.8916, |
| "step": 38700 |
| }, |
| { |
| "epoch": 12.540400775694893, |
| "grad_norm": 1.4606720209121704, |
| "learning_rate": 0.001, |
| "loss": 1.9039, |
| "step": 38800 |
| }, |
| { |
| "epoch": 12.572721396250808, |
| "grad_norm": 1.113877773284912, |
| "learning_rate": 0.001, |
| "loss": 1.9194, |
| "step": 38900 |
| }, |
| { |
| "epoch": 12.605042016806722, |
| "grad_norm": 1.2178810834884644, |
| "learning_rate": 0.001, |
| "loss": 1.9354, |
| "step": 39000 |
| }, |
| { |
| "epoch": 12.637362637362637, |
| "grad_norm": 1.0655773878097534, |
| "learning_rate": 0.001, |
| "loss": 1.9348, |
| "step": 39100 |
| }, |
| { |
| "epoch": 12.669683257918551, |
| "grad_norm": 1.2934608459472656, |
| "learning_rate": 0.001, |
| "loss": 1.9038, |
| "step": 39200 |
| }, |
| { |
| "epoch": 12.702003878474466, |
| "grad_norm": 0.9688911437988281, |
| "learning_rate": 0.001, |
| "loss": 1.9057, |
| "step": 39300 |
| }, |
| { |
| "epoch": 12.73432449903038, |
| "grad_norm": 1.1328833103179932, |
| "learning_rate": 0.001, |
| "loss": 1.9568, |
| "step": 39400 |
| }, |
| { |
| "epoch": 12.766645119586297, |
| "grad_norm": 1.2693794965744019, |
| "learning_rate": 0.001, |
| "loss": 1.9161, |
| "step": 39500 |
| }, |
| { |
| "epoch": 12.798965740142211, |
| "grad_norm": 1.2547452449798584, |
| "learning_rate": 0.001, |
| "loss": 1.9138, |
| "step": 39600 |
| }, |
| { |
| "epoch": 12.831286360698126, |
| "grad_norm": 1.2441757917404175, |
| "learning_rate": 0.001, |
| "loss": 1.9496, |
| "step": 39700 |
| }, |
| { |
| "epoch": 12.86360698125404, |
| "grad_norm": 0.9224479794502258, |
| "learning_rate": 0.001, |
| "loss": 1.9181, |
| "step": 39800 |
| }, |
| { |
| "epoch": 12.895927601809955, |
| "grad_norm": 1.2042218446731567, |
| "learning_rate": 0.001, |
| "loss": 1.9154, |
| "step": 39900 |
| }, |
| { |
| "epoch": 12.92824822236587, |
| "grad_norm": 1.195986032485962, |
| "learning_rate": 0.001, |
| "loss": 1.92, |
| "step": 40000 |
| }, |
| { |
| "epoch": 12.960568842921784, |
| "grad_norm": 1.5201036930084229, |
| "learning_rate": 0.001, |
| "loss": 1.9273, |
| "step": 40100 |
| }, |
| { |
| "epoch": 12.992889463477699, |
| "grad_norm": 1.1624977588653564, |
| "learning_rate": 0.001, |
| "loss": 1.9191, |
| "step": 40200 |
| }, |
| { |
| "epoch": 13.025210084033613, |
| "grad_norm": 0.8809813261032104, |
| "learning_rate": 0.001, |
| "loss": 1.8388, |
| "step": 40300 |
| }, |
| { |
| "epoch": 13.057530704589528, |
| "grad_norm": 1.2070914506912231, |
| "learning_rate": 0.001, |
| "loss": 1.8258, |
| "step": 40400 |
| }, |
| { |
| "epoch": 13.089851325145442, |
| "grad_norm": 1.0898829698562622, |
| "learning_rate": 0.001, |
| "loss": 1.864, |
| "step": 40500 |
| }, |
| { |
| "epoch": 13.122171945701357, |
| "grad_norm": 1.113163709640503, |
| "learning_rate": 0.001, |
| "loss": 1.8202, |
| "step": 40600 |
| }, |
| { |
| "epoch": 13.154492566257272, |
| "grad_norm": 1.1067817211151123, |
| "learning_rate": 0.001, |
| "loss": 1.8332, |
| "step": 40700 |
| }, |
| { |
| "epoch": 13.186813186813186, |
| "grad_norm": 1.3998067378997803, |
| "learning_rate": 0.001, |
| "loss": 1.8404, |
| "step": 40800 |
| }, |
| { |
| "epoch": 13.2191338073691, |
| "grad_norm": 1.395437479019165, |
| "learning_rate": 0.001, |
| "loss": 1.8519, |
| "step": 40900 |
| }, |
| { |
| "epoch": 13.251454427925015, |
| "grad_norm": 1.0215939283370972, |
| "learning_rate": 0.001, |
| "loss": 1.8144, |
| "step": 41000 |
| }, |
| { |
| "epoch": 13.283775048480932, |
| "grad_norm": 1.2167524099349976, |
| "learning_rate": 0.001, |
| "loss": 1.8414, |
| "step": 41100 |
| }, |
| { |
| "epoch": 13.316095669036846, |
| "grad_norm": 1.1152799129486084, |
| "learning_rate": 0.001, |
| "loss": 1.8569, |
| "step": 41200 |
| }, |
| { |
| "epoch": 13.34841628959276, |
| "grad_norm": 1.06120765209198, |
| "learning_rate": 0.001, |
| "loss": 1.8496, |
| "step": 41300 |
| }, |
| { |
| "epoch": 13.380736910148675, |
| "grad_norm": 1.1193475723266602, |
| "learning_rate": 0.001, |
| "loss": 1.8679, |
| "step": 41400 |
| }, |
| { |
| "epoch": 13.41305753070459, |
| "grad_norm": 1.1804929971694946, |
| "learning_rate": 0.001, |
| "loss": 1.8768, |
| "step": 41500 |
| }, |
| { |
| "epoch": 13.445378151260504, |
| "grad_norm": 0.9798558354377747, |
| "learning_rate": 0.001, |
| "loss": 1.8611, |
| "step": 41600 |
| }, |
| { |
| "epoch": 13.477698771816419, |
| "grad_norm": 1.071977138519287, |
| "learning_rate": 0.001, |
| "loss": 1.8651, |
| "step": 41700 |
| }, |
| { |
| "epoch": 13.510019392372334, |
| "grad_norm": 1.1536909341812134, |
| "learning_rate": 0.001, |
| "loss": 1.8736, |
| "step": 41800 |
| }, |
| { |
| "epoch": 13.542340012928248, |
| "grad_norm": 1.042947769165039, |
| "learning_rate": 0.001, |
| "loss": 1.8417, |
| "step": 41900 |
| }, |
| { |
| "epoch": 13.574660633484163, |
| "grad_norm": 1.2445282936096191, |
| "learning_rate": 0.001, |
| "loss": 1.8717, |
| "step": 42000 |
| }, |
| { |
| "epoch": 13.606981254040077, |
| "grad_norm": 1.1522380113601685, |
| "learning_rate": 0.001, |
| "loss": 1.8796, |
| "step": 42100 |
| }, |
| { |
| "epoch": 13.639301874595992, |
| "grad_norm": 0.980404257774353, |
| "learning_rate": 0.001, |
| "loss": 1.8773, |
| "step": 42200 |
| }, |
| { |
| "epoch": 13.671622495151906, |
| "grad_norm": 1.2718123197555542, |
| "learning_rate": 0.001, |
| "loss": 1.8546, |
| "step": 42300 |
| }, |
| { |
| "epoch": 13.70394311570782, |
| "grad_norm": 1.400966763496399, |
| "learning_rate": 0.001, |
| "loss": 1.8677, |
| "step": 42400 |
| }, |
| { |
| "epoch": 13.736263736263737, |
| "grad_norm": 1.1766020059585571, |
| "learning_rate": 0.001, |
| "loss": 1.8764, |
| "step": 42500 |
| }, |
| { |
| "epoch": 13.768584356819652, |
| "grad_norm": 1.259521484375, |
| "learning_rate": 0.001, |
| "loss": 1.8938, |
| "step": 42600 |
| }, |
| { |
| "epoch": 13.800904977375566, |
| "grad_norm": 1.1797550916671753, |
| "learning_rate": 0.001, |
| "loss": 1.8738, |
| "step": 42700 |
| }, |
| { |
| "epoch": 13.83322559793148, |
| "grad_norm": 0.9888365268707275, |
| "learning_rate": 0.001, |
| "loss": 1.9153, |
| "step": 42800 |
| }, |
| { |
| "epoch": 13.865546218487395, |
| "grad_norm": 1.108959436416626, |
| "learning_rate": 0.001, |
| "loss": 1.8916, |
| "step": 42900 |
| }, |
| { |
| "epoch": 13.89786683904331, |
| "grad_norm": 1.1953341960906982, |
| "learning_rate": 0.001, |
| "loss": 1.9091, |
| "step": 43000 |
| }, |
| { |
| "epoch": 13.930187459599225, |
| "grad_norm": 0.9718614816665649, |
| "learning_rate": 0.001, |
| "loss": 1.8896, |
| "step": 43100 |
| }, |
| { |
| "epoch": 13.96250808015514, |
| "grad_norm": 1.102332592010498, |
| "learning_rate": 0.001, |
| "loss": 1.8836, |
| "step": 43200 |
| }, |
| { |
| "epoch": 13.994828700711054, |
| "grad_norm": 0.9656371474266052, |
| "learning_rate": 0.001, |
| "loss": 1.8902, |
| "step": 43300 |
| }, |
| { |
| "epoch": 14.027149321266968, |
| "grad_norm": 0.9415162801742554, |
| "learning_rate": 0.001, |
| "loss": 1.8123, |
| "step": 43400 |
| }, |
| { |
| "epoch": 14.059469941822883, |
| "grad_norm": 1.268556833267212, |
| "learning_rate": 0.001, |
| "loss": 1.7736, |
| "step": 43500 |
| }, |
| { |
| "epoch": 14.091790562378797, |
| "grad_norm": 1.369551658630371, |
| "learning_rate": 0.001, |
| "loss": 1.7756, |
| "step": 43600 |
| }, |
| { |
| "epoch": 14.124111182934712, |
| "grad_norm": 1.0541764497756958, |
| "learning_rate": 0.001, |
| "loss": 1.8139, |
| "step": 43700 |
| }, |
| { |
| "epoch": 14.156431803490626, |
| "grad_norm": 1.0418062210083008, |
| "learning_rate": 0.001, |
| "loss": 1.7946, |
| "step": 43800 |
| }, |
| { |
| "epoch": 14.188752424046541, |
| "grad_norm": 1.2742817401885986, |
| "learning_rate": 0.001, |
| "loss": 1.8125, |
| "step": 43900 |
| }, |
| { |
| "epoch": 14.221073044602456, |
| "grad_norm": 1.1431653499603271, |
| "learning_rate": 0.001, |
| "loss": 1.8091, |
| "step": 44000 |
| }, |
| { |
| "epoch": 14.25339366515837, |
| "grad_norm": 1.1454989910125732, |
| "learning_rate": 0.001, |
| "loss": 1.8023, |
| "step": 44100 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 1.1051077842712402, |
| "learning_rate": 0.001, |
| "loss": 1.787, |
| "step": 44200 |
| }, |
| { |
| "epoch": 14.318034906270201, |
| "grad_norm": 1.0722583532333374, |
| "learning_rate": 0.001, |
| "loss": 1.8183, |
| "step": 44300 |
| }, |
| { |
| "epoch": 14.350355526826116, |
| "grad_norm": 0.8678897619247437, |
| "learning_rate": 0.001, |
| "loss": 1.8147, |
| "step": 44400 |
| }, |
| { |
| "epoch": 14.38267614738203, |
| "grad_norm": 0.9575488567352295, |
| "learning_rate": 0.001, |
| "loss": 1.8209, |
| "step": 44500 |
| }, |
| { |
| "epoch": 14.414996767937945, |
| "grad_norm": 1.1212340593338013, |
| "learning_rate": 0.001, |
| "loss": 1.8389, |
| "step": 44600 |
| }, |
| { |
| "epoch": 14.44731738849386, |
| "grad_norm": 3.612347364425659, |
| "learning_rate": 0.001, |
| "loss": 1.8051, |
| "step": 44700 |
| }, |
| { |
| "epoch": 14.479638009049774, |
| "grad_norm": 1.1123112440109253, |
| "learning_rate": 0.001, |
| "loss": 1.8191, |
| "step": 44800 |
| }, |
| { |
| "epoch": 14.511958629605688, |
| "grad_norm": 1.093819260597229, |
| "learning_rate": 0.001, |
| "loss": 1.8469, |
| "step": 44900 |
| }, |
| { |
| "epoch": 14.544279250161603, |
| "grad_norm": 1.1778242588043213, |
| "learning_rate": 0.001, |
| "loss": 1.8386, |
| "step": 45000 |
| }, |
| { |
| "epoch": 14.576599870717518, |
| "grad_norm": 1.2398154735565186, |
| "learning_rate": 0.001, |
| "loss": 1.8262, |
| "step": 45100 |
| }, |
| { |
| "epoch": 14.608920491273432, |
| "grad_norm": 1.4576246738433838, |
| "learning_rate": 0.001, |
| "loss": 1.8475, |
| "step": 45200 |
| }, |
| { |
| "epoch": 14.641241111829347, |
| "grad_norm": 1.1424200534820557, |
| "learning_rate": 0.001, |
| "loss": 1.8487, |
| "step": 45300 |
| }, |
| { |
| "epoch": 14.673561732385261, |
| "grad_norm": 1.0567972660064697, |
| "learning_rate": 0.001, |
| "loss": 1.8465, |
| "step": 45400 |
| }, |
| { |
| "epoch": 14.705882352941176, |
| "grad_norm": 1.2708767652511597, |
| "learning_rate": 0.001, |
| "loss": 1.8401, |
| "step": 45500 |
| }, |
| { |
| "epoch": 14.738202973497092, |
| "grad_norm": 1.1260626316070557, |
| "learning_rate": 0.001, |
| "loss": 1.8524, |
| "step": 45600 |
| }, |
| { |
| "epoch": 14.770523594053007, |
| "grad_norm": 1.2315834760665894, |
| "learning_rate": 0.001, |
| "loss": 1.8688, |
| "step": 45700 |
| }, |
| { |
| "epoch": 14.802844214608921, |
| "grad_norm": 1.148695945739746, |
| "learning_rate": 0.001, |
| "loss": 1.858, |
| "step": 45800 |
| }, |
| { |
| "epoch": 14.835164835164836, |
| "grad_norm": 1.004560112953186, |
| "learning_rate": 0.001, |
| "loss": 1.8637, |
| "step": 45900 |
| }, |
| { |
| "epoch": 14.86748545572075, |
| "grad_norm": 1.2938129901885986, |
| "learning_rate": 0.001, |
| "loss": 1.8759, |
| "step": 46000 |
| }, |
| { |
| "epoch": 14.899806076276665, |
| "grad_norm": 0.936123251914978, |
| "learning_rate": 0.001, |
| "loss": 1.8408, |
| "step": 46100 |
| }, |
| { |
| "epoch": 14.93212669683258, |
| "grad_norm": 1.173580288887024, |
| "learning_rate": 0.001, |
| "loss": 1.8871, |
| "step": 46200 |
| }, |
| { |
| "epoch": 14.964447317388494, |
| "grad_norm": 1.340844988822937, |
| "learning_rate": 0.001, |
| "loss": 1.8669, |
| "step": 46300 |
| }, |
| { |
| "epoch": 14.996767937944409, |
| "grad_norm": 1.3199015855789185, |
| "learning_rate": 0.001, |
| "loss": 1.875, |
| "step": 46400 |
| }, |
| { |
| "epoch": 15.029088558500323, |
| "grad_norm": 1.3935327529907227, |
| "learning_rate": 0.001, |
| "loss": 1.7577, |
| "step": 46500 |
| }, |
| { |
| "epoch": 15.061409179056238, |
| "grad_norm": 1.2479134798049927, |
| "learning_rate": 0.001, |
| "loss": 1.7524, |
| "step": 46600 |
| }, |
| { |
| "epoch": 15.093729799612152, |
| "grad_norm": 1.3339827060699463, |
| "learning_rate": 0.001, |
| "loss": 1.778, |
| "step": 46700 |
| }, |
| { |
| "epoch": 15.126050420168067, |
| "grad_norm": 1.185196042060852, |
| "learning_rate": 0.001, |
| "loss": 1.7368, |
| "step": 46800 |
| }, |
| { |
| "epoch": 15.158371040723981, |
| "grad_norm": 1.2535922527313232, |
| "learning_rate": 0.001, |
| "loss": 1.7511, |
| "step": 46900 |
| }, |
| { |
| "epoch": 15.190691661279896, |
| "grad_norm": 1.3402800559997559, |
| "learning_rate": 0.001, |
| "loss": 1.7662, |
| "step": 47000 |
| }, |
| { |
| "epoch": 15.22301228183581, |
| "grad_norm": 1.272594928741455, |
| "learning_rate": 0.001, |
| "loss": 1.7792, |
| "step": 47100 |
| }, |
| { |
| "epoch": 15.255332902391725, |
| "grad_norm": 1.1950360536575317, |
| "learning_rate": 0.001, |
| "loss": 1.8125, |
| "step": 47200 |
| }, |
| { |
| "epoch": 15.287653522947641, |
| "grad_norm": 1.2349659204483032, |
| "learning_rate": 0.001, |
| "loss": 1.7832, |
| "step": 47300 |
| }, |
| { |
| "epoch": 15.319974143503556, |
| "grad_norm": 1.061517357826233, |
| "learning_rate": 0.001, |
| "loss": 1.7894, |
| "step": 47400 |
| }, |
| { |
| "epoch": 15.35229476405947, |
| "grad_norm": 1.3945839405059814, |
| "learning_rate": 0.001, |
| "loss": 1.809, |
| "step": 47500 |
| }, |
| { |
| "epoch": 15.384615384615385, |
| "grad_norm": 1.3774470090866089, |
| "learning_rate": 0.001, |
| "loss": 1.7642, |
| "step": 47600 |
| }, |
| { |
| "epoch": 15.4169360051713, |
| "grad_norm": 1.2366855144500732, |
| "learning_rate": 0.001, |
| "loss": 1.7812, |
| "step": 47700 |
| }, |
| { |
| "epoch": 15.449256625727214, |
| "grad_norm": 1.4002078771591187, |
| "learning_rate": 0.001, |
| "loss": 1.8015, |
| "step": 47800 |
| }, |
| { |
| "epoch": 15.481577246283129, |
| "grad_norm": 1.4305744171142578, |
| "learning_rate": 0.001, |
| "loss": 1.7981, |
| "step": 47900 |
| }, |
| { |
| "epoch": 15.513897866839043, |
| "grad_norm": 1.2977144718170166, |
| "learning_rate": 0.001, |
| "loss": 1.8124, |
| "step": 48000 |
| }, |
| { |
| "epoch": 15.546218487394958, |
| "grad_norm": 1.0210585594177246, |
| "learning_rate": 0.001, |
| "loss": 1.8203, |
| "step": 48100 |
| }, |
| { |
| "epoch": 15.578539107950872, |
| "grad_norm": 1.2214510440826416, |
| "learning_rate": 0.001, |
| "loss": 1.8164, |
| "step": 48200 |
| }, |
| { |
| "epoch": 15.610859728506787, |
| "grad_norm": 1.0851165056228638, |
| "learning_rate": 0.001, |
| "loss": 1.7905, |
| "step": 48300 |
| }, |
| { |
| "epoch": 15.643180349062701, |
| "grad_norm": 2.2630977630615234, |
| "learning_rate": 0.001, |
| "loss": 1.8129, |
| "step": 48400 |
| }, |
| { |
| "epoch": 15.675500969618616, |
| "grad_norm": 1.0727578401565552, |
| "learning_rate": 0.001, |
| "loss": 1.8309, |
| "step": 48500 |
| }, |
| { |
| "epoch": 15.70782159017453, |
| "grad_norm": 1.122301459312439, |
| "learning_rate": 0.001, |
| "loss": 1.8025, |
| "step": 48600 |
| }, |
| { |
| "epoch": 15.740142210730447, |
| "grad_norm": 1.1632217168807983, |
| "learning_rate": 0.001, |
| "loss": 1.8057, |
| "step": 48700 |
| }, |
| { |
| "epoch": 15.772462831286362, |
| "grad_norm": 1.2424322366714478, |
| "learning_rate": 0.001, |
| "loss": 1.8345, |
| "step": 48800 |
| }, |
| { |
| "epoch": 15.804783451842276, |
| "grad_norm": 0.9887394309043884, |
| "learning_rate": 0.001, |
| "loss": 1.8425, |
| "step": 48900 |
| }, |
| { |
| "epoch": 15.83710407239819, |
| "grad_norm": 1.1400974988937378, |
| "learning_rate": 0.001, |
| "loss": 1.8399, |
| "step": 49000 |
| }, |
| { |
| "epoch": 15.869424692954105, |
| "grad_norm": 1.1223629713058472, |
| "learning_rate": 0.001, |
| "loss": 1.813, |
| "step": 49100 |
| }, |
| { |
| "epoch": 15.90174531351002, |
| "grad_norm": 1.370543122291565, |
| "learning_rate": 0.001, |
| "loss": 1.831, |
| "step": 49200 |
| }, |
| { |
| "epoch": 15.934065934065934, |
| "grad_norm": 1.1231876611709595, |
| "learning_rate": 0.001, |
| "loss": 1.8377, |
| "step": 49300 |
| }, |
| { |
| "epoch": 15.966386554621849, |
| "grad_norm": 1.1462494134902954, |
| "learning_rate": 0.001, |
| "loss": 1.843, |
| "step": 49400 |
| }, |
| { |
| "epoch": 15.998707175177763, |
| "grad_norm": 1.1465389728546143, |
| "learning_rate": 0.001, |
| "loss": 1.7893, |
| "step": 49500 |
| }, |
| { |
| "epoch": 16.031027795733678, |
| "grad_norm": 1.4136383533477783, |
| "learning_rate": 0.001, |
| "loss": 1.7237, |
| "step": 49600 |
| }, |
| { |
| "epoch": 16.063348416289593, |
| "grad_norm": 1.3214054107666016, |
| "learning_rate": 0.001, |
| "loss": 1.7424, |
| "step": 49700 |
| }, |
| { |
| "epoch": 16.095669036845507, |
| "grad_norm": 1.1296522617340088, |
| "learning_rate": 0.001, |
| "loss": 1.7382, |
| "step": 49800 |
| }, |
| { |
| "epoch": 16.12798965740142, |
| "grad_norm": 1.1522901058197021, |
| "learning_rate": 0.001, |
| "loss": 1.7455, |
| "step": 49900 |
| }, |
| { |
| "epoch": 16.160310277957336, |
| "grad_norm": 1.4975008964538574, |
| "learning_rate": 0.001, |
| "loss": 1.7403, |
| "step": 50000 |
| }, |
| { |
| "epoch": 16.19263089851325, |
| "grad_norm": 1.11152982711792, |
| "learning_rate": 0.001, |
| "loss": 1.7476, |
| "step": 50100 |
| }, |
| { |
| "epoch": 16.224951519069165, |
| "grad_norm": 1.299704670906067, |
| "learning_rate": 0.001, |
| "loss": 1.739, |
| "step": 50200 |
| }, |
| { |
| "epoch": 16.25727213962508, |
| "grad_norm": 1.0670026540756226, |
| "learning_rate": 0.001, |
| "loss": 1.7473, |
| "step": 50300 |
| }, |
| { |
| "epoch": 16.289592760180994, |
| "grad_norm": 1.260867714881897, |
| "learning_rate": 0.001, |
| "loss": 1.7428, |
| "step": 50400 |
| }, |
| { |
| "epoch": 16.32191338073691, |
| "grad_norm": 1.5163524150848389, |
| "learning_rate": 0.001, |
| "loss": 1.7542, |
| "step": 50500 |
| }, |
| { |
| "epoch": 16.354234001292824, |
| "grad_norm": 1.0129752159118652, |
| "learning_rate": 0.001, |
| "loss": 1.7653, |
| "step": 50600 |
| }, |
| { |
| "epoch": 16.386554621848738, |
| "grad_norm": 1.3281183242797852, |
| "learning_rate": 0.001, |
| "loss": 1.7604, |
| "step": 50700 |
| }, |
| { |
| "epoch": 16.418875242404653, |
| "grad_norm": 1.205254077911377, |
| "learning_rate": 0.001, |
| "loss": 1.7695, |
| "step": 50800 |
| }, |
| { |
| "epoch": 16.451195862960567, |
| "grad_norm": 1.1382715702056885, |
| "learning_rate": 0.001, |
| "loss": 1.7736, |
| "step": 50900 |
| }, |
| { |
| "epoch": 16.483516483516482, |
| "grad_norm": 1.2368077039718628, |
| "learning_rate": 0.001, |
| "loss": 1.7838, |
| "step": 51000 |
| }, |
| { |
| "epoch": 16.5158371040724, |
| "grad_norm": 1.3391265869140625, |
| "learning_rate": 0.001, |
| "loss": 1.7735, |
| "step": 51100 |
| }, |
| { |
| "epoch": 16.548157724628314, |
| "grad_norm": 1.1239650249481201, |
| "learning_rate": 0.001, |
| "loss": 1.7567, |
| "step": 51200 |
| }, |
| { |
| "epoch": 16.58047834518423, |
| "grad_norm": 1.0369645357131958, |
| "learning_rate": 0.001, |
| "loss": 1.7963, |
| "step": 51300 |
| }, |
| { |
| "epoch": 16.612798965740144, |
| "grad_norm": 1.4317647218704224, |
| "learning_rate": 0.001, |
| "loss": 1.781, |
| "step": 51400 |
| }, |
| { |
| "epoch": 16.645119586296058, |
| "grad_norm": 1.2334084510803223, |
| "learning_rate": 0.001, |
| "loss": 1.7876, |
| "step": 51500 |
| }, |
| { |
| "epoch": 16.677440206851973, |
| "grad_norm": 1.2364823818206787, |
| "learning_rate": 0.001, |
| "loss": 1.7724, |
| "step": 51600 |
| }, |
| { |
| "epoch": 16.709760827407887, |
| "grad_norm": 1.032331943511963, |
| "learning_rate": 0.001, |
| "loss": 1.7978, |
| "step": 51700 |
| }, |
| { |
| "epoch": 16.742081447963802, |
| "grad_norm": 1.4971531629562378, |
| "learning_rate": 0.001, |
| "loss": 1.7797, |
| "step": 51800 |
| }, |
| { |
| "epoch": 16.774402068519716, |
| "grad_norm": 1.1399532556533813, |
| "learning_rate": 0.001, |
| "loss": 1.7998, |
| "step": 51900 |
| }, |
| { |
| "epoch": 16.80672268907563, |
| "grad_norm": 4.143343925476074, |
| "learning_rate": 0.001, |
| "loss": 1.8055, |
| "step": 52000 |
| }, |
| { |
| "epoch": 16.839043309631545, |
| "grad_norm": 1.2093541622161865, |
| "learning_rate": 0.001, |
| "loss": 1.785, |
| "step": 52100 |
| }, |
| { |
| "epoch": 16.87136393018746, |
| "grad_norm": 1.04611337184906, |
| "learning_rate": 0.001, |
| "loss": 1.7681, |
| "step": 52200 |
| }, |
| { |
| "epoch": 16.903684550743375, |
| "grad_norm": 1.2969337701797485, |
| "learning_rate": 0.001, |
| "loss": 1.7753, |
| "step": 52300 |
| }, |
| { |
| "epoch": 16.93600517129929, |
| "grad_norm": 0.9581918716430664, |
| "learning_rate": 0.001, |
| "loss": 1.7911, |
| "step": 52400 |
| }, |
| { |
| "epoch": 16.968325791855204, |
| "grad_norm": 1.0658535957336426, |
| "learning_rate": 0.001, |
| "loss": 1.8223, |
| "step": 52500 |
| }, |
| { |
| "epoch": 17.00064641241112, |
| "grad_norm": 1.322858214378357, |
| "learning_rate": 0.001, |
| "loss": 1.7659, |
| "step": 52600 |
| }, |
| { |
| "epoch": 17.032967032967033, |
| "grad_norm": 1.4845845699310303, |
| "learning_rate": 0.001, |
| "loss": 1.6901, |
| "step": 52700 |
| }, |
| { |
| "epoch": 17.065287653522947, |
| "grad_norm": 1.4620217084884644, |
| "learning_rate": 0.001, |
| "loss": 1.6839, |
| "step": 52800 |
| }, |
| { |
| "epoch": 17.097608274078862, |
| "grad_norm": 1.4743014574050903, |
| "learning_rate": 0.001, |
| "loss": 1.6718, |
| "step": 52900 |
| }, |
| { |
| "epoch": 17.129928894634777, |
| "grad_norm": 1.7777990102767944, |
| "learning_rate": 0.001, |
| "loss": 1.6903, |
| "step": 53000 |
| }, |
| { |
| "epoch": 17.16224951519069, |
| "grad_norm": 1.8911314010620117, |
| "learning_rate": 0.001, |
| "loss": 1.7087, |
| "step": 53100 |
| }, |
| { |
| "epoch": 17.194570135746606, |
| "grad_norm": 1.5767875909805298, |
| "learning_rate": 0.001, |
| "loss": 1.7232, |
| "step": 53200 |
| }, |
| { |
| "epoch": 17.22689075630252, |
| "grad_norm": 1.7391340732574463, |
| "learning_rate": 0.001, |
| "loss": 1.7499, |
| "step": 53300 |
| }, |
| { |
| "epoch": 17.259211376858435, |
| "grad_norm": 1.4621258974075317, |
| "learning_rate": 0.001, |
| "loss": 1.7194, |
| "step": 53400 |
| }, |
| { |
| "epoch": 17.29153199741435, |
| "grad_norm": 2.076847553253174, |
| "learning_rate": 0.001, |
| "loss": 1.7175, |
| "step": 53500 |
| }, |
| { |
| "epoch": 17.323852617970264, |
| "grad_norm": 1.7916910648345947, |
| "learning_rate": 0.001, |
| "loss": 1.7091, |
| "step": 53600 |
| }, |
| { |
| "epoch": 17.35617323852618, |
| "grad_norm": 1.8870574235916138, |
| "learning_rate": 0.001, |
| "loss": 1.741, |
| "step": 53700 |
| }, |
| { |
| "epoch": 17.388493859082093, |
| "grad_norm": 1.267464518547058, |
| "learning_rate": 0.001, |
| "loss": 1.7362, |
| "step": 53800 |
| }, |
| { |
| "epoch": 17.420814479638008, |
| "grad_norm": 1.4546117782592773, |
| "learning_rate": 0.001, |
| "loss": 1.7316, |
| "step": 53900 |
| }, |
| { |
| "epoch": 17.453135100193922, |
| "grad_norm": 1.8361940383911133, |
| "learning_rate": 0.001, |
| "loss": 1.7163, |
| "step": 54000 |
| }, |
| { |
| "epoch": 17.485455720749837, |
| "grad_norm": 1.8543264865875244, |
| "learning_rate": 0.001, |
| "loss": 1.7469, |
| "step": 54100 |
| }, |
| { |
| "epoch": 17.517776341305755, |
| "grad_norm": 1.6080793142318726, |
| "learning_rate": 0.001, |
| "loss": 1.7247, |
| "step": 54200 |
| }, |
| { |
| "epoch": 17.55009696186167, |
| "grad_norm": 1.7696819305419922, |
| "learning_rate": 0.001, |
| "loss": 1.7532, |
| "step": 54300 |
| }, |
| { |
| "epoch": 17.582417582417584, |
| "grad_norm": 1.9339938163757324, |
| "learning_rate": 0.001, |
| "loss": 1.7803, |
| "step": 54400 |
| }, |
| { |
| "epoch": 17.6147382029735, |
| "grad_norm": 1.6124098300933838, |
| "learning_rate": 0.001, |
| "loss": 1.7597, |
| "step": 54500 |
| }, |
| { |
| "epoch": 17.647058823529413, |
| "grad_norm": 1.9301472902297974, |
| "learning_rate": 0.001, |
| "loss": 1.7487, |
| "step": 54600 |
| }, |
| { |
| "epoch": 17.679379444085328, |
| "grad_norm": 1.3814643621444702, |
| "learning_rate": 0.001, |
| "loss": 1.7435, |
| "step": 54700 |
| }, |
| { |
| "epoch": 17.711700064641242, |
| "grad_norm": 2.3363003730773926, |
| "learning_rate": 0.001, |
| "loss": 1.7477, |
| "step": 54800 |
| }, |
| { |
| "epoch": 17.744020685197157, |
| "grad_norm": 1.5960017442703247, |
| "learning_rate": 0.001, |
| "loss": 1.7944, |
| "step": 54900 |
| }, |
| { |
| "epoch": 17.77634130575307, |
| "grad_norm": 1.4182417392730713, |
| "learning_rate": 0.001, |
| "loss": 1.757, |
| "step": 55000 |
| }, |
| { |
| "epoch": 17.808661926308986, |
| "grad_norm": 1.5050570964813232, |
| "learning_rate": 0.001, |
| "loss": 1.7788, |
| "step": 55100 |
| }, |
| { |
| "epoch": 17.8409825468649, |
| "grad_norm": 1.7510799169540405, |
| "learning_rate": 0.001, |
| "loss": 1.7685, |
| "step": 55200 |
| }, |
| { |
| "epoch": 17.873303167420815, |
| "grad_norm": 1.8426209688186646, |
| "learning_rate": 0.001, |
| "loss": 1.7701, |
| "step": 55300 |
| }, |
| { |
| "epoch": 17.90562378797673, |
| "grad_norm": 1.778796672821045, |
| "learning_rate": 0.001, |
| "loss": 1.7546, |
| "step": 55400 |
| }, |
| { |
| "epoch": 17.937944408532644, |
| "grad_norm": 1.843400001525879, |
| "learning_rate": 0.001, |
| "loss": 1.7567, |
| "step": 55500 |
| }, |
| { |
| "epoch": 17.97026502908856, |
| "grad_norm": 1.376536250114441, |
| "learning_rate": 0.001, |
| "loss": 1.7876, |
| "step": 55600 |
| }, |
| { |
| "epoch": 18.002585649644473, |
| "grad_norm": 1.459142804145813, |
| "learning_rate": 0.001, |
| "loss": 1.7969, |
| "step": 55700 |
| }, |
| { |
| "epoch": 18.034906270200388, |
| "grad_norm": 1.385663628578186, |
| "learning_rate": 0.001, |
| "loss": 1.6557, |
| "step": 55800 |
| }, |
| { |
| "epoch": 18.067226890756302, |
| "grad_norm": 1.577804446220398, |
| "learning_rate": 0.001, |
| "loss": 1.651, |
| "step": 55900 |
| }, |
| { |
| "epoch": 18.099547511312217, |
| "grad_norm": 0.9823166131973267, |
| "learning_rate": 0.001, |
| "loss": 1.6503, |
| "step": 56000 |
| }, |
| { |
| "epoch": 18.13186813186813, |
| "grad_norm": 1.701475739479065, |
| "learning_rate": 0.001, |
| "loss": 1.6771, |
| "step": 56100 |
| }, |
| { |
| "epoch": 18.164188752424046, |
| "grad_norm": 1.289601445198059, |
| "learning_rate": 0.001, |
| "loss": 1.6993, |
| "step": 56200 |
| }, |
| { |
| "epoch": 18.19650937297996, |
| "grad_norm": 2.680657386779785, |
| "learning_rate": 0.001, |
| "loss": 1.692, |
| "step": 56300 |
| }, |
| { |
| "epoch": 18.228829993535875, |
| "grad_norm": 1.2981613874435425, |
| "learning_rate": 0.001, |
| "loss": 1.7194, |
| "step": 56400 |
| }, |
| { |
| "epoch": 18.26115061409179, |
| "grad_norm": 1.715312123298645, |
| "learning_rate": 0.001, |
| "loss": 1.6828, |
| "step": 56500 |
| }, |
| { |
| "epoch": 18.293471234647704, |
| "grad_norm": 1.151294469833374, |
| "learning_rate": 0.001, |
| "loss": 1.6828, |
| "step": 56600 |
| }, |
| { |
| "epoch": 18.32579185520362, |
| "grad_norm": 1.3789284229278564, |
| "learning_rate": 0.001, |
| "loss": 1.718, |
| "step": 56700 |
| }, |
| { |
| "epoch": 18.358112475759533, |
| "grad_norm": 1.1405805349349976, |
| "learning_rate": 0.001, |
| "loss": 1.65, |
| "step": 56800 |
| }, |
| { |
| "epoch": 18.390433096315448, |
| "grad_norm": 1.230582594871521, |
| "learning_rate": 0.001, |
| "loss": 1.7174, |
| "step": 56900 |
| }, |
| { |
| "epoch": 18.422753716871362, |
| "grad_norm": 1.3198965787887573, |
| "learning_rate": 0.001, |
| "loss": 1.7173, |
| "step": 57000 |
| }, |
| { |
| "epoch": 18.455074337427277, |
| "grad_norm": 1.647703766822815, |
| "learning_rate": 0.001, |
| "loss": 1.7111, |
| "step": 57100 |
| }, |
| { |
| "epoch": 18.48739495798319, |
| "grad_norm": 1.202138066291809, |
| "learning_rate": 0.001, |
| "loss": 1.6879, |
| "step": 57200 |
| }, |
| { |
| "epoch": 18.51971557853911, |
| "grad_norm": 1.23879075050354, |
| "learning_rate": 0.001, |
| "loss": 1.734, |
| "step": 57300 |
| }, |
| { |
| "epoch": 18.552036199095024, |
| "grad_norm": 1.201494574546814, |
| "learning_rate": 0.001, |
| "loss": 1.7566, |
| "step": 57400 |
| }, |
| { |
| "epoch": 18.58435681965094, |
| "grad_norm": 1.2573801279067993, |
| "learning_rate": 0.001, |
| "loss": 1.7185, |
| "step": 57500 |
| }, |
| { |
| "epoch": 18.616677440206853, |
| "grad_norm": 1.2716763019561768, |
| "learning_rate": 0.001, |
| "loss": 1.7072, |
| "step": 57600 |
| }, |
| { |
| "epoch": 18.648998060762768, |
| "grad_norm": 1.058627724647522, |
| "learning_rate": 0.001, |
| "loss": 1.7288, |
| "step": 57700 |
| }, |
| { |
| "epoch": 18.681318681318682, |
| "grad_norm": 1.353332281112671, |
| "learning_rate": 0.001, |
| "loss": 1.7155, |
| "step": 57800 |
| }, |
| { |
| "epoch": 18.713639301874597, |
| "grad_norm": 1.175697684288025, |
| "learning_rate": 0.001, |
| "loss": 1.6966, |
| "step": 57900 |
| }, |
| { |
| "epoch": 18.74595992243051, |
| "grad_norm": 1.0386375188827515, |
| "learning_rate": 0.001, |
| "loss": 1.7585, |
| "step": 58000 |
| }, |
| { |
| "epoch": 18.778280542986426, |
| "grad_norm": 1.2818589210510254, |
| "learning_rate": 0.001, |
| "loss": 1.7699, |
| "step": 58100 |
| }, |
| { |
| "epoch": 18.81060116354234, |
| "grad_norm": 1.291780710220337, |
| "learning_rate": 0.001, |
| "loss": 1.742, |
| "step": 58200 |
| }, |
| { |
| "epoch": 18.842921784098255, |
| "grad_norm": 1.204770803451538, |
| "learning_rate": 0.001, |
| "loss": 1.7425, |
| "step": 58300 |
| }, |
| { |
| "epoch": 18.87524240465417, |
| "grad_norm": 1.2708287239074707, |
| "learning_rate": 0.001, |
| "loss": 1.7588, |
| "step": 58400 |
| }, |
| { |
| "epoch": 18.907563025210084, |
| "grad_norm": 1.3189966678619385, |
| "learning_rate": 0.001, |
| "loss": 1.7107, |
| "step": 58500 |
| }, |
| { |
| "epoch": 18.939883645766, |
| "grad_norm": 1.2411608695983887, |
| "learning_rate": 0.001, |
| "loss": 1.7359, |
| "step": 58600 |
| }, |
| { |
| "epoch": 18.972204266321913, |
| "grad_norm": 1.3548706769943237, |
| "learning_rate": 0.001, |
| "loss": 1.7459, |
| "step": 58700 |
| }, |
| { |
| "epoch": 19.004524886877828, |
| "grad_norm": 1.1609103679656982, |
| "learning_rate": 0.001, |
| "loss": 1.7467, |
| "step": 58800 |
| }, |
| { |
| "epoch": 19.036845507433743, |
| "grad_norm": 1.4791889190673828, |
| "learning_rate": 0.001, |
| "loss": 1.6426, |
| "step": 58900 |
| }, |
| { |
| "epoch": 19.069166127989657, |
| "grad_norm": 1.2185018062591553, |
| "learning_rate": 0.001, |
| "loss": 1.6269, |
| "step": 59000 |
| }, |
| { |
| "epoch": 19.10148674854557, |
| "grad_norm": 1.4684048891067505, |
| "learning_rate": 0.001, |
| "loss": 1.6587, |
| "step": 59100 |
| }, |
| { |
| "epoch": 19.133807369101486, |
| "grad_norm": 1.2556350231170654, |
| "learning_rate": 0.001, |
| "loss": 1.667, |
| "step": 59200 |
| }, |
| { |
| "epoch": 19.1661279896574, |
| "grad_norm": 1.3185192346572876, |
| "learning_rate": 0.001, |
| "loss": 1.6482, |
| "step": 59300 |
| }, |
| { |
| "epoch": 19.198448610213315, |
| "grad_norm": 1.174451470375061, |
| "learning_rate": 0.001, |
| "loss": 1.6581, |
| "step": 59400 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 1.5593920946121216, |
| "learning_rate": 0.001, |
| "loss": 1.6536, |
| "step": 59500 |
| }, |
| { |
| "epoch": 19.263089851325145, |
| "grad_norm": 1.0820014476776123, |
| "learning_rate": 0.001, |
| "loss": 1.6585, |
| "step": 59600 |
| }, |
| { |
| "epoch": 19.29541047188106, |
| "grad_norm": 1.4829204082489014, |
| "learning_rate": 0.001, |
| "loss": 1.6528, |
| "step": 59700 |
| }, |
| { |
| "epoch": 19.327731092436974, |
| "grad_norm": 1.426621437072754, |
| "learning_rate": 0.001, |
| "loss": 1.6563, |
| "step": 59800 |
| }, |
| { |
| "epoch": 19.360051712992888, |
| "grad_norm": 1.2473586797714233, |
| "learning_rate": 0.001, |
| "loss": 1.6806, |
| "step": 59900 |
| }, |
| { |
| "epoch": 19.392372333548803, |
| "grad_norm": 1.1561217308044434, |
| "learning_rate": 0.001, |
| "loss": 1.6744, |
| "step": 60000 |
| }, |
| { |
| "epoch": 19.424692954104717, |
| "grad_norm": 1.4965547323226929, |
| "learning_rate": 0.001, |
| "loss": 1.6999, |
| "step": 60100 |
| }, |
| { |
| "epoch": 19.457013574660632, |
| "grad_norm": 1.284369707107544, |
| "learning_rate": 0.001, |
| "loss": 1.6816, |
| "step": 60200 |
| }, |
| { |
| "epoch": 19.489334195216546, |
| "grad_norm": 1.2189420461654663, |
| "learning_rate": 0.001, |
| "loss": 1.6687, |
| "step": 60300 |
| }, |
| { |
| "epoch": 19.521654815772465, |
| "grad_norm": 1.1701105833053589, |
| "learning_rate": 0.001, |
| "loss": 1.6865, |
| "step": 60400 |
| }, |
| { |
| "epoch": 19.55397543632838, |
| "grad_norm": 1.2209968566894531, |
| "learning_rate": 0.001, |
| "loss": 1.6981, |
| "step": 60500 |
| }, |
| { |
| "epoch": 19.586296056884294, |
| "grad_norm": 0.8396730422973633, |
| "learning_rate": 0.001, |
| "loss": 1.6924, |
| "step": 60600 |
| }, |
| { |
| "epoch": 19.618616677440208, |
| "grad_norm": 1.237061858177185, |
| "learning_rate": 0.001, |
| "loss": 1.6996, |
| "step": 60700 |
| }, |
| { |
| "epoch": 19.650937297996123, |
| "grad_norm": 1.1365478038787842, |
| "learning_rate": 0.001, |
| "loss": 1.6981, |
| "step": 60800 |
| }, |
| { |
| "epoch": 19.683257918552037, |
| "grad_norm": 1.2317266464233398, |
| "learning_rate": 0.001, |
| "loss": 1.6784, |
| "step": 60900 |
| }, |
| { |
| "epoch": 19.715578539107952, |
| "grad_norm": 1.3470901250839233, |
| "learning_rate": 0.001, |
| "loss": 1.6913, |
| "step": 61000 |
| }, |
| { |
| "epoch": 19.747899159663866, |
| "grad_norm": 1.2688798904418945, |
| "learning_rate": 0.001, |
| "loss": 1.6841, |
| "step": 61100 |
| }, |
| { |
| "epoch": 19.78021978021978, |
| "grad_norm": 1.069466471672058, |
| "learning_rate": 0.001, |
| "loss": 1.7036, |
| "step": 61200 |
| }, |
| { |
| "epoch": 19.812540400775696, |
| "grad_norm": 1.6772282123565674, |
| "learning_rate": 0.001, |
| "loss": 1.7298, |
| "step": 61300 |
| }, |
| { |
| "epoch": 19.84486102133161, |
| "grad_norm": 1.3225016593933105, |
| "learning_rate": 0.001, |
| "loss": 1.711, |
| "step": 61400 |
| }, |
| { |
| "epoch": 19.877181641887525, |
| "grad_norm": 1.3533378839492798, |
| "learning_rate": 0.001, |
| "loss": 1.7338, |
| "step": 61500 |
| }, |
| { |
| "epoch": 19.90950226244344, |
| "grad_norm": 1.0688648223876953, |
| "learning_rate": 0.001, |
| "loss": 1.7381, |
| "step": 61600 |
| }, |
| { |
| "epoch": 19.941822882999354, |
| "grad_norm": 1.357704758644104, |
| "learning_rate": 0.001, |
| "loss": 1.7413, |
| "step": 61700 |
| }, |
| { |
| "epoch": 19.97414350355527, |
| "grad_norm": 1.1575063467025757, |
| "learning_rate": 0.001, |
| "loss": 1.7246, |
| "step": 61800 |
| }, |
| { |
| "epoch": 20.006464124111183, |
| "grad_norm": 1.2985217571258545, |
| "learning_rate": 0.001, |
| "loss": 1.721, |
| "step": 61900 |
| }, |
| { |
| "epoch": 20.038784744667097, |
| "grad_norm": 1.1972153186798096, |
| "learning_rate": 0.001, |
| "loss": 1.6357, |
| "step": 62000 |
| }, |
| { |
| "epoch": 20.071105365223012, |
| "grad_norm": 1.1683275699615479, |
| "learning_rate": 0.001, |
| "loss": 1.5913, |
| "step": 62100 |
| }, |
| { |
| "epoch": 20.103425985778927, |
| "grad_norm": 1.3947675228118896, |
| "learning_rate": 0.001, |
| "loss": 1.614, |
| "step": 62200 |
| }, |
| { |
| "epoch": 20.13574660633484, |
| "grad_norm": 1.3403626680374146, |
| "learning_rate": 0.001, |
| "loss": 1.606, |
| "step": 62300 |
| }, |
| { |
| "epoch": 20.168067226890756, |
| "grad_norm": 1.252703309059143, |
| "learning_rate": 0.001, |
| "loss": 1.6326, |
| "step": 62400 |
| }, |
| { |
| "epoch": 20.20038784744667, |
| "grad_norm": 1.4022725820541382, |
| "learning_rate": 0.001, |
| "loss": 1.6434, |
| "step": 62500 |
| }, |
| { |
| "epoch": 20.232708468002585, |
| "grad_norm": 1.2247436046600342, |
| "learning_rate": 0.001, |
| "loss": 1.6279, |
| "step": 62600 |
| }, |
| { |
| "epoch": 20.2650290885585, |
| "grad_norm": 1.0815917253494263, |
| "learning_rate": 0.001, |
| "loss": 1.6277, |
| "step": 62700 |
| }, |
| { |
| "epoch": 20.297349709114414, |
| "grad_norm": 1.089803695678711, |
| "learning_rate": 0.001, |
| "loss": 1.6229, |
| "step": 62800 |
| }, |
| { |
| "epoch": 20.32967032967033, |
| "grad_norm": 1.2179733514785767, |
| "learning_rate": 0.001, |
| "loss": 1.6327, |
| "step": 62900 |
| }, |
| { |
| "epoch": 20.361990950226243, |
| "grad_norm": 1.2288011312484741, |
| "learning_rate": 0.001, |
| "loss": 1.647, |
| "step": 63000 |
| }, |
| { |
| "epoch": 20.394311570782158, |
| "grad_norm": 1.0536634922027588, |
| "learning_rate": 0.001, |
| "loss": 1.6695, |
| "step": 63100 |
| }, |
| { |
| "epoch": 20.426632191338072, |
| "grad_norm": 1.0153943300247192, |
| "learning_rate": 0.001, |
| "loss": 1.664, |
| "step": 63200 |
| }, |
| { |
| "epoch": 20.458952811893987, |
| "grad_norm": 1.130033016204834, |
| "learning_rate": 0.001, |
| "loss": 1.6725, |
| "step": 63300 |
| }, |
| { |
| "epoch": 20.4912734324499, |
| "grad_norm": 1.515398621559143, |
| "learning_rate": 0.001, |
| "loss": 1.6572, |
| "step": 63400 |
| }, |
| { |
| "epoch": 20.52359405300582, |
| "grad_norm": 1.2673180103302002, |
| "learning_rate": 0.001, |
| "loss": 1.665, |
| "step": 63500 |
| }, |
| { |
| "epoch": 20.555914673561734, |
| "grad_norm": 1.1285369396209717, |
| "learning_rate": 0.001, |
| "loss": 1.661, |
| "step": 63600 |
| }, |
| { |
| "epoch": 20.58823529411765, |
| "grad_norm": 1.2637361288070679, |
| "learning_rate": 0.001, |
| "loss": 1.67, |
| "step": 63700 |
| }, |
| { |
| "epoch": 20.620555914673563, |
| "grad_norm": 1.3248004913330078, |
| "learning_rate": 0.001, |
| "loss": 1.6913, |
| "step": 63800 |
| }, |
| { |
| "epoch": 20.652876535229478, |
| "grad_norm": 1.1528964042663574, |
| "learning_rate": 0.001, |
| "loss": 1.6517, |
| "step": 63900 |
| }, |
| { |
| "epoch": 20.685197155785392, |
| "grad_norm": 1.1508557796478271, |
| "learning_rate": 0.001, |
| "loss": 1.6593, |
| "step": 64000 |
| }, |
| { |
| "epoch": 20.717517776341307, |
| "grad_norm": 1.1110694408416748, |
| "learning_rate": 0.001, |
| "loss": 1.6933, |
| "step": 64100 |
| }, |
| { |
| "epoch": 20.74983839689722, |
| "grad_norm": 1.089123010635376, |
| "learning_rate": 0.001, |
| "loss": 1.6657, |
| "step": 64200 |
| }, |
| { |
| "epoch": 20.782159017453136, |
| "grad_norm": 1.448936939239502, |
| "learning_rate": 0.001, |
| "loss": 1.7123, |
| "step": 64300 |
| }, |
| { |
| "epoch": 20.81447963800905, |
| "grad_norm": 1.3776893615722656, |
| "learning_rate": 0.001, |
| "loss": 1.7107, |
| "step": 64400 |
| }, |
| { |
| "epoch": 20.846800258564965, |
| "grad_norm": 1.4579541683197021, |
| "learning_rate": 0.001, |
| "loss": 1.6942, |
| "step": 64500 |
| }, |
| { |
| "epoch": 20.87912087912088, |
| "grad_norm": 1.0205689668655396, |
| "learning_rate": 0.001, |
| "loss": 1.6835, |
| "step": 64600 |
| }, |
| { |
| "epoch": 20.911441499676794, |
| "grad_norm": 1.1773357391357422, |
| "learning_rate": 0.001, |
| "loss": 1.6896, |
| "step": 64700 |
| }, |
| { |
| "epoch": 20.94376212023271, |
| "grad_norm": 1.2993382215499878, |
| "learning_rate": 0.001, |
| "loss": 1.6996, |
| "step": 64800 |
| }, |
| { |
| "epoch": 20.976082740788623, |
| "grad_norm": 0.9848776459693909, |
| "learning_rate": 0.001, |
| "loss": 1.6891, |
| "step": 64900 |
| }, |
| { |
| "epoch": 21.008403361344538, |
| "grad_norm": 1.2597471475601196, |
| "learning_rate": 0.001, |
| "loss": 1.6802, |
| "step": 65000 |
| }, |
| { |
| "epoch": 21.040723981900452, |
| "grad_norm": 1.2813159227371216, |
| "learning_rate": 0.001, |
| "loss": 1.5808, |
| "step": 65100 |
| }, |
| { |
| "epoch": 21.073044602456367, |
| "grad_norm": 1.2395695447921753, |
| "learning_rate": 0.001, |
| "loss": 1.5823, |
| "step": 65200 |
| }, |
| { |
| "epoch": 21.10536522301228, |
| "grad_norm": 1.3502123355865479, |
| "learning_rate": 0.001, |
| "loss": 1.5888, |
| "step": 65300 |
| }, |
| { |
| "epoch": 21.137685843568196, |
| "grad_norm": 1.1473206281661987, |
| "learning_rate": 0.001, |
| "loss": 1.6125, |
| "step": 65400 |
| }, |
| { |
| "epoch": 21.17000646412411, |
| "grad_norm": 1.0330958366394043, |
| "learning_rate": 0.001, |
| "loss": 1.6036, |
| "step": 65500 |
| }, |
| { |
| "epoch": 21.202327084680025, |
| "grad_norm": 1.2344865798950195, |
| "learning_rate": 0.001, |
| "loss": 1.6114, |
| "step": 65600 |
| }, |
| { |
| "epoch": 21.23464770523594, |
| "grad_norm": 1.0328295230865479, |
| "learning_rate": 0.001, |
| "loss": 1.612, |
| "step": 65700 |
| }, |
| { |
| "epoch": 21.266968325791854, |
| "grad_norm": 1.1887953281402588, |
| "learning_rate": 0.001, |
| "loss": 1.6174, |
| "step": 65800 |
| }, |
| { |
| "epoch": 21.29928894634777, |
| "grad_norm": 3.9064321517944336, |
| "learning_rate": 0.001, |
| "loss": 1.625, |
| "step": 65900 |
| }, |
| { |
| "epoch": 21.331609566903683, |
| "grad_norm": 1.2309563159942627, |
| "learning_rate": 0.001, |
| "loss": 1.632, |
| "step": 66000 |
| }, |
| { |
| "epoch": 21.363930187459598, |
| "grad_norm": 1.387037992477417, |
| "learning_rate": 0.001, |
| "loss": 1.62, |
| "step": 66100 |
| }, |
| { |
| "epoch": 21.396250808015512, |
| "grad_norm": 1.321847677230835, |
| "learning_rate": 0.001, |
| "loss": 1.6048, |
| "step": 66200 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 1.0067797899246216, |
| "learning_rate": 0.001, |
| "loss": 1.6096, |
| "step": 66300 |
| }, |
| { |
| "epoch": 21.46089204912734, |
| "grad_norm": 1.2617535591125488, |
| "learning_rate": 0.001, |
| "loss": 1.6441, |
| "step": 66400 |
| }, |
| { |
| "epoch": 21.49321266968326, |
| "grad_norm": 1.4177860021591187, |
| "learning_rate": 0.001, |
| "loss": 1.6144, |
| "step": 66500 |
| }, |
| { |
| "epoch": 21.525533290239174, |
| "grad_norm": 1.5710135698318481, |
| "learning_rate": 0.001, |
| "loss": 1.6221, |
| "step": 66600 |
| }, |
| { |
| "epoch": 21.55785391079509, |
| "grad_norm": 1.2180819511413574, |
| "learning_rate": 0.001, |
| "loss": 1.645, |
| "step": 66700 |
| }, |
| { |
| "epoch": 21.590174531351003, |
| "grad_norm": 1.226199984550476, |
| "learning_rate": 0.001, |
| "loss": 1.6302, |
| "step": 66800 |
| }, |
| { |
| "epoch": 21.622495151906918, |
| "grad_norm": 1.044456958770752, |
| "learning_rate": 0.001, |
| "loss": 1.6276, |
| "step": 66900 |
| }, |
| { |
| "epoch": 21.654815772462833, |
| "grad_norm": 1.2039384841918945, |
| "learning_rate": 0.001, |
| "loss": 1.6742, |
| "step": 67000 |
| }, |
| { |
| "epoch": 21.687136393018747, |
| "grad_norm": 1.0783025026321411, |
| "learning_rate": 0.001, |
| "loss": 1.6594, |
| "step": 67100 |
| }, |
| { |
| "epoch": 21.71945701357466, |
| "grad_norm": 1.4424864053726196, |
| "learning_rate": 0.001, |
| "loss": 1.6641, |
| "step": 67200 |
| }, |
| { |
| "epoch": 21.751777634130576, |
| "grad_norm": 1.9445148706436157, |
| "learning_rate": 0.001, |
| "loss": 1.6619, |
| "step": 67300 |
| }, |
| { |
| "epoch": 21.78409825468649, |
| "grad_norm": 1.016932487487793, |
| "learning_rate": 0.001, |
| "loss": 1.6451, |
| "step": 67400 |
| }, |
| { |
| "epoch": 21.816418875242405, |
| "grad_norm": 1.1570857763290405, |
| "learning_rate": 0.001, |
| "loss": 1.6723, |
| "step": 67500 |
| }, |
| { |
| "epoch": 21.84873949579832, |
| "grad_norm": 1.112880825996399, |
| "learning_rate": 0.001, |
| "loss": 1.6544, |
| "step": 67600 |
| }, |
| { |
| "epoch": 21.881060116354234, |
| "grad_norm": 1.3634724617004395, |
| "learning_rate": 0.001, |
| "loss": 1.6492, |
| "step": 67700 |
| }, |
| { |
| "epoch": 21.91338073691015, |
| "grad_norm": 1.1221792697906494, |
| "learning_rate": 0.001, |
| "loss": 1.686, |
| "step": 67800 |
| }, |
| { |
| "epoch": 21.945701357466064, |
| "grad_norm": 1.2058967351913452, |
| "learning_rate": 0.001, |
| "loss": 1.6735, |
| "step": 67900 |
| }, |
| { |
| "epoch": 21.978021978021978, |
| "grad_norm": 1.2786732912063599, |
| "learning_rate": 0.001, |
| "loss": 1.7036, |
| "step": 68000 |
| }, |
| { |
| "epoch": 22.010342598577893, |
| "grad_norm": 1.4422574043273926, |
| "learning_rate": 0.001, |
| "loss": 1.6345, |
| "step": 68100 |
| }, |
| { |
| "epoch": 22.042663219133807, |
| "grad_norm": 1.175736904144287, |
| "learning_rate": 0.001, |
| "loss": 1.5827, |
| "step": 68200 |
| }, |
| { |
| "epoch": 22.07498383968972, |
| "grad_norm": 1.1469517946243286, |
| "learning_rate": 0.001, |
| "loss": 1.571, |
| "step": 68300 |
| }, |
| { |
| "epoch": 22.107304460245636, |
| "grad_norm": 1.5781559944152832, |
| "learning_rate": 0.001, |
| "loss": 1.5555, |
| "step": 68400 |
| }, |
| { |
| "epoch": 22.13962508080155, |
| "grad_norm": 1.413362979888916, |
| "learning_rate": 0.001, |
| "loss": 1.5768, |
| "step": 68500 |
| }, |
| { |
| "epoch": 22.171945701357465, |
| "grad_norm": 1.2031275033950806, |
| "learning_rate": 0.001, |
| "loss": 1.5876, |
| "step": 68600 |
| }, |
| { |
| "epoch": 22.20426632191338, |
| "grad_norm": 1.3570070266723633, |
| "learning_rate": 0.001, |
| "loss": 1.597, |
| "step": 68700 |
| }, |
| { |
| "epoch": 22.236586942469295, |
| "grad_norm": 1.4896953105926514, |
| "learning_rate": 0.001, |
| "loss": 1.5844, |
| "step": 68800 |
| }, |
| { |
| "epoch": 22.26890756302521, |
| "grad_norm": 1.5189759731292725, |
| "learning_rate": 0.001, |
| "loss": 1.5878, |
| "step": 68900 |
| }, |
| { |
| "epoch": 22.301228183581124, |
| "grad_norm": 1.5635501146316528, |
| "learning_rate": 0.001, |
| "loss": 1.6088, |
| "step": 69000 |
| }, |
| { |
| "epoch": 22.33354880413704, |
| "grad_norm": 1.3236138820648193, |
| "learning_rate": 0.001, |
| "loss": 1.5919, |
| "step": 69100 |
| }, |
| { |
| "epoch": 22.365869424692953, |
| "grad_norm": 1.1415072679519653, |
| "learning_rate": 0.001, |
| "loss": 1.6074, |
| "step": 69200 |
| }, |
| { |
| "epoch": 22.398190045248867, |
| "grad_norm": 1.212083101272583, |
| "learning_rate": 0.001, |
| "loss": 1.5884, |
| "step": 69300 |
| }, |
| { |
| "epoch": 22.430510665804782, |
| "grad_norm": 1.3305213451385498, |
| "learning_rate": 0.001, |
| "loss": 1.5937, |
| "step": 69400 |
| }, |
| { |
| "epoch": 22.462831286360696, |
| "grad_norm": 1.476341724395752, |
| "learning_rate": 0.001, |
| "loss": 1.5918, |
| "step": 69500 |
| }, |
| { |
| "epoch": 22.49515190691661, |
| "grad_norm": 1.1090694665908813, |
| "learning_rate": 0.001, |
| "loss": 1.589, |
| "step": 69600 |
| }, |
| { |
| "epoch": 22.52747252747253, |
| "grad_norm": 1.5420668125152588, |
| "learning_rate": 0.001, |
| "loss": 1.616, |
| "step": 69700 |
| }, |
| { |
| "epoch": 22.559793148028444, |
| "grad_norm": 1.281031847000122, |
| "learning_rate": 0.001, |
| "loss": 1.6115, |
| "step": 69800 |
| }, |
| { |
| "epoch": 22.59211376858436, |
| "grad_norm": 1.1826776266098022, |
| "learning_rate": 0.001, |
| "loss": 1.6089, |
| "step": 69900 |
| }, |
| { |
| "epoch": 22.624434389140273, |
| "grad_norm": 1.137690544128418, |
| "learning_rate": 0.001, |
| "loss": 1.6299, |
| "step": 70000 |
| }, |
| { |
| "epoch": 22.656755009696187, |
| "grad_norm": 1.1811046600341797, |
| "learning_rate": 0.001, |
| "loss": 1.6457, |
| "step": 70100 |
| }, |
| { |
| "epoch": 22.689075630252102, |
| "grad_norm": 1.142443060874939, |
| "learning_rate": 0.001, |
| "loss": 1.6284, |
| "step": 70200 |
| }, |
| { |
| "epoch": 22.721396250808017, |
| "grad_norm": 1.719937801361084, |
| "learning_rate": 0.001, |
| "loss": 1.6254, |
| "step": 70300 |
| }, |
| { |
| "epoch": 22.75371687136393, |
| "grad_norm": 1.6573597192764282, |
| "learning_rate": 0.001, |
| "loss": 1.6294, |
| "step": 70400 |
| }, |
| { |
| "epoch": 22.786037491919846, |
| "grad_norm": 1.1464518308639526, |
| "learning_rate": 0.001, |
| "loss": 1.6272, |
| "step": 70500 |
| }, |
| { |
| "epoch": 22.81835811247576, |
| "grad_norm": 1.2429357767105103, |
| "learning_rate": 0.001, |
| "loss": 1.6429, |
| "step": 70600 |
| }, |
| { |
| "epoch": 22.850678733031675, |
| "grad_norm": 1.3033969402313232, |
| "learning_rate": 0.001, |
| "loss": 1.6559, |
| "step": 70700 |
| }, |
| { |
| "epoch": 22.88299935358759, |
| "grad_norm": 1.6557401418685913, |
| "learning_rate": 0.001, |
| "loss": 1.6476, |
| "step": 70800 |
| }, |
| { |
| "epoch": 22.915319974143504, |
| "grad_norm": 1.163193702697754, |
| "learning_rate": 0.001, |
| "loss": 1.6383, |
| "step": 70900 |
| }, |
| { |
| "epoch": 22.94764059469942, |
| "grad_norm": 1.5962588787078857, |
| "learning_rate": 0.001, |
| "loss": 1.6528, |
| "step": 71000 |
| }, |
| { |
| "epoch": 22.979961215255333, |
| "grad_norm": 1.1483979225158691, |
| "learning_rate": 0.001, |
| "loss": 1.6624, |
| "step": 71100 |
| }, |
| { |
| "epoch": 23.012281835811248, |
| "grad_norm": 1.3430731296539307, |
| "learning_rate": 0.001, |
| "loss": 1.5737, |
| "step": 71200 |
| }, |
| { |
| "epoch": 23.044602456367162, |
| "grad_norm": 1.3681293725967407, |
| "learning_rate": 0.001, |
| "loss": 1.5217, |
| "step": 71300 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "grad_norm": 1.325210690498352, |
| "learning_rate": 0.001, |
| "loss": 1.5488, |
| "step": 71400 |
| }, |
| { |
| "epoch": 23.10924369747899, |
| "grad_norm": 1.750307559967041, |
| "learning_rate": 0.001, |
| "loss": 1.5457, |
| "step": 71500 |
| }, |
| { |
| "epoch": 23.141564318034906, |
| "grad_norm": 1.3093291521072388, |
| "learning_rate": 0.001, |
| "loss": 1.5522, |
| "step": 71600 |
| }, |
| { |
| "epoch": 23.17388493859082, |
| "grad_norm": 1.2910124063491821, |
| "learning_rate": 0.001, |
| "loss": 1.5526, |
| "step": 71700 |
| }, |
| { |
| "epoch": 23.206205559146735, |
| "grad_norm": 1.2840170860290527, |
| "learning_rate": 0.001, |
| "loss": 1.5682, |
| "step": 71800 |
| }, |
| { |
| "epoch": 23.23852617970265, |
| "grad_norm": 1.2806519269943237, |
| "learning_rate": 0.001, |
| "loss": 1.5885, |
| "step": 71900 |
| }, |
| { |
| "epoch": 23.270846800258564, |
| "grad_norm": 1.2111592292785645, |
| "learning_rate": 0.001, |
| "loss": 1.5734, |
| "step": 72000 |
| }, |
| { |
| "epoch": 23.30316742081448, |
| "grad_norm": 1.3556594848632812, |
| "learning_rate": 0.001, |
| "loss": 1.5792, |
| "step": 72100 |
| }, |
| { |
| "epoch": 23.335488041370393, |
| "grad_norm": 1.2785731554031372, |
| "learning_rate": 0.001, |
| "loss": 1.5682, |
| "step": 72200 |
| }, |
| { |
| "epoch": 23.367808661926308, |
| "grad_norm": 1.3190655708312988, |
| "learning_rate": 0.001, |
| "loss": 1.5695, |
| "step": 72300 |
| }, |
| { |
| "epoch": 23.400129282482222, |
| "grad_norm": 1.1459965705871582, |
| "learning_rate": 0.001, |
| "loss": 1.5899, |
| "step": 72400 |
| }, |
| { |
| "epoch": 23.432449903038137, |
| "grad_norm": 1.3239068984985352, |
| "learning_rate": 0.001, |
| "loss": 1.5818, |
| "step": 72500 |
| }, |
| { |
| "epoch": 23.46477052359405, |
| "grad_norm": 1.6447606086730957, |
| "learning_rate": 0.001, |
| "loss": 1.5798, |
| "step": 72600 |
| }, |
| { |
| "epoch": 23.49709114414997, |
| "grad_norm": 1.2553894519805908, |
| "learning_rate": 0.001, |
| "loss": 1.5897, |
| "step": 72700 |
| }, |
| { |
| "epoch": 23.529411764705884, |
| "grad_norm": 1.5192135572433472, |
| "learning_rate": 0.001, |
| "loss": 1.5671, |
| "step": 72800 |
| }, |
| { |
| "epoch": 23.5617323852618, |
| "grad_norm": 1.1816911697387695, |
| "learning_rate": 0.001, |
| "loss": 1.5876, |
| "step": 72900 |
| }, |
| { |
| "epoch": 23.594053005817713, |
| "grad_norm": 1.2005702257156372, |
| "learning_rate": 0.001, |
| "loss": 1.5963, |
| "step": 73000 |
| }, |
| { |
| "epoch": 23.626373626373628, |
| "grad_norm": 1.5252586603164673, |
| "learning_rate": 0.001, |
| "loss": 1.5819, |
| "step": 73100 |
| }, |
| { |
| "epoch": 23.658694246929542, |
| "grad_norm": 1.3027052879333496, |
| "learning_rate": 0.001, |
| "loss": 1.5909, |
| "step": 73200 |
| }, |
| { |
| "epoch": 23.691014867485457, |
| "grad_norm": 1.3421530723571777, |
| "learning_rate": 0.001, |
| "loss": 1.602, |
| "step": 73300 |
| }, |
| { |
| "epoch": 23.72333548804137, |
| "grad_norm": 1.4905993938446045, |
| "learning_rate": 0.001, |
| "loss": 1.625, |
| "step": 73400 |
| }, |
| { |
| "epoch": 23.755656108597286, |
| "grad_norm": 1.4335204362869263, |
| "learning_rate": 0.001, |
| "loss": 1.5941, |
| "step": 73500 |
| }, |
| { |
| "epoch": 23.7879767291532, |
| "grad_norm": 1.2546992301940918, |
| "learning_rate": 0.001, |
| "loss": 1.6238, |
| "step": 73600 |
| }, |
| { |
| "epoch": 23.820297349709115, |
| "grad_norm": 1.2537373304367065, |
| "learning_rate": 0.001, |
| "loss": 1.6303, |
| "step": 73700 |
| }, |
| { |
| "epoch": 23.85261797026503, |
| "grad_norm": 2.2106618881225586, |
| "learning_rate": 0.001, |
| "loss": 1.6129, |
| "step": 73800 |
| }, |
| { |
| "epoch": 23.884938590820944, |
| "grad_norm": 1.3618272542953491, |
| "learning_rate": 0.001, |
| "loss": 1.6093, |
| "step": 73900 |
| }, |
| { |
| "epoch": 23.91725921137686, |
| "grad_norm": 1.2944997549057007, |
| "learning_rate": 0.001, |
| "loss": 1.6103, |
| "step": 74000 |
| }, |
| { |
| "epoch": 23.949579831932773, |
| "grad_norm": 1.2308852672576904, |
| "learning_rate": 0.001, |
| "loss": 1.6456, |
| "step": 74100 |
| }, |
| { |
| "epoch": 23.981900452488688, |
| "grad_norm": 1.3738027811050415, |
| "learning_rate": 0.001, |
| "loss": 1.6603, |
| "step": 74200 |
| }, |
| { |
| "epoch": 24.014221073044602, |
| "grad_norm": 2.0176162719726562, |
| "learning_rate": 0.001, |
| "loss": 1.5526, |
| "step": 74300 |
| }, |
| { |
| "epoch": 24.046541693600517, |
| "grad_norm": 1.2242016792297363, |
| "learning_rate": 0.001, |
| "loss": 1.4932, |
| "step": 74400 |
| }, |
| { |
| "epoch": 24.07886231415643, |
| "grad_norm": 1.212552785873413, |
| "learning_rate": 0.001, |
| "loss": 1.5145, |
| "step": 74500 |
| }, |
| { |
| "epoch": 24.111182934712346, |
| "grad_norm": 1.89344322681427, |
| "learning_rate": 0.001, |
| "loss": 1.5289, |
| "step": 74600 |
| }, |
| { |
| "epoch": 24.14350355526826, |
| "grad_norm": 1.278113603591919, |
| "learning_rate": 0.001, |
| "loss": 1.5372, |
| "step": 74700 |
| }, |
| { |
| "epoch": 24.175824175824175, |
| "grad_norm": 1.6612149477005005, |
| "learning_rate": 0.001, |
| "loss": 1.5326, |
| "step": 74800 |
| }, |
| { |
| "epoch": 24.20814479638009, |
| "grad_norm": 1.2389644384384155, |
| "learning_rate": 0.001, |
| "loss": 1.5162, |
| "step": 74900 |
| }, |
| { |
| "epoch": 24.240465416936004, |
| "grad_norm": 1.5246014595031738, |
| "learning_rate": 0.001, |
| "loss": 1.5539, |
| "step": 75000 |
| }, |
| { |
| "epoch": 24.27278603749192, |
| "grad_norm": 1.496571660041809, |
| "learning_rate": 0.001, |
| "loss": 1.5298, |
| "step": 75100 |
| }, |
| { |
| "epoch": 24.305106658047833, |
| "grad_norm": 1.1371721029281616, |
| "learning_rate": 0.001, |
| "loss": 1.5627, |
| "step": 75200 |
| }, |
| { |
| "epoch": 24.337427278603748, |
| "grad_norm": 1.488763689994812, |
| "learning_rate": 0.001, |
| "loss": 1.5464, |
| "step": 75300 |
| }, |
| { |
| "epoch": 24.369747899159663, |
| "grad_norm": 2.095946788787842, |
| "learning_rate": 0.001, |
| "loss": 1.5717, |
| "step": 75400 |
| }, |
| { |
| "epoch": 24.402068519715577, |
| "grad_norm": 1.4641380310058594, |
| "learning_rate": 0.001, |
| "loss": 1.5728, |
| "step": 75500 |
| }, |
| { |
| "epoch": 24.43438914027149, |
| "grad_norm": 1.5061386823654175, |
| "learning_rate": 0.001, |
| "loss": 1.5399, |
| "step": 75600 |
| }, |
| { |
| "epoch": 24.466709760827406, |
| "grad_norm": 1.5154993534088135, |
| "learning_rate": 0.001, |
| "loss": 1.5743, |
| "step": 75700 |
| }, |
| { |
| "epoch": 24.49903038138332, |
| "grad_norm": 1.573975682258606, |
| "learning_rate": 0.001, |
| "loss": 1.5653, |
| "step": 75800 |
| }, |
| { |
| "epoch": 24.53135100193924, |
| "grad_norm": 1.5374081134796143, |
| "learning_rate": 0.001, |
| "loss": 1.572, |
| "step": 75900 |
| }, |
| { |
| "epoch": 24.563671622495153, |
| "grad_norm": 1.331465482711792, |
| "learning_rate": 0.001, |
| "loss": 1.5671, |
| "step": 76000 |
| }, |
| { |
| "epoch": 24.595992243051068, |
| "grad_norm": 1.6944043636322021, |
| "learning_rate": 0.001, |
| "loss": 1.569, |
| "step": 76100 |
| }, |
| { |
| "epoch": 24.628312863606983, |
| "grad_norm": 1.5414694547653198, |
| "learning_rate": 0.001, |
| "loss": 1.5945, |
| "step": 76200 |
| }, |
| { |
| "epoch": 24.660633484162897, |
| "grad_norm": 1.74053156375885, |
| "learning_rate": 0.001, |
| "loss": 1.5894, |
| "step": 76300 |
| }, |
| { |
| "epoch": 24.69295410471881, |
| "grad_norm": 1.4306658506393433, |
| "learning_rate": 0.001, |
| "loss": 1.5921, |
| "step": 76400 |
| }, |
| { |
| "epoch": 24.725274725274726, |
| "grad_norm": 1.250169038772583, |
| "learning_rate": 0.001, |
| "loss": 1.5789, |
| "step": 76500 |
| }, |
| { |
| "epoch": 24.75759534583064, |
| "grad_norm": 1.2672016620635986, |
| "learning_rate": 0.001, |
| "loss": 1.5937, |
| "step": 76600 |
| }, |
| { |
| "epoch": 24.789915966386555, |
| "grad_norm": 1.3557816743850708, |
| "learning_rate": 0.001, |
| "loss": 1.592, |
| "step": 76700 |
| }, |
| { |
| "epoch": 24.82223658694247, |
| "grad_norm": 1.330208420753479, |
| "learning_rate": 0.001, |
| "loss": 1.5801, |
| "step": 76800 |
| }, |
| { |
| "epoch": 24.854557207498384, |
| "grad_norm": 1.3479870557785034, |
| "learning_rate": 0.001, |
| "loss": 1.5849, |
| "step": 76900 |
| }, |
| { |
| "epoch": 24.8868778280543, |
| "grad_norm": 1.4242347478866577, |
| "learning_rate": 0.001, |
| "loss": 1.5957, |
| "step": 77000 |
| }, |
| { |
| "epoch": 24.919198448610214, |
| "grad_norm": 1.527997374534607, |
| "learning_rate": 0.001, |
| "loss": 1.6222, |
| "step": 77100 |
| }, |
| { |
| "epoch": 24.951519069166128, |
| "grad_norm": 1.1563555002212524, |
| "learning_rate": 0.001, |
| "loss": 1.6155, |
| "step": 77200 |
| }, |
| { |
| "epoch": 24.983839689722043, |
| "grad_norm": 1.1758341789245605, |
| "learning_rate": 0.001, |
| "loss": 1.6147, |
| "step": 77300 |
| }, |
| { |
| "epoch": 25.016160310277957, |
| "grad_norm": 0.742283284664154, |
| "learning_rate": 0.001, |
| "loss": 1.4905, |
| "step": 77400 |
| }, |
| { |
| "epoch": 25.048480930833872, |
| "grad_norm": 0.9083603620529175, |
| "learning_rate": 0.001, |
| "loss": 1.4952, |
| "step": 77500 |
| }, |
| { |
| "epoch": 25.080801551389786, |
| "grad_norm": 0.7578412294387817, |
| "learning_rate": 0.001, |
| "loss": 1.4976, |
| "step": 77600 |
| }, |
| { |
| "epoch": 25.1131221719457, |
| "grad_norm": 0.7569114565849304, |
| "learning_rate": 0.001, |
| "loss": 1.5075, |
| "step": 77700 |
| }, |
| { |
| "epoch": 25.145442792501616, |
| "grad_norm": 0.7160850763320923, |
| "learning_rate": 0.001, |
| "loss": 1.524, |
| "step": 77800 |
| }, |
| { |
| "epoch": 25.17776341305753, |
| "grad_norm": 0.8957599997520447, |
| "learning_rate": 0.001, |
| "loss": 1.5142, |
| "step": 77900 |
| }, |
| { |
| "epoch": 25.210084033613445, |
| "grad_norm": 0.9639133810997009, |
| "learning_rate": 0.001, |
| "loss": 1.4995, |
| "step": 78000 |
| }, |
| { |
| "epoch": 25.24240465416936, |
| "grad_norm": 0.7975667119026184, |
| "learning_rate": 0.001, |
| "loss": 1.5078, |
| "step": 78100 |
| }, |
| { |
| "epoch": 25.274725274725274, |
| "grad_norm": 0.6604834794998169, |
| "learning_rate": 0.001, |
| "loss": 1.5515, |
| "step": 78200 |
| }, |
| { |
| "epoch": 25.30704589528119, |
| "grad_norm": 1.7004810571670532, |
| "learning_rate": 0.001, |
| "loss": 1.5241, |
| "step": 78300 |
| }, |
| { |
| "epoch": 25.339366515837103, |
| "grad_norm": 0.6456866264343262, |
| "learning_rate": 0.001, |
| "loss": 1.5068, |
| "step": 78400 |
| }, |
| { |
| "epoch": 25.371687136393017, |
| "grad_norm": 0.7533178329467773, |
| "learning_rate": 0.001, |
| "loss": 1.5126, |
| "step": 78500 |
| }, |
| { |
| "epoch": 25.404007756948932, |
| "grad_norm": 1.254917860031128, |
| "learning_rate": 0.001, |
| "loss": 1.5386, |
| "step": 78600 |
| }, |
| { |
| "epoch": 25.436328377504847, |
| "grad_norm": 0.8305799961090088, |
| "learning_rate": 0.001, |
| "loss": 1.5495, |
| "step": 78700 |
| }, |
| { |
| "epoch": 25.46864899806076, |
| "grad_norm": 0.5019985437393188, |
| "learning_rate": 0.001, |
| "loss": 1.5522, |
| "step": 78800 |
| }, |
| { |
| "epoch": 25.50096961861668, |
| "grad_norm": 1.266822338104248, |
| "learning_rate": 0.001, |
| "loss": 1.5586, |
| "step": 78900 |
| }, |
| { |
| "epoch": 25.533290239172594, |
| "grad_norm": 1.1242650747299194, |
| "learning_rate": 0.001, |
| "loss": 1.548, |
| "step": 79000 |
| }, |
| { |
| "epoch": 25.56561085972851, |
| "grad_norm": 1.264953374862671, |
| "learning_rate": 0.001, |
| "loss": 1.5599, |
| "step": 79100 |
| }, |
| { |
| "epoch": 25.597931480284423, |
| "grad_norm": 0.8981252908706665, |
| "learning_rate": 0.001, |
| "loss": 1.5602, |
| "step": 79200 |
| }, |
| { |
| "epoch": 25.630252100840337, |
| "grad_norm": 0.9501145482063293, |
| "learning_rate": 0.001, |
| "loss": 1.5501, |
| "step": 79300 |
| }, |
| { |
| "epoch": 25.662572721396252, |
| "grad_norm": 0.9055042266845703, |
| "learning_rate": 0.001, |
| "loss": 1.5692, |
| "step": 79400 |
| }, |
| { |
| "epoch": 25.694893341952167, |
| "grad_norm": 0.6795365810394287, |
| "learning_rate": 0.001, |
| "loss": 1.5801, |
| "step": 79500 |
| }, |
| { |
| "epoch": 25.72721396250808, |
| "grad_norm": 0.8403079509735107, |
| "learning_rate": 0.001, |
| "loss": 1.574, |
| "step": 79600 |
| }, |
| { |
| "epoch": 25.759534583063996, |
| "grad_norm": 0.8397261500358582, |
| "learning_rate": 0.001, |
| "loss": 1.5633, |
| "step": 79700 |
| }, |
| { |
| "epoch": 25.79185520361991, |
| "grad_norm": 1.120841383934021, |
| "learning_rate": 0.001, |
| "loss": 1.5868, |
| "step": 79800 |
| }, |
| { |
| "epoch": 25.824175824175825, |
| "grad_norm": 0.8841111660003662, |
| "learning_rate": 0.001, |
| "loss": 1.5438, |
| "step": 79900 |
| }, |
| { |
| "epoch": 25.85649644473174, |
| "grad_norm": 1.7660539150238037, |
| "learning_rate": 0.001, |
| "loss": 1.5841, |
| "step": 80000 |
| }, |
| { |
| "epoch": 25.888817065287654, |
| "grad_norm": 0.6177299618721008, |
| "learning_rate": 0.001, |
| "loss": 1.5619, |
| "step": 80100 |
| }, |
| { |
| "epoch": 25.92113768584357, |
| "grad_norm": 0.9627276659011841, |
| "learning_rate": 0.001, |
| "loss": 1.5964, |
| "step": 80200 |
| }, |
| { |
| "epoch": 25.953458306399483, |
| "grad_norm": 1.162492036819458, |
| "learning_rate": 0.001, |
| "loss": 1.5836, |
| "step": 80300 |
| }, |
| { |
| "epoch": 25.985778926955398, |
| "grad_norm": 0.6118494868278503, |
| "learning_rate": 0.001, |
| "loss": 1.5949, |
| "step": 80400 |
| }, |
| { |
| "epoch": 26.018099547511312, |
| "grad_norm": 1.2347427606582642, |
| "learning_rate": 0.001, |
| "loss": 1.5292, |
| "step": 80500 |
| }, |
| { |
| "epoch": 26.050420168067227, |
| "grad_norm": 1.450631856918335, |
| "learning_rate": 0.001, |
| "loss": 1.4459, |
| "step": 80600 |
| }, |
| { |
| "epoch": 26.08274078862314, |
| "grad_norm": 1.2779650688171387, |
| "learning_rate": 0.001, |
| "loss": 1.4594, |
| "step": 80700 |
| }, |
| { |
| "epoch": 26.115061409179056, |
| "grad_norm": 1.435947299003601, |
| "learning_rate": 0.001, |
| "loss": 1.4688, |
| "step": 80800 |
| }, |
| { |
| "epoch": 26.14738202973497, |
| "grad_norm": 1.2987701892852783, |
| "learning_rate": 0.001, |
| "loss": 1.5039, |
| "step": 80900 |
| }, |
| { |
| "epoch": 26.179702650290885, |
| "grad_norm": 1.240431547164917, |
| "learning_rate": 0.001, |
| "loss": 1.5013, |
| "step": 81000 |
| }, |
| { |
| "epoch": 26.2120232708468, |
| "grad_norm": 1.5258148908615112, |
| "learning_rate": 0.001, |
| "loss": 1.4991, |
| "step": 81100 |
| }, |
| { |
| "epoch": 26.244343891402714, |
| "grad_norm": 1.3428306579589844, |
| "learning_rate": 0.001, |
| "loss": 1.504, |
| "step": 81200 |
| }, |
| { |
| "epoch": 26.27666451195863, |
| "grad_norm": 1.8016172647476196, |
| "learning_rate": 0.001, |
| "loss": 1.5192, |
| "step": 81300 |
| }, |
| { |
| "epoch": 26.308985132514543, |
| "grad_norm": 1.2530312538146973, |
| "learning_rate": 0.001, |
| "loss": 1.5187, |
| "step": 81400 |
| }, |
| { |
| "epoch": 26.341305753070458, |
| "grad_norm": 1.2320021390914917, |
| "learning_rate": 0.001, |
| "loss": 1.5256, |
| "step": 81500 |
| }, |
| { |
| "epoch": 26.373626373626372, |
| "grad_norm": 1.5778974294662476, |
| "learning_rate": 0.001, |
| "loss": 1.503, |
| "step": 81600 |
| }, |
| { |
| "epoch": 26.405946994182287, |
| "grad_norm": 1.6406939029693604, |
| "learning_rate": 0.001, |
| "loss": 1.5297, |
| "step": 81700 |
| }, |
| { |
| "epoch": 26.4382676147382, |
| "grad_norm": 1.3841694593429565, |
| "learning_rate": 0.001, |
| "loss": 1.5295, |
| "step": 81800 |
| }, |
| { |
| "epoch": 26.470588235294116, |
| "grad_norm": 1.5115548372268677, |
| "learning_rate": 0.001, |
| "loss": 1.5216, |
| "step": 81900 |
| }, |
| { |
| "epoch": 26.50290885585003, |
| "grad_norm": 1.463423252105713, |
| "learning_rate": 0.001, |
| "loss": 1.522, |
| "step": 82000 |
| }, |
| { |
| "epoch": 26.53522947640595, |
| "grad_norm": 1.2583633661270142, |
| "learning_rate": 0.001, |
| "loss": 1.5367, |
| "step": 82100 |
| }, |
| { |
| "epoch": 26.567550096961863, |
| "grad_norm": 1.482848048210144, |
| "learning_rate": 0.001, |
| "loss": 1.5252, |
| "step": 82200 |
| }, |
| { |
| "epoch": 26.599870717517778, |
| "grad_norm": 1.3020163774490356, |
| "learning_rate": 0.001, |
| "loss": 1.5197, |
| "step": 82300 |
| }, |
| { |
| "epoch": 26.632191338073692, |
| "grad_norm": 1.1481016874313354, |
| "learning_rate": 0.001, |
| "loss": 1.5356, |
| "step": 82400 |
| }, |
| { |
| "epoch": 26.664511958629607, |
| "grad_norm": 1.3201098442077637, |
| "learning_rate": 0.001, |
| "loss": 1.5355, |
| "step": 82500 |
| }, |
| { |
| "epoch": 26.69683257918552, |
| "grad_norm": 1.2914501428604126, |
| "learning_rate": 0.001, |
| "loss": 1.5497, |
| "step": 82600 |
| }, |
| { |
| "epoch": 26.729153199741436, |
| "grad_norm": 1.4507853984832764, |
| "learning_rate": 0.001, |
| "loss": 1.5417, |
| "step": 82700 |
| }, |
| { |
| "epoch": 26.76147382029735, |
| "grad_norm": 1.4626699686050415, |
| "learning_rate": 0.001, |
| "loss": 1.5646, |
| "step": 82800 |
| }, |
| { |
| "epoch": 26.793794440853265, |
| "grad_norm": 1.5812556743621826, |
| "learning_rate": 0.001, |
| "loss": 1.5514, |
| "step": 82900 |
| }, |
| { |
| "epoch": 26.82611506140918, |
| "grad_norm": 1.2999969720840454, |
| "learning_rate": 0.001, |
| "loss": 1.5597, |
| "step": 83000 |
| }, |
| { |
| "epoch": 26.858435681965094, |
| "grad_norm": 1.7724205255508423, |
| "learning_rate": 0.001, |
| "loss": 1.5526, |
| "step": 83100 |
| }, |
| { |
| "epoch": 26.89075630252101, |
| "grad_norm": 1.5374313592910767, |
| "learning_rate": 0.001, |
| "loss": 1.5583, |
| "step": 83200 |
| }, |
| { |
| "epoch": 26.923076923076923, |
| "grad_norm": 1.34683358669281, |
| "learning_rate": 0.001, |
| "loss": 1.5715, |
| "step": 83300 |
| }, |
| { |
| "epoch": 26.955397543632838, |
| "grad_norm": 1.9225049018859863, |
| "learning_rate": 0.001, |
| "loss": 1.5586, |
| "step": 83400 |
| }, |
| { |
| "epoch": 26.987718164188752, |
| "grad_norm": 1.2125778198242188, |
| "learning_rate": 0.001, |
| "loss": 1.5621, |
| "step": 83500 |
| }, |
| { |
| "epoch": 27.020038784744667, |
| "grad_norm": 1.5823465585708618, |
| "learning_rate": 0.001, |
| "loss": 1.5058, |
| "step": 83600 |
| }, |
| { |
| "epoch": 27.05235940530058, |
| "grad_norm": 1.2081438302993774, |
| "learning_rate": 0.001, |
| "loss": 1.4497, |
| "step": 83700 |
| }, |
| { |
| "epoch": 27.084680025856496, |
| "grad_norm": 1.27534019947052, |
| "learning_rate": 0.001, |
| "loss": 1.4587, |
| "step": 83800 |
| }, |
| { |
| "epoch": 27.11700064641241, |
| "grad_norm": 1.159661054611206, |
| "learning_rate": 0.001, |
| "loss": 1.4757, |
| "step": 83900 |
| }, |
| { |
| "epoch": 27.149321266968325, |
| "grad_norm": 1.3436905145645142, |
| "learning_rate": 0.001, |
| "loss": 1.4684, |
| "step": 84000 |
| }, |
| { |
| "epoch": 27.18164188752424, |
| "grad_norm": 1.2977607250213623, |
| "learning_rate": 0.001, |
| "loss": 1.5002, |
| "step": 84100 |
| }, |
| { |
| "epoch": 27.213962508080154, |
| "grad_norm": 1.5422593355178833, |
| "learning_rate": 0.001, |
| "loss": 1.4569, |
| "step": 84200 |
| }, |
| { |
| "epoch": 27.24628312863607, |
| "grad_norm": 1.330285906791687, |
| "learning_rate": 0.001, |
| "loss": 1.478, |
| "step": 84300 |
| }, |
| { |
| "epoch": 27.278603749191983, |
| "grad_norm": 1.6561344861984253, |
| "learning_rate": 0.001, |
| "loss": 1.4716, |
| "step": 84400 |
| }, |
| { |
| "epoch": 27.310924369747898, |
| "grad_norm": 1.6572483777999878, |
| "learning_rate": 0.001, |
| "loss": 1.4789, |
| "step": 84500 |
| }, |
| { |
| "epoch": 27.343244990303813, |
| "grad_norm": 1.295915961265564, |
| "learning_rate": 0.001, |
| "loss": 1.5091, |
| "step": 84600 |
| }, |
| { |
| "epoch": 27.375565610859727, |
| "grad_norm": 1.2116016149520874, |
| "learning_rate": 0.001, |
| "loss": 1.503, |
| "step": 84700 |
| }, |
| { |
| "epoch": 27.40788623141564, |
| "grad_norm": 1.413427472114563, |
| "learning_rate": 0.001, |
| "loss": 1.4932, |
| "step": 84800 |
| }, |
| { |
| "epoch": 27.440206851971556, |
| "grad_norm": 1.242362380027771, |
| "learning_rate": 0.001, |
| "loss": 1.5037, |
| "step": 84900 |
| }, |
| { |
| "epoch": 27.47252747252747, |
| "grad_norm": 1.4885907173156738, |
| "learning_rate": 0.001, |
| "loss": 1.4834, |
| "step": 85000 |
| }, |
| { |
| "epoch": 27.50484809308339, |
| "grad_norm": 1.2446436882019043, |
| "learning_rate": 0.001, |
| "loss": 1.5187, |
| "step": 85100 |
| }, |
| { |
| "epoch": 27.537168713639304, |
| "grad_norm": 1.154815435409546, |
| "learning_rate": 0.001, |
| "loss": 1.5187, |
| "step": 85200 |
| }, |
| { |
| "epoch": 27.569489334195218, |
| "grad_norm": 1.386157751083374, |
| "learning_rate": 0.001, |
| "loss": 1.5252, |
| "step": 85300 |
| }, |
| { |
| "epoch": 27.601809954751133, |
| "grad_norm": 1.07121741771698, |
| "learning_rate": 0.001, |
| "loss": 1.5063, |
| "step": 85400 |
| }, |
| { |
| "epoch": 27.634130575307047, |
| "grad_norm": 1.6424086093902588, |
| "learning_rate": 0.001, |
| "loss": 1.5084, |
| "step": 85500 |
| }, |
| { |
| "epoch": 27.66645119586296, |
| "grad_norm": 1.3184316158294678, |
| "learning_rate": 0.001, |
| "loss": 1.5178, |
| "step": 85600 |
| }, |
| { |
| "epoch": 27.698771816418876, |
| "grad_norm": 1.3914283514022827, |
| "learning_rate": 0.001, |
| "loss": 1.5375, |
| "step": 85700 |
| }, |
| { |
| "epoch": 27.73109243697479, |
| "grad_norm": 1.3977998495101929, |
| "learning_rate": 0.001, |
| "loss": 1.5232, |
| "step": 85800 |
| }, |
| { |
| "epoch": 27.763413057530705, |
| "grad_norm": 1.423851728439331, |
| "learning_rate": 0.001, |
| "loss": 1.5201, |
| "step": 85900 |
| }, |
| { |
| "epoch": 27.79573367808662, |
| "grad_norm": 1.258535623550415, |
| "learning_rate": 0.001, |
| "loss": 1.5244, |
| "step": 86000 |
| }, |
| { |
| "epoch": 27.828054298642535, |
| "grad_norm": 1.1898850202560425, |
| "learning_rate": 0.001, |
| "loss": 1.524, |
| "step": 86100 |
| }, |
| { |
| "epoch": 27.86037491919845, |
| "grad_norm": 1.3646154403686523, |
| "learning_rate": 0.001, |
| "loss": 1.5416, |
| "step": 86200 |
| }, |
| { |
| "epoch": 27.892695539754364, |
| "grad_norm": 1.1520329713821411, |
| "learning_rate": 0.001, |
| "loss": 1.5248, |
| "step": 86300 |
| }, |
| { |
| "epoch": 27.92501616031028, |
| "grad_norm": 1.2999958992004395, |
| "learning_rate": 0.001, |
| "loss": 1.5227, |
| "step": 86400 |
| }, |
| { |
| "epoch": 27.957336780866193, |
| "grad_norm": 1.1944336891174316, |
| "learning_rate": 0.001, |
| "loss": 1.5457, |
| "step": 86500 |
| }, |
| { |
| "epoch": 27.989657401422107, |
| "grad_norm": 1.6716809272766113, |
| "learning_rate": 0.001, |
| "loss": 1.5364, |
| "step": 86600 |
| }, |
| { |
| "epoch": 28.021978021978022, |
| "grad_norm": 1.3939216136932373, |
| "learning_rate": 0.001, |
| "loss": 1.4927, |
| "step": 86700 |
| }, |
| { |
| "epoch": 28.054298642533936, |
| "grad_norm": 1.2728296518325806, |
| "learning_rate": 0.001, |
| "loss": 1.4457, |
| "step": 86800 |
| }, |
| { |
| "epoch": 28.08661926308985, |
| "grad_norm": 1.3491313457489014, |
| "learning_rate": 0.001, |
| "loss": 1.4111, |
| "step": 86900 |
| }, |
| { |
| "epoch": 28.118939883645766, |
| "grad_norm": 1.2990353107452393, |
| "learning_rate": 0.001, |
| "loss": 1.45, |
| "step": 87000 |
| }, |
| { |
| "epoch": 28.15126050420168, |
| "grad_norm": 1.213227391242981, |
| "learning_rate": 0.001, |
| "loss": 1.4374, |
| "step": 87100 |
| }, |
| { |
| "epoch": 28.183581124757595, |
| "grad_norm": 1.2896127700805664, |
| "learning_rate": 0.001, |
| "loss": 1.4648, |
| "step": 87200 |
| }, |
| { |
| "epoch": 28.21590174531351, |
| "grad_norm": 1.5600861310958862, |
| "learning_rate": 0.001, |
| "loss": 1.4622, |
| "step": 87300 |
| }, |
| { |
| "epoch": 28.248222365869424, |
| "grad_norm": 1.3157583475112915, |
| "learning_rate": 0.001, |
| "loss": 1.4648, |
| "step": 87400 |
| }, |
| { |
| "epoch": 28.28054298642534, |
| "grad_norm": 1.2662124633789062, |
| "learning_rate": 0.001, |
| "loss": 1.4799, |
| "step": 87500 |
| }, |
| { |
| "epoch": 28.312863606981253, |
| "grad_norm": 1.4469647407531738, |
| "learning_rate": 0.001, |
| "loss": 1.4426, |
| "step": 87600 |
| }, |
| { |
| "epoch": 28.345184227537167, |
| "grad_norm": 1.5201246738433838, |
| "learning_rate": 0.001, |
| "loss": 1.4455, |
| "step": 87700 |
| }, |
| { |
| "epoch": 28.377504848093082, |
| "grad_norm": 1.735161542892456, |
| "learning_rate": 0.001, |
| "loss": 1.4936, |
| "step": 87800 |
| }, |
| { |
| "epoch": 28.409825468648997, |
| "grad_norm": 1.433800458908081, |
| "learning_rate": 0.001, |
| "loss": 1.4776, |
| "step": 87900 |
| }, |
| { |
| "epoch": 28.44214608920491, |
| "grad_norm": 1.529483675956726, |
| "learning_rate": 0.001, |
| "loss": 1.4762, |
| "step": 88000 |
| }, |
| { |
| "epoch": 28.474466709760826, |
| "grad_norm": 1.7155773639678955, |
| "learning_rate": 0.001, |
| "loss": 1.4661, |
| "step": 88100 |
| }, |
| { |
| "epoch": 28.50678733031674, |
| "grad_norm": 1.5531314611434937, |
| "learning_rate": 0.001, |
| "loss": 1.4836, |
| "step": 88200 |
| }, |
| { |
| "epoch": 28.53910795087266, |
| "grad_norm": 1.2565120458602905, |
| "learning_rate": 0.001, |
| "loss": 1.4938, |
| "step": 88300 |
| }, |
| { |
| "epoch": 28.571428571428573, |
| "grad_norm": 1.0435504913330078, |
| "learning_rate": 0.001, |
| "loss": 1.4919, |
| "step": 88400 |
| }, |
| { |
| "epoch": 28.603749191984488, |
| "grad_norm": 1.5444915294647217, |
| "learning_rate": 0.001, |
| "loss": 1.5075, |
| "step": 88500 |
| }, |
| { |
| "epoch": 28.636069812540402, |
| "grad_norm": 2.226731300354004, |
| "learning_rate": 0.001, |
| "loss": 1.5065, |
| "step": 88600 |
| }, |
| { |
| "epoch": 28.668390433096317, |
| "grad_norm": 1.4304759502410889, |
| "learning_rate": 0.001, |
| "loss": 1.506, |
| "step": 88700 |
| }, |
| { |
| "epoch": 28.70071105365223, |
| "grad_norm": 1.0051342248916626, |
| "learning_rate": 0.001, |
| "loss": 1.5066, |
| "step": 88800 |
| }, |
| { |
| "epoch": 28.733031674208146, |
| "grad_norm": 1.3363932371139526, |
| "learning_rate": 0.001, |
| "loss": 1.5029, |
| "step": 88900 |
| }, |
| { |
| "epoch": 28.76535229476406, |
| "grad_norm": 4.342236042022705, |
| "learning_rate": 0.001, |
| "loss": 1.5065, |
| "step": 89000 |
| }, |
| { |
| "epoch": 28.797672915319975, |
| "grad_norm": 1.1559197902679443, |
| "learning_rate": 0.001, |
| "loss": 1.5142, |
| "step": 89100 |
| }, |
| { |
| "epoch": 28.82999353587589, |
| "grad_norm": 1.5334970951080322, |
| "learning_rate": 0.001, |
| "loss": 1.5089, |
| "step": 89200 |
| }, |
| { |
| "epoch": 28.862314156431804, |
| "grad_norm": 1.2204309701919556, |
| "learning_rate": 0.001, |
| "loss": 1.5117, |
| "step": 89300 |
| }, |
| { |
| "epoch": 28.89463477698772, |
| "grad_norm": 1.5689836740493774, |
| "learning_rate": 0.001, |
| "loss": 1.5135, |
| "step": 89400 |
| }, |
| { |
| "epoch": 28.926955397543633, |
| "grad_norm": 1.6976715326309204, |
| "learning_rate": 0.001, |
| "loss": 1.5148, |
| "step": 89500 |
| }, |
| { |
| "epoch": 28.959276018099548, |
| "grad_norm": 1.4873236417770386, |
| "learning_rate": 0.001, |
| "loss": 1.5158, |
| "step": 89600 |
| }, |
| { |
| "epoch": 28.991596638655462, |
| "grad_norm": 1.242458701133728, |
| "learning_rate": 0.001, |
| "loss": 1.5225, |
| "step": 89700 |
| }, |
| { |
| "epoch": 29.023917259211377, |
| "grad_norm": 1.1908669471740723, |
| "learning_rate": 0.001, |
| "loss": 1.4639, |
| "step": 89800 |
| }, |
| { |
| "epoch": 29.05623787976729, |
| "grad_norm": 1.4540772438049316, |
| "learning_rate": 0.001, |
| "loss": 1.4076, |
| "step": 89900 |
| }, |
| { |
| "epoch": 29.088558500323206, |
| "grad_norm": 1.3074499368667603, |
| "learning_rate": 0.001, |
| "loss": 1.4015, |
| "step": 90000 |
| }, |
| { |
| "epoch": 29.12087912087912, |
| "grad_norm": 2.1484622955322266, |
| "learning_rate": 0.001, |
| "loss": 1.425, |
| "step": 90100 |
| }, |
| { |
| "epoch": 29.153199741435035, |
| "grad_norm": 1.9532856941223145, |
| "learning_rate": 0.001, |
| "loss": 1.4144, |
| "step": 90200 |
| }, |
| { |
| "epoch": 29.18552036199095, |
| "grad_norm": 1.4356775283813477, |
| "learning_rate": 0.001, |
| "loss": 1.4342, |
| "step": 90300 |
| }, |
| { |
| "epoch": 29.217840982546864, |
| "grad_norm": 1.7443933486938477, |
| "learning_rate": 0.001, |
| "loss": 1.431, |
| "step": 90400 |
| }, |
| { |
| "epoch": 29.25016160310278, |
| "grad_norm": 1.4974429607391357, |
| "learning_rate": 0.001, |
| "loss": 1.424, |
| "step": 90500 |
| }, |
| { |
| "epoch": 29.282482223658693, |
| "grad_norm": 1.5972236394882202, |
| "learning_rate": 0.001, |
| "loss": 1.4474, |
| "step": 90600 |
| }, |
| { |
| "epoch": 29.314802844214608, |
| "grad_norm": 1.5421713590621948, |
| "learning_rate": 0.001, |
| "loss": 1.4521, |
| "step": 90700 |
| }, |
| { |
| "epoch": 29.347123464770522, |
| "grad_norm": 1.3467155694961548, |
| "learning_rate": 0.001, |
| "loss": 1.4596, |
| "step": 90800 |
| }, |
| { |
| "epoch": 29.379444085326437, |
| "grad_norm": 1.146463394165039, |
| "learning_rate": 0.001, |
| "loss": 1.4456, |
| "step": 90900 |
| }, |
| { |
| "epoch": 29.41176470588235, |
| "grad_norm": 1.3262990713119507, |
| "learning_rate": 0.001, |
| "loss": 1.4791, |
| "step": 91000 |
| }, |
| { |
| "epoch": 29.444085326438266, |
| "grad_norm": 1.5677353143692017, |
| "learning_rate": 0.001, |
| "loss": 1.4612, |
| "step": 91100 |
| }, |
| { |
| "epoch": 29.47640594699418, |
| "grad_norm": 1.2217344045639038, |
| "learning_rate": 0.001, |
| "loss": 1.4476, |
| "step": 91200 |
| }, |
| { |
| "epoch": 29.5087265675501, |
| "grad_norm": 1.3716522455215454, |
| "learning_rate": 0.001, |
| "loss": 1.4687, |
| "step": 91300 |
| }, |
| { |
| "epoch": 29.541047188106013, |
| "grad_norm": 1.5650326013565063, |
| "learning_rate": 0.001, |
| "loss": 1.4932, |
| "step": 91400 |
| }, |
| { |
| "epoch": 29.573367808661928, |
| "grad_norm": 1.4391878843307495, |
| "learning_rate": 0.001, |
| "loss": 1.4665, |
| "step": 91500 |
| }, |
| { |
| "epoch": 29.605688429217842, |
| "grad_norm": 1.2627538442611694, |
| "learning_rate": 0.001, |
| "loss": 1.4443, |
| "step": 91600 |
| }, |
| { |
| "epoch": 29.638009049773757, |
| "grad_norm": 1.4286977052688599, |
| "learning_rate": 0.001, |
| "loss": 1.4829, |
| "step": 91700 |
| }, |
| { |
| "epoch": 29.67032967032967, |
| "grad_norm": 1.380190372467041, |
| "learning_rate": 0.001, |
| "loss": 1.4936, |
| "step": 91800 |
| }, |
| { |
| "epoch": 29.702650290885586, |
| "grad_norm": 1.3603507280349731, |
| "learning_rate": 0.001, |
| "loss": 1.4793, |
| "step": 91900 |
| }, |
| { |
| "epoch": 29.7349709114415, |
| "grad_norm": 1.2642713785171509, |
| "learning_rate": 0.001, |
| "loss": 1.4848, |
| "step": 92000 |
| }, |
| { |
| "epoch": 29.767291531997415, |
| "grad_norm": 1.3949614763259888, |
| "learning_rate": 0.001, |
| "loss": 1.4902, |
| "step": 92100 |
| }, |
| { |
| "epoch": 29.79961215255333, |
| "grad_norm": 1.346086025238037, |
| "learning_rate": 0.001, |
| "loss": 1.5032, |
| "step": 92200 |
| }, |
| { |
| "epoch": 29.831932773109244, |
| "grad_norm": 1.2533193826675415, |
| "learning_rate": 0.001, |
| "loss": 1.4987, |
| "step": 92300 |
| }, |
| { |
| "epoch": 29.86425339366516, |
| "grad_norm": 1.5642977952957153, |
| "learning_rate": 0.001, |
| "loss": 1.4958, |
| "step": 92400 |
| }, |
| { |
| "epoch": 29.896574014221073, |
| "grad_norm": 1.1529120206832886, |
| "learning_rate": 0.001, |
| "loss": 1.504, |
| "step": 92500 |
| }, |
| { |
| "epoch": 29.928894634776988, |
| "grad_norm": 1.5595033168792725, |
| "learning_rate": 0.001, |
| "loss": 1.5043, |
| "step": 92600 |
| }, |
| { |
| "epoch": 29.961215255332903, |
| "grad_norm": 1.3069615364074707, |
| "learning_rate": 0.001, |
| "loss": 1.4877, |
| "step": 92700 |
| }, |
| { |
| "epoch": 29.993535875888817, |
| "grad_norm": 1.4206944704055786, |
| "learning_rate": 0.001, |
| "loss": 1.5047, |
| "step": 92800 |
| }, |
| { |
| "epoch": 30.02585649644473, |
| "grad_norm": 1.6076793670654297, |
| "learning_rate": 0.001, |
| "loss": 1.4092, |
| "step": 92900 |
| }, |
| { |
| "epoch": 30.058177117000646, |
| "grad_norm": 1.626736044883728, |
| "learning_rate": 0.001, |
| "loss": 1.3785, |
| "step": 93000 |
| }, |
| { |
| "epoch": 30.09049773755656, |
| "grad_norm": 1.416502833366394, |
| "learning_rate": 0.001, |
| "loss": 1.3829, |
| "step": 93100 |
| }, |
| { |
| "epoch": 30.122818358112475, |
| "grad_norm": 1.39216947555542, |
| "learning_rate": 0.001, |
| "loss": 1.3948, |
| "step": 93200 |
| }, |
| { |
| "epoch": 30.15513897866839, |
| "grad_norm": 1.3596535921096802, |
| "learning_rate": 0.001, |
| "loss": 1.4033, |
| "step": 93300 |
| }, |
| { |
| "epoch": 30.187459599224304, |
| "grad_norm": 1.544952630996704, |
| "learning_rate": 0.001, |
| "loss": 1.4145, |
| "step": 93400 |
| }, |
| { |
| "epoch": 30.21978021978022, |
| "grad_norm": 1.465815782546997, |
| "learning_rate": 0.001, |
| "loss": 1.4243, |
| "step": 93500 |
| }, |
| { |
| "epoch": 30.252100840336134, |
| "grad_norm": 1.2440087795257568, |
| "learning_rate": 0.001, |
| "loss": 1.434, |
| "step": 93600 |
| }, |
| { |
| "epoch": 30.284421460892048, |
| "grad_norm": 1.3168740272521973, |
| "learning_rate": 0.001, |
| "loss": 1.4139, |
| "step": 93700 |
| }, |
| { |
| "epoch": 30.316742081447963, |
| "grad_norm": 1.6248412132263184, |
| "learning_rate": 0.001, |
| "loss": 1.4096, |
| "step": 93800 |
| }, |
| { |
| "epoch": 30.349062702003877, |
| "grad_norm": 1.1371262073516846, |
| "learning_rate": 0.001, |
| "loss": 1.4351, |
| "step": 93900 |
| }, |
| { |
| "epoch": 30.381383322559792, |
| "grad_norm": 1.4596260786056519, |
| "learning_rate": 0.001, |
| "loss": 1.4342, |
| "step": 94000 |
| }, |
| { |
| "epoch": 30.413703943115706, |
| "grad_norm": 1.284958004951477, |
| "learning_rate": 0.001, |
| "loss": 1.4516, |
| "step": 94100 |
| }, |
| { |
| "epoch": 30.44602456367162, |
| "grad_norm": 1.0324749946594238, |
| "learning_rate": 0.001, |
| "loss": 1.4382, |
| "step": 94200 |
| }, |
| { |
| "epoch": 30.478345184227535, |
| "grad_norm": 1.3807328939437866, |
| "learning_rate": 0.001, |
| "loss": 1.4617, |
| "step": 94300 |
| }, |
| { |
| "epoch": 30.51066580478345, |
| "grad_norm": 1.4561772346496582, |
| "learning_rate": 0.001, |
| "loss": 1.4732, |
| "step": 94400 |
| }, |
| { |
| "epoch": 30.542986425339368, |
| "grad_norm": 1.1771188974380493, |
| "learning_rate": 0.001, |
| "loss": 1.4479, |
| "step": 94500 |
| }, |
| { |
| "epoch": 30.575307045895283, |
| "grad_norm": 1.7424471378326416, |
| "learning_rate": 0.001, |
| "loss": 1.4705, |
| "step": 94600 |
| }, |
| { |
| "epoch": 30.607627666451197, |
| "grad_norm": 1.817940354347229, |
| "learning_rate": 0.001, |
| "loss": 1.462, |
| "step": 94700 |
| }, |
| { |
| "epoch": 30.639948287007112, |
| "grad_norm": 1.380663275718689, |
| "learning_rate": 0.001, |
| "loss": 1.4239, |
| "step": 94800 |
| }, |
| { |
| "epoch": 30.672268907563026, |
| "grad_norm": 1.4679547548294067, |
| "learning_rate": 0.001, |
| "loss": 1.4532, |
| "step": 94900 |
| }, |
| { |
| "epoch": 30.70458952811894, |
| "grad_norm": 1.6584173440933228, |
| "learning_rate": 0.001, |
| "loss": 1.4572, |
| "step": 95000 |
| }, |
| { |
| "epoch": 30.736910148674855, |
| "grad_norm": 1.5099067687988281, |
| "learning_rate": 0.001, |
| "loss": 1.4836, |
| "step": 95100 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 1.4512988328933716, |
| "learning_rate": 0.001, |
| "loss": 1.4611, |
| "step": 95200 |
| }, |
| { |
| "epoch": 30.801551389786685, |
| "grad_norm": 1.297056794166565, |
| "learning_rate": 0.001, |
| "loss": 1.462, |
| "step": 95300 |
| }, |
| { |
| "epoch": 30.8338720103426, |
| "grad_norm": 1.4180004596710205, |
| "learning_rate": 0.001, |
| "loss": 1.4916, |
| "step": 95400 |
| }, |
| { |
| "epoch": 30.866192630898514, |
| "grad_norm": 1.7167716026306152, |
| "learning_rate": 0.001, |
| "loss": 1.4824, |
| "step": 95500 |
| }, |
| { |
| "epoch": 30.89851325145443, |
| "grad_norm": 1.228950023651123, |
| "learning_rate": 0.001, |
| "loss": 1.4697, |
| "step": 95600 |
| }, |
| { |
| "epoch": 30.930833872010343, |
| "grad_norm": 1.4081132411956787, |
| "learning_rate": 0.001, |
| "loss": 1.5114, |
| "step": 95700 |
| }, |
| { |
| "epoch": 30.963154492566257, |
| "grad_norm": 1.2866050004959106, |
| "learning_rate": 0.001, |
| "loss": 1.484, |
| "step": 95800 |
| }, |
| { |
| "epoch": 30.995475113122172, |
| "grad_norm": 1.4979358911514282, |
| "learning_rate": 0.001, |
| "loss": 1.4559, |
| "step": 95900 |
| }, |
| { |
| "epoch": 31.027795733678087, |
| "grad_norm": 1.3178406953811646, |
| "learning_rate": 0.001, |
| "loss": 1.3567, |
| "step": 96000 |
| }, |
| { |
| "epoch": 31.060116354234, |
| "grad_norm": 1.5758633613586426, |
| "learning_rate": 0.001, |
| "loss": 1.3671, |
| "step": 96100 |
| }, |
| { |
| "epoch": 31.092436974789916, |
| "grad_norm": 1.5738314390182495, |
| "learning_rate": 0.001, |
| "loss": 1.3945, |
| "step": 96200 |
| }, |
| { |
| "epoch": 31.12475759534583, |
| "grad_norm": 1.34505295753479, |
| "learning_rate": 0.001, |
| "loss": 1.4101, |
| "step": 96300 |
| }, |
| { |
| "epoch": 31.157078215901745, |
| "grad_norm": 1.502578616142273, |
| "learning_rate": 0.001, |
| "loss": 1.3946, |
| "step": 96400 |
| }, |
| { |
| "epoch": 31.18939883645766, |
| "grad_norm": 1.2991825342178345, |
| "learning_rate": 0.001, |
| "loss": 1.3664, |
| "step": 96500 |
| }, |
| { |
| "epoch": 31.221719457013574, |
| "grad_norm": 1.2653495073318481, |
| "learning_rate": 0.001, |
| "loss": 1.416, |
| "step": 96600 |
| }, |
| { |
| "epoch": 31.25404007756949, |
| "grad_norm": 1.8182306289672852, |
| "learning_rate": 0.001, |
| "loss": 1.3977, |
| "step": 96700 |
| }, |
| { |
| "epoch": 31.286360698125403, |
| "grad_norm": 1.8426049947738647, |
| "learning_rate": 0.001, |
| "loss": 1.4165, |
| "step": 96800 |
| }, |
| { |
| "epoch": 31.318681318681318, |
| "grad_norm": 1.3844722509384155, |
| "learning_rate": 0.001, |
| "loss": 1.4174, |
| "step": 96900 |
| }, |
| { |
| "epoch": 31.351001939237232, |
| "grad_norm": 1.6439272165298462, |
| "learning_rate": 0.001, |
| "loss": 1.4053, |
| "step": 97000 |
| }, |
| { |
| "epoch": 31.383322559793147, |
| "grad_norm": 1.369141936302185, |
| "learning_rate": 0.001, |
| "loss": 1.4124, |
| "step": 97100 |
| }, |
| { |
| "epoch": 31.41564318034906, |
| "grad_norm": 1.6224870681762695, |
| "learning_rate": 0.001, |
| "loss": 1.4294, |
| "step": 97200 |
| }, |
| { |
| "epoch": 31.447963800904976, |
| "grad_norm": 1.3477481603622437, |
| "learning_rate": 0.001, |
| "loss": 1.4287, |
| "step": 97300 |
| }, |
| { |
| "epoch": 31.48028442146089, |
| "grad_norm": 1.3420053720474243, |
| "learning_rate": 0.001, |
| "loss": 1.4152, |
| "step": 97400 |
| }, |
| { |
| "epoch": 31.51260504201681, |
| "grad_norm": 1.5057765245437622, |
| "learning_rate": 0.001, |
| "loss": 1.4473, |
| "step": 97500 |
| }, |
| { |
| "epoch": 31.544925662572723, |
| "grad_norm": 1.417724609375, |
| "learning_rate": 0.001, |
| "loss": 1.4357, |
| "step": 97600 |
| }, |
| { |
| "epoch": 31.577246283128638, |
| "grad_norm": 1.4711930751800537, |
| "learning_rate": 0.001, |
| "loss": 1.4333, |
| "step": 97700 |
| }, |
| { |
| "epoch": 31.609566903684552, |
| "grad_norm": 1.3188661336898804, |
| "learning_rate": 0.001, |
| "loss": 1.4402, |
| "step": 97800 |
| }, |
| { |
| "epoch": 31.641887524240467, |
| "grad_norm": 1.7794547080993652, |
| "learning_rate": 0.001, |
| "loss": 1.4465, |
| "step": 97900 |
| }, |
| { |
| "epoch": 31.67420814479638, |
| "grad_norm": 1.7140164375305176, |
| "learning_rate": 0.001, |
| "loss": 1.4473, |
| "step": 98000 |
| }, |
| { |
| "epoch": 31.706528765352296, |
| "grad_norm": 1.1292674541473389, |
| "learning_rate": 0.001, |
| "loss": 1.4553, |
| "step": 98100 |
| }, |
| { |
| "epoch": 31.73884938590821, |
| "grad_norm": 1.4880545139312744, |
| "learning_rate": 0.001, |
| "loss": 1.4568, |
| "step": 98200 |
| }, |
| { |
| "epoch": 31.771170006464125, |
| "grad_norm": 1.5296339988708496, |
| "learning_rate": 0.001, |
| "loss": 1.4557, |
| "step": 98300 |
| }, |
| { |
| "epoch": 31.80349062702004, |
| "grad_norm": 2.067758321762085, |
| "learning_rate": 0.001, |
| "loss": 1.4437, |
| "step": 98400 |
| }, |
| { |
| "epoch": 31.835811247575954, |
| "grad_norm": 1.5721486806869507, |
| "learning_rate": 0.001, |
| "loss": 1.4621, |
| "step": 98500 |
| }, |
| { |
| "epoch": 31.86813186813187, |
| "grad_norm": 1.4480345249176025, |
| "learning_rate": 0.001, |
| "loss": 1.4543, |
| "step": 98600 |
| }, |
| { |
| "epoch": 31.900452488687783, |
| "grad_norm": 1.422499179840088, |
| "learning_rate": 0.001, |
| "loss": 1.463, |
| "step": 98700 |
| }, |
| { |
| "epoch": 31.932773109243698, |
| "grad_norm": 1.4816770553588867, |
| "learning_rate": 0.001, |
| "loss": 1.4736, |
| "step": 98800 |
| }, |
| { |
| "epoch": 31.965093729799612, |
| "grad_norm": 1.3213480710983276, |
| "learning_rate": 0.001, |
| "loss": 1.4433, |
| "step": 98900 |
| }, |
| { |
| "epoch": 31.997414350355527, |
| "grad_norm": 1.6971964836120605, |
| "learning_rate": 0.001, |
| "loss": 1.437, |
| "step": 99000 |
| }, |
| { |
| "epoch": 32.02973497091144, |
| "grad_norm": 1.3023521900177002, |
| "learning_rate": 0.001, |
| "loss": 1.384, |
| "step": 99100 |
| }, |
| { |
| "epoch": 32.062055591467356, |
| "grad_norm": 1.6234173774719238, |
| "learning_rate": 0.001, |
| "loss": 1.3549, |
| "step": 99200 |
| }, |
| { |
| "epoch": 32.09437621202327, |
| "grad_norm": 1.768607497215271, |
| "learning_rate": 0.001, |
| "loss": 1.3397, |
| "step": 99300 |
| }, |
| { |
| "epoch": 32.126696832579185, |
| "grad_norm": 1.3650957345962524, |
| "learning_rate": 0.001, |
| "loss": 1.3694, |
| "step": 99400 |
| }, |
| { |
| "epoch": 32.1590174531351, |
| "grad_norm": 1.4948350191116333, |
| "learning_rate": 0.001, |
| "loss": 1.3792, |
| "step": 99500 |
| }, |
| { |
| "epoch": 32.191338073691014, |
| "grad_norm": 1.908570647239685, |
| "learning_rate": 0.001, |
| "loss": 1.3625, |
| "step": 99600 |
| }, |
| { |
| "epoch": 32.22365869424693, |
| "grad_norm": 1.3277240991592407, |
| "learning_rate": 0.001, |
| "loss": 1.3784, |
| "step": 99700 |
| }, |
| { |
| "epoch": 32.25597931480284, |
| "grad_norm": 1.5453720092773438, |
| "learning_rate": 0.001, |
| "loss": 1.382, |
| "step": 99800 |
| }, |
| { |
| "epoch": 32.28829993535876, |
| "grad_norm": 1.4696482419967651, |
| "learning_rate": 0.001, |
| "loss": 1.3842, |
| "step": 99900 |
| }, |
| { |
| "epoch": 32.32062055591467, |
| "grad_norm": 1.1971416473388672, |
| "learning_rate": 0.001, |
| "loss": 1.4028, |
| "step": 100000 |
| }, |
| { |
| "epoch": 32.35294117647059, |
| "grad_norm": 1.68018639087677, |
| "learning_rate": 0.001, |
| "loss": 1.4056, |
| "step": 100100 |
| }, |
| { |
| "epoch": 32.3852617970265, |
| "grad_norm": 1.378050446510315, |
| "learning_rate": 0.001, |
| "loss": 1.4084, |
| "step": 100200 |
| }, |
| { |
| "epoch": 32.417582417582416, |
| "grad_norm": 1.5301188230514526, |
| "learning_rate": 0.001, |
| "loss": 1.4073, |
| "step": 100300 |
| }, |
| { |
| "epoch": 32.44990303813833, |
| "grad_norm": 1.2459090948104858, |
| "learning_rate": 0.001, |
| "loss": 1.4053, |
| "step": 100400 |
| }, |
| { |
| "epoch": 32.482223658694245, |
| "grad_norm": 1.490999460220337, |
| "learning_rate": 0.001, |
| "loss": 1.3908, |
| "step": 100500 |
| }, |
| { |
| "epoch": 32.51454427925016, |
| "grad_norm": 1.8385999202728271, |
| "learning_rate": 0.001, |
| "loss": 1.3967, |
| "step": 100600 |
| }, |
| { |
| "epoch": 32.546864899806074, |
| "grad_norm": 1.4233742952346802, |
| "learning_rate": 0.001, |
| "loss": 1.4154, |
| "step": 100700 |
| }, |
| { |
| "epoch": 32.57918552036199, |
| "grad_norm": 1.403134822845459, |
| "learning_rate": 0.001, |
| "loss": 1.4066, |
| "step": 100800 |
| }, |
| { |
| "epoch": 32.6115061409179, |
| "grad_norm": 1.558266043663025, |
| "learning_rate": 0.001, |
| "loss": 1.4068, |
| "step": 100900 |
| }, |
| { |
| "epoch": 32.64382676147382, |
| "grad_norm": 1.7633984088897705, |
| "learning_rate": 0.001, |
| "loss": 1.429, |
| "step": 101000 |
| }, |
| { |
| "epoch": 32.67614738202973, |
| "grad_norm": 1.5486977100372314, |
| "learning_rate": 0.001, |
| "loss": 1.4175, |
| "step": 101100 |
| }, |
| { |
| "epoch": 32.70846800258565, |
| "grad_norm": 1.4046351909637451, |
| "learning_rate": 0.001, |
| "loss": 1.4407, |
| "step": 101200 |
| }, |
| { |
| "epoch": 32.74078862314156, |
| "grad_norm": 1.6478217840194702, |
| "learning_rate": 0.001, |
| "loss": 1.4231, |
| "step": 101300 |
| }, |
| { |
| "epoch": 32.773109243697476, |
| "grad_norm": 1.5102832317352295, |
| "learning_rate": 0.001, |
| "loss": 1.4342, |
| "step": 101400 |
| }, |
| { |
| "epoch": 32.80542986425339, |
| "grad_norm": 1.3012944459915161, |
| "learning_rate": 0.001, |
| "loss": 1.4379, |
| "step": 101500 |
| }, |
| { |
| "epoch": 32.837750484809305, |
| "grad_norm": 1.6932172775268555, |
| "learning_rate": 0.001, |
| "loss": 1.4469, |
| "step": 101600 |
| }, |
| { |
| "epoch": 32.87007110536522, |
| "grad_norm": 1.916825771331787, |
| "learning_rate": 0.001, |
| "loss": 1.4309, |
| "step": 101700 |
| }, |
| { |
| "epoch": 32.902391725921134, |
| "grad_norm": 1.3492487668991089, |
| "learning_rate": 0.001, |
| "loss": 1.4423, |
| "step": 101800 |
| }, |
| { |
| "epoch": 32.93471234647705, |
| "grad_norm": 1.241912603378296, |
| "learning_rate": 0.001, |
| "loss": 1.4403, |
| "step": 101900 |
| }, |
| { |
| "epoch": 32.967032967032964, |
| "grad_norm": 1.467849612236023, |
| "learning_rate": 0.001, |
| "loss": 1.4475, |
| "step": 102000 |
| }, |
| { |
| "epoch": 32.999353587588885, |
| "grad_norm": 1.6124597787857056, |
| "learning_rate": 0.001, |
| "loss": 1.42, |
| "step": 102100 |
| }, |
| { |
| "epoch": 33.0316742081448, |
| "grad_norm": 1.371381163597107, |
| "learning_rate": 0.001, |
| "loss": 1.3293, |
| "step": 102200 |
| }, |
| { |
| "epoch": 33.063994828700714, |
| "grad_norm": 1.6305900812149048, |
| "learning_rate": 0.001, |
| "loss": 1.3276, |
| "step": 102300 |
| }, |
| { |
| "epoch": 33.09631544925663, |
| "grad_norm": 1.3269321918487549, |
| "learning_rate": 0.001, |
| "loss": 1.3424, |
| "step": 102400 |
| }, |
| { |
| "epoch": 33.12863606981254, |
| "grad_norm": 1.3419619798660278, |
| "learning_rate": 0.001, |
| "loss": 1.3428, |
| "step": 102500 |
| }, |
| { |
| "epoch": 33.16095669036846, |
| "grad_norm": 1.6693239212036133, |
| "learning_rate": 0.001, |
| "loss": 1.3468, |
| "step": 102600 |
| }, |
| { |
| "epoch": 33.19327731092437, |
| "grad_norm": 1.695776104927063, |
| "learning_rate": 0.001, |
| "loss": 1.3426, |
| "step": 102700 |
| }, |
| { |
| "epoch": 33.22559793148029, |
| "grad_norm": 1.5298768281936646, |
| "learning_rate": 0.001, |
| "loss": 1.3613, |
| "step": 102800 |
| }, |
| { |
| "epoch": 33.2579185520362, |
| "grad_norm": 1.1615654230117798, |
| "learning_rate": 0.001, |
| "loss": 1.3602, |
| "step": 102900 |
| }, |
| { |
| "epoch": 33.290239172592116, |
| "grad_norm": 1.2159184217453003, |
| "learning_rate": 0.001, |
| "loss": 1.3699, |
| "step": 103000 |
| }, |
| { |
| "epoch": 33.32255979314803, |
| "grad_norm": 1.51858389377594, |
| "learning_rate": 0.001, |
| "loss": 1.3637, |
| "step": 103100 |
| }, |
| { |
| "epoch": 33.354880413703945, |
| "grad_norm": 1.4186036586761475, |
| "learning_rate": 0.001, |
| "loss": 1.3848, |
| "step": 103200 |
| }, |
| { |
| "epoch": 33.38720103425986, |
| "grad_norm": 1.075792908668518, |
| "learning_rate": 0.001, |
| "loss": 1.386, |
| "step": 103300 |
| }, |
| { |
| "epoch": 33.419521654815775, |
| "grad_norm": 1.2305415868759155, |
| "learning_rate": 0.001, |
| "loss": 1.3859, |
| "step": 103400 |
| }, |
| { |
| "epoch": 33.45184227537169, |
| "grad_norm": 1.5435240268707275, |
| "learning_rate": 0.001, |
| "loss": 1.3855, |
| "step": 103500 |
| }, |
| { |
| "epoch": 33.484162895927604, |
| "grad_norm": 1.311586618423462, |
| "learning_rate": 0.001, |
| "loss": 1.3882, |
| "step": 103600 |
| }, |
| { |
| "epoch": 33.51648351648352, |
| "grad_norm": 1.5243502855300903, |
| "learning_rate": 0.001, |
| "loss": 1.3927, |
| "step": 103700 |
| }, |
| { |
| "epoch": 33.54880413703943, |
| "grad_norm": 1.2692375183105469, |
| "learning_rate": 0.001, |
| "loss": 1.4102, |
| "step": 103800 |
| }, |
| { |
| "epoch": 33.58112475759535, |
| "grad_norm": 1.051710844039917, |
| "learning_rate": 0.001, |
| "loss": 1.3751, |
| "step": 103900 |
| }, |
| { |
| "epoch": 33.61344537815126, |
| "grad_norm": 1.5681949853897095, |
| "learning_rate": 0.001, |
| "loss": 1.4064, |
| "step": 104000 |
| }, |
| { |
| "epoch": 33.645765998707176, |
| "grad_norm": 1.0312007665634155, |
| "learning_rate": 0.001, |
| "loss": 1.3998, |
| "step": 104100 |
| }, |
| { |
| "epoch": 33.67808661926309, |
| "grad_norm": 1.3208967447280884, |
| "learning_rate": 0.001, |
| "loss": 1.4146, |
| "step": 104200 |
| }, |
| { |
| "epoch": 33.710407239819006, |
| "grad_norm": 1.1144686937332153, |
| "learning_rate": 0.001, |
| "loss": 1.4013, |
| "step": 104300 |
| }, |
| { |
| "epoch": 33.74272786037492, |
| "grad_norm": 1.0032135248184204, |
| "learning_rate": 0.001, |
| "loss": 1.4139, |
| "step": 104400 |
| }, |
| { |
| "epoch": 33.775048480930835, |
| "grad_norm": 1.4895492792129517, |
| "learning_rate": 0.001, |
| "loss": 1.431, |
| "step": 104500 |
| }, |
| { |
| "epoch": 33.80736910148675, |
| "grad_norm": 1.7431833744049072, |
| "learning_rate": 0.001, |
| "loss": 1.4265, |
| "step": 104600 |
| }, |
| { |
| "epoch": 33.839689722042664, |
| "grad_norm": 1.2346980571746826, |
| "learning_rate": 0.001, |
| "loss": 1.4348, |
| "step": 104700 |
| }, |
| { |
| "epoch": 33.87201034259858, |
| "grad_norm": 1.3240312337875366, |
| "learning_rate": 0.001, |
| "loss": 1.423, |
| "step": 104800 |
| }, |
| { |
| "epoch": 33.90433096315449, |
| "grad_norm": 1.233581304550171, |
| "learning_rate": 0.001, |
| "loss": 1.4241, |
| "step": 104900 |
| }, |
| { |
| "epoch": 33.93665158371041, |
| "grad_norm": 1.156237006187439, |
| "learning_rate": 0.001, |
| "loss": 1.436, |
| "step": 105000 |
| }, |
| { |
| "epoch": 33.96897220426632, |
| "grad_norm": 1.8369982242584229, |
| "learning_rate": 0.001, |
| "loss": 1.467, |
| "step": 105100 |
| }, |
| { |
| "epoch": 34.00129282482224, |
| "grad_norm": 1.6478368043899536, |
| "learning_rate": 0.001, |
| "loss": 1.4149, |
| "step": 105200 |
| }, |
| { |
| "epoch": 34.03361344537815, |
| "grad_norm": 1.313193917274475, |
| "learning_rate": 0.001, |
| "loss": 1.3248, |
| "step": 105300 |
| }, |
| { |
| "epoch": 34.065934065934066, |
| "grad_norm": 1.7729835510253906, |
| "learning_rate": 0.001, |
| "loss": 1.3279, |
| "step": 105400 |
| }, |
| { |
| "epoch": 34.09825468648998, |
| "grad_norm": 1.3086352348327637, |
| "learning_rate": 0.001, |
| "loss": 1.3106, |
| "step": 105500 |
| }, |
| { |
| "epoch": 34.130575307045895, |
| "grad_norm": 1.9100786447525024, |
| "learning_rate": 0.001, |
| "loss": 1.341, |
| "step": 105600 |
| }, |
| { |
| "epoch": 34.16289592760181, |
| "grad_norm": 1.6001800298690796, |
| "learning_rate": 0.001, |
| "loss": 1.3336, |
| "step": 105700 |
| }, |
| { |
| "epoch": 34.195216548157724, |
| "grad_norm": 2.048259973526001, |
| "learning_rate": 0.001, |
| "loss": 1.342, |
| "step": 105800 |
| }, |
| { |
| "epoch": 34.22753716871364, |
| "grad_norm": 1.946439266204834, |
| "learning_rate": 0.001, |
| "loss": 1.3327, |
| "step": 105900 |
| }, |
| { |
| "epoch": 34.25985778926955, |
| "grad_norm": 2.4914627075195312, |
| "learning_rate": 0.001, |
| "loss": 1.3502, |
| "step": 106000 |
| }, |
| { |
| "epoch": 34.29217840982547, |
| "grad_norm": 1.5728304386138916, |
| "learning_rate": 0.001, |
| "loss": 1.3537, |
| "step": 106100 |
| }, |
| { |
| "epoch": 34.32449903038138, |
| "grad_norm": 1.6155190467834473, |
| "learning_rate": 0.001, |
| "loss": 1.3541, |
| "step": 106200 |
| }, |
| { |
| "epoch": 34.3568196509373, |
| "grad_norm": 1.7084872722625732, |
| "learning_rate": 0.001, |
| "loss": 1.3779, |
| "step": 106300 |
| }, |
| { |
| "epoch": 34.38914027149321, |
| "grad_norm": 2.218944787979126, |
| "learning_rate": 0.001, |
| "loss": 1.3609, |
| "step": 106400 |
| }, |
| { |
| "epoch": 34.421460892049126, |
| "grad_norm": 1.687677025794983, |
| "learning_rate": 0.001, |
| "loss": 1.3636, |
| "step": 106500 |
| }, |
| { |
| "epoch": 34.45378151260504, |
| "grad_norm": 1.9398376941680908, |
| "learning_rate": 0.001, |
| "loss": 1.367, |
| "step": 106600 |
| }, |
| { |
| "epoch": 34.486102133160955, |
| "grad_norm": 1.5889030694961548, |
| "learning_rate": 0.001, |
| "loss": 1.3579, |
| "step": 106700 |
| }, |
| { |
| "epoch": 34.51842275371687, |
| "grad_norm": 1.5578453540802002, |
| "learning_rate": 0.001, |
| "loss": 1.3691, |
| "step": 106800 |
| }, |
| { |
| "epoch": 34.550743374272784, |
| "grad_norm": 1.7951077222824097, |
| "learning_rate": 0.001, |
| "loss": 1.3921, |
| "step": 106900 |
| }, |
| { |
| "epoch": 34.5830639948287, |
| "grad_norm": 1.4601072072982788, |
| "learning_rate": 0.001, |
| "loss": 1.3783, |
| "step": 107000 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 1.6969603300094604, |
| "learning_rate": 0.001, |
| "loss": 1.3871, |
| "step": 107100 |
| }, |
| { |
| "epoch": 34.64770523594053, |
| "grad_norm": 2.027162551879883, |
| "learning_rate": 0.001, |
| "loss": 1.3973, |
| "step": 107200 |
| }, |
| { |
| "epoch": 34.68002585649644, |
| "grad_norm": 1.6259560585021973, |
| "learning_rate": 0.001, |
| "loss": 1.3925, |
| "step": 107300 |
| }, |
| { |
| "epoch": 34.71234647705236, |
| "grad_norm": 2.365182876586914, |
| "learning_rate": 0.001, |
| "loss": 1.3948, |
| "step": 107400 |
| }, |
| { |
| "epoch": 34.74466709760827, |
| "grad_norm": 1.8796085119247437, |
| "learning_rate": 0.001, |
| "loss": 1.3853, |
| "step": 107500 |
| }, |
| { |
| "epoch": 34.776987718164186, |
| "grad_norm": 2.121328115463257, |
| "learning_rate": 0.001, |
| "loss": 1.3946, |
| "step": 107600 |
| }, |
| { |
| "epoch": 34.8093083387201, |
| "grad_norm": 2.0668091773986816, |
| "learning_rate": 0.001, |
| "loss": 1.4044, |
| "step": 107700 |
| }, |
| { |
| "epoch": 34.841628959276015, |
| "grad_norm": 1.8705992698669434, |
| "learning_rate": 0.001, |
| "loss": 1.4063, |
| "step": 107800 |
| }, |
| { |
| "epoch": 34.87394957983193, |
| "grad_norm": 1.508862018585205, |
| "learning_rate": 0.001, |
| "loss": 1.4206, |
| "step": 107900 |
| }, |
| { |
| "epoch": 34.906270200387844, |
| "grad_norm": 1.69413161277771, |
| "learning_rate": 0.001, |
| "loss": 1.4, |
| "step": 108000 |
| }, |
| { |
| "epoch": 34.93859082094376, |
| "grad_norm": 1.6866521835327148, |
| "learning_rate": 0.001, |
| "loss": 1.4208, |
| "step": 108100 |
| }, |
| { |
| "epoch": 34.97091144149967, |
| "grad_norm": 1.5360355377197266, |
| "learning_rate": 0.001, |
| "loss": 1.4136, |
| "step": 108200 |
| }, |
| { |
| "epoch": 35.003232062055595, |
| "grad_norm": 1.6422771215438843, |
| "learning_rate": 0.001, |
| "loss": 1.4272, |
| "step": 108300 |
| }, |
| { |
| "epoch": 35.03555268261151, |
| "grad_norm": 1.7707010507583618, |
| "learning_rate": 0.001, |
| "loss": 1.301, |
| "step": 108400 |
| }, |
| { |
| "epoch": 35.067873303167424, |
| "grad_norm": 1.2861260175704956, |
| "learning_rate": 0.001, |
| "loss": 1.2999, |
| "step": 108500 |
| }, |
| { |
| "epoch": 35.10019392372334, |
| "grad_norm": 1.4097256660461426, |
| "learning_rate": 0.001, |
| "loss": 1.2971, |
| "step": 108600 |
| }, |
| { |
| "epoch": 35.13251454427925, |
| "grad_norm": 1.6693590879440308, |
| "learning_rate": 0.001, |
| "loss": 1.3326, |
| "step": 108700 |
| }, |
| { |
| "epoch": 35.16483516483517, |
| "grad_norm": 1.5808466672897339, |
| "learning_rate": 0.001, |
| "loss": 1.3262, |
| "step": 108800 |
| }, |
| { |
| "epoch": 35.19715578539108, |
| "grad_norm": 2.1594834327697754, |
| "learning_rate": 0.001, |
| "loss": 1.3371, |
| "step": 108900 |
| }, |
| { |
| "epoch": 35.229476405947, |
| "grad_norm": 1.4698948860168457, |
| "learning_rate": 0.001, |
| "loss": 1.3155, |
| "step": 109000 |
| }, |
| { |
| "epoch": 35.26179702650291, |
| "grad_norm": 1.303154706954956, |
| "learning_rate": 0.001, |
| "loss": 1.3431, |
| "step": 109100 |
| }, |
| { |
| "epoch": 35.294117647058826, |
| "grad_norm": 2.034785032272339, |
| "learning_rate": 0.001, |
| "loss": 1.3363, |
| "step": 109200 |
| }, |
| { |
| "epoch": 35.32643826761474, |
| "grad_norm": 1.606525182723999, |
| "learning_rate": 0.001, |
| "loss": 1.3313, |
| "step": 109300 |
| }, |
| { |
| "epoch": 35.358758888170655, |
| "grad_norm": 1.4785006046295166, |
| "learning_rate": 0.001, |
| "loss": 1.3454, |
| "step": 109400 |
| }, |
| { |
| "epoch": 35.39107950872657, |
| "grad_norm": 1.8039274215698242, |
| "learning_rate": 0.001, |
| "loss": 1.3362, |
| "step": 109500 |
| }, |
| { |
| "epoch": 35.423400129282484, |
| "grad_norm": 1.7395920753479004, |
| "learning_rate": 0.001, |
| "loss": 1.3266, |
| "step": 109600 |
| }, |
| { |
| "epoch": 35.4557207498384, |
| "grad_norm": 1.8243016004562378, |
| "learning_rate": 0.001, |
| "loss": 1.3594, |
| "step": 109700 |
| }, |
| { |
| "epoch": 35.48804137039431, |
| "grad_norm": 1.5380924940109253, |
| "learning_rate": 0.001, |
| "loss": 1.3546, |
| "step": 109800 |
| }, |
| { |
| "epoch": 35.52036199095023, |
| "grad_norm": 1.3779228925704956, |
| "learning_rate": 0.001, |
| "loss": 1.3811, |
| "step": 109900 |
| }, |
| { |
| "epoch": 35.55268261150614, |
| "grad_norm": 2.3389909267425537, |
| "learning_rate": 0.001, |
| "loss": 1.3681, |
| "step": 110000 |
| }, |
| { |
| "epoch": 35.58500323206206, |
| "grad_norm": 1.4241420030593872, |
| "learning_rate": 0.001, |
| "loss": 1.3503, |
| "step": 110100 |
| }, |
| { |
| "epoch": 35.61732385261797, |
| "grad_norm": 1.442950963973999, |
| "learning_rate": 0.001, |
| "loss": 1.3631, |
| "step": 110200 |
| }, |
| { |
| "epoch": 35.649644473173886, |
| "grad_norm": 1.332844614982605, |
| "learning_rate": 0.001, |
| "loss": 1.3938, |
| "step": 110300 |
| }, |
| { |
| "epoch": 35.6819650937298, |
| "grad_norm": 1.8216750621795654, |
| "learning_rate": 0.001, |
| "loss": 1.3864, |
| "step": 110400 |
| }, |
| { |
| "epoch": 35.714285714285715, |
| "grad_norm": 1.7127039432525635, |
| "learning_rate": 0.001, |
| "loss": 1.3654, |
| "step": 110500 |
| }, |
| { |
| "epoch": 35.74660633484163, |
| "grad_norm": 2.0166015625, |
| "learning_rate": 0.001, |
| "loss": 1.3565, |
| "step": 110600 |
| }, |
| { |
| "epoch": 35.778926955397544, |
| "grad_norm": 1.2647751569747925, |
| "learning_rate": 0.001, |
| "loss": 1.3747, |
| "step": 110700 |
| }, |
| { |
| "epoch": 35.81124757595346, |
| "grad_norm": 1.3230773210525513, |
| "learning_rate": 0.001, |
| "loss": 1.3771, |
| "step": 110800 |
| }, |
| { |
| "epoch": 35.84356819650937, |
| "grad_norm": 1.4883989095687866, |
| "learning_rate": 0.001, |
| "loss": 1.42, |
| "step": 110900 |
| }, |
| { |
| "epoch": 35.87588881706529, |
| "grad_norm": 1.489632248878479, |
| "learning_rate": 0.001, |
| "loss": 1.37, |
| "step": 111000 |
| }, |
| { |
| "epoch": 35.9082094376212, |
| "grad_norm": 1.4182137250900269, |
| "learning_rate": 0.001, |
| "loss": 1.3863, |
| "step": 111100 |
| }, |
| { |
| "epoch": 35.94053005817712, |
| "grad_norm": 2.037407159805298, |
| "learning_rate": 0.001, |
| "loss": 1.3966, |
| "step": 111200 |
| }, |
| { |
| "epoch": 35.97285067873303, |
| "grad_norm": 1.4528826475143433, |
| "learning_rate": 0.001, |
| "loss": 1.4004, |
| "step": 111300 |
| }, |
| { |
| "epoch": 36.005171299288946, |
| "grad_norm": 1.3675060272216797, |
| "learning_rate": 0.001, |
| "loss": 1.4169, |
| "step": 111400 |
| }, |
| { |
| "epoch": 36.03749191984486, |
| "grad_norm": 1.653662919998169, |
| "learning_rate": 0.001, |
| "loss": 1.2959, |
| "step": 111500 |
| }, |
| { |
| "epoch": 36.069812540400775, |
| "grad_norm": 1.1706920862197876, |
| "learning_rate": 0.001, |
| "loss": 1.2975, |
| "step": 111600 |
| }, |
| { |
| "epoch": 36.10213316095669, |
| "grad_norm": 1.3043246269226074, |
| "learning_rate": 0.001, |
| "loss": 1.2886, |
| "step": 111700 |
| }, |
| { |
| "epoch": 36.134453781512605, |
| "grad_norm": 1.1849963665008545, |
| "learning_rate": 0.001, |
| "loss": 1.2927, |
| "step": 111800 |
| }, |
| { |
| "epoch": 36.16677440206852, |
| "grad_norm": 1.2800124883651733, |
| "learning_rate": 0.001, |
| "loss": 1.3027, |
| "step": 111900 |
| }, |
| { |
| "epoch": 36.199095022624434, |
| "grad_norm": 1.2872220277786255, |
| "learning_rate": 0.001, |
| "loss": 1.3163, |
| "step": 112000 |
| }, |
| { |
| "epoch": 36.23141564318035, |
| "grad_norm": 2.119028329849243, |
| "learning_rate": 0.001, |
| "loss": 1.3386, |
| "step": 112100 |
| }, |
| { |
| "epoch": 36.26373626373626, |
| "grad_norm": 1.3781286478042603, |
| "learning_rate": 0.001, |
| "loss": 1.3095, |
| "step": 112200 |
| }, |
| { |
| "epoch": 36.29605688429218, |
| "grad_norm": 1.3859822750091553, |
| "learning_rate": 0.001, |
| "loss": 1.3099, |
| "step": 112300 |
| }, |
| { |
| "epoch": 36.32837750484809, |
| "grad_norm": 1.6260432004928589, |
| "learning_rate": 0.001, |
| "loss": 1.3324, |
| "step": 112400 |
| }, |
| { |
| "epoch": 36.36069812540401, |
| "grad_norm": 1.4716598987579346, |
| "learning_rate": 0.001, |
| "loss": 1.3418, |
| "step": 112500 |
| }, |
| { |
| "epoch": 36.39301874595992, |
| "grad_norm": 1.2116726636886597, |
| "learning_rate": 0.001, |
| "loss": 1.3201, |
| "step": 112600 |
| }, |
| { |
| "epoch": 36.425339366515836, |
| "grad_norm": 1.6337072849273682, |
| "learning_rate": 0.001, |
| "loss": 1.3316, |
| "step": 112700 |
| }, |
| { |
| "epoch": 36.45765998707175, |
| "grad_norm": 1.3227875232696533, |
| "learning_rate": 0.001, |
| "loss": 1.3599, |
| "step": 112800 |
| }, |
| { |
| "epoch": 36.489980607627665, |
| "grad_norm": 1.3499410152435303, |
| "learning_rate": 0.001, |
| "loss": 1.3502, |
| "step": 112900 |
| }, |
| { |
| "epoch": 36.52230122818358, |
| "grad_norm": 1.3329325914382935, |
| "learning_rate": 0.001, |
| "loss": 1.3339, |
| "step": 113000 |
| }, |
| { |
| "epoch": 36.554621848739494, |
| "grad_norm": 1.4539114236831665, |
| "learning_rate": 0.001, |
| "loss": 1.3449, |
| "step": 113100 |
| }, |
| { |
| "epoch": 36.58694246929541, |
| "grad_norm": 2.205564022064209, |
| "learning_rate": 0.001, |
| "loss": 1.352, |
| "step": 113200 |
| }, |
| { |
| "epoch": 36.61926308985132, |
| "grad_norm": 1.6035467386245728, |
| "learning_rate": 0.001, |
| "loss": 1.3914, |
| "step": 113300 |
| }, |
| { |
| "epoch": 36.65158371040724, |
| "grad_norm": 1.2898305654525757, |
| "learning_rate": 0.001, |
| "loss": 1.3443, |
| "step": 113400 |
| }, |
| { |
| "epoch": 36.68390433096315, |
| "grad_norm": 1.606456995010376, |
| "learning_rate": 0.001, |
| "loss": 1.3539, |
| "step": 113500 |
| }, |
| { |
| "epoch": 36.71622495151907, |
| "grad_norm": 1.2838432788848877, |
| "learning_rate": 0.001, |
| "loss": 1.3457, |
| "step": 113600 |
| }, |
| { |
| "epoch": 36.74854557207498, |
| "grad_norm": 1.1542515754699707, |
| "learning_rate": 0.001, |
| "loss": 1.3706, |
| "step": 113700 |
| }, |
| { |
| "epoch": 36.780866192630896, |
| "grad_norm": 1.388629674911499, |
| "learning_rate": 0.001, |
| "loss": 1.3513, |
| "step": 113800 |
| }, |
| { |
| "epoch": 36.81318681318681, |
| "grad_norm": 1.447067141532898, |
| "learning_rate": 0.001, |
| "loss": 1.3755, |
| "step": 113900 |
| }, |
| { |
| "epoch": 36.845507433742725, |
| "grad_norm": 1.297431468963623, |
| "learning_rate": 0.001, |
| "loss": 1.365, |
| "step": 114000 |
| }, |
| { |
| "epoch": 36.87782805429864, |
| "grad_norm": 1.7740482091903687, |
| "learning_rate": 0.001, |
| "loss": 1.3714, |
| "step": 114100 |
| }, |
| { |
| "epoch": 36.910148674854554, |
| "grad_norm": 1.3775901794433594, |
| "learning_rate": 0.001, |
| "loss": 1.3733, |
| "step": 114200 |
| }, |
| { |
| "epoch": 36.94246929541047, |
| "grad_norm": 1.302278995513916, |
| "learning_rate": 0.001, |
| "loss": 1.37, |
| "step": 114300 |
| }, |
| { |
| "epoch": 36.97478991596638, |
| "grad_norm": 1.3867436647415161, |
| "learning_rate": 0.001, |
| "loss": 1.3881, |
| "step": 114400 |
| }, |
| { |
| "epoch": 37.007110536522305, |
| "grad_norm": 1.459952473640442, |
| "learning_rate": 0.001, |
| "loss": 1.3532, |
| "step": 114500 |
| }, |
| { |
| "epoch": 37.03943115707822, |
| "grad_norm": 1.1632601022720337, |
| "learning_rate": 0.001, |
| "loss": 1.2744, |
| "step": 114600 |
| }, |
| { |
| "epoch": 37.071751777634134, |
| "grad_norm": 1.4252634048461914, |
| "learning_rate": 0.001, |
| "loss": 1.2723, |
| "step": 114700 |
| }, |
| { |
| "epoch": 37.10407239819005, |
| "grad_norm": 1.4162706136703491, |
| "learning_rate": 0.001, |
| "loss": 1.2911, |
| "step": 114800 |
| }, |
| { |
| "epoch": 37.13639301874596, |
| "grad_norm": 1.4186283349990845, |
| "learning_rate": 0.001, |
| "loss": 1.273, |
| "step": 114900 |
| }, |
| { |
| "epoch": 37.16871363930188, |
| "grad_norm": 1.3726344108581543, |
| "learning_rate": 0.001, |
| "loss": 1.2772, |
| "step": 115000 |
| }, |
| { |
| "epoch": 37.20103425985779, |
| "grad_norm": 1.6328552961349487, |
| "learning_rate": 0.001, |
| "loss": 1.2846, |
| "step": 115100 |
| }, |
| { |
| "epoch": 37.23335488041371, |
| "grad_norm": 1.3136317729949951, |
| "learning_rate": 0.001, |
| "loss": 1.3262, |
| "step": 115200 |
| }, |
| { |
| "epoch": 37.26567550096962, |
| "grad_norm": 1.3501302003860474, |
| "learning_rate": 0.001, |
| "loss": 1.3047, |
| "step": 115300 |
| }, |
| { |
| "epoch": 37.297996121525536, |
| "grad_norm": 1.55678391456604, |
| "learning_rate": 0.001, |
| "loss": 1.3003, |
| "step": 115400 |
| }, |
| { |
| "epoch": 37.33031674208145, |
| "grad_norm": 1.5524173974990845, |
| "learning_rate": 0.001, |
| "loss": 1.3146, |
| "step": 115500 |
| }, |
| { |
| "epoch": 37.362637362637365, |
| "grad_norm": 1.2920715808868408, |
| "learning_rate": 0.001, |
| "loss": 1.3089, |
| "step": 115600 |
| }, |
| { |
| "epoch": 37.39495798319328, |
| "grad_norm": 1.1357648372650146, |
| "learning_rate": 0.001, |
| "loss": 1.3199, |
| "step": 115700 |
| }, |
| { |
| "epoch": 37.427278603749194, |
| "grad_norm": 1.5290732383728027, |
| "learning_rate": 0.001, |
| "loss": 1.3319, |
| "step": 115800 |
| }, |
| { |
| "epoch": 37.45959922430511, |
| "grad_norm": 1.258813500404358, |
| "learning_rate": 0.001, |
| "loss": 1.3225, |
| "step": 115900 |
| }, |
| { |
| "epoch": 37.49191984486102, |
| "grad_norm": 1.464493751525879, |
| "learning_rate": 0.001, |
| "loss": 1.3389, |
| "step": 116000 |
| }, |
| { |
| "epoch": 37.52424046541694, |
| "grad_norm": 1.4164890050888062, |
| "learning_rate": 0.001, |
| "loss": 1.3241, |
| "step": 116100 |
| }, |
| { |
| "epoch": 37.55656108597285, |
| "grad_norm": 1.4241828918457031, |
| "learning_rate": 0.001, |
| "loss": 1.3247, |
| "step": 116200 |
| }, |
| { |
| "epoch": 37.58888170652877, |
| "grad_norm": 1.3933970928192139, |
| "learning_rate": 0.001, |
| "loss": 1.3393, |
| "step": 116300 |
| }, |
| { |
| "epoch": 37.62120232708468, |
| "grad_norm": 1.3898669481277466, |
| "learning_rate": 0.001, |
| "loss": 1.3299, |
| "step": 116400 |
| }, |
| { |
| "epoch": 37.653522947640596, |
| "grad_norm": 1.510009527206421, |
| "learning_rate": 0.001, |
| "loss": 1.3366, |
| "step": 116500 |
| }, |
| { |
| "epoch": 37.68584356819651, |
| "grad_norm": 1.4854003190994263, |
| "learning_rate": 0.001, |
| "loss": 1.3391, |
| "step": 116600 |
| }, |
| { |
| "epoch": 37.718164188752425, |
| "grad_norm": 1.470606803894043, |
| "learning_rate": 0.001, |
| "loss": 1.3546, |
| "step": 116700 |
| }, |
| { |
| "epoch": 37.75048480930834, |
| "grad_norm": 1.4459855556488037, |
| "learning_rate": 0.001, |
| "loss": 1.3478, |
| "step": 116800 |
| }, |
| { |
| "epoch": 37.782805429864254, |
| "grad_norm": 1.8643691539764404, |
| "learning_rate": 0.001, |
| "loss": 1.3612, |
| "step": 116900 |
| }, |
| { |
| "epoch": 37.81512605042017, |
| "grad_norm": 1.3574726581573486, |
| "learning_rate": 0.001, |
| "loss": 1.338, |
| "step": 117000 |
| }, |
| { |
| "epoch": 37.84744667097608, |
| "grad_norm": 1.159055233001709, |
| "learning_rate": 0.001, |
| "loss": 1.3554, |
| "step": 117100 |
| }, |
| { |
| "epoch": 37.879767291532, |
| "grad_norm": 1.482819676399231, |
| "learning_rate": 0.001, |
| "loss": 1.3272, |
| "step": 117200 |
| }, |
| { |
| "epoch": 37.91208791208791, |
| "grad_norm": 1.246833086013794, |
| "learning_rate": 0.001, |
| "loss": 1.3687, |
| "step": 117300 |
| }, |
| { |
| "epoch": 37.94440853264383, |
| "grad_norm": 1.2404323816299438, |
| "learning_rate": 0.001, |
| "loss": 1.3753, |
| "step": 117400 |
| }, |
| { |
| "epoch": 37.97672915319974, |
| "grad_norm": 1.2670542001724243, |
| "learning_rate": 0.001, |
| "loss": 1.3756, |
| "step": 117500 |
| }, |
| { |
| "epoch": 38.009049773755656, |
| "grad_norm": 1.3727078437805176, |
| "learning_rate": 0.001, |
| "loss": 1.3417, |
| "step": 117600 |
| }, |
| { |
| "epoch": 38.04137039431157, |
| "grad_norm": 1.7219202518463135, |
| "learning_rate": 0.001, |
| "loss": 1.2648, |
| "step": 117700 |
| }, |
| { |
| "epoch": 38.073691014867485, |
| "grad_norm": 1.3733896017074585, |
| "learning_rate": 0.001, |
| "loss": 1.255, |
| "step": 117800 |
| }, |
| { |
| "epoch": 38.1060116354234, |
| "grad_norm": 1.310388445854187, |
| "learning_rate": 0.001, |
| "loss": 1.2708, |
| "step": 117900 |
| }, |
| { |
| "epoch": 38.138332255979314, |
| "grad_norm": 1.933166742324829, |
| "learning_rate": 0.001, |
| "loss": 1.2782, |
| "step": 118000 |
| }, |
| { |
| "epoch": 38.17065287653523, |
| "grad_norm": 1.2118735313415527, |
| "learning_rate": 0.001, |
| "loss": 1.2703, |
| "step": 118100 |
| }, |
| { |
| "epoch": 38.20297349709114, |
| "grad_norm": 1.679825782775879, |
| "learning_rate": 0.001, |
| "loss": 1.293, |
| "step": 118200 |
| }, |
| { |
| "epoch": 38.23529411764706, |
| "grad_norm": 1.510223150253296, |
| "learning_rate": 0.001, |
| "loss": 1.2627, |
| "step": 118300 |
| }, |
| { |
| "epoch": 38.26761473820297, |
| "grad_norm": 1.2792211771011353, |
| "learning_rate": 0.001, |
| "loss": 1.2951, |
| "step": 118400 |
| }, |
| { |
| "epoch": 38.29993535875889, |
| "grad_norm": 1.5114414691925049, |
| "learning_rate": 0.001, |
| "loss": 1.2875, |
| "step": 118500 |
| }, |
| { |
| "epoch": 38.3322559793148, |
| "grad_norm": 2.0232787132263184, |
| "learning_rate": 0.001, |
| "loss": 1.2998, |
| "step": 118600 |
| }, |
| { |
| "epoch": 38.364576599870716, |
| "grad_norm": 1.484636664390564, |
| "learning_rate": 0.001, |
| "loss": 1.2964, |
| "step": 118700 |
| }, |
| { |
| "epoch": 38.39689722042663, |
| "grad_norm": 1.48430597782135, |
| "learning_rate": 0.001, |
| "loss": 1.3011, |
| "step": 118800 |
| }, |
| { |
| "epoch": 38.429217840982545, |
| "grad_norm": 1.327134370803833, |
| "learning_rate": 0.001, |
| "loss": 1.2844, |
| "step": 118900 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 1.5213978290557861, |
| "learning_rate": 0.001, |
| "loss": 1.3078, |
| "step": 119000 |
| }, |
| { |
| "epoch": 38.493859082094374, |
| "grad_norm": 1.2812081575393677, |
| "learning_rate": 0.001, |
| "loss": 1.3009, |
| "step": 119100 |
| }, |
| { |
| "epoch": 38.52617970265029, |
| "grad_norm": 1.4381201267242432, |
| "learning_rate": 0.001, |
| "loss": 1.318, |
| "step": 119200 |
| }, |
| { |
| "epoch": 38.558500323206204, |
| "grad_norm": 1.292160987854004, |
| "learning_rate": 0.001, |
| "loss": 1.3231, |
| "step": 119300 |
| }, |
| { |
| "epoch": 38.59082094376212, |
| "grad_norm": 6.911149978637695, |
| "learning_rate": 0.001, |
| "loss": 1.3112, |
| "step": 119400 |
| }, |
| { |
| "epoch": 38.62314156431803, |
| "grad_norm": 1.2957707643508911, |
| "learning_rate": 0.001, |
| "loss": 1.3244, |
| "step": 119500 |
| }, |
| { |
| "epoch": 38.65546218487395, |
| "grad_norm": 1.4078917503356934, |
| "learning_rate": 0.001, |
| "loss": 1.3198, |
| "step": 119600 |
| }, |
| { |
| "epoch": 38.68778280542986, |
| "grad_norm": 1.5980867147445679, |
| "learning_rate": 0.001, |
| "loss": 1.3277, |
| "step": 119700 |
| }, |
| { |
| "epoch": 38.720103425985776, |
| "grad_norm": 1.592467188835144, |
| "learning_rate": 0.001, |
| "loss": 1.3444, |
| "step": 119800 |
| }, |
| { |
| "epoch": 38.75242404654169, |
| "grad_norm": 1.4102036952972412, |
| "learning_rate": 0.001, |
| "loss": 1.3247, |
| "step": 119900 |
| }, |
| { |
| "epoch": 38.784744667097605, |
| "grad_norm": 1.3911519050598145, |
| "learning_rate": 0.001, |
| "loss": 1.3336, |
| "step": 120000 |
| }, |
| { |
| "epoch": 38.81706528765352, |
| "grad_norm": 1.5132681131362915, |
| "learning_rate": 0.001, |
| "loss": 1.3244, |
| "step": 120100 |
| }, |
| { |
| "epoch": 38.849385908209435, |
| "grad_norm": 1.2358170747756958, |
| "learning_rate": 0.001, |
| "loss": 1.3629, |
| "step": 120200 |
| }, |
| { |
| "epoch": 38.88170652876535, |
| "grad_norm": 1.7172753810882568, |
| "learning_rate": 0.001, |
| "loss": 1.3467, |
| "step": 120300 |
| }, |
| { |
| "epoch": 38.914027149321264, |
| "grad_norm": 1.4436832666397095, |
| "learning_rate": 0.001, |
| "loss": 1.3567, |
| "step": 120400 |
| }, |
| { |
| "epoch": 38.94634776987718, |
| "grad_norm": 1.5369603633880615, |
| "learning_rate": 0.001, |
| "loss": 1.3385, |
| "step": 120500 |
| }, |
| { |
| "epoch": 38.97866839043309, |
| "grad_norm": 1.888579249382019, |
| "learning_rate": 0.001, |
| "loss": 1.3517, |
| "step": 120600 |
| }, |
| { |
| "epoch": 39.010989010989015, |
| "grad_norm": 1.46559476852417, |
| "learning_rate": 0.001, |
| "loss": 1.2974, |
| "step": 120700 |
| }, |
| { |
| "epoch": 39.04330963154493, |
| "grad_norm": 1.5635486841201782, |
| "learning_rate": 0.001, |
| "loss": 1.2502, |
| "step": 120800 |
| }, |
| { |
| "epoch": 39.075630252100844, |
| "grad_norm": 1.7329033613204956, |
| "learning_rate": 0.001, |
| "loss": 1.2601, |
| "step": 120900 |
| }, |
| { |
| "epoch": 39.10795087265676, |
| "grad_norm": 1.5442296266555786, |
| "learning_rate": 0.001, |
| "loss": 1.2569, |
| "step": 121000 |
| }, |
| { |
| "epoch": 39.14027149321267, |
| "grad_norm": 6.576416492462158, |
| "learning_rate": 0.001, |
| "loss": 1.2498, |
| "step": 121100 |
| }, |
| { |
| "epoch": 39.17259211376859, |
| "grad_norm": 1.594713568687439, |
| "learning_rate": 0.001, |
| "loss": 1.2479, |
| "step": 121200 |
| }, |
| { |
| "epoch": 39.2049127343245, |
| "grad_norm": 1.8197976350784302, |
| "learning_rate": 0.001, |
| "loss": 1.2542, |
| "step": 121300 |
| }, |
| { |
| "epoch": 39.237233354880416, |
| "grad_norm": 1.6412878036499023, |
| "learning_rate": 0.001, |
| "loss": 1.2893, |
| "step": 121400 |
| }, |
| { |
| "epoch": 39.26955397543633, |
| "grad_norm": 1.4169423580169678, |
| "learning_rate": 0.001, |
| "loss": 1.2744, |
| "step": 121500 |
| }, |
| { |
| "epoch": 39.301874595992246, |
| "grad_norm": 1.4842840433120728, |
| "learning_rate": 0.001, |
| "loss": 1.2591, |
| "step": 121600 |
| }, |
| { |
| "epoch": 39.33419521654816, |
| "grad_norm": 1.398146629333496, |
| "learning_rate": 0.001, |
| "loss": 1.2875, |
| "step": 121700 |
| }, |
| { |
| "epoch": 39.366515837104075, |
| "grad_norm": 1.357816219329834, |
| "learning_rate": 0.001, |
| "loss": 1.2726, |
| "step": 121800 |
| }, |
| { |
| "epoch": 39.39883645765999, |
| "grad_norm": 1.549759030342102, |
| "learning_rate": 0.001, |
| "loss": 1.2822, |
| "step": 121900 |
| }, |
| { |
| "epoch": 39.431157078215904, |
| "grad_norm": 2.409834146499634, |
| "learning_rate": 0.001, |
| "loss": 1.2891, |
| "step": 122000 |
| }, |
| { |
| "epoch": 39.46347769877182, |
| "grad_norm": 1.6706969738006592, |
| "learning_rate": 0.001, |
| "loss": 1.2912, |
| "step": 122100 |
| }, |
| { |
| "epoch": 39.49579831932773, |
| "grad_norm": 1.5066723823547363, |
| "learning_rate": 0.001, |
| "loss": 1.3089, |
| "step": 122200 |
| }, |
| { |
| "epoch": 39.52811893988365, |
| "grad_norm": 1.2610282897949219, |
| "learning_rate": 0.001, |
| "loss": 1.2997, |
| "step": 122300 |
| }, |
| { |
| "epoch": 39.56043956043956, |
| "grad_norm": 1.5646350383758545, |
| "learning_rate": 0.001, |
| "loss": 1.2968, |
| "step": 122400 |
| }, |
| { |
| "epoch": 39.59276018099548, |
| "grad_norm": 1.5079572200775146, |
| "learning_rate": 0.001, |
| "loss": 1.319, |
| "step": 122500 |
| }, |
| { |
| "epoch": 39.62508080155139, |
| "grad_norm": 1.5733426809310913, |
| "learning_rate": 0.001, |
| "loss": 1.3032, |
| "step": 122600 |
| }, |
| { |
| "epoch": 39.657401422107306, |
| "grad_norm": 2.1421279907226562, |
| "learning_rate": 0.001, |
| "loss": 1.3238, |
| "step": 122700 |
| }, |
| { |
| "epoch": 39.68972204266322, |
| "grad_norm": 1.4061602354049683, |
| "learning_rate": 0.001, |
| "loss": 1.3252, |
| "step": 122800 |
| }, |
| { |
| "epoch": 39.722042663219135, |
| "grad_norm": 2.070896625518799, |
| "learning_rate": 0.001, |
| "loss": 1.3183, |
| "step": 122900 |
| }, |
| { |
| "epoch": 39.75436328377505, |
| "grad_norm": 1.3934524059295654, |
| "learning_rate": 0.001, |
| "loss": 1.3248, |
| "step": 123000 |
| }, |
| { |
| "epoch": 39.786683904330964, |
| "grad_norm": 1.6643691062927246, |
| "learning_rate": 0.001, |
| "loss": 1.3273, |
| "step": 123100 |
| }, |
| { |
| "epoch": 39.81900452488688, |
| "grad_norm": 1.5145334005355835, |
| "learning_rate": 0.001, |
| "loss": 1.3264, |
| "step": 123200 |
| }, |
| { |
| "epoch": 39.85132514544279, |
| "grad_norm": 1.5545204877853394, |
| "learning_rate": 0.001, |
| "loss": 1.3218, |
| "step": 123300 |
| }, |
| { |
| "epoch": 39.88364576599871, |
| "grad_norm": 1.9395331144332886, |
| "learning_rate": 0.001, |
| "loss": 1.3351, |
| "step": 123400 |
| }, |
| { |
| "epoch": 39.91596638655462, |
| "grad_norm": 1.5554918050765991, |
| "learning_rate": 0.001, |
| "loss": 1.3308, |
| "step": 123500 |
| }, |
| { |
| "epoch": 39.94828700711054, |
| "grad_norm": 1.4148260354995728, |
| "learning_rate": 0.001, |
| "loss": 1.3201, |
| "step": 123600 |
| }, |
| { |
| "epoch": 39.98060762766645, |
| "grad_norm": 1.5533839464187622, |
| "learning_rate": 0.001, |
| "loss": 1.336, |
| "step": 123700 |
| }, |
| { |
| "epoch": 40.012928248222366, |
| "grad_norm": 1.5636522769927979, |
| "learning_rate": 0.001, |
| "loss": 1.2853, |
| "step": 123800 |
| }, |
| { |
| "epoch": 40.04524886877828, |
| "grad_norm": 1.8232518434524536, |
| "learning_rate": 0.001, |
| "loss": 1.2589, |
| "step": 123900 |
| }, |
| { |
| "epoch": 40.077569489334195, |
| "grad_norm": 1.736384391784668, |
| "learning_rate": 0.001, |
| "loss": 1.2304, |
| "step": 124000 |
| }, |
| { |
| "epoch": 40.10989010989011, |
| "grad_norm": 1.4018938541412354, |
| "learning_rate": 0.001, |
| "loss": 1.2321, |
| "step": 124100 |
| }, |
| { |
| "epoch": 40.142210730446024, |
| "grad_norm": 1.7340322732925415, |
| "learning_rate": 0.001, |
| "loss": 1.2276, |
| "step": 124200 |
| }, |
| { |
| "epoch": 40.17453135100194, |
| "grad_norm": 1.7465698719024658, |
| "learning_rate": 0.001, |
| "loss": 1.2472, |
| "step": 124300 |
| }, |
| { |
| "epoch": 40.20685197155785, |
| "grad_norm": 1.368248701095581, |
| "learning_rate": 0.001, |
| "loss": 1.2797, |
| "step": 124400 |
| }, |
| { |
| "epoch": 40.23917259211377, |
| "grad_norm": 1.4630776643753052, |
| "learning_rate": 0.001, |
| "loss": 1.2547, |
| "step": 124500 |
| }, |
| { |
| "epoch": 40.27149321266968, |
| "grad_norm": 1.3100026845932007, |
| "learning_rate": 0.001, |
| "loss": 1.2584, |
| "step": 124600 |
| }, |
| { |
| "epoch": 40.3038138332256, |
| "grad_norm": 1.499247431755066, |
| "learning_rate": 0.001, |
| "loss": 1.2662, |
| "step": 124700 |
| }, |
| { |
| "epoch": 40.33613445378151, |
| "grad_norm": 1.5203977823257446, |
| "learning_rate": 0.001, |
| "loss": 1.2632, |
| "step": 124800 |
| }, |
| { |
| "epoch": 40.368455074337426, |
| "grad_norm": 1.2384952306747437, |
| "learning_rate": 0.001, |
| "loss": 1.2653, |
| "step": 124900 |
| }, |
| { |
| "epoch": 40.40077569489334, |
| "grad_norm": 1.4984188079833984, |
| "learning_rate": 0.001, |
| "loss": 1.2775, |
| "step": 125000 |
| }, |
| { |
| "epoch": 40.433096315449255, |
| "grad_norm": 2.0445687770843506, |
| "learning_rate": 0.001, |
| "loss": 1.2666, |
| "step": 125100 |
| }, |
| { |
| "epoch": 40.46541693600517, |
| "grad_norm": 1.394576072692871, |
| "learning_rate": 0.001, |
| "loss": 1.2984, |
| "step": 125200 |
| }, |
| { |
| "epoch": 40.497737556561084, |
| "grad_norm": 1.9576348066329956, |
| "learning_rate": 0.001, |
| "loss": 1.2802, |
| "step": 125300 |
| }, |
| { |
| "epoch": 40.530058177117, |
| "grad_norm": 1.972380518913269, |
| "learning_rate": 0.001, |
| "loss": 1.295, |
| "step": 125400 |
| }, |
| { |
| "epoch": 40.56237879767291, |
| "grad_norm": 1.725782871246338, |
| "learning_rate": 0.001, |
| "loss": 1.2914, |
| "step": 125500 |
| }, |
| { |
| "epoch": 40.59469941822883, |
| "grad_norm": 1.329160451889038, |
| "learning_rate": 0.001, |
| "loss": 1.3045, |
| "step": 125600 |
| }, |
| { |
| "epoch": 40.62702003878474, |
| "grad_norm": 2.017256259918213, |
| "learning_rate": 0.001, |
| "loss": 1.2944, |
| "step": 125700 |
| }, |
| { |
| "epoch": 40.65934065934066, |
| "grad_norm": 1.6846826076507568, |
| "learning_rate": 0.001, |
| "loss": 1.2814, |
| "step": 125800 |
| }, |
| { |
| "epoch": 40.69166127989657, |
| "grad_norm": 1.4076398611068726, |
| "learning_rate": 0.001, |
| "loss": 1.2891, |
| "step": 125900 |
| }, |
| { |
| "epoch": 40.723981900452486, |
| "grad_norm": 1.5176540613174438, |
| "learning_rate": 0.001, |
| "loss": 1.3099, |
| "step": 126000 |
| }, |
| { |
| "epoch": 40.7563025210084, |
| "grad_norm": 1.530797004699707, |
| "learning_rate": 0.001, |
| "loss": 1.2988, |
| "step": 126100 |
| }, |
| { |
| "epoch": 40.788623141564315, |
| "grad_norm": 1.7626606225967407, |
| "learning_rate": 0.001, |
| "loss": 1.3186, |
| "step": 126200 |
| }, |
| { |
| "epoch": 40.82094376212023, |
| "grad_norm": 1.7462037801742554, |
| "learning_rate": 0.001, |
| "loss": 1.3148, |
| "step": 126300 |
| }, |
| { |
| "epoch": 40.853264382676144, |
| "grad_norm": 1.7329397201538086, |
| "learning_rate": 0.001, |
| "loss": 1.3184, |
| "step": 126400 |
| }, |
| { |
| "epoch": 40.88558500323206, |
| "grad_norm": 1.6765464544296265, |
| "learning_rate": 0.001, |
| "loss": 1.3073, |
| "step": 126500 |
| }, |
| { |
| "epoch": 40.91790562378797, |
| "grad_norm": 1.4348853826522827, |
| "learning_rate": 0.001, |
| "loss": 1.3172, |
| "step": 126600 |
| }, |
| { |
| "epoch": 40.95022624434389, |
| "grad_norm": 1.515199065208435, |
| "learning_rate": 0.001, |
| "loss": 1.3298, |
| "step": 126700 |
| }, |
| { |
| "epoch": 40.9825468648998, |
| "grad_norm": 1.3540042638778687, |
| "learning_rate": 0.001, |
| "loss": 1.3278, |
| "step": 126800 |
| }, |
| { |
| "epoch": 41.014867485455724, |
| "grad_norm": 1.2277718782424927, |
| "learning_rate": 0.001, |
| "loss": 1.2313, |
| "step": 126900 |
| }, |
| { |
| "epoch": 41.04718810601164, |
| "grad_norm": 1.4838180541992188, |
| "learning_rate": 0.001, |
| "loss": 1.2345, |
| "step": 127000 |
| }, |
| { |
| "epoch": 41.07950872656755, |
| "grad_norm": 1.5526901483535767, |
| "learning_rate": 0.001, |
| "loss": 1.2396, |
| "step": 127100 |
| }, |
| { |
| "epoch": 41.11182934712347, |
| "grad_norm": 1.3968197107315063, |
| "learning_rate": 0.001, |
| "loss": 1.244, |
| "step": 127200 |
| }, |
| { |
| "epoch": 41.14414996767938, |
| "grad_norm": 1.746369481086731, |
| "learning_rate": 0.001, |
| "loss": 1.2296, |
| "step": 127300 |
| }, |
| { |
| "epoch": 41.1764705882353, |
| "grad_norm": 1.4499101638793945, |
| "learning_rate": 0.001, |
| "loss": 1.2091, |
| "step": 127400 |
| }, |
| { |
| "epoch": 41.20879120879121, |
| "grad_norm": 1.3192867040634155, |
| "learning_rate": 0.001, |
| "loss": 1.2435, |
| "step": 127500 |
| }, |
| { |
| "epoch": 41.241111829347126, |
| "grad_norm": 2.4370741844177246, |
| "learning_rate": 0.001, |
| "loss": 1.2728, |
| "step": 127600 |
| }, |
| { |
| "epoch": 41.27343244990304, |
| "grad_norm": 1.7852426767349243, |
| "learning_rate": 0.001, |
| "loss": 1.2712, |
| "step": 127700 |
| }, |
| { |
| "epoch": 41.305753070458955, |
| "grad_norm": 1.3352060317993164, |
| "learning_rate": 0.001, |
| "loss": 1.2434, |
| "step": 127800 |
| }, |
| { |
| "epoch": 41.33807369101487, |
| "grad_norm": 1.5020313262939453, |
| "learning_rate": 0.001, |
| "loss": 1.2524, |
| "step": 127900 |
| }, |
| { |
| "epoch": 41.370394311570784, |
| "grad_norm": 2.2881219387054443, |
| "learning_rate": 0.001, |
| "loss": 1.2798, |
| "step": 128000 |
| }, |
| { |
| "epoch": 41.4027149321267, |
| "grad_norm": 1.1049096584320068, |
| "learning_rate": 0.001, |
| "loss": 1.2553, |
| "step": 128100 |
| }, |
| { |
| "epoch": 41.43503555268261, |
| "grad_norm": 1.5790807008743286, |
| "learning_rate": 0.001, |
| "loss": 1.264, |
| "step": 128200 |
| }, |
| { |
| "epoch": 41.46735617323853, |
| "grad_norm": 1.2970410585403442, |
| "learning_rate": 0.001, |
| "loss": 1.2725, |
| "step": 128300 |
| }, |
| { |
| "epoch": 41.49967679379444, |
| "grad_norm": 1.338108777999878, |
| "learning_rate": 0.001, |
| "loss": 1.272, |
| "step": 128400 |
| }, |
| { |
| "epoch": 41.53199741435036, |
| "grad_norm": 1.2910419702529907, |
| "learning_rate": 0.001, |
| "loss": 1.2712, |
| "step": 128500 |
| }, |
| { |
| "epoch": 41.56431803490627, |
| "grad_norm": 1.386048436164856, |
| "learning_rate": 0.001, |
| "loss": 1.2547, |
| "step": 128600 |
| }, |
| { |
| "epoch": 41.596638655462186, |
| "grad_norm": 1.3139433860778809, |
| "learning_rate": 0.001, |
| "loss": 1.2594, |
| "step": 128700 |
| }, |
| { |
| "epoch": 41.6289592760181, |
| "grad_norm": 1.7138657569885254, |
| "learning_rate": 0.001, |
| "loss": 1.2662, |
| "step": 128800 |
| }, |
| { |
| "epoch": 41.661279896574015, |
| "grad_norm": 1.7849534749984741, |
| "learning_rate": 0.001, |
| "loss": 1.2807, |
| "step": 128900 |
| }, |
| { |
| "epoch": 41.69360051712993, |
| "grad_norm": 1.5666148662567139, |
| "learning_rate": 0.001, |
| "loss": 1.2924, |
| "step": 129000 |
| }, |
| { |
| "epoch": 41.725921137685845, |
| "grad_norm": 1.6829689741134644, |
| "learning_rate": 0.001, |
| "loss": 1.2825, |
| "step": 129100 |
| }, |
| { |
| "epoch": 41.75824175824176, |
| "grad_norm": 2.1652581691741943, |
| "learning_rate": 0.001, |
| "loss": 1.3067, |
| "step": 129200 |
| }, |
| { |
| "epoch": 41.790562378797674, |
| "grad_norm": 1.6427013874053955, |
| "learning_rate": 0.001, |
| "loss": 1.267, |
| "step": 129300 |
| }, |
| { |
| "epoch": 41.82288299935359, |
| "grad_norm": 1.4795788526535034, |
| "learning_rate": 0.001, |
| "loss": 1.2871, |
| "step": 129400 |
| }, |
| { |
| "epoch": 41.8552036199095, |
| "grad_norm": 1.4031800031661987, |
| "learning_rate": 0.001, |
| "loss": 1.2955, |
| "step": 129500 |
| }, |
| { |
| "epoch": 41.88752424046542, |
| "grad_norm": 1.7825078964233398, |
| "learning_rate": 0.001, |
| "loss": 1.2859, |
| "step": 129600 |
| }, |
| { |
| "epoch": 41.91984486102133, |
| "grad_norm": 1.5589793920516968, |
| "learning_rate": 0.001, |
| "loss": 1.3138, |
| "step": 129700 |
| }, |
| { |
| "epoch": 41.95216548157725, |
| "grad_norm": 1.6905754804611206, |
| "learning_rate": 0.001, |
| "loss": 1.2902, |
| "step": 129800 |
| }, |
| { |
| "epoch": 41.98448610213316, |
| "grad_norm": 1.573258399963379, |
| "learning_rate": 0.001, |
| "loss": 1.3129, |
| "step": 129900 |
| }, |
| { |
| "epoch": 42.016806722689076, |
| "grad_norm": 2.0176637172698975, |
| "learning_rate": 0.001, |
| "loss": 1.2269, |
| "step": 130000 |
| }, |
| { |
| "epoch": 42.04912734324499, |
| "grad_norm": 2.4781367778778076, |
| "learning_rate": 0.001, |
| "loss": 1.1985, |
| "step": 130100 |
| }, |
| { |
| "epoch": 42.081447963800905, |
| "grad_norm": 2.272233009338379, |
| "learning_rate": 0.001, |
| "loss": 1.2131, |
| "step": 130200 |
| }, |
| { |
| "epoch": 42.11376858435682, |
| "grad_norm": 2.1599671840667725, |
| "learning_rate": 0.001, |
| "loss": 1.2109, |
| "step": 130300 |
| }, |
| { |
| "epoch": 42.146089204912734, |
| "grad_norm": 1.9499235153198242, |
| "learning_rate": 0.001, |
| "loss": 1.2144, |
| "step": 130400 |
| }, |
| { |
| "epoch": 42.17840982546865, |
| "grad_norm": 2.5726876258850098, |
| "learning_rate": 0.001, |
| "loss": 1.2383, |
| "step": 130500 |
| }, |
| { |
| "epoch": 42.21073044602456, |
| "grad_norm": 3.015073776245117, |
| "learning_rate": 0.001, |
| "loss": 1.2187, |
| "step": 130600 |
| }, |
| { |
| "epoch": 42.24305106658048, |
| "grad_norm": 1.6541657447814941, |
| "learning_rate": 0.001, |
| "loss": 1.2317, |
| "step": 130700 |
| }, |
| { |
| "epoch": 42.27537168713639, |
| "grad_norm": 2.5568466186523438, |
| "learning_rate": 0.001, |
| "loss": 1.2327, |
| "step": 130800 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 2.0873799324035645, |
| "learning_rate": 0.001, |
| "loss": 1.2395, |
| "step": 130900 |
| }, |
| { |
| "epoch": 42.34001292824822, |
| "grad_norm": 2.5677318572998047, |
| "learning_rate": 0.001, |
| "loss": 1.2289, |
| "step": 131000 |
| }, |
| { |
| "epoch": 42.372333548804136, |
| "grad_norm": 2.3683345317840576, |
| "learning_rate": 0.001, |
| "loss": 1.257, |
| "step": 131100 |
| }, |
| { |
| "epoch": 42.40465416936005, |
| "grad_norm": 2.197979688644409, |
| "learning_rate": 0.001, |
| "loss": 1.2731, |
| "step": 131200 |
| }, |
| { |
| "epoch": 42.436974789915965, |
| "grad_norm": 1.855226993560791, |
| "learning_rate": 0.001, |
| "loss": 1.2417, |
| "step": 131300 |
| }, |
| { |
| "epoch": 42.46929541047188, |
| "grad_norm": 2.141434907913208, |
| "learning_rate": 0.001, |
| "loss": 1.2596, |
| "step": 131400 |
| }, |
| { |
| "epoch": 42.501616031027794, |
| "grad_norm": 1.755460500717163, |
| "learning_rate": 0.001, |
| "loss": 1.2524, |
| "step": 131500 |
| }, |
| { |
| "epoch": 42.53393665158371, |
| "grad_norm": 2.5488452911376953, |
| "learning_rate": 0.001, |
| "loss": 1.2532, |
| "step": 131600 |
| }, |
| { |
| "epoch": 42.56625727213962, |
| "grad_norm": 1.5715889930725098, |
| "learning_rate": 0.001, |
| "loss": 1.2617, |
| "step": 131700 |
| }, |
| { |
| "epoch": 42.59857789269554, |
| "grad_norm": 2.407876968383789, |
| "learning_rate": 0.001, |
| "loss": 1.2961, |
| "step": 131800 |
| }, |
| { |
| "epoch": 42.63089851325145, |
| "grad_norm": 1.963181734085083, |
| "learning_rate": 0.001, |
| "loss": 1.2588, |
| "step": 131900 |
| }, |
| { |
| "epoch": 42.66321913380737, |
| "grad_norm": 1.7608472108840942, |
| "learning_rate": 0.001, |
| "loss": 1.2694, |
| "step": 132000 |
| }, |
| { |
| "epoch": 42.69553975436328, |
| "grad_norm": 2.268115758895874, |
| "learning_rate": 0.001, |
| "loss": 1.2631, |
| "step": 132100 |
| }, |
| { |
| "epoch": 42.727860374919196, |
| "grad_norm": 2.737407684326172, |
| "learning_rate": 0.001, |
| "loss": 1.2707, |
| "step": 132200 |
| }, |
| { |
| "epoch": 42.76018099547511, |
| "grad_norm": 2.061769723892212, |
| "learning_rate": 0.001, |
| "loss": 1.2621, |
| "step": 132300 |
| }, |
| { |
| "epoch": 42.792501616031025, |
| "grad_norm": 2.5539047718048096, |
| "learning_rate": 0.001, |
| "loss": 1.2816, |
| "step": 132400 |
| }, |
| { |
| "epoch": 42.82482223658694, |
| "grad_norm": 2.357755661010742, |
| "learning_rate": 0.001, |
| "loss": 1.2758, |
| "step": 132500 |
| }, |
| { |
| "epoch": 42.857142857142854, |
| "grad_norm": 2.4219696521759033, |
| "learning_rate": 0.001, |
| "loss": 1.291, |
| "step": 132600 |
| }, |
| { |
| "epoch": 42.88946347769877, |
| "grad_norm": 2.248538017272949, |
| "learning_rate": 0.001, |
| "loss": 1.2678, |
| "step": 132700 |
| }, |
| { |
| "epoch": 42.92178409825468, |
| "grad_norm": 2.223904848098755, |
| "learning_rate": 0.001, |
| "loss": 1.3182, |
| "step": 132800 |
| }, |
| { |
| "epoch": 42.9541047188106, |
| "grad_norm": 2.2266910076141357, |
| "learning_rate": 0.001, |
| "loss": 1.3059, |
| "step": 132900 |
| }, |
| { |
| "epoch": 42.98642533936652, |
| "grad_norm": 2.4159045219421387, |
| "learning_rate": 0.001, |
| "loss": 1.2882, |
| "step": 133000 |
| }, |
| { |
| "epoch": 43.018745959922434, |
| "grad_norm": 1.578348159790039, |
| "learning_rate": 0.001, |
| "loss": 1.2544, |
| "step": 133100 |
| }, |
| { |
| "epoch": 43.05106658047835, |
| "grad_norm": 1.3469822406768799, |
| "learning_rate": 0.001, |
| "loss": 1.1898, |
| "step": 133200 |
| }, |
| { |
| "epoch": 43.08338720103426, |
| "grad_norm": 1.6561214923858643, |
| "learning_rate": 0.001, |
| "loss": 1.1783, |
| "step": 133300 |
| }, |
| { |
| "epoch": 43.11570782159018, |
| "grad_norm": 1.9601678848266602, |
| "learning_rate": 0.001, |
| "loss": 1.2017, |
| "step": 133400 |
| }, |
| { |
| "epoch": 43.14802844214609, |
| "grad_norm": 1.7265827655792236, |
| "learning_rate": 0.001, |
| "loss": 1.2033, |
| "step": 133500 |
| }, |
| { |
| "epoch": 43.18034906270201, |
| "grad_norm": 1.684269905090332, |
| "learning_rate": 0.001, |
| "loss": 1.2206, |
| "step": 133600 |
| }, |
| { |
| "epoch": 43.21266968325792, |
| "grad_norm": 1.8783414363861084, |
| "learning_rate": 0.001, |
| "loss": 1.2415, |
| "step": 133700 |
| }, |
| { |
| "epoch": 43.244990303813836, |
| "grad_norm": 1.572043776512146, |
| "learning_rate": 0.001, |
| "loss": 1.208, |
| "step": 133800 |
| }, |
| { |
| "epoch": 43.27731092436975, |
| "grad_norm": 1.7568895816802979, |
| "learning_rate": 0.001, |
| "loss": 1.2148, |
| "step": 133900 |
| }, |
| { |
| "epoch": 43.309631544925665, |
| "grad_norm": 1.3380953073501587, |
| "learning_rate": 0.001, |
| "loss": 1.2408, |
| "step": 134000 |
| }, |
| { |
| "epoch": 43.34195216548158, |
| "grad_norm": 1.442492127418518, |
| "learning_rate": 0.001, |
| "loss": 1.239, |
| "step": 134100 |
| }, |
| { |
| "epoch": 43.374272786037494, |
| "grad_norm": 1.6913920640945435, |
| "learning_rate": 0.001, |
| "loss": 1.2408, |
| "step": 134200 |
| }, |
| { |
| "epoch": 43.40659340659341, |
| "grad_norm": 1.5097508430480957, |
| "learning_rate": 0.001, |
| "loss": 1.2494, |
| "step": 134300 |
| }, |
| { |
| "epoch": 43.43891402714932, |
| "grad_norm": 1.304591417312622, |
| "learning_rate": 0.001, |
| "loss": 1.2255, |
| "step": 134400 |
| }, |
| { |
| "epoch": 43.47123464770524, |
| "grad_norm": 1.5187216997146606, |
| "learning_rate": 0.001, |
| "loss": 1.2574, |
| "step": 134500 |
| }, |
| { |
| "epoch": 43.50355526826115, |
| "grad_norm": 1.67637300491333, |
| "learning_rate": 0.001, |
| "loss": 1.2373, |
| "step": 134600 |
| }, |
| { |
| "epoch": 43.53587588881707, |
| "grad_norm": 1.3660882711410522, |
| "learning_rate": 0.001, |
| "loss": 1.2314, |
| "step": 134700 |
| }, |
| { |
| "epoch": 43.56819650937298, |
| "grad_norm": 1.8594450950622559, |
| "learning_rate": 0.001, |
| "loss": 1.2504, |
| "step": 134800 |
| }, |
| { |
| "epoch": 43.600517129928896, |
| "grad_norm": 1.4114196300506592, |
| "learning_rate": 0.001, |
| "loss": 1.2192, |
| "step": 134900 |
| }, |
| { |
| "epoch": 43.63283775048481, |
| "grad_norm": 1.8799229860305786, |
| "learning_rate": 0.001, |
| "loss": 1.2706, |
| "step": 135000 |
| }, |
| { |
| "epoch": 43.665158371040725, |
| "grad_norm": 1.6473530530929565, |
| "learning_rate": 0.001, |
| "loss": 1.2412, |
| "step": 135100 |
| }, |
| { |
| "epoch": 43.69747899159664, |
| "grad_norm": 1.7104618549346924, |
| "learning_rate": 0.001, |
| "loss": 1.2671, |
| "step": 135200 |
| }, |
| { |
| "epoch": 43.729799612152554, |
| "grad_norm": 1.5365654230117798, |
| "learning_rate": 0.001, |
| "loss": 1.2637, |
| "step": 135300 |
| }, |
| { |
| "epoch": 43.76212023270847, |
| "grad_norm": 1.5581703186035156, |
| "learning_rate": 0.001, |
| "loss": 1.2647, |
| "step": 135400 |
| }, |
| { |
| "epoch": 43.79444085326438, |
| "grad_norm": 1.283259630203247, |
| "learning_rate": 0.001, |
| "loss": 1.2827, |
| "step": 135500 |
| }, |
| { |
| "epoch": 43.8267614738203, |
| "grad_norm": 1.4676380157470703, |
| "learning_rate": 0.001, |
| "loss": 1.2488, |
| "step": 135600 |
| }, |
| { |
| "epoch": 43.85908209437621, |
| "grad_norm": 1.6356333494186401, |
| "learning_rate": 0.001, |
| "loss": 1.2684, |
| "step": 135700 |
| }, |
| { |
| "epoch": 43.89140271493213, |
| "grad_norm": 1.6816112995147705, |
| "learning_rate": 0.001, |
| "loss": 1.2755, |
| "step": 135800 |
| }, |
| { |
| "epoch": 43.92372333548804, |
| "grad_norm": 1.900416374206543, |
| "learning_rate": 0.001, |
| "loss": 1.2773, |
| "step": 135900 |
| }, |
| { |
| "epoch": 43.956043956043956, |
| "grad_norm": 1.8260838985443115, |
| "learning_rate": 0.001, |
| "loss": 1.2654, |
| "step": 136000 |
| }, |
| { |
| "epoch": 43.98836457659987, |
| "grad_norm": 1.3427294492721558, |
| "learning_rate": 0.001, |
| "loss": 1.2813, |
| "step": 136100 |
| }, |
| { |
| "epoch": 44.020685197155785, |
| "grad_norm": 1.6175090074539185, |
| "learning_rate": 0.001, |
| "loss": 1.2353, |
| "step": 136200 |
| }, |
| { |
| "epoch": 44.0530058177117, |
| "grad_norm": 1.451216220855713, |
| "learning_rate": 0.001, |
| "loss": 1.1861, |
| "step": 136300 |
| }, |
| { |
| "epoch": 44.085326438267614, |
| "grad_norm": 1.4623968601226807, |
| "learning_rate": 0.001, |
| "loss": 1.1686, |
| "step": 136400 |
| }, |
| { |
| "epoch": 44.11764705882353, |
| "grad_norm": 1.295665979385376, |
| "learning_rate": 0.001, |
| "loss": 1.1901, |
| "step": 136500 |
| }, |
| { |
| "epoch": 44.14996767937944, |
| "grad_norm": 1.691976547241211, |
| "learning_rate": 0.001, |
| "loss": 1.1879, |
| "step": 136600 |
| }, |
| { |
| "epoch": 44.18228829993536, |
| "grad_norm": 1.5531518459320068, |
| "learning_rate": 0.001, |
| "loss": 1.2036, |
| "step": 136700 |
| }, |
| { |
| "epoch": 44.21460892049127, |
| "grad_norm": 1.4985134601593018, |
| "learning_rate": 0.001, |
| "loss": 1.1843, |
| "step": 136800 |
| }, |
| { |
| "epoch": 44.24692954104719, |
| "grad_norm": 1.9060845375061035, |
| "learning_rate": 0.001, |
| "loss": 1.195, |
| "step": 136900 |
| }, |
| { |
| "epoch": 44.2792501616031, |
| "grad_norm": 1.326461911201477, |
| "learning_rate": 0.001, |
| "loss": 1.2133, |
| "step": 137000 |
| }, |
| { |
| "epoch": 44.311570782159016, |
| "grad_norm": 1.4161741733551025, |
| "learning_rate": 0.001, |
| "loss": 1.1989, |
| "step": 137100 |
| }, |
| { |
| "epoch": 44.34389140271493, |
| "grad_norm": 1.6229037046432495, |
| "learning_rate": 0.001, |
| "loss": 1.2134, |
| "step": 137200 |
| }, |
| { |
| "epoch": 44.376212023270845, |
| "grad_norm": 1.394170880317688, |
| "learning_rate": 0.001, |
| "loss": 1.2012, |
| "step": 137300 |
| }, |
| { |
| "epoch": 44.40853264382676, |
| "grad_norm": 1.5358482599258423, |
| "learning_rate": 0.001, |
| "loss": 1.2169, |
| "step": 137400 |
| }, |
| { |
| "epoch": 44.440853264382675, |
| "grad_norm": 1.7619261741638184, |
| "learning_rate": 0.001, |
| "loss": 1.2092, |
| "step": 137500 |
| }, |
| { |
| "epoch": 44.47317388493859, |
| "grad_norm": 1.7660161256790161, |
| "learning_rate": 0.001, |
| "loss": 1.231, |
| "step": 137600 |
| }, |
| { |
| "epoch": 44.505494505494504, |
| "grad_norm": 1.3661121129989624, |
| "learning_rate": 0.001, |
| "loss": 1.2354, |
| "step": 137700 |
| }, |
| { |
| "epoch": 44.53781512605042, |
| "grad_norm": 1.4903825521469116, |
| "learning_rate": 0.001, |
| "loss": 1.2407, |
| "step": 137800 |
| }, |
| { |
| "epoch": 44.57013574660633, |
| "grad_norm": 1.6031243801116943, |
| "learning_rate": 0.001, |
| "loss": 1.2485, |
| "step": 137900 |
| }, |
| { |
| "epoch": 44.60245636716225, |
| "grad_norm": 1.6402915716171265, |
| "learning_rate": 0.001, |
| "loss": 1.2353, |
| "step": 138000 |
| }, |
| { |
| "epoch": 44.63477698771816, |
| "grad_norm": 1.4186668395996094, |
| "learning_rate": 0.001, |
| "loss": 1.2482, |
| "step": 138100 |
| }, |
| { |
| "epoch": 44.66709760827408, |
| "grad_norm": 1.4842973947525024, |
| "learning_rate": 0.001, |
| "loss": 1.2324, |
| "step": 138200 |
| }, |
| { |
| "epoch": 44.69941822882999, |
| "grad_norm": 1.4359252452850342, |
| "learning_rate": 0.001, |
| "loss": 1.2624, |
| "step": 138300 |
| }, |
| { |
| "epoch": 44.731738849385906, |
| "grad_norm": 1.7322560548782349, |
| "learning_rate": 0.001, |
| "loss": 1.2446, |
| "step": 138400 |
| }, |
| { |
| "epoch": 44.76405946994182, |
| "grad_norm": 1.3641490936279297, |
| "learning_rate": 0.001, |
| "loss": 1.2614, |
| "step": 138500 |
| }, |
| { |
| "epoch": 44.796380090497735, |
| "grad_norm": 1.7803534269332886, |
| "learning_rate": 0.001, |
| "loss": 1.259, |
| "step": 138600 |
| }, |
| { |
| "epoch": 44.82870071105365, |
| "grad_norm": 1.528218388557434, |
| "learning_rate": 0.001, |
| "loss": 1.2661, |
| "step": 138700 |
| }, |
| { |
| "epoch": 44.861021331609564, |
| "grad_norm": 1.7628278732299805, |
| "learning_rate": 0.001, |
| "loss": 1.2614, |
| "step": 138800 |
| }, |
| { |
| "epoch": 44.89334195216548, |
| "grad_norm": 1.6271640062332153, |
| "learning_rate": 0.001, |
| "loss": 1.265, |
| "step": 138900 |
| }, |
| { |
| "epoch": 44.92566257272139, |
| "grad_norm": 2.0720269680023193, |
| "learning_rate": 0.001, |
| "loss": 1.2693, |
| "step": 139000 |
| }, |
| { |
| "epoch": 44.95798319327731, |
| "grad_norm": 1.4295275211334229, |
| "learning_rate": 0.001, |
| "loss": 1.2845, |
| "step": 139100 |
| }, |
| { |
| "epoch": 44.99030381383322, |
| "grad_norm": 1.7425401210784912, |
| "learning_rate": 0.001, |
| "loss": 1.258, |
| "step": 139200 |
| }, |
| { |
| "epoch": 45.022624434389144, |
| "grad_norm": 1.3533971309661865, |
| "learning_rate": 0.001, |
| "loss": 1.2069, |
| "step": 139300 |
| }, |
| { |
| "epoch": 45.05494505494506, |
| "grad_norm": 1.7541122436523438, |
| "learning_rate": 0.001, |
| "loss": 1.1611, |
| "step": 139400 |
| }, |
| { |
| "epoch": 45.08726567550097, |
| "grad_norm": 1.2720140218734741, |
| "learning_rate": 0.001, |
| "loss": 1.1736, |
| "step": 139500 |
| }, |
| { |
| "epoch": 45.11958629605689, |
| "grad_norm": 1.4547255039215088, |
| "learning_rate": 0.001, |
| "loss": 1.176, |
| "step": 139600 |
| }, |
| { |
| "epoch": 45.1519069166128, |
| "grad_norm": 1.6576576232910156, |
| "learning_rate": 0.001, |
| "loss": 1.1481, |
| "step": 139700 |
| }, |
| { |
| "epoch": 45.18422753716872, |
| "grad_norm": 1.6915053129196167, |
| "learning_rate": 0.001, |
| "loss": 1.1698, |
| "step": 139800 |
| }, |
| { |
| "epoch": 45.21654815772463, |
| "grad_norm": 1.5308271646499634, |
| "learning_rate": 0.001, |
| "loss": 1.1879, |
| "step": 139900 |
| }, |
| { |
| "epoch": 45.248868778280546, |
| "grad_norm": 1.6623047590255737, |
| "learning_rate": 0.001, |
| "loss": 1.1991, |
| "step": 140000 |
| }, |
| { |
| "epoch": 45.28118939883646, |
| "grad_norm": 1.9195517301559448, |
| "learning_rate": 0.001, |
| "loss": 1.1843, |
| "step": 140100 |
| }, |
| { |
| "epoch": 45.313510019392375, |
| "grad_norm": 1.644578456878662, |
| "learning_rate": 0.001, |
| "loss": 1.1975, |
| "step": 140200 |
| }, |
| { |
| "epoch": 45.34583063994829, |
| "grad_norm": 1.4461241960525513, |
| "learning_rate": 0.001, |
| "loss": 1.1956, |
| "step": 140300 |
| }, |
| { |
| "epoch": 45.378151260504204, |
| "grad_norm": 1.663171648979187, |
| "learning_rate": 0.001, |
| "loss": 1.2051, |
| "step": 140400 |
| }, |
| { |
| "epoch": 45.41047188106012, |
| "grad_norm": 1.6137675046920776, |
| "learning_rate": 0.001, |
| "loss": 1.2108, |
| "step": 140500 |
| }, |
| { |
| "epoch": 45.44279250161603, |
| "grad_norm": 1.6578792333602905, |
| "learning_rate": 0.001, |
| "loss": 1.2131, |
| "step": 140600 |
| }, |
| { |
| "epoch": 45.47511312217195, |
| "grad_norm": 1.2520636320114136, |
| "learning_rate": 0.001, |
| "loss": 1.208, |
| "step": 140700 |
| }, |
| { |
| "epoch": 45.50743374272786, |
| "grad_norm": 1.5555574893951416, |
| "learning_rate": 0.001, |
| "loss": 1.2404, |
| "step": 140800 |
| }, |
| { |
| "epoch": 45.53975436328378, |
| "grad_norm": 1.76326322555542, |
| "learning_rate": 0.001, |
| "loss": 1.2328, |
| "step": 140900 |
| }, |
| { |
| "epoch": 45.57207498383969, |
| "grad_norm": 1.5170855522155762, |
| "learning_rate": 0.001, |
| "loss": 1.2143, |
| "step": 141000 |
| }, |
| { |
| "epoch": 45.604395604395606, |
| "grad_norm": 1.3871099948883057, |
| "learning_rate": 0.001, |
| "loss": 1.2245, |
| "step": 141100 |
| }, |
| { |
| "epoch": 45.63671622495152, |
| "grad_norm": 1.281063437461853, |
| "learning_rate": 0.001, |
| "loss": 1.2288, |
| "step": 141200 |
| }, |
| { |
| "epoch": 45.669036845507435, |
| "grad_norm": 1.7094895839691162, |
| "learning_rate": 0.001, |
| "loss": 1.2412, |
| "step": 141300 |
| }, |
| { |
| "epoch": 45.70135746606335, |
| "grad_norm": 1.6494312286376953, |
| "learning_rate": 0.001, |
| "loss": 1.2116, |
| "step": 141400 |
| }, |
| { |
| "epoch": 45.733678086619264, |
| "grad_norm": 1.505626916885376, |
| "learning_rate": 0.001, |
| "loss": 1.2361, |
| "step": 141500 |
| }, |
| { |
| "epoch": 45.76599870717518, |
| "grad_norm": 1.4134547710418701, |
| "learning_rate": 0.001, |
| "loss": 1.2627, |
| "step": 141600 |
| }, |
| { |
| "epoch": 45.79831932773109, |
| "grad_norm": 1.3101989030838013, |
| "learning_rate": 0.001, |
| "loss": 1.2472, |
| "step": 141700 |
| }, |
| { |
| "epoch": 45.83063994828701, |
| "grad_norm": 1.5769306421279907, |
| "learning_rate": 0.001, |
| "loss": 1.2321, |
| "step": 141800 |
| }, |
| { |
| "epoch": 45.86296056884292, |
| "grad_norm": 1.8324518203735352, |
| "learning_rate": 0.001, |
| "loss": 1.2352, |
| "step": 141900 |
| }, |
| { |
| "epoch": 45.89528118939884, |
| "grad_norm": 1.7726576328277588, |
| "learning_rate": 0.001, |
| "loss": 1.2554, |
| "step": 142000 |
| }, |
| { |
| "epoch": 45.92760180995475, |
| "grad_norm": 1.480567455291748, |
| "learning_rate": 0.001, |
| "loss": 1.1472, |
| "step": 142100 |
| }, |
| { |
| "epoch": 45.959922430510666, |
| "grad_norm": 1.8656824827194214, |
| "learning_rate": 0.001, |
| "loss": 1.153, |
| "step": 142200 |
| }, |
| { |
| "epoch": 45.99224305106658, |
| "grad_norm": 1.5704985857009888, |
| "learning_rate": 0.001, |
| "loss": 1.1408, |
| "step": 142300 |
| }, |
| { |
| "epoch": 46.024563671622495, |
| "grad_norm": 1.6737463474273682, |
| "learning_rate": 0.001, |
| "loss": 1.1702, |
| "step": 142400 |
| }, |
| { |
| "epoch": 46.05688429217841, |
| "grad_norm": 1.4223302602767944, |
| "learning_rate": 0.001, |
| "loss": 1.1648, |
| "step": 142500 |
| }, |
| { |
| "epoch": 46.089204912734324, |
| "grad_norm": 1.4680297374725342, |
| "learning_rate": 0.001, |
| "loss": 1.1531, |
| "step": 142600 |
| }, |
| { |
| "epoch": 46.12152553329024, |
| "grad_norm": 1.2421369552612305, |
| "learning_rate": 0.001, |
| "loss": 1.152, |
| "step": 142700 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 1.6976029872894287, |
| "learning_rate": 0.001, |
| "loss": 1.1664, |
| "step": 142800 |
| }, |
| { |
| "epoch": 46.18616677440207, |
| "grad_norm": 1.5253973007202148, |
| "learning_rate": 0.001, |
| "loss": 1.1621, |
| "step": 142900 |
| }, |
| { |
| "epoch": 46.21848739495798, |
| "grad_norm": 1.3489149808883667, |
| "learning_rate": 0.001, |
| "loss": 1.1926, |
| "step": 143000 |
| }, |
| { |
| "epoch": 46.2508080155139, |
| "grad_norm": 1.8092660903930664, |
| "learning_rate": 0.001, |
| "loss": 1.1874, |
| "step": 143100 |
| }, |
| { |
| "epoch": 46.28312863606981, |
| "grad_norm": 2.419577121734619, |
| "learning_rate": 0.001, |
| "loss": 1.1816, |
| "step": 143200 |
| }, |
| { |
| "epoch": 46.315449256625726, |
| "grad_norm": 1.3183753490447998, |
| "learning_rate": 0.001, |
| "loss": 1.1854, |
| "step": 143300 |
| }, |
| { |
| "epoch": 46.34776987718164, |
| "grad_norm": 1.8555301427841187, |
| "learning_rate": 0.001, |
| "loss": 1.2091, |
| "step": 143400 |
| }, |
| { |
| "epoch": 46.380090497737555, |
| "grad_norm": 1.7718279361724854, |
| "learning_rate": 0.001, |
| "loss": 1.1856, |
| "step": 143500 |
| }, |
| { |
| "epoch": 46.41241111829347, |
| "grad_norm": 1.52428138256073, |
| "learning_rate": 0.001, |
| "loss": 1.1845, |
| "step": 143600 |
| }, |
| { |
| "epoch": 46.444731738849384, |
| "grad_norm": 1.6636964082717896, |
| "learning_rate": 0.001, |
| "loss": 1.1983, |
| "step": 143700 |
| }, |
| { |
| "epoch": 46.4770523594053, |
| "grad_norm": 1.3807815313339233, |
| "learning_rate": 0.001, |
| "loss": 1.2193, |
| "step": 143800 |
| }, |
| { |
| "epoch": 46.50937297996121, |
| "grad_norm": 1.3139708042144775, |
| "learning_rate": 0.001, |
| "loss": 1.205, |
| "step": 143900 |
| }, |
| { |
| "epoch": 46.54169360051713, |
| "grad_norm": 1.2375787496566772, |
| "learning_rate": 0.001, |
| "loss": 1.1898, |
| "step": 144000 |
| }, |
| { |
| "epoch": 46.57401422107304, |
| "grad_norm": 1.7440154552459717, |
| "learning_rate": 0.001, |
| "loss": 1.1944, |
| "step": 144100 |
| }, |
| { |
| "epoch": 46.60633484162896, |
| "grad_norm": 1.7930964231491089, |
| "learning_rate": 0.001, |
| "loss": 1.2046, |
| "step": 144200 |
| }, |
| { |
| "epoch": 46.63865546218487, |
| "grad_norm": 1.7479157447814941, |
| "learning_rate": 0.001, |
| "loss": 1.2002, |
| "step": 144300 |
| }, |
| { |
| "epoch": 46.670976082740786, |
| "grad_norm": 1.5789364576339722, |
| "learning_rate": 0.001, |
| "loss": 1.2219, |
| "step": 144400 |
| }, |
| { |
| "epoch": 46.7032967032967, |
| "grad_norm": 1.5130776166915894, |
| "learning_rate": 0.001, |
| "loss": 1.2434, |
| "step": 144500 |
| }, |
| { |
| "epoch": 46.735617323852615, |
| "grad_norm": 2.0466105937957764, |
| "learning_rate": 0.001, |
| "loss": 1.2327, |
| "step": 144600 |
| }, |
| { |
| "epoch": 46.76793794440853, |
| "grad_norm": 1.5826852321624756, |
| "learning_rate": 0.001, |
| "loss": 1.2467, |
| "step": 144700 |
| }, |
| { |
| "epoch": 46.800258564964444, |
| "grad_norm": 1.5535780191421509, |
| "learning_rate": 0.001, |
| "loss": 1.2317, |
| "step": 144800 |
| }, |
| { |
| "epoch": 46.83257918552036, |
| "grad_norm": 1.7607159614562988, |
| "learning_rate": 0.001, |
| "loss": 1.2328, |
| "step": 144900 |
| }, |
| { |
| "epoch": 46.864899806076274, |
| "grad_norm": 1.4072030782699585, |
| "learning_rate": 0.001, |
| "loss": 1.2437, |
| "step": 145000 |
| }, |
| { |
| "epoch": 46.89722042663219, |
| "grad_norm": 1.9758942127227783, |
| "learning_rate": 0.001, |
| "loss": 1.2378, |
| "step": 145100 |
| }, |
| { |
| "epoch": 46.9295410471881, |
| "grad_norm": 1.6869163513183594, |
| "learning_rate": 0.001, |
| "loss": 1.2454, |
| "step": 145200 |
| }, |
| { |
| "epoch": 46.96186166774402, |
| "grad_norm": 1.5331294536590576, |
| "learning_rate": 0.001, |
| "loss": 1.2447, |
| "step": 145300 |
| }, |
| { |
| "epoch": 46.99418228829994, |
| "grad_norm": 1.357105016708374, |
| "learning_rate": 0.001, |
| "loss": 1.2275, |
| "step": 145400 |
| }, |
| { |
| "epoch": 47.02650290885585, |
| "grad_norm": 1.6140611171722412, |
| "learning_rate": 0.001, |
| "loss": 1.1603, |
| "step": 145500 |
| }, |
| { |
| "epoch": 47.05882352941177, |
| "grad_norm": 2.081415891647339, |
| "learning_rate": 0.001, |
| "loss": 1.1452, |
| "step": 145600 |
| }, |
| { |
| "epoch": 47.09114414996768, |
| "grad_norm": 2.0446853637695312, |
| "learning_rate": 0.001, |
| "loss": 1.1433, |
| "step": 145700 |
| }, |
| { |
| "epoch": 47.1234647705236, |
| "grad_norm": 1.3056678771972656, |
| "learning_rate": 0.001, |
| "loss": 1.1422, |
| "step": 145800 |
| }, |
| { |
| "epoch": 47.15578539107951, |
| "grad_norm": 1.6712660789489746, |
| "learning_rate": 0.001, |
| "loss": 1.1523, |
| "step": 145900 |
| }, |
| { |
| "epoch": 47.188106011635426, |
| "grad_norm": 1.5658693313598633, |
| "learning_rate": 0.001, |
| "loss": 1.1758, |
| "step": 146000 |
| }, |
| { |
| "epoch": 47.22042663219134, |
| "grad_norm": 1.599502682685852, |
| "learning_rate": 0.001, |
| "loss": 1.1551, |
| "step": 146100 |
| }, |
| { |
| "epoch": 47.252747252747255, |
| "grad_norm": 1.624306559562683, |
| "learning_rate": 0.001, |
| "loss": 1.1577, |
| "step": 146200 |
| }, |
| { |
| "epoch": 47.28506787330317, |
| "grad_norm": 1.6202889680862427, |
| "learning_rate": 0.001, |
| "loss": 1.1607, |
| "step": 146300 |
| }, |
| { |
| "epoch": 47.317388493859085, |
| "grad_norm": 1.5435707569122314, |
| "learning_rate": 0.001, |
| "loss": 1.1746, |
| "step": 146400 |
| }, |
| { |
| "epoch": 47.349709114415, |
| "grad_norm": 1.4827113151550293, |
| "learning_rate": 0.001, |
| "loss": 1.1569, |
| "step": 146500 |
| }, |
| { |
| "epoch": 47.382029734970914, |
| "grad_norm": 1.2994977235794067, |
| "learning_rate": 0.001, |
| "loss": 1.1738, |
| "step": 146600 |
| }, |
| { |
| "epoch": 47.41435035552683, |
| "grad_norm": 1.5433175563812256, |
| "learning_rate": 0.001, |
| "loss": 1.1945, |
| "step": 146700 |
| }, |
| { |
| "epoch": 47.44667097608274, |
| "grad_norm": 1.61589777469635, |
| "learning_rate": 0.001, |
| "loss": 1.1804, |
| "step": 146800 |
| }, |
| { |
| "epoch": 47.47899159663866, |
| "grad_norm": 2.19016695022583, |
| "learning_rate": 0.001, |
| "loss": 1.1817, |
| "step": 146900 |
| }, |
| { |
| "epoch": 47.51131221719457, |
| "grad_norm": 1.3823586702346802, |
| "learning_rate": 0.001, |
| "loss": 1.2015, |
| "step": 147000 |
| }, |
| { |
| "epoch": 47.543632837750486, |
| "grad_norm": 1.8267431259155273, |
| "learning_rate": 0.001, |
| "loss": 1.1772, |
| "step": 147100 |
| }, |
| { |
| "epoch": 47.5759534583064, |
| "grad_norm": 1.4151540994644165, |
| "learning_rate": 0.001, |
| "loss": 1.1982, |
| "step": 147200 |
| }, |
| { |
| "epoch": 47.608274078862316, |
| "grad_norm": 1.7475345134735107, |
| "learning_rate": 0.001, |
| "loss": 1.1941, |
| "step": 147300 |
| }, |
| { |
| "epoch": 47.64059469941823, |
| "grad_norm": 1.7757151126861572, |
| "learning_rate": 0.001, |
| "loss": 1.2097, |
| "step": 147400 |
| }, |
| { |
| "epoch": 47.672915319974145, |
| "grad_norm": 1.5026477575302124, |
| "learning_rate": 0.001, |
| "loss": 1.2292, |
| "step": 147500 |
| }, |
| { |
| "epoch": 47.70523594053006, |
| "grad_norm": 1.9475871324539185, |
| "learning_rate": 0.001, |
| "loss": 1.231, |
| "step": 147600 |
| }, |
| { |
| "epoch": 47.737556561085974, |
| "grad_norm": 1.2796332836151123, |
| "learning_rate": 0.001, |
| "loss": 1.2198, |
| "step": 147700 |
| }, |
| { |
| "epoch": 47.76987718164189, |
| "grad_norm": 1.7256336212158203, |
| "learning_rate": 0.001, |
| "loss": 1.2134, |
| "step": 147800 |
| }, |
| { |
| "epoch": 47.8021978021978, |
| "grad_norm": 1.6882165670394897, |
| "learning_rate": 0.001, |
| "loss": 1.2361, |
| "step": 147900 |
| }, |
| { |
| "epoch": 47.83451842275372, |
| "grad_norm": 1.5431451797485352, |
| "learning_rate": 0.001, |
| "loss": 1.2174, |
| "step": 148000 |
| }, |
| { |
| "epoch": 47.86683904330963, |
| "grad_norm": 1.7564618587493896, |
| "learning_rate": 0.001, |
| "loss": 1.2215, |
| "step": 148100 |
| }, |
| { |
| "epoch": 47.89915966386555, |
| "grad_norm": 1.5725071430206299, |
| "learning_rate": 0.001, |
| "loss": 1.2261, |
| "step": 148200 |
| }, |
| { |
| "epoch": 47.93148028442146, |
| "grad_norm": 1.5619136095046997, |
| "learning_rate": 0.001, |
| "loss": 1.2253, |
| "step": 148300 |
| }, |
| { |
| "epoch": 47.963800904977376, |
| "grad_norm": 1.8927035331726074, |
| "learning_rate": 0.001, |
| "loss": 1.2225, |
| "step": 148400 |
| }, |
| { |
| "epoch": 47.99612152553329, |
| "grad_norm": 1.8232885599136353, |
| "learning_rate": 0.001, |
| "loss": 1.2266, |
| "step": 148500 |
| }, |
| { |
| "epoch": 48.028442146089205, |
| "grad_norm": 1.9754383563995361, |
| "learning_rate": 0.001, |
| "loss": 1.1366, |
| "step": 148600 |
| }, |
| { |
| "epoch": 48.06076276664512, |
| "grad_norm": 1.661736249923706, |
| "learning_rate": 0.001, |
| "loss": 1.1406, |
| "step": 148700 |
| }, |
| { |
| "epoch": 48.093083387201034, |
| "grad_norm": 1.6042405366897583, |
| "learning_rate": 0.001, |
| "loss": 1.1258, |
| "step": 148800 |
| }, |
| { |
| "epoch": 48.12540400775695, |
| "grad_norm": 1.820178508758545, |
| "learning_rate": 0.001, |
| "loss": 1.1516, |
| "step": 148900 |
| }, |
| { |
| "epoch": 48.15772462831286, |
| "grad_norm": 1.3801450729370117, |
| "learning_rate": 0.001, |
| "loss": 1.1606, |
| "step": 149000 |
| }, |
| { |
| "epoch": 48.19004524886878, |
| "grad_norm": 1.8100758790969849, |
| "learning_rate": 0.001, |
| "loss": 1.1324, |
| "step": 149100 |
| }, |
| { |
| "epoch": 48.22236586942469, |
| "grad_norm": 1.4243206977844238, |
| "learning_rate": 0.001, |
| "loss": 1.1639, |
| "step": 149200 |
| }, |
| { |
| "epoch": 48.25468648998061, |
| "grad_norm": 1.5422807931900024, |
| "learning_rate": 0.001, |
| "loss": 1.1675, |
| "step": 149300 |
| }, |
| { |
| "epoch": 48.28700711053652, |
| "grad_norm": 1.4906775951385498, |
| "learning_rate": 0.001, |
| "loss": 1.1498, |
| "step": 149400 |
| }, |
| { |
| "epoch": 48.319327731092436, |
| "grad_norm": 1.2655715942382812, |
| "learning_rate": 0.001, |
| "loss": 1.1568, |
| "step": 149500 |
| }, |
| { |
| "epoch": 48.35164835164835, |
| "grad_norm": 2.0237317085266113, |
| "learning_rate": 0.001, |
| "loss": 1.1634, |
| "step": 149600 |
| }, |
| { |
| "epoch": 48.383968972204265, |
| "grad_norm": 1.5441726446151733, |
| "learning_rate": 0.001, |
| "loss": 1.1673, |
| "step": 149700 |
| }, |
| { |
| "epoch": 48.41628959276018, |
| "grad_norm": 1.5430012941360474, |
| "learning_rate": 0.001, |
| "loss": 1.1761, |
| "step": 149800 |
| }, |
| { |
| "epoch": 48.448610213316094, |
| "grad_norm": 1.3982590436935425, |
| "learning_rate": 0.001, |
| "loss": 1.1713, |
| "step": 149900 |
| }, |
| { |
| "epoch": 48.48093083387201, |
| "grad_norm": 2.1957812309265137, |
| "learning_rate": 0.001, |
| "loss": 1.1768, |
| "step": 150000 |
| }, |
| { |
| "epoch": 48.51325145442792, |
| "grad_norm": 1.6471282243728638, |
| "learning_rate": 0.001, |
| "loss": 1.1628, |
| "step": 150100 |
| }, |
| { |
| "epoch": 48.54557207498384, |
| "grad_norm": 1.5349053144454956, |
| "learning_rate": 0.001, |
| "loss": 1.1756, |
| "step": 150200 |
| }, |
| { |
| "epoch": 48.57789269553975, |
| "grad_norm": 1.5751968622207642, |
| "learning_rate": 0.001, |
| "loss": 1.1746, |
| "step": 150300 |
| }, |
| { |
| "epoch": 48.61021331609567, |
| "grad_norm": 1.6446707248687744, |
| "learning_rate": 0.001, |
| "loss": 1.1901, |
| "step": 150400 |
| }, |
| { |
| "epoch": 48.64253393665158, |
| "grad_norm": 1.5992244482040405, |
| "learning_rate": 0.001, |
| "loss": 1.2006, |
| "step": 150500 |
| }, |
| { |
| "epoch": 48.674854557207496, |
| "grad_norm": 1.4150946140289307, |
| "learning_rate": 0.001, |
| "loss": 1.1895, |
| "step": 150600 |
| }, |
| { |
| "epoch": 48.70717517776341, |
| "grad_norm": 2.0123841762542725, |
| "learning_rate": 0.001, |
| "loss": 1.2026, |
| "step": 150700 |
| }, |
| { |
| "epoch": 48.739495798319325, |
| "grad_norm": 1.5513368844985962, |
| "learning_rate": 0.001, |
| "loss": 1.2164, |
| "step": 150800 |
| }, |
| { |
| "epoch": 48.77181641887524, |
| "grad_norm": 1.7306681871414185, |
| "learning_rate": 0.001, |
| "loss": 1.2243, |
| "step": 150900 |
| }, |
| { |
| "epoch": 48.804137039431154, |
| "grad_norm": 1.7530173063278198, |
| "learning_rate": 0.001, |
| "loss": 1.1997, |
| "step": 151000 |
| }, |
| { |
| "epoch": 48.83645765998707, |
| "grad_norm": 1.6544743776321411, |
| "learning_rate": 0.001, |
| "loss": 1.2087, |
| "step": 151100 |
| }, |
| { |
| "epoch": 48.86877828054298, |
| "grad_norm": 1.404657006263733, |
| "learning_rate": 0.001, |
| "loss": 1.2177, |
| "step": 151200 |
| }, |
| { |
| "epoch": 48.9010989010989, |
| "grad_norm": 1.3040798902511597, |
| "learning_rate": 0.001, |
| "loss": 1.2182, |
| "step": 151300 |
| }, |
| { |
| "epoch": 48.93341952165481, |
| "grad_norm": 1.7273650169372559, |
| "learning_rate": 0.001, |
| "loss": 1.2262, |
| "step": 151400 |
| }, |
| { |
| "epoch": 48.96574014221073, |
| "grad_norm": 1.721778154373169, |
| "learning_rate": 0.001, |
| "loss": 1.2277, |
| "step": 151500 |
| }, |
| { |
| "epoch": 48.99806076276664, |
| "grad_norm": 2.63419246673584, |
| "learning_rate": 0.001, |
| "loss": 1.1779, |
| "step": 151600 |
| }, |
| { |
| "epoch": 49.03038138332256, |
| "grad_norm": 1.5806034803390503, |
| "learning_rate": 0.001, |
| "loss": 1.1105, |
| "step": 151700 |
| }, |
| { |
| "epoch": 49.06270200387848, |
| "grad_norm": 1.3670389652252197, |
| "learning_rate": 0.001, |
| "loss": 1.123, |
| "step": 151800 |
| }, |
| { |
| "epoch": 49.09502262443439, |
| "grad_norm": 2.055310010910034, |
| "learning_rate": 0.001, |
| "loss": 1.1219, |
| "step": 151900 |
| }, |
| { |
| "epoch": 49.12734324499031, |
| "grad_norm": 1.7734863758087158, |
| "learning_rate": 0.001, |
| "loss": 1.1289, |
| "step": 152000 |
| }, |
| { |
| "epoch": 49.15966386554622, |
| "grad_norm": 1.5310850143432617, |
| "learning_rate": 0.001, |
| "loss": 1.1494, |
| "step": 152100 |
| }, |
| { |
| "epoch": 49.191984486102136, |
| "grad_norm": 1.7340278625488281, |
| "learning_rate": 0.001, |
| "loss": 1.1401, |
| "step": 152200 |
| }, |
| { |
| "epoch": 49.22430510665805, |
| "grad_norm": 1.797639012336731, |
| "learning_rate": 0.001, |
| "loss": 1.1599, |
| "step": 152300 |
| }, |
| { |
| "epoch": 49.256625727213965, |
| "grad_norm": 1.9868476390838623, |
| "learning_rate": 0.001, |
| "loss": 1.1398, |
| "step": 152400 |
| }, |
| { |
| "epoch": 49.28894634776988, |
| "grad_norm": 1.5633907318115234, |
| "learning_rate": 0.001, |
| "loss": 1.1491, |
| "step": 152500 |
| }, |
| { |
| "epoch": 49.321266968325794, |
| "grad_norm": 1.5145635604858398, |
| "learning_rate": 0.001, |
| "loss": 1.1577, |
| "step": 152600 |
| }, |
| { |
| "epoch": 49.35358758888171, |
| "grad_norm": 1.5028250217437744, |
| "learning_rate": 0.001, |
| "loss": 1.1317, |
| "step": 152700 |
| }, |
| { |
| "epoch": 49.38590820943762, |
| "grad_norm": 1.316331148147583, |
| "learning_rate": 0.001, |
| "loss": 1.1653, |
| "step": 152800 |
| }, |
| { |
| "epoch": 49.41822882999354, |
| "grad_norm": 1.6597394943237305, |
| "learning_rate": 0.001, |
| "loss": 1.171, |
| "step": 152900 |
| }, |
| { |
| "epoch": 49.45054945054945, |
| "grad_norm": 1.6509952545166016, |
| "learning_rate": 0.001, |
| "loss": 1.1547, |
| "step": 153000 |
| }, |
| { |
| "epoch": 49.48287007110537, |
| "grad_norm": 1.3771039247512817, |
| "learning_rate": 0.001, |
| "loss": 1.1643, |
| "step": 153100 |
| }, |
| { |
| "epoch": 49.51519069166128, |
| "grad_norm": 1.25186026096344, |
| "learning_rate": 0.001, |
| "loss": 1.1755, |
| "step": 153200 |
| }, |
| { |
| "epoch": 49.547511312217196, |
| "grad_norm": 1.5495226383209229, |
| "learning_rate": 0.001, |
| "loss": 1.176, |
| "step": 153300 |
| }, |
| { |
| "epoch": 49.57983193277311, |
| "grad_norm": 1.9336113929748535, |
| "learning_rate": 0.001, |
| "loss": 1.168, |
| "step": 153400 |
| }, |
| { |
| "epoch": 49.612152553329025, |
| "grad_norm": 1.4076240062713623, |
| "learning_rate": 0.001, |
| "loss": 1.1858, |
| "step": 153500 |
| }, |
| { |
| "epoch": 49.64447317388494, |
| "grad_norm": 1.9218026399612427, |
| "learning_rate": 0.001, |
| "loss": 1.185, |
| "step": 153600 |
| }, |
| { |
| "epoch": 49.676793794440854, |
| "grad_norm": 1.3178162574768066, |
| "learning_rate": 0.001, |
| "loss": 1.1826, |
| "step": 153700 |
| }, |
| { |
| "epoch": 49.70911441499677, |
| "grad_norm": 1.7671688795089722, |
| "learning_rate": 0.001, |
| "loss": 1.2037, |
| "step": 153800 |
| }, |
| { |
| "epoch": 49.74143503555268, |
| "grad_norm": 1.5962954759597778, |
| "learning_rate": 0.001, |
| "loss": 1.1803, |
| "step": 153900 |
| }, |
| { |
| "epoch": 49.7737556561086, |
| "grad_norm": 1.9002434015274048, |
| "learning_rate": 0.001, |
| "loss": 1.1916, |
| "step": 154000 |
| }, |
| { |
| "epoch": 49.80607627666451, |
| "grad_norm": 1.387270450592041, |
| "learning_rate": 0.001, |
| "loss": 1.1982, |
| "step": 154100 |
| }, |
| { |
| "epoch": 49.83839689722043, |
| "grad_norm": 1.8704843521118164, |
| "learning_rate": 0.001, |
| "loss": 1.1858, |
| "step": 154200 |
| }, |
| { |
| "epoch": 49.87071751777634, |
| "grad_norm": 1.5286774635314941, |
| "learning_rate": 0.001, |
| "loss": 1.19, |
| "step": 154300 |
| }, |
| { |
| "epoch": 49.903038138332256, |
| "grad_norm": 1.6860578060150146, |
| "learning_rate": 0.001, |
| "loss": 1.2088, |
| "step": 154400 |
| }, |
| { |
| "epoch": 49.93535875888817, |
| "grad_norm": 1.7764240503311157, |
| "learning_rate": 0.001, |
| "loss": 1.214, |
| "step": 154500 |
| }, |
| { |
| "epoch": 49.967679379444085, |
| "grad_norm": 1.661325454711914, |
| "learning_rate": 0.001, |
| "loss": 1.2037, |
| "step": 154600 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 1.262018084526062, |
| "learning_rate": 0.001, |
| "loss": 1.1552, |
| "step": 154700 |
| }, |
| { |
| "epoch": 50.032320620555915, |
| "grad_norm": 0.9459092617034912, |
| "learning_rate": 0.001, |
| "loss": 1.1055, |
| "step": 154800 |
| }, |
| { |
| "epoch": 50.06464124111183, |
| "grad_norm": 1.3187966346740723, |
| "learning_rate": 0.001, |
| "loss": 1.1186, |
| "step": 154900 |
| }, |
| { |
| "epoch": 50.096961861667744, |
| "grad_norm": 0.8036020398139954, |
| "learning_rate": 0.001, |
| "loss": 1.1067, |
| "step": 155000 |
| }, |
| { |
| "epoch": 50.12928248222366, |
| "grad_norm": 0.9802110195159912, |
| "learning_rate": 0.001, |
| "loss": 1.1072, |
| "step": 155100 |
| }, |
| { |
| "epoch": 50.16160310277957, |
| "grad_norm": 0.7400931715965271, |
| "learning_rate": 0.001, |
| "loss": 1.1048, |
| "step": 155200 |
| }, |
| { |
| "epoch": 50.19392372333549, |
| "grad_norm": 0.8337299823760986, |
| "learning_rate": 0.001, |
| "loss": 1.1185, |
| "step": 155300 |
| }, |
| { |
| "epoch": 50.2262443438914, |
| "grad_norm": 0.857559859752655, |
| "learning_rate": 0.001, |
| "loss": 1.1487, |
| "step": 155400 |
| }, |
| { |
| "epoch": 50.25856496444732, |
| "grad_norm": 0.9556663632392883, |
| "learning_rate": 0.001, |
| "loss": 1.1425, |
| "step": 155500 |
| }, |
| { |
| "epoch": 50.29088558500323, |
| "grad_norm": 1.762825846672058, |
| "learning_rate": 0.001, |
| "loss": 1.1336, |
| "step": 155600 |
| }, |
| { |
| "epoch": 50.323206205559146, |
| "grad_norm": 0.46932679414749146, |
| "learning_rate": 0.001, |
| "loss": 1.1445, |
| "step": 155700 |
| }, |
| { |
| "epoch": 50.35552682611506, |
| "grad_norm": 0.46966707706451416, |
| "learning_rate": 0.001, |
| "loss": 1.1263, |
| "step": 155800 |
| }, |
| { |
| "epoch": 50.387847446670975, |
| "grad_norm": 0.8840547204017639, |
| "learning_rate": 0.001, |
| "loss": 1.1478, |
| "step": 155900 |
| }, |
| { |
| "epoch": 50.42016806722689, |
| "grad_norm": 0.2904863655567169, |
| "learning_rate": 0.001, |
| "loss": 1.1441, |
| "step": 156000 |
| }, |
| { |
| "epoch": 50.452488687782804, |
| "grad_norm": 0.5406709909439087, |
| "learning_rate": 0.001, |
| "loss": 1.1493, |
| "step": 156100 |
| }, |
| { |
| "epoch": 50.48480930833872, |
| "grad_norm": 0.8404807448387146, |
| "learning_rate": 0.001, |
| "loss": 1.1593, |
| "step": 156200 |
| }, |
| { |
| "epoch": 50.51712992889463, |
| "grad_norm": 0.6578356623649597, |
| "learning_rate": 0.001, |
| "loss": 1.1599, |
| "step": 156300 |
| }, |
| { |
| "epoch": 50.54945054945055, |
| "grad_norm": 1.5182676315307617, |
| "learning_rate": 0.001, |
| "loss": 1.1606, |
| "step": 156400 |
| }, |
| { |
| "epoch": 50.58177117000646, |
| "grad_norm": 0.5784684419631958, |
| "learning_rate": 0.001, |
| "loss": 1.1615, |
| "step": 156500 |
| }, |
| { |
| "epoch": 50.61409179056238, |
| "grad_norm": 0.5010991096496582, |
| "learning_rate": 0.001, |
| "loss": 1.182, |
| "step": 156600 |
| }, |
| { |
| "epoch": 50.64641241111829, |
| "grad_norm": 1.0248159170150757, |
| "learning_rate": 0.001, |
| "loss": 1.157, |
| "step": 156700 |
| }, |
| { |
| "epoch": 50.678733031674206, |
| "grad_norm": 0.4912484884262085, |
| "learning_rate": 0.001, |
| "loss": 1.1554, |
| "step": 156800 |
| }, |
| { |
| "epoch": 50.71105365223012, |
| "grad_norm": 1.007466197013855, |
| "learning_rate": 0.001, |
| "loss": 1.2062, |
| "step": 156900 |
| }, |
| { |
| "epoch": 50.743374272786035, |
| "grad_norm": 0.6946946978569031, |
| "learning_rate": 0.001, |
| "loss": 1.1865, |
| "step": 157000 |
| }, |
| { |
| "epoch": 50.77569489334195, |
| "grad_norm": 1.3229173421859741, |
| "learning_rate": 0.001, |
| "loss": 1.2058, |
| "step": 157100 |
| }, |
| { |
| "epoch": 50.808015513897864, |
| "grad_norm": 0.8402013778686523, |
| "learning_rate": 0.001, |
| "loss": 1.1784, |
| "step": 157200 |
| }, |
| { |
| "epoch": 50.84033613445378, |
| "grad_norm": 0.7068992257118225, |
| "learning_rate": 0.001, |
| "loss": 1.1889, |
| "step": 157300 |
| }, |
| { |
| "epoch": 50.87265675500969, |
| "grad_norm": 1.3328818082809448, |
| "learning_rate": 0.001, |
| "loss": 1.1871, |
| "step": 157400 |
| }, |
| { |
| "epoch": 50.90497737556561, |
| "grad_norm": 0.7360666990280151, |
| "learning_rate": 0.001, |
| "loss": 1.1767, |
| "step": 157500 |
| }, |
| { |
| "epoch": 50.93729799612152, |
| "grad_norm": 0.4994688928127289, |
| "learning_rate": 0.001, |
| "loss": 1.1876, |
| "step": 157600 |
| }, |
| { |
| "epoch": 50.96961861667744, |
| "grad_norm": 0.7623894214630127, |
| "learning_rate": 0.001, |
| "loss": 1.2043, |
| "step": 157700 |
| }, |
| { |
| "epoch": 51.00193923723336, |
| "grad_norm": 1.4419955015182495, |
| "learning_rate": 0.001, |
| "loss": 1.2096, |
| "step": 157800 |
| }, |
| { |
| "epoch": 51.03425985778927, |
| "grad_norm": 1.7683582305908203, |
| "learning_rate": 0.001, |
| "loss": 1.084, |
| "step": 157900 |
| }, |
| { |
| "epoch": 51.06658047834519, |
| "grad_norm": 1.7946703433990479, |
| "learning_rate": 0.001, |
| "loss": 1.09, |
| "step": 158000 |
| }, |
| { |
| "epoch": 51.0989010989011, |
| "grad_norm": 1.7804067134857178, |
| "learning_rate": 0.001, |
| "loss": 1.1085, |
| "step": 158100 |
| }, |
| { |
| "epoch": 51.13122171945702, |
| "grad_norm": 2.1206374168395996, |
| "learning_rate": 0.001, |
| "loss": 1.0934, |
| "step": 158200 |
| }, |
| { |
| "epoch": 51.16354234001293, |
| "grad_norm": 1.6893900632858276, |
| "learning_rate": 0.001, |
| "loss": 1.1113, |
| "step": 158300 |
| }, |
| { |
| "epoch": 51.195862960568846, |
| "grad_norm": 1.67825186252594, |
| "learning_rate": 0.001, |
| "loss": 1.1348, |
| "step": 158400 |
| }, |
| { |
| "epoch": 51.22818358112476, |
| "grad_norm": 1.5735834836959839, |
| "learning_rate": 0.001, |
| "loss": 1.1227, |
| "step": 158500 |
| }, |
| { |
| "epoch": 51.260504201680675, |
| "grad_norm": 1.5846762657165527, |
| "learning_rate": 0.001, |
| "loss": 1.1272, |
| "step": 158600 |
| }, |
| { |
| "epoch": 51.29282482223659, |
| "grad_norm": 1.7856253385543823, |
| "learning_rate": 0.001, |
| "loss": 1.1188, |
| "step": 158700 |
| }, |
| { |
| "epoch": 51.325145442792504, |
| "grad_norm": 1.580972671508789, |
| "learning_rate": 0.001, |
| "loss": 1.1211, |
| "step": 158800 |
| }, |
| { |
| "epoch": 51.35746606334842, |
| "grad_norm": 1.6010007858276367, |
| "learning_rate": 0.001, |
| "loss": 1.1172, |
| "step": 158900 |
| }, |
| { |
| "epoch": 51.38978668390433, |
| "grad_norm": 1.87574303150177, |
| "learning_rate": 0.001, |
| "loss": 1.1303, |
| "step": 159000 |
| }, |
| { |
| "epoch": 51.42210730446025, |
| "grad_norm": 1.657053828239441, |
| "learning_rate": 0.001, |
| "loss": 1.1479, |
| "step": 159100 |
| }, |
| { |
| "epoch": 51.45442792501616, |
| "grad_norm": 1.626969814300537, |
| "learning_rate": 0.001, |
| "loss": 1.1429, |
| "step": 159200 |
| }, |
| { |
| "epoch": 51.48674854557208, |
| "grad_norm": 1.7134428024291992, |
| "learning_rate": 0.001, |
| "loss": 1.1462, |
| "step": 159300 |
| }, |
| { |
| "epoch": 51.51906916612799, |
| "grad_norm": 1.6238813400268555, |
| "learning_rate": 0.001, |
| "loss": 1.1381, |
| "step": 159400 |
| }, |
| { |
| "epoch": 51.551389786683906, |
| "grad_norm": 1.773478388786316, |
| "learning_rate": 0.001, |
| "loss": 1.1606, |
| "step": 159500 |
| }, |
| { |
| "epoch": 51.58371040723982, |
| "grad_norm": 1.8276687860488892, |
| "learning_rate": 0.001, |
| "loss": 1.1531, |
| "step": 159600 |
| }, |
| { |
| "epoch": 51.616031027795735, |
| "grad_norm": 1.6656960248947144, |
| "learning_rate": 0.001, |
| "loss": 1.167, |
| "step": 159700 |
| }, |
| { |
| "epoch": 51.64835164835165, |
| "grad_norm": 1.8340784311294556, |
| "learning_rate": 0.001, |
| "loss": 1.1476, |
| "step": 159800 |
| }, |
| { |
| "epoch": 51.680672268907564, |
| "grad_norm": 2.128314256668091, |
| "learning_rate": 0.001, |
| "loss": 1.1672, |
| "step": 159900 |
| }, |
| { |
| "epoch": 51.71299288946348, |
| "grad_norm": 1.9760433435440063, |
| "learning_rate": 0.001, |
| "loss": 1.1584, |
| "step": 160000 |
| }, |
| { |
| "epoch": 51.74531351001939, |
| "grad_norm": 2.1876275539398193, |
| "learning_rate": 0.001, |
| "loss": 1.1566, |
| "step": 160100 |
| }, |
| { |
| "epoch": 51.77763413057531, |
| "grad_norm": 1.6389830112457275, |
| "learning_rate": 0.001, |
| "loss": 1.1819, |
| "step": 160200 |
| }, |
| { |
| "epoch": 51.80995475113122, |
| "grad_norm": 1.499732255935669, |
| "learning_rate": 0.001, |
| "loss": 1.1732, |
| "step": 160300 |
| }, |
| { |
| "epoch": 51.84227537168714, |
| "grad_norm": 4.384315013885498, |
| "learning_rate": 0.001, |
| "loss": 1.1634, |
| "step": 160400 |
| }, |
| { |
| "epoch": 51.87459599224305, |
| "grad_norm": 1.833261489868164, |
| "learning_rate": 0.001, |
| "loss": 1.1801, |
| "step": 160500 |
| }, |
| { |
| "epoch": 51.906916612798966, |
| "grad_norm": 2.3746957778930664, |
| "learning_rate": 0.001, |
| "loss": 1.1738, |
| "step": 160600 |
| }, |
| { |
| "epoch": 51.93923723335488, |
| "grad_norm": 2.942558526992798, |
| "learning_rate": 0.001, |
| "loss": 1.1719, |
| "step": 160700 |
| }, |
| { |
| "epoch": 51.971557853910795, |
| "grad_norm": 1.7185624837875366, |
| "learning_rate": 0.001, |
| "loss": 1.2128, |
| "step": 160800 |
| }, |
| { |
| "epoch": 52.00387847446671, |
| "grad_norm": 1.84402334690094, |
| "learning_rate": 0.001, |
| "loss": 1.2243, |
| "step": 160900 |
| }, |
| { |
| "epoch": 52.036199095022624, |
| "grad_norm": 1.704215407371521, |
| "learning_rate": 0.001, |
| "loss": 1.0773, |
| "step": 161000 |
| }, |
| { |
| "epoch": 52.06851971557854, |
| "grad_norm": 1.5075286626815796, |
| "learning_rate": 0.001, |
| "loss": 1.0701, |
| "step": 161100 |
| }, |
| { |
| "epoch": 52.10084033613445, |
| "grad_norm": 1.6493642330169678, |
| "learning_rate": 0.001, |
| "loss": 1.1066, |
| "step": 161200 |
| }, |
| { |
| "epoch": 52.13316095669037, |
| "grad_norm": 1.6777901649475098, |
| "learning_rate": 0.001, |
| "loss": 1.1036, |
| "step": 161300 |
| }, |
| { |
| "epoch": 52.16548157724628, |
| "grad_norm": 1.7006093263626099, |
| "learning_rate": 0.001, |
| "loss": 1.092, |
| "step": 161400 |
| }, |
| { |
| "epoch": 52.1978021978022, |
| "grad_norm": 2.084979295730591, |
| "learning_rate": 0.001, |
| "loss": 1.1008, |
| "step": 161500 |
| }, |
| { |
| "epoch": 52.23012281835811, |
| "grad_norm": 1.5530115365982056, |
| "learning_rate": 0.001, |
| "loss": 1.1272, |
| "step": 161600 |
| }, |
| { |
| "epoch": 52.262443438914026, |
| "grad_norm": 1.7012377977371216, |
| "learning_rate": 0.001, |
| "loss": 1.1185, |
| "step": 161700 |
| }, |
| { |
| "epoch": 52.29476405946994, |
| "grad_norm": 1.9061259031295776, |
| "learning_rate": 0.001, |
| "loss": 1.1186, |
| "step": 161800 |
| }, |
| { |
| "epoch": 52.327084680025855, |
| "grad_norm": 1.660539984703064, |
| "learning_rate": 0.001, |
| "loss": 1.1255, |
| "step": 161900 |
| }, |
| { |
| "epoch": 52.35940530058177, |
| "grad_norm": 1.4890267848968506, |
| "learning_rate": 0.001, |
| "loss": 1.1091, |
| "step": 162000 |
| }, |
| { |
| "epoch": 52.391725921137684, |
| "grad_norm": 1.545951008796692, |
| "learning_rate": 0.001, |
| "loss": 1.1293, |
| "step": 162100 |
| }, |
| { |
| "epoch": 52.4240465416936, |
| "grad_norm": 1.6420233249664307, |
| "learning_rate": 0.001, |
| "loss": 1.1157, |
| "step": 162200 |
| }, |
| { |
| "epoch": 52.456367162249514, |
| "grad_norm": 1.496561050415039, |
| "learning_rate": 0.001, |
| "loss": 1.1222, |
| "step": 162300 |
| }, |
| { |
| "epoch": 52.48868778280543, |
| "grad_norm": 1.982618808746338, |
| "learning_rate": 0.001, |
| "loss": 1.1287, |
| "step": 162400 |
| }, |
| { |
| "epoch": 52.52100840336134, |
| "grad_norm": 1.955478549003601, |
| "learning_rate": 0.001, |
| "loss": 1.131, |
| "step": 162500 |
| }, |
| { |
| "epoch": 52.55332902391726, |
| "grad_norm": 1.3720135688781738, |
| "learning_rate": 0.001, |
| "loss": 1.1394, |
| "step": 162600 |
| }, |
| { |
| "epoch": 52.58564964447317, |
| "grad_norm": 1.5025126934051514, |
| "learning_rate": 0.001, |
| "loss": 1.1357, |
| "step": 162700 |
| }, |
| { |
| "epoch": 52.617970265029086, |
| "grad_norm": 1.3566020727157593, |
| "learning_rate": 0.001, |
| "loss": 1.1557, |
| "step": 162800 |
| }, |
| { |
| "epoch": 52.650290885585, |
| "grad_norm": 1.7265077829360962, |
| "learning_rate": 0.001, |
| "loss": 1.1595, |
| "step": 162900 |
| }, |
| { |
| "epoch": 52.682611506140915, |
| "grad_norm": 2.398094654083252, |
| "learning_rate": 0.001, |
| "loss": 1.1625, |
| "step": 163000 |
| }, |
| { |
| "epoch": 52.71493212669683, |
| "grad_norm": 1.7664134502410889, |
| "learning_rate": 0.001, |
| "loss": 1.1529, |
| "step": 163100 |
| }, |
| { |
| "epoch": 52.747252747252745, |
| "grad_norm": 1.6189780235290527, |
| "learning_rate": 0.001, |
| "loss": 1.1595, |
| "step": 163200 |
| }, |
| { |
| "epoch": 52.77957336780866, |
| "grad_norm": 1.4175572395324707, |
| "learning_rate": 0.001, |
| "loss": 1.1622, |
| "step": 163300 |
| }, |
| { |
| "epoch": 52.811893988364574, |
| "grad_norm": 1.6346251964569092, |
| "learning_rate": 0.001, |
| "loss": 1.1434, |
| "step": 163400 |
| }, |
| { |
| "epoch": 52.84421460892049, |
| "grad_norm": 1.599571704864502, |
| "learning_rate": 0.001, |
| "loss": 1.1716, |
| "step": 163500 |
| }, |
| { |
| "epoch": 52.8765352294764, |
| "grad_norm": 1.4147549867630005, |
| "learning_rate": 0.001, |
| "loss": 1.1765, |
| "step": 163600 |
| }, |
| { |
| "epoch": 52.90885585003232, |
| "grad_norm": 2.264082431793213, |
| "learning_rate": 0.001, |
| "loss": 1.1803, |
| "step": 163700 |
| }, |
| { |
| "epoch": 52.94117647058823, |
| "grad_norm": 1.427462100982666, |
| "learning_rate": 0.001, |
| "loss": 1.1808, |
| "step": 163800 |
| }, |
| { |
| "epoch": 52.97349709114415, |
| "grad_norm": 1.7261021137237549, |
| "learning_rate": 0.001, |
| "loss": 1.1801, |
| "step": 163900 |
| }, |
| { |
| "epoch": 53.00581771170007, |
| "grad_norm": 1.4574629068374634, |
| "learning_rate": 0.001, |
| "loss": 1.1513, |
| "step": 164000 |
| }, |
| { |
| "epoch": 53.03813833225598, |
| "grad_norm": 1.341528058052063, |
| "learning_rate": 0.001, |
| "loss": 1.0847, |
| "step": 164100 |
| }, |
| { |
| "epoch": 53.0704589528119, |
| "grad_norm": 1.2390453815460205, |
| "learning_rate": 0.001, |
| "loss": 1.0535, |
| "step": 164200 |
| }, |
| { |
| "epoch": 53.10277957336781, |
| "grad_norm": 1.554686427116394, |
| "learning_rate": 0.001, |
| "loss": 1.0839, |
| "step": 164300 |
| }, |
| { |
| "epoch": 53.135100193923726, |
| "grad_norm": 1.6500283479690552, |
| "learning_rate": 0.001, |
| "loss": 1.0895, |
| "step": 164400 |
| }, |
| { |
| "epoch": 53.16742081447964, |
| "grad_norm": 1.352728247642517, |
| "learning_rate": 0.001, |
| "loss": 1.1027, |
| "step": 164500 |
| }, |
| { |
| "epoch": 53.199741435035556, |
| "grad_norm": 1.5491443872451782, |
| "learning_rate": 0.001, |
| "loss": 1.0904, |
| "step": 164600 |
| }, |
| { |
| "epoch": 53.23206205559147, |
| "grad_norm": 1.5412120819091797, |
| "learning_rate": 0.001, |
| "loss": 1.096, |
| "step": 164700 |
| }, |
| { |
| "epoch": 53.264382676147385, |
| "grad_norm": 1.3460454940795898, |
| "learning_rate": 0.001, |
| "loss": 1.1034, |
| "step": 164800 |
| }, |
| { |
| "epoch": 53.2967032967033, |
| "grad_norm": 1.4456019401550293, |
| "learning_rate": 0.001, |
| "loss": 1.1198, |
| "step": 164900 |
| }, |
| { |
| "epoch": 53.329023917259214, |
| "grad_norm": 1.5322580337524414, |
| "learning_rate": 0.001, |
| "loss": 1.13, |
| "step": 165000 |
| }, |
| { |
| "epoch": 53.36134453781513, |
| "grad_norm": 1.5252654552459717, |
| "learning_rate": 0.001, |
| "loss": 1.1037, |
| "step": 165100 |
| }, |
| { |
| "epoch": 53.39366515837104, |
| "grad_norm": 1.5594348907470703, |
| "learning_rate": 0.001, |
| "loss": 1.1234, |
| "step": 165200 |
| }, |
| { |
| "epoch": 53.42598577892696, |
| "grad_norm": 1.6623215675354004, |
| "learning_rate": 0.001, |
| "loss": 1.1315, |
| "step": 165300 |
| }, |
| { |
| "epoch": 53.45830639948287, |
| "grad_norm": 1.8282194137573242, |
| "learning_rate": 0.001, |
| "loss": 1.1151, |
| "step": 165400 |
| }, |
| { |
| "epoch": 53.49062702003879, |
| "grad_norm": 1.407645344734192, |
| "learning_rate": 0.001, |
| "loss": 1.12, |
| "step": 165500 |
| }, |
| { |
| "epoch": 53.5229476405947, |
| "grad_norm": 1.8748985528945923, |
| "learning_rate": 0.001, |
| "loss": 1.1289, |
| "step": 165600 |
| }, |
| { |
| "epoch": 53.555268261150616, |
| "grad_norm": 1.391897201538086, |
| "learning_rate": 0.001, |
| "loss": 1.1274, |
| "step": 165700 |
| }, |
| { |
| "epoch": 53.58758888170653, |
| "grad_norm": 1.3943778276443481, |
| "learning_rate": 0.001, |
| "loss": 1.1019, |
| "step": 165800 |
| }, |
| { |
| "epoch": 53.619909502262445, |
| "grad_norm": 1.5813536643981934, |
| "learning_rate": 0.001, |
| "loss": 1.134, |
| "step": 165900 |
| }, |
| { |
| "epoch": 53.65223012281836, |
| "grad_norm": 2.096243143081665, |
| "learning_rate": 0.001, |
| "loss": 1.1326, |
| "step": 166000 |
| }, |
| { |
| "epoch": 53.684550743374274, |
| "grad_norm": 1.64535653591156, |
| "learning_rate": 0.001, |
| "loss": 1.1487, |
| "step": 166100 |
| }, |
| { |
| "epoch": 53.71687136393019, |
| "grad_norm": 1.6284700632095337, |
| "learning_rate": 0.001, |
| "loss": 1.1465, |
| "step": 166200 |
| }, |
| { |
| "epoch": 53.7491919844861, |
| "grad_norm": 1.598093867301941, |
| "learning_rate": 0.001, |
| "loss": 1.1414, |
| "step": 166300 |
| }, |
| { |
| "epoch": 53.78151260504202, |
| "grad_norm": 1.3568588495254517, |
| "learning_rate": 0.001, |
| "loss": 1.1602, |
| "step": 166400 |
| }, |
| { |
| "epoch": 53.81383322559793, |
| "grad_norm": 1.6616982221603394, |
| "learning_rate": 0.001, |
| "loss": 1.1318, |
| "step": 166500 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 1.5817769765853882, |
| "learning_rate": 0.001, |
| "loss": 1.1405, |
| "step": 166600 |
| }, |
| { |
| "epoch": 53.87847446670976, |
| "grad_norm": 1.7250040769577026, |
| "learning_rate": 0.001, |
| "loss": 1.1601, |
| "step": 166700 |
| }, |
| { |
| "epoch": 53.910795087265676, |
| "grad_norm": 1.6254827976226807, |
| "learning_rate": 0.001, |
| "loss": 1.1653, |
| "step": 166800 |
| }, |
| { |
| "epoch": 53.94311570782159, |
| "grad_norm": 1.3446640968322754, |
| "learning_rate": 0.001, |
| "loss": 1.1598, |
| "step": 166900 |
| }, |
| { |
| "epoch": 53.975436328377505, |
| "grad_norm": 1.6438404321670532, |
| "learning_rate": 0.001, |
| "loss": 1.1655, |
| "step": 167000 |
| }, |
| { |
| "epoch": 54.00775694893342, |
| "grad_norm": 1.505845069885254, |
| "learning_rate": 0.001, |
| "loss": 1.1783, |
| "step": 167100 |
| }, |
| { |
| "epoch": 54.040077569489334, |
| "grad_norm": 1.787855863571167, |
| "learning_rate": 0.001, |
| "loss": 1.062, |
| "step": 167200 |
| }, |
| { |
| "epoch": 54.07239819004525, |
| "grad_norm": 1.8118529319763184, |
| "learning_rate": 0.001, |
| "loss": 1.0605, |
| "step": 167300 |
| }, |
| { |
| "epoch": 54.10471881060116, |
| "grad_norm": 2.0226893424987793, |
| "learning_rate": 0.001, |
| "loss": 1.0748, |
| "step": 167400 |
| }, |
| { |
| "epoch": 54.13703943115708, |
| "grad_norm": 1.669325590133667, |
| "learning_rate": 0.001, |
| "loss": 1.063, |
| "step": 167500 |
| }, |
| { |
| "epoch": 54.16936005171299, |
| "grad_norm": 1.4700037240982056, |
| "learning_rate": 0.001, |
| "loss": 1.073, |
| "step": 167600 |
| }, |
| { |
| "epoch": 54.20168067226891, |
| "grad_norm": 1.7410122156143188, |
| "learning_rate": 0.001, |
| "loss": 1.0722, |
| "step": 167700 |
| }, |
| { |
| "epoch": 54.23400129282482, |
| "grad_norm": 1.518440842628479, |
| "learning_rate": 0.001, |
| "loss": 1.0926, |
| "step": 167800 |
| }, |
| { |
| "epoch": 54.266321913380736, |
| "grad_norm": 1.6282093524932861, |
| "learning_rate": 0.001, |
| "loss": 1.0978, |
| "step": 167900 |
| }, |
| { |
| "epoch": 54.29864253393665, |
| "grad_norm": 1.4921144247055054, |
| "learning_rate": 0.001, |
| "loss": 1.0918, |
| "step": 168000 |
| }, |
| { |
| "epoch": 54.330963154492565, |
| "grad_norm": 1.4011250734329224, |
| "learning_rate": 0.001, |
| "loss": 1.102, |
| "step": 168100 |
| }, |
| { |
| "epoch": 54.36328377504848, |
| "grad_norm": 1.6125963926315308, |
| "learning_rate": 0.001, |
| "loss": 1.0986, |
| "step": 168200 |
| }, |
| { |
| "epoch": 54.395604395604394, |
| "grad_norm": 2.5859177112579346, |
| "learning_rate": 0.001, |
| "loss": 1.1138, |
| "step": 168300 |
| }, |
| { |
| "epoch": 54.42792501616031, |
| "grad_norm": 1.9405006170272827, |
| "learning_rate": 0.001, |
| "loss": 1.1107, |
| "step": 168400 |
| }, |
| { |
| "epoch": 54.46024563671622, |
| "grad_norm": 1.813139796257019, |
| "learning_rate": 0.001, |
| "loss": 1.102, |
| "step": 168500 |
| }, |
| { |
| "epoch": 54.49256625727214, |
| "grad_norm": 1.5077065229415894, |
| "learning_rate": 0.001, |
| "loss": 1.1122, |
| "step": 168600 |
| }, |
| { |
| "epoch": 54.52488687782805, |
| "grad_norm": 1.3443224430084229, |
| "learning_rate": 0.001, |
| "loss": 1.1176, |
| "step": 168700 |
| }, |
| { |
| "epoch": 54.55720749838397, |
| "grad_norm": 1.6900509595870972, |
| "learning_rate": 0.001, |
| "loss": 1.1348, |
| "step": 168800 |
| }, |
| { |
| "epoch": 54.58952811893988, |
| "grad_norm": 1.4742830991744995, |
| "learning_rate": 0.001, |
| "loss": 1.1336, |
| "step": 168900 |
| }, |
| { |
| "epoch": 54.621848739495796, |
| "grad_norm": 1.719673752784729, |
| "learning_rate": 0.001, |
| "loss": 1.1237, |
| "step": 169000 |
| }, |
| { |
| "epoch": 54.65416936005171, |
| "grad_norm": 1.4414795637130737, |
| "learning_rate": 0.001, |
| "loss": 1.1374, |
| "step": 169100 |
| }, |
| { |
| "epoch": 54.686489980607625, |
| "grad_norm": 1.6505208015441895, |
| "learning_rate": 0.001, |
| "loss": 1.1369, |
| "step": 169200 |
| }, |
| { |
| "epoch": 54.71881060116354, |
| "grad_norm": 1.6553674936294556, |
| "learning_rate": 0.001, |
| "loss": 1.111, |
| "step": 169300 |
| }, |
| { |
| "epoch": 54.751131221719454, |
| "grad_norm": 1.5446336269378662, |
| "learning_rate": 0.001, |
| "loss": 1.1443, |
| "step": 169400 |
| }, |
| { |
| "epoch": 54.78345184227537, |
| "grad_norm": 1.5632305145263672, |
| "learning_rate": 0.001, |
| "loss": 1.1349, |
| "step": 169500 |
| }, |
| { |
| "epoch": 54.81577246283128, |
| "grad_norm": 1.4174524545669556, |
| "learning_rate": 0.001, |
| "loss": 1.1394, |
| "step": 169600 |
| }, |
| { |
| "epoch": 54.8480930833872, |
| "grad_norm": 1.5752019882202148, |
| "learning_rate": 0.001, |
| "loss": 1.1356, |
| "step": 169700 |
| }, |
| { |
| "epoch": 54.88041370394311, |
| "grad_norm": 3.077443838119507, |
| "learning_rate": 0.001, |
| "loss": 1.1346, |
| "step": 169800 |
| }, |
| { |
| "epoch": 54.91273432449903, |
| "grad_norm": 1.3338394165039062, |
| "learning_rate": 0.001, |
| "loss": 1.1594, |
| "step": 169900 |
| }, |
| { |
| "epoch": 54.94505494505494, |
| "grad_norm": 1.565075397491455, |
| "learning_rate": 0.001, |
| "loss": 1.1667, |
| "step": 170000 |
| }, |
| { |
| "epoch": 54.977375565610856, |
| "grad_norm": 1.5464483499526978, |
| "learning_rate": 0.001, |
| "loss": 1.1592, |
| "step": 170100 |
| }, |
| { |
| "epoch": 55.00969618616678, |
| "grad_norm": 1.780253529548645, |
| "learning_rate": 0.001, |
| "loss": 1.1241, |
| "step": 170200 |
| }, |
| { |
| "epoch": 55.04201680672269, |
| "grad_norm": 1.3855479955673218, |
| "learning_rate": 0.001, |
| "loss": 1.0466, |
| "step": 170300 |
| }, |
| { |
| "epoch": 55.07433742727861, |
| "grad_norm": 1.5501574277877808, |
| "learning_rate": 0.001, |
| "loss": 1.0559, |
| "step": 170400 |
| }, |
| { |
| "epoch": 55.10665804783452, |
| "grad_norm": 1.5003669261932373, |
| "learning_rate": 0.001, |
| "loss": 1.0591, |
| "step": 170500 |
| }, |
| { |
| "epoch": 55.138978668390436, |
| "grad_norm": 1.529256820678711, |
| "learning_rate": 0.001, |
| "loss": 1.0588, |
| "step": 170600 |
| }, |
| { |
| "epoch": 55.17129928894635, |
| "grad_norm": 1.6695269346237183, |
| "learning_rate": 0.001, |
| "loss": 1.0455, |
| "step": 170700 |
| }, |
| { |
| "epoch": 55.203619909502265, |
| "grad_norm": 1.1769028902053833, |
| "learning_rate": 0.001, |
| "loss": 1.0813, |
| "step": 170800 |
| }, |
| { |
| "epoch": 55.23594053005818, |
| "grad_norm": 1.33823823928833, |
| "learning_rate": 0.001, |
| "loss": 1.0969, |
| "step": 170900 |
| }, |
| { |
| "epoch": 55.268261150614094, |
| "grad_norm": 1.3728437423706055, |
| "learning_rate": 0.001, |
| "loss": 1.0731, |
| "step": 171000 |
| }, |
| { |
| "epoch": 55.30058177117001, |
| "grad_norm": 1.431822419166565, |
| "learning_rate": 0.001, |
| "loss": 1.0707, |
| "step": 171100 |
| }, |
| { |
| "epoch": 55.33290239172592, |
| "grad_norm": 1.5498169660568237, |
| "learning_rate": 0.001, |
| "loss": 1.0905, |
| "step": 171200 |
| }, |
| { |
| "epoch": 55.36522301228184, |
| "grad_norm": 1.598392367362976, |
| "learning_rate": 0.001, |
| "loss": 1.0835, |
| "step": 171300 |
| }, |
| { |
| "epoch": 55.39754363283775, |
| "grad_norm": 1.8495707511901855, |
| "learning_rate": 0.001, |
| "loss": 1.1046, |
| "step": 171400 |
| }, |
| { |
| "epoch": 55.42986425339367, |
| "grad_norm": 1.452956199645996, |
| "learning_rate": 0.001, |
| "loss": 1.1048, |
| "step": 171500 |
| }, |
| { |
| "epoch": 55.46218487394958, |
| "grad_norm": 1.5427874326705933, |
| "learning_rate": 0.001, |
| "loss": 1.0985, |
| "step": 171600 |
| }, |
| { |
| "epoch": 55.494505494505496, |
| "grad_norm": 1.5331876277923584, |
| "learning_rate": 0.001, |
| "loss": 1.0998, |
| "step": 171700 |
| }, |
| { |
| "epoch": 55.52682611506141, |
| "grad_norm": 1.4414092302322388, |
| "learning_rate": 0.001, |
| "loss": 1.0987, |
| "step": 171800 |
| }, |
| { |
| "epoch": 55.559146735617325, |
| "grad_norm": 1.469769835472107, |
| "learning_rate": 0.001, |
| "loss": 1.1062, |
| "step": 171900 |
| }, |
| { |
| "epoch": 55.59146735617324, |
| "grad_norm": 1.6705925464630127, |
| "learning_rate": 0.001, |
| "loss": 1.121, |
| "step": 172000 |
| }, |
| { |
| "epoch": 55.623787976729155, |
| "grad_norm": 2.0680267810821533, |
| "learning_rate": 0.001, |
| "loss": 1.1212, |
| "step": 172100 |
| }, |
| { |
| "epoch": 55.65610859728507, |
| "grad_norm": 1.1828604936599731, |
| "learning_rate": 0.001, |
| "loss": 1.1022, |
| "step": 172200 |
| }, |
| { |
| "epoch": 55.688429217840984, |
| "grad_norm": 2.1064138412475586, |
| "learning_rate": 0.001, |
| "loss": 1.1253, |
| "step": 172300 |
| }, |
| { |
| "epoch": 55.7207498383969, |
| "grad_norm": 1.25071382522583, |
| "learning_rate": 0.001, |
| "loss": 1.1336, |
| "step": 172400 |
| }, |
| { |
| "epoch": 55.75307045895281, |
| "grad_norm": 1.4712039232254028, |
| "learning_rate": 0.001, |
| "loss": 1.1207, |
| "step": 172500 |
| }, |
| { |
| "epoch": 55.78539107950873, |
| "grad_norm": 1.976820468902588, |
| "learning_rate": 0.001, |
| "loss": 1.127, |
| "step": 172600 |
| }, |
| { |
| "epoch": 55.81771170006464, |
| "grad_norm": 1.6924251317977905, |
| "learning_rate": 0.001, |
| "loss": 1.1265, |
| "step": 172700 |
| }, |
| { |
| "epoch": 55.85003232062056, |
| "grad_norm": 1.5243760347366333, |
| "learning_rate": 0.001, |
| "loss": 1.1382, |
| "step": 172800 |
| }, |
| { |
| "epoch": 55.88235294117647, |
| "grad_norm": 1.5966765880584717, |
| "learning_rate": 0.001, |
| "loss": 1.1386, |
| "step": 172900 |
| }, |
| { |
| "epoch": 55.914673561732386, |
| "grad_norm": 1.5467320680618286, |
| "learning_rate": 0.001, |
| "loss": 1.1288, |
| "step": 173000 |
| }, |
| { |
| "epoch": 55.9469941822883, |
| "grad_norm": 1.509267807006836, |
| "learning_rate": 0.001, |
| "loss": 1.157, |
| "step": 173100 |
| }, |
| { |
| "epoch": 55.979314802844215, |
| "grad_norm": 1.3964108228683472, |
| "learning_rate": 0.001, |
| "loss": 1.1531, |
| "step": 173200 |
| }, |
| { |
| "epoch": 56.01163542340013, |
| "grad_norm": 1.5852636098861694, |
| "learning_rate": 0.001, |
| "loss": 1.097, |
| "step": 173300 |
| }, |
| { |
| "epoch": 56.043956043956044, |
| "grad_norm": 1.3627901077270508, |
| "learning_rate": 0.001, |
| "loss": 1.0334, |
| "step": 173400 |
| }, |
| { |
| "epoch": 56.07627666451196, |
| "grad_norm": 1.454304575920105, |
| "learning_rate": 0.001, |
| "loss": 1.0504, |
| "step": 173500 |
| }, |
| { |
| "epoch": 56.10859728506787, |
| "grad_norm": 1.7767127752304077, |
| "learning_rate": 0.001, |
| "loss": 1.0563, |
| "step": 173600 |
| }, |
| { |
| "epoch": 56.14091790562379, |
| "grad_norm": 1.4236656427383423, |
| "learning_rate": 0.001, |
| "loss": 1.0453, |
| "step": 173700 |
| }, |
| { |
| "epoch": 56.1732385261797, |
| "grad_norm": 1.743483066558838, |
| "learning_rate": 0.001, |
| "loss": 1.0616, |
| "step": 173800 |
| }, |
| { |
| "epoch": 56.20555914673562, |
| "grad_norm": 1.520132064819336, |
| "learning_rate": 0.001, |
| "loss": 1.0532, |
| "step": 173900 |
| }, |
| { |
| "epoch": 56.23787976729153, |
| "grad_norm": 2.2348384857177734, |
| "learning_rate": 0.001, |
| "loss": 1.0477, |
| "step": 174000 |
| }, |
| { |
| "epoch": 56.270200387847446, |
| "grad_norm": 2.293739080429077, |
| "learning_rate": 0.001, |
| "loss": 1.0707, |
| "step": 174100 |
| }, |
| { |
| "epoch": 56.30252100840336, |
| "grad_norm": 1.6483992338180542, |
| "learning_rate": 0.001, |
| "loss": 1.0814, |
| "step": 174200 |
| }, |
| { |
| "epoch": 56.334841628959275, |
| "grad_norm": 1.7153490781784058, |
| "learning_rate": 0.001, |
| "loss": 1.0742, |
| "step": 174300 |
| }, |
| { |
| "epoch": 56.36716224951519, |
| "grad_norm": 1.7525187730789185, |
| "learning_rate": 0.001, |
| "loss": 1.0837, |
| "step": 174400 |
| }, |
| { |
| "epoch": 56.399482870071104, |
| "grad_norm": 2.0571489334106445, |
| "learning_rate": 0.001, |
| "loss": 1.0844, |
| "step": 174500 |
| }, |
| { |
| "epoch": 56.43180349062702, |
| "grad_norm": 1.923480749130249, |
| "learning_rate": 0.001, |
| "loss": 1.0849, |
| "step": 174600 |
| }, |
| { |
| "epoch": 56.46412411118293, |
| "grad_norm": 1.9075169563293457, |
| "learning_rate": 0.001, |
| "loss": 1.0761, |
| "step": 174700 |
| }, |
| { |
| "epoch": 56.49644473173885, |
| "grad_norm": 1.8042218685150146, |
| "learning_rate": 0.001, |
| "loss": 1.0983, |
| "step": 174800 |
| }, |
| { |
| "epoch": 56.52876535229476, |
| "grad_norm": 1.6176084280014038, |
| "learning_rate": 0.001, |
| "loss": 1.0871, |
| "step": 174900 |
| }, |
| { |
| "epoch": 56.56108597285068, |
| "grad_norm": 2.0587964057922363, |
| "learning_rate": 0.001, |
| "loss": 1.1045, |
| "step": 175000 |
| }, |
| { |
| "epoch": 56.59340659340659, |
| "grad_norm": 1.8761169910430908, |
| "learning_rate": 0.001, |
| "loss": 1.1241, |
| "step": 175100 |
| }, |
| { |
| "epoch": 56.625727213962506, |
| "grad_norm": 2.422200918197632, |
| "learning_rate": 0.001, |
| "loss": 1.1027, |
| "step": 175200 |
| }, |
| { |
| "epoch": 56.65804783451842, |
| "grad_norm": 1.628890037536621, |
| "learning_rate": 0.001, |
| "loss": 1.1181, |
| "step": 175300 |
| }, |
| { |
| "epoch": 56.690368455074335, |
| "grad_norm": 1.7099559307098389, |
| "learning_rate": 0.001, |
| "loss": 1.0967, |
| "step": 175400 |
| }, |
| { |
| "epoch": 56.72268907563025, |
| "grad_norm": 1.6226879358291626, |
| "learning_rate": 0.001, |
| "loss": 1.1141, |
| "step": 175500 |
| }, |
| { |
| "epoch": 56.755009696186164, |
| "grad_norm": 1.4959603548049927, |
| "learning_rate": 0.001, |
| "loss": 1.1177, |
| "step": 175600 |
| }, |
| { |
| "epoch": 56.78733031674208, |
| "grad_norm": 1.6804662942886353, |
| "learning_rate": 0.001, |
| "loss": 1.1291, |
| "step": 175700 |
| }, |
| { |
| "epoch": 56.81965093729799, |
| "grad_norm": 1.7406642436981201, |
| "learning_rate": 0.001, |
| "loss": 1.1339, |
| "step": 175800 |
| }, |
| { |
| "epoch": 56.85197155785391, |
| "grad_norm": 1.5311131477355957, |
| "learning_rate": 0.001, |
| "loss": 1.1195, |
| "step": 175900 |
| }, |
| { |
| "epoch": 56.88429217840982, |
| "grad_norm": 1.861016869544983, |
| "learning_rate": 0.001, |
| "loss": 1.1357, |
| "step": 176000 |
| }, |
| { |
| "epoch": 56.91661279896574, |
| "grad_norm": 1.7938547134399414, |
| "learning_rate": 0.001, |
| "loss": 1.1281, |
| "step": 176100 |
| }, |
| { |
| "epoch": 56.94893341952165, |
| "grad_norm": 1.6675164699554443, |
| "learning_rate": 0.001, |
| "loss": 1.1311, |
| "step": 176200 |
| }, |
| { |
| "epoch": 56.981254040077566, |
| "grad_norm": 1.8921256065368652, |
| "learning_rate": 0.001, |
| "loss": 1.132, |
| "step": 176300 |
| }, |
| { |
| "epoch": 57.01357466063349, |
| "grad_norm": 1.6421865224838257, |
| "learning_rate": 0.001, |
| "loss": 1.0711, |
| "step": 176400 |
| }, |
| { |
| "epoch": 57.0458952811894, |
| "grad_norm": 1.6362299919128418, |
| "learning_rate": 0.001, |
| "loss": 1.0239, |
| "step": 176500 |
| }, |
| { |
| "epoch": 57.07821590174532, |
| "grad_norm": 1.5909955501556396, |
| "learning_rate": 0.001, |
| "loss": 1.0368, |
| "step": 176600 |
| }, |
| { |
| "epoch": 57.11053652230123, |
| "grad_norm": 1.624410629272461, |
| "learning_rate": 0.001, |
| "loss": 1.0494, |
| "step": 176700 |
| }, |
| { |
| "epoch": 57.142857142857146, |
| "grad_norm": 1.610818862915039, |
| "learning_rate": 0.001, |
| "loss": 1.0437, |
| "step": 176800 |
| }, |
| { |
| "epoch": 57.17517776341306, |
| "grad_norm": 1.4707742929458618, |
| "learning_rate": 0.001, |
| "loss": 1.0494, |
| "step": 176900 |
| }, |
| { |
| "epoch": 57.207498383968975, |
| "grad_norm": 1.3964228630065918, |
| "learning_rate": 0.001, |
| "loss": 1.0635, |
| "step": 177000 |
| }, |
| { |
| "epoch": 57.23981900452489, |
| "grad_norm": 1.5611023902893066, |
| "learning_rate": 0.001, |
| "loss": 1.0516, |
| "step": 177100 |
| }, |
| { |
| "epoch": 57.272139625080804, |
| "grad_norm": 1.4848252534866333, |
| "learning_rate": 0.001, |
| "loss": 1.0631, |
| "step": 177200 |
| }, |
| { |
| "epoch": 57.30446024563672, |
| "grad_norm": 1.2606589794158936, |
| "learning_rate": 0.001, |
| "loss": 1.0566, |
| "step": 177300 |
| }, |
| { |
| "epoch": 57.33678086619263, |
| "grad_norm": 1.6222645044326782, |
| "learning_rate": 0.001, |
| "loss": 1.0771, |
| "step": 177400 |
| }, |
| { |
| "epoch": 57.36910148674855, |
| "grad_norm": 2.2820799350738525, |
| "learning_rate": 0.001, |
| "loss": 1.0735, |
| "step": 177500 |
| }, |
| { |
| "epoch": 57.40142210730446, |
| "grad_norm": 1.7914892435073853, |
| "learning_rate": 0.001, |
| "loss": 1.076, |
| "step": 177600 |
| }, |
| { |
| "epoch": 57.43374272786038, |
| "grad_norm": 1.5209670066833496, |
| "learning_rate": 0.001, |
| "loss": 1.0958, |
| "step": 177700 |
| }, |
| { |
| "epoch": 57.46606334841629, |
| "grad_norm": 2.0354158878326416, |
| "learning_rate": 0.001, |
| "loss": 1.0731, |
| "step": 177800 |
| }, |
| { |
| "epoch": 57.498383968972206, |
| "grad_norm": 1.5465794801712036, |
| "learning_rate": 0.001, |
| "loss": 1.082, |
| "step": 177900 |
| }, |
| { |
| "epoch": 57.53070458952812, |
| "grad_norm": 1.4770228862762451, |
| "learning_rate": 0.001, |
| "loss": 1.0939, |
| "step": 178000 |
| }, |
| { |
| "epoch": 57.563025210084035, |
| "grad_norm": 1.5884677171707153, |
| "learning_rate": 0.001, |
| "loss": 1.0881, |
| "step": 178100 |
| }, |
| { |
| "epoch": 57.59534583063995, |
| "grad_norm": 1.3796273469924927, |
| "learning_rate": 0.001, |
| "loss": 1.0723, |
| "step": 178200 |
| }, |
| { |
| "epoch": 57.627666451195864, |
| "grad_norm": 1.7390011548995972, |
| "learning_rate": 0.001, |
| "loss": 1.0946, |
| "step": 178300 |
| }, |
| { |
| "epoch": 57.65998707175178, |
| "grad_norm": 1.8054834604263306, |
| "learning_rate": 0.001, |
| "loss": 1.0912, |
| "step": 178400 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 1.5965912342071533, |
| "learning_rate": 0.001, |
| "loss": 1.1132, |
| "step": 178500 |
| }, |
| { |
| "epoch": 57.72462831286361, |
| "grad_norm": 1.4859302043914795, |
| "learning_rate": 0.001, |
| "loss": 1.0969, |
| "step": 178600 |
| }, |
| { |
| "epoch": 57.75694893341952, |
| "grad_norm": 1.8760346174240112, |
| "learning_rate": 0.001, |
| "loss": 1.1142, |
| "step": 178700 |
| }, |
| { |
| "epoch": 57.78926955397544, |
| "grad_norm": 1.9528679847717285, |
| "learning_rate": 0.001, |
| "loss": 1.0951, |
| "step": 178800 |
| }, |
| { |
| "epoch": 57.82159017453135, |
| "grad_norm": 1.9293407201766968, |
| "learning_rate": 0.001, |
| "loss": 1.1017, |
| "step": 178900 |
| }, |
| { |
| "epoch": 57.853910795087266, |
| "grad_norm": 1.3426704406738281, |
| "learning_rate": 0.001, |
| "loss": 1.105, |
| "step": 179000 |
| }, |
| { |
| "epoch": 57.88623141564318, |
| "grad_norm": 1.448326587677002, |
| "learning_rate": 0.001, |
| "loss": 1.1075, |
| "step": 179100 |
| }, |
| { |
| "epoch": 57.918552036199095, |
| "grad_norm": 1.7974193096160889, |
| "learning_rate": 0.001, |
| "loss": 1.1323, |
| "step": 179200 |
| }, |
| { |
| "epoch": 57.95087265675501, |
| "grad_norm": 1.621390461921692, |
| "learning_rate": 0.001, |
| "loss": 1.1397, |
| "step": 179300 |
| }, |
| { |
| "epoch": 57.983193277310924, |
| "grad_norm": 1.6346193552017212, |
| "learning_rate": 0.001, |
| "loss": 1.1445, |
| "step": 179400 |
| }, |
| { |
| "epoch": 58.01551389786684, |
| "grad_norm": 1.8142673969268799, |
| "learning_rate": 0.001, |
| "loss": 1.0451, |
| "step": 179500 |
| }, |
| { |
| "epoch": 58.04783451842275, |
| "grad_norm": 1.33307945728302, |
| "learning_rate": 0.001, |
| "loss": 1.0349, |
| "step": 179600 |
| }, |
| { |
| "epoch": 58.08015513897867, |
| "grad_norm": 1.182541012763977, |
| "learning_rate": 0.001, |
| "loss": 1.0273, |
| "step": 179700 |
| }, |
| { |
| "epoch": 58.11247575953458, |
| "grad_norm": 1.4469527006149292, |
| "learning_rate": 0.001, |
| "loss": 1.0151, |
| "step": 179800 |
| }, |
| { |
| "epoch": 58.1447963800905, |
| "grad_norm": 1.3350833654403687, |
| "learning_rate": 0.001, |
| "loss": 1.0442, |
| "step": 179900 |
| }, |
| { |
| "epoch": 58.17711700064641, |
| "grad_norm": 1.4207303524017334, |
| "learning_rate": 0.001, |
| "loss": 1.0211, |
| "step": 180000 |
| }, |
| { |
| "epoch": 58.209437621202326, |
| "grad_norm": 1.633563756942749, |
| "learning_rate": 0.001, |
| "loss": 1.0392, |
| "step": 180100 |
| }, |
| { |
| "epoch": 58.24175824175824, |
| "grad_norm": 1.3849244117736816, |
| "learning_rate": 0.001, |
| "loss": 1.0623, |
| "step": 180200 |
| }, |
| { |
| "epoch": 58.274078862314155, |
| "grad_norm": 1.8374276161193848, |
| "learning_rate": 0.001, |
| "loss": 1.0388, |
| "step": 180300 |
| }, |
| { |
| "epoch": 58.30639948287007, |
| "grad_norm": 1.3447884321212769, |
| "learning_rate": 0.001, |
| "loss": 1.0626, |
| "step": 180400 |
| }, |
| { |
| "epoch": 58.338720103425985, |
| "grad_norm": 1.5665903091430664, |
| "learning_rate": 0.001, |
| "loss": 1.0754, |
| "step": 180500 |
| }, |
| { |
| "epoch": 58.3710407239819, |
| "grad_norm": 1.3040165901184082, |
| "learning_rate": 0.001, |
| "loss": 1.0746, |
| "step": 180600 |
| }, |
| { |
| "epoch": 58.403361344537814, |
| "grad_norm": 1.3881635665893555, |
| "learning_rate": 0.001, |
| "loss": 1.0525, |
| "step": 180700 |
| }, |
| { |
| "epoch": 58.43568196509373, |
| "grad_norm": 1.1611268520355225, |
| "learning_rate": 0.001, |
| "loss": 1.0717, |
| "step": 180800 |
| }, |
| { |
| "epoch": 58.46800258564964, |
| "grad_norm": 1.2324254512786865, |
| "learning_rate": 0.001, |
| "loss": 1.0696, |
| "step": 180900 |
| }, |
| { |
| "epoch": 58.50032320620556, |
| "grad_norm": 1.5421253442764282, |
| "learning_rate": 0.001, |
| "loss": 1.0742, |
| "step": 181000 |
| }, |
| { |
| "epoch": 58.53264382676147, |
| "grad_norm": 0.9455136656761169, |
| "learning_rate": 0.001, |
| "loss": 1.0723, |
| "step": 181100 |
| }, |
| { |
| "epoch": 58.56496444731739, |
| "grad_norm": 1.49685800075531, |
| "learning_rate": 0.001, |
| "loss": 1.0705, |
| "step": 181200 |
| }, |
| { |
| "epoch": 58.5972850678733, |
| "grad_norm": 3.2262043952941895, |
| "learning_rate": 0.001, |
| "loss": 1.0927, |
| "step": 181300 |
| }, |
| { |
| "epoch": 58.629605688429216, |
| "grad_norm": 1.138930320739746, |
| "learning_rate": 0.001, |
| "loss": 1.0768, |
| "step": 181400 |
| }, |
| { |
| "epoch": 58.66192630898513, |
| "grad_norm": 1.6434274911880493, |
| "learning_rate": 0.001, |
| "loss": 1.086, |
| "step": 181500 |
| }, |
| { |
| "epoch": 58.694246929541045, |
| "grad_norm": 1.4245071411132812, |
| "learning_rate": 0.001, |
| "loss": 1.0806, |
| "step": 181600 |
| }, |
| { |
| "epoch": 58.72656755009696, |
| "grad_norm": 1.1249699592590332, |
| "learning_rate": 0.001, |
| "loss": 1.1123, |
| "step": 181700 |
| }, |
| { |
| "epoch": 58.758888170652874, |
| "grad_norm": 1.3846380710601807, |
| "learning_rate": 0.001, |
| "loss": 1.1084, |
| "step": 181800 |
| }, |
| { |
| "epoch": 58.79120879120879, |
| "grad_norm": 1.39168119430542, |
| "learning_rate": 0.001, |
| "loss": 1.1256, |
| "step": 181900 |
| }, |
| { |
| "epoch": 58.8235294117647, |
| "grad_norm": 1.3924428224563599, |
| "learning_rate": 0.001, |
| "loss": 1.1124, |
| "step": 182000 |
| }, |
| { |
| "epoch": 58.85585003232062, |
| "grad_norm": 1.7155895233154297, |
| "learning_rate": 0.001, |
| "loss": 0.9987, |
| "step": 182100 |
| }, |
| { |
| "epoch": 58.88817065287653, |
| "grad_norm": 1.6909483671188354, |
| "learning_rate": 0.001, |
| "loss": 1.008, |
| "step": 182200 |
| }, |
| { |
| "epoch": 58.92049127343245, |
| "grad_norm": 1.2806386947631836, |
| "learning_rate": 0.001, |
| "loss": 1.0156, |
| "step": 182300 |
| }, |
| { |
| "epoch": 58.95281189398836, |
| "grad_norm": 1.5416556596755981, |
| "learning_rate": 0.001, |
| "loss": 1.0119, |
| "step": 182400 |
| }, |
| { |
| "epoch": 58.985132514544276, |
| "grad_norm": 1.9270284175872803, |
| "learning_rate": 0.001, |
| "loss": 0.9978, |
| "step": 182500 |
| }, |
| { |
| "epoch": 59.0174531351002, |
| "grad_norm": 1.8223607540130615, |
| "learning_rate": 0.001, |
| "loss": 1.0246, |
| "step": 182600 |
| }, |
| { |
| "epoch": 59.04977375565611, |
| "grad_norm": 2.0636425018310547, |
| "learning_rate": 0.001, |
| "loss": 1.0184, |
| "step": 182700 |
| }, |
| { |
| "epoch": 59.08209437621203, |
| "grad_norm": 1.6684495210647583, |
| "learning_rate": 0.001, |
| "loss": 1.008, |
| "step": 182800 |
| }, |
| { |
| "epoch": 59.11441499676794, |
| "grad_norm": 2.0706138610839844, |
| "learning_rate": 0.001, |
| "loss": 1.0314, |
| "step": 182900 |
| }, |
| { |
| "epoch": 59.146735617323856, |
| "grad_norm": 2.2243518829345703, |
| "learning_rate": 0.001, |
| "loss": 1.0092, |
| "step": 183000 |
| }, |
| { |
| "epoch": 59.17905623787977, |
| "grad_norm": 1.8938789367675781, |
| "learning_rate": 0.001, |
| "loss": 1.0275, |
| "step": 183100 |
| }, |
| { |
| "epoch": 59.211376858435685, |
| "grad_norm": 1.7461127042770386, |
| "learning_rate": 0.001, |
| "loss": 1.0462, |
| "step": 183200 |
| }, |
| { |
| "epoch": 59.2436974789916, |
| "grad_norm": 2.4900665283203125, |
| "learning_rate": 0.001, |
| "loss": 1.0474, |
| "step": 183300 |
| }, |
| { |
| "epoch": 59.276018099547514, |
| "grad_norm": 2.173794984817505, |
| "learning_rate": 0.001, |
| "loss": 1.0452, |
| "step": 183400 |
| }, |
| { |
| "epoch": 59.30833872010343, |
| "grad_norm": 2.100980043411255, |
| "learning_rate": 0.001, |
| "loss": 1.0621, |
| "step": 183500 |
| }, |
| { |
| "epoch": 59.34065934065934, |
| "grad_norm": 2.124846935272217, |
| "learning_rate": 0.001, |
| "loss": 1.077, |
| "step": 183600 |
| }, |
| { |
| "epoch": 59.37297996121526, |
| "grad_norm": 2.6659579277038574, |
| "learning_rate": 0.001, |
| "loss": 1.06, |
| "step": 183700 |
| }, |
| { |
| "epoch": 59.40530058177117, |
| "grad_norm": 2.3515841960906982, |
| "learning_rate": 0.001, |
| "loss": 1.0685, |
| "step": 183800 |
| }, |
| { |
| "epoch": 59.43762120232709, |
| "grad_norm": 2.4156997203826904, |
| "learning_rate": 0.001, |
| "loss": 1.061, |
| "step": 183900 |
| }, |
| { |
| "epoch": 59.469941822883, |
| "grad_norm": 1.7681801319122314, |
| "learning_rate": 0.001, |
| "loss": 1.068, |
| "step": 184000 |
| }, |
| { |
| "epoch": 59.502262443438916, |
| "grad_norm": 1.7692102193832397, |
| "learning_rate": 0.001, |
| "loss": 1.0839, |
| "step": 184100 |
| }, |
| { |
| "epoch": 59.53458306399483, |
| "grad_norm": 1.82084059715271, |
| "learning_rate": 0.001, |
| "loss": 1.0723, |
| "step": 184200 |
| }, |
| { |
| "epoch": 59.566903684550745, |
| "grad_norm": 2.9844210147857666, |
| "learning_rate": 0.001, |
| "loss": 1.0736, |
| "step": 184300 |
| }, |
| { |
| "epoch": 59.59922430510666, |
| "grad_norm": 1.6782689094543457, |
| "learning_rate": 0.001, |
| "loss": 1.079, |
| "step": 184400 |
| }, |
| { |
| "epoch": 59.631544925662574, |
| "grad_norm": 2.3039188385009766, |
| "learning_rate": 0.001, |
| "loss": 1.077, |
| "step": 184500 |
| }, |
| { |
| "epoch": 59.66386554621849, |
| "grad_norm": 1.5084682703018188, |
| "learning_rate": 0.001, |
| "loss": 1.0895, |
| "step": 184600 |
| }, |
| { |
| "epoch": 59.6961861667744, |
| "grad_norm": 2.922206163406372, |
| "learning_rate": 0.001, |
| "loss": 1.0795, |
| "step": 184700 |
| }, |
| { |
| "epoch": 59.72850678733032, |
| "grad_norm": 1.4033228158950806, |
| "learning_rate": 0.001, |
| "loss": 1.0939, |
| "step": 184800 |
| }, |
| { |
| "epoch": 59.76082740788623, |
| "grad_norm": 1.722129464149475, |
| "learning_rate": 0.001, |
| "loss": 1.0868, |
| "step": 184900 |
| }, |
| { |
| "epoch": 59.79314802844215, |
| "grad_norm": 1.9006403684616089, |
| "learning_rate": 0.001, |
| "loss": 1.0985, |
| "step": 185000 |
| }, |
| { |
| "epoch": 59.82546864899806, |
| "grad_norm": 2.96142315864563, |
| "learning_rate": 0.001, |
| "loss": 1.1029, |
| "step": 185100 |
| }, |
| { |
| "epoch": 59.857789269553976, |
| "grad_norm": 1.7573572397232056, |
| "learning_rate": 0.001, |
| "loss": 1.101, |
| "step": 185200 |
| }, |
| { |
| "epoch": 59.89010989010989, |
| "grad_norm": 1.8144474029541016, |
| "learning_rate": 0.001, |
| "loss": 1.0882, |
| "step": 185300 |
| }, |
| { |
| "epoch": 59.922430510665805, |
| "grad_norm": 1.9638746976852417, |
| "learning_rate": 0.001, |
| "loss": 1.0941, |
| "step": 185400 |
| }, |
| { |
| "epoch": 59.95475113122172, |
| "grad_norm": 1.8024924993515015, |
| "learning_rate": 0.001, |
| "loss": 1.1102, |
| "step": 185500 |
| }, |
| { |
| "epoch": 59.987071751777634, |
| "grad_norm": 1.905829668045044, |
| "learning_rate": 0.001, |
| "loss": 1.0942, |
| "step": 185600 |
| }, |
| { |
| "epoch": 60.01939237233355, |
| "grad_norm": 1.6812539100646973, |
| "learning_rate": 0.001, |
| "loss": 1.0748, |
| "step": 185700 |
| }, |
| { |
| "epoch": 60.05171299288946, |
| "grad_norm": 1.7039813995361328, |
| "learning_rate": 0.001, |
| "loss": 1.0093, |
| "step": 185800 |
| }, |
| { |
| "epoch": 60.08403361344538, |
| "grad_norm": 1.8435310125350952, |
| "learning_rate": 0.001, |
| "loss": 1.0105, |
| "step": 185900 |
| }, |
| { |
| "epoch": 60.11635423400129, |
| "grad_norm": 2.0761561393737793, |
| "learning_rate": 0.001, |
| "loss": 1.0328, |
| "step": 186000 |
| }, |
| { |
| "epoch": 60.14867485455721, |
| "grad_norm": 4.478841304779053, |
| "learning_rate": 0.001, |
| "loss": 1.03, |
| "step": 186100 |
| }, |
| { |
| "epoch": 60.18099547511312, |
| "grad_norm": 1.5484015941619873, |
| "learning_rate": 0.001, |
| "loss": 1.0442, |
| "step": 186200 |
| }, |
| { |
| "epoch": 60.213316095669036, |
| "grad_norm": 1.8544293642044067, |
| "learning_rate": 0.001, |
| "loss": 1.0356, |
| "step": 186300 |
| }, |
| { |
| "epoch": 60.24563671622495, |
| "grad_norm": 1.7366797924041748, |
| "learning_rate": 0.001, |
| "loss": 1.0181, |
| "step": 186400 |
| }, |
| { |
| "epoch": 60.277957336780865, |
| "grad_norm": 1.8898407220840454, |
| "learning_rate": 0.001, |
| "loss": 1.0362, |
| "step": 186500 |
| }, |
| { |
| "epoch": 60.31027795733678, |
| "grad_norm": 1.422088623046875, |
| "learning_rate": 0.001, |
| "loss": 1.0307, |
| "step": 186600 |
| }, |
| { |
| "epoch": 60.342598577892694, |
| "grad_norm": 2.02425217628479, |
| "learning_rate": 0.001, |
| "loss": 1.0591, |
| "step": 186700 |
| }, |
| { |
| "epoch": 60.37491919844861, |
| "grad_norm": 1.5161786079406738, |
| "learning_rate": 0.001, |
| "loss": 1.03, |
| "step": 186800 |
| }, |
| { |
| "epoch": 60.40723981900452, |
| "grad_norm": 1.4360978603363037, |
| "learning_rate": 0.001, |
| "loss": 1.0376, |
| "step": 186900 |
| }, |
| { |
| "epoch": 60.43956043956044, |
| "grad_norm": 1.6159266233444214, |
| "learning_rate": 0.001, |
| "loss": 1.0403, |
| "step": 187000 |
| }, |
| { |
| "epoch": 60.47188106011635, |
| "grad_norm": 1.6211012601852417, |
| "learning_rate": 0.001, |
| "loss": 1.0488, |
| "step": 187100 |
| }, |
| { |
| "epoch": 60.50420168067227, |
| "grad_norm": 1.6320511102676392, |
| "learning_rate": 0.001, |
| "loss": 1.0632, |
| "step": 187200 |
| }, |
| { |
| "epoch": 60.53652230122818, |
| "grad_norm": 1.4999951124191284, |
| "learning_rate": 0.001, |
| "loss": 1.0759, |
| "step": 187300 |
| }, |
| { |
| "epoch": 60.568842921784096, |
| "grad_norm": 1.581023931503296, |
| "learning_rate": 0.001, |
| "loss": 1.0658, |
| "step": 187400 |
| }, |
| { |
| "epoch": 60.60116354234001, |
| "grad_norm": 1.7817587852478027, |
| "learning_rate": 0.001, |
| "loss": 1.054, |
| "step": 187500 |
| }, |
| { |
| "epoch": 60.633484162895925, |
| "grad_norm": 1.7636590003967285, |
| "learning_rate": 0.001, |
| "loss": 1.0684, |
| "step": 187600 |
| }, |
| { |
| "epoch": 60.66580478345184, |
| "grad_norm": 1.7448927164077759, |
| "learning_rate": 0.001, |
| "loss": 1.0625, |
| "step": 187700 |
| }, |
| { |
| "epoch": 60.698125404007754, |
| "grad_norm": 2.117954969406128, |
| "learning_rate": 0.001, |
| "loss": 1.0824, |
| "step": 187800 |
| }, |
| { |
| "epoch": 60.73044602456367, |
| "grad_norm": 1.5040390491485596, |
| "learning_rate": 0.001, |
| "loss": 1.0652, |
| "step": 187900 |
| }, |
| { |
| "epoch": 60.762766645119584, |
| "grad_norm": 1.752811074256897, |
| "learning_rate": 0.001, |
| "loss": 1.0901, |
| "step": 188000 |
| }, |
| { |
| "epoch": 60.7950872656755, |
| "grad_norm": 1.800222396850586, |
| "learning_rate": 0.001, |
| "loss": 1.0856, |
| "step": 188100 |
| }, |
| { |
| "epoch": 60.82740788623141, |
| "grad_norm": 1.8694863319396973, |
| "learning_rate": 0.001, |
| "loss": 1.0953, |
| "step": 188200 |
| }, |
| { |
| "epoch": 60.85972850678733, |
| "grad_norm": 1.5264275074005127, |
| "learning_rate": 0.001, |
| "loss": 1.0727, |
| "step": 188300 |
| }, |
| { |
| "epoch": 60.89204912734324, |
| "grad_norm": 1.5535075664520264, |
| "learning_rate": 0.001, |
| "loss": 1.0954, |
| "step": 188400 |
| }, |
| { |
| "epoch": 60.924369747899156, |
| "grad_norm": 1.4875106811523438, |
| "learning_rate": 0.001, |
| "loss": 1.0942, |
| "step": 188500 |
| }, |
| { |
| "epoch": 60.95669036845507, |
| "grad_norm": 1.7024391889572144, |
| "learning_rate": 0.001, |
| "loss": 1.0822, |
| "step": 188600 |
| }, |
| { |
| "epoch": 60.98901098901099, |
| "grad_norm": 1.6542145013809204, |
| "learning_rate": 0.001, |
| "loss": 1.0996, |
| "step": 188700 |
| }, |
| { |
| "epoch": 61.02133160956691, |
| "grad_norm": 1.7969130277633667, |
| "learning_rate": 0.001, |
| "loss": 1.0485, |
| "step": 188800 |
| }, |
| { |
| "epoch": 61.05365223012282, |
| "grad_norm": 1.9887769222259521, |
| "learning_rate": 0.001, |
| "loss": 0.9878, |
| "step": 188900 |
| }, |
| { |
| "epoch": 61.085972850678736, |
| "grad_norm": 1.2860554456710815, |
| "learning_rate": 0.001, |
| "loss": 1.0076, |
| "step": 189000 |
| }, |
| { |
| "epoch": 61.11829347123465, |
| "grad_norm": 2.320146083831787, |
| "learning_rate": 0.001, |
| "loss": 1.0179, |
| "step": 189100 |
| }, |
| { |
| "epoch": 61.150614091790565, |
| "grad_norm": 1.7772401571273804, |
| "learning_rate": 0.001, |
| "loss": 1.0105, |
| "step": 189200 |
| }, |
| { |
| "epoch": 61.18293471234648, |
| "grad_norm": 1.7308861017227173, |
| "learning_rate": 0.001, |
| "loss": 1.0254, |
| "step": 189300 |
| }, |
| { |
| "epoch": 61.215255332902395, |
| "grad_norm": 1.4736839532852173, |
| "learning_rate": 0.001, |
| "loss": 1.0321, |
| "step": 189400 |
| }, |
| { |
| "epoch": 61.24757595345831, |
| "grad_norm": 1.709177851676941, |
| "learning_rate": 0.001, |
| "loss": 1.0102, |
| "step": 189500 |
| }, |
| { |
| "epoch": 61.279896574014224, |
| "grad_norm": 1.6960136890411377, |
| "learning_rate": 0.001, |
| "loss": 1.017, |
| "step": 189600 |
| }, |
| { |
| "epoch": 61.31221719457014, |
| "grad_norm": 1.7163701057434082, |
| "learning_rate": 0.001, |
| "loss": 1.0547, |
| "step": 189700 |
| }, |
| { |
| "epoch": 61.34453781512605, |
| "grad_norm": 1.6530094146728516, |
| "learning_rate": 0.001, |
| "loss": 1.0263, |
| "step": 189800 |
| }, |
| { |
| "epoch": 61.37685843568197, |
| "grad_norm": 2.045729398727417, |
| "learning_rate": 0.001, |
| "loss": 1.0378, |
| "step": 189900 |
| }, |
| { |
| "epoch": 61.40917905623788, |
| "grad_norm": 1.541930913925171, |
| "learning_rate": 0.001, |
| "loss": 1.0491, |
| "step": 190000 |
| }, |
| { |
| "epoch": 61.441499676793796, |
| "grad_norm": 1.3723949193954468, |
| "learning_rate": 0.001, |
| "loss": 1.0508, |
| "step": 190100 |
| }, |
| { |
| "epoch": 61.47382029734971, |
| "grad_norm": 1.6087392568588257, |
| "learning_rate": 0.001, |
| "loss": 1.0278, |
| "step": 190200 |
| }, |
| { |
| "epoch": 61.506140917905626, |
| "grad_norm": 1.7446894645690918, |
| "learning_rate": 0.001, |
| "loss": 1.0405, |
| "step": 190300 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 1.5976572036743164, |
| "learning_rate": 0.001, |
| "loss": 1.062, |
| "step": 190400 |
| }, |
| { |
| "epoch": 61.570782159017455, |
| "grad_norm": 1.484781265258789, |
| "learning_rate": 0.001, |
| "loss": 1.0507, |
| "step": 190500 |
| }, |
| { |
| "epoch": 61.60310277957337, |
| "grad_norm": 1.9391828775405884, |
| "learning_rate": 0.001, |
| "loss": 1.049, |
| "step": 190600 |
| }, |
| { |
| "epoch": 61.635423400129284, |
| "grad_norm": 1.4382134675979614, |
| "learning_rate": 0.001, |
| "loss": 1.0471, |
| "step": 190700 |
| }, |
| { |
| "epoch": 61.6677440206852, |
| "grad_norm": 1.4284454584121704, |
| "learning_rate": 0.001, |
| "loss": 1.0551, |
| "step": 190800 |
| }, |
| { |
| "epoch": 61.70006464124111, |
| "grad_norm": 1.9543735980987549, |
| "learning_rate": 0.001, |
| "loss": 1.064, |
| "step": 190900 |
| }, |
| { |
| "epoch": 61.73238526179703, |
| "grad_norm": 1.5626345872879028, |
| "learning_rate": 0.001, |
| "loss": 1.0552, |
| "step": 191000 |
| }, |
| { |
| "epoch": 61.76470588235294, |
| "grad_norm": 1.6643787622451782, |
| "learning_rate": 0.001, |
| "loss": 1.0775, |
| "step": 191100 |
| }, |
| { |
| "epoch": 61.79702650290886, |
| "grad_norm": 1.4800466299057007, |
| "learning_rate": 0.001, |
| "loss": 1.07, |
| "step": 191200 |
| }, |
| { |
| "epoch": 61.82934712346477, |
| "grad_norm": 1.5903040170669556, |
| "learning_rate": 0.001, |
| "loss": 1.0834, |
| "step": 191300 |
| }, |
| { |
| "epoch": 61.861667744020686, |
| "grad_norm": 1.492759108543396, |
| "learning_rate": 0.001, |
| "loss": 1.0834, |
| "step": 191400 |
| }, |
| { |
| "epoch": 61.8939883645766, |
| "grad_norm": 1.6342939138412476, |
| "learning_rate": 0.001, |
| "loss": 1.0651, |
| "step": 191500 |
| }, |
| { |
| "epoch": 61.926308985132515, |
| "grad_norm": 1.6058002710342407, |
| "learning_rate": 0.001, |
| "loss": 1.0673, |
| "step": 191600 |
| }, |
| { |
| "epoch": 61.95862960568843, |
| "grad_norm": 1.3571763038635254, |
| "learning_rate": 0.001, |
| "loss": 1.0744, |
| "step": 191700 |
| }, |
| { |
| "epoch": 61.990950226244344, |
| "grad_norm": 1.5555781126022339, |
| "learning_rate": 0.001, |
| "loss": 1.0971, |
| "step": 191800 |
| }, |
| { |
| "epoch": 62.02327084680026, |
| "grad_norm": 1.648055911064148, |
| "learning_rate": 0.001, |
| "loss": 1.0267, |
| "step": 191900 |
| }, |
| { |
| "epoch": 62.05559146735617, |
| "grad_norm": 1.420931339263916, |
| "learning_rate": 0.001, |
| "loss": 0.9998, |
| "step": 192000 |
| }, |
| { |
| "epoch": 62.08791208791209, |
| "grad_norm": 1.436596393585205, |
| "learning_rate": 0.001, |
| "loss": 0.9868, |
| "step": 192100 |
| }, |
| { |
| "epoch": 62.120232708468, |
| "grad_norm": 1.863795518875122, |
| "learning_rate": 0.001, |
| "loss": 1.0002, |
| "step": 192200 |
| }, |
| { |
| "epoch": 62.15255332902392, |
| "grad_norm": 2.2537336349487305, |
| "learning_rate": 0.001, |
| "loss": 1.0086, |
| "step": 192300 |
| }, |
| { |
| "epoch": 62.18487394957983, |
| "grad_norm": 1.4402319192886353, |
| "learning_rate": 0.001, |
| "loss": 1.0017, |
| "step": 192400 |
| }, |
| { |
| "epoch": 62.217194570135746, |
| "grad_norm": 1.756102442741394, |
| "learning_rate": 0.001, |
| "loss": 1.0109, |
| "step": 192500 |
| }, |
| { |
| "epoch": 62.24951519069166, |
| "grad_norm": 1.5982434749603271, |
| "learning_rate": 0.001, |
| "loss": 0.9953, |
| "step": 192600 |
| }, |
| { |
| "epoch": 62.281835811247575, |
| "grad_norm": 1.6266897916793823, |
| "learning_rate": 0.001, |
| "loss": 1.0191, |
| "step": 192700 |
| }, |
| { |
| "epoch": 62.31415643180349, |
| "grad_norm": 1.8158379793167114, |
| "learning_rate": 0.001, |
| "loss": 1.0156, |
| "step": 192800 |
| }, |
| { |
| "epoch": 62.346477052359404, |
| "grad_norm": 1.6840484142303467, |
| "learning_rate": 0.001, |
| "loss": 1.0194, |
| "step": 192900 |
| }, |
| { |
| "epoch": 62.37879767291532, |
| "grad_norm": 1.6763032674789429, |
| "learning_rate": 0.001, |
| "loss": 1.0243, |
| "step": 193000 |
| }, |
| { |
| "epoch": 62.41111829347123, |
| "grad_norm": 2.340099334716797, |
| "learning_rate": 0.001, |
| "loss": 1.0222, |
| "step": 193100 |
| }, |
| { |
| "epoch": 62.44343891402715, |
| "grad_norm": 1.2376492023468018, |
| "learning_rate": 0.001, |
| "loss": 1.0288, |
| "step": 193200 |
| }, |
| { |
| "epoch": 62.47575953458306, |
| "grad_norm": 1.5578148365020752, |
| "learning_rate": 0.001, |
| "loss": 1.0442, |
| "step": 193300 |
| }, |
| { |
| "epoch": 62.50808015513898, |
| "grad_norm": 1.7190568447113037, |
| "learning_rate": 0.001, |
| "loss": 1.0338, |
| "step": 193400 |
| }, |
| { |
| "epoch": 62.54040077569489, |
| "grad_norm": 1.876619577407837, |
| "learning_rate": 0.001, |
| "loss": 1.0451, |
| "step": 193500 |
| }, |
| { |
| "epoch": 62.572721396250806, |
| "grad_norm": 1.5763674974441528, |
| "learning_rate": 0.001, |
| "loss": 1.0518, |
| "step": 193600 |
| }, |
| { |
| "epoch": 62.60504201680672, |
| "grad_norm": 1.3858126401901245, |
| "learning_rate": 0.001, |
| "loss": 1.0555, |
| "step": 193700 |
| }, |
| { |
| "epoch": 62.637362637362635, |
| "grad_norm": 1.5947778224945068, |
| "learning_rate": 0.001, |
| "loss": 1.0683, |
| "step": 193800 |
| }, |
| { |
| "epoch": 62.66968325791855, |
| "grad_norm": 1.6285808086395264, |
| "learning_rate": 0.001, |
| "loss": 1.0541, |
| "step": 193900 |
| }, |
| { |
| "epoch": 62.702003878474464, |
| "grad_norm": 1.5361659526824951, |
| "learning_rate": 0.001, |
| "loss": 1.0422, |
| "step": 194000 |
| }, |
| { |
| "epoch": 62.73432449903038, |
| "grad_norm": 1.534832239151001, |
| "learning_rate": 0.001, |
| "loss": 1.0602, |
| "step": 194100 |
| }, |
| { |
| "epoch": 62.76664511958629, |
| "grad_norm": 1.6349917650222778, |
| "learning_rate": 0.001, |
| "loss": 1.0666, |
| "step": 194200 |
| }, |
| { |
| "epoch": 62.79896574014221, |
| "grad_norm": 2.202749729156494, |
| "learning_rate": 0.001, |
| "loss": 1.0618, |
| "step": 194300 |
| }, |
| { |
| "epoch": 62.83128636069812, |
| "grad_norm": 1.2954192161560059, |
| "learning_rate": 0.001, |
| "loss": 1.0573, |
| "step": 194400 |
| }, |
| { |
| "epoch": 62.86360698125404, |
| "grad_norm": 1.7332080602645874, |
| "learning_rate": 0.001, |
| "loss": 1.0763, |
| "step": 194500 |
| }, |
| { |
| "epoch": 62.89592760180995, |
| "grad_norm": 1.8932256698608398, |
| "learning_rate": 0.001, |
| "loss": 1.0794, |
| "step": 194600 |
| }, |
| { |
| "epoch": 62.928248222365866, |
| "grad_norm": 1.3601816892623901, |
| "learning_rate": 0.001, |
| "loss": 1.0828, |
| "step": 194700 |
| }, |
| { |
| "epoch": 62.96056884292178, |
| "grad_norm": 1.5028172731399536, |
| "learning_rate": 0.001, |
| "loss": 1.0879, |
| "step": 194800 |
| }, |
| { |
| "epoch": 62.992889463477695, |
| "grad_norm": 1.466861367225647, |
| "learning_rate": 0.001, |
| "loss": 1.0798, |
| "step": 194900 |
| }, |
| { |
| "epoch": 63.02521008403362, |
| "grad_norm": 1.4932770729064941, |
| "learning_rate": 0.001, |
| "loss": 1.0139, |
| "step": 195000 |
| }, |
| { |
| "epoch": 63.05753070458953, |
| "grad_norm": 1.7395883798599243, |
| "learning_rate": 0.001, |
| "loss": 0.9744, |
| "step": 195100 |
| }, |
| { |
| "epoch": 63.089851325145446, |
| "grad_norm": 1.5448013544082642, |
| "learning_rate": 0.001, |
| "loss": 0.9972, |
| "step": 195200 |
| }, |
| { |
| "epoch": 63.12217194570136, |
| "grad_norm": 1.6690627336502075, |
| "learning_rate": 0.001, |
| "loss": 0.9984, |
| "step": 195300 |
| }, |
| { |
| "epoch": 63.154492566257275, |
| "grad_norm": 1.3744710683822632, |
| "learning_rate": 0.001, |
| "loss": 0.989, |
| "step": 195400 |
| }, |
| { |
| "epoch": 63.18681318681319, |
| "grad_norm": 1.4976576566696167, |
| "learning_rate": 0.001, |
| "loss": 1.0029, |
| "step": 195500 |
| }, |
| { |
| "epoch": 63.219133807369104, |
| "grad_norm": 1.7103939056396484, |
| "learning_rate": 0.001, |
| "loss": 1.0041, |
| "step": 195600 |
| }, |
| { |
| "epoch": 63.25145442792502, |
| "grad_norm": 1.535509467124939, |
| "learning_rate": 0.001, |
| "loss": 1.0122, |
| "step": 195700 |
| }, |
| { |
| "epoch": 63.28377504848093, |
| "grad_norm": 1.9888761043548584, |
| "learning_rate": 0.001, |
| "loss": 1.0109, |
| "step": 195800 |
| }, |
| { |
| "epoch": 63.31609566903685, |
| "grad_norm": 1.6202621459960938, |
| "learning_rate": 0.001, |
| "loss": 1.0244, |
| "step": 195900 |
| }, |
| { |
| "epoch": 63.34841628959276, |
| "grad_norm": 1.4541372060775757, |
| "learning_rate": 0.001, |
| "loss": 1.0023, |
| "step": 196000 |
| }, |
| { |
| "epoch": 63.38073691014868, |
| "grad_norm": 1.5561103820800781, |
| "learning_rate": 0.001, |
| "loss": 1.0055, |
| "step": 196100 |
| }, |
| { |
| "epoch": 63.41305753070459, |
| "grad_norm": 1.6799334287643433, |
| "learning_rate": 0.001, |
| "loss": 1.0261, |
| "step": 196200 |
| }, |
| { |
| "epoch": 63.445378151260506, |
| "grad_norm": 1.6529778242111206, |
| "learning_rate": 0.001, |
| "loss": 1.0308, |
| "step": 196300 |
| }, |
| { |
| "epoch": 63.47769877181642, |
| "grad_norm": 1.3067463636398315, |
| "learning_rate": 0.001, |
| "loss": 1.018, |
| "step": 196400 |
| }, |
| { |
| "epoch": 63.510019392372335, |
| "grad_norm": 1.8982113599777222, |
| "learning_rate": 0.001, |
| "loss": 1.0239, |
| "step": 196500 |
| }, |
| { |
| "epoch": 63.54234001292825, |
| "grad_norm": 1.3777034282684326, |
| "learning_rate": 0.001, |
| "loss": 1.0361, |
| "step": 196600 |
| }, |
| { |
| "epoch": 63.574660633484164, |
| "grad_norm": 1.6532855033874512, |
| "learning_rate": 0.001, |
| "loss": 1.0291, |
| "step": 196700 |
| }, |
| { |
| "epoch": 63.60698125404008, |
| "grad_norm": 1.9859448671340942, |
| "learning_rate": 0.001, |
| "loss": 1.0361, |
| "step": 196800 |
| }, |
| { |
| "epoch": 63.63930187459599, |
| "grad_norm": 1.6853806972503662, |
| "learning_rate": 0.001, |
| "loss": 1.0609, |
| "step": 196900 |
| }, |
| { |
| "epoch": 63.67162249515191, |
| "grad_norm": 1.7648768424987793, |
| "learning_rate": 0.001, |
| "loss": 1.0455, |
| "step": 197000 |
| }, |
| { |
| "epoch": 63.70394311570782, |
| "grad_norm": 1.4905048608779907, |
| "learning_rate": 0.001, |
| "loss": 1.0397, |
| "step": 197100 |
| }, |
| { |
| "epoch": 63.73626373626374, |
| "grad_norm": 1.4467679262161255, |
| "learning_rate": 0.001, |
| "loss": 1.0418, |
| "step": 197200 |
| }, |
| { |
| "epoch": 63.76858435681965, |
| "grad_norm": 1.4468274116516113, |
| "learning_rate": 0.001, |
| "loss": 1.069, |
| "step": 197300 |
| }, |
| { |
| "epoch": 63.800904977375566, |
| "grad_norm": 1.6732083559036255, |
| "learning_rate": 0.001, |
| "loss": 1.0555, |
| "step": 197400 |
| }, |
| { |
| "epoch": 63.83322559793148, |
| "grad_norm": 1.6796603202819824, |
| "learning_rate": 0.001, |
| "loss": 1.0448, |
| "step": 197500 |
| }, |
| { |
| "epoch": 63.865546218487395, |
| "grad_norm": 1.566683053970337, |
| "learning_rate": 0.001, |
| "loss": 1.073, |
| "step": 197600 |
| }, |
| { |
| "epoch": 63.89786683904331, |
| "grad_norm": 1.954232931137085, |
| "learning_rate": 0.001, |
| "loss": 1.0571, |
| "step": 197700 |
| }, |
| { |
| "epoch": 63.930187459599225, |
| "grad_norm": 1.7381691932678223, |
| "learning_rate": 0.001, |
| "loss": 1.0625, |
| "step": 197800 |
| }, |
| { |
| "epoch": 63.96250808015514, |
| "grad_norm": 1.313048005104065, |
| "learning_rate": 0.001, |
| "loss": 1.0704, |
| "step": 197900 |
| }, |
| { |
| "epoch": 63.994828700711054, |
| "grad_norm": 1.5494202375411987, |
| "learning_rate": 0.001, |
| "loss": 1.0526, |
| "step": 198000 |
| }, |
| { |
| "epoch": 64.02714932126698, |
| "grad_norm": 1.574831247329712, |
| "learning_rate": 0.001, |
| "loss": 1.0078, |
| "step": 198100 |
| }, |
| { |
| "epoch": 64.05946994182288, |
| "grad_norm": 1.8430677652359009, |
| "learning_rate": 0.001, |
| "loss": 0.9663, |
| "step": 198200 |
| }, |
| { |
| "epoch": 64.0917905623788, |
| "grad_norm": 1.352602243423462, |
| "learning_rate": 0.001, |
| "loss": 0.9762, |
| "step": 198300 |
| }, |
| { |
| "epoch": 64.12411118293471, |
| "grad_norm": 17.603384017944336, |
| "learning_rate": 0.001, |
| "loss": 0.9659, |
| "step": 198400 |
| }, |
| { |
| "epoch": 64.15643180349063, |
| "grad_norm": 1.5747212171554565, |
| "learning_rate": 0.001, |
| "loss": 0.9795, |
| "step": 198500 |
| }, |
| { |
| "epoch": 64.18875242404654, |
| "grad_norm": 1.5215107202529907, |
| "learning_rate": 0.001, |
| "loss": 0.9927, |
| "step": 198600 |
| }, |
| { |
| "epoch": 64.22107304460246, |
| "grad_norm": 1.9847549200057983, |
| "learning_rate": 0.001, |
| "loss": 0.9911, |
| "step": 198700 |
| }, |
| { |
| "epoch": 64.25339366515837, |
| "grad_norm": 1.9193583726882935, |
| "learning_rate": 0.001, |
| "loss": 0.9902, |
| "step": 198800 |
| }, |
| { |
| "epoch": 64.28571428571429, |
| "grad_norm": 2.0221829414367676, |
| "learning_rate": 0.001, |
| "loss": 1.0085, |
| "step": 198900 |
| }, |
| { |
| "epoch": 64.3180349062702, |
| "grad_norm": 1.7887248992919922, |
| "learning_rate": 0.001, |
| "loss": 1.0029, |
| "step": 199000 |
| }, |
| { |
| "epoch": 64.35035552682612, |
| "grad_norm": 1.7705152034759521, |
| "learning_rate": 0.001, |
| "loss": 1.0082, |
| "step": 199100 |
| }, |
| { |
| "epoch": 64.38267614738203, |
| "grad_norm": 1.9016677141189575, |
| "learning_rate": 0.001, |
| "loss": 1.0053, |
| "step": 199200 |
| }, |
| { |
| "epoch": 64.41499676793795, |
| "grad_norm": 1.694682240486145, |
| "learning_rate": 0.001, |
| "loss": 0.9946, |
| "step": 199300 |
| }, |
| { |
| "epoch": 64.44731738849386, |
| "grad_norm": 1.935779333114624, |
| "learning_rate": 0.001, |
| "loss": 1.0154, |
| "step": 199400 |
| }, |
| { |
| "epoch": 64.47963800904978, |
| "grad_norm": 1.625313639640808, |
| "learning_rate": 0.001, |
| "loss": 1.0126, |
| "step": 199500 |
| }, |
| { |
| "epoch": 64.51195862960569, |
| "grad_norm": 1.7697724103927612, |
| "learning_rate": 0.001, |
| "loss": 1.0334, |
| "step": 199600 |
| }, |
| { |
| "epoch": 64.54427925016161, |
| "grad_norm": 2.2513742446899414, |
| "learning_rate": 0.001, |
| "loss": 1.0362, |
| "step": 199700 |
| }, |
| { |
| "epoch": 64.57659987071752, |
| "grad_norm": 1.630536437034607, |
| "learning_rate": 0.001, |
| "loss": 1.0259, |
| "step": 199800 |
| }, |
| { |
| "epoch": 64.60892049127344, |
| "grad_norm": 1.9813055992126465, |
| "learning_rate": 0.001, |
| "loss": 1.0455, |
| "step": 199900 |
| }, |
| { |
| "epoch": 64.64124111182934, |
| "grad_norm": 1.6457490921020508, |
| "learning_rate": 0.001, |
| "loss": 1.0369, |
| "step": 200000 |
| }, |
| { |
| "epoch": 64.67356173238527, |
| "grad_norm": 1.7242528200149536, |
| "learning_rate": 0.001, |
| "loss": 1.028, |
| "step": 200100 |
| }, |
| { |
| "epoch": 64.70588235294117, |
| "grad_norm": 1.8536429405212402, |
| "learning_rate": 0.001, |
| "loss": 1.052, |
| "step": 200200 |
| }, |
| { |
| "epoch": 64.7382029734971, |
| "grad_norm": 2.031980037689209, |
| "learning_rate": 0.001, |
| "loss": 1.0352, |
| "step": 200300 |
| }, |
| { |
| "epoch": 64.770523594053, |
| "grad_norm": 2.1543500423431396, |
| "learning_rate": 0.001, |
| "loss": 1.0238, |
| "step": 200400 |
| }, |
| { |
| "epoch": 64.80284421460892, |
| "grad_norm": 1.4823232889175415, |
| "learning_rate": 0.001, |
| "loss": 1.0573, |
| "step": 200500 |
| }, |
| { |
| "epoch": 64.83516483516483, |
| "grad_norm": 1.7782244682312012, |
| "learning_rate": 0.001, |
| "loss": 1.0476, |
| "step": 200600 |
| }, |
| { |
| "epoch": 64.86748545572075, |
| "grad_norm": 2.004187822341919, |
| "learning_rate": 0.001, |
| "loss": 1.0579, |
| "step": 200700 |
| }, |
| { |
| "epoch": 64.89980607627666, |
| "grad_norm": 1.5076520442962646, |
| "learning_rate": 0.001, |
| "loss": 1.055, |
| "step": 200800 |
| }, |
| { |
| "epoch": 64.93212669683258, |
| "grad_norm": 2.1601130962371826, |
| "learning_rate": 0.001, |
| "loss": 1.0416, |
| "step": 200900 |
| }, |
| { |
| "epoch": 64.96444731738849, |
| "grad_norm": 1.9222103357315063, |
| "learning_rate": 0.001, |
| "loss": 1.0593, |
| "step": 201000 |
| }, |
| { |
| "epoch": 64.99676793794441, |
| "grad_norm": 2.148522138595581, |
| "learning_rate": 0.001, |
| "loss": 1.0256, |
| "step": 201100 |
| }, |
| { |
| "epoch": 65.02908855850032, |
| "grad_norm": 1.4371371269226074, |
| "learning_rate": 0.001, |
| "loss": 0.9674, |
| "step": 201200 |
| }, |
| { |
| "epoch": 65.06140917905624, |
| "grad_norm": 1.54662024974823, |
| "learning_rate": 0.001, |
| "loss": 0.9374, |
| "step": 201300 |
| }, |
| { |
| "epoch": 65.09372979961215, |
| "grad_norm": 1.464662790298462, |
| "learning_rate": 0.001, |
| "loss": 0.9625, |
| "step": 201400 |
| }, |
| { |
| "epoch": 65.12605042016807, |
| "grad_norm": 1.7494497299194336, |
| "learning_rate": 0.001, |
| "loss": 0.9913, |
| "step": 201500 |
| }, |
| { |
| "epoch": 65.15837104072398, |
| "grad_norm": 1.547864317893982, |
| "learning_rate": 0.001, |
| "loss": 0.9913, |
| "step": 201600 |
| }, |
| { |
| "epoch": 65.1906916612799, |
| "grad_norm": 1.4280509948730469, |
| "learning_rate": 0.001, |
| "loss": 0.9887, |
| "step": 201700 |
| }, |
| { |
| "epoch": 65.2230122818358, |
| "grad_norm": 1.948360800743103, |
| "learning_rate": 0.001, |
| "loss": 0.9843, |
| "step": 201800 |
| }, |
| { |
| "epoch": 65.25533290239173, |
| "grad_norm": 1.9437856674194336, |
| "learning_rate": 0.001, |
| "loss": 0.9906, |
| "step": 201900 |
| }, |
| { |
| "epoch": 65.28765352294764, |
| "grad_norm": 2.2211198806762695, |
| "learning_rate": 0.001, |
| "loss": 0.9781, |
| "step": 202000 |
| }, |
| { |
| "epoch": 65.31997414350356, |
| "grad_norm": 1.444524884223938, |
| "learning_rate": 0.001, |
| "loss": 1.0022, |
| "step": 202100 |
| }, |
| { |
| "epoch": 65.35229476405947, |
| "grad_norm": 1.7573517560958862, |
| "learning_rate": 0.001, |
| "loss": 1.0147, |
| "step": 202200 |
| }, |
| { |
| "epoch": 65.38461538461539, |
| "grad_norm": 1.4979794025421143, |
| "learning_rate": 0.001, |
| "loss": 0.9888, |
| "step": 202300 |
| }, |
| { |
| "epoch": 65.4169360051713, |
| "grad_norm": 1.5916309356689453, |
| "learning_rate": 0.001, |
| "loss": 0.982, |
| "step": 202400 |
| }, |
| { |
| "epoch": 65.44925662572722, |
| "grad_norm": 1.95499849319458, |
| "learning_rate": 0.001, |
| "loss": 1.0023, |
| "step": 202500 |
| }, |
| { |
| "epoch": 65.48157724628312, |
| "grad_norm": 1.6247013807296753, |
| "learning_rate": 0.001, |
| "loss": 0.9973, |
| "step": 202600 |
| }, |
| { |
| "epoch": 65.51389786683905, |
| "grad_norm": 1.7667204141616821, |
| "learning_rate": 0.001, |
| "loss": 1.0227, |
| "step": 202700 |
| }, |
| { |
| "epoch": 65.54621848739495, |
| "grad_norm": 1.7284674644470215, |
| "learning_rate": 0.001, |
| "loss": 1.0197, |
| "step": 202800 |
| }, |
| { |
| "epoch": 65.57853910795087, |
| "grad_norm": 1.888560175895691, |
| "learning_rate": 0.001, |
| "loss": 1.028, |
| "step": 202900 |
| }, |
| { |
| "epoch": 65.61085972850678, |
| "grad_norm": 1.4070155620574951, |
| "learning_rate": 0.001, |
| "loss": 1.039, |
| "step": 203000 |
| }, |
| { |
| "epoch": 65.6431803490627, |
| "grad_norm": 2.212310552597046, |
| "learning_rate": 0.001, |
| "loss": 1.0261, |
| "step": 203100 |
| }, |
| { |
| "epoch": 65.67550096961861, |
| "grad_norm": 2.192976474761963, |
| "learning_rate": 0.001, |
| "loss": 1.0353, |
| "step": 203200 |
| }, |
| { |
| "epoch": 65.70782159017453, |
| "grad_norm": 1.7333697080612183, |
| "learning_rate": 0.001, |
| "loss": 1.0293, |
| "step": 203300 |
| }, |
| { |
| "epoch": 65.74014221073044, |
| "grad_norm": 1.5100023746490479, |
| "learning_rate": 0.001, |
| "loss": 1.0205, |
| "step": 203400 |
| }, |
| { |
| "epoch": 65.77246283128636, |
| "grad_norm": 2.0148110389709473, |
| "learning_rate": 0.001, |
| "loss": 1.027, |
| "step": 203500 |
| }, |
| { |
| "epoch": 65.80478345184227, |
| "grad_norm": 1.6597840785980225, |
| "learning_rate": 0.001, |
| "loss": 1.0248, |
| "step": 203600 |
| }, |
| { |
| "epoch": 65.83710407239819, |
| "grad_norm": 1.6659585237503052, |
| "learning_rate": 0.001, |
| "loss": 1.0414, |
| "step": 203700 |
| }, |
| { |
| "epoch": 65.8694246929541, |
| "grad_norm": 1.5771119594573975, |
| "learning_rate": 0.001, |
| "loss": 1.0326, |
| "step": 203800 |
| }, |
| { |
| "epoch": 65.90174531351002, |
| "grad_norm": 1.8948572874069214, |
| "learning_rate": 0.001, |
| "loss": 1.0511, |
| "step": 203900 |
| }, |
| { |
| "epoch": 65.93406593406593, |
| "grad_norm": 1.6274173259735107, |
| "learning_rate": 0.001, |
| "loss": 1.0555, |
| "step": 204000 |
| }, |
| { |
| "epoch": 65.96638655462185, |
| "grad_norm": 1.5236989259719849, |
| "learning_rate": 0.001, |
| "loss": 1.0338, |
| "step": 204100 |
| }, |
| { |
| "epoch": 65.99870717517777, |
| "grad_norm": 1.388321042060852, |
| "learning_rate": 0.001, |
| "loss": 1.018, |
| "step": 204200 |
| }, |
| { |
| "epoch": 66.03102779573368, |
| "grad_norm": 1.1837842464447021, |
| "learning_rate": 0.001, |
| "loss": 0.9499, |
| "step": 204300 |
| }, |
| { |
| "epoch": 66.0633484162896, |
| "grad_norm": 1.6892098188400269, |
| "learning_rate": 0.001, |
| "loss": 0.9493, |
| "step": 204400 |
| }, |
| { |
| "epoch": 66.0956690368455, |
| "grad_norm": 1.2390347719192505, |
| "learning_rate": 0.001, |
| "loss": 0.9732, |
| "step": 204500 |
| }, |
| { |
| "epoch": 66.12798965740143, |
| "grad_norm": 1.2077933549880981, |
| "learning_rate": 0.001, |
| "loss": 0.9703, |
| "step": 204600 |
| }, |
| { |
| "epoch": 66.16031027795734, |
| "grad_norm": 1.7999850511550903, |
| "learning_rate": 0.001, |
| "loss": 0.968, |
| "step": 204700 |
| }, |
| { |
| "epoch": 66.19263089851326, |
| "grad_norm": 1.8721860647201538, |
| "learning_rate": 0.001, |
| "loss": 0.9882, |
| "step": 204800 |
| }, |
| { |
| "epoch": 66.22495151906917, |
| "grad_norm": 1.4129645824432373, |
| "learning_rate": 0.001, |
| "loss": 0.9709, |
| "step": 204900 |
| }, |
| { |
| "epoch": 66.25727213962509, |
| "grad_norm": 1.5184195041656494, |
| "learning_rate": 0.001, |
| "loss": 0.9852, |
| "step": 205000 |
| }, |
| { |
| "epoch": 66.289592760181, |
| "grad_norm": 1.9621903896331787, |
| "learning_rate": 0.001, |
| "loss": 0.971, |
| "step": 205100 |
| }, |
| { |
| "epoch": 66.32191338073692, |
| "grad_norm": 1.714373230934143, |
| "learning_rate": 0.001, |
| "loss": 0.9942, |
| "step": 205200 |
| }, |
| { |
| "epoch": 66.35423400129282, |
| "grad_norm": 1.475232720375061, |
| "learning_rate": 0.001, |
| "loss": 0.987, |
| "step": 205300 |
| }, |
| { |
| "epoch": 66.38655462184875, |
| "grad_norm": 1.7224111557006836, |
| "learning_rate": 0.001, |
| "loss": 0.9769, |
| "step": 205400 |
| }, |
| { |
| "epoch": 66.41887524240465, |
| "grad_norm": 1.6576015949249268, |
| "learning_rate": 0.001, |
| "loss": 0.998, |
| "step": 205500 |
| }, |
| { |
| "epoch": 66.45119586296057, |
| "grad_norm": 1.739248275756836, |
| "learning_rate": 0.001, |
| "loss": 1.0116, |
| "step": 205600 |
| }, |
| { |
| "epoch": 66.48351648351648, |
| "grad_norm": 1.4287973642349243, |
| "learning_rate": 0.001, |
| "loss": 0.9823, |
| "step": 205700 |
| }, |
| { |
| "epoch": 66.5158371040724, |
| "grad_norm": 2.0759823322296143, |
| "learning_rate": 0.001, |
| "loss": 0.9991, |
| "step": 205800 |
| }, |
| { |
| "epoch": 66.54815772462831, |
| "grad_norm": 1.8355493545532227, |
| "learning_rate": 0.001, |
| "loss": 1.0104, |
| "step": 205900 |
| }, |
| { |
| "epoch": 66.58047834518423, |
| "grad_norm": 2.031424045562744, |
| "learning_rate": 0.001, |
| "loss": 1.0043, |
| "step": 206000 |
| }, |
| { |
| "epoch": 66.61279896574014, |
| "grad_norm": 1.8214043378829956, |
| "learning_rate": 0.001, |
| "loss": 1.0088, |
| "step": 206100 |
| }, |
| { |
| "epoch": 66.64511958629606, |
| "grad_norm": 1.7729460000991821, |
| "learning_rate": 0.001, |
| "loss": 1.0072, |
| "step": 206200 |
| }, |
| { |
| "epoch": 66.67744020685197, |
| "grad_norm": 1.6359024047851562, |
| "learning_rate": 0.001, |
| "loss": 1.0232, |
| "step": 206300 |
| }, |
| { |
| "epoch": 66.70976082740789, |
| "grad_norm": 1.8432180881500244, |
| "learning_rate": 0.001, |
| "loss": 1.0086, |
| "step": 206400 |
| }, |
| { |
| "epoch": 66.7420814479638, |
| "grad_norm": 1.3775078058242798, |
| "learning_rate": 0.001, |
| "loss": 1.0241, |
| "step": 206500 |
| }, |
| { |
| "epoch": 66.77440206851972, |
| "grad_norm": 1.574852705001831, |
| "learning_rate": 0.001, |
| "loss": 1.0194, |
| "step": 206600 |
| }, |
| { |
| "epoch": 66.80672268907563, |
| "grad_norm": 1.415818691253662, |
| "learning_rate": 0.001, |
| "loss": 1.0053, |
| "step": 206700 |
| }, |
| { |
| "epoch": 66.83904330963155, |
| "grad_norm": 1.3461685180664062, |
| "learning_rate": 0.001, |
| "loss": 1.0263, |
| "step": 206800 |
| }, |
| { |
| "epoch": 66.87136393018746, |
| "grad_norm": 1.530918836593628, |
| "learning_rate": 0.001, |
| "loss": 1.0466, |
| "step": 206900 |
| }, |
| { |
| "epoch": 66.90368455074338, |
| "grad_norm": 1.2895759344100952, |
| "learning_rate": 0.001, |
| "loss": 1.0355, |
| "step": 207000 |
| }, |
| { |
| "epoch": 66.93600517129929, |
| "grad_norm": 1.4702177047729492, |
| "learning_rate": 0.001, |
| "loss": 1.0289, |
| "step": 207100 |
| }, |
| { |
| "epoch": 66.96832579185521, |
| "grad_norm": 1.5006325244903564, |
| "learning_rate": 0.001, |
| "loss": 1.0585, |
| "step": 207200 |
| }, |
| { |
| "epoch": 67.00064641241111, |
| "grad_norm": 2.1274211406707764, |
| "learning_rate": 0.001, |
| "loss": 1.016, |
| "step": 207300 |
| }, |
| { |
| "epoch": 67.03296703296704, |
| "grad_norm": 1.4935500621795654, |
| "learning_rate": 0.001, |
| "loss": 0.9389, |
| "step": 207400 |
| }, |
| { |
| "epoch": 67.06528765352294, |
| "grad_norm": 1.7395673990249634, |
| "learning_rate": 0.001, |
| "loss": 0.9416, |
| "step": 207500 |
| }, |
| { |
| "epoch": 67.09760827407887, |
| "grad_norm": 2.2219367027282715, |
| "learning_rate": 0.001, |
| "loss": 0.9603, |
| "step": 207600 |
| }, |
| { |
| "epoch": 67.12992889463477, |
| "grad_norm": 2.3106393814086914, |
| "learning_rate": 0.001, |
| "loss": 0.9557, |
| "step": 207700 |
| }, |
| { |
| "epoch": 67.1622495151907, |
| "grad_norm": 2.637923002243042, |
| "learning_rate": 0.001, |
| "loss": 0.9448, |
| "step": 207800 |
| }, |
| { |
| "epoch": 67.1945701357466, |
| "grad_norm": 1.9894449710845947, |
| "learning_rate": 0.001, |
| "loss": 0.9693, |
| "step": 207900 |
| }, |
| { |
| "epoch": 67.22689075630252, |
| "grad_norm": 2.0248193740844727, |
| "learning_rate": 0.001, |
| "loss": 0.9842, |
| "step": 208000 |
| }, |
| { |
| "epoch": 67.25921137685843, |
| "grad_norm": 2.42965030670166, |
| "learning_rate": 0.001, |
| "loss": 0.954, |
| "step": 208100 |
| }, |
| { |
| "epoch": 67.29153199741435, |
| "grad_norm": 2.4134888648986816, |
| "learning_rate": 0.001, |
| "loss": 0.9746, |
| "step": 208200 |
| }, |
| { |
| "epoch": 67.32385261797026, |
| "grad_norm": 2.5889575481414795, |
| "learning_rate": 0.001, |
| "loss": 0.9787, |
| "step": 208300 |
| }, |
| { |
| "epoch": 67.35617323852618, |
| "grad_norm": 2.4583373069763184, |
| "learning_rate": 0.001, |
| "loss": 0.977, |
| "step": 208400 |
| }, |
| { |
| "epoch": 67.38849385908209, |
| "grad_norm": 1.9895371198654175, |
| "learning_rate": 0.001, |
| "loss": 0.981, |
| "step": 208500 |
| }, |
| { |
| "epoch": 67.42081447963801, |
| "grad_norm": 2.2331972122192383, |
| "learning_rate": 0.001, |
| "loss": 1.005, |
| "step": 208600 |
| }, |
| { |
| "epoch": 67.45313510019392, |
| "grad_norm": 1.9639379978179932, |
| "learning_rate": 0.001, |
| "loss": 0.9807, |
| "step": 208700 |
| }, |
| { |
| "epoch": 67.48545572074984, |
| "grad_norm": 1.7588427066802979, |
| "learning_rate": 0.001, |
| "loss": 1.001, |
| "step": 208800 |
| }, |
| { |
| "epoch": 67.51777634130575, |
| "grad_norm": 2.256831169128418, |
| "learning_rate": 0.001, |
| "loss": 1.0069, |
| "step": 208900 |
| }, |
| { |
| "epoch": 67.55009696186167, |
| "grad_norm": 1.9242079257965088, |
| "learning_rate": 0.001, |
| "loss": 0.9981, |
| "step": 209000 |
| }, |
| { |
| "epoch": 67.58241758241758, |
| "grad_norm": 2.6344449520111084, |
| "learning_rate": 0.001, |
| "loss": 1.0119, |
| "step": 209100 |
| }, |
| { |
| "epoch": 67.6147382029735, |
| "grad_norm": 2.7711076736450195, |
| "learning_rate": 0.001, |
| "loss": 1.0018, |
| "step": 209200 |
| }, |
| { |
| "epoch": 67.6470588235294, |
| "grad_norm": 1.9800853729248047, |
| "learning_rate": 0.001, |
| "loss": 0.9975, |
| "step": 209300 |
| }, |
| { |
| "epoch": 67.67937944408533, |
| "grad_norm": 1.8756169080734253, |
| "learning_rate": 0.001, |
| "loss": 1.0123, |
| "step": 209400 |
| }, |
| { |
| "epoch": 67.71170006464124, |
| "grad_norm": 3.0076963901519775, |
| "learning_rate": 0.001, |
| "loss": 1.0041, |
| "step": 209500 |
| }, |
| { |
| "epoch": 67.74402068519716, |
| "grad_norm": 1.6423379182815552, |
| "learning_rate": 0.001, |
| "loss": 1.0102, |
| "step": 209600 |
| }, |
| { |
| "epoch": 67.77634130575306, |
| "grad_norm": 2.183377504348755, |
| "learning_rate": 0.001, |
| "loss": 1.025, |
| "step": 209700 |
| }, |
| { |
| "epoch": 67.80866192630899, |
| "grad_norm": 3.2337539196014404, |
| "learning_rate": 0.001, |
| "loss": 1.0051, |
| "step": 209800 |
| }, |
| { |
| "epoch": 67.8409825468649, |
| "grad_norm": 1.9558117389678955, |
| "learning_rate": 0.001, |
| "loss": 1.0139, |
| "step": 209900 |
| }, |
| { |
| "epoch": 67.87330316742081, |
| "grad_norm": 2.0811750888824463, |
| "learning_rate": 0.001, |
| "loss": 1.043, |
| "step": 210000 |
| }, |
| { |
| "epoch": 67.90562378797672, |
| "grad_norm": 2.108379602432251, |
| "learning_rate": 0.001, |
| "loss": 1.0259, |
| "step": 210100 |
| }, |
| { |
| "epoch": 67.93794440853264, |
| "grad_norm": 2.1077399253845215, |
| "learning_rate": 0.001, |
| "loss": 1.0235, |
| "step": 210200 |
| }, |
| { |
| "epoch": 67.97026502908855, |
| "grad_norm": 1.871891975402832, |
| "learning_rate": 0.001, |
| "loss": 1.0415, |
| "step": 210300 |
| }, |
| { |
| "epoch": 68.00258564964447, |
| "grad_norm": 1.6436115503311157, |
| "learning_rate": 0.001, |
| "loss": 1.0467, |
| "step": 210400 |
| }, |
| { |
| "epoch": 68.0349062702004, |
| "grad_norm": 1.6314891576766968, |
| "learning_rate": 0.001, |
| "loss": 0.9279, |
| "step": 210500 |
| }, |
| { |
| "epoch": 68.0672268907563, |
| "grad_norm": 1.5454355478286743, |
| "learning_rate": 0.001, |
| "loss": 0.9457, |
| "step": 210600 |
| }, |
| { |
| "epoch": 68.09954751131222, |
| "grad_norm": 1.8596247434616089, |
| "learning_rate": 0.001, |
| "loss": 0.9512, |
| "step": 210700 |
| }, |
| { |
| "epoch": 68.13186813186813, |
| "grad_norm": 1.685611367225647, |
| "learning_rate": 0.001, |
| "loss": 0.9767, |
| "step": 210800 |
| }, |
| { |
| "epoch": 68.16418875242405, |
| "grad_norm": 1.754593849182129, |
| "learning_rate": 0.001, |
| "loss": 0.9568, |
| "step": 210900 |
| }, |
| { |
| "epoch": 68.19650937297996, |
| "grad_norm": 1.898842453956604, |
| "learning_rate": 0.001, |
| "loss": 0.9628, |
| "step": 211000 |
| }, |
| { |
| "epoch": 68.22882999353588, |
| "grad_norm": 2.4915871620178223, |
| "learning_rate": 0.001, |
| "loss": 0.9551, |
| "step": 211100 |
| }, |
| { |
| "epoch": 68.26115061409179, |
| "grad_norm": 1.8018248081207275, |
| "learning_rate": 0.001, |
| "loss": 0.9756, |
| "step": 211200 |
| }, |
| { |
| "epoch": 68.29347123464771, |
| "grad_norm": 1.7845089435577393, |
| "learning_rate": 0.001, |
| "loss": 0.9681, |
| "step": 211300 |
| }, |
| { |
| "epoch": 68.32579185520362, |
| "grad_norm": 1.6771483421325684, |
| "learning_rate": 0.001, |
| "loss": 0.9567, |
| "step": 211400 |
| }, |
| { |
| "epoch": 68.35811247575954, |
| "grad_norm": 1.7854185104370117, |
| "learning_rate": 0.001, |
| "loss": 0.9724, |
| "step": 211500 |
| }, |
| { |
| "epoch": 68.39043309631545, |
| "grad_norm": 3.660853147506714, |
| "learning_rate": 0.001, |
| "loss": 0.981, |
| "step": 211600 |
| }, |
| { |
| "epoch": 68.42275371687137, |
| "grad_norm": 1.621956706047058, |
| "learning_rate": 0.001, |
| "loss": 0.9833, |
| "step": 211700 |
| }, |
| { |
| "epoch": 68.45507433742728, |
| "grad_norm": 1.5680972337722778, |
| "learning_rate": 0.001, |
| "loss": 0.9766, |
| "step": 211800 |
| }, |
| { |
| "epoch": 68.4873949579832, |
| "grad_norm": 1.4635026454925537, |
| "learning_rate": 0.001, |
| "loss": 0.986, |
| "step": 211900 |
| }, |
| { |
| "epoch": 68.5197155785391, |
| "grad_norm": 1.9271141290664673, |
| "learning_rate": 0.001, |
| "loss": 0.9871, |
| "step": 212000 |
| }, |
| { |
| "epoch": 68.55203619909503, |
| "grad_norm": 1.8947614431381226, |
| "learning_rate": 0.001, |
| "loss": 0.9893, |
| "step": 212100 |
| }, |
| { |
| "epoch": 68.58435681965094, |
| "grad_norm": 1.6011998653411865, |
| "learning_rate": 0.001, |
| "loss": 0.9805, |
| "step": 212200 |
| }, |
| { |
| "epoch": 68.61667744020686, |
| "grad_norm": 1.5570119619369507, |
| "learning_rate": 0.001, |
| "loss": 1.0029, |
| "step": 212300 |
| }, |
| { |
| "epoch": 68.64899806076276, |
| "grad_norm": 1.8010640144348145, |
| "learning_rate": 0.001, |
| "loss": 0.9896, |
| "step": 212400 |
| }, |
| { |
| "epoch": 68.68131868131869, |
| "grad_norm": 1.5905638933181763, |
| "learning_rate": 0.001, |
| "loss": 0.9957, |
| "step": 212500 |
| }, |
| { |
| "epoch": 68.7136393018746, |
| "grad_norm": 1.521748423576355, |
| "learning_rate": 0.001, |
| "loss": 0.9887, |
| "step": 212600 |
| }, |
| { |
| "epoch": 68.74595992243052, |
| "grad_norm": 2.2083487510681152, |
| "learning_rate": 0.001, |
| "loss": 1.002, |
| "step": 212700 |
| }, |
| { |
| "epoch": 68.77828054298642, |
| "grad_norm": 1.520521879196167, |
| "learning_rate": 0.001, |
| "loss": 0.9973, |
| "step": 212800 |
| }, |
| { |
| "epoch": 68.81060116354234, |
| "grad_norm": 2.090383291244507, |
| "learning_rate": 0.001, |
| "loss": 1.0091, |
| "step": 212900 |
| }, |
| { |
| "epoch": 68.84292178409825, |
| "grad_norm": 1.6483055353164673, |
| "learning_rate": 0.001, |
| "loss": 1.0292, |
| "step": 213000 |
| }, |
| { |
| "epoch": 68.87524240465417, |
| "grad_norm": 1.4829121828079224, |
| "learning_rate": 0.001, |
| "loss": 1.03, |
| "step": 213100 |
| }, |
| { |
| "epoch": 68.90756302521008, |
| "grad_norm": 1.6526368856430054, |
| "learning_rate": 0.001, |
| "loss": 1.0217, |
| "step": 213200 |
| }, |
| { |
| "epoch": 68.939883645766, |
| "grad_norm": 1.554659128189087, |
| "learning_rate": 0.001, |
| "loss": 1.0293, |
| "step": 213300 |
| }, |
| { |
| "epoch": 68.97220426632191, |
| "grad_norm": 1.735242486000061, |
| "learning_rate": 0.001, |
| "loss": 1.0124, |
| "step": 213400 |
| }, |
| { |
| "epoch": 69.00452488687783, |
| "grad_norm": 1.4963144063949585, |
| "learning_rate": 0.001, |
| "loss": 1.0175, |
| "step": 213500 |
| }, |
| { |
| "epoch": 69.03684550743374, |
| "grad_norm": 1.603499412536621, |
| "learning_rate": 0.001, |
| "loss": 0.9341, |
| "step": 213600 |
| }, |
| { |
| "epoch": 69.06916612798966, |
| "grad_norm": 1.6510496139526367, |
| "learning_rate": 0.001, |
| "loss": 0.9354, |
| "step": 213700 |
| }, |
| { |
| "epoch": 69.10148674854557, |
| "grad_norm": 1.66983962059021, |
| "learning_rate": 0.001, |
| "loss": 0.9496, |
| "step": 213800 |
| }, |
| { |
| "epoch": 69.13380736910149, |
| "grad_norm": 1.7199987173080444, |
| "learning_rate": 0.001, |
| "loss": 0.9496, |
| "step": 213900 |
| }, |
| { |
| "epoch": 69.1661279896574, |
| "grad_norm": 1.773880124092102, |
| "learning_rate": 0.001, |
| "loss": 0.949, |
| "step": 214000 |
| }, |
| { |
| "epoch": 69.19844861021332, |
| "grad_norm": 1.6391410827636719, |
| "learning_rate": 0.001, |
| "loss": 0.9458, |
| "step": 214100 |
| }, |
| { |
| "epoch": 69.23076923076923, |
| "grad_norm": 1.5789082050323486, |
| "learning_rate": 0.001, |
| "loss": 0.9601, |
| "step": 214200 |
| }, |
| { |
| "epoch": 69.26308985132515, |
| "grad_norm": 1.9064000844955444, |
| "learning_rate": 0.001, |
| "loss": 0.9513, |
| "step": 214300 |
| }, |
| { |
| "epoch": 69.29541047188106, |
| "grad_norm": 1.6485440731048584, |
| "learning_rate": 0.001, |
| "loss": 0.9489, |
| "step": 214400 |
| }, |
| { |
| "epoch": 69.32773109243698, |
| "grad_norm": 1.4657033681869507, |
| "learning_rate": 0.001, |
| "loss": 0.9752, |
| "step": 214500 |
| }, |
| { |
| "epoch": 69.36005171299288, |
| "grad_norm": 1.8752721548080444, |
| "learning_rate": 0.001, |
| "loss": 0.9484, |
| "step": 214600 |
| }, |
| { |
| "epoch": 69.3923723335488, |
| "grad_norm": 1.4221304655075073, |
| "learning_rate": 0.001, |
| "loss": 0.9518, |
| "step": 214700 |
| }, |
| { |
| "epoch": 69.42469295410471, |
| "grad_norm": 1.6716110706329346, |
| "learning_rate": 0.001, |
| "loss": 0.9571, |
| "step": 214800 |
| }, |
| { |
| "epoch": 69.45701357466064, |
| "grad_norm": 1.6328706741333008, |
| "learning_rate": 0.001, |
| "loss": 0.9617, |
| "step": 214900 |
| }, |
| { |
| "epoch": 69.48933419521654, |
| "grad_norm": 1.467498779296875, |
| "learning_rate": 0.001, |
| "loss": 0.993, |
| "step": 215000 |
| }, |
| { |
| "epoch": 69.52165481577246, |
| "grad_norm": 1.6579245328903198, |
| "learning_rate": 0.001, |
| "loss": 0.9775, |
| "step": 215100 |
| }, |
| { |
| "epoch": 69.55397543632837, |
| "grad_norm": 1.9347474575042725, |
| "learning_rate": 0.001, |
| "loss": 0.9802, |
| "step": 215200 |
| }, |
| { |
| "epoch": 69.5862960568843, |
| "grad_norm": 1.8056132793426514, |
| "learning_rate": 0.001, |
| "loss": 0.9917, |
| "step": 215300 |
| }, |
| { |
| "epoch": 69.6186166774402, |
| "grad_norm": 1.5170385837554932, |
| "learning_rate": 0.001, |
| "loss": 0.989, |
| "step": 215400 |
| }, |
| { |
| "epoch": 69.65093729799612, |
| "grad_norm": 1.9028490781784058, |
| "learning_rate": 0.001, |
| "loss": 1.0019, |
| "step": 215500 |
| }, |
| { |
| "epoch": 69.68325791855203, |
| "grad_norm": 1.7090563774108887, |
| "learning_rate": 0.001, |
| "loss": 0.9913, |
| "step": 215600 |
| }, |
| { |
| "epoch": 69.71557853910795, |
| "grad_norm": 3.4204039573669434, |
| "learning_rate": 0.001, |
| "loss": 0.9862, |
| "step": 215700 |
| }, |
| { |
| "epoch": 69.74789915966386, |
| "grad_norm": 1.559909462928772, |
| "learning_rate": 0.001, |
| "loss": 0.9973, |
| "step": 215800 |
| }, |
| { |
| "epoch": 69.78021978021978, |
| "grad_norm": 1.948533058166504, |
| "learning_rate": 0.001, |
| "loss": 0.9941, |
| "step": 215900 |
| }, |
| { |
| "epoch": 69.81254040077569, |
| "grad_norm": 2.0749168395996094, |
| "learning_rate": 0.001, |
| "loss": 0.9959, |
| "step": 216000 |
| }, |
| { |
| "epoch": 69.84486102133161, |
| "grad_norm": 1.929993748664856, |
| "learning_rate": 0.001, |
| "loss": 0.9985, |
| "step": 216100 |
| }, |
| { |
| "epoch": 69.87718164188752, |
| "grad_norm": 1.607664942741394, |
| "learning_rate": 0.001, |
| "loss": 0.9967, |
| "step": 216200 |
| }, |
| { |
| "epoch": 69.90950226244344, |
| "grad_norm": 1.9539837837219238, |
| "learning_rate": 0.001, |
| "loss": 1.0014, |
| "step": 216300 |
| }, |
| { |
| "epoch": 69.94182288299935, |
| "grad_norm": 1.4872267246246338, |
| "learning_rate": 0.001, |
| "loss": 1.0314, |
| "step": 216400 |
| }, |
| { |
| "epoch": 69.97414350355527, |
| "grad_norm": 1.7044696807861328, |
| "learning_rate": 0.001, |
| "loss": 1.0059, |
| "step": 216500 |
| }, |
| { |
| "epoch": 70.00646412411119, |
| "grad_norm": 56.00265121459961, |
| "learning_rate": 0.001, |
| "loss": 1.0061, |
| "step": 216600 |
| }, |
| { |
| "epoch": 70.0387847446671, |
| "grad_norm": 1.7173796892166138, |
| "learning_rate": 0.001, |
| "loss": 0.9109, |
| "step": 216700 |
| }, |
| { |
| "epoch": 70.07110536522302, |
| "grad_norm": 1.9290728569030762, |
| "learning_rate": 0.001, |
| "loss": 0.9124, |
| "step": 216800 |
| }, |
| { |
| "epoch": 70.10342598577893, |
| "grad_norm": 1.516363263130188, |
| "learning_rate": 0.001, |
| "loss": 0.9359, |
| "step": 216900 |
| }, |
| { |
| "epoch": 70.13574660633485, |
| "grad_norm": 2.09360408782959, |
| "learning_rate": 0.001, |
| "loss": 0.9359, |
| "step": 217000 |
| }, |
| { |
| "epoch": 70.16806722689076, |
| "grad_norm": 1.570351004600525, |
| "learning_rate": 0.001, |
| "loss": 0.933, |
| "step": 217100 |
| }, |
| { |
| "epoch": 70.20038784744668, |
| "grad_norm": 1.773545742034912, |
| "learning_rate": 0.001, |
| "loss": 0.9225, |
| "step": 217200 |
| }, |
| { |
| "epoch": 70.23270846800258, |
| "grad_norm": 1.9614516496658325, |
| "learning_rate": 0.001, |
| "loss": 0.9409, |
| "step": 217300 |
| }, |
| { |
| "epoch": 70.2650290885585, |
| "grad_norm": 1.9093842506408691, |
| "learning_rate": 0.001, |
| "loss": 0.945, |
| "step": 217400 |
| }, |
| { |
| "epoch": 70.29734970911441, |
| "grad_norm": 1.4649842977523804, |
| "learning_rate": 0.001, |
| "loss": 0.9564, |
| "step": 217500 |
| }, |
| { |
| "epoch": 70.32967032967034, |
| "grad_norm": 1.7459298372268677, |
| "learning_rate": 0.001, |
| "loss": 0.9691, |
| "step": 217600 |
| }, |
| { |
| "epoch": 70.36199095022624, |
| "grad_norm": 1.5805401802062988, |
| "learning_rate": 0.001, |
| "loss": 0.9633, |
| "step": 217700 |
| }, |
| { |
| "epoch": 70.39431157078216, |
| "grad_norm": 1.8536136150360107, |
| "learning_rate": 0.001, |
| "loss": 0.9546, |
| "step": 217800 |
| }, |
| { |
| "epoch": 70.42663219133807, |
| "grad_norm": 1.3968721628189087, |
| "learning_rate": 0.001, |
| "loss": 0.9538, |
| "step": 217900 |
| }, |
| { |
| "epoch": 70.458952811894, |
| "grad_norm": 2.076166868209839, |
| "learning_rate": 0.001, |
| "loss": 0.9602, |
| "step": 218000 |
| }, |
| { |
| "epoch": 70.4912734324499, |
| "grad_norm": 1.5355348587036133, |
| "learning_rate": 0.001, |
| "loss": 0.9816, |
| "step": 218100 |
| }, |
| { |
| "epoch": 70.52359405300582, |
| "grad_norm": 1.6751102209091187, |
| "learning_rate": 0.001, |
| "loss": 0.9691, |
| "step": 218200 |
| }, |
| { |
| "epoch": 70.55591467356173, |
| "grad_norm": 1.6039971113204956, |
| "learning_rate": 0.001, |
| "loss": 0.9945, |
| "step": 218300 |
| }, |
| { |
| "epoch": 70.58823529411765, |
| "grad_norm": 1.4298136234283447, |
| "learning_rate": 0.001, |
| "loss": 0.998, |
| "step": 218400 |
| }, |
| { |
| "epoch": 70.62055591467356, |
| "grad_norm": 1.6534899473190308, |
| "learning_rate": 0.001, |
| "loss": 0.9642, |
| "step": 218500 |
| }, |
| { |
| "epoch": 70.65287653522948, |
| "grad_norm": 1.645822286605835, |
| "learning_rate": 0.001, |
| "loss": 0.9765, |
| "step": 218600 |
| }, |
| { |
| "epoch": 70.68519715578539, |
| "grad_norm": 1.5585241317749023, |
| "learning_rate": 0.001, |
| "loss": 0.9683, |
| "step": 218700 |
| }, |
| { |
| "epoch": 70.71751777634131, |
| "grad_norm": 2.097666025161743, |
| "learning_rate": 0.001, |
| "loss": 0.9765, |
| "step": 218800 |
| }, |
| { |
| "epoch": 70.74983839689722, |
| "grad_norm": 1.694786787033081, |
| "learning_rate": 0.001, |
| "loss": 0.9764, |
| "step": 218900 |
| }, |
| { |
| "epoch": 70.78215901745314, |
| "grad_norm": 1.9305164813995361, |
| "learning_rate": 0.001, |
| "loss": 0.9903, |
| "step": 219000 |
| }, |
| { |
| "epoch": 70.81447963800905, |
| "grad_norm": 1.6934314966201782, |
| "learning_rate": 0.001, |
| "loss": 0.9976, |
| "step": 219100 |
| }, |
| { |
| "epoch": 70.84680025856497, |
| "grad_norm": 1.8787205219268799, |
| "learning_rate": 0.001, |
| "loss": 1.0127, |
| "step": 219200 |
| }, |
| { |
| "epoch": 70.87912087912088, |
| "grad_norm": 1.8684394359588623, |
| "learning_rate": 0.001, |
| "loss": 0.9992, |
| "step": 219300 |
| }, |
| { |
| "epoch": 70.9114414996768, |
| "grad_norm": 1.5081862211227417, |
| "learning_rate": 0.001, |
| "loss": 1.005, |
| "step": 219400 |
| }, |
| { |
| "epoch": 70.9437621202327, |
| "grad_norm": 1.4214324951171875, |
| "learning_rate": 0.001, |
| "loss": 0.9998, |
| "step": 219500 |
| }, |
| { |
| "epoch": 70.97608274078863, |
| "grad_norm": 1.6040948629379272, |
| "learning_rate": 0.001, |
| "loss": 0.9977, |
| "step": 219600 |
| }, |
| { |
| "epoch": 71.00840336134453, |
| "grad_norm": 1.9321045875549316, |
| "learning_rate": 0.001, |
| "loss": 1.0009, |
| "step": 219700 |
| }, |
| { |
| "epoch": 71.04072398190046, |
| "grad_norm": 1.6411948204040527, |
| "learning_rate": 0.001, |
| "loss": 0.9007, |
| "step": 219800 |
| }, |
| { |
| "epoch": 71.07304460245636, |
| "grad_norm": 1.5181108713150024, |
| "learning_rate": 0.001, |
| "loss": 0.9252, |
| "step": 219900 |
| }, |
| { |
| "epoch": 71.10536522301229, |
| "grad_norm": 1.6294838190078735, |
| "learning_rate": 0.001, |
| "loss": 0.9105, |
| "step": 220000 |
| }, |
| { |
| "epoch": 71.13768584356819, |
| "grad_norm": 1.670454502105713, |
| "learning_rate": 0.001, |
| "loss": 0.9215, |
| "step": 220100 |
| }, |
| { |
| "epoch": 71.17000646412411, |
| "grad_norm": 1.461074948310852, |
| "learning_rate": 0.001, |
| "loss": 0.9223, |
| "step": 220200 |
| }, |
| { |
| "epoch": 71.20232708468002, |
| "grad_norm": 1.9516828060150146, |
| "learning_rate": 0.001, |
| "loss": 0.9358, |
| "step": 220300 |
| }, |
| { |
| "epoch": 71.23464770523594, |
| "grad_norm": 1.5460422039031982, |
| "learning_rate": 0.001, |
| "loss": 0.9511, |
| "step": 220400 |
| }, |
| { |
| "epoch": 71.26696832579185, |
| "grad_norm": 1.5585349798202515, |
| "learning_rate": 0.001, |
| "loss": 0.9239, |
| "step": 220500 |
| }, |
| { |
| "epoch": 71.29928894634777, |
| "grad_norm": 1.466973900794983, |
| "learning_rate": 0.001, |
| "loss": 0.9475, |
| "step": 220600 |
| }, |
| { |
| "epoch": 71.33160956690368, |
| "grad_norm": 1.8937549591064453, |
| "learning_rate": 0.001, |
| "loss": 0.9497, |
| "step": 220700 |
| }, |
| { |
| "epoch": 71.3639301874596, |
| "grad_norm": 1.702267050743103, |
| "learning_rate": 0.001, |
| "loss": 0.9463, |
| "step": 220800 |
| }, |
| { |
| "epoch": 71.39625080801551, |
| "grad_norm": 2.2292089462280273, |
| "learning_rate": 0.001, |
| "loss": 0.9696, |
| "step": 220900 |
| }, |
| { |
| "epoch": 71.42857142857143, |
| "grad_norm": 1.7881437540054321, |
| "learning_rate": 0.001, |
| "loss": 0.9697, |
| "step": 221000 |
| }, |
| { |
| "epoch": 71.46089204912734, |
| "grad_norm": 1.5955432653427124, |
| "learning_rate": 0.001, |
| "loss": 0.9586, |
| "step": 221100 |
| }, |
| { |
| "epoch": 71.49321266968326, |
| "grad_norm": 1.5466030836105347, |
| "learning_rate": 0.001, |
| "loss": 0.9773, |
| "step": 221200 |
| }, |
| { |
| "epoch": 71.52553329023917, |
| "grad_norm": 1.5583151578903198, |
| "learning_rate": 0.001, |
| "loss": 0.9608, |
| "step": 221300 |
| }, |
| { |
| "epoch": 71.55785391079509, |
| "grad_norm": 1.478674292564392, |
| "learning_rate": 0.001, |
| "loss": 0.9787, |
| "step": 221400 |
| }, |
| { |
| "epoch": 71.590174531351, |
| "grad_norm": 1.3500618934631348, |
| "learning_rate": 0.001, |
| "loss": 0.9751, |
| "step": 221500 |
| }, |
| { |
| "epoch": 71.62249515190692, |
| "grad_norm": 1.8969987630844116, |
| "learning_rate": 0.001, |
| "loss": 0.9751, |
| "step": 221600 |
| }, |
| { |
| "epoch": 71.65481577246283, |
| "grad_norm": 1.7005226612091064, |
| "learning_rate": 0.001, |
| "loss": 0.9813, |
| "step": 221700 |
| }, |
| { |
| "epoch": 71.68713639301875, |
| "grad_norm": 1.568214774131775, |
| "learning_rate": 0.001, |
| "loss": 0.9743, |
| "step": 221800 |
| }, |
| { |
| "epoch": 71.71945701357465, |
| "grad_norm": 1.417033314704895, |
| "learning_rate": 0.001, |
| "loss": 0.9677, |
| "step": 221900 |
| }, |
| { |
| "epoch": 71.75177763413058, |
| "grad_norm": 1.7298557758331299, |
| "learning_rate": 0.001, |
| "loss": 0.9805, |
| "step": 222000 |
| }, |
| { |
| "epoch": 71.78409825468648, |
| "grad_norm": 1.6061108112335205, |
| "learning_rate": 0.001, |
| "loss": 0.9917, |
| "step": 222100 |
| }, |
| { |
| "epoch": 71.8164188752424, |
| "grad_norm": 1.9525036811828613, |
| "learning_rate": 0.001, |
| "loss": 0.9839, |
| "step": 222200 |
| }, |
| { |
| "epoch": 71.84873949579831, |
| "grad_norm": 1.677465796470642, |
| "learning_rate": 0.001, |
| "loss": 1.0039, |
| "step": 222300 |
| }, |
| { |
| "epoch": 71.88106011635423, |
| "grad_norm": 2.25289249420166, |
| "learning_rate": 0.001, |
| "loss": 0.9813, |
| "step": 222400 |
| }, |
| { |
| "epoch": 71.91338073691014, |
| "grad_norm": 1.4465690851211548, |
| "learning_rate": 0.001, |
| "loss": 0.9896, |
| "step": 222500 |
| }, |
| { |
| "epoch": 71.94570135746606, |
| "grad_norm": 1.8012648820877075, |
| "learning_rate": 0.001, |
| "loss": 0.9868, |
| "step": 222600 |
| }, |
| { |
| "epoch": 71.97802197802197, |
| "grad_norm": 1.6948093175888062, |
| "learning_rate": 0.001, |
| "loss": 1.0119, |
| "step": 222700 |
| }, |
| { |
| "epoch": 72.01034259857789, |
| "grad_norm": 1.7720965147018433, |
| "learning_rate": 0.001, |
| "loss": 0.9562, |
| "step": 222800 |
| }, |
| { |
| "epoch": 72.04266321913381, |
| "grad_norm": 1.5736439228057861, |
| "learning_rate": 0.001, |
| "loss": 0.8985, |
| "step": 222900 |
| }, |
| { |
| "epoch": 72.07498383968972, |
| "grad_norm": 1.4669991731643677, |
| "learning_rate": 0.001, |
| "loss": 0.9112, |
| "step": 223000 |
| }, |
| { |
| "epoch": 72.10730446024564, |
| "grad_norm": 1.6496646404266357, |
| "learning_rate": 0.001, |
| "loss": 0.9098, |
| "step": 223100 |
| }, |
| { |
| "epoch": 72.13962508080155, |
| "grad_norm": 1.6294455528259277, |
| "learning_rate": 0.001, |
| "loss": 0.9193, |
| "step": 223200 |
| }, |
| { |
| "epoch": 72.17194570135747, |
| "grad_norm": 1.664873480796814, |
| "learning_rate": 0.001, |
| "loss": 0.9216, |
| "step": 223300 |
| }, |
| { |
| "epoch": 72.20426632191338, |
| "grad_norm": 1.6307495832443237, |
| "learning_rate": 0.001, |
| "loss": 0.9311, |
| "step": 223400 |
| }, |
| { |
| "epoch": 72.2365869424693, |
| "grad_norm": 1.928640604019165, |
| "learning_rate": 0.001, |
| "loss": 0.9326, |
| "step": 223500 |
| }, |
| { |
| "epoch": 72.26890756302521, |
| "grad_norm": 1.5909250974655151, |
| "learning_rate": 0.001, |
| "loss": 0.9341, |
| "step": 223600 |
| }, |
| { |
| "epoch": 72.30122818358113, |
| "grad_norm": 1.8960169553756714, |
| "learning_rate": 0.001, |
| "loss": 0.9352, |
| "step": 223700 |
| }, |
| { |
| "epoch": 72.33354880413704, |
| "grad_norm": 1.693377137184143, |
| "learning_rate": 0.001, |
| "loss": 0.9254, |
| "step": 223800 |
| }, |
| { |
| "epoch": 72.36586942469296, |
| "grad_norm": 1.8388135433197021, |
| "learning_rate": 0.001, |
| "loss": 0.9422, |
| "step": 223900 |
| }, |
| { |
| "epoch": 72.39819004524887, |
| "grad_norm": 1.7362223863601685, |
| "learning_rate": 0.001, |
| "loss": 0.9542, |
| "step": 224000 |
| }, |
| { |
| "epoch": 72.43051066580479, |
| "grad_norm": 2.2575292587280273, |
| "learning_rate": 0.001, |
| "loss": 0.9544, |
| "step": 224100 |
| }, |
| { |
| "epoch": 72.4628312863607, |
| "grad_norm": 1.8790425062179565, |
| "learning_rate": 0.001, |
| "loss": 0.9703, |
| "step": 224200 |
| }, |
| { |
| "epoch": 72.49515190691662, |
| "grad_norm": 1.7574528455734253, |
| "learning_rate": 0.001, |
| "loss": 0.9609, |
| "step": 224300 |
| }, |
| { |
| "epoch": 72.52747252747253, |
| "grad_norm": 1.6900725364685059, |
| "learning_rate": 0.001, |
| "loss": 0.9607, |
| "step": 224400 |
| }, |
| { |
| "epoch": 72.55979314802845, |
| "grad_norm": 1.4746700525283813, |
| "learning_rate": 0.001, |
| "loss": 0.9459, |
| "step": 224500 |
| }, |
| { |
| "epoch": 72.59211376858435, |
| "grad_norm": 1.7143659591674805, |
| "learning_rate": 0.001, |
| "loss": 0.9697, |
| "step": 224600 |
| }, |
| { |
| "epoch": 72.62443438914028, |
| "grad_norm": 1.6546257734298706, |
| "learning_rate": 0.001, |
| "loss": 0.9662, |
| "step": 224700 |
| }, |
| { |
| "epoch": 72.65675500969618, |
| "grad_norm": 1.6653079986572266, |
| "learning_rate": 0.001, |
| "loss": 0.96, |
| "step": 224800 |
| }, |
| { |
| "epoch": 72.6890756302521, |
| "grad_norm": 6.541753768920898, |
| "learning_rate": 0.001, |
| "loss": 0.9632, |
| "step": 224900 |
| }, |
| { |
| "epoch": 72.72139625080801, |
| "grad_norm": 1.6017827987670898, |
| "learning_rate": 0.001, |
| "loss": 0.9813, |
| "step": 225000 |
| }, |
| { |
| "epoch": 72.75371687136393, |
| "grad_norm": 1.894031286239624, |
| "learning_rate": 0.001, |
| "loss": 0.9779, |
| "step": 225100 |
| }, |
| { |
| "epoch": 72.78603749191984, |
| "grad_norm": 2.0835087299346924, |
| "learning_rate": 0.001, |
| "loss": 0.9904, |
| "step": 225200 |
| }, |
| { |
| "epoch": 72.81835811247576, |
| "grad_norm": 1.5882728099822998, |
| "learning_rate": 0.001, |
| "loss": 0.9841, |
| "step": 225300 |
| }, |
| { |
| "epoch": 72.85067873303167, |
| "grad_norm": 1.8402472734451294, |
| "learning_rate": 0.001, |
| "loss": 0.9843, |
| "step": 225400 |
| }, |
| { |
| "epoch": 72.88299935358759, |
| "grad_norm": 22.45509910583496, |
| "learning_rate": 0.001, |
| "loss": 0.9746, |
| "step": 225500 |
| }, |
| { |
| "epoch": 72.9153199741435, |
| "grad_norm": 1.7956304550170898, |
| "learning_rate": 0.001, |
| "loss": 0.9798, |
| "step": 225600 |
| }, |
| { |
| "epoch": 72.94764059469942, |
| "grad_norm": 1.881117582321167, |
| "learning_rate": 0.001, |
| "loss": 0.9862, |
| "step": 225700 |
| }, |
| { |
| "epoch": 72.97996121525533, |
| "grad_norm": 1.8564492464065552, |
| "learning_rate": 0.001, |
| "loss": 0.9993, |
| "step": 225800 |
| }, |
| { |
| "epoch": 73.01228183581125, |
| "grad_norm": 1.7108012437820435, |
| "learning_rate": 0.001, |
| "loss": 0.9341, |
| "step": 225900 |
| }, |
| { |
| "epoch": 73.04460245636716, |
| "grad_norm": 3.303130865097046, |
| "learning_rate": 0.001, |
| "loss": 0.8865, |
| "step": 226000 |
| }, |
| { |
| "epoch": 73.07692307692308, |
| "grad_norm": 1.7715092897415161, |
| "learning_rate": 0.001, |
| "loss": 0.9084, |
| "step": 226100 |
| }, |
| { |
| "epoch": 73.10924369747899, |
| "grad_norm": 2.105649948120117, |
| "learning_rate": 0.001, |
| "loss": 0.9055, |
| "step": 226200 |
| }, |
| { |
| "epoch": 73.14156431803491, |
| "grad_norm": 1.626462697982788, |
| "learning_rate": 0.001, |
| "loss": 0.9072, |
| "step": 226300 |
| }, |
| { |
| "epoch": 73.17388493859082, |
| "grad_norm": 1.675002932548523, |
| "learning_rate": 0.001, |
| "loss": 0.9123, |
| "step": 226400 |
| }, |
| { |
| "epoch": 73.20620555914674, |
| "grad_norm": 1.7024825811386108, |
| "learning_rate": 0.001, |
| "loss": 0.9245, |
| "step": 226500 |
| }, |
| { |
| "epoch": 73.23852617970265, |
| "grad_norm": 1.7466365098953247, |
| "learning_rate": 0.001, |
| "loss": 0.9124, |
| "step": 226600 |
| }, |
| { |
| "epoch": 73.27084680025857, |
| "grad_norm": 2.140228748321533, |
| "learning_rate": 0.001, |
| "loss": 0.9284, |
| "step": 226700 |
| }, |
| { |
| "epoch": 73.30316742081448, |
| "grad_norm": 1.6669248342514038, |
| "learning_rate": 0.001, |
| "loss": 0.934, |
| "step": 226800 |
| }, |
| { |
| "epoch": 73.3354880413704, |
| "grad_norm": 1.5409226417541504, |
| "learning_rate": 0.001, |
| "loss": 0.93, |
| "step": 226900 |
| }, |
| { |
| "epoch": 73.3678086619263, |
| "grad_norm": 1.4296633005142212, |
| "learning_rate": 0.001, |
| "loss": 0.9363, |
| "step": 227000 |
| }, |
| { |
| "epoch": 73.40012928248223, |
| "grad_norm": 1.7514222860336304, |
| "learning_rate": 0.001, |
| "loss": 0.9472, |
| "step": 227100 |
| }, |
| { |
| "epoch": 73.43244990303813, |
| "grad_norm": 1.5675846338272095, |
| "learning_rate": 0.001, |
| "loss": 0.9367, |
| "step": 227200 |
| }, |
| { |
| "epoch": 73.46477052359405, |
| "grad_norm": 1.4617664813995361, |
| "learning_rate": 0.001, |
| "loss": 0.9389, |
| "step": 227300 |
| }, |
| { |
| "epoch": 73.49709114414996, |
| "grad_norm": 1.8842968940734863, |
| "learning_rate": 0.001, |
| "loss": 0.9441, |
| "step": 227400 |
| }, |
| { |
| "epoch": 73.52941176470588, |
| "grad_norm": 1.7531907558441162, |
| "learning_rate": 0.001, |
| "loss": 0.9572, |
| "step": 227500 |
| }, |
| { |
| "epoch": 73.56173238526179, |
| "grad_norm": 1.828428864479065, |
| "learning_rate": 0.001, |
| "loss": 0.9802, |
| "step": 227600 |
| }, |
| { |
| "epoch": 73.59405300581771, |
| "grad_norm": 5.751527786254883, |
| "learning_rate": 0.001, |
| "loss": 0.9551, |
| "step": 227700 |
| }, |
| { |
| "epoch": 73.62637362637362, |
| "grad_norm": 1.549415111541748, |
| "learning_rate": 0.001, |
| "loss": 0.9573, |
| "step": 227800 |
| }, |
| { |
| "epoch": 73.65869424692954, |
| "grad_norm": 2.144838333129883, |
| "learning_rate": 0.001, |
| "loss": 0.9626, |
| "step": 227900 |
| }, |
| { |
| "epoch": 73.69101486748545, |
| "grad_norm": 1.6326838731765747, |
| "learning_rate": 0.001, |
| "loss": 0.9568, |
| "step": 228000 |
| }, |
| { |
| "epoch": 73.72333548804137, |
| "grad_norm": 1.6560914516448975, |
| "learning_rate": 0.001, |
| "loss": 0.9749, |
| "step": 228100 |
| }, |
| { |
| "epoch": 73.75565610859728, |
| "grad_norm": 1.5580803155899048, |
| "learning_rate": 0.001, |
| "loss": 0.9803, |
| "step": 228200 |
| }, |
| { |
| "epoch": 73.7879767291532, |
| "grad_norm": 1.5272198915481567, |
| "learning_rate": 0.001, |
| "loss": 0.9738, |
| "step": 228300 |
| }, |
| { |
| "epoch": 73.82029734970911, |
| "grad_norm": 1.8833277225494385, |
| "learning_rate": 0.001, |
| "loss": 0.9753, |
| "step": 228400 |
| }, |
| { |
| "epoch": 73.85261797026503, |
| "grad_norm": 1.8136698007583618, |
| "learning_rate": 0.001, |
| "loss": 0.9737, |
| "step": 228500 |
| }, |
| { |
| "epoch": 73.88493859082094, |
| "grad_norm": 1.8745663166046143, |
| "learning_rate": 0.001, |
| "loss": 0.9822, |
| "step": 228600 |
| }, |
| { |
| "epoch": 73.91725921137686, |
| "grad_norm": 1.5352058410644531, |
| "learning_rate": 0.001, |
| "loss": 0.9901, |
| "step": 228700 |
| }, |
| { |
| "epoch": 73.94957983193277, |
| "grad_norm": 1.4566476345062256, |
| "learning_rate": 0.001, |
| "loss": 0.9663, |
| "step": 228800 |
| }, |
| { |
| "epoch": 73.98190045248869, |
| "grad_norm": 1.6003284454345703, |
| "learning_rate": 0.001, |
| "loss": 0.9907, |
| "step": 228900 |
| }, |
| { |
| "epoch": 74.01422107304461, |
| "grad_norm": 1.3612558841705322, |
| "learning_rate": 0.001, |
| "loss": 0.909, |
| "step": 229000 |
| }, |
| { |
| "epoch": 74.04654169360052, |
| "grad_norm": 1.8809579610824585, |
| "learning_rate": 0.001, |
| "loss": 0.9056, |
| "step": 229100 |
| }, |
| { |
| "epoch": 74.07886231415644, |
| "grad_norm": 1.5369434356689453, |
| "learning_rate": 0.001, |
| "loss": 0.895, |
| "step": 229200 |
| }, |
| { |
| "epoch": 74.11118293471235, |
| "grad_norm": 2.4605133533477783, |
| "learning_rate": 0.001, |
| "loss": 0.9107, |
| "step": 229300 |
| }, |
| { |
| "epoch": 74.14350355526827, |
| "grad_norm": 1.4680637121200562, |
| "learning_rate": 0.001, |
| "loss": 0.9069, |
| "step": 229400 |
| }, |
| { |
| "epoch": 74.17582417582418, |
| "grad_norm": 2.1316113471984863, |
| "learning_rate": 0.001, |
| "loss": 0.912, |
| "step": 229500 |
| }, |
| { |
| "epoch": 74.2081447963801, |
| "grad_norm": 2.0725295543670654, |
| "learning_rate": 0.001, |
| "loss": 0.9133, |
| "step": 229600 |
| }, |
| { |
| "epoch": 74.240465416936, |
| "grad_norm": 1.671245813369751, |
| "learning_rate": 0.001, |
| "loss": 0.9045, |
| "step": 229700 |
| }, |
| { |
| "epoch": 74.27278603749193, |
| "grad_norm": 1.4853520393371582, |
| "learning_rate": 0.001, |
| "loss": 0.9284, |
| "step": 229800 |
| }, |
| { |
| "epoch": 74.30510665804783, |
| "grad_norm": 2.123257875442505, |
| "learning_rate": 0.001, |
| "loss": 0.9156, |
| "step": 229900 |
| }, |
| { |
| "epoch": 74.33742727860376, |
| "grad_norm": 1.213315486907959, |
| "learning_rate": 0.001, |
| "loss": 0.9246, |
| "step": 230000 |
| }, |
| { |
| "epoch": 74.36974789915966, |
| "grad_norm": 1.6315170526504517, |
| "learning_rate": 0.001, |
| "loss": 0.9245, |
| "step": 230100 |
| }, |
| { |
| "epoch": 74.40206851971558, |
| "grad_norm": 1.4847023487091064, |
| "learning_rate": 0.001, |
| "loss": 0.9439, |
| "step": 230200 |
| }, |
| { |
| "epoch": 74.43438914027149, |
| "grad_norm": 2.0054616928100586, |
| "learning_rate": 0.001, |
| "loss": 0.944, |
| "step": 230300 |
| }, |
| { |
| "epoch": 74.46670976082741, |
| "grad_norm": 1.3979027271270752, |
| "learning_rate": 0.001, |
| "loss": 0.9408, |
| "step": 230400 |
| }, |
| { |
| "epoch": 74.49903038138332, |
| "grad_norm": 1.9914604425430298, |
| "learning_rate": 0.001, |
| "loss": 0.9585, |
| "step": 230500 |
| }, |
| { |
| "epoch": 74.53135100193924, |
| "grad_norm": 2.074437379837036, |
| "learning_rate": 0.001, |
| "loss": 0.9411, |
| "step": 230600 |
| }, |
| { |
| "epoch": 74.56367162249515, |
| "grad_norm": 1.5431259870529175, |
| "learning_rate": 0.001, |
| "loss": 0.9452, |
| "step": 230700 |
| }, |
| { |
| "epoch": 74.59599224305107, |
| "grad_norm": 1.7445917129516602, |
| "learning_rate": 0.001, |
| "loss": 0.9415, |
| "step": 230800 |
| }, |
| { |
| "epoch": 74.62831286360698, |
| "grad_norm": 1.5942051410675049, |
| "learning_rate": 0.001, |
| "loss": 0.9594, |
| "step": 230900 |
| }, |
| { |
| "epoch": 74.6606334841629, |
| "grad_norm": 1.7823764085769653, |
| "learning_rate": 0.001, |
| "loss": 0.9477, |
| "step": 231000 |
| }, |
| { |
| "epoch": 74.69295410471881, |
| "grad_norm": 1.778059959411621, |
| "learning_rate": 0.001, |
| "loss": 0.9554, |
| "step": 231100 |
| }, |
| { |
| "epoch": 74.72527472527473, |
| "grad_norm": 1.4539250135421753, |
| "learning_rate": 0.001, |
| "loss": 0.9525, |
| "step": 231200 |
| }, |
| { |
| "epoch": 74.75759534583064, |
| "grad_norm": 1.9402769804000854, |
| "learning_rate": 0.001, |
| "loss": 0.9544, |
| "step": 231300 |
| }, |
| { |
| "epoch": 74.78991596638656, |
| "grad_norm": 1.6188675165176392, |
| "learning_rate": 0.001, |
| "loss": 0.9697, |
| "step": 231400 |
| }, |
| { |
| "epoch": 74.82223658694247, |
| "grad_norm": 1.5256141424179077, |
| "learning_rate": 0.001, |
| "loss": 0.9508, |
| "step": 231500 |
| }, |
| { |
| "epoch": 74.85455720749839, |
| "grad_norm": 1.6139826774597168, |
| "learning_rate": 0.001, |
| "loss": 0.9579, |
| "step": 231600 |
| }, |
| { |
| "epoch": 74.8868778280543, |
| "grad_norm": 1.597104787826538, |
| "learning_rate": 0.001, |
| "loss": 0.9626, |
| "step": 231700 |
| }, |
| { |
| "epoch": 74.91919844861022, |
| "grad_norm": 1.529085636138916, |
| "learning_rate": 0.001, |
| "loss": 0.9808, |
| "step": 231800 |
| }, |
| { |
| "epoch": 74.95151906916612, |
| "grad_norm": 1.8991279602050781, |
| "learning_rate": 0.001, |
| "loss": 0.9758, |
| "step": 231900 |
| }, |
| { |
| "epoch": 74.98383968972205, |
| "grad_norm": 1.5026214122772217, |
| "learning_rate": 0.001, |
| "loss": 0.984, |
| "step": 232000 |
| }, |
| { |
| "epoch": 75.01616031027795, |
| "grad_norm": 1.071177363395691, |
| "learning_rate": 0.001, |
| "loss": 0.8859, |
| "step": 232100 |
| }, |
| { |
| "epoch": 75.04848093083388, |
| "grad_norm": 1.1679174900054932, |
| "learning_rate": 0.001, |
| "loss": 0.8884, |
| "step": 232200 |
| }, |
| { |
| "epoch": 75.08080155138978, |
| "grad_norm": 0.9388474225997925, |
| "learning_rate": 0.001, |
| "loss": 0.8708, |
| "step": 232300 |
| }, |
| { |
| "epoch": 75.1131221719457, |
| "grad_norm": 0.4192671775817871, |
| "learning_rate": 0.001, |
| "loss": 0.896, |
| "step": 232400 |
| }, |
| { |
| "epoch": 75.14544279250161, |
| "grad_norm": 0.6754704117774963, |
| "learning_rate": 0.001, |
| "loss": 0.9114, |
| "step": 232500 |
| }, |
| { |
| "epoch": 75.17776341305753, |
| "grad_norm": 0.7327497005462646, |
| "learning_rate": 0.001, |
| "loss": 0.8957, |
| "step": 232600 |
| }, |
| { |
| "epoch": 75.21008403361344, |
| "grad_norm": 1.0593340396881104, |
| "learning_rate": 0.001, |
| "loss": 0.9038, |
| "step": 232700 |
| }, |
| { |
| "epoch": 75.24240465416936, |
| "grad_norm": 0.8486055135726929, |
| "learning_rate": 0.001, |
| "loss": 0.9076, |
| "step": 232800 |
| }, |
| { |
| "epoch": 75.27472527472527, |
| "grad_norm": 0.3056071996688843, |
| "learning_rate": 0.001, |
| "loss": 0.9377, |
| "step": 232900 |
| }, |
| { |
| "epoch": 75.30704589528119, |
| "grad_norm": 0.7975426316261292, |
| "learning_rate": 0.001, |
| "loss": 0.9175, |
| "step": 233000 |
| }, |
| { |
| "epoch": 75.3393665158371, |
| "grad_norm": 1.1826505661010742, |
| "learning_rate": 0.001, |
| "loss": 0.9082, |
| "step": 233100 |
| }, |
| { |
| "epoch": 75.37168713639302, |
| "grad_norm": 0.6041250824928284, |
| "learning_rate": 0.001, |
| "loss": 0.9184, |
| "step": 233200 |
| }, |
| { |
| "epoch": 75.40400775694893, |
| "grad_norm": 0.9403012990951538, |
| "learning_rate": 0.001, |
| "loss": 0.9225, |
| "step": 233300 |
| }, |
| { |
| "epoch": 75.43632837750485, |
| "grad_norm": 0.2614690959453583, |
| "learning_rate": 0.001, |
| "loss": 0.9362, |
| "step": 233400 |
| }, |
| { |
| "epoch": 75.46864899806076, |
| "grad_norm": 0.6960493922233582, |
| "learning_rate": 0.001, |
| "loss": 0.9441, |
| "step": 233500 |
| }, |
| { |
| "epoch": 75.50096961861668, |
| "grad_norm": 0.4896499216556549, |
| "learning_rate": 0.001, |
| "loss": 0.9179, |
| "step": 233600 |
| }, |
| { |
| "epoch": 75.53329023917259, |
| "grad_norm": 0.5739967226982117, |
| "learning_rate": 0.001, |
| "loss": 0.9323, |
| "step": 233700 |
| }, |
| { |
| "epoch": 75.56561085972851, |
| "grad_norm": 0.287398099899292, |
| "learning_rate": 0.001, |
| "loss": 0.9521, |
| "step": 233800 |
| }, |
| { |
| "epoch": 75.59793148028442, |
| "grad_norm": 0.7290223240852356, |
| "learning_rate": 0.001, |
| "loss": 0.9349, |
| "step": 233900 |
| }, |
| { |
| "epoch": 75.63025210084034, |
| "grad_norm": 0.48519167304039, |
| "learning_rate": 0.001, |
| "loss": 0.9373, |
| "step": 234000 |
| }, |
| { |
| "epoch": 75.66257272139624, |
| "grad_norm": 0.6618010401725769, |
| "learning_rate": 0.001, |
| "loss": 0.9405, |
| "step": 234100 |
| }, |
| { |
| "epoch": 75.69489334195217, |
| "grad_norm": 1.3536745309829712, |
| "learning_rate": 0.001, |
| "loss": 0.945, |
| "step": 234200 |
| }, |
| { |
| "epoch": 75.72721396250807, |
| "grad_norm": 1.1508914232254028, |
| "learning_rate": 0.001, |
| "loss": 0.9543, |
| "step": 234300 |
| }, |
| { |
| "epoch": 75.759534583064, |
| "grad_norm": 0.33295896649360657, |
| "learning_rate": 0.001, |
| "loss": 0.9693, |
| "step": 234400 |
| }, |
| { |
| "epoch": 75.7918552036199, |
| "grad_norm": 0.6864619255065918, |
| "learning_rate": 0.001, |
| "loss": 0.9433, |
| "step": 234500 |
| }, |
| { |
| "epoch": 75.82417582417582, |
| "grad_norm": 0.7642561197280884, |
| "learning_rate": 0.001, |
| "loss": 0.9418, |
| "step": 234600 |
| }, |
| { |
| "epoch": 75.85649644473173, |
| "grad_norm": 1.3688499927520752, |
| "learning_rate": 0.001, |
| "loss": 0.9591, |
| "step": 234700 |
| }, |
| { |
| "epoch": 75.88881706528765, |
| "grad_norm": 0.7520803809165955, |
| "learning_rate": 0.001, |
| "loss": 0.9798, |
| "step": 234800 |
| }, |
| { |
| "epoch": 75.92113768584356, |
| "grad_norm": 0.8843240737915039, |
| "learning_rate": 0.001, |
| "loss": 0.9686, |
| "step": 234900 |
| }, |
| { |
| "epoch": 75.95345830639948, |
| "grad_norm": 0.34943240880966187, |
| "learning_rate": 0.001, |
| "loss": 0.9566, |
| "step": 235000 |
| }, |
| { |
| "epoch": 75.98577892695539, |
| "grad_norm": 0.8272552490234375, |
| "learning_rate": 0.001, |
| "loss": 0.9776, |
| "step": 235100 |
| }, |
| { |
| "epoch": 76.01809954751131, |
| "grad_norm": 1.922287940979004, |
| "learning_rate": 0.001, |
| "loss": 0.9338, |
| "step": 235200 |
| }, |
| { |
| "epoch": 76.05042016806723, |
| "grad_norm": 6.02496337890625, |
| "learning_rate": 0.001, |
| "loss": 0.878, |
| "step": 235300 |
| }, |
| { |
| "epoch": 76.08274078862314, |
| "grad_norm": 1.6479023694992065, |
| "learning_rate": 0.001, |
| "loss": 0.8833, |
| "step": 235400 |
| }, |
| { |
| "epoch": 76.11506140917906, |
| "grad_norm": 2.0861504077911377, |
| "learning_rate": 0.001, |
| "loss": 0.8836, |
| "step": 235500 |
| }, |
| { |
| "epoch": 76.14738202973497, |
| "grad_norm": 2.1388919353485107, |
| "learning_rate": 0.001, |
| "loss": 0.8864, |
| "step": 235600 |
| }, |
| { |
| "epoch": 76.17970265029089, |
| "grad_norm": 1.9114896059036255, |
| "learning_rate": 0.001, |
| "loss": 0.9015, |
| "step": 235700 |
| }, |
| { |
| "epoch": 76.2120232708468, |
| "grad_norm": 1.6289089918136597, |
| "learning_rate": 0.001, |
| "loss": 0.9024, |
| "step": 235800 |
| }, |
| { |
| "epoch": 76.24434389140272, |
| "grad_norm": 1.781545639038086, |
| "learning_rate": 0.001, |
| "loss": 0.9032, |
| "step": 235900 |
| }, |
| { |
| "epoch": 76.27666451195863, |
| "grad_norm": 1.6694800853729248, |
| "learning_rate": 0.001, |
| "loss": 0.9199, |
| "step": 236000 |
| }, |
| { |
| "epoch": 76.30898513251455, |
| "grad_norm": 1.621127724647522, |
| "learning_rate": 0.001, |
| "loss": 0.9102, |
| "step": 236100 |
| }, |
| { |
| "epoch": 76.34130575307046, |
| "grad_norm": 1.9022835493087769, |
| "learning_rate": 0.001, |
| "loss": 0.9136, |
| "step": 236200 |
| }, |
| { |
| "epoch": 76.37362637362638, |
| "grad_norm": 1.7457150220870972, |
| "learning_rate": 0.001, |
| "loss": 0.9216, |
| "step": 236300 |
| }, |
| { |
| "epoch": 76.40594699418229, |
| "grad_norm": 1.751098394393921, |
| "learning_rate": 0.001, |
| "loss": 0.9272, |
| "step": 236400 |
| }, |
| { |
| "epoch": 76.43826761473821, |
| "grad_norm": 2.129150390625, |
| "learning_rate": 0.001, |
| "loss": 0.92, |
| "step": 236500 |
| }, |
| { |
| "epoch": 76.47058823529412, |
| "grad_norm": 1.7513155937194824, |
| "learning_rate": 0.001, |
| "loss": 0.9246, |
| "step": 236600 |
| }, |
| { |
| "epoch": 76.50290885585004, |
| "grad_norm": 1.6849470138549805, |
| "learning_rate": 0.001, |
| "loss": 0.9175, |
| "step": 236700 |
| }, |
| { |
| "epoch": 76.53522947640595, |
| "grad_norm": 1.7795782089233398, |
| "learning_rate": 0.001, |
| "loss": 0.9305, |
| "step": 236800 |
| }, |
| { |
| "epoch": 76.56755009696187, |
| "grad_norm": 1.9670932292938232, |
| "learning_rate": 0.001, |
| "loss": 0.9366, |
| "step": 236900 |
| }, |
| { |
| "epoch": 76.59987071751777, |
| "grad_norm": 1.8679801225662231, |
| "learning_rate": 0.001, |
| "loss": 0.9519, |
| "step": 237000 |
| }, |
| { |
| "epoch": 76.6321913380737, |
| "grad_norm": 1.8227111101150513, |
| "learning_rate": 0.001, |
| "loss": 0.9325, |
| "step": 237100 |
| }, |
| { |
| "epoch": 76.6645119586296, |
| "grad_norm": 6.633680820465088, |
| "learning_rate": 0.001, |
| "loss": 0.9339, |
| "step": 237200 |
| }, |
| { |
| "epoch": 76.69683257918552, |
| "grad_norm": 1.7457256317138672, |
| "learning_rate": 0.001, |
| "loss": 0.9416, |
| "step": 237300 |
| }, |
| { |
| "epoch": 76.72915319974143, |
| "grad_norm": 2.4800775051116943, |
| "learning_rate": 0.001, |
| "loss": 0.9526, |
| "step": 237400 |
| }, |
| { |
| "epoch": 76.76147382029735, |
| "grad_norm": 1.730302333831787, |
| "learning_rate": 0.001, |
| "loss": 0.9285, |
| "step": 237500 |
| }, |
| { |
| "epoch": 76.79379444085326, |
| "grad_norm": 1.8279228210449219, |
| "learning_rate": 0.001, |
| "loss": 0.9723, |
| "step": 237600 |
| }, |
| { |
| "epoch": 76.82611506140918, |
| "grad_norm": 2.014887809753418, |
| "learning_rate": 0.001, |
| "loss": 0.9415, |
| "step": 237700 |
| }, |
| { |
| "epoch": 76.85843568196509, |
| "grad_norm": 2.5039334297180176, |
| "learning_rate": 0.001, |
| "loss": 0.9643, |
| "step": 237800 |
| }, |
| { |
| "epoch": 76.89075630252101, |
| "grad_norm": 2.5656185150146484, |
| "learning_rate": 0.001, |
| "loss": 0.9451, |
| "step": 237900 |
| }, |
| { |
| "epoch": 76.92307692307692, |
| "grad_norm": 1.8565692901611328, |
| "learning_rate": 0.001, |
| "loss": 0.941, |
| "step": 238000 |
| }, |
| { |
| "epoch": 76.95539754363284, |
| "grad_norm": 1.7059361934661865, |
| "learning_rate": 0.001, |
| "loss": 0.9776, |
| "step": 238100 |
| }, |
| { |
| "epoch": 76.98771816418875, |
| "grad_norm": 1.8570643663406372, |
| "learning_rate": 0.001, |
| "loss": 0.9439, |
| "step": 238200 |
| }, |
| { |
| "epoch": 77.02003878474467, |
| "grad_norm": 2.594362258911133, |
| "learning_rate": 0.001, |
| "loss": 0.9292, |
| "step": 238300 |
| }, |
| { |
| "epoch": 77.05235940530058, |
| "grad_norm": 1.7089303731918335, |
| "learning_rate": 0.001, |
| "loss": 0.8733, |
| "step": 238400 |
| }, |
| { |
| "epoch": 77.0846800258565, |
| "grad_norm": 2.044188976287842, |
| "learning_rate": 0.001, |
| "loss": 0.8921, |
| "step": 238500 |
| }, |
| { |
| "epoch": 77.11700064641241, |
| "grad_norm": 1.9743757247924805, |
| "learning_rate": 0.001, |
| "loss": 0.8858, |
| "step": 238600 |
| }, |
| { |
| "epoch": 77.14932126696833, |
| "grad_norm": 1.9437718391418457, |
| "learning_rate": 0.001, |
| "loss": 0.8908, |
| "step": 238700 |
| }, |
| { |
| "epoch": 77.18164188752424, |
| "grad_norm": 2.1930902004241943, |
| "learning_rate": 0.001, |
| "loss": 0.8885, |
| "step": 238800 |
| }, |
| { |
| "epoch": 77.21396250808016, |
| "grad_norm": 1.930568814277649, |
| "learning_rate": 0.001, |
| "loss": 0.8924, |
| "step": 238900 |
| }, |
| { |
| "epoch": 77.24628312863607, |
| "grad_norm": 1.827075719833374, |
| "learning_rate": 0.001, |
| "loss": 0.8958, |
| "step": 239000 |
| }, |
| { |
| "epoch": 77.27860374919199, |
| "grad_norm": 1.926182746887207, |
| "learning_rate": 0.001, |
| "loss": 0.9041, |
| "step": 239100 |
| }, |
| { |
| "epoch": 77.3109243697479, |
| "grad_norm": 22.807456970214844, |
| "learning_rate": 0.001, |
| "loss": 0.9079, |
| "step": 239200 |
| }, |
| { |
| "epoch": 77.34324499030382, |
| "grad_norm": 2.4273526668548584, |
| "learning_rate": 0.001, |
| "loss": 0.9035, |
| "step": 239300 |
| }, |
| { |
| "epoch": 77.37556561085972, |
| "grad_norm": 1.7550523281097412, |
| "learning_rate": 0.001, |
| "loss": 0.9272, |
| "step": 239400 |
| }, |
| { |
| "epoch": 77.40788623141565, |
| "grad_norm": 1.4827191829681396, |
| "learning_rate": 0.001, |
| "loss": 0.9148, |
| "step": 239500 |
| }, |
| { |
| "epoch": 77.44020685197155, |
| "grad_norm": 1.5911911725997925, |
| "learning_rate": 0.001, |
| "loss": 0.9168, |
| "step": 239600 |
| }, |
| { |
| "epoch": 77.47252747252747, |
| "grad_norm": 1.583900809288025, |
| "learning_rate": 0.001, |
| "loss": 0.9196, |
| "step": 239700 |
| }, |
| { |
| "epoch": 77.50484809308338, |
| "grad_norm": 2.1414811611175537, |
| "learning_rate": 0.001, |
| "loss": 0.9164, |
| "step": 239800 |
| }, |
| { |
| "epoch": 77.5371687136393, |
| "grad_norm": 1.7510583400726318, |
| "learning_rate": 0.001, |
| "loss": 0.9213, |
| "step": 239900 |
| }, |
| { |
| "epoch": 77.56948933419521, |
| "grad_norm": 1.7050837278366089, |
| "learning_rate": 0.001, |
| "loss": 0.9308, |
| "step": 240000 |
| }, |
| { |
| "epoch": 77.60180995475113, |
| "grad_norm": 2.887627363204956, |
| "learning_rate": 0.001, |
| "loss": 0.9263, |
| "step": 240100 |
| }, |
| { |
| "epoch": 77.63413057530704, |
| "grad_norm": 1.8627345561981201, |
| "learning_rate": 0.001, |
| "loss": 0.935, |
| "step": 240200 |
| }, |
| { |
| "epoch": 77.66645119586296, |
| "grad_norm": 1.4883463382720947, |
| "learning_rate": 0.001, |
| "loss": 0.9439, |
| "step": 240300 |
| }, |
| { |
| "epoch": 77.69877181641887, |
| "grad_norm": 2.6425867080688477, |
| "learning_rate": 0.001, |
| "loss": 0.9367, |
| "step": 240400 |
| }, |
| { |
| "epoch": 77.73109243697479, |
| "grad_norm": 1.971907615661621, |
| "learning_rate": 0.001, |
| "loss": 0.9371, |
| "step": 240500 |
| }, |
| { |
| "epoch": 77.7634130575307, |
| "grad_norm": 1.6637628078460693, |
| "learning_rate": 0.001, |
| "loss": 0.9277, |
| "step": 240600 |
| }, |
| { |
| "epoch": 77.79573367808662, |
| "grad_norm": 4.366883754730225, |
| "learning_rate": 0.001, |
| "loss": 0.9409, |
| "step": 240700 |
| }, |
| { |
| "epoch": 77.82805429864253, |
| "grad_norm": 1.700300931930542, |
| "learning_rate": 0.001, |
| "loss": 0.9582, |
| "step": 240800 |
| }, |
| { |
| "epoch": 77.86037491919845, |
| "grad_norm": 1.6653016805648804, |
| "learning_rate": 0.001, |
| "loss": 0.9337, |
| "step": 240900 |
| }, |
| { |
| "epoch": 77.89269553975436, |
| "grad_norm": 1.5066953897476196, |
| "learning_rate": 0.001, |
| "loss": 0.9384, |
| "step": 241000 |
| }, |
| { |
| "epoch": 77.92501616031028, |
| "grad_norm": 1.970182180404663, |
| "learning_rate": 0.001, |
| "loss": 0.9514, |
| "step": 241100 |
| }, |
| { |
| "epoch": 77.95733678086619, |
| "grad_norm": 1.863133192062378, |
| "learning_rate": 0.001, |
| "loss": 0.9502, |
| "step": 241200 |
| }, |
| { |
| "epoch": 77.98965740142211, |
| "grad_norm": 2.4191184043884277, |
| "learning_rate": 0.001, |
| "loss": 0.9507, |
| "step": 241300 |
| }, |
| { |
| "epoch": 78.02197802197803, |
| "grad_norm": 1.864750862121582, |
| "learning_rate": 0.001, |
| "loss": 0.9046, |
| "step": 241400 |
| }, |
| { |
| "epoch": 78.05429864253394, |
| "grad_norm": 1.5248106718063354, |
| "learning_rate": 0.001, |
| "loss": 0.8657, |
| "step": 241500 |
| }, |
| { |
| "epoch": 78.08661926308986, |
| "grad_norm": 1.8853729963302612, |
| "learning_rate": 0.001, |
| "loss": 0.8685, |
| "step": 241600 |
| }, |
| { |
| "epoch": 78.11893988364577, |
| "grad_norm": 1.8293319940567017, |
| "learning_rate": 0.001, |
| "loss": 0.871, |
| "step": 241700 |
| }, |
| { |
| "epoch": 78.15126050420169, |
| "grad_norm": 2.1890885829925537, |
| "learning_rate": 0.001, |
| "loss": 0.8768, |
| "step": 241800 |
| }, |
| { |
| "epoch": 78.1835811247576, |
| "grad_norm": 1.899473786354065, |
| "learning_rate": 0.001, |
| "loss": 0.8958, |
| "step": 241900 |
| }, |
| { |
| "epoch": 78.21590174531352, |
| "grad_norm": 1.888890027999878, |
| "learning_rate": 0.001, |
| "loss": 0.8698, |
| "step": 242000 |
| }, |
| { |
| "epoch": 78.24822236586942, |
| "grad_norm": 1.9390349388122559, |
| "learning_rate": 0.001, |
| "loss": 0.8938, |
| "step": 242100 |
| }, |
| { |
| "epoch": 78.28054298642535, |
| "grad_norm": 1.613270878791809, |
| "learning_rate": 0.001, |
| "loss": 0.8969, |
| "step": 242200 |
| }, |
| { |
| "epoch": 78.31286360698125, |
| "grad_norm": 1.510816216468811, |
| "learning_rate": 0.001, |
| "loss": 0.8928, |
| "step": 242300 |
| }, |
| { |
| "epoch": 78.34518422753717, |
| "grad_norm": 1.6668297052383423, |
| "learning_rate": 0.001, |
| "loss": 0.8946, |
| "step": 242400 |
| }, |
| { |
| "epoch": 78.37750484809308, |
| "grad_norm": 2.631298065185547, |
| "learning_rate": 0.001, |
| "loss": 0.9224, |
| "step": 242500 |
| }, |
| { |
| "epoch": 78.409825468649, |
| "grad_norm": 1.9295475482940674, |
| "learning_rate": 0.001, |
| "loss": 0.8946, |
| "step": 242600 |
| }, |
| { |
| "epoch": 78.44214608920491, |
| "grad_norm": 1.5661232471466064, |
| "learning_rate": 0.001, |
| "loss": 0.9005, |
| "step": 242700 |
| }, |
| { |
| "epoch": 78.47446670976083, |
| "grad_norm": 1.602188229560852, |
| "learning_rate": 0.001, |
| "loss": 0.9058, |
| "step": 242800 |
| }, |
| { |
| "epoch": 78.50678733031674, |
| "grad_norm": 2.1843175888061523, |
| "learning_rate": 0.001, |
| "loss": 0.9144, |
| "step": 242900 |
| }, |
| { |
| "epoch": 78.53910795087266, |
| "grad_norm": 1.603387475013733, |
| "learning_rate": 0.001, |
| "loss": 0.9284, |
| "step": 243000 |
| }, |
| { |
| "epoch": 78.57142857142857, |
| "grad_norm": 2.4746646881103516, |
| "learning_rate": 0.001, |
| "loss": 0.9248, |
| "step": 243100 |
| }, |
| { |
| "epoch": 78.60374919198449, |
| "grad_norm": 1.4542807340621948, |
| "learning_rate": 0.001, |
| "loss": 0.908, |
| "step": 243200 |
| }, |
| { |
| "epoch": 78.6360698125404, |
| "grad_norm": 2.2467586994171143, |
| "learning_rate": 0.001, |
| "loss": 0.9209, |
| "step": 243300 |
| }, |
| { |
| "epoch": 78.66839043309632, |
| "grad_norm": 1.670127034187317, |
| "learning_rate": 0.001, |
| "loss": 0.9106, |
| "step": 243400 |
| }, |
| { |
| "epoch": 78.70071105365223, |
| "grad_norm": 1.4943729639053345, |
| "learning_rate": 0.001, |
| "loss": 0.9295, |
| "step": 243500 |
| }, |
| { |
| "epoch": 78.73303167420815, |
| "grad_norm": 1.8003182411193848, |
| "learning_rate": 0.001, |
| "loss": 0.9323, |
| "step": 243600 |
| }, |
| { |
| "epoch": 78.76535229476406, |
| "grad_norm": 1.6677433252334595, |
| "learning_rate": 0.001, |
| "loss": 0.9376, |
| "step": 243700 |
| }, |
| { |
| "epoch": 78.79767291531998, |
| "grad_norm": 1.4286067485809326, |
| "learning_rate": 0.001, |
| "loss": 0.9452, |
| "step": 243800 |
| }, |
| { |
| "epoch": 78.82999353587589, |
| "grad_norm": 1.4021170139312744, |
| "learning_rate": 0.001, |
| "loss": 0.9488, |
| "step": 243900 |
| }, |
| { |
| "epoch": 78.86231415643181, |
| "grad_norm": 1.5756992101669312, |
| "learning_rate": 0.001, |
| "loss": 0.9512, |
| "step": 244000 |
| }, |
| { |
| "epoch": 78.89463477698771, |
| "grad_norm": 1.6208910942077637, |
| "learning_rate": 0.001, |
| "loss": 0.9351, |
| "step": 244100 |
| }, |
| { |
| "epoch": 78.92695539754364, |
| "grad_norm": 1.8639366626739502, |
| "learning_rate": 0.001, |
| "loss": 0.9422, |
| "step": 244200 |
| }, |
| { |
| "epoch": 78.95927601809954, |
| "grad_norm": 1.9582170248031616, |
| "learning_rate": 0.001, |
| "loss": 0.962, |
| "step": 244300 |
| }, |
| { |
| "epoch": 78.99159663865547, |
| "grad_norm": 1.5845123529434204, |
| "learning_rate": 0.001, |
| "loss": 0.9453, |
| "step": 244400 |
| }, |
| { |
| "epoch": 79.02391725921137, |
| "grad_norm": 1.7335338592529297, |
| "learning_rate": 0.001, |
| "loss": 0.8861, |
| "step": 244500 |
| }, |
| { |
| "epoch": 79.0562378797673, |
| "grad_norm": 1.3891186714172363, |
| "learning_rate": 0.001, |
| "loss": 0.8479, |
| "step": 244600 |
| }, |
| { |
| "epoch": 79.0885585003232, |
| "grad_norm": 2.224407434463501, |
| "learning_rate": 0.001, |
| "loss": 0.8521, |
| "step": 244700 |
| }, |
| { |
| "epoch": 79.12087912087912, |
| "grad_norm": 1.7175556421279907, |
| "learning_rate": 0.001, |
| "loss": 0.8663, |
| "step": 244800 |
| }, |
| { |
| "epoch": 79.15319974143503, |
| "grad_norm": 1.3971540927886963, |
| "learning_rate": 0.001, |
| "loss": 0.8854, |
| "step": 244900 |
| }, |
| { |
| "epoch": 79.18552036199095, |
| "grad_norm": 1.6051901578903198, |
| "learning_rate": 0.001, |
| "loss": 0.8806, |
| "step": 245000 |
| }, |
| { |
| "epoch": 79.21784098254686, |
| "grad_norm": 1.4144991636276245, |
| "learning_rate": 0.001, |
| "loss": 0.8854, |
| "step": 245100 |
| }, |
| { |
| "epoch": 79.25016160310278, |
| "grad_norm": 1.6836448907852173, |
| "learning_rate": 0.001, |
| "loss": 0.8852, |
| "step": 245200 |
| }, |
| { |
| "epoch": 79.28248222365869, |
| "grad_norm": 1.8142777681350708, |
| "learning_rate": 0.001, |
| "loss": 0.889, |
| "step": 245300 |
| }, |
| { |
| "epoch": 79.31480284421461, |
| "grad_norm": 1.4503638744354248, |
| "learning_rate": 0.001, |
| "loss": 0.8878, |
| "step": 245400 |
| }, |
| { |
| "epoch": 79.34712346477052, |
| "grad_norm": 1.810864806175232, |
| "learning_rate": 0.001, |
| "loss": 0.8944, |
| "step": 245500 |
| }, |
| { |
| "epoch": 79.37944408532644, |
| "grad_norm": 1.5955215692520142, |
| "learning_rate": 0.001, |
| "loss": 0.8924, |
| "step": 245600 |
| }, |
| { |
| "epoch": 79.41176470588235, |
| "grad_norm": 1.9111827611923218, |
| "learning_rate": 0.001, |
| "loss": 0.9071, |
| "step": 245700 |
| }, |
| { |
| "epoch": 79.44408532643827, |
| "grad_norm": 1.7505015134811401, |
| "learning_rate": 0.001, |
| "loss": 0.9116, |
| "step": 245800 |
| }, |
| { |
| "epoch": 79.47640594699418, |
| "grad_norm": 1.6506662368774414, |
| "learning_rate": 0.001, |
| "loss": 0.8994, |
| "step": 245900 |
| }, |
| { |
| "epoch": 79.5087265675501, |
| "grad_norm": 1.7561360597610474, |
| "learning_rate": 0.001, |
| "loss": 0.897, |
| "step": 246000 |
| }, |
| { |
| "epoch": 79.541047188106, |
| "grad_norm": 1.4854172468185425, |
| "learning_rate": 0.001, |
| "loss": 0.9047, |
| "step": 246100 |
| }, |
| { |
| "epoch": 79.57336780866193, |
| "grad_norm": 2.3282666206359863, |
| "learning_rate": 0.001, |
| "loss": 0.915, |
| "step": 246200 |
| }, |
| { |
| "epoch": 79.60568842921784, |
| "grad_norm": 1.461841344833374, |
| "learning_rate": 0.001, |
| "loss": 0.9143, |
| "step": 246300 |
| }, |
| { |
| "epoch": 79.63800904977376, |
| "grad_norm": 1.3623031377792358, |
| "learning_rate": 0.001, |
| "loss": 0.9193, |
| "step": 246400 |
| }, |
| { |
| "epoch": 79.67032967032966, |
| "grad_norm": 1.9313454627990723, |
| "learning_rate": 0.001, |
| "loss": 0.9184, |
| "step": 246500 |
| }, |
| { |
| "epoch": 79.70265029088559, |
| "grad_norm": 1.8738027811050415, |
| "learning_rate": 0.001, |
| "loss": 0.9146, |
| "step": 246600 |
| }, |
| { |
| "epoch": 79.7349709114415, |
| "grad_norm": 1.95305335521698, |
| "learning_rate": 0.001, |
| "loss": 0.9158, |
| "step": 246700 |
| }, |
| { |
| "epoch": 79.76729153199742, |
| "grad_norm": 1.6693203449249268, |
| "learning_rate": 0.001, |
| "loss": 0.9184, |
| "step": 246800 |
| }, |
| { |
| "epoch": 79.79961215255332, |
| "grad_norm": 1.764410376548767, |
| "learning_rate": 0.001, |
| "loss": 0.9463, |
| "step": 246900 |
| }, |
| { |
| "epoch": 79.83193277310924, |
| "grad_norm": 1.8955789804458618, |
| "learning_rate": 0.001, |
| "loss": 0.9434, |
| "step": 247000 |
| }, |
| { |
| "epoch": 79.86425339366515, |
| "grad_norm": 1.6161580085754395, |
| "learning_rate": 0.001, |
| "loss": 0.9376, |
| "step": 247100 |
| }, |
| { |
| "epoch": 79.89657401422107, |
| "grad_norm": 1.5458617210388184, |
| "learning_rate": 0.001, |
| "loss": 0.9327, |
| "step": 247200 |
| }, |
| { |
| "epoch": 79.92889463477698, |
| "grad_norm": 1.5969195365905762, |
| "learning_rate": 0.001, |
| "loss": 0.9487, |
| "step": 247300 |
| }, |
| { |
| "epoch": 79.9612152553329, |
| "grad_norm": 1.6383031606674194, |
| "learning_rate": 0.001, |
| "loss": 0.9546, |
| "step": 247400 |
| }, |
| { |
| "epoch": 79.99353587588882, |
| "grad_norm": 2.259331464767456, |
| "learning_rate": 0.001, |
| "loss": 0.9425, |
| "step": 247500 |
| }, |
| { |
| "epoch": 80.02585649644473, |
| "grad_norm": 2.12754225730896, |
| "learning_rate": 0.001, |
| "loss": 0.8788, |
| "step": 247600 |
| }, |
| { |
| "epoch": 80.05817711700065, |
| "grad_norm": 1.7799402475357056, |
| "learning_rate": 0.001, |
| "loss": 0.8519, |
| "step": 247700 |
| }, |
| { |
| "epoch": 80.09049773755656, |
| "grad_norm": 2.46228289604187, |
| "learning_rate": 0.001, |
| "loss": 0.8533, |
| "step": 247800 |
| }, |
| { |
| "epoch": 80.12281835811248, |
| "grad_norm": 1.5350501537322998, |
| "learning_rate": 0.001, |
| "loss": 0.861, |
| "step": 247900 |
| }, |
| { |
| "epoch": 80.15513897866839, |
| "grad_norm": 2.1066055297851562, |
| "learning_rate": 0.001, |
| "loss": 0.8711, |
| "step": 248000 |
| }, |
| { |
| "epoch": 80.18745959922431, |
| "grad_norm": 1.4657257795333862, |
| "learning_rate": 0.001, |
| "loss": 0.8665, |
| "step": 248100 |
| }, |
| { |
| "epoch": 80.21978021978022, |
| "grad_norm": 1.7249951362609863, |
| "learning_rate": 0.001, |
| "loss": 0.8784, |
| "step": 248200 |
| }, |
| { |
| "epoch": 80.25210084033614, |
| "grad_norm": 2.1803066730499268, |
| "learning_rate": 0.001, |
| "loss": 0.8675, |
| "step": 248300 |
| }, |
| { |
| "epoch": 80.28442146089205, |
| "grad_norm": 1.5328905582427979, |
| "learning_rate": 0.001, |
| "loss": 0.8967, |
| "step": 248400 |
| }, |
| { |
| "epoch": 80.31674208144797, |
| "grad_norm": 1.712321400642395, |
| "learning_rate": 0.001, |
| "loss": 0.8854, |
| "step": 248500 |
| }, |
| { |
| "epoch": 80.34906270200388, |
| "grad_norm": 1.821807861328125, |
| "learning_rate": 0.001, |
| "loss": 0.8878, |
| "step": 248600 |
| }, |
| { |
| "epoch": 80.3813833225598, |
| "grad_norm": 1.7244770526885986, |
| "learning_rate": 0.001, |
| "loss": 0.8968, |
| "step": 248700 |
| }, |
| { |
| "epoch": 80.4137039431157, |
| "grad_norm": 1.9649723768234253, |
| "learning_rate": 0.001, |
| "loss": 0.8979, |
| "step": 248800 |
| }, |
| { |
| "epoch": 80.44602456367163, |
| "grad_norm": 1.7593700885772705, |
| "learning_rate": 0.001, |
| "loss": 0.8921, |
| "step": 248900 |
| }, |
| { |
| "epoch": 80.47834518422754, |
| "grad_norm": 1.606793999671936, |
| "learning_rate": 0.001, |
| "loss": 0.9013, |
| "step": 249000 |
| }, |
| { |
| "epoch": 80.51066580478346, |
| "grad_norm": 1.2996211051940918, |
| "learning_rate": 0.001, |
| "loss": 0.8991, |
| "step": 249100 |
| }, |
| { |
| "epoch": 80.54298642533936, |
| "grad_norm": 1.8968507051467896, |
| "learning_rate": 0.001, |
| "loss": 0.8959, |
| "step": 249200 |
| }, |
| { |
| "epoch": 80.57530704589529, |
| "grad_norm": 1.6174746751785278, |
| "learning_rate": 0.001, |
| "loss": 0.9086, |
| "step": 249300 |
| }, |
| { |
| "epoch": 80.6076276664512, |
| "grad_norm": 1.8977900743484497, |
| "learning_rate": 0.001, |
| "loss": 0.916, |
| "step": 249400 |
| }, |
| { |
| "epoch": 80.63994828700712, |
| "grad_norm": 2.1842105388641357, |
| "learning_rate": 0.001, |
| "loss": 0.9061, |
| "step": 249500 |
| }, |
| { |
| "epoch": 80.67226890756302, |
| "grad_norm": 1.555891513824463, |
| "learning_rate": 0.001, |
| "loss": 0.9002, |
| "step": 249600 |
| }, |
| { |
| "epoch": 80.70458952811894, |
| "grad_norm": 1.8011150360107422, |
| "learning_rate": 0.001, |
| "loss": 0.9051, |
| "step": 249700 |
| }, |
| { |
| "epoch": 80.73691014867485, |
| "grad_norm": 2.098155975341797, |
| "learning_rate": 0.001, |
| "loss": 0.9128, |
| "step": 249800 |
| }, |
| { |
| "epoch": 80.76923076923077, |
| "grad_norm": 1.7902772426605225, |
| "learning_rate": 0.001, |
| "loss": 0.9092, |
| "step": 249900 |
| }, |
| { |
| "epoch": 80.80155138978668, |
| "grad_norm": 1.8167074918746948, |
| "learning_rate": 0.001, |
| "loss": 0.9232, |
| "step": 250000 |
| }, |
| { |
| "epoch": 80.8338720103426, |
| "grad_norm": 1.8946473598480225, |
| "learning_rate": 0.001, |
| "loss": 0.9256, |
| "step": 250100 |
| }, |
| { |
| "epoch": 80.86619263089851, |
| "grad_norm": 2.035849094390869, |
| "learning_rate": 0.001, |
| "loss": 0.9248, |
| "step": 250200 |
| }, |
| { |
| "epoch": 80.89851325145443, |
| "grad_norm": 2.2636032104492188, |
| "learning_rate": 0.001, |
| "loss": 0.9376, |
| "step": 250300 |
| }, |
| { |
| "epoch": 80.93083387201034, |
| "grad_norm": 2.28778338432312, |
| "learning_rate": 0.001, |
| "loss": 0.946, |
| "step": 250400 |
| }, |
| { |
| "epoch": 80.96315449256626, |
| "grad_norm": 2.307657480239868, |
| "learning_rate": 0.001, |
| "loss": 0.9468, |
| "step": 250500 |
| }, |
| { |
| "epoch": 80.99547511312217, |
| "grad_norm": 1.6172599792480469, |
| "learning_rate": 0.001, |
| "loss": 0.9319, |
| "step": 250600 |
| }, |
| { |
| "epoch": 81.02779573367809, |
| "grad_norm": 1.3265091180801392, |
| "learning_rate": 0.001, |
| "loss": 0.8342, |
| "step": 250700 |
| }, |
| { |
| "epoch": 81.060116354234, |
| "grad_norm": 1.4472734928131104, |
| "learning_rate": 0.001, |
| "loss": 0.8624, |
| "step": 250800 |
| }, |
| { |
| "epoch": 81.09243697478992, |
| "grad_norm": 1.6675355434417725, |
| "learning_rate": 0.001, |
| "loss": 0.8429, |
| "step": 250900 |
| }, |
| { |
| "epoch": 81.12475759534583, |
| "grad_norm": 1.8562703132629395, |
| "learning_rate": 0.001, |
| "loss": 0.8655, |
| "step": 251000 |
| }, |
| { |
| "epoch": 81.15707821590175, |
| "grad_norm": 2.034242630004883, |
| "learning_rate": 0.001, |
| "loss": 0.865, |
| "step": 251100 |
| }, |
| { |
| "epoch": 81.18939883645766, |
| "grad_norm": 1.5765374898910522, |
| "learning_rate": 0.001, |
| "loss": 0.8735, |
| "step": 251200 |
| }, |
| { |
| "epoch": 81.22171945701358, |
| "grad_norm": 1.7124650478363037, |
| "learning_rate": 0.001, |
| "loss": 0.8562, |
| "step": 251300 |
| }, |
| { |
| "epoch": 81.25404007756948, |
| "grad_norm": 1.5629198551177979, |
| "learning_rate": 0.001, |
| "loss": 0.8594, |
| "step": 251400 |
| }, |
| { |
| "epoch": 81.2863606981254, |
| "grad_norm": 1.4347710609436035, |
| "learning_rate": 0.001, |
| "loss": 0.8888, |
| "step": 251500 |
| }, |
| { |
| "epoch": 81.31868131868131, |
| "grad_norm": 1.937766671180725, |
| "learning_rate": 0.001, |
| "loss": 0.8832, |
| "step": 251600 |
| }, |
| { |
| "epoch": 81.35100193923724, |
| "grad_norm": 1.9682731628417969, |
| "learning_rate": 0.001, |
| "loss": 0.8715, |
| "step": 251700 |
| }, |
| { |
| "epoch": 81.38332255979314, |
| "grad_norm": 1.760391116142273, |
| "learning_rate": 0.001, |
| "loss": 0.8781, |
| "step": 251800 |
| }, |
| { |
| "epoch": 81.41564318034906, |
| "grad_norm": 1.8551701307296753, |
| "learning_rate": 0.001, |
| "loss": 0.8749, |
| "step": 251900 |
| }, |
| { |
| "epoch": 81.44796380090497, |
| "grad_norm": 1.7820264101028442, |
| "learning_rate": 0.001, |
| "loss": 0.8981, |
| "step": 252000 |
| }, |
| { |
| "epoch": 81.4802844214609, |
| "grad_norm": 1.556565284729004, |
| "learning_rate": 0.001, |
| "loss": 0.9005, |
| "step": 252100 |
| }, |
| { |
| "epoch": 81.5126050420168, |
| "grad_norm": 4.37351131439209, |
| "learning_rate": 0.001, |
| "loss": 0.8902, |
| "step": 252200 |
| }, |
| { |
| "epoch": 81.54492566257272, |
| "grad_norm": 1.8729733228683472, |
| "learning_rate": 0.001, |
| "loss": 0.8959, |
| "step": 252300 |
| }, |
| { |
| "epoch": 81.57724628312863, |
| "grad_norm": 1.4958561658859253, |
| "learning_rate": 0.001, |
| "loss": 0.9072, |
| "step": 252400 |
| }, |
| { |
| "epoch": 81.60956690368455, |
| "grad_norm": 1.8346575498580933, |
| "learning_rate": 0.001, |
| "loss": 0.9189, |
| "step": 252500 |
| }, |
| { |
| "epoch": 81.64188752424046, |
| "grad_norm": 1.9965070486068726, |
| "learning_rate": 0.001, |
| "loss": 0.8933, |
| "step": 252600 |
| }, |
| { |
| "epoch": 81.67420814479638, |
| "grad_norm": 1.7740451097488403, |
| "learning_rate": 0.001, |
| "loss": 0.9031, |
| "step": 252700 |
| }, |
| { |
| "epoch": 81.70652876535229, |
| "grad_norm": 1.8927515745162964, |
| "learning_rate": 0.001, |
| "loss": 0.9042, |
| "step": 252800 |
| }, |
| { |
| "epoch": 81.73884938590821, |
| "grad_norm": 1.7266933917999268, |
| "learning_rate": 0.001, |
| "loss": 0.9183, |
| "step": 252900 |
| }, |
| { |
| "epoch": 81.77117000646412, |
| "grad_norm": 1.5089515447616577, |
| "learning_rate": 0.001, |
| "loss": 0.9018, |
| "step": 253000 |
| }, |
| { |
| "epoch": 81.80349062702004, |
| "grad_norm": 17.170991897583008, |
| "learning_rate": 0.001, |
| "loss": 0.9344, |
| "step": 253100 |
| }, |
| { |
| "epoch": 81.83581124757595, |
| "grad_norm": 1.541385531425476, |
| "learning_rate": 0.001, |
| "loss": 0.9277, |
| "step": 253200 |
| }, |
| { |
| "epoch": 81.86813186813187, |
| "grad_norm": 1.8526593446731567, |
| "learning_rate": 0.001, |
| "loss": 0.9274, |
| "step": 253300 |
| }, |
| { |
| "epoch": 81.90045248868778, |
| "grad_norm": 1.4917140007019043, |
| "learning_rate": 0.001, |
| "loss": 0.914, |
| "step": 253400 |
| }, |
| { |
| "epoch": 81.9327731092437, |
| "grad_norm": 1.8501405715942383, |
| "learning_rate": 0.001, |
| "loss": 0.9397, |
| "step": 253500 |
| }, |
| { |
| "epoch": 81.9650937297996, |
| "grad_norm": 1.5764436721801758, |
| "learning_rate": 0.001, |
| "loss": 0.9387, |
| "step": 253600 |
| }, |
| { |
| "epoch": 81.99741435035553, |
| "grad_norm": 1.4233180284500122, |
| "learning_rate": 0.001, |
| "loss": 0.9267, |
| "step": 253700 |
| }, |
| { |
| "epoch": 82.02973497091145, |
| "grad_norm": 1.810560941696167, |
| "learning_rate": 0.001, |
| "loss": 0.8427, |
| "step": 253800 |
| }, |
| { |
| "epoch": 82.06205559146736, |
| "grad_norm": 1.6479710340499878, |
| "learning_rate": 0.001, |
| "loss": 0.8471, |
| "step": 253900 |
| }, |
| { |
| "epoch": 82.09437621202328, |
| "grad_norm": 1.6219289302825928, |
| "learning_rate": 0.001, |
| "loss": 0.8567, |
| "step": 254000 |
| }, |
| { |
| "epoch": 82.12669683257919, |
| "grad_norm": 1.7933605909347534, |
| "learning_rate": 0.001, |
| "loss": 0.859, |
| "step": 254100 |
| }, |
| { |
| "epoch": 82.1590174531351, |
| "grad_norm": 1.518423080444336, |
| "learning_rate": 0.001, |
| "loss": 0.8533, |
| "step": 254200 |
| }, |
| { |
| "epoch": 82.19133807369101, |
| "grad_norm": 1.4636738300323486, |
| "learning_rate": 0.001, |
| "loss": 0.8601, |
| "step": 254300 |
| }, |
| { |
| "epoch": 82.22365869424694, |
| "grad_norm": 1.5128989219665527, |
| "learning_rate": 0.001, |
| "loss": 0.8589, |
| "step": 254400 |
| }, |
| { |
| "epoch": 82.25597931480284, |
| "grad_norm": 1.463891863822937, |
| "learning_rate": 0.001, |
| "loss": 0.8611, |
| "step": 254500 |
| }, |
| { |
| "epoch": 82.28829993535876, |
| "grad_norm": 1.5017369985580444, |
| "learning_rate": 0.001, |
| "loss": 0.8683, |
| "step": 254600 |
| }, |
| { |
| "epoch": 82.32062055591467, |
| "grad_norm": 1.7159160375595093, |
| "learning_rate": 0.001, |
| "loss": 0.8748, |
| "step": 254700 |
| }, |
| { |
| "epoch": 82.3529411764706, |
| "grad_norm": 1.799309253692627, |
| "learning_rate": 0.001, |
| "loss": 0.8784, |
| "step": 254800 |
| }, |
| { |
| "epoch": 82.3852617970265, |
| "grad_norm": 1.742081880569458, |
| "learning_rate": 0.001, |
| "loss": 0.8949, |
| "step": 254900 |
| }, |
| { |
| "epoch": 82.41758241758242, |
| "grad_norm": 1.7784702777862549, |
| "learning_rate": 0.001, |
| "loss": 0.8735, |
| "step": 255000 |
| }, |
| { |
| "epoch": 82.44990303813833, |
| "grad_norm": 1.4071807861328125, |
| "learning_rate": 0.001, |
| "loss": 0.8704, |
| "step": 255100 |
| }, |
| { |
| "epoch": 82.48222365869425, |
| "grad_norm": 1.567540168762207, |
| "learning_rate": 0.001, |
| "loss": 0.877, |
| "step": 255200 |
| }, |
| { |
| "epoch": 82.51454427925016, |
| "grad_norm": 1.4727294445037842, |
| "learning_rate": 0.001, |
| "loss": 0.9026, |
| "step": 255300 |
| }, |
| { |
| "epoch": 82.54686489980608, |
| "grad_norm": 2.0840132236480713, |
| "learning_rate": 0.001, |
| "loss": 0.8898, |
| "step": 255400 |
| }, |
| { |
| "epoch": 82.57918552036199, |
| "grad_norm": 1.4543981552124023, |
| "learning_rate": 0.001, |
| "loss": 0.8851, |
| "step": 255500 |
| }, |
| { |
| "epoch": 82.61150614091791, |
| "grad_norm": 2.6201300621032715, |
| "learning_rate": 0.001, |
| "loss": 0.9079, |
| "step": 255600 |
| }, |
| { |
| "epoch": 82.64382676147382, |
| "grad_norm": 1.6429693698883057, |
| "learning_rate": 0.001, |
| "loss": 0.8945, |
| "step": 255700 |
| }, |
| { |
| "epoch": 82.67614738202974, |
| "grad_norm": 1.4842705726623535, |
| "learning_rate": 0.001, |
| "loss": 0.8983, |
| "step": 255800 |
| }, |
| { |
| "epoch": 82.70846800258565, |
| "grad_norm": 2.0955522060394287, |
| "learning_rate": 0.001, |
| "loss": 0.9124, |
| "step": 255900 |
| }, |
| { |
| "epoch": 82.74078862314157, |
| "grad_norm": 1.486534833908081, |
| "learning_rate": 0.001, |
| "loss": 0.9026, |
| "step": 256000 |
| }, |
| { |
| "epoch": 82.77310924369748, |
| "grad_norm": 1.7523531913757324, |
| "learning_rate": 0.001, |
| "loss": 0.9076, |
| "step": 256100 |
| }, |
| { |
| "epoch": 82.8054298642534, |
| "grad_norm": 1.7044719457626343, |
| "learning_rate": 0.001, |
| "loss": 0.9097, |
| "step": 256200 |
| }, |
| { |
| "epoch": 82.8377504848093, |
| "grad_norm": 1.5170854330062866, |
| "learning_rate": 0.001, |
| "loss": 0.9027, |
| "step": 256300 |
| }, |
| { |
| "epoch": 82.87007110536523, |
| "grad_norm": 1.520762324333191, |
| "learning_rate": 0.001, |
| "loss": 0.9069, |
| "step": 256400 |
| }, |
| { |
| "epoch": 82.90239172592113, |
| "grad_norm": 1.821850299835205, |
| "learning_rate": 0.001, |
| "loss": 0.925, |
| "step": 256500 |
| }, |
| { |
| "epoch": 82.93471234647706, |
| "grad_norm": 1.8839287757873535, |
| "learning_rate": 0.001, |
| "loss": 0.9241, |
| "step": 256600 |
| }, |
| { |
| "epoch": 82.96703296703296, |
| "grad_norm": 2.05893611907959, |
| "learning_rate": 0.001, |
| "loss": 0.9253, |
| "step": 256700 |
| }, |
| { |
| "epoch": 82.99935358758889, |
| "grad_norm": 1.280112862586975, |
| "learning_rate": 0.001, |
| "loss": 0.8939, |
| "step": 256800 |
| }, |
| { |
| "epoch": 83.03167420814479, |
| "grad_norm": 1.2705212831497192, |
| "learning_rate": 0.001, |
| "loss": 0.8337, |
| "step": 256900 |
| }, |
| { |
| "epoch": 83.06399482870071, |
| "grad_norm": 1.231841802597046, |
| "learning_rate": 0.001, |
| "loss": 0.8347, |
| "step": 257000 |
| }, |
| { |
| "epoch": 83.09631544925662, |
| "grad_norm": 1.311620831489563, |
| "learning_rate": 0.001, |
| "loss": 0.8409, |
| "step": 257100 |
| }, |
| { |
| "epoch": 83.12863606981254, |
| "grad_norm": 1.7882788181304932, |
| "learning_rate": 0.001, |
| "loss": 0.8443, |
| "step": 257200 |
| }, |
| { |
| "epoch": 83.16095669036845, |
| "grad_norm": 3.822011709213257, |
| "learning_rate": 0.001, |
| "loss": 0.8478, |
| "step": 257300 |
| }, |
| { |
| "epoch": 83.19327731092437, |
| "grad_norm": 1.924403190612793, |
| "learning_rate": 0.001, |
| "loss": 0.8459, |
| "step": 257400 |
| }, |
| { |
| "epoch": 83.22559793148028, |
| "grad_norm": 1.4485812187194824, |
| "learning_rate": 0.001, |
| "loss": 0.854, |
| "step": 257500 |
| }, |
| { |
| "epoch": 83.2579185520362, |
| "grad_norm": 1.1443918943405151, |
| "learning_rate": 0.001, |
| "loss": 0.8654, |
| "step": 257600 |
| }, |
| { |
| "epoch": 83.29023917259211, |
| "grad_norm": 1.3180463314056396, |
| "learning_rate": 0.001, |
| "loss": 0.8738, |
| "step": 257700 |
| }, |
| { |
| "epoch": 83.32255979314803, |
| "grad_norm": 1.2314362525939941, |
| "learning_rate": 0.001, |
| "loss": 0.8578, |
| "step": 257800 |
| }, |
| { |
| "epoch": 83.35488041370394, |
| "grad_norm": 1.101746678352356, |
| "learning_rate": 0.001, |
| "loss": 0.8723, |
| "step": 257900 |
| }, |
| { |
| "epoch": 83.38720103425986, |
| "grad_norm": 0.9859794974327087, |
| "learning_rate": 0.001, |
| "loss": 0.8622, |
| "step": 258000 |
| }, |
| { |
| "epoch": 83.41952165481577, |
| "grad_norm": 3.018252372741699, |
| "learning_rate": 0.001, |
| "loss": 0.8729, |
| "step": 258100 |
| }, |
| { |
| "epoch": 83.45184227537169, |
| "grad_norm": 1.3902442455291748, |
| "learning_rate": 0.001, |
| "loss": 0.8788, |
| "step": 258200 |
| }, |
| { |
| "epoch": 83.4841628959276, |
| "grad_norm": 1.4548698663711548, |
| "learning_rate": 0.001, |
| "loss": 0.8834, |
| "step": 258300 |
| }, |
| { |
| "epoch": 83.51648351648352, |
| "grad_norm": 1.4873329401016235, |
| "learning_rate": 0.001, |
| "loss": 0.8907, |
| "step": 258400 |
| }, |
| { |
| "epoch": 83.54880413703943, |
| "grad_norm": 1.177139163017273, |
| "learning_rate": 0.001, |
| "loss": 0.8801, |
| "step": 258500 |
| }, |
| { |
| "epoch": 83.58112475759535, |
| "grad_norm": 1.9104993343353271, |
| "learning_rate": 0.001, |
| "loss": 0.8869, |
| "step": 258600 |
| }, |
| { |
| "epoch": 83.61344537815125, |
| "grad_norm": 1.7681374549865723, |
| "learning_rate": 0.001, |
| "loss": 0.8976, |
| "step": 258700 |
| }, |
| { |
| "epoch": 83.64576599870718, |
| "grad_norm": 0.9657064080238342, |
| "learning_rate": 0.001, |
| "loss": 0.8977, |
| "step": 258800 |
| }, |
| { |
| "epoch": 83.67808661926308, |
| "grad_norm": 1.5095404386520386, |
| "learning_rate": 0.001, |
| "loss": 0.8891, |
| "step": 258900 |
| }, |
| { |
| "epoch": 83.710407239819, |
| "grad_norm": 0.9872686862945557, |
| "learning_rate": 0.001, |
| "loss": 0.8972, |
| "step": 259000 |
| }, |
| { |
| "epoch": 83.74272786037491, |
| "grad_norm": 1.5525736808776855, |
| "learning_rate": 0.001, |
| "loss": 0.9007, |
| "step": 259100 |
| }, |
| { |
| "epoch": 83.77504848093083, |
| "grad_norm": 1.1791164875030518, |
| "learning_rate": 0.001, |
| "loss": 0.904, |
| "step": 259200 |
| }, |
| { |
| "epoch": 83.80736910148674, |
| "grad_norm": 1.8377259969711304, |
| "learning_rate": 0.001, |
| "loss": 0.9127, |
| "step": 259300 |
| }, |
| { |
| "epoch": 83.83968972204266, |
| "grad_norm": 1.5384559631347656, |
| "learning_rate": 0.001, |
| "loss": 0.9098, |
| "step": 259400 |
| }, |
| { |
| "epoch": 83.87201034259857, |
| "grad_norm": 1.4044259786605835, |
| "learning_rate": 0.001, |
| "loss": 0.9075, |
| "step": 259500 |
| }, |
| { |
| "epoch": 83.9043309631545, |
| "grad_norm": 1.0175000429153442, |
| "learning_rate": 0.001, |
| "loss": 0.9107, |
| "step": 259600 |
| }, |
| { |
| "epoch": 83.9366515837104, |
| "grad_norm": 1.0417195558547974, |
| "learning_rate": 0.001, |
| "loss": 0.9235, |
| "step": 259700 |
| }, |
| { |
| "epoch": 83.96897220426632, |
| "grad_norm": 1.4625414609909058, |
| "learning_rate": 0.001, |
| "loss": 0.9279, |
| "step": 259800 |
| }, |
| { |
| "epoch": 84.00129282482224, |
| "grad_norm": 158.49737548828125, |
| "learning_rate": 0.001, |
| "loss": 0.9166, |
| "step": 259900 |
| }, |
| { |
| "epoch": 84.03361344537815, |
| "grad_norm": 3.5888888835906982, |
| "learning_rate": 0.001, |
| "loss": 0.825, |
| "step": 260000 |
| }, |
| { |
| "epoch": 84.06593406593407, |
| "grad_norm": 2.126845121383667, |
| "learning_rate": 0.001, |
| "loss": 0.8455, |
| "step": 260100 |
| }, |
| { |
| "epoch": 84.09825468648998, |
| "grad_norm": 2.2806663513183594, |
| "learning_rate": 0.001, |
| "loss": 0.8185, |
| "step": 260200 |
| }, |
| { |
| "epoch": 84.1305753070459, |
| "grad_norm": 1.8550175428390503, |
| "learning_rate": 0.001, |
| "loss": 0.8512, |
| "step": 260300 |
| }, |
| { |
| "epoch": 84.16289592760181, |
| "grad_norm": 2.1901886463165283, |
| "learning_rate": 0.001, |
| "loss": 0.8463, |
| "step": 260400 |
| }, |
| { |
| "epoch": 84.19521654815773, |
| "grad_norm": 2.431595802307129, |
| "learning_rate": 0.001, |
| "loss": 0.8452, |
| "step": 260500 |
| }, |
| { |
| "epoch": 84.22753716871364, |
| "grad_norm": 2.0532970428466797, |
| "learning_rate": 0.001, |
| "loss": 0.8521, |
| "step": 260600 |
| }, |
| { |
| "epoch": 84.25985778926956, |
| "grad_norm": 2.080876350402832, |
| "learning_rate": 0.001, |
| "loss": 0.8423, |
| "step": 260700 |
| }, |
| { |
| "epoch": 84.29217840982547, |
| "grad_norm": 2.1651861667633057, |
| "learning_rate": 0.001, |
| "loss": 0.8475, |
| "step": 260800 |
| }, |
| { |
| "epoch": 84.32449903038139, |
| "grad_norm": 2.6767241954803467, |
| "learning_rate": 0.001, |
| "loss": 0.8522, |
| "step": 260900 |
| }, |
| { |
| "epoch": 84.3568196509373, |
| "grad_norm": 2.2361319065093994, |
| "learning_rate": 0.001, |
| "loss": 0.8845, |
| "step": 261000 |
| }, |
| { |
| "epoch": 84.38914027149322, |
| "grad_norm": 2.157210111618042, |
| "learning_rate": 0.001, |
| "loss": 0.8596, |
| "step": 261100 |
| }, |
| { |
| "epoch": 84.42146089204913, |
| "grad_norm": 1.8781487941741943, |
| "learning_rate": 0.001, |
| "loss": 0.8816, |
| "step": 261200 |
| }, |
| { |
| "epoch": 84.45378151260505, |
| "grad_norm": 1.8453071117401123, |
| "learning_rate": 0.001, |
| "loss": 0.8741, |
| "step": 261300 |
| }, |
| { |
| "epoch": 84.48610213316095, |
| "grad_norm": 1.809593915939331, |
| "learning_rate": 0.001, |
| "loss": 0.8694, |
| "step": 261400 |
| }, |
| { |
| "epoch": 84.51842275371688, |
| "grad_norm": 2.3979721069335938, |
| "learning_rate": 0.001, |
| "loss": 0.8779, |
| "step": 261500 |
| }, |
| { |
| "epoch": 84.55074337427278, |
| "grad_norm": 2.704803228378296, |
| "learning_rate": 0.001, |
| "loss": 0.8825, |
| "step": 261600 |
| }, |
| { |
| "epoch": 84.5830639948287, |
| "grad_norm": 1.9731519222259521, |
| "learning_rate": 0.001, |
| "loss": 0.8836, |
| "step": 261700 |
| }, |
| { |
| "epoch": 84.61538461538461, |
| "grad_norm": 1.9622254371643066, |
| "learning_rate": 0.001, |
| "loss": 0.8799, |
| "step": 261800 |
| }, |
| { |
| "epoch": 84.64770523594053, |
| "grad_norm": 1.9820390939712524, |
| "learning_rate": 0.001, |
| "loss": 0.8798, |
| "step": 261900 |
| }, |
| { |
| "epoch": 84.68002585649644, |
| "grad_norm": 3.088493585586548, |
| "learning_rate": 0.001, |
| "loss": 0.8821, |
| "step": 262000 |
| }, |
| { |
| "epoch": 84.71234647705236, |
| "grad_norm": 2.2478713989257812, |
| "learning_rate": 0.001, |
| "loss": 0.9065, |
| "step": 262100 |
| }, |
| { |
| "epoch": 84.74466709760827, |
| "grad_norm": 1.8590142726898193, |
| "learning_rate": 0.001, |
| "loss": 0.8902, |
| "step": 262200 |
| }, |
| { |
| "epoch": 84.7769877181642, |
| "grad_norm": 1.954840064048767, |
| "learning_rate": 0.001, |
| "loss": 0.9051, |
| "step": 262300 |
| }, |
| { |
| "epoch": 84.8093083387201, |
| "grad_norm": 1.901276707649231, |
| "learning_rate": 0.001, |
| "loss": 0.9101, |
| "step": 262400 |
| }, |
| { |
| "epoch": 84.84162895927602, |
| "grad_norm": 2.1338517665863037, |
| "learning_rate": 0.001, |
| "loss": 0.8986, |
| "step": 262500 |
| }, |
| { |
| "epoch": 84.87394957983193, |
| "grad_norm": 9.55400276184082, |
| "learning_rate": 0.001, |
| "loss": 0.9106, |
| "step": 262600 |
| }, |
| { |
| "epoch": 84.90627020038785, |
| "grad_norm": 2.2221624851226807, |
| "learning_rate": 0.001, |
| "loss": 0.9136, |
| "step": 262700 |
| }, |
| { |
| "epoch": 84.93859082094376, |
| "grad_norm": 1.898995041847229, |
| "learning_rate": 0.001, |
| "loss": 0.916, |
| "step": 262800 |
| }, |
| { |
| "epoch": 84.97091144149968, |
| "grad_norm": 1.8953394889831543, |
| "learning_rate": 0.001, |
| "loss": 0.9177, |
| "step": 262900 |
| }, |
| { |
| "epoch": 85.00323206205559, |
| "grad_norm": 1.9160492420196533, |
| "learning_rate": 0.001, |
| "loss": 0.9063, |
| "step": 263000 |
| }, |
| { |
| "epoch": 85.03555268261151, |
| "grad_norm": 2.0379796028137207, |
| "learning_rate": 0.001, |
| "loss": 0.8174, |
| "step": 263100 |
| }, |
| { |
| "epoch": 85.06787330316742, |
| "grad_norm": 1.6912503242492676, |
| "learning_rate": 0.001, |
| "loss": 0.8196, |
| "step": 263200 |
| }, |
| { |
| "epoch": 85.10019392372334, |
| "grad_norm": 1.959588885307312, |
| "learning_rate": 0.001, |
| "loss": 0.8337, |
| "step": 263300 |
| }, |
| { |
| "epoch": 85.13251454427925, |
| "grad_norm": 2.460237503051758, |
| "learning_rate": 0.001, |
| "loss": 0.8392, |
| "step": 263400 |
| }, |
| { |
| "epoch": 85.16483516483517, |
| "grad_norm": 1.890858769416809, |
| "learning_rate": 0.001, |
| "loss": 0.8418, |
| "step": 263500 |
| }, |
| { |
| "epoch": 85.19715578539108, |
| "grad_norm": 52.027462005615234, |
| "learning_rate": 0.001, |
| "loss": 0.8351, |
| "step": 263600 |
| }, |
| { |
| "epoch": 85.229476405947, |
| "grad_norm": 2.1597611904144287, |
| "learning_rate": 0.001, |
| "loss": 0.8313, |
| "step": 263700 |
| }, |
| { |
| "epoch": 85.2617970265029, |
| "grad_norm": 1.7659682035446167, |
| "learning_rate": 0.001, |
| "loss": 0.8469, |
| "step": 263800 |
| }, |
| { |
| "epoch": 85.29411764705883, |
| "grad_norm": 2.0225253105163574, |
| "learning_rate": 0.001, |
| "loss": 0.8391, |
| "step": 263900 |
| }, |
| { |
| "epoch": 85.32643826761473, |
| "grad_norm": 1.9262751340866089, |
| "learning_rate": 0.001, |
| "loss": 0.8529, |
| "step": 264000 |
| }, |
| { |
| "epoch": 85.35875888817066, |
| "grad_norm": 2.231473684310913, |
| "learning_rate": 0.001, |
| "loss": 0.8587, |
| "step": 264100 |
| }, |
| { |
| "epoch": 85.39107950872656, |
| "grad_norm": 1.8628264665603638, |
| "learning_rate": 0.001, |
| "loss": 0.8519, |
| "step": 264200 |
| }, |
| { |
| "epoch": 85.42340012928248, |
| "grad_norm": 2.2745962142944336, |
| "learning_rate": 0.001, |
| "loss": 0.8694, |
| "step": 264300 |
| }, |
| { |
| "epoch": 85.45572074983839, |
| "grad_norm": 1.8171344995498657, |
| "learning_rate": 0.001, |
| "loss": 0.8683, |
| "step": 264400 |
| }, |
| { |
| "epoch": 85.48804137039431, |
| "grad_norm": 1.960572600364685, |
| "learning_rate": 0.001, |
| "loss": 0.8763, |
| "step": 264500 |
| }, |
| { |
| "epoch": 85.52036199095022, |
| "grad_norm": 1.8863012790679932, |
| "learning_rate": 0.001, |
| "loss": 0.8631, |
| "step": 264600 |
| }, |
| { |
| "epoch": 85.55268261150614, |
| "grad_norm": 2.6001973152160645, |
| "learning_rate": 0.001, |
| "loss": 0.8896, |
| "step": 264700 |
| }, |
| { |
| "epoch": 85.58500323206205, |
| "grad_norm": 1.8021491765975952, |
| "learning_rate": 0.001, |
| "loss": 0.8748, |
| "step": 264800 |
| }, |
| { |
| "epoch": 85.61732385261797, |
| "grad_norm": 2.0595309734344482, |
| "learning_rate": 0.001, |
| "loss": 0.8827, |
| "step": 264900 |
| }, |
| { |
| "epoch": 85.64964447317388, |
| "grad_norm": 2.0802087783813477, |
| "learning_rate": 0.001, |
| "loss": 0.8979, |
| "step": 265000 |
| }, |
| { |
| "epoch": 85.6819650937298, |
| "grad_norm": 1.499593734741211, |
| "learning_rate": 0.001, |
| "loss": 0.8839, |
| "step": 265100 |
| }, |
| { |
| "epoch": 85.71428571428571, |
| "grad_norm": 1.6684249639511108, |
| "learning_rate": 0.001, |
| "loss": 0.8892, |
| "step": 265200 |
| }, |
| { |
| "epoch": 85.74660633484163, |
| "grad_norm": 3.872833251953125, |
| "learning_rate": 0.001, |
| "loss": 0.8859, |
| "step": 265300 |
| }, |
| { |
| "epoch": 85.77892695539754, |
| "grad_norm": 2.0526092052459717, |
| "learning_rate": 0.001, |
| "loss": 0.8777, |
| "step": 265400 |
| }, |
| { |
| "epoch": 85.81124757595346, |
| "grad_norm": 1.7454049587249756, |
| "learning_rate": 0.001, |
| "loss": 0.9088, |
| "step": 265500 |
| }, |
| { |
| "epoch": 85.84356819650937, |
| "grad_norm": 1.6886650323867798, |
| "learning_rate": 0.001, |
| "loss": 0.8788, |
| "step": 265600 |
| }, |
| { |
| "epoch": 85.87588881706529, |
| "grad_norm": 1.5133458375930786, |
| "learning_rate": 0.001, |
| "loss": 0.9042, |
| "step": 265700 |
| }, |
| { |
| "epoch": 85.9082094376212, |
| "grad_norm": 2.204150676727295, |
| "learning_rate": 0.001, |
| "loss": 0.9147, |
| "step": 265800 |
| }, |
| { |
| "epoch": 85.94053005817712, |
| "grad_norm": 2.4142262935638428, |
| "learning_rate": 0.001, |
| "loss": 0.9162, |
| "step": 265900 |
| }, |
| { |
| "epoch": 85.97285067873302, |
| "grad_norm": 1.7503329515457153, |
| "learning_rate": 0.001, |
| "loss": 0.9047, |
| "step": 266000 |
| }, |
| { |
| "epoch": 86.00517129928895, |
| "grad_norm": 1.7632008790969849, |
| "learning_rate": 0.001, |
| "loss": 0.9059, |
| "step": 266100 |
| }, |
| { |
| "epoch": 86.03749191984487, |
| "grad_norm": 1.7511223554611206, |
| "learning_rate": 0.001, |
| "loss": 0.8161, |
| "step": 266200 |
| }, |
| { |
| "epoch": 86.06981254040078, |
| "grad_norm": 1.6529717445373535, |
| "learning_rate": 0.001, |
| "loss": 0.8258, |
| "step": 266300 |
| }, |
| { |
| "epoch": 86.1021331609567, |
| "grad_norm": 1.7957078218460083, |
| "learning_rate": 0.001, |
| "loss": 0.8162, |
| "step": 266400 |
| }, |
| { |
| "epoch": 86.1344537815126, |
| "grad_norm": 1.553620457649231, |
| "learning_rate": 0.001, |
| "loss": 0.834, |
| "step": 266500 |
| }, |
| { |
| "epoch": 86.16677440206853, |
| "grad_norm": 1.6290562152862549, |
| "learning_rate": 0.001, |
| "loss": 0.8362, |
| "step": 266600 |
| }, |
| { |
| "epoch": 86.19909502262443, |
| "grad_norm": 1.8925175666809082, |
| "learning_rate": 0.001, |
| "loss": 0.8177, |
| "step": 266700 |
| }, |
| { |
| "epoch": 86.23141564318036, |
| "grad_norm": 1.861567497253418, |
| "learning_rate": 0.001, |
| "loss": 0.8392, |
| "step": 266800 |
| }, |
| { |
| "epoch": 86.26373626373626, |
| "grad_norm": 1.780373215675354, |
| "learning_rate": 0.001, |
| "loss": 0.8536, |
| "step": 266900 |
| }, |
| { |
| "epoch": 86.29605688429218, |
| "grad_norm": 2.946547508239746, |
| "learning_rate": 0.001, |
| "loss": 0.8629, |
| "step": 267000 |
| }, |
| { |
| "epoch": 86.32837750484809, |
| "grad_norm": 1.7340573072433472, |
| "learning_rate": 0.001, |
| "loss": 0.8415, |
| "step": 267100 |
| }, |
| { |
| "epoch": 86.36069812540401, |
| "grad_norm": 1.4430204629898071, |
| "learning_rate": 0.001, |
| "loss": 0.847, |
| "step": 267200 |
| }, |
| { |
| "epoch": 86.39301874595992, |
| "grad_norm": 1.9690868854522705, |
| "learning_rate": 0.001, |
| "loss": 0.8584, |
| "step": 267300 |
| }, |
| { |
| "epoch": 86.42533936651584, |
| "grad_norm": 2.0398447513580322, |
| "learning_rate": 0.001, |
| "loss": 0.86, |
| "step": 267400 |
| }, |
| { |
| "epoch": 86.45765998707175, |
| "grad_norm": 4.0924577713012695, |
| "learning_rate": 0.001, |
| "loss": 0.8571, |
| "step": 267500 |
| }, |
| { |
| "epoch": 86.48998060762767, |
| "grad_norm": 1.7488676309585571, |
| "learning_rate": 0.001, |
| "loss": 0.8722, |
| "step": 267600 |
| }, |
| { |
| "epoch": 86.52230122818358, |
| "grad_norm": 1.8430218696594238, |
| "learning_rate": 0.001, |
| "loss": 0.8669, |
| "step": 267700 |
| }, |
| { |
| "epoch": 86.5546218487395, |
| "grad_norm": 1.694116473197937, |
| "learning_rate": 0.001, |
| "loss": 0.876, |
| "step": 267800 |
| }, |
| { |
| "epoch": 86.58694246929541, |
| "grad_norm": 1.5010638236999512, |
| "learning_rate": 0.001, |
| "loss": 0.8711, |
| "step": 267900 |
| }, |
| { |
| "epoch": 86.61926308985133, |
| "grad_norm": 17.831743240356445, |
| "learning_rate": 0.001, |
| "loss": 0.8698, |
| "step": 268000 |
| }, |
| { |
| "epoch": 86.65158371040724, |
| "grad_norm": 1.4711484909057617, |
| "learning_rate": 0.001, |
| "loss": 0.8819, |
| "step": 268100 |
| }, |
| { |
| "epoch": 86.68390433096316, |
| "grad_norm": 1.886446237564087, |
| "learning_rate": 0.001, |
| "loss": 0.8723, |
| "step": 268200 |
| }, |
| { |
| "epoch": 86.71622495151907, |
| "grad_norm": 1.6571316719055176, |
| "learning_rate": 0.001, |
| "loss": 0.882, |
| "step": 268300 |
| }, |
| { |
| "epoch": 86.74854557207499, |
| "grad_norm": 1.8453465700149536, |
| "learning_rate": 0.001, |
| "loss": 0.8937, |
| "step": 268400 |
| }, |
| { |
| "epoch": 86.7808661926309, |
| "grad_norm": 1.5359055995941162, |
| "learning_rate": 0.001, |
| "loss": 0.882, |
| "step": 268500 |
| }, |
| { |
| "epoch": 86.81318681318682, |
| "grad_norm": 1.6801995038986206, |
| "learning_rate": 0.001, |
| "loss": 0.8985, |
| "step": 268600 |
| }, |
| { |
| "epoch": 86.84550743374272, |
| "grad_norm": 1.4833015203475952, |
| "learning_rate": 0.001, |
| "loss": 0.8788, |
| "step": 268700 |
| }, |
| { |
| "epoch": 86.87782805429865, |
| "grad_norm": 1.8023815155029297, |
| "learning_rate": 0.001, |
| "loss": 0.8774, |
| "step": 268800 |
| }, |
| { |
| "epoch": 86.91014867485455, |
| "grad_norm": 1.7117823362350464, |
| "learning_rate": 0.001, |
| "loss": 0.8884, |
| "step": 268900 |
| }, |
| { |
| "epoch": 86.94246929541048, |
| "grad_norm": 1.8823192119598389, |
| "learning_rate": 0.001, |
| "loss": 0.8795, |
| "step": 269000 |
| }, |
| { |
| "epoch": 86.97478991596638, |
| "grad_norm": 5.488122463226318, |
| "learning_rate": 0.001, |
| "loss": 0.9028, |
| "step": 269100 |
| }, |
| { |
| "epoch": 87.0071105365223, |
| "grad_norm": 1.49545156955719, |
| "learning_rate": 0.001, |
| "loss": 0.8898, |
| "step": 269200 |
| }, |
| { |
| "epoch": 87.03943115707821, |
| "grad_norm": 1.3911610841751099, |
| "learning_rate": 0.001, |
| "loss": 0.8081, |
| "step": 269300 |
| }, |
| { |
| "epoch": 87.07175177763413, |
| "grad_norm": 1.6940593719482422, |
| "learning_rate": 0.001, |
| "loss": 0.8151, |
| "step": 269400 |
| }, |
| { |
| "epoch": 87.10407239819004, |
| "grad_norm": 1.702312707901001, |
| "learning_rate": 0.001, |
| "loss": 0.8274, |
| "step": 269500 |
| }, |
| { |
| "epoch": 87.13639301874596, |
| "grad_norm": 1.5827884674072266, |
| "learning_rate": 0.001, |
| "loss": 0.8283, |
| "step": 269600 |
| }, |
| { |
| "epoch": 87.16871363930187, |
| "grad_norm": 1.4666353464126587, |
| "learning_rate": 0.001, |
| "loss": 0.8203, |
| "step": 269700 |
| }, |
| { |
| "epoch": 87.20103425985779, |
| "grad_norm": 1.3696997165679932, |
| "learning_rate": 0.001, |
| "loss": 0.8289, |
| "step": 269800 |
| }, |
| { |
| "epoch": 87.2333548804137, |
| "grad_norm": 5.3828959465026855, |
| "learning_rate": 0.001, |
| "loss": 0.8233, |
| "step": 269900 |
| }, |
| { |
| "epoch": 87.26567550096962, |
| "grad_norm": 2.304546594619751, |
| "learning_rate": 0.001, |
| "loss": 0.8399, |
| "step": 270000 |
| }, |
| { |
| "epoch": 87.29799612152553, |
| "grad_norm": 1.6255569458007812, |
| "learning_rate": 0.001, |
| "loss": 0.8468, |
| "step": 270100 |
| }, |
| { |
| "epoch": 87.33031674208145, |
| "grad_norm": 1.8003710508346558, |
| "learning_rate": 0.001, |
| "loss": 0.8363, |
| "step": 270200 |
| }, |
| { |
| "epoch": 87.36263736263736, |
| "grad_norm": 1.5921064615249634, |
| "learning_rate": 0.001, |
| "loss": 0.8417, |
| "step": 270300 |
| }, |
| { |
| "epoch": 87.39495798319328, |
| "grad_norm": 1.5983816385269165, |
| "learning_rate": 0.001, |
| "loss": 0.838, |
| "step": 270400 |
| }, |
| { |
| "epoch": 87.42727860374919, |
| "grad_norm": 2.061232089996338, |
| "learning_rate": 0.001, |
| "loss": 0.8513, |
| "step": 270500 |
| }, |
| { |
| "epoch": 87.45959922430511, |
| "grad_norm": 1.5790170431137085, |
| "learning_rate": 0.001, |
| "loss": 0.8585, |
| "step": 270600 |
| }, |
| { |
| "epoch": 87.49191984486102, |
| "grad_norm": 1.874551773071289, |
| "learning_rate": 0.001, |
| "loss": 0.8671, |
| "step": 270700 |
| }, |
| { |
| "epoch": 87.52424046541694, |
| "grad_norm": 3.1544809341430664, |
| "learning_rate": 0.001, |
| "loss": 0.847, |
| "step": 270800 |
| }, |
| { |
| "epoch": 87.55656108597285, |
| "grad_norm": 1.533233880996704, |
| "learning_rate": 0.001, |
| "loss": 0.8618, |
| "step": 270900 |
| }, |
| { |
| "epoch": 87.58888170652877, |
| "grad_norm": 1.7700929641723633, |
| "learning_rate": 0.001, |
| "loss": 0.8678, |
| "step": 271000 |
| }, |
| { |
| "epoch": 87.62120232708467, |
| "grad_norm": 1.546187162399292, |
| "learning_rate": 0.001, |
| "loss": 0.8752, |
| "step": 271100 |
| }, |
| { |
| "epoch": 87.6535229476406, |
| "grad_norm": 1.6159831285476685, |
| "learning_rate": 0.001, |
| "loss": 0.8523, |
| "step": 271200 |
| }, |
| { |
| "epoch": 87.6858435681965, |
| "grad_norm": 1.5596297979354858, |
| "learning_rate": 0.001, |
| "loss": 0.8704, |
| "step": 271300 |
| }, |
| { |
| "epoch": 87.71816418875243, |
| "grad_norm": 1.7366268634796143, |
| "learning_rate": 0.001, |
| "loss": 0.8717, |
| "step": 271400 |
| }, |
| { |
| "epoch": 87.75048480930833, |
| "grad_norm": 5.429916858673096, |
| "learning_rate": 0.001, |
| "loss": 0.8716, |
| "step": 271500 |
| }, |
| { |
| "epoch": 87.78280542986425, |
| "grad_norm": 1.5415160655975342, |
| "learning_rate": 0.001, |
| "loss": 0.8722, |
| "step": 271600 |
| }, |
| { |
| "epoch": 87.81512605042016, |
| "grad_norm": 1.9593197107315063, |
| "learning_rate": 0.001, |
| "loss": 0.8802, |
| "step": 271700 |
| }, |
| { |
| "epoch": 87.84744667097608, |
| "grad_norm": 2.1193692684173584, |
| "learning_rate": 0.001, |
| "loss": 0.8842, |
| "step": 271800 |
| }, |
| { |
| "epoch": 87.87976729153199, |
| "grad_norm": 2.191317558288574, |
| "learning_rate": 0.001, |
| "loss": 0.8958, |
| "step": 271900 |
| }, |
| { |
| "epoch": 87.91208791208791, |
| "grad_norm": 2.4855754375457764, |
| "learning_rate": 0.001, |
| "loss": 0.899, |
| "step": 272000 |
| }, |
| { |
| "epoch": 87.94440853264382, |
| "grad_norm": 1.505005955696106, |
| "learning_rate": 0.001, |
| "loss": 0.8938, |
| "step": 272100 |
| }, |
| { |
| "epoch": 87.97672915319974, |
| "grad_norm": 1.9065461158752441, |
| "learning_rate": 0.001, |
| "loss": 0.907, |
| "step": 272200 |
| }, |
| { |
| "epoch": 88.00904977375566, |
| "grad_norm": 1.7264128923416138, |
| "learning_rate": 0.001, |
| "loss": 0.8799, |
| "step": 272300 |
| }, |
| { |
| "epoch": 88.04137039431157, |
| "grad_norm": 1.7042555809020996, |
| "learning_rate": 0.001, |
| "loss": 0.8046, |
| "step": 272400 |
| }, |
| { |
| "epoch": 88.07369101486749, |
| "grad_norm": 2.3185431957244873, |
| "learning_rate": 0.001, |
| "loss": 0.8033, |
| "step": 272500 |
| }, |
| { |
| "epoch": 88.1060116354234, |
| "grad_norm": 1.6043986082077026, |
| "learning_rate": 0.001, |
| "loss": 0.8068, |
| "step": 272600 |
| }, |
| { |
| "epoch": 88.13833225597932, |
| "grad_norm": 1.5428094863891602, |
| "learning_rate": 0.001, |
| "loss": 0.8138, |
| "step": 272700 |
| }, |
| { |
| "epoch": 88.17065287653523, |
| "grad_norm": 1.9031853675842285, |
| "learning_rate": 0.001, |
| "loss": 0.811, |
| "step": 272800 |
| }, |
| { |
| "epoch": 88.20297349709115, |
| "grad_norm": 2.3232343196868896, |
| "learning_rate": 0.001, |
| "loss": 0.8193, |
| "step": 272900 |
| }, |
| { |
| "epoch": 88.23529411764706, |
| "grad_norm": 1.4863861799240112, |
| "learning_rate": 0.001, |
| "loss": 0.8203, |
| "step": 273000 |
| }, |
| { |
| "epoch": 88.26761473820298, |
| "grad_norm": 1.7252644300460815, |
| "learning_rate": 0.001, |
| "loss": 0.836, |
| "step": 273100 |
| }, |
| { |
| "epoch": 88.29993535875889, |
| "grad_norm": 1.682450771331787, |
| "learning_rate": 0.001, |
| "loss": 0.8439, |
| "step": 273200 |
| }, |
| { |
| "epoch": 88.33225597931481, |
| "grad_norm": 1.6543453931808472, |
| "learning_rate": 0.001, |
| "loss": 0.8483, |
| "step": 273300 |
| }, |
| { |
| "epoch": 88.36457659987072, |
| "grad_norm": 1.7153080701828003, |
| "learning_rate": 0.001, |
| "loss": 0.8417, |
| "step": 273400 |
| }, |
| { |
| "epoch": 88.39689722042664, |
| "grad_norm": 2.134333372116089, |
| "learning_rate": 0.001, |
| "loss": 0.8499, |
| "step": 273500 |
| }, |
| { |
| "epoch": 88.42921784098255, |
| "grad_norm": 2.4157066345214844, |
| "learning_rate": 0.001, |
| "loss": 0.8418, |
| "step": 273600 |
| }, |
| { |
| "epoch": 88.46153846153847, |
| "grad_norm": 8.76519775390625, |
| "learning_rate": 0.001, |
| "loss": 0.8498, |
| "step": 273700 |
| }, |
| { |
| "epoch": 88.49385908209437, |
| "grad_norm": 1.6663837432861328, |
| "learning_rate": 0.001, |
| "loss": 0.854, |
| "step": 273800 |
| }, |
| { |
| "epoch": 88.5261797026503, |
| "grad_norm": 1.6248563528060913, |
| "learning_rate": 0.001, |
| "loss": 0.858, |
| "step": 273900 |
| }, |
| { |
| "epoch": 88.5585003232062, |
| "grad_norm": 1.6257447004318237, |
| "learning_rate": 0.001, |
| "loss": 0.865, |
| "step": 274000 |
| }, |
| { |
| "epoch": 88.59082094376213, |
| "grad_norm": 2.0399746894836426, |
| "learning_rate": 0.001, |
| "loss": 0.868, |
| "step": 274100 |
| }, |
| { |
| "epoch": 88.62314156431803, |
| "grad_norm": 1.530640721321106, |
| "learning_rate": 0.001, |
| "loss": 0.8678, |
| "step": 274200 |
| }, |
| { |
| "epoch": 88.65546218487395, |
| "grad_norm": 1.7557321786880493, |
| "learning_rate": 0.001, |
| "loss": 0.8672, |
| "step": 274300 |
| }, |
| { |
| "epoch": 88.68778280542986, |
| "grad_norm": 1.8893425464630127, |
| "learning_rate": 0.001, |
| "loss": 0.8677, |
| "step": 274400 |
| }, |
| { |
| "epoch": 88.72010342598578, |
| "grad_norm": 1.8820282220840454, |
| "learning_rate": 0.001, |
| "loss": 0.8846, |
| "step": 274500 |
| }, |
| { |
| "epoch": 88.75242404654169, |
| "grad_norm": 1.5749956369400024, |
| "learning_rate": 0.001, |
| "loss": 0.8603, |
| "step": 274600 |
| }, |
| { |
| "epoch": 88.78474466709761, |
| "grad_norm": 1.4271475076675415, |
| "learning_rate": 0.001, |
| "loss": 0.8767, |
| "step": 274700 |
| }, |
| { |
| "epoch": 88.81706528765352, |
| "grad_norm": 1.6466025114059448, |
| "learning_rate": 0.001, |
| "loss": 0.8762, |
| "step": 274800 |
| }, |
| { |
| "epoch": 88.84938590820944, |
| "grad_norm": 1.7966150045394897, |
| "learning_rate": 0.001, |
| "loss": 0.8847, |
| "step": 274900 |
| }, |
| { |
| "epoch": 88.88170652876535, |
| "grad_norm": 1.834599256515503, |
| "learning_rate": 0.001, |
| "loss": 0.8873, |
| "step": 275000 |
| }, |
| { |
| "epoch": 88.91402714932127, |
| "grad_norm": 1.5994890928268433, |
| "learning_rate": 0.001, |
| "loss": 0.8755, |
| "step": 275100 |
| }, |
| { |
| "epoch": 88.94634776987718, |
| "grad_norm": 6.51814079284668, |
| "learning_rate": 0.001, |
| "loss": 0.8676, |
| "step": 275200 |
| }, |
| { |
| "epoch": 88.9786683904331, |
| "grad_norm": 1.8895727396011353, |
| "learning_rate": 0.001, |
| "loss": 0.884, |
| "step": 275300 |
| }, |
| { |
| "epoch": 89.01098901098901, |
| "grad_norm": 2.0515339374542236, |
| "learning_rate": 0.001, |
| "loss": 0.8533, |
| "step": 275400 |
| }, |
| { |
| "epoch": 89.04330963154493, |
| "grad_norm": 2.124110698699951, |
| "learning_rate": 0.001, |
| "loss": 0.8042, |
| "step": 275500 |
| }, |
| { |
| "epoch": 89.07563025210084, |
| "grad_norm": 1.8615392446517944, |
| "learning_rate": 0.001, |
| "loss": 0.7895, |
| "step": 275600 |
| }, |
| { |
| "epoch": 89.10795087265676, |
| "grad_norm": 1.9530836343765259, |
| "learning_rate": 0.001, |
| "loss": 0.8206, |
| "step": 275700 |
| }, |
| { |
| "epoch": 89.14027149321267, |
| "grad_norm": 2.4263992309570312, |
| "learning_rate": 0.001, |
| "loss": 0.8153, |
| "step": 275800 |
| }, |
| { |
| "epoch": 89.17259211376859, |
| "grad_norm": 1.5851964950561523, |
| "learning_rate": 0.001, |
| "loss": 0.8218, |
| "step": 275900 |
| }, |
| { |
| "epoch": 89.2049127343245, |
| "grad_norm": 2.3962132930755615, |
| "learning_rate": 0.001, |
| "loss": 0.8249, |
| "step": 276000 |
| }, |
| { |
| "epoch": 89.23723335488042, |
| "grad_norm": 2.4741365909576416, |
| "learning_rate": 0.001, |
| "loss": 0.8316, |
| "step": 276100 |
| }, |
| { |
| "epoch": 89.26955397543632, |
| "grad_norm": 1.6986922025680542, |
| "learning_rate": 0.001, |
| "loss": 0.8327, |
| "step": 276200 |
| }, |
| { |
| "epoch": 89.30187459599225, |
| "grad_norm": 1.554884910583496, |
| "learning_rate": 0.001, |
| "loss": 0.8264, |
| "step": 276300 |
| }, |
| { |
| "epoch": 89.33419521654815, |
| "grad_norm": 7.18624210357666, |
| "learning_rate": 0.001, |
| "loss": 0.8393, |
| "step": 276400 |
| }, |
| { |
| "epoch": 89.36651583710407, |
| "grad_norm": 2.092738628387451, |
| "learning_rate": 0.001, |
| "loss": 0.8523, |
| "step": 276500 |
| }, |
| { |
| "epoch": 89.39883645765998, |
| "grad_norm": 2.584104299545288, |
| "learning_rate": 0.001, |
| "loss": 0.8507, |
| "step": 276600 |
| }, |
| { |
| "epoch": 89.4311570782159, |
| "grad_norm": 1.88053297996521, |
| "learning_rate": 0.001, |
| "loss": 0.8363, |
| "step": 276700 |
| }, |
| { |
| "epoch": 89.46347769877181, |
| "grad_norm": 1.7402923107147217, |
| "learning_rate": 0.001, |
| "loss": 0.8536, |
| "step": 276800 |
| }, |
| { |
| "epoch": 89.49579831932773, |
| "grad_norm": 1.7702052593231201, |
| "learning_rate": 0.001, |
| "loss": 0.8553, |
| "step": 276900 |
| }, |
| { |
| "epoch": 89.52811893988364, |
| "grad_norm": 1.5800539255142212, |
| "learning_rate": 0.001, |
| "loss": 0.8519, |
| "step": 277000 |
| }, |
| { |
| "epoch": 89.56043956043956, |
| "grad_norm": 1.7110040187835693, |
| "learning_rate": 0.001, |
| "loss": 0.8445, |
| "step": 277100 |
| }, |
| { |
| "epoch": 89.59276018099547, |
| "grad_norm": 1.5943825244903564, |
| "learning_rate": 0.001, |
| "loss": 0.8441, |
| "step": 277200 |
| }, |
| { |
| "epoch": 89.62508080155139, |
| "grad_norm": 1.785972237586975, |
| "learning_rate": 0.001, |
| "loss": 0.8622, |
| "step": 277300 |
| }, |
| { |
| "epoch": 89.6574014221073, |
| "grad_norm": 1.652089238166809, |
| "learning_rate": 0.001, |
| "loss": 0.8598, |
| "step": 277400 |
| }, |
| { |
| "epoch": 89.68972204266322, |
| "grad_norm": 1.664921522140503, |
| "learning_rate": 0.001, |
| "loss": 0.8516, |
| "step": 277500 |
| }, |
| { |
| "epoch": 89.72204266321913, |
| "grad_norm": 1.6153144836425781, |
| "learning_rate": 0.001, |
| "loss": 0.869, |
| "step": 277600 |
| }, |
| { |
| "epoch": 89.75436328377505, |
| "grad_norm": 2.071241617202759, |
| "learning_rate": 0.001, |
| "loss": 0.8643, |
| "step": 277700 |
| }, |
| { |
| "epoch": 89.78668390433096, |
| "grad_norm": 1.825007438659668, |
| "learning_rate": 0.001, |
| "loss": 0.8663, |
| "step": 277800 |
| }, |
| { |
| "epoch": 89.81900452488688, |
| "grad_norm": 2.0584936141967773, |
| "learning_rate": 0.001, |
| "loss": 0.8828, |
| "step": 277900 |
| }, |
| { |
| "epoch": 89.85132514544279, |
| "grad_norm": 2.5568366050720215, |
| "learning_rate": 0.001, |
| "loss": 0.8628, |
| "step": 278000 |
| }, |
| { |
| "epoch": 89.88364576599871, |
| "grad_norm": 1.7184977531433105, |
| "learning_rate": 0.001, |
| "loss": 0.8739, |
| "step": 278100 |
| }, |
| { |
| "epoch": 89.91596638655462, |
| "grad_norm": 1.8462786674499512, |
| "learning_rate": 0.001, |
| "loss": 0.8731, |
| "step": 278200 |
| }, |
| { |
| "epoch": 89.94828700711054, |
| "grad_norm": 3.5282583236694336, |
| "learning_rate": 0.001, |
| "loss": 0.8852, |
| "step": 278300 |
| }, |
| { |
| "epoch": 89.98060762766644, |
| "grad_norm": 1.5443145036697388, |
| "learning_rate": 0.001, |
| "loss": 0.8737, |
| "step": 278400 |
| }, |
| { |
| "epoch": 90.01292824822237, |
| "grad_norm": 13.76854133605957, |
| "learning_rate": 0.001, |
| "loss": 0.821, |
| "step": 278500 |
| }, |
| { |
| "epoch": 90.04524886877829, |
| "grad_norm": 1.5974069833755493, |
| "learning_rate": 0.001, |
| "loss": 0.7964, |
| "step": 278600 |
| }, |
| { |
| "epoch": 90.0775694893342, |
| "grad_norm": 1.4872169494628906, |
| "learning_rate": 0.001, |
| "loss": 0.8018, |
| "step": 278700 |
| }, |
| { |
| "epoch": 90.10989010989012, |
| "grad_norm": 2.4473085403442383, |
| "learning_rate": 0.001, |
| "loss": 0.7932, |
| "step": 278800 |
| }, |
| { |
| "epoch": 90.14221073044602, |
| "grad_norm": 1.7129226922988892, |
| "learning_rate": 0.001, |
| "loss": 0.8181, |
| "step": 278900 |
| }, |
| { |
| "epoch": 90.17453135100195, |
| "grad_norm": 1.6856908798217773, |
| "learning_rate": 0.001, |
| "loss": 0.8114, |
| "step": 279000 |
| }, |
| { |
| "epoch": 90.20685197155785, |
| "grad_norm": 1.5722812414169312, |
| "learning_rate": 0.001, |
| "loss": 0.8026, |
| "step": 279100 |
| }, |
| { |
| "epoch": 90.23917259211377, |
| "grad_norm": 1.3502541780471802, |
| "learning_rate": 0.001, |
| "loss": 0.8319, |
| "step": 279200 |
| }, |
| { |
| "epoch": 90.27149321266968, |
| "grad_norm": 1.4687424898147583, |
| "learning_rate": 0.001, |
| "loss": 0.8336, |
| "step": 279300 |
| }, |
| { |
| "epoch": 90.3038138332256, |
| "grad_norm": 1.8175430297851562, |
| "learning_rate": 0.001, |
| "loss": 0.8138, |
| "step": 279400 |
| }, |
| { |
| "epoch": 90.33613445378151, |
| "grad_norm": 1.4699106216430664, |
| "learning_rate": 0.001, |
| "loss": 0.8276, |
| "step": 279500 |
| }, |
| { |
| "epoch": 90.36845507433743, |
| "grad_norm": 2.0859620571136475, |
| "learning_rate": 0.001, |
| "loss": 0.8293, |
| "step": 279600 |
| }, |
| { |
| "epoch": 90.40077569489334, |
| "grad_norm": 1.5765058994293213, |
| "learning_rate": 0.001, |
| "loss": 0.8308, |
| "step": 279700 |
| }, |
| { |
| "epoch": 90.43309631544926, |
| "grad_norm": 1.4347844123840332, |
| "learning_rate": 0.001, |
| "loss": 0.825, |
| "step": 279800 |
| }, |
| { |
| "epoch": 90.46541693600517, |
| "grad_norm": 1.8731211423873901, |
| "learning_rate": 0.001, |
| "loss": 0.8459, |
| "step": 279900 |
| }, |
| { |
| "epoch": 90.49773755656109, |
| "grad_norm": 1.7345112562179565, |
| "learning_rate": 0.001, |
| "loss": 0.8469, |
| "step": 280000 |
| }, |
| { |
| "epoch": 90.530058177117, |
| "grad_norm": 1.9544506072998047, |
| "learning_rate": 0.001, |
| "loss": 0.8388, |
| "step": 280100 |
| }, |
| { |
| "epoch": 90.56237879767292, |
| "grad_norm": 1.8892264366149902, |
| "learning_rate": 0.001, |
| "loss": 0.8392, |
| "step": 280200 |
| }, |
| { |
| "epoch": 90.59469941822883, |
| "grad_norm": 2.320929765701294, |
| "learning_rate": 0.001, |
| "loss": 0.8473, |
| "step": 280300 |
| }, |
| { |
| "epoch": 90.62702003878475, |
| "grad_norm": 1.7178595066070557, |
| "learning_rate": 0.001, |
| "loss": 0.8469, |
| "step": 280400 |
| }, |
| { |
| "epoch": 90.65934065934066, |
| "grad_norm": 1.7863913774490356, |
| "learning_rate": 0.001, |
| "loss": 0.8319, |
| "step": 280500 |
| }, |
| { |
| "epoch": 90.69166127989658, |
| "grad_norm": 1.957006573677063, |
| "learning_rate": 0.001, |
| "loss": 0.8543, |
| "step": 280600 |
| }, |
| { |
| "epoch": 90.72398190045249, |
| "grad_norm": 1.944828748703003, |
| "learning_rate": 0.001, |
| "loss": 0.8531, |
| "step": 280700 |
| }, |
| { |
| "epoch": 90.75630252100841, |
| "grad_norm": 1.5873464345932007, |
| "learning_rate": 0.001, |
| "loss": 0.8541, |
| "step": 280800 |
| }, |
| { |
| "epoch": 90.78862314156432, |
| "grad_norm": 1.8036553859710693, |
| "learning_rate": 0.001, |
| "loss": 0.8562, |
| "step": 280900 |
| }, |
| { |
| "epoch": 90.82094376212024, |
| "grad_norm": 1.9891469478607178, |
| "learning_rate": 0.001, |
| "loss": 0.8705, |
| "step": 281000 |
| }, |
| { |
| "epoch": 90.85326438267614, |
| "grad_norm": 2.126394510269165, |
| "learning_rate": 0.001, |
| "loss": 0.8843, |
| "step": 281100 |
| }, |
| { |
| "epoch": 90.88558500323207, |
| "grad_norm": 2.3660521507263184, |
| "learning_rate": 0.001, |
| "loss": 0.883, |
| "step": 281200 |
| }, |
| { |
| "epoch": 90.91790562378797, |
| "grad_norm": 1.8034424781799316, |
| "learning_rate": 0.001, |
| "loss": 0.8748, |
| "step": 281300 |
| }, |
| { |
| "epoch": 90.9502262443439, |
| "grad_norm": 2.120250701904297, |
| "learning_rate": 0.001, |
| "loss": 0.8603, |
| "step": 281400 |
| }, |
| { |
| "epoch": 90.9825468648998, |
| "grad_norm": 1.402356743812561, |
| "learning_rate": 0.001, |
| "loss": 0.8768, |
| "step": 281500 |
| }, |
| { |
| "epoch": 91.01486748545572, |
| "grad_norm": 1.658185362815857, |
| "learning_rate": 0.001, |
| "loss": 0.8144, |
| "step": 281600 |
| }, |
| { |
| "epoch": 91.04718810601163, |
| "grad_norm": 1.5982550382614136, |
| "learning_rate": 0.001, |
| "loss": 0.7909, |
| "step": 281700 |
| }, |
| { |
| "epoch": 91.07950872656755, |
| "grad_norm": 1.4638272523880005, |
| "learning_rate": 0.001, |
| "loss": 0.7907, |
| "step": 281800 |
| }, |
| { |
| "epoch": 91.11182934712346, |
| "grad_norm": 1.9809621572494507, |
| "learning_rate": 0.001, |
| "loss": 0.8007, |
| "step": 281900 |
| }, |
| { |
| "epoch": 91.14414996767938, |
| "grad_norm": 1.5442944765090942, |
| "learning_rate": 0.001, |
| "loss": 0.8162, |
| "step": 282000 |
| }, |
| { |
| "epoch": 91.17647058823529, |
| "grad_norm": 1.343239665031433, |
| "learning_rate": 0.001, |
| "loss": 0.8132, |
| "step": 282100 |
| }, |
| { |
| "epoch": 91.20879120879121, |
| "grad_norm": 1.709086537361145, |
| "learning_rate": 0.001, |
| "loss": 0.8112, |
| "step": 282200 |
| }, |
| { |
| "epoch": 91.24111182934712, |
| "grad_norm": 1.8876216411590576, |
| "learning_rate": 0.001, |
| "loss": 0.8102, |
| "step": 282300 |
| }, |
| { |
| "epoch": 91.27343244990304, |
| "grad_norm": 1.4226528406143188, |
| "learning_rate": 0.001, |
| "loss": 0.8129, |
| "step": 282400 |
| }, |
| { |
| "epoch": 91.30575307045895, |
| "grad_norm": 1.2886971235275269, |
| "learning_rate": 0.001, |
| "loss": 0.8256, |
| "step": 282500 |
| }, |
| { |
| "epoch": 91.33807369101487, |
| "grad_norm": 1.4405816793441772, |
| "learning_rate": 0.001, |
| "loss": 0.8168, |
| "step": 282600 |
| }, |
| { |
| "epoch": 91.37039431157078, |
| "grad_norm": 1.7710850238800049, |
| "learning_rate": 0.001, |
| "loss": 0.8317, |
| "step": 282700 |
| }, |
| { |
| "epoch": 91.4027149321267, |
| "grad_norm": 1.5474108457565308, |
| "learning_rate": 0.001, |
| "loss": 0.8283, |
| "step": 282800 |
| }, |
| { |
| "epoch": 91.4350355526826, |
| "grad_norm": 1.4025057554244995, |
| "learning_rate": 0.001, |
| "loss": 0.8198, |
| "step": 282900 |
| }, |
| { |
| "epoch": 91.46735617323853, |
| "grad_norm": 1.5733214616775513, |
| "learning_rate": 0.001, |
| "loss": 0.8275, |
| "step": 283000 |
| }, |
| { |
| "epoch": 91.49967679379444, |
| "grad_norm": 1.640556812286377, |
| "learning_rate": 0.001, |
| "loss": 0.8366, |
| "step": 283100 |
| }, |
| { |
| "epoch": 91.53199741435036, |
| "grad_norm": 1.873395562171936, |
| "learning_rate": 0.001, |
| "loss": 0.8338, |
| "step": 283200 |
| }, |
| { |
| "epoch": 91.56431803490626, |
| "grad_norm": 1.2778507471084595, |
| "learning_rate": 0.001, |
| "loss": 0.8279, |
| "step": 283300 |
| }, |
| { |
| "epoch": 91.59663865546219, |
| "grad_norm": 1.236539363861084, |
| "learning_rate": 0.001, |
| "loss": 0.8453, |
| "step": 283400 |
| }, |
| { |
| "epoch": 91.6289592760181, |
| "grad_norm": 1.8194011449813843, |
| "learning_rate": 0.001, |
| "loss": 0.8474, |
| "step": 283500 |
| }, |
| { |
| "epoch": 91.66127989657402, |
| "grad_norm": 1.5453321933746338, |
| "learning_rate": 0.001, |
| "loss": 0.8523, |
| "step": 283600 |
| }, |
| { |
| "epoch": 91.69360051712992, |
| "grad_norm": 2.883462429046631, |
| "learning_rate": 0.001, |
| "loss": 0.8488, |
| "step": 283700 |
| }, |
| { |
| "epoch": 91.72592113768584, |
| "grad_norm": 1.4357653856277466, |
| "learning_rate": 0.001, |
| "loss": 0.8514, |
| "step": 283800 |
| }, |
| { |
| "epoch": 91.75824175824175, |
| "grad_norm": 1.421116590499878, |
| "learning_rate": 0.001, |
| "loss": 0.8597, |
| "step": 283900 |
| }, |
| { |
| "epoch": 91.79056237879767, |
| "grad_norm": 1.4790443181991577, |
| "learning_rate": 0.001, |
| "loss": 0.8558, |
| "step": 284000 |
| }, |
| { |
| "epoch": 91.82288299935358, |
| "grad_norm": 1.6023317575454712, |
| "learning_rate": 0.001, |
| "loss": 0.8622, |
| "step": 284100 |
| }, |
| { |
| "epoch": 91.8552036199095, |
| "grad_norm": 1.4575282335281372, |
| "learning_rate": 0.001, |
| "loss": 0.8699, |
| "step": 284200 |
| }, |
| { |
| "epoch": 91.88752424046541, |
| "grad_norm": 1.5747337341308594, |
| "learning_rate": 0.001, |
| "loss": 0.8589, |
| "step": 284300 |
| }, |
| { |
| "epoch": 91.91984486102133, |
| "grad_norm": 1.5780296325683594, |
| "learning_rate": 0.001, |
| "loss": 0.8587, |
| "step": 284400 |
| }, |
| { |
| "epoch": 91.95216548157724, |
| "grad_norm": 1.3909651041030884, |
| "learning_rate": 0.001, |
| "loss": 0.8712, |
| "step": 284500 |
| }, |
| { |
| "epoch": 91.98448610213316, |
| "grad_norm": 1.6297881603240967, |
| "learning_rate": 0.001, |
| "loss": 0.8557, |
| "step": 284600 |
| }, |
| { |
| "epoch": 92.01680672268908, |
| "grad_norm": 9.393735885620117, |
| "learning_rate": 0.001, |
| "loss": 0.7966, |
| "step": 284700 |
| }, |
| { |
| "epoch": 92.04912734324499, |
| "grad_norm": 2.9230968952178955, |
| "learning_rate": 0.001, |
| "loss": 0.7938, |
| "step": 284800 |
| }, |
| { |
| "epoch": 92.08144796380091, |
| "grad_norm": 3.2949323654174805, |
| "learning_rate": 0.001, |
| "loss": 0.7879, |
| "step": 284900 |
| }, |
| { |
| "epoch": 92.11376858435682, |
| "grad_norm": 2.239393949508667, |
| "learning_rate": 0.001, |
| "loss": 0.7979, |
| "step": 285000 |
| }, |
| { |
| "epoch": 92.14608920491274, |
| "grad_norm": 2.370790958404541, |
| "learning_rate": 0.001, |
| "loss": 0.8035, |
| "step": 285100 |
| }, |
| { |
| "epoch": 92.17840982546865, |
| "grad_norm": 2.7069642543792725, |
| "learning_rate": 0.001, |
| "loss": 0.8146, |
| "step": 285200 |
| }, |
| { |
| "epoch": 92.21073044602457, |
| "grad_norm": 2.8528716564178467, |
| "learning_rate": 0.001, |
| "loss": 0.8003, |
| "step": 285300 |
| }, |
| { |
| "epoch": 92.24305106658048, |
| "grad_norm": 2.916715383529663, |
| "learning_rate": 0.001, |
| "loss": 0.8079, |
| "step": 285400 |
| }, |
| { |
| "epoch": 92.2753716871364, |
| "grad_norm": 2.335475444793701, |
| "learning_rate": 0.001, |
| "loss": 0.8268, |
| "step": 285500 |
| }, |
| { |
| "epoch": 92.3076923076923, |
| "grad_norm": 2.3702704906463623, |
| "learning_rate": 0.001, |
| "loss": 0.8217, |
| "step": 285600 |
| }, |
| { |
| "epoch": 92.34001292824823, |
| "grad_norm": 2.541818380355835, |
| "learning_rate": 0.001, |
| "loss": 0.8306, |
| "step": 285700 |
| }, |
| { |
| "epoch": 92.37233354880414, |
| "grad_norm": 2.096249580383301, |
| "learning_rate": 0.001, |
| "loss": 0.814, |
| "step": 285800 |
| }, |
| { |
| "epoch": 92.40465416936006, |
| "grad_norm": 1.8846521377563477, |
| "learning_rate": 0.001, |
| "loss": 0.8156, |
| "step": 285900 |
| }, |
| { |
| "epoch": 92.43697478991596, |
| "grad_norm": 2.9999451637268066, |
| "learning_rate": 0.001, |
| "loss": 0.825, |
| "step": 286000 |
| }, |
| { |
| "epoch": 92.46929541047189, |
| "grad_norm": 1.9357293844223022, |
| "learning_rate": 0.001, |
| "loss": 0.8355, |
| "step": 286100 |
| }, |
| { |
| "epoch": 92.5016160310278, |
| "grad_norm": 2.528646469116211, |
| "learning_rate": 0.001, |
| "loss": 0.8257, |
| "step": 286200 |
| }, |
| { |
| "epoch": 92.53393665158372, |
| "grad_norm": 2.2945749759674072, |
| "learning_rate": 0.001, |
| "loss": 0.8417, |
| "step": 286300 |
| }, |
| { |
| "epoch": 92.56625727213962, |
| "grad_norm": 4.917908668518066, |
| "learning_rate": 0.001, |
| "loss": 0.8357, |
| "step": 286400 |
| }, |
| { |
| "epoch": 92.59857789269554, |
| "grad_norm": 1.9125192165374756, |
| "learning_rate": 0.001, |
| "loss": 0.8315, |
| "step": 286500 |
| }, |
| { |
| "epoch": 92.63089851325145, |
| "grad_norm": 2.123912811279297, |
| "learning_rate": 0.001, |
| "loss": 0.8342, |
| "step": 286600 |
| }, |
| { |
| "epoch": 92.66321913380737, |
| "grad_norm": 1.902299404144287, |
| "learning_rate": 0.001, |
| "loss": 0.8473, |
| "step": 286700 |
| }, |
| { |
| "epoch": 92.69553975436328, |
| "grad_norm": 2.312319278717041, |
| "learning_rate": 0.001, |
| "loss": 0.8454, |
| "step": 286800 |
| }, |
| { |
| "epoch": 92.7278603749192, |
| "grad_norm": 2.930506944656372, |
| "learning_rate": 0.001, |
| "loss": 0.8352, |
| "step": 286900 |
| }, |
| { |
| "epoch": 92.76018099547511, |
| "grad_norm": 2.298229932785034, |
| "learning_rate": 0.001, |
| "loss": 0.8647, |
| "step": 287000 |
| }, |
| { |
| "epoch": 92.79250161603103, |
| "grad_norm": 2.502454996109009, |
| "learning_rate": 0.001, |
| "loss": 0.8454, |
| "step": 287100 |
| }, |
| { |
| "epoch": 92.82482223658694, |
| "grad_norm": 2.3575501441955566, |
| "learning_rate": 0.001, |
| "loss": 0.8612, |
| "step": 287200 |
| }, |
| { |
| "epoch": 92.85714285714286, |
| "grad_norm": 2.6692607402801514, |
| "learning_rate": 0.001, |
| "loss": 0.8579, |
| "step": 287300 |
| }, |
| { |
| "epoch": 92.88946347769877, |
| "grad_norm": 2.066166400909424, |
| "learning_rate": 0.001, |
| "loss": 0.8441, |
| "step": 287400 |
| }, |
| { |
| "epoch": 92.92178409825469, |
| "grad_norm": 2.153433084487915, |
| "learning_rate": 0.001, |
| "loss": 0.8543, |
| "step": 287500 |
| }, |
| { |
| "epoch": 92.9541047188106, |
| "grad_norm": 2.5967466831207275, |
| "learning_rate": 0.001, |
| "loss": 0.8561, |
| "step": 287600 |
| }, |
| { |
| "epoch": 92.98642533936652, |
| "grad_norm": 2.4448750019073486, |
| "learning_rate": 0.001, |
| "loss": 0.863, |
| "step": 287700 |
| }, |
| { |
| "epoch": 93.01874595992243, |
| "grad_norm": 2.5534839630126953, |
| "learning_rate": 0.001, |
| "loss": 0.8194, |
| "step": 287800 |
| }, |
| { |
| "epoch": 93.05106658047835, |
| "grad_norm": 2.0508859157562256, |
| "learning_rate": 0.001, |
| "loss": 0.7717, |
| "step": 287900 |
| }, |
| { |
| "epoch": 93.08338720103426, |
| "grad_norm": 1.9136475324630737, |
| "learning_rate": 0.001, |
| "loss": 0.7865, |
| "step": 288000 |
| }, |
| { |
| "epoch": 93.11570782159018, |
| "grad_norm": 1.851262092590332, |
| "learning_rate": 0.001, |
| "loss": 0.7909, |
| "step": 288100 |
| }, |
| { |
| "epoch": 93.14802844214609, |
| "grad_norm": 1.6321786642074585, |
| "learning_rate": 0.001, |
| "loss": 0.7939, |
| "step": 288200 |
| }, |
| { |
| "epoch": 93.180349062702, |
| "grad_norm": 2.0846023559570312, |
| "learning_rate": 0.001, |
| "loss": 0.7887, |
| "step": 288300 |
| }, |
| { |
| "epoch": 93.21266968325791, |
| "grad_norm": 1.5281572341918945, |
| "learning_rate": 0.001, |
| "loss": 0.794, |
| "step": 288400 |
| }, |
| { |
| "epoch": 93.24499030381384, |
| "grad_norm": 1.4659923315048218, |
| "learning_rate": 0.001, |
| "loss": 0.7852, |
| "step": 288500 |
| }, |
| { |
| "epoch": 93.27731092436974, |
| "grad_norm": 1.5710750818252563, |
| "learning_rate": 0.001, |
| "loss": 0.7999, |
| "step": 288600 |
| }, |
| { |
| "epoch": 93.30963154492567, |
| "grad_norm": 1.6601274013519287, |
| "learning_rate": 0.001, |
| "loss": 0.8061, |
| "step": 288700 |
| }, |
| { |
| "epoch": 93.34195216548157, |
| "grad_norm": 2.21152663230896, |
| "learning_rate": 0.001, |
| "loss": 0.8111, |
| "step": 288800 |
| }, |
| { |
| "epoch": 93.3742727860375, |
| "grad_norm": 1.6584738492965698, |
| "learning_rate": 0.001, |
| "loss": 0.8111, |
| "step": 288900 |
| }, |
| { |
| "epoch": 93.4065934065934, |
| "grad_norm": 1.7776179313659668, |
| "learning_rate": 0.001, |
| "loss": 0.8083, |
| "step": 289000 |
| }, |
| { |
| "epoch": 93.43891402714932, |
| "grad_norm": 1.865175485610962, |
| "learning_rate": 0.001, |
| "loss": 0.8207, |
| "step": 289100 |
| }, |
| { |
| "epoch": 93.47123464770523, |
| "grad_norm": 2.4652998447418213, |
| "learning_rate": 0.001, |
| "loss": 0.8225, |
| "step": 289200 |
| }, |
| { |
| "epoch": 93.50355526826115, |
| "grad_norm": 1.636830449104309, |
| "learning_rate": 0.001, |
| "loss": 0.8235, |
| "step": 289300 |
| }, |
| { |
| "epoch": 93.53587588881706, |
| "grad_norm": 1.8473851680755615, |
| "learning_rate": 0.001, |
| "loss": 0.8259, |
| "step": 289400 |
| }, |
| { |
| "epoch": 93.56819650937298, |
| "grad_norm": 1.7582794427871704, |
| "learning_rate": 0.001, |
| "loss": 0.8427, |
| "step": 289500 |
| }, |
| { |
| "epoch": 93.60051712992889, |
| "grad_norm": 1.745152473449707, |
| "learning_rate": 0.001, |
| "loss": 0.8249, |
| "step": 289600 |
| }, |
| { |
| "epoch": 93.63283775048481, |
| "grad_norm": 1.648447036743164, |
| "learning_rate": 0.001, |
| "loss": 0.8273, |
| "step": 289700 |
| }, |
| { |
| "epoch": 93.66515837104072, |
| "grad_norm": 1.4837515354156494, |
| "learning_rate": 0.001, |
| "loss": 0.8356, |
| "step": 289800 |
| }, |
| { |
| "epoch": 93.69747899159664, |
| "grad_norm": 1.7128311395645142, |
| "learning_rate": 0.001, |
| "loss": 0.8498, |
| "step": 289900 |
| }, |
| { |
| "epoch": 93.72979961215255, |
| "grad_norm": 2.1904046535491943, |
| "learning_rate": 0.001, |
| "loss": 0.842, |
| "step": 290000 |
| }, |
| { |
| "epoch": 93.76212023270847, |
| "grad_norm": 1.8092793226242065, |
| "learning_rate": 0.001, |
| "loss": 0.8392, |
| "step": 290100 |
| }, |
| { |
| "epoch": 93.79444085326438, |
| "grad_norm": 1.474500060081482, |
| "learning_rate": 0.001, |
| "loss": 0.8514, |
| "step": 290200 |
| }, |
| { |
| "epoch": 93.8267614738203, |
| "grad_norm": 1.9503815174102783, |
| "learning_rate": 0.001, |
| "loss": 0.8679, |
| "step": 290300 |
| }, |
| { |
| "epoch": 93.8590820943762, |
| "grad_norm": 1.7040289640426636, |
| "learning_rate": 0.001, |
| "loss": 0.8419, |
| "step": 290400 |
| }, |
| { |
| "epoch": 93.89140271493213, |
| "grad_norm": 1.7533595561981201, |
| "learning_rate": 0.001, |
| "loss": 0.8657, |
| "step": 290500 |
| }, |
| { |
| "epoch": 93.92372333548803, |
| "grad_norm": 24.08082389831543, |
| "learning_rate": 0.001, |
| "loss": 0.8406, |
| "step": 290600 |
| }, |
| { |
| "epoch": 93.95604395604396, |
| "grad_norm": 1.9626394510269165, |
| "learning_rate": 0.001, |
| "loss": 0.8745, |
| "step": 290700 |
| }, |
| { |
| "epoch": 93.98836457659988, |
| "grad_norm": 2.037595272064209, |
| "learning_rate": 0.001, |
| "loss": 0.8533, |
| "step": 290800 |
| }, |
| { |
| "epoch": 94.02068519715579, |
| "grad_norm": 1.519704818725586, |
| "learning_rate": 0.001, |
| "loss": 0.8253, |
| "step": 290900 |
| }, |
| { |
| "epoch": 94.0530058177117, |
| "grad_norm": 1.9078086614608765, |
| "learning_rate": 0.001, |
| "loss": 0.7688, |
| "step": 291000 |
| }, |
| { |
| "epoch": 94.08532643826761, |
| "grad_norm": 1.8446297645568848, |
| "learning_rate": 0.001, |
| "loss": 0.785, |
| "step": 291100 |
| }, |
| { |
| "epoch": 94.11764705882354, |
| "grad_norm": 2.2081801891326904, |
| "learning_rate": 0.001, |
| "loss": 0.7876, |
| "step": 291200 |
| }, |
| { |
| "epoch": 94.14996767937944, |
| "grad_norm": 1.5179626941680908, |
| "learning_rate": 0.001, |
| "loss": 0.7979, |
| "step": 291300 |
| }, |
| { |
| "epoch": 94.18228829993537, |
| "grad_norm": 1.627453088760376, |
| "learning_rate": 0.001, |
| "loss": 0.7764, |
| "step": 291400 |
| }, |
| { |
| "epoch": 94.21460892049127, |
| "grad_norm": 1.617651343345642, |
| "learning_rate": 0.001, |
| "loss": 0.7927, |
| "step": 291500 |
| }, |
| { |
| "epoch": 94.2469295410472, |
| "grad_norm": 2.2514536380767822, |
| "learning_rate": 0.001, |
| "loss": 0.7932, |
| "step": 291600 |
| }, |
| { |
| "epoch": 94.2792501616031, |
| "grad_norm": 5.1066460609436035, |
| "learning_rate": 0.001, |
| "loss": 0.8039, |
| "step": 291700 |
| }, |
| { |
| "epoch": 94.31157078215902, |
| "grad_norm": 1.6531953811645508, |
| "learning_rate": 0.001, |
| "loss": 0.8012, |
| "step": 291800 |
| }, |
| { |
| "epoch": 94.34389140271493, |
| "grad_norm": 1.9666928052902222, |
| "learning_rate": 0.001, |
| "loss": 0.8011, |
| "step": 291900 |
| }, |
| { |
| "epoch": 94.37621202327085, |
| "grad_norm": 1.7861919403076172, |
| "learning_rate": 0.001, |
| "loss": 0.8004, |
| "step": 292000 |
| }, |
| { |
| "epoch": 94.40853264382676, |
| "grad_norm": 1.7683416604995728, |
| "learning_rate": 0.001, |
| "loss": 0.8082, |
| "step": 292100 |
| }, |
| { |
| "epoch": 94.44085326438268, |
| "grad_norm": 2.031372308731079, |
| "learning_rate": 0.001, |
| "loss": 0.8202, |
| "step": 292200 |
| }, |
| { |
| "epoch": 94.47317388493859, |
| "grad_norm": 1.7173404693603516, |
| "learning_rate": 0.001, |
| "loss": 0.8169, |
| "step": 292300 |
| }, |
| { |
| "epoch": 94.50549450549451, |
| "grad_norm": 1.615218162536621, |
| "learning_rate": 0.001, |
| "loss": 0.8257, |
| "step": 292400 |
| }, |
| { |
| "epoch": 94.53781512605042, |
| "grad_norm": 1.601548671722412, |
| "learning_rate": 0.001, |
| "loss": 0.8292, |
| "step": 292500 |
| }, |
| { |
| "epoch": 94.57013574660634, |
| "grad_norm": 5.030603408813477, |
| "learning_rate": 0.001, |
| "loss": 0.8219, |
| "step": 292600 |
| }, |
| { |
| "epoch": 94.60245636716225, |
| "grad_norm": 1.6010547876358032, |
| "learning_rate": 0.001, |
| "loss": 0.8289, |
| "step": 292700 |
| }, |
| { |
| "epoch": 94.63477698771817, |
| "grad_norm": 1.8350695371627808, |
| "learning_rate": 0.001, |
| "loss": 0.8296, |
| "step": 292800 |
| }, |
| { |
| "epoch": 94.66709760827408, |
| "grad_norm": 2.2363829612731934, |
| "learning_rate": 0.001, |
| "loss": 0.8278, |
| "step": 292900 |
| }, |
| { |
| "epoch": 94.69941822883, |
| "grad_norm": 1.8991843461990356, |
| "learning_rate": 0.001, |
| "loss": 0.8389, |
| "step": 293000 |
| }, |
| { |
| "epoch": 94.7317388493859, |
| "grad_norm": 3.0361104011535645, |
| "learning_rate": 0.001, |
| "loss": 0.8456, |
| "step": 293100 |
| }, |
| { |
| "epoch": 94.76405946994183, |
| "grad_norm": 1.8470691442489624, |
| "learning_rate": 0.001, |
| "loss": 0.8518, |
| "step": 293200 |
| }, |
| { |
| "epoch": 94.79638009049773, |
| "grad_norm": 1.6898738145828247, |
| "learning_rate": 0.001, |
| "loss": 0.8523, |
| "step": 293300 |
| }, |
| { |
| "epoch": 94.82870071105366, |
| "grad_norm": 1.8268592357635498, |
| "learning_rate": 0.001, |
| "loss": 0.8349, |
| "step": 293400 |
| }, |
| { |
| "epoch": 94.86102133160956, |
| "grad_norm": 1.5458370447158813, |
| "learning_rate": 0.001, |
| "loss": 0.8191, |
| "step": 293500 |
| }, |
| { |
| "epoch": 94.89334195216549, |
| "grad_norm": 1.5829081535339355, |
| "learning_rate": 0.001, |
| "loss": 0.8532, |
| "step": 293600 |
| }, |
| { |
| "epoch": 94.9256625727214, |
| "grad_norm": 2.295776844024658, |
| "learning_rate": 0.001, |
| "loss": 0.8528, |
| "step": 293700 |
| }, |
| { |
| "epoch": 94.95798319327731, |
| "grad_norm": 3.22947359085083, |
| "learning_rate": 0.001, |
| "loss": 0.8541, |
| "step": 293800 |
| }, |
| { |
| "epoch": 94.99030381383322, |
| "grad_norm": 2.0059330463409424, |
| "learning_rate": 0.001, |
| "loss": 0.858, |
| "step": 293900 |
| }, |
| { |
| "epoch": 95.02262443438914, |
| "grad_norm": 1.7929795980453491, |
| "learning_rate": 0.001, |
| "loss": 0.8092, |
| "step": 294000 |
| }, |
| { |
| "epoch": 95.05494505494505, |
| "grad_norm": 1.520918369293213, |
| "learning_rate": 0.001, |
| "loss": 0.7758, |
| "step": 294100 |
| }, |
| { |
| "epoch": 95.08726567550097, |
| "grad_norm": 1.6936134099960327, |
| "learning_rate": 0.001, |
| "loss": 0.7783, |
| "step": 294200 |
| }, |
| { |
| "epoch": 95.11958629605688, |
| "grad_norm": 1.9839485883712769, |
| "learning_rate": 0.001, |
| "loss": 0.7663, |
| "step": 294300 |
| }, |
| { |
| "epoch": 95.1519069166128, |
| "grad_norm": 1.593344807624817, |
| "learning_rate": 0.001, |
| "loss": 0.7913, |
| "step": 294400 |
| }, |
| { |
| "epoch": 95.18422753716871, |
| "grad_norm": 2.5860588550567627, |
| "learning_rate": 0.001, |
| "loss": 0.7833, |
| "step": 294500 |
| }, |
| { |
| "epoch": 95.21654815772463, |
| "grad_norm": 2.560615301132202, |
| "learning_rate": 0.001, |
| "loss": 0.7896, |
| "step": 294600 |
| }, |
| { |
| "epoch": 95.24886877828054, |
| "grad_norm": 2.0831151008605957, |
| "learning_rate": 0.001, |
| "loss": 0.7706, |
| "step": 294700 |
| }, |
| { |
| "epoch": 95.28118939883646, |
| "grad_norm": 1.564112663269043, |
| "learning_rate": 0.001, |
| "loss": 0.7903, |
| "step": 294800 |
| }, |
| { |
| "epoch": 95.31351001939237, |
| "grad_norm": 1.4641203880310059, |
| "learning_rate": 0.001, |
| "loss": 0.8028, |
| "step": 294900 |
| }, |
| { |
| "epoch": 95.34583063994829, |
| "grad_norm": 1.5678726434707642, |
| "learning_rate": 0.001, |
| "loss": 0.7956, |
| "step": 295000 |
| }, |
| { |
| "epoch": 95.3781512605042, |
| "grad_norm": 2.574636220932007, |
| "learning_rate": 0.001, |
| "loss": 0.8273, |
| "step": 295100 |
| }, |
| { |
| "epoch": 95.41047188106012, |
| "grad_norm": 2.0223448276519775, |
| "learning_rate": 0.001, |
| "loss": 0.7968, |
| "step": 295200 |
| }, |
| { |
| "epoch": 95.44279250161603, |
| "grad_norm": 1.6964439153671265, |
| "learning_rate": 0.001, |
| "loss": 0.8138, |
| "step": 295300 |
| }, |
| { |
| "epoch": 95.47511312217195, |
| "grad_norm": 1.6102707386016846, |
| "learning_rate": 0.001, |
| "loss": 0.8149, |
| "step": 295400 |
| }, |
| { |
| "epoch": 95.50743374272786, |
| "grad_norm": 1.607711672782898, |
| "learning_rate": 0.001, |
| "loss": 0.8176, |
| "step": 295500 |
| }, |
| { |
| "epoch": 95.53975436328378, |
| "grad_norm": 1.8629106283187866, |
| "learning_rate": 0.001, |
| "loss": 0.8331, |
| "step": 295600 |
| }, |
| { |
| "epoch": 95.57207498383968, |
| "grad_norm": 1.388291358947754, |
| "learning_rate": 0.001, |
| "loss": 0.8185, |
| "step": 295700 |
| }, |
| { |
| "epoch": 95.6043956043956, |
| "grad_norm": 2.2941222190856934, |
| "learning_rate": 0.001, |
| "loss": 0.8138, |
| "step": 295800 |
| }, |
| { |
| "epoch": 95.63671622495151, |
| "grad_norm": 2.3601090908050537, |
| "learning_rate": 0.001, |
| "loss": 0.8259, |
| "step": 295900 |
| }, |
| { |
| "epoch": 95.66903684550743, |
| "grad_norm": 1.7740150690078735, |
| "learning_rate": 0.001, |
| "loss": 0.812, |
| "step": 296000 |
| }, |
| { |
| "epoch": 95.70135746606334, |
| "grad_norm": 1.7329515218734741, |
| "learning_rate": 0.001, |
| "loss": 0.8219, |
| "step": 296100 |
| }, |
| { |
| "epoch": 95.73367808661926, |
| "grad_norm": 1.57270348072052, |
| "learning_rate": 0.001, |
| "loss": 0.8476, |
| "step": 296200 |
| }, |
| { |
| "epoch": 95.76599870717517, |
| "grad_norm": 1.5133745670318604, |
| "learning_rate": 0.001, |
| "loss": 0.8147, |
| "step": 296300 |
| }, |
| { |
| "epoch": 95.7983193277311, |
| "grad_norm": 1.8256583213806152, |
| "learning_rate": 0.001, |
| "loss": 0.8433, |
| "step": 296400 |
| }, |
| { |
| "epoch": 95.830639948287, |
| "grad_norm": 1.820281744003296, |
| "learning_rate": 0.001, |
| "loss": 0.8387, |
| "step": 296500 |
| }, |
| { |
| "epoch": 95.86296056884292, |
| "grad_norm": 1.661638617515564, |
| "learning_rate": 0.001, |
| "loss": 0.8386, |
| "step": 296600 |
| }, |
| { |
| "epoch": 95.89528118939883, |
| "grad_norm": 1.9161064624786377, |
| "learning_rate": 0.001, |
| "loss": 0.8376, |
| "step": 296700 |
| }, |
| { |
| "epoch": 95.92760180995475, |
| "grad_norm": 155.10792541503906, |
| "learning_rate": 0.001, |
| "loss": 0.8513, |
| "step": 296800 |
| }, |
| { |
| "epoch": 95.95992243051066, |
| "grad_norm": 1.6141908168792725, |
| "learning_rate": 0.001, |
| "loss": 0.8456, |
| "step": 296900 |
| }, |
| { |
| "epoch": 95.99224305106658, |
| "grad_norm": 1.7733116149902344, |
| "learning_rate": 0.001, |
| "loss": 0.8441, |
| "step": 297000 |
| }, |
| { |
| "epoch": 96.0245636716225, |
| "grad_norm": 1.8193919658660889, |
| "learning_rate": 0.001, |
| "loss": 0.8051, |
| "step": 297100 |
| }, |
| { |
| "epoch": 96.05688429217841, |
| "grad_norm": 1.5573382377624512, |
| "learning_rate": 0.001, |
| "loss": 0.7617, |
| "step": 297200 |
| }, |
| { |
| "epoch": 96.08920491273433, |
| "grad_norm": 1.5014259815216064, |
| "learning_rate": 0.001, |
| "loss": 0.7728, |
| "step": 297300 |
| }, |
| { |
| "epoch": 96.12152553329024, |
| "grad_norm": 1.7022926807403564, |
| "learning_rate": 0.001, |
| "loss": 0.7714, |
| "step": 297400 |
| }, |
| { |
| "epoch": 96.15384615384616, |
| "grad_norm": 1.7341800928115845, |
| "learning_rate": 0.001, |
| "loss": 0.7813, |
| "step": 297500 |
| }, |
| { |
| "epoch": 96.18616677440207, |
| "grad_norm": 1.7341227531433105, |
| "learning_rate": 0.001, |
| "loss": 0.7863, |
| "step": 297600 |
| }, |
| { |
| "epoch": 96.21848739495799, |
| "grad_norm": 1.5951087474822998, |
| "learning_rate": 0.001, |
| "loss": 0.7917, |
| "step": 297700 |
| }, |
| { |
| "epoch": 96.2508080155139, |
| "grad_norm": 1.7377108335494995, |
| "learning_rate": 0.001, |
| "loss": 0.787, |
| "step": 297800 |
| }, |
| { |
| "epoch": 96.28312863606982, |
| "grad_norm": 1.6977343559265137, |
| "learning_rate": 0.001, |
| "loss": 0.7687, |
| "step": 297900 |
| }, |
| { |
| "epoch": 96.31544925662573, |
| "grad_norm": 1.503747820854187, |
| "learning_rate": 0.001, |
| "loss": 0.7973, |
| "step": 298000 |
| }, |
| { |
| "epoch": 96.34776987718165, |
| "grad_norm": 1.4911261796951294, |
| "learning_rate": 0.001, |
| "loss": 0.8041, |
| "step": 298100 |
| }, |
| { |
| "epoch": 96.38009049773756, |
| "grad_norm": 1.8095695972442627, |
| "learning_rate": 0.001, |
| "loss": 0.8138, |
| "step": 298200 |
| }, |
| { |
| "epoch": 96.41241111829348, |
| "grad_norm": 1.5566781759262085, |
| "learning_rate": 0.001, |
| "loss": 0.7972, |
| "step": 298300 |
| }, |
| { |
| "epoch": 96.44473173884938, |
| "grad_norm": 1.5418801307678223, |
| "learning_rate": 0.001, |
| "loss": 0.8008, |
| "step": 298400 |
| }, |
| { |
| "epoch": 96.4770523594053, |
| "grad_norm": 1.9948352575302124, |
| "learning_rate": 0.001, |
| "loss": 0.8103, |
| "step": 298500 |
| }, |
| { |
| "epoch": 96.50937297996121, |
| "grad_norm": 2.468416213989258, |
| "learning_rate": 0.001, |
| "loss": 0.8254, |
| "step": 298600 |
| }, |
| { |
| "epoch": 96.54169360051714, |
| "grad_norm": 1.474953293800354, |
| "learning_rate": 0.001, |
| "loss": 0.8181, |
| "step": 298700 |
| }, |
| { |
| "epoch": 96.57401422107304, |
| "grad_norm": 1.533799409866333, |
| "learning_rate": 0.001, |
| "loss": 0.8145, |
| "step": 298800 |
| }, |
| { |
| "epoch": 96.60633484162896, |
| "grad_norm": 1.4074748754501343, |
| "learning_rate": 0.001, |
| "loss": 0.8215, |
| "step": 298900 |
| }, |
| { |
| "epoch": 96.63865546218487, |
| "grad_norm": 1.8802385330200195, |
| "learning_rate": 0.001, |
| "loss": 0.803, |
| "step": 299000 |
| }, |
| { |
| "epoch": 96.6709760827408, |
| "grad_norm": 1.875154972076416, |
| "learning_rate": 0.001, |
| "loss": 0.8164, |
| "step": 299100 |
| }, |
| { |
| "epoch": 96.7032967032967, |
| "grad_norm": 1.6148853302001953, |
| "learning_rate": 0.001, |
| "loss": 0.8259, |
| "step": 299200 |
| }, |
| { |
| "epoch": 96.73561732385262, |
| "grad_norm": 1.7623929977416992, |
| "learning_rate": 0.001, |
| "loss": 0.825, |
| "step": 299300 |
| }, |
| { |
| "epoch": 96.76793794440853, |
| "grad_norm": 1.9254313707351685, |
| "learning_rate": 0.001, |
| "loss": 0.8376, |
| "step": 299400 |
| }, |
| { |
| "epoch": 96.80025856496445, |
| "grad_norm": 1.6274827718734741, |
| "learning_rate": 0.001, |
| "loss": 0.8356, |
| "step": 299500 |
| }, |
| { |
| "epoch": 96.83257918552036, |
| "grad_norm": 1.6711455583572388, |
| "learning_rate": 0.001, |
| "loss": 0.8385, |
| "step": 299600 |
| }, |
| { |
| "epoch": 96.86489980607628, |
| "grad_norm": 2.029143810272217, |
| "learning_rate": 0.001, |
| "loss": 0.8333, |
| "step": 299700 |
| }, |
| { |
| "epoch": 96.89722042663219, |
| "grad_norm": 1.801000714302063, |
| "learning_rate": 0.001, |
| "loss": 0.8393, |
| "step": 299800 |
| }, |
| { |
| "epoch": 96.92954104718811, |
| "grad_norm": 1.6309932470321655, |
| "learning_rate": 0.001, |
| "loss": 0.8271, |
| "step": 299900 |
| }, |
| { |
| "epoch": 96.96186166774402, |
| "grad_norm": 1.8438305854797363, |
| "learning_rate": 0.001, |
| "loss": 0.8348, |
| "step": 300000 |
| }, |
| { |
| "epoch": 96.99418228829994, |
| "grad_norm": 1.7183477878570557, |
| "learning_rate": 0.001, |
| "loss": 0.8257, |
| "step": 300100 |
| }, |
| { |
| "epoch": 97.02650290885585, |
| "grad_norm": 1.5698041915893555, |
| "learning_rate": 0.001, |
| "loss": 0.7702, |
| "step": 300200 |
| }, |
| { |
| "epoch": 97.05882352941177, |
| "grad_norm": 1.6669820547103882, |
| "learning_rate": 0.001, |
| "loss": 0.7739, |
| "step": 300300 |
| }, |
| { |
| "epoch": 97.09114414996768, |
| "grad_norm": 1.6277503967285156, |
| "learning_rate": 0.001, |
| "loss": 0.7652, |
| "step": 300400 |
| }, |
| { |
| "epoch": 97.1234647705236, |
| "grad_norm": 1.3346500396728516, |
| "learning_rate": 0.001, |
| "loss": 0.7631, |
| "step": 300500 |
| }, |
| { |
| "epoch": 97.1557853910795, |
| "grad_norm": 1.852993130683899, |
| "learning_rate": 0.001, |
| "loss": 0.7823, |
| "step": 300600 |
| }, |
| { |
| "epoch": 97.18810601163543, |
| "grad_norm": 1.7382128238677979, |
| "learning_rate": 0.001, |
| "loss": 0.7705, |
| "step": 300700 |
| }, |
| { |
| "epoch": 97.22042663219133, |
| "grad_norm": 1.5095046758651733, |
| "learning_rate": 0.001, |
| "loss": 0.7798, |
| "step": 300800 |
| }, |
| { |
| "epoch": 97.25274725274726, |
| "grad_norm": 1.847023606300354, |
| "learning_rate": 0.001, |
| "loss": 0.7838, |
| "step": 300900 |
| }, |
| { |
| "epoch": 97.28506787330316, |
| "grad_norm": 1.7964493036270142, |
| "learning_rate": 0.001, |
| "loss": 0.7896, |
| "step": 301000 |
| }, |
| { |
| "epoch": 97.31738849385908, |
| "grad_norm": 2.9618778228759766, |
| "learning_rate": 0.001, |
| "loss": 0.7969, |
| "step": 301100 |
| }, |
| { |
| "epoch": 97.34970911441499, |
| "grad_norm": 1.6932001113891602, |
| "learning_rate": 0.001, |
| "loss": 0.795, |
| "step": 301200 |
| }, |
| { |
| "epoch": 97.38202973497091, |
| "grad_norm": 1.504896879196167, |
| "learning_rate": 0.001, |
| "loss": 0.794, |
| "step": 301300 |
| }, |
| { |
| "epoch": 97.41435035552682, |
| "grad_norm": 1.857369065284729, |
| "learning_rate": 0.001, |
| "loss": 0.7876, |
| "step": 301400 |
| }, |
| { |
| "epoch": 97.44667097608274, |
| "grad_norm": 1.5941156148910522, |
| "learning_rate": 0.001, |
| "loss": 0.8076, |
| "step": 301500 |
| }, |
| { |
| "epoch": 97.47899159663865, |
| "grad_norm": 2.0202865600585938, |
| "learning_rate": 0.001, |
| "loss": 0.8003, |
| "step": 301600 |
| }, |
| { |
| "epoch": 97.51131221719457, |
| "grad_norm": 1.77725088596344, |
| "learning_rate": 0.001, |
| "loss": 0.8186, |
| "step": 301700 |
| }, |
| { |
| "epoch": 97.54363283775048, |
| "grad_norm": 1.4381173849105835, |
| "learning_rate": 0.001, |
| "loss": 0.8083, |
| "step": 301800 |
| }, |
| { |
| "epoch": 97.5759534583064, |
| "grad_norm": 1.4481972455978394, |
| "learning_rate": 0.001, |
| "loss": 0.8015, |
| "step": 301900 |
| }, |
| { |
| "epoch": 97.60827407886231, |
| "grad_norm": 2.0220069885253906, |
| "learning_rate": 0.001, |
| "loss": 0.8175, |
| "step": 302000 |
| }, |
| { |
| "epoch": 97.64059469941823, |
| "grad_norm": 1.730670690536499, |
| "learning_rate": 0.001, |
| "loss": 0.8144, |
| "step": 302100 |
| }, |
| { |
| "epoch": 97.67291531997414, |
| "grad_norm": 1.6534379720687866, |
| "learning_rate": 0.001, |
| "loss": 0.8065, |
| "step": 302200 |
| }, |
| { |
| "epoch": 97.70523594053006, |
| "grad_norm": 1.6358345746994019, |
| "learning_rate": 0.001, |
| "loss": 0.8051, |
| "step": 302300 |
| }, |
| { |
| "epoch": 97.73755656108597, |
| "grad_norm": 1.6618595123291016, |
| "learning_rate": 0.001, |
| "loss": 0.8125, |
| "step": 302400 |
| }, |
| { |
| "epoch": 97.76987718164189, |
| "grad_norm": 1.5252058506011963, |
| "learning_rate": 0.001, |
| "loss": 0.8192, |
| "step": 302500 |
| }, |
| { |
| "epoch": 97.8021978021978, |
| "grad_norm": 1.7979437112808228, |
| "learning_rate": 0.001, |
| "loss": 0.8292, |
| "step": 302600 |
| }, |
| { |
| "epoch": 97.83451842275372, |
| "grad_norm": 1.6801445484161377, |
| "learning_rate": 0.001, |
| "loss": 0.8272, |
| "step": 302700 |
| }, |
| { |
| "epoch": 97.86683904330962, |
| "grad_norm": 1.5387403964996338, |
| "learning_rate": 0.001, |
| "loss": 0.8236, |
| "step": 302800 |
| }, |
| { |
| "epoch": 97.89915966386555, |
| "grad_norm": 1.9808905124664307, |
| "learning_rate": 0.001, |
| "loss": 0.8208, |
| "step": 302900 |
| }, |
| { |
| "epoch": 97.93148028442145, |
| "grad_norm": 1.9140608310699463, |
| "learning_rate": 0.001, |
| "loss": 0.8367, |
| "step": 303000 |
| }, |
| { |
| "epoch": 97.96380090497738, |
| "grad_norm": 2.240133762359619, |
| "learning_rate": 0.001, |
| "loss": 0.8299, |
| "step": 303100 |
| }, |
| { |
| "epoch": 97.99612152553328, |
| "grad_norm": 2.0139851570129395, |
| "learning_rate": 0.001, |
| "loss": 0.8189, |
| "step": 303200 |
| }, |
| { |
| "epoch": 98.0284421460892, |
| "grad_norm": 1.701872706413269, |
| "learning_rate": 0.001, |
| "loss": 0.7438, |
| "step": 303300 |
| }, |
| { |
| "epoch": 98.06076276664513, |
| "grad_norm": 1.5965427160263062, |
| "learning_rate": 0.001, |
| "loss": 0.7551, |
| "step": 303400 |
| }, |
| { |
| "epoch": 98.09308338720103, |
| "grad_norm": 1.8786427974700928, |
| "learning_rate": 0.001, |
| "loss": 0.766, |
| "step": 303500 |
| }, |
| { |
| "epoch": 98.12540400775696, |
| "grad_norm": 1.8245842456817627, |
| "learning_rate": 0.001, |
| "loss": 0.7673, |
| "step": 303600 |
| }, |
| { |
| "epoch": 98.15772462831286, |
| "grad_norm": 2.078568935394287, |
| "learning_rate": 0.001, |
| "loss": 0.7756, |
| "step": 303700 |
| }, |
| { |
| "epoch": 98.19004524886878, |
| "grad_norm": 1.7483006715774536, |
| "learning_rate": 0.001, |
| "loss": 0.7743, |
| "step": 303800 |
| }, |
| { |
| "epoch": 98.22236586942469, |
| "grad_norm": 1.639719843864441, |
| "learning_rate": 0.001, |
| "loss": 0.7776, |
| "step": 303900 |
| }, |
| { |
| "epoch": 98.25468648998061, |
| "grad_norm": 1.6719263792037964, |
| "learning_rate": 0.001, |
| "loss": 0.7783, |
| "step": 304000 |
| }, |
| { |
| "epoch": 98.28700711053652, |
| "grad_norm": 1.8221156597137451, |
| "learning_rate": 0.001, |
| "loss": 0.7849, |
| "step": 304100 |
| }, |
| { |
| "epoch": 98.31932773109244, |
| "grad_norm": 1.5369601249694824, |
| "learning_rate": 0.001, |
| "loss": 0.783, |
| "step": 304200 |
| }, |
| { |
| "epoch": 98.35164835164835, |
| "grad_norm": 1.405853033065796, |
| "learning_rate": 0.001, |
| "loss": 0.7821, |
| "step": 304300 |
| }, |
| { |
| "epoch": 98.38396897220427, |
| "grad_norm": 2.133615255355835, |
| "learning_rate": 0.001, |
| "loss": 0.7812, |
| "step": 304400 |
| }, |
| { |
| "epoch": 98.41628959276018, |
| "grad_norm": 1.5420911312103271, |
| "learning_rate": 0.001, |
| "loss": 0.7965, |
| "step": 304500 |
| }, |
| { |
| "epoch": 98.4486102133161, |
| "grad_norm": 1.6347417831420898, |
| "learning_rate": 0.001, |
| "loss": 0.7933, |
| "step": 304600 |
| }, |
| { |
| "epoch": 98.48093083387201, |
| "grad_norm": 1.4630227088928223, |
| "learning_rate": 0.001, |
| "loss": 0.7793, |
| "step": 304700 |
| }, |
| { |
| "epoch": 98.51325145442793, |
| "grad_norm": 1.563585638999939, |
| "learning_rate": 0.001, |
| "loss": 0.7818, |
| "step": 304800 |
| }, |
| { |
| "epoch": 98.54557207498384, |
| "grad_norm": 1.7685762643814087, |
| "learning_rate": 0.001, |
| "loss": 0.8194, |
| "step": 304900 |
| }, |
| { |
| "epoch": 98.57789269553976, |
| "grad_norm": 1.683583378791809, |
| "learning_rate": 0.001, |
| "loss": 0.8049, |
| "step": 305000 |
| }, |
| { |
| "epoch": 98.61021331609567, |
| "grad_norm": 1.280974268913269, |
| "learning_rate": 0.001, |
| "loss": 0.8025, |
| "step": 305100 |
| }, |
| { |
| "epoch": 98.64253393665159, |
| "grad_norm": 1.4059722423553467, |
| "learning_rate": 0.001, |
| "loss": 0.81, |
| "step": 305200 |
| }, |
| { |
| "epoch": 98.6748545572075, |
| "grad_norm": 1.575561761856079, |
| "learning_rate": 0.001, |
| "loss": 0.8082, |
| "step": 305300 |
| }, |
| { |
| "epoch": 98.70717517776342, |
| "grad_norm": 2.3904173374176025, |
| "learning_rate": 0.001, |
| "loss": 0.8047, |
| "step": 305400 |
| }, |
| { |
| "epoch": 98.73949579831933, |
| "grad_norm": 2.197852849960327, |
| "learning_rate": 0.001, |
| "loss": 0.8068, |
| "step": 305500 |
| }, |
| { |
| "epoch": 98.77181641887525, |
| "grad_norm": 1.8822154998779297, |
| "learning_rate": 0.001, |
| "loss": 0.8173, |
| "step": 305600 |
| }, |
| { |
| "epoch": 98.80413703943115, |
| "grad_norm": 1.5784273147583008, |
| "learning_rate": 0.001, |
| "loss": 0.8124, |
| "step": 305700 |
| }, |
| { |
| "epoch": 98.83645765998708, |
| "grad_norm": 1.572724461555481, |
| "learning_rate": 0.001, |
| "loss": 0.81, |
| "step": 305800 |
| }, |
| { |
| "epoch": 98.86877828054298, |
| "grad_norm": 1.875663161277771, |
| "learning_rate": 0.001, |
| "loss": 0.8151, |
| "step": 305900 |
| }, |
| { |
| "epoch": 98.9010989010989, |
| "grad_norm": 3.32124662399292, |
| "learning_rate": 0.001, |
| "loss": 0.809, |
| "step": 306000 |
| }, |
| { |
| "epoch": 98.93341952165481, |
| "grad_norm": 2.008251190185547, |
| "learning_rate": 0.001, |
| "loss": 0.825, |
| "step": 306100 |
| }, |
| { |
| "epoch": 98.96574014221073, |
| "grad_norm": 14.357972145080566, |
| "learning_rate": 0.001, |
| "loss": 0.8296, |
| "step": 306200 |
| }, |
| { |
| "epoch": 98.99806076276664, |
| "grad_norm": 1.7346726655960083, |
| "learning_rate": 0.001, |
| "loss": 0.8113, |
| "step": 306300 |
| }, |
| { |
| "epoch": 99.03038138332256, |
| "grad_norm": 1.5255569219589233, |
| "learning_rate": 0.001, |
| "loss": 0.753, |
| "step": 306400 |
| }, |
| { |
| "epoch": 99.06270200387847, |
| "grad_norm": 1.8542404174804688, |
| "learning_rate": 0.001, |
| "loss": 0.7537, |
| "step": 306500 |
| }, |
| { |
| "epoch": 99.09502262443439, |
| "grad_norm": 1.9389482736587524, |
| "learning_rate": 0.001, |
| "loss": 0.7604, |
| "step": 306600 |
| }, |
| { |
| "epoch": 99.1273432449903, |
| "grad_norm": 1.9633482694625854, |
| "learning_rate": 0.001, |
| "loss": 0.7669, |
| "step": 306700 |
| }, |
| { |
| "epoch": 99.15966386554622, |
| "grad_norm": 1.3970327377319336, |
| "learning_rate": 0.001, |
| "loss": 0.7502, |
| "step": 306800 |
| }, |
| { |
| "epoch": 99.19198448610213, |
| "grad_norm": 1.400421380996704, |
| "learning_rate": 0.001, |
| "loss": 0.7697, |
| "step": 306900 |
| }, |
| { |
| "epoch": 99.22430510665805, |
| "grad_norm": 2.0532190799713135, |
| "learning_rate": 0.001, |
| "loss": 0.7604, |
| "step": 307000 |
| }, |
| { |
| "epoch": 99.25662572721396, |
| "grad_norm": 1.6515733003616333, |
| "learning_rate": 0.001, |
| "loss": 0.7727, |
| "step": 307100 |
| }, |
| { |
| "epoch": 99.28894634776988, |
| "grad_norm": 1.4215999841690063, |
| "learning_rate": 0.001, |
| "loss": 0.7935, |
| "step": 307200 |
| }, |
| { |
| "epoch": 99.32126696832579, |
| "grad_norm": 1.2796798944473267, |
| "learning_rate": 0.001, |
| "loss": 0.783, |
| "step": 307300 |
| }, |
| { |
| "epoch": 99.35358758888171, |
| "grad_norm": 1.8550664186477661, |
| "learning_rate": 0.001, |
| "loss": 0.7806, |
| "step": 307400 |
| }, |
| { |
| "epoch": 99.38590820943762, |
| "grad_norm": 1.508309245109558, |
| "learning_rate": 0.001, |
| "loss": 0.7955, |
| "step": 307500 |
| }, |
| { |
| "epoch": 99.41822882999354, |
| "grad_norm": 5.713343620300293, |
| "learning_rate": 0.001, |
| "loss": 0.7914, |
| "step": 307600 |
| }, |
| { |
| "epoch": 99.45054945054945, |
| "grad_norm": 1.6815954446792603, |
| "learning_rate": 0.001, |
| "loss": 0.788, |
| "step": 307700 |
| }, |
| { |
| "epoch": 99.48287007110537, |
| "grad_norm": 1.4613316059112549, |
| "learning_rate": 0.001, |
| "loss": 0.7896, |
| "step": 307800 |
| }, |
| { |
| "epoch": 99.51519069166127, |
| "grad_norm": 1.8426566123962402, |
| "learning_rate": 0.001, |
| "loss": 0.8139, |
| "step": 307900 |
| }, |
| { |
| "epoch": 99.5475113122172, |
| "grad_norm": 1.4660000801086426, |
| "learning_rate": 0.001, |
| "loss": 0.795, |
| "step": 308000 |
| }, |
| { |
| "epoch": 99.5798319327731, |
| "grad_norm": 1.5318522453308105, |
| "learning_rate": 0.001, |
| "loss": 0.8095, |
| "step": 308100 |
| }, |
| { |
| "epoch": 99.61215255332903, |
| "grad_norm": 1.5603666305541992, |
| "learning_rate": 0.001, |
| "loss": 0.8079, |
| "step": 308200 |
| }, |
| { |
| "epoch": 99.64447317388493, |
| "grad_norm": 1.7400113344192505, |
| "learning_rate": 0.001, |
| "loss": 0.8057, |
| "step": 308300 |
| }, |
| { |
| "epoch": 99.67679379444085, |
| "grad_norm": 1.3645039796829224, |
| "learning_rate": 0.001, |
| "loss": 0.8052, |
| "step": 308400 |
| }, |
| { |
| "epoch": 99.70911441499676, |
| "grad_norm": 1.9990870952606201, |
| "learning_rate": 0.001, |
| "loss": 0.8038, |
| "step": 308500 |
| }, |
| { |
| "epoch": 99.74143503555268, |
| "grad_norm": 1.7578907012939453, |
| "learning_rate": 0.001, |
| "loss": 0.7997, |
| "step": 308600 |
| }, |
| { |
| "epoch": 99.77375565610859, |
| "grad_norm": 1.4323079586029053, |
| "learning_rate": 0.001, |
| "loss": 0.8035, |
| "step": 308700 |
| }, |
| { |
| "epoch": 99.80607627666451, |
| "grad_norm": 1.3833441734313965, |
| "learning_rate": 0.001, |
| "loss": 0.7939, |
| "step": 308800 |
| }, |
| { |
| "epoch": 99.83839689722042, |
| "grad_norm": 1.8678525686264038, |
| "learning_rate": 0.001, |
| "loss": 0.8105, |
| "step": 308900 |
| }, |
| { |
| "epoch": 99.87071751777634, |
| "grad_norm": 1.543296456336975, |
| "learning_rate": 0.001, |
| "loss": 0.8166, |
| "step": 309000 |
| }, |
| { |
| "epoch": 99.90303813833225, |
| "grad_norm": 1.2817319631576538, |
| "learning_rate": 0.001, |
| "loss": 0.8154, |
| "step": 309100 |
| }, |
| { |
| "epoch": 99.93535875888817, |
| "grad_norm": 1.460585594177246, |
| "learning_rate": 0.001, |
| "loss": 0.8102, |
| "step": 309200 |
| }, |
| { |
| "epoch": 99.96767937944408, |
| "grad_norm": 1.6103432178497314, |
| "learning_rate": 0.001, |
| "loss": 0.8096, |
| "step": 309300 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 0.6271578073501587, |
| "learning_rate": 0.001, |
| "loss": 0.7634, |
| "step": 309400 |
| }, |
| { |
| "epoch": 100.0, |
| "step": 309400, |
| "total_flos": 3.297635183404155e+17, |
| "train_loss": 0.3764197817446728, |
| "train_runtime": 18030.0585, |
| "train_samples_per_second": 549.083, |
| "train_steps_per_second": 17.16 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 309400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.297635183404155e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|