| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 100.0, |
| "eval_steps": 20000, |
| "global_step": 309400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03232062055591468, |
| "grad_norm": 113.44369506835938, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 5.1994, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06464124111182935, |
| "grad_norm": 33.283287048339844, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 4.5771, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09696186166774402, |
| "grad_norm": 26.25897789001465, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 4.1609, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1292824822236587, |
| "grad_norm": 11.137129783630371, |
| "learning_rate": 3.99e-05, |
| "loss": 4.0668, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16160310277957338, |
| "grad_norm": 14.905611991882324, |
| "learning_rate": 4.99e-05, |
| "loss": 4.0021, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19392372333548805, |
| "grad_norm": 2.45811128616333, |
| "learning_rate": 5.9900000000000006e-05, |
| "loss": 3.9788, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22624434389140272, |
| "grad_norm": 3.4350666999816895, |
| "learning_rate": 6.99e-05, |
| "loss": 3.9595, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2585649644473174, |
| "grad_norm": 21.33148956298828, |
| "learning_rate": 7.99e-05, |
| "loss": 3.9243, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2908855850032321, |
| "grad_norm": 2.3939037322998047, |
| "learning_rate": 8.989999999999999e-05, |
| "loss": 3.8641, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32320620555914675, |
| "grad_norm": 2.719916343688965, |
| "learning_rate": 9.99e-05, |
| "loss": 3.8605, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3555268261150614, |
| "grad_norm": 2.285353899002075, |
| "learning_rate": 0.0001099, |
| "loss": 3.8334, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3878474466709761, |
| "grad_norm": 2.2743117809295654, |
| "learning_rate": 0.00011990000000000001, |
| "loss": 3.8158, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 2.1339423656463623, |
| "learning_rate": 0.00012989999999999999, |
| "loss": 3.7871, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.45248868778280543, |
| "grad_norm": 2.2670485973358154, |
| "learning_rate": 0.0001399, |
| "loss": 3.779, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4848093083387201, |
| "grad_norm": 2.1925437450408936, |
| "learning_rate": 0.0001499, |
| "loss": 3.7351, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5171299288946348, |
| "grad_norm": 2.1954660415649414, |
| "learning_rate": 0.00015989999999999998, |
| "loss": 3.7266, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 3.2180988788604736, |
| "learning_rate": 0.0001699, |
| "loss": 3.7417, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5817711700064642, |
| "grad_norm": 2.362977981567383, |
| "learning_rate": 0.0001799, |
| "loss": 3.7068, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6140917905623788, |
| "grad_norm": 2.1778812408447266, |
| "learning_rate": 0.0001899, |
| "loss": 3.7141, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6464124111182935, |
| "grad_norm": 2.239959716796875, |
| "learning_rate": 0.0001999, |
| "loss": 3.6816, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6787330316742082, |
| "grad_norm": 2.3324010372161865, |
| "learning_rate": 0.0002099, |
| "loss": 3.6757, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7110536522301228, |
| "grad_norm": 2.222890853881836, |
| "learning_rate": 0.0002199, |
| "loss": 3.6602, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7433742727860375, |
| "grad_norm": 2.055497407913208, |
| "learning_rate": 0.0002299, |
| "loss": 3.657, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7756948933419522, |
| "grad_norm": 2.1699202060699463, |
| "learning_rate": 0.0002399, |
| "loss": 3.6371, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8080155138978669, |
| "grad_norm": 2.2787368297576904, |
| "learning_rate": 0.0002499, |
| "loss": 3.6215, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 2.3619353771209717, |
| "learning_rate": 0.00025990000000000003, |
| "loss": 3.6471, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8726567550096962, |
| "grad_norm": 2.054823160171509, |
| "learning_rate": 0.0002699, |
| "loss": 3.5954, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9049773755656109, |
| "grad_norm": 2.221311092376709, |
| "learning_rate": 0.0002799, |
| "loss": 3.5998, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9372979961215255, |
| "grad_norm": 2.4884724617004395, |
| "learning_rate": 0.0002899, |
| "loss": 3.5862, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9696186166774402, |
| "grad_norm": 2.2477798461914062, |
| "learning_rate": 0.0002999, |
| "loss": 3.5723, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0019392372333549, |
| "grad_norm": 2.155560255050659, |
| "learning_rate": 0.0003099, |
| "loss": 3.5919, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0342598577892697, |
| "grad_norm": 1.4968713521957397, |
| "learning_rate": 0.0003199, |
| "loss": 3.5275, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0665804783451842, |
| "grad_norm": 1.2746202945709229, |
| "learning_rate": 0.00032990000000000005, |
| "loss": 3.5395, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.098901098901099, |
| "grad_norm": 1.3098007440567017, |
| "learning_rate": 0.00033989999999999997, |
| "loss": 3.5354, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.1312217194570136, |
| "grad_norm": 1.745730996131897, |
| "learning_rate": 0.0003499, |
| "loss": 3.5099, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1635423400129283, |
| "grad_norm": 1.8114116191864014, |
| "learning_rate": 0.0003599, |
| "loss": 3.5412, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.195862960568843, |
| "grad_norm": 2.3928165435791016, |
| "learning_rate": 0.0003699, |
| "loss": 3.5332, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.2281835811247577, |
| "grad_norm": 1.126379370689392, |
| "learning_rate": 0.0003799, |
| "loss": 3.5205, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.2605042016806722, |
| "grad_norm": 1.7738536596298218, |
| "learning_rate": 0.00038990000000000004, |
| "loss": 3.5306, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.292824822236587, |
| "grad_norm": 2.3915224075317383, |
| "learning_rate": 0.00039989999999999996, |
| "loss": 3.5113, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3251454427925016, |
| "grad_norm": 1.2352187633514404, |
| "learning_rate": 0.0004099, |
| "loss": 3.4997, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.3574660633484164, |
| "grad_norm": 1.6244854927062988, |
| "learning_rate": 0.0004199, |
| "loss": 3.5079, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.389786683904331, |
| "grad_norm": 1.1132093667984009, |
| "learning_rate": 0.0004299, |
| "loss": 3.5052, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.4221073044602457, |
| "grad_norm": 1.9960055351257324, |
| "learning_rate": 0.0004399, |
| "loss": 3.5056, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.4544279250161603, |
| "grad_norm": 1.619606852531433, |
| "learning_rate": 0.00044990000000000004, |
| "loss": 3.4965, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.486748545572075, |
| "grad_norm": 1.8198740482330322, |
| "learning_rate": 0.0004599, |
| "loss": 3.4824, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.5190691661279896, |
| "grad_norm": 2.4110352993011475, |
| "learning_rate": 0.0004699, |
| "loss": 3.4957, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.5513897866839044, |
| "grad_norm": 1.0220048427581787, |
| "learning_rate": 0.0004799, |
| "loss": 3.4719, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5837104072398192, |
| "grad_norm": 1.1443417072296143, |
| "learning_rate": 0.0004899, |
| "loss": 3.4902, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.6160310277957337, |
| "grad_norm": 1.0778316259384155, |
| "learning_rate": 0.0004999000000000001, |
| "loss": 3.4851, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6483516483516483, |
| "grad_norm": 1.5312631130218506, |
| "learning_rate": 0.0005099, |
| "loss": 3.4704, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.680672268907563, |
| "grad_norm": 1.2108691930770874, |
| "learning_rate": 0.0005199, |
| "loss": 3.4674, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.7129928894634778, |
| "grad_norm": 1.3876620531082153, |
| "learning_rate": 0.0005299, |
| "loss": 3.4695, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.7453135100193924, |
| "grad_norm": 1.118224024772644, |
| "learning_rate": 0.0005399000000000001, |
| "loss": 3.4654, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.777634130575307, |
| "grad_norm": 1.0403189659118652, |
| "learning_rate": 0.0005499000000000001, |
| "loss": 3.4498, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.8099547511312217, |
| "grad_norm": 1.0136079788208008, |
| "learning_rate": 0.0005599, |
| "loss": 3.4364, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.8422753716871365, |
| "grad_norm": 1.3502916097640991, |
| "learning_rate": 0.0005698999999999999, |
| "loss": 3.4396, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.874595992243051, |
| "grad_norm": 1.0874897241592407, |
| "learning_rate": 0.0005799, |
| "loss": 3.4767, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.9069166127989656, |
| "grad_norm": 1.2160725593566895, |
| "learning_rate": 0.0005899, |
| "loss": 3.4555, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.9392372333548804, |
| "grad_norm": 0.9763041734695435, |
| "learning_rate": 0.0005999, |
| "loss": 3.4453, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.9715578539107952, |
| "grad_norm": 1.3038817644119263, |
| "learning_rate": 0.0006099, |
| "loss": 3.4526, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.0038784744667097, |
| "grad_norm": 1.0602381229400635, |
| "learning_rate": 0.0006199, |
| "loss": 3.4462, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.0361990950226243, |
| "grad_norm": 0.7091767191886902, |
| "learning_rate": 0.0006299000000000001, |
| "loss": 3.3692, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.0685197155785393, |
| "grad_norm": 0.9198008179664612, |
| "learning_rate": 0.0006399, |
| "loss": 3.3699, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.100840336134454, |
| "grad_norm": 0.7512125372886658, |
| "learning_rate": 0.0006499, |
| "loss": 3.3667, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.1331609566903684, |
| "grad_norm": 1.0284326076507568, |
| "learning_rate": 0.0006599, |
| "loss": 3.3685, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.165481577246283, |
| "grad_norm": 1.1990816593170166, |
| "learning_rate": 0.0006699000000000001, |
| "loss": 3.3774, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.197802197802198, |
| "grad_norm": 0.9412187933921814, |
| "learning_rate": 0.0006799, |
| "loss": 3.3603, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.2301228183581125, |
| "grad_norm": 1.1793667078018188, |
| "learning_rate": 0.0006899, |
| "loss": 3.3585, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.262443438914027, |
| "grad_norm": 1.0098427534103394, |
| "learning_rate": 0.0006998999999999999, |
| "loss": 3.3701, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2947640594699417, |
| "grad_norm": 1.2947179079055786, |
| "learning_rate": 0.0007099, |
| "loss": 3.3552, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.3270846800258567, |
| "grad_norm": 1.0354315042495728, |
| "learning_rate": 0.0007199, |
| "loss": 3.3738, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.3594053005817712, |
| "grad_norm": 1.6114907264709473, |
| "learning_rate": 0.0007299, |
| "loss": 3.3601, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.391725921137686, |
| "grad_norm": 1.2489641904830933, |
| "learning_rate": 0.0007399, |
| "loss": 3.3666, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.4240465416936003, |
| "grad_norm": 1.2799603939056396, |
| "learning_rate": 0.0007499000000000001, |
| "loss": 3.3767, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4563671622495153, |
| "grad_norm": 0.9481520652770996, |
| "learning_rate": 0.0007599, |
| "loss": 3.3799, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.48868778280543, |
| "grad_norm": 1.095807671546936, |
| "learning_rate": 0.0007699, |
| "loss": 3.3691, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.5210084033613445, |
| "grad_norm": 0.9669839143753052, |
| "learning_rate": 0.0007799, |
| "loss": 3.3424, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.553329023917259, |
| "grad_norm": 0.819404125213623, |
| "learning_rate": 0.0007899000000000001, |
| "loss": 3.3339, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.585649644473174, |
| "grad_norm": 0.797839879989624, |
| "learning_rate": 0.0007999000000000001, |
| "loss": 3.3667, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.6179702650290886, |
| "grad_norm": 1.1646702289581299, |
| "learning_rate": 0.0008099, |
| "loss": 3.37, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.650290885585003, |
| "grad_norm": 1.1598727703094482, |
| "learning_rate": 0.0008198999999999999, |
| "loss": 3.3375, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.682611506140918, |
| "grad_norm": 1.25111722946167, |
| "learning_rate": 0.0008299, |
| "loss": 3.3584, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.7149321266968327, |
| "grad_norm": 1.2236987352371216, |
| "learning_rate": 0.0008399, |
| "loss": 3.3714, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.7472527472527473, |
| "grad_norm": 0.9083935022354126, |
| "learning_rate": 0.0008499, |
| "loss": 3.3567, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.779573367808662, |
| "grad_norm": 1.0694997310638428, |
| "learning_rate": 0.0008599, |
| "loss": 3.3439, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.8118939883645764, |
| "grad_norm": 1.1277011632919312, |
| "learning_rate": 0.0008699000000000001, |
| "loss": 3.3346, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.8442146089204914, |
| "grad_norm": 1.3444178104400635, |
| "learning_rate": 0.0008799000000000001, |
| "loss": 3.3451, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.876535229476406, |
| "grad_norm": 0.9961609244346619, |
| "learning_rate": 0.0008899, |
| "loss": 3.3484, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.9088558500323205, |
| "grad_norm": 0.9413596391677856, |
| "learning_rate": 0.0008999, |
| "loss": 3.3289, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 1.064833164215088, |
| "learning_rate": 0.0009099, |
| "loss": 3.3546, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.97349709114415, |
| "grad_norm": 0.9576635360717773, |
| "learning_rate": 0.0009199000000000001, |
| "loss": 3.3515, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.0058177117000646, |
| "grad_norm": 0.8599377274513245, |
| "learning_rate": 0.0009299, |
| "loss": 3.3331, |
| "step": 9300 |
| }, |
| { |
| "epoch": 3.038138332255979, |
| "grad_norm": 0.8528821468353271, |
| "learning_rate": 0.0009399, |
| "loss": 3.2379, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.070458952811894, |
| "grad_norm": 1.010067105293274, |
| "learning_rate": 0.0009498999999999999, |
| "loss": 3.2548, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.1027795733678087, |
| "grad_norm": 1.0422749519348145, |
| "learning_rate": 0.0009599, |
| "loss": 3.2458, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.1351001939237233, |
| "grad_norm": 1.1780894994735718, |
| "learning_rate": 0.0009699, |
| "loss": 3.2479, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.167420814479638, |
| "grad_norm": 1.0699479579925537, |
| "learning_rate": 0.0009799, |
| "loss": 3.2466, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.199741435035553, |
| "grad_norm": 0.7220283150672913, |
| "learning_rate": 0.0009899, |
| "loss": 3.2282, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.2320620555914674, |
| "grad_norm": 0.9945827126502991, |
| "learning_rate": 0.0009999, |
| "loss": 3.2569, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.264382676147382, |
| "grad_norm": 0.9884164929389954, |
| "learning_rate": 0.001, |
| "loss": 3.2202, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.2967032967032965, |
| "grad_norm": 0.9047076106071472, |
| "learning_rate": 0.001, |
| "loss": 3.2181, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.3290239172592115, |
| "grad_norm": 1.3361272811889648, |
| "learning_rate": 0.001, |
| "loss": 3.2336, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.361344537815126, |
| "grad_norm": 1.0945632457733154, |
| "learning_rate": 0.001, |
| "loss": 3.2355, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.3936651583710407, |
| "grad_norm": 1.010493278503418, |
| "learning_rate": 0.001, |
| "loss": 3.2467, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.425985778926955, |
| "grad_norm": 0.941750705242157, |
| "learning_rate": 0.001, |
| "loss": 3.2268, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.45830639948287, |
| "grad_norm": 0.6709722876548767, |
| "learning_rate": 0.001, |
| "loss": 3.2282, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.490627020038785, |
| "grad_norm": 0.8207814693450928, |
| "learning_rate": 0.001, |
| "loss": 3.2243, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.5229476405946993, |
| "grad_norm": 0.9374675154685974, |
| "learning_rate": 0.001, |
| "loss": 3.2398, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.555268261150614, |
| "grad_norm": 1.0046817064285278, |
| "learning_rate": 0.001, |
| "loss": 3.2387, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.587588881706529, |
| "grad_norm": 1.0659905672073364, |
| "learning_rate": 0.001, |
| "loss": 3.2373, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.6199095022624435, |
| "grad_norm": 0.8840706944465637, |
| "learning_rate": 0.001, |
| "loss": 3.2419, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.652230122818358, |
| "grad_norm": 0.8998158574104309, |
| "learning_rate": 0.001, |
| "loss": 3.2298, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.684550743374273, |
| "grad_norm": 0.7526170015335083, |
| "learning_rate": 0.001, |
| "loss": 3.2266, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.7168713639301876, |
| "grad_norm": 1.02177095413208, |
| "learning_rate": 0.001, |
| "loss": 3.2299, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.749191984486102, |
| "grad_norm": 1.0600848197937012, |
| "learning_rate": 0.001, |
| "loss": 3.2301, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.7815126050420167, |
| "grad_norm": 1.2486552000045776, |
| "learning_rate": 0.001, |
| "loss": 3.2267, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.8138332255979313, |
| "grad_norm": 1.2336997985839844, |
| "learning_rate": 0.001, |
| "loss": 3.2322, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 1.1906139850616455, |
| "learning_rate": 0.001, |
| "loss": 3.2321, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.878474466709761, |
| "grad_norm": 0.8188307881355286, |
| "learning_rate": 0.001, |
| "loss": 3.2081, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.9107950872656754, |
| "grad_norm": 0.9523578882217407, |
| "learning_rate": 0.001, |
| "loss": 3.2217, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.9431157078215904, |
| "grad_norm": 1.0636179447174072, |
| "learning_rate": 0.001, |
| "loss": 3.2072, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.975436328377505, |
| "grad_norm": 1.092067003250122, |
| "learning_rate": 0.001, |
| "loss": 3.2053, |
| "step": 12300 |
| }, |
| { |
| "epoch": 4.0077569489334195, |
| "grad_norm": 0.7680415511131287, |
| "learning_rate": 0.001, |
| "loss": 3.2015, |
| "step": 12400 |
| }, |
| { |
| "epoch": 4.040077569489334, |
| "grad_norm": 1.0675336122512817, |
| "learning_rate": 0.001, |
| "loss": 3.1144, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.072398190045249, |
| "grad_norm": 1.0144174098968506, |
| "learning_rate": 0.001, |
| "loss": 3.1165, |
| "step": 12600 |
| }, |
| { |
| "epoch": 4.104718810601163, |
| "grad_norm": 1.1183968782424927, |
| "learning_rate": 0.001, |
| "loss": 3.131, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.137039431157079, |
| "grad_norm": 0.8900427222251892, |
| "learning_rate": 0.001, |
| "loss": 3.1315, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.169360051712993, |
| "grad_norm": 0.9315294027328491, |
| "learning_rate": 0.001, |
| "loss": 3.1344, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.201680672268908, |
| "grad_norm": 1.214819312095642, |
| "learning_rate": 0.001, |
| "loss": 3.132, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.234001292824822, |
| "grad_norm": 0.7954821586608887, |
| "learning_rate": 0.001, |
| "loss": 3.1417, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.266321913380737, |
| "grad_norm": 0.927521824836731, |
| "learning_rate": 0.001, |
| "loss": 3.1207, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.298642533936651, |
| "grad_norm": 0.9967598915100098, |
| "learning_rate": 0.001, |
| "loss": 3.1457, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.330963154492566, |
| "grad_norm": 0.8189947009086609, |
| "learning_rate": 0.001, |
| "loss": 3.1434, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.3632837750484805, |
| "grad_norm": 0.8321822285652161, |
| "learning_rate": 0.001, |
| "loss": 3.1667, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.395604395604396, |
| "grad_norm": 0.9342663884162903, |
| "learning_rate": 0.001, |
| "loss": 3.1384, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.4279250161603105, |
| "grad_norm": 1.075761079788208, |
| "learning_rate": 0.001, |
| "loss": 3.1525, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.460245636716225, |
| "grad_norm": 0.8055844902992249, |
| "learning_rate": 0.001, |
| "loss": 3.1573, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.49256625727214, |
| "grad_norm": 1.074677586555481, |
| "learning_rate": 0.001, |
| "loss": 3.1582, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.524886877828054, |
| "grad_norm": 0.9087541699409485, |
| "learning_rate": 0.001, |
| "loss": 3.1483, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.557207498383969, |
| "grad_norm": 0.9732751846313477, |
| "learning_rate": 0.001, |
| "loss": 3.1317, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.589528118939883, |
| "grad_norm": 1.3061445951461792, |
| "learning_rate": 0.001, |
| "loss": 3.1436, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.621848739495798, |
| "grad_norm": 1.0117545127868652, |
| "learning_rate": 0.001, |
| "loss": 3.1514, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.654169360051713, |
| "grad_norm": 0.9364200830459595, |
| "learning_rate": 0.001, |
| "loss": 3.1453, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.686489980607628, |
| "grad_norm": 0.9485110640525818, |
| "learning_rate": 0.001, |
| "loss": 3.1505, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.7188106011635425, |
| "grad_norm": 0.7017455101013184, |
| "learning_rate": 0.001, |
| "loss": 3.1556, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.751131221719457, |
| "grad_norm": 0.7182191610336304, |
| "learning_rate": 0.001, |
| "loss": 3.1469, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.783451842275372, |
| "grad_norm": 0.871917724609375, |
| "learning_rate": 0.001, |
| "loss": 3.1624, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.815772462831286, |
| "grad_norm": 0.9741417169570923, |
| "learning_rate": 0.001, |
| "loss": 3.151, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.848093083387201, |
| "grad_norm": 0.966870903968811, |
| "learning_rate": 0.001, |
| "loss": 3.1085, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.880413703943116, |
| "grad_norm": 0.8608819842338562, |
| "learning_rate": 0.001, |
| "loss": 3.1423, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.912734324499031, |
| "grad_norm": 0.8575353622436523, |
| "learning_rate": 0.001, |
| "loss": 3.1509, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.945054945054945, |
| "grad_norm": 0.7007643580436707, |
| "learning_rate": 0.001, |
| "loss": 3.1507, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.97737556561086, |
| "grad_norm": 0.8379340767860413, |
| "learning_rate": 0.001, |
| "loss": 3.1407, |
| "step": 15400 |
| }, |
| { |
| "epoch": 5.009696186166774, |
| "grad_norm": 0.9032299518585205, |
| "learning_rate": 0.001, |
| "loss": 3.1197, |
| "step": 15500 |
| }, |
| { |
| "epoch": 5.042016806722689, |
| "grad_norm": 0.922421932220459, |
| "learning_rate": 0.001, |
| "loss": 3.0103, |
| "step": 15600 |
| }, |
| { |
| "epoch": 5.0743374272786035, |
| "grad_norm": 0.8886153697967529, |
| "learning_rate": 0.001, |
| "loss": 3.0239, |
| "step": 15700 |
| }, |
| { |
| "epoch": 5.106658047834518, |
| "grad_norm": 1.210507869720459, |
| "learning_rate": 0.001, |
| "loss": 3.0331, |
| "step": 15800 |
| }, |
| { |
| "epoch": 5.1389786683904335, |
| "grad_norm": 0.9552029371261597, |
| "learning_rate": 0.001, |
| "loss": 3.0191, |
| "step": 15900 |
| }, |
| { |
| "epoch": 5.171299288946348, |
| "grad_norm": 0.9923421740531921, |
| "learning_rate": 0.001, |
| "loss": 3.0519, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.203619909502263, |
| "grad_norm": 0.646263062953949, |
| "learning_rate": 0.001, |
| "loss": 3.0327, |
| "step": 16100 |
| }, |
| { |
| "epoch": 5.235940530058177, |
| "grad_norm": 0.9746344685554504, |
| "learning_rate": 0.001, |
| "loss": 3.0388, |
| "step": 16200 |
| }, |
| { |
| "epoch": 5.268261150614092, |
| "grad_norm": 0.894690215587616, |
| "learning_rate": 0.001, |
| "loss": 3.0451, |
| "step": 16300 |
| }, |
| { |
| "epoch": 5.300581771170006, |
| "grad_norm": 1.1923508644104004, |
| "learning_rate": 0.001, |
| "loss": 3.039, |
| "step": 16400 |
| }, |
| { |
| "epoch": 5.332902391725921, |
| "grad_norm": 0.9015272259712219, |
| "learning_rate": 0.001, |
| "loss": 3.0746, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.365223012281835, |
| "grad_norm": 1.0535194873809814, |
| "learning_rate": 0.001, |
| "loss": 3.0663, |
| "step": 16600 |
| }, |
| { |
| "epoch": 5.397543632837751, |
| "grad_norm": 0.8790969848632812, |
| "learning_rate": 0.001, |
| "loss": 3.0455, |
| "step": 16700 |
| }, |
| { |
| "epoch": 5.429864253393665, |
| "grad_norm": 0.8354184627532959, |
| "learning_rate": 0.001, |
| "loss": 3.0653, |
| "step": 16800 |
| }, |
| { |
| "epoch": 5.46218487394958, |
| "grad_norm": 1.1250206232070923, |
| "learning_rate": 0.001, |
| "loss": 3.0639, |
| "step": 16900 |
| }, |
| { |
| "epoch": 5.4945054945054945, |
| "grad_norm": 0.9991565942764282, |
| "learning_rate": 0.001, |
| "loss": 3.0435, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.526826115061409, |
| "grad_norm": 0.757892370223999, |
| "learning_rate": 0.001, |
| "loss": 3.0669, |
| "step": 17100 |
| }, |
| { |
| "epoch": 5.559146735617324, |
| "grad_norm": 0.8190027475357056, |
| "learning_rate": 0.001, |
| "loss": 3.0609, |
| "step": 17200 |
| }, |
| { |
| "epoch": 5.591467356173238, |
| "grad_norm": 0.9603754878044128, |
| "learning_rate": 0.001, |
| "loss": 3.0439, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.623787976729153, |
| "grad_norm": 0.5893958806991577, |
| "learning_rate": 0.001, |
| "loss": 3.0683, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.656108597285068, |
| "grad_norm": 0.927288830280304, |
| "learning_rate": 0.001, |
| "loss": 3.0408, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.688429217840983, |
| "grad_norm": 0.90427565574646, |
| "learning_rate": 0.001, |
| "loss": 3.0687, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.720749838396897, |
| "grad_norm": 0.8372429013252258, |
| "learning_rate": 0.001, |
| "loss": 3.0582, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.753070458952812, |
| "grad_norm": 0.9206283092498779, |
| "learning_rate": 0.001, |
| "loss": 3.0717, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.785391079508726, |
| "grad_norm": 0.970551609992981, |
| "learning_rate": 0.001, |
| "loss": 3.0689, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.817711700064641, |
| "grad_norm": 0.9441208839416504, |
| "learning_rate": 0.001, |
| "loss": 3.0412, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.850032320620556, |
| "grad_norm": 0.869175910949707, |
| "learning_rate": 0.001, |
| "loss": 3.0553, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 0.6702381372451782, |
| "learning_rate": 0.001, |
| "loss": 3.0553, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.914673561732386, |
| "grad_norm": 1.0862089395523071, |
| "learning_rate": 0.001, |
| "loss": 3.0741, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.9469941822883, |
| "grad_norm": 0.9103309512138367, |
| "learning_rate": 0.001, |
| "loss": 3.0748, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.979314802844215, |
| "grad_norm": 1.0982170104980469, |
| "learning_rate": 0.001, |
| "loss": 3.054, |
| "step": 18500 |
| }, |
| { |
| "epoch": 6.011635423400129, |
| "grad_norm": 1.0017380714416504, |
| "learning_rate": 0.001, |
| "loss": 3.0038, |
| "step": 18600 |
| }, |
| { |
| "epoch": 6.043956043956044, |
| "grad_norm": 1.0425845384597778, |
| "learning_rate": 0.001, |
| "loss": 2.9489, |
| "step": 18700 |
| }, |
| { |
| "epoch": 6.076276664511958, |
| "grad_norm": 1.030131220817566, |
| "learning_rate": 0.001, |
| "loss": 2.9291, |
| "step": 18800 |
| }, |
| { |
| "epoch": 6.108597285067873, |
| "grad_norm": 1.0179742574691772, |
| "learning_rate": 0.001, |
| "loss": 2.9501, |
| "step": 18900 |
| }, |
| { |
| "epoch": 6.140917905623788, |
| "grad_norm": 1.0169600248336792, |
| "learning_rate": 0.001, |
| "loss": 2.9339, |
| "step": 19000 |
| }, |
| { |
| "epoch": 6.173238526179703, |
| "grad_norm": 0.7874507308006287, |
| "learning_rate": 0.001, |
| "loss": 2.9623, |
| "step": 19100 |
| }, |
| { |
| "epoch": 6.2055591467356175, |
| "grad_norm": 0.7536994218826294, |
| "learning_rate": 0.001, |
| "loss": 2.9431, |
| "step": 19200 |
| }, |
| { |
| "epoch": 6.237879767291532, |
| "grad_norm": 1.3374706506729126, |
| "learning_rate": 0.001, |
| "loss": 2.9691, |
| "step": 19300 |
| }, |
| { |
| "epoch": 6.270200387847447, |
| "grad_norm": 0.9803719520568848, |
| "learning_rate": 0.001, |
| "loss": 2.9555, |
| "step": 19400 |
| }, |
| { |
| "epoch": 6.302521008403361, |
| "grad_norm": 0.898348331451416, |
| "learning_rate": 0.001, |
| "loss": 2.9691, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.334841628959276, |
| "grad_norm": 1.0121514797210693, |
| "learning_rate": 0.001, |
| "loss": 2.9758, |
| "step": 19600 |
| }, |
| { |
| "epoch": 6.36716224951519, |
| "grad_norm": 0.8829805850982666, |
| "learning_rate": 0.001, |
| "loss": 2.9724, |
| "step": 19700 |
| }, |
| { |
| "epoch": 6.399482870071106, |
| "grad_norm": 1.1278361082077026, |
| "learning_rate": 0.001, |
| "loss": 2.9596, |
| "step": 19800 |
| }, |
| { |
| "epoch": 6.43180349062702, |
| "grad_norm": 0.9255377650260925, |
| "learning_rate": 0.001, |
| "loss": 2.9845, |
| "step": 19900 |
| }, |
| { |
| "epoch": 6.464124111182935, |
| "grad_norm": 0.867389440536499, |
| "learning_rate": 0.001, |
| "loss": 2.9652, |
| "step": 20000 |
| }, |
| { |
| "epoch": 6.496444731738849, |
| "grad_norm": 0.8110164999961853, |
| "learning_rate": 0.001, |
| "loss": 2.9839, |
| "step": 20100 |
| }, |
| { |
| "epoch": 6.528765352294764, |
| "grad_norm": 1.071718454360962, |
| "learning_rate": 0.001, |
| "loss": 2.9835, |
| "step": 20200 |
| }, |
| { |
| "epoch": 6.5610859728506785, |
| "grad_norm": 1.1645890474319458, |
| "learning_rate": 0.001, |
| "loss": 2.9578, |
| "step": 20300 |
| }, |
| { |
| "epoch": 6.593406593406593, |
| "grad_norm": 0.9051645994186401, |
| "learning_rate": 0.001, |
| "loss": 2.9964, |
| "step": 20400 |
| }, |
| { |
| "epoch": 6.625727213962508, |
| "grad_norm": 1.0463429689407349, |
| "learning_rate": 0.001, |
| "loss": 2.9768, |
| "step": 20500 |
| }, |
| { |
| "epoch": 6.658047834518423, |
| "grad_norm": 1.1838918924331665, |
| "learning_rate": 0.001, |
| "loss": 2.9879, |
| "step": 20600 |
| }, |
| { |
| "epoch": 6.690368455074338, |
| "grad_norm": 0.7269249558448792, |
| "learning_rate": 0.001, |
| "loss": 2.9872, |
| "step": 20700 |
| }, |
| { |
| "epoch": 6.722689075630252, |
| "grad_norm": 0.8935712575912476, |
| "learning_rate": 0.001, |
| "loss": 2.978, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.755009696186167, |
| "grad_norm": 1.0895768404006958, |
| "learning_rate": 0.001, |
| "loss": 2.9891, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.787330316742081, |
| "grad_norm": 0.7446025609970093, |
| "learning_rate": 0.001, |
| "loss": 2.9789, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.819650937297996, |
| "grad_norm": 1.0650365352630615, |
| "learning_rate": 0.001, |
| "loss": 2.9689, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.85197155785391, |
| "grad_norm": 1.0426945686340332, |
| "learning_rate": 0.001, |
| "loss": 3.003, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.884292178409826, |
| "grad_norm": 0.9406901597976685, |
| "learning_rate": 0.001, |
| "loss": 2.9873, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.91661279896574, |
| "grad_norm": 0.8841379284858704, |
| "learning_rate": 0.001, |
| "loss": 2.9859, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.948933419521655, |
| "grad_norm": 0.9247796535491943, |
| "learning_rate": 0.001, |
| "loss": 2.9769, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.98125404007757, |
| "grad_norm": 0.9202895164489746, |
| "learning_rate": 0.001, |
| "loss": 2.9972, |
| "step": 21600 |
| }, |
| { |
| "epoch": 7.013574660633484, |
| "grad_norm": 1.2635228633880615, |
| "learning_rate": 0.001, |
| "loss": 2.9374, |
| "step": 21700 |
| }, |
| { |
| "epoch": 7.045895281189399, |
| "grad_norm": 1.269637107849121, |
| "learning_rate": 0.001, |
| "loss": 2.8665, |
| "step": 21800 |
| }, |
| { |
| "epoch": 7.078215901745313, |
| "grad_norm": 1.1895501613616943, |
| "learning_rate": 0.001, |
| "loss": 2.8506, |
| "step": 21900 |
| }, |
| { |
| "epoch": 7.110536522301228, |
| "grad_norm": 0.8704085350036621, |
| "learning_rate": 0.001, |
| "loss": 2.8815, |
| "step": 22000 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 1.042540192604065, |
| "learning_rate": 0.001, |
| "loss": 2.8699, |
| "step": 22100 |
| }, |
| { |
| "epoch": 7.175177763413058, |
| "grad_norm": 0.9110554456710815, |
| "learning_rate": 0.001, |
| "loss": 2.8704, |
| "step": 22200 |
| }, |
| { |
| "epoch": 7.207498383968972, |
| "grad_norm": 0.951602578163147, |
| "learning_rate": 0.001, |
| "loss": 2.8674, |
| "step": 22300 |
| }, |
| { |
| "epoch": 7.239819004524887, |
| "grad_norm": 0.988113284111023, |
| "learning_rate": 0.001, |
| "loss": 2.8939, |
| "step": 22400 |
| }, |
| { |
| "epoch": 7.2721396250808015, |
| "grad_norm": 1.3134607076644897, |
| "learning_rate": 0.001, |
| "loss": 2.8967, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.304460245636716, |
| "grad_norm": 0.9608508348464966, |
| "learning_rate": 0.001, |
| "loss": 2.8733, |
| "step": 22600 |
| }, |
| { |
| "epoch": 7.336780866192631, |
| "grad_norm": 0.9588029384613037, |
| "learning_rate": 0.001, |
| "loss": 2.8962, |
| "step": 22700 |
| }, |
| { |
| "epoch": 7.369101486748546, |
| "grad_norm": 1.15349280834198, |
| "learning_rate": 0.001, |
| "loss": 2.8861, |
| "step": 22800 |
| }, |
| { |
| "epoch": 7.401422107304461, |
| "grad_norm": 1.198407530784607, |
| "learning_rate": 0.001, |
| "loss": 2.8782, |
| "step": 22900 |
| }, |
| { |
| "epoch": 7.433742727860375, |
| "grad_norm": 0.9935774803161621, |
| "learning_rate": 0.001, |
| "loss": 2.9192, |
| "step": 23000 |
| }, |
| { |
| "epoch": 7.46606334841629, |
| "grad_norm": 1.0621734857559204, |
| "learning_rate": 0.001, |
| "loss": 2.9049, |
| "step": 23100 |
| }, |
| { |
| "epoch": 7.498383968972204, |
| "grad_norm": 1.2372674942016602, |
| "learning_rate": 0.001, |
| "loss": 2.9289, |
| "step": 23200 |
| }, |
| { |
| "epoch": 7.530704589528119, |
| "grad_norm": 1.0882078409194946, |
| "learning_rate": 0.001, |
| "loss": 2.9114, |
| "step": 23300 |
| }, |
| { |
| "epoch": 7.563025210084033, |
| "grad_norm": 0.8493301272392273, |
| "learning_rate": 0.001, |
| "loss": 2.9269, |
| "step": 23400 |
| }, |
| { |
| "epoch": 7.595345830639948, |
| "grad_norm": 0.7285293340682983, |
| "learning_rate": 0.001, |
| "loss": 2.9255, |
| "step": 23500 |
| }, |
| { |
| "epoch": 7.6276664511958625, |
| "grad_norm": 1.0530344247817993, |
| "learning_rate": 0.001, |
| "loss": 2.9048, |
| "step": 23600 |
| }, |
| { |
| "epoch": 7.659987071751778, |
| "grad_norm": 1.0908581018447876, |
| "learning_rate": 0.001, |
| "loss": 2.9228, |
| "step": 23700 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 0.9890776872634888, |
| "learning_rate": 0.001, |
| "loss": 2.9278, |
| "step": 23800 |
| }, |
| { |
| "epoch": 7.724628312863607, |
| "grad_norm": 0.9740605354309082, |
| "learning_rate": 0.001, |
| "loss": 2.9043, |
| "step": 23900 |
| }, |
| { |
| "epoch": 7.756948933419522, |
| "grad_norm": 1.2037266492843628, |
| "learning_rate": 0.001, |
| "loss": 2.9253, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.789269553975436, |
| "grad_norm": 1.1273502111434937, |
| "learning_rate": 0.001, |
| "loss": 2.9193, |
| "step": 24100 |
| }, |
| { |
| "epoch": 7.821590174531351, |
| "grad_norm": 1.109514594078064, |
| "learning_rate": 0.001, |
| "loss": 2.9252, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.853910795087265, |
| "grad_norm": 1.082470178604126, |
| "learning_rate": 0.001, |
| "loss": 2.9334, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.886231415643181, |
| "grad_norm": 1.14096200466156, |
| "learning_rate": 0.001, |
| "loss": 2.9195, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.918552036199095, |
| "grad_norm": 1.1023954153060913, |
| "learning_rate": 0.001, |
| "loss": 2.9416, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.95087265675501, |
| "grad_norm": 0.8876301646232605, |
| "learning_rate": 0.001, |
| "loss": 2.9058, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.983193277310924, |
| "grad_norm": 1.178880214691162, |
| "learning_rate": 0.001, |
| "loss": 2.9248, |
| "step": 24700 |
| }, |
| { |
| "epoch": 8.015513897866839, |
| "grad_norm": 1.0578535795211792, |
| "learning_rate": 0.001, |
| "loss": 2.8523, |
| "step": 24800 |
| }, |
| { |
| "epoch": 8.047834518422754, |
| "grad_norm": 1.3258286714553833, |
| "learning_rate": 0.001, |
| "loss": 2.8026, |
| "step": 24900 |
| }, |
| { |
| "epoch": 8.080155138978668, |
| "grad_norm": 1.2805758714675903, |
| "learning_rate": 0.001, |
| "loss": 2.7986, |
| "step": 25000 |
| }, |
| { |
| "epoch": 8.112475759534583, |
| "grad_norm": 0.9195302128791809, |
| "learning_rate": 0.001, |
| "loss": 2.8141, |
| "step": 25100 |
| }, |
| { |
| "epoch": 8.144796380090497, |
| "grad_norm": 1.085827112197876, |
| "learning_rate": 0.001, |
| "loss": 2.813, |
| "step": 25200 |
| }, |
| { |
| "epoch": 8.177117000646412, |
| "grad_norm": 1.5205888748168945, |
| "learning_rate": 0.001, |
| "loss": 2.8136, |
| "step": 25300 |
| }, |
| { |
| "epoch": 8.209437621202326, |
| "grad_norm": 1.387506127357483, |
| "learning_rate": 0.001, |
| "loss": 2.809, |
| "step": 25400 |
| }, |
| { |
| "epoch": 8.241758241758241, |
| "grad_norm": 0.9745718240737915, |
| "learning_rate": 0.001, |
| "loss": 2.8106, |
| "step": 25500 |
| }, |
| { |
| "epoch": 8.274078862314157, |
| "grad_norm": 1.1379154920578003, |
| "learning_rate": 0.001, |
| "loss": 2.8474, |
| "step": 25600 |
| }, |
| { |
| "epoch": 8.306399482870072, |
| "grad_norm": 1.5041371583938599, |
| "learning_rate": 0.001, |
| "loss": 2.8457, |
| "step": 25700 |
| }, |
| { |
| "epoch": 8.338720103425986, |
| "grad_norm": 1.2022348642349243, |
| "learning_rate": 0.001, |
| "loss": 2.8391, |
| "step": 25800 |
| }, |
| { |
| "epoch": 8.371040723981901, |
| "grad_norm": 1.372659683227539, |
| "learning_rate": 0.001, |
| "loss": 2.8052, |
| "step": 25900 |
| }, |
| { |
| "epoch": 8.403361344537815, |
| "grad_norm": 1.4856975078582764, |
| "learning_rate": 0.001, |
| "loss": 2.8294, |
| "step": 26000 |
| }, |
| { |
| "epoch": 8.43568196509373, |
| "grad_norm": 1.080346703529358, |
| "learning_rate": 0.001, |
| "loss": 2.8348, |
| "step": 26100 |
| }, |
| { |
| "epoch": 8.468002585649645, |
| "grad_norm": 1.1464358568191528, |
| "learning_rate": 0.001, |
| "loss": 2.8426, |
| "step": 26200 |
| }, |
| { |
| "epoch": 8.50032320620556, |
| "grad_norm": 1.155603051185608, |
| "learning_rate": 0.001, |
| "loss": 2.8648, |
| "step": 26300 |
| }, |
| { |
| "epoch": 8.532643826761474, |
| "grad_norm": 1.1736012697219849, |
| "learning_rate": 0.001, |
| "loss": 2.8652, |
| "step": 26400 |
| }, |
| { |
| "epoch": 8.564964447317388, |
| "grad_norm": 1.1918305158615112, |
| "learning_rate": 0.001, |
| "loss": 2.8356, |
| "step": 26500 |
| }, |
| { |
| "epoch": 8.597285067873303, |
| "grad_norm": 0.9844455122947693, |
| "learning_rate": 0.001, |
| "loss": 2.8573, |
| "step": 26600 |
| }, |
| { |
| "epoch": 8.629605688429217, |
| "grad_norm": 1.0055450201034546, |
| "learning_rate": 0.001, |
| "loss": 2.8432, |
| "step": 26700 |
| }, |
| { |
| "epoch": 8.661926308985132, |
| "grad_norm": 1.429309368133545, |
| "learning_rate": 0.001, |
| "loss": 2.8591, |
| "step": 26800 |
| }, |
| { |
| "epoch": 8.694246929541046, |
| "grad_norm": 1.6253108978271484, |
| "learning_rate": 0.001, |
| "loss": 2.8641, |
| "step": 26900 |
| }, |
| { |
| "epoch": 8.726567550096961, |
| "grad_norm": 1.1094082593917847, |
| "learning_rate": 0.001, |
| "loss": 2.8623, |
| "step": 27000 |
| }, |
| { |
| "epoch": 8.758888170652877, |
| "grad_norm": 1.6157804727554321, |
| "learning_rate": 0.001, |
| "loss": 2.8444, |
| "step": 27100 |
| }, |
| { |
| "epoch": 8.791208791208792, |
| "grad_norm": 1.1055054664611816, |
| "learning_rate": 0.001, |
| "loss": 2.8503, |
| "step": 27200 |
| }, |
| { |
| "epoch": 8.823529411764707, |
| "grad_norm": 1.0860084295272827, |
| "learning_rate": 0.001, |
| "loss": 2.8627, |
| "step": 27300 |
| }, |
| { |
| "epoch": 8.855850032320621, |
| "grad_norm": 1.8660216331481934, |
| "learning_rate": 0.001, |
| "loss": 2.863, |
| "step": 27400 |
| }, |
| { |
| "epoch": 8.888170652876536, |
| "grad_norm": 0.9799934029579163, |
| "learning_rate": 0.001, |
| "loss": 2.8736, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.92049127343245, |
| "grad_norm": 1.2370038032531738, |
| "learning_rate": 0.001, |
| "loss": 2.8695, |
| "step": 27600 |
| }, |
| { |
| "epoch": 8.952811893988365, |
| "grad_norm": 1.547116994857788, |
| "learning_rate": 0.001, |
| "loss": 2.8767, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.98513251454428, |
| "grad_norm": 1.3884578943252563, |
| "learning_rate": 0.001, |
| "loss": 2.8653, |
| "step": 27800 |
| }, |
| { |
| "epoch": 9.017453135100194, |
| "grad_norm": 1.0243676900863647, |
| "learning_rate": 0.001, |
| "loss": 2.7814, |
| "step": 27900 |
| }, |
| { |
| "epoch": 9.049773755656108, |
| "grad_norm": 1.246034026145935, |
| "learning_rate": 0.001, |
| "loss": 2.7149, |
| "step": 28000 |
| }, |
| { |
| "epoch": 9.082094376212023, |
| "grad_norm": 1.4059250354766846, |
| "learning_rate": 0.001, |
| "loss": 2.7479, |
| "step": 28100 |
| }, |
| { |
| "epoch": 9.114414996767938, |
| "grad_norm": 1.1591936349868774, |
| "learning_rate": 0.001, |
| "loss": 2.7149, |
| "step": 28200 |
| }, |
| { |
| "epoch": 9.146735617323852, |
| "grad_norm": 1.3190242052078247, |
| "learning_rate": 0.001, |
| "loss": 2.7682, |
| "step": 28300 |
| }, |
| { |
| "epoch": 9.179056237879767, |
| "grad_norm": 1.0201836824417114, |
| "learning_rate": 0.001, |
| "loss": 2.7568, |
| "step": 28400 |
| }, |
| { |
| "epoch": 9.211376858435681, |
| "grad_norm": 1.3165394067764282, |
| "learning_rate": 0.001, |
| "loss": 2.761, |
| "step": 28500 |
| }, |
| { |
| "epoch": 9.243697478991596, |
| "grad_norm": 1.5911014080047607, |
| "learning_rate": 0.001, |
| "loss": 2.7625, |
| "step": 28600 |
| }, |
| { |
| "epoch": 9.276018099547512, |
| "grad_norm": 1.216652750968933, |
| "learning_rate": 0.001, |
| "loss": 2.7949, |
| "step": 28700 |
| }, |
| { |
| "epoch": 9.308338720103427, |
| "grad_norm": 1.302988886833191, |
| "learning_rate": 0.001, |
| "loss": 2.7728, |
| "step": 28800 |
| }, |
| { |
| "epoch": 9.340659340659341, |
| "grad_norm": 1.0725343227386475, |
| "learning_rate": 0.001, |
| "loss": 2.7737, |
| "step": 28900 |
| }, |
| { |
| "epoch": 9.372979961215256, |
| "grad_norm": 1.4870610237121582, |
| "learning_rate": 0.001, |
| "loss": 2.7814, |
| "step": 29000 |
| }, |
| { |
| "epoch": 9.40530058177117, |
| "grad_norm": 1.8429652452468872, |
| "learning_rate": 0.001, |
| "loss": 2.7755, |
| "step": 29100 |
| }, |
| { |
| "epoch": 9.437621202327085, |
| "grad_norm": 1.161829948425293, |
| "learning_rate": 0.001, |
| "loss": 2.7867, |
| "step": 29200 |
| }, |
| { |
| "epoch": 9.469941822883, |
| "grad_norm": 1.0724472999572754, |
| "learning_rate": 0.001, |
| "loss": 2.7889, |
| "step": 29300 |
| }, |
| { |
| "epoch": 9.502262443438914, |
| "grad_norm": 1.493570327758789, |
| "learning_rate": 0.001, |
| "loss": 2.7713, |
| "step": 29400 |
| }, |
| { |
| "epoch": 9.534583063994829, |
| "grad_norm": 1.497449278831482, |
| "learning_rate": 0.001, |
| "loss": 2.7983, |
| "step": 29500 |
| }, |
| { |
| "epoch": 9.566903684550743, |
| "grad_norm": 1.244484543800354, |
| "learning_rate": 0.001, |
| "loss": 2.7882, |
| "step": 29600 |
| }, |
| { |
| "epoch": 9.599224305106658, |
| "grad_norm": 1.44032621383667, |
| "learning_rate": 0.001, |
| "loss": 2.8069, |
| "step": 29700 |
| }, |
| { |
| "epoch": 9.631544925662572, |
| "grad_norm": 0.8440661430358887, |
| "learning_rate": 0.001, |
| "loss": 2.8004, |
| "step": 29800 |
| }, |
| { |
| "epoch": 9.663865546218487, |
| "grad_norm": 1.1102241277694702, |
| "learning_rate": 0.001, |
| "loss": 2.793, |
| "step": 29900 |
| }, |
| { |
| "epoch": 9.696186166774401, |
| "grad_norm": 0.9780630469322205, |
| "learning_rate": 0.001, |
| "loss": 2.791, |
| "step": 30000 |
| }, |
| { |
| "epoch": 9.728506787330316, |
| "grad_norm": 1.3218283653259277, |
| "learning_rate": 0.001, |
| "loss": 2.7984, |
| "step": 30100 |
| }, |
| { |
| "epoch": 9.760827407886232, |
| "grad_norm": 1.3901311159133911, |
| "learning_rate": 0.001, |
| "loss": 2.7985, |
| "step": 30200 |
| }, |
| { |
| "epoch": 9.793148028442147, |
| "grad_norm": 1.4005396366119385, |
| "learning_rate": 0.001, |
| "loss": 2.7985, |
| "step": 30300 |
| }, |
| { |
| "epoch": 9.825468648998061, |
| "grad_norm": 1.001150369644165, |
| "learning_rate": 0.001, |
| "loss": 2.807, |
| "step": 30400 |
| }, |
| { |
| "epoch": 9.857789269553976, |
| "grad_norm": 1.3119566440582275, |
| "learning_rate": 0.001, |
| "loss": 2.8018, |
| "step": 30500 |
| }, |
| { |
| "epoch": 9.89010989010989, |
| "grad_norm": 1.1732113361358643, |
| "learning_rate": 0.001, |
| "loss": 2.8034, |
| "step": 30600 |
| }, |
| { |
| "epoch": 9.922430510665805, |
| "grad_norm": 1.5517836809158325, |
| "learning_rate": 0.001, |
| "loss": 2.8192, |
| "step": 30700 |
| }, |
| { |
| "epoch": 9.95475113122172, |
| "grad_norm": 1.5819275379180908, |
| "learning_rate": 0.001, |
| "loss": 2.8089, |
| "step": 30800 |
| }, |
| { |
| "epoch": 9.987071751777634, |
| "grad_norm": 1.0927438735961914, |
| "learning_rate": 0.001, |
| "loss": 2.8199, |
| "step": 30900 |
| }, |
| { |
| "epoch": 10.019392372333549, |
| "grad_norm": 1.000216007232666, |
| "learning_rate": 0.001, |
| "loss": 2.7301, |
| "step": 31000 |
| }, |
| { |
| "epoch": 10.051712992889463, |
| "grad_norm": 1.1941349506378174, |
| "learning_rate": 0.001, |
| "loss": 2.6769, |
| "step": 31100 |
| }, |
| { |
| "epoch": 10.084033613445378, |
| "grad_norm": 1.313490629196167, |
| "learning_rate": 0.001, |
| "loss": 2.697, |
| "step": 31200 |
| }, |
| { |
| "epoch": 10.116354234001292, |
| "grad_norm": 0.816692590713501, |
| "learning_rate": 0.001, |
| "loss": 2.7027, |
| "step": 31300 |
| }, |
| { |
| "epoch": 10.148674854557207, |
| "grad_norm": 1.0504392385482788, |
| "learning_rate": 0.001, |
| "loss": 2.6844, |
| "step": 31400 |
| }, |
| { |
| "epoch": 10.180995475113122, |
| "grad_norm": 0.7781311869621277, |
| "learning_rate": 0.001, |
| "loss": 2.7091, |
| "step": 31500 |
| }, |
| { |
| "epoch": 10.213316095669036, |
| "grad_norm": 1.0855755805969238, |
| "learning_rate": 0.001, |
| "loss": 2.7003, |
| "step": 31600 |
| }, |
| { |
| "epoch": 10.24563671622495, |
| "grad_norm": 0.8881792426109314, |
| "learning_rate": 0.001, |
| "loss": 2.7052, |
| "step": 31700 |
| }, |
| { |
| "epoch": 10.277957336780867, |
| "grad_norm": 0.7997879385948181, |
| "learning_rate": 0.001, |
| "loss": 2.7191, |
| "step": 31800 |
| }, |
| { |
| "epoch": 10.310277957336782, |
| "grad_norm": 0.7601060271263123, |
| "learning_rate": 0.001, |
| "loss": 2.7178, |
| "step": 31900 |
| }, |
| { |
| "epoch": 10.342598577892696, |
| "grad_norm": 1.0980945825576782, |
| "learning_rate": 0.001, |
| "loss": 2.7123, |
| "step": 32000 |
| }, |
| { |
| "epoch": 10.37491919844861, |
| "grad_norm": 0.8539420366287231, |
| "learning_rate": 0.001, |
| "loss": 2.7201, |
| "step": 32100 |
| }, |
| { |
| "epoch": 10.407239819004525, |
| "grad_norm": 0.9446685314178467, |
| "learning_rate": 0.001, |
| "loss": 2.7538, |
| "step": 32200 |
| }, |
| { |
| "epoch": 10.43956043956044, |
| "grad_norm": 1.2443652153015137, |
| "learning_rate": 0.001, |
| "loss": 2.7351, |
| "step": 32300 |
| }, |
| { |
| "epoch": 10.471881060116354, |
| "grad_norm": 1.1149390935897827, |
| "learning_rate": 0.001, |
| "loss": 2.7451, |
| "step": 32400 |
| }, |
| { |
| "epoch": 10.504201680672269, |
| "grad_norm": 1.2788093090057373, |
| "learning_rate": 0.001, |
| "loss": 2.7387, |
| "step": 32500 |
| }, |
| { |
| "epoch": 10.536522301228183, |
| "grad_norm": 0.862935483455658, |
| "learning_rate": 0.001, |
| "loss": 2.7337, |
| "step": 32600 |
| }, |
| { |
| "epoch": 10.568842921784098, |
| "grad_norm": 1.0679367780685425, |
| "learning_rate": 0.001, |
| "loss": 2.7335, |
| "step": 32700 |
| }, |
| { |
| "epoch": 10.601163542340013, |
| "grad_norm": 1.15278160572052, |
| "learning_rate": 0.001, |
| "loss": 2.7268, |
| "step": 32800 |
| }, |
| { |
| "epoch": 10.633484162895927, |
| "grad_norm": 0.8131126165390015, |
| "learning_rate": 0.001, |
| "loss": 2.7518, |
| "step": 32900 |
| }, |
| { |
| "epoch": 10.665804783451842, |
| "grad_norm": 0.9842662811279297, |
| "learning_rate": 0.001, |
| "loss": 2.723, |
| "step": 33000 |
| }, |
| { |
| "epoch": 10.698125404007756, |
| "grad_norm": 1.0702661275863647, |
| "learning_rate": 0.001, |
| "loss": 2.7401, |
| "step": 33100 |
| }, |
| { |
| "epoch": 10.73044602456367, |
| "grad_norm": 1.0665313005447388, |
| "learning_rate": 0.001, |
| "loss": 2.7595, |
| "step": 33200 |
| }, |
| { |
| "epoch": 10.762766645119587, |
| "grad_norm": 0.853649914264679, |
| "learning_rate": 0.001, |
| "loss": 2.7734, |
| "step": 33300 |
| }, |
| { |
| "epoch": 10.795087265675502, |
| "grad_norm": 1.1607190370559692, |
| "learning_rate": 0.001, |
| "loss": 2.7514, |
| "step": 33400 |
| }, |
| { |
| "epoch": 10.827407886231416, |
| "grad_norm": 1.0019394159317017, |
| "learning_rate": 0.001, |
| "loss": 2.7674, |
| "step": 33500 |
| }, |
| { |
| "epoch": 10.85972850678733, |
| "grad_norm": 1.0300143957138062, |
| "learning_rate": 0.001, |
| "loss": 2.7567, |
| "step": 33600 |
| }, |
| { |
| "epoch": 10.892049127343245, |
| "grad_norm": 0.9865690469741821, |
| "learning_rate": 0.001, |
| "loss": 2.7641, |
| "step": 33700 |
| }, |
| { |
| "epoch": 10.92436974789916, |
| "grad_norm": 0.9428331255912781, |
| "learning_rate": 0.001, |
| "loss": 2.7485, |
| "step": 33800 |
| }, |
| { |
| "epoch": 10.956690368455074, |
| "grad_norm": 0.8437827229499817, |
| "learning_rate": 0.001, |
| "loss": 2.7633, |
| "step": 33900 |
| }, |
| { |
| "epoch": 10.989010989010989, |
| "grad_norm": 1.3695709705352783, |
| "learning_rate": 0.001, |
| "loss": 2.7574, |
| "step": 34000 |
| }, |
| { |
| "epoch": 11.021331609566904, |
| "grad_norm": 1.3747538328170776, |
| "learning_rate": 0.001, |
| "loss": 2.7082, |
| "step": 34100 |
| }, |
| { |
| "epoch": 11.053652230122818, |
| "grad_norm": 0.9120927453041077, |
| "learning_rate": 0.001, |
| "loss": 2.6415, |
| "step": 34200 |
| }, |
| { |
| "epoch": 11.085972850678733, |
| "grad_norm": 1.1730124950408936, |
| "learning_rate": 0.001, |
| "loss": 2.6401, |
| "step": 34300 |
| }, |
| { |
| "epoch": 11.118293471234647, |
| "grad_norm": 0.9366941452026367, |
| "learning_rate": 0.001, |
| "loss": 2.6628, |
| "step": 34400 |
| }, |
| { |
| "epoch": 11.150614091790562, |
| "grad_norm": 1.0878065824508667, |
| "learning_rate": 0.001, |
| "loss": 2.6478, |
| "step": 34500 |
| }, |
| { |
| "epoch": 11.182934712346476, |
| "grad_norm": 0.989831805229187, |
| "learning_rate": 0.001, |
| "loss": 2.6668, |
| "step": 34600 |
| }, |
| { |
| "epoch": 11.215255332902391, |
| "grad_norm": 1.0725752115249634, |
| "learning_rate": 0.001, |
| "loss": 2.6692, |
| "step": 34700 |
| }, |
| { |
| "epoch": 11.247575953458306, |
| "grad_norm": 1.2443684339523315, |
| "learning_rate": 0.001, |
| "loss": 2.6533, |
| "step": 34800 |
| }, |
| { |
| "epoch": 11.279896574014222, |
| "grad_norm": 0.7641411423683167, |
| "learning_rate": 0.001, |
| "loss": 2.67, |
| "step": 34900 |
| }, |
| { |
| "epoch": 11.312217194570136, |
| "grad_norm": 1.127281904220581, |
| "learning_rate": 0.001, |
| "loss": 2.6623, |
| "step": 35000 |
| }, |
| { |
| "epoch": 11.344537815126051, |
| "grad_norm": 1.0336980819702148, |
| "learning_rate": 0.001, |
| "loss": 2.6648, |
| "step": 35100 |
| }, |
| { |
| "epoch": 11.376858435681966, |
| "grad_norm": 0.899639368057251, |
| "learning_rate": 0.001, |
| "loss": 2.6684, |
| "step": 35200 |
| }, |
| { |
| "epoch": 11.40917905623788, |
| "grad_norm": 1.1687790155410767, |
| "learning_rate": 0.001, |
| "loss": 2.6739, |
| "step": 35300 |
| }, |
| { |
| "epoch": 11.441499676793795, |
| "grad_norm": 1.050125241279602, |
| "learning_rate": 0.001, |
| "loss": 2.6682, |
| "step": 35400 |
| }, |
| { |
| "epoch": 11.47382029734971, |
| "grad_norm": 0.8031840920448303, |
| "learning_rate": 0.001, |
| "loss": 2.6905, |
| "step": 35500 |
| }, |
| { |
| "epoch": 11.506140917905624, |
| "grad_norm": 1.2813186645507812, |
| "learning_rate": 0.001, |
| "loss": 2.7205, |
| "step": 35600 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 0.9722304344177246, |
| "learning_rate": 0.001, |
| "loss": 2.6864, |
| "step": 35700 |
| }, |
| { |
| "epoch": 11.570782159017453, |
| "grad_norm": 0.9341808557510376, |
| "learning_rate": 0.001, |
| "loss": 2.6956, |
| "step": 35800 |
| }, |
| { |
| "epoch": 11.603102779573367, |
| "grad_norm": 1.0142689943313599, |
| "learning_rate": 0.001, |
| "loss": 2.6816, |
| "step": 35900 |
| }, |
| { |
| "epoch": 11.635423400129282, |
| "grad_norm": 0.813972532749176, |
| "learning_rate": 0.001, |
| "loss": 2.6859, |
| "step": 36000 |
| }, |
| { |
| "epoch": 11.667744020685197, |
| "grad_norm": 1.170541763305664, |
| "learning_rate": 0.001, |
| "loss": 2.596, |
| "step": 36100 |
| }, |
| { |
| "epoch": 11.700064641241111, |
| "grad_norm": 1.1519564390182495, |
| "learning_rate": 0.001, |
| "loss": 2.5998, |
| "step": 36200 |
| }, |
| { |
| "epoch": 11.732385261797026, |
| "grad_norm": 1.2155847549438477, |
| "learning_rate": 0.001, |
| "loss": 2.6024, |
| "step": 36300 |
| }, |
| { |
| "epoch": 11.764705882352942, |
| "grad_norm": 1.2150511741638184, |
| "learning_rate": 0.001, |
| "loss": 2.6089, |
| "step": 36400 |
| }, |
| { |
| "epoch": 11.797026502908857, |
| "grad_norm": 0.9761043787002563, |
| "learning_rate": 0.001, |
| "loss": 2.6097, |
| "step": 36500 |
| }, |
| { |
| "epoch": 11.829347123464771, |
| "grad_norm": 0.753853440284729, |
| "learning_rate": 0.001, |
| "loss": 2.6199, |
| "step": 36600 |
| }, |
| { |
| "epoch": 11.861667744020686, |
| "grad_norm": 1.1278780698776245, |
| "learning_rate": 0.001, |
| "loss": 2.6138, |
| "step": 36700 |
| }, |
| { |
| "epoch": 11.8939883645766, |
| "grad_norm": 0.9861252307891846, |
| "learning_rate": 0.001, |
| "loss": 2.6216, |
| "step": 36800 |
| }, |
| { |
| "epoch": 11.926308985132515, |
| "grad_norm": 0.7999703884124756, |
| "learning_rate": 0.001, |
| "loss": 2.6214, |
| "step": 36900 |
| }, |
| { |
| "epoch": 11.95862960568843, |
| "grad_norm": 0.8605625629425049, |
| "learning_rate": 0.001, |
| "loss": 2.6279, |
| "step": 37000 |
| }, |
| { |
| "epoch": 11.990950226244344, |
| "grad_norm": 1.095085620880127, |
| "learning_rate": 0.001, |
| "loss": 2.6221, |
| "step": 37100 |
| }, |
| { |
| "epoch": 12.023270846800258, |
| "grad_norm": 1.4295034408569336, |
| "learning_rate": 0.001, |
| "loss": 2.6083, |
| "step": 37200 |
| }, |
| { |
| "epoch": 12.055591467356173, |
| "grad_norm": 0.8960357308387756, |
| "learning_rate": 0.001, |
| "loss": 2.5875, |
| "step": 37300 |
| }, |
| { |
| "epoch": 12.087912087912088, |
| "grad_norm": 0.999830961227417, |
| "learning_rate": 0.001, |
| "loss": 2.5932, |
| "step": 37400 |
| }, |
| { |
| "epoch": 12.120232708468002, |
| "grad_norm": 1.110213041305542, |
| "learning_rate": 0.001, |
| "loss": 2.5877, |
| "step": 37500 |
| }, |
| { |
| "epoch": 12.152553329023917, |
| "grad_norm": 1.1710408926010132, |
| "learning_rate": 0.001, |
| "loss": 2.6005, |
| "step": 37600 |
| }, |
| { |
| "epoch": 12.184873949579831, |
| "grad_norm": 1.1342028379440308, |
| "learning_rate": 0.001, |
| "loss": 2.6232, |
| "step": 37700 |
| }, |
| { |
| "epoch": 12.217194570135746, |
| "grad_norm": 1.1005823612213135, |
| "learning_rate": 0.001, |
| "loss": 2.6374, |
| "step": 37800 |
| }, |
| { |
| "epoch": 12.24951519069166, |
| "grad_norm": 0.8393723964691162, |
| "learning_rate": 0.001, |
| "loss": 2.6276, |
| "step": 37900 |
| }, |
| { |
| "epoch": 12.281835811247577, |
| "grad_norm": 0.8750357031822205, |
| "learning_rate": 0.001, |
| "loss": 2.6316, |
| "step": 38000 |
| }, |
| { |
| "epoch": 12.314156431803491, |
| "grad_norm": 0.9854604601860046, |
| "learning_rate": 0.001, |
| "loss": 2.6355, |
| "step": 38100 |
| }, |
| { |
| "epoch": 12.346477052359406, |
| "grad_norm": 0.927906334400177, |
| "learning_rate": 0.001, |
| "loss": 2.617, |
| "step": 38200 |
| }, |
| { |
| "epoch": 12.37879767291532, |
| "grad_norm": 1.4400506019592285, |
| "learning_rate": 0.001, |
| "loss": 2.6094, |
| "step": 38300 |
| }, |
| { |
| "epoch": 12.411118293471235, |
| "grad_norm": 0.9851289987564087, |
| "learning_rate": 0.001, |
| "loss": 2.6556, |
| "step": 38400 |
| }, |
| { |
| "epoch": 12.44343891402715, |
| "grad_norm": 1.1679672002792358, |
| "learning_rate": 0.001, |
| "loss": 2.6526, |
| "step": 38500 |
| }, |
| { |
| "epoch": 12.475759534583064, |
| "grad_norm": 0.9106985330581665, |
| "learning_rate": 0.001, |
| "loss": 2.6378, |
| "step": 38600 |
| }, |
| { |
| "epoch": 12.508080155138979, |
| "grad_norm": 0.8666954636573792, |
| "learning_rate": 0.001, |
| "loss": 2.6454, |
| "step": 38700 |
| }, |
| { |
| "epoch": 12.540400775694893, |
| "grad_norm": 1.5032638311386108, |
| "learning_rate": 0.001, |
| "loss": 2.6457, |
| "step": 38800 |
| }, |
| { |
| "epoch": 12.572721396250808, |
| "grad_norm": 1.29007089138031, |
| "learning_rate": 0.001, |
| "loss": 2.6457, |
| "step": 38900 |
| }, |
| { |
| "epoch": 12.605042016806722, |
| "grad_norm": 1.187584400177002, |
| "learning_rate": 0.001, |
| "loss": 2.6636, |
| "step": 39000 |
| }, |
| { |
| "epoch": 12.637362637362637, |
| "grad_norm": 1.0897037982940674, |
| "learning_rate": 0.001, |
| "loss": 2.67, |
| "step": 39100 |
| }, |
| { |
| "epoch": 12.669683257918551, |
| "grad_norm": 1.2954078912734985, |
| "learning_rate": 0.001, |
| "loss": 2.6616, |
| "step": 39200 |
| }, |
| { |
| "epoch": 12.702003878474466, |
| "grad_norm": 0.8234089016914368, |
| "learning_rate": 0.001, |
| "loss": 2.6467, |
| "step": 39300 |
| }, |
| { |
| "epoch": 12.73432449903038, |
| "grad_norm": 0.9669334292411804, |
| "learning_rate": 0.001, |
| "loss": 2.6853, |
| "step": 39400 |
| }, |
| { |
| "epoch": 12.766645119586297, |
| "grad_norm": 0.931905210018158, |
| "learning_rate": 0.001, |
| "loss": 2.6756, |
| "step": 39500 |
| }, |
| { |
| "epoch": 12.798965740142211, |
| "grad_norm": 0.8956810235977173, |
| "learning_rate": 0.001, |
| "loss": 2.6557, |
| "step": 39600 |
| }, |
| { |
| "epoch": 12.831286360698126, |
| "grad_norm": 1.1757956743240356, |
| "learning_rate": 0.001, |
| "loss": 2.6833, |
| "step": 39700 |
| }, |
| { |
| "epoch": 12.86360698125404, |
| "grad_norm": 0.9770579934120178, |
| "learning_rate": 0.001, |
| "loss": 2.6633, |
| "step": 39800 |
| }, |
| { |
| "epoch": 12.895927601809955, |
| "grad_norm": 1.209007740020752, |
| "learning_rate": 0.001, |
| "loss": 2.6647, |
| "step": 39900 |
| }, |
| { |
| "epoch": 12.92824822236587, |
| "grad_norm": 1.2638388872146606, |
| "learning_rate": 0.001, |
| "loss": 2.673, |
| "step": 40000 |
| }, |
| { |
| "epoch": 12.960568842921784, |
| "grad_norm": 1.2949479818344116, |
| "learning_rate": 0.001, |
| "loss": 2.679, |
| "step": 40100 |
| }, |
| { |
| "epoch": 12.992889463477699, |
| "grad_norm": 1.257927656173706, |
| "learning_rate": 0.001, |
| "loss": 2.6769, |
| "step": 40200 |
| }, |
| { |
| "epoch": 13.025210084033613, |
| "grad_norm": 1.076108694076538, |
| "learning_rate": 0.001, |
| "loss": 2.5622, |
| "step": 40300 |
| }, |
| { |
| "epoch": 13.057530704589528, |
| "grad_norm": 1.1594815254211426, |
| "learning_rate": 0.001, |
| "loss": 2.5509, |
| "step": 40400 |
| }, |
| { |
| "epoch": 13.089851325145442, |
| "grad_norm": 0.9643914103507996, |
| "learning_rate": 0.001, |
| "loss": 2.5681, |
| "step": 40500 |
| }, |
| { |
| "epoch": 13.122171945701357, |
| "grad_norm": 1.0107759237289429, |
| "learning_rate": 0.001, |
| "loss": 2.5477, |
| "step": 40600 |
| }, |
| { |
| "epoch": 13.154492566257272, |
| "grad_norm": 0.7948814630508423, |
| "learning_rate": 0.001, |
| "loss": 2.5575, |
| "step": 40700 |
| }, |
| { |
| "epoch": 13.186813186813186, |
| "grad_norm": 1.3021531105041504, |
| "learning_rate": 0.001, |
| "loss": 2.5826, |
| "step": 40800 |
| }, |
| { |
| "epoch": 13.2191338073691, |
| "grad_norm": 0.9879547357559204, |
| "learning_rate": 0.001, |
| "loss": 2.5756, |
| "step": 40900 |
| }, |
| { |
| "epoch": 13.251454427925015, |
| "grad_norm": 0.8074339032173157, |
| "learning_rate": 0.001, |
| "loss": 2.558, |
| "step": 41000 |
| }, |
| { |
| "epoch": 13.283775048480932, |
| "grad_norm": 0.9623432159423828, |
| "learning_rate": 0.001, |
| "loss": 2.56, |
| "step": 41100 |
| }, |
| { |
| "epoch": 13.316095669036846, |
| "grad_norm": 1.2546730041503906, |
| "learning_rate": 0.001, |
| "loss": 2.5823, |
| "step": 41200 |
| }, |
| { |
| "epoch": 13.34841628959276, |
| "grad_norm": 1.051803469657898, |
| "learning_rate": 0.001, |
| "loss": 2.5743, |
| "step": 41300 |
| }, |
| { |
| "epoch": 13.380736910148675, |
| "grad_norm": 1.1379426717758179, |
| "learning_rate": 0.001, |
| "loss": 2.5904, |
| "step": 41400 |
| }, |
| { |
| "epoch": 13.41305753070459, |
| "grad_norm": 0.9064072370529175, |
| "learning_rate": 0.001, |
| "loss": 2.6003, |
| "step": 41500 |
| }, |
| { |
| "epoch": 13.445378151260504, |
| "grad_norm": 1.031472086906433, |
| "learning_rate": 0.001, |
| "loss": 2.6042, |
| "step": 41600 |
| }, |
| { |
| "epoch": 13.477698771816419, |
| "grad_norm": 1.0278077125549316, |
| "learning_rate": 0.001, |
| "loss": 2.5967, |
| "step": 41700 |
| }, |
| { |
| "epoch": 13.510019392372334, |
| "grad_norm": 0.9069276452064514, |
| "learning_rate": 0.001, |
| "loss": 2.6164, |
| "step": 41800 |
| }, |
| { |
| "epoch": 13.542340012928248, |
| "grad_norm": 1.1582199335098267, |
| "learning_rate": 0.001, |
| "loss": 2.596, |
| "step": 41900 |
| }, |
| { |
| "epoch": 13.574660633484163, |
| "grad_norm": 0.9175617694854736, |
| "learning_rate": 0.001, |
| "loss": 2.5997, |
| "step": 42000 |
| }, |
| { |
| "epoch": 13.606981254040077, |
| "grad_norm": 1.11009681224823, |
| "learning_rate": 0.001, |
| "loss": 2.6208, |
| "step": 42100 |
| }, |
| { |
| "epoch": 13.639301874595992, |
| "grad_norm": 1.0659406185150146, |
| "learning_rate": 0.001, |
| "loss": 2.6103, |
| "step": 42200 |
| }, |
| { |
| "epoch": 13.671622495151906, |
| "grad_norm": 1.2223795652389526, |
| "learning_rate": 0.001, |
| "loss": 2.6163, |
| "step": 42300 |
| }, |
| { |
| "epoch": 13.70394311570782, |
| "grad_norm": 0.9805840849876404, |
| "learning_rate": 0.001, |
| "loss": 2.6003, |
| "step": 42400 |
| }, |
| { |
| "epoch": 13.736263736263737, |
| "grad_norm": 1.130028247833252, |
| "learning_rate": 0.001, |
| "loss": 2.6155, |
| "step": 42500 |
| }, |
| { |
| "epoch": 13.768584356819652, |
| "grad_norm": 1.0690525770187378, |
| "learning_rate": 0.001, |
| "loss": 2.6287, |
| "step": 42600 |
| }, |
| { |
| "epoch": 13.800904977375566, |
| "grad_norm": 1.1252151727676392, |
| "learning_rate": 0.001, |
| "loss": 2.618, |
| "step": 42700 |
| }, |
| { |
| "epoch": 13.83322559793148, |
| "grad_norm": 1.2166658639907837, |
| "learning_rate": 0.001, |
| "loss": 2.6358, |
| "step": 42800 |
| }, |
| { |
| "epoch": 13.865546218487395, |
| "grad_norm": 1.0091367959976196, |
| "learning_rate": 0.001, |
| "loss": 2.6259, |
| "step": 42900 |
| }, |
| { |
| "epoch": 13.89786683904331, |
| "grad_norm": 1.2160910367965698, |
| "learning_rate": 0.001, |
| "loss": 2.6537, |
| "step": 43000 |
| }, |
| { |
| "epoch": 13.930187459599225, |
| "grad_norm": 1.0695080757141113, |
| "learning_rate": 0.001, |
| "loss": 2.6389, |
| "step": 43100 |
| }, |
| { |
| "epoch": 13.96250808015514, |
| "grad_norm": 0.9037491083145142, |
| "learning_rate": 0.001, |
| "loss": 2.6302, |
| "step": 43200 |
| }, |
| { |
| "epoch": 13.994828700711054, |
| "grad_norm": 1.2553186416625977, |
| "learning_rate": 0.001, |
| "loss": 2.6346, |
| "step": 43300 |
| }, |
| { |
| "epoch": 14.027149321266968, |
| "grad_norm": 1.0879606008529663, |
| "learning_rate": 0.001, |
| "loss": 2.5208, |
| "step": 43400 |
| }, |
| { |
| "epoch": 14.059469941822883, |
| "grad_norm": 1.118901252746582, |
| "learning_rate": 0.001, |
| "loss": 2.5002, |
| "step": 43500 |
| }, |
| { |
| "epoch": 14.091790562378797, |
| "grad_norm": 1.2253042459487915, |
| "learning_rate": 0.001, |
| "loss": 2.5058, |
| "step": 43600 |
| }, |
| { |
| "epoch": 14.124111182934712, |
| "grad_norm": 1.366318941116333, |
| "learning_rate": 0.001, |
| "loss": 2.5225, |
| "step": 43700 |
| }, |
| { |
| "epoch": 14.156431803490626, |
| "grad_norm": 1.1060069799423218, |
| "learning_rate": 0.001, |
| "loss": 2.5134, |
| "step": 43800 |
| }, |
| { |
| "epoch": 14.188752424046541, |
| "grad_norm": 1.1791584491729736, |
| "learning_rate": 0.001, |
| "loss": 2.541, |
| "step": 43900 |
| }, |
| { |
| "epoch": 14.221073044602456, |
| "grad_norm": 1.2990303039550781, |
| "learning_rate": 0.001, |
| "loss": 2.5358, |
| "step": 44000 |
| }, |
| { |
| "epoch": 14.25339366515837, |
| "grad_norm": 0.881264328956604, |
| "learning_rate": 0.001, |
| "loss": 2.5145, |
| "step": 44100 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 0.991474449634552, |
| "learning_rate": 0.001, |
| "loss": 2.5161, |
| "step": 44200 |
| }, |
| { |
| "epoch": 14.318034906270201, |
| "grad_norm": 1.303600788116455, |
| "learning_rate": 0.001, |
| "loss": 2.5507, |
| "step": 44300 |
| }, |
| { |
| "epoch": 14.350355526826116, |
| "grad_norm": 1.3152434825897217, |
| "learning_rate": 0.001, |
| "loss": 2.5458, |
| "step": 44400 |
| }, |
| { |
| "epoch": 14.38267614738203, |
| "grad_norm": 1.014987826347351, |
| "learning_rate": 0.001, |
| "loss": 2.5469, |
| "step": 44500 |
| }, |
| { |
| "epoch": 14.414996767937945, |
| "grad_norm": 0.9973893165588379, |
| "learning_rate": 0.001, |
| "loss": 2.5583, |
| "step": 44600 |
| }, |
| { |
| "epoch": 14.44731738849386, |
| "grad_norm": 1.0743112564086914, |
| "learning_rate": 0.001, |
| "loss": 2.5419, |
| "step": 44700 |
| }, |
| { |
| "epoch": 14.479638009049774, |
| "grad_norm": 1.2921549081802368, |
| "learning_rate": 0.001, |
| "loss": 2.5509, |
| "step": 44800 |
| }, |
| { |
| "epoch": 14.511958629605688, |
| "grad_norm": 0.9177213907241821, |
| "learning_rate": 0.001, |
| "loss": 2.5699, |
| "step": 44900 |
| }, |
| { |
| "epoch": 14.544279250161603, |
| "grad_norm": 1.4217877388000488, |
| "learning_rate": 0.001, |
| "loss": 2.5507, |
| "step": 45000 |
| }, |
| { |
| "epoch": 14.576599870717518, |
| "grad_norm": 1.3486171960830688, |
| "learning_rate": 0.001, |
| "loss": 2.5519, |
| "step": 45100 |
| }, |
| { |
| "epoch": 14.608920491273432, |
| "grad_norm": 1.2178871631622314, |
| "learning_rate": 0.001, |
| "loss": 2.5565, |
| "step": 45200 |
| }, |
| { |
| "epoch": 14.641241111829347, |
| "grad_norm": 1.29816734790802, |
| "learning_rate": 0.001, |
| "loss": 2.5733, |
| "step": 45300 |
| }, |
| { |
| "epoch": 14.673561732385261, |
| "grad_norm": 1.0312505960464478, |
| "learning_rate": 0.001, |
| "loss": 2.5617, |
| "step": 45400 |
| }, |
| { |
| "epoch": 14.705882352941176, |
| "grad_norm": 1.1762622594833374, |
| "learning_rate": 0.001, |
| "loss": 2.5717, |
| "step": 45500 |
| }, |
| { |
| "epoch": 14.738202973497092, |
| "grad_norm": 1.0525074005126953, |
| "learning_rate": 0.001, |
| "loss": 2.5774, |
| "step": 45600 |
| }, |
| { |
| "epoch": 14.770523594053007, |
| "grad_norm": 1.54285728931427, |
| "learning_rate": 0.001, |
| "loss": 2.5915, |
| "step": 45700 |
| }, |
| { |
| "epoch": 14.802844214608921, |
| "grad_norm": 1.0963060855865479, |
| "learning_rate": 0.001, |
| "loss": 2.5708, |
| "step": 45800 |
| }, |
| { |
| "epoch": 14.835164835164836, |
| "grad_norm": 0.9922949075698853, |
| "learning_rate": 0.001, |
| "loss": 2.5994, |
| "step": 45900 |
| }, |
| { |
| "epoch": 14.86748545572075, |
| "grad_norm": 1.2420673370361328, |
| "learning_rate": 0.001, |
| "loss": 2.5955, |
| "step": 46000 |
| }, |
| { |
| "epoch": 14.899806076276665, |
| "grad_norm": 0.9536318778991699, |
| "learning_rate": 0.001, |
| "loss": 2.5818, |
| "step": 46100 |
| }, |
| { |
| "epoch": 14.93212669683258, |
| "grad_norm": 1.0575299263000488, |
| "learning_rate": 0.001, |
| "loss": 2.6045, |
| "step": 46200 |
| }, |
| { |
| "epoch": 14.964447317388494, |
| "grad_norm": 0.8865509629249573, |
| "learning_rate": 0.001, |
| "loss": 2.5853, |
| "step": 46300 |
| }, |
| { |
| "epoch": 14.996767937944409, |
| "grad_norm": 1.2456960678100586, |
| "learning_rate": 0.001, |
| "loss": 2.5969, |
| "step": 46400 |
| }, |
| { |
| "epoch": 15.029088558500323, |
| "grad_norm": 1.0670504570007324, |
| "learning_rate": 0.001, |
| "loss": 2.4761, |
| "step": 46500 |
| }, |
| { |
| "epoch": 15.061409179056238, |
| "grad_norm": 0.948182225227356, |
| "learning_rate": 0.001, |
| "loss": 2.4568, |
| "step": 46600 |
| }, |
| { |
| "epoch": 15.093729799612152, |
| "grad_norm": 1.298714280128479, |
| "learning_rate": 0.001, |
| "loss": 2.4873, |
| "step": 46700 |
| }, |
| { |
| "epoch": 15.126050420168067, |
| "grad_norm": 1.0651124715805054, |
| "learning_rate": 0.001, |
| "loss": 2.4539, |
| "step": 46800 |
| }, |
| { |
| "epoch": 15.158371040723981, |
| "grad_norm": 0.9363376498222351, |
| "learning_rate": 0.001, |
| "loss": 2.4645, |
| "step": 46900 |
| }, |
| { |
| "epoch": 15.190691661279896, |
| "grad_norm": 1.1798454523086548, |
| "learning_rate": 0.001, |
| "loss": 2.4686, |
| "step": 47000 |
| }, |
| { |
| "epoch": 15.22301228183581, |
| "grad_norm": 1.0877801179885864, |
| "learning_rate": 0.001, |
| "loss": 2.479, |
| "step": 47100 |
| }, |
| { |
| "epoch": 15.255332902391725, |
| "grad_norm": 1.2323899269104004, |
| "learning_rate": 0.001, |
| "loss": 2.5143, |
| "step": 47200 |
| }, |
| { |
| "epoch": 15.287653522947641, |
| "grad_norm": 1.1232088804244995, |
| "learning_rate": 0.001, |
| "loss": 2.4904, |
| "step": 47300 |
| }, |
| { |
| "epoch": 15.319974143503556, |
| "grad_norm": 1.3587777614593506, |
| "learning_rate": 0.001, |
| "loss": 2.499, |
| "step": 47400 |
| }, |
| { |
| "epoch": 15.35229476405947, |
| "grad_norm": 1.5187313556671143, |
| "learning_rate": 0.001, |
| "loss": 2.5202, |
| "step": 47500 |
| }, |
| { |
| "epoch": 15.384615384615385, |
| "grad_norm": 1.5214544534683228, |
| "learning_rate": 0.001, |
| "loss": 2.4968, |
| "step": 47600 |
| }, |
| { |
| "epoch": 15.4169360051713, |
| "grad_norm": 1.4066399335861206, |
| "learning_rate": 0.001, |
| "loss": 2.5064, |
| "step": 47700 |
| }, |
| { |
| "epoch": 15.449256625727214, |
| "grad_norm": 1.1718268394470215, |
| "learning_rate": 0.001, |
| "loss": 2.5161, |
| "step": 47800 |
| }, |
| { |
| "epoch": 15.481577246283129, |
| "grad_norm": 1.0770251750946045, |
| "learning_rate": 0.001, |
| "loss": 2.5122, |
| "step": 47900 |
| }, |
| { |
| "epoch": 15.513897866839043, |
| "grad_norm": 1.092182993888855, |
| "learning_rate": 0.001, |
| "loss": 2.5292, |
| "step": 48000 |
| }, |
| { |
| "epoch": 15.546218487394958, |
| "grad_norm": 1.0473302602767944, |
| "learning_rate": 0.001, |
| "loss": 2.5315, |
| "step": 48100 |
| }, |
| { |
| "epoch": 15.578539107950872, |
| "grad_norm": 1.0130027532577515, |
| "learning_rate": 0.001, |
| "loss": 2.5181, |
| "step": 48200 |
| }, |
| { |
| "epoch": 15.610859728506787, |
| "grad_norm": 1.1945054531097412, |
| "learning_rate": 0.001, |
| "loss": 2.5199, |
| "step": 48300 |
| }, |
| { |
| "epoch": 15.643180349062701, |
| "grad_norm": 1.765254259109497, |
| "learning_rate": 0.001, |
| "loss": 2.5338, |
| "step": 48400 |
| }, |
| { |
| "epoch": 15.675500969618616, |
| "grad_norm": 1.1778308153152466, |
| "learning_rate": 0.001, |
| "loss": 2.5448, |
| "step": 48500 |
| }, |
| { |
| "epoch": 15.70782159017453, |
| "grad_norm": 1.2698488235473633, |
| "learning_rate": 0.001, |
| "loss": 2.5294, |
| "step": 48600 |
| }, |
| { |
| "epoch": 15.740142210730447, |
| "grad_norm": 1.0903241634368896, |
| "learning_rate": 0.001, |
| "loss": 2.5441, |
| "step": 48700 |
| }, |
| { |
| "epoch": 15.772462831286362, |
| "grad_norm": 0.9908322691917419, |
| "learning_rate": 0.001, |
| "loss": 2.5434, |
| "step": 48800 |
| }, |
| { |
| "epoch": 15.804783451842276, |
| "grad_norm": 1.0519664287567139, |
| "learning_rate": 0.001, |
| "loss": 2.5406, |
| "step": 48900 |
| }, |
| { |
| "epoch": 15.83710407239819, |
| "grad_norm": 1.250427484512329, |
| "learning_rate": 0.001, |
| "loss": 2.5543, |
| "step": 49000 |
| }, |
| { |
| "epoch": 15.869424692954105, |
| "grad_norm": 1.1209453344345093, |
| "learning_rate": 0.001, |
| "loss": 2.5426, |
| "step": 49100 |
| }, |
| { |
| "epoch": 15.90174531351002, |
| "grad_norm": 1.2598960399627686, |
| "learning_rate": 0.001, |
| "loss": 2.5517, |
| "step": 49200 |
| }, |
| { |
| "epoch": 15.934065934065934, |
| "grad_norm": 1.168419599533081, |
| "learning_rate": 0.001, |
| "loss": 2.557, |
| "step": 49300 |
| }, |
| { |
| "epoch": 15.966386554621849, |
| "grad_norm": 1.2239935398101807, |
| "learning_rate": 0.001, |
| "loss": 2.5589, |
| "step": 49400 |
| }, |
| { |
| "epoch": 15.998707175177763, |
| "grad_norm": 1.3314663171768188, |
| "learning_rate": 0.001, |
| "loss": 2.5477, |
| "step": 49500 |
| }, |
| { |
| "epoch": 16.031027795733678, |
| "grad_norm": 1.2716763019561768, |
| "learning_rate": 0.001, |
| "loss": 2.4259, |
| "step": 49600 |
| }, |
| { |
| "epoch": 16.063348416289593, |
| "grad_norm": 1.186794400215149, |
| "learning_rate": 0.001, |
| "loss": 2.4269, |
| "step": 49700 |
| }, |
| { |
| "epoch": 16.095669036845507, |
| "grad_norm": 1.5052367448806763, |
| "learning_rate": 0.001, |
| "loss": 2.4288, |
| "step": 49800 |
| }, |
| { |
| "epoch": 16.12798965740142, |
| "grad_norm": 1.3346431255340576, |
| "learning_rate": 0.001, |
| "loss": 2.4482, |
| "step": 49900 |
| }, |
| { |
| "epoch": 16.160310277957336, |
| "grad_norm": 1.0178332328796387, |
| "learning_rate": 0.001, |
| "loss": 2.4402, |
| "step": 50000 |
| }, |
| { |
| "epoch": 16.19263089851325, |
| "grad_norm": 1.1986984014511108, |
| "learning_rate": 0.001, |
| "loss": 2.4467, |
| "step": 50100 |
| }, |
| { |
| "epoch": 16.224951519069165, |
| "grad_norm": 1.4956904649734497, |
| "learning_rate": 0.001, |
| "loss": 2.4564, |
| "step": 50200 |
| }, |
| { |
| "epoch": 16.25727213962508, |
| "grad_norm": 1.2278879880905151, |
| "learning_rate": 0.001, |
| "loss": 2.4519, |
| "step": 50300 |
| }, |
| { |
| "epoch": 16.289592760180994, |
| "grad_norm": 1.304221272468567, |
| "learning_rate": 0.001, |
| "loss": 2.4559, |
| "step": 50400 |
| }, |
| { |
| "epoch": 16.32191338073691, |
| "grad_norm": 1.3971552848815918, |
| "learning_rate": 0.001, |
| "loss": 2.4522, |
| "step": 50500 |
| }, |
| { |
| "epoch": 16.354234001292824, |
| "grad_norm": 1.3419325351715088, |
| "learning_rate": 0.001, |
| "loss": 2.4699, |
| "step": 50600 |
| }, |
| { |
| "epoch": 16.386554621848738, |
| "grad_norm": 1.2903776168823242, |
| "learning_rate": 0.001, |
| "loss": 2.4723, |
| "step": 50700 |
| }, |
| { |
| "epoch": 16.418875242404653, |
| "grad_norm": 1.5094326734542847, |
| "learning_rate": 0.001, |
| "loss": 2.4804, |
| "step": 50800 |
| }, |
| { |
| "epoch": 16.451195862960567, |
| "grad_norm": 1.5980035066604614, |
| "learning_rate": 0.001, |
| "loss": 2.483, |
| "step": 50900 |
| }, |
| { |
| "epoch": 16.483516483516482, |
| "grad_norm": 1.2993049621582031, |
| "learning_rate": 0.001, |
| "loss": 2.4846, |
| "step": 51000 |
| }, |
| { |
| "epoch": 16.5158371040724, |
| "grad_norm": 1.2067556381225586, |
| "learning_rate": 0.001, |
| "loss": 2.4769, |
| "step": 51100 |
| }, |
| { |
| "epoch": 16.548157724628314, |
| "grad_norm": 1.9052283763885498, |
| "learning_rate": 0.001, |
| "loss": 2.479, |
| "step": 51200 |
| }, |
| { |
| "epoch": 16.58047834518423, |
| "grad_norm": 1.3466124534606934, |
| "learning_rate": 0.001, |
| "loss": 2.5064, |
| "step": 51300 |
| }, |
| { |
| "epoch": 16.612798965740144, |
| "grad_norm": 1.563071846961975, |
| "learning_rate": 0.001, |
| "loss": 2.4885, |
| "step": 51400 |
| }, |
| { |
| "epoch": 16.645119586296058, |
| "grad_norm": 1.5232013463974, |
| "learning_rate": 0.001, |
| "loss": 2.489, |
| "step": 51500 |
| }, |
| { |
| "epoch": 16.677440206851973, |
| "grad_norm": 1.1990022659301758, |
| "learning_rate": 0.001, |
| "loss": 2.4958, |
| "step": 51600 |
| }, |
| { |
| "epoch": 16.709760827407887, |
| "grad_norm": 1.1869248151779175, |
| "learning_rate": 0.001, |
| "loss": 2.5014, |
| "step": 51700 |
| }, |
| { |
| "epoch": 16.742081447963802, |
| "grad_norm": 1.4302977323532104, |
| "learning_rate": 0.001, |
| "loss": 2.5028, |
| "step": 51800 |
| }, |
| { |
| "epoch": 16.774402068519716, |
| "grad_norm": 1.3441663980484009, |
| "learning_rate": 0.001, |
| "loss": 2.5031, |
| "step": 51900 |
| }, |
| { |
| "epoch": 16.80672268907563, |
| "grad_norm": 1.2463428974151611, |
| "learning_rate": 0.001, |
| "loss": 2.5206, |
| "step": 52000 |
| }, |
| { |
| "epoch": 16.839043309631545, |
| "grad_norm": 1.5516313314437866, |
| "learning_rate": 0.001, |
| "loss": 2.5044, |
| "step": 52100 |
| }, |
| { |
| "epoch": 16.87136393018746, |
| "grad_norm": 1.4924278259277344, |
| "learning_rate": 0.001, |
| "loss": 2.5015, |
| "step": 52200 |
| }, |
| { |
| "epoch": 16.903684550743375, |
| "grad_norm": 1.4840755462646484, |
| "learning_rate": 0.001, |
| "loss": 2.5115, |
| "step": 52300 |
| }, |
| { |
| "epoch": 16.93600517129929, |
| "grad_norm": 1.1688814163208008, |
| "learning_rate": 0.001, |
| "loss": 2.5057, |
| "step": 52400 |
| }, |
| { |
| "epoch": 16.968325791855204, |
| "grad_norm": 1.1291621923446655, |
| "learning_rate": 0.001, |
| "loss": 2.5316, |
| "step": 52500 |
| }, |
| { |
| "epoch": 17.00064641241112, |
| "grad_norm": 1.3141711950302124, |
| "learning_rate": 0.001, |
| "loss": 2.5096, |
| "step": 52600 |
| }, |
| { |
| "epoch": 17.032967032967033, |
| "grad_norm": 1.1805791854858398, |
| "learning_rate": 0.001, |
| "loss": 2.3768, |
| "step": 52700 |
| }, |
| { |
| "epoch": 17.065287653522947, |
| "grad_norm": 1.2402406930923462, |
| "learning_rate": 0.001, |
| "loss": 2.3891, |
| "step": 52800 |
| }, |
| { |
| "epoch": 17.097608274078862, |
| "grad_norm": 1.5982283353805542, |
| "learning_rate": 0.001, |
| "loss": 2.3722, |
| "step": 52900 |
| }, |
| { |
| "epoch": 17.129928894634777, |
| "grad_norm": 1.4602590799331665, |
| "learning_rate": 0.001, |
| "loss": 2.3982, |
| "step": 53000 |
| }, |
| { |
| "epoch": 17.16224951519069, |
| "grad_norm": 2.0189146995544434, |
| "learning_rate": 0.001, |
| "loss": 2.4175, |
| "step": 53100 |
| }, |
| { |
| "epoch": 17.194570135746606, |
| "grad_norm": 2.113309621810913, |
| "learning_rate": 0.001, |
| "loss": 2.4099, |
| "step": 53200 |
| }, |
| { |
| "epoch": 17.22689075630252, |
| "grad_norm": 1.5319947004318237, |
| "learning_rate": 0.001, |
| "loss": 2.4323, |
| "step": 53300 |
| }, |
| { |
| "epoch": 17.259211376858435, |
| "grad_norm": 1.7054084539413452, |
| "learning_rate": 0.001, |
| "loss": 2.4122, |
| "step": 53400 |
| }, |
| { |
| "epoch": 17.29153199741435, |
| "grad_norm": 2.107525110244751, |
| "learning_rate": 0.001, |
| "loss": 2.4175, |
| "step": 53500 |
| }, |
| { |
| "epoch": 17.323852617970264, |
| "grad_norm": 1.9685229063034058, |
| "learning_rate": 0.001, |
| "loss": 2.42, |
| "step": 53600 |
| }, |
| { |
| "epoch": 17.35617323852618, |
| "grad_norm": 1.7401131391525269, |
| "learning_rate": 0.001, |
| "loss": 2.4364, |
| "step": 53700 |
| }, |
| { |
| "epoch": 17.388493859082093, |
| "grad_norm": 2.035468101501465, |
| "learning_rate": 0.001, |
| "loss": 2.4363, |
| "step": 53800 |
| }, |
| { |
| "epoch": 17.420814479638008, |
| "grad_norm": 1.4379023313522339, |
| "learning_rate": 0.001, |
| "loss": 2.4419, |
| "step": 53900 |
| }, |
| { |
| "epoch": 17.453135100193922, |
| "grad_norm": 1.6685974597930908, |
| "learning_rate": 0.001, |
| "loss": 2.439, |
| "step": 54000 |
| }, |
| { |
| "epoch": 17.485455720749837, |
| "grad_norm": 1.5203348398208618, |
| "learning_rate": 0.001, |
| "loss": 2.4467, |
| "step": 54100 |
| }, |
| { |
| "epoch": 17.517776341305755, |
| "grad_norm": 1.2775732278823853, |
| "learning_rate": 0.001, |
| "loss": 2.4467, |
| "step": 54200 |
| }, |
| { |
| "epoch": 17.55009696186167, |
| "grad_norm": 1.7329598665237427, |
| "learning_rate": 0.001, |
| "loss": 2.4492, |
| "step": 54300 |
| }, |
| { |
| "epoch": 17.582417582417584, |
| "grad_norm": 1.3014206886291504, |
| "learning_rate": 0.001, |
| "loss": 2.4777, |
| "step": 54400 |
| }, |
| { |
| "epoch": 17.6147382029735, |
| "grad_norm": 1.263486623764038, |
| "learning_rate": 0.001, |
| "loss": 2.4548, |
| "step": 54500 |
| }, |
| { |
| "epoch": 17.647058823529413, |
| "grad_norm": 2.006847620010376, |
| "learning_rate": 0.001, |
| "loss": 2.4647, |
| "step": 54600 |
| }, |
| { |
| "epoch": 17.679379444085328, |
| "grad_norm": 2.0060877799987793, |
| "learning_rate": 0.001, |
| "loss": 2.4461, |
| "step": 54700 |
| }, |
| { |
| "epoch": 17.711700064641242, |
| "grad_norm": 1.688281774520874, |
| "learning_rate": 0.001, |
| "loss": 2.4669, |
| "step": 54800 |
| }, |
| { |
| "epoch": 17.744020685197157, |
| "grad_norm": 1.5485999584197998, |
| "learning_rate": 0.001, |
| "loss": 2.4972, |
| "step": 54900 |
| }, |
| { |
| "epoch": 17.77634130575307, |
| "grad_norm": 1.3471914529800415, |
| "learning_rate": 0.001, |
| "loss": 2.4706, |
| "step": 55000 |
| }, |
| { |
| "epoch": 17.808661926308986, |
| "grad_norm": 1.660112738609314, |
| "learning_rate": 0.001, |
| "loss": 2.4731, |
| "step": 55100 |
| }, |
| { |
| "epoch": 17.8409825468649, |
| "grad_norm": 1.4767402410507202, |
| "learning_rate": 0.001, |
| "loss": 2.468, |
| "step": 55200 |
| }, |
| { |
| "epoch": 17.873303167420815, |
| "grad_norm": 1.243491530418396, |
| "learning_rate": 0.001, |
| "loss": 2.4797, |
| "step": 55300 |
| }, |
| { |
| "epoch": 17.90562378797673, |
| "grad_norm": 1.5120997428894043, |
| "learning_rate": 0.001, |
| "loss": 2.4638, |
| "step": 55400 |
| }, |
| { |
| "epoch": 17.937944408532644, |
| "grad_norm": 1.463824987411499, |
| "learning_rate": 0.001, |
| "loss": 2.4688, |
| "step": 55500 |
| }, |
| { |
| "epoch": 17.97026502908856, |
| "grad_norm": 1.5150164365768433, |
| "learning_rate": 0.001, |
| "loss": 2.4871, |
| "step": 55600 |
| }, |
| { |
| "epoch": 18.002585649644473, |
| "grad_norm": 1.273226261138916, |
| "learning_rate": 0.001, |
| "loss": 2.5058, |
| "step": 55700 |
| }, |
| { |
| "epoch": 18.034906270200388, |
| "grad_norm": 1.1136760711669922, |
| "learning_rate": 0.001, |
| "loss": 2.3403, |
| "step": 55800 |
| }, |
| { |
| "epoch": 18.067226890756302, |
| "grad_norm": 1.2880661487579346, |
| "learning_rate": 0.001, |
| "loss": 2.3427, |
| "step": 55900 |
| }, |
| { |
| "epoch": 18.099547511312217, |
| "grad_norm": 1.3773696422576904, |
| "learning_rate": 0.001, |
| "loss": 2.3565, |
| "step": 56000 |
| }, |
| { |
| "epoch": 18.13186813186813, |
| "grad_norm": 0.9006345868110657, |
| "learning_rate": 0.001, |
| "loss": 2.3755, |
| "step": 56100 |
| }, |
| { |
| "epoch": 18.164188752424046, |
| "grad_norm": 1.2340940237045288, |
| "learning_rate": 0.001, |
| "loss": 2.3949, |
| "step": 56200 |
| }, |
| { |
| "epoch": 18.19650937297996, |
| "grad_norm": 1.297279953956604, |
| "learning_rate": 0.001, |
| "loss": 2.3835, |
| "step": 56300 |
| }, |
| { |
| "epoch": 18.228829993535875, |
| "grad_norm": 1.0448439121246338, |
| "learning_rate": 0.001, |
| "loss": 2.3986, |
| "step": 56400 |
| }, |
| { |
| "epoch": 18.26115061409179, |
| "grad_norm": 1.0477882623672485, |
| "learning_rate": 0.001, |
| "loss": 2.3906, |
| "step": 56500 |
| }, |
| { |
| "epoch": 18.293471234647704, |
| "grad_norm": 1.030461072921753, |
| "learning_rate": 0.001, |
| "loss": 2.3882, |
| "step": 56600 |
| }, |
| { |
| "epoch": 18.32579185520362, |
| "grad_norm": 0.8635056614875793, |
| "learning_rate": 0.001, |
| "loss": 2.4023, |
| "step": 56700 |
| }, |
| { |
| "epoch": 18.358112475759533, |
| "grad_norm": 1.3237051963806152, |
| "learning_rate": 0.001, |
| "loss": 2.369, |
| "step": 56800 |
| }, |
| { |
| "epoch": 18.390433096315448, |
| "grad_norm": 1.3418434858322144, |
| "learning_rate": 0.001, |
| "loss": 2.4058, |
| "step": 56900 |
| }, |
| { |
| "epoch": 18.422753716871362, |
| "grad_norm": 0.8876633644104004, |
| "learning_rate": 0.001, |
| "loss": 2.4028, |
| "step": 57000 |
| }, |
| { |
| "epoch": 18.455074337427277, |
| "grad_norm": 1.2339004278182983, |
| "learning_rate": 0.001, |
| "loss": 2.4131, |
| "step": 57100 |
| }, |
| { |
| "epoch": 18.48739495798319, |
| "grad_norm": 1.1238473653793335, |
| "learning_rate": 0.001, |
| "loss": 2.3856, |
| "step": 57200 |
| }, |
| { |
| "epoch": 18.51971557853911, |
| "grad_norm": 1.535725474357605, |
| "learning_rate": 0.001, |
| "loss": 2.4254, |
| "step": 57300 |
| }, |
| { |
| "epoch": 18.552036199095024, |
| "grad_norm": 0.8891725540161133, |
| "learning_rate": 0.001, |
| "loss": 2.4406, |
| "step": 57400 |
| }, |
| { |
| "epoch": 18.58435681965094, |
| "grad_norm": 1.3219631910324097, |
| "learning_rate": 0.001, |
| "loss": 2.4282, |
| "step": 57500 |
| }, |
| { |
| "epoch": 18.616677440206853, |
| "grad_norm": 1.4664770364761353, |
| "learning_rate": 0.001, |
| "loss": 2.4167, |
| "step": 57600 |
| }, |
| { |
| "epoch": 18.648998060762768, |
| "grad_norm": 1.4198930263519287, |
| "learning_rate": 0.001, |
| "loss": 2.4329, |
| "step": 57700 |
| }, |
| { |
| "epoch": 18.681318681318682, |
| "grad_norm": 1.31005859375, |
| "learning_rate": 0.001, |
| "loss": 2.4255, |
| "step": 57800 |
| }, |
| { |
| "epoch": 18.713639301874597, |
| "grad_norm": 1.3033839464187622, |
| "learning_rate": 0.001, |
| "loss": 2.4145, |
| "step": 57900 |
| }, |
| { |
| "epoch": 18.74595992243051, |
| "grad_norm": 1.0991932153701782, |
| "learning_rate": 0.001, |
| "loss": 2.4405, |
| "step": 58000 |
| }, |
| { |
| "epoch": 18.778280542986426, |
| "grad_norm": 1.0855860710144043, |
| "learning_rate": 0.001, |
| "loss": 2.4528, |
| "step": 58100 |
| }, |
| { |
| "epoch": 18.81060116354234, |
| "grad_norm": 1.543005347251892, |
| "learning_rate": 0.001, |
| "loss": 2.4497, |
| "step": 58200 |
| }, |
| { |
| "epoch": 18.842921784098255, |
| "grad_norm": 1.1537874937057495, |
| "learning_rate": 0.001, |
| "loss": 2.4345, |
| "step": 58300 |
| }, |
| { |
| "epoch": 18.87524240465417, |
| "grad_norm": 1.6207787990570068, |
| "learning_rate": 0.001, |
| "loss": 2.4552, |
| "step": 58400 |
| }, |
| { |
| "epoch": 18.907563025210084, |
| "grad_norm": 0.987137496471405, |
| "learning_rate": 0.001, |
| "loss": 2.4263, |
| "step": 58500 |
| }, |
| { |
| "epoch": 18.939883645766, |
| "grad_norm": 1.1179084777832031, |
| "learning_rate": 0.001, |
| "loss": 2.4583, |
| "step": 58600 |
| }, |
| { |
| "epoch": 18.972204266321913, |
| "grad_norm": 0.999499499797821, |
| "learning_rate": 0.001, |
| "loss": 2.4537, |
| "step": 58700 |
| }, |
| { |
| "epoch": 19.004524886877828, |
| "grad_norm": 1.0300583839416504, |
| "learning_rate": 0.001, |
| "loss": 2.4514, |
| "step": 58800 |
| }, |
| { |
| "epoch": 19.036845507433743, |
| "grad_norm": 0.9608945846557617, |
| "learning_rate": 0.001, |
| "loss": 2.326, |
| "step": 58900 |
| }, |
| { |
| "epoch": 19.069166127989657, |
| "grad_norm": 0.9459260702133179, |
| "learning_rate": 0.001, |
| "loss": 2.3132, |
| "step": 59000 |
| }, |
| { |
| "epoch": 19.10148674854557, |
| "grad_norm": 1.2012510299682617, |
| "learning_rate": 0.001, |
| "loss": 2.332, |
| "step": 59100 |
| }, |
| { |
| "epoch": 19.133807369101486, |
| "grad_norm": 1.1166940927505493, |
| "learning_rate": 0.001, |
| "loss": 2.3535, |
| "step": 59200 |
| }, |
| { |
| "epoch": 19.1661279896574, |
| "grad_norm": 1.3316816091537476, |
| "learning_rate": 0.001, |
| "loss": 2.3331, |
| "step": 59300 |
| }, |
| { |
| "epoch": 19.198448610213315, |
| "grad_norm": 1.1380234956741333, |
| "learning_rate": 0.001, |
| "loss": 2.3438, |
| "step": 59400 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 1.0130378007888794, |
| "learning_rate": 0.001, |
| "loss": 2.3425, |
| "step": 59500 |
| }, |
| { |
| "epoch": 19.263089851325145, |
| "grad_norm": 1.0612099170684814, |
| "learning_rate": 0.001, |
| "loss": 2.3591, |
| "step": 59600 |
| }, |
| { |
| "epoch": 19.29541047188106, |
| "grad_norm": 1.0072628259658813, |
| "learning_rate": 0.001, |
| "loss": 2.3543, |
| "step": 59700 |
| }, |
| { |
| "epoch": 19.327731092436974, |
| "grad_norm": 1.1276565790176392, |
| "learning_rate": 0.001, |
| "loss": 2.3558, |
| "step": 59800 |
| }, |
| { |
| "epoch": 19.360051712992888, |
| "grad_norm": 1.0294878482818604, |
| "learning_rate": 0.001, |
| "loss": 2.3793, |
| "step": 59900 |
| }, |
| { |
| "epoch": 19.392372333548803, |
| "grad_norm": 1.3939874172210693, |
| "learning_rate": 0.001, |
| "loss": 2.3649, |
| "step": 60000 |
| }, |
| { |
| "epoch": 19.424692954104717, |
| "grad_norm": 1.295091986656189, |
| "learning_rate": 0.001, |
| "loss": 2.3757, |
| "step": 60100 |
| }, |
| { |
| "epoch": 19.457013574660632, |
| "grad_norm": 1.3220593929290771, |
| "learning_rate": 0.001, |
| "loss": 2.3635, |
| "step": 60200 |
| }, |
| { |
| "epoch": 19.489334195216546, |
| "grad_norm": 1.4696449041366577, |
| "learning_rate": 0.001, |
| "loss": 2.3927, |
| "step": 60300 |
| }, |
| { |
| "epoch": 19.521654815772465, |
| "grad_norm": 0.9687468409538269, |
| "learning_rate": 0.001, |
| "loss": 2.3806, |
| "step": 60400 |
| }, |
| { |
| "epoch": 19.55397543632838, |
| "grad_norm": 1.0483554601669312, |
| "learning_rate": 0.001, |
| "loss": 2.3805, |
| "step": 60500 |
| }, |
| { |
| "epoch": 19.586296056884294, |
| "grad_norm": 0.9296285510063171, |
| "learning_rate": 0.001, |
| "loss": 2.3839, |
| "step": 60600 |
| }, |
| { |
| "epoch": 19.618616677440208, |
| "grad_norm": 1.5007354021072388, |
| "learning_rate": 0.001, |
| "loss": 2.3968, |
| "step": 60700 |
| }, |
| { |
| "epoch": 19.650937297996123, |
| "grad_norm": 1.5555963516235352, |
| "learning_rate": 0.001, |
| "loss": 2.3942, |
| "step": 60800 |
| }, |
| { |
| "epoch": 19.683257918552037, |
| "grad_norm": 1.1963167190551758, |
| "learning_rate": 0.001, |
| "loss": 2.3854, |
| "step": 60900 |
| }, |
| { |
| "epoch": 19.715578539107952, |
| "grad_norm": 1.2514575719833374, |
| "learning_rate": 0.001, |
| "loss": 2.404, |
| "step": 61000 |
| }, |
| { |
| "epoch": 19.747899159663866, |
| "grad_norm": 1.4767061471939087, |
| "learning_rate": 0.001, |
| "loss": 2.3834, |
| "step": 61100 |
| }, |
| { |
| "epoch": 19.78021978021978, |
| "grad_norm": 0.9871048927307129, |
| "learning_rate": 0.001, |
| "loss": 2.3927, |
| "step": 61200 |
| }, |
| { |
| "epoch": 19.812540400775696, |
| "grad_norm": 1.4090756177902222, |
| "learning_rate": 0.001, |
| "loss": 2.4223, |
| "step": 61300 |
| }, |
| { |
| "epoch": 19.84486102133161, |
| "grad_norm": 1.3110271692276, |
| "learning_rate": 0.001, |
| "loss": 2.3889, |
| "step": 61400 |
| }, |
| { |
| "epoch": 19.877181641887525, |
| "grad_norm": 0.9297486543655396, |
| "learning_rate": 0.001, |
| "loss": 2.431, |
| "step": 61500 |
| }, |
| { |
| "epoch": 19.90950226244344, |
| "grad_norm": 1.3082588911056519, |
| "learning_rate": 0.001, |
| "loss": 2.4306, |
| "step": 61600 |
| }, |
| { |
| "epoch": 19.941822882999354, |
| "grad_norm": 0.9515864849090576, |
| "learning_rate": 0.001, |
| "loss": 2.4443, |
| "step": 61700 |
| }, |
| { |
| "epoch": 19.97414350355527, |
| "grad_norm": 1.187511920928955, |
| "learning_rate": 0.001, |
| "loss": 2.4338, |
| "step": 61800 |
| }, |
| { |
| "epoch": 20.006464124111183, |
| "grad_norm": 1.113945722579956, |
| "learning_rate": 0.001, |
| "loss": 2.4036, |
| "step": 61900 |
| }, |
| { |
| "epoch": 20.038784744667097, |
| "grad_norm": 0.9578619599342346, |
| "learning_rate": 0.001, |
| "loss": 2.3059, |
| "step": 62000 |
| }, |
| { |
| "epoch": 20.071105365223012, |
| "grad_norm": 1.0143957138061523, |
| "learning_rate": 0.001, |
| "loss": 2.2843, |
| "step": 62100 |
| }, |
| { |
| "epoch": 20.103425985778927, |
| "grad_norm": 1.097559928894043, |
| "learning_rate": 0.001, |
| "loss": 2.3054, |
| "step": 62200 |
| }, |
| { |
| "epoch": 20.13574660633484, |
| "grad_norm": 0.9272734522819519, |
| "learning_rate": 0.001, |
| "loss": 2.292, |
| "step": 62300 |
| }, |
| { |
| "epoch": 20.168067226890756, |
| "grad_norm": 1.0882327556610107, |
| "learning_rate": 0.001, |
| "loss": 2.3145, |
| "step": 62400 |
| }, |
| { |
| "epoch": 20.20038784744667, |
| "grad_norm": 1.3917256593704224, |
| "learning_rate": 0.001, |
| "loss": 2.3216, |
| "step": 62500 |
| }, |
| { |
| "epoch": 20.232708468002585, |
| "grad_norm": 1.114766001701355, |
| "learning_rate": 0.001, |
| "loss": 2.3222, |
| "step": 62600 |
| }, |
| { |
| "epoch": 20.2650290885585, |
| "grad_norm": 1.3342783451080322, |
| "learning_rate": 0.001, |
| "loss": 2.3123, |
| "step": 62700 |
| }, |
| { |
| "epoch": 20.297349709114414, |
| "grad_norm": 1.06283438205719, |
| "learning_rate": 0.001, |
| "loss": 2.3179, |
| "step": 62800 |
| }, |
| { |
| "epoch": 20.32967032967033, |
| "grad_norm": 0.93958979845047, |
| "learning_rate": 0.001, |
| "loss": 2.3198, |
| "step": 62900 |
| }, |
| { |
| "epoch": 20.361990950226243, |
| "grad_norm": 1.4212366342544556, |
| "learning_rate": 0.001, |
| "loss": 2.3144, |
| "step": 63000 |
| }, |
| { |
| "epoch": 20.394311570782158, |
| "grad_norm": 1.3134227991104126, |
| "learning_rate": 0.001, |
| "loss": 2.3527, |
| "step": 63100 |
| }, |
| { |
| "epoch": 20.426632191338072, |
| "grad_norm": 1.1288926601409912, |
| "learning_rate": 0.001, |
| "loss": 2.3462, |
| "step": 63200 |
| }, |
| { |
| "epoch": 20.458952811893987, |
| "grad_norm": 1.0301005840301514, |
| "learning_rate": 0.001, |
| "loss": 2.3592, |
| "step": 63300 |
| }, |
| { |
| "epoch": 20.4912734324499, |
| "grad_norm": 0.9638227820396423, |
| "learning_rate": 0.001, |
| "loss": 2.3568, |
| "step": 63400 |
| }, |
| { |
| "epoch": 20.52359405300582, |
| "grad_norm": 1.5197877883911133, |
| "learning_rate": 0.001, |
| "loss": 2.3549, |
| "step": 63500 |
| }, |
| { |
| "epoch": 20.555914673561734, |
| "grad_norm": 1.1526395082473755, |
| "learning_rate": 0.001, |
| "loss": 2.3606, |
| "step": 63600 |
| }, |
| { |
| "epoch": 20.58823529411765, |
| "grad_norm": 1.2824829816818237, |
| "learning_rate": 0.001, |
| "loss": 2.3527, |
| "step": 63700 |
| }, |
| { |
| "epoch": 20.620555914673563, |
| "grad_norm": 1.1107521057128906, |
| "learning_rate": 0.001, |
| "loss": 2.3621, |
| "step": 63800 |
| }, |
| { |
| "epoch": 20.652876535229478, |
| "grad_norm": 1.2559133768081665, |
| "learning_rate": 0.001, |
| "loss": 2.3635, |
| "step": 63900 |
| }, |
| { |
| "epoch": 20.685197155785392, |
| "grad_norm": 1.1576194763183594, |
| "learning_rate": 0.001, |
| "loss": 2.3666, |
| "step": 64000 |
| }, |
| { |
| "epoch": 20.717517776341307, |
| "grad_norm": 1.067619800567627, |
| "learning_rate": 0.001, |
| "loss": 2.3763, |
| "step": 64100 |
| }, |
| { |
| "epoch": 20.74983839689722, |
| "grad_norm": 1.0354868173599243, |
| "learning_rate": 0.001, |
| "loss": 2.3594, |
| "step": 64200 |
| }, |
| { |
| "epoch": 20.782159017453136, |
| "grad_norm": 1.3898648023605347, |
| "learning_rate": 0.001, |
| "loss": 2.3996, |
| "step": 64300 |
| }, |
| { |
| "epoch": 20.81447963800905, |
| "grad_norm": 1.0457971096038818, |
| "learning_rate": 0.001, |
| "loss": 2.3974, |
| "step": 64400 |
| }, |
| { |
| "epoch": 20.846800258564965, |
| "grad_norm": 1.394001841545105, |
| "learning_rate": 0.001, |
| "loss": 2.376, |
| "step": 64500 |
| }, |
| { |
| "epoch": 20.87912087912088, |
| "grad_norm": 1.2913107872009277, |
| "learning_rate": 0.001, |
| "loss": 2.3719, |
| "step": 64600 |
| }, |
| { |
| "epoch": 20.911441499676794, |
| "grad_norm": 0.8956575393676758, |
| "learning_rate": 0.001, |
| "loss": 2.3725, |
| "step": 64700 |
| }, |
| { |
| "epoch": 20.94376212023271, |
| "grad_norm": 0.9267522692680359, |
| "learning_rate": 0.001, |
| "loss": 2.3908, |
| "step": 64800 |
| }, |
| { |
| "epoch": 20.976082740788623, |
| "grad_norm": 1.1674199104309082, |
| "learning_rate": 0.001, |
| "loss": 2.3816, |
| "step": 64900 |
| }, |
| { |
| "epoch": 21.008403361344538, |
| "grad_norm": 1.149228811264038, |
| "learning_rate": 0.001, |
| "loss": 2.3687, |
| "step": 65000 |
| }, |
| { |
| "epoch": 21.040723981900452, |
| "grad_norm": 1.1210534572601318, |
| "learning_rate": 0.001, |
| "loss": 2.2492, |
| "step": 65100 |
| }, |
| { |
| "epoch": 21.073044602456367, |
| "grad_norm": 1.153964638710022, |
| "learning_rate": 0.001, |
| "loss": 2.2474, |
| "step": 65200 |
| }, |
| { |
| "epoch": 21.10536522301228, |
| "grad_norm": 1.2374367713928223, |
| "learning_rate": 0.001, |
| "loss": 2.2591, |
| "step": 65300 |
| }, |
| { |
| "epoch": 21.137685843568196, |
| "grad_norm": 1.231076955795288, |
| "learning_rate": 0.001, |
| "loss": 2.2824, |
| "step": 65400 |
| }, |
| { |
| "epoch": 21.17000646412411, |
| "grad_norm": 1.1518474817276, |
| "learning_rate": 0.001, |
| "loss": 2.2904, |
| "step": 65500 |
| }, |
| { |
| "epoch": 21.202327084680025, |
| "grad_norm": 1.1400446891784668, |
| "learning_rate": 0.001, |
| "loss": 2.2937, |
| "step": 65600 |
| }, |
| { |
| "epoch": 21.23464770523594, |
| "grad_norm": 1.0334277153015137, |
| "learning_rate": 0.001, |
| "loss": 2.291, |
| "step": 65700 |
| }, |
| { |
| "epoch": 21.266968325791854, |
| "grad_norm": 1.1228870153427124, |
| "learning_rate": 0.001, |
| "loss": 2.2916, |
| "step": 65800 |
| }, |
| { |
| "epoch": 21.29928894634777, |
| "grad_norm": 1.0719165802001953, |
| "learning_rate": 0.001, |
| "loss": 2.2946, |
| "step": 65900 |
| }, |
| { |
| "epoch": 21.331609566903683, |
| "grad_norm": 1.0845648050308228, |
| "learning_rate": 0.001, |
| "loss": 2.3075, |
| "step": 66000 |
| }, |
| { |
| "epoch": 21.363930187459598, |
| "grad_norm": 1.3496699333190918, |
| "learning_rate": 0.001, |
| "loss": 2.2954, |
| "step": 66100 |
| }, |
| { |
| "epoch": 21.396250808015512, |
| "grad_norm": 0.8247977495193481, |
| "learning_rate": 0.001, |
| "loss": 2.2924, |
| "step": 66200 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 0.9616690278053284, |
| "learning_rate": 0.001, |
| "loss": 2.305, |
| "step": 66300 |
| }, |
| { |
| "epoch": 21.46089204912734, |
| "grad_norm": 1.060357689857483, |
| "learning_rate": 0.001, |
| "loss": 2.3309, |
| "step": 66400 |
| }, |
| { |
| "epoch": 21.49321266968326, |
| "grad_norm": 1.2382253408432007, |
| "learning_rate": 0.001, |
| "loss": 2.3049, |
| "step": 66500 |
| }, |
| { |
| "epoch": 21.525533290239174, |
| "grad_norm": 0.8520820736885071, |
| "learning_rate": 0.001, |
| "loss": 2.3077, |
| "step": 66600 |
| }, |
| { |
| "epoch": 21.55785391079509, |
| "grad_norm": 1.147915005683899, |
| "learning_rate": 0.001, |
| "loss": 2.3323, |
| "step": 66700 |
| }, |
| { |
| "epoch": 21.590174531351003, |
| "grad_norm": 1.319478154182434, |
| "learning_rate": 0.001, |
| "loss": 2.3191, |
| "step": 66800 |
| }, |
| { |
| "epoch": 21.622495151906918, |
| "grad_norm": 1.131990671157837, |
| "learning_rate": 0.001, |
| "loss": 2.323, |
| "step": 66900 |
| }, |
| { |
| "epoch": 21.654815772462833, |
| "grad_norm": 1.0251647233963013, |
| "learning_rate": 0.001, |
| "loss": 2.3486, |
| "step": 67000 |
| }, |
| { |
| "epoch": 21.687136393018747, |
| "grad_norm": 1.1523061990737915, |
| "learning_rate": 0.001, |
| "loss": 2.341, |
| "step": 67100 |
| }, |
| { |
| "epoch": 21.71945701357466, |
| "grad_norm": 1.333418607711792, |
| "learning_rate": 0.001, |
| "loss": 2.3534, |
| "step": 67200 |
| }, |
| { |
| "epoch": 21.751777634130576, |
| "grad_norm": 0.9779078364372253, |
| "learning_rate": 0.001, |
| "loss": 2.3478, |
| "step": 67300 |
| }, |
| { |
| "epoch": 21.78409825468649, |
| "grad_norm": 1.1211507320404053, |
| "learning_rate": 0.001, |
| "loss": 2.332, |
| "step": 67400 |
| }, |
| { |
| "epoch": 21.816418875242405, |
| "grad_norm": 1.406153678894043, |
| "learning_rate": 0.001, |
| "loss": 2.3506, |
| "step": 67500 |
| }, |
| { |
| "epoch": 21.84873949579832, |
| "grad_norm": 0.9729377031326294, |
| "learning_rate": 0.001, |
| "loss": 2.3466, |
| "step": 67600 |
| }, |
| { |
| "epoch": 21.881060116354234, |
| "grad_norm": 1.1170735359191895, |
| "learning_rate": 0.001, |
| "loss": 2.3591, |
| "step": 67700 |
| }, |
| { |
| "epoch": 21.91338073691015, |
| "grad_norm": 1.214516282081604, |
| "learning_rate": 0.001, |
| "loss": 2.3676, |
| "step": 67800 |
| }, |
| { |
| "epoch": 21.945701357466064, |
| "grad_norm": 1.2979365587234497, |
| "learning_rate": 0.001, |
| "loss": 2.3666, |
| "step": 67900 |
| }, |
| { |
| "epoch": 21.978021978021978, |
| "grad_norm": 1.5110282897949219, |
| "learning_rate": 0.001, |
| "loss": 2.3846, |
| "step": 68000 |
| }, |
| { |
| "epoch": 22.010342598577893, |
| "grad_norm": 1.2588374614715576, |
| "learning_rate": 0.001, |
| "loss": 2.323, |
| "step": 68100 |
| }, |
| { |
| "epoch": 22.042663219133807, |
| "grad_norm": 1.0757921934127808, |
| "learning_rate": 0.001, |
| "loss": 2.2411, |
| "step": 68200 |
| }, |
| { |
| "epoch": 22.07498383968972, |
| "grad_norm": 1.0231963396072388, |
| "learning_rate": 0.001, |
| "loss": 2.2505, |
| "step": 68300 |
| }, |
| { |
| "epoch": 22.107304460245636, |
| "grad_norm": 1.2770941257476807, |
| "learning_rate": 0.001, |
| "loss": 2.2263, |
| "step": 68400 |
| }, |
| { |
| "epoch": 22.13962508080155, |
| "grad_norm": 1.3017656803131104, |
| "learning_rate": 0.001, |
| "loss": 2.2273, |
| "step": 68500 |
| }, |
| { |
| "epoch": 22.171945701357465, |
| "grad_norm": 1.176435947418213, |
| "learning_rate": 0.001, |
| "loss": 2.2569, |
| "step": 68600 |
| }, |
| { |
| "epoch": 22.20426632191338, |
| "grad_norm": 1.3054310083389282, |
| "learning_rate": 0.001, |
| "loss": 2.2676, |
| "step": 68700 |
| }, |
| { |
| "epoch": 22.236586942469295, |
| "grad_norm": 1.0871057510375977, |
| "learning_rate": 0.001, |
| "loss": 2.268, |
| "step": 68800 |
| }, |
| { |
| "epoch": 22.26890756302521, |
| "grad_norm": 1.239288330078125, |
| "learning_rate": 0.001, |
| "loss": 2.2647, |
| "step": 68900 |
| }, |
| { |
| "epoch": 22.301228183581124, |
| "grad_norm": 1.0190906524658203, |
| "learning_rate": 0.001, |
| "loss": 2.2857, |
| "step": 69000 |
| }, |
| { |
| "epoch": 22.33354880413704, |
| "grad_norm": 1.2209452390670776, |
| "learning_rate": 0.001, |
| "loss": 2.2612, |
| "step": 69100 |
| }, |
| { |
| "epoch": 22.365869424692953, |
| "grad_norm": 1.2406755685806274, |
| "learning_rate": 0.001, |
| "loss": 2.2864, |
| "step": 69200 |
| }, |
| { |
| "epoch": 22.398190045248867, |
| "grad_norm": 1.1042966842651367, |
| "learning_rate": 0.001, |
| "loss": 2.2763, |
| "step": 69300 |
| }, |
| { |
| "epoch": 22.430510665804782, |
| "grad_norm": 1.0039794445037842, |
| "learning_rate": 0.001, |
| "loss": 2.2695, |
| "step": 69400 |
| }, |
| { |
| "epoch": 22.462831286360696, |
| "grad_norm": 1.4037723541259766, |
| "learning_rate": 0.001, |
| "loss": 2.2778, |
| "step": 69500 |
| }, |
| { |
| "epoch": 22.49515190691661, |
| "grad_norm": 1.3101537227630615, |
| "learning_rate": 0.001, |
| "loss": 2.2784, |
| "step": 69600 |
| }, |
| { |
| "epoch": 22.52747252747253, |
| "grad_norm": 1.0466303825378418, |
| "learning_rate": 0.001, |
| "loss": 2.2998, |
| "step": 69700 |
| }, |
| { |
| "epoch": 22.559793148028444, |
| "grad_norm": 1.2835876941680908, |
| "learning_rate": 0.001, |
| "loss": 2.2933, |
| "step": 69800 |
| }, |
| { |
| "epoch": 22.59211376858436, |
| "grad_norm": 1.040469765663147, |
| "learning_rate": 0.001, |
| "loss": 2.2951, |
| "step": 69900 |
| }, |
| { |
| "epoch": 22.624434389140273, |
| "grad_norm": 0.8917735815048218, |
| "learning_rate": 0.001, |
| "loss": 2.3084, |
| "step": 70000 |
| }, |
| { |
| "epoch": 22.656755009696187, |
| "grad_norm": 1.2921438217163086, |
| "learning_rate": 0.001, |
| "loss": 2.3097, |
| "step": 70100 |
| }, |
| { |
| "epoch": 22.689075630252102, |
| "grad_norm": 1.1070172786712646, |
| "learning_rate": 0.001, |
| "loss": 2.3189, |
| "step": 70200 |
| }, |
| { |
| "epoch": 22.721396250808017, |
| "grad_norm": 1.3595770597457886, |
| "learning_rate": 0.001, |
| "loss": 2.3035, |
| "step": 70300 |
| }, |
| { |
| "epoch": 22.75371687136393, |
| "grad_norm": 1.0014970302581787, |
| "learning_rate": 0.001, |
| "loss": 2.3059, |
| "step": 70400 |
| }, |
| { |
| "epoch": 22.786037491919846, |
| "grad_norm": 1.2341176271438599, |
| "learning_rate": 0.001, |
| "loss": 2.3161, |
| "step": 70500 |
| }, |
| { |
| "epoch": 22.81835811247576, |
| "grad_norm": 0.9569864273071289, |
| "learning_rate": 0.001, |
| "loss": 2.3215, |
| "step": 70600 |
| }, |
| { |
| "epoch": 22.850678733031675, |
| "grad_norm": 1.036069631576538, |
| "learning_rate": 0.001, |
| "loss": 2.335, |
| "step": 70700 |
| }, |
| { |
| "epoch": 22.88299935358759, |
| "grad_norm": 1.5049176216125488, |
| "learning_rate": 0.001, |
| "loss": 2.3246, |
| "step": 70800 |
| }, |
| { |
| "epoch": 22.915319974143504, |
| "grad_norm": 1.2657185792922974, |
| "learning_rate": 0.001, |
| "loss": 2.3259, |
| "step": 70900 |
| }, |
| { |
| "epoch": 22.94764059469942, |
| "grad_norm": 1.2280467748641968, |
| "learning_rate": 0.001, |
| "loss": 2.3253, |
| "step": 71000 |
| }, |
| { |
| "epoch": 22.979961215255333, |
| "grad_norm": 1.2914707660675049, |
| "learning_rate": 0.001, |
| "loss": 2.3534, |
| "step": 71100 |
| }, |
| { |
| "epoch": 23.012281835811248, |
| "grad_norm": 1.0989599227905273, |
| "learning_rate": 0.001, |
| "loss": 2.2715, |
| "step": 71200 |
| }, |
| { |
| "epoch": 23.044602456367162, |
| "grad_norm": 1.1108335256576538, |
| "learning_rate": 0.001, |
| "loss": 2.1853, |
| "step": 71300 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "grad_norm": 1.5226976871490479, |
| "learning_rate": 0.001, |
| "loss": 2.215, |
| "step": 71400 |
| }, |
| { |
| "epoch": 23.10924369747899, |
| "grad_norm": 1.087694525718689, |
| "learning_rate": 0.001, |
| "loss": 2.216, |
| "step": 71500 |
| }, |
| { |
| "epoch": 23.141564318034906, |
| "grad_norm": 1.6098570823669434, |
| "learning_rate": 0.001, |
| "loss": 2.2233, |
| "step": 71600 |
| }, |
| { |
| "epoch": 23.17388493859082, |
| "grad_norm": 1.3562836647033691, |
| "learning_rate": 0.001, |
| "loss": 2.2185, |
| "step": 71700 |
| }, |
| { |
| "epoch": 23.206205559146735, |
| "grad_norm": 1.253631353378296, |
| "learning_rate": 0.001, |
| "loss": 2.2379, |
| "step": 71800 |
| }, |
| { |
| "epoch": 23.23852617970265, |
| "grad_norm": 1.4562768936157227, |
| "learning_rate": 0.001, |
| "loss": 2.2423, |
| "step": 71900 |
| }, |
| { |
| "epoch": 23.270846800258564, |
| "grad_norm": 1.1356525421142578, |
| "learning_rate": 0.001, |
| "loss": 2.2214, |
| "step": 72000 |
| }, |
| { |
| "epoch": 23.30316742081448, |
| "grad_norm": 1.1421269178390503, |
| "learning_rate": 0.001, |
| "loss": 2.2432, |
| "step": 72100 |
| }, |
| { |
| "epoch": 23.335488041370393, |
| "grad_norm": 1.1426451206207275, |
| "learning_rate": 0.001, |
| "loss": 2.2497, |
| "step": 72200 |
| }, |
| { |
| "epoch": 23.367808661926308, |
| "grad_norm": 1.230992078781128, |
| "learning_rate": 0.001, |
| "loss": 2.249, |
| "step": 72300 |
| }, |
| { |
| "epoch": 23.400129282482222, |
| "grad_norm": 1.4944993257522583, |
| "learning_rate": 0.001, |
| "loss": 2.2725, |
| "step": 72400 |
| }, |
| { |
| "epoch": 23.432449903038137, |
| "grad_norm": 1.104407787322998, |
| "learning_rate": 0.001, |
| "loss": 2.24, |
| "step": 72500 |
| }, |
| { |
| "epoch": 23.46477052359405, |
| "grad_norm": 1.4007248878479004, |
| "learning_rate": 0.001, |
| "loss": 2.2582, |
| "step": 72600 |
| }, |
| { |
| "epoch": 23.49709114414997, |
| "grad_norm": 1.5281628370285034, |
| "learning_rate": 0.001, |
| "loss": 2.2614, |
| "step": 72700 |
| }, |
| { |
| "epoch": 23.529411764705884, |
| "grad_norm": 1.3542243242263794, |
| "learning_rate": 0.001, |
| "loss": 2.2539, |
| "step": 72800 |
| }, |
| { |
| "epoch": 23.5617323852618, |
| "grad_norm": 1.7197226285934448, |
| "learning_rate": 0.001, |
| "loss": 2.271, |
| "step": 72900 |
| }, |
| { |
| "epoch": 23.594053005817713, |
| "grad_norm": 1.0739434957504272, |
| "learning_rate": 0.001, |
| "loss": 2.2626, |
| "step": 73000 |
| }, |
| { |
| "epoch": 23.626373626373628, |
| "grad_norm": 1.0851984024047852, |
| "learning_rate": 0.001, |
| "loss": 2.2636, |
| "step": 73100 |
| }, |
| { |
| "epoch": 23.658694246929542, |
| "grad_norm": 1.173173189163208, |
| "learning_rate": 0.001, |
| "loss": 2.2793, |
| "step": 73200 |
| }, |
| { |
| "epoch": 23.691014867485457, |
| "grad_norm": 1.310870885848999, |
| "learning_rate": 0.001, |
| "loss": 2.2891, |
| "step": 73300 |
| }, |
| { |
| "epoch": 23.72333548804137, |
| "grad_norm": 1.3357535600662231, |
| "learning_rate": 0.001, |
| "loss": 2.289, |
| "step": 73400 |
| }, |
| { |
| "epoch": 23.755656108597286, |
| "grad_norm": 1.8570516109466553, |
| "learning_rate": 0.001, |
| "loss": 2.2864, |
| "step": 73500 |
| }, |
| { |
| "epoch": 23.7879767291532, |
| "grad_norm": 1.019691824913025, |
| "learning_rate": 0.001, |
| "loss": 2.3127, |
| "step": 73600 |
| }, |
| { |
| "epoch": 23.820297349709115, |
| "grad_norm": 1.171330213546753, |
| "learning_rate": 0.001, |
| "loss": 2.302, |
| "step": 73700 |
| }, |
| { |
| "epoch": 23.85261797026503, |
| "grad_norm": 1.340964913368225, |
| "learning_rate": 0.001, |
| "loss": 2.2851, |
| "step": 73800 |
| }, |
| { |
| "epoch": 23.884938590820944, |
| "grad_norm": 1.0584162473678589, |
| "learning_rate": 0.001, |
| "loss": 2.2952, |
| "step": 73900 |
| }, |
| { |
| "epoch": 23.91725921137686, |
| "grad_norm": 1.089120864868164, |
| "learning_rate": 0.001, |
| "loss": 2.2938, |
| "step": 74000 |
| }, |
| { |
| "epoch": 23.949579831932773, |
| "grad_norm": 0.9918783903121948, |
| "learning_rate": 0.001, |
| "loss": 2.3159, |
| "step": 74100 |
| }, |
| { |
| "epoch": 23.981900452488688, |
| "grad_norm": 1.2158150672912598, |
| "learning_rate": 0.001, |
| "loss": 2.3232, |
| "step": 74200 |
| }, |
| { |
| "epoch": 24.014221073044602, |
| "grad_norm": 1.7537885904312134, |
| "learning_rate": 0.001, |
| "loss": 2.2393, |
| "step": 74300 |
| }, |
| { |
| "epoch": 24.046541693600517, |
| "grad_norm": 1.2171244621276855, |
| "learning_rate": 0.001, |
| "loss": 2.1603, |
| "step": 74400 |
| }, |
| { |
| "epoch": 24.07886231415643, |
| "grad_norm": 1.923531413078308, |
| "learning_rate": 0.001, |
| "loss": 2.1792, |
| "step": 74500 |
| }, |
| { |
| "epoch": 24.111182934712346, |
| "grad_norm": 1.4310954809188843, |
| "learning_rate": 0.001, |
| "loss": 2.1884, |
| "step": 74600 |
| }, |
| { |
| "epoch": 24.14350355526826, |
| "grad_norm": 1.3417888879776, |
| "learning_rate": 0.001, |
| "loss": 2.2052, |
| "step": 74700 |
| }, |
| { |
| "epoch": 24.175824175824175, |
| "grad_norm": 1.4067453145980835, |
| "learning_rate": 0.001, |
| "loss": 2.1844, |
| "step": 74800 |
| }, |
| { |
| "epoch": 24.20814479638009, |
| "grad_norm": 1.3039509057998657, |
| "learning_rate": 0.001, |
| "loss": 2.1862, |
| "step": 74900 |
| }, |
| { |
| "epoch": 24.240465416936004, |
| "grad_norm": 1.3245545625686646, |
| "learning_rate": 0.001, |
| "loss": 2.2082, |
| "step": 75000 |
| }, |
| { |
| "epoch": 24.27278603749192, |
| "grad_norm": 1.1734683513641357, |
| "learning_rate": 0.001, |
| "loss": 2.1925, |
| "step": 75100 |
| }, |
| { |
| "epoch": 24.305106658047833, |
| "grad_norm": 1.2977125644683838, |
| "learning_rate": 0.001, |
| "loss": 2.2278, |
| "step": 75200 |
| }, |
| { |
| "epoch": 24.337427278603748, |
| "grad_norm": 1.3284844160079956, |
| "learning_rate": 0.001, |
| "loss": 2.2107, |
| "step": 75300 |
| }, |
| { |
| "epoch": 24.369747899159663, |
| "grad_norm": 1.7949256896972656, |
| "learning_rate": 0.001, |
| "loss": 2.2315, |
| "step": 75400 |
| }, |
| { |
| "epoch": 24.402068519715577, |
| "grad_norm": 1.2842888832092285, |
| "learning_rate": 0.001, |
| "loss": 2.2449, |
| "step": 75500 |
| }, |
| { |
| "epoch": 24.43438914027149, |
| "grad_norm": 1.664375901222229, |
| "learning_rate": 0.001, |
| "loss": 2.2103, |
| "step": 75600 |
| }, |
| { |
| "epoch": 24.466709760827406, |
| "grad_norm": 1.819553017616272, |
| "learning_rate": 0.001, |
| "loss": 2.2411, |
| "step": 75700 |
| }, |
| { |
| "epoch": 24.49903038138332, |
| "grad_norm": 1.7226625680923462, |
| "learning_rate": 0.001, |
| "loss": 2.2335, |
| "step": 75800 |
| }, |
| { |
| "epoch": 24.53135100193924, |
| "grad_norm": 1.389763355255127, |
| "learning_rate": 0.001, |
| "loss": 2.2298, |
| "step": 75900 |
| }, |
| { |
| "epoch": 24.563671622495153, |
| "grad_norm": 1.4180145263671875, |
| "learning_rate": 0.001, |
| "loss": 2.2553, |
| "step": 76000 |
| }, |
| { |
| "epoch": 24.595992243051068, |
| "grad_norm": 1.7032394409179688, |
| "learning_rate": 0.001, |
| "loss": 2.253, |
| "step": 76100 |
| }, |
| { |
| "epoch": 24.628312863606983, |
| "grad_norm": 1.2484254837036133, |
| "learning_rate": 0.001, |
| "loss": 2.2626, |
| "step": 76200 |
| }, |
| { |
| "epoch": 24.660633484162897, |
| "grad_norm": 1.4861352443695068, |
| "learning_rate": 0.001, |
| "loss": 2.2621, |
| "step": 76300 |
| }, |
| { |
| "epoch": 24.69295410471881, |
| "grad_norm": 1.2266833782196045, |
| "learning_rate": 0.001, |
| "loss": 2.2577, |
| "step": 76400 |
| }, |
| { |
| "epoch": 24.725274725274726, |
| "grad_norm": 1.5920053720474243, |
| "learning_rate": 0.001, |
| "loss": 2.2622, |
| "step": 76500 |
| }, |
| { |
| "epoch": 24.75759534583064, |
| "grad_norm": 1.5401145219802856, |
| "learning_rate": 0.001, |
| "loss": 2.2672, |
| "step": 76600 |
| }, |
| { |
| "epoch": 24.789915966386555, |
| "grad_norm": 1.6406484842300415, |
| "learning_rate": 0.001, |
| "loss": 2.2677, |
| "step": 76700 |
| }, |
| { |
| "epoch": 24.82223658694247, |
| "grad_norm": 1.2690526247024536, |
| "learning_rate": 0.001, |
| "loss": 2.2716, |
| "step": 76800 |
| }, |
| { |
| "epoch": 24.854557207498384, |
| "grad_norm": 1.3956623077392578, |
| "learning_rate": 0.001, |
| "loss": 2.2565, |
| "step": 76900 |
| }, |
| { |
| "epoch": 24.8868778280543, |
| "grad_norm": 1.2422696352005005, |
| "learning_rate": 0.001, |
| "loss": 2.2735, |
| "step": 77000 |
| }, |
| { |
| "epoch": 24.919198448610214, |
| "grad_norm": 1.3382925987243652, |
| "learning_rate": 0.001, |
| "loss": 2.2897, |
| "step": 77100 |
| }, |
| { |
| "epoch": 24.951519069166128, |
| "grad_norm": 1.3177549839019775, |
| "learning_rate": 0.001, |
| "loss": 2.2879, |
| "step": 77200 |
| }, |
| { |
| "epoch": 24.983839689722043, |
| "grad_norm": 1.4060649871826172, |
| "learning_rate": 0.001, |
| "loss": 2.2852, |
| "step": 77300 |
| }, |
| { |
| "epoch": 25.016160310277957, |
| "grad_norm": 2.507507801055908, |
| "learning_rate": 0.001, |
| "loss": 2.1969, |
| "step": 77400 |
| }, |
| { |
| "epoch": 25.048480930833872, |
| "grad_norm": 1.7902899980545044, |
| "learning_rate": 0.001, |
| "loss": 2.159, |
| "step": 77500 |
| }, |
| { |
| "epoch": 25.080801551389786, |
| "grad_norm": 2.543278455734253, |
| "learning_rate": 0.001, |
| "loss": 2.1503, |
| "step": 77600 |
| }, |
| { |
| "epoch": 25.1131221719457, |
| "grad_norm": 2.190962553024292, |
| "learning_rate": 0.001, |
| "loss": 2.1718, |
| "step": 77700 |
| }, |
| { |
| "epoch": 25.145442792501616, |
| "grad_norm": 2.3507230281829834, |
| "learning_rate": 0.001, |
| "loss": 2.1762, |
| "step": 77800 |
| }, |
| { |
| "epoch": 25.17776341305753, |
| "grad_norm": 2.4458060264587402, |
| "learning_rate": 0.001, |
| "loss": 2.1659, |
| "step": 77900 |
| }, |
| { |
| "epoch": 25.210084033613445, |
| "grad_norm": 1.6360588073730469, |
| "learning_rate": 0.001, |
| "loss": 2.1637, |
| "step": 78000 |
| }, |
| { |
| "epoch": 25.24240465416936, |
| "grad_norm": 2.420311212539673, |
| "learning_rate": 0.001, |
| "loss": 2.1622, |
| "step": 78100 |
| }, |
| { |
| "epoch": 25.274725274725274, |
| "grad_norm": 1.5954487323760986, |
| "learning_rate": 0.001, |
| "loss": 2.2073, |
| "step": 78200 |
| }, |
| { |
| "epoch": 25.30704589528119, |
| "grad_norm": 2.6514792442321777, |
| "learning_rate": 0.001, |
| "loss": 2.1782, |
| "step": 78300 |
| }, |
| { |
| "epoch": 25.339366515837103, |
| "grad_norm": 2.318582057952881, |
| "learning_rate": 0.001, |
| "loss": 2.1766, |
| "step": 78400 |
| }, |
| { |
| "epoch": 25.371687136393017, |
| "grad_norm": 2.036907434463501, |
| "learning_rate": 0.001, |
| "loss": 2.1729, |
| "step": 78500 |
| }, |
| { |
| "epoch": 25.404007756948932, |
| "grad_norm": 2.1478607654571533, |
| "learning_rate": 0.001, |
| "loss": 2.1956, |
| "step": 78600 |
| }, |
| { |
| "epoch": 25.436328377504847, |
| "grad_norm": 1.987168788909912, |
| "learning_rate": 0.001, |
| "loss": 2.1986, |
| "step": 78700 |
| }, |
| { |
| "epoch": 25.46864899806076, |
| "grad_norm": 1.57612144947052, |
| "learning_rate": 0.001, |
| "loss": 2.2138, |
| "step": 78800 |
| }, |
| { |
| "epoch": 25.50096961861668, |
| "grad_norm": 1.8580467700958252, |
| "learning_rate": 0.001, |
| "loss": 2.2137, |
| "step": 78900 |
| }, |
| { |
| "epoch": 25.533290239172594, |
| "grad_norm": 2.277358293533325, |
| "learning_rate": 0.001, |
| "loss": 2.2186, |
| "step": 79000 |
| }, |
| { |
| "epoch": 25.56561085972851, |
| "grad_norm": 2.014516592025757, |
| "learning_rate": 0.001, |
| "loss": 2.2176, |
| "step": 79100 |
| }, |
| { |
| "epoch": 25.597931480284423, |
| "grad_norm": 1.9379520416259766, |
| "learning_rate": 0.001, |
| "loss": 2.2346, |
| "step": 79200 |
| }, |
| { |
| "epoch": 25.630252100840337, |
| "grad_norm": 1.767410397529602, |
| "learning_rate": 0.001, |
| "loss": 2.225, |
| "step": 79300 |
| }, |
| { |
| "epoch": 25.662572721396252, |
| "grad_norm": 1.7645798921585083, |
| "learning_rate": 0.001, |
| "loss": 2.2424, |
| "step": 79400 |
| }, |
| { |
| "epoch": 25.694893341952167, |
| "grad_norm": 1.68009614944458, |
| "learning_rate": 0.001, |
| "loss": 2.2434, |
| "step": 79500 |
| }, |
| { |
| "epoch": 25.72721396250808, |
| "grad_norm": 1.9485399723052979, |
| "learning_rate": 0.001, |
| "loss": 2.2394, |
| "step": 79600 |
| }, |
| { |
| "epoch": 25.759534583063996, |
| "grad_norm": 2.297874927520752, |
| "learning_rate": 0.001, |
| "loss": 2.2385, |
| "step": 79700 |
| }, |
| { |
| "epoch": 25.79185520361991, |
| "grad_norm": 1.9281902313232422, |
| "learning_rate": 0.001, |
| "loss": 2.2504, |
| "step": 79800 |
| }, |
| { |
| "epoch": 25.824175824175825, |
| "grad_norm": 1.8228880167007446, |
| "learning_rate": 0.001, |
| "loss": 2.2296, |
| "step": 79900 |
| }, |
| { |
| "epoch": 25.85649644473174, |
| "grad_norm": 2.050082206726074, |
| "learning_rate": 0.001, |
| "loss": 2.2528, |
| "step": 80000 |
| }, |
| { |
| "epoch": 25.888817065287654, |
| "grad_norm": 2.1452155113220215, |
| "learning_rate": 0.001, |
| "loss": 2.2329, |
| "step": 80100 |
| }, |
| { |
| "epoch": 25.92113768584357, |
| "grad_norm": 1.783553957939148, |
| "learning_rate": 0.001, |
| "loss": 2.2716, |
| "step": 80200 |
| }, |
| { |
| "epoch": 25.953458306399483, |
| "grad_norm": 2.0124473571777344, |
| "learning_rate": 0.001, |
| "loss": 2.2467, |
| "step": 80300 |
| }, |
| { |
| "epoch": 25.985778926955398, |
| "grad_norm": 1.8177739381790161, |
| "learning_rate": 0.001, |
| "loss": 2.2752, |
| "step": 80400 |
| }, |
| { |
| "epoch": 26.018099547511312, |
| "grad_norm": 1.366523265838623, |
| "learning_rate": 0.001, |
| "loss": 2.1745, |
| "step": 80500 |
| }, |
| { |
| "epoch": 26.050420168067227, |
| "grad_norm": 1.7601717710494995, |
| "learning_rate": 0.001, |
| "loss": 2.1084, |
| "step": 80600 |
| }, |
| { |
| "epoch": 26.08274078862314, |
| "grad_norm": 1.2369937896728516, |
| "learning_rate": 0.001, |
| "loss": 2.1185, |
| "step": 80700 |
| }, |
| { |
| "epoch": 26.115061409179056, |
| "grad_norm": 1.2424700260162354, |
| "learning_rate": 0.001, |
| "loss": 2.1177, |
| "step": 80800 |
| }, |
| { |
| "epoch": 26.14738202973497, |
| "grad_norm": 1.8751611709594727, |
| "learning_rate": 0.001, |
| "loss": 2.152, |
| "step": 80900 |
| }, |
| { |
| "epoch": 26.179702650290885, |
| "grad_norm": 1.1152818202972412, |
| "learning_rate": 0.001, |
| "loss": 2.1489, |
| "step": 81000 |
| }, |
| { |
| "epoch": 26.2120232708468, |
| "grad_norm": 1.1283564567565918, |
| "learning_rate": 0.001, |
| "loss": 2.1572, |
| "step": 81100 |
| }, |
| { |
| "epoch": 26.244343891402714, |
| "grad_norm": 1.639378309249878, |
| "learning_rate": 0.001, |
| "loss": 2.1561, |
| "step": 81200 |
| }, |
| { |
| "epoch": 26.27666451195863, |
| "grad_norm": 1.1049447059631348, |
| "learning_rate": 0.001, |
| "loss": 2.1588, |
| "step": 81300 |
| }, |
| { |
| "epoch": 26.308985132514543, |
| "grad_norm": 1.0583877563476562, |
| "learning_rate": 0.001, |
| "loss": 2.1731, |
| "step": 81400 |
| }, |
| { |
| "epoch": 26.341305753070458, |
| "grad_norm": 1.3335222005844116, |
| "learning_rate": 0.001, |
| "loss": 2.1745, |
| "step": 81500 |
| }, |
| { |
| "epoch": 26.373626373626372, |
| "grad_norm": 1.6638619899749756, |
| "learning_rate": 0.001, |
| "loss": 2.1599, |
| "step": 81600 |
| }, |
| { |
| "epoch": 26.405946994182287, |
| "grad_norm": 1.8034693002700806, |
| "learning_rate": 0.001, |
| "loss": 2.1863, |
| "step": 81700 |
| }, |
| { |
| "epoch": 26.4382676147382, |
| "grad_norm": 1.0890341997146606, |
| "learning_rate": 0.001, |
| "loss": 2.1944, |
| "step": 81800 |
| }, |
| { |
| "epoch": 26.470588235294116, |
| "grad_norm": 1.7226943969726562, |
| "learning_rate": 0.001, |
| "loss": 2.1819, |
| "step": 81900 |
| }, |
| { |
| "epoch": 26.50290885585003, |
| "grad_norm": 1.294287085533142, |
| "learning_rate": 0.001, |
| "loss": 2.1832, |
| "step": 82000 |
| }, |
| { |
| "epoch": 26.53522947640595, |
| "grad_norm": 1.3550304174423218, |
| "learning_rate": 0.001, |
| "loss": 2.2035, |
| "step": 82100 |
| }, |
| { |
| "epoch": 26.567550096961863, |
| "grad_norm": 1.1257293224334717, |
| "learning_rate": 0.001, |
| "loss": 2.1898, |
| "step": 82200 |
| }, |
| { |
| "epoch": 26.599870717517778, |
| "grad_norm": 1.2646733522415161, |
| "learning_rate": 0.001, |
| "loss": 2.1997, |
| "step": 82300 |
| }, |
| { |
| "epoch": 26.632191338073692, |
| "grad_norm": 1.2385530471801758, |
| "learning_rate": 0.001, |
| "loss": 2.2071, |
| "step": 82400 |
| }, |
| { |
| "epoch": 26.664511958629607, |
| "grad_norm": 1.4374717473983765, |
| "learning_rate": 0.001, |
| "loss": 2.2082, |
| "step": 82500 |
| }, |
| { |
| "epoch": 26.69683257918552, |
| "grad_norm": 1.3186120986938477, |
| "learning_rate": 0.001, |
| "loss": 2.2149, |
| "step": 82600 |
| }, |
| { |
| "epoch": 26.729153199741436, |
| "grad_norm": 1.2617899179458618, |
| "learning_rate": 0.001, |
| "loss": 2.2039, |
| "step": 82700 |
| }, |
| { |
| "epoch": 26.76147382029735, |
| "grad_norm": 1.0441887378692627, |
| "learning_rate": 0.001, |
| "loss": 2.2324, |
| "step": 82800 |
| }, |
| { |
| "epoch": 26.793794440853265, |
| "grad_norm": 1.5578880310058594, |
| "learning_rate": 0.001, |
| "loss": 2.2155, |
| "step": 82900 |
| }, |
| { |
| "epoch": 26.82611506140918, |
| "grad_norm": 1.1477580070495605, |
| "learning_rate": 0.001, |
| "loss": 2.2219, |
| "step": 83000 |
| }, |
| { |
| "epoch": 26.858435681965094, |
| "grad_norm": 1.3140536546707153, |
| "learning_rate": 0.001, |
| "loss": 2.2309, |
| "step": 83100 |
| }, |
| { |
| "epoch": 26.89075630252101, |
| "grad_norm": 1.0781357288360596, |
| "learning_rate": 0.001, |
| "loss": 2.2302, |
| "step": 83200 |
| }, |
| { |
| "epoch": 26.923076923076923, |
| "grad_norm": 1.6410821676254272, |
| "learning_rate": 0.001, |
| "loss": 2.2319, |
| "step": 83300 |
| }, |
| { |
| "epoch": 26.955397543632838, |
| "grad_norm": 1.5108658075332642, |
| "learning_rate": 0.001, |
| "loss": 2.2258, |
| "step": 83400 |
| }, |
| { |
| "epoch": 26.987718164188752, |
| "grad_norm": 1.4185115098953247, |
| "learning_rate": 0.001, |
| "loss": 2.2219, |
| "step": 83500 |
| }, |
| { |
| "epoch": 27.020038784744667, |
| "grad_norm": 1.2338390350341797, |
| "learning_rate": 0.001, |
| "loss": 2.1582, |
| "step": 83600 |
| }, |
| { |
| "epoch": 27.05235940530058, |
| "grad_norm": 1.1917002201080322, |
| "learning_rate": 0.001, |
| "loss": 2.0996, |
| "step": 83700 |
| }, |
| { |
| "epoch": 27.084680025856496, |
| "grad_norm": 1.9231431484222412, |
| "learning_rate": 0.001, |
| "loss": 2.1101, |
| "step": 83800 |
| }, |
| { |
| "epoch": 27.11700064641241, |
| "grad_norm": 0.9797855615615845, |
| "learning_rate": 0.001, |
| "loss": 2.1199, |
| "step": 83900 |
| }, |
| { |
| "epoch": 27.149321266968325, |
| "grad_norm": 1.3979872465133667, |
| "learning_rate": 0.001, |
| "loss": 2.1181, |
| "step": 84000 |
| }, |
| { |
| "epoch": 27.18164188752424, |
| "grad_norm": 1.0003714561462402, |
| "learning_rate": 0.001, |
| "loss": 2.1446, |
| "step": 84100 |
| }, |
| { |
| "epoch": 27.213962508080154, |
| "grad_norm": 1.7641527652740479, |
| "learning_rate": 0.001, |
| "loss": 2.1024, |
| "step": 84200 |
| }, |
| { |
| "epoch": 27.24628312863607, |
| "grad_norm": 0.9522609114646912, |
| "learning_rate": 0.001, |
| "loss": 2.126, |
| "step": 84300 |
| }, |
| { |
| "epoch": 27.278603749191983, |
| "grad_norm": 1.028588891029358, |
| "learning_rate": 0.001, |
| "loss": 2.1288, |
| "step": 84400 |
| }, |
| { |
| "epoch": 27.310924369747898, |
| "grad_norm": 1.1693410873413086, |
| "learning_rate": 0.001, |
| "loss": 2.1359, |
| "step": 84500 |
| }, |
| { |
| "epoch": 27.343244990303813, |
| "grad_norm": 1.3971128463745117, |
| "learning_rate": 0.001, |
| "loss": 2.1631, |
| "step": 84600 |
| }, |
| { |
| "epoch": 27.375565610859727, |
| "grad_norm": 1.0558292865753174, |
| "learning_rate": 0.001, |
| "loss": 2.1556, |
| "step": 84700 |
| }, |
| { |
| "epoch": 27.40788623141564, |
| "grad_norm": 1.113844633102417, |
| "learning_rate": 0.001, |
| "loss": 2.1462, |
| "step": 84800 |
| }, |
| { |
| "epoch": 27.440206851971556, |
| "grad_norm": 1.251039981842041, |
| "learning_rate": 0.001, |
| "loss": 2.1483, |
| "step": 84900 |
| }, |
| { |
| "epoch": 27.47252747252747, |
| "grad_norm": 1.2447925806045532, |
| "learning_rate": 0.001, |
| "loss": 2.1503, |
| "step": 85000 |
| }, |
| { |
| "epoch": 27.50484809308339, |
| "grad_norm": 1.154356598854065, |
| "learning_rate": 0.001, |
| "loss": 2.1789, |
| "step": 85100 |
| }, |
| { |
| "epoch": 27.537168713639304, |
| "grad_norm": 1.1376807689666748, |
| "learning_rate": 0.001, |
| "loss": 2.1802, |
| "step": 85200 |
| }, |
| { |
| "epoch": 27.569489334195218, |
| "grad_norm": 1.3358043432235718, |
| "learning_rate": 0.001, |
| "loss": 2.1827, |
| "step": 85300 |
| }, |
| { |
| "epoch": 27.601809954751133, |
| "grad_norm": 1.147119402885437, |
| "learning_rate": 0.001, |
| "loss": 2.173, |
| "step": 85400 |
| }, |
| { |
| "epoch": 27.634130575307047, |
| "grad_norm": 1.3377326726913452, |
| "learning_rate": 0.001, |
| "loss": 2.1811, |
| "step": 85500 |
| }, |
| { |
| "epoch": 27.66645119586296, |
| "grad_norm": 1.1831611394882202, |
| "learning_rate": 0.001, |
| "loss": 2.1735, |
| "step": 85600 |
| }, |
| { |
| "epoch": 27.698771816418876, |
| "grad_norm": 1.2356758117675781, |
| "learning_rate": 0.001, |
| "loss": 2.1968, |
| "step": 85700 |
| }, |
| { |
| "epoch": 27.73109243697479, |
| "grad_norm": 1.4294730424880981, |
| "learning_rate": 0.001, |
| "loss": 2.178, |
| "step": 85800 |
| }, |
| { |
| "epoch": 27.763413057530705, |
| "grad_norm": 1.5439614057540894, |
| "learning_rate": 0.001, |
| "loss": 2.1808, |
| "step": 85900 |
| }, |
| { |
| "epoch": 27.79573367808662, |
| "grad_norm": 1.3789457082748413, |
| "learning_rate": 0.001, |
| "loss": 2.1897, |
| "step": 86000 |
| }, |
| { |
| "epoch": 27.828054298642535, |
| "grad_norm": 0.9521170258522034, |
| "learning_rate": 0.001, |
| "loss": 2.1824, |
| "step": 86100 |
| }, |
| { |
| "epoch": 27.86037491919845, |
| "grad_norm": 0.98322594165802, |
| "learning_rate": 0.001, |
| "loss": 2.2039, |
| "step": 86200 |
| }, |
| { |
| "epoch": 27.892695539754364, |
| "grad_norm": 1.7284862995147705, |
| "learning_rate": 0.001, |
| "loss": 2.1833, |
| "step": 86300 |
| }, |
| { |
| "epoch": 27.92501616031028, |
| "grad_norm": 1.160421371459961, |
| "learning_rate": 0.001, |
| "loss": 2.1943, |
| "step": 86400 |
| }, |
| { |
| "epoch": 27.957336780866193, |
| "grad_norm": 1.2484320402145386, |
| "learning_rate": 0.001, |
| "loss": 2.2137, |
| "step": 86500 |
| }, |
| { |
| "epoch": 27.989657401422107, |
| "grad_norm": 1.6111912727355957, |
| "learning_rate": 0.001, |
| "loss": 2.1995, |
| "step": 86600 |
| }, |
| { |
| "epoch": 28.021978021978022, |
| "grad_norm": 1.159625768661499, |
| "learning_rate": 0.001, |
| "loss": 2.1394, |
| "step": 86700 |
| }, |
| { |
| "epoch": 28.054298642533936, |
| "grad_norm": 1.2188056707382202, |
| "learning_rate": 0.001, |
| "loss": 2.0749, |
| "step": 86800 |
| }, |
| { |
| "epoch": 28.08661926308985, |
| "grad_norm": 1.3605949878692627, |
| "learning_rate": 0.001, |
| "loss": 2.0627, |
| "step": 86900 |
| }, |
| { |
| "epoch": 28.118939883645766, |
| "grad_norm": 1.0128086805343628, |
| "learning_rate": 0.001, |
| "loss": 2.0837, |
| "step": 87000 |
| }, |
| { |
| "epoch": 28.15126050420168, |
| "grad_norm": 1.420502781867981, |
| "learning_rate": 0.001, |
| "loss": 2.0804, |
| "step": 87100 |
| }, |
| { |
| "epoch": 28.183581124757595, |
| "grad_norm": 1.1222341060638428, |
| "learning_rate": 0.001, |
| "loss": 2.0978, |
| "step": 87200 |
| }, |
| { |
| "epoch": 28.21590174531351, |
| "grad_norm": 1.2059674263000488, |
| "learning_rate": 0.001, |
| "loss": 2.1132, |
| "step": 87300 |
| }, |
| { |
| "epoch": 28.248222365869424, |
| "grad_norm": 1.1298748254776, |
| "learning_rate": 0.001, |
| "loss": 2.1136, |
| "step": 87400 |
| }, |
| { |
| "epoch": 28.28054298642534, |
| "grad_norm": 1.2116446495056152, |
| "learning_rate": 0.001, |
| "loss": 2.1205, |
| "step": 87500 |
| }, |
| { |
| "epoch": 28.312863606981253, |
| "grad_norm": 1.4095208644866943, |
| "learning_rate": 0.001, |
| "loss": 2.1073, |
| "step": 87600 |
| }, |
| { |
| "epoch": 28.345184227537167, |
| "grad_norm": 1.0656698942184448, |
| "learning_rate": 0.001, |
| "loss": 2.1035, |
| "step": 87700 |
| }, |
| { |
| "epoch": 28.377504848093082, |
| "grad_norm": 1.166192650794983, |
| "learning_rate": 0.001, |
| "loss": 2.1497, |
| "step": 87800 |
| }, |
| { |
| "epoch": 28.409825468648997, |
| "grad_norm": 1.2223316431045532, |
| "learning_rate": 0.001, |
| "loss": 2.1285, |
| "step": 87900 |
| }, |
| { |
| "epoch": 28.44214608920491, |
| "grad_norm": 1.3476072549819946, |
| "learning_rate": 0.001, |
| "loss": 2.1229, |
| "step": 88000 |
| }, |
| { |
| "epoch": 28.474466709760826, |
| "grad_norm": 1.5452789068222046, |
| "learning_rate": 0.001, |
| "loss": 2.1377, |
| "step": 88100 |
| }, |
| { |
| "epoch": 28.50678733031674, |
| "grad_norm": 1.204521656036377, |
| "learning_rate": 0.001, |
| "loss": 2.1411, |
| "step": 88200 |
| }, |
| { |
| "epoch": 28.53910795087266, |
| "grad_norm": 1.2663288116455078, |
| "learning_rate": 0.001, |
| "loss": 2.156, |
| "step": 88300 |
| }, |
| { |
| "epoch": 28.571428571428573, |
| "grad_norm": 1.0826762914657593, |
| "learning_rate": 0.001, |
| "loss": 2.1547, |
| "step": 88400 |
| }, |
| { |
| "epoch": 28.603749191984488, |
| "grad_norm": 1.1127820014953613, |
| "learning_rate": 0.001, |
| "loss": 2.1708, |
| "step": 88500 |
| }, |
| { |
| "epoch": 28.636069812540402, |
| "grad_norm": 1.3881254196166992, |
| "learning_rate": 0.001, |
| "loss": 2.1594, |
| "step": 88600 |
| }, |
| { |
| "epoch": 28.668390433096317, |
| "grad_norm": 1.1154764890670776, |
| "learning_rate": 0.001, |
| "loss": 2.1618, |
| "step": 88700 |
| }, |
| { |
| "epoch": 28.70071105365223, |
| "grad_norm": 1.3774234056472778, |
| "learning_rate": 0.001, |
| "loss": 2.1696, |
| "step": 88800 |
| }, |
| { |
| "epoch": 28.733031674208146, |
| "grad_norm": 1.477734923362732, |
| "learning_rate": 0.001, |
| "loss": 2.1735, |
| "step": 88900 |
| }, |
| { |
| "epoch": 28.76535229476406, |
| "grad_norm": 1.1354868412017822, |
| "learning_rate": 0.001, |
| "loss": 2.1755, |
| "step": 89000 |
| }, |
| { |
| "epoch": 28.797672915319975, |
| "grad_norm": 1.2781658172607422, |
| "learning_rate": 0.001, |
| "loss": 2.1702, |
| "step": 89100 |
| }, |
| { |
| "epoch": 28.82999353587589, |
| "grad_norm": 0.9861193895339966, |
| "learning_rate": 0.001, |
| "loss": 2.1754, |
| "step": 89200 |
| }, |
| { |
| "epoch": 28.862314156431804, |
| "grad_norm": 1.2006617784500122, |
| "learning_rate": 0.001, |
| "loss": 2.1712, |
| "step": 89300 |
| }, |
| { |
| "epoch": 28.89463477698772, |
| "grad_norm": 1.0458413362503052, |
| "learning_rate": 0.001, |
| "loss": 2.1622, |
| "step": 89400 |
| }, |
| { |
| "epoch": 28.926955397543633, |
| "grad_norm": 1.6380434036254883, |
| "learning_rate": 0.001, |
| "loss": 2.1827, |
| "step": 89500 |
| }, |
| { |
| "epoch": 28.959276018099548, |
| "grad_norm": 1.4402202367782593, |
| "learning_rate": 0.001, |
| "loss": 2.1788, |
| "step": 89600 |
| }, |
| { |
| "epoch": 28.991596638655462, |
| "grad_norm": 1.1642043590545654, |
| "learning_rate": 0.001, |
| "loss": 2.1953, |
| "step": 89700 |
| }, |
| { |
| "epoch": 29.023917259211377, |
| "grad_norm": 1.3228559494018555, |
| "learning_rate": 0.001, |
| "loss": 2.097, |
| "step": 89800 |
| }, |
| { |
| "epoch": 29.05623787976729, |
| "grad_norm": 1.251704216003418, |
| "learning_rate": 0.001, |
| "loss": 2.042, |
| "step": 89900 |
| }, |
| { |
| "epoch": 29.088558500323206, |
| "grad_norm": 1.0149487257003784, |
| "learning_rate": 0.001, |
| "loss": 2.0505, |
| "step": 90000 |
| }, |
| { |
| "epoch": 29.12087912087912, |
| "grad_norm": 1.255851149559021, |
| "learning_rate": 0.001, |
| "loss": 2.0693, |
| "step": 90100 |
| }, |
| { |
| "epoch": 29.153199741435035, |
| "grad_norm": 1.5572729110717773, |
| "learning_rate": 0.001, |
| "loss": 2.0668, |
| "step": 90200 |
| }, |
| { |
| "epoch": 29.18552036199095, |
| "grad_norm": 1.1453217267990112, |
| "learning_rate": 0.001, |
| "loss": 2.0868, |
| "step": 90300 |
| }, |
| { |
| "epoch": 29.217840982546864, |
| "grad_norm": 1.1141669750213623, |
| "learning_rate": 0.001, |
| "loss": 2.0751, |
| "step": 90400 |
| }, |
| { |
| "epoch": 29.25016160310278, |
| "grad_norm": 1.3564319610595703, |
| "learning_rate": 0.001, |
| "loss": 2.0766, |
| "step": 90500 |
| }, |
| { |
| "epoch": 29.282482223658693, |
| "grad_norm": 1.0817420482635498, |
| "learning_rate": 0.001, |
| "loss": 2.0784, |
| "step": 90600 |
| }, |
| { |
| "epoch": 29.314802844214608, |
| "grad_norm": 1.1262016296386719, |
| "learning_rate": 0.001, |
| "loss": 2.0984, |
| "step": 90700 |
| }, |
| { |
| "epoch": 29.347123464770522, |
| "grad_norm": 0.9490855932235718, |
| "learning_rate": 0.001, |
| "loss": 2.1047, |
| "step": 90800 |
| }, |
| { |
| "epoch": 29.379444085326437, |
| "grad_norm": 1.128937005996704, |
| "learning_rate": 0.001, |
| "loss": 2.0962, |
| "step": 90900 |
| }, |
| { |
| "epoch": 29.41176470588235, |
| "grad_norm": 1.2524522542953491, |
| "learning_rate": 0.001, |
| "loss": 2.1144, |
| "step": 91000 |
| }, |
| { |
| "epoch": 29.444085326438266, |
| "grad_norm": 1.3518542051315308, |
| "learning_rate": 0.001, |
| "loss": 2.1241, |
| "step": 91100 |
| }, |
| { |
| "epoch": 29.47640594699418, |
| "grad_norm": 1.2834372520446777, |
| "learning_rate": 0.001, |
| "loss": 2.1124, |
| "step": 91200 |
| }, |
| { |
| "epoch": 29.5087265675501, |
| "grad_norm": 1.3267338275909424, |
| "learning_rate": 0.001, |
| "loss": 2.1215, |
| "step": 91300 |
| }, |
| { |
| "epoch": 29.541047188106013, |
| "grad_norm": 1.4632035493850708, |
| "learning_rate": 0.001, |
| "loss": 2.143, |
| "step": 91400 |
| }, |
| { |
| "epoch": 29.573367808661928, |
| "grad_norm": 1.2071197032928467, |
| "learning_rate": 0.001, |
| "loss": 2.1225, |
| "step": 91500 |
| }, |
| { |
| "epoch": 29.605688429217842, |
| "grad_norm": 1.339823842048645, |
| "learning_rate": 0.001, |
| "loss": 2.1094, |
| "step": 91600 |
| }, |
| { |
| "epoch": 29.638009049773757, |
| "grad_norm": 1.1348124742507935, |
| "learning_rate": 0.001, |
| "loss": 2.1373, |
| "step": 91700 |
| }, |
| { |
| "epoch": 29.67032967032967, |
| "grad_norm": 1.0786309242248535, |
| "learning_rate": 0.001, |
| "loss": 2.146, |
| "step": 91800 |
| }, |
| { |
| "epoch": 29.702650290885586, |
| "grad_norm": 1.2192673683166504, |
| "learning_rate": 0.001, |
| "loss": 2.1523, |
| "step": 91900 |
| }, |
| { |
| "epoch": 29.7349709114415, |
| "grad_norm": 0.9565330147743225, |
| "learning_rate": 0.001, |
| "loss": 2.1408, |
| "step": 92000 |
| }, |
| { |
| "epoch": 29.767291531997415, |
| "grad_norm": 1.2919212579727173, |
| "learning_rate": 0.001, |
| "loss": 2.1431, |
| "step": 92100 |
| }, |
| { |
| "epoch": 29.79961215255333, |
| "grad_norm": 1.1823819875717163, |
| "learning_rate": 0.001, |
| "loss": 2.1612, |
| "step": 92200 |
| }, |
| { |
| "epoch": 29.831932773109244, |
| "grad_norm": 1.1808539628982544, |
| "learning_rate": 0.001, |
| "loss": 2.1541, |
| "step": 92300 |
| }, |
| { |
| "epoch": 29.86425339366516, |
| "grad_norm": 0.9185066223144531, |
| "learning_rate": 0.001, |
| "loss": 2.1528, |
| "step": 92400 |
| }, |
| { |
| "epoch": 29.896574014221073, |
| "grad_norm": 1.387736439704895, |
| "learning_rate": 0.001, |
| "loss": 2.1577, |
| "step": 92500 |
| }, |
| { |
| "epoch": 29.928894634776988, |
| "grad_norm": 1.27926504611969, |
| "learning_rate": 0.001, |
| "loss": 2.1618, |
| "step": 92600 |
| }, |
| { |
| "epoch": 29.961215255332903, |
| "grad_norm": 1.438068151473999, |
| "learning_rate": 0.001, |
| "loss": 2.1501, |
| "step": 92700 |
| }, |
| { |
| "epoch": 29.993535875888817, |
| "grad_norm": 1.2931363582611084, |
| "learning_rate": 0.001, |
| "loss": 2.1733, |
| "step": 92800 |
| }, |
| { |
| "epoch": 30.02585649644473, |
| "grad_norm": 1.4011821746826172, |
| "learning_rate": 0.001, |
| "loss": 2.0494, |
| "step": 92900 |
| }, |
| { |
| "epoch": 30.058177117000646, |
| "grad_norm": 1.3266193866729736, |
| "learning_rate": 0.001, |
| "loss": 2.028, |
| "step": 93000 |
| }, |
| { |
| "epoch": 30.09049773755656, |
| "grad_norm": 1.4412564039230347, |
| "learning_rate": 0.001, |
| "loss": 2.0236, |
| "step": 93100 |
| }, |
| { |
| "epoch": 30.122818358112475, |
| "grad_norm": 1.2266614437103271, |
| "learning_rate": 0.001, |
| "loss": 2.0305, |
| "step": 93200 |
| }, |
| { |
| "epoch": 30.15513897866839, |
| "grad_norm": 1.2787266969680786, |
| "learning_rate": 0.001, |
| "loss": 2.0371, |
| "step": 93300 |
| }, |
| { |
| "epoch": 30.187459599224304, |
| "grad_norm": 1.3180530071258545, |
| "learning_rate": 0.001, |
| "loss": 2.0685, |
| "step": 93400 |
| }, |
| { |
| "epoch": 30.21978021978022, |
| "grad_norm": 0.980501115322113, |
| "learning_rate": 0.001, |
| "loss": 2.0615, |
| "step": 93500 |
| }, |
| { |
| "epoch": 30.252100840336134, |
| "grad_norm": 1.0844141244888306, |
| "learning_rate": 0.001, |
| "loss": 2.0757, |
| "step": 93600 |
| }, |
| { |
| "epoch": 30.284421460892048, |
| "grad_norm": 1.7597213983535767, |
| "learning_rate": 0.001, |
| "loss": 2.0663, |
| "step": 93700 |
| }, |
| { |
| "epoch": 30.316742081447963, |
| "grad_norm": 1.1686208248138428, |
| "learning_rate": 0.001, |
| "loss": 2.0461, |
| "step": 93800 |
| }, |
| { |
| "epoch": 30.349062702003877, |
| "grad_norm": 1.2640702724456787, |
| "learning_rate": 0.001, |
| "loss": 2.081, |
| "step": 93900 |
| }, |
| { |
| "epoch": 30.381383322559792, |
| "grad_norm": 1.1923021078109741, |
| "learning_rate": 0.001, |
| "loss": 2.0855, |
| "step": 94000 |
| }, |
| { |
| "epoch": 30.413703943115706, |
| "grad_norm": 1.25620698928833, |
| "learning_rate": 0.001, |
| "loss": 2.1024, |
| "step": 94100 |
| }, |
| { |
| "epoch": 30.44602456367162, |
| "grad_norm": 1.1417943239212036, |
| "learning_rate": 0.001, |
| "loss": 2.0802, |
| "step": 94200 |
| }, |
| { |
| "epoch": 30.478345184227535, |
| "grad_norm": 1.3786128759384155, |
| "learning_rate": 0.001, |
| "loss": 2.1161, |
| "step": 94300 |
| }, |
| { |
| "epoch": 30.51066580478345, |
| "grad_norm": 1.1850334405899048, |
| "learning_rate": 0.001, |
| "loss": 2.1171, |
| "step": 94400 |
| }, |
| { |
| "epoch": 30.542986425339368, |
| "grad_norm": 1.1608952283859253, |
| "learning_rate": 0.001, |
| "loss": 2.1074, |
| "step": 94500 |
| }, |
| { |
| "epoch": 30.575307045895283, |
| "grad_norm": 1.5039657354354858, |
| "learning_rate": 0.001, |
| "loss": 2.1199, |
| "step": 94600 |
| }, |
| { |
| "epoch": 30.607627666451197, |
| "grad_norm": 1.1668621301651, |
| "learning_rate": 0.001, |
| "loss": 2.1185, |
| "step": 94700 |
| }, |
| { |
| "epoch": 30.639948287007112, |
| "grad_norm": 1.6132876873016357, |
| "learning_rate": 0.001, |
| "loss": 2.0857, |
| "step": 94800 |
| }, |
| { |
| "epoch": 30.672268907563026, |
| "grad_norm": 1.538486123085022, |
| "learning_rate": 0.001, |
| "loss": 2.0991, |
| "step": 94900 |
| }, |
| { |
| "epoch": 30.70458952811894, |
| "grad_norm": 1.506345272064209, |
| "learning_rate": 0.001, |
| "loss": 2.1122, |
| "step": 95000 |
| }, |
| { |
| "epoch": 30.736910148674855, |
| "grad_norm": 1.0374579429626465, |
| "learning_rate": 0.001, |
| "loss": 2.1348, |
| "step": 95100 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 1.2800602912902832, |
| "learning_rate": 0.001, |
| "loss": 2.1188, |
| "step": 95200 |
| }, |
| { |
| "epoch": 30.801551389786685, |
| "grad_norm": 1.1404392719268799, |
| "learning_rate": 0.001, |
| "loss": 2.1272, |
| "step": 95300 |
| }, |
| { |
| "epoch": 30.8338720103426, |
| "grad_norm": 1.3880510330200195, |
| "learning_rate": 0.001, |
| "loss": 2.1493, |
| "step": 95400 |
| }, |
| { |
| "epoch": 30.866192630898514, |
| "grad_norm": 1.391794204711914, |
| "learning_rate": 0.001, |
| "loss": 2.1378, |
| "step": 95500 |
| }, |
| { |
| "epoch": 30.89851325145443, |
| "grad_norm": 1.0275532007217407, |
| "learning_rate": 0.001, |
| "loss": 2.1227, |
| "step": 95600 |
| }, |
| { |
| "epoch": 30.930833872010343, |
| "grad_norm": 1.4005444049835205, |
| "learning_rate": 0.001, |
| "loss": 2.1576, |
| "step": 95700 |
| }, |
| { |
| "epoch": 30.963154492566257, |
| "grad_norm": 1.2828242778778076, |
| "learning_rate": 0.001, |
| "loss": 2.1411, |
| "step": 95800 |
| }, |
| { |
| "epoch": 30.995475113122172, |
| "grad_norm": 1.341699242591858, |
| "learning_rate": 0.001, |
| "loss": 2.1246, |
| "step": 95900 |
| }, |
| { |
| "epoch": 31.027795733678087, |
| "grad_norm": 1.4933757781982422, |
| "learning_rate": 0.001, |
| "loss": 2.0016, |
| "step": 96000 |
| }, |
| { |
| "epoch": 31.060116354234, |
| "grad_norm": 1.5427340269088745, |
| "learning_rate": 0.001, |
| "loss": 2.0132, |
| "step": 96100 |
| }, |
| { |
| "epoch": 31.092436974789916, |
| "grad_norm": 1.1902391910552979, |
| "learning_rate": 0.001, |
| "loss": 2.0301, |
| "step": 96200 |
| }, |
| { |
| "epoch": 31.12475759534583, |
| "grad_norm": 1.2572218179702759, |
| "learning_rate": 0.001, |
| "loss": 2.0339, |
| "step": 96300 |
| }, |
| { |
| "epoch": 31.157078215901745, |
| "grad_norm": 1.2706958055496216, |
| "learning_rate": 0.001, |
| "loss": 2.032, |
| "step": 96400 |
| }, |
| { |
| "epoch": 31.18939883645766, |
| "grad_norm": 1.4089903831481934, |
| "learning_rate": 0.001, |
| "loss": 2.0138, |
| "step": 96500 |
| }, |
| { |
| "epoch": 31.221719457013574, |
| "grad_norm": 1.3937467336654663, |
| "learning_rate": 0.001, |
| "loss": 2.0442, |
| "step": 96600 |
| }, |
| { |
| "epoch": 31.25404007756949, |
| "grad_norm": 1.353804588317871, |
| "learning_rate": 0.001, |
| "loss": 2.0317, |
| "step": 96700 |
| }, |
| { |
| "epoch": 31.286360698125403, |
| "grad_norm": 1.279462456703186, |
| "learning_rate": 0.001, |
| "loss": 2.057, |
| "step": 96800 |
| }, |
| { |
| "epoch": 31.318681318681318, |
| "grad_norm": 1.0817734003067017, |
| "learning_rate": 0.001, |
| "loss": 2.0597, |
| "step": 96900 |
| }, |
| { |
| "epoch": 31.351001939237232, |
| "grad_norm": 1.5528923273086548, |
| "learning_rate": 0.001, |
| "loss": 2.0555, |
| "step": 97000 |
| }, |
| { |
| "epoch": 31.383322559793147, |
| "grad_norm": 1.3770098686218262, |
| "learning_rate": 0.001, |
| "loss": 2.0565, |
| "step": 97100 |
| }, |
| { |
| "epoch": 31.41564318034906, |
| "grad_norm": 1.2134709358215332, |
| "learning_rate": 0.001, |
| "loss": 2.077, |
| "step": 97200 |
| }, |
| { |
| "epoch": 31.447963800904976, |
| "grad_norm": 1.6369162797927856, |
| "learning_rate": 0.001, |
| "loss": 2.0746, |
| "step": 97300 |
| }, |
| { |
| "epoch": 31.48028442146089, |
| "grad_norm": 1.389773964881897, |
| "learning_rate": 0.001, |
| "loss": 2.0696, |
| "step": 97400 |
| }, |
| { |
| "epoch": 31.51260504201681, |
| "grad_norm": 1.4730721712112427, |
| "learning_rate": 0.001, |
| "loss": 2.086, |
| "step": 97500 |
| }, |
| { |
| "epoch": 31.544925662572723, |
| "grad_norm": 1.5068094730377197, |
| "learning_rate": 0.001, |
| "loss": 2.0827, |
| "step": 97600 |
| }, |
| { |
| "epoch": 31.577246283128638, |
| "grad_norm": 1.334282636642456, |
| "learning_rate": 0.001, |
| "loss": 2.0834, |
| "step": 97700 |
| }, |
| { |
| "epoch": 31.609566903684552, |
| "grad_norm": 1.3712047338485718, |
| "learning_rate": 0.001, |
| "loss": 2.083, |
| "step": 97800 |
| }, |
| { |
| "epoch": 31.641887524240467, |
| "grad_norm": 1.0409879684448242, |
| "learning_rate": 0.001, |
| "loss": 2.0913, |
| "step": 97900 |
| }, |
| { |
| "epoch": 31.67420814479638, |
| "grad_norm": 1.559335708618164, |
| "learning_rate": 0.001, |
| "loss": 2.1005, |
| "step": 98000 |
| }, |
| { |
| "epoch": 31.706528765352296, |
| "grad_norm": 1.1505087614059448, |
| "learning_rate": 0.001, |
| "loss": 2.1066, |
| "step": 98100 |
| }, |
| { |
| "epoch": 31.73884938590821, |
| "grad_norm": 1.2901155948638916, |
| "learning_rate": 0.001, |
| "loss": 2.0871, |
| "step": 98200 |
| }, |
| { |
| "epoch": 31.771170006464125, |
| "grad_norm": 1.6031471490859985, |
| "learning_rate": 0.001, |
| "loss": 2.1081, |
| "step": 98300 |
| }, |
| { |
| "epoch": 31.80349062702004, |
| "grad_norm": 1.2014280557632446, |
| "learning_rate": 0.001, |
| "loss": 2.1044, |
| "step": 98400 |
| }, |
| { |
| "epoch": 31.835811247575954, |
| "grad_norm": 1.3353430032730103, |
| "learning_rate": 0.001, |
| "loss": 2.1145, |
| "step": 98500 |
| }, |
| { |
| "epoch": 31.86813186813187, |
| "grad_norm": 1.4211585521697998, |
| "learning_rate": 0.001, |
| "loss": 2.1067, |
| "step": 98600 |
| }, |
| { |
| "epoch": 31.900452488687783, |
| "grad_norm": 1.28533935546875, |
| "learning_rate": 0.001, |
| "loss": 2.1234, |
| "step": 98700 |
| }, |
| { |
| "epoch": 31.932773109243698, |
| "grad_norm": 1.4658890962600708, |
| "learning_rate": 0.001, |
| "loss": 2.1224, |
| "step": 98800 |
| }, |
| { |
| "epoch": 31.965093729799612, |
| "grad_norm": 1.3019647598266602, |
| "learning_rate": 0.001, |
| "loss": 2.1161, |
| "step": 98900 |
| }, |
| { |
| "epoch": 31.997414350355527, |
| "grad_norm": 1.580609679222107, |
| "learning_rate": 0.001, |
| "loss": 2.1106, |
| "step": 99000 |
| }, |
| { |
| "epoch": 32.02973497091144, |
| "grad_norm": 1.6000734567642212, |
| "learning_rate": 0.001, |
| "loss": 2.0183, |
| "step": 99100 |
| }, |
| { |
| "epoch": 32.062055591467356, |
| "grad_norm": 1.5266070365905762, |
| "learning_rate": 0.001, |
| "loss": 1.992, |
| "step": 99200 |
| }, |
| { |
| "epoch": 32.09437621202327, |
| "grad_norm": 1.4625951051712036, |
| "learning_rate": 0.001, |
| "loss": 1.9784, |
| "step": 99300 |
| }, |
| { |
| "epoch": 32.126696832579185, |
| "grad_norm": 1.5137872695922852, |
| "learning_rate": 0.001, |
| "loss": 1.9986, |
| "step": 99400 |
| }, |
| { |
| "epoch": 32.1590174531351, |
| "grad_norm": 1.565382719039917, |
| "learning_rate": 0.001, |
| "loss": 2.014, |
| "step": 99500 |
| }, |
| { |
| "epoch": 32.191338073691014, |
| "grad_norm": 1.1044955253601074, |
| "learning_rate": 0.001, |
| "loss": 2.0119, |
| "step": 99600 |
| }, |
| { |
| "epoch": 32.22365869424693, |
| "grad_norm": 1.5801650285720825, |
| "learning_rate": 0.001, |
| "loss": 2.0215, |
| "step": 99700 |
| }, |
| { |
| "epoch": 32.25597931480284, |
| "grad_norm": 1.502664566040039, |
| "learning_rate": 0.001, |
| "loss": 2.0201, |
| "step": 99800 |
| }, |
| { |
| "epoch": 32.28829993535876, |
| "grad_norm": 1.5750117301940918, |
| "learning_rate": 0.001, |
| "loss": 2.0281, |
| "step": 99900 |
| }, |
| { |
| "epoch": 32.32062055591467, |
| "grad_norm": 1.4533034563064575, |
| "learning_rate": 0.001, |
| "loss": 2.0448, |
| "step": 100000 |
| }, |
| { |
| "epoch": 32.35294117647059, |
| "grad_norm": 1.4341336488723755, |
| "learning_rate": 0.001, |
| "loss": 2.0461, |
| "step": 100100 |
| }, |
| { |
| "epoch": 32.3852617970265, |
| "grad_norm": 1.501747488975525, |
| "learning_rate": 0.001, |
| "loss": 2.0465, |
| "step": 100200 |
| }, |
| { |
| "epoch": 32.417582417582416, |
| "grad_norm": 1.3563640117645264, |
| "learning_rate": 0.001, |
| "loss": 2.0374, |
| "step": 100300 |
| }, |
| { |
| "epoch": 32.44990303813833, |
| "grad_norm": 1.6985423564910889, |
| "learning_rate": 0.001, |
| "loss": 2.042, |
| "step": 100400 |
| }, |
| { |
| "epoch": 32.482223658694245, |
| "grad_norm": 1.352845311164856, |
| "learning_rate": 0.001, |
| "loss": 2.0394, |
| "step": 100500 |
| }, |
| { |
| "epoch": 32.51454427925016, |
| "grad_norm": 1.6858887672424316, |
| "learning_rate": 0.001, |
| "loss": 2.0446, |
| "step": 100600 |
| }, |
| { |
| "epoch": 32.546864899806074, |
| "grad_norm": 1.6969914436340332, |
| "learning_rate": 0.001, |
| "loss": 2.0659, |
| "step": 100700 |
| }, |
| { |
| "epoch": 32.57918552036199, |
| "grad_norm": 1.2938587665557861, |
| "learning_rate": 0.001, |
| "loss": 2.0486, |
| "step": 100800 |
| }, |
| { |
| "epoch": 32.6115061409179, |
| "grad_norm": 1.2119784355163574, |
| "learning_rate": 0.001, |
| "loss": 2.065, |
| "step": 100900 |
| }, |
| { |
| "epoch": 32.64382676147382, |
| "grad_norm": 1.3899317979812622, |
| "learning_rate": 0.001, |
| "loss": 2.0688, |
| "step": 101000 |
| }, |
| { |
| "epoch": 32.67614738202973, |
| "grad_norm": 1.5170328617095947, |
| "learning_rate": 0.001, |
| "loss": 2.065, |
| "step": 101100 |
| }, |
| { |
| "epoch": 32.70846800258565, |
| "grad_norm": 1.3387643098831177, |
| "learning_rate": 0.001, |
| "loss": 2.0754, |
| "step": 101200 |
| }, |
| { |
| "epoch": 32.74078862314156, |
| "grad_norm": 2.1560583114624023, |
| "learning_rate": 0.001, |
| "loss": 2.0846, |
| "step": 101300 |
| }, |
| { |
| "epoch": 32.773109243697476, |
| "grad_norm": 1.1874232292175293, |
| "learning_rate": 0.001, |
| "loss": 2.0913, |
| "step": 101400 |
| }, |
| { |
| "epoch": 32.80542986425339, |
| "grad_norm": 1.5885916948318481, |
| "learning_rate": 0.001, |
| "loss": 2.0901, |
| "step": 101500 |
| }, |
| { |
| "epoch": 32.837750484809305, |
| "grad_norm": 1.571648359298706, |
| "learning_rate": 0.001, |
| "loss": 2.09, |
| "step": 101600 |
| }, |
| { |
| "epoch": 32.87007110536522, |
| "grad_norm": 1.2718168497085571, |
| "learning_rate": 0.001, |
| "loss": 2.086, |
| "step": 101700 |
| }, |
| { |
| "epoch": 32.902391725921134, |
| "grad_norm": 1.189794659614563, |
| "learning_rate": 0.001, |
| "loss": 2.0902, |
| "step": 101800 |
| }, |
| { |
| "epoch": 32.93471234647705, |
| "grad_norm": 1.6280981302261353, |
| "learning_rate": 0.001, |
| "loss": 2.0996, |
| "step": 101900 |
| }, |
| { |
| "epoch": 32.967032967032964, |
| "grad_norm": 1.2445026636123657, |
| "learning_rate": 0.001, |
| "loss": 2.0995, |
| "step": 102000 |
| }, |
| { |
| "epoch": 32.999353587588885, |
| "grad_norm": 1.5590155124664307, |
| "learning_rate": 0.001, |
| "loss": 2.0958, |
| "step": 102100 |
| }, |
| { |
| "epoch": 33.0316742081448, |
| "grad_norm": 1.9303966760635376, |
| "learning_rate": 0.001, |
| "loss": 1.9653, |
| "step": 102200 |
| }, |
| { |
| "epoch": 33.063994828700714, |
| "grad_norm": 1.5534723997116089, |
| "learning_rate": 0.001, |
| "loss": 1.9536, |
| "step": 102300 |
| }, |
| { |
| "epoch": 33.09631544925663, |
| "grad_norm": 1.7266992330551147, |
| "learning_rate": 0.001, |
| "loss": 1.9677, |
| "step": 102400 |
| }, |
| { |
| "epoch": 33.12863606981254, |
| "grad_norm": 2.0274415016174316, |
| "learning_rate": 0.001, |
| "loss": 1.9669, |
| "step": 102500 |
| }, |
| { |
| "epoch": 33.16095669036846, |
| "grad_norm": 1.6314760446548462, |
| "learning_rate": 0.001, |
| "loss": 1.9827, |
| "step": 102600 |
| }, |
| { |
| "epoch": 33.19327731092437, |
| "grad_norm": 1.4350862503051758, |
| "learning_rate": 0.001, |
| "loss": 1.9811, |
| "step": 102700 |
| }, |
| { |
| "epoch": 33.22559793148029, |
| "grad_norm": 1.9013689756393433, |
| "learning_rate": 0.001, |
| "loss": 1.9947, |
| "step": 102800 |
| }, |
| { |
| "epoch": 33.2579185520362, |
| "grad_norm": 1.85300874710083, |
| "learning_rate": 0.001, |
| "loss": 2.0024, |
| "step": 102900 |
| }, |
| { |
| "epoch": 33.290239172592116, |
| "grad_norm": 1.71381676197052, |
| "learning_rate": 0.001, |
| "loss": 2.0084, |
| "step": 103000 |
| }, |
| { |
| "epoch": 33.32255979314803, |
| "grad_norm": 1.7121025323867798, |
| "learning_rate": 0.001, |
| "loss": 2.0024, |
| "step": 103100 |
| }, |
| { |
| "epoch": 33.354880413703945, |
| "grad_norm": 1.8725587129592896, |
| "learning_rate": 0.001, |
| "loss": 2.0229, |
| "step": 103200 |
| }, |
| { |
| "epoch": 33.38720103425986, |
| "grad_norm": 1.6383904218673706, |
| "learning_rate": 0.001, |
| "loss": 2.0222, |
| "step": 103300 |
| }, |
| { |
| "epoch": 33.419521654815775, |
| "grad_norm": 1.5853757858276367, |
| "learning_rate": 0.001, |
| "loss": 2.0423, |
| "step": 103400 |
| }, |
| { |
| "epoch": 33.45184227537169, |
| "grad_norm": 1.7861037254333496, |
| "learning_rate": 0.001, |
| "loss": 2.0429, |
| "step": 103500 |
| }, |
| { |
| "epoch": 33.484162895927604, |
| "grad_norm": 1.4143872261047363, |
| "learning_rate": 0.001, |
| "loss": 2.0305, |
| "step": 103600 |
| }, |
| { |
| "epoch": 33.51648351648352, |
| "grad_norm": 1.3351759910583496, |
| "learning_rate": 0.001, |
| "loss": 2.0328, |
| "step": 103700 |
| }, |
| { |
| "epoch": 33.54880413703943, |
| "grad_norm": 1.6123368740081787, |
| "learning_rate": 0.001, |
| "loss": 2.0436, |
| "step": 103800 |
| }, |
| { |
| "epoch": 33.58112475759535, |
| "grad_norm": 1.5616430044174194, |
| "learning_rate": 0.001, |
| "loss": 2.0368, |
| "step": 103900 |
| }, |
| { |
| "epoch": 33.61344537815126, |
| "grad_norm": 1.4323886632919312, |
| "learning_rate": 0.001, |
| "loss": 2.0552, |
| "step": 104000 |
| }, |
| { |
| "epoch": 33.645765998707176, |
| "grad_norm": 1.7153393030166626, |
| "learning_rate": 0.001, |
| "loss": 2.0487, |
| "step": 104100 |
| }, |
| { |
| "epoch": 33.67808661926309, |
| "grad_norm": 1.5671979188919067, |
| "learning_rate": 0.001, |
| "loss": 2.0577, |
| "step": 104200 |
| }, |
| { |
| "epoch": 33.710407239819006, |
| "grad_norm": 1.7711577415466309, |
| "learning_rate": 0.001, |
| "loss": 2.0493, |
| "step": 104300 |
| }, |
| { |
| "epoch": 33.74272786037492, |
| "grad_norm": 1.6455186605453491, |
| "learning_rate": 0.001, |
| "loss": 2.0717, |
| "step": 104400 |
| }, |
| { |
| "epoch": 33.775048480930835, |
| "grad_norm": 1.6012450456619263, |
| "learning_rate": 0.001, |
| "loss": 2.0727, |
| "step": 104500 |
| }, |
| { |
| "epoch": 33.80736910148675, |
| "grad_norm": 2.0459935665130615, |
| "learning_rate": 0.001, |
| "loss": 2.0627, |
| "step": 104600 |
| }, |
| { |
| "epoch": 33.839689722042664, |
| "grad_norm": 1.5936315059661865, |
| "learning_rate": 0.001, |
| "loss": 2.0791, |
| "step": 104700 |
| }, |
| { |
| "epoch": 33.87201034259858, |
| "grad_norm": 2.3798508644104004, |
| "learning_rate": 0.001, |
| "loss": 2.0771, |
| "step": 104800 |
| }, |
| { |
| "epoch": 33.90433096315449, |
| "grad_norm": 1.5202367305755615, |
| "learning_rate": 0.001, |
| "loss": 2.0747, |
| "step": 104900 |
| }, |
| { |
| "epoch": 33.93665158371041, |
| "grad_norm": 1.810958743095398, |
| "learning_rate": 0.001, |
| "loss": 2.0842, |
| "step": 105000 |
| }, |
| { |
| "epoch": 33.96897220426632, |
| "grad_norm": 1.8689950704574585, |
| "learning_rate": 0.001, |
| "loss": 2.1037, |
| "step": 105100 |
| }, |
| { |
| "epoch": 34.00129282482224, |
| "grad_norm": 1.2684123516082764, |
| "learning_rate": 0.001, |
| "loss": 2.0769, |
| "step": 105200 |
| }, |
| { |
| "epoch": 34.03361344537815, |
| "grad_norm": 2.0106089115142822, |
| "learning_rate": 0.001, |
| "loss": 1.9514, |
| "step": 105300 |
| }, |
| { |
| "epoch": 34.065934065934066, |
| "grad_norm": 1.3193442821502686, |
| "learning_rate": 0.001, |
| "loss": 1.9505, |
| "step": 105400 |
| }, |
| { |
| "epoch": 34.09825468648998, |
| "grad_norm": 1.62157142162323, |
| "learning_rate": 0.001, |
| "loss": 1.9503, |
| "step": 105500 |
| }, |
| { |
| "epoch": 34.130575307045895, |
| "grad_norm": 1.8132089376449585, |
| "learning_rate": 0.001, |
| "loss": 1.9696, |
| "step": 105600 |
| }, |
| { |
| "epoch": 34.16289592760181, |
| "grad_norm": 1.5851075649261475, |
| "learning_rate": 0.001, |
| "loss": 1.9705, |
| "step": 105700 |
| }, |
| { |
| "epoch": 34.195216548157724, |
| "grad_norm": 1.1907926797866821, |
| "learning_rate": 0.001, |
| "loss": 1.9756, |
| "step": 105800 |
| }, |
| { |
| "epoch": 34.22753716871364, |
| "grad_norm": 1.9979790449142456, |
| "learning_rate": 0.001, |
| "loss": 1.9721, |
| "step": 105900 |
| }, |
| { |
| "epoch": 34.25985778926955, |
| "grad_norm": 1.5261240005493164, |
| "learning_rate": 0.001, |
| "loss": 1.9795, |
| "step": 106000 |
| }, |
| { |
| "epoch": 34.29217840982547, |
| "grad_norm": 1.6692026853561401, |
| "learning_rate": 0.001, |
| "loss": 1.9855, |
| "step": 106100 |
| }, |
| { |
| "epoch": 34.32449903038138, |
| "grad_norm": 2.0377280712127686, |
| "learning_rate": 0.001, |
| "loss": 1.9959, |
| "step": 106200 |
| }, |
| { |
| "epoch": 34.3568196509373, |
| "grad_norm": 1.3200879096984863, |
| "learning_rate": 0.001, |
| "loss": 2.0081, |
| "step": 106300 |
| }, |
| { |
| "epoch": 34.38914027149321, |
| "grad_norm": 2.1755311489105225, |
| "learning_rate": 0.001, |
| "loss": 1.9839, |
| "step": 106400 |
| }, |
| { |
| "epoch": 34.421460892049126, |
| "grad_norm": 1.659410834312439, |
| "learning_rate": 0.001, |
| "loss": 2.0166, |
| "step": 106500 |
| }, |
| { |
| "epoch": 34.45378151260504, |
| "grad_norm": 1.6596027612686157, |
| "learning_rate": 0.001, |
| "loss": 2.0029, |
| "step": 106600 |
| }, |
| { |
| "epoch": 34.486102133160955, |
| "grad_norm": 1.490046501159668, |
| "learning_rate": 0.001, |
| "loss": 1.9976, |
| "step": 106700 |
| }, |
| { |
| "epoch": 34.51842275371687, |
| "grad_norm": 1.6935290098190308, |
| "learning_rate": 0.001, |
| "loss": 2.0203, |
| "step": 106800 |
| }, |
| { |
| "epoch": 34.550743374272784, |
| "grad_norm": 1.5543889999389648, |
| "learning_rate": 0.001, |
| "loss": 2.0136, |
| "step": 106900 |
| }, |
| { |
| "epoch": 34.5830639948287, |
| "grad_norm": 1.4732424020767212, |
| "learning_rate": 0.001, |
| "loss": 2.017, |
| "step": 107000 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 1.509547233581543, |
| "learning_rate": 0.001, |
| "loss": 2.0381, |
| "step": 107100 |
| }, |
| { |
| "epoch": 34.64770523594053, |
| "grad_norm": 2.1105563640594482, |
| "learning_rate": 0.001, |
| "loss": 2.037, |
| "step": 107200 |
| }, |
| { |
| "epoch": 34.68002585649644, |
| "grad_norm": 1.399300456047058, |
| "learning_rate": 0.001, |
| "loss": 2.0363, |
| "step": 107300 |
| }, |
| { |
| "epoch": 34.71234647705236, |
| "grad_norm": 1.9533933401107788, |
| "learning_rate": 0.001, |
| "loss": 2.0391, |
| "step": 107400 |
| }, |
| { |
| "epoch": 34.74466709760827, |
| "grad_norm": 1.9212638139724731, |
| "learning_rate": 0.001, |
| "loss": 2.0377, |
| "step": 107500 |
| }, |
| { |
| "epoch": 34.776987718164186, |
| "grad_norm": 1.9131107330322266, |
| "learning_rate": 0.001, |
| "loss": 2.0449, |
| "step": 107600 |
| }, |
| { |
| "epoch": 34.8093083387201, |
| "grad_norm": 1.5715868473052979, |
| "learning_rate": 0.001, |
| "loss": 2.0533, |
| "step": 107700 |
| }, |
| { |
| "epoch": 34.841628959276015, |
| "grad_norm": 1.5210678577423096, |
| "learning_rate": 0.001, |
| "loss": 2.0515, |
| "step": 107800 |
| }, |
| { |
| "epoch": 34.87394957983193, |
| "grad_norm": 1.4609169960021973, |
| "learning_rate": 0.001, |
| "loss": 2.0555, |
| "step": 107900 |
| }, |
| { |
| "epoch": 34.906270200387844, |
| "grad_norm": 1.274895191192627, |
| "learning_rate": 0.001, |
| "loss": 2.0561, |
| "step": 108000 |
| }, |
| { |
| "epoch": 34.93859082094376, |
| "grad_norm": 1.3937968015670776, |
| "learning_rate": 0.001, |
| "loss": 2.0723, |
| "step": 108100 |
| }, |
| { |
| "epoch": 34.97091144149967, |
| "grad_norm": 1.4828203916549683, |
| "learning_rate": 0.001, |
| "loss": 2.0713, |
| "step": 108200 |
| }, |
| { |
| "epoch": 35.003232062055595, |
| "grad_norm": 1.7049474716186523, |
| "learning_rate": 0.001, |
| "loss": 2.0587, |
| "step": 108300 |
| }, |
| { |
| "epoch": 35.03555268261151, |
| "grad_norm": 1.3542168140411377, |
| "learning_rate": 0.001, |
| "loss": 1.928, |
| "step": 108400 |
| }, |
| { |
| "epoch": 35.067873303167424, |
| "grad_norm": 2.243741750717163, |
| "learning_rate": 0.001, |
| "loss": 1.9264, |
| "step": 108500 |
| }, |
| { |
| "epoch": 35.10019392372334, |
| "grad_norm": 1.1347894668579102, |
| "learning_rate": 0.001, |
| "loss": 1.9211, |
| "step": 108600 |
| }, |
| { |
| "epoch": 35.13251454427925, |
| "grad_norm": 1.671585202217102, |
| "learning_rate": 0.001, |
| "loss": 1.9593, |
| "step": 108700 |
| }, |
| { |
| "epoch": 35.16483516483517, |
| "grad_norm": 1.1993159055709839, |
| "learning_rate": 0.001, |
| "loss": 1.9503, |
| "step": 108800 |
| }, |
| { |
| "epoch": 35.19715578539108, |
| "grad_norm": 1.3364758491516113, |
| "learning_rate": 0.001, |
| "loss": 1.9685, |
| "step": 108900 |
| }, |
| { |
| "epoch": 35.229476405947, |
| "grad_norm": 1.625673532485962, |
| "learning_rate": 0.001, |
| "loss": 1.9594, |
| "step": 109000 |
| }, |
| { |
| "epoch": 35.26179702650291, |
| "grad_norm": 1.8384968042373657, |
| "learning_rate": 0.001, |
| "loss": 1.9819, |
| "step": 109100 |
| }, |
| { |
| "epoch": 35.294117647058826, |
| "grad_norm": 1.3008297681808472, |
| "learning_rate": 0.001, |
| "loss": 1.9637, |
| "step": 109200 |
| }, |
| { |
| "epoch": 35.32643826761474, |
| "grad_norm": 1.42014741897583, |
| "learning_rate": 0.001, |
| "loss": 1.9671, |
| "step": 109300 |
| }, |
| { |
| "epoch": 35.358758888170655, |
| "grad_norm": 1.323197364807129, |
| "learning_rate": 0.001, |
| "loss": 1.9704, |
| "step": 109400 |
| }, |
| { |
| "epoch": 35.39107950872657, |
| "grad_norm": 1.5077106952667236, |
| "learning_rate": 0.001, |
| "loss": 1.9726, |
| "step": 109500 |
| }, |
| { |
| "epoch": 35.423400129282484, |
| "grad_norm": 2.083890199661255, |
| "learning_rate": 0.001, |
| "loss": 1.9629, |
| "step": 109600 |
| }, |
| { |
| "epoch": 35.4557207498384, |
| "grad_norm": 1.543499231338501, |
| "learning_rate": 0.001, |
| "loss": 1.9994, |
| "step": 109700 |
| }, |
| { |
| "epoch": 35.48804137039431, |
| "grad_norm": 1.34257972240448, |
| "learning_rate": 0.001, |
| "loss": 1.9949, |
| "step": 109800 |
| }, |
| { |
| "epoch": 35.52036199095023, |
| "grad_norm": 1.394116759300232, |
| "learning_rate": 0.001, |
| "loss": 2.0154, |
| "step": 109900 |
| }, |
| { |
| "epoch": 35.55268261150614, |
| "grad_norm": 1.224687933921814, |
| "learning_rate": 0.001, |
| "loss": 2.0221, |
| "step": 110000 |
| }, |
| { |
| "epoch": 35.58500323206206, |
| "grad_norm": 1.5279735326766968, |
| "learning_rate": 0.001, |
| "loss": 1.9958, |
| "step": 110100 |
| }, |
| { |
| "epoch": 35.61732385261797, |
| "grad_norm": 1.4509029388427734, |
| "learning_rate": 0.001, |
| "loss": 2.0004, |
| "step": 110200 |
| }, |
| { |
| "epoch": 35.649644473173886, |
| "grad_norm": 1.1077178716659546, |
| "learning_rate": 0.001, |
| "loss": 2.038, |
| "step": 110300 |
| }, |
| { |
| "epoch": 35.6819650937298, |
| "grad_norm": 1.495784044265747, |
| "learning_rate": 0.001, |
| "loss": 2.0186, |
| "step": 110400 |
| }, |
| { |
| "epoch": 35.714285714285715, |
| "grad_norm": 1.239295244216919, |
| "learning_rate": 0.001, |
| "loss": 2.0075, |
| "step": 110500 |
| }, |
| { |
| "epoch": 35.74660633484163, |
| "grad_norm": 1.6918079853057861, |
| "learning_rate": 0.001, |
| "loss": 2.0214, |
| "step": 110600 |
| }, |
| { |
| "epoch": 35.778926955397544, |
| "grad_norm": 1.210204005241394, |
| "learning_rate": 0.001, |
| "loss": 2.024, |
| "step": 110700 |
| }, |
| { |
| "epoch": 35.81124757595346, |
| "grad_norm": 1.7192320823669434, |
| "learning_rate": 0.001, |
| "loss": 2.0059, |
| "step": 110800 |
| }, |
| { |
| "epoch": 35.84356819650937, |
| "grad_norm": 1.3810573816299438, |
| "learning_rate": 0.001, |
| "loss": 2.0528, |
| "step": 110900 |
| }, |
| { |
| "epoch": 35.87588881706529, |
| "grad_norm": 1.758357048034668, |
| "learning_rate": 0.001, |
| "loss": 2.0309, |
| "step": 111000 |
| }, |
| { |
| "epoch": 35.9082094376212, |
| "grad_norm": 1.845845103263855, |
| "learning_rate": 0.001, |
| "loss": 2.0404, |
| "step": 111100 |
| }, |
| { |
| "epoch": 35.94053005817712, |
| "grad_norm": 1.2828718423843384, |
| "learning_rate": 0.001, |
| "loss": 2.0384, |
| "step": 111200 |
| }, |
| { |
| "epoch": 35.97285067873303, |
| "grad_norm": 1.4219506978988647, |
| "learning_rate": 0.001, |
| "loss": 2.0465, |
| "step": 111300 |
| }, |
| { |
| "epoch": 36.005171299288946, |
| "grad_norm": 1.2361496686935425, |
| "learning_rate": 0.001, |
| "loss": 2.0539, |
| "step": 111400 |
| }, |
| { |
| "epoch": 36.03749191984486, |
| "grad_norm": 1.4928691387176514, |
| "learning_rate": 0.001, |
| "loss": 1.9142, |
| "step": 111500 |
| }, |
| { |
| "epoch": 36.069812540400775, |
| "grad_norm": 1.2020481824874878, |
| "learning_rate": 0.001, |
| "loss": 1.9197, |
| "step": 111600 |
| }, |
| { |
| "epoch": 36.10213316095669, |
| "grad_norm": 1.018372893333435, |
| "learning_rate": 0.001, |
| "loss": 1.9154, |
| "step": 111700 |
| }, |
| { |
| "epoch": 36.134453781512605, |
| "grad_norm": 1.3498088121414185, |
| "learning_rate": 0.001, |
| "loss": 1.913, |
| "step": 111800 |
| }, |
| { |
| "epoch": 36.16677440206852, |
| "grad_norm": 1.097247838973999, |
| "learning_rate": 0.001, |
| "loss": 1.9381, |
| "step": 111900 |
| }, |
| { |
| "epoch": 36.199095022624434, |
| "grad_norm": 1.3852177858352661, |
| "learning_rate": 0.001, |
| "loss": 1.9463, |
| "step": 112000 |
| }, |
| { |
| "epoch": 36.23141564318035, |
| "grad_norm": 1.4728375673294067, |
| "learning_rate": 0.001, |
| "loss": 1.9594, |
| "step": 112100 |
| }, |
| { |
| "epoch": 36.26373626373626, |
| "grad_norm": 1.5016732215881348, |
| "learning_rate": 0.001, |
| "loss": 1.9393, |
| "step": 112200 |
| }, |
| { |
| "epoch": 36.29605688429218, |
| "grad_norm": 1.4927202463150024, |
| "learning_rate": 0.001, |
| "loss": 1.9546, |
| "step": 112300 |
| }, |
| { |
| "epoch": 36.32837750484809, |
| "grad_norm": 1.1831008195877075, |
| "learning_rate": 0.001, |
| "loss": 1.9703, |
| "step": 112400 |
| }, |
| { |
| "epoch": 36.36069812540401, |
| "grad_norm": 1.76808762550354, |
| "learning_rate": 0.001, |
| "loss": 1.9709, |
| "step": 112500 |
| }, |
| { |
| "epoch": 36.39301874595992, |
| "grad_norm": 1.2848598957061768, |
| "learning_rate": 0.001, |
| "loss": 1.9609, |
| "step": 112600 |
| }, |
| { |
| "epoch": 36.425339366515836, |
| "grad_norm": 1.5108144283294678, |
| "learning_rate": 0.001, |
| "loss": 1.9567, |
| "step": 112700 |
| }, |
| { |
| "epoch": 36.45765998707175, |
| "grad_norm": 1.2236725091934204, |
| "learning_rate": 0.001, |
| "loss": 1.9863, |
| "step": 112800 |
| }, |
| { |
| "epoch": 36.489980607627665, |
| "grad_norm": 1.0058413743972778, |
| "learning_rate": 0.001, |
| "loss": 1.9919, |
| "step": 112900 |
| }, |
| { |
| "epoch": 36.52230122818358, |
| "grad_norm": 1.4160171747207642, |
| "learning_rate": 0.001, |
| "loss": 1.9634, |
| "step": 113000 |
| }, |
| { |
| "epoch": 36.554621848739494, |
| "grad_norm": 1.1909221410751343, |
| "learning_rate": 0.001, |
| "loss": 1.9777, |
| "step": 113100 |
| }, |
| { |
| "epoch": 36.58694246929541, |
| "grad_norm": 1.2544959783554077, |
| "learning_rate": 0.001, |
| "loss": 1.9854, |
| "step": 113200 |
| }, |
| { |
| "epoch": 36.61926308985132, |
| "grad_norm": 1.5229424238204956, |
| "learning_rate": 0.001, |
| "loss": 2.0088, |
| "step": 113300 |
| }, |
| { |
| "epoch": 36.65158371040724, |
| "grad_norm": 1.0623260736465454, |
| "learning_rate": 0.001, |
| "loss": 1.9908, |
| "step": 113400 |
| }, |
| { |
| "epoch": 36.68390433096315, |
| "grad_norm": 1.053356409072876, |
| "learning_rate": 0.001, |
| "loss": 1.9932, |
| "step": 113500 |
| }, |
| { |
| "epoch": 36.71622495151907, |
| "grad_norm": 1.0189151763916016, |
| "learning_rate": 0.001, |
| "loss": 1.9961, |
| "step": 113600 |
| }, |
| { |
| "epoch": 36.74854557207498, |
| "grad_norm": 1.2587002515792847, |
| "learning_rate": 0.001, |
| "loss": 2.0122, |
| "step": 113700 |
| }, |
| { |
| "epoch": 36.780866192630896, |
| "grad_norm": 1.2830156087875366, |
| "learning_rate": 0.001, |
| "loss": 2.0072, |
| "step": 113800 |
| }, |
| { |
| "epoch": 36.81318681318681, |
| "grad_norm": 1.6737384796142578, |
| "learning_rate": 0.001, |
| "loss": 2.0168, |
| "step": 113900 |
| }, |
| { |
| "epoch": 36.845507433742725, |
| "grad_norm": 1.3077422380447388, |
| "learning_rate": 0.001, |
| "loss": 2.0139, |
| "step": 114000 |
| }, |
| { |
| "epoch": 36.87782805429864, |
| "grad_norm": 1.417799711227417, |
| "learning_rate": 0.001, |
| "loss": 2.0118, |
| "step": 114100 |
| }, |
| { |
| "epoch": 36.910148674854554, |
| "grad_norm": 1.3692338466644287, |
| "learning_rate": 0.001, |
| "loss": 2.0219, |
| "step": 114200 |
| }, |
| { |
| "epoch": 36.94246929541047, |
| "grad_norm": 1.2055531740188599, |
| "learning_rate": 0.001, |
| "loss": 2.0191, |
| "step": 114300 |
| }, |
| { |
| "epoch": 36.97478991596638, |
| "grad_norm": 1.2253512144088745, |
| "learning_rate": 0.001, |
| "loss": 2.0287, |
| "step": 114400 |
| }, |
| { |
| "epoch": 37.007110536522305, |
| "grad_norm": 1.2838011980056763, |
| "learning_rate": 0.001, |
| "loss": 1.9971, |
| "step": 114500 |
| }, |
| { |
| "epoch": 37.03943115707822, |
| "grad_norm": 1.2368170022964478, |
| "learning_rate": 0.001, |
| "loss": 1.8909, |
| "step": 114600 |
| }, |
| { |
| "epoch": 37.071751777634134, |
| "grad_norm": 1.07204270362854, |
| "learning_rate": 0.001, |
| "loss": 1.8982, |
| "step": 114700 |
| }, |
| { |
| "epoch": 37.10407239819005, |
| "grad_norm": 1.3839013576507568, |
| "learning_rate": 0.001, |
| "loss": 1.9024, |
| "step": 114800 |
| }, |
| { |
| "epoch": 37.13639301874596, |
| "grad_norm": 2.0179762840270996, |
| "learning_rate": 0.001, |
| "loss": 1.9065, |
| "step": 114900 |
| }, |
| { |
| "epoch": 37.16871363930188, |
| "grad_norm": 1.2924718856811523, |
| "learning_rate": 0.001, |
| "loss": 1.9107, |
| "step": 115000 |
| }, |
| { |
| "epoch": 37.20103425985779, |
| "grad_norm": 1.395383596420288, |
| "learning_rate": 0.001, |
| "loss": 1.9157, |
| "step": 115100 |
| }, |
| { |
| "epoch": 37.23335488041371, |
| "grad_norm": 1.0912832021713257, |
| "learning_rate": 0.001, |
| "loss": 1.9446, |
| "step": 115200 |
| }, |
| { |
| "epoch": 37.26567550096962, |
| "grad_norm": 1.6009647846221924, |
| "learning_rate": 0.001, |
| "loss": 1.927, |
| "step": 115300 |
| }, |
| { |
| "epoch": 37.297996121525536, |
| "grad_norm": 1.2845613956451416, |
| "learning_rate": 0.001, |
| "loss": 1.9379, |
| "step": 115400 |
| }, |
| { |
| "epoch": 37.33031674208145, |
| "grad_norm": 1.1216648817062378, |
| "learning_rate": 0.001, |
| "loss": 1.9436, |
| "step": 115500 |
| }, |
| { |
| "epoch": 37.362637362637365, |
| "grad_norm": 1.1084158420562744, |
| "learning_rate": 0.001, |
| "loss": 1.9347, |
| "step": 115600 |
| }, |
| { |
| "epoch": 37.39495798319328, |
| "grad_norm": 1.501246452331543, |
| "learning_rate": 0.001, |
| "loss": 1.9566, |
| "step": 115700 |
| }, |
| { |
| "epoch": 37.427278603749194, |
| "grad_norm": 1.1051548719406128, |
| "learning_rate": 0.001, |
| "loss": 1.9608, |
| "step": 115800 |
| }, |
| { |
| "epoch": 37.45959922430511, |
| "grad_norm": 1.2411729097366333, |
| "learning_rate": 0.001, |
| "loss": 1.9485, |
| "step": 115900 |
| }, |
| { |
| "epoch": 37.49191984486102, |
| "grad_norm": 1.4812703132629395, |
| "learning_rate": 0.001, |
| "loss": 1.9647, |
| "step": 116000 |
| }, |
| { |
| "epoch": 37.52424046541694, |
| "grad_norm": 1.242192029953003, |
| "learning_rate": 0.001, |
| "loss": 1.9573, |
| "step": 116100 |
| }, |
| { |
| "epoch": 37.55656108597285, |
| "grad_norm": 1.146277904510498, |
| "learning_rate": 0.001, |
| "loss": 1.9583, |
| "step": 116200 |
| }, |
| { |
| "epoch": 37.58888170652877, |
| "grad_norm": 1.420883297920227, |
| "learning_rate": 0.001, |
| "loss": 1.9733, |
| "step": 116300 |
| }, |
| { |
| "epoch": 37.62120232708468, |
| "grad_norm": 1.1877259016036987, |
| "learning_rate": 0.001, |
| "loss": 1.9787, |
| "step": 116400 |
| }, |
| { |
| "epoch": 37.653522947640596, |
| "grad_norm": 1.1888118982315063, |
| "learning_rate": 0.001, |
| "loss": 1.972, |
| "step": 116500 |
| }, |
| { |
| "epoch": 37.68584356819651, |
| "grad_norm": 1.042897343635559, |
| "learning_rate": 0.001, |
| "loss": 1.9742, |
| "step": 116600 |
| }, |
| { |
| "epoch": 37.718164188752425, |
| "grad_norm": 1.110247254371643, |
| "learning_rate": 0.001, |
| "loss": 1.9944, |
| "step": 116700 |
| }, |
| { |
| "epoch": 37.75048480930834, |
| "grad_norm": 0.9629383087158203, |
| "learning_rate": 0.001, |
| "loss": 1.9785, |
| "step": 116800 |
| }, |
| { |
| "epoch": 37.782805429864254, |
| "grad_norm": 1.3344351053237915, |
| "learning_rate": 0.001, |
| "loss": 2.0116, |
| "step": 116900 |
| }, |
| { |
| "epoch": 37.81512605042017, |
| "grad_norm": 1.3152480125427246, |
| "learning_rate": 0.001, |
| "loss": 1.9854, |
| "step": 117000 |
| }, |
| { |
| "epoch": 37.84744667097608, |
| "grad_norm": 1.245644211769104, |
| "learning_rate": 0.001, |
| "loss": 1.9971, |
| "step": 117100 |
| }, |
| { |
| "epoch": 37.879767291532, |
| "grad_norm": 1.1488375663757324, |
| "learning_rate": 0.001, |
| "loss": 1.9812, |
| "step": 117200 |
| }, |
| { |
| "epoch": 37.91208791208791, |
| "grad_norm": 1.0456823110580444, |
| "learning_rate": 0.001, |
| "loss": 2.0149, |
| "step": 117300 |
| }, |
| { |
| "epoch": 37.94440853264383, |
| "grad_norm": 1.369343638420105, |
| "learning_rate": 0.001, |
| "loss": 2.0129, |
| "step": 117400 |
| }, |
| { |
| "epoch": 37.97672915319974, |
| "grad_norm": 1.4738596677780151, |
| "learning_rate": 0.001, |
| "loss": 2.021, |
| "step": 117500 |
| }, |
| { |
| "epoch": 38.009049773755656, |
| "grad_norm": 1.3079040050506592, |
| "learning_rate": 0.001, |
| "loss": 1.9785, |
| "step": 117600 |
| }, |
| { |
| "epoch": 38.04137039431157, |
| "grad_norm": 1.6166199445724487, |
| "learning_rate": 0.001, |
| "loss": 1.883, |
| "step": 117700 |
| }, |
| { |
| "epoch": 38.073691014867485, |
| "grad_norm": 1.3175252676010132, |
| "learning_rate": 0.001, |
| "loss": 1.871, |
| "step": 117800 |
| }, |
| { |
| "epoch": 38.1060116354234, |
| "grad_norm": 1.0515940189361572, |
| "learning_rate": 0.001, |
| "loss": 1.8938, |
| "step": 117900 |
| }, |
| { |
| "epoch": 38.138332255979314, |
| "grad_norm": 1.0703885555267334, |
| "learning_rate": 0.001, |
| "loss": 1.9082, |
| "step": 118000 |
| }, |
| { |
| "epoch": 38.17065287653523, |
| "grad_norm": 1.1041367053985596, |
| "learning_rate": 0.001, |
| "loss": 1.9039, |
| "step": 118100 |
| }, |
| { |
| "epoch": 38.20297349709114, |
| "grad_norm": 1.2935791015625, |
| "learning_rate": 0.001, |
| "loss": 1.9165, |
| "step": 118200 |
| }, |
| { |
| "epoch": 38.23529411764706, |
| "grad_norm": 1.1646063327789307, |
| "learning_rate": 0.001, |
| "loss": 1.8891, |
| "step": 118300 |
| }, |
| { |
| "epoch": 38.26761473820297, |
| "grad_norm": 1.0428200960159302, |
| "learning_rate": 0.001, |
| "loss": 1.9169, |
| "step": 118400 |
| }, |
| { |
| "epoch": 38.29993535875889, |
| "grad_norm": 0.9607976078987122, |
| "learning_rate": 0.001, |
| "loss": 1.9173, |
| "step": 118500 |
| }, |
| { |
| "epoch": 38.3322559793148, |
| "grad_norm": 2.0434305667877197, |
| "learning_rate": 0.001, |
| "loss": 1.9229, |
| "step": 118600 |
| }, |
| { |
| "epoch": 38.364576599870716, |
| "grad_norm": 1.2759087085723877, |
| "learning_rate": 0.001, |
| "loss": 1.9232, |
| "step": 118700 |
| }, |
| { |
| "epoch": 38.39689722042663, |
| "grad_norm": 1.1599791049957275, |
| "learning_rate": 0.001, |
| "loss": 1.9373, |
| "step": 118800 |
| }, |
| { |
| "epoch": 38.429217840982545, |
| "grad_norm": 1.1653485298156738, |
| "learning_rate": 0.001, |
| "loss": 1.9279, |
| "step": 118900 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 1.1542648077011108, |
| "learning_rate": 0.001, |
| "loss": 1.9215, |
| "step": 119000 |
| }, |
| { |
| "epoch": 38.493859082094374, |
| "grad_norm": 1.0891329050064087, |
| "learning_rate": 0.001, |
| "loss": 1.9305, |
| "step": 119100 |
| }, |
| { |
| "epoch": 38.52617970265029, |
| "grad_norm": 1.2211647033691406, |
| "learning_rate": 0.001, |
| "loss": 1.9546, |
| "step": 119200 |
| }, |
| { |
| "epoch": 38.558500323206204, |
| "grad_norm": 1.691691279411316, |
| "learning_rate": 0.001, |
| "loss": 1.9608, |
| "step": 119300 |
| }, |
| { |
| "epoch": 38.59082094376212, |
| "grad_norm": 1.301088571548462, |
| "learning_rate": 0.001, |
| "loss": 1.9468, |
| "step": 119400 |
| }, |
| { |
| "epoch": 38.62314156431803, |
| "grad_norm": 1.126369833946228, |
| "learning_rate": 0.001, |
| "loss": 1.9543, |
| "step": 119500 |
| }, |
| { |
| "epoch": 38.65546218487395, |
| "grad_norm": 1.0784813165664673, |
| "learning_rate": 0.001, |
| "loss": 1.9531, |
| "step": 119600 |
| }, |
| { |
| "epoch": 38.68778280542986, |
| "grad_norm": 1.1354749202728271, |
| "learning_rate": 0.001, |
| "loss": 1.9629, |
| "step": 119700 |
| }, |
| { |
| "epoch": 38.720103425985776, |
| "grad_norm": 1.3400801420211792, |
| "learning_rate": 0.001, |
| "loss": 1.9768, |
| "step": 119800 |
| }, |
| { |
| "epoch": 38.75242404654169, |
| "grad_norm": 1.5241564512252808, |
| "learning_rate": 0.001, |
| "loss": 1.9631, |
| "step": 119900 |
| }, |
| { |
| "epoch": 38.784744667097605, |
| "grad_norm": 1.2316350936889648, |
| "learning_rate": 0.001, |
| "loss": 1.9713, |
| "step": 120000 |
| }, |
| { |
| "epoch": 38.81706528765352, |
| "grad_norm": 1.0691965818405151, |
| "learning_rate": 0.001, |
| "loss": 1.9635, |
| "step": 120100 |
| }, |
| { |
| "epoch": 38.849385908209435, |
| "grad_norm": 1.1982426643371582, |
| "learning_rate": 0.001, |
| "loss": 1.996, |
| "step": 120200 |
| }, |
| { |
| "epoch": 38.88170652876535, |
| "grad_norm": 1.0837290287017822, |
| "learning_rate": 0.001, |
| "loss": 1.9862, |
| "step": 120300 |
| }, |
| { |
| "epoch": 38.914027149321264, |
| "grad_norm": 1.4738332033157349, |
| "learning_rate": 0.001, |
| "loss": 1.9908, |
| "step": 120400 |
| }, |
| { |
| "epoch": 38.94634776987718, |
| "grad_norm": 1.73765230178833, |
| "learning_rate": 0.001, |
| "loss": 1.9781, |
| "step": 120500 |
| }, |
| { |
| "epoch": 38.97866839043309, |
| "grad_norm": 1.286083459854126, |
| "learning_rate": 0.001, |
| "loss": 2.0017, |
| "step": 120600 |
| }, |
| { |
| "epoch": 39.010989010989015, |
| "grad_norm": 1.5349410772323608, |
| "learning_rate": 0.001, |
| "loss": 1.9367, |
| "step": 120700 |
| }, |
| { |
| "epoch": 39.04330963154493, |
| "grad_norm": 1.5377883911132812, |
| "learning_rate": 0.001, |
| "loss": 1.859, |
| "step": 120800 |
| }, |
| { |
| "epoch": 39.075630252100844, |
| "grad_norm": 1.2859725952148438, |
| "learning_rate": 0.001, |
| "loss": 1.8716, |
| "step": 120900 |
| }, |
| { |
| "epoch": 39.10795087265676, |
| "grad_norm": 1.770250916481018, |
| "learning_rate": 0.001, |
| "loss": 1.8772, |
| "step": 121000 |
| }, |
| { |
| "epoch": 39.14027149321267, |
| "grad_norm": 1.3946150541305542, |
| "learning_rate": 0.001, |
| "loss": 1.8759, |
| "step": 121100 |
| }, |
| { |
| "epoch": 39.17259211376859, |
| "grad_norm": 1.2503063678741455, |
| "learning_rate": 0.001, |
| "loss": 1.8674, |
| "step": 121200 |
| }, |
| { |
| "epoch": 39.2049127343245, |
| "grad_norm": 1.0887434482574463, |
| "learning_rate": 0.001, |
| "loss": 1.8681, |
| "step": 121300 |
| }, |
| { |
| "epoch": 39.237233354880416, |
| "grad_norm": 1.2380496263504028, |
| "learning_rate": 0.001, |
| "loss": 1.9177, |
| "step": 121400 |
| }, |
| { |
| "epoch": 39.26955397543633, |
| "grad_norm": 1.4318231344223022, |
| "learning_rate": 0.001, |
| "loss": 1.9009, |
| "step": 121500 |
| }, |
| { |
| "epoch": 39.301874595992246, |
| "grad_norm": 1.4162850379943848, |
| "learning_rate": 0.001, |
| "loss": 1.8917, |
| "step": 121600 |
| }, |
| { |
| "epoch": 39.33419521654816, |
| "grad_norm": 1.4503649473190308, |
| "learning_rate": 0.001, |
| "loss": 1.9165, |
| "step": 121700 |
| }, |
| { |
| "epoch": 39.366515837104075, |
| "grad_norm": 1.4559428691864014, |
| "learning_rate": 0.001, |
| "loss": 1.9064, |
| "step": 121800 |
| }, |
| { |
| "epoch": 39.39883645765999, |
| "grad_norm": 1.3657339811325073, |
| "learning_rate": 0.001, |
| "loss": 1.8925, |
| "step": 121900 |
| }, |
| { |
| "epoch": 39.431157078215904, |
| "grad_norm": 1.1839957237243652, |
| "learning_rate": 0.001, |
| "loss": 1.9145, |
| "step": 122000 |
| }, |
| { |
| "epoch": 39.46347769877182, |
| "grad_norm": 1.3001296520233154, |
| "learning_rate": 0.001, |
| "loss": 1.9223, |
| "step": 122100 |
| }, |
| { |
| "epoch": 39.49579831932773, |
| "grad_norm": 1.4840679168701172, |
| "learning_rate": 0.001, |
| "loss": 1.9204, |
| "step": 122200 |
| }, |
| { |
| "epoch": 39.52811893988365, |
| "grad_norm": 1.4856712818145752, |
| "learning_rate": 0.001, |
| "loss": 1.935, |
| "step": 122300 |
| }, |
| { |
| "epoch": 39.56043956043956, |
| "grad_norm": 1.253468632698059, |
| "learning_rate": 0.001, |
| "loss": 1.9202, |
| "step": 122400 |
| }, |
| { |
| "epoch": 39.59276018099548, |
| "grad_norm": 1.2646362781524658, |
| "learning_rate": 0.001, |
| "loss": 1.9373, |
| "step": 122500 |
| }, |
| { |
| "epoch": 39.62508080155139, |
| "grad_norm": 1.436712622642517, |
| "learning_rate": 0.001, |
| "loss": 1.9379, |
| "step": 122600 |
| }, |
| { |
| "epoch": 39.657401422107306, |
| "grad_norm": 1.2675755023956299, |
| "learning_rate": 0.001, |
| "loss": 1.9559, |
| "step": 122700 |
| }, |
| { |
| "epoch": 39.68972204266322, |
| "grad_norm": 1.567535400390625, |
| "learning_rate": 0.001, |
| "loss": 1.9614, |
| "step": 122800 |
| }, |
| { |
| "epoch": 39.722042663219135, |
| "grad_norm": 1.2379465103149414, |
| "learning_rate": 0.001, |
| "loss": 1.9453, |
| "step": 122900 |
| }, |
| { |
| "epoch": 39.75436328377505, |
| "grad_norm": 1.4940135478973389, |
| "learning_rate": 0.001, |
| "loss": 1.9551, |
| "step": 123000 |
| }, |
| { |
| "epoch": 39.786683904330964, |
| "grad_norm": 1.5482773780822754, |
| "learning_rate": 0.001, |
| "loss": 1.9755, |
| "step": 123100 |
| }, |
| { |
| "epoch": 39.81900452488688, |
| "grad_norm": 1.1395546197891235, |
| "learning_rate": 0.001, |
| "loss": 1.9733, |
| "step": 123200 |
| }, |
| { |
| "epoch": 39.85132514544279, |
| "grad_norm": 2.028623342514038, |
| "learning_rate": 0.001, |
| "loss": 1.9566, |
| "step": 123300 |
| }, |
| { |
| "epoch": 39.88364576599871, |
| "grad_norm": 1.3051038980484009, |
| "learning_rate": 0.001, |
| "loss": 1.9707, |
| "step": 123400 |
| }, |
| { |
| "epoch": 39.91596638655462, |
| "grad_norm": 1.2123631238937378, |
| "learning_rate": 0.001, |
| "loss": 1.972, |
| "step": 123500 |
| }, |
| { |
| "epoch": 39.94828700711054, |
| "grad_norm": 1.4840129613876343, |
| "learning_rate": 0.001, |
| "loss": 1.9554, |
| "step": 123600 |
| }, |
| { |
| "epoch": 39.98060762766645, |
| "grad_norm": 1.2305253744125366, |
| "learning_rate": 0.001, |
| "loss": 1.9722, |
| "step": 123700 |
| }, |
| { |
| "epoch": 40.012928248222366, |
| "grad_norm": 1.2095211744308472, |
| "learning_rate": 0.001, |
| "loss": 1.9293, |
| "step": 123800 |
| }, |
| { |
| "epoch": 40.04524886877828, |
| "grad_norm": 1.7045320272445679, |
| "learning_rate": 0.001, |
| "loss": 1.8624, |
| "step": 123900 |
| }, |
| { |
| "epoch": 40.077569489334195, |
| "grad_norm": 1.7265287637710571, |
| "learning_rate": 0.001, |
| "loss": 1.8376, |
| "step": 124000 |
| }, |
| { |
| "epoch": 40.10989010989011, |
| "grad_norm": 1.1483691930770874, |
| "learning_rate": 0.001, |
| "loss": 1.8393, |
| "step": 124100 |
| }, |
| { |
| "epoch": 40.142210730446024, |
| "grad_norm": 1.6978778839111328, |
| "learning_rate": 0.001, |
| "loss": 1.8468, |
| "step": 124200 |
| }, |
| { |
| "epoch": 40.17453135100194, |
| "grad_norm": 1.343893051147461, |
| "learning_rate": 0.001, |
| "loss": 1.8629, |
| "step": 124300 |
| }, |
| { |
| "epoch": 40.20685197155785, |
| "grad_norm": 1.4871058464050293, |
| "learning_rate": 0.001, |
| "loss": 1.8865, |
| "step": 124400 |
| }, |
| { |
| "epoch": 40.23917259211377, |
| "grad_norm": 1.637841820716858, |
| "learning_rate": 0.001, |
| "loss": 1.8659, |
| "step": 124500 |
| }, |
| { |
| "epoch": 40.27149321266968, |
| "grad_norm": 1.8821446895599365, |
| "learning_rate": 0.001, |
| "loss": 1.8728, |
| "step": 124600 |
| }, |
| { |
| "epoch": 40.3038138332256, |
| "grad_norm": 1.7137219905853271, |
| "learning_rate": 0.001, |
| "loss": 1.8914, |
| "step": 124700 |
| }, |
| { |
| "epoch": 40.33613445378151, |
| "grad_norm": 1.8872164487838745, |
| "learning_rate": 0.001, |
| "loss": 1.8869, |
| "step": 124800 |
| }, |
| { |
| "epoch": 40.368455074337426, |
| "grad_norm": 1.2478173971176147, |
| "learning_rate": 0.001, |
| "loss": 1.9071, |
| "step": 124900 |
| }, |
| { |
| "epoch": 40.40077569489334, |
| "grad_norm": 1.838136911392212, |
| "learning_rate": 0.001, |
| "loss": 1.9017, |
| "step": 125000 |
| }, |
| { |
| "epoch": 40.433096315449255, |
| "grad_norm": 1.546654462814331, |
| "learning_rate": 0.001, |
| "loss": 1.8874, |
| "step": 125100 |
| }, |
| { |
| "epoch": 40.46541693600517, |
| "grad_norm": 1.3680957555770874, |
| "learning_rate": 0.001, |
| "loss": 1.9164, |
| "step": 125200 |
| }, |
| { |
| "epoch": 40.497737556561084, |
| "grad_norm": 1.4279357194900513, |
| "learning_rate": 0.001, |
| "loss": 1.9252, |
| "step": 125300 |
| }, |
| { |
| "epoch": 40.530058177117, |
| "grad_norm": 1.331580400466919, |
| "learning_rate": 0.001, |
| "loss": 1.915, |
| "step": 125400 |
| }, |
| { |
| "epoch": 40.56237879767291, |
| "grad_norm": 1.5645660161972046, |
| "learning_rate": 0.001, |
| "loss": 1.9173, |
| "step": 125500 |
| }, |
| { |
| "epoch": 40.59469941822883, |
| "grad_norm": 1.7686409950256348, |
| "learning_rate": 0.001, |
| "loss": 1.9249, |
| "step": 125600 |
| }, |
| { |
| "epoch": 40.62702003878474, |
| "grad_norm": 1.4038866758346558, |
| "learning_rate": 0.001, |
| "loss": 1.9211, |
| "step": 125700 |
| }, |
| { |
| "epoch": 40.65934065934066, |
| "grad_norm": 1.7020624876022339, |
| "learning_rate": 0.001, |
| "loss": 1.9155, |
| "step": 125800 |
| }, |
| { |
| "epoch": 40.69166127989657, |
| "grad_norm": 1.450563907623291, |
| "learning_rate": 0.001, |
| "loss": 1.9234, |
| "step": 125900 |
| }, |
| { |
| "epoch": 40.723981900452486, |
| "grad_norm": 1.3181536197662354, |
| "learning_rate": 0.001, |
| "loss": 1.9412, |
| "step": 126000 |
| }, |
| { |
| "epoch": 40.7563025210084, |
| "grad_norm": 1.0498440265655518, |
| "learning_rate": 0.001, |
| "loss": 1.9281, |
| "step": 126100 |
| }, |
| { |
| "epoch": 40.788623141564315, |
| "grad_norm": 1.4465446472167969, |
| "learning_rate": 0.001, |
| "loss": 1.9506, |
| "step": 126200 |
| }, |
| { |
| "epoch": 40.82094376212023, |
| "grad_norm": 1.2336151599884033, |
| "learning_rate": 0.001, |
| "loss": 1.9491, |
| "step": 126300 |
| }, |
| { |
| "epoch": 40.853264382676144, |
| "grad_norm": 1.3242679834365845, |
| "learning_rate": 0.001, |
| "loss": 1.9514, |
| "step": 126400 |
| }, |
| { |
| "epoch": 40.88558500323206, |
| "grad_norm": 1.2799617052078247, |
| "learning_rate": 0.001, |
| "loss": 1.9405, |
| "step": 126500 |
| }, |
| { |
| "epoch": 40.91790562378797, |
| "grad_norm": 1.2991124391555786, |
| "learning_rate": 0.001, |
| "loss": 1.9466, |
| "step": 126600 |
| }, |
| { |
| "epoch": 40.95022624434389, |
| "grad_norm": 1.4150607585906982, |
| "learning_rate": 0.001, |
| "loss": 1.9604, |
| "step": 126700 |
| }, |
| { |
| "epoch": 40.9825468648998, |
| "grad_norm": 1.208999514579773, |
| "learning_rate": 0.001, |
| "loss": 1.9608, |
| "step": 126800 |
| }, |
| { |
| "epoch": 41.014867485455724, |
| "grad_norm": 1.668093204498291, |
| "learning_rate": 0.001, |
| "loss": 1.8746, |
| "step": 126900 |
| }, |
| { |
| "epoch": 41.04718810601164, |
| "grad_norm": 1.4752800464630127, |
| "learning_rate": 0.001, |
| "loss": 1.8344, |
| "step": 127000 |
| }, |
| { |
| "epoch": 41.07950872656755, |
| "grad_norm": 2.1280136108398438, |
| "learning_rate": 0.001, |
| "loss": 1.8427, |
| "step": 127100 |
| }, |
| { |
| "epoch": 41.11182934712347, |
| "grad_norm": 1.9451837539672852, |
| "learning_rate": 0.001, |
| "loss": 1.8396, |
| "step": 127200 |
| }, |
| { |
| "epoch": 41.14414996767938, |
| "grad_norm": 1.3411965370178223, |
| "learning_rate": 0.001, |
| "loss": 1.8439, |
| "step": 127300 |
| }, |
| { |
| "epoch": 41.1764705882353, |
| "grad_norm": 1.758414387702942, |
| "learning_rate": 0.001, |
| "loss": 1.8294, |
| "step": 127400 |
| }, |
| { |
| "epoch": 41.20879120879121, |
| "grad_norm": 1.796096682548523, |
| "learning_rate": 0.001, |
| "loss": 1.8607, |
| "step": 127500 |
| }, |
| { |
| "epoch": 41.241111829347126, |
| "grad_norm": 1.3038780689239502, |
| "learning_rate": 0.001, |
| "loss": 1.8796, |
| "step": 127600 |
| }, |
| { |
| "epoch": 41.27343244990304, |
| "grad_norm": 1.5630769729614258, |
| "learning_rate": 0.001, |
| "loss": 1.8847, |
| "step": 127700 |
| }, |
| { |
| "epoch": 41.305753070458955, |
| "grad_norm": 1.3789194822311401, |
| "learning_rate": 0.001, |
| "loss": 1.8714, |
| "step": 127800 |
| }, |
| { |
| "epoch": 41.33807369101487, |
| "grad_norm": 1.3722786903381348, |
| "learning_rate": 0.001, |
| "loss": 1.8615, |
| "step": 127900 |
| }, |
| { |
| "epoch": 41.370394311570784, |
| "grad_norm": 2.0524232387542725, |
| "learning_rate": 0.001, |
| "loss": 1.9055, |
| "step": 128000 |
| }, |
| { |
| "epoch": 41.4027149321267, |
| "grad_norm": 1.6414809226989746, |
| "learning_rate": 0.001, |
| "loss": 1.8853, |
| "step": 128100 |
| }, |
| { |
| "epoch": 41.43503555268261, |
| "grad_norm": 1.334133267402649, |
| "learning_rate": 0.001, |
| "loss": 1.874, |
| "step": 128200 |
| }, |
| { |
| "epoch": 41.46735617323853, |
| "grad_norm": 1.618760585784912, |
| "learning_rate": 0.001, |
| "loss": 1.8997, |
| "step": 128300 |
| }, |
| { |
| "epoch": 41.49967679379444, |
| "grad_norm": 1.7455990314483643, |
| "learning_rate": 0.001, |
| "loss": 1.8986, |
| "step": 128400 |
| }, |
| { |
| "epoch": 41.53199741435036, |
| "grad_norm": 2.0014233589172363, |
| "learning_rate": 0.001, |
| "loss": 1.8999, |
| "step": 128500 |
| }, |
| { |
| "epoch": 41.56431803490627, |
| "grad_norm": 1.647352695465088, |
| "learning_rate": 0.001, |
| "loss": 1.895, |
| "step": 128600 |
| }, |
| { |
| "epoch": 41.596638655462186, |
| "grad_norm": 1.543352723121643, |
| "learning_rate": 0.001, |
| "loss": 1.8822, |
| "step": 128700 |
| }, |
| { |
| "epoch": 41.6289592760181, |
| "grad_norm": 1.7632330656051636, |
| "learning_rate": 0.001, |
| "loss": 1.8975, |
| "step": 128800 |
| }, |
| { |
| "epoch": 41.661279896574015, |
| "grad_norm": 2.0121119022369385, |
| "learning_rate": 0.001, |
| "loss": 1.906, |
| "step": 128900 |
| }, |
| { |
| "epoch": 41.69360051712993, |
| "grad_norm": 1.5535619258880615, |
| "learning_rate": 0.001, |
| "loss": 1.9233, |
| "step": 129000 |
| }, |
| { |
| "epoch": 41.725921137685845, |
| "grad_norm": 1.4161769151687622, |
| "learning_rate": 0.001, |
| "loss": 1.9226, |
| "step": 129100 |
| }, |
| { |
| "epoch": 41.75824175824176, |
| "grad_norm": 1.93502676486969, |
| "learning_rate": 0.001, |
| "loss": 1.9377, |
| "step": 129200 |
| }, |
| { |
| "epoch": 41.790562378797674, |
| "grad_norm": 1.6511423587799072, |
| "learning_rate": 0.001, |
| "loss": 1.9139, |
| "step": 129300 |
| }, |
| { |
| "epoch": 41.82288299935359, |
| "grad_norm": 1.5430395603179932, |
| "learning_rate": 0.001, |
| "loss": 1.9264, |
| "step": 129400 |
| }, |
| { |
| "epoch": 41.8552036199095, |
| "grad_norm": 1.556210994720459, |
| "learning_rate": 0.001, |
| "loss": 1.9407, |
| "step": 129500 |
| }, |
| { |
| "epoch": 41.88752424046542, |
| "grad_norm": 2.07692551612854, |
| "learning_rate": 0.001, |
| "loss": 1.9227, |
| "step": 129600 |
| }, |
| { |
| "epoch": 41.91984486102133, |
| "grad_norm": 1.8000415563583374, |
| "learning_rate": 0.001, |
| "loss": 1.9491, |
| "step": 129700 |
| }, |
| { |
| "epoch": 41.95216548157725, |
| "grad_norm": 1.6233563423156738, |
| "learning_rate": 0.001, |
| "loss": 1.9245, |
| "step": 129800 |
| }, |
| { |
| "epoch": 41.98448610213316, |
| "grad_norm": 1.6756561994552612, |
| "learning_rate": 0.001, |
| "loss": 1.9551, |
| "step": 129900 |
| }, |
| { |
| "epoch": 42.016806722689076, |
| "grad_norm": 1.9280065298080444, |
| "learning_rate": 0.001, |
| "loss": 1.8467, |
| "step": 130000 |
| }, |
| { |
| "epoch": 42.04912734324499, |
| "grad_norm": 1.9981783628463745, |
| "learning_rate": 0.001, |
| "loss": 1.8106, |
| "step": 130100 |
| }, |
| { |
| "epoch": 42.081447963800905, |
| "grad_norm": 2.082932949066162, |
| "learning_rate": 0.001, |
| "loss": 1.8251, |
| "step": 130200 |
| }, |
| { |
| "epoch": 42.11376858435682, |
| "grad_norm": 2.3820343017578125, |
| "learning_rate": 0.001, |
| "loss": 1.8222, |
| "step": 130300 |
| }, |
| { |
| "epoch": 42.146089204912734, |
| "grad_norm": 1.403403401374817, |
| "learning_rate": 0.001, |
| "loss": 1.8253, |
| "step": 130400 |
| }, |
| { |
| "epoch": 42.17840982546865, |
| "grad_norm": 2.2364022731781006, |
| "learning_rate": 0.001, |
| "loss": 1.8458, |
| "step": 130500 |
| }, |
| { |
| "epoch": 42.21073044602456, |
| "grad_norm": 2.2380168437957764, |
| "learning_rate": 0.001, |
| "loss": 1.8439, |
| "step": 130600 |
| }, |
| { |
| "epoch": 42.24305106658048, |
| "grad_norm": 1.7103081941604614, |
| "learning_rate": 0.001, |
| "loss": 1.8403, |
| "step": 130700 |
| }, |
| { |
| "epoch": 42.27537168713639, |
| "grad_norm": 1.7879369258880615, |
| "learning_rate": 0.001, |
| "loss": 1.8448, |
| "step": 130800 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 2.4792017936706543, |
| "learning_rate": 0.001, |
| "loss": 1.8527, |
| "step": 130900 |
| }, |
| { |
| "epoch": 42.34001292824822, |
| "grad_norm": 2.2171385288238525, |
| "learning_rate": 0.001, |
| "loss": 1.8467, |
| "step": 131000 |
| }, |
| { |
| "epoch": 42.372333548804136, |
| "grad_norm": 1.9090009927749634, |
| "learning_rate": 0.001, |
| "loss": 1.8737, |
| "step": 131100 |
| }, |
| { |
| "epoch": 42.40465416936005, |
| "grad_norm": 1.862734079360962, |
| "learning_rate": 0.001, |
| "loss": 1.8801, |
| "step": 131200 |
| }, |
| { |
| "epoch": 42.436974789915965, |
| "grad_norm": 1.944027066230774, |
| "learning_rate": 0.001, |
| "loss": 1.8575, |
| "step": 131300 |
| }, |
| { |
| "epoch": 42.46929541047188, |
| "grad_norm": 1.8651403188705444, |
| "learning_rate": 0.001, |
| "loss": 1.8745, |
| "step": 131400 |
| }, |
| { |
| "epoch": 42.501616031027794, |
| "grad_norm": 1.9831877946853638, |
| "learning_rate": 0.001, |
| "loss": 1.8786, |
| "step": 131500 |
| }, |
| { |
| "epoch": 42.53393665158371, |
| "grad_norm": 1.9882594347000122, |
| "learning_rate": 0.001, |
| "loss": 1.872, |
| "step": 131600 |
| }, |
| { |
| "epoch": 42.56625727213962, |
| "grad_norm": 2.680168867111206, |
| "learning_rate": 0.001, |
| "loss": 1.8877, |
| "step": 131700 |
| }, |
| { |
| "epoch": 42.59857789269554, |
| "grad_norm": 1.6203826665878296, |
| "learning_rate": 0.001, |
| "loss": 1.9111, |
| "step": 131800 |
| }, |
| { |
| "epoch": 42.63089851325145, |
| "grad_norm": 1.6531736850738525, |
| "learning_rate": 0.001, |
| "loss": 1.8921, |
| "step": 131900 |
| }, |
| { |
| "epoch": 42.66321913380737, |
| "grad_norm": 1.7691742181777954, |
| "learning_rate": 0.001, |
| "loss": 1.9, |
| "step": 132000 |
| }, |
| { |
| "epoch": 42.69553975436328, |
| "grad_norm": 2.291424036026001, |
| "learning_rate": 0.001, |
| "loss": 1.8941, |
| "step": 132100 |
| }, |
| { |
| "epoch": 42.727860374919196, |
| "grad_norm": 1.9580634832382202, |
| "learning_rate": 0.001, |
| "loss": 1.9028, |
| "step": 132200 |
| }, |
| { |
| "epoch": 42.76018099547511, |
| "grad_norm": 1.9733229875564575, |
| "learning_rate": 0.001, |
| "loss": 1.9039, |
| "step": 132300 |
| }, |
| { |
| "epoch": 42.792501616031025, |
| "grad_norm": 1.861606478691101, |
| "learning_rate": 0.001, |
| "loss": 1.9105, |
| "step": 132400 |
| }, |
| { |
| "epoch": 42.82482223658694, |
| "grad_norm": 1.7530721426010132, |
| "learning_rate": 0.001, |
| "loss": 1.916, |
| "step": 132500 |
| }, |
| { |
| "epoch": 42.857142857142854, |
| "grad_norm": 2.0133233070373535, |
| "learning_rate": 0.001, |
| "loss": 1.913, |
| "step": 132600 |
| }, |
| { |
| "epoch": 42.88946347769877, |
| "grad_norm": 1.8621476888656616, |
| "learning_rate": 0.001, |
| "loss": 1.9032, |
| "step": 132700 |
| }, |
| { |
| "epoch": 42.92178409825468, |
| "grad_norm": 2.1451456546783447, |
| "learning_rate": 0.001, |
| "loss": 1.9438, |
| "step": 132800 |
| }, |
| { |
| "epoch": 42.9541047188106, |
| "grad_norm": 1.651073694229126, |
| "learning_rate": 0.001, |
| "loss": 1.9355, |
| "step": 132900 |
| }, |
| { |
| "epoch": 42.98642533936652, |
| "grad_norm": 1.9844690561294556, |
| "learning_rate": 0.001, |
| "loss": 1.9233, |
| "step": 133000 |
| }, |
| { |
| "epoch": 43.018745959922434, |
| "grad_norm": 1.3748688697814941, |
| "learning_rate": 0.001, |
| "loss": 1.8581, |
| "step": 133100 |
| }, |
| { |
| "epoch": 43.05106658047835, |
| "grad_norm": 1.2402000427246094, |
| "learning_rate": 0.001, |
| "loss": 1.787, |
| "step": 133200 |
| }, |
| { |
| "epoch": 43.08338720103426, |
| "grad_norm": 1.3622288703918457, |
| "learning_rate": 0.001, |
| "loss": 1.7909, |
| "step": 133300 |
| }, |
| { |
| "epoch": 43.11570782159018, |
| "grad_norm": 1.5441625118255615, |
| "learning_rate": 0.001, |
| "loss": 1.8092, |
| "step": 133400 |
| }, |
| { |
| "epoch": 43.14802844214609, |
| "grad_norm": 1.443248987197876, |
| "learning_rate": 0.001, |
| "loss": 1.8175, |
| "step": 133500 |
| }, |
| { |
| "epoch": 43.18034906270201, |
| "grad_norm": 1.303268551826477, |
| "learning_rate": 0.001, |
| "loss": 1.839, |
| "step": 133600 |
| }, |
| { |
| "epoch": 43.21266968325792, |
| "grad_norm": 1.8648037910461426, |
| "learning_rate": 0.001, |
| "loss": 1.8477, |
| "step": 133700 |
| }, |
| { |
| "epoch": 43.244990303813836, |
| "grad_norm": 1.5793299674987793, |
| "learning_rate": 0.001, |
| "loss": 1.8161, |
| "step": 133800 |
| }, |
| { |
| "epoch": 43.27731092436975, |
| "grad_norm": 1.2750500440597534, |
| "learning_rate": 0.001, |
| "loss": 1.8347, |
| "step": 133900 |
| }, |
| { |
| "epoch": 43.309631544925665, |
| "grad_norm": 1.87288236618042, |
| "learning_rate": 0.001, |
| "loss": 1.851, |
| "step": 134000 |
| }, |
| { |
| "epoch": 43.34195216548158, |
| "grad_norm": 1.6583763360977173, |
| "learning_rate": 0.001, |
| "loss": 1.8523, |
| "step": 134100 |
| }, |
| { |
| "epoch": 43.374272786037494, |
| "grad_norm": 1.6558666229248047, |
| "learning_rate": 0.001, |
| "loss": 1.8563, |
| "step": 134200 |
| }, |
| { |
| "epoch": 43.40659340659341, |
| "grad_norm": 1.6735751628875732, |
| "learning_rate": 0.001, |
| "loss": 1.8612, |
| "step": 134300 |
| }, |
| { |
| "epoch": 43.43891402714932, |
| "grad_norm": 1.3858246803283691, |
| "learning_rate": 0.001, |
| "loss": 1.8426, |
| "step": 134400 |
| }, |
| { |
| "epoch": 43.47123464770524, |
| "grad_norm": 1.8875099420547485, |
| "learning_rate": 0.001, |
| "loss": 1.8739, |
| "step": 134500 |
| }, |
| { |
| "epoch": 43.50355526826115, |
| "grad_norm": 1.4078848361968994, |
| "learning_rate": 0.001, |
| "loss": 1.8585, |
| "step": 134600 |
| }, |
| { |
| "epoch": 43.53587588881707, |
| "grad_norm": 1.3911489248275757, |
| "learning_rate": 0.001, |
| "loss": 1.8548, |
| "step": 134700 |
| }, |
| { |
| "epoch": 43.56819650937298, |
| "grad_norm": 1.5766676664352417, |
| "learning_rate": 0.001, |
| "loss": 1.8771, |
| "step": 134800 |
| }, |
| { |
| "epoch": 43.600517129928896, |
| "grad_norm": 1.8566309213638306, |
| "learning_rate": 0.001, |
| "loss": 1.8583, |
| "step": 134900 |
| }, |
| { |
| "epoch": 43.63283775048481, |
| "grad_norm": 1.2162312269210815, |
| "learning_rate": 0.001, |
| "loss": 1.8837, |
| "step": 135000 |
| }, |
| { |
| "epoch": 43.665158371040725, |
| "grad_norm": 1.2028443813323975, |
| "learning_rate": 0.001, |
| "loss": 1.8684, |
| "step": 135100 |
| }, |
| { |
| "epoch": 43.69747899159664, |
| "grad_norm": 1.4986368417739868, |
| "learning_rate": 0.001, |
| "loss": 1.888, |
| "step": 135200 |
| }, |
| { |
| "epoch": 43.729799612152554, |
| "grad_norm": 1.400987148284912, |
| "learning_rate": 0.001, |
| "loss": 1.8848, |
| "step": 135300 |
| }, |
| { |
| "epoch": 43.76212023270847, |
| "grad_norm": 1.536422848701477, |
| "learning_rate": 0.001, |
| "loss": 1.8994, |
| "step": 135400 |
| }, |
| { |
| "epoch": 43.79444085326438, |
| "grad_norm": 1.6821508407592773, |
| "learning_rate": 0.001, |
| "loss": 1.9077, |
| "step": 135500 |
| }, |
| { |
| "epoch": 43.8267614738203, |
| "grad_norm": 1.1288377046585083, |
| "learning_rate": 0.001, |
| "loss": 1.8833, |
| "step": 135600 |
| }, |
| { |
| "epoch": 43.85908209437621, |
| "grad_norm": 1.2076668739318848, |
| "learning_rate": 0.001, |
| "loss": 1.8983, |
| "step": 135700 |
| }, |
| { |
| "epoch": 43.89140271493213, |
| "grad_norm": 1.4700038433074951, |
| "learning_rate": 0.001, |
| "loss": 1.906, |
| "step": 135800 |
| }, |
| { |
| "epoch": 43.92372333548804, |
| "grad_norm": 1.7205662727355957, |
| "learning_rate": 0.001, |
| "loss": 1.9095, |
| "step": 135900 |
| }, |
| { |
| "epoch": 43.956043956043956, |
| "grad_norm": 1.2482390403747559, |
| "learning_rate": 0.001, |
| "loss": 1.8971, |
| "step": 136000 |
| }, |
| { |
| "epoch": 43.98836457659987, |
| "grad_norm": 1.927675724029541, |
| "learning_rate": 0.001, |
| "loss": 1.9145, |
| "step": 136100 |
| }, |
| { |
| "epoch": 44.020685197155785, |
| "grad_norm": 1.3814749717712402, |
| "learning_rate": 0.001, |
| "loss": 1.8382, |
| "step": 136200 |
| }, |
| { |
| "epoch": 44.0530058177117, |
| "grad_norm": 1.3042851686477661, |
| "learning_rate": 0.001, |
| "loss": 1.796, |
| "step": 136300 |
| }, |
| { |
| "epoch": 44.085326438267614, |
| "grad_norm": 1.4995719194412231, |
| "learning_rate": 0.001, |
| "loss": 1.7771, |
| "step": 136400 |
| }, |
| { |
| "epoch": 44.11764705882353, |
| "grad_norm": 1.3295960426330566, |
| "learning_rate": 0.001, |
| "loss": 1.807, |
| "step": 136500 |
| }, |
| { |
| "epoch": 44.14996767937944, |
| "grad_norm": 1.2102105617523193, |
| "learning_rate": 0.001, |
| "loss": 1.7994, |
| "step": 136600 |
| }, |
| { |
| "epoch": 44.18228829993536, |
| "grad_norm": 1.1424363851547241, |
| "learning_rate": 0.001, |
| "loss": 1.8154, |
| "step": 136700 |
| }, |
| { |
| "epoch": 44.21460892049127, |
| "grad_norm": 1.3782585859298706, |
| "learning_rate": 0.001, |
| "loss": 1.8056, |
| "step": 136800 |
| }, |
| { |
| "epoch": 44.24692954104719, |
| "grad_norm": 1.4626226425170898, |
| "learning_rate": 0.001, |
| "loss": 1.8078, |
| "step": 136900 |
| }, |
| { |
| "epoch": 44.2792501616031, |
| "grad_norm": 1.2396538257598877, |
| "learning_rate": 0.001, |
| "loss": 1.8336, |
| "step": 137000 |
| }, |
| { |
| "epoch": 44.311570782159016, |
| "grad_norm": 1.3221837282180786, |
| "learning_rate": 0.001, |
| "loss": 1.8088, |
| "step": 137100 |
| }, |
| { |
| "epoch": 44.34389140271493, |
| "grad_norm": 1.2316101789474487, |
| "learning_rate": 0.001, |
| "loss": 1.8322, |
| "step": 137200 |
| }, |
| { |
| "epoch": 44.376212023270845, |
| "grad_norm": 1.264435052871704, |
| "learning_rate": 0.001, |
| "loss": 1.8232, |
| "step": 137300 |
| }, |
| { |
| "epoch": 44.40853264382676, |
| "grad_norm": 1.1061835289001465, |
| "learning_rate": 0.001, |
| "loss": 1.8281, |
| "step": 137400 |
| }, |
| { |
| "epoch": 44.440853264382675, |
| "grad_norm": 1.1366873979568481, |
| "learning_rate": 0.001, |
| "loss": 1.8283, |
| "step": 137500 |
| }, |
| { |
| "epoch": 44.47317388493859, |
| "grad_norm": 1.7174246311187744, |
| "learning_rate": 0.001, |
| "loss": 1.8599, |
| "step": 137600 |
| }, |
| { |
| "epoch": 44.505494505494504, |
| "grad_norm": 1.4456156492233276, |
| "learning_rate": 0.001, |
| "loss": 1.8474, |
| "step": 137700 |
| }, |
| { |
| "epoch": 44.53781512605042, |
| "grad_norm": 0.972943902015686, |
| "learning_rate": 0.001, |
| "loss": 1.8544, |
| "step": 137800 |
| }, |
| { |
| "epoch": 44.57013574660633, |
| "grad_norm": 1.6857820749282837, |
| "learning_rate": 0.001, |
| "loss": 1.8733, |
| "step": 137900 |
| }, |
| { |
| "epoch": 44.60245636716225, |
| "grad_norm": 1.701591968536377, |
| "learning_rate": 0.001, |
| "loss": 1.8474, |
| "step": 138000 |
| }, |
| { |
| "epoch": 44.63477698771816, |
| "grad_norm": 1.1282321214675903, |
| "learning_rate": 0.001, |
| "loss": 1.8592, |
| "step": 138100 |
| }, |
| { |
| "epoch": 44.66709760827408, |
| "grad_norm": 1.5330438613891602, |
| "learning_rate": 0.001, |
| "loss": 1.8709, |
| "step": 138200 |
| }, |
| { |
| "epoch": 44.69941822882999, |
| "grad_norm": 1.1971231698989868, |
| "learning_rate": 0.001, |
| "loss": 1.8878, |
| "step": 138300 |
| }, |
| { |
| "epoch": 44.731738849385906, |
| "grad_norm": 1.7220470905303955, |
| "learning_rate": 0.001, |
| "loss": 1.8817, |
| "step": 138400 |
| }, |
| { |
| "epoch": 44.76405946994182, |
| "grad_norm": 1.196541666984558, |
| "learning_rate": 0.001, |
| "loss": 1.8801, |
| "step": 138500 |
| }, |
| { |
| "epoch": 44.796380090497735, |
| "grad_norm": 1.4516090154647827, |
| "learning_rate": 0.001, |
| "loss": 1.8725, |
| "step": 138600 |
| }, |
| { |
| "epoch": 44.82870071105365, |
| "grad_norm": 1.449439287185669, |
| "learning_rate": 0.001, |
| "loss": 1.8915, |
| "step": 138700 |
| }, |
| { |
| "epoch": 44.861021331609564, |
| "grad_norm": 1.433020830154419, |
| "learning_rate": 0.001, |
| "loss": 1.8824, |
| "step": 138800 |
| }, |
| { |
| "epoch": 44.89334195216548, |
| "grad_norm": 1.412376046180725, |
| "learning_rate": 0.001, |
| "loss": 1.8862, |
| "step": 138900 |
| }, |
| { |
| "epoch": 44.92566257272139, |
| "grad_norm": 1.0682293176651, |
| "learning_rate": 0.001, |
| "loss": 1.892, |
| "step": 139000 |
| }, |
| { |
| "epoch": 44.95798319327731, |
| "grad_norm": 1.2839839458465576, |
| "learning_rate": 0.001, |
| "loss": 1.9081, |
| "step": 139100 |
| }, |
| { |
| "epoch": 44.99030381383322, |
| "grad_norm": 1.5696237087249756, |
| "learning_rate": 0.001, |
| "loss": 1.8943, |
| "step": 139200 |
| }, |
| { |
| "epoch": 45.022624434389144, |
| "grad_norm": 1.4640036821365356, |
| "learning_rate": 0.001, |
| "loss": 1.8173, |
| "step": 139300 |
| }, |
| { |
| "epoch": 45.05494505494506, |
| "grad_norm": 1.5870167016983032, |
| "learning_rate": 0.001, |
| "loss": 1.7714, |
| "step": 139400 |
| }, |
| { |
| "epoch": 45.08726567550097, |
| "grad_norm": 1.6109189987182617, |
| "learning_rate": 0.001, |
| "loss": 1.7732, |
| "step": 139500 |
| }, |
| { |
| "epoch": 45.11958629605689, |
| "grad_norm": 1.1997172832489014, |
| "learning_rate": 0.001, |
| "loss": 1.787, |
| "step": 139600 |
| }, |
| { |
| "epoch": 45.1519069166128, |
| "grad_norm": 1.2339918613433838, |
| "learning_rate": 0.001, |
| "loss": 1.7585, |
| "step": 139700 |
| }, |
| { |
| "epoch": 45.18422753716872, |
| "grad_norm": 1.179796814918518, |
| "learning_rate": 0.001, |
| "loss": 1.7848, |
| "step": 139800 |
| }, |
| { |
| "epoch": 45.21654815772463, |
| "grad_norm": 1.5013426542282104, |
| "learning_rate": 0.001, |
| "loss": 1.7957, |
| "step": 139900 |
| }, |
| { |
| "epoch": 45.248868778280546, |
| "grad_norm": 1.3376390933990479, |
| "learning_rate": 0.001, |
| "loss": 1.8045, |
| "step": 140000 |
| }, |
| { |
| "epoch": 45.28118939883646, |
| "grad_norm": 1.2788093090057373, |
| "learning_rate": 0.001, |
| "loss": 1.7864, |
| "step": 140100 |
| }, |
| { |
| "epoch": 45.313510019392375, |
| "grad_norm": 1.4019917249679565, |
| "learning_rate": 0.001, |
| "loss": 1.8004, |
| "step": 140200 |
| }, |
| { |
| "epoch": 45.34583063994829, |
| "grad_norm": 1.2221229076385498, |
| "learning_rate": 0.001, |
| "loss": 1.8066, |
| "step": 140300 |
| }, |
| { |
| "epoch": 45.378151260504204, |
| "grad_norm": 1.4707577228546143, |
| "learning_rate": 0.001, |
| "loss": 1.8257, |
| "step": 140400 |
| }, |
| { |
| "epoch": 45.41047188106012, |
| "grad_norm": 1.2390767335891724, |
| "learning_rate": 0.001, |
| "loss": 1.8273, |
| "step": 140500 |
| }, |
| { |
| "epoch": 45.44279250161603, |
| "grad_norm": 1.0136756896972656, |
| "learning_rate": 0.001, |
| "loss": 1.8305, |
| "step": 140600 |
| }, |
| { |
| "epoch": 45.47511312217195, |
| "grad_norm": 1.0152579545974731, |
| "learning_rate": 0.001, |
| "loss": 1.8296, |
| "step": 140700 |
| }, |
| { |
| "epoch": 45.50743374272786, |
| "grad_norm": 1.436432957649231, |
| "learning_rate": 0.001, |
| "loss": 1.8522, |
| "step": 140800 |
| }, |
| { |
| "epoch": 45.53975436328378, |
| "grad_norm": 1.0323009490966797, |
| "learning_rate": 0.001, |
| "loss": 1.8393, |
| "step": 140900 |
| }, |
| { |
| "epoch": 45.57207498383969, |
| "grad_norm": 1.4058171510696411, |
| "learning_rate": 0.001, |
| "loss": 1.8404, |
| "step": 141000 |
| }, |
| { |
| "epoch": 45.604395604395606, |
| "grad_norm": 1.2600674629211426, |
| "learning_rate": 0.001, |
| "loss": 1.8454, |
| "step": 141100 |
| }, |
| { |
| "epoch": 45.63671622495152, |
| "grad_norm": 1.2156469821929932, |
| "learning_rate": 0.001, |
| "loss": 1.8537, |
| "step": 141200 |
| }, |
| { |
| "epoch": 45.669036845507435, |
| "grad_norm": 1.3858039379119873, |
| "learning_rate": 0.001, |
| "loss": 1.8662, |
| "step": 141300 |
| }, |
| { |
| "epoch": 45.70135746606335, |
| "grad_norm": 1.4224052429199219, |
| "learning_rate": 0.001, |
| "loss": 1.8397, |
| "step": 141400 |
| }, |
| { |
| "epoch": 45.733678086619264, |
| "grad_norm": 1.3239479064941406, |
| "learning_rate": 0.001, |
| "loss": 1.8671, |
| "step": 141500 |
| }, |
| { |
| "epoch": 45.76599870717518, |
| "grad_norm": 1.519800066947937, |
| "learning_rate": 0.001, |
| "loss": 1.873, |
| "step": 141600 |
| }, |
| { |
| "epoch": 45.79831932773109, |
| "grad_norm": 1.158959150314331, |
| "learning_rate": 0.001, |
| "loss": 1.8647, |
| "step": 141700 |
| }, |
| { |
| "epoch": 45.83063994828701, |
| "grad_norm": 1.415377140045166, |
| "learning_rate": 0.001, |
| "loss": 1.8647, |
| "step": 141800 |
| }, |
| { |
| "epoch": 45.86296056884292, |
| "grad_norm": 1.214583396911621, |
| "learning_rate": 0.001, |
| "loss": 1.864, |
| "step": 141900 |
| }, |
| { |
| "epoch": 45.89528118939884, |
| "grad_norm": 1.3497668504714966, |
| "learning_rate": 0.001, |
| "loss": 1.8847, |
| "step": 142000 |
| }, |
| { |
| "epoch": 45.92760180995475, |
| "grad_norm": 1.2246413230895996, |
| "learning_rate": 0.001, |
| "loss": 1.899, |
| "step": 142100 |
| }, |
| { |
| "epoch": 45.959922430510666, |
| "grad_norm": 1.4457169771194458, |
| "learning_rate": 0.001, |
| "loss": 1.8664, |
| "step": 142200 |
| }, |
| { |
| "epoch": 45.99224305106658, |
| "grad_norm": 1.2997976541519165, |
| "learning_rate": 0.001, |
| "loss": 1.8957, |
| "step": 142300 |
| }, |
| { |
| "epoch": 46.024563671622495, |
| "grad_norm": 1.4182782173156738, |
| "learning_rate": 0.001, |
| "loss": 1.7877, |
| "step": 142400 |
| }, |
| { |
| "epoch": 46.05688429217841, |
| "grad_norm": 1.2993918657302856, |
| "learning_rate": 0.001, |
| "loss": 1.7651, |
| "step": 142500 |
| }, |
| { |
| "epoch": 46.089204912734324, |
| "grad_norm": 1.2173502445220947, |
| "learning_rate": 0.001, |
| "loss": 1.7561, |
| "step": 142600 |
| }, |
| { |
| "epoch": 46.12152553329024, |
| "grad_norm": 1.4112443923950195, |
| "learning_rate": 0.001, |
| "loss": 1.7623, |
| "step": 142700 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 1.5435090065002441, |
| "learning_rate": 0.001, |
| "loss": 1.7749, |
| "step": 142800 |
| }, |
| { |
| "epoch": 46.18616677440207, |
| "grad_norm": 1.3198472261428833, |
| "learning_rate": 0.001, |
| "loss": 1.7711, |
| "step": 142900 |
| }, |
| { |
| "epoch": 46.21848739495798, |
| "grad_norm": 1.6420048475265503, |
| "learning_rate": 0.001, |
| "loss": 1.7993, |
| "step": 143000 |
| }, |
| { |
| "epoch": 46.2508080155139, |
| "grad_norm": 1.378956913948059, |
| "learning_rate": 0.001, |
| "loss": 1.7934, |
| "step": 143100 |
| }, |
| { |
| "epoch": 46.28312863606981, |
| "grad_norm": 1.5023648738861084, |
| "learning_rate": 0.001, |
| "loss": 1.7963, |
| "step": 143200 |
| }, |
| { |
| "epoch": 46.315449256625726, |
| "grad_norm": 1.281911849975586, |
| "learning_rate": 0.001, |
| "loss": 1.8086, |
| "step": 143300 |
| }, |
| { |
| "epoch": 46.34776987718164, |
| "grad_norm": 1.1474652290344238, |
| "learning_rate": 0.001, |
| "loss": 1.8174, |
| "step": 143400 |
| }, |
| { |
| "epoch": 46.380090497737555, |
| "grad_norm": 1.4720494747161865, |
| "learning_rate": 0.001, |
| "loss": 1.7957, |
| "step": 143500 |
| }, |
| { |
| "epoch": 46.41241111829347, |
| "grad_norm": 1.2694511413574219, |
| "learning_rate": 0.001, |
| "loss": 1.8011, |
| "step": 143600 |
| }, |
| { |
| "epoch": 46.444731738849384, |
| "grad_norm": 1.3873778581619263, |
| "learning_rate": 0.001, |
| "loss": 1.8205, |
| "step": 143700 |
| }, |
| { |
| "epoch": 46.4770523594053, |
| "grad_norm": 1.3449006080627441, |
| "learning_rate": 0.001, |
| "loss": 1.8294, |
| "step": 143800 |
| }, |
| { |
| "epoch": 46.50937297996121, |
| "grad_norm": 1.5486829280853271, |
| "learning_rate": 0.001, |
| "loss": 1.8243, |
| "step": 143900 |
| }, |
| { |
| "epoch": 46.54169360051713, |
| "grad_norm": 1.3362038135528564, |
| "learning_rate": 0.001, |
| "loss": 1.8159, |
| "step": 144000 |
| }, |
| { |
| "epoch": 46.57401422107304, |
| "grad_norm": 1.412407636642456, |
| "learning_rate": 0.001, |
| "loss": 1.8099, |
| "step": 144100 |
| }, |
| { |
| "epoch": 46.60633484162896, |
| "grad_norm": 1.3122761249542236, |
| "learning_rate": 0.001, |
| "loss": 1.8263, |
| "step": 144200 |
| }, |
| { |
| "epoch": 46.63865546218487, |
| "grad_norm": 1.3933433294296265, |
| "learning_rate": 0.001, |
| "loss": 1.8221, |
| "step": 144300 |
| }, |
| { |
| "epoch": 46.670976082740786, |
| "grad_norm": 1.0872950553894043, |
| "learning_rate": 0.001, |
| "loss": 1.8414, |
| "step": 144400 |
| }, |
| { |
| "epoch": 46.7032967032967, |
| "grad_norm": 1.0704154968261719, |
| "learning_rate": 0.001, |
| "loss": 1.8611, |
| "step": 144500 |
| }, |
| { |
| "epoch": 46.735617323852615, |
| "grad_norm": 1.1374051570892334, |
| "learning_rate": 0.001, |
| "loss": 1.8597, |
| "step": 144600 |
| }, |
| { |
| "epoch": 46.76793794440853, |
| "grad_norm": 1.6145614385604858, |
| "learning_rate": 0.001, |
| "loss": 1.8636, |
| "step": 144700 |
| }, |
| { |
| "epoch": 46.800258564964444, |
| "grad_norm": 1.250145673751831, |
| "learning_rate": 0.001, |
| "loss": 1.8532, |
| "step": 144800 |
| }, |
| { |
| "epoch": 46.83257918552036, |
| "grad_norm": 1.1978321075439453, |
| "learning_rate": 0.001, |
| "loss": 1.8524, |
| "step": 144900 |
| }, |
| { |
| "epoch": 46.864899806076274, |
| "grad_norm": 1.2241102457046509, |
| "learning_rate": 0.001, |
| "loss": 1.8546, |
| "step": 145000 |
| }, |
| { |
| "epoch": 46.89722042663219, |
| "grad_norm": 1.416428565979004, |
| "learning_rate": 0.001, |
| "loss": 1.8539, |
| "step": 145100 |
| }, |
| { |
| "epoch": 46.9295410471881, |
| "grad_norm": 1.2089383602142334, |
| "learning_rate": 0.001, |
| "loss": 1.8663, |
| "step": 145200 |
| }, |
| { |
| "epoch": 46.96186166774402, |
| "grad_norm": 1.3217616081237793, |
| "learning_rate": 0.001, |
| "loss": 1.8666, |
| "step": 145300 |
| }, |
| { |
| "epoch": 46.99418228829994, |
| "grad_norm": 1.6821898221969604, |
| "learning_rate": 0.001, |
| "loss": 1.8631, |
| "step": 145400 |
| }, |
| { |
| "epoch": 47.02650290885585, |
| "grad_norm": 1.6980481147766113, |
| "learning_rate": 0.001, |
| "loss": 1.7668, |
| "step": 145500 |
| }, |
| { |
| "epoch": 47.05882352941177, |
| "grad_norm": 1.5981022119522095, |
| "learning_rate": 0.001, |
| "loss": 1.7509, |
| "step": 145600 |
| }, |
| { |
| "epoch": 47.09114414996768, |
| "grad_norm": 1.54694664478302, |
| "learning_rate": 0.001, |
| "loss": 1.7445, |
| "step": 145700 |
| }, |
| { |
| "epoch": 47.1234647705236, |
| "grad_norm": 1.3953649997711182, |
| "learning_rate": 0.001, |
| "loss": 1.7486, |
| "step": 145800 |
| }, |
| { |
| "epoch": 47.15578539107951, |
| "grad_norm": 1.4519585371017456, |
| "learning_rate": 0.001, |
| "loss": 1.7573, |
| "step": 145900 |
| }, |
| { |
| "epoch": 47.188106011635426, |
| "grad_norm": 2.0267391204833984, |
| "learning_rate": 0.001, |
| "loss": 1.7892, |
| "step": 146000 |
| }, |
| { |
| "epoch": 47.22042663219134, |
| "grad_norm": 1.5844534635543823, |
| "learning_rate": 0.001, |
| "loss": 1.7733, |
| "step": 146100 |
| }, |
| { |
| "epoch": 47.252747252747255, |
| "grad_norm": 1.2251486778259277, |
| "learning_rate": 0.001, |
| "loss": 1.7747, |
| "step": 146200 |
| }, |
| { |
| "epoch": 47.28506787330317, |
| "grad_norm": 1.4506583213806152, |
| "learning_rate": 0.001, |
| "loss": 1.7746, |
| "step": 146300 |
| }, |
| { |
| "epoch": 47.317388493859085, |
| "grad_norm": 1.6739964485168457, |
| "learning_rate": 0.001, |
| "loss": 1.7853, |
| "step": 146400 |
| }, |
| { |
| "epoch": 47.349709114415, |
| "grad_norm": 1.7414036989212036, |
| "learning_rate": 0.001, |
| "loss": 1.7778, |
| "step": 146500 |
| }, |
| { |
| "epoch": 47.382029734970914, |
| "grad_norm": 1.4631189107894897, |
| "learning_rate": 0.001, |
| "loss": 1.7837, |
| "step": 146600 |
| }, |
| { |
| "epoch": 47.41435035552683, |
| "grad_norm": 1.6663905382156372, |
| "learning_rate": 0.001, |
| "loss": 1.7969, |
| "step": 146700 |
| }, |
| { |
| "epoch": 47.44667097608274, |
| "grad_norm": 1.7138595581054688, |
| "learning_rate": 0.001, |
| "loss": 1.7962, |
| "step": 146800 |
| }, |
| { |
| "epoch": 47.47899159663866, |
| "grad_norm": 1.4735912084579468, |
| "learning_rate": 0.001, |
| "loss": 1.8132, |
| "step": 146900 |
| }, |
| { |
| "epoch": 47.51131221719457, |
| "grad_norm": 1.72100031375885, |
| "learning_rate": 0.001, |
| "loss": 1.8061, |
| "step": 147000 |
| }, |
| { |
| "epoch": 47.543632837750486, |
| "grad_norm": 1.1838710308074951, |
| "learning_rate": 0.001, |
| "loss": 1.7897, |
| "step": 147100 |
| }, |
| { |
| "epoch": 47.5759534583064, |
| "grad_norm": 1.4770824909210205, |
| "learning_rate": 0.001, |
| "loss": 1.8138, |
| "step": 147200 |
| }, |
| { |
| "epoch": 47.608274078862316, |
| "grad_norm": 1.287657380104065, |
| "learning_rate": 0.001, |
| "loss": 1.8097, |
| "step": 147300 |
| }, |
| { |
| "epoch": 47.64059469941823, |
| "grad_norm": 1.8169690370559692, |
| "learning_rate": 0.001, |
| "loss": 1.8287, |
| "step": 147400 |
| }, |
| { |
| "epoch": 47.672915319974145, |
| "grad_norm": 0.9983140826225281, |
| "learning_rate": 0.001, |
| "loss": 1.8407, |
| "step": 147500 |
| }, |
| { |
| "epoch": 47.70523594053006, |
| "grad_norm": 1.3537484407424927, |
| "learning_rate": 0.001, |
| "loss": 1.8438, |
| "step": 147600 |
| }, |
| { |
| "epoch": 47.737556561085974, |
| "grad_norm": 1.5467236042022705, |
| "learning_rate": 0.001, |
| "loss": 1.8278, |
| "step": 147700 |
| }, |
| { |
| "epoch": 47.76987718164189, |
| "grad_norm": 1.1953139305114746, |
| "learning_rate": 0.001, |
| "loss": 1.8182, |
| "step": 147800 |
| }, |
| { |
| "epoch": 47.8021978021978, |
| "grad_norm": 1.4098021984100342, |
| "learning_rate": 0.001, |
| "loss": 1.8585, |
| "step": 147900 |
| }, |
| { |
| "epoch": 47.83451842275372, |
| "grad_norm": 1.4294242858886719, |
| "learning_rate": 0.001, |
| "loss": 1.8365, |
| "step": 148000 |
| }, |
| { |
| "epoch": 47.86683904330963, |
| "grad_norm": 1.4361600875854492, |
| "learning_rate": 0.001, |
| "loss": 1.8329, |
| "step": 148100 |
| }, |
| { |
| "epoch": 47.89915966386555, |
| "grad_norm": 1.189009428024292, |
| "learning_rate": 0.001, |
| "loss": 1.8502, |
| "step": 148200 |
| }, |
| { |
| "epoch": 47.93148028442146, |
| "grad_norm": 1.3974965810775757, |
| "learning_rate": 0.001, |
| "loss": 1.8439, |
| "step": 148300 |
| }, |
| { |
| "epoch": 47.963800904977376, |
| "grad_norm": 1.1778879165649414, |
| "learning_rate": 0.001, |
| "loss": 1.8449, |
| "step": 148400 |
| }, |
| { |
| "epoch": 47.99612152553329, |
| "grad_norm": 1.9312989711761475, |
| "learning_rate": 0.001, |
| "loss": 1.8532, |
| "step": 148500 |
| }, |
| { |
| "epoch": 48.028442146089205, |
| "grad_norm": 1.6134992837905884, |
| "learning_rate": 0.001, |
| "loss": 1.7465, |
| "step": 148600 |
| }, |
| { |
| "epoch": 48.06076276664512, |
| "grad_norm": 1.2701274156570435, |
| "learning_rate": 0.001, |
| "loss": 1.7405, |
| "step": 148700 |
| }, |
| { |
| "epoch": 48.093083387201034, |
| "grad_norm": 1.5127066373825073, |
| "learning_rate": 0.001, |
| "loss": 1.7266, |
| "step": 148800 |
| }, |
| { |
| "epoch": 48.12540400775695, |
| "grad_norm": 1.2889701128005981, |
| "learning_rate": 0.001, |
| "loss": 1.7455, |
| "step": 148900 |
| }, |
| { |
| "epoch": 48.15772462831286, |
| "grad_norm": 1.8702465295791626, |
| "learning_rate": 0.001, |
| "loss": 1.7568, |
| "step": 149000 |
| }, |
| { |
| "epoch": 48.19004524886878, |
| "grad_norm": 1.6072839498519897, |
| "learning_rate": 0.001, |
| "loss": 1.7385, |
| "step": 149100 |
| }, |
| { |
| "epoch": 48.22236586942469, |
| "grad_norm": 1.8324649333953857, |
| "learning_rate": 0.001, |
| "loss": 1.766, |
| "step": 149200 |
| }, |
| { |
| "epoch": 48.25468648998061, |
| "grad_norm": 1.686521053314209, |
| "learning_rate": 0.001, |
| "loss": 1.7666, |
| "step": 149300 |
| }, |
| { |
| "epoch": 48.28700711053652, |
| "grad_norm": 1.4718869924545288, |
| "learning_rate": 0.001, |
| "loss": 1.7506, |
| "step": 149400 |
| }, |
| { |
| "epoch": 48.319327731092436, |
| "grad_norm": 1.7188520431518555, |
| "learning_rate": 0.001, |
| "loss": 1.7695, |
| "step": 149500 |
| }, |
| { |
| "epoch": 48.35164835164835, |
| "grad_norm": 1.57921302318573, |
| "learning_rate": 0.001, |
| "loss": 1.7729, |
| "step": 149600 |
| }, |
| { |
| "epoch": 48.383968972204265, |
| "grad_norm": 1.2841829061508179, |
| "learning_rate": 0.001, |
| "loss": 1.7712, |
| "step": 149700 |
| }, |
| { |
| "epoch": 48.41628959276018, |
| "grad_norm": 1.3768117427825928, |
| "learning_rate": 0.001, |
| "loss": 1.7834, |
| "step": 149800 |
| }, |
| { |
| "epoch": 48.448610213316094, |
| "grad_norm": 1.1611604690551758, |
| "learning_rate": 0.001, |
| "loss": 1.788, |
| "step": 149900 |
| }, |
| { |
| "epoch": 48.48093083387201, |
| "grad_norm": 1.3717632293701172, |
| "learning_rate": 0.001, |
| "loss": 1.7862, |
| "step": 150000 |
| }, |
| { |
| "epoch": 48.51325145442792, |
| "grad_norm": 1.4986873865127563, |
| "learning_rate": 0.001, |
| "loss": 1.7847, |
| "step": 150100 |
| }, |
| { |
| "epoch": 48.54557207498384, |
| "grad_norm": 1.6464810371398926, |
| "learning_rate": 0.001, |
| "loss": 1.7995, |
| "step": 150200 |
| }, |
| { |
| "epoch": 48.57789269553975, |
| "grad_norm": 1.5459891557693481, |
| "learning_rate": 0.001, |
| "loss": 1.7868, |
| "step": 150300 |
| }, |
| { |
| "epoch": 48.61021331609567, |
| "grad_norm": 1.5244051218032837, |
| "learning_rate": 0.001, |
| "loss": 1.8098, |
| "step": 150400 |
| }, |
| { |
| "epoch": 48.64253393665158, |
| "grad_norm": 1.25544011592865, |
| "learning_rate": 0.001, |
| "loss": 1.815, |
| "step": 150500 |
| }, |
| { |
| "epoch": 48.674854557207496, |
| "grad_norm": 1.3828271627426147, |
| "learning_rate": 0.001, |
| "loss": 1.8117, |
| "step": 150600 |
| }, |
| { |
| "epoch": 48.70717517776341, |
| "grad_norm": 1.4629228115081787, |
| "learning_rate": 0.001, |
| "loss": 1.8145, |
| "step": 150700 |
| }, |
| { |
| "epoch": 48.739495798319325, |
| "grad_norm": 1.336828589439392, |
| "learning_rate": 0.001, |
| "loss": 1.8303, |
| "step": 150800 |
| }, |
| { |
| "epoch": 48.77181641887524, |
| "grad_norm": 1.5909345149993896, |
| "learning_rate": 0.001, |
| "loss": 1.8385, |
| "step": 150900 |
| }, |
| { |
| "epoch": 48.804137039431154, |
| "grad_norm": 1.407732605934143, |
| "learning_rate": 0.001, |
| "loss": 1.8142, |
| "step": 151000 |
| }, |
| { |
| "epoch": 48.83645765998707, |
| "grad_norm": 1.2567416429519653, |
| "learning_rate": 0.001, |
| "loss": 1.8275, |
| "step": 151100 |
| }, |
| { |
| "epoch": 48.86877828054298, |
| "grad_norm": 1.3103371858596802, |
| "learning_rate": 0.001, |
| "loss": 1.8312, |
| "step": 151200 |
| }, |
| { |
| "epoch": 48.9010989010989, |
| "grad_norm": 1.4088467359542847, |
| "learning_rate": 0.001, |
| "loss": 1.8218, |
| "step": 151300 |
| }, |
| { |
| "epoch": 48.93341952165481, |
| "grad_norm": 1.8668159246444702, |
| "learning_rate": 0.001, |
| "loss": 1.8424, |
| "step": 151400 |
| }, |
| { |
| "epoch": 48.96574014221073, |
| "grad_norm": 1.5781002044677734, |
| "learning_rate": 0.001, |
| "loss": 1.8504, |
| "step": 151500 |
| }, |
| { |
| "epoch": 48.99806076276664, |
| "grad_norm": 1.4598246812820435, |
| "learning_rate": 0.001, |
| "loss": 1.8163, |
| "step": 151600 |
| }, |
| { |
| "epoch": 49.03038138332256, |
| "grad_norm": 1.3983478546142578, |
| "learning_rate": 0.001, |
| "loss": 1.7089, |
| "step": 151700 |
| }, |
| { |
| "epoch": 49.06270200387848, |
| "grad_norm": 1.577358603477478, |
| "learning_rate": 0.001, |
| "loss": 1.7011, |
| "step": 151800 |
| }, |
| { |
| "epoch": 49.09502262443439, |
| "grad_norm": 2.1866214275360107, |
| "learning_rate": 0.001, |
| "loss": 1.715, |
| "step": 151900 |
| }, |
| { |
| "epoch": 49.12734324499031, |
| "grad_norm": 1.976177453994751, |
| "learning_rate": 0.001, |
| "loss": 1.731, |
| "step": 152000 |
| }, |
| { |
| "epoch": 49.15966386554622, |
| "grad_norm": 1.2965463399887085, |
| "learning_rate": 0.001, |
| "loss": 1.7506, |
| "step": 152100 |
| }, |
| { |
| "epoch": 49.191984486102136, |
| "grad_norm": 1.4683022499084473, |
| "learning_rate": 0.001, |
| "loss": 1.7458, |
| "step": 152200 |
| }, |
| { |
| "epoch": 49.22430510665805, |
| "grad_norm": 1.3126236200332642, |
| "learning_rate": 0.001, |
| "loss": 1.7486, |
| "step": 152300 |
| }, |
| { |
| "epoch": 49.256625727213965, |
| "grad_norm": 1.6978693008422852, |
| "learning_rate": 0.001, |
| "loss": 1.7525, |
| "step": 152400 |
| }, |
| { |
| "epoch": 49.28894634776988, |
| "grad_norm": 1.4591189622879028, |
| "learning_rate": 0.001, |
| "loss": 1.7625, |
| "step": 152500 |
| }, |
| { |
| "epoch": 49.321266968325794, |
| "grad_norm": 1.7097078561782837, |
| "learning_rate": 0.001, |
| "loss": 1.7696, |
| "step": 152600 |
| }, |
| { |
| "epoch": 49.35358758888171, |
| "grad_norm": 1.3662595748901367, |
| "learning_rate": 0.001, |
| "loss": 1.7443, |
| "step": 152700 |
| }, |
| { |
| "epoch": 49.38590820943762, |
| "grad_norm": 1.8225806951522827, |
| "learning_rate": 0.001, |
| "loss": 1.7724, |
| "step": 152800 |
| }, |
| { |
| "epoch": 49.41822882999354, |
| "grad_norm": 1.3173518180847168, |
| "learning_rate": 0.001, |
| "loss": 1.7782, |
| "step": 152900 |
| }, |
| { |
| "epoch": 49.45054945054945, |
| "grad_norm": 1.3523099422454834, |
| "learning_rate": 0.001, |
| "loss": 1.7784, |
| "step": 153000 |
| }, |
| { |
| "epoch": 49.48287007110537, |
| "grad_norm": 1.639769434928894, |
| "learning_rate": 0.001, |
| "loss": 1.7701, |
| "step": 153100 |
| }, |
| { |
| "epoch": 49.51519069166128, |
| "grad_norm": 1.6762608289718628, |
| "learning_rate": 0.001, |
| "loss": 1.7884, |
| "step": 153200 |
| }, |
| { |
| "epoch": 49.547511312217196, |
| "grad_norm": 1.9911901950836182, |
| "learning_rate": 0.001, |
| "loss": 1.785, |
| "step": 153300 |
| }, |
| { |
| "epoch": 49.57983193277311, |
| "grad_norm": 1.4912683963775635, |
| "learning_rate": 0.001, |
| "loss": 1.7702, |
| "step": 153400 |
| }, |
| { |
| "epoch": 49.612152553329025, |
| "grad_norm": 1.8878214359283447, |
| "learning_rate": 0.001, |
| "loss": 1.7945, |
| "step": 153500 |
| }, |
| { |
| "epoch": 49.64447317388494, |
| "grad_norm": 1.3476299047470093, |
| "learning_rate": 0.001, |
| "loss": 1.7988, |
| "step": 153600 |
| }, |
| { |
| "epoch": 49.676793794440854, |
| "grad_norm": 1.8301808834075928, |
| "learning_rate": 0.001, |
| "loss": 1.7997, |
| "step": 153700 |
| }, |
| { |
| "epoch": 49.70911441499677, |
| "grad_norm": 1.6317429542541504, |
| "learning_rate": 0.001, |
| "loss": 1.8126, |
| "step": 153800 |
| }, |
| { |
| "epoch": 49.74143503555268, |
| "grad_norm": 1.8560541868209839, |
| "learning_rate": 0.001, |
| "loss": 1.787, |
| "step": 153900 |
| }, |
| { |
| "epoch": 49.7737556561086, |
| "grad_norm": 1.715898036956787, |
| "learning_rate": 0.001, |
| "loss": 1.8086, |
| "step": 154000 |
| }, |
| { |
| "epoch": 49.80607627666451, |
| "grad_norm": 1.7181135416030884, |
| "learning_rate": 0.001, |
| "loss": 1.8184, |
| "step": 154100 |
| }, |
| { |
| "epoch": 49.83839689722043, |
| "grad_norm": 1.3545619249343872, |
| "learning_rate": 0.001, |
| "loss": 1.798, |
| "step": 154200 |
| }, |
| { |
| "epoch": 49.87071751777634, |
| "grad_norm": 1.5673587322235107, |
| "learning_rate": 0.001, |
| "loss": 1.8144, |
| "step": 154300 |
| }, |
| { |
| "epoch": 49.903038138332256, |
| "grad_norm": 1.5232983827590942, |
| "learning_rate": 0.001, |
| "loss": 1.83, |
| "step": 154400 |
| }, |
| { |
| "epoch": 49.93535875888817, |
| "grad_norm": 1.9223566055297852, |
| "learning_rate": 0.001, |
| "loss": 1.8302, |
| "step": 154500 |
| }, |
| { |
| "epoch": 49.967679379444085, |
| "grad_norm": 2.017540693283081, |
| "learning_rate": 0.001, |
| "loss": 1.8268, |
| "step": 154600 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 2.689202308654785, |
| "learning_rate": 0.001, |
| "loss": 1.798, |
| "step": 154700 |
| }, |
| { |
| "epoch": 50.032320620555915, |
| "grad_norm": 2.791679859161377, |
| "learning_rate": 0.001, |
| "loss": 1.6968, |
| "step": 154800 |
| }, |
| { |
| "epoch": 50.06464124111183, |
| "grad_norm": 2.2272870540618896, |
| "learning_rate": 0.001, |
| "loss": 1.7063, |
| "step": 154900 |
| }, |
| { |
| "epoch": 50.096961861667744, |
| "grad_norm": 2.1595311164855957, |
| "learning_rate": 0.001, |
| "loss": 1.7114, |
| "step": 155000 |
| }, |
| { |
| "epoch": 50.12928248222366, |
| "grad_norm": 2.6563737392425537, |
| "learning_rate": 0.001, |
| "loss": 1.7146, |
| "step": 155100 |
| }, |
| { |
| "epoch": 50.16160310277957, |
| "grad_norm": 2.2539663314819336, |
| "learning_rate": 0.001, |
| "loss": 1.7092, |
| "step": 155200 |
| }, |
| { |
| "epoch": 50.19392372333549, |
| "grad_norm": 2.3137271404266357, |
| "learning_rate": 0.001, |
| "loss": 1.7244, |
| "step": 155300 |
| }, |
| { |
| "epoch": 50.2262443438914, |
| "grad_norm": 1.7513139247894287, |
| "learning_rate": 0.001, |
| "loss": 1.7423, |
| "step": 155400 |
| }, |
| { |
| "epoch": 50.25856496444732, |
| "grad_norm": 2.2095792293548584, |
| "learning_rate": 0.001, |
| "loss": 1.7539, |
| "step": 155500 |
| }, |
| { |
| "epoch": 50.29088558500323, |
| "grad_norm": 2.0682663917541504, |
| "learning_rate": 0.001, |
| "loss": 1.7331, |
| "step": 155600 |
| }, |
| { |
| "epoch": 50.323206205559146, |
| "grad_norm": 2.1839566230773926, |
| "learning_rate": 0.001, |
| "loss": 1.7547, |
| "step": 155700 |
| }, |
| { |
| "epoch": 50.35552682611506, |
| "grad_norm": 1.9047203063964844, |
| "learning_rate": 0.001, |
| "loss": 1.7365, |
| "step": 155800 |
| }, |
| { |
| "epoch": 50.387847446670975, |
| "grad_norm": 2.428255558013916, |
| "learning_rate": 0.001, |
| "loss": 1.7581, |
| "step": 155900 |
| }, |
| { |
| "epoch": 50.42016806722689, |
| "grad_norm": 2.507028102874756, |
| "learning_rate": 0.001, |
| "loss": 1.7549, |
| "step": 156000 |
| }, |
| { |
| "epoch": 50.452488687782804, |
| "grad_norm": 2.5041208267211914, |
| "learning_rate": 0.001, |
| "loss": 1.7576, |
| "step": 156100 |
| }, |
| { |
| "epoch": 50.48480930833872, |
| "grad_norm": 1.992263913154602, |
| "learning_rate": 0.001, |
| "loss": 1.7626, |
| "step": 156200 |
| }, |
| { |
| "epoch": 50.51712992889463, |
| "grad_norm": 2.5200653076171875, |
| "learning_rate": 0.001, |
| "loss": 1.7646, |
| "step": 156300 |
| }, |
| { |
| "epoch": 50.54945054945055, |
| "grad_norm": 1.9477589130401611, |
| "learning_rate": 0.001, |
| "loss": 1.7755, |
| "step": 156400 |
| }, |
| { |
| "epoch": 50.58177117000646, |
| "grad_norm": 2.513901710510254, |
| "learning_rate": 0.001, |
| "loss": 1.7761, |
| "step": 156500 |
| }, |
| { |
| "epoch": 50.61409179056238, |
| "grad_norm": 2.063380718231201, |
| "learning_rate": 0.001, |
| "loss": 1.7887, |
| "step": 156600 |
| }, |
| { |
| "epoch": 50.64641241111829, |
| "grad_norm": 2.3076422214508057, |
| "learning_rate": 0.001, |
| "loss": 1.7691, |
| "step": 156700 |
| }, |
| { |
| "epoch": 50.678733031674206, |
| "grad_norm": 2.060290813446045, |
| "learning_rate": 0.001, |
| "loss": 1.7846, |
| "step": 156800 |
| }, |
| { |
| "epoch": 50.71105365223012, |
| "grad_norm": 2.024672031402588, |
| "learning_rate": 0.001, |
| "loss": 1.8027, |
| "step": 156900 |
| }, |
| { |
| "epoch": 50.743374272786035, |
| "grad_norm": 1.648667335510254, |
| "learning_rate": 0.001, |
| "loss": 1.803, |
| "step": 157000 |
| }, |
| { |
| "epoch": 50.77569489334195, |
| "grad_norm": 1.7075327634811401, |
| "learning_rate": 0.001, |
| "loss": 1.8164, |
| "step": 157100 |
| }, |
| { |
| "epoch": 50.808015513897864, |
| "grad_norm": 2.504213809967041, |
| "learning_rate": 0.001, |
| "loss": 1.8026, |
| "step": 157200 |
| }, |
| { |
| "epoch": 50.84033613445378, |
| "grad_norm": 2.695814847946167, |
| "learning_rate": 0.001, |
| "loss": 1.8074, |
| "step": 157300 |
| }, |
| { |
| "epoch": 50.87265675500969, |
| "grad_norm": 2.0666091442108154, |
| "learning_rate": 0.001, |
| "loss": 1.8145, |
| "step": 157400 |
| }, |
| { |
| "epoch": 50.90497737556561, |
| "grad_norm": 2.5069732666015625, |
| "learning_rate": 0.001, |
| "loss": 1.8024, |
| "step": 157500 |
| }, |
| { |
| "epoch": 50.93729799612152, |
| "grad_norm": 2.342129945755005, |
| "learning_rate": 0.001, |
| "loss": 1.8082, |
| "step": 157600 |
| }, |
| { |
| "epoch": 50.96961861667744, |
| "grad_norm": 1.9542409181594849, |
| "learning_rate": 0.001, |
| "loss": 1.8148, |
| "step": 157700 |
| }, |
| { |
| "epoch": 51.00193923723336, |
| "grad_norm": 1.3802143335342407, |
| "learning_rate": 0.001, |
| "loss": 1.8217, |
| "step": 157800 |
| }, |
| { |
| "epoch": 51.03425985778927, |
| "grad_norm": 1.5042811632156372, |
| "learning_rate": 0.001, |
| "loss": 1.6777, |
| "step": 157900 |
| }, |
| { |
| "epoch": 51.06658047834519, |
| "grad_norm": 1.4837565422058105, |
| "learning_rate": 0.001, |
| "loss": 1.6913, |
| "step": 158000 |
| }, |
| { |
| "epoch": 51.0989010989011, |
| "grad_norm": 1.344022274017334, |
| "learning_rate": 0.001, |
| "loss": 1.7002, |
| "step": 158100 |
| }, |
| { |
| "epoch": 51.13122171945702, |
| "grad_norm": 1.92073655128479, |
| "learning_rate": 0.001, |
| "loss": 1.686, |
| "step": 158200 |
| }, |
| { |
| "epoch": 51.16354234001293, |
| "grad_norm": 1.4150066375732422, |
| "learning_rate": 0.001, |
| "loss": 1.7084, |
| "step": 158300 |
| }, |
| { |
| "epoch": 51.195862960568846, |
| "grad_norm": 1.204180359840393, |
| "learning_rate": 0.001, |
| "loss": 1.7351, |
| "step": 158400 |
| }, |
| { |
| "epoch": 51.22818358112476, |
| "grad_norm": 1.1352612972259521, |
| "learning_rate": 0.001, |
| "loss": 1.7265, |
| "step": 158500 |
| }, |
| { |
| "epoch": 51.260504201680675, |
| "grad_norm": 1.162936806678772, |
| "learning_rate": 0.001, |
| "loss": 1.725, |
| "step": 158600 |
| }, |
| { |
| "epoch": 51.29282482223659, |
| "grad_norm": 1.576052188873291, |
| "learning_rate": 0.001, |
| "loss": 1.7206, |
| "step": 158700 |
| }, |
| { |
| "epoch": 51.325145442792504, |
| "grad_norm": 1.7167997360229492, |
| "learning_rate": 0.001, |
| "loss": 1.7223, |
| "step": 158800 |
| }, |
| { |
| "epoch": 51.35746606334842, |
| "grad_norm": 1.7025160789489746, |
| "learning_rate": 0.001, |
| "loss": 1.7241, |
| "step": 158900 |
| }, |
| { |
| "epoch": 51.38978668390433, |
| "grad_norm": 1.5888192653656006, |
| "learning_rate": 0.001, |
| "loss": 1.7366, |
| "step": 159000 |
| }, |
| { |
| "epoch": 51.42210730446025, |
| "grad_norm": 1.263992190361023, |
| "learning_rate": 0.001, |
| "loss": 1.7557, |
| "step": 159100 |
| }, |
| { |
| "epoch": 51.45442792501616, |
| "grad_norm": 1.2514597177505493, |
| "learning_rate": 0.001, |
| "loss": 1.746, |
| "step": 159200 |
| }, |
| { |
| "epoch": 51.48674854557208, |
| "grad_norm": 1.5387557744979858, |
| "learning_rate": 0.001, |
| "loss": 1.7493, |
| "step": 159300 |
| }, |
| { |
| "epoch": 51.51906916612799, |
| "grad_norm": 1.4407093524932861, |
| "learning_rate": 0.001, |
| "loss": 1.7484, |
| "step": 159400 |
| }, |
| { |
| "epoch": 51.551389786683906, |
| "grad_norm": 1.0381523370742798, |
| "learning_rate": 0.001, |
| "loss": 1.7688, |
| "step": 159500 |
| }, |
| { |
| "epoch": 51.58371040723982, |
| "grad_norm": 1.8995901346206665, |
| "learning_rate": 0.001, |
| "loss": 1.7602, |
| "step": 159600 |
| }, |
| { |
| "epoch": 51.616031027795735, |
| "grad_norm": 1.7138015031814575, |
| "learning_rate": 0.001, |
| "loss": 1.7752, |
| "step": 159700 |
| }, |
| { |
| "epoch": 51.64835164835165, |
| "grad_norm": 1.5023847818374634, |
| "learning_rate": 0.001, |
| "loss": 1.7622, |
| "step": 159800 |
| }, |
| { |
| "epoch": 51.680672268907564, |
| "grad_norm": 1.310754656791687, |
| "learning_rate": 0.001, |
| "loss": 1.7766, |
| "step": 159900 |
| }, |
| { |
| "epoch": 51.71299288946348, |
| "grad_norm": 1.8020479679107666, |
| "learning_rate": 0.001, |
| "loss": 1.7752, |
| "step": 160000 |
| }, |
| { |
| "epoch": 51.74531351001939, |
| "grad_norm": 1.5402014255523682, |
| "learning_rate": 0.001, |
| "loss": 1.7729, |
| "step": 160100 |
| }, |
| { |
| "epoch": 51.77763413057531, |
| "grad_norm": 1.6112022399902344, |
| "learning_rate": 0.001, |
| "loss": 1.7996, |
| "step": 160200 |
| }, |
| { |
| "epoch": 51.80995475113122, |
| "grad_norm": 1.7574292421340942, |
| "learning_rate": 0.001, |
| "loss": 1.7823, |
| "step": 160300 |
| }, |
| { |
| "epoch": 51.84227537168714, |
| "grad_norm": 1.387109398841858, |
| "learning_rate": 0.001, |
| "loss": 1.7757, |
| "step": 160400 |
| }, |
| { |
| "epoch": 51.87459599224305, |
| "grad_norm": 1.5390779972076416, |
| "learning_rate": 0.001, |
| "loss": 1.8018, |
| "step": 160500 |
| }, |
| { |
| "epoch": 51.906916612798966, |
| "grad_norm": 1.2343939542770386, |
| "learning_rate": 0.001, |
| "loss": 1.7902, |
| "step": 160600 |
| }, |
| { |
| "epoch": 51.93923723335488, |
| "grad_norm": 1.5530798435211182, |
| "learning_rate": 0.001, |
| "loss": 1.794, |
| "step": 160700 |
| }, |
| { |
| "epoch": 51.971557853910795, |
| "grad_norm": 1.6617400646209717, |
| "learning_rate": 0.001, |
| "loss": 1.8228, |
| "step": 160800 |
| }, |
| { |
| "epoch": 52.00387847446671, |
| "grad_norm": 1.4502238035202026, |
| "learning_rate": 0.001, |
| "loss": 1.8277, |
| "step": 160900 |
| }, |
| { |
| "epoch": 52.036199095022624, |
| "grad_norm": 1.5478730201721191, |
| "learning_rate": 0.001, |
| "loss": 1.6583, |
| "step": 161000 |
| }, |
| { |
| "epoch": 52.06851971557854, |
| "grad_norm": 1.4118196964263916, |
| "learning_rate": 0.001, |
| "loss": 1.6602, |
| "step": 161100 |
| }, |
| { |
| "epoch": 52.10084033613445, |
| "grad_norm": 1.292994737625122, |
| "learning_rate": 0.001, |
| "loss": 1.7036, |
| "step": 161200 |
| }, |
| { |
| "epoch": 52.13316095669037, |
| "grad_norm": 1.2106192111968994, |
| "learning_rate": 0.001, |
| "loss": 1.7012, |
| "step": 161300 |
| }, |
| { |
| "epoch": 52.16548157724628, |
| "grad_norm": 1.943745493888855, |
| "learning_rate": 0.001, |
| "loss": 1.6879, |
| "step": 161400 |
| }, |
| { |
| "epoch": 52.1978021978022, |
| "grad_norm": 1.3990156650543213, |
| "learning_rate": 0.001, |
| "loss": 1.7025, |
| "step": 161500 |
| }, |
| { |
| "epoch": 52.23012281835811, |
| "grad_norm": 1.3725271224975586, |
| "learning_rate": 0.001, |
| "loss": 1.7142, |
| "step": 161600 |
| }, |
| { |
| "epoch": 52.262443438914026, |
| "grad_norm": 1.4279320240020752, |
| "learning_rate": 0.001, |
| "loss": 1.7224, |
| "step": 161700 |
| }, |
| { |
| "epoch": 52.29476405946994, |
| "grad_norm": 1.4990894794464111, |
| "learning_rate": 0.001, |
| "loss": 1.7182, |
| "step": 161800 |
| }, |
| { |
| "epoch": 52.327084680025855, |
| "grad_norm": 1.810198426246643, |
| "learning_rate": 0.001, |
| "loss": 1.7324, |
| "step": 161900 |
| }, |
| { |
| "epoch": 52.35940530058177, |
| "grad_norm": 1.5801573991775513, |
| "learning_rate": 0.001, |
| "loss": 1.7286, |
| "step": 162000 |
| }, |
| { |
| "epoch": 52.391725921137684, |
| "grad_norm": 1.611038088798523, |
| "learning_rate": 0.001, |
| "loss": 1.7272, |
| "step": 162100 |
| }, |
| { |
| "epoch": 52.4240465416936, |
| "grad_norm": 1.4676355123519897, |
| "learning_rate": 0.001, |
| "loss": 1.728, |
| "step": 162200 |
| }, |
| { |
| "epoch": 52.456367162249514, |
| "grad_norm": 1.0789649486541748, |
| "learning_rate": 0.001, |
| "loss": 1.7314, |
| "step": 162300 |
| }, |
| { |
| "epoch": 52.48868778280543, |
| "grad_norm": 1.4516468048095703, |
| "learning_rate": 0.001, |
| "loss": 1.7339, |
| "step": 162400 |
| }, |
| { |
| "epoch": 52.52100840336134, |
| "grad_norm": 1.4005082845687866, |
| "learning_rate": 0.001, |
| "loss": 1.7325, |
| "step": 162500 |
| }, |
| { |
| "epoch": 52.55332902391726, |
| "grad_norm": 1.4502547979354858, |
| "learning_rate": 0.001, |
| "loss": 1.7495, |
| "step": 162600 |
| }, |
| { |
| "epoch": 52.58564964447317, |
| "grad_norm": 1.5237691402435303, |
| "learning_rate": 0.001, |
| "loss": 1.7472, |
| "step": 162700 |
| }, |
| { |
| "epoch": 52.617970265029086, |
| "grad_norm": 1.3226033449172974, |
| "learning_rate": 0.001, |
| "loss": 1.7567, |
| "step": 162800 |
| }, |
| { |
| "epoch": 52.650290885585, |
| "grad_norm": 1.2818242311477661, |
| "learning_rate": 0.001, |
| "loss": 1.7681, |
| "step": 162900 |
| }, |
| { |
| "epoch": 52.682611506140915, |
| "grad_norm": 1.0916748046875, |
| "learning_rate": 0.001, |
| "loss": 1.7691, |
| "step": 163000 |
| }, |
| { |
| "epoch": 52.71493212669683, |
| "grad_norm": 1.3001798391342163, |
| "learning_rate": 0.001, |
| "loss": 1.7714, |
| "step": 163100 |
| }, |
| { |
| "epoch": 52.747252747252745, |
| "grad_norm": 1.2963659763336182, |
| "learning_rate": 0.001, |
| "loss": 1.7761, |
| "step": 163200 |
| }, |
| { |
| "epoch": 52.77957336780866, |
| "grad_norm": 1.3647947311401367, |
| "learning_rate": 0.001, |
| "loss": 1.7662, |
| "step": 163300 |
| }, |
| { |
| "epoch": 52.811893988364574, |
| "grad_norm": 0.9298973679542542, |
| "learning_rate": 0.001, |
| "loss": 1.7631, |
| "step": 163400 |
| }, |
| { |
| "epoch": 52.84421460892049, |
| "grad_norm": 1.4588956832885742, |
| "learning_rate": 0.001, |
| "loss": 1.7802, |
| "step": 163500 |
| }, |
| { |
| "epoch": 52.8765352294764, |
| "grad_norm": 1.4117836952209473, |
| "learning_rate": 0.001, |
| "loss": 1.7943, |
| "step": 163600 |
| }, |
| { |
| "epoch": 52.90885585003232, |
| "grad_norm": 1.3966608047485352, |
| "learning_rate": 0.001, |
| "loss": 1.7876, |
| "step": 163700 |
| }, |
| { |
| "epoch": 52.94117647058823, |
| "grad_norm": 1.2959389686584473, |
| "learning_rate": 0.001, |
| "loss": 1.7929, |
| "step": 163800 |
| }, |
| { |
| "epoch": 52.97349709114415, |
| "grad_norm": 1.3205302953720093, |
| "learning_rate": 0.001, |
| "loss": 1.8011, |
| "step": 163900 |
| }, |
| { |
| "epoch": 53.00581771170007, |
| "grad_norm": 1.3091896772384644, |
| "learning_rate": 0.001, |
| "loss": 1.7603, |
| "step": 164000 |
| }, |
| { |
| "epoch": 53.03813833225598, |
| "grad_norm": 1.2790523767471313, |
| "learning_rate": 0.001, |
| "loss": 1.6707, |
| "step": 164100 |
| }, |
| { |
| "epoch": 53.0704589528119, |
| "grad_norm": 1.5966098308563232, |
| "learning_rate": 0.001, |
| "loss": 1.6345, |
| "step": 164200 |
| }, |
| { |
| "epoch": 53.10277957336781, |
| "grad_norm": 1.3328355550765991, |
| "learning_rate": 0.001, |
| "loss": 1.6752, |
| "step": 164300 |
| }, |
| { |
| "epoch": 53.135100193923726, |
| "grad_norm": 1.163203239440918, |
| "learning_rate": 0.001, |
| "loss": 1.6842, |
| "step": 164400 |
| }, |
| { |
| "epoch": 53.16742081447964, |
| "grad_norm": 1.2743290662765503, |
| "learning_rate": 0.001, |
| "loss": 1.6861, |
| "step": 164500 |
| }, |
| { |
| "epoch": 53.199741435035556, |
| "grad_norm": 1.0141232013702393, |
| "learning_rate": 0.001, |
| "loss": 1.6939, |
| "step": 164600 |
| }, |
| { |
| "epoch": 53.23206205559147, |
| "grad_norm": 1.482993483543396, |
| "learning_rate": 0.001, |
| "loss": 1.6931, |
| "step": 164700 |
| }, |
| { |
| "epoch": 53.264382676147385, |
| "grad_norm": 1.7234373092651367, |
| "learning_rate": 0.001, |
| "loss": 1.7039, |
| "step": 164800 |
| }, |
| { |
| "epoch": 53.2967032967033, |
| "grad_norm": 1.4995521306991577, |
| "learning_rate": 0.001, |
| "loss": 1.7132, |
| "step": 164900 |
| }, |
| { |
| "epoch": 53.329023917259214, |
| "grad_norm": 1.4815672636032104, |
| "learning_rate": 0.001, |
| "loss": 1.724, |
| "step": 165000 |
| }, |
| { |
| "epoch": 53.36134453781513, |
| "grad_norm": 1.2232056856155396, |
| "learning_rate": 0.001, |
| "loss": 1.7088, |
| "step": 165100 |
| }, |
| { |
| "epoch": 53.39366515837104, |
| "grad_norm": 1.2571676969528198, |
| "learning_rate": 0.001, |
| "loss": 1.7178, |
| "step": 165200 |
| }, |
| { |
| "epoch": 53.42598577892696, |
| "grad_norm": 1.4687373638153076, |
| "learning_rate": 0.001, |
| "loss": 1.7219, |
| "step": 165300 |
| }, |
| { |
| "epoch": 53.45830639948287, |
| "grad_norm": 1.3377008438110352, |
| "learning_rate": 0.001, |
| "loss": 1.7198, |
| "step": 165400 |
| }, |
| { |
| "epoch": 53.49062702003879, |
| "grad_norm": 1.3167778253555298, |
| "learning_rate": 0.001, |
| "loss": 1.7242, |
| "step": 165500 |
| }, |
| { |
| "epoch": 53.5229476405947, |
| "grad_norm": 1.419568419456482, |
| "learning_rate": 0.001, |
| "loss": 1.7394, |
| "step": 165600 |
| }, |
| { |
| "epoch": 53.555268261150616, |
| "grad_norm": 1.1394786834716797, |
| "learning_rate": 0.001, |
| "loss": 1.7384, |
| "step": 165700 |
| }, |
| { |
| "epoch": 53.58758888170653, |
| "grad_norm": 1.2440487146377563, |
| "learning_rate": 0.001, |
| "loss": 1.7182, |
| "step": 165800 |
| }, |
| { |
| "epoch": 53.619909502262445, |
| "grad_norm": 1.3777581453323364, |
| "learning_rate": 0.001, |
| "loss": 1.7425, |
| "step": 165900 |
| }, |
| { |
| "epoch": 53.65223012281836, |
| "grad_norm": 1.3118562698364258, |
| "learning_rate": 0.001, |
| "loss": 1.7385, |
| "step": 166000 |
| }, |
| { |
| "epoch": 53.684550743374274, |
| "grad_norm": 1.4936604499816895, |
| "learning_rate": 0.001, |
| "loss": 1.7678, |
| "step": 166100 |
| }, |
| { |
| "epoch": 53.71687136393019, |
| "grad_norm": 1.2285600900650024, |
| "learning_rate": 0.001, |
| "loss": 1.7554, |
| "step": 166200 |
| }, |
| { |
| "epoch": 53.7491919844861, |
| "grad_norm": 1.1528136730194092, |
| "learning_rate": 0.001, |
| "loss": 1.7527, |
| "step": 166300 |
| }, |
| { |
| "epoch": 53.78151260504202, |
| "grad_norm": 1.2130924463272095, |
| "learning_rate": 0.001, |
| "loss": 1.7759, |
| "step": 166400 |
| }, |
| { |
| "epoch": 53.81383322559793, |
| "grad_norm": 1.3952350616455078, |
| "learning_rate": 0.001, |
| "loss": 1.7483, |
| "step": 166500 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 1.3838261365890503, |
| "learning_rate": 0.001, |
| "loss": 1.7562, |
| "step": 166600 |
| }, |
| { |
| "epoch": 53.87847446670976, |
| "grad_norm": 1.4428045749664307, |
| "learning_rate": 0.001, |
| "loss": 1.7718, |
| "step": 166700 |
| }, |
| { |
| "epoch": 53.910795087265676, |
| "grad_norm": 0.8531396389007568, |
| "learning_rate": 0.001, |
| "loss": 1.7756, |
| "step": 166800 |
| }, |
| { |
| "epoch": 53.94311570782159, |
| "grad_norm": 1.3588244915008545, |
| "learning_rate": 0.001, |
| "loss": 1.7818, |
| "step": 166900 |
| }, |
| { |
| "epoch": 53.975436328377505, |
| "grad_norm": 1.445267915725708, |
| "learning_rate": 0.001, |
| "loss": 1.786, |
| "step": 167000 |
| }, |
| { |
| "epoch": 54.00775694893342, |
| "grad_norm": 1.230377435684204, |
| "learning_rate": 0.001, |
| "loss": 1.7735, |
| "step": 167100 |
| }, |
| { |
| "epoch": 54.040077569489334, |
| "grad_norm": 1.4522416591644287, |
| "learning_rate": 0.001, |
| "loss": 1.6596, |
| "step": 167200 |
| }, |
| { |
| "epoch": 54.07239819004525, |
| "grad_norm": 1.166033148765564, |
| "learning_rate": 0.001, |
| "loss": 1.6483, |
| "step": 167300 |
| }, |
| { |
| "epoch": 54.10471881060116, |
| "grad_norm": 1.9019384384155273, |
| "learning_rate": 0.001, |
| "loss": 1.6614, |
| "step": 167400 |
| }, |
| { |
| "epoch": 54.13703943115708, |
| "grad_norm": 1.1690678596496582, |
| "learning_rate": 0.001, |
| "loss": 1.6648, |
| "step": 167500 |
| }, |
| { |
| "epoch": 54.16936005171299, |
| "grad_norm": 1.9268842935562134, |
| "learning_rate": 0.001, |
| "loss": 1.6621, |
| "step": 167600 |
| }, |
| { |
| "epoch": 54.20168067226891, |
| "grad_norm": 1.696664810180664, |
| "learning_rate": 0.001, |
| "loss": 1.6677, |
| "step": 167700 |
| }, |
| { |
| "epoch": 54.23400129282482, |
| "grad_norm": 1.063439965248108, |
| "learning_rate": 0.001, |
| "loss": 1.6917, |
| "step": 167800 |
| }, |
| { |
| "epoch": 54.266321913380736, |
| "grad_norm": 1.134600281715393, |
| "learning_rate": 0.001, |
| "loss": 1.6965, |
| "step": 167900 |
| }, |
| { |
| "epoch": 54.29864253393665, |
| "grad_norm": 1.2934255599975586, |
| "learning_rate": 0.001, |
| "loss": 1.689, |
| "step": 168000 |
| }, |
| { |
| "epoch": 54.330963154492565, |
| "grad_norm": 2.243715524673462, |
| "learning_rate": 0.001, |
| "loss": 1.6987, |
| "step": 168100 |
| }, |
| { |
| "epoch": 54.36328377504848, |
| "grad_norm": 1.3866597414016724, |
| "learning_rate": 0.001, |
| "loss": 1.702, |
| "step": 168200 |
| }, |
| { |
| "epoch": 54.395604395604394, |
| "grad_norm": 1.2409111261367798, |
| "learning_rate": 0.001, |
| "loss": 1.7087, |
| "step": 168300 |
| }, |
| { |
| "epoch": 54.42792501616031, |
| "grad_norm": 1.5567268133163452, |
| "learning_rate": 0.001, |
| "loss": 1.7126, |
| "step": 168400 |
| }, |
| { |
| "epoch": 54.46024563671622, |
| "grad_norm": 1.2952624559402466, |
| "learning_rate": 0.001, |
| "loss": 1.7203, |
| "step": 168500 |
| }, |
| { |
| "epoch": 54.49256625727214, |
| "grad_norm": 1.1444261074066162, |
| "learning_rate": 0.001, |
| "loss": 1.708, |
| "step": 168600 |
| }, |
| { |
| "epoch": 54.52488687782805, |
| "grad_norm": 1.1505794525146484, |
| "learning_rate": 0.001, |
| "loss": 1.7202, |
| "step": 168700 |
| }, |
| { |
| "epoch": 54.55720749838397, |
| "grad_norm": 1.1038155555725098, |
| "learning_rate": 0.001, |
| "loss": 1.7309, |
| "step": 168800 |
| }, |
| { |
| "epoch": 54.58952811893988, |
| "grad_norm": 1.279555320739746, |
| "learning_rate": 0.001, |
| "loss": 1.7425, |
| "step": 168900 |
| }, |
| { |
| "epoch": 54.621848739495796, |
| "grad_norm": 1.2210204601287842, |
| "learning_rate": 0.001, |
| "loss": 1.7291, |
| "step": 169000 |
| }, |
| { |
| "epoch": 54.65416936005171, |
| "grad_norm": 1.162758231163025, |
| "learning_rate": 0.001, |
| "loss": 1.7489, |
| "step": 169100 |
| }, |
| { |
| "epoch": 54.686489980607625, |
| "grad_norm": 1.2171443700790405, |
| "learning_rate": 0.001, |
| "loss": 1.7409, |
| "step": 169200 |
| }, |
| { |
| "epoch": 54.71881060116354, |
| "grad_norm": 1.1131047010421753, |
| "learning_rate": 0.001, |
| "loss": 1.7244, |
| "step": 169300 |
| }, |
| { |
| "epoch": 54.751131221719454, |
| "grad_norm": 1.2578123807907104, |
| "learning_rate": 0.001, |
| "loss": 1.7508, |
| "step": 169400 |
| }, |
| { |
| "epoch": 54.78345184227537, |
| "grad_norm": 1.4146089553833008, |
| "learning_rate": 0.001, |
| "loss": 1.7498, |
| "step": 169500 |
| }, |
| { |
| "epoch": 54.81577246283128, |
| "grad_norm": 1.197304129600525, |
| "learning_rate": 0.001, |
| "loss": 1.756, |
| "step": 169600 |
| }, |
| { |
| "epoch": 54.8480930833872, |
| "grad_norm": 1.5083588361740112, |
| "learning_rate": 0.001, |
| "loss": 1.7443, |
| "step": 169700 |
| }, |
| { |
| "epoch": 54.88041370394311, |
| "grad_norm": 1.3559470176696777, |
| "learning_rate": 0.001, |
| "loss": 1.7423, |
| "step": 169800 |
| }, |
| { |
| "epoch": 54.91273432449903, |
| "grad_norm": 1.4131273031234741, |
| "learning_rate": 0.001, |
| "loss": 1.7629, |
| "step": 169900 |
| }, |
| { |
| "epoch": 54.94505494505494, |
| "grad_norm": 1.3871897459030151, |
| "learning_rate": 0.001, |
| "loss": 1.774, |
| "step": 170000 |
| }, |
| { |
| "epoch": 54.977375565610856, |
| "grad_norm": 1.2662379741668701, |
| "learning_rate": 0.001, |
| "loss": 1.7679, |
| "step": 170100 |
| }, |
| { |
| "epoch": 55.00969618616678, |
| "grad_norm": 1.1516029834747314, |
| "learning_rate": 0.001, |
| "loss": 1.7404, |
| "step": 170200 |
| }, |
| { |
| "epoch": 55.04201680672269, |
| "grad_norm": 1.3849647045135498, |
| "learning_rate": 0.001, |
| "loss": 1.6419, |
| "step": 170300 |
| }, |
| { |
| "epoch": 55.07433742727861, |
| "grad_norm": 1.3158921003341675, |
| "learning_rate": 0.001, |
| "loss": 1.6436, |
| "step": 170400 |
| }, |
| { |
| "epoch": 55.10665804783452, |
| "grad_norm": 1.2415001392364502, |
| "learning_rate": 0.001, |
| "loss": 1.6498, |
| "step": 170500 |
| }, |
| { |
| "epoch": 55.138978668390436, |
| "grad_norm": 1.701054334640503, |
| "learning_rate": 0.001, |
| "loss": 1.6463, |
| "step": 170600 |
| }, |
| { |
| "epoch": 55.17129928894635, |
| "grad_norm": 1.5335909128189087, |
| "learning_rate": 0.001, |
| "loss": 1.6358, |
| "step": 170700 |
| }, |
| { |
| "epoch": 55.203619909502265, |
| "grad_norm": 1.6681565046310425, |
| "learning_rate": 0.001, |
| "loss": 1.6698, |
| "step": 170800 |
| }, |
| { |
| "epoch": 55.23594053005818, |
| "grad_norm": 1.212498426437378, |
| "learning_rate": 0.001, |
| "loss": 1.6935, |
| "step": 170900 |
| }, |
| { |
| "epoch": 55.268261150614094, |
| "grad_norm": 1.3680095672607422, |
| "learning_rate": 0.001, |
| "loss": 1.6751, |
| "step": 171000 |
| }, |
| { |
| "epoch": 55.30058177117001, |
| "grad_norm": 1.35792076587677, |
| "learning_rate": 0.001, |
| "loss": 1.6836, |
| "step": 171100 |
| }, |
| { |
| "epoch": 55.33290239172592, |
| "grad_norm": 1.379514217376709, |
| "learning_rate": 0.001, |
| "loss": 1.6869, |
| "step": 171200 |
| }, |
| { |
| "epoch": 55.36522301228184, |
| "grad_norm": 1.1992942094802856, |
| "learning_rate": 0.001, |
| "loss": 1.679, |
| "step": 171300 |
| }, |
| { |
| "epoch": 55.39754363283775, |
| "grad_norm": 1.0317083597183228, |
| "learning_rate": 0.001, |
| "loss": 1.7091, |
| "step": 171400 |
| }, |
| { |
| "epoch": 55.42986425339367, |
| "grad_norm": 1.0589741468429565, |
| "learning_rate": 0.001, |
| "loss": 1.7057, |
| "step": 171500 |
| }, |
| { |
| "epoch": 55.46218487394958, |
| "grad_norm": 1.4371219873428345, |
| "learning_rate": 0.001, |
| "loss": 1.706, |
| "step": 171600 |
| }, |
| { |
| "epoch": 55.494505494505496, |
| "grad_norm": 1.698289155960083, |
| "learning_rate": 0.001, |
| "loss": 1.7089, |
| "step": 171700 |
| }, |
| { |
| "epoch": 55.52682611506141, |
| "grad_norm": 1.3261172771453857, |
| "learning_rate": 0.001, |
| "loss": 1.6997, |
| "step": 171800 |
| }, |
| { |
| "epoch": 55.559146735617325, |
| "grad_norm": 1.3861745595932007, |
| "learning_rate": 0.001, |
| "loss": 1.718, |
| "step": 171900 |
| }, |
| { |
| "epoch": 55.59146735617324, |
| "grad_norm": 1.6267752647399902, |
| "learning_rate": 0.001, |
| "loss": 1.7208, |
| "step": 172000 |
| }, |
| { |
| "epoch": 55.623787976729155, |
| "grad_norm": 1.1328678131103516, |
| "learning_rate": 0.001, |
| "loss": 1.7223, |
| "step": 172100 |
| }, |
| { |
| "epoch": 55.65610859728507, |
| "grad_norm": 1.0877766609191895, |
| "learning_rate": 0.001, |
| "loss": 1.7178, |
| "step": 172200 |
| }, |
| { |
| "epoch": 55.688429217840984, |
| "grad_norm": 1.3555140495300293, |
| "learning_rate": 0.001, |
| "loss": 1.7219, |
| "step": 172300 |
| }, |
| { |
| "epoch": 55.7207498383969, |
| "grad_norm": 1.6600762605667114, |
| "learning_rate": 0.001, |
| "loss": 1.7403, |
| "step": 172400 |
| }, |
| { |
| "epoch": 55.75307045895281, |
| "grad_norm": 1.2952337265014648, |
| "learning_rate": 0.001, |
| "loss": 1.7195, |
| "step": 172500 |
| }, |
| { |
| "epoch": 55.78539107950873, |
| "grad_norm": 1.478588342666626, |
| "learning_rate": 0.001, |
| "loss": 1.7345, |
| "step": 172600 |
| }, |
| { |
| "epoch": 55.81771170006464, |
| "grad_norm": 1.2930961847305298, |
| "learning_rate": 0.001, |
| "loss": 1.7363, |
| "step": 172700 |
| }, |
| { |
| "epoch": 55.85003232062056, |
| "grad_norm": 1.9322788715362549, |
| "learning_rate": 0.001, |
| "loss": 1.7384, |
| "step": 172800 |
| }, |
| { |
| "epoch": 55.88235294117647, |
| "grad_norm": 1.3968391418457031, |
| "learning_rate": 0.001, |
| "loss": 1.7524, |
| "step": 172900 |
| }, |
| { |
| "epoch": 55.914673561732386, |
| "grad_norm": 1.6979691982269287, |
| "learning_rate": 0.001, |
| "loss": 1.7424, |
| "step": 173000 |
| }, |
| { |
| "epoch": 55.9469941822883, |
| "grad_norm": 1.0870234966278076, |
| "learning_rate": 0.001, |
| "loss": 1.7632, |
| "step": 173100 |
| }, |
| { |
| "epoch": 55.979314802844215, |
| "grad_norm": 1.294993281364441, |
| "learning_rate": 0.001, |
| "loss": 1.7644, |
| "step": 173200 |
| }, |
| { |
| "epoch": 56.01163542340013, |
| "grad_norm": 1.5237852334976196, |
| "learning_rate": 0.001, |
| "loss": 1.7015, |
| "step": 173300 |
| }, |
| { |
| "epoch": 56.043956043956044, |
| "grad_norm": 1.554025650024414, |
| "learning_rate": 0.001, |
| "loss": 1.6182, |
| "step": 173400 |
| }, |
| { |
| "epoch": 56.07627666451196, |
| "grad_norm": 1.3839563131332397, |
| "learning_rate": 0.001, |
| "loss": 1.6317, |
| "step": 173500 |
| }, |
| { |
| "epoch": 56.10859728506787, |
| "grad_norm": 1.6912566423416138, |
| "learning_rate": 0.001, |
| "loss": 1.6477, |
| "step": 173600 |
| }, |
| { |
| "epoch": 56.14091790562379, |
| "grad_norm": 1.4600452184677124, |
| "learning_rate": 0.001, |
| "loss": 1.6555, |
| "step": 173700 |
| }, |
| { |
| "epoch": 56.1732385261797, |
| "grad_norm": 1.976299524307251, |
| "learning_rate": 0.001, |
| "loss": 1.6481, |
| "step": 173800 |
| }, |
| { |
| "epoch": 56.20555914673562, |
| "grad_norm": 1.5101046562194824, |
| "learning_rate": 0.001, |
| "loss": 1.6432, |
| "step": 173900 |
| }, |
| { |
| "epoch": 56.23787976729153, |
| "grad_norm": 1.807742953300476, |
| "learning_rate": 0.001, |
| "loss": 1.6469, |
| "step": 174000 |
| }, |
| { |
| "epoch": 56.270200387847446, |
| "grad_norm": 1.5435492992401123, |
| "learning_rate": 0.001, |
| "loss": 1.66, |
| "step": 174100 |
| }, |
| { |
| "epoch": 56.30252100840336, |
| "grad_norm": 1.4853410720825195, |
| "learning_rate": 0.001, |
| "loss": 1.6784, |
| "step": 174200 |
| }, |
| { |
| "epoch": 56.334841628959275, |
| "grad_norm": 1.39516019821167, |
| "learning_rate": 0.001, |
| "loss": 1.6785, |
| "step": 174300 |
| }, |
| { |
| "epoch": 56.36716224951519, |
| "grad_norm": 1.7564011812210083, |
| "learning_rate": 0.001, |
| "loss": 1.6742, |
| "step": 174400 |
| }, |
| { |
| "epoch": 56.399482870071104, |
| "grad_norm": 1.4495413303375244, |
| "learning_rate": 0.001, |
| "loss": 1.6787, |
| "step": 174500 |
| }, |
| { |
| "epoch": 56.43180349062702, |
| "grad_norm": 1.738829493522644, |
| "learning_rate": 0.001, |
| "loss": 1.6904, |
| "step": 174600 |
| }, |
| { |
| "epoch": 56.46412411118293, |
| "grad_norm": 1.2273107767105103, |
| "learning_rate": 0.001, |
| "loss": 1.6761, |
| "step": 174700 |
| }, |
| { |
| "epoch": 56.49644473173885, |
| "grad_norm": 1.4229944944381714, |
| "learning_rate": 0.001, |
| "loss": 1.6872, |
| "step": 174800 |
| }, |
| { |
| "epoch": 56.52876535229476, |
| "grad_norm": 1.1950942277908325, |
| "learning_rate": 0.001, |
| "loss": 1.6886, |
| "step": 174900 |
| }, |
| { |
| "epoch": 56.56108597285068, |
| "grad_norm": 1.4346133470535278, |
| "learning_rate": 0.001, |
| "loss": 1.7026, |
| "step": 175000 |
| }, |
| { |
| "epoch": 56.59340659340659, |
| "grad_norm": 1.2384557723999023, |
| "learning_rate": 0.001, |
| "loss": 1.7176, |
| "step": 175100 |
| }, |
| { |
| "epoch": 56.625727213962506, |
| "grad_norm": 1.3948698043823242, |
| "learning_rate": 0.001, |
| "loss": 1.7093, |
| "step": 175200 |
| }, |
| { |
| "epoch": 56.65804783451842, |
| "grad_norm": 1.547871708869934, |
| "learning_rate": 0.001, |
| "loss": 1.7273, |
| "step": 175300 |
| }, |
| { |
| "epoch": 56.690368455074335, |
| "grad_norm": 1.5837923288345337, |
| "learning_rate": 0.001, |
| "loss": 1.706, |
| "step": 175400 |
| }, |
| { |
| "epoch": 56.72268907563025, |
| "grad_norm": 1.3171043395996094, |
| "learning_rate": 0.001, |
| "loss": 1.7245, |
| "step": 175500 |
| }, |
| { |
| "epoch": 56.755009696186164, |
| "grad_norm": 1.7027974128723145, |
| "learning_rate": 0.001, |
| "loss": 1.72, |
| "step": 175600 |
| }, |
| { |
| "epoch": 56.78733031674208, |
| "grad_norm": 1.5932236909866333, |
| "learning_rate": 0.001, |
| "loss": 1.731, |
| "step": 175700 |
| }, |
| { |
| "epoch": 56.81965093729799, |
| "grad_norm": 1.2864384651184082, |
| "learning_rate": 0.001, |
| "loss": 1.732, |
| "step": 175800 |
| }, |
| { |
| "epoch": 56.85197155785391, |
| "grad_norm": 1.7263994216918945, |
| "learning_rate": 0.001, |
| "loss": 1.7377, |
| "step": 175900 |
| }, |
| { |
| "epoch": 56.88429217840982, |
| "grad_norm": 1.3134857416152954, |
| "learning_rate": 0.001, |
| "loss": 1.7407, |
| "step": 176000 |
| }, |
| { |
| "epoch": 56.91661279896574, |
| "grad_norm": 1.2284256219863892, |
| "learning_rate": 0.001, |
| "loss": 1.7405, |
| "step": 176100 |
| }, |
| { |
| "epoch": 56.94893341952165, |
| "grad_norm": 1.7951955795288086, |
| "learning_rate": 0.001, |
| "loss": 1.7399, |
| "step": 176200 |
| }, |
| { |
| "epoch": 56.981254040077566, |
| "grad_norm": 1.8334953784942627, |
| "learning_rate": 0.001, |
| "loss": 1.7412, |
| "step": 176300 |
| }, |
| { |
| "epoch": 57.01357466063349, |
| "grad_norm": 1.490244746208191, |
| "learning_rate": 0.001, |
| "loss": 1.6897, |
| "step": 176400 |
| }, |
| { |
| "epoch": 57.0458952811894, |
| "grad_norm": 1.785170316696167, |
| "learning_rate": 0.001, |
| "loss": 1.6153, |
| "step": 176500 |
| }, |
| { |
| "epoch": 57.07821590174532, |
| "grad_norm": 2.492884635925293, |
| "learning_rate": 0.001, |
| "loss": 1.6217, |
| "step": 176600 |
| }, |
| { |
| "epoch": 57.11053652230123, |
| "grad_norm": 2.1509578227996826, |
| "learning_rate": 0.001, |
| "loss": 1.6313, |
| "step": 176700 |
| }, |
| { |
| "epoch": 57.142857142857146, |
| "grad_norm": 1.789116382598877, |
| "learning_rate": 0.001, |
| "loss": 1.6308, |
| "step": 176800 |
| }, |
| { |
| "epoch": 57.17517776341306, |
| "grad_norm": 1.6912237405776978, |
| "learning_rate": 0.001, |
| "loss": 1.6443, |
| "step": 176900 |
| }, |
| { |
| "epoch": 57.207498383968975, |
| "grad_norm": 1.6607928276062012, |
| "learning_rate": 0.001, |
| "loss": 1.6519, |
| "step": 177000 |
| }, |
| { |
| "epoch": 57.23981900452489, |
| "grad_norm": 1.7817693948745728, |
| "learning_rate": 0.001, |
| "loss": 1.6476, |
| "step": 177100 |
| }, |
| { |
| "epoch": 57.272139625080804, |
| "grad_norm": 1.3845174312591553, |
| "learning_rate": 0.001, |
| "loss": 1.6536, |
| "step": 177200 |
| }, |
| { |
| "epoch": 57.30446024563672, |
| "grad_norm": 1.780415415763855, |
| "learning_rate": 0.001, |
| "loss": 1.6431, |
| "step": 177300 |
| }, |
| { |
| "epoch": 57.33678086619263, |
| "grad_norm": 1.345583438873291, |
| "learning_rate": 0.001, |
| "loss": 1.6722, |
| "step": 177400 |
| }, |
| { |
| "epoch": 57.36910148674855, |
| "grad_norm": 2.1150074005126953, |
| "learning_rate": 0.001, |
| "loss": 1.6676, |
| "step": 177500 |
| }, |
| { |
| "epoch": 57.40142210730446, |
| "grad_norm": 1.7680250406265259, |
| "learning_rate": 0.001, |
| "loss": 1.6723, |
| "step": 177600 |
| }, |
| { |
| "epoch": 57.43374272786038, |
| "grad_norm": 1.5597569942474365, |
| "learning_rate": 0.001, |
| "loss": 1.6801, |
| "step": 177700 |
| }, |
| { |
| "epoch": 57.46606334841629, |
| "grad_norm": 1.5379387140274048, |
| "learning_rate": 0.001, |
| "loss": 1.6728, |
| "step": 177800 |
| }, |
| { |
| "epoch": 57.498383968972206, |
| "grad_norm": 1.645971655845642, |
| "learning_rate": 0.001, |
| "loss": 1.6907, |
| "step": 177900 |
| }, |
| { |
| "epoch": 57.53070458952812, |
| "grad_norm": 1.28190016746521, |
| "learning_rate": 0.001, |
| "loss": 1.6902, |
| "step": 178000 |
| }, |
| { |
| "epoch": 57.563025210084035, |
| "grad_norm": 2.0813686847686768, |
| "learning_rate": 0.001, |
| "loss": 1.6944, |
| "step": 178100 |
| }, |
| { |
| "epoch": 57.59534583063995, |
| "grad_norm": 1.533876895904541, |
| "learning_rate": 0.001, |
| "loss": 1.6836, |
| "step": 178200 |
| }, |
| { |
| "epoch": 57.627666451195864, |
| "grad_norm": 1.5302281379699707, |
| "learning_rate": 0.001, |
| "loss": 1.7007, |
| "step": 178300 |
| }, |
| { |
| "epoch": 57.65998707175178, |
| "grad_norm": 1.3833650350570679, |
| "learning_rate": 0.001, |
| "loss": 1.6855, |
| "step": 178400 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 1.61258065700531, |
| "learning_rate": 0.001, |
| "loss": 1.7141, |
| "step": 178500 |
| }, |
| { |
| "epoch": 57.72462831286361, |
| "grad_norm": 1.4593497514724731, |
| "learning_rate": 0.001, |
| "loss": 1.6981, |
| "step": 178600 |
| }, |
| { |
| "epoch": 57.75694893341952, |
| "grad_norm": 1.5033830404281616, |
| "learning_rate": 0.001, |
| "loss": 1.7213, |
| "step": 178700 |
| }, |
| { |
| "epoch": 57.78926955397544, |
| "grad_norm": 1.7307718992233276, |
| "learning_rate": 0.001, |
| "loss": 1.7133, |
| "step": 178800 |
| }, |
| { |
| "epoch": 57.82159017453135, |
| "grad_norm": 1.674391746520996, |
| "learning_rate": 0.001, |
| "loss": 1.7114, |
| "step": 178900 |
| }, |
| { |
| "epoch": 57.853910795087266, |
| "grad_norm": 1.642048716545105, |
| "learning_rate": 0.001, |
| "loss": 1.7125, |
| "step": 179000 |
| }, |
| { |
| "epoch": 57.88623141564318, |
| "grad_norm": 1.9265817403793335, |
| "learning_rate": 0.001, |
| "loss": 1.7243, |
| "step": 179100 |
| }, |
| { |
| "epoch": 57.918552036199095, |
| "grad_norm": 1.534619927406311, |
| "learning_rate": 0.001, |
| "loss": 1.7354, |
| "step": 179200 |
| }, |
| { |
| "epoch": 57.95087265675501, |
| "grad_norm": 1.3853662014007568, |
| "learning_rate": 0.001, |
| "loss": 1.7486, |
| "step": 179300 |
| }, |
| { |
| "epoch": 57.983193277310924, |
| "grad_norm": 1.3320893049240112, |
| "learning_rate": 0.001, |
| "loss": 1.7432, |
| "step": 179400 |
| }, |
| { |
| "epoch": 58.01551389786684, |
| "grad_norm": 1.929592490196228, |
| "learning_rate": 0.001, |
| "loss": 1.6612, |
| "step": 179500 |
| }, |
| { |
| "epoch": 58.04783451842275, |
| "grad_norm": 1.2655434608459473, |
| "learning_rate": 0.001, |
| "loss": 1.6113, |
| "step": 179600 |
| }, |
| { |
| "epoch": 58.08015513897867, |
| "grad_norm": 1.8946388959884644, |
| "learning_rate": 0.001, |
| "loss": 1.6196, |
| "step": 179700 |
| }, |
| { |
| "epoch": 58.11247575953458, |
| "grad_norm": 1.6740760803222656, |
| "learning_rate": 0.001, |
| "loss": 1.5944, |
| "step": 179800 |
| }, |
| { |
| "epoch": 58.1447963800905, |
| "grad_norm": 1.9058560132980347, |
| "learning_rate": 0.001, |
| "loss": 1.6275, |
| "step": 179900 |
| }, |
| { |
| "epoch": 58.17711700064641, |
| "grad_norm": 1.8207988739013672, |
| "learning_rate": 0.001, |
| "loss": 1.6079, |
| "step": 180000 |
| }, |
| { |
| "epoch": 58.209437621202326, |
| "grad_norm": 1.7238407135009766, |
| "learning_rate": 0.001, |
| "loss": 1.6269, |
| "step": 180100 |
| }, |
| { |
| "epoch": 58.24175824175824, |
| "grad_norm": 1.809914231300354, |
| "learning_rate": 0.001, |
| "loss": 1.6507, |
| "step": 180200 |
| }, |
| { |
| "epoch": 58.274078862314155, |
| "grad_norm": 1.8771950006484985, |
| "learning_rate": 0.001, |
| "loss": 1.6316, |
| "step": 180300 |
| }, |
| { |
| "epoch": 58.30639948287007, |
| "grad_norm": 2.1245312690734863, |
| "learning_rate": 0.001, |
| "loss": 1.6455, |
| "step": 180400 |
| }, |
| { |
| "epoch": 58.338720103425985, |
| "grad_norm": 1.507102608680725, |
| "learning_rate": 0.001, |
| "loss": 1.662, |
| "step": 180500 |
| }, |
| { |
| "epoch": 58.3710407239819, |
| "grad_norm": 1.8217893838882446, |
| "learning_rate": 0.001, |
| "loss": 1.6656, |
| "step": 180600 |
| }, |
| { |
| "epoch": 58.403361344537814, |
| "grad_norm": 1.8754676580429077, |
| "learning_rate": 0.001, |
| "loss": 1.6481, |
| "step": 180700 |
| }, |
| { |
| "epoch": 58.43568196509373, |
| "grad_norm": 1.8230457305908203, |
| "learning_rate": 0.001, |
| "loss": 1.6674, |
| "step": 180800 |
| }, |
| { |
| "epoch": 58.46800258564964, |
| "grad_norm": 2.2292656898498535, |
| "learning_rate": 0.001, |
| "loss": 1.6704, |
| "step": 180900 |
| }, |
| { |
| "epoch": 58.50032320620556, |
| "grad_norm": 1.9867464303970337, |
| "learning_rate": 0.001, |
| "loss": 1.669, |
| "step": 181000 |
| }, |
| { |
| "epoch": 58.53264382676147, |
| "grad_norm": 1.8236154317855835, |
| "learning_rate": 0.001, |
| "loss": 1.6707, |
| "step": 181100 |
| }, |
| { |
| "epoch": 58.56496444731739, |
| "grad_norm": 1.3619102239608765, |
| "learning_rate": 0.001, |
| "loss": 1.6735, |
| "step": 181200 |
| }, |
| { |
| "epoch": 58.5972850678733, |
| "grad_norm": 1.4769443273544312, |
| "learning_rate": 0.001, |
| "loss": 1.6882, |
| "step": 181300 |
| }, |
| { |
| "epoch": 58.629605688429216, |
| "grad_norm": 2.5227391719818115, |
| "learning_rate": 0.001, |
| "loss": 1.6807, |
| "step": 181400 |
| }, |
| { |
| "epoch": 58.66192630898513, |
| "grad_norm": 1.9169458150863647, |
| "learning_rate": 0.001, |
| "loss": 1.6877, |
| "step": 181500 |
| }, |
| { |
| "epoch": 58.694246929541045, |
| "grad_norm": 1.9800595045089722, |
| "learning_rate": 0.001, |
| "loss": 1.6846, |
| "step": 181600 |
| }, |
| { |
| "epoch": 58.72656755009696, |
| "grad_norm": 1.5677974224090576, |
| "learning_rate": 0.001, |
| "loss": 1.6981, |
| "step": 181700 |
| }, |
| { |
| "epoch": 58.758888170652874, |
| "grad_norm": 1.9665014743804932, |
| "learning_rate": 0.001, |
| "loss": 1.6963, |
| "step": 181800 |
| }, |
| { |
| "epoch": 58.79120879120879, |
| "grad_norm": 2.1127920150756836, |
| "learning_rate": 0.001, |
| "loss": 1.7346, |
| "step": 181900 |
| }, |
| { |
| "epoch": 58.8235294117647, |
| "grad_norm": 1.8396202325820923, |
| "learning_rate": 0.001, |
| "loss": 1.7076, |
| "step": 182000 |
| }, |
| { |
| "epoch": 58.85585003232062, |
| "grad_norm": 2.098335027694702, |
| "learning_rate": 0.001, |
| "loss": 1.7025, |
| "step": 182100 |
| }, |
| { |
| "epoch": 58.88817065287653, |
| "grad_norm": 1.597739338874817, |
| "learning_rate": 0.001, |
| "loss": 1.7145, |
| "step": 182200 |
| }, |
| { |
| "epoch": 58.92049127343245, |
| "grad_norm": 1.9156646728515625, |
| "learning_rate": 0.001, |
| "loss": 1.7235, |
| "step": 182300 |
| }, |
| { |
| "epoch": 58.95281189398836, |
| "grad_norm": 2.0628559589385986, |
| "learning_rate": 0.001, |
| "loss": 1.7255, |
| "step": 182400 |
| }, |
| { |
| "epoch": 58.985132514544276, |
| "grad_norm": 1.4248301982879639, |
| "learning_rate": 0.001, |
| "loss": 1.7222, |
| "step": 182500 |
| }, |
| { |
| "epoch": 59.0174531351002, |
| "grad_norm": 1.9326345920562744, |
| "learning_rate": 0.001, |
| "loss": 1.652, |
| "step": 182600 |
| }, |
| { |
| "epoch": 59.04977375565611, |
| "grad_norm": 1.6768263578414917, |
| "learning_rate": 0.001, |
| "loss": 1.5961, |
| "step": 182700 |
| }, |
| { |
| "epoch": 59.08209437621203, |
| "grad_norm": 2.4055697917938232, |
| "learning_rate": 0.001, |
| "loss": 1.5885, |
| "step": 182800 |
| }, |
| { |
| "epoch": 59.11441499676794, |
| "grad_norm": 2.089221954345703, |
| "learning_rate": 0.001, |
| "loss": 1.6047, |
| "step": 182900 |
| }, |
| { |
| "epoch": 59.146735617323856, |
| "grad_norm": 2.2153096199035645, |
| "learning_rate": 0.001, |
| "loss": 1.5914, |
| "step": 183000 |
| }, |
| { |
| "epoch": 59.17905623787977, |
| "grad_norm": 1.6174402236938477, |
| "learning_rate": 0.001, |
| "loss": 1.6094, |
| "step": 183100 |
| }, |
| { |
| "epoch": 59.211376858435685, |
| "grad_norm": 1.7545989751815796, |
| "learning_rate": 0.001, |
| "loss": 1.6415, |
| "step": 183200 |
| }, |
| { |
| "epoch": 59.2436974789916, |
| "grad_norm": 1.6180590391159058, |
| "learning_rate": 0.001, |
| "loss": 1.6332, |
| "step": 183300 |
| }, |
| { |
| "epoch": 59.276018099547514, |
| "grad_norm": 1.4139974117279053, |
| "learning_rate": 0.001, |
| "loss": 1.6373, |
| "step": 183400 |
| }, |
| { |
| "epoch": 59.30833872010343, |
| "grad_norm": 2.0663397312164307, |
| "learning_rate": 0.001, |
| "loss": 1.6387, |
| "step": 183500 |
| }, |
| { |
| "epoch": 59.34065934065934, |
| "grad_norm": 1.6867034435272217, |
| "learning_rate": 0.001, |
| "loss": 1.6548, |
| "step": 183600 |
| }, |
| { |
| "epoch": 59.37297996121526, |
| "grad_norm": 1.6726882457733154, |
| "learning_rate": 0.001, |
| "loss": 1.6555, |
| "step": 183700 |
| }, |
| { |
| "epoch": 59.40530058177117, |
| "grad_norm": 1.7690714597702026, |
| "learning_rate": 0.001, |
| "loss": 1.6526, |
| "step": 183800 |
| }, |
| { |
| "epoch": 59.43762120232709, |
| "grad_norm": 1.4407790899276733, |
| "learning_rate": 0.001, |
| "loss": 1.6619, |
| "step": 183900 |
| }, |
| { |
| "epoch": 59.469941822883, |
| "grad_norm": 1.3931773900985718, |
| "learning_rate": 0.001, |
| "loss": 1.6647, |
| "step": 184000 |
| }, |
| { |
| "epoch": 59.502262443438916, |
| "grad_norm": 1.713707447052002, |
| "learning_rate": 0.001, |
| "loss": 1.6664, |
| "step": 184100 |
| }, |
| { |
| "epoch": 59.53458306399483, |
| "grad_norm": 1.4662772417068481, |
| "learning_rate": 0.001, |
| "loss": 1.6614, |
| "step": 184200 |
| }, |
| { |
| "epoch": 59.566903684550745, |
| "grad_norm": 1.6400699615478516, |
| "learning_rate": 0.001, |
| "loss": 1.6717, |
| "step": 184300 |
| }, |
| { |
| "epoch": 59.59922430510666, |
| "grad_norm": 1.428658366203308, |
| "learning_rate": 0.001, |
| "loss": 1.6698, |
| "step": 184400 |
| }, |
| { |
| "epoch": 59.631544925662574, |
| "grad_norm": 1.601353645324707, |
| "learning_rate": 0.001, |
| "loss": 1.6647, |
| "step": 184500 |
| }, |
| { |
| "epoch": 59.66386554621849, |
| "grad_norm": 1.7401574850082397, |
| "learning_rate": 0.001, |
| "loss": 1.6853, |
| "step": 184600 |
| }, |
| { |
| "epoch": 59.6961861667744, |
| "grad_norm": 1.6120574474334717, |
| "learning_rate": 0.001, |
| "loss": 1.6766, |
| "step": 184700 |
| }, |
| { |
| "epoch": 59.72850678733032, |
| "grad_norm": 1.4168685674667358, |
| "learning_rate": 0.001, |
| "loss": 1.6836, |
| "step": 184800 |
| }, |
| { |
| "epoch": 59.76082740788623, |
| "grad_norm": 1.7387588024139404, |
| "learning_rate": 0.001, |
| "loss": 1.6833, |
| "step": 184900 |
| }, |
| { |
| "epoch": 59.79314802844215, |
| "grad_norm": 1.6142842769622803, |
| "learning_rate": 0.001, |
| "loss": 1.7001, |
| "step": 185000 |
| }, |
| { |
| "epoch": 59.82546864899806, |
| "grad_norm": 1.520094394683838, |
| "learning_rate": 0.001, |
| "loss": 1.7084, |
| "step": 185100 |
| }, |
| { |
| "epoch": 59.857789269553976, |
| "grad_norm": 1.6498136520385742, |
| "learning_rate": 0.001, |
| "loss": 1.7055, |
| "step": 185200 |
| }, |
| { |
| "epoch": 59.89010989010989, |
| "grad_norm": 1.3799690008163452, |
| "learning_rate": 0.001, |
| "loss": 1.6949, |
| "step": 185300 |
| }, |
| { |
| "epoch": 59.922430510665805, |
| "grad_norm": 1.5039490461349487, |
| "learning_rate": 0.001, |
| "loss": 1.7057, |
| "step": 185400 |
| }, |
| { |
| "epoch": 59.95475113122172, |
| "grad_norm": 2.032569408416748, |
| "learning_rate": 0.001, |
| "loss": 1.7085, |
| "step": 185500 |
| }, |
| { |
| "epoch": 59.987071751777634, |
| "grad_norm": 1.669844388961792, |
| "learning_rate": 0.001, |
| "loss": 1.7086, |
| "step": 185600 |
| }, |
| { |
| "epoch": 60.01939237233355, |
| "grad_norm": 1.638071060180664, |
| "learning_rate": 0.001, |
| "loss": 1.6557, |
| "step": 185700 |
| }, |
| { |
| "epoch": 60.05171299288946, |
| "grad_norm": 2.303903102874756, |
| "learning_rate": 0.001, |
| "loss": 1.5925, |
| "step": 185800 |
| }, |
| { |
| "epoch": 60.08403361344538, |
| "grad_norm": 1.742456316947937, |
| "learning_rate": 0.001, |
| "loss": 1.587, |
| "step": 185900 |
| }, |
| { |
| "epoch": 60.11635423400129, |
| "grad_norm": 1.2366214990615845, |
| "learning_rate": 0.001, |
| "loss": 1.6073, |
| "step": 186000 |
| }, |
| { |
| "epoch": 60.14867485455721, |
| "grad_norm": 1.7127535343170166, |
| "learning_rate": 0.001, |
| "loss": 1.6022, |
| "step": 186100 |
| }, |
| { |
| "epoch": 60.18099547511312, |
| "grad_norm": 1.181023359298706, |
| "learning_rate": 0.001, |
| "loss": 1.6293, |
| "step": 186200 |
| }, |
| { |
| "epoch": 60.213316095669036, |
| "grad_norm": 1.2192198038101196, |
| "learning_rate": 0.001, |
| "loss": 1.6217, |
| "step": 186300 |
| }, |
| { |
| "epoch": 60.24563671622495, |
| "grad_norm": 1.5543640851974487, |
| "learning_rate": 0.001, |
| "loss": 1.6107, |
| "step": 186400 |
| }, |
| { |
| "epoch": 60.277957336780865, |
| "grad_norm": 1.1227211952209473, |
| "learning_rate": 0.001, |
| "loss": 1.6365, |
| "step": 186500 |
| }, |
| { |
| "epoch": 60.31027795733678, |
| "grad_norm": 1.0597680807113647, |
| "learning_rate": 0.001, |
| "loss": 1.6166, |
| "step": 186600 |
| }, |
| { |
| "epoch": 60.342598577892694, |
| "grad_norm": 1.4818602800369263, |
| "learning_rate": 0.001, |
| "loss": 1.6446, |
| "step": 186700 |
| }, |
| { |
| "epoch": 60.37491919844861, |
| "grad_norm": 1.3480159044265747, |
| "learning_rate": 0.001, |
| "loss": 1.6248, |
| "step": 186800 |
| }, |
| { |
| "epoch": 60.40723981900452, |
| "grad_norm": 1.5055954456329346, |
| "learning_rate": 0.001, |
| "loss": 1.6269, |
| "step": 186900 |
| }, |
| { |
| "epoch": 60.43956043956044, |
| "grad_norm": 1.6578614711761475, |
| "learning_rate": 0.001, |
| "loss": 1.6322, |
| "step": 187000 |
| }, |
| { |
| "epoch": 60.47188106011635, |
| "grad_norm": 1.2708501815795898, |
| "learning_rate": 0.001, |
| "loss": 1.6435, |
| "step": 187100 |
| }, |
| { |
| "epoch": 60.50420168067227, |
| "grad_norm": 1.213304877281189, |
| "learning_rate": 0.001, |
| "loss": 1.6495, |
| "step": 187200 |
| }, |
| { |
| "epoch": 60.53652230122818, |
| "grad_norm": 1.300215721130371, |
| "learning_rate": 0.001, |
| "loss": 1.6774, |
| "step": 187300 |
| }, |
| { |
| "epoch": 60.568842921784096, |
| "grad_norm": 1.3916350603103638, |
| "learning_rate": 0.001, |
| "loss": 1.6679, |
| "step": 187400 |
| }, |
| { |
| "epoch": 60.60116354234001, |
| "grad_norm": 1.5513008832931519, |
| "learning_rate": 0.001, |
| "loss": 1.6526, |
| "step": 187500 |
| }, |
| { |
| "epoch": 60.633484162895925, |
| "grad_norm": 1.6689372062683105, |
| "learning_rate": 0.001, |
| "loss": 1.656, |
| "step": 187600 |
| }, |
| { |
| "epoch": 60.66580478345184, |
| "grad_norm": 1.6529994010925293, |
| "learning_rate": 0.001, |
| "loss": 1.6587, |
| "step": 187700 |
| }, |
| { |
| "epoch": 60.698125404007754, |
| "grad_norm": 1.4164494276046753, |
| "learning_rate": 0.001, |
| "loss": 1.6801, |
| "step": 187800 |
| }, |
| { |
| "epoch": 60.73044602456367, |
| "grad_norm": 1.7202731370925903, |
| "learning_rate": 0.001, |
| "loss": 1.674, |
| "step": 187900 |
| }, |
| { |
| "epoch": 60.762766645119584, |
| "grad_norm": 1.2557040452957153, |
| "learning_rate": 0.001, |
| "loss": 1.6891, |
| "step": 188000 |
| }, |
| { |
| "epoch": 60.7950872656755, |
| "grad_norm": 1.393248200416565, |
| "learning_rate": 0.001, |
| "loss": 1.6826, |
| "step": 188100 |
| }, |
| { |
| "epoch": 60.82740788623141, |
| "grad_norm": 1.4197131395339966, |
| "learning_rate": 0.001, |
| "loss": 1.7021, |
| "step": 188200 |
| }, |
| { |
| "epoch": 60.85972850678733, |
| "grad_norm": 1.243842363357544, |
| "learning_rate": 0.001, |
| "loss": 1.6776, |
| "step": 188300 |
| }, |
| { |
| "epoch": 60.89204912734324, |
| "grad_norm": 1.5487949848175049, |
| "learning_rate": 0.001, |
| "loss": 1.7033, |
| "step": 188400 |
| }, |
| { |
| "epoch": 60.924369747899156, |
| "grad_norm": 1.9102970361709595, |
| "learning_rate": 0.001, |
| "loss": 1.6951, |
| "step": 188500 |
| }, |
| { |
| "epoch": 60.95669036845507, |
| "grad_norm": 1.1745511293411255, |
| "learning_rate": 0.001, |
| "loss": 1.6816, |
| "step": 188600 |
| }, |
| { |
| "epoch": 60.98901098901099, |
| "grad_norm": 1.4164996147155762, |
| "learning_rate": 0.001, |
| "loss": 1.7036, |
| "step": 188700 |
| }, |
| { |
| "epoch": 61.02133160956691, |
| "grad_norm": 1.3019585609436035, |
| "learning_rate": 0.001, |
| "loss": 1.6329, |
| "step": 188800 |
| }, |
| { |
| "epoch": 61.05365223012282, |
| "grad_norm": 1.4903287887573242, |
| "learning_rate": 0.001, |
| "loss": 1.5689, |
| "step": 188900 |
| }, |
| { |
| "epoch": 61.085972850678736, |
| "grad_norm": 1.4800775051116943, |
| "learning_rate": 0.001, |
| "loss": 1.5883, |
| "step": 189000 |
| }, |
| { |
| "epoch": 61.11829347123465, |
| "grad_norm": 1.1266968250274658, |
| "learning_rate": 0.001, |
| "loss": 1.5951, |
| "step": 189100 |
| }, |
| { |
| "epoch": 61.150614091790565, |
| "grad_norm": 1.840097188949585, |
| "learning_rate": 0.001, |
| "loss": 1.6005, |
| "step": 189200 |
| }, |
| { |
| "epoch": 61.18293471234648, |
| "grad_norm": 1.932331919670105, |
| "learning_rate": 0.001, |
| "loss": 1.6065, |
| "step": 189300 |
| }, |
| { |
| "epoch": 61.215255332902395, |
| "grad_norm": 1.543431282043457, |
| "learning_rate": 0.001, |
| "loss": 1.6173, |
| "step": 189400 |
| }, |
| { |
| "epoch": 61.24757595345831, |
| "grad_norm": 1.4862014055252075, |
| "learning_rate": 0.001, |
| "loss": 1.6023, |
| "step": 189500 |
| }, |
| { |
| "epoch": 61.279896574014224, |
| "grad_norm": 1.5869604349136353, |
| "learning_rate": 0.001, |
| "loss": 1.6039, |
| "step": 189600 |
| }, |
| { |
| "epoch": 61.31221719457014, |
| "grad_norm": 1.3288321495056152, |
| "learning_rate": 0.001, |
| "loss": 1.6332, |
| "step": 189700 |
| }, |
| { |
| "epoch": 61.34453781512605, |
| "grad_norm": 1.0867267847061157, |
| "learning_rate": 0.001, |
| "loss": 1.6153, |
| "step": 189800 |
| }, |
| { |
| "epoch": 61.37685843568197, |
| "grad_norm": 1.447813630104065, |
| "learning_rate": 0.001, |
| "loss": 1.6349, |
| "step": 189900 |
| }, |
| { |
| "epoch": 61.40917905623788, |
| "grad_norm": 1.2495040893554688, |
| "learning_rate": 0.001, |
| "loss": 1.6337, |
| "step": 190000 |
| }, |
| { |
| "epoch": 61.441499676793796, |
| "grad_norm": 1.3944239616394043, |
| "learning_rate": 0.001, |
| "loss": 1.6448, |
| "step": 190100 |
| }, |
| { |
| "epoch": 61.47382029734971, |
| "grad_norm": 1.4838260412216187, |
| "learning_rate": 0.001, |
| "loss": 1.6196, |
| "step": 190200 |
| }, |
| { |
| "epoch": 61.506140917905626, |
| "grad_norm": 1.3022147417068481, |
| "learning_rate": 0.001, |
| "loss": 1.6427, |
| "step": 190300 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 1.3423457145690918, |
| "learning_rate": 0.001, |
| "loss": 1.6558, |
| "step": 190400 |
| }, |
| { |
| "epoch": 61.570782159017455, |
| "grad_norm": 1.270648717880249, |
| "learning_rate": 0.001, |
| "loss": 1.6486, |
| "step": 190500 |
| }, |
| { |
| "epoch": 61.60310277957337, |
| "grad_norm": 1.671781301498413, |
| "learning_rate": 0.001, |
| "loss": 1.647, |
| "step": 190600 |
| }, |
| { |
| "epoch": 61.635423400129284, |
| "grad_norm": 1.0591039657592773, |
| "learning_rate": 0.001, |
| "loss": 1.6451, |
| "step": 190700 |
| }, |
| { |
| "epoch": 61.6677440206852, |
| "grad_norm": 1.2032123804092407, |
| "learning_rate": 0.001, |
| "loss": 1.6495, |
| "step": 190800 |
| }, |
| { |
| "epoch": 61.70006464124111, |
| "grad_norm": 1.9419327974319458, |
| "learning_rate": 0.001, |
| "loss": 1.6573, |
| "step": 190900 |
| }, |
| { |
| "epoch": 61.73238526179703, |
| "grad_norm": 1.8041101694107056, |
| "learning_rate": 0.001, |
| "loss": 1.6546, |
| "step": 191000 |
| }, |
| { |
| "epoch": 61.76470588235294, |
| "grad_norm": 1.3328889608383179, |
| "learning_rate": 0.001, |
| "loss": 1.6769, |
| "step": 191100 |
| }, |
| { |
| "epoch": 61.79702650290886, |
| "grad_norm": 1.5163036584854126, |
| "learning_rate": 0.001, |
| "loss": 1.6669, |
| "step": 191200 |
| }, |
| { |
| "epoch": 61.82934712346477, |
| "grad_norm": 1.349266767501831, |
| "learning_rate": 0.001, |
| "loss": 1.6793, |
| "step": 191300 |
| }, |
| { |
| "epoch": 61.861667744020686, |
| "grad_norm": 1.4275840520858765, |
| "learning_rate": 0.001, |
| "loss": 1.6818, |
| "step": 191400 |
| }, |
| { |
| "epoch": 61.8939883645766, |
| "grad_norm": 1.475961685180664, |
| "learning_rate": 0.001, |
| "loss": 1.6815, |
| "step": 191500 |
| }, |
| { |
| "epoch": 61.926308985132515, |
| "grad_norm": 1.5726628303527832, |
| "learning_rate": 0.001, |
| "loss": 1.6682, |
| "step": 191600 |
| }, |
| { |
| "epoch": 61.95862960568843, |
| "grad_norm": 1.38363778591156, |
| "learning_rate": 0.001, |
| "loss": 1.6794, |
| "step": 191700 |
| }, |
| { |
| "epoch": 61.990950226244344, |
| "grad_norm": 1.2430803775787354, |
| "learning_rate": 0.001, |
| "loss": 1.702, |
| "step": 191800 |
| }, |
| { |
| "epoch": 62.02327084680026, |
| "grad_norm": 1.368446707725525, |
| "learning_rate": 0.001, |
| "loss": 1.6099, |
| "step": 191900 |
| }, |
| { |
| "epoch": 62.05559146735617, |
| "grad_norm": 1.095283031463623, |
| "learning_rate": 0.001, |
| "loss": 1.5752, |
| "step": 192000 |
| }, |
| { |
| "epoch": 62.08791208791209, |
| "grad_norm": 1.393159031867981, |
| "learning_rate": 0.001, |
| "loss": 1.5772, |
| "step": 192100 |
| }, |
| { |
| "epoch": 62.120232708468, |
| "grad_norm": 1.2382171154022217, |
| "learning_rate": 0.001, |
| "loss": 1.5867, |
| "step": 192200 |
| }, |
| { |
| "epoch": 62.15255332902392, |
| "grad_norm": 1.3655904531478882, |
| "learning_rate": 0.001, |
| "loss": 1.5889, |
| "step": 192300 |
| }, |
| { |
| "epoch": 62.18487394957983, |
| "grad_norm": 1.4175256490707397, |
| "learning_rate": 0.001, |
| "loss": 1.5975, |
| "step": 192400 |
| }, |
| { |
| "epoch": 62.217194570135746, |
| "grad_norm": 1.2587052583694458, |
| "learning_rate": 0.001, |
| "loss": 1.6041, |
| "step": 192500 |
| }, |
| { |
| "epoch": 62.24951519069166, |
| "grad_norm": 1.8340458869934082, |
| "learning_rate": 0.001, |
| "loss": 1.5917, |
| "step": 192600 |
| }, |
| { |
| "epoch": 62.281835811247575, |
| "grad_norm": 1.2407337427139282, |
| "learning_rate": 0.001, |
| "loss": 1.604, |
| "step": 192700 |
| }, |
| { |
| "epoch": 62.31415643180349, |
| "grad_norm": 1.2123467922210693, |
| "learning_rate": 0.001, |
| "loss": 1.6059, |
| "step": 192800 |
| }, |
| { |
| "epoch": 62.346477052359404, |
| "grad_norm": 1.3114657402038574, |
| "learning_rate": 0.001, |
| "loss": 1.6083, |
| "step": 192900 |
| }, |
| { |
| "epoch": 62.37879767291532, |
| "grad_norm": 1.7987451553344727, |
| "learning_rate": 0.001, |
| "loss": 1.6117, |
| "step": 193000 |
| }, |
| { |
| "epoch": 62.41111829347123, |
| "grad_norm": 1.8280270099639893, |
| "learning_rate": 0.001, |
| "loss": 1.6151, |
| "step": 193100 |
| }, |
| { |
| "epoch": 62.44343891402715, |
| "grad_norm": 1.516326904296875, |
| "learning_rate": 0.001, |
| "loss": 1.63, |
| "step": 193200 |
| }, |
| { |
| "epoch": 62.47575953458306, |
| "grad_norm": 1.1470481157302856, |
| "learning_rate": 0.001, |
| "loss": 1.6373, |
| "step": 193300 |
| }, |
| { |
| "epoch": 62.50808015513898, |
| "grad_norm": 1.4997236728668213, |
| "learning_rate": 0.001, |
| "loss": 1.618, |
| "step": 193400 |
| }, |
| { |
| "epoch": 62.54040077569489, |
| "grad_norm": 1.1891118288040161, |
| "learning_rate": 0.001, |
| "loss": 1.64, |
| "step": 193500 |
| }, |
| { |
| "epoch": 62.572721396250806, |
| "grad_norm": 1.4351845979690552, |
| "learning_rate": 0.001, |
| "loss": 1.639, |
| "step": 193600 |
| }, |
| { |
| "epoch": 62.60504201680672, |
| "grad_norm": 1.5203590393066406, |
| "learning_rate": 0.001, |
| "loss": 1.6484, |
| "step": 193700 |
| }, |
| { |
| "epoch": 62.637362637362635, |
| "grad_norm": 1.3365706205368042, |
| "learning_rate": 0.001, |
| "loss": 1.66, |
| "step": 193800 |
| }, |
| { |
| "epoch": 62.66968325791855, |
| "grad_norm": 1.2810354232788086, |
| "learning_rate": 0.001, |
| "loss": 1.6524, |
| "step": 193900 |
| }, |
| { |
| "epoch": 62.702003878474464, |
| "grad_norm": 1.2262632846832275, |
| "learning_rate": 0.001, |
| "loss": 1.6403, |
| "step": 194000 |
| }, |
| { |
| "epoch": 62.73432449903038, |
| "grad_norm": 1.404079794883728, |
| "learning_rate": 0.001, |
| "loss": 1.6542, |
| "step": 194100 |
| }, |
| { |
| "epoch": 62.76664511958629, |
| "grad_norm": 0.9942919015884399, |
| "learning_rate": 0.001, |
| "loss": 1.6668, |
| "step": 194200 |
| }, |
| { |
| "epoch": 62.79896574014221, |
| "grad_norm": 0.9326017498970032, |
| "learning_rate": 0.001, |
| "loss": 1.6636, |
| "step": 194300 |
| }, |
| { |
| "epoch": 62.83128636069812, |
| "grad_norm": 1.221508264541626, |
| "learning_rate": 0.001, |
| "loss": 1.6628, |
| "step": 194400 |
| }, |
| { |
| "epoch": 62.86360698125404, |
| "grad_norm": 1.3761545419692993, |
| "learning_rate": 0.001, |
| "loss": 1.6593, |
| "step": 194500 |
| }, |
| { |
| "epoch": 62.89592760180995, |
| "grad_norm": 1.4750157594680786, |
| "learning_rate": 0.001, |
| "loss": 1.6685, |
| "step": 194600 |
| }, |
| { |
| "epoch": 62.928248222365866, |
| "grad_norm": 1.1970282793045044, |
| "learning_rate": 0.001, |
| "loss": 1.6811, |
| "step": 194700 |
| }, |
| { |
| "epoch": 62.96056884292178, |
| "grad_norm": 1.3890151977539062, |
| "learning_rate": 0.001, |
| "loss": 1.678, |
| "step": 194800 |
| }, |
| { |
| "epoch": 62.992889463477695, |
| "grad_norm": 1.246107578277588, |
| "learning_rate": 0.001, |
| "loss": 1.6759, |
| "step": 194900 |
| }, |
| { |
| "epoch": 63.02521008403362, |
| "grad_norm": 1.2954941987991333, |
| "learning_rate": 0.001, |
| "loss": 1.6029, |
| "step": 195000 |
| }, |
| { |
| "epoch": 63.05753070458953, |
| "grad_norm": 1.8841441869735718, |
| "learning_rate": 0.001, |
| "loss": 1.5563, |
| "step": 195100 |
| }, |
| { |
| "epoch": 63.089851325145446, |
| "grad_norm": 1.5582014322280884, |
| "learning_rate": 0.001, |
| "loss": 1.5777, |
| "step": 195200 |
| }, |
| { |
| "epoch": 63.12217194570136, |
| "grad_norm": 1.4565472602844238, |
| "learning_rate": 0.001, |
| "loss": 1.5718, |
| "step": 195300 |
| }, |
| { |
| "epoch": 63.154492566257275, |
| "grad_norm": 1.4416550397872925, |
| "learning_rate": 0.001, |
| "loss": 1.5738, |
| "step": 195400 |
| }, |
| { |
| "epoch": 63.18681318681319, |
| "grad_norm": 1.4541130065917969, |
| "learning_rate": 0.001, |
| "loss": 1.5861, |
| "step": 195500 |
| }, |
| { |
| "epoch": 63.219133807369104, |
| "grad_norm": 1.5667710304260254, |
| "learning_rate": 0.001, |
| "loss": 1.5822, |
| "step": 195600 |
| }, |
| { |
| "epoch": 63.25145442792502, |
| "grad_norm": 1.415602445602417, |
| "learning_rate": 0.001, |
| "loss": 1.5934, |
| "step": 195700 |
| }, |
| { |
| "epoch": 63.28377504848093, |
| "grad_norm": 1.4232977628707886, |
| "learning_rate": 0.001, |
| "loss": 1.6013, |
| "step": 195800 |
| }, |
| { |
| "epoch": 63.31609566903685, |
| "grad_norm": 1.4085842370986938, |
| "learning_rate": 0.001, |
| "loss": 1.5978, |
| "step": 195900 |
| }, |
| { |
| "epoch": 63.34841628959276, |
| "grad_norm": 1.247901439666748, |
| "learning_rate": 0.001, |
| "loss": 1.6006, |
| "step": 196000 |
| }, |
| { |
| "epoch": 63.38073691014868, |
| "grad_norm": 1.6161179542541504, |
| "learning_rate": 0.001, |
| "loss": 1.5963, |
| "step": 196100 |
| }, |
| { |
| "epoch": 63.41305753070459, |
| "grad_norm": 1.4580583572387695, |
| "learning_rate": 0.001, |
| "loss": 1.6122, |
| "step": 196200 |
| }, |
| { |
| "epoch": 63.445378151260506, |
| "grad_norm": 1.6338632106781006, |
| "learning_rate": 0.001, |
| "loss": 1.6249, |
| "step": 196300 |
| }, |
| { |
| "epoch": 63.47769877181642, |
| "grad_norm": 1.6578707695007324, |
| "learning_rate": 0.001, |
| "loss": 1.6097, |
| "step": 196400 |
| }, |
| { |
| "epoch": 63.510019392372335, |
| "grad_norm": 1.5980095863342285, |
| "learning_rate": 0.001, |
| "loss": 1.617, |
| "step": 196500 |
| }, |
| { |
| "epoch": 63.54234001292825, |
| "grad_norm": 1.0995980501174927, |
| "learning_rate": 0.001, |
| "loss": 1.6297, |
| "step": 196600 |
| }, |
| { |
| "epoch": 63.574660633484164, |
| "grad_norm": 1.4633560180664062, |
| "learning_rate": 0.001, |
| "loss": 1.6169, |
| "step": 196700 |
| }, |
| { |
| "epoch": 63.60698125404008, |
| "grad_norm": 1.428837537765503, |
| "learning_rate": 0.001, |
| "loss": 1.6364, |
| "step": 196800 |
| }, |
| { |
| "epoch": 63.63930187459599, |
| "grad_norm": 1.0487279891967773, |
| "learning_rate": 0.001, |
| "loss": 1.6567, |
| "step": 196900 |
| }, |
| { |
| "epoch": 63.67162249515191, |
| "grad_norm": 1.2603679895401, |
| "learning_rate": 0.001, |
| "loss": 1.646, |
| "step": 197000 |
| }, |
| { |
| "epoch": 63.70394311570782, |
| "grad_norm": 1.264657974243164, |
| "learning_rate": 0.001, |
| "loss": 1.6411, |
| "step": 197100 |
| }, |
| { |
| "epoch": 63.73626373626374, |
| "grad_norm": 1.2870080471038818, |
| "learning_rate": 0.001, |
| "loss": 1.6392, |
| "step": 197200 |
| }, |
| { |
| "epoch": 63.76858435681965, |
| "grad_norm": 1.1723002195358276, |
| "learning_rate": 0.001, |
| "loss": 1.6656, |
| "step": 197300 |
| }, |
| { |
| "epoch": 63.800904977375566, |
| "grad_norm": 1.2961125373840332, |
| "learning_rate": 0.001, |
| "loss": 1.6515, |
| "step": 197400 |
| }, |
| { |
| "epoch": 63.83322559793148, |
| "grad_norm": 1.5006647109985352, |
| "learning_rate": 0.001, |
| "loss": 1.6418, |
| "step": 197500 |
| }, |
| { |
| "epoch": 63.865546218487395, |
| "grad_norm": 1.268092393875122, |
| "learning_rate": 0.001, |
| "loss": 1.6733, |
| "step": 197600 |
| }, |
| { |
| "epoch": 63.89786683904331, |
| "grad_norm": 1.032017469406128, |
| "learning_rate": 0.001, |
| "loss": 1.6558, |
| "step": 197700 |
| }, |
| { |
| "epoch": 63.930187459599225, |
| "grad_norm": 1.181327223777771, |
| "learning_rate": 0.001, |
| "loss": 1.6631, |
| "step": 197800 |
| }, |
| { |
| "epoch": 63.96250808015514, |
| "grad_norm": 1.2115685939788818, |
| "learning_rate": 0.001, |
| "loss": 1.6711, |
| "step": 197900 |
| }, |
| { |
| "epoch": 63.994828700711054, |
| "grad_norm": 1.368562936782837, |
| "learning_rate": 0.001, |
| "loss": 1.6624, |
| "step": 198000 |
| }, |
| { |
| "epoch": 64.02714932126698, |
| "grad_norm": 1.4267082214355469, |
| "learning_rate": 0.001, |
| "loss": 1.582, |
| "step": 198100 |
| }, |
| { |
| "epoch": 64.05946994182288, |
| "grad_norm": 1.3782719373703003, |
| "learning_rate": 0.001, |
| "loss": 1.5388, |
| "step": 198200 |
| }, |
| { |
| "epoch": 64.0917905623788, |
| "grad_norm": 1.500325083732605, |
| "learning_rate": 0.001, |
| "loss": 1.555, |
| "step": 198300 |
| }, |
| { |
| "epoch": 64.12411118293471, |
| "grad_norm": 1.6411162614822388, |
| "learning_rate": 0.001, |
| "loss": 1.5433, |
| "step": 198400 |
| }, |
| { |
| "epoch": 64.15643180349063, |
| "grad_norm": 1.4161418676376343, |
| "learning_rate": 0.001, |
| "loss": 1.5618, |
| "step": 198500 |
| }, |
| { |
| "epoch": 64.18875242404654, |
| "grad_norm": 1.2888981103897095, |
| "learning_rate": 0.001, |
| "loss": 1.5708, |
| "step": 198600 |
| }, |
| { |
| "epoch": 64.22107304460246, |
| "grad_norm": 1.5764001607894897, |
| "learning_rate": 0.001, |
| "loss": 1.578, |
| "step": 198700 |
| }, |
| { |
| "epoch": 64.25339366515837, |
| "grad_norm": 1.4483191967010498, |
| "learning_rate": 0.001, |
| "loss": 1.5664, |
| "step": 198800 |
| }, |
| { |
| "epoch": 64.28571428571429, |
| "grad_norm": 1.844832181930542, |
| "learning_rate": 0.001, |
| "loss": 1.6023, |
| "step": 198900 |
| }, |
| { |
| "epoch": 64.3180349062702, |
| "grad_norm": 1.3805255889892578, |
| "learning_rate": 0.001, |
| "loss": 1.5934, |
| "step": 199000 |
| }, |
| { |
| "epoch": 64.35035552682612, |
| "grad_norm": 1.3433549404144287, |
| "learning_rate": 0.001, |
| "loss": 1.5915, |
| "step": 199100 |
| }, |
| { |
| "epoch": 64.38267614738203, |
| "grad_norm": 1.7375874519348145, |
| "learning_rate": 0.001, |
| "loss": 1.5885, |
| "step": 199200 |
| }, |
| { |
| "epoch": 64.41499676793795, |
| "grad_norm": 1.5128262042999268, |
| "learning_rate": 0.001, |
| "loss": 1.5863, |
| "step": 199300 |
| }, |
| { |
| "epoch": 64.44731738849386, |
| "grad_norm": 1.608688235282898, |
| "learning_rate": 0.001, |
| "loss": 1.6058, |
| "step": 199400 |
| }, |
| { |
| "epoch": 64.47963800904978, |
| "grad_norm": 1.6296998262405396, |
| "learning_rate": 0.001, |
| "loss": 1.6096, |
| "step": 199500 |
| }, |
| { |
| "epoch": 64.51195862960569, |
| "grad_norm": 1.5886939764022827, |
| "learning_rate": 0.001, |
| "loss": 1.6216, |
| "step": 199600 |
| }, |
| { |
| "epoch": 64.54427925016161, |
| "grad_norm": 1.421675443649292, |
| "learning_rate": 0.001, |
| "loss": 1.6242, |
| "step": 199700 |
| }, |
| { |
| "epoch": 64.57659987071752, |
| "grad_norm": 1.5409778356552124, |
| "learning_rate": 0.001, |
| "loss": 1.624, |
| "step": 199800 |
| }, |
| { |
| "epoch": 64.60892049127344, |
| "grad_norm": 1.9098420143127441, |
| "learning_rate": 0.001, |
| "loss": 1.645, |
| "step": 199900 |
| }, |
| { |
| "epoch": 64.64124111182934, |
| "grad_norm": 1.2659186124801636, |
| "learning_rate": 0.001, |
| "loss": 1.632, |
| "step": 200000 |
| }, |
| { |
| "epoch": 64.67356173238527, |
| "grad_norm": 1.7314198017120361, |
| "learning_rate": 0.001, |
| "loss": 1.6317, |
| "step": 200100 |
| }, |
| { |
| "epoch": 64.70588235294117, |
| "grad_norm": 1.4244946241378784, |
| "learning_rate": 0.001, |
| "loss": 1.647, |
| "step": 200200 |
| }, |
| { |
| "epoch": 64.7382029734971, |
| "grad_norm": 1.355060338973999, |
| "learning_rate": 0.001, |
| "loss": 1.6356, |
| "step": 200300 |
| }, |
| { |
| "epoch": 64.770523594053, |
| "grad_norm": 1.4022140502929688, |
| "learning_rate": 0.001, |
| "loss": 1.6212, |
| "step": 200400 |
| }, |
| { |
| "epoch": 64.80284421460892, |
| "grad_norm": 1.4149821996688843, |
| "learning_rate": 0.001, |
| "loss": 1.6613, |
| "step": 200500 |
| }, |
| { |
| "epoch": 64.83516483516483, |
| "grad_norm": 1.4350502490997314, |
| "learning_rate": 0.001, |
| "loss": 1.6431, |
| "step": 200600 |
| }, |
| { |
| "epoch": 64.86748545572075, |
| "grad_norm": 1.5340101718902588, |
| "learning_rate": 0.001, |
| "loss": 1.6499, |
| "step": 200700 |
| }, |
| { |
| "epoch": 64.89980607627666, |
| "grad_norm": 1.2688623666763306, |
| "learning_rate": 0.001, |
| "loss": 1.6539, |
| "step": 200800 |
| }, |
| { |
| "epoch": 64.93212669683258, |
| "grad_norm": 1.8362963199615479, |
| "learning_rate": 0.001, |
| "loss": 1.6486, |
| "step": 200900 |
| }, |
| { |
| "epoch": 64.96444731738849, |
| "grad_norm": 1.0354406833648682, |
| "learning_rate": 0.001, |
| "loss": 1.6534, |
| "step": 201000 |
| }, |
| { |
| "epoch": 64.99676793794441, |
| "grad_norm": 1.8147214651107788, |
| "learning_rate": 0.001, |
| "loss": 1.6395, |
| "step": 201100 |
| }, |
| { |
| "epoch": 65.02908855850032, |
| "grad_norm": 1.3064634799957275, |
| "learning_rate": 0.001, |
| "loss": 1.5475, |
| "step": 201200 |
| }, |
| { |
| "epoch": 65.06140917905624, |
| "grad_norm": 1.3053911924362183, |
| "learning_rate": 0.001, |
| "loss": 1.525, |
| "step": 201300 |
| }, |
| { |
| "epoch": 65.09372979961215, |
| "grad_norm": 1.8994085788726807, |
| "learning_rate": 0.001, |
| "loss": 1.5361, |
| "step": 201400 |
| }, |
| { |
| "epoch": 65.12605042016807, |
| "grad_norm": 1.4329079389572144, |
| "learning_rate": 0.001, |
| "loss": 1.5706, |
| "step": 201500 |
| }, |
| { |
| "epoch": 65.15837104072398, |
| "grad_norm": 1.4145162105560303, |
| "learning_rate": 0.001, |
| "loss": 1.5803, |
| "step": 201600 |
| }, |
| { |
| "epoch": 65.1906916612799, |
| "grad_norm": 1.7784736156463623, |
| "learning_rate": 0.001, |
| "loss": 1.5627, |
| "step": 201700 |
| }, |
| { |
| "epoch": 65.2230122818358, |
| "grad_norm": 1.8180545568466187, |
| "learning_rate": 0.001, |
| "loss": 1.5645, |
| "step": 201800 |
| }, |
| { |
| "epoch": 65.25533290239173, |
| "grad_norm": 1.5060557126998901, |
| "learning_rate": 0.001, |
| "loss": 1.5773, |
| "step": 201900 |
| }, |
| { |
| "epoch": 65.28765352294764, |
| "grad_norm": 1.6624622344970703, |
| "learning_rate": 0.001, |
| "loss": 1.5682, |
| "step": 202000 |
| }, |
| { |
| "epoch": 65.31997414350356, |
| "grad_norm": 1.513641119003296, |
| "learning_rate": 0.001, |
| "loss": 1.5841, |
| "step": 202100 |
| }, |
| { |
| "epoch": 65.35229476405947, |
| "grad_norm": 1.789554476737976, |
| "learning_rate": 0.001, |
| "loss": 1.6033, |
| "step": 202200 |
| }, |
| { |
| "epoch": 65.38461538461539, |
| "grad_norm": 2.5331172943115234, |
| "learning_rate": 0.001, |
| "loss": 1.5851, |
| "step": 202300 |
| }, |
| { |
| "epoch": 65.4169360051713, |
| "grad_norm": 1.5825227499008179, |
| "learning_rate": 0.001, |
| "loss": 1.5732, |
| "step": 202400 |
| }, |
| { |
| "epoch": 65.44925662572722, |
| "grad_norm": 1.7947232723236084, |
| "learning_rate": 0.001, |
| "loss": 1.5976, |
| "step": 202500 |
| }, |
| { |
| "epoch": 65.48157724628312, |
| "grad_norm": 1.7513070106506348, |
| "learning_rate": 0.001, |
| "loss": 1.5981, |
| "step": 202600 |
| }, |
| { |
| "epoch": 65.51389786683905, |
| "grad_norm": 1.9662882089614868, |
| "learning_rate": 0.001, |
| "loss": 1.622, |
| "step": 202700 |
| }, |
| { |
| "epoch": 65.54621848739495, |
| "grad_norm": 1.6715610027313232, |
| "learning_rate": 0.001, |
| "loss": 1.6087, |
| "step": 202800 |
| }, |
| { |
| "epoch": 65.57853910795087, |
| "grad_norm": 1.2199307680130005, |
| "learning_rate": 0.001, |
| "loss": 1.6237, |
| "step": 202900 |
| }, |
| { |
| "epoch": 65.61085972850678, |
| "grad_norm": 1.4250069856643677, |
| "learning_rate": 0.001, |
| "loss": 1.6225, |
| "step": 203000 |
| }, |
| { |
| "epoch": 65.6431803490627, |
| "grad_norm": 1.7255460023880005, |
| "learning_rate": 0.001, |
| "loss": 1.6139, |
| "step": 203100 |
| }, |
| { |
| "epoch": 65.67550096961861, |
| "grad_norm": 1.2933099269866943, |
| "learning_rate": 0.001, |
| "loss": 1.6239, |
| "step": 203200 |
| }, |
| { |
| "epoch": 65.70782159017453, |
| "grad_norm": 1.560722827911377, |
| "learning_rate": 0.001, |
| "loss": 1.6286, |
| "step": 203300 |
| }, |
| { |
| "epoch": 65.74014221073044, |
| "grad_norm": 1.5156254768371582, |
| "learning_rate": 0.001, |
| "loss": 1.6239, |
| "step": 203400 |
| }, |
| { |
| "epoch": 65.77246283128636, |
| "grad_norm": 1.7756282091140747, |
| "learning_rate": 0.001, |
| "loss": 1.629, |
| "step": 203500 |
| }, |
| { |
| "epoch": 65.80478345184227, |
| "grad_norm": 1.8276307582855225, |
| "learning_rate": 0.001, |
| "loss": 1.6207, |
| "step": 203600 |
| }, |
| { |
| "epoch": 65.83710407239819, |
| "grad_norm": 1.3041716814041138, |
| "learning_rate": 0.001, |
| "loss": 1.6341, |
| "step": 203700 |
| }, |
| { |
| "epoch": 65.8694246929541, |
| "grad_norm": 1.604331374168396, |
| "learning_rate": 0.001, |
| "loss": 1.629, |
| "step": 203800 |
| }, |
| { |
| "epoch": 65.90174531351002, |
| "grad_norm": 1.3833218812942505, |
| "learning_rate": 0.001, |
| "loss": 1.6493, |
| "step": 203900 |
| }, |
| { |
| "epoch": 65.93406593406593, |
| "grad_norm": 1.6882765293121338, |
| "learning_rate": 0.001, |
| "loss": 1.6559, |
| "step": 204000 |
| }, |
| { |
| "epoch": 65.96638655462185, |
| "grad_norm": 1.5404090881347656, |
| "learning_rate": 0.001, |
| "loss": 1.639, |
| "step": 204100 |
| }, |
| { |
| "epoch": 65.99870717517777, |
| "grad_norm": 1.791722059249878, |
| "learning_rate": 0.001, |
| "loss": 1.6264, |
| "step": 204200 |
| }, |
| { |
| "epoch": 66.03102779573368, |
| "grad_norm": 1.9372780323028564, |
| "learning_rate": 0.001, |
| "loss": 1.5299, |
| "step": 204300 |
| }, |
| { |
| "epoch": 66.0633484162896, |
| "grad_norm": 1.8623173236846924, |
| "learning_rate": 0.001, |
| "loss": 1.5294, |
| "step": 204400 |
| }, |
| { |
| "epoch": 66.0956690368455, |
| "grad_norm": 2.164478063583374, |
| "learning_rate": 0.001, |
| "loss": 1.554, |
| "step": 204500 |
| }, |
| { |
| "epoch": 66.12798965740143, |
| "grad_norm": 1.894229531288147, |
| "learning_rate": 0.001, |
| "loss": 1.55, |
| "step": 204600 |
| }, |
| { |
| "epoch": 66.16031027795734, |
| "grad_norm": 1.493912935256958, |
| "learning_rate": 0.001, |
| "loss": 1.56, |
| "step": 204700 |
| }, |
| { |
| "epoch": 66.19263089851326, |
| "grad_norm": 1.607630968093872, |
| "learning_rate": 0.001, |
| "loss": 1.5709, |
| "step": 204800 |
| }, |
| { |
| "epoch": 66.22495151906917, |
| "grad_norm": 1.5320664644241333, |
| "learning_rate": 0.001, |
| "loss": 1.5523, |
| "step": 204900 |
| }, |
| { |
| "epoch": 66.25727213962509, |
| "grad_norm": 1.4517817497253418, |
| "learning_rate": 0.001, |
| "loss": 1.5756, |
| "step": 205000 |
| }, |
| { |
| "epoch": 66.289592760181, |
| "grad_norm": 1.4102898836135864, |
| "learning_rate": 0.001, |
| "loss": 1.5582, |
| "step": 205100 |
| }, |
| { |
| "epoch": 66.32191338073692, |
| "grad_norm": 1.6045602560043335, |
| "learning_rate": 0.001, |
| "loss": 1.5812, |
| "step": 205200 |
| }, |
| { |
| "epoch": 66.35423400129282, |
| "grad_norm": 2.0872433185577393, |
| "learning_rate": 0.001, |
| "loss": 1.5771, |
| "step": 205300 |
| }, |
| { |
| "epoch": 66.38655462184875, |
| "grad_norm": 1.7898145914077759, |
| "learning_rate": 0.001, |
| "loss": 1.5632, |
| "step": 205400 |
| }, |
| { |
| "epoch": 66.41887524240465, |
| "grad_norm": 1.5753815174102783, |
| "learning_rate": 0.001, |
| "loss": 1.5897, |
| "step": 205500 |
| }, |
| { |
| "epoch": 66.45119586296057, |
| "grad_norm": 1.8376567363739014, |
| "learning_rate": 0.001, |
| "loss": 1.5991, |
| "step": 205600 |
| }, |
| { |
| "epoch": 66.48351648351648, |
| "grad_norm": 1.246621012687683, |
| "learning_rate": 0.001, |
| "loss": 1.5738, |
| "step": 205700 |
| }, |
| { |
| "epoch": 66.5158371040724, |
| "grad_norm": 1.9372217655181885, |
| "learning_rate": 0.001, |
| "loss": 1.5895, |
| "step": 205800 |
| }, |
| { |
| "epoch": 66.54815772462831, |
| "grad_norm": 1.6086442470550537, |
| "learning_rate": 0.001, |
| "loss": 1.5999, |
| "step": 205900 |
| }, |
| { |
| "epoch": 66.58047834518423, |
| "grad_norm": 1.5425879955291748, |
| "learning_rate": 0.001, |
| "loss": 1.6072, |
| "step": 206000 |
| }, |
| { |
| "epoch": 66.61279896574014, |
| "grad_norm": 1.7193233966827393, |
| "learning_rate": 0.001, |
| "loss": 1.6019, |
| "step": 206100 |
| }, |
| { |
| "epoch": 66.64511958629606, |
| "grad_norm": 2.3257246017456055, |
| "learning_rate": 0.001, |
| "loss": 1.6003, |
| "step": 206200 |
| }, |
| { |
| "epoch": 66.67744020685197, |
| "grad_norm": 1.939846396446228, |
| "learning_rate": 0.001, |
| "loss": 1.6202, |
| "step": 206300 |
| }, |
| { |
| "epoch": 66.70976082740789, |
| "grad_norm": 1.9590939283370972, |
| "learning_rate": 0.001, |
| "loss": 1.6085, |
| "step": 206400 |
| }, |
| { |
| "epoch": 66.7420814479638, |
| "grad_norm": 1.5885615348815918, |
| "learning_rate": 0.001, |
| "loss": 1.6189, |
| "step": 206500 |
| }, |
| { |
| "epoch": 66.77440206851972, |
| "grad_norm": 1.4376970529556274, |
| "learning_rate": 0.001, |
| "loss": 1.6185, |
| "step": 206600 |
| }, |
| { |
| "epoch": 66.80672268907563, |
| "grad_norm": 1.5070077180862427, |
| "learning_rate": 0.001, |
| "loss": 1.6083, |
| "step": 206700 |
| }, |
| { |
| "epoch": 66.83904330963155, |
| "grad_norm": 1.8947584629058838, |
| "learning_rate": 0.001, |
| "loss": 1.6184, |
| "step": 206800 |
| }, |
| { |
| "epoch": 66.87136393018746, |
| "grad_norm": 1.4360790252685547, |
| "learning_rate": 0.001, |
| "loss": 1.6363, |
| "step": 206900 |
| }, |
| { |
| "epoch": 66.90368455074338, |
| "grad_norm": 1.6498112678527832, |
| "learning_rate": 0.001, |
| "loss": 1.6361, |
| "step": 207000 |
| }, |
| { |
| "epoch": 66.93600517129929, |
| "grad_norm": 1.7067687511444092, |
| "learning_rate": 0.001, |
| "loss": 1.6187, |
| "step": 207100 |
| }, |
| { |
| "epoch": 66.96832579185521, |
| "grad_norm": 1.905935287475586, |
| "learning_rate": 0.001, |
| "loss": 1.6511, |
| "step": 207200 |
| }, |
| { |
| "epoch": 67.00064641241111, |
| "grad_norm": 2.8338687419891357, |
| "learning_rate": 0.001, |
| "loss": 1.6223, |
| "step": 207300 |
| }, |
| { |
| "epoch": 67.03296703296704, |
| "grad_norm": 1.7403825521469116, |
| "learning_rate": 0.001, |
| "loss": 1.5059, |
| "step": 207400 |
| }, |
| { |
| "epoch": 67.06528765352294, |
| "grad_norm": 1.6658779382705688, |
| "learning_rate": 0.001, |
| "loss": 1.5186, |
| "step": 207500 |
| }, |
| { |
| "epoch": 67.09760827407887, |
| "grad_norm": 1.9711863994598389, |
| "learning_rate": 0.001, |
| "loss": 1.5343, |
| "step": 207600 |
| }, |
| { |
| "epoch": 67.12992889463477, |
| "grad_norm": 1.5921905040740967, |
| "learning_rate": 0.001, |
| "loss": 1.5336, |
| "step": 207700 |
| }, |
| { |
| "epoch": 67.1622495151907, |
| "grad_norm": 1.9078835248947144, |
| "learning_rate": 0.001, |
| "loss": 1.5284, |
| "step": 207800 |
| }, |
| { |
| "epoch": 67.1945701357466, |
| "grad_norm": 2.686221122741699, |
| "learning_rate": 0.001, |
| "loss": 1.5466, |
| "step": 207900 |
| }, |
| { |
| "epoch": 67.22689075630252, |
| "grad_norm": 1.8036460876464844, |
| "learning_rate": 0.001, |
| "loss": 1.5566, |
| "step": 208000 |
| }, |
| { |
| "epoch": 67.25921137685843, |
| "grad_norm": 1.9834649562835693, |
| "learning_rate": 0.001, |
| "loss": 1.5432, |
| "step": 208100 |
| }, |
| { |
| "epoch": 67.29153199741435, |
| "grad_norm": 2.482227087020874, |
| "learning_rate": 0.001, |
| "loss": 1.5466, |
| "step": 208200 |
| }, |
| { |
| "epoch": 67.32385261797026, |
| "grad_norm": 1.6784040927886963, |
| "learning_rate": 0.001, |
| "loss": 1.5603, |
| "step": 208300 |
| }, |
| { |
| "epoch": 67.35617323852618, |
| "grad_norm": 1.9004853963851929, |
| "learning_rate": 0.001, |
| "loss": 1.5535, |
| "step": 208400 |
| }, |
| { |
| "epoch": 67.38849385908209, |
| "grad_norm": 2.2348580360412598, |
| "learning_rate": 0.001, |
| "loss": 1.5645, |
| "step": 208500 |
| }, |
| { |
| "epoch": 67.42081447963801, |
| "grad_norm": 1.5861483812332153, |
| "learning_rate": 0.001, |
| "loss": 1.5818, |
| "step": 208600 |
| }, |
| { |
| "epoch": 67.45313510019392, |
| "grad_norm": 2.3207311630249023, |
| "learning_rate": 0.001, |
| "loss": 1.5708, |
| "step": 208700 |
| }, |
| { |
| "epoch": 67.48545572074984, |
| "grad_norm": 3.079047918319702, |
| "learning_rate": 0.001, |
| "loss": 1.5933, |
| "step": 208800 |
| }, |
| { |
| "epoch": 67.51777634130575, |
| "grad_norm": 1.9215753078460693, |
| "learning_rate": 0.001, |
| "loss": 1.5945, |
| "step": 208900 |
| }, |
| { |
| "epoch": 67.55009696186167, |
| "grad_norm": 1.2818607091903687, |
| "learning_rate": 0.001, |
| "loss": 1.5983, |
| "step": 209000 |
| }, |
| { |
| "epoch": 67.58241758241758, |
| "grad_norm": 1.4433931112289429, |
| "learning_rate": 0.001, |
| "loss": 1.608, |
| "step": 209100 |
| }, |
| { |
| "epoch": 67.6147382029735, |
| "grad_norm": 1.771817922592163, |
| "learning_rate": 0.001, |
| "loss": 1.5869, |
| "step": 209200 |
| }, |
| { |
| "epoch": 67.6470588235294, |
| "grad_norm": 2.404127597808838, |
| "learning_rate": 0.001, |
| "loss": 1.5879, |
| "step": 209300 |
| }, |
| { |
| "epoch": 67.67937944408533, |
| "grad_norm": 1.46668541431427, |
| "learning_rate": 0.001, |
| "loss": 1.6071, |
| "step": 209400 |
| }, |
| { |
| "epoch": 67.71170006464124, |
| "grad_norm": 2.0058186054229736, |
| "learning_rate": 0.001, |
| "loss": 1.5945, |
| "step": 209500 |
| }, |
| { |
| "epoch": 67.74402068519716, |
| "grad_norm": 1.8062503337860107, |
| "learning_rate": 0.001, |
| "loss": 1.6054, |
| "step": 209600 |
| }, |
| { |
| "epoch": 67.77634130575306, |
| "grad_norm": 2.3619446754455566, |
| "learning_rate": 0.001, |
| "loss": 1.6172, |
| "step": 209700 |
| }, |
| { |
| "epoch": 67.80866192630899, |
| "grad_norm": 1.6072543859481812, |
| "learning_rate": 0.001, |
| "loss": 1.5929, |
| "step": 209800 |
| }, |
| { |
| "epoch": 67.8409825468649, |
| "grad_norm": 1.9901092052459717, |
| "learning_rate": 0.001, |
| "loss": 1.6103, |
| "step": 209900 |
| }, |
| { |
| "epoch": 67.87330316742081, |
| "grad_norm": 1.511841893196106, |
| "learning_rate": 0.001, |
| "loss": 1.6441, |
| "step": 210000 |
| }, |
| { |
| "epoch": 67.90562378797672, |
| "grad_norm": 1.9327521324157715, |
| "learning_rate": 0.001, |
| "loss": 1.6185, |
| "step": 210100 |
| }, |
| { |
| "epoch": 67.93794440853264, |
| "grad_norm": 1.9379057884216309, |
| "learning_rate": 0.001, |
| "loss": 1.6331, |
| "step": 210200 |
| }, |
| { |
| "epoch": 67.97026502908855, |
| "grad_norm": 2.142944097518921, |
| "learning_rate": 0.001, |
| "loss": 1.6382, |
| "step": 210300 |
| }, |
| { |
| "epoch": 68.00258564964447, |
| "grad_norm": 1.401830792427063, |
| "learning_rate": 0.001, |
| "loss": 1.6396, |
| "step": 210400 |
| }, |
| { |
| "epoch": 68.0349062702004, |
| "grad_norm": 1.776995301246643, |
| "learning_rate": 0.001, |
| "loss": 1.4978, |
| "step": 210500 |
| }, |
| { |
| "epoch": 68.0672268907563, |
| "grad_norm": 1.409732699394226, |
| "learning_rate": 0.001, |
| "loss": 1.5218, |
| "step": 210600 |
| }, |
| { |
| "epoch": 68.09954751131222, |
| "grad_norm": 2.3525753021240234, |
| "learning_rate": 0.001, |
| "loss": 1.5259, |
| "step": 210700 |
| }, |
| { |
| "epoch": 68.13186813186813, |
| "grad_norm": 1.5566622018814087, |
| "learning_rate": 0.001, |
| "loss": 1.5528, |
| "step": 210800 |
| }, |
| { |
| "epoch": 68.16418875242405, |
| "grad_norm": 1.6221765279769897, |
| "learning_rate": 0.001, |
| "loss": 1.5335, |
| "step": 210900 |
| }, |
| { |
| "epoch": 68.19650937297996, |
| "grad_norm": 1.8986177444458008, |
| "learning_rate": 0.001, |
| "loss": 1.546, |
| "step": 211000 |
| }, |
| { |
| "epoch": 68.22882999353588, |
| "grad_norm": 1.4100441932678223, |
| "learning_rate": 0.001, |
| "loss": 1.5445, |
| "step": 211100 |
| }, |
| { |
| "epoch": 68.26115061409179, |
| "grad_norm": 1.8427026271820068, |
| "learning_rate": 0.001, |
| "loss": 1.5541, |
| "step": 211200 |
| }, |
| { |
| "epoch": 68.29347123464771, |
| "grad_norm": 2.2507922649383545, |
| "learning_rate": 0.001, |
| "loss": 1.5481, |
| "step": 211300 |
| }, |
| { |
| "epoch": 68.32579185520362, |
| "grad_norm": 1.769182801246643, |
| "learning_rate": 0.001, |
| "loss": 1.5397, |
| "step": 211400 |
| }, |
| { |
| "epoch": 68.35811247575954, |
| "grad_norm": 1.8999907970428467, |
| "learning_rate": 0.001, |
| "loss": 1.5467, |
| "step": 211500 |
| }, |
| { |
| "epoch": 68.39043309631545, |
| "grad_norm": 1.141658067703247, |
| "learning_rate": 0.001, |
| "loss": 1.5666, |
| "step": 211600 |
| }, |
| { |
| "epoch": 68.42275371687137, |
| "grad_norm": 1.5240708589553833, |
| "learning_rate": 0.001, |
| "loss": 1.5606, |
| "step": 211700 |
| }, |
| { |
| "epoch": 68.45507433742728, |
| "grad_norm": 1.4288796186447144, |
| "learning_rate": 0.001, |
| "loss": 1.5732, |
| "step": 211800 |
| }, |
| { |
| "epoch": 68.4873949579832, |
| "grad_norm": 1.3086295127868652, |
| "learning_rate": 0.001, |
| "loss": 1.5758, |
| "step": 211900 |
| }, |
| { |
| "epoch": 68.5197155785391, |
| "grad_norm": 1.2653098106384277, |
| "learning_rate": 0.001, |
| "loss": 1.5713, |
| "step": 212000 |
| }, |
| { |
| "epoch": 68.55203619909503, |
| "grad_norm": 1.4599053859710693, |
| "learning_rate": 0.001, |
| "loss": 1.5817, |
| "step": 212100 |
| }, |
| { |
| "epoch": 68.58435681965094, |
| "grad_norm": 1.642386794090271, |
| "learning_rate": 0.001, |
| "loss": 1.5719, |
| "step": 212200 |
| }, |
| { |
| "epoch": 68.61667744020686, |
| "grad_norm": 1.565251350402832, |
| "learning_rate": 0.001, |
| "loss": 1.5889, |
| "step": 212300 |
| }, |
| { |
| "epoch": 68.64899806076276, |
| "grad_norm": 1.2315536737442017, |
| "learning_rate": 0.001, |
| "loss": 1.5831, |
| "step": 212400 |
| }, |
| { |
| "epoch": 68.68131868131869, |
| "grad_norm": 2.1367335319519043, |
| "learning_rate": 0.001, |
| "loss": 1.5915, |
| "step": 212500 |
| }, |
| { |
| "epoch": 68.7136393018746, |
| "grad_norm": 1.3036410808563232, |
| "learning_rate": 0.001, |
| "loss": 1.5882, |
| "step": 212600 |
| }, |
| { |
| "epoch": 68.74595992243052, |
| "grad_norm": 1.7766282558441162, |
| "learning_rate": 0.001, |
| "loss": 1.5965, |
| "step": 212700 |
| }, |
| { |
| "epoch": 68.77828054298642, |
| "grad_norm": 1.840104579925537, |
| "learning_rate": 0.001, |
| "loss": 1.5873, |
| "step": 212800 |
| }, |
| { |
| "epoch": 68.81060116354234, |
| "grad_norm": 1.8919962644577026, |
| "learning_rate": 0.001, |
| "loss": 1.6055, |
| "step": 212900 |
| }, |
| { |
| "epoch": 68.84292178409825, |
| "grad_norm": 1.4418390989303589, |
| "learning_rate": 0.001, |
| "loss": 1.626, |
| "step": 213000 |
| }, |
| { |
| "epoch": 68.87524240465417, |
| "grad_norm": 1.54845130443573, |
| "learning_rate": 0.001, |
| "loss": 1.6256, |
| "step": 213100 |
| }, |
| { |
| "epoch": 68.90756302521008, |
| "grad_norm": 1.4561160802841187, |
| "learning_rate": 0.001, |
| "loss": 1.6176, |
| "step": 213200 |
| }, |
| { |
| "epoch": 68.939883645766, |
| "grad_norm": 1.3307286500930786, |
| "learning_rate": 0.001, |
| "loss": 1.6251, |
| "step": 213300 |
| }, |
| { |
| "epoch": 68.97220426632191, |
| "grad_norm": 1.0837733745574951, |
| "learning_rate": 0.001, |
| "loss": 1.6189, |
| "step": 213400 |
| }, |
| { |
| "epoch": 69.00452488687783, |
| "grad_norm": 1.1446514129638672, |
| "learning_rate": 0.001, |
| "loss": 1.61, |
| "step": 213500 |
| }, |
| { |
| "epoch": 69.03684550743374, |
| "grad_norm": 1.1614508628845215, |
| "learning_rate": 0.001, |
| "loss": 1.5068, |
| "step": 213600 |
| }, |
| { |
| "epoch": 69.06916612798966, |
| "grad_norm": 1.7956161499023438, |
| "learning_rate": 0.001, |
| "loss": 1.5062, |
| "step": 213700 |
| }, |
| { |
| "epoch": 69.10148674854557, |
| "grad_norm": 1.4920622110366821, |
| "learning_rate": 0.001, |
| "loss": 1.5255, |
| "step": 213800 |
| }, |
| { |
| "epoch": 69.13380736910149, |
| "grad_norm": 1.180904507637024, |
| "learning_rate": 0.001, |
| "loss": 1.5239, |
| "step": 213900 |
| }, |
| { |
| "epoch": 69.1661279896574, |
| "grad_norm": 1.4228894710540771, |
| "learning_rate": 0.001, |
| "loss": 1.5292, |
| "step": 214000 |
| }, |
| { |
| "epoch": 69.19844861021332, |
| "grad_norm": 1.5594555139541626, |
| "learning_rate": 0.001, |
| "loss": 1.5186, |
| "step": 214100 |
| }, |
| { |
| "epoch": 69.23076923076923, |
| "grad_norm": 1.0493789911270142, |
| "learning_rate": 0.001, |
| "loss": 1.5329, |
| "step": 214200 |
| }, |
| { |
| "epoch": 69.26308985132515, |
| "grad_norm": 1.764050841331482, |
| "learning_rate": 0.001, |
| "loss": 1.542, |
| "step": 214300 |
| }, |
| { |
| "epoch": 69.29541047188106, |
| "grad_norm": 1.9596831798553467, |
| "learning_rate": 0.001, |
| "loss": 1.5347, |
| "step": 214400 |
| }, |
| { |
| "epoch": 69.32773109243698, |
| "grad_norm": 1.615365982055664, |
| "learning_rate": 0.001, |
| "loss": 1.5616, |
| "step": 214500 |
| }, |
| { |
| "epoch": 69.36005171299288, |
| "grad_norm": 1.2732131481170654, |
| "learning_rate": 0.001, |
| "loss": 1.5411, |
| "step": 214600 |
| }, |
| { |
| "epoch": 69.3923723335488, |
| "grad_norm": 1.531451940536499, |
| "learning_rate": 0.001, |
| "loss": 1.5435, |
| "step": 214700 |
| }, |
| { |
| "epoch": 69.42469295410471, |
| "grad_norm": 1.7909483909606934, |
| "learning_rate": 0.001, |
| "loss": 1.543, |
| "step": 214800 |
| }, |
| { |
| "epoch": 69.45701357466064, |
| "grad_norm": 0.9809910655021667, |
| "learning_rate": 0.001, |
| "loss": 1.5491, |
| "step": 214900 |
| }, |
| { |
| "epoch": 69.48933419521654, |
| "grad_norm": 1.3432776927947998, |
| "learning_rate": 0.001, |
| "loss": 1.5731, |
| "step": 215000 |
| }, |
| { |
| "epoch": 69.52165481577246, |
| "grad_norm": 1.2507835626602173, |
| "learning_rate": 0.001, |
| "loss": 1.5713, |
| "step": 215100 |
| }, |
| { |
| "epoch": 69.55397543632837, |
| "grad_norm": 1.3285642862319946, |
| "learning_rate": 0.001, |
| "loss": 1.5724, |
| "step": 215200 |
| }, |
| { |
| "epoch": 69.5862960568843, |
| "grad_norm": 1.9070652723312378, |
| "learning_rate": 0.001, |
| "loss": 1.5737, |
| "step": 215300 |
| }, |
| { |
| "epoch": 69.6186166774402, |
| "grad_norm": 1.5597374439239502, |
| "learning_rate": 0.001, |
| "loss": 1.5839, |
| "step": 215400 |
| }, |
| { |
| "epoch": 69.65093729799612, |
| "grad_norm": 1.8160336017608643, |
| "learning_rate": 0.001, |
| "loss": 1.589, |
| "step": 215500 |
| }, |
| { |
| "epoch": 69.68325791855203, |
| "grad_norm": 1.1347708702087402, |
| "learning_rate": 0.001, |
| "loss": 1.5807, |
| "step": 215600 |
| }, |
| { |
| "epoch": 69.71557853910795, |
| "grad_norm": 1.2475730180740356, |
| "learning_rate": 0.001, |
| "loss": 1.5937, |
| "step": 215700 |
| }, |
| { |
| "epoch": 69.74789915966386, |
| "grad_norm": 1.8533042669296265, |
| "learning_rate": 0.001, |
| "loss": 1.5843, |
| "step": 215800 |
| }, |
| { |
| "epoch": 69.78021978021978, |
| "grad_norm": 1.7402023077011108, |
| "learning_rate": 0.001, |
| "loss": 1.5811, |
| "step": 215900 |
| }, |
| { |
| "epoch": 69.81254040077569, |
| "grad_norm": 1.4845796823501587, |
| "learning_rate": 0.001, |
| "loss": 1.5987, |
| "step": 216000 |
| }, |
| { |
| "epoch": 69.84486102133161, |
| "grad_norm": 1.5384836196899414, |
| "learning_rate": 0.001, |
| "loss": 1.6039, |
| "step": 216100 |
| }, |
| { |
| "epoch": 69.87718164188752, |
| "grad_norm": 1.451719045639038, |
| "learning_rate": 0.001, |
| "loss": 1.5907, |
| "step": 216200 |
| }, |
| { |
| "epoch": 69.90950226244344, |
| "grad_norm": 1.1956146955490112, |
| "learning_rate": 0.001, |
| "loss": 1.5954, |
| "step": 216300 |
| }, |
| { |
| "epoch": 69.94182288299935, |
| "grad_norm": 1.8268797397613525, |
| "learning_rate": 0.001, |
| "loss": 1.619, |
| "step": 216400 |
| }, |
| { |
| "epoch": 69.97414350355527, |
| "grad_norm": 1.3545068502426147, |
| "learning_rate": 0.001, |
| "loss": 1.6064, |
| "step": 216500 |
| }, |
| { |
| "epoch": 70.00646412411119, |
| "grad_norm": 1.1828221082687378, |
| "learning_rate": 0.001, |
| "loss": 1.6031, |
| "step": 216600 |
| }, |
| { |
| "epoch": 70.0387847446671, |
| "grad_norm": 1.4522184133529663, |
| "learning_rate": 0.001, |
| "loss": 1.4794, |
| "step": 216700 |
| }, |
| { |
| "epoch": 70.07110536522302, |
| "grad_norm": 1.7155815362930298, |
| "learning_rate": 0.001, |
| "loss": 1.4898, |
| "step": 216800 |
| }, |
| { |
| "epoch": 70.10342598577893, |
| "grad_norm": 1.4515830278396606, |
| "learning_rate": 0.001, |
| "loss": 1.5079, |
| "step": 216900 |
| }, |
| { |
| "epoch": 70.13574660633485, |
| "grad_norm": 1.3109537363052368, |
| "learning_rate": 0.001, |
| "loss": 1.5131, |
| "step": 217000 |
| }, |
| { |
| "epoch": 70.16806722689076, |
| "grad_norm": 1.8338853120803833, |
| "learning_rate": 0.001, |
| "loss": 1.5118, |
| "step": 217100 |
| }, |
| { |
| "epoch": 70.20038784744668, |
| "grad_norm": 1.5463823080062866, |
| "learning_rate": 0.001, |
| "loss": 1.512, |
| "step": 217200 |
| }, |
| { |
| "epoch": 70.23270846800258, |
| "grad_norm": 1.2792097330093384, |
| "learning_rate": 0.001, |
| "loss": 1.5257, |
| "step": 217300 |
| }, |
| { |
| "epoch": 70.2650290885585, |
| "grad_norm": 1.662739872932434, |
| "learning_rate": 0.001, |
| "loss": 1.5272, |
| "step": 217400 |
| }, |
| { |
| "epoch": 70.29734970911441, |
| "grad_norm": 1.3703052997589111, |
| "learning_rate": 0.001, |
| "loss": 1.5373, |
| "step": 217500 |
| }, |
| { |
| "epoch": 70.32967032967034, |
| "grad_norm": 1.3139020204544067, |
| "learning_rate": 0.001, |
| "loss": 1.5459, |
| "step": 217600 |
| }, |
| { |
| "epoch": 70.36199095022624, |
| "grad_norm": 1.8041199445724487, |
| "learning_rate": 0.001, |
| "loss": 1.5372, |
| "step": 217700 |
| }, |
| { |
| "epoch": 70.39431157078216, |
| "grad_norm": 1.9823251962661743, |
| "learning_rate": 0.001, |
| "loss": 1.5454, |
| "step": 217800 |
| }, |
| { |
| "epoch": 70.42663219133807, |
| "grad_norm": 1.5236910581588745, |
| "learning_rate": 0.001, |
| "loss": 1.5506, |
| "step": 217900 |
| }, |
| { |
| "epoch": 70.458952811894, |
| "grad_norm": 1.4975502490997314, |
| "learning_rate": 0.001, |
| "loss": 1.5514, |
| "step": 218000 |
| }, |
| { |
| "epoch": 70.4912734324499, |
| "grad_norm": 0.9770181179046631, |
| "learning_rate": 0.001, |
| "loss": 1.5633, |
| "step": 218100 |
| }, |
| { |
| "epoch": 70.52359405300582, |
| "grad_norm": 1.632096767425537, |
| "learning_rate": 0.001, |
| "loss": 1.5659, |
| "step": 218200 |
| }, |
| { |
| "epoch": 70.55591467356173, |
| "grad_norm": 1.3791656494140625, |
| "learning_rate": 0.001, |
| "loss": 1.5788, |
| "step": 218300 |
| }, |
| { |
| "epoch": 70.58823529411765, |
| "grad_norm": 1.3625118732452393, |
| "learning_rate": 0.001, |
| "loss": 1.5848, |
| "step": 218400 |
| }, |
| { |
| "epoch": 70.62055591467356, |
| "grad_norm": 1.355443000793457, |
| "learning_rate": 0.001, |
| "loss": 1.5531, |
| "step": 218500 |
| }, |
| { |
| "epoch": 70.65287653522948, |
| "grad_norm": 1.3710728883743286, |
| "learning_rate": 0.001, |
| "loss": 1.5684, |
| "step": 218600 |
| }, |
| { |
| "epoch": 70.68519715578539, |
| "grad_norm": 1.2288410663604736, |
| "learning_rate": 0.001, |
| "loss": 1.56, |
| "step": 218700 |
| }, |
| { |
| "epoch": 70.71751777634131, |
| "grad_norm": 1.7254223823547363, |
| "learning_rate": 0.001, |
| "loss": 1.5803, |
| "step": 218800 |
| }, |
| { |
| "epoch": 70.74983839689722, |
| "grad_norm": 1.6320266723632812, |
| "learning_rate": 0.001, |
| "loss": 1.5777, |
| "step": 218900 |
| }, |
| { |
| "epoch": 70.78215901745314, |
| "grad_norm": 1.209186315536499, |
| "learning_rate": 0.001, |
| "loss": 1.5888, |
| "step": 219000 |
| }, |
| { |
| "epoch": 70.81447963800905, |
| "grad_norm": 1.2234702110290527, |
| "learning_rate": 0.001, |
| "loss": 1.5897, |
| "step": 219100 |
| }, |
| { |
| "epoch": 70.84680025856497, |
| "grad_norm": 1.058451771736145, |
| "learning_rate": 0.001, |
| "loss": 1.6031, |
| "step": 219200 |
| }, |
| { |
| "epoch": 70.87912087912088, |
| "grad_norm": 1.3177103996276855, |
| "learning_rate": 0.001, |
| "loss": 1.5897, |
| "step": 219300 |
| }, |
| { |
| "epoch": 70.9114414996768, |
| "grad_norm": 1.2803094387054443, |
| "learning_rate": 0.001, |
| "loss": 1.6078, |
| "step": 219400 |
| }, |
| { |
| "epoch": 70.9437621202327, |
| "grad_norm": 1.410070776939392, |
| "learning_rate": 0.001, |
| "loss": 1.595, |
| "step": 219500 |
| }, |
| { |
| "epoch": 70.97608274078863, |
| "grad_norm": 2.0982401371002197, |
| "learning_rate": 0.001, |
| "loss": 1.5901, |
| "step": 219600 |
| }, |
| { |
| "epoch": 71.00840336134453, |
| "grad_norm": 1.0597091913223267, |
| "learning_rate": 0.001, |
| "loss": 1.582, |
| "step": 219700 |
| }, |
| { |
| "epoch": 71.04072398190046, |
| "grad_norm": 2.105226755142212, |
| "learning_rate": 0.001, |
| "loss": 1.4687, |
| "step": 219800 |
| }, |
| { |
| "epoch": 71.07304460245636, |
| "grad_norm": 1.2170047760009766, |
| "learning_rate": 0.001, |
| "loss": 1.4939, |
| "step": 219900 |
| }, |
| { |
| "epoch": 71.10536522301229, |
| "grad_norm": 1.4154855012893677, |
| "learning_rate": 0.001, |
| "loss": 1.4787, |
| "step": 220000 |
| }, |
| { |
| "epoch": 71.13768584356819, |
| "grad_norm": 1.7579776048660278, |
| "learning_rate": 0.001, |
| "loss": 1.499, |
| "step": 220100 |
| }, |
| { |
| "epoch": 71.17000646412411, |
| "grad_norm": 1.5543571710586548, |
| "learning_rate": 0.001, |
| "loss": 1.4995, |
| "step": 220200 |
| }, |
| { |
| "epoch": 71.20232708468002, |
| "grad_norm": 1.4604703187942505, |
| "learning_rate": 0.001, |
| "loss": 1.5156, |
| "step": 220300 |
| }, |
| { |
| "epoch": 71.23464770523594, |
| "grad_norm": 1.5122308731079102, |
| "learning_rate": 0.001, |
| "loss": 1.5249, |
| "step": 220400 |
| }, |
| { |
| "epoch": 71.26696832579185, |
| "grad_norm": 1.566140055656433, |
| "learning_rate": 0.001, |
| "loss": 1.4966, |
| "step": 220500 |
| }, |
| { |
| "epoch": 71.29928894634777, |
| "grad_norm": 1.962449550628662, |
| "learning_rate": 0.001, |
| "loss": 1.5303, |
| "step": 220600 |
| }, |
| { |
| "epoch": 71.33160956690368, |
| "grad_norm": 1.3906453847885132, |
| "learning_rate": 0.001, |
| "loss": 1.5293, |
| "step": 220700 |
| }, |
| { |
| "epoch": 71.3639301874596, |
| "grad_norm": 1.6068905591964722, |
| "learning_rate": 0.001, |
| "loss": 1.5271, |
| "step": 220800 |
| }, |
| { |
| "epoch": 71.39625080801551, |
| "grad_norm": 1.3614650964736938, |
| "learning_rate": 0.001, |
| "loss": 1.5508, |
| "step": 220900 |
| }, |
| { |
| "epoch": 71.42857142857143, |
| "grad_norm": 1.6165804862976074, |
| "learning_rate": 0.001, |
| "loss": 1.5458, |
| "step": 221000 |
| }, |
| { |
| "epoch": 71.46089204912734, |
| "grad_norm": 1.724016785621643, |
| "learning_rate": 0.001, |
| "loss": 1.5337, |
| "step": 221100 |
| }, |
| { |
| "epoch": 71.49321266968326, |
| "grad_norm": 1.3093929290771484, |
| "learning_rate": 0.001, |
| "loss": 1.5535, |
| "step": 221200 |
| }, |
| { |
| "epoch": 71.52553329023917, |
| "grad_norm": 1.233283281326294, |
| "learning_rate": 0.001, |
| "loss": 1.545, |
| "step": 221300 |
| }, |
| { |
| "epoch": 71.55785391079509, |
| "grad_norm": 1.4728742837905884, |
| "learning_rate": 0.001, |
| "loss": 1.5582, |
| "step": 221400 |
| }, |
| { |
| "epoch": 71.590174531351, |
| "grad_norm": 1.3168997764587402, |
| "learning_rate": 0.001, |
| "loss": 1.5633, |
| "step": 221500 |
| }, |
| { |
| "epoch": 71.62249515190692, |
| "grad_norm": 1.5132778882980347, |
| "learning_rate": 0.001, |
| "loss": 1.5636, |
| "step": 221600 |
| }, |
| { |
| "epoch": 71.65481577246283, |
| "grad_norm": 1.6981767416000366, |
| "learning_rate": 0.001, |
| "loss": 1.5719, |
| "step": 221700 |
| }, |
| { |
| "epoch": 71.68713639301875, |
| "grad_norm": 1.4577381610870361, |
| "learning_rate": 0.001, |
| "loss": 1.5592, |
| "step": 221800 |
| }, |
| { |
| "epoch": 71.71945701357465, |
| "grad_norm": 1.7084128856658936, |
| "learning_rate": 0.001, |
| "loss": 1.5581, |
| "step": 221900 |
| }, |
| { |
| "epoch": 71.75177763413058, |
| "grad_norm": 1.0470802783966064, |
| "learning_rate": 0.001, |
| "loss": 1.5716, |
| "step": 222000 |
| }, |
| { |
| "epoch": 71.78409825468648, |
| "grad_norm": 1.6497925519943237, |
| "learning_rate": 0.001, |
| "loss": 1.5946, |
| "step": 222100 |
| }, |
| { |
| "epoch": 71.8164188752424, |
| "grad_norm": 1.3580806255340576, |
| "learning_rate": 0.001, |
| "loss": 1.5868, |
| "step": 222200 |
| }, |
| { |
| "epoch": 71.84873949579831, |
| "grad_norm": 1.7498301267623901, |
| "learning_rate": 0.001, |
| "loss": 1.5963, |
| "step": 222300 |
| }, |
| { |
| "epoch": 71.88106011635423, |
| "grad_norm": 1.8118958473205566, |
| "learning_rate": 0.001, |
| "loss": 1.5845, |
| "step": 222400 |
| }, |
| { |
| "epoch": 71.91338073691014, |
| "grad_norm": 1.2654248476028442, |
| "learning_rate": 0.001, |
| "loss": 1.5879, |
| "step": 222500 |
| }, |
| { |
| "epoch": 71.94570135746606, |
| "grad_norm": 1.555355429649353, |
| "learning_rate": 0.001, |
| "loss": 1.5891, |
| "step": 222600 |
| }, |
| { |
| "epoch": 71.97802197802197, |
| "grad_norm": 1.1465693712234497, |
| "learning_rate": 0.001, |
| "loss": 1.6082, |
| "step": 222700 |
| }, |
| { |
| "epoch": 72.01034259857789, |
| "grad_norm": 1.6664994955062866, |
| "learning_rate": 0.001, |
| "loss": 1.5414, |
| "step": 222800 |
| }, |
| { |
| "epoch": 72.04266321913381, |
| "grad_norm": 1.639346957206726, |
| "learning_rate": 0.001, |
| "loss": 1.4734, |
| "step": 222900 |
| }, |
| { |
| "epoch": 72.07498383968972, |
| "grad_norm": 1.4530357122421265, |
| "learning_rate": 0.001, |
| "loss": 1.4757, |
| "step": 223000 |
| }, |
| { |
| "epoch": 72.10730446024564, |
| "grad_norm": 1.1979373693466187, |
| "learning_rate": 0.001, |
| "loss": 1.4811, |
| "step": 223100 |
| }, |
| { |
| "epoch": 72.13962508080155, |
| "grad_norm": 1.339179277420044, |
| "learning_rate": 0.001, |
| "loss": 1.4943, |
| "step": 223200 |
| }, |
| { |
| "epoch": 72.17194570135747, |
| "grad_norm": 1.2545099258422852, |
| "learning_rate": 0.001, |
| "loss": 1.4921, |
| "step": 223300 |
| }, |
| { |
| "epoch": 72.20426632191338, |
| "grad_norm": 1.280022382736206, |
| "learning_rate": 0.001, |
| "loss": 1.5095, |
| "step": 223400 |
| }, |
| { |
| "epoch": 72.2365869424693, |
| "grad_norm": 1.3083187341690063, |
| "learning_rate": 0.001, |
| "loss": 1.5142, |
| "step": 223500 |
| }, |
| { |
| "epoch": 72.26890756302521, |
| "grad_norm": 1.3355222940444946, |
| "learning_rate": 0.001, |
| "loss": 1.5164, |
| "step": 223600 |
| }, |
| { |
| "epoch": 72.30122818358113, |
| "grad_norm": 1.3752424716949463, |
| "learning_rate": 0.001, |
| "loss": 1.5246, |
| "step": 223700 |
| }, |
| { |
| "epoch": 72.33354880413704, |
| "grad_norm": 1.200433611869812, |
| "learning_rate": 0.001, |
| "loss": 1.5067, |
| "step": 223800 |
| }, |
| { |
| "epoch": 72.36586942469296, |
| "grad_norm": 1.5312621593475342, |
| "learning_rate": 0.001, |
| "loss": 1.5259, |
| "step": 223900 |
| }, |
| { |
| "epoch": 72.39819004524887, |
| "grad_norm": 1.517161250114441, |
| "learning_rate": 0.001, |
| "loss": 1.5268, |
| "step": 224000 |
| }, |
| { |
| "epoch": 72.43051066580479, |
| "grad_norm": 1.5134165287017822, |
| "learning_rate": 0.001, |
| "loss": 1.5434, |
| "step": 224100 |
| }, |
| { |
| "epoch": 72.4628312863607, |
| "grad_norm": 2.0942986011505127, |
| "learning_rate": 0.001, |
| "loss": 1.5519, |
| "step": 224200 |
| }, |
| { |
| "epoch": 72.49515190691662, |
| "grad_norm": 1.622260332107544, |
| "learning_rate": 0.001, |
| "loss": 1.541, |
| "step": 224300 |
| }, |
| { |
| "epoch": 72.52747252747253, |
| "grad_norm": 1.4113531112670898, |
| "learning_rate": 0.001, |
| "loss": 1.5439, |
| "step": 224400 |
| }, |
| { |
| "epoch": 72.55979314802845, |
| "grad_norm": 1.36627197265625, |
| "learning_rate": 0.001, |
| "loss": 1.5327, |
| "step": 224500 |
| }, |
| { |
| "epoch": 72.59211376858435, |
| "grad_norm": 1.434559941291809, |
| "learning_rate": 0.001, |
| "loss": 1.5554, |
| "step": 224600 |
| }, |
| { |
| "epoch": 72.62443438914028, |
| "grad_norm": 1.475899338722229, |
| "learning_rate": 0.001, |
| "loss": 1.5536, |
| "step": 224700 |
| }, |
| { |
| "epoch": 72.65675500969618, |
| "grad_norm": 1.4711800813674927, |
| "learning_rate": 0.001, |
| "loss": 1.5546, |
| "step": 224800 |
| }, |
| { |
| "epoch": 72.6890756302521, |
| "grad_norm": 1.2975748777389526, |
| "learning_rate": 0.001, |
| "loss": 1.5602, |
| "step": 224900 |
| }, |
| { |
| "epoch": 72.72139625080801, |
| "grad_norm": 1.4378626346588135, |
| "learning_rate": 0.001, |
| "loss": 1.568, |
| "step": 225000 |
| }, |
| { |
| "epoch": 72.75371687136393, |
| "grad_norm": 1.3611150979995728, |
| "learning_rate": 0.001, |
| "loss": 1.5704, |
| "step": 225100 |
| }, |
| { |
| "epoch": 72.78603749191984, |
| "grad_norm": 1.6263785362243652, |
| "learning_rate": 0.001, |
| "loss": 1.5769, |
| "step": 225200 |
| }, |
| { |
| "epoch": 72.81835811247576, |
| "grad_norm": 1.392359733581543, |
| "learning_rate": 0.001, |
| "loss": 1.5909, |
| "step": 225300 |
| }, |
| { |
| "epoch": 72.85067873303167, |
| "grad_norm": 1.764510989189148, |
| "learning_rate": 0.001, |
| "loss": 1.582, |
| "step": 225400 |
| }, |
| { |
| "epoch": 72.88299935358759, |
| "grad_norm": 1.4342442750930786, |
| "learning_rate": 0.001, |
| "loss": 1.5679, |
| "step": 225500 |
| }, |
| { |
| "epoch": 72.9153199741435, |
| "grad_norm": 1.422317385673523, |
| "learning_rate": 0.001, |
| "loss": 1.5692, |
| "step": 225600 |
| }, |
| { |
| "epoch": 72.94764059469942, |
| "grad_norm": 1.768009901046753, |
| "learning_rate": 0.001, |
| "loss": 1.5812, |
| "step": 225700 |
| }, |
| { |
| "epoch": 72.97996121525533, |
| "grad_norm": 1.7353980541229248, |
| "learning_rate": 0.001, |
| "loss": 1.5985, |
| "step": 225800 |
| }, |
| { |
| "epoch": 73.01228183581125, |
| "grad_norm": 1.470423698425293, |
| "learning_rate": 0.001, |
| "loss": 1.5372, |
| "step": 225900 |
| }, |
| { |
| "epoch": 73.04460245636716, |
| "grad_norm": 1.3820226192474365, |
| "learning_rate": 0.001, |
| "loss": 1.4558, |
| "step": 226000 |
| }, |
| { |
| "epoch": 73.07692307692308, |
| "grad_norm": 1.4334419965744019, |
| "learning_rate": 0.001, |
| "loss": 1.4857, |
| "step": 226100 |
| }, |
| { |
| "epoch": 73.10924369747899, |
| "grad_norm": 1.552611231803894, |
| "learning_rate": 0.001, |
| "loss": 1.4796, |
| "step": 226200 |
| }, |
| { |
| "epoch": 73.14156431803491, |
| "grad_norm": 1.4738566875457764, |
| "learning_rate": 0.001, |
| "loss": 1.4877, |
| "step": 226300 |
| }, |
| { |
| "epoch": 73.17388493859082, |
| "grad_norm": 1.2708693742752075, |
| "learning_rate": 0.001, |
| "loss": 1.4875, |
| "step": 226400 |
| }, |
| { |
| "epoch": 73.20620555914674, |
| "grad_norm": 1.2074936628341675, |
| "learning_rate": 0.001, |
| "loss": 1.5055, |
| "step": 226500 |
| }, |
| { |
| "epoch": 73.23852617970265, |
| "grad_norm": 1.4904911518096924, |
| "learning_rate": 0.001, |
| "loss": 1.4873, |
| "step": 226600 |
| }, |
| { |
| "epoch": 73.27084680025857, |
| "grad_norm": 1.4835336208343506, |
| "learning_rate": 0.001, |
| "loss": 1.5047, |
| "step": 226700 |
| }, |
| { |
| "epoch": 73.30316742081448, |
| "grad_norm": 1.8338134288787842, |
| "learning_rate": 0.001, |
| "loss": 1.5133, |
| "step": 226800 |
| }, |
| { |
| "epoch": 73.3354880413704, |
| "grad_norm": 1.603265643119812, |
| "learning_rate": 0.001, |
| "loss": 1.5093, |
| "step": 226900 |
| }, |
| { |
| "epoch": 73.3678086619263, |
| "grad_norm": 1.803918719291687, |
| "learning_rate": 0.001, |
| "loss": 1.5151, |
| "step": 227000 |
| }, |
| { |
| "epoch": 73.40012928248223, |
| "grad_norm": 1.5095746517181396, |
| "learning_rate": 0.001, |
| "loss": 1.5201, |
| "step": 227100 |
| }, |
| { |
| "epoch": 73.43244990303813, |
| "grad_norm": 1.6134722232818604, |
| "learning_rate": 0.001, |
| "loss": 1.5281, |
| "step": 227200 |
| }, |
| { |
| "epoch": 73.46477052359405, |
| "grad_norm": 1.5454301834106445, |
| "learning_rate": 0.001, |
| "loss": 1.5203, |
| "step": 227300 |
| }, |
| { |
| "epoch": 73.49709114414996, |
| "grad_norm": 1.8803797960281372, |
| "learning_rate": 0.001, |
| "loss": 1.5316, |
| "step": 227400 |
| }, |
| { |
| "epoch": 73.52941176470588, |
| "grad_norm": 1.5477262735366821, |
| "learning_rate": 0.001, |
| "loss": 1.5481, |
| "step": 227500 |
| }, |
| { |
| "epoch": 73.56173238526179, |
| "grad_norm": 1.8320611715316772, |
| "learning_rate": 0.001, |
| "loss": 1.5553, |
| "step": 227600 |
| }, |
| { |
| "epoch": 73.59405300581771, |
| "grad_norm": 2.3626441955566406, |
| "learning_rate": 0.001, |
| "loss": 1.541, |
| "step": 227700 |
| }, |
| { |
| "epoch": 73.62637362637362, |
| "grad_norm": 1.5265443325042725, |
| "learning_rate": 0.001, |
| "loss": 1.5509, |
| "step": 227800 |
| }, |
| { |
| "epoch": 73.65869424692954, |
| "grad_norm": 1.6350661516189575, |
| "learning_rate": 0.001, |
| "loss": 1.5515, |
| "step": 227900 |
| }, |
| { |
| "epoch": 73.69101486748545, |
| "grad_norm": 1.40945303440094, |
| "learning_rate": 0.001, |
| "loss": 1.5467, |
| "step": 228000 |
| }, |
| { |
| "epoch": 73.72333548804137, |
| "grad_norm": 1.6472688913345337, |
| "learning_rate": 0.001, |
| "loss": 1.5666, |
| "step": 228100 |
| }, |
| { |
| "epoch": 73.75565610859728, |
| "grad_norm": 1.753916621208191, |
| "learning_rate": 0.001, |
| "loss": 1.5645, |
| "step": 228200 |
| }, |
| { |
| "epoch": 73.7879767291532, |
| "grad_norm": 1.5692429542541504, |
| "learning_rate": 0.001, |
| "loss": 1.5616, |
| "step": 228300 |
| }, |
| { |
| "epoch": 73.82029734970911, |
| "grad_norm": 1.4695786237716675, |
| "learning_rate": 0.001, |
| "loss": 1.563, |
| "step": 228400 |
| }, |
| { |
| "epoch": 73.85261797026503, |
| "grad_norm": 1.5109291076660156, |
| "learning_rate": 0.001, |
| "loss": 1.5702, |
| "step": 228500 |
| }, |
| { |
| "epoch": 73.88493859082094, |
| "grad_norm": 1.3853473663330078, |
| "learning_rate": 0.001, |
| "loss": 1.5721, |
| "step": 228600 |
| }, |
| { |
| "epoch": 73.91725921137686, |
| "grad_norm": 1.181089162826538, |
| "learning_rate": 0.001, |
| "loss": 1.5778, |
| "step": 228700 |
| }, |
| { |
| "epoch": 73.94957983193277, |
| "grad_norm": 1.3903465270996094, |
| "learning_rate": 0.001, |
| "loss": 1.5663, |
| "step": 228800 |
| }, |
| { |
| "epoch": 73.98190045248869, |
| "grad_norm": 1.7607699632644653, |
| "learning_rate": 0.001, |
| "loss": 1.5841, |
| "step": 228900 |
| }, |
| { |
| "epoch": 74.01422107304461, |
| "grad_norm": 1.933079481124878, |
| "learning_rate": 0.001, |
| "loss": 1.4981, |
| "step": 229000 |
| }, |
| { |
| "epoch": 74.04654169360052, |
| "grad_norm": 1.6522470712661743, |
| "learning_rate": 0.001, |
| "loss": 1.4555, |
| "step": 229100 |
| }, |
| { |
| "epoch": 74.07886231415644, |
| "grad_norm": 1.9484916925430298, |
| "learning_rate": 0.001, |
| "loss": 1.4581, |
| "step": 229200 |
| }, |
| { |
| "epoch": 74.11118293471235, |
| "grad_norm": 1.5861504077911377, |
| "learning_rate": 0.001, |
| "loss": 1.4818, |
| "step": 229300 |
| }, |
| { |
| "epoch": 74.14350355526827, |
| "grad_norm": 1.9591395854949951, |
| "learning_rate": 0.001, |
| "loss": 1.47, |
| "step": 229400 |
| }, |
| { |
| "epoch": 74.17582417582418, |
| "grad_norm": 1.5973981618881226, |
| "learning_rate": 0.001, |
| "loss": 1.4859, |
| "step": 229500 |
| }, |
| { |
| "epoch": 74.2081447963801, |
| "grad_norm": 1.7444751262664795, |
| "learning_rate": 0.001, |
| "loss": 1.4906, |
| "step": 229600 |
| }, |
| { |
| "epoch": 74.240465416936, |
| "grad_norm": 2.131800889968872, |
| "learning_rate": 0.001, |
| "loss": 1.4872, |
| "step": 229700 |
| }, |
| { |
| "epoch": 74.27278603749193, |
| "grad_norm": 1.7644002437591553, |
| "learning_rate": 0.001, |
| "loss": 1.4969, |
| "step": 229800 |
| }, |
| { |
| "epoch": 74.30510665804783, |
| "grad_norm": 1.646075963973999, |
| "learning_rate": 0.001, |
| "loss": 1.494, |
| "step": 229900 |
| }, |
| { |
| "epoch": 74.33742727860376, |
| "grad_norm": 1.8765732049942017, |
| "learning_rate": 0.001, |
| "loss": 1.51, |
| "step": 230000 |
| }, |
| { |
| "epoch": 74.36974789915966, |
| "grad_norm": 1.7023745775222778, |
| "learning_rate": 0.001, |
| "loss": 1.5053, |
| "step": 230100 |
| }, |
| { |
| "epoch": 74.40206851971558, |
| "grad_norm": 1.703138828277588, |
| "learning_rate": 0.001, |
| "loss": 1.5195, |
| "step": 230200 |
| }, |
| { |
| "epoch": 74.43438914027149, |
| "grad_norm": 1.6692290306091309, |
| "learning_rate": 0.001, |
| "loss": 1.5259, |
| "step": 230300 |
| }, |
| { |
| "epoch": 74.46670976082741, |
| "grad_norm": 1.418053388595581, |
| "learning_rate": 0.001, |
| "loss": 1.5187, |
| "step": 230400 |
| }, |
| { |
| "epoch": 74.49903038138332, |
| "grad_norm": 1.8168318271636963, |
| "learning_rate": 0.001, |
| "loss": 1.535, |
| "step": 230500 |
| }, |
| { |
| "epoch": 74.53135100193924, |
| "grad_norm": 1.8448301553726196, |
| "learning_rate": 0.001, |
| "loss": 1.5297, |
| "step": 230600 |
| }, |
| { |
| "epoch": 74.56367162249515, |
| "grad_norm": 1.8441119194030762, |
| "learning_rate": 0.001, |
| "loss": 1.538, |
| "step": 230700 |
| }, |
| { |
| "epoch": 74.59599224305107, |
| "grad_norm": 1.601678490638733, |
| "learning_rate": 0.001, |
| "loss": 1.5382, |
| "step": 230800 |
| }, |
| { |
| "epoch": 74.62831286360698, |
| "grad_norm": 1.40133535861969, |
| "learning_rate": 0.001, |
| "loss": 1.5457, |
| "step": 230900 |
| }, |
| { |
| "epoch": 74.6606334841629, |
| "grad_norm": 1.3961271047592163, |
| "learning_rate": 0.001, |
| "loss": 1.5447, |
| "step": 231000 |
| }, |
| { |
| "epoch": 74.69295410471881, |
| "grad_norm": 1.5519992113113403, |
| "learning_rate": 0.001, |
| "loss": 1.5392, |
| "step": 231100 |
| }, |
| { |
| "epoch": 74.72527472527473, |
| "grad_norm": 1.3843839168548584, |
| "learning_rate": 0.001, |
| "loss": 1.5474, |
| "step": 231200 |
| }, |
| { |
| "epoch": 74.75759534583064, |
| "grad_norm": 1.7330049276351929, |
| "learning_rate": 0.001, |
| "loss": 1.5534, |
| "step": 231300 |
| }, |
| { |
| "epoch": 74.78991596638656, |
| "grad_norm": 1.7051910161972046, |
| "learning_rate": 0.001, |
| "loss": 1.5532, |
| "step": 231400 |
| }, |
| { |
| "epoch": 74.82223658694247, |
| "grad_norm": 1.8478976488113403, |
| "learning_rate": 0.001, |
| "loss": 1.5618, |
| "step": 231500 |
| }, |
| { |
| "epoch": 74.85455720749839, |
| "grad_norm": 1.7964320182800293, |
| "learning_rate": 0.001, |
| "loss": 1.5505, |
| "step": 231600 |
| }, |
| { |
| "epoch": 74.8868778280543, |
| "grad_norm": 2.2320656776428223, |
| "learning_rate": 0.001, |
| "loss": 1.5654, |
| "step": 231700 |
| }, |
| { |
| "epoch": 74.91919844861022, |
| "grad_norm": 1.874264121055603, |
| "learning_rate": 0.001, |
| "loss": 1.5705, |
| "step": 231800 |
| }, |
| { |
| "epoch": 74.95151906916612, |
| "grad_norm": 1.4276374578475952, |
| "learning_rate": 0.001, |
| "loss": 1.5734, |
| "step": 231900 |
| }, |
| { |
| "epoch": 74.98383968972205, |
| "grad_norm": 1.7904925346374512, |
| "learning_rate": 0.001, |
| "loss": 1.5732, |
| "step": 232000 |
| }, |
| { |
| "epoch": 75.01616031027795, |
| "grad_norm": 1.8452962636947632, |
| "learning_rate": 0.001, |
| "loss": 1.4718, |
| "step": 232100 |
| }, |
| { |
| "epoch": 75.04848093083388, |
| "grad_norm": 2.6169495582580566, |
| "learning_rate": 0.001, |
| "loss": 1.4575, |
| "step": 232200 |
| }, |
| { |
| "epoch": 75.08080155138978, |
| "grad_norm": 1.707862377166748, |
| "learning_rate": 0.001, |
| "loss": 1.4386, |
| "step": 232300 |
| }, |
| { |
| "epoch": 75.1131221719457, |
| "grad_norm": 1.8327504396438599, |
| "learning_rate": 0.001, |
| "loss": 1.4743, |
| "step": 232400 |
| }, |
| { |
| "epoch": 75.14544279250161, |
| "grad_norm": 2.308570384979248, |
| "learning_rate": 0.001, |
| "loss": 1.4821, |
| "step": 232500 |
| }, |
| { |
| "epoch": 75.17776341305753, |
| "grad_norm": 2.6504032611846924, |
| "learning_rate": 0.001, |
| "loss": 1.4765, |
| "step": 232600 |
| }, |
| { |
| "epoch": 75.21008403361344, |
| "grad_norm": 2.242478132247925, |
| "learning_rate": 0.001, |
| "loss": 1.4719, |
| "step": 232700 |
| }, |
| { |
| "epoch": 75.24240465416936, |
| "grad_norm": 2.303994655609131, |
| "learning_rate": 0.001, |
| "loss": 1.4753, |
| "step": 232800 |
| }, |
| { |
| "epoch": 75.27472527472527, |
| "grad_norm": 2.3544199466705322, |
| "learning_rate": 0.001, |
| "loss": 1.5065, |
| "step": 232900 |
| }, |
| { |
| "epoch": 75.30704589528119, |
| "grad_norm": 1.9674110412597656, |
| "learning_rate": 0.001, |
| "loss": 1.5036, |
| "step": 233000 |
| }, |
| { |
| "epoch": 75.3393665158371, |
| "grad_norm": 1.8192665576934814, |
| "learning_rate": 0.001, |
| "loss": 1.495, |
| "step": 233100 |
| }, |
| { |
| "epoch": 75.37168713639302, |
| "grad_norm": 2.133833408355713, |
| "learning_rate": 0.001, |
| "loss": 1.4959, |
| "step": 233200 |
| }, |
| { |
| "epoch": 75.40400775694893, |
| "grad_norm": 2.5970709323883057, |
| "learning_rate": 0.001, |
| "loss": 1.5083, |
| "step": 233300 |
| }, |
| { |
| "epoch": 75.43632837750485, |
| "grad_norm": 2.0291748046875, |
| "learning_rate": 0.001, |
| "loss": 1.5127, |
| "step": 233400 |
| }, |
| { |
| "epoch": 75.46864899806076, |
| "grad_norm": 1.7293260097503662, |
| "learning_rate": 0.001, |
| "loss": 1.5247, |
| "step": 233500 |
| }, |
| { |
| "epoch": 75.50096961861668, |
| "grad_norm": 2.267519235610962, |
| "learning_rate": 0.001, |
| "loss": 1.5094, |
| "step": 233600 |
| }, |
| { |
| "epoch": 75.53329023917259, |
| "grad_norm": 1.6786974668502808, |
| "learning_rate": 0.001, |
| "loss": 1.5213, |
| "step": 233700 |
| }, |
| { |
| "epoch": 75.56561085972851, |
| "grad_norm": 1.8709112405776978, |
| "learning_rate": 0.001, |
| "loss": 1.5404, |
| "step": 233800 |
| }, |
| { |
| "epoch": 75.59793148028442, |
| "grad_norm": 1.9633324146270752, |
| "learning_rate": 0.001, |
| "loss": 1.5208, |
| "step": 233900 |
| }, |
| { |
| "epoch": 75.63025210084034, |
| "grad_norm": 1.9440094232559204, |
| "learning_rate": 0.001, |
| "loss": 1.5271, |
| "step": 234000 |
| }, |
| { |
| "epoch": 75.66257272139624, |
| "grad_norm": 1.935779333114624, |
| "learning_rate": 0.001, |
| "loss": 1.5306, |
| "step": 234100 |
| }, |
| { |
| "epoch": 75.69489334195217, |
| "grad_norm": 2.249645948410034, |
| "learning_rate": 0.001, |
| "loss": 1.5325, |
| "step": 234200 |
| }, |
| { |
| "epoch": 75.72721396250807, |
| "grad_norm": 2.0431575775146484, |
| "learning_rate": 0.001, |
| "loss": 1.5446, |
| "step": 234300 |
| }, |
| { |
| "epoch": 75.759534583064, |
| "grad_norm": 2.305968999862671, |
| "learning_rate": 0.001, |
| "loss": 1.5575, |
| "step": 234400 |
| }, |
| { |
| "epoch": 75.7918552036199, |
| "grad_norm": 1.8218010663986206, |
| "learning_rate": 0.001, |
| "loss": 1.5389, |
| "step": 234500 |
| }, |
| { |
| "epoch": 75.82417582417582, |
| "grad_norm": 2.15047025680542, |
| "learning_rate": 0.001, |
| "loss": 1.5369, |
| "step": 234600 |
| }, |
| { |
| "epoch": 75.85649644473173, |
| "grad_norm": 2.2102482318878174, |
| "learning_rate": 0.001, |
| "loss": 1.5545, |
| "step": 234700 |
| }, |
| { |
| "epoch": 75.88881706528765, |
| "grad_norm": 2.4484729766845703, |
| "learning_rate": 0.001, |
| "loss": 1.5664, |
| "step": 234800 |
| }, |
| { |
| "epoch": 75.92113768584356, |
| "grad_norm": 2.1117191314697266, |
| "learning_rate": 0.001, |
| "loss": 1.5666, |
| "step": 234900 |
| }, |
| { |
| "epoch": 75.95345830639948, |
| "grad_norm": 2.8818562030792236, |
| "learning_rate": 0.001, |
| "loss": 1.5675, |
| "step": 235000 |
| }, |
| { |
| "epoch": 75.98577892695539, |
| "grad_norm": 2.7250523567199707, |
| "learning_rate": 0.001, |
| "loss": 1.5668, |
| "step": 235100 |
| }, |
| { |
| "epoch": 76.01809954751131, |
| "grad_norm": 2.0469698905944824, |
| "learning_rate": 0.001, |
| "loss": 1.5111, |
| "step": 235200 |
| }, |
| { |
| "epoch": 76.05042016806723, |
| "grad_norm": 1.6739189624786377, |
| "learning_rate": 0.001, |
| "loss": 1.4421, |
| "step": 235300 |
| }, |
| { |
| "epoch": 76.08274078862314, |
| "grad_norm": 1.7519482374191284, |
| "learning_rate": 0.001, |
| "loss": 1.4533, |
| "step": 235400 |
| }, |
| { |
| "epoch": 76.11506140917906, |
| "grad_norm": 1.2195477485656738, |
| "learning_rate": 0.001, |
| "loss": 1.4447, |
| "step": 235500 |
| }, |
| { |
| "epoch": 76.14738202973497, |
| "grad_norm": 1.7513339519500732, |
| "learning_rate": 0.001, |
| "loss": 1.4578, |
| "step": 235600 |
| }, |
| { |
| "epoch": 76.17970265029089, |
| "grad_norm": 1.6105413436889648, |
| "learning_rate": 0.001, |
| "loss": 1.4734, |
| "step": 235700 |
| }, |
| { |
| "epoch": 76.2120232708468, |
| "grad_norm": 1.4778788089752197, |
| "learning_rate": 0.001, |
| "loss": 1.4741, |
| "step": 235800 |
| }, |
| { |
| "epoch": 76.24434389140272, |
| "grad_norm": 1.4792375564575195, |
| "learning_rate": 0.001, |
| "loss": 1.4773, |
| "step": 235900 |
| }, |
| { |
| "epoch": 76.27666451195863, |
| "grad_norm": 1.8309617042541504, |
| "learning_rate": 0.001, |
| "loss": 1.4803, |
| "step": 236000 |
| }, |
| { |
| "epoch": 76.30898513251455, |
| "grad_norm": 1.7483227252960205, |
| "learning_rate": 0.001, |
| "loss": 1.4851, |
| "step": 236100 |
| }, |
| { |
| "epoch": 76.34130575307046, |
| "grad_norm": 1.1991394758224487, |
| "learning_rate": 0.001, |
| "loss": 1.4842, |
| "step": 236200 |
| }, |
| { |
| "epoch": 76.37362637362638, |
| "grad_norm": 1.6364467144012451, |
| "learning_rate": 0.001, |
| "loss": 1.5046, |
| "step": 236300 |
| }, |
| { |
| "epoch": 76.40594699418229, |
| "grad_norm": 1.481478214263916, |
| "learning_rate": 0.001, |
| "loss": 1.5063, |
| "step": 236400 |
| }, |
| { |
| "epoch": 76.43826761473821, |
| "grad_norm": 1.7369900941848755, |
| "learning_rate": 0.001, |
| "loss": 1.5113, |
| "step": 236500 |
| }, |
| { |
| "epoch": 76.47058823529412, |
| "grad_norm": 1.894484043121338, |
| "learning_rate": 0.001, |
| "loss": 1.501, |
| "step": 236600 |
| }, |
| { |
| "epoch": 76.50290885585004, |
| "grad_norm": 1.4116283655166626, |
| "learning_rate": 0.001, |
| "loss": 1.5078, |
| "step": 236700 |
| }, |
| { |
| "epoch": 76.53522947640595, |
| "grad_norm": 1.3731389045715332, |
| "learning_rate": 0.001, |
| "loss": 1.5154, |
| "step": 236800 |
| }, |
| { |
| "epoch": 76.56755009696187, |
| "grad_norm": 2.088660478591919, |
| "learning_rate": 0.001, |
| "loss": 1.5316, |
| "step": 236900 |
| }, |
| { |
| "epoch": 76.59987071751777, |
| "grad_norm": 1.4767850637435913, |
| "learning_rate": 0.001, |
| "loss": 1.5417, |
| "step": 237000 |
| }, |
| { |
| "epoch": 76.6321913380737, |
| "grad_norm": 1.584747076034546, |
| "learning_rate": 0.001, |
| "loss": 1.5225, |
| "step": 237100 |
| }, |
| { |
| "epoch": 76.6645119586296, |
| "grad_norm": 1.8084192276000977, |
| "learning_rate": 0.001, |
| "loss": 1.5176, |
| "step": 237200 |
| }, |
| { |
| "epoch": 76.69683257918552, |
| "grad_norm": 1.3601627349853516, |
| "learning_rate": 0.001, |
| "loss": 1.5344, |
| "step": 237300 |
| }, |
| { |
| "epoch": 76.72915319974143, |
| "grad_norm": 1.4814226627349854, |
| "learning_rate": 0.001, |
| "loss": 1.5454, |
| "step": 237400 |
| }, |
| { |
| "epoch": 76.76147382029735, |
| "grad_norm": 1.7931079864501953, |
| "learning_rate": 0.001, |
| "loss": 1.5104, |
| "step": 237500 |
| }, |
| { |
| "epoch": 76.79379444085326, |
| "grad_norm": 1.7696950435638428, |
| "learning_rate": 0.001, |
| "loss": 1.547, |
| "step": 237600 |
| }, |
| { |
| "epoch": 76.82611506140918, |
| "grad_norm": 1.9078216552734375, |
| "learning_rate": 0.001, |
| "loss": 1.5307, |
| "step": 237700 |
| }, |
| { |
| "epoch": 76.85843568196509, |
| "grad_norm": 1.6045186519622803, |
| "learning_rate": 0.001, |
| "loss": 1.5539, |
| "step": 237800 |
| }, |
| { |
| "epoch": 76.89075630252101, |
| "grad_norm": 1.2418279647827148, |
| "learning_rate": 0.001, |
| "loss": 1.5479, |
| "step": 237900 |
| }, |
| { |
| "epoch": 76.92307692307692, |
| "grad_norm": 1.5800580978393555, |
| "learning_rate": 0.001, |
| "loss": 1.5538, |
| "step": 238000 |
| }, |
| { |
| "epoch": 76.95539754363284, |
| "grad_norm": 1.7912734746932983, |
| "learning_rate": 0.001, |
| "loss": 1.5633, |
| "step": 238100 |
| }, |
| { |
| "epoch": 76.98771816418875, |
| "grad_norm": 1.3797253370285034, |
| "learning_rate": 0.001, |
| "loss": 1.5384, |
| "step": 238200 |
| }, |
| { |
| "epoch": 77.02003878474467, |
| "grad_norm": 1.6609480381011963, |
| "learning_rate": 0.001, |
| "loss": 1.4973, |
| "step": 238300 |
| }, |
| { |
| "epoch": 77.05235940530058, |
| "grad_norm": 1.7042680978775024, |
| "learning_rate": 0.001, |
| "loss": 1.4333, |
| "step": 238400 |
| }, |
| { |
| "epoch": 77.0846800258565, |
| "grad_norm": 1.32135009765625, |
| "learning_rate": 0.001, |
| "loss": 1.4579, |
| "step": 238500 |
| }, |
| { |
| "epoch": 77.11700064641241, |
| "grad_norm": 1.302638053894043, |
| "learning_rate": 0.001, |
| "loss": 1.4579, |
| "step": 238600 |
| }, |
| { |
| "epoch": 77.14932126696833, |
| "grad_norm": 1.463110089302063, |
| "learning_rate": 0.001, |
| "loss": 1.4563, |
| "step": 238700 |
| }, |
| { |
| "epoch": 77.18164188752424, |
| "grad_norm": 1.5074396133422852, |
| "learning_rate": 0.001, |
| "loss": 1.4651, |
| "step": 238800 |
| }, |
| { |
| "epoch": 77.21396250808016, |
| "grad_norm": 1.4624207019805908, |
| "learning_rate": 0.001, |
| "loss": 1.4642, |
| "step": 238900 |
| }, |
| { |
| "epoch": 77.24628312863607, |
| "grad_norm": 1.4944427013397217, |
| "learning_rate": 0.001, |
| "loss": 1.4593, |
| "step": 239000 |
| }, |
| { |
| "epoch": 77.27860374919199, |
| "grad_norm": 2.2866570949554443, |
| "learning_rate": 0.001, |
| "loss": 1.4749, |
| "step": 239100 |
| }, |
| { |
| "epoch": 77.3109243697479, |
| "grad_norm": 2.0275280475616455, |
| "learning_rate": 0.001, |
| "loss": 1.487, |
| "step": 239200 |
| }, |
| { |
| "epoch": 77.34324499030382, |
| "grad_norm": 1.6738691329956055, |
| "learning_rate": 0.001, |
| "loss": 1.4711, |
| "step": 239300 |
| }, |
| { |
| "epoch": 77.37556561085972, |
| "grad_norm": 1.8057903051376343, |
| "learning_rate": 0.001, |
| "loss": 1.5001, |
| "step": 239400 |
| }, |
| { |
| "epoch": 77.40788623141565, |
| "grad_norm": 1.3166172504425049, |
| "learning_rate": 0.001, |
| "loss": 1.4909, |
| "step": 239500 |
| }, |
| { |
| "epoch": 77.44020685197155, |
| "grad_norm": 1.8812742233276367, |
| "learning_rate": 0.001, |
| "loss": 1.4962, |
| "step": 239600 |
| }, |
| { |
| "epoch": 77.47252747252747, |
| "grad_norm": 1.6621681451797485, |
| "learning_rate": 0.001, |
| "loss": 1.5079, |
| "step": 239700 |
| }, |
| { |
| "epoch": 77.50484809308338, |
| "grad_norm": 1.238905668258667, |
| "learning_rate": 0.001, |
| "loss": 1.4916, |
| "step": 239800 |
| }, |
| { |
| "epoch": 77.5371687136393, |
| "grad_norm": 1.3346996307373047, |
| "learning_rate": 0.001, |
| "loss": 1.5014, |
| "step": 239900 |
| }, |
| { |
| "epoch": 77.56948933419521, |
| "grad_norm": 1.3015086650848389, |
| "learning_rate": 0.001, |
| "loss": 1.5032, |
| "step": 240000 |
| }, |
| { |
| "epoch": 77.60180995475113, |
| "grad_norm": 1.2718425989151, |
| "learning_rate": 0.001, |
| "loss": 1.498, |
| "step": 240100 |
| }, |
| { |
| "epoch": 77.63413057530704, |
| "grad_norm": 1.5175855159759521, |
| "learning_rate": 0.001, |
| "loss": 1.5267, |
| "step": 240200 |
| }, |
| { |
| "epoch": 77.66645119586296, |
| "grad_norm": 1.2660694122314453, |
| "learning_rate": 0.001, |
| "loss": 1.5257, |
| "step": 240300 |
| }, |
| { |
| "epoch": 77.69877181641887, |
| "grad_norm": 1.3283731937408447, |
| "learning_rate": 0.001, |
| "loss": 1.5231, |
| "step": 240400 |
| }, |
| { |
| "epoch": 77.73109243697479, |
| "grad_norm": 1.6311471462249756, |
| "learning_rate": 0.001, |
| "loss": 1.5289, |
| "step": 240500 |
| }, |
| { |
| "epoch": 77.7634130575307, |
| "grad_norm": 1.6861320734024048, |
| "learning_rate": 0.001, |
| "loss": 1.5224, |
| "step": 240600 |
| }, |
| { |
| "epoch": 77.79573367808662, |
| "grad_norm": 1.559901237487793, |
| "learning_rate": 0.001, |
| "loss": 1.5299, |
| "step": 240700 |
| }, |
| { |
| "epoch": 77.82805429864253, |
| "grad_norm": 1.2827755212783813, |
| "learning_rate": 0.001, |
| "loss": 1.5427, |
| "step": 240800 |
| }, |
| { |
| "epoch": 77.86037491919845, |
| "grad_norm": 1.285235047340393, |
| "learning_rate": 0.001, |
| "loss": 1.5308, |
| "step": 240900 |
| }, |
| { |
| "epoch": 77.89269553975436, |
| "grad_norm": 1.173821210861206, |
| "learning_rate": 0.001, |
| "loss": 1.5255, |
| "step": 241000 |
| }, |
| { |
| "epoch": 77.92501616031028, |
| "grad_norm": 1.1771650314331055, |
| "learning_rate": 0.001, |
| "loss": 1.5458, |
| "step": 241100 |
| }, |
| { |
| "epoch": 77.95733678086619, |
| "grad_norm": 1.4045838117599487, |
| "learning_rate": 0.001, |
| "loss": 1.5496, |
| "step": 241200 |
| }, |
| { |
| "epoch": 77.98965740142211, |
| "grad_norm": 1.7626705169677734, |
| "learning_rate": 0.001, |
| "loss": 1.5402, |
| "step": 241300 |
| }, |
| { |
| "epoch": 78.02197802197803, |
| "grad_norm": 1.3559398651123047, |
| "learning_rate": 0.001, |
| "loss": 1.4808, |
| "step": 241400 |
| }, |
| { |
| "epoch": 78.05429864253394, |
| "grad_norm": 1.2719939947128296, |
| "learning_rate": 0.001, |
| "loss": 1.4271, |
| "step": 241500 |
| }, |
| { |
| "epoch": 78.08661926308986, |
| "grad_norm": 1.233944296836853, |
| "learning_rate": 0.001, |
| "loss": 1.4339, |
| "step": 241600 |
| }, |
| { |
| "epoch": 78.11893988364577, |
| "grad_norm": 1.182702660560608, |
| "learning_rate": 0.001, |
| "loss": 1.4389, |
| "step": 241700 |
| }, |
| { |
| "epoch": 78.15126050420169, |
| "grad_norm": 1.380983829498291, |
| "learning_rate": 0.001, |
| "loss": 1.4442, |
| "step": 241800 |
| }, |
| { |
| "epoch": 78.1835811247576, |
| "grad_norm": 1.6417757272720337, |
| "learning_rate": 0.001, |
| "loss": 1.4632, |
| "step": 241900 |
| }, |
| { |
| "epoch": 78.21590174531352, |
| "grad_norm": 1.3505266904830933, |
| "learning_rate": 0.001, |
| "loss": 1.4413, |
| "step": 242000 |
| }, |
| { |
| "epoch": 78.24822236586942, |
| "grad_norm": 1.4652029275894165, |
| "learning_rate": 0.001, |
| "loss": 1.4585, |
| "step": 242100 |
| }, |
| { |
| "epoch": 78.28054298642535, |
| "grad_norm": 1.0839201211929321, |
| "learning_rate": 0.001, |
| "loss": 1.4762, |
| "step": 242200 |
| }, |
| { |
| "epoch": 78.31286360698125, |
| "grad_norm": 2.6254734992980957, |
| "learning_rate": 0.001, |
| "loss": 1.4708, |
| "step": 242300 |
| }, |
| { |
| "epoch": 78.34518422753717, |
| "grad_norm": 1.1155922412872314, |
| "learning_rate": 0.001, |
| "loss": 1.4752, |
| "step": 242400 |
| }, |
| { |
| "epoch": 78.37750484809308, |
| "grad_norm": 1.1666878461837769, |
| "learning_rate": 0.001, |
| "loss": 1.4932, |
| "step": 242500 |
| }, |
| { |
| "epoch": 78.409825468649, |
| "grad_norm": 1.032177448272705, |
| "learning_rate": 0.001, |
| "loss": 1.4745, |
| "step": 242600 |
| }, |
| { |
| "epoch": 78.44214608920491, |
| "grad_norm": 1.311974287033081, |
| "learning_rate": 0.001, |
| "loss": 1.4818, |
| "step": 242700 |
| }, |
| { |
| "epoch": 78.47446670976083, |
| "grad_norm": 1.2213555574417114, |
| "learning_rate": 0.001, |
| "loss": 1.4865, |
| "step": 242800 |
| }, |
| { |
| "epoch": 78.50678733031674, |
| "grad_norm": 1.4418728351593018, |
| "learning_rate": 0.001, |
| "loss": 1.4842, |
| "step": 242900 |
| }, |
| { |
| "epoch": 78.53910795087266, |
| "grad_norm": 1.4388240575790405, |
| "learning_rate": 0.001, |
| "loss": 1.514, |
| "step": 243000 |
| }, |
| { |
| "epoch": 78.57142857142857, |
| "grad_norm": 1.6182565689086914, |
| "learning_rate": 0.001, |
| "loss": 1.5051, |
| "step": 243100 |
| }, |
| { |
| "epoch": 78.60374919198449, |
| "grad_norm": 1.4511289596557617, |
| "learning_rate": 0.001, |
| "loss": 1.5017, |
| "step": 243200 |
| }, |
| { |
| "epoch": 78.6360698125404, |
| "grad_norm": 1.4314979314804077, |
| "learning_rate": 0.001, |
| "loss": 1.5093, |
| "step": 243300 |
| }, |
| { |
| "epoch": 78.66839043309632, |
| "grad_norm": 1.3016643524169922, |
| "learning_rate": 0.001, |
| "loss": 1.5105, |
| "step": 243400 |
| }, |
| { |
| "epoch": 78.70071105365223, |
| "grad_norm": 1.3273727893829346, |
| "learning_rate": 0.001, |
| "loss": 1.5116, |
| "step": 243500 |
| }, |
| { |
| "epoch": 78.73303167420815, |
| "grad_norm": 1.3437339067459106, |
| "learning_rate": 0.001, |
| "loss": 1.5217, |
| "step": 243600 |
| }, |
| { |
| "epoch": 78.76535229476406, |
| "grad_norm": 1.8093808889389038, |
| "learning_rate": 0.001, |
| "loss": 1.523, |
| "step": 243700 |
| }, |
| { |
| "epoch": 78.79767291531998, |
| "grad_norm": 1.3972852230072021, |
| "learning_rate": 0.001, |
| "loss": 1.5323, |
| "step": 243800 |
| }, |
| { |
| "epoch": 78.82999353587589, |
| "grad_norm": 1.1998374462127686, |
| "learning_rate": 0.001, |
| "loss": 1.5329, |
| "step": 243900 |
| }, |
| { |
| "epoch": 78.86231415643181, |
| "grad_norm": 1.5502599477767944, |
| "learning_rate": 0.001, |
| "loss": 1.5409, |
| "step": 244000 |
| }, |
| { |
| "epoch": 78.89463477698771, |
| "grad_norm": 1.1933624744415283, |
| "learning_rate": 0.001, |
| "loss": 1.5201, |
| "step": 244100 |
| }, |
| { |
| "epoch": 78.92695539754364, |
| "grad_norm": 1.1095892190933228, |
| "learning_rate": 0.001, |
| "loss": 1.539, |
| "step": 244200 |
| }, |
| { |
| "epoch": 78.95927601809954, |
| "grad_norm": 1.1915327310562134, |
| "learning_rate": 0.001, |
| "loss": 1.5508, |
| "step": 244300 |
| }, |
| { |
| "epoch": 78.99159663865547, |
| "grad_norm": 0.9810858368873596, |
| "learning_rate": 0.001, |
| "loss": 1.5415, |
| "step": 244400 |
| }, |
| { |
| "epoch": 79.02391725921137, |
| "grad_norm": 1.1638388633728027, |
| "learning_rate": 0.001, |
| "loss": 1.4601, |
| "step": 244500 |
| }, |
| { |
| "epoch": 79.0562378797673, |
| "grad_norm": 1.7552450895309448, |
| "learning_rate": 0.001, |
| "loss": 1.4161, |
| "step": 244600 |
| }, |
| { |
| "epoch": 79.0885585003232, |
| "grad_norm": 1.2643893957138062, |
| "learning_rate": 0.001, |
| "loss": 1.4078, |
| "step": 244700 |
| }, |
| { |
| "epoch": 79.12087912087912, |
| "grad_norm": 1.566015601158142, |
| "learning_rate": 0.001, |
| "loss": 1.4305, |
| "step": 244800 |
| }, |
| { |
| "epoch": 79.15319974143503, |
| "grad_norm": 1.5437681674957275, |
| "learning_rate": 0.001, |
| "loss": 1.4536, |
| "step": 244900 |
| }, |
| { |
| "epoch": 79.18552036199095, |
| "grad_norm": 1.9942206144332886, |
| "learning_rate": 0.001, |
| "loss": 1.4516, |
| "step": 245000 |
| }, |
| { |
| "epoch": 79.21784098254686, |
| "grad_norm": 1.1073012351989746, |
| "learning_rate": 0.001, |
| "loss": 1.4669, |
| "step": 245100 |
| }, |
| { |
| "epoch": 79.25016160310278, |
| "grad_norm": 1.2855706214904785, |
| "learning_rate": 0.001, |
| "loss": 1.4514, |
| "step": 245200 |
| }, |
| { |
| "epoch": 79.28248222365869, |
| "grad_norm": 1.5170331001281738, |
| "learning_rate": 0.001, |
| "loss": 1.4588, |
| "step": 245300 |
| }, |
| { |
| "epoch": 79.31480284421461, |
| "grad_norm": 1.9636250734329224, |
| "learning_rate": 0.001, |
| "loss": 1.4642, |
| "step": 245400 |
| }, |
| { |
| "epoch": 79.34712346477052, |
| "grad_norm": 1.4256482124328613, |
| "learning_rate": 0.001, |
| "loss": 1.4727, |
| "step": 245500 |
| }, |
| { |
| "epoch": 79.37944408532644, |
| "grad_norm": 1.501046895980835, |
| "learning_rate": 0.001, |
| "loss": 1.4727, |
| "step": 245600 |
| }, |
| { |
| "epoch": 79.41176470588235, |
| "grad_norm": 1.8273255825042725, |
| "learning_rate": 0.001, |
| "loss": 1.4813, |
| "step": 245700 |
| }, |
| { |
| "epoch": 79.44408532643827, |
| "grad_norm": 1.8996268510818481, |
| "learning_rate": 0.001, |
| "loss": 1.4857, |
| "step": 245800 |
| }, |
| { |
| "epoch": 79.47640594699418, |
| "grad_norm": 1.5301989316940308, |
| "learning_rate": 0.001, |
| "loss": 1.476, |
| "step": 245900 |
| }, |
| { |
| "epoch": 79.5087265675501, |
| "grad_norm": 1.4433398246765137, |
| "learning_rate": 0.001, |
| "loss": 1.4826, |
| "step": 246000 |
| }, |
| { |
| "epoch": 79.541047188106, |
| "grad_norm": 1.4371416568756104, |
| "learning_rate": 0.001, |
| "loss": 1.4859, |
| "step": 246100 |
| }, |
| { |
| "epoch": 79.57336780866193, |
| "grad_norm": 1.288563847541809, |
| "learning_rate": 0.001, |
| "loss": 1.4988, |
| "step": 246200 |
| }, |
| { |
| "epoch": 79.60568842921784, |
| "grad_norm": 1.3247302770614624, |
| "learning_rate": 0.001, |
| "loss": 1.4985, |
| "step": 246300 |
| }, |
| { |
| "epoch": 79.63800904977376, |
| "grad_norm": 1.4887481927871704, |
| "learning_rate": 0.001, |
| "loss": 1.497, |
| "step": 246400 |
| }, |
| { |
| "epoch": 79.67032967032966, |
| "grad_norm": 1.3735618591308594, |
| "learning_rate": 0.001, |
| "loss": 1.5049, |
| "step": 246500 |
| }, |
| { |
| "epoch": 79.70265029088559, |
| "grad_norm": 1.4121897220611572, |
| "learning_rate": 0.001, |
| "loss": 1.5014, |
| "step": 246600 |
| }, |
| { |
| "epoch": 79.7349709114415, |
| "grad_norm": 1.568111777305603, |
| "learning_rate": 0.001, |
| "loss": 1.5004, |
| "step": 246700 |
| }, |
| { |
| "epoch": 79.76729153199742, |
| "grad_norm": 1.5190857648849487, |
| "learning_rate": 0.001, |
| "loss": 1.5016, |
| "step": 246800 |
| }, |
| { |
| "epoch": 79.79961215255332, |
| "grad_norm": 1.287047028541565, |
| "learning_rate": 0.001, |
| "loss": 1.5353, |
| "step": 246900 |
| }, |
| { |
| "epoch": 79.83193277310924, |
| "grad_norm": 1.403714656829834, |
| "learning_rate": 0.001, |
| "loss": 1.5267, |
| "step": 247000 |
| }, |
| { |
| "epoch": 79.86425339366515, |
| "grad_norm": 1.180440068244934, |
| "learning_rate": 0.001, |
| "loss": 1.5313, |
| "step": 247100 |
| }, |
| { |
| "epoch": 79.89657401422107, |
| "grad_norm": 1.1998322010040283, |
| "learning_rate": 0.001, |
| "loss": 1.5293, |
| "step": 247200 |
| }, |
| { |
| "epoch": 79.92889463477698, |
| "grad_norm": 1.2172197103500366, |
| "learning_rate": 0.001, |
| "loss": 1.5364, |
| "step": 247300 |
| }, |
| { |
| "epoch": 79.9612152553329, |
| "grad_norm": 1.5113470554351807, |
| "learning_rate": 0.001, |
| "loss": 1.5482, |
| "step": 247400 |
| }, |
| { |
| "epoch": 79.99353587588882, |
| "grad_norm": 1.7513564825057983, |
| "learning_rate": 0.001, |
| "loss": 1.531, |
| "step": 247500 |
| }, |
| { |
| "epoch": 80.02585649644473, |
| "grad_norm": 1.1894452571868896, |
| "learning_rate": 0.001, |
| "loss": 1.4383, |
| "step": 247600 |
| }, |
| { |
| "epoch": 80.05817711700065, |
| "grad_norm": 1.2569270133972168, |
| "learning_rate": 0.001, |
| "loss": 1.416, |
| "step": 247700 |
| }, |
| { |
| "epoch": 80.09049773755656, |
| "grad_norm": 1.8621593713760376, |
| "learning_rate": 0.001, |
| "loss": 1.4129, |
| "step": 247800 |
| }, |
| { |
| "epoch": 80.12281835811248, |
| "grad_norm": 1.2772419452667236, |
| "learning_rate": 0.001, |
| "loss": 1.429, |
| "step": 247900 |
| }, |
| { |
| "epoch": 80.15513897866839, |
| "grad_norm": 1.5088549852371216, |
| "learning_rate": 0.001, |
| "loss": 1.4407, |
| "step": 248000 |
| }, |
| { |
| "epoch": 80.18745959922431, |
| "grad_norm": 1.2808599472045898, |
| "learning_rate": 0.001, |
| "loss": 1.4328, |
| "step": 248100 |
| }, |
| { |
| "epoch": 80.21978021978022, |
| "grad_norm": 1.5279263257980347, |
| "learning_rate": 0.001, |
| "loss": 1.4476, |
| "step": 248200 |
| }, |
| { |
| "epoch": 80.25210084033614, |
| "grad_norm": 1.5252046585083008, |
| "learning_rate": 0.001, |
| "loss": 1.4425, |
| "step": 248300 |
| }, |
| { |
| "epoch": 80.28442146089205, |
| "grad_norm": 1.759626030921936, |
| "learning_rate": 0.001, |
| "loss": 1.4678, |
| "step": 248400 |
| }, |
| { |
| "epoch": 80.31674208144797, |
| "grad_norm": 1.4535683393478394, |
| "learning_rate": 0.001, |
| "loss": 1.4482, |
| "step": 248500 |
| }, |
| { |
| "epoch": 80.34906270200388, |
| "grad_norm": 1.4096509218215942, |
| "learning_rate": 0.001, |
| "loss": 1.4594, |
| "step": 248600 |
| }, |
| { |
| "epoch": 80.3813833225598, |
| "grad_norm": 1.2100415229797363, |
| "learning_rate": 0.001, |
| "loss": 1.4807, |
| "step": 248700 |
| }, |
| { |
| "epoch": 80.4137039431157, |
| "grad_norm": 1.5168434381484985, |
| "learning_rate": 0.001, |
| "loss": 1.4731, |
| "step": 248800 |
| }, |
| { |
| "epoch": 80.44602456367163, |
| "grad_norm": 1.6755900382995605, |
| "learning_rate": 0.001, |
| "loss": 1.4677, |
| "step": 248900 |
| }, |
| { |
| "epoch": 80.47834518422754, |
| "grad_norm": 1.6561522483825684, |
| "learning_rate": 0.001, |
| "loss": 1.4783, |
| "step": 249000 |
| }, |
| { |
| "epoch": 80.51066580478346, |
| "grad_norm": 1.4312539100646973, |
| "learning_rate": 0.001, |
| "loss": 1.4754, |
| "step": 249100 |
| }, |
| { |
| "epoch": 80.54298642533936, |
| "grad_norm": 1.3774590492248535, |
| "learning_rate": 0.001, |
| "loss": 1.4781, |
| "step": 249200 |
| }, |
| { |
| "epoch": 80.57530704589529, |
| "grad_norm": 1.4128397703170776, |
| "learning_rate": 0.001, |
| "loss": 1.4897, |
| "step": 249300 |
| }, |
| { |
| "epoch": 80.6076276664512, |
| "grad_norm": 1.261557698249817, |
| "learning_rate": 0.001, |
| "loss": 1.4966, |
| "step": 249400 |
| }, |
| { |
| "epoch": 80.63994828700712, |
| "grad_norm": 1.7835651636123657, |
| "learning_rate": 0.001, |
| "loss": 1.4864, |
| "step": 249500 |
| }, |
| { |
| "epoch": 80.67226890756302, |
| "grad_norm": 1.3453576564788818, |
| "learning_rate": 0.001, |
| "loss": 1.4925, |
| "step": 249600 |
| }, |
| { |
| "epoch": 80.70458952811894, |
| "grad_norm": 1.269067406654358, |
| "learning_rate": 0.001, |
| "loss": 1.4857, |
| "step": 249700 |
| }, |
| { |
| "epoch": 80.73691014867485, |
| "grad_norm": 2.2933781147003174, |
| "learning_rate": 0.001, |
| "loss": 1.51, |
| "step": 249800 |
| }, |
| { |
| "epoch": 80.76923076923077, |
| "grad_norm": 1.3368571996688843, |
| "learning_rate": 0.001, |
| "loss": 1.496, |
| "step": 249900 |
| }, |
| { |
| "epoch": 80.80155138978668, |
| "grad_norm": 1.5495718717575073, |
| "learning_rate": 0.001, |
| "loss": 1.5008, |
| "step": 250000 |
| }, |
| { |
| "epoch": 80.8338720103426, |
| "grad_norm": 1.5590369701385498, |
| "learning_rate": 0.001, |
| "loss": 1.5045, |
| "step": 250100 |
| }, |
| { |
| "epoch": 80.86619263089851, |
| "grad_norm": 1.4353771209716797, |
| "learning_rate": 0.001, |
| "loss": 1.5153, |
| "step": 250200 |
| }, |
| { |
| "epoch": 80.89851325145443, |
| "grad_norm": 1.0782513618469238, |
| "learning_rate": 0.001, |
| "loss": 1.5366, |
| "step": 250300 |
| }, |
| { |
| "epoch": 80.93083387201034, |
| "grad_norm": 1.3433213233947754, |
| "learning_rate": 0.001, |
| "loss": 1.5223, |
| "step": 250400 |
| }, |
| { |
| "epoch": 80.96315449256626, |
| "grad_norm": 1.6848622560501099, |
| "learning_rate": 0.001, |
| "loss": 1.5359, |
| "step": 250500 |
| }, |
| { |
| "epoch": 80.99547511312217, |
| "grad_norm": 1.3130286931991577, |
| "learning_rate": 0.001, |
| "loss": 1.5322, |
| "step": 250600 |
| }, |
| { |
| "epoch": 81.02779573367809, |
| "grad_norm": 1.2962126731872559, |
| "learning_rate": 0.001, |
| "loss": 1.4071, |
| "step": 250700 |
| }, |
| { |
| "epoch": 81.060116354234, |
| "grad_norm": 1.4461277723312378, |
| "learning_rate": 0.001, |
| "loss": 1.4242, |
| "step": 250800 |
| }, |
| { |
| "epoch": 81.09243697478992, |
| "grad_norm": 1.6120673418045044, |
| "learning_rate": 0.001, |
| "loss": 1.4029, |
| "step": 250900 |
| }, |
| { |
| "epoch": 81.12475759534583, |
| "grad_norm": 1.6296929121017456, |
| "learning_rate": 0.001, |
| "loss": 1.4263, |
| "step": 251000 |
| }, |
| { |
| "epoch": 81.15707821590175, |
| "grad_norm": 1.6019946336746216, |
| "learning_rate": 0.001, |
| "loss": 1.4285, |
| "step": 251100 |
| }, |
| { |
| "epoch": 81.18939883645766, |
| "grad_norm": 1.4875445365905762, |
| "learning_rate": 0.001, |
| "loss": 1.4387, |
| "step": 251200 |
| }, |
| { |
| "epoch": 81.22171945701358, |
| "grad_norm": 1.2833311557769775, |
| "learning_rate": 0.001, |
| "loss": 1.4167, |
| "step": 251300 |
| }, |
| { |
| "epoch": 81.25404007756948, |
| "grad_norm": 1.8684656620025635, |
| "learning_rate": 0.001, |
| "loss": 1.4276, |
| "step": 251400 |
| }, |
| { |
| "epoch": 81.2863606981254, |
| "grad_norm": 1.2863260507583618, |
| "learning_rate": 0.001, |
| "loss": 1.4624, |
| "step": 251500 |
| }, |
| { |
| "epoch": 81.31868131868131, |
| "grad_norm": 1.9092422723770142, |
| "learning_rate": 0.001, |
| "loss": 1.4576, |
| "step": 251600 |
| }, |
| { |
| "epoch": 81.35100193923724, |
| "grad_norm": 1.2243609428405762, |
| "learning_rate": 0.001, |
| "loss": 1.4517, |
| "step": 251700 |
| }, |
| { |
| "epoch": 81.38332255979314, |
| "grad_norm": 1.2571626901626587, |
| "learning_rate": 0.001, |
| "loss": 1.4541, |
| "step": 251800 |
| }, |
| { |
| "epoch": 81.41564318034906, |
| "grad_norm": 1.7485051155090332, |
| "learning_rate": 0.001, |
| "loss": 1.4439, |
| "step": 251900 |
| }, |
| { |
| "epoch": 81.44796380090497, |
| "grad_norm": 2.036057710647583, |
| "learning_rate": 0.001, |
| "loss": 1.4817, |
| "step": 252000 |
| }, |
| { |
| "epoch": 81.4802844214609, |
| "grad_norm": 1.6407066583633423, |
| "learning_rate": 0.001, |
| "loss": 1.4732, |
| "step": 252100 |
| }, |
| { |
| "epoch": 81.5126050420168, |
| "grad_norm": 1.3501890897750854, |
| "learning_rate": 0.001, |
| "loss": 1.4662, |
| "step": 252200 |
| }, |
| { |
| "epoch": 81.54492566257272, |
| "grad_norm": 1.4880703687667847, |
| "learning_rate": 0.001, |
| "loss": 1.4659, |
| "step": 252300 |
| }, |
| { |
| "epoch": 81.57724628312863, |
| "grad_norm": 1.2374626398086548, |
| "learning_rate": 0.001, |
| "loss": 1.4867, |
| "step": 252400 |
| }, |
| { |
| "epoch": 81.60956690368455, |
| "grad_norm": 1.559194803237915, |
| "learning_rate": 0.001, |
| "loss": 1.4935, |
| "step": 252500 |
| }, |
| { |
| "epoch": 81.64188752424046, |
| "grad_norm": 1.5244473218917847, |
| "learning_rate": 0.001, |
| "loss": 1.4731, |
| "step": 252600 |
| }, |
| { |
| "epoch": 81.67420814479638, |
| "grad_norm": 1.5921167135238647, |
| "learning_rate": 0.001, |
| "loss": 1.4795, |
| "step": 252700 |
| }, |
| { |
| "epoch": 81.70652876535229, |
| "grad_norm": 1.672304630279541, |
| "learning_rate": 0.001, |
| "loss": 1.4872, |
| "step": 252800 |
| }, |
| { |
| "epoch": 81.73884938590821, |
| "grad_norm": 1.6150751113891602, |
| "learning_rate": 0.001, |
| "loss": 1.5039, |
| "step": 252900 |
| }, |
| { |
| "epoch": 81.77117000646412, |
| "grad_norm": 1.2743347883224487, |
| "learning_rate": 0.001, |
| "loss": 1.4856, |
| "step": 253000 |
| }, |
| { |
| "epoch": 81.80349062702004, |
| "grad_norm": 1.7275660037994385, |
| "learning_rate": 0.001, |
| "loss": 1.5095, |
| "step": 253100 |
| }, |
| { |
| "epoch": 81.83581124757595, |
| "grad_norm": 1.778511643409729, |
| "learning_rate": 0.001, |
| "loss": 1.5192, |
| "step": 253200 |
| }, |
| { |
| "epoch": 81.86813186813187, |
| "grad_norm": 1.483957052230835, |
| "learning_rate": 0.001, |
| "loss": 1.5149, |
| "step": 253300 |
| }, |
| { |
| "epoch": 81.90045248868778, |
| "grad_norm": 1.586846113204956, |
| "learning_rate": 0.001, |
| "loss": 1.5079, |
| "step": 253400 |
| }, |
| { |
| "epoch": 81.9327731092437, |
| "grad_norm": 2.0547091960906982, |
| "learning_rate": 0.001, |
| "loss": 1.5322, |
| "step": 253500 |
| }, |
| { |
| "epoch": 81.9650937297996, |
| "grad_norm": 1.5635367631912231, |
| "learning_rate": 0.001, |
| "loss": 1.5191, |
| "step": 253600 |
| }, |
| { |
| "epoch": 81.99741435035553, |
| "grad_norm": 1.4796775579452515, |
| "learning_rate": 0.001, |
| "loss": 1.5176, |
| "step": 253700 |
| }, |
| { |
| "epoch": 82.02973497091145, |
| "grad_norm": 1.479210615158081, |
| "learning_rate": 0.001, |
| "loss": 1.4066, |
| "step": 253800 |
| }, |
| { |
| "epoch": 82.06205559146736, |
| "grad_norm": 1.9869581460952759, |
| "learning_rate": 0.001, |
| "loss": 1.404, |
| "step": 253900 |
| }, |
| { |
| "epoch": 82.09437621202328, |
| "grad_norm": 1.41694176197052, |
| "learning_rate": 0.001, |
| "loss": 1.4167, |
| "step": 254000 |
| }, |
| { |
| "epoch": 82.12669683257919, |
| "grad_norm": 2.2012135982513428, |
| "learning_rate": 0.001, |
| "loss": 1.4148, |
| "step": 254100 |
| }, |
| { |
| "epoch": 82.1590174531351, |
| "grad_norm": 1.582972526550293, |
| "learning_rate": 0.001, |
| "loss": 1.425, |
| "step": 254200 |
| }, |
| { |
| "epoch": 82.19133807369101, |
| "grad_norm": 1.4067134857177734, |
| "learning_rate": 0.001, |
| "loss": 1.4294, |
| "step": 254300 |
| }, |
| { |
| "epoch": 82.22365869424694, |
| "grad_norm": 1.7295600175857544, |
| "learning_rate": 0.001, |
| "loss": 1.4232, |
| "step": 254400 |
| }, |
| { |
| "epoch": 82.25597931480284, |
| "grad_norm": 1.199151635169983, |
| "learning_rate": 0.001, |
| "loss": 1.4323, |
| "step": 254500 |
| }, |
| { |
| "epoch": 82.28829993535876, |
| "grad_norm": 1.210278034210205, |
| "learning_rate": 0.001, |
| "loss": 1.4329, |
| "step": 254600 |
| }, |
| { |
| "epoch": 82.32062055591467, |
| "grad_norm": 1.3613992929458618, |
| "learning_rate": 0.001, |
| "loss": 1.4363, |
| "step": 254700 |
| }, |
| { |
| "epoch": 82.3529411764706, |
| "grad_norm": 2.0656394958496094, |
| "learning_rate": 0.001, |
| "loss": 1.4495, |
| "step": 254800 |
| }, |
| { |
| "epoch": 82.3852617970265, |
| "grad_norm": 1.796547293663025, |
| "learning_rate": 0.001, |
| "loss": 1.4605, |
| "step": 254900 |
| }, |
| { |
| "epoch": 82.41758241758242, |
| "grad_norm": 1.6070237159729004, |
| "learning_rate": 0.001, |
| "loss": 1.4455, |
| "step": 255000 |
| }, |
| { |
| "epoch": 82.44990303813833, |
| "grad_norm": 1.657780408859253, |
| "learning_rate": 0.001, |
| "loss": 1.4424, |
| "step": 255100 |
| }, |
| { |
| "epoch": 82.48222365869425, |
| "grad_norm": 1.9245177507400513, |
| "learning_rate": 0.001, |
| "loss": 1.4549, |
| "step": 255200 |
| }, |
| { |
| "epoch": 82.51454427925016, |
| "grad_norm": 2.279123544692993, |
| "learning_rate": 0.001, |
| "loss": 1.468, |
| "step": 255300 |
| }, |
| { |
| "epoch": 82.54686489980608, |
| "grad_norm": 1.8537899255752563, |
| "learning_rate": 0.001, |
| "loss": 1.4766, |
| "step": 255400 |
| }, |
| { |
| "epoch": 82.57918552036199, |
| "grad_norm": 1.2981986999511719, |
| "learning_rate": 0.001, |
| "loss": 1.4577, |
| "step": 255500 |
| }, |
| { |
| "epoch": 82.61150614091791, |
| "grad_norm": 1.694447636604309, |
| "learning_rate": 0.001, |
| "loss": 1.4843, |
| "step": 255600 |
| }, |
| { |
| "epoch": 82.64382676147382, |
| "grad_norm": 2.3249664306640625, |
| "learning_rate": 0.001, |
| "loss": 1.4704, |
| "step": 255700 |
| }, |
| { |
| "epoch": 82.67614738202974, |
| "grad_norm": 1.9651449918746948, |
| "learning_rate": 0.001, |
| "loss": 1.4804, |
| "step": 255800 |
| }, |
| { |
| "epoch": 82.70846800258565, |
| "grad_norm": 1.6320222616195679, |
| "learning_rate": 0.001, |
| "loss": 1.4993, |
| "step": 255900 |
| }, |
| { |
| "epoch": 82.74078862314157, |
| "grad_norm": 1.9455903768539429, |
| "learning_rate": 0.001, |
| "loss": 1.4862, |
| "step": 256000 |
| }, |
| { |
| "epoch": 82.77310924369748, |
| "grad_norm": 1.7584723234176636, |
| "learning_rate": 0.001, |
| "loss": 1.4873, |
| "step": 256100 |
| }, |
| { |
| "epoch": 82.8054298642534, |
| "grad_norm": 1.4015384912490845, |
| "learning_rate": 0.001, |
| "loss": 1.4884, |
| "step": 256200 |
| }, |
| { |
| "epoch": 82.8377504848093, |
| "grad_norm": 1.6554213762283325, |
| "learning_rate": 0.001, |
| "loss": 1.4922, |
| "step": 256300 |
| }, |
| { |
| "epoch": 82.87007110536523, |
| "grad_norm": 2.2378883361816406, |
| "learning_rate": 0.001, |
| "loss": 1.4975, |
| "step": 256400 |
| }, |
| { |
| "epoch": 82.90239172592113, |
| "grad_norm": 1.633915662765503, |
| "learning_rate": 0.001, |
| "loss": 1.5221, |
| "step": 256500 |
| }, |
| { |
| "epoch": 82.93471234647706, |
| "grad_norm": 1.896621823310852, |
| "learning_rate": 0.001, |
| "loss": 1.5058, |
| "step": 256600 |
| }, |
| { |
| "epoch": 82.96703296703296, |
| "grad_norm": 1.408779263496399, |
| "learning_rate": 0.001, |
| "loss": 1.5066, |
| "step": 256700 |
| }, |
| { |
| "epoch": 82.99935358758889, |
| "grad_norm": 1.6087642908096313, |
| "learning_rate": 0.001, |
| "loss": 1.4986, |
| "step": 256800 |
| }, |
| { |
| "epoch": 83.03167420814479, |
| "grad_norm": 1.8113195896148682, |
| "learning_rate": 0.001, |
| "loss": 1.3956, |
| "step": 256900 |
| }, |
| { |
| "epoch": 83.06399482870071, |
| "grad_norm": 1.6769685745239258, |
| "learning_rate": 0.001, |
| "loss": 1.3916, |
| "step": 257000 |
| }, |
| { |
| "epoch": 83.09631544925662, |
| "grad_norm": 1.9285463094711304, |
| "learning_rate": 0.001, |
| "loss": 1.4024, |
| "step": 257100 |
| }, |
| { |
| "epoch": 83.12863606981254, |
| "grad_norm": 1.453759789466858, |
| "learning_rate": 0.001, |
| "loss": 1.4032, |
| "step": 257200 |
| }, |
| { |
| "epoch": 83.16095669036845, |
| "grad_norm": 1.9637484550476074, |
| "learning_rate": 0.001, |
| "loss": 1.4126, |
| "step": 257300 |
| }, |
| { |
| "epoch": 83.19327731092437, |
| "grad_norm": 2.4130241870880127, |
| "learning_rate": 0.001, |
| "loss": 1.4154, |
| "step": 257400 |
| }, |
| { |
| "epoch": 83.22559793148028, |
| "grad_norm": 1.6804895401000977, |
| "learning_rate": 0.001, |
| "loss": 1.4111, |
| "step": 257500 |
| }, |
| { |
| "epoch": 83.2579185520362, |
| "grad_norm": 1.816179871559143, |
| "learning_rate": 0.001, |
| "loss": 1.4297, |
| "step": 257600 |
| }, |
| { |
| "epoch": 83.29023917259211, |
| "grad_norm": 2.7931365966796875, |
| "learning_rate": 0.001, |
| "loss": 1.4333, |
| "step": 257700 |
| }, |
| { |
| "epoch": 83.32255979314803, |
| "grad_norm": 2.2509469985961914, |
| "learning_rate": 0.001, |
| "loss": 1.4247, |
| "step": 257800 |
| }, |
| { |
| "epoch": 83.35488041370394, |
| "grad_norm": 1.733638882637024, |
| "learning_rate": 0.001, |
| "loss": 1.4293, |
| "step": 257900 |
| }, |
| { |
| "epoch": 83.38720103425986, |
| "grad_norm": 2.0615646839141846, |
| "learning_rate": 0.001, |
| "loss": 1.4304, |
| "step": 258000 |
| }, |
| { |
| "epoch": 83.41952165481577, |
| "grad_norm": 1.8422954082489014, |
| "learning_rate": 0.001, |
| "loss": 1.4391, |
| "step": 258100 |
| }, |
| { |
| "epoch": 83.45184227537169, |
| "grad_norm": 2.084277391433716, |
| "learning_rate": 0.001, |
| "loss": 1.455, |
| "step": 258200 |
| }, |
| { |
| "epoch": 83.4841628959276, |
| "grad_norm": 1.990598440170288, |
| "learning_rate": 0.001, |
| "loss": 1.4575, |
| "step": 258300 |
| }, |
| { |
| "epoch": 83.51648351648352, |
| "grad_norm": 1.9928582906723022, |
| "learning_rate": 0.001, |
| "loss": 1.4622, |
| "step": 258400 |
| }, |
| { |
| "epoch": 83.54880413703943, |
| "grad_norm": 1.6820632219314575, |
| "learning_rate": 0.001, |
| "loss": 1.4477, |
| "step": 258500 |
| }, |
| { |
| "epoch": 83.58112475759535, |
| "grad_norm": 1.4622422456741333, |
| "learning_rate": 0.001, |
| "loss": 1.4602, |
| "step": 258600 |
| }, |
| { |
| "epoch": 83.61344537815125, |
| "grad_norm": 1.9164870977401733, |
| "learning_rate": 0.001, |
| "loss": 1.4846, |
| "step": 258700 |
| }, |
| { |
| "epoch": 83.64576599870718, |
| "grad_norm": 2.162792921066284, |
| "learning_rate": 0.001, |
| "loss": 1.48, |
| "step": 258800 |
| }, |
| { |
| "epoch": 83.67808661926308, |
| "grad_norm": 2.626990795135498, |
| "learning_rate": 0.001, |
| "loss": 1.4729, |
| "step": 258900 |
| }, |
| { |
| "epoch": 83.710407239819, |
| "grad_norm": 1.8493727445602417, |
| "learning_rate": 0.001, |
| "loss": 1.4754, |
| "step": 259000 |
| }, |
| { |
| "epoch": 83.74272786037491, |
| "grad_norm": 2.3368582725524902, |
| "learning_rate": 0.001, |
| "loss": 1.4843, |
| "step": 259100 |
| }, |
| { |
| "epoch": 83.77504848093083, |
| "grad_norm": 2.14288067817688, |
| "learning_rate": 0.001, |
| "loss": 1.4815, |
| "step": 259200 |
| }, |
| { |
| "epoch": 83.80736910148674, |
| "grad_norm": 2.0774693489074707, |
| "learning_rate": 0.001, |
| "loss": 1.4933, |
| "step": 259300 |
| }, |
| { |
| "epoch": 83.83968972204266, |
| "grad_norm": 2.4030613899230957, |
| "learning_rate": 0.001, |
| "loss": 1.4974, |
| "step": 259400 |
| }, |
| { |
| "epoch": 83.87201034259857, |
| "grad_norm": 1.7850096225738525, |
| "learning_rate": 0.001, |
| "loss": 1.4967, |
| "step": 259500 |
| }, |
| { |
| "epoch": 83.9043309631545, |
| "grad_norm": 1.7380095720291138, |
| "learning_rate": 0.001, |
| "loss": 1.4988, |
| "step": 259600 |
| }, |
| { |
| "epoch": 83.9366515837104, |
| "grad_norm": 1.620004653930664, |
| "learning_rate": 0.001, |
| "loss": 1.5072, |
| "step": 259700 |
| }, |
| { |
| "epoch": 83.96897220426632, |
| "grad_norm": 2.2414209842681885, |
| "learning_rate": 0.001, |
| "loss": 1.5217, |
| "step": 259800 |
| }, |
| { |
| "epoch": 84.00129282482224, |
| "grad_norm": 1.7334810495376587, |
| "learning_rate": 0.001, |
| "loss": 1.4948, |
| "step": 259900 |
| }, |
| { |
| "epoch": 84.03361344537815, |
| "grad_norm": 1.9263193607330322, |
| "learning_rate": 0.001, |
| "loss": 1.3888, |
| "step": 260000 |
| }, |
| { |
| "epoch": 84.06593406593407, |
| "grad_norm": 1.596864104270935, |
| "learning_rate": 0.001, |
| "loss": 1.3919, |
| "step": 260100 |
| }, |
| { |
| "epoch": 84.09825468648998, |
| "grad_norm": 1.529685378074646, |
| "learning_rate": 0.001, |
| "loss": 1.3771, |
| "step": 260200 |
| }, |
| { |
| "epoch": 84.1305753070459, |
| "grad_norm": 1.3261326551437378, |
| "learning_rate": 0.001, |
| "loss": 1.4221, |
| "step": 260300 |
| }, |
| { |
| "epoch": 84.16289592760181, |
| "grad_norm": 1.412488579750061, |
| "learning_rate": 0.001, |
| "loss": 1.4084, |
| "step": 260400 |
| }, |
| { |
| "epoch": 84.19521654815773, |
| "grad_norm": 1.5648341178894043, |
| "learning_rate": 0.001, |
| "loss": 1.4197, |
| "step": 260500 |
| }, |
| { |
| "epoch": 84.22753716871364, |
| "grad_norm": 1.4410159587860107, |
| "learning_rate": 0.001, |
| "loss": 1.4181, |
| "step": 260600 |
| }, |
| { |
| "epoch": 84.25985778926956, |
| "grad_norm": 2.8147940635681152, |
| "learning_rate": 0.001, |
| "loss": 1.4123, |
| "step": 260700 |
| }, |
| { |
| "epoch": 84.29217840982547, |
| "grad_norm": 1.6910955905914307, |
| "learning_rate": 0.001, |
| "loss": 1.4099, |
| "step": 260800 |
| }, |
| { |
| "epoch": 84.32449903038139, |
| "grad_norm": 1.6648961305618286, |
| "learning_rate": 0.001, |
| "loss": 1.4261, |
| "step": 260900 |
| }, |
| { |
| "epoch": 84.3568196509373, |
| "grad_norm": 3.2388651371002197, |
| "learning_rate": 0.001, |
| "loss": 1.449, |
| "step": 261000 |
| }, |
| { |
| "epoch": 84.38914027149322, |
| "grad_norm": 1.9411053657531738, |
| "learning_rate": 0.001, |
| "loss": 1.4306, |
| "step": 261100 |
| }, |
| { |
| "epoch": 84.42146089204913, |
| "grad_norm": 1.7887365818023682, |
| "learning_rate": 0.001, |
| "loss": 1.4554, |
| "step": 261200 |
| }, |
| { |
| "epoch": 84.45378151260505, |
| "grad_norm": 2.1713664531707764, |
| "learning_rate": 0.001, |
| "loss": 1.4414, |
| "step": 261300 |
| }, |
| { |
| "epoch": 84.48610213316095, |
| "grad_norm": 1.6247268915176392, |
| "learning_rate": 0.001, |
| "loss": 1.4438, |
| "step": 261400 |
| }, |
| { |
| "epoch": 84.51842275371688, |
| "grad_norm": 1.6007441282272339, |
| "learning_rate": 0.001, |
| "loss": 1.4546, |
| "step": 261500 |
| }, |
| { |
| "epoch": 84.55074337427278, |
| "grad_norm": 1.7280800342559814, |
| "learning_rate": 0.001, |
| "loss": 1.4641, |
| "step": 261600 |
| }, |
| { |
| "epoch": 84.5830639948287, |
| "grad_norm": 1.4836840629577637, |
| "learning_rate": 0.001, |
| "loss": 1.4621, |
| "step": 261700 |
| }, |
| { |
| "epoch": 84.61538461538461, |
| "grad_norm": 1.3228708505630493, |
| "learning_rate": 0.001, |
| "loss": 1.4535, |
| "step": 261800 |
| }, |
| { |
| "epoch": 84.64770523594053, |
| "grad_norm": 1.2039247751235962, |
| "learning_rate": 0.001, |
| "loss": 1.4504, |
| "step": 261900 |
| }, |
| { |
| "epoch": 84.68002585649644, |
| "grad_norm": 1.3510963916778564, |
| "learning_rate": 0.001, |
| "loss": 1.4611, |
| "step": 262000 |
| }, |
| { |
| "epoch": 84.71234647705236, |
| "grad_norm": 1.8786565065383911, |
| "learning_rate": 0.001, |
| "loss": 1.4809, |
| "step": 262100 |
| }, |
| { |
| "epoch": 84.74466709760827, |
| "grad_norm": 2.283278226852417, |
| "learning_rate": 0.001, |
| "loss": 1.4757, |
| "step": 262200 |
| }, |
| { |
| "epoch": 84.7769877181642, |
| "grad_norm": 1.2619625329971313, |
| "learning_rate": 0.001, |
| "loss": 1.4844, |
| "step": 262300 |
| }, |
| { |
| "epoch": 84.8093083387201, |
| "grad_norm": 1.4168131351470947, |
| "learning_rate": 0.001, |
| "loss": 1.4884, |
| "step": 262400 |
| }, |
| { |
| "epoch": 84.84162895927602, |
| "grad_norm": 1.4280415773391724, |
| "learning_rate": 0.001, |
| "loss": 1.4846, |
| "step": 262500 |
| }, |
| { |
| "epoch": 84.87394957983193, |
| "grad_norm": 2.1055214405059814, |
| "learning_rate": 0.001, |
| "loss": 1.4845, |
| "step": 262600 |
| }, |
| { |
| "epoch": 84.90627020038785, |
| "grad_norm": 2.787269353866577, |
| "learning_rate": 0.001, |
| "loss": 1.4872, |
| "step": 262700 |
| }, |
| { |
| "epoch": 84.93859082094376, |
| "grad_norm": 1.399965524673462, |
| "learning_rate": 0.001, |
| "loss": 1.5049, |
| "step": 262800 |
| }, |
| { |
| "epoch": 84.97091144149968, |
| "grad_norm": 1.5080381631851196, |
| "learning_rate": 0.001, |
| "loss": 1.5069, |
| "step": 262900 |
| }, |
| { |
| "epoch": 85.00323206205559, |
| "grad_norm": 2.1978836059570312, |
| "learning_rate": 0.001, |
| "loss": 1.4949, |
| "step": 263000 |
| }, |
| { |
| "epoch": 85.03555268261151, |
| "grad_norm": 1.3249891996383667, |
| "learning_rate": 0.001, |
| "loss": 1.37, |
| "step": 263100 |
| }, |
| { |
| "epoch": 85.06787330316742, |
| "grad_norm": 1.6576632261276245, |
| "learning_rate": 0.001, |
| "loss": 1.3804, |
| "step": 263200 |
| }, |
| { |
| "epoch": 85.10019392372334, |
| "grad_norm": 1.4127508401870728, |
| "learning_rate": 0.001, |
| "loss": 1.3937, |
| "step": 263300 |
| }, |
| { |
| "epoch": 85.13251454427925, |
| "grad_norm": 1.4045134782791138, |
| "learning_rate": 0.001, |
| "loss": 1.3971, |
| "step": 263400 |
| }, |
| { |
| "epoch": 85.16483516483517, |
| "grad_norm": 1.5815248489379883, |
| "learning_rate": 0.001, |
| "loss": 1.4044, |
| "step": 263500 |
| }, |
| { |
| "epoch": 85.19715578539108, |
| "grad_norm": 1.9947961568832397, |
| "learning_rate": 0.001, |
| "loss": 1.3958, |
| "step": 263600 |
| }, |
| { |
| "epoch": 85.229476405947, |
| "grad_norm": 1.3920848369598389, |
| "learning_rate": 0.001, |
| "loss": 1.401, |
| "step": 263700 |
| }, |
| { |
| "epoch": 85.2617970265029, |
| "grad_norm": 1.7426060438156128, |
| "learning_rate": 0.001, |
| "loss": 1.4183, |
| "step": 263800 |
| }, |
| { |
| "epoch": 85.29411764705883, |
| "grad_norm": 1.9438815116882324, |
| "learning_rate": 0.001, |
| "loss": 1.4087, |
| "step": 263900 |
| }, |
| { |
| "epoch": 85.32643826761473, |
| "grad_norm": 3.285012722015381, |
| "learning_rate": 0.001, |
| "loss": 1.4252, |
| "step": 264000 |
| }, |
| { |
| "epoch": 85.35875888817066, |
| "grad_norm": 1.551334261894226, |
| "learning_rate": 0.001, |
| "loss": 1.4356, |
| "step": 264100 |
| }, |
| { |
| "epoch": 85.39107950872656, |
| "grad_norm": 1.8503215312957764, |
| "learning_rate": 0.001, |
| "loss": 1.4199, |
| "step": 264200 |
| }, |
| { |
| "epoch": 85.42340012928248, |
| "grad_norm": 1.434235692024231, |
| "learning_rate": 0.001, |
| "loss": 1.4396, |
| "step": 264300 |
| }, |
| { |
| "epoch": 85.45572074983839, |
| "grad_norm": 1.1429784297943115, |
| "learning_rate": 0.001, |
| "loss": 1.4341, |
| "step": 264400 |
| }, |
| { |
| "epoch": 85.48804137039431, |
| "grad_norm": 1.928701639175415, |
| "learning_rate": 0.001, |
| "loss": 1.453, |
| "step": 264500 |
| }, |
| { |
| "epoch": 85.52036199095022, |
| "grad_norm": 1.24684476852417, |
| "learning_rate": 0.001, |
| "loss": 1.4297, |
| "step": 264600 |
| }, |
| { |
| "epoch": 85.55268261150614, |
| "grad_norm": 1.9832147359848022, |
| "learning_rate": 0.001, |
| "loss": 1.4598, |
| "step": 264700 |
| }, |
| { |
| "epoch": 85.58500323206205, |
| "grad_norm": 1.3334790468215942, |
| "learning_rate": 0.001, |
| "loss": 1.4518, |
| "step": 264800 |
| }, |
| { |
| "epoch": 85.61732385261797, |
| "grad_norm": 1.4837751388549805, |
| "learning_rate": 0.001, |
| "loss": 1.4636, |
| "step": 264900 |
| }, |
| { |
| "epoch": 85.64964447317388, |
| "grad_norm": 1.547782063484192, |
| "learning_rate": 0.001, |
| "loss": 1.4695, |
| "step": 265000 |
| }, |
| { |
| "epoch": 85.6819650937298, |
| "grad_norm": 1.674836277961731, |
| "learning_rate": 0.001, |
| "loss": 1.4553, |
| "step": 265100 |
| }, |
| { |
| "epoch": 85.71428571428571, |
| "grad_norm": 2.6639299392700195, |
| "learning_rate": 0.001, |
| "loss": 1.4768, |
| "step": 265200 |
| }, |
| { |
| "epoch": 85.74660633484163, |
| "grad_norm": 1.4166233539581299, |
| "learning_rate": 0.001, |
| "loss": 1.4565, |
| "step": 265300 |
| }, |
| { |
| "epoch": 85.77892695539754, |
| "grad_norm": 1.1681371927261353, |
| "learning_rate": 0.001, |
| "loss": 1.46, |
| "step": 265400 |
| }, |
| { |
| "epoch": 85.81124757595346, |
| "grad_norm": 2.1296963691711426, |
| "learning_rate": 0.001, |
| "loss": 1.4889, |
| "step": 265500 |
| }, |
| { |
| "epoch": 85.84356819650937, |
| "grad_norm": 1.5797781944274902, |
| "learning_rate": 0.001, |
| "loss": 1.4638, |
| "step": 265600 |
| }, |
| { |
| "epoch": 85.87588881706529, |
| "grad_norm": 1.7881137132644653, |
| "learning_rate": 0.001, |
| "loss": 1.4752, |
| "step": 265700 |
| }, |
| { |
| "epoch": 85.9082094376212, |
| "grad_norm": 1.2332497835159302, |
| "learning_rate": 0.001, |
| "loss": 1.4873, |
| "step": 265800 |
| }, |
| { |
| "epoch": 85.94053005817712, |
| "grad_norm": 1.7854421138763428, |
| "learning_rate": 0.001, |
| "loss": 1.4971, |
| "step": 265900 |
| }, |
| { |
| "epoch": 85.97285067873302, |
| "grad_norm": 1.4832534790039062, |
| "learning_rate": 0.001, |
| "loss": 1.4837, |
| "step": 266000 |
| }, |
| { |
| "epoch": 86.00517129928895, |
| "grad_norm": 1.3114742040634155, |
| "learning_rate": 0.001, |
| "loss": 1.4811, |
| "step": 266100 |
| }, |
| { |
| "epoch": 86.03749191984487, |
| "grad_norm": 1.5295010805130005, |
| "learning_rate": 0.001, |
| "loss": 1.3754, |
| "step": 266200 |
| }, |
| { |
| "epoch": 86.06981254040078, |
| "grad_norm": 1.905587077140808, |
| "learning_rate": 0.001, |
| "loss": 1.3851, |
| "step": 266300 |
| }, |
| { |
| "epoch": 86.1021331609567, |
| "grad_norm": 1.2385417222976685, |
| "learning_rate": 0.001, |
| "loss": 1.3739, |
| "step": 266400 |
| }, |
| { |
| "epoch": 86.1344537815126, |
| "grad_norm": 2.0491955280303955, |
| "learning_rate": 0.001, |
| "loss": 1.3927, |
| "step": 266500 |
| }, |
| { |
| "epoch": 86.16677440206853, |
| "grad_norm": 1.4316233396530151, |
| "learning_rate": 0.001, |
| "loss": 1.3902, |
| "step": 266600 |
| }, |
| { |
| "epoch": 86.19909502262443, |
| "grad_norm": 2.0543875694274902, |
| "learning_rate": 0.001, |
| "loss": 1.3802, |
| "step": 266700 |
| }, |
| { |
| "epoch": 86.23141564318036, |
| "grad_norm": 1.4741928577423096, |
| "learning_rate": 0.001, |
| "loss": 1.4082, |
| "step": 266800 |
| }, |
| { |
| "epoch": 86.26373626373626, |
| "grad_norm": 1.246631383895874, |
| "learning_rate": 0.001, |
| "loss": 1.4175, |
| "step": 266900 |
| }, |
| { |
| "epoch": 86.29605688429218, |
| "grad_norm": 1.2465566396713257, |
| "learning_rate": 0.001, |
| "loss": 1.4193, |
| "step": 267000 |
| }, |
| { |
| "epoch": 86.32837750484809, |
| "grad_norm": 2.4174885749816895, |
| "learning_rate": 0.001, |
| "loss": 1.4122, |
| "step": 267100 |
| }, |
| { |
| "epoch": 86.36069812540401, |
| "grad_norm": 2.284865140914917, |
| "learning_rate": 0.001, |
| "loss": 1.4077, |
| "step": 267200 |
| }, |
| { |
| "epoch": 86.39301874595992, |
| "grad_norm": 1.5470408201217651, |
| "learning_rate": 0.001, |
| "loss": 1.4288, |
| "step": 267300 |
| }, |
| { |
| "epoch": 86.42533936651584, |
| "grad_norm": 1.2510464191436768, |
| "learning_rate": 0.001, |
| "loss": 1.4268, |
| "step": 267400 |
| }, |
| { |
| "epoch": 86.45765998707175, |
| "grad_norm": 1.6958582401275635, |
| "learning_rate": 0.001, |
| "loss": 1.4252, |
| "step": 267500 |
| }, |
| { |
| "epoch": 86.48998060762767, |
| "grad_norm": 1.337631106376648, |
| "learning_rate": 0.001, |
| "loss": 1.4409, |
| "step": 267600 |
| }, |
| { |
| "epoch": 86.52230122818358, |
| "grad_norm": 2.136993646621704, |
| "learning_rate": 0.001, |
| "loss": 1.4443, |
| "step": 267700 |
| }, |
| { |
| "epoch": 86.5546218487395, |
| "grad_norm": 1.556563377380371, |
| "learning_rate": 0.001, |
| "loss": 1.4458, |
| "step": 267800 |
| }, |
| { |
| "epoch": 86.58694246929541, |
| "grad_norm": 1.3264302015304565, |
| "learning_rate": 0.001, |
| "loss": 1.4506, |
| "step": 267900 |
| }, |
| { |
| "epoch": 86.61926308985133, |
| "grad_norm": 1.9455862045288086, |
| "learning_rate": 0.001, |
| "loss": 1.4437, |
| "step": 268000 |
| }, |
| { |
| "epoch": 86.65158371040724, |
| "grad_norm": 1.4218875169754028, |
| "learning_rate": 0.001, |
| "loss": 1.4613, |
| "step": 268100 |
| }, |
| { |
| "epoch": 86.68390433096316, |
| "grad_norm": 1.6667139530181885, |
| "learning_rate": 0.001, |
| "loss": 1.4553, |
| "step": 268200 |
| }, |
| { |
| "epoch": 86.71622495151907, |
| "grad_norm": 1.3904166221618652, |
| "learning_rate": 0.001, |
| "loss": 1.4655, |
| "step": 268300 |
| }, |
| { |
| "epoch": 86.74854557207499, |
| "grad_norm": 1.9042881727218628, |
| "learning_rate": 0.001, |
| "loss": 1.4657, |
| "step": 268400 |
| }, |
| { |
| "epoch": 86.7808661926309, |
| "grad_norm": 1.1454037427902222, |
| "learning_rate": 0.001, |
| "loss": 1.4521, |
| "step": 268500 |
| }, |
| { |
| "epoch": 86.81318681318682, |
| "grad_norm": 1.6706260442733765, |
| "learning_rate": 0.001, |
| "loss": 1.4817, |
| "step": 268600 |
| }, |
| { |
| "epoch": 86.84550743374272, |
| "grad_norm": 1.468371033668518, |
| "learning_rate": 0.001, |
| "loss": 1.4599, |
| "step": 268700 |
| }, |
| { |
| "epoch": 86.87782805429865, |
| "grad_norm": 1.1414262056350708, |
| "learning_rate": 0.001, |
| "loss": 1.4702, |
| "step": 268800 |
| }, |
| { |
| "epoch": 86.91014867485455, |
| "grad_norm": 1.4835313558578491, |
| "learning_rate": 0.001, |
| "loss": 1.4802, |
| "step": 268900 |
| }, |
| { |
| "epoch": 86.94246929541048, |
| "grad_norm": 1.785538673400879, |
| "learning_rate": 0.001, |
| "loss": 1.4688, |
| "step": 269000 |
| }, |
| { |
| "epoch": 86.97478991596638, |
| "grad_norm": 1.1166988611221313, |
| "learning_rate": 0.001, |
| "loss": 1.4886, |
| "step": 269100 |
| }, |
| { |
| "epoch": 87.0071105365223, |
| "grad_norm": 1.2215816974639893, |
| "learning_rate": 0.001, |
| "loss": 1.4617, |
| "step": 269200 |
| }, |
| { |
| "epoch": 87.03943115707821, |
| "grad_norm": 1.267557978630066, |
| "learning_rate": 0.001, |
| "loss": 1.3595, |
| "step": 269300 |
| }, |
| { |
| "epoch": 87.07175177763413, |
| "grad_norm": 1.3655972480773926, |
| "learning_rate": 0.001, |
| "loss": 1.3799, |
| "step": 269400 |
| }, |
| { |
| "epoch": 87.10407239819004, |
| "grad_norm": 1.4063516855239868, |
| "learning_rate": 0.001, |
| "loss": 1.3841, |
| "step": 269500 |
| }, |
| { |
| "epoch": 87.13639301874596, |
| "grad_norm": 1.688720941543579, |
| "learning_rate": 0.001, |
| "loss": 1.3949, |
| "step": 269600 |
| }, |
| { |
| "epoch": 87.16871363930187, |
| "grad_norm": 2.142454147338867, |
| "learning_rate": 0.001, |
| "loss": 1.372, |
| "step": 269700 |
| }, |
| { |
| "epoch": 87.20103425985779, |
| "grad_norm": 1.5391592979431152, |
| "learning_rate": 0.001, |
| "loss": 1.3894, |
| "step": 269800 |
| }, |
| { |
| "epoch": 87.2333548804137, |
| "grad_norm": 1.9259381294250488, |
| "learning_rate": 0.001, |
| "loss": 1.393, |
| "step": 269900 |
| }, |
| { |
| "epoch": 87.26567550096962, |
| "grad_norm": 1.3825037479400635, |
| "learning_rate": 0.001, |
| "loss": 1.3998, |
| "step": 270000 |
| }, |
| { |
| "epoch": 87.29799612152553, |
| "grad_norm": 1.6389166116714478, |
| "learning_rate": 0.001, |
| "loss": 1.4141, |
| "step": 270100 |
| }, |
| { |
| "epoch": 87.33031674208145, |
| "grad_norm": 1.6546417474746704, |
| "learning_rate": 0.001, |
| "loss": 1.4115, |
| "step": 270200 |
| }, |
| { |
| "epoch": 87.36263736263736, |
| "grad_norm": 1.4968101978302002, |
| "learning_rate": 0.001, |
| "loss": 1.4083, |
| "step": 270300 |
| }, |
| { |
| "epoch": 87.39495798319328, |
| "grad_norm": 1.302238941192627, |
| "learning_rate": 0.001, |
| "loss": 1.4143, |
| "step": 270400 |
| }, |
| { |
| "epoch": 87.42727860374919, |
| "grad_norm": 1.899917721748352, |
| "learning_rate": 0.001, |
| "loss": 1.4216, |
| "step": 270500 |
| }, |
| { |
| "epoch": 87.45959922430511, |
| "grad_norm": 1.4192540645599365, |
| "learning_rate": 0.001, |
| "loss": 1.4287, |
| "step": 270600 |
| }, |
| { |
| "epoch": 87.49191984486102, |
| "grad_norm": 1.4450047016143799, |
| "learning_rate": 0.001, |
| "loss": 1.4465, |
| "step": 270700 |
| }, |
| { |
| "epoch": 87.52424046541694, |
| "grad_norm": 1.1823523044586182, |
| "learning_rate": 0.001, |
| "loss": 1.4158, |
| "step": 270800 |
| }, |
| { |
| "epoch": 87.55656108597285, |
| "grad_norm": 1.489056944847107, |
| "learning_rate": 0.001, |
| "loss": 1.4336, |
| "step": 270900 |
| }, |
| { |
| "epoch": 87.58888170652877, |
| "grad_norm": 1.1466692686080933, |
| "learning_rate": 0.001, |
| "loss": 1.4453, |
| "step": 271000 |
| }, |
| { |
| "epoch": 87.62120232708467, |
| "grad_norm": 1.4787135124206543, |
| "learning_rate": 0.001, |
| "loss": 1.447, |
| "step": 271100 |
| }, |
| { |
| "epoch": 87.6535229476406, |
| "grad_norm": 1.5493061542510986, |
| "learning_rate": 0.001, |
| "loss": 1.4309, |
| "step": 271200 |
| }, |
| { |
| "epoch": 87.6858435681965, |
| "grad_norm": 1.8036699295043945, |
| "learning_rate": 0.001, |
| "loss": 1.449, |
| "step": 271300 |
| }, |
| { |
| "epoch": 87.71816418875243, |
| "grad_norm": 1.628659725189209, |
| "learning_rate": 0.001, |
| "loss": 1.4558, |
| "step": 271400 |
| }, |
| { |
| "epoch": 87.75048480930833, |
| "grad_norm": 2.0171215534210205, |
| "learning_rate": 0.001, |
| "loss": 1.4451, |
| "step": 271500 |
| }, |
| { |
| "epoch": 87.78280542986425, |
| "grad_norm": 1.3943383693695068, |
| "learning_rate": 0.001, |
| "loss": 1.4518, |
| "step": 271600 |
| }, |
| { |
| "epoch": 87.81512605042016, |
| "grad_norm": 1.1058683395385742, |
| "learning_rate": 0.001, |
| "loss": 1.4634, |
| "step": 271700 |
| }, |
| { |
| "epoch": 87.84744667097608, |
| "grad_norm": 1.244297981262207, |
| "learning_rate": 0.001, |
| "loss": 1.4672, |
| "step": 271800 |
| }, |
| { |
| "epoch": 87.87976729153199, |
| "grad_norm": 1.8520456552505493, |
| "learning_rate": 0.001, |
| "loss": 1.4765, |
| "step": 271900 |
| }, |
| { |
| "epoch": 87.91208791208791, |
| "grad_norm": 1.0921056270599365, |
| "learning_rate": 0.001, |
| "loss": 1.4854, |
| "step": 272000 |
| }, |
| { |
| "epoch": 87.94440853264382, |
| "grad_norm": 1.8864128589630127, |
| "learning_rate": 0.001, |
| "loss": 1.4814, |
| "step": 272100 |
| }, |
| { |
| "epoch": 87.97672915319974, |
| "grad_norm": 1.423569679260254, |
| "learning_rate": 0.001, |
| "loss": 1.4885, |
| "step": 272200 |
| }, |
| { |
| "epoch": 88.00904977375566, |
| "grad_norm": 1.6555652618408203, |
| "learning_rate": 0.001, |
| "loss": 1.4544, |
| "step": 272300 |
| }, |
| { |
| "epoch": 88.04137039431157, |
| "grad_norm": 1.9587502479553223, |
| "learning_rate": 0.001, |
| "loss": 1.3591, |
| "step": 272400 |
| }, |
| { |
| "epoch": 88.07369101486749, |
| "grad_norm": 1.5171904563903809, |
| "learning_rate": 0.001, |
| "loss": 1.3578, |
| "step": 272500 |
| }, |
| { |
| "epoch": 88.1060116354234, |
| "grad_norm": 1.5223625898361206, |
| "learning_rate": 0.001, |
| "loss": 1.3711, |
| "step": 272600 |
| }, |
| { |
| "epoch": 88.13833225597932, |
| "grad_norm": 1.2753205299377441, |
| "learning_rate": 0.001, |
| "loss": 1.3702, |
| "step": 272700 |
| }, |
| { |
| "epoch": 88.17065287653523, |
| "grad_norm": 1.760709285736084, |
| "learning_rate": 0.001, |
| "loss": 1.3807, |
| "step": 272800 |
| }, |
| { |
| "epoch": 88.20297349709115, |
| "grad_norm": 1.6494214534759521, |
| "learning_rate": 0.001, |
| "loss": 1.3763, |
| "step": 272900 |
| }, |
| { |
| "epoch": 88.23529411764706, |
| "grad_norm": 1.3722658157348633, |
| "learning_rate": 0.001, |
| "loss": 1.3851, |
| "step": 273000 |
| }, |
| { |
| "epoch": 88.26761473820298, |
| "grad_norm": 1.349332571029663, |
| "learning_rate": 0.001, |
| "loss": 1.4007, |
| "step": 273100 |
| }, |
| { |
| "epoch": 88.29993535875889, |
| "grad_norm": 1.384621262550354, |
| "learning_rate": 0.001, |
| "loss": 1.4118, |
| "step": 273200 |
| }, |
| { |
| "epoch": 88.33225597931481, |
| "grad_norm": 1.5978822708129883, |
| "learning_rate": 0.001, |
| "loss": 1.4077, |
| "step": 273300 |
| }, |
| { |
| "epoch": 88.36457659987072, |
| "grad_norm": 1.614707350730896, |
| "learning_rate": 0.001, |
| "loss": 1.4044, |
| "step": 273400 |
| }, |
| { |
| "epoch": 88.39689722042664, |
| "grad_norm": 1.2889578342437744, |
| "learning_rate": 0.001, |
| "loss": 1.4159, |
| "step": 273500 |
| }, |
| { |
| "epoch": 88.42921784098255, |
| "grad_norm": 1.2051821947097778, |
| "learning_rate": 0.001, |
| "loss": 1.4153, |
| "step": 273600 |
| }, |
| { |
| "epoch": 88.46153846153847, |
| "grad_norm": 1.505126953125, |
| "learning_rate": 0.001, |
| "loss": 1.4146, |
| "step": 273700 |
| }, |
| { |
| "epoch": 88.49385908209437, |
| "grad_norm": 1.3672128915786743, |
| "learning_rate": 0.001, |
| "loss": 1.4233, |
| "step": 273800 |
| }, |
| { |
| "epoch": 88.5261797026503, |
| "grad_norm": 1.5844954252243042, |
| "learning_rate": 0.001, |
| "loss": 1.4354, |
| "step": 273900 |
| }, |
| { |
| "epoch": 88.5585003232062, |
| "grad_norm": 1.1671549081802368, |
| "learning_rate": 0.001, |
| "loss": 1.4261, |
| "step": 274000 |
| }, |
| { |
| "epoch": 88.59082094376213, |
| "grad_norm": 1.7736015319824219, |
| "learning_rate": 0.001, |
| "loss": 1.4343, |
| "step": 274100 |
| }, |
| { |
| "epoch": 88.62314156431803, |
| "grad_norm": 1.6177502870559692, |
| "learning_rate": 0.001, |
| "loss": 1.4385, |
| "step": 274200 |
| }, |
| { |
| "epoch": 88.65546218487395, |
| "grad_norm": 1.4046214818954468, |
| "learning_rate": 0.001, |
| "loss": 1.4426, |
| "step": 274300 |
| }, |
| { |
| "epoch": 88.68778280542986, |
| "grad_norm": 1.4468072652816772, |
| "learning_rate": 0.001, |
| "loss": 1.4563, |
| "step": 274400 |
| }, |
| { |
| "epoch": 88.72010342598578, |
| "grad_norm": 1.2877278327941895, |
| "learning_rate": 0.001, |
| "loss": 1.4573, |
| "step": 274500 |
| }, |
| { |
| "epoch": 88.75242404654169, |
| "grad_norm": 1.874245285987854, |
| "learning_rate": 0.001, |
| "loss": 1.437, |
| "step": 274600 |
| }, |
| { |
| "epoch": 88.78474466709761, |
| "grad_norm": 1.740172266960144, |
| "learning_rate": 0.001, |
| "loss": 1.4548, |
| "step": 274700 |
| }, |
| { |
| "epoch": 88.81706528765352, |
| "grad_norm": 2.0147006511688232, |
| "learning_rate": 0.001, |
| "loss": 1.4542, |
| "step": 274800 |
| }, |
| { |
| "epoch": 88.84938590820944, |
| "grad_norm": 2.030195713043213, |
| "learning_rate": 0.001, |
| "loss": 1.4624, |
| "step": 274900 |
| }, |
| { |
| "epoch": 88.88170652876535, |
| "grad_norm": 1.3581018447875977, |
| "learning_rate": 0.001, |
| "loss": 1.4679, |
| "step": 275000 |
| }, |
| { |
| "epoch": 88.91402714932127, |
| "grad_norm": 1.2543132305145264, |
| "learning_rate": 0.001, |
| "loss": 1.4649, |
| "step": 275100 |
| }, |
| { |
| "epoch": 88.94634776987718, |
| "grad_norm": 1.4189682006835938, |
| "learning_rate": 0.001, |
| "loss": 1.4535, |
| "step": 275200 |
| }, |
| { |
| "epoch": 88.9786683904331, |
| "grad_norm": 2.033797264099121, |
| "learning_rate": 0.001, |
| "loss": 1.4647, |
| "step": 275300 |
| }, |
| { |
| "epoch": 89.01098901098901, |
| "grad_norm": 1.2707444429397583, |
| "learning_rate": 0.001, |
| "loss": 1.4318, |
| "step": 275400 |
| }, |
| { |
| "epoch": 89.04330963154493, |
| "grad_norm": 1.6407828330993652, |
| "learning_rate": 0.001, |
| "loss": 1.3663, |
| "step": 275500 |
| }, |
| { |
| "epoch": 89.07563025210084, |
| "grad_norm": 1.7446123361587524, |
| "learning_rate": 0.001, |
| "loss": 1.3561, |
| "step": 275600 |
| }, |
| { |
| "epoch": 89.10795087265676, |
| "grad_norm": 1.7070893049240112, |
| "learning_rate": 0.001, |
| "loss": 1.3785, |
| "step": 275700 |
| }, |
| { |
| "epoch": 89.14027149321267, |
| "grad_norm": 1.4096546173095703, |
| "learning_rate": 0.001, |
| "loss": 1.3707, |
| "step": 275800 |
| }, |
| { |
| "epoch": 89.17259211376859, |
| "grad_norm": 1.5572234392166138, |
| "learning_rate": 0.001, |
| "loss": 1.3817, |
| "step": 275900 |
| }, |
| { |
| "epoch": 89.2049127343245, |
| "grad_norm": 1.4868851900100708, |
| "learning_rate": 0.001, |
| "loss": 1.387, |
| "step": 276000 |
| }, |
| { |
| "epoch": 89.23723335488042, |
| "grad_norm": 1.7338111400604248, |
| "learning_rate": 0.001, |
| "loss": 1.3882, |
| "step": 276100 |
| }, |
| { |
| "epoch": 89.26955397543632, |
| "grad_norm": 1.4389114379882812, |
| "learning_rate": 0.001, |
| "loss": 1.3906, |
| "step": 276200 |
| }, |
| { |
| "epoch": 89.30187459599225, |
| "grad_norm": 1.546191692352295, |
| "learning_rate": 0.001, |
| "loss": 1.3786, |
| "step": 276300 |
| }, |
| { |
| "epoch": 89.33419521654815, |
| "grad_norm": 1.2728110551834106, |
| "learning_rate": 0.001, |
| "loss": 1.4024, |
| "step": 276400 |
| }, |
| { |
| "epoch": 89.36651583710407, |
| "grad_norm": 1.728053092956543, |
| "learning_rate": 0.001, |
| "loss": 1.4192, |
| "step": 276500 |
| }, |
| { |
| "epoch": 89.39883645765998, |
| "grad_norm": 1.7624201774597168, |
| "learning_rate": 0.001, |
| "loss": 1.4122, |
| "step": 276600 |
| }, |
| { |
| "epoch": 89.4311570782159, |
| "grad_norm": 1.3859518766403198, |
| "learning_rate": 0.001, |
| "loss": 1.4031, |
| "step": 276700 |
| }, |
| { |
| "epoch": 89.46347769877181, |
| "grad_norm": 1.5684387683868408, |
| "learning_rate": 0.001, |
| "loss": 1.4146, |
| "step": 276800 |
| }, |
| { |
| "epoch": 89.49579831932773, |
| "grad_norm": 1.493183970451355, |
| "learning_rate": 0.001, |
| "loss": 1.4187, |
| "step": 276900 |
| }, |
| { |
| "epoch": 89.52811893988364, |
| "grad_norm": 1.4836277961730957, |
| "learning_rate": 0.001, |
| "loss": 1.4199, |
| "step": 277000 |
| }, |
| { |
| "epoch": 89.56043956043956, |
| "grad_norm": 1.4376671314239502, |
| "learning_rate": 0.001, |
| "loss": 1.4178, |
| "step": 277100 |
| }, |
| { |
| "epoch": 89.59276018099547, |
| "grad_norm": 1.6110737323760986, |
| "learning_rate": 0.001, |
| "loss": 1.4165, |
| "step": 277200 |
| }, |
| { |
| "epoch": 89.62508080155139, |
| "grad_norm": 1.447060227394104, |
| "learning_rate": 0.001, |
| "loss": 1.4457, |
| "step": 277300 |
| }, |
| { |
| "epoch": 89.6574014221073, |
| "grad_norm": 1.2219505310058594, |
| "learning_rate": 0.001, |
| "loss": 1.4378, |
| "step": 277400 |
| }, |
| { |
| "epoch": 89.68972204266322, |
| "grad_norm": 2.083582878112793, |
| "learning_rate": 0.001, |
| "loss": 1.4247, |
| "step": 277500 |
| }, |
| { |
| "epoch": 89.72204266321913, |
| "grad_norm": 1.6131013631820679, |
| "learning_rate": 0.001, |
| "loss": 1.4497, |
| "step": 277600 |
| }, |
| { |
| "epoch": 89.75436328377505, |
| "grad_norm": 1.2412773370742798, |
| "learning_rate": 0.001, |
| "loss": 1.4375, |
| "step": 277700 |
| }, |
| { |
| "epoch": 89.78668390433096, |
| "grad_norm": 1.619038701057434, |
| "learning_rate": 0.001, |
| "loss": 1.4347, |
| "step": 277800 |
| }, |
| { |
| "epoch": 89.81900452488688, |
| "grad_norm": 1.4823715686798096, |
| "learning_rate": 0.001, |
| "loss": 1.4662, |
| "step": 277900 |
| }, |
| { |
| "epoch": 89.85132514544279, |
| "grad_norm": 2.2037734985351562, |
| "learning_rate": 0.001, |
| "loss": 1.4334, |
| "step": 278000 |
| }, |
| { |
| "epoch": 89.88364576599871, |
| "grad_norm": 1.715324878692627, |
| "learning_rate": 0.001, |
| "loss": 1.4549, |
| "step": 278100 |
| }, |
| { |
| "epoch": 89.91596638655462, |
| "grad_norm": 1.643113374710083, |
| "learning_rate": 0.001, |
| "loss": 1.4477, |
| "step": 278200 |
| }, |
| { |
| "epoch": 89.94828700711054, |
| "grad_norm": 1.3043150901794434, |
| "learning_rate": 0.001, |
| "loss": 1.4623, |
| "step": 278300 |
| }, |
| { |
| "epoch": 89.98060762766644, |
| "grad_norm": 2.1376888751983643, |
| "learning_rate": 0.001, |
| "loss": 1.4497, |
| "step": 278400 |
| }, |
| { |
| "epoch": 90.01292824822237, |
| "grad_norm": 2.751443862915039, |
| "learning_rate": 0.001, |
| "loss": 1.4045, |
| "step": 278500 |
| }, |
| { |
| "epoch": 90.04524886877829, |
| "grad_norm": 1.382408857345581, |
| "learning_rate": 0.001, |
| "loss": 1.3524, |
| "step": 278600 |
| }, |
| { |
| "epoch": 90.0775694893342, |
| "grad_norm": 1.7884782552719116, |
| "learning_rate": 0.001, |
| "loss": 1.357, |
| "step": 278700 |
| }, |
| { |
| "epoch": 90.10989010989012, |
| "grad_norm": 1.7769843339920044, |
| "learning_rate": 0.001, |
| "loss": 1.352, |
| "step": 278800 |
| }, |
| { |
| "epoch": 90.14221073044602, |
| "grad_norm": 1.3789559602737427, |
| "learning_rate": 0.001, |
| "loss": 1.3698, |
| "step": 278900 |
| }, |
| { |
| "epoch": 90.17453135100195, |
| "grad_norm": 1.8564437627792358, |
| "learning_rate": 0.001, |
| "loss": 1.3642, |
| "step": 279000 |
| }, |
| { |
| "epoch": 90.20685197155785, |
| "grad_norm": 1.7183071374893188, |
| "learning_rate": 0.001, |
| "loss": 1.3561, |
| "step": 279100 |
| }, |
| { |
| "epoch": 90.23917259211377, |
| "grad_norm": 1.4420616626739502, |
| "learning_rate": 0.001, |
| "loss": 1.3924, |
| "step": 279200 |
| }, |
| { |
| "epoch": 90.27149321266968, |
| "grad_norm": 1.652576208114624, |
| "learning_rate": 0.001, |
| "loss": 1.3891, |
| "step": 279300 |
| }, |
| { |
| "epoch": 90.3038138332256, |
| "grad_norm": 1.4430058002471924, |
| "learning_rate": 0.001, |
| "loss": 1.3758, |
| "step": 279400 |
| }, |
| { |
| "epoch": 90.33613445378151, |
| "grad_norm": 1.5071574449539185, |
| "learning_rate": 0.001, |
| "loss": 1.3871, |
| "step": 279500 |
| }, |
| { |
| "epoch": 90.36845507433743, |
| "grad_norm": 1.622592568397522, |
| "learning_rate": 0.001, |
| "loss": 1.3958, |
| "step": 279600 |
| }, |
| { |
| "epoch": 90.40077569489334, |
| "grad_norm": 1.8727059364318848, |
| "learning_rate": 0.001, |
| "loss": 1.3956, |
| "step": 279700 |
| }, |
| { |
| "epoch": 90.43309631544926, |
| "grad_norm": 1.5023539066314697, |
| "learning_rate": 0.001, |
| "loss": 1.3878, |
| "step": 279800 |
| }, |
| { |
| "epoch": 90.46541693600517, |
| "grad_norm": 1.6353498697280884, |
| "learning_rate": 0.001, |
| "loss": 1.4131, |
| "step": 279900 |
| }, |
| { |
| "epoch": 90.49773755656109, |
| "grad_norm": 1.4944193363189697, |
| "learning_rate": 0.001, |
| "loss": 1.4118, |
| "step": 280000 |
| }, |
| { |
| "epoch": 90.530058177117, |
| "grad_norm": 1.550230622291565, |
| "learning_rate": 0.001, |
| "loss": 1.4177, |
| "step": 280100 |
| }, |
| { |
| "epoch": 90.56237879767292, |
| "grad_norm": 1.6260823011398315, |
| "learning_rate": 0.001, |
| "loss": 1.4105, |
| "step": 280200 |
| }, |
| { |
| "epoch": 90.59469941822883, |
| "grad_norm": 1.7901148796081543, |
| "learning_rate": 0.001, |
| "loss": 1.4237, |
| "step": 280300 |
| }, |
| { |
| "epoch": 90.62702003878475, |
| "grad_norm": 1.7166590690612793, |
| "learning_rate": 0.001, |
| "loss": 1.4241, |
| "step": 280400 |
| }, |
| { |
| "epoch": 90.65934065934066, |
| "grad_norm": 1.6885669231414795, |
| "learning_rate": 0.001, |
| "loss": 1.4078, |
| "step": 280500 |
| }, |
| { |
| "epoch": 90.69166127989658, |
| "grad_norm": 2.0084264278411865, |
| "learning_rate": 0.001, |
| "loss": 1.4381, |
| "step": 280600 |
| }, |
| { |
| "epoch": 90.72398190045249, |
| "grad_norm": 2.050732135772705, |
| "learning_rate": 0.001, |
| "loss": 1.4323, |
| "step": 280700 |
| }, |
| { |
| "epoch": 90.75630252100841, |
| "grad_norm": 1.9119924306869507, |
| "learning_rate": 0.001, |
| "loss": 1.4294, |
| "step": 280800 |
| }, |
| { |
| "epoch": 90.78862314156432, |
| "grad_norm": 1.8119657039642334, |
| "learning_rate": 0.001, |
| "loss": 1.4261, |
| "step": 280900 |
| }, |
| { |
| "epoch": 90.82094376212024, |
| "grad_norm": 1.9015934467315674, |
| "learning_rate": 0.001, |
| "loss": 1.4534, |
| "step": 281000 |
| }, |
| { |
| "epoch": 90.85326438267614, |
| "grad_norm": 1.812975287437439, |
| "learning_rate": 0.001, |
| "loss": 1.4645, |
| "step": 281100 |
| }, |
| { |
| "epoch": 90.88558500323207, |
| "grad_norm": 1.9151250123977661, |
| "learning_rate": 0.001, |
| "loss": 1.4598, |
| "step": 281200 |
| }, |
| { |
| "epoch": 90.91790562378797, |
| "grad_norm": 1.9323469400405884, |
| "learning_rate": 0.001, |
| "loss": 1.4657, |
| "step": 281300 |
| }, |
| { |
| "epoch": 90.9502262443439, |
| "grad_norm": 1.3795750141143799, |
| "learning_rate": 0.001, |
| "loss": 1.4455, |
| "step": 281400 |
| }, |
| { |
| "epoch": 90.9825468648998, |
| "grad_norm": 1.4775996208190918, |
| "learning_rate": 0.001, |
| "loss": 1.4577, |
| "step": 281500 |
| }, |
| { |
| "epoch": 91.01486748545572, |
| "grad_norm": 1.7066103219985962, |
| "learning_rate": 0.001, |
| "loss": 1.3822, |
| "step": 281600 |
| }, |
| { |
| "epoch": 91.04718810601163, |
| "grad_norm": 1.2869503498077393, |
| "learning_rate": 0.001, |
| "loss": 1.3376, |
| "step": 281700 |
| }, |
| { |
| "epoch": 91.07950872656755, |
| "grad_norm": 1.8983968496322632, |
| "learning_rate": 0.001, |
| "loss": 1.3431, |
| "step": 281800 |
| }, |
| { |
| "epoch": 91.11182934712346, |
| "grad_norm": 1.9564945697784424, |
| "learning_rate": 0.001, |
| "loss": 1.3623, |
| "step": 281900 |
| }, |
| { |
| "epoch": 91.14414996767938, |
| "grad_norm": 1.5554038286209106, |
| "learning_rate": 0.001, |
| "loss": 1.369, |
| "step": 282000 |
| }, |
| { |
| "epoch": 91.17647058823529, |
| "grad_norm": 1.8451589345932007, |
| "learning_rate": 0.001, |
| "loss": 1.3661, |
| "step": 282100 |
| }, |
| { |
| "epoch": 91.20879120879121, |
| "grad_norm": 1.8111636638641357, |
| "learning_rate": 0.001, |
| "loss": 1.3709, |
| "step": 282200 |
| }, |
| { |
| "epoch": 91.24111182934712, |
| "grad_norm": 1.7260061502456665, |
| "learning_rate": 0.001, |
| "loss": 1.3703, |
| "step": 282300 |
| }, |
| { |
| "epoch": 91.27343244990304, |
| "grad_norm": 2.1305065155029297, |
| "learning_rate": 0.001, |
| "loss": 1.3697, |
| "step": 282400 |
| }, |
| { |
| "epoch": 91.30575307045895, |
| "grad_norm": 1.5473634004592896, |
| "learning_rate": 0.001, |
| "loss": 1.3866, |
| "step": 282500 |
| }, |
| { |
| "epoch": 91.33807369101487, |
| "grad_norm": 1.3852416276931763, |
| "learning_rate": 0.001, |
| "loss": 1.383, |
| "step": 282600 |
| }, |
| { |
| "epoch": 91.37039431157078, |
| "grad_norm": 1.899824857711792, |
| "learning_rate": 0.001, |
| "loss": 1.3934, |
| "step": 282700 |
| }, |
| { |
| "epoch": 91.4027149321267, |
| "grad_norm": 1.6592535972595215, |
| "learning_rate": 0.001, |
| "loss": 1.4009, |
| "step": 282800 |
| }, |
| { |
| "epoch": 91.4350355526826, |
| "grad_norm": 1.8192658424377441, |
| "learning_rate": 0.001, |
| "loss": 1.3915, |
| "step": 282900 |
| }, |
| { |
| "epoch": 91.46735617323853, |
| "grad_norm": 1.6893870830535889, |
| "learning_rate": 0.001, |
| "loss": 1.3993, |
| "step": 283000 |
| }, |
| { |
| "epoch": 91.49967679379444, |
| "grad_norm": 1.8176219463348389, |
| "learning_rate": 0.001, |
| "loss": 1.4131, |
| "step": 283100 |
| }, |
| { |
| "epoch": 91.53199741435036, |
| "grad_norm": 1.5680086612701416, |
| "learning_rate": 0.001, |
| "loss": 1.4065, |
| "step": 283200 |
| }, |
| { |
| "epoch": 91.56431803490626, |
| "grad_norm": 1.828738808631897, |
| "learning_rate": 0.001, |
| "loss": 1.4054, |
| "step": 283300 |
| }, |
| { |
| "epoch": 91.59663865546219, |
| "grad_norm": 1.3039504289627075, |
| "learning_rate": 0.001, |
| "loss": 1.4171, |
| "step": 283400 |
| }, |
| { |
| "epoch": 91.6289592760181, |
| "grad_norm": 1.849231243133545, |
| "learning_rate": 0.001, |
| "loss": 1.4133, |
| "step": 283500 |
| }, |
| { |
| "epoch": 91.66127989657402, |
| "grad_norm": 1.7335554361343384, |
| "learning_rate": 0.001, |
| "loss": 1.4347, |
| "step": 283600 |
| }, |
| { |
| "epoch": 91.69360051712992, |
| "grad_norm": 1.6021090745925903, |
| "learning_rate": 0.001, |
| "loss": 1.4229, |
| "step": 283700 |
| }, |
| { |
| "epoch": 91.72592113768584, |
| "grad_norm": 1.8389188051223755, |
| "learning_rate": 0.001, |
| "loss": 1.4254, |
| "step": 283800 |
| }, |
| { |
| "epoch": 91.75824175824175, |
| "grad_norm": 1.4818696975708008, |
| "learning_rate": 0.001, |
| "loss": 1.4276, |
| "step": 283900 |
| }, |
| { |
| "epoch": 91.79056237879767, |
| "grad_norm": 2.0308756828308105, |
| "learning_rate": 0.001, |
| "loss": 1.4423, |
| "step": 284000 |
| }, |
| { |
| "epoch": 91.82288299935358, |
| "grad_norm": 1.9242323637008667, |
| "learning_rate": 0.001, |
| "loss": 1.4399, |
| "step": 284100 |
| }, |
| { |
| "epoch": 91.8552036199095, |
| "grad_norm": 1.9083664417266846, |
| "learning_rate": 0.001, |
| "loss": 1.4535, |
| "step": 284200 |
| }, |
| { |
| "epoch": 91.88752424046541, |
| "grad_norm": 2.0432400703430176, |
| "learning_rate": 0.001, |
| "loss": 1.4408, |
| "step": 284300 |
| }, |
| { |
| "epoch": 91.91984486102133, |
| "grad_norm": 1.565138339996338, |
| "learning_rate": 0.001, |
| "loss": 1.4351, |
| "step": 284400 |
| }, |
| { |
| "epoch": 91.95216548157724, |
| "grad_norm": 2.194920063018799, |
| "learning_rate": 0.001, |
| "loss": 1.4471, |
| "step": 284500 |
| }, |
| { |
| "epoch": 91.98448610213316, |
| "grad_norm": 1.545792579650879, |
| "learning_rate": 0.001, |
| "loss": 1.4397, |
| "step": 284600 |
| }, |
| { |
| "epoch": 92.01680672268908, |
| "grad_norm": 2.3659920692443848, |
| "learning_rate": 0.001, |
| "loss": 1.3656, |
| "step": 284700 |
| }, |
| { |
| "epoch": 92.04912734324499, |
| "grad_norm": 3.0130562782287598, |
| "learning_rate": 0.001, |
| "loss": 1.3451, |
| "step": 284800 |
| }, |
| { |
| "epoch": 92.08144796380091, |
| "grad_norm": 1.6045893430709839, |
| "learning_rate": 0.001, |
| "loss": 1.3519, |
| "step": 284900 |
| }, |
| { |
| "epoch": 92.11376858435682, |
| "grad_norm": 1.9951777458190918, |
| "learning_rate": 0.001, |
| "loss": 1.3523, |
| "step": 285000 |
| }, |
| { |
| "epoch": 92.14608920491274, |
| "grad_norm": 1.7994177341461182, |
| "learning_rate": 0.001, |
| "loss": 1.3532, |
| "step": 285100 |
| }, |
| { |
| "epoch": 92.17840982546865, |
| "grad_norm": 1.8829383850097656, |
| "learning_rate": 0.001, |
| "loss": 1.3646, |
| "step": 285200 |
| }, |
| { |
| "epoch": 92.21073044602457, |
| "grad_norm": 1.5709656476974487, |
| "learning_rate": 0.001, |
| "loss": 1.3591, |
| "step": 285300 |
| }, |
| { |
| "epoch": 92.24305106658048, |
| "grad_norm": 1.4151982069015503, |
| "learning_rate": 0.001, |
| "loss": 1.3666, |
| "step": 285400 |
| }, |
| { |
| "epoch": 92.2753716871364, |
| "grad_norm": 2.813156843185425, |
| "learning_rate": 0.001, |
| "loss": 1.388, |
| "step": 285500 |
| }, |
| { |
| "epoch": 92.3076923076923, |
| "grad_norm": 2.141181707382202, |
| "learning_rate": 0.001, |
| "loss": 1.3718, |
| "step": 285600 |
| }, |
| { |
| "epoch": 92.34001292824823, |
| "grad_norm": 2.4286043643951416, |
| "learning_rate": 0.001, |
| "loss": 1.388, |
| "step": 285700 |
| }, |
| { |
| "epoch": 92.37233354880414, |
| "grad_norm": 2.7005491256713867, |
| "learning_rate": 0.001, |
| "loss": 1.3813, |
| "step": 285800 |
| }, |
| { |
| "epoch": 92.40465416936006, |
| "grad_norm": 1.7971080541610718, |
| "learning_rate": 0.001, |
| "loss": 1.3825, |
| "step": 285900 |
| }, |
| { |
| "epoch": 92.43697478991596, |
| "grad_norm": 1.8688595294952393, |
| "learning_rate": 0.001, |
| "loss": 1.3938, |
| "step": 286000 |
| }, |
| { |
| "epoch": 92.46929541047189, |
| "grad_norm": 2.3091495037078857, |
| "learning_rate": 0.001, |
| "loss": 1.3983, |
| "step": 286100 |
| }, |
| { |
| "epoch": 92.5016160310278, |
| "grad_norm": 1.972983479499817, |
| "learning_rate": 0.001, |
| "loss": 1.385, |
| "step": 286200 |
| }, |
| { |
| "epoch": 92.53393665158372, |
| "grad_norm": 2.646979570388794, |
| "learning_rate": 0.001, |
| "loss": 1.4115, |
| "step": 286300 |
| }, |
| { |
| "epoch": 92.56625727213962, |
| "grad_norm": 1.8654998540878296, |
| "learning_rate": 0.001, |
| "loss": 1.3997, |
| "step": 286400 |
| }, |
| { |
| "epoch": 92.59857789269554, |
| "grad_norm": 1.8353911638259888, |
| "learning_rate": 0.001, |
| "loss": 1.4048, |
| "step": 286500 |
| }, |
| { |
| "epoch": 92.63089851325145, |
| "grad_norm": 1.9789825677871704, |
| "learning_rate": 0.001, |
| "loss": 1.4107, |
| "step": 286600 |
| }, |
| { |
| "epoch": 92.66321913380737, |
| "grad_norm": 2.219052791595459, |
| "learning_rate": 0.001, |
| "loss": 1.4268, |
| "step": 286700 |
| }, |
| { |
| "epoch": 92.69553975436328, |
| "grad_norm": 2.2139179706573486, |
| "learning_rate": 0.001, |
| "loss": 1.4118, |
| "step": 286800 |
| }, |
| { |
| "epoch": 92.7278603749192, |
| "grad_norm": 2.3915624618530273, |
| "learning_rate": 0.001, |
| "loss": 1.4126, |
| "step": 286900 |
| }, |
| { |
| "epoch": 92.76018099547511, |
| "grad_norm": 3.5310840606689453, |
| "learning_rate": 0.001, |
| "loss": 1.4344, |
| "step": 287000 |
| }, |
| { |
| "epoch": 92.79250161603103, |
| "grad_norm": 3.0643367767333984, |
| "learning_rate": 0.001, |
| "loss": 1.4189, |
| "step": 287100 |
| }, |
| { |
| "epoch": 92.82482223658694, |
| "grad_norm": 3.3198719024658203, |
| "learning_rate": 0.001, |
| "loss": 1.441, |
| "step": 287200 |
| }, |
| { |
| "epoch": 92.85714285714286, |
| "grad_norm": 1.597151279449463, |
| "learning_rate": 0.001, |
| "loss": 1.4311, |
| "step": 287300 |
| }, |
| { |
| "epoch": 92.88946347769877, |
| "grad_norm": 1.5445133447647095, |
| "learning_rate": 0.001, |
| "loss": 1.4289, |
| "step": 287400 |
| }, |
| { |
| "epoch": 92.92178409825469, |
| "grad_norm": 2.8522164821624756, |
| "learning_rate": 0.001, |
| "loss": 1.4284, |
| "step": 287500 |
| }, |
| { |
| "epoch": 92.9541047188106, |
| "grad_norm": 2.093144178390503, |
| "learning_rate": 0.001, |
| "loss": 1.4327, |
| "step": 287600 |
| }, |
| { |
| "epoch": 92.98642533936652, |
| "grad_norm": 1.3801674842834473, |
| "learning_rate": 0.001, |
| "loss": 1.4522, |
| "step": 287700 |
| }, |
| { |
| "epoch": 93.01874595992243, |
| "grad_norm": 2.2896697521209717, |
| "learning_rate": 0.001, |
| "loss": 1.3783, |
| "step": 287800 |
| }, |
| { |
| "epoch": 93.05106658047835, |
| "grad_norm": 2.3245482444763184, |
| "learning_rate": 0.001, |
| "loss": 1.3282, |
| "step": 287900 |
| }, |
| { |
| "epoch": 93.08338720103426, |
| "grad_norm": 1.4789022207260132, |
| "learning_rate": 0.001, |
| "loss": 1.329, |
| "step": 288000 |
| }, |
| { |
| "epoch": 93.11570782159018, |
| "grad_norm": 1.8415120840072632, |
| "learning_rate": 0.001, |
| "loss": 1.3544, |
| "step": 288100 |
| }, |
| { |
| "epoch": 93.14802844214609, |
| "grad_norm": 1.6748054027557373, |
| "learning_rate": 0.001, |
| "loss": 1.3446, |
| "step": 288200 |
| }, |
| { |
| "epoch": 93.180349062702, |
| "grad_norm": 1.9157007932662964, |
| "learning_rate": 0.001, |
| "loss": 1.3454, |
| "step": 288300 |
| }, |
| { |
| "epoch": 93.21266968325791, |
| "grad_norm": 2.0357260704040527, |
| "learning_rate": 0.001, |
| "loss": 1.3463, |
| "step": 288400 |
| }, |
| { |
| "epoch": 93.24499030381384, |
| "grad_norm": 1.5724585056304932, |
| "learning_rate": 0.001, |
| "loss": 1.3515, |
| "step": 288500 |
| }, |
| { |
| "epoch": 93.27731092436974, |
| "grad_norm": 1.6937826871871948, |
| "learning_rate": 0.001, |
| "loss": 1.3722, |
| "step": 288600 |
| }, |
| { |
| "epoch": 93.30963154492567, |
| "grad_norm": 1.246242642402649, |
| "learning_rate": 0.001, |
| "loss": 1.3608, |
| "step": 288700 |
| }, |
| { |
| "epoch": 93.34195216548157, |
| "grad_norm": 1.6182515621185303, |
| "learning_rate": 0.001, |
| "loss": 1.3816, |
| "step": 288800 |
| }, |
| { |
| "epoch": 93.3742727860375, |
| "grad_norm": 2.0054874420166016, |
| "learning_rate": 0.001, |
| "loss": 1.3722, |
| "step": 288900 |
| }, |
| { |
| "epoch": 93.4065934065934, |
| "grad_norm": 2.126291275024414, |
| "learning_rate": 0.001, |
| "loss": 1.3721, |
| "step": 289000 |
| }, |
| { |
| "epoch": 93.43891402714932, |
| "grad_norm": 1.4355053901672363, |
| "learning_rate": 0.001, |
| "loss": 1.3871, |
| "step": 289100 |
| }, |
| { |
| "epoch": 93.47123464770523, |
| "grad_norm": 2.010754108428955, |
| "learning_rate": 0.001, |
| "loss": 1.3885, |
| "step": 289200 |
| }, |
| { |
| "epoch": 93.50355526826115, |
| "grad_norm": 1.3670854568481445, |
| "learning_rate": 0.001, |
| "loss": 1.3948, |
| "step": 289300 |
| }, |
| { |
| "epoch": 93.53587588881706, |
| "grad_norm": 1.4175864458084106, |
| "learning_rate": 0.001, |
| "loss": 1.3982, |
| "step": 289400 |
| }, |
| { |
| "epoch": 93.56819650937298, |
| "grad_norm": 2.6126651763916016, |
| "learning_rate": 0.001, |
| "loss": 1.4164, |
| "step": 289500 |
| }, |
| { |
| "epoch": 93.60051712992889, |
| "grad_norm": 1.7548456192016602, |
| "learning_rate": 0.001, |
| "loss": 1.3967, |
| "step": 289600 |
| }, |
| { |
| "epoch": 93.63283775048481, |
| "grad_norm": 1.2660231590270996, |
| "learning_rate": 0.001, |
| "loss": 1.394, |
| "step": 289700 |
| }, |
| { |
| "epoch": 93.66515837104072, |
| "grad_norm": 1.2814079523086548, |
| "learning_rate": 0.001, |
| "loss": 1.4106, |
| "step": 289800 |
| }, |
| { |
| "epoch": 93.69747899159664, |
| "grad_norm": 1.2180604934692383, |
| "learning_rate": 0.001, |
| "loss": 1.4285, |
| "step": 289900 |
| }, |
| { |
| "epoch": 93.72979961215255, |
| "grad_norm": 2.0289041996002197, |
| "learning_rate": 0.001, |
| "loss": 1.4151, |
| "step": 290000 |
| }, |
| { |
| "epoch": 93.76212023270847, |
| "grad_norm": 1.379878282546997, |
| "learning_rate": 0.001, |
| "loss": 1.4212, |
| "step": 290100 |
| }, |
| { |
| "epoch": 93.79444085326438, |
| "grad_norm": 1.6776106357574463, |
| "learning_rate": 0.001, |
| "loss": 1.4362, |
| "step": 290200 |
| }, |
| { |
| "epoch": 93.8267614738203, |
| "grad_norm": 1.7783920764923096, |
| "learning_rate": 0.001, |
| "loss": 1.4347, |
| "step": 290300 |
| }, |
| { |
| "epoch": 93.8590820943762, |
| "grad_norm": 2.01831316947937, |
| "learning_rate": 0.001, |
| "loss": 1.4237, |
| "step": 290400 |
| }, |
| { |
| "epoch": 93.89140271493213, |
| "grad_norm": 2.4345901012420654, |
| "learning_rate": 0.001, |
| "loss": 1.4323, |
| "step": 290500 |
| }, |
| { |
| "epoch": 93.92372333548803, |
| "grad_norm": 1.4624724388122559, |
| "learning_rate": 0.001, |
| "loss": 1.4257, |
| "step": 290600 |
| }, |
| { |
| "epoch": 93.95604395604396, |
| "grad_norm": 1.7283406257629395, |
| "learning_rate": 0.001, |
| "loss": 1.4538, |
| "step": 290700 |
| }, |
| { |
| "epoch": 93.98836457659988, |
| "grad_norm": 2.014925241470337, |
| "learning_rate": 0.001, |
| "loss": 1.4296, |
| "step": 290800 |
| }, |
| { |
| "epoch": 94.02068519715579, |
| "grad_norm": 1.3549288511276245, |
| "learning_rate": 0.001, |
| "loss": 1.3744, |
| "step": 290900 |
| }, |
| { |
| "epoch": 94.0530058177117, |
| "grad_norm": 1.424655556678772, |
| "learning_rate": 0.001, |
| "loss": 1.3224, |
| "step": 291000 |
| }, |
| { |
| "epoch": 94.08532643826761, |
| "grad_norm": 1.3687740564346313, |
| "learning_rate": 0.001, |
| "loss": 1.3355, |
| "step": 291100 |
| }, |
| { |
| "epoch": 94.11764705882354, |
| "grad_norm": 1.4774508476257324, |
| "learning_rate": 0.001, |
| "loss": 1.3339, |
| "step": 291200 |
| }, |
| { |
| "epoch": 94.14996767937944, |
| "grad_norm": 1.6459767818450928, |
| "learning_rate": 0.001, |
| "loss": 1.3499, |
| "step": 291300 |
| }, |
| { |
| "epoch": 94.18228829993537, |
| "grad_norm": 1.4307767152786255, |
| "learning_rate": 0.001, |
| "loss": 1.3354, |
| "step": 291400 |
| }, |
| { |
| "epoch": 94.21460892049127, |
| "grad_norm": 1.650320053100586, |
| "learning_rate": 0.001, |
| "loss": 1.3508, |
| "step": 291500 |
| }, |
| { |
| "epoch": 94.2469295410472, |
| "grad_norm": 1.236221432685852, |
| "learning_rate": 0.001, |
| "loss": 1.3495, |
| "step": 291600 |
| }, |
| { |
| "epoch": 94.2792501616031, |
| "grad_norm": 2.103527545928955, |
| "learning_rate": 0.001, |
| "loss": 1.3587, |
| "step": 291700 |
| }, |
| { |
| "epoch": 94.31157078215902, |
| "grad_norm": 1.647996425628662, |
| "learning_rate": 0.001, |
| "loss": 1.3607, |
| "step": 291800 |
| }, |
| { |
| "epoch": 94.34389140271493, |
| "grad_norm": 2.096320867538452, |
| "learning_rate": 0.001, |
| "loss": 1.361, |
| "step": 291900 |
| }, |
| { |
| "epoch": 94.37621202327085, |
| "grad_norm": 2.003998041152954, |
| "learning_rate": 0.001, |
| "loss": 1.3591, |
| "step": 292000 |
| }, |
| { |
| "epoch": 94.40853264382676, |
| "grad_norm": 1.35490882396698, |
| "learning_rate": 0.001, |
| "loss": 1.3731, |
| "step": 292100 |
| }, |
| { |
| "epoch": 94.44085326438268, |
| "grad_norm": 1.3877946138381958, |
| "learning_rate": 0.001, |
| "loss": 1.3887, |
| "step": 292200 |
| }, |
| { |
| "epoch": 94.47317388493859, |
| "grad_norm": 1.6850929260253906, |
| "learning_rate": 0.001, |
| "loss": 1.3902, |
| "step": 292300 |
| }, |
| { |
| "epoch": 94.50549450549451, |
| "grad_norm": 1.870916724205017, |
| "learning_rate": 0.001, |
| "loss": 1.3924, |
| "step": 292400 |
| }, |
| { |
| "epoch": 94.53781512605042, |
| "grad_norm": 1.4658701419830322, |
| "learning_rate": 0.001, |
| "loss": 1.3969, |
| "step": 292500 |
| }, |
| { |
| "epoch": 94.57013574660634, |
| "grad_norm": 1.3580251932144165, |
| "learning_rate": 0.001, |
| "loss": 1.3774, |
| "step": 292600 |
| }, |
| { |
| "epoch": 94.60245636716225, |
| "grad_norm": 1.2977242469787598, |
| "learning_rate": 0.001, |
| "loss": 1.3918, |
| "step": 292700 |
| }, |
| { |
| "epoch": 94.63477698771817, |
| "grad_norm": 1.645932912826538, |
| "learning_rate": 0.001, |
| "loss": 1.3933, |
| "step": 292800 |
| }, |
| { |
| "epoch": 94.66709760827408, |
| "grad_norm": 2.167123794555664, |
| "learning_rate": 0.001, |
| "loss": 1.4075, |
| "step": 292900 |
| }, |
| { |
| "epoch": 94.69941822883, |
| "grad_norm": 1.1674609184265137, |
| "learning_rate": 0.001, |
| "loss": 1.4102, |
| "step": 293000 |
| }, |
| { |
| "epoch": 94.7317388493859, |
| "grad_norm": 1.6270956993103027, |
| "learning_rate": 0.001, |
| "loss": 1.4173, |
| "step": 293100 |
| }, |
| { |
| "epoch": 94.76405946994183, |
| "grad_norm": 1.2232261896133423, |
| "learning_rate": 0.001, |
| "loss": 1.4259, |
| "step": 293200 |
| }, |
| { |
| "epoch": 94.79638009049773, |
| "grad_norm": 1.3110573291778564, |
| "learning_rate": 0.001, |
| "loss": 1.427, |
| "step": 293300 |
| }, |
| { |
| "epoch": 94.82870071105366, |
| "grad_norm": 1.368855595588684, |
| "learning_rate": 0.001, |
| "loss": 1.4135, |
| "step": 293400 |
| }, |
| { |
| "epoch": 94.86102133160956, |
| "grad_norm": 1.7230993509292603, |
| "learning_rate": 0.001, |
| "loss": 1.4043, |
| "step": 293500 |
| }, |
| { |
| "epoch": 94.89334195216549, |
| "grad_norm": 1.5591554641723633, |
| "learning_rate": 0.001, |
| "loss": 1.4271, |
| "step": 293600 |
| }, |
| { |
| "epoch": 94.9256625727214, |
| "grad_norm": 1.8541635274887085, |
| "learning_rate": 0.001, |
| "loss": 1.4325, |
| "step": 293700 |
| }, |
| { |
| "epoch": 94.95798319327731, |
| "grad_norm": 1.5044015645980835, |
| "learning_rate": 0.001, |
| "loss": 1.4358, |
| "step": 293800 |
| }, |
| { |
| "epoch": 94.99030381383322, |
| "grad_norm": 1.679853081703186, |
| "learning_rate": 0.001, |
| "loss": 1.431, |
| "step": 293900 |
| }, |
| { |
| "epoch": 95.02262443438914, |
| "grad_norm": 1.3926348686218262, |
| "learning_rate": 0.001, |
| "loss": 1.363, |
| "step": 294000 |
| }, |
| { |
| "epoch": 95.05494505494505, |
| "grad_norm": 1.3716071844100952, |
| "learning_rate": 0.001, |
| "loss": 1.3219, |
| "step": 294100 |
| }, |
| { |
| "epoch": 95.08726567550097, |
| "grad_norm": 1.6231685876846313, |
| "learning_rate": 0.001, |
| "loss": 1.3236, |
| "step": 294200 |
| }, |
| { |
| "epoch": 95.11958629605688, |
| "grad_norm": 1.5676648616790771, |
| "learning_rate": 0.001, |
| "loss": 1.3203, |
| "step": 294300 |
| }, |
| { |
| "epoch": 95.1519069166128, |
| "grad_norm": 1.576737880706787, |
| "learning_rate": 0.001, |
| "loss": 1.3477, |
| "step": 294400 |
| }, |
| { |
| "epoch": 95.18422753716871, |
| "grad_norm": 1.466307282447815, |
| "learning_rate": 0.001, |
| "loss": 1.3387, |
| "step": 294500 |
| }, |
| { |
| "epoch": 95.21654815772463, |
| "grad_norm": 1.8757154941558838, |
| "learning_rate": 0.001, |
| "loss": 1.3486, |
| "step": 294600 |
| }, |
| { |
| "epoch": 95.24886877828054, |
| "grad_norm": 1.3650315999984741, |
| "learning_rate": 0.001, |
| "loss": 1.3346, |
| "step": 294700 |
| }, |
| { |
| "epoch": 95.28118939883646, |
| "grad_norm": 1.3587350845336914, |
| "learning_rate": 0.001, |
| "loss": 1.3518, |
| "step": 294800 |
| }, |
| { |
| "epoch": 95.31351001939237, |
| "grad_norm": 1.2925024032592773, |
| "learning_rate": 0.001, |
| "loss": 1.3627, |
| "step": 294900 |
| }, |
| { |
| "epoch": 95.34583063994829, |
| "grad_norm": 1.84067964553833, |
| "learning_rate": 0.001, |
| "loss": 1.3586, |
| "step": 295000 |
| }, |
| { |
| "epoch": 95.3781512605042, |
| "grad_norm": 2.0140841007232666, |
| "learning_rate": 0.001, |
| "loss": 1.375, |
| "step": 295100 |
| }, |
| { |
| "epoch": 95.41047188106012, |
| "grad_norm": 1.6531531810760498, |
| "learning_rate": 0.001, |
| "loss": 1.3599, |
| "step": 295200 |
| }, |
| { |
| "epoch": 95.44279250161603, |
| "grad_norm": 1.875063419342041, |
| "learning_rate": 0.001, |
| "loss": 1.3817, |
| "step": 295300 |
| }, |
| { |
| "epoch": 95.47511312217195, |
| "grad_norm": 1.1672395467758179, |
| "learning_rate": 0.001, |
| "loss": 1.383, |
| "step": 295400 |
| }, |
| { |
| "epoch": 95.50743374272786, |
| "grad_norm": 1.1308726072311401, |
| "learning_rate": 0.001, |
| "loss": 1.3782, |
| "step": 295500 |
| }, |
| { |
| "epoch": 95.53975436328378, |
| "grad_norm": 1.5952845811843872, |
| "learning_rate": 0.001, |
| "loss": 1.3883, |
| "step": 295600 |
| }, |
| { |
| "epoch": 95.57207498383968, |
| "grad_norm": 1.3622543811798096, |
| "learning_rate": 0.001, |
| "loss": 1.3841, |
| "step": 295700 |
| }, |
| { |
| "epoch": 95.6043956043956, |
| "grad_norm": 1.927077054977417, |
| "learning_rate": 0.001, |
| "loss": 1.3853, |
| "step": 295800 |
| }, |
| { |
| "epoch": 95.63671622495151, |
| "grad_norm": 1.565975308418274, |
| "learning_rate": 0.001, |
| "loss": 1.3973, |
| "step": 295900 |
| }, |
| { |
| "epoch": 95.66903684550743, |
| "grad_norm": 1.1944808959960938, |
| "learning_rate": 0.001, |
| "loss": 1.3878, |
| "step": 296000 |
| }, |
| { |
| "epoch": 95.70135746606334, |
| "grad_norm": 1.8510597944259644, |
| "learning_rate": 0.001, |
| "loss": 1.3971, |
| "step": 296100 |
| }, |
| { |
| "epoch": 95.73367808661926, |
| "grad_norm": 1.73808753490448, |
| "learning_rate": 0.001, |
| "loss": 1.4215, |
| "step": 296200 |
| }, |
| { |
| "epoch": 95.76599870717517, |
| "grad_norm": 1.3161669969558716, |
| "learning_rate": 0.001, |
| "loss": 1.3952, |
| "step": 296300 |
| }, |
| { |
| "epoch": 95.7983193277311, |
| "grad_norm": 1.5162066221237183, |
| "learning_rate": 0.001, |
| "loss": 1.4164, |
| "step": 296400 |
| }, |
| { |
| "epoch": 95.830639948287, |
| "grad_norm": 1.520003318786621, |
| "learning_rate": 0.001, |
| "loss": 1.4094, |
| "step": 296500 |
| }, |
| { |
| "epoch": 95.86296056884292, |
| "grad_norm": 1.3342549800872803, |
| "learning_rate": 0.001, |
| "loss": 1.4156, |
| "step": 296600 |
| }, |
| { |
| "epoch": 95.89528118939883, |
| "grad_norm": 1.4298732280731201, |
| "learning_rate": 0.001, |
| "loss": 1.4212, |
| "step": 296700 |
| }, |
| { |
| "epoch": 95.92760180995475, |
| "grad_norm": 1.7872447967529297, |
| "learning_rate": 0.001, |
| "loss": 1.4259, |
| "step": 296800 |
| }, |
| { |
| "epoch": 95.95992243051066, |
| "grad_norm": 1.2885979413986206, |
| "learning_rate": 0.001, |
| "loss": 1.4186, |
| "step": 296900 |
| }, |
| { |
| "epoch": 95.99224305106658, |
| "grad_norm": 1.2090197801589966, |
| "learning_rate": 0.001, |
| "loss": 1.4237, |
| "step": 297000 |
| }, |
| { |
| "epoch": 96.0245636716225, |
| "grad_norm": 1.53062105178833, |
| "learning_rate": 0.001, |
| "loss": 1.3642, |
| "step": 297100 |
| }, |
| { |
| "epoch": 96.05688429217841, |
| "grad_norm": 1.6323614120483398, |
| "learning_rate": 0.001, |
| "loss": 1.3123, |
| "step": 297200 |
| }, |
| { |
| "epoch": 96.08920491273433, |
| "grad_norm": 1.32908296585083, |
| "learning_rate": 0.001, |
| "loss": 1.3278, |
| "step": 297300 |
| }, |
| { |
| "epoch": 96.12152553329024, |
| "grad_norm": 1.26341712474823, |
| "learning_rate": 0.001, |
| "loss": 1.3218, |
| "step": 297400 |
| }, |
| { |
| "epoch": 96.15384615384616, |
| "grad_norm": 1.4131050109863281, |
| "learning_rate": 0.001, |
| "loss": 1.3322, |
| "step": 297500 |
| }, |
| { |
| "epoch": 96.18616677440207, |
| "grad_norm": 1.8546245098114014, |
| "learning_rate": 0.001, |
| "loss": 1.3301, |
| "step": 297600 |
| }, |
| { |
| "epoch": 96.21848739495799, |
| "grad_norm": 1.5039724111557007, |
| "learning_rate": 0.001, |
| "loss": 1.3453, |
| "step": 297700 |
| }, |
| { |
| "epoch": 96.2508080155139, |
| "grad_norm": 1.752182960510254, |
| "learning_rate": 0.001, |
| "loss": 1.3376, |
| "step": 297800 |
| }, |
| { |
| "epoch": 96.28312863606982, |
| "grad_norm": 1.3808088302612305, |
| "learning_rate": 0.001, |
| "loss": 1.3277, |
| "step": 297900 |
| }, |
| { |
| "epoch": 96.31544925662573, |
| "grad_norm": 1.6190468072891235, |
| "learning_rate": 0.001, |
| "loss": 1.3542, |
| "step": 298000 |
| }, |
| { |
| "epoch": 96.34776987718165, |
| "grad_norm": 1.9144762754440308, |
| "learning_rate": 0.001, |
| "loss": 1.3687, |
| "step": 298100 |
| }, |
| { |
| "epoch": 96.38009049773756, |
| "grad_norm": 1.2472035884857178, |
| "learning_rate": 0.001, |
| "loss": 1.3788, |
| "step": 298200 |
| }, |
| { |
| "epoch": 96.41241111829348, |
| "grad_norm": 1.2860407829284668, |
| "learning_rate": 0.001, |
| "loss": 1.3554, |
| "step": 298300 |
| }, |
| { |
| "epoch": 96.44473173884938, |
| "grad_norm": 1.6460773944854736, |
| "learning_rate": 0.001, |
| "loss": 1.3596, |
| "step": 298400 |
| }, |
| { |
| "epoch": 96.4770523594053, |
| "grad_norm": 1.6944340467453003, |
| "learning_rate": 0.001, |
| "loss": 1.3742, |
| "step": 298500 |
| }, |
| { |
| "epoch": 96.50937297996121, |
| "grad_norm": 2.076917886734009, |
| "learning_rate": 0.001, |
| "loss": 1.3831, |
| "step": 298600 |
| }, |
| { |
| "epoch": 96.54169360051714, |
| "grad_norm": 1.1751590967178345, |
| "learning_rate": 0.001, |
| "loss": 1.385, |
| "step": 298700 |
| }, |
| { |
| "epoch": 96.57401422107304, |
| "grad_norm": 1.7542001008987427, |
| "learning_rate": 0.001, |
| "loss": 1.3931, |
| "step": 298800 |
| }, |
| { |
| "epoch": 96.60633484162896, |
| "grad_norm": 1.7114346027374268, |
| "learning_rate": 0.001, |
| "loss": 1.3904, |
| "step": 298900 |
| }, |
| { |
| "epoch": 96.63865546218487, |
| "grad_norm": 1.9079195261001587, |
| "learning_rate": 0.001, |
| "loss": 1.3692, |
| "step": 299000 |
| }, |
| { |
| "epoch": 96.6709760827408, |
| "grad_norm": 1.387211799621582, |
| "learning_rate": 0.001, |
| "loss": 1.3851, |
| "step": 299100 |
| }, |
| { |
| "epoch": 96.7032967032967, |
| "grad_norm": 1.264164924621582, |
| "learning_rate": 0.001, |
| "loss": 1.3984, |
| "step": 299200 |
| }, |
| { |
| "epoch": 96.73561732385262, |
| "grad_norm": 1.5514090061187744, |
| "learning_rate": 0.001, |
| "loss": 1.3975, |
| "step": 299300 |
| }, |
| { |
| "epoch": 96.76793794440853, |
| "grad_norm": 1.4951378107070923, |
| "learning_rate": 0.001, |
| "loss": 1.4159, |
| "step": 299400 |
| }, |
| { |
| "epoch": 96.80025856496445, |
| "grad_norm": 1.847012996673584, |
| "learning_rate": 0.001, |
| "loss": 1.4089, |
| "step": 299500 |
| }, |
| { |
| "epoch": 96.83257918552036, |
| "grad_norm": 1.565603494644165, |
| "learning_rate": 0.001, |
| "loss": 1.4111, |
| "step": 299600 |
| }, |
| { |
| "epoch": 96.86489980607628, |
| "grad_norm": 1.2334107160568237, |
| "learning_rate": 0.001, |
| "loss": 1.4101, |
| "step": 299700 |
| }, |
| { |
| "epoch": 96.89722042663219, |
| "grad_norm": 1.2206110954284668, |
| "learning_rate": 0.001, |
| "loss": 1.4177, |
| "step": 299800 |
| }, |
| { |
| "epoch": 96.92954104718811, |
| "grad_norm": 1.3281985521316528, |
| "learning_rate": 0.001, |
| "loss": 1.4061, |
| "step": 299900 |
| }, |
| { |
| "epoch": 96.96186166774402, |
| "grad_norm": 1.514419436454773, |
| "learning_rate": 0.001, |
| "loss": 1.4247, |
| "step": 300000 |
| }, |
| { |
| "epoch": 96.99418228829994, |
| "grad_norm": 1.4396289587020874, |
| "learning_rate": 0.001, |
| "loss": 1.4064, |
| "step": 300100 |
| }, |
| { |
| "epoch": 97.02650290885585, |
| "grad_norm": 1.2071473598480225, |
| "learning_rate": 0.001, |
| "loss": 1.3271, |
| "step": 300200 |
| }, |
| { |
| "epoch": 97.05882352941177, |
| "grad_norm": 1.5665583610534668, |
| "learning_rate": 0.001, |
| "loss": 1.3159, |
| "step": 300300 |
| }, |
| { |
| "epoch": 97.09114414996768, |
| "grad_norm": 1.9283638000488281, |
| "learning_rate": 0.001, |
| "loss": 1.3176, |
| "step": 300400 |
| }, |
| { |
| "epoch": 97.1234647705236, |
| "grad_norm": 1.1790565252304077, |
| "learning_rate": 0.001, |
| "loss": 1.3134, |
| "step": 300500 |
| }, |
| { |
| "epoch": 97.1557853910795, |
| "grad_norm": 1.4134197235107422, |
| "learning_rate": 0.001, |
| "loss": 1.3288, |
| "step": 300600 |
| }, |
| { |
| "epoch": 97.18810601163543, |
| "grad_norm": 1.5686752796173096, |
| "learning_rate": 0.001, |
| "loss": 1.3287, |
| "step": 300700 |
| }, |
| { |
| "epoch": 97.22042663219133, |
| "grad_norm": 1.3202617168426514, |
| "learning_rate": 0.001, |
| "loss": 1.3291, |
| "step": 300800 |
| }, |
| { |
| "epoch": 97.25274725274726, |
| "grad_norm": 1.5901153087615967, |
| "learning_rate": 0.001, |
| "loss": 1.3391, |
| "step": 300900 |
| }, |
| { |
| "epoch": 97.28506787330316, |
| "grad_norm": 1.9898815155029297, |
| "learning_rate": 0.001, |
| "loss": 1.3483, |
| "step": 301000 |
| }, |
| { |
| "epoch": 97.31738849385908, |
| "grad_norm": 1.3884061574935913, |
| "learning_rate": 0.001, |
| "loss": 1.3591, |
| "step": 301100 |
| }, |
| { |
| "epoch": 97.34970911441499, |
| "grad_norm": 1.2868521213531494, |
| "learning_rate": 0.001, |
| "loss": 1.3553, |
| "step": 301200 |
| }, |
| { |
| "epoch": 97.38202973497091, |
| "grad_norm": 1.1923433542251587, |
| "learning_rate": 0.001, |
| "loss": 1.3638, |
| "step": 301300 |
| }, |
| { |
| "epoch": 97.41435035552682, |
| "grad_norm": 1.232994556427002, |
| "learning_rate": 0.001, |
| "loss": 1.3487, |
| "step": 301400 |
| }, |
| { |
| "epoch": 97.44667097608274, |
| "grad_norm": 1.6176105737686157, |
| "learning_rate": 0.001, |
| "loss": 1.3713, |
| "step": 301500 |
| }, |
| { |
| "epoch": 97.47899159663865, |
| "grad_norm": 1.3008984327316284, |
| "learning_rate": 0.001, |
| "loss": 1.3635, |
| "step": 301600 |
| }, |
| { |
| "epoch": 97.51131221719457, |
| "grad_norm": 1.520272135734558, |
| "learning_rate": 0.001, |
| "loss": 1.3869, |
| "step": 301700 |
| }, |
| { |
| "epoch": 97.54363283775048, |
| "grad_norm": 1.3893316984176636, |
| "learning_rate": 0.001, |
| "loss": 1.3769, |
| "step": 301800 |
| }, |
| { |
| "epoch": 97.5759534583064, |
| "grad_norm": 1.3842289447784424, |
| "learning_rate": 0.001, |
| "loss": 1.3699, |
| "step": 301900 |
| }, |
| { |
| "epoch": 97.60827407886231, |
| "grad_norm": 1.4758777618408203, |
| "learning_rate": 0.001, |
| "loss": 1.3931, |
| "step": 302000 |
| }, |
| { |
| "epoch": 97.64059469941823, |
| "grad_norm": 1.738865852355957, |
| "learning_rate": 0.001, |
| "loss": 1.3826, |
| "step": 302100 |
| }, |
| { |
| "epoch": 97.67291531997414, |
| "grad_norm": 2.019986867904663, |
| "learning_rate": 0.001, |
| "loss": 1.3695, |
| "step": 302200 |
| }, |
| { |
| "epoch": 97.70523594053006, |
| "grad_norm": 1.6837821006774902, |
| "learning_rate": 0.001, |
| "loss": 1.3765, |
| "step": 302300 |
| }, |
| { |
| "epoch": 97.73755656108597, |
| "grad_norm": 1.2452991008758545, |
| "learning_rate": 0.001, |
| "loss": 1.3805, |
| "step": 302400 |
| }, |
| { |
| "epoch": 97.76987718164189, |
| "grad_norm": 1.489229440689087, |
| "learning_rate": 0.001, |
| "loss": 1.394, |
| "step": 302500 |
| }, |
| { |
| "epoch": 97.8021978021978, |
| "grad_norm": 1.229744791984558, |
| "learning_rate": 0.001, |
| "loss": 1.4094, |
| "step": 302600 |
| }, |
| { |
| "epoch": 97.83451842275372, |
| "grad_norm": 1.2610152959823608, |
| "learning_rate": 0.001, |
| "loss": 1.4014, |
| "step": 302700 |
| }, |
| { |
| "epoch": 97.86683904330962, |
| "grad_norm": 1.180010199546814, |
| "learning_rate": 0.001, |
| "loss": 1.3983, |
| "step": 302800 |
| }, |
| { |
| "epoch": 97.89915966386555, |
| "grad_norm": 1.1906663179397583, |
| "learning_rate": 0.001, |
| "loss": 1.3998, |
| "step": 302900 |
| }, |
| { |
| "epoch": 97.93148028442145, |
| "grad_norm": 1.3478801250457764, |
| "learning_rate": 0.001, |
| "loss": 1.4172, |
| "step": 303000 |
| }, |
| { |
| "epoch": 97.96380090497738, |
| "grad_norm": 2.488024950027466, |
| "learning_rate": 0.001, |
| "loss": 1.4144, |
| "step": 303100 |
| }, |
| { |
| "epoch": 97.99612152553328, |
| "grad_norm": 1.6468634605407715, |
| "learning_rate": 0.001, |
| "loss": 1.4048, |
| "step": 303200 |
| }, |
| { |
| "epoch": 98.0284421460892, |
| "grad_norm": 1.5927938222885132, |
| "learning_rate": 0.001, |
| "loss": 1.297, |
| "step": 303300 |
| }, |
| { |
| "epoch": 98.06076276664513, |
| "grad_norm": 1.2960963249206543, |
| "learning_rate": 0.001, |
| "loss": 1.2978, |
| "step": 303400 |
| }, |
| { |
| "epoch": 98.09308338720103, |
| "grad_norm": 1.713445782661438, |
| "learning_rate": 0.001, |
| "loss": 1.3146, |
| "step": 303500 |
| }, |
| { |
| "epoch": 98.12540400775696, |
| "grad_norm": 1.6118299961090088, |
| "learning_rate": 0.001, |
| "loss": 1.3146, |
| "step": 303600 |
| }, |
| { |
| "epoch": 98.15772462831286, |
| "grad_norm": 1.4335243701934814, |
| "learning_rate": 0.001, |
| "loss": 1.3206, |
| "step": 303700 |
| }, |
| { |
| "epoch": 98.19004524886878, |
| "grad_norm": 1.9087425470352173, |
| "learning_rate": 0.001, |
| "loss": 1.3339, |
| "step": 303800 |
| }, |
| { |
| "epoch": 98.22236586942469, |
| "grad_norm": 1.7270716428756714, |
| "learning_rate": 0.001, |
| "loss": 1.3316, |
| "step": 303900 |
| }, |
| { |
| "epoch": 98.25468648998061, |
| "grad_norm": 1.5996944904327393, |
| "learning_rate": 0.001, |
| "loss": 1.3386, |
| "step": 304000 |
| }, |
| { |
| "epoch": 98.28700711053652, |
| "grad_norm": 1.5665717124938965, |
| "learning_rate": 0.001, |
| "loss": 1.3378, |
| "step": 304100 |
| }, |
| { |
| "epoch": 98.31932773109244, |
| "grad_norm": 1.7300177812576294, |
| "learning_rate": 0.001, |
| "loss": 1.3368, |
| "step": 304200 |
| }, |
| { |
| "epoch": 98.35164835164835, |
| "grad_norm": 1.506001591682434, |
| "learning_rate": 0.001, |
| "loss": 1.3383, |
| "step": 304300 |
| }, |
| { |
| "epoch": 98.38396897220427, |
| "grad_norm": 1.8939203023910522, |
| "learning_rate": 0.001, |
| "loss": 1.3454, |
| "step": 304400 |
| }, |
| { |
| "epoch": 98.41628959276018, |
| "grad_norm": 2.432687759399414, |
| "learning_rate": 0.001, |
| "loss": 1.3542, |
| "step": 304500 |
| }, |
| { |
| "epoch": 98.4486102133161, |
| "grad_norm": 1.764639139175415, |
| "learning_rate": 0.001, |
| "loss": 1.363, |
| "step": 304600 |
| }, |
| { |
| "epoch": 98.48093083387201, |
| "grad_norm": 1.528296709060669, |
| "learning_rate": 0.001, |
| "loss": 1.3519, |
| "step": 304700 |
| }, |
| { |
| "epoch": 98.51325145442793, |
| "grad_norm": 1.5113193988800049, |
| "learning_rate": 0.001, |
| "loss": 1.3514, |
| "step": 304800 |
| }, |
| { |
| "epoch": 98.54557207498384, |
| "grad_norm": 1.426236629486084, |
| "learning_rate": 0.001, |
| "loss": 1.3889, |
| "step": 304900 |
| }, |
| { |
| "epoch": 98.57789269553976, |
| "grad_norm": 1.766783356666565, |
| "learning_rate": 0.001, |
| "loss": 1.3707, |
| "step": 305000 |
| }, |
| { |
| "epoch": 98.61021331609567, |
| "grad_norm": 1.1614407300949097, |
| "learning_rate": 0.001, |
| "loss": 1.3697, |
| "step": 305100 |
| }, |
| { |
| "epoch": 98.64253393665159, |
| "grad_norm": 1.2347285747528076, |
| "learning_rate": 0.001, |
| "loss": 1.3776, |
| "step": 305200 |
| }, |
| { |
| "epoch": 98.6748545572075, |
| "grad_norm": 1.4319281578063965, |
| "learning_rate": 0.001, |
| "loss": 1.3862, |
| "step": 305300 |
| }, |
| { |
| "epoch": 98.70717517776342, |
| "grad_norm": 1.8043638467788696, |
| "learning_rate": 0.001, |
| "loss": 1.377, |
| "step": 305400 |
| }, |
| { |
| "epoch": 98.73949579831933, |
| "grad_norm": 1.7270032167434692, |
| "learning_rate": 0.001, |
| "loss": 1.3851, |
| "step": 305500 |
| }, |
| { |
| "epoch": 98.77181641887525, |
| "grad_norm": 1.621086597442627, |
| "learning_rate": 0.001, |
| "loss": 1.3959, |
| "step": 305600 |
| }, |
| { |
| "epoch": 98.80413703943115, |
| "grad_norm": 1.4632642269134521, |
| "learning_rate": 0.001, |
| "loss": 1.3906, |
| "step": 305700 |
| }, |
| { |
| "epoch": 98.83645765998708, |
| "grad_norm": 1.3503309488296509, |
| "learning_rate": 0.001, |
| "loss": 1.3863, |
| "step": 305800 |
| }, |
| { |
| "epoch": 98.86877828054298, |
| "grad_norm": 1.5197378396987915, |
| "learning_rate": 0.001, |
| "loss": 1.3895, |
| "step": 305900 |
| }, |
| { |
| "epoch": 98.9010989010989, |
| "grad_norm": 1.3938710689544678, |
| "learning_rate": 0.001, |
| "loss": 1.3891, |
| "step": 306000 |
| }, |
| { |
| "epoch": 98.93341952165481, |
| "grad_norm": 1.2191146612167358, |
| "learning_rate": 0.001, |
| "loss": 1.406, |
| "step": 306100 |
| }, |
| { |
| "epoch": 98.96574014221073, |
| "grad_norm": 1.2720669507980347, |
| "learning_rate": 0.001, |
| "loss": 1.4065, |
| "step": 306200 |
| }, |
| { |
| "epoch": 98.99806076276664, |
| "grad_norm": 2.131666898727417, |
| "learning_rate": 0.001, |
| "loss": 1.3987, |
| "step": 306300 |
| }, |
| { |
| "epoch": 99.03038138332256, |
| "grad_norm": 1.6609143018722534, |
| "learning_rate": 0.001, |
| "loss": 1.2968, |
| "step": 306400 |
| }, |
| { |
| "epoch": 99.06270200387847, |
| "grad_norm": 1.4163116216659546, |
| "learning_rate": 0.001, |
| "loss": 1.2919, |
| "step": 306500 |
| }, |
| { |
| "epoch": 99.09502262443439, |
| "grad_norm": 1.4382485151290894, |
| "learning_rate": 0.001, |
| "loss": 1.3104, |
| "step": 306600 |
| }, |
| { |
| "epoch": 99.1273432449903, |
| "grad_norm": 1.3695274591445923, |
| "learning_rate": 0.001, |
| "loss": 1.3187, |
| "step": 306700 |
| }, |
| { |
| "epoch": 99.15966386554622, |
| "grad_norm": 2.090116500854492, |
| "learning_rate": 0.001, |
| "loss": 1.3037, |
| "step": 306800 |
| }, |
| { |
| "epoch": 99.19198448610213, |
| "grad_norm": 1.7685065269470215, |
| "learning_rate": 0.001, |
| "loss": 1.3185, |
| "step": 306900 |
| }, |
| { |
| "epoch": 99.22430510665805, |
| "grad_norm": 1.4047720432281494, |
| "learning_rate": 0.001, |
| "loss": 1.3167, |
| "step": 307000 |
| }, |
| { |
| "epoch": 99.25662572721396, |
| "grad_norm": 1.699637532234192, |
| "learning_rate": 0.001, |
| "loss": 1.3277, |
| "step": 307100 |
| }, |
| { |
| "epoch": 99.28894634776988, |
| "grad_norm": 1.614418625831604, |
| "learning_rate": 0.001, |
| "loss": 1.3475, |
| "step": 307200 |
| }, |
| { |
| "epoch": 99.32126696832579, |
| "grad_norm": 1.5218583345413208, |
| "learning_rate": 0.001, |
| "loss": 1.3379, |
| "step": 307300 |
| }, |
| { |
| "epoch": 99.35358758888171, |
| "grad_norm": 1.8946406841278076, |
| "learning_rate": 0.001, |
| "loss": 1.3394, |
| "step": 307400 |
| }, |
| { |
| "epoch": 99.38590820943762, |
| "grad_norm": 2.1262855529785156, |
| "learning_rate": 0.001, |
| "loss": 1.3493, |
| "step": 307500 |
| }, |
| { |
| "epoch": 99.41822882999354, |
| "grad_norm": 1.8946560621261597, |
| "learning_rate": 0.001, |
| "loss": 1.3528, |
| "step": 307600 |
| }, |
| { |
| "epoch": 99.45054945054945, |
| "grad_norm": 1.5395762920379639, |
| "learning_rate": 0.001, |
| "loss": 1.3461, |
| "step": 307700 |
| }, |
| { |
| "epoch": 99.48287007110537, |
| "grad_norm": 2.264472723007202, |
| "learning_rate": 0.001, |
| "loss": 1.3557, |
| "step": 307800 |
| }, |
| { |
| "epoch": 99.51519069166127, |
| "grad_norm": 1.5566797256469727, |
| "learning_rate": 0.001, |
| "loss": 1.373, |
| "step": 307900 |
| }, |
| { |
| "epoch": 99.5475113122172, |
| "grad_norm": 2.553978204727173, |
| "learning_rate": 0.001, |
| "loss": 1.3613, |
| "step": 308000 |
| }, |
| { |
| "epoch": 99.5798319327731, |
| "grad_norm": 1.5590407848358154, |
| "learning_rate": 0.001, |
| "loss": 1.3815, |
| "step": 308100 |
| }, |
| { |
| "epoch": 99.61215255332903, |
| "grad_norm": 1.2775064706802368, |
| "learning_rate": 0.001, |
| "loss": 1.3751, |
| "step": 308200 |
| }, |
| { |
| "epoch": 99.64447317388493, |
| "grad_norm": 1.6800312995910645, |
| "learning_rate": 0.001, |
| "loss": 1.3617, |
| "step": 308300 |
| }, |
| { |
| "epoch": 99.67679379444085, |
| "grad_norm": 1.5396053791046143, |
| "learning_rate": 0.001, |
| "loss": 1.3789, |
| "step": 308400 |
| }, |
| { |
| "epoch": 99.70911441499676, |
| "grad_norm": 2.490694284439087, |
| "learning_rate": 0.001, |
| "loss": 1.3785, |
| "step": 308500 |
| }, |
| { |
| "epoch": 99.74143503555268, |
| "grad_norm": 1.9777220487594604, |
| "learning_rate": 0.001, |
| "loss": 1.3768, |
| "step": 308600 |
| }, |
| { |
| "epoch": 99.77375565610859, |
| "grad_norm": 2.8109829425811768, |
| "learning_rate": 0.001, |
| "loss": 1.3853, |
| "step": 308700 |
| }, |
| { |
| "epoch": 99.80607627666451, |
| "grad_norm": 1.528751254081726, |
| "learning_rate": 0.001, |
| "loss": 1.3772, |
| "step": 308800 |
| }, |
| { |
| "epoch": 99.83839689722042, |
| "grad_norm": 1.5922292470932007, |
| "learning_rate": 0.001, |
| "loss": 1.3837, |
| "step": 308900 |
| }, |
| { |
| "epoch": 99.87071751777634, |
| "grad_norm": 1.7995340824127197, |
| "learning_rate": 0.001, |
| "loss": 1.399, |
| "step": 309000 |
| }, |
| { |
| "epoch": 99.90303813833225, |
| "grad_norm": 1.7829585075378418, |
| "learning_rate": 0.001, |
| "loss": 1.4036, |
| "step": 309100 |
| }, |
| { |
| "epoch": 99.93535875888817, |
| "grad_norm": 1.6563310623168945, |
| "learning_rate": 0.001, |
| "loss": 1.3975, |
| "step": 309200 |
| }, |
| { |
| "epoch": 99.96767937944408, |
| "grad_norm": 1.8799587488174438, |
| "learning_rate": 0.001, |
| "loss": 1.4018, |
| "step": 309300 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 1.8644742965698242, |
| "learning_rate": 0.001, |
| "loss": 1.3632, |
| "step": 309400 |
| }, |
| { |
| "epoch": 100.0, |
| "step": 309400, |
| "total_flos": 6.973099854336e+17, |
| "train_loss": 1.5836482462177142, |
| "train_runtime": 25792.9375, |
| "train_samples_per_second": 383.826, |
| "train_steps_per_second": 11.996 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 309400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.973099854336e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|