{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 20000, "global_step": 773500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012928248222365869, "grad_norm": 118.84596252441406, "learning_rate": 9.900000000000002e-06, "loss": 4.642, "step": 100 }, { "epoch": 0.025856496444731737, "grad_norm": 6.501941680908203, "learning_rate": 1.9900000000000003e-05, "loss": 4.2126, "step": 200 }, { "epoch": 0.03878474466709761, "grad_norm": 2.5425221920013428, "learning_rate": 2.9900000000000002e-05, "loss": 4.0734, "step": 300 }, { "epoch": 0.051712992889463474, "grad_norm": 2.795517683029175, "learning_rate": 3.99e-05, "loss": 4.0456, "step": 400 }, { "epoch": 0.06464124111182935, "grad_norm": 2.4115378856658936, "learning_rate": 4.99e-05, "loss": 4.0032, "step": 500 }, { "epoch": 0.07756948933419522, "grad_norm": 2.564777135848999, "learning_rate": 5.9900000000000006e-05, "loss": 3.9994, "step": 600 }, { "epoch": 0.09049773755656108, "grad_norm": 1.8325903415679932, "learning_rate": 6.99e-05, "loss": 3.9548, "step": 700 }, { "epoch": 0.10342598577892695, "grad_norm": 2.4493870735168457, "learning_rate": 7.99e-05, "loss": 3.968, "step": 800 }, { "epoch": 0.11635423400129283, "grad_norm": 2.2559242248535156, "learning_rate": 8.989999999999999e-05, "loss": 3.9355, "step": 900 }, { "epoch": 0.1292824822236587, "grad_norm": 1.9281154870986938, "learning_rate": 9.99e-05, "loss": 3.9088, "step": 1000 }, { "epoch": 0.14221073044602456, "grad_norm": 2.2611186504364014, "learning_rate": 0.0001099, "loss": 3.9146, "step": 1100 }, { "epoch": 0.15513897866839044, "grad_norm": 1.9342560768127441, "learning_rate": 0.00011990000000000001, "loss": 3.8665, "step": 1200 }, { "epoch": 0.16806722689075632, "grad_norm": 1.824284553527832, "learning_rate": 0.00012989999999999999, "loss": 3.85, "step": 1300 }, { "epoch": 0.18099547511312217, "grad_norm": 2.3489317893981934, "learning_rate": 0.0001399, "loss": 3.8146, "step": 1400 }, { "epoch": 0.19392372333548805, "grad_norm": 2.7127528190612793, "learning_rate": 0.0001499, "loss": 3.7963, "step": 1500 }, { "epoch": 0.2068519715578539, "grad_norm": 4.036416053771973, "learning_rate": 0.00015989999999999998, "loss": 3.7668, "step": 1600 }, { "epoch": 0.21978021978021978, "grad_norm": 7.036014556884766, "learning_rate": 0.0001699, "loss": 3.7573, "step": 1700 }, { "epoch": 0.23270846800258566, "grad_norm": 3.4820680618286133, "learning_rate": 0.0001799, "loss": 3.757, "step": 1800 }, { "epoch": 0.2456367162249515, "grad_norm": 2.37874174118042, "learning_rate": 0.0001899, "loss": 3.7249, "step": 1900 }, { "epoch": 0.2585649644473174, "grad_norm": 2.647188901901245, "learning_rate": 0.0001999, "loss": 3.7084, "step": 2000 }, { "epoch": 0.27149321266968324, "grad_norm": 2.474475622177124, "learning_rate": 0.0002099, "loss": 3.7031, "step": 2100 }, { "epoch": 0.2844214608920491, "grad_norm": 2.401221990585327, "learning_rate": 0.0002199, "loss": 3.6721, "step": 2200 }, { "epoch": 0.297349709114415, "grad_norm": 1.9776878356933594, "learning_rate": 0.0002299, "loss": 3.6484, "step": 2300 }, { "epoch": 0.3102779573367809, "grad_norm": 2.370199203491211, "learning_rate": 0.0002399, "loss": 3.6477, "step": 2400 }, { "epoch": 0.32320620555914675, "grad_norm": 2.2015411853790283, "learning_rate": 0.0002499, "loss": 3.6238, "step": 2500 }, { "epoch": 0.33613445378151263, "grad_norm": 2.1264984607696533, "learning_rate": 0.00025990000000000003, "loss": 3.6344, "step": 2600 }, { "epoch": 0.34906270200387846, "grad_norm": 1.9741814136505127, "learning_rate": 0.0002699, "loss": 3.6079, "step": 2700 }, { "epoch": 0.36199095022624433, "grad_norm": 2.562873125076294, "learning_rate": 0.0002799, "loss": 3.6051, "step": 2800 }, { "epoch": 0.3749191984486102, "grad_norm": 2.1244149208068848, "learning_rate": 0.0002899, "loss": 3.5748, "step": 2900 }, { "epoch": 0.3878474466709761, "grad_norm": 2.182062864303589, "learning_rate": 0.0002999, "loss": 3.5577, "step": 3000 }, { "epoch": 0.40077569489334197, "grad_norm": 3.0616064071655273, "learning_rate": 0.0003099, "loss": 3.5573, "step": 3100 }, { "epoch": 0.4137039431157078, "grad_norm": 167.88400268554688, "learning_rate": 0.0003199, "loss": 3.5185, "step": 3200 }, { "epoch": 0.4266321913380737, "grad_norm": 5.59126615524292, "learning_rate": 0.00032990000000000005, "loss": 3.5425, "step": 3300 }, { "epoch": 0.43956043956043955, "grad_norm": 2.0688278675079346, "learning_rate": 0.00033989999999999997, "loss": 3.5133, "step": 3400 }, { "epoch": 0.45248868778280543, "grad_norm": 1.9997947216033936, "learning_rate": 0.0003499, "loss": 3.484, "step": 3500 }, { "epoch": 0.4654169360051713, "grad_norm": 1.9747772216796875, "learning_rate": 0.0003599, "loss": 3.5045, "step": 3600 }, { "epoch": 0.4783451842275372, "grad_norm": 2.076634645462036, "learning_rate": 0.0003699, "loss": 3.5099, "step": 3700 }, { "epoch": 0.491273432449903, "grad_norm": 2.340916156768799, "learning_rate": 0.0003799, "loss": 3.4833, "step": 3800 }, { "epoch": 0.5042016806722689, "grad_norm": 2.153327703475952, "learning_rate": 0.00038990000000000004, "loss": 3.4819, "step": 3900 }, { "epoch": 0.5171299288946348, "grad_norm": 1.7927031517028809, "learning_rate": 0.00039989999999999996, "loss": 3.4694, "step": 4000 }, { "epoch": 0.5300581771170007, "grad_norm": 2.590524435043335, "learning_rate": 0.0004099, "loss": 3.4838, "step": 4100 }, { "epoch": 0.5429864253393665, "grad_norm": 2.684635877609253, "learning_rate": 0.0004199, "loss": 3.4652, "step": 4200 }, { "epoch": 0.5559146735617324, "grad_norm": 2.63749098777771, "learning_rate": 0.0004299, "loss": 3.4593, "step": 4300 }, { "epoch": 0.5688429217840982, "grad_norm": 2.2427191734313965, "learning_rate": 0.0004399, "loss": 3.439, "step": 4400 }, { "epoch": 0.5817711700064642, "grad_norm": 4.470568656921387, "learning_rate": 0.00044990000000000004, "loss": 3.4451, "step": 4500 }, { "epoch": 0.59469941822883, "grad_norm": 1.8583451509475708, "learning_rate": 0.0004599, "loss": 3.4428, "step": 4600 }, { "epoch": 0.6076276664511958, "grad_norm": 1.706453800201416, "learning_rate": 0.0004699, "loss": 3.4301, "step": 4700 }, { "epoch": 0.6205559146735617, "grad_norm": 3.373572587966919, "learning_rate": 0.0004799, "loss": 3.4216, "step": 4800 }, { "epoch": 0.6334841628959276, "grad_norm": 1.8972089290618896, "learning_rate": 0.0004899, "loss": 3.4634, "step": 4900 }, { "epoch": 0.6464124111182935, "grad_norm": 2.8634328842163086, "learning_rate": 0.0004999000000000001, "loss": 3.4172, "step": 5000 }, { "epoch": 0.6593406593406593, "grad_norm": 2.139434814453125, "learning_rate": 0.0005099, "loss": 3.4197, "step": 5100 }, { "epoch": 0.6722689075630253, "grad_norm": 2.4795174598693848, "learning_rate": 0.0005199, "loss": 3.4173, "step": 5200 }, { "epoch": 0.6851971557853911, "grad_norm": 8.052289962768555, "learning_rate": 0.0005299, "loss": 3.4275, "step": 5300 }, { "epoch": 0.6981254040077569, "grad_norm": 2.0772311687469482, "learning_rate": 0.0005399000000000001, "loss": 3.406, "step": 5400 }, { "epoch": 0.7110536522301228, "grad_norm": 1.9832972288131714, "learning_rate": 0.0005499000000000001, "loss": 3.3976, "step": 5500 }, { "epoch": 0.7239819004524887, "grad_norm": 1.8123490810394287, "learning_rate": 0.0005599, "loss": 3.3955, "step": 5600 }, { "epoch": 0.7369101486748546, "grad_norm": 2.0782182216644287, "learning_rate": 0.0005698999999999999, "loss": 3.3787, "step": 5700 }, { "epoch": 0.7498383968972204, "grad_norm": 2.193305253982544, "learning_rate": 0.0005799, "loss": 3.4001, "step": 5800 }, { "epoch": 0.7627666451195863, "grad_norm": 1.492191195487976, "learning_rate": 0.0005899, "loss": 3.3929, "step": 5900 }, { "epoch": 0.7756948933419522, "grad_norm": 1.6435977220535278, "learning_rate": 0.0005999, "loss": 3.3767, "step": 6000 }, { "epoch": 0.788623141564318, "grad_norm": 1.681206464767456, "learning_rate": 0.0006099, "loss": 3.3633, "step": 6100 }, { "epoch": 0.8015513897866839, "grad_norm": 3.55729603767395, "learning_rate": 0.0006199, "loss": 3.3584, "step": 6200 }, { "epoch": 0.8144796380090498, "grad_norm": 2.277841567993164, "learning_rate": 0.0006299000000000001, "loss": 3.3762, "step": 6300 }, { "epoch": 0.8274078862314156, "grad_norm": 1.6451385021209717, "learning_rate": 0.0006399, "loss": 3.3759, "step": 6400 }, { "epoch": 0.8403361344537815, "grad_norm": 1.7898396253585815, "learning_rate": 0.0006499, "loss": 3.3654, "step": 6500 }, { "epoch": 0.8532643826761473, "grad_norm": 1.614151954650879, "learning_rate": 0.0006599, "loss": 3.3593, "step": 6600 }, { "epoch": 0.8661926308985133, "grad_norm": 1.8487229347229004, "learning_rate": 0.0006699000000000001, "loss": 3.3472, "step": 6700 }, { "epoch": 0.8791208791208791, "grad_norm": 1.64303719997406, "learning_rate": 0.0006799, "loss": 3.369, "step": 6800 }, { "epoch": 0.892049127343245, "grad_norm": 3.988755702972412, "learning_rate": 0.0006899, "loss": 3.3453, "step": 6900 }, { "epoch": 0.9049773755656109, "grad_norm": 2.620021104812622, "learning_rate": 0.0006998999999999999, "loss": 3.363, "step": 7000 }, { "epoch": 0.9179056237879767, "grad_norm": 3.350825786590576, "learning_rate": 0.0007099, "loss": 3.3502, "step": 7100 }, { "epoch": 0.9308338720103426, "grad_norm": 1.7754006385803223, "learning_rate": 0.0007199, "loss": 3.3479, "step": 7200 }, { "epoch": 0.9437621202327084, "grad_norm": 1.6747366189956665, "learning_rate": 0.0007299, "loss": 3.3441, "step": 7300 }, { "epoch": 0.9566903684550744, "grad_norm": 1.6178967952728271, "learning_rate": 0.0007399, "loss": 3.345, "step": 7400 }, { "epoch": 0.9696186166774402, "grad_norm": 1.5039114952087402, "learning_rate": 0.0007499000000000001, "loss": 3.3591, "step": 7500 }, { "epoch": 0.982546864899806, "grad_norm": 1.6169086694717407, "learning_rate": 0.0007599, "loss": 3.3367, "step": 7600 }, { "epoch": 0.995475113122172, "grad_norm": 2.2698099613189697, "learning_rate": 0.0007699, "loss": 3.3339, "step": 7700 }, { "epoch": 1.0084033613445378, "grad_norm": 0.9970628619194031, "learning_rate": 0.0007799, "loss": 3.3027, "step": 7800 }, { "epoch": 1.0213316095669036, "grad_norm": 1.132667064666748, "learning_rate": 0.0007899000000000001, "loss": 3.2797, "step": 7900 }, { "epoch": 1.0342598577892697, "grad_norm": 1.1211313009262085, "learning_rate": 0.0007999000000000001, "loss": 3.2859, "step": 8000 }, { "epoch": 1.0471881060116355, "grad_norm": 1.0364649295806885, "learning_rate": 0.0008099, "loss": 3.2879, "step": 8100 }, { "epoch": 1.0601163542340013, "grad_norm": 0.8946724534034729, "learning_rate": 0.0008198999999999999, "loss": 3.281, "step": 8200 }, { "epoch": 1.0730446024563671, "grad_norm": 0.9480011463165283, "learning_rate": 0.0008299, "loss": 3.2681, "step": 8300 }, { "epoch": 1.085972850678733, "grad_norm": 1.09050714969635, "learning_rate": 0.0008399, "loss": 3.2707, "step": 8400 }, { "epoch": 1.098901098901099, "grad_norm": 1.1623272895812988, "learning_rate": 0.0008499, "loss": 3.295, "step": 8500 }, { "epoch": 1.1118293471234648, "grad_norm": 1.041061520576477, "learning_rate": 0.0008599, "loss": 3.2788, "step": 8600 }, { "epoch": 1.1247575953458306, "grad_norm": 0.9008080363273621, "learning_rate": 0.0008699000000000001, "loss": 3.277, "step": 8700 }, { "epoch": 1.1376858435681965, "grad_norm": 0.972515881061554, "learning_rate": 0.0008799000000000001, "loss": 3.2639, "step": 8800 }, { "epoch": 1.1506140917905623, "grad_norm": 1.1764262914657593, "learning_rate": 0.0008899, "loss": 3.2794, "step": 8900 }, { "epoch": 1.1635423400129283, "grad_norm": 0.8464470505714417, "learning_rate": 0.0008999, "loss": 3.2698, "step": 9000 }, { "epoch": 1.1764705882352942, "grad_norm": 1.1593658924102783, "learning_rate": 0.0009099, "loss": 3.2807, "step": 9100 }, { "epoch": 1.18939883645766, "grad_norm": 5.167534828186035, "learning_rate": 0.0009199000000000001, "loss": 3.2842, "step": 9200 }, { "epoch": 1.2023270846800258, "grad_norm": 1.1114386320114136, "learning_rate": 0.0009299, "loss": 3.2667, "step": 9300 }, { "epoch": 1.2152553329023918, "grad_norm": 1.0218206644058228, "learning_rate": 0.0009399, "loss": 3.2766, "step": 9400 }, { "epoch": 1.2281835811247577, "grad_norm": 0.9876813292503357, "learning_rate": 0.0009498999999999999, "loss": 3.2831, "step": 9500 }, { "epoch": 1.2411118293471235, "grad_norm": 1.1326946020126343, "learning_rate": 0.0009599, "loss": 3.2674, "step": 9600 }, { "epoch": 1.2540400775694893, "grad_norm": 1.034670114517212, "learning_rate": 0.0009699, "loss": 3.2478, "step": 9700 }, { "epoch": 1.2669683257918551, "grad_norm": 1.5128363370895386, "learning_rate": 0.0009799, "loss": 3.2641, "step": 9800 }, { "epoch": 1.279896574014221, "grad_norm": 0.9992265701293945, "learning_rate": 0.0009899, "loss": 3.2624, "step": 9900 }, { "epoch": 1.292824822236587, "grad_norm": 1.1447618007659912, "learning_rate": 0.0009999, "loss": 3.2571, "step": 10000 }, { "epoch": 1.3057530704589528, "grad_norm": 1.0726267099380493, "learning_rate": 0.001, "loss": 3.2713, "step": 10100 }, { "epoch": 1.3186813186813187, "grad_norm": 1.3022083044052124, "learning_rate": 0.001, "loss": 3.2451, "step": 10200 }, { "epoch": 1.3316095669036845, "grad_norm": 1.0720202922821045, "learning_rate": 0.001, "loss": 3.2767, "step": 10300 }, { "epoch": 1.3445378151260505, "grad_norm": 0.9287368059158325, "learning_rate": 0.001, "loss": 3.2721, "step": 10400 }, { "epoch": 1.3574660633484164, "grad_norm": 0.9787566661834717, "learning_rate": 0.001, "loss": 3.253, "step": 10500 }, { "epoch": 1.3703943115707822, "grad_norm": 1.6317791938781738, "learning_rate": 0.001, "loss": 3.2609, "step": 10600 }, { "epoch": 1.383322559793148, "grad_norm": 1.0309605598449707, "learning_rate": 0.001, "loss": 3.2449, "step": 10700 }, { "epoch": 1.3962508080155138, "grad_norm": 0.7937541007995605, "learning_rate": 0.001, "loss": 3.2641, "step": 10800 }, { "epoch": 1.4091790562378796, "grad_norm": 1.4972172975540161, "learning_rate": 0.001, "loss": 3.2419, "step": 10900 }, { "epoch": 1.4221073044602457, "grad_norm": 0.998430609703064, "learning_rate": 0.001, "loss": 3.2379, "step": 11000 }, { "epoch": 1.4350355526826115, "grad_norm": 1.0715460777282715, "learning_rate": 0.001, "loss": 3.2572, "step": 11100 }, { "epoch": 1.4479638009049773, "grad_norm": 1.1538857221603394, "learning_rate": 0.001, "loss": 3.2248, "step": 11200 }, { "epoch": 1.4608920491273432, "grad_norm": 0.8802598714828491, "learning_rate": 0.001, "loss": 3.251, "step": 11300 }, { "epoch": 1.4738202973497092, "grad_norm": 0.8060498833656311, "learning_rate": 0.001, "loss": 3.2404, "step": 11400 }, { "epoch": 1.486748545572075, "grad_norm": 1.3524706363677979, "learning_rate": 0.001, "loss": 3.2327, "step": 11500 }, { "epoch": 1.4996767937944409, "grad_norm": 1.2319313287734985, "learning_rate": 0.001, "loss": 3.2298, "step": 11600 }, { "epoch": 1.5126050420168067, "grad_norm": 0.9734055399894714, "learning_rate": 0.001, "loss": 3.2435, "step": 11700 }, { "epoch": 1.5255332902391725, "grad_norm": 0.8134593367576599, "learning_rate": 0.001, "loss": 3.2299, "step": 11800 }, { "epoch": 1.5384615384615383, "grad_norm": 0.806391716003418, "learning_rate": 0.001, "loss": 3.2295, "step": 11900 }, { "epoch": 1.5513897866839044, "grad_norm": 0.9927522540092468, "learning_rate": 0.001, "loss": 3.2332, "step": 12000 }, { "epoch": 1.5643180349062702, "grad_norm": 0.8309698104858398, "learning_rate": 0.001, "loss": 3.2238, "step": 12100 }, { "epoch": 1.577246283128636, "grad_norm": 1.2423845529556274, "learning_rate": 0.001, "loss": 3.2259, "step": 12200 }, { "epoch": 1.590174531351002, "grad_norm": 0.9562028646469116, "learning_rate": 0.001, "loss": 3.2274, "step": 12300 }, { "epoch": 1.6031027795733679, "grad_norm": 1.086228370666504, "learning_rate": 0.001, "loss": 3.2098, "step": 12400 }, { "epoch": 1.6160310277957337, "grad_norm": 0.901372492313385, "learning_rate": 0.001, "loss": 3.2087, "step": 12500 }, { "epoch": 1.6289592760180995, "grad_norm": 1.2067883014678955, "learning_rate": 0.001, "loss": 3.2204, "step": 12600 }, { "epoch": 1.6418875242404654, "grad_norm": 1.1645654439926147, "learning_rate": 0.001, "loss": 3.2111, "step": 12700 }, { "epoch": 1.6548157724628312, "grad_norm": 1.139648199081421, "learning_rate": 0.001, "loss": 3.194, "step": 12800 }, { "epoch": 1.667744020685197, "grad_norm": 1.0192232131958008, "learning_rate": 0.001, "loss": 3.2231, "step": 12900 }, { "epoch": 1.680672268907563, "grad_norm": 1.358818531036377, "learning_rate": 0.001, "loss": 3.211, "step": 13000 }, { "epoch": 1.6936005171299289, "grad_norm": 0.8475126028060913, "learning_rate": 0.001, "loss": 3.2229, "step": 13100 }, { "epoch": 1.706528765352295, "grad_norm": 0.803980827331543, "learning_rate": 0.001, "loss": 3.2011, "step": 13200 }, { "epoch": 1.7194570135746607, "grad_norm": 0.9838019013404846, "learning_rate": 0.001, "loss": 3.2141, "step": 13300 }, { "epoch": 1.7323852617970266, "grad_norm": 1.0253037214279175, "learning_rate": 0.001, "loss": 3.189, "step": 13400 }, { "epoch": 1.7453135100193924, "grad_norm": 0.9395850896835327, "learning_rate": 0.001, "loss": 3.2126, "step": 13500 }, { "epoch": 1.7582417582417582, "grad_norm": 2.0117998123168945, "learning_rate": 0.001, "loss": 3.2001, "step": 13600 }, { "epoch": 1.771170006464124, "grad_norm": 1.2111902236938477, "learning_rate": 0.001, "loss": 3.1946, "step": 13700 }, { "epoch": 1.7840982546864899, "grad_norm": 1.1220893859863281, "learning_rate": 0.001, "loss": 3.2151, "step": 13800 }, { "epoch": 1.797026502908856, "grad_norm": 1.2716549634933472, "learning_rate": 0.001, "loss": 3.2067, "step": 13900 }, { "epoch": 1.8099547511312217, "grad_norm": 0.9393177628517151, "learning_rate": 0.001, "loss": 3.1874, "step": 14000 }, { "epoch": 1.8228829993535876, "grad_norm": 1.0577871799468994, "learning_rate": 0.001, "loss": 3.1865, "step": 14100 }, { "epoch": 1.8358112475759536, "grad_norm": 1.1500855684280396, "learning_rate": 0.001, "loss": 3.2077, "step": 14200 }, { "epoch": 1.8487394957983194, "grad_norm": 0.978339672088623, "learning_rate": 0.001, "loss": 3.1889, "step": 14300 }, { "epoch": 1.8616677440206852, "grad_norm": 1.3598369359970093, "learning_rate": 0.001, "loss": 3.1791, "step": 14400 }, { "epoch": 1.874595992243051, "grad_norm": 1.139323115348816, "learning_rate": 0.001, "loss": 3.1938, "step": 14500 }, { "epoch": 1.887524240465417, "grad_norm": 1.0580919981002808, "learning_rate": 0.001, "loss": 3.1821, "step": 14600 }, { "epoch": 1.9004524886877827, "grad_norm": 0.9836323857307434, "learning_rate": 0.001, "loss": 3.2186, "step": 14700 }, { "epoch": 1.9133807369101485, "grad_norm": 1.0957473516464233, "learning_rate": 0.001, "loss": 3.1906, "step": 14800 }, { "epoch": 1.9263089851325146, "grad_norm": 1.1016499996185303, "learning_rate": 0.001, "loss": 3.1888, "step": 14900 }, { "epoch": 1.9392372333548804, "grad_norm": 1.064293384552002, "learning_rate": 0.001, "loss": 3.1915, "step": 15000 }, { "epoch": 1.9521654815772462, "grad_norm": 0.8584859371185303, "learning_rate": 0.001, "loss": 3.1798, "step": 15100 }, { "epoch": 1.9650937297996123, "grad_norm": 1.0258640050888062, "learning_rate": 0.001, "loss": 3.1738, "step": 15200 }, { "epoch": 1.978021978021978, "grad_norm": 1.1215410232543945, "learning_rate": 0.001, "loss": 3.1789, "step": 15300 }, { "epoch": 1.990950226244344, "grad_norm": 0.9511606693267822, "learning_rate": 0.001, "loss": 3.1945, "step": 15400 }, { "epoch": 2.0038784744667097, "grad_norm": 1.4124641418457031, "learning_rate": 0.001, "loss": 3.1345, "step": 15500 }, { "epoch": 2.0168067226890756, "grad_norm": 1.0094051361083984, "learning_rate": 0.001, "loss": 3.095, "step": 15600 }, { "epoch": 2.0297349709114414, "grad_norm": 0.9024159908294678, "learning_rate": 0.001, "loss": 3.084, "step": 15700 }, { "epoch": 2.042663219133807, "grad_norm": 0.881212055683136, "learning_rate": 0.001, "loss": 3.0817, "step": 15800 }, { "epoch": 2.055591467356173, "grad_norm": 0.8830527663230896, "learning_rate": 0.001, "loss": 3.0917, "step": 15900 }, { "epoch": 2.0685197155785393, "grad_norm": 1.0235767364501953, "learning_rate": 0.001, "loss": 3.1009, "step": 16000 }, { "epoch": 2.081447963800905, "grad_norm": 1.247129201889038, "learning_rate": 0.001, "loss": 3.0897, "step": 16100 }, { "epoch": 2.094376212023271, "grad_norm": 1.0438477993011475, "learning_rate": 0.001, "loss": 3.0777, "step": 16200 }, { "epoch": 2.107304460245637, "grad_norm": 1.2446783781051636, "learning_rate": 0.001, "loss": 3.0696, "step": 16300 }, { "epoch": 2.1202327084680026, "grad_norm": 1.1828064918518066, "learning_rate": 0.001, "loss": 3.0975, "step": 16400 }, { "epoch": 2.1331609566903684, "grad_norm": 0.8775501847267151, "learning_rate": 0.001, "loss": 3.0695, "step": 16500 }, { "epoch": 2.1460892049127342, "grad_norm": 2.027470350265503, "learning_rate": 0.001, "loss": 3.0815, "step": 16600 }, { "epoch": 2.1590174531351, "grad_norm": 0.6762149930000305, "learning_rate": 0.001, "loss": 3.0928, "step": 16700 }, { "epoch": 2.171945701357466, "grad_norm": 1.2223689556121826, "learning_rate": 0.001, "loss": 3.0897, "step": 16800 }, { "epoch": 2.184873949579832, "grad_norm": 0.865894615650177, "learning_rate": 0.001, "loss": 3.0824, "step": 16900 }, { "epoch": 2.197802197802198, "grad_norm": 1.8851679563522339, "learning_rate": 0.001, "loss": 3.063, "step": 17000 }, { "epoch": 2.210730446024564, "grad_norm": 1.206737756729126, "learning_rate": 0.001, "loss": 3.0904, "step": 17100 }, { "epoch": 2.2236586942469296, "grad_norm": 1.0634925365447998, "learning_rate": 0.001, "loss": 3.0852, "step": 17200 }, { "epoch": 2.2365869424692955, "grad_norm": 0.7785546779632568, "learning_rate": 0.001, "loss": 3.0864, "step": 17300 }, { "epoch": 2.2495151906916613, "grad_norm": 0.7880673408508301, "learning_rate": 0.001, "loss": 3.091, "step": 17400 }, { "epoch": 2.262443438914027, "grad_norm": 1.2936878204345703, "learning_rate": 0.001, "loss": 3.0752, "step": 17500 }, { "epoch": 2.275371687136393, "grad_norm": 0.9386409521102905, "learning_rate": 0.001, "loss": 3.1016, "step": 17600 }, { "epoch": 2.2882999353587588, "grad_norm": 1.0321121215820312, "learning_rate": 0.001, "loss": 3.0825, "step": 17700 }, { "epoch": 2.3012281835811246, "grad_norm": 3.2278409004211426, "learning_rate": 0.001, "loss": 3.0769, "step": 17800 }, { "epoch": 2.3141564318034904, "grad_norm": 1.0846681594848633, "learning_rate": 0.001, "loss": 3.0977, "step": 17900 }, { "epoch": 2.3270846800258567, "grad_norm": 0.9138995409011841, "learning_rate": 0.001, "loss": 3.0907, "step": 18000 }, { "epoch": 2.3400129282482225, "grad_norm": 0.9935817122459412, "learning_rate": 0.001, "loss": 3.0796, "step": 18100 }, { "epoch": 2.3529411764705883, "grad_norm": 0.8221590518951416, "learning_rate": 0.001, "loss": 3.1062, "step": 18200 }, { "epoch": 2.365869424692954, "grad_norm": 1.2357958555221558, "learning_rate": 0.001, "loss": 3.0764, "step": 18300 }, { "epoch": 2.37879767291532, "grad_norm": 0.787947952747345, "learning_rate": 0.001, "loss": 3.0992, "step": 18400 }, { "epoch": 2.391725921137686, "grad_norm": 0.9376165866851807, "learning_rate": 0.001, "loss": 3.1021, "step": 18500 }, { "epoch": 2.4046541693600516, "grad_norm": 1.0112468004226685, "learning_rate": 0.001, "loss": 3.076, "step": 18600 }, { "epoch": 2.4175824175824174, "grad_norm": 0.8361511826515198, "learning_rate": 0.001, "loss": 3.0768, "step": 18700 }, { "epoch": 2.4305106658047837, "grad_norm": 0.7793710827827454, "learning_rate": 0.001, "loss": 3.0627, "step": 18800 }, { "epoch": 2.4434389140271495, "grad_norm": 1.0263872146606445, "learning_rate": 0.001, "loss": 3.0886, "step": 18900 }, { "epoch": 2.4563671622495153, "grad_norm": 0.9580851793289185, "learning_rate": 0.001, "loss": 3.0805, "step": 19000 }, { "epoch": 2.469295410471881, "grad_norm": 0.9041637182235718, "learning_rate": 0.001, "loss": 3.0961, "step": 19100 }, { "epoch": 2.482223658694247, "grad_norm": 1.3338507413864136, "learning_rate": 0.001, "loss": 3.0606, "step": 19200 }, { "epoch": 2.495151906916613, "grad_norm": 0.9854009747505188, "learning_rate": 0.001, "loss": 3.1106, "step": 19300 }, { "epoch": 2.5080801551389786, "grad_norm": 1.4172568321228027, "learning_rate": 0.001, "loss": 3.0846, "step": 19400 }, { "epoch": 2.5210084033613445, "grad_norm": 1.2610217332839966, "learning_rate": 0.001, "loss": 3.095, "step": 19500 }, { "epoch": 2.5339366515837103, "grad_norm": 0.9490922093391418, "learning_rate": 0.001, "loss": 3.0888, "step": 19600 }, { "epoch": 2.546864899806076, "grad_norm": 1.152039885520935, "learning_rate": 0.001, "loss": 3.0721, "step": 19700 }, { "epoch": 2.559793148028442, "grad_norm": 0.7546257376670837, "learning_rate": 0.001, "loss": 3.0793, "step": 19800 }, { "epoch": 2.5727213962508078, "grad_norm": 0.8939322233200073, "learning_rate": 0.001, "loss": 3.0864, "step": 19900 }, { "epoch": 2.585649644473174, "grad_norm": 0.9169397950172424, "learning_rate": 0.001, "loss": 3.0574, "step": 20000 }, { "epoch": 2.59857789269554, "grad_norm": 0.8420311212539673, "learning_rate": 0.001, "loss": 3.0637, "step": 20100 }, { "epoch": 2.6115061409179057, "grad_norm": 0.959895133972168, "learning_rate": 0.001, "loss": 3.07, "step": 20200 }, { "epoch": 2.6244343891402715, "grad_norm": 0.9686364531517029, "learning_rate": 0.001, "loss": 3.0609, "step": 20300 }, { "epoch": 2.6373626373626373, "grad_norm": 1.8271739482879639, "learning_rate": 0.001, "loss": 3.0881, "step": 20400 }, { "epoch": 2.650290885585003, "grad_norm": 1.2238178253173828, "learning_rate": 0.001, "loss": 3.0799, "step": 20500 }, { "epoch": 2.663219133807369, "grad_norm": 1.0088492631912231, "learning_rate": 0.001, "loss": 3.0811, "step": 20600 }, { "epoch": 2.6761473820297352, "grad_norm": 4.4115705490112305, "learning_rate": 0.001, "loss": 3.0565, "step": 20700 }, { "epoch": 2.689075630252101, "grad_norm": 1.230033040046692, "learning_rate": 0.001, "loss": 3.0831, "step": 20800 }, { "epoch": 2.702003878474467, "grad_norm": 0.6796260476112366, "learning_rate": 0.001, "loss": 3.0751, "step": 20900 }, { "epoch": 2.7149321266968327, "grad_norm": 0.9699063897132874, "learning_rate": 0.001, "loss": 3.0643, "step": 21000 }, { "epoch": 2.7278603749191985, "grad_norm": 1.1217529773712158, "learning_rate": 0.001, "loss": 3.0673, "step": 21100 }, { "epoch": 2.7407886231415644, "grad_norm": 1.2146625518798828, "learning_rate": 0.001, "loss": 3.0579, "step": 21200 }, { "epoch": 2.75371687136393, "grad_norm": 1.0542621612548828, "learning_rate": 0.001, "loss": 3.0555, "step": 21300 }, { "epoch": 2.766645119586296, "grad_norm": 0.8560185432434082, "learning_rate": 0.001, "loss": 3.0759, "step": 21400 }, { "epoch": 2.779573367808662, "grad_norm": 0.744569718837738, "learning_rate": 0.001, "loss": 3.0638, "step": 21500 }, { "epoch": 2.7925016160310276, "grad_norm": 0.9793397784233093, "learning_rate": 0.001, "loss": 3.0465, "step": 21600 }, { "epoch": 2.8054298642533935, "grad_norm": 1.5432952642440796, "learning_rate": 0.001, "loss": 3.076, "step": 21700 }, { "epoch": 2.8183581124757593, "grad_norm": 0.8528289794921875, "learning_rate": 0.001, "loss": 3.051, "step": 21800 }, { "epoch": 2.8312863606981256, "grad_norm": 0.9466942548751831, "learning_rate": 0.001, "loss": 3.068, "step": 21900 }, { "epoch": 2.8442146089204914, "grad_norm": 2.5625410079956055, "learning_rate": 0.001, "loss": 3.051, "step": 22000 }, { "epoch": 2.857142857142857, "grad_norm": 0.9894402027130127, "learning_rate": 0.001, "loss": 3.0619, "step": 22100 }, { "epoch": 2.870071105365223, "grad_norm": 0.9967089891433716, "learning_rate": 0.001, "loss": 3.0483, "step": 22200 }, { "epoch": 2.882999353587589, "grad_norm": 0.9490856528282166, "learning_rate": 0.001, "loss": 3.0678, "step": 22300 }, { "epoch": 2.8959276018099547, "grad_norm": 0.9151811003684998, "learning_rate": 0.001, "loss": 3.0626, "step": 22400 }, { "epoch": 2.9088558500323205, "grad_norm": 0.9919764995574951, "learning_rate": 0.001, "loss": 3.0802, "step": 22500 }, { "epoch": 2.9217840982546863, "grad_norm": 1.1991615295410156, "learning_rate": 0.001, "loss": 3.0641, "step": 22600 }, { "epoch": 2.9347123464770526, "grad_norm": 8.546902656555176, "learning_rate": 0.001, "loss": 3.0457, "step": 22700 }, { "epoch": 2.9476405946994184, "grad_norm": 0.7885714769363403, "learning_rate": 0.001, "loss": 3.0641, "step": 22800 }, { "epoch": 2.9605688429217842, "grad_norm": 1.017195701599121, "learning_rate": 0.001, "loss": 3.0831, "step": 22900 }, { "epoch": 2.97349709114415, "grad_norm": 1.31964111328125, "learning_rate": 0.001, "loss": 3.0673, "step": 23000 }, { "epoch": 2.986425339366516, "grad_norm": 0.9606931209564209, "learning_rate": 0.001, "loss": 3.0614, "step": 23100 }, { "epoch": 2.9993535875888817, "grad_norm": 1.2054433822631836, "learning_rate": 0.001, "loss": 3.0673, "step": 23200 }, { "epoch": 3.0122818358112475, "grad_norm": 0.8430971503257751, "learning_rate": 0.001, "loss": 2.9804, "step": 23300 }, { "epoch": 3.0252100840336134, "grad_norm": 1.1955339908599854, "learning_rate": 0.001, "loss": 2.9677, "step": 23400 }, { "epoch": 3.038138332255979, "grad_norm": 1.2166880369186401, "learning_rate": 0.001, "loss": 2.9848, "step": 23500 }, { "epoch": 3.051066580478345, "grad_norm": 0.9455789923667908, "learning_rate": 0.001, "loss": 2.9752, "step": 23600 }, { "epoch": 3.063994828700711, "grad_norm": 1.103708028793335, "learning_rate": 0.001, "loss": 2.9876, "step": 23700 }, { "epoch": 3.076923076923077, "grad_norm": 0.9451152086257935, "learning_rate": 0.001, "loss": 2.9757, "step": 23800 }, { "epoch": 3.089851325145443, "grad_norm": 1.1617419719696045, "learning_rate": 0.001, "loss": 2.9736, "step": 23900 }, { "epoch": 3.1027795733678087, "grad_norm": 2.8053629398345947, "learning_rate": 0.001, "loss": 2.9798, "step": 24000 }, { "epoch": 3.1157078215901746, "grad_norm": 1.4630392789840698, "learning_rate": 0.001, "loss": 2.9693, "step": 24100 }, { "epoch": 3.1286360698125404, "grad_norm": 1.2354226112365723, "learning_rate": 0.001, "loss": 2.9839, "step": 24200 }, { "epoch": 3.141564318034906, "grad_norm": 1.3217436075210571, "learning_rate": 0.001, "loss": 2.9802, "step": 24300 }, { "epoch": 3.154492566257272, "grad_norm": 1.0590447187423706, "learning_rate": 0.001, "loss": 2.9631, "step": 24400 }, { "epoch": 3.167420814479638, "grad_norm": 1.3475168943405151, "learning_rate": 0.001, "loss": 2.9797, "step": 24500 }, { "epoch": 3.1803490627020037, "grad_norm": 1.171952724456787, "learning_rate": 0.001, "loss": 2.9724, "step": 24600 }, { "epoch": 3.19327731092437, "grad_norm": 1.3020005226135254, "learning_rate": 0.001, "loss": 2.9742, "step": 24700 }, { "epoch": 3.2062055591467358, "grad_norm": 1.2192933559417725, "learning_rate": 0.001, "loss": 2.9764, "step": 24800 }, { "epoch": 3.2191338073691016, "grad_norm": 1.2347426414489746, "learning_rate": 0.001, "loss": 2.9754, "step": 24900 }, { "epoch": 3.2320620555914674, "grad_norm": 1.1259766817092896, "learning_rate": 0.001, "loss": 2.9786, "step": 25000 }, { "epoch": 3.2449903038138332, "grad_norm": 1.2958155870437622, "learning_rate": 0.001, "loss": 2.9946, "step": 25100 }, { "epoch": 3.257918552036199, "grad_norm": 2.7344205379486084, "learning_rate": 0.001, "loss": 2.9957, "step": 25200 }, { "epoch": 3.270846800258565, "grad_norm": 1.3562507629394531, "learning_rate": 0.001, "loss": 2.9789, "step": 25300 }, { "epoch": 3.2837750484809307, "grad_norm": 1.520480751991272, "learning_rate": 0.001, "loss": 2.975, "step": 25400 }, { "epoch": 3.2967032967032965, "grad_norm": 30.83466148376465, "learning_rate": 0.001, "loss": 2.9985, "step": 25500 }, { "epoch": 3.3096315449256624, "grad_norm": 1.8769716024398804, "learning_rate": 0.001, "loss": 2.9755, "step": 25600 }, { "epoch": 3.3225597931480286, "grad_norm": 1.0913584232330322, "learning_rate": 0.001, "loss": 2.9968, "step": 25700 }, { "epoch": 3.3354880413703945, "grad_norm": 1.1464645862579346, "learning_rate": 0.001, "loss": 3.0124, "step": 25800 }, { "epoch": 3.3484162895927603, "grad_norm": 1.0762064456939697, "learning_rate": 0.001, "loss": 2.9917, "step": 25900 }, { "epoch": 3.361344537815126, "grad_norm": 1.7063734531402588, "learning_rate": 0.001, "loss": 2.9927, "step": 26000 }, { "epoch": 3.374272786037492, "grad_norm": 1.4727184772491455, "learning_rate": 0.001, "loss": 2.9758, "step": 26100 }, { "epoch": 3.3872010342598577, "grad_norm": 0.9792320728302002, "learning_rate": 0.001, "loss": 2.9982, "step": 26200 }, { "epoch": 3.4001292824822236, "grad_norm": 1.342593789100647, "learning_rate": 0.001, "loss": 2.9962, "step": 26300 }, { "epoch": 3.4130575307045894, "grad_norm": 1.6286009550094604, "learning_rate": 0.001, "loss": 2.9986, "step": 26400 }, { "epoch": 3.425985778926955, "grad_norm": 1.7997628450393677, "learning_rate": 0.001, "loss": 2.9982, "step": 26500 }, { "epoch": 3.4389140271493215, "grad_norm": 1.2490218877792358, "learning_rate": 0.001, "loss": 2.996, "step": 26600 }, { "epoch": 3.4518422753716873, "grad_norm": 1.2919135093688965, "learning_rate": 0.001, "loss": 3.0046, "step": 26700 }, { "epoch": 3.464770523594053, "grad_norm": 12.556557655334473, "learning_rate": 0.001, "loss": 2.9872, "step": 26800 }, { "epoch": 3.477698771816419, "grad_norm": 1.3433451652526855, "learning_rate": 0.001, "loss": 3.0167, "step": 26900 }, { "epoch": 3.490627020038785, "grad_norm": 1.1677124500274658, "learning_rate": 0.001, "loss": 2.9981, "step": 27000 }, { "epoch": 3.5035552682611506, "grad_norm": 1.218559980392456, "learning_rate": 0.001, "loss": 3.0207, "step": 27100 }, { "epoch": 3.5164835164835164, "grad_norm": 1.1366580724716187, "learning_rate": 0.001, "loss": 2.9958, "step": 27200 }, { "epoch": 3.5294117647058822, "grad_norm": 1.4061895608901978, "learning_rate": 0.001, "loss": 2.9972, "step": 27300 }, { "epoch": 3.542340012928248, "grad_norm": 1.7801659107208252, "learning_rate": 0.001, "loss": 3.0003, "step": 27400 }, { "epoch": 3.555268261150614, "grad_norm": 1.9881681203842163, "learning_rate": 0.001, "loss": 3.0157, "step": 27500 }, { "epoch": 3.5681965093729797, "grad_norm": 0.8259904980659485, "learning_rate": 0.001, "loss": 3.0043, "step": 27600 }, { "epoch": 3.581124757595346, "grad_norm": 1.2834389209747314, "learning_rate": 0.001, "loss": 2.9954, "step": 27700 }, { "epoch": 3.594053005817712, "grad_norm": 1.9239144325256348, "learning_rate": 0.001, "loss": 2.9943, "step": 27800 }, { "epoch": 3.6069812540400776, "grad_norm": 1.4826470613479614, "learning_rate": 0.001, "loss": 2.9912, "step": 27900 }, { "epoch": 3.6199095022624435, "grad_norm": 1.3252791166305542, "learning_rate": 0.001, "loss": 3.0152, "step": 28000 }, { "epoch": 3.6328377504848093, "grad_norm": 1.1659870147705078, "learning_rate": 0.001, "loss": 2.9969, "step": 28100 }, { "epoch": 3.645765998707175, "grad_norm": 1.27777898311615, "learning_rate": 0.001, "loss": 3.0081, "step": 28200 }, { "epoch": 3.658694246929541, "grad_norm": 1.1128146648406982, "learning_rate": 0.001, "loss": 3.0074, "step": 28300 }, { "epoch": 3.6716224951519068, "grad_norm": 1.415917158126831, "learning_rate": 0.001, "loss": 3.014, "step": 28400 }, { "epoch": 3.684550743374273, "grad_norm": 1.0008307695388794, "learning_rate": 0.001, "loss": 3.0164, "step": 28500 }, { "epoch": 3.697478991596639, "grad_norm": 1.8747793436050415, "learning_rate": 0.001, "loss": 2.9887, "step": 28600 }, { "epoch": 3.7104072398190047, "grad_norm": 1.5692400932312012, "learning_rate": 0.001, "loss": 2.981, "step": 28700 }, { "epoch": 3.7233354880413705, "grad_norm": 0.9974040985107422, "learning_rate": 0.001, "loss": 2.9922, "step": 28800 }, { "epoch": 3.7362637362637363, "grad_norm": 1.3241302967071533, "learning_rate": 0.001, "loss": 2.9901, "step": 28900 }, { "epoch": 3.749191984486102, "grad_norm": 1.7146044969558716, "learning_rate": 0.001, "loss": 2.9895, "step": 29000 }, { "epoch": 3.762120232708468, "grad_norm": 0.8391664624214172, "learning_rate": 0.001, "loss": 3.0055, "step": 29100 }, { "epoch": 3.775048480930834, "grad_norm": 1.2534948587417603, "learning_rate": 0.001, "loss": 2.9859, "step": 29200 }, { "epoch": 3.7879767291531996, "grad_norm": 1.011658787727356, "learning_rate": 0.001, "loss": 2.9726, "step": 29300 }, { "epoch": 3.8009049773755654, "grad_norm": 1.4607046842575073, "learning_rate": 0.001, "loss": 2.9837, "step": 29400 }, { "epoch": 3.8138332255979313, "grad_norm": 1.192064642906189, "learning_rate": 0.001, "loss": 2.9969, "step": 29500 }, { "epoch": 3.826761473820297, "grad_norm": 1.0219391584396362, "learning_rate": 0.001, "loss": 3.0091, "step": 29600 }, { "epoch": 3.8396897220426633, "grad_norm": 1.4828758239746094, "learning_rate": 0.001, "loss": 2.9955, "step": 29700 }, { "epoch": 3.852617970265029, "grad_norm": 2.027723550796509, "learning_rate": 0.001, "loss": 2.975, "step": 29800 }, { "epoch": 3.865546218487395, "grad_norm": 1.0762542486190796, "learning_rate": 0.001, "loss": 2.9744, "step": 29900 }, { "epoch": 3.878474466709761, "grad_norm": 0.9982675313949585, "learning_rate": 0.001, "loss": 3.0081, "step": 30000 }, { "epoch": 3.8914027149321266, "grad_norm": 1.0702546834945679, "learning_rate": 0.001, "loss": 2.9796, "step": 30100 }, { "epoch": 3.9043309631544925, "grad_norm": 28.13825798034668, "learning_rate": 0.001, "loss": 2.9845, "step": 30200 }, { "epoch": 3.9172592113768583, "grad_norm": 1.0247751474380493, "learning_rate": 0.001, "loss": 2.9863, "step": 30300 }, { "epoch": 3.9301874595992246, "grad_norm": 1.2103465795516968, "learning_rate": 0.001, "loss": 2.9736, "step": 30400 }, { "epoch": 3.9431157078215904, "grad_norm": 0.9458410739898682, "learning_rate": 0.001, "loss": 2.9774, "step": 30500 }, { "epoch": 3.956043956043956, "grad_norm": 1.3326276540756226, "learning_rate": 0.001, "loss": 2.9935, "step": 30600 }, { "epoch": 3.968972204266322, "grad_norm": 1.224952220916748, "learning_rate": 0.001, "loss": 2.9808, "step": 30700 }, { "epoch": 3.981900452488688, "grad_norm": 1.136208176612854, "learning_rate": 0.001, "loss": 2.9754, "step": 30800 }, { "epoch": 3.9948287007110537, "grad_norm": 1.2542495727539062, "learning_rate": 0.001, "loss": 2.9802, "step": 30900 }, { "epoch": 4.0077569489334195, "grad_norm": 0.7255315780639648, "learning_rate": 0.001, "loss": 2.9025, "step": 31000 }, { "epoch": 4.020685197155785, "grad_norm": 0.7668340802192688, "learning_rate": 0.001, "loss": 2.9082, "step": 31100 }, { "epoch": 4.033613445378151, "grad_norm": 0.937721848487854, "learning_rate": 0.001, "loss": 2.909, "step": 31200 }, { "epoch": 4.046541693600517, "grad_norm": 1.2655540704727173, "learning_rate": 0.001, "loss": 2.9032, "step": 31300 }, { "epoch": 4.059469941822883, "grad_norm": 1.10466730594635, "learning_rate": 0.001, "loss": 2.9092, "step": 31400 }, { "epoch": 4.072398190045249, "grad_norm": 2.687725305557251, "learning_rate": 0.001, "loss": 2.9102, "step": 31500 }, { "epoch": 4.085326438267614, "grad_norm": 0.9718222618103027, "learning_rate": 0.001, "loss": 2.9043, "step": 31600 }, { "epoch": 4.09825468648998, "grad_norm": 1.0471924543380737, "learning_rate": 0.001, "loss": 2.9173, "step": 31700 }, { "epoch": 4.111182934712346, "grad_norm": 1.4803344011306763, "learning_rate": 0.001, "loss": 2.9025, "step": 31800 }, { "epoch": 4.124111182934712, "grad_norm": 1.0673942565917969, "learning_rate": 0.001, "loss": 2.8895, "step": 31900 }, { "epoch": 4.137039431157079, "grad_norm": 1.2132673263549805, "learning_rate": 0.001, "loss": 2.9048, "step": 32000 }, { "epoch": 4.149967679379444, "grad_norm": 0.9155902862548828, "learning_rate": 0.001, "loss": 2.9114, "step": 32100 }, { "epoch": 4.16289592760181, "grad_norm": 1.1091375350952148, "learning_rate": 0.001, "loss": 2.93, "step": 32200 }, { "epoch": 4.175824175824176, "grad_norm": 1.135400652885437, "learning_rate": 0.001, "loss": 2.9015, "step": 32300 }, { "epoch": 4.188752424046542, "grad_norm": 0.9205621480941772, "learning_rate": 0.001, "loss": 2.9067, "step": 32400 }, { "epoch": 4.201680672268908, "grad_norm": 1.2379872798919678, "learning_rate": 0.001, "loss": 2.9231, "step": 32500 }, { "epoch": 4.214608920491274, "grad_norm": 1.444331169128418, "learning_rate": 0.001, "loss": 2.9174, "step": 32600 }, { "epoch": 4.227537168713639, "grad_norm": 1.2080880403518677, "learning_rate": 0.001, "loss": 2.9349, "step": 32700 }, { "epoch": 4.240465416936005, "grad_norm": 0.821864902973175, "learning_rate": 0.001, "loss": 2.8988, "step": 32800 }, { "epoch": 4.253393665158371, "grad_norm": 0.9216119647026062, "learning_rate": 0.001, "loss": 2.9211, "step": 32900 }, { "epoch": 4.266321913380737, "grad_norm": 1.2613247632980347, "learning_rate": 0.001, "loss": 2.9086, "step": 33000 }, { "epoch": 4.279250161603103, "grad_norm": 1.0701028108596802, "learning_rate": 0.001, "loss": 2.9199, "step": 33100 }, { "epoch": 4.2921784098254685, "grad_norm": 2.86519193649292, "learning_rate": 0.001, "loss": 2.9157, "step": 33200 }, { "epoch": 4.305106658047834, "grad_norm": 0.8622322082519531, "learning_rate": 0.001, "loss": 2.9251, "step": 33300 }, { "epoch": 4.3180349062702, "grad_norm": 1.152278184890747, "learning_rate": 0.001, "loss": 2.9174, "step": 33400 }, { "epoch": 4.330963154492566, "grad_norm": 4.246500492095947, "learning_rate": 0.001, "loss": 2.9185, "step": 33500 }, { "epoch": 4.343891402714932, "grad_norm": 1.0751771926879883, "learning_rate": 0.001, "loss": 2.9189, "step": 33600 }, { "epoch": 4.356819650937298, "grad_norm": 1.1110507249832153, "learning_rate": 0.001, "loss": 2.9153, "step": 33700 }, { "epoch": 4.369747899159664, "grad_norm": 1.3820734024047852, "learning_rate": 0.001, "loss": 2.9075, "step": 33800 }, { "epoch": 4.38267614738203, "grad_norm": 1.3190172910690308, "learning_rate": 0.001, "loss": 2.9291, "step": 33900 }, { "epoch": 4.395604395604396, "grad_norm": 1.1762887239456177, "learning_rate": 0.001, "loss": 2.9053, "step": 34000 }, { "epoch": 4.408532643826762, "grad_norm": 1.0224344730377197, "learning_rate": 0.001, "loss": 2.9335, "step": 34100 }, { "epoch": 4.421460892049128, "grad_norm": 1.0334924459457397, "learning_rate": 0.001, "loss": 2.917, "step": 34200 }, { "epoch": 4.4343891402714934, "grad_norm": 0.866147518157959, "learning_rate": 0.001, "loss": 2.9143, "step": 34300 }, { "epoch": 4.447317388493859, "grad_norm": 0.9848929643630981, "learning_rate": 0.001, "loss": 2.9249, "step": 34400 }, { "epoch": 4.460245636716225, "grad_norm": 1.1825933456420898, "learning_rate": 0.001, "loss": 2.9145, "step": 34500 }, { "epoch": 4.473173884938591, "grad_norm": 0.8044064044952393, "learning_rate": 0.001, "loss": 2.9282, "step": 34600 }, { "epoch": 4.486102133160957, "grad_norm": 1.2905904054641724, "learning_rate": 0.001, "loss": 2.9291, "step": 34700 }, { "epoch": 4.499030381383323, "grad_norm": 1.0896506309509277, "learning_rate": 0.001, "loss": 2.9378, "step": 34800 }, { "epoch": 4.511958629605688, "grad_norm": 1.1009185314178467, "learning_rate": 0.001, "loss": 2.9106, "step": 34900 }, { "epoch": 4.524886877828054, "grad_norm": 0.7579659819602966, "learning_rate": 0.001, "loss": 2.9309, "step": 35000 }, { "epoch": 4.53781512605042, "grad_norm": 1.4976803064346313, "learning_rate": 0.001, "loss": 2.9275, "step": 35100 }, { "epoch": 4.550743374272786, "grad_norm": 1.5140354633331299, "learning_rate": 0.001, "loss": 2.9266, "step": 35200 }, { "epoch": 4.563671622495152, "grad_norm": 0.7666584253311157, "learning_rate": 0.001, "loss": 2.9409, "step": 35300 }, { "epoch": 4.5765998707175175, "grad_norm": 1.2247462272644043, "learning_rate": 0.001, "loss": 2.9151, "step": 35400 }, { "epoch": 4.589528118939883, "grad_norm": 1.3318167924880981, "learning_rate": 0.001, "loss": 2.9415, "step": 35500 }, { "epoch": 4.602456367162249, "grad_norm": 1.2134454250335693, "learning_rate": 0.001, "loss": 2.9331, "step": 35600 }, { "epoch": 4.615384615384615, "grad_norm": 1.1034247875213623, "learning_rate": 0.001, "loss": 2.9261, "step": 35700 }, { "epoch": 4.628312863606981, "grad_norm": 1.0959861278533936, "learning_rate": 0.001, "loss": 2.9135, "step": 35800 }, { "epoch": 4.6412411118293475, "grad_norm": 4.098438739776611, "learning_rate": 0.001, "loss": 2.93, "step": 35900 }, { "epoch": 4.654169360051713, "grad_norm": 1.4309004545211792, "learning_rate": 0.001, "loss": 2.9049, "step": 36000 }, { "epoch": 4.667097608274079, "grad_norm": 1.2049057483673096, "learning_rate": 0.001, "loss": 2.9089, "step": 36100 }, { "epoch": 4.680025856496445, "grad_norm": 0.8401902914047241, "learning_rate": 0.001, "loss": 2.9244, "step": 36200 }, { "epoch": 4.692954104718811, "grad_norm": 1.1822596788406372, "learning_rate": 0.001, "loss": 2.9015, "step": 36300 }, { "epoch": 4.705882352941177, "grad_norm": 1.2013134956359863, "learning_rate": 0.001, "loss": 2.9256, "step": 36400 }, { "epoch": 4.7188106011635425, "grad_norm": 0.8610106706619263, "learning_rate": 0.001, "loss": 2.927, "step": 36500 }, { "epoch": 4.731738849385908, "grad_norm": 1.057443618774414, "learning_rate": 0.001, "loss": 2.9247, "step": 36600 }, { "epoch": 4.744667097608274, "grad_norm": 0.664431095123291, "learning_rate": 0.001, "loss": 2.9321, "step": 36700 }, { "epoch": 4.75759534583064, "grad_norm": 0.9363741874694824, "learning_rate": 0.001, "loss": 2.8941, "step": 36800 }, { "epoch": 4.770523594053006, "grad_norm": 5.362719535827637, "learning_rate": 0.001, "loss": 2.9134, "step": 36900 }, { "epoch": 4.783451842275372, "grad_norm": 1.2769626379013062, "learning_rate": 0.001, "loss": 2.9287, "step": 37000 }, { "epoch": 4.796380090497737, "grad_norm": 2.802379608154297, "learning_rate": 0.001, "loss": 2.9213, "step": 37100 }, { "epoch": 4.809308338720103, "grad_norm": 1.1858912706375122, "learning_rate": 0.001, "loss": 2.9255, "step": 37200 }, { "epoch": 4.822236586942469, "grad_norm": 1.639768362045288, "learning_rate": 0.001, "loss": 2.9474, "step": 37300 }, { "epoch": 4.835164835164835, "grad_norm": 1.2080357074737549, "learning_rate": 0.001, "loss": 2.9254, "step": 37400 }, { "epoch": 4.848093083387201, "grad_norm": 1.0133618116378784, "learning_rate": 0.001, "loss": 2.9005, "step": 37500 }, { "epoch": 4.861021331609567, "grad_norm": 0.9962373375892639, "learning_rate": 0.001, "loss": 2.9253, "step": 37600 }, { "epoch": 4.873949579831933, "grad_norm": 1.0925848484039307, "learning_rate": 0.001, "loss": 2.9353, "step": 37700 }, { "epoch": 4.886877828054299, "grad_norm": 1.2475625276565552, "learning_rate": 0.001, "loss": 2.9369, "step": 37800 }, { "epoch": 4.899806076276665, "grad_norm": 1.6497710943222046, "learning_rate": 0.001, "loss": 2.939, "step": 37900 }, { "epoch": 4.912734324499031, "grad_norm": 0.9475740790367126, "learning_rate": 0.001, "loss": 2.9211, "step": 38000 }, { "epoch": 4.9256625727213965, "grad_norm": 1.6969155073165894, "learning_rate": 0.001, "loss": 2.9273, "step": 38100 }, { "epoch": 4.938590820943762, "grad_norm": 1.1423985958099365, "learning_rate": 0.001, "loss": 2.9342, "step": 38200 }, { "epoch": 4.951519069166128, "grad_norm": 1.347312331199646, "learning_rate": 0.001, "loss": 2.9334, "step": 38300 }, { "epoch": 4.964447317388494, "grad_norm": 1.3161590099334717, "learning_rate": 0.001, "loss": 2.9208, "step": 38400 }, { "epoch": 4.97737556561086, "grad_norm": 0.9403855800628662, "learning_rate": 0.001, "loss": 2.9234, "step": 38500 }, { "epoch": 4.990303813833226, "grad_norm": 4.280357837677002, "learning_rate": 0.001, "loss": 2.9411, "step": 38600 }, { "epoch": 5.0032320620555915, "grad_norm": 0.8901792764663696, "learning_rate": 0.001, "loss": 2.9219, "step": 38700 }, { "epoch": 5.016160310277957, "grad_norm": 0.8578092455863953, "learning_rate": 0.001, "loss": 2.8514, "step": 38800 }, { "epoch": 5.029088558500323, "grad_norm": 0.890854001045227, "learning_rate": 0.001, "loss": 2.8442, "step": 38900 }, { "epoch": 5.042016806722689, "grad_norm": 0.9127228856086731, "learning_rate": 0.001, "loss": 2.8337, "step": 39000 }, { "epoch": 5.054945054945055, "grad_norm": 1.0185246467590332, "learning_rate": 0.001, "loss": 2.8605, "step": 39100 }, { "epoch": 5.067873303167421, "grad_norm": 0.9470261335372925, "learning_rate": 0.001, "loss": 2.8532, "step": 39200 }, { "epoch": 5.080801551389786, "grad_norm": 1.2356655597686768, "learning_rate": 0.001, "loss": 2.8249, "step": 39300 }, { "epoch": 5.093729799612152, "grad_norm": 0.8807908296585083, "learning_rate": 0.001, "loss": 2.8359, "step": 39400 }, { "epoch": 5.106658047834518, "grad_norm": 0.9328768253326416, "learning_rate": 0.001, "loss": 2.8482, "step": 39500 }, { "epoch": 5.119586296056884, "grad_norm": 0.8954903483390808, "learning_rate": 0.001, "loss": 2.8659, "step": 39600 }, { "epoch": 5.132514544279251, "grad_norm": 1.1177982091903687, "learning_rate": 0.001, "loss": 2.8307, "step": 39700 }, { "epoch": 5.145442792501616, "grad_norm": 1.2628185749053955, "learning_rate": 0.001, "loss": 2.8474, "step": 39800 }, { "epoch": 5.158371040723982, "grad_norm": 1.638264775276184, "learning_rate": 0.001, "loss": 2.8583, "step": 39900 }, { "epoch": 5.171299288946348, "grad_norm": 1.122513771057129, "learning_rate": 0.001, "loss": 2.8329, "step": 40000 }, { "epoch": 5.184227537168714, "grad_norm": 0.8399069905281067, "learning_rate": 0.001, "loss": 2.8546, "step": 40100 }, { "epoch": 5.19715578539108, "grad_norm": 0.9421060085296631, "learning_rate": 0.001, "loss": 2.853, "step": 40200 }, { "epoch": 5.2100840336134455, "grad_norm": 1.2536128759384155, "learning_rate": 0.001, "loss": 2.8689, "step": 40300 }, { "epoch": 5.223012281835811, "grad_norm": 2.2597620487213135, "learning_rate": 0.001, "loss": 2.8317, "step": 40400 }, { "epoch": 5.235940530058177, "grad_norm": 0.9830044507980347, "learning_rate": 0.001, "loss": 2.86, "step": 40500 }, { "epoch": 5.248868778280543, "grad_norm": 0.9415826201438904, "learning_rate": 0.001, "loss": 2.8605, "step": 40600 }, { "epoch": 5.261797026502909, "grad_norm": 1.1565967798233032, "learning_rate": 0.001, "loss": 2.8645, "step": 40700 }, { "epoch": 5.274725274725275, "grad_norm": 1.4251514673233032, "learning_rate": 0.001, "loss": 2.8741, "step": 40800 }, { "epoch": 5.2876535229476405, "grad_norm": 0.8589007258415222, "learning_rate": 0.001, "loss": 2.8661, "step": 40900 }, { "epoch": 5.300581771170006, "grad_norm": 0.9929611682891846, "learning_rate": 0.001, "loss": 2.8914, "step": 41000 }, { "epoch": 5.313510019392372, "grad_norm": 1.2020773887634277, "learning_rate": 0.001, "loss": 2.8592, "step": 41100 }, { "epoch": 5.326438267614738, "grad_norm": 0.8927658796310425, "learning_rate": 0.001, "loss": 2.8669, "step": 41200 }, { "epoch": 5.339366515837104, "grad_norm": 1.511470079421997, "learning_rate": 0.001, "loss": 2.8594, "step": 41300 }, { "epoch": 5.35229476405947, "grad_norm": 0.7040639519691467, "learning_rate": 0.001, "loss": 2.8721, "step": 41400 }, { "epoch": 5.365223012281835, "grad_norm": 0.9226789474487305, "learning_rate": 0.001, "loss": 2.8721, "step": 41500 }, { "epoch": 5.378151260504202, "grad_norm": 1.0757025480270386, "learning_rate": 0.001, "loss": 2.8605, "step": 41600 }, { "epoch": 5.391079508726568, "grad_norm": 0.8448374271392822, "learning_rate": 0.001, "loss": 2.8651, "step": 41700 }, { "epoch": 5.404007756948934, "grad_norm": 0.9977715015411377, "learning_rate": 0.001, "loss": 2.8697, "step": 41800 }, { "epoch": 5.4169360051713, "grad_norm": 1.1560465097427368, "learning_rate": 0.001, "loss": 2.8797, "step": 41900 }, { "epoch": 5.429864253393665, "grad_norm": 1.18364417552948, "learning_rate": 0.001, "loss": 2.8598, "step": 42000 }, { "epoch": 5.442792501616031, "grad_norm": 0.9866510033607483, "learning_rate": 0.001, "loss": 2.8628, "step": 42100 }, { "epoch": 5.455720749838397, "grad_norm": 1.066038727760315, "learning_rate": 0.001, "loss": 2.8348, "step": 42200 }, { "epoch": 5.468648998060763, "grad_norm": 1.0029083490371704, "learning_rate": 0.001, "loss": 2.8946, "step": 42300 }, { "epoch": 5.481577246283129, "grad_norm": 0.7679119110107422, "learning_rate": 0.001, "loss": 2.8709, "step": 42400 }, { "epoch": 5.4945054945054945, "grad_norm": 1.344748854637146, "learning_rate": 0.001, "loss": 2.8838, "step": 42500 }, { "epoch": 5.50743374272786, "grad_norm": 1.1062631607055664, "learning_rate": 0.001, "loss": 2.8728, "step": 42600 }, { "epoch": 5.520361990950226, "grad_norm": 0.8063384890556335, "learning_rate": 0.001, "loss": 2.8693, "step": 42700 }, { "epoch": 5.533290239172592, "grad_norm": 1.1191445589065552, "learning_rate": 0.001, "loss": 2.8693, "step": 42800 }, { "epoch": 5.546218487394958, "grad_norm": 0.8379215598106384, "learning_rate": 0.001, "loss": 2.88, "step": 42900 }, { "epoch": 5.559146735617324, "grad_norm": 0.9084675908088684, "learning_rate": 0.001, "loss": 2.8766, "step": 43000 }, { "epoch": 5.5720749838396895, "grad_norm": 1.546298861503601, "learning_rate": 0.001, "loss": 2.8995, "step": 43100 }, { "epoch": 5.585003232062055, "grad_norm": 1.221314549446106, "learning_rate": 0.001, "loss": 2.8932, "step": 43200 }, { "epoch": 5.597931480284421, "grad_norm": 0.7827103734016418, "learning_rate": 0.001, "loss": 2.9011, "step": 43300 }, { "epoch": 5.610859728506787, "grad_norm": 1.2011886835098267, "learning_rate": 0.001, "loss": 2.8847, "step": 43400 }, { "epoch": 5.623787976729153, "grad_norm": 0.950648307800293, "learning_rate": 0.001, "loss": 2.8745, "step": 43500 }, { "epoch": 5.636716224951519, "grad_norm": 1.4898109436035156, "learning_rate": 0.001, "loss": 2.8721, "step": 43600 }, { "epoch": 5.649644473173885, "grad_norm": 0.7440897822380066, "learning_rate": 0.001, "loss": 2.8916, "step": 43700 }, { "epoch": 5.662572721396251, "grad_norm": 0.8547734022140503, "learning_rate": 0.001, "loss": 2.8879, "step": 43800 }, { "epoch": 5.675500969618617, "grad_norm": 0.8578941822052002, "learning_rate": 0.001, "loss": 2.8757, "step": 43900 }, { "epoch": 5.688429217840983, "grad_norm": 1.0275822877883911, "learning_rate": 0.001, "loss": 2.8834, "step": 44000 }, { "epoch": 5.701357466063349, "grad_norm": 6.591407775878906, "learning_rate": 0.001, "loss": 2.8769, "step": 44100 }, { "epoch": 5.714285714285714, "grad_norm": 0.9956234097480774, "learning_rate": 0.001, "loss": 2.8672, "step": 44200 }, { "epoch": 5.72721396250808, "grad_norm": 1.73713219165802, "learning_rate": 0.001, "loss": 2.8553, "step": 44300 }, { "epoch": 5.740142210730446, "grad_norm": 1.9002691507339478, "learning_rate": 0.001, "loss": 2.8702, "step": 44400 }, { "epoch": 5.753070458952812, "grad_norm": 1.3217421770095825, "learning_rate": 0.001, "loss": 2.8713, "step": 44500 }, { "epoch": 5.765998707175178, "grad_norm": 0.857168436050415, "learning_rate": 0.001, "loss": 2.8751, "step": 44600 }, { "epoch": 5.7789269553975435, "grad_norm": 0.9854875802993774, "learning_rate": 0.001, "loss": 2.8776, "step": 44700 }, { "epoch": 5.791855203619909, "grad_norm": 0.7514907717704773, "learning_rate": 0.001, "loss": 2.875, "step": 44800 }, { "epoch": 5.804783451842275, "grad_norm": 0.9921727776527405, "learning_rate": 0.001, "loss": 2.8723, "step": 44900 }, { "epoch": 5.817711700064641, "grad_norm": 0.85745769739151, "learning_rate": 0.001, "loss": 2.8785, "step": 45000 }, { "epoch": 5.830639948287007, "grad_norm": 0.8073659539222717, "learning_rate": 0.001, "loss": 2.8658, "step": 45100 }, { "epoch": 5.843568196509373, "grad_norm": 1.2673579454421997, "learning_rate": 0.001, "loss": 2.8809, "step": 45200 }, { "epoch": 5.8564964447317385, "grad_norm": 0.8654096126556396, "learning_rate": 0.001, "loss": 2.8756, "step": 45300 }, { "epoch": 5.869424692954105, "grad_norm": 1.30247163772583, "learning_rate": 0.001, "loss": 2.8765, "step": 45400 }, { "epoch": 5.882352941176471, "grad_norm": 0.7884098887443542, "learning_rate": 0.001, "loss": 2.8922, "step": 45500 }, { "epoch": 5.895281189398837, "grad_norm": 1.059687852859497, "learning_rate": 0.001, "loss": 2.895, "step": 45600 }, { "epoch": 5.908209437621203, "grad_norm": 0.8863629102706909, "learning_rate": 0.001, "loss": 2.8669, "step": 45700 }, { "epoch": 5.9211376858435685, "grad_norm": 0.9793347120285034, "learning_rate": 0.001, "loss": 2.8696, "step": 45800 }, { "epoch": 5.934065934065934, "grad_norm": 0.9157764911651611, "learning_rate": 0.001, "loss": 2.9002, "step": 45900 }, { "epoch": 5.9469941822883, "grad_norm": 0.6185294985771179, "learning_rate": 0.001, "loss": 2.8965, "step": 46000 }, { "epoch": 5.959922430510666, "grad_norm": 1.054610013961792, "learning_rate": 0.001, "loss": 2.884, "step": 46100 }, { "epoch": 5.972850678733032, "grad_norm": 0.880906343460083, "learning_rate": 0.001, "loss": 2.8944, "step": 46200 }, { "epoch": 5.985778926955398, "grad_norm": 0.9664792418479919, "learning_rate": 0.001, "loss": 2.8872, "step": 46300 }, { "epoch": 5.998707175177763, "grad_norm": 1.5413181781768799, "learning_rate": 0.001, "loss": 2.8842, "step": 46400 }, { "epoch": 6.011635423400129, "grad_norm": 0.9651244282722473, "learning_rate": 0.001, "loss": 2.7809, "step": 46500 }, { "epoch": 6.024563671622495, "grad_norm": 1.1572868824005127, "learning_rate": 0.001, "loss": 2.785, "step": 46600 }, { "epoch": 6.037491919844861, "grad_norm": 0.8506984710693359, "learning_rate": 0.001, "loss": 2.8036, "step": 46700 }, { "epoch": 6.050420168067227, "grad_norm": 1.3112835884094238, "learning_rate": 0.001, "loss": 2.8016, "step": 46800 }, { "epoch": 6.0633484162895925, "grad_norm": 0.9240913391113281, "learning_rate": 0.001, "loss": 2.8225, "step": 46900 }, { "epoch": 6.076276664511958, "grad_norm": 1.1086339950561523, "learning_rate": 0.001, "loss": 2.8163, "step": 47000 }, { "epoch": 6.089204912734324, "grad_norm": 1.3795610666275024, "learning_rate": 0.001, "loss": 2.8038, "step": 47100 }, { "epoch": 6.10213316095669, "grad_norm": 0.987629234790802, "learning_rate": 0.001, "loss": 2.8131, "step": 47200 }, { "epoch": 6.115061409179056, "grad_norm": 1.0993800163269043, "learning_rate": 0.001, "loss": 2.7886, "step": 47300 }, { "epoch": 6.127989657401422, "grad_norm": 0.9193052649497986, "learning_rate": 0.001, "loss": 2.8028, "step": 47400 }, { "epoch": 6.140917905623788, "grad_norm": 1.7102909088134766, "learning_rate": 0.001, "loss": 2.8233, "step": 47500 }, { "epoch": 6.153846153846154, "grad_norm": 1.3379311561584473, "learning_rate": 0.001, "loss": 2.8152, "step": 47600 }, { "epoch": 6.16677440206852, "grad_norm": 1.3249787092208862, "learning_rate": 0.001, "loss": 2.7948, "step": 47700 }, { "epoch": 6.179702650290886, "grad_norm": 1.0834747552871704, "learning_rate": 0.001, "loss": 2.8018, "step": 47800 }, { "epoch": 6.192630898513252, "grad_norm": 1.1233692169189453, "learning_rate": 0.001, "loss": 2.7919, "step": 47900 }, { "epoch": 6.2055591467356175, "grad_norm": 1.0301451683044434, "learning_rate": 0.001, "loss": 2.8147, "step": 48000 }, { "epoch": 6.218487394957983, "grad_norm": 1.1919742822647095, "learning_rate": 0.001, "loss": 2.8301, "step": 48100 }, { "epoch": 6.231415643180349, "grad_norm": 1.1102752685546875, "learning_rate": 0.001, "loss": 2.8174, "step": 48200 }, { "epoch": 6.244343891402715, "grad_norm": 0.9158645272254944, "learning_rate": 0.001, "loss": 2.8191, "step": 48300 }, { "epoch": 6.257272139625081, "grad_norm": 0.95766282081604, "learning_rate": 0.001, "loss": 2.8329, "step": 48400 }, { "epoch": 6.270200387847447, "grad_norm": 1.025870680809021, "learning_rate": 0.001, "loss": 2.8098, "step": 48500 }, { "epoch": 6.283128636069812, "grad_norm": 4.698361873626709, "learning_rate": 0.001, "loss": 2.8159, "step": 48600 }, { "epoch": 6.296056884292178, "grad_norm": 0.9917893409729004, "learning_rate": 0.001, "loss": 2.8179, "step": 48700 }, { "epoch": 6.308985132514544, "grad_norm": 0.8799039721488953, "learning_rate": 0.001, "loss": 2.8288, "step": 48800 }, { "epoch": 6.32191338073691, "grad_norm": 0.9522714018821716, "learning_rate": 0.001, "loss": 2.8016, "step": 48900 }, { "epoch": 6.334841628959276, "grad_norm": 1.1591792106628418, "learning_rate": 0.001, "loss": 2.8207, "step": 49000 }, { "epoch": 6.3477698771816415, "grad_norm": 1.1305049657821655, "learning_rate": 0.001, "loss": 2.8203, "step": 49100 }, { "epoch": 6.360698125404007, "grad_norm": 0.9877011179924011, "learning_rate": 0.001, "loss": 2.8061, "step": 49200 }, { "epoch": 6.373626373626374, "grad_norm": 1.4320781230926514, "learning_rate": 0.001, "loss": 2.8114, "step": 49300 }, { "epoch": 6.38655462184874, "grad_norm": 0.7362180948257446, "learning_rate": 0.001, "loss": 2.8358, "step": 49400 }, { "epoch": 6.399482870071106, "grad_norm": 2.388739824295044, "learning_rate": 0.001, "loss": 2.8149, "step": 49500 }, { "epoch": 6.4124111182934715, "grad_norm": 0.9117136001586914, "learning_rate": 0.001, "loss": 2.8133, "step": 49600 }, { "epoch": 6.425339366515837, "grad_norm": 1.093301773071289, "learning_rate": 0.001, "loss": 2.817, "step": 49700 }, { "epoch": 6.438267614738203, "grad_norm": 1.008831262588501, "learning_rate": 0.001, "loss": 2.8436, "step": 49800 }, { "epoch": 6.451195862960569, "grad_norm": 1.9667439460754395, "learning_rate": 0.001, "loss": 2.8204, "step": 49900 }, { "epoch": 6.464124111182935, "grad_norm": 0.9083557724952698, "learning_rate": 0.001, "loss": 2.8278, "step": 50000 }, { "epoch": 6.477052359405301, "grad_norm": 1.045493245124817, "learning_rate": 0.001, "loss": 2.8194, "step": 50100 }, { "epoch": 6.4899806076276665, "grad_norm": 1.2469412088394165, "learning_rate": 0.001, "loss": 2.813, "step": 50200 }, { "epoch": 6.502908855850032, "grad_norm": 0.9621401429176331, "learning_rate": 0.001, "loss": 2.8148, "step": 50300 }, { "epoch": 6.515837104072398, "grad_norm": 1.8395905494689941, "learning_rate": 0.001, "loss": 2.8244, "step": 50400 }, { "epoch": 6.528765352294764, "grad_norm": 1.409899353981018, "learning_rate": 0.001, "loss": 2.8143, "step": 50500 }, { "epoch": 6.54169360051713, "grad_norm": 1.0134375095367432, "learning_rate": 0.001, "loss": 2.8362, "step": 50600 }, { "epoch": 6.554621848739496, "grad_norm": 1.0192620754241943, "learning_rate": 0.001, "loss": 2.8086, "step": 50700 }, { "epoch": 6.567550096961861, "grad_norm": 1.9895530939102173, "learning_rate": 0.001, "loss": 2.8352, "step": 50800 }, { "epoch": 6.580478345184227, "grad_norm": 1.1625665426254272, "learning_rate": 0.001, "loss": 2.8322, "step": 50900 }, { "epoch": 6.593406593406593, "grad_norm": 1.190502405166626, "learning_rate": 0.001, "loss": 2.8214, "step": 51000 }, { "epoch": 6.606334841628959, "grad_norm": 1.2797126770019531, "learning_rate": 0.001, "loss": 2.8354, "step": 51100 }, { "epoch": 6.619263089851325, "grad_norm": 1.3066164255142212, "learning_rate": 0.001, "loss": 2.85, "step": 51200 }, { "epoch": 6.6321913380736905, "grad_norm": 1.3139715194702148, "learning_rate": 0.001, "loss": 2.8478, "step": 51300 }, { "epoch": 6.645119586296057, "grad_norm": 1.3180826902389526, "learning_rate": 0.001, "loss": 2.84, "step": 51400 }, { "epoch": 6.658047834518423, "grad_norm": 1.003361701965332, "learning_rate": 0.001, "loss": 2.8127, "step": 51500 }, { "epoch": 6.670976082740789, "grad_norm": 0.9198578596115112, "learning_rate": 0.001, "loss": 2.838, "step": 51600 }, { "epoch": 6.683904330963155, "grad_norm": 1.1694542169570923, "learning_rate": 0.001, "loss": 2.8127, "step": 51700 }, { "epoch": 6.6968325791855206, "grad_norm": 12.815674781799316, "learning_rate": 0.001, "loss": 2.82, "step": 51800 }, { "epoch": 6.709760827407886, "grad_norm": 0.8722559213638306, "learning_rate": 0.001, "loss": 2.8338, "step": 51900 }, { "epoch": 6.722689075630252, "grad_norm": 1.080980658531189, "learning_rate": 0.001, "loss": 2.8229, "step": 52000 }, { "epoch": 6.735617323852618, "grad_norm": 1.9150267839431763, "learning_rate": 0.001, "loss": 2.8408, "step": 52100 }, { "epoch": 6.748545572074984, "grad_norm": 1.6330978870391846, "learning_rate": 0.001, "loss": 2.8335, "step": 52200 }, { "epoch": 6.76147382029735, "grad_norm": 1.1254642009735107, "learning_rate": 0.001, "loss": 2.8273, "step": 52300 }, { "epoch": 6.7744020685197155, "grad_norm": 1.4073829650878906, "learning_rate": 0.001, "loss": 2.8389, "step": 52400 }, { "epoch": 6.787330316742081, "grad_norm": 0.793594479560852, "learning_rate": 0.001, "loss": 2.8153, "step": 52500 }, { "epoch": 6.800258564964447, "grad_norm": 2.0264644622802734, "learning_rate": 0.001, "loss": 2.8343, "step": 52600 }, { "epoch": 6.813186813186813, "grad_norm": 1.7524126768112183, "learning_rate": 0.001, "loss": 2.8734, "step": 52700 }, { "epoch": 6.826115061409179, "grad_norm": 1.1634052991867065, "learning_rate": 0.001, "loss": 2.8308, "step": 52800 }, { "epoch": 6.839043309631545, "grad_norm": 1.0121957063674927, "learning_rate": 0.001, "loss": 2.8261, "step": 52900 }, { "epoch": 6.85197155785391, "grad_norm": 1.0075827836990356, "learning_rate": 0.001, "loss": 2.8098, "step": 53000 }, { "epoch": 6.864899806076277, "grad_norm": 1.3358700275421143, "learning_rate": 0.001, "loss": 2.8268, "step": 53100 }, { "epoch": 6.877828054298643, "grad_norm": 1.2529650926589966, "learning_rate": 0.001, "loss": 2.8323, "step": 53200 }, { "epoch": 6.890756302521009, "grad_norm": 0.9423221349716187, "learning_rate": 0.001, "loss": 2.8353, "step": 53300 }, { "epoch": 6.903684550743375, "grad_norm": 0.9099181890487671, "learning_rate": 0.001, "loss": 2.8253, "step": 53400 }, { "epoch": 6.91661279896574, "grad_norm": 1.0064482688903809, "learning_rate": 0.001, "loss": 2.8509, "step": 53500 }, { "epoch": 6.929541047188106, "grad_norm": 0.8802391290664673, "learning_rate": 0.001, "loss": 2.841, "step": 53600 }, { "epoch": 6.942469295410472, "grad_norm": 0.9910616874694824, "learning_rate": 0.001, "loss": 2.8254, "step": 53700 }, { "epoch": 6.955397543632838, "grad_norm": 0.7056309580802917, "learning_rate": 0.001, "loss": 2.8295, "step": 53800 }, { "epoch": 6.968325791855204, "grad_norm": 0.826834499835968, "learning_rate": 0.001, "loss": 2.8367, "step": 53900 }, { "epoch": 6.98125404007757, "grad_norm": 1.306159496307373, "learning_rate": 0.001, "loss": 2.8371, "step": 54000 }, { "epoch": 6.994182288299935, "grad_norm": 1.1751346588134766, "learning_rate": 0.001, "loss": 2.8431, "step": 54100 }, { "epoch": 7.007110536522301, "grad_norm": 1.4894050359725952, "learning_rate": 0.001, "loss": 2.7614, "step": 54200 }, { "epoch": 7.020038784744667, "grad_norm": 1.9095325469970703, "learning_rate": 0.001, "loss": 2.755, "step": 54300 }, { "epoch": 7.032967032967033, "grad_norm": 1.1786752939224243, "learning_rate": 0.001, "loss": 2.7357, "step": 54400 }, { "epoch": 7.045895281189399, "grad_norm": 1.087520956993103, "learning_rate": 0.001, "loss": 2.7476, "step": 54500 }, { "epoch": 7.0588235294117645, "grad_norm": 1.0659937858581543, "learning_rate": 0.001, "loss": 2.7546, "step": 54600 }, { "epoch": 7.07175177763413, "grad_norm": 1.69095778465271, "learning_rate": 0.001, "loss": 2.7694, "step": 54700 }, { "epoch": 7.084680025856496, "grad_norm": 0.9546342492103577, "learning_rate": 0.001, "loss": 2.7589, "step": 54800 }, { "epoch": 7.097608274078862, "grad_norm": 1.2522302865982056, "learning_rate": 0.001, "loss": 2.755, "step": 54900 }, { "epoch": 7.110536522301228, "grad_norm": 1.0458678007125854, "learning_rate": 0.001, "loss": 2.7624, "step": 55000 }, { "epoch": 7.123464770523594, "grad_norm": 1.5993131399154663, "learning_rate": 0.001, "loss": 2.7533, "step": 55100 }, { "epoch": 7.13639301874596, "grad_norm": 2.1327452659606934, "learning_rate": 0.001, "loss": 2.7684, "step": 55200 }, { "epoch": 7.149321266968326, "grad_norm": 1.2879582643508911, "learning_rate": 0.001, "loss": 2.7841, "step": 55300 }, { "epoch": 7.162249515190692, "grad_norm": 5.810297012329102, "learning_rate": 0.001, "loss": 2.7632, "step": 55400 }, { "epoch": 7.175177763413058, "grad_norm": 2.381146192550659, "learning_rate": 0.001, "loss": 2.7724, "step": 55500 }, { "epoch": 7.188106011635424, "grad_norm": 2.6122331619262695, "learning_rate": 0.001, "loss": 2.7602, "step": 55600 }, { "epoch": 7.201034259857789, "grad_norm": 0.9421831369400024, "learning_rate": 0.001, "loss": 2.7642, "step": 55700 }, { "epoch": 7.213962508080155, "grad_norm": 1.4747930765151978, "learning_rate": 0.001, "loss": 2.7747, "step": 55800 }, { "epoch": 7.226890756302521, "grad_norm": 1.5691676139831543, "learning_rate": 0.001, "loss": 2.7633, "step": 55900 }, { "epoch": 7.239819004524887, "grad_norm": 2.3205883502960205, "learning_rate": 0.001, "loss": 2.777, "step": 56000 }, { "epoch": 7.252747252747253, "grad_norm": 0.9136083126068115, "learning_rate": 0.001, "loss": 2.7753, "step": 56100 }, { "epoch": 7.265675500969619, "grad_norm": 1.204375147819519, "learning_rate": 0.001, "loss": 2.7829, "step": 56200 }, { "epoch": 7.278603749191984, "grad_norm": 1.3491688966751099, "learning_rate": 0.001, "loss": 2.7762, "step": 56300 }, { "epoch": 7.29153199741435, "grad_norm": 1.3704417943954468, "learning_rate": 0.001, "loss": 2.7832, "step": 56400 }, { "epoch": 7.304460245636716, "grad_norm": 1.2007696628570557, "learning_rate": 0.001, "loss": 2.7931, "step": 56500 }, { "epoch": 7.317388493859082, "grad_norm": 1.2190853357315063, "learning_rate": 0.001, "loss": 2.7813, "step": 56600 }, { "epoch": 7.330316742081448, "grad_norm": 1.068428874015808, "learning_rate": 0.001, "loss": 2.7837, "step": 56700 }, { "epoch": 7.3432449903038135, "grad_norm": 1.1655817031860352, "learning_rate": 0.001, "loss": 2.7797, "step": 56800 }, { "epoch": 7.356173238526179, "grad_norm": 0.9523726105690002, "learning_rate": 0.001, "loss": 2.7733, "step": 56900 }, { "epoch": 7.369101486748546, "grad_norm": 1.1044821739196777, "learning_rate": 0.001, "loss": 2.7892, "step": 57000 }, { "epoch": 7.382029734970912, "grad_norm": 1.441642165184021, "learning_rate": 0.001, "loss": 2.7792, "step": 57100 }, { "epoch": 7.394957983193278, "grad_norm": 1.5160586833953857, "learning_rate": 0.001, "loss": 2.7753, "step": 57200 }, { "epoch": 7.4078862314156435, "grad_norm": 1.0723419189453125, "learning_rate": 0.001, "loss": 2.7774, "step": 57300 }, { "epoch": 7.420814479638009, "grad_norm": 1.1391361951828003, "learning_rate": 0.001, "loss": 2.7862, "step": 57400 }, { "epoch": 7.433742727860375, "grad_norm": 0.9852800369262695, "learning_rate": 0.001, "loss": 2.7945, "step": 57500 }, { "epoch": 7.446670976082741, "grad_norm": 1.704779028892517, "learning_rate": 0.001, "loss": 2.7723, "step": 57600 }, { "epoch": 7.459599224305107, "grad_norm": 1.1179612874984741, "learning_rate": 0.001, "loss": 2.8032, "step": 57700 }, { "epoch": 7.472527472527473, "grad_norm": 1.0278562307357788, "learning_rate": 0.001, "loss": 2.7692, "step": 57800 }, { "epoch": 7.4854557207498384, "grad_norm": 1.4199299812316895, "learning_rate": 0.001, "loss": 2.7888, "step": 57900 }, { "epoch": 7.498383968972204, "grad_norm": 1.0988259315490723, "learning_rate": 0.001, "loss": 2.7838, "step": 58000 }, { "epoch": 7.51131221719457, "grad_norm": 1.1480588912963867, "learning_rate": 0.001, "loss": 2.7664, "step": 58100 }, { "epoch": 7.524240465416936, "grad_norm": 1.0612937211990356, "learning_rate": 0.001, "loss": 2.7847, "step": 58200 }, { "epoch": 7.537168713639302, "grad_norm": 1.2389616966247559, "learning_rate": 0.001, "loss": 2.7961, "step": 58300 }, { "epoch": 7.550096961861668, "grad_norm": 1.3809268474578857, "learning_rate": 0.001, "loss": 2.7951, "step": 58400 }, { "epoch": 7.563025210084033, "grad_norm": 1.5792521238327026, "learning_rate": 0.001, "loss": 2.7923, "step": 58500 }, { "epoch": 7.575953458306399, "grad_norm": 1.1234872341156006, "learning_rate": 0.001, "loss": 2.7815, "step": 58600 }, { "epoch": 7.588881706528765, "grad_norm": 1.716138482093811, "learning_rate": 0.001, "loss": 2.7825, "step": 58700 }, { "epoch": 7.601809954751131, "grad_norm": 1.1564284563064575, "learning_rate": 0.001, "loss": 2.7995, "step": 58800 }, { "epoch": 7.614738202973497, "grad_norm": 1.2145895957946777, "learning_rate": 0.001, "loss": 2.78, "step": 58900 }, { "epoch": 7.6276664511958625, "grad_norm": 1.1288453340530396, "learning_rate": 0.001, "loss": 2.7929, "step": 59000 }, { "epoch": 7.640594699418228, "grad_norm": 1.9070700407028198, "learning_rate": 0.001, "loss": 2.7997, "step": 59100 }, { "epoch": 7.653522947640595, "grad_norm": 1.2677251100540161, "learning_rate": 0.001, "loss": 2.8047, "step": 59200 }, { "epoch": 7.666451195862961, "grad_norm": 1.0207923650741577, "learning_rate": 0.001, "loss": 2.7802, "step": 59300 }, { "epoch": 7.679379444085327, "grad_norm": 1.5379018783569336, "learning_rate": 0.001, "loss": 2.7822, "step": 59400 }, { "epoch": 7.6923076923076925, "grad_norm": 1.2018486261367798, "learning_rate": 0.001, "loss": 2.7959, "step": 59500 }, { "epoch": 7.705235940530058, "grad_norm": 1.1517373323440552, "learning_rate": 0.001, "loss": 2.7969, "step": 59600 }, { "epoch": 7.718164188752424, "grad_norm": 1.1749873161315918, "learning_rate": 0.001, "loss": 2.7647, "step": 59700 }, { "epoch": 7.73109243697479, "grad_norm": 1.087382435798645, "learning_rate": 0.001, "loss": 2.8163, "step": 59800 }, { "epoch": 7.744020685197156, "grad_norm": 1.1469467878341675, "learning_rate": 0.001, "loss": 2.7911, "step": 59900 }, { "epoch": 7.756948933419522, "grad_norm": 1.4366775751113892, "learning_rate": 0.001, "loss": 2.7808, "step": 60000 }, { "epoch": 7.7698771816418875, "grad_norm": 1.3238182067871094, "learning_rate": 0.001, "loss": 2.7744, "step": 60100 }, { "epoch": 7.782805429864253, "grad_norm": 1.2987924814224243, "learning_rate": 0.001, "loss": 2.7932, "step": 60200 }, { "epoch": 7.795733678086619, "grad_norm": 1.367601752281189, "learning_rate": 0.001, "loss": 2.7946, "step": 60300 }, { "epoch": 7.808661926308985, "grad_norm": 0.8648695945739746, "learning_rate": 0.001, "loss": 2.8021, "step": 60400 }, { "epoch": 7.821590174531351, "grad_norm": 1.401485800743103, "learning_rate": 0.001, "loss": 2.7976, "step": 60500 }, { "epoch": 7.834518422753717, "grad_norm": 0.99416583776474, "learning_rate": 0.001, "loss": 2.7912, "step": 60600 }, { "epoch": 7.847446670976082, "grad_norm": 1.170789361000061, "learning_rate": 0.001, "loss": 2.8089, "step": 60700 }, { "epoch": 7.860374919198449, "grad_norm": 0.8984666466712952, "learning_rate": 0.001, "loss": 2.791, "step": 60800 }, { "epoch": 7.873303167420815, "grad_norm": 1.2284300327301025, "learning_rate": 0.001, "loss": 2.8074, "step": 60900 }, { "epoch": 7.886231415643181, "grad_norm": 1.0558274984359741, "learning_rate": 0.001, "loss": 2.7868, "step": 61000 }, { "epoch": 7.899159663865547, "grad_norm": 1.7060306072235107, "learning_rate": 0.001, "loss": 2.7992, "step": 61100 }, { "epoch": 7.912087912087912, "grad_norm": 0.9145954251289368, "learning_rate": 0.001, "loss": 2.7922, "step": 61200 }, { "epoch": 7.925016160310278, "grad_norm": 1.9090044498443604, "learning_rate": 0.001, "loss": 2.7956, "step": 61300 }, { "epoch": 7.937944408532644, "grad_norm": 1.1686561107635498, "learning_rate": 0.001, "loss": 2.8009, "step": 61400 }, { "epoch": 7.95087265675501, "grad_norm": 1.2453254461288452, "learning_rate": 0.001, "loss": 2.795, "step": 61500 }, { "epoch": 7.963800904977376, "grad_norm": 1.25712251663208, "learning_rate": 0.001, "loss": 2.8019, "step": 61600 }, { "epoch": 7.9767291531997415, "grad_norm": 1.6445039510726929, "learning_rate": 0.001, "loss": 2.8075, "step": 61700 }, { "epoch": 7.989657401422107, "grad_norm": 1.3989014625549316, "learning_rate": 0.001, "loss": 2.7929, "step": 61800 }, { "epoch": 8.002585649644473, "grad_norm": 0.7822006344795227, "learning_rate": 0.001, "loss": 2.7933, "step": 61900 }, { "epoch": 8.015513897866839, "grad_norm": 0.9704698920249939, "learning_rate": 0.001, "loss": 2.712, "step": 62000 }, { "epoch": 8.028442146089205, "grad_norm": 1.0883376598358154, "learning_rate": 0.001, "loss": 2.726, "step": 62100 }, { "epoch": 8.04137039431157, "grad_norm": 1.997149109840393, "learning_rate": 0.001, "loss": 2.704, "step": 62200 }, { "epoch": 8.054298642533936, "grad_norm": 1.2318902015686035, "learning_rate": 0.001, "loss": 2.7325, "step": 62300 }, { "epoch": 8.067226890756302, "grad_norm": 0.9250662922859192, "learning_rate": 0.001, "loss": 2.7039, "step": 62400 }, { "epoch": 8.080155138978668, "grad_norm": 0.9370284676551819, "learning_rate": 0.001, "loss": 2.7, "step": 62500 }, { "epoch": 8.093083387201034, "grad_norm": 0.9868721961975098, "learning_rate": 0.001, "loss": 2.7062, "step": 62600 }, { "epoch": 8.1060116354234, "grad_norm": 1.2153418064117432, "learning_rate": 0.001, "loss": 2.7327, "step": 62700 }, { "epoch": 8.118939883645766, "grad_norm": 0.8939513564109802, "learning_rate": 0.001, "loss": 2.7472, "step": 62800 }, { "epoch": 8.131868131868131, "grad_norm": 1.2539023160934448, "learning_rate": 0.001, "loss": 2.7379, "step": 62900 }, { "epoch": 8.144796380090497, "grad_norm": 1.2995036840438843, "learning_rate": 0.001, "loss": 2.7186, "step": 63000 }, { "epoch": 8.157724628312863, "grad_norm": 1.8944493532180786, "learning_rate": 0.001, "loss": 2.7372, "step": 63100 }, { "epoch": 8.170652876535229, "grad_norm": 0.9368337988853455, "learning_rate": 0.001, "loss": 2.7222, "step": 63200 }, { "epoch": 8.183581124757595, "grad_norm": 1.580886960029602, "learning_rate": 0.001, "loss": 2.7179, "step": 63300 }, { "epoch": 8.19650937297996, "grad_norm": 1.153987169265747, "learning_rate": 0.001, "loss": 2.7125, "step": 63400 }, { "epoch": 8.209437621202326, "grad_norm": 4.039945602416992, "learning_rate": 0.001, "loss": 2.723, "step": 63500 }, { "epoch": 8.222365869424692, "grad_norm": 1.0662940740585327, "learning_rate": 0.001, "loss": 2.7322, "step": 63600 }, { "epoch": 8.235294117647058, "grad_norm": 0.9399656653404236, "learning_rate": 0.001, "loss": 2.7228, "step": 63700 }, { "epoch": 8.248222365869424, "grad_norm": 1.1038421392440796, "learning_rate": 0.001, "loss": 2.7314, "step": 63800 }, { "epoch": 8.261150614091791, "grad_norm": 0.7182426452636719, "learning_rate": 0.001, "loss": 2.7524, "step": 63900 }, { "epoch": 8.274078862314157, "grad_norm": 1.3149150609970093, "learning_rate": 0.001, "loss": 2.7518, "step": 64000 }, { "epoch": 8.287007110536523, "grad_norm": 11.512463569641113, "learning_rate": 0.001, "loss": 2.7136, "step": 64100 }, { "epoch": 8.299935358758889, "grad_norm": 0.9225639700889587, "learning_rate": 0.001, "loss": 2.7438, "step": 64200 }, { "epoch": 8.312863606981255, "grad_norm": 1.1448559761047363, "learning_rate": 0.001, "loss": 2.7613, "step": 64300 }, { "epoch": 8.32579185520362, "grad_norm": 0.9962376356124878, "learning_rate": 0.001, "loss": 2.7538, "step": 64400 }, { "epoch": 8.338720103425986, "grad_norm": 1.227972149848938, "learning_rate": 0.001, "loss": 2.7323, "step": 64500 }, { "epoch": 8.351648351648352, "grad_norm": 3.387406826019287, "learning_rate": 0.001, "loss": 2.7664, "step": 64600 }, { "epoch": 8.364576599870718, "grad_norm": 3.3918063640594482, "learning_rate": 0.001, "loss": 2.7458, "step": 64700 }, { "epoch": 8.377504848093084, "grad_norm": 0.9719196557998657, "learning_rate": 0.001, "loss": 2.7505, "step": 64800 }, { "epoch": 8.39043309631545, "grad_norm": 1.2279847860336304, "learning_rate": 0.001, "loss": 2.754, "step": 64900 }, { "epoch": 8.403361344537815, "grad_norm": 1.176398754119873, "learning_rate": 0.001, "loss": 2.754, "step": 65000 }, { "epoch": 8.416289592760181, "grad_norm": 0.9430725574493408, "learning_rate": 0.001, "loss": 2.7221, "step": 65100 }, { "epoch": 8.429217840982547, "grad_norm": 1.5332907438278198, "learning_rate": 0.001, "loss": 2.7391, "step": 65200 }, { "epoch": 8.442146089204913, "grad_norm": 1.311378002166748, "learning_rate": 0.001, "loss": 2.722, "step": 65300 }, { "epoch": 8.455074337427279, "grad_norm": 0.9279801845550537, "learning_rate": 0.001, "loss": 2.7498, "step": 65400 }, { "epoch": 8.468002585649645, "grad_norm": 0.9946776628494263, "learning_rate": 0.001, "loss": 2.7532, "step": 65500 }, { "epoch": 8.48093083387201, "grad_norm": 1.1088714599609375, "learning_rate": 0.001, "loss": 2.7399, "step": 65600 }, { "epoch": 8.493859082094376, "grad_norm": 1.0984580516815186, "learning_rate": 0.001, "loss": 2.7738, "step": 65700 }, { "epoch": 8.506787330316742, "grad_norm": 0.9153105616569519, "learning_rate": 0.001, "loss": 2.7417, "step": 65800 }, { "epoch": 8.519715578539108, "grad_norm": 1.0561844110488892, "learning_rate": 0.001, "loss": 2.7386, "step": 65900 }, { "epoch": 8.532643826761474, "grad_norm": 0.8769338130950928, "learning_rate": 0.001, "loss": 2.7303, "step": 66000 }, { "epoch": 8.54557207498384, "grad_norm": 2.944385528564453, "learning_rate": 0.001, "loss": 2.7483, "step": 66100 }, { "epoch": 8.558500323206205, "grad_norm": 5.9722113609313965, "learning_rate": 0.001, "loss": 2.7593, "step": 66200 }, { "epoch": 8.571428571428571, "grad_norm": 1.0851624011993408, "learning_rate": 0.001, "loss": 2.7434, "step": 66300 }, { "epoch": 8.584356819650937, "grad_norm": 1.3058371543884277, "learning_rate": 0.001, "loss": 2.7379, "step": 66400 }, { "epoch": 8.597285067873303, "grad_norm": 0.8533601760864258, "learning_rate": 0.001, "loss": 2.7712, "step": 66500 }, { "epoch": 8.610213316095669, "grad_norm": 0.9440778493881226, "learning_rate": 0.001, "loss": 2.7555, "step": 66600 }, { "epoch": 8.623141564318034, "grad_norm": 0.8034050464630127, "learning_rate": 0.001, "loss": 2.7532, "step": 66700 }, { "epoch": 8.6360698125404, "grad_norm": 5.41121244430542, "learning_rate": 0.001, "loss": 2.7466, "step": 66800 }, { "epoch": 8.648998060762766, "grad_norm": 1.0441023111343384, "learning_rate": 0.001, "loss": 2.7746, "step": 66900 }, { "epoch": 8.661926308985132, "grad_norm": 1.6864733695983887, "learning_rate": 0.001, "loss": 2.764, "step": 67000 }, { "epoch": 8.674854557207498, "grad_norm": 1.337632179260254, "learning_rate": 0.001, "loss": 2.7595, "step": 67100 }, { "epoch": 8.687782805429864, "grad_norm": 1.4171411991119385, "learning_rate": 0.001, "loss": 2.7596, "step": 67200 }, { "epoch": 8.70071105365223, "grad_norm": 1.0201205015182495, "learning_rate": 0.001, "loss": 2.7671, "step": 67300 }, { "epoch": 8.713639301874595, "grad_norm": 1.0115344524383545, "learning_rate": 0.001, "loss": 2.7662, "step": 67400 }, { "epoch": 8.726567550096961, "grad_norm": 1.0609462261199951, "learning_rate": 0.001, "loss": 2.7393, "step": 67500 }, { "epoch": 8.739495798319329, "grad_norm": 0.9182627201080322, "learning_rate": 0.001, "loss": 2.7484, "step": 67600 }, { "epoch": 8.752424046541694, "grad_norm": 1.240238904953003, "learning_rate": 0.001, "loss": 2.7708, "step": 67700 }, { "epoch": 8.76535229476406, "grad_norm": 1.1152071952819824, "learning_rate": 0.001, "loss": 2.7667, "step": 67800 }, { "epoch": 8.778280542986426, "grad_norm": 3.0352251529693604, "learning_rate": 0.001, "loss": 2.7655, "step": 67900 }, { "epoch": 8.791208791208792, "grad_norm": 1.0837465524673462, "learning_rate": 0.001, "loss": 2.7478, "step": 68000 }, { "epoch": 8.804137039431158, "grad_norm": 0.8978697061538696, "learning_rate": 0.001, "loss": 2.7311, "step": 68100 }, { "epoch": 8.817065287653524, "grad_norm": 1.7222955226898193, "learning_rate": 0.001, "loss": 2.7468, "step": 68200 }, { "epoch": 8.82999353587589, "grad_norm": 1.0202816724777222, "learning_rate": 0.001, "loss": 2.7468, "step": 68300 }, { "epoch": 8.842921784098255, "grad_norm": 1.0737507343292236, "learning_rate": 0.001, "loss": 2.758, "step": 68400 }, { "epoch": 8.855850032320621, "grad_norm": 1.692369818687439, "learning_rate": 0.001, "loss": 2.7579, "step": 68500 }, { "epoch": 8.868778280542987, "grad_norm": 1.3789904117584229, "learning_rate": 0.001, "loss": 2.7458, "step": 68600 }, { "epoch": 8.881706528765353, "grad_norm": 1.171149492263794, "learning_rate": 0.001, "loss": 2.7782, "step": 68700 }, { "epoch": 8.894634776987719, "grad_norm": 0.7613237500190735, "learning_rate": 0.001, "loss": 2.7729, "step": 68800 }, { "epoch": 8.907563025210084, "grad_norm": 0.8251891136169434, "learning_rate": 0.001, "loss": 2.7567, "step": 68900 }, { "epoch": 8.92049127343245, "grad_norm": 1.6298612356185913, "learning_rate": 0.001, "loss": 2.7442, "step": 69000 }, { "epoch": 8.933419521654816, "grad_norm": 1.134397268295288, "learning_rate": 0.001, "loss": 2.7554, "step": 69100 }, { "epoch": 8.946347769877182, "grad_norm": 1.258986234664917, "learning_rate": 0.001, "loss": 2.7661, "step": 69200 }, { "epoch": 8.959276018099548, "grad_norm": 0.8591852784156799, "learning_rate": 0.001, "loss": 2.7615, "step": 69300 }, { "epoch": 8.972204266321913, "grad_norm": 0.8491867780685425, "learning_rate": 0.001, "loss": 2.7755, "step": 69400 }, { "epoch": 8.98513251454428, "grad_norm": 1.6283411979675293, "learning_rate": 0.001, "loss": 2.7849, "step": 69500 }, { "epoch": 8.998060762766645, "grad_norm": 1.0004658699035645, "learning_rate": 0.001, "loss": 2.7867, "step": 69600 }, { "epoch": 9.010989010989011, "grad_norm": 1.1037940979003906, "learning_rate": 0.001, "loss": 2.7232, "step": 69700 }, { "epoch": 9.023917259211377, "grad_norm": 0.8842139840126038, "learning_rate": 0.001, "loss": 2.6937, "step": 69800 }, { "epoch": 9.036845507433743, "grad_norm": 0.9884702563285828, "learning_rate": 0.001, "loss": 2.6876, "step": 69900 }, { "epoch": 9.049773755656108, "grad_norm": 0.8421281576156616, "learning_rate": 0.001, "loss": 2.6845, "step": 70000 }, { "epoch": 9.062702003878474, "grad_norm": 1.3887548446655273, "learning_rate": 0.001, "loss": 2.6759, "step": 70100 }, { "epoch": 9.07563025210084, "grad_norm": 1.0346109867095947, "learning_rate": 0.001, "loss": 2.6721, "step": 70200 }, { "epoch": 9.088558500323206, "grad_norm": 1.256400465965271, "learning_rate": 0.001, "loss": 2.6781, "step": 70300 }, { "epoch": 9.101486748545572, "grad_norm": 0.8142704367637634, "learning_rate": 0.001, "loss": 2.7019, "step": 70400 }, { "epoch": 9.114414996767938, "grad_norm": 1.1634830236434937, "learning_rate": 0.001, "loss": 2.6731, "step": 70500 }, { "epoch": 9.127343244990303, "grad_norm": 1.1792539358139038, "learning_rate": 0.001, "loss": 2.6909, "step": 70600 }, { "epoch": 9.14027149321267, "grad_norm": 0.946540355682373, "learning_rate": 0.001, "loss": 2.682, "step": 70700 }, { "epoch": 9.153199741435035, "grad_norm": 1.3108381032943726, "learning_rate": 0.001, "loss": 2.7016, "step": 70800 }, { "epoch": 9.1661279896574, "grad_norm": 0.9079214334487915, "learning_rate": 0.001, "loss": 2.6962, "step": 70900 }, { "epoch": 9.179056237879767, "grad_norm": 0.8933747410774231, "learning_rate": 0.001, "loss": 2.6912, "step": 71000 }, { "epoch": 9.191984486102132, "grad_norm": 2.151691198348999, "learning_rate": 0.001, "loss": 2.6948, "step": 71100 }, { "epoch": 9.204912734324498, "grad_norm": 0.9286857843399048, "learning_rate": 0.001, "loss": 2.693, "step": 71200 }, { "epoch": 9.217840982546864, "grad_norm": 0.9560434818267822, "learning_rate": 0.001, "loss": 2.6894, "step": 71300 }, { "epoch": 9.23076923076923, "grad_norm": 1.2572168111801147, "learning_rate": 0.001, "loss": 2.6859, "step": 71400 }, { "epoch": 9.243697478991596, "grad_norm": 0.8896794319152832, "learning_rate": 0.001, "loss": 2.7039, "step": 71500 }, { "epoch": 9.256625727213962, "grad_norm": 1.2362706661224365, "learning_rate": 0.001, "loss": 2.6978, "step": 71600 }, { "epoch": 9.26955397543633, "grad_norm": 0.9216800332069397, "learning_rate": 0.001, "loss": 2.6962, "step": 71700 }, { "epoch": 9.282482223658695, "grad_norm": 2.0505709648132324, "learning_rate": 0.001, "loss": 2.7119, "step": 71800 }, { "epoch": 9.29541047188106, "grad_norm": 1.0112839937210083, "learning_rate": 0.001, "loss": 2.6985, "step": 71900 }, { "epoch": 9.308338720103427, "grad_norm": 1.0925159454345703, "learning_rate": 0.001, "loss": 2.7142, "step": 72000 }, { "epoch": 9.321266968325792, "grad_norm": 1.0961880683898926, "learning_rate": 0.001, "loss": 2.7289, "step": 72100 }, { "epoch": 9.334195216548158, "grad_norm": 0.8614845871925354, "learning_rate": 0.001, "loss": 2.7123, "step": 72200 }, { "epoch": 9.347123464770524, "grad_norm": 0.8720149993896484, "learning_rate": 0.001, "loss": 2.7022, "step": 72300 }, { "epoch": 9.36005171299289, "grad_norm": 1.0294467210769653, "learning_rate": 0.001, "loss": 2.6896, "step": 72400 }, { "epoch": 9.372979961215256, "grad_norm": 0.8613417744636536, "learning_rate": 0.001, "loss": 2.7053, "step": 72500 }, { "epoch": 9.385908209437622, "grad_norm": 1.090896725654602, "learning_rate": 0.001, "loss": 2.7245, "step": 72600 }, { "epoch": 9.398836457659987, "grad_norm": 1.184139370918274, "learning_rate": 0.001, "loss": 2.7208, "step": 72700 }, { "epoch": 9.411764705882353, "grad_norm": 1.1321731805801392, "learning_rate": 0.001, "loss": 2.7141, "step": 72800 }, { "epoch": 9.424692954104719, "grad_norm": 1.0475479364395142, "learning_rate": 0.001, "loss": 2.7222, "step": 72900 }, { "epoch": 9.437621202327085, "grad_norm": 0.8946665525436401, "learning_rate": 0.001, "loss": 2.7346, "step": 73000 }, { "epoch": 9.45054945054945, "grad_norm": 0.9968447685241699, "learning_rate": 0.001, "loss": 2.6842, "step": 73100 }, { "epoch": 9.463477698771817, "grad_norm": 1.5438908338546753, "learning_rate": 0.001, "loss": 2.7047, "step": 73200 }, { "epoch": 9.476405946994182, "grad_norm": 0.8566017746925354, "learning_rate": 0.001, "loss": 2.7068, "step": 73300 }, { "epoch": 9.489334195216548, "grad_norm": 1.2360520362854004, "learning_rate": 0.001, "loss": 2.6903, "step": 73400 }, { "epoch": 9.502262443438914, "grad_norm": 1.388558268547058, "learning_rate": 0.001, "loss": 2.7159, "step": 73500 }, { "epoch": 9.51519069166128, "grad_norm": 0.7788105010986328, "learning_rate": 0.001, "loss": 2.7185, "step": 73600 }, { "epoch": 9.528118939883646, "grad_norm": 0.9103308320045471, "learning_rate": 0.001, "loss": 2.7144, "step": 73700 }, { "epoch": 9.541047188106011, "grad_norm": 0.8771539330482483, "learning_rate": 0.001, "loss": 2.7158, "step": 73800 }, { "epoch": 9.553975436328377, "grad_norm": 1.665895700454712, "learning_rate": 0.001, "loss": 2.697, "step": 73900 }, { "epoch": 9.566903684550743, "grad_norm": 1.0309876203536987, "learning_rate": 0.001, "loss": 2.7223, "step": 74000 }, { "epoch": 9.579831932773109, "grad_norm": 1.424079418182373, "learning_rate": 0.001, "loss": 2.7275, "step": 74100 }, { "epoch": 9.592760180995475, "grad_norm": 2.9845285415649414, "learning_rate": 0.001, "loss": 2.7155, "step": 74200 }, { "epoch": 9.60568842921784, "grad_norm": 0.9433808326721191, "learning_rate": 0.001, "loss": 2.7197, "step": 74300 }, { "epoch": 9.618616677440206, "grad_norm": 3.04238224029541, "learning_rate": 0.001, "loss": 2.7276, "step": 74400 }, { "epoch": 9.631544925662572, "grad_norm": 0.8325649499893188, "learning_rate": 0.001, "loss": 2.7209, "step": 74500 }, { "epoch": 9.644473173884938, "grad_norm": 0.8801798224449158, "learning_rate": 0.001, "loss": 2.7172, "step": 74600 }, { "epoch": 9.657401422107304, "grad_norm": 0.8683134317398071, "learning_rate": 0.001, "loss": 2.7103, "step": 74700 }, { "epoch": 9.67032967032967, "grad_norm": 1.7452878952026367, "learning_rate": 0.001, "loss": 2.7314, "step": 74800 }, { "epoch": 9.683257918552036, "grad_norm": 0.873221755027771, "learning_rate": 0.001, "loss": 2.7309, "step": 74900 }, { "epoch": 9.696186166774401, "grad_norm": 0.7590211629867554, "learning_rate": 0.001, "loss": 2.7321, "step": 75000 }, { "epoch": 9.709114414996767, "grad_norm": 1.3360341787338257, "learning_rate": 0.001, "loss": 2.7287, "step": 75100 }, { "epoch": 9.722042663219133, "grad_norm": 0.9545396566390991, "learning_rate": 0.001, "loss": 2.7271, "step": 75200 }, { "epoch": 9.7349709114415, "grad_norm": 0.7417583465576172, "learning_rate": 0.001, "loss": 2.7245, "step": 75300 }, { "epoch": 9.747899159663866, "grad_norm": 0.9567973613739014, "learning_rate": 0.001, "loss": 2.7244, "step": 75400 }, { "epoch": 9.760827407886232, "grad_norm": 0.9093434810638428, "learning_rate": 0.001, "loss": 2.7357, "step": 75500 }, { "epoch": 9.773755656108598, "grad_norm": 1.0726120471954346, "learning_rate": 0.001, "loss": 2.7444, "step": 75600 }, { "epoch": 9.786683904330964, "grad_norm": 0.8388472199440002, "learning_rate": 0.001, "loss": 2.7227, "step": 75700 }, { "epoch": 9.79961215255333, "grad_norm": 0.7029350996017456, "learning_rate": 0.001, "loss": 2.7188, "step": 75800 }, { "epoch": 9.812540400775696, "grad_norm": 1.2813917398452759, "learning_rate": 0.001, "loss": 2.7317, "step": 75900 }, { "epoch": 9.825468648998061, "grad_norm": 1.4075497388839722, "learning_rate": 0.001, "loss": 2.7332, "step": 76000 }, { "epoch": 9.838396897220427, "grad_norm": 3.540656089782715, "learning_rate": 0.001, "loss": 2.7354, "step": 76100 }, { "epoch": 9.851325145442793, "grad_norm": 1.5768593549728394, "learning_rate": 0.001, "loss": 2.7232, "step": 76200 }, { "epoch": 9.864253393665159, "grad_norm": 0.8924940228462219, "learning_rate": 0.001, "loss": 2.7283, "step": 76300 }, { "epoch": 9.877181641887525, "grad_norm": 0.963885486125946, "learning_rate": 0.001, "loss": 2.7276, "step": 76400 }, { "epoch": 9.89010989010989, "grad_norm": 35.0478401184082, "learning_rate": 0.001, "loss": 2.7463, "step": 76500 }, { "epoch": 9.903038138332256, "grad_norm": 1.1581839323043823, "learning_rate": 0.001, "loss": 2.7266, "step": 76600 }, { "epoch": 9.915966386554622, "grad_norm": 1.272782325744629, "learning_rate": 0.001, "loss": 2.7239, "step": 76700 }, { "epoch": 9.928894634776988, "grad_norm": 0.8745754957199097, "learning_rate": 0.001, "loss": 2.6982, "step": 76800 }, { "epoch": 9.941822882999354, "grad_norm": 0.9268345236778259, "learning_rate": 0.001, "loss": 2.7404, "step": 76900 }, { "epoch": 9.95475113122172, "grad_norm": 0.780647337436676, "learning_rate": 0.001, "loss": 2.7392, "step": 77000 }, { "epoch": 9.967679379444085, "grad_norm": 1.1054409742355347, "learning_rate": 0.001, "loss": 2.7276, "step": 77100 }, { "epoch": 9.980607627666451, "grad_norm": 1.2892589569091797, "learning_rate": 0.001, "loss": 2.7227, "step": 77200 }, { "epoch": 9.993535875888817, "grad_norm": 1.3129256963729858, "learning_rate": 0.001, "loss": 2.7297, "step": 77300 }, { "epoch": 10.006464124111183, "grad_norm": 1.5200209617614746, "learning_rate": 0.001, "loss": 2.6552, "step": 77400 }, { "epoch": 10.019392372333549, "grad_norm": 1.8013774156570435, "learning_rate": 0.001, "loss": 2.6559, "step": 77500 }, { "epoch": 10.032320620555915, "grad_norm": 2.0734710693359375, "learning_rate": 0.001, "loss": 2.6633, "step": 77600 }, { "epoch": 10.04524886877828, "grad_norm": 1.4813411235809326, "learning_rate": 0.001, "loss": 2.6528, "step": 77700 }, { "epoch": 10.058177117000646, "grad_norm": 13.583207130432129, "learning_rate": 0.001, "loss": 2.6429, "step": 77800 }, { "epoch": 10.071105365223012, "grad_norm": 3.28782320022583, "learning_rate": 0.001, "loss": 2.6655, "step": 77900 }, { "epoch": 10.084033613445378, "grad_norm": 1.3249520063400269, "learning_rate": 0.001, "loss": 2.6516, "step": 78000 }, { "epoch": 10.096961861667744, "grad_norm": 2.9735586643218994, "learning_rate": 0.001, "loss": 2.6662, "step": 78100 }, { "epoch": 10.10989010989011, "grad_norm": 11.060941696166992, "learning_rate": 0.001, "loss": 2.6303, "step": 78200 }, { "epoch": 10.122818358112475, "grad_norm": 1.635340690612793, "learning_rate": 0.001, "loss": 2.6492, "step": 78300 }, { "epoch": 10.135746606334841, "grad_norm": 1.4860790967941284, "learning_rate": 0.001, "loss": 2.6532, "step": 78400 }, { "epoch": 10.148674854557207, "grad_norm": 16.866540908813477, "learning_rate": 0.001, "loss": 2.6869, "step": 78500 }, { "epoch": 10.161603102779573, "grad_norm": 1.4368072748184204, "learning_rate": 0.001, "loss": 2.6923, "step": 78600 }, { "epoch": 10.174531351001939, "grad_norm": 2.011688470840454, "learning_rate": 0.001, "loss": 2.6715, "step": 78700 }, { "epoch": 10.187459599224304, "grad_norm": 1.5369997024536133, "learning_rate": 0.001, "loss": 2.6796, "step": 78800 }, { "epoch": 10.20038784744667, "grad_norm": 1.299744725227356, "learning_rate": 0.001, "loss": 2.6713, "step": 78900 }, { "epoch": 10.213316095669036, "grad_norm": 1.5470356941223145, "learning_rate": 0.001, "loss": 2.6757, "step": 79000 }, { "epoch": 10.226244343891402, "grad_norm": 1.7392202615737915, "learning_rate": 0.001, "loss": 2.6753, "step": 79100 }, { "epoch": 10.239172592113768, "grad_norm": 1.9048188924789429, "learning_rate": 0.001, "loss": 2.673, "step": 79200 }, { "epoch": 10.252100840336134, "grad_norm": 1.4483544826507568, "learning_rate": 0.001, "loss": 2.6766, "step": 79300 }, { "epoch": 10.265029088558501, "grad_norm": 1.9516232013702393, "learning_rate": 0.001, "loss": 2.6755, "step": 79400 }, { "epoch": 10.277957336780867, "grad_norm": 6.30878210067749, "learning_rate": 0.001, "loss": 2.6642, "step": 79500 }, { "epoch": 10.290885585003233, "grad_norm": 3.4453213214874268, "learning_rate": 0.001, "loss": 2.6897, "step": 79600 }, { "epoch": 10.303813833225599, "grad_norm": 2.2907896041870117, "learning_rate": 0.001, "loss": 2.6749, "step": 79700 }, { "epoch": 10.316742081447964, "grad_norm": 1.7642360925674438, "learning_rate": 0.001, "loss": 2.6822, "step": 79800 }, { "epoch": 10.32967032967033, "grad_norm": 1.5711822509765625, "learning_rate": 0.001, "loss": 2.678, "step": 79900 }, { "epoch": 10.342598577892696, "grad_norm": 1.1433390378952026, "learning_rate": 0.001, "loss": 2.7269, "step": 80000 }, { "epoch": 10.355526826115062, "grad_norm": 1.4434189796447754, "learning_rate": 0.001, "loss": 2.6873, "step": 80100 }, { "epoch": 10.368455074337428, "grad_norm": 1.5983542203903198, "learning_rate": 0.001, "loss": 2.6628, "step": 80200 }, { "epoch": 10.381383322559794, "grad_norm": 1.824179768562317, "learning_rate": 0.001, "loss": 2.6834, "step": 80300 }, { "epoch": 10.39431157078216, "grad_norm": 1.7431809902191162, "learning_rate": 0.001, "loss": 2.6961, "step": 80400 }, { "epoch": 10.407239819004525, "grad_norm": 1.7386524677276611, "learning_rate": 0.001, "loss": 2.7124, "step": 80500 }, { "epoch": 10.420168067226891, "grad_norm": 1.9140936136245728, "learning_rate": 0.001, "loss": 2.6944, "step": 80600 }, { "epoch": 10.433096315449257, "grad_norm": 1.6061726808547974, "learning_rate": 0.001, "loss": 2.6983, "step": 80700 }, { "epoch": 10.446024563671623, "grad_norm": 1.890636682510376, "learning_rate": 0.001, "loss": 2.7124, "step": 80800 }, { "epoch": 10.458952811893989, "grad_norm": 8.99052906036377, "learning_rate": 0.001, "loss": 2.7097, "step": 80900 }, { "epoch": 10.471881060116354, "grad_norm": 3.308232307434082, "learning_rate": 0.001, "loss": 2.6871, "step": 81000 }, { "epoch": 10.48480930833872, "grad_norm": 1.7526073455810547, "learning_rate": 0.001, "loss": 2.6964, "step": 81100 }, { "epoch": 10.497737556561086, "grad_norm": 2.5073893070220947, "learning_rate": 0.001, "loss": 2.6872, "step": 81200 }, { "epoch": 10.510665804783452, "grad_norm": 2.065606117248535, "learning_rate": 0.001, "loss": 2.7025, "step": 81300 }, { "epoch": 10.523594053005818, "grad_norm": 1.7259389162063599, "learning_rate": 0.001, "loss": 2.6858, "step": 81400 }, { "epoch": 10.536522301228183, "grad_norm": 1.5471388101577759, "learning_rate": 0.001, "loss": 2.7194, "step": 81500 }, { "epoch": 10.54945054945055, "grad_norm": 1.5582093000411987, "learning_rate": 0.001, "loss": 2.7093, "step": 81600 }, { "epoch": 10.562378797672915, "grad_norm": 1.8000075817108154, "learning_rate": 0.001, "loss": 2.6892, "step": 81700 }, { "epoch": 10.575307045895281, "grad_norm": 11.107261657714844, "learning_rate": 0.001, "loss": 2.6919, "step": 81800 }, { "epoch": 10.588235294117647, "grad_norm": 1.1564598083496094, "learning_rate": 0.001, "loss": 2.6924, "step": 81900 }, { "epoch": 10.601163542340013, "grad_norm": 2.7805750370025635, "learning_rate": 0.001, "loss": 2.6854, "step": 82000 }, { "epoch": 10.614091790562378, "grad_norm": 1.8032679557800293, "learning_rate": 0.001, "loss": 2.6978, "step": 82100 }, { "epoch": 10.627020038784744, "grad_norm": 1.8383549451828003, "learning_rate": 0.001, "loss": 2.6772, "step": 82200 }, { "epoch": 10.63994828700711, "grad_norm": 1.401485800743103, "learning_rate": 0.001, "loss": 2.6841, "step": 82300 }, { "epoch": 10.652876535229476, "grad_norm": 2.0449604988098145, "learning_rate": 0.001, "loss": 2.6931, "step": 82400 }, { "epoch": 10.665804783451842, "grad_norm": 1.5781915187835693, "learning_rate": 0.001, "loss": 2.6879, "step": 82500 }, { "epoch": 10.678733031674208, "grad_norm": 3.114753484725952, "learning_rate": 0.001, "loss": 2.6703, "step": 82600 }, { "epoch": 10.691661279896573, "grad_norm": 1.6629799604415894, "learning_rate": 0.001, "loss": 2.6962, "step": 82700 }, { "epoch": 10.70458952811894, "grad_norm": 2.135855197906494, "learning_rate": 0.001, "loss": 2.6744, "step": 82800 }, { "epoch": 10.717517776341305, "grad_norm": 1.4240350723266602, "learning_rate": 0.001, "loss": 2.6927, "step": 82900 }, { "epoch": 10.73044602456367, "grad_norm": 1.6837315559387207, "learning_rate": 0.001, "loss": 2.7063, "step": 83000 }, { "epoch": 10.743374272786038, "grad_norm": 2.3321783542633057, "learning_rate": 0.001, "loss": 2.7015, "step": 83100 }, { "epoch": 10.756302521008404, "grad_norm": 1.945072054862976, "learning_rate": 0.001, "loss": 2.6915, "step": 83200 }, { "epoch": 10.76923076923077, "grad_norm": 1.5299361944198608, "learning_rate": 0.001, "loss": 2.6939, "step": 83300 }, { "epoch": 10.782159017453136, "grad_norm": 1.3456792831420898, "learning_rate": 0.001, "loss": 2.6889, "step": 83400 }, { "epoch": 10.795087265675502, "grad_norm": 1.7869873046875, "learning_rate": 0.001, "loss": 2.6961, "step": 83500 }, { "epoch": 10.808015513897868, "grad_norm": 1.8991650342941284, "learning_rate": 0.001, "loss": 2.6928, "step": 83600 }, { "epoch": 10.820943762120233, "grad_norm": 2.8951282501220703, "learning_rate": 0.001, "loss": 2.6878, "step": 83700 }, { "epoch": 10.8338720103426, "grad_norm": 1.5730149745941162, "learning_rate": 0.001, "loss": 2.7126, "step": 83800 }, { "epoch": 10.846800258564965, "grad_norm": 1.2202272415161133, "learning_rate": 0.001, "loss": 2.7248, "step": 83900 }, { "epoch": 10.85972850678733, "grad_norm": 1.2895640134811401, "learning_rate": 0.001, "loss": 2.6981, "step": 84000 }, { "epoch": 10.872656755009697, "grad_norm": 42.596431732177734, "learning_rate": 0.001, "loss": 2.7034, "step": 84100 }, { "epoch": 10.885585003232062, "grad_norm": 2.500561237335205, "learning_rate": 0.001, "loss": 2.6863, "step": 84200 }, { "epoch": 10.898513251454428, "grad_norm": 1.9021297693252563, "learning_rate": 0.001, "loss": 2.6866, "step": 84300 }, { "epoch": 10.911441499676794, "grad_norm": 2.349684715270996, "learning_rate": 0.001, "loss": 2.7168, "step": 84400 }, { "epoch": 10.92436974789916, "grad_norm": 3.5122315883636475, "learning_rate": 0.001, "loss": 2.6951, "step": 84500 }, { "epoch": 10.937297996121526, "grad_norm": 2.262617349624634, "learning_rate": 0.001, "loss": 2.692, "step": 84600 }, { "epoch": 10.950226244343892, "grad_norm": 2.4772520065307617, "learning_rate": 0.001, "loss": 2.701, "step": 84700 }, { "epoch": 10.963154492566257, "grad_norm": 1.6619492769241333, "learning_rate": 0.001, "loss": 2.7047, "step": 84800 }, { "epoch": 10.976082740788623, "grad_norm": 1.7089967727661133, "learning_rate": 0.001, "loss": 2.6993, "step": 84900 }, { "epoch": 10.989010989010989, "grad_norm": 1.477765440940857, "learning_rate": 0.001, "loss": 2.6946, "step": 85000 }, { "epoch": 11.001939237233355, "grad_norm": 1.0294665098190308, "learning_rate": 0.001, "loss": 2.6772, "step": 85100 }, { "epoch": 11.01486748545572, "grad_norm": 1.265723466873169, "learning_rate": 0.001, "loss": 2.6159, "step": 85200 }, { "epoch": 11.027795733678087, "grad_norm": 0.8327229619026184, "learning_rate": 0.001, "loss": 2.6031, "step": 85300 }, { "epoch": 11.040723981900452, "grad_norm": 1.0511152744293213, "learning_rate": 0.001, "loss": 2.6062, "step": 85400 }, { "epoch": 11.053652230122818, "grad_norm": 1.2999374866485596, "learning_rate": 0.001, "loss": 2.6367, "step": 85500 }, { "epoch": 11.066580478345184, "grad_norm": 1.1072615385055542, "learning_rate": 0.001, "loss": 2.6241, "step": 85600 }, { "epoch": 11.07950872656755, "grad_norm": 1.1631008386611938, "learning_rate": 0.001, "loss": 2.6286, "step": 85700 }, { "epoch": 11.092436974789916, "grad_norm": 0.8590694069862366, "learning_rate": 0.001, "loss": 2.6104, "step": 85800 }, { "epoch": 11.105365223012281, "grad_norm": 0.8791977167129517, "learning_rate": 0.001, "loss": 2.6406, "step": 85900 }, { "epoch": 11.118293471234647, "grad_norm": 1.2491915225982666, "learning_rate": 0.001, "loss": 2.6252, "step": 86000 }, { "epoch": 11.131221719457013, "grad_norm": 2.4702064990997314, "learning_rate": 0.001, "loss": 2.6343, "step": 86100 }, { "epoch": 11.144149967679379, "grad_norm": 2.2694764137268066, "learning_rate": 0.001, "loss": 2.6452, "step": 86200 }, { "epoch": 11.157078215901745, "grad_norm": 0.7111111283302307, "learning_rate": 0.001, "loss": 2.6379, "step": 86300 }, { "epoch": 11.17000646412411, "grad_norm": 1.3306670188903809, "learning_rate": 0.001, "loss": 2.6328, "step": 86400 }, { "epoch": 11.182934712346476, "grad_norm": 0.9429754018783569, "learning_rate": 0.001, "loss": 2.6548, "step": 86500 }, { "epoch": 11.195862960568842, "grad_norm": 1.1300415992736816, "learning_rate": 0.001, "loss": 2.6314, "step": 86600 }, { "epoch": 11.208791208791208, "grad_norm": 1.0663219690322876, "learning_rate": 0.001, "loss": 2.6478, "step": 86700 }, { "epoch": 11.221719457013574, "grad_norm": 1.29740309715271, "learning_rate": 0.001, "loss": 2.6518, "step": 86800 }, { "epoch": 11.23464770523594, "grad_norm": 1.1628097295761108, "learning_rate": 0.001, "loss": 2.6759, "step": 86900 }, { "epoch": 11.247575953458306, "grad_norm": 0.948623776435852, "learning_rate": 0.001, "loss": 2.6763, "step": 87000 }, { "epoch": 11.260504201680673, "grad_norm": 11.88587474822998, "learning_rate": 0.001, "loss": 2.6731, "step": 87100 }, { "epoch": 11.273432449903039, "grad_norm": 0.883820116519928, "learning_rate": 0.001, "loss": 2.6565, "step": 87200 }, { "epoch": 11.286360698125405, "grad_norm": 0.9652610421180725, "learning_rate": 0.001, "loss": 2.6587, "step": 87300 }, { "epoch": 11.29928894634777, "grad_norm": 0.9308032989501953, "learning_rate": 0.001, "loss": 2.6777, "step": 87400 }, { "epoch": 11.312217194570136, "grad_norm": 1.1909735202789307, "learning_rate": 0.001, "loss": 2.6296, "step": 87500 }, { "epoch": 11.325145442792502, "grad_norm": 0.9454175233840942, "learning_rate": 0.001, "loss": 2.6764, "step": 87600 }, { "epoch": 11.338073691014868, "grad_norm": 1.1811083555221558, "learning_rate": 0.001, "loss": 2.6646, "step": 87700 }, { "epoch": 11.351001939237234, "grad_norm": 0.8846003413200378, "learning_rate": 0.001, "loss": 2.6707, "step": 87800 }, { "epoch": 11.3639301874596, "grad_norm": 2.3255388736724854, "learning_rate": 0.001, "loss": 2.6541, "step": 87900 }, { "epoch": 11.376858435681966, "grad_norm": 1.1596791744232178, "learning_rate": 0.001, "loss": 2.6517, "step": 88000 }, { "epoch": 11.389786683904331, "grad_norm": 1.5261799097061157, "learning_rate": 0.001, "loss": 2.6687, "step": 88100 }, { "epoch": 11.402714932126697, "grad_norm": 1.0909160375595093, "learning_rate": 0.001, "loss": 2.6593, "step": 88200 }, { "epoch": 11.415643180349063, "grad_norm": 1.1748400926589966, "learning_rate": 0.001, "loss": 2.6318, "step": 88300 }, { "epoch": 11.428571428571429, "grad_norm": 0.8547530770301819, "learning_rate": 0.001, "loss": 2.6421, "step": 88400 }, { "epoch": 11.441499676793795, "grad_norm": 1.3063973188400269, "learning_rate": 0.001, "loss": 2.6584, "step": 88500 }, { "epoch": 11.45442792501616, "grad_norm": 1.095815896987915, "learning_rate": 0.001, "loss": 2.6586, "step": 88600 }, { "epoch": 11.467356173238526, "grad_norm": 0.9768524765968323, "learning_rate": 0.001, "loss": 2.6554, "step": 88700 }, { "epoch": 11.480284421460892, "grad_norm": 2.9972920417785645, "learning_rate": 0.001, "loss": 2.6542, "step": 88800 }, { "epoch": 11.493212669683258, "grad_norm": 1.0556726455688477, "learning_rate": 0.001, "loss": 2.6606, "step": 88900 }, { "epoch": 11.506140917905624, "grad_norm": 0.7791993618011475, "learning_rate": 0.001, "loss": 2.6761, "step": 89000 }, { "epoch": 11.51906916612799, "grad_norm": 1.3203805685043335, "learning_rate": 0.001, "loss": 2.6574, "step": 89100 }, { "epoch": 11.531997414350355, "grad_norm": 3.360579490661621, "learning_rate": 0.001, "loss": 2.6603, "step": 89200 }, { "epoch": 11.544925662572721, "grad_norm": 1.0178064107894897, "learning_rate": 0.001, "loss": 2.6436, "step": 89300 }, { "epoch": 11.557853910795087, "grad_norm": 3.6343777179718018, "learning_rate": 0.001, "loss": 2.6639, "step": 89400 }, { "epoch": 11.570782159017453, "grad_norm": 5.497527122497559, "learning_rate": 0.001, "loss": 2.6611, "step": 89500 }, { "epoch": 11.583710407239819, "grad_norm": 0.6952324509620667, "learning_rate": 0.001, "loss": 2.6847, "step": 89600 }, { "epoch": 11.596638655462185, "grad_norm": 1.0872278213500977, "learning_rate": 0.001, "loss": 2.6734, "step": 89700 }, { "epoch": 11.60956690368455, "grad_norm": 1.023619532585144, "learning_rate": 0.001, "loss": 2.6713, "step": 89800 }, { "epoch": 11.622495151906916, "grad_norm": 0.8020855188369751, "learning_rate": 0.001, "loss": 2.6807, "step": 89900 }, { "epoch": 11.635423400129282, "grad_norm": 1.5020579099655151, "learning_rate": 0.001, "loss": 2.68, "step": 90000 }, { "epoch": 11.648351648351648, "grad_norm": 2.6273889541625977, "learning_rate": 0.001, "loss": 2.6899, "step": 90100 }, { "epoch": 11.661279896574014, "grad_norm": 1.022353172302246, "learning_rate": 0.001, "loss": 2.6661, "step": 90200 }, { "epoch": 11.67420814479638, "grad_norm": 1.1088885068893433, "learning_rate": 0.001, "loss": 2.6621, "step": 90300 }, { "epoch": 11.687136393018745, "grad_norm": 0.9006462693214417, "learning_rate": 0.001, "loss": 2.6664, "step": 90400 }, { "epoch": 11.700064641241111, "grad_norm": 1.5229220390319824, "learning_rate": 0.001, "loss": 2.6815, "step": 90500 }, { "epoch": 11.712992889463477, "grad_norm": 1.3698803186416626, "learning_rate": 0.001, "loss": 2.6955, "step": 90600 }, { "epoch": 11.725921137685843, "grad_norm": 0.9904107451438904, "learning_rate": 0.001, "loss": 2.6764, "step": 90700 }, { "epoch": 11.73884938590821, "grad_norm": 1.030483603477478, "learning_rate": 0.001, "loss": 2.6753, "step": 90800 }, { "epoch": 11.751777634130576, "grad_norm": 1.7445719242095947, "learning_rate": 0.001, "loss": 2.6785, "step": 90900 }, { "epoch": 11.764705882352942, "grad_norm": 1.4486085176467896, "learning_rate": 0.001, "loss": 2.6657, "step": 91000 }, { "epoch": 11.777634130575308, "grad_norm": 0.8699414134025574, "learning_rate": 0.001, "loss": 2.6707, "step": 91100 }, { "epoch": 11.790562378797674, "grad_norm": 2.386399030685425, "learning_rate": 0.001, "loss": 2.6931, "step": 91200 }, { "epoch": 11.80349062702004, "grad_norm": 2.663858652114868, "learning_rate": 0.001, "loss": 2.693, "step": 91300 }, { "epoch": 11.816418875242405, "grad_norm": 0.9542876482009888, "learning_rate": 0.001, "loss": 2.6837, "step": 91400 }, { "epoch": 11.829347123464771, "grad_norm": 0.8943921327590942, "learning_rate": 0.001, "loss": 2.6835, "step": 91500 }, { "epoch": 11.842275371687137, "grad_norm": 0.9962356686592102, "learning_rate": 0.001, "loss": 2.6631, "step": 91600 }, { "epoch": 11.855203619909503, "grad_norm": 0.7357028722763062, "learning_rate": 0.001, "loss": 2.6659, "step": 91700 }, { "epoch": 11.868131868131869, "grad_norm": 0.8244296908378601, "learning_rate": 0.001, "loss": 2.6913, "step": 91800 }, { "epoch": 11.881060116354234, "grad_norm": 2.7442877292633057, "learning_rate": 0.001, "loss": 2.6918, "step": 91900 }, { "epoch": 11.8939883645766, "grad_norm": 1.158800721168518, "learning_rate": 0.001, "loss": 2.6801, "step": 92000 }, { "epoch": 11.906916612798966, "grad_norm": 0.9720841646194458, "learning_rate": 0.001, "loss": 2.6829, "step": 92100 }, { "epoch": 11.919844861021332, "grad_norm": 0.8982048630714417, "learning_rate": 0.001, "loss": 2.6787, "step": 92200 }, { "epoch": 11.932773109243698, "grad_norm": 0.9631633162498474, "learning_rate": 0.001, "loss": 2.6934, "step": 92300 }, { "epoch": 11.945701357466064, "grad_norm": 2.1384406089782715, "learning_rate": 0.001, "loss": 2.696, "step": 92400 }, { "epoch": 11.95862960568843, "grad_norm": 1.4067777395248413, "learning_rate": 0.001, "loss": 2.6795, "step": 92500 }, { "epoch": 11.971557853910795, "grad_norm": 1.1825356483459473, "learning_rate": 0.001, "loss": 2.685, "step": 92600 }, { "epoch": 11.984486102133161, "grad_norm": 2.022620439529419, "learning_rate": 0.001, "loss": 2.6759, "step": 92700 }, { "epoch": 11.997414350355527, "grad_norm": 0.9231863021850586, "learning_rate": 0.001, "loss": 2.6809, "step": 92800 }, { "epoch": 12.010342598577893, "grad_norm": 0.8348036408424377, "learning_rate": 0.001, "loss": 2.6106, "step": 92900 }, { "epoch": 12.023270846800258, "grad_norm": 1.127417802810669, "learning_rate": 0.001, "loss": 2.6091, "step": 93000 }, { "epoch": 12.036199095022624, "grad_norm": 1.2826160192489624, "learning_rate": 0.001, "loss": 2.5929, "step": 93100 }, { "epoch": 12.04912734324499, "grad_norm": 0.8691980838775635, "learning_rate": 0.001, "loss": 2.5947, "step": 93200 }, { "epoch": 12.062055591467356, "grad_norm": 1.3016541004180908, "learning_rate": 0.001, "loss": 2.6002, "step": 93300 }, { "epoch": 12.074983839689722, "grad_norm": 1.129916787147522, "learning_rate": 0.001, "loss": 2.619, "step": 93400 }, { "epoch": 12.087912087912088, "grad_norm": 0.7897166013717651, "learning_rate": 0.001, "loss": 2.5986, "step": 93500 }, { "epoch": 12.100840336134453, "grad_norm": 0.7885643243789673, "learning_rate": 0.001, "loss": 2.6031, "step": 93600 }, { "epoch": 12.11376858435682, "grad_norm": 0.9378390908241272, "learning_rate": 0.001, "loss": 2.6112, "step": 93700 }, { "epoch": 12.126696832579185, "grad_norm": 1.0299588441848755, "learning_rate": 0.001, "loss": 2.6293, "step": 93800 }, { "epoch": 12.139625080801551, "grad_norm": 0.8726086616516113, "learning_rate": 0.001, "loss": 2.6101, "step": 93900 }, { "epoch": 12.152553329023917, "grad_norm": 2.20650315284729, "learning_rate": 0.001, "loss": 2.6059, "step": 94000 }, { "epoch": 12.165481577246283, "grad_norm": 1.1698155403137207, "learning_rate": 0.001, "loss": 2.6023, "step": 94100 }, { "epoch": 12.178409825468648, "grad_norm": 3.3406498432159424, "learning_rate": 0.001, "loss": 2.6066, "step": 94200 }, { "epoch": 12.191338073691014, "grad_norm": 0.8710218667984009, "learning_rate": 0.001, "loss": 2.6135, "step": 94300 }, { "epoch": 12.20426632191338, "grad_norm": 1.0140999555587769, "learning_rate": 0.001, "loss": 2.6454, "step": 94400 }, { "epoch": 12.217194570135746, "grad_norm": 0.9547477960586548, "learning_rate": 0.001, "loss": 2.6003, "step": 94500 }, { "epoch": 12.230122818358112, "grad_norm": 0.8141343593597412, "learning_rate": 0.001, "loss": 2.6176, "step": 94600 }, { "epoch": 12.243051066580477, "grad_norm": 1.1595431566238403, "learning_rate": 0.001, "loss": 2.5998, "step": 94700 }, { "epoch": 12.255979314802843, "grad_norm": 0.9996180534362793, "learning_rate": 0.001, "loss": 2.6178, "step": 94800 }, { "epoch": 12.268907563025211, "grad_norm": 1.0080978870391846, "learning_rate": 0.001, "loss": 2.6244, "step": 94900 }, { "epoch": 12.281835811247577, "grad_norm": 0.9158511161804199, "learning_rate": 0.001, "loss": 2.6276, "step": 95000 }, { "epoch": 12.294764059469943, "grad_norm": 0.9087651371955872, "learning_rate": 0.001, "loss": 2.6093, "step": 95100 }, { "epoch": 12.307692307692308, "grad_norm": 0.9143967032432556, "learning_rate": 0.001, "loss": 2.6165, "step": 95200 }, { "epoch": 12.320620555914674, "grad_norm": 0.9838846325874329, "learning_rate": 0.001, "loss": 2.6231, "step": 95300 }, { "epoch": 12.33354880413704, "grad_norm": 2.7699103355407715, "learning_rate": 0.001, "loss": 2.6206, "step": 95400 }, { "epoch": 12.346477052359406, "grad_norm": 0.7824585437774658, "learning_rate": 0.001, "loss": 2.6384, "step": 95500 }, { "epoch": 12.359405300581772, "grad_norm": 1.2936850786209106, "learning_rate": 0.001, "loss": 2.6218, "step": 95600 }, { "epoch": 12.372333548804137, "grad_norm": 0.9893867373466492, "learning_rate": 0.001, "loss": 2.6313, "step": 95700 }, { "epoch": 12.385261797026503, "grad_norm": 0.9381806254386902, "learning_rate": 0.001, "loss": 2.5988, "step": 95800 }, { "epoch": 12.39819004524887, "grad_norm": 1.2195684909820557, "learning_rate": 0.001, "loss": 2.6178, "step": 95900 }, { "epoch": 12.411118293471235, "grad_norm": 0.9846706986427307, "learning_rate": 0.001, "loss": 2.6208, "step": 96000 }, { "epoch": 12.4240465416936, "grad_norm": 0.8426992893218994, "learning_rate": 0.001, "loss": 2.6395, "step": 96100 }, { "epoch": 12.436974789915967, "grad_norm": 1.0667080879211426, "learning_rate": 0.001, "loss": 2.6147, "step": 96200 }, { "epoch": 12.449903038138332, "grad_norm": 2.7010085582733154, "learning_rate": 0.001, "loss": 2.6522, "step": 96300 }, { "epoch": 12.462831286360698, "grad_norm": 0.8123608827590942, "learning_rate": 0.001, "loss": 2.646, "step": 96400 }, { "epoch": 12.475759534583064, "grad_norm": 3.817003011703491, "learning_rate": 0.001, "loss": 2.6588, "step": 96500 }, { "epoch": 12.48868778280543, "grad_norm": 1.028645634651184, "learning_rate": 0.001, "loss": 2.627, "step": 96600 }, { "epoch": 12.501616031027796, "grad_norm": 0.9402895569801331, "learning_rate": 0.001, "loss": 2.6404, "step": 96700 }, { "epoch": 12.514544279250162, "grad_norm": 0.8970463275909424, "learning_rate": 0.001, "loss": 2.6334, "step": 96800 }, { "epoch": 12.527472527472527, "grad_norm": 0.8597877025604248, "learning_rate": 0.001, "loss": 2.6351, "step": 96900 }, { "epoch": 12.540400775694893, "grad_norm": 1.1909126043319702, "learning_rate": 0.001, "loss": 2.6386, "step": 97000 }, { "epoch": 12.553329023917259, "grad_norm": 1.5252606868743896, "learning_rate": 0.001, "loss": 2.6513, "step": 97100 }, { "epoch": 12.566257272139625, "grad_norm": 0.8709731101989746, "learning_rate": 0.001, "loss": 2.6173, "step": 97200 }, { "epoch": 12.57918552036199, "grad_norm": 3.6886866092681885, "learning_rate": 0.001, "loss": 2.6543, "step": 97300 }, { "epoch": 12.592113768584356, "grad_norm": 1.1346412897109985, "learning_rate": 0.001, "loss": 2.6447, "step": 97400 }, { "epoch": 12.605042016806722, "grad_norm": 2.621077537536621, "learning_rate": 0.001, "loss": 2.6481, "step": 97500 }, { "epoch": 12.617970265029088, "grad_norm": 0.8778619170188904, "learning_rate": 0.001, "loss": 2.6518, "step": 97600 }, { "epoch": 12.630898513251454, "grad_norm": 0.9916554093360901, "learning_rate": 0.001, "loss": 2.6447, "step": 97700 }, { "epoch": 12.64382676147382, "grad_norm": 0.8293843865394592, "learning_rate": 0.001, "loss": 2.6474, "step": 97800 }, { "epoch": 12.656755009696186, "grad_norm": 0.8228791356086731, "learning_rate": 0.001, "loss": 2.6398, "step": 97900 }, { "epoch": 12.669683257918551, "grad_norm": 1.0537912845611572, "learning_rate": 0.001, "loss": 2.6489, "step": 98000 }, { "epoch": 12.682611506140917, "grad_norm": 0.9879679679870605, "learning_rate": 0.001, "loss": 2.6505, "step": 98100 }, { "epoch": 12.695539754363283, "grad_norm": 0.9865432977676392, "learning_rate": 0.001, "loss": 2.668, "step": 98200 }, { "epoch": 12.708468002585649, "grad_norm": 1.4924379587173462, "learning_rate": 0.001, "loss": 2.6639, "step": 98300 }, { "epoch": 12.721396250808015, "grad_norm": 3.8619749546051025, "learning_rate": 0.001, "loss": 2.6344, "step": 98400 }, { "epoch": 12.73432449903038, "grad_norm": 0.7283621430397034, "learning_rate": 0.001, "loss": 2.6508, "step": 98500 }, { "epoch": 12.747252747252748, "grad_norm": 0.8155194520950317, "learning_rate": 0.001, "loss": 2.6562, "step": 98600 }, { "epoch": 12.760180995475114, "grad_norm": 0.9900557398796082, "learning_rate": 0.001, "loss": 2.6468, "step": 98700 }, { "epoch": 12.77310924369748, "grad_norm": 1.1180295944213867, "learning_rate": 0.001, "loss": 2.6634, "step": 98800 }, { "epoch": 12.786037491919846, "grad_norm": 1.0277903079986572, "learning_rate": 0.001, "loss": 2.6228, "step": 98900 }, { "epoch": 12.798965740142211, "grad_norm": 1.0414572954177856, "learning_rate": 0.001, "loss": 2.6492, "step": 99000 }, { "epoch": 12.811893988364577, "grad_norm": 0.9646109938621521, "learning_rate": 0.001, "loss": 2.6737, "step": 99100 }, { "epoch": 12.824822236586943, "grad_norm": 1.0240024328231812, "learning_rate": 0.001, "loss": 2.6531, "step": 99200 }, { "epoch": 12.837750484809309, "grad_norm": 1.2418140172958374, "learning_rate": 0.001, "loss": 2.6404, "step": 99300 }, { "epoch": 12.850678733031675, "grad_norm": 0.93936687707901, "learning_rate": 0.001, "loss": 2.6469, "step": 99400 }, { "epoch": 12.86360698125404, "grad_norm": 0.8729557991027832, "learning_rate": 0.001, "loss": 2.6408, "step": 99500 }, { "epoch": 12.876535229476406, "grad_norm": 0.8166502118110657, "learning_rate": 0.001, "loss": 2.6658, "step": 99600 }, { "epoch": 12.889463477698772, "grad_norm": 0.8894999623298645, "learning_rate": 0.001, "loss": 2.6455, "step": 99700 }, { "epoch": 12.902391725921138, "grad_norm": 1.0049282312393188, "learning_rate": 0.001, "loss": 2.6366, "step": 99800 }, { "epoch": 12.915319974143504, "grad_norm": 0.8839260339736938, "learning_rate": 0.001, "loss": 2.6597, "step": 99900 }, { "epoch": 12.92824822236587, "grad_norm": 0.8483619093894958, "learning_rate": 0.001, "loss": 2.6552, "step": 100000 }, { "epoch": 12.941176470588236, "grad_norm": 0.817435085773468, "learning_rate": 0.001, "loss": 2.6426, "step": 100100 }, { "epoch": 12.954104718810601, "grad_norm": 1.0191770792007446, "learning_rate": 0.001, "loss": 2.6494, "step": 100200 }, { "epoch": 12.967032967032967, "grad_norm": 1.056062936782837, "learning_rate": 0.001, "loss": 2.6558, "step": 100300 }, { "epoch": 12.979961215255333, "grad_norm": 1.0269839763641357, "learning_rate": 0.001, "loss": 2.659, "step": 100400 }, { "epoch": 12.992889463477699, "grad_norm": 0.8958103656768799, "learning_rate": 0.001, "loss": 2.6707, "step": 100500 }, { "epoch": 13.005817711700065, "grad_norm": 1.0725547075271606, "learning_rate": 0.001, "loss": 2.6334, "step": 100600 }, { "epoch": 13.01874595992243, "grad_norm": 0.9194360375404358, "learning_rate": 0.001, "loss": 2.5806, "step": 100700 }, { "epoch": 13.031674208144796, "grad_norm": 1.3886171579360962, "learning_rate": 0.001, "loss": 2.5812, "step": 100800 }, { "epoch": 13.044602456367162, "grad_norm": 1.3397186994552612, "learning_rate": 0.001, "loss": 2.5865, "step": 100900 }, { "epoch": 13.057530704589528, "grad_norm": 1.541393518447876, "learning_rate": 0.001, "loss": 2.5838, "step": 101000 }, { "epoch": 13.070458952811894, "grad_norm": 1.2277320623397827, "learning_rate": 0.001, "loss": 2.5777, "step": 101100 }, { "epoch": 13.08338720103426, "grad_norm": 1.027618646621704, "learning_rate": 0.001, "loss": 2.568, "step": 101200 }, { "epoch": 13.096315449256625, "grad_norm": 1.0071109533309937, "learning_rate": 0.001, "loss": 2.5803, "step": 101300 }, { "epoch": 13.109243697478991, "grad_norm": 0.8686617016792297, "learning_rate": 0.001, "loss": 2.5651, "step": 101400 }, { "epoch": 13.122171945701357, "grad_norm": 1.1948893070220947, "learning_rate": 0.001, "loss": 2.591, "step": 101500 }, { "epoch": 13.135100193923723, "grad_norm": 1.0793249607086182, "learning_rate": 0.001, "loss": 2.5899, "step": 101600 }, { "epoch": 13.148028442146089, "grad_norm": 2.4241278171539307, "learning_rate": 0.001, "loss": 2.5664, "step": 101700 }, { "epoch": 13.160956690368455, "grad_norm": 0.9248546361923218, "learning_rate": 0.001, "loss": 2.5909, "step": 101800 }, { "epoch": 13.17388493859082, "grad_norm": 0.9408791661262512, "learning_rate": 0.001, "loss": 2.6017, "step": 101900 }, { "epoch": 13.186813186813186, "grad_norm": 1.4988340139389038, "learning_rate": 0.001, "loss": 2.5784, "step": 102000 }, { "epoch": 13.199741435035552, "grad_norm": 2.448138952255249, "learning_rate": 0.001, "loss": 2.5832, "step": 102100 }, { "epoch": 13.212669683257918, "grad_norm": 1.1110291481018066, "learning_rate": 0.001, "loss": 2.5874, "step": 102200 }, { "epoch": 13.225597931480284, "grad_norm": 1.158694863319397, "learning_rate": 0.001, "loss": 2.584, "step": 102300 }, { "epoch": 13.23852617970265, "grad_norm": 1.0650640726089478, "learning_rate": 0.001, "loss": 2.588, "step": 102400 }, { "epoch": 13.251454427925015, "grad_norm": 1.2195520401000977, "learning_rate": 0.001, "loss": 2.5927, "step": 102500 }, { "epoch": 13.264382676147383, "grad_norm": 0.866563081741333, "learning_rate": 0.001, "loss": 2.5816, "step": 102600 }, { "epoch": 13.277310924369749, "grad_norm": 1.194378137588501, "learning_rate": 0.001, "loss": 2.6193, "step": 102700 }, { "epoch": 13.290239172592115, "grad_norm": 3.0118188858032227, "learning_rate": 0.001, "loss": 2.6092, "step": 102800 }, { "epoch": 13.30316742081448, "grad_norm": 1.2849793434143066, "learning_rate": 0.001, "loss": 2.5966, "step": 102900 }, { "epoch": 13.316095669036846, "grad_norm": 0.7711643576622009, "learning_rate": 0.001, "loss": 2.5803, "step": 103000 }, { "epoch": 13.329023917259212, "grad_norm": 4.715073108673096, "learning_rate": 0.001, "loss": 2.6127, "step": 103100 }, { "epoch": 13.341952165481578, "grad_norm": 0.9868823885917664, "learning_rate": 0.001, "loss": 2.605, "step": 103200 }, { "epoch": 13.354880413703944, "grad_norm": 0.8661988377571106, "learning_rate": 0.001, "loss": 2.6239, "step": 103300 }, { "epoch": 13.36780866192631, "grad_norm": 1.1089942455291748, "learning_rate": 0.001, "loss": 2.6006, "step": 103400 }, { "epoch": 13.380736910148675, "grad_norm": 1.444116234779358, "learning_rate": 0.001, "loss": 2.5845, "step": 103500 }, { "epoch": 13.393665158371041, "grad_norm": 0.8376063704490662, "learning_rate": 0.001, "loss": 2.6104, "step": 103600 }, { "epoch": 13.406593406593407, "grad_norm": 1.03670072555542, "learning_rate": 0.001, "loss": 2.6092, "step": 103700 }, { "epoch": 13.419521654815773, "grad_norm": 0.8578863739967346, "learning_rate": 0.001, "loss": 2.6, "step": 103800 }, { "epoch": 13.432449903038139, "grad_norm": 0.8343275189399719, "learning_rate": 0.001, "loss": 2.6157, "step": 103900 }, { "epoch": 13.445378151260504, "grad_norm": 1.5313721895217896, "learning_rate": 0.001, "loss": 2.6253, "step": 104000 }, { "epoch": 13.45830639948287, "grad_norm": 0.8908823728561401, "learning_rate": 0.001, "loss": 2.6249, "step": 104100 }, { "epoch": 13.471234647705236, "grad_norm": 2.568053722381592, "learning_rate": 0.001, "loss": 2.5984, "step": 104200 }, { "epoch": 13.484162895927602, "grad_norm": 1.079933762550354, "learning_rate": 0.001, "loss": 2.6226, "step": 104300 }, { "epoch": 13.497091144149968, "grad_norm": 1.1315194368362427, "learning_rate": 0.001, "loss": 2.6163, "step": 104400 }, { "epoch": 13.510019392372334, "grad_norm": 1.2109016180038452, "learning_rate": 0.001, "loss": 2.6343, "step": 104500 }, { "epoch": 13.5229476405947, "grad_norm": 1.1619150638580322, "learning_rate": 0.001, "loss": 2.6125, "step": 104600 }, { "epoch": 13.535875888817065, "grad_norm": 0.9381139278411865, "learning_rate": 0.001, "loss": 2.6276, "step": 104700 }, { "epoch": 13.548804137039431, "grad_norm": 1.2995294332504272, "learning_rate": 0.001, "loss": 2.6192, "step": 104800 }, { "epoch": 13.561732385261797, "grad_norm": 1.2335697412490845, "learning_rate": 0.001, "loss": 2.5938, "step": 104900 }, { "epoch": 13.574660633484163, "grad_norm": 0.9342742562294006, "learning_rate": 0.001, "loss": 2.632, "step": 105000 }, { "epoch": 13.587588881706528, "grad_norm": 0.9512138366699219, "learning_rate": 0.001, "loss": 2.6375, "step": 105100 }, { "epoch": 13.600517129928894, "grad_norm": 1.067646861076355, "learning_rate": 0.001, "loss": 2.6314, "step": 105200 }, { "epoch": 13.61344537815126, "grad_norm": 0.997576117515564, "learning_rate": 0.001, "loss": 2.6448, "step": 105300 }, { "epoch": 13.626373626373626, "grad_norm": 1.661247968673706, "learning_rate": 0.001, "loss": 2.6074, "step": 105400 }, { "epoch": 13.639301874595992, "grad_norm": 1.8682411909103394, "learning_rate": 0.001, "loss": 2.6137, "step": 105500 }, { "epoch": 13.652230122818358, "grad_norm": 1.0532630681991577, "learning_rate": 0.001, "loss": 2.6071, "step": 105600 }, { "epoch": 13.665158371040723, "grad_norm": 1.0576748847961426, "learning_rate": 0.001, "loss": 2.6363, "step": 105700 }, { "epoch": 13.67808661926309, "grad_norm": 0.8601416945457458, "learning_rate": 0.001, "loss": 2.6257, "step": 105800 }, { "epoch": 13.691014867485455, "grad_norm": 1.3065986633300781, "learning_rate": 0.001, "loss": 2.6344, "step": 105900 }, { "epoch": 13.70394311570782, "grad_norm": 1.3175486326217651, "learning_rate": 0.001, "loss": 2.6137, "step": 106000 }, { "epoch": 13.716871363930187, "grad_norm": 1.0517117977142334, "learning_rate": 0.001, "loss": 2.6296, "step": 106100 }, { "epoch": 13.729799612152553, "grad_norm": 1.0981764793395996, "learning_rate": 0.001, "loss": 2.6152, "step": 106200 }, { "epoch": 13.74272786037492, "grad_norm": 1.1962010860443115, "learning_rate": 0.001, "loss": 2.6316, "step": 106300 }, { "epoch": 13.755656108597286, "grad_norm": 2.334928274154663, "learning_rate": 0.001, "loss": 2.6193, "step": 106400 }, { "epoch": 13.768584356819652, "grad_norm": 1.2267770767211914, "learning_rate": 0.001, "loss": 2.6161, "step": 106500 }, { "epoch": 13.781512605042018, "grad_norm": 1.334123134613037, "learning_rate": 0.001, "loss": 2.6154, "step": 106600 }, { "epoch": 13.794440853264383, "grad_norm": 0.8203903436660767, "learning_rate": 0.001, "loss": 2.637, "step": 106700 }, { "epoch": 13.80736910148675, "grad_norm": 0.9311782121658325, "learning_rate": 0.001, "loss": 2.6366, "step": 106800 }, { "epoch": 13.820297349709115, "grad_norm": 0.9777155518531799, "learning_rate": 0.001, "loss": 2.6168, "step": 106900 }, { "epoch": 13.83322559793148, "grad_norm": 0.9558805823326111, "learning_rate": 0.001, "loss": 2.6433, "step": 107000 }, { "epoch": 13.846153846153847, "grad_norm": 1.0244085788726807, "learning_rate": 0.001, "loss": 2.6271, "step": 107100 }, { "epoch": 13.859082094376213, "grad_norm": 1.0409430265426636, "learning_rate": 0.001, "loss": 2.6275, "step": 107200 }, { "epoch": 13.872010342598578, "grad_norm": 1.0494492053985596, "learning_rate": 0.001, "loss": 2.6235, "step": 107300 }, { "epoch": 13.884938590820944, "grad_norm": 0.9923045635223389, "learning_rate": 0.001, "loss": 2.6401, "step": 107400 }, { "epoch": 13.89786683904331, "grad_norm": 1.0816038846969604, "learning_rate": 0.001, "loss": 2.6575, "step": 107500 }, { "epoch": 13.910795087265676, "grad_norm": 1.3523567914962769, "learning_rate": 0.001, "loss": 2.6492, "step": 107600 }, { "epoch": 13.923723335488042, "grad_norm": 1.2187690734863281, "learning_rate": 0.001, "loss": 2.6337, "step": 107700 }, { "epoch": 13.936651583710407, "grad_norm": 1.2167950868606567, "learning_rate": 0.001, "loss": 2.6291, "step": 107800 }, { "epoch": 13.949579831932773, "grad_norm": 3.2038822174072266, "learning_rate": 0.001, "loss": 2.6511, "step": 107900 }, { "epoch": 13.96250808015514, "grad_norm": 2.482389211654663, "learning_rate": 0.001, "loss": 2.6533, "step": 108000 }, { "epoch": 13.975436328377505, "grad_norm": 1.3293681144714355, "learning_rate": 0.001, "loss": 2.6418, "step": 108100 }, { "epoch": 13.98836457659987, "grad_norm": 1.0060287714004517, "learning_rate": 0.001, "loss": 2.6431, "step": 108200 }, { "epoch": 14.001292824822237, "grad_norm": 0.8136079907417297, "learning_rate": 0.001, "loss": 2.6176, "step": 108300 }, { "epoch": 14.014221073044602, "grad_norm": 1.2400718927383423, "learning_rate": 0.001, "loss": 2.536, "step": 108400 }, { "epoch": 14.027149321266968, "grad_norm": 1.2739711999893188, "learning_rate": 0.001, "loss": 2.5553, "step": 108500 }, { "epoch": 14.040077569489334, "grad_norm": 1.020321011543274, "learning_rate": 0.001, "loss": 2.568, "step": 108600 }, { "epoch": 14.0530058177117, "grad_norm": 0.9283075928688049, "learning_rate": 0.001, "loss": 2.5868, "step": 108700 }, { "epoch": 14.065934065934066, "grad_norm": 0.7413648962974548, "learning_rate": 0.001, "loss": 2.5619, "step": 108800 }, { "epoch": 14.078862314156432, "grad_norm": 1.1795777082443237, "learning_rate": 0.001, "loss": 2.5624, "step": 108900 }, { "epoch": 14.091790562378797, "grad_norm": 0.9537236094474792, "learning_rate": 0.001, "loss": 2.5645, "step": 109000 }, { "epoch": 14.104718810601163, "grad_norm": 1.0021485090255737, "learning_rate": 0.001, "loss": 2.5705, "step": 109100 }, { "epoch": 14.117647058823529, "grad_norm": 0.9730578660964966, "learning_rate": 0.001, "loss": 2.556, "step": 109200 }, { "epoch": 14.130575307045895, "grad_norm": 0.9030996561050415, "learning_rate": 0.001, "loss": 2.5482, "step": 109300 }, { "epoch": 14.14350355526826, "grad_norm": 1.165297031402588, "learning_rate": 0.001, "loss": 2.5841, "step": 109400 }, { "epoch": 14.156431803490626, "grad_norm": 1.1317731142044067, "learning_rate": 0.001, "loss": 2.5772, "step": 109500 }, { "epoch": 14.169360051712992, "grad_norm": 0.8106029629707336, "learning_rate": 0.001, "loss": 2.5563, "step": 109600 }, { "epoch": 14.182288299935358, "grad_norm": 0.9116665720939636, "learning_rate": 0.001, "loss": 2.5715, "step": 109700 }, { "epoch": 14.195216548157724, "grad_norm": 1.2062695026397705, "learning_rate": 0.001, "loss": 2.5627, "step": 109800 }, { "epoch": 14.20814479638009, "grad_norm": 0.9418051242828369, "learning_rate": 0.001, "loss": 2.583, "step": 109900 }, { "epoch": 14.221073044602456, "grad_norm": 1.1221860647201538, "learning_rate": 0.001, "loss": 2.5757, "step": 110000 }, { "epoch": 14.234001292824821, "grad_norm": 1.198014736175537, "learning_rate": 0.001, "loss": 2.5805, "step": 110100 }, { "epoch": 14.246929541047187, "grad_norm": 0.7695361971855164, "learning_rate": 0.001, "loss": 2.5804, "step": 110200 }, { "epoch": 14.259857789269555, "grad_norm": 1.739322304725647, "learning_rate": 0.001, "loss": 2.5735, "step": 110300 }, { "epoch": 14.27278603749192, "grad_norm": 1.0180315971374512, "learning_rate": 0.001, "loss": 2.582, "step": 110400 }, { "epoch": 14.285714285714286, "grad_norm": 1.913111925125122, "learning_rate": 0.001, "loss": 2.5973, "step": 110500 }, { "epoch": 14.298642533936652, "grad_norm": 0.9132810831069946, "learning_rate": 0.001, "loss": 2.572, "step": 110600 }, { "epoch": 14.311570782159018, "grad_norm": 0.8779964447021484, "learning_rate": 0.001, "loss": 2.5903, "step": 110700 }, { "epoch": 14.324499030381384, "grad_norm": 1.22675359249115, "learning_rate": 0.001, "loss": 2.579, "step": 110800 }, { "epoch": 14.33742727860375, "grad_norm": 1.0067064762115479, "learning_rate": 0.001, "loss": 2.5754, "step": 110900 }, { "epoch": 14.350355526826116, "grad_norm": 0.77426677942276, "learning_rate": 0.001, "loss": 2.5672, "step": 111000 }, { "epoch": 14.363283775048481, "grad_norm": 1.381880760192871, "learning_rate": 0.001, "loss": 2.5897, "step": 111100 }, { "epoch": 14.376212023270847, "grad_norm": 0.8549098968505859, "learning_rate": 0.001, "loss": 2.6027, "step": 111200 }, { "epoch": 14.389140271493213, "grad_norm": 0.7777000069618225, "learning_rate": 0.001, "loss": 2.578, "step": 111300 }, { "epoch": 14.402068519715579, "grad_norm": 0.6824961304664612, "learning_rate": 0.001, "loss": 2.5846, "step": 111400 }, { "epoch": 14.414996767937945, "grad_norm": 0.8856310248374939, "learning_rate": 0.001, "loss": 2.6016, "step": 111500 }, { "epoch": 14.42792501616031, "grad_norm": 1.8227547407150269, "learning_rate": 0.001, "loss": 2.5919, "step": 111600 }, { "epoch": 14.440853264382676, "grad_norm": 1.1195710897445679, "learning_rate": 0.001, "loss": 2.5809, "step": 111700 }, { "epoch": 14.453781512605042, "grad_norm": 3.210498332977295, "learning_rate": 0.001, "loss": 2.5839, "step": 111800 }, { "epoch": 14.466709760827408, "grad_norm": 0.8595222234725952, "learning_rate": 0.001, "loss": 2.5847, "step": 111900 }, { "epoch": 14.479638009049774, "grad_norm": 1.33635413646698, "learning_rate": 0.001, "loss": 2.5993, "step": 112000 }, { "epoch": 14.49256625727214, "grad_norm": 3.8354926109313965, "learning_rate": 0.001, "loss": 2.5983, "step": 112100 }, { "epoch": 14.505494505494505, "grad_norm": 1.070267915725708, "learning_rate": 0.001, "loss": 2.5988, "step": 112200 }, { "epoch": 14.518422753716871, "grad_norm": 0.8938151597976685, "learning_rate": 0.001, "loss": 2.5963, "step": 112300 }, { "epoch": 14.531351001939237, "grad_norm": 1.1116007566452026, "learning_rate": 0.001, "loss": 2.5887, "step": 112400 }, { "epoch": 14.544279250161603, "grad_norm": 0.9080971479415894, "learning_rate": 0.001, "loss": 2.6038, "step": 112500 }, { "epoch": 14.557207498383969, "grad_norm": 1.1192103624343872, "learning_rate": 0.001, "loss": 2.5985, "step": 112600 }, { "epoch": 14.570135746606335, "grad_norm": 0.7032302618026733, "learning_rate": 0.001, "loss": 2.612, "step": 112700 }, { "epoch": 14.5830639948287, "grad_norm": 1.6761265993118286, "learning_rate": 0.001, "loss": 2.6214, "step": 112800 }, { "epoch": 14.595992243051066, "grad_norm": 0.9042111039161682, "learning_rate": 0.001, "loss": 2.5874, "step": 112900 }, { "epoch": 14.608920491273432, "grad_norm": 1.1957740783691406, "learning_rate": 0.001, "loss": 2.5925, "step": 113000 }, { "epoch": 14.621848739495798, "grad_norm": 0.8348422646522522, "learning_rate": 0.001, "loss": 2.6122, "step": 113100 }, { "epoch": 14.634776987718164, "grad_norm": 0.8426868915557861, "learning_rate": 0.001, "loss": 2.5903, "step": 113200 }, { "epoch": 14.64770523594053, "grad_norm": 1.7460241317749023, "learning_rate": 0.001, "loss": 2.6216, "step": 113300 }, { "epoch": 14.660633484162895, "grad_norm": 1.0793107748031616, "learning_rate": 0.001, "loss": 2.5962, "step": 113400 }, { "epoch": 14.673561732385261, "grad_norm": 1.4313504695892334, "learning_rate": 0.001, "loss": 2.6204, "step": 113500 }, { "epoch": 14.686489980607627, "grad_norm": 1.7662521600723267, "learning_rate": 0.001, "loss": 2.6216, "step": 113600 }, { "epoch": 14.699418228829993, "grad_norm": 0.7971717715263367, "learning_rate": 0.001, "loss": 2.6131, "step": 113700 }, { "epoch": 14.712346477052359, "grad_norm": 0.923685610294342, "learning_rate": 0.001, "loss": 2.5854, "step": 113800 }, { "epoch": 14.725274725274724, "grad_norm": 0.6881007552146912, "learning_rate": 0.001, "loss": 2.5983, "step": 113900 }, { "epoch": 14.738202973497092, "grad_norm": 1.3270736932754517, "learning_rate": 0.001, "loss": 2.5979, "step": 114000 }, { "epoch": 14.751131221719458, "grad_norm": 0.9529316425323486, "learning_rate": 0.001, "loss": 2.6043, "step": 114100 }, { "epoch": 14.764059469941824, "grad_norm": 0.9568586349487305, "learning_rate": 0.001, "loss": 2.5966, "step": 114200 }, { "epoch": 14.77698771816419, "grad_norm": 2.5716514587402344, "learning_rate": 0.001, "loss": 2.607, "step": 114300 }, { "epoch": 14.789915966386555, "grad_norm": 1.0761027336120605, "learning_rate": 0.001, "loss": 2.6209, "step": 114400 }, { "epoch": 14.802844214608921, "grad_norm": 0.9297621250152588, "learning_rate": 0.001, "loss": 2.6127, "step": 114500 }, { "epoch": 14.815772462831287, "grad_norm": 3.45412278175354, "learning_rate": 0.001, "loss": 2.6243, "step": 114600 }, { "epoch": 14.828700711053653, "grad_norm": 0.7786528468132019, "learning_rate": 0.001, "loss": 2.6101, "step": 114700 }, { "epoch": 14.841628959276019, "grad_norm": 1.2500520944595337, "learning_rate": 0.001, "loss": 2.6102, "step": 114800 }, { "epoch": 14.854557207498384, "grad_norm": 0.8665407299995422, "learning_rate": 0.001, "loss": 2.6159, "step": 114900 }, { "epoch": 14.86748545572075, "grad_norm": 0.8825407028198242, "learning_rate": 0.001, "loss": 2.6418, "step": 115000 }, { "epoch": 14.880413703943116, "grad_norm": 0.8189628720283508, "learning_rate": 0.001, "loss": 2.6206, "step": 115100 }, { "epoch": 14.893341952165482, "grad_norm": 0.9071522951126099, "learning_rate": 0.001, "loss": 2.5978, "step": 115200 }, { "epoch": 14.906270200387848, "grad_norm": 1.0628280639648438, "learning_rate": 0.001, "loss": 2.6156, "step": 115300 }, { "epoch": 14.919198448610214, "grad_norm": 1.0758466720581055, "learning_rate": 0.001, "loss": 2.6148, "step": 115400 }, { "epoch": 14.93212669683258, "grad_norm": 0.9504366517066956, "learning_rate": 0.001, "loss": 2.6119, "step": 115500 }, { "epoch": 14.945054945054945, "grad_norm": 0.8474738001823425, "learning_rate": 0.001, "loss": 2.6205, "step": 115600 }, { "epoch": 14.957983193277311, "grad_norm": 0.9702305793762207, "learning_rate": 0.001, "loss": 2.6271, "step": 115700 }, { "epoch": 14.970911441499677, "grad_norm": 2.0693514347076416, "learning_rate": 0.001, "loss": 2.6054, "step": 115800 }, { "epoch": 14.983839689722043, "grad_norm": 0.8102448582649231, "learning_rate": 0.001, "loss": 2.6014, "step": 115900 }, { "epoch": 14.996767937944409, "grad_norm": 1.0416910648345947, "learning_rate": 0.001, "loss": 2.5992, "step": 116000 }, { "epoch": 15.009696186166774, "grad_norm": 0.657540500164032, "learning_rate": 0.001, "loss": 2.5622, "step": 116100 }, { "epoch": 15.02262443438914, "grad_norm": 0.815985918045044, "learning_rate": 0.001, "loss": 2.529, "step": 116200 }, { "epoch": 15.035552682611506, "grad_norm": 0.7533887028694153, "learning_rate": 0.001, "loss": 2.5392, "step": 116300 }, { "epoch": 15.048480930833872, "grad_norm": 0.9380256533622742, "learning_rate": 0.001, "loss": 2.5425, "step": 116400 }, { "epoch": 15.061409179056238, "grad_norm": 0.7857024073600769, "learning_rate": 0.001, "loss": 2.5357, "step": 116500 }, { "epoch": 15.074337427278603, "grad_norm": 1.211055874824524, "learning_rate": 0.001, "loss": 2.54, "step": 116600 }, { "epoch": 15.08726567550097, "grad_norm": 0.7158958911895752, "learning_rate": 0.001, "loss": 2.5443, "step": 116700 }, { "epoch": 15.100193923723335, "grad_norm": 0.8195045590400696, "learning_rate": 0.001, "loss": 2.5292, "step": 116800 }, { "epoch": 15.113122171945701, "grad_norm": 0.8771835565567017, "learning_rate": 0.001, "loss": 2.5286, "step": 116900 }, { "epoch": 15.126050420168067, "grad_norm": 0.9149394631385803, "learning_rate": 0.001, "loss": 2.5498, "step": 117000 }, { "epoch": 15.138978668390433, "grad_norm": 1.0545625686645508, "learning_rate": 0.001, "loss": 2.5394, "step": 117100 }, { "epoch": 15.151906916612798, "grad_norm": 1.017945408821106, "learning_rate": 0.001, "loss": 2.5345, "step": 117200 }, { "epoch": 15.164835164835164, "grad_norm": 0.8175914287567139, "learning_rate": 0.001, "loss": 2.5549, "step": 117300 }, { "epoch": 15.17776341305753, "grad_norm": 0.8213416337966919, "learning_rate": 0.001, "loss": 2.5434, "step": 117400 }, { "epoch": 15.190691661279896, "grad_norm": 0.8517171144485474, "learning_rate": 0.001, "loss": 2.5378, "step": 117500 }, { "epoch": 15.203619909502262, "grad_norm": 0.743828535079956, "learning_rate": 0.001, "loss": 2.5313, "step": 117600 }, { "epoch": 15.216548157724628, "grad_norm": 0.8169363737106323, "learning_rate": 0.001, "loss": 2.5379, "step": 117700 }, { "epoch": 15.229476405946993, "grad_norm": 0.8676871061325073, "learning_rate": 0.001, "loss": 2.5505, "step": 117800 }, { "epoch": 15.24240465416936, "grad_norm": 2.6110706329345703, "learning_rate": 0.001, "loss": 2.5527, "step": 117900 }, { "epoch": 15.255332902391725, "grad_norm": 1.0858913660049438, "learning_rate": 0.001, "loss": 2.5606, "step": 118000 }, { "epoch": 15.268261150614093, "grad_norm": 0.8823414444923401, "learning_rate": 0.001, "loss": 2.5502, "step": 118100 }, { "epoch": 15.281189398836458, "grad_norm": 0.8567391037940979, "learning_rate": 0.001, "loss": 2.5326, "step": 118200 }, { "epoch": 15.294117647058824, "grad_norm": 0.9477885365486145, "learning_rate": 0.001, "loss": 2.5754, "step": 118300 }, { "epoch": 15.30704589528119, "grad_norm": 2.1644864082336426, "learning_rate": 0.001, "loss": 2.549, "step": 118400 }, { "epoch": 15.319974143503556, "grad_norm": 0.8650554418563843, "learning_rate": 0.001, "loss": 2.5612, "step": 118500 }, { "epoch": 15.332902391725922, "grad_norm": 0.8215305805206299, "learning_rate": 0.001, "loss": 2.565, "step": 118600 }, { "epoch": 15.345830639948288, "grad_norm": 0.8542001843452454, "learning_rate": 0.001, "loss": 2.5635, "step": 118700 }, { "epoch": 15.358758888170653, "grad_norm": 0.8219261765480042, "learning_rate": 0.001, "loss": 2.5673, "step": 118800 }, { "epoch": 15.37168713639302, "grad_norm": 2.1299471855163574, "learning_rate": 0.001, "loss": 2.5556, "step": 118900 }, { "epoch": 15.384615384615385, "grad_norm": 0.8396114706993103, "learning_rate": 0.001, "loss": 2.5627, "step": 119000 }, { "epoch": 15.39754363283775, "grad_norm": 0.7101746797561646, "learning_rate": 0.001, "loss": 2.5444, "step": 119100 }, { "epoch": 15.410471881060117, "grad_norm": 0.8579614758491516, "learning_rate": 0.001, "loss": 2.5485, "step": 119200 }, { "epoch": 15.423400129282482, "grad_norm": 4.381954669952393, "learning_rate": 0.001, "loss": 2.5592, "step": 119300 }, { "epoch": 15.436328377504848, "grad_norm": 0.8097376227378845, "learning_rate": 0.001, "loss": 2.5629, "step": 119400 }, { "epoch": 15.449256625727214, "grad_norm": 1.3511531352996826, "learning_rate": 0.001, "loss": 2.5833, "step": 119500 }, { "epoch": 15.46218487394958, "grad_norm": 0.749480128288269, "learning_rate": 0.001, "loss": 2.574, "step": 119600 }, { "epoch": 15.475113122171946, "grad_norm": 0.7316570281982422, "learning_rate": 0.001, "loss": 2.5713, "step": 119700 }, { "epoch": 15.488041370394312, "grad_norm": 0.8587490320205688, "learning_rate": 0.001, "loss": 2.5726, "step": 119800 }, { "epoch": 15.500969618616677, "grad_norm": 0.7772616744041443, "learning_rate": 0.001, "loss": 2.5528, "step": 119900 }, { "epoch": 15.513897866839043, "grad_norm": 1.3862597942352295, "learning_rate": 0.001, "loss": 2.5595, "step": 120000 }, { "epoch": 15.526826115061409, "grad_norm": 1.0324838161468506, "learning_rate": 0.001, "loss": 2.5692, "step": 120100 }, { "epoch": 15.539754363283775, "grad_norm": 2.5185530185699463, "learning_rate": 0.001, "loss": 2.5744, "step": 120200 }, { "epoch": 15.55268261150614, "grad_norm": 0.7411508560180664, "learning_rate": 0.001, "loss": 2.5651, "step": 120300 }, { "epoch": 15.565610859728507, "grad_norm": 0.709475576877594, "learning_rate": 0.001, "loss": 2.5773, "step": 120400 }, { "epoch": 15.578539107950872, "grad_norm": 0.9099294543266296, "learning_rate": 0.001, "loss": 2.5757, "step": 120500 }, { "epoch": 15.591467356173238, "grad_norm": 2.8636815547943115, "learning_rate": 0.001, "loss": 2.572, "step": 120600 }, { "epoch": 15.604395604395604, "grad_norm": 1.2228679656982422, "learning_rate": 0.001, "loss": 2.576, "step": 120700 }, { "epoch": 15.61732385261797, "grad_norm": 11.643074035644531, "learning_rate": 0.001, "loss": 2.5789, "step": 120800 }, { "epoch": 15.630252100840336, "grad_norm": 0.9943307638168335, "learning_rate": 0.001, "loss": 2.5908, "step": 120900 }, { "epoch": 15.643180349062701, "grad_norm": 0.8236725330352783, "learning_rate": 0.001, "loss": 2.5826, "step": 121000 }, { "epoch": 15.656108597285067, "grad_norm": 0.8492900133132935, "learning_rate": 0.001, "loss": 2.5573, "step": 121100 }, { "epoch": 15.669036845507433, "grad_norm": 0.720883309841156, "learning_rate": 0.001, "loss": 2.5852, "step": 121200 }, { "epoch": 15.681965093729799, "grad_norm": 1.0127933025360107, "learning_rate": 0.001, "loss": 2.5909, "step": 121300 }, { "epoch": 15.694893341952165, "grad_norm": 1.1621676683425903, "learning_rate": 0.001, "loss": 2.5946, "step": 121400 }, { "epoch": 15.70782159017453, "grad_norm": 0.7997000217437744, "learning_rate": 0.001, "loss": 2.5906, "step": 121500 }, { "epoch": 15.720749838396896, "grad_norm": 0.8370825052261353, "learning_rate": 0.001, "loss": 2.5904, "step": 121600 }, { "epoch": 15.733678086619262, "grad_norm": 0.8236163258552551, "learning_rate": 0.001, "loss": 2.5897, "step": 121700 }, { "epoch": 15.74660633484163, "grad_norm": 0.8831769227981567, "learning_rate": 0.001, "loss": 2.5697, "step": 121800 }, { "epoch": 15.759534583063996, "grad_norm": 1.1621983051300049, "learning_rate": 0.001, "loss": 2.5695, "step": 121900 }, { "epoch": 15.772462831286362, "grad_norm": 0.8794357776641846, "learning_rate": 0.001, "loss": 2.5937, "step": 122000 }, { "epoch": 15.785391079508727, "grad_norm": 0.845700740814209, "learning_rate": 0.001, "loss": 2.6162, "step": 122100 }, { "epoch": 15.798319327731093, "grad_norm": 0.7322149872779846, "learning_rate": 0.001, "loss": 2.5885, "step": 122200 }, { "epoch": 15.811247575953459, "grad_norm": 1.2213095426559448, "learning_rate": 0.001, "loss": 2.5996, "step": 122300 }, { "epoch": 15.824175824175825, "grad_norm": 1.3661214113235474, "learning_rate": 0.001, "loss": 2.597, "step": 122400 }, { "epoch": 15.83710407239819, "grad_norm": 1.1152528524398804, "learning_rate": 0.001, "loss": 2.6041, "step": 122500 }, { "epoch": 15.850032320620556, "grad_norm": 1.0370805263519287, "learning_rate": 0.001, "loss": 2.6032, "step": 122600 }, { "epoch": 15.862960568842922, "grad_norm": 1.0139251947402954, "learning_rate": 0.001, "loss": 2.594, "step": 122700 }, { "epoch": 15.875888817065288, "grad_norm": 0.8002803325653076, "learning_rate": 0.001, "loss": 2.5941, "step": 122800 }, { "epoch": 15.888817065287654, "grad_norm": 0.7084197998046875, "learning_rate": 0.001, "loss": 2.6167, "step": 122900 }, { "epoch": 15.90174531351002, "grad_norm": 0.8605496883392334, "learning_rate": 0.001, "loss": 2.5783, "step": 123000 }, { "epoch": 15.914673561732386, "grad_norm": 0.8486752510070801, "learning_rate": 0.001, "loss": 2.5942, "step": 123100 }, { "epoch": 15.927601809954751, "grad_norm": 3.644688844680786, "learning_rate": 0.001, "loss": 2.617, "step": 123200 }, { "epoch": 15.940530058177117, "grad_norm": 1.0534603595733643, "learning_rate": 0.001, "loss": 2.6073, "step": 123300 }, { "epoch": 15.953458306399483, "grad_norm": 0.6849213242530823, "learning_rate": 0.001, "loss": 2.596, "step": 123400 }, { "epoch": 15.966386554621849, "grad_norm": 0.9224341511726379, "learning_rate": 0.001, "loss": 2.6026, "step": 123500 }, { "epoch": 15.979314802844215, "grad_norm": 0.9115817546844482, "learning_rate": 0.001, "loss": 2.5996, "step": 123600 }, { "epoch": 15.99224305106658, "grad_norm": 0.8614963889122009, "learning_rate": 0.001, "loss": 2.6107, "step": 123700 }, { "epoch": 16.005171299288946, "grad_norm": 0.9771395921707153, "learning_rate": 0.001, "loss": 2.5447, "step": 123800 }, { "epoch": 16.018099547511312, "grad_norm": 1.1223886013031006, "learning_rate": 0.001, "loss": 2.4994, "step": 123900 }, { "epoch": 16.031027795733678, "grad_norm": 1.1173896789550781, "learning_rate": 0.001, "loss": 2.5272, "step": 124000 }, { "epoch": 16.043956043956044, "grad_norm": 0.9858359694480896, "learning_rate": 0.001, "loss": 2.5088, "step": 124100 }, { "epoch": 16.05688429217841, "grad_norm": 0.799921989440918, "learning_rate": 0.001, "loss": 2.5133, "step": 124200 }, { "epoch": 16.069812540400775, "grad_norm": 6.902834415435791, "learning_rate": 0.001, "loss": 2.5184, "step": 124300 }, { "epoch": 16.08274078862314, "grad_norm": 0.8443405628204346, "learning_rate": 0.001, "loss": 2.5095, "step": 124400 }, { "epoch": 16.095669036845507, "grad_norm": 1.106756329536438, "learning_rate": 0.001, "loss": 2.5362, "step": 124500 }, { "epoch": 16.108597285067873, "grad_norm": 1.1826072931289673, "learning_rate": 0.001, "loss": 2.5293, "step": 124600 }, { "epoch": 16.12152553329024, "grad_norm": 1.101196050643921, "learning_rate": 0.001, "loss": 2.5218, "step": 124700 }, { "epoch": 16.134453781512605, "grad_norm": 0.9533873796463013, "learning_rate": 0.001, "loss": 2.5238, "step": 124800 }, { "epoch": 16.14738202973497, "grad_norm": 0.9198462963104248, "learning_rate": 0.001, "loss": 2.525, "step": 124900 }, { "epoch": 16.160310277957336, "grad_norm": 1.7835291624069214, "learning_rate": 0.001, "loss": 2.5394, "step": 125000 }, { "epoch": 16.173238526179702, "grad_norm": 0.9190177321434021, "learning_rate": 0.001, "loss": 2.5284, "step": 125100 }, { "epoch": 16.186166774402068, "grad_norm": 0.9965214133262634, "learning_rate": 0.001, "loss": 2.5287, "step": 125200 }, { "epoch": 16.199095022624434, "grad_norm": 1.4756314754486084, "learning_rate": 0.001, "loss": 2.5411, "step": 125300 }, { "epoch": 16.2120232708468, "grad_norm": 0.8666704297065735, "learning_rate": 0.001, "loss": 2.5315, "step": 125400 }, { "epoch": 16.224951519069165, "grad_norm": 1.3108162879943848, "learning_rate": 0.001, "loss": 2.5541, "step": 125500 }, { "epoch": 16.23787976729153, "grad_norm": 0.8551124334335327, "learning_rate": 0.001, "loss": 2.5372, "step": 125600 }, { "epoch": 16.250808015513897, "grad_norm": 1.322482705116272, "learning_rate": 0.001, "loss": 2.5575, "step": 125700 }, { "epoch": 16.263736263736263, "grad_norm": 1.2717254161834717, "learning_rate": 0.001, "loss": 2.5594, "step": 125800 }, { "epoch": 16.27666451195863, "grad_norm": 1.3914741277694702, "learning_rate": 0.001, "loss": 2.5607, "step": 125900 }, { "epoch": 16.289592760180994, "grad_norm": 0.9041780233383179, "learning_rate": 0.001, "loss": 2.5456, "step": 126000 }, { "epoch": 16.30252100840336, "grad_norm": 0.9139705896377563, "learning_rate": 0.001, "loss": 2.5568, "step": 126100 }, { "epoch": 16.315449256625726, "grad_norm": 1.3718900680541992, "learning_rate": 0.001, "loss": 2.5538, "step": 126200 }, { "epoch": 16.328377504848092, "grad_norm": 0.8075583577156067, "learning_rate": 0.001, "loss": 2.5707, "step": 126300 }, { "epoch": 16.341305753070458, "grad_norm": 2.013007640838623, "learning_rate": 0.001, "loss": 2.5186, "step": 126400 }, { "epoch": 16.354234001292824, "grad_norm": 0.9065732955932617, "learning_rate": 0.001, "loss": 2.5401, "step": 126500 }, { "epoch": 16.36716224951519, "grad_norm": 25.44598960876465, "learning_rate": 0.001, "loss": 2.5539, "step": 126600 }, { "epoch": 16.380090497737555, "grad_norm": 0.9235751628875732, "learning_rate": 0.001, "loss": 2.5297, "step": 126700 }, { "epoch": 16.39301874595992, "grad_norm": 0.724864661693573, "learning_rate": 0.001, "loss": 2.5381, "step": 126800 }, { "epoch": 16.405946994182287, "grad_norm": 0.8753328919410706, "learning_rate": 0.001, "loss": 2.5548, "step": 126900 }, { "epoch": 16.418875242404653, "grad_norm": 0.7831981778144836, "learning_rate": 0.001, "loss": 2.5503, "step": 127000 }, { "epoch": 16.43180349062702, "grad_norm": 1.1626372337341309, "learning_rate": 0.001, "loss": 2.5465, "step": 127100 }, { "epoch": 16.444731738849384, "grad_norm": 1.0983495712280273, "learning_rate": 0.001, "loss": 2.5533, "step": 127200 }, { "epoch": 16.45765998707175, "grad_norm": 1.4168146848678589, "learning_rate": 0.001, "loss": 2.5426, "step": 127300 }, { "epoch": 16.470588235294116, "grad_norm": 18.096590042114258, "learning_rate": 0.001, "loss": 2.5442, "step": 127400 }, { "epoch": 16.483516483516482, "grad_norm": 1.1030082702636719, "learning_rate": 0.001, "loss": 2.5497, "step": 127500 }, { "epoch": 16.496444731738848, "grad_norm": 1.679632306098938, "learning_rate": 0.001, "loss": 2.5457, "step": 127600 }, { "epoch": 16.509372979961213, "grad_norm": 0.8951355814933777, "learning_rate": 0.001, "loss": 2.5465, "step": 127700 }, { "epoch": 16.522301228183583, "grad_norm": 0.9594563841819763, "learning_rate": 0.001, "loss": 2.5663, "step": 127800 }, { "epoch": 16.53522947640595, "grad_norm": 1.3974311351776123, "learning_rate": 0.001, "loss": 2.5604, "step": 127900 }, { "epoch": 16.548157724628314, "grad_norm": 6.313642501831055, "learning_rate": 0.001, "loss": 2.574, "step": 128000 }, { "epoch": 16.56108597285068, "grad_norm": 0.9976781010627747, "learning_rate": 0.001, "loss": 2.5367, "step": 128100 }, { "epoch": 16.574014221073046, "grad_norm": 2.8323049545288086, "learning_rate": 0.001, "loss": 2.5563, "step": 128200 }, { "epoch": 16.586942469295412, "grad_norm": 1.0244059562683105, "learning_rate": 0.001, "loss": 2.5539, "step": 128300 }, { "epoch": 16.599870717517778, "grad_norm": 1.0916117429733276, "learning_rate": 0.001, "loss": 2.5497, "step": 128400 }, { "epoch": 16.612798965740144, "grad_norm": 1.0723795890808105, "learning_rate": 0.001, "loss": 2.5569, "step": 128500 }, { "epoch": 16.62572721396251, "grad_norm": 0.8742340207099915, "learning_rate": 0.001, "loss": 2.5649, "step": 128600 }, { "epoch": 16.638655462184875, "grad_norm": 0.8872504830360413, "learning_rate": 0.001, "loss": 2.5626, "step": 128700 }, { "epoch": 16.65158371040724, "grad_norm": 1.04092276096344, "learning_rate": 0.001, "loss": 2.5852, "step": 128800 }, { "epoch": 16.664511958629607, "grad_norm": 0.9670746326446533, "learning_rate": 0.001, "loss": 2.5675, "step": 128900 }, { "epoch": 16.677440206851973, "grad_norm": 1.5549794435501099, "learning_rate": 0.001, "loss": 2.5667, "step": 129000 }, { "epoch": 16.69036845507434, "grad_norm": 1.3276736736297607, "learning_rate": 0.001, "loss": 2.5656, "step": 129100 }, { "epoch": 16.703296703296704, "grad_norm": 0.8616847395896912, "learning_rate": 0.001, "loss": 2.549, "step": 129200 }, { "epoch": 16.71622495151907, "grad_norm": 1.7426073551177979, "learning_rate": 0.001, "loss": 2.5557, "step": 129300 }, { "epoch": 16.729153199741436, "grad_norm": 1.3163553476333618, "learning_rate": 0.001, "loss": 2.5947, "step": 129400 }, { "epoch": 16.742081447963802, "grad_norm": 0.7390009164810181, "learning_rate": 0.001, "loss": 2.5674, "step": 129500 }, { "epoch": 16.755009696186168, "grad_norm": 1.0931220054626465, "learning_rate": 0.001, "loss": 2.5757, "step": 129600 }, { "epoch": 16.767937944408533, "grad_norm": 0.8307375311851501, "learning_rate": 0.001, "loss": 2.5781, "step": 129700 }, { "epoch": 16.7808661926309, "grad_norm": 0.7598011493682861, "learning_rate": 0.001, "loss": 2.5623, "step": 129800 }, { "epoch": 16.793794440853265, "grad_norm": 6.385036468505859, "learning_rate": 0.001, "loss": 2.5621, "step": 129900 }, { "epoch": 16.80672268907563, "grad_norm": 1.2335394620895386, "learning_rate": 0.001, "loss": 2.5852, "step": 130000 }, { "epoch": 16.819650937297997, "grad_norm": 0.7795293927192688, "learning_rate": 0.001, "loss": 2.5735, "step": 130100 }, { "epoch": 16.832579185520363, "grad_norm": 0.8660564422607422, "learning_rate": 0.001, "loss": 2.576, "step": 130200 }, { "epoch": 16.84550743374273, "grad_norm": 0.8524670600891113, "learning_rate": 0.001, "loss": 2.573, "step": 130300 }, { "epoch": 16.858435681965094, "grad_norm": 1.0687352418899536, "learning_rate": 0.001, "loss": 2.5673, "step": 130400 }, { "epoch": 16.87136393018746, "grad_norm": 0.9728882908821106, "learning_rate": 0.001, "loss": 2.5677, "step": 130500 }, { "epoch": 16.884292178409826, "grad_norm": 1.356015920639038, "learning_rate": 0.001, "loss": 2.5843, "step": 130600 }, { "epoch": 16.89722042663219, "grad_norm": 0.8931475877761841, "learning_rate": 0.001, "loss": 2.5757, "step": 130700 }, { "epoch": 16.910148674854558, "grad_norm": 1.088809847831726, "learning_rate": 0.001, "loss": 2.5938, "step": 130800 }, { "epoch": 16.923076923076923, "grad_norm": 1.0330066680908203, "learning_rate": 0.001, "loss": 2.5735, "step": 130900 }, { "epoch": 16.93600517129929, "grad_norm": 1.1621315479278564, "learning_rate": 0.001, "loss": 2.5642, "step": 131000 }, { "epoch": 16.948933419521655, "grad_norm": 0.8401142358779907, "learning_rate": 0.001, "loss": 2.5781, "step": 131100 }, { "epoch": 16.96186166774402, "grad_norm": 0.7843949198722839, "learning_rate": 0.001, "loss": 2.5783, "step": 131200 }, { "epoch": 16.974789915966387, "grad_norm": 1.1435294151306152, "learning_rate": 0.001, "loss": 2.5801, "step": 131300 }, { "epoch": 16.987718164188752, "grad_norm": 0.8952112793922424, "learning_rate": 0.001, "loss": 2.5662, "step": 131400 }, { "epoch": 17.00064641241112, "grad_norm": 0.6599233746528625, "learning_rate": 0.001, "loss": 2.5546, "step": 131500 }, { "epoch": 17.013574660633484, "grad_norm": 3.2165415287017822, "learning_rate": 0.001, "loss": 2.4851, "step": 131600 }, { "epoch": 17.02650290885585, "grad_norm": 0.8312383890151978, "learning_rate": 0.001, "loss": 2.4667, "step": 131700 }, { "epoch": 17.039431157078216, "grad_norm": 1.0451081991195679, "learning_rate": 0.001, "loss": 2.5077, "step": 131800 }, { "epoch": 17.05235940530058, "grad_norm": 1.6864192485809326, "learning_rate": 0.001, "loss": 2.48, "step": 131900 }, { "epoch": 17.065287653522947, "grad_norm": 1.4165103435516357, "learning_rate": 0.001, "loss": 2.4933, "step": 132000 }, { "epoch": 17.078215901745313, "grad_norm": 1.4687983989715576, "learning_rate": 0.001, "loss": 2.4995, "step": 132100 }, { "epoch": 17.09114414996768, "grad_norm": 1.4867112636566162, "learning_rate": 0.001, "loss": 2.4835, "step": 132200 }, { "epoch": 17.104072398190045, "grad_norm": 1.0710995197296143, "learning_rate": 0.001, "loss": 2.5006, "step": 132300 }, { "epoch": 17.11700064641241, "grad_norm": 1.4409894943237305, "learning_rate": 0.001, "loss": 2.5121, "step": 132400 }, { "epoch": 17.129928894634777, "grad_norm": 1.0766199827194214, "learning_rate": 0.001, "loss": 2.5117, "step": 132500 }, { "epoch": 17.142857142857142, "grad_norm": 1.0012937784194946, "learning_rate": 0.001, "loss": 2.5059, "step": 132600 }, { "epoch": 17.155785391079508, "grad_norm": 0.9501429796218872, "learning_rate": 0.001, "loss": 2.4962, "step": 132700 }, { "epoch": 17.168713639301874, "grad_norm": 0.9777405858039856, "learning_rate": 0.001, "loss": 2.4985, "step": 132800 }, { "epoch": 17.18164188752424, "grad_norm": 0.9750475287437439, "learning_rate": 0.001, "loss": 2.5321, "step": 132900 }, { "epoch": 17.194570135746606, "grad_norm": 0.8672329187393188, "learning_rate": 0.001, "loss": 2.5215, "step": 133000 }, { "epoch": 17.20749838396897, "grad_norm": 1.7719123363494873, "learning_rate": 0.001, "loss": 2.5162, "step": 133100 }, { "epoch": 17.220426632191337, "grad_norm": 1.1455820798873901, "learning_rate": 0.001, "loss": 2.5406, "step": 133200 }, { "epoch": 17.233354880413703, "grad_norm": 1.2547072172164917, "learning_rate": 0.001, "loss": 2.531, "step": 133300 }, { "epoch": 17.24628312863607, "grad_norm": 1.0438746213912964, "learning_rate": 0.001, "loss": 2.5132, "step": 133400 }, { "epoch": 17.259211376858435, "grad_norm": 1.125115990638733, "learning_rate": 0.001, "loss": 2.5363, "step": 133500 }, { "epoch": 17.2721396250808, "grad_norm": 0.8872924447059631, "learning_rate": 0.001, "loss": 2.5015, "step": 133600 }, { "epoch": 17.285067873303166, "grad_norm": 2.078883647918701, "learning_rate": 0.001, "loss": 2.503, "step": 133700 }, { "epoch": 17.297996121525532, "grad_norm": 1.359065294265747, "learning_rate": 0.001, "loss": 2.5172, "step": 133800 }, { "epoch": 17.310924369747898, "grad_norm": 1.1448476314544678, "learning_rate": 0.001, "loss": 2.5221, "step": 133900 }, { "epoch": 17.323852617970264, "grad_norm": 1.0665686130523682, "learning_rate": 0.001, "loss": 2.5324, "step": 134000 }, { "epoch": 17.33678086619263, "grad_norm": 1.097373366355896, "learning_rate": 0.001, "loss": 2.5252, "step": 134100 }, { "epoch": 17.349709114414996, "grad_norm": 2.471261501312256, "learning_rate": 0.001, "loss": 2.546, "step": 134200 }, { "epoch": 17.36263736263736, "grad_norm": 1.1681324243545532, "learning_rate": 0.001, "loss": 2.5209, "step": 134300 }, { "epoch": 17.375565610859727, "grad_norm": 5.4484171867370605, "learning_rate": 0.001, "loss": 2.5076, "step": 134400 }, { "epoch": 17.388493859082093, "grad_norm": 0.9953847527503967, "learning_rate": 0.001, "loss": 2.5332, "step": 134500 }, { "epoch": 17.40142210730446, "grad_norm": 0.9361692070960999, "learning_rate": 0.001, "loss": 2.5437, "step": 134600 }, { "epoch": 17.414350355526825, "grad_norm": 0.8539331555366516, "learning_rate": 0.001, "loss": 2.5154, "step": 134700 }, { "epoch": 17.42727860374919, "grad_norm": 0.9666441679000854, "learning_rate": 0.001, "loss": 2.5232, "step": 134800 }, { "epoch": 17.440206851971556, "grad_norm": 2.3844125270843506, "learning_rate": 0.001, "loss": 2.526, "step": 134900 }, { "epoch": 17.453135100193922, "grad_norm": 2.557101249694824, "learning_rate": 0.001, "loss": 2.5354, "step": 135000 }, { "epoch": 17.466063348416288, "grad_norm": 0.8548785448074341, "learning_rate": 0.001, "loss": 2.5223, "step": 135100 }, { "epoch": 17.478991596638654, "grad_norm": 0.9855307340621948, "learning_rate": 0.001, "loss": 2.5241, "step": 135200 }, { "epoch": 17.49191984486102, "grad_norm": 1.0815459489822388, "learning_rate": 0.001, "loss": 2.5225, "step": 135300 }, { "epoch": 17.50484809308339, "grad_norm": 1.7802574634552002, "learning_rate": 0.001, "loss": 2.5343, "step": 135400 }, { "epoch": 17.517776341305755, "grad_norm": 0.8039378523826599, "learning_rate": 0.001, "loss": 2.5047, "step": 135500 }, { "epoch": 17.53070458952812, "grad_norm": 1.4809627532958984, "learning_rate": 0.001, "loss": 2.5302, "step": 135600 }, { "epoch": 17.543632837750486, "grad_norm": 0.9139940738677979, "learning_rate": 0.001, "loss": 2.5582, "step": 135700 }, { "epoch": 17.556561085972852, "grad_norm": 1.1603368520736694, "learning_rate": 0.001, "loss": 2.5268, "step": 135800 }, { "epoch": 17.569489334195218, "grad_norm": 1.0877861976623535, "learning_rate": 0.001, "loss": 2.5584, "step": 135900 }, { "epoch": 17.582417582417584, "grad_norm": 1.00713050365448, "learning_rate": 0.001, "loss": 2.5145, "step": 136000 }, { "epoch": 17.59534583063995, "grad_norm": 0.6527794599533081, "learning_rate": 0.001, "loss": 2.5319, "step": 136100 }, { "epoch": 17.608274078862316, "grad_norm": 1.0179394483566284, "learning_rate": 0.001, "loss": 2.5452, "step": 136200 }, { "epoch": 17.62120232708468, "grad_norm": 0.9140816330909729, "learning_rate": 0.001, "loss": 2.542, "step": 136300 }, { "epoch": 17.634130575307047, "grad_norm": 3.7345874309539795, "learning_rate": 0.001, "loss": 2.5369, "step": 136400 }, { "epoch": 17.647058823529413, "grad_norm": 0.9748387932777405, "learning_rate": 0.001, "loss": 2.5669, "step": 136500 }, { "epoch": 17.65998707175178, "grad_norm": 0.8346742987632751, "learning_rate": 0.001, "loss": 2.551, "step": 136600 }, { "epoch": 17.672915319974145, "grad_norm": 0.8149889707565308, "learning_rate": 0.001, "loss": 2.526, "step": 136700 }, { "epoch": 17.68584356819651, "grad_norm": 1.0498007535934448, "learning_rate": 0.001, "loss": 2.5342, "step": 136800 }, { "epoch": 17.698771816418876, "grad_norm": 1.0643752813339233, "learning_rate": 0.001, "loss": 2.5322, "step": 136900 }, { "epoch": 17.711700064641242, "grad_norm": 0.7995538711547852, "learning_rate": 0.001, "loss": 2.5487, "step": 137000 }, { "epoch": 17.724628312863608, "grad_norm": 1.1539868116378784, "learning_rate": 0.001, "loss": 2.5634, "step": 137100 }, { "epoch": 17.737556561085974, "grad_norm": 0.9324626326560974, "learning_rate": 0.001, "loss": 2.5455, "step": 137200 }, { "epoch": 17.75048480930834, "grad_norm": 1.1402015686035156, "learning_rate": 0.001, "loss": 2.5534, "step": 137300 }, { "epoch": 17.763413057530705, "grad_norm": 1.0585646629333496, "learning_rate": 0.001, "loss": 2.5516, "step": 137400 }, { "epoch": 17.77634130575307, "grad_norm": 1.2042267322540283, "learning_rate": 0.001, "loss": 2.5465, "step": 137500 }, { "epoch": 17.789269553975437, "grad_norm": 1.47130286693573, "learning_rate": 0.001, "loss": 2.5376, "step": 137600 }, { "epoch": 17.802197802197803, "grad_norm": 0.9858179092407227, "learning_rate": 0.001, "loss": 2.5642, "step": 137700 }, { "epoch": 17.81512605042017, "grad_norm": 1.049811601638794, "learning_rate": 0.001, "loss": 2.5564, "step": 137800 }, { "epoch": 17.828054298642535, "grad_norm": 1.2673760652542114, "learning_rate": 0.001, "loss": 2.5616, "step": 137900 }, { "epoch": 17.8409825468649, "grad_norm": 0.8074108362197876, "learning_rate": 0.001, "loss": 2.5496, "step": 138000 }, { "epoch": 17.853910795087266, "grad_norm": 1.2420908212661743, "learning_rate": 0.001, "loss": 2.5664, "step": 138100 }, { "epoch": 17.866839043309632, "grad_norm": 1.1231664419174194, "learning_rate": 0.001, "loss": 2.5311, "step": 138200 }, { "epoch": 17.879767291531998, "grad_norm": 1.0279496908187866, "learning_rate": 0.001, "loss": 2.5494, "step": 138300 }, { "epoch": 17.892695539754364, "grad_norm": 9.692241668701172, "learning_rate": 0.001, "loss": 2.5542, "step": 138400 }, { "epoch": 17.90562378797673, "grad_norm": 1.193340539932251, "learning_rate": 0.001, "loss": 2.5446, "step": 138500 }, { "epoch": 17.918552036199095, "grad_norm": 1.055174469947815, "learning_rate": 0.001, "loss": 2.5685, "step": 138600 }, { "epoch": 17.93148028442146, "grad_norm": 1.1931167840957642, "learning_rate": 0.001, "loss": 2.5584, "step": 138700 }, { "epoch": 17.944408532643827, "grad_norm": 9.478219985961914, "learning_rate": 0.001, "loss": 2.5573, "step": 138800 }, { "epoch": 17.957336780866193, "grad_norm": 1.3488218784332275, "learning_rate": 0.001, "loss": 2.548, "step": 138900 }, { "epoch": 17.97026502908856, "grad_norm": 2.3975250720977783, "learning_rate": 0.001, "loss": 2.5543, "step": 139000 }, { "epoch": 17.983193277310924, "grad_norm": 1.393118977546692, "learning_rate": 0.001, "loss": 2.5591, "step": 139100 }, { "epoch": 17.99612152553329, "grad_norm": 0.9184149503707886, "learning_rate": 0.001, "loss": 2.5725, "step": 139200 }, { "epoch": 18.009049773755656, "grad_norm": 0.9140307903289795, "learning_rate": 0.001, "loss": 2.4895, "step": 139300 }, { "epoch": 18.021978021978022, "grad_norm": 0.812884509563446, "learning_rate": 0.001, "loss": 2.4605, "step": 139400 }, { "epoch": 18.034906270200388, "grad_norm": 0.992943525314331, "learning_rate": 0.001, "loss": 2.4645, "step": 139500 }, { "epoch": 18.047834518422754, "grad_norm": 0.7694491744041443, "learning_rate": 0.001, "loss": 2.4814, "step": 139600 }, { "epoch": 18.06076276664512, "grad_norm": 0.8844092488288879, "learning_rate": 0.001, "loss": 2.4694, "step": 139700 }, { "epoch": 18.073691014867485, "grad_norm": 0.9538593292236328, "learning_rate": 0.001, "loss": 2.4552, "step": 139800 }, { "epoch": 18.08661926308985, "grad_norm": 0.954797089099884, "learning_rate": 0.001, "loss": 2.4685, "step": 139900 }, { "epoch": 18.099547511312217, "grad_norm": 0.9369713068008423, "learning_rate": 0.001, "loss": 2.4827, "step": 140000 }, { "epoch": 18.112475759534583, "grad_norm": 0.9430900812149048, "learning_rate": 0.001, "loss": 2.4805, "step": 140100 }, { "epoch": 18.12540400775695, "grad_norm": 0.8952952027320862, "learning_rate": 0.001, "loss": 2.4686, "step": 140200 }, { "epoch": 18.138332255979314, "grad_norm": 1.5310841798782349, "learning_rate": 0.001, "loss": 2.4775, "step": 140300 }, { "epoch": 18.15126050420168, "grad_norm": 6.367415904998779, "learning_rate": 0.001, "loss": 2.487, "step": 140400 }, { "epoch": 18.164188752424046, "grad_norm": 0.9867402911186218, "learning_rate": 0.001, "loss": 2.4926, "step": 140500 }, { "epoch": 18.177117000646412, "grad_norm": 1.429605484008789, "learning_rate": 0.001, "loss": 2.4951, "step": 140600 }, { "epoch": 18.190045248868778, "grad_norm": 0.8199079036712646, "learning_rate": 0.001, "loss": 2.516, "step": 140700 }, { "epoch": 18.202973497091143, "grad_norm": 1.4226949214935303, "learning_rate": 0.001, "loss": 2.5019, "step": 140800 }, { "epoch": 18.21590174531351, "grad_norm": 0.9730379581451416, "learning_rate": 0.001, "loss": 2.4832, "step": 140900 }, { "epoch": 18.228829993535875, "grad_norm": 4.4546990394592285, "learning_rate": 0.001, "loss": 2.5003, "step": 141000 }, { "epoch": 18.24175824175824, "grad_norm": 2.391216993331909, "learning_rate": 0.001, "loss": 2.5069, "step": 141100 }, { "epoch": 18.254686489980607, "grad_norm": 0.9249674677848816, "learning_rate": 0.001, "loss": 2.4826, "step": 141200 }, { "epoch": 18.267614738202973, "grad_norm": 1.0824097394943237, "learning_rate": 0.001, "loss": 2.5185, "step": 141300 }, { "epoch": 18.28054298642534, "grad_norm": 0.9414225220680237, "learning_rate": 0.001, "loss": 2.4915, "step": 141400 }, { "epoch": 18.293471234647704, "grad_norm": 7.050719261169434, "learning_rate": 0.001, "loss": 2.5, "step": 141500 }, { "epoch": 18.30639948287007, "grad_norm": 3.468125343322754, "learning_rate": 0.001, "loss": 2.4886, "step": 141600 }, { "epoch": 18.319327731092436, "grad_norm": 0.9512806534767151, "learning_rate": 0.001, "loss": 2.4886, "step": 141700 }, { "epoch": 18.3322559793148, "grad_norm": 0.6943651437759399, "learning_rate": 0.001, "loss": 2.4791, "step": 141800 }, { "epoch": 18.345184227537167, "grad_norm": 2.3228261470794678, "learning_rate": 0.001, "loss": 2.5066, "step": 141900 }, { "epoch": 18.358112475759533, "grad_norm": 0.718782901763916, "learning_rate": 0.001, "loss": 2.4899, "step": 142000 }, { "epoch": 18.3710407239819, "grad_norm": 1.0763026475906372, "learning_rate": 0.001, "loss": 2.4987, "step": 142100 }, { "epoch": 18.383968972204265, "grad_norm": 1.0569713115692139, "learning_rate": 0.001, "loss": 2.5003, "step": 142200 }, { "epoch": 18.39689722042663, "grad_norm": 1.0740362405776978, "learning_rate": 0.001, "loss": 2.4915, "step": 142300 }, { "epoch": 18.409825468648997, "grad_norm": 0.9801196455955505, "learning_rate": 0.001, "loss": 2.5343, "step": 142400 }, { "epoch": 18.422753716871362, "grad_norm": 0.938046395778656, "learning_rate": 0.001, "loss": 2.5234, "step": 142500 }, { "epoch": 18.43568196509373, "grad_norm": 0.8363734483718872, "learning_rate": 0.001, "loss": 2.5096, "step": 142600 }, { "epoch": 18.448610213316094, "grad_norm": 1.345826268196106, "learning_rate": 0.001, "loss": 2.4894, "step": 142700 }, { "epoch": 18.46153846153846, "grad_norm": 1.2522311210632324, "learning_rate": 0.001, "loss": 2.5246, "step": 142800 }, { "epoch": 18.474466709760826, "grad_norm": 0.8304099440574646, "learning_rate": 0.001, "loss": 2.5175, "step": 142900 }, { "epoch": 18.48739495798319, "grad_norm": 0.8310981392860413, "learning_rate": 0.001, "loss": 2.5026, "step": 143000 }, { "epoch": 18.500323206205557, "grad_norm": 0.8464237451553345, "learning_rate": 0.001, "loss": 2.5057, "step": 143100 }, { "epoch": 18.513251454427923, "grad_norm": 0.9131343364715576, "learning_rate": 0.001, "loss": 2.5277, "step": 143200 }, { "epoch": 18.526179702650293, "grad_norm": 0.9889345169067383, "learning_rate": 0.001, "loss": 2.5237, "step": 143300 }, { "epoch": 18.53910795087266, "grad_norm": 1.2459388971328735, "learning_rate": 0.001, "loss": 2.5271, "step": 143400 }, { "epoch": 18.552036199095024, "grad_norm": 2.4386870861053467, "learning_rate": 0.001, "loss": 2.5247, "step": 143500 }, { "epoch": 18.56496444731739, "grad_norm": 1.015897274017334, "learning_rate": 0.001, "loss": 2.5253, "step": 143600 }, { "epoch": 18.577892695539756, "grad_norm": 0.9111728072166443, "learning_rate": 0.001, "loss": 2.5202, "step": 143700 }, { "epoch": 18.59082094376212, "grad_norm": 1.811147928237915, "learning_rate": 0.001, "loss": 2.5248, "step": 143800 }, { "epoch": 18.603749191984488, "grad_norm": 1.0122994184494019, "learning_rate": 0.001, "loss": 2.5224, "step": 143900 }, { "epoch": 18.616677440206853, "grad_norm": 0.8775935769081116, "learning_rate": 0.001, "loss": 2.5305, "step": 144000 }, { "epoch": 18.62960568842922, "grad_norm": 0.8408803343772888, "learning_rate": 0.001, "loss": 2.5296, "step": 144100 }, { "epoch": 18.642533936651585, "grad_norm": 0.8723968267440796, "learning_rate": 0.001, "loss": 2.5359, "step": 144200 }, { "epoch": 18.65546218487395, "grad_norm": 0.7395591139793396, "learning_rate": 0.001, "loss": 2.5268, "step": 144300 }, { "epoch": 18.668390433096317, "grad_norm": 0.9785889387130737, "learning_rate": 0.001, "loss": 2.4941, "step": 144400 }, { "epoch": 18.681318681318682, "grad_norm": 0.9496091604232788, "learning_rate": 0.001, "loss": 2.5302, "step": 144500 }, { "epoch": 18.69424692954105, "grad_norm": 1.0189368724822998, "learning_rate": 0.001, "loss": 2.521, "step": 144600 }, { "epoch": 18.707175177763414, "grad_norm": 0.7732048034667969, "learning_rate": 0.001, "loss": 2.5301, "step": 144700 }, { "epoch": 18.72010342598578, "grad_norm": 0.7573161721229553, "learning_rate": 0.001, "loss": 2.5374, "step": 144800 }, { "epoch": 18.733031674208146, "grad_norm": 0.7049207091331482, "learning_rate": 0.001, "loss": 2.5183, "step": 144900 }, { "epoch": 18.74595992243051, "grad_norm": 0.8106624484062195, "learning_rate": 0.001, "loss": 2.5265, "step": 145000 }, { "epoch": 18.758888170652877, "grad_norm": 0.9084197282791138, "learning_rate": 0.001, "loss": 2.542, "step": 145100 }, { "epoch": 18.771816418875243, "grad_norm": 0.9828372001647949, "learning_rate": 0.001, "loss": 2.5212, "step": 145200 }, { "epoch": 18.78474466709761, "grad_norm": 1.5742497444152832, "learning_rate": 0.001, "loss": 2.5247, "step": 145300 }, { "epoch": 18.797672915319975, "grad_norm": 0.7281416654586792, "learning_rate": 0.001, "loss": 2.5337, "step": 145400 }, { "epoch": 18.81060116354234, "grad_norm": 2.1434366703033447, "learning_rate": 0.001, "loss": 2.55, "step": 145500 }, { "epoch": 18.823529411764707, "grad_norm": 0.7234621047973633, "learning_rate": 0.001, "loss": 2.5351, "step": 145600 }, { "epoch": 18.836457659987072, "grad_norm": 0.7519808411598206, "learning_rate": 0.001, "loss": 2.5594, "step": 145700 }, { "epoch": 18.849385908209438, "grad_norm": 0.8085550665855408, "learning_rate": 0.001, "loss": 2.5216, "step": 145800 }, { "epoch": 18.862314156431804, "grad_norm": 1.065490961074829, "learning_rate": 0.001, "loss": 2.5286, "step": 145900 }, { "epoch": 18.87524240465417, "grad_norm": 0.8183416724205017, "learning_rate": 0.001, "loss": 2.5542, "step": 146000 }, { "epoch": 18.888170652876536, "grad_norm": 0.7724586129188538, "learning_rate": 0.001, "loss": 2.5311, "step": 146100 }, { "epoch": 18.9010989010989, "grad_norm": 0.8260592818260193, "learning_rate": 0.001, "loss": 2.5367, "step": 146200 }, { "epoch": 18.914027149321267, "grad_norm": 0.764021098613739, "learning_rate": 0.001, "loss": 2.5564, "step": 146300 }, { "epoch": 18.926955397543633, "grad_norm": 0.8205910921096802, "learning_rate": 0.001, "loss": 2.532, "step": 146400 }, { "epoch": 18.939883645766, "grad_norm": 1.1922063827514648, "learning_rate": 0.001, "loss": 2.5547, "step": 146500 }, { "epoch": 18.952811893988365, "grad_norm": 0.8549427390098572, "learning_rate": 0.001, "loss": 2.5478, "step": 146600 }, { "epoch": 18.96574014221073, "grad_norm": 1.0182076692581177, "learning_rate": 0.001, "loss": 2.5516, "step": 146700 }, { "epoch": 18.978668390433096, "grad_norm": 0.9566065669059753, "learning_rate": 0.001, "loss": 2.5318, "step": 146800 }, { "epoch": 18.991596638655462, "grad_norm": 0.7024259567260742, "learning_rate": 0.001, "loss": 2.5353, "step": 146900 }, { "epoch": 19.004524886877828, "grad_norm": 0.8477246761322021, "learning_rate": 0.001, "loss": 2.5414, "step": 147000 }, { "epoch": 19.017453135100194, "grad_norm": 0.8237463235855103, "learning_rate": 0.001, "loss": 2.4542, "step": 147100 }, { "epoch": 19.03038138332256, "grad_norm": 1.319801688194275, "learning_rate": 0.001, "loss": 2.4549, "step": 147200 }, { "epoch": 19.043309631544926, "grad_norm": 0.8251582384109497, "learning_rate": 0.001, "loss": 2.4661, "step": 147300 }, { "epoch": 19.05623787976729, "grad_norm": 0.9114189147949219, "learning_rate": 0.001, "loss": 2.4623, "step": 147400 }, { "epoch": 19.069166127989657, "grad_norm": 2.4250757694244385, "learning_rate": 0.001, "loss": 2.4832, "step": 147500 }, { "epoch": 19.082094376212023, "grad_norm": 2.622300624847412, "learning_rate": 0.001, "loss": 2.4602, "step": 147600 }, { "epoch": 19.09502262443439, "grad_norm": 0.9096581339836121, "learning_rate": 0.001, "loss": 2.4677, "step": 147700 }, { "epoch": 19.107950872656755, "grad_norm": 0.8613170981407166, "learning_rate": 0.001, "loss": 2.4665, "step": 147800 }, { "epoch": 19.12087912087912, "grad_norm": 1.4643497467041016, "learning_rate": 0.001, "loss": 2.4684, "step": 147900 }, { "epoch": 19.133807369101486, "grad_norm": 0.8535947799682617, "learning_rate": 0.001, "loss": 2.4591, "step": 148000 }, { "epoch": 19.146735617323852, "grad_norm": 1.1182326078414917, "learning_rate": 0.001, "loss": 2.4561, "step": 148100 }, { "epoch": 19.159663865546218, "grad_norm": 0.9079563021659851, "learning_rate": 0.001, "loss": 2.4685, "step": 148200 }, { "epoch": 19.172592113768584, "grad_norm": 0.840794026851654, "learning_rate": 0.001, "loss": 2.4534, "step": 148300 }, { "epoch": 19.18552036199095, "grad_norm": 0.9052335619926453, "learning_rate": 0.001, "loss": 2.4848, "step": 148400 }, { "epoch": 19.198448610213315, "grad_norm": 0.9210603833198547, "learning_rate": 0.001, "loss": 2.4645, "step": 148500 }, { "epoch": 19.21137685843568, "grad_norm": 1.8360157012939453, "learning_rate": 0.001, "loss": 2.4719, "step": 148600 }, { "epoch": 19.224305106658047, "grad_norm": 0.8917382955551147, "learning_rate": 0.001, "loss": 2.4517, "step": 148700 }, { "epoch": 19.237233354880413, "grad_norm": 1.0352623462677002, "learning_rate": 0.001, "loss": 2.481, "step": 148800 }, { "epoch": 19.25016160310278, "grad_norm": 1.2179285287857056, "learning_rate": 0.001, "loss": 2.472, "step": 148900 }, { "epoch": 19.263089851325145, "grad_norm": 0.912381112575531, "learning_rate": 0.001, "loss": 2.4667, "step": 149000 }, { "epoch": 19.27601809954751, "grad_norm": 1.100796103477478, "learning_rate": 0.001, "loss": 2.4623, "step": 149100 }, { "epoch": 19.288946347769876, "grad_norm": 0.9066677689552307, "learning_rate": 0.001, "loss": 2.457, "step": 149200 }, { "epoch": 19.301874595992242, "grad_norm": 4.733089923858643, "learning_rate": 0.001, "loss": 2.4616, "step": 149300 }, { "epoch": 19.314802844214608, "grad_norm": 8.665818214416504, "learning_rate": 0.001, "loss": 2.4624, "step": 149400 }, { "epoch": 19.327731092436974, "grad_norm": 0.8140535354614258, "learning_rate": 0.001, "loss": 2.4668, "step": 149500 }, { "epoch": 19.34065934065934, "grad_norm": 0.9089771509170532, "learning_rate": 0.001, "loss": 2.5129, "step": 149600 }, { "epoch": 19.353587588881705, "grad_norm": 0.9218637347221375, "learning_rate": 0.001, "loss": 2.491, "step": 149700 }, { "epoch": 19.36651583710407, "grad_norm": 1.9827762842178345, "learning_rate": 0.001, "loss": 2.4899, "step": 149800 }, { "epoch": 19.379444085326437, "grad_norm": 0.8305982947349548, "learning_rate": 0.001, "loss": 2.4905, "step": 149900 }, { "epoch": 19.392372333548803, "grad_norm": 21.252788543701172, "learning_rate": 0.001, "loss": 2.464, "step": 150000 }, { "epoch": 19.40530058177117, "grad_norm": 2.008701801300049, "learning_rate": 0.001, "loss": 2.4786, "step": 150100 }, { "epoch": 19.418228829993534, "grad_norm": 1.0854326486587524, "learning_rate": 0.001, "loss": 2.4807, "step": 150200 }, { "epoch": 19.4311570782159, "grad_norm": 0.8205916285514832, "learning_rate": 0.001, "loss": 2.4915, "step": 150300 }, { "epoch": 19.444085326438266, "grad_norm": 0.9572974443435669, "learning_rate": 0.001, "loss": 2.4871, "step": 150400 }, { "epoch": 19.457013574660632, "grad_norm": 3.523578643798828, "learning_rate": 0.001, "loss": 2.4595, "step": 150500 }, { "epoch": 19.469941822882998, "grad_norm": 0.9886376857757568, "learning_rate": 0.001, "loss": 2.491, "step": 150600 }, { "epoch": 19.482870071105364, "grad_norm": 0.9619378447532654, "learning_rate": 0.001, "loss": 2.4959, "step": 150700 }, { "epoch": 19.495798319327733, "grad_norm": 1.6548235416412354, "learning_rate": 0.001, "loss": 2.4851, "step": 150800 }, { "epoch": 19.5087265675501, "grad_norm": 1.3420881032943726, "learning_rate": 0.001, "loss": 2.4908, "step": 150900 }, { "epoch": 19.521654815772465, "grad_norm": 0.9296849370002747, "learning_rate": 0.001, "loss": 2.4945, "step": 151000 }, { "epoch": 19.53458306399483, "grad_norm": 1.3596820831298828, "learning_rate": 0.001, "loss": 2.503, "step": 151100 }, { "epoch": 19.547511312217196, "grad_norm": 1.4776437282562256, "learning_rate": 0.001, "loss": 2.4825, "step": 151200 }, { "epoch": 19.560439560439562, "grad_norm": 1.3711928129196167, "learning_rate": 0.001, "loss": 2.4893, "step": 151300 }, { "epoch": 19.573367808661928, "grad_norm": 0.782110333442688, "learning_rate": 0.001, "loss": 2.4955, "step": 151400 }, { "epoch": 19.586296056884294, "grad_norm": 0.8537931442260742, "learning_rate": 0.001, "loss": 2.4969, "step": 151500 }, { "epoch": 19.59922430510666, "grad_norm": 1.0540826320648193, "learning_rate": 0.001, "loss": 2.5032, "step": 151600 }, { "epoch": 19.612152553329025, "grad_norm": 1.0656594038009644, "learning_rate": 0.001, "loss": 2.4899, "step": 151700 }, { "epoch": 19.62508080155139, "grad_norm": 1.1910480260849, "learning_rate": 0.001, "loss": 2.4853, "step": 151800 }, { "epoch": 19.638009049773757, "grad_norm": 0.7360547780990601, "learning_rate": 0.001, "loss": 2.5033, "step": 151900 }, { "epoch": 19.650937297996123, "grad_norm": 0.8938814997673035, "learning_rate": 0.001, "loss": 2.5018, "step": 152000 }, { "epoch": 19.66386554621849, "grad_norm": 0.8584172129631042, "learning_rate": 0.001, "loss": 2.5054, "step": 152100 }, { "epoch": 19.676793794440854, "grad_norm": 0.9527407288551331, "learning_rate": 0.001, "loss": 2.4992, "step": 152200 }, { "epoch": 19.68972204266322, "grad_norm": 0.9841679334640503, "learning_rate": 0.001, "loss": 2.5013, "step": 152300 }, { "epoch": 19.702650290885586, "grad_norm": 0.7896699905395508, "learning_rate": 0.001, "loss": 2.5046, "step": 152400 }, { "epoch": 19.715578539107952, "grad_norm": 1.1070281267166138, "learning_rate": 0.001, "loss": 2.4954, "step": 152500 }, { "epoch": 19.728506787330318, "grad_norm": 1.1642028093338013, "learning_rate": 0.001, "loss": 2.4832, "step": 152600 }, { "epoch": 19.741435035552684, "grad_norm": 0.9196297526359558, "learning_rate": 0.001, "loss": 2.5211, "step": 152700 }, { "epoch": 19.75436328377505, "grad_norm": 6.72641658782959, "learning_rate": 0.001, "loss": 2.512, "step": 152800 }, { "epoch": 19.767291531997415, "grad_norm": 4.4482245445251465, "learning_rate": 0.001, "loss": 2.4942, "step": 152900 }, { "epoch": 19.78021978021978, "grad_norm": 0.8630152344703674, "learning_rate": 0.001, "loss": 2.506, "step": 153000 }, { "epoch": 19.793148028442147, "grad_norm": 0.8290995359420776, "learning_rate": 0.001, "loss": 2.4842, "step": 153100 }, { "epoch": 19.806076276664513, "grad_norm": 1.4131746292114258, "learning_rate": 0.001, "loss": 2.4998, "step": 153200 }, { "epoch": 19.81900452488688, "grad_norm": 0.977256178855896, "learning_rate": 0.001, "loss": 2.5111, "step": 153300 }, { "epoch": 19.831932773109244, "grad_norm": 0.9321542978286743, "learning_rate": 0.001, "loss": 2.5101, "step": 153400 }, { "epoch": 19.84486102133161, "grad_norm": 2.0945851802825928, "learning_rate": 0.001, "loss": 2.4997, "step": 153500 }, { "epoch": 19.857789269553976, "grad_norm": 0.8246294856071472, "learning_rate": 0.001, "loss": 2.4998, "step": 153600 }, { "epoch": 19.87071751777634, "grad_norm": 1.1864343881607056, "learning_rate": 0.001, "loss": 2.518, "step": 153700 }, { "epoch": 19.883645765998708, "grad_norm": 0.8329616785049438, "learning_rate": 0.001, "loss": 2.5239, "step": 153800 }, { "epoch": 19.896574014221073, "grad_norm": 0.7787247896194458, "learning_rate": 0.001, "loss": 2.5217, "step": 153900 }, { "epoch": 19.90950226244344, "grad_norm": 0.9635729193687439, "learning_rate": 0.001, "loss": 2.5376, "step": 154000 }, { "epoch": 19.922430510665805, "grad_norm": 0.6824477910995483, "learning_rate": 0.001, "loss": 2.5284, "step": 154100 }, { "epoch": 19.93535875888817, "grad_norm": 1.003235936164856, "learning_rate": 0.001, "loss": 2.5313, "step": 154200 }, { "epoch": 19.948287007110537, "grad_norm": 0.7087455987930298, "learning_rate": 0.001, "loss": 2.5273, "step": 154300 }, { "epoch": 19.961215255332903, "grad_norm": 2.2609457969665527, "learning_rate": 0.001, "loss": 2.5216, "step": 154400 }, { "epoch": 19.97414350355527, "grad_norm": 0.7219851613044739, "learning_rate": 0.001, "loss": 2.5291, "step": 154500 }, { "epoch": 19.987071751777634, "grad_norm": 0.9308517575263977, "learning_rate": 0.001, "loss": 2.5201, "step": 154600 }, { "epoch": 20.0, "grad_norm": 27.87584114074707, "learning_rate": 0.001, "loss": 2.4895, "step": 154700 }, { "epoch": 20.012928248222366, "grad_norm": 1.5753073692321777, "learning_rate": 0.001, "loss": 2.4415, "step": 154800 }, { "epoch": 20.02585649644473, "grad_norm": 1.743921160697937, "learning_rate": 0.001, "loss": 2.4592, "step": 154900 }, { "epoch": 20.038784744667097, "grad_norm": 1.7031422853469849, "learning_rate": 0.001, "loss": 2.453, "step": 155000 }, { "epoch": 20.051712992889463, "grad_norm": 2.121762752532959, "learning_rate": 0.001, "loss": 2.434, "step": 155100 }, { "epoch": 20.06464124111183, "grad_norm": 1.549182653427124, "learning_rate": 0.001, "loss": 2.4237, "step": 155200 }, { "epoch": 20.077569489334195, "grad_norm": 1.6513586044311523, "learning_rate": 0.001, "loss": 2.44, "step": 155300 }, { "epoch": 20.09049773755656, "grad_norm": 3.3315701484680176, "learning_rate": 0.001, "loss": 2.4651, "step": 155400 }, { "epoch": 20.103425985778927, "grad_norm": 1.3396602869033813, "learning_rate": 0.001, "loss": 2.4458, "step": 155500 }, { "epoch": 20.116354234001292, "grad_norm": 3.534902811050415, "learning_rate": 0.001, "loss": 2.4449, "step": 155600 }, { "epoch": 20.12928248222366, "grad_norm": 35.53631591796875, "learning_rate": 0.001, "loss": 2.434, "step": 155700 }, { "epoch": 20.142210730446024, "grad_norm": 1.8637299537658691, "learning_rate": 0.001, "loss": 2.468, "step": 155800 }, { "epoch": 20.15513897866839, "grad_norm": 1.582653522491455, "learning_rate": 0.001, "loss": 2.4418, "step": 155900 }, { "epoch": 20.168067226890756, "grad_norm": 2.058037757873535, "learning_rate": 0.001, "loss": 2.4562, "step": 156000 }, { "epoch": 20.18099547511312, "grad_norm": 1.6643801927566528, "learning_rate": 0.001, "loss": 2.45, "step": 156100 }, { "epoch": 20.193923723335487, "grad_norm": 9.37109088897705, "learning_rate": 0.001, "loss": 2.4766, "step": 156200 }, { "epoch": 20.206851971557853, "grad_norm": 1.2501329183578491, "learning_rate": 0.001, "loss": 2.468, "step": 156300 }, { "epoch": 20.21978021978022, "grad_norm": 1.6158936023712158, "learning_rate": 0.001, "loss": 2.4761, "step": 156400 }, { "epoch": 20.232708468002585, "grad_norm": 1.4429748058319092, "learning_rate": 0.001, "loss": 2.4592, "step": 156500 }, { "epoch": 20.24563671622495, "grad_norm": 5.3553032875061035, "learning_rate": 0.001, "loss": 2.4667, "step": 156600 }, { "epoch": 20.258564964447316, "grad_norm": 2.1253111362457275, "learning_rate": 0.001, "loss": 2.4713, "step": 156700 }, { "epoch": 20.271493212669682, "grad_norm": 2.1394803524017334, "learning_rate": 0.001, "loss": 2.4645, "step": 156800 }, { "epoch": 20.284421460892048, "grad_norm": 1.6188217401504517, "learning_rate": 0.001, "loss": 2.4661, "step": 156900 }, { "epoch": 20.297349709114414, "grad_norm": 1.5464756488800049, "learning_rate": 0.001, "loss": 2.469, "step": 157000 }, { "epoch": 20.31027795733678, "grad_norm": 1.3389337062835693, "learning_rate": 0.001, "loss": 2.4775, "step": 157100 }, { "epoch": 20.323206205559146, "grad_norm": 1.631119966506958, "learning_rate": 0.001, "loss": 2.4663, "step": 157200 }, { "epoch": 20.33613445378151, "grad_norm": 1.4329222440719604, "learning_rate": 0.001, "loss": 2.48, "step": 157300 }, { "epoch": 20.349062702003877, "grad_norm": 1.8325433731079102, "learning_rate": 0.001, "loss": 2.501, "step": 157400 }, { "epoch": 20.361990950226243, "grad_norm": 1.2550899982452393, "learning_rate": 0.001, "loss": 2.4807, "step": 157500 }, { "epoch": 20.37491919844861, "grad_norm": 2.133481740951538, "learning_rate": 0.001, "loss": 2.4753, "step": 157600 }, { "epoch": 20.387847446670975, "grad_norm": 2.1133224964141846, "learning_rate": 0.001, "loss": 2.4939, "step": 157700 }, { "epoch": 20.40077569489334, "grad_norm": 2.7849323749542236, "learning_rate": 0.001, "loss": 2.487, "step": 157800 }, { "epoch": 20.413703943115706, "grad_norm": 1.5993068218231201, "learning_rate": 0.001, "loss": 2.4758, "step": 157900 }, { "epoch": 20.426632191338072, "grad_norm": 1.8264578580856323, "learning_rate": 0.001, "loss": 2.488, "step": 158000 }, { "epoch": 20.439560439560438, "grad_norm": 1.8162280321121216, "learning_rate": 0.001, "loss": 2.4783, "step": 158100 }, { "epoch": 20.452488687782804, "grad_norm": 2.030921220779419, "learning_rate": 0.001, "loss": 2.4643, "step": 158200 }, { "epoch": 20.46541693600517, "grad_norm": 1.716097116470337, "learning_rate": 0.001, "loss": 2.4966, "step": 158300 }, { "epoch": 20.478345184227535, "grad_norm": 1.6265625953674316, "learning_rate": 0.001, "loss": 2.4814, "step": 158400 }, { "epoch": 20.4912734324499, "grad_norm": 3.4179189205169678, "learning_rate": 0.001, "loss": 2.4772, "step": 158500 }, { "epoch": 20.504201680672267, "grad_norm": 1.522342562675476, "learning_rate": 0.001, "loss": 2.4929, "step": 158600 }, { "epoch": 20.517129928894636, "grad_norm": 2.5326809883117676, "learning_rate": 0.001, "loss": 2.4741, "step": 158700 }, { "epoch": 20.530058177117002, "grad_norm": 1.7521649599075317, "learning_rate": 0.001, "loss": 2.4942, "step": 158800 }, { "epoch": 20.542986425339368, "grad_norm": 3.707758903503418, "learning_rate": 0.001, "loss": 2.4931, "step": 158900 }, { "epoch": 20.555914673561734, "grad_norm": 1.634279727935791, "learning_rate": 0.001, "loss": 2.5067, "step": 159000 }, { "epoch": 20.5688429217841, "grad_norm": 2.3126118183135986, "learning_rate": 0.001, "loss": 2.4918, "step": 159100 }, { "epoch": 20.581771170006466, "grad_norm": 1.6536849737167358, "learning_rate": 0.001, "loss": 2.5122, "step": 159200 }, { "epoch": 20.59469941822883, "grad_norm": 1.3432544469833374, "learning_rate": 0.001, "loss": 2.4869, "step": 159300 }, { "epoch": 20.607627666451197, "grad_norm": 6.382686614990234, "learning_rate": 0.001, "loss": 2.5046, "step": 159400 }, { "epoch": 20.620555914673563, "grad_norm": 6.966188430786133, "learning_rate": 0.001, "loss": 2.4895, "step": 159500 }, { "epoch": 20.63348416289593, "grad_norm": 2.008010149002075, "learning_rate": 0.001, "loss": 2.5107, "step": 159600 }, { "epoch": 20.646412411118295, "grad_norm": 1.792756199836731, "learning_rate": 0.001, "loss": 2.4941, "step": 159700 }, { "epoch": 20.65934065934066, "grad_norm": 1.4433969259262085, "learning_rate": 0.001, "loss": 2.4896, "step": 159800 }, { "epoch": 20.672268907563026, "grad_norm": 2.4359402656555176, "learning_rate": 0.001, "loss": 2.4855, "step": 159900 }, { "epoch": 20.685197155785392, "grad_norm": 1.3594673871994019, "learning_rate": 0.001, "loss": 2.5016, "step": 160000 }, { "epoch": 20.698125404007758, "grad_norm": 1.405410885810852, "learning_rate": 0.001, "loss": 2.5199, "step": 160100 }, { "epoch": 20.711053652230124, "grad_norm": 3.022890090942383, "learning_rate": 0.001, "loss": 2.509, "step": 160200 }, { "epoch": 20.72398190045249, "grad_norm": 1.4392484426498413, "learning_rate": 0.001, "loss": 2.5042, "step": 160300 }, { "epoch": 20.736910148674855, "grad_norm": 1.1434990167617798, "learning_rate": 0.001, "loss": 2.49, "step": 160400 }, { "epoch": 20.74983839689722, "grad_norm": 2.205655097961426, "learning_rate": 0.001, "loss": 2.5155, "step": 160500 }, { "epoch": 20.762766645119587, "grad_norm": 2.1731863021850586, "learning_rate": 0.001, "loss": 2.4914, "step": 160600 }, { "epoch": 20.775694893341953, "grad_norm": 1.9055991172790527, "learning_rate": 0.001, "loss": 2.5005, "step": 160700 }, { "epoch": 20.78862314156432, "grad_norm": 2.3584797382354736, "learning_rate": 0.001, "loss": 2.5012, "step": 160800 }, { "epoch": 20.801551389786685, "grad_norm": 1.3340818881988525, "learning_rate": 0.001, "loss": 2.4977, "step": 160900 }, { "epoch": 20.81447963800905, "grad_norm": 2.088304281234741, "learning_rate": 0.001, "loss": 2.4991, "step": 161000 }, { "epoch": 20.827407886231416, "grad_norm": 3.0177249908447266, "learning_rate": 0.001, "loss": 2.4978, "step": 161100 }, { "epoch": 20.840336134453782, "grad_norm": 1.4891356229782104, "learning_rate": 0.001, "loss": 2.5028, "step": 161200 }, { "epoch": 20.853264382676148, "grad_norm": 1.877690315246582, "learning_rate": 0.001, "loss": 2.4992, "step": 161300 }, { "epoch": 20.866192630898514, "grad_norm": 1.42477548122406, "learning_rate": 0.001, "loss": 2.5149, "step": 161400 }, { "epoch": 20.87912087912088, "grad_norm": 1.569355845451355, "learning_rate": 0.001, "loss": 2.5312, "step": 161500 }, { "epoch": 20.892049127343245, "grad_norm": 1.9470707178115845, "learning_rate": 0.001, "loss": 2.5083, "step": 161600 }, { "epoch": 20.90497737556561, "grad_norm": 1.4686484336853027, "learning_rate": 0.001, "loss": 2.5151, "step": 161700 }, { "epoch": 20.917905623787977, "grad_norm": 1.6777766942977905, "learning_rate": 0.001, "loss": 2.5243, "step": 161800 }, { "epoch": 20.930833872010343, "grad_norm": 6.4991326332092285, "learning_rate": 0.001, "loss": 2.512, "step": 161900 }, { "epoch": 20.94376212023271, "grad_norm": 1.3031553030014038, "learning_rate": 0.001, "loss": 2.5219, "step": 162000 }, { "epoch": 20.956690368455074, "grad_norm": 1.589040756225586, "learning_rate": 0.001, "loss": 2.5215, "step": 162100 }, { "epoch": 20.96961861667744, "grad_norm": 1.6321709156036377, "learning_rate": 0.001, "loss": 2.5172, "step": 162200 }, { "epoch": 20.982546864899806, "grad_norm": 2.074775218963623, "learning_rate": 0.001, "loss": 2.5197, "step": 162300 }, { "epoch": 20.995475113122172, "grad_norm": 1.4616668224334717, "learning_rate": 0.001, "loss": 2.5111, "step": 162400 }, { "epoch": 21.008403361344538, "grad_norm": 0.9369063973426819, "learning_rate": 0.001, "loss": 2.4568, "step": 162500 }, { "epoch": 21.021331609566904, "grad_norm": 0.8724343180656433, "learning_rate": 0.001, "loss": 2.4314, "step": 162600 }, { "epoch": 21.03425985778927, "grad_norm": 0.9512951970100403, "learning_rate": 0.001, "loss": 2.4339, "step": 162700 }, { "epoch": 21.047188106011635, "grad_norm": 0.7325547933578491, "learning_rate": 0.001, "loss": 2.418, "step": 162800 }, { "epoch": 21.060116354234, "grad_norm": 2.475933313369751, "learning_rate": 0.001, "loss": 2.4399, "step": 162900 }, { "epoch": 21.073044602456367, "grad_norm": 2.2424378395080566, "learning_rate": 0.001, "loss": 2.407, "step": 163000 }, { "epoch": 21.085972850678733, "grad_norm": 0.8441184163093567, "learning_rate": 0.001, "loss": 2.4533, "step": 163100 }, { "epoch": 21.0989010989011, "grad_norm": 0.943762481212616, "learning_rate": 0.001, "loss": 2.4366, "step": 163200 }, { "epoch": 21.111829347123464, "grad_norm": 0.8481854200363159, "learning_rate": 0.001, "loss": 2.4204, "step": 163300 }, { "epoch": 21.12475759534583, "grad_norm": 0.8961603045463562, "learning_rate": 0.001, "loss": 2.4322, "step": 163400 }, { "epoch": 21.137685843568196, "grad_norm": 0.7513332962989807, "learning_rate": 0.001, "loss": 2.4303, "step": 163500 }, { "epoch": 21.150614091790562, "grad_norm": 0.9808944463729858, "learning_rate": 0.001, "loss": 2.4414, "step": 163600 }, { "epoch": 21.163542340012928, "grad_norm": 0.7665818929672241, "learning_rate": 0.001, "loss": 2.4424, "step": 163700 }, { "epoch": 21.176470588235293, "grad_norm": 0.7993526458740234, "learning_rate": 0.001, "loss": 2.4364, "step": 163800 }, { "epoch": 21.18939883645766, "grad_norm": 0.9973015785217285, "learning_rate": 0.001, "loss": 2.4584, "step": 163900 }, { "epoch": 21.202327084680025, "grad_norm": 1.194126844406128, "learning_rate": 0.001, "loss": 2.4452, "step": 164000 }, { "epoch": 21.21525533290239, "grad_norm": 0.9314330816268921, "learning_rate": 0.001, "loss": 2.4509, "step": 164100 }, { "epoch": 21.228183581124757, "grad_norm": 1.1576292514801025, "learning_rate": 0.001, "loss": 2.4312, "step": 164200 }, { "epoch": 21.241111829347123, "grad_norm": 0.7341636419296265, "learning_rate": 0.001, "loss": 2.467, "step": 164300 }, { "epoch": 21.25404007756949, "grad_norm": 1.6115810871124268, "learning_rate": 0.001, "loss": 2.4378, "step": 164400 }, { "epoch": 21.266968325791854, "grad_norm": 0.8908278942108154, "learning_rate": 0.001, "loss": 2.4436, "step": 164500 }, { "epoch": 21.27989657401422, "grad_norm": 0.9946580529212952, "learning_rate": 0.001, "loss": 2.4356, "step": 164600 }, { "epoch": 21.292824822236586, "grad_norm": 0.8667323589324951, "learning_rate": 0.001, "loss": 2.4567, "step": 164700 }, { "epoch": 21.30575307045895, "grad_norm": 0.9648801684379578, "learning_rate": 0.001, "loss": 2.4492, "step": 164800 }, { "epoch": 21.318681318681318, "grad_norm": 1.3914839029312134, "learning_rate": 0.001, "loss": 2.4529, "step": 164900 }, { "epoch": 21.331609566903683, "grad_norm": 2.7901952266693115, "learning_rate": 0.001, "loss": 2.4651, "step": 165000 }, { "epoch": 21.34453781512605, "grad_norm": 0.7852101922035217, "learning_rate": 0.001, "loss": 2.4729, "step": 165100 }, { "epoch": 21.357466063348415, "grad_norm": 0.9213072061538696, "learning_rate": 0.001, "loss": 2.4742, "step": 165200 }, { "epoch": 21.37039431157078, "grad_norm": 0.7989716529846191, "learning_rate": 0.001, "loss": 2.4442, "step": 165300 }, { "epoch": 21.383322559793147, "grad_norm": 1.027638554573059, "learning_rate": 0.001, "loss": 2.4533, "step": 165400 }, { "epoch": 21.396250808015512, "grad_norm": 0.9559603929519653, "learning_rate": 0.001, "loss": 2.4667, "step": 165500 }, { "epoch": 21.40917905623788, "grad_norm": 1.0063868761062622, "learning_rate": 0.001, "loss": 2.4669, "step": 165600 }, { "epoch": 21.422107304460244, "grad_norm": 0.8287100195884705, "learning_rate": 0.001, "loss": 2.4698, "step": 165700 }, { "epoch": 21.43503555268261, "grad_norm": 1.0100241899490356, "learning_rate": 0.001, "loss": 2.4811, "step": 165800 }, { "epoch": 21.447963800904976, "grad_norm": 0.7497538924217224, "learning_rate": 0.001, "loss": 2.4709, "step": 165900 }, { "epoch": 21.46089204912734, "grad_norm": 0.9580041170120239, "learning_rate": 0.001, "loss": 2.4703, "step": 166000 }, { "epoch": 21.473820297349707, "grad_norm": 0.8735139966011047, "learning_rate": 0.001, "loss": 2.4841, "step": 166100 }, { "epoch": 21.486748545572073, "grad_norm": 0.7676759958267212, "learning_rate": 0.001, "loss": 2.4574, "step": 166200 }, { "epoch": 21.499676793794443, "grad_norm": 0.9760861992835999, "learning_rate": 0.001, "loss": 2.4811, "step": 166300 }, { "epoch": 21.51260504201681, "grad_norm": 1.1480234861373901, "learning_rate": 0.001, "loss": 2.4601, "step": 166400 }, { "epoch": 21.525533290239174, "grad_norm": 0.8407095074653625, "learning_rate": 0.001, "loss": 2.4748, "step": 166500 }, { "epoch": 21.53846153846154, "grad_norm": 0.7404454350471497, "learning_rate": 0.001, "loss": 2.4734, "step": 166600 }, { "epoch": 21.551389786683906, "grad_norm": 0.8595699667930603, "learning_rate": 0.001, "loss": 2.4728, "step": 166700 }, { "epoch": 21.56431803490627, "grad_norm": 0.781103789806366, "learning_rate": 0.001, "loss": 2.4755, "step": 166800 }, { "epoch": 21.577246283128638, "grad_norm": 1.0037267208099365, "learning_rate": 0.001, "loss": 2.4793, "step": 166900 }, { "epoch": 21.590174531351003, "grad_norm": 0.933387041091919, "learning_rate": 0.001, "loss": 2.4858, "step": 167000 }, { "epoch": 21.60310277957337, "grad_norm": 1.1259608268737793, "learning_rate": 0.001, "loss": 2.496, "step": 167100 }, { "epoch": 21.616031027795735, "grad_norm": 0.9490242600440979, "learning_rate": 0.001, "loss": 2.4999, "step": 167200 }, { "epoch": 21.6289592760181, "grad_norm": 1.600340723991394, "learning_rate": 0.001, "loss": 2.4957, "step": 167300 }, { "epoch": 21.641887524240467, "grad_norm": 0.9250016808509827, "learning_rate": 0.001, "loss": 2.4821, "step": 167400 }, { "epoch": 21.654815772462833, "grad_norm": 0.8814137578010559, "learning_rate": 0.001, "loss": 2.4827, "step": 167500 }, { "epoch": 21.6677440206852, "grad_norm": 0.9850792288780212, "learning_rate": 0.001, "loss": 2.4822, "step": 167600 }, { "epoch": 21.680672268907564, "grad_norm": 0.8157269954681396, "learning_rate": 0.001, "loss": 2.5009, "step": 167700 }, { "epoch": 21.69360051712993, "grad_norm": 0.8588546514511108, "learning_rate": 0.001, "loss": 2.4726, "step": 167800 }, { "epoch": 21.706528765352296, "grad_norm": 0.9034969806671143, "learning_rate": 0.001, "loss": 2.4814, "step": 167900 }, { "epoch": 21.71945701357466, "grad_norm": 1.0984010696411133, "learning_rate": 0.001, "loss": 2.4819, "step": 168000 }, { "epoch": 21.732385261797027, "grad_norm": 0.9740549325942993, "learning_rate": 0.001, "loss": 2.4907, "step": 168100 }, { "epoch": 21.745313510019393, "grad_norm": 0.8739873766899109, "learning_rate": 0.001, "loss": 2.5004, "step": 168200 }, { "epoch": 21.75824175824176, "grad_norm": 2.9433345794677734, "learning_rate": 0.001, "loss": 2.4729, "step": 168300 }, { "epoch": 21.771170006464125, "grad_norm": 0.9163528084754944, "learning_rate": 0.001, "loss": 2.4874, "step": 168400 }, { "epoch": 21.78409825468649, "grad_norm": 1.26796555519104, "learning_rate": 0.001, "loss": 2.4975, "step": 168500 }, { "epoch": 21.797026502908857, "grad_norm": 5.234375953674316, "learning_rate": 0.001, "loss": 2.4963, "step": 168600 }, { "epoch": 21.809954751131222, "grad_norm": 0.6687754988670349, "learning_rate": 0.001, "loss": 2.5032, "step": 168700 }, { "epoch": 21.822882999353588, "grad_norm": 0.9019963145256042, "learning_rate": 0.001, "loss": 2.48, "step": 168800 }, { "epoch": 21.835811247575954, "grad_norm": 0.9769672155380249, "learning_rate": 0.001, "loss": 2.471, "step": 168900 }, { "epoch": 21.84873949579832, "grad_norm": 0.8045530319213867, "learning_rate": 0.001, "loss": 2.5044, "step": 169000 }, { "epoch": 21.861667744020686, "grad_norm": 1.1124022006988525, "learning_rate": 0.001, "loss": 2.4862, "step": 169100 }, { "epoch": 21.87459599224305, "grad_norm": 0.751042366027832, "learning_rate": 0.001, "loss": 2.4942, "step": 169200 }, { "epoch": 21.887524240465417, "grad_norm": 0.8436326384544373, "learning_rate": 0.001, "loss": 2.507, "step": 169300 }, { "epoch": 21.900452488687783, "grad_norm": 10.37498664855957, "learning_rate": 0.001, "loss": 2.5136, "step": 169400 }, { "epoch": 21.91338073691015, "grad_norm": 0.8866704702377319, "learning_rate": 0.001, "loss": 2.4908, "step": 169500 }, { "epoch": 21.926308985132515, "grad_norm": 0.9314737319946289, "learning_rate": 0.001, "loss": 2.5031, "step": 169600 }, { "epoch": 21.93923723335488, "grad_norm": 0.9863609075546265, "learning_rate": 0.001, "loss": 2.5171, "step": 169700 }, { "epoch": 21.952165481577246, "grad_norm": 0.8950083255767822, "learning_rate": 0.001, "loss": 2.4962, "step": 169800 }, { "epoch": 21.965093729799612, "grad_norm": 0.8532005548477173, "learning_rate": 0.001, "loss": 2.5077, "step": 169900 }, { "epoch": 21.978021978021978, "grad_norm": 1.3771615028381348, "learning_rate": 0.001, "loss": 2.4928, "step": 170000 }, { "epoch": 21.990950226244344, "grad_norm": 0.9057130813598633, "learning_rate": 0.001, "loss": 2.4877, "step": 170100 }, { "epoch": 22.00387847446671, "grad_norm": 0.9760139584541321, "learning_rate": 0.001, "loss": 2.497, "step": 170200 }, { "epoch": 22.016806722689076, "grad_norm": 2.7030029296875, "learning_rate": 0.001, "loss": 2.4162, "step": 170300 }, { "epoch": 22.02973497091144, "grad_norm": 1.070343017578125, "learning_rate": 0.001, "loss": 2.4066, "step": 170400 }, { "epoch": 22.042663219133807, "grad_norm": 0.8392583727836609, "learning_rate": 0.001, "loss": 2.4182, "step": 170500 }, { "epoch": 22.055591467356173, "grad_norm": 0.8850677013397217, "learning_rate": 0.001, "loss": 2.4056, "step": 170600 }, { "epoch": 22.06851971557854, "grad_norm": 0.9662542343139648, "learning_rate": 0.001, "loss": 2.3961, "step": 170700 }, { "epoch": 22.081447963800905, "grad_norm": 0.9150701761245728, "learning_rate": 0.001, "loss": 2.4176, "step": 170800 }, { "epoch": 22.09437621202327, "grad_norm": 1.3634603023529053, "learning_rate": 0.001, "loss": 2.4178, "step": 170900 }, { "epoch": 22.107304460245636, "grad_norm": 0.8842512369155884, "learning_rate": 0.001, "loss": 2.4394, "step": 171000 }, { "epoch": 22.120232708468002, "grad_norm": 0.684041440486908, "learning_rate": 0.001, "loss": 2.4432, "step": 171100 }, { "epoch": 22.133160956690368, "grad_norm": 0.9396585822105408, "learning_rate": 0.001, "loss": 2.4197, "step": 171200 }, { "epoch": 22.146089204912734, "grad_norm": 0.8042652010917664, "learning_rate": 0.001, "loss": 2.4078, "step": 171300 }, { "epoch": 22.1590174531351, "grad_norm": 0.8542316555976868, "learning_rate": 0.001, "loss": 2.43, "step": 171400 }, { "epoch": 22.171945701357465, "grad_norm": 2.404662847518921, "learning_rate": 0.001, "loss": 2.4346, "step": 171500 }, { "epoch": 22.18487394957983, "grad_norm": 1.0905617475509644, "learning_rate": 0.001, "loss": 2.448, "step": 171600 }, { "epoch": 22.197802197802197, "grad_norm": 0.7254491448402405, "learning_rate": 0.001, "loss": 2.4201, "step": 171700 }, { "epoch": 22.210730446024563, "grad_norm": 1.023796796798706, "learning_rate": 0.001, "loss": 2.422, "step": 171800 }, { "epoch": 22.22365869424693, "grad_norm": 1.4993089437484741, "learning_rate": 0.001, "loss": 2.4256, "step": 171900 }, { "epoch": 22.236586942469295, "grad_norm": 0.7927997708320618, "learning_rate": 0.001, "loss": 2.4715, "step": 172000 }, { "epoch": 22.24951519069166, "grad_norm": 0.9547163844108582, "learning_rate": 0.001, "loss": 2.4319, "step": 172100 }, { "epoch": 22.262443438914026, "grad_norm": 1.0138567686080933, "learning_rate": 0.001, "loss": 2.4245, "step": 172200 }, { "epoch": 22.275371687136392, "grad_norm": 1.4015204906463623, "learning_rate": 0.001, "loss": 2.4421, "step": 172300 }, { "epoch": 22.288299935358758, "grad_norm": 4.051333427429199, "learning_rate": 0.001, "loss": 2.4325, "step": 172400 }, { "epoch": 22.301228183581124, "grad_norm": 0.8529426455497742, "learning_rate": 0.001, "loss": 2.4453, "step": 172500 }, { "epoch": 22.31415643180349, "grad_norm": 0.9433965086936951, "learning_rate": 0.001, "loss": 2.4252, "step": 172600 }, { "epoch": 22.327084680025855, "grad_norm": 0.7183476686477661, "learning_rate": 0.001, "loss": 2.4427, "step": 172700 }, { "epoch": 22.34001292824822, "grad_norm": 2.399972915649414, "learning_rate": 0.001, "loss": 2.4565, "step": 172800 }, { "epoch": 22.352941176470587, "grad_norm": 8.170467376708984, "learning_rate": 0.001, "loss": 2.4501, "step": 172900 }, { "epoch": 22.365869424692953, "grad_norm": 1.2024199962615967, "learning_rate": 0.001, "loss": 2.4602, "step": 173000 }, { "epoch": 22.37879767291532, "grad_norm": 0.9576772451400757, "learning_rate": 0.001, "loss": 2.4544, "step": 173100 }, { "epoch": 22.391725921137684, "grad_norm": 0.8780048489570618, "learning_rate": 0.001, "loss": 2.4687, "step": 173200 }, { "epoch": 22.40465416936005, "grad_norm": 1.069534182548523, "learning_rate": 0.001, "loss": 2.4831, "step": 173300 }, { "epoch": 22.417582417582416, "grad_norm": 0.8322734236717224, "learning_rate": 0.001, "loss": 2.4457, "step": 173400 }, { "epoch": 22.430510665804782, "grad_norm": 0.8525444269180298, "learning_rate": 0.001, "loss": 2.4508, "step": 173500 }, { "epoch": 22.443438914027148, "grad_norm": 1.0242419242858887, "learning_rate": 0.001, "loss": 2.4464, "step": 173600 }, { "epoch": 22.456367162249514, "grad_norm": 0.7690640091896057, "learning_rate": 0.001, "loss": 2.457, "step": 173700 }, { "epoch": 22.46929541047188, "grad_norm": 2.4258804321289062, "learning_rate": 0.001, "loss": 2.4745, "step": 173800 }, { "epoch": 22.482223658694245, "grad_norm": 4.005376815795898, "learning_rate": 0.001, "loss": 2.4428, "step": 173900 }, { "epoch": 22.49515190691661, "grad_norm": 0.818865180015564, "learning_rate": 0.001, "loss": 2.4718, "step": 174000 }, { "epoch": 22.508080155138977, "grad_norm": 0.938861072063446, "learning_rate": 0.001, "loss": 2.4477, "step": 174100 }, { "epoch": 22.521008403361346, "grad_norm": 0.8475835919380188, "learning_rate": 0.001, "loss": 2.4543, "step": 174200 }, { "epoch": 22.533936651583712, "grad_norm": 0.8531373739242554, "learning_rate": 0.001, "loss": 2.4642, "step": 174300 }, { "epoch": 22.546864899806078, "grad_norm": 0.8083274960517883, "learning_rate": 0.001, "loss": 2.4633, "step": 174400 }, { "epoch": 22.559793148028444, "grad_norm": 0.7766776084899902, "learning_rate": 0.001, "loss": 2.4699, "step": 174500 }, { "epoch": 22.57272139625081, "grad_norm": 0.8035716414451599, "learning_rate": 0.001, "loss": 2.4478, "step": 174600 }, { "epoch": 22.585649644473175, "grad_norm": 0.8608381748199463, "learning_rate": 0.001, "loss": 2.4735, "step": 174700 }, { "epoch": 22.59857789269554, "grad_norm": 0.8293935656547546, "learning_rate": 0.001, "loss": 2.4497, "step": 174800 }, { "epoch": 22.611506140917907, "grad_norm": 0.7660079598426819, "learning_rate": 0.001, "loss": 2.4689, "step": 174900 }, { "epoch": 22.624434389140273, "grad_norm": 1.27647864818573, "learning_rate": 0.001, "loss": 2.4701, "step": 175000 }, { "epoch": 22.63736263736264, "grad_norm": 0.8778761029243469, "learning_rate": 0.001, "loss": 2.4782, "step": 175100 }, { "epoch": 22.650290885585004, "grad_norm": 1.8253538608551025, "learning_rate": 0.001, "loss": 2.4611, "step": 175200 }, { "epoch": 22.66321913380737, "grad_norm": 0.943833589553833, "learning_rate": 0.001, "loss": 2.4753, "step": 175300 }, { "epoch": 22.676147382029736, "grad_norm": 0.8975098133087158, "learning_rate": 0.001, "loss": 2.4791, "step": 175400 }, { "epoch": 22.689075630252102, "grad_norm": 0.7694480419158936, "learning_rate": 0.001, "loss": 2.4619, "step": 175500 }, { "epoch": 22.702003878474468, "grad_norm": 1.3286608457565308, "learning_rate": 0.001, "loss": 2.461, "step": 175600 }, { "epoch": 22.714932126696834, "grad_norm": 1.6331632137298584, "learning_rate": 0.001, "loss": 2.4806, "step": 175700 }, { "epoch": 22.7278603749192, "grad_norm": 1.053825855255127, "learning_rate": 0.001, "loss": 2.4624, "step": 175800 }, { "epoch": 22.740788623141565, "grad_norm": 0.954031229019165, "learning_rate": 0.001, "loss": 2.4686, "step": 175900 }, { "epoch": 22.75371687136393, "grad_norm": 0.8499009609222412, "learning_rate": 0.001, "loss": 2.4729, "step": 176000 }, { "epoch": 22.766645119586297, "grad_norm": 0.8807371258735657, "learning_rate": 0.001, "loss": 2.4825, "step": 176100 }, { "epoch": 22.779573367808663, "grad_norm": 0.7144501805305481, "learning_rate": 0.001, "loss": 2.4939, "step": 176200 }, { "epoch": 22.79250161603103, "grad_norm": 1.3167195320129395, "learning_rate": 0.001, "loss": 2.4987, "step": 176300 }, { "epoch": 22.805429864253394, "grad_norm": 0.7931557297706604, "learning_rate": 0.001, "loss": 2.4683, "step": 176400 }, { "epoch": 22.81835811247576, "grad_norm": 1.0890754461288452, "learning_rate": 0.001, "loss": 2.4724, "step": 176500 }, { "epoch": 22.831286360698126, "grad_norm": 0.9044108390808105, "learning_rate": 0.001, "loss": 2.4795, "step": 176600 }, { "epoch": 22.844214608920492, "grad_norm": 0.8664490580558777, "learning_rate": 0.001, "loss": 2.4564, "step": 176700 }, { "epoch": 22.857142857142858, "grad_norm": 6.2639055252075195, "learning_rate": 0.001, "loss": 2.4897, "step": 176800 }, { "epoch": 22.870071105365223, "grad_norm": 2.18843936920166, "learning_rate": 0.001, "loss": 2.4726, "step": 176900 }, { "epoch": 22.88299935358759, "grad_norm": 4.489678382873535, "learning_rate": 0.001, "loss": 2.4595, "step": 177000 }, { "epoch": 22.895927601809955, "grad_norm": 15.298226356506348, "learning_rate": 0.001, "loss": 2.4827, "step": 177100 }, { "epoch": 22.90885585003232, "grad_norm": 0.8042095303535461, "learning_rate": 0.001, "loss": 2.4861, "step": 177200 }, { "epoch": 22.921784098254687, "grad_norm": 0.945747435092926, "learning_rate": 0.001, "loss": 2.4977, "step": 177300 }, { "epoch": 22.934712346477053, "grad_norm": 0.8454249501228333, "learning_rate": 0.001, "loss": 2.4694, "step": 177400 }, { "epoch": 22.94764059469942, "grad_norm": 0.8025349974632263, "learning_rate": 0.001, "loss": 2.471, "step": 177500 }, { "epoch": 22.960568842921784, "grad_norm": 2.019660234451294, "learning_rate": 0.001, "loss": 2.4971, "step": 177600 }, { "epoch": 22.97349709114415, "grad_norm": 0.8746926784515381, "learning_rate": 0.001, "loss": 2.4779, "step": 177700 }, { "epoch": 22.986425339366516, "grad_norm": 0.7533841133117676, "learning_rate": 0.001, "loss": 2.4717, "step": 177800 }, { "epoch": 22.99935358758888, "grad_norm": 1.041802167892456, "learning_rate": 0.001, "loss": 2.4836, "step": 177900 }, { "epoch": 23.012281835811248, "grad_norm": 0.94200599193573, "learning_rate": 0.001, "loss": 2.359, "step": 178000 }, { "epoch": 23.025210084033613, "grad_norm": 1.003679871559143, "learning_rate": 0.001, "loss": 2.4016, "step": 178100 }, { "epoch": 23.03813833225598, "grad_norm": 0.904025673866272, "learning_rate": 0.001, "loss": 2.3979, "step": 178200 }, { "epoch": 23.051066580478345, "grad_norm": 1.1821460723876953, "learning_rate": 0.001, "loss": 2.3934, "step": 178300 }, { "epoch": 23.06399482870071, "grad_norm": 0.9919053316116333, "learning_rate": 0.001, "loss": 2.4103, "step": 178400 }, { "epoch": 23.076923076923077, "grad_norm": 1.8093540668487549, "learning_rate": 0.001, "loss": 2.4039, "step": 178500 }, { "epoch": 23.089851325145442, "grad_norm": 1.2199440002441406, "learning_rate": 0.001, "loss": 2.3842, "step": 178600 }, { "epoch": 23.10277957336781, "grad_norm": 1.2631572484970093, "learning_rate": 0.001, "loss": 2.4143, "step": 178700 }, { "epoch": 23.115707821590174, "grad_norm": 19.340368270874023, "learning_rate": 0.001, "loss": 2.3888, "step": 178800 }, { "epoch": 23.12863606981254, "grad_norm": 2.5557188987731934, "learning_rate": 0.001, "loss": 2.4187, "step": 178900 }, { "epoch": 23.141564318034906, "grad_norm": 1.0816459655761719, "learning_rate": 0.001, "loss": 2.4194, "step": 179000 }, { "epoch": 23.15449256625727, "grad_norm": 0.9932698607444763, "learning_rate": 0.001, "loss": 2.4009, "step": 179100 }, { "epoch": 23.167420814479637, "grad_norm": 2.2178075313568115, "learning_rate": 0.001, "loss": 2.4063, "step": 179200 }, { "epoch": 23.180349062702003, "grad_norm": 0.9215783476829529, "learning_rate": 0.001, "loss": 2.4233, "step": 179300 }, { "epoch": 23.19327731092437, "grad_norm": 3.399376630783081, "learning_rate": 0.001, "loss": 2.4209, "step": 179400 }, { "epoch": 23.206205559146735, "grad_norm": 0.9536006450653076, "learning_rate": 0.001, "loss": 2.4281, "step": 179500 }, { "epoch": 23.2191338073691, "grad_norm": 1.1709249019622803, "learning_rate": 0.001, "loss": 2.4142, "step": 179600 }, { "epoch": 23.232062055591467, "grad_norm": 1.1491050720214844, "learning_rate": 0.001, "loss": 2.4405, "step": 179700 }, { "epoch": 23.244990303813832, "grad_norm": 0.9619319438934326, "learning_rate": 0.001, "loss": 2.4296, "step": 179800 }, { "epoch": 23.257918552036198, "grad_norm": 4.022651195526123, "learning_rate": 0.001, "loss": 2.412, "step": 179900 }, { "epoch": 23.270846800258564, "grad_norm": 0.9682009816169739, "learning_rate": 0.001, "loss": 2.4137, "step": 180000 }, { "epoch": 23.28377504848093, "grad_norm": 1.1711292266845703, "learning_rate": 0.001, "loss": 2.4277, "step": 180100 }, { "epoch": 23.296703296703296, "grad_norm": 0.9100988507270813, "learning_rate": 0.001, "loss": 2.4275, "step": 180200 }, { "epoch": 23.30963154492566, "grad_norm": 2.9555561542510986, "learning_rate": 0.001, "loss": 2.4289, "step": 180300 }, { "epoch": 23.322559793148027, "grad_norm": 1.445806622505188, "learning_rate": 0.001, "loss": 2.4315, "step": 180400 }, { "epoch": 23.335488041370393, "grad_norm": 0.9703245759010315, "learning_rate": 0.001, "loss": 2.431, "step": 180500 }, { "epoch": 23.34841628959276, "grad_norm": 1.0125597715377808, "learning_rate": 0.001, "loss": 2.4372, "step": 180600 }, { "epoch": 23.361344537815125, "grad_norm": 0.9580983519554138, "learning_rate": 0.001, "loss": 2.4526, "step": 180700 }, { "epoch": 23.37427278603749, "grad_norm": 1.1778483390808105, "learning_rate": 0.001, "loss": 2.4413, "step": 180800 }, { "epoch": 23.387201034259856, "grad_norm": 2.0782716274261475, "learning_rate": 0.001, "loss": 2.4665, "step": 180900 }, { "epoch": 23.400129282482222, "grad_norm": 10.331995964050293, "learning_rate": 0.001, "loss": 2.4283, "step": 181000 }, { "epoch": 23.413057530704588, "grad_norm": 0.978748619556427, "learning_rate": 0.001, "loss": 2.4343, "step": 181100 }, { "epoch": 23.425985778926954, "grad_norm": 1.1604937314987183, "learning_rate": 0.001, "loss": 2.4504, "step": 181200 }, { "epoch": 23.43891402714932, "grad_norm": 1.1536979675292969, "learning_rate": 0.001, "loss": 2.4345, "step": 181300 }, { "epoch": 23.451842275371686, "grad_norm": 1.0077041387557983, "learning_rate": 0.001, "loss": 2.4337, "step": 181400 }, { "epoch": 23.46477052359405, "grad_norm": 1.017958164215088, "learning_rate": 0.001, "loss": 2.4375, "step": 181500 }, { "epoch": 23.477698771816417, "grad_norm": 1.3163269758224487, "learning_rate": 0.001, "loss": 2.447, "step": 181600 }, { "epoch": 23.490627020038783, "grad_norm": 1.6506413221359253, "learning_rate": 0.001, "loss": 2.436, "step": 181700 }, { "epoch": 23.503555268261152, "grad_norm": 1.0066903829574585, "learning_rate": 0.001, "loss": 2.4281, "step": 181800 }, { "epoch": 23.516483516483518, "grad_norm": 0.9972594976425171, "learning_rate": 0.001, "loss": 2.4361, "step": 181900 }, { "epoch": 23.529411764705884, "grad_norm": 1.2724612951278687, "learning_rate": 0.001, "loss": 2.4379, "step": 182000 }, { "epoch": 23.54234001292825, "grad_norm": 1.7165181636810303, "learning_rate": 0.001, "loss": 2.4374, "step": 182100 }, { "epoch": 23.555268261150616, "grad_norm": 0.9853883981704712, "learning_rate": 0.001, "loss": 2.4403, "step": 182200 }, { "epoch": 23.56819650937298, "grad_norm": 1.2415176630020142, "learning_rate": 0.001, "loss": 2.4658, "step": 182300 }, { "epoch": 23.581124757595347, "grad_norm": 1.034203290939331, "learning_rate": 0.001, "loss": 2.4542, "step": 182400 }, { "epoch": 23.594053005817713, "grad_norm": 1.0210163593292236, "learning_rate": 0.001, "loss": 2.4498, "step": 182500 }, { "epoch": 23.60698125404008, "grad_norm": 1.8754364252090454, "learning_rate": 0.001, "loss": 2.4266, "step": 182600 }, { "epoch": 23.619909502262445, "grad_norm": 1.1705867052078247, "learning_rate": 0.001, "loss": 2.46, "step": 182700 }, { "epoch": 23.63283775048481, "grad_norm": 1.2474311590194702, "learning_rate": 0.001, "loss": 2.438, "step": 182800 }, { "epoch": 23.645765998707176, "grad_norm": 1.6733574867248535, "learning_rate": 0.001, "loss": 2.4347, "step": 182900 }, { "epoch": 23.658694246929542, "grad_norm": 1.1129391193389893, "learning_rate": 0.001, "loss": 2.4423, "step": 183000 }, { "epoch": 23.671622495151908, "grad_norm": 1.5823432207107544, "learning_rate": 0.001, "loss": 2.4461, "step": 183100 }, { "epoch": 23.684550743374274, "grad_norm": 1.0494744777679443, "learning_rate": 0.001, "loss": 2.448, "step": 183200 }, { "epoch": 23.69747899159664, "grad_norm": 1.1325232982635498, "learning_rate": 0.001, "loss": 2.4708, "step": 183300 }, { "epoch": 23.710407239819006, "grad_norm": 1.3630377054214478, "learning_rate": 0.001, "loss": 2.4635, "step": 183400 }, { "epoch": 23.72333548804137, "grad_norm": 2.519577980041504, "learning_rate": 0.001, "loss": 2.456, "step": 183500 }, { "epoch": 23.736263736263737, "grad_norm": 3.934995412826538, "learning_rate": 0.001, "loss": 2.4637, "step": 183600 }, { "epoch": 23.749191984486103, "grad_norm": 1.0478836297988892, "learning_rate": 0.001, "loss": 2.4489, "step": 183700 }, { "epoch": 23.76212023270847, "grad_norm": 1.0461094379425049, "learning_rate": 0.001, "loss": 2.4669, "step": 183800 }, { "epoch": 23.775048480930835, "grad_norm": 1.7816814184188843, "learning_rate": 0.001, "loss": 2.468, "step": 183900 }, { "epoch": 23.7879767291532, "grad_norm": 1.400420904159546, "learning_rate": 0.001, "loss": 2.4559, "step": 184000 }, { "epoch": 23.800904977375566, "grad_norm": 0.9643328189849854, "learning_rate": 0.001, "loss": 2.4585, "step": 184100 }, { "epoch": 23.813833225597932, "grad_norm": 0.9494936466217041, "learning_rate": 0.001, "loss": 2.4782, "step": 184200 }, { "epoch": 23.826761473820298, "grad_norm": 1.0424139499664307, "learning_rate": 0.001, "loss": 2.4587, "step": 184300 }, { "epoch": 23.839689722042664, "grad_norm": 0.9896219372749329, "learning_rate": 0.001, "loss": 2.459, "step": 184400 }, { "epoch": 23.85261797026503, "grad_norm": 0.8715159296989441, "learning_rate": 0.001, "loss": 2.4558, "step": 184500 }, { "epoch": 23.865546218487395, "grad_norm": 1.0939120054244995, "learning_rate": 0.001, "loss": 2.4704, "step": 184600 }, { "epoch": 23.87847446670976, "grad_norm": 0.9343132376670837, "learning_rate": 0.001, "loss": 2.4859, "step": 184700 }, { "epoch": 23.891402714932127, "grad_norm": 1.1476508378982544, "learning_rate": 0.001, "loss": 2.479, "step": 184800 }, { "epoch": 23.904330963154493, "grad_norm": 1.5956569910049438, "learning_rate": 0.001, "loss": 2.4538, "step": 184900 }, { "epoch": 23.91725921137686, "grad_norm": 1.2100390195846558, "learning_rate": 0.001, "loss": 2.4769, "step": 185000 }, { "epoch": 23.930187459599225, "grad_norm": 6.826539039611816, "learning_rate": 0.001, "loss": 2.4751, "step": 185100 }, { "epoch": 23.94311570782159, "grad_norm": 2.8177578449249268, "learning_rate": 0.001, "loss": 2.4749, "step": 185200 }, { "epoch": 23.956043956043956, "grad_norm": 0.9792162179946899, "learning_rate": 0.001, "loss": 2.4734, "step": 185300 }, { "epoch": 23.968972204266322, "grad_norm": 1.2187882661819458, "learning_rate": 0.001, "loss": 2.4631, "step": 185400 }, { "epoch": 23.981900452488688, "grad_norm": 1.5382040739059448, "learning_rate": 0.001, "loss": 2.4641, "step": 185500 }, { "epoch": 23.994828700711054, "grad_norm": 9.08344554901123, "learning_rate": 0.001, "loss": 2.4651, "step": 185600 }, { "epoch": 24.00775694893342, "grad_norm": 0.9780240654945374, "learning_rate": 0.001, "loss": 2.3925, "step": 185700 }, { "epoch": 24.020685197155785, "grad_norm": 0.8266626596450806, "learning_rate": 0.001, "loss": 2.3768, "step": 185800 }, { "epoch": 24.03361344537815, "grad_norm": 0.9092418551445007, "learning_rate": 0.001, "loss": 2.3879, "step": 185900 }, { "epoch": 24.046541693600517, "grad_norm": 4.737064838409424, "learning_rate": 0.001, "loss": 2.3746, "step": 186000 }, { "epoch": 24.059469941822883, "grad_norm": 0.8734612464904785, "learning_rate": 0.001, "loss": 2.4094, "step": 186100 }, { "epoch": 24.07239819004525, "grad_norm": 0.9324131011962891, "learning_rate": 0.001, "loss": 2.3777, "step": 186200 }, { "epoch": 24.085326438267614, "grad_norm": 1.2980215549468994, "learning_rate": 0.001, "loss": 2.4044, "step": 186300 }, { "epoch": 24.09825468648998, "grad_norm": 1.2382272481918335, "learning_rate": 0.001, "loss": 2.3938, "step": 186400 }, { "epoch": 24.111182934712346, "grad_norm": 1.3172459602355957, "learning_rate": 0.001, "loss": 2.3938, "step": 186500 }, { "epoch": 24.124111182934712, "grad_norm": 1.1268738508224487, "learning_rate": 0.001, "loss": 2.3962, "step": 186600 }, { "epoch": 24.137039431157078, "grad_norm": 0.8291018009185791, "learning_rate": 0.001, "loss": 2.4004, "step": 186700 }, { "epoch": 24.149967679379444, "grad_norm": 0.8068827986717224, "learning_rate": 0.001, "loss": 2.3854, "step": 186800 }, { "epoch": 24.16289592760181, "grad_norm": 0.7618997097015381, "learning_rate": 0.001, "loss": 2.4028, "step": 186900 }, { "epoch": 24.175824175824175, "grad_norm": 0.8239403963088989, "learning_rate": 0.001, "loss": 2.4177, "step": 187000 }, { "epoch": 24.18875242404654, "grad_norm": 0.8769806027412415, "learning_rate": 0.001, "loss": 2.4035, "step": 187100 }, { "epoch": 24.201680672268907, "grad_norm": 0.8456646203994751, "learning_rate": 0.001, "loss": 2.421, "step": 187200 }, { "epoch": 24.214608920491273, "grad_norm": 0.9527842402458191, "learning_rate": 0.001, "loss": 2.3975, "step": 187300 }, { "epoch": 24.22753716871364, "grad_norm": 1.7951229810714722, "learning_rate": 0.001, "loss": 2.4109, "step": 187400 }, { "epoch": 24.240465416936004, "grad_norm": 0.8749905228614807, "learning_rate": 0.001, "loss": 2.4181, "step": 187500 }, { "epoch": 24.25339366515837, "grad_norm": 1.3025789260864258, "learning_rate": 0.001, "loss": 2.4117, "step": 187600 }, { "epoch": 24.266321913380736, "grad_norm": 1.127397894859314, "learning_rate": 0.001, "loss": 2.3908, "step": 187700 }, { "epoch": 24.279250161603102, "grad_norm": 1.6609126329421997, "learning_rate": 0.001, "loss": 2.4163, "step": 187800 }, { "epoch": 24.292178409825468, "grad_norm": 0.9886419773101807, "learning_rate": 0.001, "loss": 2.4178, "step": 187900 }, { "epoch": 24.305106658047833, "grad_norm": 0.7825621366500854, "learning_rate": 0.001, "loss": 2.4168, "step": 188000 }, { "epoch": 24.3180349062702, "grad_norm": 1.7965352535247803, "learning_rate": 0.001, "loss": 2.3652, "step": 188100 }, { "epoch": 24.330963154492565, "grad_norm": 1.0114789009094238, "learning_rate": 0.001, "loss": 2.3803, "step": 188200 }, { "epoch": 24.34389140271493, "grad_norm": 0.8431440591812134, "learning_rate": 0.001, "loss": 2.389, "step": 188300 }, { "epoch": 24.356819650937297, "grad_norm": 1.060101866722107, "learning_rate": 0.001, "loss": 2.3937, "step": 188400 }, { "epoch": 24.369747899159663, "grad_norm": 1.1677331924438477, "learning_rate": 0.001, "loss": 2.4017, "step": 188500 }, { "epoch": 24.38267614738203, "grad_norm": 1.2631828784942627, "learning_rate": 0.001, "loss": 2.3844, "step": 188600 }, { "epoch": 24.395604395604394, "grad_norm": 0.7687875628471375, "learning_rate": 0.001, "loss": 2.3882, "step": 188700 }, { "epoch": 24.40853264382676, "grad_norm": 1.0170830488204956, "learning_rate": 0.001, "loss": 2.3897, "step": 188800 }, { "epoch": 24.421460892049126, "grad_norm": 0.9041008949279785, "learning_rate": 0.001, "loss": 2.3783, "step": 188900 }, { "epoch": 24.43438914027149, "grad_norm": 1.071158528327942, "learning_rate": 0.001, "loss": 2.4147, "step": 189000 }, { "epoch": 24.447317388493857, "grad_norm": 1.1613526344299316, "learning_rate": 0.001, "loss": 2.3885, "step": 189100 }, { "epoch": 24.460245636716223, "grad_norm": 0.9818555116653442, "learning_rate": 0.001, "loss": 2.3856, "step": 189200 }, { "epoch": 24.47317388493859, "grad_norm": 0.8324120044708252, "learning_rate": 0.001, "loss": 2.4167, "step": 189300 }, { "epoch": 24.486102133160955, "grad_norm": 1.074468731880188, "learning_rate": 0.001, "loss": 2.4092, "step": 189400 }, { "epoch": 24.49903038138332, "grad_norm": 2.2534215450286865, "learning_rate": 0.001, "loss": 2.4188, "step": 189500 }, { "epoch": 24.511958629605687, "grad_norm": 0.8064150214195251, "learning_rate": 0.001, "loss": 2.4112, "step": 189600 }, { "epoch": 24.524886877828056, "grad_norm": 2.4953372478485107, "learning_rate": 0.001, "loss": 2.4144, "step": 189700 }, { "epoch": 24.537815126050422, "grad_norm": 0.8996461629867554, "learning_rate": 0.001, "loss": 2.4189, "step": 189800 }, { "epoch": 24.550743374272788, "grad_norm": 0.9721525311470032, "learning_rate": 0.001, "loss": 2.3978, "step": 189900 }, { "epoch": 24.563671622495153, "grad_norm": 0.8884000182151794, "learning_rate": 0.001, "loss": 2.4067, "step": 190000 }, { "epoch": 24.57659987071752, "grad_norm": 0.9819591045379639, "learning_rate": 0.001, "loss": 2.4134, "step": 190100 }, { "epoch": 24.589528118939885, "grad_norm": 1.1628437042236328, "learning_rate": 0.001, "loss": 2.4059, "step": 190200 }, { "epoch": 24.60245636716225, "grad_norm": 0.763469398021698, "learning_rate": 0.001, "loss": 2.4135, "step": 190300 }, { "epoch": 24.615384615384617, "grad_norm": 0.9251025319099426, "learning_rate": 0.001, "loss": 2.4045, "step": 190400 }, { "epoch": 24.628312863606983, "grad_norm": 0.9581373929977417, "learning_rate": 0.001, "loss": 2.416, "step": 190500 }, { "epoch": 24.64124111182935, "grad_norm": 1.031028389930725, "learning_rate": 0.001, "loss": 2.4068, "step": 190600 }, { "epoch": 24.654169360051714, "grad_norm": 0.8350049257278442, "learning_rate": 0.001, "loss": 2.413, "step": 190700 }, { "epoch": 24.66709760827408, "grad_norm": 1.0323007106781006, "learning_rate": 0.001, "loss": 2.4155, "step": 190800 }, { "epoch": 24.680025856496446, "grad_norm": 1.1615450382232666, "learning_rate": 0.001, "loss": 2.4226, "step": 190900 }, { "epoch": 24.69295410471881, "grad_norm": 1.370247483253479, "learning_rate": 0.001, "loss": 2.4159, "step": 191000 }, { "epoch": 24.705882352941178, "grad_norm": 1.144364595413208, "learning_rate": 0.001, "loss": 2.4195, "step": 191100 }, { "epoch": 24.718810601163543, "grad_norm": 0.9929494857788086, "learning_rate": 0.001, "loss": 2.4227, "step": 191200 }, { "epoch": 24.73173884938591, "grad_norm": 0.9529424905776978, "learning_rate": 0.001, "loss": 2.4111, "step": 191300 }, { "epoch": 24.744667097608275, "grad_norm": 1.4608503580093384, "learning_rate": 0.001, "loss": 2.4362, "step": 191400 }, { "epoch": 24.75759534583064, "grad_norm": 1.5169272422790527, "learning_rate": 0.001, "loss": 2.4251, "step": 191500 }, { "epoch": 24.770523594053007, "grad_norm": 0.9267252087593079, "learning_rate": 0.001, "loss": 2.4175, "step": 191600 }, { "epoch": 24.783451842275372, "grad_norm": 0.8878386616706848, "learning_rate": 0.001, "loss": 2.4009, "step": 191700 }, { "epoch": 24.79638009049774, "grad_norm": 0.9165541529655457, "learning_rate": 0.001, "loss": 2.4147, "step": 191800 }, { "epoch": 24.809308338720104, "grad_norm": 0.8393217325210571, "learning_rate": 0.001, "loss": 2.4057, "step": 191900 }, { "epoch": 24.82223658694247, "grad_norm": 2.0102744102478027, "learning_rate": 0.001, "loss": 2.4202, "step": 192000 }, { "epoch": 24.835164835164836, "grad_norm": 0.8082312345504761, "learning_rate": 0.001, "loss": 2.4284, "step": 192100 }, { "epoch": 24.8480930833872, "grad_norm": 1.4527651071548462, "learning_rate": 0.001, "loss": 2.4165, "step": 192200 }, { "epoch": 24.861021331609567, "grad_norm": 90.81161499023438, "learning_rate": 0.001, "loss": 2.4268, "step": 192300 }, { "epoch": 24.873949579831933, "grad_norm": 1.1368752717971802, "learning_rate": 0.001, "loss": 2.426, "step": 192400 }, { "epoch": 24.8868778280543, "grad_norm": 0.8832337260246277, "learning_rate": 0.001, "loss": 2.4398, "step": 192500 }, { "epoch": 24.899806076276665, "grad_norm": 1.120948314666748, "learning_rate": 0.001, "loss": 2.4353, "step": 192600 }, { "epoch": 24.91273432449903, "grad_norm": 0.835502564907074, "learning_rate": 0.001, "loss": 2.4449, "step": 192700 }, { "epoch": 24.925662572721397, "grad_norm": 1.2224787473678589, "learning_rate": 0.001, "loss": 2.4337, "step": 192800 }, { "epoch": 24.938590820943762, "grad_norm": 1.6779547929763794, "learning_rate": 0.001, "loss": 2.4424, "step": 192900 }, { "epoch": 24.951519069166128, "grad_norm": 0.7395892143249512, "learning_rate": 0.001, "loss": 2.4372, "step": 193000 }, { "epoch": 24.964447317388494, "grad_norm": 1.0368883609771729, "learning_rate": 0.001, "loss": 2.442, "step": 193100 }, { "epoch": 24.97737556561086, "grad_norm": 0.823797881603241, "learning_rate": 0.001, "loss": 2.4483, "step": 193200 }, { "epoch": 24.990303813833226, "grad_norm": 0.7474195957183838, "learning_rate": 0.001, "loss": 2.4293, "step": 193300 }, { "epoch": 25.00323206205559, "grad_norm": 0.9888672828674316, "learning_rate": 0.001, "loss": 2.4202, "step": 193400 }, { "epoch": 25.016160310277957, "grad_norm": 0.8165050148963928, "learning_rate": 0.001, "loss": 2.3712, "step": 193500 }, { "epoch": 25.029088558500323, "grad_norm": 1.1115483045578003, "learning_rate": 0.001, "loss": 2.3581, "step": 193600 }, { "epoch": 25.04201680672269, "grad_norm": 3.463916301727295, "learning_rate": 0.001, "loss": 2.3775, "step": 193700 }, { "epoch": 25.054945054945055, "grad_norm": 0.8839962482452393, "learning_rate": 0.001, "loss": 2.3617, "step": 193800 }, { "epoch": 25.06787330316742, "grad_norm": 8.92577838897705, "learning_rate": 0.001, "loss": 2.3803, "step": 193900 }, { "epoch": 25.080801551389786, "grad_norm": 0.922687292098999, "learning_rate": 0.001, "loss": 2.3804, "step": 194000 }, { "epoch": 25.093729799612152, "grad_norm": 0.8975924849510193, "learning_rate": 0.001, "loss": 2.3667, "step": 194100 }, { "epoch": 25.106658047834518, "grad_norm": 0.7488020658493042, "learning_rate": 0.001, "loss": 2.3845, "step": 194200 }, { "epoch": 25.119586296056884, "grad_norm": 0.8589635491371155, "learning_rate": 0.001, "loss": 2.391, "step": 194300 }, { "epoch": 25.13251454427925, "grad_norm": 0.8082188367843628, "learning_rate": 0.001, "loss": 2.3814, "step": 194400 }, { "epoch": 25.145442792501616, "grad_norm": 0.8176384568214417, "learning_rate": 0.001, "loss": 2.3816, "step": 194500 }, { "epoch": 25.15837104072398, "grad_norm": 1.599045753479004, "learning_rate": 0.001, "loss": 2.3926, "step": 194600 }, { "epoch": 25.171299288946347, "grad_norm": 1.4339522123336792, "learning_rate": 0.001, "loss": 2.3953, "step": 194700 }, { "epoch": 25.184227537168713, "grad_norm": 1.0219720602035522, "learning_rate": 0.001, "loss": 2.4048, "step": 194800 }, { "epoch": 25.19715578539108, "grad_norm": 1.0475897789001465, "learning_rate": 0.001, "loss": 2.3983, "step": 194900 }, { "epoch": 25.210084033613445, "grad_norm": 0.9325540065765381, "learning_rate": 0.001, "loss": 2.3853, "step": 195000 }, { "epoch": 25.22301228183581, "grad_norm": 0.968618631362915, "learning_rate": 0.001, "loss": 2.3931, "step": 195100 }, { "epoch": 25.235940530058176, "grad_norm": 0.8765245676040649, "learning_rate": 0.001, "loss": 2.4177, "step": 195200 }, { "epoch": 25.248868778280542, "grad_norm": 0.7703477144241333, "learning_rate": 0.001, "loss": 2.3957, "step": 195300 }, { "epoch": 25.261797026502908, "grad_norm": 1.0075936317443848, "learning_rate": 0.001, "loss": 2.3975, "step": 195400 }, { "epoch": 25.274725274725274, "grad_norm": 1.2777581214904785, "learning_rate": 0.001, "loss": 2.4077, "step": 195500 }, { "epoch": 25.28765352294764, "grad_norm": 0.7478423118591309, "learning_rate": 0.001, "loss": 2.4148, "step": 195600 }, { "epoch": 25.300581771170005, "grad_norm": 0.9187114238739014, "learning_rate": 0.001, "loss": 2.3982, "step": 195700 }, { "epoch": 25.31351001939237, "grad_norm": 1.20347261428833, "learning_rate": 0.001, "loss": 2.4179, "step": 195800 }, { "epoch": 25.326438267614737, "grad_norm": 8.653583526611328, "learning_rate": 0.001, "loss": 2.3926, "step": 195900 }, { "epoch": 25.339366515837103, "grad_norm": 1.0097768306732178, "learning_rate": 0.001, "loss": 2.4171, "step": 196000 }, { "epoch": 25.35229476405947, "grad_norm": 0.8280107975006104, "learning_rate": 0.001, "loss": 2.3885, "step": 196100 }, { "epoch": 25.365223012281835, "grad_norm": 0.9500492215156555, "learning_rate": 0.001, "loss": 2.3976, "step": 196200 }, { "epoch": 25.3781512605042, "grad_norm": 0.9704200625419617, "learning_rate": 0.001, "loss": 2.4156, "step": 196300 }, { "epoch": 25.391079508726566, "grad_norm": 1.7412893772125244, "learning_rate": 0.001, "loss": 2.4218, "step": 196400 }, { "epoch": 25.404007756948932, "grad_norm": 0.8709100484848022, "learning_rate": 0.001, "loss": 2.3974, "step": 196500 }, { "epoch": 25.416936005171298, "grad_norm": 0.8524885773658752, "learning_rate": 0.001, "loss": 2.4293, "step": 196600 }, { "epoch": 25.429864253393664, "grad_norm": 1.0102595090866089, "learning_rate": 0.001, "loss": 2.4116, "step": 196700 }, { "epoch": 25.44279250161603, "grad_norm": 0.9424996972084045, "learning_rate": 0.001, "loss": 2.4208, "step": 196800 }, { "epoch": 25.455720749838395, "grad_norm": 0.8348327279090881, "learning_rate": 0.001, "loss": 2.414, "step": 196900 }, { "epoch": 25.46864899806076, "grad_norm": 0.8716698884963989, "learning_rate": 0.001, "loss": 2.3966, "step": 197000 }, { "epoch": 25.481577246283127, "grad_norm": 7.341949939727783, "learning_rate": 0.001, "loss": 2.4264, "step": 197100 }, { "epoch": 25.494505494505496, "grad_norm": 0.9204500317573547, "learning_rate": 0.001, "loss": 2.3941, "step": 197200 }, { "epoch": 25.507433742727862, "grad_norm": 0.8077972531318665, "learning_rate": 0.001, "loss": 2.4066, "step": 197300 }, { "epoch": 25.520361990950228, "grad_norm": 1.1493667364120483, "learning_rate": 0.001, "loss": 2.4415, "step": 197400 }, { "epoch": 25.533290239172594, "grad_norm": 0.9240307807922363, "learning_rate": 0.001, "loss": 2.4299, "step": 197500 }, { "epoch": 25.54621848739496, "grad_norm": 0.8641419410705566, "learning_rate": 0.001, "loss": 2.4058, "step": 197600 }, { "epoch": 25.559146735617325, "grad_norm": 0.834628701210022, "learning_rate": 0.001, "loss": 2.4174, "step": 197700 }, { "epoch": 25.57207498383969, "grad_norm": 0.8682546615600586, "learning_rate": 0.001, "loss": 2.4155, "step": 197800 }, { "epoch": 25.585003232062057, "grad_norm": 0.8538148403167725, "learning_rate": 0.001, "loss": 2.4239, "step": 197900 }, { "epoch": 25.597931480284423, "grad_norm": 1.0552042722702026, "learning_rate": 0.001, "loss": 2.4054, "step": 198000 }, { "epoch": 25.61085972850679, "grad_norm": 1.4515444040298462, "learning_rate": 0.001, "loss": 2.4189, "step": 198100 }, { "epoch": 25.623787976729155, "grad_norm": 0.7237298488616943, "learning_rate": 0.001, "loss": 2.4073, "step": 198200 }, { "epoch": 25.63671622495152, "grad_norm": 1.4570788145065308, "learning_rate": 0.001, "loss": 2.4181, "step": 198300 }, { "epoch": 25.649644473173886, "grad_norm": 2.9761710166931152, "learning_rate": 0.001, "loss": 2.4118, "step": 198400 }, { "epoch": 25.662572721396252, "grad_norm": 0.7256437540054321, "learning_rate": 0.001, "loss": 2.4186, "step": 198500 }, { "epoch": 25.675500969618618, "grad_norm": 0.7613636255264282, "learning_rate": 0.001, "loss": 2.4026, "step": 198600 }, { "epoch": 25.688429217840984, "grad_norm": 1.1100369691848755, "learning_rate": 0.001, "loss": 2.4338, "step": 198700 }, { "epoch": 25.70135746606335, "grad_norm": 0.6913226246833801, "learning_rate": 0.001, "loss": 2.4475, "step": 198800 }, { "epoch": 25.714285714285715, "grad_norm": 0.8189799785614014, "learning_rate": 0.001, "loss": 2.4156, "step": 198900 }, { "epoch": 25.72721396250808, "grad_norm": 0.8027017116546631, "learning_rate": 0.001, "loss": 2.4036, "step": 199000 }, { "epoch": 25.740142210730447, "grad_norm": 1.1056150197982788, "learning_rate": 0.001, "loss": 2.4206, "step": 199100 }, { "epoch": 25.753070458952813, "grad_norm": 0.8608382344245911, "learning_rate": 0.001, "loss": 2.4356, "step": 199200 }, { "epoch": 25.76599870717518, "grad_norm": 0.8893944621086121, "learning_rate": 0.001, "loss": 2.4056, "step": 199300 }, { "epoch": 25.778926955397544, "grad_norm": 0.7577182054519653, "learning_rate": 0.001, "loss": 2.4191, "step": 199400 }, { "epoch": 25.79185520361991, "grad_norm": 0.8680034875869751, "learning_rate": 0.001, "loss": 2.4391, "step": 199500 }, { "epoch": 25.804783451842276, "grad_norm": 0.8014060854911804, "learning_rate": 0.001, "loss": 2.4488, "step": 199600 }, { "epoch": 25.817711700064642, "grad_norm": 0.9887559413909912, "learning_rate": 0.001, "loss": 2.4265, "step": 199700 }, { "epoch": 25.830639948287008, "grad_norm": 0.6850007176399231, "learning_rate": 0.001, "loss": 2.4378, "step": 199800 }, { "epoch": 25.843568196509374, "grad_norm": 0.8685833811759949, "learning_rate": 0.001, "loss": 2.4502, "step": 199900 }, { "epoch": 25.85649644473174, "grad_norm": 0.7094824314117432, "learning_rate": 0.001, "loss": 2.4279, "step": 200000 }, { "epoch": 25.869424692954105, "grad_norm": 1.1815558671951294, "learning_rate": 0.001, "loss": 2.4447, "step": 200100 }, { "epoch": 25.88235294117647, "grad_norm": 1.0467942953109741, "learning_rate": 0.001, "loss": 2.4343, "step": 200200 }, { "epoch": 25.895281189398837, "grad_norm": 0.8027378916740417, "learning_rate": 0.001, "loss": 2.4459, "step": 200300 }, { "epoch": 25.908209437621203, "grad_norm": 3.8284506797790527, "learning_rate": 0.001, "loss": 2.4314, "step": 200400 }, { "epoch": 25.92113768584357, "grad_norm": 47.214996337890625, "learning_rate": 0.001, "loss": 2.4486, "step": 200500 }, { "epoch": 25.934065934065934, "grad_norm": 4.249235153198242, "learning_rate": 0.001, "loss": 2.4354, "step": 200600 }, { "epoch": 25.9469941822883, "grad_norm": 0.9143348932266235, "learning_rate": 0.001, "loss": 2.4412, "step": 200700 }, { "epoch": 25.959922430510666, "grad_norm": 0.814903974533081, "learning_rate": 0.001, "loss": 2.4605, "step": 200800 }, { "epoch": 25.97285067873303, "grad_norm": 1.0333657264709473, "learning_rate": 0.001, "loss": 2.4533, "step": 200900 }, { "epoch": 25.985778926955398, "grad_norm": 0.7924696803092957, "learning_rate": 0.001, "loss": 2.4403, "step": 201000 }, { "epoch": 25.998707175177763, "grad_norm": 0.901109516620636, "learning_rate": 0.001, "loss": 2.4682, "step": 201100 }, { "epoch": 26.01163542340013, "grad_norm": 0.8966038823127747, "learning_rate": 0.001, "loss": 2.3763, "step": 201200 }, { "epoch": 26.024563671622495, "grad_norm": 0.9458600282669067, "learning_rate": 0.001, "loss": 2.3523, "step": 201300 }, { "epoch": 26.03749191984486, "grad_norm": 1.072376012802124, "learning_rate": 0.001, "loss": 2.3688, "step": 201400 }, { "epoch": 26.050420168067227, "grad_norm": 1.0208388566970825, "learning_rate": 0.001, "loss": 2.3584, "step": 201500 }, { "epoch": 26.063348416289593, "grad_norm": 0.888276219367981, "learning_rate": 0.001, "loss": 2.3446, "step": 201600 }, { "epoch": 26.07627666451196, "grad_norm": 1.1632184982299805, "learning_rate": 0.001, "loss": 2.3802, "step": 201700 }, { "epoch": 26.089204912734324, "grad_norm": 0.9266953468322754, "learning_rate": 0.001, "loss": 2.3645, "step": 201800 }, { "epoch": 26.10213316095669, "grad_norm": 2.7092134952545166, "learning_rate": 0.001, "loss": 2.3775, "step": 201900 }, { "epoch": 26.115061409179056, "grad_norm": 0.9825138449668884, "learning_rate": 0.001, "loss": 2.3609, "step": 202000 }, { "epoch": 26.12798965740142, "grad_norm": 0.9562487006187439, "learning_rate": 0.001, "loss": 2.3714, "step": 202100 }, { "epoch": 26.140917905623787, "grad_norm": 6.3066301345825195, "learning_rate": 0.001, "loss": 2.3658, "step": 202200 }, { "epoch": 26.153846153846153, "grad_norm": 0.917851448059082, "learning_rate": 0.001, "loss": 2.3593, "step": 202300 }, { "epoch": 26.16677440206852, "grad_norm": 1.0236879587173462, "learning_rate": 0.001, "loss": 2.3713, "step": 202400 }, { "epoch": 26.179702650290885, "grad_norm": 0.9276930689811707, "learning_rate": 0.001, "loss": 2.3715, "step": 202500 }, { "epoch": 26.19263089851325, "grad_norm": 1.0350911617279053, "learning_rate": 0.001, "loss": 2.3764, "step": 202600 }, { "epoch": 26.205559146735617, "grad_norm": 0.9642398357391357, "learning_rate": 0.001, "loss": 2.3743, "step": 202700 }, { "epoch": 26.218487394957982, "grad_norm": 0.8644739389419556, "learning_rate": 0.001, "loss": 2.385, "step": 202800 }, { "epoch": 26.23141564318035, "grad_norm": 1.237693428993225, "learning_rate": 0.001, "loss": 2.3946, "step": 202900 }, { "epoch": 26.244343891402714, "grad_norm": 1.380218267440796, "learning_rate": 0.001, "loss": 2.3769, "step": 203000 }, { "epoch": 26.25727213962508, "grad_norm": 0.8029330968856812, "learning_rate": 0.001, "loss": 2.3839, "step": 203100 }, { "epoch": 26.270200387847446, "grad_norm": 1.1607589721679688, "learning_rate": 0.001, "loss": 2.3948, "step": 203200 }, { "epoch": 26.28312863606981, "grad_norm": 1.1254023313522339, "learning_rate": 0.001, "loss": 2.3767, "step": 203300 }, { "epoch": 26.296056884292177, "grad_norm": 1.076112151145935, "learning_rate": 0.001, "loss": 2.3915, "step": 203400 }, { "epoch": 26.308985132514543, "grad_norm": 0.8212273120880127, "learning_rate": 0.001, "loss": 2.3907, "step": 203500 }, { "epoch": 26.32191338073691, "grad_norm": 0.775037407875061, "learning_rate": 0.001, "loss": 2.3887, "step": 203600 }, { "epoch": 26.334841628959275, "grad_norm": 0.923126757144928, "learning_rate": 0.001, "loss": 2.3695, "step": 203700 }, { "epoch": 26.34776987718164, "grad_norm": 0.8735946416854858, "learning_rate": 0.001, "loss": 2.3867, "step": 203800 }, { "epoch": 26.360698125404006, "grad_norm": 1.0111980438232422, "learning_rate": 0.001, "loss": 2.3886, "step": 203900 }, { "epoch": 26.373626373626372, "grad_norm": 0.9280951619148254, "learning_rate": 0.001, "loss": 2.3949, "step": 204000 }, { "epoch": 26.386554621848738, "grad_norm": 0.9065943360328674, "learning_rate": 0.001, "loss": 2.4084, "step": 204100 }, { "epoch": 26.399482870071104, "grad_norm": 0.9176847338676453, "learning_rate": 0.001, "loss": 2.3844, "step": 204200 }, { "epoch": 26.41241111829347, "grad_norm": 0.8462110161781311, "learning_rate": 0.001, "loss": 2.4074, "step": 204300 }, { "epoch": 26.425339366515836, "grad_norm": 1.1313848495483398, "learning_rate": 0.001, "loss": 2.4014, "step": 204400 }, { "epoch": 26.4382676147382, "grad_norm": 0.9265581965446472, "learning_rate": 0.001, "loss": 2.4014, "step": 204500 }, { "epoch": 26.451195862960567, "grad_norm": 1.594151258468628, "learning_rate": 0.001, "loss": 2.4064, "step": 204600 }, { "epoch": 26.464124111182933, "grad_norm": 0.88593590259552, "learning_rate": 0.001, "loss": 2.4059, "step": 204700 }, { "epoch": 26.4770523594053, "grad_norm": 0.9342084527015686, "learning_rate": 0.001, "loss": 2.4159, "step": 204800 }, { "epoch": 26.489980607627665, "grad_norm": 1.0098443031311035, "learning_rate": 0.001, "loss": 2.398, "step": 204900 }, { "epoch": 26.50290885585003, "grad_norm": 0.8435212969779968, "learning_rate": 0.001, "loss": 2.429, "step": 205000 }, { "epoch": 26.5158371040724, "grad_norm": 1.0210003852844238, "learning_rate": 0.001, "loss": 2.415, "step": 205100 }, { "epoch": 26.528765352294766, "grad_norm": 1.0807920694351196, "learning_rate": 0.001, "loss": 2.3778, "step": 205200 }, { "epoch": 26.54169360051713, "grad_norm": 0.9620898962020874, "learning_rate": 0.001, "loss": 2.4024, "step": 205300 }, { "epoch": 26.554621848739497, "grad_norm": 1.1221996545791626, "learning_rate": 0.001, "loss": 2.4106, "step": 205400 }, { "epoch": 26.567550096961863, "grad_norm": 0.9294606447219849, "learning_rate": 0.001, "loss": 2.412, "step": 205500 }, { "epoch": 26.58047834518423, "grad_norm": 1.0933871269226074, "learning_rate": 0.001, "loss": 2.3849, "step": 205600 }, { "epoch": 26.593406593406595, "grad_norm": 0.947729766368866, "learning_rate": 0.001, "loss": 2.4036, "step": 205700 }, { "epoch": 26.60633484162896, "grad_norm": 1.1086753606796265, "learning_rate": 0.001, "loss": 2.3999, "step": 205800 }, { "epoch": 26.619263089851326, "grad_norm": 1.0614217519760132, "learning_rate": 0.001, "loss": 2.3771, "step": 205900 }, { "epoch": 26.632191338073692, "grad_norm": 1.0227724313735962, "learning_rate": 0.001, "loss": 2.3926, "step": 206000 }, { "epoch": 26.645119586296058, "grad_norm": 0.8031155467033386, "learning_rate": 0.001, "loss": 2.4403, "step": 206100 }, { "epoch": 26.658047834518424, "grad_norm": 0.9448789954185486, "learning_rate": 0.001, "loss": 2.4063, "step": 206200 }, { "epoch": 26.67097608274079, "grad_norm": 26.84228515625, "learning_rate": 0.001, "loss": 2.4029, "step": 206300 }, { "epoch": 26.683904330963156, "grad_norm": 13.646012306213379, "learning_rate": 0.001, "loss": 2.4089, "step": 206400 }, { "epoch": 26.69683257918552, "grad_norm": 1.6179442405700684, "learning_rate": 0.001, "loss": 2.4105, "step": 206500 }, { "epoch": 26.709760827407887, "grad_norm": 0.8623113632202148, "learning_rate": 0.001, "loss": 2.4102, "step": 206600 }, { "epoch": 26.722689075630253, "grad_norm": 0.8899856209754944, "learning_rate": 0.001, "loss": 2.4124, "step": 206700 }, { "epoch": 26.73561732385262, "grad_norm": 0.8816211223602295, "learning_rate": 0.001, "loss": 2.42, "step": 206800 }, { "epoch": 26.748545572074985, "grad_norm": 7.941159725189209, "learning_rate": 0.001, "loss": 2.4213, "step": 206900 }, { "epoch": 26.76147382029735, "grad_norm": 1.008460283279419, "learning_rate": 0.001, "loss": 2.4206, "step": 207000 }, { "epoch": 26.774402068519716, "grad_norm": 0.8881164789199829, "learning_rate": 0.001, "loss": 2.4167, "step": 207100 }, { "epoch": 26.787330316742082, "grad_norm": 0.8492231965065002, "learning_rate": 0.001, "loss": 2.4292, "step": 207200 }, { "epoch": 26.800258564964448, "grad_norm": 1.0510401725769043, "learning_rate": 0.001, "loss": 2.415, "step": 207300 }, { "epoch": 26.813186813186814, "grad_norm": 1.1382030248641968, "learning_rate": 0.001, "loss": 2.4119, "step": 207400 }, { "epoch": 26.82611506140918, "grad_norm": 1.327113151550293, "learning_rate": 0.001, "loss": 2.4137, "step": 207500 }, { "epoch": 26.839043309631545, "grad_norm": 1.0110738277435303, "learning_rate": 0.001, "loss": 2.4194, "step": 207600 }, { "epoch": 26.85197155785391, "grad_norm": 0.8581327795982361, "learning_rate": 0.001, "loss": 2.4275, "step": 207700 }, { "epoch": 26.864899806076277, "grad_norm": 0.8753148317337036, "learning_rate": 0.001, "loss": 2.4276, "step": 207800 }, { "epoch": 26.877828054298643, "grad_norm": 0.9065788984298706, "learning_rate": 0.001, "loss": 2.4244, "step": 207900 }, { "epoch": 26.89075630252101, "grad_norm": 0.7499058842658997, "learning_rate": 0.001, "loss": 2.4185, "step": 208000 }, { "epoch": 26.903684550743375, "grad_norm": 1.0114853382110596, "learning_rate": 0.001, "loss": 2.4232, "step": 208100 }, { "epoch": 26.91661279896574, "grad_norm": 0.9319187998771667, "learning_rate": 0.001, "loss": 2.4063, "step": 208200 }, { "epoch": 26.929541047188106, "grad_norm": 7.428157806396484, "learning_rate": 0.001, "loss": 2.4325, "step": 208300 }, { "epoch": 26.942469295410472, "grad_norm": 0.913886308670044, "learning_rate": 0.001, "loss": 2.4227, "step": 208400 }, { "epoch": 26.955397543632838, "grad_norm": 0.8695008754730225, "learning_rate": 0.001, "loss": 2.4393, "step": 208500 }, { "epoch": 26.968325791855204, "grad_norm": 2.7860467433929443, "learning_rate": 0.001, "loss": 2.392, "step": 208600 }, { "epoch": 26.98125404007757, "grad_norm": 0.8075985312461853, "learning_rate": 0.001, "loss": 2.4229, "step": 208700 }, { "epoch": 26.994182288299935, "grad_norm": 0.7880262732505798, "learning_rate": 0.001, "loss": 2.4365, "step": 208800 }, { "epoch": 27.0071105365223, "grad_norm": 0.8341518640518188, "learning_rate": 0.001, "loss": 2.3433, "step": 208900 }, { "epoch": 27.020038784744667, "grad_norm": 1.0064882040023804, "learning_rate": 0.001, "loss": 2.3416, "step": 209000 }, { "epoch": 27.032967032967033, "grad_norm": 1.136238932609558, "learning_rate": 0.001, "loss": 2.3156, "step": 209100 }, { "epoch": 27.0458952811894, "grad_norm": 1.101976990699768, "learning_rate": 0.001, "loss": 2.3525, "step": 209200 }, { "epoch": 27.058823529411764, "grad_norm": 1.0341662168502808, "learning_rate": 0.001, "loss": 2.3415, "step": 209300 }, { "epoch": 27.07175177763413, "grad_norm": 1.2064462900161743, "learning_rate": 0.001, "loss": 2.3386, "step": 209400 }, { "epoch": 27.084680025856496, "grad_norm": 1.3184958696365356, "learning_rate": 0.001, "loss": 2.3366, "step": 209500 }, { "epoch": 27.097608274078862, "grad_norm": 1.1214312314987183, "learning_rate": 0.001, "loss": 2.3544, "step": 209600 }, { "epoch": 27.110536522301228, "grad_norm": 0.9697237610816956, "learning_rate": 0.001, "loss": 2.3498, "step": 209700 }, { "epoch": 27.123464770523594, "grad_norm": 1.123079538345337, "learning_rate": 0.001, "loss": 2.3493, "step": 209800 }, { "epoch": 27.13639301874596, "grad_norm": 1.3232340812683105, "learning_rate": 0.001, "loss": 2.3411, "step": 209900 }, { "epoch": 27.149321266968325, "grad_norm": 0.9322007894515991, "learning_rate": 0.001, "loss": 2.3625, "step": 210000 }, { "epoch": 27.16224951519069, "grad_norm": 0.9338539242744446, "learning_rate": 0.001, "loss": 2.3513, "step": 210100 }, { "epoch": 27.175177763413057, "grad_norm": 0.8197593092918396, "learning_rate": 0.001, "loss": 2.3627, "step": 210200 }, { "epoch": 27.188106011635423, "grad_norm": 1.0446823835372925, "learning_rate": 0.001, "loss": 2.3373, "step": 210300 }, { "epoch": 27.20103425985779, "grad_norm": 2.094609022140503, "learning_rate": 0.001, "loss": 2.3579, "step": 210400 }, { "epoch": 27.213962508080154, "grad_norm": 1.2120685577392578, "learning_rate": 0.001, "loss": 2.3464, "step": 210500 }, { "epoch": 27.22689075630252, "grad_norm": 4.578570365905762, "learning_rate": 0.001, "loss": 2.3501, "step": 210600 }, { "epoch": 27.239819004524886, "grad_norm": 0.8924579620361328, "learning_rate": 0.001, "loss": 2.3916, "step": 210700 }, { "epoch": 27.252747252747252, "grad_norm": 1.895655632019043, "learning_rate": 0.001, "loss": 2.3682, "step": 210800 }, { "epoch": 27.265675500969618, "grad_norm": 1.4196749925613403, "learning_rate": 0.001, "loss": 2.3527, "step": 210900 }, { "epoch": 27.278603749191983, "grad_norm": 0.8711206316947937, "learning_rate": 0.001, "loss": 2.3661, "step": 211000 }, { "epoch": 27.29153199741435, "grad_norm": 1.140377402305603, "learning_rate": 0.001, "loss": 2.3885, "step": 211100 }, { "epoch": 27.304460245636715, "grad_norm": 1.064698338508606, "learning_rate": 0.001, "loss": 2.3704, "step": 211200 }, { "epoch": 27.31738849385908, "grad_norm": 1.1789652109146118, "learning_rate": 0.001, "loss": 2.3515, "step": 211300 }, { "epoch": 27.330316742081447, "grad_norm": 0.9897536635398865, "learning_rate": 0.001, "loss": 2.37, "step": 211400 }, { "epoch": 27.343244990303813, "grad_norm": 1.0931514501571655, "learning_rate": 0.001, "loss": 2.392, "step": 211500 }, { "epoch": 27.35617323852618, "grad_norm": 1.0339710712432861, "learning_rate": 0.001, "loss": 2.3785, "step": 211600 }, { "epoch": 27.369101486748544, "grad_norm": 1.020810604095459, "learning_rate": 0.001, "loss": 2.3783, "step": 211700 }, { "epoch": 27.38202973497091, "grad_norm": 1.233630657196045, "learning_rate": 0.001, "loss": 2.38, "step": 211800 }, { "epoch": 27.394957983193276, "grad_norm": 3.0957913398742676, "learning_rate": 0.001, "loss": 2.3654, "step": 211900 }, { "epoch": 27.40788623141564, "grad_norm": 0.9940770864486694, "learning_rate": 0.001, "loss": 2.3761, "step": 212000 }, { "epoch": 27.420814479638008, "grad_norm": 1.0038813352584839, "learning_rate": 0.001, "loss": 2.3799, "step": 212100 }, { "epoch": 27.433742727860373, "grad_norm": 1.3913151025772095, "learning_rate": 0.001, "loss": 2.3889, "step": 212200 }, { "epoch": 27.44667097608274, "grad_norm": 1.500860333442688, "learning_rate": 0.001, "loss": 2.3847, "step": 212300 }, { "epoch": 27.459599224305105, "grad_norm": 1.0788583755493164, "learning_rate": 0.001, "loss": 2.3617, "step": 212400 }, { "epoch": 27.47252747252747, "grad_norm": 0.9632897973060608, "learning_rate": 0.001, "loss": 2.3947, "step": 212500 }, { "epoch": 27.485455720749837, "grad_norm": 1.0949831008911133, "learning_rate": 0.001, "loss": 2.3786, "step": 212600 }, { "epoch": 27.498383968972206, "grad_norm": 0.9827914237976074, "learning_rate": 0.001, "loss": 2.3951, "step": 212700 }, { "epoch": 27.511312217194572, "grad_norm": 0.8863192200660706, "learning_rate": 0.001, "loss": 2.3691, "step": 212800 }, { "epoch": 27.524240465416938, "grad_norm": 1.1217416524887085, "learning_rate": 0.001, "loss": 2.3845, "step": 212900 }, { "epoch": 27.537168713639304, "grad_norm": 1.057210922241211, "learning_rate": 0.001, "loss": 2.3775, "step": 213000 }, { "epoch": 27.55009696186167, "grad_norm": 1.072123646736145, "learning_rate": 0.001, "loss": 2.3895, "step": 213100 }, { "epoch": 27.563025210084035, "grad_norm": 0.8300528526306152, "learning_rate": 0.001, "loss": 2.3845, "step": 213200 }, { "epoch": 27.5759534583064, "grad_norm": 1.413459300994873, "learning_rate": 0.001, "loss": 2.407, "step": 213300 }, { "epoch": 27.588881706528767, "grad_norm": 1.2766976356506348, "learning_rate": 0.001, "loss": 2.39, "step": 213400 }, { "epoch": 27.601809954751133, "grad_norm": 0.8579210638999939, "learning_rate": 0.001, "loss": 2.3904, "step": 213500 }, { "epoch": 27.6147382029735, "grad_norm": 0.9182913899421692, "learning_rate": 0.001, "loss": 2.3947, "step": 213600 }, { "epoch": 27.627666451195864, "grad_norm": 1.0236173868179321, "learning_rate": 0.001, "loss": 2.3716, "step": 213700 }, { "epoch": 27.64059469941823, "grad_norm": 1.4260467290878296, "learning_rate": 0.001, "loss": 2.3994, "step": 213800 }, { "epoch": 27.653522947640596, "grad_norm": 1.0911239385604858, "learning_rate": 0.001, "loss": 2.4021, "step": 213900 }, { "epoch": 27.66645119586296, "grad_norm": 1.0309743881225586, "learning_rate": 0.001, "loss": 2.4005, "step": 214000 }, { "epoch": 27.679379444085328, "grad_norm": 1.1510089635849, "learning_rate": 0.001, "loss": 2.3977, "step": 214100 }, { "epoch": 27.692307692307693, "grad_norm": 0.8815612196922302, "learning_rate": 0.001, "loss": 2.4062, "step": 214200 }, { "epoch": 27.70523594053006, "grad_norm": 0.8063983917236328, "learning_rate": 0.001, "loss": 2.3937, "step": 214300 }, { "epoch": 27.718164188752425, "grad_norm": 1.5184615850448608, "learning_rate": 0.001, "loss": 2.4099, "step": 214400 }, { "epoch": 27.73109243697479, "grad_norm": 1.113146185874939, "learning_rate": 0.001, "loss": 2.4017, "step": 214500 }, { "epoch": 27.744020685197157, "grad_norm": 0.9406982660293579, "learning_rate": 0.001, "loss": 2.3949, "step": 214600 }, { "epoch": 27.756948933419523, "grad_norm": 1.1849616765975952, "learning_rate": 0.001, "loss": 2.3947, "step": 214700 }, { "epoch": 27.76987718164189, "grad_norm": 1.002642273902893, "learning_rate": 0.001, "loss": 2.3963, "step": 214800 }, { "epoch": 27.782805429864254, "grad_norm": 1.1582485437393188, "learning_rate": 0.001, "loss": 2.406, "step": 214900 }, { "epoch": 27.79573367808662, "grad_norm": 1.0781618356704712, "learning_rate": 0.001, "loss": 2.3984, "step": 215000 }, { "epoch": 27.808661926308986, "grad_norm": 1.020018458366394, "learning_rate": 0.001, "loss": 2.4167, "step": 215100 }, { "epoch": 27.82159017453135, "grad_norm": 1.20122230052948, "learning_rate": 0.001, "loss": 2.3896, "step": 215200 }, { "epoch": 27.834518422753717, "grad_norm": 0.9196073412895203, "learning_rate": 0.001, "loss": 2.4071, "step": 215300 }, { "epoch": 27.847446670976083, "grad_norm": 0.9267793297767639, "learning_rate": 0.001, "loss": 2.4175, "step": 215400 }, { "epoch": 27.86037491919845, "grad_norm": 52.006256103515625, "learning_rate": 0.001, "loss": 2.4008, "step": 215500 }, { "epoch": 27.873303167420815, "grad_norm": 0.9853566288948059, "learning_rate": 0.001, "loss": 2.4079, "step": 215600 }, { "epoch": 27.88623141564318, "grad_norm": 0.9651537537574768, "learning_rate": 0.001, "loss": 2.413, "step": 215700 }, { "epoch": 27.899159663865547, "grad_norm": 0.9098378419876099, "learning_rate": 0.001, "loss": 2.4105, "step": 215800 }, { "epoch": 27.912087912087912, "grad_norm": 0.9500021934509277, "learning_rate": 0.001, "loss": 2.4495, "step": 215900 }, { "epoch": 27.92501616031028, "grad_norm": 2.056971311569214, "learning_rate": 0.001, "loss": 2.4259, "step": 216000 }, { "epoch": 27.937944408532644, "grad_norm": 0.9949756264686584, "learning_rate": 0.001, "loss": 2.4096, "step": 216100 }, { "epoch": 27.95087265675501, "grad_norm": 1.155133605003357, "learning_rate": 0.001, "loss": 2.4274, "step": 216200 }, { "epoch": 27.963800904977376, "grad_norm": 1.0423123836517334, "learning_rate": 0.001, "loss": 2.4209, "step": 216300 }, { "epoch": 27.97672915319974, "grad_norm": 1.0296010971069336, "learning_rate": 0.001, "loss": 2.4186, "step": 216400 }, { "epoch": 27.989657401422107, "grad_norm": 1.3274986743927002, "learning_rate": 0.001, "loss": 2.4235, "step": 216500 }, { "epoch": 28.002585649644473, "grad_norm": 0.7931661009788513, "learning_rate": 0.001, "loss": 2.4224, "step": 216600 }, { "epoch": 28.01551389786684, "grad_norm": 0.9291642904281616, "learning_rate": 0.001, "loss": 2.3304, "step": 216700 }, { "epoch": 28.028442146089205, "grad_norm": 0.9394917488098145, "learning_rate": 0.001, "loss": 2.3191, "step": 216800 }, { "epoch": 28.04137039431157, "grad_norm": 1.0620568990707397, "learning_rate": 0.001, "loss": 2.324, "step": 216900 }, { "epoch": 28.054298642533936, "grad_norm": 0.8730033040046692, "learning_rate": 0.001, "loss": 2.3175, "step": 217000 }, { "epoch": 28.067226890756302, "grad_norm": 1.1562663316726685, "learning_rate": 0.001, "loss": 2.3344, "step": 217100 }, { "epoch": 28.080155138978668, "grad_norm": 1.0068001747131348, "learning_rate": 0.001, "loss": 2.3307, "step": 217200 }, { "epoch": 28.093083387201034, "grad_norm": 1.1384886503219604, "learning_rate": 0.001, "loss": 2.3401, "step": 217300 }, { "epoch": 28.1060116354234, "grad_norm": 0.9024704098701477, "learning_rate": 0.001, "loss": 2.3275, "step": 217400 }, { "epoch": 28.118939883645766, "grad_norm": 0.8765543699264526, "learning_rate": 0.001, "loss": 2.3482, "step": 217500 }, { "epoch": 28.13186813186813, "grad_norm": 0.9913923740386963, "learning_rate": 0.001, "loss": 2.3452, "step": 217600 }, { "epoch": 28.144796380090497, "grad_norm": 0.97794109582901, "learning_rate": 0.001, "loss": 2.3437, "step": 217700 }, { "epoch": 28.157724628312863, "grad_norm": 2.349830389022827, "learning_rate": 0.001, "loss": 2.3344, "step": 217800 }, { "epoch": 28.17065287653523, "grad_norm": 0.8273319602012634, "learning_rate": 0.001, "loss": 2.3412, "step": 217900 }, { "epoch": 28.183581124757595, "grad_norm": 0.7746471166610718, "learning_rate": 0.001, "loss": 2.361, "step": 218000 }, { "epoch": 28.19650937297996, "grad_norm": 0.8934696316719055, "learning_rate": 0.001, "loss": 2.3448, "step": 218100 }, { "epoch": 28.209437621202326, "grad_norm": 1.8730906248092651, "learning_rate": 0.001, "loss": 2.3435, "step": 218200 }, { "epoch": 28.222365869424692, "grad_norm": 1.058454990386963, "learning_rate": 0.001, "loss": 2.3437, "step": 218300 }, { "epoch": 28.235294117647058, "grad_norm": 0.7532123923301697, "learning_rate": 0.001, "loss": 2.3433, "step": 218400 }, { "epoch": 28.248222365869424, "grad_norm": 0.8744400143623352, "learning_rate": 0.001, "loss": 2.3457, "step": 218500 }, { "epoch": 28.26115061409179, "grad_norm": 1.0515726804733276, "learning_rate": 0.001, "loss": 2.3285, "step": 218600 }, { "epoch": 28.274078862314155, "grad_norm": 0.7461304664611816, "learning_rate": 0.001, "loss": 2.3597, "step": 218700 }, { "epoch": 28.28700711053652, "grad_norm": 0.9371544122695923, "learning_rate": 0.001, "loss": 2.3554, "step": 218800 }, { "epoch": 28.299935358758887, "grad_norm": 0.9900177121162415, "learning_rate": 0.001, "loss": 2.3598, "step": 218900 }, { "epoch": 28.312863606981253, "grad_norm": 0.902205765247345, "learning_rate": 0.001, "loss": 2.3501, "step": 219000 }, { "epoch": 28.32579185520362, "grad_norm": 1.4901353120803833, "learning_rate": 0.001, "loss": 2.3314, "step": 219100 }, { "epoch": 28.338720103425985, "grad_norm": 0.9814837574958801, "learning_rate": 0.001, "loss": 2.3907, "step": 219200 }, { "epoch": 28.35164835164835, "grad_norm": 0.7469857335090637, "learning_rate": 0.001, "loss": 2.3569, "step": 219300 }, { "epoch": 28.364576599870716, "grad_norm": 0.7707846760749817, "learning_rate": 0.001, "loss": 2.3607, "step": 219400 }, { "epoch": 28.377504848093082, "grad_norm": 0.8184402585029602, "learning_rate": 0.001, "loss": 2.3388, "step": 219500 }, { "epoch": 28.390433096315448, "grad_norm": 0.8435554504394531, "learning_rate": 0.001, "loss": 2.3672, "step": 219600 }, { "epoch": 28.403361344537814, "grad_norm": 0.797379732131958, "learning_rate": 0.001, "loss": 2.3592, "step": 219700 }, { "epoch": 28.41628959276018, "grad_norm": 0.764184832572937, "learning_rate": 0.001, "loss": 2.3798, "step": 219800 }, { "epoch": 28.429217840982545, "grad_norm": 0.8124292492866516, "learning_rate": 0.001, "loss": 2.3732, "step": 219900 }, { "epoch": 28.44214608920491, "grad_norm": 0.8376145958900452, "learning_rate": 0.001, "loss": 2.3673, "step": 220000 }, { "epoch": 28.455074337427277, "grad_norm": 0.949983537197113, "learning_rate": 0.001, "loss": 2.3605, "step": 220100 }, { "epoch": 28.468002585649643, "grad_norm": 1.2091141939163208, "learning_rate": 0.001, "loss": 2.3919, "step": 220200 }, { "epoch": 28.48093083387201, "grad_norm": 1.0471614599227905, "learning_rate": 0.001, "loss": 2.3743, "step": 220300 }, { "epoch": 28.493859082094374, "grad_norm": 0.8241241574287415, "learning_rate": 0.001, "loss": 2.3689, "step": 220400 }, { "epoch": 28.50678733031674, "grad_norm": 0.8777588605880737, "learning_rate": 0.001, "loss": 2.3793, "step": 220500 }, { "epoch": 28.51971557853911, "grad_norm": 1.039381742477417, "learning_rate": 0.001, "loss": 2.377, "step": 220600 }, { "epoch": 28.532643826761475, "grad_norm": 4.072911262512207, "learning_rate": 0.001, "loss": 2.3696, "step": 220700 }, { "epoch": 28.54557207498384, "grad_norm": 1.0069321393966675, "learning_rate": 0.001, "loss": 2.3776, "step": 220800 }, { "epoch": 28.558500323206207, "grad_norm": 0.9498946666717529, "learning_rate": 0.001, "loss": 2.3559, "step": 220900 }, { "epoch": 28.571428571428573, "grad_norm": 0.8857526183128357, "learning_rate": 0.001, "loss": 2.3798, "step": 221000 }, { "epoch": 28.58435681965094, "grad_norm": 1.0534820556640625, "learning_rate": 0.001, "loss": 2.3974, "step": 221100 }, { "epoch": 28.597285067873305, "grad_norm": 1.0009658336639404, "learning_rate": 0.001, "loss": 2.4009, "step": 221200 }, { "epoch": 28.61021331609567, "grad_norm": 0.9179207682609558, "learning_rate": 0.001, "loss": 2.377, "step": 221300 }, { "epoch": 28.623141564318036, "grad_norm": 2.4361462593078613, "learning_rate": 0.001, "loss": 2.4044, "step": 221400 }, { "epoch": 28.636069812540402, "grad_norm": 0.8780477046966553, "learning_rate": 0.001, "loss": 2.3847, "step": 221500 }, { "epoch": 28.648998060762768, "grad_norm": 0.7274078130722046, "learning_rate": 0.001, "loss": 2.3658, "step": 221600 }, { "epoch": 28.661926308985134, "grad_norm": 1.298095941543579, "learning_rate": 0.001, "loss": 2.4026, "step": 221700 }, { "epoch": 28.6748545572075, "grad_norm": 0.9564741253852844, "learning_rate": 0.001, "loss": 2.3784, "step": 221800 }, { "epoch": 28.687782805429865, "grad_norm": 0.7539255619049072, "learning_rate": 0.001, "loss": 2.3763, "step": 221900 }, { "epoch": 28.70071105365223, "grad_norm": 0.7992677092552185, "learning_rate": 0.001, "loss": 2.4004, "step": 222000 }, { "epoch": 28.713639301874597, "grad_norm": 1.1170318126678467, "learning_rate": 0.001, "loss": 2.3809, "step": 222100 }, { "epoch": 28.726567550096963, "grad_norm": 0.7260889410972595, "learning_rate": 0.001, "loss": 2.3901, "step": 222200 }, { "epoch": 28.73949579831933, "grad_norm": 0.9283661246299744, "learning_rate": 0.001, "loss": 2.3797, "step": 222300 }, { "epoch": 28.752424046541694, "grad_norm": 0.9741328358650208, "learning_rate": 0.001, "loss": 2.3811, "step": 222400 }, { "epoch": 28.76535229476406, "grad_norm": 1.1482956409454346, "learning_rate": 0.001, "loss": 2.3682, "step": 222500 }, { "epoch": 28.778280542986426, "grad_norm": 0.8291159868240356, "learning_rate": 0.001, "loss": 2.3753, "step": 222600 }, { "epoch": 28.791208791208792, "grad_norm": 0.9762463569641113, "learning_rate": 0.001, "loss": 2.392, "step": 222700 }, { "epoch": 28.804137039431158, "grad_norm": 0.8923696875572205, "learning_rate": 0.001, "loss": 2.3898, "step": 222800 }, { "epoch": 28.817065287653524, "grad_norm": 0.9456131458282471, "learning_rate": 0.001, "loss": 2.3877, "step": 222900 }, { "epoch": 28.82999353587589, "grad_norm": 1.3924247026443481, "learning_rate": 0.001, "loss": 2.401, "step": 223000 }, { "epoch": 28.842921784098255, "grad_norm": 1.0859631299972534, "learning_rate": 0.001, "loss": 2.3878, "step": 223100 }, { "epoch": 28.85585003232062, "grad_norm": 1.1006052494049072, "learning_rate": 0.001, "loss": 2.3966, "step": 223200 }, { "epoch": 28.868778280542987, "grad_norm": 0.7348927855491638, "learning_rate": 0.001, "loss": 2.4062, "step": 223300 }, { "epoch": 28.881706528765353, "grad_norm": 0.7181686758995056, "learning_rate": 0.001, "loss": 2.3814, "step": 223400 }, { "epoch": 28.89463477698772, "grad_norm": 1.1310467720031738, "learning_rate": 0.001, "loss": 2.379, "step": 223500 }, { "epoch": 28.907563025210084, "grad_norm": 0.8947080373764038, "learning_rate": 0.001, "loss": 2.3908, "step": 223600 }, { "epoch": 28.92049127343245, "grad_norm": 0.8003687858581543, "learning_rate": 0.001, "loss": 2.3857, "step": 223700 }, { "epoch": 28.933419521654816, "grad_norm": 0.9747632145881653, "learning_rate": 0.001, "loss": 2.4014, "step": 223800 }, { "epoch": 28.946347769877182, "grad_norm": 0.9493471384048462, "learning_rate": 0.001, "loss": 2.4239, "step": 223900 }, { "epoch": 28.959276018099548, "grad_norm": 1.2437355518341064, "learning_rate": 0.001, "loss": 2.4159, "step": 224000 }, { "epoch": 28.972204266321913, "grad_norm": 0.8638398051261902, "learning_rate": 0.001, "loss": 2.4087, "step": 224100 }, { "epoch": 28.98513251454428, "grad_norm": 0.8151601552963257, "learning_rate": 0.001, "loss": 2.3993, "step": 224200 }, { "epoch": 28.998060762766645, "grad_norm": 1.0853047370910645, "learning_rate": 0.001, "loss": 2.4311, "step": 224300 }, { "epoch": 29.01098901098901, "grad_norm": 0.8810889720916748, "learning_rate": 0.001, "loss": 2.3472, "step": 224400 }, { "epoch": 29.023917259211377, "grad_norm": 0.871283769607544, "learning_rate": 0.001, "loss": 2.3034, "step": 224500 }, { "epoch": 29.036845507433743, "grad_norm": 0.8153939843177795, "learning_rate": 0.001, "loss": 2.3053, "step": 224600 }, { "epoch": 29.04977375565611, "grad_norm": 0.8424943089485168, "learning_rate": 0.001, "loss": 2.3226, "step": 224700 }, { "epoch": 29.062702003878474, "grad_norm": 1.0679141283035278, "learning_rate": 0.001, "loss": 2.3138, "step": 224800 }, { "epoch": 29.07563025210084, "grad_norm": 1.0457277297973633, "learning_rate": 0.001, "loss": 2.3163, "step": 224900 }, { "epoch": 29.088558500323206, "grad_norm": 0.8904995322227478, "learning_rate": 0.001, "loss": 2.3125, "step": 225000 }, { "epoch": 29.10148674854557, "grad_norm": 0.8399600386619568, "learning_rate": 0.001, "loss": 2.3429, "step": 225100 }, { "epoch": 29.114414996767938, "grad_norm": 0.9358722567558289, "learning_rate": 0.001, "loss": 2.3436, "step": 225200 }, { "epoch": 29.127343244990303, "grad_norm": 1.9865525960922241, "learning_rate": 0.001, "loss": 2.335, "step": 225300 }, { "epoch": 29.14027149321267, "grad_norm": 0.8082976341247559, "learning_rate": 0.001, "loss": 2.3226, "step": 225400 }, { "epoch": 29.153199741435035, "grad_norm": 1.0878657102584839, "learning_rate": 0.001, "loss": 2.3382, "step": 225500 }, { "epoch": 29.1661279896574, "grad_norm": 0.8844921588897705, "learning_rate": 0.001, "loss": 2.3204, "step": 225600 }, { "epoch": 29.179056237879767, "grad_norm": 0.9656618237495422, "learning_rate": 0.001, "loss": 2.3133, "step": 225700 }, { "epoch": 29.191984486102132, "grad_norm": 3.8670036792755127, "learning_rate": 0.001, "loss": 2.3352, "step": 225800 }, { "epoch": 29.2049127343245, "grad_norm": 1.7561933994293213, "learning_rate": 0.001, "loss": 2.3449, "step": 225900 }, { "epoch": 29.217840982546864, "grad_norm": 1.1347781419754028, "learning_rate": 0.001, "loss": 2.3421, "step": 226000 }, { "epoch": 29.23076923076923, "grad_norm": 1.021430492401123, "learning_rate": 0.001, "loss": 2.3584, "step": 226100 }, { "epoch": 29.243697478991596, "grad_norm": 0.9981648325920105, "learning_rate": 0.001, "loss": 2.3255, "step": 226200 }, { "epoch": 29.25662572721396, "grad_norm": 0.8690951466560364, "learning_rate": 0.001, "loss": 2.3518, "step": 226300 }, { "epoch": 29.269553975436327, "grad_norm": 0.7785423398017883, "learning_rate": 0.001, "loss": 2.3437, "step": 226400 }, { "epoch": 29.282482223658693, "grad_norm": 1.7251250743865967, "learning_rate": 0.001, "loss": 2.3433, "step": 226500 }, { "epoch": 29.29541047188106, "grad_norm": 0.9703306555747986, "learning_rate": 0.001, "loss": 2.3316, "step": 226600 }, { "epoch": 29.308338720103425, "grad_norm": 1.7674615383148193, "learning_rate": 0.001, "loss": 2.3454, "step": 226700 }, { "epoch": 29.32126696832579, "grad_norm": 3.5028765201568604, "learning_rate": 0.001, "loss": 2.3468, "step": 226800 }, { "epoch": 29.334195216548157, "grad_norm": 1.4285778999328613, "learning_rate": 0.001, "loss": 2.3553, "step": 226900 }, { "epoch": 29.347123464770522, "grad_norm": 0.8427510261535645, "learning_rate": 0.001, "loss": 2.3434, "step": 227000 }, { "epoch": 29.360051712992888, "grad_norm": 6.121968746185303, "learning_rate": 0.001, "loss": 2.3591, "step": 227100 }, { "epoch": 29.372979961215254, "grad_norm": 0.7858746647834778, "learning_rate": 0.001, "loss": 2.3557, "step": 227200 }, { "epoch": 29.38590820943762, "grad_norm": 1.0994629859924316, "learning_rate": 0.001, "loss": 2.3776, "step": 227300 }, { "epoch": 29.398836457659986, "grad_norm": 1.0491267442703247, "learning_rate": 0.001, "loss": 2.3615, "step": 227400 }, { "epoch": 29.41176470588235, "grad_norm": 0.8538762927055359, "learning_rate": 0.001, "loss": 2.3622, "step": 227500 }, { "epoch": 29.424692954104717, "grad_norm": 0.9318026304244995, "learning_rate": 0.001, "loss": 2.3591, "step": 227600 }, { "epoch": 29.437621202327083, "grad_norm": 0.9265081882476807, "learning_rate": 0.001, "loss": 2.3608, "step": 227700 }, { "epoch": 29.45054945054945, "grad_norm": 0.8378821015357971, "learning_rate": 0.001, "loss": 2.3447, "step": 227800 }, { "epoch": 29.463477698771815, "grad_norm": 0.9568947553634644, "learning_rate": 0.001, "loss": 2.3507, "step": 227900 }, { "epoch": 29.47640594699418, "grad_norm": 2.4050710201263428, "learning_rate": 0.001, "loss": 2.3633, "step": 228000 }, { "epoch": 29.489334195216546, "grad_norm": 2.1325643062591553, "learning_rate": 0.001, "loss": 2.3836, "step": 228100 }, { "epoch": 29.502262443438916, "grad_norm": 0.9107269048690796, "learning_rate": 0.001, "loss": 2.3768, "step": 228200 }, { "epoch": 29.51519069166128, "grad_norm": 0.8083791732788086, "learning_rate": 0.001, "loss": 2.3666, "step": 228300 }, { "epoch": 29.528118939883647, "grad_norm": 0.9020543694496155, "learning_rate": 0.001, "loss": 2.3639, "step": 228400 }, { "epoch": 29.541047188106013, "grad_norm": 0.7890804409980774, "learning_rate": 0.001, "loss": 2.3461, "step": 228500 }, { "epoch": 29.55397543632838, "grad_norm": 0.8020476698875427, "learning_rate": 0.001, "loss": 2.3559, "step": 228600 }, { "epoch": 29.566903684550745, "grad_norm": 0.7387930750846863, "learning_rate": 0.001, "loss": 2.3645, "step": 228700 }, { "epoch": 29.57983193277311, "grad_norm": 0.7234213948249817, "learning_rate": 0.001, "loss": 2.3664, "step": 228800 }, { "epoch": 29.592760180995477, "grad_norm": 1.0490007400512695, "learning_rate": 0.001, "loss": 2.3895, "step": 228900 }, { "epoch": 29.605688429217842, "grad_norm": 1.089826226234436, "learning_rate": 0.001, "loss": 2.3718, "step": 229000 }, { "epoch": 29.618616677440208, "grad_norm": 0.9106038212776184, "learning_rate": 0.001, "loss": 2.3649, "step": 229100 }, { "epoch": 29.631544925662574, "grad_norm": 0.9532368183135986, "learning_rate": 0.001, "loss": 2.3753, "step": 229200 }, { "epoch": 29.64447317388494, "grad_norm": 0.888085126876831, "learning_rate": 0.001, "loss": 2.3875, "step": 229300 }, { "epoch": 29.657401422107306, "grad_norm": 2.1045191287994385, "learning_rate": 0.001, "loss": 2.3686, "step": 229400 }, { "epoch": 29.67032967032967, "grad_norm": 0.7713128924369812, "learning_rate": 0.001, "loss": 2.3677, "step": 229500 }, { "epoch": 29.683257918552037, "grad_norm": 0.9448463320732117, "learning_rate": 0.001, "loss": 2.3783, "step": 229600 }, { "epoch": 29.696186166774403, "grad_norm": 1.2952263355255127, "learning_rate": 0.001, "loss": 2.3869, "step": 229700 }, { "epoch": 29.70911441499677, "grad_norm": 12.062212944030762, "learning_rate": 0.001, "loss": 2.3804, "step": 229800 }, { "epoch": 29.722042663219135, "grad_norm": 0.7993664145469666, "learning_rate": 0.001, "loss": 2.3846, "step": 229900 }, { "epoch": 29.7349709114415, "grad_norm": 0.963200569152832, "learning_rate": 0.001, "loss": 2.3873, "step": 230000 }, { "epoch": 29.747899159663866, "grad_norm": 0.792620062828064, "learning_rate": 0.001, "loss": 2.3818, "step": 230100 }, { "epoch": 29.760827407886232, "grad_norm": 0.9300122261047363, "learning_rate": 0.001, "loss": 2.3894, "step": 230200 }, { "epoch": 29.773755656108598, "grad_norm": 0.982225775718689, "learning_rate": 0.001, "loss": 2.3755, "step": 230300 }, { "epoch": 29.786683904330964, "grad_norm": 1.2075508832931519, "learning_rate": 0.001, "loss": 2.3797, "step": 230400 }, { "epoch": 29.79961215255333, "grad_norm": 0.8875972032546997, "learning_rate": 0.001, "loss": 2.3664, "step": 230500 }, { "epoch": 29.812540400775696, "grad_norm": 1.094596266746521, "learning_rate": 0.001, "loss": 2.384, "step": 230600 }, { "epoch": 29.82546864899806, "grad_norm": 0.8508328199386597, "learning_rate": 0.001, "loss": 2.3642, "step": 230700 }, { "epoch": 29.838396897220427, "grad_norm": 0.7953973412513733, "learning_rate": 0.001, "loss": 2.3673, "step": 230800 }, { "epoch": 29.851325145442793, "grad_norm": 0.9112909436225891, "learning_rate": 0.001, "loss": 2.3651, "step": 230900 }, { "epoch": 29.86425339366516, "grad_norm": 0.9462515711784363, "learning_rate": 0.001, "loss": 2.3848, "step": 231000 }, { "epoch": 29.877181641887525, "grad_norm": 0.9775808453559875, "learning_rate": 0.001, "loss": 2.3714, "step": 231100 }, { "epoch": 29.89010989010989, "grad_norm": 13.665424346923828, "learning_rate": 0.001, "loss": 2.4003, "step": 231200 }, { "epoch": 29.903038138332256, "grad_norm": 0.9433664083480835, "learning_rate": 0.001, "loss": 2.3864, "step": 231300 }, { "epoch": 29.915966386554622, "grad_norm": 1.03389310836792, "learning_rate": 0.001, "loss": 2.3746, "step": 231400 }, { "epoch": 29.928894634776988, "grad_norm": 1.0311260223388672, "learning_rate": 0.001, "loss": 2.3858, "step": 231500 }, { "epoch": 29.941822882999354, "grad_norm": 0.9557684063911438, "learning_rate": 0.001, "loss": 2.3966, "step": 231600 }, { "epoch": 29.95475113122172, "grad_norm": 3.901906728744507, "learning_rate": 0.001, "loss": 2.3767, "step": 231700 }, { "epoch": 29.967679379444085, "grad_norm": 0.7978503108024597, "learning_rate": 0.001, "loss": 2.4083, "step": 231800 }, { "epoch": 29.98060762766645, "grad_norm": 1.0295127630233765, "learning_rate": 0.001, "loss": 2.3902, "step": 231900 }, { "epoch": 29.993535875888817, "grad_norm": 1.122122049331665, "learning_rate": 0.001, "loss": 2.3834, "step": 232000 }, { "epoch": 30.006464124111183, "grad_norm": 1.579999327659607, "learning_rate": 0.001, "loss": 2.3284, "step": 232100 }, { "epoch": 30.01939237233355, "grad_norm": 1.5691392421722412, "learning_rate": 0.001, "loss": 2.3191, "step": 232200 }, { "epoch": 30.032320620555915, "grad_norm": 1.7749329805374146, "learning_rate": 0.001, "loss": 2.2924, "step": 232300 }, { "epoch": 30.04524886877828, "grad_norm": 1.4876872301101685, "learning_rate": 0.001, "loss": 2.3181, "step": 232400 }, { "epoch": 30.058177117000646, "grad_norm": 1.4847691059112549, "learning_rate": 0.001, "loss": 2.3026, "step": 232500 }, { "epoch": 30.071105365223012, "grad_norm": 1.9471344947814941, "learning_rate": 0.001, "loss": 2.2925, "step": 232600 }, { "epoch": 30.084033613445378, "grad_norm": 1.9598275423049927, "learning_rate": 0.001, "loss": 2.3042, "step": 232700 }, { "epoch": 30.096961861667744, "grad_norm": 1.2648465633392334, "learning_rate": 0.001, "loss": 2.2979, "step": 232800 }, { "epoch": 30.10989010989011, "grad_norm": 1.891363263130188, "learning_rate": 0.001, "loss": 2.3244, "step": 232900 }, { "epoch": 30.122818358112475, "grad_norm": 1.4893090724945068, "learning_rate": 0.001, "loss": 2.3166, "step": 233000 }, { "epoch": 30.13574660633484, "grad_norm": 1.6694296598434448, "learning_rate": 0.001, "loss": 2.321, "step": 233100 }, { "epoch": 30.148674854557207, "grad_norm": 1.2942357063293457, "learning_rate": 0.001, "loss": 2.3181, "step": 233200 }, { "epoch": 30.161603102779573, "grad_norm": 3.158985137939453, "learning_rate": 0.001, "loss": 2.3337, "step": 233300 }, { "epoch": 30.17453135100194, "grad_norm": 1.4943639039993286, "learning_rate": 0.001, "loss": 2.3254, "step": 233400 }, { "epoch": 30.187459599224304, "grad_norm": 1.6419651508331299, "learning_rate": 0.001, "loss": 2.3311, "step": 233500 }, { "epoch": 30.20038784744667, "grad_norm": 1.6251482963562012, "learning_rate": 0.001, "loss": 2.3301, "step": 233600 }, { "epoch": 30.213316095669036, "grad_norm": 1.7523834705352783, "learning_rate": 0.001, "loss": 2.3501, "step": 233700 }, { "epoch": 30.226244343891402, "grad_norm": 1.840513825416565, "learning_rate": 0.001, "loss": 2.3168, "step": 233800 }, { "epoch": 30.239172592113768, "grad_norm": 1.9657496213912964, "learning_rate": 0.001, "loss": 2.3463, "step": 233900 }, { "epoch": 30.252100840336134, "grad_norm": 1.504516839981079, "learning_rate": 0.001, "loss": 2.3332, "step": 234000 }, { "epoch": 30.2650290885585, "grad_norm": 23.63057518005371, "learning_rate": 0.001, "loss": 2.3435, "step": 234100 }, { "epoch": 30.277957336780865, "grad_norm": 1.9385404586791992, "learning_rate": 0.001, "loss": 2.3373, "step": 234200 }, { "epoch": 30.29088558500323, "grad_norm": 1.637305498123169, "learning_rate": 0.001, "loss": 2.339, "step": 234300 }, { "epoch": 30.303813833225597, "grad_norm": 1.5857988595962524, "learning_rate": 0.001, "loss": 2.3434, "step": 234400 }, { "epoch": 30.316742081447963, "grad_norm": 5.131324768066406, "learning_rate": 0.001, "loss": 2.3468, "step": 234500 }, { "epoch": 30.32967032967033, "grad_norm": 1.6298445463180542, "learning_rate": 0.001, "loss": 2.3318, "step": 234600 }, { "epoch": 30.342598577892694, "grad_norm": 2.197200298309326, "learning_rate": 0.001, "loss": 2.359, "step": 234700 }, { "epoch": 30.35552682611506, "grad_norm": 1.4626154899597168, "learning_rate": 0.001, "loss": 2.3502, "step": 234800 }, { "epoch": 30.368455074337426, "grad_norm": 1.4849152565002441, "learning_rate": 0.001, "loss": 2.3373, "step": 234900 }, { "epoch": 30.381383322559792, "grad_norm": 1.5547409057617188, "learning_rate": 0.001, "loss": 2.3503, "step": 235000 }, { "epoch": 30.394311570782158, "grad_norm": 1.7165988683700562, "learning_rate": 0.001, "loss": 2.3489, "step": 235100 }, { "epoch": 30.407239819004523, "grad_norm": 1.6909223794937134, "learning_rate": 0.001, "loss": 2.3276, "step": 235200 }, { "epoch": 30.42016806722689, "grad_norm": 10.23599910736084, "learning_rate": 0.001, "loss": 2.3254, "step": 235300 }, { "epoch": 30.433096315449255, "grad_norm": 18.060985565185547, "learning_rate": 0.001, "loss": 2.3549, "step": 235400 }, { "epoch": 30.44602456367162, "grad_norm": 1.6555835008621216, "learning_rate": 0.001, "loss": 2.3444, "step": 235500 }, { "epoch": 30.458952811893987, "grad_norm": 3.1825037002563477, "learning_rate": 0.001, "loss": 2.3582, "step": 235600 }, { "epoch": 30.471881060116353, "grad_norm": 1.369403600692749, "learning_rate": 0.001, "loss": 2.3487, "step": 235700 }, { "epoch": 30.48480930833872, "grad_norm": 1.7295209169387817, "learning_rate": 0.001, "loss": 2.3348, "step": 235800 }, { "epoch": 30.497737556561084, "grad_norm": 2.10884690284729, "learning_rate": 0.001, "loss": 2.3457, "step": 235900 }, { "epoch": 30.51066580478345, "grad_norm": 1.2632339000701904, "learning_rate": 0.001, "loss": 2.3611, "step": 236000 }, { "epoch": 30.52359405300582, "grad_norm": 1.6858065128326416, "learning_rate": 0.001, "loss": 2.3427, "step": 236100 }, { "epoch": 30.536522301228185, "grad_norm": 2.111140489578247, "learning_rate": 0.001, "loss": 2.3434, "step": 236200 }, { "epoch": 30.54945054945055, "grad_norm": 1.6283049583435059, "learning_rate": 0.001, "loss": 2.36, "step": 236300 }, { "epoch": 30.562378797672917, "grad_norm": 1.5709648132324219, "learning_rate": 0.001, "loss": 2.3574, "step": 236400 }, { "epoch": 30.575307045895283, "grad_norm": 2.0060012340545654, "learning_rate": 0.001, "loss": 2.3662, "step": 236500 }, { "epoch": 30.58823529411765, "grad_norm": 1.414734959602356, "learning_rate": 0.001, "loss": 2.3592, "step": 236600 }, { "epoch": 30.601163542340014, "grad_norm": 1.6526533365249634, "learning_rate": 0.001, "loss": 2.3673, "step": 236700 }, { "epoch": 30.61409179056238, "grad_norm": 1.5901535749435425, "learning_rate": 0.001, "loss": 2.3467, "step": 236800 }, { "epoch": 30.627020038784746, "grad_norm": 1.4068105220794678, "learning_rate": 0.001, "loss": 2.3735, "step": 236900 }, { "epoch": 30.639948287007112, "grad_norm": 1.3515253067016602, "learning_rate": 0.001, "loss": 2.3664, "step": 237000 }, { "epoch": 30.652876535229478, "grad_norm": 2.778916597366333, "learning_rate": 0.001, "loss": 2.3794, "step": 237100 }, { "epoch": 30.665804783451843, "grad_norm": 1.4455811977386475, "learning_rate": 0.001, "loss": 2.3656, "step": 237200 }, { "epoch": 30.67873303167421, "grad_norm": 1.7094477415084839, "learning_rate": 0.001, "loss": 2.361, "step": 237300 }, { "epoch": 30.691661279896575, "grad_norm": 1.7003173828125, "learning_rate": 0.001, "loss": 2.3546, "step": 237400 }, { "epoch": 30.70458952811894, "grad_norm": 1.6416064500808716, "learning_rate": 0.001, "loss": 2.3732, "step": 237500 }, { "epoch": 30.717517776341307, "grad_norm": 1.5658409595489502, "learning_rate": 0.001, "loss": 2.39, "step": 237600 }, { "epoch": 30.730446024563673, "grad_norm": 4.84766149520874, "learning_rate": 0.001, "loss": 2.3712, "step": 237700 }, { "epoch": 30.74337427278604, "grad_norm": 1.5516718626022339, "learning_rate": 0.001, "loss": 2.3514, "step": 237800 }, { "epoch": 30.756302521008404, "grad_norm": 2.021029233932495, "learning_rate": 0.001, "loss": 2.3652, "step": 237900 }, { "epoch": 30.76923076923077, "grad_norm": 2.137267589569092, "learning_rate": 0.001, "loss": 2.3741, "step": 238000 }, { "epoch": 30.782159017453136, "grad_norm": 3.2006423473358154, "learning_rate": 0.001, "loss": 2.37, "step": 238100 }, { "epoch": 30.7950872656755, "grad_norm": 1.6301740407943726, "learning_rate": 0.001, "loss": 2.3818, "step": 238200 }, { "epoch": 30.808015513897868, "grad_norm": 1.9147586822509766, "learning_rate": 0.001, "loss": 2.3885, "step": 238300 }, { "epoch": 30.820943762120233, "grad_norm": 1.8247063159942627, "learning_rate": 0.001, "loss": 2.3818, "step": 238400 }, { "epoch": 30.8338720103426, "grad_norm": 4.1297101974487305, "learning_rate": 0.001, "loss": 2.3636, "step": 238500 }, { "epoch": 30.846800258564965, "grad_norm": 1.6884938478469849, "learning_rate": 0.001, "loss": 2.3796, "step": 238600 }, { "epoch": 30.85972850678733, "grad_norm": 1.6775590181350708, "learning_rate": 0.001, "loss": 2.3832, "step": 238700 }, { "epoch": 30.872656755009697, "grad_norm": 1.4997119903564453, "learning_rate": 0.001, "loss": 2.3694, "step": 238800 }, { "epoch": 30.885585003232062, "grad_norm": 1.3220926523208618, "learning_rate": 0.001, "loss": 2.3644, "step": 238900 }, { "epoch": 30.89851325145443, "grad_norm": 1.370070457458496, "learning_rate": 0.001, "loss": 2.374, "step": 239000 }, { "epoch": 30.911441499676794, "grad_norm": 1.423473596572876, "learning_rate": 0.001, "loss": 2.3734, "step": 239100 }, { "epoch": 30.92436974789916, "grad_norm": 1.959728717803955, "learning_rate": 0.001, "loss": 2.3895, "step": 239200 }, { "epoch": 30.937297996121526, "grad_norm": 1.7016552686691284, "learning_rate": 0.001, "loss": 2.3988, "step": 239300 }, { "epoch": 30.95022624434389, "grad_norm": 1.8438737392425537, "learning_rate": 0.001, "loss": 2.3813, "step": 239400 }, { "epoch": 30.963154492566257, "grad_norm": 1.5476466417312622, "learning_rate": 0.001, "loss": 2.3914, "step": 239500 }, { "epoch": 30.976082740788623, "grad_norm": 1.6090165376663208, "learning_rate": 0.001, "loss": 2.3801, "step": 239600 }, { "epoch": 30.98901098901099, "grad_norm": 4.936541557312012, "learning_rate": 0.001, "loss": 2.3727, "step": 239700 }, { "epoch": 31.001939237233355, "grad_norm": 0.8120733499526978, "learning_rate": 0.001, "loss": 2.4056, "step": 239800 }, { "epoch": 31.01486748545572, "grad_norm": 1.0298163890838623, "learning_rate": 0.001, "loss": 2.3033, "step": 239900 }, { "epoch": 31.027795733678087, "grad_norm": 0.9400488138198853, "learning_rate": 0.001, "loss": 2.2832, "step": 240000 }, { "epoch": 31.040723981900452, "grad_norm": 1.0681086778640747, "learning_rate": 0.001, "loss": 2.3, "step": 240100 }, { "epoch": 31.053652230122818, "grad_norm": 0.9152793884277344, "learning_rate": 0.001, "loss": 2.3107, "step": 240200 }, { "epoch": 31.066580478345184, "grad_norm": 0.7954617738723755, "learning_rate": 0.001, "loss": 2.3079, "step": 240300 }, { "epoch": 31.07950872656755, "grad_norm": 0.992202639579773, "learning_rate": 0.001, "loss": 2.2893, "step": 240400 }, { "epoch": 31.092436974789916, "grad_norm": 5.509512424468994, "learning_rate": 0.001, "loss": 2.302, "step": 240500 }, { "epoch": 31.10536522301228, "grad_norm": 0.8047901391983032, "learning_rate": 0.001, "loss": 2.317, "step": 240600 }, { "epoch": 31.118293471234647, "grad_norm": 0.8896815776824951, "learning_rate": 0.001, "loss": 2.2969, "step": 240700 }, { "epoch": 31.131221719457013, "grad_norm": 1.5383204221725464, "learning_rate": 0.001, "loss": 2.3216, "step": 240800 }, { "epoch": 31.14414996767938, "grad_norm": 0.8610559701919556, "learning_rate": 0.001, "loss": 2.3172, "step": 240900 }, { "epoch": 31.157078215901745, "grad_norm": 1.2164890766143799, "learning_rate": 0.001, "loss": 2.2954, "step": 241000 }, { "epoch": 31.17000646412411, "grad_norm": 0.9731534719467163, "learning_rate": 0.001, "loss": 2.3333, "step": 241100 }, { "epoch": 31.182934712346476, "grad_norm": 0.9690970182418823, "learning_rate": 0.001, "loss": 2.3287, "step": 241200 }, { "epoch": 31.195862960568842, "grad_norm": 0.9240332841873169, "learning_rate": 0.001, "loss": 2.3163, "step": 241300 }, { "epoch": 31.208791208791208, "grad_norm": 0.828520655632019, "learning_rate": 0.001, "loss": 2.3062, "step": 241400 }, { "epoch": 31.221719457013574, "grad_norm": 1.211458683013916, "learning_rate": 0.001, "loss": 2.3346, "step": 241500 }, { "epoch": 31.23464770523594, "grad_norm": 1.1847816705703735, "learning_rate": 0.001, "loss": 2.3304, "step": 241600 }, { "epoch": 31.247575953458306, "grad_norm": 1.5685145854949951, "learning_rate": 0.001, "loss": 2.3594, "step": 241700 }, { "epoch": 31.26050420168067, "grad_norm": 1.4992828369140625, "learning_rate": 0.001, "loss": 2.3242, "step": 241800 }, { "epoch": 31.273432449903037, "grad_norm": 1.2240442037582397, "learning_rate": 0.001, "loss": 2.3331, "step": 241900 }, { "epoch": 31.286360698125403, "grad_norm": 1.916422724723816, "learning_rate": 0.001, "loss": 2.3376, "step": 242000 }, { "epoch": 31.29928894634777, "grad_norm": 0.7916958928108215, "learning_rate": 0.001, "loss": 2.3192, "step": 242100 }, { "epoch": 31.312217194570135, "grad_norm": 0.8147183060646057, "learning_rate": 0.001, "loss": 2.3297, "step": 242200 }, { "epoch": 31.3251454427925, "grad_norm": 0.8414350152015686, "learning_rate": 0.001, "loss": 2.3381, "step": 242300 }, { "epoch": 31.338073691014866, "grad_norm": 0.7968239784240723, "learning_rate": 0.001, "loss": 2.322, "step": 242400 }, { "epoch": 31.351001939237232, "grad_norm": 0.9013005495071411, "learning_rate": 0.001, "loss": 2.3198, "step": 242500 }, { "epoch": 31.363930187459598, "grad_norm": 0.9503751993179321, "learning_rate": 0.001, "loss": 2.3435, "step": 242600 }, { "epoch": 31.376858435681964, "grad_norm": 8.167922973632812, "learning_rate": 0.001, "loss": 2.3313, "step": 242700 }, { "epoch": 31.38978668390433, "grad_norm": 0.9387877583503723, "learning_rate": 0.001, "loss": 2.3314, "step": 242800 }, { "epoch": 31.402714932126695, "grad_norm": 0.8631706237792969, "learning_rate": 0.001, "loss": 2.3536, "step": 242900 }, { "epoch": 31.41564318034906, "grad_norm": 1.1135529279708862, "learning_rate": 0.001, "loss": 2.3418, "step": 243000 }, { "epoch": 31.428571428571427, "grad_norm": 1.0767805576324463, "learning_rate": 0.001, "loss": 2.3573, "step": 243100 }, { "epoch": 31.441499676793793, "grad_norm": 0.9951920509338379, "learning_rate": 0.001, "loss": 2.3443, "step": 243200 }, { "epoch": 31.45442792501616, "grad_norm": 0.9296994805335999, "learning_rate": 0.001, "loss": 2.3439, "step": 243300 }, { "epoch": 31.467356173238525, "grad_norm": 0.7984361052513123, "learning_rate": 0.001, "loss": 2.3689, "step": 243400 }, { "epoch": 31.48028442146089, "grad_norm": 1.464819312095642, "learning_rate": 0.001, "loss": 2.3386, "step": 243500 }, { "epoch": 31.49321266968326, "grad_norm": 1.2210297584533691, "learning_rate": 0.001, "loss": 2.3603, "step": 243600 }, { "epoch": 31.506140917905626, "grad_norm": 0.8336296081542969, "learning_rate": 0.001, "loss": 2.3338, "step": 243700 }, { "epoch": 31.51906916612799, "grad_norm": 0.8224362730979919, "learning_rate": 0.001, "loss": 2.366, "step": 243800 }, { "epoch": 31.531997414350357, "grad_norm": 0.9839072227478027, "learning_rate": 0.001, "loss": 2.3449, "step": 243900 }, { "epoch": 31.544925662572723, "grad_norm": 3.3660194873809814, "learning_rate": 0.001, "loss": 2.3513, "step": 244000 }, { "epoch": 31.55785391079509, "grad_norm": 1.131670594215393, "learning_rate": 0.001, "loss": 2.3637, "step": 244100 }, { "epoch": 31.570782159017455, "grad_norm": 0.9902528524398804, "learning_rate": 0.001, "loss": 2.3384, "step": 244200 }, { "epoch": 31.58371040723982, "grad_norm": 0.9839742183685303, "learning_rate": 0.001, "loss": 2.3418, "step": 244300 }, { "epoch": 31.596638655462186, "grad_norm": 0.8501009345054626, "learning_rate": 0.001, "loss": 2.3545, "step": 244400 }, { "epoch": 31.609566903684552, "grad_norm": 1.0148403644561768, "learning_rate": 0.001, "loss": 2.3684, "step": 244500 }, { "epoch": 31.622495151906918, "grad_norm": 1.031610369682312, "learning_rate": 0.001, "loss": 2.3228, "step": 244600 }, { "epoch": 31.635423400129284, "grad_norm": 0.8355256915092468, "learning_rate": 0.001, "loss": 2.343, "step": 244700 }, { "epoch": 31.64835164835165, "grad_norm": 0.9355105757713318, "learning_rate": 0.001, "loss": 2.3564, "step": 244800 }, { "epoch": 31.661279896574015, "grad_norm": 0.8918055295944214, "learning_rate": 0.001, "loss": 2.3422, "step": 244900 }, { "epoch": 31.67420814479638, "grad_norm": 0.7968000769615173, "learning_rate": 0.001, "loss": 2.3488, "step": 245000 }, { "epoch": 31.687136393018747, "grad_norm": 0.869384229183197, "learning_rate": 0.001, "loss": 2.3798, "step": 245100 }, { "epoch": 31.700064641241113, "grad_norm": 0.8526738286018372, "learning_rate": 0.001, "loss": 2.3548, "step": 245200 }, { "epoch": 31.71299288946348, "grad_norm": 0.8758681416511536, "learning_rate": 0.001, "loss": 2.3561, "step": 245300 }, { "epoch": 31.725921137685845, "grad_norm": 0.8866673707962036, "learning_rate": 0.001, "loss": 2.3704, "step": 245400 }, { "epoch": 31.73884938590821, "grad_norm": 0.7486825585365295, "learning_rate": 0.001, "loss": 2.3329, "step": 245500 }, { "epoch": 31.751777634130576, "grad_norm": 0.6942880749702454, "learning_rate": 0.001, "loss": 2.371, "step": 245600 }, { "epoch": 31.764705882352942, "grad_norm": 1.0576125383377075, "learning_rate": 0.001, "loss": 2.3621, "step": 245700 }, { "epoch": 31.777634130575308, "grad_norm": 0.834611713886261, "learning_rate": 0.001, "loss": 2.3808, "step": 245800 }, { "epoch": 31.790562378797674, "grad_norm": 1.0040979385375977, "learning_rate": 0.001, "loss": 2.3699, "step": 245900 }, { "epoch": 31.80349062702004, "grad_norm": 0.9656838774681091, "learning_rate": 0.001, "loss": 2.3744, "step": 246000 }, { "epoch": 31.816418875242405, "grad_norm": 0.8995634913444519, "learning_rate": 0.001, "loss": 2.388, "step": 246100 }, { "epoch": 31.82934712346477, "grad_norm": 1.533859372138977, "learning_rate": 0.001, "loss": 2.364, "step": 246200 }, { "epoch": 31.842275371687137, "grad_norm": 1.1541317701339722, "learning_rate": 0.001, "loss": 2.3749, "step": 246300 }, { "epoch": 31.855203619909503, "grad_norm": 0.8839662671089172, "learning_rate": 0.001, "loss": 2.3567, "step": 246400 }, { "epoch": 31.86813186813187, "grad_norm": 0.7614924311637878, "learning_rate": 0.001, "loss": 2.3752, "step": 246500 }, { "epoch": 31.881060116354234, "grad_norm": 0.9797805547714233, "learning_rate": 0.001, "loss": 2.3494, "step": 246600 }, { "epoch": 31.8939883645766, "grad_norm": 0.860873818397522, "learning_rate": 0.001, "loss": 2.3735, "step": 246700 }, { "epoch": 31.906916612798966, "grad_norm": 4.039159297943115, "learning_rate": 0.001, "loss": 2.3718, "step": 246800 }, { "epoch": 31.919844861021332, "grad_norm": 0.9982579946517944, "learning_rate": 0.001, "loss": 2.3805, "step": 246900 }, { "epoch": 31.932773109243698, "grad_norm": 0.9422257542610168, "learning_rate": 0.001, "loss": 2.3812, "step": 247000 }, { "epoch": 31.945701357466064, "grad_norm": 0.9128100275993347, "learning_rate": 0.001, "loss": 2.3662, "step": 247100 }, { "epoch": 31.95862960568843, "grad_norm": 1.0234426259994507, "learning_rate": 0.001, "loss": 2.3918, "step": 247200 }, { "epoch": 31.971557853910795, "grad_norm": 0.8345157504081726, "learning_rate": 0.001, "loss": 2.3732, "step": 247300 }, { "epoch": 31.98448610213316, "grad_norm": 0.8465962409973145, "learning_rate": 0.001, "loss": 2.3651, "step": 247400 }, { "epoch": 31.997414350355527, "grad_norm": 1.0688384771347046, "learning_rate": 0.001, "loss": 2.3882, "step": 247500 }, { "epoch": 32.01034259857789, "grad_norm": 0.9017046093940735, "learning_rate": 0.001, "loss": 2.3129, "step": 247600 }, { "epoch": 32.02327084680026, "grad_norm": 0.9588302373886108, "learning_rate": 0.001, "loss": 2.2796, "step": 247700 }, { "epoch": 32.036199095022624, "grad_norm": 1.127841591835022, "learning_rate": 0.001, "loss": 2.2971, "step": 247800 }, { "epoch": 32.04912734324499, "grad_norm": 0.7362520098686218, "learning_rate": 0.001, "loss": 2.291, "step": 247900 }, { "epoch": 32.062055591467356, "grad_norm": 5.668458461761475, "learning_rate": 0.001, "loss": 2.3066, "step": 248000 }, { "epoch": 32.07498383968972, "grad_norm": 0.8157813549041748, "learning_rate": 0.001, "loss": 2.3144, "step": 248100 }, { "epoch": 32.08791208791209, "grad_norm": 0.9030776619911194, "learning_rate": 0.001, "loss": 2.2943, "step": 248200 }, { "epoch": 32.10084033613445, "grad_norm": 1.1465890407562256, "learning_rate": 0.001, "loss": 2.313, "step": 248300 }, { "epoch": 32.11376858435682, "grad_norm": 0.855173647403717, "learning_rate": 0.001, "loss": 2.3006, "step": 248400 }, { "epoch": 32.126696832579185, "grad_norm": 0.8130258917808533, "learning_rate": 0.001, "loss": 2.2957, "step": 248500 }, { "epoch": 32.13962508080155, "grad_norm": 1.0144370794296265, "learning_rate": 0.001, "loss": 2.2987, "step": 248600 }, { "epoch": 32.15255332902392, "grad_norm": 0.8790687322616577, "learning_rate": 0.001, "loss": 2.3103, "step": 248700 }, { "epoch": 32.16548157724628, "grad_norm": 5.181687355041504, "learning_rate": 0.001, "loss": 2.3186, "step": 248800 }, { "epoch": 32.17840982546865, "grad_norm": 1.1254135370254517, "learning_rate": 0.001, "loss": 2.312, "step": 248900 }, { "epoch": 32.191338073691014, "grad_norm": 1.083653450012207, "learning_rate": 0.001, "loss": 2.3078, "step": 249000 }, { "epoch": 32.20426632191338, "grad_norm": 0.8457058072090149, "learning_rate": 0.001, "loss": 2.3082, "step": 249100 }, { "epoch": 32.217194570135746, "grad_norm": 1.346848726272583, "learning_rate": 0.001, "loss": 2.3193, "step": 249200 }, { "epoch": 32.23012281835811, "grad_norm": 0.946753740310669, "learning_rate": 0.001, "loss": 2.3155, "step": 249300 }, { "epoch": 32.24305106658048, "grad_norm": 0.9590714573860168, "learning_rate": 0.001, "loss": 2.3194, "step": 249400 }, { "epoch": 32.25597931480284, "grad_norm": 0.8244489431381226, "learning_rate": 0.001, "loss": 2.3142, "step": 249500 }, { "epoch": 32.26890756302521, "grad_norm": 0.8095967173576355, "learning_rate": 0.001, "loss": 2.3433, "step": 249600 }, { "epoch": 32.281835811247575, "grad_norm": 0.8173559904098511, "learning_rate": 0.001, "loss": 2.3493, "step": 249700 }, { "epoch": 32.29476405946994, "grad_norm": 0.9091458916664124, "learning_rate": 0.001, "loss": 2.3188, "step": 249800 }, { "epoch": 32.30769230769231, "grad_norm": 1.1782166957855225, "learning_rate": 0.001, "loss": 2.3326, "step": 249900 }, { "epoch": 32.32062055591467, "grad_norm": 0.8581821322441101, "learning_rate": 0.001, "loss": 2.3158, "step": 250000 }, { "epoch": 32.33354880413704, "grad_norm": 1.2731324434280396, "learning_rate": 0.001, "loss": 2.3069, "step": 250100 }, { "epoch": 32.346477052359404, "grad_norm": 1.4180259704589844, "learning_rate": 0.001, "loss": 2.3331, "step": 250200 }, { "epoch": 32.35940530058177, "grad_norm": 5.357227325439453, "learning_rate": 0.001, "loss": 2.3407, "step": 250300 }, { "epoch": 32.372333548804136, "grad_norm": 0.8203734755516052, "learning_rate": 0.001, "loss": 2.3448, "step": 250400 }, { "epoch": 32.3852617970265, "grad_norm": 0.8279623985290527, "learning_rate": 0.001, "loss": 2.3275, "step": 250500 }, { "epoch": 32.39819004524887, "grad_norm": 0.7836129069328308, "learning_rate": 0.001, "loss": 2.3238, "step": 250600 }, { "epoch": 32.41111829347123, "grad_norm": 0.8948267698287964, "learning_rate": 0.001, "loss": 2.3442, "step": 250700 }, { "epoch": 32.4240465416936, "grad_norm": 0.7897282838821411, "learning_rate": 0.001, "loss": 2.3215, "step": 250800 }, { "epoch": 32.436974789915965, "grad_norm": 1.1083019971847534, "learning_rate": 0.001, "loss": 2.349, "step": 250900 }, { "epoch": 32.44990303813833, "grad_norm": 1.0611270666122437, "learning_rate": 0.001, "loss": 2.3571, "step": 251000 }, { "epoch": 32.4628312863607, "grad_norm": 0.8951107859611511, "learning_rate": 0.001, "loss": 2.3418, "step": 251100 }, { "epoch": 32.47575953458306, "grad_norm": 0.9011873006820679, "learning_rate": 0.001, "loss": 2.3117, "step": 251200 }, { "epoch": 32.48868778280543, "grad_norm": 2.9235804080963135, "learning_rate": 0.001, "loss": 2.3414, "step": 251300 }, { "epoch": 32.501616031027794, "grad_norm": 0.9244527816772461, "learning_rate": 0.001, "loss": 2.3225, "step": 251400 }, { "epoch": 32.51454427925016, "grad_norm": 41.43782043457031, "learning_rate": 0.001, "loss": 2.3262, "step": 251500 }, { "epoch": 32.527472527472526, "grad_norm": 1.0158241987228394, "learning_rate": 0.001, "loss": 2.3595, "step": 251600 }, { "epoch": 32.54040077569489, "grad_norm": 0.8633265495300293, "learning_rate": 0.001, "loss": 2.353, "step": 251700 }, { "epoch": 32.55332902391726, "grad_norm": 0.8903251886367798, "learning_rate": 0.001, "loss": 2.3486, "step": 251800 }, { "epoch": 32.56625727213962, "grad_norm": 0.884276807308197, "learning_rate": 0.001, "loss": 2.349, "step": 251900 }, { "epoch": 32.57918552036199, "grad_norm": 0.8793445229530334, "learning_rate": 0.001, "loss": 2.3411, "step": 252000 }, { "epoch": 32.592113768584355, "grad_norm": 1.6072851419448853, "learning_rate": 0.001, "loss": 2.3541, "step": 252100 }, { "epoch": 32.60504201680672, "grad_norm": 4.41829252243042, "learning_rate": 0.001, "loss": 2.3345, "step": 252200 }, { "epoch": 32.617970265029086, "grad_norm": 4.528825759887695, "learning_rate": 0.001, "loss": 2.3415, "step": 252300 }, { "epoch": 32.63089851325145, "grad_norm": 42.21258544921875, "learning_rate": 0.001, "loss": 2.3482, "step": 252400 }, { "epoch": 32.64382676147382, "grad_norm": 1.148459792137146, "learning_rate": 0.001, "loss": 2.3335, "step": 252500 }, { "epoch": 32.656755009696184, "grad_norm": 0.9537655711174011, "learning_rate": 0.001, "loss": 2.3346, "step": 252600 }, { "epoch": 32.66968325791855, "grad_norm": 0.7452446222305298, "learning_rate": 0.001, "loss": 2.3333, "step": 252700 }, { "epoch": 32.682611506140915, "grad_norm": 0.9337222576141357, "learning_rate": 0.001, "loss": 2.3587, "step": 252800 }, { "epoch": 32.69553975436328, "grad_norm": 0.8684788346290588, "learning_rate": 0.001, "loss": 2.3352, "step": 252900 }, { "epoch": 32.70846800258565, "grad_norm": 1.1676878929138184, "learning_rate": 0.001, "loss": 2.3347, "step": 253000 }, { "epoch": 32.72139625080801, "grad_norm": 0.9747164249420166, "learning_rate": 0.001, "loss": 2.3586, "step": 253100 }, { "epoch": 32.73432449903038, "grad_norm": 2.120286226272583, "learning_rate": 0.001, "loss": 2.3639, "step": 253200 }, { "epoch": 32.747252747252745, "grad_norm": 0.7932896018028259, "learning_rate": 0.001, "loss": 2.3636, "step": 253300 }, { "epoch": 32.76018099547511, "grad_norm": 0.9064538478851318, "learning_rate": 0.001, "loss": 2.3424, "step": 253400 }, { "epoch": 32.773109243697476, "grad_norm": 0.7185405492782593, "learning_rate": 0.001, "loss": 2.3621, "step": 253500 }, { "epoch": 32.78603749191984, "grad_norm": 1.0441172122955322, "learning_rate": 0.001, "loss": 2.3287, "step": 253600 }, { "epoch": 32.79896574014221, "grad_norm": 1.0577094554901123, "learning_rate": 0.001, "loss": 2.3585, "step": 253700 }, { "epoch": 32.811893988364574, "grad_norm": 0.863756537437439, "learning_rate": 0.001, "loss": 2.3718, "step": 253800 }, { "epoch": 32.82482223658694, "grad_norm": 0.9243741035461426, "learning_rate": 0.001, "loss": 2.3616, "step": 253900 }, { "epoch": 32.837750484809305, "grad_norm": 0.9286218285560608, "learning_rate": 0.001, "loss": 2.3751, "step": 254000 }, { "epoch": 32.85067873303167, "grad_norm": 0.8968793153762817, "learning_rate": 0.001, "loss": 2.3536, "step": 254100 }, { "epoch": 32.86360698125404, "grad_norm": 2.059882402420044, "learning_rate": 0.001, "loss": 2.3632, "step": 254200 }, { "epoch": 32.8765352294764, "grad_norm": 0.9918668866157532, "learning_rate": 0.001, "loss": 2.3621, "step": 254300 }, { "epoch": 32.88946347769877, "grad_norm": 0.9657626152038574, "learning_rate": 0.001, "loss": 2.3699, "step": 254400 }, { "epoch": 32.902391725921134, "grad_norm": 0.9977561831474304, "learning_rate": 0.001, "loss": 2.3492, "step": 254500 }, { "epoch": 32.9153199741435, "grad_norm": 1.3229690790176392, "learning_rate": 0.001, "loss": 2.3858, "step": 254600 }, { "epoch": 32.928248222365866, "grad_norm": 1.0301514863967896, "learning_rate": 0.001, "loss": 2.3635, "step": 254700 }, { "epoch": 32.94117647058823, "grad_norm": 8.76497745513916, "learning_rate": 0.001, "loss": 2.3642, "step": 254800 }, { "epoch": 32.9541047188106, "grad_norm": 0.8749055862426758, "learning_rate": 0.001, "loss": 2.3612, "step": 254900 }, { "epoch": 32.967032967032964, "grad_norm": 0.9088620543479919, "learning_rate": 0.001, "loss": 2.3678, "step": 255000 }, { "epoch": 32.97996121525533, "grad_norm": 1.0313410758972168, "learning_rate": 0.001, "loss": 2.3647, "step": 255100 }, { "epoch": 32.992889463477695, "grad_norm": 0.8731614351272583, "learning_rate": 0.001, "loss": 2.3793, "step": 255200 }, { "epoch": 33.00581771170007, "grad_norm": 1.0477160215377808, "learning_rate": 0.001, "loss": 2.3075, "step": 255300 }, { "epoch": 33.018745959922434, "grad_norm": 1.2152339220046997, "learning_rate": 0.001, "loss": 2.2835, "step": 255400 }, { "epoch": 33.0316742081448, "grad_norm": 1.135392665863037, "learning_rate": 0.001, "loss": 2.2922, "step": 255500 }, { "epoch": 33.044602456367166, "grad_norm": 20.931482315063477, "learning_rate": 0.001, "loss": 2.2666, "step": 255600 }, { "epoch": 33.05753070458953, "grad_norm": 1.0107544660568237, "learning_rate": 0.001, "loss": 2.2748, "step": 255700 }, { "epoch": 33.0704589528119, "grad_norm": 0.8927043676376343, "learning_rate": 0.001, "loss": 2.2769, "step": 255800 }, { "epoch": 33.08338720103426, "grad_norm": 1.8541843891143799, "learning_rate": 0.001, "loss": 2.3027, "step": 255900 }, { "epoch": 33.09631544925663, "grad_norm": 1.087019443511963, "learning_rate": 0.001, "loss": 2.2802, "step": 256000 }, { "epoch": 33.109243697478995, "grad_norm": 0.9958358407020569, "learning_rate": 0.001, "loss": 2.2975, "step": 256100 }, { "epoch": 33.12217194570136, "grad_norm": 1.317949652671814, "learning_rate": 0.001, "loss": 2.2959, "step": 256200 }, { "epoch": 33.135100193923726, "grad_norm": 1.113891363143921, "learning_rate": 0.001, "loss": 2.2958, "step": 256300 }, { "epoch": 33.14802844214609, "grad_norm": 1.6630116701126099, "learning_rate": 0.001, "loss": 2.2897, "step": 256400 }, { "epoch": 33.16095669036846, "grad_norm": 1.1534594297409058, "learning_rate": 0.001, "loss": 2.3157, "step": 256500 }, { "epoch": 33.173884938590824, "grad_norm": 0.957991898059845, "learning_rate": 0.001, "loss": 2.2968, "step": 256600 }, { "epoch": 33.18681318681319, "grad_norm": 0.9491854310035706, "learning_rate": 0.001, "loss": 2.31, "step": 256700 }, { "epoch": 33.199741435035556, "grad_norm": 1.1512068510055542, "learning_rate": 0.001, "loss": 2.3246, "step": 256800 }, { "epoch": 33.21266968325792, "grad_norm": 1.0600074529647827, "learning_rate": 0.001, "loss": 2.3059, "step": 256900 }, { "epoch": 33.22559793148029, "grad_norm": 28.118383407592773, "learning_rate": 0.001, "loss": 2.2938, "step": 257000 }, { "epoch": 33.23852617970265, "grad_norm": 8.056256294250488, "learning_rate": 0.001, "loss": 2.2891, "step": 257100 }, { "epoch": 33.25145442792502, "grad_norm": 1.1232680082321167, "learning_rate": 0.001, "loss": 2.2997, "step": 257200 }, { "epoch": 33.264382676147385, "grad_norm": 1.0150465965270996, "learning_rate": 0.001, "loss": 2.322, "step": 257300 }, { "epoch": 33.27731092436975, "grad_norm": 1.0245370864868164, "learning_rate": 0.001, "loss": 2.3337, "step": 257400 }, { "epoch": 33.290239172592116, "grad_norm": 1.0414156913757324, "learning_rate": 0.001, "loss": 2.3166, "step": 257500 }, { "epoch": 33.30316742081448, "grad_norm": 1.2367228269577026, "learning_rate": 0.001, "loss": 2.3127, "step": 257600 }, { "epoch": 33.31609566903685, "grad_norm": 4.022750377655029, "learning_rate": 0.001, "loss": 2.3046, "step": 257700 }, { "epoch": 33.329023917259214, "grad_norm": 1.5312392711639404, "learning_rate": 0.001, "loss": 2.3208, "step": 257800 }, { "epoch": 33.34195216548158, "grad_norm": 1.5919263362884521, "learning_rate": 0.001, "loss": 2.3356, "step": 257900 }, { "epoch": 33.354880413703945, "grad_norm": 138.90963745117188, "learning_rate": 0.001, "loss": 2.3218, "step": 258000 }, { "epoch": 33.36780866192631, "grad_norm": 1.131279706954956, "learning_rate": 0.001, "loss": 2.3281, "step": 258100 }, { "epoch": 33.38073691014868, "grad_norm": 1.1722748279571533, "learning_rate": 0.001, "loss": 2.321, "step": 258200 }, { "epoch": 33.39366515837104, "grad_norm": 1.0446531772613525, "learning_rate": 0.001, "loss": 2.3172, "step": 258300 }, { "epoch": 33.40659340659341, "grad_norm": 1.0861471891403198, "learning_rate": 0.001, "loss": 2.3334, "step": 258400 }, { "epoch": 33.419521654815775, "grad_norm": 0.9551491737365723, "learning_rate": 0.001, "loss": 2.3307, "step": 258500 }, { "epoch": 33.43244990303814, "grad_norm": 1.2997331619262695, "learning_rate": 0.001, "loss": 2.3302, "step": 258600 }, { "epoch": 33.445378151260506, "grad_norm": 1.3053487539291382, "learning_rate": 0.001, "loss": 2.3419, "step": 258700 }, { "epoch": 33.45830639948287, "grad_norm": 3.7954745292663574, "learning_rate": 0.001, "loss": 2.3207, "step": 258800 }, { "epoch": 33.47123464770524, "grad_norm": 1.8072153329849243, "learning_rate": 0.001, "loss": 2.3156, "step": 258900 }, { "epoch": 33.484162895927604, "grad_norm": 261.9762268066406, "learning_rate": 0.001, "loss": 2.3356, "step": 259000 }, { "epoch": 33.49709114414997, "grad_norm": 1.0431486368179321, "learning_rate": 0.001, "loss": 2.3442, "step": 259100 }, { "epoch": 33.510019392372335, "grad_norm": 4.047733783721924, "learning_rate": 0.001, "loss": 2.3389, "step": 259200 }, { "epoch": 33.5229476405947, "grad_norm": 6.222929954528809, "learning_rate": 0.001, "loss": 2.3246, "step": 259300 }, { "epoch": 33.53587588881707, "grad_norm": 1.6590880155563354, "learning_rate": 0.001, "loss": 2.3294, "step": 259400 }, { "epoch": 33.54880413703943, "grad_norm": 1.6498653888702393, "learning_rate": 0.001, "loss": 2.3375, "step": 259500 }, { "epoch": 33.5617323852618, "grad_norm": 4.321327209472656, "learning_rate": 0.001, "loss": 2.3507, "step": 259600 }, { "epoch": 33.574660633484164, "grad_norm": 1.1133010387420654, "learning_rate": 0.001, "loss": 2.3411, "step": 259700 }, { "epoch": 33.58758888170653, "grad_norm": 0.9960143566131592, "learning_rate": 0.001, "loss": 2.3475, "step": 259800 }, { "epoch": 33.600517129928896, "grad_norm": 1.7473540306091309, "learning_rate": 0.001, "loss": 2.3439, "step": 259900 }, { "epoch": 33.61344537815126, "grad_norm": 1.1720799207687378, "learning_rate": 0.001, "loss": 2.3286, "step": 260000 }, { "epoch": 33.62637362637363, "grad_norm": 0.8569528460502625, "learning_rate": 0.001, "loss": 2.3441, "step": 260100 }, { "epoch": 33.63930187459599, "grad_norm": 1.04877507686615, "learning_rate": 0.001, "loss": 2.3468, "step": 260200 }, { "epoch": 33.65223012281836, "grad_norm": 0.9870538115501404, "learning_rate": 0.001, "loss": 2.3483, "step": 260300 }, { "epoch": 33.665158371040725, "grad_norm": 2.4412567615509033, "learning_rate": 0.001, "loss": 2.3551, "step": 260400 }, { "epoch": 33.67808661926309, "grad_norm": 1.0311144590377808, "learning_rate": 0.001, "loss": 2.3596, "step": 260500 }, { "epoch": 33.69101486748546, "grad_norm": 1.7463706731796265, "learning_rate": 0.001, "loss": 2.3425, "step": 260600 }, { "epoch": 33.70394311570782, "grad_norm": 1.008294939994812, "learning_rate": 0.001, "loss": 2.3212, "step": 260700 }, { "epoch": 33.71687136393019, "grad_norm": 0.899677038192749, "learning_rate": 0.001, "loss": 2.3573, "step": 260800 }, { "epoch": 33.729799612152554, "grad_norm": 0.8394414186477661, "learning_rate": 0.001, "loss": 2.3514, "step": 260900 }, { "epoch": 33.74272786037492, "grad_norm": 0.8665146231651306, "learning_rate": 0.001, "loss": 2.3566, "step": 261000 }, { "epoch": 33.755656108597286, "grad_norm": 1.0399270057678223, "learning_rate": 0.001, "loss": 2.3363, "step": 261100 }, { "epoch": 33.76858435681965, "grad_norm": 1.0701279640197754, "learning_rate": 0.001, "loss": 2.3488, "step": 261200 }, { "epoch": 33.78151260504202, "grad_norm": 1.0343505144119263, "learning_rate": 0.001, "loss": 2.3519, "step": 261300 }, { "epoch": 33.79444085326438, "grad_norm": 1.0662841796875, "learning_rate": 0.001, "loss": 2.34, "step": 261400 }, { "epoch": 33.80736910148675, "grad_norm": 0.9702130556106567, "learning_rate": 0.001, "loss": 2.3402, "step": 261500 }, { "epoch": 33.820297349709115, "grad_norm": 0.9155075550079346, "learning_rate": 0.001, "loss": 2.3542, "step": 261600 }, { "epoch": 33.83322559793148, "grad_norm": 0.974030077457428, "learning_rate": 0.001, "loss": 2.3526, "step": 261700 }, { "epoch": 33.84615384615385, "grad_norm": 1.0056425333023071, "learning_rate": 0.001, "loss": 2.3566, "step": 261800 }, { "epoch": 33.85908209437621, "grad_norm": 0.9460893273353577, "learning_rate": 0.001, "loss": 2.3619, "step": 261900 }, { "epoch": 33.87201034259858, "grad_norm": 1.1724274158477783, "learning_rate": 0.001, "loss": 2.3443, "step": 262000 }, { "epoch": 33.884938590820944, "grad_norm": 1.130208969116211, "learning_rate": 0.001, "loss": 2.3573, "step": 262100 }, { "epoch": 33.89786683904331, "grad_norm": 0.9663734436035156, "learning_rate": 0.001, "loss": 2.3536, "step": 262200 }, { "epoch": 33.910795087265676, "grad_norm": 0.9509479403495789, "learning_rate": 0.001, "loss": 2.3493, "step": 262300 }, { "epoch": 33.92372333548804, "grad_norm": 1.0996090173721313, "learning_rate": 0.001, "loss": 2.3591, "step": 262400 }, { "epoch": 33.93665158371041, "grad_norm": 1.303161859512329, "learning_rate": 0.001, "loss": 2.3666, "step": 262500 }, { "epoch": 33.94957983193277, "grad_norm": 0.9685322642326355, "learning_rate": 0.001, "loss": 2.3406, "step": 262600 }, { "epoch": 33.96250808015514, "grad_norm": 1.0271499156951904, "learning_rate": 0.001, "loss": 2.3571, "step": 262700 }, { "epoch": 33.975436328377505, "grad_norm": 1.0411099195480347, "learning_rate": 0.001, "loss": 2.3553, "step": 262800 }, { "epoch": 33.98836457659987, "grad_norm": 3.238006830215454, "learning_rate": 0.001, "loss": 2.349, "step": 262900 }, { "epoch": 34.00129282482224, "grad_norm": 0.7144445180892944, "learning_rate": 0.001, "loss": 2.3056, "step": 263000 }, { "epoch": 34.0142210730446, "grad_norm": 1.1037026643753052, "learning_rate": 0.001, "loss": 2.2666, "step": 263100 }, { "epoch": 34.02714932126697, "grad_norm": 0.7999835014343262, "learning_rate": 0.001, "loss": 2.2787, "step": 263200 }, { "epoch": 34.040077569489334, "grad_norm": 1.0395094156265259, "learning_rate": 0.001, "loss": 2.2701, "step": 263300 }, { "epoch": 34.0530058177117, "grad_norm": 0.8681461215019226, "learning_rate": 0.001, "loss": 2.2738, "step": 263400 }, { "epoch": 34.065934065934066, "grad_norm": 0.9515722393989563, "learning_rate": 0.001, "loss": 2.2756, "step": 263500 }, { "epoch": 34.07886231415643, "grad_norm": 0.8471808433532715, "learning_rate": 0.001, "loss": 2.274, "step": 263600 }, { "epoch": 34.0917905623788, "grad_norm": 1.3025094270706177, "learning_rate": 0.001, "loss": 2.2685, "step": 263700 }, { "epoch": 34.10471881060116, "grad_norm": 1.5350909233093262, "learning_rate": 0.001, "loss": 2.2887, "step": 263800 }, { "epoch": 34.11764705882353, "grad_norm": 1.0800222158432007, "learning_rate": 0.001, "loss": 2.2772, "step": 263900 }, { "epoch": 34.130575307045895, "grad_norm": 19.906211853027344, "learning_rate": 0.001, "loss": 2.2945, "step": 264000 }, { "epoch": 34.14350355526826, "grad_norm": 3.2746334075927734, "learning_rate": 0.001, "loss": 2.304, "step": 264100 }, { "epoch": 34.15643180349063, "grad_norm": 0.8490199446678162, "learning_rate": 0.001, "loss": 2.2816, "step": 264200 }, { "epoch": 34.16936005171299, "grad_norm": 1.026039481163025, "learning_rate": 0.001, "loss": 2.2801, "step": 264300 }, { "epoch": 34.18228829993536, "grad_norm": 0.7691171169281006, "learning_rate": 0.001, "loss": 2.2786, "step": 264400 }, { "epoch": 34.195216548157724, "grad_norm": 0.912538468837738, "learning_rate": 0.001, "loss": 2.2879, "step": 264500 }, { "epoch": 34.20814479638009, "grad_norm": 1.047615885734558, "learning_rate": 0.001, "loss": 2.2897, "step": 264600 }, { "epoch": 34.221073044602456, "grad_norm": 0.9390262365341187, "learning_rate": 0.001, "loss": 2.294, "step": 264700 }, { "epoch": 34.23400129282482, "grad_norm": 0.8901190757751465, "learning_rate": 0.001, "loss": 2.3085, "step": 264800 }, { "epoch": 34.24692954104719, "grad_norm": 0.8217082619667053, "learning_rate": 0.001, "loss": 2.2985, "step": 264900 }, { "epoch": 34.25985778926955, "grad_norm": 1.047544240951538, "learning_rate": 0.001, "loss": 2.2917, "step": 265000 }, { "epoch": 34.27278603749192, "grad_norm": 0.7448850870132446, "learning_rate": 0.001, "loss": 2.2889, "step": 265100 }, { "epoch": 34.285714285714285, "grad_norm": 0.798063337802887, "learning_rate": 0.001, "loss": 2.2958, "step": 265200 }, { "epoch": 34.29864253393665, "grad_norm": 1.2209969758987427, "learning_rate": 0.001, "loss": 2.3061, "step": 265300 }, { "epoch": 34.311570782159016, "grad_norm": 0.794380784034729, "learning_rate": 0.001, "loss": 2.2789, "step": 265400 }, { "epoch": 34.32449903038138, "grad_norm": 0.8285496234893799, "learning_rate": 0.001, "loss": 2.2998, "step": 265500 }, { "epoch": 34.33742727860375, "grad_norm": 2.273010015487671, "learning_rate": 0.001, "loss": 2.2963, "step": 265600 }, { "epoch": 34.350355526826114, "grad_norm": 1.244762897491455, "learning_rate": 0.001, "loss": 2.3043, "step": 265700 }, { "epoch": 34.36328377504848, "grad_norm": 0.8638696074485779, "learning_rate": 0.001, "loss": 2.3133, "step": 265800 }, { "epoch": 34.376212023270845, "grad_norm": 1.3922441005706787, "learning_rate": 0.001, "loss": 2.3095, "step": 265900 }, { "epoch": 34.38914027149321, "grad_norm": 0.8471962213516235, "learning_rate": 0.001, "loss": 2.3021, "step": 266000 }, { "epoch": 34.40206851971558, "grad_norm": 0.8657557368278503, "learning_rate": 0.001, "loss": 2.3274, "step": 266100 }, { "epoch": 34.41499676793794, "grad_norm": 0.8042029142379761, "learning_rate": 0.001, "loss": 2.3109, "step": 266200 }, { "epoch": 34.42792501616031, "grad_norm": 0.9876615405082703, "learning_rate": 0.001, "loss": 2.3133, "step": 266300 }, { "epoch": 34.440853264382675, "grad_norm": 1.134290337562561, "learning_rate": 0.001, "loss": 2.3313, "step": 266400 }, { "epoch": 34.45378151260504, "grad_norm": 1.0943061113357544, "learning_rate": 0.001, "loss": 2.2935, "step": 266500 }, { "epoch": 34.466709760827406, "grad_norm": 0.7990856766700745, "learning_rate": 0.001, "loss": 2.313, "step": 266600 }, { "epoch": 34.47963800904977, "grad_norm": 0.8658341765403748, "learning_rate": 0.001, "loss": 2.3062, "step": 266700 }, { "epoch": 34.49256625727214, "grad_norm": 0.7678709626197815, "learning_rate": 0.001, "loss": 2.3315, "step": 266800 }, { "epoch": 34.505494505494504, "grad_norm": 1.1417275667190552, "learning_rate": 0.001, "loss": 2.3232, "step": 266900 }, { "epoch": 34.51842275371687, "grad_norm": 3.438483953475952, "learning_rate": 0.001, "loss": 2.3338, "step": 267000 }, { "epoch": 34.531351001939235, "grad_norm": 0.9061375260353088, "learning_rate": 0.001, "loss": 2.3379, "step": 267100 }, { "epoch": 34.5442792501616, "grad_norm": 0.8993545174598694, "learning_rate": 0.001, "loss": 2.3166, "step": 267200 }, { "epoch": 34.55720749838397, "grad_norm": 1.092663288116455, "learning_rate": 0.001, "loss": 2.3287, "step": 267300 }, { "epoch": 34.57013574660633, "grad_norm": 0.9745708703994751, "learning_rate": 0.001, "loss": 2.3292, "step": 267400 }, { "epoch": 34.5830639948287, "grad_norm": 0.7112572193145752, "learning_rate": 0.001, "loss": 2.3403, "step": 267500 }, { "epoch": 34.595992243051064, "grad_norm": 12.223304748535156, "learning_rate": 0.001, "loss": 2.3331, "step": 267600 }, { "epoch": 34.60892049127343, "grad_norm": 0.9284887313842773, "learning_rate": 0.001, "loss": 2.3352, "step": 267700 }, { "epoch": 34.621848739495796, "grad_norm": 1.2191364765167236, "learning_rate": 0.001, "loss": 2.3069, "step": 267800 }, { "epoch": 34.63477698771816, "grad_norm": 0.9012308120727539, "learning_rate": 0.001, "loss": 2.3181, "step": 267900 }, { "epoch": 34.64770523594053, "grad_norm": 0.8399214744567871, "learning_rate": 0.001, "loss": 2.3143, "step": 268000 }, { "epoch": 34.660633484162894, "grad_norm": 0.9893556833267212, "learning_rate": 0.001, "loss": 2.3379, "step": 268100 }, { "epoch": 34.67356173238526, "grad_norm": 1.0177825689315796, "learning_rate": 0.001, "loss": 2.3474, "step": 268200 }, { "epoch": 34.686489980607625, "grad_norm": 0.8343576788902283, "learning_rate": 0.001, "loss": 2.3424, "step": 268300 }, { "epoch": 34.69941822882999, "grad_norm": 1.3723200559616089, "learning_rate": 0.001, "loss": 2.337, "step": 268400 }, { "epoch": 34.71234647705236, "grad_norm": 0.8580241799354553, "learning_rate": 0.001, "loss": 2.3303, "step": 268500 }, { "epoch": 34.72527472527472, "grad_norm": 0.8473759293556213, "learning_rate": 0.001, "loss": 2.3204, "step": 268600 }, { "epoch": 34.73820297349709, "grad_norm": 12.417221069335938, "learning_rate": 0.001, "loss": 2.3547, "step": 268700 }, { "epoch": 34.751131221719454, "grad_norm": 0.6684747338294983, "learning_rate": 0.001, "loss": 2.3468, "step": 268800 }, { "epoch": 34.76405946994182, "grad_norm": 1.03945791721344, "learning_rate": 0.001, "loss": 2.3214, "step": 268900 }, { "epoch": 34.776987718164186, "grad_norm": 1.642748236656189, "learning_rate": 0.001, "loss": 2.3322, "step": 269000 }, { "epoch": 34.78991596638655, "grad_norm": 0.878079354763031, "learning_rate": 0.001, "loss": 2.3496, "step": 269100 }, { "epoch": 34.80284421460892, "grad_norm": 0.8718327879905701, "learning_rate": 0.001, "loss": 2.3546, "step": 269200 }, { "epoch": 34.81577246283128, "grad_norm": 0.8244274258613586, "learning_rate": 0.001, "loss": 2.3481, "step": 269300 }, { "epoch": 34.82870071105365, "grad_norm": 0.7444800734519958, "learning_rate": 0.001, "loss": 2.3435, "step": 269400 }, { "epoch": 34.841628959276015, "grad_norm": 0.8307334780693054, "learning_rate": 0.001, "loss": 2.3347, "step": 269500 }, { "epoch": 34.85455720749838, "grad_norm": 1.0675410032272339, "learning_rate": 0.001, "loss": 2.3734, "step": 269600 }, { "epoch": 34.86748545572075, "grad_norm": 0.9072102308273315, "learning_rate": 0.001, "loss": 2.3452, "step": 269700 }, { "epoch": 34.88041370394311, "grad_norm": 0.8375012278556824, "learning_rate": 0.001, "loss": 2.3474, "step": 269800 }, { "epoch": 34.89334195216548, "grad_norm": 0.8995103240013123, "learning_rate": 0.001, "loss": 2.3377, "step": 269900 }, { "epoch": 34.906270200387844, "grad_norm": 0.9947850704193115, "learning_rate": 0.001, "loss": 2.3525, "step": 270000 }, { "epoch": 34.91919844861021, "grad_norm": 1.1891534328460693, "learning_rate": 0.001, "loss": 2.3652, "step": 270100 }, { "epoch": 34.932126696832576, "grad_norm": 2.3877971172332764, "learning_rate": 0.001, "loss": 2.3403, "step": 270200 }, { "epoch": 34.94505494505494, "grad_norm": 1.0910476446151733, "learning_rate": 0.001, "loss": 2.3574, "step": 270300 }, { "epoch": 34.95798319327731, "grad_norm": 1.4036927223205566, "learning_rate": 0.001, "loss": 2.3407, "step": 270400 }, { "epoch": 34.97091144149967, "grad_norm": 1.0809755325317383, "learning_rate": 0.001, "loss": 2.3586, "step": 270500 }, { "epoch": 34.98383968972204, "grad_norm": 0.8457688093185425, "learning_rate": 0.001, "loss": 2.3415, "step": 270600 }, { "epoch": 34.99676793794441, "grad_norm": 2.0513415336608887, "learning_rate": 0.001, "loss": 2.3482, "step": 270700 }, { "epoch": 35.00969618616678, "grad_norm": 0.9018962383270264, "learning_rate": 0.001, "loss": 2.2994, "step": 270800 }, { "epoch": 35.022624434389144, "grad_norm": 0.8496239185333252, "learning_rate": 0.001, "loss": 2.2286, "step": 270900 }, { "epoch": 35.03555268261151, "grad_norm": 0.8376985788345337, "learning_rate": 0.001, "loss": 2.2771, "step": 271000 }, { "epoch": 35.048480930833875, "grad_norm": 16.16242027282715, "learning_rate": 0.001, "loss": 2.2507, "step": 271100 }, { "epoch": 35.06140917905624, "grad_norm": 0.7201473116874695, "learning_rate": 0.001, "loss": 2.2747, "step": 271200 }, { "epoch": 35.07433742727861, "grad_norm": 1.2351069450378418, "learning_rate": 0.001, "loss": 2.2819, "step": 271300 }, { "epoch": 35.08726567550097, "grad_norm": 0.8194921016693115, "learning_rate": 0.001, "loss": 2.2671, "step": 271400 }, { "epoch": 35.10019392372334, "grad_norm": 0.9344362616539001, "learning_rate": 0.001, "loss": 2.2764, "step": 271500 }, { "epoch": 35.113122171945705, "grad_norm": 0.6940861940383911, "learning_rate": 0.001, "loss": 2.2834, "step": 271600 }, { "epoch": 35.12605042016807, "grad_norm": 1.132933259010315, "learning_rate": 0.001, "loss": 2.2635, "step": 271700 }, { "epoch": 35.138978668390436, "grad_norm": 5.792572498321533, "learning_rate": 0.001, "loss": 2.2904, "step": 271800 }, { "epoch": 35.1519069166128, "grad_norm": 1.106912612915039, "learning_rate": 0.001, "loss": 2.2699, "step": 271900 }, { "epoch": 35.16483516483517, "grad_norm": 1.2069000005722046, "learning_rate": 0.001, "loss": 2.2925, "step": 272000 }, { "epoch": 35.177763413057534, "grad_norm": 1.2148104906082153, "learning_rate": 0.001, "loss": 2.3027, "step": 272100 }, { "epoch": 35.1906916612799, "grad_norm": 0.974347710609436, "learning_rate": 0.001, "loss": 2.3004, "step": 272200 }, { "epoch": 35.203619909502265, "grad_norm": 0.8778591156005859, "learning_rate": 0.001, "loss": 2.2799, "step": 272300 }, { "epoch": 35.21654815772463, "grad_norm": 1.2820708751678467, "learning_rate": 0.001, "loss": 2.2819, "step": 272400 }, { "epoch": 35.229476405947, "grad_norm": 1.0455683469772339, "learning_rate": 0.001, "loss": 2.3, "step": 272500 }, { "epoch": 35.24240465416936, "grad_norm": 1.103020429611206, "learning_rate": 0.001, "loss": 2.2919, "step": 272600 }, { "epoch": 35.25533290239173, "grad_norm": 0.7412613034248352, "learning_rate": 0.001, "loss": 2.2898, "step": 272700 }, { "epoch": 35.268261150614094, "grad_norm": 0.9953662157058716, "learning_rate": 0.001, "loss": 2.2754, "step": 272800 }, { "epoch": 35.28118939883646, "grad_norm": 0.8468871116638184, "learning_rate": 0.001, "loss": 2.3077, "step": 272900 }, { "epoch": 35.294117647058826, "grad_norm": 0.832181990146637, "learning_rate": 0.001, "loss": 2.2919, "step": 273000 }, { "epoch": 35.30704589528119, "grad_norm": 1.1481356620788574, "learning_rate": 0.001, "loss": 2.3123, "step": 273100 }, { "epoch": 35.31997414350356, "grad_norm": 0.7281816005706787, "learning_rate": 0.001, "loss": 2.3137, "step": 273200 }, { "epoch": 35.33290239172592, "grad_norm": 0.9475312829017639, "learning_rate": 0.001, "loss": 2.3062, "step": 273300 }, { "epoch": 35.34583063994829, "grad_norm": 0.7136187553405762, "learning_rate": 0.001, "loss": 2.3188, "step": 273400 }, { "epoch": 35.358758888170655, "grad_norm": 3.9797768592834473, "learning_rate": 0.001, "loss": 2.2938, "step": 273500 }, { "epoch": 35.37168713639302, "grad_norm": 1.0155435800552368, "learning_rate": 0.001, "loss": 2.2887, "step": 273600 }, { "epoch": 35.38461538461539, "grad_norm": 1.755313754081726, "learning_rate": 0.001, "loss": 2.3141, "step": 273700 }, { "epoch": 35.39754363283775, "grad_norm": 27.790922164916992, "learning_rate": 0.001, "loss": 2.315, "step": 273800 }, { "epoch": 35.41047188106012, "grad_norm": 1.1164735555648804, "learning_rate": 0.001, "loss": 2.317, "step": 273900 }, { "epoch": 35.423400129282484, "grad_norm": 0.9173967838287354, "learning_rate": 0.001, "loss": 2.3085, "step": 274000 }, { "epoch": 35.43632837750485, "grad_norm": 1.7978346347808838, "learning_rate": 0.001, "loss": 2.3061, "step": 274100 }, { "epoch": 35.449256625727216, "grad_norm": 1.0576542615890503, "learning_rate": 0.001, "loss": 2.3323, "step": 274200 }, { "epoch": 35.46218487394958, "grad_norm": 1.0237078666687012, "learning_rate": 0.001, "loss": 2.3054, "step": 274300 }, { "epoch": 35.47511312217195, "grad_norm": 0.9711461663246155, "learning_rate": 0.001, "loss": 2.3042, "step": 274400 }, { "epoch": 35.48804137039431, "grad_norm": 0.9487500190734863, "learning_rate": 0.001, "loss": 2.3206, "step": 274500 }, { "epoch": 35.50096961861668, "grad_norm": 2.175232172012329, "learning_rate": 0.001, "loss": 2.3106, "step": 274600 }, { "epoch": 35.513897866839045, "grad_norm": 0.9864091277122498, "learning_rate": 0.001, "loss": 2.3047, "step": 274700 }, { "epoch": 35.52682611506141, "grad_norm": 1.2396035194396973, "learning_rate": 0.001, "loss": 2.3099, "step": 274800 }, { "epoch": 35.53975436328378, "grad_norm": 0.9742470979690552, "learning_rate": 0.001, "loss": 2.3073, "step": 274900 }, { "epoch": 35.55268261150614, "grad_norm": 0.8649815917015076, "learning_rate": 0.001, "loss": 2.3314, "step": 275000 }, { "epoch": 35.56561085972851, "grad_norm": 0.9092321395874023, "learning_rate": 0.001, "loss": 2.3348, "step": 275100 }, { "epoch": 35.578539107950874, "grad_norm": 1.008737325668335, "learning_rate": 0.001, "loss": 2.3184, "step": 275200 }, { "epoch": 35.59146735617324, "grad_norm": 0.7478454113006592, "learning_rate": 0.001, "loss": 2.3031, "step": 275300 }, { "epoch": 35.604395604395606, "grad_norm": 0.9046449065208435, "learning_rate": 0.001, "loss": 2.3176, "step": 275400 }, { "epoch": 35.61732385261797, "grad_norm": 0.9170364141464233, "learning_rate": 0.001, "loss": 2.3428, "step": 275500 }, { "epoch": 35.63025210084034, "grad_norm": 0.9850847125053406, "learning_rate": 0.001, "loss": 2.3135, "step": 275600 }, { "epoch": 35.6431803490627, "grad_norm": 2.9460413455963135, "learning_rate": 0.001, "loss": 2.3119, "step": 275700 }, { "epoch": 35.65610859728507, "grad_norm": 0.9193796515464783, "learning_rate": 0.001, "loss": 2.3343, "step": 275800 }, { "epoch": 35.669036845507435, "grad_norm": 0.8919903039932251, "learning_rate": 0.001, "loss": 2.333, "step": 275900 }, { "epoch": 35.6819650937298, "grad_norm": 0.9989479184150696, "learning_rate": 0.001, "loss": 2.3212, "step": 276000 }, { "epoch": 35.69489334195217, "grad_norm": 0.8294728398323059, "learning_rate": 0.001, "loss": 2.3289, "step": 276100 }, { "epoch": 35.70782159017453, "grad_norm": 0.754709005355835, "learning_rate": 0.001, "loss": 2.3029, "step": 276200 }, { "epoch": 35.7207498383969, "grad_norm": 0.755172848701477, "learning_rate": 0.001, "loss": 2.3204, "step": 276300 }, { "epoch": 35.733678086619264, "grad_norm": 0.9482194781303406, "learning_rate": 0.001, "loss": 2.3151, "step": 276400 }, { "epoch": 35.74660633484163, "grad_norm": 0.9312056303024292, "learning_rate": 0.001, "loss": 2.3328, "step": 276500 }, { "epoch": 35.759534583063996, "grad_norm": 1.155457854270935, "learning_rate": 0.001, "loss": 2.3289, "step": 276600 }, { "epoch": 35.77246283128636, "grad_norm": 1.1534397602081299, "learning_rate": 0.001, "loss": 2.3263, "step": 276700 }, { "epoch": 35.78539107950873, "grad_norm": 1.0778506994247437, "learning_rate": 0.001, "loss": 2.3214, "step": 276800 }, { "epoch": 35.79831932773109, "grad_norm": 0.838191032409668, "learning_rate": 0.001, "loss": 2.3211, "step": 276900 }, { "epoch": 35.81124757595346, "grad_norm": 1.6523330211639404, "learning_rate": 0.001, "loss": 2.3208, "step": 277000 }, { "epoch": 35.824175824175825, "grad_norm": 1.1403101682662964, "learning_rate": 0.001, "loss": 2.3268, "step": 277100 }, { "epoch": 35.83710407239819, "grad_norm": 1.3225597143173218, "learning_rate": 0.001, "loss": 2.3284, "step": 277200 }, { "epoch": 35.85003232062056, "grad_norm": 0.7917505502700806, "learning_rate": 0.001, "loss": 2.3344, "step": 277300 }, { "epoch": 35.86296056884292, "grad_norm": 0.7618785500526428, "learning_rate": 0.001, "loss": 2.342, "step": 277400 }, { "epoch": 35.87588881706529, "grad_norm": 0.9094759225845337, "learning_rate": 0.001, "loss": 2.3242, "step": 277500 }, { "epoch": 35.888817065287654, "grad_norm": 0.9891490936279297, "learning_rate": 0.001, "loss": 2.3504, "step": 277600 }, { "epoch": 35.90174531351002, "grad_norm": 0.8697724342346191, "learning_rate": 0.001, "loss": 2.3577, "step": 277700 }, { "epoch": 35.914673561732386, "grad_norm": 0.9905003905296326, "learning_rate": 0.001, "loss": 2.3403, "step": 277800 }, { "epoch": 35.92760180995475, "grad_norm": 1.0038561820983887, "learning_rate": 0.001, "loss": 2.3447, "step": 277900 }, { "epoch": 35.94053005817712, "grad_norm": 1.0515104532241821, "learning_rate": 0.001, "loss": 2.3371, "step": 278000 }, { "epoch": 35.95345830639948, "grad_norm": 66.66024780273438, "learning_rate": 0.001, "loss": 2.3525, "step": 278100 }, { "epoch": 35.96638655462185, "grad_norm": 24.542591094970703, "learning_rate": 0.001, "loss": 2.3391, "step": 278200 }, { "epoch": 35.979314802844215, "grad_norm": 0.9129610061645508, "learning_rate": 0.001, "loss": 2.3508, "step": 278300 }, { "epoch": 35.99224305106658, "grad_norm": 1.056785225868225, "learning_rate": 0.001, "loss": 2.3613, "step": 278400 }, { "epoch": 36.005171299288946, "grad_norm": 0.9970940947532654, "learning_rate": 0.001, "loss": 2.3224, "step": 278500 }, { "epoch": 36.01809954751131, "grad_norm": 0.8513656258583069, "learning_rate": 0.001, "loss": 2.2398, "step": 278600 }, { "epoch": 36.03102779573368, "grad_norm": 0.9235019087791443, "learning_rate": 0.001, "loss": 2.2447, "step": 278700 }, { "epoch": 36.043956043956044, "grad_norm": 0.9196135401725769, "learning_rate": 0.001, "loss": 2.2792, "step": 278800 }, { "epoch": 36.05688429217841, "grad_norm": 1.325130581855774, "learning_rate": 0.001, "loss": 2.2757, "step": 278900 }, { "epoch": 36.069812540400775, "grad_norm": 0.9471011757850647, "learning_rate": 0.001, "loss": 2.2702, "step": 279000 }, { "epoch": 36.08274078862314, "grad_norm": 1.116039752960205, "learning_rate": 0.001, "loss": 2.2739, "step": 279100 }, { "epoch": 36.09566903684551, "grad_norm": 1.4333600997924805, "learning_rate": 0.001, "loss": 2.282, "step": 279200 }, { "epoch": 36.10859728506787, "grad_norm": 1.012392282485962, "learning_rate": 0.001, "loss": 2.2653, "step": 279300 }, { "epoch": 36.12152553329024, "grad_norm": 1.0723364353179932, "learning_rate": 0.001, "loss": 2.2819, "step": 279400 }, { "epoch": 36.134453781512605, "grad_norm": 1.1486167907714844, "learning_rate": 0.001, "loss": 2.277, "step": 279500 }, { "epoch": 36.14738202973497, "grad_norm": 1.0513079166412354, "learning_rate": 0.001, "loss": 2.2713, "step": 279600 }, { "epoch": 36.160310277957336, "grad_norm": 1.5857536792755127, "learning_rate": 0.001, "loss": 2.2823, "step": 279700 }, { "epoch": 36.1732385261797, "grad_norm": 1.865390419960022, "learning_rate": 0.001, "loss": 2.27, "step": 279800 }, { "epoch": 36.18616677440207, "grad_norm": 1.0897332429885864, "learning_rate": 0.001, "loss": 2.2687, "step": 279900 }, { "epoch": 36.199095022624434, "grad_norm": 0.9383131265640259, "learning_rate": 0.001, "loss": 2.2842, "step": 280000 }, { "epoch": 36.2120232708468, "grad_norm": 16.01864242553711, "learning_rate": 0.001, "loss": 2.2974, "step": 280100 }, { "epoch": 36.224951519069165, "grad_norm": 1.1909775733947754, "learning_rate": 0.001, "loss": 2.273, "step": 280200 }, { "epoch": 36.23787976729153, "grad_norm": 1.0194798707962036, "learning_rate": 0.001, "loss": 2.2834, "step": 280300 }, { "epoch": 36.2508080155139, "grad_norm": 1.2197915315628052, "learning_rate": 0.001, "loss": 2.2948, "step": 280400 }, { "epoch": 36.26373626373626, "grad_norm": 1.1021982431411743, "learning_rate": 0.001, "loss": 2.2969, "step": 280500 }, { "epoch": 36.27666451195863, "grad_norm": 1.1400458812713623, "learning_rate": 0.001, "loss": 2.3033, "step": 280600 }, { "epoch": 36.289592760180994, "grad_norm": 0.9854379892349243, "learning_rate": 0.001, "loss": 2.2972, "step": 280700 }, { "epoch": 36.30252100840336, "grad_norm": 1.3006876707077026, "learning_rate": 0.001, "loss": 2.2856, "step": 280800 }, { "epoch": 36.315449256625726, "grad_norm": 1.0780441761016846, "learning_rate": 0.001, "loss": 2.3052, "step": 280900 }, { "epoch": 36.32837750484809, "grad_norm": 0.9624074697494507, "learning_rate": 0.001, "loss": 2.2917, "step": 281000 }, { "epoch": 36.34130575307046, "grad_norm": 2.764989137649536, "learning_rate": 0.001, "loss": 2.2964, "step": 281100 }, { "epoch": 36.354234001292824, "grad_norm": 1.0919126272201538, "learning_rate": 0.001, "loss": 2.2859, "step": 281200 }, { "epoch": 36.36716224951519, "grad_norm": 8.595905303955078, "learning_rate": 0.001, "loss": 2.3218, "step": 281300 }, { "epoch": 36.380090497737555, "grad_norm": 1.6826426982879639, "learning_rate": 0.001, "loss": 2.2881, "step": 281400 }, { "epoch": 36.39301874595992, "grad_norm": 1.164161205291748, "learning_rate": 0.001, "loss": 2.3036, "step": 281500 }, { "epoch": 36.40594699418229, "grad_norm": 0.8686888217926025, "learning_rate": 0.001, "loss": 2.2969, "step": 281600 }, { "epoch": 36.41887524240465, "grad_norm": 0.9590396881103516, "learning_rate": 0.001, "loss": 2.3032, "step": 281700 }, { "epoch": 36.43180349062702, "grad_norm": 0.8533987998962402, "learning_rate": 0.001, "loss": 2.3161, "step": 281800 }, { "epoch": 36.444731738849384, "grad_norm": 0.8600620031356812, "learning_rate": 0.001, "loss": 2.3146, "step": 281900 }, { "epoch": 36.45765998707175, "grad_norm": 0.881571352481842, "learning_rate": 0.001, "loss": 2.3133, "step": 282000 }, { "epoch": 36.470588235294116, "grad_norm": 1.0894184112548828, "learning_rate": 0.001, "loss": 2.2881, "step": 282100 }, { "epoch": 36.48351648351648, "grad_norm": 1.110060453414917, "learning_rate": 0.001, "loss": 2.3187, "step": 282200 }, { "epoch": 36.49644473173885, "grad_norm": 0.8869652152061462, "learning_rate": 0.001, "loss": 2.3087, "step": 282300 }, { "epoch": 36.50937297996121, "grad_norm": 0.9156047701835632, "learning_rate": 0.001, "loss": 2.323, "step": 282400 }, { "epoch": 36.52230122818358, "grad_norm": 1.0049806833267212, "learning_rate": 0.001, "loss": 2.3211, "step": 282500 }, { "epoch": 36.535229476405945, "grad_norm": 1.2209011316299438, "learning_rate": 0.001, "loss": 2.3092, "step": 282600 }, { "epoch": 36.54815772462831, "grad_norm": 1.0007619857788086, "learning_rate": 0.001, "loss": 2.2938, "step": 282700 }, { "epoch": 36.56108597285068, "grad_norm": 1.228371024131775, "learning_rate": 0.001, "loss": 2.3146, "step": 282800 }, { "epoch": 36.57401422107304, "grad_norm": 1.0764658451080322, "learning_rate": 0.001, "loss": 2.3267, "step": 282900 }, { "epoch": 36.58694246929541, "grad_norm": 1.1888458728790283, "learning_rate": 0.001, "loss": 2.3042, "step": 283000 }, { "epoch": 36.599870717517774, "grad_norm": 2.522371768951416, "learning_rate": 0.001, "loss": 2.3147, "step": 283100 }, { "epoch": 36.61279896574014, "grad_norm": 1.2734612226486206, "learning_rate": 0.001, "loss": 2.3261, "step": 283200 }, { "epoch": 36.625727213962506, "grad_norm": 10.13039493560791, "learning_rate": 0.001, "loss": 2.3168, "step": 283300 }, { "epoch": 36.63865546218487, "grad_norm": 0.9772014021873474, "learning_rate": 0.001, "loss": 2.3209, "step": 283400 }, { "epoch": 36.65158371040724, "grad_norm": 1.1816086769104004, "learning_rate": 0.001, "loss": 2.3253, "step": 283500 }, { "epoch": 36.6645119586296, "grad_norm": 0.9113471508026123, "learning_rate": 0.001, "loss": 2.3372, "step": 283600 }, { "epoch": 36.67744020685197, "grad_norm": 1.1011446714401245, "learning_rate": 0.001, "loss": 2.3425, "step": 283700 }, { "epoch": 36.690368455074335, "grad_norm": 3.5968236923217773, "learning_rate": 0.001, "loss": 2.3271, "step": 283800 }, { "epoch": 36.7032967032967, "grad_norm": 1.5661674737930298, "learning_rate": 0.001, "loss": 2.3404, "step": 283900 }, { "epoch": 36.71622495151907, "grad_norm": 0.9586053490638733, "learning_rate": 0.001, "loss": 2.3395, "step": 284000 }, { "epoch": 36.72915319974143, "grad_norm": 1.217433214187622, "learning_rate": 0.001, "loss": 2.3315, "step": 284100 }, { "epoch": 36.7420814479638, "grad_norm": 1.3353558778762817, "learning_rate": 0.001, "loss": 2.3329, "step": 284200 }, { "epoch": 36.755009696186164, "grad_norm": 59.00389099121094, "learning_rate": 0.001, "loss": 2.346, "step": 284300 }, { "epoch": 36.76793794440853, "grad_norm": 0.957310676574707, "learning_rate": 0.001, "loss": 2.3453, "step": 284400 }, { "epoch": 36.780866192630896, "grad_norm": 0.9586453437805176, "learning_rate": 0.001, "loss": 2.2956, "step": 284500 }, { "epoch": 36.79379444085326, "grad_norm": 1.1360796689987183, "learning_rate": 0.001, "loss": 2.3333, "step": 284600 }, { "epoch": 36.80672268907563, "grad_norm": 1.1595746278762817, "learning_rate": 0.001, "loss": 2.3373, "step": 284700 }, { "epoch": 36.81965093729799, "grad_norm": 1.1174944639205933, "learning_rate": 0.001, "loss": 2.3303, "step": 284800 }, { "epoch": 36.83257918552036, "grad_norm": 1.0552772283554077, "learning_rate": 0.001, "loss": 2.3279, "step": 284900 }, { "epoch": 36.845507433742725, "grad_norm": 0.9464792609214783, "learning_rate": 0.001, "loss": 2.3317, "step": 285000 }, { "epoch": 36.85843568196509, "grad_norm": 1.8702682256698608, "learning_rate": 0.001, "loss": 2.3531, "step": 285100 }, { "epoch": 36.87136393018746, "grad_norm": 1.0141576528549194, "learning_rate": 0.001, "loss": 2.351, "step": 285200 }, { "epoch": 36.88429217840982, "grad_norm": 1.9584813117980957, "learning_rate": 0.001, "loss": 2.3298, "step": 285300 }, { "epoch": 36.89722042663219, "grad_norm": 0.899688720703125, "learning_rate": 0.001, "loss": 2.3492, "step": 285400 }, { "epoch": 36.910148674854554, "grad_norm": 2.7548129558563232, "learning_rate": 0.001, "loss": 2.3428, "step": 285500 }, { "epoch": 36.92307692307692, "grad_norm": 1.2709040641784668, "learning_rate": 0.001, "loss": 2.3346, "step": 285600 }, { "epoch": 36.936005171299286, "grad_norm": 0.9143582582473755, "learning_rate": 0.001, "loss": 2.3396, "step": 285700 }, { "epoch": 36.94893341952165, "grad_norm": 0.8568939566612244, "learning_rate": 0.001, "loss": 2.364, "step": 285800 }, { "epoch": 36.96186166774402, "grad_norm": 0.9414082169532776, "learning_rate": 0.001, "loss": 2.3694, "step": 285900 }, { "epoch": 36.97478991596638, "grad_norm": 2.9157614707946777, "learning_rate": 0.001, "loss": 2.3464, "step": 286000 }, { "epoch": 36.98771816418875, "grad_norm": 1.0645862817764282, "learning_rate": 0.001, "loss": 2.3233, "step": 286100 }, { "epoch": 37.00064641241112, "grad_norm": 1.6795384883880615, "learning_rate": 0.001, "loss": 2.3293, "step": 286200 }, { "epoch": 37.01357466063349, "grad_norm": 1.1287802457809448, "learning_rate": 0.001, "loss": 2.2682, "step": 286300 }, { "epoch": 37.02650290885585, "grad_norm": 13.925851821899414, "learning_rate": 0.001, "loss": 2.2781, "step": 286400 }, { "epoch": 37.03943115707822, "grad_norm": 0.9774664044380188, "learning_rate": 0.001, "loss": 2.2658, "step": 286500 }, { "epoch": 37.052359405300585, "grad_norm": 1.3668423891067505, "learning_rate": 0.001, "loss": 2.2764, "step": 286600 }, { "epoch": 37.06528765352295, "grad_norm": 0.9317799806594849, "learning_rate": 0.001, "loss": 2.2678, "step": 286700 }, { "epoch": 37.07821590174532, "grad_norm": 0.8740684390068054, "learning_rate": 0.001, "loss": 2.2677, "step": 286800 }, { "epoch": 37.09114414996768, "grad_norm": 2.5013861656188965, "learning_rate": 0.001, "loss": 2.2776, "step": 286900 }, { "epoch": 37.10407239819005, "grad_norm": 1.0080987215042114, "learning_rate": 0.001, "loss": 2.2996, "step": 287000 }, { "epoch": 37.117000646412414, "grad_norm": 1.0527337789535522, "learning_rate": 0.001, "loss": 2.2803, "step": 287100 }, { "epoch": 37.12992889463478, "grad_norm": 1.400596022605896, "learning_rate": 0.001, "loss": 2.2748, "step": 287200 }, { "epoch": 37.142857142857146, "grad_norm": 15.708232879638672, "learning_rate": 0.001, "loss": 2.2691, "step": 287300 }, { "epoch": 37.15578539107951, "grad_norm": 1.2015992403030396, "learning_rate": 0.001, "loss": 2.2796, "step": 287400 }, { "epoch": 37.16871363930188, "grad_norm": 0.8390912413597107, "learning_rate": 0.001, "loss": 2.2857, "step": 287500 }, { "epoch": 37.18164188752424, "grad_norm": 0.9656884670257568, "learning_rate": 0.001, "loss": 2.2813, "step": 287600 }, { "epoch": 37.19457013574661, "grad_norm": 1.3324609994888306, "learning_rate": 0.001, "loss": 2.2682, "step": 287700 }, { "epoch": 37.207498383968975, "grad_norm": 1.1140952110290527, "learning_rate": 0.001, "loss": 2.2712, "step": 287800 }, { "epoch": 37.22042663219134, "grad_norm": 1.6109776496887207, "learning_rate": 0.001, "loss": 2.2791, "step": 287900 }, { "epoch": 37.23335488041371, "grad_norm": 0.8621035218238831, "learning_rate": 0.001, "loss": 2.2933, "step": 288000 }, { "epoch": 37.24628312863607, "grad_norm": 0.9649895429611206, "learning_rate": 0.001, "loss": 2.2805, "step": 288100 }, { "epoch": 37.25921137685844, "grad_norm": 1.4170122146606445, "learning_rate": 0.001, "loss": 2.2805, "step": 288200 }, { "epoch": 37.272139625080804, "grad_norm": 0.9896919131278992, "learning_rate": 0.001, "loss": 2.3165, "step": 288300 }, { "epoch": 37.28506787330317, "grad_norm": 0.8381648659706116, "learning_rate": 0.001, "loss": 2.3019, "step": 288400 }, { "epoch": 37.297996121525536, "grad_norm": 1.0348386764526367, "learning_rate": 0.001, "loss": 2.2952, "step": 288500 }, { "epoch": 37.3109243697479, "grad_norm": 1.3234835863113403, "learning_rate": 0.001, "loss": 2.3098, "step": 288600 }, { "epoch": 37.32385261797027, "grad_norm": 5.205316543579102, "learning_rate": 0.001, "loss": 2.307, "step": 288700 }, { "epoch": 37.33678086619263, "grad_norm": 0.8872490525245667, "learning_rate": 0.001, "loss": 2.2902, "step": 288800 }, { "epoch": 37.349709114415, "grad_norm": 1.2363568544387817, "learning_rate": 0.001, "loss": 2.2642, "step": 288900 }, { "epoch": 37.362637362637365, "grad_norm": 1.0086052417755127, "learning_rate": 0.001, "loss": 2.2946, "step": 289000 }, { "epoch": 37.37556561085973, "grad_norm": 14.165241241455078, "learning_rate": 0.001, "loss": 2.3, "step": 289100 }, { "epoch": 37.3884938590821, "grad_norm": 1.0292195081710815, "learning_rate": 0.001, "loss": 2.3051, "step": 289200 }, { "epoch": 37.40142210730446, "grad_norm": 1.1267566680908203, "learning_rate": 0.001, "loss": 2.2953, "step": 289300 }, { "epoch": 37.41435035552683, "grad_norm": 1.0929243564605713, "learning_rate": 0.001, "loss": 2.3128, "step": 289400 }, { "epoch": 37.427278603749194, "grad_norm": 3.7657413482666016, "learning_rate": 0.001, "loss": 2.2994, "step": 289500 }, { "epoch": 37.44020685197156, "grad_norm": 1.076318621635437, "learning_rate": 0.001, "loss": 2.2916, "step": 289600 }, { "epoch": 37.453135100193926, "grad_norm": 0.9936931729316711, "learning_rate": 0.001, "loss": 2.2854, "step": 289700 }, { "epoch": 37.46606334841629, "grad_norm": 0.9356802105903625, "learning_rate": 0.001, "loss": 2.3029, "step": 289800 }, { "epoch": 37.47899159663866, "grad_norm": 0.8066093921661377, "learning_rate": 0.001, "loss": 2.3202, "step": 289900 }, { "epoch": 37.49191984486102, "grad_norm": 1.0817058086395264, "learning_rate": 0.001, "loss": 2.3183, "step": 290000 }, { "epoch": 37.50484809308339, "grad_norm": 0.8340252041816711, "learning_rate": 0.001, "loss": 2.2995, "step": 290100 }, { "epoch": 37.517776341305755, "grad_norm": 1.089989185333252, "learning_rate": 0.001, "loss": 2.2941, "step": 290200 }, { "epoch": 37.53070458952812, "grad_norm": 1.0923917293548584, "learning_rate": 0.001, "loss": 2.3267, "step": 290300 }, { "epoch": 37.543632837750486, "grad_norm": 1.0958425998687744, "learning_rate": 0.001, "loss": 2.319, "step": 290400 }, { "epoch": 37.55656108597285, "grad_norm": 0.9059712290763855, "learning_rate": 0.001, "loss": 2.3122, "step": 290500 }, { "epoch": 37.56948933419522, "grad_norm": 0.9154351353645325, "learning_rate": 0.001, "loss": 2.3091, "step": 290600 }, { "epoch": 37.582417582417584, "grad_norm": 0.9988973736763, "learning_rate": 0.001, "loss": 2.3126, "step": 290700 }, { "epoch": 37.59534583063995, "grad_norm": 0.9448109865188599, "learning_rate": 0.001, "loss": 2.2932, "step": 290800 }, { "epoch": 37.608274078862316, "grad_norm": 1.277525544166565, "learning_rate": 0.001, "loss": 2.2995, "step": 290900 }, { "epoch": 37.62120232708468, "grad_norm": 8.642166137695312, "learning_rate": 0.001, "loss": 2.2953, "step": 291000 }, { "epoch": 37.63413057530705, "grad_norm": 1.356089472770691, "learning_rate": 0.001, "loss": 2.3068, "step": 291100 }, { "epoch": 37.64705882352941, "grad_norm": 0.9623914361000061, "learning_rate": 0.001, "loss": 2.3165, "step": 291200 }, { "epoch": 37.65998707175178, "grad_norm": 0.8872880935668945, "learning_rate": 0.001, "loss": 2.3198, "step": 291300 }, { "epoch": 37.672915319974145, "grad_norm": 0.8716536164283752, "learning_rate": 0.001, "loss": 2.2921, "step": 291400 }, { "epoch": 37.68584356819651, "grad_norm": 1.1671825647354126, "learning_rate": 0.001, "loss": 2.329, "step": 291500 }, { "epoch": 37.698771816418876, "grad_norm": 0.9523661732673645, "learning_rate": 0.001, "loss": 2.3097, "step": 291600 }, { "epoch": 37.71170006464124, "grad_norm": 1.0700926780700684, "learning_rate": 0.001, "loss": 2.3394, "step": 291700 }, { "epoch": 37.72462831286361, "grad_norm": 0.8696663975715637, "learning_rate": 0.001, "loss": 2.3324, "step": 291800 }, { "epoch": 37.737556561085974, "grad_norm": 1.0045833587646484, "learning_rate": 0.001, "loss": 2.3213, "step": 291900 }, { "epoch": 37.75048480930834, "grad_norm": 1.2034624814987183, "learning_rate": 0.001, "loss": 2.3177, "step": 292000 }, { "epoch": 37.763413057530705, "grad_norm": 1.4267425537109375, "learning_rate": 0.001, "loss": 2.3415, "step": 292100 }, { "epoch": 37.77634130575307, "grad_norm": 1.0821388959884644, "learning_rate": 0.001, "loss": 2.3237, "step": 292200 }, { "epoch": 37.78926955397544, "grad_norm": 1.2337356805801392, "learning_rate": 0.001, "loss": 2.3237, "step": 292300 }, { "epoch": 37.8021978021978, "grad_norm": 0.7857568264007568, "learning_rate": 0.001, "loss": 2.3044, "step": 292400 }, { "epoch": 37.81512605042017, "grad_norm": 1.9879436492919922, "learning_rate": 0.001, "loss": 2.3246, "step": 292500 }, { "epoch": 37.828054298642535, "grad_norm": 0.8072868585586548, "learning_rate": 0.001, "loss": 2.3374, "step": 292600 }, { "epoch": 37.8409825468649, "grad_norm": 4.311631679534912, "learning_rate": 0.001, "loss": 2.3584, "step": 292700 }, { "epoch": 37.853910795087266, "grad_norm": 0.9256271123886108, "learning_rate": 0.001, "loss": 2.3422, "step": 292800 }, { "epoch": 37.86683904330963, "grad_norm": 12.917051315307617, "learning_rate": 0.001, "loss": 2.3298, "step": 292900 }, { "epoch": 37.879767291532, "grad_norm": 1.0887889862060547, "learning_rate": 0.001, "loss": 2.3263, "step": 293000 }, { "epoch": 37.892695539754364, "grad_norm": 1.0802913904190063, "learning_rate": 0.001, "loss": 2.3256, "step": 293100 }, { "epoch": 37.90562378797673, "grad_norm": 0.853336751461029, "learning_rate": 0.001, "loss": 2.3496, "step": 293200 }, { "epoch": 37.918552036199095, "grad_norm": 1.008598804473877, "learning_rate": 0.001, "loss": 2.3213, "step": 293300 }, { "epoch": 37.93148028442146, "grad_norm": 0.9513654708862305, "learning_rate": 0.001, "loss": 2.3084, "step": 293400 }, { "epoch": 37.94440853264383, "grad_norm": 6.264597415924072, "learning_rate": 0.001, "loss": 2.338, "step": 293500 }, { "epoch": 37.95733678086619, "grad_norm": 5.21397066116333, "learning_rate": 0.001, "loss": 2.3481, "step": 293600 }, { "epoch": 37.97026502908856, "grad_norm": 0.811362087726593, "learning_rate": 0.001, "loss": 2.3431, "step": 293700 }, { "epoch": 37.983193277310924, "grad_norm": 0.8888514637947083, "learning_rate": 0.001, "loss": 2.3188, "step": 293800 }, { "epoch": 37.99612152553329, "grad_norm": 1.7303383350372314, "learning_rate": 0.001, "loss": 2.3024, "step": 293900 }, { "epoch": 38.009049773755656, "grad_norm": 0.7748279571533203, "learning_rate": 0.001, "loss": 2.2637, "step": 294000 }, { "epoch": 38.02197802197802, "grad_norm": 0.9447478652000427, "learning_rate": 0.001, "loss": 2.252, "step": 294100 }, { "epoch": 38.03490627020039, "grad_norm": 3.7506885528564453, "learning_rate": 0.001, "loss": 2.2522, "step": 294200 }, { "epoch": 38.04783451842275, "grad_norm": 2.7029573917388916, "learning_rate": 0.001, "loss": 2.253, "step": 294300 }, { "epoch": 38.06076276664512, "grad_norm": 0.9952832460403442, "learning_rate": 0.001, "loss": 2.2341, "step": 294400 }, { "epoch": 38.073691014867485, "grad_norm": 531.6683349609375, "learning_rate": 0.001, "loss": 2.2694, "step": 294500 }, { "epoch": 38.08661926308985, "grad_norm": 0.8409490585327148, "learning_rate": 0.001, "loss": 2.2656, "step": 294600 }, { "epoch": 38.09954751131222, "grad_norm": 0.9790078997612, "learning_rate": 0.001, "loss": 2.2572, "step": 294700 }, { "epoch": 38.11247575953458, "grad_norm": 0.8269152045249939, "learning_rate": 0.001, "loss": 2.2358, "step": 294800 }, { "epoch": 38.12540400775695, "grad_norm": 0.7911316752433777, "learning_rate": 0.001, "loss": 2.2568, "step": 294900 }, { "epoch": 38.138332255979314, "grad_norm": 0.8899140357971191, "learning_rate": 0.001, "loss": 2.2393, "step": 295000 }, { "epoch": 38.15126050420168, "grad_norm": 9.630796432495117, "learning_rate": 0.001, "loss": 2.2371, "step": 295100 }, { "epoch": 38.164188752424046, "grad_norm": 0.8190352916717529, "learning_rate": 0.001, "loss": 2.2444, "step": 295200 }, { "epoch": 38.17711700064641, "grad_norm": 0.7378726005554199, "learning_rate": 0.001, "loss": 2.2843, "step": 295300 }, { "epoch": 38.19004524886878, "grad_norm": 1.3111671209335327, "learning_rate": 0.001, "loss": 2.2728, "step": 295400 }, { "epoch": 38.20297349709114, "grad_norm": 0.9441081285476685, "learning_rate": 0.001, "loss": 2.278, "step": 295500 }, { "epoch": 38.21590174531351, "grad_norm": 0.859037458896637, "learning_rate": 0.001, "loss": 2.264, "step": 295600 }, { "epoch": 38.228829993535875, "grad_norm": 0.8471625447273254, "learning_rate": 0.001, "loss": 2.2537, "step": 295700 }, { "epoch": 38.24175824175824, "grad_norm": 1.0247235298156738, "learning_rate": 0.001, "loss": 2.2684, "step": 295800 }, { "epoch": 38.25468648998061, "grad_norm": 1.0526527166366577, "learning_rate": 0.001, "loss": 2.2735, "step": 295900 }, { "epoch": 38.26761473820297, "grad_norm": 0.9049152731895447, "learning_rate": 0.001, "loss": 2.2871, "step": 296000 }, { "epoch": 38.28054298642534, "grad_norm": 1.427931785583496, "learning_rate": 0.001, "loss": 2.2879, "step": 296100 }, { "epoch": 38.293471234647704, "grad_norm": 0.8766980767250061, "learning_rate": 0.001, "loss": 2.2659, "step": 296200 }, { "epoch": 38.30639948287007, "grad_norm": 1.5689034461975098, "learning_rate": 0.001, "loss": 2.2888, "step": 296300 }, { "epoch": 38.319327731092436, "grad_norm": 1.001023292541504, "learning_rate": 0.001, "loss": 2.2963, "step": 296400 }, { "epoch": 38.3322559793148, "grad_norm": 0.9888316988945007, "learning_rate": 0.001, "loss": 2.2749, "step": 296500 }, { "epoch": 38.34518422753717, "grad_norm": 0.7812533974647522, "learning_rate": 0.001, "loss": 2.2834, "step": 296600 }, { "epoch": 38.35811247575953, "grad_norm": 1.1692326068878174, "learning_rate": 0.001, "loss": 2.2959, "step": 296700 }, { "epoch": 38.3710407239819, "grad_norm": 0.9409220814704895, "learning_rate": 0.001, "loss": 2.2819, "step": 296800 }, { "epoch": 38.383968972204265, "grad_norm": 0.9519249796867371, "learning_rate": 0.001, "loss": 2.3083, "step": 296900 }, { "epoch": 38.39689722042663, "grad_norm": 0.7679749727249146, "learning_rate": 0.001, "loss": 2.286, "step": 297000 }, { "epoch": 38.409825468649, "grad_norm": 1.0300284624099731, "learning_rate": 0.001, "loss": 2.3126, "step": 297100 }, { "epoch": 38.42275371687136, "grad_norm": 1.2941006422042847, "learning_rate": 0.001, "loss": 2.2669, "step": 297200 }, { "epoch": 38.43568196509373, "grad_norm": 1.4107745885849, "learning_rate": 0.001, "loss": 2.3121, "step": 297300 }, { "epoch": 38.448610213316094, "grad_norm": 0.8736290335655212, "learning_rate": 0.001, "loss": 2.304, "step": 297400 }, { "epoch": 38.46153846153846, "grad_norm": 1.1801663637161255, "learning_rate": 0.001, "loss": 2.3061, "step": 297500 }, { "epoch": 38.474466709760826, "grad_norm": 1.1658673286437988, "learning_rate": 0.001, "loss": 2.3001, "step": 297600 }, { "epoch": 38.48739495798319, "grad_norm": 0.9151771068572998, "learning_rate": 0.001, "loss": 2.2878, "step": 297700 }, { "epoch": 38.50032320620556, "grad_norm": 5.692348480224609, "learning_rate": 0.001, "loss": 2.3091, "step": 297800 }, { "epoch": 38.51325145442792, "grad_norm": 0.8141626715660095, "learning_rate": 0.001, "loss": 2.2966, "step": 297900 }, { "epoch": 38.52617970265029, "grad_norm": 1.2996338605880737, "learning_rate": 0.001, "loss": 2.2966, "step": 298000 }, { "epoch": 38.539107950872655, "grad_norm": 1.6075537204742432, "learning_rate": 0.001, "loss": 2.2915, "step": 298100 }, { "epoch": 38.55203619909502, "grad_norm": 1.0656648874282837, "learning_rate": 0.001, "loss": 2.3228, "step": 298200 }, { "epoch": 38.56496444731739, "grad_norm": 0.903914213180542, "learning_rate": 0.001, "loss": 2.3041, "step": 298300 }, { "epoch": 38.57789269553975, "grad_norm": 0.9478490352630615, "learning_rate": 0.001, "loss": 2.2802, "step": 298400 }, { "epoch": 38.59082094376212, "grad_norm": 0.8610548973083496, "learning_rate": 0.001, "loss": 2.3028, "step": 298500 }, { "epoch": 38.603749191984484, "grad_norm": 0.7601284384727478, "learning_rate": 0.001, "loss": 2.3041, "step": 298600 }, { "epoch": 38.61667744020685, "grad_norm": 0.9025532007217407, "learning_rate": 0.001, "loss": 2.301, "step": 298700 }, { "epoch": 38.629605688429216, "grad_norm": 1.0534065961837769, "learning_rate": 0.001, "loss": 2.295, "step": 298800 }, { "epoch": 38.64253393665158, "grad_norm": 0.9152564406394958, "learning_rate": 0.001, "loss": 2.2977, "step": 298900 }, { "epoch": 38.65546218487395, "grad_norm": 0.8996568918228149, "learning_rate": 0.001, "loss": 2.3199, "step": 299000 }, { "epoch": 38.66839043309631, "grad_norm": 0.9966284036636353, "learning_rate": 0.001, "loss": 2.2861, "step": 299100 }, { "epoch": 38.68131868131868, "grad_norm": 0.8803106546401978, "learning_rate": 0.001, "loss": 2.2928, "step": 299200 }, { "epoch": 38.694246929541045, "grad_norm": 2.6466586589813232, "learning_rate": 0.001, "loss": 2.3227, "step": 299300 }, { "epoch": 38.70717517776341, "grad_norm": 0.8886678218841553, "learning_rate": 0.001, "loss": 2.3052, "step": 299400 }, { "epoch": 38.720103425985776, "grad_norm": 0.9300984144210815, "learning_rate": 0.001, "loss": 2.2983, "step": 299500 }, { "epoch": 38.73303167420814, "grad_norm": 0.8727144598960876, "learning_rate": 0.001, "loss": 2.3177, "step": 299600 }, { "epoch": 38.74595992243051, "grad_norm": 1.0045512914657593, "learning_rate": 0.001, "loss": 2.3185, "step": 299700 }, { "epoch": 38.758888170652874, "grad_norm": 0.9725853800773621, "learning_rate": 0.001, "loss": 2.3122, "step": 299800 }, { "epoch": 38.77181641887524, "grad_norm": 0.9932114481925964, "learning_rate": 0.001, "loss": 2.3199, "step": 299900 }, { "epoch": 38.784744667097605, "grad_norm": 1.0641424655914307, "learning_rate": 0.001, "loss": 2.3082, "step": 300000 }, { "epoch": 38.79767291531997, "grad_norm": 0.9595224857330322, "learning_rate": 0.001, "loss": 2.3154, "step": 300100 }, { "epoch": 38.81060116354234, "grad_norm": 0.8651654720306396, "learning_rate": 0.001, "loss": 2.3158, "step": 300200 }, { "epoch": 38.8235294117647, "grad_norm": 0.9641025066375732, "learning_rate": 0.001, "loss": 2.3167, "step": 300300 }, { "epoch": 38.83645765998707, "grad_norm": 1.0787748098373413, "learning_rate": 0.001, "loss": 2.3321, "step": 300400 }, { "epoch": 38.849385908209435, "grad_norm": 0.7810189723968506, "learning_rate": 0.001, "loss": 2.3198, "step": 300500 }, { "epoch": 38.8623141564318, "grad_norm": 0.7073070406913757, "learning_rate": 0.001, "loss": 2.3403, "step": 300600 }, { "epoch": 38.875242404654166, "grad_norm": 1.8081023693084717, "learning_rate": 0.001, "loss": 2.3267, "step": 300700 }, { "epoch": 38.88817065287653, "grad_norm": 0.8382379412651062, "learning_rate": 0.001, "loss": 2.3291, "step": 300800 }, { "epoch": 38.9010989010989, "grad_norm": 2.795884609222412, "learning_rate": 0.001, "loss": 2.3375, "step": 300900 }, { "epoch": 38.914027149321264, "grad_norm": 1.0764235258102417, "learning_rate": 0.001, "loss": 2.3398, "step": 301000 }, { "epoch": 38.92695539754363, "grad_norm": 2.970245122909546, "learning_rate": 0.001, "loss": 2.3339, "step": 301100 }, { "epoch": 38.939883645765995, "grad_norm": 0.7319746613502502, "learning_rate": 0.001, "loss": 2.3185, "step": 301200 }, { "epoch": 38.95281189398836, "grad_norm": 0.8849451541900635, "learning_rate": 0.001, "loss": 2.3276, "step": 301300 }, { "epoch": 38.96574014221073, "grad_norm": 0.8370879292488098, "learning_rate": 0.001, "loss": 2.3334, "step": 301400 }, { "epoch": 38.97866839043309, "grad_norm": 0.9181233048439026, "learning_rate": 0.001, "loss": 2.3255, "step": 301500 }, { "epoch": 38.991596638655466, "grad_norm": 1.1437344551086426, "learning_rate": 0.001, "loss": 2.3488, "step": 301600 }, { "epoch": 39.00452488687783, "grad_norm": 0.8366681933403015, "learning_rate": 0.001, "loss": 2.3212, "step": 301700 }, { "epoch": 39.0174531351002, "grad_norm": 0.9203605055809021, "learning_rate": 0.001, "loss": 2.2382, "step": 301800 }, { "epoch": 39.03038138332256, "grad_norm": 0.9239465594291687, "learning_rate": 0.001, "loss": 2.2377, "step": 301900 }, { "epoch": 39.04330963154493, "grad_norm": 0.9318289160728455, "learning_rate": 0.001, "loss": 2.2466, "step": 302000 }, { "epoch": 39.056237879767295, "grad_norm": 1.1527163982391357, "learning_rate": 0.001, "loss": 2.2424, "step": 302100 }, { "epoch": 39.06916612798966, "grad_norm": 0.8082700967788696, "learning_rate": 0.001, "loss": 2.2485, "step": 302200 }, { "epoch": 39.08209437621203, "grad_norm": 1.025955080986023, "learning_rate": 0.001, "loss": 2.2469, "step": 302300 }, { "epoch": 39.09502262443439, "grad_norm": 0.8863586783409119, "learning_rate": 0.001, "loss": 2.248, "step": 302400 }, { "epoch": 39.10795087265676, "grad_norm": 0.8317383527755737, "learning_rate": 0.001, "loss": 2.2553, "step": 302500 }, { "epoch": 39.120879120879124, "grad_norm": 0.9379178285598755, "learning_rate": 0.001, "loss": 2.2548, "step": 302600 }, { "epoch": 39.13380736910149, "grad_norm": 1.8980505466461182, "learning_rate": 0.001, "loss": 2.2427, "step": 302700 }, { "epoch": 39.146735617323856, "grad_norm": 6.978908538818359, "learning_rate": 0.001, "loss": 2.275, "step": 302800 }, { "epoch": 39.15966386554622, "grad_norm": 0.9937589764595032, "learning_rate": 0.001, "loss": 2.2836, "step": 302900 }, { "epoch": 39.17259211376859, "grad_norm": 0.9854945540428162, "learning_rate": 0.001, "loss": 2.2378, "step": 303000 }, { "epoch": 39.18552036199095, "grad_norm": 1.109395146369934, "learning_rate": 0.001, "loss": 2.272, "step": 303100 }, { "epoch": 39.19844861021332, "grad_norm": 0.848433256149292, "learning_rate": 0.001, "loss": 2.2685, "step": 303200 }, { "epoch": 39.211376858435685, "grad_norm": 0.9635061621665955, "learning_rate": 0.001, "loss": 2.2884, "step": 303300 }, { "epoch": 39.22430510665805, "grad_norm": 0.7706529498100281, "learning_rate": 0.001, "loss": 2.2625, "step": 303400 }, { "epoch": 39.237233354880416, "grad_norm": 1.0018867254257202, "learning_rate": 0.001, "loss": 2.2705, "step": 303500 }, { "epoch": 39.25016160310278, "grad_norm": 2.491832733154297, "learning_rate": 0.001, "loss": 2.2482, "step": 303600 }, { "epoch": 39.26308985132515, "grad_norm": 21.609098434448242, "learning_rate": 0.001, "loss": 2.2688, "step": 303700 }, { "epoch": 39.276018099547514, "grad_norm": 0.8365232348442078, "learning_rate": 0.001, "loss": 2.2782, "step": 303800 }, { "epoch": 39.28894634776988, "grad_norm": 0.9786430597305298, "learning_rate": 0.001, "loss": 2.2797, "step": 303900 }, { "epoch": 39.301874595992246, "grad_norm": 1.2914767265319824, "learning_rate": 0.001, "loss": 2.2887, "step": 304000 }, { "epoch": 39.31480284421461, "grad_norm": 1.0518893003463745, "learning_rate": 0.001, "loss": 2.2746, "step": 304100 }, { "epoch": 39.32773109243698, "grad_norm": 0.911395251750946, "learning_rate": 0.001, "loss": 2.2747, "step": 304200 }, { "epoch": 39.34065934065934, "grad_norm": 0.878485918045044, "learning_rate": 0.001, "loss": 2.2917, "step": 304300 }, { "epoch": 39.35358758888171, "grad_norm": 0.8602640628814697, "learning_rate": 0.001, "loss": 2.2948, "step": 304400 }, { "epoch": 39.366515837104075, "grad_norm": 1.006882905960083, "learning_rate": 0.001, "loss": 2.2946, "step": 304500 }, { "epoch": 39.37944408532644, "grad_norm": 1.3898484706878662, "learning_rate": 0.001, "loss": 2.2864, "step": 304600 }, { "epoch": 39.392372333548806, "grad_norm": 0.7864917516708374, "learning_rate": 0.001, "loss": 2.2509, "step": 304700 }, { "epoch": 39.40530058177117, "grad_norm": 0.9158228635787964, "learning_rate": 0.001, "loss": 2.2765, "step": 304800 }, { "epoch": 39.41822882999354, "grad_norm": 1.1317373514175415, "learning_rate": 0.001, "loss": 2.2957, "step": 304900 }, { "epoch": 39.431157078215904, "grad_norm": 1.063559651374817, "learning_rate": 0.001, "loss": 2.2809, "step": 305000 }, { "epoch": 39.44408532643827, "grad_norm": 0.8054841160774231, "learning_rate": 0.001, "loss": 2.2792, "step": 305100 }, { "epoch": 39.457013574660635, "grad_norm": 0.8988564014434814, "learning_rate": 0.001, "loss": 2.2666, "step": 305200 }, { "epoch": 39.469941822883, "grad_norm": 0.8965588808059692, "learning_rate": 0.001, "loss": 2.2923, "step": 305300 }, { "epoch": 39.48287007110537, "grad_norm": 3.581740617752075, "learning_rate": 0.001, "loss": 2.2963, "step": 305400 }, { "epoch": 39.49579831932773, "grad_norm": 0.8783922791481018, "learning_rate": 0.001, "loss": 2.2894, "step": 305500 }, { "epoch": 39.5087265675501, "grad_norm": 0.8010335564613342, "learning_rate": 0.001, "loss": 2.3016, "step": 305600 }, { "epoch": 39.521654815772465, "grad_norm": 0.9010352492332458, "learning_rate": 0.001, "loss": 2.2979, "step": 305700 }, { "epoch": 39.53458306399483, "grad_norm": 0.998214840888977, "learning_rate": 0.001, "loss": 2.2913, "step": 305800 }, { "epoch": 39.547511312217196, "grad_norm": 0.8468450903892517, "learning_rate": 0.001, "loss": 2.2876, "step": 305900 }, { "epoch": 39.56043956043956, "grad_norm": 1.736551284790039, "learning_rate": 0.001, "loss": 2.2876, "step": 306000 }, { "epoch": 39.57336780866193, "grad_norm": 0.8270843625068665, "learning_rate": 0.001, "loss": 2.2796, "step": 306100 }, { "epoch": 39.586296056884294, "grad_norm": 1.1424996852874756, "learning_rate": 0.001, "loss": 2.2863, "step": 306200 }, { "epoch": 39.59922430510666, "grad_norm": 1.015686273574829, "learning_rate": 0.001, "loss": 2.2918, "step": 306300 }, { "epoch": 39.612152553329025, "grad_norm": 2.3739819526672363, "learning_rate": 0.001, "loss": 2.2969, "step": 306400 }, { "epoch": 39.62508080155139, "grad_norm": 0.7923754453659058, "learning_rate": 0.001, "loss": 2.3032, "step": 306500 }, { "epoch": 39.63800904977376, "grad_norm": 1.0259324312210083, "learning_rate": 0.001, "loss": 2.2963, "step": 306600 }, { "epoch": 39.65093729799612, "grad_norm": 1.3444812297821045, "learning_rate": 0.001, "loss": 2.303, "step": 306700 }, { "epoch": 39.66386554621849, "grad_norm": 0.8581533432006836, "learning_rate": 0.001, "loss": 2.3117, "step": 306800 }, { "epoch": 39.676793794440854, "grad_norm": 2.866663694381714, "learning_rate": 0.001, "loss": 2.3112, "step": 306900 }, { "epoch": 39.68972204266322, "grad_norm": 1.0497416257858276, "learning_rate": 0.001, "loss": 2.2926, "step": 307000 }, { "epoch": 39.702650290885586, "grad_norm": 0.9235449433326721, "learning_rate": 0.001, "loss": 2.2911, "step": 307100 }, { "epoch": 39.71557853910795, "grad_norm": 0.8271528482437134, "learning_rate": 0.001, "loss": 2.3001, "step": 307200 }, { "epoch": 39.72850678733032, "grad_norm": 1.0382094383239746, "learning_rate": 0.001, "loss": 2.307, "step": 307300 }, { "epoch": 39.74143503555268, "grad_norm": 0.8950594663619995, "learning_rate": 0.001, "loss": 2.3035, "step": 307400 }, { "epoch": 39.75436328377505, "grad_norm": 0.9373320937156677, "learning_rate": 0.001, "loss": 2.3141, "step": 307500 }, { "epoch": 39.767291531997415, "grad_norm": 1.0266646146774292, "learning_rate": 0.001, "loss": 2.2928, "step": 307600 }, { "epoch": 39.78021978021978, "grad_norm": 0.9912760853767395, "learning_rate": 0.001, "loss": 2.298, "step": 307700 }, { "epoch": 39.79314802844215, "grad_norm": 6.731635093688965, "learning_rate": 0.001, "loss": 2.3073, "step": 307800 }, { "epoch": 39.80607627666451, "grad_norm": 0.8230584859848022, "learning_rate": 0.001, "loss": 2.3151, "step": 307900 }, { "epoch": 39.81900452488688, "grad_norm": 0.8898859024047852, "learning_rate": 0.001, "loss": 2.303, "step": 308000 }, { "epoch": 39.831932773109244, "grad_norm": 1.2503662109375, "learning_rate": 0.001, "loss": 2.3222, "step": 308100 }, { "epoch": 39.84486102133161, "grad_norm": 11.598251342773438, "learning_rate": 0.001, "loss": 2.3265, "step": 308200 }, { "epoch": 39.857789269553976, "grad_norm": 10.78840160369873, "learning_rate": 0.001, "loss": 2.2948, "step": 308300 }, { "epoch": 39.87071751777634, "grad_norm": 1.0664637088775635, "learning_rate": 0.001, "loss": 2.3222, "step": 308400 }, { "epoch": 39.88364576599871, "grad_norm": 1.1629340648651123, "learning_rate": 0.001, "loss": 2.3073, "step": 308500 }, { "epoch": 39.89657401422107, "grad_norm": 0.9676783680915833, "learning_rate": 0.001, "loss": 2.3004, "step": 308600 }, { "epoch": 39.90950226244344, "grad_norm": 0.825302243232727, "learning_rate": 0.001, "loss": 2.3139, "step": 308700 }, { "epoch": 39.922430510665805, "grad_norm": 1.0546283721923828, "learning_rate": 0.001, "loss": 2.3102, "step": 308800 }, { "epoch": 39.93535875888817, "grad_norm": 0.8844892978668213, "learning_rate": 0.001, "loss": 2.3305, "step": 308900 }, { "epoch": 39.94828700711054, "grad_norm": 1.6552164554595947, "learning_rate": 0.001, "loss": 2.2971, "step": 309000 }, { "epoch": 39.9612152553329, "grad_norm": 1.3376561403274536, "learning_rate": 0.001, "loss": 2.3071, "step": 309100 }, { "epoch": 39.97414350355527, "grad_norm": 1.0087612867355347, "learning_rate": 0.001, "loss": 2.3144, "step": 309200 }, { "epoch": 39.987071751777634, "grad_norm": 0.9921035766601562, "learning_rate": 0.001, "loss": 2.3194, "step": 309300 }, { "epoch": 40.0, "grad_norm": 3.8009209632873535, "learning_rate": 0.001, "loss": 2.2749, "step": 309400 }, { "epoch": 40.012928248222366, "grad_norm": 1.7098215818405151, "learning_rate": 0.001, "loss": 2.2091, "step": 309500 }, { "epoch": 40.02585649644473, "grad_norm": 1.4469047784805298, "learning_rate": 0.001, "loss": 2.21, "step": 309600 }, { "epoch": 40.0387847446671, "grad_norm": 1.417436122894287, "learning_rate": 0.001, "loss": 2.2287, "step": 309700 }, { "epoch": 40.05171299288946, "grad_norm": 1.4612804651260376, "learning_rate": 0.001, "loss": 2.2318, "step": 309800 }, { "epoch": 40.06464124111183, "grad_norm": 1.708810806274414, "learning_rate": 0.001, "loss": 2.2444, "step": 309900 }, { "epoch": 40.077569489334195, "grad_norm": 2.208813190460205, "learning_rate": 0.001, "loss": 2.2301, "step": 310000 }, { "epoch": 40.09049773755656, "grad_norm": 1.7382380962371826, "learning_rate": 0.001, "loss": 2.2393, "step": 310100 }, { "epoch": 40.10342598577893, "grad_norm": 1.392792820930481, "learning_rate": 0.001, "loss": 2.265, "step": 310200 }, { "epoch": 40.11635423400129, "grad_norm": 1.7844822406768799, "learning_rate": 0.001, "loss": 2.252, "step": 310300 }, { "epoch": 40.12928248222366, "grad_norm": 1.6369693279266357, "learning_rate": 0.001, "loss": 2.2665, "step": 310400 }, { "epoch": 40.142210730446024, "grad_norm": 1.5159152746200562, "learning_rate": 0.001, "loss": 2.2572, "step": 310500 }, { "epoch": 40.15513897866839, "grad_norm": 1.3893249034881592, "learning_rate": 0.001, "loss": 2.2458, "step": 310600 }, { "epoch": 40.168067226890756, "grad_norm": 1.8061078786849976, "learning_rate": 0.001, "loss": 2.2448, "step": 310700 }, { "epoch": 40.18099547511312, "grad_norm": 1.6910438537597656, "learning_rate": 0.001, "loss": 2.2548, "step": 310800 }, { "epoch": 40.19392372333549, "grad_norm": 2.001554489135742, "learning_rate": 0.001, "loss": 2.2483, "step": 310900 }, { "epoch": 40.20685197155785, "grad_norm": 1.8401974439620972, "learning_rate": 0.001, "loss": 2.2611, "step": 311000 }, { "epoch": 40.21978021978022, "grad_norm": 1.9077733755111694, "learning_rate": 0.001, "loss": 2.253, "step": 311100 }, { "epoch": 40.232708468002585, "grad_norm": 1.4913978576660156, "learning_rate": 0.001, "loss": 2.2649, "step": 311200 }, { "epoch": 40.24563671622495, "grad_norm": 1.467464804649353, "learning_rate": 0.001, "loss": 2.2482, "step": 311300 }, { "epoch": 40.25856496444732, "grad_norm": 1.639357328414917, "learning_rate": 0.001, "loss": 2.2496, "step": 311400 }, { "epoch": 40.27149321266968, "grad_norm": 1.689586877822876, "learning_rate": 0.001, "loss": 2.2687, "step": 311500 }, { "epoch": 40.28442146089205, "grad_norm": 1.8610886335372925, "learning_rate": 0.001, "loss": 2.2589, "step": 311600 }, { "epoch": 40.297349709114414, "grad_norm": 2.1913468837738037, "learning_rate": 0.001, "loss": 2.2276, "step": 311700 }, { "epoch": 40.31027795733678, "grad_norm": 1.5539705753326416, "learning_rate": 0.001, "loss": 2.261, "step": 311800 }, { "epoch": 40.323206205559146, "grad_norm": 1.8537489175796509, "learning_rate": 0.001, "loss": 2.2699, "step": 311900 }, { "epoch": 40.33613445378151, "grad_norm": 1.4990074634552002, "learning_rate": 0.001, "loss": 2.2799, "step": 312000 }, { "epoch": 40.34906270200388, "grad_norm": 1.4842256307601929, "learning_rate": 0.001, "loss": 2.2582, "step": 312100 }, { "epoch": 40.36199095022624, "grad_norm": 1.6053208112716675, "learning_rate": 0.001, "loss": 2.2826, "step": 312200 }, { "epoch": 40.37491919844861, "grad_norm": 1.799709677696228, "learning_rate": 0.001, "loss": 2.2651, "step": 312300 }, { "epoch": 40.387847446670975, "grad_norm": 6.940207004547119, "learning_rate": 0.001, "loss": 2.2735, "step": 312400 }, { "epoch": 40.40077569489334, "grad_norm": 1.8571419715881348, "learning_rate": 0.001, "loss": 2.2668, "step": 312500 }, { "epoch": 40.413703943115706, "grad_norm": 1.4691754579544067, "learning_rate": 0.001, "loss": 2.2741, "step": 312600 }, { "epoch": 40.42663219133807, "grad_norm": 1.4990462064743042, "learning_rate": 0.001, "loss": 2.2691, "step": 312700 }, { "epoch": 40.43956043956044, "grad_norm": 2.4671077728271484, "learning_rate": 0.001, "loss": 2.2637, "step": 312800 }, { "epoch": 40.452488687782804, "grad_norm": 1.8855656385421753, "learning_rate": 0.001, "loss": 2.2593, "step": 312900 }, { "epoch": 40.46541693600517, "grad_norm": 2.1678404808044434, "learning_rate": 0.001, "loss": 2.2803, "step": 313000 }, { "epoch": 40.478345184227535, "grad_norm": 1.487504243850708, "learning_rate": 0.001, "loss": 2.2579, "step": 313100 }, { "epoch": 40.4912734324499, "grad_norm": 1.7120510339736938, "learning_rate": 0.001, "loss": 2.3191, "step": 313200 }, { "epoch": 40.50420168067227, "grad_norm": 1.5506964921951294, "learning_rate": 0.001, "loss": 2.2794, "step": 313300 }, { "epoch": 40.51712992889463, "grad_norm": 1.3604402542114258, "learning_rate": 0.001, "loss": 2.2943, "step": 313400 }, { "epoch": 40.530058177117, "grad_norm": 2.040322780609131, "learning_rate": 0.001, "loss": 2.292, "step": 313500 }, { "epoch": 40.542986425339365, "grad_norm": 2.5596141815185547, "learning_rate": 0.001, "loss": 2.2913, "step": 313600 }, { "epoch": 40.55591467356173, "grad_norm": 2.1743886470794678, "learning_rate": 0.001, "loss": 2.2865, "step": 313700 }, { "epoch": 40.568842921784096, "grad_norm": 258.0453796386719, "learning_rate": 0.001, "loss": 2.3015, "step": 313800 }, { "epoch": 40.58177117000646, "grad_norm": 1.2622108459472656, "learning_rate": 0.001, "loss": 2.2943, "step": 313900 }, { "epoch": 40.59469941822883, "grad_norm": 1.5178743600845337, "learning_rate": 0.001, "loss": 2.2826, "step": 314000 }, { "epoch": 40.607627666451194, "grad_norm": 1.6596399545669556, "learning_rate": 0.001, "loss": 2.2843, "step": 314100 }, { "epoch": 40.62055591467356, "grad_norm": 1.4160417318344116, "learning_rate": 0.001, "loss": 2.276, "step": 314200 }, { "epoch": 40.633484162895925, "grad_norm": 1.9704184532165527, "learning_rate": 0.001, "loss": 2.3061, "step": 314300 }, { "epoch": 40.64641241111829, "grad_norm": 1.421712040901184, "learning_rate": 0.001, "loss": 2.2999, "step": 314400 }, { "epoch": 40.65934065934066, "grad_norm": 1.8642412424087524, "learning_rate": 0.001, "loss": 2.2927, "step": 314500 }, { "epoch": 40.67226890756302, "grad_norm": 2.1886239051818848, "learning_rate": 0.001, "loss": 2.2823, "step": 314600 }, { "epoch": 40.68519715578539, "grad_norm": 1.79164719581604, "learning_rate": 0.001, "loss": 2.3097, "step": 314700 }, { "epoch": 40.698125404007754, "grad_norm": 1.5339109897613525, "learning_rate": 0.001, "loss": 2.3075, "step": 314800 }, { "epoch": 40.71105365223012, "grad_norm": 1.8617578744888306, "learning_rate": 0.001, "loss": 2.2943, "step": 314900 }, { "epoch": 40.723981900452486, "grad_norm": 2.057493209838867, "learning_rate": 0.001, "loss": 2.3133, "step": 315000 }, { "epoch": 40.73691014867485, "grad_norm": 12.2993803024292, "learning_rate": 0.001, "loss": 2.3187, "step": 315100 }, { "epoch": 40.74983839689722, "grad_norm": 29.225975036621094, "learning_rate": 0.001, "loss": 2.2981, "step": 315200 }, { "epoch": 40.762766645119584, "grad_norm": 2.5804193019866943, "learning_rate": 0.001, "loss": 2.3085, "step": 315300 }, { "epoch": 40.77569489334195, "grad_norm": 1.6957643032073975, "learning_rate": 0.001, "loss": 2.2818, "step": 315400 }, { "epoch": 40.788623141564315, "grad_norm": 2.040342092514038, "learning_rate": 0.001, "loss": 2.3146, "step": 315500 }, { "epoch": 40.80155138978668, "grad_norm": 1.4929412603378296, "learning_rate": 0.001, "loss": 2.2999, "step": 315600 }, { "epoch": 40.81447963800905, "grad_norm": 2.333847999572754, "learning_rate": 0.001, "loss": 2.3085, "step": 315700 }, { "epoch": 40.82740788623141, "grad_norm": 1.6126044988632202, "learning_rate": 0.001, "loss": 2.3296, "step": 315800 }, { "epoch": 40.84033613445378, "grad_norm": 5.163610935211182, "learning_rate": 0.001, "loss": 2.2984, "step": 315900 }, { "epoch": 40.853264382676144, "grad_norm": 1.6581789255142212, "learning_rate": 0.001, "loss": 2.333, "step": 316000 }, { "epoch": 40.86619263089851, "grad_norm": 1.926135540008545, "learning_rate": 0.001, "loss": 2.3398, "step": 316100 }, { "epoch": 40.879120879120876, "grad_norm": 1.7515499591827393, "learning_rate": 0.001, "loss": 2.3121, "step": 316200 }, { "epoch": 40.89204912734324, "grad_norm": 1.4290529489517212, "learning_rate": 0.001, "loss": 2.3244, "step": 316300 }, { "epoch": 40.90497737556561, "grad_norm": 1.5435349941253662, "learning_rate": 0.001, "loss": 2.3102, "step": 316400 }, { "epoch": 40.91790562378797, "grad_norm": 1.3535139560699463, "learning_rate": 0.001, "loss": 2.3192, "step": 316500 }, { "epoch": 40.93083387201034, "grad_norm": 1.1795158386230469, "learning_rate": 0.001, "loss": 2.3297, "step": 316600 }, { "epoch": 40.943762120232705, "grad_norm": 1.6672722101211548, "learning_rate": 0.001, "loss": 2.3192, "step": 316700 }, { "epoch": 40.95669036845507, "grad_norm": 1.6341421604156494, "learning_rate": 0.001, "loss": 2.3318, "step": 316800 }, { "epoch": 40.96961861667744, "grad_norm": 2.1886110305786133, "learning_rate": 0.001, "loss": 2.3144, "step": 316900 }, { "epoch": 40.9825468648998, "grad_norm": 17.400619506835938, "learning_rate": 0.001, "loss": 2.3466, "step": 317000 }, { "epoch": 40.99547511312217, "grad_norm": 1.7003215551376343, "learning_rate": 0.001, "loss": 2.3206, "step": 317100 }, { "epoch": 41.00840336134454, "grad_norm": 0.9823348522186279, "learning_rate": 0.001, "loss": 2.2541, "step": 317200 }, { "epoch": 41.02133160956691, "grad_norm": 0.8415964841842651, "learning_rate": 0.001, "loss": 2.2187, "step": 317300 }, { "epoch": 41.03425985778927, "grad_norm": 0.9827258586883545, "learning_rate": 0.001, "loss": 2.2134, "step": 317400 }, { "epoch": 41.04718810601164, "grad_norm": 0.9685075879096985, "learning_rate": 0.001, "loss": 2.207, "step": 317500 }, { "epoch": 41.060116354234005, "grad_norm": 0.9613476395606995, "learning_rate": 0.001, "loss": 2.2243, "step": 317600 }, { "epoch": 41.07304460245637, "grad_norm": 1.0596168041229248, "learning_rate": 0.001, "loss": 2.2488, "step": 317700 }, { "epoch": 41.085972850678736, "grad_norm": 0.9083664417266846, "learning_rate": 0.001, "loss": 2.2381, "step": 317800 }, { "epoch": 41.0989010989011, "grad_norm": 1.0631532669067383, "learning_rate": 0.001, "loss": 2.2279, "step": 317900 }, { "epoch": 41.11182934712347, "grad_norm": 0.9975860714912415, "learning_rate": 0.001, "loss": 2.2255, "step": 318000 }, { "epoch": 41.124757595345834, "grad_norm": 0.9181241989135742, "learning_rate": 0.001, "loss": 2.2261, "step": 318100 }, { "epoch": 41.1376858435682, "grad_norm": 0.9391404390335083, "learning_rate": 0.001, "loss": 2.2244, "step": 318200 }, { "epoch": 41.150614091790565, "grad_norm": 10.291114807128906, "learning_rate": 0.001, "loss": 2.2382, "step": 318300 }, { "epoch": 41.16354234001293, "grad_norm": 1.2588189840316772, "learning_rate": 0.001, "loss": 2.2291, "step": 318400 }, { "epoch": 41.1764705882353, "grad_norm": 0.956172525882721, "learning_rate": 0.001, "loss": 2.2406, "step": 318500 }, { "epoch": 41.18939883645766, "grad_norm": 1.01069176197052, "learning_rate": 0.001, "loss": 2.2444, "step": 318600 }, { "epoch": 41.20232708468003, "grad_norm": 0.9258178472518921, "learning_rate": 0.001, "loss": 2.2612, "step": 318700 }, { "epoch": 41.215255332902395, "grad_norm": 0.8808141946792603, "learning_rate": 0.001, "loss": 2.2612, "step": 318800 }, { "epoch": 41.22818358112476, "grad_norm": 0.9613651037216187, "learning_rate": 0.001, "loss": 2.2479, "step": 318900 }, { "epoch": 41.241111829347126, "grad_norm": 0.9572213888168335, "learning_rate": 0.001, "loss": 2.233, "step": 319000 }, { "epoch": 41.25404007756949, "grad_norm": 1.0153367519378662, "learning_rate": 0.001, "loss": 2.2549, "step": 319100 }, { "epoch": 41.26696832579186, "grad_norm": 1.2371715307235718, "learning_rate": 0.001, "loss": 2.2495, "step": 319200 }, { "epoch": 41.279896574014224, "grad_norm": 1.2471072673797607, "learning_rate": 0.001, "loss": 2.2322, "step": 319300 }, { "epoch": 41.29282482223659, "grad_norm": 1.1517325639724731, "learning_rate": 0.001, "loss": 2.2619, "step": 319400 }, { "epoch": 41.305753070458955, "grad_norm": 1.008955478668213, "learning_rate": 0.001, "loss": 2.2797, "step": 319500 }, { "epoch": 41.31868131868132, "grad_norm": 0.9058171510696411, "learning_rate": 0.001, "loss": 2.2464, "step": 319600 }, { "epoch": 41.33160956690369, "grad_norm": 0.8182075023651123, "learning_rate": 0.001, "loss": 2.2709, "step": 319700 }, { "epoch": 41.34453781512605, "grad_norm": 10.291059494018555, "learning_rate": 0.001, "loss": 2.2604, "step": 319800 }, { "epoch": 41.35746606334842, "grad_norm": 1.0311391353607178, "learning_rate": 0.001, "loss": 2.2439, "step": 319900 }, { "epoch": 41.370394311570784, "grad_norm": 0.866374135017395, "learning_rate": 0.001, "loss": 2.2584, "step": 320000 }, { "epoch": 41.38332255979315, "grad_norm": 0.8805108666419983, "learning_rate": 0.001, "loss": 2.2495, "step": 320100 }, { "epoch": 41.396250808015516, "grad_norm": 0.8274638056755066, "learning_rate": 0.001, "loss": 2.2786, "step": 320200 }, { "epoch": 41.40917905623788, "grad_norm": 1.1118338108062744, "learning_rate": 0.001, "loss": 2.2604, "step": 320300 }, { "epoch": 41.42210730446025, "grad_norm": 1.3068139553070068, "learning_rate": 0.001, "loss": 2.2628, "step": 320400 }, { "epoch": 41.43503555268261, "grad_norm": 0.9266138672828674, "learning_rate": 0.001, "loss": 2.272, "step": 320500 }, { "epoch": 41.44796380090498, "grad_norm": 0.9940004944801331, "learning_rate": 0.001, "loss": 2.2721, "step": 320600 }, { "epoch": 41.460892049127345, "grad_norm": 0.8620325326919556, "learning_rate": 0.001, "loss": 2.2638, "step": 320700 }, { "epoch": 41.47382029734971, "grad_norm": 0.7366208434104919, "learning_rate": 0.001, "loss": 2.2845, "step": 320800 }, { "epoch": 41.48674854557208, "grad_norm": 1.0796139240264893, "learning_rate": 0.001, "loss": 2.2647, "step": 320900 }, { "epoch": 41.49967679379444, "grad_norm": 0.8561561107635498, "learning_rate": 0.001, "loss": 2.2676, "step": 321000 }, { "epoch": 41.51260504201681, "grad_norm": 0.9440327286720276, "learning_rate": 0.001, "loss": 2.2629, "step": 321100 }, { "epoch": 41.525533290239174, "grad_norm": 0.9376988410949707, "learning_rate": 0.001, "loss": 2.2696, "step": 321200 }, { "epoch": 41.53846153846154, "grad_norm": 1.1279915571212769, "learning_rate": 0.001, "loss": 2.2685, "step": 321300 }, { "epoch": 41.551389786683906, "grad_norm": 0.9151734113693237, "learning_rate": 0.001, "loss": 2.2648, "step": 321400 }, { "epoch": 41.56431803490627, "grad_norm": 0.705574095249176, "learning_rate": 0.001, "loss": 2.2951, "step": 321500 }, { "epoch": 41.57724628312864, "grad_norm": 0.9085053205490112, "learning_rate": 0.001, "loss": 2.2516, "step": 321600 }, { "epoch": 41.590174531351, "grad_norm": 0.976110577583313, "learning_rate": 0.001, "loss": 2.2839, "step": 321700 }, { "epoch": 41.60310277957337, "grad_norm": 0.8516009449958801, "learning_rate": 0.001, "loss": 2.2711, "step": 321800 }, { "epoch": 41.616031027795735, "grad_norm": 0.8994240164756775, "learning_rate": 0.001, "loss": 2.2848, "step": 321900 }, { "epoch": 41.6289592760181, "grad_norm": 0.8403706550598145, "learning_rate": 0.001, "loss": 2.2868, "step": 322000 }, { "epoch": 41.64188752424047, "grad_norm": 0.9412771463394165, "learning_rate": 0.001, "loss": 2.2753, "step": 322100 }, { "epoch": 41.65481577246283, "grad_norm": 0.7634378671646118, "learning_rate": 0.001, "loss": 2.2807, "step": 322200 }, { "epoch": 41.6677440206852, "grad_norm": 0.8941670060157776, "learning_rate": 0.001, "loss": 2.284, "step": 322300 }, { "epoch": 41.680672268907564, "grad_norm": 1.072508692741394, "learning_rate": 0.001, "loss": 2.2706, "step": 322400 }, { "epoch": 41.69360051712993, "grad_norm": 1.236066222190857, "learning_rate": 0.001, "loss": 2.2713, "step": 322500 }, { "epoch": 41.706528765352296, "grad_norm": 0.8766956329345703, "learning_rate": 0.001, "loss": 2.2704, "step": 322600 }, { "epoch": 41.71945701357466, "grad_norm": 0.7377656698226929, "learning_rate": 0.001, "loss": 2.2723, "step": 322700 }, { "epoch": 41.73238526179703, "grad_norm": 1.0406455993652344, "learning_rate": 0.001, "loss": 2.2815, "step": 322800 }, { "epoch": 41.74531351001939, "grad_norm": 0.9399386644363403, "learning_rate": 0.001, "loss": 2.2815, "step": 322900 }, { "epoch": 41.75824175824176, "grad_norm": 0.8080585598945618, "learning_rate": 0.001, "loss": 2.2741, "step": 323000 }, { "epoch": 41.771170006464125, "grad_norm": 0.9707673192024231, "learning_rate": 0.001, "loss": 2.2966, "step": 323100 }, { "epoch": 41.78409825468649, "grad_norm": 1.0282843112945557, "learning_rate": 0.001, "loss": 2.2732, "step": 323200 }, { "epoch": 41.79702650290886, "grad_norm": 0.910554826259613, "learning_rate": 0.001, "loss": 2.2774, "step": 323300 }, { "epoch": 41.80995475113122, "grad_norm": 0.7816225290298462, "learning_rate": 0.001, "loss": 2.3032, "step": 323400 }, { "epoch": 41.82288299935359, "grad_norm": 0.9902456402778625, "learning_rate": 0.001, "loss": 2.2917, "step": 323500 }, { "epoch": 41.835811247575954, "grad_norm": 1.188258171081543, "learning_rate": 0.001, "loss": 2.2943, "step": 323600 }, { "epoch": 41.84873949579832, "grad_norm": 0.7342848181724548, "learning_rate": 0.001, "loss": 2.306, "step": 323700 }, { "epoch": 41.861667744020686, "grad_norm": 0.8802220821380615, "learning_rate": 0.001, "loss": 2.3028, "step": 323800 }, { "epoch": 41.87459599224305, "grad_norm": 1.0482569932937622, "learning_rate": 0.001, "loss": 2.3018, "step": 323900 }, { "epoch": 41.88752424046542, "grad_norm": 1.1913714408874512, "learning_rate": 0.001, "loss": 2.2907, "step": 324000 }, { "epoch": 41.90045248868778, "grad_norm": 0.8537502884864807, "learning_rate": 0.001, "loss": 2.3008, "step": 324100 }, { "epoch": 41.91338073691015, "grad_norm": 0.8735985159873962, "learning_rate": 0.001, "loss": 2.2902, "step": 324200 }, { "epoch": 41.926308985132515, "grad_norm": 0.7309550642967224, "learning_rate": 0.001, "loss": 2.2969, "step": 324300 }, { "epoch": 41.93923723335488, "grad_norm": 1.6275758743286133, "learning_rate": 0.001, "loss": 2.2924, "step": 324400 }, { "epoch": 41.95216548157725, "grad_norm": 0.9839608073234558, "learning_rate": 0.001, "loss": 2.3078, "step": 324500 }, { "epoch": 41.96509372979961, "grad_norm": 6.437243938446045, "learning_rate": 0.001, "loss": 2.3057, "step": 324600 }, { "epoch": 41.97802197802198, "grad_norm": 1.109365463256836, "learning_rate": 0.001, "loss": 2.2912, "step": 324700 }, { "epoch": 41.990950226244344, "grad_norm": 0.7644253373146057, "learning_rate": 0.001, "loss": 2.2944, "step": 324800 }, { "epoch": 42.00387847446671, "grad_norm": 4.004805088043213, "learning_rate": 0.001, "loss": 2.2888, "step": 324900 }, { "epoch": 42.016806722689076, "grad_norm": 0.894649863243103, "learning_rate": 0.001, "loss": 2.2014, "step": 325000 }, { "epoch": 42.02973497091144, "grad_norm": 0.9703426957130432, "learning_rate": 0.001, "loss": 2.204, "step": 325100 }, { "epoch": 42.04266321913381, "grad_norm": 0.9745487570762634, "learning_rate": 0.001, "loss": 2.2242, "step": 325200 }, { "epoch": 42.05559146735617, "grad_norm": 1.0690556764602661, "learning_rate": 0.001, "loss": 2.2044, "step": 325300 }, { "epoch": 42.06851971557854, "grad_norm": 1.082793951034546, "learning_rate": 0.001, "loss": 2.2248, "step": 325400 }, { "epoch": 42.081447963800905, "grad_norm": 3.076871633529663, "learning_rate": 0.001, "loss": 2.2377, "step": 325500 }, { "epoch": 42.09437621202327, "grad_norm": 0.9949036240577698, "learning_rate": 0.001, "loss": 2.224, "step": 325600 }, { "epoch": 42.107304460245636, "grad_norm": 0.8127809762954712, "learning_rate": 0.001, "loss": 2.2263, "step": 325700 }, { "epoch": 42.120232708468, "grad_norm": 1.180733323097229, "learning_rate": 0.001, "loss": 2.2287, "step": 325800 }, { "epoch": 42.13316095669037, "grad_norm": 3.149332046508789, "learning_rate": 0.001, "loss": 2.2121, "step": 325900 }, { "epoch": 42.146089204912734, "grad_norm": 0.7869861721992493, "learning_rate": 0.001, "loss": 2.2244, "step": 326000 }, { "epoch": 42.1590174531351, "grad_norm": 1.1478502750396729, "learning_rate": 0.001, "loss": 2.2175, "step": 326100 }, { "epoch": 42.171945701357465, "grad_norm": 0.8683115839958191, "learning_rate": 0.001, "loss": 2.2337, "step": 326200 }, { "epoch": 42.18487394957983, "grad_norm": 0.8064966797828674, "learning_rate": 0.001, "loss": 2.2242, "step": 326300 }, { "epoch": 42.1978021978022, "grad_norm": 0.7902854084968567, "learning_rate": 0.001, "loss": 2.2348, "step": 326400 }, { "epoch": 42.21073044602456, "grad_norm": 1.0403004884719849, "learning_rate": 0.001, "loss": 2.2307, "step": 326500 }, { "epoch": 42.22365869424693, "grad_norm": 1.0809893608093262, "learning_rate": 0.001, "loss": 2.2361, "step": 326600 }, { "epoch": 42.236586942469295, "grad_norm": 1.0995150804519653, "learning_rate": 0.001, "loss": 2.2361, "step": 326700 }, { "epoch": 42.24951519069166, "grad_norm": 0.9019262790679932, "learning_rate": 0.001, "loss": 2.2351, "step": 326800 }, { "epoch": 42.262443438914026, "grad_norm": 1.133380651473999, "learning_rate": 0.001, "loss": 2.2439, "step": 326900 }, { "epoch": 42.27537168713639, "grad_norm": 1.1739871501922607, "learning_rate": 0.001, "loss": 2.2383, "step": 327000 }, { "epoch": 42.28829993535876, "grad_norm": 9.575297355651855, "learning_rate": 0.001, "loss": 2.2378, "step": 327100 }, { "epoch": 42.301228183581124, "grad_norm": 0.9775987863540649, "learning_rate": 0.001, "loss": 2.2339, "step": 327200 }, { "epoch": 42.31415643180349, "grad_norm": 1.0619035959243774, "learning_rate": 0.001, "loss": 2.2731, "step": 327300 }, { "epoch": 42.327084680025855, "grad_norm": 0.8253508806228638, "learning_rate": 0.001, "loss": 2.2397, "step": 327400 }, { "epoch": 42.34001292824822, "grad_norm": 0.9199579358100891, "learning_rate": 0.001, "loss": 2.2346, "step": 327500 }, { "epoch": 42.35294117647059, "grad_norm": 1.1565144062042236, "learning_rate": 0.001, "loss": 2.2719, "step": 327600 }, { "epoch": 42.36586942469295, "grad_norm": 0.8124529123306274, "learning_rate": 0.001, "loss": 2.256, "step": 327700 }, { "epoch": 42.37879767291532, "grad_norm": 1.187366247177124, "learning_rate": 0.001, "loss": 2.2197, "step": 327800 }, { "epoch": 42.391725921137684, "grad_norm": 0.9464920163154602, "learning_rate": 0.001, "loss": 2.2666, "step": 327900 }, { "epoch": 42.40465416936005, "grad_norm": 1.7827260494232178, "learning_rate": 0.001, "loss": 2.2514, "step": 328000 }, { "epoch": 42.417582417582416, "grad_norm": 0.8865476846694946, "learning_rate": 0.001, "loss": 2.2632, "step": 328100 }, { "epoch": 42.43051066580478, "grad_norm": 1.4302055835723877, "learning_rate": 0.001, "loss": 2.2718, "step": 328200 }, { "epoch": 42.44343891402715, "grad_norm": 0.8101428151130676, "learning_rate": 0.001, "loss": 2.2564, "step": 328300 }, { "epoch": 42.456367162249514, "grad_norm": 0.9892765879631042, "learning_rate": 0.001, "loss": 2.2605, "step": 328400 }, { "epoch": 42.46929541047188, "grad_norm": 4.254183292388916, "learning_rate": 0.001, "loss": 2.2659, "step": 328500 }, { "epoch": 42.482223658694245, "grad_norm": 0.869411051273346, "learning_rate": 0.001, "loss": 2.2762, "step": 328600 }, { "epoch": 42.49515190691661, "grad_norm": 5.033255577087402, "learning_rate": 0.001, "loss": 2.2642, "step": 328700 }, { "epoch": 42.50808015513898, "grad_norm": 2.1530463695526123, "learning_rate": 0.001, "loss": 2.2767, "step": 328800 }, { "epoch": 42.52100840336134, "grad_norm": 0.9150707721710205, "learning_rate": 0.001, "loss": 2.2627, "step": 328900 }, { "epoch": 42.53393665158371, "grad_norm": 1.9956530332565308, "learning_rate": 0.001, "loss": 2.2714, "step": 329000 }, { "epoch": 42.546864899806074, "grad_norm": 0.9136866331100464, "learning_rate": 0.001, "loss": 2.2774, "step": 329100 }, { "epoch": 42.55979314802844, "grad_norm": 1.191107153892517, "learning_rate": 0.001, "loss": 2.269, "step": 329200 }, { "epoch": 42.572721396250806, "grad_norm": 0.868606448173523, "learning_rate": 0.001, "loss": 2.2806, "step": 329300 }, { "epoch": 42.58564964447317, "grad_norm": 1.5182815790176392, "learning_rate": 0.001, "loss": 2.2938, "step": 329400 }, { "epoch": 42.59857789269554, "grad_norm": 1.1945663690567017, "learning_rate": 0.001, "loss": 2.2911, "step": 329500 }, { "epoch": 42.6115061409179, "grad_norm": 0.9797205924987793, "learning_rate": 0.001, "loss": 2.2611, "step": 329600 }, { "epoch": 42.62443438914027, "grad_norm": 0.8887245059013367, "learning_rate": 0.001, "loss": 2.2865, "step": 329700 }, { "epoch": 42.637362637362635, "grad_norm": 1.2239397764205933, "learning_rate": 0.001, "loss": 2.2945, "step": 329800 }, { "epoch": 42.650290885585, "grad_norm": 1.1750860214233398, "learning_rate": 0.001, "loss": 2.267, "step": 329900 }, { "epoch": 42.66321913380737, "grad_norm": 1.0936685800552368, "learning_rate": 0.001, "loss": 2.2601, "step": 330000 }, { "epoch": 42.67614738202973, "grad_norm": 3.1559243202209473, "learning_rate": 0.001, "loss": 2.2825, "step": 330100 }, { "epoch": 42.6890756302521, "grad_norm": 0.9100037217140198, "learning_rate": 0.001, "loss": 2.2793, "step": 330200 }, { "epoch": 42.702003878474464, "grad_norm": 1.476170539855957, "learning_rate": 0.001, "loss": 2.2815, "step": 330300 }, { "epoch": 42.71493212669683, "grad_norm": 2.0890092849731445, "learning_rate": 0.001, "loss": 2.2648, "step": 330400 }, { "epoch": 42.727860374919196, "grad_norm": 1.0997235774993896, "learning_rate": 0.001, "loss": 2.2939, "step": 330500 }, { "epoch": 42.74078862314156, "grad_norm": 0.7869559526443481, "learning_rate": 0.001, "loss": 2.285, "step": 330600 }, { "epoch": 42.75371687136393, "grad_norm": 0.9284212589263916, "learning_rate": 0.001, "loss": 2.2902, "step": 330700 }, { "epoch": 42.76664511958629, "grad_norm": 1.842348575592041, "learning_rate": 0.001, "loss": 2.2742, "step": 330800 }, { "epoch": 42.77957336780866, "grad_norm": 0.7792439460754395, "learning_rate": 0.001, "loss": 2.2982, "step": 330900 }, { "epoch": 42.792501616031025, "grad_norm": 0.9592788219451904, "learning_rate": 0.001, "loss": 2.2973, "step": 331000 }, { "epoch": 42.80542986425339, "grad_norm": 0.9752908945083618, "learning_rate": 0.001, "loss": 2.2801, "step": 331100 }, { "epoch": 42.81835811247576, "grad_norm": 1.0584452152252197, "learning_rate": 0.001, "loss": 2.2781, "step": 331200 }, { "epoch": 42.83128636069812, "grad_norm": 0.9420809149742126, "learning_rate": 0.001, "loss": 2.2781, "step": 331300 }, { "epoch": 42.84421460892049, "grad_norm": 1.1050257682800293, "learning_rate": 0.001, "loss": 2.2832, "step": 331400 }, { "epoch": 42.857142857142854, "grad_norm": 0.8154997825622559, "learning_rate": 0.001, "loss": 2.2953, "step": 331500 }, { "epoch": 42.87007110536522, "grad_norm": 9.333405494689941, "learning_rate": 0.001, "loss": 2.2765, "step": 331600 }, { "epoch": 42.882999353587586, "grad_norm": 3.1905863285064697, "learning_rate": 0.001, "loss": 2.2952, "step": 331700 }, { "epoch": 42.89592760180995, "grad_norm": 0.877446711063385, "learning_rate": 0.001, "loss": 2.2822, "step": 331800 }, { "epoch": 42.90885585003232, "grad_norm": 3.4208364486694336, "learning_rate": 0.001, "loss": 2.3164, "step": 331900 }, { "epoch": 42.92178409825468, "grad_norm": 0.8511395454406738, "learning_rate": 0.001, "loss": 2.2935, "step": 332000 }, { "epoch": 42.93471234647705, "grad_norm": 1.0433536767959595, "learning_rate": 0.001, "loss": 2.2741, "step": 332100 }, { "epoch": 42.947640594699415, "grad_norm": 1.0869476795196533, "learning_rate": 0.001, "loss": 2.295, "step": 332200 }, { "epoch": 42.96056884292178, "grad_norm": 0.9486316442489624, "learning_rate": 0.001, "loss": 2.2993, "step": 332300 }, { "epoch": 42.97349709114415, "grad_norm": 1.4379619359970093, "learning_rate": 0.001, "loss": 2.2786, "step": 332400 }, { "epoch": 42.98642533936652, "grad_norm": 1.3825342655181885, "learning_rate": 0.001, "loss": 2.2891, "step": 332500 }, { "epoch": 42.999353587588885, "grad_norm": 1.1244267225265503, "learning_rate": 0.001, "loss": 2.2927, "step": 332600 }, { "epoch": 43.01228183581125, "grad_norm": 1.2583116292953491, "learning_rate": 0.001, "loss": 2.2064, "step": 332700 }, { "epoch": 43.02521008403362, "grad_norm": 0.9020789265632629, "learning_rate": 0.001, "loss": 2.2187, "step": 332800 }, { "epoch": 43.03813833225598, "grad_norm": 9.186049461364746, "learning_rate": 0.001, "loss": 2.2055, "step": 332900 }, { "epoch": 43.05106658047835, "grad_norm": 1.2586379051208496, "learning_rate": 0.001, "loss": 2.2065, "step": 333000 }, { "epoch": 43.063994828700714, "grad_norm": 1.0767298936843872, "learning_rate": 0.001, "loss": 2.2112, "step": 333100 }, { "epoch": 43.07692307692308, "grad_norm": 6.070093154907227, "learning_rate": 0.001, "loss": 2.1985, "step": 333200 }, { "epoch": 43.089851325145446, "grad_norm": 1.3459951877593994, "learning_rate": 0.001, "loss": 2.2125, "step": 333300 }, { "epoch": 43.10277957336781, "grad_norm": 0.9753942489624023, "learning_rate": 0.001, "loss": 2.2174, "step": 333400 }, { "epoch": 43.11570782159018, "grad_norm": 1.1657960414886475, "learning_rate": 0.001, "loss": 2.2081, "step": 333500 }, { "epoch": 43.12863606981254, "grad_norm": 1.0928910970687866, "learning_rate": 0.001, "loss": 2.2436, "step": 333600 }, { "epoch": 43.14156431803491, "grad_norm": 1.088786244392395, "learning_rate": 0.001, "loss": 2.2282, "step": 333700 }, { "epoch": 43.154492566257275, "grad_norm": 0.9424124360084534, "learning_rate": 0.001, "loss": 2.2109, "step": 333800 }, { "epoch": 43.16742081447964, "grad_norm": 0.9896833300590515, "learning_rate": 0.001, "loss": 2.2239, "step": 333900 }, { "epoch": 43.18034906270201, "grad_norm": 1.0889220237731934, "learning_rate": 0.001, "loss": 2.2154, "step": 334000 }, { "epoch": 43.19327731092437, "grad_norm": 1.0846225023269653, "learning_rate": 0.001, "loss": 2.2224, "step": 334100 }, { "epoch": 43.20620555914674, "grad_norm": 1.037866234779358, "learning_rate": 0.001, "loss": 2.2313, "step": 334200 }, { "epoch": 43.219133807369104, "grad_norm": 1.5492429733276367, "learning_rate": 0.001, "loss": 2.2297, "step": 334300 }, { "epoch": 43.23206205559147, "grad_norm": 1.2981067895889282, "learning_rate": 0.001, "loss": 2.2116, "step": 334400 }, { "epoch": 43.244990303813836, "grad_norm": 1.4155553579330444, "learning_rate": 0.001, "loss": 2.2212, "step": 334500 }, { "epoch": 43.2579185520362, "grad_norm": 1.030220866203308, "learning_rate": 0.001, "loss": 2.2478, "step": 334600 }, { "epoch": 43.27084680025857, "grad_norm": 1.0043396949768066, "learning_rate": 0.001, "loss": 2.2461, "step": 334700 }, { "epoch": 43.28377504848093, "grad_norm": 1.8313276767730713, "learning_rate": 0.001, "loss": 2.2559, "step": 334800 }, { "epoch": 43.2967032967033, "grad_norm": 20.734455108642578, "learning_rate": 0.001, "loss": 2.2425, "step": 334900 }, { "epoch": 43.309631544925665, "grad_norm": 0.9220669269561768, "learning_rate": 0.001, "loss": 2.2474, "step": 335000 }, { "epoch": 43.32255979314803, "grad_norm": 1.1314971446990967, "learning_rate": 0.001, "loss": 2.2531, "step": 335100 }, { "epoch": 43.3354880413704, "grad_norm": 1.1778452396392822, "learning_rate": 0.001, "loss": 2.2449, "step": 335200 }, { "epoch": 43.34841628959276, "grad_norm": 1.5559656620025635, "learning_rate": 0.001, "loss": 2.2408, "step": 335300 }, { "epoch": 43.36134453781513, "grad_norm": 1.2032549381256104, "learning_rate": 0.001, "loss": 2.2522, "step": 335400 }, { "epoch": 43.374272786037494, "grad_norm": 0.932044506072998, "learning_rate": 0.001, "loss": 2.2421, "step": 335500 }, { "epoch": 43.38720103425986, "grad_norm": 0.972621500492096, "learning_rate": 0.001, "loss": 2.2553, "step": 335600 }, { "epoch": 43.400129282482226, "grad_norm": 1.0260034799575806, "learning_rate": 0.001, "loss": 2.2445, "step": 335700 }, { "epoch": 43.41305753070459, "grad_norm": 0.9612904191017151, "learning_rate": 0.001, "loss": 2.2456, "step": 335800 }, { "epoch": 43.42598577892696, "grad_norm": 1.0249487161636353, "learning_rate": 0.001, "loss": 2.282, "step": 335900 }, { "epoch": 43.43891402714932, "grad_norm": 1.1244758367538452, "learning_rate": 0.001, "loss": 2.272, "step": 336000 }, { "epoch": 43.45184227537169, "grad_norm": 1.3091089725494385, "learning_rate": 0.001, "loss": 2.2504, "step": 336100 }, { "epoch": 43.464770523594055, "grad_norm": 0.9877008199691772, "learning_rate": 0.001, "loss": 2.2574, "step": 336200 }, { "epoch": 43.47769877181642, "grad_norm": 1.291802167892456, "learning_rate": 0.001, "loss": 2.2418, "step": 336300 }, { "epoch": 43.49062702003879, "grad_norm": 0.9918951988220215, "learning_rate": 0.001, "loss": 2.2867, "step": 336400 }, { "epoch": 43.50355526826115, "grad_norm": 1.0764724016189575, "learning_rate": 0.001, "loss": 2.248, "step": 336500 }, { "epoch": 43.51648351648352, "grad_norm": 1.0708739757537842, "learning_rate": 0.001, "loss": 2.2585, "step": 336600 }, { "epoch": 43.529411764705884, "grad_norm": 1.0535615682601929, "learning_rate": 0.001, "loss": 2.2596, "step": 336700 }, { "epoch": 43.54234001292825, "grad_norm": 1.1924726963043213, "learning_rate": 0.001, "loss": 2.2566, "step": 336800 }, { "epoch": 43.555268261150616, "grad_norm": 1.2948497533798218, "learning_rate": 0.001, "loss": 2.2686, "step": 336900 }, { "epoch": 43.56819650937298, "grad_norm": 1.013824701309204, "learning_rate": 0.001, "loss": 2.2557, "step": 337000 }, { "epoch": 43.58112475759535, "grad_norm": 1.2194181680679321, "learning_rate": 0.001, "loss": 2.2625, "step": 337100 }, { "epoch": 43.59405300581771, "grad_norm": 3.0257718563079834, "learning_rate": 0.001, "loss": 2.2712, "step": 337200 }, { "epoch": 43.60698125404008, "grad_norm": 1.059747576713562, "learning_rate": 0.001, "loss": 2.2466, "step": 337300 }, { "epoch": 43.619909502262445, "grad_norm": 1.049677848815918, "learning_rate": 0.001, "loss": 2.2735, "step": 337400 }, { "epoch": 43.63283775048481, "grad_norm": 1.0107004642486572, "learning_rate": 0.001, "loss": 2.2797, "step": 337500 }, { "epoch": 43.645765998707176, "grad_norm": 0.9215869903564453, "learning_rate": 0.001, "loss": 2.2756, "step": 337600 }, { "epoch": 43.65869424692954, "grad_norm": 1.0610439777374268, "learning_rate": 0.001, "loss": 2.2732, "step": 337700 }, { "epoch": 43.67162249515191, "grad_norm": 1.231418490409851, "learning_rate": 0.001, "loss": 2.2803, "step": 337800 }, { "epoch": 43.684550743374274, "grad_norm": 1.2186243534088135, "learning_rate": 0.001, "loss": 2.2672, "step": 337900 }, { "epoch": 43.69747899159664, "grad_norm": 1.0001308917999268, "learning_rate": 0.001, "loss": 2.2659, "step": 338000 }, { "epoch": 43.710407239819006, "grad_norm": 1.1067463159561157, "learning_rate": 0.001, "loss": 2.275, "step": 338100 }, { "epoch": 43.72333548804137, "grad_norm": 1.075865387916565, "learning_rate": 0.001, "loss": 2.2772, "step": 338200 }, { "epoch": 43.73626373626374, "grad_norm": 2.3420448303222656, "learning_rate": 0.001, "loss": 2.2605, "step": 338300 }, { "epoch": 43.7491919844861, "grad_norm": 1.33390212059021, "learning_rate": 0.001, "loss": 2.2947, "step": 338400 }, { "epoch": 43.76212023270847, "grad_norm": 1.1111044883728027, "learning_rate": 0.001, "loss": 2.2918, "step": 338500 }, { "epoch": 43.775048480930835, "grad_norm": 1.0430560111999512, "learning_rate": 0.001, "loss": 2.2822, "step": 338600 }, { "epoch": 43.7879767291532, "grad_norm": 4.090807914733887, "learning_rate": 0.001, "loss": 2.2891, "step": 338700 }, { "epoch": 43.800904977375566, "grad_norm": 1.077617883682251, "learning_rate": 0.001, "loss": 2.2767, "step": 338800 }, { "epoch": 43.81383322559793, "grad_norm": 1.0840373039245605, "learning_rate": 0.001, "loss": 2.2754, "step": 338900 }, { "epoch": 43.8267614738203, "grad_norm": 1.0962207317352295, "learning_rate": 0.001, "loss": 2.2818, "step": 339000 }, { "epoch": 43.839689722042664, "grad_norm": 1.2003010511398315, "learning_rate": 0.001, "loss": 2.3101, "step": 339100 }, { "epoch": 43.85261797026503, "grad_norm": 1.3158296346664429, "learning_rate": 0.001, "loss": 2.2966, "step": 339200 }, { "epoch": 43.865546218487395, "grad_norm": 1.3478866815567017, "learning_rate": 0.001, "loss": 2.2931, "step": 339300 }, { "epoch": 43.87847446670976, "grad_norm": 1.0188488960266113, "learning_rate": 0.001, "loss": 2.2855, "step": 339400 }, { "epoch": 43.89140271493213, "grad_norm": 1.8422967195510864, "learning_rate": 0.001, "loss": 2.2874, "step": 339500 }, { "epoch": 43.90433096315449, "grad_norm": 1.0662100315093994, "learning_rate": 0.001, "loss": 2.2862, "step": 339600 }, { "epoch": 43.91725921137686, "grad_norm": 1.070412516593933, "learning_rate": 0.001, "loss": 2.2942, "step": 339700 }, { "epoch": 43.930187459599225, "grad_norm": 1.2643119096755981, "learning_rate": 0.001, "loss": 2.2725, "step": 339800 }, { "epoch": 43.94311570782159, "grad_norm": 3.65637469291687, "learning_rate": 0.001, "loss": 2.2841, "step": 339900 }, { "epoch": 43.956043956043956, "grad_norm": 1.0302813053131104, "learning_rate": 0.001, "loss": 2.2887, "step": 340000 }, { "epoch": 43.96897220426632, "grad_norm": 1.0845439434051514, "learning_rate": 0.001, "loss": 2.2839, "step": 340100 }, { "epoch": 43.98190045248869, "grad_norm": 1.1332998275756836, "learning_rate": 0.001, "loss": 2.2848, "step": 340200 }, { "epoch": 43.994828700711054, "grad_norm": 1.1181721687316895, "learning_rate": 0.001, "loss": 2.2772, "step": 340300 }, { "epoch": 44.00775694893342, "grad_norm": 0.8601441383361816, "learning_rate": 0.001, "loss": 2.1982, "step": 340400 }, { "epoch": 44.020685197155785, "grad_norm": 1.3954271078109741, "learning_rate": 0.001, "loss": 2.1964, "step": 340500 }, { "epoch": 44.03361344537815, "grad_norm": 0.897794246673584, "learning_rate": 0.001, "loss": 2.188, "step": 340600 }, { "epoch": 44.04654169360052, "grad_norm": 0.8843953609466553, "learning_rate": 0.001, "loss": 2.2108, "step": 340700 }, { "epoch": 44.05946994182288, "grad_norm": 0.9297419786453247, "learning_rate": 0.001, "loss": 2.1888, "step": 340800 }, { "epoch": 44.07239819004525, "grad_norm": 0.8285385966300964, "learning_rate": 0.001, "loss": 2.2198, "step": 340900 }, { "epoch": 44.085326438267614, "grad_norm": 0.8591221570968628, "learning_rate": 0.001, "loss": 2.2061, "step": 341000 }, { "epoch": 44.09825468648998, "grad_norm": 2.1063015460968018, "learning_rate": 0.001, "loss": 2.1977, "step": 341100 }, { "epoch": 44.111182934712346, "grad_norm": 5.8396430015563965, "learning_rate": 0.001, "loss": 2.2191, "step": 341200 }, { "epoch": 44.12411118293471, "grad_norm": 21.38283920288086, "learning_rate": 0.001, "loss": 2.2176, "step": 341300 }, { "epoch": 44.13703943115708, "grad_norm": 0.9049410820007324, "learning_rate": 0.001, "loss": 2.2096, "step": 341400 }, { "epoch": 44.14996767937944, "grad_norm": 1.1013444662094116, "learning_rate": 0.001, "loss": 2.2053, "step": 341500 }, { "epoch": 44.16289592760181, "grad_norm": 3.068969488143921, "learning_rate": 0.001, "loss": 2.2134, "step": 341600 }, { "epoch": 44.175824175824175, "grad_norm": 0.9123636484146118, "learning_rate": 0.001, "loss": 2.1931, "step": 341700 }, { "epoch": 44.18875242404654, "grad_norm": 1.2563046216964722, "learning_rate": 0.001, "loss": 2.2106, "step": 341800 }, { "epoch": 44.20168067226891, "grad_norm": 0.772727906703949, "learning_rate": 0.001, "loss": 2.223, "step": 341900 }, { "epoch": 44.21460892049127, "grad_norm": 0.8637462258338928, "learning_rate": 0.001, "loss": 2.2179, "step": 342000 }, { "epoch": 44.22753716871364, "grad_norm": 0.91810142993927, "learning_rate": 0.001, "loss": 2.2192, "step": 342100 }, { "epoch": 44.240465416936004, "grad_norm": 0.9826820492744446, "learning_rate": 0.001, "loss": 2.244, "step": 342200 }, { "epoch": 44.25339366515837, "grad_norm": 0.8527712225914001, "learning_rate": 0.001, "loss": 2.2302, "step": 342300 }, { "epoch": 44.266321913380736, "grad_norm": 1.139571189880371, "learning_rate": 0.001, "loss": 2.2511, "step": 342400 }, { "epoch": 44.2792501616031, "grad_norm": 1.1016709804534912, "learning_rate": 0.001, "loss": 2.2079, "step": 342500 }, { "epoch": 44.29217840982547, "grad_norm": 6.264004230499268, "learning_rate": 0.001, "loss": 2.2461, "step": 342600 }, { "epoch": 44.30510665804783, "grad_norm": 1.0666799545288086, "learning_rate": 0.001, "loss": 2.2465, "step": 342700 }, { "epoch": 44.3180349062702, "grad_norm": 0.9493518471717834, "learning_rate": 0.001, "loss": 2.2469, "step": 342800 }, { "epoch": 44.330963154492565, "grad_norm": 2.543193817138672, "learning_rate": 0.001, "loss": 2.2324, "step": 342900 }, { "epoch": 44.34389140271493, "grad_norm": 2.698103904724121, "learning_rate": 0.001, "loss": 2.2151, "step": 343000 }, { "epoch": 44.3568196509373, "grad_norm": 1.3030449151992798, "learning_rate": 0.001, "loss": 2.2311, "step": 343100 }, { "epoch": 44.36974789915966, "grad_norm": 11.19513988494873, "learning_rate": 0.001, "loss": 2.2221, "step": 343200 }, { "epoch": 44.38267614738203, "grad_norm": 1.0408328771591187, "learning_rate": 0.001, "loss": 2.2302, "step": 343300 }, { "epoch": 44.395604395604394, "grad_norm": 0.9105063676834106, "learning_rate": 0.001, "loss": 2.2198, "step": 343400 }, { "epoch": 44.40853264382676, "grad_norm": 0.874471127986908, "learning_rate": 0.001, "loss": 2.2346, "step": 343500 }, { "epoch": 44.421460892049126, "grad_norm": 1.030591368675232, "learning_rate": 0.001, "loss": 2.2428, "step": 343600 }, { "epoch": 44.43438914027149, "grad_norm": 0.8589972853660583, "learning_rate": 0.001, "loss": 2.242, "step": 343700 }, { "epoch": 44.44731738849386, "grad_norm": 0.991093099117279, "learning_rate": 0.001, "loss": 2.2373, "step": 343800 }, { "epoch": 44.46024563671622, "grad_norm": 0.8400933742523193, "learning_rate": 0.001, "loss": 2.2497, "step": 343900 }, { "epoch": 44.47317388493859, "grad_norm": 1.129067063331604, "learning_rate": 0.001, "loss": 2.251, "step": 344000 }, { "epoch": 44.486102133160955, "grad_norm": 1.4743375778198242, "learning_rate": 0.001, "loss": 2.2493, "step": 344100 }, { "epoch": 44.49903038138332, "grad_norm": 2.649165153503418, "learning_rate": 0.001, "loss": 2.2548, "step": 344200 }, { "epoch": 44.51195862960569, "grad_norm": 1.2217050790786743, "learning_rate": 0.001, "loss": 2.2553, "step": 344300 }, { "epoch": 44.52488687782805, "grad_norm": 0.7848824262619019, "learning_rate": 0.001, "loss": 2.2586, "step": 344400 }, { "epoch": 44.53781512605042, "grad_norm": 1.0820854902267456, "learning_rate": 0.001, "loss": 2.2622, "step": 344500 }, { "epoch": 44.550743374272784, "grad_norm": 1.24954354763031, "learning_rate": 0.001, "loss": 2.2561, "step": 344600 }, { "epoch": 44.56367162249515, "grad_norm": 2.1667320728302, "learning_rate": 0.001, "loss": 2.2504, "step": 344700 }, { "epoch": 44.576599870717516, "grad_norm": 1.263750433921814, "learning_rate": 0.001, "loss": 2.2521, "step": 344800 }, { "epoch": 44.58952811893988, "grad_norm": 1.3493068218231201, "learning_rate": 0.001, "loss": 2.2482, "step": 344900 }, { "epoch": 44.60245636716225, "grad_norm": 1.3374370336532593, "learning_rate": 0.001, "loss": 2.2711, "step": 345000 }, { "epoch": 44.61538461538461, "grad_norm": 0.9874895215034485, "learning_rate": 0.001, "loss": 2.2556, "step": 345100 }, { "epoch": 44.62831286360698, "grad_norm": 0.8590736985206604, "learning_rate": 0.001, "loss": 2.2534, "step": 345200 }, { "epoch": 44.641241111829345, "grad_norm": 0.7827123999595642, "learning_rate": 0.001, "loss": 2.2723, "step": 345300 }, { "epoch": 44.65416936005171, "grad_norm": 2.360060930252075, "learning_rate": 0.001, "loss": 2.2694, "step": 345400 }, { "epoch": 44.66709760827408, "grad_norm": 3.4317195415496826, "learning_rate": 0.001, "loss": 2.262, "step": 345500 }, { "epoch": 44.68002585649644, "grad_norm": 1.0628113746643066, "learning_rate": 0.001, "loss": 2.2844, "step": 345600 }, { "epoch": 44.69295410471881, "grad_norm": 1.113126516342163, "learning_rate": 0.001, "loss": 2.2595, "step": 345700 }, { "epoch": 44.705882352941174, "grad_norm": 1.1126354932785034, "learning_rate": 0.001, "loss": 2.2831, "step": 345800 }, { "epoch": 44.71881060116354, "grad_norm": 1.0384259223937988, "learning_rate": 0.001, "loss": 2.2852, "step": 345900 }, { "epoch": 44.731738849385906, "grad_norm": 0.8733525276184082, "learning_rate": 0.001, "loss": 2.2554, "step": 346000 }, { "epoch": 44.74466709760827, "grad_norm": 25.639667510986328, "learning_rate": 0.001, "loss": 2.2621, "step": 346100 }, { "epoch": 44.75759534583064, "grad_norm": 1.4327316284179688, "learning_rate": 0.001, "loss": 2.2647, "step": 346200 }, { "epoch": 44.770523594053, "grad_norm": 0.8888167142868042, "learning_rate": 0.001, "loss": 2.2876, "step": 346300 }, { "epoch": 44.78345184227537, "grad_norm": 0.9916154146194458, "learning_rate": 0.001, "loss": 2.2783, "step": 346400 }, { "epoch": 44.796380090497735, "grad_norm": 0.9121406674385071, "learning_rate": 0.001, "loss": 2.2962, "step": 346500 }, { "epoch": 44.8093083387201, "grad_norm": 1.0007984638214111, "learning_rate": 0.001, "loss": 2.2613, "step": 346600 }, { "epoch": 44.822236586942466, "grad_norm": 0.810133695602417, "learning_rate": 0.001, "loss": 2.277, "step": 346700 }, { "epoch": 44.83516483516483, "grad_norm": 1.1453220844268799, "learning_rate": 0.001, "loss": 2.2711, "step": 346800 }, { "epoch": 44.8480930833872, "grad_norm": 1.075317621231079, "learning_rate": 0.001, "loss": 2.2776, "step": 346900 }, { "epoch": 44.861021331609564, "grad_norm": 0.9293371438980103, "learning_rate": 0.001, "loss": 2.2931, "step": 347000 }, { "epoch": 44.87394957983193, "grad_norm": 1.0223891735076904, "learning_rate": 0.001, "loss": 2.2577, "step": 347100 }, { "epoch": 44.886877828054295, "grad_norm": 1.1039865016937256, "learning_rate": 0.001, "loss": 2.2619, "step": 347200 }, { "epoch": 44.89980607627666, "grad_norm": 0.7974594235420227, "learning_rate": 0.001, "loss": 2.2706, "step": 347300 }, { "epoch": 44.91273432449903, "grad_norm": 1.0126097202301025, "learning_rate": 0.001, "loss": 2.2848, "step": 347400 }, { "epoch": 44.92566257272139, "grad_norm": 0.9688549041748047, "learning_rate": 0.001, "loss": 2.2914, "step": 347500 }, { "epoch": 44.93859082094376, "grad_norm": 1.0815123319625854, "learning_rate": 0.001, "loss": 2.2668, "step": 347600 }, { "epoch": 44.951519069166125, "grad_norm": 0.8834629058837891, "learning_rate": 0.001, "loss": 2.282, "step": 347700 }, { "epoch": 44.96444731738849, "grad_norm": 1.1221973896026611, "learning_rate": 0.001, "loss": 2.2709, "step": 347800 }, { "epoch": 44.977375565610856, "grad_norm": 537.2210693359375, "learning_rate": 0.001, "loss": 2.2658, "step": 347900 }, { "epoch": 44.99030381383322, "grad_norm": 1.3885926008224487, "learning_rate": 0.001, "loss": 2.2788, "step": 348000 }, { "epoch": 45.003232062055595, "grad_norm": 0.914176344871521, "learning_rate": 0.001, "loss": 2.2711, "step": 348100 }, { "epoch": 45.01616031027796, "grad_norm": 1.0270394086837769, "learning_rate": 0.001, "loss": 2.1992, "step": 348200 }, { "epoch": 45.02908855850033, "grad_norm": 1.4322627782821655, "learning_rate": 0.001, "loss": 2.1935, "step": 348300 }, { "epoch": 45.04201680672269, "grad_norm": 1.4238131046295166, "learning_rate": 0.001, "loss": 2.1803, "step": 348400 }, { "epoch": 45.05494505494506, "grad_norm": 1.3334845304489136, "learning_rate": 0.001, "loss": 2.1986, "step": 348500 }, { "epoch": 45.067873303167424, "grad_norm": 0.837833046913147, "learning_rate": 0.001, "loss": 2.1857, "step": 348600 }, { "epoch": 45.08080155138979, "grad_norm": 0.8859372735023499, "learning_rate": 0.001, "loss": 2.2014, "step": 348700 }, { "epoch": 45.093729799612156, "grad_norm": 0.9110192060470581, "learning_rate": 0.001, "loss": 2.1933, "step": 348800 }, { "epoch": 45.10665804783452, "grad_norm": 0.9696800708770752, "learning_rate": 0.001, "loss": 2.1985, "step": 348900 }, { "epoch": 45.11958629605689, "grad_norm": 0.9849048256874084, "learning_rate": 0.001, "loss": 2.2111, "step": 349000 }, { "epoch": 45.13251454427925, "grad_norm": 1.049054741859436, "learning_rate": 0.001, "loss": 2.1983, "step": 349100 }, { "epoch": 45.14544279250162, "grad_norm": 0.8702212572097778, "learning_rate": 0.001, "loss": 2.2111, "step": 349200 }, { "epoch": 45.158371040723985, "grad_norm": 2.4881904125213623, "learning_rate": 0.001, "loss": 2.1915, "step": 349300 }, { "epoch": 45.17129928894635, "grad_norm": 0.9486092329025269, "learning_rate": 0.001, "loss": 2.2232, "step": 349400 }, { "epoch": 45.18422753716872, "grad_norm": 0.9231517314910889, "learning_rate": 0.001, "loss": 2.2227, "step": 349500 }, { "epoch": 45.19715578539108, "grad_norm": 0.973095715045929, "learning_rate": 0.001, "loss": 2.2105, "step": 349600 }, { "epoch": 45.21008403361345, "grad_norm": 1.0947808027267456, "learning_rate": 0.001, "loss": 2.2167, "step": 349700 }, { "epoch": 45.223012281835814, "grad_norm": 0.8695765733718872, "learning_rate": 0.001, "loss": 2.2219, "step": 349800 }, { "epoch": 45.23594053005818, "grad_norm": 1.1604645252227783, "learning_rate": 0.001, "loss": 2.2377, "step": 349900 }, { "epoch": 45.248868778280546, "grad_norm": 0.8424009084701538, "learning_rate": 0.001, "loss": 2.2242, "step": 350000 }, { "epoch": 45.26179702650291, "grad_norm": 1.0286132097244263, "learning_rate": 0.001, "loss": 2.2162, "step": 350100 }, { "epoch": 45.27472527472528, "grad_norm": 1.0178916454315186, "learning_rate": 0.001, "loss": 2.2258, "step": 350200 }, { "epoch": 45.28765352294764, "grad_norm": 1.0752063989639282, "learning_rate": 0.001, "loss": 2.238, "step": 350300 }, { "epoch": 45.30058177117001, "grad_norm": 0.8937036991119385, "learning_rate": 0.001, "loss": 2.2214, "step": 350400 }, { "epoch": 45.313510019392375, "grad_norm": 0.8240094184875488, "learning_rate": 0.001, "loss": 2.2363, "step": 350500 }, { "epoch": 45.32643826761474, "grad_norm": 0.9749323129653931, "learning_rate": 0.001, "loss": 2.2165, "step": 350600 }, { "epoch": 45.339366515837106, "grad_norm": 1.3104677200317383, "learning_rate": 0.001, "loss": 2.2358, "step": 350700 }, { "epoch": 45.35229476405947, "grad_norm": 0.9417831897735596, "learning_rate": 0.001, "loss": 2.2363, "step": 350800 }, { "epoch": 45.36522301228184, "grad_norm": 1.0506197214126587, "learning_rate": 0.001, "loss": 2.25, "step": 350900 }, { "epoch": 45.378151260504204, "grad_norm": 3.621742010116577, "learning_rate": 0.001, "loss": 2.2427, "step": 351000 }, { "epoch": 45.39107950872657, "grad_norm": 0.8484338521957397, "learning_rate": 0.001, "loss": 2.2244, "step": 351100 }, { "epoch": 45.404007756948936, "grad_norm": 0.9896932244300842, "learning_rate": 0.001, "loss": 2.246, "step": 351200 }, { "epoch": 45.4169360051713, "grad_norm": 1.3651368618011475, "learning_rate": 0.001, "loss": 2.2182, "step": 351300 }, { "epoch": 45.42986425339367, "grad_norm": 3.8931894302368164, "learning_rate": 0.001, "loss": 2.2376, "step": 351400 }, { "epoch": 45.44279250161603, "grad_norm": 1.0801347494125366, "learning_rate": 0.001, "loss": 2.237, "step": 351500 }, { "epoch": 45.4557207498384, "grad_norm": 0.8678674101829529, "learning_rate": 0.001, "loss": 2.2587, "step": 351600 }, { "epoch": 45.468648998060765, "grad_norm": 1.0166538953781128, "learning_rate": 0.001, "loss": 2.2477, "step": 351700 }, { "epoch": 45.48157724628313, "grad_norm": 1.4725193977355957, "learning_rate": 0.001, "loss": 2.2329, "step": 351800 }, { "epoch": 45.494505494505496, "grad_norm": 1.5284016132354736, "learning_rate": 0.001, "loss": 2.2609, "step": 351900 }, { "epoch": 45.50743374272786, "grad_norm": 0.8809040188789368, "learning_rate": 0.001, "loss": 2.2751, "step": 352000 }, { "epoch": 45.52036199095023, "grad_norm": 0.88487309217453, "learning_rate": 0.001, "loss": 2.2298, "step": 352100 }, { "epoch": 45.533290239172594, "grad_norm": 0.9834949374198914, "learning_rate": 0.001, "loss": 2.2322, "step": 352200 }, { "epoch": 45.54621848739496, "grad_norm": 10.484971046447754, "learning_rate": 0.001, "loss": 2.2555, "step": 352300 }, { "epoch": 45.559146735617325, "grad_norm": 0.8183871507644653, "learning_rate": 0.001, "loss": 2.2557, "step": 352400 }, { "epoch": 45.57207498383969, "grad_norm": 0.84075528383255, "learning_rate": 0.001, "loss": 2.2669, "step": 352500 }, { "epoch": 45.58500323206206, "grad_norm": 1.4368740320205688, "learning_rate": 0.001, "loss": 2.2484, "step": 352600 }, { "epoch": 45.59793148028442, "grad_norm": 0.9366762042045593, "learning_rate": 0.001, "loss": 2.2359, "step": 352700 }, { "epoch": 45.61085972850679, "grad_norm": 1.2450802326202393, "learning_rate": 0.001, "loss": 2.2487, "step": 352800 }, { "epoch": 45.623787976729155, "grad_norm": 0.8974369168281555, "learning_rate": 0.001, "loss": 2.2488, "step": 352900 }, { "epoch": 45.63671622495152, "grad_norm": 1.027796983718872, "learning_rate": 0.001, "loss": 2.2602, "step": 353000 }, { "epoch": 45.649644473173886, "grad_norm": 0.8386234641075134, "learning_rate": 0.001, "loss": 2.2473, "step": 353100 }, { "epoch": 45.66257272139625, "grad_norm": 4.779776096343994, "learning_rate": 0.001, "loss": 2.2601, "step": 353200 }, { "epoch": 45.67550096961862, "grad_norm": 0.9824104309082031, "learning_rate": 0.001, "loss": 2.262, "step": 353300 }, { "epoch": 45.688429217840984, "grad_norm": 0.8639425039291382, "learning_rate": 0.001, "loss": 2.2536, "step": 353400 }, { "epoch": 45.70135746606335, "grad_norm": 0.9440513849258423, "learning_rate": 0.001, "loss": 2.2519, "step": 353500 }, { "epoch": 45.714285714285715, "grad_norm": 1.2257839441299438, "learning_rate": 0.001, "loss": 2.2646, "step": 353600 }, { "epoch": 45.72721396250808, "grad_norm": 1.0389056205749512, "learning_rate": 0.001, "loss": 2.2473, "step": 353700 }, { "epoch": 45.74014221073045, "grad_norm": 0.9414228796958923, "learning_rate": 0.001, "loss": 2.2596, "step": 353800 }, { "epoch": 45.75307045895281, "grad_norm": 1.1861122846603394, "learning_rate": 0.001, "loss": 2.2589, "step": 353900 }, { "epoch": 45.76599870717518, "grad_norm": 0.7727833986282349, "learning_rate": 0.001, "loss": 2.2642, "step": 354000 }, { "epoch": 45.778926955397544, "grad_norm": 1.1262893676757812, "learning_rate": 0.001, "loss": 2.2633, "step": 354100 }, { "epoch": 45.79185520361991, "grad_norm": 0.7626751661300659, "learning_rate": 0.001, "loss": 2.2813, "step": 354200 }, { "epoch": 45.804783451842276, "grad_norm": 1.0413029193878174, "learning_rate": 0.001, "loss": 2.273, "step": 354300 }, { "epoch": 45.81771170006464, "grad_norm": 0.9695804715156555, "learning_rate": 0.001, "loss": 2.2481, "step": 354400 }, { "epoch": 45.83063994828701, "grad_norm": 0.9481690526008606, "learning_rate": 0.001, "loss": 2.2504, "step": 354500 }, { "epoch": 45.84356819650937, "grad_norm": 0.7918584942817688, "learning_rate": 0.001, "loss": 2.2676, "step": 354600 }, { "epoch": 45.85649644473174, "grad_norm": 1.0245722532272339, "learning_rate": 0.001, "loss": 2.269, "step": 354700 }, { "epoch": 45.869424692954105, "grad_norm": 0.8697942495346069, "learning_rate": 0.001, "loss": 2.2636, "step": 354800 }, { "epoch": 45.88235294117647, "grad_norm": 1.1371862888336182, "learning_rate": 0.001, "loss": 2.2764, "step": 354900 }, { "epoch": 45.89528118939884, "grad_norm": 2.9310507774353027, "learning_rate": 0.001, "loss": 2.267, "step": 355000 }, { "epoch": 45.9082094376212, "grad_norm": 0.9409781694412231, "learning_rate": 0.001, "loss": 2.2604, "step": 355100 }, { "epoch": 45.92113768584357, "grad_norm": 0.8168300986289978, "learning_rate": 0.001, "loss": 2.271, "step": 355200 }, { "epoch": 45.934065934065934, "grad_norm": 0.7719116806983948, "learning_rate": 0.001, "loss": 2.2736, "step": 355300 }, { "epoch": 45.9469941822883, "grad_norm": 8.610112190246582, "learning_rate": 0.001, "loss": 2.2737, "step": 355400 }, { "epoch": 45.959922430510666, "grad_norm": 2.398695230484009, "learning_rate": 0.001, "loss": 2.273, "step": 355500 }, { "epoch": 45.97285067873303, "grad_norm": 0.8656231164932251, "learning_rate": 0.001, "loss": 2.2872, "step": 355600 }, { "epoch": 45.9857789269554, "grad_norm": 0.887792706489563, "learning_rate": 0.001, "loss": 2.2672, "step": 355700 }, { "epoch": 45.99870717517776, "grad_norm": 1.0175342559814453, "learning_rate": 0.001, "loss": 2.2663, "step": 355800 }, { "epoch": 46.01163542340013, "grad_norm": 1.0497517585754395, "learning_rate": 0.001, "loss": 2.2094, "step": 355900 }, { "epoch": 46.024563671622495, "grad_norm": 1.2283867597579956, "learning_rate": 0.001, "loss": 2.1939, "step": 356000 }, { "epoch": 46.03749191984486, "grad_norm": 2.5491135120391846, "learning_rate": 0.001, "loss": 2.1961, "step": 356100 }, { "epoch": 46.05042016806723, "grad_norm": 1.1835895776748657, "learning_rate": 0.001, "loss": 2.1905, "step": 356200 }, { "epoch": 46.06334841628959, "grad_norm": 1.771876335144043, "learning_rate": 0.001, "loss": 2.1906, "step": 356300 }, { "epoch": 46.07627666451196, "grad_norm": 0.8241035342216492, "learning_rate": 0.001, "loss": 2.1819, "step": 356400 }, { "epoch": 46.089204912734324, "grad_norm": 1.021978735923767, "learning_rate": 0.001, "loss": 2.1903, "step": 356500 }, { "epoch": 46.10213316095669, "grad_norm": 1.2026896476745605, "learning_rate": 0.001, "loss": 2.1973, "step": 356600 }, { "epoch": 46.115061409179056, "grad_norm": 60.076698303222656, "learning_rate": 0.001, "loss": 2.2037, "step": 356700 }, { "epoch": 46.12798965740142, "grad_norm": 0.9791095852851868, "learning_rate": 0.001, "loss": 2.2, "step": 356800 }, { "epoch": 46.14091790562379, "grad_norm": 1.1938540935516357, "learning_rate": 0.001, "loss": 2.2295, "step": 356900 }, { "epoch": 46.15384615384615, "grad_norm": 0.8534099459648132, "learning_rate": 0.001, "loss": 2.2162, "step": 357000 }, { "epoch": 46.16677440206852, "grad_norm": 1.1331923007965088, "learning_rate": 0.001, "loss": 2.2129, "step": 357100 }, { "epoch": 46.179702650290885, "grad_norm": 1.1332834959030151, "learning_rate": 0.001, "loss": 2.2283, "step": 357200 }, { "epoch": 46.19263089851325, "grad_norm": 1.187043309211731, "learning_rate": 0.001, "loss": 2.2153, "step": 357300 }, { "epoch": 46.20555914673562, "grad_norm": 0.957661509513855, "learning_rate": 0.001, "loss": 2.1989, "step": 357400 }, { "epoch": 46.21848739495798, "grad_norm": 0.8271764516830444, "learning_rate": 0.001, "loss": 2.2178, "step": 357500 }, { "epoch": 46.23141564318035, "grad_norm": 0.8669760227203369, "learning_rate": 0.001, "loss": 2.2128, "step": 357600 }, { "epoch": 46.244343891402714, "grad_norm": 0.9598681330680847, "learning_rate": 0.001, "loss": 2.2276, "step": 357700 }, { "epoch": 46.25727213962508, "grad_norm": 1.688615083694458, "learning_rate": 0.001, "loss": 2.2319, "step": 357800 }, { "epoch": 46.270200387847446, "grad_norm": 1.5812280178070068, "learning_rate": 0.001, "loss": 2.213, "step": 357900 }, { "epoch": 46.28312863606981, "grad_norm": 1.0714703798294067, "learning_rate": 0.001, "loss": 2.2138, "step": 358000 }, { "epoch": 46.29605688429218, "grad_norm": 0.9558077454566956, "learning_rate": 0.001, "loss": 2.2336, "step": 358100 }, { "epoch": 46.30898513251454, "grad_norm": 1.0404630899429321, "learning_rate": 0.001, "loss": 2.2208, "step": 358200 }, { "epoch": 46.32191338073691, "grad_norm": 1.2728852033615112, "learning_rate": 0.001, "loss": 2.191, "step": 358300 }, { "epoch": 46.334841628959275, "grad_norm": 0.8954976201057434, "learning_rate": 0.001, "loss": 2.2414, "step": 358400 }, { "epoch": 46.34776987718164, "grad_norm": 0.9611422419548035, "learning_rate": 0.001, "loss": 2.2498, "step": 358500 }, { "epoch": 46.36069812540401, "grad_norm": 1.4293123483657837, "learning_rate": 0.001, "loss": 2.2008, "step": 358600 }, { "epoch": 46.37362637362637, "grad_norm": 0.9777549505233765, "learning_rate": 0.001, "loss": 2.2142, "step": 358700 }, { "epoch": 46.38655462184874, "grad_norm": 1.1605472564697266, "learning_rate": 0.001, "loss": 2.2324, "step": 358800 }, { "epoch": 46.399482870071104, "grad_norm": 0.9573602080345154, "learning_rate": 0.001, "loss": 2.2051, "step": 358900 }, { "epoch": 46.41241111829347, "grad_norm": 1.063541293144226, "learning_rate": 0.001, "loss": 2.2185, "step": 359000 }, { "epoch": 46.425339366515836, "grad_norm": 0.9988188743591309, "learning_rate": 0.001, "loss": 2.2434, "step": 359100 }, { "epoch": 46.4382676147382, "grad_norm": 2.3498666286468506, "learning_rate": 0.001, "loss": 2.2501, "step": 359200 }, { "epoch": 46.45119586296057, "grad_norm": 2.509880304336548, "learning_rate": 0.001, "loss": 2.2372, "step": 359300 }, { "epoch": 46.46412411118293, "grad_norm": 1.2315821647644043, "learning_rate": 0.001, "loss": 2.2469, "step": 359400 }, { "epoch": 46.4770523594053, "grad_norm": 1.0325379371643066, "learning_rate": 0.001, "loss": 2.2232, "step": 359500 }, { "epoch": 46.489980607627665, "grad_norm": 1.0105063915252686, "learning_rate": 0.001, "loss": 2.226, "step": 359600 }, { "epoch": 46.50290885585003, "grad_norm": 0.9973015785217285, "learning_rate": 0.001, "loss": 2.2228, "step": 359700 }, { "epoch": 46.515837104072396, "grad_norm": 1.1855876445770264, "learning_rate": 0.001, "loss": 2.2241, "step": 359800 }, { "epoch": 46.52876535229476, "grad_norm": 1.1097016334533691, "learning_rate": 0.001, "loss": 2.2197, "step": 359900 }, { "epoch": 46.54169360051713, "grad_norm": 1.1750285625457764, "learning_rate": 0.001, "loss": 2.2499, "step": 360000 }, { "epoch": 46.554621848739494, "grad_norm": 0.8492341637611389, "learning_rate": 0.001, "loss": 2.2479, "step": 360100 }, { "epoch": 46.56755009696186, "grad_norm": 0.9291776418685913, "learning_rate": 0.001, "loss": 2.2431, "step": 360200 }, { "epoch": 46.580478345184225, "grad_norm": 0.9374492168426514, "learning_rate": 0.001, "loss": 2.2369, "step": 360300 }, { "epoch": 46.59340659340659, "grad_norm": 1.0223523378372192, "learning_rate": 0.001, "loss": 2.2028, "step": 360400 }, { "epoch": 46.60633484162896, "grad_norm": 1.2990813255310059, "learning_rate": 0.001, "loss": 2.2432, "step": 360500 }, { "epoch": 46.61926308985132, "grad_norm": 1.1917204856872559, "learning_rate": 0.001, "loss": 2.25, "step": 360600 }, { "epoch": 46.63219133807369, "grad_norm": 0.7788655757904053, "learning_rate": 0.001, "loss": 2.2503, "step": 360700 }, { "epoch": 46.645119586296055, "grad_norm": 1.3908066749572754, "learning_rate": 0.001, "loss": 2.2341, "step": 360800 }, { "epoch": 46.65804783451842, "grad_norm": 0.9585310220718384, "learning_rate": 0.001, "loss": 2.2419, "step": 360900 }, { "epoch": 46.670976082740786, "grad_norm": 0.9780144095420837, "learning_rate": 0.001, "loss": 2.2636, "step": 361000 }, { "epoch": 46.68390433096315, "grad_norm": 1.2332285642623901, "learning_rate": 0.001, "loss": 2.2578, "step": 361100 }, { "epoch": 46.69683257918552, "grad_norm": 0.9136077761650085, "learning_rate": 0.001, "loss": 2.2642, "step": 361200 }, { "epoch": 46.709760827407884, "grad_norm": 1.1587622165679932, "learning_rate": 0.001, "loss": 2.2878, "step": 361300 }, { "epoch": 46.72268907563025, "grad_norm": 4.720739364624023, "learning_rate": 0.001, "loss": 2.2636, "step": 361400 }, { "epoch": 46.735617323852615, "grad_norm": 1.0120750665664673, "learning_rate": 0.001, "loss": 2.2453, "step": 361500 }, { "epoch": 46.74854557207498, "grad_norm": 0.8377371430397034, "learning_rate": 0.001, "loss": 2.253, "step": 361600 }, { "epoch": 46.76147382029735, "grad_norm": 1.113764762878418, "learning_rate": 0.001, "loss": 2.2561, "step": 361700 }, { "epoch": 46.77440206851971, "grad_norm": 1.264032244682312, "learning_rate": 0.001, "loss": 2.2711, "step": 361800 }, { "epoch": 46.78733031674208, "grad_norm": 17.49085235595703, "learning_rate": 0.001, "loss": 2.2471, "step": 361900 }, { "epoch": 46.800258564964444, "grad_norm": 1.0510810613632202, "learning_rate": 0.001, "loss": 2.2471, "step": 362000 }, { "epoch": 46.81318681318681, "grad_norm": 1.387086033821106, "learning_rate": 0.001, "loss": 2.2316, "step": 362100 }, { "epoch": 46.826115061409176, "grad_norm": 1.0679619312286377, "learning_rate": 0.001, "loss": 2.272, "step": 362200 }, { "epoch": 46.83904330963154, "grad_norm": 0.9308840036392212, "learning_rate": 0.001, "loss": 2.2859, "step": 362300 }, { "epoch": 46.85197155785391, "grad_norm": 1.0520144701004028, "learning_rate": 0.001, "loss": 2.2686, "step": 362400 }, { "epoch": 46.864899806076274, "grad_norm": 0.9338826537132263, "learning_rate": 0.001, "loss": 2.2666, "step": 362500 }, { "epoch": 46.87782805429864, "grad_norm": 1.2177042961120605, "learning_rate": 0.001, "loss": 2.2658, "step": 362600 }, { "epoch": 46.890756302521005, "grad_norm": 1.1337796449661255, "learning_rate": 0.001, "loss": 2.2632, "step": 362700 }, { "epoch": 46.90368455074337, "grad_norm": 0.9732680916786194, "learning_rate": 0.001, "loss": 2.2552, "step": 362800 }, { "epoch": 46.91661279896574, "grad_norm": 0.8975716233253479, "learning_rate": 0.001, "loss": 2.2656, "step": 362900 }, { "epoch": 46.9295410471881, "grad_norm": 1.066333293914795, "learning_rate": 0.001, "loss": 2.286, "step": 363000 }, { "epoch": 46.94246929541047, "grad_norm": 1.021390676498413, "learning_rate": 0.001, "loss": 2.2754, "step": 363100 }, { "epoch": 46.955397543632834, "grad_norm": 0.9781057238578796, "learning_rate": 0.001, "loss": 2.2798, "step": 363200 }, { "epoch": 46.9683257918552, "grad_norm": 1.0121208429336548, "learning_rate": 0.001, "loss": 2.274, "step": 363300 }, { "epoch": 46.981254040077566, "grad_norm": 1.174600601196289, "learning_rate": 0.001, "loss": 2.2605, "step": 363400 }, { "epoch": 46.99418228829994, "grad_norm": 0.8982529640197754, "learning_rate": 0.001, "loss": 2.2706, "step": 363500 }, { "epoch": 47.007110536522305, "grad_norm": 2333.178466796875, "learning_rate": 0.001, "loss": 2.1774, "step": 363600 }, { "epoch": 47.02003878474467, "grad_norm": 1.0440080165863037, "learning_rate": 0.001, "loss": 2.1668, "step": 363700 }, { "epoch": 47.032967032967036, "grad_norm": 1.0467873811721802, "learning_rate": 0.001, "loss": 2.167, "step": 363800 }, { "epoch": 47.0458952811894, "grad_norm": 1.2102386951446533, "learning_rate": 0.001, "loss": 2.1806, "step": 363900 }, { "epoch": 47.05882352941177, "grad_norm": 1.20137357711792, "learning_rate": 0.001, "loss": 2.188, "step": 364000 }, { "epoch": 47.071751777634134, "grad_norm": 0.9604125618934631, "learning_rate": 0.001, "loss": 2.1862, "step": 364100 }, { "epoch": 47.0846800258565, "grad_norm": 1.0274862051010132, "learning_rate": 0.001, "loss": 2.1797, "step": 364200 }, { "epoch": 47.097608274078866, "grad_norm": 1.2253738641738892, "learning_rate": 0.001, "loss": 2.1851, "step": 364300 }, { "epoch": 47.11053652230123, "grad_norm": 0.9439888000488281, "learning_rate": 0.001, "loss": 2.1939, "step": 364400 }, { "epoch": 47.1234647705236, "grad_norm": 1.3436098098754883, "learning_rate": 0.001, "loss": 2.1833, "step": 364500 }, { "epoch": 47.13639301874596, "grad_norm": 0.9398271441459656, "learning_rate": 0.001, "loss": 2.1776, "step": 364600 }, { "epoch": 47.14932126696833, "grad_norm": 0.8494735956192017, "learning_rate": 0.001, "loss": 2.2061, "step": 364700 }, { "epoch": 47.162249515190695, "grad_norm": 0.9351396560668945, "learning_rate": 0.001, "loss": 2.1822, "step": 364800 }, { "epoch": 47.17517776341306, "grad_norm": 0.7765023112297058, "learning_rate": 0.001, "loss": 2.2095, "step": 364900 }, { "epoch": 47.188106011635426, "grad_norm": 0.958942711353302, "learning_rate": 0.001, "loss": 2.1868, "step": 365000 }, { "epoch": 47.20103425985779, "grad_norm": 1.4749884605407715, "learning_rate": 0.001, "loss": 2.2051, "step": 365100 }, { "epoch": 47.21396250808016, "grad_norm": 3.3716955184936523, "learning_rate": 0.001, "loss": 2.1995, "step": 365200 }, { "epoch": 47.226890756302524, "grad_norm": 0.9772596955299377, "learning_rate": 0.001, "loss": 2.2065, "step": 365300 }, { "epoch": 47.23981900452489, "grad_norm": 0.8580068349838257, "learning_rate": 0.001, "loss": 2.1893, "step": 365400 }, { "epoch": 47.252747252747255, "grad_norm": 2.6016573905944824, "learning_rate": 0.001, "loss": 2.1989, "step": 365500 }, { "epoch": 47.26567550096962, "grad_norm": 0.7326056957244873, "learning_rate": 0.001, "loss": 2.2125, "step": 365600 }, { "epoch": 47.27860374919199, "grad_norm": 0.6939808130264282, "learning_rate": 0.001, "loss": 2.2189, "step": 365700 }, { "epoch": 47.29153199741435, "grad_norm": 1.1010019779205322, "learning_rate": 0.001, "loss": 2.1987, "step": 365800 }, { "epoch": 47.30446024563672, "grad_norm": 1.4878571033477783, "learning_rate": 0.001, "loss": 2.208, "step": 365900 }, { "epoch": 47.317388493859085, "grad_norm": 1.0273863077163696, "learning_rate": 0.001, "loss": 2.2032, "step": 366000 }, { "epoch": 47.33031674208145, "grad_norm": 1.210166573524475, "learning_rate": 0.001, "loss": 2.2185, "step": 366100 }, { "epoch": 47.343244990303816, "grad_norm": 0.9100984334945679, "learning_rate": 0.001, "loss": 2.1941, "step": 366200 }, { "epoch": 47.35617323852618, "grad_norm": 1.1619065999984741, "learning_rate": 0.001, "loss": 2.2151, "step": 366300 }, { "epoch": 47.36910148674855, "grad_norm": 1.1569732427597046, "learning_rate": 0.001, "loss": 2.2094, "step": 366400 }, { "epoch": 47.382029734970914, "grad_norm": 0.9754027724266052, "learning_rate": 0.001, "loss": 2.2202, "step": 366500 }, { "epoch": 47.39495798319328, "grad_norm": 0.982079803943634, "learning_rate": 0.001, "loss": 2.2068, "step": 366600 }, { "epoch": 47.407886231415645, "grad_norm": 2.721086025238037, "learning_rate": 0.001, "loss": 2.2195, "step": 366700 }, { "epoch": 47.42081447963801, "grad_norm": 1.5045593976974487, "learning_rate": 0.001, "loss": 2.2244, "step": 366800 }, { "epoch": 47.43374272786038, "grad_norm": 1.597566843032837, "learning_rate": 0.001, "loss": 2.2288, "step": 366900 }, { "epoch": 47.44667097608274, "grad_norm": 0.9917261600494385, "learning_rate": 0.001, "loss": 2.2228, "step": 367000 }, { "epoch": 47.45959922430511, "grad_norm": 0.8921237587928772, "learning_rate": 0.001, "loss": 2.2116, "step": 367100 }, { "epoch": 47.472527472527474, "grad_norm": 0.8542685508728027, "learning_rate": 0.001, "loss": 2.2076, "step": 367200 }, { "epoch": 47.48545572074984, "grad_norm": 0.9745506048202515, "learning_rate": 0.001, "loss": 2.1988, "step": 367300 }, { "epoch": 47.498383968972206, "grad_norm": 1.7240055799484253, "learning_rate": 0.001, "loss": 2.219, "step": 367400 }, { "epoch": 47.51131221719457, "grad_norm": 1.1177679300308228, "learning_rate": 0.001, "loss": 2.2244, "step": 367500 }, { "epoch": 47.52424046541694, "grad_norm": 0.9412502646446228, "learning_rate": 0.001, "loss": 2.2382, "step": 367600 }, { "epoch": 47.5371687136393, "grad_norm": 0.73223876953125, "learning_rate": 0.001, "loss": 2.2585, "step": 367700 }, { "epoch": 47.55009696186167, "grad_norm": 0.8378291130065918, "learning_rate": 0.001, "loss": 2.2193, "step": 367800 }, { "epoch": 47.563025210084035, "grad_norm": 1.006942629814148, "learning_rate": 0.001, "loss": 2.2145, "step": 367900 }, { "epoch": 47.5759534583064, "grad_norm": 0.9432110786437988, "learning_rate": 0.001, "loss": 2.2319, "step": 368000 }, { "epoch": 47.58888170652877, "grad_norm": 1.1263530254364014, "learning_rate": 0.001, "loss": 2.2515, "step": 368100 }, { "epoch": 47.60180995475113, "grad_norm": 8.5414457321167, "learning_rate": 0.001, "loss": 2.2387, "step": 368200 }, { "epoch": 47.6147382029735, "grad_norm": 1.4740331172943115, "learning_rate": 0.001, "loss": 2.22, "step": 368300 }, { "epoch": 47.627666451195864, "grad_norm": 1.1033475399017334, "learning_rate": 0.001, "loss": 2.2331, "step": 368400 }, { "epoch": 47.64059469941823, "grad_norm": 1.2878447771072388, "learning_rate": 0.001, "loss": 2.238, "step": 368500 }, { "epoch": 47.653522947640596, "grad_norm": 1.0546878576278687, "learning_rate": 0.001, "loss": 2.2241, "step": 368600 }, { "epoch": 47.66645119586296, "grad_norm": 0.9102590084075928, "learning_rate": 0.001, "loss": 2.2334, "step": 368700 }, { "epoch": 47.67937944408533, "grad_norm": 1.0803381204605103, "learning_rate": 0.001, "loss": 2.2404, "step": 368800 }, { "epoch": 47.69230769230769, "grad_norm": 1.0590580701828003, "learning_rate": 0.001, "loss": 2.2353, "step": 368900 }, { "epoch": 47.70523594053006, "grad_norm": 1.282676100730896, "learning_rate": 0.001, "loss": 2.2469, "step": 369000 }, { "epoch": 47.718164188752425, "grad_norm": 0.8681998252868652, "learning_rate": 0.001, "loss": 2.2401, "step": 369100 }, { "epoch": 47.73109243697479, "grad_norm": 1.3025223016738892, "learning_rate": 0.001, "loss": 2.2344, "step": 369200 }, { "epoch": 47.74402068519716, "grad_norm": 2.166748046875, "learning_rate": 0.001, "loss": 2.2413, "step": 369300 }, { "epoch": 47.75694893341952, "grad_norm": 1.017893671989441, "learning_rate": 0.001, "loss": 2.2397, "step": 369400 }, { "epoch": 47.76987718164189, "grad_norm": 1.1270626783370972, "learning_rate": 0.001, "loss": 2.263, "step": 369500 }, { "epoch": 47.782805429864254, "grad_norm": 2.9705746173858643, "learning_rate": 0.001, "loss": 2.2564, "step": 369600 }, { "epoch": 47.79573367808662, "grad_norm": 1.008923888206482, "learning_rate": 0.001, "loss": 2.2409, "step": 369700 }, { "epoch": 47.808661926308986, "grad_norm": 1.063834309577942, "learning_rate": 0.001, "loss": 2.2329, "step": 369800 }, { "epoch": 47.82159017453135, "grad_norm": 1.0086612701416016, "learning_rate": 0.001, "loss": 2.2549, "step": 369900 }, { "epoch": 47.83451842275372, "grad_norm": 1.0084576606750488, "learning_rate": 0.001, "loss": 2.248, "step": 370000 }, { "epoch": 47.84744667097608, "grad_norm": 1.0366301536560059, "learning_rate": 0.001, "loss": 2.2581, "step": 370100 }, { "epoch": 47.86037491919845, "grad_norm": 2.103281259536743, "learning_rate": 0.001, "loss": 2.2469, "step": 370200 }, { "epoch": 47.873303167420815, "grad_norm": 0.8950073719024658, "learning_rate": 0.001, "loss": 2.2464, "step": 370300 }, { "epoch": 47.88623141564318, "grad_norm": 1.2860063314437866, "learning_rate": 0.001, "loss": 2.24, "step": 370400 }, { "epoch": 47.89915966386555, "grad_norm": 0.9552168846130371, "learning_rate": 0.001, "loss": 2.269, "step": 370500 }, { "epoch": 47.91208791208791, "grad_norm": 1.4447917938232422, "learning_rate": 0.001, "loss": 2.2677, "step": 370600 }, { "epoch": 47.92501616031028, "grad_norm": 1.1287517547607422, "learning_rate": 0.001, "loss": 2.2405, "step": 370700 }, { "epoch": 47.937944408532644, "grad_norm": 0.8614937663078308, "learning_rate": 0.001, "loss": 2.2783, "step": 370800 }, { "epoch": 47.95087265675501, "grad_norm": 0.899679958820343, "learning_rate": 0.001, "loss": 2.2748, "step": 370900 }, { "epoch": 47.963800904977376, "grad_norm": 0.906891942024231, "learning_rate": 0.001, "loss": 2.2606, "step": 371000 }, { "epoch": 47.97672915319974, "grad_norm": 0.9147067666053772, "learning_rate": 0.001, "loss": 2.2724, "step": 371100 }, { "epoch": 47.98965740142211, "grad_norm": 1.0170706510543823, "learning_rate": 0.001, "loss": 2.2599, "step": 371200 }, { "epoch": 48.00258564964447, "grad_norm": 0.7982606291770935, "learning_rate": 0.001, "loss": 2.2677, "step": 371300 }, { "epoch": 48.01551389786684, "grad_norm": 2.6380629539489746, "learning_rate": 0.001, "loss": 2.1565, "step": 371400 }, { "epoch": 48.028442146089205, "grad_norm": 1.144717812538147, "learning_rate": 0.001, "loss": 2.1862, "step": 371500 }, { "epoch": 48.04137039431157, "grad_norm": 0.8527225255966187, "learning_rate": 0.001, "loss": 2.1654, "step": 371600 }, { "epoch": 48.05429864253394, "grad_norm": 0.7906619906425476, "learning_rate": 0.001, "loss": 2.1637, "step": 371700 }, { "epoch": 48.0672268907563, "grad_norm": 0.9643663763999939, "learning_rate": 0.001, "loss": 2.1676, "step": 371800 }, { "epoch": 48.08015513897867, "grad_norm": 0.8438687920570374, "learning_rate": 0.001, "loss": 2.1889, "step": 371900 }, { "epoch": 48.093083387201034, "grad_norm": 0.7937598824501038, "learning_rate": 0.001, "loss": 2.164, "step": 372000 }, { "epoch": 48.1060116354234, "grad_norm": 0.8740502595901489, "learning_rate": 0.001, "loss": 2.1596, "step": 372100 }, { "epoch": 48.118939883645766, "grad_norm": 1.0240522623062134, "learning_rate": 0.001, "loss": 2.1976, "step": 372200 }, { "epoch": 48.13186813186813, "grad_norm": 1.1151461601257324, "learning_rate": 0.001, "loss": 2.1728, "step": 372300 }, { "epoch": 48.1447963800905, "grad_norm": 0.8320398926734924, "learning_rate": 0.001, "loss": 2.188, "step": 372400 }, { "epoch": 48.15772462831286, "grad_norm": 0.8251625299453735, "learning_rate": 0.001, "loss": 2.1781, "step": 372500 }, { "epoch": 48.17065287653523, "grad_norm": 0.9371460676193237, "learning_rate": 0.001, "loss": 2.1862, "step": 372600 }, { "epoch": 48.183581124757595, "grad_norm": 1.866036295890808, "learning_rate": 0.001, "loss": 2.1851, "step": 372700 }, { "epoch": 48.19650937297996, "grad_norm": 0.9163609743118286, "learning_rate": 0.001, "loss": 2.1728, "step": 372800 }, { "epoch": 48.209437621202326, "grad_norm": 0.7908868193626404, "learning_rate": 0.001, "loss": 2.1965, "step": 372900 }, { "epoch": 48.22236586942469, "grad_norm": 1.1599829196929932, "learning_rate": 0.001, "loss": 2.202, "step": 373000 }, { "epoch": 48.23529411764706, "grad_norm": 1.4006571769714355, "learning_rate": 0.001, "loss": 2.2096, "step": 373100 }, { "epoch": 48.248222365869424, "grad_norm": 0.7479515671730042, "learning_rate": 0.001, "loss": 2.1952, "step": 373200 }, { "epoch": 48.26115061409179, "grad_norm": 0.8697400093078613, "learning_rate": 0.001, "loss": 2.1975, "step": 373300 }, { "epoch": 48.274078862314155, "grad_norm": 0.9853440523147583, "learning_rate": 0.001, "loss": 2.1918, "step": 373400 }, { "epoch": 48.28700711053652, "grad_norm": 1.2271625995635986, "learning_rate": 0.001, "loss": 2.2081, "step": 373500 }, { "epoch": 48.29993535875889, "grad_norm": 0.8805230855941772, "learning_rate": 0.001, "loss": 2.2195, "step": 373600 }, { "epoch": 48.31286360698125, "grad_norm": 0.8905846476554871, "learning_rate": 0.001, "loss": 2.1998, "step": 373700 }, { "epoch": 48.32579185520362, "grad_norm": 0.8163439631462097, "learning_rate": 0.001, "loss": 2.1898, "step": 373800 }, { "epoch": 48.338720103425985, "grad_norm": 1.0181678533554077, "learning_rate": 0.001, "loss": 2.2203, "step": 373900 }, { "epoch": 48.35164835164835, "grad_norm": 0.7765902280807495, "learning_rate": 0.001, "loss": 2.2107, "step": 374000 }, { "epoch": 48.364576599870716, "grad_norm": 0.7330175042152405, "learning_rate": 0.001, "loss": 2.2059, "step": 374100 }, { "epoch": 48.37750484809308, "grad_norm": 0.9315392971038818, "learning_rate": 0.001, "loss": 2.2251, "step": 374200 }, { "epoch": 48.39043309631545, "grad_norm": 4.825009822845459, "learning_rate": 0.001, "loss": 2.2096, "step": 374300 }, { "epoch": 48.403361344537814, "grad_norm": 1.1610294580459595, "learning_rate": 0.001, "loss": 2.1981, "step": 374400 }, { "epoch": 48.41628959276018, "grad_norm": 0.8149643540382385, "learning_rate": 0.001, "loss": 2.2148, "step": 374500 }, { "epoch": 48.429217840982545, "grad_norm": 0.9828343391418457, "learning_rate": 0.001, "loss": 2.2166, "step": 374600 }, { "epoch": 48.44214608920491, "grad_norm": 0.9503055810928345, "learning_rate": 0.001, "loss": 2.2015, "step": 374700 }, { "epoch": 48.45507433742728, "grad_norm": 0.8060276508331299, "learning_rate": 0.001, "loss": 2.209, "step": 374800 }, { "epoch": 48.46800258564964, "grad_norm": 0.9254444241523743, "learning_rate": 0.001, "loss": 2.2151, "step": 374900 }, { "epoch": 48.48093083387201, "grad_norm": 8.381851196289062, "learning_rate": 0.001, "loss": 2.2193, "step": 375000 }, { "epoch": 48.493859082094374, "grad_norm": 0.8814203143119812, "learning_rate": 0.001, "loss": 2.2338, "step": 375100 }, { "epoch": 48.50678733031674, "grad_norm": 0.9834265112876892, "learning_rate": 0.001, "loss": 2.2308, "step": 375200 }, { "epoch": 48.519715578539106, "grad_norm": 0.943657636642456, "learning_rate": 0.001, "loss": 2.2235, "step": 375300 }, { "epoch": 48.53264382676147, "grad_norm": 1.0432922840118408, "learning_rate": 0.001, "loss": 2.2364, "step": 375400 }, { "epoch": 48.54557207498384, "grad_norm": 0.7932634949684143, "learning_rate": 0.001, "loss": 2.225, "step": 375500 }, { "epoch": 48.558500323206204, "grad_norm": 1.3452211618423462, "learning_rate": 0.001, "loss": 2.2206, "step": 375600 }, { "epoch": 48.57142857142857, "grad_norm": 0.8725557327270508, "learning_rate": 0.001, "loss": 2.2195, "step": 375700 }, { "epoch": 48.584356819650935, "grad_norm": 0.8341683745384216, "learning_rate": 0.001, "loss": 2.2384, "step": 375800 }, { "epoch": 48.5972850678733, "grad_norm": 0.7458724975585938, "learning_rate": 0.001, "loss": 2.2344, "step": 375900 }, { "epoch": 48.61021331609567, "grad_norm": 0.872810423374176, "learning_rate": 0.001, "loss": 2.2421, "step": 376000 }, { "epoch": 48.62314156431803, "grad_norm": 0.9577134251594543, "learning_rate": 0.001, "loss": 2.2308, "step": 376100 }, { "epoch": 48.6360698125404, "grad_norm": 0.8842284083366394, "learning_rate": 0.001, "loss": 2.2187, "step": 376200 }, { "epoch": 48.648998060762764, "grad_norm": 1.0450397729873657, "learning_rate": 0.001, "loss": 2.2627, "step": 376300 }, { "epoch": 48.66192630898513, "grad_norm": 1.6697964668273926, "learning_rate": 0.001, "loss": 2.2079, "step": 376400 }, { "epoch": 48.674854557207496, "grad_norm": 1.1405308246612549, "learning_rate": 0.001, "loss": 2.2453, "step": 376500 }, { "epoch": 48.68778280542986, "grad_norm": 0.7862551808357239, "learning_rate": 0.001, "loss": 2.2337, "step": 376600 }, { "epoch": 48.70071105365223, "grad_norm": 1.0977001190185547, "learning_rate": 0.001, "loss": 2.2075, "step": 376700 }, { "epoch": 48.71363930187459, "grad_norm": 0.791750967502594, "learning_rate": 0.001, "loss": 2.2424, "step": 376800 }, { "epoch": 48.72656755009696, "grad_norm": 0.7496529221534729, "learning_rate": 0.001, "loss": 2.2417, "step": 376900 }, { "epoch": 48.739495798319325, "grad_norm": 0.9083983898162842, "learning_rate": 0.001, "loss": 2.2114, "step": 377000 }, { "epoch": 48.75242404654169, "grad_norm": 0.9976235032081604, "learning_rate": 0.001, "loss": 2.2341, "step": 377100 }, { "epoch": 48.76535229476406, "grad_norm": 11.685735702514648, "learning_rate": 0.001, "loss": 2.2479, "step": 377200 }, { "epoch": 48.77828054298642, "grad_norm": 0.7485424280166626, "learning_rate": 0.001, "loss": 2.2224, "step": 377300 }, { "epoch": 48.79120879120879, "grad_norm": 0.9447299838066101, "learning_rate": 0.001, "loss": 2.2381, "step": 377400 }, { "epoch": 48.804137039431154, "grad_norm": 1.0827471017837524, "learning_rate": 0.001, "loss": 2.219, "step": 377500 }, { "epoch": 48.81706528765352, "grad_norm": 0.9238104224205017, "learning_rate": 0.001, "loss": 2.2508, "step": 377600 }, { "epoch": 48.829993535875886, "grad_norm": 0.9541816115379333, "learning_rate": 0.001, "loss": 2.2469, "step": 377700 }, { "epoch": 48.84292178409825, "grad_norm": 4.927735328674316, "learning_rate": 0.001, "loss": 2.239, "step": 377800 }, { "epoch": 48.85585003232062, "grad_norm": 1.194907307624817, "learning_rate": 0.001, "loss": 2.2257, "step": 377900 }, { "epoch": 48.86877828054298, "grad_norm": 1.0521007776260376, "learning_rate": 0.001, "loss": 2.2524, "step": 378000 }, { "epoch": 48.88170652876535, "grad_norm": 1.0285857915878296, "learning_rate": 0.001, "loss": 2.245, "step": 378100 }, { "epoch": 48.894634776987715, "grad_norm": 0.750813901424408, "learning_rate": 0.001, "loss": 2.259, "step": 378200 }, { "epoch": 48.90756302521008, "grad_norm": 1.0513558387756348, "learning_rate": 0.001, "loss": 2.2437, "step": 378300 }, { "epoch": 48.92049127343245, "grad_norm": 0.9479520916938782, "learning_rate": 0.001, "loss": 2.2566, "step": 378400 }, { "epoch": 48.93341952165481, "grad_norm": 1.809870719909668, "learning_rate": 0.001, "loss": 2.2398, "step": 378500 }, { "epoch": 48.94634776987718, "grad_norm": 1.1624903678894043, "learning_rate": 0.001, "loss": 2.2463, "step": 378600 }, { "epoch": 48.959276018099544, "grad_norm": 0.8766412138938904, "learning_rate": 0.001, "loss": 2.2385, "step": 378700 }, { "epoch": 48.97220426632191, "grad_norm": 0.8656702041625977, "learning_rate": 0.001, "loss": 2.2761, "step": 378800 }, { "epoch": 48.985132514544276, "grad_norm": 1.7489672899246216, "learning_rate": 0.001, "loss": 2.2567, "step": 378900 }, { "epoch": 48.99806076276664, "grad_norm": 1.1491901874542236, "learning_rate": 0.001, "loss": 2.2493, "step": 379000 }, { "epoch": 49.010989010989015, "grad_norm": 1.2551854848861694, "learning_rate": 0.001, "loss": 2.1501, "step": 379100 }, { "epoch": 49.02391725921138, "grad_norm": 1.0843181610107422, "learning_rate": 0.001, "loss": 2.1307, "step": 379200 }, { "epoch": 49.036845507433746, "grad_norm": 780.0646362304688, "learning_rate": 0.001, "loss": 2.1873, "step": 379300 }, { "epoch": 49.04977375565611, "grad_norm": 0.7890309691429138, "learning_rate": 0.001, "loss": 2.1712, "step": 379400 }, { "epoch": 49.06270200387848, "grad_norm": 0.9211398959159851, "learning_rate": 0.001, "loss": 2.1747, "step": 379500 }, { "epoch": 49.075630252100844, "grad_norm": 0.8555267453193665, "learning_rate": 0.001, "loss": 2.18, "step": 379600 }, { "epoch": 49.08855850032321, "grad_norm": 1.1375633478164673, "learning_rate": 0.001, "loss": 2.1782, "step": 379700 }, { "epoch": 49.101486748545575, "grad_norm": 0.815634548664093, "learning_rate": 0.001, "loss": 2.1821, "step": 379800 }, { "epoch": 49.11441499676794, "grad_norm": 0.8714706301689148, "learning_rate": 0.001, "loss": 2.1835, "step": 379900 }, { "epoch": 49.12734324499031, "grad_norm": 1.6103525161743164, "learning_rate": 0.001, "loss": 2.1947, "step": 380000 }, { "epoch": 49.14027149321267, "grad_norm": 1.043413519859314, "learning_rate": 0.001, "loss": 2.1766, "step": 380100 }, { "epoch": 49.15319974143504, "grad_norm": 0.8937097191810608, "learning_rate": 0.001, "loss": 2.1882, "step": 380200 }, { "epoch": 49.166127989657404, "grad_norm": 0.9687947630882263, "learning_rate": 0.001, "loss": 2.1897, "step": 380300 }, { "epoch": 49.17905623787977, "grad_norm": 1.903564691543579, "learning_rate": 0.001, "loss": 2.192, "step": 380400 }, { "epoch": 49.191984486102136, "grad_norm": 5.46146821975708, "learning_rate": 0.001, "loss": 2.1899, "step": 380500 }, { "epoch": 49.2049127343245, "grad_norm": 0.8936435580253601, "learning_rate": 0.001, "loss": 2.2069, "step": 380600 }, { "epoch": 49.21784098254687, "grad_norm": 0.8890781402587891, "learning_rate": 0.001, "loss": 2.1859, "step": 380700 }, { "epoch": 49.23076923076923, "grad_norm": 0.8884627819061279, "learning_rate": 0.001, "loss": 2.1842, "step": 380800 }, { "epoch": 49.2436974789916, "grad_norm": 1.002982258796692, "learning_rate": 0.001, "loss": 2.1957, "step": 380900 }, { "epoch": 49.256625727213965, "grad_norm": 0.9806551933288574, "learning_rate": 0.001, "loss": 2.1657, "step": 381000 }, { "epoch": 49.26955397543633, "grad_norm": 1.2891792058944702, "learning_rate": 0.001, "loss": 2.202, "step": 381100 }, { "epoch": 49.2824822236587, "grad_norm": 0.7823850512504578, "learning_rate": 0.001, "loss": 2.197, "step": 381200 }, { "epoch": 49.29541047188106, "grad_norm": 1.714310884475708, "learning_rate": 0.001, "loss": 2.1865, "step": 381300 }, { "epoch": 49.30833872010343, "grad_norm": 0.838390052318573, "learning_rate": 0.001, "loss": 2.2109, "step": 381400 }, { "epoch": 49.321266968325794, "grad_norm": 0.7504981756210327, "learning_rate": 0.001, "loss": 2.2177, "step": 381500 }, { "epoch": 49.33419521654816, "grad_norm": 1.0850282907485962, "learning_rate": 0.001, "loss": 2.2006, "step": 381600 }, { "epoch": 49.347123464770526, "grad_norm": 1.652367115020752, "learning_rate": 0.001, "loss": 2.2126, "step": 381700 }, { "epoch": 49.36005171299289, "grad_norm": 1.4712790250778198, "learning_rate": 0.001, "loss": 2.2217, "step": 381800 }, { "epoch": 49.37297996121526, "grad_norm": 1.0922534465789795, "learning_rate": 0.001, "loss": 2.1989, "step": 381900 }, { "epoch": 49.38590820943762, "grad_norm": 1.1625548601150513, "learning_rate": 0.001, "loss": 2.2204, "step": 382000 }, { "epoch": 49.39883645765999, "grad_norm": 0.8696342706680298, "learning_rate": 0.001, "loss": 2.2187, "step": 382100 }, { "epoch": 49.411764705882355, "grad_norm": 0.9100884199142456, "learning_rate": 0.001, "loss": 2.2236, "step": 382200 }, { "epoch": 49.42469295410472, "grad_norm": 0.8845438361167908, "learning_rate": 0.001, "loss": 2.2193, "step": 382300 }, { "epoch": 49.43762120232709, "grad_norm": 1.0227493047714233, "learning_rate": 0.001, "loss": 2.2219, "step": 382400 }, { "epoch": 49.45054945054945, "grad_norm": 1.0742816925048828, "learning_rate": 0.001, "loss": 2.2254, "step": 382500 }, { "epoch": 49.46347769877182, "grad_norm": 1.5063763856887817, "learning_rate": 0.001, "loss": 2.204, "step": 382600 }, { "epoch": 49.476405946994184, "grad_norm": 1.062048316001892, "learning_rate": 0.001, "loss": 2.222, "step": 382700 }, { "epoch": 49.48933419521655, "grad_norm": 0.8589151501655579, "learning_rate": 0.001, "loss": 2.23, "step": 382800 }, { "epoch": 49.502262443438916, "grad_norm": 0.8505949974060059, "learning_rate": 0.001, "loss": 2.2223, "step": 382900 }, { "epoch": 49.51519069166128, "grad_norm": 0.9699360132217407, "learning_rate": 0.001, "loss": 2.2387, "step": 383000 }, { "epoch": 49.52811893988365, "grad_norm": 1.0643435716629028, "learning_rate": 0.001, "loss": 2.2386, "step": 383100 }, { "epoch": 49.54104718810601, "grad_norm": 1.4512566328048706, "learning_rate": 0.001, "loss": 2.2155, "step": 383200 }, { "epoch": 49.55397543632838, "grad_norm": 4.916077613830566, "learning_rate": 0.001, "loss": 2.2381, "step": 383300 }, { "epoch": 49.566903684550745, "grad_norm": 0.9086153507232666, "learning_rate": 0.001, "loss": 2.2254, "step": 383400 }, { "epoch": 49.57983193277311, "grad_norm": 0.8530846238136292, "learning_rate": 0.001, "loss": 2.2539, "step": 383500 }, { "epoch": 49.59276018099548, "grad_norm": 4.069005966186523, "learning_rate": 0.001, "loss": 2.2333, "step": 383600 }, { "epoch": 49.60568842921784, "grad_norm": 1.0399448871612549, "learning_rate": 0.001, "loss": 2.2329, "step": 383700 }, { "epoch": 49.61861667744021, "grad_norm": 0.9151965975761414, "learning_rate": 0.001, "loss": 2.211, "step": 383800 }, { "epoch": 49.631544925662574, "grad_norm": 29.088062286376953, "learning_rate": 0.001, "loss": 2.2034, "step": 383900 }, { "epoch": 49.64447317388494, "grad_norm": 0.7572119235992432, "learning_rate": 0.001, "loss": 2.2433, "step": 384000 }, { "epoch": 49.657401422107306, "grad_norm": 0.9308040142059326, "learning_rate": 0.001, "loss": 2.2377, "step": 384100 }, { "epoch": 49.67032967032967, "grad_norm": 7.215054512023926, "learning_rate": 0.001, "loss": 2.2163, "step": 384200 }, { "epoch": 49.68325791855204, "grad_norm": 1.1515604257583618, "learning_rate": 0.001, "loss": 2.2252, "step": 384300 }, { "epoch": 49.6961861667744, "grad_norm": 0.9201291799545288, "learning_rate": 0.001, "loss": 2.2465, "step": 384400 }, { "epoch": 49.70911441499677, "grad_norm": 0.9706757068634033, "learning_rate": 0.001, "loss": 2.2377, "step": 384500 }, { "epoch": 49.722042663219135, "grad_norm": 1.0084832906723022, "learning_rate": 0.001, "loss": 2.2428, "step": 384600 }, { "epoch": 49.7349709114415, "grad_norm": 1.0477818250656128, "learning_rate": 0.001, "loss": 2.2462, "step": 384700 }, { "epoch": 49.747899159663866, "grad_norm": 0.8089842200279236, "learning_rate": 0.001, "loss": 2.233, "step": 384800 }, { "epoch": 49.76082740788623, "grad_norm": 6.225785732269287, "learning_rate": 0.001, "loss": 2.2258, "step": 384900 }, { "epoch": 49.7737556561086, "grad_norm": 1.5755640268325806, "learning_rate": 0.001, "loss": 2.2442, "step": 385000 }, { "epoch": 49.786683904330964, "grad_norm": 0.8096144199371338, "learning_rate": 0.001, "loss": 2.2504, "step": 385100 }, { "epoch": 49.79961215255333, "grad_norm": 0.949174702167511, "learning_rate": 0.001, "loss": 2.2516, "step": 385200 }, { "epoch": 49.812540400775696, "grad_norm": 0.84523606300354, "learning_rate": 0.001, "loss": 2.2337, "step": 385300 }, { "epoch": 49.82546864899806, "grad_norm": 0.9562318921089172, "learning_rate": 0.001, "loss": 2.2223, "step": 385400 }, { "epoch": 49.83839689722043, "grad_norm": 1.4206814765930176, "learning_rate": 0.001, "loss": 2.2324, "step": 385500 }, { "epoch": 49.85132514544279, "grad_norm": 0.9222531318664551, "learning_rate": 0.001, "loss": 2.239, "step": 385600 }, { "epoch": 49.86425339366516, "grad_norm": 0.9868611693382263, "learning_rate": 0.001, "loss": 2.2616, "step": 385700 }, { "epoch": 49.877181641887525, "grad_norm": 0.9354754686355591, "learning_rate": 0.001, "loss": 2.2422, "step": 385800 }, { "epoch": 49.89010989010989, "grad_norm": 1.8618592023849487, "learning_rate": 0.001, "loss": 2.2455, "step": 385900 }, { "epoch": 49.903038138332256, "grad_norm": 1.0792086124420166, "learning_rate": 0.001, "loss": 2.2476, "step": 386000 }, { "epoch": 49.91596638655462, "grad_norm": 0.997604250907898, "learning_rate": 0.001, "loss": 2.246, "step": 386100 }, { "epoch": 49.92889463477699, "grad_norm": 1.2990520000457764, "learning_rate": 0.001, "loss": 2.2377, "step": 386200 }, { "epoch": 49.941822882999354, "grad_norm": 1.0923515558242798, "learning_rate": 0.001, "loss": 2.2552, "step": 386300 }, { "epoch": 49.95475113122172, "grad_norm": 0.8635362386703491, "learning_rate": 0.001, "loss": 2.2545, "step": 386400 }, { "epoch": 49.967679379444085, "grad_norm": 1.3482729196548462, "learning_rate": 0.001, "loss": 2.2402, "step": 386500 }, { "epoch": 49.98060762766645, "grad_norm": 1.3467209339141846, "learning_rate": 0.001, "loss": 2.219, "step": 386600 }, { "epoch": 49.99353587588882, "grad_norm": 1.5216456651687622, "learning_rate": 0.001, "loss": 2.2504, "step": 386700 }, { "epoch": 50.00646412411118, "grad_norm": 1.5498578548431396, "learning_rate": 0.001, "loss": 2.1586, "step": 386800 }, { "epoch": 50.01939237233355, "grad_norm": 1.544446587562561, "learning_rate": 0.001, "loss": 2.1465, "step": 386900 }, { "epoch": 50.032320620555915, "grad_norm": 1.619469165802002, "learning_rate": 0.001, "loss": 2.1692, "step": 387000 }, { "epoch": 50.04524886877828, "grad_norm": 1.6965556144714355, "learning_rate": 0.001, "loss": 2.1561, "step": 387100 }, { "epoch": 50.058177117000646, "grad_norm": 2.7811532020568848, "learning_rate": 0.001, "loss": 2.133, "step": 387200 }, { "epoch": 50.07110536522301, "grad_norm": 1.487463116645813, "learning_rate": 0.001, "loss": 2.1889, "step": 387300 }, { "epoch": 50.08403361344538, "grad_norm": 2.092264413833618, "learning_rate": 0.001, "loss": 2.1587, "step": 387400 }, { "epoch": 50.096961861667744, "grad_norm": 1.7386164665222168, "learning_rate": 0.001, "loss": 2.1651, "step": 387500 }, { "epoch": 50.10989010989011, "grad_norm": 2.065073251724243, "learning_rate": 0.001, "loss": 2.1908, "step": 387600 }, { "epoch": 50.122818358112475, "grad_norm": 1.5917768478393555, "learning_rate": 0.001, "loss": 2.1738, "step": 387700 }, { "epoch": 50.13574660633484, "grad_norm": 3.6581668853759766, "learning_rate": 0.001, "loss": 2.1806, "step": 387800 }, { "epoch": 50.14867485455721, "grad_norm": 2.2407844066619873, "learning_rate": 0.001, "loss": 2.1817, "step": 387900 }, { "epoch": 50.16160310277957, "grad_norm": 1.8745391368865967, "learning_rate": 0.001, "loss": 2.1738, "step": 388000 }, { "epoch": 50.17453135100194, "grad_norm": 3.0922820568084717, "learning_rate": 0.001, "loss": 2.1746, "step": 388100 }, { "epoch": 50.187459599224304, "grad_norm": 1.779405117034912, "learning_rate": 0.001, "loss": 2.1775, "step": 388200 }, { "epoch": 50.20038784744667, "grad_norm": 1.996253252029419, "learning_rate": 0.001, "loss": 2.1734, "step": 388300 }, { "epoch": 50.213316095669036, "grad_norm": 2.460604190826416, "learning_rate": 0.001, "loss": 2.1879, "step": 388400 }, { "epoch": 50.2262443438914, "grad_norm": 1.4137535095214844, "learning_rate": 0.001, "loss": 2.1824, "step": 388500 }, { "epoch": 50.23917259211377, "grad_norm": 1.594556450843811, "learning_rate": 0.001, "loss": 2.196, "step": 388600 }, { "epoch": 50.252100840336134, "grad_norm": 1.4813467264175415, "learning_rate": 0.001, "loss": 2.1741, "step": 388700 }, { "epoch": 50.2650290885585, "grad_norm": 1.4363689422607422, "learning_rate": 0.001, "loss": 2.1806, "step": 388800 }, { "epoch": 50.277957336780865, "grad_norm": 1.7462717294692993, "learning_rate": 0.001, "loss": 2.1977, "step": 388900 }, { "epoch": 50.29088558500323, "grad_norm": 2.1404192447662354, "learning_rate": 0.001, "loss": 2.1918, "step": 389000 }, { "epoch": 50.3038138332256, "grad_norm": 2.038055181503296, "learning_rate": 0.001, "loss": 2.1898, "step": 389100 }, { "epoch": 50.31674208144796, "grad_norm": 1.5564056634902954, "learning_rate": 0.001, "loss": 2.1804, "step": 389200 }, { "epoch": 50.32967032967033, "grad_norm": 1.8193674087524414, "learning_rate": 0.001, "loss": 2.197, "step": 389300 }, { "epoch": 50.342598577892694, "grad_norm": 1.4108794927597046, "learning_rate": 0.001, "loss": 2.1761, "step": 389400 }, { "epoch": 50.35552682611506, "grad_norm": 1.4445847272872925, "learning_rate": 0.001, "loss": 2.1878, "step": 389500 }, { "epoch": 50.368455074337426, "grad_norm": 2.1427183151245117, "learning_rate": 0.001, "loss": 2.2015, "step": 389600 }, { "epoch": 50.38138332255979, "grad_norm": 2.0570693016052246, "learning_rate": 0.001, "loss": 2.2273, "step": 389700 }, { "epoch": 50.39431157078216, "grad_norm": 1.841122031211853, "learning_rate": 0.001, "loss": 2.2146, "step": 389800 }, { "epoch": 50.40723981900452, "grad_norm": 2.2148244380950928, "learning_rate": 0.001, "loss": 2.2121, "step": 389900 }, { "epoch": 50.42016806722689, "grad_norm": 1.5118504762649536, "learning_rate": 0.001, "loss": 2.1899, "step": 390000 }, { "epoch": 50.433096315449255, "grad_norm": 2.291076421737671, "learning_rate": 0.001, "loss": 2.2184, "step": 390100 }, { "epoch": 50.44602456367162, "grad_norm": 1.8270773887634277, "learning_rate": 0.001, "loss": 2.2274, "step": 390200 }, { "epoch": 50.45895281189399, "grad_norm": 1.999358892440796, "learning_rate": 0.001, "loss": 2.2157, "step": 390300 }, { "epoch": 50.47188106011635, "grad_norm": 1.4004982709884644, "learning_rate": 0.001, "loss": 2.2008, "step": 390400 }, { "epoch": 50.48480930833872, "grad_norm": 2.07088303565979, "learning_rate": 0.001, "loss": 2.2166, "step": 390500 }, { "epoch": 50.497737556561084, "grad_norm": 121.79307556152344, "learning_rate": 0.001, "loss": 2.2494, "step": 390600 }, { "epoch": 50.51066580478345, "grad_norm": 2.0167036056518555, "learning_rate": 0.001, "loss": 2.2114, "step": 390700 }, { "epoch": 50.523594053005816, "grad_norm": 2.152561902999878, "learning_rate": 0.001, "loss": 2.2162, "step": 390800 }, { "epoch": 50.53652230122818, "grad_norm": 1.6478477716445923, "learning_rate": 0.001, "loss": 2.2355, "step": 390900 }, { "epoch": 50.54945054945055, "grad_norm": 1.8705381155014038, "learning_rate": 0.001, "loss": 2.2094, "step": 391000 }, { "epoch": 50.56237879767291, "grad_norm": 2.5869882106781006, "learning_rate": 0.001, "loss": 2.2317, "step": 391100 }, { "epoch": 50.57530704589528, "grad_norm": 1.7233860492706299, "learning_rate": 0.001, "loss": 2.2298, "step": 391200 }, { "epoch": 50.588235294117645, "grad_norm": 1.592603325843811, "learning_rate": 0.001, "loss": 2.239, "step": 391300 }, { "epoch": 50.60116354234001, "grad_norm": 2.2181551456451416, "learning_rate": 0.001, "loss": 2.2297, "step": 391400 }, { "epoch": 50.61409179056238, "grad_norm": 1.6086596250534058, "learning_rate": 0.001, "loss": 2.2212, "step": 391500 }, { "epoch": 50.62702003878474, "grad_norm": 2.6762428283691406, "learning_rate": 0.001, "loss": 2.2313, "step": 391600 }, { "epoch": 50.63994828700711, "grad_norm": 1.6370364427566528, "learning_rate": 0.001, "loss": 2.2409, "step": 391700 }, { "epoch": 50.652876535229474, "grad_norm": 1.8737925291061401, "learning_rate": 0.001, "loss": 2.2299, "step": 391800 }, { "epoch": 50.66580478345184, "grad_norm": 1.5593385696411133, "learning_rate": 0.001, "loss": 2.2272, "step": 391900 }, { "epoch": 50.678733031674206, "grad_norm": 2.2506020069122314, "learning_rate": 0.001, "loss": 2.231, "step": 392000 }, { "epoch": 50.69166127989657, "grad_norm": 1.774066686630249, "learning_rate": 0.001, "loss": 2.22, "step": 392100 }, { "epoch": 50.70458952811894, "grad_norm": 2.0739858150482178, "learning_rate": 0.001, "loss": 2.2559, "step": 392200 }, { "epoch": 50.7175177763413, "grad_norm": 1.7699518203735352, "learning_rate": 0.001, "loss": 2.2616, "step": 392300 }, { "epoch": 50.73044602456367, "grad_norm": 5.972966194152832, "learning_rate": 0.001, "loss": 2.2343, "step": 392400 }, { "epoch": 50.743374272786035, "grad_norm": 2.0731277465820312, "learning_rate": 0.001, "loss": 2.2281, "step": 392500 }, { "epoch": 50.7563025210084, "grad_norm": 1.5652092695236206, "learning_rate": 0.001, "loss": 2.2277, "step": 392600 }, { "epoch": 50.76923076923077, "grad_norm": 1.7139681577682495, "learning_rate": 0.001, "loss": 2.2452, "step": 392700 }, { "epoch": 50.78215901745313, "grad_norm": 1.5239914655685425, "learning_rate": 0.001, "loss": 2.2363, "step": 392800 }, { "epoch": 50.7950872656755, "grad_norm": 1.8191090822219849, "learning_rate": 0.001, "loss": 2.2259, "step": 392900 }, { "epoch": 50.808015513897864, "grad_norm": 1.7267959117889404, "learning_rate": 0.001, "loss": 2.2239, "step": 393000 }, { "epoch": 50.82094376212023, "grad_norm": 1.9803661108016968, "learning_rate": 0.001, "loss": 2.2375, "step": 393100 }, { "epoch": 50.833872010342596, "grad_norm": 1.9083611965179443, "learning_rate": 0.001, "loss": 2.2372, "step": 393200 }, { "epoch": 50.84680025856496, "grad_norm": 1.485741376876831, "learning_rate": 0.001, "loss": 2.2433, "step": 393300 }, { "epoch": 50.85972850678733, "grad_norm": 1.987155795097351, "learning_rate": 0.001, "loss": 2.244, "step": 393400 }, { "epoch": 50.87265675500969, "grad_norm": 1.6559087038040161, "learning_rate": 0.001, "loss": 2.2572, "step": 393500 }, { "epoch": 50.88558500323206, "grad_norm": 2.4144036769866943, "learning_rate": 0.001, "loss": 2.2484, "step": 393600 }, { "epoch": 50.898513251454425, "grad_norm": 1.5100114345550537, "learning_rate": 0.001, "loss": 2.2367, "step": 393700 }, { "epoch": 50.91144149967679, "grad_norm": 1.7015310525894165, "learning_rate": 0.001, "loss": 2.2698, "step": 393800 }, { "epoch": 50.924369747899156, "grad_norm": 2.1816983222961426, "learning_rate": 0.001, "loss": 2.2552, "step": 393900 }, { "epoch": 50.93729799612152, "grad_norm": 1.6741979122161865, "learning_rate": 0.001, "loss": 2.2672, "step": 394000 }, { "epoch": 50.95022624434389, "grad_norm": 1.5520250797271729, "learning_rate": 0.001, "loss": 2.2612, "step": 394100 }, { "epoch": 50.963154492566254, "grad_norm": 1.9958630800247192, "learning_rate": 0.001, "loss": 2.2611, "step": 394200 }, { "epoch": 50.97608274078862, "grad_norm": 1.6468743085861206, "learning_rate": 0.001, "loss": 2.251, "step": 394300 }, { "epoch": 50.98901098901099, "grad_norm": 1.6827830076217651, "learning_rate": 0.001, "loss": 2.2666, "step": 394400 }, { "epoch": 51.00193923723336, "grad_norm": 0.9525300860404968, "learning_rate": 0.001, "loss": 2.2372, "step": 394500 }, { "epoch": 51.014867485455724, "grad_norm": 1.128535270690918, "learning_rate": 0.001, "loss": 2.1814, "step": 394600 }, { "epoch": 51.02779573367809, "grad_norm": 1.0648363828659058, "learning_rate": 0.001, "loss": 2.1802, "step": 394700 }, { "epoch": 51.040723981900456, "grad_norm": 0.765201210975647, "learning_rate": 0.001, "loss": 2.1604, "step": 394800 }, { "epoch": 51.05365223012282, "grad_norm": 0.9402522444725037, "learning_rate": 0.001, "loss": 2.1772, "step": 394900 }, { "epoch": 51.06658047834519, "grad_norm": 1.2054253816604614, "learning_rate": 0.001, "loss": 2.1638, "step": 395000 }, { "epoch": 51.07950872656755, "grad_norm": 0.9289795756340027, "learning_rate": 0.001, "loss": 2.1554, "step": 395100 }, { "epoch": 51.09243697478992, "grad_norm": 0.9324256777763367, "learning_rate": 0.001, "loss": 2.1722, "step": 395200 }, { "epoch": 51.105365223012285, "grad_norm": 1.091755986213684, "learning_rate": 0.001, "loss": 2.1655, "step": 395300 }, { "epoch": 51.11829347123465, "grad_norm": 0.8496136665344238, "learning_rate": 0.001, "loss": 2.1974, "step": 395400 }, { "epoch": 51.13122171945702, "grad_norm": 1.0773056745529175, "learning_rate": 0.001, "loss": 2.1485, "step": 395500 }, { "epoch": 51.14414996767938, "grad_norm": 1.6344274282455444, "learning_rate": 0.001, "loss": 2.1576, "step": 395600 }, { "epoch": 51.15707821590175, "grad_norm": 0.8412167429924011, "learning_rate": 0.001, "loss": 2.1724, "step": 395700 }, { "epoch": 51.170006464124114, "grad_norm": 1.1716562509536743, "learning_rate": 0.001, "loss": 2.2181, "step": 395800 }, { "epoch": 51.18293471234648, "grad_norm": 0.8163767457008362, "learning_rate": 0.001, "loss": 2.1901, "step": 395900 }, { "epoch": 51.195862960568846, "grad_norm": 0.7449765205383301, "learning_rate": 0.001, "loss": 2.1648, "step": 396000 }, { "epoch": 51.20879120879121, "grad_norm": 0.8507226705551147, "learning_rate": 0.001, "loss": 2.2116, "step": 396100 }, { "epoch": 51.22171945701358, "grad_norm": 1.0397251844406128, "learning_rate": 0.001, "loss": 2.1903, "step": 396200 }, { "epoch": 51.23464770523594, "grad_norm": 0.9989331364631653, "learning_rate": 0.001, "loss": 2.192, "step": 396300 }, { "epoch": 51.24757595345831, "grad_norm": 0.9804033637046814, "learning_rate": 0.001, "loss": 2.1863, "step": 396400 }, { "epoch": 51.260504201680675, "grad_norm": 1.0314263105392456, "learning_rate": 0.001, "loss": 2.1835, "step": 396500 }, { "epoch": 51.27343244990304, "grad_norm": 0.9687551259994507, "learning_rate": 0.001, "loss": 2.1674, "step": 396600 }, { "epoch": 51.28636069812541, "grad_norm": 0.9188796877861023, "learning_rate": 0.001, "loss": 2.1869, "step": 396700 }, { "epoch": 51.29928894634777, "grad_norm": 1.0772420167922974, "learning_rate": 0.001, "loss": 2.1844, "step": 396800 }, { "epoch": 51.31221719457014, "grad_norm": 4.085197925567627, "learning_rate": 0.001, "loss": 2.2044, "step": 396900 }, { "epoch": 51.325145442792504, "grad_norm": 1.1146589517593384, "learning_rate": 0.001, "loss": 2.1671, "step": 397000 }, { "epoch": 51.33807369101487, "grad_norm": 1.6810176372528076, "learning_rate": 0.001, "loss": 2.1979, "step": 397100 }, { "epoch": 51.351001939237236, "grad_norm": 0.9052703976631165, "learning_rate": 0.001, "loss": 2.2078, "step": 397200 }, { "epoch": 51.3639301874596, "grad_norm": 0.7648420333862305, "learning_rate": 0.001, "loss": 2.2154, "step": 397300 }, { "epoch": 51.37685843568197, "grad_norm": 0.8211202621459961, "learning_rate": 0.001, "loss": 2.2188, "step": 397400 }, { "epoch": 51.38978668390433, "grad_norm": 1.670017123222351, "learning_rate": 0.001, "loss": 2.209, "step": 397500 }, { "epoch": 51.4027149321267, "grad_norm": 1.4371280670166016, "learning_rate": 0.001, "loss": 2.1941, "step": 397600 }, { "epoch": 51.415643180349065, "grad_norm": 0.8074912428855896, "learning_rate": 0.001, "loss": 2.2014, "step": 397700 }, { "epoch": 51.42857142857143, "grad_norm": 1.0921357870101929, "learning_rate": 0.001, "loss": 2.2178, "step": 397800 }, { "epoch": 51.441499676793796, "grad_norm": 1.1311519145965576, "learning_rate": 0.001, "loss": 2.2134, "step": 397900 }, { "epoch": 51.45442792501616, "grad_norm": 0.8189895749092102, "learning_rate": 0.001, "loss": 2.2014, "step": 398000 }, { "epoch": 51.46735617323853, "grad_norm": 9.470246315002441, "learning_rate": 0.001, "loss": 2.2115, "step": 398100 }, { "epoch": 51.480284421460894, "grad_norm": 8.547236442565918, "learning_rate": 0.001, "loss": 2.2292, "step": 398200 }, { "epoch": 51.49321266968326, "grad_norm": 0.8567830324172974, "learning_rate": 0.001, "loss": 2.2014, "step": 398300 }, { "epoch": 51.506140917905626, "grad_norm": 19.82508659362793, "learning_rate": 0.001, "loss": 2.2139, "step": 398400 }, { "epoch": 51.51906916612799, "grad_norm": 0.8645457029342651, "learning_rate": 0.001, "loss": 2.1953, "step": 398500 }, { "epoch": 51.53199741435036, "grad_norm": 3.7019901275634766, "learning_rate": 0.001, "loss": 2.2358, "step": 398600 }, { "epoch": 51.54492566257272, "grad_norm": 1.1332294940948486, "learning_rate": 0.001, "loss": 2.2221, "step": 398700 }, { "epoch": 51.55785391079509, "grad_norm": 1.0385631322860718, "learning_rate": 0.001, "loss": 2.2135, "step": 398800 }, { "epoch": 51.570782159017455, "grad_norm": 0.91073077917099, "learning_rate": 0.001, "loss": 2.2269, "step": 398900 }, { "epoch": 51.58371040723982, "grad_norm": 2.0802927017211914, "learning_rate": 0.001, "loss": 2.2252, "step": 399000 }, { "epoch": 51.596638655462186, "grad_norm": 0.8334566950798035, "learning_rate": 0.001, "loss": 2.2203, "step": 399100 }, { "epoch": 51.60956690368455, "grad_norm": 1.1924021244049072, "learning_rate": 0.001, "loss": 2.2323, "step": 399200 }, { "epoch": 51.62249515190692, "grad_norm": 1.004475712776184, "learning_rate": 0.001, "loss": 2.2156, "step": 399300 }, { "epoch": 51.635423400129284, "grad_norm": 1.1992095708847046, "learning_rate": 0.001, "loss": 2.2198, "step": 399400 }, { "epoch": 51.64835164835165, "grad_norm": 1.1828689575195312, "learning_rate": 0.001, "loss": 2.2265, "step": 399500 }, { "epoch": 51.661279896574015, "grad_norm": 0.9058612585067749, "learning_rate": 0.001, "loss": 2.2284, "step": 399600 }, { "epoch": 51.67420814479638, "grad_norm": 1.0953617095947266, "learning_rate": 0.001, "loss": 2.2445, "step": 399700 }, { "epoch": 51.68713639301875, "grad_norm": 1.1590265035629272, "learning_rate": 0.001, "loss": 2.2415, "step": 399800 }, { "epoch": 51.70006464124111, "grad_norm": 1.046927809715271, "learning_rate": 0.001, "loss": 2.2232, "step": 399900 }, { "epoch": 51.71299288946348, "grad_norm": 2.498849391937256, "learning_rate": 0.001, "loss": 2.2375, "step": 400000 }, { "epoch": 51.725921137685845, "grad_norm": 1.0121958255767822, "learning_rate": 0.001, "loss": 2.2502, "step": 400100 }, { "epoch": 51.73884938590821, "grad_norm": 1.1873679161071777, "learning_rate": 0.001, "loss": 2.2462, "step": 400200 }, { "epoch": 51.751777634130576, "grad_norm": 1.6969588994979858, "learning_rate": 0.001, "loss": 2.2526, "step": 400300 }, { "epoch": 51.76470588235294, "grad_norm": 14.512615203857422, "learning_rate": 0.001, "loss": 2.2248, "step": 400400 }, { "epoch": 51.77763413057531, "grad_norm": 1.0654021501541138, "learning_rate": 0.001, "loss": 2.2346, "step": 400500 }, { "epoch": 51.790562378797674, "grad_norm": 0.946796178817749, "learning_rate": 0.001, "loss": 2.22, "step": 400600 }, { "epoch": 51.80349062702004, "grad_norm": 0.9541617035865784, "learning_rate": 0.001, "loss": 2.2158, "step": 400700 }, { "epoch": 51.816418875242405, "grad_norm": 1.3610353469848633, "learning_rate": 0.001, "loss": 2.2444, "step": 400800 }, { "epoch": 51.82934712346477, "grad_norm": 0.8276808857917786, "learning_rate": 0.001, "loss": 2.2487, "step": 400900 }, { "epoch": 51.84227537168714, "grad_norm": 0.8165733218193054, "learning_rate": 0.001, "loss": 2.2489, "step": 401000 }, { "epoch": 51.8552036199095, "grad_norm": 0.9889691472053528, "learning_rate": 0.001, "loss": 2.2468, "step": 401100 }, { "epoch": 51.86813186813187, "grad_norm": 0.9081488847732544, "learning_rate": 0.001, "loss": 2.2297, "step": 401200 }, { "epoch": 51.881060116354234, "grad_norm": 1.0041180849075317, "learning_rate": 0.001, "loss": 2.2329, "step": 401300 }, { "epoch": 51.8939883645766, "grad_norm": 0.8461706042289734, "learning_rate": 0.001, "loss": 2.2588, "step": 401400 }, { "epoch": 51.906916612798966, "grad_norm": 1.6514064073562622, "learning_rate": 0.001, "loss": 2.2392, "step": 401500 }, { "epoch": 51.91984486102133, "grad_norm": 1.136407732963562, "learning_rate": 0.001, "loss": 2.2461, "step": 401600 }, { "epoch": 51.9327731092437, "grad_norm": 1.075065016746521, "learning_rate": 0.001, "loss": 2.235, "step": 401700 }, { "epoch": 51.94570135746606, "grad_norm": 0.8136035799980164, "learning_rate": 0.001, "loss": 2.2373, "step": 401800 }, { "epoch": 51.95862960568843, "grad_norm": 1.0970444679260254, "learning_rate": 0.001, "loss": 2.234, "step": 401900 }, { "epoch": 51.971557853910795, "grad_norm": 1.295796275138855, "learning_rate": 0.001, "loss": 2.2504, "step": 402000 }, { "epoch": 51.98448610213316, "grad_norm": 0.7689101696014404, "learning_rate": 0.001, "loss": 2.2539, "step": 402100 }, { "epoch": 51.99741435035553, "grad_norm": 1.0287636518478394, "learning_rate": 0.001, "loss": 2.2434, "step": 402200 }, { "epoch": 52.01034259857789, "grad_norm": 1.6443291902542114, "learning_rate": 0.001, "loss": 2.1816, "step": 402300 }, { "epoch": 52.02327084680026, "grad_norm": 0.8625040650367737, "learning_rate": 0.001, "loss": 2.1672, "step": 402400 }, { "epoch": 52.036199095022624, "grad_norm": 1.6502182483673096, "learning_rate": 0.001, "loss": 2.1644, "step": 402500 }, { "epoch": 52.04912734324499, "grad_norm": 0.8919135928153992, "learning_rate": 0.001, "loss": 2.1454, "step": 402600 }, { "epoch": 52.062055591467356, "grad_norm": 0.9791855812072754, "learning_rate": 0.001, "loss": 2.1726, "step": 402700 }, { "epoch": 52.07498383968972, "grad_norm": 1.9665100574493408, "learning_rate": 0.001, "loss": 2.1695, "step": 402800 }, { "epoch": 52.08791208791209, "grad_norm": 0.7832034230232239, "learning_rate": 0.001, "loss": 2.1579, "step": 402900 }, { "epoch": 52.10084033613445, "grad_norm": 1.108007550239563, "learning_rate": 0.001, "loss": 2.1683, "step": 403000 }, { "epoch": 52.11376858435682, "grad_norm": 0.9084193706512451, "learning_rate": 0.001, "loss": 2.1676, "step": 403100 }, { "epoch": 52.126696832579185, "grad_norm": 0.9567168951034546, "learning_rate": 0.001, "loss": 2.163, "step": 403200 }, { "epoch": 52.13962508080155, "grad_norm": 0.923818826675415, "learning_rate": 0.001, "loss": 2.1803, "step": 403300 }, { "epoch": 52.15255332902392, "grad_norm": 2.3361988067626953, "learning_rate": 0.001, "loss": 2.1467, "step": 403400 }, { "epoch": 52.16548157724628, "grad_norm": 0.9473086595535278, "learning_rate": 0.001, "loss": 2.188, "step": 403500 }, { "epoch": 52.17840982546865, "grad_norm": 1.3395328521728516, "learning_rate": 0.001, "loss": 2.1892, "step": 403600 }, { "epoch": 52.191338073691014, "grad_norm": 0.8627855181694031, "learning_rate": 0.001, "loss": 2.1617, "step": 403700 }, { "epoch": 52.20426632191338, "grad_norm": 1.5713242292404175, "learning_rate": 0.001, "loss": 2.1741, "step": 403800 }, { "epoch": 52.217194570135746, "grad_norm": 1.059898853302002, "learning_rate": 0.001, "loss": 2.1929, "step": 403900 }, { "epoch": 52.23012281835811, "grad_norm": 2.382338762283325, "learning_rate": 0.001, "loss": 2.183, "step": 404000 }, { "epoch": 52.24305106658048, "grad_norm": 1.243952751159668, "learning_rate": 0.001, "loss": 2.1761, "step": 404100 }, { "epoch": 52.25597931480284, "grad_norm": 1.011945128440857, "learning_rate": 0.001, "loss": 2.1873, "step": 404200 }, { "epoch": 52.26890756302521, "grad_norm": 0.8488190174102783, "learning_rate": 0.001, "loss": 2.1825, "step": 404300 }, { "epoch": 52.281835811247575, "grad_norm": 0.9952630400657654, "learning_rate": 0.001, "loss": 2.1825, "step": 404400 }, { "epoch": 52.29476405946994, "grad_norm": 1.049008846282959, "learning_rate": 0.001, "loss": 2.1819, "step": 404500 }, { "epoch": 52.30769230769231, "grad_norm": 1.4138529300689697, "learning_rate": 0.001, "loss": 2.1752, "step": 404600 }, { "epoch": 52.32062055591467, "grad_norm": 0.7432920336723328, "learning_rate": 0.001, "loss": 2.1842, "step": 404700 }, { "epoch": 52.33354880413704, "grad_norm": 0.8728629946708679, "learning_rate": 0.001, "loss": 2.18, "step": 404800 }, { "epoch": 52.346477052359404, "grad_norm": 0.9544297456741333, "learning_rate": 0.001, "loss": 2.2042, "step": 404900 }, { "epoch": 52.35940530058177, "grad_norm": 0.9349939227104187, "learning_rate": 0.001, "loss": 2.1765, "step": 405000 }, { "epoch": 52.372333548804136, "grad_norm": 1.1578516960144043, "learning_rate": 0.001, "loss": 2.1956, "step": 405100 }, { "epoch": 52.3852617970265, "grad_norm": 1.029625415802002, "learning_rate": 0.001, "loss": 2.2111, "step": 405200 }, { "epoch": 52.39819004524887, "grad_norm": 0.7592127919197083, "learning_rate": 0.001, "loss": 2.2153, "step": 405300 }, { "epoch": 52.41111829347123, "grad_norm": 0.8207055926322937, "learning_rate": 0.001, "loss": 2.1763, "step": 405400 }, { "epoch": 52.4240465416936, "grad_norm": 11.042133331298828, "learning_rate": 0.001, "loss": 2.1875, "step": 405500 }, { "epoch": 52.436974789915965, "grad_norm": 0.9993590712547302, "learning_rate": 0.001, "loss": 2.2107, "step": 405600 }, { "epoch": 52.44990303813833, "grad_norm": 1.1347869634628296, "learning_rate": 0.001, "loss": 2.1875, "step": 405700 }, { "epoch": 52.4628312863607, "grad_norm": 1.168523907661438, "learning_rate": 0.001, "loss": 2.2129, "step": 405800 }, { "epoch": 52.47575953458306, "grad_norm": 1.0181000232696533, "learning_rate": 0.001, "loss": 2.2071, "step": 405900 }, { "epoch": 52.48868778280543, "grad_norm": 8.090771675109863, "learning_rate": 0.001, "loss": 2.1876, "step": 406000 }, { "epoch": 52.501616031027794, "grad_norm": 1.1497881412506104, "learning_rate": 0.001, "loss": 2.1977, "step": 406100 }, { "epoch": 52.51454427925016, "grad_norm": 7.443631172180176, "learning_rate": 0.001, "loss": 2.2115, "step": 406200 }, { "epoch": 52.527472527472526, "grad_norm": 0.9931620359420776, "learning_rate": 0.001, "loss": 2.2207, "step": 406300 }, { "epoch": 52.54040077569489, "grad_norm": 1.1952219009399414, "learning_rate": 0.001, "loss": 2.1854, "step": 406400 }, { "epoch": 52.55332902391726, "grad_norm": 0.9150611162185669, "learning_rate": 0.001, "loss": 2.2128, "step": 406500 }, { "epoch": 52.56625727213962, "grad_norm": 1.1574574708938599, "learning_rate": 0.001, "loss": 2.2167, "step": 406600 }, { "epoch": 52.57918552036199, "grad_norm": 0.7832790613174438, "learning_rate": 0.001, "loss": 2.2044, "step": 406700 }, { "epoch": 52.592113768584355, "grad_norm": 0.9193177819252014, "learning_rate": 0.001, "loss": 2.2064, "step": 406800 }, { "epoch": 52.60504201680672, "grad_norm": 0.8200361132621765, "learning_rate": 0.001, "loss": 2.2158, "step": 406900 }, { "epoch": 52.617970265029086, "grad_norm": 0.7800462245941162, "learning_rate": 0.001, "loss": 2.1891, "step": 407000 }, { "epoch": 52.63089851325145, "grad_norm": 1.0581963062286377, "learning_rate": 0.001, "loss": 2.2062, "step": 407100 }, { "epoch": 52.64382676147382, "grad_norm": 0.9793205857276917, "learning_rate": 0.001, "loss": 2.2239, "step": 407200 }, { "epoch": 52.656755009696184, "grad_norm": 0.9537611603736877, "learning_rate": 0.001, "loss": 2.2141, "step": 407300 }, { "epoch": 52.66968325791855, "grad_norm": 1.0384042263031006, "learning_rate": 0.001, "loss": 2.2024, "step": 407400 }, { "epoch": 52.682611506140915, "grad_norm": 1.2507473230361938, "learning_rate": 0.001, "loss": 2.2143, "step": 407500 }, { "epoch": 52.69553975436328, "grad_norm": 1.314554214477539, "learning_rate": 0.001, "loss": 2.2121, "step": 407600 }, { "epoch": 52.70846800258565, "grad_norm": 1.1361076831817627, "learning_rate": 0.001, "loss": 2.2173, "step": 407700 }, { "epoch": 52.72139625080801, "grad_norm": 1.1056517362594604, "learning_rate": 0.001, "loss": 2.1954, "step": 407800 }, { "epoch": 52.73432449903038, "grad_norm": 1.0464487075805664, "learning_rate": 0.001, "loss": 2.2233, "step": 407900 }, { "epoch": 52.747252747252745, "grad_norm": 1.1017590761184692, "learning_rate": 0.001, "loss": 2.2272, "step": 408000 }, { "epoch": 52.76018099547511, "grad_norm": 0.954435408115387, "learning_rate": 0.001, "loss": 2.2207, "step": 408100 }, { "epoch": 52.773109243697476, "grad_norm": 1.3855229616165161, "learning_rate": 0.001, "loss": 2.2295, "step": 408200 }, { "epoch": 52.78603749191984, "grad_norm": 0.8317700624465942, "learning_rate": 0.001, "loss": 2.2247, "step": 408300 }, { "epoch": 52.79896574014221, "grad_norm": 1.7168042659759521, "learning_rate": 0.001, "loss": 2.2092, "step": 408400 }, { "epoch": 52.811893988364574, "grad_norm": 1.1305382251739502, "learning_rate": 0.001, "loss": 2.237, "step": 408500 }, { "epoch": 52.82482223658694, "grad_norm": 1.2202800512313843, "learning_rate": 0.001, "loss": 2.2259, "step": 408600 }, { "epoch": 52.837750484809305, "grad_norm": 1.282901406288147, "learning_rate": 0.001, "loss": 2.2264, "step": 408700 }, { "epoch": 52.85067873303167, "grad_norm": 1.071366310119629, "learning_rate": 0.001, "loss": 2.2461, "step": 408800 }, { "epoch": 52.86360698125404, "grad_norm": 1.0458663702011108, "learning_rate": 0.001, "loss": 2.2404, "step": 408900 }, { "epoch": 52.8765352294764, "grad_norm": 1.771612524986267, "learning_rate": 0.001, "loss": 2.2362, "step": 409000 }, { "epoch": 52.88946347769877, "grad_norm": 1.5962872505187988, "learning_rate": 0.001, "loss": 2.24, "step": 409100 }, { "epoch": 52.902391725921134, "grad_norm": 0.8657016158103943, "learning_rate": 0.001, "loss": 2.2377, "step": 409200 }, { "epoch": 52.9153199741435, "grad_norm": 3.189598321914673, "learning_rate": 0.001, "loss": 2.235, "step": 409300 }, { "epoch": 52.928248222365866, "grad_norm": 0.9935047626495361, "learning_rate": 0.001, "loss": 2.2361, "step": 409400 }, { "epoch": 52.94117647058823, "grad_norm": 1.2261946201324463, "learning_rate": 0.001, "loss": 2.2415, "step": 409500 }, { "epoch": 52.9541047188106, "grad_norm": 1.487992525100708, "learning_rate": 0.001, "loss": 2.229, "step": 409600 }, { "epoch": 52.967032967032964, "grad_norm": 0.9335711002349854, "learning_rate": 0.001, "loss": 2.2353, "step": 409700 }, { "epoch": 52.97996121525533, "grad_norm": 1.0136048793792725, "learning_rate": 0.001, "loss": 2.2138, "step": 409800 }, { "epoch": 52.992889463477695, "grad_norm": 0.9536816477775574, "learning_rate": 0.001, "loss": 2.2309, "step": 409900 }, { "epoch": 53.00581771170007, "grad_norm": 11.372562408447266, "learning_rate": 0.001, "loss": 2.1876, "step": 410000 }, { "epoch": 53.018745959922434, "grad_norm": 1.1353493928909302, "learning_rate": 0.001, "loss": 2.1523, "step": 410100 }, { "epoch": 53.0316742081448, "grad_norm": 1.0238125324249268, "learning_rate": 0.001, "loss": 2.129, "step": 410200 }, { "epoch": 53.044602456367166, "grad_norm": 1.8296374082565308, "learning_rate": 0.001, "loss": 2.149, "step": 410300 }, { "epoch": 53.05753070458953, "grad_norm": 1.1955342292785645, "learning_rate": 0.001, "loss": 2.1375, "step": 410400 }, { "epoch": 53.0704589528119, "grad_norm": 0.9201542139053345, "learning_rate": 0.001, "loss": 2.1449, "step": 410500 }, { "epoch": 53.08338720103426, "grad_norm": 1.1392742395401, "learning_rate": 0.001, "loss": 2.1394, "step": 410600 }, { "epoch": 53.09631544925663, "grad_norm": 1.0292466878890991, "learning_rate": 0.001, "loss": 2.183, "step": 410700 }, { "epoch": 53.109243697478995, "grad_norm": 1.1570264101028442, "learning_rate": 0.001, "loss": 2.1454, "step": 410800 }, { "epoch": 53.12217194570136, "grad_norm": 1.140332579612732, "learning_rate": 0.001, "loss": 2.146, "step": 410900 }, { "epoch": 53.135100193923726, "grad_norm": 1.0106295347213745, "learning_rate": 0.001, "loss": 2.1695, "step": 411000 }, { "epoch": 53.14802844214609, "grad_norm": 1.3493247032165527, "learning_rate": 0.001, "loss": 2.1618, "step": 411100 }, { "epoch": 53.16095669036846, "grad_norm": 1.8874033689498901, "learning_rate": 0.001, "loss": 2.1663, "step": 411200 }, { "epoch": 53.173884938590824, "grad_norm": 0.9318674206733704, "learning_rate": 0.001, "loss": 2.168, "step": 411300 }, { "epoch": 53.18681318681319, "grad_norm": 3.721895933151245, "learning_rate": 0.001, "loss": 2.1611, "step": 411400 }, { "epoch": 53.199741435035556, "grad_norm": 2.5897011756896973, "learning_rate": 0.001, "loss": 2.1694, "step": 411500 }, { "epoch": 53.21266968325792, "grad_norm": 1.0703186988830566, "learning_rate": 0.001, "loss": 2.1643, "step": 411600 }, { "epoch": 53.22559793148029, "grad_norm": 1.1743680238723755, "learning_rate": 0.001, "loss": 2.1875, "step": 411700 }, { "epoch": 53.23852617970265, "grad_norm": 0.9943298101425171, "learning_rate": 0.001, "loss": 2.1892, "step": 411800 }, { "epoch": 53.25145442792502, "grad_norm": 1.0276713371276855, "learning_rate": 0.001, "loss": 2.1598, "step": 411900 }, { "epoch": 53.264382676147385, "grad_norm": 1.3807704448699951, "learning_rate": 0.001, "loss": 2.1682, "step": 412000 }, { "epoch": 53.27731092436975, "grad_norm": 1.3303724527359009, "learning_rate": 0.001, "loss": 2.1814, "step": 412100 }, { "epoch": 53.290239172592116, "grad_norm": 1.0666053295135498, "learning_rate": 0.001, "loss": 2.1887, "step": 412200 }, { "epoch": 53.30316742081448, "grad_norm": 1.1311050653457642, "learning_rate": 0.001, "loss": 2.1809, "step": 412300 }, { "epoch": 53.31609566903685, "grad_norm": 1.3184497356414795, "learning_rate": 0.001, "loss": 2.1609, "step": 412400 }, { "epoch": 53.329023917259214, "grad_norm": 1.299046277999878, "learning_rate": 0.001, "loss": 2.1835, "step": 412500 }, { "epoch": 53.34195216548158, "grad_norm": 1.0967493057250977, "learning_rate": 0.001, "loss": 2.1915, "step": 412600 }, { "epoch": 53.354880413703945, "grad_norm": 1.044872760772705, "learning_rate": 0.001, "loss": 2.1694, "step": 412700 }, { "epoch": 53.36780866192631, "grad_norm": 1.2919092178344727, "learning_rate": 0.001, "loss": 2.2015, "step": 412800 }, { "epoch": 53.38073691014868, "grad_norm": 1.1259177923202515, "learning_rate": 0.001, "loss": 2.1864, "step": 412900 }, { "epoch": 53.39366515837104, "grad_norm": 1.1917535066604614, "learning_rate": 0.001, "loss": 2.1773, "step": 413000 }, { "epoch": 53.40659340659341, "grad_norm": 12.175249099731445, "learning_rate": 0.001, "loss": 2.1924, "step": 413100 }, { "epoch": 53.419521654815775, "grad_norm": 0.954131543636322, "learning_rate": 0.001, "loss": 2.1893, "step": 413200 }, { "epoch": 53.43244990303814, "grad_norm": 0.9774305820465088, "learning_rate": 0.001, "loss": 2.2013, "step": 413300 }, { "epoch": 53.445378151260506, "grad_norm": 1.1517820358276367, "learning_rate": 0.001, "loss": 2.1685, "step": 413400 }, { "epoch": 53.45830639948287, "grad_norm": 1.3304303884506226, "learning_rate": 0.001, "loss": 2.191, "step": 413500 }, { "epoch": 53.47123464770524, "grad_norm": 1.249843716621399, "learning_rate": 0.001, "loss": 2.1872, "step": 413600 }, { "epoch": 53.484162895927604, "grad_norm": 1.5115238428115845, "learning_rate": 0.001, "loss": 2.2119, "step": 413700 }, { "epoch": 53.49709114414997, "grad_norm": 1.7539185285568237, "learning_rate": 0.001, "loss": 2.1976, "step": 413800 }, { "epoch": 53.510019392372335, "grad_norm": 1.2597754001617432, "learning_rate": 0.001, "loss": 2.2239, "step": 413900 }, { "epoch": 53.5229476405947, "grad_norm": 1.1289756298065186, "learning_rate": 0.001, "loss": 2.2152, "step": 414000 }, { "epoch": 53.53587588881707, "grad_norm": 1.2047665119171143, "learning_rate": 0.001, "loss": 2.2087, "step": 414100 }, { "epoch": 53.54880413703943, "grad_norm": 1.1595187187194824, "learning_rate": 0.001, "loss": 2.1994, "step": 414200 }, { "epoch": 53.5617323852618, "grad_norm": 2.195873260498047, "learning_rate": 0.001, "loss": 2.2015, "step": 414300 }, { "epoch": 53.574660633484164, "grad_norm": 1.0192984342575073, "learning_rate": 0.001, "loss": 2.1983, "step": 414400 }, { "epoch": 53.58758888170653, "grad_norm": 1.3089481592178345, "learning_rate": 0.001, "loss": 2.2172, "step": 414500 }, { "epoch": 53.600517129928896, "grad_norm": 1.2402232885360718, "learning_rate": 0.001, "loss": 2.201, "step": 414600 }, { "epoch": 53.61344537815126, "grad_norm": 1.0339689254760742, "learning_rate": 0.001, "loss": 2.216, "step": 414700 }, { "epoch": 53.62637362637363, "grad_norm": 1.0031615495681763, "learning_rate": 0.001, "loss": 2.201, "step": 414800 }, { "epoch": 53.63930187459599, "grad_norm": 1.372776985168457, "learning_rate": 0.001, "loss": 2.2048, "step": 414900 }, { "epoch": 53.65223012281836, "grad_norm": 1.2407013177871704, "learning_rate": 0.001, "loss": 2.2271, "step": 415000 }, { "epoch": 53.665158371040725, "grad_norm": 1.3064041137695312, "learning_rate": 0.001, "loss": 2.2098, "step": 415100 }, { "epoch": 53.67808661926309, "grad_norm": 1.1497257947921753, "learning_rate": 0.001, "loss": 2.2183, "step": 415200 }, { "epoch": 53.69101486748546, "grad_norm": 2.41310715675354, "learning_rate": 0.001, "loss": 2.2209, "step": 415300 }, { "epoch": 53.70394311570782, "grad_norm": 1.2121341228485107, "learning_rate": 0.001, "loss": 2.2286, "step": 415400 }, { "epoch": 53.71687136393019, "grad_norm": 2.0091710090637207, "learning_rate": 0.001, "loss": 2.2313, "step": 415500 }, { "epoch": 53.729799612152554, "grad_norm": 1.120705246925354, "learning_rate": 0.001, "loss": 2.2186, "step": 415600 }, { "epoch": 53.74272786037492, "grad_norm": 1.1182795763015747, "learning_rate": 0.001, "loss": 2.2214, "step": 415700 }, { "epoch": 53.755656108597286, "grad_norm": 1.7058063745498657, "learning_rate": 0.001, "loss": 2.2316, "step": 415800 }, { "epoch": 53.76858435681965, "grad_norm": 1.3001524209976196, "learning_rate": 0.001, "loss": 2.2162, "step": 415900 }, { "epoch": 53.78151260504202, "grad_norm": 14.380501747131348, "learning_rate": 0.001, "loss": 2.2042, "step": 416000 }, { "epoch": 53.79444085326438, "grad_norm": 1.5132445096969604, "learning_rate": 0.001, "loss": 2.2237, "step": 416100 }, { "epoch": 53.80736910148675, "grad_norm": 1.1358747482299805, "learning_rate": 0.001, "loss": 2.2281, "step": 416200 }, { "epoch": 53.820297349709115, "grad_norm": 1.5237737894058228, "learning_rate": 0.001, "loss": 2.2275, "step": 416300 }, { "epoch": 53.83322559793148, "grad_norm": 1.1418156623840332, "learning_rate": 0.001, "loss": 2.2376, "step": 416400 }, { "epoch": 53.84615384615385, "grad_norm": 1.0635056495666504, "learning_rate": 0.001, "loss": 2.2349, "step": 416500 }, { "epoch": 53.85908209437621, "grad_norm": 1.1889541149139404, "learning_rate": 0.001, "loss": 2.218, "step": 416600 }, { "epoch": 53.87201034259858, "grad_norm": 1.2438920736312866, "learning_rate": 0.001, "loss": 2.2117, "step": 416700 }, { "epoch": 53.884938590820944, "grad_norm": 1.3520911931991577, "learning_rate": 0.001, "loss": 2.2092, "step": 416800 }, { "epoch": 53.89786683904331, "grad_norm": 1.0275487899780273, "learning_rate": 0.001, "loss": 2.2274, "step": 416900 }, { "epoch": 53.910795087265676, "grad_norm": 1.2566996812820435, "learning_rate": 0.001, "loss": 2.2301, "step": 417000 }, { "epoch": 53.92372333548804, "grad_norm": 1.0906912088394165, "learning_rate": 0.001, "loss": 2.2336, "step": 417100 }, { "epoch": 53.93665158371041, "grad_norm": 7.417097568511963, "learning_rate": 0.001, "loss": 2.2338, "step": 417200 }, { "epoch": 53.94957983193277, "grad_norm": 3.565079927444458, "learning_rate": 0.001, "loss": 2.2295, "step": 417300 }, { "epoch": 53.96250808015514, "grad_norm": 1.1298328638076782, "learning_rate": 0.001, "loss": 2.2246, "step": 417400 }, { "epoch": 53.975436328377505, "grad_norm": 1.10088050365448, "learning_rate": 0.001, "loss": 2.2471, "step": 417500 }, { "epoch": 53.98836457659987, "grad_norm": 1.1325486898422241, "learning_rate": 0.001, "loss": 2.2462, "step": 417600 }, { "epoch": 54.00129282482224, "grad_norm": 0.8027070760726929, "learning_rate": 0.001, "loss": 2.2188, "step": 417700 }, { "epoch": 54.0142210730446, "grad_norm": 1.1841908693313599, "learning_rate": 0.001, "loss": 2.1528, "step": 417800 }, { "epoch": 54.02714932126697, "grad_norm": 0.9843541383743286, "learning_rate": 0.001, "loss": 2.138, "step": 417900 }, { "epoch": 54.040077569489334, "grad_norm": 1.2084696292877197, "learning_rate": 0.001, "loss": 2.1302, "step": 418000 }, { "epoch": 54.0530058177117, "grad_norm": 1.2820038795471191, "learning_rate": 0.001, "loss": 2.1733, "step": 418100 }, { "epoch": 54.065934065934066, "grad_norm": 1.283634901046753, "learning_rate": 0.001, "loss": 2.1417, "step": 418200 }, { "epoch": 54.07886231415643, "grad_norm": 1.3227022886276245, "learning_rate": 0.001, "loss": 2.1654, "step": 418300 }, { "epoch": 54.0917905623788, "grad_norm": 0.9884834885597229, "learning_rate": 0.001, "loss": 2.1436, "step": 418400 }, { "epoch": 54.10471881060116, "grad_norm": 0.8331636190414429, "learning_rate": 0.001, "loss": 2.1558, "step": 418500 }, { "epoch": 54.11764705882353, "grad_norm": 1.2945544719696045, "learning_rate": 0.001, "loss": 2.1616, "step": 418600 }, { "epoch": 54.130575307045895, "grad_norm": 1.04231595993042, "learning_rate": 0.001, "loss": 2.1617, "step": 418700 }, { "epoch": 54.14350355526826, "grad_norm": 4.828259468078613, "learning_rate": 0.001, "loss": 2.1575, "step": 418800 }, { "epoch": 54.15643180349063, "grad_norm": 1.240376353263855, "learning_rate": 0.001, "loss": 2.1678, "step": 418900 }, { "epoch": 54.16936005171299, "grad_norm": 0.9365543127059937, "learning_rate": 0.001, "loss": 2.1484, "step": 419000 }, { "epoch": 54.18228829993536, "grad_norm": 0.897531270980835, "learning_rate": 0.001, "loss": 2.1542, "step": 419100 }, { "epoch": 54.195216548157724, "grad_norm": 1.2307636737823486, "learning_rate": 0.001, "loss": 2.1629, "step": 419200 }, { "epoch": 54.20814479638009, "grad_norm": 0.8832455277442932, "learning_rate": 0.001, "loss": 2.1604, "step": 419300 }, { "epoch": 54.221073044602456, "grad_norm": 0.8310726284980774, "learning_rate": 0.001, "loss": 2.1609, "step": 419400 }, { "epoch": 54.23400129282482, "grad_norm": 1.0921039581298828, "learning_rate": 0.001, "loss": 2.1778, "step": 419500 }, { "epoch": 54.24692954104719, "grad_norm": 21.48760414123535, "learning_rate": 0.001, "loss": 2.1778, "step": 419600 }, { "epoch": 54.25985778926955, "grad_norm": 1.102482557296753, "learning_rate": 0.001, "loss": 2.1753, "step": 419700 }, { "epoch": 54.27278603749192, "grad_norm": 0.7863437533378601, "learning_rate": 0.001, "loss": 2.1664, "step": 419800 }, { "epoch": 54.285714285714285, "grad_norm": 0.9485898017883301, "learning_rate": 0.001, "loss": 2.1742, "step": 419900 }, { "epoch": 54.29864253393665, "grad_norm": 0.9484174847602844, "learning_rate": 0.001, "loss": 2.1855, "step": 420000 }, { "epoch": 54.311570782159016, "grad_norm": 0.9770823121070862, "learning_rate": 0.001, "loss": 2.1661, "step": 420100 }, { "epoch": 54.32449903038138, "grad_norm": 1.935044527053833, "learning_rate": 0.001, "loss": 2.1686, "step": 420200 }, { "epoch": 54.33742727860375, "grad_norm": 0.9935631155967712, "learning_rate": 0.001, "loss": 2.1656, "step": 420300 }, { "epoch": 54.350355526826114, "grad_norm": 1.125401258468628, "learning_rate": 0.001, "loss": 2.1638, "step": 420400 }, { "epoch": 54.36328377504848, "grad_norm": 1.1356977224349976, "learning_rate": 0.001, "loss": 2.1655, "step": 420500 }, { "epoch": 54.376212023270845, "grad_norm": 4.178274631500244, "learning_rate": 0.001, "loss": 2.1735, "step": 420600 }, { "epoch": 54.38914027149321, "grad_norm": 0.9328211545944214, "learning_rate": 0.001, "loss": 2.1673, "step": 420700 }, { "epoch": 54.40206851971558, "grad_norm": 0.9704561233520508, "learning_rate": 0.001, "loss": 2.1847, "step": 420800 }, { "epoch": 54.41499676793794, "grad_norm": 1.2866215705871582, "learning_rate": 0.001, "loss": 2.2077, "step": 420900 }, { "epoch": 54.42792501616031, "grad_norm": 1.0869452953338623, "learning_rate": 0.001, "loss": 2.1793, "step": 421000 }, { "epoch": 54.440853264382675, "grad_norm": 0.9294473528862, "learning_rate": 0.001, "loss": 2.1865, "step": 421100 }, { "epoch": 54.45378151260504, "grad_norm": 0.8278458118438721, "learning_rate": 0.001, "loss": 2.1909, "step": 421200 }, { "epoch": 54.466709760827406, "grad_norm": 1.1982494592666626, "learning_rate": 0.001, "loss": 2.1704, "step": 421300 }, { "epoch": 54.47963800904977, "grad_norm": 1.547098994255066, "learning_rate": 0.001, "loss": 2.2066, "step": 421400 }, { "epoch": 54.49256625727214, "grad_norm": 8.433126449584961, "learning_rate": 0.001, "loss": 2.1899, "step": 421500 }, { "epoch": 54.505494505494504, "grad_norm": 0.8715642094612122, "learning_rate": 0.001, "loss": 2.1978, "step": 421600 }, { "epoch": 54.51842275371687, "grad_norm": 0.9133907556533813, "learning_rate": 0.001, "loss": 2.1742, "step": 421700 }, { "epoch": 54.531351001939235, "grad_norm": 0.9496439099311829, "learning_rate": 0.001, "loss": 2.1971, "step": 421800 }, { "epoch": 54.5442792501616, "grad_norm": 0.7948375344276428, "learning_rate": 0.001, "loss": 2.1844, "step": 421900 }, { "epoch": 54.55720749838397, "grad_norm": 0.9182512760162354, "learning_rate": 0.001, "loss": 2.1945, "step": 422000 }, { "epoch": 54.57013574660633, "grad_norm": 0.8687048554420471, "learning_rate": 0.001, "loss": 2.2144, "step": 422100 }, { "epoch": 54.5830639948287, "grad_norm": 1.1144248247146606, "learning_rate": 0.001, "loss": 2.1836, "step": 422200 }, { "epoch": 54.595992243051064, "grad_norm": 0.9027930498123169, "learning_rate": 0.001, "loss": 2.2068, "step": 422300 }, { "epoch": 54.60892049127343, "grad_norm": 1.0254998207092285, "learning_rate": 0.001, "loss": 2.1972, "step": 422400 }, { "epoch": 54.621848739495796, "grad_norm": 1.7412517070770264, "learning_rate": 0.001, "loss": 2.197, "step": 422500 }, { "epoch": 54.63477698771816, "grad_norm": 0.854976236820221, "learning_rate": 0.001, "loss": 2.1794, "step": 422600 }, { "epoch": 54.64770523594053, "grad_norm": 1.153038501739502, "learning_rate": 0.001, "loss": 2.201, "step": 422700 }, { "epoch": 54.660633484162894, "grad_norm": 0.8680347204208374, "learning_rate": 0.001, "loss": 2.1842, "step": 422800 }, { "epoch": 54.67356173238526, "grad_norm": 1.0185452699661255, "learning_rate": 0.001, "loss": 2.1981, "step": 422900 }, { "epoch": 54.686489980607625, "grad_norm": 0.8455255031585693, "learning_rate": 0.001, "loss": 2.2072, "step": 423000 }, { "epoch": 54.69941822882999, "grad_norm": 1.4340115785598755, "learning_rate": 0.001, "loss": 2.214, "step": 423100 }, { "epoch": 54.71234647705236, "grad_norm": 1.117319107055664, "learning_rate": 0.001, "loss": 2.21, "step": 423200 }, { "epoch": 54.72527472527472, "grad_norm": 1.0372815132141113, "learning_rate": 0.001, "loss": 2.1992, "step": 423300 }, { "epoch": 54.73820297349709, "grad_norm": 5.469690799713135, "learning_rate": 0.001, "loss": 2.2107, "step": 423400 }, { "epoch": 54.751131221719454, "grad_norm": 0.8521455526351929, "learning_rate": 0.001, "loss": 2.204, "step": 423500 }, { "epoch": 54.76405946994182, "grad_norm": 1.15244722366333, "learning_rate": 0.001, "loss": 2.1869, "step": 423600 }, { "epoch": 54.776987718164186, "grad_norm": 0.8934130668640137, "learning_rate": 0.001, "loss": 2.2239, "step": 423700 }, { "epoch": 54.78991596638655, "grad_norm": 1.350104808807373, "learning_rate": 0.001, "loss": 2.2072, "step": 423800 }, { "epoch": 54.80284421460892, "grad_norm": 0.9452086687088013, "learning_rate": 0.001, "loss": 2.2063, "step": 423900 }, { "epoch": 54.81577246283128, "grad_norm": 0.9627230763435364, "learning_rate": 0.001, "loss": 2.2125, "step": 424000 }, { "epoch": 54.82870071105365, "grad_norm": 0.786243200302124, "learning_rate": 0.001, "loss": 2.2038, "step": 424100 }, { "epoch": 54.841628959276015, "grad_norm": 0.7173594236373901, "learning_rate": 0.001, "loss": 2.2054, "step": 424200 }, { "epoch": 54.85455720749838, "grad_norm": 1.0086008310317993, "learning_rate": 0.001, "loss": 2.2188, "step": 424300 }, { "epoch": 54.86748545572075, "grad_norm": 0.8328046202659607, "learning_rate": 0.001, "loss": 2.2238, "step": 424400 }, { "epoch": 54.88041370394311, "grad_norm": 1.2993661165237427, "learning_rate": 0.001, "loss": 2.2343, "step": 424500 }, { "epoch": 54.89334195216548, "grad_norm": 0.8180429339408875, "learning_rate": 0.001, "loss": 2.2149, "step": 424600 }, { "epoch": 54.906270200387844, "grad_norm": 0.973127007484436, "learning_rate": 0.001, "loss": 2.2099, "step": 424700 }, { "epoch": 54.91919844861021, "grad_norm": 1.277063250541687, "learning_rate": 0.001, "loss": 2.2333, "step": 424800 }, { "epoch": 54.932126696832576, "grad_norm": 1.0784807205200195, "learning_rate": 0.001, "loss": 2.225, "step": 424900 }, { "epoch": 54.94505494505494, "grad_norm": 1.0692416429519653, "learning_rate": 0.001, "loss": 2.2208, "step": 425000 }, { "epoch": 54.95798319327731, "grad_norm": 1.1215637922286987, "learning_rate": 0.001, "loss": 2.2128, "step": 425100 }, { "epoch": 54.97091144149967, "grad_norm": 0.9502516984939575, "learning_rate": 0.001, "loss": 2.2292, "step": 425200 }, { "epoch": 54.98383968972204, "grad_norm": 1.1718926429748535, "learning_rate": 0.001, "loss": 2.2228, "step": 425300 }, { "epoch": 54.99676793794441, "grad_norm": 0.7990277409553528, "learning_rate": 0.001, "loss": 2.2241, "step": 425400 }, { "epoch": 55.00969618616678, "grad_norm": 1.0116515159606934, "learning_rate": 0.001, "loss": 2.1778, "step": 425500 }, { "epoch": 55.022624434389144, "grad_norm": 0.8435268998146057, "learning_rate": 0.001, "loss": 2.1221, "step": 425600 }, { "epoch": 55.03555268261151, "grad_norm": 0.8228572010993958, "learning_rate": 0.001, "loss": 2.1223, "step": 425700 }, { "epoch": 55.048480930833875, "grad_norm": 0.888073742389679, "learning_rate": 0.001, "loss": 2.1375, "step": 425800 }, { "epoch": 55.06140917905624, "grad_norm": 1.2029902935028076, "learning_rate": 0.001, "loss": 2.1227, "step": 425900 }, { "epoch": 55.07433742727861, "grad_norm": 2.0757341384887695, "learning_rate": 0.001, "loss": 2.1512, "step": 426000 }, { "epoch": 55.08726567550097, "grad_norm": 15.422935485839844, "learning_rate": 0.001, "loss": 2.127, "step": 426100 }, { "epoch": 55.10019392372334, "grad_norm": 0.8423173427581787, "learning_rate": 0.001, "loss": 2.1526, "step": 426200 }, { "epoch": 55.113122171945705, "grad_norm": 1.073805570602417, "learning_rate": 0.001, "loss": 2.1468, "step": 426300 }, { "epoch": 55.12605042016807, "grad_norm": 0.8813201785087585, "learning_rate": 0.001, "loss": 2.1642, "step": 426400 }, { "epoch": 55.138978668390436, "grad_norm": 1.2918591499328613, "learning_rate": 0.001, "loss": 2.152, "step": 426500 }, { "epoch": 55.1519069166128, "grad_norm": 123.4190673828125, "learning_rate": 0.001, "loss": 2.153, "step": 426600 }, { "epoch": 55.16483516483517, "grad_norm": 0.8208494782447815, "learning_rate": 0.001, "loss": 2.155, "step": 426700 }, { "epoch": 55.177763413057534, "grad_norm": 0.9120679497718811, "learning_rate": 0.001, "loss": 2.1449, "step": 426800 }, { "epoch": 55.1906916612799, "grad_norm": 0.8435400128364563, "learning_rate": 0.001, "loss": 2.1403, "step": 426900 }, { "epoch": 55.203619909502265, "grad_norm": 5.063479423522949, "learning_rate": 0.001, "loss": 2.1502, "step": 427000 }, { "epoch": 55.21654815772463, "grad_norm": 1.2474446296691895, "learning_rate": 0.001, "loss": 2.1757, "step": 427100 }, { "epoch": 55.229476405947, "grad_norm": 1.1353362798690796, "learning_rate": 0.001, "loss": 2.1659, "step": 427200 }, { "epoch": 55.24240465416936, "grad_norm": 1.067815899848938, "learning_rate": 0.001, "loss": 2.1732, "step": 427300 }, { "epoch": 55.25533290239173, "grad_norm": 1.363184928894043, "learning_rate": 0.001, "loss": 2.1678, "step": 427400 }, { "epoch": 55.268261150614094, "grad_norm": 0.7412878274917603, "learning_rate": 0.001, "loss": 2.1666, "step": 427500 }, { "epoch": 55.28118939883646, "grad_norm": 0.9123314619064331, "learning_rate": 0.001, "loss": 2.1659, "step": 427600 }, { "epoch": 55.294117647058826, "grad_norm": 1.8685389757156372, "learning_rate": 0.001, "loss": 2.18, "step": 427700 }, { "epoch": 55.30704589528119, "grad_norm": 1.1456475257873535, "learning_rate": 0.001, "loss": 2.1778, "step": 427800 }, { "epoch": 55.31997414350356, "grad_norm": 1.2978689670562744, "learning_rate": 0.001, "loss": 2.1749, "step": 427900 }, { "epoch": 55.33290239172592, "grad_norm": 0.8078108429908752, "learning_rate": 0.001, "loss": 2.1672, "step": 428000 }, { "epoch": 55.34583063994829, "grad_norm": 1.7803560495376587, "learning_rate": 0.001, "loss": 2.1648, "step": 428100 }, { "epoch": 55.358758888170655, "grad_norm": 0.8426740169525146, "learning_rate": 0.001, "loss": 2.1677, "step": 428200 }, { "epoch": 55.37168713639302, "grad_norm": 0.7453967928886414, "learning_rate": 0.001, "loss": 2.1829, "step": 428300 }, { "epoch": 55.38461538461539, "grad_norm": 0.8240205645561218, "learning_rate": 0.001, "loss": 2.1654, "step": 428400 }, { "epoch": 55.39754363283775, "grad_norm": 1.0666359663009644, "learning_rate": 0.001, "loss": 2.1624, "step": 428500 }, { "epoch": 55.41047188106012, "grad_norm": 0.8972625136375427, "learning_rate": 0.001, "loss": 2.1919, "step": 428600 }, { "epoch": 55.423400129282484, "grad_norm": 1.0303304195404053, "learning_rate": 0.001, "loss": 2.1929, "step": 428700 }, { "epoch": 55.43632837750485, "grad_norm": 1.0073071718215942, "learning_rate": 0.001, "loss": 2.1965, "step": 428800 }, { "epoch": 55.449256625727216, "grad_norm": 0.8778576850891113, "learning_rate": 0.001, "loss": 2.1784, "step": 428900 }, { "epoch": 55.46218487394958, "grad_norm": 0.9650039672851562, "learning_rate": 0.001, "loss": 2.187, "step": 429000 }, { "epoch": 55.47511312217195, "grad_norm": 0.847028911113739, "learning_rate": 0.001, "loss": 2.1727, "step": 429100 }, { "epoch": 55.48804137039431, "grad_norm": 0.8875895738601685, "learning_rate": 0.001, "loss": 2.1857, "step": 429200 }, { "epoch": 55.50096961861668, "grad_norm": 1.0883771181106567, "learning_rate": 0.001, "loss": 2.165, "step": 429300 }, { "epoch": 55.513897866839045, "grad_norm": 1.0826510190963745, "learning_rate": 0.001, "loss": 2.1885, "step": 429400 }, { "epoch": 55.52682611506141, "grad_norm": 0.8988215923309326, "learning_rate": 0.001, "loss": 2.1989, "step": 429500 }, { "epoch": 55.53975436328378, "grad_norm": 0.8069854974746704, "learning_rate": 0.001, "loss": 2.2017, "step": 429600 }, { "epoch": 55.55268261150614, "grad_norm": 0.882178783416748, "learning_rate": 0.001, "loss": 2.194, "step": 429700 }, { "epoch": 55.56561085972851, "grad_norm": 0.8352794051170349, "learning_rate": 0.001, "loss": 2.214, "step": 429800 }, { "epoch": 55.578539107950874, "grad_norm": 1.0264945030212402, "learning_rate": 0.001, "loss": 2.2107, "step": 429900 }, { "epoch": 55.59146735617324, "grad_norm": 2.729973077774048, "learning_rate": 0.001, "loss": 2.203, "step": 430000 }, { "epoch": 55.604395604395606, "grad_norm": 0.8203094601631165, "learning_rate": 0.001, "loss": 2.1855, "step": 430100 }, { "epoch": 55.61732385261797, "grad_norm": 0.9206326007843018, "learning_rate": 0.001, "loss": 2.2019, "step": 430200 }, { "epoch": 55.63025210084034, "grad_norm": 1.1219614744186401, "learning_rate": 0.001, "loss": 2.198, "step": 430300 }, { "epoch": 55.6431803490627, "grad_norm": 1.1471894979476929, "learning_rate": 0.001, "loss": 2.2102, "step": 430400 }, { "epoch": 55.65610859728507, "grad_norm": 1.09243643283844, "learning_rate": 0.001, "loss": 2.184, "step": 430500 }, { "epoch": 55.669036845507435, "grad_norm": 1.2433222532272339, "learning_rate": 0.001, "loss": 2.2117, "step": 430600 }, { "epoch": 55.6819650937298, "grad_norm": 1.0081161260604858, "learning_rate": 0.001, "loss": 2.1986, "step": 430700 }, { "epoch": 55.69489334195217, "grad_norm": 0.8425843119621277, "learning_rate": 0.001, "loss": 2.2052, "step": 430800 }, { "epoch": 55.70782159017453, "grad_norm": 1.1639982461929321, "learning_rate": 0.001, "loss": 2.2094, "step": 430900 }, { "epoch": 55.7207498383969, "grad_norm": 0.997532069683075, "learning_rate": 0.001, "loss": 2.2073, "step": 431000 }, { "epoch": 55.733678086619264, "grad_norm": 1.0237133502960205, "learning_rate": 0.001, "loss": 2.2204, "step": 431100 }, { "epoch": 55.74660633484163, "grad_norm": 0.879923939704895, "learning_rate": 0.001, "loss": 2.2047, "step": 431200 }, { "epoch": 55.759534583063996, "grad_norm": 0.992730975151062, "learning_rate": 0.001, "loss": 2.2092, "step": 431300 }, { "epoch": 55.77246283128636, "grad_norm": 1.127909541130066, "learning_rate": 0.001, "loss": 2.2225, "step": 431400 }, { "epoch": 55.78539107950873, "grad_norm": 1.1943010091781616, "learning_rate": 0.001, "loss": 2.221, "step": 431500 }, { "epoch": 55.79831932773109, "grad_norm": 0.8738762736320496, "learning_rate": 0.001, "loss": 2.1867, "step": 431600 }, { "epoch": 55.81124757595346, "grad_norm": 0.9957398772239685, "learning_rate": 0.001, "loss": 2.2291, "step": 431700 }, { "epoch": 55.824175824175825, "grad_norm": 1.1055572032928467, "learning_rate": 0.001, "loss": 2.2134, "step": 431800 }, { "epoch": 55.83710407239819, "grad_norm": 0.8424046039581299, "learning_rate": 0.001, "loss": 2.2157, "step": 431900 }, { "epoch": 55.85003232062056, "grad_norm": 1.3936965465545654, "learning_rate": 0.001, "loss": 2.2116, "step": 432000 }, { "epoch": 55.86296056884292, "grad_norm": 1.2201621532440186, "learning_rate": 0.001, "loss": 2.22, "step": 432100 }, { "epoch": 55.87588881706529, "grad_norm": 0.9348771572113037, "learning_rate": 0.001, "loss": 2.2219, "step": 432200 }, { "epoch": 55.888817065287654, "grad_norm": 15.004467964172363, "learning_rate": 0.001, "loss": 2.2461, "step": 432300 }, { "epoch": 55.90174531351002, "grad_norm": 1.236611247062683, "learning_rate": 0.001, "loss": 2.2223, "step": 432400 }, { "epoch": 55.914673561732386, "grad_norm": 1.150402545928955, "learning_rate": 0.001, "loss": 2.2196, "step": 432500 }, { "epoch": 55.92760180995475, "grad_norm": 0.7919371724128723, "learning_rate": 0.001, "loss": 2.1999, "step": 432600 }, { "epoch": 55.94053005817712, "grad_norm": 0.8969845175743103, "learning_rate": 0.001, "loss": 2.2228, "step": 432700 }, { "epoch": 55.95345830639948, "grad_norm": 0.7595580816268921, "learning_rate": 0.001, "loss": 2.2288, "step": 432800 }, { "epoch": 55.96638655462185, "grad_norm": 0.9974280595779419, "learning_rate": 0.001, "loss": 2.2328, "step": 432900 }, { "epoch": 55.979314802844215, "grad_norm": 1.008109211921692, "learning_rate": 0.001, "loss": 2.2433, "step": 433000 }, { "epoch": 55.99224305106658, "grad_norm": 2.31213641166687, "learning_rate": 0.001, "loss": 2.2231, "step": 433100 }, { "epoch": 56.005171299288946, "grad_norm": 1.0670816898345947, "learning_rate": 0.001, "loss": 2.1968, "step": 433200 }, { "epoch": 56.01809954751131, "grad_norm": 3.9657411575317383, "learning_rate": 0.001, "loss": 2.1504, "step": 433300 }, { "epoch": 56.03102779573368, "grad_norm": 1.271431803703308, "learning_rate": 0.001, "loss": 2.1462, "step": 433400 }, { "epoch": 56.043956043956044, "grad_norm": 1.1885467767715454, "learning_rate": 0.001, "loss": 2.128, "step": 433500 }, { "epoch": 56.05688429217841, "grad_norm": 1.10542631149292, "learning_rate": 0.001, "loss": 2.1415, "step": 433600 }, { "epoch": 56.069812540400775, "grad_norm": 1.1458709239959717, "learning_rate": 0.001, "loss": 2.1246, "step": 433700 }, { "epoch": 56.08274078862314, "grad_norm": 1.1313703060150146, "learning_rate": 0.001, "loss": 2.1579, "step": 433800 }, { "epoch": 56.09566903684551, "grad_norm": 1.0008461475372314, "learning_rate": 0.001, "loss": 2.1432, "step": 433900 }, { "epoch": 56.10859728506787, "grad_norm": 1.1536695957183838, "learning_rate": 0.001, "loss": 2.1432, "step": 434000 }, { "epoch": 56.12152553329024, "grad_norm": 1.4755510091781616, "learning_rate": 0.001, "loss": 2.1555, "step": 434100 }, { "epoch": 56.134453781512605, "grad_norm": 1.1108779907226562, "learning_rate": 0.001, "loss": 2.1458, "step": 434200 }, { "epoch": 56.14738202973497, "grad_norm": 1.1788949966430664, "learning_rate": 0.001, "loss": 2.1428, "step": 434300 }, { "epoch": 56.160310277957336, "grad_norm": 0.9289242029190063, "learning_rate": 0.001, "loss": 2.1485, "step": 434400 }, { "epoch": 56.1732385261797, "grad_norm": 0.8835317492485046, "learning_rate": 0.001, "loss": 2.1638, "step": 434500 }, { "epoch": 56.18616677440207, "grad_norm": 0.9161893129348755, "learning_rate": 0.001, "loss": 2.1411, "step": 434600 }, { "epoch": 56.199095022624434, "grad_norm": 1.0058248043060303, "learning_rate": 0.001, "loss": 2.1574, "step": 434700 }, { "epoch": 56.2120232708468, "grad_norm": 1.008274793624878, "learning_rate": 0.001, "loss": 2.1579, "step": 434800 }, { "epoch": 56.224951519069165, "grad_norm": 1.1031361818313599, "learning_rate": 0.001, "loss": 2.1772, "step": 434900 }, { "epoch": 56.23787976729153, "grad_norm": 1.0232454538345337, "learning_rate": 0.001, "loss": 2.1526, "step": 435000 }, { "epoch": 56.2508080155139, "grad_norm": 1.7106364965438843, "learning_rate": 0.001, "loss": 2.1756, "step": 435100 }, { "epoch": 56.26373626373626, "grad_norm": 13.525527954101562, "learning_rate": 0.001, "loss": 2.1489, "step": 435200 }, { "epoch": 56.27666451195863, "grad_norm": 1.4174407720565796, "learning_rate": 0.001, "loss": 2.1764, "step": 435300 }, { "epoch": 56.289592760180994, "grad_norm": 2.5786314010620117, "learning_rate": 0.001, "loss": 2.1685, "step": 435400 }, { "epoch": 56.30252100840336, "grad_norm": 0.9321570992469788, "learning_rate": 0.001, "loss": 2.1596, "step": 435500 }, { "epoch": 56.315449256625726, "grad_norm": 31.419857025146484, "learning_rate": 0.001, "loss": 2.1743, "step": 435600 }, { "epoch": 56.32837750484809, "grad_norm": 1.1333571672439575, "learning_rate": 0.001, "loss": 2.1799, "step": 435700 }, { "epoch": 56.34130575307046, "grad_norm": 1.0544979572296143, "learning_rate": 0.001, "loss": 2.1676, "step": 435800 }, { "epoch": 56.354234001292824, "grad_norm": 1.085017204284668, "learning_rate": 0.001, "loss": 2.176, "step": 435900 }, { "epoch": 56.36716224951519, "grad_norm": 1.043196678161621, "learning_rate": 0.001, "loss": 2.1606, "step": 436000 }, { "epoch": 56.380090497737555, "grad_norm": 1.230389952659607, "learning_rate": 0.001, "loss": 2.1747, "step": 436100 }, { "epoch": 56.39301874595992, "grad_norm": 1.1422429084777832, "learning_rate": 0.001, "loss": 2.1891, "step": 436200 }, { "epoch": 56.40594699418229, "grad_norm": 1.6499998569488525, "learning_rate": 0.001, "loss": 2.1998, "step": 436300 }, { "epoch": 56.41887524240465, "grad_norm": 1.0569249391555786, "learning_rate": 0.001, "loss": 2.1852, "step": 436400 }, { "epoch": 56.43180349062702, "grad_norm": 1.2344167232513428, "learning_rate": 0.001, "loss": 2.1583, "step": 436500 }, { "epoch": 56.444731738849384, "grad_norm": 1.1679548025131226, "learning_rate": 0.001, "loss": 2.1841, "step": 436600 }, { "epoch": 56.45765998707175, "grad_norm": 1.1191295385360718, "learning_rate": 0.001, "loss": 2.1723, "step": 436700 }, { "epoch": 56.470588235294116, "grad_norm": 0.8135955929756165, "learning_rate": 0.001, "loss": 2.1712, "step": 436800 }, { "epoch": 56.48351648351648, "grad_norm": 1.171868085861206, "learning_rate": 0.001, "loss": 2.1807, "step": 436900 }, { "epoch": 56.49644473173885, "grad_norm": 0.997683048248291, "learning_rate": 0.001, "loss": 2.2013, "step": 437000 }, { "epoch": 56.50937297996121, "grad_norm": 1.186270833015442, "learning_rate": 0.001, "loss": 2.16, "step": 437100 }, { "epoch": 56.52230122818358, "grad_norm": 1.1686092615127563, "learning_rate": 0.001, "loss": 2.2071, "step": 437200 }, { "epoch": 56.535229476405945, "grad_norm": 0.9980637431144714, "learning_rate": 0.001, "loss": 2.1922, "step": 437300 }, { "epoch": 56.54815772462831, "grad_norm": 1.037029504776001, "learning_rate": 0.001, "loss": 2.1857, "step": 437400 }, { "epoch": 56.56108597285068, "grad_norm": 5.01025915145874, "learning_rate": 0.001, "loss": 2.1848, "step": 437500 }, { "epoch": 56.57401422107304, "grad_norm": 1.2681797742843628, "learning_rate": 0.001, "loss": 2.1784, "step": 437600 }, { "epoch": 56.58694246929541, "grad_norm": 29.33336067199707, "learning_rate": 0.001, "loss": 2.1913, "step": 437700 }, { "epoch": 56.599870717517774, "grad_norm": 1.1817578077316284, "learning_rate": 0.001, "loss": 2.1938, "step": 437800 }, { "epoch": 56.61279896574014, "grad_norm": 1.198508858680725, "learning_rate": 0.001, "loss": 2.1989, "step": 437900 }, { "epoch": 56.625727213962506, "grad_norm": 0.9660055041313171, "learning_rate": 0.001, "loss": 2.207, "step": 438000 }, { "epoch": 56.63865546218487, "grad_norm": 1.026295781135559, "learning_rate": 0.001, "loss": 2.1839, "step": 438100 }, { "epoch": 56.65158371040724, "grad_norm": 1.0029852390289307, "learning_rate": 0.001, "loss": 2.1894, "step": 438200 }, { "epoch": 56.6645119586296, "grad_norm": 11.977059364318848, "learning_rate": 0.001, "loss": 2.1893, "step": 438300 }, { "epoch": 56.67744020685197, "grad_norm": 1.2387579679489136, "learning_rate": 0.001, "loss": 2.195, "step": 438400 }, { "epoch": 56.690368455074335, "grad_norm": 1.237770438194275, "learning_rate": 0.001, "loss": 2.1795, "step": 438500 }, { "epoch": 56.7032967032967, "grad_norm": 1.2182674407958984, "learning_rate": 0.001, "loss": 2.1882, "step": 438600 }, { "epoch": 56.71622495151907, "grad_norm": 0.9354594945907593, "learning_rate": 0.001, "loss": 2.1778, "step": 438700 }, { "epoch": 56.72915319974143, "grad_norm": 1.036739706993103, "learning_rate": 0.001, "loss": 2.1823, "step": 438800 }, { "epoch": 56.7420814479638, "grad_norm": 1.2513800859451294, "learning_rate": 0.001, "loss": 2.1904, "step": 438900 }, { "epoch": 56.755009696186164, "grad_norm": 0.9415711164474487, "learning_rate": 0.001, "loss": 2.1946, "step": 439000 }, { "epoch": 56.76793794440853, "grad_norm": 9.062629699707031, "learning_rate": 0.001, "loss": 2.2099, "step": 439100 }, { "epoch": 56.780866192630896, "grad_norm": 1.4248257875442505, "learning_rate": 0.001, "loss": 2.1957, "step": 439200 }, { "epoch": 56.79379444085326, "grad_norm": 1.0574617385864258, "learning_rate": 0.001, "loss": 2.2056, "step": 439300 }, { "epoch": 56.80672268907563, "grad_norm": 0.881255030632019, "learning_rate": 0.001, "loss": 2.1971, "step": 439400 }, { "epoch": 56.81965093729799, "grad_norm": 1.2472147941589355, "learning_rate": 0.001, "loss": 2.209, "step": 439500 }, { "epoch": 56.83257918552036, "grad_norm": 1.3544816970825195, "learning_rate": 0.001, "loss": 2.1929, "step": 439600 }, { "epoch": 56.845507433742725, "grad_norm": 1.0946398973464966, "learning_rate": 0.001, "loss": 2.1929, "step": 439700 }, { "epoch": 56.85843568196509, "grad_norm": 1.003428339958191, "learning_rate": 0.001, "loss": 2.1977, "step": 439800 }, { "epoch": 56.87136393018746, "grad_norm": 1.2328314781188965, "learning_rate": 0.001, "loss": 2.1823, "step": 439900 }, { "epoch": 56.88429217840982, "grad_norm": 1.04096257686615, "learning_rate": 0.001, "loss": 2.2106, "step": 440000 }, { "epoch": 56.89722042663219, "grad_norm": 0.9444669485092163, "learning_rate": 0.001, "loss": 2.2177, "step": 440100 }, { "epoch": 56.910148674854554, "grad_norm": 1.275795340538025, "learning_rate": 0.001, "loss": 2.1878, "step": 440200 }, { "epoch": 56.92307692307692, "grad_norm": 1.001016616821289, "learning_rate": 0.001, "loss": 2.189, "step": 440300 }, { "epoch": 56.936005171299286, "grad_norm": 1.3969852924346924, "learning_rate": 0.001, "loss": 2.1918, "step": 440400 }, { "epoch": 56.94893341952165, "grad_norm": 1.1854668855667114, "learning_rate": 0.001, "loss": 2.1949, "step": 440500 }, { "epoch": 56.96186166774402, "grad_norm": 1.0377061367034912, "learning_rate": 0.001, "loss": 2.2201, "step": 440600 }, { "epoch": 56.97478991596638, "grad_norm": 1.4894002676010132, "learning_rate": 0.001, "loss": 2.2061, "step": 440700 }, { "epoch": 56.98771816418875, "grad_norm": 11.226133346557617, "learning_rate": 0.001, "loss": 2.2099, "step": 440800 }, { "epoch": 57.00064641241112, "grad_norm": 0.8875390291213989, "learning_rate": 0.001, "loss": 2.1893, "step": 440900 }, { "epoch": 57.01357466063349, "grad_norm": 1.150948405265808, "learning_rate": 0.001, "loss": 2.1164, "step": 441000 }, { "epoch": 57.02650290885585, "grad_norm": 0.7525100111961365, "learning_rate": 0.001, "loss": 2.1206, "step": 441100 }, { "epoch": 57.03943115707822, "grad_norm": 0.8787497878074646, "learning_rate": 0.001, "loss": 2.1322, "step": 441200 }, { "epoch": 57.052359405300585, "grad_norm": 1.0540202856063843, "learning_rate": 0.001, "loss": 2.1255, "step": 441300 }, { "epoch": 57.06528765352295, "grad_norm": 1.1787155866622925, "learning_rate": 0.001, "loss": 2.1149, "step": 441400 }, { "epoch": 57.07821590174532, "grad_norm": 1.0212541818618774, "learning_rate": 0.001, "loss": 2.1266, "step": 441500 }, { "epoch": 57.09114414996768, "grad_norm": 1.0141769647598267, "learning_rate": 0.001, "loss": 2.1425, "step": 441600 }, { "epoch": 57.10407239819005, "grad_norm": 1.0903056859970093, "learning_rate": 0.001, "loss": 2.1231, "step": 441700 }, { "epoch": 57.117000646412414, "grad_norm": 0.8427395820617676, "learning_rate": 0.001, "loss": 2.1329, "step": 441800 }, { "epoch": 57.12992889463478, "grad_norm": 1.1575400829315186, "learning_rate": 0.001, "loss": 2.1557, "step": 441900 }, { "epoch": 57.142857142857146, "grad_norm": 1.2498408555984497, "learning_rate": 0.001, "loss": 2.1505, "step": 442000 }, { "epoch": 57.15578539107951, "grad_norm": 1.0960248708724976, "learning_rate": 0.001, "loss": 2.1468, "step": 442100 }, { "epoch": 57.16871363930188, "grad_norm": 1.267174482345581, "learning_rate": 0.001, "loss": 2.1454, "step": 442200 }, { "epoch": 57.18164188752424, "grad_norm": 1.2655055522918701, "learning_rate": 0.001, "loss": 2.1378, "step": 442300 }, { "epoch": 57.19457013574661, "grad_norm": 2.476675033569336, "learning_rate": 0.001, "loss": 2.1491, "step": 442400 }, { "epoch": 57.207498383968975, "grad_norm": 0.9084395170211792, "learning_rate": 0.001, "loss": 2.1454, "step": 442500 }, { "epoch": 57.22042663219134, "grad_norm": 0.8102348446846008, "learning_rate": 0.001, "loss": 2.1435, "step": 442600 }, { "epoch": 57.23335488041371, "grad_norm": 1.1359119415283203, "learning_rate": 0.001, "loss": 2.1516, "step": 442700 }, { "epoch": 57.24628312863607, "grad_norm": 1.0534394979476929, "learning_rate": 0.001, "loss": 2.1394, "step": 442800 }, { "epoch": 57.25921137685844, "grad_norm": 0.7564138770103455, "learning_rate": 0.001, "loss": 2.1606, "step": 442900 }, { "epoch": 57.272139625080804, "grad_norm": 0.798935055732727, "learning_rate": 0.001, "loss": 2.1456, "step": 443000 }, { "epoch": 57.28506787330317, "grad_norm": 0.9595226049423218, "learning_rate": 0.001, "loss": 2.15, "step": 443100 }, { "epoch": 57.297996121525536, "grad_norm": 0.8533619046211243, "learning_rate": 0.001, "loss": 2.1509, "step": 443200 }, { "epoch": 57.3109243697479, "grad_norm": 1.030059576034546, "learning_rate": 0.001, "loss": 2.1487, "step": 443300 }, { "epoch": 57.32385261797027, "grad_norm": 0.8334523439407349, "learning_rate": 0.001, "loss": 2.1594, "step": 443400 }, { "epoch": 57.33678086619263, "grad_norm": 1.1240748167037964, "learning_rate": 0.001, "loss": 2.1652, "step": 443500 }, { "epoch": 57.349709114415, "grad_norm": 0.891976535320282, "learning_rate": 0.001, "loss": 2.1642, "step": 443600 }, { "epoch": 57.362637362637365, "grad_norm": 1.1430466175079346, "learning_rate": 0.001, "loss": 2.1567, "step": 443700 }, { "epoch": 57.37556561085973, "grad_norm": 1.054990291595459, "learning_rate": 0.001, "loss": 2.1586, "step": 443800 }, { "epoch": 57.3884938590821, "grad_norm": 1.0865243673324585, "learning_rate": 0.001, "loss": 2.1642, "step": 443900 }, { "epoch": 57.40142210730446, "grad_norm": 0.9429094791412354, "learning_rate": 0.001, "loss": 2.1575, "step": 444000 }, { "epoch": 57.41435035552683, "grad_norm": 1.7239081859588623, "learning_rate": 0.001, "loss": 2.1529, "step": 444100 }, { "epoch": 57.427278603749194, "grad_norm": 0.8615800738334656, "learning_rate": 0.001, "loss": 2.175, "step": 444200 }, { "epoch": 57.44020685197156, "grad_norm": 1.1098217964172363, "learning_rate": 0.001, "loss": 2.1742, "step": 444300 }, { "epoch": 57.453135100193926, "grad_norm": 1.0210554599761963, "learning_rate": 0.001, "loss": 2.1649, "step": 444400 }, { "epoch": 57.46606334841629, "grad_norm": 1.2817907333374023, "learning_rate": 0.001, "loss": 2.1694, "step": 444500 }, { "epoch": 57.47899159663866, "grad_norm": 1.023175597190857, "learning_rate": 0.001, "loss": 2.1486, "step": 444600 }, { "epoch": 57.49191984486102, "grad_norm": 5.000954627990723, "learning_rate": 0.001, "loss": 2.1783, "step": 444700 }, { "epoch": 57.50484809308339, "grad_norm": 0.8907515406608582, "learning_rate": 0.001, "loss": 2.1848, "step": 444800 }, { "epoch": 57.517776341305755, "grad_norm": 1.1425909996032715, "learning_rate": 0.001, "loss": 2.1838, "step": 444900 }, { "epoch": 57.53070458952812, "grad_norm": 1.0380799770355225, "learning_rate": 0.001, "loss": 2.1751, "step": 445000 }, { "epoch": 57.543632837750486, "grad_norm": 1.2741676568984985, "learning_rate": 0.001, "loss": 2.1815, "step": 445100 }, { "epoch": 57.55656108597285, "grad_norm": 0.868259608745575, "learning_rate": 0.001, "loss": 2.1842, "step": 445200 }, { "epoch": 57.56948933419522, "grad_norm": 1.1430063247680664, "learning_rate": 0.001, "loss": 2.1843, "step": 445300 }, { "epoch": 57.582417582417584, "grad_norm": 1.3305703401565552, "learning_rate": 0.001, "loss": 2.1826, "step": 445400 }, { "epoch": 57.59534583063995, "grad_norm": 1.0865583419799805, "learning_rate": 0.001, "loss": 2.1979, "step": 445500 }, { "epoch": 57.608274078862316, "grad_norm": 0.8686307668685913, "learning_rate": 0.001, "loss": 2.1904, "step": 445600 }, { "epoch": 57.62120232708468, "grad_norm": 1.4612054824829102, "learning_rate": 0.001, "loss": 2.1771, "step": 445700 }, { "epoch": 57.63413057530705, "grad_norm": 1.2804734706878662, "learning_rate": 0.001, "loss": 2.1886, "step": 445800 }, { "epoch": 57.64705882352941, "grad_norm": 1.0810657739639282, "learning_rate": 0.001, "loss": 2.1859, "step": 445900 }, { "epoch": 57.65998707175178, "grad_norm": 2.0937321186065674, "learning_rate": 0.001, "loss": 2.1689, "step": 446000 }, { "epoch": 57.672915319974145, "grad_norm": 1.4382338523864746, "learning_rate": 0.001, "loss": 2.1842, "step": 446100 }, { "epoch": 57.68584356819651, "grad_norm": 0.8566992282867432, "learning_rate": 0.001, "loss": 2.2068, "step": 446200 }, { "epoch": 57.698771816418876, "grad_norm": 1.5083255767822266, "learning_rate": 0.001, "loss": 2.1936, "step": 446300 }, { "epoch": 57.71170006464124, "grad_norm": 1.1362136602401733, "learning_rate": 0.001, "loss": 2.1783, "step": 446400 }, { "epoch": 57.72462831286361, "grad_norm": 1.0346778631210327, "learning_rate": 0.001, "loss": 2.1681, "step": 446500 }, { "epoch": 57.737556561085974, "grad_norm": 1.1268583536148071, "learning_rate": 0.001, "loss": 2.1892, "step": 446600 }, { "epoch": 57.75048480930834, "grad_norm": 1.0105268955230713, "learning_rate": 0.001, "loss": 2.192, "step": 446700 }, { "epoch": 57.763413057530705, "grad_norm": 1.0625404119491577, "learning_rate": 0.001, "loss": 2.1998, "step": 446800 }, { "epoch": 57.77634130575307, "grad_norm": 1.1516121625900269, "learning_rate": 0.001, "loss": 2.1906, "step": 446900 }, { "epoch": 57.78926955397544, "grad_norm": 0.935725748538971, "learning_rate": 0.001, "loss": 2.1967, "step": 447000 }, { "epoch": 57.8021978021978, "grad_norm": 0.863503634929657, "learning_rate": 0.001, "loss": 2.1916, "step": 447100 }, { "epoch": 57.81512605042017, "grad_norm": 1.0834473371505737, "learning_rate": 0.001, "loss": 2.1968, "step": 447200 }, { "epoch": 57.828054298642535, "grad_norm": 1.258195400238037, "learning_rate": 0.001, "loss": 2.2294, "step": 447300 }, { "epoch": 57.8409825468649, "grad_norm": 1.6500145196914673, "learning_rate": 0.001, "loss": 2.1972, "step": 447400 }, { "epoch": 57.853910795087266, "grad_norm": 1.2156736850738525, "learning_rate": 0.001, "loss": 2.1886, "step": 447500 }, { "epoch": 57.86683904330963, "grad_norm": 0.9443284869194031, "learning_rate": 0.001, "loss": 2.194, "step": 447600 }, { "epoch": 57.879767291532, "grad_norm": 1.1605784893035889, "learning_rate": 0.001, "loss": 2.1949, "step": 447700 }, { "epoch": 57.892695539754364, "grad_norm": 1.472683310508728, "learning_rate": 0.001, "loss": 2.2084, "step": 447800 }, { "epoch": 57.90562378797673, "grad_norm": 1.2355793714523315, "learning_rate": 0.001, "loss": 2.187, "step": 447900 }, { "epoch": 57.918552036199095, "grad_norm": 1.3023878335952759, "learning_rate": 0.001, "loss": 2.2163, "step": 448000 }, { "epoch": 57.93148028442146, "grad_norm": 1.22116219997406, "learning_rate": 0.001, "loss": 2.1951, "step": 448100 }, { "epoch": 57.94440853264383, "grad_norm": 1.2066795825958252, "learning_rate": 0.001, "loss": 2.1996, "step": 448200 }, { "epoch": 57.95733678086619, "grad_norm": 1.088233232498169, "learning_rate": 0.001, "loss": 2.2115, "step": 448300 }, { "epoch": 57.97026502908856, "grad_norm": 0.8101451396942139, "learning_rate": 0.001, "loss": 2.2147, "step": 448400 }, { "epoch": 57.983193277310924, "grad_norm": 2.717845916748047, "learning_rate": 0.001, "loss": 2.192, "step": 448500 }, { "epoch": 57.99612152553329, "grad_norm": 0.9690465927124023, "learning_rate": 0.001, "loss": 2.2046, "step": 448600 }, { "epoch": 58.009049773755656, "grad_norm": 1.0362616777420044, "learning_rate": 0.001, "loss": 2.1387, "step": 448700 }, { "epoch": 58.02197802197802, "grad_norm": 0.8355876803398132, "learning_rate": 0.001, "loss": 2.1061, "step": 448800 }, { "epoch": 58.03490627020039, "grad_norm": 1.4579602479934692, "learning_rate": 0.001, "loss": 2.116, "step": 448900 }, { "epoch": 58.04783451842275, "grad_norm": 1.0361167192459106, "learning_rate": 0.001, "loss": 2.11, "step": 449000 }, { "epoch": 58.06076276664512, "grad_norm": 0.9420852065086365, "learning_rate": 0.001, "loss": 2.1279, "step": 449100 }, { "epoch": 58.073691014867485, "grad_norm": 1.0378605127334595, "learning_rate": 0.001, "loss": 2.1347, "step": 449200 }, { "epoch": 58.08661926308985, "grad_norm": 1.150696873664856, "learning_rate": 0.001, "loss": 2.1188, "step": 449300 }, { "epoch": 58.09954751131222, "grad_norm": 1.0716304779052734, "learning_rate": 0.001, "loss": 2.1315, "step": 449400 }, { "epoch": 58.11247575953458, "grad_norm": 0.8977892398834229, "learning_rate": 0.001, "loss": 2.1447, "step": 449500 }, { "epoch": 58.12540400775695, "grad_norm": 1.0564234256744385, "learning_rate": 0.001, "loss": 2.143, "step": 449600 }, { "epoch": 58.138332255979314, "grad_norm": 0.8533039093017578, "learning_rate": 0.001, "loss": 2.1336, "step": 449700 }, { "epoch": 58.15126050420168, "grad_norm": 0.9859077334403992, "learning_rate": 0.001, "loss": 2.1285, "step": 449800 }, { "epoch": 58.164188752424046, "grad_norm": 0.8608753681182861, "learning_rate": 0.001, "loss": 2.1238, "step": 449900 }, { "epoch": 58.17711700064641, "grad_norm": 1.0566481351852417, "learning_rate": 0.001, "loss": 2.1318, "step": 450000 }, { "epoch": 58.19004524886878, "grad_norm": 0.9321627616882324, "learning_rate": 0.001, "loss": 2.1574, "step": 450100 }, { "epoch": 58.20297349709114, "grad_norm": 0.9453641772270203, "learning_rate": 0.001, "loss": 2.1488, "step": 450200 }, { "epoch": 58.21590174531351, "grad_norm": 1.074171543121338, "learning_rate": 0.001, "loss": 2.1738, "step": 450300 }, { "epoch": 58.228829993535875, "grad_norm": 1.0522422790527344, "learning_rate": 0.001, "loss": 2.1332, "step": 450400 }, { "epoch": 58.24175824175824, "grad_norm": 1.2238874435424805, "learning_rate": 0.001, "loss": 2.1575, "step": 450500 }, { "epoch": 58.25468648998061, "grad_norm": 0.8440265655517578, "learning_rate": 0.001, "loss": 2.1487, "step": 450600 }, { "epoch": 58.26761473820297, "grad_norm": 1.159397006034851, "learning_rate": 0.001, "loss": 2.1644, "step": 450700 }, { "epoch": 58.28054298642534, "grad_norm": 0.9408294558525085, "learning_rate": 0.001, "loss": 2.1498, "step": 450800 }, { "epoch": 58.293471234647704, "grad_norm": 0.8083540797233582, "learning_rate": 0.001, "loss": 2.1682, "step": 450900 }, { "epoch": 58.30639948287007, "grad_norm": 0.9360142350196838, "learning_rate": 0.001, "loss": 2.1528, "step": 451000 }, { "epoch": 58.319327731092436, "grad_norm": 1.6730915307998657, "learning_rate": 0.001, "loss": 2.1405, "step": 451100 }, { "epoch": 58.3322559793148, "grad_norm": 1.104443907737732, "learning_rate": 0.001, "loss": 2.145, "step": 451200 }, { "epoch": 58.34518422753717, "grad_norm": 1.1414750814437866, "learning_rate": 0.001, "loss": 2.1461, "step": 451300 }, { "epoch": 58.35811247575953, "grad_norm": 1.061569333076477, "learning_rate": 0.001, "loss": 2.1719, "step": 451400 }, { "epoch": 58.3710407239819, "grad_norm": 0.8981425762176514, "learning_rate": 0.001, "loss": 2.1619, "step": 451500 }, { "epoch": 58.383968972204265, "grad_norm": 0.8598940372467041, "learning_rate": 0.001, "loss": 2.1855, "step": 451600 }, { "epoch": 58.39689722042663, "grad_norm": 0.7661129236221313, "learning_rate": 0.001, "loss": 2.1468, "step": 451700 }, { "epoch": 58.409825468649, "grad_norm": 0.933005154132843, "learning_rate": 0.001, "loss": 2.1659, "step": 451800 }, { "epoch": 58.42275371687136, "grad_norm": 1.1896065473556519, "learning_rate": 0.001, "loss": 2.1655, "step": 451900 }, { "epoch": 58.43568196509373, "grad_norm": 1.1125649213790894, "learning_rate": 0.001, "loss": 2.1493, "step": 452000 }, { "epoch": 58.448610213316094, "grad_norm": 1.3312561511993408, "learning_rate": 0.001, "loss": 2.1864, "step": 452100 }, { "epoch": 58.46153846153846, "grad_norm": 2.835216522216797, "learning_rate": 0.001, "loss": 2.168, "step": 452200 }, { "epoch": 58.474466709760826, "grad_norm": 1.0918785333633423, "learning_rate": 0.001, "loss": 2.1579, "step": 452300 }, { "epoch": 58.48739495798319, "grad_norm": 0.7527005672454834, "learning_rate": 0.001, "loss": 2.187, "step": 452400 }, { "epoch": 58.50032320620556, "grad_norm": 1.5500452518463135, "learning_rate": 0.001, "loss": 2.1594, "step": 452500 }, { "epoch": 58.51325145442792, "grad_norm": 3.268749475479126, "learning_rate": 0.001, "loss": 2.1921, "step": 452600 }, { "epoch": 58.52617970265029, "grad_norm": 1.6677029132843018, "learning_rate": 0.001, "loss": 2.186, "step": 452700 }, { "epoch": 58.539107950872655, "grad_norm": 1.0430630445480347, "learning_rate": 0.001, "loss": 2.164, "step": 452800 }, { "epoch": 58.55203619909502, "grad_norm": 1.1519440412521362, "learning_rate": 0.001, "loss": 2.1941, "step": 452900 }, { "epoch": 58.56496444731739, "grad_norm": 1.094925045967102, "learning_rate": 0.001, "loss": 2.1705, "step": 453000 }, { "epoch": 58.57789269553975, "grad_norm": 0.9992480874061584, "learning_rate": 0.001, "loss": 2.1773, "step": 453100 }, { "epoch": 58.59082094376212, "grad_norm": 1.077775239944458, "learning_rate": 0.001, "loss": 2.1933, "step": 453200 }, { "epoch": 58.603749191984484, "grad_norm": 1.0427526235580444, "learning_rate": 0.001, "loss": 2.158, "step": 453300 }, { "epoch": 58.61667744020685, "grad_norm": 1.4239212274551392, "learning_rate": 0.001, "loss": 2.1779, "step": 453400 }, { "epoch": 58.629605688429216, "grad_norm": 1.007533311843872, "learning_rate": 0.001, "loss": 2.1911, "step": 453500 }, { "epoch": 58.64253393665158, "grad_norm": 0.8268265128135681, "learning_rate": 0.001, "loss": 2.1795, "step": 453600 }, { "epoch": 58.65546218487395, "grad_norm": 4.81456184387207, "learning_rate": 0.001, "loss": 2.2032, "step": 453700 }, { "epoch": 58.66839043309631, "grad_norm": 1.169783592224121, "learning_rate": 0.001, "loss": 2.1709, "step": 453800 }, { "epoch": 58.68131868131868, "grad_norm": 0.9257127642631531, "learning_rate": 0.001, "loss": 2.1752, "step": 453900 }, { "epoch": 58.694246929541045, "grad_norm": 2.5359761714935303, "learning_rate": 0.001, "loss": 2.1786, "step": 454000 }, { "epoch": 58.70717517776341, "grad_norm": 1.2159374952316284, "learning_rate": 0.001, "loss": 2.1836, "step": 454100 }, { "epoch": 58.720103425985776, "grad_norm": 0.7369974255561829, "learning_rate": 0.001, "loss": 2.1848, "step": 454200 }, { "epoch": 58.73303167420814, "grad_norm": 1.0372599363327026, "learning_rate": 0.001, "loss": 2.1781, "step": 454300 }, { "epoch": 58.74595992243051, "grad_norm": 1.0757217407226562, "learning_rate": 0.001, "loss": 2.2019, "step": 454400 }, { "epoch": 58.758888170652874, "grad_norm": 0.9146413803100586, "learning_rate": 0.001, "loss": 2.2001, "step": 454500 }, { "epoch": 58.77181641887524, "grad_norm": 1.3334541320800781, "learning_rate": 0.001, "loss": 2.1791, "step": 454600 }, { "epoch": 58.784744667097605, "grad_norm": 0.8306586742401123, "learning_rate": 0.001, "loss": 2.1914, "step": 454700 }, { "epoch": 58.79767291531997, "grad_norm": 1.0291887521743774, "learning_rate": 0.001, "loss": 2.2103, "step": 454800 }, { "epoch": 58.81060116354234, "grad_norm": 0.9501275420188904, "learning_rate": 0.001, "loss": 2.2019, "step": 454900 }, { "epoch": 58.8235294117647, "grad_norm": 0.9666630625724792, "learning_rate": 0.001, "loss": 2.1777, "step": 455000 }, { "epoch": 58.83645765998707, "grad_norm": 1.0700268745422363, "learning_rate": 0.001, "loss": 2.2051, "step": 455100 }, { "epoch": 58.849385908209435, "grad_norm": 1.1567116975784302, "learning_rate": 0.001, "loss": 2.1976, "step": 455200 }, { "epoch": 58.8623141564318, "grad_norm": 0.9600836038589478, "learning_rate": 0.001, "loss": 2.2041, "step": 455300 }, { "epoch": 58.875242404654166, "grad_norm": 1.2260091304779053, "learning_rate": 0.001, "loss": 2.2021, "step": 455400 }, { "epoch": 58.88817065287653, "grad_norm": 0.915492594242096, "learning_rate": 0.001, "loss": 2.185, "step": 455500 }, { "epoch": 58.9010989010989, "grad_norm": 1.5600992441177368, "learning_rate": 0.001, "loss": 2.2004, "step": 455600 }, { "epoch": 58.914027149321264, "grad_norm": 1.1805139780044556, "learning_rate": 0.001, "loss": 2.1806, "step": 455700 }, { "epoch": 58.92695539754363, "grad_norm": 0.8991602063179016, "learning_rate": 0.001, "loss": 2.1913, "step": 455800 }, { "epoch": 58.939883645765995, "grad_norm": 0.8167803287506104, "learning_rate": 0.001, "loss": 2.2139, "step": 455900 }, { "epoch": 58.95281189398836, "grad_norm": 0.8560812473297119, "learning_rate": 0.001, "loss": 2.2034, "step": 456000 }, { "epoch": 58.96574014221073, "grad_norm": 0.8779365420341492, "learning_rate": 0.001, "loss": 2.2019, "step": 456100 }, { "epoch": 58.97866839043309, "grad_norm": 2.020662307739258, "learning_rate": 0.001, "loss": 2.2175, "step": 456200 }, { "epoch": 58.991596638655466, "grad_norm": 1.5775818824768066, "learning_rate": 0.001, "loss": 2.1988, "step": 456300 }, { "epoch": 59.00452488687783, "grad_norm": 0.8613344430923462, "learning_rate": 0.001, "loss": 2.1853, "step": 456400 }, { "epoch": 59.0174531351002, "grad_norm": 0.9399396181106567, "learning_rate": 0.001, "loss": 2.1213, "step": 456500 }, { "epoch": 59.03038138332256, "grad_norm": 1.091321587562561, "learning_rate": 0.001, "loss": 2.1291, "step": 456600 }, { "epoch": 59.04330963154493, "grad_norm": 1.1507686376571655, "learning_rate": 0.001, "loss": 2.1318, "step": 456700 }, { "epoch": 59.056237879767295, "grad_norm": 0.9712055921554565, "learning_rate": 0.001, "loss": 2.1292, "step": 456800 }, { "epoch": 59.06916612798966, "grad_norm": 0.8589370846748352, "learning_rate": 0.001, "loss": 2.116, "step": 456900 }, { "epoch": 59.08209437621203, "grad_norm": 1.1259042024612427, "learning_rate": 0.001, "loss": 2.1355, "step": 457000 }, { "epoch": 59.09502262443439, "grad_norm": 1.9565410614013672, "learning_rate": 0.001, "loss": 2.1259, "step": 457100 }, { "epoch": 59.10795087265676, "grad_norm": 1.0832005739212036, "learning_rate": 0.001, "loss": 2.1245, "step": 457200 }, { "epoch": 59.120879120879124, "grad_norm": 1.1373786926269531, "learning_rate": 0.001, "loss": 2.1312, "step": 457300 }, { "epoch": 59.13380736910149, "grad_norm": 1.0665301084518433, "learning_rate": 0.001, "loss": 2.1372, "step": 457400 }, { "epoch": 59.146735617323856, "grad_norm": 1.107534408569336, "learning_rate": 0.001, "loss": 2.1374, "step": 457500 }, { "epoch": 59.15966386554622, "grad_norm": 0.9965604543685913, "learning_rate": 0.001, "loss": 2.1233, "step": 457600 }, { "epoch": 59.17259211376859, "grad_norm": 0.9728068113327026, "learning_rate": 0.001, "loss": 2.1188, "step": 457700 }, { "epoch": 59.18552036199095, "grad_norm": 1.0368012189865112, "learning_rate": 0.001, "loss": 2.1148, "step": 457800 }, { "epoch": 59.19844861021332, "grad_norm": 1.444899082183838, "learning_rate": 0.001, "loss": 2.1389, "step": 457900 }, { "epoch": 59.211376858435685, "grad_norm": 1.0570826530456543, "learning_rate": 0.001, "loss": 2.147, "step": 458000 }, { "epoch": 59.22430510665805, "grad_norm": 0.9571044445037842, "learning_rate": 0.001, "loss": 2.1517, "step": 458100 }, { "epoch": 59.237233354880416, "grad_norm": 0.9657354354858398, "learning_rate": 0.001, "loss": 2.141, "step": 458200 }, { "epoch": 59.25016160310278, "grad_norm": 0.9810009598731995, "learning_rate": 0.001, "loss": 2.1636, "step": 458300 }, { "epoch": 59.26308985132515, "grad_norm": 0.9984093904495239, "learning_rate": 0.001, "loss": 2.1253, "step": 458400 }, { "epoch": 59.276018099547514, "grad_norm": 1.055309534072876, "learning_rate": 0.001, "loss": 2.155, "step": 458500 }, { "epoch": 59.28894634776988, "grad_norm": 0.8875457048416138, "learning_rate": 0.001, "loss": 2.135, "step": 458600 }, { "epoch": 59.301874595992246, "grad_norm": 0.8996825218200684, "learning_rate": 0.001, "loss": 2.1212, "step": 458700 }, { "epoch": 59.31480284421461, "grad_norm": 0.9490931630134583, "learning_rate": 0.001, "loss": 2.1489, "step": 458800 }, { "epoch": 59.32773109243698, "grad_norm": 0.8840624690055847, "learning_rate": 0.001, "loss": 2.1416, "step": 458900 }, { "epoch": 59.34065934065934, "grad_norm": 0.90696120262146, "learning_rate": 0.001, "loss": 2.1478, "step": 459000 }, { "epoch": 59.35358758888171, "grad_norm": 0.9010637402534485, "learning_rate": 0.001, "loss": 2.1456, "step": 459100 }, { "epoch": 59.366515837104075, "grad_norm": 1.0876010656356812, "learning_rate": 0.001, "loss": 2.1644, "step": 459200 }, { "epoch": 59.37944408532644, "grad_norm": 1.0352210998535156, "learning_rate": 0.001, "loss": 2.1516, "step": 459300 }, { "epoch": 59.392372333548806, "grad_norm": 0.8346291780471802, "learning_rate": 0.001, "loss": 2.1249, "step": 459400 }, { "epoch": 59.40530058177117, "grad_norm": 1.1577122211456299, "learning_rate": 0.001, "loss": 2.1505, "step": 459500 }, { "epoch": 59.41822882999354, "grad_norm": 1.5691719055175781, "learning_rate": 0.001, "loss": 2.1445, "step": 459600 }, { "epoch": 59.431157078215904, "grad_norm": 0.8587340712547302, "learning_rate": 0.001, "loss": 2.1653, "step": 459700 }, { "epoch": 59.44408532643827, "grad_norm": 0.9209771156311035, "learning_rate": 0.001, "loss": 2.1656, "step": 459800 }, { "epoch": 59.457013574660635, "grad_norm": 2.5647037029266357, "learning_rate": 0.001, "loss": 2.1588, "step": 459900 }, { "epoch": 59.469941822883, "grad_norm": 1.1395944356918335, "learning_rate": 0.001, "loss": 2.1787, "step": 460000 }, { "epoch": 59.48287007110537, "grad_norm": 0.9001279473304749, "learning_rate": 0.001, "loss": 2.1615, "step": 460100 }, { "epoch": 59.49579831932773, "grad_norm": 1.0046358108520508, "learning_rate": 0.001, "loss": 2.1626, "step": 460200 }, { "epoch": 59.5087265675501, "grad_norm": 1.1617944240570068, "learning_rate": 0.001, "loss": 2.1605, "step": 460300 }, { "epoch": 59.521654815772465, "grad_norm": 3.41201114654541, "learning_rate": 0.001, "loss": 2.1484, "step": 460400 }, { "epoch": 59.53458306399483, "grad_norm": 1.2921230792999268, "learning_rate": 0.001, "loss": 2.1675, "step": 460500 }, { "epoch": 59.547511312217196, "grad_norm": 1.167002558708191, "learning_rate": 0.001, "loss": 2.1737, "step": 460600 }, { "epoch": 59.56043956043956, "grad_norm": 1.804695963859558, "learning_rate": 0.001, "loss": 2.1884, "step": 460700 }, { "epoch": 59.57336780866193, "grad_norm": 1.2488627433776855, "learning_rate": 0.001, "loss": 2.1849, "step": 460800 }, { "epoch": 59.586296056884294, "grad_norm": 1.5710711479187012, "learning_rate": 0.001, "loss": 2.198, "step": 460900 }, { "epoch": 59.59922430510666, "grad_norm": 0.9252648949623108, "learning_rate": 0.001, "loss": 2.172, "step": 461000 }, { "epoch": 59.612152553329025, "grad_norm": 1.085293173789978, "learning_rate": 0.001, "loss": 2.1864, "step": 461100 }, { "epoch": 59.62508080155139, "grad_norm": 1.310067892074585, "learning_rate": 0.001, "loss": 2.1579, "step": 461200 }, { "epoch": 59.63800904977376, "grad_norm": 0.8343604803085327, "learning_rate": 0.001, "loss": 2.1845, "step": 461300 }, { "epoch": 59.65093729799612, "grad_norm": 1.0940946340560913, "learning_rate": 0.001, "loss": 2.174, "step": 461400 }, { "epoch": 59.66386554621849, "grad_norm": 0.9541037082672119, "learning_rate": 0.001, "loss": 2.1717, "step": 461500 }, { "epoch": 59.676793794440854, "grad_norm": 1.0572283267974854, "learning_rate": 0.001, "loss": 2.1537, "step": 461600 }, { "epoch": 59.68972204266322, "grad_norm": 1.0883197784423828, "learning_rate": 0.001, "loss": 2.1708, "step": 461700 }, { "epoch": 59.702650290885586, "grad_norm": 0.8739935755729675, "learning_rate": 0.001, "loss": 2.1814, "step": 461800 }, { "epoch": 59.71557853910795, "grad_norm": 1.6610032320022583, "learning_rate": 0.001, "loss": 2.1811, "step": 461900 }, { "epoch": 59.72850678733032, "grad_norm": 1.3836004734039307, "learning_rate": 0.001, "loss": 2.2072, "step": 462000 }, { "epoch": 59.74143503555268, "grad_norm": 1.2654815912246704, "learning_rate": 0.001, "loss": 2.1811, "step": 462100 }, { "epoch": 59.75436328377505, "grad_norm": 1.0124967098236084, "learning_rate": 0.001, "loss": 2.1952, "step": 462200 }, { "epoch": 59.767291531997415, "grad_norm": 0.9189296364784241, "learning_rate": 0.001, "loss": 2.1784, "step": 462300 }, { "epoch": 59.78021978021978, "grad_norm": 1.445117473602295, "learning_rate": 0.001, "loss": 2.1967, "step": 462400 }, { "epoch": 59.79314802844215, "grad_norm": 0.9743508696556091, "learning_rate": 0.001, "loss": 2.1812, "step": 462500 }, { "epoch": 59.80607627666451, "grad_norm": 1.095767855644226, "learning_rate": 0.001, "loss": 2.1784, "step": 462600 }, { "epoch": 59.81900452488688, "grad_norm": 1.1392220258712769, "learning_rate": 0.001, "loss": 2.2003, "step": 462700 }, { "epoch": 59.831932773109244, "grad_norm": 1.1394479274749756, "learning_rate": 0.001, "loss": 2.1853, "step": 462800 }, { "epoch": 59.84486102133161, "grad_norm": 1.0247414112091064, "learning_rate": 0.001, "loss": 2.1976, "step": 462900 }, { "epoch": 59.857789269553976, "grad_norm": 1.2891467809677124, "learning_rate": 0.001, "loss": 2.1908, "step": 463000 }, { "epoch": 59.87071751777634, "grad_norm": 1.1850237846374512, "learning_rate": 0.001, "loss": 2.1945, "step": 463100 }, { "epoch": 59.88364576599871, "grad_norm": 1.244676113128662, "learning_rate": 0.001, "loss": 2.1949, "step": 463200 }, { "epoch": 59.89657401422107, "grad_norm": 0.9813307523727417, "learning_rate": 0.001, "loss": 2.1905, "step": 463300 }, { "epoch": 59.90950226244344, "grad_norm": 1.7458158731460571, "learning_rate": 0.001, "loss": 2.1988, "step": 463400 }, { "epoch": 59.922430510665805, "grad_norm": 0.8327128291130066, "learning_rate": 0.001, "loss": 2.1971, "step": 463500 }, { "epoch": 59.93535875888817, "grad_norm": 0.9403359293937683, "learning_rate": 0.001, "loss": 2.1802, "step": 463600 }, { "epoch": 59.94828700711054, "grad_norm": 1.1269240379333496, "learning_rate": 0.001, "loss": 2.1741, "step": 463700 }, { "epoch": 59.9612152553329, "grad_norm": 0.8886727094650269, "learning_rate": 0.001, "loss": 2.2218, "step": 463800 }, { "epoch": 59.97414350355527, "grad_norm": 0.9089176654815674, "learning_rate": 0.001, "loss": 2.197, "step": 463900 }, { "epoch": 59.987071751777634, "grad_norm": 0.9570876955986023, "learning_rate": 0.001, "loss": 2.2032, "step": 464000 }, { "epoch": 60.0, "grad_norm": 3.1124095916748047, "learning_rate": 0.001, "loss": 2.1559, "step": 464100 }, { "epoch": 60.012928248222366, "grad_norm": 1.7794305086135864, "learning_rate": 0.001, "loss": 2.1086, "step": 464200 }, { "epoch": 60.02585649644473, "grad_norm": 2.0868399143218994, "learning_rate": 0.001, "loss": 2.1213, "step": 464300 }, { "epoch": 60.0387847446671, "grad_norm": 1.9850748777389526, "learning_rate": 0.001, "loss": 2.1276, "step": 464400 }, { "epoch": 60.05171299288946, "grad_norm": 2.0731348991394043, "learning_rate": 0.001, "loss": 2.1189, "step": 464500 }, { "epoch": 60.06464124111183, "grad_norm": 1.1953433752059937, "learning_rate": 0.001, "loss": 2.1101, "step": 464600 }, { "epoch": 60.077569489334195, "grad_norm": 2.2653324604034424, "learning_rate": 0.001, "loss": 2.0982, "step": 464700 }, { "epoch": 60.09049773755656, "grad_norm": 1.5183846950531006, "learning_rate": 0.001, "loss": 2.1238, "step": 464800 }, { "epoch": 60.10342598577893, "grad_norm": 1.9369176626205444, "learning_rate": 0.001, "loss": 2.1136, "step": 464900 }, { "epoch": 60.11635423400129, "grad_norm": 1.7047442197799683, "learning_rate": 0.001, "loss": 2.1288, "step": 465000 }, { "epoch": 60.12928248222366, "grad_norm": 1.8565011024475098, "learning_rate": 0.001, "loss": 2.1118, "step": 465100 }, { "epoch": 60.142210730446024, "grad_norm": 1.6812502145767212, "learning_rate": 0.001, "loss": 2.1365, "step": 465200 }, { "epoch": 60.15513897866839, "grad_norm": 1.3717910051345825, "learning_rate": 0.001, "loss": 2.1048, "step": 465300 }, { "epoch": 60.168067226890756, "grad_norm": 1.484788179397583, "learning_rate": 0.001, "loss": 2.1367, "step": 465400 }, { "epoch": 60.18099547511312, "grad_norm": 1.8509202003479004, "learning_rate": 0.001, "loss": 2.131, "step": 465500 }, { "epoch": 60.19392372333549, "grad_norm": 2.9273016452789307, "learning_rate": 0.001, "loss": 2.11, "step": 465600 }, { "epoch": 60.20685197155785, "grad_norm": 1.4568381309509277, "learning_rate": 0.001, "loss": 2.1189, "step": 465700 }, { "epoch": 60.21978021978022, "grad_norm": 1.6071696281433105, "learning_rate": 0.001, "loss": 2.1295, "step": 465800 }, { "epoch": 60.232708468002585, "grad_norm": 2.1448705196380615, "learning_rate": 0.001, "loss": 2.1232, "step": 465900 }, { "epoch": 60.24563671622495, "grad_norm": 1.8558244705200195, "learning_rate": 0.001, "loss": 2.1387, "step": 466000 }, { "epoch": 60.25856496444732, "grad_norm": 1.6082736253738403, "learning_rate": 0.001, "loss": 2.139, "step": 466100 }, { "epoch": 60.27149321266968, "grad_norm": 1.6730395555496216, "learning_rate": 0.001, "loss": 2.1405, "step": 466200 }, { "epoch": 60.28442146089205, "grad_norm": 1.90849769115448, "learning_rate": 0.001, "loss": 2.127, "step": 466300 }, { "epoch": 60.297349709114414, "grad_norm": 1.9695103168487549, "learning_rate": 0.001, "loss": 2.1299, "step": 466400 }, { "epoch": 60.31027795733678, "grad_norm": 1.4983335733413696, "learning_rate": 0.001, "loss": 2.1555, "step": 466500 }, { "epoch": 60.323206205559146, "grad_norm": 1.360674500465393, "learning_rate": 0.001, "loss": 2.1269, "step": 466600 }, { "epoch": 60.33613445378151, "grad_norm": 1.8916645050048828, "learning_rate": 0.001, "loss": 2.1492, "step": 466700 }, { "epoch": 60.34906270200388, "grad_norm": 1.483698844909668, "learning_rate": 0.001, "loss": 2.1351, "step": 466800 }, { "epoch": 60.36199095022624, "grad_norm": 1.7998626232147217, "learning_rate": 0.001, "loss": 2.1417, "step": 466900 }, { "epoch": 60.37491919844861, "grad_norm": 1.788037657737732, "learning_rate": 0.001, "loss": 2.1523, "step": 467000 }, { "epoch": 60.387847446670975, "grad_norm": 2.028696298599243, "learning_rate": 0.001, "loss": 2.1362, "step": 467100 }, { "epoch": 60.40077569489334, "grad_norm": 1.345616340637207, "learning_rate": 0.001, "loss": 2.1643, "step": 467200 }, { "epoch": 60.413703943115706, "grad_norm": 1.927949070930481, "learning_rate": 0.001, "loss": 2.1511, "step": 467300 }, { "epoch": 60.42663219133807, "grad_norm": 1.9930397272109985, "learning_rate": 0.001, "loss": 2.1554, "step": 467400 }, { "epoch": 60.43956043956044, "grad_norm": 3.068042278289795, "learning_rate": 0.001, "loss": 2.1619, "step": 467500 }, { "epoch": 60.452488687782804, "grad_norm": 1.64463210105896, "learning_rate": 0.001, "loss": 2.1666, "step": 467600 }, { "epoch": 60.46541693600517, "grad_norm": 1.9881343841552734, "learning_rate": 0.001, "loss": 2.175, "step": 467700 }, { "epoch": 60.478345184227535, "grad_norm": 1.8649799823760986, "learning_rate": 0.001, "loss": 2.1519, "step": 467800 }, { "epoch": 60.4912734324499, "grad_norm": 1.6322358846664429, "learning_rate": 0.001, "loss": 2.1585, "step": 467900 }, { "epoch": 60.50420168067227, "grad_norm": 171.13394165039062, "learning_rate": 0.001, "loss": 2.1797, "step": 468000 }, { "epoch": 60.51712992889463, "grad_norm": 1.6762487888336182, "learning_rate": 0.001, "loss": 2.17, "step": 468100 }, { "epoch": 60.530058177117, "grad_norm": 1.84880530834198, "learning_rate": 0.001, "loss": 2.1683, "step": 468200 }, { "epoch": 60.542986425339365, "grad_norm": 2.0142955780029297, "learning_rate": 0.001, "loss": 2.1497, "step": 468300 }, { "epoch": 60.55591467356173, "grad_norm": 1.6599338054656982, "learning_rate": 0.001, "loss": 2.1713, "step": 468400 }, { "epoch": 60.568842921784096, "grad_norm": 2.1421964168548584, "learning_rate": 0.001, "loss": 2.1786, "step": 468500 }, { "epoch": 60.58177117000646, "grad_norm": 178.49508666992188, "learning_rate": 0.001, "loss": 2.1582, "step": 468600 }, { "epoch": 60.59469941822883, "grad_norm": 1.3611279726028442, "learning_rate": 0.001, "loss": 2.1707, "step": 468700 }, { "epoch": 60.607627666451194, "grad_norm": 1.5953624248504639, "learning_rate": 0.001, "loss": 2.1865, "step": 468800 }, { "epoch": 60.62055591467356, "grad_norm": 1.909622311592102, "learning_rate": 0.001, "loss": 2.1939, "step": 468900 }, { "epoch": 60.633484162895925, "grad_norm": 1.5437697172164917, "learning_rate": 0.001, "loss": 2.1555, "step": 469000 }, { "epoch": 60.64641241111829, "grad_norm": 1.9346457719802856, "learning_rate": 0.001, "loss": 2.1704, "step": 469100 }, { "epoch": 60.65934065934066, "grad_norm": 1.824455976486206, "learning_rate": 0.001, "loss": 2.1628, "step": 469200 }, { "epoch": 60.67226890756302, "grad_norm": 1.8908220529556274, "learning_rate": 0.001, "loss": 2.1693, "step": 469300 }, { "epoch": 60.68519715578539, "grad_norm": 2.0218591690063477, "learning_rate": 0.001, "loss": 2.1726, "step": 469400 }, { "epoch": 60.698125404007754, "grad_norm": 1.9695247411727905, "learning_rate": 0.001, "loss": 2.1614, "step": 469500 }, { "epoch": 60.71105365223012, "grad_norm": 1.705413579940796, "learning_rate": 0.001, "loss": 2.1867, "step": 469600 }, { "epoch": 60.723981900452486, "grad_norm": 1.6007574796676636, "learning_rate": 0.001, "loss": 2.1854, "step": 469700 }, { "epoch": 60.73691014867485, "grad_norm": 2.0132768154144287, "learning_rate": 0.001, "loss": 2.1794, "step": 469800 }, { "epoch": 60.74983839689722, "grad_norm": 2.092313289642334, "learning_rate": 0.001, "loss": 2.1885, "step": 469900 }, { "epoch": 60.762766645119584, "grad_norm": 1.7734782695770264, "learning_rate": 0.001, "loss": 2.2, "step": 470000 }, { "epoch": 60.77569489334195, "grad_norm": 2.458193063735962, "learning_rate": 0.001, "loss": 2.182, "step": 470100 }, { "epoch": 60.788623141564315, "grad_norm": 1.9400615692138672, "learning_rate": 0.001, "loss": 2.1851, "step": 470200 }, { "epoch": 60.80155138978668, "grad_norm": 1.5682027339935303, "learning_rate": 0.001, "loss": 2.2094, "step": 470300 }, { "epoch": 60.81447963800905, "grad_norm": 1.8749641180038452, "learning_rate": 0.001, "loss": 2.1791, "step": 470400 }, { "epoch": 60.82740788623141, "grad_norm": 2.034240245819092, "learning_rate": 0.001, "loss": 2.1916, "step": 470500 }, { "epoch": 60.84033613445378, "grad_norm": 2.679262638092041, "learning_rate": 0.001, "loss": 2.1909, "step": 470600 }, { "epoch": 60.853264382676144, "grad_norm": 1.5640159845352173, "learning_rate": 0.001, "loss": 2.1932, "step": 470700 }, { "epoch": 60.86619263089851, "grad_norm": 1.5363823175430298, "learning_rate": 0.001, "loss": 2.2027, "step": 470800 }, { "epoch": 60.879120879120876, "grad_norm": 1.4191218614578247, "learning_rate": 0.001, "loss": 2.1892, "step": 470900 }, { "epoch": 60.89204912734324, "grad_norm": 2.2298424243927, "learning_rate": 0.001, "loss": 2.1996, "step": 471000 }, { "epoch": 60.90497737556561, "grad_norm": 2.921215057373047, "learning_rate": 0.001, "loss": 2.1986, "step": 471100 }, { "epoch": 60.91790562378797, "grad_norm": 1.7449872493743896, "learning_rate": 0.001, "loss": 2.1945, "step": 471200 }, { "epoch": 60.93083387201034, "grad_norm": 2.3799686431884766, "learning_rate": 0.001, "loss": 2.1833, "step": 471300 }, { "epoch": 60.943762120232705, "grad_norm": 2.06326961517334, "learning_rate": 0.001, "loss": 2.1849, "step": 471400 }, { "epoch": 60.95669036845507, "grad_norm": 1.8820133209228516, "learning_rate": 0.001, "loss": 2.1935, "step": 471500 }, { "epoch": 60.96961861667744, "grad_norm": 2.1966712474823, "learning_rate": 0.001, "loss": 2.1788, "step": 471600 }, { "epoch": 60.9825468648998, "grad_norm": 1.8628380298614502, "learning_rate": 0.001, "loss": 2.1699, "step": 471700 }, { "epoch": 60.99547511312217, "grad_norm": 1.8461014032363892, "learning_rate": 0.001, "loss": 2.2005, "step": 471800 }, { "epoch": 61.00840336134454, "grad_norm": 0.9945745468139648, "learning_rate": 0.001, "loss": 2.1239, "step": 471900 }, { "epoch": 61.02133160956691, "grad_norm": 1.008078694343567, "learning_rate": 0.001, "loss": 2.0893, "step": 472000 }, { "epoch": 61.03425985778927, "grad_norm": 1.409685492515564, "learning_rate": 0.001, "loss": 2.1072, "step": 472100 }, { "epoch": 61.04718810601164, "grad_norm": 1.0016847848892212, "learning_rate": 0.001, "loss": 2.1014, "step": 472200 }, { "epoch": 61.060116354234005, "grad_norm": 1.0192008018493652, "learning_rate": 0.001, "loss": 2.1218, "step": 472300 }, { "epoch": 61.07304460245637, "grad_norm": 1.0410082340240479, "learning_rate": 0.001, "loss": 2.0875, "step": 472400 }, { "epoch": 61.085972850678736, "grad_norm": 0.9715091586112976, "learning_rate": 0.001, "loss": 2.1139, "step": 472500 }, { "epoch": 61.0989010989011, "grad_norm": 1.019139051437378, "learning_rate": 0.001, "loss": 2.1161, "step": 472600 }, { "epoch": 61.11182934712347, "grad_norm": 2.7006306648254395, "learning_rate": 0.001, "loss": 2.1037, "step": 472700 }, { "epoch": 61.124757595345834, "grad_norm": 0.9568288922309875, "learning_rate": 0.001, "loss": 2.1137, "step": 472800 }, { "epoch": 61.1376858435682, "grad_norm": 0.9469406604766846, "learning_rate": 0.001, "loss": 2.1164, "step": 472900 }, { "epoch": 61.150614091790565, "grad_norm": 0.8980591893196106, "learning_rate": 0.001, "loss": 2.1358, "step": 473000 }, { "epoch": 61.16354234001293, "grad_norm": 1.363674283027649, "learning_rate": 0.001, "loss": 2.1174, "step": 473100 }, { "epoch": 61.1764705882353, "grad_norm": 0.8691591620445251, "learning_rate": 0.001, "loss": 2.135, "step": 473200 }, { "epoch": 61.18939883645766, "grad_norm": 1.0029106140136719, "learning_rate": 0.001, "loss": 2.1221, "step": 473300 }, { "epoch": 61.20232708468003, "grad_norm": 1.1531715393066406, "learning_rate": 0.001, "loss": 2.1281, "step": 473400 }, { "epoch": 61.215255332902395, "grad_norm": 1.0465589761734009, "learning_rate": 0.001, "loss": 2.1381, "step": 473500 }, { "epoch": 61.22818358112476, "grad_norm": 1.091729998588562, "learning_rate": 0.001, "loss": 2.1231, "step": 473600 }, { "epoch": 61.241111829347126, "grad_norm": 1.143532156944275, "learning_rate": 0.001, "loss": 2.1192, "step": 473700 }, { "epoch": 61.25404007756949, "grad_norm": 1.031496524810791, "learning_rate": 0.001, "loss": 2.1292, "step": 473800 }, { "epoch": 61.26696832579186, "grad_norm": 1.3863377571105957, "learning_rate": 0.001, "loss": 2.1354, "step": 473900 }, { "epoch": 61.279896574014224, "grad_norm": 1.0213667154312134, "learning_rate": 0.001, "loss": 2.1425, "step": 474000 }, { "epoch": 61.29282482223659, "grad_norm": 2.711209297180176, "learning_rate": 0.001, "loss": 2.1171, "step": 474100 }, { "epoch": 61.305753070458955, "grad_norm": 1.4824705123901367, "learning_rate": 0.001, "loss": 2.1272, "step": 474200 }, { "epoch": 61.31868131868132, "grad_norm": 1.0449342727661133, "learning_rate": 0.001, "loss": 2.1341, "step": 474300 }, { "epoch": 61.33160956690369, "grad_norm": 0.8330239057540894, "learning_rate": 0.001, "loss": 2.1532, "step": 474400 }, { "epoch": 61.34453781512605, "grad_norm": 1.1824895143508911, "learning_rate": 0.001, "loss": 2.142, "step": 474500 }, { "epoch": 61.35746606334842, "grad_norm": 1.0667736530303955, "learning_rate": 0.001, "loss": 2.1487, "step": 474600 }, { "epoch": 61.370394311570784, "grad_norm": 1.2707529067993164, "learning_rate": 0.001, "loss": 2.1431, "step": 474700 }, { "epoch": 61.38332255979315, "grad_norm": 1.0634033679962158, "learning_rate": 0.001, "loss": 2.1486, "step": 474800 }, { "epoch": 61.396250808015516, "grad_norm": 1.353855013847351, "learning_rate": 0.001, "loss": 2.1367, "step": 474900 }, { "epoch": 61.40917905623788, "grad_norm": 0.7384710311889648, "learning_rate": 0.001, "loss": 2.1686, "step": 475000 }, { "epoch": 61.42210730446025, "grad_norm": 0.9122633934020996, "learning_rate": 0.001, "loss": 2.1346, "step": 475100 }, { "epoch": 61.43503555268261, "grad_norm": 0.9572334885597229, "learning_rate": 0.001, "loss": 2.1668, "step": 475200 }, { "epoch": 61.44796380090498, "grad_norm": 1.0119572877883911, "learning_rate": 0.001, "loss": 2.147, "step": 475300 }, { "epoch": 61.460892049127345, "grad_norm": 1.0357295274734497, "learning_rate": 0.001, "loss": 2.1558, "step": 475400 }, { "epoch": 61.47382029734971, "grad_norm": 0.9466652274131775, "learning_rate": 0.001, "loss": 2.1332, "step": 475500 }, { "epoch": 61.48674854557208, "grad_norm": 2.1865060329437256, "learning_rate": 0.001, "loss": 2.1622, "step": 475600 }, { "epoch": 61.49967679379444, "grad_norm": 0.878238320350647, "learning_rate": 0.001, "loss": 2.1669, "step": 475700 }, { "epoch": 61.51260504201681, "grad_norm": 0.859127402305603, "learning_rate": 0.001, "loss": 2.1699, "step": 475800 }, { "epoch": 61.525533290239174, "grad_norm": 0.8283673524856567, "learning_rate": 0.001, "loss": 2.1528, "step": 475900 }, { "epoch": 61.53846153846154, "grad_norm": 0.8978778719902039, "learning_rate": 0.001, "loss": 2.1789, "step": 476000 }, { "epoch": 61.551389786683906, "grad_norm": 1.2396841049194336, "learning_rate": 0.001, "loss": 2.1764, "step": 476100 }, { "epoch": 61.56431803490627, "grad_norm": 7.402273178100586, "learning_rate": 0.001, "loss": 2.1453, "step": 476200 }, { "epoch": 61.57724628312864, "grad_norm": 1.5100637674331665, "learning_rate": 0.001, "loss": 2.1686, "step": 476300 }, { "epoch": 61.590174531351, "grad_norm": 0.8235408067703247, "learning_rate": 0.001, "loss": 2.1893, "step": 476400 }, { "epoch": 61.60310277957337, "grad_norm": 0.9805994629859924, "learning_rate": 0.001, "loss": 2.1623, "step": 476500 }, { "epoch": 61.616031027795735, "grad_norm": 0.9458043575286865, "learning_rate": 0.001, "loss": 2.1772, "step": 476600 }, { "epoch": 61.6289592760181, "grad_norm": 1.5645774602890015, "learning_rate": 0.001, "loss": 2.1783, "step": 476700 }, { "epoch": 61.64188752424047, "grad_norm": 1.1576464176177979, "learning_rate": 0.001, "loss": 2.1778, "step": 476800 }, { "epoch": 61.65481577246283, "grad_norm": 0.8711026310920715, "learning_rate": 0.001, "loss": 2.1924, "step": 476900 }, { "epoch": 61.6677440206852, "grad_norm": 0.8126427531242371, "learning_rate": 0.001, "loss": 2.1739, "step": 477000 }, { "epoch": 61.680672268907564, "grad_norm": 0.7464815378189087, "learning_rate": 0.001, "loss": 2.1527, "step": 477100 }, { "epoch": 61.69360051712993, "grad_norm": 0.9982279539108276, "learning_rate": 0.001, "loss": 2.1578, "step": 477200 }, { "epoch": 61.706528765352296, "grad_norm": 1.102919578552246, "learning_rate": 0.001, "loss": 2.1813, "step": 477300 }, { "epoch": 61.71945701357466, "grad_norm": 1.0997051000595093, "learning_rate": 0.001, "loss": 2.1748, "step": 477400 }, { "epoch": 61.73238526179703, "grad_norm": 1.148249626159668, "learning_rate": 0.001, "loss": 2.2094, "step": 477500 }, { "epoch": 61.74531351001939, "grad_norm": 0.7830320000648499, "learning_rate": 0.001, "loss": 2.1681, "step": 477600 }, { "epoch": 61.75824175824176, "grad_norm": 1.5226600170135498, "learning_rate": 0.001, "loss": 2.1782, "step": 477700 }, { "epoch": 61.771170006464125, "grad_norm": 0.8340833783149719, "learning_rate": 0.001, "loss": 2.1549, "step": 477800 }, { "epoch": 61.78409825468649, "grad_norm": 0.9949930906295776, "learning_rate": 0.001, "loss": 2.1723, "step": 477900 }, { "epoch": 61.79702650290886, "grad_norm": 0.876323401927948, "learning_rate": 0.001, "loss": 2.1974, "step": 478000 }, { "epoch": 61.80995475113122, "grad_norm": 0.745360255241394, "learning_rate": 0.001, "loss": 2.1709, "step": 478100 }, { "epoch": 61.82288299935359, "grad_norm": 0.8970654606819153, "learning_rate": 0.001, "loss": 2.1692, "step": 478200 }, { "epoch": 61.835811247575954, "grad_norm": 1.9572255611419678, "learning_rate": 0.001, "loss": 2.1819, "step": 478300 }, { "epoch": 61.84873949579832, "grad_norm": 1.0318738222122192, "learning_rate": 0.001, "loss": 2.1882, "step": 478400 }, { "epoch": 61.861667744020686, "grad_norm": 0.8595945835113525, "learning_rate": 0.001, "loss": 2.1724, "step": 478500 }, { "epoch": 61.87459599224305, "grad_norm": 1.1839983463287354, "learning_rate": 0.001, "loss": 2.1938, "step": 478600 }, { "epoch": 61.88752424046542, "grad_norm": 0.8937172889709473, "learning_rate": 0.001, "loss": 2.1805, "step": 478700 }, { "epoch": 61.90045248868778, "grad_norm": 1.0542199611663818, "learning_rate": 0.001, "loss": 2.1665, "step": 478800 }, { "epoch": 61.91338073691015, "grad_norm": 1.1401311159133911, "learning_rate": 0.001, "loss": 2.1892, "step": 478900 }, { "epoch": 61.926308985132515, "grad_norm": 0.8432530164718628, "learning_rate": 0.001, "loss": 2.1802, "step": 479000 }, { "epoch": 61.93923723335488, "grad_norm": 0.9292218089103699, "learning_rate": 0.001, "loss": 2.176, "step": 479100 }, { "epoch": 61.95216548157725, "grad_norm": 0.8614493012428284, "learning_rate": 0.001, "loss": 2.1764, "step": 479200 }, { "epoch": 61.96509372979961, "grad_norm": 0.9344472885131836, "learning_rate": 0.001, "loss": 2.164, "step": 479300 }, { "epoch": 61.97802197802198, "grad_norm": 1.4154270887374878, "learning_rate": 0.001, "loss": 2.1668, "step": 479400 }, { "epoch": 61.990950226244344, "grad_norm": 1.2198092937469482, "learning_rate": 0.001, "loss": 2.1775, "step": 479500 }, { "epoch": 62.00387847446671, "grad_norm": 1.3053570985794067, "learning_rate": 0.001, "loss": 2.189, "step": 479600 }, { "epoch": 62.016806722689076, "grad_norm": 1.054728388786316, "learning_rate": 0.001, "loss": 2.0842, "step": 479700 }, { "epoch": 62.02973497091144, "grad_norm": 1.0133899450302124, "learning_rate": 0.001, "loss": 2.0933, "step": 479800 }, { "epoch": 62.04266321913381, "grad_norm": 1.4761942625045776, "learning_rate": 0.001, "loss": 2.1105, "step": 479900 }, { "epoch": 62.05559146735617, "grad_norm": 0.9300850629806519, "learning_rate": 0.001, "loss": 2.103, "step": 480000 }, { "epoch": 62.06851971557854, "grad_norm": 1.332985758781433, "learning_rate": 0.001, "loss": 2.1018, "step": 480100 }, { "epoch": 62.081447963800905, "grad_norm": 1.3024473190307617, "learning_rate": 0.001, "loss": 2.1146, "step": 480200 }, { "epoch": 62.09437621202327, "grad_norm": 0.8682134747505188, "learning_rate": 0.001, "loss": 2.0995, "step": 480300 }, { "epoch": 62.107304460245636, "grad_norm": 1.20331609249115, "learning_rate": 0.001, "loss": 2.0921, "step": 480400 }, { "epoch": 62.120232708468, "grad_norm": 1.4414596557617188, "learning_rate": 0.001, "loss": 2.1172, "step": 480500 }, { "epoch": 62.13316095669037, "grad_norm": 0.8038631081581116, "learning_rate": 0.001, "loss": 2.1102, "step": 480600 }, { "epoch": 62.146089204912734, "grad_norm": 0.8712775707244873, "learning_rate": 0.001, "loss": 2.1159, "step": 480700 }, { "epoch": 62.1590174531351, "grad_norm": 1.1211237907409668, "learning_rate": 0.001, "loss": 2.1229, "step": 480800 }, { "epoch": 62.171945701357465, "grad_norm": 1.0931053161621094, "learning_rate": 0.001, "loss": 2.1184, "step": 480900 }, { "epoch": 62.18487394957983, "grad_norm": 6.298970699310303, "learning_rate": 0.001, "loss": 2.1193, "step": 481000 }, { "epoch": 62.1978021978022, "grad_norm": 1.1338508129119873, "learning_rate": 0.001, "loss": 2.1248, "step": 481100 }, { "epoch": 62.21073044602456, "grad_norm": 0.9218490123748779, "learning_rate": 0.001, "loss": 2.1369, "step": 481200 }, { "epoch": 62.22365869424693, "grad_norm": 0.996441125869751, "learning_rate": 0.001, "loss": 2.1339, "step": 481300 }, { "epoch": 62.236586942469295, "grad_norm": 2.1910834312438965, "learning_rate": 0.001, "loss": 2.1305, "step": 481400 }, { "epoch": 62.24951519069166, "grad_norm": 1.7959579229354858, "learning_rate": 0.001, "loss": 2.1489, "step": 481500 }, { "epoch": 62.262443438914026, "grad_norm": 0.8463423848152161, "learning_rate": 0.001, "loss": 2.1326, "step": 481600 }, { "epoch": 62.27537168713639, "grad_norm": 0.9483916759490967, "learning_rate": 0.001, "loss": 2.1433, "step": 481700 }, { "epoch": 62.28829993535876, "grad_norm": 0.881891131401062, "learning_rate": 0.001, "loss": 2.1319, "step": 481800 }, { "epoch": 62.301228183581124, "grad_norm": 1.2024033069610596, "learning_rate": 0.001, "loss": 2.1393, "step": 481900 }, { "epoch": 62.31415643180349, "grad_norm": 1.3703526258468628, "learning_rate": 0.001, "loss": 2.1511, "step": 482000 }, { "epoch": 62.327084680025855, "grad_norm": 0.8330098390579224, "learning_rate": 0.001, "loss": 2.1313, "step": 482100 }, { "epoch": 62.34001292824822, "grad_norm": 1.1093302965164185, "learning_rate": 0.001, "loss": 2.1486, "step": 482200 }, { "epoch": 62.35294117647059, "grad_norm": 0.8369740843772888, "learning_rate": 0.001, "loss": 2.1294, "step": 482300 }, { "epoch": 62.36586942469295, "grad_norm": 1.1398383378982544, "learning_rate": 0.001, "loss": 2.1346, "step": 482400 }, { "epoch": 62.37879767291532, "grad_norm": 1.1277623176574707, "learning_rate": 0.001, "loss": 2.1143, "step": 482500 }, { "epoch": 62.391725921137684, "grad_norm": 1.0798914432525635, "learning_rate": 0.001, "loss": 2.1233, "step": 482600 }, { "epoch": 62.40465416936005, "grad_norm": 0.9849027991294861, "learning_rate": 0.001, "loss": 2.153, "step": 482700 }, { "epoch": 62.417582417582416, "grad_norm": 1.2725999355316162, "learning_rate": 0.001, "loss": 2.1349, "step": 482800 }, { "epoch": 62.43051066580478, "grad_norm": 1.0341856479644775, "learning_rate": 0.001, "loss": 2.1441, "step": 482900 }, { "epoch": 62.44343891402715, "grad_norm": 1.0191619396209717, "learning_rate": 0.001, "loss": 2.1371, "step": 483000 }, { "epoch": 62.456367162249514, "grad_norm": 1.640635371208191, "learning_rate": 0.001, "loss": 2.1437, "step": 483100 }, { "epoch": 62.46929541047188, "grad_norm": 1.3118170499801636, "learning_rate": 0.001, "loss": 2.1594, "step": 483200 }, { "epoch": 62.482223658694245, "grad_norm": 1.1739463806152344, "learning_rate": 0.001, "loss": 2.1496, "step": 483300 }, { "epoch": 62.49515190691661, "grad_norm": 1.2341525554656982, "learning_rate": 0.001, "loss": 2.133, "step": 483400 }, { "epoch": 62.50808015513898, "grad_norm": 0.7801305055618286, "learning_rate": 0.001, "loss": 2.1416, "step": 483500 }, { "epoch": 62.52100840336134, "grad_norm": 1.0015288591384888, "learning_rate": 0.001, "loss": 2.1434, "step": 483600 }, { "epoch": 62.53393665158371, "grad_norm": 1.5794211626052856, "learning_rate": 0.001, "loss": 2.1531, "step": 483700 }, { "epoch": 62.546864899806074, "grad_norm": 1.1322566270828247, "learning_rate": 0.001, "loss": 2.1425, "step": 483800 }, { "epoch": 62.55979314802844, "grad_norm": 1.0529794692993164, "learning_rate": 0.001, "loss": 2.1381, "step": 483900 }, { "epoch": 62.572721396250806, "grad_norm": 0.9446023106575012, "learning_rate": 0.001, "loss": 2.1615, "step": 484000 }, { "epoch": 62.58564964447317, "grad_norm": 0.8866491317749023, "learning_rate": 0.001, "loss": 2.1611, "step": 484100 }, { "epoch": 62.59857789269554, "grad_norm": 0.8448085784912109, "learning_rate": 0.001, "loss": 2.165, "step": 484200 }, { "epoch": 62.6115061409179, "grad_norm": 0.8445166349411011, "learning_rate": 0.001, "loss": 2.1713, "step": 484300 }, { "epoch": 62.62443438914027, "grad_norm": 0.9605010151863098, "learning_rate": 0.001, "loss": 2.1701, "step": 484400 }, { "epoch": 62.637362637362635, "grad_norm": 0.8927921652793884, "learning_rate": 0.001, "loss": 2.1487, "step": 484500 }, { "epoch": 62.650290885585, "grad_norm": 1.070634126663208, "learning_rate": 0.001, "loss": 2.1644, "step": 484600 }, { "epoch": 62.66321913380737, "grad_norm": 1.02628493309021, "learning_rate": 0.001, "loss": 2.1566, "step": 484700 }, { "epoch": 62.67614738202973, "grad_norm": 0.9750914573669434, "learning_rate": 0.001, "loss": 2.1604, "step": 484800 }, { "epoch": 62.6890756302521, "grad_norm": 1.149978756904602, "learning_rate": 0.001, "loss": 2.1661, "step": 484900 }, { "epoch": 62.702003878474464, "grad_norm": 1.0087558031082153, "learning_rate": 0.001, "loss": 2.1689, "step": 485000 }, { "epoch": 62.71493212669683, "grad_norm": 1.351178526878357, "learning_rate": 0.001, "loss": 2.1901, "step": 485100 }, { "epoch": 62.727860374919196, "grad_norm": 1.0520098209381104, "learning_rate": 0.001, "loss": 2.177, "step": 485200 }, { "epoch": 62.74078862314156, "grad_norm": 1.3292208909988403, "learning_rate": 0.001, "loss": 2.1783, "step": 485300 }, { "epoch": 62.75371687136393, "grad_norm": 1.2226817607879639, "learning_rate": 0.001, "loss": 2.1462, "step": 485400 }, { "epoch": 62.76664511958629, "grad_norm": 0.9309093952178955, "learning_rate": 0.001, "loss": 2.1837, "step": 485500 }, { "epoch": 62.77957336780866, "grad_norm": 1.5767807960510254, "learning_rate": 0.001, "loss": 2.166, "step": 485600 }, { "epoch": 62.792501616031025, "grad_norm": 1.0971609354019165, "learning_rate": 0.001, "loss": 2.1674, "step": 485700 }, { "epoch": 62.80542986425339, "grad_norm": 1.1352412700653076, "learning_rate": 0.001, "loss": 2.1676, "step": 485800 }, { "epoch": 62.81835811247576, "grad_norm": 1.262736201286316, "learning_rate": 0.001, "loss": 2.1891, "step": 485900 }, { "epoch": 62.83128636069812, "grad_norm": 1.153200387954712, "learning_rate": 0.001, "loss": 2.1528, "step": 486000 }, { "epoch": 62.84421460892049, "grad_norm": 1.5272530317306519, "learning_rate": 0.001, "loss": 2.1781, "step": 486100 }, { "epoch": 62.857142857142854, "grad_norm": 1.026496410369873, "learning_rate": 0.001, "loss": 2.1564, "step": 486200 }, { "epoch": 62.87007110536522, "grad_norm": 1.0361328125, "learning_rate": 0.001, "loss": 2.1818, "step": 486300 }, { "epoch": 62.882999353587586, "grad_norm": 0.9932563304901123, "learning_rate": 0.001, "loss": 2.1858, "step": 486400 }, { "epoch": 62.89592760180995, "grad_norm": 0.96904057264328, "learning_rate": 0.001, "loss": 2.1926, "step": 486500 }, { "epoch": 62.90885585003232, "grad_norm": 1.0521634817123413, "learning_rate": 0.001, "loss": 2.188, "step": 486600 }, { "epoch": 62.92178409825468, "grad_norm": 0.8217435479164124, "learning_rate": 0.001, "loss": 2.185, "step": 486700 }, { "epoch": 62.93471234647705, "grad_norm": 1.0606657266616821, "learning_rate": 0.001, "loss": 2.1808, "step": 486800 }, { "epoch": 62.947640594699415, "grad_norm": 1.177857756614685, "learning_rate": 0.001, "loss": 2.2009, "step": 486900 }, { "epoch": 62.96056884292178, "grad_norm": 0.7803717851638794, "learning_rate": 0.001, "loss": 2.1734, "step": 487000 }, { "epoch": 62.97349709114415, "grad_norm": 2.8881165981292725, "learning_rate": 0.001, "loss": 2.1801, "step": 487100 }, { "epoch": 62.98642533936652, "grad_norm": 0.7895533442497253, "learning_rate": 0.001, "loss": 2.1922, "step": 487200 }, { "epoch": 62.999353587588885, "grad_norm": 3.1655328273773193, "learning_rate": 0.001, "loss": 2.1756, "step": 487300 }, { "epoch": 63.01228183581125, "grad_norm": 1.4609891176223755, "learning_rate": 0.001, "loss": 2.1001, "step": 487400 }, { "epoch": 63.02521008403362, "grad_norm": 1.0560851097106934, "learning_rate": 0.001, "loss": 2.1211, "step": 487500 }, { "epoch": 63.03813833225598, "grad_norm": 1.2918167114257812, "learning_rate": 0.001, "loss": 2.1029, "step": 487600 }, { "epoch": 63.05106658047835, "grad_norm": 1.081490397453308, "learning_rate": 0.001, "loss": 2.0962, "step": 487700 }, { "epoch": 63.063994828700714, "grad_norm": 1.660591959953308, "learning_rate": 0.001, "loss": 2.1031, "step": 487800 }, { "epoch": 63.07692307692308, "grad_norm": 1.911940097808838, "learning_rate": 0.001, "loss": 2.113, "step": 487900 }, { "epoch": 63.089851325145446, "grad_norm": 1.0819648504257202, "learning_rate": 0.001, "loss": 2.1206, "step": 488000 }, { "epoch": 63.10277957336781, "grad_norm": 0.9212496280670166, "learning_rate": 0.001, "loss": 2.1113, "step": 488100 }, { "epoch": 63.11570782159018, "grad_norm": 1.4237114191055298, "learning_rate": 0.001, "loss": 2.0954, "step": 488200 }, { "epoch": 63.12863606981254, "grad_norm": 0.8887714743614197, "learning_rate": 0.001, "loss": 2.1303, "step": 488300 }, { "epoch": 63.14156431803491, "grad_norm": 1.4799532890319824, "learning_rate": 0.001, "loss": 2.1359, "step": 488400 }, { "epoch": 63.154492566257275, "grad_norm": 1.7796552181243896, "learning_rate": 0.001, "loss": 2.1114, "step": 488500 }, { "epoch": 63.16742081447964, "grad_norm": 1.0640257596969604, "learning_rate": 0.001, "loss": 2.1182, "step": 488600 }, { "epoch": 63.18034906270201, "grad_norm": 1.1090031862258911, "learning_rate": 0.001, "loss": 2.1192, "step": 488700 }, { "epoch": 63.19327731092437, "grad_norm": 1.0605090856552124, "learning_rate": 0.001, "loss": 2.0981, "step": 488800 }, { "epoch": 63.20620555914674, "grad_norm": 1.4716180562973022, "learning_rate": 0.001, "loss": 2.1234, "step": 488900 }, { "epoch": 63.219133807369104, "grad_norm": 1.7617136240005493, "learning_rate": 0.001, "loss": 2.1103, "step": 489000 }, { "epoch": 63.23206205559147, "grad_norm": 1.0817162990570068, "learning_rate": 0.001, "loss": 2.1427, "step": 489100 }, { "epoch": 63.244990303813836, "grad_norm": 1.4206292629241943, "learning_rate": 0.001, "loss": 2.1366, "step": 489200 }, { "epoch": 63.2579185520362, "grad_norm": 1.6966333389282227, "learning_rate": 0.001, "loss": 2.1275, "step": 489300 }, { "epoch": 63.27084680025857, "grad_norm": 1.3027029037475586, "learning_rate": 0.001, "loss": 2.1365, "step": 489400 }, { "epoch": 63.28377504848093, "grad_norm": 1.0493834018707275, "learning_rate": 0.001, "loss": 2.1415, "step": 489500 }, { "epoch": 63.2967032967033, "grad_norm": 1.0616586208343506, "learning_rate": 0.001, "loss": 2.1252, "step": 489600 }, { "epoch": 63.309631544925665, "grad_norm": 1.2692185640335083, "learning_rate": 0.001, "loss": 2.1477, "step": 489700 }, { "epoch": 63.32255979314803, "grad_norm": 1.06673002243042, "learning_rate": 0.001, "loss": 2.124, "step": 489800 }, { "epoch": 63.3354880413704, "grad_norm": 1.2891734838485718, "learning_rate": 0.001, "loss": 2.1215, "step": 489900 }, { "epoch": 63.34841628959276, "grad_norm": 1.2280933856964111, "learning_rate": 0.001, "loss": 2.1458, "step": 490000 }, { "epoch": 63.36134453781513, "grad_norm": 1.2860995531082153, "learning_rate": 0.001, "loss": 2.1253, "step": 490100 }, { "epoch": 63.374272786037494, "grad_norm": 1.5536692142486572, "learning_rate": 0.001, "loss": 2.1421, "step": 490200 }, { "epoch": 63.38720103425986, "grad_norm": 2.6151890754699707, "learning_rate": 0.001, "loss": 2.153, "step": 490300 }, { "epoch": 63.400129282482226, "grad_norm": 2.2492475509643555, "learning_rate": 0.001, "loss": 2.1395, "step": 490400 }, { "epoch": 63.41305753070459, "grad_norm": 5.158632755279541, "learning_rate": 0.001, "loss": 2.1293, "step": 490500 }, { "epoch": 63.42598577892696, "grad_norm": 1.2065292596817017, "learning_rate": 0.001, "loss": 2.1341, "step": 490600 }, { "epoch": 63.43891402714932, "grad_norm": 1.1457624435424805, "learning_rate": 0.001, "loss": 2.1419, "step": 490700 }, { "epoch": 63.45184227537169, "grad_norm": 1.3156230449676514, "learning_rate": 0.001, "loss": 2.139, "step": 490800 }, { "epoch": 63.464770523594055, "grad_norm": 2.8472652435302734, "learning_rate": 0.001, "loss": 2.143, "step": 490900 }, { "epoch": 63.47769877181642, "grad_norm": 1.5906908512115479, "learning_rate": 0.001, "loss": 2.1315, "step": 491000 }, { "epoch": 63.49062702003879, "grad_norm": 1.3240773677825928, "learning_rate": 0.001, "loss": 2.1373, "step": 491100 }, { "epoch": 63.50355526826115, "grad_norm": 1.4787650108337402, "learning_rate": 0.001, "loss": 2.1512, "step": 491200 }, { "epoch": 63.51648351648352, "grad_norm": 0.9813044667243958, "learning_rate": 0.001, "loss": 2.1602, "step": 491300 }, { "epoch": 63.529411764705884, "grad_norm": 1.0598618984222412, "learning_rate": 0.001, "loss": 2.1508, "step": 491400 }, { "epoch": 63.54234001292825, "grad_norm": 1.858522891998291, "learning_rate": 0.001, "loss": 2.1589, "step": 491500 }, { "epoch": 63.555268261150616, "grad_norm": 1.315252423286438, "learning_rate": 0.001, "loss": 2.165, "step": 491600 }, { "epoch": 63.56819650937298, "grad_norm": 5.312290191650391, "learning_rate": 0.001, "loss": 2.16, "step": 491700 }, { "epoch": 63.58112475759535, "grad_norm": 1.1681747436523438, "learning_rate": 0.001, "loss": 2.1596, "step": 491800 }, { "epoch": 63.59405300581771, "grad_norm": 6.987508773803711, "learning_rate": 0.001, "loss": 2.1521, "step": 491900 }, { "epoch": 63.60698125404008, "grad_norm": 1.8543871641159058, "learning_rate": 0.001, "loss": 2.1777, "step": 492000 }, { "epoch": 63.619909502262445, "grad_norm": 1.236051321029663, "learning_rate": 0.001, "loss": 2.1643, "step": 492100 }, { "epoch": 63.63283775048481, "grad_norm": 22.19891929626465, "learning_rate": 0.001, "loss": 2.1503, "step": 492200 }, { "epoch": 63.645765998707176, "grad_norm": 1.2369941473007202, "learning_rate": 0.001, "loss": 2.1714, "step": 492300 }, { "epoch": 63.65869424692954, "grad_norm": 1.0803842544555664, "learning_rate": 0.001, "loss": 2.1749, "step": 492400 }, { "epoch": 63.67162249515191, "grad_norm": 1.2363331317901611, "learning_rate": 0.001, "loss": 2.1752, "step": 492500 }, { "epoch": 63.684550743374274, "grad_norm": 1.0606552362442017, "learning_rate": 0.001, "loss": 2.1629, "step": 492600 }, { "epoch": 63.69747899159664, "grad_norm": 1.4235572814941406, "learning_rate": 0.001, "loss": 2.1597, "step": 492700 }, { "epoch": 63.710407239819006, "grad_norm": 3.3864896297454834, "learning_rate": 0.001, "loss": 2.1693, "step": 492800 }, { "epoch": 63.72333548804137, "grad_norm": 1.1495232582092285, "learning_rate": 0.001, "loss": 2.1651, "step": 492900 }, { "epoch": 63.73626373626374, "grad_norm": 1.331545352935791, "learning_rate": 0.001, "loss": 2.1562, "step": 493000 }, { "epoch": 63.7491919844861, "grad_norm": 1.4809985160827637, "learning_rate": 0.001, "loss": 2.1442, "step": 493100 }, { "epoch": 63.76212023270847, "grad_norm": 1.3718547821044922, "learning_rate": 0.001, "loss": 2.1668, "step": 493200 }, { "epoch": 63.775048480930835, "grad_norm": 3.810098171234131, "learning_rate": 0.001, "loss": 2.1726, "step": 493300 }, { "epoch": 63.7879767291532, "grad_norm": 1.5934162139892578, "learning_rate": 0.001, "loss": 2.1673, "step": 493400 }, { "epoch": 63.800904977375566, "grad_norm": 1.2835235595703125, "learning_rate": 0.001, "loss": 2.1609, "step": 493500 }, { "epoch": 63.81383322559793, "grad_norm": 1.1645616292953491, "learning_rate": 0.001, "loss": 2.169, "step": 493600 }, { "epoch": 63.8267614738203, "grad_norm": 1.6774332523345947, "learning_rate": 0.001, "loss": 2.1829, "step": 493700 }, { "epoch": 63.839689722042664, "grad_norm": 1.4269920587539673, "learning_rate": 0.001, "loss": 2.1783, "step": 493800 }, { "epoch": 63.85261797026503, "grad_norm": 1.4013171195983887, "learning_rate": 0.001, "loss": 2.1776, "step": 493900 }, { "epoch": 63.865546218487395, "grad_norm": 3.8351051807403564, "learning_rate": 0.001, "loss": 2.1978, "step": 494000 }, { "epoch": 63.87847446670976, "grad_norm": 1.2001538276672363, "learning_rate": 0.001, "loss": 2.1713, "step": 494100 }, { "epoch": 63.89140271493213, "grad_norm": 1.3282079696655273, "learning_rate": 0.001, "loss": 2.1733, "step": 494200 }, { "epoch": 63.90433096315449, "grad_norm": 2.4722633361816406, "learning_rate": 0.001, "loss": 2.172, "step": 494300 }, { "epoch": 63.91725921137686, "grad_norm": 1.4418742656707764, "learning_rate": 0.001, "loss": 2.1911, "step": 494400 }, { "epoch": 63.930187459599225, "grad_norm": 3.498851776123047, "learning_rate": 0.001, "loss": 2.1734, "step": 494500 }, { "epoch": 63.94311570782159, "grad_norm": 1.3219964504241943, "learning_rate": 0.001, "loss": 2.1689, "step": 494600 }, { "epoch": 63.956043956043956, "grad_norm": 1.186608910560608, "learning_rate": 0.001, "loss": 2.1857, "step": 494700 }, { "epoch": 63.96897220426632, "grad_norm": 2.6743252277374268, "learning_rate": 0.001, "loss": 2.1846, "step": 494800 }, { "epoch": 63.98190045248869, "grad_norm": 1.0909504890441895, "learning_rate": 0.001, "loss": 2.1878, "step": 494900 }, { "epoch": 63.994828700711054, "grad_norm": 1.0557948350906372, "learning_rate": 0.001, "loss": 2.1857, "step": 495000 }, { "epoch": 64.00775694893342, "grad_norm": 1.0860263109207153, "learning_rate": 0.001, "loss": 2.1103, "step": 495100 }, { "epoch": 64.02068519715579, "grad_norm": 1.102678894996643, "learning_rate": 0.001, "loss": 2.1042, "step": 495200 }, { "epoch": 64.03361344537815, "grad_norm": 0.9086452126502991, "learning_rate": 0.001, "loss": 2.0934, "step": 495300 }, { "epoch": 64.04654169360052, "grad_norm": 1.8125101327896118, "learning_rate": 0.001, "loss": 2.0856, "step": 495400 }, { "epoch": 64.05946994182288, "grad_norm": 0.9156757593154907, "learning_rate": 0.001, "loss": 2.1233, "step": 495500 }, { "epoch": 64.07239819004525, "grad_norm": 1.10150146484375, "learning_rate": 0.001, "loss": 2.0967, "step": 495600 }, { "epoch": 64.08532643826761, "grad_norm": 0.9474294185638428, "learning_rate": 0.001, "loss": 2.1034, "step": 495700 }, { "epoch": 64.09825468648998, "grad_norm": 1.069403052330017, "learning_rate": 0.001, "loss": 2.1129, "step": 495800 }, { "epoch": 64.11118293471235, "grad_norm": 0.8817936182022095, "learning_rate": 0.001, "loss": 2.1085, "step": 495900 }, { "epoch": 64.12411118293471, "grad_norm": 1.0874667167663574, "learning_rate": 0.001, "loss": 2.1147, "step": 496000 }, { "epoch": 64.13703943115708, "grad_norm": 1.428000569343567, "learning_rate": 0.001, "loss": 2.1398, "step": 496100 }, { "epoch": 64.14996767937944, "grad_norm": 1.0802658796310425, "learning_rate": 0.001, "loss": 2.1328, "step": 496200 }, { "epoch": 64.16289592760181, "grad_norm": 1.2602989673614502, "learning_rate": 0.001, "loss": 2.117, "step": 496300 }, { "epoch": 64.17582417582418, "grad_norm": 0.8141772747039795, "learning_rate": 0.001, "loss": 2.1275, "step": 496400 }, { "epoch": 64.18875242404654, "grad_norm": 1.0182170867919922, "learning_rate": 0.001, "loss": 2.1204, "step": 496500 }, { "epoch": 64.2016806722689, "grad_norm": 1.289367914199829, "learning_rate": 0.001, "loss": 2.1264, "step": 496600 }, { "epoch": 64.21460892049127, "grad_norm": 0.9652644991874695, "learning_rate": 0.001, "loss": 2.1235, "step": 496700 }, { "epoch": 64.22753716871364, "grad_norm": 0.9769936800003052, "learning_rate": 0.001, "loss": 2.1462, "step": 496800 }, { "epoch": 64.240465416936, "grad_norm": 0.9752294421195984, "learning_rate": 0.001, "loss": 2.1136, "step": 496900 }, { "epoch": 64.25339366515837, "grad_norm": 0.9727685451507568, "learning_rate": 0.001, "loss": 2.1212, "step": 497000 }, { "epoch": 64.26632191338074, "grad_norm": 0.979823887348175, "learning_rate": 0.001, "loss": 2.1302, "step": 497100 }, { "epoch": 64.2792501616031, "grad_norm": 1.1813931465148926, "learning_rate": 0.001, "loss": 2.1244, "step": 497200 }, { "epoch": 64.29217840982547, "grad_norm": 1.6104722023010254, "learning_rate": 0.001, "loss": 2.1295, "step": 497300 }, { "epoch": 64.30510665804783, "grad_norm": 1.0917552709579468, "learning_rate": 0.001, "loss": 2.1307, "step": 497400 }, { "epoch": 64.3180349062702, "grad_norm": 1.2001415491104126, "learning_rate": 0.001, "loss": 2.149, "step": 497500 }, { "epoch": 64.33096315449257, "grad_norm": 0.9153510928153992, "learning_rate": 0.001, "loss": 2.1249, "step": 497600 }, { "epoch": 64.34389140271493, "grad_norm": 1.0339146852493286, "learning_rate": 0.001, "loss": 2.1414, "step": 497700 }, { "epoch": 64.3568196509373, "grad_norm": 1.42917799949646, "learning_rate": 0.001, "loss": 2.1478, "step": 497800 }, { "epoch": 64.36974789915966, "grad_norm": 1.8682153224945068, "learning_rate": 0.001, "loss": 2.141, "step": 497900 }, { "epoch": 64.38267614738203, "grad_norm": 1.0696476697921753, "learning_rate": 0.001, "loss": 2.1493, "step": 498000 }, { "epoch": 64.3956043956044, "grad_norm": 1.4817161560058594, "learning_rate": 0.001, "loss": 2.1449, "step": 498100 }, { "epoch": 64.40853264382676, "grad_norm": 1.5553139448165894, "learning_rate": 0.001, "loss": 2.1405, "step": 498200 }, { "epoch": 64.42146089204913, "grad_norm": 1.1807515621185303, "learning_rate": 0.001, "loss": 2.1586, "step": 498300 }, { "epoch": 64.43438914027149, "grad_norm": 0.9426583051681519, "learning_rate": 0.001, "loss": 2.1406, "step": 498400 }, { "epoch": 64.44731738849386, "grad_norm": 1.1412732601165771, "learning_rate": 0.001, "loss": 2.1539, "step": 498500 }, { "epoch": 64.46024563671622, "grad_norm": 1.5366867780685425, "learning_rate": 0.001, "loss": 2.1445, "step": 498600 }, { "epoch": 64.47317388493859, "grad_norm": 0.8618477582931519, "learning_rate": 0.001, "loss": 2.1374, "step": 498700 }, { "epoch": 64.48610213316095, "grad_norm": 0.9749851822853088, "learning_rate": 0.001, "loss": 2.1537, "step": 498800 }, { "epoch": 64.49903038138332, "grad_norm": 1.8299181461334229, "learning_rate": 0.001, "loss": 2.1344, "step": 498900 }, { "epoch": 64.51195862960569, "grad_norm": 1.384885549545288, "learning_rate": 0.001, "loss": 2.1484, "step": 499000 }, { "epoch": 64.52488687782805, "grad_norm": 0.8061456084251404, "learning_rate": 0.001, "loss": 2.1465, "step": 499100 }, { "epoch": 64.53781512605042, "grad_norm": 1.1810606718063354, "learning_rate": 0.001, "loss": 2.1613, "step": 499200 }, { "epoch": 64.55074337427278, "grad_norm": 1.0545631647109985, "learning_rate": 0.001, "loss": 2.1352, "step": 499300 }, { "epoch": 64.56367162249515, "grad_norm": 1.0789546966552734, "learning_rate": 0.001, "loss": 2.1649, "step": 499400 }, { "epoch": 64.57659987071752, "grad_norm": 9.110381126403809, "learning_rate": 0.001, "loss": 2.1397, "step": 499500 }, { "epoch": 64.58952811893988, "grad_norm": 0.9692444205284119, "learning_rate": 0.001, "loss": 2.1397, "step": 499600 }, { "epoch": 64.60245636716225, "grad_norm": 1.0037554502487183, "learning_rate": 0.001, "loss": 2.1424, "step": 499700 }, { "epoch": 64.61538461538461, "grad_norm": 1.1804907321929932, "learning_rate": 0.001, "loss": 2.1525, "step": 499800 }, { "epoch": 64.62831286360698, "grad_norm": 1.0305958986282349, "learning_rate": 0.001, "loss": 2.1456, "step": 499900 }, { "epoch": 64.64124111182934, "grad_norm": 0.9457415342330933, "learning_rate": 0.001, "loss": 2.1657, "step": 500000 }, { "epoch": 64.65416936005171, "grad_norm": 1.1077866554260254, "learning_rate": 0.001, "loss": 2.156, "step": 500100 }, { "epoch": 64.66709760827408, "grad_norm": 0.8181153535842896, "learning_rate": 0.001, "loss": 2.1731, "step": 500200 }, { "epoch": 64.68002585649644, "grad_norm": 1.0027929544448853, "learning_rate": 0.001, "loss": 2.1647, "step": 500300 }, { "epoch": 64.69295410471881, "grad_norm": 0.8067225813865662, "learning_rate": 0.001, "loss": 2.15, "step": 500400 }, { "epoch": 64.70588235294117, "grad_norm": 0.8578668832778931, "learning_rate": 0.001, "loss": 2.1385, "step": 500500 }, { "epoch": 64.71881060116354, "grad_norm": 0.7847951054573059, "learning_rate": 0.001, "loss": 2.1587, "step": 500600 }, { "epoch": 64.7317388493859, "grad_norm": 0.8755277991294861, "learning_rate": 0.001, "loss": 2.1521, "step": 500700 }, { "epoch": 64.74466709760827, "grad_norm": 1.3025262355804443, "learning_rate": 0.001, "loss": 2.1613, "step": 500800 }, { "epoch": 64.75759534583064, "grad_norm": 0.9726753234863281, "learning_rate": 0.001, "loss": 2.1581, "step": 500900 }, { "epoch": 64.770523594053, "grad_norm": 1.510306477546692, "learning_rate": 0.001, "loss": 2.1755, "step": 501000 }, { "epoch": 64.78345184227537, "grad_norm": 0.8482311964035034, "learning_rate": 0.001, "loss": 2.1936, "step": 501100 }, { "epoch": 64.79638009049773, "grad_norm": 0.9081204533576965, "learning_rate": 0.001, "loss": 2.1605, "step": 501200 }, { "epoch": 64.8093083387201, "grad_norm": 0.8564449548721313, "learning_rate": 0.001, "loss": 2.177, "step": 501300 }, { "epoch": 64.82223658694247, "grad_norm": 1.0797311067581177, "learning_rate": 0.001, "loss": 2.1754, "step": 501400 }, { "epoch": 64.83516483516483, "grad_norm": 1.4759374856948853, "learning_rate": 0.001, "loss": 2.1742, "step": 501500 }, { "epoch": 64.8480930833872, "grad_norm": 0.9029476642608643, "learning_rate": 0.001, "loss": 2.181, "step": 501600 }, { "epoch": 64.86102133160956, "grad_norm": 1.7536834478378296, "learning_rate": 0.001, "loss": 2.1771, "step": 501700 }, { "epoch": 64.87394957983193, "grad_norm": 1.002232551574707, "learning_rate": 0.001, "loss": 2.1644, "step": 501800 }, { "epoch": 64.8868778280543, "grad_norm": 0.8956558704376221, "learning_rate": 0.001, "loss": 2.1675, "step": 501900 }, { "epoch": 64.89980607627666, "grad_norm": 5.990950584411621, "learning_rate": 0.001, "loss": 2.1684, "step": 502000 }, { "epoch": 64.91273432449903, "grad_norm": 1.1545195579528809, "learning_rate": 0.001, "loss": 2.1773, "step": 502100 }, { "epoch": 64.9256625727214, "grad_norm": 7.412445068359375, "learning_rate": 0.001, "loss": 2.1828, "step": 502200 }, { "epoch": 64.93859082094376, "grad_norm": 1.1584216356277466, "learning_rate": 0.001, "loss": 2.1758, "step": 502300 }, { "epoch": 64.95151906916612, "grad_norm": 1.6759170293807983, "learning_rate": 0.001, "loss": 2.168, "step": 502400 }, { "epoch": 64.96444731738849, "grad_norm": 1.4884073734283447, "learning_rate": 0.001, "loss": 2.1639, "step": 502500 }, { "epoch": 64.97737556561086, "grad_norm": 1.5717328786849976, "learning_rate": 0.001, "loss": 2.2005, "step": 502600 }, { "epoch": 64.99030381383322, "grad_norm": 0.9269816279411316, "learning_rate": 0.001, "loss": 2.1843, "step": 502700 }, { "epoch": 65.00323206205559, "grad_norm": 1.049461007118225, "learning_rate": 0.001, "loss": 2.1773, "step": 502800 }, { "epoch": 65.01616031027795, "grad_norm": 1.4980180263519287, "learning_rate": 0.001, "loss": 2.0768, "step": 502900 }, { "epoch": 65.02908855850032, "grad_norm": 1.2419462203979492, "learning_rate": 0.001, "loss": 2.0854, "step": 503000 }, { "epoch": 65.04201680672269, "grad_norm": 0.8387981653213501, "learning_rate": 0.001, "loss": 2.0875, "step": 503100 }, { "epoch": 65.05494505494505, "grad_norm": 2.677069902420044, "learning_rate": 0.001, "loss": 2.1053, "step": 503200 }, { "epoch": 65.06787330316742, "grad_norm": 0.8353313207626343, "learning_rate": 0.001, "loss": 2.1121, "step": 503300 }, { "epoch": 65.08080155138978, "grad_norm": 1.3546314239501953, "learning_rate": 0.001, "loss": 2.1191, "step": 503400 }, { "epoch": 65.09372979961215, "grad_norm": 1.01200270652771, "learning_rate": 0.001, "loss": 2.1034, "step": 503500 }, { "epoch": 65.10665804783451, "grad_norm": 1.2187881469726562, "learning_rate": 0.001, "loss": 2.1108, "step": 503600 }, { "epoch": 65.11958629605688, "grad_norm": 0.8558415770530701, "learning_rate": 0.001, "loss": 2.0993, "step": 503700 }, { "epoch": 65.13251454427925, "grad_norm": 2.2153775691986084, "learning_rate": 0.001, "loss": 2.1127, "step": 503800 }, { "epoch": 65.14544279250161, "grad_norm": 1.3353904485702515, "learning_rate": 0.001, "loss": 2.1087, "step": 503900 }, { "epoch": 65.15837104072398, "grad_norm": 0.9533994197845459, "learning_rate": 0.001, "loss": 2.1215, "step": 504000 }, { "epoch": 65.17129928894634, "grad_norm": 1.0111676454544067, "learning_rate": 0.001, "loss": 2.1319, "step": 504100 }, { "epoch": 65.18422753716871, "grad_norm": 1.293208360671997, "learning_rate": 0.001, "loss": 2.1045, "step": 504200 }, { "epoch": 65.19715578539108, "grad_norm": 1.4481594562530518, "learning_rate": 0.001, "loss": 2.0923, "step": 504300 }, { "epoch": 65.21008403361344, "grad_norm": 0.9477838277816772, "learning_rate": 0.001, "loss": 2.1336, "step": 504400 }, { "epoch": 65.2230122818358, "grad_norm": 0.8452276587486267, "learning_rate": 0.001, "loss": 2.1131, "step": 504500 }, { "epoch": 65.23594053005817, "grad_norm": 1.1350725889205933, "learning_rate": 0.001, "loss": 2.1306, "step": 504600 }, { "epoch": 65.24886877828054, "grad_norm": 2.7759103775024414, "learning_rate": 0.001, "loss": 2.113, "step": 504700 }, { "epoch": 65.2617970265029, "grad_norm": 0.9894657135009766, "learning_rate": 0.001, "loss": 2.1366, "step": 504800 }, { "epoch": 65.27472527472527, "grad_norm": 0.84378981590271, "learning_rate": 0.001, "loss": 2.1143, "step": 504900 }, { "epoch": 65.28765352294764, "grad_norm": 0.8866474628448486, "learning_rate": 0.001, "loss": 2.1026, "step": 505000 }, { "epoch": 65.30058177117, "grad_norm": 1.3528802394866943, "learning_rate": 0.001, "loss": 2.1386, "step": 505100 }, { "epoch": 65.31351001939237, "grad_norm": 0.8628848195075989, "learning_rate": 0.001, "loss": 2.1427, "step": 505200 }, { "epoch": 65.32643826761473, "grad_norm": 1.4119272232055664, "learning_rate": 0.001, "loss": 2.1246, "step": 505300 }, { "epoch": 65.3393665158371, "grad_norm": 1.6158559322357178, "learning_rate": 0.001, "loss": 2.1339, "step": 505400 }, { "epoch": 65.35229476405947, "grad_norm": 1.0430717468261719, "learning_rate": 0.001, "loss": 2.1398, "step": 505500 }, { "epoch": 65.36522301228183, "grad_norm": 1.4142084121704102, "learning_rate": 0.001, "loss": 2.1177, "step": 505600 }, { "epoch": 65.3781512605042, "grad_norm": 0.9050608277320862, "learning_rate": 0.001, "loss": 2.1386, "step": 505700 }, { "epoch": 65.39107950872656, "grad_norm": 1.4950813055038452, "learning_rate": 0.001, "loss": 2.1392, "step": 505800 }, { "epoch": 65.40400775694893, "grad_norm": 1.7998664379119873, "learning_rate": 0.001, "loss": 2.1348, "step": 505900 }, { "epoch": 65.4169360051713, "grad_norm": 1.0981365442276, "learning_rate": 0.001, "loss": 2.1414, "step": 506000 }, { "epoch": 65.42986425339366, "grad_norm": 1.0595357418060303, "learning_rate": 0.001, "loss": 2.1389, "step": 506100 }, { "epoch": 65.44279250161603, "grad_norm": 1.2161972522735596, "learning_rate": 0.001, "loss": 2.1415, "step": 506200 }, { "epoch": 65.45572074983839, "grad_norm": 0.8837611675262451, "learning_rate": 0.001, "loss": 2.1449, "step": 506300 }, { "epoch": 65.46864899806076, "grad_norm": 0.8627933263778687, "learning_rate": 0.001, "loss": 2.1499, "step": 506400 }, { "epoch": 65.48157724628312, "grad_norm": 0.9719666242599487, "learning_rate": 0.001, "loss": 2.1492, "step": 506500 }, { "epoch": 65.49450549450549, "grad_norm": 1.1416022777557373, "learning_rate": 0.001, "loss": 2.1474, "step": 506600 }, { "epoch": 65.50743374272786, "grad_norm": 0.8179353475570679, "learning_rate": 0.001, "loss": 2.1659, "step": 506700 }, { "epoch": 65.52036199095022, "grad_norm": 1.171409010887146, "learning_rate": 0.001, "loss": 2.1284, "step": 506800 }, { "epoch": 65.53329023917259, "grad_norm": 1.0289839506149292, "learning_rate": 0.001, "loss": 2.1403, "step": 506900 }, { "epoch": 65.54621848739495, "grad_norm": 0.912223756313324, "learning_rate": 0.001, "loss": 2.1457, "step": 507000 }, { "epoch": 65.55914673561732, "grad_norm": 1.2380478382110596, "learning_rate": 0.001, "loss": 2.1564, "step": 507100 }, { "epoch": 65.57207498383968, "grad_norm": 1.0279666185379028, "learning_rate": 0.001, "loss": 2.1528, "step": 507200 }, { "epoch": 65.58500323206205, "grad_norm": 1.3040512800216675, "learning_rate": 0.001, "loss": 2.155, "step": 507300 }, { "epoch": 65.59793148028442, "grad_norm": 1.0261937379837036, "learning_rate": 0.001, "loss": 2.1654, "step": 507400 }, { "epoch": 65.61085972850678, "grad_norm": 0.9564575552940369, "learning_rate": 0.001, "loss": 2.1694, "step": 507500 }, { "epoch": 65.62378797672915, "grad_norm": 0.928525447845459, "learning_rate": 0.001, "loss": 2.1655, "step": 507600 }, { "epoch": 65.63671622495151, "grad_norm": 1.069089412689209, "learning_rate": 0.001, "loss": 2.1385, "step": 507700 }, { "epoch": 65.64964447317388, "grad_norm": 1.2648065090179443, "learning_rate": 0.001, "loss": 2.146, "step": 507800 }, { "epoch": 65.66257272139624, "grad_norm": 1.6723445653915405, "learning_rate": 0.001, "loss": 2.1514, "step": 507900 }, { "epoch": 65.67550096961861, "grad_norm": 1.3938406705856323, "learning_rate": 0.001, "loss": 2.1523, "step": 508000 }, { "epoch": 65.68842921784098, "grad_norm": 0.8780449032783508, "learning_rate": 0.001, "loss": 2.1612, "step": 508100 }, { "epoch": 65.70135746606334, "grad_norm": 1.3227163553237915, "learning_rate": 0.001, "loss": 2.1478, "step": 508200 }, { "epoch": 65.71428571428571, "grad_norm": 1.053831934928894, "learning_rate": 0.001, "loss": 2.1566, "step": 508300 }, { "epoch": 65.72721396250807, "grad_norm": 0.9225063920021057, "learning_rate": 0.001, "loss": 2.164, "step": 508400 }, { "epoch": 65.74014221073044, "grad_norm": 1.0866472721099854, "learning_rate": 0.001, "loss": 2.1685, "step": 508500 }, { "epoch": 65.7530704589528, "grad_norm": 0.715441107749939, "learning_rate": 0.001, "loss": 2.1673, "step": 508600 }, { "epoch": 65.76599870717517, "grad_norm": 0.8322176933288574, "learning_rate": 0.001, "loss": 2.1639, "step": 508700 }, { "epoch": 65.77892695539754, "grad_norm": 3.175175905227661, "learning_rate": 0.001, "loss": 2.1565, "step": 508800 }, { "epoch": 65.7918552036199, "grad_norm": 1.115622639656067, "learning_rate": 0.001, "loss": 2.1673, "step": 508900 }, { "epoch": 65.80478345184227, "grad_norm": 1.0503801107406616, "learning_rate": 0.001, "loss": 2.1622, "step": 509000 }, { "epoch": 65.81771170006463, "grad_norm": 40.00404739379883, "learning_rate": 0.001, "loss": 2.1744, "step": 509100 }, { "epoch": 65.830639948287, "grad_norm": 1.0841084718704224, "learning_rate": 0.001, "loss": 2.1677, "step": 509200 }, { "epoch": 65.84356819650937, "grad_norm": 1.0541719198226929, "learning_rate": 0.001, "loss": 2.1745, "step": 509300 }, { "epoch": 65.85649644473173, "grad_norm": 0.9925304055213928, "learning_rate": 0.001, "loss": 2.1637, "step": 509400 }, { "epoch": 65.8694246929541, "grad_norm": 1.188831090927124, "learning_rate": 0.001, "loss": 2.1676, "step": 509500 }, { "epoch": 65.88235294117646, "grad_norm": 1.2130018472671509, "learning_rate": 0.001, "loss": 2.1788, "step": 509600 }, { "epoch": 65.89528118939883, "grad_norm": 13.572433471679688, "learning_rate": 0.001, "loss": 2.1652, "step": 509700 }, { "epoch": 65.9082094376212, "grad_norm": 1.0295345783233643, "learning_rate": 0.001, "loss": 2.1608, "step": 509800 }, { "epoch": 65.92113768584356, "grad_norm": 0.9893361330032349, "learning_rate": 0.001, "loss": 2.1785, "step": 509900 }, { "epoch": 65.93406593406593, "grad_norm": 1.2610067129135132, "learning_rate": 0.001, "loss": 2.1769, "step": 510000 }, { "epoch": 65.9469941822883, "grad_norm": 15.03819751739502, "learning_rate": 0.001, "loss": 2.1831, "step": 510100 }, { "epoch": 65.95992243051066, "grad_norm": 2.579874277114868, "learning_rate": 0.001, "loss": 2.1706, "step": 510200 }, { "epoch": 65.97285067873302, "grad_norm": 0.9525448083877563, "learning_rate": 0.001, "loss": 2.1992, "step": 510300 }, { "epoch": 65.98577892695539, "grad_norm": 1.0663384199142456, "learning_rate": 0.001, "loss": 2.1751, "step": 510400 }, { "epoch": 65.99870717517777, "grad_norm": 0.968645453453064, "learning_rate": 0.001, "loss": 2.1751, "step": 510500 }, { "epoch": 66.01163542340014, "grad_norm": 1.1172258853912354, "learning_rate": 0.001, "loss": 2.101, "step": 510600 }, { "epoch": 66.0245636716225, "grad_norm": 1.1196869611740112, "learning_rate": 0.001, "loss": 2.0797, "step": 510700 }, { "epoch": 66.03749191984487, "grad_norm": 1.2157915830612183, "learning_rate": 0.001, "loss": 2.079, "step": 510800 }, { "epoch": 66.05042016806723, "grad_norm": 7.257924556732178, "learning_rate": 0.001, "loss": 2.0927, "step": 510900 }, { "epoch": 66.0633484162896, "grad_norm": 1.3105138540267944, "learning_rate": 0.001, "loss": 2.0834, "step": 511000 }, { "epoch": 66.07627666451197, "grad_norm": 0.9524511098861694, "learning_rate": 0.001, "loss": 2.0996, "step": 511100 }, { "epoch": 66.08920491273433, "grad_norm": 1.274863362312317, "learning_rate": 0.001, "loss": 2.1139, "step": 511200 }, { "epoch": 66.1021331609567, "grad_norm": 1.1103471517562866, "learning_rate": 0.001, "loss": 2.0743, "step": 511300 }, { "epoch": 66.11506140917906, "grad_norm": 1.7189340591430664, "learning_rate": 0.001, "loss": 2.1013, "step": 511400 }, { "epoch": 66.12798965740143, "grad_norm": 1.4276527166366577, "learning_rate": 0.001, "loss": 2.0959, "step": 511500 }, { "epoch": 66.1409179056238, "grad_norm": 2.465272903442383, "learning_rate": 0.001, "loss": 2.0936, "step": 511600 }, { "epoch": 66.15384615384616, "grad_norm": 1.9654605388641357, "learning_rate": 0.001, "loss": 2.1015, "step": 511700 }, { "epoch": 66.16677440206853, "grad_norm": 1.1486315727233887, "learning_rate": 0.001, "loss": 2.0994, "step": 511800 }, { "epoch": 66.17970265029089, "grad_norm": 1.7217870950698853, "learning_rate": 0.001, "loss": 2.127, "step": 511900 }, { "epoch": 66.19263089851326, "grad_norm": 1.6222127676010132, "learning_rate": 0.001, "loss": 2.1355, "step": 512000 }, { "epoch": 66.20555914673562, "grad_norm": 1.0303289890289307, "learning_rate": 0.001, "loss": 2.1137, "step": 512100 }, { "epoch": 66.21848739495799, "grad_norm": 1.0214381217956543, "learning_rate": 0.001, "loss": 2.1159, "step": 512200 }, { "epoch": 66.23141564318036, "grad_norm": 0.9089242815971375, "learning_rate": 0.001, "loss": 2.1153, "step": 512300 }, { "epoch": 66.24434389140272, "grad_norm": 1.2952841520309448, "learning_rate": 0.001, "loss": 2.1333, "step": 512400 }, { "epoch": 66.25727213962509, "grad_norm": 0.8853986263275146, "learning_rate": 0.001, "loss": 2.1078, "step": 512500 }, { "epoch": 66.27020038784745, "grad_norm": 1.2541275024414062, "learning_rate": 0.001, "loss": 2.1137, "step": 512600 }, { "epoch": 66.28312863606982, "grad_norm": 1.265755534172058, "learning_rate": 0.001, "loss": 2.1203, "step": 512700 }, { "epoch": 66.29605688429218, "grad_norm": 1.645658016204834, "learning_rate": 0.001, "loss": 2.1186, "step": 512800 }, { "epoch": 66.30898513251455, "grad_norm": 1.2047069072723389, "learning_rate": 0.001, "loss": 2.1143, "step": 512900 }, { "epoch": 66.32191338073692, "grad_norm": 1.1984120607376099, "learning_rate": 0.001, "loss": 2.115, "step": 513000 }, { "epoch": 66.33484162895928, "grad_norm": 1.4884905815124512, "learning_rate": 0.001, "loss": 2.137, "step": 513100 }, { "epoch": 66.34776987718165, "grad_norm": 2.199274778366089, "learning_rate": 0.001, "loss": 2.1308, "step": 513200 }, { "epoch": 66.36069812540401, "grad_norm": 1.2090686559677124, "learning_rate": 0.001, "loss": 2.1306, "step": 513300 }, { "epoch": 66.37362637362638, "grad_norm": 1.1913156509399414, "learning_rate": 0.001, "loss": 2.1323, "step": 513400 }, { "epoch": 66.38655462184875, "grad_norm": 0.9984092116355896, "learning_rate": 0.001, "loss": 2.1132, "step": 513500 }, { "epoch": 66.39948287007111, "grad_norm": 1.4106314182281494, "learning_rate": 0.001, "loss": 2.1143, "step": 513600 }, { "epoch": 66.41241111829348, "grad_norm": 3.9881505966186523, "learning_rate": 0.001, "loss": 2.1462, "step": 513700 }, { "epoch": 66.42533936651584, "grad_norm": 1.1080704927444458, "learning_rate": 0.001, "loss": 2.1433, "step": 513800 }, { "epoch": 66.43826761473821, "grad_norm": 1.1402912139892578, "learning_rate": 0.001, "loss": 2.1188, "step": 513900 }, { "epoch": 66.45119586296057, "grad_norm": 2.709717273712158, "learning_rate": 0.001, "loss": 2.1302, "step": 514000 }, { "epoch": 66.46412411118294, "grad_norm": 1.1796553134918213, "learning_rate": 0.001, "loss": 2.1315, "step": 514100 }, { "epoch": 66.4770523594053, "grad_norm": 0.9738767147064209, "learning_rate": 0.001, "loss": 2.1197, "step": 514200 }, { "epoch": 66.48998060762767, "grad_norm": 1.299966812133789, "learning_rate": 0.001, "loss": 2.134, "step": 514300 }, { "epoch": 66.50290885585004, "grad_norm": 1.0806903839111328, "learning_rate": 0.001, "loss": 2.145, "step": 514400 }, { "epoch": 66.5158371040724, "grad_norm": 0.8670704960823059, "learning_rate": 0.001, "loss": 2.1332, "step": 514500 }, { "epoch": 66.52876535229477, "grad_norm": 1.121796727180481, "learning_rate": 0.001, "loss": 2.1475, "step": 514600 }, { "epoch": 66.54169360051714, "grad_norm": 1.1634548902511597, "learning_rate": 0.001, "loss": 2.1343, "step": 514700 }, { "epoch": 66.5546218487395, "grad_norm": 1.2909685373306274, "learning_rate": 0.001, "loss": 2.151, "step": 514800 }, { "epoch": 66.56755009696187, "grad_norm": 1.0572798252105713, "learning_rate": 0.001, "loss": 2.135, "step": 514900 }, { "epoch": 66.58047834518423, "grad_norm": 1.0077481269836426, "learning_rate": 0.001, "loss": 2.1299, "step": 515000 }, { "epoch": 66.5934065934066, "grad_norm": 0.9200018048286438, "learning_rate": 0.001, "loss": 2.1478, "step": 515100 }, { "epoch": 66.60633484162896, "grad_norm": 1.2113378047943115, "learning_rate": 0.001, "loss": 2.1602, "step": 515200 }, { "epoch": 66.61926308985133, "grad_norm": 1.4789239168167114, "learning_rate": 0.001, "loss": 2.1493, "step": 515300 }, { "epoch": 66.6321913380737, "grad_norm": 0.9531114101409912, "learning_rate": 0.001, "loss": 2.1569, "step": 515400 }, { "epoch": 66.64511958629606, "grad_norm": 0.9633907079696655, "learning_rate": 0.001, "loss": 2.1481, "step": 515500 }, { "epoch": 66.65804783451843, "grad_norm": 1.1930961608886719, "learning_rate": 0.001, "loss": 2.1543, "step": 515600 }, { "epoch": 66.6709760827408, "grad_norm": 1.1066282987594604, "learning_rate": 0.001, "loss": 2.1495, "step": 515700 }, { "epoch": 66.68390433096316, "grad_norm": 1.4104359149932861, "learning_rate": 0.001, "loss": 2.1312, "step": 515800 }, { "epoch": 66.69683257918552, "grad_norm": 1.0533889532089233, "learning_rate": 0.001, "loss": 2.1754, "step": 515900 }, { "epoch": 66.70976082740789, "grad_norm": 0.9326623678207397, "learning_rate": 0.001, "loss": 2.1732, "step": 516000 }, { "epoch": 66.72268907563026, "grad_norm": 1.1964672803878784, "learning_rate": 0.001, "loss": 2.1645, "step": 516100 }, { "epoch": 66.73561732385262, "grad_norm": 1.102807879447937, "learning_rate": 0.001, "loss": 2.1641, "step": 516200 }, { "epoch": 66.74854557207499, "grad_norm": 5.759790420532227, "learning_rate": 0.001, "loss": 2.1664, "step": 516300 }, { "epoch": 66.76147382029735, "grad_norm": 0.8530991673469543, "learning_rate": 0.001, "loss": 2.1653, "step": 516400 }, { "epoch": 66.77440206851972, "grad_norm": 1.145770788192749, "learning_rate": 0.001, "loss": 2.1588, "step": 516500 }, { "epoch": 66.78733031674209, "grad_norm": 0.9357365369796753, "learning_rate": 0.001, "loss": 2.1441, "step": 516600 }, { "epoch": 66.80025856496445, "grad_norm": 1.0925170183181763, "learning_rate": 0.001, "loss": 2.168, "step": 516700 }, { "epoch": 66.81318681318682, "grad_norm": 1.5391554832458496, "learning_rate": 0.001, "loss": 2.1712, "step": 516800 }, { "epoch": 66.82611506140918, "grad_norm": 0.9378085136413574, "learning_rate": 0.001, "loss": 2.15, "step": 516900 }, { "epoch": 66.83904330963155, "grad_norm": 1.021998405456543, "learning_rate": 0.001, "loss": 2.1546, "step": 517000 }, { "epoch": 66.85197155785391, "grad_norm": 1.3818331956863403, "learning_rate": 0.001, "loss": 2.1593, "step": 517100 }, { "epoch": 66.86489980607628, "grad_norm": 1.844153881072998, "learning_rate": 0.001, "loss": 2.1645, "step": 517200 }, { "epoch": 66.87782805429865, "grad_norm": 1.2140203714370728, "learning_rate": 0.001, "loss": 2.1788, "step": 517300 }, { "epoch": 66.89075630252101, "grad_norm": 0.8991268873214722, "learning_rate": 0.001, "loss": 2.1679, "step": 517400 }, { "epoch": 66.90368455074338, "grad_norm": 0.9796055555343628, "learning_rate": 0.001, "loss": 2.1743, "step": 517500 }, { "epoch": 66.91661279896574, "grad_norm": 2.228085517883301, "learning_rate": 0.001, "loss": 2.1733, "step": 517600 }, { "epoch": 66.92954104718811, "grad_norm": 0.8897301554679871, "learning_rate": 0.001, "loss": 2.1646, "step": 517700 }, { "epoch": 66.94246929541048, "grad_norm": 1.0216008424758911, "learning_rate": 0.001, "loss": 2.1867, "step": 517800 }, { "epoch": 66.95539754363284, "grad_norm": 0.9828084707260132, "learning_rate": 0.001, "loss": 2.1636, "step": 517900 }, { "epoch": 66.96832579185521, "grad_norm": 1.4242910146713257, "learning_rate": 0.001, "loss": 2.1695, "step": 518000 }, { "epoch": 66.98125404007757, "grad_norm": 1.2517482042312622, "learning_rate": 0.001, "loss": 2.182, "step": 518100 }, { "epoch": 66.99418228829994, "grad_norm": 3.64599347114563, "learning_rate": 0.001, "loss": 2.1787, "step": 518200 }, { "epoch": 67.0071105365223, "grad_norm": 1.2676576375961304, "learning_rate": 0.001, "loss": 2.0811, "step": 518300 }, { "epoch": 67.02003878474467, "grad_norm": 1.180334448814392, "learning_rate": 0.001, "loss": 2.0935, "step": 518400 }, { "epoch": 67.03296703296704, "grad_norm": 1.2839010953903198, "learning_rate": 0.001, "loss": 2.0794, "step": 518500 }, { "epoch": 67.0458952811894, "grad_norm": 1.027044653892517, "learning_rate": 0.001, "loss": 2.0712, "step": 518600 }, { "epoch": 67.05882352941177, "grad_norm": 1.4632104635238647, "learning_rate": 0.001, "loss": 2.098, "step": 518700 }, { "epoch": 67.07175177763413, "grad_norm": 1.2246211767196655, "learning_rate": 0.001, "loss": 2.09, "step": 518800 }, { "epoch": 67.0846800258565, "grad_norm": 1.002480149269104, "learning_rate": 0.001, "loss": 2.1044, "step": 518900 }, { "epoch": 67.09760827407887, "grad_norm": 1.0198742151260376, "learning_rate": 0.001, "loss": 2.093, "step": 519000 }, { "epoch": 67.11053652230123, "grad_norm": 0.8626322746276855, "learning_rate": 0.001, "loss": 2.0948, "step": 519100 }, { "epoch": 67.1234647705236, "grad_norm": 0.9275432229042053, "learning_rate": 0.001, "loss": 2.1184, "step": 519200 }, { "epoch": 67.13639301874596, "grad_norm": 0.9068947434425354, "learning_rate": 0.001, "loss": 2.0986, "step": 519300 }, { "epoch": 67.14932126696833, "grad_norm": 1.431886076927185, "learning_rate": 0.001, "loss": 2.0886, "step": 519400 }, { "epoch": 67.1622495151907, "grad_norm": 1.2932208776474, "learning_rate": 0.001, "loss": 2.1037, "step": 519500 }, { "epoch": 67.17517776341306, "grad_norm": 1.0353007316589355, "learning_rate": 0.001, "loss": 2.1129, "step": 519600 }, { "epoch": 67.18810601163543, "grad_norm": 1.3163398504257202, "learning_rate": 0.001, "loss": 2.1024, "step": 519700 }, { "epoch": 67.20103425985779, "grad_norm": 1.3248438835144043, "learning_rate": 0.001, "loss": 2.0959, "step": 519800 }, { "epoch": 67.21396250808016, "grad_norm": 1.1773227453231812, "learning_rate": 0.001, "loss": 2.1022, "step": 519900 }, { "epoch": 67.22689075630252, "grad_norm": 0.9319822192192078, "learning_rate": 0.001, "loss": 2.0982, "step": 520000 }, { "epoch": 67.23981900452489, "grad_norm": 0.9927107691764832, "learning_rate": 0.001, "loss": 2.1075, "step": 520100 }, { "epoch": 67.25274725274726, "grad_norm": 1.6955702304840088, "learning_rate": 0.001, "loss": 2.1282, "step": 520200 }, { "epoch": 67.26567550096962, "grad_norm": 3.8318967819213867, "learning_rate": 0.001, "loss": 2.1244, "step": 520300 }, { "epoch": 67.27860374919199, "grad_norm": 1.1117267608642578, "learning_rate": 0.001, "loss": 2.1165, "step": 520400 }, { "epoch": 67.29153199741435, "grad_norm": 1.0982483625411987, "learning_rate": 0.001, "loss": 2.1179, "step": 520500 }, { "epoch": 67.30446024563672, "grad_norm": 0.9028845429420471, "learning_rate": 0.001, "loss": 2.1246, "step": 520600 }, { "epoch": 67.31738849385908, "grad_norm": 0.8591315150260925, "learning_rate": 0.001, "loss": 2.1019, "step": 520700 }, { "epoch": 67.33031674208145, "grad_norm": 1.1083635091781616, "learning_rate": 0.001, "loss": 2.1197, "step": 520800 }, { "epoch": 67.34324499030382, "grad_norm": 1.1420255899429321, "learning_rate": 0.001, "loss": 2.1327, "step": 520900 }, { "epoch": 67.35617323852618, "grad_norm": 1.307123064994812, "learning_rate": 0.001, "loss": 2.1175, "step": 521000 }, { "epoch": 67.36910148674855, "grad_norm": 0.963595449924469, "learning_rate": 0.001, "loss": 2.1275, "step": 521100 }, { "epoch": 67.38202973497091, "grad_norm": 1.3323538303375244, "learning_rate": 0.001, "loss": 2.11, "step": 521200 }, { "epoch": 67.39495798319328, "grad_norm": 1.2848905324935913, "learning_rate": 0.001, "loss": 2.1289, "step": 521300 }, { "epoch": 67.40788623141565, "grad_norm": 1.071738362312317, "learning_rate": 0.001, "loss": 2.1126, "step": 521400 }, { "epoch": 67.42081447963801, "grad_norm": 0.8916468620300293, "learning_rate": 0.001, "loss": 2.1404, "step": 521500 }, { "epoch": 67.43374272786038, "grad_norm": 1.0211042165756226, "learning_rate": 0.001, "loss": 2.1149, "step": 521600 }, { "epoch": 67.44667097608274, "grad_norm": 0.7799056768417358, "learning_rate": 0.001, "loss": 2.1249, "step": 521700 }, { "epoch": 67.45959922430511, "grad_norm": 1.1012731790542603, "learning_rate": 0.001, "loss": 2.1233, "step": 521800 }, { "epoch": 67.47252747252747, "grad_norm": 1.2301461696624756, "learning_rate": 0.001, "loss": 2.136, "step": 521900 }, { "epoch": 67.48545572074984, "grad_norm": 1.132843017578125, "learning_rate": 0.001, "loss": 2.1242, "step": 522000 }, { "epoch": 67.4983839689722, "grad_norm": 1.1454613208770752, "learning_rate": 0.001, "loss": 2.1376, "step": 522100 }, { "epoch": 67.51131221719457, "grad_norm": 0.8116059899330139, "learning_rate": 0.001, "loss": 2.1186, "step": 522200 }, { "epoch": 67.52424046541694, "grad_norm": 1.4560511112213135, "learning_rate": 0.001, "loss": 2.1292, "step": 522300 }, { "epoch": 67.5371687136393, "grad_norm": 1.147134780883789, "learning_rate": 0.001, "loss": 2.1362, "step": 522400 }, { "epoch": 67.55009696186167, "grad_norm": 6.726008892059326, "learning_rate": 0.001, "loss": 2.125, "step": 522500 }, { "epoch": 67.56302521008404, "grad_norm": 1.078460693359375, "learning_rate": 0.001, "loss": 2.1575, "step": 522600 }, { "epoch": 67.5759534583064, "grad_norm": 1.2796043157577515, "learning_rate": 0.001, "loss": 2.1214, "step": 522700 }, { "epoch": 67.58888170652877, "grad_norm": 1.2943549156188965, "learning_rate": 0.001, "loss": 2.1442, "step": 522800 }, { "epoch": 67.60180995475113, "grad_norm": 0.8315401077270508, "learning_rate": 0.001, "loss": 2.1404, "step": 522900 }, { "epoch": 67.6147382029735, "grad_norm": 1.020492672920227, "learning_rate": 0.001, "loss": 2.1432, "step": 523000 }, { "epoch": 67.62766645119586, "grad_norm": 1.9136039018630981, "learning_rate": 0.001, "loss": 2.1473, "step": 523100 }, { "epoch": 67.64059469941823, "grad_norm": 1.1166235208511353, "learning_rate": 0.001, "loss": 2.1209, "step": 523200 }, { "epoch": 67.6535229476406, "grad_norm": 1.6156877279281616, "learning_rate": 0.001, "loss": 2.1403, "step": 523300 }, { "epoch": 67.66645119586296, "grad_norm": 1.0434917211532593, "learning_rate": 0.001, "loss": 2.1448, "step": 523400 }, { "epoch": 67.67937944408533, "grad_norm": 1.1286576986312866, "learning_rate": 0.001, "loss": 2.1551, "step": 523500 }, { "epoch": 67.6923076923077, "grad_norm": 1.1106611490249634, "learning_rate": 0.001, "loss": 2.1389, "step": 523600 }, { "epoch": 67.70523594053006, "grad_norm": 1.101037859916687, "learning_rate": 0.001, "loss": 2.147, "step": 523700 }, { "epoch": 67.71816418875243, "grad_norm": 1.683922529220581, "learning_rate": 0.001, "loss": 2.1432, "step": 523800 }, { "epoch": 67.73109243697479, "grad_norm": 7.114354133605957, "learning_rate": 0.001, "loss": 2.1618, "step": 523900 }, { "epoch": 67.74402068519716, "grad_norm": 1.0430850982666016, "learning_rate": 0.001, "loss": 2.1653, "step": 524000 }, { "epoch": 67.75694893341952, "grad_norm": 1.079468846321106, "learning_rate": 0.001, "loss": 2.1546, "step": 524100 }, { "epoch": 67.76987718164189, "grad_norm": 1.139569640159607, "learning_rate": 0.001, "loss": 2.1506, "step": 524200 }, { "epoch": 67.78280542986425, "grad_norm": 1.8481158018112183, "learning_rate": 0.001, "loss": 2.1587, "step": 524300 }, { "epoch": 67.79573367808662, "grad_norm": 1.054556131362915, "learning_rate": 0.001, "loss": 2.1616, "step": 524400 }, { "epoch": 67.80866192630899, "grad_norm": 1.1904878616333008, "learning_rate": 0.001, "loss": 2.139, "step": 524500 }, { "epoch": 67.82159017453135, "grad_norm": 1.101843237876892, "learning_rate": 0.001, "loss": 2.1554, "step": 524600 }, { "epoch": 67.83451842275372, "grad_norm": 0.9660131335258484, "learning_rate": 0.001, "loss": 2.1584, "step": 524700 }, { "epoch": 67.84744667097608, "grad_norm": 1.1870468854904175, "learning_rate": 0.001, "loss": 2.157, "step": 524800 }, { "epoch": 67.86037491919845, "grad_norm": 1.058630108833313, "learning_rate": 0.001, "loss": 2.1389, "step": 524900 }, { "epoch": 67.87330316742081, "grad_norm": 1.5269485712051392, "learning_rate": 0.001, "loss": 2.167, "step": 525000 }, { "epoch": 67.88623141564318, "grad_norm": 0.9040039777755737, "learning_rate": 0.001, "loss": 2.1564, "step": 525100 }, { "epoch": 67.89915966386555, "grad_norm": 1.0497665405273438, "learning_rate": 0.001, "loss": 2.1757, "step": 525200 }, { "epoch": 67.91208791208791, "grad_norm": 1.2641725540161133, "learning_rate": 0.001, "loss": 2.1569, "step": 525300 }, { "epoch": 67.92501616031028, "grad_norm": 1.6633166074752808, "learning_rate": 0.001, "loss": 2.166, "step": 525400 }, { "epoch": 67.93794440853264, "grad_norm": 1.5332794189453125, "learning_rate": 0.001, "loss": 2.1874, "step": 525500 }, { "epoch": 67.95087265675501, "grad_norm": 0.9052603244781494, "learning_rate": 0.001, "loss": 2.1495, "step": 525600 }, { "epoch": 67.96380090497738, "grad_norm": 5.9485931396484375, "learning_rate": 0.001, "loss": 2.1728, "step": 525700 }, { "epoch": 67.97672915319974, "grad_norm": 1.1328978538513184, "learning_rate": 0.001, "loss": 2.175, "step": 525800 }, { "epoch": 67.98965740142211, "grad_norm": 1.072062373161316, "learning_rate": 0.001, "loss": 2.1697, "step": 525900 }, { "epoch": 68.00258564964447, "grad_norm": 2.774000644683838, "learning_rate": 0.001, "loss": 2.1617, "step": 526000 }, { "epoch": 68.01551389786684, "grad_norm": 1.0530946254730225, "learning_rate": 0.001, "loss": 2.079, "step": 526100 }, { "epoch": 68.0284421460892, "grad_norm": 2.013136625289917, "learning_rate": 0.001, "loss": 2.0822, "step": 526200 }, { "epoch": 68.04137039431157, "grad_norm": 1.230920672416687, "learning_rate": 0.001, "loss": 2.092, "step": 526300 }, { "epoch": 68.05429864253394, "grad_norm": 1.2330819368362427, "learning_rate": 0.001, "loss": 2.0817, "step": 526400 }, { "epoch": 68.0672268907563, "grad_norm": 1.4290677309036255, "learning_rate": 0.001, "loss": 2.095, "step": 526500 }, { "epoch": 68.08015513897867, "grad_norm": 1.246511697769165, "learning_rate": 0.001, "loss": 2.07, "step": 526600 }, { "epoch": 68.09308338720103, "grad_norm": 1.2228176593780518, "learning_rate": 0.001, "loss": 2.088, "step": 526700 }, { "epoch": 68.1060116354234, "grad_norm": 0.8786615133285522, "learning_rate": 0.001, "loss": 2.0873, "step": 526800 }, { "epoch": 68.11893988364577, "grad_norm": 0.8721392154693604, "learning_rate": 0.001, "loss": 2.0752, "step": 526900 }, { "epoch": 68.13186813186813, "grad_norm": 1.6183525323867798, "learning_rate": 0.001, "loss": 2.0884, "step": 527000 }, { "epoch": 68.1447963800905, "grad_norm": 1.1401784420013428, "learning_rate": 0.001, "loss": 2.0806, "step": 527100 }, { "epoch": 68.15772462831286, "grad_norm": 0.8631347417831421, "learning_rate": 0.001, "loss": 2.0938, "step": 527200 }, { "epoch": 68.17065287653523, "grad_norm": 1.1370099782943726, "learning_rate": 0.001, "loss": 2.0981, "step": 527300 }, { "epoch": 68.1835811247576, "grad_norm": 1.5208183526992798, "learning_rate": 0.001, "loss": 2.1101, "step": 527400 }, { "epoch": 68.19650937297996, "grad_norm": 1.0267541408538818, "learning_rate": 0.001, "loss": 2.0988, "step": 527500 }, { "epoch": 68.20943762120233, "grad_norm": 0.9639902710914612, "learning_rate": 0.001, "loss": 2.1111, "step": 527600 }, { "epoch": 68.22236586942469, "grad_norm": 0.9893854260444641, "learning_rate": 0.001, "loss": 2.1106, "step": 527700 }, { "epoch": 68.23529411764706, "grad_norm": 1.5699206590652466, "learning_rate": 0.001, "loss": 2.117, "step": 527800 }, { "epoch": 68.24822236586942, "grad_norm": 1.073523998260498, "learning_rate": 0.001, "loss": 2.0931, "step": 527900 }, { "epoch": 68.26115061409179, "grad_norm": 1.1148380041122437, "learning_rate": 0.001, "loss": 2.0949, "step": 528000 }, { "epoch": 68.27407886231416, "grad_norm": 1.0769309997558594, "learning_rate": 0.001, "loss": 2.1176, "step": 528100 }, { "epoch": 68.28700711053652, "grad_norm": 1.023732304573059, "learning_rate": 0.001, "loss": 2.1085, "step": 528200 }, { "epoch": 68.29993535875889, "grad_norm": 0.9321329593658447, "learning_rate": 0.001, "loss": 2.1146, "step": 528300 }, { "epoch": 68.31286360698125, "grad_norm": 0.9195079207420349, "learning_rate": 0.001, "loss": 2.1061, "step": 528400 }, { "epoch": 68.32579185520362, "grad_norm": 1.090757131576538, "learning_rate": 0.001, "loss": 2.1172, "step": 528500 }, { "epoch": 68.33872010342598, "grad_norm": 0.8732224702835083, "learning_rate": 0.001, "loss": 2.124, "step": 528600 }, { "epoch": 68.35164835164835, "grad_norm": 1.3787060976028442, "learning_rate": 0.001, "loss": 2.1295, "step": 528700 }, { "epoch": 68.36457659987072, "grad_norm": 1.307375431060791, "learning_rate": 0.001, "loss": 2.1121, "step": 528800 }, { "epoch": 68.37750484809308, "grad_norm": 1.3797358274459839, "learning_rate": 0.001, "loss": 2.1208, "step": 528900 }, { "epoch": 68.39043309631545, "grad_norm": 3.8072850704193115, "learning_rate": 0.001, "loss": 2.125, "step": 529000 }, { "epoch": 68.40336134453781, "grad_norm": 1.2905465364456177, "learning_rate": 0.001, "loss": 2.1255, "step": 529100 }, { "epoch": 68.41628959276018, "grad_norm": 0.9402815699577332, "learning_rate": 0.001, "loss": 2.1064, "step": 529200 }, { "epoch": 68.42921784098255, "grad_norm": 0.8722462058067322, "learning_rate": 0.001, "loss": 2.1279, "step": 529300 }, { "epoch": 68.44214608920491, "grad_norm": 1.1298205852508545, "learning_rate": 0.001, "loss": 2.1309, "step": 529400 }, { "epoch": 68.45507433742728, "grad_norm": 0.7931156158447266, "learning_rate": 0.001, "loss": 2.1177, "step": 529500 }, { "epoch": 68.46800258564964, "grad_norm": 2.8224892616271973, "learning_rate": 0.001, "loss": 2.1167, "step": 529600 }, { "epoch": 68.48093083387201, "grad_norm": 0.8788281083106995, "learning_rate": 0.001, "loss": 2.1305, "step": 529700 }, { "epoch": 68.49385908209437, "grad_norm": 10.275518417358398, "learning_rate": 0.001, "loss": 2.1277, "step": 529800 }, { "epoch": 68.50678733031674, "grad_norm": 1.039918303489685, "learning_rate": 0.001, "loss": 2.1413, "step": 529900 }, { "epoch": 68.5197155785391, "grad_norm": 1.3101229667663574, "learning_rate": 0.001, "loss": 2.1224, "step": 530000 }, { "epoch": 68.53264382676147, "grad_norm": 0.9354768991470337, "learning_rate": 0.001, "loss": 2.1256, "step": 530100 }, { "epoch": 68.54557207498384, "grad_norm": 0.9988278746604919, "learning_rate": 0.001, "loss": 2.1397, "step": 530200 }, { "epoch": 68.5585003232062, "grad_norm": 0.8413119316101074, "learning_rate": 0.001, "loss": 2.1216, "step": 530300 }, { "epoch": 68.57142857142857, "grad_norm": 0.8674630522727966, "learning_rate": 0.001, "loss": 2.1416, "step": 530400 }, { "epoch": 68.58435681965094, "grad_norm": 0.923542320728302, "learning_rate": 0.001, "loss": 2.1591, "step": 530500 }, { "epoch": 68.5972850678733, "grad_norm": 1.1984455585479736, "learning_rate": 0.001, "loss": 2.1391, "step": 530600 }, { "epoch": 68.61021331609567, "grad_norm": 0.8169175982475281, "learning_rate": 0.001, "loss": 2.1385, "step": 530700 }, { "epoch": 68.62314156431803, "grad_norm": 0.8267318606376648, "learning_rate": 0.001, "loss": 2.1333, "step": 530800 }, { "epoch": 68.6360698125404, "grad_norm": 0.7569036483764648, "learning_rate": 0.001, "loss": 2.1377, "step": 530900 }, { "epoch": 68.64899806076276, "grad_norm": 1.2715868949890137, "learning_rate": 0.001, "loss": 2.1358, "step": 531000 }, { "epoch": 68.66192630898513, "grad_norm": 0.9157313704490662, "learning_rate": 0.001, "loss": 2.1339, "step": 531100 }, { "epoch": 68.6748545572075, "grad_norm": 1.0328384637832642, "learning_rate": 0.001, "loss": 2.1359, "step": 531200 }, { "epoch": 68.68778280542986, "grad_norm": 0.9508442282676697, "learning_rate": 0.001, "loss": 2.1333, "step": 531300 }, { "epoch": 68.70071105365223, "grad_norm": 1.1645976305007935, "learning_rate": 0.001, "loss": 2.1394, "step": 531400 }, { "epoch": 68.7136393018746, "grad_norm": 1.203108310699463, "learning_rate": 0.001, "loss": 2.1459, "step": 531500 }, { "epoch": 68.72656755009696, "grad_norm": 0.9985458850860596, "learning_rate": 0.001, "loss": 2.1472, "step": 531600 }, { "epoch": 68.73949579831933, "grad_norm": 0.9888998866081238, "learning_rate": 0.001, "loss": 2.1344, "step": 531700 }, { "epoch": 68.75242404654169, "grad_norm": 0.9910878539085388, "learning_rate": 0.001, "loss": 2.1368, "step": 531800 }, { "epoch": 68.76535229476406, "grad_norm": 0.9249954223632812, "learning_rate": 0.001, "loss": 2.1573, "step": 531900 }, { "epoch": 68.77828054298642, "grad_norm": 0.9481098651885986, "learning_rate": 0.001, "loss": 2.1487, "step": 532000 }, { "epoch": 68.79120879120879, "grad_norm": 0.8443711996078491, "learning_rate": 0.001, "loss": 2.1425, "step": 532100 }, { "epoch": 68.80413703943115, "grad_norm": 1.1817636489868164, "learning_rate": 0.001, "loss": 2.1727, "step": 532200 }, { "epoch": 68.81706528765352, "grad_norm": 0.9981171488761902, "learning_rate": 0.001, "loss": 2.1443, "step": 532300 }, { "epoch": 68.82999353587589, "grad_norm": 1.2251648902893066, "learning_rate": 0.001, "loss": 2.1488, "step": 532400 }, { "epoch": 68.84292178409825, "grad_norm": 0.9037687182426453, "learning_rate": 0.001, "loss": 2.1509, "step": 532500 }, { "epoch": 68.85585003232062, "grad_norm": 1.1175774335861206, "learning_rate": 0.001, "loss": 2.1499, "step": 532600 }, { "epoch": 68.86877828054298, "grad_norm": 1.208908200263977, "learning_rate": 0.001, "loss": 2.1648, "step": 532700 }, { "epoch": 68.88170652876535, "grad_norm": 0.835323691368103, "learning_rate": 0.001, "loss": 2.1603, "step": 532800 }, { "epoch": 68.89463477698771, "grad_norm": 1.0245450735092163, "learning_rate": 0.001, "loss": 2.1623, "step": 532900 }, { "epoch": 68.90756302521008, "grad_norm": 1.2906267642974854, "learning_rate": 0.001, "loss": 2.1503, "step": 533000 }, { "epoch": 68.92049127343245, "grad_norm": 0.8417224287986755, "learning_rate": 0.001, "loss": 2.1718, "step": 533100 }, { "epoch": 68.93341952165481, "grad_norm": 1.208695888519287, "learning_rate": 0.001, "loss": 2.1532, "step": 533200 }, { "epoch": 68.94634776987718, "grad_norm": 1.0615853071212769, "learning_rate": 0.001, "loss": 2.1725, "step": 533300 }, { "epoch": 68.95927601809954, "grad_norm": 2.680402994155884, "learning_rate": 0.001, "loss": 2.1711, "step": 533400 }, { "epoch": 68.97220426632191, "grad_norm": 1.6609898805618286, "learning_rate": 0.001, "loss": 2.1699, "step": 533500 }, { "epoch": 68.98513251454428, "grad_norm": 1.2854900360107422, "learning_rate": 0.001, "loss": 2.1684, "step": 533600 }, { "epoch": 68.99806076276664, "grad_norm": 1.1214439868927002, "learning_rate": 0.001, "loss": 2.1759, "step": 533700 }, { "epoch": 69.01098901098901, "grad_norm": 1.3695871829986572, "learning_rate": 0.001, "loss": 2.1048, "step": 533800 }, { "epoch": 69.02391725921137, "grad_norm": 1.0465617179870605, "learning_rate": 0.001, "loss": 2.0753, "step": 533900 }, { "epoch": 69.03684550743374, "grad_norm": 1.2260982990264893, "learning_rate": 0.001, "loss": 2.0886, "step": 534000 }, { "epoch": 69.0497737556561, "grad_norm": 1.1188032627105713, "learning_rate": 0.001, "loss": 2.0876, "step": 534100 }, { "epoch": 69.06270200387847, "grad_norm": 1.3871264457702637, "learning_rate": 0.001, "loss": 2.0786, "step": 534200 }, { "epoch": 69.07563025210084, "grad_norm": 2.241848945617676, "learning_rate": 0.001, "loss": 2.0748, "step": 534300 }, { "epoch": 69.0885585003232, "grad_norm": 1.0481486320495605, "learning_rate": 0.001, "loss": 2.0929, "step": 534400 }, { "epoch": 69.10148674854557, "grad_norm": 1.227655053138733, "learning_rate": 0.001, "loss": 2.0772, "step": 534500 }, { "epoch": 69.11441499676793, "grad_norm": 1.1435388326644897, "learning_rate": 0.001, "loss": 2.0866, "step": 534600 }, { "epoch": 69.1273432449903, "grad_norm": 0.8832960724830627, "learning_rate": 0.001, "loss": 2.09, "step": 534700 }, { "epoch": 69.14027149321267, "grad_norm": 1.620054841041565, "learning_rate": 0.001, "loss": 2.0887, "step": 534800 }, { "epoch": 69.15319974143503, "grad_norm": 0.8692420721054077, "learning_rate": 0.001, "loss": 2.0852, "step": 534900 }, { "epoch": 69.1661279896574, "grad_norm": 1.133213996887207, "learning_rate": 0.001, "loss": 2.1003, "step": 535000 }, { "epoch": 69.17905623787976, "grad_norm": 0.9185032248497009, "learning_rate": 0.001, "loss": 2.0831, "step": 535100 }, { "epoch": 69.19198448610213, "grad_norm": 1.3519442081451416, "learning_rate": 0.001, "loss": 2.1012, "step": 535200 }, { "epoch": 69.2049127343245, "grad_norm": 1.5165376663208008, "learning_rate": 0.001, "loss": 2.0809, "step": 535300 }, { "epoch": 69.21784098254686, "grad_norm": 0.9728363156318665, "learning_rate": 0.001, "loss": 2.0929, "step": 535400 }, { "epoch": 69.23076923076923, "grad_norm": 1.6095255613327026, "learning_rate": 0.001, "loss": 2.1103, "step": 535500 }, { "epoch": 69.24369747899159, "grad_norm": 0.8533179759979248, "learning_rate": 0.001, "loss": 2.1051, "step": 535600 }, { "epoch": 69.25662572721396, "grad_norm": 1.0434038639068604, "learning_rate": 0.001, "loss": 2.0942, "step": 535700 }, { "epoch": 69.26955397543632, "grad_norm": 1.2233610153198242, "learning_rate": 0.001, "loss": 2.1133, "step": 535800 }, { "epoch": 69.28248222365869, "grad_norm": 1.003429651260376, "learning_rate": 0.001, "loss": 2.0981, "step": 535900 }, { "epoch": 69.29541047188106, "grad_norm": 1.1849695444107056, "learning_rate": 0.001, "loss": 2.1184, "step": 536000 }, { "epoch": 69.30833872010342, "grad_norm": 1.69330632686615, "learning_rate": 0.001, "loss": 2.1085, "step": 536100 }, { "epoch": 69.32126696832579, "grad_norm": 3.500532865524292, "learning_rate": 0.001, "loss": 2.1259, "step": 536200 }, { "epoch": 69.33419521654815, "grad_norm": 0.8614107966423035, "learning_rate": 0.001, "loss": 2.1258, "step": 536300 }, { "epoch": 69.34712346477052, "grad_norm": 81.783935546875, "learning_rate": 0.001, "loss": 2.1101, "step": 536400 }, { "epoch": 69.36005171299288, "grad_norm": 1.3719509840011597, "learning_rate": 0.001, "loss": 2.1276, "step": 536500 }, { "epoch": 69.37297996121525, "grad_norm": 1.007376790046692, "learning_rate": 0.001, "loss": 2.1089, "step": 536600 }, { "epoch": 69.38590820943762, "grad_norm": 2.6779799461364746, "learning_rate": 0.001, "loss": 2.1026, "step": 536700 }, { "epoch": 69.39883645765998, "grad_norm": 1.180261492729187, "learning_rate": 0.001, "loss": 2.1234, "step": 536800 }, { "epoch": 69.41176470588235, "grad_norm": 1.8027859926223755, "learning_rate": 0.001, "loss": 2.1041, "step": 536900 }, { "epoch": 69.42469295410471, "grad_norm": 1.2505885362625122, "learning_rate": 0.001, "loss": 2.1085, "step": 537000 }, { "epoch": 69.43762120232708, "grad_norm": 2.201385974884033, "learning_rate": 0.001, "loss": 2.1262, "step": 537100 }, { "epoch": 69.45054945054945, "grad_norm": 1.568232774734497, "learning_rate": 0.001, "loss": 2.128, "step": 537200 }, { "epoch": 69.46347769877181, "grad_norm": 1.0071582794189453, "learning_rate": 0.001, "loss": 2.1565, "step": 537300 }, { "epoch": 69.47640594699418, "grad_norm": 0.9695952534675598, "learning_rate": 0.001, "loss": 2.1358, "step": 537400 }, { "epoch": 69.48933419521654, "grad_norm": 1.284895896911621, "learning_rate": 0.001, "loss": 2.1198, "step": 537500 }, { "epoch": 69.50226244343891, "grad_norm": 1.0669538974761963, "learning_rate": 0.001, "loss": 2.1409, "step": 537600 }, { "epoch": 69.51519069166127, "grad_norm": 1.01898193359375, "learning_rate": 0.001, "loss": 2.1432, "step": 537700 }, { "epoch": 69.52811893988364, "grad_norm": 1.1126792430877686, "learning_rate": 0.001, "loss": 2.1331, "step": 537800 }, { "epoch": 69.541047188106, "grad_norm": 1.2742607593536377, "learning_rate": 0.001, "loss": 2.1441, "step": 537900 }, { "epoch": 69.55397543632837, "grad_norm": 1.245391845703125, "learning_rate": 0.001, "loss": 2.1342, "step": 538000 }, { "epoch": 69.56690368455074, "grad_norm": 1.010145902633667, "learning_rate": 0.001, "loss": 2.1263, "step": 538100 }, { "epoch": 69.5798319327731, "grad_norm": 1.082183837890625, "learning_rate": 0.001, "loss": 2.1344, "step": 538200 }, { "epoch": 69.59276018099547, "grad_norm": 1.019364833831787, "learning_rate": 0.001, "loss": 2.1383, "step": 538300 }, { "epoch": 69.60568842921784, "grad_norm": 1.2127453088760376, "learning_rate": 0.001, "loss": 2.1378, "step": 538400 }, { "epoch": 69.6186166774402, "grad_norm": 0.9422163367271423, "learning_rate": 0.001, "loss": 2.1329, "step": 538500 }, { "epoch": 69.63154492566257, "grad_norm": 1.2221126556396484, "learning_rate": 0.001, "loss": 2.1317, "step": 538600 }, { "epoch": 69.64447317388493, "grad_norm": 1.4582574367523193, "learning_rate": 0.001, "loss": 2.1257, "step": 538700 }, { "epoch": 69.6574014221073, "grad_norm": 1.619157314300537, "learning_rate": 0.001, "loss": 2.1345, "step": 538800 }, { "epoch": 69.67032967032966, "grad_norm": 1.1785991191864014, "learning_rate": 0.001, "loss": 2.146, "step": 538900 }, { "epoch": 69.68325791855203, "grad_norm": 1.103493571281433, "learning_rate": 0.001, "loss": 2.153, "step": 539000 }, { "epoch": 69.6961861667744, "grad_norm": 1.2180317640304565, "learning_rate": 0.001, "loss": 2.1391, "step": 539100 }, { "epoch": 69.70911441499676, "grad_norm": 1.4877289533615112, "learning_rate": 0.001, "loss": 2.1425, "step": 539200 }, { "epoch": 69.72204266321913, "grad_norm": 1.2080172300338745, "learning_rate": 0.001, "loss": 2.1319, "step": 539300 }, { "epoch": 69.7349709114415, "grad_norm": 1.376742959022522, "learning_rate": 0.001, "loss": 2.1539, "step": 539400 }, { "epoch": 69.74789915966386, "grad_norm": 1.6683316230773926, "learning_rate": 0.001, "loss": 2.1493, "step": 539500 }, { "epoch": 69.76082740788623, "grad_norm": 0.9274850487709045, "learning_rate": 0.001, "loss": 2.165, "step": 539600 }, { "epoch": 69.77375565610859, "grad_norm": 1.6203041076660156, "learning_rate": 0.001, "loss": 2.15, "step": 539700 }, { "epoch": 69.78668390433096, "grad_norm": 1.0323469638824463, "learning_rate": 0.001, "loss": 2.1627, "step": 539800 }, { "epoch": 69.79961215255332, "grad_norm": 1.329203486442566, "learning_rate": 0.001, "loss": 2.1536, "step": 539900 }, { "epoch": 69.81254040077569, "grad_norm": 0.8182693719863892, "learning_rate": 0.001, "loss": 2.152, "step": 540000 }, { "epoch": 69.82546864899805, "grad_norm": 1.3675223588943481, "learning_rate": 0.001, "loss": 2.1552, "step": 540100 }, { "epoch": 69.83839689722042, "grad_norm": 1.1571296453475952, "learning_rate": 0.001, "loss": 2.135, "step": 540200 }, { "epoch": 69.85132514544279, "grad_norm": 1.2735954523086548, "learning_rate": 0.001, "loss": 2.1675, "step": 540300 }, { "epoch": 69.86425339366515, "grad_norm": 1.2674757242202759, "learning_rate": 0.001, "loss": 2.1391, "step": 540400 }, { "epoch": 69.87718164188752, "grad_norm": 0.97130286693573, "learning_rate": 0.001, "loss": 2.1577, "step": 540500 }, { "epoch": 69.89010989010988, "grad_norm": 1.0226465463638306, "learning_rate": 0.001, "loss": 2.1455, "step": 540600 }, { "epoch": 69.90303813833225, "grad_norm": 0.9011550545692444, "learning_rate": 0.001, "loss": 2.165, "step": 540700 }, { "epoch": 69.91596638655462, "grad_norm": 1.1168111562728882, "learning_rate": 0.001, "loss": 2.1497, "step": 540800 }, { "epoch": 69.92889463477698, "grad_norm": 1.6170704364776611, "learning_rate": 0.001, "loss": 2.1849, "step": 540900 }, { "epoch": 69.94182288299935, "grad_norm": 1.515434980392456, "learning_rate": 0.001, "loss": 2.1683, "step": 541000 }, { "epoch": 69.95475113122171, "grad_norm": 2.0254111289978027, "learning_rate": 0.001, "loss": 2.1635, "step": 541100 }, { "epoch": 69.96767937944408, "grad_norm": 1.2232649326324463, "learning_rate": 0.001, "loss": 2.1419, "step": 541200 }, { "epoch": 69.98060762766644, "grad_norm": 1.9036744832992554, "learning_rate": 0.001, "loss": 2.1632, "step": 541300 }, { "epoch": 69.99353587588882, "grad_norm": 0.9125404953956604, "learning_rate": 0.001, "loss": 2.1633, "step": 541400 }, { "epoch": 70.00646412411119, "grad_norm": 1.5787835121154785, "learning_rate": 0.001, "loss": 2.0833, "step": 541500 }, { "epoch": 70.01939237233356, "grad_norm": 3.330205202102661, "learning_rate": 0.001, "loss": 2.0557, "step": 541600 }, { "epoch": 70.03232062055592, "grad_norm": 1.5200328826904297, "learning_rate": 0.001, "loss": 2.0791, "step": 541700 }, { "epoch": 70.04524886877829, "grad_norm": 3.3450117111206055, "learning_rate": 0.001, "loss": 2.0751, "step": 541800 }, { "epoch": 70.05817711700065, "grad_norm": 2.0110840797424316, "learning_rate": 0.001, "loss": 2.0718, "step": 541900 }, { "epoch": 70.07110536522302, "grad_norm": 1.8482621908187866, "learning_rate": 0.001, "loss": 2.0993, "step": 542000 }, { "epoch": 70.08403361344538, "grad_norm": 1.4644430875778198, "learning_rate": 0.001, "loss": 2.0724, "step": 542100 }, { "epoch": 70.09696186166775, "grad_norm": 2.177394390106201, "learning_rate": 0.001, "loss": 2.0756, "step": 542200 }, { "epoch": 70.10989010989012, "grad_norm": 1.8186720609664917, "learning_rate": 0.001, "loss": 2.1005, "step": 542300 }, { "epoch": 70.12281835811248, "grad_norm": 1.868275761604309, "learning_rate": 0.001, "loss": 2.0822, "step": 542400 }, { "epoch": 70.13574660633485, "grad_norm": 5.828369617462158, "learning_rate": 0.001, "loss": 2.0806, "step": 542500 }, { "epoch": 70.14867485455721, "grad_norm": 3.517639636993408, "learning_rate": 0.001, "loss": 2.0784, "step": 542600 }, { "epoch": 70.16160310277958, "grad_norm": 1.8918009996414185, "learning_rate": 0.001, "loss": 2.0854, "step": 542700 }, { "epoch": 70.17453135100195, "grad_norm": 2.016324996948242, "learning_rate": 0.001, "loss": 2.1103, "step": 542800 }, { "epoch": 70.18745959922431, "grad_norm": 2.0019636154174805, "learning_rate": 0.001, "loss": 2.09, "step": 542900 }, { "epoch": 70.20038784744668, "grad_norm": 2.141519069671631, "learning_rate": 0.001, "loss": 2.1226, "step": 543000 }, { "epoch": 70.21331609566904, "grad_norm": 1.5514335632324219, "learning_rate": 0.001, "loss": 2.0997, "step": 543100 }, { "epoch": 70.22624434389141, "grad_norm": 2.9129669666290283, "learning_rate": 0.001, "loss": 2.1092, "step": 543200 }, { "epoch": 70.23917259211377, "grad_norm": 2.063525676727295, "learning_rate": 0.001, "loss": 2.1115, "step": 543300 }, { "epoch": 70.25210084033614, "grad_norm": 1.8917784690856934, "learning_rate": 0.001, "loss": 2.1106, "step": 543400 }, { "epoch": 70.2650290885585, "grad_norm": 2.451192855834961, "learning_rate": 0.001, "loss": 2.1146, "step": 543500 }, { "epoch": 70.27795733678087, "grad_norm": 1.83621346950531, "learning_rate": 0.001, "loss": 2.1051, "step": 543600 }, { "epoch": 70.29088558500324, "grad_norm": 1.8945273160934448, "learning_rate": 0.001, "loss": 2.126, "step": 543700 }, { "epoch": 70.3038138332256, "grad_norm": 1.8999732732772827, "learning_rate": 0.001, "loss": 2.1255, "step": 543800 }, { "epoch": 70.31674208144797, "grad_norm": 3.0596776008605957, "learning_rate": 0.001, "loss": 2.1118, "step": 543900 }, { "epoch": 70.32967032967034, "grad_norm": 1.7574204206466675, "learning_rate": 0.001, "loss": 2.1318, "step": 544000 }, { "epoch": 70.3425985778927, "grad_norm": 2.7948217391967773, "learning_rate": 0.001, "loss": 2.1214, "step": 544100 }, { "epoch": 70.35552682611507, "grad_norm": 10.078763008117676, "learning_rate": 0.001, "loss": 2.1139, "step": 544200 }, { "epoch": 70.36845507433743, "grad_norm": 1.7414530515670776, "learning_rate": 0.001, "loss": 2.1041, "step": 544300 }, { "epoch": 70.3813833225598, "grad_norm": 1.600632667541504, "learning_rate": 0.001, "loss": 2.1092, "step": 544400 }, { "epoch": 70.39431157078216, "grad_norm": 2.8312461376190186, "learning_rate": 0.001, "loss": 2.1215, "step": 544500 }, { "epoch": 70.40723981900453, "grad_norm": 1.7652888298034668, "learning_rate": 0.001, "loss": 2.1274, "step": 544600 }, { "epoch": 70.4201680672269, "grad_norm": 1.711275339126587, "learning_rate": 0.001, "loss": 2.129, "step": 544700 }, { "epoch": 70.43309631544926, "grad_norm": 1.806888461112976, "learning_rate": 0.001, "loss": 2.1071, "step": 544800 }, { "epoch": 70.44602456367163, "grad_norm": 4.548313140869141, "learning_rate": 0.001, "loss": 2.1359, "step": 544900 }, { "epoch": 70.458952811894, "grad_norm": 1.737223505973816, "learning_rate": 0.001, "loss": 2.1314, "step": 545000 }, { "epoch": 70.47188106011636, "grad_norm": 1.9109770059585571, "learning_rate": 0.001, "loss": 2.131, "step": 545100 }, { "epoch": 70.48480930833873, "grad_norm": 2.1626665592193604, "learning_rate": 0.001, "loss": 2.1366, "step": 545200 }, { "epoch": 70.49773755656109, "grad_norm": 1.505635380744934, "learning_rate": 0.001, "loss": 2.1274, "step": 545300 }, { "epoch": 70.51066580478346, "grad_norm": 1.9186804294586182, "learning_rate": 0.001, "loss": 2.1201, "step": 545400 }, { "epoch": 70.52359405300582, "grad_norm": 1.7275995016098022, "learning_rate": 0.001, "loss": 2.112, "step": 545500 }, { "epoch": 70.53652230122819, "grad_norm": 10.473587989807129, "learning_rate": 0.001, "loss": 2.1287, "step": 545600 }, { "epoch": 70.54945054945055, "grad_norm": 1.644701600074768, "learning_rate": 0.001, "loss": 2.1329, "step": 545700 }, { "epoch": 70.56237879767292, "grad_norm": 2.1073174476623535, "learning_rate": 0.001, "loss": 2.1159, "step": 545800 }, { "epoch": 70.57530704589529, "grad_norm": 1.6854932308197021, "learning_rate": 0.001, "loss": 2.1419, "step": 545900 }, { "epoch": 70.58823529411765, "grad_norm": 1.8230048418045044, "learning_rate": 0.001, "loss": 2.1439, "step": 546000 }, { "epoch": 70.60116354234002, "grad_norm": 1.5324970483779907, "learning_rate": 0.001, "loss": 2.1418, "step": 546100 }, { "epoch": 70.61409179056238, "grad_norm": 2.0286223888397217, "learning_rate": 0.001, "loss": 2.126, "step": 546200 }, { "epoch": 70.62702003878475, "grad_norm": 1.720978856086731, "learning_rate": 0.001, "loss": 2.1376, "step": 546300 }, { "epoch": 70.63994828700712, "grad_norm": 1.7798516750335693, "learning_rate": 0.001, "loss": 2.1457, "step": 546400 }, { "epoch": 70.65287653522948, "grad_norm": 1.495468258857727, "learning_rate": 0.001, "loss": 2.139, "step": 546500 }, { "epoch": 70.66580478345185, "grad_norm": 2.462306499481201, "learning_rate": 0.001, "loss": 2.1466, "step": 546600 }, { "epoch": 70.67873303167421, "grad_norm": 4.538450241088867, "learning_rate": 0.001, "loss": 2.1376, "step": 546700 }, { "epoch": 70.69166127989658, "grad_norm": 1.8369042873382568, "learning_rate": 0.001, "loss": 2.1411, "step": 546800 }, { "epoch": 70.70458952811894, "grad_norm": 4.166144847869873, "learning_rate": 0.001, "loss": 2.1569, "step": 546900 }, { "epoch": 70.71751777634131, "grad_norm": 2.089672803878784, "learning_rate": 0.001, "loss": 2.1391, "step": 547000 }, { "epoch": 70.73044602456368, "grad_norm": 2.1127331256866455, "learning_rate": 0.001, "loss": 2.1645, "step": 547100 }, { "epoch": 70.74337427278604, "grad_norm": 4.465630054473877, "learning_rate": 0.001, "loss": 2.1612, "step": 547200 }, { "epoch": 70.75630252100841, "grad_norm": 1.7453821897506714, "learning_rate": 0.001, "loss": 2.1388, "step": 547300 }, { "epoch": 70.76923076923077, "grad_norm": 2.192857027053833, "learning_rate": 0.001, "loss": 2.1669, "step": 547400 }, { "epoch": 70.78215901745314, "grad_norm": 2.2364392280578613, "learning_rate": 0.001, "loss": 2.1343, "step": 547500 }, { "epoch": 70.7950872656755, "grad_norm": 2.2608909606933594, "learning_rate": 0.001, "loss": 2.149, "step": 547600 }, { "epoch": 70.80801551389787, "grad_norm": 4.0657057762146, "learning_rate": 0.001, "loss": 2.1545, "step": 547700 }, { "epoch": 70.82094376212024, "grad_norm": 2.390660047531128, "learning_rate": 0.001, "loss": 2.1583, "step": 547800 }, { "epoch": 70.8338720103426, "grad_norm": 2.3151423931121826, "learning_rate": 0.001, "loss": 2.147, "step": 547900 }, { "epoch": 70.84680025856497, "grad_norm": 2.925309181213379, "learning_rate": 0.001, "loss": 2.1716, "step": 548000 }, { "epoch": 70.85972850678733, "grad_norm": 1.8553273677825928, "learning_rate": 0.001, "loss": 2.1558, "step": 548100 }, { "epoch": 70.8726567550097, "grad_norm": 4.09849739074707, "learning_rate": 0.001, "loss": 2.1456, "step": 548200 }, { "epoch": 70.88558500323207, "grad_norm": 2.64568829536438, "learning_rate": 0.001, "loss": 2.1639, "step": 548300 }, { "epoch": 70.89851325145443, "grad_norm": 15.9048490524292, "learning_rate": 0.001, "loss": 2.1626, "step": 548400 }, { "epoch": 70.9114414996768, "grad_norm": 2.0233232975006104, "learning_rate": 0.001, "loss": 2.1696, "step": 548500 }, { "epoch": 70.92436974789916, "grad_norm": 1.716644287109375, "learning_rate": 0.001, "loss": 2.147, "step": 548600 }, { "epoch": 70.93729799612153, "grad_norm": 1.7360395193099976, "learning_rate": 0.001, "loss": 2.1659, "step": 548700 }, { "epoch": 70.9502262443439, "grad_norm": 2.8746564388275146, "learning_rate": 0.001, "loss": 2.1713, "step": 548800 }, { "epoch": 70.96315449256626, "grad_norm": 2.1129941940307617, "learning_rate": 0.001, "loss": 2.1849, "step": 548900 }, { "epoch": 70.97608274078863, "grad_norm": 2.2202155590057373, "learning_rate": 0.001, "loss": 2.1853, "step": 549000 }, { "epoch": 70.98901098901099, "grad_norm": 2.3231029510498047, "learning_rate": 0.001, "loss": 2.1568, "step": 549100 }, { "epoch": 71.00193923723336, "grad_norm": 1.2164679765701294, "learning_rate": 0.001, "loss": 2.1718, "step": 549200 }, { "epoch": 71.01486748545572, "grad_norm": 0.7580966949462891, "learning_rate": 0.001, "loss": 2.0642, "step": 549300 }, { "epoch": 71.02779573367809, "grad_norm": 1.1910408735275269, "learning_rate": 0.001, "loss": 2.0786, "step": 549400 }, { "epoch": 71.04072398190046, "grad_norm": 0.8924117088317871, "learning_rate": 0.001, "loss": 2.0823, "step": 549500 }, { "epoch": 71.05365223012282, "grad_norm": 1.2623414993286133, "learning_rate": 0.001, "loss": 2.0897, "step": 549600 }, { "epoch": 71.06658047834519, "grad_norm": 1.9485019445419312, "learning_rate": 0.001, "loss": 2.0848, "step": 549700 }, { "epoch": 71.07950872656755, "grad_norm": 1.5016729831695557, "learning_rate": 0.001, "loss": 2.0925, "step": 549800 }, { "epoch": 71.09243697478992, "grad_norm": 0.9503042101860046, "learning_rate": 0.001, "loss": 2.0851, "step": 549900 }, { "epoch": 71.10536522301229, "grad_norm": 0.8324622511863708, "learning_rate": 0.001, "loss": 2.1, "step": 550000 }, { "epoch": 71.11829347123465, "grad_norm": 0.9397624135017395, "learning_rate": 0.001, "loss": 2.1013, "step": 550100 }, { "epoch": 71.13122171945702, "grad_norm": 0.8981685638427734, "learning_rate": 0.001, "loss": 2.0993, "step": 550200 }, { "epoch": 71.14414996767938, "grad_norm": 1.3048402070999146, "learning_rate": 0.001, "loss": 2.108, "step": 550300 }, { "epoch": 71.15707821590175, "grad_norm": 1.5008281469345093, "learning_rate": 0.001, "loss": 2.0884, "step": 550400 }, { "epoch": 71.17000646412411, "grad_norm": 0.9394376873970032, "learning_rate": 0.001, "loss": 2.0859, "step": 550500 }, { "epoch": 71.18293471234648, "grad_norm": 0.9041224718093872, "learning_rate": 0.001, "loss": 2.0927, "step": 550600 }, { "epoch": 71.19586296056885, "grad_norm": 4.0437798500061035, "learning_rate": 0.001, "loss": 2.0916, "step": 550700 }, { "epoch": 71.20879120879121, "grad_norm": 1.0094727277755737, "learning_rate": 0.001, "loss": 2.1173, "step": 550800 }, { "epoch": 71.22171945701358, "grad_norm": 0.9742932915687561, "learning_rate": 0.001, "loss": 2.0888, "step": 550900 }, { "epoch": 71.23464770523594, "grad_norm": 1.0348992347717285, "learning_rate": 0.001, "loss": 2.0995, "step": 551000 }, { "epoch": 71.24757595345831, "grad_norm": 0.9282940030097961, "learning_rate": 0.001, "loss": 2.107, "step": 551100 }, { "epoch": 71.26050420168067, "grad_norm": 1.0746619701385498, "learning_rate": 0.001, "loss": 2.1019, "step": 551200 }, { "epoch": 71.27343244990304, "grad_norm": 1.2286547422409058, "learning_rate": 0.001, "loss": 2.1211, "step": 551300 }, { "epoch": 71.2863606981254, "grad_norm": 1.186279296875, "learning_rate": 0.001, "loss": 2.1063, "step": 551400 }, { "epoch": 71.29928894634777, "grad_norm": 1.171669363975525, "learning_rate": 0.001, "loss": 2.1001, "step": 551500 }, { "epoch": 71.31221719457014, "grad_norm": 0.9386503100395203, "learning_rate": 0.001, "loss": 2.1023, "step": 551600 }, { "epoch": 71.3251454427925, "grad_norm": 0.9352876543998718, "learning_rate": 0.001, "loss": 2.1189, "step": 551700 }, { "epoch": 71.33807369101487, "grad_norm": 1.247776985168457, "learning_rate": 0.001, "loss": 2.1023, "step": 551800 }, { "epoch": 71.35100193923724, "grad_norm": 1.501413106918335, "learning_rate": 0.001, "loss": 2.1039, "step": 551900 }, { "epoch": 71.3639301874596, "grad_norm": 0.9072788953781128, "learning_rate": 0.001, "loss": 2.1208, "step": 552000 }, { "epoch": 71.37685843568197, "grad_norm": 5.884795665740967, "learning_rate": 0.001, "loss": 2.1137, "step": 552100 }, { "epoch": 71.38978668390433, "grad_norm": 1.2116875648498535, "learning_rate": 0.001, "loss": 2.1205, "step": 552200 }, { "epoch": 71.4027149321267, "grad_norm": 1.2420611381530762, "learning_rate": 0.001, "loss": 2.1301, "step": 552300 }, { "epoch": 71.41564318034906, "grad_norm": 1.1369801759719849, "learning_rate": 0.001, "loss": 2.1096, "step": 552400 }, { "epoch": 71.42857142857143, "grad_norm": 0.8363597393035889, "learning_rate": 0.001, "loss": 2.1169, "step": 552500 }, { "epoch": 71.4414996767938, "grad_norm": 46.62432098388672, "learning_rate": 0.001, "loss": 2.1033, "step": 552600 }, { "epoch": 71.45442792501616, "grad_norm": 1.3482061624526978, "learning_rate": 0.001, "loss": 2.1217, "step": 552700 }, { "epoch": 71.46735617323853, "grad_norm": 0.896153450012207, "learning_rate": 0.001, "loss": 2.0961, "step": 552800 }, { "epoch": 71.4802844214609, "grad_norm": 2.1161017417907715, "learning_rate": 0.001, "loss": 2.124, "step": 552900 }, { "epoch": 71.49321266968326, "grad_norm": 1.0828949213027954, "learning_rate": 0.001, "loss": 2.1383, "step": 553000 }, { "epoch": 71.50614091790563, "grad_norm": 1.0471094846725464, "learning_rate": 0.001, "loss": 2.1224, "step": 553100 }, { "epoch": 71.51906916612799, "grad_norm": 0.9798727631568909, "learning_rate": 0.001, "loss": 2.1134, "step": 553200 }, { "epoch": 71.53199741435036, "grad_norm": 1.1032730340957642, "learning_rate": 0.001, "loss": 2.1281, "step": 553300 }, { "epoch": 71.54492566257272, "grad_norm": 1.0446650981903076, "learning_rate": 0.001, "loss": 2.1234, "step": 553400 }, { "epoch": 71.55785391079509, "grad_norm": 0.9564764499664307, "learning_rate": 0.001, "loss": 2.1423, "step": 553500 }, { "epoch": 71.57078215901745, "grad_norm": 1.093326210975647, "learning_rate": 0.001, "loss": 2.1489, "step": 553600 }, { "epoch": 71.58371040723982, "grad_norm": 1.258183479309082, "learning_rate": 0.001, "loss": 2.1256, "step": 553700 }, { "epoch": 71.59663865546219, "grad_norm": 1.0460962057113647, "learning_rate": 0.001, "loss": 2.1186, "step": 553800 }, { "epoch": 71.60956690368455, "grad_norm": 1.1621341705322266, "learning_rate": 0.001, "loss": 2.1416, "step": 553900 }, { "epoch": 71.62249515190692, "grad_norm": 1.153882622718811, "learning_rate": 0.001, "loss": 2.1366, "step": 554000 }, { "epoch": 71.63542340012928, "grad_norm": 1.1011879444122314, "learning_rate": 0.001, "loss": 2.1532, "step": 554100 }, { "epoch": 71.64835164835165, "grad_norm": 1.1541551351547241, "learning_rate": 0.001, "loss": 2.1448, "step": 554200 }, { "epoch": 71.66127989657402, "grad_norm": 0.8999978303909302, "learning_rate": 0.001, "loss": 2.136, "step": 554300 }, { "epoch": 71.67420814479638, "grad_norm": 1.264527440071106, "learning_rate": 0.001, "loss": 2.1517, "step": 554400 }, { "epoch": 71.68713639301875, "grad_norm": 2.278714418411255, "learning_rate": 0.001, "loss": 2.15, "step": 554500 }, { "epoch": 71.70006464124111, "grad_norm": 1.1641656160354614, "learning_rate": 0.001, "loss": 2.147, "step": 554600 }, { "epoch": 71.71299288946348, "grad_norm": 1.995112657546997, "learning_rate": 0.001, "loss": 2.1284, "step": 554700 }, { "epoch": 71.72592113768584, "grad_norm": 0.9233253598213196, "learning_rate": 0.001, "loss": 2.1475, "step": 554800 }, { "epoch": 71.73884938590821, "grad_norm": 0.8999326825141907, "learning_rate": 0.001, "loss": 2.1638, "step": 554900 }, { "epoch": 71.75177763413058, "grad_norm": 1.0485183000564575, "learning_rate": 0.001, "loss": 2.1405, "step": 555000 }, { "epoch": 71.76470588235294, "grad_norm": 1.2253332138061523, "learning_rate": 0.001, "loss": 2.1475, "step": 555100 }, { "epoch": 71.77763413057531, "grad_norm": 1.1286706924438477, "learning_rate": 0.001, "loss": 2.1712, "step": 555200 }, { "epoch": 71.79056237879767, "grad_norm": 1.0209605693817139, "learning_rate": 0.001, "loss": 2.1575, "step": 555300 }, { "epoch": 71.80349062702004, "grad_norm": 0.8287493586540222, "learning_rate": 0.001, "loss": 2.1544, "step": 555400 }, { "epoch": 71.8164188752424, "grad_norm": 0.904281497001648, "learning_rate": 0.001, "loss": 2.1464, "step": 555500 }, { "epoch": 71.82934712346477, "grad_norm": 1.0225474834442139, "learning_rate": 0.001, "loss": 2.1593, "step": 555600 }, { "epoch": 71.84227537168714, "grad_norm": 1.0868526697158813, "learning_rate": 0.001, "loss": 2.1732, "step": 555700 }, { "epoch": 71.8552036199095, "grad_norm": 2.1058590412139893, "learning_rate": 0.001, "loss": 2.141, "step": 555800 }, { "epoch": 71.86813186813187, "grad_norm": 1.0110657215118408, "learning_rate": 0.001, "loss": 2.165, "step": 555900 }, { "epoch": 71.88106011635423, "grad_norm": 1.0757874250411987, "learning_rate": 0.001, "loss": 2.1452, "step": 556000 }, { "epoch": 71.8939883645766, "grad_norm": 1.4223694801330566, "learning_rate": 0.001, "loss": 2.1641, "step": 556100 }, { "epoch": 71.90691661279897, "grad_norm": 1.1082870960235596, "learning_rate": 0.001, "loss": 2.1668, "step": 556200 }, { "epoch": 71.91984486102133, "grad_norm": 0.9250260591506958, "learning_rate": 0.001, "loss": 2.1434, "step": 556300 }, { "epoch": 71.9327731092437, "grad_norm": 0.9968329668045044, "learning_rate": 0.001, "loss": 2.1888, "step": 556400 }, { "epoch": 71.94570135746606, "grad_norm": 1.2923187017440796, "learning_rate": 0.001, "loss": 2.1553, "step": 556500 }, { "epoch": 71.95862960568843, "grad_norm": 0.8384433388710022, "learning_rate": 0.001, "loss": 2.1733, "step": 556600 }, { "epoch": 71.9715578539108, "grad_norm": 1.2580461502075195, "learning_rate": 0.001, "loss": 2.1589, "step": 556700 }, { "epoch": 71.98448610213316, "grad_norm": 0.9599249958992004, "learning_rate": 0.001, "loss": 2.1664, "step": 556800 }, { "epoch": 71.99741435035553, "grad_norm": 1.0446710586547852, "learning_rate": 0.001, "loss": 2.1612, "step": 556900 }, { "epoch": 72.01034259857789, "grad_norm": 1.1401991844177246, "learning_rate": 0.001, "loss": 2.0914, "step": 557000 }, { "epoch": 72.02327084680026, "grad_norm": 1.062974214553833, "learning_rate": 0.001, "loss": 2.0675, "step": 557100 }, { "epoch": 72.03619909502262, "grad_norm": 0.9462392330169678, "learning_rate": 0.001, "loss": 2.0667, "step": 557200 }, { "epoch": 72.04912734324499, "grad_norm": 1.2854399681091309, "learning_rate": 0.001, "loss": 2.0829, "step": 557300 }, { "epoch": 72.06205559146736, "grad_norm": 0.920536994934082, "learning_rate": 0.001, "loss": 2.088, "step": 557400 }, { "epoch": 72.07498383968972, "grad_norm": 0.7749442458152771, "learning_rate": 0.001, "loss": 2.0879, "step": 557500 }, { "epoch": 72.08791208791209, "grad_norm": 2.1788570880889893, "learning_rate": 0.001, "loss": 2.0984, "step": 557600 }, { "epoch": 72.10084033613445, "grad_norm": 0.8901124000549316, "learning_rate": 0.001, "loss": 2.0978, "step": 557700 }, { "epoch": 72.11376858435682, "grad_norm": 1.38504958152771, "learning_rate": 0.001, "loss": 2.0928, "step": 557800 }, { "epoch": 72.12669683257919, "grad_norm": 1.2901748418807983, "learning_rate": 0.001, "loss": 2.0899, "step": 557900 }, { "epoch": 72.13962508080155, "grad_norm": 1.6831068992614746, "learning_rate": 0.001, "loss": 2.0906, "step": 558000 }, { "epoch": 72.15255332902392, "grad_norm": 1.23569917678833, "learning_rate": 0.001, "loss": 2.1123, "step": 558100 }, { "epoch": 72.16548157724628, "grad_norm": 1.4691262245178223, "learning_rate": 0.001, "loss": 2.0868, "step": 558200 }, { "epoch": 72.17840982546865, "grad_norm": 1.0027965307235718, "learning_rate": 0.001, "loss": 2.1088, "step": 558300 }, { "epoch": 72.19133807369101, "grad_norm": 0.9447149038314819, "learning_rate": 0.001, "loss": 2.0968, "step": 558400 }, { "epoch": 72.20426632191338, "grad_norm": 1.224266767501831, "learning_rate": 0.001, "loss": 2.0952, "step": 558500 }, { "epoch": 72.21719457013575, "grad_norm": 2.3793227672576904, "learning_rate": 0.001, "loss": 2.1129, "step": 558600 }, { "epoch": 72.23012281835811, "grad_norm": 0.9786816239356995, "learning_rate": 0.001, "loss": 2.0968, "step": 558700 }, { "epoch": 72.24305106658048, "grad_norm": 1.4405848979949951, "learning_rate": 0.001, "loss": 2.1025, "step": 558800 }, { "epoch": 72.25597931480284, "grad_norm": 0.859796404838562, "learning_rate": 0.001, "loss": 2.1128, "step": 558900 }, { "epoch": 72.26890756302521, "grad_norm": 0.9090040922164917, "learning_rate": 0.001, "loss": 2.1095, "step": 559000 }, { "epoch": 72.28183581124757, "grad_norm": 0.9780765771865845, "learning_rate": 0.001, "loss": 2.0992, "step": 559100 }, { "epoch": 72.29476405946994, "grad_norm": 1.322144627571106, "learning_rate": 0.001, "loss": 2.0992, "step": 559200 }, { "epoch": 72.3076923076923, "grad_norm": 1.2614892721176147, "learning_rate": 0.001, "loss": 2.092, "step": 559300 }, { "epoch": 72.32062055591467, "grad_norm": 1.1759542226791382, "learning_rate": 0.001, "loss": 2.103, "step": 559400 }, { "epoch": 72.33354880413704, "grad_norm": 1.014059066772461, "learning_rate": 0.001, "loss": 2.1153, "step": 559500 }, { "epoch": 72.3464770523594, "grad_norm": 0.9432125091552734, "learning_rate": 0.001, "loss": 2.111, "step": 559600 }, { "epoch": 72.35940530058177, "grad_norm": 1.3194524049758911, "learning_rate": 0.001, "loss": 2.1156, "step": 559700 }, { "epoch": 72.37233354880414, "grad_norm": 1.1992740631103516, "learning_rate": 0.001, "loss": 2.145, "step": 559800 }, { "epoch": 72.3852617970265, "grad_norm": 1.034371018409729, "learning_rate": 0.001, "loss": 2.1087, "step": 559900 }, { "epoch": 72.39819004524887, "grad_norm": 1.1323974132537842, "learning_rate": 0.001, "loss": 2.1246, "step": 560000 }, { "epoch": 72.41111829347123, "grad_norm": 3.1140713691711426, "learning_rate": 0.001, "loss": 2.1388, "step": 560100 }, { "epoch": 72.4240465416936, "grad_norm": 1.3464230298995972, "learning_rate": 0.001, "loss": 2.1425, "step": 560200 }, { "epoch": 72.43697478991596, "grad_norm": 1.2302809953689575, "learning_rate": 0.001, "loss": 2.1514, "step": 560300 }, { "epoch": 72.44990303813833, "grad_norm": 6.6978349685668945, "learning_rate": 0.001, "loss": 2.124, "step": 560400 }, { "epoch": 72.4628312863607, "grad_norm": 1.0040379762649536, "learning_rate": 0.001, "loss": 2.1255, "step": 560500 }, { "epoch": 72.47575953458306, "grad_norm": 1.2068132162094116, "learning_rate": 0.001, "loss": 2.1435, "step": 560600 }, { "epoch": 72.48868778280543, "grad_norm": 1.204702615737915, "learning_rate": 0.001, "loss": 2.1319, "step": 560700 }, { "epoch": 72.5016160310278, "grad_norm": 1.0454524755477905, "learning_rate": 0.001, "loss": 2.1354, "step": 560800 }, { "epoch": 72.51454427925016, "grad_norm": 0.9419769644737244, "learning_rate": 0.001, "loss": 2.1293, "step": 560900 }, { "epoch": 72.52747252747253, "grad_norm": 4.191535949707031, "learning_rate": 0.001, "loss": 2.1359, "step": 561000 }, { "epoch": 72.54040077569489, "grad_norm": 0.9613664746284485, "learning_rate": 0.001, "loss": 2.1087, "step": 561100 }, { "epoch": 72.55332902391726, "grad_norm": 1.9550375938415527, "learning_rate": 0.001, "loss": 2.1112, "step": 561200 }, { "epoch": 72.56625727213962, "grad_norm": 2.050764799118042, "learning_rate": 0.001, "loss": 2.1248, "step": 561300 }, { "epoch": 72.57918552036199, "grad_norm": 1.228751301765442, "learning_rate": 0.001, "loss": 2.1226, "step": 561400 }, { "epoch": 72.59211376858435, "grad_norm": 1.6104016304016113, "learning_rate": 0.001, "loss": 2.1325, "step": 561500 }, { "epoch": 72.60504201680672, "grad_norm": 1.0387217998504639, "learning_rate": 0.001, "loss": 2.12, "step": 561600 }, { "epoch": 72.61797026502909, "grad_norm": 1.1445481777191162, "learning_rate": 0.001, "loss": 2.1548, "step": 561700 }, { "epoch": 72.63089851325145, "grad_norm": 1.693646788597107, "learning_rate": 0.001, "loss": 2.1346, "step": 561800 }, { "epoch": 72.64382676147382, "grad_norm": 0.9186107516288757, "learning_rate": 0.001, "loss": 2.1514, "step": 561900 }, { "epoch": 72.65675500969618, "grad_norm": 1.2969237565994263, "learning_rate": 0.001, "loss": 2.1232, "step": 562000 }, { "epoch": 72.66968325791855, "grad_norm": 1.2149701118469238, "learning_rate": 0.001, "loss": 2.1349, "step": 562100 }, { "epoch": 72.68261150614092, "grad_norm": 0.9348363280296326, "learning_rate": 0.001, "loss": 2.1406, "step": 562200 }, { "epoch": 72.69553975436328, "grad_norm": 10.047039031982422, "learning_rate": 0.001, "loss": 2.1371, "step": 562300 }, { "epoch": 72.70846800258565, "grad_norm": 1.7773633003234863, "learning_rate": 0.001, "loss": 2.1193, "step": 562400 }, { "epoch": 72.72139625080801, "grad_norm": 0.9139713048934937, "learning_rate": 0.001, "loss": 2.1437, "step": 562500 }, { "epoch": 72.73432449903038, "grad_norm": 1.017425298690796, "learning_rate": 0.001, "loss": 2.1336, "step": 562600 }, { "epoch": 72.74725274725274, "grad_norm": 1.1632078886032104, "learning_rate": 0.001, "loss": 2.1457, "step": 562700 }, { "epoch": 72.76018099547511, "grad_norm": 1.332291603088379, "learning_rate": 0.001, "loss": 2.1511, "step": 562800 }, { "epoch": 72.77310924369748, "grad_norm": 0.976431667804718, "learning_rate": 0.001, "loss": 2.136, "step": 562900 }, { "epoch": 72.78603749191984, "grad_norm": 1.4190832376480103, "learning_rate": 0.001, "loss": 2.1643, "step": 563000 }, { "epoch": 72.79896574014221, "grad_norm": 1.3821301460266113, "learning_rate": 0.001, "loss": 2.1495, "step": 563100 }, { "epoch": 72.81189398836457, "grad_norm": 1.0280076265335083, "learning_rate": 0.001, "loss": 2.1385, "step": 563200 }, { "epoch": 72.82482223658694, "grad_norm": 1.2989007234573364, "learning_rate": 0.001, "loss": 2.1532, "step": 563300 }, { "epoch": 72.8377504848093, "grad_norm": 1.865086555480957, "learning_rate": 0.001, "loss": 2.1541, "step": 563400 }, { "epoch": 72.85067873303167, "grad_norm": 1.0935485363006592, "learning_rate": 0.001, "loss": 2.1431, "step": 563500 }, { "epoch": 72.86360698125404, "grad_norm": 2.0625874996185303, "learning_rate": 0.001, "loss": 2.1485, "step": 563600 }, { "epoch": 72.8765352294764, "grad_norm": 0.9531559348106384, "learning_rate": 0.001, "loss": 2.1732, "step": 563700 }, { "epoch": 72.88946347769877, "grad_norm": 1.257267713546753, "learning_rate": 0.001, "loss": 2.1477, "step": 563800 }, { "epoch": 72.90239172592113, "grad_norm": 1.2019343376159668, "learning_rate": 0.001, "loss": 2.1672, "step": 563900 }, { "epoch": 72.9153199741435, "grad_norm": 7.506185054779053, "learning_rate": 0.001, "loss": 2.1466, "step": 564000 }, { "epoch": 72.92824822236587, "grad_norm": 0.9139539003372192, "learning_rate": 0.001, "loss": 2.1529, "step": 564100 }, { "epoch": 72.94117647058823, "grad_norm": 1.0721182823181152, "learning_rate": 0.001, "loss": 2.1428, "step": 564200 }, { "epoch": 72.9541047188106, "grad_norm": 0.9517807364463806, "learning_rate": 0.001, "loss": 2.1397, "step": 564300 }, { "epoch": 72.96703296703296, "grad_norm": 1.1610692739486694, "learning_rate": 0.001, "loss": 2.1767, "step": 564400 }, { "epoch": 72.97996121525533, "grad_norm": 1.1008822917938232, "learning_rate": 0.001, "loss": 2.1623, "step": 564500 }, { "epoch": 72.9928894634777, "grad_norm": 0.948501467704773, "learning_rate": 0.001, "loss": 2.1413, "step": 564600 }, { "epoch": 73.00581771170006, "grad_norm": 1.0339123010635376, "learning_rate": 0.001, "loss": 2.1096, "step": 564700 }, { "epoch": 73.01874595992243, "grad_norm": 1.1748067140579224, "learning_rate": 0.001, "loss": 2.0731, "step": 564800 }, { "epoch": 73.03167420814479, "grad_norm": 1.1372655630111694, "learning_rate": 0.001, "loss": 2.0742, "step": 564900 }, { "epoch": 73.04460245636716, "grad_norm": 1.1191922426223755, "learning_rate": 0.001, "loss": 2.0762, "step": 565000 }, { "epoch": 73.05753070458952, "grad_norm": 1.5013914108276367, "learning_rate": 0.001, "loss": 2.0949, "step": 565100 }, { "epoch": 73.07045895281189, "grad_norm": 1.0626291036605835, "learning_rate": 0.001, "loss": 2.0721, "step": 565200 }, { "epoch": 73.08338720103426, "grad_norm": 1.4007818698883057, "learning_rate": 0.001, "loss": 2.0716, "step": 565300 }, { "epoch": 73.09631544925662, "grad_norm": 6.134360313415527, "learning_rate": 0.001, "loss": 2.0802, "step": 565400 }, { "epoch": 73.10924369747899, "grad_norm": 1.2186590433120728, "learning_rate": 0.001, "loss": 2.0866, "step": 565500 }, { "epoch": 73.12217194570135, "grad_norm": 1.0338085889816284, "learning_rate": 0.001, "loss": 2.088, "step": 565600 }, { "epoch": 73.13510019392372, "grad_norm": 1.2162423133850098, "learning_rate": 0.001, "loss": 2.069, "step": 565700 }, { "epoch": 73.14802844214609, "grad_norm": 1.3070565462112427, "learning_rate": 0.001, "loss": 2.0791, "step": 565800 }, { "epoch": 73.16095669036845, "grad_norm": 2.0195109844207764, "learning_rate": 0.001, "loss": 2.0918, "step": 565900 }, { "epoch": 73.17388493859082, "grad_norm": 1.2154432535171509, "learning_rate": 0.001, "loss": 2.0825, "step": 566000 }, { "epoch": 73.18681318681318, "grad_norm": 1.544424295425415, "learning_rate": 0.001, "loss": 2.0892, "step": 566100 }, { "epoch": 73.19974143503555, "grad_norm": 1.3402976989746094, "learning_rate": 0.001, "loss": 2.0755, "step": 566200 }, { "epoch": 73.21266968325791, "grad_norm": 2.276616334915161, "learning_rate": 0.001, "loss": 2.1163, "step": 566300 }, { "epoch": 73.22559793148028, "grad_norm": 0.9849509000778198, "learning_rate": 0.001, "loss": 2.0842, "step": 566400 }, { "epoch": 73.23852617970265, "grad_norm": 1.1996890306472778, "learning_rate": 0.001, "loss": 2.0931, "step": 566500 }, { "epoch": 73.25145442792501, "grad_norm": 1.2363929748535156, "learning_rate": 0.001, "loss": 2.099, "step": 566600 }, { "epoch": 73.26438267614738, "grad_norm": 1.355947494506836, "learning_rate": 0.001, "loss": 2.0904, "step": 566700 }, { "epoch": 73.27731092436974, "grad_norm": 1.0491187572479248, "learning_rate": 0.001, "loss": 2.0944, "step": 566800 }, { "epoch": 73.29023917259211, "grad_norm": 1.2863003015518188, "learning_rate": 0.001, "loss": 2.1109, "step": 566900 }, { "epoch": 73.30316742081448, "grad_norm": 6.129720687866211, "learning_rate": 0.001, "loss": 2.0996, "step": 567000 }, { "epoch": 73.31609566903684, "grad_norm": 1.1352181434631348, "learning_rate": 0.001, "loss": 2.1026, "step": 567100 }, { "epoch": 73.3290239172592, "grad_norm": 1.2637441158294678, "learning_rate": 0.001, "loss": 2.1014, "step": 567200 }, { "epoch": 73.34195216548157, "grad_norm": 1.076370358467102, "learning_rate": 0.001, "loss": 2.0942, "step": 567300 }, { "epoch": 73.35488041370394, "grad_norm": 1.5313063859939575, "learning_rate": 0.001, "loss": 2.1104, "step": 567400 }, { "epoch": 73.3678086619263, "grad_norm": 1.216633915901184, "learning_rate": 0.001, "loss": 2.1154, "step": 567500 }, { "epoch": 73.38073691014867, "grad_norm": 1.2188794612884521, "learning_rate": 0.001, "loss": 2.1013, "step": 567600 }, { "epoch": 73.39366515837104, "grad_norm": 1.2171646356582642, "learning_rate": 0.001, "loss": 2.1224, "step": 567700 }, { "epoch": 73.4065934065934, "grad_norm": 1.273604393005371, "learning_rate": 0.001, "loss": 2.1004, "step": 567800 }, { "epoch": 73.41952165481577, "grad_norm": 3.049173593521118, "learning_rate": 0.001, "loss": 2.112, "step": 567900 }, { "epoch": 73.43244990303813, "grad_norm": 1.2191438674926758, "learning_rate": 0.001, "loss": 2.1211, "step": 568000 }, { "epoch": 73.4453781512605, "grad_norm": 2.3703863620758057, "learning_rate": 0.001, "loss": 2.1087, "step": 568100 }, { "epoch": 73.45830639948286, "grad_norm": 1.2809268236160278, "learning_rate": 0.001, "loss": 2.1065, "step": 568200 }, { "epoch": 73.47123464770523, "grad_norm": 1.1735992431640625, "learning_rate": 0.001, "loss": 2.1179, "step": 568300 }, { "epoch": 73.4841628959276, "grad_norm": 1.1977012157440186, "learning_rate": 0.001, "loss": 2.1104, "step": 568400 }, { "epoch": 73.49709114414996, "grad_norm": 1.5385595560073853, "learning_rate": 0.001, "loss": 2.1151, "step": 568500 }, { "epoch": 73.51001939237233, "grad_norm": 1.2661529779434204, "learning_rate": 0.001, "loss": 2.1126, "step": 568600 }, { "epoch": 73.5229476405947, "grad_norm": 1.7393507957458496, "learning_rate": 0.001, "loss": 2.1381, "step": 568700 }, { "epoch": 73.53587588881706, "grad_norm": 1.162866473197937, "learning_rate": 0.001, "loss": 2.1327, "step": 568800 }, { "epoch": 73.54880413703943, "grad_norm": 1.3442237377166748, "learning_rate": 0.001, "loss": 2.1271, "step": 568900 }, { "epoch": 73.56173238526179, "grad_norm": 1.230814814567566, "learning_rate": 0.001, "loss": 2.1313, "step": 569000 }, { "epoch": 73.57466063348416, "grad_norm": 4.1122050285339355, "learning_rate": 0.001, "loss": 2.1396, "step": 569100 }, { "epoch": 73.58758888170652, "grad_norm": 1.2743844985961914, "learning_rate": 0.001, "loss": 2.1491, "step": 569200 }, { "epoch": 73.60051712992889, "grad_norm": 1.437061071395874, "learning_rate": 0.001, "loss": 2.1449, "step": 569300 }, { "epoch": 73.61344537815125, "grad_norm": 1.5854309797286987, "learning_rate": 0.001, "loss": 2.1337, "step": 569400 }, { "epoch": 73.62637362637362, "grad_norm": 1.7266494035720825, "learning_rate": 0.001, "loss": 2.1287, "step": 569500 }, { "epoch": 73.63930187459599, "grad_norm": 1.9550284147262573, "learning_rate": 0.001, "loss": 2.124, "step": 569600 }, { "epoch": 73.65223012281835, "grad_norm": 1.4248110055923462, "learning_rate": 0.001, "loss": 2.1479, "step": 569700 }, { "epoch": 73.66515837104072, "grad_norm": 1.2801576852798462, "learning_rate": 0.001, "loss": 2.1182, "step": 569800 }, { "epoch": 73.67808661926308, "grad_norm": 1.3546159267425537, "learning_rate": 0.001, "loss": 2.1246, "step": 569900 }, { "epoch": 73.69101486748545, "grad_norm": 4.540140628814697, "learning_rate": 0.001, "loss": 2.1473, "step": 570000 }, { "epoch": 73.70394311570782, "grad_norm": 1.105661153793335, "learning_rate": 0.001, "loss": 2.1317, "step": 570100 }, { "epoch": 73.71687136393018, "grad_norm": 1.6137206554412842, "learning_rate": 0.001, "loss": 2.1523, "step": 570200 }, { "epoch": 73.72979961215255, "grad_norm": 1.047258734703064, "learning_rate": 0.001, "loss": 2.1498, "step": 570300 }, { "epoch": 73.74272786037491, "grad_norm": 11.42508316040039, "learning_rate": 0.001, "loss": 2.1251, "step": 570400 }, { "epoch": 73.75565610859728, "grad_norm": 1.5549843311309814, "learning_rate": 0.001, "loss": 2.1406, "step": 570500 }, { "epoch": 73.76858435681964, "grad_norm": 1.2758424282073975, "learning_rate": 0.001, "loss": 2.1444, "step": 570600 }, { "epoch": 73.78151260504201, "grad_norm": 1.1074119806289673, "learning_rate": 0.001, "loss": 2.1353, "step": 570700 }, { "epoch": 73.79444085326438, "grad_norm": 1.187036395072937, "learning_rate": 0.001, "loss": 2.133, "step": 570800 }, { "epoch": 73.80736910148674, "grad_norm": 1.185246229171753, "learning_rate": 0.001, "loss": 2.1473, "step": 570900 }, { "epoch": 73.82029734970911, "grad_norm": 1.0759228467941284, "learning_rate": 0.001, "loss": 2.15, "step": 571000 }, { "epoch": 73.83322559793147, "grad_norm": 1.047889232635498, "learning_rate": 0.001, "loss": 2.1397, "step": 571100 }, { "epoch": 73.84615384615384, "grad_norm": 1.2017611265182495, "learning_rate": 0.001, "loss": 2.136, "step": 571200 }, { "epoch": 73.8590820943762, "grad_norm": 1.349411964416504, "learning_rate": 0.001, "loss": 2.1446, "step": 571300 }, { "epoch": 73.87201034259857, "grad_norm": 1.5531861782073975, "learning_rate": 0.001, "loss": 2.1636, "step": 571400 }, { "epoch": 73.88493859082094, "grad_norm": 1.650673747062683, "learning_rate": 0.001, "loss": 2.1457, "step": 571500 }, { "epoch": 73.8978668390433, "grad_norm": 5.4695658683776855, "learning_rate": 0.001, "loss": 2.1429, "step": 571600 }, { "epoch": 73.91079508726567, "grad_norm": 1.453227162361145, "learning_rate": 0.001, "loss": 2.1558, "step": 571700 }, { "epoch": 73.92372333548803, "grad_norm": 2.033245086669922, "learning_rate": 0.001, "loss": 2.1597, "step": 571800 }, { "epoch": 73.9366515837104, "grad_norm": 1.1272404193878174, "learning_rate": 0.001, "loss": 2.1651, "step": 571900 }, { "epoch": 73.94957983193277, "grad_norm": 1.2463867664337158, "learning_rate": 0.001, "loss": 2.1424, "step": 572000 }, { "epoch": 73.96250808015513, "grad_norm": 1.1899054050445557, "learning_rate": 0.001, "loss": 2.138, "step": 572100 }, { "epoch": 73.9754363283775, "grad_norm": 1.8087245225906372, "learning_rate": 0.001, "loss": 2.1648, "step": 572200 }, { "epoch": 73.98836457659988, "grad_norm": 1.119019627571106, "learning_rate": 0.001, "loss": 2.1582, "step": 572300 }, { "epoch": 74.00129282482224, "grad_norm": 1.3610165119171143, "learning_rate": 0.001, "loss": 2.1362, "step": 572400 }, { "epoch": 74.01422107304461, "grad_norm": 1.1433228254318237, "learning_rate": 0.001, "loss": 2.055, "step": 572500 }, { "epoch": 74.02714932126698, "grad_norm": 0.8235462307929993, "learning_rate": 0.001, "loss": 2.0679, "step": 572600 }, { "epoch": 74.04007756948934, "grad_norm": 0.9924898743629456, "learning_rate": 0.001, "loss": 2.0811, "step": 572700 }, { "epoch": 74.0530058177117, "grad_norm": 1.117003321647644, "learning_rate": 0.001, "loss": 2.0788, "step": 572800 }, { "epoch": 74.06593406593407, "grad_norm": 1.2882182598114014, "learning_rate": 0.001, "loss": 2.0746, "step": 572900 }, { "epoch": 74.07886231415644, "grad_norm": 1.1880114078521729, "learning_rate": 0.001, "loss": 2.0749, "step": 573000 }, { "epoch": 74.0917905623788, "grad_norm": 2.1529510021209717, "learning_rate": 0.001, "loss": 2.0799, "step": 573100 }, { "epoch": 74.10471881060117, "grad_norm": 0.9342865347862244, "learning_rate": 0.001, "loss": 2.065, "step": 573200 }, { "epoch": 74.11764705882354, "grad_norm": 1.061259388923645, "learning_rate": 0.001, "loss": 2.0817, "step": 573300 }, { "epoch": 74.1305753070459, "grad_norm": 9.687249183654785, "learning_rate": 0.001, "loss": 2.0942, "step": 573400 }, { "epoch": 74.14350355526827, "grad_norm": 0.9767224192619324, "learning_rate": 0.001, "loss": 2.0831, "step": 573500 }, { "epoch": 74.15643180349063, "grad_norm": 0.9698808193206787, "learning_rate": 0.001, "loss": 2.0741, "step": 573600 }, { "epoch": 74.169360051713, "grad_norm": 1.098283052444458, "learning_rate": 0.001, "loss": 2.1103, "step": 573700 }, { "epoch": 74.18228829993537, "grad_norm": 1.8990471363067627, "learning_rate": 0.001, "loss": 2.0919, "step": 573800 }, { "epoch": 74.19521654815773, "grad_norm": 0.9321915507316589, "learning_rate": 0.001, "loss": 2.0941, "step": 573900 }, { "epoch": 74.2081447963801, "grad_norm": 1.3625496625900269, "learning_rate": 0.001, "loss": 2.0822, "step": 574000 }, { "epoch": 74.22107304460246, "grad_norm": 1.5483901500701904, "learning_rate": 0.001, "loss": 2.105, "step": 574100 }, { "epoch": 74.23400129282483, "grad_norm": 0.7791236042976379, "learning_rate": 0.001, "loss": 2.0886, "step": 574200 }, { "epoch": 74.2469295410472, "grad_norm": 1.2281697988510132, "learning_rate": 0.001, "loss": 2.0868, "step": 574300 }, { "epoch": 74.25985778926956, "grad_norm": 92.4283676147461, "learning_rate": 0.001, "loss": 2.0935, "step": 574400 }, { "epoch": 74.27278603749193, "grad_norm": 1.0314092636108398, "learning_rate": 0.001, "loss": 2.1093, "step": 574500 }, { "epoch": 74.28571428571429, "grad_norm": 29.90082359313965, "learning_rate": 0.001, "loss": 2.1112, "step": 574600 }, { "epoch": 74.29864253393666, "grad_norm": 2.745425224304199, "learning_rate": 0.001, "loss": 2.103, "step": 574700 }, { "epoch": 74.31157078215902, "grad_norm": 0.9696181416511536, "learning_rate": 0.001, "loss": 2.112, "step": 574800 }, { "epoch": 74.32449903038139, "grad_norm": 22.40960121154785, "learning_rate": 0.001, "loss": 2.1048, "step": 574900 }, { "epoch": 74.33742727860376, "grad_norm": 1.3068503141403198, "learning_rate": 0.001, "loss": 2.0906, "step": 575000 }, { "epoch": 74.35035552682612, "grad_norm": 1.2374604940414429, "learning_rate": 0.001, "loss": 2.1243, "step": 575100 }, { "epoch": 74.36328377504849, "grad_norm": 1.0959529876708984, "learning_rate": 0.001, "loss": 2.0859, "step": 575200 }, { "epoch": 74.37621202327085, "grad_norm": 0.9568427205085754, "learning_rate": 0.001, "loss": 2.1207, "step": 575300 }, { "epoch": 74.38914027149322, "grad_norm": 1.4865469932556152, "learning_rate": 0.001, "loss": 2.0991, "step": 575400 }, { "epoch": 74.40206851971558, "grad_norm": 1.2161295413970947, "learning_rate": 0.001, "loss": 2.1173, "step": 575500 }, { "epoch": 74.41499676793795, "grad_norm": 1.3550251722335815, "learning_rate": 0.001, "loss": 2.1169, "step": 575600 }, { "epoch": 74.42792501616032, "grad_norm": 1.21518075466156, "learning_rate": 0.001, "loss": 2.1206, "step": 575700 }, { "epoch": 74.44085326438268, "grad_norm": 0.896406888961792, "learning_rate": 0.001, "loss": 2.1026, "step": 575800 }, { "epoch": 74.45378151260505, "grad_norm": 1.1302937269210815, "learning_rate": 0.001, "loss": 2.1118, "step": 575900 }, { "epoch": 74.46670976082741, "grad_norm": 1.0166077613830566, "learning_rate": 0.001, "loss": 2.1183, "step": 576000 }, { "epoch": 74.47963800904978, "grad_norm": 4.97719669342041, "learning_rate": 0.001, "loss": 2.1152, "step": 576100 }, { "epoch": 74.49256625727214, "grad_norm": 0.8474728465080261, "learning_rate": 0.001, "loss": 2.1203, "step": 576200 }, { "epoch": 74.50549450549451, "grad_norm": 0.9487027525901794, "learning_rate": 0.001, "loss": 2.1148, "step": 576300 }, { "epoch": 74.51842275371688, "grad_norm": 0.8943815231323242, "learning_rate": 0.001, "loss": 2.1279, "step": 576400 }, { "epoch": 74.53135100193924, "grad_norm": 1.1287323236465454, "learning_rate": 0.001, "loss": 2.1317, "step": 576500 }, { "epoch": 74.54427925016161, "grad_norm": 1.2530797719955444, "learning_rate": 0.001, "loss": 2.1189, "step": 576600 }, { "epoch": 74.55720749838397, "grad_norm": 1.113512635231018, "learning_rate": 0.001, "loss": 2.1154, "step": 576700 }, { "epoch": 74.57013574660634, "grad_norm": 1.106650948524475, "learning_rate": 0.001, "loss": 2.1094, "step": 576800 }, { "epoch": 74.5830639948287, "grad_norm": 0.9562047123908997, "learning_rate": 0.001, "loss": 2.1328, "step": 576900 }, { "epoch": 74.59599224305107, "grad_norm": 1.1812150478363037, "learning_rate": 0.001, "loss": 2.1283, "step": 577000 }, { "epoch": 74.60892049127344, "grad_norm": 1.1948766708374023, "learning_rate": 0.001, "loss": 2.15, "step": 577100 }, { "epoch": 74.6218487394958, "grad_norm": 1.2485425472259521, "learning_rate": 0.001, "loss": 2.1314, "step": 577200 }, { "epoch": 74.63477698771817, "grad_norm": 1.3362343311309814, "learning_rate": 0.001, "loss": 2.1355, "step": 577300 }, { "epoch": 74.64770523594053, "grad_norm": 1.1249046325683594, "learning_rate": 0.001, "loss": 2.1305, "step": 577400 }, { "epoch": 74.6606334841629, "grad_norm": 0.8356468677520752, "learning_rate": 0.001, "loss": 2.1501, "step": 577500 }, { "epoch": 74.67356173238527, "grad_norm": 1.0051519870758057, "learning_rate": 0.001, "loss": 2.1215, "step": 577600 }, { "epoch": 74.68648998060763, "grad_norm": 1.2112200260162354, "learning_rate": 0.001, "loss": 2.1421, "step": 577700 }, { "epoch": 74.69941822883, "grad_norm": 0.9134469628334045, "learning_rate": 0.001, "loss": 2.1143, "step": 577800 }, { "epoch": 74.71234647705236, "grad_norm": 1.1797478199005127, "learning_rate": 0.001, "loss": 2.1326, "step": 577900 }, { "epoch": 74.72527472527473, "grad_norm": 1.0257935523986816, "learning_rate": 0.001, "loss": 2.1287, "step": 578000 }, { "epoch": 74.7382029734971, "grad_norm": 0.8779332041740417, "learning_rate": 0.001, "loss": 2.1253, "step": 578100 }, { "epoch": 74.75113122171946, "grad_norm": 1.089795708656311, "learning_rate": 0.001, "loss": 2.1236, "step": 578200 }, { "epoch": 74.76405946994183, "grad_norm": 1.2585700750350952, "learning_rate": 0.001, "loss": 2.1547, "step": 578300 }, { "epoch": 74.7769877181642, "grad_norm": 1.5943809747695923, "learning_rate": 0.001, "loss": 2.1511, "step": 578400 }, { "epoch": 74.78991596638656, "grad_norm": 1.5921075344085693, "learning_rate": 0.001, "loss": 2.1592, "step": 578500 }, { "epoch": 74.80284421460892, "grad_norm": 0.9812028408050537, "learning_rate": 0.001, "loss": 2.1437, "step": 578600 }, { "epoch": 74.81577246283129, "grad_norm": 0.9485569596290588, "learning_rate": 0.001, "loss": 2.1414, "step": 578700 }, { "epoch": 74.82870071105366, "grad_norm": 1.0818620920181274, "learning_rate": 0.001, "loss": 2.1486, "step": 578800 }, { "epoch": 74.84162895927602, "grad_norm": 1.7417978048324585, "learning_rate": 0.001, "loss": 2.1407, "step": 578900 }, { "epoch": 74.85455720749839, "grad_norm": 1.0371413230895996, "learning_rate": 0.001, "loss": 2.1587, "step": 579000 }, { "epoch": 74.86748545572075, "grad_norm": 1.0654118061065674, "learning_rate": 0.001, "loss": 2.1481, "step": 579100 }, { "epoch": 74.88041370394312, "grad_norm": 1.1893272399902344, "learning_rate": 0.001, "loss": 2.1441, "step": 579200 }, { "epoch": 74.89334195216549, "grad_norm": 1.5550764799118042, "learning_rate": 0.001, "loss": 2.1374, "step": 579300 }, { "epoch": 74.90627020038785, "grad_norm": 1.4313335418701172, "learning_rate": 0.001, "loss": 2.1332, "step": 579400 }, { "epoch": 74.91919844861022, "grad_norm": 0.8447491526603699, "learning_rate": 0.001, "loss": 2.1457, "step": 579500 }, { "epoch": 74.93212669683258, "grad_norm": 1.0229320526123047, "learning_rate": 0.001, "loss": 2.1589, "step": 579600 }, { "epoch": 74.94505494505495, "grad_norm": 1.6585917472839355, "learning_rate": 0.001, "loss": 2.1643, "step": 579700 }, { "epoch": 74.95798319327731, "grad_norm": 1.2506765127182007, "learning_rate": 0.001, "loss": 2.1343, "step": 579800 }, { "epoch": 74.97091144149968, "grad_norm": 1.2326463460922241, "learning_rate": 0.001, "loss": 2.1476, "step": 579900 }, { "epoch": 74.98383968972205, "grad_norm": 49.257781982421875, "learning_rate": 0.001, "loss": 2.1439, "step": 580000 }, { "epoch": 74.99676793794441, "grad_norm": 1.5001507997512817, "learning_rate": 0.001, "loss": 2.1735, "step": 580100 }, { "epoch": 75.00969618616678, "grad_norm": 1.37468421459198, "learning_rate": 0.001, "loss": 2.0826, "step": 580200 }, { "epoch": 75.02262443438914, "grad_norm": 1.0757628679275513, "learning_rate": 0.001, "loss": 2.0735, "step": 580300 }, { "epoch": 75.03555268261151, "grad_norm": 1.4391753673553467, "learning_rate": 0.001, "loss": 2.086, "step": 580400 }, { "epoch": 75.04848093083388, "grad_norm": 1.1036280393600464, "learning_rate": 0.001, "loss": 2.0807, "step": 580500 }, { "epoch": 75.06140917905624, "grad_norm": 1.1043651103973389, "learning_rate": 0.001, "loss": 2.0745, "step": 580600 }, { "epoch": 75.0743374272786, "grad_norm": 1.3035681247711182, "learning_rate": 0.001, "loss": 2.0553, "step": 580700 }, { "epoch": 75.08726567550097, "grad_norm": 2.0779714584350586, "learning_rate": 0.001, "loss": 2.0729, "step": 580800 }, { "epoch": 75.10019392372334, "grad_norm": 1.2401542663574219, "learning_rate": 0.001, "loss": 2.0867, "step": 580900 }, { "epoch": 75.1131221719457, "grad_norm": 1.2609033584594727, "learning_rate": 0.001, "loss": 2.0786, "step": 581000 }, { "epoch": 75.12605042016807, "grad_norm": 0.9589295983314514, "learning_rate": 0.001, "loss": 2.0689, "step": 581100 }, { "epoch": 75.13897866839044, "grad_norm": 1.059346318244934, "learning_rate": 0.001, "loss": 2.1019, "step": 581200 }, { "epoch": 75.1519069166128, "grad_norm": 1.5729444026947021, "learning_rate": 0.001, "loss": 2.0876, "step": 581300 }, { "epoch": 75.16483516483517, "grad_norm": 1.4271671772003174, "learning_rate": 0.001, "loss": 2.0686, "step": 581400 }, { "epoch": 75.17776341305753, "grad_norm": 1.059328317642212, "learning_rate": 0.001, "loss": 2.0828, "step": 581500 }, { "epoch": 75.1906916612799, "grad_norm": 0.9404968619346619, "learning_rate": 0.001, "loss": 2.091, "step": 581600 }, { "epoch": 75.20361990950227, "grad_norm": 10.330784797668457, "learning_rate": 0.001, "loss": 2.0816, "step": 581700 }, { "epoch": 75.21654815772463, "grad_norm": 0.8904867172241211, "learning_rate": 0.001, "loss": 2.0897, "step": 581800 }, { "epoch": 75.229476405947, "grad_norm": 1.1488709449768066, "learning_rate": 0.001, "loss": 2.0772, "step": 581900 }, { "epoch": 75.24240465416936, "grad_norm": 1.0855379104614258, "learning_rate": 0.001, "loss": 2.1052, "step": 582000 }, { "epoch": 75.25533290239173, "grad_norm": 0.9231694936752319, "learning_rate": 0.001, "loss": 2.1135, "step": 582100 }, { "epoch": 75.2682611506141, "grad_norm": 1.1221188306808472, "learning_rate": 0.001, "loss": 2.0938, "step": 582200 }, { "epoch": 75.28118939883646, "grad_norm": 1.217773675918579, "learning_rate": 0.001, "loss": 2.095, "step": 582300 }, { "epoch": 75.29411764705883, "grad_norm": 1.7115556001663208, "learning_rate": 0.001, "loss": 2.1073, "step": 582400 }, { "epoch": 75.30704589528119, "grad_norm": 1.0704418420791626, "learning_rate": 0.001, "loss": 2.0992, "step": 582500 }, { "epoch": 75.31997414350356, "grad_norm": 18.67177391052246, "learning_rate": 0.001, "loss": 2.1124, "step": 582600 }, { "epoch": 75.33290239172592, "grad_norm": 0.9513359069824219, "learning_rate": 0.001, "loss": 2.1021, "step": 582700 }, { "epoch": 75.34583063994829, "grad_norm": 0.9243494272232056, "learning_rate": 0.001, "loss": 2.1075, "step": 582800 }, { "epoch": 75.35875888817066, "grad_norm": 13.982529640197754, "learning_rate": 0.001, "loss": 2.0976, "step": 582900 }, { "epoch": 75.37168713639302, "grad_norm": 1.8407829999923706, "learning_rate": 0.001, "loss": 2.1246, "step": 583000 }, { "epoch": 75.38461538461539, "grad_norm": 1.0135447978973389, "learning_rate": 0.001, "loss": 2.1155, "step": 583100 }, { "epoch": 75.39754363283775, "grad_norm": 0.9418948888778687, "learning_rate": 0.001, "loss": 2.1134, "step": 583200 }, { "epoch": 75.41047188106012, "grad_norm": 0.8994348645210266, "learning_rate": 0.001, "loss": 2.108, "step": 583300 }, { "epoch": 75.42340012928248, "grad_norm": 0.8878253698348999, "learning_rate": 0.001, "loss": 2.1219, "step": 583400 }, { "epoch": 75.43632837750485, "grad_norm": 1.4622068405151367, "learning_rate": 0.001, "loss": 2.1174, "step": 583500 }, { "epoch": 75.44925662572722, "grad_norm": 0.9741876721382141, "learning_rate": 0.001, "loss": 2.1161, "step": 583600 }, { "epoch": 75.46218487394958, "grad_norm": 0.9613406658172607, "learning_rate": 0.001, "loss": 2.114, "step": 583700 }, { "epoch": 75.47511312217195, "grad_norm": 1.8128271102905273, "learning_rate": 0.001, "loss": 2.1126, "step": 583800 }, { "epoch": 75.48804137039431, "grad_norm": 0.8394350409507751, "learning_rate": 0.001, "loss": 2.131, "step": 583900 }, { "epoch": 75.50096961861668, "grad_norm": 1.1902787685394287, "learning_rate": 0.001, "loss": 2.1306, "step": 584000 }, { "epoch": 75.51389786683905, "grad_norm": 1.03947913646698, "learning_rate": 0.001, "loss": 2.1249, "step": 584100 }, { "epoch": 75.52682611506141, "grad_norm": 0.8957974910736084, "learning_rate": 0.001, "loss": 2.1203, "step": 584200 }, { "epoch": 75.53975436328378, "grad_norm": 1.7097294330596924, "learning_rate": 0.001, "loss": 2.1248, "step": 584300 }, { "epoch": 75.55268261150614, "grad_norm": 1.4282327890396118, "learning_rate": 0.001, "loss": 2.1098, "step": 584400 }, { "epoch": 75.56561085972851, "grad_norm": 1.0193862915039062, "learning_rate": 0.001, "loss": 2.0911, "step": 584500 }, { "epoch": 75.57853910795087, "grad_norm": 11.986799240112305, "learning_rate": 0.001, "loss": 2.1138, "step": 584600 }, { "epoch": 75.59146735617324, "grad_norm": 1.9967108964920044, "learning_rate": 0.001, "loss": 2.1225, "step": 584700 }, { "epoch": 75.6043956043956, "grad_norm": 1.3226114511489868, "learning_rate": 0.001, "loss": 2.1277, "step": 584800 }, { "epoch": 75.61732385261797, "grad_norm": 1.2120438814163208, "learning_rate": 0.001, "loss": 2.1216, "step": 584900 }, { "epoch": 75.63025210084034, "grad_norm": 1.5591319799423218, "learning_rate": 0.001, "loss": 2.1267, "step": 585000 }, { "epoch": 75.6431803490627, "grad_norm": 0.9652265310287476, "learning_rate": 0.001, "loss": 2.142, "step": 585100 }, { "epoch": 75.65610859728507, "grad_norm": 13.699159622192383, "learning_rate": 0.001, "loss": 2.1073, "step": 585200 }, { "epoch": 75.66903684550743, "grad_norm": 1.09867262840271, "learning_rate": 0.001, "loss": 2.1416, "step": 585300 }, { "epoch": 75.6819650937298, "grad_norm": 1.728212594985962, "learning_rate": 0.001, "loss": 2.1148, "step": 585400 }, { "epoch": 75.69489334195217, "grad_norm": 1.3049356937408447, "learning_rate": 0.001, "loss": 2.1265, "step": 585500 }, { "epoch": 75.70782159017453, "grad_norm": 1.3900107145309448, "learning_rate": 0.001, "loss": 2.139, "step": 585600 }, { "epoch": 75.7207498383969, "grad_norm": 0.9545176029205322, "learning_rate": 0.001, "loss": 2.15, "step": 585700 }, { "epoch": 75.73367808661926, "grad_norm": 1.8162274360656738, "learning_rate": 0.001, "loss": 2.1318, "step": 585800 }, { "epoch": 75.74660633484163, "grad_norm": 1.0262378454208374, "learning_rate": 0.001, "loss": 2.1565, "step": 585900 }, { "epoch": 75.759534583064, "grad_norm": 1.1393414735794067, "learning_rate": 0.001, "loss": 2.1242, "step": 586000 }, { "epoch": 75.77246283128636, "grad_norm": 1.271954894065857, "learning_rate": 0.001, "loss": 2.1413, "step": 586100 }, { "epoch": 75.78539107950873, "grad_norm": 1.287378191947937, "learning_rate": 0.001, "loss": 2.1289, "step": 586200 }, { "epoch": 75.7983193277311, "grad_norm": 1.2447324991226196, "learning_rate": 0.001, "loss": 2.1486, "step": 586300 }, { "epoch": 75.81124757595346, "grad_norm": 1.3204587697982788, "learning_rate": 0.001, "loss": 2.138, "step": 586400 }, { "epoch": 75.82417582417582, "grad_norm": 0.988107442855835, "learning_rate": 0.001, "loss": 2.1262, "step": 586500 }, { "epoch": 75.83710407239819, "grad_norm": 3.174212694168091, "learning_rate": 0.001, "loss": 2.1396, "step": 586600 }, { "epoch": 75.85003232062056, "grad_norm": 1.4991739988327026, "learning_rate": 0.001, "loss": 2.137, "step": 586700 }, { "epoch": 75.86296056884292, "grad_norm": 0.9447921514511108, "learning_rate": 0.001, "loss": 2.1445, "step": 586800 }, { "epoch": 75.87588881706529, "grad_norm": 1.1077138185501099, "learning_rate": 0.001, "loss": 2.1457, "step": 586900 }, { "epoch": 75.88881706528765, "grad_norm": 1.1207045316696167, "learning_rate": 0.001, "loss": 2.1448, "step": 587000 }, { "epoch": 75.90174531351002, "grad_norm": 0.9543654322624207, "learning_rate": 0.001, "loss": 2.1623, "step": 587100 }, { "epoch": 75.91467356173239, "grad_norm": 2.058452606201172, "learning_rate": 0.001, "loss": 2.1392, "step": 587200 }, { "epoch": 75.92760180995475, "grad_norm": 2.491140604019165, "learning_rate": 0.001, "loss": 2.1598, "step": 587300 }, { "epoch": 75.94053005817712, "grad_norm": 1.1067994832992554, "learning_rate": 0.001, "loss": 2.1467, "step": 587400 }, { "epoch": 75.95345830639948, "grad_norm": 1.8042863607406616, "learning_rate": 0.001, "loss": 2.1391, "step": 587500 }, { "epoch": 75.96638655462185, "grad_norm": 1.0159767866134644, "learning_rate": 0.001, "loss": 2.1692, "step": 587600 }, { "epoch": 75.97931480284421, "grad_norm": 11.8511962890625, "learning_rate": 0.001, "loss": 2.1333, "step": 587700 }, { "epoch": 75.99224305106658, "grad_norm": 1.0331156253814697, "learning_rate": 0.001, "loss": 2.1432, "step": 587800 }, { "epoch": 76.00517129928895, "grad_norm": 1.288453459739685, "learning_rate": 0.001, "loss": 2.1116, "step": 587900 }, { "epoch": 76.01809954751131, "grad_norm": 2.0611789226531982, "learning_rate": 0.001, "loss": 2.0703, "step": 588000 }, { "epoch": 76.03102779573368, "grad_norm": 2.0872128009796143, "learning_rate": 0.001, "loss": 2.0751, "step": 588100 }, { "epoch": 76.04395604395604, "grad_norm": 1.0430583953857422, "learning_rate": 0.001, "loss": 2.0448, "step": 588200 }, { "epoch": 76.05688429217841, "grad_norm": 2.760770082473755, "learning_rate": 0.001, "loss": 2.076, "step": 588300 }, { "epoch": 76.06981254040078, "grad_norm": 1.1261488199234009, "learning_rate": 0.001, "loss": 2.0644, "step": 588400 }, { "epoch": 76.08274078862314, "grad_norm": 1.3005889654159546, "learning_rate": 0.001, "loss": 2.0808, "step": 588500 }, { "epoch": 76.0956690368455, "grad_norm": 2.9702389240264893, "learning_rate": 0.001, "loss": 2.0719, "step": 588600 }, { "epoch": 76.10859728506787, "grad_norm": 1.1921354532241821, "learning_rate": 0.001, "loss": 2.0715, "step": 588700 }, { "epoch": 76.12152553329024, "grad_norm": 1.9545366764068604, "learning_rate": 0.001, "loss": 2.0972, "step": 588800 }, { "epoch": 76.1344537815126, "grad_norm": 4.932995796203613, "learning_rate": 0.001, "loss": 2.0925, "step": 588900 }, { "epoch": 76.14738202973497, "grad_norm": 1.6228992938995361, "learning_rate": 0.001, "loss": 2.0912, "step": 589000 }, { "epoch": 76.16031027795734, "grad_norm": 0.9965455532073975, "learning_rate": 0.001, "loss": 2.1017, "step": 589100 }, { "epoch": 76.1732385261797, "grad_norm": 1.2993593215942383, "learning_rate": 0.001, "loss": 2.0894, "step": 589200 }, { "epoch": 76.18616677440207, "grad_norm": 1.1672947406768799, "learning_rate": 0.001, "loss": 2.0992, "step": 589300 }, { "epoch": 76.19909502262443, "grad_norm": 1.3008770942687988, "learning_rate": 0.001, "loss": 2.0869, "step": 589400 }, { "epoch": 76.2120232708468, "grad_norm": 1.2012895345687866, "learning_rate": 0.001, "loss": 2.0805, "step": 589500 }, { "epoch": 76.22495151906917, "grad_norm": 1.2080051898956299, "learning_rate": 0.001, "loss": 2.0933, "step": 589600 }, { "epoch": 76.23787976729153, "grad_norm": 1.0236624479293823, "learning_rate": 0.001, "loss": 2.0916, "step": 589700 }, { "epoch": 76.2508080155139, "grad_norm": 1.009979248046875, "learning_rate": 0.001, "loss": 2.0737, "step": 589800 }, { "epoch": 76.26373626373626, "grad_norm": 1.2723164558410645, "learning_rate": 0.001, "loss": 2.1032, "step": 589900 }, { "epoch": 76.27666451195863, "grad_norm": 1.530097484588623, "learning_rate": 0.001, "loss": 2.088, "step": 590000 }, { "epoch": 76.289592760181, "grad_norm": 0.9500280618667603, "learning_rate": 0.001, "loss": 2.0849, "step": 590100 }, { "epoch": 76.30252100840336, "grad_norm": 1.1363346576690674, "learning_rate": 0.001, "loss": 2.1002, "step": 590200 }, { "epoch": 76.31544925662573, "grad_norm": 0.9681076407432556, "learning_rate": 0.001, "loss": 2.1044, "step": 590300 }, { "epoch": 76.32837750484809, "grad_norm": 1.0265207290649414, "learning_rate": 0.001, "loss": 2.0949, "step": 590400 }, { "epoch": 76.34130575307046, "grad_norm": 1.5268701314926147, "learning_rate": 0.001, "loss": 2.097, "step": 590500 }, { "epoch": 76.35423400129282, "grad_norm": 1.3748424053192139, "learning_rate": 0.001, "loss": 2.1064, "step": 590600 }, { "epoch": 76.36716224951519, "grad_norm": 1.2898659706115723, "learning_rate": 0.001, "loss": 2.0944, "step": 590700 }, { "epoch": 76.38009049773756, "grad_norm": 1.144659399986267, "learning_rate": 0.001, "loss": 2.0743, "step": 590800 }, { "epoch": 76.39301874595992, "grad_norm": 1.0055561065673828, "learning_rate": 0.001, "loss": 2.111, "step": 590900 }, { "epoch": 76.40594699418229, "grad_norm": 1.2323822975158691, "learning_rate": 0.001, "loss": 2.117, "step": 591000 }, { "epoch": 76.41887524240465, "grad_norm": 1.132580280303955, "learning_rate": 0.001, "loss": 2.101, "step": 591100 }, { "epoch": 76.43180349062702, "grad_norm": 1.5537399053573608, "learning_rate": 0.001, "loss": 2.1172, "step": 591200 }, { "epoch": 76.44473173884938, "grad_norm": 0.9946369528770447, "learning_rate": 0.001, "loss": 2.0912, "step": 591300 }, { "epoch": 76.45765998707175, "grad_norm": 1.4990378618240356, "learning_rate": 0.001, "loss": 2.1128, "step": 591400 }, { "epoch": 76.47058823529412, "grad_norm": 1.007534146308899, "learning_rate": 0.001, "loss": 2.0936, "step": 591500 }, { "epoch": 76.48351648351648, "grad_norm": 3.0731329917907715, "learning_rate": 0.001, "loss": 2.108, "step": 591600 }, { "epoch": 76.49644473173885, "grad_norm": 0.9928284883499146, "learning_rate": 0.001, "loss": 2.1164, "step": 591700 }, { "epoch": 76.50937297996121, "grad_norm": 2.267653226852417, "learning_rate": 0.001, "loss": 2.0895, "step": 591800 }, { "epoch": 76.52230122818358, "grad_norm": 1.3729479312896729, "learning_rate": 0.001, "loss": 2.1093, "step": 591900 }, { "epoch": 76.53522947640595, "grad_norm": 1.1883877515792847, "learning_rate": 0.001, "loss": 2.1069, "step": 592000 }, { "epoch": 76.54815772462831, "grad_norm": 1.221059799194336, "learning_rate": 0.001, "loss": 2.1039, "step": 592100 }, { "epoch": 76.56108597285068, "grad_norm": 1.223517656326294, "learning_rate": 0.001, "loss": 2.1213, "step": 592200 }, { "epoch": 76.57401422107304, "grad_norm": 1.4277998208999634, "learning_rate": 0.001, "loss": 2.1192, "step": 592300 }, { "epoch": 76.58694246929541, "grad_norm": 1.1547659635543823, "learning_rate": 0.001, "loss": 2.1108, "step": 592400 }, { "epoch": 76.59987071751777, "grad_norm": 0.9962666034698486, "learning_rate": 0.001, "loss": 2.1325, "step": 592500 }, { "epoch": 76.61279896574014, "grad_norm": 4.029606342315674, "learning_rate": 0.001, "loss": 2.1302, "step": 592600 }, { "epoch": 76.6257272139625, "grad_norm": 1.350865125656128, "learning_rate": 0.001, "loss": 2.1249, "step": 592700 }, { "epoch": 76.63865546218487, "grad_norm": 1.1587871313095093, "learning_rate": 0.001, "loss": 2.129, "step": 592800 }, { "epoch": 76.65158371040724, "grad_norm": 1.2679777145385742, "learning_rate": 0.001, "loss": 2.1206, "step": 592900 }, { "epoch": 76.6645119586296, "grad_norm": 1.3707195520401, "learning_rate": 0.001, "loss": 2.1252, "step": 593000 }, { "epoch": 76.67744020685197, "grad_norm": 1.287767767906189, "learning_rate": 0.001, "loss": 2.1298, "step": 593100 }, { "epoch": 76.69036845507433, "grad_norm": 1.3122409582138062, "learning_rate": 0.001, "loss": 2.1307, "step": 593200 }, { "epoch": 76.7032967032967, "grad_norm": 1.0820953845977783, "learning_rate": 0.001, "loss": 2.1397, "step": 593300 }, { "epoch": 76.71622495151907, "grad_norm": 1.1538360118865967, "learning_rate": 0.001, "loss": 2.147, "step": 593400 }, { "epoch": 76.72915319974143, "grad_norm": 1.510743260383606, "learning_rate": 0.001, "loss": 2.1336, "step": 593500 }, { "epoch": 76.7420814479638, "grad_norm": 1.8270149230957031, "learning_rate": 0.001, "loss": 2.1468, "step": 593600 }, { "epoch": 76.75500969618616, "grad_norm": 1.0122419595718384, "learning_rate": 0.001, "loss": 2.1293, "step": 593700 }, { "epoch": 76.76793794440853, "grad_norm": 1.0110752582550049, "learning_rate": 0.001, "loss": 2.1239, "step": 593800 }, { "epoch": 76.7808661926309, "grad_norm": 1.3449221849441528, "learning_rate": 0.001, "loss": 2.1251, "step": 593900 }, { "epoch": 76.79379444085326, "grad_norm": 1.2396901845932007, "learning_rate": 0.001, "loss": 2.1264, "step": 594000 }, { "epoch": 76.80672268907563, "grad_norm": 1.1327834129333496, "learning_rate": 0.001, "loss": 2.1266, "step": 594100 }, { "epoch": 76.819650937298, "grad_norm": 1.1028989553451538, "learning_rate": 0.001, "loss": 2.1254, "step": 594200 }, { "epoch": 76.83257918552036, "grad_norm": 1.34250009059906, "learning_rate": 0.001, "loss": 2.1443, "step": 594300 }, { "epoch": 76.84550743374272, "grad_norm": 1.4117584228515625, "learning_rate": 0.001, "loss": 2.1313, "step": 594400 }, { "epoch": 76.85843568196509, "grad_norm": 1.398654580116272, "learning_rate": 0.001, "loss": 2.1326, "step": 594500 }, { "epoch": 76.87136393018746, "grad_norm": 3.347372531890869, "learning_rate": 0.001, "loss": 2.1521, "step": 594600 }, { "epoch": 76.88429217840982, "grad_norm": 1.0893020629882812, "learning_rate": 0.001, "loss": 2.1217, "step": 594700 }, { "epoch": 76.89722042663219, "grad_norm": 2.423746109008789, "learning_rate": 0.001, "loss": 2.1422, "step": 594800 }, { "epoch": 76.91014867485455, "grad_norm": 1.3016252517700195, "learning_rate": 0.001, "loss": 2.1648, "step": 594900 }, { "epoch": 76.92307692307692, "grad_norm": 1.0477553606033325, "learning_rate": 0.001, "loss": 2.1561, "step": 595000 }, { "epoch": 76.93600517129929, "grad_norm": 1.2936638593673706, "learning_rate": 0.001, "loss": 2.1446, "step": 595100 }, { "epoch": 76.94893341952165, "grad_norm": 1.7703251838684082, "learning_rate": 0.001, "loss": 2.1462, "step": 595200 }, { "epoch": 76.96186166774402, "grad_norm": 1.4198774099349976, "learning_rate": 0.001, "loss": 2.1517, "step": 595300 }, { "epoch": 76.97478991596638, "grad_norm": 1.3829643726348877, "learning_rate": 0.001, "loss": 2.1562, "step": 595400 }, { "epoch": 76.98771816418875, "grad_norm": 12.771397590637207, "learning_rate": 0.001, "loss": 2.1438, "step": 595500 }, { "epoch": 77.00064641241111, "grad_norm": 1.3439359664916992, "learning_rate": 0.001, "loss": 2.1078, "step": 595600 }, { "epoch": 77.01357466063348, "grad_norm": 1.785329818725586, "learning_rate": 0.001, "loss": 2.0544, "step": 595700 }, { "epoch": 77.02650290885585, "grad_norm": 0.9452971816062927, "learning_rate": 0.001, "loss": 2.0598, "step": 595800 }, { "epoch": 77.03943115707821, "grad_norm": 1.3342527151107788, "learning_rate": 0.001, "loss": 2.0511, "step": 595900 }, { "epoch": 77.05235940530058, "grad_norm": 0.9414064288139343, "learning_rate": 0.001, "loss": 2.071, "step": 596000 }, { "epoch": 77.06528765352294, "grad_norm": 1.073803186416626, "learning_rate": 0.001, "loss": 2.0775, "step": 596100 }, { "epoch": 77.07821590174531, "grad_norm": 1.5773626565933228, "learning_rate": 0.001, "loss": 2.0941, "step": 596200 }, { "epoch": 77.09114414996768, "grad_norm": 1.109998345375061, "learning_rate": 0.001, "loss": 2.0537, "step": 596300 }, { "epoch": 77.10407239819004, "grad_norm": 1.068638801574707, "learning_rate": 0.001, "loss": 2.0947, "step": 596400 }, { "epoch": 77.11700064641241, "grad_norm": 1.2466191053390503, "learning_rate": 0.001, "loss": 2.0807, "step": 596500 }, { "epoch": 77.12992889463477, "grad_norm": 1.0109564065933228, "learning_rate": 0.001, "loss": 2.0848, "step": 596600 }, { "epoch": 77.14285714285714, "grad_norm": 0.9656892418861389, "learning_rate": 0.001, "loss": 2.0736, "step": 596700 }, { "epoch": 77.1557853910795, "grad_norm": 13.834449768066406, "learning_rate": 0.001, "loss": 2.1031, "step": 596800 }, { "epoch": 77.16871363930187, "grad_norm": 1.336377739906311, "learning_rate": 0.001, "loss": 2.0612, "step": 596900 }, { "epoch": 77.18164188752424, "grad_norm": 1.0284918546676636, "learning_rate": 0.001, "loss": 2.0868, "step": 597000 }, { "epoch": 77.1945701357466, "grad_norm": 1.38957941532135, "learning_rate": 0.001, "loss": 2.0676, "step": 597100 }, { "epoch": 77.20749838396897, "grad_norm": 1.6455883979797363, "learning_rate": 0.001, "loss": 2.0865, "step": 597200 }, { "epoch": 77.22042663219133, "grad_norm": 1.318483591079712, "learning_rate": 0.001, "loss": 2.1003, "step": 597300 }, { "epoch": 77.2333548804137, "grad_norm": 1.0199095010757446, "learning_rate": 0.001, "loss": 2.0796, "step": 597400 }, { "epoch": 77.24628312863607, "grad_norm": 1.3851743936538696, "learning_rate": 0.001, "loss": 2.0747, "step": 597500 }, { "epoch": 77.25921137685843, "grad_norm": 1.1226768493652344, "learning_rate": 0.001, "loss": 2.0713, "step": 597600 }, { "epoch": 77.2721396250808, "grad_norm": 1.0282227993011475, "learning_rate": 0.001, "loss": 2.0779, "step": 597700 }, { "epoch": 77.28506787330316, "grad_norm": 1.3375775814056396, "learning_rate": 0.001, "loss": 2.0888, "step": 597800 }, { "epoch": 77.29799612152553, "grad_norm": 2.0047223567962646, "learning_rate": 0.001, "loss": 2.1017, "step": 597900 }, { "epoch": 77.3109243697479, "grad_norm": 0.9345430135726929, "learning_rate": 0.001, "loss": 2.0896, "step": 598000 }, { "epoch": 77.32385261797026, "grad_norm": 0.9823113083839417, "learning_rate": 0.001, "loss": 2.0755, "step": 598100 }, { "epoch": 77.33678086619263, "grad_norm": 1.6828914880752563, "learning_rate": 0.001, "loss": 2.0969, "step": 598200 }, { "epoch": 77.34970911441499, "grad_norm": 1.272218108177185, "learning_rate": 0.001, "loss": 2.0929, "step": 598300 }, { "epoch": 77.36263736263736, "grad_norm": 4.742881774902344, "learning_rate": 0.001, "loss": 2.0929, "step": 598400 }, { "epoch": 77.37556561085972, "grad_norm": 2.5110039710998535, "learning_rate": 0.001, "loss": 2.1051, "step": 598500 }, { "epoch": 77.38849385908209, "grad_norm": 1.1232911348342896, "learning_rate": 0.001, "loss": 2.1074, "step": 598600 }, { "epoch": 77.40142210730446, "grad_norm": 1.1789849996566772, "learning_rate": 0.001, "loss": 2.1044, "step": 598700 }, { "epoch": 77.41435035552682, "grad_norm": 1.387789011001587, "learning_rate": 0.001, "loss": 2.0976, "step": 598800 }, { "epoch": 77.42727860374919, "grad_norm": 0.9406704306602478, "learning_rate": 0.001, "loss": 2.0846, "step": 598900 }, { "epoch": 77.44020685197155, "grad_norm": 3.0553226470947266, "learning_rate": 0.001, "loss": 2.1059, "step": 599000 }, { "epoch": 77.45313510019392, "grad_norm": 1.6257632970809937, "learning_rate": 0.001, "loss": 2.1046, "step": 599100 }, { "epoch": 77.46606334841628, "grad_norm": 1.1796603202819824, "learning_rate": 0.001, "loss": 2.0968, "step": 599200 }, { "epoch": 77.47899159663865, "grad_norm": 1.5407956838607788, "learning_rate": 0.001, "loss": 2.1088, "step": 599300 }, { "epoch": 77.49191984486102, "grad_norm": 0.9737226963043213, "learning_rate": 0.001, "loss": 2.1231, "step": 599400 }, { "epoch": 77.50484809308338, "grad_norm": 1.1586538553237915, "learning_rate": 0.001, "loss": 2.113, "step": 599500 }, { "epoch": 77.51777634130575, "grad_norm": 1.5088030099868774, "learning_rate": 0.001, "loss": 2.1246, "step": 599600 }, { "epoch": 77.53070458952811, "grad_norm": 1.2449811697006226, "learning_rate": 0.001, "loss": 2.1191, "step": 599700 }, { "epoch": 77.54363283775048, "grad_norm": 1.1503885984420776, "learning_rate": 0.001, "loss": 2.1182, "step": 599800 }, { "epoch": 77.55656108597285, "grad_norm": 1.0932010412216187, "learning_rate": 0.001, "loss": 2.0954, "step": 599900 }, { "epoch": 77.56948933419521, "grad_norm": 6.130471229553223, "learning_rate": 0.001, "loss": 2.1229, "step": 600000 }, { "epoch": 77.58241758241758, "grad_norm": 0.9622364044189453, "learning_rate": 0.001, "loss": 2.1111, "step": 600100 }, { "epoch": 77.59534583063994, "grad_norm": 1.1665290594100952, "learning_rate": 0.001, "loss": 2.1253, "step": 600200 }, { "epoch": 77.60827407886231, "grad_norm": 1.175708532333374, "learning_rate": 0.001, "loss": 2.1456, "step": 600300 }, { "epoch": 77.62120232708467, "grad_norm": 1.5114037990570068, "learning_rate": 0.001, "loss": 2.1326, "step": 600400 }, { "epoch": 77.63413057530704, "grad_norm": 1.102973461151123, "learning_rate": 0.001, "loss": 2.1101, "step": 600500 }, { "epoch": 77.6470588235294, "grad_norm": 1.1130335330963135, "learning_rate": 0.001, "loss": 2.1232, "step": 600600 }, { "epoch": 77.65998707175177, "grad_norm": 1.0958746671676636, "learning_rate": 0.001, "loss": 2.1058, "step": 600700 }, { "epoch": 77.67291531997414, "grad_norm": 1.016883373260498, "learning_rate": 0.001, "loss": 2.1397, "step": 600800 }, { "epoch": 77.6858435681965, "grad_norm": 0.9641351103782654, "learning_rate": 0.001, "loss": 2.1459, "step": 600900 }, { "epoch": 77.69877181641887, "grad_norm": 1.0894702672958374, "learning_rate": 0.001, "loss": 2.1133, "step": 601000 }, { "epoch": 77.71170006464124, "grad_norm": 1.7534259557724, "learning_rate": 0.001, "loss": 2.1286, "step": 601100 }, { "epoch": 77.7246283128636, "grad_norm": 1.8150672912597656, "learning_rate": 0.001, "loss": 2.1273, "step": 601200 }, { "epoch": 77.73755656108597, "grad_norm": 1.9712227582931519, "learning_rate": 0.001, "loss": 2.1368, "step": 601300 }, { "epoch": 77.75048480930833, "grad_norm": 0.9158166646957397, "learning_rate": 0.001, "loss": 2.112, "step": 601400 }, { "epoch": 77.7634130575307, "grad_norm": 1.3428219556808472, "learning_rate": 0.001, "loss": 2.148, "step": 601500 }, { "epoch": 77.77634130575306, "grad_norm": 1.2852579355239868, "learning_rate": 0.001, "loss": 2.144, "step": 601600 }, { "epoch": 77.78926955397543, "grad_norm": 1.3253024816513062, "learning_rate": 0.001, "loss": 2.1435, "step": 601700 }, { "epoch": 77.8021978021978, "grad_norm": 1.042024850845337, "learning_rate": 0.001, "loss": 2.1444, "step": 601800 }, { "epoch": 77.81512605042016, "grad_norm": 1.1323438882827759, "learning_rate": 0.001, "loss": 2.1349, "step": 601900 }, { "epoch": 77.82805429864253, "grad_norm": 1.5558173656463623, "learning_rate": 0.001, "loss": 2.1321, "step": 602000 }, { "epoch": 77.8409825468649, "grad_norm": 1.2011897563934326, "learning_rate": 0.001, "loss": 2.1407, "step": 602100 }, { "epoch": 77.85391079508726, "grad_norm": 1.2761775255203247, "learning_rate": 0.001, "loss": 2.1406, "step": 602200 }, { "epoch": 77.86683904330962, "grad_norm": 1.1230127811431885, "learning_rate": 0.001, "loss": 2.1305, "step": 602300 }, { "epoch": 77.87976729153199, "grad_norm": 4.302533149719238, "learning_rate": 0.001, "loss": 2.1128, "step": 602400 }, { "epoch": 77.89269553975436, "grad_norm": 1.2883535623550415, "learning_rate": 0.001, "loss": 2.1636, "step": 602500 }, { "epoch": 77.90562378797672, "grad_norm": 3.598275661468506, "learning_rate": 0.001, "loss": 2.158, "step": 602600 }, { "epoch": 77.91855203619909, "grad_norm": 0.9481402039527893, "learning_rate": 0.001, "loss": 2.1542, "step": 602700 }, { "epoch": 77.93148028442145, "grad_norm": 1.112444519996643, "learning_rate": 0.001, "loss": 2.1516, "step": 602800 }, { "epoch": 77.94440853264382, "grad_norm": 1.0766528844833374, "learning_rate": 0.001, "loss": 2.1523, "step": 602900 }, { "epoch": 77.95733678086619, "grad_norm": 1.276124358177185, "learning_rate": 0.001, "loss": 2.1365, "step": 603000 }, { "epoch": 77.97026502908855, "grad_norm": 2.385479211807251, "learning_rate": 0.001, "loss": 2.1314, "step": 603100 }, { "epoch": 77.98319327731092, "grad_norm": 1.3605430126190186, "learning_rate": 0.001, "loss": 2.1513, "step": 603200 }, { "epoch": 77.99612152553328, "grad_norm": 1.6642543077468872, "learning_rate": 0.001, "loss": 2.1394, "step": 603300 }, { "epoch": 78.00904977375566, "grad_norm": 0.9107715487480164, "learning_rate": 0.001, "loss": 2.1032, "step": 603400 }, { "epoch": 78.02197802197803, "grad_norm": 1.0177861452102661, "learning_rate": 0.001, "loss": 2.0561, "step": 603500 }, { "epoch": 78.0349062702004, "grad_norm": 0.9866741895675659, "learning_rate": 0.001, "loss": 2.0677, "step": 603600 }, { "epoch": 78.04783451842276, "grad_norm": 1.094768762588501, "learning_rate": 0.001, "loss": 2.0434, "step": 603700 }, { "epoch": 78.06076276664513, "grad_norm": 0.9542916417121887, "learning_rate": 0.001, "loss": 2.0658, "step": 603800 }, { "epoch": 78.07369101486749, "grad_norm": 1.4638341665267944, "learning_rate": 0.001, "loss": 2.0766, "step": 603900 }, { "epoch": 78.08661926308986, "grad_norm": 1.5894743204116821, "learning_rate": 0.001, "loss": 2.0618, "step": 604000 }, { "epoch": 78.09954751131222, "grad_norm": 1.2725746631622314, "learning_rate": 0.001, "loss": 2.0778, "step": 604100 }, { "epoch": 78.11247575953459, "grad_norm": 1.1646157503128052, "learning_rate": 0.001, "loss": 2.0832, "step": 604200 }, { "epoch": 78.12540400775696, "grad_norm": 0.9951688051223755, "learning_rate": 0.001, "loss": 2.0667, "step": 604300 }, { "epoch": 78.13833225597932, "grad_norm": 0.8404219746589661, "learning_rate": 0.001, "loss": 2.0802, "step": 604400 }, { "epoch": 78.15126050420169, "grad_norm": 1.0004521608352661, "learning_rate": 0.001, "loss": 2.084, "step": 604500 }, { "epoch": 78.16418875242405, "grad_norm": 1.0457762479782104, "learning_rate": 0.001, "loss": 2.071, "step": 604600 }, { "epoch": 78.17711700064642, "grad_norm": 2.9811055660247803, "learning_rate": 0.001, "loss": 2.074, "step": 604700 }, { "epoch": 78.19004524886878, "grad_norm": 1.3053030967712402, "learning_rate": 0.001, "loss": 2.0903, "step": 604800 }, { "epoch": 78.20297349709115, "grad_norm": 1.101378083229065, "learning_rate": 0.001, "loss": 2.0493, "step": 604900 }, { "epoch": 78.21590174531352, "grad_norm": 3.1548638343811035, "learning_rate": 0.001, "loss": 2.0764, "step": 605000 }, { "epoch": 78.22882999353588, "grad_norm": 1.4667472839355469, "learning_rate": 0.001, "loss": 2.0817, "step": 605100 }, { "epoch": 78.24175824175825, "grad_norm": 1.3191677331924438, "learning_rate": 0.001, "loss": 2.1041, "step": 605200 }, { "epoch": 78.25468648998061, "grad_norm": 0.9309079647064209, "learning_rate": 0.001, "loss": 2.0801, "step": 605300 }, { "epoch": 78.26761473820298, "grad_norm": 1.244781494140625, "learning_rate": 0.001, "loss": 2.0831, "step": 605400 }, { "epoch": 78.28054298642535, "grad_norm": 0.9324705004692078, "learning_rate": 0.001, "loss": 2.0934, "step": 605500 }, { "epoch": 78.29347123464771, "grad_norm": 1.257218599319458, "learning_rate": 0.001, "loss": 2.0892, "step": 605600 }, { "epoch": 78.30639948287008, "grad_norm": 28.509227752685547, "learning_rate": 0.001, "loss": 2.0916, "step": 605700 }, { "epoch": 78.31932773109244, "grad_norm": 1.1489307880401611, "learning_rate": 0.001, "loss": 2.0844, "step": 605800 }, { "epoch": 78.33225597931481, "grad_norm": 1.001421332359314, "learning_rate": 0.001, "loss": 2.0952, "step": 605900 }, { "epoch": 78.34518422753717, "grad_norm": 0.6948531270027161, "learning_rate": 0.001, "loss": 2.0784, "step": 606000 }, { "epoch": 78.35811247575954, "grad_norm": 1.1157777309417725, "learning_rate": 0.001, "loss": 2.093, "step": 606100 }, { "epoch": 78.3710407239819, "grad_norm": 0.8125796914100647, "learning_rate": 0.001, "loss": 2.0825, "step": 606200 }, { "epoch": 78.38396897220427, "grad_norm": 1.1281684637069702, "learning_rate": 0.001, "loss": 2.1051, "step": 606300 }, { "epoch": 78.39689722042664, "grad_norm": 5.560187339782715, "learning_rate": 0.001, "loss": 2.0947, "step": 606400 }, { "epoch": 78.409825468649, "grad_norm": 0.8488938212394714, "learning_rate": 0.001, "loss": 2.1155, "step": 606500 }, { "epoch": 78.42275371687137, "grad_norm": 1.0772844552993774, "learning_rate": 0.001, "loss": 2.0985, "step": 606600 }, { "epoch": 78.43568196509374, "grad_norm": 1.3249549865722656, "learning_rate": 0.001, "loss": 2.1082, "step": 606700 }, { "epoch": 78.4486102133161, "grad_norm": 1.0697640180587769, "learning_rate": 0.001, "loss": 2.0921, "step": 606800 }, { "epoch": 78.46153846153847, "grad_norm": 1.2319488525390625, "learning_rate": 0.001, "loss": 2.0971, "step": 606900 }, { "epoch": 78.47446670976083, "grad_norm": 1.9536542892456055, "learning_rate": 0.001, "loss": 2.1039, "step": 607000 }, { "epoch": 78.4873949579832, "grad_norm": 1.1843913793563843, "learning_rate": 0.001, "loss": 2.1248, "step": 607100 }, { "epoch": 78.50032320620556, "grad_norm": 0.9125053286552429, "learning_rate": 0.001, "loss": 2.1106, "step": 607200 }, { "epoch": 78.51325145442793, "grad_norm": 1.4860352277755737, "learning_rate": 0.001, "loss": 2.1319, "step": 607300 }, { "epoch": 78.5261797026503, "grad_norm": 1.530971884727478, "learning_rate": 0.001, "loss": 2.1248, "step": 607400 }, { "epoch": 78.53910795087266, "grad_norm": 1.3765435218811035, "learning_rate": 0.001, "loss": 2.1193, "step": 607500 }, { "epoch": 78.55203619909503, "grad_norm": 1.0447875261306763, "learning_rate": 0.001, "loss": 2.1016, "step": 607600 }, { "epoch": 78.5649644473174, "grad_norm": 2.0528647899627686, "learning_rate": 0.001, "loss": 2.1143, "step": 607700 }, { "epoch": 78.57789269553976, "grad_norm": 0.961033821105957, "learning_rate": 0.001, "loss": 2.1262, "step": 607800 }, { "epoch": 78.59082094376213, "grad_norm": 1.1148048639297485, "learning_rate": 0.001, "loss": 2.1284, "step": 607900 }, { "epoch": 78.60374919198449, "grad_norm": 3.225733995437622, "learning_rate": 0.001, "loss": 2.116, "step": 608000 }, { "epoch": 78.61667744020686, "grad_norm": 0.8270561099052429, "learning_rate": 0.001, "loss": 2.1018, "step": 608100 }, { "epoch": 78.62960568842922, "grad_norm": 1.1809275150299072, "learning_rate": 0.001, "loss": 2.1332, "step": 608200 }, { "epoch": 78.64253393665159, "grad_norm": 1.2981632947921753, "learning_rate": 0.001, "loss": 2.1412, "step": 608300 }, { "epoch": 78.65546218487395, "grad_norm": 1.5261964797973633, "learning_rate": 0.001, "loss": 2.1319, "step": 608400 }, { "epoch": 78.66839043309632, "grad_norm": 0.8846156597137451, "learning_rate": 0.001, "loss": 2.1243, "step": 608500 }, { "epoch": 78.68131868131869, "grad_norm": 1.4408581256866455, "learning_rate": 0.001, "loss": 2.117, "step": 608600 }, { "epoch": 78.69424692954105, "grad_norm": 0.9201158285140991, "learning_rate": 0.001, "loss": 2.118, "step": 608700 }, { "epoch": 78.70717517776342, "grad_norm": 1.092191457748413, "learning_rate": 0.001, "loss": 2.12, "step": 608800 }, { "epoch": 78.72010342598578, "grad_norm": 0.8564698100090027, "learning_rate": 0.001, "loss": 2.1097, "step": 608900 }, { "epoch": 78.73303167420815, "grad_norm": 0.8982309699058533, "learning_rate": 0.001, "loss": 2.1299, "step": 609000 }, { "epoch": 78.74595992243052, "grad_norm": 0.9825608134269714, "learning_rate": 0.001, "loss": 2.1351, "step": 609100 }, { "epoch": 78.75888817065288, "grad_norm": 1.3557230234146118, "learning_rate": 0.001, "loss": 2.133, "step": 609200 }, { "epoch": 78.77181641887525, "grad_norm": 1.2503845691680908, "learning_rate": 0.001, "loss": 2.1427, "step": 609300 }, { "epoch": 78.78474466709761, "grad_norm": 1.4583079814910889, "learning_rate": 0.001, "loss": 2.121, "step": 609400 }, { "epoch": 78.79767291531998, "grad_norm": 1.7484875917434692, "learning_rate": 0.001, "loss": 2.1385, "step": 609500 }, { "epoch": 78.81060116354234, "grad_norm": 1.1896119117736816, "learning_rate": 0.001, "loss": 2.1438, "step": 609600 }, { "epoch": 78.82352941176471, "grad_norm": 1.781640648841858, "learning_rate": 0.001, "loss": 2.1542, "step": 609700 }, { "epoch": 78.83645765998708, "grad_norm": 1.0418059825897217, "learning_rate": 0.001, "loss": 2.1318, "step": 609800 }, { "epoch": 78.84938590820944, "grad_norm": 1.2954456806182861, "learning_rate": 0.001, "loss": 2.1103, "step": 609900 }, { "epoch": 78.86231415643181, "grad_norm": 0.9239157438278198, "learning_rate": 0.001, "loss": 2.149, "step": 610000 }, { "epoch": 78.87524240465417, "grad_norm": 1.0331627130508423, "learning_rate": 0.001, "loss": 2.152, "step": 610100 }, { "epoch": 78.88817065287654, "grad_norm": 1.179622769355774, "learning_rate": 0.001, "loss": 2.1411, "step": 610200 }, { "epoch": 78.9010989010989, "grad_norm": 1.2834469079971313, "learning_rate": 0.001, "loss": 2.1358, "step": 610300 }, { "epoch": 78.91402714932127, "grad_norm": 2.15362811088562, "learning_rate": 0.001, "loss": 2.1117, "step": 610400 }, { "epoch": 78.92695539754364, "grad_norm": 1.3310365676879883, "learning_rate": 0.001, "loss": 2.1352, "step": 610500 }, { "epoch": 78.939883645766, "grad_norm": 0.885395348072052, "learning_rate": 0.001, "loss": 2.15, "step": 610600 }, { "epoch": 78.95281189398837, "grad_norm": 1.0720927715301514, "learning_rate": 0.001, "loss": 2.129, "step": 610700 }, { "epoch": 78.96574014221073, "grad_norm": 1.1477758884429932, "learning_rate": 0.001, "loss": 2.1242, "step": 610800 }, { "epoch": 78.9786683904331, "grad_norm": 1.4501372575759888, "learning_rate": 0.001, "loss": 2.1281, "step": 610900 }, { "epoch": 78.99159663865547, "grad_norm": 1.3479284048080444, "learning_rate": 0.001, "loss": 2.1205, "step": 611000 }, { "epoch": 79.00452488687783, "grad_norm": 1.1205706596374512, "learning_rate": 0.001, "loss": 2.1137, "step": 611100 }, { "epoch": 79.0174531351002, "grad_norm": 1.0644644498825073, "learning_rate": 0.001, "loss": 2.0601, "step": 611200 }, { "epoch": 79.03038138332256, "grad_norm": 1.096726655960083, "learning_rate": 0.001, "loss": 2.0456, "step": 611300 }, { "epoch": 79.04330963154493, "grad_norm": 2.054892063140869, "learning_rate": 0.001, "loss": 2.0587, "step": 611400 }, { "epoch": 79.0562378797673, "grad_norm": 1.2451112270355225, "learning_rate": 0.001, "loss": 2.0557, "step": 611500 }, { "epoch": 79.06916612798966, "grad_norm": 1.2483789920806885, "learning_rate": 0.001, "loss": 2.085, "step": 611600 }, { "epoch": 79.08209437621203, "grad_norm": 1.5554882287979126, "learning_rate": 0.001, "loss": 2.0639, "step": 611700 }, { "epoch": 79.09502262443439, "grad_norm": 1.5438774824142456, "learning_rate": 0.001, "loss": 2.0741, "step": 611800 }, { "epoch": 79.10795087265676, "grad_norm": 0.9372180700302124, "learning_rate": 0.001, "loss": 2.0756, "step": 611900 }, { "epoch": 79.12087912087912, "grad_norm": 1.381975769996643, "learning_rate": 0.001, "loss": 2.0647, "step": 612000 }, { "epoch": 79.13380736910149, "grad_norm": 1.1351085901260376, "learning_rate": 0.001, "loss": 2.0745, "step": 612100 }, { "epoch": 79.14673561732386, "grad_norm": 1.1767094135284424, "learning_rate": 0.001, "loss": 2.0616, "step": 612200 }, { "epoch": 79.15966386554622, "grad_norm": 1.3208483457565308, "learning_rate": 0.001, "loss": 2.0729, "step": 612300 }, { "epoch": 79.17259211376859, "grad_norm": 1.3367807865142822, "learning_rate": 0.001, "loss": 2.073, "step": 612400 }, { "epoch": 79.18552036199095, "grad_norm": 1.4118748903274536, "learning_rate": 0.001, "loss": 2.0795, "step": 612500 }, { "epoch": 79.19844861021332, "grad_norm": 1.0719653367996216, "learning_rate": 0.001, "loss": 2.0643, "step": 612600 }, { "epoch": 79.21137685843568, "grad_norm": 1.3938442468643188, "learning_rate": 0.001, "loss": 2.0644, "step": 612700 }, { "epoch": 79.22430510665805, "grad_norm": 1.2452508211135864, "learning_rate": 0.001, "loss": 2.079, "step": 612800 }, { "epoch": 79.23723335488042, "grad_norm": 1.1633723974227905, "learning_rate": 0.001, "loss": 2.0724, "step": 612900 }, { "epoch": 79.25016160310278, "grad_norm": 1.4172370433807373, "learning_rate": 0.001, "loss": 2.0888, "step": 613000 }, { "epoch": 79.26308985132515, "grad_norm": 1.2228267192840576, "learning_rate": 0.001, "loss": 2.0847, "step": 613100 }, { "epoch": 79.27601809954751, "grad_norm": 1.75875723361969, "learning_rate": 0.001, "loss": 2.0831, "step": 613200 }, { "epoch": 79.28894634776988, "grad_norm": 1.0879902839660645, "learning_rate": 0.001, "loss": 2.0899, "step": 613300 }, { "epoch": 79.30187459599225, "grad_norm": 1.510006308555603, "learning_rate": 0.001, "loss": 2.0814, "step": 613400 }, { "epoch": 79.31480284421461, "grad_norm": 1.2373236417770386, "learning_rate": 0.001, "loss": 2.0736, "step": 613500 }, { "epoch": 79.32773109243698, "grad_norm": 1.0163838863372803, "learning_rate": 0.001, "loss": 2.0854, "step": 613600 }, { "epoch": 79.34065934065934, "grad_norm": 0.9690737128257751, "learning_rate": 0.001, "loss": 2.0838, "step": 613700 }, { "epoch": 79.35358758888171, "grad_norm": 1.3920857906341553, "learning_rate": 0.001, "loss": 2.0933, "step": 613800 }, { "epoch": 79.36651583710407, "grad_norm": 1.556525468826294, "learning_rate": 0.001, "loss": 2.0925, "step": 613900 }, { "epoch": 79.37944408532644, "grad_norm": 1.4764833450317383, "learning_rate": 0.001, "loss": 2.0841, "step": 614000 }, { "epoch": 79.3923723335488, "grad_norm": 1.621255874633789, "learning_rate": 0.001, "loss": 2.0862, "step": 614100 }, { "epoch": 79.40530058177117, "grad_norm": 1.1596927642822266, "learning_rate": 0.001, "loss": 2.1127, "step": 614200 }, { "epoch": 79.41822882999354, "grad_norm": 1.418266773223877, "learning_rate": 0.001, "loss": 2.0996, "step": 614300 }, { "epoch": 79.4311570782159, "grad_norm": 3.525491952896118, "learning_rate": 0.001, "loss": 2.109, "step": 614400 }, { "epoch": 79.44408532643827, "grad_norm": 1.9132565259933472, "learning_rate": 0.001, "loss": 2.1099, "step": 614500 }, { "epoch": 79.45701357466064, "grad_norm": 2.396066665649414, "learning_rate": 0.001, "loss": 2.105, "step": 614600 }, { "epoch": 79.469941822883, "grad_norm": 1.425046682357788, "learning_rate": 0.001, "loss": 2.1195, "step": 614700 }, { "epoch": 79.48287007110537, "grad_norm": 1.103775143623352, "learning_rate": 0.001, "loss": 2.1205, "step": 614800 }, { "epoch": 79.49579831932773, "grad_norm": 1.0750572681427002, "learning_rate": 0.001, "loss": 2.1257, "step": 614900 }, { "epoch": 79.5087265675501, "grad_norm": 1.2885066270828247, "learning_rate": 0.001, "loss": 2.1163, "step": 615000 }, { "epoch": 79.52165481577246, "grad_norm": 19.55661392211914, "learning_rate": 0.001, "loss": 2.091, "step": 615100 }, { "epoch": 79.53458306399483, "grad_norm": 1.2279222011566162, "learning_rate": 0.001, "loss": 2.1349, "step": 615200 }, { "epoch": 79.5475113122172, "grad_norm": 1.208027958869934, "learning_rate": 0.001, "loss": 2.1064, "step": 615300 }, { "epoch": 79.56043956043956, "grad_norm": 1.6336714029312134, "learning_rate": 0.001, "loss": 2.1006, "step": 615400 }, { "epoch": 79.57336780866193, "grad_norm": 0.9578374028205872, "learning_rate": 0.001, "loss": 2.1079, "step": 615500 }, { "epoch": 79.5862960568843, "grad_norm": 1.8860774040222168, "learning_rate": 0.001, "loss": 2.127, "step": 615600 }, { "epoch": 79.59922430510666, "grad_norm": 0.9959498047828674, "learning_rate": 0.001, "loss": 2.1193, "step": 615700 }, { "epoch": 79.61215255332903, "grad_norm": 1.312226414680481, "learning_rate": 0.001, "loss": 2.1206, "step": 615800 }, { "epoch": 79.62508080155139, "grad_norm": 1.0286426544189453, "learning_rate": 0.001, "loss": 2.1161, "step": 615900 }, { "epoch": 79.63800904977376, "grad_norm": 2.349815845489502, "learning_rate": 0.001, "loss": 2.1001, "step": 616000 }, { "epoch": 79.65093729799612, "grad_norm": 1.2692545652389526, "learning_rate": 0.001, "loss": 2.109, "step": 616100 }, { "epoch": 79.66386554621849, "grad_norm": 1.640408992767334, "learning_rate": 0.001, "loss": 2.1218, "step": 616200 }, { "epoch": 79.67679379444085, "grad_norm": 1.0948641300201416, "learning_rate": 0.001, "loss": 2.1092, "step": 616300 }, { "epoch": 79.68972204266322, "grad_norm": 0.88990318775177, "learning_rate": 0.001, "loss": 2.1186, "step": 616400 }, { "epoch": 79.70265029088559, "grad_norm": 1.47487211227417, "learning_rate": 0.001, "loss": 2.1208, "step": 616500 }, { "epoch": 79.71557853910795, "grad_norm": 25.315872192382812, "learning_rate": 0.001, "loss": 2.1105, "step": 616600 }, { "epoch": 79.72850678733032, "grad_norm": 1.0884419679641724, "learning_rate": 0.001, "loss": 2.1114, "step": 616700 }, { "epoch": 79.74143503555268, "grad_norm": 1.0800679922103882, "learning_rate": 0.001, "loss": 2.1249, "step": 616800 }, { "epoch": 79.75436328377505, "grad_norm": 1.751590371131897, "learning_rate": 0.001, "loss": 2.143, "step": 616900 }, { "epoch": 79.76729153199742, "grad_norm": 1.0246540307998657, "learning_rate": 0.001, "loss": 2.1319, "step": 617000 }, { "epoch": 79.78021978021978, "grad_norm": 1.194527506828308, "learning_rate": 0.001, "loss": 2.1305, "step": 617100 }, { "epoch": 79.79314802844215, "grad_norm": 1.0965057611465454, "learning_rate": 0.001, "loss": 2.1322, "step": 617200 }, { "epoch": 79.80607627666451, "grad_norm": 1.0992811918258667, "learning_rate": 0.001, "loss": 2.1129, "step": 617300 }, { "epoch": 79.81900452488688, "grad_norm": 1.12116277217865, "learning_rate": 0.001, "loss": 2.1418, "step": 617400 }, { "epoch": 79.83193277310924, "grad_norm": 1.079067587852478, "learning_rate": 0.001, "loss": 2.1337, "step": 617500 }, { "epoch": 79.84486102133161, "grad_norm": 1.2872263193130493, "learning_rate": 0.001, "loss": 2.1293, "step": 617600 }, { "epoch": 79.85778926955398, "grad_norm": 1.5195297002792358, "learning_rate": 0.001, "loss": 2.1148, "step": 617700 }, { "epoch": 79.87071751777634, "grad_norm": 1.1766350269317627, "learning_rate": 0.001, "loss": 2.1427, "step": 617800 }, { "epoch": 79.88364576599871, "grad_norm": 1.3812706470489502, "learning_rate": 0.001, "loss": 2.1358, "step": 617900 }, { "epoch": 79.89657401422107, "grad_norm": 1.0496364831924438, "learning_rate": 0.001, "loss": 2.1369, "step": 618000 }, { "epoch": 79.90950226244344, "grad_norm": 1.402374505996704, "learning_rate": 0.001, "loss": 2.1339, "step": 618100 }, { "epoch": 79.9224305106658, "grad_norm": 1.4655194282531738, "learning_rate": 0.001, "loss": 2.1418, "step": 618200 }, { "epoch": 79.93535875888817, "grad_norm": 1.3051177263259888, "learning_rate": 0.001, "loss": 2.1537, "step": 618300 }, { "epoch": 79.94828700711054, "grad_norm": 1.0932332277297974, "learning_rate": 0.001, "loss": 2.144, "step": 618400 }, { "epoch": 79.9612152553329, "grad_norm": 1.6426887512207031, "learning_rate": 0.001, "loss": 2.133, "step": 618500 }, { "epoch": 79.97414350355527, "grad_norm": 0.9657354950904846, "learning_rate": 0.001, "loss": 2.1406, "step": 618600 }, { "epoch": 79.98707175177763, "grad_norm": 0.9550199508666992, "learning_rate": 0.001, "loss": 2.1451, "step": 618700 }, { "epoch": 80.0, "grad_norm": 4.0951008796691895, "learning_rate": 0.001, "loss": 2.1003, "step": 618800 }, { "epoch": 80.01292824822237, "grad_norm": 67.59344482421875, "learning_rate": 0.001, "loss": 2.0612, "step": 618900 }, { "epoch": 80.02585649644473, "grad_norm": 2.472365617752075, "learning_rate": 0.001, "loss": 2.0643, "step": 619000 }, { "epoch": 80.0387847446671, "grad_norm": 2.2379002571105957, "learning_rate": 0.001, "loss": 2.0685, "step": 619100 }, { "epoch": 80.05171299288946, "grad_norm": 2.5067100524902344, "learning_rate": 0.001, "loss": 2.0825, "step": 619200 }, { "epoch": 80.06464124111183, "grad_norm": 2.652261972427368, "learning_rate": 0.001, "loss": 2.0418, "step": 619300 }, { "epoch": 80.0775694893342, "grad_norm": 2.287304401397705, "learning_rate": 0.001, "loss": 2.0566, "step": 619400 }, { "epoch": 80.09049773755656, "grad_norm": 1.7235380411148071, "learning_rate": 0.001, "loss": 2.0626, "step": 619500 }, { "epoch": 80.10342598577893, "grad_norm": 2.1902945041656494, "learning_rate": 0.001, "loss": 2.0644, "step": 619600 }, { "epoch": 80.11635423400129, "grad_norm": 3.0437002182006836, "learning_rate": 0.001, "loss": 2.0818, "step": 619700 }, { "epoch": 80.12928248222366, "grad_norm": 2.405482292175293, "learning_rate": 0.001, "loss": 2.0762, "step": 619800 }, { "epoch": 80.14221073044602, "grad_norm": 2.324348211288452, "learning_rate": 0.001, "loss": 2.0582, "step": 619900 }, { "epoch": 80.15513897866839, "grad_norm": 1.9978338479995728, "learning_rate": 0.001, "loss": 2.0619, "step": 620000 }, { "epoch": 80.16806722689076, "grad_norm": 1.4679151773452759, "learning_rate": 0.001, "loss": 2.0673, "step": 620100 }, { "epoch": 80.18099547511312, "grad_norm": 1.5124629735946655, "learning_rate": 0.001, "loss": 2.0778, "step": 620200 }, { "epoch": 80.19392372333549, "grad_norm": 2.0438427925109863, "learning_rate": 0.001, "loss": 2.0762, "step": 620300 }, { "epoch": 80.20685197155785, "grad_norm": 1.8609812259674072, "learning_rate": 0.001, "loss": 2.0651, "step": 620400 }, { "epoch": 80.21978021978022, "grad_norm": 2.026766777038574, "learning_rate": 0.001, "loss": 2.0615, "step": 620500 }, { "epoch": 80.23270846800258, "grad_norm": 1.6643664836883545, "learning_rate": 0.001, "loss": 2.0994, "step": 620600 }, { "epoch": 80.24563671622495, "grad_norm": 2.019871950149536, "learning_rate": 0.001, "loss": 2.0676, "step": 620700 }, { "epoch": 80.25856496444732, "grad_norm": 2.395250082015991, "learning_rate": 0.001, "loss": 2.0754, "step": 620800 }, { "epoch": 80.27149321266968, "grad_norm": 2.4144647121429443, "learning_rate": 0.001, "loss": 2.0766, "step": 620900 }, { "epoch": 80.28442146089205, "grad_norm": 1.95177161693573, "learning_rate": 0.001, "loss": 2.0888, "step": 621000 }, { "epoch": 80.29734970911441, "grad_norm": 1.7972345352172852, "learning_rate": 0.001, "loss": 2.0928, "step": 621100 }, { "epoch": 80.31027795733678, "grad_norm": 2.0481369495391846, "learning_rate": 0.001, "loss": 2.0929, "step": 621200 }, { "epoch": 80.32320620555915, "grad_norm": 3.670073986053467, "learning_rate": 0.001, "loss": 2.0905, "step": 621300 }, { "epoch": 80.33613445378151, "grad_norm": 2.8290956020355225, "learning_rate": 0.001, "loss": 2.091, "step": 621400 }, { "epoch": 80.34906270200388, "grad_norm": 1.841187834739685, "learning_rate": 0.001, "loss": 2.0908, "step": 621500 }, { "epoch": 80.36199095022624, "grad_norm": 1.939766764640808, "learning_rate": 0.001, "loss": 2.0923, "step": 621600 }, { "epoch": 80.37491919844861, "grad_norm": 1.4274147748947144, "learning_rate": 0.001, "loss": 2.066, "step": 621700 }, { "epoch": 80.38784744667097, "grad_norm": 1.869908332824707, "learning_rate": 0.001, "loss": 2.0928, "step": 621800 }, { "epoch": 80.40077569489334, "grad_norm": 1.7871127128601074, "learning_rate": 0.001, "loss": 2.0878, "step": 621900 }, { "epoch": 80.4137039431157, "grad_norm": 2.2895920276641846, "learning_rate": 0.001, "loss": 2.0893, "step": 622000 }, { "epoch": 80.42663219133807, "grad_norm": 2.099015235900879, "learning_rate": 0.001, "loss": 2.0944, "step": 622100 }, { "epoch": 80.43956043956044, "grad_norm": 2.6208066940307617, "learning_rate": 0.001, "loss": 2.1086, "step": 622200 }, { "epoch": 80.4524886877828, "grad_norm": 2.378777503967285, "learning_rate": 0.001, "loss": 2.0885, "step": 622300 }, { "epoch": 80.46541693600517, "grad_norm": 2.4655768871307373, "learning_rate": 0.001, "loss": 2.1005, "step": 622400 }, { "epoch": 80.47834518422754, "grad_norm": 1.7955025434494019, "learning_rate": 0.001, "loss": 2.0911, "step": 622500 }, { "epoch": 80.4912734324499, "grad_norm": 1.8331882953643799, "learning_rate": 0.001, "loss": 2.1117, "step": 622600 }, { "epoch": 80.50420168067227, "grad_norm": 2.18530535697937, "learning_rate": 0.001, "loss": 2.1038, "step": 622700 }, { "epoch": 80.51712992889463, "grad_norm": 1.4810092449188232, "learning_rate": 0.001, "loss": 2.123, "step": 622800 }, { "epoch": 80.530058177117, "grad_norm": 1.5327521562576294, "learning_rate": 0.001, "loss": 2.1387, "step": 622900 }, { "epoch": 80.54298642533936, "grad_norm": 6.281840801239014, "learning_rate": 0.001, "loss": 2.1021, "step": 623000 }, { "epoch": 80.55591467356173, "grad_norm": 1.7097290754318237, "learning_rate": 0.001, "loss": 2.1033, "step": 623100 }, { "epoch": 80.5688429217841, "grad_norm": 2.218503475189209, "learning_rate": 0.001, "loss": 2.0964, "step": 623200 }, { "epoch": 80.58177117000646, "grad_norm": 1.991477370262146, "learning_rate": 0.001, "loss": 2.0848, "step": 623300 }, { "epoch": 80.59469941822883, "grad_norm": 1.6037907600402832, "learning_rate": 0.001, "loss": 2.1198, "step": 623400 }, { "epoch": 80.6076276664512, "grad_norm": 1.7070319652557373, "learning_rate": 0.001, "loss": 2.1173, "step": 623500 }, { "epoch": 80.62055591467356, "grad_norm": 2.6679580211639404, "learning_rate": 0.001, "loss": 2.1152, "step": 623600 }, { "epoch": 80.63348416289593, "grad_norm": 1.917973279953003, "learning_rate": 0.001, "loss": 2.1122, "step": 623700 }, { "epoch": 80.64641241111829, "grad_norm": 1.5669606924057007, "learning_rate": 0.001, "loss": 2.1243, "step": 623800 }, { "epoch": 80.65934065934066, "grad_norm": 1.6265188455581665, "learning_rate": 0.001, "loss": 2.1161, "step": 623900 }, { "epoch": 80.67226890756302, "grad_norm": 6.235071659088135, "learning_rate": 0.001, "loss": 2.1212, "step": 624000 }, { "epoch": 80.68519715578539, "grad_norm": 1.6588214635849, "learning_rate": 0.001, "loss": 2.1248, "step": 624100 }, { "epoch": 80.69812540400775, "grad_norm": 3.2201955318450928, "learning_rate": 0.001, "loss": 2.1103, "step": 624200 }, { "epoch": 80.71105365223012, "grad_norm": 2.425413131713867, "learning_rate": 0.001, "loss": 2.112, "step": 624300 }, { "epoch": 80.72398190045249, "grad_norm": 3.424060583114624, "learning_rate": 0.001, "loss": 2.1098, "step": 624400 }, { "epoch": 80.73691014867485, "grad_norm": 2.116227626800537, "learning_rate": 0.001, "loss": 2.1312, "step": 624500 }, { "epoch": 80.74983839689722, "grad_norm": 1.8862825632095337, "learning_rate": 0.001, "loss": 2.1279, "step": 624600 }, { "epoch": 80.76276664511958, "grad_norm": 2.0393621921539307, "learning_rate": 0.001, "loss": 2.1148, "step": 624700 }, { "epoch": 80.77569489334195, "grad_norm": 2.0290579795837402, "learning_rate": 0.001, "loss": 2.1226, "step": 624800 }, { "epoch": 80.78862314156432, "grad_norm": 1.408251166343689, "learning_rate": 0.001, "loss": 2.1293, "step": 624900 }, { "epoch": 80.80155138978668, "grad_norm": 1.6290861368179321, "learning_rate": 0.001, "loss": 2.1365, "step": 625000 }, { "epoch": 80.81447963800905, "grad_norm": 1.8673828840255737, "learning_rate": 0.001, "loss": 2.1203, "step": 625100 }, { "epoch": 80.82740788623141, "grad_norm": 1.8712096214294434, "learning_rate": 0.001, "loss": 2.125, "step": 625200 }, { "epoch": 80.84033613445378, "grad_norm": 1.700962781906128, "learning_rate": 0.001, "loss": 2.1273, "step": 625300 }, { "epoch": 80.85326438267614, "grad_norm": 1.7121787071228027, "learning_rate": 0.001, "loss": 2.1418, "step": 625400 }, { "epoch": 80.86619263089851, "grad_norm": 11.823551177978516, "learning_rate": 0.001, "loss": 2.1352, "step": 625500 }, { "epoch": 80.87912087912088, "grad_norm": 1.841294527053833, "learning_rate": 0.001, "loss": 2.1201, "step": 625600 }, { "epoch": 80.89204912734324, "grad_norm": 3.33683705329895, "learning_rate": 0.001, "loss": 2.1371, "step": 625700 }, { "epoch": 80.90497737556561, "grad_norm": 1.4440866708755493, "learning_rate": 0.001, "loss": 2.1375, "step": 625800 }, { "epoch": 80.91790562378797, "grad_norm": 1.623294711112976, "learning_rate": 0.001, "loss": 2.1416, "step": 625900 }, { "epoch": 80.93083387201034, "grad_norm": 2.1891884803771973, "learning_rate": 0.001, "loss": 2.1218, "step": 626000 }, { "epoch": 80.9437621202327, "grad_norm": 1.8794009685516357, "learning_rate": 0.001, "loss": 2.1377, "step": 626100 }, { "epoch": 80.95669036845507, "grad_norm": 2.4809935092926025, "learning_rate": 0.001, "loss": 2.1301, "step": 626200 }, { "epoch": 80.96961861667744, "grad_norm": 2.0178980827331543, "learning_rate": 0.001, "loss": 2.1202, "step": 626300 }, { "epoch": 80.9825468648998, "grad_norm": 1.4696441888809204, "learning_rate": 0.001, "loss": 2.1504, "step": 626400 }, { "epoch": 80.99547511312217, "grad_norm": 2.5312840938568115, "learning_rate": 0.001, "loss": 2.1495, "step": 626500 }, { "epoch": 81.00840336134453, "grad_norm": 0.898420512676239, "learning_rate": 0.001, "loss": 2.1011, "step": 626600 }, { "epoch": 81.0213316095669, "grad_norm": 1.1840877532958984, "learning_rate": 0.001, "loss": 2.0688, "step": 626700 }, { "epoch": 81.03425985778927, "grad_norm": 0.9973182082176208, "learning_rate": 0.001, "loss": 2.0468, "step": 626800 }, { "epoch": 81.04718810601163, "grad_norm": 0.8454036116600037, "learning_rate": 0.001, "loss": 2.0466, "step": 626900 }, { "epoch": 81.060116354234, "grad_norm": 1.1826258897781372, "learning_rate": 0.001, "loss": 2.0479, "step": 627000 }, { "epoch": 81.07304460245636, "grad_norm": 1.4695624113082886, "learning_rate": 0.001, "loss": 2.0705, "step": 627100 }, { "epoch": 81.08597285067873, "grad_norm": 1.3335250616073608, "learning_rate": 0.001, "loss": 2.0696, "step": 627200 }, { "epoch": 81.0989010989011, "grad_norm": 1.705963373184204, "learning_rate": 0.001, "loss": 2.0711, "step": 627300 }, { "epoch": 81.11182934712346, "grad_norm": 0.9340882897377014, "learning_rate": 0.001, "loss": 2.0864, "step": 627400 }, { "epoch": 81.12475759534583, "grad_norm": 1.056546688079834, "learning_rate": 0.001, "loss": 2.0813, "step": 627500 }, { "epoch": 81.13768584356819, "grad_norm": 18.98674201965332, "learning_rate": 0.001, "loss": 2.0682, "step": 627600 }, { "epoch": 81.15061409179056, "grad_norm": 1.0840765237808228, "learning_rate": 0.001, "loss": 2.0961, "step": 627700 }, { "epoch": 81.16354234001292, "grad_norm": 1.0193394422531128, "learning_rate": 0.001, "loss": 2.0662, "step": 627800 }, { "epoch": 81.17647058823529, "grad_norm": 2.295135259628296, "learning_rate": 0.001, "loss": 2.0737, "step": 627900 }, { "epoch": 81.18939883645766, "grad_norm": 1.5771607160568237, "learning_rate": 0.001, "loss": 2.072, "step": 628000 }, { "epoch": 81.20232708468002, "grad_norm": 1.223828673362732, "learning_rate": 0.001, "loss": 2.0751, "step": 628100 }, { "epoch": 81.21525533290239, "grad_norm": 5.472410202026367, "learning_rate": 0.001, "loss": 2.071, "step": 628200 }, { "epoch": 81.22818358112475, "grad_norm": 1.1092720031738281, "learning_rate": 0.001, "loss": 2.0838, "step": 628300 }, { "epoch": 81.24111182934712, "grad_norm": 2.135404348373413, "learning_rate": 0.001, "loss": 2.0891, "step": 628400 }, { "epoch": 81.25404007756948, "grad_norm": 1.0024324655532837, "learning_rate": 0.001, "loss": 2.0834, "step": 628500 }, { "epoch": 81.26696832579185, "grad_norm": 1.04876708984375, "learning_rate": 0.001, "loss": 2.0962, "step": 628600 }, { "epoch": 81.27989657401422, "grad_norm": 5.757811069488525, "learning_rate": 0.001, "loss": 2.0957, "step": 628700 }, { "epoch": 81.29282482223658, "grad_norm": 11.608166694641113, "learning_rate": 0.001, "loss": 2.0924, "step": 628800 }, { "epoch": 81.30575307045895, "grad_norm": 1.0945712327957153, "learning_rate": 0.001, "loss": 2.0871, "step": 628900 }, { "epoch": 81.31868131868131, "grad_norm": 1.3414068222045898, "learning_rate": 0.001, "loss": 2.0879, "step": 629000 }, { "epoch": 81.33160956690368, "grad_norm": 1.2017320394515991, "learning_rate": 0.001, "loss": 2.0805, "step": 629100 }, { "epoch": 81.34453781512605, "grad_norm": 1.1477261781692505, "learning_rate": 0.001, "loss": 2.0916, "step": 629200 }, { "epoch": 81.35746606334841, "grad_norm": 1.2923879623413086, "learning_rate": 0.001, "loss": 2.0881, "step": 629300 }, { "epoch": 81.37039431157078, "grad_norm": 1.2787201404571533, "learning_rate": 0.001, "loss": 2.0924, "step": 629400 }, { "epoch": 81.38332255979314, "grad_norm": 1.1234692335128784, "learning_rate": 0.001, "loss": 2.0988, "step": 629500 }, { "epoch": 81.39625080801551, "grad_norm": 1.1327135562896729, "learning_rate": 0.001, "loss": 2.0706, "step": 629600 }, { "epoch": 81.40917905623787, "grad_norm": 0.9369994401931763, "learning_rate": 0.001, "loss": 2.0962, "step": 629700 }, { "epoch": 81.42210730446024, "grad_norm": 1.0579102039337158, "learning_rate": 0.001, "loss": 2.114, "step": 629800 }, { "epoch": 81.4350355526826, "grad_norm": 1.536880373954773, "learning_rate": 0.001, "loss": 2.0908, "step": 629900 }, { "epoch": 81.44796380090497, "grad_norm": 1.3206192255020142, "learning_rate": 0.001, "loss": 2.1167, "step": 630000 }, { "epoch": 81.46089204912734, "grad_norm": 1.2109382152557373, "learning_rate": 0.001, "loss": 2.098, "step": 630100 }, { "epoch": 81.4738202973497, "grad_norm": 1.3300946950912476, "learning_rate": 0.001, "loss": 2.0907, "step": 630200 }, { "epoch": 81.48674854557207, "grad_norm": 1.4449055194854736, "learning_rate": 0.001, "loss": 2.1182, "step": 630300 }, { "epoch": 81.49967679379444, "grad_norm": 1.0854624509811401, "learning_rate": 0.001, "loss": 2.0925, "step": 630400 }, { "epoch": 81.5126050420168, "grad_norm": 1.2925879955291748, "learning_rate": 0.001, "loss": 2.0816, "step": 630500 }, { "epoch": 81.52553329023917, "grad_norm": 1.3234062194824219, "learning_rate": 0.001, "loss": 2.0866, "step": 630600 }, { "epoch": 81.53846153846153, "grad_norm": 0.8852449655532837, "learning_rate": 0.001, "loss": 2.1026, "step": 630700 }, { "epoch": 81.5513897866839, "grad_norm": 1.3305385112762451, "learning_rate": 0.001, "loss": 2.1047, "step": 630800 }, { "epoch": 81.56431803490626, "grad_norm": 0.961287796497345, "learning_rate": 0.001, "loss": 2.1133, "step": 630900 }, { "epoch": 81.57724628312863, "grad_norm": 1.237219214439392, "learning_rate": 0.001, "loss": 2.123, "step": 631000 }, { "epoch": 81.590174531351, "grad_norm": 1.9292972087860107, "learning_rate": 0.001, "loss": 2.1232, "step": 631100 }, { "epoch": 81.60310277957336, "grad_norm": 0.830272912979126, "learning_rate": 0.001, "loss": 2.1148, "step": 631200 }, { "epoch": 81.61603102779573, "grad_norm": 0.9293739199638367, "learning_rate": 0.001, "loss": 2.1118, "step": 631300 }, { "epoch": 81.6289592760181, "grad_norm": 1.303252935409546, "learning_rate": 0.001, "loss": 2.1057, "step": 631400 }, { "epoch": 81.64188752424046, "grad_norm": 0.9266567826271057, "learning_rate": 0.001, "loss": 2.1225, "step": 631500 }, { "epoch": 81.65481577246283, "grad_norm": 1.015781283378601, "learning_rate": 0.001, "loss": 2.1271, "step": 631600 }, { "epoch": 81.66774402068519, "grad_norm": 26.113357543945312, "learning_rate": 0.001, "loss": 2.1146, "step": 631700 }, { "epoch": 81.68067226890756, "grad_norm": 1.1797153949737549, "learning_rate": 0.001, "loss": 2.1136, "step": 631800 }, { "epoch": 81.69360051712992, "grad_norm": 1.2828868627548218, "learning_rate": 0.001, "loss": 2.1172, "step": 631900 }, { "epoch": 81.70652876535229, "grad_norm": 1.569140911102295, "learning_rate": 0.001, "loss": 2.1246, "step": 632000 }, { "epoch": 81.71945701357465, "grad_norm": 1.474955439567566, "learning_rate": 0.001, "loss": 2.1322, "step": 632100 }, { "epoch": 81.73238526179702, "grad_norm": 1.7141127586364746, "learning_rate": 0.001, "loss": 2.1349, "step": 632200 }, { "epoch": 81.74531351001939, "grad_norm": 1.515311360359192, "learning_rate": 0.001, "loss": 2.1257, "step": 632300 }, { "epoch": 81.75824175824175, "grad_norm": 1.0447628498077393, "learning_rate": 0.001, "loss": 2.1129, "step": 632400 }, { "epoch": 81.77117000646412, "grad_norm": 0.9238556623458862, "learning_rate": 0.001, "loss": 2.1343, "step": 632500 }, { "epoch": 81.78409825468648, "grad_norm": 2.1712074279785156, "learning_rate": 0.001, "loss": 2.1347, "step": 632600 }, { "epoch": 81.79702650290885, "grad_norm": 0.990210235118866, "learning_rate": 0.001, "loss": 2.1096, "step": 632700 }, { "epoch": 81.80995475113122, "grad_norm": 17.692834854125977, "learning_rate": 0.001, "loss": 2.1282, "step": 632800 }, { "epoch": 81.82288299935358, "grad_norm": 0.8523116111755371, "learning_rate": 0.001, "loss": 2.1209, "step": 632900 }, { "epoch": 81.83581124757595, "grad_norm": 1.789852261543274, "learning_rate": 0.001, "loss": 2.1274, "step": 633000 }, { "epoch": 81.84873949579831, "grad_norm": 2.152006149291992, "learning_rate": 0.001, "loss": 2.1244, "step": 633100 }, { "epoch": 81.86166774402068, "grad_norm": 0.9003329277038574, "learning_rate": 0.001, "loss": 2.1435, "step": 633200 }, { "epoch": 81.87459599224304, "grad_norm": 0.873390257358551, "learning_rate": 0.001, "loss": 2.1196, "step": 633300 }, { "epoch": 81.88752424046541, "grad_norm": 1.0904775857925415, "learning_rate": 0.001, "loss": 2.1307, "step": 633400 }, { "epoch": 81.90045248868778, "grad_norm": 1.0192941427230835, "learning_rate": 0.001, "loss": 2.1281, "step": 633500 }, { "epoch": 81.91338073691014, "grad_norm": 0.8671779036521912, "learning_rate": 0.001, "loss": 2.1192, "step": 633600 }, { "epoch": 81.92630898513251, "grad_norm": 1.0093969106674194, "learning_rate": 0.001, "loss": 2.1277, "step": 633700 }, { "epoch": 81.93923723335487, "grad_norm": 0.9601141810417175, "learning_rate": 0.001, "loss": 2.1351, "step": 633800 }, { "epoch": 81.95216548157724, "grad_norm": 0.7719299793243408, "learning_rate": 0.001, "loss": 2.1379, "step": 633900 }, { "epoch": 81.9650937297996, "grad_norm": 1.3677656650543213, "learning_rate": 0.001, "loss": 2.1309, "step": 634000 }, { "epoch": 81.97802197802197, "grad_norm": 1.0692615509033203, "learning_rate": 0.001, "loss": 2.1329, "step": 634100 }, { "epoch": 81.99095022624434, "grad_norm": 1.108319878578186, "learning_rate": 0.001, "loss": 2.1408, "step": 634200 }, { "epoch": 82.00387847446672, "grad_norm": 1.059198021888733, "learning_rate": 0.001, "loss": 2.1338, "step": 634300 }, { "epoch": 82.01680672268908, "grad_norm": 1.6152838468551636, "learning_rate": 0.001, "loss": 2.0673, "step": 634400 }, { "epoch": 82.02973497091145, "grad_norm": 1.0060441493988037, "learning_rate": 0.001, "loss": 2.0585, "step": 634500 }, { "epoch": 82.04266321913381, "grad_norm": 1.4990040063858032, "learning_rate": 0.001, "loss": 2.0533, "step": 634600 }, { "epoch": 82.05559146735618, "grad_norm": 0.9469605684280396, "learning_rate": 0.001, "loss": 2.0657, "step": 634700 }, { "epoch": 82.06851971557855, "grad_norm": 1.150113821029663, "learning_rate": 0.001, "loss": 2.0512, "step": 634800 }, { "epoch": 82.08144796380091, "grad_norm": 4.401688098907471, "learning_rate": 0.001, "loss": 2.0595, "step": 634900 }, { "epoch": 82.09437621202328, "grad_norm": 1.628347635269165, "learning_rate": 0.001, "loss": 2.0597, "step": 635000 }, { "epoch": 82.10730446024564, "grad_norm": 0.9817349910736084, "learning_rate": 0.001, "loss": 2.0587, "step": 635100 }, { "epoch": 82.12023270846801, "grad_norm": 1.1325582265853882, "learning_rate": 0.001, "loss": 2.0501, "step": 635200 }, { "epoch": 82.13316095669038, "grad_norm": 1.3616832494735718, "learning_rate": 0.001, "loss": 2.0668, "step": 635300 }, { "epoch": 82.14608920491274, "grad_norm": 0.9697052836418152, "learning_rate": 0.001, "loss": 2.0603, "step": 635400 }, { "epoch": 82.1590174531351, "grad_norm": 0.933107316493988, "learning_rate": 0.001, "loss": 2.0657, "step": 635500 }, { "epoch": 82.17194570135747, "grad_norm": 2.3463199138641357, "learning_rate": 0.001, "loss": 2.0633, "step": 635600 }, { "epoch": 82.18487394957984, "grad_norm": 0.8837299942970276, "learning_rate": 0.001, "loss": 2.0643, "step": 635700 }, { "epoch": 82.1978021978022, "grad_norm": 1.467429280281067, "learning_rate": 0.001, "loss": 2.0737, "step": 635800 }, { "epoch": 82.21073044602457, "grad_norm": 1.4701052904129028, "learning_rate": 0.001, "loss": 2.0716, "step": 635900 }, { "epoch": 82.22365869424694, "grad_norm": 1.6988861560821533, "learning_rate": 0.001, "loss": 2.0542, "step": 636000 }, { "epoch": 82.2365869424693, "grad_norm": 0.9107171893119812, "learning_rate": 0.001, "loss": 2.0769, "step": 636100 }, { "epoch": 82.24951519069167, "grad_norm": 0.9122713208198547, "learning_rate": 0.001, "loss": 2.0706, "step": 636200 }, { "epoch": 82.26244343891403, "grad_norm": 1.5010724067687988, "learning_rate": 0.001, "loss": 2.088, "step": 636300 }, { "epoch": 82.2753716871364, "grad_norm": 0.9741874933242798, "learning_rate": 0.001, "loss": 2.0755, "step": 636400 }, { "epoch": 82.28829993535876, "grad_norm": 1.5274755954742432, "learning_rate": 0.001, "loss": 2.0819, "step": 636500 }, { "epoch": 82.30122818358113, "grad_norm": 0.8499266505241394, "learning_rate": 0.001, "loss": 2.0929, "step": 636600 }, { "epoch": 82.3141564318035, "grad_norm": 1.9699859619140625, "learning_rate": 0.001, "loss": 2.0924, "step": 636700 }, { "epoch": 82.32708468002586, "grad_norm": 1.0324101448059082, "learning_rate": 0.001, "loss": 2.0875, "step": 636800 }, { "epoch": 82.34001292824823, "grad_norm": 1.1977040767669678, "learning_rate": 0.001, "loss": 2.0814, "step": 636900 }, { "epoch": 82.3529411764706, "grad_norm": 1.6027783155441284, "learning_rate": 0.001, "loss": 2.0744, "step": 637000 }, { "epoch": 82.36586942469296, "grad_norm": 1601.650146484375, "learning_rate": 0.001, "loss": 2.0873, "step": 637100 }, { "epoch": 82.37879767291533, "grad_norm": 1.5458552837371826, "learning_rate": 0.001, "loss": 2.0874, "step": 637200 }, { "epoch": 82.39172592113769, "grad_norm": 4.775110244750977, "learning_rate": 0.001, "loss": 2.0918, "step": 637300 }, { "epoch": 82.40465416936006, "grad_norm": 1.3134816884994507, "learning_rate": 0.001, "loss": 2.1069, "step": 637400 }, { "epoch": 82.41758241758242, "grad_norm": 0.8678467273712158, "learning_rate": 0.001, "loss": 2.0918, "step": 637500 }, { "epoch": 82.43051066580479, "grad_norm": 1.2314307689666748, "learning_rate": 0.001, "loss": 2.0949, "step": 637600 }, { "epoch": 82.44343891402715, "grad_norm": 1.0741289854049683, "learning_rate": 0.001, "loss": 2.0955, "step": 637700 }, { "epoch": 82.45636716224952, "grad_norm": 0.859849214553833, "learning_rate": 0.001, "loss": 2.0952, "step": 637800 }, { "epoch": 82.46929541047189, "grad_norm": 1.1809622049331665, "learning_rate": 0.001, "loss": 2.0822, "step": 637900 }, { "epoch": 82.48222365869425, "grad_norm": 1.0319157838821411, "learning_rate": 0.001, "loss": 2.116, "step": 638000 }, { "epoch": 82.49515190691662, "grad_norm": 1.6506245136260986, "learning_rate": 0.001, "loss": 2.1077, "step": 638100 }, { "epoch": 82.50808015513898, "grad_norm": 1.1961054801940918, "learning_rate": 0.001, "loss": 2.1112, "step": 638200 }, { "epoch": 82.52100840336135, "grad_norm": 0.953344464302063, "learning_rate": 0.001, "loss": 2.088, "step": 638300 }, { "epoch": 82.53393665158372, "grad_norm": 0.8619931936264038, "learning_rate": 0.001, "loss": 2.1139, "step": 638400 }, { "epoch": 82.54686489980608, "grad_norm": 1.0825599431991577, "learning_rate": 0.001, "loss": 2.1171, "step": 638500 }, { "epoch": 82.55979314802845, "grad_norm": 0.8408172726631165, "learning_rate": 0.001, "loss": 2.1155, "step": 638600 }, { "epoch": 82.57272139625081, "grad_norm": 1.1575201749801636, "learning_rate": 0.001, "loss": 2.0918, "step": 638700 }, { "epoch": 82.58564964447318, "grad_norm": 1.1568760871887207, "learning_rate": 0.001, "loss": 2.1045, "step": 638800 }, { "epoch": 82.59857789269554, "grad_norm": 1.084494709968567, "learning_rate": 0.001, "loss": 2.1105, "step": 638900 }, { "epoch": 82.61150614091791, "grad_norm": 1.0799388885498047, "learning_rate": 0.001, "loss": 2.1027, "step": 639000 }, { "epoch": 82.62443438914028, "grad_norm": 2.2552521228790283, "learning_rate": 0.001, "loss": 2.1034, "step": 639100 }, { "epoch": 82.63736263736264, "grad_norm": 1.2471425533294678, "learning_rate": 0.001, "loss": 2.0908, "step": 639200 }, { "epoch": 82.65029088558501, "grad_norm": 1.1169624328613281, "learning_rate": 0.001, "loss": 2.1103, "step": 639300 }, { "epoch": 82.66321913380737, "grad_norm": 1.1591416597366333, "learning_rate": 0.001, "loss": 2.1002, "step": 639400 }, { "epoch": 82.67614738202974, "grad_norm": 1.0750491619110107, "learning_rate": 0.001, "loss": 2.1084, "step": 639500 }, { "epoch": 82.6890756302521, "grad_norm": 1.1126214265823364, "learning_rate": 0.001, "loss": 2.1067, "step": 639600 }, { "epoch": 82.70200387847447, "grad_norm": 1.3901610374450684, "learning_rate": 0.001, "loss": 2.1045, "step": 639700 }, { "epoch": 82.71493212669684, "grad_norm": 1.199165940284729, "learning_rate": 0.001, "loss": 2.0777, "step": 639800 }, { "epoch": 82.7278603749192, "grad_norm": 1.417380928993225, "learning_rate": 0.001, "loss": 2.0884, "step": 639900 }, { "epoch": 82.74078862314157, "grad_norm": 0.8876221776008606, "learning_rate": 0.001, "loss": 2.1193, "step": 640000 }, { "epoch": 82.75371687136393, "grad_norm": 1.3551630973815918, "learning_rate": 0.001, "loss": 2.1136, "step": 640100 }, { "epoch": 82.7666451195863, "grad_norm": 1.089699387550354, "learning_rate": 0.001, "loss": 2.1534, "step": 640200 }, { "epoch": 82.77957336780867, "grad_norm": 1.3690906763076782, "learning_rate": 0.001, "loss": 2.1349, "step": 640300 }, { "epoch": 82.79250161603103, "grad_norm": 25.67148780822754, "learning_rate": 0.001, "loss": 2.1008, "step": 640400 }, { "epoch": 82.8054298642534, "grad_norm": 1.2264010906219482, "learning_rate": 0.001, "loss": 2.124, "step": 640500 }, { "epoch": 82.81835811247576, "grad_norm": 9.402071952819824, "learning_rate": 0.001, "loss": 2.1153, "step": 640600 }, { "epoch": 82.83128636069813, "grad_norm": 1.4877510070800781, "learning_rate": 0.001, "loss": 2.1154, "step": 640700 }, { "epoch": 82.8442146089205, "grad_norm": 1.866662621498108, "learning_rate": 0.001, "loss": 2.123, "step": 640800 }, { "epoch": 82.85714285714286, "grad_norm": 3.6831231117248535, "learning_rate": 0.001, "loss": 2.1114, "step": 640900 }, { "epoch": 82.87007110536523, "grad_norm": 1.5033336877822876, "learning_rate": 0.001, "loss": 2.1395, "step": 641000 }, { "epoch": 82.88299935358759, "grad_norm": 1.4757604598999023, "learning_rate": 0.001, "loss": 2.1229, "step": 641100 }, { "epoch": 82.89592760180996, "grad_norm": 1.3077785968780518, "learning_rate": 0.001, "loss": 2.1231, "step": 641200 }, { "epoch": 82.90885585003232, "grad_norm": 1.1366479396820068, "learning_rate": 0.001, "loss": 2.1103, "step": 641300 }, { "epoch": 82.92178409825469, "grad_norm": 1.3427822589874268, "learning_rate": 0.001, "loss": 2.1252, "step": 641400 }, { "epoch": 82.93471234647706, "grad_norm": 0.919872522354126, "learning_rate": 0.001, "loss": 2.1396, "step": 641500 }, { "epoch": 82.94764059469942, "grad_norm": 7.095617771148682, "learning_rate": 0.001, "loss": 2.136, "step": 641600 }, { "epoch": 82.96056884292179, "grad_norm": 2.3246915340423584, "learning_rate": 0.001, "loss": 2.1396, "step": 641700 }, { "epoch": 82.97349709114415, "grad_norm": 1.6189980506896973, "learning_rate": 0.001, "loss": 2.141, "step": 641800 }, { "epoch": 82.98642533936652, "grad_norm": 1.283534049987793, "learning_rate": 0.001, "loss": 2.1426, "step": 641900 }, { "epoch": 82.99935358758889, "grad_norm": 1.2318929433822632, "learning_rate": 0.001, "loss": 2.1091, "step": 642000 }, { "epoch": 83.01228183581125, "grad_norm": 2.191987991333008, "learning_rate": 0.001, "loss": 2.0412, "step": 642100 }, { "epoch": 83.02521008403362, "grad_norm": 1.8961527347564697, "learning_rate": 0.001, "loss": 2.0362, "step": 642200 }, { "epoch": 83.03813833225598, "grad_norm": 2.764598846435547, "learning_rate": 0.001, "loss": 2.0429, "step": 642300 }, { "epoch": 83.05106658047835, "grad_norm": 2.29594087600708, "learning_rate": 0.001, "loss": 2.0523, "step": 642400 }, { "epoch": 83.06399482870071, "grad_norm": 1.2866952419281006, "learning_rate": 0.001, "loss": 2.0624, "step": 642500 }, { "epoch": 83.07692307692308, "grad_norm": 1.7063655853271484, "learning_rate": 0.001, "loss": 2.043, "step": 642600 }, { "epoch": 83.08985132514545, "grad_norm": 1.6821461915969849, "learning_rate": 0.001, "loss": 2.0602, "step": 642700 }, { "epoch": 83.10277957336781, "grad_norm": 1.1474344730377197, "learning_rate": 0.001, "loss": 2.0458, "step": 642800 }, { "epoch": 83.11570782159018, "grad_norm": 1.1503980159759521, "learning_rate": 0.001, "loss": 2.0693, "step": 642900 }, { "epoch": 83.12863606981254, "grad_norm": 1.131980538368225, "learning_rate": 0.001, "loss": 2.048, "step": 643000 }, { "epoch": 83.14156431803491, "grad_norm": 1.1096972227096558, "learning_rate": 0.001, "loss": 2.0673, "step": 643100 }, { "epoch": 83.15449256625728, "grad_norm": 1.0625840425491333, "learning_rate": 0.001, "loss": 2.0626, "step": 643200 }, { "epoch": 83.16742081447964, "grad_norm": 1.058914303779602, "learning_rate": 0.001, "loss": 2.0813, "step": 643300 }, { "epoch": 83.180349062702, "grad_norm": 1.3958885669708252, "learning_rate": 0.001, "loss": 2.0634, "step": 643400 }, { "epoch": 83.19327731092437, "grad_norm": 1.4092425107955933, "learning_rate": 0.001, "loss": 2.0441, "step": 643500 }, { "epoch": 83.20620555914674, "grad_norm": 1.241809368133545, "learning_rate": 0.001, "loss": 2.0666, "step": 643600 }, { "epoch": 83.2191338073691, "grad_norm": 7.334580421447754, "learning_rate": 0.001, "loss": 2.0709, "step": 643700 }, { "epoch": 83.23206205559147, "grad_norm": 1.292954683303833, "learning_rate": 0.001, "loss": 2.0734, "step": 643800 }, { "epoch": 83.24499030381384, "grad_norm": 1.8039637804031372, "learning_rate": 0.001, "loss": 2.056, "step": 643900 }, { "epoch": 83.2579185520362, "grad_norm": 1.5401641130447388, "learning_rate": 0.001, "loss": 2.0746, "step": 644000 }, { "epoch": 83.27084680025857, "grad_norm": 1.2116327285766602, "learning_rate": 0.001, "loss": 2.0805, "step": 644100 }, { "epoch": 83.28377504848093, "grad_norm": 2.0652008056640625, "learning_rate": 0.001, "loss": 2.0762, "step": 644200 }, { "epoch": 83.2967032967033, "grad_norm": 1.0717241764068604, "learning_rate": 0.001, "loss": 2.0829, "step": 644300 }, { "epoch": 83.30963154492567, "grad_norm": 26.48138427734375, "learning_rate": 0.001, "loss": 2.0851, "step": 644400 }, { "epoch": 83.32255979314803, "grad_norm": 2.7234549522399902, "learning_rate": 0.001, "loss": 2.1024, "step": 644500 }, { "epoch": 83.3354880413704, "grad_norm": 1.8892017602920532, "learning_rate": 0.001, "loss": 2.08, "step": 644600 }, { "epoch": 83.34841628959276, "grad_norm": 1.6482363939285278, "learning_rate": 0.001, "loss": 2.0663, "step": 644700 }, { "epoch": 83.36134453781513, "grad_norm": 1.2491490840911865, "learning_rate": 0.001, "loss": 2.1035, "step": 644800 }, { "epoch": 83.3742727860375, "grad_norm": 1.3357897996902466, "learning_rate": 0.001, "loss": 2.0993, "step": 644900 }, { "epoch": 83.38720103425986, "grad_norm": 1.0316686630249023, "learning_rate": 0.001, "loss": 2.0657, "step": 645000 }, { "epoch": 83.40012928248223, "grad_norm": 1.8308308124542236, "learning_rate": 0.001, "loss": 2.1012, "step": 645100 }, { "epoch": 83.41305753070459, "grad_norm": 1.3121718168258667, "learning_rate": 0.001, "loss": 2.0914, "step": 645200 }, { "epoch": 83.42598577892696, "grad_norm": 1.4757627248764038, "learning_rate": 0.001, "loss": 2.0831, "step": 645300 }, { "epoch": 83.43891402714932, "grad_norm": 1.133423089981079, "learning_rate": 0.001, "loss": 2.0994, "step": 645400 }, { "epoch": 83.45184227537169, "grad_norm": 1.2978835105895996, "learning_rate": 0.001, "loss": 2.0937, "step": 645500 }, { "epoch": 83.46477052359405, "grad_norm": 1.3061819076538086, "learning_rate": 0.001, "loss": 2.0846, "step": 645600 }, { "epoch": 83.47769877181642, "grad_norm": 1.2623627185821533, "learning_rate": 0.001, "loss": 2.1086, "step": 645700 }, { "epoch": 83.49062702003879, "grad_norm": 3.500617265701294, "learning_rate": 0.001, "loss": 2.1198, "step": 645800 }, { "epoch": 83.50355526826115, "grad_norm": 1.1644903421401978, "learning_rate": 0.001, "loss": 2.0865, "step": 645900 }, { "epoch": 83.51648351648352, "grad_norm": 1.433349609375, "learning_rate": 0.001, "loss": 2.0839, "step": 646000 }, { "epoch": 83.52941176470588, "grad_norm": 1.6866918802261353, "learning_rate": 0.001, "loss": 2.1204, "step": 646100 }, { "epoch": 83.54234001292825, "grad_norm": 0.9060424566268921, "learning_rate": 0.001, "loss": 2.0959, "step": 646200 }, { "epoch": 83.55526826115062, "grad_norm": 1.9858638048171997, "learning_rate": 0.001, "loss": 2.0996, "step": 646300 }, { "epoch": 83.56819650937298, "grad_norm": 1.5883089303970337, "learning_rate": 0.001, "loss": 2.1022, "step": 646400 }, { "epoch": 83.58112475759535, "grad_norm": 1.1963671445846558, "learning_rate": 0.001, "loss": 2.1106, "step": 646500 }, { "epoch": 83.59405300581771, "grad_norm": 1.4311537742614746, "learning_rate": 0.001, "loss": 2.0874, "step": 646600 }, { "epoch": 83.60698125404008, "grad_norm": 1.3215177059173584, "learning_rate": 0.001, "loss": 2.1167, "step": 646700 }, { "epoch": 83.61990950226244, "grad_norm": 3.1199793815612793, "learning_rate": 0.001, "loss": 2.0953, "step": 646800 }, { "epoch": 83.63283775048481, "grad_norm": 1.634786605834961, "learning_rate": 0.001, "loss": 2.0968, "step": 646900 }, { "epoch": 83.64576599870718, "grad_norm": 1.8281241655349731, "learning_rate": 0.001, "loss": 2.1066, "step": 647000 }, { "epoch": 83.65869424692954, "grad_norm": 1.278691053390503, "learning_rate": 0.001, "loss": 2.1134, "step": 647100 }, { "epoch": 83.67162249515191, "grad_norm": 5.992390155792236, "learning_rate": 0.001, "loss": 2.0883, "step": 647200 }, { "epoch": 83.68455074337427, "grad_norm": 1.3341615200042725, "learning_rate": 0.001, "loss": 2.0993, "step": 647300 }, { "epoch": 83.69747899159664, "grad_norm": 2.35555100440979, "learning_rate": 0.001, "loss": 2.1133, "step": 647400 }, { "epoch": 83.710407239819, "grad_norm": 1.1659908294677734, "learning_rate": 0.001, "loss": 2.1083, "step": 647500 }, { "epoch": 83.72333548804137, "grad_norm": 1.3878999948501587, "learning_rate": 0.001, "loss": 2.1089, "step": 647600 }, { "epoch": 83.73626373626374, "grad_norm": 1.5155967473983765, "learning_rate": 0.001, "loss": 2.1111, "step": 647700 }, { "epoch": 83.7491919844861, "grad_norm": 2.353053331375122, "learning_rate": 0.001, "loss": 2.1177, "step": 647800 }, { "epoch": 83.76212023270847, "grad_norm": 1.0934016704559326, "learning_rate": 0.001, "loss": 2.1074, "step": 647900 }, { "epoch": 83.77504848093083, "grad_norm": 1.2973486185073853, "learning_rate": 0.001, "loss": 2.1029, "step": 648000 }, { "epoch": 83.7879767291532, "grad_norm": 1.8968769311904907, "learning_rate": 0.001, "loss": 2.1087, "step": 648100 }, { "epoch": 83.80090497737557, "grad_norm": 1.2119934558868408, "learning_rate": 0.001, "loss": 2.1236, "step": 648200 }, { "epoch": 83.81383322559793, "grad_norm": 1.7099463939666748, "learning_rate": 0.001, "loss": 2.114, "step": 648300 }, { "epoch": 83.8267614738203, "grad_norm": 1.6441657543182373, "learning_rate": 0.001, "loss": 2.1119, "step": 648400 }, { "epoch": 83.83968972204266, "grad_norm": 1.277866005897522, "learning_rate": 0.001, "loss": 2.1047, "step": 648500 }, { "epoch": 83.85261797026503, "grad_norm": 1.729787826538086, "learning_rate": 0.001, "loss": 2.1274, "step": 648600 }, { "epoch": 83.8655462184874, "grad_norm": 1.6034270524978638, "learning_rate": 0.001, "loss": 2.1124, "step": 648700 }, { "epoch": 83.87847446670976, "grad_norm": 1.5012274980545044, "learning_rate": 0.001, "loss": 2.1367, "step": 648800 }, { "epoch": 83.89140271493213, "grad_norm": 1.3507400751113892, "learning_rate": 0.001, "loss": 2.1113, "step": 648900 }, { "epoch": 83.9043309631545, "grad_norm": 1.633341670036316, "learning_rate": 0.001, "loss": 2.1309, "step": 649000 }, { "epoch": 83.91725921137686, "grad_norm": 1.7244386672973633, "learning_rate": 0.001, "loss": 2.1244, "step": 649100 }, { "epoch": 83.93018745959922, "grad_norm": 1.2687491178512573, "learning_rate": 0.001, "loss": 2.129, "step": 649200 }, { "epoch": 83.94311570782159, "grad_norm": 1.404370903968811, "learning_rate": 0.001, "loss": 2.1219, "step": 649300 }, { "epoch": 83.95604395604396, "grad_norm": 1.285840392112732, "learning_rate": 0.001, "loss": 2.1442, "step": 649400 }, { "epoch": 83.96897220426632, "grad_norm": 1.2860996723175049, "learning_rate": 0.001, "loss": 2.1306, "step": 649500 }, { "epoch": 83.98190045248869, "grad_norm": 1.1773673295974731, "learning_rate": 0.001, "loss": 2.1065, "step": 649600 }, { "epoch": 83.99482870071105, "grad_norm": 1.2398349046707153, "learning_rate": 0.001, "loss": 2.1375, "step": 649700 }, { "epoch": 84.00775694893342, "grad_norm": 2.3468363285064697, "learning_rate": 0.001, "loss": 2.0375, "step": 649800 }, { "epoch": 84.02068519715579, "grad_norm": 3.15334153175354, "learning_rate": 0.001, "loss": 2.0087, "step": 649900 }, { "epoch": 84.03361344537815, "grad_norm": 1.4316608905792236, "learning_rate": 0.001, "loss": 2.0508, "step": 650000 }, { "epoch": 84.04654169360052, "grad_norm": 1.1624637842178345, "learning_rate": 0.001, "loss": 2.0391, "step": 650100 }, { "epoch": 84.05946994182288, "grad_norm": 1.0140836238861084, "learning_rate": 0.001, "loss": 2.0472, "step": 650200 }, { "epoch": 84.07239819004525, "grad_norm": 1.0067139863967896, "learning_rate": 0.001, "loss": 2.0337, "step": 650300 }, { "epoch": 84.08532643826761, "grad_norm": 1.12550950050354, "learning_rate": 0.001, "loss": 2.0556, "step": 650400 }, { "epoch": 84.09825468648998, "grad_norm": 1.1377090215682983, "learning_rate": 0.001, "loss": 2.0332, "step": 650500 }, { "epoch": 84.11118293471235, "grad_norm": 1.0580837726593018, "learning_rate": 0.001, "loss": 2.0438, "step": 650600 }, { "epoch": 84.12411118293471, "grad_norm": 0.9915028810501099, "learning_rate": 0.001, "loss": 2.0777, "step": 650700 }, { "epoch": 84.13703943115708, "grad_norm": 1.36472749710083, "learning_rate": 0.001, "loss": 2.0593, "step": 650800 }, { "epoch": 84.14996767937944, "grad_norm": 1.0206022262573242, "learning_rate": 0.001, "loss": 2.0588, "step": 650900 }, { "epoch": 84.16289592760181, "grad_norm": 1.244957685470581, "learning_rate": 0.001, "loss": 2.0751, "step": 651000 }, { "epoch": 84.17582417582418, "grad_norm": 8.501677513122559, "learning_rate": 0.001, "loss": 2.0593, "step": 651100 }, { "epoch": 84.18875242404654, "grad_norm": 0.9700061082839966, "learning_rate": 0.001, "loss": 2.0819, "step": 651200 }, { "epoch": 84.2016806722689, "grad_norm": 1.2177278995513916, "learning_rate": 0.001, "loss": 2.0854, "step": 651300 }, { "epoch": 84.21460892049127, "grad_norm": 1.5274765491485596, "learning_rate": 0.001, "loss": 2.0521, "step": 651400 }, { "epoch": 84.22753716871364, "grad_norm": 1.8663558959960938, "learning_rate": 0.001, "loss": 2.0753, "step": 651500 }, { "epoch": 84.240465416936, "grad_norm": 224.54156494140625, "learning_rate": 0.001, "loss": 2.0695, "step": 651600 }, { "epoch": 84.25339366515837, "grad_norm": 0.8723047971725464, "learning_rate": 0.001, "loss": 2.0906, "step": 651700 }, { "epoch": 84.26632191338074, "grad_norm": 1.046882152557373, "learning_rate": 0.001, "loss": 2.0482, "step": 651800 }, { "epoch": 84.2792501616031, "grad_norm": 1.2715624570846558, "learning_rate": 0.001, "loss": 2.0805, "step": 651900 }, { "epoch": 84.29217840982547, "grad_norm": 1.0290415287017822, "learning_rate": 0.001, "loss": 2.0863, "step": 652000 }, { "epoch": 84.30510665804783, "grad_norm": 27.67350959777832, "learning_rate": 0.001, "loss": 2.064, "step": 652100 }, { "epoch": 84.3180349062702, "grad_norm": 1.1210415363311768, "learning_rate": 0.001, "loss": 2.0825, "step": 652200 }, { "epoch": 84.33096315449257, "grad_norm": 1.405429482460022, "learning_rate": 0.001, "loss": 2.0619, "step": 652300 }, { "epoch": 84.34389140271493, "grad_norm": 1.1399813890457153, "learning_rate": 0.001, "loss": 2.1031, "step": 652400 }, { "epoch": 84.3568196509373, "grad_norm": 1.3870123624801636, "learning_rate": 0.001, "loss": 2.0854, "step": 652500 }, { "epoch": 84.36974789915966, "grad_norm": 1.3855551481246948, "learning_rate": 0.001, "loss": 2.1006, "step": 652600 }, { "epoch": 84.38267614738203, "grad_norm": 1.5671840906143188, "learning_rate": 0.001, "loss": 2.0808, "step": 652700 }, { "epoch": 84.3956043956044, "grad_norm": 1.4239461421966553, "learning_rate": 0.001, "loss": 2.0607, "step": 652800 }, { "epoch": 84.40853264382676, "grad_norm": 2.299696207046509, "learning_rate": 0.001, "loss": 2.097, "step": 652900 }, { "epoch": 84.42146089204913, "grad_norm": 1.2131788730621338, "learning_rate": 0.001, "loss": 2.0861, "step": 653000 }, { "epoch": 84.43438914027149, "grad_norm": 1.0863693952560425, "learning_rate": 0.001, "loss": 2.0831, "step": 653100 }, { "epoch": 84.44731738849386, "grad_norm": 1.392754316329956, "learning_rate": 0.001, "loss": 2.1035, "step": 653200 }, { "epoch": 84.46024563671622, "grad_norm": 0.9519808888435364, "learning_rate": 0.001, "loss": 2.0971, "step": 653300 }, { "epoch": 84.47317388493859, "grad_norm": 1.1642578840255737, "learning_rate": 0.001, "loss": 2.1059, "step": 653400 }, { "epoch": 84.48610213316095, "grad_norm": 1.3348294496536255, "learning_rate": 0.001, "loss": 2.0849, "step": 653500 }, { "epoch": 84.49903038138332, "grad_norm": 1.2310051918029785, "learning_rate": 0.001, "loss": 2.0993, "step": 653600 }, { "epoch": 84.51195862960569, "grad_norm": 1.1562706232070923, "learning_rate": 0.001, "loss": 2.0698, "step": 653700 }, { "epoch": 84.52488687782805, "grad_norm": 0.9757695198059082, "learning_rate": 0.001, "loss": 2.0918, "step": 653800 }, { "epoch": 84.53781512605042, "grad_norm": 2.1119184494018555, "learning_rate": 0.001, "loss": 2.0878, "step": 653900 }, { "epoch": 84.55074337427278, "grad_norm": 0.9882436394691467, "learning_rate": 0.001, "loss": 2.1163, "step": 654000 }, { "epoch": 84.56367162249515, "grad_norm": 1.8902431726455688, "learning_rate": 0.001, "loss": 2.1157, "step": 654100 }, { "epoch": 84.57659987071752, "grad_norm": 1.674090027809143, "learning_rate": 0.001, "loss": 2.1068, "step": 654200 }, { "epoch": 84.58952811893988, "grad_norm": 1.0376739501953125, "learning_rate": 0.001, "loss": 2.107, "step": 654300 }, { "epoch": 84.60245636716225, "grad_norm": 1.315948486328125, "learning_rate": 0.001, "loss": 2.1126, "step": 654400 }, { "epoch": 84.61538461538461, "grad_norm": 3.420506000518799, "learning_rate": 0.001, "loss": 2.1061, "step": 654500 }, { "epoch": 84.62831286360698, "grad_norm": 1.0026928186416626, "learning_rate": 0.001, "loss": 2.1129, "step": 654600 }, { "epoch": 84.64124111182934, "grad_norm": 1.1301902532577515, "learning_rate": 0.001, "loss": 2.0988, "step": 654700 }, { "epoch": 84.65416936005171, "grad_norm": 1.3749607801437378, "learning_rate": 0.001, "loss": 2.0998, "step": 654800 }, { "epoch": 84.66709760827408, "grad_norm": 1.570216178894043, "learning_rate": 0.001, "loss": 2.0999, "step": 654900 }, { "epoch": 84.68002585649644, "grad_norm": 1.1494724750518799, "learning_rate": 0.001, "loss": 2.1143, "step": 655000 }, { "epoch": 84.69295410471881, "grad_norm": 1.3333723545074463, "learning_rate": 0.001, "loss": 2.1032, "step": 655100 }, { "epoch": 84.70588235294117, "grad_norm": 1.0842920541763306, "learning_rate": 0.001, "loss": 2.1023, "step": 655200 }, { "epoch": 84.71881060116354, "grad_norm": 1.236276388168335, "learning_rate": 0.001, "loss": 2.1284, "step": 655300 }, { "epoch": 84.7317388493859, "grad_norm": 1.1237164735794067, "learning_rate": 0.001, "loss": 2.1173, "step": 655400 }, { "epoch": 84.74466709760827, "grad_norm": 1.0888242721557617, "learning_rate": 0.001, "loss": 2.1221, "step": 655500 }, { "epoch": 84.75759534583064, "grad_norm": 1.0777689218521118, "learning_rate": 0.001, "loss": 2.1105, "step": 655600 }, { "epoch": 84.770523594053, "grad_norm": 1.040274977684021, "learning_rate": 0.001, "loss": 2.1085, "step": 655700 }, { "epoch": 84.78345184227537, "grad_norm": 1.8214625120162964, "learning_rate": 0.001, "loss": 2.1073, "step": 655800 }, { "epoch": 84.79638009049773, "grad_norm": 1.6474151611328125, "learning_rate": 0.001, "loss": 2.1202, "step": 655900 }, { "epoch": 84.8093083387201, "grad_norm": 1.1380064487457275, "learning_rate": 0.001, "loss": 2.1219, "step": 656000 }, { "epoch": 84.82223658694247, "grad_norm": 2.075218915939331, "learning_rate": 0.001, "loss": 2.1162, "step": 656100 }, { "epoch": 84.83516483516483, "grad_norm": 1.6158816814422607, "learning_rate": 0.001, "loss": 2.1192, "step": 656200 }, { "epoch": 84.8480930833872, "grad_norm": 1.3105463981628418, "learning_rate": 0.001, "loss": 2.1223, "step": 656300 }, { "epoch": 84.86102133160956, "grad_norm": 2.4721453189849854, "learning_rate": 0.001, "loss": 2.1121, "step": 656400 }, { "epoch": 84.87394957983193, "grad_norm": 1.156374454498291, "learning_rate": 0.001, "loss": 2.1164, "step": 656500 }, { "epoch": 84.8868778280543, "grad_norm": 1.7749626636505127, "learning_rate": 0.001, "loss": 2.1222, "step": 656600 }, { "epoch": 84.89980607627666, "grad_norm": 1.0246546268463135, "learning_rate": 0.001, "loss": 2.1234, "step": 656700 }, { "epoch": 84.91273432449903, "grad_norm": 2.514385223388672, "learning_rate": 0.001, "loss": 2.1153, "step": 656800 }, { "epoch": 84.9256625727214, "grad_norm": 1.038434624671936, "learning_rate": 0.001, "loss": 2.1186, "step": 656900 }, { "epoch": 84.93859082094376, "grad_norm": 1.317301869392395, "learning_rate": 0.001, "loss": 2.1147, "step": 657000 }, { "epoch": 84.95151906916612, "grad_norm": 0.9654971361160278, "learning_rate": 0.001, "loss": 2.13, "step": 657100 }, { "epoch": 84.96444731738849, "grad_norm": 1.3047066926956177, "learning_rate": 0.001, "loss": 2.113, "step": 657200 }, { "epoch": 84.97737556561086, "grad_norm": 1.1606731414794922, "learning_rate": 0.001, "loss": 2.1436, "step": 657300 }, { "epoch": 84.99030381383322, "grad_norm": 1.1880040168762207, "learning_rate": 0.001, "loss": 2.1246, "step": 657400 }, { "epoch": 85.00323206205559, "grad_norm": 1.1226797103881836, "learning_rate": 0.001, "loss": 2.1082, "step": 657500 }, { "epoch": 85.01616031027795, "grad_norm": 1.0407557487487793, "learning_rate": 0.001, "loss": 2.0146, "step": 657600 }, { "epoch": 85.02908855850032, "grad_norm": 0.9165316224098206, "learning_rate": 0.001, "loss": 2.0491, "step": 657700 }, { "epoch": 85.04201680672269, "grad_norm": 1.0621862411499023, "learning_rate": 0.001, "loss": 2.0354, "step": 657800 }, { "epoch": 85.05494505494505, "grad_norm": 1.3507587909698486, "learning_rate": 0.001, "loss": 2.0517, "step": 657900 }, { "epoch": 85.06787330316742, "grad_norm": 1.0780130624771118, "learning_rate": 0.001, "loss": 2.041, "step": 658000 }, { "epoch": 85.08080155138978, "grad_norm": 1.3562015295028687, "learning_rate": 0.001, "loss": 2.068, "step": 658100 }, { "epoch": 85.09372979961215, "grad_norm": 1.4205858707427979, "learning_rate": 0.001, "loss": 2.047, "step": 658200 }, { "epoch": 85.10665804783451, "grad_norm": 1.0093309879302979, "learning_rate": 0.001, "loss": 2.0391, "step": 658300 }, { "epoch": 85.11958629605688, "grad_norm": 0.9836440682411194, "learning_rate": 0.001, "loss": 2.0546, "step": 658400 }, { "epoch": 85.13251454427925, "grad_norm": 1.315863847732544, "learning_rate": 0.001, "loss": 2.0739, "step": 658500 }, { "epoch": 85.14544279250161, "grad_norm": 0.9341942071914673, "learning_rate": 0.001, "loss": 2.0548, "step": 658600 }, { "epoch": 85.15837104072398, "grad_norm": 1.0794541835784912, "learning_rate": 0.001, "loss": 2.0526, "step": 658700 }, { "epoch": 85.17129928894634, "grad_norm": 1.032434344291687, "learning_rate": 0.001, "loss": 2.0372, "step": 658800 }, { "epoch": 85.18422753716871, "grad_norm": 1.4122940301895142, "learning_rate": 0.001, "loss": 2.0658, "step": 658900 }, { "epoch": 85.19715578539108, "grad_norm": 1.1373536586761475, "learning_rate": 0.001, "loss": 2.0478, "step": 659000 }, { "epoch": 85.21008403361344, "grad_norm": 1.5022228956222534, "learning_rate": 0.001, "loss": 2.0702, "step": 659100 }, { "epoch": 85.2230122818358, "grad_norm": 0.9825940132141113, "learning_rate": 0.001, "loss": 2.0751, "step": 659200 }, { "epoch": 85.23594053005817, "grad_norm": 1.0984209775924683, "learning_rate": 0.001, "loss": 2.0646, "step": 659300 }, { "epoch": 85.24886877828054, "grad_norm": 0.8171877861022949, "learning_rate": 0.001, "loss": 2.045, "step": 659400 }, { "epoch": 85.2617970265029, "grad_norm": 1.092653512954712, "learning_rate": 0.001, "loss": 2.0676, "step": 659500 }, { "epoch": 85.27472527472527, "grad_norm": 1.3341832160949707, "learning_rate": 0.001, "loss": 2.0708, "step": 659600 }, { "epoch": 85.28765352294764, "grad_norm": 1.157711148262024, "learning_rate": 0.001, "loss": 2.0702, "step": 659700 }, { "epoch": 85.30058177117, "grad_norm": 1.5071995258331299, "learning_rate": 0.001, "loss": 2.0672, "step": 659800 }, { "epoch": 85.31351001939237, "grad_norm": 1.905611276626587, "learning_rate": 0.001, "loss": 2.0755, "step": 659900 }, { "epoch": 85.32643826761473, "grad_norm": 1.0754661560058594, "learning_rate": 0.001, "loss": 2.0787, "step": 660000 }, { "epoch": 85.3393665158371, "grad_norm": 1.4082906246185303, "learning_rate": 0.001, "loss": 2.1012, "step": 660100 }, { "epoch": 85.35229476405947, "grad_norm": 1.0615501403808594, "learning_rate": 0.001, "loss": 2.0889, "step": 660200 }, { "epoch": 85.36522301228183, "grad_norm": 1.064083218574524, "learning_rate": 0.001, "loss": 2.087, "step": 660300 }, { "epoch": 85.3781512605042, "grad_norm": 0.9888755679130554, "learning_rate": 0.001, "loss": 2.0855, "step": 660400 }, { "epoch": 85.39107950872656, "grad_norm": 0.8883970975875854, "learning_rate": 0.001, "loss": 2.0739, "step": 660500 }, { "epoch": 85.40400775694893, "grad_norm": 1.1116969585418701, "learning_rate": 0.001, "loss": 2.0751, "step": 660600 }, { "epoch": 85.4169360051713, "grad_norm": 2.0699450969696045, "learning_rate": 0.001, "loss": 2.0797, "step": 660700 }, { "epoch": 85.42986425339366, "grad_norm": 0.8175131678581238, "learning_rate": 0.001, "loss": 2.0989, "step": 660800 }, { "epoch": 85.44279250161603, "grad_norm": 1.145201325416565, "learning_rate": 0.001, "loss": 2.0945, "step": 660900 }, { "epoch": 85.45572074983839, "grad_norm": 1.780518889427185, "learning_rate": 0.001, "loss": 2.0825, "step": 661000 }, { "epoch": 85.46864899806076, "grad_norm": 0.9656334519386292, "learning_rate": 0.001, "loss": 2.0845, "step": 661100 }, { "epoch": 85.48157724628312, "grad_norm": 1.549669623374939, "learning_rate": 0.001, "loss": 2.0892, "step": 661200 }, { "epoch": 85.49450549450549, "grad_norm": 1.249569296836853, "learning_rate": 0.001, "loss": 2.082, "step": 661300 }, { "epoch": 85.50743374272786, "grad_norm": 1.06536865234375, "learning_rate": 0.001, "loss": 2.0795, "step": 661400 }, { "epoch": 85.52036199095022, "grad_norm": 2.054896831512451, "learning_rate": 0.001, "loss": 2.0953, "step": 661500 }, { "epoch": 85.53329023917259, "grad_norm": 1.1868547201156616, "learning_rate": 0.001, "loss": 2.0915, "step": 661600 }, { "epoch": 85.54621848739495, "grad_norm": 1.1989948749542236, "learning_rate": 0.001, "loss": 2.097, "step": 661700 }, { "epoch": 85.55914673561732, "grad_norm": 0.9476444125175476, "learning_rate": 0.001, "loss": 2.1089, "step": 661800 }, { "epoch": 85.57207498383968, "grad_norm": 1.5174560546875, "learning_rate": 0.001, "loss": 2.1038, "step": 661900 }, { "epoch": 85.58500323206205, "grad_norm": 0.9679763317108154, "learning_rate": 0.001, "loss": 2.0951, "step": 662000 }, { "epoch": 85.59793148028442, "grad_norm": 1.1893963813781738, "learning_rate": 0.001, "loss": 2.1043, "step": 662100 }, { "epoch": 85.61085972850678, "grad_norm": 0.9168775081634521, "learning_rate": 0.001, "loss": 2.1114, "step": 662200 }, { "epoch": 85.62378797672915, "grad_norm": 0.956989586353302, "learning_rate": 0.001, "loss": 2.1042, "step": 662300 }, { "epoch": 85.63671622495151, "grad_norm": 1.0052940845489502, "learning_rate": 0.001, "loss": 2.1118, "step": 662400 }, { "epoch": 85.64964447317388, "grad_norm": 28.197124481201172, "learning_rate": 0.001, "loss": 2.0895, "step": 662500 }, { "epoch": 85.66257272139624, "grad_norm": 1.2409237623214722, "learning_rate": 0.001, "loss": 2.1219, "step": 662600 }, { "epoch": 85.67550096961861, "grad_norm": 1.3053089380264282, "learning_rate": 0.001, "loss": 2.1359, "step": 662700 }, { "epoch": 85.68842921784098, "grad_norm": 1.314577341079712, "learning_rate": 0.001, "loss": 2.1211, "step": 662800 }, { "epoch": 85.70135746606334, "grad_norm": 0.788080632686615, "learning_rate": 0.001, "loss": 2.1068, "step": 662900 }, { "epoch": 85.71428571428571, "grad_norm": 1.4570151567459106, "learning_rate": 0.001, "loss": 2.1149, "step": 663000 }, { "epoch": 85.72721396250807, "grad_norm": 1.7267485857009888, "learning_rate": 0.001, "loss": 2.1117, "step": 663100 }, { "epoch": 85.74014221073044, "grad_norm": 1.197393536567688, "learning_rate": 0.001, "loss": 2.1156, "step": 663200 }, { "epoch": 85.7530704589528, "grad_norm": 1.3345990180969238, "learning_rate": 0.001, "loss": 2.1158, "step": 663300 }, { "epoch": 85.76599870717517, "grad_norm": 0.9049318432807922, "learning_rate": 0.001, "loss": 2.0939, "step": 663400 }, { "epoch": 85.77892695539754, "grad_norm": 1.1283818483352661, "learning_rate": 0.001, "loss": 2.1163, "step": 663500 }, { "epoch": 85.7918552036199, "grad_norm": 1.14384925365448, "learning_rate": 0.001, "loss": 2.1454, "step": 663600 }, { "epoch": 85.80478345184227, "grad_norm": 1.1737473011016846, "learning_rate": 0.001, "loss": 2.1414, "step": 663700 }, { "epoch": 85.81771170006463, "grad_norm": 1.0959358215332031, "learning_rate": 0.001, "loss": 2.1012, "step": 663800 }, { "epoch": 85.830639948287, "grad_norm": 1.0951787233352661, "learning_rate": 0.001, "loss": 2.1274, "step": 663900 }, { "epoch": 85.84356819650937, "grad_norm": 1.2194116115570068, "learning_rate": 0.001, "loss": 2.1278, "step": 664000 }, { "epoch": 85.85649644473173, "grad_norm": 1.3289203643798828, "learning_rate": 0.001, "loss": 2.1216, "step": 664100 }, { "epoch": 85.8694246929541, "grad_norm": 6.370782852172852, "learning_rate": 0.001, "loss": 2.1298, "step": 664200 }, { "epoch": 85.88235294117646, "grad_norm": 1.432807207107544, "learning_rate": 0.001, "loss": 2.1105, "step": 664300 }, { "epoch": 85.89528118939883, "grad_norm": 0.8877795338630676, "learning_rate": 0.001, "loss": 2.1263, "step": 664400 }, { "epoch": 85.9082094376212, "grad_norm": 1.9391744136810303, "learning_rate": 0.001, "loss": 2.1076, "step": 664500 }, { "epoch": 85.92113768584356, "grad_norm": 1.3928238153457642, "learning_rate": 0.001, "loss": 2.0967, "step": 664600 }, { "epoch": 85.93406593406593, "grad_norm": 1.5534098148345947, "learning_rate": 0.001, "loss": 2.1497, "step": 664700 }, { "epoch": 85.9469941822883, "grad_norm": 4.484560489654541, "learning_rate": 0.001, "loss": 2.114, "step": 664800 }, { "epoch": 85.95992243051066, "grad_norm": 1.2154191732406616, "learning_rate": 0.001, "loss": 2.1247, "step": 664900 }, { "epoch": 85.97285067873302, "grad_norm": 0.9833287000656128, "learning_rate": 0.001, "loss": 2.1329, "step": 665000 }, { "epoch": 85.98577892695539, "grad_norm": 0.8003381490707397, "learning_rate": 0.001, "loss": 2.1442, "step": 665100 }, { "epoch": 85.99870717517777, "grad_norm": 1.2167986631393433, "learning_rate": 0.001, "loss": 2.1322, "step": 665200 }, { "epoch": 86.01163542340014, "grad_norm": 1.3558565378189087, "learning_rate": 0.001, "loss": 2.0473, "step": 665300 }, { "epoch": 86.0245636716225, "grad_norm": 1.3914886713027954, "learning_rate": 0.001, "loss": 2.0523, "step": 665400 }, { "epoch": 86.03749191984487, "grad_norm": 1.4101570844650269, "learning_rate": 0.001, "loss": 2.0546, "step": 665500 }, { "epoch": 86.05042016806723, "grad_norm": 1.733941674232483, "learning_rate": 0.001, "loss": 2.0453, "step": 665600 }, { "epoch": 86.0633484162896, "grad_norm": 1.4797799587249756, "learning_rate": 0.001, "loss": 2.0629, "step": 665700 }, { "epoch": 86.07627666451197, "grad_norm": 49.97026824951172, "learning_rate": 0.001, "loss": 2.0308, "step": 665800 }, { "epoch": 86.08920491273433, "grad_norm": 1.3979952335357666, "learning_rate": 0.001, "loss": 2.0477, "step": 665900 }, { "epoch": 86.1021331609567, "grad_norm": 1.0057318210601807, "learning_rate": 0.001, "loss": 2.0647, "step": 666000 }, { "epoch": 86.11506140917906, "grad_norm": 1.8520570993423462, "learning_rate": 0.001, "loss": 2.059, "step": 666100 }, { "epoch": 86.12798965740143, "grad_norm": 1.3583985567092896, "learning_rate": 0.001, "loss": 2.0376, "step": 666200 }, { "epoch": 86.1409179056238, "grad_norm": 1.1790249347686768, "learning_rate": 0.001, "loss": 2.0527, "step": 666300 }, { "epoch": 86.15384615384616, "grad_norm": 1.0907166004180908, "learning_rate": 0.001, "loss": 2.0619, "step": 666400 }, { "epoch": 86.16677440206853, "grad_norm": 1.5851809978485107, "learning_rate": 0.001, "loss": 2.0626, "step": 666500 }, { "epoch": 86.17970265029089, "grad_norm": 2.055755376815796, "learning_rate": 0.001, "loss": 2.0733, "step": 666600 }, { "epoch": 86.19263089851326, "grad_norm": 1.0226597785949707, "learning_rate": 0.001, "loss": 2.0642, "step": 666700 }, { "epoch": 86.20555914673562, "grad_norm": 1.1286042928695679, "learning_rate": 0.001, "loss": 2.0741, "step": 666800 }, { "epoch": 86.21848739495799, "grad_norm": 1.256507396697998, "learning_rate": 0.001, "loss": 2.0789, "step": 666900 }, { "epoch": 86.23141564318036, "grad_norm": 1.3078083992004395, "learning_rate": 0.001, "loss": 2.0654, "step": 667000 }, { "epoch": 86.24434389140272, "grad_norm": 3.80114483833313, "learning_rate": 0.001, "loss": 2.066, "step": 667100 }, { "epoch": 86.25727213962509, "grad_norm": 1.1971945762634277, "learning_rate": 0.001, "loss": 2.0774, "step": 667200 }, { "epoch": 86.27020038784745, "grad_norm": 1.3517321348190308, "learning_rate": 0.001, "loss": 2.0666, "step": 667300 }, { "epoch": 86.28312863606982, "grad_norm": 460.49810791015625, "learning_rate": 0.001, "loss": 2.0665, "step": 667400 }, { "epoch": 86.29605688429218, "grad_norm": 1.2379711866378784, "learning_rate": 0.001, "loss": 2.0778, "step": 667500 }, { "epoch": 86.30898513251455, "grad_norm": 1.1978168487548828, "learning_rate": 0.001, "loss": 2.0639, "step": 667600 }, { "epoch": 86.32191338073692, "grad_norm": 1.2376952171325684, "learning_rate": 0.001, "loss": 2.0647, "step": 667700 }, { "epoch": 86.33484162895928, "grad_norm": 1.1754746437072754, "learning_rate": 0.001, "loss": 2.0796, "step": 667800 }, { "epoch": 86.34776987718165, "grad_norm": 1.2252168655395508, "learning_rate": 0.001, "loss": 2.0889, "step": 667900 }, { "epoch": 86.36069812540401, "grad_norm": 1.5820297002792358, "learning_rate": 0.001, "loss": 2.0732, "step": 668000 }, { "epoch": 86.37362637362638, "grad_norm": 1.166224718093872, "learning_rate": 0.001, "loss": 2.0829, "step": 668100 }, { "epoch": 86.38655462184875, "grad_norm": 1.2493151426315308, "learning_rate": 0.001, "loss": 2.0804, "step": 668200 }, { "epoch": 86.39948287007111, "grad_norm": 1.1337398290634155, "learning_rate": 0.001, "loss": 2.0753, "step": 668300 }, { "epoch": 86.41241111829348, "grad_norm": 1.2775365114212036, "learning_rate": 0.001, "loss": 2.0862, "step": 668400 }, { "epoch": 86.42533936651584, "grad_norm": 35.00059127807617, "learning_rate": 0.001, "loss": 2.0852, "step": 668500 }, { "epoch": 86.43826761473821, "grad_norm": 0.8847180008888245, "learning_rate": 0.001, "loss": 2.1025, "step": 668600 }, { "epoch": 86.45119586296057, "grad_norm": 1.1517213582992554, "learning_rate": 0.001, "loss": 2.0811, "step": 668700 }, { "epoch": 86.46412411118294, "grad_norm": 5.112398147583008, "learning_rate": 0.001, "loss": 2.1014, "step": 668800 }, { "epoch": 86.4770523594053, "grad_norm": 1.0491403341293335, "learning_rate": 0.001, "loss": 2.1086, "step": 668900 }, { "epoch": 86.48998060762767, "grad_norm": 3.9104554653167725, "learning_rate": 0.001, "loss": 2.0917, "step": 669000 }, { "epoch": 86.50290885585004, "grad_norm": 1.160240650177002, "learning_rate": 0.001, "loss": 2.1162, "step": 669100 }, { "epoch": 86.5158371040724, "grad_norm": 2.156402587890625, "learning_rate": 0.001, "loss": 2.0999, "step": 669200 }, { "epoch": 86.52876535229477, "grad_norm": 1.6440880298614502, "learning_rate": 0.001, "loss": 2.0938, "step": 669300 }, { "epoch": 86.54169360051714, "grad_norm": 1.4373555183410645, "learning_rate": 0.001, "loss": 2.1011, "step": 669400 }, { "epoch": 86.5546218487395, "grad_norm": 2.324174642562866, "learning_rate": 0.001, "loss": 2.0913, "step": 669500 }, { "epoch": 86.56755009696187, "grad_norm": 1.211135745048523, "learning_rate": 0.001, "loss": 2.0983, "step": 669600 }, { "epoch": 86.58047834518423, "grad_norm": 1.8800883293151855, "learning_rate": 0.001, "loss": 2.1055, "step": 669700 }, { "epoch": 86.5934065934066, "grad_norm": 11.687389373779297, "learning_rate": 0.001, "loss": 2.0968, "step": 669800 }, { "epoch": 86.60633484162896, "grad_norm": 1.1648353338241577, "learning_rate": 0.001, "loss": 2.0976, "step": 669900 }, { "epoch": 86.61926308985133, "grad_norm": 1.0798707008361816, "learning_rate": 0.001, "loss": 2.113, "step": 670000 }, { "epoch": 86.6321913380737, "grad_norm": 1.2590223550796509, "learning_rate": 0.001, "loss": 2.0928, "step": 670100 }, { "epoch": 86.64511958629606, "grad_norm": 0.9694281816482544, "learning_rate": 0.001, "loss": 2.1175, "step": 670200 }, { "epoch": 86.65804783451843, "grad_norm": 1.6068817377090454, "learning_rate": 0.001, "loss": 2.1074, "step": 670300 }, { "epoch": 86.6709760827408, "grad_norm": 1.7555540800094604, "learning_rate": 0.001, "loss": 2.1048, "step": 670400 }, { "epoch": 86.68390433096316, "grad_norm": 1.3931199312210083, "learning_rate": 0.001, "loss": 2.1188, "step": 670500 }, { "epoch": 86.69683257918552, "grad_norm": 1.1960738897323608, "learning_rate": 0.001, "loss": 2.1013, "step": 670600 }, { "epoch": 86.70976082740789, "grad_norm": 6.848217964172363, "learning_rate": 0.001, "loss": 2.1202, "step": 670700 }, { "epoch": 86.72268907563026, "grad_norm": 1.1849126815795898, "learning_rate": 0.001, "loss": 2.1131, "step": 670800 }, { "epoch": 86.73561732385262, "grad_norm": 1.209836721420288, "learning_rate": 0.001, "loss": 2.1058, "step": 670900 }, { "epoch": 86.74854557207499, "grad_norm": 1.0592451095581055, "learning_rate": 0.001, "loss": 2.1364, "step": 671000 }, { "epoch": 86.76147382029735, "grad_norm": 4.2569804191589355, "learning_rate": 0.001, "loss": 2.1275, "step": 671100 }, { "epoch": 86.77440206851972, "grad_norm": 1.6216615438461304, "learning_rate": 0.001, "loss": 2.1251, "step": 671200 }, { "epoch": 86.78733031674209, "grad_norm": 1.6882450580596924, "learning_rate": 0.001, "loss": 2.1138, "step": 671300 }, { "epoch": 86.80025856496445, "grad_norm": 1.2017433643341064, "learning_rate": 0.001, "loss": 2.1208, "step": 671400 }, { "epoch": 86.81318681318682, "grad_norm": 1.8294843435287476, "learning_rate": 0.001, "loss": 2.1173, "step": 671500 }, { "epoch": 86.82611506140918, "grad_norm": 1.2887420654296875, "learning_rate": 0.001, "loss": 2.1294, "step": 671600 }, { "epoch": 86.83904330963155, "grad_norm": 3.4155499935150146, "learning_rate": 0.001, "loss": 2.1217, "step": 671700 }, { "epoch": 86.85197155785391, "grad_norm": 1.1536142826080322, "learning_rate": 0.001, "loss": 2.1137, "step": 671800 }, { "epoch": 86.86489980607628, "grad_norm": 1.4105671644210815, "learning_rate": 0.001, "loss": 2.1227, "step": 671900 }, { "epoch": 86.87782805429865, "grad_norm": 1.075365662574768, "learning_rate": 0.001, "loss": 2.1298, "step": 672000 }, { "epoch": 86.89075630252101, "grad_norm": 7.666426658630371, "learning_rate": 0.001, "loss": 2.1017, "step": 672100 }, { "epoch": 86.90368455074338, "grad_norm": 1.3651666641235352, "learning_rate": 0.001, "loss": 2.1147, "step": 672200 }, { "epoch": 86.91661279896574, "grad_norm": 4.257864475250244, "learning_rate": 0.001, "loss": 2.1123, "step": 672300 }, { "epoch": 86.92954104718811, "grad_norm": 0.9323015809059143, "learning_rate": 0.001, "loss": 2.1228, "step": 672400 }, { "epoch": 86.94246929541048, "grad_norm": 1.2801156044006348, "learning_rate": 0.001, "loss": 2.1277, "step": 672500 }, { "epoch": 86.95539754363284, "grad_norm": 1.4301247596740723, "learning_rate": 0.001, "loss": 2.1261, "step": 672600 }, { "epoch": 86.96832579185521, "grad_norm": 1.2162200212478638, "learning_rate": 0.001, "loss": 2.1297, "step": 672700 }, { "epoch": 86.98125404007757, "grad_norm": 1.1490298509597778, "learning_rate": 0.001, "loss": 2.12, "step": 672800 }, { "epoch": 86.99418228829994, "grad_norm": 2.0212204456329346, "learning_rate": 0.001, "loss": 2.1277, "step": 672900 }, { "epoch": 87.0071105365223, "grad_norm": 1.3146878480911255, "learning_rate": 0.001, "loss": 2.0424, "step": 673000 }, { "epoch": 87.02003878474467, "grad_norm": 2.8162758350372314, "learning_rate": 0.001, "loss": 2.0366, "step": 673100 }, { "epoch": 87.03296703296704, "grad_norm": 0.9646626114845276, "learning_rate": 0.001, "loss": 2.046, "step": 673200 }, { "epoch": 87.0458952811894, "grad_norm": 1.6727317571640015, "learning_rate": 0.001, "loss": 2.0437, "step": 673300 }, { "epoch": 87.05882352941177, "grad_norm": 1.0347559452056885, "learning_rate": 0.001, "loss": 2.0366, "step": 673400 }, { "epoch": 87.07175177763413, "grad_norm": 0.992372989654541, "learning_rate": 0.001, "loss": 2.037, "step": 673500 }, { "epoch": 87.0846800258565, "grad_norm": 1.0192824602127075, "learning_rate": 0.001, "loss": 2.0445, "step": 673600 }, { "epoch": 87.09760827407887, "grad_norm": 1.1021007299423218, "learning_rate": 0.001, "loss": 2.0536, "step": 673700 }, { "epoch": 87.11053652230123, "grad_norm": 1.1057543754577637, "learning_rate": 0.001, "loss": 2.0383, "step": 673800 }, { "epoch": 87.1234647705236, "grad_norm": 1.565991997718811, "learning_rate": 0.001, "loss": 2.0706, "step": 673900 }, { "epoch": 87.13639301874596, "grad_norm": 3.351379632949829, "learning_rate": 0.001, "loss": 2.0471, "step": 674000 }, { "epoch": 87.14932126696833, "grad_norm": 1.5145387649536133, "learning_rate": 0.001, "loss": 2.0506, "step": 674100 }, { "epoch": 87.1622495151907, "grad_norm": 1.440995693206787, "learning_rate": 0.001, "loss": 2.0665, "step": 674200 }, { "epoch": 87.17517776341306, "grad_norm": 0.982802152633667, "learning_rate": 0.001, "loss": 2.0641, "step": 674300 }, { "epoch": 87.18810601163543, "grad_norm": 1.2152119874954224, "learning_rate": 0.001, "loss": 2.0735, "step": 674400 }, { "epoch": 87.20103425985779, "grad_norm": 1.7984460592269897, "learning_rate": 0.001, "loss": 2.0445, "step": 674500 }, { "epoch": 87.21396250808016, "grad_norm": 1.4896003007888794, "learning_rate": 0.001, "loss": 2.0481, "step": 674600 }, { "epoch": 87.22689075630252, "grad_norm": 1.2147079706192017, "learning_rate": 0.001, "loss": 2.0547, "step": 674700 }, { "epoch": 87.23981900452489, "grad_norm": 1.0623724460601807, "learning_rate": 0.001, "loss": 2.0894, "step": 674800 }, { "epoch": 87.25274725274726, "grad_norm": 5.6264495849609375, "learning_rate": 0.001, "loss": 2.0619, "step": 674900 }, { "epoch": 87.26567550096962, "grad_norm": 0.9375671148300171, "learning_rate": 0.001, "loss": 2.0635, "step": 675000 }, { "epoch": 87.27860374919199, "grad_norm": 1.3487637042999268, "learning_rate": 0.001, "loss": 2.0754, "step": 675100 }, { "epoch": 87.29153199741435, "grad_norm": 1.2704588174819946, "learning_rate": 0.001, "loss": 2.0554, "step": 675200 }, { "epoch": 87.30446024563672, "grad_norm": 3.230109214782715, "learning_rate": 0.001, "loss": 2.0791, "step": 675300 }, { "epoch": 87.31738849385908, "grad_norm": 1.4822651147842407, "learning_rate": 0.001, "loss": 2.0853, "step": 675400 }, { "epoch": 87.33031674208145, "grad_norm": 0.9960110783576965, "learning_rate": 0.001, "loss": 2.1042, "step": 675500 }, { "epoch": 87.34324499030382, "grad_norm": 1.6527189016342163, "learning_rate": 0.001, "loss": 2.0861, "step": 675600 }, { "epoch": 87.35617323852618, "grad_norm": 1.610472321510315, "learning_rate": 0.001, "loss": 2.0839, "step": 675700 }, { "epoch": 87.36910148674855, "grad_norm": 1.5933022499084473, "learning_rate": 0.001, "loss": 2.067, "step": 675800 }, { "epoch": 87.38202973497091, "grad_norm": 1.453676462173462, "learning_rate": 0.001, "loss": 2.0769, "step": 675900 }, { "epoch": 87.39495798319328, "grad_norm": 1.6588722467422485, "learning_rate": 0.001, "loss": 2.0846, "step": 676000 }, { "epoch": 87.40788623141565, "grad_norm": 3.5887451171875, "learning_rate": 0.001, "loss": 2.0809, "step": 676100 }, { "epoch": 87.42081447963801, "grad_norm": 6.41648006439209, "learning_rate": 0.001, "loss": 2.0881, "step": 676200 }, { "epoch": 87.43374272786038, "grad_norm": 1.1446559429168701, "learning_rate": 0.001, "loss": 2.0833, "step": 676300 }, { "epoch": 87.44667097608274, "grad_norm": 1.5158826112747192, "learning_rate": 0.001, "loss": 2.0921, "step": 676400 }, { "epoch": 87.45959922430511, "grad_norm": 1.1655813455581665, "learning_rate": 0.001, "loss": 2.0996, "step": 676500 }, { "epoch": 87.47252747252747, "grad_norm": 1.7178821563720703, "learning_rate": 0.001, "loss": 2.098, "step": 676600 }, { "epoch": 87.48545572074984, "grad_norm": 1.5564719438552856, "learning_rate": 0.001, "loss": 2.1088, "step": 676700 }, { "epoch": 87.4983839689722, "grad_norm": 38.05824279785156, "learning_rate": 0.001, "loss": 2.0821, "step": 676800 }, { "epoch": 87.51131221719457, "grad_norm": 2.004847288131714, "learning_rate": 0.001, "loss": 2.0976, "step": 676900 }, { "epoch": 87.52424046541694, "grad_norm": 2.5728232860565186, "learning_rate": 0.001, "loss": 2.0849, "step": 677000 }, { "epoch": 87.5371687136393, "grad_norm": 0.9801663160324097, "learning_rate": 0.001, "loss": 2.0843, "step": 677100 }, { "epoch": 87.55009696186167, "grad_norm": 1.1305968761444092, "learning_rate": 0.001, "loss": 2.1069, "step": 677200 }, { "epoch": 87.56302521008404, "grad_norm": 0.9978017210960388, "learning_rate": 0.001, "loss": 2.0901, "step": 677300 }, { "epoch": 87.5759534583064, "grad_norm": 1.4039500951766968, "learning_rate": 0.001, "loss": 2.1037, "step": 677400 }, { "epoch": 87.58888170652877, "grad_norm": 1.903924822807312, "learning_rate": 0.001, "loss": 2.1039, "step": 677500 }, { "epoch": 87.60180995475113, "grad_norm": 1.0357933044433594, "learning_rate": 0.001, "loss": 2.0873, "step": 677600 }, { "epoch": 87.6147382029735, "grad_norm": 1.4200152158737183, "learning_rate": 0.001, "loss": 2.1014, "step": 677700 }, { "epoch": 87.62766645119586, "grad_norm": 1.0058876276016235, "learning_rate": 0.001, "loss": 2.1061, "step": 677800 }, { "epoch": 87.64059469941823, "grad_norm": 1.3391526937484741, "learning_rate": 0.001, "loss": 2.1196, "step": 677900 }, { "epoch": 87.6535229476406, "grad_norm": 1.307572603225708, "learning_rate": 0.001, "loss": 2.1206, "step": 678000 }, { "epoch": 87.66645119586296, "grad_norm": 3.596257209777832, "learning_rate": 0.001, "loss": 2.1073, "step": 678100 }, { "epoch": 87.67937944408533, "grad_norm": 3.1555862426757812, "learning_rate": 0.001, "loss": 2.1287, "step": 678200 }, { "epoch": 87.6923076923077, "grad_norm": 1.6259535551071167, "learning_rate": 0.001, "loss": 2.1124, "step": 678300 }, { "epoch": 87.70523594053006, "grad_norm": 1.3792717456817627, "learning_rate": 0.001, "loss": 2.0982, "step": 678400 }, { "epoch": 87.71816418875243, "grad_norm": 1.6280022859573364, "learning_rate": 0.001, "loss": 2.1135, "step": 678500 }, { "epoch": 87.73109243697479, "grad_norm": 1.581009864807129, "learning_rate": 0.001, "loss": 2.1091, "step": 678600 }, { "epoch": 87.74402068519716, "grad_norm": 1.1238353252410889, "learning_rate": 0.001, "loss": 2.1277, "step": 678700 }, { "epoch": 87.75694893341952, "grad_norm": 1.447584629058838, "learning_rate": 0.001, "loss": 2.1177, "step": 678800 }, { "epoch": 87.76987718164189, "grad_norm": 1.6834149360656738, "learning_rate": 0.001, "loss": 2.1124, "step": 678900 }, { "epoch": 87.78280542986425, "grad_norm": 2.104003429412842, "learning_rate": 0.001, "loss": 2.1215, "step": 679000 }, { "epoch": 87.79573367808662, "grad_norm": 1.4449851512908936, "learning_rate": 0.001, "loss": 2.1229, "step": 679100 }, { "epoch": 87.80866192630899, "grad_norm": 0.9664196372032166, "learning_rate": 0.001, "loss": 2.1242, "step": 679200 }, { "epoch": 87.82159017453135, "grad_norm": 1.1709994077682495, "learning_rate": 0.001, "loss": 2.115, "step": 679300 }, { "epoch": 87.83451842275372, "grad_norm": 1.6067761182785034, "learning_rate": 0.001, "loss": 2.1134, "step": 679400 }, { "epoch": 87.84744667097608, "grad_norm": 1.2554632425308228, "learning_rate": 0.001, "loss": 2.1053, "step": 679500 }, { "epoch": 87.86037491919845, "grad_norm": 1.4691871404647827, "learning_rate": 0.001, "loss": 2.1139, "step": 679600 }, { "epoch": 87.87330316742081, "grad_norm": 1.0779240131378174, "learning_rate": 0.001, "loss": 2.1252, "step": 679700 }, { "epoch": 87.88623141564318, "grad_norm": 1.1201385259628296, "learning_rate": 0.001, "loss": 2.1309, "step": 679800 }, { "epoch": 87.89915966386555, "grad_norm": 0.8005668520927429, "learning_rate": 0.001, "loss": 2.1297, "step": 679900 }, { "epoch": 87.91208791208791, "grad_norm": 0.9382094144821167, "learning_rate": 0.001, "loss": 2.1181, "step": 680000 }, { "epoch": 87.92501616031028, "grad_norm": 0.8788191080093384, "learning_rate": 0.001, "loss": 2.1312, "step": 680100 }, { "epoch": 87.93794440853264, "grad_norm": 0.9171880483627319, "learning_rate": 0.001, "loss": 2.1093, "step": 680200 }, { "epoch": 87.95087265675501, "grad_norm": 0.9949539303779602, "learning_rate": 0.001, "loss": 2.1261, "step": 680300 }, { "epoch": 87.96380090497738, "grad_norm": 5.035364151000977, "learning_rate": 0.001, "loss": 2.1152, "step": 680400 }, { "epoch": 87.97672915319974, "grad_norm": 1.0462573766708374, "learning_rate": 0.001, "loss": 2.1237, "step": 680500 }, { "epoch": 87.98965740142211, "grad_norm": 1.3219444751739502, "learning_rate": 0.001, "loss": 2.1234, "step": 680600 }, { "epoch": 88.00258564964447, "grad_norm": 1.0396404266357422, "learning_rate": 0.001, "loss": 2.126, "step": 680700 }, { "epoch": 88.01551389786684, "grad_norm": 1.9305026531219482, "learning_rate": 0.001, "loss": 2.0377, "step": 680800 }, { "epoch": 88.0284421460892, "grad_norm": 0.9530040621757507, "learning_rate": 0.001, "loss": 2.055, "step": 680900 }, { "epoch": 88.04137039431157, "grad_norm": 1.9865370988845825, "learning_rate": 0.001, "loss": 2.0469, "step": 681000 }, { "epoch": 88.05429864253394, "grad_norm": 15.85123348236084, "learning_rate": 0.001, "loss": 2.0347, "step": 681100 }, { "epoch": 88.0672268907563, "grad_norm": 1.3554813861846924, "learning_rate": 0.001, "loss": 2.037, "step": 681200 }, { "epoch": 88.08015513897867, "grad_norm": 1.2520618438720703, "learning_rate": 0.001, "loss": 2.0312, "step": 681300 }, { "epoch": 88.09308338720103, "grad_norm": 1.1189686059951782, "learning_rate": 0.001, "loss": 2.0468, "step": 681400 }, { "epoch": 88.1060116354234, "grad_norm": 1.6989762783050537, "learning_rate": 0.001, "loss": 2.0506, "step": 681500 }, { "epoch": 88.11893988364577, "grad_norm": 1.2508591413497925, "learning_rate": 0.001, "loss": 2.0481, "step": 681600 }, { "epoch": 88.13186813186813, "grad_norm": 5.817564487457275, "learning_rate": 0.001, "loss": 2.054, "step": 681700 }, { "epoch": 88.1447963800905, "grad_norm": 1.6919106245040894, "learning_rate": 0.001, "loss": 2.0597, "step": 681800 }, { "epoch": 88.15772462831286, "grad_norm": 1.1735087633132935, "learning_rate": 0.001, "loss": 2.0719, "step": 681900 }, { "epoch": 88.17065287653523, "grad_norm": 2.717454195022583, "learning_rate": 0.001, "loss": 2.044, "step": 682000 }, { "epoch": 88.1835811247576, "grad_norm": 0.9574738144874573, "learning_rate": 0.001, "loss": 2.053, "step": 682100 }, { "epoch": 88.19650937297996, "grad_norm": 0.9545354843139648, "learning_rate": 0.001, "loss": 2.0525, "step": 682200 }, { "epoch": 88.20943762120233, "grad_norm": 1.9955840110778809, "learning_rate": 0.001, "loss": 2.058, "step": 682300 }, { "epoch": 88.22236586942469, "grad_norm": 1.2963811159133911, "learning_rate": 0.001, "loss": 2.08, "step": 682400 }, { "epoch": 88.23529411764706, "grad_norm": 0.9964961409568787, "learning_rate": 0.001, "loss": 2.0692, "step": 682500 }, { "epoch": 88.24822236586942, "grad_norm": 1.0550249814987183, "learning_rate": 0.001, "loss": 2.0575, "step": 682600 }, { "epoch": 88.26115061409179, "grad_norm": 1.3434795141220093, "learning_rate": 0.001, "loss": 2.0603, "step": 682700 }, { "epoch": 88.27407886231416, "grad_norm": 0.9411715269088745, "learning_rate": 0.001, "loss": 2.0709, "step": 682800 }, { "epoch": 88.28700711053652, "grad_norm": 0.9475138187408447, "learning_rate": 0.001, "loss": 2.0761, "step": 682900 }, { "epoch": 88.29993535875889, "grad_norm": 1.8000918626785278, "learning_rate": 0.001, "loss": 2.0822, "step": 683000 }, { "epoch": 88.31286360698125, "grad_norm": 1.9871468544006348, "learning_rate": 0.001, "loss": 2.0968, "step": 683100 }, { "epoch": 88.32579185520362, "grad_norm": 1.351845145225525, "learning_rate": 0.001, "loss": 2.0798, "step": 683200 }, { "epoch": 88.33872010342598, "grad_norm": 1.0777719020843506, "learning_rate": 0.001, "loss": 2.0687, "step": 683300 }, { "epoch": 88.35164835164835, "grad_norm": 1.678695797920227, "learning_rate": 0.001, "loss": 2.0985, "step": 683400 }, { "epoch": 88.36457659987072, "grad_norm": 1.5937069654464722, "learning_rate": 0.001, "loss": 2.073, "step": 683500 }, { "epoch": 88.37750484809308, "grad_norm": 1.992294430732727, "learning_rate": 0.001, "loss": 2.0806, "step": 683600 }, { "epoch": 88.39043309631545, "grad_norm": 1.4198955297470093, "learning_rate": 0.001, "loss": 2.074, "step": 683700 }, { "epoch": 88.40336134453781, "grad_norm": 1.0158225297927856, "learning_rate": 0.001, "loss": 2.0954, "step": 683800 }, { "epoch": 88.41628959276018, "grad_norm": 1.127616047859192, "learning_rate": 0.001, "loss": 2.0737, "step": 683900 }, { "epoch": 88.42921784098255, "grad_norm": 1.1795039176940918, "learning_rate": 0.001, "loss": 2.0781, "step": 684000 }, { "epoch": 88.44214608920491, "grad_norm": 0.8619024157524109, "learning_rate": 0.001, "loss": 2.0873, "step": 684100 }, { "epoch": 88.45507433742728, "grad_norm": 1.1166493892669678, "learning_rate": 0.001, "loss": 2.0892, "step": 684200 }, { "epoch": 88.46800258564964, "grad_norm": 0.9751518964767456, "learning_rate": 0.001, "loss": 2.1146, "step": 684300 }, { "epoch": 88.48093083387201, "grad_norm": 1.7477869987487793, "learning_rate": 0.001, "loss": 2.1006, "step": 684400 }, { "epoch": 88.49385908209437, "grad_norm": 1.303438663482666, "learning_rate": 0.001, "loss": 2.1007, "step": 684500 }, { "epoch": 88.50678733031674, "grad_norm": 1.3237055540084839, "learning_rate": 0.001, "loss": 2.0982, "step": 684600 }, { "epoch": 88.5197155785391, "grad_norm": 1.3804397583007812, "learning_rate": 0.001, "loss": 2.0786, "step": 684700 }, { "epoch": 88.53264382676147, "grad_norm": 1.1834031343460083, "learning_rate": 0.001, "loss": 2.1083, "step": 684800 }, { "epoch": 88.54557207498384, "grad_norm": 1.3639750480651855, "learning_rate": 0.001, "loss": 2.0926, "step": 684900 }, { "epoch": 88.5585003232062, "grad_norm": 1.26958167552948, "learning_rate": 0.001, "loss": 2.0989, "step": 685000 }, { "epoch": 88.57142857142857, "grad_norm": 0.9901506304740906, "learning_rate": 0.001, "loss": 2.1223, "step": 685100 }, { "epoch": 88.58435681965094, "grad_norm": 2.407783269882202, "learning_rate": 0.001, "loss": 2.0798, "step": 685200 }, { "epoch": 88.5972850678733, "grad_norm": 2.1647820472717285, "learning_rate": 0.001, "loss": 2.0989, "step": 685300 }, { "epoch": 88.61021331609567, "grad_norm": 2.2325398921966553, "learning_rate": 0.001, "loss": 2.078, "step": 685400 }, { "epoch": 88.62314156431803, "grad_norm": 0.9265846014022827, "learning_rate": 0.001, "loss": 2.1158, "step": 685500 }, { "epoch": 88.6360698125404, "grad_norm": 0.9347838759422302, "learning_rate": 0.001, "loss": 2.0847, "step": 685600 }, { "epoch": 88.64899806076276, "grad_norm": 0.9151781797409058, "learning_rate": 0.001, "loss": 2.0835, "step": 685700 }, { "epoch": 88.66192630898513, "grad_norm": 0.8447695970535278, "learning_rate": 0.001, "loss": 2.1039, "step": 685800 }, { "epoch": 88.6748545572075, "grad_norm": 1.420844316482544, "learning_rate": 0.001, "loss": 2.098, "step": 685900 }, { "epoch": 88.68778280542986, "grad_norm": 1.9715473651885986, "learning_rate": 0.001, "loss": 2.0964, "step": 686000 }, { "epoch": 88.70071105365223, "grad_norm": 1.1511098146438599, "learning_rate": 0.001, "loss": 2.1182, "step": 686100 }, { "epoch": 88.7136393018746, "grad_norm": 1.2601655721664429, "learning_rate": 0.001, "loss": 2.1111, "step": 686200 }, { "epoch": 88.72656755009696, "grad_norm": 0.9500147700309753, "learning_rate": 0.001, "loss": 2.079, "step": 686300 }, { "epoch": 88.73949579831933, "grad_norm": 0.9434110522270203, "learning_rate": 0.001, "loss": 2.1076, "step": 686400 }, { "epoch": 88.75242404654169, "grad_norm": 1.0202782154083252, "learning_rate": 0.001, "loss": 2.1228, "step": 686500 }, { "epoch": 88.76535229476406, "grad_norm": 1.5861046314239502, "learning_rate": 0.001, "loss": 2.0962, "step": 686600 }, { "epoch": 88.77828054298642, "grad_norm": 0.982229471206665, "learning_rate": 0.001, "loss": 2.1088, "step": 686700 }, { "epoch": 88.79120879120879, "grad_norm": 0.8481512665748596, "learning_rate": 0.001, "loss": 2.1172, "step": 686800 }, { "epoch": 88.80413703943115, "grad_norm": 2.5396909713745117, "learning_rate": 0.001, "loss": 2.1095, "step": 686900 }, { "epoch": 88.81706528765352, "grad_norm": 0.8591639995574951, "learning_rate": 0.001, "loss": 2.0927, "step": 687000 }, { "epoch": 88.82999353587589, "grad_norm": 0.9654812216758728, "learning_rate": 0.001, "loss": 2.1125, "step": 687100 }, { "epoch": 88.84292178409825, "grad_norm": 1.190589427947998, "learning_rate": 0.001, "loss": 2.1078, "step": 687200 }, { "epoch": 88.85585003232062, "grad_norm": 1.0033631324768066, "learning_rate": 0.001, "loss": 2.1104, "step": 687300 }, { "epoch": 88.86877828054298, "grad_norm": 36.17502212524414, "learning_rate": 0.001, "loss": 2.1078, "step": 687400 }, { "epoch": 88.88170652876535, "grad_norm": 4.6396026611328125, "learning_rate": 0.001, "loss": 2.1144, "step": 687500 }, { "epoch": 88.89463477698771, "grad_norm": 0.868927538394928, "learning_rate": 0.001, "loss": 2.1205, "step": 687600 }, { "epoch": 88.90756302521008, "grad_norm": 1.2299100160598755, "learning_rate": 0.001, "loss": 2.1153, "step": 687700 }, { "epoch": 88.92049127343245, "grad_norm": 1.3164759874343872, "learning_rate": 0.001, "loss": 2.1142, "step": 687800 }, { "epoch": 88.93341952165481, "grad_norm": 1.4256677627563477, "learning_rate": 0.001, "loss": 2.1313, "step": 687900 }, { "epoch": 88.94634776987718, "grad_norm": 0.9821195602416992, "learning_rate": 0.001, "loss": 2.1186, "step": 688000 }, { "epoch": 88.95927601809954, "grad_norm": 1.6782971620559692, "learning_rate": 0.001, "loss": 2.1139, "step": 688100 }, { "epoch": 88.97220426632191, "grad_norm": 1.1438616514205933, "learning_rate": 0.001, "loss": 2.1167, "step": 688200 }, { "epoch": 88.98513251454428, "grad_norm": 1.033712387084961, "learning_rate": 0.001, "loss": 2.1336, "step": 688300 }, { "epoch": 88.99806076276664, "grad_norm": 0.9755064249038696, "learning_rate": 0.001, "loss": 2.117, "step": 688400 }, { "epoch": 89.01098901098901, "grad_norm": 3.8463757038116455, "learning_rate": 0.001, "loss": 2.0533, "step": 688500 }, { "epoch": 89.02391725921137, "grad_norm": 1.2052624225616455, "learning_rate": 0.001, "loss": 2.0213, "step": 688600 }, { "epoch": 89.03684550743374, "grad_norm": 2.050605058670044, "learning_rate": 0.001, "loss": 2.0413, "step": 688700 }, { "epoch": 89.0497737556561, "grad_norm": 2.008741855621338, "learning_rate": 0.001, "loss": 2.0523, "step": 688800 }, { "epoch": 89.06270200387847, "grad_norm": 1.6967346668243408, "learning_rate": 0.001, "loss": 2.0481, "step": 688900 }, { "epoch": 89.07563025210084, "grad_norm": 1.3818457126617432, "learning_rate": 0.001, "loss": 2.0275, "step": 689000 }, { "epoch": 89.0885585003232, "grad_norm": 1.1126487255096436, "learning_rate": 0.001, "loss": 2.05, "step": 689100 }, { "epoch": 89.10148674854557, "grad_norm": 1.4301557540893555, "learning_rate": 0.001, "loss": 2.0454, "step": 689200 }, { "epoch": 89.11441499676793, "grad_norm": 1.1763423681259155, "learning_rate": 0.001, "loss": 2.0637, "step": 689300 }, { "epoch": 89.1273432449903, "grad_norm": 2.9312634468078613, "learning_rate": 0.001, "loss": 2.0624, "step": 689400 }, { "epoch": 89.14027149321267, "grad_norm": 1.1498059034347534, "learning_rate": 0.001, "loss": 2.0569, "step": 689500 }, { "epoch": 89.15319974143503, "grad_norm": 0.9568167328834534, "learning_rate": 0.001, "loss": 2.0584, "step": 689600 }, { "epoch": 89.1661279896574, "grad_norm": 1.2353843450546265, "learning_rate": 0.001, "loss": 2.0793, "step": 689700 }, { "epoch": 89.17905623787976, "grad_norm": 1.2132279872894287, "learning_rate": 0.001, "loss": 2.072, "step": 689800 }, { "epoch": 89.19198448610213, "grad_norm": 1.3951616287231445, "learning_rate": 0.001, "loss": 2.0618, "step": 689900 }, { "epoch": 89.2049127343245, "grad_norm": 1.8289852142333984, "learning_rate": 0.001, "loss": 2.074, "step": 690000 }, { "epoch": 89.21784098254686, "grad_norm": 1.5510085821151733, "learning_rate": 0.001, "loss": 2.0741, "step": 690100 }, { "epoch": 89.23076923076923, "grad_norm": 1.4661569595336914, "learning_rate": 0.001, "loss": 2.067, "step": 690200 }, { "epoch": 89.24369747899159, "grad_norm": 0.912743866443634, "learning_rate": 0.001, "loss": 2.0746, "step": 690300 }, { "epoch": 89.25662572721396, "grad_norm": 14.752772331237793, "learning_rate": 0.001, "loss": 2.0647, "step": 690400 }, { "epoch": 89.26955397543632, "grad_norm": 2.4970333576202393, "learning_rate": 0.001, "loss": 2.074, "step": 690500 }, { "epoch": 89.28248222365869, "grad_norm": 1.2608966827392578, "learning_rate": 0.001, "loss": 2.0826, "step": 690600 }, { "epoch": 89.29541047188106, "grad_norm": 1.1168655157089233, "learning_rate": 0.001, "loss": 2.0785, "step": 690700 }, { "epoch": 89.30833872010342, "grad_norm": 1.0014991760253906, "learning_rate": 0.001, "loss": 2.0775, "step": 690800 }, { "epoch": 89.32126696832579, "grad_norm": 1.1765705347061157, "learning_rate": 0.001, "loss": 2.0898, "step": 690900 }, { "epoch": 89.33419521654815, "grad_norm": 1.651347041130066, "learning_rate": 0.001, "loss": 2.0746, "step": 691000 }, { "epoch": 89.34712346477052, "grad_norm": 1.1487464904785156, "learning_rate": 0.001, "loss": 2.0766, "step": 691100 }, { "epoch": 89.36005171299288, "grad_norm": 1.1217520236968994, "learning_rate": 0.001, "loss": 2.0768, "step": 691200 }, { "epoch": 89.37297996121525, "grad_norm": 1.3285671472549438, "learning_rate": 0.001, "loss": 2.0971, "step": 691300 }, { "epoch": 89.38590820943762, "grad_norm": 1.1801867485046387, "learning_rate": 0.001, "loss": 2.0816, "step": 691400 }, { "epoch": 89.39883645765998, "grad_norm": 1.077734112739563, "learning_rate": 0.001, "loss": 2.1042, "step": 691500 }, { "epoch": 89.41176470588235, "grad_norm": 3.23750638961792, "learning_rate": 0.001, "loss": 2.0983, "step": 691600 }, { "epoch": 89.42469295410471, "grad_norm": 1.209603190422058, "learning_rate": 0.001, "loss": 2.092, "step": 691700 }, { "epoch": 89.43762120232708, "grad_norm": 2.0691864490509033, "learning_rate": 0.001, "loss": 2.0924, "step": 691800 }, { "epoch": 89.45054945054945, "grad_norm": 1.1091737747192383, "learning_rate": 0.001, "loss": 2.1135, "step": 691900 }, { "epoch": 89.46347769877181, "grad_norm": 2.192340135574341, "learning_rate": 0.001, "loss": 2.0925, "step": 692000 }, { "epoch": 89.47640594699418, "grad_norm": 1.1445655822753906, "learning_rate": 0.001, "loss": 2.0888, "step": 692100 }, { "epoch": 89.48933419521654, "grad_norm": 0.9989975094795227, "learning_rate": 0.001, "loss": 2.0825, "step": 692200 }, { "epoch": 89.50226244343891, "grad_norm": 0.8908292055130005, "learning_rate": 0.001, "loss": 2.1019, "step": 692300 }, { "epoch": 89.51519069166127, "grad_norm": 1.0956065654754639, "learning_rate": 0.001, "loss": 2.0947, "step": 692400 }, { "epoch": 89.52811893988364, "grad_norm": 0.9169111251831055, "learning_rate": 0.001, "loss": 2.0935, "step": 692500 }, { "epoch": 89.541047188106, "grad_norm": 1.3044780492782593, "learning_rate": 0.001, "loss": 2.1179, "step": 692600 }, { "epoch": 89.55397543632837, "grad_norm": 1.119352102279663, "learning_rate": 0.001, "loss": 2.0971, "step": 692700 }, { "epoch": 89.56690368455074, "grad_norm": 8.157697677612305, "learning_rate": 0.001, "loss": 2.1111, "step": 692800 }, { "epoch": 89.5798319327731, "grad_norm": 1.4452379941940308, "learning_rate": 0.001, "loss": 2.0925, "step": 692900 }, { "epoch": 89.59276018099547, "grad_norm": 1.449674367904663, "learning_rate": 0.001, "loss": 2.0995, "step": 693000 }, { "epoch": 89.60568842921784, "grad_norm": 1.5840237140655518, "learning_rate": 0.001, "loss": 2.0991, "step": 693100 }, { "epoch": 89.6186166774402, "grad_norm": 1.6051219701766968, "learning_rate": 0.001, "loss": 2.1096, "step": 693200 }, { "epoch": 89.63154492566257, "grad_norm": 2.0297300815582275, "learning_rate": 0.001, "loss": 2.0918, "step": 693300 }, { "epoch": 89.64447317388493, "grad_norm": 1.685821533203125, "learning_rate": 0.001, "loss": 2.091, "step": 693400 }, { "epoch": 89.6574014221073, "grad_norm": 1.6246368885040283, "learning_rate": 0.001, "loss": 2.1, "step": 693500 }, { "epoch": 89.67032967032966, "grad_norm": 2.7114040851593018, "learning_rate": 0.001, "loss": 2.1112, "step": 693600 }, { "epoch": 89.68325791855203, "grad_norm": 1.0319170951843262, "learning_rate": 0.001, "loss": 2.0854, "step": 693700 }, { "epoch": 89.6961861667744, "grad_norm": 0.9432569146156311, "learning_rate": 0.001, "loss": 2.103, "step": 693800 }, { "epoch": 89.70911441499676, "grad_norm": 1.436737060546875, "learning_rate": 0.001, "loss": 2.0986, "step": 693900 }, { "epoch": 89.72204266321913, "grad_norm": 1.8106175661087036, "learning_rate": 0.001, "loss": 2.1014, "step": 694000 }, { "epoch": 89.7349709114415, "grad_norm": 2.0048811435699463, "learning_rate": 0.001, "loss": 2.0949, "step": 694100 }, { "epoch": 89.74789915966386, "grad_norm": 1.4625893831253052, "learning_rate": 0.001, "loss": 2.0951, "step": 694200 }, { "epoch": 89.76082740788623, "grad_norm": 1.048667073249817, "learning_rate": 0.001, "loss": 2.1041, "step": 694300 }, { "epoch": 89.77375565610859, "grad_norm": 1.793103575706482, "learning_rate": 0.001, "loss": 2.1185, "step": 694400 }, { "epoch": 89.78668390433096, "grad_norm": 1.3414889574050903, "learning_rate": 0.001, "loss": 2.1096, "step": 694500 }, { "epoch": 89.79961215255332, "grad_norm": 1.13485586643219, "learning_rate": 0.001, "loss": 2.1026, "step": 694600 }, { "epoch": 89.81254040077569, "grad_norm": 0.9325368404388428, "learning_rate": 0.001, "loss": 2.1014, "step": 694700 }, { "epoch": 89.82546864899805, "grad_norm": 1.0763410329818726, "learning_rate": 0.001, "loss": 2.1062, "step": 694800 }, { "epoch": 89.83839689722042, "grad_norm": 3.598443031311035, "learning_rate": 0.001, "loss": 2.1144, "step": 694900 }, { "epoch": 89.85132514544279, "grad_norm": 1.2720178365707397, "learning_rate": 0.001, "loss": 2.1271, "step": 695000 }, { "epoch": 89.86425339366515, "grad_norm": 1.2164747714996338, "learning_rate": 0.001, "loss": 2.1168, "step": 695100 }, { "epoch": 89.87718164188752, "grad_norm": 1.774399995803833, "learning_rate": 0.001, "loss": 2.1206, "step": 695200 }, { "epoch": 89.89010989010988, "grad_norm": 2.0553433895111084, "learning_rate": 0.001, "loss": 2.1043, "step": 695300 }, { "epoch": 89.90303813833225, "grad_norm": 1.982657551765442, "learning_rate": 0.001, "loss": 2.1229, "step": 695400 }, { "epoch": 89.91596638655462, "grad_norm": 1.1004793643951416, "learning_rate": 0.001, "loss": 2.125, "step": 695500 }, { "epoch": 89.92889463477698, "grad_norm": 1.6328319311141968, "learning_rate": 0.001, "loss": 2.1171, "step": 695600 }, { "epoch": 89.94182288299935, "grad_norm": 1.0192601680755615, "learning_rate": 0.001, "loss": 2.1124, "step": 695700 }, { "epoch": 89.95475113122171, "grad_norm": 1.3616758584976196, "learning_rate": 0.001, "loss": 2.1307, "step": 695800 }, { "epoch": 89.96767937944408, "grad_norm": 1.1003797054290771, "learning_rate": 0.001, "loss": 2.117, "step": 695900 }, { "epoch": 89.98060762766644, "grad_norm": 1.4122676849365234, "learning_rate": 0.001, "loss": 2.0981, "step": 696000 }, { "epoch": 89.99353587588882, "grad_norm": 1.1967525482177734, "learning_rate": 0.001, "loss": 2.1226, "step": 696100 }, { "epoch": 90.00646412411119, "grad_norm": 1.6013163328170776, "learning_rate": 0.001, "loss": 2.0385, "step": 696200 }, { "epoch": 90.01939237233356, "grad_norm": 1.8478738069534302, "learning_rate": 0.001, "loss": 2.0422, "step": 696300 }, { "epoch": 90.03232062055592, "grad_norm": 1.5608121156692505, "learning_rate": 0.001, "loss": 2.0389, "step": 696400 }, { "epoch": 90.04524886877829, "grad_norm": 2.6873178482055664, "learning_rate": 0.001, "loss": 2.0437, "step": 696500 }, { "epoch": 90.05817711700065, "grad_norm": 2.385868787765503, "learning_rate": 0.001, "loss": 2.0399, "step": 696600 }, { "epoch": 90.07110536522302, "grad_norm": 2.478785276412964, "learning_rate": 0.001, "loss": 2.0538, "step": 696700 }, { "epoch": 90.08403361344538, "grad_norm": 2.103672742843628, "learning_rate": 0.001, "loss": 2.053, "step": 696800 }, { "epoch": 90.09696186166775, "grad_norm": 4.299813747406006, "learning_rate": 0.001, "loss": 2.0299, "step": 696900 }, { "epoch": 90.10989010989012, "grad_norm": 2.1824734210968018, "learning_rate": 0.001, "loss": 2.0574, "step": 697000 }, { "epoch": 90.12281835811248, "grad_norm": 58.5394401550293, "learning_rate": 0.001, "loss": 2.0563, "step": 697100 }, { "epoch": 90.13574660633485, "grad_norm": 1.8452109098434448, "learning_rate": 0.001, "loss": 2.0296, "step": 697200 }, { "epoch": 90.14867485455721, "grad_norm": 3.4171395301818848, "learning_rate": 0.001, "loss": 2.059, "step": 697300 }, { "epoch": 90.16160310277958, "grad_norm": 1.9978384971618652, "learning_rate": 0.001, "loss": 2.0559, "step": 697400 }, { "epoch": 90.17453135100195, "grad_norm": 2.1640939712524414, "learning_rate": 0.001, "loss": 2.0659, "step": 697500 }, { "epoch": 90.18745959922431, "grad_norm": 2.0618057250976562, "learning_rate": 0.001, "loss": 2.0524, "step": 697600 }, { "epoch": 90.20038784744668, "grad_norm": 1.4888664484024048, "learning_rate": 0.001, "loss": 2.0704, "step": 697700 }, { "epoch": 90.21331609566904, "grad_norm": 1.9925875663757324, "learning_rate": 0.001, "loss": 2.0565, "step": 697800 }, { "epoch": 90.22624434389141, "grad_norm": 2.837529182434082, "learning_rate": 0.001, "loss": 2.0486, "step": 697900 }, { "epoch": 90.23917259211377, "grad_norm": 7.507633209228516, "learning_rate": 0.001, "loss": 2.0669, "step": 698000 }, { "epoch": 90.25210084033614, "grad_norm": 1.5755428075790405, "learning_rate": 0.001, "loss": 2.0777, "step": 698100 }, { "epoch": 90.2650290885585, "grad_norm": 1.8134406805038452, "learning_rate": 0.001, "loss": 2.0632, "step": 698200 }, { "epoch": 90.27795733678087, "grad_norm": 1.633092999458313, "learning_rate": 0.001, "loss": 2.0595, "step": 698300 }, { "epoch": 90.29088558500324, "grad_norm": 2.0763158798217773, "learning_rate": 0.001, "loss": 2.0472, "step": 698400 }, { "epoch": 90.3038138332256, "grad_norm": 1.8010778427124023, "learning_rate": 0.001, "loss": 2.0653, "step": 698500 }, { "epoch": 90.31674208144797, "grad_norm": 2.52554988861084, "learning_rate": 0.001, "loss": 2.0637, "step": 698600 }, { "epoch": 90.32967032967034, "grad_norm": 4.050734519958496, "learning_rate": 0.001, "loss": 2.0768, "step": 698700 }, { "epoch": 90.3425985778927, "grad_norm": 2.0018742084503174, "learning_rate": 0.001, "loss": 2.0791, "step": 698800 }, { "epoch": 90.35552682611507, "grad_norm": 1.6604235172271729, "learning_rate": 0.001, "loss": 2.061, "step": 698900 }, { "epoch": 90.36845507433743, "grad_norm": 1.963460922241211, "learning_rate": 0.001, "loss": 2.0595, "step": 699000 }, { "epoch": 90.3813833225598, "grad_norm": 2.0113086700439453, "learning_rate": 0.001, "loss": 2.0743, "step": 699100 }, { "epoch": 90.39431157078216, "grad_norm": 1.5369609594345093, "learning_rate": 0.001, "loss": 2.0691, "step": 699200 }, { "epoch": 90.40723981900453, "grad_norm": 1.8090168237686157, "learning_rate": 0.001, "loss": 2.0695, "step": 699300 }, { "epoch": 90.4201680672269, "grad_norm": 2.3020517826080322, "learning_rate": 0.001, "loss": 2.0829, "step": 699400 }, { "epoch": 90.43309631544926, "grad_norm": 1.810500979423523, "learning_rate": 0.001, "loss": 2.0712, "step": 699500 }, { "epoch": 90.44602456367163, "grad_norm": 1.4725010395050049, "learning_rate": 0.001, "loss": 2.0882, "step": 699600 }, { "epoch": 90.458952811894, "grad_norm": 1.861162543296814, "learning_rate": 0.001, "loss": 2.0824, "step": 699700 }, { "epoch": 90.47188106011636, "grad_norm": 2.9615538120269775, "learning_rate": 0.001, "loss": 2.0672, "step": 699800 }, { "epoch": 90.48480930833873, "grad_norm": 4.317444324493408, "learning_rate": 0.001, "loss": 2.1, "step": 699900 }, { "epoch": 90.49773755656109, "grad_norm": 1.7613846063613892, "learning_rate": 0.001, "loss": 2.0781, "step": 700000 }, { "epoch": 90.51066580478346, "grad_norm": 1.5117532014846802, "learning_rate": 0.001, "loss": 2.0918, "step": 700100 }, { "epoch": 90.52359405300582, "grad_norm": 1.7616888284683228, "learning_rate": 0.001, "loss": 2.0863, "step": 700200 }, { "epoch": 90.53652230122819, "grad_norm": 1.9221570491790771, "learning_rate": 0.001, "loss": 2.0809, "step": 700300 }, { "epoch": 90.54945054945055, "grad_norm": 2.318866014480591, "learning_rate": 0.001, "loss": 2.1025, "step": 700400 }, { "epoch": 90.56237879767292, "grad_norm": 3.4418551921844482, "learning_rate": 0.001, "loss": 2.0832, "step": 700500 }, { "epoch": 90.57530704589529, "grad_norm": 1.7309201955795288, "learning_rate": 0.001, "loss": 2.0932, "step": 700600 }, { "epoch": 90.58823529411765, "grad_norm": 1.9195506572723389, "learning_rate": 0.001, "loss": 2.0985, "step": 700700 }, { "epoch": 90.60116354234002, "grad_norm": 1.7535429000854492, "learning_rate": 0.001, "loss": 2.1063, "step": 700800 }, { "epoch": 90.61409179056238, "grad_norm": 2.7044589519500732, "learning_rate": 0.001, "loss": 2.0866, "step": 700900 }, { "epoch": 90.62702003878475, "grad_norm": 2.3767192363739014, "learning_rate": 0.001, "loss": 2.107, "step": 701000 }, { "epoch": 90.63994828700712, "grad_norm": 1.8608505725860596, "learning_rate": 0.001, "loss": 2.1147, "step": 701100 }, { "epoch": 90.65287653522948, "grad_norm": 1.893446683883667, "learning_rate": 0.001, "loss": 2.1002, "step": 701200 }, { "epoch": 90.66580478345185, "grad_norm": 2.3774313926696777, "learning_rate": 0.001, "loss": 2.0889, "step": 701300 }, { "epoch": 90.67873303167421, "grad_norm": 1.7989399433135986, "learning_rate": 0.001, "loss": 2.0928, "step": 701400 }, { "epoch": 90.69166127989658, "grad_norm": 2.420210361480713, "learning_rate": 0.001, "loss": 2.1119, "step": 701500 }, { "epoch": 90.70458952811894, "grad_norm": 1.8061020374298096, "learning_rate": 0.001, "loss": 2.1044, "step": 701600 }, { "epoch": 90.71751777634131, "grad_norm": 2.4468870162963867, "learning_rate": 0.001, "loss": 2.0854, "step": 701700 }, { "epoch": 90.73044602456368, "grad_norm": 1.9699790477752686, "learning_rate": 0.001, "loss": 2.1126, "step": 701800 }, { "epoch": 90.74337427278604, "grad_norm": 2.2844221591949463, "learning_rate": 0.001, "loss": 2.0867, "step": 701900 }, { "epoch": 90.75630252100841, "grad_norm": 2.27612042427063, "learning_rate": 0.001, "loss": 2.1132, "step": 702000 }, { "epoch": 90.76923076923077, "grad_norm": 2.0659282207489014, "learning_rate": 0.001, "loss": 2.1016, "step": 702100 }, { "epoch": 90.78215901745314, "grad_norm": 3.135788679122925, "learning_rate": 0.001, "loss": 2.1197, "step": 702200 }, { "epoch": 90.7950872656755, "grad_norm": 2.175541877746582, "learning_rate": 0.001, "loss": 2.1229, "step": 702300 }, { "epoch": 90.80801551389787, "grad_norm": 2.2132561206817627, "learning_rate": 0.001, "loss": 2.1015, "step": 702400 }, { "epoch": 90.82094376212024, "grad_norm": 2.0245048999786377, "learning_rate": 0.001, "loss": 2.1055, "step": 702500 }, { "epoch": 90.8338720103426, "grad_norm": 2.9017252922058105, "learning_rate": 0.001, "loss": 2.1294, "step": 702600 }, { "epoch": 90.84680025856497, "grad_norm": 1.7418770790100098, "learning_rate": 0.001, "loss": 2.1034, "step": 702700 }, { "epoch": 90.85972850678733, "grad_norm": 1.8264260292053223, "learning_rate": 0.001, "loss": 2.1036, "step": 702800 }, { "epoch": 90.8726567550097, "grad_norm": 3.5025088787078857, "learning_rate": 0.001, "loss": 2.1059, "step": 702900 }, { "epoch": 90.88558500323207, "grad_norm": 1.7479324340820312, "learning_rate": 0.001, "loss": 2.1094, "step": 703000 }, { "epoch": 90.89851325145443, "grad_norm": 1.7878388166427612, "learning_rate": 0.001, "loss": 2.1067, "step": 703100 }, { "epoch": 90.9114414996768, "grad_norm": 22.37232208251953, "learning_rate": 0.001, "loss": 2.1276, "step": 703200 }, { "epoch": 90.92436974789916, "grad_norm": 3.670975923538208, "learning_rate": 0.001, "loss": 2.1061, "step": 703300 }, { "epoch": 90.93729799612153, "grad_norm": 2.241339683532715, "learning_rate": 0.001, "loss": 2.098, "step": 703400 }, { "epoch": 90.9502262443439, "grad_norm": 1.8141906261444092, "learning_rate": 0.001, "loss": 2.1154, "step": 703500 }, { "epoch": 90.96315449256626, "grad_norm": 1.627529501914978, "learning_rate": 0.001, "loss": 2.1157, "step": 703600 }, { "epoch": 90.97608274078863, "grad_norm": 1.5293877124786377, "learning_rate": 0.001, "loss": 2.1194, "step": 703700 }, { "epoch": 90.98901098901099, "grad_norm": 2.3672842979431152, "learning_rate": 0.001, "loss": 2.1123, "step": 703800 }, { "epoch": 91.00193923723336, "grad_norm": 1.5243830680847168, "learning_rate": 0.001, "loss": 2.0956, "step": 703900 }, { "epoch": 91.01486748545572, "grad_norm": 0.86536705493927, "learning_rate": 0.001, "loss": 2.0483, "step": 704000 }, { "epoch": 91.02779573367809, "grad_norm": 1.476426124572754, "learning_rate": 0.001, "loss": 2.0235, "step": 704100 }, { "epoch": 91.04072398190046, "grad_norm": 0.9075133800506592, "learning_rate": 0.001, "loss": 2.0356, "step": 704200 }, { "epoch": 91.05365223012282, "grad_norm": 0.932953417301178, "learning_rate": 0.001, "loss": 2.0412, "step": 704300 }, { "epoch": 91.06658047834519, "grad_norm": 1.4115345478057861, "learning_rate": 0.001, "loss": 2.037, "step": 704400 }, { "epoch": 91.07950872656755, "grad_norm": 1.2938653230667114, "learning_rate": 0.001, "loss": 2.0353, "step": 704500 }, { "epoch": 91.09243697478992, "grad_norm": 1.5065386295318604, "learning_rate": 0.001, "loss": 2.04, "step": 704600 }, { "epoch": 91.10536522301229, "grad_norm": 1.8279998302459717, "learning_rate": 0.001, "loss": 2.0579, "step": 704700 }, { "epoch": 91.11829347123465, "grad_norm": 1.0348374843597412, "learning_rate": 0.001, "loss": 2.045, "step": 704800 }, { "epoch": 91.13122171945702, "grad_norm": 1.2232636213302612, "learning_rate": 0.001, "loss": 2.0581, "step": 704900 }, { "epoch": 91.14414996767938, "grad_norm": 1.9908746480941772, "learning_rate": 0.001, "loss": 2.0358, "step": 705000 }, { "epoch": 91.15707821590175, "grad_norm": 0.9896164536476135, "learning_rate": 0.001, "loss": 2.054, "step": 705100 }, { "epoch": 91.17000646412411, "grad_norm": 1.122342824935913, "learning_rate": 0.001, "loss": 2.0418, "step": 705200 }, { "epoch": 91.18293471234648, "grad_norm": 1.8719918727874756, "learning_rate": 0.001, "loss": 2.0512, "step": 705300 }, { "epoch": 91.19586296056885, "grad_norm": 1.2655725479125977, "learning_rate": 0.001, "loss": 2.0529, "step": 705400 }, { "epoch": 91.20879120879121, "grad_norm": 0.9077129364013672, "learning_rate": 0.001, "loss": 2.0535, "step": 705500 }, { "epoch": 91.22171945701358, "grad_norm": 1.2538748979568481, "learning_rate": 0.001, "loss": 2.0795, "step": 705600 }, { "epoch": 91.23464770523594, "grad_norm": 84.45097351074219, "learning_rate": 0.001, "loss": 2.0567, "step": 705700 }, { "epoch": 91.24757595345831, "grad_norm": 1.6066386699676514, "learning_rate": 0.001, "loss": 2.0503, "step": 705800 }, { "epoch": 91.26050420168067, "grad_norm": 1.5409250259399414, "learning_rate": 0.001, "loss": 2.0605, "step": 705900 }, { "epoch": 91.27343244990304, "grad_norm": 1.0991278886795044, "learning_rate": 0.001, "loss": 2.0731, "step": 706000 }, { "epoch": 91.2863606981254, "grad_norm": 1.5564777851104736, "learning_rate": 0.001, "loss": 2.0503, "step": 706100 }, { "epoch": 91.29928894634777, "grad_norm": 0.9312041997909546, "learning_rate": 0.001, "loss": 2.0672, "step": 706200 }, { "epoch": 91.31221719457014, "grad_norm": 1.5446999073028564, "learning_rate": 0.001, "loss": 2.0796, "step": 706300 }, { "epoch": 91.3251454427925, "grad_norm": 0.892019510269165, "learning_rate": 0.001, "loss": 2.0629, "step": 706400 }, { "epoch": 91.33807369101487, "grad_norm": 1.094099998474121, "learning_rate": 0.001, "loss": 2.0529, "step": 706500 }, { "epoch": 91.35100193923724, "grad_norm": 1.2216938734054565, "learning_rate": 0.001, "loss": 2.0675, "step": 706600 }, { "epoch": 91.3639301874596, "grad_norm": 1.0615322589874268, "learning_rate": 0.001, "loss": 2.0598, "step": 706700 }, { "epoch": 91.37685843568197, "grad_norm": 0.9952980875968933, "learning_rate": 0.001, "loss": 2.069, "step": 706800 }, { "epoch": 91.38978668390433, "grad_norm": 1.4513121843338013, "learning_rate": 0.001, "loss": 2.0731, "step": 706900 }, { "epoch": 91.4027149321267, "grad_norm": 1.418848991394043, "learning_rate": 0.001, "loss": 2.0576, "step": 707000 }, { "epoch": 91.41564318034906, "grad_norm": 0.8819543719291687, "learning_rate": 0.001, "loss": 2.069, "step": 707100 }, { "epoch": 91.42857142857143, "grad_norm": 1.214870572090149, "learning_rate": 0.001, "loss": 2.0771, "step": 707200 }, { "epoch": 91.4414996767938, "grad_norm": 1.4262523651123047, "learning_rate": 0.001, "loss": 2.0893, "step": 707300 }, { "epoch": 91.45442792501616, "grad_norm": 1.0748573541641235, "learning_rate": 0.001, "loss": 2.0696, "step": 707400 }, { "epoch": 91.46735617323853, "grad_norm": 1.1592611074447632, "learning_rate": 0.001, "loss": 2.0993, "step": 707500 }, { "epoch": 91.4802844214609, "grad_norm": 0.9155535697937012, "learning_rate": 0.001, "loss": 2.0895, "step": 707600 }, { "epoch": 91.49321266968326, "grad_norm": 1.3611125946044922, "learning_rate": 0.001, "loss": 2.0954, "step": 707700 }, { "epoch": 91.50614091790563, "grad_norm": 1.0512598752975464, "learning_rate": 0.001, "loss": 2.0929, "step": 707800 }, { "epoch": 91.51906916612799, "grad_norm": 0.8463331460952759, "learning_rate": 0.001, "loss": 2.0831, "step": 707900 }, { "epoch": 91.53199741435036, "grad_norm": 0.8863061666488647, "learning_rate": 0.001, "loss": 2.0897, "step": 708000 }, { "epoch": 91.54492566257272, "grad_norm": 1.52980637550354, "learning_rate": 0.001, "loss": 2.0727, "step": 708100 }, { "epoch": 91.55785391079509, "grad_norm": 1.251807451248169, "learning_rate": 0.001, "loss": 2.1107, "step": 708200 }, { "epoch": 91.57078215901745, "grad_norm": 6.81406307220459, "learning_rate": 0.001, "loss": 2.1002, "step": 708300 }, { "epoch": 91.58371040723982, "grad_norm": 1.4304715394973755, "learning_rate": 0.001, "loss": 2.0757, "step": 708400 }, { "epoch": 91.59663865546219, "grad_norm": 1.115186095237732, "learning_rate": 0.001, "loss": 2.0977, "step": 708500 }, { "epoch": 91.60956690368455, "grad_norm": 1.5235446691513062, "learning_rate": 0.001, "loss": 2.0992, "step": 708600 }, { "epoch": 91.62249515190692, "grad_norm": 1.553852915763855, "learning_rate": 0.001, "loss": 2.1059, "step": 708700 }, { "epoch": 91.63542340012928, "grad_norm": 2.0099313259124756, "learning_rate": 0.001, "loss": 2.099, "step": 708800 }, { "epoch": 91.64835164835165, "grad_norm": 0.9558082222938538, "learning_rate": 0.001, "loss": 2.1197, "step": 708900 }, { "epoch": 91.66127989657402, "grad_norm": 1.2295665740966797, "learning_rate": 0.001, "loss": 2.1099, "step": 709000 }, { "epoch": 91.67420814479638, "grad_norm": 1.3244715929031372, "learning_rate": 0.001, "loss": 2.1097, "step": 709100 }, { "epoch": 91.68713639301875, "grad_norm": 2.3740394115448, "learning_rate": 0.001, "loss": 2.1084, "step": 709200 }, { "epoch": 91.70006464124111, "grad_norm": 1.3027290105819702, "learning_rate": 0.001, "loss": 2.1184, "step": 709300 }, { "epoch": 91.71299288946348, "grad_norm": 1.0273375511169434, "learning_rate": 0.001, "loss": 2.1078, "step": 709400 }, { "epoch": 91.72592113768584, "grad_norm": 51.018028259277344, "learning_rate": 0.001, "loss": 2.1165, "step": 709500 }, { "epoch": 91.73884938590821, "grad_norm": 1.1149619817733765, "learning_rate": 0.001, "loss": 2.1221, "step": 709600 }, { "epoch": 91.75177763413058, "grad_norm": 1.5266441106796265, "learning_rate": 0.001, "loss": 2.0953, "step": 709700 }, { "epoch": 91.76470588235294, "grad_norm": 1.0606414079666138, "learning_rate": 0.001, "loss": 2.1004, "step": 709800 }, { "epoch": 91.77763413057531, "grad_norm": 1.4079022407531738, "learning_rate": 0.001, "loss": 2.1272, "step": 709900 }, { "epoch": 91.79056237879767, "grad_norm": 1.4858378171920776, "learning_rate": 0.001, "loss": 2.1107, "step": 710000 }, { "epoch": 91.80349062702004, "grad_norm": 0.9879045486450195, "learning_rate": 0.001, "loss": 2.099, "step": 710100 }, { "epoch": 91.8164188752424, "grad_norm": 1.554813265800476, "learning_rate": 0.001, "loss": 2.1036, "step": 710200 }, { "epoch": 91.82934712346477, "grad_norm": 1.8580807447433472, "learning_rate": 0.001, "loss": 2.1118, "step": 710300 }, { "epoch": 91.84227537168714, "grad_norm": 1.127497911453247, "learning_rate": 0.001, "loss": 2.1023, "step": 710400 }, { "epoch": 91.8552036199095, "grad_norm": 1.3578435182571411, "learning_rate": 0.001, "loss": 2.1153, "step": 710500 }, { "epoch": 91.86813186813187, "grad_norm": 0.8228057026863098, "learning_rate": 0.001, "loss": 2.1211, "step": 710600 }, { "epoch": 91.88106011635423, "grad_norm": 1.5074803829193115, "learning_rate": 0.001, "loss": 2.1135, "step": 710700 }, { "epoch": 91.8939883645766, "grad_norm": 1.192051887512207, "learning_rate": 0.001, "loss": 2.1001, "step": 710800 }, { "epoch": 91.90691661279897, "grad_norm": 3.239490032196045, "learning_rate": 0.001, "loss": 2.1126, "step": 710900 }, { "epoch": 91.91984486102133, "grad_norm": 1.031209111213684, "learning_rate": 0.001, "loss": 2.1212, "step": 711000 }, { "epoch": 91.9327731092437, "grad_norm": 0.9453611969947815, "learning_rate": 0.001, "loss": 2.1412, "step": 711100 }, { "epoch": 91.94570135746606, "grad_norm": 0.9672669768333435, "learning_rate": 0.001, "loss": 2.1307, "step": 711200 }, { "epoch": 91.95862960568843, "grad_norm": 1.7787262201309204, "learning_rate": 0.001, "loss": 2.1237, "step": 711300 }, { "epoch": 91.9715578539108, "grad_norm": 2.1786704063415527, "learning_rate": 0.001, "loss": 2.0974, "step": 711400 }, { "epoch": 91.98448610213316, "grad_norm": 1.2323178052902222, "learning_rate": 0.001, "loss": 2.111, "step": 711500 }, { "epoch": 91.99741435035553, "grad_norm": 1.6393513679504395, "learning_rate": 0.001, "loss": 2.1232, "step": 711600 }, { "epoch": 92.01034259857789, "grad_norm": 1.0779187679290771, "learning_rate": 0.001, "loss": 2.0664, "step": 711700 }, { "epoch": 92.02327084680026, "grad_norm": 1.781935214996338, "learning_rate": 0.001, "loss": 2.0357, "step": 711800 }, { "epoch": 92.03619909502262, "grad_norm": 0.9940275549888611, "learning_rate": 0.001, "loss": 2.0355, "step": 711900 }, { "epoch": 92.04912734324499, "grad_norm": 1.017614722251892, "learning_rate": 0.001, "loss": 2.0252, "step": 712000 }, { "epoch": 92.06205559146736, "grad_norm": 1.258502721786499, "learning_rate": 0.001, "loss": 2.0558, "step": 712100 }, { "epoch": 92.07498383968972, "grad_norm": 1.1422890424728394, "learning_rate": 0.001, "loss": 2.0536, "step": 712200 }, { "epoch": 92.08791208791209, "grad_norm": 2.5643081665039062, "learning_rate": 0.001, "loss": 2.0487, "step": 712300 }, { "epoch": 92.10084033613445, "grad_norm": 1.2826876640319824, "learning_rate": 0.001, "loss": 2.0494, "step": 712400 }, { "epoch": 92.11376858435682, "grad_norm": 0.9187793135643005, "learning_rate": 0.001, "loss": 2.0575, "step": 712500 }, { "epoch": 92.12669683257919, "grad_norm": 1.0026825666427612, "learning_rate": 0.001, "loss": 2.0434, "step": 712600 }, { "epoch": 92.13962508080155, "grad_norm": 13.145195960998535, "learning_rate": 0.001, "loss": 2.0534, "step": 712700 }, { "epoch": 92.15255332902392, "grad_norm": 1.3979874849319458, "learning_rate": 0.001, "loss": 2.0735, "step": 712800 }, { "epoch": 92.16548157724628, "grad_norm": 1.4073565006256104, "learning_rate": 0.001, "loss": 2.0515, "step": 712900 }, { "epoch": 92.17840982546865, "grad_norm": 1.1810882091522217, "learning_rate": 0.001, "loss": 2.0516, "step": 713000 }, { "epoch": 92.19133807369101, "grad_norm": 2.002256155014038, "learning_rate": 0.001, "loss": 2.0671, "step": 713100 }, { "epoch": 92.20426632191338, "grad_norm": 1.828918695449829, "learning_rate": 0.001, "loss": 2.0572, "step": 713200 }, { "epoch": 92.21719457013575, "grad_norm": 1.419687032699585, "learning_rate": 0.001, "loss": 2.0509, "step": 713300 }, { "epoch": 92.23012281835811, "grad_norm": 1.111018180847168, "learning_rate": 0.001, "loss": 2.0613, "step": 713400 }, { "epoch": 92.24305106658048, "grad_norm": 1.050318956375122, "learning_rate": 0.001, "loss": 2.0688, "step": 713500 }, { "epoch": 92.25597931480284, "grad_norm": 1.2485201358795166, "learning_rate": 0.001, "loss": 2.0657, "step": 713600 }, { "epoch": 92.26890756302521, "grad_norm": 0.9798104763031006, "learning_rate": 0.001, "loss": 2.0715, "step": 713700 }, { "epoch": 92.28183581124757, "grad_norm": 3.3305108547210693, "learning_rate": 0.001, "loss": 2.0687, "step": 713800 }, { "epoch": 92.29476405946994, "grad_norm": 1.236644983291626, "learning_rate": 0.001, "loss": 2.0551, "step": 713900 }, { "epoch": 92.3076923076923, "grad_norm": 0.985194981098175, "learning_rate": 0.001, "loss": 2.0634, "step": 714000 }, { "epoch": 92.32062055591467, "grad_norm": 8.467185020446777, "learning_rate": 0.001, "loss": 2.0542, "step": 714100 }, { "epoch": 92.33354880413704, "grad_norm": 1.405228853225708, "learning_rate": 0.001, "loss": 2.0597, "step": 714200 }, { "epoch": 92.3464770523594, "grad_norm": 1.511078119277954, "learning_rate": 0.001, "loss": 2.0832, "step": 714300 }, { "epoch": 92.35940530058177, "grad_norm": 1.251650333404541, "learning_rate": 0.001, "loss": 2.0674, "step": 714400 }, { "epoch": 92.37233354880414, "grad_norm": 4.676865100860596, "learning_rate": 0.001, "loss": 2.0622, "step": 714500 }, { "epoch": 92.3852617970265, "grad_norm": 1.1997442245483398, "learning_rate": 0.001, "loss": 2.0737, "step": 714600 }, { "epoch": 92.39819004524887, "grad_norm": 1.2773349285125732, "learning_rate": 0.001, "loss": 2.0709, "step": 714700 }, { "epoch": 92.41111829347123, "grad_norm": 1.5337669849395752, "learning_rate": 0.001, "loss": 2.0616, "step": 714800 }, { "epoch": 92.4240465416936, "grad_norm": 2.2858335971832275, "learning_rate": 0.001, "loss": 2.086, "step": 714900 }, { "epoch": 92.43697478991596, "grad_norm": 2.006777286529541, "learning_rate": 0.001, "loss": 2.0942, "step": 715000 }, { "epoch": 92.44990303813833, "grad_norm": 1.2843056917190552, "learning_rate": 0.001, "loss": 2.0717, "step": 715100 }, { "epoch": 92.4628312863607, "grad_norm": 1.6231836080551147, "learning_rate": 0.001, "loss": 2.0793, "step": 715200 }, { "epoch": 92.47575953458306, "grad_norm": 1.3682843446731567, "learning_rate": 0.001, "loss": 2.0791, "step": 715300 }, { "epoch": 92.48868778280543, "grad_norm": 1.6910853385925293, "learning_rate": 0.001, "loss": 2.07, "step": 715400 }, { "epoch": 92.5016160310278, "grad_norm": 2.277350902557373, "learning_rate": 0.001, "loss": 2.0931, "step": 715500 }, { "epoch": 92.51454427925016, "grad_norm": 4.343719005584717, "learning_rate": 0.001, "loss": 2.0955, "step": 715600 }, { "epoch": 92.52747252747253, "grad_norm": 2.667124032974243, "learning_rate": 0.001, "loss": 2.0963, "step": 715700 }, { "epoch": 92.54040077569489, "grad_norm": 1.3432934284210205, "learning_rate": 0.001, "loss": 2.0892, "step": 715800 }, { "epoch": 92.55332902391726, "grad_norm": 3.067654848098755, "learning_rate": 0.001, "loss": 2.1141, "step": 715900 }, { "epoch": 92.56625727213962, "grad_norm": 1.203290343284607, "learning_rate": 0.001, "loss": 2.0973, "step": 716000 }, { "epoch": 92.57918552036199, "grad_norm": 1.272364854812622, "learning_rate": 0.001, "loss": 2.0806, "step": 716100 }, { "epoch": 92.59211376858435, "grad_norm": 0.9129601120948792, "learning_rate": 0.001, "loss": 2.0806, "step": 716200 }, { "epoch": 92.60504201680672, "grad_norm": 1.62814462184906, "learning_rate": 0.001, "loss": 2.1058, "step": 716300 }, { "epoch": 92.61797026502909, "grad_norm": 1.6724164485931396, "learning_rate": 0.001, "loss": 2.1143, "step": 716400 }, { "epoch": 92.63089851325145, "grad_norm": 1.0412232875823975, "learning_rate": 0.001, "loss": 2.1132, "step": 716500 }, { "epoch": 92.64382676147382, "grad_norm": 2.2707161903381348, "learning_rate": 0.001, "loss": 2.0878, "step": 716600 }, { "epoch": 92.65675500969618, "grad_norm": 1.0493700504302979, "learning_rate": 0.001, "loss": 2.1105, "step": 716700 }, { "epoch": 92.66968325791855, "grad_norm": 1.086326003074646, "learning_rate": 0.001, "loss": 2.1026, "step": 716800 }, { "epoch": 92.68261150614092, "grad_norm": 1.1138615608215332, "learning_rate": 0.001, "loss": 2.1136, "step": 716900 }, { "epoch": 92.69553975436328, "grad_norm": 1.4249836206436157, "learning_rate": 0.001, "loss": 2.119, "step": 717000 }, { "epoch": 92.70846800258565, "grad_norm": 1.4224294424057007, "learning_rate": 0.001, "loss": 2.1042, "step": 717100 }, { "epoch": 92.72139625080801, "grad_norm": 0.7829979062080383, "learning_rate": 0.001, "loss": 2.1188, "step": 717200 }, { "epoch": 92.73432449903038, "grad_norm": 0.9922003149986267, "learning_rate": 0.001, "loss": 2.1026, "step": 717300 }, { "epoch": 92.74725274725274, "grad_norm": 3.009495973587036, "learning_rate": 0.001, "loss": 2.0886, "step": 717400 }, { "epoch": 92.76018099547511, "grad_norm": 0.891880214214325, "learning_rate": 0.001, "loss": 2.1164, "step": 717500 }, { "epoch": 92.77310924369748, "grad_norm": 1.001165509223938, "learning_rate": 0.001, "loss": 2.0928, "step": 717600 }, { "epoch": 92.78603749191984, "grad_norm": 1.2609477043151855, "learning_rate": 0.001, "loss": 2.0882, "step": 717700 }, { "epoch": 92.79896574014221, "grad_norm": 0.9766662120819092, "learning_rate": 0.001, "loss": 2.1083, "step": 717800 }, { "epoch": 92.81189398836457, "grad_norm": 1.1826149225234985, "learning_rate": 0.001, "loss": 2.097, "step": 717900 }, { "epoch": 92.82482223658694, "grad_norm": 0.9647908210754395, "learning_rate": 0.001, "loss": 2.0851, "step": 718000 }, { "epoch": 92.8377504848093, "grad_norm": 1.270946979522705, "learning_rate": 0.001, "loss": 2.115, "step": 718100 }, { "epoch": 92.85067873303167, "grad_norm": 0.9844934940338135, "learning_rate": 0.001, "loss": 2.11, "step": 718200 }, { "epoch": 92.86360698125404, "grad_norm": 1.1919339895248413, "learning_rate": 0.001, "loss": 2.1107, "step": 718300 }, { "epoch": 92.8765352294764, "grad_norm": 1.013777732849121, "learning_rate": 0.001, "loss": 2.11, "step": 718400 }, { "epoch": 92.88946347769877, "grad_norm": 1.007153868675232, "learning_rate": 0.001, "loss": 2.1177, "step": 718500 }, { "epoch": 92.90239172592113, "grad_norm": 1.1020457744598389, "learning_rate": 0.001, "loss": 2.0933, "step": 718600 }, { "epoch": 92.9153199741435, "grad_norm": 1.0643088817596436, "learning_rate": 0.001, "loss": 2.1127, "step": 718700 }, { "epoch": 92.92824822236587, "grad_norm": 1.5958726406097412, "learning_rate": 0.001, "loss": 2.1351, "step": 718800 }, { "epoch": 92.94117647058823, "grad_norm": 1.072494387626648, "learning_rate": 0.001, "loss": 2.1019, "step": 718900 }, { "epoch": 92.9541047188106, "grad_norm": 2.8269131183624268, "learning_rate": 0.001, "loss": 2.1194, "step": 719000 }, { "epoch": 92.96703296703296, "grad_norm": 0.8903823494911194, "learning_rate": 0.001, "loss": 2.1225, "step": 719100 }, { "epoch": 92.97996121525533, "grad_norm": 2.8442084789276123, "learning_rate": 0.001, "loss": 2.1043, "step": 719200 }, { "epoch": 92.9928894634777, "grad_norm": 1.0140089988708496, "learning_rate": 0.001, "loss": 2.118, "step": 719300 }, { "epoch": 93.00581771170006, "grad_norm": 1.4536173343658447, "learning_rate": 0.001, "loss": 2.0553, "step": 719400 }, { "epoch": 93.01874595992243, "grad_norm": 2.204479694366455, "learning_rate": 0.001, "loss": 2.0256, "step": 719500 }, { "epoch": 93.03167420814479, "grad_norm": 2.6784751415252686, "learning_rate": 0.001, "loss": 2.0361, "step": 719600 }, { "epoch": 93.04460245636716, "grad_norm": 10.34334659576416, "learning_rate": 0.001, "loss": 2.0436, "step": 719700 }, { "epoch": 93.05753070458952, "grad_norm": 1.2168015241622925, "learning_rate": 0.001, "loss": 2.0344, "step": 719800 }, { "epoch": 93.07045895281189, "grad_norm": 1.4874576330184937, "learning_rate": 0.001, "loss": 2.0598, "step": 719900 }, { "epoch": 93.08338720103426, "grad_norm": 1.2836291790008545, "learning_rate": 0.001, "loss": 2.0448, "step": 720000 }, { "epoch": 93.09631544925662, "grad_norm": 1.3097561597824097, "learning_rate": 0.001, "loss": 2.0531, "step": 720100 }, { "epoch": 93.10924369747899, "grad_norm": 1.2569435834884644, "learning_rate": 0.001, "loss": 2.0593, "step": 720200 }, { "epoch": 93.12217194570135, "grad_norm": 1.2389391660690308, "learning_rate": 0.001, "loss": 2.0397, "step": 720300 }, { "epoch": 93.13510019392372, "grad_norm": 1.3280565738677979, "learning_rate": 0.001, "loss": 2.0473, "step": 720400 }, { "epoch": 93.14802844214609, "grad_norm": 1.3955429792404175, "learning_rate": 0.001, "loss": 2.0489, "step": 720500 }, { "epoch": 93.16095669036845, "grad_norm": 2.2947824001312256, "learning_rate": 0.001, "loss": 2.0498, "step": 720600 }, { "epoch": 93.17388493859082, "grad_norm": 1.3972495794296265, "learning_rate": 0.001, "loss": 2.047, "step": 720700 }, { "epoch": 93.18681318681318, "grad_norm": 2.585813283920288, "learning_rate": 0.001, "loss": 2.0534, "step": 720800 }, { "epoch": 93.19974143503555, "grad_norm": 1.4019272327423096, "learning_rate": 0.001, "loss": 2.0429, "step": 720900 }, { "epoch": 93.21266968325791, "grad_norm": 1.8997786045074463, "learning_rate": 0.001, "loss": 2.0526, "step": 721000 }, { "epoch": 93.22559793148028, "grad_norm": 69.7764892578125, "learning_rate": 0.001, "loss": 2.0567, "step": 721100 }, { "epoch": 93.23852617970265, "grad_norm": 1.4402474164962769, "learning_rate": 0.001, "loss": 2.0454, "step": 721200 }, { "epoch": 93.25145442792501, "grad_norm": 1.2612046003341675, "learning_rate": 0.001, "loss": 2.0595, "step": 721300 }, { "epoch": 93.26438267614738, "grad_norm": 1.2155663967132568, "learning_rate": 0.001, "loss": 2.0613, "step": 721400 }, { "epoch": 93.27731092436974, "grad_norm": 1.6371181011199951, "learning_rate": 0.001, "loss": 2.0623, "step": 721500 }, { "epoch": 93.29023917259211, "grad_norm": 1.3596032857894897, "learning_rate": 0.001, "loss": 2.0603, "step": 721600 }, { "epoch": 93.30316742081448, "grad_norm": 1.4232523441314697, "learning_rate": 0.001, "loss": 2.0768, "step": 721700 }, { "epoch": 93.31609566903684, "grad_norm": 1.154861330986023, "learning_rate": 0.001, "loss": 2.065, "step": 721800 }, { "epoch": 93.3290239172592, "grad_norm": 2.410336494445801, "learning_rate": 0.001, "loss": 2.0542, "step": 721900 }, { "epoch": 93.34195216548157, "grad_norm": 1.567678689956665, "learning_rate": 0.001, "loss": 2.0815, "step": 722000 }, { "epoch": 93.35488041370394, "grad_norm": 1.1511735916137695, "learning_rate": 0.001, "loss": 2.0787, "step": 722100 }, { "epoch": 93.3678086619263, "grad_norm": 1.1244969367980957, "learning_rate": 0.001, "loss": 2.088, "step": 722200 }, { "epoch": 93.38073691014867, "grad_norm": 1.6254032850265503, "learning_rate": 0.001, "loss": 2.0965, "step": 722300 }, { "epoch": 93.39366515837104, "grad_norm": 1.4228472709655762, "learning_rate": 0.001, "loss": 2.0793, "step": 722400 }, { "epoch": 93.4065934065934, "grad_norm": 1.2811743021011353, "learning_rate": 0.001, "loss": 2.0801, "step": 722500 }, { "epoch": 93.41952165481577, "grad_norm": 2.0589540004730225, "learning_rate": 0.001, "loss": 2.0642, "step": 722600 }, { "epoch": 93.43244990303813, "grad_norm": 1.099194884300232, "learning_rate": 0.001, "loss": 2.0769, "step": 722700 }, { "epoch": 93.4453781512605, "grad_norm": 1.1822096109390259, "learning_rate": 0.001, "loss": 2.0664, "step": 722800 }, { "epoch": 93.45830639948286, "grad_norm": 1.4440393447875977, "learning_rate": 0.001, "loss": 2.0683, "step": 722900 }, { "epoch": 93.47123464770523, "grad_norm": 1.27591073513031, "learning_rate": 0.001, "loss": 2.0754, "step": 723000 }, { "epoch": 93.4841628959276, "grad_norm": 1.2795581817626953, "learning_rate": 0.001, "loss": 2.086, "step": 723100 }, { "epoch": 93.49709114414996, "grad_norm": 1.210094690322876, "learning_rate": 0.001, "loss": 2.0791, "step": 723200 }, { "epoch": 93.51001939237233, "grad_norm": 1.9454641342163086, "learning_rate": 0.001, "loss": 2.0776, "step": 723300 }, { "epoch": 93.5229476405947, "grad_norm": 1.1874111890792847, "learning_rate": 0.001, "loss": 2.0914, "step": 723400 }, { "epoch": 93.53587588881706, "grad_norm": 1.4315433502197266, "learning_rate": 0.001, "loss": 2.0894, "step": 723500 }, { "epoch": 93.54880413703943, "grad_norm": 1.1825153827667236, "learning_rate": 0.001, "loss": 2.0943, "step": 723600 }, { "epoch": 93.56173238526179, "grad_norm": 1.797814965248108, "learning_rate": 0.001, "loss": 2.1042, "step": 723700 }, { "epoch": 93.57466063348416, "grad_norm": 1.4114274978637695, "learning_rate": 0.001, "loss": 2.0939, "step": 723800 }, { "epoch": 93.58758888170652, "grad_norm": 1.2156286239624023, "learning_rate": 0.001, "loss": 2.0701, "step": 723900 }, { "epoch": 93.60051712992889, "grad_norm": 2.347858428955078, "learning_rate": 0.001, "loss": 2.0869, "step": 724000 }, { "epoch": 93.61344537815125, "grad_norm": 7.267082214355469, "learning_rate": 0.001, "loss": 2.0998, "step": 724100 }, { "epoch": 93.62637362637362, "grad_norm": 1.704172134399414, "learning_rate": 0.001, "loss": 2.0771, "step": 724200 }, { "epoch": 93.63930187459599, "grad_norm": 2.2483723163604736, "learning_rate": 0.001, "loss": 2.0933, "step": 724300 }, { "epoch": 93.65223012281835, "grad_norm": 1.3275243043899536, "learning_rate": 0.001, "loss": 2.1034, "step": 724400 }, { "epoch": 93.66515837104072, "grad_norm": 1.0727999210357666, "learning_rate": 0.001, "loss": 2.1016, "step": 724500 }, { "epoch": 93.67808661926308, "grad_norm": 1.0177441835403442, "learning_rate": 0.001, "loss": 2.1122, "step": 724600 }, { "epoch": 93.69101486748545, "grad_norm": 1.7167296409606934, "learning_rate": 0.001, "loss": 2.1047, "step": 724700 }, { "epoch": 93.70394311570782, "grad_norm": 1.629538655281067, "learning_rate": 0.001, "loss": 2.1053, "step": 724800 }, { "epoch": 93.71687136393018, "grad_norm": 1.3650932312011719, "learning_rate": 0.001, "loss": 2.0954, "step": 724900 }, { "epoch": 93.72979961215255, "grad_norm": 3.009290933609009, "learning_rate": 0.001, "loss": 2.1067, "step": 725000 }, { "epoch": 93.74272786037491, "grad_norm": 6.815956115722656, "learning_rate": 0.001, "loss": 2.1162, "step": 725100 }, { "epoch": 93.75565610859728, "grad_norm": 1.5201005935668945, "learning_rate": 0.001, "loss": 2.1125, "step": 725200 }, { "epoch": 93.76858435681964, "grad_norm": 1.2599185705184937, "learning_rate": 0.001, "loss": 2.1171, "step": 725300 }, { "epoch": 93.78151260504201, "grad_norm": 3.6356329917907715, "learning_rate": 0.001, "loss": 2.1107, "step": 725400 }, { "epoch": 93.79444085326438, "grad_norm": 1.0783921480178833, "learning_rate": 0.001, "loss": 2.1015, "step": 725500 }, { "epoch": 93.80736910148674, "grad_norm": 1.9955159425735474, "learning_rate": 0.001, "loss": 2.0946, "step": 725600 }, { "epoch": 93.82029734970911, "grad_norm": 2.169011354446411, "learning_rate": 0.001, "loss": 2.1016, "step": 725700 }, { "epoch": 93.83322559793147, "grad_norm": 13.394356727600098, "learning_rate": 0.001, "loss": 2.1015, "step": 725800 }, { "epoch": 93.84615384615384, "grad_norm": 1.035314679145813, "learning_rate": 0.001, "loss": 2.1155, "step": 725900 }, { "epoch": 93.8590820943762, "grad_norm": 1.6765872240066528, "learning_rate": 0.001, "loss": 2.1297, "step": 726000 }, { "epoch": 93.87201034259857, "grad_norm": 1.482680082321167, "learning_rate": 0.001, "loss": 2.1204, "step": 726100 }, { "epoch": 93.88493859082094, "grad_norm": 1.1586261987686157, "learning_rate": 0.001, "loss": 2.1168, "step": 726200 }, { "epoch": 93.8978668390433, "grad_norm": 1.2053173780441284, "learning_rate": 0.001, "loss": 2.1155, "step": 726300 }, { "epoch": 93.91079508726567, "grad_norm": 1.7758787870407104, "learning_rate": 0.001, "loss": 2.1224, "step": 726400 }, { "epoch": 93.92372333548803, "grad_norm": 1.561867594718933, "learning_rate": 0.001, "loss": 2.0958, "step": 726500 }, { "epoch": 93.9366515837104, "grad_norm": 1.796181559562683, "learning_rate": 0.001, "loss": 2.1141, "step": 726600 }, { "epoch": 93.94957983193277, "grad_norm": 1.7317497730255127, "learning_rate": 0.001, "loss": 2.1133, "step": 726700 }, { "epoch": 93.96250808015513, "grad_norm": 2.2423062324523926, "learning_rate": 0.001, "loss": 2.1161, "step": 726800 }, { "epoch": 93.9754363283775, "grad_norm": 1.226156234741211, "learning_rate": 0.001, "loss": 2.1053, "step": 726900 }, { "epoch": 93.98836457659988, "grad_norm": 1.3122203350067139, "learning_rate": 0.001, "loss": 2.1132, "step": 727000 }, { "epoch": 94.00129282482224, "grad_norm": 1.3052226305007935, "learning_rate": 0.001, "loss": 2.104, "step": 727100 }, { "epoch": 94.01422107304461, "grad_norm": 1.5336592197418213, "learning_rate": 0.001, "loss": 2.0261, "step": 727200 }, { "epoch": 94.02714932126698, "grad_norm": 0.9276921153068542, "learning_rate": 0.001, "loss": 2.0593, "step": 727300 }, { "epoch": 94.04007756948934, "grad_norm": 1.4177181720733643, "learning_rate": 0.001, "loss": 2.0301, "step": 727400 }, { "epoch": 94.0530058177117, "grad_norm": 0.9244619607925415, "learning_rate": 0.001, "loss": 2.0333, "step": 727500 }, { "epoch": 94.06593406593407, "grad_norm": 0.9048365354537964, "learning_rate": 0.001, "loss": 2.0191, "step": 727600 }, { "epoch": 94.07886231415644, "grad_norm": 2.832162857055664, "learning_rate": 0.001, "loss": 2.0451, "step": 727700 }, { "epoch": 94.0917905623788, "grad_norm": 7.578777313232422, "learning_rate": 0.001, "loss": 2.0504, "step": 727800 }, { "epoch": 94.10471881060117, "grad_norm": 1.153145670890808, "learning_rate": 0.001, "loss": 2.0337, "step": 727900 }, { "epoch": 94.11764705882354, "grad_norm": 1.121364951133728, "learning_rate": 0.001, "loss": 2.0371, "step": 728000 }, { "epoch": 94.1305753070459, "grad_norm": 1.002484917640686, "learning_rate": 0.001, "loss": 2.0593, "step": 728100 }, { "epoch": 94.14350355526827, "grad_norm": 1.367431402206421, "learning_rate": 0.001, "loss": 2.0298, "step": 728200 }, { "epoch": 94.15643180349063, "grad_norm": 1.115747332572937, "learning_rate": 0.001, "loss": 2.0441, "step": 728300 }, { "epoch": 94.169360051713, "grad_norm": 1.4010339975357056, "learning_rate": 0.001, "loss": 2.0672, "step": 728400 }, { "epoch": 94.18228829993537, "grad_norm": 1.7060155868530273, "learning_rate": 0.001, "loss": 2.0478, "step": 728500 }, { "epoch": 94.19521654815773, "grad_norm": 1.5424976348876953, "learning_rate": 0.001, "loss": 2.0587, "step": 728600 }, { "epoch": 94.2081447963801, "grad_norm": 3.847830057144165, "learning_rate": 0.001, "loss": 2.0585, "step": 728700 }, { "epoch": 94.22107304460246, "grad_norm": 1.4124470949172974, "learning_rate": 0.001, "loss": 2.0626, "step": 728800 }, { "epoch": 94.23400129282483, "grad_norm": 0.9351629614830017, "learning_rate": 0.001, "loss": 2.0449, "step": 728900 }, { "epoch": 94.2469295410472, "grad_norm": 1.4383165836334229, "learning_rate": 0.001, "loss": 2.0556, "step": 729000 }, { "epoch": 94.25985778926956, "grad_norm": 1.1851890087127686, "learning_rate": 0.001, "loss": 2.0897, "step": 729100 }, { "epoch": 94.27278603749193, "grad_norm": 0.9738306403160095, "learning_rate": 0.001, "loss": 2.0663, "step": 729200 }, { "epoch": 94.28571428571429, "grad_norm": 4.0907464027404785, "learning_rate": 0.001, "loss": 2.0731, "step": 729300 }, { "epoch": 94.29864253393666, "grad_norm": 1.1429775953292847, "learning_rate": 0.001, "loss": 2.0772, "step": 729400 }, { "epoch": 94.31157078215902, "grad_norm": 0.8977724313735962, "learning_rate": 0.001, "loss": 2.0716, "step": 729500 }, { "epoch": 94.32449903038139, "grad_norm": 1.4026107788085938, "learning_rate": 0.001, "loss": 2.0728, "step": 729600 }, { "epoch": 94.33742727860376, "grad_norm": 0.9745983481407166, "learning_rate": 0.001, "loss": 2.0846, "step": 729700 }, { "epoch": 94.35035552682612, "grad_norm": 0.9301339983940125, "learning_rate": 0.001, "loss": 2.0711, "step": 729800 }, { "epoch": 94.36328377504849, "grad_norm": 3.1610653400421143, "learning_rate": 0.001, "loss": 2.0563, "step": 729900 }, { "epoch": 94.37621202327085, "grad_norm": 2.140411376953125, "learning_rate": 0.001, "loss": 2.0795, "step": 730000 }, { "epoch": 94.38914027149322, "grad_norm": 1.3349084854125977, "learning_rate": 0.001, "loss": 2.0738, "step": 730100 }, { "epoch": 94.40206851971558, "grad_norm": 0.8825266361236572, "learning_rate": 0.001, "loss": 2.0517, "step": 730200 }, { "epoch": 94.41499676793795, "grad_norm": 2.828174114227295, "learning_rate": 0.001, "loss": 2.0783, "step": 730300 }, { "epoch": 94.42792501616032, "grad_norm": 2.095654010772705, "learning_rate": 0.001, "loss": 2.0822, "step": 730400 }, { "epoch": 94.44085326438268, "grad_norm": 1.5042020082473755, "learning_rate": 0.001, "loss": 2.0704, "step": 730500 }, { "epoch": 94.45378151260505, "grad_norm": 2.419271230697632, "learning_rate": 0.001, "loss": 2.0612, "step": 730600 }, { "epoch": 94.46670976082741, "grad_norm": 1.4436737298965454, "learning_rate": 0.001, "loss": 2.0782, "step": 730700 }, { "epoch": 94.47963800904978, "grad_norm": 4.668410778045654, "learning_rate": 0.001, "loss": 2.08, "step": 730800 }, { "epoch": 94.49256625727214, "grad_norm": 1.2496998310089111, "learning_rate": 0.001, "loss": 2.0821, "step": 730900 }, { "epoch": 94.50549450549451, "grad_norm": 5.612856864929199, "learning_rate": 0.001, "loss": 2.0768, "step": 731000 }, { "epoch": 94.51842275371688, "grad_norm": 0.9946067929267883, "learning_rate": 0.001, "loss": 2.0793, "step": 731100 }, { "epoch": 94.53135100193924, "grad_norm": 7.229412078857422, "learning_rate": 0.001, "loss": 2.0982, "step": 731200 }, { "epoch": 94.54427925016161, "grad_norm": 1.629976749420166, "learning_rate": 0.001, "loss": 2.0834, "step": 731300 }, { "epoch": 94.55720749838397, "grad_norm": 0.9991433024406433, "learning_rate": 0.001, "loss": 2.0964, "step": 731400 }, { "epoch": 94.57013574660634, "grad_norm": 1.342138648033142, "learning_rate": 0.001, "loss": 2.0851, "step": 731500 }, { "epoch": 94.5830639948287, "grad_norm": 1.198196530342102, "learning_rate": 0.001, "loss": 2.0837, "step": 731600 }, { "epoch": 94.59599224305107, "grad_norm": 0.9527946710586548, "learning_rate": 0.001, "loss": 2.0904, "step": 731700 }, { "epoch": 94.60892049127344, "grad_norm": 1.1155974864959717, "learning_rate": 0.001, "loss": 2.0786, "step": 731800 }, { "epoch": 94.6218487394958, "grad_norm": 1.270581841468811, "learning_rate": 0.001, "loss": 2.1054, "step": 731900 }, { "epoch": 94.63477698771817, "grad_norm": 0.9522911906242371, "learning_rate": 0.001, "loss": 2.0915, "step": 732000 }, { "epoch": 94.64770523594053, "grad_norm": 0.9722246527671814, "learning_rate": 0.001, "loss": 2.0781, "step": 732100 }, { "epoch": 94.6606334841629, "grad_norm": 1.0109078884124756, "learning_rate": 0.001, "loss": 2.0871, "step": 732200 }, { "epoch": 94.67356173238527, "grad_norm": 1.7087854146957397, "learning_rate": 0.001, "loss": 2.097, "step": 732300 }, { "epoch": 94.68648998060763, "grad_norm": 1.2196874618530273, "learning_rate": 0.001, "loss": 2.1006, "step": 732400 }, { "epoch": 94.69941822883, "grad_norm": 1.2010701894760132, "learning_rate": 0.001, "loss": 2.099, "step": 732500 }, { "epoch": 94.71234647705236, "grad_norm": 1.379474401473999, "learning_rate": 0.001, "loss": 2.0873, "step": 732600 }, { "epoch": 94.72527472527473, "grad_norm": 2.713721752166748, "learning_rate": 0.001, "loss": 2.1146, "step": 732700 }, { "epoch": 94.7382029734971, "grad_norm": 1.0479947328567505, "learning_rate": 0.001, "loss": 2.0826, "step": 732800 }, { "epoch": 94.75113122171946, "grad_norm": 1.3714834451675415, "learning_rate": 0.001, "loss": 2.0938, "step": 732900 }, { "epoch": 94.76405946994183, "grad_norm": 1.0326377153396606, "learning_rate": 0.001, "loss": 2.0916, "step": 733000 }, { "epoch": 94.7769877181642, "grad_norm": 2.610685348510742, "learning_rate": 0.001, "loss": 2.0874, "step": 733100 }, { "epoch": 94.78991596638656, "grad_norm": 0.9343053102493286, "learning_rate": 0.001, "loss": 2.0857, "step": 733200 }, { "epoch": 94.80284421460892, "grad_norm": 1.4798623323440552, "learning_rate": 0.001, "loss": 2.0872, "step": 733300 }, { "epoch": 94.81577246283129, "grad_norm": 1.87284517288208, "learning_rate": 0.001, "loss": 2.11, "step": 733400 }, { "epoch": 94.82870071105366, "grad_norm": 2.6274588108062744, "learning_rate": 0.001, "loss": 2.0941, "step": 733500 }, { "epoch": 94.84162895927602, "grad_norm": 10.323975563049316, "learning_rate": 0.001, "loss": 2.1043, "step": 733600 }, { "epoch": 94.85455720749839, "grad_norm": 1.2500154972076416, "learning_rate": 0.001, "loss": 2.106, "step": 733700 }, { "epoch": 94.86748545572075, "grad_norm": 46.104251861572266, "learning_rate": 0.001, "loss": 2.1161, "step": 733800 }, { "epoch": 94.88041370394312, "grad_norm": 1.1285791397094727, "learning_rate": 0.001, "loss": 2.107, "step": 733900 }, { "epoch": 94.89334195216549, "grad_norm": 1.4714933633804321, "learning_rate": 0.001, "loss": 2.1021, "step": 734000 }, { "epoch": 94.90627020038785, "grad_norm": 1.4041218757629395, "learning_rate": 0.001, "loss": 2.0886, "step": 734100 }, { "epoch": 94.91919844861022, "grad_norm": 1.788190245628357, "learning_rate": 0.001, "loss": 2.1009, "step": 734200 }, { "epoch": 94.93212669683258, "grad_norm": 1.222821593284607, "learning_rate": 0.001, "loss": 2.1178, "step": 734300 }, { "epoch": 94.94505494505495, "grad_norm": 2.528068780899048, "learning_rate": 0.001, "loss": 2.0985, "step": 734400 }, { "epoch": 94.95798319327731, "grad_norm": 1.7806527614593506, "learning_rate": 0.001, "loss": 2.0949, "step": 734500 }, { "epoch": 94.97091144149968, "grad_norm": 1.212540864944458, "learning_rate": 0.001, "loss": 2.113, "step": 734600 }, { "epoch": 94.98383968972205, "grad_norm": 1.2091246843338013, "learning_rate": 0.001, "loss": 2.1392, "step": 734700 }, { "epoch": 94.99676793794441, "grad_norm": 1.0369374752044678, "learning_rate": 0.001, "loss": 2.1244, "step": 734800 }, { "epoch": 95.00969618616678, "grad_norm": 2.1611616611480713, "learning_rate": 0.001, "loss": 2.0613, "step": 734900 }, { "epoch": 95.02262443438914, "grad_norm": 1.3577936887741089, "learning_rate": 0.001, "loss": 2.0106, "step": 735000 }, { "epoch": 95.03555268261151, "grad_norm": 1.0201466083526611, "learning_rate": 0.001, "loss": 2.0335, "step": 735100 }, { "epoch": 95.04848093083388, "grad_norm": 1.4819419384002686, "learning_rate": 0.001, "loss": 2.0024, "step": 735200 }, { "epoch": 95.06140917905624, "grad_norm": 2.4616808891296387, "learning_rate": 0.001, "loss": 2.0165, "step": 735300 }, { "epoch": 95.0743374272786, "grad_norm": 2.5371286869049072, "learning_rate": 0.001, "loss": 2.019, "step": 735400 }, { "epoch": 95.08726567550097, "grad_norm": 0.987496018409729, "learning_rate": 0.001, "loss": 2.0335, "step": 735500 }, { "epoch": 95.10019392372334, "grad_norm": 1.6179163455963135, "learning_rate": 0.001, "loss": 2.0362, "step": 735600 }, { "epoch": 95.1131221719457, "grad_norm": 1.258858323097229, "learning_rate": 0.001, "loss": 2.0333, "step": 735700 }, { "epoch": 95.12605042016807, "grad_norm": 1.7300198078155518, "learning_rate": 0.001, "loss": 2.0441, "step": 735800 }, { "epoch": 95.13897866839044, "grad_norm": 1.080888271331787, "learning_rate": 0.001, "loss": 2.0326, "step": 735900 }, { "epoch": 95.1519069166128, "grad_norm": 1.0737078189849854, "learning_rate": 0.001, "loss": 2.0318, "step": 736000 }, { "epoch": 95.16483516483517, "grad_norm": 1.1777173280715942, "learning_rate": 0.001, "loss": 2.0526, "step": 736100 }, { "epoch": 95.17776341305753, "grad_norm": 2.0855796337127686, "learning_rate": 0.001, "loss": 2.0414, "step": 736200 }, { "epoch": 95.1906916612799, "grad_norm": 1.2904853820800781, "learning_rate": 0.001, "loss": 2.0398, "step": 736300 }, { "epoch": 95.20361990950227, "grad_norm": 1.7678133249282837, "learning_rate": 0.001, "loss": 2.0595, "step": 736400 }, { "epoch": 95.21654815772463, "grad_norm": 1.5107508897781372, "learning_rate": 0.001, "loss": 2.0435, "step": 736500 }, { "epoch": 95.229476405947, "grad_norm": 1.241921305656433, "learning_rate": 0.001, "loss": 2.0668, "step": 736600 }, { "epoch": 95.24240465416936, "grad_norm": 1.1521323919296265, "learning_rate": 0.001, "loss": 2.0637, "step": 736700 }, { "epoch": 95.25533290239173, "grad_norm": 1.237791895866394, "learning_rate": 0.001, "loss": 2.0441, "step": 736800 }, { "epoch": 95.2682611506141, "grad_norm": 3.517331123352051, "learning_rate": 0.001, "loss": 2.073, "step": 736900 }, { "epoch": 95.28118939883646, "grad_norm": 0.9448727965354919, "learning_rate": 0.001, "loss": 2.0478, "step": 737000 }, { "epoch": 95.29411764705883, "grad_norm": 1.7023669481277466, "learning_rate": 0.001, "loss": 2.0771, "step": 737100 }, { "epoch": 95.30704589528119, "grad_norm": 0.9504889845848083, "learning_rate": 0.001, "loss": 2.0584, "step": 737200 }, { "epoch": 95.31997414350356, "grad_norm": 1.1006306409835815, "learning_rate": 0.001, "loss": 2.082, "step": 737300 }, { "epoch": 95.33290239172592, "grad_norm": 1.1859722137451172, "learning_rate": 0.001, "loss": 2.0628, "step": 737400 }, { "epoch": 95.34583063994829, "grad_norm": 7.304642200469971, "learning_rate": 0.001, "loss": 2.0676, "step": 737500 }, { "epoch": 95.35875888817066, "grad_norm": 0.7953305244445801, "learning_rate": 0.001, "loss": 2.0803, "step": 737600 }, { "epoch": 95.37168713639302, "grad_norm": 1.4320207834243774, "learning_rate": 0.001, "loss": 2.0618, "step": 737700 }, { "epoch": 95.38461538461539, "grad_norm": 1.5580716133117676, "learning_rate": 0.001, "loss": 2.0736, "step": 737800 }, { "epoch": 95.39754363283775, "grad_norm": 1.694422960281372, "learning_rate": 0.001, "loss": 2.0642, "step": 737900 }, { "epoch": 95.41047188106012, "grad_norm": 0.8006861805915833, "learning_rate": 0.001, "loss": 2.0745, "step": 738000 }, { "epoch": 95.42340012928248, "grad_norm": 1.5715276002883911, "learning_rate": 0.001, "loss": 2.0815, "step": 738100 }, { "epoch": 95.43632837750485, "grad_norm": 3.224534749984741, "learning_rate": 0.001, "loss": 2.0598, "step": 738200 }, { "epoch": 95.44925662572722, "grad_norm": 0.8149052262306213, "learning_rate": 0.001, "loss": 2.0808, "step": 738300 }, { "epoch": 95.46218487394958, "grad_norm": 1.5409127473831177, "learning_rate": 0.001, "loss": 2.093, "step": 738400 }, { "epoch": 95.47511312217195, "grad_norm": 0.9434000253677368, "learning_rate": 0.001, "loss": 2.0684, "step": 738500 }, { "epoch": 95.48804137039431, "grad_norm": 0.9484599232673645, "learning_rate": 0.001, "loss": 2.0655, "step": 738600 }, { "epoch": 95.50096961861668, "grad_norm": 1.0792707204818726, "learning_rate": 0.001, "loss": 2.0689, "step": 738700 }, { "epoch": 95.51389786683905, "grad_norm": 1.1465253829956055, "learning_rate": 0.001, "loss": 2.0698, "step": 738800 }, { "epoch": 95.52682611506141, "grad_norm": 1.4939992427825928, "learning_rate": 0.001, "loss": 2.0739, "step": 738900 }, { "epoch": 95.53975436328378, "grad_norm": 1.265174388885498, "learning_rate": 0.001, "loss": 2.0777, "step": 739000 }, { "epoch": 95.55268261150614, "grad_norm": 1.2235928773880005, "learning_rate": 0.001, "loss": 2.0722, "step": 739100 }, { "epoch": 95.56561085972851, "grad_norm": 1.1798382997512817, "learning_rate": 0.001, "loss": 2.0721, "step": 739200 }, { "epoch": 95.57853910795087, "grad_norm": 1.1477298736572266, "learning_rate": 0.001, "loss": 2.069, "step": 739300 }, { "epoch": 95.59146735617324, "grad_norm": 1.143639087677002, "learning_rate": 0.001, "loss": 2.0751, "step": 739400 }, { "epoch": 95.6043956043956, "grad_norm": 1.0968639850616455, "learning_rate": 0.001, "loss": 2.0927, "step": 739500 }, { "epoch": 95.61732385261797, "grad_norm": 1.7733287811279297, "learning_rate": 0.001, "loss": 2.0688, "step": 739600 }, { "epoch": 95.63025210084034, "grad_norm": 1.134974718093872, "learning_rate": 0.001, "loss": 2.0855, "step": 739700 }, { "epoch": 95.6431803490627, "grad_norm": 1.0096824169158936, "learning_rate": 0.001, "loss": 2.085, "step": 739800 }, { "epoch": 95.65610859728507, "grad_norm": 1.7872651815414429, "learning_rate": 0.001, "loss": 2.0878, "step": 739900 }, { "epoch": 95.66903684550743, "grad_norm": 1.097344160079956, "learning_rate": 0.001, "loss": 2.0931, "step": 740000 }, { "epoch": 95.6819650937298, "grad_norm": 1.1772626638412476, "learning_rate": 0.001, "loss": 2.0782, "step": 740100 }, { "epoch": 95.69489334195217, "grad_norm": 9.257392883300781, "learning_rate": 0.001, "loss": 2.0832, "step": 740200 }, { "epoch": 95.70782159017453, "grad_norm": 1.167159080505371, "learning_rate": 0.001, "loss": 2.0776, "step": 740300 }, { "epoch": 95.7207498383969, "grad_norm": 2.00331449508667, "learning_rate": 0.001, "loss": 2.083, "step": 740400 }, { "epoch": 95.73367808661926, "grad_norm": 1.2364555597305298, "learning_rate": 0.001, "loss": 2.0904, "step": 740500 }, { "epoch": 95.74660633484163, "grad_norm": 1.0290769338607788, "learning_rate": 0.001, "loss": 2.0885, "step": 740600 }, { "epoch": 95.759534583064, "grad_norm": 1.0734727382659912, "learning_rate": 0.001, "loss": 2.0907, "step": 740700 }, { "epoch": 95.77246283128636, "grad_norm": 0.9668688178062439, "learning_rate": 0.001, "loss": 2.0981, "step": 740800 }, { "epoch": 95.78539107950873, "grad_norm": 1.7404284477233887, "learning_rate": 0.001, "loss": 2.1043, "step": 740900 }, { "epoch": 95.7983193277311, "grad_norm": 1.729507565498352, "learning_rate": 0.001, "loss": 2.1033, "step": 741000 }, { "epoch": 95.81124757595346, "grad_norm": 1.0948492288589478, "learning_rate": 0.001, "loss": 2.1046, "step": 741100 }, { "epoch": 95.82417582417582, "grad_norm": 1.0377116203308105, "learning_rate": 0.001, "loss": 2.0949, "step": 741200 }, { "epoch": 95.83710407239819, "grad_norm": 1.1473023891448975, "learning_rate": 0.001, "loss": 2.0934, "step": 741300 }, { "epoch": 95.85003232062056, "grad_norm": 1.23599112033844, "learning_rate": 0.001, "loss": 2.1087, "step": 741400 }, { "epoch": 95.86296056884292, "grad_norm": 2.375060558319092, "learning_rate": 0.001, "loss": 2.104, "step": 741500 }, { "epoch": 95.87588881706529, "grad_norm": 1.062970757484436, "learning_rate": 0.001, "loss": 2.1118, "step": 741600 }, { "epoch": 95.88881706528765, "grad_norm": 0.9830029010772705, "learning_rate": 0.001, "loss": 2.0886, "step": 741700 }, { "epoch": 95.90174531351002, "grad_norm": 1.2176225185394287, "learning_rate": 0.001, "loss": 2.0919, "step": 741800 }, { "epoch": 95.91467356173239, "grad_norm": 0.9263651967048645, "learning_rate": 0.001, "loss": 2.1136, "step": 741900 }, { "epoch": 95.92760180995475, "grad_norm": 1.5449769496917725, "learning_rate": 0.001, "loss": 2.1164, "step": 742000 }, { "epoch": 95.94053005817712, "grad_norm": 1.0753690004348755, "learning_rate": 0.001, "loss": 2.1173, "step": 742100 }, { "epoch": 95.95345830639948, "grad_norm": 1.5101827383041382, "learning_rate": 0.001, "loss": 2.1313, "step": 742200 }, { "epoch": 95.96638655462185, "grad_norm": 1.1950047016143799, "learning_rate": 0.001, "loss": 2.1072, "step": 742300 }, { "epoch": 95.97931480284421, "grad_norm": 0.9887027740478516, "learning_rate": 0.001, "loss": 2.1161, "step": 742400 }, { "epoch": 95.99224305106658, "grad_norm": 1.2302117347717285, "learning_rate": 0.001, "loss": 2.1143, "step": 742500 }, { "epoch": 96.00517129928895, "grad_norm": 1.3448816537857056, "learning_rate": 0.001, "loss": 2.0797, "step": 742600 }, { "epoch": 96.01809954751131, "grad_norm": 2.0213217735290527, "learning_rate": 0.001, "loss": 2.0211, "step": 742700 }, { "epoch": 96.03102779573368, "grad_norm": 1.1517120599746704, "learning_rate": 0.001, "loss": 2.0213, "step": 742800 }, { "epoch": 96.04395604395604, "grad_norm": 1.8902297019958496, "learning_rate": 0.001, "loss": 2.0295, "step": 742900 }, { "epoch": 96.05688429217841, "grad_norm": 1.3065502643585205, "learning_rate": 0.001, "loss": 2.032, "step": 743000 }, { "epoch": 96.06981254040078, "grad_norm": 1.8424053192138672, "learning_rate": 0.001, "loss": 2.0281, "step": 743100 }, { "epoch": 96.08274078862314, "grad_norm": 1.187770962715149, "learning_rate": 0.001, "loss": 2.0397, "step": 743200 }, { "epoch": 96.0956690368455, "grad_norm": 1.522733449935913, "learning_rate": 0.001, "loss": 2.0434, "step": 743300 }, { "epoch": 96.10859728506787, "grad_norm": 1.5776983499526978, "learning_rate": 0.001, "loss": 2.0413, "step": 743400 }, { "epoch": 96.12152553329024, "grad_norm": 2.061474084854126, "learning_rate": 0.001, "loss": 2.041, "step": 743500 }, { "epoch": 96.1344537815126, "grad_norm": 2.6538615226745605, "learning_rate": 0.001, "loss": 2.0303, "step": 743600 }, { "epoch": 96.14738202973497, "grad_norm": 1.1429013013839722, "learning_rate": 0.001, "loss": 2.0532, "step": 743700 }, { "epoch": 96.16031027795734, "grad_norm": 1.1463191509246826, "learning_rate": 0.001, "loss": 2.0416, "step": 743800 }, { "epoch": 96.1732385261797, "grad_norm": 1.1023638248443604, "learning_rate": 0.001, "loss": 2.0373, "step": 743900 }, { "epoch": 96.18616677440207, "grad_norm": 1.266683578491211, "learning_rate": 0.001, "loss": 2.027, "step": 744000 }, { "epoch": 96.19909502262443, "grad_norm": 1.4276620149612427, "learning_rate": 0.001, "loss": 2.0598, "step": 744100 }, { "epoch": 96.2120232708468, "grad_norm": 1.1743721961975098, "learning_rate": 0.001, "loss": 2.0473, "step": 744200 }, { "epoch": 96.22495151906917, "grad_norm": 1.7043867111206055, "learning_rate": 0.001, "loss": 2.0526, "step": 744300 }, { "epoch": 96.23787976729153, "grad_norm": 4.138449668884277, "learning_rate": 0.001, "loss": 2.0593, "step": 744400 }, { "epoch": 96.2508080155139, "grad_norm": 5.7319135665893555, "learning_rate": 0.001, "loss": 2.0453, "step": 744500 }, { "epoch": 96.26373626373626, "grad_norm": 1.1716967821121216, "learning_rate": 0.001, "loss": 2.0658, "step": 744600 }, { "epoch": 96.27666451195863, "grad_norm": 1.8222345113754272, "learning_rate": 0.001, "loss": 2.0321, "step": 744700 }, { "epoch": 96.289592760181, "grad_norm": 1.2463715076446533, "learning_rate": 0.001, "loss": 2.0637, "step": 744800 }, { "epoch": 96.30252100840336, "grad_norm": 1.444382667541504, "learning_rate": 0.001, "loss": 2.077, "step": 744900 }, { "epoch": 96.31544925662573, "grad_norm": 1.099483847618103, "learning_rate": 0.001, "loss": 2.0686, "step": 745000 }, { "epoch": 96.32837750484809, "grad_norm": 1.342423677444458, "learning_rate": 0.001, "loss": 2.0865, "step": 745100 }, { "epoch": 96.34130575307046, "grad_norm": 1.4854308366775513, "learning_rate": 0.001, "loss": 2.0584, "step": 745200 }, { "epoch": 96.35423400129282, "grad_norm": 1.068543553352356, "learning_rate": 0.001, "loss": 2.0585, "step": 745300 }, { "epoch": 96.36716224951519, "grad_norm": 1.1614903211593628, "learning_rate": 0.001, "loss": 2.0961, "step": 745400 }, { "epoch": 96.38009049773756, "grad_norm": 2.126065731048584, "learning_rate": 0.001, "loss": 2.0632, "step": 745500 }, { "epoch": 96.39301874595992, "grad_norm": 0.9513659477233887, "learning_rate": 0.001, "loss": 2.0854, "step": 745600 }, { "epoch": 96.40594699418229, "grad_norm": 1.5710575580596924, "learning_rate": 0.001, "loss": 2.0609, "step": 745700 }, { "epoch": 96.41887524240465, "grad_norm": 1.0982463359832764, "learning_rate": 0.001, "loss": 2.0655, "step": 745800 }, { "epoch": 96.43180349062702, "grad_norm": 1.2149428129196167, "learning_rate": 0.001, "loss": 2.0723, "step": 745900 }, { "epoch": 96.44473173884938, "grad_norm": 1.8792155981063843, "learning_rate": 0.001, "loss": 2.081, "step": 746000 }, { "epoch": 96.45765998707175, "grad_norm": 1.2893787622451782, "learning_rate": 0.001, "loss": 2.084, "step": 746100 }, { "epoch": 96.47058823529412, "grad_norm": 1.2021559476852417, "learning_rate": 0.001, "loss": 2.0733, "step": 746200 }, { "epoch": 96.48351648351648, "grad_norm": 1.2009289264678955, "learning_rate": 0.001, "loss": 2.0717, "step": 746300 }, { "epoch": 96.49644473173885, "grad_norm": 2.8372926712036133, "learning_rate": 0.001, "loss": 2.0797, "step": 746400 }, { "epoch": 96.50937297996121, "grad_norm": 5.202903747558594, "learning_rate": 0.001, "loss": 2.0758, "step": 746500 }, { "epoch": 96.52230122818358, "grad_norm": 2.334526538848877, "learning_rate": 0.001, "loss": 2.0716, "step": 746600 }, { "epoch": 96.53522947640595, "grad_norm": 1.3925890922546387, "learning_rate": 0.001, "loss": 2.0691, "step": 746700 }, { "epoch": 96.54815772462831, "grad_norm": 1.3236159086227417, "learning_rate": 0.001, "loss": 2.1034, "step": 746800 }, { "epoch": 96.56108597285068, "grad_norm": 1.7899320125579834, "learning_rate": 0.001, "loss": 2.0809, "step": 746900 }, { "epoch": 96.57401422107304, "grad_norm": 1.493200659751892, "learning_rate": 0.001, "loss": 2.1003, "step": 747000 }, { "epoch": 96.58694246929541, "grad_norm": 1.1165542602539062, "learning_rate": 0.001, "loss": 2.0863, "step": 747100 }, { "epoch": 96.59987071751777, "grad_norm": 1.9885693788528442, "learning_rate": 0.001, "loss": 2.0825, "step": 747200 }, { "epoch": 96.61279896574014, "grad_norm": 3.018761396408081, "learning_rate": 0.001, "loss": 2.0917, "step": 747300 }, { "epoch": 96.6257272139625, "grad_norm": 2.113672971725464, "learning_rate": 0.001, "loss": 2.082, "step": 747400 }, { "epoch": 96.63865546218487, "grad_norm": 2.0506410598754883, "learning_rate": 0.001, "loss": 2.0904, "step": 747500 }, { "epoch": 96.65158371040724, "grad_norm": 1.1136071681976318, "learning_rate": 0.001, "loss": 2.078, "step": 747600 }, { "epoch": 96.6645119586296, "grad_norm": 2.114542007446289, "learning_rate": 0.001, "loss": 2.0886, "step": 747700 }, { "epoch": 96.67744020685197, "grad_norm": 1.7485902309417725, "learning_rate": 0.001, "loss": 2.0889, "step": 747800 }, { "epoch": 96.69036845507433, "grad_norm": 3.0649807453155518, "learning_rate": 0.001, "loss": 2.0597, "step": 747900 }, { "epoch": 96.7032967032967, "grad_norm": 1.0388987064361572, "learning_rate": 0.001, "loss": 2.0964, "step": 748000 }, { "epoch": 96.71622495151907, "grad_norm": 1.2017505168914795, "learning_rate": 0.001, "loss": 2.0914, "step": 748100 }, { "epoch": 96.72915319974143, "grad_norm": 1.024833083152771, "learning_rate": 0.001, "loss": 2.1059, "step": 748200 }, { "epoch": 96.7420814479638, "grad_norm": 4.013667583465576, "learning_rate": 0.001, "loss": 2.1239, "step": 748300 }, { "epoch": 96.75500969618616, "grad_norm": 6.3345136642456055, "learning_rate": 0.001, "loss": 2.0991, "step": 748400 }, { "epoch": 96.76793794440853, "grad_norm": 2.1096208095550537, "learning_rate": 0.001, "loss": 2.0946, "step": 748500 }, { "epoch": 96.7808661926309, "grad_norm": 1.517626404762268, "learning_rate": 0.001, "loss": 2.1114, "step": 748600 }, { "epoch": 96.79379444085326, "grad_norm": 1.4721927642822266, "learning_rate": 0.001, "loss": 2.1026, "step": 748700 }, { "epoch": 96.80672268907563, "grad_norm": 1.3433820009231567, "learning_rate": 0.001, "loss": 2.1129, "step": 748800 }, { "epoch": 96.819650937298, "grad_norm": 1.2606818675994873, "learning_rate": 0.001, "loss": 2.1195, "step": 748900 }, { "epoch": 96.83257918552036, "grad_norm": 1.2036243677139282, "learning_rate": 0.001, "loss": 2.0948, "step": 749000 }, { "epoch": 96.84550743374272, "grad_norm": 1.065386414527893, "learning_rate": 0.001, "loss": 2.0986, "step": 749100 }, { "epoch": 96.85843568196509, "grad_norm": 1.3362267017364502, "learning_rate": 0.001, "loss": 2.0907, "step": 749200 }, { "epoch": 96.87136393018746, "grad_norm": 1.0486212968826294, "learning_rate": 0.001, "loss": 2.1294, "step": 749300 }, { "epoch": 96.88429217840982, "grad_norm": 1.3421940803527832, "learning_rate": 0.001, "loss": 2.1014, "step": 749400 }, { "epoch": 96.89722042663219, "grad_norm": 1.57880699634552, "learning_rate": 0.001, "loss": 2.1218, "step": 749500 }, { "epoch": 96.91014867485455, "grad_norm": 0.9256184697151184, "learning_rate": 0.001, "loss": 2.094, "step": 749600 }, { "epoch": 96.92307692307692, "grad_norm": 1.0349299907684326, "learning_rate": 0.001, "loss": 2.1202, "step": 749700 }, { "epoch": 96.93600517129929, "grad_norm": 1.0580642223358154, "learning_rate": 0.001, "loss": 2.1057, "step": 749800 }, { "epoch": 96.94893341952165, "grad_norm": 1.2098113298416138, "learning_rate": 0.001, "loss": 2.0934, "step": 749900 }, { "epoch": 96.96186166774402, "grad_norm": 1.0454400777816772, "learning_rate": 0.001, "loss": 2.0996, "step": 750000 }, { "epoch": 96.97478991596638, "grad_norm": 1.2086608409881592, "learning_rate": 0.001, "loss": 2.1285, "step": 750100 }, { "epoch": 96.98771816418875, "grad_norm": 14.640071868896484, "learning_rate": 0.001, "loss": 2.1009, "step": 750200 }, { "epoch": 97.00064641241111, "grad_norm": 1.7820905447006226, "learning_rate": 0.001, "loss": 2.0713, "step": 750300 }, { "epoch": 97.01357466063348, "grad_norm": 1.0693938732147217, "learning_rate": 0.001, "loss": 2.0054, "step": 750400 }, { "epoch": 97.02650290885585, "grad_norm": 1.309707760810852, "learning_rate": 0.001, "loss": 2.0383, "step": 750500 }, { "epoch": 97.03943115707821, "grad_norm": 1.101975440979004, "learning_rate": 0.001, "loss": 2.0332, "step": 750600 }, { "epoch": 97.05235940530058, "grad_norm": 1.3841246366500854, "learning_rate": 0.001, "loss": 2.0233, "step": 750700 }, { "epoch": 97.06528765352294, "grad_norm": 1.444321870803833, "learning_rate": 0.001, "loss": 2.0251, "step": 750800 }, { "epoch": 97.07821590174531, "grad_norm": 1.5007201433181763, "learning_rate": 0.001, "loss": 2.033, "step": 750900 }, { "epoch": 97.09114414996768, "grad_norm": 1.5125789642333984, "learning_rate": 0.001, "loss": 2.0417, "step": 751000 }, { "epoch": 97.10407239819004, "grad_norm": 1.6721700429916382, "learning_rate": 0.001, "loss": 2.0408, "step": 751100 }, { "epoch": 97.11700064641241, "grad_norm": 0.9644359946250916, "learning_rate": 0.001, "loss": 2.0163, "step": 751200 }, { "epoch": 97.12992889463477, "grad_norm": 1.0786775350570679, "learning_rate": 0.001, "loss": 2.0441, "step": 751300 }, { "epoch": 97.14285714285714, "grad_norm": 1.380187749862671, "learning_rate": 0.001, "loss": 2.0371, "step": 751400 }, { "epoch": 97.1557853910795, "grad_norm": 0.9250904321670532, "learning_rate": 0.001, "loss": 2.0296, "step": 751500 }, { "epoch": 97.16871363930187, "grad_norm": 1.2907971143722534, "learning_rate": 0.001, "loss": 2.0598, "step": 751600 }, { "epoch": 97.18164188752424, "grad_norm": 0.9972094297409058, "learning_rate": 0.001, "loss": 2.0512, "step": 751700 }, { "epoch": 97.1945701357466, "grad_norm": 1.4499754905700684, "learning_rate": 0.001, "loss": 2.0427, "step": 751800 }, { "epoch": 97.20749838396897, "grad_norm": 0.9581387639045715, "learning_rate": 0.001, "loss": 2.0543, "step": 751900 }, { "epoch": 97.22042663219133, "grad_norm": 0.8985323905944824, "learning_rate": 0.001, "loss": 2.072, "step": 752000 }, { "epoch": 97.2333548804137, "grad_norm": 1.8404170274734497, "learning_rate": 0.001, "loss": 2.0535, "step": 752100 }, { "epoch": 97.24628312863607, "grad_norm": 1.1425944566726685, "learning_rate": 0.001, "loss": 2.0385, "step": 752200 }, { "epoch": 97.25921137685843, "grad_norm": 1.2263789176940918, "learning_rate": 0.001, "loss": 2.0256, "step": 752300 }, { "epoch": 97.2721396250808, "grad_norm": 1.444058895111084, "learning_rate": 0.001, "loss": 2.0388, "step": 752400 }, { "epoch": 97.28506787330316, "grad_norm": 1.7494590282440186, "learning_rate": 0.001, "loss": 2.06, "step": 752500 }, { "epoch": 97.29799612152553, "grad_norm": 1.154781699180603, "learning_rate": 0.001, "loss": 2.0584, "step": 752600 }, { "epoch": 97.3109243697479, "grad_norm": 2.0461344718933105, "learning_rate": 0.001, "loss": 2.0504, "step": 752700 }, { "epoch": 97.32385261797026, "grad_norm": 1.9152932167053223, "learning_rate": 0.001, "loss": 2.0493, "step": 752800 }, { "epoch": 97.33678086619263, "grad_norm": 1.1173852682113647, "learning_rate": 0.001, "loss": 2.0549, "step": 752900 }, { "epoch": 97.34970911441499, "grad_norm": 0.9567067623138428, "learning_rate": 0.001, "loss": 2.0594, "step": 753000 }, { "epoch": 97.36263736263736, "grad_norm": 1.1589404344558716, "learning_rate": 0.001, "loss": 2.0803, "step": 753100 }, { "epoch": 97.37556561085972, "grad_norm": 1.0686125755310059, "learning_rate": 0.001, "loss": 2.04, "step": 753200 }, { "epoch": 97.38849385908209, "grad_norm": 1.1387264728546143, "learning_rate": 0.001, "loss": 2.0675, "step": 753300 }, { "epoch": 97.40142210730446, "grad_norm": 1.5804977416992188, "learning_rate": 0.001, "loss": 2.0595, "step": 753400 }, { "epoch": 97.41435035552682, "grad_norm": 1.8500816822052002, "learning_rate": 0.001, "loss": 2.0735, "step": 753500 }, { "epoch": 97.42727860374919, "grad_norm": 1.433671236038208, "learning_rate": 0.001, "loss": 2.0817, "step": 753600 }, { "epoch": 97.44020685197155, "grad_norm": 1.2379658222198486, "learning_rate": 0.001, "loss": 2.0573, "step": 753700 }, { "epoch": 97.45313510019392, "grad_norm": 1.1377923488616943, "learning_rate": 0.001, "loss": 2.0825, "step": 753800 }, { "epoch": 97.46606334841628, "grad_norm": 102.60649871826172, "learning_rate": 0.001, "loss": 2.0759, "step": 753900 }, { "epoch": 97.47899159663865, "grad_norm": 1.124011754989624, "learning_rate": 0.001, "loss": 2.0724, "step": 754000 }, { "epoch": 97.49191984486102, "grad_norm": 1.0583992004394531, "learning_rate": 0.001, "loss": 2.0896, "step": 754100 }, { "epoch": 97.50484809308338, "grad_norm": 1.0106548070907593, "learning_rate": 0.001, "loss": 2.0612, "step": 754200 }, { "epoch": 97.51777634130575, "grad_norm": 1.5147340297698975, "learning_rate": 0.001, "loss": 2.0626, "step": 754300 }, { "epoch": 97.53070458952811, "grad_norm": 0.9362762570381165, "learning_rate": 0.001, "loss": 2.0722, "step": 754400 }, { "epoch": 97.54363283775048, "grad_norm": 1.2093238830566406, "learning_rate": 0.001, "loss": 2.0636, "step": 754500 }, { "epoch": 97.55656108597285, "grad_norm": 4.22667932510376, "learning_rate": 0.001, "loss": 2.0809, "step": 754600 }, { "epoch": 97.56948933419521, "grad_norm": 1.051695704460144, "learning_rate": 0.001, "loss": 2.0867, "step": 754700 }, { "epoch": 97.58241758241758, "grad_norm": 1.592270016670227, "learning_rate": 0.001, "loss": 2.0842, "step": 754800 }, { "epoch": 97.59534583063994, "grad_norm": 1.533310890197754, "learning_rate": 0.001, "loss": 2.0827, "step": 754900 }, { "epoch": 97.60827407886231, "grad_norm": 1.594529390335083, "learning_rate": 0.001, "loss": 2.0666, "step": 755000 }, { "epoch": 97.62120232708467, "grad_norm": 1.6991889476776123, "learning_rate": 0.001, "loss": 2.0749, "step": 755100 }, { "epoch": 97.63413057530704, "grad_norm": 0.9300208687782288, "learning_rate": 0.001, "loss": 2.0944, "step": 755200 }, { "epoch": 97.6470588235294, "grad_norm": 4.71173620223999, "learning_rate": 0.001, "loss": 2.0804, "step": 755300 }, { "epoch": 97.65998707175177, "grad_norm": 1.0139224529266357, "learning_rate": 0.001, "loss": 2.09, "step": 755400 }, { "epoch": 97.67291531997414, "grad_norm": 0.833493173122406, "learning_rate": 0.001, "loss": 2.1027, "step": 755500 }, { "epoch": 97.6858435681965, "grad_norm": 1.383185863494873, "learning_rate": 0.001, "loss": 2.0841, "step": 755600 }, { "epoch": 97.69877181641887, "grad_norm": 1.3543531894683838, "learning_rate": 0.001, "loss": 2.0844, "step": 755700 }, { "epoch": 97.71170006464124, "grad_norm": 1.077793836593628, "learning_rate": 0.001, "loss": 2.0694, "step": 755800 }, { "epoch": 97.7246283128636, "grad_norm": 1.6846370697021484, "learning_rate": 0.001, "loss": 2.0906, "step": 755900 }, { "epoch": 97.73755656108597, "grad_norm": 1.002244472503662, "learning_rate": 0.001, "loss": 2.1016, "step": 756000 }, { "epoch": 97.75048480930833, "grad_norm": 1.0960841178894043, "learning_rate": 0.001, "loss": 2.0916, "step": 756100 }, { "epoch": 97.7634130575307, "grad_norm": 0.9645204544067383, "learning_rate": 0.001, "loss": 2.1181, "step": 756200 }, { "epoch": 97.77634130575306, "grad_norm": 1.5263326168060303, "learning_rate": 0.001, "loss": 2.0967, "step": 756300 }, { "epoch": 97.78926955397543, "grad_norm": 1.0114206075668335, "learning_rate": 0.001, "loss": 2.0821, "step": 756400 }, { "epoch": 97.8021978021978, "grad_norm": 0.9744566082954407, "learning_rate": 0.001, "loss": 2.0846, "step": 756500 }, { "epoch": 97.81512605042016, "grad_norm": 1.2205064296722412, "learning_rate": 0.001, "loss": 2.1086, "step": 756600 }, { "epoch": 97.82805429864253, "grad_norm": 1.0557130575180054, "learning_rate": 0.001, "loss": 2.0924, "step": 756700 }, { "epoch": 97.8409825468649, "grad_norm": 1.0185813903808594, "learning_rate": 0.001, "loss": 2.1013, "step": 756800 }, { "epoch": 97.85391079508726, "grad_norm": 1.1547648906707764, "learning_rate": 0.001, "loss": 2.0911, "step": 756900 }, { "epoch": 97.86683904330962, "grad_norm": 1.9443259239196777, "learning_rate": 0.001, "loss": 2.1125, "step": 757000 }, { "epoch": 97.87976729153199, "grad_norm": 1.4226901531219482, "learning_rate": 0.001, "loss": 2.115, "step": 757100 }, { "epoch": 97.89269553975436, "grad_norm": 0.9386547803878784, "learning_rate": 0.001, "loss": 2.1355, "step": 757200 }, { "epoch": 97.90562378797672, "grad_norm": 1.758970856666565, "learning_rate": 0.001, "loss": 2.0972, "step": 757300 }, { "epoch": 97.91855203619909, "grad_norm": 1.6537400484085083, "learning_rate": 0.001, "loss": 2.0815, "step": 757400 }, { "epoch": 97.93148028442145, "grad_norm": 0.965125560760498, "learning_rate": 0.001, "loss": 2.1011, "step": 757500 }, { "epoch": 97.94440853264382, "grad_norm": 1.4994903802871704, "learning_rate": 0.001, "loss": 2.1103, "step": 757600 }, { "epoch": 97.95733678086619, "grad_norm": 2.7576887607574463, "learning_rate": 0.001, "loss": 2.0967, "step": 757700 }, { "epoch": 97.97026502908855, "grad_norm": 2.7567825317382812, "learning_rate": 0.001, "loss": 2.1088, "step": 757800 }, { "epoch": 97.98319327731092, "grad_norm": 0.8174504637718201, "learning_rate": 0.001, "loss": 2.1001, "step": 757900 }, { "epoch": 97.99612152553328, "grad_norm": 1.0862258672714233, "learning_rate": 0.001, "loss": 2.1153, "step": 758000 }, { "epoch": 98.00904977375566, "grad_norm": 0.9806318283081055, "learning_rate": 0.001, "loss": 2.0573, "step": 758100 }, { "epoch": 98.02197802197803, "grad_norm": 0.9841448068618774, "learning_rate": 0.001, "loss": 2.0113, "step": 758200 }, { "epoch": 98.0349062702004, "grad_norm": 1.1726495027542114, "learning_rate": 0.001, "loss": 2.0197, "step": 758300 }, { "epoch": 98.04783451842276, "grad_norm": 1.0288511514663696, "learning_rate": 0.001, "loss": 2.023, "step": 758400 }, { "epoch": 98.06076276664513, "grad_norm": 0.8923460841178894, "learning_rate": 0.001, "loss": 2.0179, "step": 758500 }, { "epoch": 98.07369101486749, "grad_norm": 1.5534812211990356, "learning_rate": 0.001, "loss": 2.028, "step": 758600 }, { "epoch": 98.08661926308986, "grad_norm": 0.985370934009552, "learning_rate": 0.001, "loss": 2.0299, "step": 758700 }, { "epoch": 98.09954751131222, "grad_norm": 0.9924208521842957, "learning_rate": 0.001, "loss": 2.0313, "step": 758800 }, { "epoch": 98.11247575953459, "grad_norm": 1.0820608139038086, "learning_rate": 0.001, "loss": 2.026, "step": 758900 }, { "epoch": 98.12540400775696, "grad_norm": 1.1049543619155884, "learning_rate": 0.001, "loss": 2.0317, "step": 759000 }, { "epoch": 98.13833225597932, "grad_norm": 3.248885154724121, "learning_rate": 0.001, "loss": 2.0339, "step": 759100 }, { "epoch": 98.15126050420169, "grad_norm": 1.6863645315170288, "learning_rate": 0.001, "loss": 2.034, "step": 759200 }, { "epoch": 98.16418875242405, "grad_norm": 1.5484930276870728, "learning_rate": 0.001, "loss": 2.028, "step": 759300 }, { "epoch": 98.17711700064642, "grad_norm": 1.215631365776062, "learning_rate": 0.001, "loss": 2.0307, "step": 759400 }, { "epoch": 98.19004524886878, "grad_norm": 1.0158222913742065, "learning_rate": 0.001, "loss": 2.0289, "step": 759500 }, { "epoch": 98.20297349709115, "grad_norm": 1.1027039289474487, "learning_rate": 0.001, "loss": 2.0348, "step": 759600 }, { "epoch": 98.21590174531352, "grad_norm": 1.1508947610855103, "learning_rate": 0.001, "loss": 2.0404, "step": 759700 }, { "epoch": 98.22882999353588, "grad_norm": 1.669180154800415, "learning_rate": 0.001, "loss": 2.0438, "step": 759800 }, { "epoch": 98.24175824175825, "grad_norm": 1.3486531972885132, "learning_rate": 0.001, "loss": 2.0391, "step": 759900 }, { "epoch": 98.25468648998061, "grad_norm": 1.0848313570022583, "learning_rate": 0.001, "loss": 2.0503, "step": 760000 }, { "epoch": 98.26761473820298, "grad_norm": 1.2952184677124023, "learning_rate": 0.001, "loss": 2.0304, "step": 760100 }, { "epoch": 98.28054298642535, "grad_norm": 0.7500085830688477, "learning_rate": 0.001, "loss": 2.0497, "step": 760200 }, { "epoch": 98.29347123464771, "grad_norm": 1.0989539623260498, "learning_rate": 0.001, "loss": 2.0478, "step": 760300 }, { "epoch": 98.30639948287008, "grad_norm": 1.5051239728927612, "learning_rate": 0.001, "loss": 2.027, "step": 760400 }, { "epoch": 98.31932773109244, "grad_norm": 1.2106877565383911, "learning_rate": 0.001, "loss": 2.0521, "step": 760500 }, { "epoch": 98.33225597931481, "grad_norm": 1.2846932411193848, "learning_rate": 0.001, "loss": 2.0686, "step": 760600 }, { "epoch": 98.34518422753717, "grad_norm": 1.2793517112731934, "learning_rate": 0.001, "loss": 2.0551, "step": 760700 }, { "epoch": 98.35811247575954, "grad_norm": 1.3428603410720825, "learning_rate": 0.001, "loss": 2.0534, "step": 760800 }, { "epoch": 98.3710407239819, "grad_norm": 1.0149378776550293, "learning_rate": 0.001, "loss": 2.0367, "step": 760900 }, { "epoch": 98.38396897220427, "grad_norm": 1.4014430046081543, "learning_rate": 0.001, "loss": 2.074, "step": 761000 }, { "epoch": 98.39689722042664, "grad_norm": 1.6058952808380127, "learning_rate": 0.001, "loss": 2.0748, "step": 761100 }, { "epoch": 98.409825468649, "grad_norm": 1.0439444780349731, "learning_rate": 0.001, "loss": 2.0658, "step": 761200 }, { "epoch": 98.42275371687137, "grad_norm": 1.0432509183883667, "learning_rate": 0.001, "loss": 2.068, "step": 761300 }, { "epoch": 98.43568196509374, "grad_norm": 1.0652920007705688, "learning_rate": 0.001, "loss": 2.0417, "step": 761400 }, { "epoch": 98.4486102133161, "grad_norm": 0.9640112519264221, "learning_rate": 0.001, "loss": 2.0626, "step": 761500 }, { "epoch": 98.46153846153847, "grad_norm": 1.3203349113464355, "learning_rate": 0.001, "loss": 2.0702, "step": 761600 }, { "epoch": 98.47446670976083, "grad_norm": 1.136728286743164, "learning_rate": 0.001, "loss": 2.0628, "step": 761700 }, { "epoch": 98.4873949579832, "grad_norm": 2.9687538146972656, "learning_rate": 0.001, "loss": 2.0891, "step": 761800 }, { "epoch": 98.50032320620556, "grad_norm": 1.2532320022583008, "learning_rate": 0.001, "loss": 2.0661, "step": 761900 }, { "epoch": 98.51325145442793, "grad_norm": 1.229234218597412, "learning_rate": 0.001, "loss": 2.06, "step": 762000 }, { "epoch": 98.5261797026503, "grad_norm": 0.9844412207603455, "learning_rate": 0.001, "loss": 2.0772, "step": 762100 }, { "epoch": 98.53910795087266, "grad_norm": 1.1293622255325317, "learning_rate": 0.001, "loss": 2.0757, "step": 762200 }, { "epoch": 98.55203619909503, "grad_norm": 1.8117550611495972, "learning_rate": 0.001, "loss": 2.0782, "step": 762300 }, { "epoch": 98.5649644473174, "grad_norm": 1.7933282852172852, "learning_rate": 0.001, "loss": 2.0929, "step": 762400 }, { "epoch": 98.57789269553976, "grad_norm": 1.0070183277130127, "learning_rate": 0.001, "loss": 2.0784, "step": 762500 }, { "epoch": 98.59082094376213, "grad_norm": 1.2071616649627686, "learning_rate": 0.001, "loss": 2.0754, "step": 762600 }, { "epoch": 98.60374919198449, "grad_norm": 1.319739580154419, "learning_rate": 0.001, "loss": 2.0842, "step": 762700 }, { "epoch": 98.61667744020686, "grad_norm": 3.3309404850006104, "learning_rate": 0.001, "loss": 2.0841, "step": 762800 }, { "epoch": 98.62960568842922, "grad_norm": 1.042338490486145, "learning_rate": 0.001, "loss": 2.0726, "step": 762900 }, { "epoch": 98.64253393665159, "grad_norm": 1.9236029386520386, "learning_rate": 0.001, "loss": 2.0805, "step": 763000 }, { "epoch": 98.65546218487395, "grad_norm": 1.2244881391525269, "learning_rate": 0.001, "loss": 2.0815, "step": 763100 }, { "epoch": 98.66839043309632, "grad_norm": 1.2392324209213257, "learning_rate": 0.001, "loss": 2.0732, "step": 763200 }, { "epoch": 98.68131868131869, "grad_norm": 1.3438377380371094, "learning_rate": 0.001, "loss": 2.0784, "step": 763300 }, { "epoch": 98.69424692954105, "grad_norm": 1.2497464418411255, "learning_rate": 0.001, "loss": 2.0909, "step": 763400 }, { "epoch": 98.70717517776342, "grad_norm": 1.388110637664795, "learning_rate": 0.001, "loss": 2.0636, "step": 763500 }, { "epoch": 98.72010342598578, "grad_norm": 2.0968129634857178, "learning_rate": 0.001, "loss": 2.0756, "step": 763600 }, { "epoch": 98.73303167420815, "grad_norm": 1.1181257963180542, "learning_rate": 0.001, "loss": 2.0737, "step": 763700 }, { "epoch": 98.74595992243052, "grad_norm": 1.0719281435012817, "learning_rate": 0.001, "loss": 2.095, "step": 763800 }, { "epoch": 98.75888817065288, "grad_norm": 1.3828444480895996, "learning_rate": 0.001, "loss": 2.0894, "step": 763900 }, { "epoch": 98.77181641887525, "grad_norm": 1.227839469909668, "learning_rate": 0.001, "loss": 2.0763, "step": 764000 }, { "epoch": 98.78474466709761, "grad_norm": 1.7973661422729492, "learning_rate": 0.001, "loss": 2.0889, "step": 764100 }, { "epoch": 98.79767291531998, "grad_norm": 1.1353389024734497, "learning_rate": 0.001, "loss": 2.0831, "step": 764200 }, { "epoch": 98.81060116354234, "grad_norm": 1.0866427421569824, "learning_rate": 0.001, "loss": 2.106, "step": 764300 }, { "epoch": 98.82352941176471, "grad_norm": 1.5617252588272095, "learning_rate": 0.001, "loss": 2.0875, "step": 764400 }, { "epoch": 98.83645765998708, "grad_norm": 1.3251203298568726, "learning_rate": 0.001, "loss": 2.0922, "step": 764500 }, { "epoch": 98.84938590820944, "grad_norm": 5.315347194671631, "learning_rate": 0.001, "loss": 2.105, "step": 764600 }, { "epoch": 98.86231415643181, "grad_norm": 1.0022552013397217, "learning_rate": 0.001, "loss": 2.096, "step": 764700 }, { "epoch": 98.87524240465417, "grad_norm": 0.819837749004364, "learning_rate": 0.001, "loss": 2.1021, "step": 764800 }, { "epoch": 98.88817065287654, "grad_norm": 0.9975865483283997, "learning_rate": 0.001, "loss": 2.0787, "step": 764900 }, { "epoch": 98.9010989010989, "grad_norm": 1.180559754371643, "learning_rate": 0.001, "loss": 2.0923, "step": 765000 }, { "epoch": 98.91402714932127, "grad_norm": 0.8978433012962341, "learning_rate": 0.001, "loss": 2.0821, "step": 765100 }, { "epoch": 98.92695539754364, "grad_norm": 1.0189166069030762, "learning_rate": 0.001, "loss": 2.09, "step": 765200 }, { "epoch": 98.939883645766, "grad_norm": 4.519464015960693, "learning_rate": 0.001, "loss": 2.0999, "step": 765300 }, { "epoch": 98.95281189398837, "grad_norm": 1.1733318567276, "learning_rate": 0.001, "loss": 2.0847, "step": 765400 }, { "epoch": 98.96574014221073, "grad_norm": 0.9778850078582764, "learning_rate": 0.001, "loss": 2.0788, "step": 765500 }, { "epoch": 98.9786683904331, "grad_norm": 1.749243140220642, "learning_rate": 0.001, "loss": 2.1072, "step": 765600 }, { "epoch": 98.99159663865547, "grad_norm": 1.3369940519332886, "learning_rate": 0.001, "loss": 2.1048, "step": 765700 }, { "epoch": 99.00452488687783, "grad_norm": 0.9550539255142212, "learning_rate": 0.001, "loss": 2.0733, "step": 765800 }, { "epoch": 99.0174531351002, "grad_norm": 0.9420326352119446, "learning_rate": 0.001, "loss": 1.9934, "step": 765900 }, { "epoch": 99.03038138332256, "grad_norm": 1.1777714490890503, "learning_rate": 0.001, "loss": 2.0217, "step": 766000 }, { "epoch": 99.04330963154493, "grad_norm": 1.4202206134796143, "learning_rate": 0.001, "loss": 2.0188, "step": 766100 }, { "epoch": 99.0562378797673, "grad_norm": 0.9541141390800476, "learning_rate": 0.001, "loss": 2.035, "step": 766200 }, { "epoch": 99.06916612798966, "grad_norm": 1.3159948587417603, "learning_rate": 0.001, "loss": 2.0211, "step": 766300 }, { "epoch": 99.08209437621203, "grad_norm": 1.00889253616333, "learning_rate": 0.001, "loss": 2.0123, "step": 766400 }, { "epoch": 99.09502262443439, "grad_norm": 2.1092019081115723, "learning_rate": 0.001, "loss": 2.0223, "step": 766500 }, { "epoch": 99.10795087265676, "grad_norm": 1.9757956266403198, "learning_rate": 0.001, "loss": 2.0348, "step": 766600 }, { "epoch": 99.12087912087912, "grad_norm": 1.1108876466751099, "learning_rate": 0.001, "loss": 2.0364, "step": 766700 }, { "epoch": 99.13380736910149, "grad_norm": 1.7402448654174805, "learning_rate": 0.001, "loss": 2.0367, "step": 766800 }, { "epoch": 99.14673561732386, "grad_norm": 1.0563132762908936, "learning_rate": 0.001, "loss": 2.0537, "step": 766900 }, { "epoch": 99.15966386554622, "grad_norm": 1.4641183614730835, "learning_rate": 0.001, "loss": 2.0309, "step": 767000 }, { "epoch": 99.17259211376859, "grad_norm": 2.2620434761047363, "learning_rate": 0.001, "loss": 2.0278, "step": 767100 }, { "epoch": 99.18552036199095, "grad_norm": 1.5954097509384155, "learning_rate": 0.001, "loss": 2.0327, "step": 767200 }, { "epoch": 99.19844861021332, "grad_norm": 1.6130136251449585, "learning_rate": 0.001, "loss": 2.0357, "step": 767300 }, { "epoch": 99.21137685843568, "grad_norm": 1.5103048086166382, "learning_rate": 0.001, "loss": 2.0267, "step": 767400 }, { "epoch": 99.22430510665805, "grad_norm": 1.5836067199707031, "learning_rate": 0.001, "loss": 2.043, "step": 767500 }, { "epoch": 99.23723335488042, "grad_norm": 1.5830436944961548, "learning_rate": 0.001, "loss": 2.0507, "step": 767600 }, { "epoch": 99.25016160310278, "grad_norm": 1.0414098501205444, "learning_rate": 0.001, "loss": 2.0393, "step": 767700 }, { "epoch": 99.26308985132515, "grad_norm": 17.472047805786133, "learning_rate": 0.001, "loss": 2.0435, "step": 767800 }, { "epoch": 99.27601809954751, "grad_norm": 1.2278518676757812, "learning_rate": 0.001, "loss": 2.0417, "step": 767900 }, { "epoch": 99.28894634776988, "grad_norm": 1.4628028869628906, "learning_rate": 0.001, "loss": 2.0552, "step": 768000 }, { "epoch": 99.30187459599225, "grad_norm": 1.7387053966522217, "learning_rate": 0.001, "loss": 2.0645, "step": 768100 }, { "epoch": 99.31480284421461, "grad_norm": 1.296979546546936, "learning_rate": 0.001, "loss": 2.037, "step": 768200 }, { "epoch": 99.32773109243698, "grad_norm": 1.2855712175369263, "learning_rate": 0.001, "loss": 2.0539, "step": 768300 }, { "epoch": 99.34065934065934, "grad_norm": 1.1161450147628784, "learning_rate": 0.001, "loss": 2.0564, "step": 768400 }, { "epoch": 99.35358758888171, "grad_norm": 1.0274378061294556, "learning_rate": 0.001, "loss": 2.0489, "step": 768500 }, { "epoch": 99.36651583710407, "grad_norm": 1.6331721544265747, "learning_rate": 0.001, "loss": 2.0572, "step": 768600 }, { "epoch": 99.37944408532644, "grad_norm": 1.0209852457046509, "learning_rate": 0.001, "loss": 2.0568, "step": 768700 }, { "epoch": 99.3923723335488, "grad_norm": 3.0395145416259766, "learning_rate": 0.001, "loss": 2.07, "step": 768800 }, { "epoch": 99.40530058177117, "grad_norm": 1.8583918809890747, "learning_rate": 0.001, "loss": 2.0496, "step": 768900 }, { "epoch": 99.41822882999354, "grad_norm": 3.3426694869995117, "learning_rate": 0.001, "loss": 2.0608, "step": 769000 }, { "epoch": 99.4311570782159, "grad_norm": 1.1119320392608643, "learning_rate": 0.001, "loss": 2.0662, "step": 769100 }, { "epoch": 99.44408532643827, "grad_norm": 1.0692205429077148, "learning_rate": 0.001, "loss": 2.0654, "step": 769200 }, { "epoch": 99.45701357466064, "grad_norm": 1.0201817750930786, "learning_rate": 0.001, "loss": 2.0587, "step": 769300 }, { "epoch": 99.469941822883, "grad_norm": 1.2209703922271729, "learning_rate": 0.001, "loss": 2.0645, "step": 769400 }, { "epoch": 99.48287007110537, "grad_norm": 3.405653238296509, "learning_rate": 0.001, "loss": 2.0621, "step": 769500 }, { "epoch": 99.49579831932773, "grad_norm": 1.2442470788955688, "learning_rate": 0.001, "loss": 2.0566, "step": 769600 }, { "epoch": 99.5087265675501, "grad_norm": 1.010501742362976, "learning_rate": 0.001, "loss": 2.0699, "step": 769700 }, { "epoch": 99.52165481577246, "grad_norm": 8.63658618927002, "learning_rate": 0.001, "loss": 2.0615, "step": 769800 }, { "epoch": 99.53458306399483, "grad_norm": 1.0473334789276123, "learning_rate": 0.001, "loss": 2.0822, "step": 769900 }, { "epoch": 99.5475113122172, "grad_norm": 1.7963345050811768, "learning_rate": 0.001, "loss": 2.0668, "step": 770000 }, { "epoch": 99.56043956043956, "grad_norm": 1.0331025123596191, "learning_rate": 0.001, "loss": 2.0724, "step": 770100 }, { "epoch": 99.57336780866193, "grad_norm": 1.3224682807922363, "learning_rate": 0.001, "loss": 2.0521, "step": 770200 }, { "epoch": 99.5862960568843, "grad_norm": 1.8576409816741943, "learning_rate": 0.001, "loss": 2.0823, "step": 770300 }, { "epoch": 99.59922430510666, "grad_norm": 1.181518793106079, "learning_rate": 0.001, "loss": 2.089, "step": 770400 }, { "epoch": 99.61215255332903, "grad_norm": 1.057032823562622, "learning_rate": 0.001, "loss": 2.0859, "step": 770500 }, { "epoch": 99.62508080155139, "grad_norm": 3.0410029888153076, "learning_rate": 0.001, "loss": 2.0625, "step": 770600 }, { "epoch": 99.63800904977376, "grad_norm": 1.2085886001586914, "learning_rate": 0.001, "loss": 2.0947, "step": 770700 }, { "epoch": 99.65093729799612, "grad_norm": 1.4443784952163696, "learning_rate": 0.001, "loss": 2.0879, "step": 770800 }, { "epoch": 99.66386554621849, "grad_norm": 1.0383845567703247, "learning_rate": 0.001, "loss": 2.0508, "step": 770900 }, { "epoch": 99.67679379444085, "grad_norm": 1.5767204761505127, "learning_rate": 0.001, "loss": 2.0753, "step": 771000 }, { "epoch": 99.68972204266322, "grad_norm": 1.4047152996063232, "learning_rate": 0.001, "loss": 2.0729, "step": 771100 }, { "epoch": 99.70265029088559, "grad_norm": 1.0817312002182007, "learning_rate": 0.001, "loss": 2.1052, "step": 771200 }, { "epoch": 99.71557853910795, "grad_norm": 1.7640438079833984, "learning_rate": 0.001, "loss": 2.0746, "step": 771300 }, { "epoch": 99.72850678733032, "grad_norm": 2.152024984359741, "learning_rate": 0.001, "loss": 2.0852, "step": 771400 }, { "epoch": 99.74143503555268, "grad_norm": 1.1203761100769043, "learning_rate": 0.001, "loss": 2.0678, "step": 771500 }, { "epoch": 99.75436328377505, "grad_norm": 1.0554304122924805, "learning_rate": 0.001, "loss": 2.0968, "step": 771600 }, { "epoch": 99.76729153199742, "grad_norm": 1.1040574312210083, "learning_rate": 0.001, "loss": 2.0609, "step": 771700 }, { "epoch": 99.78021978021978, "grad_norm": 2.197925329208374, "learning_rate": 0.001, "loss": 2.0728, "step": 771800 }, { "epoch": 99.79314802844215, "grad_norm": 1.0656843185424805, "learning_rate": 0.001, "loss": 2.0859, "step": 771900 }, { "epoch": 99.80607627666451, "grad_norm": 0.9172446727752686, "learning_rate": 0.001, "loss": 2.0817, "step": 772000 }, { "epoch": 99.81900452488688, "grad_norm": 0.9730091691017151, "learning_rate": 0.001, "loss": 2.0719, "step": 772100 }, { "epoch": 99.83193277310924, "grad_norm": 1.3280757665634155, "learning_rate": 0.001, "loss": 2.0864, "step": 772200 }, { "epoch": 99.84486102133161, "grad_norm": 1.159346580505371, "learning_rate": 0.001, "loss": 2.0644, "step": 772300 }, { "epoch": 99.85778926955398, "grad_norm": 1.349966287612915, "learning_rate": 0.001, "loss": 2.087, "step": 772400 }, { "epoch": 99.87071751777634, "grad_norm": 1.0654726028442383, "learning_rate": 0.001, "loss": 2.0861, "step": 772500 }, { "epoch": 99.88364576599871, "grad_norm": 1.27714204788208, "learning_rate": 0.001, "loss": 2.0823, "step": 772600 }, { "epoch": 99.89657401422107, "grad_norm": 1.2731585502624512, "learning_rate": 0.001, "loss": 2.0826, "step": 772700 }, { "epoch": 99.90950226244344, "grad_norm": 1.3861024379730225, "learning_rate": 0.001, "loss": 2.0939, "step": 772800 }, { "epoch": 99.9224305106658, "grad_norm": 1.0389690399169922, "learning_rate": 0.001, "loss": 2.0978, "step": 772900 }, { "epoch": 99.93535875888817, "grad_norm": 1.1207822561264038, "learning_rate": 0.001, "loss": 2.1072, "step": 773000 }, { "epoch": 99.94828700711054, "grad_norm": 1.2808066606521606, "learning_rate": 0.001, "loss": 2.0816, "step": 773100 }, { "epoch": 99.9612152553329, "grad_norm": 1.2283062934875488, "learning_rate": 0.001, "loss": 2.092, "step": 773200 }, { "epoch": 99.97414350355527, "grad_norm": 1.2903398275375366, "learning_rate": 0.001, "loss": 2.0907, "step": 773300 }, { "epoch": 99.98707175177763, "grad_norm": 1.068626880645752, "learning_rate": 0.001, "loss": 2.0822, "step": 773400 }, { "epoch": 100.0, "grad_norm": 2.4892613887786865, "learning_rate": 0.001, "loss": 2.0517, "step": 773500 }, { "epoch": 100.0, "step": 773500, "total_flos": 1.742106998016e+18, "train_loss": 1.653784541361241, "train_runtime": 58976.6804, "train_samples_per_second": 419.657, "train_steps_per_second": 13.115 } ], "logging_steps": 100, "max_steps": 773500, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.742106998016e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }