| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 27.98507462686567, | |
| "eval_steps": 500, | |
| "global_step": 7500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03731343283582089, | |
| "grad_norm": 0.8186072111129761, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 1.3847, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07462686567164178, | |
| "grad_norm": 0.5007426142692566, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 1.4283, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11194029850746269, | |
| "grad_norm": 0.49460887908935547, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4868, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.14925373134328357, | |
| "grad_norm": 0.5032920837402344, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.4491, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1865671641791045, | |
| "grad_norm": 0.5688469409942627, | |
| "learning_rate": 5e-06, | |
| "loss": 1.3703, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22388059701492538, | |
| "grad_norm": 0.5052517652511597, | |
| "learning_rate": 6e-06, | |
| "loss": 1.419, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26119402985074625, | |
| "grad_norm": 0.6315643787384033, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 1.3058, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.29850746268656714, | |
| "grad_norm": 0.6060447692871094, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 1.2908, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3358208955223881, | |
| "grad_norm": 0.5513179302215576, | |
| "learning_rate": 9e-06, | |
| "loss": 1.2311, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.373134328358209, | |
| "grad_norm": 0.8467404246330261, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2043, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.41044776119402987, | |
| "grad_norm": 0.8141824007034302, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 1.0707, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.44776119402985076, | |
| "grad_norm": 0.7932347059249878, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.9377, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.48507462686567165, | |
| "grad_norm": 0.684220552444458, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.714, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5223880597014925, | |
| "grad_norm": 0.5886895060539246, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 0.6479, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5597014925373134, | |
| "grad_norm": 0.4764939248561859, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.5463, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.4621008038520813, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.4641, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6343283582089553, | |
| "grad_norm": 0.46492910385131836, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 0.4159, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6716417910447762, | |
| "grad_norm": 0.5017415881156921, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.4094, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7089552238805971, | |
| "grad_norm": 0.34392210841178894, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.3478, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 0.3240516483783722, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3821, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7835820895522388, | |
| "grad_norm": 0.26301339268684387, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.3606, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8208955223880597, | |
| "grad_norm": 0.34712520241737366, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.3421, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.8582089552238806, | |
| "grad_norm": 0.3248469829559326, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 0.3389, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 0.298149436712265, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.3145, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9328358208955224, | |
| "grad_norm": 0.2757190763950348, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.3065, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.9701492537313433, | |
| "grad_norm": 0.30510950088500977, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 0.2971, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.007462686567164, | |
| "grad_norm": 0.37349891662597656, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 0.3273, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.044776119402985, | |
| "grad_norm": 0.3667634129524231, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.308, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.0820895522388059, | |
| "grad_norm": 0.3463355004787445, | |
| "learning_rate": 2.9e-05, | |
| "loss": 0.3109, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.1194029850746268, | |
| "grad_norm": 0.3888525366783142, | |
| "learning_rate": 3e-05, | |
| "loss": 0.2644, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.1567164179104479, | |
| "grad_norm": 0.3749147951602936, | |
| "learning_rate": 3.1e-05, | |
| "loss": 0.2858, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.1940298507462686, | |
| "grad_norm": 0.3270276188850403, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.2573, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.2313432835820897, | |
| "grad_norm": 0.3658592998981476, | |
| "learning_rate": 3.3e-05, | |
| "loss": 0.2613, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.2686567164179103, | |
| "grad_norm": 0.3526328206062317, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 0.2328, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3059701492537314, | |
| "grad_norm": 0.4528139531612396, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.2429, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.3432835820895521, | |
| "grad_norm": 0.5426791310310364, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.2209, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.3805970149253732, | |
| "grad_norm": 0.41844552755355835, | |
| "learning_rate": 3.7e-05, | |
| "loss": 0.2319, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.417910447761194, | |
| "grad_norm": 0.4749431908130646, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.2233, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.455223880597015, | |
| "grad_norm": 0.7010189890861511, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 0.2181, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.4925373134328357, | |
| "grad_norm": 0.5747635960578918, | |
| "learning_rate": 4e-05, | |
| "loss": 0.213, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.5298507462686568, | |
| "grad_norm": 0.3661474287509918, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.2171, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.5671641791044775, | |
| "grad_norm": 0.467835396528244, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.1985, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.6044776119402986, | |
| "grad_norm": 0.5470123291015625, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.2176, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.6417910447761193, | |
| "grad_norm": 0.5761199593544006, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.2007, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.6791044776119404, | |
| "grad_norm": 0.48257485032081604, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.2043, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.716417910447761, | |
| "grad_norm": 0.48353052139282227, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.1872, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.7537313432835822, | |
| "grad_norm": 0.4388391375541687, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.206, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.7910447761194028, | |
| "grad_norm": 0.47332626581192017, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.1876, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.828358208955224, | |
| "grad_norm": 0.8053535223007202, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.1839, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.8656716417910446, | |
| "grad_norm": 0.413979709148407, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1732, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.9029850746268657, | |
| "grad_norm": 0.36910712718963623, | |
| "learning_rate": 5.1000000000000006e-05, | |
| "loss": 0.1827, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.9402985074626866, | |
| "grad_norm": 0.8458298444747925, | |
| "learning_rate": 5.2000000000000004e-05, | |
| "loss": 0.1727, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.9776119402985075, | |
| "grad_norm": 0.5452115535736084, | |
| "learning_rate": 5.300000000000001e-05, | |
| "loss": 0.1818, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.014925373134328, | |
| "grad_norm": 0.4518108069896698, | |
| "learning_rate": 5.4000000000000005e-05, | |
| "loss": 0.177, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.0522388059701493, | |
| "grad_norm": 0.66865074634552, | |
| "learning_rate": 5.500000000000001e-05, | |
| "loss": 0.1726, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.08955223880597, | |
| "grad_norm": 0.6536034345626831, | |
| "learning_rate": 5.6000000000000006e-05, | |
| "loss": 0.1541, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.126865671641791, | |
| "grad_norm": 0.5571377277374268, | |
| "learning_rate": 5.6999999999999996e-05, | |
| "loss": 0.1671, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.1641791044776117, | |
| "grad_norm": 0.5385546684265137, | |
| "learning_rate": 5.8e-05, | |
| "loss": 0.1582, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.201492537313433, | |
| "grad_norm": 0.577961266040802, | |
| "learning_rate": 5.9e-05, | |
| "loss": 0.1528, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.2388059701492535, | |
| "grad_norm": 0.5082416534423828, | |
| "learning_rate": 6e-05, | |
| "loss": 0.1638, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.2761194029850746, | |
| "grad_norm": 0.5490861535072327, | |
| "learning_rate": 6.1e-05, | |
| "loss": 0.166, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.3134328358208958, | |
| "grad_norm": 0.492366760969162, | |
| "learning_rate": 6.2e-05, | |
| "loss": 0.1481, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.3507462686567164, | |
| "grad_norm": 0.3702855110168457, | |
| "learning_rate": 6.3e-05, | |
| "loss": 0.1514, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.388059701492537, | |
| "grad_norm": 0.664667010307312, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 0.1441, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.425373134328358, | |
| "grad_norm": 0.33382174372673035, | |
| "learning_rate": 6.500000000000001e-05, | |
| "loss": 0.1573, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.4626865671641793, | |
| "grad_norm": 0.4848814010620117, | |
| "learning_rate": 6.6e-05, | |
| "loss": 0.1457, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.3649997413158417, | |
| "learning_rate": 6.7e-05, | |
| "loss": 0.1467, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.5373134328358207, | |
| "grad_norm": 0.6385223865509033, | |
| "learning_rate": 6.800000000000001e-05, | |
| "loss": 0.145, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.574626865671642, | |
| "grad_norm": 0.4580625891685486, | |
| "learning_rate": 6.9e-05, | |
| "loss": 0.1352, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.611940298507463, | |
| "grad_norm": 0.5141746401786804, | |
| "learning_rate": 7e-05, | |
| "loss": 0.1444, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.6492537313432836, | |
| "grad_norm": 0.40220722556114197, | |
| "learning_rate": 7.1e-05, | |
| "loss": 0.1493, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.6865671641791042, | |
| "grad_norm": 0.5510571002960205, | |
| "learning_rate": 7.2e-05, | |
| "loss": 0.1387, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.7238805970149254, | |
| "grad_norm": 0.43814659118652344, | |
| "learning_rate": 7.3e-05, | |
| "loss": 0.1374, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.7611940298507465, | |
| "grad_norm": 0.4118008613586426, | |
| "learning_rate": 7.4e-05, | |
| "loss": 0.1297, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.798507462686567, | |
| "grad_norm": 0.5626503229141235, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.1299, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.835820895522388, | |
| "grad_norm": 0.4066360592842102, | |
| "learning_rate": 7.6e-05, | |
| "loss": 0.1102, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.873134328358209, | |
| "grad_norm": 0.47184985876083374, | |
| "learning_rate": 7.7e-05, | |
| "loss": 0.1219, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.91044776119403, | |
| "grad_norm": 0.6611475348472595, | |
| "learning_rate": 7.800000000000001e-05, | |
| "loss": 0.1267, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.9477611940298507, | |
| "grad_norm": 0.3570108413696289, | |
| "learning_rate": 7.900000000000001e-05, | |
| "loss": 0.1191, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.9850746268656714, | |
| "grad_norm": 0.4581681489944458, | |
| "learning_rate": 8e-05, | |
| "loss": 0.1209, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.0223880597014925, | |
| "grad_norm": 0.4643435776233673, | |
| "learning_rate": 8.1e-05, | |
| "loss": 0.129, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.0597014925373136, | |
| "grad_norm": 0.5595763921737671, | |
| "learning_rate": 8.2e-05, | |
| "loss": 0.1158, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.0970149253731343, | |
| "grad_norm": 0.48848605155944824, | |
| "learning_rate": 8.3e-05, | |
| "loss": 0.1188, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.1343283582089554, | |
| "grad_norm": 0.4496570825576782, | |
| "learning_rate": 8.4e-05, | |
| "loss": 0.114, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.171641791044776, | |
| "grad_norm": 0.31364986300468445, | |
| "learning_rate": 8.5e-05, | |
| "loss": 0.1196, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.208955223880597, | |
| "grad_norm": 0.3395878076553345, | |
| "learning_rate": 8.6e-05, | |
| "loss": 0.1124, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.246268656716418, | |
| "grad_norm": 0.4917413592338562, | |
| "learning_rate": 8.7e-05, | |
| "loss": 0.1074, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.283582089552239, | |
| "grad_norm": 0.44114553928375244, | |
| "learning_rate": 8.800000000000001e-05, | |
| "loss": 0.1095, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.3208955223880596, | |
| "grad_norm": 0.3323831558227539, | |
| "learning_rate": 8.900000000000001e-05, | |
| "loss": 0.106, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.3582089552238807, | |
| "grad_norm": 0.4495660066604614, | |
| "learning_rate": 9e-05, | |
| "loss": 0.1222, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.3955223880597014, | |
| "grad_norm": 0.40784788131713867, | |
| "learning_rate": 9.1e-05, | |
| "loss": 0.1048, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.4328358208955225, | |
| "grad_norm": 0.4643700420856476, | |
| "learning_rate": 9.200000000000001e-05, | |
| "loss": 0.1097, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.470149253731343, | |
| "grad_norm": 0.472494512796402, | |
| "learning_rate": 9.300000000000001e-05, | |
| "loss": 0.1041, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.5074626865671643, | |
| "grad_norm": 0.6110897660255432, | |
| "learning_rate": 9.4e-05, | |
| "loss": 0.0959, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.544776119402985, | |
| "grad_norm": 0.5313069820404053, | |
| "learning_rate": 9.5e-05, | |
| "loss": 0.113, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.582089552238806, | |
| "grad_norm": 0.4223133623600006, | |
| "learning_rate": 9.6e-05, | |
| "loss": 0.099, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.6194029850746268, | |
| "grad_norm": 0.5464731454849243, | |
| "learning_rate": 9.7e-05, | |
| "loss": 0.1008, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.656716417910448, | |
| "grad_norm": 0.3538314402103424, | |
| "learning_rate": 9.8e-05, | |
| "loss": 0.1049, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.6940298507462686, | |
| "grad_norm": 0.7460148334503174, | |
| "learning_rate": 9.900000000000001e-05, | |
| "loss": 0.1088, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.7313432835820897, | |
| "grad_norm": 0.3210597038269043, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1041, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.7686567164179103, | |
| "grad_norm": 0.4450497627258301, | |
| "learning_rate": 9.999993165095463e-05, | |
| "loss": 0.0985, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 3.8059701492537314, | |
| "grad_norm": 0.4348960816860199, | |
| "learning_rate": 9.999972660400536e-05, | |
| "loss": 0.1015, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 3.843283582089552, | |
| "grad_norm": 0.462782621383667, | |
| "learning_rate": 9.999938485971279e-05, | |
| "loss": 0.1068, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 3.8805970149253732, | |
| "grad_norm": 0.3801368474960327, | |
| "learning_rate": 9.999890641901125e-05, | |
| "loss": 0.1117, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 3.917910447761194, | |
| "grad_norm": 0.45135366916656494, | |
| "learning_rate": 9.999829128320874e-05, | |
| "loss": 0.0917, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.955223880597015, | |
| "grad_norm": 0.41138389706611633, | |
| "learning_rate": 9.999753945398704e-05, | |
| "loss": 0.1049, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 3.9925373134328357, | |
| "grad_norm": 0.4976252317428589, | |
| "learning_rate": 9.999665093340165e-05, | |
| "loss": 0.1029, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 4.029850746268656, | |
| "grad_norm": 0.46372008323669434, | |
| "learning_rate": 9.99956257238817e-05, | |
| "loss": 0.1012, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 4.067164179104478, | |
| "grad_norm": 0.546938955783844, | |
| "learning_rate": 9.999446382823013e-05, | |
| "loss": 0.0829, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 4.104477611940299, | |
| "grad_norm": 0.40513405203819275, | |
| "learning_rate": 9.999316524962345e-05, | |
| "loss": 0.0933, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.141791044776119, | |
| "grad_norm": 0.4198484420776367, | |
| "learning_rate": 9.999172999161198e-05, | |
| "loss": 0.0895, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 4.17910447761194, | |
| "grad_norm": 0.3965628743171692, | |
| "learning_rate": 9.999015805811965e-05, | |
| "loss": 0.0917, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 4.2164179104477615, | |
| "grad_norm": 0.3095884621143341, | |
| "learning_rate": 9.998844945344405e-05, | |
| "loss": 0.0953, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 4.253731343283582, | |
| "grad_norm": 0.7962276339530945, | |
| "learning_rate": 9.998660418225645e-05, | |
| "loss": 0.0979, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 4.291044776119403, | |
| "grad_norm": 0.42066490650177, | |
| "learning_rate": 9.998462224960175e-05, | |
| "loss": 0.099, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 4.3283582089552235, | |
| "grad_norm": 0.3894193470478058, | |
| "learning_rate": 9.998250366089848e-05, | |
| "loss": 0.0887, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 4.365671641791045, | |
| "grad_norm": 0.28998032212257385, | |
| "learning_rate": 9.998024842193876e-05, | |
| "loss": 0.0943, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 4.402985074626866, | |
| "grad_norm": 0.3919823467731476, | |
| "learning_rate": 9.997785653888835e-05, | |
| "loss": 0.0916, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 4.440298507462686, | |
| "grad_norm": 0.3708650469779968, | |
| "learning_rate": 9.997532801828658e-05, | |
| "loss": 0.0858, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 4.477611940298507, | |
| "grad_norm": 0.2935069799423218, | |
| "learning_rate": 9.997266286704631e-05, | |
| "loss": 0.0992, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.514925373134329, | |
| "grad_norm": 0.4675377607345581, | |
| "learning_rate": 9.996986109245395e-05, | |
| "loss": 0.0854, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 4.552238805970149, | |
| "grad_norm": 0.31374865770339966, | |
| "learning_rate": 9.996692270216947e-05, | |
| "loss": 0.0788, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 4.58955223880597, | |
| "grad_norm": 0.419249951839447, | |
| "learning_rate": 9.996384770422629e-05, | |
| "loss": 0.0873, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 4.6268656716417915, | |
| "grad_norm": 0.26002731919288635, | |
| "learning_rate": 9.996063610703137e-05, | |
| "loss": 0.0845, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 4.664179104477612, | |
| "grad_norm": 0.29573896527290344, | |
| "learning_rate": 9.995728791936504e-05, | |
| "loss": 0.091, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 4.701492537313433, | |
| "grad_norm": 0.33090147376060486, | |
| "learning_rate": 9.995380315038119e-05, | |
| "loss": 0.0827, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 4.7388059701492535, | |
| "grad_norm": 0.24417485296726227, | |
| "learning_rate": 9.9950181809607e-05, | |
| "loss": 0.0859, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 4.776119402985074, | |
| "grad_norm": 0.48290401697158813, | |
| "learning_rate": 9.994642390694308e-05, | |
| "loss": 0.0889, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 4.813432835820896, | |
| "grad_norm": 0.4479697048664093, | |
| "learning_rate": 9.99425294526634e-05, | |
| "loss": 0.097, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 4.850746268656716, | |
| "grad_norm": 0.3560147285461426, | |
| "learning_rate": 9.993849845741524e-05, | |
| "loss": 0.0904, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.888059701492537, | |
| "grad_norm": 0.6645416617393494, | |
| "learning_rate": 9.99343309322192e-05, | |
| "loss": 0.0922, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 4.925373134328359, | |
| "grad_norm": 0.29696759581565857, | |
| "learning_rate": 9.993002688846913e-05, | |
| "loss": 0.093, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 4.962686567164179, | |
| "grad_norm": 0.47146692872047424, | |
| "learning_rate": 9.992558633793212e-05, | |
| "loss": 0.085, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.3430916368961334, | |
| "learning_rate": 9.992100929274846e-05, | |
| "loss": 0.0805, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 5.037313432835821, | |
| "grad_norm": 0.3205055892467499, | |
| "learning_rate": 9.991629576543163e-05, | |
| "loss": 0.0766, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 5.074626865671641, | |
| "grad_norm": 0.3664805293083191, | |
| "learning_rate": 9.991144576886823e-05, | |
| "loss": 0.0766, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 5.111940298507463, | |
| "grad_norm": 0.3753412663936615, | |
| "learning_rate": 9.990645931631796e-05, | |
| "loss": 0.0688, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 5.149253731343284, | |
| "grad_norm": 0.31633055210113525, | |
| "learning_rate": 9.990133642141359e-05, | |
| "loss": 0.0796, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 5.186567164179104, | |
| "grad_norm": 0.3355732262134552, | |
| "learning_rate": 9.989607709816091e-05, | |
| "loss": 0.0716, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 5.223880597014926, | |
| "grad_norm": 0.24850831925868988, | |
| "learning_rate": 9.989068136093873e-05, | |
| "loss": 0.0778, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 5.2611940298507465, | |
| "grad_norm": 0.29537102580070496, | |
| "learning_rate": 9.988514922449879e-05, | |
| "loss": 0.0759, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 5.298507462686567, | |
| "grad_norm": 0.3430945873260498, | |
| "learning_rate": 9.987948070396571e-05, | |
| "loss": 0.0774, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 5.335820895522388, | |
| "grad_norm": 0.5220637917518616, | |
| "learning_rate": 9.987367581483705e-05, | |
| "loss": 0.0836, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 5.373134328358209, | |
| "grad_norm": 0.28184008598327637, | |
| "learning_rate": 9.986773457298311e-05, | |
| "loss": 0.0752, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 5.41044776119403, | |
| "grad_norm": 0.36261311173439026, | |
| "learning_rate": 9.986165699464705e-05, | |
| "loss": 0.075, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 5.447761194029851, | |
| "grad_norm": 0.5107380151748657, | |
| "learning_rate": 9.985544309644475e-05, | |
| "loss": 0.0814, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 5.485074626865671, | |
| "grad_norm": 0.2446671426296234, | |
| "learning_rate": 9.984909289536473e-05, | |
| "loss": 0.0704, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 5.522388059701493, | |
| "grad_norm": 0.30449381470680237, | |
| "learning_rate": 9.984260640876821e-05, | |
| "loss": 0.0794, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 5.559701492537314, | |
| "grad_norm": 0.25645050406455994, | |
| "learning_rate": 9.983598365438902e-05, | |
| "loss": 0.0709, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 5.597014925373134, | |
| "grad_norm": 0.23825006186962128, | |
| "learning_rate": 9.98292246503335e-05, | |
| "loss": 0.0828, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 5.634328358208955, | |
| "grad_norm": 0.3259269893169403, | |
| "learning_rate": 9.98223294150805e-05, | |
| "loss": 0.0824, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 5.6716417910447765, | |
| "grad_norm": 0.24058914184570312, | |
| "learning_rate": 9.981529796748134e-05, | |
| "loss": 0.073, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 5.708955223880597, | |
| "grad_norm": 0.34457242488861084, | |
| "learning_rate": 9.980813032675974e-05, | |
| "loss": 0.0845, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 5.746268656716418, | |
| "grad_norm": 0.32940393686294556, | |
| "learning_rate": 9.980082651251175e-05, | |
| "loss": 0.0832, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 5.7835820895522385, | |
| "grad_norm": 0.5683007836341858, | |
| "learning_rate": 9.979338654470569e-05, | |
| "loss": 0.0836, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 5.82089552238806, | |
| "grad_norm": 0.31041061878204346, | |
| "learning_rate": 9.97858104436822e-05, | |
| "loss": 0.07, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 5.858208955223881, | |
| "grad_norm": 0.37858131527900696, | |
| "learning_rate": 9.977809823015401e-05, | |
| "loss": 0.0738, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 5.895522388059701, | |
| "grad_norm": 0.2743091583251953, | |
| "learning_rate": 9.977024992520602e-05, | |
| "loss": 0.0761, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 5.932835820895522, | |
| "grad_norm": 0.29117098450660706, | |
| "learning_rate": 9.976226555029522e-05, | |
| "loss": 0.0777, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 5.970149253731344, | |
| "grad_norm": 0.31398633122444153, | |
| "learning_rate": 9.975414512725057e-05, | |
| "loss": 0.0664, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 6.007462686567164, | |
| "grad_norm": 0.2684272527694702, | |
| "learning_rate": 9.974588867827301e-05, | |
| "loss": 0.0686, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 6.044776119402985, | |
| "grad_norm": 0.3945397436618805, | |
| "learning_rate": 9.973749622593534e-05, | |
| "loss": 0.0614, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 6.082089552238806, | |
| "grad_norm": 0.2747954726219177, | |
| "learning_rate": 9.972896779318219e-05, | |
| "loss": 0.0681, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 6.119402985074627, | |
| "grad_norm": 0.43257200717926025, | |
| "learning_rate": 9.972030340333001e-05, | |
| "loss": 0.0725, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 6.156716417910448, | |
| "grad_norm": 0.3559250831604004, | |
| "learning_rate": 9.97115030800669e-05, | |
| "loss": 0.0804, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 6.1940298507462686, | |
| "grad_norm": 0.3079264760017395, | |
| "learning_rate": 9.970256684745258e-05, | |
| "loss": 0.0649, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 6.231343283582089, | |
| "grad_norm": 0.32298946380615234, | |
| "learning_rate": 9.969349472991838e-05, | |
| "loss": 0.0668, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 6.268656716417911, | |
| "grad_norm": 0.2826225459575653, | |
| "learning_rate": 9.968428675226714e-05, | |
| "loss": 0.0734, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 6.3059701492537314, | |
| "grad_norm": 0.39002349972724915, | |
| "learning_rate": 9.967494293967312e-05, | |
| "loss": 0.0728, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 6.343283582089552, | |
| "grad_norm": 0.403890997171402, | |
| "learning_rate": 9.966546331768191e-05, | |
| "loss": 0.067, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 6.380597014925373, | |
| "grad_norm": 0.3755359351634979, | |
| "learning_rate": 9.965584791221048e-05, | |
| "loss": 0.0755, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 6.417910447761194, | |
| "grad_norm": 0.26346635818481445, | |
| "learning_rate": 9.964609674954696e-05, | |
| "loss": 0.0728, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 6.455223880597015, | |
| "grad_norm": 0.45292145013809204, | |
| "learning_rate": 9.963620985635065e-05, | |
| "loss": 0.0731, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 6.492537313432836, | |
| "grad_norm": 0.3568434715270996, | |
| "learning_rate": 9.962618725965196e-05, | |
| "loss": 0.0761, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 6.529850746268656, | |
| "grad_norm": 0.2551257014274597, | |
| "learning_rate": 9.961602898685226e-05, | |
| "loss": 0.0694, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 6.567164179104478, | |
| "grad_norm": 0.6106354594230652, | |
| "learning_rate": 9.96057350657239e-05, | |
| "loss": 0.0827, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 6.604477611940299, | |
| "grad_norm": 0.3226093053817749, | |
| "learning_rate": 9.959530552441005e-05, | |
| "loss": 0.0716, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 6.641791044776119, | |
| "grad_norm": 0.4297254979610443, | |
| "learning_rate": 9.95847403914247e-05, | |
| "loss": 0.0748, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 6.67910447761194, | |
| "grad_norm": 0.26469680666923523, | |
| "learning_rate": 9.95740396956525e-05, | |
| "loss": 0.074, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 6.7164179104477615, | |
| "grad_norm": 0.22717897593975067, | |
| "learning_rate": 9.956320346634876e-05, | |
| "loss": 0.0739, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 6.753731343283582, | |
| "grad_norm": 0.4513498544692993, | |
| "learning_rate": 9.955223173313931e-05, | |
| "loss": 0.0664, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 6.791044776119403, | |
| "grad_norm": 0.31683439016342163, | |
| "learning_rate": 9.954112452602045e-05, | |
| "loss": 0.069, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 6.8283582089552235, | |
| "grad_norm": 0.3350532650947571, | |
| "learning_rate": 9.952988187535886e-05, | |
| "loss": 0.0699, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 6.865671641791045, | |
| "grad_norm": 0.29829463362693787, | |
| "learning_rate": 9.95185038118915e-05, | |
| "loss": 0.0663, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 6.902985074626866, | |
| "grad_norm": 0.31650781631469727, | |
| "learning_rate": 9.950699036672559e-05, | |
| "loss": 0.0668, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 6.940298507462686, | |
| "grad_norm": 0.360944926738739, | |
| "learning_rate": 9.949534157133844e-05, | |
| "loss": 0.0696, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 6.977611940298507, | |
| "grad_norm": 0.31337013840675354, | |
| "learning_rate": 9.948355745757741e-05, | |
| "loss": 0.073, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 7.014925373134329, | |
| "grad_norm": 0.4675919711589813, | |
| "learning_rate": 9.94716380576598e-05, | |
| "loss": 0.0688, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 7.052238805970149, | |
| "grad_norm": 0.3031919002532959, | |
| "learning_rate": 9.945958340417283e-05, | |
| "loss": 0.0596, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 7.08955223880597, | |
| "grad_norm": 0.24858474731445312, | |
| "learning_rate": 9.944739353007344e-05, | |
| "loss": 0.0717, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 7.126865671641791, | |
| "grad_norm": 0.20959483087062836, | |
| "learning_rate": 9.943506846868826e-05, | |
| "loss": 0.0694, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 7.164179104477612, | |
| "grad_norm": 0.35621434450149536, | |
| "learning_rate": 9.942260825371358e-05, | |
| "loss": 0.063, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 7.201492537313433, | |
| "grad_norm": 0.3462587594985962, | |
| "learning_rate": 9.941001291921512e-05, | |
| "loss": 0.068, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 7.2388059701492535, | |
| "grad_norm": 0.38649681210517883, | |
| "learning_rate": 9.939728249962807e-05, | |
| "loss": 0.0638, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 7.276119402985074, | |
| "grad_norm": 0.29564595222473145, | |
| "learning_rate": 9.938441702975689e-05, | |
| "loss": 0.0626, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 7.313432835820896, | |
| "grad_norm": 0.339857816696167, | |
| "learning_rate": 9.937141654477528e-05, | |
| "loss": 0.0535, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 7.350746268656716, | |
| "grad_norm": 0.2591215670108795, | |
| "learning_rate": 9.93582810802261e-05, | |
| "loss": 0.0645, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 7.388059701492537, | |
| "grad_norm": 0.30237796902656555, | |
| "learning_rate": 9.934501067202117e-05, | |
| "loss": 0.0675, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 7.425373134328359, | |
| "grad_norm": 0.28394174575805664, | |
| "learning_rate": 9.93316053564413e-05, | |
| "loss": 0.0643, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 7.462686567164179, | |
| "grad_norm": 0.3124663233757019, | |
| "learning_rate": 9.931806517013612e-05, | |
| "loss": 0.059, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "grad_norm": 0.36073037981987, | |
| "learning_rate": 9.930439015012396e-05, | |
| "loss": 0.0606, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 7.537313432835821, | |
| "grad_norm": 0.4091481864452362, | |
| "learning_rate": 9.929058033379181e-05, | |
| "loss": 0.0603, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 7.574626865671641, | |
| "grad_norm": 0.44718074798583984, | |
| "learning_rate": 9.927663575889521e-05, | |
| "loss": 0.0741, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 7.611940298507463, | |
| "grad_norm": 0.3819601833820343, | |
| "learning_rate": 9.926255646355804e-05, | |
| "loss": 0.0707, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 7.649253731343284, | |
| "grad_norm": 0.23336420953273773, | |
| "learning_rate": 9.92483424862726e-05, | |
| "loss": 0.0676, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 7.686567164179104, | |
| "grad_norm": 0.24415315687656403, | |
| "learning_rate": 9.923399386589933e-05, | |
| "loss": 0.0594, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 7.723880597014926, | |
| "grad_norm": 0.3735473155975342, | |
| "learning_rate": 9.921951064166684e-05, | |
| "loss": 0.062, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 7.7611940298507465, | |
| "grad_norm": 0.31629472970962524, | |
| "learning_rate": 9.92048928531717e-05, | |
| "loss": 0.0606, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 7.798507462686567, | |
| "grad_norm": 0.37902557849884033, | |
| "learning_rate": 9.919014054037836e-05, | |
| "loss": 0.0584, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 7.835820895522388, | |
| "grad_norm": 0.3486720323562622, | |
| "learning_rate": 9.917525374361912e-05, | |
| "loss": 0.056, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 7.8731343283582085, | |
| "grad_norm": 0.3731362521648407, | |
| "learning_rate": 9.91602325035939e-05, | |
| "loss": 0.0601, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 7.91044776119403, | |
| "grad_norm": 0.3560399115085602, | |
| "learning_rate": 9.914507686137019e-05, | |
| "loss": 0.06, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 7.947761194029851, | |
| "grad_norm": 0.30075564980506897, | |
| "learning_rate": 9.912978685838294e-05, | |
| "loss": 0.0657, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 7.985074626865671, | |
| "grad_norm": 0.2984028458595276, | |
| "learning_rate": 9.911436253643445e-05, | |
| "loss": 0.0587, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 8.022388059701493, | |
| "grad_norm": 0.1980169117450714, | |
| "learning_rate": 9.90988039376942e-05, | |
| "loss": 0.0718, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 8.059701492537313, | |
| "grad_norm": 0.31339579820632935, | |
| "learning_rate": 9.90831111046988e-05, | |
| "loss": 0.0557, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 8.097014925373134, | |
| "grad_norm": 0.1968696266412735, | |
| "learning_rate": 9.90672840803519e-05, | |
| "loss": 0.0571, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 8.134328358208956, | |
| "grad_norm": 0.23931682109832764, | |
| "learning_rate": 9.905132290792394e-05, | |
| "loss": 0.0566, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 8.171641791044776, | |
| "grad_norm": 0.21741189062595367, | |
| "learning_rate": 9.903522763105218e-05, | |
| "loss": 0.0575, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 8.208955223880597, | |
| "grad_norm": 0.22874368727207184, | |
| "learning_rate": 9.901899829374047e-05, | |
| "loss": 0.0565, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 8.246268656716419, | |
| "grad_norm": 0.3441888093948364, | |
| "learning_rate": 9.900263494035921e-05, | |
| "loss": 0.0565, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 8.283582089552239, | |
| "grad_norm": 0.2539830803871155, | |
| "learning_rate": 9.89861376156452e-05, | |
| "loss": 0.0538, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 8.32089552238806, | |
| "grad_norm": 0.2235102653503418, | |
| "learning_rate": 9.896950636470147e-05, | |
| "loss": 0.0609, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 8.35820895522388, | |
| "grad_norm": 0.1941322684288025, | |
| "learning_rate": 9.895274123299723e-05, | |
| "loss": 0.0562, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 8.395522388059701, | |
| "grad_norm": 0.2691369950771332, | |
| "learning_rate": 9.893584226636772e-05, | |
| "loss": 0.0608, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 8.432835820895523, | |
| "grad_norm": 0.24730461835861206, | |
| "learning_rate": 9.891880951101407e-05, | |
| "loss": 0.0582, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 8.470149253731343, | |
| "grad_norm": 0.34785839915275574, | |
| "learning_rate": 9.890164301350318e-05, | |
| "loss": 0.0506, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 8.507462686567164, | |
| "grad_norm": 0.3625825345516205, | |
| "learning_rate": 9.888434282076758e-05, | |
| "loss": 0.0614, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 8.544776119402986, | |
| "grad_norm": 0.25210148096084595, | |
| "learning_rate": 9.886690898010535e-05, | |
| "loss": 0.0611, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 8.582089552238806, | |
| "grad_norm": 0.27312466502189636, | |
| "learning_rate": 9.884934153917997e-05, | |
| "loss": 0.0537, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 8.619402985074627, | |
| "grad_norm": 0.314647912979126, | |
| "learning_rate": 9.883164054602012e-05, | |
| "loss": 0.0602, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 8.656716417910447, | |
| "grad_norm": 0.21531912684440613, | |
| "learning_rate": 9.881380604901964e-05, | |
| "loss": 0.0552, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 8.694029850746269, | |
| "grad_norm": 0.23920664191246033, | |
| "learning_rate": 9.879583809693738e-05, | |
| "loss": 0.0613, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 8.73134328358209, | |
| "grad_norm": 0.21864956617355347, | |
| "learning_rate": 9.877773673889701e-05, | |
| "loss": 0.0649, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 8.76865671641791, | |
| "grad_norm": 0.27523377537727356, | |
| "learning_rate": 9.8759502024387e-05, | |
| "loss": 0.0606, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 8.805970149253731, | |
| "grad_norm": 0.24805469810962677, | |
| "learning_rate": 9.87411340032603e-05, | |
| "loss": 0.0549, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 8.843283582089553, | |
| "grad_norm": 0.23070092499256134, | |
| "learning_rate": 9.872263272573443e-05, | |
| "loss": 0.0562, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 8.880597014925373, | |
| "grad_norm": 0.20833946764469147, | |
| "learning_rate": 9.870399824239117e-05, | |
| "loss": 0.05, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 8.917910447761194, | |
| "grad_norm": 0.34507372975349426, | |
| "learning_rate": 9.868523060417646e-05, | |
| "loss": 0.0613, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 8.955223880597014, | |
| "grad_norm": 0.32865110039711, | |
| "learning_rate": 9.86663298624003e-05, | |
| "loss": 0.0621, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 8.992537313432836, | |
| "grad_norm": 0.21305270493030548, | |
| "learning_rate": 9.864729606873663e-05, | |
| "loss": 0.0572, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 9.029850746268657, | |
| "grad_norm": 0.28193730115890503, | |
| "learning_rate": 9.862812927522309e-05, | |
| "loss": 0.0555, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 9.067164179104477, | |
| "grad_norm": 0.3953789472579956, | |
| "learning_rate": 9.860882953426099e-05, | |
| "loss": 0.0536, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 9.104477611940299, | |
| "grad_norm": 0.23013322055339813, | |
| "learning_rate": 9.858939689861506e-05, | |
| "loss": 0.0572, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 9.14179104477612, | |
| "grad_norm": 0.2906680107116699, | |
| "learning_rate": 9.856983142141339e-05, | |
| "loss": 0.0592, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 9.17910447761194, | |
| "grad_norm": 0.23490828275680542, | |
| "learning_rate": 9.855013315614725e-05, | |
| "loss": 0.0583, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 9.216417910447761, | |
| "grad_norm": 0.22825880348682404, | |
| "learning_rate": 9.853030215667093e-05, | |
| "loss": 0.059, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 9.253731343283581, | |
| "grad_norm": 0.25871285796165466, | |
| "learning_rate": 9.851033847720166e-05, | |
| "loss": 0.0555, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 9.291044776119403, | |
| "grad_norm": 0.27220776677131653, | |
| "learning_rate": 9.849024217231935e-05, | |
| "loss": 0.0542, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 9.328358208955224, | |
| "grad_norm": 0.26534005999565125, | |
| "learning_rate": 9.847001329696653e-05, | |
| "loss": 0.0526, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 9.365671641791044, | |
| "grad_norm": 0.33486032485961914, | |
| "learning_rate": 9.844965190644817e-05, | |
| "loss": 0.0563, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 9.402985074626866, | |
| "grad_norm": 0.2949483394622803, | |
| "learning_rate": 9.842915805643155e-05, | |
| "loss": 0.0556, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 9.440298507462687, | |
| "grad_norm": 0.24123981595039368, | |
| "learning_rate": 9.840853180294608e-05, | |
| "loss": 0.05, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 9.477611940298507, | |
| "grad_norm": 0.22536049783229828, | |
| "learning_rate": 9.838777320238312e-05, | |
| "loss": 0.0522, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 9.514925373134329, | |
| "grad_norm": 0.23206663131713867, | |
| "learning_rate": 9.836688231149592e-05, | |
| "loss": 0.0591, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 9.552238805970148, | |
| "grad_norm": 0.28573134541511536, | |
| "learning_rate": 9.834585918739936e-05, | |
| "loss": 0.0568, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 9.58955223880597, | |
| "grad_norm": 0.2628820538520813, | |
| "learning_rate": 9.832470388756987e-05, | |
| "loss": 0.0571, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 9.626865671641792, | |
| "grad_norm": 0.2880440652370453, | |
| "learning_rate": 9.830341646984521e-05, | |
| "loss": 0.0559, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 9.664179104477611, | |
| "grad_norm": 0.1786259263753891, | |
| "learning_rate": 9.82819969924244e-05, | |
| "loss": 0.058, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 9.701492537313433, | |
| "grad_norm": 0.3501608073711395, | |
| "learning_rate": 9.826044551386744e-05, | |
| "loss": 0.0523, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 9.738805970149254, | |
| "grad_norm": 0.24757252633571625, | |
| "learning_rate": 9.823876209309527e-05, | |
| "loss": 0.0587, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 9.776119402985074, | |
| "grad_norm": 0.2556290626525879, | |
| "learning_rate": 9.821694678938953e-05, | |
| "loss": 0.0555, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 9.813432835820896, | |
| "grad_norm": 0.2561217248439789, | |
| "learning_rate": 9.819499966239243e-05, | |
| "loss": 0.052, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 9.850746268656717, | |
| "grad_norm": 0.2776634097099304, | |
| "learning_rate": 9.817292077210659e-05, | |
| "loss": 0.0498, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 9.888059701492537, | |
| "grad_norm": 0.20668549835681915, | |
| "learning_rate": 9.815071017889482e-05, | |
| "loss": 0.0517, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 9.925373134328359, | |
| "grad_norm": 0.3100263178348541, | |
| "learning_rate": 9.812836794348004e-05, | |
| "loss": 0.0633, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 9.962686567164178, | |
| "grad_norm": 0.2780782878398895, | |
| "learning_rate": 9.81058941269451e-05, | |
| "loss": 0.0581, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.28903728723526, | |
| "learning_rate": 9.808328879073251e-05, | |
| "loss": 0.0538, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 10.037313432835822, | |
| "grad_norm": 0.22727562487125397, | |
| "learning_rate": 9.806055199664446e-05, | |
| "loss": 0.0491, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 10.074626865671641, | |
| "grad_norm": 0.267918199300766, | |
| "learning_rate": 9.803768380684242e-05, | |
| "loss": 0.0562, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 10.111940298507463, | |
| "grad_norm": 0.2988606095314026, | |
| "learning_rate": 9.801468428384716e-05, | |
| "loss": 0.0566, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 10.149253731343283, | |
| "grad_norm": 0.2710281312465668, | |
| "learning_rate": 9.799155349053851e-05, | |
| "loss": 0.0541, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 10.186567164179104, | |
| "grad_norm": 0.15320520102977753, | |
| "learning_rate": 9.796829149015517e-05, | |
| "loss": 0.0548, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 10.223880597014926, | |
| "grad_norm": 0.2653089463710785, | |
| "learning_rate": 9.794489834629455e-05, | |
| "loss": 0.0599, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 10.261194029850746, | |
| "grad_norm": 0.19223959743976593, | |
| "learning_rate": 9.792137412291265e-05, | |
| "loss": 0.0494, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 10.298507462686567, | |
| "grad_norm": 0.20455987751483917, | |
| "learning_rate": 9.789771888432375e-05, | |
| "loss": 0.0538, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 10.335820895522389, | |
| "grad_norm": 0.24908749759197235, | |
| "learning_rate": 9.787393269520039e-05, | |
| "loss": 0.0481, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 10.373134328358208, | |
| "grad_norm": 0.3131813406944275, | |
| "learning_rate": 9.785001562057309e-05, | |
| "loss": 0.0526, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 10.41044776119403, | |
| "grad_norm": 0.24828971922397614, | |
| "learning_rate": 9.782596772583026e-05, | |
| "loss": 0.0489, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 10.447761194029852, | |
| "grad_norm": 0.21727119386196136, | |
| "learning_rate": 9.780178907671789e-05, | |
| "loss": 0.0532, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 10.485074626865671, | |
| "grad_norm": 0.20279547572135925, | |
| "learning_rate": 9.777747973933948e-05, | |
| "loss": 0.0565, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 10.522388059701493, | |
| "grad_norm": 0.17726702988147736, | |
| "learning_rate": 9.775303978015585e-05, | |
| "loss": 0.0437, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 10.559701492537313, | |
| "grad_norm": 0.18961119651794434, | |
| "learning_rate": 9.772846926598491e-05, | |
| "loss": 0.0584, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 10.597014925373134, | |
| "grad_norm": 0.2498980015516281, | |
| "learning_rate": 9.77037682640015e-05, | |
| "loss": 0.0496, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 10.634328358208956, | |
| "grad_norm": 0.16978798806667328, | |
| "learning_rate": 9.767893684173721e-05, | |
| "loss": 0.0469, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 10.671641791044776, | |
| "grad_norm": 0.16128584742546082, | |
| "learning_rate": 9.765397506708023e-05, | |
| "loss": 0.0533, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 10.708955223880597, | |
| "grad_norm": 0.20463155210018158, | |
| "learning_rate": 9.762888300827507e-05, | |
| "loss": 0.0464, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 10.746268656716419, | |
| "grad_norm": 0.30601629614830017, | |
| "learning_rate": 9.760366073392246e-05, | |
| "loss": 0.0489, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 10.783582089552239, | |
| "grad_norm": 0.2730671763420105, | |
| "learning_rate": 9.757830831297914e-05, | |
| "loss": 0.0495, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 10.82089552238806, | |
| "grad_norm": 0.251432865858078, | |
| "learning_rate": 9.755282581475769e-05, | |
| "loss": 0.0549, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 10.85820895522388, | |
| "grad_norm": 0.26670166850090027, | |
| "learning_rate": 9.752721330892624e-05, | |
| "loss": 0.061, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 10.895522388059701, | |
| "grad_norm": 0.2965967655181885, | |
| "learning_rate": 9.750147086550844e-05, | |
| "loss": 0.0473, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 10.932835820895523, | |
| "grad_norm": 0.683840274810791, | |
| "learning_rate": 9.747559855488313e-05, | |
| "loss": 0.0509, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 10.970149253731343, | |
| "grad_norm": 0.25740495324134827, | |
| "learning_rate": 9.744959644778422e-05, | |
| "loss": 0.0515, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 11.007462686567164, | |
| "grad_norm": 0.2880542278289795, | |
| "learning_rate": 9.742346461530048e-05, | |
| "loss": 0.0482, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 11.044776119402986, | |
| "grad_norm": 0.45032551884651184, | |
| "learning_rate": 9.739720312887535e-05, | |
| "loss": 0.0557, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 11.082089552238806, | |
| "grad_norm": 0.2829900085926056, | |
| "learning_rate": 9.73708120603067e-05, | |
| "loss": 0.052, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 11.119402985074627, | |
| "grad_norm": 0.309597373008728, | |
| "learning_rate": 9.734429148174675e-05, | |
| "loss": 0.0541, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 11.156716417910447, | |
| "grad_norm": 0.2433389127254486, | |
| "learning_rate": 9.731764146570173e-05, | |
| "loss": 0.0482, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 11.194029850746269, | |
| "grad_norm": 0.24458132684230804, | |
| "learning_rate": 9.729086208503174e-05, | |
| "loss": 0.0505, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 11.23134328358209, | |
| "grad_norm": 0.2305087298154831, | |
| "learning_rate": 9.726395341295062e-05, | |
| "loss": 0.0504, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 11.26865671641791, | |
| "grad_norm": 0.18110457062721252, | |
| "learning_rate": 9.723691552302562e-05, | |
| "loss": 0.0575, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 11.305970149253731, | |
| "grad_norm": 0.20407621562480927, | |
| "learning_rate": 9.720974848917735e-05, | |
| "loss": 0.0494, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 11.343283582089553, | |
| "grad_norm": 0.25924697518348694, | |
| "learning_rate": 9.718245238567939e-05, | |
| "loss": 0.0472, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 11.380597014925373, | |
| "grad_norm": 0.23041822016239166, | |
| "learning_rate": 9.715502728715826e-05, | |
| "loss": 0.0481, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 11.417910447761194, | |
| "grad_norm": 0.25381171703338623, | |
| "learning_rate": 9.712747326859315e-05, | |
| "loss": 0.0543, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 11.455223880597014, | |
| "grad_norm": 0.18027640879154205, | |
| "learning_rate": 9.709979040531569e-05, | |
| "loss": 0.055, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 11.492537313432836, | |
| "grad_norm": 0.2954868674278259, | |
| "learning_rate": 9.707197877300974e-05, | |
| "loss": 0.0473, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 11.529850746268657, | |
| "grad_norm": 0.25323861837387085, | |
| "learning_rate": 9.704403844771128e-05, | |
| "loss": 0.0509, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 11.567164179104477, | |
| "grad_norm": 0.36910176277160645, | |
| "learning_rate": 9.701596950580806e-05, | |
| "loss": 0.0504, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 11.604477611940299, | |
| "grad_norm": 0.34199246764183044, | |
| "learning_rate": 9.698777202403953e-05, | |
| "loss": 0.0526, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 11.64179104477612, | |
| "grad_norm": 0.2146557718515396, | |
| "learning_rate": 9.695944607949649e-05, | |
| "loss": 0.0579, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 11.67910447761194, | |
| "grad_norm": 0.20559175312519073, | |
| "learning_rate": 9.693099174962103e-05, | |
| "loss": 0.0514, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 11.716417910447761, | |
| "grad_norm": 0.2689419090747833, | |
| "learning_rate": 9.690240911220618e-05, | |
| "loss": 0.0534, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 11.753731343283581, | |
| "grad_norm": 0.34870603680610657, | |
| "learning_rate": 9.687369824539577e-05, | |
| "loss": 0.0485, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 11.791044776119403, | |
| "grad_norm": 0.15433363616466522, | |
| "learning_rate": 9.684485922768422e-05, | |
| "loss": 0.0418, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 11.828358208955224, | |
| "grad_norm": 0.26874423027038574, | |
| "learning_rate": 9.681589213791633e-05, | |
| "loss": 0.0537, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 11.865671641791044, | |
| "grad_norm": 0.3361654281616211, | |
| "learning_rate": 9.6786797055287e-05, | |
| "loss": 0.0474, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 11.902985074626866, | |
| "grad_norm": 0.17938771843910217, | |
| "learning_rate": 9.675757405934103e-05, | |
| "loss": 0.0443, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 11.940298507462687, | |
| "grad_norm": 0.31368622183799744, | |
| "learning_rate": 9.672822322997305e-05, | |
| "loss": 0.0594, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 11.977611940298507, | |
| "grad_norm": 0.16268151998519897, | |
| "learning_rate": 9.669874464742705e-05, | |
| "loss": 0.0487, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 12.014925373134329, | |
| "grad_norm": 0.23879969120025635, | |
| "learning_rate": 9.66691383922964e-05, | |
| "loss": 0.0484, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 12.052238805970148, | |
| "grad_norm": 0.2321789413690567, | |
| "learning_rate": 9.663940454552342e-05, | |
| "loss": 0.051, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 12.08955223880597, | |
| "grad_norm": 0.22873088717460632, | |
| "learning_rate": 9.660954318839933e-05, | |
| "loss": 0.0406, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 12.126865671641792, | |
| "grad_norm": 0.3767557740211487, | |
| "learning_rate": 9.657955440256395e-05, | |
| "loss": 0.0432, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 12.164179104477611, | |
| "grad_norm": 0.21569453179836273, | |
| "learning_rate": 9.654943827000548e-05, | |
| "loss": 0.0528, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 12.201492537313433, | |
| "grad_norm": 0.23698291182518005, | |
| "learning_rate": 9.651919487306025e-05, | |
| "loss": 0.0457, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 12.238805970149254, | |
| "grad_norm": 0.21086478233337402, | |
| "learning_rate": 9.648882429441257e-05, | |
| "loss": 0.0508, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 12.276119402985074, | |
| "grad_norm": 0.19763463735580444, | |
| "learning_rate": 9.645832661709444e-05, | |
| "loss": 0.0497, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 12.313432835820896, | |
| "grad_norm": 0.18413852155208588, | |
| "learning_rate": 9.642770192448536e-05, | |
| "loss": 0.0441, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 12.350746268656717, | |
| "grad_norm": 0.13946911692619324, | |
| "learning_rate": 9.639695030031204e-05, | |
| "loss": 0.0453, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 12.388059701492537, | |
| "grad_norm": 0.21613670885562897, | |
| "learning_rate": 9.636607182864827e-05, | |
| "loss": 0.0511, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 12.425373134328359, | |
| "grad_norm": 0.24953646957874298, | |
| "learning_rate": 9.63350665939146e-05, | |
| "loss": 0.0451, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 12.462686567164178, | |
| "grad_norm": 0.2993795871734619, | |
| "learning_rate": 9.630393468087818e-05, | |
| "loss": 0.0469, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 0.2261819839477539, | |
| "learning_rate": 9.627267617465243e-05, | |
| "loss": 0.0484, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 12.537313432835822, | |
| "grad_norm": 0.23026186227798462, | |
| "learning_rate": 9.624129116069694e-05, | |
| "loss": 0.0452, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 12.574626865671641, | |
| "grad_norm": 0.27859947085380554, | |
| "learning_rate": 9.620977972481716e-05, | |
| "loss": 0.0593, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 12.611940298507463, | |
| "grad_norm": 0.23060785233974457, | |
| "learning_rate": 9.617814195316411e-05, | |
| "loss": 0.05, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 12.649253731343283, | |
| "grad_norm": 0.20185025036334991, | |
| "learning_rate": 9.614637793223425e-05, | |
| "loss": 0.0573, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 12.686567164179104, | |
| "grad_norm": 0.3584498167037964, | |
| "learning_rate": 9.611448774886924e-05, | |
| "loss": 0.052, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 12.723880597014926, | |
| "grad_norm": 0.19336827099323273, | |
| "learning_rate": 9.60824714902556e-05, | |
| "loss": 0.0535, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 12.761194029850746, | |
| "grad_norm": 0.22223635017871857, | |
| "learning_rate": 9.605032924392457e-05, | |
| "loss": 0.05, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 12.798507462686567, | |
| "grad_norm": 0.17108851671218872, | |
| "learning_rate": 9.601806109775179e-05, | |
| "loss": 0.0475, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 12.835820895522389, | |
| "grad_norm": 0.3861902952194214, | |
| "learning_rate": 9.598566713995718e-05, | |
| "loss": 0.0439, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 12.873134328358208, | |
| "grad_norm": 0.18927253782749176, | |
| "learning_rate": 9.595314745910456e-05, | |
| "loss": 0.052, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 12.91044776119403, | |
| "grad_norm": 0.21963383257389069, | |
| "learning_rate": 9.59205021441015e-05, | |
| "loss": 0.0504, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 12.947761194029852, | |
| "grad_norm": 0.18016670644283295, | |
| "learning_rate": 9.588773128419906e-05, | |
| "loss": 0.0467, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 12.985074626865671, | |
| "grad_norm": 0.1776365041732788, | |
| "learning_rate": 9.58548349689915e-05, | |
| "loss": 0.0414, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 13.022388059701493, | |
| "grad_norm": 0.2616482973098755, | |
| "learning_rate": 9.582181328841611e-05, | |
| "loss": 0.0442, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 13.059701492537313, | |
| "grad_norm": 0.20341171324253082, | |
| "learning_rate": 9.578866633275288e-05, | |
| "loss": 0.0533, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 13.097014925373134, | |
| "grad_norm": 0.2223699688911438, | |
| "learning_rate": 9.575539419262434e-05, | |
| "loss": 0.0458, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 13.134328358208956, | |
| "grad_norm": 0.22557464241981506, | |
| "learning_rate": 9.572199695899522e-05, | |
| "loss": 0.0445, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 13.171641791044776, | |
| "grad_norm": 0.25104308128356934, | |
| "learning_rate": 9.568847472317232e-05, | |
| "loss": 0.0435, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 13.208955223880597, | |
| "grad_norm": 0.18720711767673492, | |
| "learning_rate": 9.565482757680415e-05, | |
| "loss": 0.0453, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 13.246268656716419, | |
| "grad_norm": 0.16838951408863068, | |
| "learning_rate": 9.562105561188069e-05, | |
| "loss": 0.0505, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 13.283582089552239, | |
| "grad_norm": 0.31681734323501587, | |
| "learning_rate": 9.558715892073323e-05, | |
| "loss": 0.0494, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 13.32089552238806, | |
| "grad_norm": 0.2390700727701187, | |
| "learning_rate": 9.555313759603402e-05, | |
| "loss": 0.0538, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 13.35820895522388, | |
| "grad_norm": 0.20680709183216095, | |
| "learning_rate": 9.551899173079607e-05, | |
| "loss": 0.0519, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 13.395522388059701, | |
| "grad_norm": 0.2758580148220062, | |
| "learning_rate": 9.548472141837286e-05, | |
| "loss": 0.0512, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 13.432835820895523, | |
| "grad_norm": 0.3653097450733185, | |
| "learning_rate": 9.545032675245813e-05, | |
| "loss": 0.0496, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 13.470149253731343, | |
| "grad_norm": 0.23886866867542267, | |
| "learning_rate": 9.541580782708557e-05, | |
| "loss": 0.0455, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 13.507462686567164, | |
| "grad_norm": 0.3280908465385437, | |
| "learning_rate": 9.538116473662861e-05, | |
| "loss": 0.0489, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 13.544776119402986, | |
| "grad_norm": 0.20268180966377258, | |
| "learning_rate": 9.534639757580013e-05, | |
| "loss": 0.0484, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 13.582089552238806, | |
| "grad_norm": 0.2582015097141266, | |
| "learning_rate": 9.531150643965223e-05, | |
| "loss": 0.0487, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 13.619402985074627, | |
| "grad_norm": 0.18157973885536194, | |
| "learning_rate": 9.527649142357596e-05, | |
| "loss": 0.0496, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 13.656716417910447, | |
| "grad_norm": 0.22841542959213257, | |
| "learning_rate": 9.524135262330098e-05, | |
| "loss": 0.0467, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 13.694029850746269, | |
| "grad_norm": 0.2519935369491577, | |
| "learning_rate": 9.520609013489547e-05, | |
| "loss": 0.0487, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 13.73134328358209, | |
| "grad_norm": 0.24680495262145996, | |
| "learning_rate": 9.517070405476575e-05, | |
| "loss": 0.0457, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 13.76865671641791, | |
| "grad_norm": 0.26362067461013794, | |
| "learning_rate": 9.513519447965595e-05, | |
| "loss": 0.0495, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 13.805970149253731, | |
| "grad_norm": 0.3240712583065033, | |
| "learning_rate": 9.509956150664796e-05, | |
| "loss": 0.0496, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 13.843283582089553, | |
| "grad_norm": 0.21009013056755066, | |
| "learning_rate": 9.50638052331609e-05, | |
| "loss": 0.0457, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 13.880597014925373, | |
| "grad_norm": 0.1669154316186905, | |
| "learning_rate": 9.502792575695112e-05, | |
| "loss": 0.0496, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 13.917910447761194, | |
| "grad_norm": 0.22347605228424072, | |
| "learning_rate": 9.499192317611167e-05, | |
| "loss": 0.0426, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 13.955223880597014, | |
| "grad_norm": 0.15208907425403595, | |
| "learning_rate": 9.49557975890723e-05, | |
| "loss": 0.0447, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 13.992537313432836, | |
| "grad_norm": 0.3206101059913635, | |
| "learning_rate": 9.491954909459895e-05, | |
| "loss": 0.0471, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 14.029850746268657, | |
| "grad_norm": 0.15873713791370392, | |
| "learning_rate": 9.488317779179361e-05, | |
| "loss": 0.0401, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 14.067164179104477, | |
| "grad_norm": 0.19690357148647308, | |
| "learning_rate": 9.484668378009408e-05, | |
| "loss": 0.0491, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 14.104477611940299, | |
| "grad_norm": 0.3211113214492798, | |
| "learning_rate": 9.481006715927351e-05, | |
| "loss": 0.049, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 14.14179104477612, | |
| "grad_norm": 0.27657604217529297, | |
| "learning_rate": 9.477332802944044e-05, | |
| "loss": 0.0396, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 14.17910447761194, | |
| "grad_norm": 0.20194031298160553, | |
| "learning_rate": 9.473646649103818e-05, | |
| "loss": 0.0442, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 14.216417910447761, | |
| "grad_norm": 0.20344595611095428, | |
| "learning_rate": 9.46994826448448e-05, | |
| "loss": 0.0427, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 14.253731343283581, | |
| "grad_norm": 0.2067718505859375, | |
| "learning_rate": 9.46623765919727e-05, | |
| "loss": 0.0501, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 14.291044776119403, | |
| "grad_norm": 0.29719170928001404, | |
| "learning_rate": 9.462514843386845e-05, | |
| "loss": 0.0519, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 14.328358208955224, | |
| "grad_norm": 0.2347182184457779, | |
| "learning_rate": 9.458779827231237e-05, | |
| "loss": 0.0413, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 14.365671641791044, | |
| "grad_norm": 0.1558852344751358, | |
| "learning_rate": 9.45503262094184e-05, | |
| "loss": 0.0442, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 14.402985074626866, | |
| "grad_norm": 0.23085005581378937, | |
| "learning_rate": 9.451273234763371e-05, | |
| "loss": 0.047, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 14.440298507462687, | |
| "grad_norm": 0.1515151560306549, | |
| "learning_rate": 9.447501678973852e-05, | |
| "loss": 0.0481, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 14.477611940298507, | |
| "grad_norm": 0.1916729211807251, | |
| "learning_rate": 9.443717963884569e-05, | |
| "loss": 0.0474, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 14.514925373134329, | |
| "grad_norm": 0.2536492943763733, | |
| "learning_rate": 9.439922099840054e-05, | |
| "loss": 0.0382, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 14.552238805970148, | |
| "grad_norm": 0.1672086864709854, | |
| "learning_rate": 9.43611409721806e-05, | |
| "loss": 0.0497, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 14.58955223880597, | |
| "grad_norm": 0.3644237518310547, | |
| "learning_rate": 9.432293966429514e-05, | |
| "loss": 0.0444, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 14.626865671641792, | |
| "grad_norm": 0.20307251811027527, | |
| "learning_rate": 9.428461717918511e-05, | |
| "loss": 0.0452, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 14.664179104477611, | |
| "grad_norm": 0.20441733300685883, | |
| "learning_rate": 9.424617362162271e-05, | |
| "loss": 0.0454, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 14.701492537313433, | |
| "grad_norm": 0.26315611600875854, | |
| "learning_rate": 9.420760909671118e-05, | |
| "loss": 0.0486, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 14.738805970149254, | |
| "grad_norm": 0.1983092874288559, | |
| "learning_rate": 9.416892370988444e-05, | |
| "loss": 0.0483, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 14.776119402985074, | |
| "grad_norm": 0.18301443755626678, | |
| "learning_rate": 9.413011756690685e-05, | |
| "loss": 0.0456, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 14.813432835820896, | |
| "grad_norm": 0.2433597594499588, | |
| "learning_rate": 9.409119077387294e-05, | |
| "loss": 0.0463, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 14.850746268656717, | |
| "grad_norm": 0.27949392795562744, | |
| "learning_rate": 9.405214343720707e-05, | |
| "loss": 0.0412, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 14.888059701492537, | |
| "grad_norm": 0.22806599736213684, | |
| "learning_rate": 9.401297566366318e-05, | |
| "loss": 0.0448, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 14.925373134328359, | |
| "grad_norm": 0.25421562790870667, | |
| "learning_rate": 9.397368756032445e-05, | |
| "loss": 0.0426, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 14.962686567164178, | |
| "grad_norm": 0.2436474859714508, | |
| "learning_rate": 9.393427923460308e-05, | |
| "loss": 0.0474, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.3756405711174011, | |
| "learning_rate": 9.389475079423988e-05, | |
| "loss": 0.0438, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 15.037313432835822, | |
| "grad_norm": 0.25687697529792786, | |
| "learning_rate": 9.385510234730415e-05, | |
| "loss": 0.0435, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 15.074626865671641, | |
| "grad_norm": 0.17263716459274292, | |
| "learning_rate": 9.381533400219318e-05, | |
| "loss": 0.0455, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 15.111940298507463, | |
| "grad_norm": 0.2471216470003128, | |
| "learning_rate": 9.377544586763215e-05, | |
| "loss": 0.0429, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 15.149253731343283, | |
| "grad_norm": 0.20195460319519043, | |
| "learning_rate": 9.373543805267368e-05, | |
| "loss": 0.0432, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 15.186567164179104, | |
| "grad_norm": 0.1709851622581482, | |
| "learning_rate": 9.369531066669758e-05, | |
| "loss": 0.0477, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 15.223880597014926, | |
| "grad_norm": 0.23063932359218597, | |
| "learning_rate": 9.365506381941066e-05, | |
| "loss": 0.0379, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 15.261194029850746, | |
| "grad_norm": 0.3265426754951477, | |
| "learning_rate": 9.36146976208462e-05, | |
| "loss": 0.0435, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 15.298507462686567, | |
| "grad_norm": 0.26373934745788574, | |
| "learning_rate": 9.357421218136386e-05, | |
| "loss": 0.047, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 15.335820895522389, | |
| "grad_norm": 0.16861388087272644, | |
| "learning_rate": 9.353360761164931e-05, | |
| "loss": 0.0448, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 15.373134328358208, | |
| "grad_norm": 0.303790807723999, | |
| "learning_rate": 9.349288402271388e-05, | |
| "loss": 0.0396, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 15.41044776119403, | |
| "grad_norm": 0.1940719038248062, | |
| "learning_rate": 9.345204152589428e-05, | |
| "loss": 0.0474, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 15.447761194029852, | |
| "grad_norm": 0.34091615676879883, | |
| "learning_rate": 9.341108023285238e-05, | |
| "loss": 0.0424, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 15.485074626865671, | |
| "grad_norm": 0.27036693692207336, | |
| "learning_rate": 9.337000025557476e-05, | |
| "loss": 0.0482, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 15.522388059701493, | |
| "grad_norm": 0.16908007860183716, | |
| "learning_rate": 9.332880170637252e-05, | |
| "loss": 0.0381, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 15.559701492537313, | |
| "grad_norm": 0.23332923650741577, | |
| "learning_rate": 9.328748469788093e-05, | |
| "loss": 0.0427, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 15.597014925373134, | |
| "grad_norm": 0.16899706423282623, | |
| "learning_rate": 9.32460493430591e-05, | |
| "loss": 0.0439, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 15.634328358208956, | |
| "grad_norm": 0.12869524955749512, | |
| "learning_rate": 9.320449575518972e-05, | |
| "loss": 0.0481, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 15.671641791044776, | |
| "grad_norm": 0.21159130334854126, | |
| "learning_rate": 9.316282404787871e-05, | |
| "loss": 0.0446, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 15.708955223880597, | |
| "grad_norm": 0.1849961131811142, | |
| "learning_rate": 9.31210343350549e-05, | |
| "loss": 0.041, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 15.746268656716419, | |
| "grad_norm": 0.16107840836048126, | |
| "learning_rate": 9.30791267309698e-05, | |
| "loss": 0.0429, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 15.783582089552239, | |
| "grad_norm": 0.14206446707248688, | |
| "learning_rate": 9.30371013501972e-05, | |
| "loss": 0.0409, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 15.82089552238806, | |
| "grad_norm": 0.2168441116809845, | |
| "learning_rate": 9.299495830763286e-05, | |
| "loss": 0.0413, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 15.85820895522388, | |
| "grad_norm": 0.21431951224803925, | |
| "learning_rate": 9.295269771849427e-05, | |
| "loss": 0.0472, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 15.895522388059701, | |
| "grad_norm": 0.16851255297660828, | |
| "learning_rate": 9.291031969832026e-05, | |
| "loss": 0.0508, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 15.932835820895523, | |
| "grad_norm": 0.18404732644557953, | |
| "learning_rate": 9.286782436297073e-05, | |
| "loss": 0.0402, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 15.970149253731343, | |
| "grad_norm": 0.21722930669784546, | |
| "learning_rate": 9.282521182862629e-05, | |
| "loss": 0.0397, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 16.007462686567163, | |
| "grad_norm": 0.2523709833621979, | |
| "learning_rate": 9.278248221178798e-05, | |
| "loss": 0.0427, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 16.044776119402986, | |
| "grad_norm": 0.17736563086509705, | |
| "learning_rate": 9.273963562927695e-05, | |
| "loss": 0.0458, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 16.082089552238806, | |
| "grad_norm": 0.20613858103752136, | |
| "learning_rate": 9.269667219823412e-05, | |
| "loss": 0.0387, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 16.119402985074625, | |
| "grad_norm": 0.16557513177394867, | |
| "learning_rate": 9.265359203611987e-05, | |
| "loss": 0.0411, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 16.15671641791045, | |
| "grad_norm": 0.28119519352912903, | |
| "learning_rate": 9.261039526071374e-05, | |
| "loss": 0.0468, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 16.19402985074627, | |
| "grad_norm": 0.21538576483726501, | |
| "learning_rate": 9.256708199011401e-05, | |
| "loss": 0.0368, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 16.23134328358209, | |
| "grad_norm": 0.19657357037067413, | |
| "learning_rate": 9.252365234273755e-05, | |
| "loss": 0.038, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 16.26865671641791, | |
| "grad_norm": 0.19258421659469604, | |
| "learning_rate": 9.248010643731935e-05, | |
| "loss": 0.0414, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 16.30597014925373, | |
| "grad_norm": 0.28801625967025757, | |
| "learning_rate": 9.243644439291223e-05, | |
| "loss": 0.0387, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 16.34328358208955, | |
| "grad_norm": 0.16581468284130096, | |
| "learning_rate": 9.239266632888659e-05, | |
| "loss": 0.0383, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 16.380597014925375, | |
| "grad_norm": 0.34664949774742126, | |
| "learning_rate": 9.234877236492997e-05, | |
| "loss": 0.0453, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 16.417910447761194, | |
| "grad_norm": 0.1439947783946991, | |
| "learning_rate": 9.230476262104677e-05, | |
| "loss": 0.0466, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 16.455223880597014, | |
| "grad_norm": 0.15509940683841705, | |
| "learning_rate": 9.226063721755799e-05, | |
| "loss": 0.0488, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 16.492537313432837, | |
| "grad_norm": 0.18005985021591187, | |
| "learning_rate": 9.221639627510076e-05, | |
| "loss": 0.0407, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 16.529850746268657, | |
| "grad_norm": 0.16012470424175262, | |
| "learning_rate": 9.217203991462815e-05, | |
| "loss": 0.0394, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 16.567164179104477, | |
| "grad_norm": 0.2978847920894623, | |
| "learning_rate": 9.212756825740873e-05, | |
| "loss": 0.0451, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 16.604477611940297, | |
| "grad_norm": 0.2236834019422531, | |
| "learning_rate": 9.208298142502636e-05, | |
| "loss": 0.0487, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 16.64179104477612, | |
| "grad_norm": 0.2686060667037964, | |
| "learning_rate": 9.20382795393797e-05, | |
| "loss": 0.0403, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 16.67910447761194, | |
| "grad_norm": 0.33534038066864014, | |
| "learning_rate": 9.199346272268199e-05, | |
| "loss": 0.0385, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 16.71641791044776, | |
| "grad_norm": 0.19250528514385223, | |
| "learning_rate": 9.194853109746074e-05, | |
| "loss": 0.0441, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 16.753731343283583, | |
| "grad_norm": 0.19218407571315765, | |
| "learning_rate": 9.190348478655724e-05, | |
| "loss": 0.0474, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 16.791044776119403, | |
| "grad_norm": 0.21163488924503326, | |
| "learning_rate": 9.185832391312644e-05, | |
| "loss": 0.0411, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 16.828358208955223, | |
| "grad_norm": 0.1758819818496704, | |
| "learning_rate": 9.18130486006364e-05, | |
| "loss": 0.0462, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 16.865671641791046, | |
| "grad_norm": 0.18571069836616516, | |
| "learning_rate": 9.176765897286813e-05, | |
| "loss": 0.0425, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 16.902985074626866, | |
| "grad_norm": 0.20819155871868134, | |
| "learning_rate": 9.17221551539151e-05, | |
| "loss": 0.0428, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 16.940298507462686, | |
| "grad_norm": 0.30357328057289124, | |
| "learning_rate": 9.167653726818305e-05, | |
| "loss": 0.0414, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 16.97761194029851, | |
| "grad_norm": 0.20977462828159332, | |
| "learning_rate": 9.163080544038952e-05, | |
| "loss": 0.0447, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 17.01492537313433, | |
| "grad_norm": 0.2535971701145172, | |
| "learning_rate": 9.158495979556358e-05, | |
| "loss": 0.0384, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 17.05223880597015, | |
| "grad_norm": 0.2789897620677948, | |
| "learning_rate": 9.153900045904549e-05, | |
| "loss": 0.042, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 17.08955223880597, | |
| "grad_norm": 0.18474848568439484, | |
| "learning_rate": 9.14929275564863e-05, | |
| "loss": 0.0398, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 17.12686567164179, | |
| "grad_norm": 0.12615208327770233, | |
| "learning_rate": 9.144674121384757e-05, | |
| "loss": 0.0466, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 17.16417910447761, | |
| "grad_norm": 0.17756640911102295, | |
| "learning_rate": 9.140044155740101e-05, | |
| "loss": 0.035, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 17.20149253731343, | |
| "grad_norm": 0.24410821497440338, | |
| "learning_rate": 9.135402871372808e-05, | |
| "loss": 0.0459, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 17.238805970149254, | |
| "grad_norm": 0.21573011577129364, | |
| "learning_rate": 9.130750280971978e-05, | |
| "loss": 0.0385, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 17.276119402985074, | |
| "grad_norm": 0.13879653811454773, | |
| "learning_rate": 9.126086397257612e-05, | |
| "loss": 0.0391, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 17.313432835820894, | |
| "grad_norm": 0.17508305609226227, | |
| "learning_rate": 9.121411232980588e-05, | |
| "loss": 0.038, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 17.350746268656717, | |
| "grad_norm": 0.2536008358001709, | |
| "learning_rate": 9.116724800922629e-05, | |
| "loss": 0.0418, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 17.388059701492537, | |
| "grad_norm": 0.1942976713180542, | |
| "learning_rate": 9.112027113896262e-05, | |
| "loss": 0.052, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 17.425373134328357, | |
| "grad_norm": 0.16561119258403778, | |
| "learning_rate": 9.107318184744781e-05, | |
| "loss": 0.0451, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 17.46268656716418, | |
| "grad_norm": 0.22971832752227783, | |
| "learning_rate": 9.102598026342222e-05, | |
| "loss": 0.0407, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "grad_norm": 0.1306753158569336, | |
| "learning_rate": 9.097866651593317e-05, | |
| "loss": 0.042, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 17.53731343283582, | |
| "grad_norm": 0.21278400719165802, | |
| "learning_rate": 9.093124073433463e-05, | |
| "loss": 0.0458, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 17.574626865671643, | |
| "grad_norm": 0.22757171094417572, | |
| "learning_rate": 9.088370304828685e-05, | |
| "loss": 0.0364, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 17.611940298507463, | |
| "grad_norm": 0.216596320271492, | |
| "learning_rate": 9.083605358775612e-05, | |
| "loss": 0.0434, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 17.649253731343283, | |
| "grad_norm": 0.13022471964359283, | |
| "learning_rate": 9.078829248301417e-05, | |
| "loss": 0.0415, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 17.686567164179106, | |
| "grad_norm": 0.2280716598033905, | |
| "learning_rate": 9.074041986463808e-05, | |
| "loss": 0.0385, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 17.723880597014926, | |
| "grad_norm": 0.14666135609149933, | |
| "learning_rate": 9.069243586350975e-05, | |
| "loss": 0.0347, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 17.761194029850746, | |
| "grad_norm": 0.1631281077861786, | |
| "learning_rate": 9.064434061081562e-05, | |
| "loss": 0.0407, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 17.798507462686565, | |
| "grad_norm": 0.18697327375411987, | |
| "learning_rate": 9.059613423804623e-05, | |
| "loss": 0.0425, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 17.83582089552239, | |
| "grad_norm": 0.12955111265182495, | |
| "learning_rate": 9.0547816876996e-05, | |
| "loss": 0.0417, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 17.87313432835821, | |
| "grad_norm": 0.15547148883342743, | |
| "learning_rate": 9.049938865976275e-05, | |
| "loss": 0.0409, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 17.91044776119403, | |
| "grad_norm": 0.1900598704814911, | |
| "learning_rate": 9.045084971874738e-05, | |
| "loss": 0.0369, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 17.94776119402985, | |
| "grad_norm": 0.1846715807914734, | |
| "learning_rate": 9.040220018665347e-05, | |
| "loss": 0.0415, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 17.98507462686567, | |
| "grad_norm": 0.1829937845468521, | |
| "learning_rate": 9.035344019648702e-05, | |
| "loss": 0.0407, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 18.02238805970149, | |
| "grad_norm": 0.25900354981422424, | |
| "learning_rate": 9.030456988155596e-05, | |
| "loss": 0.0398, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 18.059701492537314, | |
| "grad_norm": 0.21235992014408112, | |
| "learning_rate": 9.025558937546988e-05, | |
| "loss": 0.0477, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 18.097014925373134, | |
| "grad_norm": 0.18785078823566437, | |
| "learning_rate": 9.020649881213958e-05, | |
| "loss": 0.039, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 18.134328358208954, | |
| "grad_norm": 0.1951548010110855, | |
| "learning_rate": 9.015729832577681e-05, | |
| "loss": 0.0357, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 18.171641791044777, | |
| "grad_norm": 0.1280934363603592, | |
| "learning_rate": 9.010798805089384e-05, | |
| "loss": 0.0425, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 18.208955223880597, | |
| "grad_norm": 0.1693423092365265, | |
| "learning_rate": 9.005856812230304e-05, | |
| "loss": 0.0447, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 18.246268656716417, | |
| "grad_norm": 0.23712658882141113, | |
| "learning_rate": 9.000903867511666e-05, | |
| "loss": 0.042, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 18.28358208955224, | |
| "grad_norm": 0.26489710807800293, | |
| "learning_rate": 8.995939984474624e-05, | |
| "loss": 0.0457, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 18.32089552238806, | |
| "grad_norm": 0.20792756974697113, | |
| "learning_rate": 8.990965176690252e-05, | |
| "loss": 0.0422, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 18.35820895522388, | |
| "grad_norm": 0.18526089191436768, | |
| "learning_rate": 8.98597945775948e-05, | |
| "loss": 0.0366, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 18.395522388059703, | |
| "grad_norm": 0.2214607298374176, | |
| "learning_rate": 8.980982841313074e-05, | |
| "loss": 0.0405, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 18.432835820895523, | |
| "grad_norm": 0.1896953135728836, | |
| "learning_rate": 8.975975341011596e-05, | |
| "loss": 0.0391, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 18.470149253731343, | |
| "grad_norm": 0.1430232971906662, | |
| "learning_rate": 8.970956970545355e-05, | |
| "loss": 0.0403, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 18.507462686567163, | |
| "grad_norm": 0.1991272121667862, | |
| "learning_rate": 8.965927743634391e-05, | |
| "loss": 0.0429, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 18.544776119402986, | |
| "grad_norm": 0.2361849844455719, | |
| "learning_rate": 8.96088767402841e-05, | |
| "loss": 0.0416, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 18.582089552238806, | |
| "grad_norm": 0.25857019424438477, | |
| "learning_rate": 8.955836775506776e-05, | |
| "loss": 0.0461, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 18.619402985074625, | |
| "grad_norm": 0.12873682379722595, | |
| "learning_rate": 8.950775061878453e-05, | |
| "loss": 0.035, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 18.65671641791045, | |
| "grad_norm": 0.19786769151687622, | |
| "learning_rate": 8.945702546981969e-05, | |
| "loss": 0.0399, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 18.69402985074627, | |
| "grad_norm": 0.2562239170074463, | |
| "learning_rate": 8.940619244685388e-05, | |
| "loss": 0.0372, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 18.73134328358209, | |
| "grad_norm": 0.14586858451366425, | |
| "learning_rate": 8.935525168886262e-05, | |
| "loss": 0.0427, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 18.76865671641791, | |
| "grad_norm": 0.20062318444252014, | |
| "learning_rate": 8.930420333511606e-05, | |
| "loss": 0.0403, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 18.80597014925373, | |
| "grad_norm": 0.22698874771595, | |
| "learning_rate": 8.92530475251784e-05, | |
| "loss": 0.036, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 18.84328358208955, | |
| "grad_norm": 0.2103697657585144, | |
| "learning_rate": 8.920178439890765e-05, | |
| "loss": 0.0431, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 18.880597014925375, | |
| "grad_norm": 0.16042308509349823, | |
| "learning_rate": 8.91504140964553e-05, | |
| "loss": 0.0388, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 18.917910447761194, | |
| "grad_norm": 0.16874109208583832, | |
| "learning_rate": 8.909893675826574e-05, | |
| "loss": 0.0388, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 18.955223880597014, | |
| "grad_norm": 0.15569192171096802, | |
| "learning_rate": 8.90473525250761e-05, | |
| "loss": 0.0353, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 18.992537313432837, | |
| "grad_norm": 0.16723507642745972, | |
| "learning_rate": 8.899566153791566e-05, | |
| "loss": 0.0443, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 19.029850746268657, | |
| "grad_norm": 0.23284228146076202, | |
| "learning_rate": 8.894386393810563e-05, | |
| "loss": 0.05, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 19.067164179104477, | |
| "grad_norm": 0.1621718853712082, | |
| "learning_rate": 8.889195986725865e-05, | |
| "loss": 0.0369, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 19.104477611940297, | |
| "grad_norm": 0.17522747814655304, | |
| "learning_rate": 8.883994946727849e-05, | |
| "loss": 0.0475, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 19.14179104477612, | |
| "grad_norm": 0.16110533475875854, | |
| "learning_rate": 8.878783288035957e-05, | |
| "loss": 0.0383, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 19.17910447761194, | |
| "grad_norm": 0.2574177086353302, | |
| "learning_rate": 8.873561024898668e-05, | |
| "loss": 0.0383, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 19.21641791044776, | |
| "grad_norm": 0.14560100436210632, | |
| "learning_rate": 8.868328171593448e-05, | |
| "loss": 0.037, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 19.253731343283583, | |
| "grad_norm": 0.14456631243228912, | |
| "learning_rate": 8.863084742426719e-05, | |
| "loss": 0.0423, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 19.291044776119403, | |
| "grad_norm": 0.1403595507144928, | |
| "learning_rate": 8.857830751733815e-05, | |
| "loss": 0.0327, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 19.328358208955223, | |
| "grad_norm": 0.18462564051151276, | |
| "learning_rate": 8.852566213878947e-05, | |
| "loss": 0.037, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 19.365671641791046, | |
| "grad_norm": 0.20725117623806, | |
| "learning_rate": 8.84729114325516e-05, | |
| "loss": 0.0376, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 19.402985074626866, | |
| "grad_norm": 0.17023132741451263, | |
| "learning_rate": 8.842005554284296e-05, | |
| "loss": 0.0467, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 19.440298507462686, | |
| "grad_norm": 0.31033241748809814, | |
| "learning_rate": 8.836709461416952e-05, | |
| "loss": 0.0425, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 19.47761194029851, | |
| "grad_norm": 0.14057482779026031, | |
| "learning_rate": 8.831402879132446e-05, | |
| "loss": 0.0432, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 19.51492537313433, | |
| "grad_norm": 0.23247437179088593, | |
| "learning_rate": 8.82608582193877e-05, | |
| "loss": 0.0396, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 19.55223880597015, | |
| "grad_norm": 0.1305907964706421, | |
| "learning_rate": 8.820758304372557e-05, | |
| "loss": 0.0389, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 19.58955223880597, | |
| "grad_norm": 0.17093417048454285, | |
| "learning_rate": 8.815420340999033e-05, | |
| "loss": 0.0347, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 19.62686567164179, | |
| "grad_norm": 0.24105240404605865, | |
| "learning_rate": 8.810071946411989e-05, | |
| "loss": 0.0392, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 19.66417910447761, | |
| "grad_norm": 0.2234315127134323, | |
| "learning_rate": 8.804713135233731e-05, | |
| "loss": 0.0403, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 19.701492537313435, | |
| "grad_norm": 0.16947844624519348, | |
| "learning_rate": 8.799343922115044e-05, | |
| "loss": 0.0368, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 19.738805970149254, | |
| "grad_norm": 0.26133742928504944, | |
| "learning_rate": 8.79396432173515e-05, | |
| "loss": 0.041, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 19.776119402985074, | |
| "grad_norm": 0.2099352777004242, | |
| "learning_rate": 8.788574348801675e-05, | |
| "loss": 0.0363, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 19.813432835820894, | |
| "grad_norm": 0.1662513017654419, | |
| "learning_rate": 8.783174018050594e-05, | |
| "loss": 0.0409, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 19.850746268656717, | |
| "grad_norm": 0.18933714926242828, | |
| "learning_rate": 8.77776334424621e-05, | |
| "loss": 0.0348, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 19.888059701492537, | |
| "grad_norm": 0.21673552691936493, | |
| "learning_rate": 8.772342342181095e-05, | |
| "loss": 0.037, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 19.925373134328357, | |
| "grad_norm": 0.13009892404079437, | |
| "learning_rate": 8.766911026676064e-05, | |
| "loss": 0.0386, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 19.96268656716418, | |
| "grad_norm": 0.1655230075120926, | |
| "learning_rate": 8.761469412580125e-05, | |
| "loss": 0.0404, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.2821272611618042, | |
| "learning_rate": 8.756017514770443e-05, | |
| "loss": 0.0441, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 20.03731343283582, | |
| "grad_norm": 0.1302652508020401, | |
| "learning_rate": 8.750555348152298e-05, | |
| "loss": 0.0389, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 20.074626865671643, | |
| "grad_norm": 0.13331563770771027, | |
| "learning_rate": 8.745082927659047e-05, | |
| "loss": 0.0393, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 20.111940298507463, | |
| "grad_norm": 0.244130939245224, | |
| "learning_rate": 8.739600268252078e-05, | |
| "loss": 0.0372, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 20.149253731343283, | |
| "grad_norm": 0.20429308712482452, | |
| "learning_rate": 8.73410738492077e-05, | |
| "loss": 0.0387, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 20.186567164179106, | |
| "grad_norm": 0.2954719364643097, | |
| "learning_rate": 8.728604292682459e-05, | |
| "loss": 0.0404, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 20.223880597014926, | |
| "grad_norm": 0.20438429713249207, | |
| "learning_rate": 8.723091006582389e-05, | |
| "loss": 0.0359, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 20.261194029850746, | |
| "grad_norm": 0.17289331555366516, | |
| "learning_rate": 8.717567541693673e-05, | |
| "loss": 0.0357, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 20.298507462686565, | |
| "grad_norm": 0.24367138743400574, | |
| "learning_rate": 8.71203391311725e-05, | |
| "loss": 0.0392, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 20.33582089552239, | |
| "grad_norm": 0.21900270879268646, | |
| "learning_rate": 8.706490135981855e-05, | |
| "loss": 0.0419, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 20.37313432835821, | |
| "grad_norm": 0.1526443362236023, | |
| "learning_rate": 8.700936225443959e-05, | |
| "loss": 0.0333, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 20.41044776119403, | |
| "grad_norm": 0.24582353234291077, | |
| "learning_rate": 8.695372196687743e-05, | |
| "loss": 0.0417, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 20.44776119402985, | |
| "grad_norm": 0.21462485194206238, | |
| "learning_rate": 8.689798064925049e-05, | |
| "loss": 0.0347, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 20.48507462686567, | |
| "grad_norm": 0.17611616849899292, | |
| "learning_rate": 8.684213845395339e-05, | |
| "loss": 0.0395, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 20.52238805970149, | |
| "grad_norm": 0.19724012911319733, | |
| "learning_rate": 8.678619553365659e-05, | |
| "loss": 0.0332, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 20.559701492537314, | |
| "grad_norm": 0.2080456167459488, | |
| "learning_rate": 8.673015204130586e-05, | |
| "loss": 0.0361, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 20.597014925373134, | |
| "grad_norm": 0.21469220519065857, | |
| "learning_rate": 8.6674008130122e-05, | |
| "loss": 0.039, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 20.634328358208954, | |
| "grad_norm": 0.242497980594635, | |
| "learning_rate": 8.661776395360029e-05, | |
| "loss": 0.0397, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 20.671641791044777, | |
| "grad_norm": 0.20539864897727966, | |
| "learning_rate": 8.656141966551019e-05, | |
| "loss": 0.0392, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 20.708955223880597, | |
| "grad_norm": 0.21964021027088165, | |
| "learning_rate": 8.650497541989482e-05, | |
| "loss": 0.035, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 20.746268656716417, | |
| "grad_norm": 0.15793637931346893, | |
| "learning_rate": 8.644843137107059e-05, | |
| "loss": 0.0363, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 20.78358208955224, | |
| "grad_norm": 0.1731041818857193, | |
| "learning_rate": 8.639178767362676e-05, | |
| "loss": 0.0371, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 20.82089552238806, | |
| "grad_norm": 0.15019342303276062, | |
| "learning_rate": 8.633504448242505e-05, | |
| "loss": 0.0335, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 20.85820895522388, | |
| "grad_norm": 0.1397496908903122, | |
| "learning_rate": 8.627820195259918e-05, | |
| "loss": 0.0391, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 20.895522388059703, | |
| "grad_norm": 0.141131192445755, | |
| "learning_rate": 8.622126023955446e-05, | |
| "loss": 0.041, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 20.932835820895523, | |
| "grad_norm": 0.20025403797626495, | |
| "learning_rate": 8.616421949896734e-05, | |
| "loss": 0.0412, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 20.970149253731343, | |
| "grad_norm": 0.2251378893852234, | |
| "learning_rate": 8.610707988678503e-05, | |
| "loss": 0.037, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 21.007462686567163, | |
| "grad_norm": 0.1341109722852707, | |
| "learning_rate": 8.604984155922506e-05, | |
| "loss": 0.0371, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 21.044776119402986, | |
| "grad_norm": 0.28053462505340576, | |
| "learning_rate": 8.599250467277483e-05, | |
| "loss": 0.0366, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 21.082089552238806, | |
| "grad_norm": 0.10567930340766907, | |
| "learning_rate": 8.59350693841912e-05, | |
| "loss": 0.0394, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 21.119402985074625, | |
| "grad_norm": 0.17919886112213135, | |
| "learning_rate": 8.587753585050004e-05, | |
| "loss": 0.0357, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 21.15671641791045, | |
| "grad_norm": 0.3223204016685486, | |
| "learning_rate": 8.581990422899585e-05, | |
| "loss": 0.0369, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 21.19402985074627, | |
| "grad_norm": 0.20072297751903534, | |
| "learning_rate": 8.576217467724128e-05, | |
| "loss": 0.0389, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 21.23134328358209, | |
| "grad_norm": 0.1556226760149002, | |
| "learning_rate": 8.570434735306671e-05, | |
| "loss": 0.035, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 21.26865671641791, | |
| "grad_norm": 0.20265886187553406, | |
| "learning_rate": 8.564642241456986e-05, | |
| "loss": 0.0418, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 21.30597014925373, | |
| "grad_norm": 0.15518955886363983, | |
| "learning_rate": 8.558840002011528e-05, | |
| "loss": 0.0331, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 21.34328358208955, | |
| "grad_norm": 0.1822584569454193, | |
| "learning_rate": 8.553028032833397e-05, | |
| "loss": 0.0421, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 21.380597014925375, | |
| "grad_norm": 0.14216330647468567, | |
| "learning_rate": 8.547206349812298e-05, | |
| "loss": 0.0413, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 21.417910447761194, | |
| "grad_norm": 0.24156329035758972, | |
| "learning_rate": 8.541374968864487e-05, | |
| "loss": 0.0404, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 21.455223880597014, | |
| "grad_norm": 0.2753167748451233, | |
| "learning_rate": 8.535533905932738e-05, | |
| "loss": 0.0369, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 21.492537313432837, | |
| "grad_norm": 0.17052626609802246, | |
| "learning_rate": 8.529683176986295e-05, | |
| "loss": 0.0328, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 21.529850746268657, | |
| "grad_norm": 0.11597824096679688, | |
| "learning_rate": 8.523822798020827e-05, | |
| "loss": 0.041, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 21.567164179104477, | |
| "grad_norm": 0.14363346993923187, | |
| "learning_rate": 8.517952785058385e-05, | |
| "loss": 0.0393, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 21.604477611940297, | |
| "grad_norm": 0.19373776018619537, | |
| "learning_rate": 8.512073154147362e-05, | |
| "loss": 0.0372, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 21.64179104477612, | |
| "grad_norm": 0.20276981592178345, | |
| "learning_rate": 8.506183921362443e-05, | |
| "loss": 0.0389, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 21.67910447761194, | |
| "grad_norm": 0.19267870485782623, | |
| "learning_rate": 8.500285102804568e-05, | |
| "loss": 0.0371, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 21.71641791044776, | |
| "grad_norm": 0.2701839208602905, | |
| "learning_rate": 8.494376714600878e-05, | |
| "loss": 0.0333, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 21.753731343283583, | |
| "grad_norm": 0.20612668991088867, | |
| "learning_rate": 8.488458772904684e-05, | |
| "loss": 0.0358, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 21.791044776119403, | |
| "grad_norm": 0.18102902173995972, | |
| "learning_rate": 8.482531293895412e-05, | |
| "loss": 0.0376, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 21.828358208955223, | |
| "grad_norm": 0.23202018439769745, | |
| "learning_rate": 8.476594293778561e-05, | |
| "loss": 0.0418, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 21.865671641791046, | |
| "grad_norm": 0.09540139883756638, | |
| "learning_rate": 8.470647788785665e-05, | |
| "loss": 0.041, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 21.902985074626866, | |
| "grad_norm": 0.23362809419631958, | |
| "learning_rate": 8.46469179517424e-05, | |
| "loss": 0.0402, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 21.940298507462686, | |
| "grad_norm": 0.20929335057735443, | |
| "learning_rate": 8.458726329227747e-05, | |
| "loss": 0.0385, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 21.97761194029851, | |
| "grad_norm": 0.18403425812721252, | |
| "learning_rate": 8.452751407255541e-05, | |
| "loss": 0.0399, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 22.01492537313433, | |
| "grad_norm": 0.2034774273633957, | |
| "learning_rate": 8.44676704559283e-05, | |
| "loss": 0.0361, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 22.05223880597015, | |
| "grad_norm": 0.14981597661972046, | |
| "learning_rate": 8.44077326060063e-05, | |
| "loss": 0.0393, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 22.08955223880597, | |
| "grad_norm": 0.20903146266937256, | |
| "learning_rate": 8.434770068665723e-05, | |
| "loss": 0.0406, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 22.12686567164179, | |
| "grad_norm": 0.12090307474136353, | |
| "learning_rate": 8.428757486200603e-05, | |
| "loss": 0.0349, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 22.16417910447761, | |
| "grad_norm": 0.14085660874843597, | |
| "learning_rate": 8.422735529643444e-05, | |
| "loss": 0.0344, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 22.20149253731343, | |
| "grad_norm": 0.30808404088020325, | |
| "learning_rate": 8.416704215458043e-05, | |
| "loss": 0.0298, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 22.238805970149254, | |
| "grad_norm": 0.17409317195415497, | |
| "learning_rate": 8.410663560133784e-05, | |
| "loss": 0.035, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 22.276119402985074, | |
| "grad_norm": 0.18731828033924103, | |
| "learning_rate": 8.404613580185585e-05, | |
| "loss": 0.0322, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 22.313432835820894, | |
| "grad_norm": 0.16483667492866516, | |
| "learning_rate": 8.398554292153866e-05, | |
| "loss": 0.033, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 22.350746268656717, | |
| "grad_norm": 0.195018008351326, | |
| "learning_rate": 8.392485712604483e-05, | |
| "loss": 0.0344, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 22.388059701492537, | |
| "grad_norm": 0.18210549652576447, | |
| "learning_rate": 8.386407858128706e-05, | |
| "loss": 0.0387, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 22.425373134328357, | |
| "grad_norm": 0.18658341467380524, | |
| "learning_rate": 8.380320745343153e-05, | |
| "loss": 0.0359, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 22.46268656716418, | |
| "grad_norm": 0.260953426361084, | |
| "learning_rate": 8.37422439088976e-05, | |
| "loss": 0.0291, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "grad_norm": 0.2177930772304535, | |
| "learning_rate": 8.368118811435726e-05, | |
| "loss": 0.0384, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 22.53731343283582, | |
| "grad_norm": 0.1596938520669937, | |
| "learning_rate": 8.362004023673474e-05, | |
| "loss": 0.0372, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 22.574626865671643, | |
| "grad_norm": 0.21605637669563293, | |
| "learning_rate": 8.355880044320598e-05, | |
| "loss": 0.0304, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 22.611940298507463, | |
| "grad_norm": 0.13812203705310822, | |
| "learning_rate": 8.349746890119826e-05, | |
| "loss": 0.0295, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 22.649253731343283, | |
| "grad_norm": 0.22850565612316132, | |
| "learning_rate": 8.343604577838964e-05, | |
| "loss": 0.0385, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 22.686567164179106, | |
| "grad_norm": 0.22924698889255524, | |
| "learning_rate": 8.337453124270863e-05, | |
| "loss": 0.0438, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 22.723880597014926, | |
| "grad_norm": 0.1455918848514557, | |
| "learning_rate": 8.331292546233362e-05, | |
| "loss": 0.0358, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 22.761194029850746, | |
| "grad_norm": 0.1839921921491623, | |
| "learning_rate": 8.32512286056924e-05, | |
| "loss": 0.0349, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 22.798507462686565, | |
| "grad_norm": 0.24356882274150848, | |
| "learning_rate": 8.318944084146192e-05, | |
| "loss": 0.0335, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 22.83582089552239, | |
| "grad_norm": 0.2336840182542801, | |
| "learning_rate": 8.31275623385675e-05, | |
| "loss": 0.0339, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 22.87313432835821, | |
| "grad_norm": 0.17839699983596802, | |
| "learning_rate": 8.306559326618259e-05, | |
| "loss": 0.0365, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 22.91044776119403, | |
| "grad_norm": 0.18088172376155853, | |
| "learning_rate": 8.300353379372834e-05, | |
| "loss": 0.0331, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 22.94776119402985, | |
| "grad_norm": 0.1771453320980072, | |
| "learning_rate": 8.29413840908729e-05, | |
| "loss": 0.0321, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 22.98507462686567, | |
| "grad_norm": 0.1374535709619522, | |
| "learning_rate": 8.287914432753123e-05, | |
| "loss": 0.0328, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 23.02238805970149, | |
| "grad_norm": 0.17898012697696686, | |
| "learning_rate": 8.281681467386446e-05, | |
| "loss": 0.0408, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 23.059701492537314, | |
| "grad_norm": 0.21729676425457, | |
| "learning_rate": 8.275439530027948e-05, | |
| "loss": 0.0354, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 23.097014925373134, | |
| "grad_norm": 0.2473490685224533, | |
| "learning_rate": 8.269188637742846e-05, | |
| "loss": 0.0361, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 23.134328358208954, | |
| "grad_norm": 0.15661069750785828, | |
| "learning_rate": 8.262928807620843e-05, | |
| "loss": 0.036, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 23.171641791044777, | |
| "grad_norm": 0.12378236651420593, | |
| "learning_rate": 8.256660056776076e-05, | |
| "loss": 0.0308, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 23.208955223880597, | |
| "grad_norm": 0.1373433768749237, | |
| "learning_rate": 8.250382402347065e-05, | |
| "loss": 0.0344, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 23.246268656716417, | |
| "grad_norm": 0.14814983308315277, | |
| "learning_rate": 8.244095861496686e-05, | |
| "loss": 0.0368, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 23.28358208955224, | |
| "grad_norm": 0.15903662145137787, | |
| "learning_rate": 8.237800451412095e-05, | |
| "loss": 0.033, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 23.32089552238806, | |
| "grad_norm": 0.1676921397447586, | |
| "learning_rate": 8.231496189304704e-05, | |
| "loss": 0.0361, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 23.35820895522388, | |
| "grad_norm": 0.2496129870414734, | |
| "learning_rate": 8.225183092410128e-05, | |
| "loss": 0.037, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 23.395522388059703, | |
| "grad_norm": 0.1830875128507614, | |
| "learning_rate": 8.218861177988129e-05, | |
| "loss": 0.0377, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 23.432835820895523, | |
| "grad_norm": 0.18538393080234528, | |
| "learning_rate": 8.212530463322583e-05, | |
| "loss": 0.0343, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 23.470149253731343, | |
| "grad_norm": 0.23813718557357788, | |
| "learning_rate": 8.206190965721419e-05, | |
| "loss": 0.0336, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 23.507462686567163, | |
| "grad_norm": 0.14053800702095032, | |
| "learning_rate": 8.199842702516583e-05, | |
| "loss": 0.0334, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 23.544776119402986, | |
| "grad_norm": 0.19115787744522095, | |
| "learning_rate": 8.193485691063985e-05, | |
| "loss": 0.0338, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 23.582089552238806, | |
| "grad_norm": 0.1176459789276123, | |
| "learning_rate": 8.18711994874345e-05, | |
| "loss": 0.0324, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 23.619402985074625, | |
| "grad_norm": 0.13881400227546692, | |
| "learning_rate": 8.180745492958674e-05, | |
| "loss": 0.0375, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 23.65671641791045, | |
| "grad_norm": 0.12102743983268738, | |
| "learning_rate": 8.174362341137177e-05, | |
| "loss": 0.0338, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 23.69402985074627, | |
| "grad_norm": 0.16610436141490936, | |
| "learning_rate": 8.167970510730253e-05, | |
| "loss": 0.0296, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 23.73134328358209, | |
| "grad_norm": 0.12234822660684586, | |
| "learning_rate": 8.161570019212921e-05, | |
| "loss": 0.029, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 23.76865671641791, | |
| "grad_norm": 0.17056342959403992, | |
| "learning_rate": 8.155160884083881e-05, | |
| "loss": 0.0381, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 23.80597014925373, | |
| "grad_norm": 0.1477614790201187, | |
| "learning_rate": 8.148743122865463e-05, | |
| "loss": 0.0315, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 23.84328358208955, | |
| "grad_norm": 0.38320279121398926, | |
| "learning_rate": 8.14231675310358e-05, | |
| "loss": 0.0366, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 23.880597014925375, | |
| "grad_norm": 0.1497313380241394, | |
| "learning_rate": 8.135881792367686e-05, | |
| "loss": 0.0325, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 23.917910447761194, | |
| "grad_norm": 0.1574944257736206, | |
| "learning_rate": 8.129438258250712e-05, | |
| "loss": 0.0372, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 23.955223880597014, | |
| "grad_norm": 0.17678116261959076, | |
| "learning_rate": 8.12298616836904e-05, | |
| "loss": 0.034, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 23.992537313432837, | |
| "grad_norm": 0.13617518544197083, | |
| "learning_rate": 8.116525540362434e-05, | |
| "loss": 0.032, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 24.029850746268657, | |
| "grad_norm": 0.1610628217458725, | |
| "learning_rate": 8.110056391894005e-05, | |
| "loss": 0.0295, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 24.067164179104477, | |
| "grad_norm": 0.24379907548427582, | |
| "learning_rate": 8.103578740650156e-05, | |
| "loss": 0.0318, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 24.104477611940297, | |
| "grad_norm": 0.15908868610858917, | |
| "learning_rate": 8.097092604340542e-05, | |
| "loss": 0.0285, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 24.14179104477612, | |
| "grad_norm": 0.17211472988128662, | |
| "learning_rate": 8.090598000698009e-05, | |
| "loss": 0.0345, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 24.17910447761194, | |
| "grad_norm": 0.10870133340358734, | |
| "learning_rate": 8.084094947478556e-05, | |
| "loss": 0.0349, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 24.21641791044776, | |
| "grad_norm": 0.1614072173833847, | |
| "learning_rate": 8.077583462461283e-05, | |
| "loss": 0.0305, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 24.253731343283583, | |
| "grad_norm": 0.1449541449546814, | |
| "learning_rate": 8.07106356344834e-05, | |
| "loss": 0.0326, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 24.291044776119403, | |
| "grad_norm": 0.15968690812587738, | |
| "learning_rate": 8.064535268264883e-05, | |
| "loss": 0.0379, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 24.328358208955223, | |
| "grad_norm": 0.2027505785226822, | |
| "learning_rate": 8.057998594759022e-05, | |
| "loss": 0.0368, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 24.365671641791046, | |
| "grad_norm": 0.18664468824863434, | |
| "learning_rate": 8.051453560801772e-05, | |
| "loss": 0.041, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 24.402985074626866, | |
| "grad_norm": 0.2137981504201889, | |
| "learning_rate": 8.044900184287007e-05, | |
| "loss": 0.036, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 24.440298507462686, | |
| "grad_norm": 0.1381145715713501, | |
| "learning_rate": 8.038338483131407e-05, | |
| "loss": 0.0342, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 24.47761194029851, | |
| "grad_norm": 0.2125469446182251, | |
| "learning_rate": 8.031768475274413e-05, | |
| "loss": 0.0363, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 24.51492537313433, | |
| "grad_norm": 0.1482478678226471, | |
| "learning_rate": 8.025190178678175e-05, | |
| "loss": 0.0359, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 24.55223880597015, | |
| "grad_norm": 0.17988649010658264, | |
| "learning_rate": 8.018603611327504e-05, | |
| "loss": 0.0388, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 24.58955223880597, | |
| "grad_norm": 0.1568310409784317, | |
| "learning_rate": 8.012008791229826e-05, | |
| "loss": 0.0357, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 24.62686567164179, | |
| "grad_norm": 0.17348839342594147, | |
| "learning_rate": 8.005405736415126e-05, | |
| "loss": 0.0348, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 24.66417910447761, | |
| "grad_norm": 0.18807284533977509, | |
| "learning_rate": 7.998794464935904e-05, | |
| "loss": 0.0371, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 24.701492537313435, | |
| "grad_norm": 0.12133855372667313, | |
| "learning_rate": 7.992174994867123e-05, | |
| "loss": 0.0376, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 24.738805970149254, | |
| "grad_norm": 0.2808085083961487, | |
| "learning_rate": 7.985547344306161e-05, | |
| "loss": 0.0346, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 24.776119402985074, | |
| "grad_norm": 0.13642264902591705, | |
| "learning_rate": 7.978911531372765e-05, | |
| "loss": 0.0365, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 24.813432835820894, | |
| "grad_norm": 0.19014127552509308, | |
| "learning_rate": 7.972267574208991e-05, | |
| "loss": 0.0344, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 24.850746268656717, | |
| "grad_norm": 0.16038668155670166, | |
| "learning_rate": 7.965615490979163e-05, | |
| "loss": 0.0339, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 24.888059701492537, | |
| "grad_norm": 0.17937994003295898, | |
| "learning_rate": 7.958955299869825e-05, | |
| "loss": 0.0294, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 24.925373134328357, | |
| "grad_norm": 0.19632326066493988, | |
| "learning_rate": 7.952287019089685e-05, | |
| "loss": 0.0365, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 24.96268656716418, | |
| "grad_norm": 0.14519083499908447, | |
| "learning_rate": 7.945610666869568e-05, | |
| "loss": 0.0307, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.17961327731609344, | |
| "learning_rate": 7.938926261462366e-05, | |
| "loss": 0.0348, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 25.03731343283582, | |
| "grad_norm": 0.1272597312927246, | |
| "learning_rate": 7.932233821142987e-05, | |
| "loss": 0.0296, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 25.074626865671643, | |
| "grad_norm": 0.21824714541435242, | |
| "learning_rate": 7.925533364208309e-05, | |
| "loss": 0.0333, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 25.111940298507463, | |
| "grad_norm": 0.12162213027477264, | |
| "learning_rate": 7.918824908977123e-05, | |
| "loss": 0.0314, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 25.149253731343283, | |
| "grad_norm": 0.17663027346134186, | |
| "learning_rate": 7.912108473790092e-05, | |
| "loss": 0.0395, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 25.186567164179106, | |
| "grad_norm": 0.1496419459581375, | |
| "learning_rate": 7.905384077009693e-05, | |
| "loss": 0.0377, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 25.223880597014926, | |
| "grad_norm": 0.15282033383846283, | |
| "learning_rate": 7.898651737020166e-05, | |
| "loss": 0.0308, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 25.261194029850746, | |
| "grad_norm": 0.1586643010377884, | |
| "learning_rate": 7.891911472227478e-05, | |
| "loss": 0.031, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 25.298507462686565, | |
| "grad_norm": 0.15809810161590576, | |
| "learning_rate": 7.88516330105925e-05, | |
| "loss": 0.0304, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 25.33582089552239, | |
| "grad_norm": 0.19378156960010529, | |
| "learning_rate": 7.878407241964729e-05, | |
| "loss": 0.0336, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 25.37313432835821, | |
| "grad_norm": 0.17140574753284454, | |
| "learning_rate": 7.871643313414718e-05, | |
| "loss": 0.0301, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 25.41044776119403, | |
| "grad_norm": 0.1791999191045761, | |
| "learning_rate": 7.864871533901544e-05, | |
| "loss": 0.0381, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 25.44776119402985, | |
| "grad_norm": 0.13285186886787415, | |
| "learning_rate": 7.858091921938988e-05, | |
| "loss": 0.0301, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 25.48507462686567, | |
| "grad_norm": 0.1444288194179535, | |
| "learning_rate": 7.851304496062254e-05, | |
| "loss": 0.0257, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 25.52238805970149, | |
| "grad_norm": 0.17137834429740906, | |
| "learning_rate": 7.844509274827907e-05, | |
| "loss": 0.0272, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 25.559701492537314, | |
| "grad_norm": 0.17752587795257568, | |
| "learning_rate": 7.837706276813819e-05, | |
| "loss": 0.0343, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 25.597014925373134, | |
| "grad_norm": 0.1934349089860916, | |
| "learning_rate": 7.830895520619128e-05, | |
| "loss": 0.0289, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 25.634328358208954, | |
| "grad_norm": 0.21587027609348297, | |
| "learning_rate": 7.824077024864179e-05, | |
| "loss": 0.0349, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 25.671641791044777, | |
| "grad_norm": 0.15302105247974396, | |
| "learning_rate": 7.817250808190483e-05, | |
| "loss": 0.0346, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 25.708955223880597, | |
| "grad_norm": 0.15441982448101044, | |
| "learning_rate": 7.810416889260653e-05, | |
| "loss": 0.0403, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 25.746268656716417, | |
| "grad_norm": 0.11743316054344177, | |
| "learning_rate": 7.803575286758364e-05, | |
| "loss": 0.0329, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 25.78358208955224, | |
| "grad_norm": 0.1417740434408188, | |
| "learning_rate": 7.796726019388295e-05, | |
| "loss": 0.0346, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 25.82089552238806, | |
| "grad_norm": 0.14196589589118958, | |
| "learning_rate": 7.789869105876083e-05, | |
| "loss": 0.0333, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 25.85820895522388, | |
| "grad_norm": 0.17111234366893768, | |
| "learning_rate": 7.783004564968263e-05, | |
| "loss": 0.0403, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 25.895522388059703, | |
| "grad_norm": 0.159880131483078, | |
| "learning_rate": 7.776132415432234e-05, | |
| "loss": 0.0281, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 25.932835820895523, | |
| "grad_norm": 0.1706574410200119, | |
| "learning_rate": 7.769252676056187e-05, | |
| "loss": 0.0327, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 25.970149253731343, | |
| "grad_norm": 0.20553110539913177, | |
| "learning_rate": 7.762365365649067e-05, | |
| "loss": 0.0345, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 26.007462686567163, | |
| "grad_norm": 0.17439968883991241, | |
| "learning_rate": 7.755470503040516e-05, | |
| "loss": 0.0338, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 26.044776119402986, | |
| "grad_norm": 0.1736845076084137, | |
| "learning_rate": 7.748568107080832e-05, | |
| "loss": 0.0309, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 26.082089552238806, | |
| "grad_norm": 0.15822389721870422, | |
| "learning_rate": 7.741658196640892e-05, | |
| "loss": 0.0338, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 26.119402985074625, | |
| "grad_norm": 0.24268855154514313, | |
| "learning_rate": 7.734740790612136e-05, | |
| "loss": 0.0356, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 26.15671641791045, | |
| "grad_norm": 0.18587811291217804, | |
| "learning_rate": 7.727815907906481e-05, | |
| "loss": 0.0302, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 26.19402985074627, | |
| "grad_norm": 0.18292690813541412, | |
| "learning_rate": 7.720883567456298e-05, | |
| "loss": 0.0337, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 26.23134328358209, | |
| "grad_norm": 0.1578633338212967, | |
| "learning_rate": 7.713943788214337e-05, | |
| "loss": 0.0303, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 26.26865671641791, | |
| "grad_norm": 0.21999752521514893, | |
| "learning_rate": 7.70699658915369e-05, | |
| "loss": 0.0289, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 26.30597014925373, | |
| "grad_norm": 0.2454708218574524, | |
| "learning_rate": 7.700041989267736e-05, | |
| "loss": 0.0403, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 26.34328358208955, | |
| "grad_norm": 0.1282198578119278, | |
| "learning_rate": 7.693080007570084e-05, | |
| "loss": 0.0378, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 26.380597014925375, | |
| "grad_norm": 0.14491668343544006, | |
| "learning_rate": 7.686110663094525e-05, | |
| "loss": 0.0364, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 26.417910447761194, | |
| "grad_norm": 0.2097531259059906, | |
| "learning_rate": 7.679133974894983e-05, | |
| "loss": 0.0306, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 26.455223880597014, | |
| "grad_norm": 0.15113137662410736, | |
| "learning_rate": 7.672149962045457e-05, | |
| "loss": 0.0352, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 26.492537313432837, | |
| "grad_norm": 0.1912260502576828, | |
| "learning_rate": 7.66515864363997e-05, | |
| "loss": 0.0279, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 26.529850746268657, | |
| "grad_norm": 0.17578865587711334, | |
| "learning_rate": 7.658160038792518e-05, | |
| "loss": 0.0343, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 26.567164179104477, | |
| "grad_norm": 0.13778840005397797, | |
| "learning_rate": 7.651154166637025e-05, | |
| "loss": 0.0286, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 26.604477611940297, | |
| "grad_norm": 0.13315582275390625, | |
| "learning_rate": 7.644141046327271e-05, | |
| "loss": 0.0316, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 26.64179104477612, | |
| "grad_norm": 0.2831936180591583, | |
| "learning_rate": 7.637120697036866e-05, | |
| "loss": 0.0341, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 26.67910447761194, | |
| "grad_norm": 0.1701228767633438, | |
| "learning_rate": 7.630093137959171e-05, | |
| "loss": 0.039, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 26.71641791044776, | |
| "grad_norm": 0.1315871775150299, | |
| "learning_rate": 7.623058388307269e-05, | |
| "loss": 0.0323, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 26.753731343283583, | |
| "grad_norm": 0.12028495967388153, | |
| "learning_rate": 7.616016467313891e-05, | |
| "loss": 0.0343, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 26.791044776119403, | |
| "grad_norm": 0.14795522391796112, | |
| "learning_rate": 7.608967394231387e-05, | |
| "loss": 0.0359, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 26.828358208955223, | |
| "grad_norm": 0.08910278230905533, | |
| "learning_rate": 7.60191118833165e-05, | |
| "loss": 0.0381, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 26.865671641791046, | |
| "grad_norm": 0.12936276197433472, | |
| "learning_rate": 7.594847868906076e-05, | |
| "loss": 0.0344, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 26.902985074626866, | |
| "grad_norm": 0.1755450963973999, | |
| "learning_rate": 7.587777455265515e-05, | |
| "loss": 0.0298, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 26.940298507462686, | |
| "grad_norm": 0.15424427390098572, | |
| "learning_rate": 7.580699966740201e-05, | |
| "loss": 0.03, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 26.97761194029851, | |
| "grad_norm": 0.18190084397792816, | |
| "learning_rate": 7.573615422679726e-05, | |
| "loss": 0.036, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 27.01492537313433, | |
| "grad_norm": 0.12391633540391922, | |
| "learning_rate": 7.566523842452958e-05, | |
| "loss": 0.0302, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 27.05223880597015, | |
| "grad_norm": 0.09776072204113007, | |
| "learning_rate": 7.559425245448006e-05, | |
| "loss": 0.0301, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 27.08955223880597, | |
| "grad_norm": 0.13018116354942322, | |
| "learning_rate": 7.552319651072164e-05, | |
| "loss": 0.0263, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 27.12686567164179, | |
| "grad_norm": 0.11622302234172821, | |
| "learning_rate": 7.545207078751857e-05, | |
| "loss": 0.0307, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 27.16417910447761, | |
| "grad_norm": 0.14957569539546967, | |
| "learning_rate": 7.538087547932585e-05, | |
| "loss": 0.0298, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 27.20149253731343, | |
| "grad_norm": 0.18458008766174316, | |
| "learning_rate": 7.530961078078873e-05, | |
| "loss": 0.0297, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 27.238805970149254, | |
| "grad_norm": 0.16861510276794434, | |
| "learning_rate": 7.52382768867422e-05, | |
| "loss": 0.0353, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 27.276119402985074, | |
| "grad_norm": 0.16894373297691345, | |
| "learning_rate": 7.516687399221037e-05, | |
| "loss": 0.0342, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 27.313432835820894, | |
| "grad_norm": 0.16104437410831451, | |
| "learning_rate": 7.509540229240601e-05, | |
| "loss": 0.035, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 27.350746268656717, | |
| "grad_norm": 0.24792194366455078, | |
| "learning_rate": 7.50238619827301e-05, | |
| "loss": 0.033, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 27.388059701492537, | |
| "grad_norm": 0.15868139266967773, | |
| "learning_rate": 7.495225325877103e-05, | |
| "loss": 0.0372, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 27.425373134328357, | |
| "grad_norm": 0.1612396538257599, | |
| "learning_rate": 7.488057631630437e-05, | |
| "loss": 0.0324, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 27.46268656716418, | |
| "grad_norm": 0.1115829274058342, | |
| "learning_rate": 7.480883135129211e-05, | |
| "loss": 0.0283, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 27.5, | |
| "grad_norm": 0.14605456590652466, | |
| "learning_rate": 7.473701855988227e-05, | |
| "loss": 0.0299, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 27.53731343283582, | |
| "grad_norm": 0.18136604130268097, | |
| "learning_rate": 7.466513813840825e-05, | |
| "loss": 0.0298, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 27.574626865671643, | |
| "grad_norm": 0.16361157596111298, | |
| "learning_rate": 7.45931902833884e-05, | |
| "loss": 0.0338, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 27.611940298507463, | |
| "grad_norm": 0.18856105208396912, | |
| "learning_rate": 7.452117519152542e-05, | |
| "loss": 0.0384, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 27.649253731343283, | |
| "grad_norm": 0.15344281494617462, | |
| "learning_rate": 7.444909305970578e-05, | |
| "loss": 0.0295, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 27.686567164179106, | |
| "grad_norm": 0.17113055288791656, | |
| "learning_rate": 7.437694408499933e-05, | |
| "loss": 0.0311, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 27.723880597014926, | |
| "grad_norm": 0.1809830665588379, | |
| "learning_rate": 7.430472846465856e-05, | |
| "loss": 0.0343, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 27.761194029850746, | |
| "grad_norm": 0.1445932686328888, | |
| "learning_rate": 7.423244639611826e-05, | |
| "loss": 0.0338, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 27.798507462686565, | |
| "grad_norm": 0.1392519325017929, | |
| "learning_rate": 7.416009807699482e-05, | |
| "loss": 0.0329, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 27.83582089552239, | |
| "grad_norm": 0.1715395152568817, | |
| "learning_rate": 7.408768370508576e-05, | |
| "loss": 0.0308, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 27.87313432835821, | |
| "grad_norm": 0.25407588481903076, | |
| "learning_rate": 7.401520347836926e-05, | |
| "loss": 0.0367, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 27.91044776119403, | |
| "grad_norm": 0.19018910825252533, | |
| "learning_rate": 7.394265759500348e-05, | |
| "loss": 0.0296, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 27.94776119402985, | |
| "grad_norm": 0.19958099722862244, | |
| "learning_rate": 7.387004625332608e-05, | |
| "loss": 0.0363, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 27.98507462686567, | |
| "grad_norm": 0.16039235889911652, | |
| "learning_rate": 7.379736965185368e-05, | |
| "loss": 0.0323, | |
| "step": 7500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 20000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 75, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.638017556203379e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |