| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.52238805970149, | |
| "eval_steps": 500, | |
| "global_step": 5500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03731343283582089, | |
| "grad_norm": 0.8186072111129761, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 1.3847, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07462686567164178, | |
| "grad_norm": 0.5007426142692566, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 1.4283, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11194029850746269, | |
| "grad_norm": 0.49460887908935547, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4868, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.14925373134328357, | |
| "grad_norm": 0.5032920837402344, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.4491, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1865671641791045, | |
| "grad_norm": 0.5688469409942627, | |
| "learning_rate": 5e-06, | |
| "loss": 1.3703, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22388059701492538, | |
| "grad_norm": 0.5052517652511597, | |
| "learning_rate": 6e-06, | |
| "loss": 1.419, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26119402985074625, | |
| "grad_norm": 0.6315643787384033, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 1.3058, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.29850746268656714, | |
| "grad_norm": 0.6060447692871094, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 1.2908, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3358208955223881, | |
| "grad_norm": 0.5513179302215576, | |
| "learning_rate": 9e-06, | |
| "loss": 1.2311, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.373134328358209, | |
| "grad_norm": 0.8467404246330261, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2043, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.41044776119402987, | |
| "grad_norm": 0.8141824007034302, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 1.0707, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.44776119402985076, | |
| "grad_norm": 0.7932347059249878, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.9377, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.48507462686567165, | |
| "grad_norm": 0.684220552444458, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.714, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5223880597014925, | |
| "grad_norm": 0.5886895060539246, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 0.6479, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5597014925373134, | |
| "grad_norm": 0.4764939248561859, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.5463, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.4621008038520813, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.4641, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6343283582089553, | |
| "grad_norm": 0.46492910385131836, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 0.4159, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6716417910447762, | |
| "grad_norm": 0.5017415881156921, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.4094, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7089552238805971, | |
| "grad_norm": 0.34392210841178894, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.3478, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 0.3240516483783722, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3821, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7835820895522388, | |
| "grad_norm": 0.26301339268684387, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.3606, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8208955223880597, | |
| "grad_norm": 0.34712520241737366, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.3421, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.8582089552238806, | |
| "grad_norm": 0.3248469829559326, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 0.3389, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 0.298149436712265, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.3145, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9328358208955224, | |
| "grad_norm": 0.2757190763950348, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.3065, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.9701492537313433, | |
| "grad_norm": 0.30510950088500977, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 0.2971, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.007462686567164, | |
| "grad_norm": 0.37349891662597656, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 0.3273, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.044776119402985, | |
| "grad_norm": 0.3667634129524231, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.308, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.0820895522388059, | |
| "grad_norm": 0.3463355004787445, | |
| "learning_rate": 2.9e-05, | |
| "loss": 0.3109, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.1194029850746268, | |
| "grad_norm": 0.3888525366783142, | |
| "learning_rate": 3e-05, | |
| "loss": 0.2644, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.1567164179104479, | |
| "grad_norm": 0.3749147951602936, | |
| "learning_rate": 3.1e-05, | |
| "loss": 0.2858, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.1940298507462686, | |
| "grad_norm": 0.3270276188850403, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.2573, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.2313432835820897, | |
| "grad_norm": 0.3658592998981476, | |
| "learning_rate": 3.3e-05, | |
| "loss": 0.2613, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.2686567164179103, | |
| "grad_norm": 0.3526328206062317, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 0.2328, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3059701492537314, | |
| "grad_norm": 0.4528139531612396, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.2429, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.3432835820895521, | |
| "grad_norm": 0.5426791310310364, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.2209, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.3805970149253732, | |
| "grad_norm": 0.41844552755355835, | |
| "learning_rate": 3.7e-05, | |
| "loss": 0.2319, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.417910447761194, | |
| "grad_norm": 0.4749431908130646, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.2233, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.455223880597015, | |
| "grad_norm": 0.7010189890861511, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 0.2181, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.4925373134328357, | |
| "grad_norm": 0.5747635960578918, | |
| "learning_rate": 4e-05, | |
| "loss": 0.213, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.5298507462686568, | |
| "grad_norm": 0.3661474287509918, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.2171, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.5671641791044775, | |
| "grad_norm": 0.467835396528244, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.1985, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.6044776119402986, | |
| "grad_norm": 0.5470123291015625, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.2176, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.6417910447761193, | |
| "grad_norm": 0.5761199593544006, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.2007, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.6791044776119404, | |
| "grad_norm": 0.48257485032081604, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.2043, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.716417910447761, | |
| "grad_norm": 0.48353052139282227, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.1872, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.7537313432835822, | |
| "grad_norm": 0.4388391375541687, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.206, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.7910447761194028, | |
| "grad_norm": 0.47332626581192017, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.1876, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.828358208955224, | |
| "grad_norm": 0.8053535223007202, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.1839, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.8656716417910446, | |
| "grad_norm": 0.413979709148407, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1732, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.9029850746268657, | |
| "grad_norm": 0.36910712718963623, | |
| "learning_rate": 5.1000000000000006e-05, | |
| "loss": 0.1827, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.9402985074626866, | |
| "grad_norm": 0.8458298444747925, | |
| "learning_rate": 5.2000000000000004e-05, | |
| "loss": 0.1727, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.9776119402985075, | |
| "grad_norm": 0.5452115535736084, | |
| "learning_rate": 5.300000000000001e-05, | |
| "loss": 0.1818, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.014925373134328, | |
| "grad_norm": 0.4518108069896698, | |
| "learning_rate": 5.4000000000000005e-05, | |
| "loss": 0.177, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.0522388059701493, | |
| "grad_norm": 0.66865074634552, | |
| "learning_rate": 5.500000000000001e-05, | |
| "loss": 0.1726, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.08955223880597, | |
| "grad_norm": 0.6536034345626831, | |
| "learning_rate": 5.6000000000000006e-05, | |
| "loss": 0.1541, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.126865671641791, | |
| "grad_norm": 0.5571377277374268, | |
| "learning_rate": 5.6999999999999996e-05, | |
| "loss": 0.1671, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.1641791044776117, | |
| "grad_norm": 0.5385546684265137, | |
| "learning_rate": 5.8e-05, | |
| "loss": 0.1582, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.201492537313433, | |
| "grad_norm": 0.577961266040802, | |
| "learning_rate": 5.9e-05, | |
| "loss": 0.1528, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.2388059701492535, | |
| "grad_norm": 0.5082416534423828, | |
| "learning_rate": 6e-05, | |
| "loss": 0.1638, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.2761194029850746, | |
| "grad_norm": 0.5490861535072327, | |
| "learning_rate": 6.1e-05, | |
| "loss": 0.166, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.3134328358208958, | |
| "grad_norm": 0.492366760969162, | |
| "learning_rate": 6.2e-05, | |
| "loss": 0.1481, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.3507462686567164, | |
| "grad_norm": 0.3702855110168457, | |
| "learning_rate": 6.3e-05, | |
| "loss": 0.1514, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.388059701492537, | |
| "grad_norm": 0.664667010307312, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 0.1441, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.425373134328358, | |
| "grad_norm": 0.33382174372673035, | |
| "learning_rate": 6.500000000000001e-05, | |
| "loss": 0.1573, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.4626865671641793, | |
| "grad_norm": 0.4848814010620117, | |
| "learning_rate": 6.6e-05, | |
| "loss": 0.1457, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.3649997413158417, | |
| "learning_rate": 6.7e-05, | |
| "loss": 0.1467, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.5373134328358207, | |
| "grad_norm": 0.6385223865509033, | |
| "learning_rate": 6.800000000000001e-05, | |
| "loss": 0.145, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.574626865671642, | |
| "grad_norm": 0.4580625891685486, | |
| "learning_rate": 6.9e-05, | |
| "loss": 0.1352, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.611940298507463, | |
| "grad_norm": 0.5141746401786804, | |
| "learning_rate": 7e-05, | |
| "loss": 0.1444, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.6492537313432836, | |
| "grad_norm": 0.40220722556114197, | |
| "learning_rate": 7.1e-05, | |
| "loss": 0.1493, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.6865671641791042, | |
| "grad_norm": 0.5510571002960205, | |
| "learning_rate": 7.2e-05, | |
| "loss": 0.1387, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.7238805970149254, | |
| "grad_norm": 0.43814659118652344, | |
| "learning_rate": 7.3e-05, | |
| "loss": 0.1374, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.7611940298507465, | |
| "grad_norm": 0.4118008613586426, | |
| "learning_rate": 7.4e-05, | |
| "loss": 0.1297, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.798507462686567, | |
| "grad_norm": 0.5626503229141235, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.1299, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.835820895522388, | |
| "grad_norm": 0.4066360592842102, | |
| "learning_rate": 7.6e-05, | |
| "loss": 0.1102, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.873134328358209, | |
| "grad_norm": 0.47184985876083374, | |
| "learning_rate": 7.7e-05, | |
| "loss": 0.1219, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.91044776119403, | |
| "grad_norm": 0.6611475348472595, | |
| "learning_rate": 7.800000000000001e-05, | |
| "loss": 0.1267, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.9477611940298507, | |
| "grad_norm": 0.3570108413696289, | |
| "learning_rate": 7.900000000000001e-05, | |
| "loss": 0.1191, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.9850746268656714, | |
| "grad_norm": 0.4581681489944458, | |
| "learning_rate": 8e-05, | |
| "loss": 0.1209, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.0223880597014925, | |
| "grad_norm": 0.4643435776233673, | |
| "learning_rate": 8.1e-05, | |
| "loss": 0.129, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.0597014925373136, | |
| "grad_norm": 0.5595763921737671, | |
| "learning_rate": 8.2e-05, | |
| "loss": 0.1158, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.0970149253731343, | |
| "grad_norm": 0.48848605155944824, | |
| "learning_rate": 8.3e-05, | |
| "loss": 0.1188, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.1343283582089554, | |
| "grad_norm": 0.4496570825576782, | |
| "learning_rate": 8.4e-05, | |
| "loss": 0.114, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.171641791044776, | |
| "grad_norm": 0.31364986300468445, | |
| "learning_rate": 8.5e-05, | |
| "loss": 0.1196, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.208955223880597, | |
| "grad_norm": 0.3395878076553345, | |
| "learning_rate": 8.6e-05, | |
| "loss": 0.1124, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.246268656716418, | |
| "grad_norm": 0.4917413592338562, | |
| "learning_rate": 8.7e-05, | |
| "loss": 0.1074, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.283582089552239, | |
| "grad_norm": 0.44114553928375244, | |
| "learning_rate": 8.800000000000001e-05, | |
| "loss": 0.1095, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.3208955223880596, | |
| "grad_norm": 0.3323831558227539, | |
| "learning_rate": 8.900000000000001e-05, | |
| "loss": 0.106, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.3582089552238807, | |
| "grad_norm": 0.4495660066604614, | |
| "learning_rate": 9e-05, | |
| "loss": 0.1222, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.3955223880597014, | |
| "grad_norm": 0.40784788131713867, | |
| "learning_rate": 9.1e-05, | |
| "loss": 0.1048, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.4328358208955225, | |
| "grad_norm": 0.4643700420856476, | |
| "learning_rate": 9.200000000000001e-05, | |
| "loss": 0.1097, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.470149253731343, | |
| "grad_norm": 0.472494512796402, | |
| "learning_rate": 9.300000000000001e-05, | |
| "loss": 0.1041, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.5074626865671643, | |
| "grad_norm": 0.6110897660255432, | |
| "learning_rate": 9.4e-05, | |
| "loss": 0.0959, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.544776119402985, | |
| "grad_norm": 0.5313069820404053, | |
| "learning_rate": 9.5e-05, | |
| "loss": 0.113, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.582089552238806, | |
| "grad_norm": 0.4223133623600006, | |
| "learning_rate": 9.6e-05, | |
| "loss": 0.099, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.6194029850746268, | |
| "grad_norm": 0.5464731454849243, | |
| "learning_rate": 9.7e-05, | |
| "loss": 0.1008, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.656716417910448, | |
| "grad_norm": 0.3538314402103424, | |
| "learning_rate": 9.8e-05, | |
| "loss": 0.1049, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.6940298507462686, | |
| "grad_norm": 0.7460148334503174, | |
| "learning_rate": 9.900000000000001e-05, | |
| "loss": 0.1088, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.7313432835820897, | |
| "grad_norm": 0.3210597038269043, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1041, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.7686567164179103, | |
| "grad_norm": 0.4450497627258301, | |
| "learning_rate": 9.999993165095463e-05, | |
| "loss": 0.0985, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 3.8059701492537314, | |
| "grad_norm": 0.4348960816860199, | |
| "learning_rate": 9.999972660400536e-05, | |
| "loss": 0.1015, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 3.843283582089552, | |
| "grad_norm": 0.462782621383667, | |
| "learning_rate": 9.999938485971279e-05, | |
| "loss": 0.1068, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 3.8805970149253732, | |
| "grad_norm": 0.3801368474960327, | |
| "learning_rate": 9.999890641901125e-05, | |
| "loss": 0.1117, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 3.917910447761194, | |
| "grad_norm": 0.45135366916656494, | |
| "learning_rate": 9.999829128320874e-05, | |
| "loss": 0.0917, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.955223880597015, | |
| "grad_norm": 0.41138389706611633, | |
| "learning_rate": 9.999753945398704e-05, | |
| "loss": 0.1049, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 3.9925373134328357, | |
| "grad_norm": 0.4976252317428589, | |
| "learning_rate": 9.999665093340165e-05, | |
| "loss": 0.1029, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 4.029850746268656, | |
| "grad_norm": 0.46372008323669434, | |
| "learning_rate": 9.99956257238817e-05, | |
| "loss": 0.1012, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 4.067164179104478, | |
| "grad_norm": 0.546938955783844, | |
| "learning_rate": 9.999446382823013e-05, | |
| "loss": 0.0829, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 4.104477611940299, | |
| "grad_norm": 0.40513405203819275, | |
| "learning_rate": 9.999316524962345e-05, | |
| "loss": 0.0933, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.141791044776119, | |
| "grad_norm": 0.4198484420776367, | |
| "learning_rate": 9.999172999161198e-05, | |
| "loss": 0.0895, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 4.17910447761194, | |
| "grad_norm": 0.3965628743171692, | |
| "learning_rate": 9.999015805811965e-05, | |
| "loss": 0.0917, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 4.2164179104477615, | |
| "grad_norm": 0.3095884621143341, | |
| "learning_rate": 9.998844945344405e-05, | |
| "loss": 0.0953, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 4.253731343283582, | |
| "grad_norm": 0.7962276339530945, | |
| "learning_rate": 9.998660418225645e-05, | |
| "loss": 0.0979, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 4.291044776119403, | |
| "grad_norm": 0.42066490650177, | |
| "learning_rate": 9.998462224960175e-05, | |
| "loss": 0.099, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 4.3283582089552235, | |
| "grad_norm": 0.3894193470478058, | |
| "learning_rate": 9.998250366089848e-05, | |
| "loss": 0.0887, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 4.365671641791045, | |
| "grad_norm": 0.28998032212257385, | |
| "learning_rate": 9.998024842193876e-05, | |
| "loss": 0.0943, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 4.402985074626866, | |
| "grad_norm": 0.3919823467731476, | |
| "learning_rate": 9.997785653888835e-05, | |
| "loss": 0.0916, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 4.440298507462686, | |
| "grad_norm": 0.3708650469779968, | |
| "learning_rate": 9.997532801828658e-05, | |
| "loss": 0.0858, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 4.477611940298507, | |
| "grad_norm": 0.2935069799423218, | |
| "learning_rate": 9.997266286704631e-05, | |
| "loss": 0.0992, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.514925373134329, | |
| "grad_norm": 0.4675377607345581, | |
| "learning_rate": 9.996986109245395e-05, | |
| "loss": 0.0854, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 4.552238805970149, | |
| "grad_norm": 0.31374865770339966, | |
| "learning_rate": 9.996692270216947e-05, | |
| "loss": 0.0788, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 4.58955223880597, | |
| "grad_norm": 0.419249951839447, | |
| "learning_rate": 9.996384770422629e-05, | |
| "loss": 0.0873, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 4.6268656716417915, | |
| "grad_norm": 0.26002731919288635, | |
| "learning_rate": 9.996063610703137e-05, | |
| "loss": 0.0845, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 4.664179104477612, | |
| "grad_norm": 0.29573896527290344, | |
| "learning_rate": 9.995728791936504e-05, | |
| "loss": 0.091, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 4.701492537313433, | |
| "grad_norm": 0.33090147376060486, | |
| "learning_rate": 9.995380315038119e-05, | |
| "loss": 0.0827, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 4.7388059701492535, | |
| "grad_norm": 0.24417485296726227, | |
| "learning_rate": 9.9950181809607e-05, | |
| "loss": 0.0859, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 4.776119402985074, | |
| "grad_norm": 0.48290401697158813, | |
| "learning_rate": 9.994642390694308e-05, | |
| "loss": 0.0889, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 4.813432835820896, | |
| "grad_norm": 0.4479697048664093, | |
| "learning_rate": 9.99425294526634e-05, | |
| "loss": 0.097, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 4.850746268656716, | |
| "grad_norm": 0.3560147285461426, | |
| "learning_rate": 9.993849845741524e-05, | |
| "loss": 0.0904, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.888059701492537, | |
| "grad_norm": 0.6645416617393494, | |
| "learning_rate": 9.99343309322192e-05, | |
| "loss": 0.0922, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 4.925373134328359, | |
| "grad_norm": 0.29696759581565857, | |
| "learning_rate": 9.993002688846913e-05, | |
| "loss": 0.093, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 4.962686567164179, | |
| "grad_norm": 0.47146692872047424, | |
| "learning_rate": 9.992558633793212e-05, | |
| "loss": 0.085, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.3430916368961334, | |
| "learning_rate": 9.992100929274846e-05, | |
| "loss": 0.0805, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 5.037313432835821, | |
| "grad_norm": 0.3205055892467499, | |
| "learning_rate": 9.991629576543163e-05, | |
| "loss": 0.0766, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 5.074626865671641, | |
| "grad_norm": 0.3664805293083191, | |
| "learning_rate": 9.991144576886823e-05, | |
| "loss": 0.0766, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 5.111940298507463, | |
| "grad_norm": 0.3753412663936615, | |
| "learning_rate": 9.990645931631796e-05, | |
| "loss": 0.0688, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 5.149253731343284, | |
| "grad_norm": 0.31633055210113525, | |
| "learning_rate": 9.990133642141359e-05, | |
| "loss": 0.0796, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 5.186567164179104, | |
| "grad_norm": 0.3355732262134552, | |
| "learning_rate": 9.989607709816091e-05, | |
| "loss": 0.0716, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 5.223880597014926, | |
| "grad_norm": 0.24850831925868988, | |
| "learning_rate": 9.989068136093873e-05, | |
| "loss": 0.0778, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 5.2611940298507465, | |
| "grad_norm": 0.29537102580070496, | |
| "learning_rate": 9.988514922449879e-05, | |
| "loss": 0.0759, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 5.298507462686567, | |
| "grad_norm": 0.3430945873260498, | |
| "learning_rate": 9.987948070396571e-05, | |
| "loss": 0.0774, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 5.335820895522388, | |
| "grad_norm": 0.5220637917518616, | |
| "learning_rate": 9.987367581483705e-05, | |
| "loss": 0.0836, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 5.373134328358209, | |
| "grad_norm": 0.28184008598327637, | |
| "learning_rate": 9.986773457298311e-05, | |
| "loss": 0.0752, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 5.41044776119403, | |
| "grad_norm": 0.36261311173439026, | |
| "learning_rate": 9.986165699464705e-05, | |
| "loss": 0.075, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 5.447761194029851, | |
| "grad_norm": 0.5107380151748657, | |
| "learning_rate": 9.985544309644475e-05, | |
| "loss": 0.0814, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 5.485074626865671, | |
| "grad_norm": 0.2446671426296234, | |
| "learning_rate": 9.984909289536473e-05, | |
| "loss": 0.0704, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 5.522388059701493, | |
| "grad_norm": 0.30449381470680237, | |
| "learning_rate": 9.984260640876821e-05, | |
| "loss": 0.0794, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 5.559701492537314, | |
| "grad_norm": 0.25645050406455994, | |
| "learning_rate": 9.983598365438902e-05, | |
| "loss": 0.0709, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 5.597014925373134, | |
| "grad_norm": 0.23825006186962128, | |
| "learning_rate": 9.98292246503335e-05, | |
| "loss": 0.0828, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 5.634328358208955, | |
| "grad_norm": 0.3259269893169403, | |
| "learning_rate": 9.98223294150805e-05, | |
| "loss": 0.0824, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 5.6716417910447765, | |
| "grad_norm": 0.24058914184570312, | |
| "learning_rate": 9.981529796748134e-05, | |
| "loss": 0.073, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 5.708955223880597, | |
| "grad_norm": 0.34457242488861084, | |
| "learning_rate": 9.980813032675974e-05, | |
| "loss": 0.0845, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 5.746268656716418, | |
| "grad_norm": 0.32940393686294556, | |
| "learning_rate": 9.980082651251175e-05, | |
| "loss": 0.0832, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 5.7835820895522385, | |
| "grad_norm": 0.5683007836341858, | |
| "learning_rate": 9.979338654470569e-05, | |
| "loss": 0.0836, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 5.82089552238806, | |
| "grad_norm": 0.31041061878204346, | |
| "learning_rate": 9.97858104436822e-05, | |
| "loss": 0.07, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 5.858208955223881, | |
| "grad_norm": 0.37858131527900696, | |
| "learning_rate": 9.977809823015401e-05, | |
| "loss": 0.0738, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 5.895522388059701, | |
| "grad_norm": 0.2743091583251953, | |
| "learning_rate": 9.977024992520602e-05, | |
| "loss": 0.0761, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 5.932835820895522, | |
| "grad_norm": 0.29117098450660706, | |
| "learning_rate": 9.976226555029522e-05, | |
| "loss": 0.0777, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 5.970149253731344, | |
| "grad_norm": 0.31398633122444153, | |
| "learning_rate": 9.975414512725057e-05, | |
| "loss": 0.0664, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 6.007462686567164, | |
| "grad_norm": 0.2684272527694702, | |
| "learning_rate": 9.974588867827301e-05, | |
| "loss": 0.0686, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 6.044776119402985, | |
| "grad_norm": 0.3945397436618805, | |
| "learning_rate": 9.973749622593534e-05, | |
| "loss": 0.0614, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 6.082089552238806, | |
| "grad_norm": 0.2747954726219177, | |
| "learning_rate": 9.972896779318219e-05, | |
| "loss": 0.0681, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 6.119402985074627, | |
| "grad_norm": 0.43257200717926025, | |
| "learning_rate": 9.972030340333001e-05, | |
| "loss": 0.0725, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 6.156716417910448, | |
| "grad_norm": 0.3559250831604004, | |
| "learning_rate": 9.97115030800669e-05, | |
| "loss": 0.0804, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 6.1940298507462686, | |
| "grad_norm": 0.3079264760017395, | |
| "learning_rate": 9.970256684745258e-05, | |
| "loss": 0.0649, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 6.231343283582089, | |
| "grad_norm": 0.32298946380615234, | |
| "learning_rate": 9.969349472991838e-05, | |
| "loss": 0.0668, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 6.268656716417911, | |
| "grad_norm": 0.2826225459575653, | |
| "learning_rate": 9.968428675226714e-05, | |
| "loss": 0.0734, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 6.3059701492537314, | |
| "grad_norm": 0.39002349972724915, | |
| "learning_rate": 9.967494293967312e-05, | |
| "loss": 0.0728, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 6.343283582089552, | |
| "grad_norm": 0.403890997171402, | |
| "learning_rate": 9.966546331768191e-05, | |
| "loss": 0.067, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 6.380597014925373, | |
| "grad_norm": 0.3755359351634979, | |
| "learning_rate": 9.965584791221048e-05, | |
| "loss": 0.0755, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 6.417910447761194, | |
| "grad_norm": 0.26346635818481445, | |
| "learning_rate": 9.964609674954696e-05, | |
| "loss": 0.0728, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 6.455223880597015, | |
| "grad_norm": 0.45292145013809204, | |
| "learning_rate": 9.963620985635065e-05, | |
| "loss": 0.0731, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 6.492537313432836, | |
| "grad_norm": 0.3568434715270996, | |
| "learning_rate": 9.962618725965196e-05, | |
| "loss": 0.0761, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 6.529850746268656, | |
| "grad_norm": 0.2551257014274597, | |
| "learning_rate": 9.961602898685226e-05, | |
| "loss": 0.0694, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 6.567164179104478, | |
| "grad_norm": 0.6106354594230652, | |
| "learning_rate": 9.96057350657239e-05, | |
| "loss": 0.0827, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 6.604477611940299, | |
| "grad_norm": 0.3226093053817749, | |
| "learning_rate": 9.959530552441005e-05, | |
| "loss": 0.0716, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 6.641791044776119, | |
| "grad_norm": 0.4297254979610443, | |
| "learning_rate": 9.95847403914247e-05, | |
| "loss": 0.0748, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 6.67910447761194, | |
| "grad_norm": 0.26469680666923523, | |
| "learning_rate": 9.95740396956525e-05, | |
| "loss": 0.074, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 6.7164179104477615, | |
| "grad_norm": 0.22717897593975067, | |
| "learning_rate": 9.956320346634876e-05, | |
| "loss": 0.0739, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 6.753731343283582, | |
| "grad_norm": 0.4513498544692993, | |
| "learning_rate": 9.955223173313931e-05, | |
| "loss": 0.0664, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 6.791044776119403, | |
| "grad_norm": 0.31683439016342163, | |
| "learning_rate": 9.954112452602045e-05, | |
| "loss": 0.069, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 6.8283582089552235, | |
| "grad_norm": 0.3350532650947571, | |
| "learning_rate": 9.952988187535886e-05, | |
| "loss": 0.0699, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 6.865671641791045, | |
| "grad_norm": 0.29829463362693787, | |
| "learning_rate": 9.95185038118915e-05, | |
| "loss": 0.0663, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 6.902985074626866, | |
| "grad_norm": 0.31650781631469727, | |
| "learning_rate": 9.950699036672559e-05, | |
| "loss": 0.0668, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 6.940298507462686, | |
| "grad_norm": 0.360944926738739, | |
| "learning_rate": 9.949534157133844e-05, | |
| "loss": 0.0696, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 6.977611940298507, | |
| "grad_norm": 0.31337013840675354, | |
| "learning_rate": 9.948355745757741e-05, | |
| "loss": 0.073, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 7.014925373134329, | |
| "grad_norm": 0.4675919711589813, | |
| "learning_rate": 9.94716380576598e-05, | |
| "loss": 0.0688, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 7.052238805970149, | |
| "grad_norm": 0.3031919002532959, | |
| "learning_rate": 9.945958340417283e-05, | |
| "loss": 0.0596, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 7.08955223880597, | |
| "grad_norm": 0.24858474731445312, | |
| "learning_rate": 9.944739353007344e-05, | |
| "loss": 0.0717, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 7.126865671641791, | |
| "grad_norm": 0.20959483087062836, | |
| "learning_rate": 9.943506846868826e-05, | |
| "loss": 0.0694, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 7.164179104477612, | |
| "grad_norm": 0.35621434450149536, | |
| "learning_rate": 9.942260825371358e-05, | |
| "loss": 0.063, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 7.201492537313433, | |
| "grad_norm": 0.3462587594985962, | |
| "learning_rate": 9.941001291921512e-05, | |
| "loss": 0.068, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 7.2388059701492535, | |
| "grad_norm": 0.38649681210517883, | |
| "learning_rate": 9.939728249962807e-05, | |
| "loss": 0.0638, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 7.276119402985074, | |
| "grad_norm": 0.29564595222473145, | |
| "learning_rate": 9.938441702975689e-05, | |
| "loss": 0.0626, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 7.313432835820896, | |
| "grad_norm": 0.339857816696167, | |
| "learning_rate": 9.937141654477528e-05, | |
| "loss": 0.0535, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 7.350746268656716, | |
| "grad_norm": 0.2591215670108795, | |
| "learning_rate": 9.93582810802261e-05, | |
| "loss": 0.0645, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 7.388059701492537, | |
| "grad_norm": 0.30237796902656555, | |
| "learning_rate": 9.934501067202117e-05, | |
| "loss": 0.0675, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 7.425373134328359, | |
| "grad_norm": 0.28394174575805664, | |
| "learning_rate": 9.93316053564413e-05, | |
| "loss": 0.0643, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 7.462686567164179, | |
| "grad_norm": 0.3124663233757019, | |
| "learning_rate": 9.931806517013612e-05, | |
| "loss": 0.059, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "grad_norm": 0.36073037981987, | |
| "learning_rate": 9.930439015012396e-05, | |
| "loss": 0.0606, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 7.537313432835821, | |
| "grad_norm": 0.4091481864452362, | |
| "learning_rate": 9.929058033379181e-05, | |
| "loss": 0.0603, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 7.574626865671641, | |
| "grad_norm": 0.44718074798583984, | |
| "learning_rate": 9.927663575889521e-05, | |
| "loss": 0.0741, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 7.611940298507463, | |
| "grad_norm": 0.3819601833820343, | |
| "learning_rate": 9.926255646355804e-05, | |
| "loss": 0.0707, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 7.649253731343284, | |
| "grad_norm": 0.23336420953273773, | |
| "learning_rate": 9.92483424862726e-05, | |
| "loss": 0.0676, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 7.686567164179104, | |
| "grad_norm": 0.24415315687656403, | |
| "learning_rate": 9.923399386589933e-05, | |
| "loss": 0.0594, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 7.723880597014926, | |
| "grad_norm": 0.3735473155975342, | |
| "learning_rate": 9.921951064166684e-05, | |
| "loss": 0.062, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 7.7611940298507465, | |
| "grad_norm": 0.31629472970962524, | |
| "learning_rate": 9.92048928531717e-05, | |
| "loss": 0.0606, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 7.798507462686567, | |
| "grad_norm": 0.37902557849884033, | |
| "learning_rate": 9.919014054037836e-05, | |
| "loss": 0.0584, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 7.835820895522388, | |
| "grad_norm": 0.3486720323562622, | |
| "learning_rate": 9.917525374361912e-05, | |
| "loss": 0.056, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 7.8731343283582085, | |
| "grad_norm": 0.3731362521648407, | |
| "learning_rate": 9.91602325035939e-05, | |
| "loss": 0.0601, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 7.91044776119403, | |
| "grad_norm": 0.3560399115085602, | |
| "learning_rate": 9.914507686137019e-05, | |
| "loss": 0.06, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 7.947761194029851, | |
| "grad_norm": 0.30075564980506897, | |
| "learning_rate": 9.912978685838294e-05, | |
| "loss": 0.0657, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 7.985074626865671, | |
| "grad_norm": 0.2984028458595276, | |
| "learning_rate": 9.911436253643445e-05, | |
| "loss": 0.0587, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 8.022388059701493, | |
| "grad_norm": 0.1980169117450714, | |
| "learning_rate": 9.90988039376942e-05, | |
| "loss": 0.0718, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 8.059701492537313, | |
| "grad_norm": 0.31339579820632935, | |
| "learning_rate": 9.90831111046988e-05, | |
| "loss": 0.0557, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 8.097014925373134, | |
| "grad_norm": 0.1968696266412735, | |
| "learning_rate": 9.90672840803519e-05, | |
| "loss": 0.0571, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 8.134328358208956, | |
| "grad_norm": 0.23931682109832764, | |
| "learning_rate": 9.905132290792394e-05, | |
| "loss": 0.0566, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 8.171641791044776, | |
| "grad_norm": 0.21741189062595367, | |
| "learning_rate": 9.903522763105218e-05, | |
| "loss": 0.0575, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 8.208955223880597, | |
| "grad_norm": 0.22874368727207184, | |
| "learning_rate": 9.901899829374047e-05, | |
| "loss": 0.0565, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 8.246268656716419, | |
| "grad_norm": 0.3441888093948364, | |
| "learning_rate": 9.900263494035921e-05, | |
| "loss": 0.0565, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 8.283582089552239, | |
| "grad_norm": 0.2539830803871155, | |
| "learning_rate": 9.89861376156452e-05, | |
| "loss": 0.0538, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 8.32089552238806, | |
| "grad_norm": 0.2235102653503418, | |
| "learning_rate": 9.896950636470147e-05, | |
| "loss": 0.0609, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 8.35820895522388, | |
| "grad_norm": 0.1941322684288025, | |
| "learning_rate": 9.895274123299723e-05, | |
| "loss": 0.0562, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 8.395522388059701, | |
| "grad_norm": 0.2691369950771332, | |
| "learning_rate": 9.893584226636772e-05, | |
| "loss": 0.0608, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 8.432835820895523, | |
| "grad_norm": 0.24730461835861206, | |
| "learning_rate": 9.891880951101407e-05, | |
| "loss": 0.0582, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 8.470149253731343, | |
| "grad_norm": 0.34785839915275574, | |
| "learning_rate": 9.890164301350318e-05, | |
| "loss": 0.0506, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 8.507462686567164, | |
| "grad_norm": 0.3625825345516205, | |
| "learning_rate": 9.888434282076758e-05, | |
| "loss": 0.0614, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 8.544776119402986, | |
| "grad_norm": 0.25210148096084595, | |
| "learning_rate": 9.886690898010535e-05, | |
| "loss": 0.0611, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 8.582089552238806, | |
| "grad_norm": 0.27312466502189636, | |
| "learning_rate": 9.884934153917997e-05, | |
| "loss": 0.0537, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 8.619402985074627, | |
| "grad_norm": 0.314647912979126, | |
| "learning_rate": 9.883164054602012e-05, | |
| "loss": 0.0602, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 8.656716417910447, | |
| "grad_norm": 0.21531912684440613, | |
| "learning_rate": 9.881380604901964e-05, | |
| "loss": 0.0552, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 8.694029850746269, | |
| "grad_norm": 0.23920664191246033, | |
| "learning_rate": 9.879583809693738e-05, | |
| "loss": 0.0613, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 8.73134328358209, | |
| "grad_norm": 0.21864956617355347, | |
| "learning_rate": 9.877773673889701e-05, | |
| "loss": 0.0649, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 8.76865671641791, | |
| "grad_norm": 0.27523377537727356, | |
| "learning_rate": 9.8759502024387e-05, | |
| "loss": 0.0606, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 8.805970149253731, | |
| "grad_norm": 0.24805469810962677, | |
| "learning_rate": 9.87411340032603e-05, | |
| "loss": 0.0549, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 8.843283582089553, | |
| "grad_norm": 0.23070092499256134, | |
| "learning_rate": 9.872263272573443e-05, | |
| "loss": 0.0562, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 8.880597014925373, | |
| "grad_norm": 0.20833946764469147, | |
| "learning_rate": 9.870399824239117e-05, | |
| "loss": 0.05, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 8.917910447761194, | |
| "grad_norm": 0.34507372975349426, | |
| "learning_rate": 9.868523060417646e-05, | |
| "loss": 0.0613, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 8.955223880597014, | |
| "grad_norm": 0.32865110039711, | |
| "learning_rate": 9.86663298624003e-05, | |
| "loss": 0.0621, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 8.992537313432836, | |
| "grad_norm": 0.21305270493030548, | |
| "learning_rate": 9.864729606873663e-05, | |
| "loss": 0.0572, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 9.029850746268657, | |
| "grad_norm": 0.28193730115890503, | |
| "learning_rate": 9.862812927522309e-05, | |
| "loss": 0.0555, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 9.067164179104477, | |
| "grad_norm": 0.3953789472579956, | |
| "learning_rate": 9.860882953426099e-05, | |
| "loss": 0.0536, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 9.104477611940299, | |
| "grad_norm": 0.23013322055339813, | |
| "learning_rate": 9.858939689861506e-05, | |
| "loss": 0.0572, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 9.14179104477612, | |
| "grad_norm": 0.2906680107116699, | |
| "learning_rate": 9.856983142141339e-05, | |
| "loss": 0.0592, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 9.17910447761194, | |
| "grad_norm": 0.23490828275680542, | |
| "learning_rate": 9.855013315614725e-05, | |
| "loss": 0.0583, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 9.216417910447761, | |
| "grad_norm": 0.22825880348682404, | |
| "learning_rate": 9.853030215667093e-05, | |
| "loss": 0.059, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 9.253731343283581, | |
| "grad_norm": 0.25871285796165466, | |
| "learning_rate": 9.851033847720166e-05, | |
| "loss": 0.0555, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 9.291044776119403, | |
| "grad_norm": 0.27220776677131653, | |
| "learning_rate": 9.849024217231935e-05, | |
| "loss": 0.0542, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 9.328358208955224, | |
| "grad_norm": 0.26534005999565125, | |
| "learning_rate": 9.847001329696653e-05, | |
| "loss": 0.0526, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 9.365671641791044, | |
| "grad_norm": 0.33486032485961914, | |
| "learning_rate": 9.844965190644817e-05, | |
| "loss": 0.0563, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 9.402985074626866, | |
| "grad_norm": 0.2949483394622803, | |
| "learning_rate": 9.842915805643155e-05, | |
| "loss": 0.0556, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 9.440298507462687, | |
| "grad_norm": 0.24123981595039368, | |
| "learning_rate": 9.840853180294608e-05, | |
| "loss": 0.05, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 9.477611940298507, | |
| "grad_norm": 0.22536049783229828, | |
| "learning_rate": 9.838777320238312e-05, | |
| "loss": 0.0522, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 9.514925373134329, | |
| "grad_norm": 0.23206663131713867, | |
| "learning_rate": 9.836688231149592e-05, | |
| "loss": 0.0591, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 9.552238805970148, | |
| "grad_norm": 0.28573134541511536, | |
| "learning_rate": 9.834585918739936e-05, | |
| "loss": 0.0568, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 9.58955223880597, | |
| "grad_norm": 0.2628820538520813, | |
| "learning_rate": 9.832470388756987e-05, | |
| "loss": 0.0571, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 9.626865671641792, | |
| "grad_norm": 0.2880440652370453, | |
| "learning_rate": 9.830341646984521e-05, | |
| "loss": 0.0559, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 9.664179104477611, | |
| "grad_norm": 0.1786259263753891, | |
| "learning_rate": 9.82819969924244e-05, | |
| "loss": 0.058, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 9.701492537313433, | |
| "grad_norm": 0.3501608073711395, | |
| "learning_rate": 9.826044551386744e-05, | |
| "loss": 0.0523, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 9.738805970149254, | |
| "grad_norm": 0.24757252633571625, | |
| "learning_rate": 9.823876209309527e-05, | |
| "loss": 0.0587, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 9.776119402985074, | |
| "grad_norm": 0.2556290626525879, | |
| "learning_rate": 9.821694678938953e-05, | |
| "loss": 0.0555, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 9.813432835820896, | |
| "grad_norm": 0.2561217248439789, | |
| "learning_rate": 9.819499966239243e-05, | |
| "loss": 0.052, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 9.850746268656717, | |
| "grad_norm": 0.2776634097099304, | |
| "learning_rate": 9.817292077210659e-05, | |
| "loss": 0.0498, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 9.888059701492537, | |
| "grad_norm": 0.20668549835681915, | |
| "learning_rate": 9.815071017889482e-05, | |
| "loss": 0.0517, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 9.925373134328359, | |
| "grad_norm": 0.3100263178348541, | |
| "learning_rate": 9.812836794348004e-05, | |
| "loss": 0.0633, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 9.962686567164178, | |
| "grad_norm": 0.2780782878398895, | |
| "learning_rate": 9.81058941269451e-05, | |
| "loss": 0.0581, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.28903728723526, | |
| "learning_rate": 9.808328879073251e-05, | |
| "loss": 0.0538, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 10.037313432835822, | |
| "grad_norm": 0.22727562487125397, | |
| "learning_rate": 9.806055199664446e-05, | |
| "loss": 0.0491, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 10.074626865671641, | |
| "grad_norm": 0.267918199300766, | |
| "learning_rate": 9.803768380684242e-05, | |
| "loss": 0.0562, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 10.111940298507463, | |
| "grad_norm": 0.2988606095314026, | |
| "learning_rate": 9.801468428384716e-05, | |
| "loss": 0.0566, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 10.149253731343283, | |
| "grad_norm": 0.2710281312465668, | |
| "learning_rate": 9.799155349053851e-05, | |
| "loss": 0.0541, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 10.186567164179104, | |
| "grad_norm": 0.15320520102977753, | |
| "learning_rate": 9.796829149015517e-05, | |
| "loss": 0.0548, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 10.223880597014926, | |
| "grad_norm": 0.2653089463710785, | |
| "learning_rate": 9.794489834629455e-05, | |
| "loss": 0.0599, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 10.261194029850746, | |
| "grad_norm": 0.19223959743976593, | |
| "learning_rate": 9.792137412291265e-05, | |
| "loss": 0.0494, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 10.298507462686567, | |
| "grad_norm": 0.20455987751483917, | |
| "learning_rate": 9.789771888432375e-05, | |
| "loss": 0.0538, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 10.335820895522389, | |
| "grad_norm": 0.24908749759197235, | |
| "learning_rate": 9.787393269520039e-05, | |
| "loss": 0.0481, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 10.373134328358208, | |
| "grad_norm": 0.3131813406944275, | |
| "learning_rate": 9.785001562057309e-05, | |
| "loss": 0.0526, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 10.41044776119403, | |
| "grad_norm": 0.24828971922397614, | |
| "learning_rate": 9.782596772583026e-05, | |
| "loss": 0.0489, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 10.447761194029852, | |
| "grad_norm": 0.21727119386196136, | |
| "learning_rate": 9.780178907671789e-05, | |
| "loss": 0.0532, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 10.485074626865671, | |
| "grad_norm": 0.20279547572135925, | |
| "learning_rate": 9.777747973933948e-05, | |
| "loss": 0.0565, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 10.522388059701493, | |
| "grad_norm": 0.17726702988147736, | |
| "learning_rate": 9.775303978015585e-05, | |
| "loss": 0.0437, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 10.559701492537313, | |
| "grad_norm": 0.18961119651794434, | |
| "learning_rate": 9.772846926598491e-05, | |
| "loss": 0.0584, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 10.597014925373134, | |
| "grad_norm": 0.2498980015516281, | |
| "learning_rate": 9.77037682640015e-05, | |
| "loss": 0.0496, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 10.634328358208956, | |
| "grad_norm": 0.16978798806667328, | |
| "learning_rate": 9.767893684173721e-05, | |
| "loss": 0.0469, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 10.671641791044776, | |
| "grad_norm": 0.16128584742546082, | |
| "learning_rate": 9.765397506708023e-05, | |
| "loss": 0.0533, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 10.708955223880597, | |
| "grad_norm": 0.20463155210018158, | |
| "learning_rate": 9.762888300827507e-05, | |
| "loss": 0.0464, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 10.746268656716419, | |
| "grad_norm": 0.30601629614830017, | |
| "learning_rate": 9.760366073392246e-05, | |
| "loss": 0.0489, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 10.783582089552239, | |
| "grad_norm": 0.2730671763420105, | |
| "learning_rate": 9.757830831297914e-05, | |
| "loss": 0.0495, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 10.82089552238806, | |
| "grad_norm": 0.251432865858078, | |
| "learning_rate": 9.755282581475769e-05, | |
| "loss": 0.0549, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 10.85820895522388, | |
| "grad_norm": 0.26670166850090027, | |
| "learning_rate": 9.752721330892624e-05, | |
| "loss": 0.061, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 10.895522388059701, | |
| "grad_norm": 0.2965967655181885, | |
| "learning_rate": 9.750147086550844e-05, | |
| "loss": 0.0473, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 10.932835820895523, | |
| "grad_norm": 0.683840274810791, | |
| "learning_rate": 9.747559855488313e-05, | |
| "loss": 0.0509, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 10.970149253731343, | |
| "grad_norm": 0.25740495324134827, | |
| "learning_rate": 9.744959644778422e-05, | |
| "loss": 0.0515, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 11.007462686567164, | |
| "grad_norm": 0.2880542278289795, | |
| "learning_rate": 9.742346461530048e-05, | |
| "loss": 0.0482, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 11.044776119402986, | |
| "grad_norm": 0.45032551884651184, | |
| "learning_rate": 9.739720312887535e-05, | |
| "loss": 0.0557, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 11.082089552238806, | |
| "grad_norm": 0.2829900085926056, | |
| "learning_rate": 9.73708120603067e-05, | |
| "loss": 0.052, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 11.119402985074627, | |
| "grad_norm": 0.309597373008728, | |
| "learning_rate": 9.734429148174675e-05, | |
| "loss": 0.0541, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 11.156716417910447, | |
| "grad_norm": 0.2433389127254486, | |
| "learning_rate": 9.731764146570173e-05, | |
| "loss": 0.0482, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 11.194029850746269, | |
| "grad_norm": 0.24458132684230804, | |
| "learning_rate": 9.729086208503174e-05, | |
| "loss": 0.0505, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 11.23134328358209, | |
| "grad_norm": 0.2305087298154831, | |
| "learning_rate": 9.726395341295062e-05, | |
| "loss": 0.0504, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 11.26865671641791, | |
| "grad_norm": 0.18110457062721252, | |
| "learning_rate": 9.723691552302562e-05, | |
| "loss": 0.0575, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 11.305970149253731, | |
| "grad_norm": 0.20407621562480927, | |
| "learning_rate": 9.720974848917735e-05, | |
| "loss": 0.0494, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 11.343283582089553, | |
| "grad_norm": 0.25924697518348694, | |
| "learning_rate": 9.718245238567939e-05, | |
| "loss": 0.0472, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 11.380597014925373, | |
| "grad_norm": 0.23041822016239166, | |
| "learning_rate": 9.715502728715826e-05, | |
| "loss": 0.0481, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 11.417910447761194, | |
| "grad_norm": 0.25381171703338623, | |
| "learning_rate": 9.712747326859315e-05, | |
| "loss": 0.0543, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 11.455223880597014, | |
| "grad_norm": 0.18027640879154205, | |
| "learning_rate": 9.709979040531569e-05, | |
| "loss": 0.055, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 11.492537313432836, | |
| "grad_norm": 0.2954868674278259, | |
| "learning_rate": 9.707197877300974e-05, | |
| "loss": 0.0473, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 11.529850746268657, | |
| "grad_norm": 0.25323861837387085, | |
| "learning_rate": 9.704403844771128e-05, | |
| "loss": 0.0509, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 11.567164179104477, | |
| "grad_norm": 0.36910176277160645, | |
| "learning_rate": 9.701596950580806e-05, | |
| "loss": 0.0504, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 11.604477611940299, | |
| "grad_norm": 0.34199246764183044, | |
| "learning_rate": 9.698777202403953e-05, | |
| "loss": 0.0526, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 11.64179104477612, | |
| "grad_norm": 0.2146557718515396, | |
| "learning_rate": 9.695944607949649e-05, | |
| "loss": 0.0579, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 11.67910447761194, | |
| "grad_norm": 0.20559175312519073, | |
| "learning_rate": 9.693099174962103e-05, | |
| "loss": 0.0514, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 11.716417910447761, | |
| "grad_norm": 0.2689419090747833, | |
| "learning_rate": 9.690240911220618e-05, | |
| "loss": 0.0534, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 11.753731343283581, | |
| "grad_norm": 0.34870603680610657, | |
| "learning_rate": 9.687369824539577e-05, | |
| "loss": 0.0485, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 11.791044776119403, | |
| "grad_norm": 0.15433363616466522, | |
| "learning_rate": 9.684485922768422e-05, | |
| "loss": 0.0418, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 11.828358208955224, | |
| "grad_norm": 0.26874423027038574, | |
| "learning_rate": 9.681589213791633e-05, | |
| "loss": 0.0537, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 11.865671641791044, | |
| "grad_norm": 0.3361654281616211, | |
| "learning_rate": 9.6786797055287e-05, | |
| "loss": 0.0474, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 11.902985074626866, | |
| "grad_norm": 0.17938771843910217, | |
| "learning_rate": 9.675757405934103e-05, | |
| "loss": 0.0443, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 11.940298507462687, | |
| "grad_norm": 0.31368622183799744, | |
| "learning_rate": 9.672822322997305e-05, | |
| "loss": 0.0594, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 11.977611940298507, | |
| "grad_norm": 0.16268151998519897, | |
| "learning_rate": 9.669874464742705e-05, | |
| "loss": 0.0487, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 12.014925373134329, | |
| "grad_norm": 0.23879969120025635, | |
| "learning_rate": 9.66691383922964e-05, | |
| "loss": 0.0484, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 12.052238805970148, | |
| "grad_norm": 0.2321789413690567, | |
| "learning_rate": 9.663940454552342e-05, | |
| "loss": 0.051, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 12.08955223880597, | |
| "grad_norm": 0.22873088717460632, | |
| "learning_rate": 9.660954318839933e-05, | |
| "loss": 0.0406, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 12.126865671641792, | |
| "grad_norm": 0.3767557740211487, | |
| "learning_rate": 9.657955440256395e-05, | |
| "loss": 0.0432, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 12.164179104477611, | |
| "grad_norm": 0.21569453179836273, | |
| "learning_rate": 9.654943827000548e-05, | |
| "loss": 0.0528, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 12.201492537313433, | |
| "grad_norm": 0.23698291182518005, | |
| "learning_rate": 9.651919487306025e-05, | |
| "loss": 0.0457, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 12.238805970149254, | |
| "grad_norm": 0.21086478233337402, | |
| "learning_rate": 9.648882429441257e-05, | |
| "loss": 0.0508, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 12.276119402985074, | |
| "grad_norm": 0.19763463735580444, | |
| "learning_rate": 9.645832661709444e-05, | |
| "loss": 0.0497, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 12.313432835820896, | |
| "grad_norm": 0.18413852155208588, | |
| "learning_rate": 9.642770192448536e-05, | |
| "loss": 0.0441, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 12.350746268656717, | |
| "grad_norm": 0.13946911692619324, | |
| "learning_rate": 9.639695030031204e-05, | |
| "loss": 0.0453, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 12.388059701492537, | |
| "grad_norm": 0.21613670885562897, | |
| "learning_rate": 9.636607182864827e-05, | |
| "loss": 0.0511, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 12.425373134328359, | |
| "grad_norm": 0.24953646957874298, | |
| "learning_rate": 9.63350665939146e-05, | |
| "loss": 0.0451, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 12.462686567164178, | |
| "grad_norm": 0.2993795871734619, | |
| "learning_rate": 9.630393468087818e-05, | |
| "loss": 0.0469, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 0.2261819839477539, | |
| "learning_rate": 9.627267617465243e-05, | |
| "loss": 0.0484, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 12.537313432835822, | |
| "grad_norm": 0.23026186227798462, | |
| "learning_rate": 9.624129116069694e-05, | |
| "loss": 0.0452, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 12.574626865671641, | |
| "grad_norm": 0.27859947085380554, | |
| "learning_rate": 9.620977972481716e-05, | |
| "loss": 0.0593, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 12.611940298507463, | |
| "grad_norm": 0.23060785233974457, | |
| "learning_rate": 9.617814195316411e-05, | |
| "loss": 0.05, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 12.649253731343283, | |
| "grad_norm": 0.20185025036334991, | |
| "learning_rate": 9.614637793223425e-05, | |
| "loss": 0.0573, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 12.686567164179104, | |
| "grad_norm": 0.3584498167037964, | |
| "learning_rate": 9.611448774886924e-05, | |
| "loss": 0.052, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 12.723880597014926, | |
| "grad_norm": 0.19336827099323273, | |
| "learning_rate": 9.60824714902556e-05, | |
| "loss": 0.0535, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 12.761194029850746, | |
| "grad_norm": 0.22223635017871857, | |
| "learning_rate": 9.605032924392457e-05, | |
| "loss": 0.05, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 12.798507462686567, | |
| "grad_norm": 0.17108851671218872, | |
| "learning_rate": 9.601806109775179e-05, | |
| "loss": 0.0475, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 12.835820895522389, | |
| "grad_norm": 0.3861902952194214, | |
| "learning_rate": 9.598566713995718e-05, | |
| "loss": 0.0439, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 12.873134328358208, | |
| "grad_norm": 0.18927253782749176, | |
| "learning_rate": 9.595314745910456e-05, | |
| "loss": 0.052, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 12.91044776119403, | |
| "grad_norm": 0.21963383257389069, | |
| "learning_rate": 9.59205021441015e-05, | |
| "loss": 0.0504, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 12.947761194029852, | |
| "grad_norm": 0.18016670644283295, | |
| "learning_rate": 9.588773128419906e-05, | |
| "loss": 0.0467, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 12.985074626865671, | |
| "grad_norm": 0.1776365041732788, | |
| "learning_rate": 9.58548349689915e-05, | |
| "loss": 0.0414, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 13.022388059701493, | |
| "grad_norm": 0.2616482973098755, | |
| "learning_rate": 9.582181328841611e-05, | |
| "loss": 0.0442, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 13.059701492537313, | |
| "grad_norm": 0.20341171324253082, | |
| "learning_rate": 9.578866633275288e-05, | |
| "loss": 0.0533, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 13.097014925373134, | |
| "grad_norm": 0.2223699688911438, | |
| "learning_rate": 9.575539419262434e-05, | |
| "loss": 0.0458, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 13.134328358208956, | |
| "grad_norm": 0.22557464241981506, | |
| "learning_rate": 9.572199695899522e-05, | |
| "loss": 0.0445, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 13.171641791044776, | |
| "grad_norm": 0.25104308128356934, | |
| "learning_rate": 9.568847472317232e-05, | |
| "loss": 0.0435, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 13.208955223880597, | |
| "grad_norm": 0.18720711767673492, | |
| "learning_rate": 9.565482757680415e-05, | |
| "loss": 0.0453, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 13.246268656716419, | |
| "grad_norm": 0.16838951408863068, | |
| "learning_rate": 9.562105561188069e-05, | |
| "loss": 0.0505, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 13.283582089552239, | |
| "grad_norm": 0.31681734323501587, | |
| "learning_rate": 9.558715892073323e-05, | |
| "loss": 0.0494, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 13.32089552238806, | |
| "grad_norm": 0.2390700727701187, | |
| "learning_rate": 9.555313759603402e-05, | |
| "loss": 0.0538, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 13.35820895522388, | |
| "grad_norm": 0.20680709183216095, | |
| "learning_rate": 9.551899173079607e-05, | |
| "loss": 0.0519, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 13.395522388059701, | |
| "grad_norm": 0.2758580148220062, | |
| "learning_rate": 9.548472141837286e-05, | |
| "loss": 0.0512, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 13.432835820895523, | |
| "grad_norm": 0.3653097450733185, | |
| "learning_rate": 9.545032675245813e-05, | |
| "loss": 0.0496, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 13.470149253731343, | |
| "grad_norm": 0.23886866867542267, | |
| "learning_rate": 9.541580782708557e-05, | |
| "loss": 0.0455, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 13.507462686567164, | |
| "grad_norm": 0.3280908465385437, | |
| "learning_rate": 9.538116473662861e-05, | |
| "loss": 0.0489, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 13.544776119402986, | |
| "grad_norm": 0.20268180966377258, | |
| "learning_rate": 9.534639757580013e-05, | |
| "loss": 0.0484, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 13.582089552238806, | |
| "grad_norm": 0.2582015097141266, | |
| "learning_rate": 9.531150643965223e-05, | |
| "loss": 0.0487, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 13.619402985074627, | |
| "grad_norm": 0.18157973885536194, | |
| "learning_rate": 9.527649142357596e-05, | |
| "loss": 0.0496, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 13.656716417910447, | |
| "grad_norm": 0.22841542959213257, | |
| "learning_rate": 9.524135262330098e-05, | |
| "loss": 0.0467, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 13.694029850746269, | |
| "grad_norm": 0.2519935369491577, | |
| "learning_rate": 9.520609013489547e-05, | |
| "loss": 0.0487, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 13.73134328358209, | |
| "grad_norm": 0.24680495262145996, | |
| "learning_rate": 9.517070405476575e-05, | |
| "loss": 0.0457, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 13.76865671641791, | |
| "grad_norm": 0.26362067461013794, | |
| "learning_rate": 9.513519447965595e-05, | |
| "loss": 0.0495, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 13.805970149253731, | |
| "grad_norm": 0.3240712583065033, | |
| "learning_rate": 9.509956150664796e-05, | |
| "loss": 0.0496, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 13.843283582089553, | |
| "grad_norm": 0.21009013056755066, | |
| "learning_rate": 9.50638052331609e-05, | |
| "loss": 0.0457, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 13.880597014925373, | |
| "grad_norm": 0.1669154316186905, | |
| "learning_rate": 9.502792575695112e-05, | |
| "loss": 0.0496, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 13.917910447761194, | |
| "grad_norm": 0.22347605228424072, | |
| "learning_rate": 9.499192317611167e-05, | |
| "loss": 0.0426, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 13.955223880597014, | |
| "grad_norm": 0.15208907425403595, | |
| "learning_rate": 9.49557975890723e-05, | |
| "loss": 0.0447, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 13.992537313432836, | |
| "grad_norm": 0.3206101059913635, | |
| "learning_rate": 9.491954909459895e-05, | |
| "loss": 0.0471, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 14.029850746268657, | |
| "grad_norm": 0.15873713791370392, | |
| "learning_rate": 9.488317779179361e-05, | |
| "loss": 0.0401, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 14.067164179104477, | |
| "grad_norm": 0.19690357148647308, | |
| "learning_rate": 9.484668378009408e-05, | |
| "loss": 0.0491, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 14.104477611940299, | |
| "grad_norm": 0.3211113214492798, | |
| "learning_rate": 9.481006715927351e-05, | |
| "loss": 0.049, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 14.14179104477612, | |
| "grad_norm": 0.27657604217529297, | |
| "learning_rate": 9.477332802944044e-05, | |
| "loss": 0.0396, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 14.17910447761194, | |
| "grad_norm": 0.20194031298160553, | |
| "learning_rate": 9.473646649103818e-05, | |
| "loss": 0.0442, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 14.216417910447761, | |
| "grad_norm": 0.20344595611095428, | |
| "learning_rate": 9.46994826448448e-05, | |
| "loss": 0.0427, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 14.253731343283581, | |
| "grad_norm": 0.2067718505859375, | |
| "learning_rate": 9.46623765919727e-05, | |
| "loss": 0.0501, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 14.291044776119403, | |
| "grad_norm": 0.29719170928001404, | |
| "learning_rate": 9.462514843386845e-05, | |
| "loss": 0.0519, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 14.328358208955224, | |
| "grad_norm": 0.2347182184457779, | |
| "learning_rate": 9.458779827231237e-05, | |
| "loss": 0.0413, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 14.365671641791044, | |
| "grad_norm": 0.1558852344751358, | |
| "learning_rate": 9.45503262094184e-05, | |
| "loss": 0.0442, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 14.402985074626866, | |
| "grad_norm": 0.23085005581378937, | |
| "learning_rate": 9.451273234763371e-05, | |
| "loss": 0.047, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 14.440298507462687, | |
| "grad_norm": 0.1515151560306549, | |
| "learning_rate": 9.447501678973852e-05, | |
| "loss": 0.0481, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 14.477611940298507, | |
| "grad_norm": 0.1916729211807251, | |
| "learning_rate": 9.443717963884569e-05, | |
| "loss": 0.0474, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 14.514925373134329, | |
| "grad_norm": 0.2536492943763733, | |
| "learning_rate": 9.439922099840054e-05, | |
| "loss": 0.0382, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 14.552238805970148, | |
| "grad_norm": 0.1672086864709854, | |
| "learning_rate": 9.43611409721806e-05, | |
| "loss": 0.0497, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 14.58955223880597, | |
| "grad_norm": 0.3644237518310547, | |
| "learning_rate": 9.432293966429514e-05, | |
| "loss": 0.0444, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 14.626865671641792, | |
| "grad_norm": 0.20307251811027527, | |
| "learning_rate": 9.428461717918511e-05, | |
| "loss": 0.0452, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 14.664179104477611, | |
| "grad_norm": 0.20441733300685883, | |
| "learning_rate": 9.424617362162271e-05, | |
| "loss": 0.0454, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 14.701492537313433, | |
| "grad_norm": 0.26315611600875854, | |
| "learning_rate": 9.420760909671118e-05, | |
| "loss": 0.0486, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 14.738805970149254, | |
| "grad_norm": 0.1983092874288559, | |
| "learning_rate": 9.416892370988444e-05, | |
| "loss": 0.0483, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 14.776119402985074, | |
| "grad_norm": 0.18301443755626678, | |
| "learning_rate": 9.413011756690685e-05, | |
| "loss": 0.0456, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 14.813432835820896, | |
| "grad_norm": 0.2433597594499588, | |
| "learning_rate": 9.409119077387294e-05, | |
| "loss": 0.0463, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 14.850746268656717, | |
| "grad_norm": 0.27949392795562744, | |
| "learning_rate": 9.405214343720707e-05, | |
| "loss": 0.0412, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 14.888059701492537, | |
| "grad_norm": 0.22806599736213684, | |
| "learning_rate": 9.401297566366318e-05, | |
| "loss": 0.0448, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 14.925373134328359, | |
| "grad_norm": 0.25421562790870667, | |
| "learning_rate": 9.397368756032445e-05, | |
| "loss": 0.0426, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 14.962686567164178, | |
| "grad_norm": 0.2436474859714508, | |
| "learning_rate": 9.393427923460308e-05, | |
| "loss": 0.0474, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.3756405711174011, | |
| "learning_rate": 9.389475079423988e-05, | |
| "loss": 0.0438, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 15.037313432835822, | |
| "grad_norm": 0.25687697529792786, | |
| "learning_rate": 9.385510234730415e-05, | |
| "loss": 0.0435, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 15.074626865671641, | |
| "grad_norm": 0.17263716459274292, | |
| "learning_rate": 9.381533400219318e-05, | |
| "loss": 0.0455, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 15.111940298507463, | |
| "grad_norm": 0.2471216470003128, | |
| "learning_rate": 9.377544586763215e-05, | |
| "loss": 0.0429, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 15.149253731343283, | |
| "grad_norm": 0.20195460319519043, | |
| "learning_rate": 9.373543805267368e-05, | |
| "loss": 0.0432, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 15.186567164179104, | |
| "grad_norm": 0.1709851622581482, | |
| "learning_rate": 9.369531066669758e-05, | |
| "loss": 0.0477, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 15.223880597014926, | |
| "grad_norm": 0.23063932359218597, | |
| "learning_rate": 9.365506381941066e-05, | |
| "loss": 0.0379, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 15.261194029850746, | |
| "grad_norm": 0.3265426754951477, | |
| "learning_rate": 9.36146976208462e-05, | |
| "loss": 0.0435, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 15.298507462686567, | |
| "grad_norm": 0.26373934745788574, | |
| "learning_rate": 9.357421218136386e-05, | |
| "loss": 0.047, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 15.335820895522389, | |
| "grad_norm": 0.16861388087272644, | |
| "learning_rate": 9.353360761164931e-05, | |
| "loss": 0.0448, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 15.373134328358208, | |
| "grad_norm": 0.303790807723999, | |
| "learning_rate": 9.349288402271388e-05, | |
| "loss": 0.0396, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 15.41044776119403, | |
| "grad_norm": 0.1940719038248062, | |
| "learning_rate": 9.345204152589428e-05, | |
| "loss": 0.0474, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 15.447761194029852, | |
| "grad_norm": 0.34091615676879883, | |
| "learning_rate": 9.341108023285238e-05, | |
| "loss": 0.0424, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 15.485074626865671, | |
| "grad_norm": 0.27036693692207336, | |
| "learning_rate": 9.337000025557476e-05, | |
| "loss": 0.0482, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 15.522388059701493, | |
| "grad_norm": 0.16908007860183716, | |
| "learning_rate": 9.332880170637252e-05, | |
| "loss": 0.0381, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 15.559701492537313, | |
| "grad_norm": 0.23332923650741577, | |
| "learning_rate": 9.328748469788093e-05, | |
| "loss": 0.0427, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 15.597014925373134, | |
| "grad_norm": 0.16899706423282623, | |
| "learning_rate": 9.32460493430591e-05, | |
| "loss": 0.0439, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 15.634328358208956, | |
| "grad_norm": 0.12869524955749512, | |
| "learning_rate": 9.320449575518972e-05, | |
| "loss": 0.0481, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 15.671641791044776, | |
| "grad_norm": 0.21159130334854126, | |
| "learning_rate": 9.316282404787871e-05, | |
| "loss": 0.0446, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 15.708955223880597, | |
| "grad_norm": 0.1849961131811142, | |
| "learning_rate": 9.31210343350549e-05, | |
| "loss": 0.041, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 15.746268656716419, | |
| "grad_norm": 0.16107840836048126, | |
| "learning_rate": 9.30791267309698e-05, | |
| "loss": 0.0429, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 15.783582089552239, | |
| "grad_norm": 0.14206446707248688, | |
| "learning_rate": 9.30371013501972e-05, | |
| "loss": 0.0409, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 15.82089552238806, | |
| "grad_norm": 0.2168441116809845, | |
| "learning_rate": 9.299495830763286e-05, | |
| "loss": 0.0413, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 15.85820895522388, | |
| "grad_norm": 0.21431951224803925, | |
| "learning_rate": 9.295269771849427e-05, | |
| "loss": 0.0472, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 15.895522388059701, | |
| "grad_norm": 0.16851255297660828, | |
| "learning_rate": 9.291031969832026e-05, | |
| "loss": 0.0508, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 15.932835820895523, | |
| "grad_norm": 0.18404732644557953, | |
| "learning_rate": 9.286782436297073e-05, | |
| "loss": 0.0402, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 15.970149253731343, | |
| "grad_norm": 0.21722930669784546, | |
| "learning_rate": 9.282521182862629e-05, | |
| "loss": 0.0397, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 16.007462686567163, | |
| "grad_norm": 0.2523709833621979, | |
| "learning_rate": 9.278248221178798e-05, | |
| "loss": 0.0427, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 16.044776119402986, | |
| "grad_norm": 0.17736563086509705, | |
| "learning_rate": 9.273963562927695e-05, | |
| "loss": 0.0458, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 16.082089552238806, | |
| "grad_norm": 0.20613858103752136, | |
| "learning_rate": 9.269667219823412e-05, | |
| "loss": 0.0387, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 16.119402985074625, | |
| "grad_norm": 0.16557513177394867, | |
| "learning_rate": 9.265359203611987e-05, | |
| "loss": 0.0411, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 16.15671641791045, | |
| "grad_norm": 0.28119519352912903, | |
| "learning_rate": 9.261039526071374e-05, | |
| "loss": 0.0468, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 16.19402985074627, | |
| "grad_norm": 0.21538576483726501, | |
| "learning_rate": 9.256708199011401e-05, | |
| "loss": 0.0368, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 16.23134328358209, | |
| "grad_norm": 0.19657357037067413, | |
| "learning_rate": 9.252365234273755e-05, | |
| "loss": 0.038, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 16.26865671641791, | |
| "grad_norm": 0.19258421659469604, | |
| "learning_rate": 9.248010643731935e-05, | |
| "loss": 0.0414, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 16.30597014925373, | |
| "grad_norm": 0.28801625967025757, | |
| "learning_rate": 9.243644439291223e-05, | |
| "loss": 0.0387, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 16.34328358208955, | |
| "grad_norm": 0.16581468284130096, | |
| "learning_rate": 9.239266632888659e-05, | |
| "loss": 0.0383, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 16.380597014925375, | |
| "grad_norm": 0.34664949774742126, | |
| "learning_rate": 9.234877236492997e-05, | |
| "loss": 0.0453, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 16.417910447761194, | |
| "grad_norm": 0.1439947783946991, | |
| "learning_rate": 9.230476262104677e-05, | |
| "loss": 0.0466, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 16.455223880597014, | |
| "grad_norm": 0.15509940683841705, | |
| "learning_rate": 9.226063721755799e-05, | |
| "loss": 0.0488, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 16.492537313432837, | |
| "grad_norm": 0.18005985021591187, | |
| "learning_rate": 9.221639627510076e-05, | |
| "loss": 0.0407, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 16.529850746268657, | |
| "grad_norm": 0.16012470424175262, | |
| "learning_rate": 9.217203991462815e-05, | |
| "loss": 0.0394, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 16.567164179104477, | |
| "grad_norm": 0.2978847920894623, | |
| "learning_rate": 9.212756825740873e-05, | |
| "loss": 0.0451, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 16.604477611940297, | |
| "grad_norm": 0.2236834019422531, | |
| "learning_rate": 9.208298142502636e-05, | |
| "loss": 0.0487, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 16.64179104477612, | |
| "grad_norm": 0.2686060667037964, | |
| "learning_rate": 9.20382795393797e-05, | |
| "loss": 0.0403, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 16.67910447761194, | |
| "grad_norm": 0.33534038066864014, | |
| "learning_rate": 9.199346272268199e-05, | |
| "loss": 0.0385, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 16.71641791044776, | |
| "grad_norm": 0.19250528514385223, | |
| "learning_rate": 9.194853109746074e-05, | |
| "loss": 0.0441, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 16.753731343283583, | |
| "grad_norm": 0.19218407571315765, | |
| "learning_rate": 9.190348478655724e-05, | |
| "loss": 0.0474, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 16.791044776119403, | |
| "grad_norm": 0.21163488924503326, | |
| "learning_rate": 9.185832391312644e-05, | |
| "loss": 0.0411, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 16.828358208955223, | |
| "grad_norm": 0.1758819818496704, | |
| "learning_rate": 9.18130486006364e-05, | |
| "loss": 0.0462, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 16.865671641791046, | |
| "grad_norm": 0.18571069836616516, | |
| "learning_rate": 9.176765897286813e-05, | |
| "loss": 0.0425, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 16.902985074626866, | |
| "grad_norm": 0.20819155871868134, | |
| "learning_rate": 9.17221551539151e-05, | |
| "loss": 0.0428, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 16.940298507462686, | |
| "grad_norm": 0.30357328057289124, | |
| "learning_rate": 9.167653726818305e-05, | |
| "loss": 0.0414, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 16.97761194029851, | |
| "grad_norm": 0.20977462828159332, | |
| "learning_rate": 9.163080544038952e-05, | |
| "loss": 0.0447, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 17.01492537313433, | |
| "grad_norm": 0.2535971701145172, | |
| "learning_rate": 9.158495979556358e-05, | |
| "loss": 0.0384, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 17.05223880597015, | |
| "grad_norm": 0.2789897620677948, | |
| "learning_rate": 9.153900045904549e-05, | |
| "loss": 0.042, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 17.08955223880597, | |
| "grad_norm": 0.18474848568439484, | |
| "learning_rate": 9.14929275564863e-05, | |
| "loss": 0.0398, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 17.12686567164179, | |
| "grad_norm": 0.12615208327770233, | |
| "learning_rate": 9.144674121384757e-05, | |
| "loss": 0.0466, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 17.16417910447761, | |
| "grad_norm": 0.17756640911102295, | |
| "learning_rate": 9.140044155740101e-05, | |
| "loss": 0.035, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 17.20149253731343, | |
| "grad_norm": 0.24410821497440338, | |
| "learning_rate": 9.135402871372808e-05, | |
| "loss": 0.0459, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 17.238805970149254, | |
| "grad_norm": 0.21573011577129364, | |
| "learning_rate": 9.130750280971978e-05, | |
| "loss": 0.0385, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 17.276119402985074, | |
| "grad_norm": 0.13879653811454773, | |
| "learning_rate": 9.126086397257612e-05, | |
| "loss": 0.0391, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 17.313432835820894, | |
| "grad_norm": 0.17508305609226227, | |
| "learning_rate": 9.121411232980588e-05, | |
| "loss": 0.038, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 17.350746268656717, | |
| "grad_norm": 0.2536008358001709, | |
| "learning_rate": 9.116724800922629e-05, | |
| "loss": 0.0418, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 17.388059701492537, | |
| "grad_norm": 0.1942976713180542, | |
| "learning_rate": 9.112027113896262e-05, | |
| "loss": 0.052, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 17.425373134328357, | |
| "grad_norm": 0.16561119258403778, | |
| "learning_rate": 9.107318184744781e-05, | |
| "loss": 0.0451, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 17.46268656716418, | |
| "grad_norm": 0.22971832752227783, | |
| "learning_rate": 9.102598026342222e-05, | |
| "loss": 0.0407, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "grad_norm": 0.1306753158569336, | |
| "learning_rate": 9.097866651593317e-05, | |
| "loss": 0.042, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 17.53731343283582, | |
| "grad_norm": 0.21278400719165802, | |
| "learning_rate": 9.093124073433463e-05, | |
| "loss": 0.0458, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 17.574626865671643, | |
| "grad_norm": 0.22757171094417572, | |
| "learning_rate": 9.088370304828685e-05, | |
| "loss": 0.0364, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 17.611940298507463, | |
| "grad_norm": 0.216596320271492, | |
| "learning_rate": 9.083605358775612e-05, | |
| "loss": 0.0434, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 17.649253731343283, | |
| "grad_norm": 0.13022471964359283, | |
| "learning_rate": 9.078829248301417e-05, | |
| "loss": 0.0415, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 17.686567164179106, | |
| "grad_norm": 0.2280716598033905, | |
| "learning_rate": 9.074041986463808e-05, | |
| "loss": 0.0385, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 17.723880597014926, | |
| "grad_norm": 0.14666135609149933, | |
| "learning_rate": 9.069243586350975e-05, | |
| "loss": 0.0347, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 17.761194029850746, | |
| "grad_norm": 0.1631281077861786, | |
| "learning_rate": 9.064434061081562e-05, | |
| "loss": 0.0407, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 17.798507462686565, | |
| "grad_norm": 0.18697327375411987, | |
| "learning_rate": 9.059613423804623e-05, | |
| "loss": 0.0425, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 17.83582089552239, | |
| "grad_norm": 0.12955111265182495, | |
| "learning_rate": 9.0547816876996e-05, | |
| "loss": 0.0417, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 17.87313432835821, | |
| "grad_norm": 0.15547148883342743, | |
| "learning_rate": 9.049938865976275e-05, | |
| "loss": 0.0409, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 17.91044776119403, | |
| "grad_norm": 0.1900598704814911, | |
| "learning_rate": 9.045084971874738e-05, | |
| "loss": 0.0369, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 17.94776119402985, | |
| "grad_norm": 0.1846715807914734, | |
| "learning_rate": 9.040220018665347e-05, | |
| "loss": 0.0415, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 17.98507462686567, | |
| "grad_norm": 0.1829937845468521, | |
| "learning_rate": 9.035344019648702e-05, | |
| "loss": 0.0407, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 18.02238805970149, | |
| "grad_norm": 0.25900354981422424, | |
| "learning_rate": 9.030456988155596e-05, | |
| "loss": 0.0398, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 18.059701492537314, | |
| "grad_norm": 0.21235992014408112, | |
| "learning_rate": 9.025558937546988e-05, | |
| "loss": 0.0477, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 18.097014925373134, | |
| "grad_norm": 0.18785078823566437, | |
| "learning_rate": 9.020649881213958e-05, | |
| "loss": 0.039, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 18.134328358208954, | |
| "grad_norm": 0.1951548010110855, | |
| "learning_rate": 9.015729832577681e-05, | |
| "loss": 0.0357, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 18.171641791044777, | |
| "grad_norm": 0.1280934363603592, | |
| "learning_rate": 9.010798805089384e-05, | |
| "loss": 0.0425, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 18.208955223880597, | |
| "grad_norm": 0.1693423092365265, | |
| "learning_rate": 9.005856812230304e-05, | |
| "loss": 0.0447, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 18.246268656716417, | |
| "grad_norm": 0.23712658882141113, | |
| "learning_rate": 9.000903867511666e-05, | |
| "loss": 0.042, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 18.28358208955224, | |
| "grad_norm": 0.26489710807800293, | |
| "learning_rate": 8.995939984474624e-05, | |
| "loss": 0.0457, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 18.32089552238806, | |
| "grad_norm": 0.20792756974697113, | |
| "learning_rate": 8.990965176690252e-05, | |
| "loss": 0.0422, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 18.35820895522388, | |
| "grad_norm": 0.18526089191436768, | |
| "learning_rate": 8.98597945775948e-05, | |
| "loss": 0.0366, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 18.395522388059703, | |
| "grad_norm": 0.2214607298374176, | |
| "learning_rate": 8.980982841313074e-05, | |
| "loss": 0.0405, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 18.432835820895523, | |
| "grad_norm": 0.1896953135728836, | |
| "learning_rate": 8.975975341011596e-05, | |
| "loss": 0.0391, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 18.470149253731343, | |
| "grad_norm": 0.1430232971906662, | |
| "learning_rate": 8.970956970545355e-05, | |
| "loss": 0.0403, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 18.507462686567163, | |
| "grad_norm": 0.1991272121667862, | |
| "learning_rate": 8.965927743634391e-05, | |
| "loss": 0.0429, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 18.544776119402986, | |
| "grad_norm": 0.2361849844455719, | |
| "learning_rate": 8.96088767402841e-05, | |
| "loss": 0.0416, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 18.582089552238806, | |
| "grad_norm": 0.25857019424438477, | |
| "learning_rate": 8.955836775506776e-05, | |
| "loss": 0.0461, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 18.619402985074625, | |
| "grad_norm": 0.12873682379722595, | |
| "learning_rate": 8.950775061878453e-05, | |
| "loss": 0.035, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 18.65671641791045, | |
| "grad_norm": 0.19786769151687622, | |
| "learning_rate": 8.945702546981969e-05, | |
| "loss": 0.0399, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 18.69402985074627, | |
| "grad_norm": 0.2562239170074463, | |
| "learning_rate": 8.940619244685388e-05, | |
| "loss": 0.0372, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 18.73134328358209, | |
| "grad_norm": 0.14586858451366425, | |
| "learning_rate": 8.935525168886262e-05, | |
| "loss": 0.0427, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 18.76865671641791, | |
| "grad_norm": 0.20062318444252014, | |
| "learning_rate": 8.930420333511606e-05, | |
| "loss": 0.0403, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 18.80597014925373, | |
| "grad_norm": 0.22698874771595, | |
| "learning_rate": 8.92530475251784e-05, | |
| "loss": 0.036, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 18.84328358208955, | |
| "grad_norm": 0.2103697657585144, | |
| "learning_rate": 8.920178439890765e-05, | |
| "loss": 0.0431, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 18.880597014925375, | |
| "grad_norm": 0.16042308509349823, | |
| "learning_rate": 8.91504140964553e-05, | |
| "loss": 0.0388, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 18.917910447761194, | |
| "grad_norm": 0.16874109208583832, | |
| "learning_rate": 8.909893675826574e-05, | |
| "loss": 0.0388, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 18.955223880597014, | |
| "grad_norm": 0.15569192171096802, | |
| "learning_rate": 8.90473525250761e-05, | |
| "loss": 0.0353, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 18.992537313432837, | |
| "grad_norm": 0.16723507642745972, | |
| "learning_rate": 8.899566153791566e-05, | |
| "loss": 0.0443, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 19.029850746268657, | |
| "grad_norm": 0.23284228146076202, | |
| "learning_rate": 8.894386393810563e-05, | |
| "loss": 0.05, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 19.067164179104477, | |
| "grad_norm": 0.1621718853712082, | |
| "learning_rate": 8.889195986725865e-05, | |
| "loss": 0.0369, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 19.104477611940297, | |
| "grad_norm": 0.17522747814655304, | |
| "learning_rate": 8.883994946727849e-05, | |
| "loss": 0.0475, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 19.14179104477612, | |
| "grad_norm": 0.16110533475875854, | |
| "learning_rate": 8.878783288035957e-05, | |
| "loss": 0.0383, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 19.17910447761194, | |
| "grad_norm": 0.2574177086353302, | |
| "learning_rate": 8.873561024898668e-05, | |
| "loss": 0.0383, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 19.21641791044776, | |
| "grad_norm": 0.14560100436210632, | |
| "learning_rate": 8.868328171593448e-05, | |
| "loss": 0.037, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 19.253731343283583, | |
| "grad_norm": 0.14456631243228912, | |
| "learning_rate": 8.863084742426719e-05, | |
| "loss": 0.0423, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 19.291044776119403, | |
| "grad_norm": 0.1403595507144928, | |
| "learning_rate": 8.857830751733815e-05, | |
| "loss": 0.0327, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 19.328358208955223, | |
| "grad_norm": 0.18462564051151276, | |
| "learning_rate": 8.852566213878947e-05, | |
| "loss": 0.037, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 19.365671641791046, | |
| "grad_norm": 0.20725117623806, | |
| "learning_rate": 8.84729114325516e-05, | |
| "loss": 0.0376, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 19.402985074626866, | |
| "grad_norm": 0.17023132741451263, | |
| "learning_rate": 8.842005554284296e-05, | |
| "loss": 0.0467, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 19.440298507462686, | |
| "grad_norm": 0.31033241748809814, | |
| "learning_rate": 8.836709461416952e-05, | |
| "loss": 0.0425, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 19.47761194029851, | |
| "grad_norm": 0.14057482779026031, | |
| "learning_rate": 8.831402879132446e-05, | |
| "loss": 0.0432, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 19.51492537313433, | |
| "grad_norm": 0.23247437179088593, | |
| "learning_rate": 8.82608582193877e-05, | |
| "loss": 0.0396, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 19.55223880597015, | |
| "grad_norm": 0.1305907964706421, | |
| "learning_rate": 8.820758304372557e-05, | |
| "loss": 0.0389, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 19.58955223880597, | |
| "grad_norm": 0.17093417048454285, | |
| "learning_rate": 8.815420340999033e-05, | |
| "loss": 0.0347, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 19.62686567164179, | |
| "grad_norm": 0.24105240404605865, | |
| "learning_rate": 8.810071946411989e-05, | |
| "loss": 0.0392, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 19.66417910447761, | |
| "grad_norm": 0.2234315127134323, | |
| "learning_rate": 8.804713135233731e-05, | |
| "loss": 0.0403, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 19.701492537313435, | |
| "grad_norm": 0.16947844624519348, | |
| "learning_rate": 8.799343922115044e-05, | |
| "loss": 0.0368, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 19.738805970149254, | |
| "grad_norm": 0.26133742928504944, | |
| "learning_rate": 8.79396432173515e-05, | |
| "loss": 0.041, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 19.776119402985074, | |
| "grad_norm": 0.2099352777004242, | |
| "learning_rate": 8.788574348801675e-05, | |
| "loss": 0.0363, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 19.813432835820894, | |
| "grad_norm": 0.1662513017654419, | |
| "learning_rate": 8.783174018050594e-05, | |
| "loss": 0.0409, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 19.850746268656717, | |
| "grad_norm": 0.18933714926242828, | |
| "learning_rate": 8.77776334424621e-05, | |
| "loss": 0.0348, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 19.888059701492537, | |
| "grad_norm": 0.21673552691936493, | |
| "learning_rate": 8.772342342181095e-05, | |
| "loss": 0.037, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 19.925373134328357, | |
| "grad_norm": 0.13009892404079437, | |
| "learning_rate": 8.766911026676064e-05, | |
| "loss": 0.0386, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 19.96268656716418, | |
| "grad_norm": 0.1655230075120926, | |
| "learning_rate": 8.761469412580125e-05, | |
| "loss": 0.0404, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.2821272611618042, | |
| "learning_rate": 8.756017514770443e-05, | |
| "loss": 0.0441, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 20.03731343283582, | |
| "grad_norm": 0.1302652508020401, | |
| "learning_rate": 8.750555348152298e-05, | |
| "loss": 0.0389, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 20.074626865671643, | |
| "grad_norm": 0.13331563770771027, | |
| "learning_rate": 8.745082927659047e-05, | |
| "loss": 0.0393, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 20.111940298507463, | |
| "grad_norm": 0.244130939245224, | |
| "learning_rate": 8.739600268252078e-05, | |
| "loss": 0.0372, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 20.149253731343283, | |
| "grad_norm": 0.20429308712482452, | |
| "learning_rate": 8.73410738492077e-05, | |
| "loss": 0.0387, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 20.186567164179106, | |
| "grad_norm": 0.2954719364643097, | |
| "learning_rate": 8.728604292682459e-05, | |
| "loss": 0.0404, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 20.223880597014926, | |
| "grad_norm": 0.20438429713249207, | |
| "learning_rate": 8.723091006582389e-05, | |
| "loss": 0.0359, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 20.261194029850746, | |
| "grad_norm": 0.17289331555366516, | |
| "learning_rate": 8.717567541693673e-05, | |
| "loss": 0.0357, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 20.298507462686565, | |
| "grad_norm": 0.24367138743400574, | |
| "learning_rate": 8.71203391311725e-05, | |
| "loss": 0.0392, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 20.33582089552239, | |
| "grad_norm": 0.21900270879268646, | |
| "learning_rate": 8.706490135981855e-05, | |
| "loss": 0.0419, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 20.37313432835821, | |
| "grad_norm": 0.1526443362236023, | |
| "learning_rate": 8.700936225443959e-05, | |
| "loss": 0.0333, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 20.41044776119403, | |
| "grad_norm": 0.24582353234291077, | |
| "learning_rate": 8.695372196687743e-05, | |
| "loss": 0.0417, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 20.44776119402985, | |
| "grad_norm": 0.21462485194206238, | |
| "learning_rate": 8.689798064925049e-05, | |
| "loss": 0.0347, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 20.48507462686567, | |
| "grad_norm": 0.17611616849899292, | |
| "learning_rate": 8.684213845395339e-05, | |
| "loss": 0.0395, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 20.52238805970149, | |
| "grad_norm": 0.19724012911319733, | |
| "learning_rate": 8.678619553365659e-05, | |
| "loss": 0.0332, | |
| "step": 5500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 20000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 75, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.134508587240192e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |