| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.992771084337349, | |
| "eval_steps": 500, | |
| "global_step": 775, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00642570281124498, | |
| "grad_norm": 0.8317294716835022, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.1978, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01285140562248996, | |
| "grad_norm": 0.94012850522995, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.2672, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01927710843373494, | |
| "grad_norm": 1.2400332689285278, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.2964, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.02570281124497992, | |
| "grad_norm": 2.0498180389404297, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.3573, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0321285140562249, | |
| "grad_norm": 1.034347653388977, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.2913, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03855421686746988, | |
| "grad_norm": 0.6081845164299011, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2519, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.04497991967871486, | |
| "grad_norm": 0.4193064272403717, | |
| "learning_rate": 5.833333333333334e-05, | |
| "loss": 0.2426, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05140562248995984, | |
| "grad_norm": 0.50704026222229, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.2293, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.05783132530120482, | |
| "grad_norm": 0.3707605302333832, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.2189, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0642570281124498, | |
| "grad_norm": 0.34638485312461853, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 0.1798, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07068273092369477, | |
| "grad_norm": 0.4543774425983429, | |
| "learning_rate": 9.166666666666667e-05, | |
| "loss": 0.1842, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.07710843373493977, | |
| "grad_norm": 0.3193999230861664, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2133, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.08353413654618475, | |
| "grad_norm": 0.3274695575237274, | |
| "learning_rate": 0.00010833333333333333, | |
| "loss": 0.2086, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.08995983935742972, | |
| "grad_norm": 0.32100680470466614, | |
| "learning_rate": 0.00011666666666666668, | |
| "loss": 0.1988, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0963855421686747, | |
| "grad_norm": 0.3277706205844879, | |
| "learning_rate": 0.000125, | |
| "loss": 0.1865, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.10281124497991968, | |
| "grad_norm": 0.2264498621225357, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 0.1605, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.10923694779116466, | |
| "grad_norm": 0.3071700930595398, | |
| "learning_rate": 0.00014166666666666668, | |
| "loss": 0.1535, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.11566265060240964, | |
| "grad_norm": 0.3147311508655548, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 0.1637, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.12208835341365462, | |
| "grad_norm": 0.32233041524887085, | |
| "learning_rate": 0.00015833333333333332, | |
| "loss": 0.17, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.1285140562248996, | |
| "grad_norm": 0.2847141921520233, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 0.1432, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.13493975903614458, | |
| "grad_norm": 0.45303934812545776, | |
| "learning_rate": 0.000175, | |
| "loss": 0.1595, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.14136546184738955, | |
| "grad_norm": 0.32803109288215637, | |
| "learning_rate": 0.00018333333333333334, | |
| "loss": 0.1263, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.14779116465863454, | |
| "grad_norm": 0.7632677555084229, | |
| "learning_rate": 0.00019166666666666667, | |
| "loss": 0.162, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.15421686746987953, | |
| "grad_norm": 7.622311115264893, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1743, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.1606425702811245, | |
| "grad_norm": 0.41840752959251404, | |
| "learning_rate": 0.00019999912503789813, | |
| "loss": 0.1967, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1670682730923695, | |
| "grad_norm": 0.29047325253486633, | |
| "learning_rate": 0.00019999650016690364, | |
| "loss": 0.1196, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.17349397590361446, | |
| "grad_norm": 0.2337496131658554, | |
| "learning_rate": 0.0001999921254329498, | |
| "loss": 0.1488, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.17991967871485945, | |
| "grad_norm": 0.2089911699295044, | |
| "learning_rate": 0.00019998600091259113, | |
| "loss": 0.138, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.18634538152610441, | |
| "grad_norm": 0.268136590719223, | |
| "learning_rate": 0.00019997812671300214, | |
| "loss": 0.1845, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.1927710843373494, | |
| "grad_norm": 0.2347370982170105, | |
| "learning_rate": 0.0001999685029719753, | |
| "loss": 0.1257, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.19919678714859437, | |
| "grad_norm": 0.21996308863162994, | |
| "learning_rate": 0.0001999571298579188, | |
| "loss": 0.171, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.20562248995983937, | |
| "grad_norm": 0.1974944919347763, | |
| "learning_rate": 0.0001999440075698535, | |
| "loss": 0.1095, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.21204819277108433, | |
| "grad_norm": 0.15095502138137817, | |
| "learning_rate": 0.00019992913633740957, | |
| "loss": 0.1663, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.21847389558232932, | |
| "grad_norm": 0.2080863118171692, | |
| "learning_rate": 0.0001999125164208222, | |
| "loss": 0.141, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.2248995983935743, | |
| "grad_norm": 0.2128421813249588, | |
| "learning_rate": 0.0001998941481109274, | |
| "loss": 0.2076, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.23132530120481928, | |
| "grad_norm": 0.2011524736881256, | |
| "learning_rate": 0.00019987403172915666, | |
| "loss": 0.1419, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.23775100401606425, | |
| "grad_norm": 0.16084055602550507, | |
| "learning_rate": 0.00019985216762753139, | |
| "loss": 0.1357, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.24417670682730924, | |
| "grad_norm": 0.1661912202835083, | |
| "learning_rate": 0.0001998285561886568, | |
| "loss": 0.1471, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.25060240963855424, | |
| "grad_norm": 0.2506616413593292, | |
| "learning_rate": 0.00019980319782571523, | |
| "loss": 0.1555, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.2570281124497992, | |
| "grad_norm": 0.17315006256103516, | |
| "learning_rate": 0.00019977609298245873, | |
| "loss": 0.1468, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.26345381526104417, | |
| "grad_norm": 0.1772138923406601, | |
| "learning_rate": 0.00019974724213320157, | |
| "loss": 0.1447, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.26987951807228916, | |
| "grad_norm": 0.2274760603904724, | |
| "learning_rate": 0.00019971664578281173, | |
| "loss": 0.1707, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.27630522088353415, | |
| "grad_norm": 0.16768603026866913, | |
| "learning_rate": 0.00019968430446670212, | |
| "loss": 0.147, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.2827309236947791, | |
| "grad_norm": 0.1941104382276535, | |
| "learning_rate": 0.0001996502187508213, | |
| "loss": 0.1415, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.2891566265060241, | |
| "grad_norm": 0.17718106508255005, | |
| "learning_rate": 0.00019961438923164345, | |
| "loss": 0.1297, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.2955823293172691, | |
| "grad_norm": 0.17948807775974274, | |
| "learning_rate": 0.00019957681653615797, | |
| "loss": 0.1349, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.30200803212851407, | |
| "grad_norm": 0.19423460960388184, | |
| "learning_rate": 0.0001995375013218586, | |
| "loss": 0.1277, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.30843373493975906, | |
| "grad_norm": 0.2893676161766052, | |
| "learning_rate": 0.00019949644427673177, | |
| "loss": 0.1485, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.314859437751004, | |
| "grad_norm": 0.4430103600025177, | |
| "learning_rate": 0.00019945364611924463, | |
| "loss": 0.1306, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.321285140562249, | |
| "grad_norm": 0.28305917978286743, | |
| "learning_rate": 0.0001994091075983325, | |
| "loss": 0.1646, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.327710843373494, | |
| "grad_norm": 0.10388786345720291, | |
| "learning_rate": 0.00019936282949338578, | |
| "loss": 0.097, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.334136546184739, | |
| "grad_norm": 0.13306961953639984, | |
| "learning_rate": 0.00019931481261423618, | |
| "loss": 0.1222, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.3405622489959839, | |
| "grad_norm": 0.1919727623462677, | |
| "learning_rate": 0.00019926505780114276, | |
| "loss": 0.1566, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.3469879518072289, | |
| "grad_norm": 0.21380549669265747, | |
| "learning_rate": 0.0001992135659247769, | |
| "loss": 0.1404, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.3534136546184739, | |
| "grad_norm": 0.1510586440563202, | |
| "learning_rate": 0.00019916033788620755, | |
| "loss": 0.1453, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.3598393574297189, | |
| "grad_norm": 0.11387544125318527, | |
| "learning_rate": 0.000199105374616885, | |
| "loss": 0.1261, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.36626506024096384, | |
| "grad_norm": 0.15470993518829346, | |
| "learning_rate": 0.00019904867707862476, | |
| "loss": 0.163, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.37269076305220883, | |
| "grad_norm": 0.18749335408210754, | |
| "learning_rate": 0.0001989902462635908, | |
| "loss": 0.1452, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3791164658634538, | |
| "grad_norm": 0.13796862959861755, | |
| "learning_rate": 0.00019893008319427812, | |
| "loss": 0.1257, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.3855421686746988, | |
| "grad_norm": 0.18501056730747223, | |
| "learning_rate": 0.00019886818892349482, | |
| "loss": 0.1143, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.39196787148594375, | |
| "grad_norm": 0.18443071842193604, | |
| "learning_rate": 0.00019880456453434369, | |
| "loss": 0.1395, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.39839357429718875, | |
| "grad_norm": 0.1594623327255249, | |
| "learning_rate": 0.00019873921114020333, | |
| "loss": 0.1505, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.40481927710843374, | |
| "grad_norm": 0.1715545505285263, | |
| "learning_rate": 0.00019867212988470864, | |
| "loss": 0.115, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.41124497991967873, | |
| "grad_norm": 0.24589896202087402, | |
| "learning_rate": 0.0001986033219417307, | |
| "loss": 0.1549, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.41767068273092367, | |
| "grad_norm": 0.1842864602804184, | |
| "learning_rate": 0.00019853278851535638, | |
| "loss": 0.1511, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.42409638554216866, | |
| "grad_norm": 0.20570969581604004, | |
| "learning_rate": 0.00019846053083986717, | |
| "loss": 0.168, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.43052208835341366, | |
| "grad_norm": 0.1519116312265396, | |
| "learning_rate": 0.00019838655017971767, | |
| "loss": 0.1142, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.43694779116465865, | |
| "grad_norm": 0.2530803978443146, | |
| "learning_rate": 0.00019831084782951326, | |
| "loss": 0.1359, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.4433734939759036, | |
| "grad_norm": 0.2451787292957306, | |
| "learning_rate": 0.00019823342511398776, | |
| "loss": 0.1257, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.4497991967871486, | |
| "grad_norm": 0.43833062052726746, | |
| "learning_rate": 0.00019815428338798002, | |
| "loss": 0.1275, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.4562248995983936, | |
| "grad_norm": 0.1903715431690216, | |
| "learning_rate": 0.0001980734240364102, | |
| "loss": 0.1357, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.46265060240963857, | |
| "grad_norm": 0.28559988737106323, | |
| "learning_rate": 0.00019799084847425572, | |
| "loss": 0.1312, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.46907630522088356, | |
| "grad_norm": 0.2496558576822281, | |
| "learning_rate": 0.0001979065581465263, | |
| "loss": 0.1633, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.4755020080321285, | |
| "grad_norm": 0.2327835112810135, | |
| "learning_rate": 0.00019782055452823878, | |
| "loss": 0.1442, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.4819277108433735, | |
| "grad_norm": 0.3205198645591736, | |
| "learning_rate": 0.00019773283912439133, | |
| "loss": 0.1511, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.4883534136546185, | |
| "grad_norm": 0.1274174302816391, | |
| "learning_rate": 0.00019764341346993698, | |
| "loss": 0.0996, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.4947791164658635, | |
| "grad_norm": 0.2272380292415619, | |
| "learning_rate": 0.00019755227912975697, | |
| "loss": 0.1176, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.5012048192771085, | |
| "grad_norm": 0.13094107806682587, | |
| "learning_rate": 0.0001974594376986331, | |
| "loss": 0.1184, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.5076305220883535, | |
| "grad_norm": 0.13428834080696106, | |
| "learning_rate": 0.00019736489080122006, | |
| "loss": 0.1309, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.5140562248995983, | |
| "grad_norm": 0.1373153030872345, | |
| "learning_rate": 0.00019726864009201694, | |
| "loss": 0.1376, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5204819277108433, | |
| "grad_norm": 0.1309944987297058, | |
| "learning_rate": 0.00019717068725533818, | |
| "loss": 0.1403, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.5269076305220883, | |
| "grad_norm": 0.14153365790843964, | |
| "learning_rate": 0.00019707103400528415, | |
| "loss": 0.1399, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 0.1331794112920761, | |
| "learning_rate": 0.0001969696820857112, | |
| "loss": 0.1455, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.5397590361445783, | |
| "grad_norm": 0.12772125005722046, | |
| "learning_rate": 0.0001968666332702011, | |
| "loss": 0.1409, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.5461847389558233, | |
| "grad_norm": 0.13976705074310303, | |
| "learning_rate": 0.00019676188936203006, | |
| "loss": 0.1144, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.5526104417670683, | |
| "grad_norm": 0.1431870311498642, | |
| "learning_rate": 0.00019665545219413701, | |
| "loss": 0.148, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.5590361445783133, | |
| "grad_norm": 0.1257038563489914, | |
| "learning_rate": 0.00019654732362909177, | |
| "loss": 0.1197, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.5654618473895582, | |
| "grad_norm": 0.16741393506526947, | |
| "learning_rate": 0.00019643750555906224, | |
| "loss": 0.1563, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.5718875502008032, | |
| "grad_norm": 0.16155458986759186, | |
| "learning_rate": 0.00019632599990578143, | |
| "loss": 0.1333, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.5783132530120482, | |
| "grad_norm": 0.17337974905967712, | |
| "learning_rate": 0.00019621280862051373, | |
| "loss": 0.1669, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.5847389558232932, | |
| "grad_norm": 0.17315532267093658, | |
| "learning_rate": 0.00019609793368402086, | |
| "loss": 0.1488, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.5911646586345382, | |
| "grad_norm": 0.15965646505355835, | |
| "learning_rate": 0.0001959813771065271, | |
| "loss": 0.1207, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.5975903614457831, | |
| "grad_norm": 0.15546628832817078, | |
| "learning_rate": 0.00019586314092768424, | |
| "loss": 0.1147, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.6040160642570281, | |
| "grad_norm": 0.16691721975803375, | |
| "learning_rate": 0.00019574322721653583, | |
| "loss": 0.1172, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.6104417670682731, | |
| "grad_norm": 0.1729833036661148, | |
| "learning_rate": 0.00019562163807148084, | |
| "loss": 0.12, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.6168674698795181, | |
| "grad_norm": 0.17864178121089935, | |
| "learning_rate": 0.0001954983756202372, | |
| "loss": 0.1266, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.623293172690763, | |
| "grad_norm": 0.20421630144119263, | |
| "learning_rate": 0.0001953734420198044, | |
| "loss": 0.1531, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.629718875502008, | |
| "grad_norm": 0.19765256345272064, | |
| "learning_rate": 0.0001952468394564257, | |
| "loss": 0.1134, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.636144578313253, | |
| "grad_norm": 0.197422593832016, | |
| "learning_rate": 0.00019511857014555, | |
| "loss": 0.1292, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.642570281124498, | |
| "grad_norm": 0.2465924769639969, | |
| "learning_rate": 0.00019498863633179308, | |
| "loss": 0.1426, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.648995983935743, | |
| "grad_norm": 0.136220321059227, | |
| "learning_rate": 0.00019485704028889813, | |
| "loss": 0.0881, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.655421686746988, | |
| "grad_norm": 0.14770826697349548, | |
| "learning_rate": 0.0001947237843196962, | |
| "loss": 0.125, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.661847389558233, | |
| "grad_norm": 0.13019175827503204, | |
| "learning_rate": 0.0001945888707560657, | |
| "loss": 0.1271, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.668273092369478, | |
| "grad_norm": 0.10536068677902222, | |
| "learning_rate": 0.0001944523019588918, | |
| "loss": 0.107, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.6746987951807228, | |
| "grad_norm": 0.10180668532848358, | |
| "learning_rate": 0.00019431408031802486, | |
| "loss": 0.1145, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.6811244979919678, | |
| "grad_norm": 0.14559617638587952, | |
| "learning_rate": 0.00019417420825223891, | |
| "loss": 0.1395, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.6875502008032128, | |
| "grad_norm": 0.13509546220302582, | |
| "learning_rate": 0.000194032688209189, | |
| "loss": 0.1478, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.6939759036144578, | |
| "grad_norm": 0.13227735459804535, | |
| "learning_rate": 0.00019388952266536868, | |
| "loss": 0.1445, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.7004016064257028, | |
| "grad_norm": 0.16162370145320892, | |
| "learning_rate": 0.00019374471412606642, | |
| "loss": 0.1246, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.7068273092369478, | |
| "grad_norm": 0.1407587081193924, | |
| "learning_rate": 0.00019359826512532194, | |
| "loss": 0.1421, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.7132530120481928, | |
| "grad_norm": 0.15528196096420288, | |
| "learning_rate": 0.00019345017822588168, | |
| "loss": 0.1629, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.7196787148594378, | |
| "grad_norm": 0.1608172059059143, | |
| "learning_rate": 0.0001933004560191542, | |
| "loss": 0.1538, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.7261044176706827, | |
| "grad_norm": 0.15321175754070282, | |
| "learning_rate": 0.00019314910112516463, | |
| "loss": 0.1251, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.7325301204819277, | |
| "grad_norm": 0.17383867502212524, | |
| "learning_rate": 0.00019299611619250881, | |
| "loss": 0.1531, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.7389558232931727, | |
| "grad_norm": 0.18434979021549225, | |
| "learning_rate": 0.00019284150389830721, | |
| "loss": 0.1847, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.7453815261044177, | |
| "grad_norm": 0.16240356862545013, | |
| "learning_rate": 0.00019268526694815773, | |
| "loss": 0.1712, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.7518072289156627, | |
| "grad_norm": 0.17521648108959198, | |
| "learning_rate": 0.0001925274080760886, | |
| "loss": 0.1222, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.7582329317269076, | |
| "grad_norm": 0.16100138425827026, | |
| "learning_rate": 0.00019236793004451044, | |
| "loss": 0.1238, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.7646586345381526, | |
| "grad_norm": 0.16682398319244385, | |
| "learning_rate": 0.00019220683564416787, | |
| "loss": 0.0914, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.7710843373493976, | |
| "grad_norm": 0.15211397409439087, | |
| "learning_rate": 0.00019204412769409086, | |
| "loss": 0.1051, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.7775100401606426, | |
| "grad_norm": 0.19107018411159515, | |
| "learning_rate": 0.00019187980904154515, | |
| "loss": 0.1532, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.7839357429718875, | |
| "grad_norm": 0.18667763471603394, | |
| "learning_rate": 0.00019171388256198268, | |
| "loss": 0.1435, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.7903614457831325, | |
| "grad_norm": 0.1942739635705948, | |
| "learning_rate": 0.000191546351158991, | |
| "loss": 0.1137, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.7967871485943775, | |
| "grad_norm": 0.23028349876403809, | |
| "learning_rate": 0.00019137721776424274, | |
| "loss": 0.1293, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.8032128514056225, | |
| "grad_norm": 0.25495702028274536, | |
| "learning_rate": 0.0001912064853374441, | |
| "loss": 0.1441, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.8096385542168675, | |
| "grad_norm": 0.10432910919189453, | |
| "learning_rate": 0.0001910341568662831, | |
| "loss": 0.0831, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.8160642570281125, | |
| "grad_norm": 0.11154992133378983, | |
| "learning_rate": 0.00019086023536637737, | |
| "loss": 0.1183, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.8224899598393575, | |
| "grad_norm": 0.13584573566913605, | |
| "learning_rate": 0.0001906847238812214, | |
| "loss": 0.1441, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.8289156626506025, | |
| "grad_norm": 0.35521605610847473, | |
| "learning_rate": 0.0001905076254821331, | |
| "loss": 0.1368, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.8353413654618473, | |
| "grad_norm": 0.14569194614887238, | |
| "learning_rate": 0.00019032894326820023, | |
| "loss": 0.1285, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8417670682730923, | |
| "grad_norm": 0.16236892342567444, | |
| "learning_rate": 0.0001901486803662261, | |
| "loss": 0.1578, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.8481927710843373, | |
| "grad_norm": 0.11677072197198868, | |
| "learning_rate": 0.00018996683993067483, | |
| "loss": 0.1183, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.8546184738955823, | |
| "grad_norm": 0.1277882605791092, | |
| "learning_rate": 0.00018978342514361626, | |
| "loss": 0.1196, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.8610441767068273, | |
| "grad_norm": 0.12904071807861328, | |
| "learning_rate": 0.00018959843921467014, | |
| "loss": 0.1281, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.8674698795180723, | |
| "grad_norm": 0.13108272850513458, | |
| "learning_rate": 0.00018941188538094999, | |
| "loss": 0.1187, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.8738955823293173, | |
| "grad_norm": 0.15289278328418732, | |
| "learning_rate": 0.0001892237669070065, | |
| "loss": 0.1524, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.8803212851405623, | |
| "grad_norm": 0.13197748363018036, | |
| "learning_rate": 0.0001890340870847704, | |
| "loss": 0.1104, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.8867469879518072, | |
| "grad_norm": 0.15141014754772186, | |
| "learning_rate": 0.00018884284923349477, | |
| "loss": 0.154, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.8931726907630522, | |
| "grad_norm": 0.1232500970363617, | |
| "learning_rate": 0.00018865005669969708, | |
| "loss": 0.102, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.8995983935742972, | |
| "grad_norm": 0.16395455598831177, | |
| "learning_rate": 0.00018845571285710058, | |
| "loss": 0.145, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.9060240963855422, | |
| "grad_norm": 0.15895424783229828, | |
| "learning_rate": 0.00018825982110657515, | |
| "loss": 0.1268, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.9124497991967871, | |
| "grad_norm": 0.15639305114746094, | |
| "learning_rate": 0.00018806238487607794, | |
| "loss": 0.126, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.9188755020080321, | |
| "grad_norm": 0.14362278580665588, | |
| "learning_rate": 0.0001878634076205934, | |
| "loss": 0.0981, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.9253012048192771, | |
| "grad_norm": 0.1624501496553421, | |
| "learning_rate": 0.00018766289282207263, | |
| "loss": 0.1208, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.9317269076305221, | |
| "grad_norm": 0.1643369346857071, | |
| "learning_rate": 0.00018746084398937266, | |
| "loss": 0.1088, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.9381526104417671, | |
| "grad_norm": 0.19890688359737396, | |
| "learning_rate": 0.00018725726465819488, | |
| "loss": 0.1476, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.944578313253012, | |
| "grad_norm": 0.16708028316497803, | |
| "learning_rate": 0.00018705215839102328, | |
| "loss": 0.1175, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.951004016064257, | |
| "grad_norm": 0.20685526728630066, | |
| "learning_rate": 0.0001868455287770621, | |
| "loss": 0.1573, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.957429718875502, | |
| "grad_norm": 0.19720108807086945, | |
| "learning_rate": 0.00018663737943217296, | |
| "loss": 0.137, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.963855421686747, | |
| "grad_norm": 0.2381121814250946, | |
| "learning_rate": 0.00018642771399881162, | |
| "loss": 0.156, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.970281124497992, | |
| "grad_norm": 0.13865579664707184, | |
| "learning_rate": 0.00018621653614596425, | |
| "loss": 0.1229, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.976706827309237, | |
| "grad_norm": 0.10851379483938217, | |
| "learning_rate": 0.00018600384956908323, | |
| "loss": 0.1088, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.983132530120482, | |
| "grad_norm": 0.1621655523777008, | |
| "learning_rate": 0.00018578965799002236, | |
| "loss": 0.1479, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.989558232931727, | |
| "grad_norm": 0.18607285618782043, | |
| "learning_rate": 0.00018557396515697202, | |
| "loss": 0.1489, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.9959839357429718, | |
| "grad_norm": 0.19177676737308502, | |
| "learning_rate": 0.0001853567748443933, | |
| "loss": 0.1163, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.0056224899598394, | |
| "grad_norm": 0.6790018677711487, | |
| "learning_rate": 0.000185138090852952, | |
| "loss": 0.2256, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.0120481927710843, | |
| "grad_norm": 0.10716850310564041, | |
| "learning_rate": 0.0001849179170094522, | |
| "loss": 0.094, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.0184738955823294, | |
| "grad_norm": 0.11798243969678879, | |
| "learning_rate": 0.00018469625716676933, | |
| "loss": 0.1108, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.0248995983935743, | |
| "grad_norm": 0.13069161772727966, | |
| "learning_rate": 0.00018447311520378262, | |
| "loss": 0.1041, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.0313253012048194, | |
| "grad_norm": 0.19986286759376526, | |
| "learning_rate": 0.0001842484950253073, | |
| "loss": 0.125, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.0377510040160642, | |
| "grad_norm": 0.16085122525691986, | |
| "learning_rate": 0.00018402240056202614, | |
| "loss": 0.1025, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.0441767068273093, | |
| "grad_norm": 0.20288337767124176, | |
| "learning_rate": 0.00018379483577042103, | |
| "loss": 0.1328, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.0506024096385542, | |
| "grad_norm": 0.15977489948272705, | |
| "learning_rate": 0.00018356580463270322, | |
| "loss": 0.0985, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.057028112449799, | |
| "grad_norm": 0.2264052927494049, | |
| "learning_rate": 0.00018333531115674408, | |
| "loss": 0.0931, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.0634538152610442, | |
| "grad_norm": 0.18668119609355927, | |
| "learning_rate": 0.0001831033593760047, | |
| "loss": 0.0777, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.069879518072289, | |
| "grad_norm": 0.2187710851430893, | |
| "learning_rate": 0.00018286995334946545, | |
| "loss": 0.1076, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.0763052208835342, | |
| "grad_norm": 0.19872407615184784, | |
| "learning_rate": 0.0001826350971615549, | |
| "loss": 0.1008, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.082730923694779, | |
| "grad_norm": 0.23164619505405426, | |
| "learning_rate": 0.00018239879492207831, | |
| "loss": 0.1104, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.0891566265060242, | |
| "grad_norm": 0.20669420063495636, | |
| "learning_rate": 0.00018216105076614576, | |
| "loss": 0.1042, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.095582329317269, | |
| "grad_norm": 0.23208123445510864, | |
| "learning_rate": 0.00018192186885409973, | |
| "loss": 0.1156, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.1020080321285142, | |
| "grad_norm": 0.25448471307754517, | |
| "learning_rate": 0.0001816812533714425, | |
| "loss": 0.1322, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.108433734939759, | |
| "grad_norm": 0.18578830361366272, | |
| "learning_rate": 0.00018143920852876257, | |
| "loss": 0.078, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.114859437751004, | |
| "grad_norm": 0.21140921115875244, | |
| "learning_rate": 0.0001811957385616612, | |
| "loss": 0.1078, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.121285140562249, | |
| "grad_norm": 0.24159879982471466, | |
| "learning_rate": 0.0001809508477306783, | |
| "loss": 0.098, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.127710843373494, | |
| "grad_norm": 0.2108435034751892, | |
| "learning_rate": 0.00018070454032121787, | |
| "loss": 0.085, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.134136546184739, | |
| "grad_norm": 0.25270572304725647, | |
| "learning_rate": 0.00018045682064347275, | |
| "loss": 0.0984, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.140562248995984, | |
| "grad_norm": 0.2605237066745758, | |
| "learning_rate": 0.00018020769303234962, | |
| "loss": 0.1125, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.146987951807229, | |
| "grad_norm": 0.2768741846084595, | |
| "learning_rate": 0.00017995716184739284, | |
| "loss": 0.0868, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.153413654618474, | |
| "grad_norm": 0.3283689320087433, | |
| "learning_rate": 0.00017970523147270822, | |
| "loss": 0.0932, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.159839357429719, | |
| "grad_norm": 0.2760496735572815, | |
| "learning_rate": 0.0001794519063168864, | |
| "loss": 0.0702, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.1662650602409639, | |
| "grad_norm": 0.1508658230304718, | |
| "learning_rate": 0.0001791971908129256, | |
| "loss": 0.1086, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.1726907630522088, | |
| "grad_norm": 0.1741812825202942, | |
| "learning_rate": 0.000178941089418154, | |
| "loss": 0.1102, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.1791164658634539, | |
| "grad_norm": 0.18406537175178528, | |
| "learning_rate": 0.000178683606614152, | |
| "loss": 0.1185, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.1855421686746987, | |
| "grad_norm": 0.18714162707328796, | |
| "learning_rate": 0.00017842474690667344, | |
| "loss": 0.1078, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.1919678714859439, | |
| "grad_norm": 0.15225981175899506, | |
| "learning_rate": 0.00017816451482556702, | |
| "loss": 0.0808, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.1983935742971887, | |
| "grad_norm": 0.1622186154127121, | |
| "learning_rate": 0.0001779029149246969, | |
| "loss": 0.097, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.2048192771084336, | |
| "grad_norm": 0.17352862656116486, | |
| "learning_rate": 0.00017763995178186307, | |
| "loss": 0.1094, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.2112449799196787, | |
| "grad_norm": 0.14557699859142303, | |
| "learning_rate": 0.00017737562999872118, | |
| "loss": 0.1031, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.2176706827309236, | |
| "grad_norm": 0.1729564219713211, | |
| "learning_rate": 0.00017710995420070215, | |
| "loss": 0.1109, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.2240963855421687, | |
| "grad_norm": 0.17331069707870483, | |
| "learning_rate": 0.00017684292903693102, | |
| "loss": 0.1163, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.2305220883534136, | |
| "grad_norm": 0.1967068910598755, | |
| "learning_rate": 0.0001765745591801458, | |
| "loss": 0.1137, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.2369477911646587, | |
| "grad_norm": 0.20813412964344025, | |
| "learning_rate": 0.00017630484932661559, | |
| "loss": 0.0865, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.2433734939759036, | |
| "grad_norm": 0.17115503549575806, | |
| "learning_rate": 0.0001760338041960583, | |
| "loss": 0.0954, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.2497991967871487, | |
| "grad_norm": 0.2135663777589798, | |
| "learning_rate": 0.00017576142853155838, | |
| "loss": 0.099, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.2562248995983936, | |
| "grad_norm": 0.2796885669231415, | |
| "learning_rate": 0.00017548772709948343, | |
| "loss": 0.1166, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.2626506024096384, | |
| "grad_norm": 0.2290525585412979, | |
| "learning_rate": 0.0001752127046894011, | |
| "loss": 0.1018, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.2690763052208835, | |
| "grad_norm": 0.23698222637176514, | |
| "learning_rate": 0.0001749363661139951, | |
| "loss": 0.0871, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.2755020080321284, | |
| "grad_norm": 0.2161116749048233, | |
| "learning_rate": 0.00017465871620898102, | |
| "loss": 0.0819, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.2819277108433735, | |
| "grad_norm": 0.2709653377532959, | |
| "learning_rate": 0.00017437975983302178, | |
| "loss": 0.082, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.2883534136546184, | |
| "grad_norm": 0.2437043935060501, | |
| "learning_rate": 0.0001740995018676425, | |
| "loss": 0.07, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.2947791164658635, | |
| "grad_norm": 0.24623267352581024, | |
| "learning_rate": 0.0001738179472171452, | |
| "loss": 0.0868, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.3012048192771084, | |
| "grad_norm": 0.27310508489608765, | |
| "learning_rate": 0.00017353510080852282, | |
| "loss": 0.0857, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.3076305220883535, | |
| "grad_norm": 0.2532103657722473, | |
| "learning_rate": 0.0001732509675913731, | |
| "loss": 0.0885, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.3140562248995984, | |
| "grad_norm": 0.2618705928325653, | |
| "learning_rate": 0.000172965552537812, | |
| "loss": 0.0903, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.3204819277108433, | |
| "grad_norm": 0.3039279282093048, | |
| "learning_rate": 0.00017267886064238662, | |
| "loss": 0.0963, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.3269076305220884, | |
| "grad_norm": 0.16821685433387756, | |
| "learning_rate": 0.00017239089692198785, | |
| "loss": 0.0837, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.275329053401947, | |
| "learning_rate": 0.0001721016664157625, | |
| "loss": 0.1015, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.3397590361445784, | |
| "grad_norm": 0.1983174830675125, | |
| "learning_rate": 0.00017181117418502525, | |
| "loss": 0.1156, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.3461847389558232, | |
| "grad_norm": 0.19256579875946045, | |
| "learning_rate": 0.00017151942531316988, | |
| "loss": 0.1055, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.3526104417670683, | |
| "grad_norm": 0.19830577075481415, | |
| "learning_rate": 0.00017122642490558055, | |
| "loss": 0.1142, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.3590361445783132, | |
| "grad_norm": 0.17073017358779907, | |
| "learning_rate": 0.00017093217808954232, | |
| "loss": 0.1305, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.3654618473895583, | |
| "grad_norm": 0.18915396928787231, | |
| "learning_rate": 0.00017063669001415145, | |
| "loss": 0.1147, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.3718875502008032, | |
| "grad_norm": 0.13788312673568726, | |
| "learning_rate": 0.00017033996585022528, | |
| "loss": 0.1056, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.378313253012048, | |
| "grad_norm": 0.203065887093544, | |
| "learning_rate": 0.00017004201079021176, | |
| "loss": 0.1355, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.3847389558232932, | |
| "grad_norm": 0.2112981230020523, | |
| "learning_rate": 0.00016974283004809858, | |
| "loss": 0.1215, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.391164658634538, | |
| "grad_norm": 0.19515666365623474, | |
| "learning_rate": 0.00016944242885932206, | |
| "loss": 0.135, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.3975903614457832, | |
| "grad_norm": 0.19761696457862854, | |
| "learning_rate": 0.0001691408124806752, | |
| "loss": 0.125, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.404016064257028, | |
| "grad_norm": 0.18920212984085083, | |
| "learning_rate": 0.00016883798619021608, | |
| "loss": 0.0967, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.410441767068273, | |
| "grad_norm": 0.1732681393623352, | |
| "learning_rate": 0.0001685339552871752, | |
| "loss": 0.0984, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.416867469879518, | |
| "grad_norm": 0.20118467509746552, | |
| "learning_rate": 0.00016822872509186297, | |
| "loss": 0.0871, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.4232931726907632, | |
| "grad_norm": 0.24401867389678955, | |
| "learning_rate": 0.0001679223009455764, | |
| "loss": 0.0971, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.429718875502008, | |
| "grad_norm": 0.22608117759227753, | |
| "learning_rate": 0.00016761468821050585, | |
| "loss": 0.0996, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.436144578313253, | |
| "grad_norm": 0.19186720252037048, | |
| "learning_rate": 0.00016730589226964098, | |
| "loss": 0.0757, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.442570281124498, | |
| "grad_norm": 0.24773664772510529, | |
| "learning_rate": 0.00016699591852667673, | |
| "loss": 0.0819, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.448995983935743, | |
| "grad_norm": 0.2296506017446518, | |
| "learning_rate": 0.00016668477240591864, | |
| "loss": 0.0967, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.455421686746988, | |
| "grad_norm": 0.23210635781288147, | |
| "learning_rate": 0.00016637245935218799, | |
| "loss": 0.089, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.461847389558233, | |
| "grad_norm": 0.22376962006092072, | |
| "learning_rate": 0.00016605898483072648, | |
| "loss": 0.0839, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.4682730923694778, | |
| "grad_norm": 0.23127049207687378, | |
| "learning_rate": 0.00016574435432710068, | |
| "loss": 0.0827, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.4746987951807229, | |
| "grad_norm": 0.31701013445854187, | |
| "learning_rate": 0.0001654285733471059, | |
| "loss": 0.1, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.481124497991968, | |
| "grad_norm": 0.3070242702960968, | |
| "learning_rate": 0.0001651116474166699, | |
| "loss": 0.1036, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.4875502008032129, | |
| "grad_norm": 0.18072882294654846, | |
| "learning_rate": 0.00016479358208175627, | |
| "loss": 0.1061, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.4939759036144578, | |
| "grad_norm": 0.14309802651405334, | |
| "learning_rate": 0.00016447438290826733, | |
| "loss": 0.092, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.5004016064257029, | |
| "grad_norm": 0.18977715075016022, | |
| "learning_rate": 0.00016415405548194663, | |
| "loss": 0.1152, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.5068273092369477, | |
| "grad_norm": 0.22865934669971466, | |
| "learning_rate": 0.00016383260540828135, | |
| "loss": 0.116, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.5132530120481928, | |
| "grad_norm": 0.2020760327577591, | |
| "learning_rate": 0.00016351003831240415, | |
| "loss": 0.112, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.5196787148594377, | |
| "grad_norm": 0.25912925601005554, | |
| "learning_rate": 0.00016318635983899465, | |
| "loss": 0.1282, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.5261044176706826, | |
| "grad_norm": 0.1735217273235321, | |
| "learning_rate": 0.0001628615756521809, | |
| "loss": 0.1034, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.5325301204819277, | |
| "grad_norm": 0.19721132516860962, | |
| "learning_rate": 0.0001625356914354399, | |
| "loss": 0.1338, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.5389558232931728, | |
| "grad_norm": 0.19484449923038483, | |
| "learning_rate": 0.0001622087128914985, | |
| "loss": 0.1214, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.5453815261044177, | |
| "grad_norm": 0.16421280801296234, | |
| "learning_rate": 0.00016188064574223335, | |
| "loss": 0.0866, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.5518072289156626, | |
| "grad_norm": 0.1922108381986618, | |
| "learning_rate": 0.0001615514957285709, | |
| "loss": 0.1298, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.5582329317269075, | |
| "grad_norm": 0.16495804488658905, | |
| "learning_rate": 0.00016122126861038688, | |
| "loss": 0.1056, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.5646586345381526, | |
| "grad_norm": 0.2061115801334381, | |
| "learning_rate": 0.00016088997016640562, | |
| "loss": 0.1008, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.5710843373493977, | |
| "grad_norm": 0.21605950593948364, | |
| "learning_rate": 0.00016055760619409877, | |
| "loss": 0.099, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.5775100401606426, | |
| "grad_norm": 0.21308393776416779, | |
| "learning_rate": 0.00016022418250958385, | |
| "loss": 0.1041, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.5839357429718874, | |
| "grad_norm": 0.30087393522262573, | |
| "learning_rate": 0.00015988970494752272, | |
| "loss": 0.1192, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.5903614457831325, | |
| "grad_norm": 0.22396108508110046, | |
| "learning_rate": 0.00015955417936101913, | |
| "loss": 0.0985, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.5967871485943776, | |
| "grad_norm": 0.27335667610168457, | |
| "learning_rate": 0.00015921761162151653, | |
| "loss": 0.0809, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.6032128514056225, | |
| "grad_norm": 0.19635237753391266, | |
| "learning_rate": 0.00015888000761869528, | |
| "loss": 0.074, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.6096385542168674, | |
| "grad_norm": 0.24129214882850647, | |
| "learning_rate": 0.0001585413732603695, | |
| "loss": 0.0948, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.6160642570281123, | |
| "grad_norm": 0.26676061749458313, | |
| "learning_rate": 0.00015820171447238383, | |
| "loss": 0.1169, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.6224899598393574, | |
| "grad_norm": 0.23809854686260223, | |
| "learning_rate": 0.0001578610371985096, | |
| "loss": 0.0916, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.6289156626506025, | |
| "grad_norm": 0.21806567907333374, | |
| "learning_rate": 0.00015751934740034092, | |
| "loss": 0.0897, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.6353413654618474, | |
| "grad_norm": 0.2490801066160202, | |
| "learning_rate": 0.00015717665105719015, | |
| "loss": 0.1021, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.6417670682730923, | |
| "grad_norm": 0.32763025164604187, | |
| "learning_rate": 0.00015683295416598367, | |
| "loss": 0.0981, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.6481927710843374, | |
| "grad_norm": 0.15386274456977844, | |
| "learning_rate": 0.00015648826274115653, | |
| "loss": 0.0735, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.6546184738955825, | |
| "grad_norm": 0.16144217550754547, | |
| "learning_rate": 0.00015614258281454734, | |
| "loss": 0.1047, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.6610441767068274, | |
| "grad_norm": 0.16441850364208221, | |
| "learning_rate": 0.00015579592043529292, | |
| "loss": 0.1014, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.6674698795180722, | |
| "grad_norm": 0.2219092845916748, | |
| "learning_rate": 0.00015544828166972203, | |
| "loss": 0.1492, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.6738955823293171, | |
| "grad_norm": 0.2408316433429718, | |
| "learning_rate": 0.00015509967260124964, | |
| "loss": 0.1373, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.6803212851405622, | |
| "grad_norm": 0.16629627346992493, | |
| "learning_rate": 0.0001547500993302702, | |
| "loss": 0.1024, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.6867469879518073, | |
| "grad_norm": 0.17428399622440338, | |
| "learning_rate": 0.000154399567974051, | |
| "loss": 0.1071, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.6931726907630522, | |
| "grad_norm": 0.20689523220062256, | |
| "learning_rate": 0.00015404808466662508, | |
| "loss": 0.1164, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.699598393574297, | |
| "grad_norm": 0.19431588053703308, | |
| "learning_rate": 0.0001536956555586839, | |
| "loss": 0.1095, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.7060240963855422, | |
| "grad_norm": 0.28836753964424133, | |
| "learning_rate": 0.0001533422868174697, | |
| "loss": 0.0958, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.7124497991967873, | |
| "grad_norm": 0.16699343919754028, | |
| "learning_rate": 0.00015298798462666765, | |
| "loss": 0.1017, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.7188755020080322, | |
| "grad_norm": 0.20274591445922852, | |
| "learning_rate": 0.00015263275518629754, | |
| "loss": 0.1082, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.725301204819277, | |
| "grad_norm": 0.20032569766044617, | |
| "learning_rate": 0.00015227660471260528, | |
| "loss": 0.1201, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.731726907630522, | |
| "grad_norm": 0.19980573654174805, | |
| "learning_rate": 0.00015191953943795427, | |
| "loss": 0.1072, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.738152610441767, | |
| "grad_norm": 0.2036619335412979, | |
| "learning_rate": 0.00015156156561071612, | |
| "loss": 0.1083, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.7445783132530122, | |
| "grad_norm": 0.17177674174308777, | |
| "learning_rate": 0.0001512026894951615, | |
| "loss": 0.0981, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.751004016064257, | |
| "grad_norm": 0.2405836135149002, | |
| "learning_rate": 0.00015084291737135048, | |
| "loss": 0.1005, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.757429718875502, | |
| "grad_norm": 0.20927219092845917, | |
| "learning_rate": 0.00015048225553502244, | |
| "loss": 0.0895, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.763855421686747, | |
| "grad_norm": 0.22314170002937317, | |
| "learning_rate": 0.00015012071029748614, | |
| "loss": 0.0874, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.7702811244979921, | |
| "grad_norm": 0.21546539664268494, | |
| "learning_rate": 0.00014975828798550933, | |
| "loss": 0.0765, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.776706827309237, | |
| "grad_norm": 0.27791547775268555, | |
| "learning_rate": 0.00014939499494120761, | |
| "loss": 0.0851, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.783132530120482, | |
| "grad_norm": 0.23379187285900116, | |
| "learning_rate": 0.00014903083752193397, | |
| "loss": 0.1173, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.7895582329317268, | |
| "grad_norm": 0.30948150157928467, | |
| "learning_rate": 0.0001486658221001672, | |
| "loss": 0.0994, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.7959839357429719, | |
| "grad_norm": 0.31466349959373474, | |
| "learning_rate": 0.0001482999550634006, | |
| "loss": 0.1006, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.802409638554217, | |
| "grad_norm": 0.29642632603645325, | |
| "learning_rate": 0.0001479332428140299, | |
| "loss": 0.0992, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.8088353413654619, | |
| "grad_norm": 0.155403733253479, | |
| "learning_rate": 0.00014756569176924153, | |
| "loss": 0.081, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.8152610441767068, | |
| "grad_norm": 0.15768149495124817, | |
| "learning_rate": 0.0001471973083609002, | |
| "loss": 0.0994, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.8216867469879519, | |
| "grad_norm": 0.21333357691764832, | |
| "learning_rate": 0.00014682809903543632, | |
| "loss": 0.0975, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.8281124497991967, | |
| "grad_norm": 0.1593853384256363, | |
| "learning_rate": 0.00014645807025373328, | |
| "loss": 0.1053, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.8345381526104418, | |
| "grad_norm": 0.2197875678539276, | |
| "learning_rate": 0.0001460872284910143, | |
| "loss": 0.1231, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.8409638554216867, | |
| "grad_norm": 0.1849106401205063, | |
| "learning_rate": 0.000145715580236729, | |
| "loss": 0.1369, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.8473895582329316, | |
| "grad_norm": 0.19566693902015686, | |
| "learning_rate": 0.00014534313199444031, | |
| "loss": 0.1229, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.8538152610441767, | |
| "grad_norm": 0.1796487420797348, | |
| "learning_rate": 0.00014496989028171012, | |
| "loss": 0.1046, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.8602409638554218, | |
| "grad_norm": 0.18316736817359924, | |
| "learning_rate": 0.00014459586162998545, | |
| "loss": 0.1128, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "grad_norm": 0.11896166950464249, | |
| "learning_rate": 0.00014422105258448425, | |
| "loss": 0.0722, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.8730923694779116, | |
| "grad_norm": 0.17587630450725555, | |
| "learning_rate": 0.00014384546970408067, | |
| "loss": 0.1201, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.8795180722891565, | |
| "grad_norm": 0.22650708258152008, | |
| "learning_rate": 0.0001434691195611905, | |
| "loss": 0.1196, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.8859437751004016, | |
| "grad_norm": 0.17509253323078156, | |
| "learning_rate": 0.000143092008741656, | |
| "loss": 0.1129, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.8923694779116467, | |
| "grad_norm": 0.29918667674064636, | |
| "learning_rate": 0.00014271414384463063, | |
| "loss": 0.1159, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.8987951807228916, | |
| "grad_norm": 0.20377112925052643, | |
| "learning_rate": 0.00014233553148246364, | |
| "loss": 0.1046, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.9052208835341364, | |
| "grad_norm": 0.20923396944999695, | |
| "learning_rate": 0.00014195617828058446, | |
| "loss": 0.1094, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.9116465863453815, | |
| "grad_norm": 0.20258182287216187, | |
| "learning_rate": 0.00014157609087738656, | |
| "loss": 0.095, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.9180722891566266, | |
| "grad_norm": 0.17875652015209198, | |
| "learning_rate": 0.00014119527592411146, | |
| "loss": 0.0937, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.9244979919678715, | |
| "grad_norm": 0.21565286815166473, | |
| "learning_rate": 0.00014081374008473213, | |
| "loss": 0.0915, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.9309236947791164, | |
| "grad_norm": 0.2412794530391693, | |
| "learning_rate": 0.0001404314900358366, | |
| "loss": 0.087, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.9373493975903613, | |
| "grad_norm": 0.20525000989437103, | |
| "learning_rate": 0.00014004853246651092, | |
| "loss": 0.1062, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.9437751004016064, | |
| "grad_norm": 0.20360495150089264, | |
| "learning_rate": 0.0001396648740782224, | |
| "loss": 0.0909, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.9502008032128515, | |
| "grad_norm": 0.2990632653236389, | |
| "learning_rate": 0.000139280521584702, | |
| "loss": 0.0891, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.9566265060240964, | |
| "grad_norm": 0.2693440020084381, | |
| "learning_rate": 0.00013889548171182702, | |
| "loss": 0.0964, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.9630522088353413, | |
| "grad_norm": 0.35366424918174744, | |
| "learning_rate": 0.0001385097611975034, | |
| "loss": 0.1023, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.9694779116465864, | |
| "grad_norm": 0.189253568649292, | |
| "learning_rate": 0.00013812336679154777, | |
| "loss": 0.0928, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.9759036144578315, | |
| "grad_norm": 0.20141035318374634, | |
| "learning_rate": 0.0001377363052555693, | |
| "loss": 0.1159, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.9823293172690764, | |
| "grad_norm": 0.29336804151535034, | |
| "learning_rate": 0.00013734858336285162, | |
| "loss": 0.1123, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.9887550200803212, | |
| "grad_norm": 0.2803572714328766, | |
| "learning_rate": 0.00013696020789823388, | |
| "loss": 0.1217, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.9951807228915661, | |
| "grad_norm": 0.2023596316576004, | |
| "learning_rate": 0.00013657118565799236, | |
| "loss": 0.081, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.004819277108434, | |
| "grad_norm": 0.6364520192146301, | |
| "learning_rate": 0.00013618152344972142, | |
| "loss": 0.2296, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 2.0112449799196788, | |
| "grad_norm": 0.15013103187084198, | |
| "learning_rate": 0.00013579122809221432, | |
| "loss": 0.0985, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.0176706827309236, | |
| "grad_norm": 0.16968320310115814, | |
| "learning_rate": 0.00013540030641534404, | |
| "loss": 0.1061, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 2.0240963855421685, | |
| "grad_norm": 0.11979459226131439, | |
| "learning_rate": 0.00013500876525994354, | |
| "loss": 0.0778, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.030522088353414, | |
| "grad_norm": 0.1250924915075302, | |
| "learning_rate": 0.00013461661147768633, | |
| "loss": 0.076, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.0369477911646587, | |
| "grad_norm": 0.14744716882705688, | |
| "learning_rate": 0.00013422385193096636, | |
| "loss": 0.1088, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.0433734939759036, | |
| "grad_norm": 0.14878836274147034, | |
| "learning_rate": 0.000133830493492778, | |
| "loss": 0.0886, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 2.0497991967871485, | |
| "grad_norm": 0.14586399495601654, | |
| "learning_rate": 0.00013343654304659574, | |
| "loss": 0.0737, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.0562248995983934, | |
| "grad_norm": 0.26601502299308777, | |
| "learning_rate": 0.00013304200748625377, | |
| "loss": 0.1376, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 2.0626506024096387, | |
| "grad_norm": 0.13429085910320282, | |
| "learning_rate": 0.0001326468937158254, | |
| "loss": 0.0653, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.0690763052208836, | |
| "grad_norm": 0.15061058104038239, | |
| "learning_rate": 0.00013225120864950217, | |
| "loss": 0.0832, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 2.0755020080321285, | |
| "grad_norm": 0.19543641805648804, | |
| "learning_rate": 0.00013185495921147272, | |
| "loss": 0.0904, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.0819277108433734, | |
| "grad_norm": 0.19916664063930511, | |
| "learning_rate": 0.00013145815233580192, | |
| "loss": 0.1002, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 2.0883534136546187, | |
| "grad_norm": 0.18353912234306335, | |
| "learning_rate": 0.00013106079496630937, | |
| "loss": 0.065, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.0947791164658636, | |
| "grad_norm": 0.22987468540668488, | |
| "learning_rate": 0.00013066289405644778, | |
| "loss": 0.0889, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.1012048192771084, | |
| "grad_norm": 0.18574562668800354, | |
| "learning_rate": 0.00013026445656918155, | |
| "loss": 0.0771, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.1076305220883533, | |
| "grad_norm": 0.2045065313577652, | |
| "learning_rate": 0.00012986548947686467, | |
| "loss": 0.0761, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.114056224899598, | |
| "grad_norm": 0.20475462079048157, | |
| "learning_rate": 0.00012946599976111883, | |
| "loss": 0.0603, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.1204819277108435, | |
| "grad_norm": 0.2551652491092682, | |
| "learning_rate": 0.0001290659944127113, | |
| "loss": 0.0619, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.1269076305220884, | |
| "grad_norm": 0.24741911888122559, | |
| "learning_rate": 0.0001286654804314325, | |
| "loss": 0.0685, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.1333333333333333, | |
| "grad_norm": 0.3083299696445465, | |
| "learning_rate": 0.0001282644648259735, | |
| "loss": 0.073, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.139759036144578, | |
| "grad_norm": 0.26823994517326355, | |
| "learning_rate": 0.00012786295461380344, | |
| "loss": 0.0743, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.1461847389558235, | |
| "grad_norm": 0.267733633518219, | |
| "learning_rate": 0.00012746095682104669, | |
| "loss": 0.0914, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.1526104417670684, | |
| "grad_norm": 0.5392053723335266, | |
| "learning_rate": 0.00012705847848235995, | |
| "loss": 0.0627, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.1590361445783133, | |
| "grad_norm": 0.2896229922771454, | |
| "learning_rate": 0.00012665552664080907, | |
| "loss": 0.0777, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.165461847389558, | |
| "grad_norm": 0.145288348197937, | |
| "learning_rate": 0.00012625210834774585, | |
| "loss": 0.0673, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.171887550200803, | |
| "grad_norm": 0.20815478265285492, | |
| "learning_rate": 0.00012584823066268466, | |
| "loss": 0.0887, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 2.1783132530120484, | |
| "grad_norm": 0.15457971394062042, | |
| "learning_rate": 0.00012544390065317887, | |
| "loss": 0.0806, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.1847389558232932, | |
| "grad_norm": 0.1739414930343628, | |
| "learning_rate": 0.00012503912539469714, | |
| "loss": 0.0904, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 2.191164658634538, | |
| "grad_norm": 0.19280481338500977, | |
| "learning_rate": 0.00012463391197049977, | |
| "loss": 0.0936, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.197590361445783, | |
| "grad_norm": 0.19487957656383514, | |
| "learning_rate": 0.00012422826747151444, | |
| "loss": 0.0711, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.2040160642570283, | |
| "grad_norm": 0.1922217458486557, | |
| "learning_rate": 0.00012382219899621246, | |
| "loss": 0.0937, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.2104417670682732, | |
| "grad_norm": 0.19978083670139313, | |
| "learning_rate": 0.00012341571365048442, | |
| "loss": 0.09, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.216867469879518, | |
| "grad_norm": 0.17337800562381744, | |
| "learning_rate": 0.00012300881854751568, | |
| "loss": 0.0849, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.223293172690763, | |
| "grad_norm": 0.16611528396606445, | |
| "learning_rate": 0.0001226015208076622, | |
| "loss": 0.0695, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.229718875502008, | |
| "grad_norm": 0.19705650210380554, | |
| "learning_rate": 0.0001221938275583257, | |
| "loss": 0.0924, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.236144578313253, | |
| "grad_norm": 0.2103363275527954, | |
| "learning_rate": 0.00012178574593382899, | |
| "loss": 0.0971, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.242570281124498, | |
| "grad_norm": 0.17318083345890045, | |
| "learning_rate": 0.0001213772830752912, | |
| "loss": 0.0764, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.248995983935743, | |
| "grad_norm": 0.18497149646282196, | |
| "learning_rate": 0.0001209684461305028, | |
| "loss": 0.0775, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.255421686746988, | |
| "grad_norm": 0.16112066805362701, | |
| "learning_rate": 0.00012055924225380038, | |
| "loss": 0.066, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.261847389558233, | |
| "grad_norm": 0.14638184010982513, | |
| "learning_rate": 0.00012014967860594164, | |
| "loss": 0.0667, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.268273092369478, | |
| "grad_norm": 0.23443520069122314, | |
| "learning_rate": 0.00011973976235398, | |
| "loss": 0.0746, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.274698795180723, | |
| "grad_norm": 0.15776869654655457, | |
| "learning_rate": 0.0001193295006711392, | |
| "loss": 0.0576, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.281124497991968, | |
| "grad_norm": 0.1940746307373047, | |
| "learning_rate": 0.00011891890073668763, | |
| "loss": 0.0614, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.2875502008032127, | |
| "grad_norm": 0.14611606299877167, | |
| "learning_rate": 0.00011850796973581302, | |
| "loss": 0.057, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.293975903614458, | |
| "grad_norm": 0.2916417419910431, | |
| "learning_rate": 0.00011809671485949636, | |
| "loss": 0.0677, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.300401606425703, | |
| "grad_norm": 0.2484523206949234, | |
| "learning_rate": 0.00011768514330438627, | |
| "loss": 0.0846, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.306827309236948, | |
| "grad_norm": 0.18685579299926758, | |
| "learning_rate": 0.00011727326227267308, | |
| "loss": 0.0682, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.3132530120481927, | |
| "grad_norm": 0.23231656849384308, | |
| "learning_rate": 0.00011686107897196255, | |
| "loss": 0.0782, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.319678714859438, | |
| "grad_norm": 0.2503097653388977, | |
| "learning_rate": 0.00011644860061515008, | |
| "loss": 0.0745, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.326104417670683, | |
| "grad_norm": 0.15438750386238098, | |
| "learning_rate": 0.00011603583442029426, | |
| "loss": 0.0599, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.3325301204819278, | |
| "grad_norm": 0.11933010071516037, | |
| "learning_rate": 0.00011562278761049066, | |
| "loss": 0.0705, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.3389558232931726, | |
| "grad_norm": 0.2344401627779007, | |
| "learning_rate": 0.00011520946741374534, | |
| "loss": 0.1086, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.3453815261044175, | |
| "grad_norm": 0.1429268717765808, | |
| "learning_rate": 0.00011479588106284848, | |
| "loss": 0.0793, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.3518072289156624, | |
| "grad_norm": 0.170423224568367, | |
| "learning_rate": 0.00011438203579524778, | |
| "loss": 0.0876, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.3582329317269077, | |
| "grad_norm": 0.21951356530189514, | |
| "learning_rate": 0.00011396793885292165, | |
| "loss": 0.1001, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.3646586345381526, | |
| "grad_norm": 0.18042345345020294, | |
| "learning_rate": 0.00011355359748225279, | |
| "loss": 0.096, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.3710843373493975, | |
| "grad_norm": 0.19519764184951782, | |
| "learning_rate": 0.00011313901893390113, | |
| "loss": 0.0842, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.3775100401606424, | |
| "grad_norm": 0.1564580202102661, | |
| "learning_rate": 0.00011272421046267696, | |
| "loss": 0.0849, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.3839357429718877, | |
| "grad_norm": 0.21376530826091766, | |
| "learning_rate": 0.00011230917932741418, | |
| "loss": 0.0848, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.3903614457831326, | |
| "grad_norm": 0.16924604773521423, | |
| "learning_rate": 0.00011189393279084308, | |
| "loss": 0.0986, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.3967871485943775, | |
| "grad_norm": 0.142182394862175, | |
| "learning_rate": 0.00011147847811946328, | |
| "loss": 0.0753, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.4032128514056224, | |
| "grad_norm": 0.2380589097738266, | |
| "learning_rate": 0.00011106282258341665, | |
| "loss": 0.0873, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.4096385542168672, | |
| "grad_norm": 0.17887993156909943, | |
| "learning_rate": 0.00011064697345636002, | |
| "loss": 0.0721, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.4160642570281126, | |
| "grad_norm": 0.16593624651432037, | |
| "learning_rate": 0.00011023093801533785, | |
| "loss": 0.0673, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.4224899598393574, | |
| "grad_norm": 0.19967305660247803, | |
| "learning_rate": 0.00010981472354065514, | |
| "loss": 0.0839, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.4289156626506023, | |
| "grad_norm": 0.1671656221151352, | |
| "learning_rate": 0.00010939833731574967, | |
| "loss": 0.0692, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.435341365461847, | |
| "grad_norm": 0.2287617176771164, | |
| "learning_rate": 0.00010898178662706471, | |
| "loss": 0.0841, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.4417670682730925, | |
| "grad_norm": 0.1811789572238922, | |
| "learning_rate": 0.00010856507876392166, | |
| "loss": 0.0549, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.4481927710843374, | |
| "grad_norm": 0.1921742856502533, | |
| "learning_rate": 0.00010814822101839224, | |
| "loss": 0.0723, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.4546184738955823, | |
| "grad_norm": 0.24973797798156738, | |
| "learning_rate": 0.00010773122068517103, | |
| "loss": 0.0746, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.461044176706827, | |
| "grad_norm": 0.22716690599918365, | |
| "learning_rate": 0.00010731408506144782, | |
| "loss": 0.0837, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.467469879518072, | |
| "grad_norm": 0.19027042388916016, | |
| "learning_rate": 0.00010689682144677983, | |
| "loss": 0.0575, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.4738955823293174, | |
| "grad_norm": 0.26334255933761597, | |
| "learning_rate": 0.00010647943714296405, | |
| "loss": 0.0688, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.4803212851405623, | |
| "grad_norm": 0.26130348443984985, | |
| "learning_rate": 0.00010606193945390943, | |
| "loss": 0.0704, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.486746987951807, | |
| "grad_norm": 0.16175444424152374, | |
| "learning_rate": 0.00010564433568550909, | |
| "loss": 0.0739, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.493172690763052, | |
| "grad_norm": 0.34543898701667786, | |
| "learning_rate": 0.00010522663314551247, | |
| "loss": 0.0883, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.4995983935742974, | |
| "grad_norm": 0.16623060405254364, | |
| "learning_rate": 0.00010480883914339736, | |
| "loss": 0.0916, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.5060240963855422, | |
| "grad_norm": 0.15224424004554749, | |
| "learning_rate": 0.0001043909609902422, | |
| "loss": 0.1017, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.512449799196787, | |
| "grad_norm": 0.2146722823381424, | |
| "learning_rate": 0.00010397300599859785, | |
| "loss": 0.0699, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.518875502008032, | |
| "grad_norm": 0.18753445148468018, | |
| "learning_rate": 0.00010355498148235996, | |
| "loss": 0.1012, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.525301204819277, | |
| "grad_norm": 0.19780860841274261, | |
| "learning_rate": 0.00010313689475664063, | |
| "loss": 0.0876, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.531726907630522, | |
| "grad_norm": 0.15880204737186432, | |
| "learning_rate": 0.0001027187531376407, | |
| "loss": 0.0978, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.538152610441767, | |
| "grad_norm": 0.20312370359897614, | |
| "learning_rate": 0.00010230056394252161, | |
| "loss": 0.0978, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.544578313253012, | |
| "grad_norm": 0.17712520062923431, | |
| "learning_rate": 0.00010188233448927724, | |
| "loss": 0.0632, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.551004016064257, | |
| "grad_norm": 0.1579594612121582, | |
| "learning_rate": 0.00010146407209660607, | |
| "loss": 0.0868, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.557429718875502, | |
| "grad_norm": 0.18369610607624054, | |
| "learning_rate": 0.00010104578408378289, | |
| "loss": 0.0721, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.563855421686747, | |
| "grad_norm": 0.1889200061559677, | |
| "learning_rate": 0.00010062747777053094, | |
| "loss": 0.079, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.570281124497992, | |
| "grad_norm": 0.1784103661775589, | |
| "learning_rate": 0.00010020916047689358, | |
| "loss": 0.0703, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.576706827309237, | |
| "grad_norm": 0.1942092776298523, | |
| "learning_rate": 9.979083952310643e-05, | |
| "loss": 0.0851, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.5831325301204817, | |
| "grad_norm": 0.17258763313293457, | |
| "learning_rate": 9.937252222946908e-05, | |
| "loss": 0.0632, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.589558232931727, | |
| "grad_norm": 0.1579107642173767, | |
| "learning_rate": 9.895421591621712e-05, | |
| "loss": 0.0568, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.595983935742972, | |
| "grad_norm": 0.16073255240917206, | |
| "learning_rate": 9.853592790339396e-05, | |
| "loss": 0.0571, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.602409638554217, | |
| "grad_norm": 0.19859455525875092, | |
| "learning_rate": 9.811766551072278e-05, | |
| "loss": 0.0695, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.6088353413654617, | |
| "grad_norm": 0.20779484510421753, | |
| "learning_rate": 9.769943605747844e-05, | |
| "loss": 0.0679, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.615261044176707, | |
| "grad_norm": 0.1808435469865799, | |
| "learning_rate": 9.72812468623593e-05, | |
| "loss": 0.0663, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.621686746987952, | |
| "grad_norm": 0.18272174894809723, | |
| "learning_rate": 9.686310524335938e-05, | |
| "loss": 0.0617, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.628112449799197, | |
| "grad_norm": 0.21186350286006927, | |
| "learning_rate": 9.644501851764007e-05, | |
| "loss": 0.0597, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.6345381526104417, | |
| "grad_norm": 0.1978769302368164, | |
| "learning_rate": 9.602699400140218e-05, | |
| "loss": 0.0573, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.6409638554216865, | |
| "grad_norm": 0.2839438319206238, | |
| "learning_rate": 9.560903900975785e-05, | |
| "loss": 0.0705, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.647389558232932, | |
| "grad_norm": 0.19903969764709473, | |
| "learning_rate": 9.519116085660267e-05, | |
| "loss": 0.0696, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.6538152610441768, | |
| "grad_norm": 0.2470594197511673, | |
| "learning_rate": 9.477336685448754e-05, | |
| "loss": 0.0781, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.6602409638554216, | |
| "grad_norm": 0.16970917582511902, | |
| "learning_rate": 9.435566431449092e-05, | |
| "loss": 0.0799, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.20240218937397003, | |
| "learning_rate": 9.39380605460906e-05, | |
| "loss": 0.0751, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.673092369477912, | |
| "grad_norm": 0.15831498801708221, | |
| "learning_rate": 9.352056285703599e-05, | |
| "loss": 0.0764, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.6795180722891567, | |
| "grad_norm": 0.2208964228630066, | |
| "learning_rate": 9.31031785532202e-05, | |
| "loss": 0.089, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.6859437751004016, | |
| "grad_norm": 0.23068203032016754, | |
| "learning_rate": 9.268591493855222e-05, | |
| "loss": 0.0822, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.6923694779116465, | |
| "grad_norm": 0.18787510693073273, | |
| "learning_rate": 9.226877931482898e-05, | |
| "loss": 0.0762, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.6987951807228914, | |
| "grad_norm": 0.22357405722141266, | |
| "learning_rate": 9.18517789816078e-05, | |
| "loss": 0.0889, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.7052208835341367, | |
| "grad_norm": 0.15312552452087402, | |
| "learning_rate": 9.143492123607838e-05, | |
| "loss": 0.0579, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.7116465863453816, | |
| "grad_norm": 0.2166433036327362, | |
| "learning_rate": 9.101821337293532e-05, | |
| "loss": 0.1423, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.7180722891566265, | |
| "grad_norm": 0.1675548553466797, | |
| "learning_rate": 9.060166268425038e-05, | |
| "loss": 0.09, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.7244979919678713, | |
| "grad_norm": 0.16221830248832703, | |
| "learning_rate": 9.018527645934488e-05, | |
| "loss": 0.0606, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.7309236947791167, | |
| "grad_norm": 0.20156528055667877, | |
| "learning_rate": 8.976906198466213e-05, | |
| "loss": 0.0855, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.7373493975903616, | |
| "grad_norm": 0.2165391594171524, | |
| "learning_rate": 8.935302654364e-05, | |
| "loss": 0.0935, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.7437751004016064, | |
| "grad_norm": 0.16395288705825806, | |
| "learning_rate": 8.893717741658336e-05, | |
| "loss": 0.092, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.7502008032128513, | |
| "grad_norm": 0.24706722795963287, | |
| "learning_rate": 8.852152188053674e-05, | |
| "loss": 0.0717, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.756626506024096, | |
| "grad_norm": 0.19659721851348877, | |
| "learning_rate": 8.810606720915697e-05, | |
| "loss": 0.0703, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.7630522088353415, | |
| "grad_norm": 0.2377336174249649, | |
| "learning_rate": 8.769082067258585e-05, | |
| "loss": 0.0711, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.7694779116465864, | |
| "grad_norm": 0.1496395319700241, | |
| "learning_rate": 8.727578953732303e-05, | |
| "loss": 0.0565, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.7759036144578313, | |
| "grad_norm": 0.230519101023674, | |
| "learning_rate": 8.686098106609889e-05, | |
| "loss": 0.0676, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.782329317269076, | |
| "grad_norm": 0.1836637556552887, | |
| "learning_rate": 8.644640251774722e-05, | |
| "loss": 0.0613, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.7887550200803215, | |
| "grad_norm": 0.17303813993930817, | |
| "learning_rate": 8.603206114707837e-05, | |
| "loss": 0.066, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.7951807228915664, | |
| "grad_norm": 0.18832409381866455, | |
| "learning_rate": 8.561796420475227e-05, | |
| "loss": 0.0539, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.8016064257028113, | |
| "grad_norm": 0.23530371487140656, | |
| "learning_rate": 8.52041189371515e-05, | |
| "loss": 0.0669, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.808032128514056, | |
| "grad_norm": 0.12783204019069672, | |
| "learning_rate": 8.479053258625467e-05, | |
| "loss": 0.0686, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.814457831325301, | |
| "grad_norm": 0.16126012802124023, | |
| "learning_rate": 8.437721238950938e-05, | |
| "loss": 0.0783, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.820883534136546, | |
| "grad_norm": 0.16663892567157745, | |
| "learning_rate": 8.396416557970576e-05, | |
| "loss": 0.0899, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.8273092369477912, | |
| "grad_norm": 0.12750643491744995, | |
| "learning_rate": 8.355139938484995e-05, | |
| "loss": 0.087, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.833734939759036, | |
| "grad_norm": 0.1801062375307083, | |
| "learning_rate": 8.313892102803749e-05, | |
| "loss": 0.079, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.840160642570281, | |
| "grad_norm": 0.2397543489933014, | |
| "learning_rate": 8.272673772732695e-05, | |
| "loss": 0.0982, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.8465863453815263, | |
| "grad_norm": 0.16935226321220398, | |
| "learning_rate": 8.231485669561371e-05, | |
| "loss": 0.0675, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.853012048192771, | |
| "grad_norm": 0.14082035422325134, | |
| "learning_rate": 8.190328514050365e-05, | |
| "loss": 0.0748, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.859437751004016, | |
| "grad_norm": 0.1471889168024063, | |
| "learning_rate": 8.1492030264187e-05, | |
| "loss": 0.0622, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.865863453815261, | |
| "grad_norm": 0.19526907801628113, | |
| "learning_rate": 8.108109926331238e-05, | |
| "loss": 0.0737, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.872289156626506, | |
| "grad_norm": 0.15070655941963196, | |
| "learning_rate": 8.067049932886084e-05, | |
| "loss": 0.0751, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.8787148594377507, | |
| "grad_norm": 0.1857602894306183, | |
| "learning_rate": 8.026023764601999e-05, | |
| "loss": 0.0717, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.885140562248996, | |
| "grad_norm": 0.20066916942596436, | |
| "learning_rate": 7.985032139405836e-05, | |
| "loss": 0.0792, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.891566265060241, | |
| "grad_norm": 0.19384227693080902, | |
| "learning_rate": 7.944075774619963e-05, | |
| "loss": 0.0575, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.897991967871486, | |
| "grad_norm": 0.21878117322921753, | |
| "learning_rate": 7.903155386949723e-05, | |
| "loss": 0.0799, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.904417670682731, | |
| "grad_norm": 0.2109462022781372, | |
| "learning_rate": 7.862271692470884e-05, | |
| "loss": 0.081, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.910843373493976, | |
| "grad_norm": 0.2199956178665161, | |
| "learning_rate": 7.821425406617106e-05, | |
| "loss": 0.0749, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.917269076305221, | |
| "grad_norm": 0.2037278711795807, | |
| "learning_rate": 7.780617244167432e-05, | |
| "loss": 0.0529, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.923694779116466, | |
| "grad_norm": 0.19817106425762177, | |
| "learning_rate": 7.739847919233781e-05, | |
| "loss": 0.0482, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.9301204819277107, | |
| "grad_norm": 0.16905316710472107, | |
| "learning_rate": 7.699118145248434e-05, | |
| "loss": 0.0518, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.9365461847389556, | |
| "grad_norm": 0.18011616170406342, | |
| "learning_rate": 7.658428634951562e-05, | |
| "loss": 0.0619, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.942971887550201, | |
| "grad_norm": 0.18177881836891174, | |
| "learning_rate": 7.617780100378756e-05, | |
| "loss": 0.057, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.9493975903614458, | |
| "grad_norm": 0.1976725161075592, | |
| "learning_rate": 7.57717325284856e-05, | |
| "loss": 0.0512, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.9558232931726907, | |
| "grad_norm": 0.16665002703666687, | |
| "learning_rate": 7.536608802950027e-05, | |
| "loss": 0.0603, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.962248995983936, | |
| "grad_norm": 0.2631849944591522, | |
| "learning_rate": 7.496087460530285e-05, | |
| "loss": 0.0644, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.968674698795181, | |
| "grad_norm": 0.14662465453147888, | |
| "learning_rate": 7.455609934682116e-05, | |
| "loss": 0.1023, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.9751004016064257, | |
| "grad_norm": 0.15676066279411316, | |
| "learning_rate": 7.415176933731536e-05, | |
| "loss": 0.078, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.9815261044176706, | |
| "grad_norm": 0.15064530074596405, | |
| "learning_rate": 7.374789165225416e-05, | |
| "loss": 0.0697, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.9879518072289155, | |
| "grad_norm": 0.21094205975532532, | |
| "learning_rate": 7.334447335919096e-05, | |
| "loss": 0.065, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.9943775100401604, | |
| "grad_norm": 0.19423390924930573, | |
| "learning_rate": 7.294152151764006e-05, | |
| "loss": 0.0587, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 3.004016064257028, | |
| "grad_norm": 0.5364864468574524, | |
| "learning_rate": 7.253904317895332e-05, | |
| "loss": 0.0888, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 3.010441767068273, | |
| "grad_norm": 0.10208001732826233, | |
| "learning_rate": 7.21370453861966e-05, | |
| "loss": 0.0712, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 3.016867469879518, | |
| "grad_norm": 0.13294284045696259, | |
| "learning_rate": 7.173553517402652e-05, | |
| "loss": 0.0869, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 3.0232931726907633, | |
| "grad_norm": 0.11979340761899948, | |
| "learning_rate": 7.133451956856751e-05, | |
| "loss": 0.0719, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 3.029718875502008, | |
| "grad_norm": 0.1777201145887375, | |
| "learning_rate": 7.093400558728871e-05, | |
| "loss": 0.068, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.036144578313253, | |
| "grad_norm": 0.14670364558696747, | |
| "learning_rate": 7.053400023888115e-05, | |
| "loss": 0.0693, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 3.042570281124498, | |
| "grad_norm": 0.15252293646335602, | |
| "learning_rate": 7.013451052313534e-05, | |
| "loss": 0.0649, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 3.048995983935743, | |
| "grad_norm": 0.14201124012470245, | |
| "learning_rate": 6.973554343081846e-05, | |
| "loss": 0.0515, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 3.055421686746988, | |
| "grad_norm": 0.15977801382541656, | |
| "learning_rate": 6.933710594355225e-05, | |
| "loss": 0.0593, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 3.061847389558233, | |
| "grad_norm": 0.11069828271865845, | |
| "learning_rate": 6.893920503369068e-05, | |
| "loss": 0.0407, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 3.068273092369478, | |
| "grad_norm": 0.1887078583240509, | |
| "learning_rate": 6.854184766419812e-05, | |
| "loss": 0.0619, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 3.0746987951807228, | |
| "grad_norm": 0.17938368022441864, | |
| "learning_rate": 6.814504078852729e-05, | |
| "loss": 0.0634, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 3.081124497991968, | |
| "grad_norm": 0.1669437140226364, | |
| "learning_rate": 6.774879135049787e-05, | |
| "loss": 0.0518, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 3.087550200803213, | |
| "grad_norm": 0.14382565021514893, | |
| "learning_rate": 6.735310628417461e-05, | |
| "loss": 0.0472, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 3.093975903614458, | |
| "grad_norm": 0.176390141248703, | |
| "learning_rate": 6.695799251374625e-05, | |
| "loss": 0.0519, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.1004016064257027, | |
| "grad_norm": 0.19478528201580048, | |
| "learning_rate": 6.656345695340431e-05, | |
| "loss": 0.0631, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 3.1068273092369476, | |
| "grad_norm": 0.24837417900562286, | |
| "learning_rate": 6.616950650722205e-05, | |
| "loss": 0.0646, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 3.113253012048193, | |
| "grad_norm": 0.20292866230010986, | |
| "learning_rate": 6.577614806903365e-05, | |
| "loss": 0.048, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 3.119678714859438, | |
| "grad_norm": 0.14314322173595428, | |
| "learning_rate": 6.538338852231367e-05, | |
| "loss": 0.0387, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 3.1261044176706827, | |
| "grad_norm": 0.623166561126709, | |
| "learning_rate": 6.499123474005647e-05, | |
| "loss": 0.056, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 3.1325301204819276, | |
| "grad_norm": 0.18953579664230347, | |
| "learning_rate": 6.4599693584656e-05, | |
| "loss": 0.052, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 3.1389558232931725, | |
| "grad_norm": 0.30603814125061035, | |
| "learning_rate": 6.420877190778569e-05, | |
| "loss": 0.0628, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 3.145381526104418, | |
| "grad_norm": 0.22006677091121674, | |
| "learning_rate": 6.381847655027864e-05, | |
| "loss": 0.0492, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.1518072289156627, | |
| "grad_norm": 0.2588195204734802, | |
| "learning_rate": 6.342881434200765e-05, | |
| "loss": 0.0466, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 3.1582329317269076, | |
| "grad_norm": 0.22890865802764893, | |
| "learning_rate": 6.303979210176614e-05, | |
| "loss": 0.0621, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.1646586345381524, | |
| "grad_norm": 0.15218952298164368, | |
| "learning_rate": 6.26514166371484e-05, | |
| "loss": 0.0451, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 3.1710843373493978, | |
| "grad_norm": 0.11059743911027908, | |
| "learning_rate": 6.226369474443072e-05, | |
| "loss": 0.0769, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.1775100401606426, | |
| "grad_norm": 0.1349543184041977, | |
| "learning_rate": 6.18766332084523e-05, | |
| "loss": 0.0576, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 3.1839357429718875, | |
| "grad_norm": 0.14686280488967896, | |
| "learning_rate": 6.149023880249665e-05, | |
| "loss": 0.0839, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.1903614457831324, | |
| "grad_norm": 0.14011719822883606, | |
| "learning_rate": 6.110451828817298e-05, | |
| "loss": 0.0591, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.1967871485943773, | |
| "grad_norm": 0.18035003542900085, | |
| "learning_rate": 6.071947841529801e-05, | |
| "loss": 0.069, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.2032128514056226, | |
| "grad_norm": 0.1427018791437149, | |
| "learning_rate": 6.03351259217776e-05, | |
| "loss": 0.0647, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 3.2096385542168675, | |
| "grad_norm": 0.21494245529174805, | |
| "learning_rate": 5.995146753348909e-05, | |
| "loss": 0.0764, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.2160642570281124, | |
| "grad_norm": 0.18623854219913483, | |
| "learning_rate": 5.9568509964163464e-05, | |
| "loss": 0.0558, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 3.2224899598393573, | |
| "grad_norm": 0.13678805530071259, | |
| "learning_rate": 5.9186259915267916e-05, | |
| "loss": 0.048, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.2289156626506026, | |
| "grad_norm": 0.14847120642662048, | |
| "learning_rate": 5.880472407588857e-05, | |
| "loss": 0.0668, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 3.2353413654618475, | |
| "grad_norm": 0.14581717550754547, | |
| "learning_rate": 5.842390912261344e-05, | |
| "loss": 0.0424, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.2417670682730924, | |
| "grad_norm": 0.14835858345031738, | |
| "learning_rate": 5.8043821719415534e-05, | |
| "loss": 0.0565, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 3.2481927710843372, | |
| "grad_norm": 0.1661667823791504, | |
| "learning_rate": 5.7664468517536395e-05, | |
| "loss": 0.0432, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.254618473895582, | |
| "grad_norm": 0.13604165613651276, | |
| "learning_rate": 5.728585615536946e-05, | |
| "loss": 0.0417, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.2610441767068274, | |
| "grad_norm": 0.18449333310127258, | |
| "learning_rate": 5.6907991258344e-05, | |
| "loss": 0.0662, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.2674698795180723, | |
| "grad_norm": 0.14538291096687317, | |
| "learning_rate": 5.6530880438809494e-05, | |
| "loss": 0.0378, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 3.273895582329317, | |
| "grad_norm": 0.19400519132614136, | |
| "learning_rate": 5.615453029591935e-05, | |
| "loss": 0.0494, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.280321285140562, | |
| "grad_norm": 0.2027265727519989, | |
| "learning_rate": 5.5778947415515784e-05, | |
| "loss": 0.0472, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 3.2867469879518074, | |
| "grad_norm": 0.18922823667526245, | |
| "learning_rate": 5.540413837001459e-05, | |
| "loss": 0.0393, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.2931726907630523, | |
| "grad_norm": 0.20464153587818146, | |
| "learning_rate": 5.50301097182899e-05, | |
| "loss": 0.0369, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 3.299598393574297, | |
| "grad_norm": 0.23268243670463562, | |
| "learning_rate": 5.465686800555967e-05, | |
| "loss": 0.0362, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.306024096385542, | |
| "grad_norm": 0.16447444260120392, | |
| "learning_rate": 5.4284419763271e-05, | |
| "loss": 0.0305, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 3.312449799196787, | |
| "grad_norm": 0.20870855450630188, | |
| "learning_rate": 5.391277150898575e-05, | |
| "loss": 0.0539, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.3188755020080323, | |
| "grad_norm": 0.19453927874565125, | |
| "learning_rate": 5.354192974626674e-05, | |
| "loss": 0.0462, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.325301204819277, | |
| "grad_norm": 0.12926504015922546, | |
| "learning_rate": 5.317190096456368e-05, | |
| "loss": 0.047, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.331726907630522, | |
| "grad_norm": 0.13683585822582245, | |
| "learning_rate": 5.2802691639099834e-05, | |
| "loss": 0.0655, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 3.338152610441767, | |
| "grad_norm": 0.13933661580085754, | |
| "learning_rate": 5.24343082307585e-05, | |
| "loss": 0.0713, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.3445783132530122, | |
| "grad_norm": 0.15325239300727844, | |
| "learning_rate": 5.206675718597012e-05, | |
| "loss": 0.0471, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 3.351004016064257, | |
| "grad_norm": 0.21659892797470093, | |
| "learning_rate": 5.1700044936599434e-05, | |
| "loss": 0.1054, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.357429718875502, | |
| "grad_norm": 0.1403968334197998, | |
| "learning_rate": 5.133417789983277e-05, | |
| "loss": 0.0668, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 3.363855421686747, | |
| "grad_norm": 0.16732285916805267, | |
| "learning_rate": 5.0969162478066055e-05, | |
| "loss": 0.0587, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.3702811244979918, | |
| "grad_norm": 0.12107214331626892, | |
| "learning_rate": 5.060500505879244e-05, | |
| "loss": 0.0534, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 3.376706827309237, | |
| "grad_norm": 0.1527867466211319, | |
| "learning_rate": 5.0241712014490684e-05, | |
| "loss": 0.0497, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.383132530120482, | |
| "grad_norm": 0.15944266319274902, | |
| "learning_rate": 4.9879289702513845e-05, | |
| "loss": 0.0653, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.389558232931727, | |
| "grad_norm": 0.15926086902618408, | |
| "learning_rate": 4.95177444649776e-05, | |
| "loss": 0.049, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.3959839357429717, | |
| "grad_norm": 0.13858525454998016, | |
| "learning_rate": 4.9157082628649545e-05, | |
| "loss": 0.0544, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 3.402409638554217, | |
| "grad_norm": 0.14041852951049805, | |
| "learning_rate": 4.87973105048385e-05, | |
| "loss": 0.0481, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.408835341365462, | |
| "grad_norm": 0.12636280059814453, | |
| "learning_rate": 4.8438434389283895e-05, | |
| "loss": 0.0533, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 3.415261044176707, | |
| "grad_norm": 0.21128305792808533, | |
| "learning_rate": 4.8080460562045736e-05, | |
| "loss": 0.0532, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.4216867469879517, | |
| "grad_norm": 0.13390301167964935, | |
| "learning_rate": 4.7723395287394746e-05, | |
| "loss": 0.0334, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 3.4281124497991966, | |
| "grad_norm": 0.17304478585720062, | |
| "learning_rate": 4.736724481370248e-05, | |
| "loss": 0.0537, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 3.434538152610442, | |
| "grad_norm": 0.17624051868915558, | |
| "learning_rate": 4.701201537333237e-05, | |
| "loss": 0.045, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 3.440963855421687, | |
| "grad_norm": 0.18076051771640778, | |
| "learning_rate": 4.6657713182530316e-05, | |
| "loss": 0.0458, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 3.4473895582329317, | |
| "grad_norm": 0.15712404251098633, | |
| "learning_rate": 4.630434444131615e-05, | |
| "loss": 0.0403, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.4538152610441766, | |
| "grad_norm": 0.1949484795331955, | |
| "learning_rate": 4.595191533337494e-05, | |
| "loss": 0.0455, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 3.460240963855422, | |
| "grad_norm": 0.18234334886074066, | |
| "learning_rate": 4.560043202594899e-05, | |
| "loss": 0.0347, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 3.466666666666667, | |
| "grad_norm": 0.23936811089515686, | |
| "learning_rate": 4.524990066972982e-05, | |
| "loss": 0.0567, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 3.4730923694779117, | |
| "grad_norm": 0.21601049602031708, | |
| "learning_rate": 4.4900327398750363e-05, | |
| "loss": 0.0487, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 3.4795180722891565, | |
| "grad_norm": 0.20177985727787018, | |
| "learning_rate": 4.4551718330278006e-05, | |
| "loss": 0.0401, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.4859437751004014, | |
| "grad_norm": 0.14613081514835358, | |
| "learning_rate": 4.4204079564707144e-05, | |
| "loss": 0.0383, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 3.4923694779116468, | |
| "grad_norm": 0.11608117818832397, | |
| "learning_rate": 4.3857417185452644e-05, | |
| "loss": 0.0638, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 3.4987951807228916, | |
| "grad_norm": 0.13324810564517975, | |
| "learning_rate": 4.351173725884351e-05, | |
| "loss": 0.0705, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 3.5052208835341365, | |
| "grad_norm": 0.16427621245384216, | |
| "learning_rate": 4.3167045834016326e-05, | |
| "loss": 0.0861, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.5116465863453814, | |
| "grad_norm": 0.12879817187786102, | |
| "learning_rate": 4.282334894280986e-05, | |
| "loss": 0.0532, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.5180722891566267, | |
| "grad_norm": 0.13565368950366974, | |
| "learning_rate": 4.2480652599659154e-05, | |
| "loss": 0.064, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.5244979919678716, | |
| "grad_norm": 0.1572374403476715, | |
| "learning_rate": 4.213896280149041e-05, | |
| "loss": 0.0627, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 3.5309236947791165, | |
| "grad_norm": 0.1345859169960022, | |
| "learning_rate": 4.179828552761617e-05, | |
| "loss": 0.0654, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 3.5373493975903614, | |
| "grad_norm": 0.18093015253543854, | |
| "learning_rate": 4.1458626739630526e-05, | |
| "loss": 0.0593, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 3.5437751004016063, | |
| "grad_norm": 0.16655421257019043, | |
| "learning_rate": 4.1119992381304754e-05, | |
| "loss": 0.0524, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.550200803212851, | |
| "grad_norm": 0.1454760730266571, | |
| "learning_rate": 4.078238837848352e-05, | |
| "loss": 0.0478, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 3.5566265060240965, | |
| "grad_norm": 0.16041091084480286, | |
| "learning_rate": 4.04458206389809e-05, | |
| "loss": 0.0586, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 3.5630522088353414, | |
| "grad_norm": 0.1313060075044632, | |
| "learning_rate": 4.011029505247732e-05, | |
| "loss": 0.0441, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 3.5694779116465862, | |
| "grad_norm": 0.22944222390651703, | |
| "learning_rate": 3.977581749041616e-05, | |
| "loss": 0.0543, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 3.5759036144578316, | |
| "grad_norm": 0.20055855810642242, | |
| "learning_rate": 3.9442393805901245e-05, | |
| "loss": 0.0542, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.5823293172690764, | |
| "grad_norm": 0.20784057676792145, | |
| "learning_rate": 3.91100298335944e-05, | |
| "loss": 0.0474, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 3.5887550200803213, | |
| "grad_norm": 0.1995190680027008, | |
| "learning_rate": 3.877873138961311e-05, | |
| "loss": 0.0494, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 3.595180722891566, | |
| "grad_norm": 0.195552259683609, | |
| "learning_rate": 3.844850427142914e-05, | |
| "loss": 0.0433, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.601606425702811, | |
| "grad_norm": 0.18116550147533417, | |
| "learning_rate": 3.811935425776667e-05, | |
| "loss": 0.0454, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.608032128514056, | |
| "grad_norm": 0.19290290772914886, | |
| "learning_rate": 3.779128710850151e-05, | |
| "loss": 0.0428, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.6144578313253013, | |
| "grad_norm": 0.21416397392749786, | |
| "learning_rate": 3.7464308564560106e-05, | |
| "loss": 0.051, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.620883534136546, | |
| "grad_norm": 0.16343973577022552, | |
| "learning_rate": 3.71384243478191e-05, | |
| "loss": 0.0348, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.627309236947791, | |
| "grad_norm": 0.23021504282951355, | |
| "learning_rate": 3.681364016100535e-05, | |
| "loss": 0.0395, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.6337349397590364, | |
| "grad_norm": 0.17765948176383972, | |
| "learning_rate": 3.64899616875959e-05, | |
| "loss": 0.03, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.6401606425702813, | |
| "grad_norm": 0.19902700185775757, | |
| "learning_rate": 3.616739459171866e-05, | |
| "loss": 0.0375, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.646586345381526, | |
| "grad_norm": 0.12952236831188202, | |
| "learning_rate": 3.5845944518053376e-05, | |
| "loss": 0.0586, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.653012048192771, | |
| "grad_norm": 0.1288733333349228, | |
| "learning_rate": 3.552561709173266e-05, | |
| "loss": 0.0611, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.659437751004016, | |
| "grad_norm": 0.13545019924640656, | |
| "learning_rate": 3.520641791824374e-05, | |
| "loss": 0.0742, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.665863453815261, | |
| "grad_norm": 0.18385440111160278, | |
| "learning_rate": 3.488835258333014e-05, | |
| "loss": 0.0468, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.672289156626506, | |
| "grad_norm": 0.15589255094528198, | |
| "learning_rate": 3.4571426652894144e-05, | |
| "loss": 0.0691, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.678714859437751, | |
| "grad_norm": 0.16703784465789795, | |
| "learning_rate": 3.4255645672899325e-05, | |
| "loss": 0.0707, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.685140562248996, | |
| "grad_norm": 0.1708272397518158, | |
| "learning_rate": 3.3941015169273524e-05, | |
| "loss": 0.0699, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.691566265060241, | |
| "grad_norm": 0.1434539556503296, | |
| "learning_rate": 3.362754064781202e-05, | |
| "loss": 0.058, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.697991967871486, | |
| "grad_norm": 0.12717658281326294, | |
| "learning_rate": 3.331522759408138e-05, | |
| "loss": 0.045, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.704417670682731, | |
| "grad_norm": 0.1311410516500473, | |
| "learning_rate": 3.300408147332327e-05, | |
| "loss": 0.0369, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.710843373493976, | |
| "grad_norm": 0.13452477753162384, | |
| "learning_rate": 3.269410773035903e-05, | |
| "loss": 0.0382, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.7172690763052207, | |
| "grad_norm": 1.2888418436050415, | |
| "learning_rate": 3.238531178949417e-05, | |
| "loss": 0.0785, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.7236947791164656, | |
| "grad_norm": 0.15146106481552124, | |
| "learning_rate": 3.207769905442359e-05, | |
| "loss": 0.0726, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.730120481927711, | |
| "grad_norm": 0.14550191164016724, | |
| "learning_rate": 3.177127490813706e-05, | |
| "loss": 0.0504, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.736546184738956, | |
| "grad_norm": 0.1444544643163681, | |
| "learning_rate": 3.1466044712824805e-05, | |
| "loss": 0.0384, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.7429718875502007, | |
| "grad_norm": 0.160190150141716, | |
| "learning_rate": 3.1162013809783955e-05, | |
| "loss": 0.0491, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.749397590361446, | |
| "grad_norm": 0.1942124217748642, | |
| "learning_rate": 3.0859187519324806e-05, | |
| "loss": 0.0536, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.755823293172691, | |
| "grad_norm": 0.1264030784368515, | |
| "learning_rate": 3.055757114067794e-05, | |
| "loss": 0.0319, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.762248995983936, | |
| "grad_norm": 0.19579629600048065, | |
| "learning_rate": 3.025716995190141e-05, | |
| "loss": 0.0477, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.7686746987951807, | |
| "grad_norm": 0.16901759803295135, | |
| "learning_rate": 2.995798920978825e-05, | |
| "loss": 0.0306, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.7751004016064256, | |
| "grad_norm": 0.16561923921108246, | |
| "learning_rate": 2.966003414977475e-05, | |
| "loss": 0.0492, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.7815261044176705, | |
| "grad_norm": 0.2909844219684601, | |
| "learning_rate": 2.9363309985848585e-05, | |
| "loss": 0.0502, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.787951807228916, | |
| "grad_norm": 0.15100587904453278, | |
| "learning_rate": 2.9067821910457704e-05, | |
| "loss": 0.0343, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.7943775100401607, | |
| "grad_norm": 0.26900508999824524, | |
| "learning_rate": 2.877357509441947e-05, | |
| "loss": 0.0544, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.8008032128514055, | |
| "grad_norm": 0.23953552544116974, | |
| "learning_rate": 2.8480574686830142e-05, | |
| "loss": 0.0438, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.807228915662651, | |
| "grad_norm": 0.14778892695903778, | |
| "learning_rate": 2.8188825814974795e-05, | |
| "loss": 0.0455, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.8136546184738958, | |
| "grad_norm": 0.11664500087499619, | |
| "learning_rate": 2.7898333584237534e-05, | |
| "loss": 0.0634, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.8200803212851406, | |
| "grad_norm": 0.10635870695114136, | |
| "learning_rate": 2.7609103078012166e-05, | |
| "loss": 0.0603, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.8265060240963855, | |
| "grad_norm": 0.11662769317626953, | |
| "learning_rate": 2.7321139357613412e-05, | |
| "loss": 0.0466, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.8329317269076304, | |
| "grad_norm": 0.12710097432136536, | |
| "learning_rate": 2.703444746218802e-05, | |
| "loss": 0.063, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.8393574297188753, | |
| "grad_norm": 0.16935308277606964, | |
| "learning_rate": 2.6749032408626907e-05, | |
| "loss": 0.0519, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.8457831325301206, | |
| "grad_norm": 0.15195302665233612, | |
| "learning_rate": 2.646489919147721e-05, | |
| "loss": 0.076, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.8522088353413655, | |
| "grad_norm": 0.21082603931427002, | |
| "learning_rate": 2.6182052782854806e-05, | |
| "loss": 0.0771, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.8586345381526104, | |
| "grad_norm": 0.1390170454978943, | |
| "learning_rate": 2.59004981323575e-05, | |
| "loss": 0.0673, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.8650602409638557, | |
| "grad_norm": 0.11533928662538528, | |
| "learning_rate": 2.5620240166978226e-05, | |
| "loss": 0.0392, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.8714859437751006, | |
| "grad_norm": 0.1865217238664627, | |
| "learning_rate": 2.5341283791018988e-05, | |
| "loss": 0.0505, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.8779116465863455, | |
| "grad_norm": 0.15040706098079681, | |
| "learning_rate": 2.5063633886004935e-05, | |
| "loss": 0.0589, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.8843373493975903, | |
| "grad_norm": 0.15011747181415558, | |
| "learning_rate": 2.4787295310598913e-05, | |
| "loss": 0.0607, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.8907630522088352, | |
| "grad_norm": 0.15416325628757477, | |
| "learning_rate": 2.45122729005166e-05, | |
| "loss": 0.0613, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.89718875502008, | |
| "grad_norm": 0.20439183712005615, | |
| "learning_rate": 2.423857146844164e-05, | |
| "loss": 0.0688, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.9036144578313254, | |
| "grad_norm": 0.16020047664642334, | |
| "learning_rate": 2.3966195803941715e-05, | |
| "loss": 0.0513, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.9100401606425703, | |
| "grad_norm": 0.14457987248897552, | |
| "learning_rate": 2.3695150673384437e-05, | |
| "loss": 0.0374, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.916465863453815, | |
| "grad_norm": 0.12410798668861389, | |
| "learning_rate": 2.3425440819854185e-05, | |
| "loss": 0.0335, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.92289156626506, | |
| "grad_norm": 0.1418788731098175, | |
| "learning_rate": 2.3157070963068984e-05, | |
| "loss": 0.0286, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.9293172690763054, | |
| "grad_norm": 0.1596471220254898, | |
| "learning_rate": 2.2890045799297876e-05, | |
| "loss": 0.0321, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.9357429718875503, | |
| "grad_norm": 0.19366958737373352, | |
| "learning_rate": 2.2624370001278838e-05, | |
| "loss": 0.0398, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.942168674698795, | |
| "grad_norm": 0.1489342749118805, | |
| "learning_rate": 2.2360048218136985e-05, | |
| "loss": 0.0313, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.94859437751004, | |
| "grad_norm": 0.2047041803598404, | |
| "learning_rate": 2.2097085075303138e-05, | |
| "loss": 0.0517, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.955020080321285, | |
| "grad_norm": 0.24907703697681427, | |
| "learning_rate": 2.1835485174433002e-05, | |
| "loss": 0.051, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.9614457831325303, | |
| "grad_norm": 0.21545164287090302, | |
| "learning_rate": 2.1575253093326586e-05, | |
| "loss": 0.0327, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.967871485943775, | |
| "grad_norm": 0.11793698370456696, | |
| "learning_rate": 2.131639338584801e-05, | |
| "loss": 0.0516, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.97429718875502, | |
| "grad_norm": 0.14671315252780914, | |
| "learning_rate": 2.1058910581846013e-05, | |
| "loss": 0.0528, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.980722891566265, | |
| "grad_norm": 0.12005927413702011, | |
| "learning_rate": 2.0802809187074434e-05, | |
| "loss": 0.0404, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.9871485943775102, | |
| "grad_norm": 0.16592156887054443, | |
| "learning_rate": 2.05480936831136e-05, | |
| "loss": 0.0456, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.993574297188755, | |
| "grad_norm": 0.14437763392925262, | |
| "learning_rate": 2.0294768527291796e-05, | |
| "loss": 0.0292, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 4.003212851405623, | |
| "grad_norm": 0.4867587685585022, | |
| "learning_rate": 2.004283815260717e-05, | |
| "loss": 0.0602, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 4.009638554216868, | |
| "grad_norm": 0.11754161864519119, | |
| "learning_rate": 1.9792306967650398e-05, | |
| "loss": 0.0577, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 4.016064257028113, | |
| "grad_norm": 0.12216556817293167, | |
| "learning_rate": 1.9543179356527252e-05, | |
| "loss": 0.0782, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 4.0224899598393575, | |
| "grad_norm": 0.10597134381532669, | |
| "learning_rate": 1.9295459678782168e-05, | |
| "loss": 0.0623, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 4.028915662650602, | |
| "grad_norm": 0.13430526852607727, | |
| "learning_rate": 1.904915226932169e-05, | |
| "loss": 0.0573, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 4.035341365461847, | |
| "grad_norm": 0.14790372550487518, | |
| "learning_rate": 1.88042614383388e-05, | |
| "loss": 0.0494, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 4.041767068273092, | |
| "grad_norm": 0.1289214938879013, | |
| "learning_rate": 1.856079147123746e-05, | |
| "loss": 0.0353, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 4.048192771084337, | |
| "grad_norm": 0.12450610101222992, | |
| "learning_rate": 1.8318746628557526e-05, | |
| "loss": 0.0379, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 4.054618473895582, | |
| "grad_norm": 0.17245958745479584, | |
| "learning_rate": 1.8078131145900267e-05, | |
| "loss": 0.0583, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 4.061044176706828, | |
| "grad_norm": 0.14711426198482513, | |
| "learning_rate": 1.7838949233854284e-05, | |
| "loss": 0.0612, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 4.067469879518073, | |
| "grad_norm": 0.16549347341060638, | |
| "learning_rate": 1.760120507792169e-05, | |
| "loss": 0.049, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 4.0738955823293175, | |
| "grad_norm": 0.10457637906074524, | |
| "learning_rate": 1.7364902838445106e-05, | |
| "loss": 0.0337, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 4.080321285140562, | |
| "grad_norm": 0.19436487555503845, | |
| "learning_rate": 1.713004665053457e-05, | |
| "loss": 0.0455, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 4.086746987951807, | |
| "grad_norm": 0.14365814626216888, | |
| "learning_rate": 1.6896640623995318e-05, | |
| "loss": 0.0363, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 4.093172690763052, | |
| "grad_norm": 0.13444365561008453, | |
| "learning_rate": 1.666468884325596e-05, | |
| "loss": 0.0382, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 4.099598393574297, | |
| "grad_norm": 0.11268598586320877, | |
| "learning_rate": 1.6434195367296802e-05, | |
| "loss": 0.0323, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 4.106024096385542, | |
| "grad_norm": 0.12947283685207367, | |
| "learning_rate": 1.6205164229578994e-05, | |
| "loss": 0.0281, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 4.112449799196787, | |
| "grad_norm": 0.12646523118019104, | |
| "learning_rate": 1.5977599437973867e-05, | |
| "loss": 0.0264, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 4.1188755020080325, | |
| "grad_norm": 0.1570567935705185, | |
| "learning_rate": 1.5751504974692733e-05, | |
| "loss": 0.0356, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 4.125301204819277, | |
| "grad_norm": 0.1824863851070404, | |
| "learning_rate": 1.55268847962174e-05, | |
| "loss": 0.0368, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 4.131726907630522, | |
| "grad_norm": 0.17944642901420593, | |
| "learning_rate": 1.5303742833230673e-05, | |
| "loss": 0.0275, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 4.138152610441767, | |
| "grad_norm": 0.16530513763427734, | |
| "learning_rate": 1.5082082990547796e-05, | |
| "loss": 0.0308, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 4.144578313253012, | |
| "grad_norm": 0.17503738403320312, | |
| "learning_rate": 1.4861909147048025e-05, | |
| "loss": 0.0232, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 4.151004016064257, | |
| "grad_norm": 0.18777841329574585, | |
| "learning_rate": 1.464322515560671e-05, | |
| "loss": 0.032, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 4.157429718875502, | |
| "grad_norm": 0.2501942217350006, | |
| "learning_rate": 1.4426034843027969e-05, | |
| "loss": 0.0328, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 4.163855421686747, | |
| "grad_norm": 0.13108326494693756, | |
| "learning_rate": 1.4210342009977628e-05, | |
| "loss": 0.0393, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 4.170281124497992, | |
| "grad_norm": 0.1045239120721817, | |
| "learning_rate": 1.3996150430916799e-05, | |
| "loss": 0.0491, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 4.176706827309237, | |
| "grad_norm": 0.14830029010772705, | |
| "learning_rate": 1.378346385403575e-05, | |
| "loss": 0.0488, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 4.183132530120482, | |
| "grad_norm": 0.12067140638828278, | |
| "learning_rate": 1.357228600118836e-05, | |
| "loss": 0.0431, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 4.189558232931727, | |
| "grad_norm": 0.12366708368062973, | |
| "learning_rate": 1.3362620567827033e-05, | |
| "loss": 0.0488, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 4.195983935742972, | |
| "grad_norm": 0.14555774629116058, | |
| "learning_rate": 1.3154471222937903e-05, | |
| "loss": 0.05, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 4.202409638554217, | |
| "grad_norm": 0.16288283467292786, | |
| "learning_rate": 1.2947841608976718e-05, | |
| "loss": 0.0583, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 4.208835341365462, | |
| "grad_norm": 0.14024418592453003, | |
| "learning_rate": 1.2742735341805145e-05, | |
| "loss": 0.0404, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 4.215261044176707, | |
| "grad_norm": 0.13420836627483368, | |
| "learning_rate": 1.253915601062734e-05, | |
| "loss": 0.0587, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 4.2216867469879515, | |
| "grad_norm": 0.1517859846353531, | |
| "learning_rate": 1.2337107177927365e-05, | |
| "loss": 0.041, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.228112449799196, | |
| "grad_norm": 0.14088109135627747, | |
| "learning_rate": 1.213659237940662e-05, | |
| "loss": 0.0419, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 4.234538152610442, | |
| "grad_norm": 0.1399402618408203, | |
| "learning_rate": 1.1937615123922052e-05, | |
| "loss": 0.0443, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 4.240963855421687, | |
| "grad_norm": 0.15205906331539154, | |
| "learning_rate": 1.174017889342489e-05, | |
| "loss": 0.0429, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 4.247389558232932, | |
| "grad_norm": 0.3410640358924866, | |
| "learning_rate": 1.1544287142899446e-05, | |
| "loss": 0.0367, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 4.253815261044177, | |
| "grad_norm": 0.1357734352350235, | |
| "learning_rate": 1.1349943300302913e-05, | |
| "loss": 0.0294, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.260240963855422, | |
| "grad_norm": 0.14988286793231964, | |
| "learning_rate": 1.1157150766505253e-05, | |
| "loss": 0.0384, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 4.266666666666667, | |
| "grad_norm": 0.1004982739686966, | |
| "learning_rate": 1.0965912915229625e-05, | |
| "loss": 0.0263, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 4.2730923694779115, | |
| "grad_norm": 0.1396017074584961, | |
| "learning_rate": 1.0776233092993527e-05, | |
| "loss": 0.028, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 4.279518072289156, | |
| "grad_norm": 0.11266002804040909, | |
| "learning_rate": 1.0588114619050028e-05, | |
| "loss": 0.0205, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 4.285943775100401, | |
| "grad_norm": 0.15433090925216675, | |
| "learning_rate": 1.040156078532989e-05, | |
| "loss": 0.0316, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.292369477911647, | |
| "grad_norm": 0.15806183218955994, | |
| "learning_rate": 1.0216574856383742e-05, | |
| "loss": 0.0319, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 4.298795180722892, | |
| "grad_norm": 0.184244304895401, | |
| "learning_rate": 1.0033160069325166e-05, | |
| "loss": 0.0203, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 4.305220883534137, | |
| "grad_norm": 0.14676746726036072, | |
| "learning_rate": 9.851319633773926e-06, | |
| "loss": 0.0222, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 4.311646586345382, | |
| "grad_norm": 0.22139282524585724, | |
| "learning_rate": 9.671056731799777e-06, | |
| "loss": 0.0349, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 4.3180722891566266, | |
| "grad_norm": 0.16691678762435913, | |
| "learning_rate": 9.49237451786692e-06, | |
| "loss": 0.0227, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.324497991967871, | |
| "grad_norm": 0.16708509624004364, | |
| "learning_rate": 9.315276118778627e-06, | |
| "loss": 0.0374, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 4.330923694779116, | |
| "grad_norm": 0.09550745040178299, | |
| "learning_rate": 9.139764633622617e-06, | |
| "loss": 0.0511, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 4.337349397590361, | |
| "grad_norm": 0.1416180431842804, | |
| "learning_rate": 8.965843133716933e-06, | |
| "loss": 0.0739, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 4.343775100401606, | |
| "grad_norm": 0.15338513255119324, | |
| "learning_rate": 8.793514662555946e-06, | |
| "loss": 0.0452, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 4.350200803212852, | |
| "grad_norm": 0.19460880756378174, | |
| "learning_rate": 8.622782235757276e-06, | |
| "loss": 0.0617, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.356626506024097, | |
| "grad_norm": 0.14072959125041962, | |
| "learning_rate": 8.453648841009021e-06, | |
| "loss": 0.0435, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 4.363052208835342, | |
| "grad_norm": 0.25675803422927856, | |
| "learning_rate": 8.286117438017337e-06, | |
| "loss": 0.0503, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 4.3694779116465865, | |
| "grad_norm": 0.13942734897136688, | |
| "learning_rate": 8.120190958454843e-06, | |
| "loss": 0.062, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 4.375903614457831, | |
| "grad_norm": 0.13195668160915375, | |
| "learning_rate": 7.955872305909152e-06, | |
| "loss": 0.0479, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 4.382329317269076, | |
| "grad_norm": 0.13269154727458954, | |
| "learning_rate": 7.793164355832127e-06, | |
| "loss": 0.0414, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.388755020080321, | |
| "grad_norm": 0.1273636668920517, | |
| "learning_rate": 7.632069955489585e-06, | |
| "loss": 0.0359, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 4.395180722891566, | |
| "grad_norm": 0.14960840344429016, | |
| "learning_rate": 7.472591923911398e-06, | |
| "loss": 0.0374, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 4.401606425702811, | |
| "grad_norm": 0.14672979712486267, | |
| "learning_rate": 7.314733051842282e-06, | |
| "loss": 0.0323, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 4.408032128514057, | |
| "grad_norm": 0.13686661422252655, | |
| "learning_rate": 7.158496101692802e-06, | |
| "loss": 0.0314, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 4.414457831325302, | |
| "grad_norm": 0.15936917066574097, | |
| "learning_rate": 7.003883807491185e-06, | |
| "loss": 0.0493, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.4208835341365464, | |
| "grad_norm": 0.16186754405498505, | |
| "learning_rate": 6.85089887483541e-06, | |
| "loss": 0.0337, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 4.427309236947791, | |
| "grad_norm": 0.1702742725610733, | |
| "learning_rate": 6.699543980845801e-06, | |
| "loss": 0.0306, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 4.433734939759036, | |
| "grad_norm": 0.16083656251430511, | |
| "learning_rate": 6.549821774118325e-06, | |
| "loss": 0.0341, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 4.440160642570281, | |
| "grad_norm": 0.11489646136760712, | |
| "learning_rate": 6.401734874678089e-06, | |
| "loss": 0.0221, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 4.446586345381526, | |
| "grad_norm": 0.1623634397983551, | |
| "learning_rate": 6.255285873933569e-06, | |
| "loss": 0.0234, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.453012048192771, | |
| "grad_norm": 0.1647336781024933, | |
| "learning_rate": 6.110477334631326e-06, | |
| "loss": 0.0305, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 4.459437751004016, | |
| "grad_norm": 0.13188236951828003, | |
| "learning_rate": 5.967311790811014e-06, | |
| "loss": 0.0194, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 4.4658634538152615, | |
| "grad_norm": 0.17995339632034302, | |
| "learning_rate": 5.825791747761123e-06, | |
| "loss": 0.0368, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 4.472289156626506, | |
| "grad_norm": 0.18333207070827484, | |
| "learning_rate": 5.685919681975149e-06, | |
| "loss": 0.0303, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 4.478714859437751, | |
| "grad_norm": 0.13925676047801971, | |
| "learning_rate": 5.547698041108229e-06, | |
| "loss": 0.0314, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.485140562248996, | |
| "grad_norm": 0.10495959222316742, | |
| "learning_rate": 5.4111292439342986e-06, | |
| "loss": 0.027, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 4.491566265060241, | |
| "grad_norm": 0.1200391873717308, | |
| "learning_rate": 5.276215680303831e-06, | |
| "loss": 0.0475, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 4.497991967871486, | |
| "grad_norm": 0.12512274086475372, | |
| "learning_rate": 5.14295971110188e-06, | |
| "loss": 0.0513, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 4.504417670682731, | |
| "grad_norm": 0.11158251017332077, | |
| "learning_rate": 5.011363668206948e-06, | |
| "loss": 0.049, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 4.510843373493976, | |
| "grad_norm": 0.12815481424331665, | |
| "learning_rate": 4.881429854450004e-06, | |
| "loss": 0.0465, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.517269076305221, | |
| "grad_norm": 0.1294533908367157, | |
| "learning_rate": 4.753160543574331e-06, | |
| "loss": 0.0445, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 4.523694779116466, | |
| "grad_norm": 0.1252312958240509, | |
| "learning_rate": 4.626557980195623e-06, | |
| "loss": 0.0383, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 4.530120481927711, | |
| "grad_norm": 0.14612750709056854, | |
| "learning_rate": 4.501624379762803e-06, | |
| "loss": 0.0567, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 4.536546184738956, | |
| "grad_norm": 0.11176297813653946, | |
| "learning_rate": 4.3783619285191705e-06, | |
| "loss": 0.0418, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 4.542971887550201, | |
| "grad_norm": 0.13328702747821808, | |
| "learning_rate": 4.2567727834641915e-06, | |
| "loss": 0.0316, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.549397590361446, | |
| "grad_norm": 0.11925917118787766, | |
| "learning_rate": 4.136859072315758e-06, | |
| "loss": 0.0403, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 4.555823293172691, | |
| "grad_norm": 0.14322586357593536, | |
| "learning_rate": 4.018622893472912e-06, | |
| "loss": 0.0486, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 4.562248995983936, | |
| "grad_norm": 0.135334774851799, | |
| "learning_rate": 3.902066315979158e-06, | |
| "loss": 0.0358, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 4.5686746987951805, | |
| "grad_norm": 0.14582079648971558, | |
| "learning_rate": 3.787191379486288e-06, | |
| "loss": 0.0322, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 4.575100401606425, | |
| "grad_norm": 0.16357247531414032, | |
| "learning_rate": 3.674000094218577e-06, | |
| "loss": 0.0331, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.581526104417671, | |
| "grad_norm": 0.1673492193222046, | |
| "learning_rate": 3.562494440937769e-06, | |
| "loss": 0.0299, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 4.587951807228916, | |
| "grad_norm": 0.14151403307914734, | |
| "learning_rate": 3.4526763709082476e-06, | |
| "loss": 0.0256, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 4.594377510040161, | |
| "grad_norm": 0.20980997383594513, | |
| "learning_rate": 3.344547805862985e-06, | |
| "loss": 0.0342, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 4.600803212851406, | |
| "grad_norm": 0.12801873683929443, | |
| "learning_rate": 3.2381106379699488e-06, | |
| "loss": 0.022, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 4.607228915662651, | |
| "grad_norm": 0.15073615312576294, | |
| "learning_rate": 3.1333667297989035e-06, | |
| "loss": 0.0179, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.613654618473896, | |
| "grad_norm": 0.14964726567268372, | |
| "learning_rate": 3.030317914288816e-06, | |
| "loss": 0.022, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 4.6200803212851405, | |
| "grad_norm": 0.11907092481851578, | |
| "learning_rate": 2.928965994715882e-06, | |
| "loss": 0.0199, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 4.626506024096385, | |
| "grad_norm": 0.1797790676355362, | |
| "learning_rate": 2.8293127446618383e-06, | |
| "loss": 0.0241, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 4.63293172690763, | |
| "grad_norm": 0.2542262673377991, | |
| "learning_rate": 2.7313599079830666e-06, | |
| "loss": 0.0415, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 4.639357429718876, | |
| "grad_norm": 0.16320320963859558, | |
| "learning_rate": 2.63510919877995e-06, | |
| "loss": 0.0283, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.64578313253012, | |
| "grad_norm": 0.13019272685050964, | |
| "learning_rate": 2.540562301366922e-06, | |
| "loss": 0.0467, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 4.652208835341366, | |
| "grad_norm": 0.09419631212949753, | |
| "learning_rate": 2.447720870243064e-06, | |
| "loss": 0.0517, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 4.658634538152611, | |
| "grad_norm": 0.2463325709104538, | |
| "learning_rate": 2.3565865300630206e-06, | |
| "loss": 0.0399, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 4.6650602409638555, | |
| "grad_norm": 0.14761923253536224, | |
| "learning_rate": 2.267160875608687e-06, | |
| "loss": 0.0643, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 4.6714859437751, | |
| "grad_norm": 0.15416769683361053, | |
| "learning_rate": 2.179445471761221e-06, | |
| "loss": 0.0506, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.677911646586345, | |
| "grad_norm": 0.1494145393371582, | |
| "learning_rate": 2.0934418534737098e-06, | |
| "loss": 0.0553, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.68433734939759, | |
| "grad_norm": 0.17155231535434723, | |
| "learning_rate": 2.0091515257442904e-06, | |
| "loss": 0.0434, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.690763052208835, | |
| "grad_norm": 0.12443775683641434, | |
| "learning_rate": 1.926575963589805e-06, | |
| "loss": 0.0337, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 4.697188755020081, | |
| "grad_norm": 0.11996244639158249, | |
| "learning_rate": 1.8457166120199987e-06, | |
| "loss": 0.0304, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 4.703614457831325, | |
| "grad_norm": 0.17615464329719543, | |
| "learning_rate": 1.7665748860122512e-06, | |
| "loss": 0.054, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.710040160642571, | |
| "grad_norm": 0.16451974213123322, | |
| "learning_rate": 1.689152170486752e-06, | |
| "loss": 0.0435, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 4.7164658634538155, | |
| "grad_norm": 0.223603755235672, | |
| "learning_rate": 1.6134498202823645e-06, | |
| "loss": 0.0439, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 4.72289156626506, | |
| "grad_norm": 0.14352591335773468, | |
| "learning_rate": 1.5394691601328338e-06, | |
| "loss": 0.0347, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 4.729317269076305, | |
| "grad_norm": 0.17710843682289124, | |
| "learning_rate": 1.467211484643627e-06, | |
| "loss": 0.0383, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 4.73574297188755, | |
| "grad_norm": 0.20051458477973938, | |
| "learning_rate": 1.3966780582693185e-06, | |
| "loss": 0.056, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.742168674698795, | |
| "grad_norm": 0.1505594104528427, | |
| "learning_rate": 1.3278701152913742e-06, | |
| "loss": 0.036, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 4.74859437751004, | |
| "grad_norm": 0.20499320328235626, | |
| "learning_rate": 1.2607888597966688e-06, | |
| "loss": 0.0438, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 4.755020080321285, | |
| "grad_norm": 0.1615976244211197, | |
| "learning_rate": 1.195435465656325e-06, | |
| "loss": 0.0258, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 4.76144578313253, | |
| "grad_norm": 0.13965967297554016, | |
| "learning_rate": 1.131811076505196e-06, | |
| "loss": 0.0195, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 4.767871485943775, | |
| "grad_norm": 0.18875765800476074, | |
| "learning_rate": 1.0699168057218823e-06, | |
| "loss": 0.0339, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.77429718875502, | |
| "grad_norm": 0.12984806299209595, | |
| "learning_rate": 1.0097537364091914e-06, | |
| "loss": 0.0274, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 4.780722891566265, | |
| "grad_norm": 0.11120978742837906, | |
| "learning_rate": 9.513229213752417e-07, | |
| "loss": 0.0233, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 4.78714859437751, | |
| "grad_norm": 0.1367001086473465, | |
| "learning_rate": 8.946253831150109e-07, | |
| "loss": 0.0301, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 4.793574297188755, | |
| "grad_norm": 0.11858794093132019, | |
| "learning_rate": 8.396621137924388e-07, | |
| "loss": 0.0244, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.1457306295633316, | |
| "learning_rate": 7.864340752230859e-07, | |
| "loss": 0.0297, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.806425702811245, | |
| "grad_norm": 0.133670836687088, | |
| "learning_rate": 7.349421988572691e-07, | |
| "loss": 0.0458, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 4.81285140562249, | |
| "grad_norm": 0.12656152248382568, | |
| "learning_rate": 6.851873857638192e-07, | |
| "loss": 0.0772, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 4.8192771084337345, | |
| "grad_norm": 0.09214270859956741, | |
| "learning_rate": 6.371705066142264e-07, | |
| "loss": 0.0415, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 4.82570281124498, | |
| "grad_norm": 0.1306275725364685, | |
| "learning_rate": 5.908924016674977e-07, | |
| "loss": 0.0608, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 4.832128514056225, | |
| "grad_norm": 0.1301136314868927, | |
| "learning_rate": 5.463538807553903e-07, | |
| "loss": 0.0429, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.83855421686747, | |
| "grad_norm": 0.1389213353395462, | |
| "learning_rate": 5.035557232682564e-07, | |
| "loss": 0.0525, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 4.844979919678715, | |
| "grad_norm": 0.15371650457382202, | |
| "learning_rate": 4.624986781414098e-07, | |
| "loss": 0.0382, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 4.85140562248996, | |
| "grad_norm": 0.13239037990570068, | |
| "learning_rate": 4.231834638420362e-07, | |
| "loss": 0.0378, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 4.857831325301205, | |
| "grad_norm": 0.13462603092193604, | |
| "learning_rate": 3.8561076835657017e-07, | |
| "loss": 0.0574, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 4.8642570281124495, | |
| "grad_norm": 0.13441871106624603, | |
| "learning_rate": 3.4978124917871556e-07, | |
| "loss": 0.0383, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.870682730923694, | |
| "grad_norm": 0.12943735718727112, | |
| "learning_rate": 3.1569553329788836e-07, | |
| "loss": 0.0372, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 4.877108433734939, | |
| "grad_norm": 0.13817009329795837, | |
| "learning_rate": 2.8335421718829193e-07, | |
| "loss": 0.037, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 4.883534136546185, | |
| "grad_norm": 0.15584351122379303, | |
| "learning_rate": 2.527578667984365e-07, | |
| "loss": 0.057, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 4.88995983935743, | |
| "grad_norm": 0.13548874855041504, | |
| "learning_rate": 2.239070175412694e-07, | |
| "loss": 0.0312, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.896385542168675, | |
| "grad_norm": 0.1511905938386917, | |
| "learning_rate": 1.9680217428479364e-07, | |
| "loss": 0.0422, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.90281124497992, | |
| "grad_norm": 0.1757678985595703, | |
| "learning_rate": 1.714438113431971e-07, | |
| "loss": 0.0431, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 4.909236947791165, | |
| "grad_norm": 0.17349182069301605, | |
| "learning_rate": 1.4783237246862592e-07, | |
| "loss": 0.0352, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 4.9156626506024095, | |
| "grad_norm": 0.14009258151054382, | |
| "learning_rate": 1.259682708433574e-07, | |
| "loss": 0.0405, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 4.922088353413654, | |
| "grad_norm": 0.1775335967540741, | |
| "learning_rate": 1.0585188907260569e-07, | |
| "loss": 0.04, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 4.928514056224899, | |
| "grad_norm": 0.15646418929100037, | |
| "learning_rate": 8.748357917780503e-08, | |
| "loss": 0.034, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.934939759036144, | |
| "grad_norm": 0.1268489807844162, | |
| "learning_rate": 7.086366259044796e-08, | |
| "loss": 0.0199, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 4.94136546184739, | |
| "grad_norm": 0.17802543938159943, | |
| "learning_rate": 5.59924301464898e-08, | |
| "loss": 0.0365, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 4.947791164658635, | |
| "grad_norm": 0.18624529242515564, | |
| "learning_rate": 4.287014208120832e-08, | |
| "loss": 0.0319, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 4.95421686746988, | |
| "grad_norm": 0.1787281632423401, | |
| "learning_rate": 3.149702802470733e-08, | |
| "loss": 0.0216, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 4.9606425702811245, | |
| "grad_norm": 0.19038884341716766, | |
| "learning_rate": 2.1873286997875498e-08, | |
| "loss": 0.0341, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.967068273092369, | |
| "grad_norm": 0.15438446402549744, | |
| "learning_rate": 1.3999087408866906e-08, | |
| "loss": 0.0378, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 4.973493975903614, | |
| "grad_norm": 0.14491575956344604, | |
| "learning_rate": 7.874567050214499e-09, | |
| "loss": 0.0398, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 4.979919678714859, | |
| "grad_norm": 0.18205036222934723, | |
| "learning_rate": 3.4998330963764705e-09, | |
| "loss": 0.0426, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 4.986345381526104, | |
| "grad_norm": 0.11920995265245438, | |
| "learning_rate": 8.749621018822041e-10, | |
| "loss": 0.027, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 4.992771084337349, | |
| "grad_norm": 0.15572018921375275, | |
| "learning_rate": 0.0, | |
| "loss": 0.0339, | |
| "step": 775 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 775, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3110452392841708e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |