| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 11.19194180190263, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005595970900951315, |
| "grad_norm": 7.419506072998047, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.9689, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01119194180190263, |
| "grad_norm": 8.035171508789062, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.8977, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.016787912702853944, |
| "grad_norm": 7.580524444580078, |
| "learning_rate": 3e-06, |
| "loss": 0.9942, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02238388360380526, |
| "grad_norm": 5.7520976066589355, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.8421, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.027979854504756575, |
| "grad_norm": 4.714428901672363, |
| "learning_rate": 5e-06, |
| "loss": 0.6063, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03357582540570789, |
| "grad_norm": 4.136861801147461, |
| "learning_rate": 6e-06, |
| "loss": 0.4259, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03917179630665921, |
| "grad_norm": 2.1667540073394775, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 0.3447, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04476776720761052, |
| "grad_norm": 2.3095765113830566, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.284, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05036373810856184, |
| "grad_norm": 1.2860591411590576, |
| "learning_rate": 9e-06, |
| "loss": 0.2067, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05595970900951315, |
| "grad_norm": 2.0302886962890625, |
| "learning_rate": 1e-05, |
| "loss": 0.1943, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06155567991046446, |
| "grad_norm": 1.2757196426391602, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.1442, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06715165081141578, |
| "grad_norm": 1.5842756032943726, |
| "learning_rate": 1.2e-05, |
| "loss": 0.132, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0727476217123671, |
| "grad_norm": 1.0327903032302856, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.097, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07834359261331841, |
| "grad_norm": 0.733019232749939, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.0807, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08393956351426973, |
| "grad_norm": 0.9548436999320984, |
| "learning_rate": 1.5e-05, |
| "loss": 0.0922, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08953553441522104, |
| "grad_norm": 0.44906941056251526, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.0841, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09513150531617236, |
| "grad_norm": 0.9586009979248047, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.0726, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10072747621712368, |
| "grad_norm": 0.6236313581466675, |
| "learning_rate": 1.8e-05, |
| "loss": 0.0631, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.10632344711807498, |
| "grad_norm": 1.1688262224197388, |
| "learning_rate": 1.9e-05, |
| "loss": 0.0717, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1119194180190263, |
| "grad_norm": 1.5576119422912598, |
| "learning_rate": 2e-05, |
| "loss": 0.0718, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11751538891997762, |
| "grad_norm": 1.0707802772521973, |
| "learning_rate": 2.1e-05, |
| "loss": 0.0591, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.12311135982092893, |
| "grad_norm": 0.8612272143363953, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.0623, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12870733072188026, |
| "grad_norm": 0.796205997467041, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.0563, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.13430330162283155, |
| "grad_norm": 1.127061367034912, |
| "learning_rate": 2.4e-05, |
| "loss": 0.0545, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.13989927252378287, |
| "grad_norm": 0.9559623003005981, |
| "learning_rate": 2.5e-05, |
| "loss": 0.0543, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1454952434247342, |
| "grad_norm": 0.7295358777046204, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.0554, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1510912143256855, |
| "grad_norm": 0.8386074900627136, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.0488, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.15668718522663683, |
| "grad_norm": 0.9443495869636536, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.0639, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.16228315612758815, |
| "grad_norm": 0.8754186630249023, |
| "learning_rate": 2.9e-05, |
| "loss": 0.0477, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.16787912702853947, |
| "grad_norm": 0.5491052269935608, |
| "learning_rate": 3e-05, |
| "loss": 0.0509, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.17347509792949076, |
| "grad_norm": 0.7870469093322754, |
| "learning_rate": 3.1e-05, |
| "loss": 0.0478, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.17907106883044208, |
| "grad_norm": 0.9322296380996704, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.0514, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1846670397313934, |
| "grad_norm": 1.236414909362793, |
| "learning_rate": 3.3e-05, |
| "loss": 0.0504, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.19026301063234471, |
| "grad_norm": 1.2571903467178345, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.0374, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.19585898153329603, |
| "grad_norm": 1.1705288887023926, |
| "learning_rate": 3.5e-05, |
| "loss": 0.0514, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.20145495243424735, |
| "grad_norm": 1.0005333423614502, |
| "learning_rate": 3.6e-05, |
| "loss": 0.0459, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.20705092333519864, |
| "grad_norm": 0.5335679054260254, |
| "learning_rate": 3.7e-05, |
| "loss": 0.0444, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.21264689423614996, |
| "grad_norm": 1.052669882774353, |
| "learning_rate": 3.8e-05, |
| "loss": 0.0409, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.21824286513710128, |
| "grad_norm": 0.44473376870155334, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.0505, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2238388360380526, |
| "grad_norm": 0.6711838841438293, |
| "learning_rate": 4e-05, |
| "loss": 0.0388, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.22943480693900392, |
| "grad_norm": 0.55412358045578, |
| "learning_rate": 4.1e-05, |
| "loss": 0.0416, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.23503077783995524, |
| "grad_norm": 1.0375343561172485, |
| "learning_rate": 4.2e-05, |
| "loss": 0.0501, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.24062674874090656, |
| "grad_norm": 0.7955525517463684, |
| "learning_rate": 4.3e-05, |
| "loss": 0.0461, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.24622271964185785, |
| "grad_norm": 0.8107234239578247, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.0448, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2518186905428092, |
| "grad_norm": 0.8368202447891235, |
| "learning_rate": 4.5e-05, |
| "loss": 0.0459, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2574146614437605, |
| "grad_norm": 0.6938339471817017, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.034, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2630106323447118, |
| "grad_norm": 0.8612020611763, |
| "learning_rate": 4.7e-05, |
| "loss": 0.0454, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2686066032456631, |
| "grad_norm": 0.777197539806366, |
| "learning_rate": 4.8e-05, |
| "loss": 0.0381, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2742025741466144, |
| "grad_norm": 0.6520339250564575, |
| "learning_rate": 4.9e-05, |
| "loss": 0.0381, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.27979854504756574, |
| "grad_norm": 0.5808746814727783, |
| "learning_rate": 5e-05, |
| "loss": 0.0285, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.28539451594851706, |
| "grad_norm": 0.9482337832450867, |
| "learning_rate": 5.1000000000000006e-05, |
| "loss": 0.0362, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2909904868494684, |
| "grad_norm": 0.5615134239196777, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 0.0322, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2965864577504197, |
| "grad_norm": 1.2695409059524536, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 0.0411, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.302182428651371, |
| "grad_norm": 0.7221632599830627, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 0.0422, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.30777839955232233, |
| "grad_norm": 1.1144938468933105, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 0.0334, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.31337437045327365, |
| "grad_norm": 0.6722885966300964, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 0.0436, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.318970341354225, |
| "grad_norm": 1.0043433904647827, |
| "learning_rate": 5.6999999999999996e-05, |
| "loss": 0.0452, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3245663122551763, |
| "grad_norm": 0.9483539462089539, |
| "learning_rate": 5.8e-05, |
| "loss": 0.0492, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3301622831561276, |
| "grad_norm": 0.7825531363487244, |
| "learning_rate": 5.9e-05, |
| "loss": 0.0381, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.33575825405707893, |
| "grad_norm": 0.7982919216156006, |
| "learning_rate": 6e-05, |
| "loss": 0.0447, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3413542249580302, |
| "grad_norm": 0.9162524342536926, |
| "learning_rate": 6.1e-05, |
| "loss": 0.0453, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3469501958589815, |
| "grad_norm": 0.5597997903823853, |
| "learning_rate": 6.2e-05, |
| "loss": 0.0393, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.35254616675993283, |
| "grad_norm": 0.713256299495697, |
| "learning_rate": 6.3e-05, |
| "loss": 0.0394, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.35814213766088415, |
| "grad_norm": 0.7356066703796387, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.0339, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.36373810856183547, |
| "grad_norm": 0.5933259129524231, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 0.038, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3693340794627868, |
| "grad_norm": 0.5277016162872314, |
| "learning_rate": 6.6e-05, |
| "loss": 0.0383, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3749300503637381, |
| "grad_norm": 0.9106026887893677, |
| "learning_rate": 6.7e-05, |
| "loss": 0.0268, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.38052602126468943, |
| "grad_norm": 0.5941755771636963, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 0.0399, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.38612199216564075, |
| "grad_norm": 0.7207239270210266, |
| "learning_rate": 6.9e-05, |
| "loss": 0.0304, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.39171796306659207, |
| "grad_norm": 0.5808258652687073, |
| "learning_rate": 7e-05, |
| "loss": 0.0317, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3973139339675434, |
| "grad_norm": 0.6304859519004822, |
| "learning_rate": 7.1e-05, |
| "loss": 0.0417, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4029099048684947, |
| "grad_norm": 0.6625694036483765, |
| "learning_rate": 7.2e-05, |
| "loss": 0.0301, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.408505875769446, |
| "grad_norm": 0.6456591486930847, |
| "learning_rate": 7.3e-05, |
| "loss": 0.0416, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4141018466703973, |
| "grad_norm": 0.8103715181350708, |
| "learning_rate": 7.4e-05, |
| "loss": 0.0398, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4196978175713486, |
| "grad_norm": 0.592147707939148, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.0317, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4252937884722999, |
| "grad_norm": 0.6823825836181641, |
| "learning_rate": 7.6e-05, |
| "loss": 0.031, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.43088975937325125, |
| "grad_norm": 0.3274383544921875, |
| "learning_rate": 7.7e-05, |
| "loss": 0.0305, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.43648573027420257, |
| "grad_norm": 0.3436225950717926, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.0338, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4420817011751539, |
| "grad_norm": 0.8361327052116394, |
| "learning_rate": 7.900000000000001e-05, |
| "loss": 0.0264, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4476776720761052, |
| "grad_norm": 0.5449605584144592, |
| "learning_rate": 8e-05, |
| "loss": 0.0321, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4532736429770565, |
| "grad_norm": 0.31227922439575195, |
| "learning_rate": 8.1e-05, |
| "loss": 0.0272, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.45886961387800784, |
| "grad_norm": 0.6099038124084473, |
| "learning_rate": 8.2e-05, |
| "loss": 0.0504, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.46446558477895916, |
| "grad_norm": 0.6343345642089844, |
| "learning_rate": 8.3e-05, |
| "loss": 0.0343, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.4700615556799105, |
| "grad_norm": 0.7962288856506348, |
| "learning_rate": 8.4e-05, |
| "loss": 0.0292, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.4756575265808618, |
| "grad_norm": 0.3960738182067871, |
| "learning_rate": 8.5e-05, |
| "loss": 0.033, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4812534974818131, |
| "grad_norm": 0.9380257725715637, |
| "learning_rate": 8.6e-05, |
| "loss": 0.0404, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4868494683827644, |
| "grad_norm": 0.7713156342506409, |
| "learning_rate": 8.7e-05, |
| "loss": 0.0387, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4924454392837157, |
| "grad_norm": 1.137207269668579, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 0.039, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.498041410184667, |
| "grad_norm": 0.7128203511238098, |
| "learning_rate": 8.900000000000001e-05, |
| "loss": 0.0354, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5036373810856184, |
| "grad_norm": 0.6396750211715698, |
| "learning_rate": 9e-05, |
| "loss": 0.0367, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5092333519865697, |
| "grad_norm": 0.6838144659996033, |
| "learning_rate": 9.1e-05, |
| "loss": 0.0369, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.514829322887521, |
| "grad_norm": 0.6156594157218933, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 0.0402, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5204252937884724, |
| "grad_norm": 0.5517926812171936, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 0.0497, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5260212646894236, |
| "grad_norm": 0.6177653670310974, |
| "learning_rate": 9.4e-05, |
| "loss": 0.0322, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5316172355903749, |
| "grad_norm": 0.5705161094665527, |
| "learning_rate": 9.5e-05, |
| "loss": 0.0365, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5372132064913262, |
| "grad_norm": 0.7966452836990356, |
| "learning_rate": 9.6e-05, |
| "loss": 0.0377, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5428091773922775, |
| "grad_norm": 0.7984173893928528, |
| "learning_rate": 9.7e-05, |
| "loss": 0.0335, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5484051482932288, |
| "grad_norm": 0.6380477547645569, |
| "learning_rate": 9.8e-05, |
| "loss": 0.0329, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5540011191941802, |
| "grad_norm": 0.7180393934249878, |
| "learning_rate": 9.900000000000001e-05, |
| "loss": 0.0302, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5595970900951315, |
| "grad_norm": 0.8885056972503662, |
| "learning_rate": 0.0001, |
| "loss": 0.0345, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5651930609960828, |
| "grad_norm": 0.41542354226112366, |
| "learning_rate": 9.999993165095463e-05, |
| "loss": 0.0445, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5707890318970341, |
| "grad_norm": 0.4343472421169281, |
| "learning_rate": 9.999972660400536e-05, |
| "loss": 0.0263, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5763850027979854, |
| "grad_norm": 0.7970145344734192, |
| "learning_rate": 9.999938485971279e-05, |
| "loss": 0.0322, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5819809736989368, |
| "grad_norm": 0.6129629015922546, |
| "learning_rate": 9.999890641901125e-05, |
| "loss": 0.0262, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5875769445998881, |
| "grad_norm": 0.5661425590515137, |
| "learning_rate": 9.999829128320874e-05, |
| "loss": 0.0317, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5931729155008394, |
| "grad_norm": 0.7532817721366882, |
| "learning_rate": 9.999753945398704e-05, |
| "loss": 0.0359, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5987688864017907, |
| "grad_norm": 0.42677804827690125, |
| "learning_rate": 9.999665093340165e-05, |
| "loss": 0.0273, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.604364857302742, |
| "grad_norm": 0.6325145363807678, |
| "learning_rate": 9.99956257238817e-05, |
| "loss": 0.0377, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6099608282036934, |
| "grad_norm": 0.6003039479255676, |
| "learning_rate": 9.999446382823013e-05, |
| "loss": 0.0327, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6155567991046447, |
| "grad_norm": 0.36753129959106445, |
| "learning_rate": 9.999316524962345e-05, |
| "loss": 0.0285, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.621152770005596, |
| "grad_norm": 0.43158769607543945, |
| "learning_rate": 9.999172999161198e-05, |
| "loss": 0.0275, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6267487409065473, |
| "grad_norm": 0.33566170930862427, |
| "learning_rate": 9.999015805811965e-05, |
| "loss": 0.0278, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6323447118074986, |
| "grad_norm": 0.671672523021698, |
| "learning_rate": 9.998844945344405e-05, |
| "loss": 0.0344, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.63794068270845, |
| "grad_norm": 1.1190325021743774, |
| "learning_rate": 9.998660418225645e-05, |
| "loss": 0.0304, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6435366536094013, |
| "grad_norm": 0.6546229124069214, |
| "learning_rate": 9.998462224960175e-05, |
| "loss": 0.0343, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6491326245103526, |
| "grad_norm": 0.7560105323791504, |
| "learning_rate": 9.998250366089848e-05, |
| "loss": 0.0259, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6547285954113039, |
| "grad_norm": 0.6937676072120667, |
| "learning_rate": 9.998024842193876e-05, |
| "loss": 0.0308, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6603245663122552, |
| "grad_norm": 0.4479691684246063, |
| "learning_rate": 9.997785653888835e-05, |
| "loss": 0.0272, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6659205372132065, |
| "grad_norm": 0.38218632340431213, |
| "learning_rate": 9.997532801828658e-05, |
| "loss": 0.0313, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.6715165081141579, |
| "grad_norm": 0.3345787525177002, |
| "learning_rate": 9.997266286704631e-05, |
| "loss": 0.0328, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6771124790151091, |
| "grad_norm": 0.3578011989593506, |
| "learning_rate": 9.996986109245395e-05, |
| "loss": 0.0373, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6827084499160604, |
| "grad_norm": 0.6602341532707214, |
| "learning_rate": 9.996692270216947e-05, |
| "loss": 0.0346, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6883044208170117, |
| "grad_norm": 0.4503819942474365, |
| "learning_rate": 9.996384770422629e-05, |
| "loss": 0.0243, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.693900391717963, |
| "grad_norm": 0.753041684627533, |
| "learning_rate": 9.996063610703137e-05, |
| "loss": 0.0277, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.6994963626189143, |
| "grad_norm": 0.3396258056163788, |
| "learning_rate": 9.995728791936504e-05, |
| "loss": 0.0219, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7050923335198657, |
| "grad_norm": 0.6529501676559448, |
| "learning_rate": 9.995380315038119e-05, |
| "loss": 0.0242, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.710688304420817, |
| "grad_norm": 0.2462773472070694, |
| "learning_rate": 9.9950181809607e-05, |
| "loss": 0.021, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7162842753217683, |
| "grad_norm": 0.4511205554008484, |
| "learning_rate": 9.994642390694308e-05, |
| "loss": 0.0267, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7218802462227196, |
| "grad_norm": 0.5708833336830139, |
| "learning_rate": 9.99425294526634e-05, |
| "loss": 0.0288, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7274762171236709, |
| "grad_norm": 0.4378319978713989, |
| "learning_rate": 9.993849845741524e-05, |
| "loss": 0.0308, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7330721880246223, |
| "grad_norm": 0.44127964973449707, |
| "learning_rate": 9.99343309322192e-05, |
| "loss": 0.0282, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7386681589255736, |
| "grad_norm": 0.35624831914901733, |
| "learning_rate": 9.993002688846913e-05, |
| "loss": 0.0298, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7442641298265249, |
| "grad_norm": 0.45579585433006287, |
| "learning_rate": 9.992558633793212e-05, |
| "loss": 0.0325, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7498601007274762, |
| "grad_norm": 0.6297839283943176, |
| "learning_rate": 9.992100929274846e-05, |
| "loss": 0.0369, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7554560716284275, |
| "grad_norm": 0.29105043411254883, |
| "learning_rate": 9.991629576543163e-05, |
| "loss": 0.0253, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7610520425293789, |
| "grad_norm": 0.501181960105896, |
| "learning_rate": 9.991144576886823e-05, |
| "loss": 0.0355, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7666480134303302, |
| "grad_norm": 0.4630679488182068, |
| "learning_rate": 9.990645931631796e-05, |
| "loss": 0.0264, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7722439843312815, |
| "grad_norm": 0.6088075637817383, |
| "learning_rate": 9.990133642141359e-05, |
| "loss": 0.0282, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7778399552322328, |
| "grad_norm": 0.5682616233825684, |
| "learning_rate": 9.989607709816091e-05, |
| "loss": 0.0331, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7834359261331841, |
| "grad_norm": 0.4457339644432068, |
| "learning_rate": 9.989068136093873e-05, |
| "loss": 0.0309, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7890318970341355, |
| "grad_norm": 0.566882848739624, |
| "learning_rate": 9.988514922449879e-05, |
| "loss": 0.0436, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.7946278679350868, |
| "grad_norm": 0.4208590090274811, |
| "learning_rate": 9.987948070396571e-05, |
| "loss": 0.0293, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8002238388360381, |
| "grad_norm": 0.5373462438583374, |
| "learning_rate": 9.987367581483705e-05, |
| "loss": 0.0333, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8058198097369894, |
| "grad_norm": 0.4833603799343109, |
| "learning_rate": 9.986773457298311e-05, |
| "loss": 0.0238, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8114157806379407, |
| "grad_norm": 0.3185485303401947, |
| "learning_rate": 9.986165699464705e-05, |
| "loss": 0.0279, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.817011751538892, |
| "grad_norm": 0.32943880558013916, |
| "learning_rate": 9.985544309644475e-05, |
| "loss": 0.0259, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.8226077224398433, |
| "grad_norm": 0.4028552174568176, |
| "learning_rate": 9.984909289536473e-05, |
| "loss": 0.0183, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8282036933407946, |
| "grad_norm": 0.3354315459728241, |
| "learning_rate": 9.984260640876821e-05, |
| "loss": 0.0279, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8337996642417459, |
| "grad_norm": 0.581444263458252, |
| "learning_rate": 9.983598365438902e-05, |
| "loss": 0.0231, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8393956351426972, |
| "grad_norm": 0.3263351321220398, |
| "learning_rate": 9.98292246503335e-05, |
| "loss": 0.0257, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8449916060436485, |
| "grad_norm": 0.4574286639690399, |
| "learning_rate": 9.98223294150805e-05, |
| "loss": 0.0172, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.8505875769445999, |
| "grad_norm": 0.6482700705528259, |
| "learning_rate": 9.981529796748134e-05, |
| "loss": 0.0252, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8561835478455512, |
| "grad_norm": 0.22327029705047607, |
| "learning_rate": 9.980813032675974e-05, |
| "loss": 0.0296, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8617795187465025, |
| "grad_norm": 0.39261817932128906, |
| "learning_rate": 9.980082651251175e-05, |
| "loss": 0.0226, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.8673754896474538, |
| "grad_norm": 0.3742023706436157, |
| "learning_rate": 9.979338654470569e-05, |
| "loss": 0.0283, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8729714605484051, |
| "grad_norm": 0.240834578871727, |
| "learning_rate": 9.97858104436822e-05, |
| "loss": 0.0176, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.8785674314493565, |
| "grad_norm": 0.39040738344192505, |
| "learning_rate": 9.977809823015401e-05, |
| "loss": 0.0225, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.8841634023503078, |
| "grad_norm": 0.3102349042892456, |
| "learning_rate": 9.977024992520602e-05, |
| "loss": 0.0229, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8897593732512591, |
| "grad_norm": 0.32893484830856323, |
| "learning_rate": 9.976226555029522e-05, |
| "loss": 0.0286, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.8953553441522104, |
| "grad_norm": 0.3821198046207428, |
| "learning_rate": 9.975414512725057e-05, |
| "loss": 0.0278, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9009513150531617, |
| "grad_norm": 0.3672045171260834, |
| "learning_rate": 9.974588867827301e-05, |
| "loss": 0.0275, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.906547285954113, |
| "grad_norm": 0.36223965883255005, |
| "learning_rate": 9.973749622593534e-05, |
| "loss": 0.028, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9121432568550644, |
| "grad_norm": 0.5474312901496887, |
| "learning_rate": 9.972896779318219e-05, |
| "loss": 0.0307, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9177392277560157, |
| "grad_norm": 0.7324241399765015, |
| "learning_rate": 9.972030340333001e-05, |
| "loss": 0.0246, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.923335198656967, |
| "grad_norm": 0.44370922446250916, |
| "learning_rate": 9.97115030800669e-05, |
| "loss": 0.0229, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9289311695579183, |
| "grad_norm": 0.40400007367134094, |
| "learning_rate": 9.970256684745258e-05, |
| "loss": 0.0368, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9345271404588696, |
| "grad_norm": 0.4597970247268677, |
| "learning_rate": 9.969349472991838e-05, |
| "loss": 0.0215, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.940123111359821, |
| "grad_norm": 0.41508862376213074, |
| "learning_rate": 9.968428675226714e-05, |
| "loss": 0.0251, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.9457190822607723, |
| "grad_norm": 0.5726234316825867, |
| "learning_rate": 9.967494293967312e-05, |
| "loss": 0.0385, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.9513150531617236, |
| "grad_norm": 0.47390761971473694, |
| "learning_rate": 9.966546331768191e-05, |
| "loss": 0.0269, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9569110240626749, |
| "grad_norm": 0.3252114951610565, |
| "learning_rate": 9.965584791221048e-05, |
| "loss": 0.023, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.9625069949636262, |
| "grad_norm": 0.4773138761520386, |
| "learning_rate": 9.964609674954696e-05, |
| "loss": 0.0322, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.9681029658645776, |
| "grad_norm": 0.45844170451164246, |
| "learning_rate": 9.963620985635065e-05, |
| "loss": 0.0233, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.9736989367655288, |
| "grad_norm": 0.40978696942329407, |
| "learning_rate": 9.962618725965196e-05, |
| "loss": 0.0337, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9792949076664801, |
| "grad_norm": 0.43942537903785706, |
| "learning_rate": 9.961602898685226e-05, |
| "loss": 0.0225, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9848908785674314, |
| "grad_norm": 0.7744397521018982, |
| "learning_rate": 9.96057350657239e-05, |
| "loss": 0.0302, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9904868494683827, |
| "grad_norm": 0.3644595444202423, |
| "learning_rate": 9.959530552441005e-05, |
| "loss": 0.0252, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.996082820369334, |
| "grad_norm": 0.29574769735336304, |
| "learning_rate": 9.95847403914247e-05, |
| "loss": 0.0222, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.0016787912702854, |
| "grad_norm": 0.5153500437736511, |
| "learning_rate": 9.95740396956525e-05, |
| "loss": 0.0291, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.0072747621712368, |
| "grad_norm": 0.5961137413978577, |
| "learning_rate": 9.956320346634876e-05, |
| "loss": 0.0266, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.012870733072188, |
| "grad_norm": 0.48836737871170044, |
| "learning_rate": 9.955223173313931e-05, |
| "loss": 0.0213, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.0184667039731394, |
| "grad_norm": 0.5610430240631104, |
| "learning_rate": 9.954112452602045e-05, |
| "loss": 0.0205, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.0240626748740906, |
| "grad_norm": 0.4025803804397583, |
| "learning_rate": 9.952988187535886e-05, |
| "loss": 0.0224, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.029658645775042, |
| "grad_norm": 0.605367124080658, |
| "learning_rate": 9.95185038118915e-05, |
| "loss": 0.0303, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.0352546166759933, |
| "grad_norm": 0.3206970989704132, |
| "learning_rate": 9.950699036672559e-05, |
| "loss": 0.0231, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.0408505875769447, |
| "grad_norm": 0.3495715260505676, |
| "learning_rate": 9.949534157133844e-05, |
| "loss": 0.024, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.046446558477896, |
| "grad_norm": 0.3895197808742523, |
| "learning_rate": 9.948355745757741e-05, |
| "loss": 0.0203, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.0520425293788471, |
| "grad_norm": 0.40038052201271057, |
| "learning_rate": 9.94716380576598e-05, |
| "loss": 0.0221, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.0576385002797986, |
| "grad_norm": 0.479744553565979, |
| "learning_rate": 9.945958340417283e-05, |
| "loss": 0.028, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.0632344711807498, |
| "grad_norm": 0.3020111322402954, |
| "learning_rate": 9.944739353007344e-05, |
| "loss": 0.0265, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.0688304420817012, |
| "grad_norm": 0.3391585648059845, |
| "learning_rate": 9.943506846868826e-05, |
| "loss": 0.0233, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.0744264129826524, |
| "grad_norm": 0.3941816985607147, |
| "learning_rate": 9.942260825371358e-05, |
| "loss": 0.0184, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.0800223838836038, |
| "grad_norm": 0.31161707639694214, |
| "learning_rate": 9.941001291921512e-05, |
| "loss": 0.0229, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.085618354784555, |
| "grad_norm": 0.33263275027275085, |
| "learning_rate": 9.939728249962807e-05, |
| "loss": 0.0227, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.0912143256855065, |
| "grad_norm": 0.35178300738334656, |
| "learning_rate": 9.938441702975689e-05, |
| "loss": 0.0224, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.0968102965864577, |
| "grad_norm": 0.374667227268219, |
| "learning_rate": 9.937141654477528e-05, |
| "loss": 0.0196, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.102406267487409, |
| "grad_norm": 0.2080841362476349, |
| "learning_rate": 9.93582810802261e-05, |
| "loss": 0.0274, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.1080022383883603, |
| "grad_norm": 0.29197070002555847, |
| "learning_rate": 9.934501067202117e-05, |
| "loss": 0.0242, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.1135982092893117, |
| "grad_norm": 0.32980409264564514, |
| "learning_rate": 9.93316053564413e-05, |
| "loss": 0.0189, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.119194180190263, |
| "grad_norm": 0.4776092767715454, |
| "learning_rate": 9.931806517013612e-05, |
| "loss": 0.022, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1247901510912144, |
| "grad_norm": 0.37389442324638367, |
| "learning_rate": 9.930439015012396e-05, |
| "loss": 0.0216, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.1303861219921656, |
| "grad_norm": 0.22275716066360474, |
| "learning_rate": 9.929058033379181e-05, |
| "loss": 0.0192, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.135982092893117, |
| "grad_norm": 0.5097452402114868, |
| "learning_rate": 9.927663575889521e-05, |
| "loss": 0.0198, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.1415780637940682, |
| "grad_norm": 0.3198114037513733, |
| "learning_rate": 9.926255646355804e-05, |
| "loss": 0.0218, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.1471740346950197, |
| "grad_norm": 0.1620880514383316, |
| "learning_rate": 9.92483424862726e-05, |
| "loss": 0.0227, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.1527700055959709, |
| "grad_norm": 0.2927526831626892, |
| "learning_rate": 9.923399386589933e-05, |
| "loss": 0.0195, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.1583659764969223, |
| "grad_norm": 0.2967079281806946, |
| "learning_rate": 9.921951064166684e-05, |
| "loss": 0.024, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.1639619473978735, |
| "grad_norm": 0.19401852786540985, |
| "learning_rate": 9.92048928531717e-05, |
| "loss": 0.0223, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.169557918298825, |
| "grad_norm": 0.28363627195358276, |
| "learning_rate": 9.919014054037836e-05, |
| "loss": 0.0188, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.1751538891997761, |
| "grad_norm": 0.3623961806297302, |
| "learning_rate": 9.917525374361912e-05, |
| "loss": 0.0206, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.1807498601007276, |
| "grad_norm": 0.503246545791626, |
| "learning_rate": 9.91602325035939e-05, |
| "loss": 0.0253, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.1863458310016788, |
| "grad_norm": 0.7744673490524292, |
| "learning_rate": 9.914507686137019e-05, |
| "loss": 0.0337, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.19194180190263, |
| "grad_norm": 0.48357081413269043, |
| "learning_rate": 9.912978685838294e-05, |
| "loss": 0.0309, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.1975377728035814, |
| "grad_norm": 0.22658684849739075, |
| "learning_rate": 9.911436253643445e-05, |
| "loss": 0.0208, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.2031337437045329, |
| "grad_norm": 0.40776172280311584, |
| "learning_rate": 9.90988039376942e-05, |
| "loss": 0.0232, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.208729714605484, |
| "grad_norm": 0.48974546790122986, |
| "learning_rate": 9.90831111046988e-05, |
| "loss": 0.0278, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.2143256855064353, |
| "grad_norm": 0.3066832423210144, |
| "learning_rate": 9.90672840803519e-05, |
| "loss": 0.018, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.2199216564073867, |
| "grad_norm": 0.22434163093566895, |
| "learning_rate": 9.905132290792394e-05, |
| "loss": 0.0141, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.225517627308338, |
| "grad_norm": 0.3365159034729004, |
| "learning_rate": 9.903522763105218e-05, |
| "loss": 0.0205, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.2311135982092893, |
| "grad_norm": 0.3467719256877899, |
| "learning_rate": 9.901899829374047e-05, |
| "loss": 0.0206, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.2367095691102405, |
| "grad_norm": 0.31818097829818726, |
| "learning_rate": 9.900263494035921e-05, |
| "loss": 0.0255, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.242305540011192, |
| "grad_norm": 0.3118780851364136, |
| "learning_rate": 9.89861376156452e-05, |
| "loss": 0.0211, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.2479015109121432, |
| "grad_norm": 0.2563456594944, |
| "learning_rate": 9.896950636470147e-05, |
| "loss": 0.0249, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.2534974818130946, |
| "grad_norm": 0.4434971213340759, |
| "learning_rate": 9.895274123299723e-05, |
| "loss": 0.0214, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.2590934527140458, |
| "grad_norm": 0.36243245005607605, |
| "learning_rate": 9.893584226636772e-05, |
| "loss": 0.0239, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.2646894236149973, |
| "grad_norm": 0.4027983546257019, |
| "learning_rate": 9.891880951101407e-05, |
| "loss": 0.0328, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.2702853945159485, |
| "grad_norm": 0.4992479383945465, |
| "learning_rate": 9.890164301350318e-05, |
| "loss": 0.0247, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.2758813654169, |
| "grad_norm": 0.5188339948654175, |
| "learning_rate": 9.888434282076758e-05, |
| "loss": 0.0252, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.281477336317851, |
| "grad_norm": 0.2691977620124817, |
| "learning_rate": 9.886690898010535e-05, |
| "loss": 0.0238, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.2870733072188025, |
| "grad_norm": 0.42759424448013306, |
| "learning_rate": 9.884934153917997e-05, |
| "loss": 0.0252, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.2926692781197537, |
| "grad_norm": 0.315560519695282, |
| "learning_rate": 9.883164054602012e-05, |
| "loss": 0.0184, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.2982652490207052, |
| "grad_norm": 0.34518998861312866, |
| "learning_rate": 9.881380604901964e-05, |
| "loss": 0.026, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.3038612199216564, |
| "grad_norm": 0.322465717792511, |
| "learning_rate": 9.879583809693738e-05, |
| "loss": 0.0217, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.3094571908226076, |
| "grad_norm": 0.31809547543525696, |
| "learning_rate": 9.877773673889701e-05, |
| "loss": 0.0219, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.315053161723559, |
| "grad_norm": 0.4411179721355438, |
| "learning_rate": 9.8759502024387e-05, |
| "loss": 0.0221, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.3206491326245104, |
| "grad_norm": 0.44775789976119995, |
| "learning_rate": 9.87411340032603e-05, |
| "loss": 0.0234, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.3262451035254617, |
| "grad_norm": 0.5176445245742798, |
| "learning_rate": 9.872263272573443e-05, |
| "loss": 0.0255, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.3318410744264129, |
| "grad_norm": 0.36430883407592773, |
| "learning_rate": 9.870399824239117e-05, |
| "loss": 0.0205, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.3374370453273643, |
| "grad_norm": 0.5294170379638672, |
| "learning_rate": 9.868523060417646e-05, |
| "loss": 0.0266, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.3430330162283157, |
| "grad_norm": 0.3633783459663391, |
| "learning_rate": 9.86663298624003e-05, |
| "loss": 0.0208, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.348628987129267, |
| "grad_norm": 0.5161033272743225, |
| "learning_rate": 9.864729606873663e-05, |
| "loss": 0.0201, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.3542249580302181, |
| "grad_norm": 0.6746691465377808, |
| "learning_rate": 9.862812927522309e-05, |
| "loss": 0.0243, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.3598209289311696, |
| "grad_norm": 0.2213054746389389, |
| "learning_rate": 9.860882953426099e-05, |
| "loss": 0.0209, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.365416899832121, |
| "grad_norm": 0.6545590162277222, |
| "learning_rate": 9.858939689861506e-05, |
| "loss": 0.0225, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.3710128707330722, |
| "grad_norm": 0.46804091334342957, |
| "learning_rate": 9.856983142141339e-05, |
| "loss": 0.0271, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.3766088416340234, |
| "grad_norm": 0.38381436467170715, |
| "learning_rate": 9.855013315614725e-05, |
| "loss": 0.0233, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.3822048125349748, |
| "grad_norm": 0.41659992933273315, |
| "learning_rate": 9.853030215667093e-05, |
| "loss": 0.0229, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.387800783435926, |
| "grad_norm": 0.4473920464515686, |
| "learning_rate": 9.851033847720166e-05, |
| "loss": 0.0278, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.3933967543368775, |
| "grad_norm": 0.3903592824935913, |
| "learning_rate": 9.849024217231935e-05, |
| "loss": 0.0222, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.3989927252378287, |
| "grad_norm": 0.296999454498291, |
| "learning_rate": 9.847001329696653e-05, |
| "loss": 0.0287, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.4045886961387801, |
| "grad_norm": 0.45139339566230774, |
| "learning_rate": 9.844965190644817e-05, |
| "loss": 0.0253, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.4101846670397313, |
| "grad_norm": 0.29245492815971375, |
| "learning_rate": 9.842915805643155e-05, |
| "loss": 0.0149, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.4157806379406828, |
| "grad_norm": 0.2889615595340729, |
| "learning_rate": 9.840853180294608e-05, |
| "loss": 0.0224, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.421376608841634, |
| "grad_norm": 0.4102277457714081, |
| "learning_rate": 9.838777320238312e-05, |
| "loss": 0.0268, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.4269725797425854, |
| "grad_norm": 0.5045889616012573, |
| "learning_rate": 9.836688231149592e-05, |
| "loss": 0.0195, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.4325685506435366, |
| "grad_norm": 0.5412267446517944, |
| "learning_rate": 9.834585918739936e-05, |
| "loss": 0.0262, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.438164521544488, |
| "grad_norm": 0.5022779703140259, |
| "learning_rate": 9.832470388756987e-05, |
| "loss": 0.0268, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.4437604924454392, |
| "grad_norm": 0.5818321108818054, |
| "learning_rate": 9.830341646984521e-05, |
| "loss": 0.0262, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.4493564633463907, |
| "grad_norm": 0.3627963066101074, |
| "learning_rate": 9.82819969924244e-05, |
| "loss": 0.0161, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.4549524342473419, |
| "grad_norm": 0.35047340393066406, |
| "learning_rate": 9.826044551386744e-05, |
| "loss": 0.0245, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.4605484051482933, |
| "grad_norm": 0.2970013916492462, |
| "learning_rate": 9.823876209309527e-05, |
| "loss": 0.0206, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.4661443760492445, |
| "grad_norm": 0.39108118414878845, |
| "learning_rate": 9.821694678938953e-05, |
| "loss": 0.0229, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.4717403469501957, |
| "grad_norm": 0.30723538994789124, |
| "learning_rate": 9.819499966239243e-05, |
| "loss": 0.0239, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.4773363178511472, |
| "grad_norm": 0.316388338804245, |
| "learning_rate": 9.817292077210659e-05, |
| "loss": 0.0232, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.4829322887520986, |
| "grad_norm": 0.2693226635456085, |
| "learning_rate": 9.815071017889482e-05, |
| "loss": 0.0201, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.4885282596530498, |
| "grad_norm": 0.2165406197309494, |
| "learning_rate": 9.812836794348004e-05, |
| "loss": 0.0178, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.494124230554001, |
| "grad_norm": 0.33953240513801575, |
| "learning_rate": 9.81058941269451e-05, |
| "loss": 0.0247, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.4997202014549524, |
| "grad_norm": 0.37577569484710693, |
| "learning_rate": 9.808328879073251e-05, |
| "loss": 0.0188, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.5053161723559039, |
| "grad_norm": 0.3397989273071289, |
| "learning_rate": 9.806055199664446e-05, |
| "loss": 0.0174, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.510912143256855, |
| "grad_norm": 0.11495699733495712, |
| "learning_rate": 9.803768380684242e-05, |
| "loss": 0.0193, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.5165081141578063, |
| "grad_norm": 0.3947618305683136, |
| "learning_rate": 9.801468428384716e-05, |
| "loss": 0.0195, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.5221040850587577, |
| "grad_norm": 0.3024958670139313, |
| "learning_rate": 9.799155349053851e-05, |
| "loss": 0.021, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.5277000559597091, |
| "grad_norm": 0.3651089072227478, |
| "learning_rate": 9.796829149015517e-05, |
| "loss": 0.0148, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.5332960268606604, |
| "grad_norm": 0.6126254796981812, |
| "learning_rate": 9.794489834629455e-05, |
| "loss": 0.0187, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.5388919977616116, |
| "grad_norm": 0.35577818751335144, |
| "learning_rate": 9.792137412291265e-05, |
| "loss": 0.0183, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.544487968662563, |
| "grad_norm": 0.26784461736679077, |
| "learning_rate": 9.789771888432375e-05, |
| "loss": 0.0239, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.5500839395635144, |
| "grad_norm": 0.3259308338165283, |
| "learning_rate": 9.787393269520039e-05, |
| "loss": 0.0174, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.5556799104644656, |
| "grad_norm": 0.3289090394973755, |
| "learning_rate": 9.785001562057309e-05, |
| "loss": 0.0185, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.5612758813654168, |
| "grad_norm": 0.41667595505714417, |
| "learning_rate": 9.782596772583026e-05, |
| "loss": 0.0264, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.5668718522663683, |
| "grad_norm": 0.4217163324356079, |
| "learning_rate": 9.780178907671789e-05, |
| "loss": 0.0221, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.5724678231673195, |
| "grad_norm": 0.3442951440811157, |
| "learning_rate": 9.777747973933948e-05, |
| "loss": 0.0195, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.578063794068271, |
| "grad_norm": 0.38543257117271423, |
| "learning_rate": 9.775303978015585e-05, |
| "loss": 0.0189, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.5836597649692221, |
| "grad_norm": 0.6017774939537048, |
| "learning_rate": 9.772846926598491e-05, |
| "loss": 0.0254, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.5892557358701733, |
| "grad_norm": 0.5754305720329285, |
| "learning_rate": 9.77037682640015e-05, |
| "loss": 0.0224, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.5948517067711248, |
| "grad_norm": 0.2952113747596741, |
| "learning_rate": 9.767893684173721e-05, |
| "loss": 0.0209, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.6004476776720762, |
| "grad_norm": 0.3667709231376648, |
| "learning_rate": 9.765397506708023e-05, |
| "loss": 0.0221, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.6060436485730274, |
| "grad_norm": 0.543677031993866, |
| "learning_rate": 9.762888300827507e-05, |
| "loss": 0.0216, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.6116396194739786, |
| "grad_norm": 0.3521057069301605, |
| "learning_rate": 9.760366073392246e-05, |
| "loss": 0.02, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.61723559037493, |
| "grad_norm": 0.35763946175575256, |
| "learning_rate": 9.757830831297914e-05, |
| "loss": 0.0244, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.6228315612758815, |
| "grad_norm": 0.25549840927124023, |
| "learning_rate": 9.755282581475769e-05, |
| "loss": 0.0224, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.6284275321768327, |
| "grad_norm": 0.22006206214427948, |
| "learning_rate": 9.752721330892624e-05, |
| "loss": 0.0178, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.6340235030777839, |
| "grad_norm": 0.2791355550289154, |
| "learning_rate": 9.750147086550844e-05, |
| "loss": 0.0204, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.6396194739787353, |
| "grad_norm": 0.34600383043289185, |
| "learning_rate": 9.747559855488313e-05, |
| "loss": 0.0206, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.6452154448796867, |
| "grad_norm": 0.40189531445503235, |
| "learning_rate": 9.744959644778422e-05, |
| "loss": 0.0213, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.650811415780638, |
| "grad_norm": 0.21385939419269562, |
| "learning_rate": 9.742346461530048e-05, |
| "loss": 0.0287, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.6564073866815892, |
| "grad_norm": 0.4269281327724457, |
| "learning_rate": 9.739720312887535e-05, |
| "loss": 0.0226, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.6620033575825406, |
| "grad_norm": 0.46277040243148804, |
| "learning_rate": 9.73708120603067e-05, |
| "loss": 0.0206, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.667599328483492, |
| "grad_norm": 0.340044230222702, |
| "learning_rate": 9.734429148174675e-05, |
| "loss": 0.016, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.6731952993844432, |
| "grad_norm": 0.33839765191078186, |
| "learning_rate": 9.731764146570173e-05, |
| "loss": 0.0208, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.6787912702853944, |
| "grad_norm": 0.4214085042476654, |
| "learning_rate": 9.729086208503174e-05, |
| "loss": 0.0291, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6843872411863459, |
| "grad_norm": 0.29594293236732483, |
| "learning_rate": 9.726395341295062e-05, |
| "loss": 0.0194, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.6899832120872973, |
| "grad_norm": 0.43080446124076843, |
| "learning_rate": 9.723691552302562e-05, |
| "loss": 0.0204, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.6955791829882485, |
| "grad_norm": 0.3255208134651184, |
| "learning_rate": 9.720974848917735e-05, |
| "loss": 0.0219, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.7011751538891997, |
| "grad_norm": 0.30094242095947266, |
| "learning_rate": 9.718245238567939e-05, |
| "loss": 0.0207, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.7067711247901511, |
| "grad_norm": 0.27606436610221863, |
| "learning_rate": 9.715502728715826e-05, |
| "loss": 0.025, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.7123670956911026, |
| "grad_norm": 0.21307139098644257, |
| "learning_rate": 9.712747326859315e-05, |
| "loss": 0.0202, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.7179630665920538, |
| "grad_norm": 0.4076824188232422, |
| "learning_rate": 9.709979040531569e-05, |
| "loss": 0.0181, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.723559037493005, |
| "grad_norm": 0.3973149359226227, |
| "learning_rate": 9.707197877300974e-05, |
| "loss": 0.0278, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.7291550083939562, |
| "grad_norm": 0.3367111086845398, |
| "learning_rate": 9.704403844771128e-05, |
| "loss": 0.0284, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.7347509792949076, |
| "grad_norm": 0.4137897193431854, |
| "learning_rate": 9.701596950580806e-05, |
| "loss": 0.0251, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.740346950195859, |
| "grad_norm": 0.28888463973999023, |
| "learning_rate": 9.698777202403953e-05, |
| "loss": 0.0185, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.7459429210968103, |
| "grad_norm": 0.2732876241207123, |
| "learning_rate": 9.695944607949649e-05, |
| "loss": 0.0206, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.7515388919977615, |
| "grad_norm": 0.5475505590438843, |
| "learning_rate": 9.693099174962103e-05, |
| "loss": 0.0239, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.757134862898713, |
| "grad_norm": 0.3212341070175171, |
| "learning_rate": 9.690240911220618e-05, |
| "loss": 0.0193, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.7627308337996643, |
| "grad_norm": 0.38309773802757263, |
| "learning_rate": 9.687369824539577e-05, |
| "loss": 0.0228, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.7683268047006155, |
| "grad_norm": 0.22085356712341309, |
| "learning_rate": 9.684485922768422e-05, |
| "loss": 0.0167, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.7739227756015667, |
| "grad_norm": 0.32358717918395996, |
| "learning_rate": 9.681589213791633e-05, |
| "loss": 0.0216, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.7795187465025182, |
| "grad_norm": 0.30354073643684387, |
| "learning_rate": 9.6786797055287e-05, |
| "loss": 0.0202, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.7851147174034696, |
| "grad_norm": 0.3479655981063843, |
| "learning_rate": 9.675757405934103e-05, |
| "loss": 0.0167, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.7907106883044208, |
| "grad_norm": 0.3674020767211914, |
| "learning_rate": 9.672822322997305e-05, |
| "loss": 0.0216, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.796306659205372, |
| "grad_norm": 0.2632925808429718, |
| "learning_rate": 9.669874464742705e-05, |
| "loss": 0.0166, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.8019026301063235, |
| "grad_norm": 0.22815559804439545, |
| "learning_rate": 9.66691383922964e-05, |
| "loss": 0.0182, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.8074986010072749, |
| "grad_norm": 0.2246052771806717, |
| "learning_rate": 9.663940454552342e-05, |
| "loss": 0.0186, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.813094571908226, |
| "grad_norm": 0.28712260723114014, |
| "learning_rate": 9.660954318839933e-05, |
| "loss": 0.0157, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.8186905428091773, |
| "grad_norm": 0.2282487452030182, |
| "learning_rate": 9.657955440256395e-05, |
| "loss": 0.0201, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.8242865137101287, |
| "grad_norm": 0.3279257118701935, |
| "learning_rate": 9.654943827000548e-05, |
| "loss": 0.0153, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.8298824846110802, |
| "grad_norm": 0.3519797623157501, |
| "learning_rate": 9.651919487306025e-05, |
| "loss": 0.0217, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.8354784555120314, |
| "grad_norm": 0.29638567566871643, |
| "learning_rate": 9.648882429441257e-05, |
| "loss": 0.0165, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.8410744264129826, |
| "grad_norm": 0.3102523982524872, |
| "learning_rate": 9.645832661709444e-05, |
| "loss": 0.02, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.846670397313934, |
| "grad_norm": 0.31784892082214355, |
| "learning_rate": 9.642770192448536e-05, |
| "loss": 0.0259, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.8522663682148854, |
| "grad_norm": 0.31783589720726013, |
| "learning_rate": 9.639695030031204e-05, |
| "loss": 0.0154, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.8578623391158366, |
| "grad_norm": 0.4002092778682709, |
| "learning_rate": 9.636607182864827e-05, |
| "loss": 0.0128, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.8634583100167879, |
| "grad_norm": 0.3656691610813141, |
| "learning_rate": 9.63350665939146e-05, |
| "loss": 0.0167, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.869054280917739, |
| "grad_norm": 0.34003934264183044, |
| "learning_rate": 9.630393468087818e-05, |
| "loss": 0.018, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.8746502518186905, |
| "grad_norm": 0.3051067888736725, |
| "learning_rate": 9.627267617465243e-05, |
| "loss": 0.0192, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.880246222719642, |
| "grad_norm": 0.32361093163490295, |
| "learning_rate": 9.624129116069694e-05, |
| "loss": 0.0262, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.8858421936205931, |
| "grad_norm": 0.20856234431266785, |
| "learning_rate": 9.620977972481716e-05, |
| "loss": 0.0259, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.8914381645215443, |
| "grad_norm": 0.3916553258895874, |
| "learning_rate": 9.617814195316411e-05, |
| "loss": 0.0184, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.8970341354224958, |
| "grad_norm": 0.461211621761322, |
| "learning_rate": 9.614637793223425e-05, |
| "loss": 0.018, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.9026301063234472, |
| "grad_norm": 0.4060401916503906, |
| "learning_rate": 9.611448774886924e-05, |
| "loss": 0.0196, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.9082260772243984, |
| "grad_norm": 0.362894207239151, |
| "learning_rate": 9.60824714902556e-05, |
| "loss": 0.0149, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.9138220481253496, |
| "grad_norm": 0.2224276214838028, |
| "learning_rate": 9.605032924392457e-05, |
| "loss": 0.0214, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.919418019026301, |
| "grad_norm": 0.36570799350738525, |
| "learning_rate": 9.601806109775179e-05, |
| "loss": 0.019, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.9250139899272525, |
| "grad_norm": 0.37845227122306824, |
| "learning_rate": 9.598566713995718e-05, |
| "loss": 0.0283, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.9306099608282037, |
| "grad_norm": 0.2989262044429779, |
| "learning_rate": 9.595314745910456e-05, |
| "loss": 0.0195, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.936205931729155, |
| "grad_norm": 0.4651845097541809, |
| "learning_rate": 9.59205021441015e-05, |
| "loss": 0.0221, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.9418019026301063, |
| "grad_norm": 0.16341492533683777, |
| "learning_rate": 9.588773128419906e-05, |
| "loss": 0.0189, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.9473978735310578, |
| "grad_norm": 0.3499149978160858, |
| "learning_rate": 9.58548349689915e-05, |
| "loss": 0.0163, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.952993844432009, |
| "grad_norm": 0.5015300512313843, |
| "learning_rate": 9.582181328841611e-05, |
| "loss": 0.0287, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.9585898153329602, |
| "grad_norm": 0.3239698112010956, |
| "learning_rate": 9.578866633275288e-05, |
| "loss": 0.0168, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.9641857862339116, |
| "grad_norm": 0.29603099822998047, |
| "learning_rate": 9.575539419262434e-05, |
| "loss": 0.0204, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.969781757134863, |
| "grad_norm": 0.4523886740207672, |
| "learning_rate": 9.572199695899522e-05, |
| "loss": 0.0247, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.9753777280358142, |
| "grad_norm": 0.2664707899093628, |
| "learning_rate": 9.568847472317232e-05, |
| "loss": 0.0155, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.9809736989367654, |
| "grad_norm": 0.3717735707759857, |
| "learning_rate": 9.565482757680415e-05, |
| "loss": 0.0279, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.9865696698377169, |
| "grad_norm": 0.4721260070800781, |
| "learning_rate": 9.562105561188069e-05, |
| "loss": 0.017, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.9921656407386683, |
| "grad_norm": 0.19504283368587494, |
| "learning_rate": 9.558715892073323e-05, |
| "loss": 0.0251, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.9977616116396195, |
| "grad_norm": 0.3900291919708252, |
| "learning_rate": 9.555313759603402e-05, |
| "loss": 0.028, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.0033575825405707, |
| "grad_norm": 0.3327538073062897, |
| "learning_rate": 9.551899173079607e-05, |
| "loss": 0.0214, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.008953553441522, |
| "grad_norm": 0.5092990398406982, |
| "learning_rate": 9.548472141837286e-05, |
| "loss": 0.0204, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.0145495243424736, |
| "grad_norm": 0.2563795745372772, |
| "learning_rate": 9.545032675245813e-05, |
| "loss": 0.0242, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.020145495243425, |
| "grad_norm": 0.1788598746061325, |
| "learning_rate": 9.541580782708557e-05, |
| "loss": 0.0189, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.025741466144376, |
| "grad_norm": 0.2857683598995209, |
| "learning_rate": 9.538116473662861e-05, |
| "loss": 0.0187, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.031337437045327, |
| "grad_norm": 0.25776809453964233, |
| "learning_rate": 9.534639757580013e-05, |
| "loss": 0.0176, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.036933407946279, |
| "grad_norm": 0.37827619910240173, |
| "learning_rate": 9.531150643965223e-05, |
| "loss": 0.0133, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.04252937884723, |
| "grad_norm": 0.36484652757644653, |
| "learning_rate": 9.527649142357596e-05, |
| "loss": 0.021, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.0481253497481813, |
| "grad_norm": 0.41479215025901794, |
| "learning_rate": 9.524135262330098e-05, |
| "loss": 0.0159, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.0537213206491325, |
| "grad_norm": 0.261192262172699, |
| "learning_rate": 9.520609013489547e-05, |
| "loss": 0.0169, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.059317291550084, |
| "grad_norm": 0.3758920431137085, |
| "learning_rate": 9.517070405476575e-05, |
| "loss": 0.02, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.0649132624510353, |
| "grad_norm": 0.33406686782836914, |
| "learning_rate": 9.513519447965595e-05, |
| "loss": 0.0176, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.0705092333519866, |
| "grad_norm": 0.18889296054840088, |
| "learning_rate": 9.509956150664796e-05, |
| "loss": 0.0167, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.0761052042529378, |
| "grad_norm": 0.231406569480896, |
| "learning_rate": 9.50638052331609e-05, |
| "loss": 0.0232, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.0817011751538894, |
| "grad_norm": 0.31842225790023804, |
| "learning_rate": 9.502792575695112e-05, |
| "loss": 0.0219, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.0872971460548406, |
| "grad_norm": 0.2191598266363144, |
| "learning_rate": 9.499192317611167e-05, |
| "loss": 0.0207, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.092893116955792, |
| "grad_norm": 0.3848901391029358, |
| "learning_rate": 9.49557975890723e-05, |
| "loss": 0.0205, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.098489087856743, |
| "grad_norm": 0.3654007017612457, |
| "learning_rate": 9.491954909459895e-05, |
| "loss": 0.0202, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.1040850587576942, |
| "grad_norm": 0.3708373010158539, |
| "learning_rate": 9.488317779179361e-05, |
| "loss": 0.0186, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.109681029658646, |
| "grad_norm": 0.29888278245925903, |
| "learning_rate": 9.484668378009408e-05, |
| "loss": 0.0179, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.115277000559597, |
| "grad_norm": 0.3273047208786011, |
| "learning_rate": 9.481006715927351e-05, |
| "loss": 0.0194, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.1208729714605483, |
| "grad_norm": 0.30253902077674866, |
| "learning_rate": 9.477332802944044e-05, |
| "loss": 0.0172, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.1264689423614995, |
| "grad_norm": 0.3017847537994385, |
| "learning_rate": 9.473646649103818e-05, |
| "loss": 0.0239, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.132064913262451, |
| "grad_norm": 0.2024342566728592, |
| "learning_rate": 9.46994826448448e-05, |
| "loss": 0.0229, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.1376608841634024, |
| "grad_norm": 0.25708290934562683, |
| "learning_rate": 9.46623765919727e-05, |
| "loss": 0.017, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.1432568550643536, |
| "grad_norm": 0.38740572333335876, |
| "learning_rate": 9.462514843386845e-05, |
| "loss": 0.0186, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.148852825965305, |
| "grad_norm": 0.41047894954681396, |
| "learning_rate": 9.458779827231237e-05, |
| "loss": 0.0197, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.1544487968662565, |
| "grad_norm": 0.26995226740837097, |
| "learning_rate": 9.45503262094184e-05, |
| "loss": 0.0183, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.1600447677672077, |
| "grad_norm": 0.3127893805503845, |
| "learning_rate": 9.451273234763371e-05, |
| "loss": 0.0206, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.165640738668159, |
| "grad_norm": 0.33325016498565674, |
| "learning_rate": 9.447501678973852e-05, |
| "loss": 0.0208, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.17123670956911, |
| "grad_norm": 0.2265041172504425, |
| "learning_rate": 9.443717963884569e-05, |
| "loss": 0.0177, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.1768326804700617, |
| "grad_norm": 0.44378480315208435, |
| "learning_rate": 9.439922099840054e-05, |
| "loss": 0.0232, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.182428651371013, |
| "grad_norm": 0.2953107953071594, |
| "learning_rate": 9.43611409721806e-05, |
| "loss": 0.0201, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.188024622271964, |
| "grad_norm": 0.3876049518585205, |
| "learning_rate": 9.432293966429514e-05, |
| "loss": 0.0164, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.1936205931729154, |
| "grad_norm": 0.2669300138950348, |
| "learning_rate": 9.428461717918511e-05, |
| "loss": 0.0153, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.199216564073867, |
| "grad_norm": 0.6801855564117432, |
| "learning_rate": 9.424617362162271e-05, |
| "loss": 0.0185, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.204812534974818, |
| "grad_norm": 0.3502347469329834, |
| "learning_rate": 9.420760909671118e-05, |
| "loss": 0.0253, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.2104085058757694, |
| "grad_norm": 0.3213407099246979, |
| "learning_rate": 9.416892370988444e-05, |
| "loss": 0.0221, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.2160044767767206, |
| "grad_norm": 0.45591723918914795, |
| "learning_rate": 9.413011756690685e-05, |
| "loss": 0.0303, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.2216004476776723, |
| "grad_norm": 0.5190838575363159, |
| "learning_rate": 9.409119077387294e-05, |
| "loss": 0.0214, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.2271964185786235, |
| "grad_norm": 0.24658669531345367, |
| "learning_rate": 9.405214343720707e-05, |
| "loss": 0.0169, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.2327923894795747, |
| "grad_norm": 0.26745668053627014, |
| "learning_rate": 9.401297566366318e-05, |
| "loss": 0.0174, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.238388360380526, |
| "grad_norm": 0.23573242127895355, |
| "learning_rate": 9.397368756032445e-05, |
| "loss": 0.0166, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.243984331281477, |
| "grad_norm": 0.38697415590286255, |
| "learning_rate": 9.393427923460308e-05, |
| "loss": 0.0175, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.2495803021824288, |
| "grad_norm": 0.26302671432495117, |
| "learning_rate": 9.389475079423988e-05, |
| "loss": 0.016, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.25517627308338, |
| "grad_norm": 0.520627498626709, |
| "learning_rate": 9.385510234730415e-05, |
| "loss": 0.0196, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.260772243984331, |
| "grad_norm": 0.3094232976436615, |
| "learning_rate": 9.381533400219318e-05, |
| "loss": 0.0197, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.266368214885283, |
| "grad_norm": 0.3238268196582794, |
| "learning_rate": 9.377544586763215e-05, |
| "loss": 0.0242, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.271964185786234, |
| "grad_norm": 0.37398698925971985, |
| "learning_rate": 9.373543805267368e-05, |
| "loss": 0.0225, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.2775601566871853, |
| "grad_norm": 0.22411245107650757, |
| "learning_rate": 9.369531066669758e-05, |
| "loss": 0.0259, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.2831561275881365, |
| "grad_norm": 0.2310367226600647, |
| "learning_rate": 9.365506381941066e-05, |
| "loss": 0.0198, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.2887520984890877, |
| "grad_norm": 0.4910151958465576, |
| "learning_rate": 9.36146976208462e-05, |
| "loss": 0.0234, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.2943480693900393, |
| "grad_norm": 0.2820461392402649, |
| "learning_rate": 9.357421218136386e-05, |
| "loss": 0.0176, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.2999440402909905, |
| "grad_norm": 0.22990214824676514, |
| "learning_rate": 9.353360761164931e-05, |
| "loss": 0.0185, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.3055400111919417, |
| "grad_norm": 0.33790138363838196, |
| "learning_rate": 9.349288402271388e-05, |
| "loss": 0.0178, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.311135982092893, |
| "grad_norm": 0.3388676345348358, |
| "learning_rate": 9.345204152589428e-05, |
| "loss": 0.0147, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.3167319529938446, |
| "grad_norm": 0.36007586121559143, |
| "learning_rate": 9.341108023285238e-05, |
| "loss": 0.0185, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.322327923894796, |
| "grad_norm": 0.41096752882003784, |
| "learning_rate": 9.337000025557476e-05, |
| "loss": 0.0219, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.327923894795747, |
| "grad_norm": 0.2878301441669464, |
| "learning_rate": 9.332880170637252e-05, |
| "loss": 0.0159, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.3335198656966982, |
| "grad_norm": 0.32061803340911865, |
| "learning_rate": 9.328748469788093e-05, |
| "loss": 0.0216, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.33911583659765, |
| "grad_norm": 0.29178762435913086, |
| "learning_rate": 9.32460493430591e-05, |
| "loss": 0.0178, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.344711807498601, |
| "grad_norm": 0.32889455556869507, |
| "learning_rate": 9.320449575518972e-05, |
| "loss": 0.0194, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.3503077783995523, |
| "grad_norm": 0.2980196475982666, |
| "learning_rate": 9.316282404787871e-05, |
| "loss": 0.015, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.3559037493005035, |
| "grad_norm": 0.21256855130195618, |
| "learning_rate": 9.31210343350549e-05, |
| "loss": 0.0151, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.361499720201455, |
| "grad_norm": 0.2378161996603012, |
| "learning_rate": 9.30791267309698e-05, |
| "loss": 0.0179, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.3670956911024064, |
| "grad_norm": 0.211124449968338, |
| "learning_rate": 9.30371013501972e-05, |
| "loss": 0.0147, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.3726916620033576, |
| "grad_norm": 0.3496321439743042, |
| "learning_rate": 9.299495830763286e-05, |
| "loss": 0.0144, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.378287632904309, |
| "grad_norm": 0.2865016758441925, |
| "learning_rate": 9.295269771849427e-05, |
| "loss": 0.0209, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.38388360380526, |
| "grad_norm": 0.22519885003566742, |
| "learning_rate": 9.291031969832026e-05, |
| "loss": 0.0177, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.3894795747062116, |
| "grad_norm": 0.41060182452201843, |
| "learning_rate": 9.286782436297073e-05, |
| "loss": 0.0169, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.395075545607163, |
| "grad_norm": 0.6265867352485657, |
| "learning_rate": 9.282521182862629e-05, |
| "loss": 0.0189, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.400671516508114, |
| "grad_norm": 0.3811153173446655, |
| "learning_rate": 9.278248221178798e-05, |
| "loss": 0.0274, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.4062674874090657, |
| "grad_norm": 0.2686716318130493, |
| "learning_rate": 9.273963562927695e-05, |
| "loss": 0.0198, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.411863458310017, |
| "grad_norm": 0.31025633215904236, |
| "learning_rate": 9.269667219823412e-05, |
| "loss": 0.0159, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.417459429210968, |
| "grad_norm": 0.23998180031776428, |
| "learning_rate": 9.265359203611987e-05, |
| "loss": 0.018, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.4230554001119193, |
| "grad_norm": 0.45635882019996643, |
| "learning_rate": 9.261039526071374e-05, |
| "loss": 0.0199, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.4286513710128705, |
| "grad_norm": 0.34626588225364685, |
| "learning_rate": 9.256708199011401e-05, |
| "loss": 0.0169, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.434247341913822, |
| "grad_norm": 0.27278828620910645, |
| "learning_rate": 9.252365234273755e-05, |
| "loss": 0.0173, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.4398433128147734, |
| "grad_norm": 0.5236303806304932, |
| "learning_rate": 9.248010643731935e-05, |
| "loss": 0.0226, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.4454392837157246, |
| "grad_norm": 0.27782773971557617, |
| "learning_rate": 9.243644439291223e-05, |
| "loss": 0.0194, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.451035254616676, |
| "grad_norm": 0.280048131942749, |
| "learning_rate": 9.239266632888659e-05, |
| "loss": 0.0174, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.4566312255176275, |
| "grad_norm": 0.3045734763145447, |
| "learning_rate": 9.234877236492997e-05, |
| "loss": 0.0148, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.4622271964185787, |
| "grad_norm": 0.1700965315103531, |
| "learning_rate": 9.230476262104677e-05, |
| "loss": 0.0155, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.46782316731953, |
| "grad_norm": 0.3037347197532654, |
| "learning_rate": 9.226063721755799e-05, |
| "loss": 0.0132, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.473419138220481, |
| "grad_norm": 0.29750266671180725, |
| "learning_rate": 9.221639627510076e-05, |
| "loss": 0.0149, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.4790151091214327, |
| "grad_norm": 0.1919635832309723, |
| "learning_rate": 9.217203991462815e-05, |
| "loss": 0.015, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.484611080022384, |
| "grad_norm": 0.2919257879257202, |
| "learning_rate": 9.212756825740873e-05, |
| "loss": 0.0177, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.490207050923335, |
| "grad_norm": 0.17676684260368347, |
| "learning_rate": 9.208298142502636e-05, |
| "loss": 0.0175, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.4958030218242864, |
| "grad_norm": 0.24397723376750946, |
| "learning_rate": 9.20382795393797e-05, |
| "loss": 0.0179, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.501398992725238, |
| "grad_norm": 0.32645362615585327, |
| "learning_rate": 9.199346272268199e-05, |
| "loss": 0.0179, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.5069949636261892, |
| "grad_norm": 0.35162001848220825, |
| "learning_rate": 9.194853109746074e-05, |
| "loss": 0.0174, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.5125909345271404, |
| "grad_norm": 0.4019016623497009, |
| "learning_rate": 9.190348478655724e-05, |
| "loss": 0.015, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.5181869054280916, |
| "grad_norm": 0.4017965495586395, |
| "learning_rate": 9.185832391312644e-05, |
| "loss": 0.0238, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.523782876329043, |
| "grad_norm": 0.41645774245262146, |
| "learning_rate": 9.18130486006364e-05, |
| "loss": 0.0143, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.5293788472299945, |
| "grad_norm": 0.28400033712387085, |
| "learning_rate": 9.176765897286813e-05, |
| "loss": 0.0196, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.5349748181309457, |
| "grad_norm": 0.4045359492301941, |
| "learning_rate": 9.17221551539151e-05, |
| "loss": 0.0191, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.540570789031897, |
| "grad_norm": 0.37660202383995056, |
| "learning_rate": 9.167653726818305e-05, |
| "loss": 0.0138, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.5461667599328486, |
| "grad_norm": 0.35835906863212585, |
| "learning_rate": 9.163080544038952e-05, |
| "loss": 0.0213, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.5517627308338, |
| "grad_norm": 0.3906223177909851, |
| "learning_rate": 9.158495979556358e-05, |
| "loss": 0.0204, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.557358701734751, |
| "grad_norm": 0.23904386162757874, |
| "learning_rate": 9.153900045904549e-05, |
| "loss": 0.0193, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.562954672635702, |
| "grad_norm": 0.3690219521522522, |
| "learning_rate": 9.14929275564863e-05, |
| "loss": 0.0218, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.5685506435366534, |
| "grad_norm": 0.3098298907279968, |
| "learning_rate": 9.144674121384757e-05, |
| "loss": 0.0142, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.574146614437605, |
| "grad_norm": 0.5726227164268494, |
| "learning_rate": 9.140044155740101e-05, |
| "loss": 0.0168, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.5797425853385563, |
| "grad_norm": 0.32549935579299927, |
| "learning_rate": 9.135402871372808e-05, |
| "loss": 0.0228, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.5853385562395075, |
| "grad_norm": 0.35607558488845825, |
| "learning_rate": 9.130750280971978e-05, |
| "loss": 0.0234, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.590934527140459, |
| "grad_norm": 0.31833362579345703, |
| "learning_rate": 9.126086397257612e-05, |
| "loss": 0.0134, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.5965304980414103, |
| "grad_norm": 0.5075991749763489, |
| "learning_rate": 9.121411232980588e-05, |
| "loss": 0.0181, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.6021264689423615, |
| "grad_norm": 0.2868656814098358, |
| "learning_rate": 9.116724800922629e-05, |
| "loss": 0.0216, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.6077224398433128, |
| "grad_norm": 0.38551998138427734, |
| "learning_rate": 9.112027113896262e-05, |
| "loss": 0.0218, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.613318410744264, |
| "grad_norm": 0.3080727756023407, |
| "learning_rate": 9.107318184744781e-05, |
| "loss": 0.0263, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.618914381645215, |
| "grad_norm": 0.2743169665336609, |
| "learning_rate": 9.102598026342222e-05, |
| "loss": 0.0143, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.624510352546167, |
| "grad_norm": 0.286101758480072, |
| "learning_rate": 9.097866651593317e-05, |
| "loss": 0.0219, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.630106323447118, |
| "grad_norm": 0.1881791204214096, |
| "learning_rate": 9.093124073433463e-05, |
| "loss": 0.015, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.6357022943480692, |
| "grad_norm": 0.3556104004383087, |
| "learning_rate": 9.088370304828685e-05, |
| "loss": 0.0207, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.641298265249021, |
| "grad_norm": 0.2784225344657898, |
| "learning_rate": 9.083605358775612e-05, |
| "loss": 0.0159, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.646894236149972, |
| "grad_norm": 0.22262175381183624, |
| "learning_rate": 9.078829248301417e-05, |
| "loss": 0.0162, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.6524902070509233, |
| "grad_norm": 0.16783557832241058, |
| "learning_rate": 9.074041986463808e-05, |
| "loss": 0.018, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.6580861779518745, |
| "grad_norm": 0.31983381509780884, |
| "learning_rate": 9.069243586350975e-05, |
| "loss": 0.0168, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.6636821488528257, |
| "grad_norm": 0.2954675555229187, |
| "learning_rate": 9.064434061081562e-05, |
| "loss": 0.0157, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.6692781197537774, |
| "grad_norm": 0.37835440039634705, |
| "learning_rate": 9.059613423804623e-05, |
| "loss": 0.016, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.6748740906547286, |
| "grad_norm": 0.30182933807373047, |
| "learning_rate": 9.0547816876996e-05, |
| "loss": 0.0223, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.68047006155568, |
| "grad_norm": 0.3329738974571228, |
| "learning_rate": 9.049938865976275e-05, |
| "loss": 0.0232, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.6860660324566314, |
| "grad_norm": 0.2866031527519226, |
| "learning_rate": 9.045084971874738e-05, |
| "loss": 0.0193, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.6916620033575827, |
| "grad_norm": 0.3558676540851593, |
| "learning_rate": 9.040220018665347e-05, |
| "loss": 0.0181, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.697257974258534, |
| "grad_norm": 0.22001361846923828, |
| "learning_rate": 9.035344019648702e-05, |
| "loss": 0.0124, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.702853945159485, |
| "grad_norm": 0.28986766934394836, |
| "learning_rate": 9.030456988155596e-05, |
| "loss": 0.0179, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.7084499160604363, |
| "grad_norm": 0.3889327347278595, |
| "learning_rate": 9.025558937546988e-05, |
| "loss": 0.0186, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.714045886961388, |
| "grad_norm": 0.33833345770835876, |
| "learning_rate": 9.020649881213958e-05, |
| "loss": 0.0161, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.719641857862339, |
| "grad_norm": 0.23896977305412292, |
| "learning_rate": 9.015729832577681e-05, |
| "loss": 0.0149, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.7252378287632903, |
| "grad_norm": 0.44981443881988525, |
| "learning_rate": 9.010798805089384e-05, |
| "loss": 0.0221, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.730833799664242, |
| "grad_norm": 0.4389462471008301, |
| "learning_rate": 9.005856812230304e-05, |
| "loss": 0.0175, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.736429770565193, |
| "grad_norm": 0.2757073640823364, |
| "learning_rate": 9.000903867511666e-05, |
| "loss": 0.0176, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.7420257414661444, |
| "grad_norm": 0.2381424754858017, |
| "learning_rate": 8.995939984474624e-05, |
| "loss": 0.0145, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.7476217123670956, |
| "grad_norm": 0.25083616375923157, |
| "learning_rate": 8.990965176690252e-05, |
| "loss": 0.0184, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.753217683268047, |
| "grad_norm": 0.3651309013366699, |
| "learning_rate": 8.98597945775948e-05, |
| "loss": 0.0201, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.7588136541689985, |
| "grad_norm": 0.19562850892543793, |
| "learning_rate": 8.980982841313074e-05, |
| "loss": 0.0158, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.7644096250699497, |
| "grad_norm": 0.646306037902832, |
| "learning_rate": 8.975975341011596e-05, |
| "loss": 0.0172, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.770005595970901, |
| "grad_norm": 0.5771059393882751, |
| "learning_rate": 8.970956970545355e-05, |
| "loss": 0.0181, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.775601566871852, |
| "grad_norm": 0.2918018400669098, |
| "learning_rate": 8.965927743634391e-05, |
| "loss": 0.0199, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.7811975377728038, |
| "grad_norm": 0.5034765601158142, |
| "learning_rate": 8.96088767402841e-05, |
| "loss": 0.0172, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.786793508673755, |
| "grad_norm": 0.29646632075309753, |
| "learning_rate": 8.955836775506776e-05, |
| "loss": 0.0147, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.792389479574706, |
| "grad_norm": 0.2613969147205353, |
| "learning_rate": 8.950775061878453e-05, |
| "loss": 0.0164, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.7979854504756574, |
| "grad_norm": 0.27573442459106445, |
| "learning_rate": 8.945702546981969e-05, |
| "loss": 0.018, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.8035814213766086, |
| "grad_norm": 0.33170339465141296, |
| "learning_rate": 8.940619244685388e-05, |
| "loss": 0.019, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.8091773922775602, |
| "grad_norm": 0.2994827628135681, |
| "learning_rate": 8.935525168886262e-05, |
| "loss": 0.019, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.8147733631785115, |
| "grad_norm": 0.3199397921562195, |
| "learning_rate": 8.930420333511606e-05, |
| "loss": 0.0172, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.8203693340794627, |
| "grad_norm": 0.24537423253059387, |
| "learning_rate": 8.92530475251784e-05, |
| "loss": 0.0146, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.8259653049804143, |
| "grad_norm": 0.24761222302913666, |
| "learning_rate": 8.920178439890765e-05, |
| "loss": 0.0194, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.8315612758813655, |
| "grad_norm": 0.2208421230316162, |
| "learning_rate": 8.91504140964553e-05, |
| "loss": 0.0123, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.8371572467823167, |
| "grad_norm": 0.3568471074104309, |
| "learning_rate": 8.909893675826574e-05, |
| "loss": 0.0147, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.842753217683268, |
| "grad_norm": 0.24207855761051178, |
| "learning_rate": 8.90473525250761e-05, |
| "loss": 0.0166, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.848349188584219, |
| "grad_norm": 0.47056907415390015, |
| "learning_rate": 8.899566153791566e-05, |
| "loss": 0.0234, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.853945159485171, |
| "grad_norm": 0.26351991295814514, |
| "learning_rate": 8.894386393810563e-05, |
| "loss": 0.0212, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.859541130386122, |
| "grad_norm": 0.2002822756767273, |
| "learning_rate": 8.889195986725865e-05, |
| "loss": 0.0191, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.865137101287073, |
| "grad_norm": 0.28489527106285095, |
| "learning_rate": 8.883994946727849e-05, |
| "loss": 0.0155, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.870733072188025, |
| "grad_norm": 0.30861204862594604, |
| "learning_rate": 8.878783288035957e-05, |
| "loss": 0.0158, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.876329043088976, |
| "grad_norm": 0.2856840193271637, |
| "learning_rate": 8.873561024898668e-05, |
| "loss": 0.0201, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.8819250139899273, |
| "grad_norm": 0.3461334705352783, |
| "learning_rate": 8.868328171593448e-05, |
| "loss": 0.0184, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.8875209848908785, |
| "grad_norm": 0.22160184383392334, |
| "learning_rate": 8.863084742426719e-05, |
| "loss": 0.0171, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.8931169557918297, |
| "grad_norm": 0.2488642781972885, |
| "learning_rate": 8.857830751733815e-05, |
| "loss": 0.0153, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.8987129266927814, |
| "grad_norm": 0.33482569456100464, |
| "learning_rate": 8.852566213878947e-05, |
| "loss": 0.0189, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.9043088975937326, |
| "grad_norm": 0.2865656316280365, |
| "learning_rate": 8.84729114325516e-05, |
| "loss": 0.0168, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.9099048684946838, |
| "grad_norm": 0.3801150321960449, |
| "learning_rate": 8.842005554284296e-05, |
| "loss": 0.0149, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.915500839395635, |
| "grad_norm": 0.24389003217220306, |
| "learning_rate": 8.836709461416952e-05, |
| "loss": 0.0176, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.9210968102965866, |
| "grad_norm": 0.4815085828304291, |
| "learning_rate": 8.831402879132446e-05, |
| "loss": 0.014, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.926692781197538, |
| "grad_norm": 0.2196839153766632, |
| "learning_rate": 8.82608582193877e-05, |
| "loss": 0.0174, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.932288752098489, |
| "grad_norm": 0.30073830485343933, |
| "learning_rate": 8.820758304372557e-05, |
| "loss": 0.0168, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.9378847229994403, |
| "grad_norm": 0.21486796438694, |
| "learning_rate": 8.815420340999033e-05, |
| "loss": 0.0128, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.9434806939003915, |
| "grad_norm": 0.31880220770835876, |
| "learning_rate": 8.810071946411989e-05, |
| "loss": 0.0209, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.949076664801343, |
| "grad_norm": 0.20475736260414124, |
| "learning_rate": 8.804713135233731e-05, |
| "loss": 0.0152, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.9546726357022943, |
| "grad_norm": 0.19735224545001984, |
| "learning_rate": 8.799343922115044e-05, |
| "loss": 0.0104, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.9602686066032455, |
| "grad_norm": 0.17013341188430786, |
| "learning_rate": 8.79396432173515e-05, |
| "loss": 0.0129, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.965864577504197, |
| "grad_norm": 0.38702845573425293, |
| "learning_rate": 8.788574348801675e-05, |
| "loss": 0.0239, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.9714605484051484, |
| "grad_norm": 0.34306514263153076, |
| "learning_rate": 8.783174018050594e-05, |
| "loss": 0.03, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.9770565193060996, |
| "grad_norm": 0.26854732632637024, |
| "learning_rate": 8.77776334424621e-05, |
| "loss": 0.019, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.982652490207051, |
| "grad_norm": 0.28458869457244873, |
| "learning_rate": 8.772342342181095e-05, |
| "loss": 0.0213, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.988248461108002, |
| "grad_norm": 0.28708454966545105, |
| "learning_rate": 8.766911026676064e-05, |
| "loss": 0.0173, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.9938444320089537, |
| "grad_norm": 0.35600361227989197, |
| "learning_rate": 8.761469412580125e-05, |
| "loss": 0.0179, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.999440402909905, |
| "grad_norm": 0.29637375473976135, |
| "learning_rate": 8.756017514770443e-05, |
| "loss": 0.0223, |
| "step": 5360 |
| }, |
| { |
| "epoch": 3.005036373810856, |
| "grad_norm": 0.39075925946235657, |
| "learning_rate": 8.750555348152298e-05, |
| "loss": 0.0148, |
| "step": 5370 |
| }, |
| { |
| "epoch": 3.0106323447118073, |
| "grad_norm": 0.3552566468715668, |
| "learning_rate": 8.745082927659047e-05, |
| "loss": 0.0187, |
| "step": 5380 |
| }, |
| { |
| "epoch": 3.016228315612759, |
| "grad_norm": 0.2608230710029602, |
| "learning_rate": 8.739600268252078e-05, |
| "loss": 0.0205, |
| "step": 5390 |
| }, |
| { |
| "epoch": 3.02182428651371, |
| "grad_norm": 0.2771034240722656, |
| "learning_rate": 8.73410738492077e-05, |
| "loss": 0.0187, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.0274202574146614, |
| "grad_norm": 0.2750489413738251, |
| "learning_rate": 8.728604292682459e-05, |
| "loss": 0.0161, |
| "step": 5410 |
| }, |
| { |
| "epoch": 3.0330162283156126, |
| "grad_norm": 0.3373420834541321, |
| "learning_rate": 8.723091006582389e-05, |
| "loss": 0.0193, |
| "step": 5420 |
| }, |
| { |
| "epoch": 3.0386121992165642, |
| "grad_norm": 0.27592456340789795, |
| "learning_rate": 8.717567541693673e-05, |
| "loss": 0.0171, |
| "step": 5430 |
| }, |
| { |
| "epoch": 3.0442081701175154, |
| "grad_norm": 0.3381069004535675, |
| "learning_rate": 8.71203391311725e-05, |
| "loss": 0.0185, |
| "step": 5440 |
| }, |
| { |
| "epoch": 3.0498041410184666, |
| "grad_norm": 0.342650830745697, |
| "learning_rate": 8.706490135981855e-05, |
| "loss": 0.0223, |
| "step": 5450 |
| }, |
| { |
| "epoch": 3.055400111919418, |
| "grad_norm": 0.2777611017227173, |
| "learning_rate": 8.700936225443959e-05, |
| "loss": 0.0135, |
| "step": 5460 |
| }, |
| { |
| "epoch": 3.0609960828203695, |
| "grad_norm": 0.26987946033477783, |
| "learning_rate": 8.695372196687743e-05, |
| "loss": 0.0182, |
| "step": 5470 |
| }, |
| { |
| "epoch": 3.0665920537213207, |
| "grad_norm": 0.24877256155014038, |
| "learning_rate": 8.689798064925049e-05, |
| "loss": 0.015, |
| "step": 5480 |
| }, |
| { |
| "epoch": 3.072188024622272, |
| "grad_norm": 0.31654706597328186, |
| "learning_rate": 8.684213845395339e-05, |
| "loss": 0.0142, |
| "step": 5490 |
| }, |
| { |
| "epoch": 3.077783995523223, |
| "grad_norm": 0.22976505756378174, |
| "learning_rate": 8.678619553365659e-05, |
| "loss": 0.0119, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.083379966424175, |
| "grad_norm": 0.3443313241004944, |
| "learning_rate": 8.673015204130586e-05, |
| "loss": 0.0138, |
| "step": 5510 |
| }, |
| { |
| "epoch": 3.088975937325126, |
| "grad_norm": 0.34815511107444763, |
| "learning_rate": 8.6674008130122e-05, |
| "loss": 0.0127, |
| "step": 5520 |
| }, |
| { |
| "epoch": 3.094571908226077, |
| "grad_norm": 0.392868310213089, |
| "learning_rate": 8.661776395360029e-05, |
| "loss": 0.0148, |
| "step": 5530 |
| }, |
| { |
| "epoch": 3.1001678791270284, |
| "grad_norm": 0.15690505504608154, |
| "learning_rate": 8.656141966551019e-05, |
| "loss": 0.0158, |
| "step": 5540 |
| }, |
| { |
| "epoch": 3.10576385002798, |
| "grad_norm": 0.2958482503890991, |
| "learning_rate": 8.650497541989482e-05, |
| "loss": 0.015, |
| "step": 5550 |
| }, |
| { |
| "epoch": 3.1113598209289313, |
| "grad_norm": 0.34652698040008545, |
| "learning_rate": 8.644843137107059e-05, |
| "loss": 0.0186, |
| "step": 5560 |
| }, |
| { |
| "epoch": 3.1169557918298825, |
| "grad_norm": 0.2787473201751709, |
| "learning_rate": 8.639178767362676e-05, |
| "loss": 0.0171, |
| "step": 5570 |
| }, |
| { |
| "epoch": 3.1225517627308337, |
| "grad_norm": 0.28770115971565247, |
| "learning_rate": 8.633504448242505e-05, |
| "loss": 0.0088, |
| "step": 5580 |
| }, |
| { |
| "epoch": 3.128147733631785, |
| "grad_norm": 0.16269604861736298, |
| "learning_rate": 8.627820195259918e-05, |
| "loss": 0.0144, |
| "step": 5590 |
| }, |
| { |
| "epoch": 3.1337437045327365, |
| "grad_norm": 0.2170538753271103, |
| "learning_rate": 8.622126023955446e-05, |
| "loss": 0.0145, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.1393396754336877, |
| "grad_norm": 0.1933916211128235, |
| "learning_rate": 8.616421949896734e-05, |
| "loss": 0.0145, |
| "step": 5610 |
| }, |
| { |
| "epoch": 3.144935646334639, |
| "grad_norm": 0.28321388363838196, |
| "learning_rate": 8.610707988678503e-05, |
| "loss": 0.0171, |
| "step": 5620 |
| }, |
| { |
| "epoch": 3.1505316172355906, |
| "grad_norm": 0.1729007363319397, |
| "learning_rate": 8.604984155922506e-05, |
| "loss": 0.0103, |
| "step": 5630 |
| }, |
| { |
| "epoch": 3.156127588136542, |
| "grad_norm": 0.41079893708229065, |
| "learning_rate": 8.599250467277483e-05, |
| "loss": 0.0159, |
| "step": 5640 |
| }, |
| { |
| "epoch": 3.161723559037493, |
| "grad_norm": 0.4628431797027588, |
| "learning_rate": 8.59350693841912e-05, |
| "loss": 0.0184, |
| "step": 5650 |
| }, |
| { |
| "epoch": 3.1673195299384442, |
| "grad_norm": 0.30907726287841797, |
| "learning_rate": 8.587753585050004e-05, |
| "loss": 0.0183, |
| "step": 5660 |
| }, |
| { |
| "epoch": 3.1729155008393954, |
| "grad_norm": 0.19282157719135284, |
| "learning_rate": 8.581990422899585e-05, |
| "loss": 0.0127, |
| "step": 5670 |
| }, |
| { |
| "epoch": 3.178511471740347, |
| "grad_norm": 0.27166658639907837, |
| "learning_rate": 8.576217467724128e-05, |
| "loss": 0.023, |
| "step": 5680 |
| }, |
| { |
| "epoch": 3.1841074426412983, |
| "grad_norm": 0.3486577272415161, |
| "learning_rate": 8.570434735306671e-05, |
| "loss": 0.0108, |
| "step": 5690 |
| }, |
| { |
| "epoch": 3.1897034135422495, |
| "grad_norm": 0.295238733291626, |
| "learning_rate": 8.564642241456986e-05, |
| "loss": 0.0181, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.1952993844432007, |
| "grad_norm": 0.20616333186626434, |
| "learning_rate": 8.558840002011528e-05, |
| "loss": 0.0202, |
| "step": 5710 |
| }, |
| { |
| "epoch": 3.2008953553441524, |
| "grad_norm": 0.12979304790496826, |
| "learning_rate": 8.553028032833397e-05, |
| "loss": 0.0125, |
| "step": 5720 |
| }, |
| { |
| "epoch": 3.2064913262451036, |
| "grad_norm": 0.23997394740581512, |
| "learning_rate": 8.547206349812298e-05, |
| "loss": 0.0159, |
| "step": 5730 |
| }, |
| { |
| "epoch": 3.212087297146055, |
| "grad_norm": 0.2359701246023178, |
| "learning_rate": 8.541374968864487e-05, |
| "loss": 0.0136, |
| "step": 5740 |
| }, |
| { |
| "epoch": 3.217683268047006, |
| "grad_norm": 0.25309842824935913, |
| "learning_rate": 8.535533905932738e-05, |
| "loss": 0.0154, |
| "step": 5750 |
| }, |
| { |
| "epoch": 3.2232792389479576, |
| "grad_norm": 0.26648661494255066, |
| "learning_rate": 8.529683176986295e-05, |
| "loss": 0.0132, |
| "step": 5760 |
| }, |
| { |
| "epoch": 3.228875209848909, |
| "grad_norm": 0.32268235087394714, |
| "learning_rate": 8.523822798020827e-05, |
| "loss": 0.0133, |
| "step": 5770 |
| }, |
| { |
| "epoch": 3.23447118074986, |
| "grad_norm": 0.2632688283920288, |
| "learning_rate": 8.517952785058385e-05, |
| "loss": 0.017, |
| "step": 5780 |
| }, |
| { |
| "epoch": 3.2400671516508113, |
| "grad_norm": 0.16985219717025757, |
| "learning_rate": 8.512073154147362e-05, |
| "loss": 0.0143, |
| "step": 5790 |
| }, |
| { |
| "epoch": 3.245663122551763, |
| "grad_norm": 0.23951981961727142, |
| "learning_rate": 8.506183921362443e-05, |
| "loss": 0.0157, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.251259093452714, |
| "grad_norm": 0.36843812465667725, |
| "learning_rate": 8.500285102804568e-05, |
| "loss": 0.0198, |
| "step": 5810 |
| }, |
| { |
| "epoch": 3.2568550643536653, |
| "grad_norm": 0.27591267228126526, |
| "learning_rate": 8.494376714600878e-05, |
| "loss": 0.0246, |
| "step": 5820 |
| }, |
| { |
| "epoch": 3.2624510352546165, |
| "grad_norm": 0.3020281195640564, |
| "learning_rate": 8.488458772904684e-05, |
| "loss": 0.018, |
| "step": 5830 |
| }, |
| { |
| "epoch": 3.2680470061555678, |
| "grad_norm": 0.20429036021232605, |
| "learning_rate": 8.482531293895412e-05, |
| "loss": 0.0154, |
| "step": 5840 |
| }, |
| { |
| "epoch": 3.2736429770565194, |
| "grad_norm": 0.3011918067932129, |
| "learning_rate": 8.476594293778561e-05, |
| "loss": 0.0181, |
| "step": 5850 |
| }, |
| { |
| "epoch": 3.2792389479574706, |
| "grad_norm": 0.20082388818264008, |
| "learning_rate": 8.470647788785665e-05, |
| "loss": 0.0118, |
| "step": 5860 |
| }, |
| { |
| "epoch": 3.284834918858422, |
| "grad_norm": 0.25404563546180725, |
| "learning_rate": 8.46469179517424e-05, |
| "loss": 0.0122, |
| "step": 5870 |
| }, |
| { |
| "epoch": 3.2904308897593735, |
| "grad_norm": 0.17162342369556427, |
| "learning_rate": 8.458726329227747e-05, |
| "loss": 0.0178, |
| "step": 5880 |
| }, |
| { |
| "epoch": 3.2960268606603247, |
| "grad_norm": 0.2713855803012848, |
| "learning_rate": 8.452751407255541e-05, |
| "loss": 0.0127, |
| "step": 5890 |
| }, |
| { |
| "epoch": 3.301622831561276, |
| "grad_norm": 0.25792196393013, |
| "learning_rate": 8.44676704559283e-05, |
| "loss": 0.0151, |
| "step": 5900 |
| }, |
| { |
| "epoch": 3.307218802462227, |
| "grad_norm": 0.24708054959774017, |
| "learning_rate": 8.44077326060063e-05, |
| "loss": 0.0205, |
| "step": 5910 |
| }, |
| { |
| "epoch": 3.3128147733631783, |
| "grad_norm": 0.22907878458499908, |
| "learning_rate": 8.434770068665723e-05, |
| "loss": 0.0196, |
| "step": 5920 |
| }, |
| { |
| "epoch": 3.31841074426413, |
| "grad_norm": 0.42451682686805725, |
| "learning_rate": 8.428757486200603e-05, |
| "loss": 0.0181, |
| "step": 5930 |
| }, |
| { |
| "epoch": 3.324006715165081, |
| "grad_norm": 0.2787477970123291, |
| "learning_rate": 8.422735529643444e-05, |
| "loss": 0.0163, |
| "step": 5940 |
| }, |
| { |
| "epoch": 3.3296026860660324, |
| "grad_norm": 0.2536604404449463, |
| "learning_rate": 8.416704215458043e-05, |
| "loss": 0.0153, |
| "step": 5950 |
| }, |
| { |
| "epoch": 3.3351986569669836, |
| "grad_norm": 0.27685803174972534, |
| "learning_rate": 8.410663560133784e-05, |
| "loss": 0.0171, |
| "step": 5960 |
| }, |
| { |
| "epoch": 3.3407946278679352, |
| "grad_norm": 0.21129871904850006, |
| "learning_rate": 8.404613580185585e-05, |
| "loss": 0.0146, |
| "step": 5970 |
| }, |
| { |
| "epoch": 3.3463905987688864, |
| "grad_norm": 0.2712884247303009, |
| "learning_rate": 8.398554292153866e-05, |
| "loss": 0.0124, |
| "step": 5980 |
| }, |
| { |
| "epoch": 3.3519865696698377, |
| "grad_norm": 0.28807780146598816, |
| "learning_rate": 8.392485712604483e-05, |
| "loss": 0.0151, |
| "step": 5990 |
| }, |
| { |
| "epoch": 3.357582540570789, |
| "grad_norm": 0.24215184152126312, |
| "learning_rate": 8.386407858128706e-05, |
| "loss": 0.0201, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.3631785114717405, |
| "grad_norm": 0.3111182451248169, |
| "learning_rate": 8.380320745343153e-05, |
| "loss": 0.0148, |
| "step": 6010 |
| }, |
| { |
| "epoch": 3.3687744823726917, |
| "grad_norm": 0.3122502267360687, |
| "learning_rate": 8.37422439088976e-05, |
| "loss": 0.0138, |
| "step": 6020 |
| }, |
| { |
| "epoch": 3.374370453273643, |
| "grad_norm": 0.23829977214336395, |
| "learning_rate": 8.368118811435726e-05, |
| "loss": 0.0172, |
| "step": 6030 |
| }, |
| { |
| "epoch": 3.379966424174594, |
| "grad_norm": 0.22568489611148834, |
| "learning_rate": 8.362004023673474e-05, |
| "loss": 0.0191, |
| "step": 6040 |
| }, |
| { |
| "epoch": 3.385562395075546, |
| "grad_norm": 0.37260109186172485, |
| "learning_rate": 8.355880044320598e-05, |
| "loss": 0.0146, |
| "step": 6050 |
| }, |
| { |
| "epoch": 3.391158365976497, |
| "grad_norm": 0.36467012763023376, |
| "learning_rate": 8.349746890119826e-05, |
| "loss": 0.0144, |
| "step": 6060 |
| }, |
| { |
| "epoch": 3.396754336877448, |
| "grad_norm": 0.28992265462875366, |
| "learning_rate": 8.343604577838964e-05, |
| "loss": 0.014, |
| "step": 6070 |
| }, |
| { |
| "epoch": 3.4023503077783994, |
| "grad_norm": 0.3018409311771393, |
| "learning_rate": 8.337453124270863e-05, |
| "loss": 0.0126, |
| "step": 6080 |
| }, |
| { |
| "epoch": 3.4079462786793506, |
| "grad_norm": 0.31771036982536316, |
| "learning_rate": 8.331292546233362e-05, |
| "loss": 0.0124, |
| "step": 6090 |
| }, |
| { |
| "epoch": 3.4135422495803023, |
| "grad_norm": 0.2008838802576065, |
| "learning_rate": 8.32512286056924e-05, |
| "loss": 0.0181, |
| "step": 6100 |
| }, |
| { |
| "epoch": 3.4191382204812535, |
| "grad_norm": 0.3000880777835846, |
| "learning_rate": 8.318944084146192e-05, |
| "loss": 0.0178, |
| "step": 6110 |
| }, |
| { |
| "epoch": 3.4247341913822047, |
| "grad_norm": 0.201462984085083, |
| "learning_rate": 8.31275623385675e-05, |
| "loss": 0.0121, |
| "step": 6120 |
| }, |
| { |
| "epoch": 3.4303301622831563, |
| "grad_norm": 0.29394298791885376, |
| "learning_rate": 8.306559326618259e-05, |
| "loss": 0.019, |
| "step": 6130 |
| }, |
| { |
| "epoch": 3.4359261331841076, |
| "grad_norm": 0.20683641731739044, |
| "learning_rate": 8.300353379372834e-05, |
| "loss": 0.0157, |
| "step": 6140 |
| }, |
| { |
| "epoch": 3.4415221040850588, |
| "grad_norm": 0.2323373705148697, |
| "learning_rate": 8.29413840908729e-05, |
| "loss": 0.0132, |
| "step": 6150 |
| }, |
| { |
| "epoch": 3.44711807498601, |
| "grad_norm": 0.28800690174102783, |
| "learning_rate": 8.287914432753123e-05, |
| "loss": 0.0149, |
| "step": 6160 |
| }, |
| { |
| "epoch": 3.452714045886961, |
| "grad_norm": 0.24825571477413177, |
| "learning_rate": 8.281681467386446e-05, |
| "loss": 0.0143, |
| "step": 6170 |
| }, |
| { |
| "epoch": 3.458310016787913, |
| "grad_norm": 0.26586174964904785, |
| "learning_rate": 8.275439530027948e-05, |
| "loss": 0.0193, |
| "step": 6180 |
| }, |
| { |
| "epoch": 3.463905987688864, |
| "grad_norm": 0.384670615196228, |
| "learning_rate": 8.269188637742846e-05, |
| "loss": 0.0135, |
| "step": 6190 |
| }, |
| { |
| "epoch": 3.4695019585898152, |
| "grad_norm": 0.2598379850387573, |
| "learning_rate": 8.262928807620843e-05, |
| "loss": 0.0192, |
| "step": 6200 |
| }, |
| { |
| "epoch": 3.4750979294907665, |
| "grad_norm": 0.26824334263801575, |
| "learning_rate": 8.256660056776076e-05, |
| "loss": 0.017, |
| "step": 6210 |
| }, |
| { |
| "epoch": 3.480693900391718, |
| "grad_norm": 0.29601970314979553, |
| "learning_rate": 8.250382402347065e-05, |
| "loss": 0.0236, |
| "step": 6220 |
| }, |
| { |
| "epoch": 3.4862898712926693, |
| "grad_norm": 0.2569962739944458, |
| "learning_rate": 8.244095861496686e-05, |
| "loss": 0.0148, |
| "step": 6230 |
| }, |
| { |
| "epoch": 3.4918858421936205, |
| "grad_norm": 0.18870459496974945, |
| "learning_rate": 8.237800451412095e-05, |
| "loss": 0.0166, |
| "step": 6240 |
| }, |
| { |
| "epoch": 3.4974818130945717, |
| "grad_norm": 0.20874905586242676, |
| "learning_rate": 8.231496189304704e-05, |
| "loss": 0.012, |
| "step": 6250 |
| }, |
| { |
| "epoch": 3.5030777839955234, |
| "grad_norm": 0.456989586353302, |
| "learning_rate": 8.225183092410128e-05, |
| "loss": 0.0174, |
| "step": 6260 |
| }, |
| { |
| "epoch": 3.5086737548964746, |
| "grad_norm": 0.3724716305732727, |
| "learning_rate": 8.218861177988129e-05, |
| "loss": 0.0164, |
| "step": 6270 |
| }, |
| { |
| "epoch": 3.514269725797426, |
| "grad_norm": 0.2510260343551636, |
| "learning_rate": 8.212530463322583e-05, |
| "loss": 0.014, |
| "step": 6280 |
| }, |
| { |
| "epoch": 3.519865696698377, |
| "grad_norm": 0.17292679846286774, |
| "learning_rate": 8.206190965721419e-05, |
| "loss": 0.0135, |
| "step": 6290 |
| }, |
| { |
| "epoch": 3.5254616675993287, |
| "grad_norm": 0.25856831669807434, |
| "learning_rate": 8.199842702516583e-05, |
| "loss": 0.0159, |
| "step": 6300 |
| }, |
| { |
| "epoch": 3.53105763850028, |
| "grad_norm": 0.26525381207466125, |
| "learning_rate": 8.193485691063985e-05, |
| "loss": 0.0132, |
| "step": 6310 |
| }, |
| { |
| "epoch": 3.536653609401231, |
| "grad_norm": 0.319915235042572, |
| "learning_rate": 8.18711994874345e-05, |
| "loss": 0.0113, |
| "step": 6320 |
| }, |
| { |
| "epoch": 3.5422495803021823, |
| "grad_norm": 0.23749981820583344, |
| "learning_rate": 8.180745492958674e-05, |
| "loss": 0.0145, |
| "step": 6330 |
| }, |
| { |
| "epoch": 3.5478455512031335, |
| "grad_norm": 0.25086531043052673, |
| "learning_rate": 8.174362341137177e-05, |
| "loss": 0.0165, |
| "step": 6340 |
| }, |
| { |
| "epoch": 3.553441522104085, |
| "grad_norm": 0.19675312936306, |
| "learning_rate": 8.167970510730253e-05, |
| "loss": 0.0155, |
| "step": 6350 |
| }, |
| { |
| "epoch": 3.5590374930050364, |
| "grad_norm": 0.2085702270269394, |
| "learning_rate": 8.161570019212921e-05, |
| "loss": 0.0155, |
| "step": 6360 |
| }, |
| { |
| "epoch": 3.5646334639059876, |
| "grad_norm": 0.4404468536376953, |
| "learning_rate": 8.155160884083881e-05, |
| "loss": 0.0208, |
| "step": 6370 |
| }, |
| { |
| "epoch": 3.570229434806939, |
| "grad_norm": 0.10625205188989639, |
| "learning_rate": 8.148743122865463e-05, |
| "loss": 0.015, |
| "step": 6380 |
| }, |
| { |
| "epoch": 3.5758254057078904, |
| "grad_norm": 0.34253987669944763, |
| "learning_rate": 8.14231675310358e-05, |
| "loss": 0.0229, |
| "step": 6390 |
| }, |
| { |
| "epoch": 3.5814213766088416, |
| "grad_norm": 0.43956324458122253, |
| "learning_rate": 8.135881792367686e-05, |
| "loss": 0.0181, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.587017347509793, |
| "grad_norm": 0.45199209451675415, |
| "learning_rate": 8.129438258250712e-05, |
| "loss": 0.0198, |
| "step": 6410 |
| }, |
| { |
| "epoch": 3.592613318410744, |
| "grad_norm": 0.2245771586894989, |
| "learning_rate": 8.12298616836904e-05, |
| "loss": 0.0141, |
| "step": 6420 |
| }, |
| { |
| "epoch": 3.5982092893116957, |
| "grad_norm": 0.3338348865509033, |
| "learning_rate": 8.116525540362434e-05, |
| "loss": 0.0168, |
| "step": 6430 |
| }, |
| { |
| "epoch": 3.603805260212647, |
| "grad_norm": 0.21632985770702362, |
| "learning_rate": 8.110056391894005e-05, |
| "loss": 0.0117, |
| "step": 6440 |
| }, |
| { |
| "epoch": 3.609401231113598, |
| "grad_norm": 0.2893829643726349, |
| "learning_rate": 8.103578740650156e-05, |
| "loss": 0.0166, |
| "step": 6450 |
| }, |
| { |
| "epoch": 3.6149972020145498, |
| "grad_norm": 0.24873918294906616, |
| "learning_rate": 8.097092604340542e-05, |
| "loss": 0.0139, |
| "step": 6460 |
| }, |
| { |
| "epoch": 3.620593172915501, |
| "grad_norm": 0.31232985854148865, |
| "learning_rate": 8.090598000698009e-05, |
| "loss": 0.0122, |
| "step": 6470 |
| }, |
| { |
| "epoch": 3.626189143816452, |
| "grad_norm": 0.20202654600143433, |
| "learning_rate": 8.084094947478556e-05, |
| "loss": 0.0126, |
| "step": 6480 |
| }, |
| { |
| "epoch": 3.6317851147174034, |
| "grad_norm": 0.339890718460083, |
| "learning_rate": 8.077583462461283e-05, |
| "loss": 0.0107, |
| "step": 6490 |
| }, |
| { |
| "epoch": 3.6373810856183546, |
| "grad_norm": 0.17959007620811462, |
| "learning_rate": 8.07106356344834e-05, |
| "loss": 0.0125, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.6429770565193063, |
| "grad_norm": 0.21795189380645752, |
| "learning_rate": 8.064535268264883e-05, |
| "loss": 0.0202, |
| "step": 6510 |
| }, |
| { |
| "epoch": 3.6485730274202575, |
| "grad_norm": 0.17131085693836212, |
| "learning_rate": 8.057998594759022e-05, |
| "loss": 0.0197, |
| "step": 6520 |
| }, |
| { |
| "epoch": 3.6541689983212087, |
| "grad_norm": 0.180596724152565, |
| "learning_rate": 8.051453560801772e-05, |
| "loss": 0.0128, |
| "step": 6530 |
| }, |
| { |
| "epoch": 3.65976496922216, |
| "grad_norm": 0.23086079955101013, |
| "learning_rate": 8.044900184287007e-05, |
| "loss": 0.0171, |
| "step": 6540 |
| }, |
| { |
| "epoch": 3.6653609401231115, |
| "grad_norm": 0.40819284319877625, |
| "learning_rate": 8.038338483131407e-05, |
| "loss": 0.0162, |
| "step": 6550 |
| }, |
| { |
| "epoch": 3.6709569110240627, |
| "grad_norm": 0.20544512569904327, |
| "learning_rate": 8.031768475274413e-05, |
| "loss": 0.01, |
| "step": 6560 |
| }, |
| { |
| "epoch": 3.676552881925014, |
| "grad_norm": 0.3116811513900757, |
| "learning_rate": 8.025190178678175e-05, |
| "loss": 0.0183, |
| "step": 6570 |
| }, |
| { |
| "epoch": 3.682148852825965, |
| "grad_norm": 0.3111719787120819, |
| "learning_rate": 8.018603611327504e-05, |
| "loss": 0.015, |
| "step": 6580 |
| }, |
| { |
| "epoch": 3.6877448237269164, |
| "grad_norm": 0.20265722274780273, |
| "learning_rate": 8.012008791229826e-05, |
| "loss": 0.0136, |
| "step": 6590 |
| }, |
| { |
| "epoch": 3.693340794627868, |
| "grad_norm": 0.35717812180519104, |
| "learning_rate": 8.005405736415126e-05, |
| "loss": 0.0098, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.6989367655288192, |
| "grad_norm": 0.45737767219543457, |
| "learning_rate": 7.998794464935904e-05, |
| "loss": 0.0115, |
| "step": 6610 |
| }, |
| { |
| "epoch": 3.7045327364297704, |
| "grad_norm": 0.3025696873664856, |
| "learning_rate": 7.992174994867123e-05, |
| "loss": 0.0159, |
| "step": 6620 |
| }, |
| { |
| "epoch": 3.710128707330722, |
| "grad_norm": 0.3852231502532959, |
| "learning_rate": 7.985547344306161e-05, |
| "loss": 0.0116, |
| "step": 6630 |
| }, |
| { |
| "epoch": 3.7157246782316733, |
| "grad_norm": 0.23505637049674988, |
| "learning_rate": 7.978911531372765e-05, |
| "loss": 0.012, |
| "step": 6640 |
| }, |
| { |
| "epoch": 3.7213206491326245, |
| "grad_norm": 0.16072528064250946, |
| "learning_rate": 7.972267574208991e-05, |
| "loss": 0.0101, |
| "step": 6650 |
| }, |
| { |
| "epoch": 3.7269166200335757, |
| "grad_norm": 0.2579629719257355, |
| "learning_rate": 7.965615490979163e-05, |
| "loss": 0.0172, |
| "step": 6660 |
| }, |
| { |
| "epoch": 3.732512590934527, |
| "grad_norm": 0.170463427901268, |
| "learning_rate": 7.958955299869825e-05, |
| "loss": 0.0164, |
| "step": 6670 |
| }, |
| { |
| "epoch": 3.7381085618354786, |
| "grad_norm": 0.2048628181219101, |
| "learning_rate": 7.952287019089685e-05, |
| "loss": 0.0095, |
| "step": 6680 |
| }, |
| { |
| "epoch": 3.74370453273643, |
| "grad_norm": 0.1665850281715393, |
| "learning_rate": 7.945610666869568e-05, |
| "loss": 0.0131, |
| "step": 6690 |
| }, |
| { |
| "epoch": 3.749300503637381, |
| "grad_norm": 0.184804305434227, |
| "learning_rate": 7.938926261462366e-05, |
| "loss": 0.0161, |
| "step": 6700 |
| }, |
| { |
| "epoch": 3.7548964745383326, |
| "grad_norm": 0.17109259963035583, |
| "learning_rate": 7.932233821142987e-05, |
| "loss": 0.014, |
| "step": 6710 |
| }, |
| { |
| "epoch": 3.760492445439284, |
| "grad_norm": 0.23285003006458282, |
| "learning_rate": 7.925533364208309e-05, |
| "loss": 0.0106, |
| "step": 6720 |
| }, |
| { |
| "epoch": 3.766088416340235, |
| "grad_norm": 0.21361905336380005, |
| "learning_rate": 7.918824908977123e-05, |
| "loss": 0.0218, |
| "step": 6730 |
| }, |
| { |
| "epoch": 3.7716843872411863, |
| "grad_norm": 0.22354750335216522, |
| "learning_rate": 7.912108473790092e-05, |
| "loss": 0.0203, |
| "step": 6740 |
| }, |
| { |
| "epoch": 3.7772803581421375, |
| "grad_norm": 0.24767528474330902, |
| "learning_rate": 7.905384077009693e-05, |
| "loss": 0.0193, |
| "step": 6750 |
| }, |
| { |
| "epoch": 3.782876329043089, |
| "grad_norm": 0.18995364010334015, |
| "learning_rate": 7.898651737020166e-05, |
| "loss": 0.0162, |
| "step": 6760 |
| }, |
| { |
| "epoch": 3.7884722999440403, |
| "grad_norm": 0.13995826244354248, |
| "learning_rate": 7.891911472227478e-05, |
| "loss": 0.0187, |
| "step": 6770 |
| }, |
| { |
| "epoch": 3.7940682708449915, |
| "grad_norm": 0.2525804340839386, |
| "learning_rate": 7.88516330105925e-05, |
| "loss": 0.0136, |
| "step": 6780 |
| }, |
| { |
| "epoch": 3.799664241745943, |
| "grad_norm": 0.17206352949142456, |
| "learning_rate": 7.878407241964729e-05, |
| "loss": 0.0133, |
| "step": 6790 |
| }, |
| { |
| "epoch": 3.8052602126468944, |
| "grad_norm": 0.17433176934719086, |
| "learning_rate": 7.871643313414718e-05, |
| "loss": 0.0257, |
| "step": 6800 |
| }, |
| { |
| "epoch": 3.8108561835478456, |
| "grad_norm": 0.2698834240436554, |
| "learning_rate": 7.864871533901544e-05, |
| "loss": 0.0141, |
| "step": 6810 |
| }, |
| { |
| "epoch": 3.816452154448797, |
| "grad_norm": 0.2874978482723236, |
| "learning_rate": 7.858091921938988e-05, |
| "loss": 0.0175, |
| "step": 6820 |
| }, |
| { |
| "epoch": 3.822048125349748, |
| "grad_norm": 0.267092227935791, |
| "learning_rate": 7.851304496062254e-05, |
| "loss": 0.0169, |
| "step": 6830 |
| }, |
| { |
| "epoch": 3.8276440962506992, |
| "grad_norm": 0.31751275062561035, |
| "learning_rate": 7.844509274827907e-05, |
| "loss": 0.0175, |
| "step": 6840 |
| }, |
| { |
| "epoch": 3.833240067151651, |
| "grad_norm": 0.30981171131134033, |
| "learning_rate": 7.837706276813819e-05, |
| "loss": 0.0145, |
| "step": 6850 |
| }, |
| { |
| "epoch": 3.838836038052602, |
| "grad_norm": 0.31560707092285156, |
| "learning_rate": 7.830895520619128e-05, |
| "loss": 0.0157, |
| "step": 6860 |
| }, |
| { |
| "epoch": 3.8444320089535533, |
| "grad_norm": 0.22295020520687103, |
| "learning_rate": 7.824077024864179e-05, |
| "loss": 0.0108, |
| "step": 6870 |
| }, |
| { |
| "epoch": 3.850027979854505, |
| "grad_norm": 0.25469842553138733, |
| "learning_rate": 7.817250808190483e-05, |
| "loss": 0.015, |
| "step": 6880 |
| }, |
| { |
| "epoch": 3.855623950755456, |
| "grad_norm": 0.3890667259693146, |
| "learning_rate": 7.810416889260653e-05, |
| "loss": 0.0179, |
| "step": 6890 |
| }, |
| { |
| "epoch": 3.8612199216564074, |
| "grad_norm": 0.1923862248659134, |
| "learning_rate": 7.803575286758364e-05, |
| "loss": 0.013, |
| "step": 6900 |
| }, |
| { |
| "epoch": 3.8668158925573586, |
| "grad_norm": 0.17686985433101654, |
| "learning_rate": 7.796726019388295e-05, |
| "loss": 0.0143, |
| "step": 6910 |
| }, |
| { |
| "epoch": 3.87241186345831, |
| "grad_norm": 0.1899517923593521, |
| "learning_rate": 7.789869105876083e-05, |
| "loss": 0.0178, |
| "step": 6920 |
| }, |
| { |
| "epoch": 3.8780078343592614, |
| "grad_norm": 0.3056480586528778, |
| "learning_rate": 7.783004564968263e-05, |
| "loss": 0.0129, |
| "step": 6930 |
| }, |
| { |
| "epoch": 3.8836038052602126, |
| "grad_norm": 0.27795109152793884, |
| "learning_rate": 7.776132415432234e-05, |
| "loss": 0.0151, |
| "step": 6940 |
| }, |
| { |
| "epoch": 3.889199776161164, |
| "grad_norm": 0.22460781037807465, |
| "learning_rate": 7.769252676056187e-05, |
| "loss": 0.0145, |
| "step": 6950 |
| }, |
| { |
| "epoch": 3.8947957470621155, |
| "grad_norm": 0.29980891942977905, |
| "learning_rate": 7.762365365649067e-05, |
| "loss": 0.015, |
| "step": 6960 |
| }, |
| { |
| "epoch": 3.9003917179630667, |
| "grad_norm": 0.2440609186887741, |
| "learning_rate": 7.755470503040516e-05, |
| "loss": 0.0137, |
| "step": 6970 |
| }, |
| { |
| "epoch": 3.905987688864018, |
| "grad_norm": 0.2510973811149597, |
| "learning_rate": 7.748568107080832e-05, |
| "loss": 0.0118, |
| "step": 6980 |
| }, |
| { |
| "epoch": 3.911583659764969, |
| "grad_norm": 0.4981507956981659, |
| "learning_rate": 7.741658196640892e-05, |
| "loss": 0.0217, |
| "step": 6990 |
| }, |
| { |
| "epoch": 3.9171796306659203, |
| "grad_norm": 0.28161290287971497, |
| "learning_rate": 7.734740790612136e-05, |
| "loss": 0.0154, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.922775601566872, |
| "grad_norm": 0.40513697266578674, |
| "learning_rate": 7.727815907906481e-05, |
| "loss": 0.0169, |
| "step": 7010 |
| }, |
| { |
| "epoch": 3.928371572467823, |
| "grad_norm": 0.31741997599601746, |
| "learning_rate": 7.720883567456298e-05, |
| "loss": 0.0156, |
| "step": 7020 |
| }, |
| { |
| "epoch": 3.9339675433687744, |
| "grad_norm": 0.2534908652305603, |
| "learning_rate": 7.713943788214337e-05, |
| "loss": 0.0142, |
| "step": 7030 |
| }, |
| { |
| "epoch": 3.939563514269726, |
| "grad_norm": 0.2655825912952423, |
| "learning_rate": 7.70699658915369e-05, |
| "loss": 0.0154, |
| "step": 7040 |
| }, |
| { |
| "epoch": 3.9451594851706773, |
| "grad_norm": 0.32799914479255676, |
| "learning_rate": 7.700041989267736e-05, |
| "loss": 0.0137, |
| "step": 7050 |
| }, |
| { |
| "epoch": 3.9507554560716285, |
| "grad_norm": 0.184087872505188, |
| "learning_rate": 7.693080007570084e-05, |
| "loss": 0.013, |
| "step": 7060 |
| }, |
| { |
| "epoch": 3.9563514269725797, |
| "grad_norm": 0.31337958574295044, |
| "learning_rate": 7.686110663094525e-05, |
| "loss": 0.0203, |
| "step": 7070 |
| }, |
| { |
| "epoch": 3.961947397873531, |
| "grad_norm": 0.44696512818336487, |
| "learning_rate": 7.679133974894983e-05, |
| "loss": 0.0136, |
| "step": 7080 |
| }, |
| { |
| "epoch": 3.967543368774482, |
| "grad_norm": 0.2737766206264496, |
| "learning_rate": 7.672149962045457e-05, |
| "loss": 0.0157, |
| "step": 7090 |
| }, |
| { |
| "epoch": 3.9731393396754338, |
| "grad_norm": 0.4152137339115143, |
| "learning_rate": 7.66515864363997e-05, |
| "loss": 0.0151, |
| "step": 7100 |
| }, |
| { |
| "epoch": 3.978735310576385, |
| "grad_norm": 0.25766709446907043, |
| "learning_rate": 7.658160038792518e-05, |
| "loss": 0.0185, |
| "step": 7110 |
| }, |
| { |
| "epoch": 3.984331281477336, |
| "grad_norm": 0.2175714522600174, |
| "learning_rate": 7.651154166637025e-05, |
| "loss": 0.013, |
| "step": 7120 |
| }, |
| { |
| "epoch": 3.989927252378288, |
| "grad_norm": 0.2838795483112335, |
| "learning_rate": 7.644141046327271e-05, |
| "loss": 0.0152, |
| "step": 7130 |
| }, |
| { |
| "epoch": 3.995523223279239, |
| "grad_norm": 0.17076176404953003, |
| "learning_rate": 7.637120697036866e-05, |
| "loss": 0.0161, |
| "step": 7140 |
| }, |
| { |
| "epoch": 4.00111919418019, |
| "grad_norm": 0.34454286098480225, |
| "learning_rate": 7.630093137959171e-05, |
| "loss": 0.0155, |
| "step": 7150 |
| }, |
| { |
| "epoch": 4.0067151650811414, |
| "grad_norm": 0.2543468773365021, |
| "learning_rate": 7.623058388307269e-05, |
| "loss": 0.0224, |
| "step": 7160 |
| }, |
| { |
| "epoch": 4.012311135982093, |
| "grad_norm": 0.26474493741989136, |
| "learning_rate": 7.616016467313891e-05, |
| "loss": 0.0121, |
| "step": 7170 |
| }, |
| { |
| "epoch": 4.017907106883044, |
| "grad_norm": 0.2469242513179779, |
| "learning_rate": 7.608967394231387e-05, |
| "loss": 0.0168, |
| "step": 7180 |
| }, |
| { |
| "epoch": 4.023503077783996, |
| "grad_norm": 0.2605207562446594, |
| "learning_rate": 7.60191118833165e-05, |
| "loss": 0.0142, |
| "step": 7190 |
| }, |
| { |
| "epoch": 4.029099048684947, |
| "grad_norm": 0.1799083948135376, |
| "learning_rate": 7.594847868906076e-05, |
| "loss": 0.02, |
| "step": 7200 |
| }, |
| { |
| "epoch": 4.034695019585898, |
| "grad_norm": 0.179059699177742, |
| "learning_rate": 7.587777455265515e-05, |
| "loss": 0.0115, |
| "step": 7210 |
| }, |
| { |
| "epoch": 4.04029099048685, |
| "grad_norm": 0.2233004868030548, |
| "learning_rate": 7.580699966740201e-05, |
| "loss": 0.0128, |
| "step": 7220 |
| }, |
| { |
| "epoch": 4.045886961387801, |
| "grad_norm": 0.253635436296463, |
| "learning_rate": 7.573615422679726e-05, |
| "loss": 0.0149, |
| "step": 7230 |
| }, |
| { |
| "epoch": 4.051482932288752, |
| "grad_norm": 0.3416047692298889, |
| "learning_rate": 7.566523842452958e-05, |
| "loss": 0.0125, |
| "step": 7240 |
| }, |
| { |
| "epoch": 4.057078903189703, |
| "grad_norm": 0.27430468797683716, |
| "learning_rate": 7.559425245448006e-05, |
| "loss": 0.0153, |
| "step": 7250 |
| }, |
| { |
| "epoch": 4.062674874090654, |
| "grad_norm": 0.26396802067756653, |
| "learning_rate": 7.552319651072164e-05, |
| "loss": 0.0128, |
| "step": 7260 |
| }, |
| { |
| "epoch": 4.068270844991606, |
| "grad_norm": 0.1688843071460724, |
| "learning_rate": 7.545207078751857e-05, |
| "loss": 0.017, |
| "step": 7270 |
| }, |
| { |
| "epoch": 4.073866815892558, |
| "grad_norm": 0.25092509388923645, |
| "learning_rate": 7.538087547932585e-05, |
| "loss": 0.0119, |
| "step": 7280 |
| }, |
| { |
| "epoch": 4.079462786793509, |
| "grad_norm": 0.12876421213150024, |
| "learning_rate": 7.530961078078873e-05, |
| "loss": 0.0099, |
| "step": 7290 |
| }, |
| { |
| "epoch": 4.08505875769446, |
| "grad_norm": 0.13818064332008362, |
| "learning_rate": 7.52382768867422e-05, |
| "loss": 0.0156, |
| "step": 7300 |
| }, |
| { |
| "epoch": 4.090654728595411, |
| "grad_norm": 0.23580847680568695, |
| "learning_rate": 7.516687399221037e-05, |
| "loss": 0.0122, |
| "step": 7310 |
| }, |
| { |
| "epoch": 4.096250699496363, |
| "grad_norm": 0.22529348731040955, |
| "learning_rate": 7.509540229240601e-05, |
| "loss": 0.0115, |
| "step": 7320 |
| }, |
| { |
| "epoch": 4.101846670397314, |
| "grad_norm": 0.29066744446754456, |
| "learning_rate": 7.50238619827301e-05, |
| "loss": 0.0125, |
| "step": 7330 |
| }, |
| { |
| "epoch": 4.107442641298265, |
| "grad_norm": 0.30195966362953186, |
| "learning_rate": 7.495225325877103e-05, |
| "loss": 0.0136, |
| "step": 7340 |
| }, |
| { |
| "epoch": 4.113038612199216, |
| "grad_norm": 0.2478567361831665, |
| "learning_rate": 7.488057631630437e-05, |
| "loss": 0.0138, |
| "step": 7350 |
| }, |
| { |
| "epoch": 4.118634583100168, |
| "grad_norm": 0.23493291437625885, |
| "learning_rate": 7.480883135129211e-05, |
| "loss": 0.0171, |
| "step": 7360 |
| }, |
| { |
| "epoch": 4.1242305540011195, |
| "grad_norm": 0.28376439213752747, |
| "learning_rate": 7.473701855988227e-05, |
| "loss": 0.0161, |
| "step": 7370 |
| }, |
| { |
| "epoch": 4.129826524902071, |
| "grad_norm": 0.183238685131073, |
| "learning_rate": 7.466513813840825e-05, |
| "loss": 0.0159, |
| "step": 7380 |
| }, |
| { |
| "epoch": 4.135422495803022, |
| "grad_norm": 0.26259323954582214, |
| "learning_rate": 7.45931902833884e-05, |
| "loss": 0.0139, |
| "step": 7390 |
| }, |
| { |
| "epoch": 4.141018466703973, |
| "grad_norm": 0.31283116340637207, |
| "learning_rate": 7.452117519152542e-05, |
| "loss": 0.0103, |
| "step": 7400 |
| }, |
| { |
| "epoch": 4.146614437604924, |
| "grad_norm": 0.3131321370601654, |
| "learning_rate": 7.444909305970578e-05, |
| "loss": 0.0147, |
| "step": 7410 |
| }, |
| { |
| "epoch": 4.1522104085058755, |
| "grad_norm": 0.22739440202713013, |
| "learning_rate": 7.437694408499933e-05, |
| "loss": 0.0199, |
| "step": 7420 |
| }, |
| { |
| "epoch": 4.157806379406827, |
| "grad_norm": 0.22918283939361572, |
| "learning_rate": 7.430472846465856e-05, |
| "loss": 0.0152, |
| "step": 7430 |
| }, |
| { |
| "epoch": 4.163402350307779, |
| "grad_norm": 0.3530014455318451, |
| "learning_rate": 7.423244639611826e-05, |
| "loss": 0.0123, |
| "step": 7440 |
| }, |
| { |
| "epoch": 4.16899832120873, |
| "grad_norm": 0.32133522629737854, |
| "learning_rate": 7.416009807699482e-05, |
| "loss": 0.0151, |
| "step": 7450 |
| }, |
| { |
| "epoch": 4.174594292109681, |
| "grad_norm": 0.13515067100524902, |
| "learning_rate": 7.408768370508576e-05, |
| "loss": 0.0123, |
| "step": 7460 |
| }, |
| { |
| "epoch": 4.1801902630106325, |
| "grad_norm": 0.39963120222091675, |
| "learning_rate": 7.401520347836926e-05, |
| "loss": 0.0132, |
| "step": 7470 |
| }, |
| { |
| "epoch": 4.185786233911584, |
| "grad_norm": 0.16310429573059082, |
| "learning_rate": 7.394265759500348e-05, |
| "loss": 0.0211, |
| "step": 7480 |
| }, |
| { |
| "epoch": 4.191382204812535, |
| "grad_norm": 0.23062337934970856, |
| "learning_rate": 7.387004625332608e-05, |
| "loss": 0.0155, |
| "step": 7490 |
| }, |
| { |
| "epoch": 4.196978175713486, |
| "grad_norm": 0.3456437289714813, |
| "learning_rate": 7.379736965185368e-05, |
| "loss": 0.0149, |
| "step": 7500 |
| }, |
| { |
| "epoch": 4.202574146614437, |
| "grad_norm": 0.30712154507637024, |
| "learning_rate": 7.372462798928137e-05, |
| "loss": 0.0142, |
| "step": 7510 |
| }, |
| { |
| "epoch": 4.2081701175153885, |
| "grad_norm": 0.40980008244514465, |
| "learning_rate": 7.365182146448205e-05, |
| "loss": 0.0185, |
| "step": 7520 |
| }, |
| { |
| "epoch": 4.213766088416341, |
| "grad_norm": 0.3277069330215454, |
| "learning_rate": 7.357895027650598e-05, |
| "loss": 0.0202, |
| "step": 7530 |
| }, |
| { |
| "epoch": 4.219362059317292, |
| "grad_norm": 0.2991955280303955, |
| "learning_rate": 7.350601462458024e-05, |
| "loss": 0.0129, |
| "step": 7540 |
| }, |
| { |
| "epoch": 4.224958030218243, |
| "grad_norm": 0.3370542526245117, |
| "learning_rate": 7.343301470810808e-05, |
| "loss": 0.0186, |
| "step": 7550 |
| }, |
| { |
| "epoch": 4.230554001119194, |
| "grad_norm": 0.31613653898239136, |
| "learning_rate": 7.335995072666848e-05, |
| "loss": 0.0123, |
| "step": 7560 |
| }, |
| { |
| "epoch": 4.236149972020145, |
| "grad_norm": 0.21174335479736328, |
| "learning_rate": 7.328682288001561e-05, |
| "loss": 0.0088, |
| "step": 7570 |
| }, |
| { |
| "epoch": 4.241745942921097, |
| "grad_norm": 0.18430404365062714, |
| "learning_rate": 7.32136313680782e-05, |
| "loss": 0.0136, |
| "step": 7580 |
| }, |
| { |
| "epoch": 4.247341913822048, |
| "grad_norm": 0.161945641040802, |
| "learning_rate": 7.3140376390959e-05, |
| "loss": 0.0146, |
| "step": 7590 |
| }, |
| { |
| "epoch": 4.252937884722999, |
| "grad_norm": 0.3349175453186035, |
| "learning_rate": 7.30670581489344e-05, |
| "loss": 0.0151, |
| "step": 7600 |
| }, |
| { |
| "epoch": 4.258533855623951, |
| "grad_norm": 0.22331948578357697, |
| "learning_rate": 7.299367684245362e-05, |
| "loss": 0.0116, |
| "step": 7610 |
| }, |
| { |
| "epoch": 4.264129826524902, |
| "grad_norm": 0.32214659452438354, |
| "learning_rate": 7.292023267213835e-05, |
| "loss": 0.0125, |
| "step": 7620 |
| }, |
| { |
| "epoch": 4.269725797425854, |
| "grad_norm": 0.2628123164176941, |
| "learning_rate": 7.284672583878219e-05, |
| "loss": 0.021, |
| "step": 7630 |
| }, |
| { |
| "epoch": 4.275321768326805, |
| "grad_norm": 0.17666281759738922, |
| "learning_rate": 7.277315654334997e-05, |
| "loss": 0.0129, |
| "step": 7640 |
| }, |
| { |
| "epoch": 4.280917739227756, |
| "grad_norm": 0.13651759922504425, |
| "learning_rate": 7.269952498697734e-05, |
| "loss": 0.0136, |
| "step": 7650 |
| }, |
| { |
| "epoch": 4.286513710128707, |
| "grad_norm": 0.19819198548793793, |
| "learning_rate": 7.262583137097018e-05, |
| "loss": 0.0178, |
| "step": 7660 |
| }, |
| { |
| "epoch": 4.292109681029658, |
| "grad_norm": 0.30227622389793396, |
| "learning_rate": 7.255207589680402e-05, |
| "loss": 0.0099, |
| "step": 7670 |
| }, |
| { |
| "epoch": 4.29770565193061, |
| "grad_norm": 0.1803039014339447, |
| "learning_rate": 7.247825876612353e-05, |
| "loss": 0.0125, |
| "step": 7680 |
| }, |
| { |
| "epoch": 4.303301622831562, |
| "grad_norm": 0.2602524757385254, |
| "learning_rate": 7.240438018074189e-05, |
| "loss": 0.0128, |
| "step": 7690 |
| }, |
| { |
| "epoch": 4.308897593732513, |
| "grad_norm": 0.22282052040100098, |
| "learning_rate": 7.233044034264034e-05, |
| "loss": 0.0105, |
| "step": 7700 |
| }, |
| { |
| "epoch": 4.314493564633464, |
| "grad_norm": 0.3194449841976166, |
| "learning_rate": 7.225643945396757e-05, |
| "loss": 0.0133, |
| "step": 7710 |
| }, |
| { |
| "epoch": 4.320089535534415, |
| "grad_norm": 0.31051668524742126, |
| "learning_rate": 7.218237771703921e-05, |
| "loss": 0.021, |
| "step": 7720 |
| }, |
| { |
| "epoch": 4.3256855064353665, |
| "grad_norm": 0.23389574885368347, |
| "learning_rate": 7.210825533433719e-05, |
| "loss": 0.0151, |
| "step": 7730 |
| }, |
| { |
| "epoch": 4.331281477336318, |
| "grad_norm": 0.16604237258434296, |
| "learning_rate": 7.203407250850928e-05, |
| "loss": 0.0101, |
| "step": 7740 |
| }, |
| { |
| "epoch": 4.336877448237269, |
| "grad_norm": 0.26793259382247925, |
| "learning_rate": 7.195982944236851e-05, |
| "loss": 0.0177, |
| "step": 7750 |
| }, |
| { |
| "epoch": 4.34247341913822, |
| "grad_norm": 0.21598176658153534, |
| "learning_rate": 7.188552633889259e-05, |
| "loss": 0.0168, |
| "step": 7760 |
| }, |
| { |
| "epoch": 4.348069390039171, |
| "grad_norm": 0.30887526273727417, |
| "learning_rate": 7.181116340122336e-05, |
| "loss": 0.0122, |
| "step": 7770 |
| }, |
| { |
| "epoch": 4.3536653609401235, |
| "grad_norm": 0.3463345468044281, |
| "learning_rate": 7.173674083266624e-05, |
| "loss": 0.0143, |
| "step": 7780 |
| }, |
| { |
| "epoch": 4.359261331841075, |
| "grad_norm": 0.26217085123062134, |
| "learning_rate": 7.166225883668969e-05, |
| "loss": 0.0151, |
| "step": 7790 |
| }, |
| { |
| "epoch": 4.364857302742026, |
| "grad_norm": 0.28720608353614807, |
| "learning_rate": 7.158771761692464e-05, |
| "loss": 0.0139, |
| "step": 7800 |
| }, |
| { |
| "epoch": 4.370453273642977, |
| "grad_norm": 0.35230302810668945, |
| "learning_rate": 7.151311737716397e-05, |
| "loss": 0.0146, |
| "step": 7810 |
| }, |
| { |
| "epoch": 4.376049244543928, |
| "grad_norm": 0.2841963469982147, |
| "learning_rate": 7.143845832136188e-05, |
| "loss": 0.0153, |
| "step": 7820 |
| }, |
| { |
| "epoch": 4.3816452154448795, |
| "grad_norm": 0.3889724016189575, |
| "learning_rate": 7.136374065363334e-05, |
| "loss": 0.0147, |
| "step": 7830 |
| }, |
| { |
| "epoch": 4.387241186345831, |
| "grad_norm": 0.2717784345149994, |
| "learning_rate": 7.128896457825364e-05, |
| "loss": 0.0161, |
| "step": 7840 |
| }, |
| { |
| "epoch": 4.392837157246782, |
| "grad_norm": 0.27939334511756897, |
| "learning_rate": 7.121413029965769e-05, |
| "loss": 0.0127, |
| "step": 7850 |
| }, |
| { |
| "epoch": 4.398433128147734, |
| "grad_norm": 0.24780631065368652, |
| "learning_rate": 7.113923802243957e-05, |
| "loss": 0.0134, |
| "step": 7860 |
| }, |
| { |
| "epoch": 4.404029099048685, |
| "grad_norm": 0.2736693024635315, |
| "learning_rate": 7.10642879513519e-05, |
| "loss": 0.0157, |
| "step": 7870 |
| }, |
| { |
| "epoch": 4.409625069949636, |
| "grad_norm": 0.2332269549369812, |
| "learning_rate": 7.09892802913053e-05, |
| "loss": 0.0155, |
| "step": 7880 |
| }, |
| { |
| "epoch": 4.415221040850588, |
| "grad_norm": 0.3542332947254181, |
| "learning_rate": 7.091421524736784e-05, |
| "loss": 0.0161, |
| "step": 7890 |
| }, |
| { |
| "epoch": 4.420817011751539, |
| "grad_norm": 0.29242730140686035, |
| "learning_rate": 7.083909302476453e-05, |
| "loss": 0.0137, |
| "step": 7900 |
| }, |
| { |
| "epoch": 4.42641298265249, |
| "grad_norm": 0.33528995513916016, |
| "learning_rate": 7.076391382887661e-05, |
| "loss": 0.0146, |
| "step": 7910 |
| }, |
| { |
| "epoch": 4.432008953553441, |
| "grad_norm": 0.34565469622612, |
| "learning_rate": 7.068867786524116e-05, |
| "loss": 0.0128, |
| "step": 7920 |
| }, |
| { |
| "epoch": 4.4376049244543925, |
| "grad_norm": 0.29550039768218994, |
| "learning_rate": 7.061338533955043e-05, |
| "loss": 0.0143, |
| "step": 7930 |
| }, |
| { |
| "epoch": 4.443200895355345, |
| "grad_norm": 0.18918676674365997, |
| "learning_rate": 7.053803645765128e-05, |
| "loss": 0.017, |
| "step": 7940 |
| }, |
| { |
| "epoch": 4.448796866256296, |
| "grad_norm": 0.24842104315757751, |
| "learning_rate": 7.04626314255447e-05, |
| "loss": 0.0115, |
| "step": 7950 |
| }, |
| { |
| "epoch": 4.454392837157247, |
| "grad_norm": 0.25395554304122925, |
| "learning_rate": 7.038717044938519e-05, |
| "loss": 0.0136, |
| "step": 7960 |
| }, |
| { |
| "epoch": 4.459988808058198, |
| "grad_norm": 0.223357155919075, |
| "learning_rate": 7.031165373548014e-05, |
| "loss": 0.0159, |
| "step": 7970 |
| }, |
| { |
| "epoch": 4.465584778959149, |
| "grad_norm": 0.2434312105178833, |
| "learning_rate": 7.023608149028937e-05, |
| "loss": 0.0113, |
| "step": 7980 |
| }, |
| { |
| "epoch": 4.471180749860101, |
| "grad_norm": 0.27500098943710327, |
| "learning_rate": 7.016045392042452e-05, |
| "loss": 0.0127, |
| "step": 7990 |
| }, |
| { |
| "epoch": 4.476776720761052, |
| "grad_norm": 0.1670360416173935, |
| "learning_rate": 7.008477123264848e-05, |
| "loss": 0.0151, |
| "step": 8000 |
| }, |
| { |
| "epoch": 4.482372691662003, |
| "grad_norm": 0.3035995662212372, |
| "learning_rate": 7.000903363387482e-05, |
| "loss": 0.0143, |
| "step": 8010 |
| }, |
| { |
| "epoch": 4.487968662562954, |
| "grad_norm": 0.25943461060523987, |
| "learning_rate": 6.993324133116726e-05, |
| "loss": 0.0099, |
| "step": 8020 |
| }, |
| { |
| "epoch": 4.493564633463906, |
| "grad_norm": 0.20338699221611023, |
| "learning_rate": 6.985739453173903e-05, |
| "loss": 0.0127, |
| "step": 8030 |
| }, |
| { |
| "epoch": 4.4991606043648575, |
| "grad_norm": 0.18308840692043304, |
| "learning_rate": 6.978149344295242e-05, |
| "loss": 0.012, |
| "step": 8040 |
| }, |
| { |
| "epoch": 4.504756575265809, |
| "grad_norm": 0.142523393034935, |
| "learning_rate": 6.97055382723181e-05, |
| "loss": 0.0117, |
| "step": 8050 |
| }, |
| { |
| "epoch": 4.51035254616676, |
| "grad_norm": 0.26383474469184875, |
| "learning_rate": 6.962952922749457e-05, |
| "loss": 0.0171, |
| "step": 8060 |
| }, |
| { |
| "epoch": 4.515948517067711, |
| "grad_norm": 0.1817890852689743, |
| "learning_rate": 6.955346651628771e-05, |
| "loss": 0.0147, |
| "step": 8070 |
| }, |
| { |
| "epoch": 4.521544487968662, |
| "grad_norm": 0.20679673552513123, |
| "learning_rate": 6.947735034665002e-05, |
| "loss": 0.0161, |
| "step": 8080 |
| }, |
| { |
| "epoch": 4.527140458869614, |
| "grad_norm": 0.2073245346546173, |
| "learning_rate": 6.940118092668022e-05, |
| "loss": 0.0104, |
| "step": 8090 |
| }, |
| { |
| "epoch": 4.532736429770566, |
| "grad_norm": 0.45759397745132446, |
| "learning_rate": 6.932495846462261e-05, |
| "loss": 0.0141, |
| "step": 8100 |
| }, |
| { |
| "epoch": 4.538332400671517, |
| "grad_norm": 0.2275332510471344, |
| "learning_rate": 6.924868316886649e-05, |
| "loss": 0.0144, |
| "step": 8110 |
| }, |
| { |
| "epoch": 4.543928371572468, |
| "grad_norm": 0.24839594960212708, |
| "learning_rate": 6.917235524794558e-05, |
| "loss": 0.0153, |
| "step": 8120 |
| }, |
| { |
| "epoch": 4.549524342473419, |
| "grad_norm": 0.13045403361320496, |
| "learning_rate": 6.909597491053751e-05, |
| "loss": 0.0148, |
| "step": 8130 |
| }, |
| { |
| "epoch": 4.5551203133743705, |
| "grad_norm": 0.298033207654953, |
| "learning_rate": 6.901954236546323e-05, |
| "loss": 0.0148, |
| "step": 8140 |
| }, |
| { |
| "epoch": 4.560716284275322, |
| "grad_norm": 0.3102302849292755, |
| "learning_rate": 6.894305782168638e-05, |
| "loss": 0.0104, |
| "step": 8150 |
| }, |
| { |
| "epoch": 4.566312255176273, |
| "grad_norm": 0.3511497378349304, |
| "learning_rate": 6.886652148831279e-05, |
| "loss": 0.0114, |
| "step": 8160 |
| }, |
| { |
| "epoch": 4.571908226077224, |
| "grad_norm": 0.19204401969909668, |
| "learning_rate": 6.878993357458986e-05, |
| "loss": 0.0144, |
| "step": 8170 |
| }, |
| { |
| "epoch": 4.577504196978175, |
| "grad_norm": 0.27601921558380127, |
| "learning_rate": 6.871329428990602e-05, |
| "loss": 0.0121, |
| "step": 8180 |
| }, |
| { |
| "epoch": 4.583100167879127, |
| "grad_norm": 0.15351536870002747, |
| "learning_rate": 6.863660384379017e-05, |
| "loss": 0.017, |
| "step": 8190 |
| }, |
| { |
| "epoch": 4.588696138780079, |
| "grad_norm": 0.34269094467163086, |
| "learning_rate": 6.855986244591104e-05, |
| "loss": 0.0164, |
| "step": 8200 |
| }, |
| { |
| "epoch": 4.59429210968103, |
| "grad_norm": 0.20768719911575317, |
| "learning_rate": 6.84830703060767e-05, |
| "loss": 0.0186, |
| "step": 8210 |
| }, |
| { |
| "epoch": 4.599888080581981, |
| "grad_norm": 0.29763510823249817, |
| "learning_rate": 6.840622763423391e-05, |
| "loss": 0.0134, |
| "step": 8220 |
| }, |
| { |
| "epoch": 4.605484051482932, |
| "grad_norm": 0.29871609807014465, |
| "learning_rate": 6.83293346404676e-05, |
| "loss": 0.0118, |
| "step": 8230 |
| }, |
| { |
| "epoch": 4.6110800223838835, |
| "grad_norm": 0.24642953276634216, |
| "learning_rate": 6.825239153500029e-05, |
| "loss": 0.015, |
| "step": 8240 |
| }, |
| { |
| "epoch": 4.616675993284835, |
| "grad_norm": 0.20664198696613312, |
| "learning_rate": 6.817539852819149e-05, |
| "loss": 0.0165, |
| "step": 8250 |
| }, |
| { |
| "epoch": 4.622271964185786, |
| "grad_norm": 0.1941448450088501, |
| "learning_rate": 6.809835583053715e-05, |
| "loss": 0.0129, |
| "step": 8260 |
| }, |
| { |
| "epoch": 4.627867935086737, |
| "grad_norm": 0.21355387568473816, |
| "learning_rate": 6.802126365266905e-05, |
| "loss": 0.013, |
| "step": 8270 |
| }, |
| { |
| "epoch": 4.633463905987689, |
| "grad_norm": 0.2642342746257782, |
| "learning_rate": 6.794412220535426e-05, |
| "loss": 0.0176, |
| "step": 8280 |
| }, |
| { |
| "epoch": 4.63905987688864, |
| "grad_norm": 0.31280654668807983, |
| "learning_rate": 6.786693169949455e-05, |
| "loss": 0.017, |
| "step": 8290 |
| }, |
| { |
| "epoch": 4.644655847789592, |
| "grad_norm": 0.2257363200187683, |
| "learning_rate": 6.778969234612584e-05, |
| "loss": 0.0099, |
| "step": 8300 |
| }, |
| { |
| "epoch": 4.650251818690543, |
| "grad_norm": 0.16536390781402588, |
| "learning_rate": 6.771240435641754e-05, |
| "loss": 0.012, |
| "step": 8310 |
| }, |
| { |
| "epoch": 4.655847789591494, |
| "grad_norm": 0.16031181812286377, |
| "learning_rate": 6.763506794167208e-05, |
| "loss": 0.0094, |
| "step": 8320 |
| }, |
| { |
| "epoch": 4.661443760492445, |
| "grad_norm": 0.2519717514514923, |
| "learning_rate": 6.755768331332424e-05, |
| "loss": 0.0153, |
| "step": 8330 |
| }, |
| { |
| "epoch": 4.6670397313933965, |
| "grad_norm": 0.11290234327316284, |
| "learning_rate": 6.748025068294067e-05, |
| "loss": 0.0187, |
| "step": 8340 |
| }, |
| { |
| "epoch": 4.6726357022943485, |
| "grad_norm": 0.18607747554779053, |
| "learning_rate": 6.740277026221923e-05, |
| "loss": 0.0123, |
| "step": 8350 |
| }, |
| { |
| "epoch": 4.6782316731953, |
| "grad_norm": 0.20653483271598816, |
| "learning_rate": 6.732524226298841e-05, |
| "loss": 0.0128, |
| "step": 8360 |
| }, |
| { |
| "epoch": 4.683827644096251, |
| "grad_norm": 0.20888541638851166, |
| "learning_rate": 6.72476668972068e-05, |
| "loss": 0.0235, |
| "step": 8370 |
| }, |
| { |
| "epoch": 4.689423614997202, |
| "grad_norm": 0.23816397786140442, |
| "learning_rate": 6.71700443769625e-05, |
| "loss": 0.0125, |
| "step": 8380 |
| }, |
| { |
| "epoch": 4.695019585898153, |
| "grad_norm": 0.3250564932823181, |
| "learning_rate": 6.709237491447249e-05, |
| "loss": 0.011, |
| "step": 8390 |
| }, |
| { |
| "epoch": 4.700615556799105, |
| "grad_norm": 0.3211959898471832, |
| "learning_rate": 6.701465872208216e-05, |
| "loss": 0.0124, |
| "step": 8400 |
| }, |
| { |
| "epoch": 4.706211527700056, |
| "grad_norm": 0.3432743549346924, |
| "learning_rate": 6.693689601226458e-05, |
| "loss": 0.0119, |
| "step": 8410 |
| }, |
| { |
| "epoch": 4.711807498601007, |
| "grad_norm": 0.2595174014568329, |
| "learning_rate": 6.685908699762002e-05, |
| "loss": 0.0111, |
| "step": 8420 |
| }, |
| { |
| "epoch": 4.717403469501958, |
| "grad_norm": 0.283252090215683, |
| "learning_rate": 6.67812318908754e-05, |
| "loss": 0.0119, |
| "step": 8430 |
| }, |
| { |
| "epoch": 4.72299944040291, |
| "grad_norm": 0.20471790432929993, |
| "learning_rate": 6.670333090488356e-05, |
| "loss": 0.013, |
| "step": 8440 |
| }, |
| { |
| "epoch": 4.7285954113038615, |
| "grad_norm": 0.1850796490907669, |
| "learning_rate": 6.662538425262285e-05, |
| "loss": 0.0112, |
| "step": 8450 |
| }, |
| { |
| "epoch": 4.734191382204813, |
| "grad_norm": 0.2515677213668823, |
| "learning_rate": 6.654739214719641e-05, |
| "loss": 0.0084, |
| "step": 8460 |
| }, |
| { |
| "epoch": 4.739787353105764, |
| "grad_norm": 0.25231802463531494, |
| "learning_rate": 6.646935480183173e-05, |
| "loss": 0.0149, |
| "step": 8470 |
| }, |
| { |
| "epoch": 4.745383324006715, |
| "grad_norm": 0.24691557884216309, |
| "learning_rate": 6.639127242987988e-05, |
| "loss": 0.0144, |
| "step": 8480 |
| }, |
| { |
| "epoch": 4.750979294907666, |
| "grad_norm": 0.3806649446487427, |
| "learning_rate": 6.631314524481513e-05, |
| "loss": 0.0136, |
| "step": 8490 |
| }, |
| { |
| "epoch": 4.756575265808618, |
| "grad_norm": 0.233370840549469, |
| "learning_rate": 6.623497346023418e-05, |
| "loss": 0.0119, |
| "step": 8500 |
| }, |
| { |
| "epoch": 4.762171236709569, |
| "grad_norm": 0.16195163130760193, |
| "learning_rate": 6.615675728985572e-05, |
| "loss": 0.0178, |
| "step": 8510 |
| }, |
| { |
| "epoch": 4.76776720761052, |
| "grad_norm": 0.25800469517707825, |
| "learning_rate": 6.607849694751977e-05, |
| "loss": 0.012, |
| "step": 8520 |
| }, |
| { |
| "epoch": 4.773363178511472, |
| "grad_norm": 0.17752796411514282, |
| "learning_rate": 6.600019264718713e-05, |
| "loss": 0.0084, |
| "step": 8530 |
| }, |
| { |
| "epoch": 4.778959149412423, |
| "grad_norm": 0.2168557047843933, |
| "learning_rate": 6.592184460293877e-05, |
| "loss": 0.0163, |
| "step": 8540 |
| }, |
| { |
| "epoch": 4.7845551203133745, |
| "grad_norm": 0.2908076345920563, |
| "learning_rate": 6.584345302897523e-05, |
| "loss": 0.0091, |
| "step": 8550 |
| }, |
| { |
| "epoch": 4.790151091214326, |
| "grad_norm": 0.16817107796669006, |
| "learning_rate": 6.576501813961609e-05, |
| "loss": 0.012, |
| "step": 8560 |
| }, |
| { |
| "epoch": 4.795747062115277, |
| "grad_norm": 0.17607803642749786, |
| "learning_rate": 6.568654014929932e-05, |
| "loss": 0.0095, |
| "step": 8570 |
| }, |
| { |
| "epoch": 4.801343033016228, |
| "grad_norm": 0.1395525336265564, |
| "learning_rate": 6.56080192725808e-05, |
| "loss": 0.0127, |
| "step": 8580 |
| }, |
| { |
| "epoch": 4.806939003917179, |
| "grad_norm": 0.12721598148345947, |
| "learning_rate": 6.552945572413358e-05, |
| "loss": 0.0127, |
| "step": 8590 |
| }, |
| { |
| "epoch": 4.812534974818131, |
| "grad_norm": 0.220106303691864, |
| "learning_rate": 6.545084971874738e-05, |
| "loss": 0.0124, |
| "step": 8600 |
| }, |
| { |
| "epoch": 4.818130945719083, |
| "grad_norm": 0.1850575953722, |
| "learning_rate": 6.537220147132805e-05, |
| "loss": 0.0133, |
| "step": 8610 |
| }, |
| { |
| "epoch": 4.823726916620034, |
| "grad_norm": 0.14641323685646057, |
| "learning_rate": 6.529351119689688e-05, |
| "loss": 0.0083, |
| "step": 8620 |
| }, |
| { |
| "epoch": 4.829322887520985, |
| "grad_norm": 0.2565167546272278, |
| "learning_rate": 6.521477911059008e-05, |
| "loss": 0.0146, |
| "step": 8630 |
| }, |
| { |
| "epoch": 4.834918858421936, |
| "grad_norm": 0.1807018518447876, |
| "learning_rate": 6.513600542765817e-05, |
| "loss": 0.0093, |
| "step": 8640 |
| }, |
| { |
| "epoch": 4.8405148293228875, |
| "grad_norm": 0.22783279418945312, |
| "learning_rate": 6.505719036346539e-05, |
| "loss": 0.0105, |
| "step": 8650 |
| }, |
| { |
| "epoch": 4.846110800223839, |
| "grad_norm": 0.18857407569885254, |
| "learning_rate": 6.497833413348909e-05, |
| "loss": 0.012, |
| "step": 8660 |
| }, |
| { |
| "epoch": 4.85170677112479, |
| "grad_norm": 0.31593799591064453, |
| "learning_rate": 6.489943695331923e-05, |
| "loss": 0.013, |
| "step": 8670 |
| }, |
| { |
| "epoch": 4.857302742025741, |
| "grad_norm": 0.3053518533706665, |
| "learning_rate": 6.48204990386577e-05, |
| "loss": 0.0106, |
| "step": 8680 |
| }, |
| { |
| "epoch": 4.862898712926693, |
| "grad_norm": 0.2662791311740875, |
| "learning_rate": 6.474152060531768e-05, |
| "loss": 0.0151, |
| "step": 8690 |
| }, |
| { |
| "epoch": 4.868494683827644, |
| "grad_norm": 0.13093920052051544, |
| "learning_rate": 6.466250186922325e-05, |
| "loss": 0.0108, |
| "step": 8700 |
| }, |
| { |
| "epoch": 4.874090654728596, |
| "grad_norm": 0.17706599831581116, |
| "learning_rate": 6.458344304640858e-05, |
| "loss": 0.0118, |
| "step": 8710 |
| }, |
| { |
| "epoch": 4.879686625629547, |
| "grad_norm": 0.19158832728862762, |
| "learning_rate": 6.450434435301751e-05, |
| "loss": 0.0116, |
| "step": 8720 |
| }, |
| { |
| "epoch": 4.885282596530498, |
| "grad_norm": 0.12095298618078232, |
| "learning_rate": 6.44252060053028e-05, |
| "loss": 0.0134, |
| "step": 8730 |
| }, |
| { |
| "epoch": 4.890878567431449, |
| "grad_norm": 0.2882150411605835, |
| "learning_rate": 6.43460282196257e-05, |
| "loss": 0.0112, |
| "step": 8740 |
| }, |
| { |
| "epoch": 4.8964745383324, |
| "grad_norm": 0.34821435809135437, |
| "learning_rate": 6.426681121245527e-05, |
| "loss": 0.0111, |
| "step": 8750 |
| }, |
| { |
| "epoch": 4.902070509233352, |
| "grad_norm": 0.28680020570755005, |
| "learning_rate": 6.418755520036775e-05, |
| "loss": 0.011, |
| "step": 8760 |
| }, |
| { |
| "epoch": 4.907666480134303, |
| "grad_norm": 0.15372464060783386, |
| "learning_rate": 6.410826040004607e-05, |
| "loss": 0.0138, |
| "step": 8770 |
| }, |
| { |
| "epoch": 4.913262451035255, |
| "grad_norm": 0.24093207716941833, |
| "learning_rate": 6.402892702827916e-05, |
| "loss": 0.0152, |
| "step": 8780 |
| }, |
| { |
| "epoch": 4.918858421936206, |
| "grad_norm": 0.3779686689376831, |
| "learning_rate": 6.394955530196147e-05, |
| "loss": 0.0173, |
| "step": 8790 |
| }, |
| { |
| "epoch": 4.924454392837157, |
| "grad_norm": 0.19445843994617462, |
| "learning_rate": 6.387014543809223e-05, |
| "loss": 0.0142, |
| "step": 8800 |
| }, |
| { |
| "epoch": 4.930050363738109, |
| "grad_norm": 0.32286763191223145, |
| "learning_rate": 6.3790697653775e-05, |
| "loss": 0.0217, |
| "step": 8810 |
| }, |
| { |
| "epoch": 4.93564633463906, |
| "grad_norm": 0.27731436491012573, |
| "learning_rate": 6.371121216621698e-05, |
| "loss": 0.0103, |
| "step": 8820 |
| }, |
| { |
| "epoch": 4.941242305540011, |
| "grad_norm": 0.2174469232559204, |
| "learning_rate": 6.363168919272846e-05, |
| "loss": 0.0112, |
| "step": 8830 |
| }, |
| { |
| "epoch": 4.946838276440962, |
| "grad_norm": 0.20424802601337433, |
| "learning_rate": 6.355212895072223e-05, |
| "loss": 0.0179, |
| "step": 8840 |
| }, |
| { |
| "epoch": 4.952434247341914, |
| "grad_norm": 0.14288559556007385, |
| "learning_rate": 6.34725316577129e-05, |
| "loss": 0.0116, |
| "step": 8850 |
| }, |
| { |
| "epoch": 4.9580302182428655, |
| "grad_norm": 0.21734347939491272, |
| "learning_rate": 6.339289753131649e-05, |
| "loss": 0.012, |
| "step": 8860 |
| }, |
| { |
| "epoch": 4.963626189143817, |
| "grad_norm": 0.29445502161979675, |
| "learning_rate": 6.331322678924962e-05, |
| "loss": 0.0116, |
| "step": 8870 |
| }, |
| { |
| "epoch": 4.969222160044768, |
| "grad_norm": 0.2319229543209076, |
| "learning_rate": 6.323351964932908e-05, |
| "loss": 0.0194, |
| "step": 8880 |
| }, |
| { |
| "epoch": 4.974818130945719, |
| "grad_norm": 0.13166509568691254, |
| "learning_rate": 6.315377632947115e-05, |
| "loss": 0.0127, |
| "step": 8890 |
| }, |
| { |
| "epoch": 4.98041410184667, |
| "grad_norm": 0.2546875774860382, |
| "learning_rate": 6.307399704769099e-05, |
| "loss": 0.0115, |
| "step": 8900 |
| }, |
| { |
| "epoch": 4.9860100727476215, |
| "grad_norm": 0.2343253493309021, |
| "learning_rate": 6.299418202210214e-05, |
| "loss": 0.0123, |
| "step": 8910 |
| }, |
| { |
| "epoch": 4.991606043648573, |
| "grad_norm": 0.12813247740268707, |
| "learning_rate": 6.291433147091583e-05, |
| "loss": 0.0121, |
| "step": 8920 |
| }, |
| { |
| "epoch": 4.997202014549524, |
| "grad_norm": 0.11860624700784683, |
| "learning_rate": 6.283444561244042e-05, |
| "loss": 0.0125, |
| "step": 8930 |
| }, |
| { |
| "epoch": 5.002797985450476, |
| "grad_norm": 0.1995118260383606, |
| "learning_rate": 6.275452466508077e-05, |
| "loss": 0.0112, |
| "step": 8940 |
| }, |
| { |
| "epoch": 5.008393956351427, |
| "grad_norm": 0.2113560289144516, |
| "learning_rate": 6.26745688473377e-05, |
| "loss": 0.0118, |
| "step": 8950 |
| }, |
| { |
| "epoch": 5.0139899272523785, |
| "grad_norm": 0.321319580078125, |
| "learning_rate": 6.259457837780742e-05, |
| "loss": 0.0145, |
| "step": 8960 |
| }, |
| { |
| "epoch": 5.01958589815333, |
| "grad_norm": 0.15436704456806183, |
| "learning_rate": 6.251455347518073e-05, |
| "loss": 0.011, |
| "step": 8970 |
| }, |
| { |
| "epoch": 5.025181869054281, |
| "grad_norm": 0.2929522693157196, |
| "learning_rate": 6.243449435824276e-05, |
| "loss": 0.0145, |
| "step": 8980 |
| }, |
| { |
| "epoch": 5.030777839955232, |
| "grad_norm": 0.2311781346797943, |
| "learning_rate": 6.235440124587198e-05, |
| "loss": 0.0121, |
| "step": 8990 |
| }, |
| { |
| "epoch": 5.036373810856183, |
| "grad_norm": 0.16461458802223206, |
| "learning_rate": 6.227427435703997e-05, |
| "loss": 0.016, |
| "step": 9000 |
| }, |
| { |
| "epoch": 5.0419697817571345, |
| "grad_norm": 0.23925089836120605, |
| "learning_rate": 6.219411391081055e-05, |
| "loss": 0.0125, |
| "step": 9010 |
| }, |
| { |
| "epoch": 5.047565752658087, |
| "grad_norm": 0.3376557230949402, |
| "learning_rate": 6.211392012633932e-05, |
| "loss": 0.0147, |
| "step": 9020 |
| }, |
| { |
| "epoch": 5.053161723559038, |
| "grad_norm": 0.20988136529922485, |
| "learning_rate": 6.203369322287306e-05, |
| "loss": 0.0139, |
| "step": 9030 |
| }, |
| { |
| "epoch": 5.058757694459989, |
| "grad_norm": 0.17247657477855682, |
| "learning_rate": 6.195343341974899e-05, |
| "loss": 0.0133, |
| "step": 9040 |
| }, |
| { |
| "epoch": 5.06435366536094, |
| "grad_norm": 0.24936120212078094, |
| "learning_rate": 6.187314093639444e-05, |
| "loss": 0.0112, |
| "step": 9050 |
| }, |
| { |
| "epoch": 5.069949636261891, |
| "grad_norm": 0.1587497889995575, |
| "learning_rate": 6.179281599232591e-05, |
| "loss": 0.0127, |
| "step": 9060 |
| }, |
| { |
| "epoch": 5.075545607162843, |
| "grad_norm": 0.12296043336391449, |
| "learning_rate": 6.17124588071488e-05, |
| "loss": 0.0132, |
| "step": 9070 |
| }, |
| { |
| "epoch": 5.081141578063794, |
| "grad_norm": 0.2310076504945755, |
| "learning_rate": 6.163206960055651e-05, |
| "loss": 0.013, |
| "step": 9080 |
| }, |
| { |
| "epoch": 5.086737548964745, |
| "grad_norm": 0.1278199851512909, |
| "learning_rate": 6.155164859233012e-05, |
| "loss": 0.0127, |
| "step": 9090 |
| }, |
| { |
| "epoch": 5.092333519865696, |
| "grad_norm": 0.225848987698555, |
| "learning_rate": 6.147119600233758e-05, |
| "loss": 0.0125, |
| "step": 9100 |
| }, |
| { |
| "epoch": 5.097929490766648, |
| "grad_norm": 0.12778952717781067, |
| "learning_rate": 6.13907120505332e-05, |
| "loss": 0.0102, |
| "step": 9110 |
| }, |
| { |
| "epoch": 5.1035254616676, |
| "grad_norm": 0.2868061065673828, |
| "learning_rate": 6.131019695695702e-05, |
| "loss": 0.0102, |
| "step": 9120 |
| }, |
| { |
| "epoch": 5.109121432568551, |
| "grad_norm": 0.35349947214126587, |
| "learning_rate": 6.122965094173424e-05, |
| "loss": 0.0151, |
| "step": 9130 |
| }, |
| { |
| "epoch": 5.114717403469502, |
| "grad_norm": 0.24252165853977203, |
| "learning_rate": 6.11490742250746e-05, |
| "loss": 0.0111, |
| "step": 9140 |
| }, |
| { |
| "epoch": 5.120313374370453, |
| "grad_norm": 0.17868760228157043, |
| "learning_rate": 6.106846702727172e-05, |
| "loss": 0.0102, |
| "step": 9150 |
| }, |
| { |
| "epoch": 5.125909345271404, |
| "grad_norm": 0.21379156410694122, |
| "learning_rate": 6.0987829568702656e-05, |
| "loss": 0.0137, |
| "step": 9160 |
| }, |
| { |
| "epoch": 5.131505316172356, |
| "grad_norm": 0.29363685846328735, |
| "learning_rate": 6.090716206982714e-05, |
| "loss": 0.0131, |
| "step": 9170 |
| }, |
| { |
| "epoch": 5.137101287073307, |
| "grad_norm": 0.330162912607193, |
| "learning_rate": 6.0826464751186994e-05, |
| "loss": 0.0129, |
| "step": 9180 |
| }, |
| { |
| "epoch": 5.142697257974259, |
| "grad_norm": 0.2052110731601715, |
| "learning_rate": 6.074573783340562e-05, |
| "loss": 0.0108, |
| "step": 9190 |
| }, |
| { |
| "epoch": 5.14829322887521, |
| "grad_norm": 0.17011559009552002, |
| "learning_rate": 6.066498153718735e-05, |
| "loss": 0.0125, |
| "step": 9200 |
| }, |
| { |
| "epoch": 5.153889199776161, |
| "grad_norm": 0.3137349486351013, |
| "learning_rate": 6.0584196083316794e-05, |
| "loss": 0.0192, |
| "step": 9210 |
| }, |
| { |
| "epoch": 5.1594851706771125, |
| "grad_norm": 0.3046635389328003, |
| "learning_rate": 6.05033816926583e-05, |
| "loss": 0.0119, |
| "step": 9220 |
| }, |
| { |
| "epoch": 5.165081141578064, |
| "grad_norm": 0.1919318437576294, |
| "learning_rate": 6.042253858615532e-05, |
| "loss": 0.0139, |
| "step": 9230 |
| }, |
| { |
| "epoch": 5.170677112479015, |
| "grad_norm": 0.3815397322177887, |
| "learning_rate": 6.034166698482984e-05, |
| "loss": 0.0176, |
| "step": 9240 |
| }, |
| { |
| "epoch": 5.176273083379966, |
| "grad_norm": 0.23484662175178528, |
| "learning_rate": 6.026076710978171e-05, |
| "loss": 0.0137, |
| "step": 9250 |
| }, |
| { |
| "epoch": 5.181869054280917, |
| "grad_norm": 0.1737549602985382, |
| "learning_rate": 6.017983918218812e-05, |
| "loss": 0.0112, |
| "step": 9260 |
| }, |
| { |
| "epoch": 5.1874650251818695, |
| "grad_norm": 0.28736233711242676, |
| "learning_rate": 6.009888342330292e-05, |
| "loss": 0.0112, |
| "step": 9270 |
| }, |
| { |
| "epoch": 5.193060996082821, |
| "grad_norm": 0.21343185007572174, |
| "learning_rate": 6.001790005445607e-05, |
| "loss": 0.0089, |
| "step": 9280 |
| }, |
| { |
| "epoch": 5.198656966983772, |
| "grad_norm": 0.15162508189678192, |
| "learning_rate": 5.9936889297052986e-05, |
| "loss": 0.0156, |
| "step": 9290 |
| }, |
| { |
| "epoch": 5.204252937884723, |
| "grad_norm": 0.2816758155822754, |
| "learning_rate": 5.985585137257401e-05, |
| "loss": 0.0093, |
| "step": 9300 |
| }, |
| { |
| "epoch": 5.209848908785674, |
| "grad_norm": 0.1730954796075821, |
| "learning_rate": 5.977478650257374e-05, |
| "loss": 0.016, |
| "step": 9310 |
| }, |
| { |
| "epoch": 5.2154448796866255, |
| "grad_norm": 0.18365302681922913, |
| "learning_rate": 5.969369490868042e-05, |
| "loss": 0.0259, |
| "step": 9320 |
| }, |
| { |
| "epoch": 5.221040850587577, |
| "grad_norm": 0.12864327430725098, |
| "learning_rate": 5.961257681259535e-05, |
| "loss": 0.0119, |
| "step": 9330 |
| }, |
| { |
| "epoch": 5.226636821488528, |
| "grad_norm": 0.16363385319709778, |
| "learning_rate": 5.953143243609235e-05, |
| "loss": 0.0129, |
| "step": 9340 |
| }, |
| { |
| "epoch": 5.23223279238948, |
| "grad_norm": 0.15773551166057587, |
| "learning_rate": 5.945026200101702e-05, |
| "loss": 0.0083, |
| "step": 9350 |
| }, |
| { |
| "epoch": 5.237828763290431, |
| "grad_norm": 0.22605851292610168, |
| "learning_rate": 5.9369065729286245e-05, |
| "loss": 0.0096, |
| "step": 9360 |
| }, |
| { |
| "epoch": 5.243424734191382, |
| "grad_norm": 0.13637419044971466, |
| "learning_rate": 5.92878438428875e-05, |
| "loss": 0.0185, |
| "step": 9370 |
| }, |
| { |
| "epoch": 5.249020705092334, |
| "grad_norm": 0.12795643508434296, |
| "learning_rate": 5.9206596563878357e-05, |
| "loss": 0.008, |
| "step": 9380 |
| }, |
| { |
| "epoch": 5.254616675993285, |
| "grad_norm": 0.2635105550289154, |
| "learning_rate": 5.912532411438576e-05, |
| "loss": 0.0162, |
| "step": 9390 |
| }, |
| { |
| "epoch": 5.260212646894236, |
| "grad_norm": 0.18397080898284912, |
| "learning_rate": 5.90440267166055e-05, |
| "loss": 0.013, |
| "step": 9400 |
| }, |
| { |
| "epoch": 5.265808617795187, |
| "grad_norm": 0.23337115347385406, |
| "learning_rate": 5.896270459280153e-05, |
| "loss": 0.0105, |
| "step": 9410 |
| }, |
| { |
| "epoch": 5.2714045886961385, |
| "grad_norm": 0.24963605403900146, |
| "learning_rate": 5.888135796530544e-05, |
| "loss": 0.0098, |
| "step": 9420 |
| }, |
| { |
| "epoch": 5.27700055959709, |
| "grad_norm": 0.372761070728302, |
| "learning_rate": 5.8799987056515804e-05, |
| "loss": 0.0125, |
| "step": 9430 |
| }, |
| { |
| "epoch": 5.282596530498042, |
| "grad_norm": 0.2931661009788513, |
| "learning_rate": 5.871859208889759e-05, |
| "loss": 0.012, |
| "step": 9440 |
| }, |
| { |
| "epoch": 5.288192501398993, |
| "grad_norm": 0.2341478168964386, |
| "learning_rate": 5.8637173284981526e-05, |
| "loss": 0.0113, |
| "step": 9450 |
| }, |
| { |
| "epoch": 5.293788472299944, |
| "grad_norm": 0.2445063441991806, |
| "learning_rate": 5.85557308673635e-05, |
| "loss": 0.0157, |
| "step": 9460 |
| }, |
| { |
| "epoch": 5.299384443200895, |
| "grad_norm": 0.22766774892807007, |
| "learning_rate": 5.847426505870399e-05, |
| "loss": 0.011, |
| "step": 9470 |
| }, |
| { |
| "epoch": 5.304980414101847, |
| "grad_norm": 0.25397437810897827, |
| "learning_rate": 5.8392776081727385e-05, |
| "loss": 0.0088, |
| "step": 9480 |
| }, |
| { |
| "epoch": 5.310576385002798, |
| "grad_norm": 0.2036605179309845, |
| "learning_rate": 5.831126415922148e-05, |
| "loss": 0.0138, |
| "step": 9490 |
| }, |
| { |
| "epoch": 5.316172355903749, |
| "grad_norm": 0.17595243453979492, |
| "learning_rate": 5.8229729514036705e-05, |
| "loss": 0.0102, |
| "step": 9500 |
| }, |
| { |
| "epoch": 5.3217683268047, |
| "grad_norm": 0.14046894013881683, |
| "learning_rate": 5.8148172369085686e-05, |
| "loss": 0.0148, |
| "step": 9510 |
| }, |
| { |
| "epoch": 5.327364297705652, |
| "grad_norm": 0.2699585556983948, |
| "learning_rate": 5.8066592947342555e-05, |
| "loss": 0.0107, |
| "step": 9520 |
| }, |
| { |
| "epoch": 5.3329602686066035, |
| "grad_norm": 0.15614166855812073, |
| "learning_rate": 5.798499147184233e-05, |
| "loss": 0.0118, |
| "step": 9530 |
| }, |
| { |
| "epoch": 5.338556239507555, |
| "grad_norm": 0.3686412572860718, |
| "learning_rate": 5.7903368165680327e-05, |
| "loss": 0.0122, |
| "step": 9540 |
| }, |
| { |
| "epoch": 5.344152210408506, |
| "grad_norm": 0.2578679323196411, |
| "learning_rate": 5.782172325201155e-05, |
| "loss": 0.0152, |
| "step": 9550 |
| }, |
| { |
| "epoch": 5.349748181309457, |
| "grad_norm": 0.24605675041675568, |
| "learning_rate": 5.7740056954050084e-05, |
| "loss": 0.0106, |
| "step": 9560 |
| }, |
| { |
| "epoch": 5.355344152210408, |
| "grad_norm": 0.19138172268867493, |
| "learning_rate": 5.765836949506843e-05, |
| "loss": 0.0134, |
| "step": 9570 |
| }, |
| { |
| "epoch": 5.36094012311136, |
| "grad_norm": 0.23657287657260895, |
| "learning_rate": 5.757666109839702e-05, |
| "loss": 0.0076, |
| "step": 9580 |
| }, |
| { |
| "epoch": 5.366536094012311, |
| "grad_norm": 0.13402613997459412, |
| "learning_rate": 5.74949319874235e-05, |
| "loss": 0.0092, |
| "step": 9590 |
| }, |
| { |
| "epoch": 5.372132064913263, |
| "grad_norm": 0.16487988829612732, |
| "learning_rate": 5.74131823855921e-05, |
| "loss": 0.0165, |
| "step": 9600 |
| }, |
| { |
| "epoch": 5.377728035814214, |
| "grad_norm": 0.1842515617609024, |
| "learning_rate": 5.733141251640315e-05, |
| "loss": 0.0101, |
| "step": 9610 |
| }, |
| { |
| "epoch": 5.383324006715165, |
| "grad_norm": 0.17961528897285461, |
| "learning_rate": 5.72496226034123e-05, |
| "loss": 0.012, |
| "step": 9620 |
| }, |
| { |
| "epoch": 5.3889199776161165, |
| "grad_norm": 0.2516380548477173, |
| "learning_rate": 5.7167812870230094e-05, |
| "loss": 0.011, |
| "step": 9630 |
| }, |
| { |
| "epoch": 5.394515948517068, |
| "grad_norm": 0.1506935954093933, |
| "learning_rate": 5.7085983540521216e-05, |
| "loss": 0.0075, |
| "step": 9640 |
| }, |
| { |
| "epoch": 5.400111919418019, |
| "grad_norm": 0.3415573835372925, |
| "learning_rate": 5.70041348380039e-05, |
| "loss": 0.0142, |
| "step": 9650 |
| }, |
| { |
| "epoch": 5.40570789031897, |
| "grad_norm": 0.2501567006111145, |
| "learning_rate": 5.692226698644938e-05, |
| "loss": 0.0126, |
| "step": 9660 |
| }, |
| { |
| "epoch": 5.411303861219921, |
| "grad_norm": 0.15769636631011963, |
| "learning_rate": 5.6840380209681255e-05, |
| "loss": 0.0206, |
| "step": 9670 |
| }, |
| { |
| "epoch": 5.416899832120873, |
| "grad_norm": 0.17793142795562744, |
| "learning_rate": 5.675847473157485e-05, |
| "loss": 0.0198, |
| "step": 9680 |
| }, |
| { |
| "epoch": 5.422495803021825, |
| "grad_norm": 0.19135138392448425, |
| "learning_rate": 5.667655077605659e-05, |
| "loss": 0.0089, |
| "step": 9690 |
| }, |
| { |
| "epoch": 5.428091773922776, |
| "grad_norm": 0.1910410374403, |
| "learning_rate": 5.6594608567103456e-05, |
| "loss": 0.0178, |
| "step": 9700 |
| }, |
| { |
| "epoch": 5.433687744823727, |
| "grad_norm": 0.18896977603435516, |
| "learning_rate": 5.65126483287423e-05, |
| "loss": 0.0102, |
| "step": 9710 |
| }, |
| { |
| "epoch": 5.439283715724678, |
| "grad_norm": 0.12857311964035034, |
| "learning_rate": 5.6430670285049314e-05, |
| "loss": 0.0147, |
| "step": 9720 |
| }, |
| { |
| "epoch": 5.4448796866256295, |
| "grad_norm": 0.20521825551986694, |
| "learning_rate": 5.634867466014932e-05, |
| "loss": 0.0101, |
| "step": 9730 |
| }, |
| { |
| "epoch": 5.450475657526581, |
| "grad_norm": 0.16037105023860931, |
| "learning_rate": 5.6266661678215216e-05, |
| "loss": 0.0114, |
| "step": 9740 |
| }, |
| { |
| "epoch": 5.456071628427532, |
| "grad_norm": 0.15576882660388947, |
| "learning_rate": 5.618463156346739e-05, |
| "loss": 0.0138, |
| "step": 9750 |
| }, |
| { |
| "epoch": 5.461667599328483, |
| "grad_norm": 0.24249835312366486, |
| "learning_rate": 5.6102584540173006e-05, |
| "loss": 0.0131, |
| "step": 9760 |
| }, |
| { |
| "epoch": 5.467263570229435, |
| "grad_norm": 0.27811625599861145, |
| "learning_rate": 5.602052083264555e-05, |
| "loss": 0.0098, |
| "step": 9770 |
| }, |
| { |
| "epoch": 5.472859541130386, |
| "grad_norm": 0.3673328459262848, |
| "learning_rate": 5.5938440665244006e-05, |
| "loss": 0.0131, |
| "step": 9780 |
| }, |
| { |
| "epoch": 5.478455512031338, |
| "grad_norm": 0.2886298596858978, |
| "learning_rate": 5.585634426237246e-05, |
| "loss": 0.0141, |
| "step": 9790 |
| }, |
| { |
| "epoch": 5.484051482932289, |
| "grad_norm": 0.2564665973186493, |
| "learning_rate": 5.577423184847932e-05, |
| "loss": 0.0104, |
| "step": 9800 |
| }, |
| { |
| "epoch": 5.48964745383324, |
| "grad_norm": 0.22507299482822418, |
| "learning_rate": 5.569210364805677e-05, |
| "loss": 0.0116, |
| "step": 9810 |
| }, |
| { |
| "epoch": 5.495243424734191, |
| "grad_norm": 0.09582646191120148, |
| "learning_rate": 5.560995988564023e-05, |
| "loss": 0.0107, |
| "step": 9820 |
| }, |
| { |
| "epoch": 5.5008393956351425, |
| "grad_norm": 0.25511208176612854, |
| "learning_rate": 5.552780078580756e-05, |
| "loss": 0.0111, |
| "step": 9830 |
| }, |
| { |
| "epoch": 5.506435366536094, |
| "grad_norm": 0.14793109893798828, |
| "learning_rate": 5.544562657317863e-05, |
| "loss": 0.0088, |
| "step": 9840 |
| }, |
| { |
| "epoch": 5.512031337437046, |
| "grad_norm": 0.3215508759021759, |
| "learning_rate": 5.5363437472414595e-05, |
| "loss": 0.0132, |
| "step": 9850 |
| }, |
| { |
| "epoch": 5.517627308337997, |
| "grad_norm": 0.357731431722641, |
| "learning_rate": 5.52812337082173e-05, |
| "loss": 0.0119, |
| "step": 9860 |
| }, |
| { |
| "epoch": 5.523223279238948, |
| "grad_norm": 0.2520214915275574, |
| "learning_rate": 5.519901550532871e-05, |
| "loss": 0.0121, |
| "step": 9870 |
| }, |
| { |
| "epoch": 5.528819250139899, |
| "grad_norm": 0.28353017568588257, |
| "learning_rate": 5.511678308853026e-05, |
| "loss": 0.0077, |
| "step": 9880 |
| }, |
| { |
| "epoch": 5.534415221040851, |
| "grad_norm": 0.34384286403656006, |
| "learning_rate": 5.5034536682642224e-05, |
| "loss": 0.0125, |
| "step": 9890 |
| }, |
| { |
| "epoch": 5.540011191941802, |
| "grad_norm": 0.21323193609714508, |
| "learning_rate": 5.495227651252315e-05, |
| "loss": 0.0121, |
| "step": 9900 |
| }, |
| { |
| "epoch": 5.545607162842753, |
| "grad_norm": 0.3126833736896515, |
| "learning_rate": 5.487000280306917e-05, |
| "loss": 0.0125, |
| "step": 9910 |
| }, |
| { |
| "epoch": 5.551203133743704, |
| "grad_norm": 0.29106199741363525, |
| "learning_rate": 5.478771577921351e-05, |
| "loss": 0.0098, |
| "step": 9920 |
| }, |
| { |
| "epoch": 5.556799104644655, |
| "grad_norm": 0.2740892469882965, |
| "learning_rate": 5.470541566592573e-05, |
| "loss": 0.0135, |
| "step": 9930 |
| }, |
| { |
| "epoch": 5.5623950755456075, |
| "grad_norm": 0.19003938138484955, |
| "learning_rate": 5.462310268821118e-05, |
| "loss": 0.0146, |
| "step": 9940 |
| }, |
| { |
| "epoch": 5.567991046446559, |
| "grad_norm": 0.2251635491847992, |
| "learning_rate": 5.454077707111042e-05, |
| "loss": 0.0153, |
| "step": 9950 |
| }, |
| { |
| "epoch": 5.57358701734751, |
| "grad_norm": 0.16961322724819183, |
| "learning_rate": 5.445843903969854e-05, |
| "loss": 0.0154, |
| "step": 9960 |
| }, |
| { |
| "epoch": 5.579182988248461, |
| "grad_norm": 0.2752644419670105, |
| "learning_rate": 5.4376088819084556e-05, |
| "loss": 0.0102, |
| "step": 9970 |
| }, |
| { |
| "epoch": 5.584778959149412, |
| "grad_norm": 0.24675792455673218, |
| "learning_rate": 5.4293726634410855e-05, |
| "loss": 0.0123, |
| "step": 9980 |
| }, |
| { |
| "epoch": 5.590374930050364, |
| "grad_norm": 0.2074369490146637, |
| "learning_rate": 5.4211352710852495e-05, |
| "loss": 0.0095, |
| "step": 9990 |
| }, |
| { |
| "epoch": 5.595970900951315, |
| "grad_norm": 0.22929449379444122, |
| "learning_rate": 5.4128967273616625e-05, |
| "loss": 0.0123, |
| "step": 10000 |
| }, |
| { |
| "epoch": 5.601566871852266, |
| "grad_norm": 0.21107512712478638, |
| "learning_rate": 5.404657054794189e-05, |
| "loss": 0.01, |
| "step": 10010 |
| }, |
| { |
| "epoch": 5.607162842753217, |
| "grad_norm": 0.3743564188480377, |
| "learning_rate": 5.396416275909779e-05, |
| "loss": 0.0173, |
| "step": 10020 |
| }, |
| { |
| "epoch": 5.612758813654169, |
| "grad_norm": 0.19637951254844666, |
| "learning_rate": 5.3881744132384104e-05, |
| "loss": 0.0114, |
| "step": 10030 |
| }, |
| { |
| "epoch": 5.6183547845551205, |
| "grad_norm": 0.2417994886636734, |
| "learning_rate": 5.379931489313016e-05, |
| "loss": 0.0117, |
| "step": 10040 |
| }, |
| { |
| "epoch": 5.623950755456072, |
| "grad_norm": 0.18541017174720764, |
| "learning_rate": 5.371687526669439e-05, |
| "loss": 0.0139, |
| "step": 10050 |
| }, |
| { |
| "epoch": 5.629546726357023, |
| "grad_norm": 0.26478803157806396, |
| "learning_rate": 5.363442547846356e-05, |
| "loss": 0.0108, |
| "step": 10060 |
| }, |
| { |
| "epoch": 5.635142697257974, |
| "grad_norm": 0.23468017578125, |
| "learning_rate": 5.355196575385225e-05, |
| "loss": 0.0107, |
| "step": 10070 |
| }, |
| { |
| "epoch": 5.640738668158925, |
| "grad_norm": 0.2251582145690918, |
| "learning_rate": 5.3469496318302204e-05, |
| "loss": 0.0105, |
| "step": 10080 |
| }, |
| { |
| "epoch": 5.6463346390598765, |
| "grad_norm": 0.18580631911754608, |
| "learning_rate": 5.3387017397281704e-05, |
| "loss": 0.0107, |
| "step": 10090 |
| }, |
| { |
| "epoch": 5.651930609960829, |
| "grad_norm": 0.14670825004577637, |
| "learning_rate": 5.330452921628497e-05, |
| "loss": 0.0103, |
| "step": 10100 |
| }, |
| { |
| "epoch": 5.65752658086178, |
| "grad_norm": 0.22916555404663086, |
| "learning_rate": 5.322203200083154e-05, |
| "loss": 0.0113, |
| "step": 10110 |
| }, |
| { |
| "epoch": 5.663122551762731, |
| "grad_norm": 0.1360463947057724, |
| "learning_rate": 5.313952597646568e-05, |
| "loss": 0.0121, |
| "step": 10120 |
| }, |
| { |
| "epoch": 5.668718522663682, |
| "grad_norm": 0.24525059759616852, |
| "learning_rate": 5.305701136875566e-05, |
| "loss": 0.0092, |
| "step": 10130 |
| }, |
| { |
| "epoch": 5.6743144935646335, |
| "grad_norm": 0.1451522707939148, |
| "learning_rate": 5.297448840329329e-05, |
| "loss": 0.0081, |
| "step": 10140 |
| }, |
| { |
| "epoch": 5.679910464465585, |
| "grad_norm": 0.1923244744539261, |
| "learning_rate": 5.2891957305693205e-05, |
| "loss": 0.0117, |
| "step": 10150 |
| }, |
| { |
| "epoch": 5.685506435366536, |
| "grad_norm": 0.18804806470870972, |
| "learning_rate": 5.280941830159227e-05, |
| "loss": 0.0095, |
| "step": 10160 |
| }, |
| { |
| "epoch": 5.691102406267487, |
| "grad_norm": 0.1880972534418106, |
| "learning_rate": 5.2726871616649e-05, |
| "loss": 0.0111, |
| "step": 10170 |
| }, |
| { |
| "epoch": 5.696698377168438, |
| "grad_norm": 0.18024373054504395, |
| "learning_rate": 5.264431747654284e-05, |
| "loss": 0.0119, |
| "step": 10180 |
| }, |
| { |
| "epoch": 5.70229434806939, |
| "grad_norm": 0.16494502127170563, |
| "learning_rate": 5.2561756106973656e-05, |
| "loss": 0.0131, |
| "step": 10190 |
| }, |
| { |
| "epoch": 5.707890318970342, |
| "grad_norm": 0.2051820605993271, |
| "learning_rate": 5.247918773366112e-05, |
| "loss": 0.0136, |
| "step": 10200 |
| }, |
| { |
| "epoch": 5.713486289871293, |
| "grad_norm": 0.21385324001312256, |
| "learning_rate": 5.2396612582343986e-05, |
| "loss": 0.0101, |
| "step": 10210 |
| }, |
| { |
| "epoch": 5.719082260772244, |
| "grad_norm": 0.2170487344264984, |
| "learning_rate": 5.231403087877955e-05, |
| "loss": 0.0107, |
| "step": 10220 |
| }, |
| { |
| "epoch": 5.724678231673195, |
| "grad_norm": 0.23433655500411987, |
| "learning_rate": 5.2231442848743064e-05, |
| "loss": 0.0139, |
| "step": 10230 |
| }, |
| { |
| "epoch": 5.730274202574146, |
| "grad_norm": 0.2549709379673004, |
| "learning_rate": 5.214884871802703e-05, |
| "loss": 0.0178, |
| "step": 10240 |
| }, |
| { |
| "epoch": 5.735870173475098, |
| "grad_norm": 0.11975869536399841, |
| "learning_rate": 5.2066248712440656e-05, |
| "loss": 0.0101, |
| "step": 10250 |
| }, |
| { |
| "epoch": 5.74146614437605, |
| "grad_norm": 0.39216071367263794, |
| "learning_rate": 5.198364305780922e-05, |
| "loss": 0.0131, |
| "step": 10260 |
| }, |
| { |
| "epoch": 5.747062115277, |
| "grad_norm": 0.2390432357788086, |
| "learning_rate": 5.1901031979973394e-05, |
| "loss": 0.0097, |
| "step": 10270 |
| }, |
| { |
| "epoch": 5.752658086177952, |
| "grad_norm": 0.1686331033706665, |
| "learning_rate": 5.1818415704788725e-05, |
| "loss": 0.0104, |
| "step": 10280 |
| }, |
| { |
| "epoch": 5.758254057078903, |
| "grad_norm": 0.28812578320503235, |
| "learning_rate": 5.1735794458124956e-05, |
| "loss": 0.01, |
| "step": 10290 |
| }, |
| { |
| "epoch": 5.763850027979855, |
| "grad_norm": 0.4722854197025299, |
| "learning_rate": 5.165316846586541e-05, |
| "loss": 0.0125, |
| "step": 10300 |
| }, |
| { |
| "epoch": 5.769445998880806, |
| "grad_norm": 0.19151827692985535, |
| "learning_rate": 5.157053795390642e-05, |
| "loss": 0.0134, |
| "step": 10310 |
| }, |
| { |
| "epoch": 5.775041969781757, |
| "grad_norm": 0.2533670961856842, |
| "learning_rate": 5.148790314815663e-05, |
| "loss": 0.011, |
| "step": 10320 |
| }, |
| { |
| "epoch": 5.780637940682708, |
| "grad_norm": 0.1756027489900589, |
| "learning_rate": 5.1405264274536445e-05, |
| "loss": 0.0092, |
| "step": 10330 |
| }, |
| { |
| "epoch": 5.786233911583659, |
| "grad_norm": 0.2753913700580597, |
| "learning_rate": 5.132262155897739e-05, |
| "loss": 0.0118, |
| "step": 10340 |
| }, |
| { |
| "epoch": 5.7918298824846115, |
| "grad_norm": 0.17530974745750427, |
| "learning_rate": 5.123997522742151e-05, |
| "loss": 0.0092, |
| "step": 10350 |
| }, |
| { |
| "epoch": 5.797425853385563, |
| "grad_norm": 0.3250185251235962, |
| "learning_rate": 5.1157325505820694e-05, |
| "loss": 0.0135, |
| "step": 10360 |
| }, |
| { |
| "epoch": 5.803021824286514, |
| "grad_norm": 0.2266574651002884, |
| "learning_rate": 5.107467262013614e-05, |
| "loss": 0.0174, |
| "step": 10370 |
| }, |
| { |
| "epoch": 5.808617795187465, |
| "grad_norm": 0.15442338585853577, |
| "learning_rate": 5.0992016796337686e-05, |
| "loss": 0.0112, |
| "step": 10380 |
| }, |
| { |
| "epoch": 5.814213766088416, |
| "grad_norm": 0.16227369010448456, |
| "learning_rate": 5.0909358260403186e-05, |
| "loss": 0.0141, |
| "step": 10390 |
| }, |
| { |
| "epoch": 5.8198097369893675, |
| "grad_norm": 0.288241982460022, |
| "learning_rate": 5.0826697238317935e-05, |
| "loss": 0.0142, |
| "step": 10400 |
| }, |
| { |
| "epoch": 5.825405707890319, |
| "grad_norm": 0.17878948152065277, |
| "learning_rate": 5.074403395607399e-05, |
| "loss": 0.0115, |
| "step": 10410 |
| }, |
| { |
| "epoch": 5.83100167879127, |
| "grad_norm": 0.2224341630935669, |
| "learning_rate": 5.066136863966963e-05, |
| "loss": 0.0106, |
| "step": 10420 |
| }, |
| { |
| "epoch": 5.836597649692221, |
| "grad_norm": 0.1762062907218933, |
| "learning_rate": 5.057870151510864e-05, |
| "loss": 0.0115, |
| "step": 10430 |
| }, |
| { |
| "epoch": 5.842193620593173, |
| "grad_norm": 0.15165816247463226, |
| "learning_rate": 5.0496032808399815e-05, |
| "loss": 0.0116, |
| "step": 10440 |
| }, |
| { |
| "epoch": 5.8477895914941245, |
| "grad_norm": 0.23350821435451508, |
| "learning_rate": 5.041336274555625e-05, |
| "loss": 0.0124, |
| "step": 10450 |
| }, |
| { |
| "epoch": 5.853385562395076, |
| "grad_norm": 0.3131781816482544, |
| "learning_rate": 5.033069155259471e-05, |
| "loss": 0.0136, |
| "step": 10460 |
| }, |
| { |
| "epoch": 5.858981533296027, |
| "grad_norm": 0.25165101885795593, |
| "learning_rate": 5.02480194555351e-05, |
| "loss": 0.0081, |
| "step": 10470 |
| }, |
| { |
| "epoch": 5.864577504196978, |
| "grad_norm": 0.17109723389148712, |
| "learning_rate": 5.016534668039976e-05, |
| "loss": 0.0104, |
| "step": 10480 |
| }, |
| { |
| "epoch": 5.870173475097929, |
| "grad_norm": 0.14172928035259247, |
| "learning_rate": 5.0082673453212914e-05, |
| "loss": 0.0096, |
| "step": 10490 |
| }, |
| { |
| "epoch": 5.8757694459988805, |
| "grad_norm": 0.15533624589443207, |
| "learning_rate": 5e-05, |
| "loss": 0.0075, |
| "step": 10500 |
| }, |
| { |
| "epoch": 5.881365416899833, |
| "grad_norm": 0.12869463860988617, |
| "learning_rate": 4.991732654678709e-05, |
| "loss": 0.0114, |
| "step": 10510 |
| }, |
| { |
| "epoch": 5.886961387800784, |
| "grad_norm": 0.3376826345920563, |
| "learning_rate": 4.9834653319600246e-05, |
| "loss": 0.0135, |
| "step": 10520 |
| }, |
| { |
| "epoch": 5.892557358701735, |
| "grad_norm": 0.20675431191921234, |
| "learning_rate": 4.975198054446492e-05, |
| "loss": 0.0106, |
| "step": 10530 |
| }, |
| { |
| "epoch": 5.898153329602686, |
| "grad_norm": 0.14309728145599365, |
| "learning_rate": 4.96693084474053e-05, |
| "loss": 0.0122, |
| "step": 10540 |
| }, |
| { |
| "epoch": 5.903749300503637, |
| "grad_norm": 0.13042593002319336, |
| "learning_rate": 4.9586637254443756e-05, |
| "loss": 0.0114, |
| "step": 10550 |
| }, |
| { |
| "epoch": 5.909345271404589, |
| "grad_norm": 0.14101748168468475, |
| "learning_rate": 4.950396719160018e-05, |
| "loss": 0.0104, |
| "step": 10560 |
| }, |
| { |
| "epoch": 5.91494124230554, |
| "grad_norm": 0.22409436106681824, |
| "learning_rate": 4.942129848489137e-05, |
| "loss": 0.0109, |
| "step": 10570 |
| }, |
| { |
| "epoch": 5.920537213206491, |
| "grad_norm": 0.22155794501304626, |
| "learning_rate": 4.93386313603304e-05, |
| "loss": 0.0091, |
| "step": 10580 |
| }, |
| { |
| "epoch": 5.926133184107442, |
| "grad_norm": 0.1839323341846466, |
| "learning_rate": 4.925596604392603e-05, |
| "loss": 0.0086, |
| "step": 10590 |
| }, |
| { |
| "epoch": 5.931729155008394, |
| "grad_norm": 0.1160067617893219, |
| "learning_rate": 4.917330276168208e-05, |
| "loss": 0.0103, |
| "step": 10600 |
| }, |
| { |
| "epoch": 5.937325125909346, |
| "grad_norm": 0.2413625419139862, |
| "learning_rate": 4.909064173959681e-05, |
| "loss": 0.0117, |
| "step": 10610 |
| }, |
| { |
| "epoch": 5.942921096810297, |
| "grad_norm": 0.19037237763404846, |
| "learning_rate": 4.9007983203662326e-05, |
| "loss": 0.011, |
| "step": 10620 |
| }, |
| { |
| "epoch": 5.948517067711248, |
| "grad_norm": 0.17303366959095, |
| "learning_rate": 4.892532737986387e-05, |
| "loss": 0.0094, |
| "step": 10630 |
| }, |
| { |
| "epoch": 5.954113038612199, |
| "grad_norm": 0.2476578801870346, |
| "learning_rate": 4.884267449417931e-05, |
| "loss": 0.0118, |
| "step": 10640 |
| }, |
| { |
| "epoch": 5.95970900951315, |
| "grad_norm": 0.29616495966911316, |
| "learning_rate": 4.87600247725785e-05, |
| "loss": 0.0118, |
| "step": 10650 |
| }, |
| { |
| "epoch": 5.965304980414102, |
| "grad_norm": 0.1653703898191452, |
| "learning_rate": 4.867737844102261e-05, |
| "loss": 0.0093, |
| "step": 10660 |
| }, |
| { |
| "epoch": 5.970900951315053, |
| "grad_norm": 0.2089630663394928, |
| "learning_rate": 4.8594735725463567e-05, |
| "loss": 0.0113, |
| "step": 10670 |
| }, |
| { |
| "epoch": 5.976496922216004, |
| "grad_norm": 0.14042207598686218, |
| "learning_rate": 4.851209685184338e-05, |
| "loss": 0.0091, |
| "step": 10680 |
| }, |
| { |
| "epoch": 5.982092893116956, |
| "grad_norm": 0.17145408689975739, |
| "learning_rate": 4.8429462046093585e-05, |
| "loss": 0.0103, |
| "step": 10690 |
| }, |
| { |
| "epoch": 5.987688864017907, |
| "grad_norm": 0.2082109898328781, |
| "learning_rate": 4.834683153413459e-05, |
| "loss": 0.0109, |
| "step": 10700 |
| }, |
| { |
| "epoch": 5.9932848349188586, |
| "grad_norm": 0.3018309473991394, |
| "learning_rate": 4.826420554187506e-05, |
| "loss": 0.0125, |
| "step": 10710 |
| }, |
| { |
| "epoch": 5.99888080581981, |
| "grad_norm": 0.1233690157532692, |
| "learning_rate": 4.818158429521129e-05, |
| "loss": 0.0093, |
| "step": 10720 |
| }, |
| { |
| "epoch": 6.004476776720761, |
| "grad_norm": 0.226378932595253, |
| "learning_rate": 4.809896802002662e-05, |
| "loss": 0.0124, |
| "step": 10730 |
| }, |
| { |
| "epoch": 6.010072747621712, |
| "grad_norm": 0.149214506149292, |
| "learning_rate": 4.801635694219079e-05, |
| "loss": 0.0105, |
| "step": 10740 |
| }, |
| { |
| "epoch": 6.015668718522663, |
| "grad_norm": 0.35911405086517334, |
| "learning_rate": 4.7933751287559335e-05, |
| "loss": 0.0097, |
| "step": 10750 |
| }, |
| { |
| "epoch": 6.021264689423615, |
| "grad_norm": 0.3472690284252167, |
| "learning_rate": 4.785115128197298e-05, |
| "loss": 0.0115, |
| "step": 10760 |
| }, |
| { |
| "epoch": 6.026860660324567, |
| "grad_norm": 0.1740999072790146, |
| "learning_rate": 4.776855715125694e-05, |
| "loss": 0.0088, |
| "step": 10770 |
| }, |
| { |
| "epoch": 6.032456631225518, |
| "grad_norm": 0.22089268267154694, |
| "learning_rate": 4.7685969121220456e-05, |
| "loss": 0.0087, |
| "step": 10780 |
| }, |
| { |
| "epoch": 6.038052602126469, |
| "grad_norm": 0.17993643879890442, |
| "learning_rate": 4.7603387417656026e-05, |
| "loss": 0.0086, |
| "step": 10790 |
| }, |
| { |
| "epoch": 6.04364857302742, |
| "grad_norm": 0.3000619113445282, |
| "learning_rate": 4.7520812266338885e-05, |
| "loss": 0.0117, |
| "step": 10800 |
| }, |
| { |
| "epoch": 6.0492445439283715, |
| "grad_norm": 0.16510385274887085, |
| "learning_rate": 4.743824389302635e-05, |
| "loss": 0.0098, |
| "step": 10810 |
| }, |
| { |
| "epoch": 6.054840514829323, |
| "grad_norm": 0.17736104130744934, |
| "learning_rate": 4.735568252345718e-05, |
| "loss": 0.0111, |
| "step": 10820 |
| }, |
| { |
| "epoch": 6.060436485730274, |
| "grad_norm": 0.17262353003025055, |
| "learning_rate": 4.7273128383351015e-05, |
| "loss": 0.0075, |
| "step": 10830 |
| }, |
| { |
| "epoch": 6.066032456631225, |
| "grad_norm": 0.15096010267734528, |
| "learning_rate": 4.7190581698407725e-05, |
| "loss": 0.0086, |
| "step": 10840 |
| }, |
| { |
| "epoch": 6.071628427532177, |
| "grad_norm": 0.16276976466178894, |
| "learning_rate": 4.710804269430681e-05, |
| "loss": 0.0102, |
| "step": 10850 |
| }, |
| { |
| "epoch": 6.0772243984331284, |
| "grad_norm": 0.42808446288108826, |
| "learning_rate": 4.702551159670672e-05, |
| "loss": 0.0094, |
| "step": 10860 |
| }, |
| { |
| "epoch": 6.08282036933408, |
| "grad_norm": 0.17846183478832245, |
| "learning_rate": 4.694298863124435e-05, |
| "loss": 0.0092, |
| "step": 10870 |
| }, |
| { |
| "epoch": 6.088416340235031, |
| "grad_norm": 0.2053506076335907, |
| "learning_rate": 4.6860474023534335e-05, |
| "loss": 0.0086, |
| "step": 10880 |
| }, |
| { |
| "epoch": 6.094012311135982, |
| "grad_norm": 0.2614595592021942, |
| "learning_rate": 4.677796799916845e-05, |
| "loss": 0.017, |
| "step": 10890 |
| }, |
| { |
| "epoch": 6.099608282036933, |
| "grad_norm": 0.2127176970243454, |
| "learning_rate": 4.669547078371504e-05, |
| "loss": 0.014, |
| "step": 10900 |
| }, |
| { |
| "epoch": 6.1052042529378845, |
| "grad_norm": 0.2204008847475052, |
| "learning_rate": 4.66129826027183e-05, |
| "loss": 0.0116, |
| "step": 10910 |
| }, |
| { |
| "epoch": 6.110800223838836, |
| "grad_norm": 0.3794216215610504, |
| "learning_rate": 4.65305036816978e-05, |
| "loss": 0.0112, |
| "step": 10920 |
| }, |
| { |
| "epoch": 6.116396194739787, |
| "grad_norm": 0.22125349938869476, |
| "learning_rate": 4.6448034246147754e-05, |
| "loss": 0.0086, |
| "step": 10930 |
| }, |
| { |
| "epoch": 6.121992165640739, |
| "grad_norm": 0.21079552173614502, |
| "learning_rate": 4.6365574521536445e-05, |
| "loss": 0.0118, |
| "step": 10940 |
| }, |
| { |
| "epoch": 6.12758813654169, |
| "grad_norm": 0.17766894400119781, |
| "learning_rate": 4.6283124733305624e-05, |
| "loss": 0.007, |
| "step": 10950 |
| }, |
| { |
| "epoch": 6.133184107442641, |
| "grad_norm": 0.23495835065841675, |
| "learning_rate": 4.620068510686985e-05, |
| "loss": 0.0092, |
| "step": 10960 |
| }, |
| { |
| "epoch": 6.138780078343593, |
| "grad_norm": 0.25509214401245117, |
| "learning_rate": 4.611825586761591e-05, |
| "loss": 0.0098, |
| "step": 10970 |
| }, |
| { |
| "epoch": 6.144376049244544, |
| "grad_norm": 0.2415831834077835, |
| "learning_rate": 4.60358372409022e-05, |
| "loss": 0.0105, |
| "step": 10980 |
| }, |
| { |
| "epoch": 6.149972020145495, |
| "grad_norm": 0.1638316661119461, |
| "learning_rate": 4.5953429452058135e-05, |
| "loss": 0.0092, |
| "step": 10990 |
| }, |
| { |
| "epoch": 6.155567991046446, |
| "grad_norm": 0.17809127271175385, |
| "learning_rate": 4.5871032726383386e-05, |
| "loss": 0.0089, |
| "step": 11000 |
| }, |
| { |
| "epoch": 6.1611639619473975, |
| "grad_norm": 0.22080188989639282, |
| "learning_rate": 4.5788647289147516e-05, |
| "loss": 0.008, |
| "step": 11010 |
| }, |
| { |
| "epoch": 6.16675993284835, |
| "grad_norm": 0.19198036193847656, |
| "learning_rate": 4.570627336558915e-05, |
| "loss": 0.0099, |
| "step": 11020 |
| }, |
| { |
| "epoch": 6.172355903749301, |
| "grad_norm": 0.1567138433456421, |
| "learning_rate": 4.562391118091544e-05, |
| "loss": 0.0081, |
| "step": 11030 |
| }, |
| { |
| "epoch": 6.177951874650252, |
| "grad_norm": 0.10507390648126602, |
| "learning_rate": 4.554156096030149e-05, |
| "loss": 0.0068, |
| "step": 11040 |
| }, |
| { |
| "epoch": 6.183547845551203, |
| "grad_norm": 0.2201065570116043, |
| "learning_rate": 4.545922292888959e-05, |
| "loss": 0.0111, |
| "step": 11050 |
| }, |
| { |
| "epoch": 6.189143816452154, |
| "grad_norm": 0.2924385666847229, |
| "learning_rate": 4.537689731178883e-05, |
| "loss": 0.0198, |
| "step": 11060 |
| }, |
| { |
| "epoch": 6.194739787353106, |
| "grad_norm": 0.18973895907402039, |
| "learning_rate": 4.529458433407429e-05, |
| "loss": 0.0113, |
| "step": 11070 |
| }, |
| { |
| "epoch": 6.200335758254057, |
| "grad_norm": 0.2131788432598114, |
| "learning_rate": 4.5212284220786494e-05, |
| "loss": 0.0093, |
| "step": 11080 |
| }, |
| { |
| "epoch": 6.205931729155008, |
| "grad_norm": 0.17389249801635742, |
| "learning_rate": 4.5129997196930845e-05, |
| "loss": 0.0066, |
| "step": 11090 |
| }, |
| { |
| "epoch": 6.21152770005596, |
| "grad_norm": 0.21684075891971588, |
| "learning_rate": 4.504772348747687e-05, |
| "loss": 0.0071, |
| "step": 11100 |
| }, |
| { |
| "epoch": 6.217123670956911, |
| "grad_norm": 0.19866231083869934, |
| "learning_rate": 4.496546331735778e-05, |
| "loss": 0.0096, |
| "step": 11110 |
| }, |
| { |
| "epoch": 6.2227196418578625, |
| "grad_norm": 0.19832220673561096, |
| "learning_rate": 4.488321691146975e-05, |
| "loss": 0.0068, |
| "step": 11120 |
| }, |
| { |
| "epoch": 6.228315612758814, |
| "grad_norm": 0.12977780401706696, |
| "learning_rate": 4.480098449467132e-05, |
| "loss": 0.0089, |
| "step": 11130 |
| }, |
| { |
| "epoch": 6.233911583659765, |
| "grad_norm": 0.32740047574043274, |
| "learning_rate": 4.471876629178273e-05, |
| "loss": 0.0092, |
| "step": 11140 |
| }, |
| { |
| "epoch": 6.239507554560716, |
| "grad_norm": 0.12163751572370529, |
| "learning_rate": 4.463656252758542e-05, |
| "loss": 0.0089, |
| "step": 11150 |
| }, |
| { |
| "epoch": 6.245103525461667, |
| "grad_norm": 0.21914434432983398, |
| "learning_rate": 4.4554373426821374e-05, |
| "loss": 0.0084, |
| "step": 11160 |
| }, |
| { |
| "epoch": 6.250699496362619, |
| "grad_norm": 0.23196600377559662, |
| "learning_rate": 4.447219921419244e-05, |
| "loss": 0.0095, |
| "step": 11170 |
| }, |
| { |
| "epoch": 6.25629546726357, |
| "grad_norm": 0.19451774656772614, |
| "learning_rate": 4.439004011435979e-05, |
| "loss": 0.01, |
| "step": 11180 |
| }, |
| { |
| "epoch": 6.261891438164522, |
| "grad_norm": 0.20714877545833588, |
| "learning_rate": 4.430789635194324e-05, |
| "loss": 0.0124, |
| "step": 11190 |
| }, |
| { |
| "epoch": 6.267487409065473, |
| "grad_norm": 0.1735510528087616, |
| "learning_rate": 4.4225768151520694e-05, |
| "loss": 0.0089, |
| "step": 11200 |
| }, |
| { |
| "epoch": 6.273083379966424, |
| "grad_norm": 0.2282591164112091, |
| "learning_rate": 4.414365573762755e-05, |
| "loss": 0.0166, |
| "step": 11210 |
| }, |
| { |
| "epoch": 6.2786793508673755, |
| "grad_norm": 0.2207183688879013, |
| "learning_rate": 4.406155933475599e-05, |
| "loss": 0.0089, |
| "step": 11220 |
| }, |
| { |
| "epoch": 6.284275321768327, |
| "grad_norm": 0.252380907535553, |
| "learning_rate": 4.3979479167354477e-05, |
| "loss": 0.0111, |
| "step": 11230 |
| }, |
| { |
| "epoch": 6.289871292669278, |
| "grad_norm": 0.18762193620204926, |
| "learning_rate": 4.3897415459827e-05, |
| "loss": 0.0099, |
| "step": 11240 |
| }, |
| { |
| "epoch": 6.295467263570229, |
| "grad_norm": 0.15788224339485168, |
| "learning_rate": 4.381536843653262e-05, |
| "loss": 0.0086, |
| "step": 11250 |
| }, |
| { |
| "epoch": 6.301063234471181, |
| "grad_norm": 0.22205393016338348, |
| "learning_rate": 4.373333832178478e-05, |
| "loss": 0.0081, |
| "step": 11260 |
| }, |
| { |
| "epoch": 6.306659205372132, |
| "grad_norm": 0.2042773962020874, |
| "learning_rate": 4.365132533985071e-05, |
| "loss": 0.0112, |
| "step": 11270 |
| }, |
| { |
| "epoch": 6.312255176273084, |
| "grad_norm": 0.15884517133235931, |
| "learning_rate": 4.3569329714950704e-05, |
| "loss": 0.011, |
| "step": 11280 |
| }, |
| { |
| "epoch": 6.317851147174035, |
| "grad_norm": 0.1604417860507965, |
| "learning_rate": 4.348735167125771e-05, |
| "loss": 0.0126, |
| "step": 11290 |
| }, |
| { |
| "epoch": 6.323447118074986, |
| "grad_norm": 0.1566859632730484, |
| "learning_rate": 4.3405391432896555e-05, |
| "loss": 0.0078, |
| "step": 11300 |
| }, |
| { |
| "epoch": 6.329043088975937, |
| "grad_norm": 0.2835988700389862, |
| "learning_rate": 4.3323449223943416e-05, |
| "loss": 0.0096, |
| "step": 11310 |
| }, |
| { |
| "epoch": 6.3346390598768885, |
| "grad_norm": 0.2758636772632599, |
| "learning_rate": 4.324152526842517e-05, |
| "loss": 0.0118, |
| "step": 11320 |
| }, |
| { |
| "epoch": 6.34023503077784, |
| "grad_norm": 0.09336747974157333, |
| "learning_rate": 4.315961979031875e-05, |
| "loss": 0.0111, |
| "step": 11330 |
| }, |
| { |
| "epoch": 6.345831001678791, |
| "grad_norm": 0.16241887211799622, |
| "learning_rate": 4.307773301355062e-05, |
| "loss": 0.0106, |
| "step": 11340 |
| }, |
| { |
| "epoch": 6.351426972579743, |
| "grad_norm": 0.20391559600830078, |
| "learning_rate": 4.2995865161996105e-05, |
| "loss": 0.0081, |
| "step": 11350 |
| }, |
| { |
| "epoch": 6.357022943480694, |
| "grad_norm": 0.12543804943561554, |
| "learning_rate": 4.291401645947879e-05, |
| "loss": 0.0137, |
| "step": 11360 |
| }, |
| { |
| "epoch": 6.362618914381645, |
| "grad_norm": 0.24983376264572144, |
| "learning_rate": 4.283218712976992e-05, |
| "loss": 0.0095, |
| "step": 11370 |
| }, |
| { |
| "epoch": 6.368214885282597, |
| "grad_norm": 0.2291889637708664, |
| "learning_rate": 4.275037739658771e-05, |
| "loss": 0.0113, |
| "step": 11380 |
| }, |
| { |
| "epoch": 6.373810856183548, |
| "grad_norm": 0.1601787656545639, |
| "learning_rate": 4.2668587483596864e-05, |
| "loss": 0.0128, |
| "step": 11390 |
| }, |
| { |
| "epoch": 6.379406827084499, |
| "grad_norm": 0.14628605544567108, |
| "learning_rate": 4.2586817614407895e-05, |
| "loss": 0.0076, |
| "step": 11400 |
| }, |
| { |
| "epoch": 6.38500279798545, |
| "grad_norm": 0.16742217540740967, |
| "learning_rate": 4.250506801257653e-05, |
| "loss": 0.0104, |
| "step": 11410 |
| }, |
| { |
| "epoch": 6.390598768886401, |
| "grad_norm": 0.20203527808189392, |
| "learning_rate": 4.2423338901602985e-05, |
| "loss": 0.0112, |
| "step": 11420 |
| }, |
| { |
| "epoch": 6.396194739787353, |
| "grad_norm": 0.2605644762516022, |
| "learning_rate": 4.234163050493158e-05, |
| "loss": 0.0166, |
| "step": 11430 |
| }, |
| { |
| "epoch": 6.401790710688305, |
| "grad_norm": 0.22104188799858093, |
| "learning_rate": 4.2259943045949934e-05, |
| "loss": 0.0069, |
| "step": 11440 |
| }, |
| { |
| "epoch": 6.407386681589256, |
| "grad_norm": 0.2080865204334259, |
| "learning_rate": 4.2178276747988446e-05, |
| "loss": 0.0136, |
| "step": 11450 |
| }, |
| { |
| "epoch": 6.412982652490207, |
| "grad_norm": 0.22961939871311188, |
| "learning_rate": 4.209663183431969e-05, |
| "loss": 0.0184, |
| "step": 11460 |
| }, |
| { |
| "epoch": 6.418578623391158, |
| "grad_norm": 0.3134923577308655, |
| "learning_rate": 4.201500852815768e-05, |
| "loss": 0.0108, |
| "step": 11470 |
| }, |
| { |
| "epoch": 6.42417459429211, |
| "grad_norm": 0.11267667263746262, |
| "learning_rate": 4.1933407052657456e-05, |
| "loss": 0.0113, |
| "step": 11480 |
| }, |
| { |
| "epoch": 6.429770565193061, |
| "grad_norm": 0.11718063056468964, |
| "learning_rate": 4.1851827630914305e-05, |
| "loss": 0.0069, |
| "step": 11490 |
| }, |
| { |
| "epoch": 6.435366536094012, |
| "grad_norm": 0.15294240415096283, |
| "learning_rate": 4.17702704859633e-05, |
| "loss": 0.0087, |
| "step": 11500 |
| }, |
| { |
| "epoch": 6.440962506994964, |
| "grad_norm": 0.16003765165805817, |
| "learning_rate": 4.1688735840778546e-05, |
| "loss": 0.0087, |
| "step": 11510 |
| }, |
| { |
| "epoch": 6.446558477895915, |
| "grad_norm": 0.28345319628715515, |
| "learning_rate": 4.160722391827262e-05, |
| "loss": 0.0119, |
| "step": 11520 |
| }, |
| { |
| "epoch": 6.4521544487968665, |
| "grad_norm": 0.18619926273822784, |
| "learning_rate": 4.1525734941296026e-05, |
| "loss": 0.01, |
| "step": 11530 |
| }, |
| { |
| "epoch": 6.457750419697818, |
| "grad_norm": 0.1567833423614502, |
| "learning_rate": 4.14442691326365e-05, |
| "loss": 0.0089, |
| "step": 11540 |
| }, |
| { |
| "epoch": 6.463346390598769, |
| "grad_norm": 0.16688846051692963, |
| "learning_rate": 4.13628267150185e-05, |
| "loss": 0.0078, |
| "step": 11550 |
| }, |
| { |
| "epoch": 6.46894236149972, |
| "grad_norm": 0.19638372957706451, |
| "learning_rate": 4.1281407911102425e-05, |
| "loss": 0.0119, |
| "step": 11560 |
| }, |
| { |
| "epoch": 6.474538332400671, |
| "grad_norm": 0.13919275999069214, |
| "learning_rate": 4.120001294348421e-05, |
| "loss": 0.0105, |
| "step": 11570 |
| }, |
| { |
| "epoch": 6.4801343033016225, |
| "grad_norm": 0.17611968517303467, |
| "learning_rate": 4.111864203469457e-05, |
| "loss": 0.0145, |
| "step": 11580 |
| }, |
| { |
| "epoch": 6.485730274202574, |
| "grad_norm": 0.15707933902740479, |
| "learning_rate": 4.103729540719847e-05, |
| "loss": 0.0088, |
| "step": 11590 |
| }, |
| { |
| "epoch": 6.491326245103526, |
| "grad_norm": 0.16832014918327332, |
| "learning_rate": 4.095597328339452e-05, |
| "loss": 0.0087, |
| "step": 11600 |
| }, |
| { |
| "epoch": 6.496922216004477, |
| "grad_norm": 0.16573460400104523, |
| "learning_rate": 4.087467588561424e-05, |
| "loss": 0.0085, |
| "step": 11610 |
| }, |
| { |
| "epoch": 6.502518186905428, |
| "grad_norm": 0.16878801584243774, |
| "learning_rate": 4.079340343612165e-05, |
| "loss": 0.0081, |
| "step": 11620 |
| }, |
| { |
| "epoch": 6.5081141578063795, |
| "grad_norm": 0.10650831460952759, |
| "learning_rate": 4.07121561571125e-05, |
| "loss": 0.0088, |
| "step": 11630 |
| }, |
| { |
| "epoch": 6.513710128707331, |
| "grad_norm": 0.15549488365650177, |
| "learning_rate": 4.063093427071376e-05, |
| "loss": 0.008, |
| "step": 11640 |
| }, |
| { |
| "epoch": 6.519306099608282, |
| "grad_norm": 0.17358443140983582, |
| "learning_rate": 4.0549737998983e-05, |
| "loss": 0.0133, |
| "step": 11650 |
| }, |
| { |
| "epoch": 6.524902070509233, |
| "grad_norm": 0.24347983300685883, |
| "learning_rate": 4.046856756390767e-05, |
| "loss": 0.0123, |
| "step": 11660 |
| }, |
| { |
| "epoch": 6.530498041410184, |
| "grad_norm": 0.31662797927856445, |
| "learning_rate": 4.038742318740465e-05, |
| "loss": 0.0108, |
| "step": 11670 |
| }, |
| { |
| "epoch": 6.5360940123111355, |
| "grad_norm": 0.21490415930747986, |
| "learning_rate": 4.0306305091319595e-05, |
| "loss": 0.0116, |
| "step": 11680 |
| }, |
| { |
| "epoch": 6.541689983212088, |
| "grad_norm": 0.10896732658147812, |
| "learning_rate": 4.0225213497426276e-05, |
| "loss": 0.0088, |
| "step": 11690 |
| }, |
| { |
| "epoch": 6.547285954113039, |
| "grad_norm": 0.22287431359291077, |
| "learning_rate": 4.0144148627425993e-05, |
| "loss": 0.0157, |
| "step": 11700 |
| }, |
| { |
| "epoch": 6.55288192501399, |
| "grad_norm": 0.2492447942495346, |
| "learning_rate": 4.006311070294702e-05, |
| "loss": 0.0155, |
| "step": 11710 |
| }, |
| { |
| "epoch": 6.558477895914941, |
| "grad_norm": 0.09591550379991531, |
| "learning_rate": 3.9982099945543945e-05, |
| "loss": 0.0076, |
| "step": 11720 |
| }, |
| { |
| "epoch": 6.564073866815892, |
| "grad_norm": 0.21364928781986237, |
| "learning_rate": 3.9901116576697083e-05, |
| "loss": 0.0109, |
| "step": 11730 |
| }, |
| { |
| "epoch": 6.569669837716844, |
| "grad_norm": 0.2347889095544815, |
| "learning_rate": 3.982016081781189e-05, |
| "loss": 0.009, |
| "step": 11740 |
| }, |
| { |
| "epoch": 6.575265808617795, |
| "grad_norm": 0.07959645986557007, |
| "learning_rate": 3.973923289021829e-05, |
| "loss": 0.007, |
| "step": 11750 |
| }, |
| { |
| "epoch": 6.580861779518747, |
| "grad_norm": 0.18356555700302124, |
| "learning_rate": 3.965833301517017e-05, |
| "loss": 0.014, |
| "step": 11760 |
| }, |
| { |
| "epoch": 6.586457750419698, |
| "grad_norm": 0.16104575991630554, |
| "learning_rate": 3.9577461413844684e-05, |
| "loss": 0.0159, |
| "step": 11770 |
| }, |
| { |
| "epoch": 6.592053721320649, |
| "grad_norm": 0.2652454972267151, |
| "learning_rate": 3.949661830734172e-05, |
| "loss": 0.0103, |
| "step": 11780 |
| }, |
| { |
| "epoch": 6.597649692221601, |
| "grad_norm": 0.29040461778640747, |
| "learning_rate": 3.9415803916683224e-05, |
| "loss": 0.0077, |
| "step": 11790 |
| }, |
| { |
| "epoch": 6.603245663122552, |
| "grad_norm": 0.3047587275505066, |
| "learning_rate": 3.933501846281267e-05, |
| "loss": 0.0137, |
| "step": 11800 |
| }, |
| { |
| "epoch": 6.608841634023503, |
| "grad_norm": 0.15864235162734985, |
| "learning_rate": 3.925426216659438e-05, |
| "loss": 0.0097, |
| "step": 11810 |
| }, |
| { |
| "epoch": 6.614437604924454, |
| "grad_norm": 0.20918135344982147, |
| "learning_rate": 3.917353524881302e-05, |
| "loss": 0.008, |
| "step": 11820 |
| }, |
| { |
| "epoch": 6.620033575825405, |
| "grad_norm": 0.17880207300186157, |
| "learning_rate": 3.9092837930172884e-05, |
| "loss": 0.0119, |
| "step": 11830 |
| }, |
| { |
| "epoch": 6.625629546726357, |
| "grad_norm": 0.16844668984413147, |
| "learning_rate": 3.901217043129735e-05, |
| "loss": 0.0092, |
| "step": 11840 |
| }, |
| { |
| "epoch": 6.631225517627309, |
| "grad_norm": 0.2069406360387802, |
| "learning_rate": 3.8931532972728285e-05, |
| "loss": 0.0116, |
| "step": 11850 |
| }, |
| { |
| "epoch": 6.63682148852826, |
| "grad_norm": 0.2709522843360901, |
| "learning_rate": 3.8850925774925425e-05, |
| "loss": 0.0076, |
| "step": 11860 |
| }, |
| { |
| "epoch": 6.642417459429211, |
| "grad_norm": 0.16224393248558044, |
| "learning_rate": 3.877034905826577e-05, |
| "loss": 0.0099, |
| "step": 11870 |
| }, |
| { |
| "epoch": 6.648013430330162, |
| "grad_norm": 0.238708034157753, |
| "learning_rate": 3.8689803043043e-05, |
| "loss": 0.0073, |
| "step": 11880 |
| }, |
| { |
| "epoch": 6.6536094012311136, |
| "grad_norm": 0.12267536669969559, |
| "learning_rate": 3.860928794946682e-05, |
| "loss": 0.0086, |
| "step": 11890 |
| }, |
| { |
| "epoch": 6.659205372132065, |
| "grad_norm": 0.1931445449590683, |
| "learning_rate": 3.852880399766243e-05, |
| "loss": 0.0098, |
| "step": 11900 |
| }, |
| { |
| "epoch": 6.664801343033016, |
| "grad_norm": 0.23762571811676025, |
| "learning_rate": 3.844835140766988e-05, |
| "loss": 0.0091, |
| "step": 11910 |
| }, |
| { |
| "epoch": 6.670397313933967, |
| "grad_norm": 0.1977052241563797, |
| "learning_rate": 3.836793039944349e-05, |
| "loss": 0.0079, |
| "step": 11920 |
| }, |
| { |
| "epoch": 6.675993284834918, |
| "grad_norm": 0.10921810567378998, |
| "learning_rate": 3.828754119285123e-05, |
| "loss": 0.0072, |
| "step": 11930 |
| }, |
| { |
| "epoch": 6.6815892557358705, |
| "grad_norm": 0.2423611879348755, |
| "learning_rate": 3.820718400767409e-05, |
| "loss": 0.0119, |
| "step": 11940 |
| }, |
| { |
| "epoch": 6.687185226636822, |
| "grad_norm": 0.19429948925971985, |
| "learning_rate": 3.812685906360557e-05, |
| "loss": 0.0081, |
| "step": 11950 |
| }, |
| { |
| "epoch": 6.692781197537773, |
| "grad_norm": 0.104859858751297, |
| "learning_rate": 3.8046566580251e-05, |
| "loss": 0.0064, |
| "step": 11960 |
| }, |
| { |
| "epoch": 6.698377168438724, |
| "grad_norm": 0.11694277077913284, |
| "learning_rate": 3.796630677712697e-05, |
| "loss": 0.0086, |
| "step": 11970 |
| }, |
| { |
| "epoch": 6.703973139339675, |
| "grad_norm": 0.2368919551372528, |
| "learning_rate": 3.788607987366069e-05, |
| "loss": 0.0059, |
| "step": 11980 |
| }, |
| { |
| "epoch": 6.7095691102406265, |
| "grad_norm": 0.20411504805088043, |
| "learning_rate": 3.780588608918947e-05, |
| "loss": 0.0133, |
| "step": 11990 |
| }, |
| { |
| "epoch": 6.715165081141578, |
| "grad_norm": 0.11036452651023865, |
| "learning_rate": 3.772572564296005e-05, |
| "loss": 0.0085, |
| "step": 12000 |
| }, |
| { |
| "epoch": 6.72076105204253, |
| "grad_norm": 0.09863012284040451, |
| "learning_rate": 3.764559875412803e-05, |
| "loss": 0.0064, |
| "step": 12010 |
| }, |
| { |
| "epoch": 6.726357022943481, |
| "grad_norm": 0.12064427882432938, |
| "learning_rate": 3.756550564175727e-05, |
| "loss": 0.009, |
| "step": 12020 |
| }, |
| { |
| "epoch": 6.731952993844432, |
| "grad_norm": 0.11138517409563065, |
| "learning_rate": 3.748544652481927e-05, |
| "loss": 0.0082, |
| "step": 12030 |
| }, |
| { |
| "epoch": 6.7375489647453835, |
| "grad_norm": 0.1209891140460968, |
| "learning_rate": 3.74054216221926e-05, |
| "loss": 0.0074, |
| "step": 12040 |
| }, |
| { |
| "epoch": 6.743144935646335, |
| "grad_norm": 0.22739742696285248, |
| "learning_rate": 3.73254311526623e-05, |
| "loss": 0.0082, |
| "step": 12050 |
| }, |
| { |
| "epoch": 6.748740906547286, |
| "grad_norm": 0.19938482344150543, |
| "learning_rate": 3.7245475334919246e-05, |
| "loss": 0.0087, |
| "step": 12060 |
| }, |
| { |
| "epoch": 6.754336877448237, |
| "grad_norm": 0.18825367093086243, |
| "learning_rate": 3.716555438755961e-05, |
| "loss": 0.0091, |
| "step": 12070 |
| }, |
| { |
| "epoch": 6.759932848349188, |
| "grad_norm": 0.18540059030056, |
| "learning_rate": 3.7085668529084184e-05, |
| "loss": 0.0096, |
| "step": 12080 |
| }, |
| { |
| "epoch": 6.7655288192501395, |
| "grad_norm": 0.11188949644565582, |
| "learning_rate": 3.700581797789786e-05, |
| "loss": 0.0081, |
| "step": 12090 |
| }, |
| { |
| "epoch": 6.771124790151092, |
| "grad_norm": 0.09911153465509415, |
| "learning_rate": 3.6926002952309016e-05, |
| "loss": 0.0065, |
| "step": 12100 |
| }, |
| { |
| "epoch": 6.776720761052043, |
| "grad_norm": 0.2001970112323761, |
| "learning_rate": 3.684622367052887e-05, |
| "loss": 0.007, |
| "step": 12110 |
| }, |
| { |
| "epoch": 6.782316731952994, |
| "grad_norm": 0.256001740694046, |
| "learning_rate": 3.676648035067093e-05, |
| "loss": 0.0101, |
| "step": 12120 |
| }, |
| { |
| "epoch": 6.787912702853945, |
| "grad_norm": 0.16810284554958344, |
| "learning_rate": 3.6686773210750385e-05, |
| "loss": 0.0084, |
| "step": 12130 |
| }, |
| { |
| "epoch": 6.793508673754896, |
| "grad_norm": 0.21629579365253448, |
| "learning_rate": 3.6607102468683526e-05, |
| "loss": 0.0066, |
| "step": 12140 |
| }, |
| { |
| "epoch": 6.799104644655848, |
| "grad_norm": 0.2616669237613678, |
| "learning_rate": 3.65274683422871e-05, |
| "loss": 0.0111, |
| "step": 12150 |
| }, |
| { |
| "epoch": 6.804700615556799, |
| "grad_norm": 0.18898139894008636, |
| "learning_rate": 3.6447871049277796e-05, |
| "loss": 0.0103, |
| "step": 12160 |
| }, |
| { |
| "epoch": 6.81029658645775, |
| "grad_norm": 0.20177505910396576, |
| "learning_rate": 3.636831080727154e-05, |
| "loss": 0.0064, |
| "step": 12170 |
| }, |
| { |
| "epoch": 6.815892557358701, |
| "grad_norm": 0.18514911830425262, |
| "learning_rate": 3.628878783378302e-05, |
| "loss": 0.0118, |
| "step": 12180 |
| }, |
| { |
| "epoch": 6.821488528259653, |
| "grad_norm": 0.25894469022750854, |
| "learning_rate": 3.6209302346225006e-05, |
| "loss": 0.0083, |
| "step": 12190 |
| }, |
| { |
| "epoch": 6.827084499160605, |
| "grad_norm": 0.16605038940906525, |
| "learning_rate": 3.612985456190778e-05, |
| "loss": 0.0049, |
| "step": 12200 |
| }, |
| { |
| "epoch": 6.832680470061556, |
| "grad_norm": 0.17524683475494385, |
| "learning_rate": 3.605044469803854e-05, |
| "loss": 0.0066, |
| "step": 12210 |
| }, |
| { |
| "epoch": 6.838276440962507, |
| "grad_norm": 0.10738332569599152, |
| "learning_rate": 3.597107297172084e-05, |
| "loss": 0.0087, |
| "step": 12220 |
| }, |
| { |
| "epoch": 6.843872411863458, |
| "grad_norm": 0.19934684038162231, |
| "learning_rate": 3.5891739599953945e-05, |
| "loss": 0.009, |
| "step": 12230 |
| }, |
| { |
| "epoch": 6.849468382764409, |
| "grad_norm": 0.12639135122299194, |
| "learning_rate": 3.581244479963225e-05, |
| "loss": 0.0092, |
| "step": 12240 |
| }, |
| { |
| "epoch": 6.855064353665361, |
| "grad_norm": 0.1152096539735794, |
| "learning_rate": 3.5733188787544745e-05, |
| "loss": 0.007, |
| "step": 12250 |
| }, |
| { |
| "epoch": 6.860660324566313, |
| "grad_norm": 0.2878243625164032, |
| "learning_rate": 3.5653971780374295e-05, |
| "loss": 0.0096, |
| "step": 12260 |
| }, |
| { |
| "epoch": 6.866256295467264, |
| "grad_norm": 0.2725951075553894, |
| "learning_rate": 3.557479399469721e-05, |
| "loss": 0.0081, |
| "step": 12270 |
| }, |
| { |
| "epoch": 6.871852266368215, |
| "grad_norm": 0.16931770741939545, |
| "learning_rate": 3.5495655646982505e-05, |
| "loss": 0.0085, |
| "step": 12280 |
| }, |
| { |
| "epoch": 6.877448237269166, |
| "grad_norm": 0.11503436416387558, |
| "learning_rate": 3.541655695359142e-05, |
| "loss": 0.0062, |
| "step": 12290 |
| }, |
| { |
| "epoch": 6.8830442081701175, |
| "grad_norm": 0.18025194108486176, |
| "learning_rate": 3.533749813077677e-05, |
| "loss": 0.0082, |
| "step": 12300 |
| }, |
| { |
| "epoch": 6.888640179071069, |
| "grad_norm": 0.1392613649368286, |
| "learning_rate": 3.525847939468233e-05, |
| "loss": 0.0086, |
| "step": 12310 |
| }, |
| { |
| "epoch": 6.89423614997202, |
| "grad_norm": 0.2620909512042999, |
| "learning_rate": 3.517950096134232e-05, |
| "loss": 0.0108, |
| "step": 12320 |
| }, |
| { |
| "epoch": 6.899832120872971, |
| "grad_norm": 0.12296637147665024, |
| "learning_rate": 3.5100563046680764e-05, |
| "loss": 0.008, |
| "step": 12330 |
| }, |
| { |
| "epoch": 6.905428091773922, |
| "grad_norm": 0.13329119980335236, |
| "learning_rate": 3.5021665866510925e-05, |
| "loss": 0.0104, |
| "step": 12340 |
| }, |
| { |
| "epoch": 6.9110240626748745, |
| "grad_norm": 0.18710525333881378, |
| "learning_rate": 3.494280963653463e-05, |
| "loss": 0.0096, |
| "step": 12350 |
| }, |
| { |
| "epoch": 6.916620033575826, |
| "grad_norm": 0.199269637465477, |
| "learning_rate": 3.4863994572341843e-05, |
| "loss": 0.0098, |
| "step": 12360 |
| }, |
| { |
| "epoch": 6.922216004476777, |
| "grad_norm": 0.24953125417232513, |
| "learning_rate": 3.478522088940993e-05, |
| "loss": 0.01, |
| "step": 12370 |
| }, |
| { |
| "epoch": 6.927811975377728, |
| "grad_norm": 0.1573137789964676, |
| "learning_rate": 3.470648880310313e-05, |
| "loss": 0.0119, |
| "step": 12380 |
| }, |
| { |
| "epoch": 6.933407946278679, |
| "grad_norm": 0.24244867265224457, |
| "learning_rate": 3.462779852867197e-05, |
| "loss": 0.0129, |
| "step": 12390 |
| }, |
| { |
| "epoch": 6.9390039171796305, |
| "grad_norm": 0.12841010093688965, |
| "learning_rate": 3.4549150281252636e-05, |
| "loss": 0.0074, |
| "step": 12400 |
| }, |
| { |
| "epoch": 6.944599888080582, |
| "grad_norm": 0.17973212897777557, |
| "learning_rate": 3.447054427586644e-05, |
| "loss": 0.0084, |
| "step": 12410 |
| }, |
| { |
| "epoch": 6.950195858981533, |
| "grad_norm": 0.2083815336227417, |
| "learning_rate": 3.439198072741921e-05, |
| "loss": 0.0096, |
| "step": 12420 |
| }, |
| { |
| "epoch": 6.955791829882484, |
| "grad_norm": 0.21580283343791962, |
| "learning_rate": 3.431345985070067e-05, |
| "loss": 0.009, |
| "step": 12430 |
| }, |
| { |
| "epoch": 6.961387800783436, |
| "grad_norm": 0.22562581300735474, |
| "learning_rate": 3.423498186038393e-05, |
| "loss": 0.0105, |
| "step": 12440 |
| }, |
| { |
| "epoch": 6.966983771684387, |
| "grad_norm": 0.19070309400558472, |
| "learning_rate": 3.4156546971024784e-05, |
| "loss": 0.0074, |
| "step": 12450 |
| }, |
| { |
| "epoch": 6.972579742585339, |
| "grad_norm": 0.2400059998035431, |
| "learning_rate": 3.407815539706124e-05, |
| "loss": 0.0102, |
| "step": 12460 |
| }, |
| { |
| "epoch": 6.97817571348629, |
| "grad_norm": 0.13252539932727814, |
| "learning_rate": 3.399980735281286e-05, |
| "loss": 0.0066, |
| "step": 12470 |
| }, |
| { |
| "epoch": 6.983771684387241, |
| "grad_norm": 0.2826622426509857, |
| "learning_rate": 3.392150305248024e-05, |
| "loss": 0.0103, |
| "step": 12480 |
| }, |
| { |
| "epoch": 6.989367655288192, |
| "grad_norm": 0.2674136757850647, |
| "learning_rate": 3.384324271014429e-05, |
| "loss": 0.0089, |
| "step": 12490 |
| }, |
| { |
| "epoch": 6.9949636261891435, |
| "grad_norm": 0.09753147512674332, |
| "learning_rate": 3.3765026539765834e-05, |
| "loss": 0.0126, |
| "step": 12500 |
| }, |
| { |
| "epoch": 7.000559597090096, |
| "grad_norm": 0.13642564415931702, |
| "learning_rate": 3.368685475518488e-05, |
| "loss": 0.01, |
| "step": 12510 |
| }, |
| { |
| "epoch": 7.006155567991047, |
| "grad_norm": 0.2658902704715729, |
| "learning_rate": 3.360872757012011e-05, |
| "loss": 0.0168, |
| "step": 12520 |
| }, |
| { |
| "epoch": 7.011751538891998, |
| "grad_norm": 0.12951083481311798, |
| "learning_rate": 3.3530645198168295e-05, |
| "loss": 0.0081, |
| "step": 12530 |
| }, |
| { |
| "epoch": 7.017347509792949, |
| "grad_norm": 0.23773689568042755, |
| "learning_rate": 3.3452607852803584e-05, |
| "loss": 0.0082, |
| "step": 12540 |
| }, |
| { |
| "epoch": 7.0229434806939, |
| "grad_norm": 0.21580462157726288, |
| "learning_rate": 3.337461574737716e-05, |
| "loss": 0.0106, |
| "step": 12550 |
| }, |
| { |
| "epoch": 7.028539451594852, |
| "grad_norm": 0.15399706363677979, |
| "learning_rate": 3.329666909511645e-05, |
| "loss": 0.0103, |
| "step": 12560 |
| }, |
| { |
| "epoch": 7.034135422495803, |
| "grad_norm": 0.21200086176395416, |
| "learning_rate": 3.321876810912461e-05, |
| "loss": 0.0141, |
| "step": 12570 |
| }, |
| { |
| "epoch": 7.039731393396754, |
| "grad_norm": 0.2530173063278198, |
| "learning_rate": 3.3140913002379995e-05, |
| "loss": 0.0101, |
| "step": 12580 |
| }, |
| { |
| "epoch": 7.045327364297705, |
| "grad_norm": 0.16888059675693512, |
| "learning_rate": 3.3063103987735433e-05, |
| "loss": 0.0068, |
| "step": 12590 |
| }, |
| { |
| "epoch": 7.050923335198657, |
| "grad_norm": 0.213544562458992, |
| "learning_rate": 3.298534127791785e-05, |
| "loss": 0.0099, |
| "step": 12600 |
| }, |
| { |
| "epoch": 7.0565193060996085, |
| "grad_norm": 0.2427508383989334, |
| "learning_rate": 3.2907625085527503e-05, |
| "loss": 0.0078, |
| "step": 12610 |
| }, |
| { |
| "epoch": 7.06211527700056, |
| "grad_norm": 0.3301132023334503, |
| "learning_rate": 3.282995562303754e-05, |
| "loss": 0.0091, |
| "step": 12620 |
| }, |
| { |
| "epoch": 7.067711247901511, |
| "grad_norm": 0.15243375301361084, |
| "learning_rate": 3.275233310279321e-05, |
| "loss": 0.0058, |
| "step": 12630 |
| }, |
| { |
| "epoch": 7.073307218802462, |
| "grad_norm": 0.14671820402145386, |
| "learning_rate": 3.267475773701161e-05, |
| "loss": 0.0062, |
| "step": 12640 |
| }, |
| { |
| "epoch": 7.078903189703413, |
| "grad_norm": 0.22168104350566864, |
| "learning_rate": 3.2597229737780774e-05, |
| "loss": 0.0079, |
| "step": 12650 |
| }, |
| { |
| "epoch": 7.084499160604365, |
| "grad_norm": 0.25640955567359924, |
| "learning_rate": 3.251974931705933e-05, |
| "loss": 0.0085, |
| "step": 12660 |
| }, |
| { |
| "epoch": 7.090095131505316, |
| "grad_norm": 0.2436077892780304, |
| "learning_rate": 3.244231668667578e-05, |
| "loss": 0.0078, |
| "step": 12670 |
| }, |
| { |
| "epoch": 7.095691102406268, |
| "grad_norm": 0.19463610649108887, |
| "learning_rate": 3.236493205832795e-05, |
| "loss": 0.0066, |
| "step": 12680 |
| }, |
| { |
| "epoch": 7.101287073307219, |
| "grad_norm": 0.22004422545433044, |
| "learning_rate": 3.228759564358248e-05, |
| "loss": 0.0078, |
| "step": 12690 |
| }, |
| { |
| "epoch": 7.10688304420817, |
| "grad_norm": 0.1793327033519745, |
| "learning_rate": 3.221030765387417e-05, |
| "loss": 0.0059, |
| "step": 12700 |
| }, |
| { |
| "epoch": 7.1124790151091215, |
| "grad_norm": 0.2823750376701355, |
| "learning_rate": 3.2133068300505455e-05, |
| "loss": 0.0072, |
| "step": 12710 |
| }, |
| { |
| "epoch": 7.118074986010073, |
| "grad_norm": 0.3006185293197632, |
| "learning_rate": 3.205587779464576e-05, |
| "loss": 0.0099, |
| "step": 12720 |
| }, |
| { |
| "epoch": 7.123670956911024, |
| "grad_norm": 0.15955254435539246, |
| "learning_rate": 3.197873634733096e-05, |
| "loss": 0.01, |
| "step": 12730 |
| }, |
| { |
| "epoch": 7.129266927811975, |
| "grad_norm": 0.3392355442047119, |
| "learning_rate": 3.190164416946285e-05, |
| "loss": 0.0096, |
| "step": 12740 |
| }, |
| { |
| "epoch": 7.134862898712926, |
| "grad_norm": 0.209779292345047, |
| "learning_rate": 3.18246014718085e-05, |
| "loss": 0.0083, |
| "step": 12750 |
| }, |
| { |
| "epoch": 7.140458869613878, |
| "grad_norm": 0.13492996990680695, |
| "learning_rate": 3.1747608464999725e-05, |
| "loss": 0.0085, |
| "step": 12760 |
| }, |
| { |
| "epoch": 7.14605484051483, |
| "grad_norm": 0.20543181896209717, |
| "learning_rate": 3.167066535953242e-05, |
| "loss": 0.0099, |
| "step": 12770 |
| }, |
| { |
| "epoch": 7.151650811415781, |
| "grad_norm": 0.24595800042152405, |
| "learning_rate": 3.1593772365766105e-05, |
| "loss": 0.0089, |
| "step": 12780 |
| }, |
| { |
| "epoch": 7.157246782316732, |
| "grad_norm": 0.24962860345840454, |
| "learning_rate": 3.1516929693923315e-05, |
| "loss": 0.0111, |
| "step": 12790 |
| }, |
| { |
| "epoch": 7.162842753217683, |
| "grad_norm": 0.236158549785614, |
| "learning_rate": 3.144013755408895e-05, |
| "loss": 0.0092, |
| "step": 12800 |
| }, |
| { |
| "epoch": 7.1684387241186345, |
| "grad_norm": 0.09373817592859268, |
| "learning_rate": 3.136339615620985e-05, |
| "loss": 0.0073, |
| "step": 12810 |
| }, |
| { |
| "epoch": 7.174034695019586, |
| "grad_norm": 0.3018852770328522, |
| "learning_rate": 3.128670571009399e-05, |
| "loss": 0.0109, |
| "step": 12820 |
| }, |
| { |
| "epoch": 7.179630665920537, |
| "grad_norm": 0.22144253551959991, |
| "learning_rate": 3.121006642541014e-05, |
| "loss": 0.008, |
| "step": 12830 |
| }, |
| { |
| "epoch": 7.185226636821488, |
| "grad_norm": 0.14473740756511688, |
| "learning_rate": 3.113347851168721e-05, |
| "loss": 0.0095, |
| "step": 12840 |
| }, |
| { |
| "epoch": 7.19082260772244, |
| "grad_norm": 0.14747409522533417, |
| "learning_rate": 3.105694217831361e-05, |
| "loss": 0.0062, |
| "step": 12850 |
| }, |
| { |
| "epoch": 7.196418578623391, |
| "grad_norm": 0.2111588716506958, |
| "learning_rate": 3.098045763453678e-05, |
| "loss": 0.0074, |
| "step": 12860 |
| }, |
| { |
| "epoch": 7.202014549524343, |
| "grad_norm": 0.2098371833562851, |
| "learning_rate": 3.090402508946249e-05, |
| "loss": 0.0084, |
| "step": 12870 |
| }, |
| { |
| "epoch": 7.207610520425294, |
| "grad_norm": 0.1614372432231903, |
| "learning_rate": 3.082764475205442e-05, |
| "loss": 0.007, |
| "step": 12880 |
| }, |
| { |
| "epoch": 7.213206491326245, |
| "grad_norm": 0.0742206946015358, |
| "learning_rate": 3.075131683113352e-05, |
| "loss": 0.006, |
| "step": 12890 |
| }, |
| { |
| "epoch": 7.218802462227196, |
| "grad_norm": 0.07135152816772461, |
| "learning_rate": 3.0675041535377405e-05, |
| "loss": 0.0057, |
| "step": 12900 |
| }, |
| { |
| "epoch": 7.2243984331281474, |
| "grad_norm": 0.20988823473453522, |
| "learning_rate": 3.059881907331979e-05, |
| "loss": 0.0071, |
| "step": 12910 |
| }, |
| { |
| "epoch": 7.229994404029099, |
| "grad_norm": 0.10817866027355194, |
| "learning_rate": 3.052264965335e-05, |
| "loss": 0.0049, |
| "step": 12920 |
| }, |
| { |
| "epoch": 7.235590374930051, |
| "grad_norm": 0.13764233887195587, |
| "learning_rate": 3.0446533483712304e-05, |
| "loss": 0.0088, |
| "step": 12930 |
| }, |
| { |
| "epoch": 7.241186345831002, |
| "grad_norm": 0.17063380777835846, |
| "learning_rate": 3.0370470772505433e-05, |
| "loss": 0.0073, |
| "step": 12940 |
| }, |
| { |
| "epoch": 7.246782316731953, |
| "grad_norm": 0.11198591440916061, |
| "learning_rate": 3.0294461727681932e-05, |
| "loss": 0.0112, |
| "step": 12950 |
| }, |
| { |
| "epoch": 7.252378287632904, |
| "grad_norm": 0.1855844408273697, |
| "learning_rate": 3.0218506557047598e-05, |
| "loss": 0.0069, |
| "step": 12960 |
| }, |
| { |
| "epoch": 7.257974258533856, |
| "grad_norm": 0.10013962537050247, |
| "learning_rate": 3.0142605468260978e-05, |
| "loss": 0.0063, |
| "step": 12970 |
| }, |
| { |
| "epoch": 7.263570229434807, |
| "grad_norm": 0.16480940580368042, |
| "learning_rate": 3.006675866883275e-05, |
| "loss": 0.0062, |
| "step": 12980 |
| }, |
| { |
| "epoch": 7.269166200335758, |
| "grad_norm": 0.2087039351463318, |
| "learning_rate": 2.999096636612518e-05, |
| "loss": 0.0085, |
| "step": 12990 |
| }, |
| { |
| "epoch": 7.274762171236709, |
| "grad_norm": 0.15215320885181427, |
| "learning_rate": 2.991522876735154e-05, |
| "loss": 0.0077, |
| "step": 13000 |
| }, |
| { |
| "epoch": 7.280358142137661, |
| "grad_norm": 0.2687567472457886, |
| "learning_rate": 2.9839546079575497e-05, |
| "loss": 0.0105, |
| "step": 13010 |
| }, |
| { |
| "epoch": 7.2859541130386125, |
| "grad_norm": 0.23126524686813354, |
| "learning_rate": 2.976391850971065e-05, |
| "loss": 0.0076, |
| "step": 13020 |
| }, |
| { |
| "epoch": 7.291550083939564, |
| "grad_norm": 0.10021013021469116, |
| "learning_rate": 2.9688346264519866e-05, |
| "loss": 0.01, |
| "step": 13030 |
| }, |
| { |
| "epoch": 7.297146054840515, |
| "grad_norm": 0.16525714099407196, |
| "learning_rate": 2.9612829550614836e-05, |
| "loss": 0.0082, |
| "step": 13040 |
| }, |
| { |
| "epoch": 7.302742025741466, |
| "grad_norm": 0.16742092370986938, |
| "learning_rate": 2.9537368574455304e-05, |
| "loss": 0.0141, |
| "step": 13050 |
| }, |
| { |
| "epoch": 7.308337996642417, |
| "grad_norm": 0.07409677654504776, |
| "learning_rate": 2.9461963542348737e-05, |
| "loss": 0.0083, |
| "step": 13060 |
| }, |
| { |
| "epoch": 7.3139339675433686, |
| "grad_norm": 0.2794577181339264, |
| "learning_rate": 2.9386614660449596e-05, |
| "loss": 0.0091, |
| "step": 13070 |
| }, |
| { |
| "epoch": 7.31952993844432, |
| "grad_norm": 0.16768626868724823, |
| "learning_rate": 2.931132213475884e-05, |
| "loss": 0.0128, |
| "step": 13080 |
| }, |
| { |
| "epoch": 7.325125909345271, |
| "grad_norm": 0.19670413434505463, |
| "learning_rate": 2.9236086171123404e-05, |
| "loss": 0.0058, |
| "step": 13090 |
| }, |
| { |
| "epoch": 7.330721880246223, |
| "grad_norm": 0.1663038730621338, |
| "learning_rate": 2.916090697523549e-05, |
| "loss": 0.0081, |
| "step": 13100 |
| }, |
| { |
| "epoch": 7.336317851147174, |
| "grad_norm": 0.2468092292547226, |
| "learning_rate": 2.9085784752632157e-05, |
| "loss": 0.0094, |
| "step": 13110 |
| }, |
| { |
| "epoch": 7.3419138220481255, |
| "grad_norm": 0.20476868748664856, |
| "learning_rate": 2.9010719708694722e-05, |
| "loss": 0.0095, |
| "step": 13120 |
| }, |
| { |
| "epoch": 7.347509792949077, |
| "grad_norm": 0.19373807311058044, |
| "learning_rate": 2.8935712048648112e-05, |
| "loss": 0.0077, |
| "step": 13130 |
| }, |
| { |
| "epoch": 7.353105763850028, |
| "grad_norm": 0.16226400434970856, |
| "learning_rate": 2.8860761977560436e-05, |
| "loss": 0.0105, |
| "step": 13140 |
| }, |
| { |
| "epoch": 7.358701734750979, |
| "grad_norm": 0.2760455906391144, |
| "learning_rate": 2.878586970034232e-05, |
| "loss": 0.017, |
| "step": 13150 |
| }, |
| { |
| "epoch": 7.36429770565193, |
| "grad_norm": 0.269136518239975, |
| "learning_rate": 2.8711035421746367e-05, |
| "loss": 0.0127, |
| "step": 13160 |
| }, |
| { |
| "epoch": 7.3698936765528815, |
| "grad_norm": 0.2237207144498825, |
| "learning_rate": 2.8636259346366666e-05, |
| "loss": 0.007, |
| "step": 13170 |
| }, |
| { |
| "epoch": 7.375489647453834, |
| "grad_norm": 0.1836055964231491, |
| "learning_rate": 2.8561541678638142e-05, |
| "loss": 0.0077, |
| "step": 13180 |
| }, |
| { |
| "epoch": 7.381085618354785, |
| "grad_norm": 0.1962578445672989, |
| "learning_rate": 2.8486882622836026e-05, |
| "loss": 0.0078, |
| "step": 13190 |
| }, |
| { |
| "epoch": 7.386681589255736, |
| "grad_norm": 0.16476459801197052, |
| "learning_rate": 2.8412282383075363e-05, |
| "loss": 0.0093, |
| "step": 13200 |
| }, |
| { |
| "epoch": 7.392277560156687, |
| "grad_norm": 0.17988111078739166, |
| "learning_rate": 2.8337741163310317e-05, |
| "loss": 0.0081, |
| "step": 13210 |
| }, |
| { |
| "epoch": 7.3978735310576385, |
| "grad_norm": 0.21751411259174347, |
| "learning_rate": 2.8263259167333777e-05, |
| "loss": 0.0092, |
| "step": 13220 |
| }, |
| { |
| "epoch": 7.40346950195859, |
| "grad_norm": 0.150657057762146, |
| "learning_rate": 2.8188836598776662e-05, |
| "loss": 0.0094, |
| "step": 13230 |
| }, |
| { |
| "epoch": 7.409065472859541, |
| "grad_norm": 0.16722621023654938, |
| "learning_rate": 2.811447366110741e-05, |
| "loss": 0.0074, |
| "step": 13240 |
| }, |
| { |
| "epoch": 7.414661443760492, |
| "grad_norm": 0.16167713701725006, |
| "learning_rate": 2.804017055763149e-05, |
| "loss": 0.0063, |
| "step": 13250 |
| }, |
| { |
| "epoch": 7.420257414661444, |
| "grad_norm": 0.07585649192333221, |
| "learning_rate": 2.7965927491490705e-05, |
| "loss": 0.0112, |
| "step": 13260 |
| }, |
| { |
| "epoch": 7.425853385562395, |
| "grad_norm": 0.19306915998458862, |
| "learning_rate": 2.7891744665662823e-05, |
| "loss": 0.0069, |
| "step": 13270 |
| }, |
| { |
| "epoch": 7.431449356463347, |
| "grad_norm": 0.23972170054912567, |
| "learning_rate": 2.7817622282960815e-05, |
| "loss": 0.0062, |
| "step": 13280 |
| }, |
| { |
| "epoch": 7.437045327364298, |
| "grad_norm": 0.15592247247695923, |
| "learning_rate": 2.774356054603243e-05, |
| "loss": 0.0055, |
| "step": 13290 |
| }, |
| { |
| "epoch": 7.442641298265249, |
| "grad_norm": 0.20682460069656372, |
| "learning_rate": 2.766955965735968e-05, |
| "loss": 0.0052, |
| "step": 13300 |
| }, |
| { |
| "epoch": 7.4482372691662, |
| "grad_norm": 0.09251468628644943, |
| "learning_rate": 2.7595619819258116e-05, |
| "loss": 0.0077, |
| "step": 13310 |
| }, |
| { |
| "epoch": 7.453833240067151, |
| "grad_norm": 0.1358599066734314, |
| "learning_rate": 2.7521741233876496e-05, |
| "loss": 0.0098, |
| "step": 13320 |
| }, |
| { |
| "epoch": 7.459429210968103, |
| "grad_norm": 0.10552109777927399, |
| "learning_rate": 2.7447924103195976e-05, |
| "loss": 0.0045, |
| "step": 13330 |
| }, |
| { |
| "epoch": 7.465025181869054, |
| "grad_norm": 0.22331656515598297, |
| "learning_rate": 2.7374168629029813e-05, |
| "loss": 0.0075, |
| "step": 13340 |
| }, |
| { |
| "epoch": 7.470621152770006, |
| "grad_norm": 0.25520750880241394, |
| "learning_rate": 2.7300475013022663e-05, |
| "loss": 0.0079, |
| "step": 13350 |
| }, |
| { |
| "epoch": 7.476217123670957, |
| "grad_norm": 0.3160042464733124, |
| "learning_rate": 2.7226843456650037e-05, |
| "loss": 0.0123, |
| "step": 13360 |
| }, |
| { |
| "epoch": 7.481813094571908, |
| "grad_norm": 0.1619534194469452, |
| "learning_rate": 2.7153274161217846e-05, |
| "loss": 0.0049, |
| "step": 13370 |
| }, |
| { |
| "epoch": 7.48740906547286, |
| "grad_norm": 0.3031173646450043, |
| "learning_rate": 2.707976732786166e-05, |
| "loss": 0.0098, |
| "step": 13380 |
| }, |
| { |
| "epoch": 7.493005036373811, |
| "grad_norm": 0.1819227635860443, |
| "learning_rate": 2.7006323157546386e-05, |
| "loss": 0.0065, |
| "step": 13390 |
| }, |
| { |
| "epoch": 7.498601007274762, |
| "grad_norm": 0.17307765781879425, |
| "learning_rate": 2.693294185106562e-05, |
| "loss": 0.0087, |
| "step": 13400 |
| }, |
| { |
| "epoch": 7.504196978175713, |
| "grad_norm": 0.1600845456123352, |
| "learning_rate": 2.6859623609040984e-05, |
| "loss": 0.0061, |
| "step": 13410 |
| }, |
| { |
| "epoch": 7.509792949076665, |
| "grad_norm": 0.21853172779083252, |
| "learning_rate": 2.6786368631921836e-05, |
| "loss": 0.0054, |
| "step": 13420 |
| }, |
| { |
| "epoch": 7.5153889199776165, |
| "grad_norm": 0.16434265673160553, |
| "learning_rate": 2.67131771199844e-05, |
| "loss": 0.0104, |
| "step": 13430 |
| }, |
| { |
| "epoch": 7.520984890878568, |
| "grad_norm": 0.1688595563173294, |
| "learning_rate": 2.6640049273331515e-05, |
| "loss": 0.0068, |
| "step": 13440 |
| }, |
| { |
| "epoch": 7.526580861779519, |
| "grad_norm": 0.10968342423439026, |
| "learning_rate": 2.656698529189193e-05, |
| "loss": 0.0072, |
| "step": 13450 |
| }, |
| { |
| "epoch": 7.53217683268047, |
| "grad_norm": 0.12489527463912964, |
| "learning_rate": 2.6493985375419778e-05, |
| "loss": 0.0067, |
| "step": 13460 |
| }, |
| { |
| "epoch": 7.537772803581421, |
| "grad_norm": 0.3275364935398102, |
| "learning_rate": 2.642104972349403e-05, |
| "loss": 0.0066, |
| "step": 13470 |
| }, |
| { |
| "epoch": 7.5433687744823725, |
| "grad_norm": 0.10653702169656754, |
| "learning_rate": 2.6348178535517966e-05, |
| "loss": 0.0133, |
| "step": 13480 |
| }, |
| { |
| "epoch": 7.548964745383324, |
| "grad_norm": 0.16446645557880402, |
| "learning_rate": 2.6275372010718635e-05, |
| "loss": 0.0075, |
| "step": 13490 |
| }, |
| { |
| "epoch": 7.554560716284275, |
| "grad_norm": 0.17610448598861694, |
| "learning_rate": 2.6202630348146324e-05, |
| "loss": 0.0077, |
| "step": 13500 |
| }, |
| { |
| "epoch": 7.560156687185227, |
| "grad_norm": 0.1589246541261673, |
| "learning_rate": 2.612995374667394e-05, |
| "loss": 0.0044, |
| "step": 13510 |
| }, |
| { |
| "epoch": 7.565752658086178, |
| "grad_norm": 0.3019932806491852, |
| "learning_rate": 2.6057342404996522e-05, |
| "loss": 0.0067, |
| "step": 13520 |
| }, |
| { |
| "epoch": 7.5713486289871295, |
| "grad_norm": 0.19549022614955902, |
| "learning_rate": 2.5984796521630737e-05, |
| "loss": 0.0083, |
| "step": 13530 |
| }, |
| { |
| "epoch": 7.576944599888081, |
| "grad_norm": 0.1532057523727417, |
| "learning_rate": 2.591231629491423e-05, |
| "loss": 0.0043, |
| "step": 13540 |
| }, |
| { |
| "epoch": 7.582540570789032, |
| "grad_norm": 0.1547580510377884, |
| "learning_rate": 2.5839901923005205e-05, |
| "loss": 0.0083, |
| "step": 13550 |
| }, |
| { |
| "epoch": 7.588136541689983, |
| "grad_norm": 0.30122992396354675, |
| "learning_rate": 2.5767553603881767e-05, |
| "loss": 0.0064, |
| "step": 13560 |
| }, |
| { |
| "epoch": 7.593732512590934, |
| "grad_norm": 0.12354984134435654, |
| "learning_rate": 2.5695271535341443e-05, |
| "loss": 0.0059, |
| "step": 13570 |
| }, |
| { |
| "epoch": 7.5993284834918855, |
| "grad_norm": 0.14805443584918976, |
| "learning_rate": 2.562305591500069e-05, |
| "loss": 0.0072, |
| "step": 13580 |
| }, |
| { |
| "epoch": 7.604924454392837, |
| "grad_norm": 0.15644380450248718, |
| "learning_rate": 2.555090694029421e-05, |
| "loss": 0.0076, |
| "step": 13590 |
| }, |
| { |
| "epoch": 7.610520425293789, |
| "grad_norm": 0.22504927217960358, |
| "learning_rate": 2.547882480847461e-05, |
| "loss": 0.0114, |
| "step": 13600 |
| }, |
| { |
| "epoch": 7.61611639619474, |
| "grad_norm": 0.10872774571180344, |
| "learning_rate": 2.540680971661161e-05, |
| "loss": 0.0098, |
| "step": 13610 |
| }, |
| { |
| "epoch": 7.621712367095691, |
| "grad_norm": 0.1415761411190033, |
| "learning_rate": 2.5334861861591753e-05, |
| "loss": 0.0059, |
| "step": 13620 |
| }, |
| { |
| "epoch": 7.627308337996642, |
| "grad_norm": 0.18380744755268097, |
| "learning_rate": 2.526298144011775e-05, |
| "loss": 0.0074, |
| "step": 13630 |
| }, |
| { |
| "epoch": 7.632904308897594, |
| "grad_norm": 0.13029605150222778, |
| "learning_rate": 2.5191168648707887e-05, |
| "loss": 0.0046, |
| "step": 13640 |
| }, |
| { |
| "epoch": 7.638500279798545, |
| "grad_norm": 0.11022605746984482, |
| "learning_rate": 2.511942368369566e-05, |
| "loss": 0.0052, |
| "step": 13650 |
| }, |
| { |
| "epoch": 7.644096250699496, |
| "grad_norm": 0.1933964192867279, |
| "learning_rate": 2.5047746741228978e-05, |
| "loss": 0.0062, |
| "step": 13660 |
| }, |
| { |
| "epoch": 7.649692221600448, |
| "grad_norm": 0.10140606015920639, |
| "learning_rate": 2.4976138017269908e-05, |
| "loss": 0.005, |
| "step": 13670 |
| }, |
| { |
| "epoch": 7.655288192501399, |
| "grad_norm": 0.1074545681476593, |
| "learning_rate": 2.490459770759398e-05, |
| "loss": 0.0081, |
| "step": 13680 |
| }, |
| { |
| "epoch": 7.660884163402351, |
| "grad_norm": 0.11866219341754913, |
| "learning_rate": 2.4833126007789653e-05, |
| "loss": 0.0063, |
| "step": 13690 |
| }, |
| { |
| "epoch": 7.666480134303302, |
| "grad_norm": 0.14528554677963257, |
| "learning_rate": 2.476172311325783e-05, |
| "loss": 0.0075, |
| "step": 13700 |
| }, |
| { |
| "epoch": 7.672076105204253, |
| "grad_norm": 0.12533891201019287, |
| "learning_rate": 2.4690389219211273e-05, |
| "loss": 0.0056, |
| "step": 13710 |
| }, |
| { |
| "epoch": 7.677672076105204, |
| "grad_norm": 0.2228127419948578, |
| "learning_rate": 2.4619124520674146e-05, |
| "loss": 0.007, |
| "step": 13720 |
| }, |
| { |
| "epoch": 7.683268047006155, |
| "grad_norm": 0.167043074965477, |
| "learning_rate": 2.4547929212481435e-05, |
| "loss": 0.0092, |
| "step": 13730 |
| }, |
| { |
| "epoch": 7.688864017907107, |
| "grad_norm": 0.1956396847963333, |
| "learning_rate": 2.447680348927837e-05, |
| "loss": 0.0104, |
| "step": 13740 |
| }, |
| { |
| "epoch": 7.694459988808058, |
| "grad_norm": 0.3440028429031372, |
| "learning_rate": 2.4405747545519963e-05, |
| "loss": 0.0101, |
| "step": 13750 |
| }, |
| { |
| "epoch": 7.70005595970901, |
| "grad_norm": 0.19462288916110992, |
| "learning_rate": 2.433476157547044e-05, |
| "loss": 0.0123, |
| "step": 13760 |
| }, |
| { |
| "epoch": 7.705651930609961, |
| "grad_norm": 0.2774219512939453, |
| "learning_rate": 2.4263845773202736e-05, |
| "loss": 0.012, |
| "step": 13770 |
| }, |
| { |
| "epoch": 7.711247901510912, |
| "grad_norm": 0.15917648375034332, |
| "learning_rate": 2.419300033259798e-05, |
| "loss": 0.0072, |
| "step": 13780 |
| }, |
| { |
| "epoch": 7.7168438724118635, |
| "grad_norm": 0.17087779939174652, |
| "learning_rate": 2.4122225447344875e-05, |
| "loss": 0.0051, |
| "step": 13790 |
| }, |
| { |
| "epoch": 7.722439843312815, |
| "grad_norm": 0.3049764931201935, |
| "learning_rate": 2.405152131093926e-05, |
| "loss": 0.0068, |
| "step": 13800 |
| }, |
| { |
| "epoch": 7.728035814213766, |
| "grad_norm": 0.23013077676296234, |
| "learning_rate": 2.3980888116683515e-05, |
| "loss": 0.0093, |
| "step": 13810 |
| }, |
| { |
| "epoch": 7.733631785114717, |
| "grad_norm": 0.25196191668510437, |
| "learning_rate": 2.3910326057686127e-05, |
| "loss": 0.0063, |
| "step": 13820 |
| }, |
| { |
| "epoch": 7.739227756015668, |
| "grad_norm": 0.13192011415958405, |
| "learning_rate": 2.3839835326861104e-05, |
| "loss": 0.0077, |
| "step": 13830 |
| }, |
| { |
| "epoch": 7.74482372691662, |
| "grad_norm": 0.14442972838878632, |
| "learning_rate": 2.3769416116927335e-05, |
| "loss": 0.0131, |
| "step": 13840 |
| }, |
| { |
| "epoch": 7.750419697817572, |
| "grad_norm": 0.1425463706254959, |
| "learning_rate": 2.3699068620408304e-05, |
| "loss": 0.0066, |
| "step": 13850 |
| }, |
| { |
| "epoch": 7.756015668718523, |
| "grad_norm": 0.1162482276558876, |
| "learning_rate": 2.362879302963135e-05, |
| "loss": 0.007, |
| "step": 13860 |
| }, |
| { |
| "epoch": 7.761611639619474, |
| "grad_norm": 0.21869398653507233, |
| "learning_rate": 2.3558589536727277e-05, |
| "loss": 0.0045, |
| "step": 13870 |
| }, |
| { |
| "epoch": 7.767207610520425, |
| "grad_norm": 0.1804109364748001, |
| "learning_rate": 2.3488458333629777e-05, |
| "loss": 0.0064, |
| "step": 13880 |
| }, |
| { |
| "epoch": 7.7728035814213765, |
| "grad_norm": 0.18711616098880768, |
| "learning_rate": 2.341839961207482e-05, |
| "loss": 0.0082, |
| "step": 13890 |
| }, |
| { |
| "epoch": 7.778399552322328, |
| "grad_norm": 0.17115071415901184, |
| "learning_rate": 2.3348413563600325e-05, |
| "loss": 0.008, |
| "step": 13900 |
| }, |
| { |
| "epoch": 7.783995523223279, |
| "grad_norm": 0.3199642300605774, |
| "learning_rate": 2.3278500379545436e-05, |
| "loss": 0.008, |
| "step": 13910 |
| }, |
| { |
| "epoch": 7.789591494124231, |
| "grad_norm": 0.16800075769424438, |
| "learning_rate": 2.3208660251050158e-05, |
| "loss": 0.0054, |
| "step": 13920 |
| }, |
| { |
| "epoch": 7.795187465025182, |
| "grad_norm": 0.11445470154285431, |
| "learning_rate": 2.3138893369054766e-05, |
| "loss": 0.0067, |
| "step": 13930 |
| }, |
| { |
| "epoch": 7.800783435926133, |
| "grad_norm": 0.1465342938899994, |
| "learning_rate": 2.3069199924299174e-05, |
| "loss": 0.0046, |
| "step": 13940 |
| }, |
| { |
| "epoch": 7.806379406827085, |
| "grad_norm": 0.10726216435432434, |
| "learning_rate": 2.2999580107322653e-05, |
| "loss": 0.013, |
| "step": 13950 |
| }, |
| { |
| "epoch": 7.811975377728036, |
| "grad_norm": 0.2467944324016571, |
| "learning_rate": 2.29300341084631e-05, |
| "loss": 0.006, |
| "step": 13960 |
| }, |
| { |
| "epoch": 7.817571348628987, |
| "grad_norm": 0.18158167600631714, |
| "learning_rate": 2.2860562117856647e-05, |
| "loss": 0.0065, |
| "step": 13970 |
| }, |
| { |
| "epoch": 7.823167319529938, |
| "grad_norm": 0.1618615835905075, |
| "learning_rate": 2.279116432543705e-05, |
| "loss": 0.0065, |
| "step": 13980 |
| }, |
| { |
| "epoch": 7.8287632904308895, |
| "grad_norm": 0.1069146990776062, |
| "learning_rate": 2.2721840920935196e-05, |
| "loss": 0.0105, |
| "step": 13990 |
| }, |
| { |
| "epoch": 7.834359261331841, |
| "grad_norm": 0.12003065645694733, |
| "learning_rate": 2.2652592093878666e-05, |
| "loss": 0.0049, |
| "step": 14000 |
| }, |
| { |
| "epoch": 7.839955232232793, |
| "grad_norm": 0.09423186630010605, |
| "learning_rate": 2.258341803359108e-05, |
| "loss": 0.0061, |
| "step": 14010 |
| }, |
| { |
| "epoch": 7.845551203133744, |
| "grad_norm": 0.35245028138160706, |
| "learning_rate": 2.251431892919171e-05, |
| "loss": 0.0091, |
| "step": 14020 |
| }, |
| { |
| "epoch": 7.851147174034695, |
| "grad_norm": 0.11108125001192093, |
| "learning_rate": 2.2445294969594844e-05, |
| "loss": 0.007, |
| "step": 14030 |
| }, |
| { |
| "epoch": 7.856743144935646, |
| "grad_norm": 0.10527674853801727, |
| "learning_rate": 2.237634634350934e-05, |
| "loss": 0.0042, |
| "step": 14040 |
| }, |
| { |
| "epoch": 7.862339115836598, |
| "grad_norm": 0.2263229489326477, |
| "learning_rate": 2.2307473239438154e-05, |
| "loss": 0.0056, |
| "step": 14050 |
| }, |
| { |
| "epoch": 7.867935086737549, |
| "grad_norm": 0.13221915066242218, |
| "learning_rate": 2.2238675845677663e-05, |
| "loss": 0.0068, |
| "step": 14060 |
| }, |
| { |
| "epoch": 7.8735310576385, |
| "grad_norm": 0.17508424818515778, |
| "learning_rate": 2.2169954350317374e-05, |
| "loss": 0.007, |
| "step": 14070 |
| }, |
| { |
| "epoch": 7.879127028539451, |
| "grad_norm": 0.24999241530895233, |
| "learning_rate": 2.2101308941239203e-05, |
| "loss": 0.0085, |
| "step": 14080 |
| }, |
| { |
| "epoch": 7.8847229994404024, |
| "grad_norm": 0.12810635566711426, |
| "learning_rate": 2.2032739806117058e-05, |
| "loss": 0.0084, |
| "step": 14090 |
| }, |
| { |
| "epoch": 7.8903189703413545, |
| "grad_norm": 0.22745615243911743, |
| "learning_rate": 2.196424713241637e-05, |
| "loss": 0.0145, |
| "step": 14100 |
| }, |
| { |
| "epoch": 7.895914941242306, |
| "grad_norm": 0.0886574536561966, |
| "learning_rate": 2.1895831107393484e-05, |
| "loss": 0.0071, |
| "step": 14110 |
| }, |
| { |
| "epoch": 7.901510912143257, |
| "grad_norm": 0.18623238801956177, |
| "learning_rate": 2.182749191809518e-05, |
| "loss": 0.0077, |
| "step": 14120 |
| }, |
| { |
| "epoch": 7.907106883044208, |
| "grad_norm": 0.20176784694194794, |
| "learning_rate": 2.1759229751358217e-05, |
| "loss": 0.008, |
| "step": 14130 |
| }, |
| { |
| "epoch": 7.912702853945159, |
| "grad_norm": 0.18935443460941315, |
| "learning_rate": 2.1691044793808734e-05, |
| "loss": 0.0069, |
| "step": 14140 |
| }, |
| { |
| "epoch": 7.918298824846111, |
| "grad_norm": 0.18812550604343414, |
| "learning_rate": 2.1622937231861822e-05, |
| "loss": 0.0051, |
| "step": 14150 |
| }, |
| { |
| "epoch": 7.923894795747062, |
| "grad_norm": 0.12224578857421875, |
| "learning_rate": 2.1554907251720945e-05, |
| "loss": 0.0053, |
| "step": 14160 |
| }, |
| { |
| "epoch": 7.929490766648014, |
| "grad_norm": 0.12175440043210983, |
| "learning_rate": 2.148695503937745e-05, |
| "loss": 0.0075, |
| "step": 14170 |
| }, |
| { |
| "epoch": 7.935086737548965, |
| "grad_norm": 0.11878049373626709, |
| "learning_rate": 2.1419080780610123e-05, |
| "loss": 0.0062, |
| "step": 14180 |
| }, |
| { |
| "epoch": 7.940682708449916, |
| "grad_norm": 0.19284716248512268, |
| "learning_rate": 2.1351284660984572e-05, |
| "loss": 0.0063, |
| "step": 14190 |
| }, |
| { |
| "epoch": 7.9462786793508675, |
| "grad_norm": 0.159319207072258, |
| "learning_rate": 2.128356686585282e-05, |
| "loss": 0.0064, |
| "step": 14200 |
| }, |
| { |
| "epoch": 7.951874650251819, |
| "grad_norm": 0.16800148785114288, |
| "learning_rate": 2.121592758035273e-05, |
| "loss": 0.0054, |
| "step": 14210 |
| }, |
| { |
| "epoch": 7.95747062115277, |
| "grad_norm": 0.23277972638607025, |
| "learning_rate": 2.1148366989407496e-05, |
| "loss": 0.0056, |
| "step": 14220 |
| }, |
| { |
| "epoch": 7.963066592053721, |
| "grad_norm": 0.08594591915607452, |
| "learning_rate": 2.1080885277725236e-05, |
| "loss": 0.0054, |
| "step": 14230 |
| }, |
| { |
| "epoch": 7.968662562954672, |
| "grad_norm": 0.21676327288150787, |
| "learning_rate": 2.1013482629798333e-05, |
| "loss": 0.0071, |
| "step": 14240 |
| }, |
| { |
| "epoch": 7.9742585338556236, |
| "grad_norm": 0.1778232604265213, |
| "learning_rate": 2.094615922990309e-05, |
| "loss": 0.0067, |
| "step": 14250 |
| }, |
| { |
| "epoch": 7.979854504756576, |
| "grad_norm": 0.2177736759185791, |
| "learning_rate": 2.0878915262099098e-05, |
| "loss": 0.0068, |
| "step": 14260 |
| }, |
| { |
| "epoch": 7.985450475657527, |
| "grad_norm": 0.25127291679382324, |
| "learning_rate": 2.0811750910228774e-05, |
| "loss": 0.0104, |
| "step": 14270 |
| }, |
| { |
| "epoch": 7.991046446558478, |
| "grad_norm": 0.08792544901371002, |
| "learning_rate": 2.0744666357916925e-05, |
| "loss": 0.0064, |
| "step": 14280 |
| }, |
| { |
| "epoch": 7.996642417459429, |
| "grad_norm": 0.1125119999051094, |
| "learning_rate": 2.067766178857013e-05, |
| "loss": 0.0099, |
| "step": 14290 |
| }, |
| { |
| "epoch": 8.00223838836038, |
| "grad_norm": 0.18561410903930664, |
| "learning_rate": 2.061073738537635e-05, |
| "loss": 0.0089, |
| "step": 14300 |
| }, |
| { |
| "epoch": 8.007834359261333, |
| "grad_norm": 0.10987678915262222, |
| "learning_rate": 2.0543893331304333e-05, |
| "loss": 0.0071, |
| "step": 14310 |
| }, |
| { |
| "epoch": 8.013430330162283, |
| "grad_norm": 0.10636857897043228, |
| "learning_rate": 2.0477129809103147e-05, |
| "loss": 0.007, |
| "step": 14320 |
| }, |
| { |
| "epoch": 8.019026301063235, |
| "grad_norm": 0.16379332542419434, |
| "learning_rate": 2.0410447001301753e-05, |
| "loss": 0.006, |
| "step": 14330 |
| }, |
| { |
| "epoch": 8.024622271964185, |
| "grad_norm": 0.09951362758874893, |
| "learning_rate": 2.0343845090208368e-05, |
| "loss": 0.0052, |
| "step": 14340 |
| }, |
| { |
| "epoch": 8.030218242865137, |
| "grad_norm": 0.1974375694990158, |
| "learning_rate": 2.0277324257910106e-05, |
| "loss": 0.0061, |
| "step": 14350 |
| }, |
| { |
| "epoch": 8.035814213766088, |
| "grad_norm": 0.16213521361351013, |
| "learning_rate": 2.0210884686272368e-05, |
| "loss": 0.0056, |
| "step": 14360 |
| }, |
| { |
| "epoch": 8.04141018466704, |
| "grad_norm": 0.32907333970069885, |
| "learning_rate": 2.0144526556938387e-05, |
| "loss": 0.011, |
| "step": 14370 |
| }, |
| { |
| "epoch": 8.047006155567992, |
| "grad_norm": 0.24763990938663483, |
| "learning_rate": 2.0078250051328784e-05, |
| "loss": 0.0059, |
| "step": 14380 |
| }, |
| { |
| "epoch": 8.052602126468942, |
| "grad_norm": 0.06522991508245468, |
| "learning_rate": 2.0012055350640986e-05, |
| "loss": 0.0075, |
| "step": 14390 |
| }, |
| { |
| "epoch": 8.058198097369894, |
| "grad_norm": 0.1594466120004654, |
| "learning_rate": 1.9945942635848748e-05, |
| "loss": 0.0107, |
| "step": 14400 |
| }, |
| { |
| "epoch": 8.063794068270845, |
| "grad_norm": 0.11248297244310379, |
| "learning_rate": 1.9879912087701753e-05, |
| "loss": 0.0043, |
| "step": 14410 |
| }, |
| { |
| "epoch": 8.069390039171797, |
| "grad_norm": 0.11491246521472931, |
| "learning_rate": 1.981396388672496e-05, |
| "loss": 0.0043, |
| "step": 14420 |
| }, |
| { |
| "epoch": 8.074986010072747, |
| "grad_norm": 0.22106263041496277, |
| "learning_rate": 1.974809821321827e-05, |
| "loss": 0.0055, |
| "step": 14430 |
| }, |
| { |
| "epoch": 8.0805819809737, |
| "grad_norm": 0.16226910054683685, |
| "learning_rate": 1.9682315247255894e-05, |
| "loss": 0.0085, |
| "step": 14440 |
| }, |
| { |
| "epoch": 8.08617795187465, |
| "grad_norm": 0.09066546708345413, |
| "learning_rate": 1.9616615168685943e-05, |
| "loss": 0.0083, |
| "step": 14450 |
| }, |
| { |
| "epoch": 8.091773922775602, |
| "grad_norm": 0.11933751404285431, |
| "learning_rate": 1.9550998157129946e-05, |
| "loss": 0.0057, |
| "step": 14460 |
| }, |
| { |
| "epoch": 8.097369893676554, |
| "grad_norm": 0.1404096931219101, |
| "learning_rate": 1.9485464391982284e-05, |
| "loss": 0.0047, |
| "step": 14470 |
| }, |
| { |
| "epoch": 8.102965864577504, |
| "grad_norm": 0.2508150339126587, |
| "learning_rate": 1.942001405240979e-05, |
| "loss": 0.0076, |
| "step": 14480 |
| }, |
| { |
| "epoch": 8.108561835478456, |
| "grad_norm": 0.17527352273464203, |
| "learning_rate": 1.9354647317351188e-05, |
| "loss": 0.0077, |
| "step": 14490 |
| }, |
| { |
| "epoch": 8.114157806379406, |
| "grad_norm": 0.11819542944431305, |
| "learning_rate": 1.928936436551661e-05, |
| "loss": 0.0048, |
| "step": 14500 |
| }, |
| { |
| "epoch": 8.119753777280359, |
| "grad_norm": 0.17159508168697357, |
| "learning_rate": 1.9224165375387193e-05, |
| "loss": 0.0072, |
| "step": 14510 |
| }, |
| { |
| "epoch": 8.125349748181309, |
| "grad_norm": 0.1392519325017929, |
| "learning_rate": 1.9159050525214452e-05, |
| "loss": 0.0058, |
| "step": 14520 |
| }, |
| { |
| "epoch": 8.130945719082261, |
| "grad_norm": 0.2096053957939148, |
| "learning_rate": 1.909401999301993e-05, |
| "loss": 0.007, |
| "step": 14530 |
| }, |
| { |
| "epoch": 8.136541689983211, |
| "grad_norm": 0.2075774371623993, |
| "learning_rate": 1.9029073956594606e-05, |
| "loss": 0.0063, |
| "step": 14540 |
| }, |
| { |
| "epoch": 8.142137660884163, |
| "grad_norm": 0.0607825368642807, |
| "learning_rate": 1.8964212593498442e-05, |
| "loss": 0.0046, |
| "step": 14550 |
| }, |
| { |
| "epoch": 8.147733631785115, |
| "grad_norm": 0.20028991997241974, |
| "learning_rate": 1.8899436081059975e-05, |
| "loss": 0.0067, |
| "step": 14560 |
| }, |
| { |
| "epoch": 8.153329602686066, |
| "grad_norm": 0.12437421083450317, |
| "learning_rate": 1.8834744596375666e-05, |
| "loss": 0.0045, |
| "step": 14570 |
| }, |
| { |
| "epoch": 8.158925573587018, |
| "grad_norm": 0.09412521868944168, |
| "learning_rate": 1.877013831630961e-05, |
| "loss": 0.0053, |
| "step": 14580 |
| }, |
| { |
| "epoch": 8.164521544487968, |
| "grad_norm": 0.30078214406967163, |
| "learning_rate": 1.8705617417492883e-05, |
| "loss": 0.0088, |
| "step": 14590 |
| }, |
| { |
| "epoch": 8.17011751538892, |
| "grad_norm": 0.2020367681980133, |
| "learning_rate": 1.8641182076323148e-05, |
| "loss": 0.0074, |
| "step": 14600 |
| }, |
| { |
| "epoch": 8.17571348628987, |
| "grad_norm": 0.12557435035705566, |
| "learning_rate": 1.85768324689642e-05, |
| "loss": 0.0054, |
| "step": 14610 |
| }, |
| { |
| "epoch": 8.181309457190823, |
| "grad_norm": 0.11945895105600357, |
| "learning_rate": 1.851256877134538e-05, |
| "loss": 0.0078, |
| "step": 14620 |
| }, |
| { |
| "epoch": 8.186905428091775, |
| "grad_norm": 0.33773839473724365, |
| "learning_rate": 1.8448391159161204e-05, |
| "loss": 0.0101, |
| "step": 14630 |
| }, |
| { |
| "epoch": 8.192501398992725, |
| "grad_norm": 0.2184380739927292, |
| "learning_rate": 1.838429980787081e-05, |
| "loss": 0.0059, |
| "step": 14640 |
| }, |
| { |
| "epoch": 8.198097369893677, |
| "grad_norm": 0.06359529495239258, |
| "learning_rate": 1.8320294892697478e-05, |
| "loss": 0.006, |
| "step": 14650 |
| }, |
| { |
| "epoch": 8.203693340794628, |
| "grad_norm": 0.1690957248210907, |
| "learning_rate": 1.8256376588628238e-05, |
| "loss": 0.008, |
| "step": 14660 |
| }, |
| { |
| "epoch": 8.20928931169558, |
| "grad_norm": 0.296812504529953, |
| "learning_rate": 1.8192545070413282e-05, |
| "loss": 0.0069, |
| "step": 14670 |
| }, |
| { |
| "epoch": 8.21488528259653, |
| "grad_norm": 0.08360179513692856, |
| "learning_rate": 1.8128800512565513e-05, |
| "loss": 0.007, |
| "step": 14680 |
| }, |
| { |
| "epoch": 8.220481253497482, |
| "grad_norm": 0.12985661625862122, |
| "learning_rate": 1.8065143089360172e-05, |
| "loss": 0.0079, |
| "step": 14690 |
| }, |
| { |
| "epoch": 8.226077224398432, |
| "grad_norm": 0.10445982962846756, |
| "learning_rate": 1.800157297483417e-05, |
| "loss": 0.0036, |
| "step": 14700 |
| }, |
| { |
| "epoch": 8.231673195299384, |
| "grad_norm": 0.1876983791589737, |
| "learning_rate": 1.7938090342785817e-05, |
| "loss": 0.0058, |
| "step": 14710 |
| }, |
| { |
| "epoch": 8.237269166200337, |
| "grad_norm": 0.07933235913515091, |
| "learning_rate": 1.787469536677419e-05, |
| "loss": 0.0048, |
| "step": 14720 |
| }, |
| { |
| "epoch": 8.242865137101287, |
| "grad_norm": 0.2597578465938568, |
| "learning_rate": 1.7811388220118707e-05, |
| "loss": 0.0077, |
| "step": 14730 |
| }, |
| { |
| "epoch": 8.248461108002239, |
| "grad_norm": 0.1318414807319641, |
| "learning_rate": 1.774816907589873e-05, |
| "loss": 0.0038, |
| "step": 14740 |
| }, |
| { |
| "epoch": 8.25405707890319, |
| "grad_norm": 0.23657891154289246, |
| "learning_rate": 1.768503810695295e-05, |
| "loss": 0.0074, |
| "step": 14750 |
| }, |
| { |
| "epoch": 8.259653049804141, |
| "grad_norm": 0.12084835767745972, |
| "learning_rate": 1.7621995485879062e-05, |
| "loss": 0.0086, |
| "step": 14760 |
| }, |
| { |
| "epoch": 8.265249020705092, |
| "grad_norm": 0.2077346295118332, |
| "learning_rate": 1.755904138503316e-05, |
| "loss": 0.0066, |
| "step": 14770 |
| }, |
| { |
| "epoch": 8.270844991606044, |
| "grad_norm": 0.26253417134284973, |
| "learning_rate": 1.749617597652934e-05, |
| "loss": 0.0107, |
| "step": 14780 |
| }, |
| { |
| "epoch": 8.276440962506994, |
| "grad_norm": 0.25481829047203064, |
| "learning_rate": 1.743339943223926e-05, |
| "loss": 0.0044, |
| "step": 14790 |
| }, |
| { |
| "epoch": 8.282036933407946, |
| "grad_norm": 0.23157408833503723, |
| "learning_rate": 1.7370711923791567e-05, |
| "loss": 0.0069, |
| "step": 14800 |
| }, |
| { |
| "epoch": 8.287632904308898, |
| "grad_norm": 0.10085418075323105, |
| "learning_rate": 1.7308113622571544e-05, |
| "loss": 0.0036, |
| "step": 14810 |
| }, |
| { |
| "epoch": 8.293228875209849, |
| "grad_norm": 0.10876370966434479, |
| "learning_rate": 1.7245604699720535e-05, |
| "loss": 0.007, |
| "step": 14820 |
| }, |
| { |
| "epoch": 8.2988248461108, |
| "grad_norm": 0.20935757458209991, |
| "learning_rate": 1.7183185326135543e-05, |
| "loss": 0.0055, |
| "step": 14830 |
| }, |
| { |
| "epoch": 8.304420817011751, |
| "grad_norm": 0.13824748992919922, |
| "learning_rate": 1.712085567246878e-05, |
| "loss": 0.0072, |
| "step": 14840 |
| }, |
| { |
| "epoch": 8.310016787912703, |
| "grad_norm": 0.3369564414024353, |
| "learning_rate": 1.70586159091271e-05, |
| "loss": 0.0069, |
| "step": 14850 |
| }, |
| { |
| "epoch": 8.315612758813653, |
| "grad_norm": 0.2684394419193268, |
| "learning_rate": 1.699646620627168e-05, |
| "loss": 0.0061, |
| "step": 14860 |
| }, |
| { |
| "epoch": 8.321208729714606, |
| "grad_norm": 0.23020261526107788, |
| "learning_rate": 1.6934406733817414e-05, |
| "loss": 0.0126, |
| "step": 14870 |
| }, |
| { |
| "epoch": 8.326804700615558, |
| "grad_norm": 0.23905567824840546, |
| "learning_rate": 1.6872437661432517e-05, |
| "loss": 0.0057, |
| "step": 14880 |
| }, |
| { |
| "epoch": 8.332400671516508, |
| "grad_norm": 0.11183072626590729, |
| "learning_rate": 1.6810559158538092e-05, |
| "loss": 0.0061, |
| "step": 14890 |
| }, |
| { |
| "epoch": 8.33799664241746, |
| "grad_norm": 0.11450804024934769, |
| "learning_rate": 1.6748771394307585e-05, |
| "loss": 0.0041, |
| "step": 14900 |
| }, |
| { |
| "epoch": 8.34359261331841, |
| "grad_norm": 0.14276103675365448, |
| "learning_rate": 1.6687074537666398e-05, |
| "loss": 0.0046, |
| "step": 14910 |
| }, |
| { |
| "epoch": 8.349188584219362, |
| "grad_norm": 0.1129729300737381, |
| "learning_rate": 1.662546875729138e-05, |
| "loss": 0.0063, |
| "step": 14920 |
| }, |
| { |
| "epoch": 8.354784555120313, |
| "grad_norm": 0.18285100162029266, |
| "learning_rate": 1.6563954221610355e-05, |
| "loss": 0.0106, |
| "step": 14930 |
| }, |
| { |
| "epoch": 8.360380526021265, |
| "grad_norm": 0.10539596527814865, |
| "learning_rate": 1.6502531098801753e-05, |
| "loss": 0.0043, |
| "step": 14940 |
| }, |
| { |
| "epoch": 8.365976496922215, |
| "grad_norm": 0.13819168508052826, |
| "learning_rate": 1.6441199556794033e-05, |
| "loss": 0.0065, |
| "step": 14950 |
| }, |
| { |
| "epoch": 8.371572467823167, |
| "grad_norm": 0.19076746702194214, |
| "learning_rate": 1.637995976326527e-05, |
| "loss": 0.01, |
| "step": 14960 |
| }, |
| { |
| "epoch": 8.37716843872412, |
| "grad_norm": 0.24138867855072021, |
| "learning_rate": 1.631881188564275e-05, |
| "loss": 0.0082, |
| "step": 14970 |
| }, |
| { |
| "epoch": 8.38276440962507, |
| "grad_norm": 0.1397552490234375, |
| "learning_rate": 1.62577560911024e-05, |
| "loss": 0.0047, |
| "step": 14980 |
| }, |
| { |
| "epoch": 8.388360380526022, |
| "grad_norm": 0.08066073060035706, |
| "learning_rate": 1.6196792546568472e-05, |
| "loss": 0.0076, |
| "step": 14990 |
| }, |
| { |
| "epoch": 8.393956351426972, |
| "grad_norm": 0.2772653102874756, |
| "learning_rate": 1.6135921418712956e-05, |
| "loss": 0.008, |
| "step": 15000 |
| }, |
| { |
| "epoch": 8.399552322327924, |
| "grad_norm": 0.1933654099702835, |
| "learning_rate": 1.6075142873955164e-05, |
| "loss": 0.0049, |
| "step": 15010 |
| }, |
| { |
| "epoch": 8.405148293228875, |
| "grad_norm": 0.09738892316818237, |
| "learning_rate": 1.6014457078461353e-05, |
| "loss": 0.0046, |
| "step": 15020 |
| }, |
| { |
| "epoch": 8.410744264129827, |
| "grad_norm": 0.11632133275270462, |
| "learning_rate": 1.5953864198144135e-05, |
| "loss": 0.0079, |
| "step": 15030 |
| }, |
| { |
| "epoch": 8.416340235030777, |
| "grad_norm": 0.10637476295232773, |
| "learning_rate": 1.5893364398662176e-05, |
| "loss": 0.0052, |
| "step": 15040 |
| }, |
| { |
| "epoch": 8.421936205931729, |
| "grad_norm": 0.22587163746356964, |
| "learning_rate": 1.583295784541958e-05, |
| "loss": 0.0064, |
| "step": 15050 |
| }, |
| { |
| "epoch": 8.427532176832681, |
| "grad_norm": 0.15165762603282928, |
| "learning_rate": 1.5772644703565565e-05, |
| "loss": 0.0068, |
| "step": 15060 |
| }, |
| { |
| "epoch": 8.433128147733632, |
| "grad_norm": 0.13497453927993774, |
| "learning_rate": 1.5712425137993973e-05, |
| "loss": 0.0076, |
| "step": 15070 |
| }, |
| { |
| "epoch": 8.438724118634584, |
| "grad_norm": 0.1444980800151825, |
| "learning_rate": 1.5652299313342773e-05, |
| "loss": 0.0066, |
| "step": 15080 |
| }, |
| { |
| "epoch": 8.444320089535534, |
| "grad_norm": 0.32101383805274963, |
| "learning_rate": 1.5592267393993716e-05, |
| "loss": 0.0054, |
| "step": 15090 |
| }, |
| { |
| "epoch": 8.449916060436486, |
| "grad_norm": 0.26894599199295044, |
| "learning_rate": 1.553232954407171e-05, |
| "loss": 0.0039, |
| "step": 15100 |
| }, |
| { |
| "epoch": 8.455512031337436, |
| "grad_norm": 0.26109951734542847, |
| "learning_rate": 1.5472485927444597e-05, |
| "loss": 0.0057, |
| "step": 15110 |
| }, |
| { |
| "epoch": 8.461108002238388, |
| "grad_norm": 0.09691357612609863, |
| "learning_rate": 1.5412736707722537e-05, |
| "loss": 0.0036, |
| "step": 15120 |
| }, |
| { |
| "epoch": 8.46670397313934, |
| "grad_norm": 0.08756586909294128, |
| "learning_rate": 1.5353082048257596e-05, |
| "loss": 0.0059, |
| "step": 15130 |
| }, |
| { |
| "epoch": 8.47229994404029, |
| "grad_norm": 0.0936000794172287, |
| "learning_rate": 1.5293522112143373e-05, |
| "loss": 0.0042, |
| "step": 15140 |
| }, |
| { |
| "epoch": 8.477895914941243, |
| "grad_norm": 0.20747262239456177, |
| "learning_rate": 1.5234057062214402e-05, |
| "loss": 0.0118, |
| "step": 15150 |
| }, |
| { |
| "epoch": 8.483491885842193, |
| "grad_norm": 0.11843043565750122, |
| "learning_rate": 1.517468706104589e-05, |
| "loss": 0.0072, |
| "step": 15160 |
| }, |
| { |
| "epoch": 8.489087856743145, |
| "grad_norm": 0.23854964971542358, |
| "learning_rate": 1.5115412270953167e-05, |
| "loss": 0.0066, |
| "step": 15170 |
| }, |
| { |
| "epoch": 8.494683827644096, |
| "grad_norm": 0.1770446002483368, |
| "learning_rate": 1.5056232853991209e-05, |
| "loss": 0.0062, |
| "step": 15180 |
| }, |
| { |
| "epoch": 8.500279798545048, |
| "grad_norm": 0.23799461126327515, |
| "learning_rate": 1.4997148971954344e-05, |
| "loss": 0.0075, |
| "step": 15190 |
| }, |
| { |
| "epoch": 8.505875769445998, |
| "grad_norm": 0.3780512511730194, |
| "learning_rate": 1.4938160786375572e-05, |
| "loss": 0.0081, |
| "step": 15200 |
| }, |
| { |
| "epoch": 8.51147174034695, |
| "grad_norm": 0.11119966208934784, |
| "learning_rate": 1.4879268458526379e-05, |
| "loss": 0.0046, |
| "step": 15210 |
| }, |
| { |
| "epoch": 8.517067711247902, |
| "grad_norm": 0.09658356010913849, |
| "learning_rate": 1.4820472149416154e-05, |
| "loss": 0.007, |
| "step": 15220 |
| }, |
| { |
| "epoch": 8.522663682148853, |
| "grad_norm": 0.17144611477851868, |
| "learning_rate": 1.4761772019791748e-05, |
| "loss": 0.0056, |
| "step": 15230 |
| }, |
| { |
| "epoch": 8.528259653049805, |
| "grad_norm": 0.14623138308525085, |
| "learning_rate": 1.470316823013707e-05, |
| "loss": 0.0051, |
| "step": 15240 |
| }, |
| { |
| "epoch": 8.533855623950755, |
| "grad_norm": 0.1579722911119461, |
| "learning_rate": 1.4644660940672627e-05, |
| "loss": 0.0049, |
| "step": 15250 |
| }, |
| { |
| "epoch": 8.539451594851707, |
| "grad_norm": 0.14990709722042084, |
| "learning_rate": 1.4586250311355132e-05, |
| "loss": 0.006, |
| "step": 15260 |
| }, |
| { |
| "epoch": 8.545047565752657, |
| "grad_norm": 0.24695487320423126, |
| "learning_rate": 1.4527936501877032e-05, |
| "loss": 0.0072, |
| "step": 15270 |
| }, |
| { |
| "epoch": 8.55064353665361, |
| "grad_norm": 0.2550105154514313, |
| "learning_rate": 1.4469719671666043e-05, |
| "loss": 0.0058, |
| "step": 15280 |
| }, |
| { |
| "epoch": 8.556239507554562, |
| "grad_norm": 0.17998188734054565, |
| "learning_rate": 1.4411599979884744e-05, |
| "loss": 0.0089, |
| "step": 15290 |
| }, |
| { |
| "epoch": 8.561835478455512, |
| "grad_norm": 0.3639971613883972, |
| "learning_rate": 1.435357758543015e-05, |
| "loss": 0.0085, |
| "step": 15300 |
| }, |
| { |
| "epoch": 8.567431449356464, |
| "grad_norm": 0.12687824666500092, |
| "learning_rate": 1.4295652646933277e-05, |
| "loss": 0.0061, |
| "step": 15310 |
| }, |
| { |
| "epoch": 8.573027420257414, |
| "grad_norm": 0.1352899670600891, |
| "learning_rate": 1.4237825322758736e-05, |
| "loss": 0.0066, |
| "step": 15320 |
| }, |
| { |
| "epoch": 8.578623391158366, |
| "grad_norm": 0.2139214277267456, |
| "learning_rate": 1.4180095771004154e-05, |
| "loss": 0.006, |
| "step": 15330 |
| }, |
| { |
| "epoch": 8.584219362059317, |
| "grad_norm": 0.13526403903961182, |
| "learning_rate": 1.412246414949997e-05, |
| "loss": 0.0061, |
| "step": 15340 |
| }, |
| { |
| "epoch": 8.589815332960269, |
| "grad_norm": 0.10206010937690735, |
| "learning_rate": 1.4064930615808808e-05, |
| "loss": 0.0042, |
| "step": 15350 |
| }, |
| { |
| "epoch": 8.59541130386122, |
| "grad_norm": 0.1680195927619934, |
| "learning_rate": 1.4007495327225162e-05, |
| "loss": 0.0063, |
| "step": 15360 |
| }, |
| { |
| "epoch": 8.601007274762171, |
| "grad_norm": 0.2092961072921753, |
| "learning_rate": 1.3950158440774957e-05, |
| "loss": 0.0089, |
| "step": 15370 |
| }, |
| { |
| "epoch": 8.606603245663123, |
| "grad_norm": 0.24639266729354858, |
| "learning_rate": 1.389292011321498e-05, |
| "loss": 0.0037, |
| "step": 15380 |
| }, |
| { |
| "epoch": 8.612199216564074, |
| "grad_norm": 0.20889121294021606, |
| "learning_rate": 1.383578050103268e-05, |
| "loss": 0.0036, |
| "step": 15390 |
| }, |
| { |
| "epoch": 8.617795187465026, |
| "grad_norm": 0.1731806993484497, |
| "learning_rate": 1.3778739760445552e-05, |
| "loss": 0.0049, |
| "step": 15400 |
| }, |
| { |
| "epoch": 8.623391158365976, |
| "grad_norm": 0.15791241824626923, |
| "learning_rate": 1.3721798047400813e-05, |
| "loss": 0.0064, |
| "step": 15410 |
| }, |
| { |
| "epoch": 8.628987129266928, |
| "grad_norm": 0.2612980604171753, |
| "learning_rate": 1.3664955517574968e-05, |
| "loss": 0.0056, |
| "step": 15420 |
| }, |
| { |
| "epoch": 8.634583100167879, |
| "grad_norm": 0.12942969799041748, |
| "learning_rate": 1.3608212326373249e-05, |
| "loss": 0.0044, |
| "step": 15430 |
| }, |
| { |
| "epoch": 8.64017907106883, |
| "grad_norm": 0.224086731672287, |
| "learning_rate": 1.3551568628929434e-05, |
| "loss": 0.0065, |
| "step": 15440 |
| }, |
| { |
| "epoch": 8.645775041969781, |
| "grad_norm": 0.234924778342247, |
| "learning_rate": 1.3495024580105192e-05, |
| "loss": 0.0055, |
| "step": 15450 |
| }, |
| { |
| "epoch": 8.651371012870733, |
| "grad_norm": 0.14701171219348907, |
| "learning_rate": 1.343858033448982e-05, |
| "loss": 0.0078, |
| "step": 15460 |
| }, |
| { |
| "epoch": 8.656966983771685, |
| "grad_norm": 0.06672263145446777, |
| "learning_rate": 1.3382236046399722e-05, |
| "loss": 0.0057, |
| "step": 15470 |
| }, |
| { |
| "epoch": 8.662562954672635, |
| "grad_norm": 0.11234284192323685, |
| "learning_rate": 1.3325991869878013e-05, |
| "loss": 0.0053, |
| "step": 15480 |
| }, |
| { |
| "epoch": 8.668158925573588, |
| "grad_norm": 0.2150266021490097, |
| "learning_rate": 1.3269847958694148e-05, |
| "loss": 0.0045, |
| "step": 15490 |
| }, |
| { |
| "epoch": 8.673754896474538, |
| "grad_norm": 0.37493982911109924, |
| "learning_rate": 1.3213804466343421e-05, |
| "loss": 0.0058, |
| "step": 15500 |
| }, |
| { |
| "epoch": 8.67935086737549, |
| "grad_norm": 0.054848652333021164, |
| "learning_rate": 1.3157861546046613e-05, |
| "loss": 0.0062, |
| "step": 15510 |
| }, |
| { |
| "epoch": 8.68494683827644, |
| "grad_norm": 0.30526259541511536, |
| "learning_rate": 1.3102019350749528e-05, |
| "loss": 0.005, |
| "step": 15520 |
| }, |
| { |
| "epoch": 8.690542809177392, |
| "grad_norm": 0.11414709687232971, |
| "learning_rate": 1.3046278033122577e-05, |
| "loss": 0.0055, |
| "step": 15530 |
| }, |
| { |
| "epoch": 8.696138780078343, |
| "grad_norm": 0.19409357011318207, |
| "learning_rate": 1.299063774556042e-05, |
| "loss": 0.0048, |
| "step": 15540 |
| }, |
| { |
| "epoch": 8.701734750979295, |
| "grad_norm": 0.0840323343873024, |
| "learning_rate": 1.293509864018146e-05, |
| "loss": 0.0062, |
| "step": 15550 |
| }, |
| { |
| "epoch": 8.707330721880247, |
| "grad_norm": 0.2921426594257355, |
| "learning_rate": 1.2879660868827508e-05, |
| "loss": 0.0055, |
| "step": 15560 |
| }, |
| { |
| "epoch": 8.712926692781197, |
| "grad_norm": 0.18921242654323578, |
| "learning_rate": 1.2824324583063302e-05, |
| "loss": 0.0065, |
| "step": 15570 |
| }, |
| { |
| "epoch": 8.71852266368215, |
| "grad_norm": 0.2043517678976059, |
| "learning_rate": 1.2769089934176126e-05, |
| "loss": 0.0048, |
| "step": 15580 |
| }, |
| { |
| "epoch": 8.7241186345831, |
| "grad_norm": 0.14090007543563843, |
| "learning_rate": 1.2713957073175425e-05, |
| "loss": 0.0043, |
| "step": 15590 |
| }, |
| { |
| "epoch": 8.729714605484052, |
| "grad_norm": 0.13512486219406128, |
| "learning_rate": 1.2658926150792322e-05, |
| "loss": 0.009, |
| "step": 15600 |
| }, |
| { |
| "epoch": 8.735310576385002, |
| "grad_norm": 0.16850633919239044, |
| "learning_rate": 1.2603997317479238e-05, |
| "loss": 0.0043, |
| "step": 15610 |
| }, |
| { |
| "epoch": 8.740906547285954, |
| "grad_norm": 0.0671689510345459, |
| "learning_rate": 1.2549170723409549e-05, |
| "loss": 0.0047, |
| "step": 15620 |
| }, |
| { |
| "epoch": 8.746502518186904, |
| "grad_norm": 0.17265447974205017, |
| "learning_rate": 1.2494446518477022e-05, |
| "loss": 0.0078, |
| "step": 15630 |
| }, |
| { |
| "epoch": 8.752098489087857, |
| "grad_norm": 0.09633443504571915, |
| "learning_rate": 1.243982485229559e-05, |
| "loss": 0.01, |
| "step": 15640 |
| }, |
| { |
| "epoch": 8.757694459988809, |
| "grad_norm": 0.07608158886432648, |
| "learning_rate": 1.2385305874198776e-05, |
| "loss": 0.008, |
| "step": 15650 |
| }, |
| { |
| "epoch": 8.763290430889759, |
| "grad_norm": 0.1386493295431137, |
| "learning_rate": 1.233088973323937e-05, |
| "loss": 0.0141, |
| "step": 15660 |
| }, |
| { |
| "epoch": 8.768886401790711, |
| "grad_norm": 0.22368523478507996, |
| "learning_rate": 1.2276576578189064e-05, |
| "loss": 0.0046, |
| "step": 15670 |
| }, |
| { |
| "epoch": 8.774482372691661, |
| "grad_norm": 0.1423027664422989, |
| "learning_rate": 1.2222366557537911e-05, |
| "loss": 0.0059, |
| "step": 15680 |
| }, |
| { |
| "epoch": 8.780078343592614, |
| "grad_norm": 0.09472924470901489, |
| "learning_rate": 1.2168259819494066e-05, |
| "loss": 0.0078, |
| "step": 15690 |
| }, |
| { |
| "epoch": 8.785674314493564, |
| "grad_norm": 0.1385987550020218, |
| "learning_rate": 1.2114256511983274e-05, |
| "loss": 0.0044, |
| "step": 15700 |
| }, |
| { |
| "epoch": 8.791270285394516, |
| "grad_norm": 0.1465826779603958, |
| "learning_rate": 1.2060356782648503e-05, |
| "loss": 0.0035, |
| "step": 15710 |
| }, |
| { |
| "epoch": 8.796866256295468, |
| "grad_norm": 0.3275586664676666, |
| "learning_rate": 1.2006560778849578e-05, |
| "loss": 0.0057, |
| "step": 15720 |
| }, |
| { |
| "epoch": 8.802462227196418, |
| "grad_norm": 0.09989197552204132, |
| "learning_rate": 1.1952868647662696e-05, |
| "loss": 0.006, |
| "step": 15730 |
| }, |
| { |
| "epoch": 8.80805819809737, |
| "grad_norm": 0.12719599902629852, |
| "learning_rate": 1.1899280535880119e-05, |
| "loss": 0.0042, |
| "step": 15740 |
| }, |
| { |
| "epoch": 8.81365416899832, |
| "grad_norm": 0.3480566740036011, |
| "learning_rate": 1.1845796590009683e-05, |
| "loss": 0.0073, |
| "step": 15750 |
| }, |
| { |
| "epoch": 8.819250139899273, |
| "grad_norm": 0.1562948226928711, |
| "learning_rate": 1.1792416956274444e-05, |
| "loss": 0.0066, |
| "step": 15760 |
| }, |
| { |
| "epoch": 8.824846110800223, |
| "grad_norm": 0.23169738054275513, |
| "learning_rate": 1.1739141780612306e-05, |
| "loss": 0.0067, |
| "step": 15770 |
| }, |
| { |
| "epoch": 8.830442081701175, |
| "grad_norm": 0.1328081339597702, |
| "learning_rate": 1.1685971208675539e-05, |
| "loss": 0.0051, |
| "step": 15780 |
| }, |
| { |
| "epoch": 8.836038052602127, |
| "grad_norm": 0.10535513609647751, |
| "learning_rate": 1.1632905385830484e-05, |
| "loss": 0.0061, |
| "step": 15790 |
| }, |
| { |
| "epoch": 8.841634023503078, |
| "grad_norm": 0.08534829318523407, |
| "learning_rate": 1.157994445715706e-05, |
| "loss": 0.0052, |
| "step": 15800 |
| }, |
| { |
| "epoch": 8.84722999440403, |
| "grad_norm": 0.21224470436573029, |
| "learning_rate": 1.1527088567448407e-05, |
| "loss": 0.0066, |
| "step": 15810 |
| }, |
| { |
| "epoch": 8.85282596530498, |
| "grad_norm": 0.20451109111309052, |
| "learning_rate": 1.1474337861210543e-05, |
| "loss": 0.0067, |
| "step": 15820 |
| }, |
| { |
| "epoch": 8.858421936205932, |
| "grad_norm": 0.21763543784618378, |
| "learning_rate": 1.1421692482661856e-05, |
| "loss": 0.0089, |
| "step": 15830 |
| }, |
| { |
| "epoch": 8.864017907106883, |
| "grad_norm": 0.14212079346179962, |
| "learning_rate": 1.1369152575732822e-05, |
| "loss": 0.0048, |
| "step": 15840 |
| }, |
| { |
| "epoch": 8.869613878007835, |
| "grad_norm": 0.1489504873752594, |
| "learning_rate": 1.1316718284065537e-05, |
| "loss": 0.0046, |
| "step": 15850 |
| }, |
| { |
| "epoch": 8.875209848908785, |
| "grad_norm": 0.09450363367795944, |
| "learning_rate": 1.1264389751013326e-05, |
| "loss": 0.0053, |
| "step": 15860 |
| }, |
| { |
| "epoch": 8.880805819809737, |
| "grad_norm": 0.2034289836883545, |
| "learning_rate": 1.1212167119640438e-05, |
| "loss": 0.0081, |
| "step": 15870 |
| }, |
| { |
| "epoch": 8.88640179071069, |
| "grad_norm": 0.13935258984565735, |
| "learning_rate": 1.1160050532721528e-05, |
| "loss": 0.0064, |
| "step": 15880 |
| }, |
| { |
| "epoch": 8.89199776161164, |
| "grad_norm": 0.08578619360923767, |
| "learning_rate": 1.1108040132741354e-05, |
| "loss": 0.0111, |
| "step": 15890 |
| }, |
| { |
| "epoch": 8.897593732512592, |
| "grad_norm": 0.0884697362780571, |
| "learning_rate": 1.1056136061894384e-05, |
| "loss": 0.0108, |
| "step": 15900 |
| }, |
| { |
| "epoch": 8.903189703413542, |
| "grad_norm": 0.28323593735694885, |
| "learning_rate": 1.100433846208434e-05, |
| "loss": 0.0116, |
| "step": 15910 |
| }, |
| { |
| "epoch": 8.908785674314494, |
| "grad_norm": 0.14971330761909485, |
| "learning_rate": 1.095264747492391e-05, |
| "loss": 0.0079, |
| "step": 15920 |
| }, |
| { |
| "epoch": 8.914381645215444, |
| "grad_norm": 0.18808728456497192, |
| "learning_rate": 1.090106324173426e-05, |
| "loss": 0.0082, |
| "step": 15930 |
| }, |
| { |
| "epoch": 8.919977616116396, |
| "grad_norm": 0.16924549639225006, |
| "learning_rate": 1.0849585903544706e-05, |
| "loss": 0.0064, |
| "step": 15940 |
| }, |
| { |
| "epoch": 8.925573587017347, |
| "grad_norm": 0.1466728150844574, |
| "learning_rate": 1.0798215601092354e-05, |
| "loss": 0.0106, |
| "step": 15950 |
| }, |
| { |
| "epoch": 8.931169557918299, |
| "grad_norm": 0.18614622950553894, |
| "learning_rate": 1.0746952474821614e-05, |
| "loss": 0.0089, |
| "step": 15960 |
| }, |
| { |
| "epoch": 8.936765528819251, |
| "grad_norm": 0.04307910427451134, |
| "learning_rate": 1.069579666488395e-05, |
| "loss": 0.0092, |
| "step": 15970 |
| }, |
| { |
| "epoch": 8.942361499720201, |
| "grad_norm": 0.20207299292087555, |
| "learning_rate": 1.0644748311137376e-05, |
| "loss": 0.0077, |
| "step": 15980 |
| }, |
| { |
| "epoch": 8.947957470621153, |
| "grad_norm": 0.12527382373809814, |
| "learning_rate": 1.059380755314613e-05, |
| "loss": 0.008, |
| "step": 15990 |
| }, |
| { |
| "epoch": 8.953553441522104, |
| "grad_norm": 0.3143978416919708, |
| "learning_rate": 1.0542974530180327e-05, |
| "loss": 0.0061, |
| "step": 16000 |
| }, |
| { |
| "epoch": 8.959149412423056, |
| "grad_norm": 0.0894945040345192, |
| "learning_rate": 1.049224938121548e-05, |
| "loss": 0.0041, |
| "step": 16010 |
| }, |
| { |
| "epoch": 8.964745383324006, |
| "grad_norm": 0.23625624179840088, |
| "learning_rate": 1.0441632244932237e-05, |
| "loss": 0.0067, |
| "step": 16020 |
| }, |
| { |
| "epoch": 8.970341354224958, |
| "grad_norm": 0.14668506383895874, |
| "learning_rate": 1.0391123259715906e-05, |
| "loss": 0.0056, |
| "step": 16030 |
| }, |
| { |
| "epoch": 8.975937325125908, |
| "grad_norm": 0.17659400403499603, |
| "learning_rate": 1.0340722563656107e-05, |
| "loss": 0.0066, |
| "step": 16040 |
| }, |
| { |
| "epoch": 8.98153329602686, |
| "grad_norm": 0.2076718956232071, |
| "learning_rate": 1.0290430294546449e-05, |
| "loss": 0.0074, |
| "step": 16050 |
| }, |
| { |
| "epoch": 8.987129266927813, |
| "grad_norm": 0.1386403888463974, |
| "learning_rate": 1.0240246589884044e-05, |
| "loss": 0.0052, |
| "step": 16060 |
| }, |
| { |
| "epoch": 8.992725237828763, |
| "grad_norm": 0.12247960269451141, |
| "learning_rate": 1.0190171586869258e-05, |
| "loss": 0.0059, |
| "step": 16070 |
| }, |
| { |
| "epoch": 8.998321208729715, |
| "grad_norm": 0.08335962146520615, |
| "learning_rate": 1.0140205422405214e-05, |
| "loss": 0.0045, |
| "step": 16080 |
| }, |
| { |
| "epoch": 9.003917179630665, |
| "grad_norm": 0.13206073641777039, |
| "learning_rate": 1.009034823309749e-05, |
| "loss": 0.0049, |
| "step": 16090 |
| }, |
| { |
| "epoch": 9.009513150531617, |
| "grad_norm": 0.1473735272884369, |
| "learning_rate": 1.0040600155253765e-05, |
| "loss": 0.0035, |
| "step": 16100 |
| }, |
| { |
| "epoch": 9.015109121432568, |
| "grad_norm": 0.07891960442066193, |
| "learning_rate": 9.990961324883358e-06, |
| "loss": 0.0064, |
| "step": 16110 |
| }, |
| { |
| "epoch": 9.02070509233352, |
| "grad_norm": 0.16706879436969757, |
| "learning_rate": 9.941431877696955e-06, |
| "loss": 0.0039, |
| "step": 16120 |
| }, |
| { |
| "epoch": 9.026301063234472, |
| "grad_norm": 0.0876656025648117, |
| "learning_rate": 9.892011949106172e-06, |
| "loss": 0.008, |
| "step": 16130 |
| }, |
| { |
| "epoch": 9.031897034135422, |
| "grad_norm": 0.10205890983343124, |
| "learning_rate": 9.842701674223187e-06, |
| "loss": 0.0071, |
| "step": 16140 |
| }, |
| { |
| "epoch": 9.037493005036374, |
| "grad_norm": 0.16774903237819672, |
| "learning_rate": 9.793501187860432e-06, |
| "loss": 0.0037, |
| "step": 16150 |
| }, |
| { |
| "epoch": 9.043088975937325, |
| "grad_norm": 0.2676295340061188, |
| "learning_rate": 9.744410624530148e-06, |
| "loss": 0.0062, |
| "step": 16160 |
| }, |
| { |
| "epoch": 9.048684946838277, |
| "grad_norm": 0.2096317857503891, |
| "learning_rate": 9.695430118444048e-06, |
| "loss": 0.0036, |
| "step": 16170 |
| }, |
| { |
| "epoch": 9.054280917739227, |
| "grad_norm": 0.09436144679784775, |
| "learning_rate": 9.646559803512994e-06, |
| "loss": 0.0045, |
| "step": 16180 |
| }, |
| { |
| "epoch": 9.05987688864018, |
| "grad_norm": 0.17315761744976044, |
| "learning_rate": 9.597799813346525e-06, |
| "loss": 0.0064, |
| "step": 16190 |
| }, |
| { |
| "epoch": 9.06547285954113, |
| "grad_norm": 0.07326121628284454, |
| "learning_rate": 9.549150281252633e-06, |
| "loss": 0.0035, |
| "step": 16200 |
| }, |
| { |
| "epoch": 9.071068830442082, |
| "grad_norm": 0.14720216393470764, |
| "learning_rate": 9.500611340237258e-06, |
| "loss": 0.0055, |
| "step": 16210 |
| }, |
| { |
| "epoch": 9.076664801343034, |
| "grad_norm": 0.0691135823726654, |
| "learning_rate": 9.452183123004e-06, |
| "loss": 0.0077, |
| "step": 16220 |
| }, |
| { |
| "epoch": 9.082260772243984, |
| "grad_norm": 0.13588427007198334, |
| "learning_rate": 9.403865761953779e-06, |
| "loss": 0.0046, |
| "step": 16230 |
| }, |
| { |
| "epoch": 9.087856743144936, |
| "grad_norm": 0.13852879405021667, |
| "learning_rate": 9.355659389184396e-06, |
| "loss": 0.0046, |
| "step": 16240 |
| }, |
| { |
| "epoch": 9.093452714045887, |
| "grad_norm": 0.0626252144575119, |
| "learning_rate": 9.307564136490254e-06, |
| "loss": 0.0069, |
| "step": 16250 |
| }, |
| { |
| "epoch": 9.099048684946839, |
| "grad_norm": 0.25919991731643677, |
| "learning_rate": 9.259580135361929e-06, |
| "loss": 0.0046, |
| "step": 16260 |
| }, |
| { |
| "epoch": 9.104644655847789, |
| "grad_norm": 0.0894588977098465, |
| "learning_rate": 9.211707516985829e-06, |
| "loss": 0.0046, |
| "step": 16270 |
| }, |
| { |
| "epoch": 9.110240626748741, |
| "grad_norm": 0.45610806345939636, |
| "learning_rate": 9.163946412243896e-06, |
| "loss": 0.0069, |
| "step": 16280 |
| }, |
| { |
| "epoch": 9.115836597649691, |
| "grad_norm": 0.1714649349451065, |
| "learning_rate": 9.116296951713133e-06, |
| "loss": 0.0058, |
| "step": 16290 |
| }, |
| { |
| "epoch": 9.121432568550643, |
| "grad_norm": 0.20788055658340454, |
| "learning_rate": 9.068759265665384e-06, |
| "loss": 0.0046, |
| "step": 16300 |
| }, |
| { |
| "epoch": 9.127028539451596, |
| "grad_norm": 0.13281454145908356, |
| "learning_rate": 9.02133348406684e-06, |
| "loss": 0.0073, |
| "step": 16310 |
| }, |
| { |
| "epoch": 9.132624510352546, |
| "grad_norm": 0.20327745378017426, |
| "learning_rate": 8.974019736577777e-06, |
| "loss": 0.0061, |
| "step": 16320 |
| }, |
| { |
| "epoch": 9.138220481253498, |
| "grad_norm": 0.1418776661157608, |
| "learning_rate": 8.92681815255219e-06, |
| "loss": 0.0054, |
| "step": 16330 |
| }, |
| { |
| "epoch": 9.143816452154448, |
| "grad_norm": 0.08617481589317322, |
| "learning_rate": 8.879728861037384e-06, |
| "loss": 0.0057, |
| "step": 16340 |
| }, |
| { |
| "epoch": 9.1494124230554, |
| "grad_norm": 0.14362642168998718, |
| "learning_rate": 8.832751990773714e-06, |
| "loss": 0.0059, |
| "step": 16350 |
| }, |
| { |
| "epoch": 9.15500839395635, |
| "grad_norm": 0.05195459723472595, |
| "learning_rate": 8.785887670194138e-06, |
| "loss": 0.0063, |
| "step": 16360 |
| }, |
| { |
| "epoch": 9.160604364857303, |
| "grad_norm": 0.1765775829553604, |
| "learning_rate": 8.739136027423894e-06, |
| "loss": 0.0075, |
| "step": 16370 |
| }, |
| { |
| "epoch": 9.166200335758255, |
| "grad_norm": 0.1646648496389389, |
| "learning_rate": 8.692497190280224e-06, |
| "loss": 0.0065, |
| "step": 16380 |
| }, |
| { |
| "epoch": 9.171796306659205, |
| "grad_norm": 0.16203129291534424, |
| "learning_rate": 8.645971286271904e-06, |
| "loss": 0.0049, |
| "step": 16390 |
| }, |
| { |
| "epoch": 9.177392277560157, |
| "grad_norm": 0.07584717124700546, |
| "learning_rate": 8.599558442598998e-06, |
| "loss": 0.0071, |
| "step": 16400 |
| }, |
| { |
| "epoch": 9.182988248461108, |
| "grad_norm": 0.14030073583126068, |
| "learning_rate": 8.55325878615244e-06, |
| "loss": 0.0033, |
| "step": 16410 |
| }, |
| { |
| "epoch": 9.18858421936206, |
| "grad_norm": 0.09595508873462677, |
| "learning_rate": 8.507072443513702e-06, |
| "loss": 0.0034, |
| "step": 16420 |
| }, |
| { |
| "epoch": 9.19418019026301, |
| "grad_norm": 0.2346934825181961, |
| "learning_rate": 8.460999540954517e-06, |
| "loss": 0.0091, |
| "step": 16430 |
| }, |
| { |
| "epoch": 9.199776161163962, |
| "grad_norm": 0.11720654368400574, |
| "learning_rate": 8.415040204436426e-06, |
| "loss": 0.0056, |
| "step": 16440 |
| }, |
| { |
| "epoch": 9.205372132064912, |
| "grad_norm": 0.18266266584396362, |
| "learning_rate": 8.369194559610482e-06, |
| "loss": 0.0044, |
| "step": 16450 |
| }, |
| { |
| "epoch": 9.210968102965865, |
| "grad_norm": 0.11530566215515137, |
| "learning_rate": 8.323462731816961e-06, |
| "loss": 0.0091, |
| "step": 16460 |
| }, |
| { |
| "epoch": 9.216564073866817, |
| "grad_norm": 0.15264108777046204, |
| "learning_rate": 8.277844846084898e-06, |
| "loss": 0.0056, |
| "step": 16470 |
| }, |
| { |
| "epoch": 9.222160044767767, |
| "grad_norm": 0.12221037596464157, |
| "learning_rate": 8.232341027131885e-06, |
| "loss": 0.0046, |
| "step": 16480 |
| }, |
| { |
| "epoch": 9.227756015668719, |
| "grad_norm": 0.18118728697299957, |
| "learning_rate": 8.186951399363613e-06, |
| "loss": 0.0048, |
| "step": 16490 |
| }, |
| { |
| "epoch": 9.23335198656967, |
| "grad_norm": 0.11156457662582397, |
| "learning_rate": 8.141676086873572e-06, |
| "loss": 0.0038, |
| "step": 16500 |
| }, |
| { |
| "epoch": 9.238947957470621, |
| "grad_norm": 0.24215921759605408, |
| "learning_rate": 8.096515213442762e-06, |
| "loss": 0.0053, |
| "step": 16510 |
| }, |
| { |
| "epoch": 9.244543928371572, |
| "grad_norm": 0.1042838767170906, |
| "learning_rate": 8.051468902539272e-06, |
| "loss": 0.0038, |
| "step": 16520 |
| }, |
| { |
| "epoch": 9.250139899272524, |
| "grad_norm": 0.15312840044498444, |
| "learning_rate": 8.00653727731801e-06, |
| "loss": 0.0056, |
| "step": 16530 |
| }, |
| { |
| "epoch": 9.255735870173474, |
| "grad_norm": 0.12216275930404663, |
| "learning_rate": 7.96172046062032e-06, |
| "loss": 0.009, |
| "step": 16540 |
| }, |
| { |
| "epoch": 9.261331841074426, |
| "grad_norm": 0.14912450313568115, |
| "learning_rate": 7.917018574973645e-06, |
| "loss": 0.0104, |
| "step": 16550 |
| }, |
| { |
| "epoch": 9.266927811975378, |
| "grad_norm": 0.2108585089445114, |
| "learning_rate": 7.872431742591268e-06, |
| "loss": 0.0068, |
| "step": 16560 |
| }, |
| { |
| "epoch": 9.272523782876329, |
| "grad_norm": 0.0906781554222107, |
| "learning_rate": 7.827960085371855e-06, |
| "loss": 0.0044, |
| "step": 16570 |
| }, |
| { |
| "epoch": 9.27811975377728, |
| "grad_norm": 0.13947215676307678, |
| "learning_rate": 7.783603724899257e-06, |
| "loss": 0.0057, |
| "step": 16580 |
| }, |
| { |
| "epoch": 9.283715724678231, |
| "grad_norm": 0.11844757199287415, |
| "learning_rate": 7.739362782442021e-06, |
| "loss": 0.0044, |
| "step": 16590 |
| }, |
| { |
| "epoch": 9.289311695579183, |
| "grad_norm": 0.13809189200401306, |
| "learning_rate": 7.695237378953223e-06, |
| "loss": 0.0064, |
| "step": 16600 |
| }, |
| { |
| "epoch": 9.294907666480134, |
| "grad_norm": 0.33429670333862305, |
| "learning_rate": 7.651227635070041e-06, |
| "loss": 0.0033, |
| "step": 16610 |
| }, |
| { |
| "epoch": 9.300503637381086, |
| "grad_norm": 0.15949353575706482, |
| "learning_rate": 7.607333671113409e-06, |
| "loss": 0.0142, |
| "step": 16620 |
| }, |
| { |
| "epoch": 9.306099608282038, |
| "grad_norm": 0.30085355043411255, |
| "learning_rate": 7.56355560708778e-06, |
| "loss": 0.0064, |
| "step": 16630 |
| }, |
| { |
| "epoch": 9.311695579182988, |
| "grad_norm": 0.09114662557840347, |
| "learning_rate": 7.519893562680663e-06, |
| "loss": 0.0062, |
| "step": 16640 |
| }, |
| { |
| "epoch": 9.31729155008394, |
| "grad_norm": 0.3248306214809418, |
| "learning_rate": 7.476347657262456e-06, |
| "loss": 0.0063, |
| "step": 16650 |
| }, |
| { |
| "epoch": 9.32288752098489, |
| "grad_norm": 0.15951383113861084, |
| "learning_rate": 7.432918009885997e-06, |
| "loss": 0.0069, |
| "step": 16660 |
| }, |
| { |
| "epoch": 9.328483491885843, |
| "grad_norm": 0.1393985003232956, |
| "learning_rate": 7.389604739286271e-06, |
| "loss": 0.0046, |
| "step": 16670 |
| }, |
| { |
| "epoch": 9.334079462786793, |
| "grad_norm": 0.14699183404445648, |
| "learning_rate": 7.3464079638801365e-06, |
| "loss": 0.0047, |
| "step": 16680 |
| }, |
| { |
| "epoch": 9.339675433687745, |
| "grad_norm": 0.14034835994243622, |
| "learning_rate": 7.30332780176588e-06, |
| "loss": 0.0068, |
| "step": 16690 |
| }, |
| { |
| "epoch": 9.345271404588695, |
| "grad_norm": 0.202976793050766, |
| "learning_rate": 7.260364370723044e-06, |
| "loss": 0.007, |
| "step": 16700 |
| }, |
| { |
| "epoch": 9.350867375489647, |
| "grad_norm": 0.1574084311723709, |
| "learning_rate": 7.217517788212025e-06, |
| "loss": 0.0037, |
| "step": 16710 |
| }, |
| { |
| "epoch": 9.3564633463906, |
| "grad_norm": 0.23007866740226746, |
| "learning_rate": 7.174788171373731e-06, |
| "loss": 0.006, |
| "step": 16720 |
| }, |
| { |
| "epoch": 9.36205931729155, |
| "grad_norm": 0.06488067656755447, |
| "learning_rate": 7.132175637029293e-06, |
| "loss": 0.0038, |
| "step": 16730 |
| }, |
| { |
| "epoch": 9.367655288192502, |
| "grad_norm": 0.08520302921533585, |
| "learning_rate": 7.089680301679752e-06, |
| "loss": 0.0035, |
| "step": 16740 |
| }, |
| { |
| "epoch": 9.373251259093452, |
| "grad_norm": 0.1132565289735794, |
| "learning_rate": 7.047302281505736e-06, |
| "loss": 0.0033, |
| "step": 16750 |
| }, |
| { |
| "epoch": 9.378847229994404, |
| "grad_norm": 0.29900556802749634, |
| "learning_rate": 7.005041692367154e-06, |
| "loss": 0.0083, |
| "step": 16760 |
| }, |
| { |
| "epoch": 9.384443200895355, |
| "grad_norm": 0.21089625358581543, |
| "learning_rate": 6.962898649802823e-06, |
| "loss": 0.004, |
| "step": 16770 |
| }, |
| { |
| "epoch": 9.390039171796307, |
| "grad_norm": 0.1411179006099701, |
| "learning_rate": 6.92087326903022e-06, |
| "loss": 0.0051, |
| "step": 16780 |
| }, |
| { |
| "epoch": 9.395635142697259, |
| "grad_norm": 0.20569784939289093, |
| "learning_rate": 6.878965664945108e-06, |
| "loss": 0.0057, |
| "step": 16790 |
| }, |
| { |
| "epoch": 9.40123111359821, |
| "grad_norm": 0.13673344254493713, |
| "learning_rate": 6.837175952121306e-06, |
| "loss": 0.0029, |
| "step": 16800 |
| }, |
| { |
| "epoch": 9.406827084499161, |
| "grad_norm": 0.07221835851669312, |
| "learning_rate": 6.795504244810285e-06, |
| "loss": 0.0028, |
| "step": 16810 |
| }, |
| { |
| "epoch": 9.412423055400112, |
| "grad_norm": 0.15173490345478058, |
| "learning_rate": 6.753950656940905e-06, |
| "loss": 0.0055, |
| "step": 16820 |
| }, |
| { |
| "epoch": 9.418019026301064, |
| "grad_norm": 0.12818996608257294, |
| "learning_rate": 6.712515302119077e-06, |
| "loss": 0.0047, |
| "step": 16830 |
| }, |
| { |
| "epoch": 9.423614997202014, |
| "grad_norm": 0.2607164978981018, |
| "learning_rate": 6.671198293627479e-06, |
| "loss": 0.0062, |
| "step": 16840 |
| }, |
| { |
| "epoch": 9.429210968102966, |
| "grad_norm": 0.1782405823469162, |
| "learning_rate": 6.629999744425236e-06, |
| "loss": 0.0038, |
| "step": 16850 |
| }, |
| { |
| "epoch": 9.434806939003916, |
| "grad_norm": 0.1047229990363121, |
| "learning_rate": 6.588919767147639e-06, |
| "loss": 0.0038, |
| "step": 16860 |
| }, |
| { |
| "epoch": 9.440402909904869, |
| "grad_norm": 0.21528460085391998, |
| "learning_rate": 6.5479584741057255e-06, |
| "loss": 0.0044, |
| "step": 16870 |
| }, |
| { |
| "epoch": 9.44599888080582, |
| "grad_norm": 0.033052559942007065, |
| "learning_rate": 6.5071159772861436e-06, |
| "loss": 0.0043, |
| "step": 16880 |
| }, |
| { |
| "epoch": 9.451594851706771, |
| "grad_norm": 0.08729052543640137, |
| "learning_rate": 6.466392388350695e-06, |
| "loss": 0.0067, |
| "step": 16890 |
| }, |
| { |
| "epoch": 9.457190822607723, |
| "grad_norm": 0.1754913330078125, |
| "learning_rate": 6.425787818636131e-06, |
| "loss": 0.0038, |
| "step": 16900 |
| }, |
| { |
| "epoch": 9.462786793508673, |
| "grad_norm": 0.13821344077587128, |
| "learning_rate": 6.385302379153818e-06, |
| "loss": 0.0046, |
| "step": 16910 |
| }, |
| { |
| "epoch": 9.468382764409625, |
| "grad_norm": 0.1275906264781952, |
| "learning_rate": 6.344936180589351e-06, |
| "loss": 0.0036, |
| "step": 16920 |
| }, |
| { |
| "epoch": 9.473978735310576, |
| "grad_norm": 0.14954271912574768, |
| "learning_rate": 6.304689333302416e-06, |
| "loss": 0.0034, |
| "step": 16930 |
| }, |
| { |
| "epoch": 9.479574706211528, |
| "grad_norm": 0.12982557713985443, |
| "learning_rate": 6.264561947326331e-06, |
| "loss": 0.0043, |
| "step": 16940 |
| }, |
| { |
| "epoch": 9.485170677112478, |
| "grad_norm": 0.06912703812122345, |
| "learning_rate": 6.22455413236786e-06, |
| "loss": 0.0055, |
| "step": 16950 |
| }, |
| { |
| "epoch": 9.49076664801343, |
| "grad_norm": 0.19244985282421112, |
| "learning_rate": 6.184665997806832e-06, |
| "loss": 0.0043, |
| "step": 16960 |
| }, |
| { |
| "epoch": 9.496362618914382, |
| "grad_norm": 0.08739597350358963, |
| "learning_rate": 6.144897652695864e-06, |
| "loss": 0.0151, |
| "step": 16970 |
| }, |
| { |
| "epoch": 9.501958589815333, |
| "grad_norm": 0.11885930597782135, |
| "learning_rate": 6.1052492057601275e-06, |
| "loss": 0.0073, |
| "step": 16980 |
| }, |
| { |
| "epoch": 9.507554560716285, |
| "grad_norm": 0.07571222633123398, |
| "learning_rate": 6.0657207653969315e-06, |
| "loss": 0.0032, |
| "step": 16990 |
| }, |
| { |
| "epoch": 9.513150531617235, |
| "grad_norm": 0.07605729252099991, |
| "learning_rate": 6.026312439675552e-06, |
| "loss": 0.0036, |
| "step": 17000 |
| }, |
| { |
| "epoch": 9.518746502518187, |
| "grad_norm": 0.20224310457706451, |
| "learning_rate": 5.9870243363368275e-06, |
| "loss": 0.0055, |
| "step": 17010 |
| }, |
| { |
| "epoch": 9.524342473419138, |
| "grad_norm": 0.09693833440542221, |
| "learning_rate": 5.947856562792925e-06, |
| "loss": 0.0048, |
| "step": 17020 |
| }, |
| { |
| "epoch": 9.52993844432009, |
| "grad_norm": 0.13180632889270782, |
| "learning_rate": 5.908809226127054e-06, |
| "loss": 0.0052, |
| "step": 17030 |
| }, |
| { |
| "epoch": 9.53553441522104, |
| "grad_norm": 0.18198780715465546, |
| "learning_rate": 5.869882433093155e-06, |
| "loss": 0.0053, |
| "step": 17040 |
| }, |
| { |
| "epoch": 9.541130386121992, |
| "grad_norm": 0.08620735257863998, |
| "learning_rate": 5.831076290115573e-06, |
| "loss": 0.0047, |
| "step": 17050 |
| }, |
| { |
| "epoch": 9.546726357022944, |
| "grad_norm": 0.18070462346076965, |
| "learning_rate": 5.79239090328883e-06, |
| "loss": 0.005, |
| "step": 17060 |
| }, |
| { |
| "epoch": 9.552322327923894, |
| "grad_norm": 0.13954901695251465, |
| "learning_rate": 5.753826378377286e-06, |
| "loss": 0.0037, |
| "step": 17070 |
| }, |
| { |
| "epoch": 9.557918298824847, |
| "grad_norm": 0.08338068425655365, |
| "learning_rate": 5.715382820814885e-06, |
| "loss": 0.0035, |
| "step": 17080 |
| }, |
| { |
| "epoch": 9.563514269725797, |
| "grad_norm": 0.1206720620393753, |
| "learning_rate": 5.67706033570487e-06, |
| "loss": 0.0071, |
| "step": 17090 |
| }, |
| { |
| "epoch": 9.569110240626749, |
| "grad_norm": 0.1978680044412613, |
| "learning_rate": 5.6388590278194096e-06, |
| "loss": 0.0048, |
| "step": 17100 |
| }, |
| { |
| "epoch": 9.5747062115277, |
| "grad_norm": 0.2190864086151123, |
| "learning_rate": 5.600779001599455e-06, |
| "loss": 0.0043, |
| "step": 17110 |
| }, |
| { |
| "epoch": 9.580302182428651, |
| "grad_norm": 0.0734127014875412, |
| "learning_rate": 5.562820361154314e-06, |
| "loss": 0.0049, |
| "step": 17120 |
| }, |
| { |
| "epoch": 9.585898153329603, |
| "grad_norm": 0.14367960393428802, |
| "learning_rate": 5.524983210261481e-06, |
| "loss": 0.0035, |
| "step": 17130 |
| }, |
| { |
| "epoch": 9.591494124230554, |
| "grad_norm": 0.26178881525993347, |
| "learning_rate": 5.48726765236629e-06, |
| "loss": 0.005, |
| "step": 17140 |
| }, |
| { |
| "epoch": 9.597090095131506, |
| "grad_norm": 0.10900067538022995, |
| "learning_rate": 5.449673790581611e-06, |
| "loss": 0.0065, |
| "step": 17150 |
| }, |
| { |
| "epoch": 9.602686066032456, |
| "grad_norm": 0.16984951496124268, |
| "learning_rate": 5.412201727687644e-06, |
| "loss": 0.0051, |
| "step": 17160 |
| }, |
| { |
| "epoch": 9.608282036933408, |
| "grad_norm": 0.0894961804151535, |
| "learning_rate": 5.374851566131561e-06, |
| "loss": 0.0038, |
| "step": 17170 |
| }, |
| { |
| "epoch": 9.613878007834359, |
| "grad_norm": 0.25771039724349976, |
| "learning_rate": 5.337623408027293e-06, |
| "loss": 0.0073, |
| "step": 17180 |
| }, |
| { |
| "epoch": 9.61947397873531, |
| "grad_norm": 0.14566998183727264, |
| "learning_rate": 5.300517355155215e-06, |
| "loss": 0.0046, |
| "step": 17190 |
| }, |
| { |
| "epoch": 9.625069949636263, |
| "grad_norm": 0.17133091390132904, |
| "learning_rate": 5.263533508961827e-06, |
| "loss": 0.0073, |
| "step": 17200 |
| }, |
| { |
| "epoch": 9.630665920537213, |
| "grad_norm": 0.16593864560127258, |
| "learning_rate": 5.226671970559577e-06, |
| "loss": 0.0053, |
| "step": 17210 |
| }, |
| { |
| "epoch": 9.636261891438165, |
| "grad_norm": 0.11243371665477753, |
| "learning_rate": 5.1899328407264855e-06, |
| "loss": 0.0043, |
| "step": 17220 |
| }, |
| { |
| "epoch": 9.641857862339116, |
| "grad_norm": 0.15767988562583923, |
| "learning_rate": 5.153316219905946e-06, |
| "loss": 0.0072, |
| "step": 17230 |
| }, |
| { |
| "epoch": 9.647453833240068, |
| "grad_norm": 0.2645623981952667, |
| "learning_rate": 5.116822208206396e-06, |
| "loss": 0.0052, |
| "step": 17240 |
| }, |
| { |
| "epoch": 9.653049804141018, |
| "grad_norm": 0.08610297739505768, |
| "learning_rate": 5.080450905401057e-06, |
| "loss": 0.0056, |
| "step": 17250 |
| }, |
| { |
| "epoch": 9.65864577504197, |
| "grad_norm": 0.08036172389984131, |
| "learning_rate": 5.044202410927706e-06, |
| "loss": 0.0036, |
| "step": 17260 |
| }, |
| { |
| "epoch": 9.66424174594292, |
| "grad_norm": 0.18519535660743713, |
| "learning_rate": 5.008076823888319e-06, |
| "loss": 0.0057, |
| "step": 17270 |
| }, |
| { |
| "epoch": 9.669837716843872, |
| "grad_norm": 0.19542230665683746, |
| "learning_rate": 4.972074243048897e-06, |
| "loss": 0.0036, |
| "step": 17280 |
| }, |
| { |
| "epoch": 9.675433687744825, |
| "grad_norm": 0.21911007165908813, |
| "learning_rate": 4.936194766839103e-06, |
| "loss": 0.0039, |
| "step": 17290 |
| }, |
| { |
| "epoch": 9.681029658645775, |
| "grad_norm": 0.14355053007602692, |
| "learning_rate": 4.900438493352055e-06, |
| "loss": 0.0052, |
| "step": 17300 |
| }, |
| { |
| "epoch": 9.686625629546727, |
| "grad_norm": 0.34103378653526306, |
| "learning_rate": 4.864805520344051e-06, |
| "loss": 0.0063, |
| "step": 17310 |
| }, |
| { |
| "epoch": 9.692221600447677, |
| "grad_norm": 0.18420292437076569, |
| "learning_rate": 4.829295945234258e-06, |
| "loss": 0.0046, |
| "step": 17320 |
| }, |
| { |
| "epoch": 9.69781757134863, |
| "grad_norm": 0.11074794083833694, |
| "learning_rate": 4.7939098651045235e-06, |
| "loss": 0.0056, |
| "step": 17330 |
| }, |
| { |
| "epoch": 9.70341354224958, |
| "grad_norm": 0.1706562340259552, |
| "learning_rate": 4.758647376699032e-06, |
| "loss": 0.0038, |
| "step": 17340 |
| }, |
| { |
| "epoch": 9.709009513150532, |
| "grad_norm": 0.16499456763267517, |
| "learning_rate": 4.723508576424062e-06, |
| "loss": 0.0046, |
| "step": 17350 |
| }, |
| { |
| "epoch": 9.714605484051482, |
| "grad_norm": 0.08222458511590958, |
| "learning_rate": 4.688493560347773e-06, |
| "loss": 0.0062, |
| "step": 17360 |
| }, |
| { |
| "epoch": 9.720201454952434, |
| "grad_norm": 0.13518883287906647, |
| "learning_rate": 4.653602424199876e-06, |
| "loss": 0.0086, |
| "step": 17370 |
| }, |
| { |
| "epoch": 9.725797425853386, |
| "grad_norm": 0.16546756029129028, |
| "learning_rate": 4.618835263371396e-06, |
| "loss": 0.0051, |
| "step": 17380 |
| }, |
| { |
| "epoch": 9.731393396754337, |
| "grad_norm": 0.31760314106941223, |
| "learning_rate": 4.5841921729144424e-06, |
| "loss": 0.0056, |
| "step": 17390 |
| }, |
| { |
| "epoch": 9.736989367655289, |
| "grad_norm": 0.11362655460834503, |
| "learning_rate": 4.549673247541875e-06, |
| "loss": 0.0085, |
| "step": 17400 |
| }, |
| { |
| "epoch": 9.742585338556239, |
| "grad_norm": 0.12480427324771881, |
| "learning_rate": 4.515278581627141e-06, |
| "loss": 0.003, |
| "step": 17410 |
| }, |
| { |
| "epoch": 9.748181309457191, |
| "grad_norm": 0.09458563476800919, |
| "learning_rate": 4.48100826920394e-06, |
| "loss": 0.0043, |
| "step": 17420 |
| }, |
| { |
| "epoch": 9.753777280358142, |
| "grad_norm": 0.15045048296451569, |
| "learning_rate": 4.446862403965984e-06, |
| "loss": 0.0035, |
| "step": 17430 |
| }, |
| { |
| "epoch": 9.759373251259094, |
| "grad_norm": 0.10754050314426422, |
| "learning_rate": 4.412841079266777e-06, |
| "loss": 0.0059, |
| "step": 17440 |
| }, |
| { |
| "epoch": 9.764969222160044, |
| "grad_norm": 0.09626353532075882, |
| "learning_rate": 4.378944388119311e-06, |
| "loss": 0.0064, |
| "step": 17450 |
| }, |
| { |
| "epoch": 9.770565193060996, |
| "grad_norm": 0.0682365670800209, |
| "learning_rate": 4.3451724231958644e-06, |
| "loss": 0.0039, |
| "step": 17460 |
| }, |
| { |
| "epoch": 9.776161163961948, |
| "grad_norm": 0.0859832614660263, |
| "learning_rate": 4.311525276827682e-06, |
| "loss": 0.0038, |
| "step": 17470 |
| }, |
| { |
| "epoch": 9.781757134862898, |
| "grad_norm": 0.057302311062812805, |
| "learning_rate": 4.27800304100478e-06, |
| "loss": 0.0061, |
| "step": 17480 |
| }, |
| { |
| "epoch": 9.78735310576385, |
| "grad_norm": 0.30939188599586487, |
| "learning_rate": 4.244605807375679e-06, |
| "loss": 0.0072, |
| "step": 17490 |
| }, |
| { |
| "epoch": 9.7929490766648, |
| "grad_norm": 0.06655000895261765, |
| "learning_rate": 4.2113336672471245e-06, |
| "loss": 0.006, |
| "step": 17500 |
| }, |
| { |
| "epoch": 9.798545047565753, |
| "grad_norm": 0.07795148342847824, |
| "learning_rate": 4.178186711583904e-06, |
| "loss": 0.0064, |
| "step": 17510 |
| }, |
| { |
| "epoch": 9.804141018466703, |
| "grad_norm": 0.06218419224023819, |
| "learning_rate": 4.145165031008508e-06, |
| "loss": 0.0041, |
| "step": 17520 |
| }, |
| { |
| "epoch": 9.809736989367655, |
| "grad_norm": 0.064509816467762, |
| "learning_rate": 4.112268715800943e-06, |
| "loss": 0.0048, |
| "step": 17530 |
| }, |
| { |
| "epoch": 9.815332960268606, |
| "grad_norm": 0.2096703052520752, |
| "learning_rate": 4.079497855898501e-06, |
| "loss": 0.0049, |
| "step": 17540 |
| }, |
| { |
| "epoch": 9.820928931169558, |
| "grad_norm": 0.15621553361415863, |
| "learning_rate": 4.046852540895446e-06, |
| "loss": 0.0046, |
| "step": 17550 |
| }, |
| { |
| "epoch": 9.82652490207051, |
| "grad_norm": 0.089202381670475, |
| "learning_rate": 4.01433286004283e-06, |
| "loss": 0.0078, |
| "step": 17560 |
| }, |
| { |
| "epoch": 9.83212087297146, |
| "grad_norm": 0.11227259039878845, |
| "learning_rate": 3.981938902248222e-06, |
| "loss": 0.0046, |
| "step": 17570 |
| }, |
| { |
| "epoch": 9.837716843872412, |
| "grad_norm": 0.038788773119449615, |
| "learning_rate": 3.949670756075447e-06, |
| "loss": 0.0093, |
| "step": 17580 |
| }, |
| { |
| "epoch": 9.843312814773363, |
| "grad_norm": 0.1287786364555359, |
| "learning_rate": 3.917528509744412e-06, |
| "loss": 0.0041, |
| "step": 17590 |
| }, |
| { |
| "epoch": 9.848908785674315, |
| "grad_norm": 0.04712485149502754, |
| "learning_rate": 3.885512251130763e-06, |
| "loss": 0.0046, |
| "step": 17600 |
| }, |
| { |
| "epoch": 9.854504756575265, |
| "grad_norm": 0.24810890853405, |
| "learning_rate": 3.8536220677657495e-06, |
| "loss": 0.0112, |
| "step": 17610 |
| }, |
| { |
| "epoch": 9.860100727476217, |
| "grad_norm": 0.16745951771736145, |
| "learning_rate": 3.821858046835913e-06, |
| "loss": 0.0038, |
| "step": 17620 |
| }, |
| { |
| "epoch": 9.86569669837717, |
| "grad_norm": 0.10218873620033264, |
| "learning_rate": 3.790220275182854e-06, |
| "loss": 0.0037, |
| "step": 17630 |
| }, |
| { |
| "epoch": 9.87129266927812, |
| "grad_norm": 0.19612161815166473, |
| "learning_rate": 3.75870883930306e-06, |
| "loss": 0.004, |
| "step": 17640 |
| }, |
| { |
| "epoch": 9.876888640179072, |
| "grad_norm": 0.20635591447353363, |
| "learning_rate": 3.7273238253475785e-06, |
| "loss": 0.0081, |
| "step": 17650 |
| }, |
| { |
| "epoch": 9.882484611080022, |
| "grad_norm": 0.154740571975708, |
| "learning_rate": 3.696065319121833e-06, |
| "loss": 0.0049, |
| "step": 17660 |
| }, |
| { |
| "epoch": 9.888080581980974, |
| "grad_norm": 0.046477749943733215, |
| "learning_rate": 3.664933406085402e-06, |
| "loss": 0.0055, |
| "step": 17670 |
| }, |
| { |
| "epoch": 9.893676552881924, |
| "grad_norm": 0.20742470026016235, |
| "learning_rate": 3.6339281713517303e-06, |
| "loss": 0.0027, |
| "step": 17680 |
| }, |
| { |
| "epoch": 9.899272523782876, |
| "grad_norm": 0.07390665262937546, |
| "learning_rate": 3.60304969968796e-06, |
| "loss": 0.0035, |
| "step": 17690 |
| }, |
| { |
| "epoch": 9.904868494683829, |
| "grad_norm": 0.12964075803756714, |
| "learning_rate": 3.5722980755146517e-06, |
| "loss": 0.0066, |
| "step": 17700 |
| }, |
| { |
| "epoch": 9.910464465584779, |
| "grad_norm": 0.05571340024471283, |
| "learning_rate": 3.541673382905558e-06, |
| "loss": 0.008, |
| "step": 17710 |
| }, |
| { |
| "epoch": 9.916060436485731, |
| "grad_norm": 0.12276771664619446, |
| "learning_rate": 3.511175705587433e-06, |
| "loss": 0.0069, |
| "step": 17720 |
| }, |
| { |
| "epoch": 9.921656407386681, |
| "grad_norm": 0.09888763725757599, |
| "learning_rate": 3.4808051269397512e-06, |
| "loss": 0.0036, |
| "step": 17730 |
| }, |
| { |
| "epoch": 9.927252378287633, |
| "grad_norm": 0.08338962495326996, |
| "learning_rate": 3.4505617299945336e-06, |
| "loss": 0.004, |
| "step": 17740 |
| }, |
| { |
| "epoch": 9.932848349188584, |
| "grad_norm": 0.06845631450414658, |
| "learning_rate": 3.420445597436056e-06, |
| "loss": 0.0037, |
| "step": 17750 |
| }, |
| { |
| "epoch": 9.938444320089536, |
| "grad_norm": 0.072002112865448, |
| "learning_rate": 3.390456811600673e-06, |
| "loss": 0.0049, |
| "step": 17760 |
| }, |
| { |
| "epoch": 9.944040290990486, |
| "grad_norm": 0.13706427812576294, |
| "learning_rate": 3.360595454476595e-06, |
| "loss": 0.0067, |
| "step": 17770 |
| }, |
| { |
| "epoch": 9.949636261891438, |
| "grad_norm": 0.14595244824886322, |
| "learning_rate": 3.3308616077036115e-06, |
| "loss": 0.0047, |
| "step": 17780 |
| }, |
| { |
| "epoch": 9.95523223279239, |
| "grad_norm": 0.07961612939834595, |
| "learning_rate": 3.301255352572946e-06, |
| "loss": 0.0035, |
| "step": 17790 |
| }, |
| { |
| "epoch": 9.96082820369334, |
| "grad_norm": 0.10814230144023895, |
| "learning_rate": 3.271776770026963e-06, |
| "loss": 0.0048, |
| "step": 17800 |
| }, |
| { |
| "epoch": 9.966424174594293, |
| "grad_norm": 0.11842755228281021, |
| "learning_rate": 3.2424259406589664e-06, |
| "loss": 0.0095, |
| "step": 17810 |
| }, |
| { |
| "epoch": 9.972020145495243, |
| "grad_norm": 0.21332372725009918, |
| "learning_rate": 3.213202944713023e-06, |
| "loss": 0.003, |
| "step": 17820 |
| }, |
| { |
| "epoch": 9.977616116396195, |
| "grad_norm": 0.06386691331863403, |
| "learning_rate": 3.1841078620836683e-06, |
| "loss": 0.0036, |
| "step": 17830 |
| }, |
| { |
| "epoch": 9.983212087297145, |
| "grad_norm": 0.08316194266080856, |
| "learning_rate": 3.155140772315773e-06, |
| "loss": 0.0042, |
| "step": 17840 |
| }, |
| { |
| "epoch": 9.988808058198098, |
| "grad_norm": 0.16622905433177948, |
| "learning_rate": 3.126301754604233e-06, |
| "loss": 0.0039, |
| "step": 17850 |
| }, |
| { |
| "epoch": 9.994404029099048, |
| "grad_norm": 0.11861821264028549, |
| "learning_rate": 3.0975908877938277e-06, |
| "loss": 0.0048, |
| "step": 17860 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.1722375601530075, |
| "learning_rate": 3.0690082503789742e-06, |
| "loss": 0.0026, |
| "step": 17870 |
| }, |
| { |
| "epoch": 10.005595970900952, |
| "grad_norm": 0.06653541326522827, |
| "learning_rate": 3.040553920503503e-06, |
| "loss": 0.0048, |
| "step": 17880 |
| }, |
| { |
| "epoch": 10.011191941801902, |
| "grad_norm": 0.16646505892276764, |
| "learning_rate": 3.0122279759604745e-06, |
| "loss": 0.004, |
| "step": 17890 |
| }, |
| { |
| "epoch": 10.016787912702855, |
| "grad_norm": 0.07118295133113861, |
| "learning_rate": 2.9840304941919415e-06, |
| "loss": 0.0066, |
| "step": 17900 |
| }, |
| { |
| "epoch": 10.022383883603805, |
| "grad_norm": 0.15453752875328064, |
| "learning_rate": 2.9559615522887273e-06, |
| "loss": 0.0052, |
| "step": 17910 |
| }, |
| { |
| "epoch": 10.027979854504757, |
| "grad_norm": 0.23914295434951782, |
| "learning_rate": 2.928021226990263e-06, |
| "loss": 0.0042, |
| "step": 17920 |
| }, |
| { |
| "epoch": 10.033575825405707, |
| "grad_norm": 0.09927842766046524, |
| "learning_rate": 2.9002095946843277e-06, |
| "loss": 0.0053, |
| "step": 17930 |
| }, |
| { |
| "epoch": 10.03917179630666, |
| "grad_norm": 0.039526671171188354, |
| "learning_rate": 2.8725267314068495e-06, |
| "loss": 0.0029, |
| "step": 17940 |
| }, |
| { |
| "epoch": 10.04476776720761, |
| "grad_norm": 0.1683174967765808, |
| "learning_rate": 2.844972712841737e-06, |
| "loss": 0.0042, |
| "step": 17950 |
| }, |
| { |
| "epoch": 10.050363738108562, |
| "grad_norm": 0.10315953940153122, |
| "learning_rate": 2.817547614320615e-06, |
| "loss": 0.0096, |
| "step": 17960 |
| }, |
| { |
| "epoch": 10.055959709009514, |
| "grad_norm": 0.17959141731262207, |
| "learning_rate": 2.790251510822661e-06, |
| "loss": 0.0048, |
| "step": 17970 |
| }, |
| { |
| "epoch": 10.061555679910464, |
| "grad_norm": 0.18458683788776398, |
| "learning_rate": 2.7630844769743757e-06, |
| "loss": 0.0051, |
| "step": 17980 |
| }, |
| { |
| "epoch": 10.067151650811416, |
| "grad_norm": 0.19159017503261566, |
| "learning_rate": 2.73604658704939e-06, |
| "loss": 0.0054, |
| "step": 17990 |
| }, |
| { |
| "epoch": 10.072747621712367, |
| "grad_norm": 0.08318327367305756, |
| "learning_rate": 2.7091379149682685e-06, |
| "loss": 0.0053, |
| "step": 18000 |
| }, |
| { |
| "epoch": 10.078343592613319, |
| "grad_norm": 0.07472005486488342, |
| "learning_rate": 2.682358534298285e-06, |
| "loss": 0.006, |
| "step": 18010 |
| }, |
| { |
| "epoch": 10.083939563514269, |
| "grad_norm": 0.09040942043066025, |
| "learning_rate": 2.6557085182532582e-06, |
| "loss": 0.004, |
| "step": 18020 |
| }, |
| { |
| "epoch": 10.089535534415221, |
| "grad_norm": 0.037220001220703125, |
| "learning_rate": 2.6291879396933004e-06, |
| "loss": 0.0038, |
| "step": 18030 |
| }, |
| { |
| "epoch": 10.095131505316173, |
| "grad_norm": 0.11240635067224503, |
| "learning_rate": 2.602796871124663e-06, |
| "loss": 0.0031, |
| "step": 18040 |
| }, |
| { |
| "epoch": 10.100727476217124, |
| "grad_norm": 0.12259605526924133, |
| "learning_rate": 2.57653538469953e-06, |
| "loss": 0.0049, |
| "step": 18050 |
| }, |
| { |
| "epoch": 10.106323447118076, |
| "grad_norm": 0.16758129000663757, |
| "learning_rate": 2.5504035522157854e-06, |
| "loss": 0.0066, |
| "step": 18060 |
| }, |
| { |
| "epoch": 10.111919418019026, |
| "grad_norm": 0.10704974085092545, |
| "learning_rate": 2.5244014451168863e-06, |
| "loss": 0.0021, |
| "step": 18070 |
| }, |
| { |
| "epoch": 10.117515388919978, |
| "grad_norm": 0.19684171676635742, |
| "learning_rate": 2.4985291344915674e-06, |
| "loss": 0.0035, |
| "step": 18080 |
| }, |
| { |
| "epoch": 10.123111359820928, |
| "grad_norm": 0.25069093704223633, |
| "learning_rate": 2.4727866910737583e-06, |
| "loss": 0.0038, |
| "step": 18090 |
| }, |
| { |
| "epoch": 10.12870733072188, |
| "grad_norm": 0.15888355672359467, |
| "learning_rate": 2.4471741852423237e-06, |
| "loss": 0.0055, |
| "step": 18100 |
| }, |
| { |
| "epoch": 10.13430330162283, |
| "grad_norm": 0.1355513483285904, |
| "learning_rate": 2.421691687020855e-06, |
| "loss": 0.0032, |
| "step": 18110 |
| }, |
| { |
| "epoch": 10.139899272523783, |
| "grad_norm": 0.09521888941526413, |
| "learning_rate": 2.3963392660775575e-06, |
| "loss": 0.0072, |
| "step": 18120 |
| }, |
| { |
| "epoch": 10.145495243424735, |
| "grad_norm": 0.18774038553237915, |
| "learning_rate": 2.371116991724953e-06, |
| "loss": 0.0028, |
| "step": 18130 |
| }, |
| { |
| "epoch": 10.151091214325685, |
| "grad_norm": 0.06293562054634094, |
| "learning_rate": 2.3460249329197824e-06, |
| "loss": 0.0032, |
| "step": 18140 |
| }, |
| { |
| "epoch": 10.156687185226637, |
| "grad_norm": 0.25169095396995544, |
| "learning_rate": 2.321063158262793e-06, |
| "loss": 0.0092, |
| "step": 18150 |
| }, |
| { |
| "epoch": 10.162283156127588, |
| "grad_norm": 0.08376752585172653, |
| "learning_rate": 2.296231735998511e-06, |
| "loss": 0.0021, |
| "step": 18160 |
| }, |
| { |
| "epoch": 10.16787912702854, |
| "grad_norm": 0.06758670508861542, |
| "learning_rate": 2.271530734015104e-06, |
| "loss": 0.0036, |
| "step": 18170 |
| }, |
| { |
| "epoch": 10.17347509792949, |
| "grad_norm": 0.06193256378173828, |
| "learning_rate": 2.2469602198441573e-06, |
| "loss": 0.0036, |
| "step": 18180 |
| }, |
| { |
| "epoch": 10.179071068830442, |
| "grad_norm": 0.21087805926799774, |
| "learning_rate": 2.222520260660521e-06, |
| "loss": 0.0043, |
| "step": 18190 |
| }, |
| { |
| "epoch": 10.184667039731393, |
| "grad_norm": 0.09581877291202545, |
| "learning_rate": 2.1982109232821178e-06, |
| "loss": 0.0048, |
| "step": 18200 |
| }, |
| { |
| "epoch": 10.190263010632345, |
| "grad_norm": 0.23187117278575897, |
| "learning_rate": 2.174032274169746e-06, |
| "loss": 0.0068, |
| "step": 18210 |
| }, |
| { |
| "epoch": 10.195858981533297, |
| "grad_norm": 0.1904383897781372, |
| "learning_rate": 2.149984379426906e-06, |
| "loss": 0.0036, |
| "step": 18220 |
| }, |
| { |
| "epoch": 10.201454952434247, |
| "grad_norm": 0.04588289558887482, |
| "learning_rate": 2.1260673047996227e-06, |
| "loss": 0.0075, |
| "step": 18230 |
| }, |
| { |
| "epoch": 10.2070509233352, |
| "grad_norm": 0.05446457862854004, |
| "learning_rate": 2.102281115676258e-06, |
| "loss": 0.0036, |
| "step": 18240 |
| }, |
| { |
| "epoch": 10.21264689423615, |
| "grad_norm": 0.12907229363918304, |
| "learning_rate": 2.0786258770873647e-06, |
| "loss": 0.0043, |
| "step": 18250 |
| }, |
| { |
| "epoch": 10.218242865137102, |
| "grad_norm": 0.0724627822637558, |
| "learning_rate": 2.0551016537054493e-06, |
| "loss": 0.0024, |
| "step": 18260 |
| }, |
| { |
| "epoch": 10.223838836038052, |
| "grad_norm": 0.11797565221786499, |
| "learning_rate": 2.0317085098448372e-06, |
| "loss": 0.0032, |
| "step": 18270 |
| }, |
| { |
| "epoch": 10.229434806939004, |
| "grad_norm": 0.1239556148648262, |
| "learning_rate": 2.008446509461498e-06, |
| "loss": 0.0038, |
| "step": 18280 |
| }, |
| { |
| "epoch": 10.235030777839956, |
| "grad_norm": 0.05614084377884865, |
| "learning_rate": 1.985315716152847e-06, |
| "loss": 0.0041, |
| "step": 18290 |
| }, |
| { |
| "epoch": 10.240626748740906, |
| "grad_norm": 0.2968387007713318, |
| "learning_rate": 1.962316193157593e-06, |
| "loss": 0.0092, |
| "step": 18300 |
| }, |
| { |
| "epoch": 10.246222719641858, |
| "grad_norm": 0.11529407650232315, |
| "learning_rate": 1.939448003355554e-06, |
| "loss": 0.0059, |
| "step": 18310 |
| }, |
| { |
| "epoch": 10.251818690542809, |
| "grad_norm": 0.24037353694438934, |
| "learning_rate": 1.91671120926748e-06, |
| "loss": 0.0045, |
| "step": 18320 |
| }, |
| { |
| "epoch": 10.257414661443761, |
| "grad_norm": 0.20346900820732117, |
| "learning_rate": 1.8941058730549132e-06, |
| "loss": 0.0047, |
| "step": 18330 |
| }, |
| { |
| "epoch": 10.263010632344711, |
| "grad_norm": 0.27883380651474, |
| "learning_rate": 1.8716320565199618e-06, |
| "loss": 0.0049, |
| "step": 18340 |
| }, |
| { |
| "epoch": 10.268606603245663, |
| "grad_norm": 0.12232355028390884, |
| "learning_rate": 1.849289821105199e-06, |
| "loss": 0.0077, |
| "step": 18350 |
| }, |
| { |
| "epoch": 10.274202574146614, |
| "grad_norm": 0.09397400170564651, |
| "learning_rate": 1.8270792278934302e-06, |
| "loss": 0.0039, |
| "step": 18360 |
| }, |
| { |
| "epoch": 10.279798545047566, |
| "grad_norm": 0.13843244314193726, |
| "learning_rate": 1.8050003376075707e-06, |
| "loss": 0.0059, |
| "step": 18370 |
| }, |
| { |
| "epoch": 10.285394515948518, |
| "grad_norm": 0.04927824065089226, |
| "learning_rate": 1.7830532106104747e-06, |
| "loss": 0.003, |
| "step": 18380 |
| }, |
| { |
| "epoch": 10.290990486849468, |
| "grad_norm": 0.2848436236381531, |
| "learning_rate": 1.7612379069047335e-06, |
| "loss": 0.004, |
| "step": 18390 |
| }, |
| { |
| "epoch": 10.29658645775042, |
| "grad_norm": 0.10808296501636505, |
| "learning_rate": 1.7395544861325718e-06, |
| "loss": 0.0072, |
| "step": 18400 |
| }, |
| { |
| "epoch": 10.30218242865137, |
| "grad_norm": 0.08363109827041626, |
| "learning_rate": 1.7180030075756136e-06, |
| "loss": 0.0029, |
| "step": 18410 |
| }, |
| { |
| "epoch": 10.307778399552323, |
| "grad_norm": 0.07970738410949707, |
| "learning_rate": 1.696583530154794e-06, |
| "loss": 0.0058, |
| "step": 18420 |
| }, |
| { |
| "epoch": 10.313374370453273, |
| "grad_norm": 0.06155739724636078, |
| "learning_rate": 1.6752961124301415e-06, |
| "loss": 0.0042, |
| "step": 18430 |
| }, |
| { |
| "epoch": 10.318970341354225, |
| "grad_norm": 0.15518154203891754, |
| "learning_rate": 1.6541408126006463e-06, |
| "loss": 0.006, |
| "step": 18440 |
| }, |
| { |
| "epoch": 10.324566312255175, |
| "grad_norm": 0.06478218734264374, |
| "learning_rate": 1.6331176885040878e-06, |
| "loss": 0.0083, |
| "step": 18450 |
| }, |
| { |
| "epoch": 10.330162283156128, |
| "grad_norm": 0.11871203780174255, |
| "learning_rate": 1.6122267976168781e-06, |
| "loss": 0.0046, |
| "step": 18460 |
| }, |
| { |
| "epoch": 10.33575825405708, |
| "grad_norm": 0.13164940476417542, |
| "learning_rate": 1.5914681970539192e-06, |
| "loss": 0.0055, |
| "step": 18470 |
| }, |
| { |
| "epoch": 10.34135422495803, |
| "grad_norm": 0.08165992051362991, |
| "learning_rate": 1.5708419435684462e-06, |
| "loss": 0.0065, |
| "step": 18480 |
| }, |
| { |
| "epoch": 10.346950195858982, |
| "grad_norm": 0.06479761004447937, |
| "learning_rate": 1.550348093551829e-06, |
| "loss": 0.0044, |
| "step": 18490 |
| }, |
| { |
| "epoch": 10.352546166759932, |
| "grad_norm": 0.24080127477645874, |
| "learning_rate": 1.5299867030334814e-06, |
| "loss": 0.0085, |
| "step": 18500 |
| }, |
| { |
| "epoch": 10.358142137660884, |
| "grad_norm": 0.1411421000957489, |
| "learning_rate": 1.5097578276806633e-06, |
| "loss": 0.0045, |
| "step": 18510 |
| }, |
| { |
| "epoch": 10.363738108561835, |
| "grad_norm": 0.058580052107572556, |
| "learning_rate": 1.4896615227983468e-06, |
| "loss": 0.0041, |
| "step": 18520 |
| }, |
| { |
| "epoch": 10.369334079462787, |
| "grad_norm": 0.1638147383928299, |
| "learning_rate": 1.4696978433290653e-06, |
| "loss": 0.0054, |
| "step": 18530 |
| }, |
| { |
| "epoch": 10.374930050363739, |
| "grad_norm": 0.05566524341702461, |
| "learning_rate": 1.4498668438527597e-06, |
| "loss": 0.004, |
| "step": 18540 |
| }, |
| { |
| "epoch": 10.38052602126469, |
| "grad_norm": 0.07601140439510345, |
| "learning_rate": 1.4301685785866214e-06, |
| "loss": 0.0034, |
| "step": 18550 |
| }, |
| { |
| "epoch": 10.386121992165641, |
| "grad_norm": 0.10449633747339249, |
| "learning_rate": 1.4106031013849496e-06, |
| "loss": 0.0041, |
| "step": 18560 |
| }, |
| { |
| "epoch": 10.391717963066592, |
| "grad_norm": 0.15937356650829315, |
| "learning_rate": 1.3911704657390113e-06, |
| "loss": 0.0039, |
| "step": 18570 |
| }, |
| { |
| "epoch": 10.397313933967544, |
| "grad_norm": 0.059475306421518326, |
| "learning_rate": 1.3718707247769135e-06, |
| "loss": 0.006, |
| "step": 18580 |
| }, |
| { |
| "epoch": 10.402909904868494, |
| "grad_norm": 0.24354378879070282, |
| "learning_rate": 1.3527039312633827e-06, |
| "loss": 0.0042, |
| "step": 18590 |
| }, |
| { |
| "epoch": 10.408505875769446, |
| "grad_norm": 0.20878778398036957, |
| "learning_rate": 1.333670137599713e-06, |
| "loss": 0.0107, |
| "step": 18600 |
| }, |
| { |
| "epoch": 10.414101846670397, |
| "grad_norm": 0.1909496784210205, |
| "learning_rate": 1.3147693958235618e-06, |
| "loss": 0.0034, |
| "step": 18610 |
| }, |
| { |
| "epoch": 10.419697817571349, |
| "grad_norm": 0.13632823526859283, |
| "learning_rate": 1.2960017576088446e-06, |
| "loss": 0.0066, |
| "step": 18620 |
| }, |
| { |
| "epoch": 10.4252937884723, |
| "grad_norm": 0.10793755203485489, |
| "learning_rate": 1.2773672742655784e-06, |
| "loss": 0.0037, |
| "step": 18630 |
| }, |
| { |
| "epoch": 10.430889759373251, |
| "grad_norm": 0.10346037149429321, |
| "learning_rate": 1.2588659967397e-06, |
| "loss": 0.0044, |
| "step": 18640 |
| }, |
| { |
| "epoch": 10.436485730274203, |
| "grad_norm": 0.08834080398082733, |
| "learning_rate": 1.2404979756130142e-06, |
| "loss": 0.0037, |
| "step": 18650 |
| }, |
| { |
| "epoch": 10.442081701175153, |
| "grad_norm": 0.09045784175395966, |
| "learning_rate": 1.222263261102985e-06, |
| "loss": 0.0052, |
| "step": 18660 |
| }, |
| { |
| "epoch": 10.447677672076106, |
| "grad_norm": 0.07731129229068756, |
| "learning_rate": 1.2041619030626284e-06, |
| "loss": 0.0071, |
| "step": 18670 |
| }, |
| { |
| "epoch": 10.453273642977056, |
| "grad_norm": 0.08769071102142334, |
| "learning_rate": 1.1861939509803687e-06, |
| "loss": 0.0044, |
| "step": 18680 |
| }, |
| { |
| "epoch": 10.458869613878008, |
| "grad_norm": 0.15766629576683044, |
| "learning_rate": 1.1683594539798893e-06, |
| "loss": 0.0063, |
| "step": 18690 |
| }, |
| { |
| "epoch": 10.46446558477896, |
| "grad_norm": 0.11048921942710876, |
| "learning_rate": 1.1506584608200367e-06, |
| "loss": 0.0033, |
| "step": 18700 |
| }, |
| { |
| "epoch": 10.47006155567991, |
| "grad_norm": 0.25674813985824585, |
| "learning_rate": 1.1330910198946442e-06, |
| "loss": 0.0047, |
| "step": 18710 |
| }, |
| { |
| "epoch": 10.475657526580862, |
| "grad_norm": 0.09696432203054428, |
| "learning_rate": 1.1156571792324211e-06, |
| "loss": 0.0038, |
| "step": 18720 |
| }, |
| { |
| "epoch": 10.481253497481813, |
| "grad_norm": 0.17716100811958313, |
| "learning_rate": 1.0983569864968346e-06, |
| "loss": 0.0085, |
| "step": 18730 |
| }, |
| { |
| "epoch": 10.486849468382765, |
| "grad_norm": 0.18763263523578644, |
| "learning_rate": 1.0811904889859336e-06, |
| "loss": 0.009, |
| "step": 18740 |
| }, |
| { |
| "epoch": 10.492445439283715, |
| "grad_norm": 0.047968145459890366, |
| "learning_rate": 1.064157733632276e-06, |
| "loss": 0.0051, |
| "step": 18750 |
| }, |
| { |
| "epoch": 10.498041410184667, |
| "grad_norm": 0.1565999537706375, |
| "learning_rate": 1.0472587670027678e-06, |
| "loss": 0.0062, |
| "step": 18760 |
| }, |
| { |
| "epoch": 10.503637381085618, |
| "grad_norm": 0.06519567221403122, |
| "learning_rate": 1.030493635298535e-06, |
| "loss": 0.0073, |
| "step": 18770 |
| }, |
| { |
| "epoch": 10.50923335198657, |
| "grad_norm": 0.10364692658185959, |
| "learning_rate": 1.0138623843548078e-06, |
| "loss": 0.0051, |
| "step": 18780 |
| }, |
| { |
| "epoch": 10.514829322887522, |
| "grad_norm": 0.036633651703596115, |
| "learning_rate": 9.97365059640787e-07, |
| "loss": 0.0062, |
| "step": 18790 |
| }, |
| { |
| "epoch": 10.520425293788472, |
| "grad_norm": 0.2015930861234665, |
| "learning_rate": 9.810017062595322e-07, |
| "loss": 0.0037, |
| "step": 18800 |
| }, |
| { |
| "epoch": 10.526021264689424, |
| "grad_norm": 0.1180974468588829, |
| "learning_rate": 9.647723689478305e-07, |
| "loss": 0.0039, |
| "step": 18810 |
| }, |
| { |
| "epoch": 10.531617235590375, |
| "grad_norm": 0.07416771352291107, |
| "learning_rate": 9.486770920760668e-07, |
| "loss": 0.0041, |
| "step": 18820 |
| }, |
| { |
| "epoch": 10.537213206491327, |
| "grad_norm": 0.05668334290385246, |
| "learning_rate": 9.327159196481138e-07, |
| "loss": 0.0059, |
| "step": 18830 |
| }, |
| { |
| "epoch": 10.542809177392277, |
| "grad_norm": 0.07584750652313232, |
| "learning_rate": 9.168888953011989e-07, |
| "loss": 0.0054, |
| "step": 18840 |
| }, |
| { |
| "epoch": 10.548405148293229, |
| "grad_norm": 0.06703902035951614, |
| "learning_rate": 9.011960623058202e-07, |
| "loss": 0.0039, |
| "step": 18850 |
| }, |
| { |
| "epoch": 10.55400111919418, |
| "grad_norm": 0.06538796424865723, |
| "learning_rate": 8.856374635655695e-07, |
| "loss": 0.0035, |
| "step": 18860 |
| }, |
| { |
| "epoch": 10.559597090095131, |
| "grad_norm": 0.09234767407178879, |
| "learning_rate": 8.702131416170656e-07, |
| "loss": 0.0047, |
| "step": 18870 |
| }, |
| { |
| "epoch": 10.565193060996084, |
| "grad_norm": 0.09068552404642105, |
| "learning_rate": 8.549231386298151e-07, |
| "loss": 0.0032, |
| "step": 18880 |
| }, |
| { |
| "epoch": 10.570789031897034, |
| "grad_norm": 0.2574044466018677, |
| "learning_rate": 8.397674964061075e-07, |
| "loss": 0.0123, |
| "step": 18890 |
| }, |
| { |
| "epoch": 10.576385002797986, |
| "grad_norm": 0.1742398738861084, |
| "learning_rate": 8.247462563808817e-07, |
| "loss": 0.005, |
| "step": 18900 |
| }, |
| { |
| "epoch": 10.581980973698936, |
| "grad_norm": 0.19498533010482788, |
| "learning_rate": 8.098594596216424e-07, |
| "loss": 0.0051, |
| "step": 18910 |
| }, |
| { |
| "epoch": 10.587576944599888, |
| "grad_norm": 0.1093849390745163, |
| "learning_rate": 7.951071468283167e-07, |
| "loss": 0.0062, |
| "step": 18920 |
| }, |
| { |
| "epoch": 10.593172915500839, |
| "grad_norm": 0.05242215842008591, |
| "learning_rate": 7.804893583331696e-07, |
| "loss": 0.0049, |
| "step": 18930 |
| }, |
| { |
| "epoch": 10.59876888640179, |
| "grad_norm": 0.06830724328756332, |
| "learning_rate": 7.66006134100672e-07, |
| "loss": 0.0031, |
| "step": 18940 |
| }, |
| { |
| "epoch": 10.604364857302741, |
| "grad_norm": 0.08541436493396759, |
| "learning_rate": 7.516575137274162e-07, |
| "loss": 0.0044, |
| "step": 18950 |
| }, |
| { |
| "epoch": 10.609960828203693, |
| "grad_norm": 0.042029768228530884, |
| "learning_rate": 7.374435364419674e-07, |
| "loss": 0.0043, |
| "step": 18960 |
| }, |
| { |
| "epoch": 10.615556799104645, |
| "grad_norm": 0.12100391089916229, |
| "learning_rate": 7.233642411048014e-07, |
| "loss": 0.0032, |
| "step": 18970 |
| }, |
| { |
| "epoch": 10.621152770005596, |
| "grad_norm": 0.04842936620116234, |
| "learning_rate": 7.094196662081831e-07, |
| "loss": 0.0052, |
| "step": 18980 |
| }, |
| { |
| "epoch": 10.626748740906548, |
| "grad_norm": 0.13397961854934692, |
| "learning_rate": 6.956098498760389e-07, |
| "loss": 0.0056, |
| "step": 18990 |
| }, |
| { |
| "epoch": 10.632344711807498, |
| "grad_norm": 0.19486455619335175, |
| "learning_rate": 6.819348298638839e-07, |
| "loss": 0.0029, |
| "step": 19000 |
| }, |
| { |
| "epoch": 10.63794068270845, |
| "grad_norm": 0.1525876224040985, |
| "learning_rate": 6.683946435586952e-07, |
| "loss": 0.0142, |
| "step": 19010 |
| }, |
| { |
| "epoch": 10.6435366536094, |
| "grad_norm": 0.09059377759695053, |
| "learning_rate": 6.549893279788277e-07, |
| "loss": 0.0057, |
| "step": 19020 |
| }, |
| { |
| "epoch": 10.649132624510353, |
| "grad_norm": 0.08628048002719879, |
| "learning_rate": 6.417189197739093e-07, |
| "loss": 0.0059, |
| "step": 19030 |
| }, |
| { |
| "epoch": 10.654728595411305, |
| "grad_norm": 0.34853503108024597, |
| "learning_rate": 6.285834552247128e-07, |
| "loss": 0.0041, |
| "step": 19040 |
| }, |
| { |
| "epoch": 10.660324566312255, |
| "grad_norm": 0.1580825001001358, |
| "learning_rate": 6.15582970243117e-07, |
| "loss": 0.0059, |
| "step": 19050 |
| }, |
| { |
| "epoch": 10.665920537213207, |
| "grad_norm": 0.2064519226551056, |
| "learning_rate": 6.027175003719354e-07, |
| "loss": 0.0065, |
| "step": 19060 |
| }, |
| { |
| "epoch": 10.671516508114157, |
| "grad_norm": 0.1656566709280014, |
| "learning_rate": 5.899870807848762e-07, |
| "loss": 0.0045, |
| "step": 19070 |
| }, |
| { |
| "epoch": 10.67711247901511, |
| "grad_norm": 0.06346923857927322, |
| "learning_rate": 5.773917462864264e-07, |
| "loss": 0.0108, |
| "step": 19080 |
| }, |
| { |
| "epoch": 10.68270844991606, |
| "grad_norm": 0.0746588408946991, |
| "learning_rate": 5.64931531311741e-07, |
| "loss": 0.0038, |
| "step": 19090 |
| }, |
| { |
| "epoch": 10.688304420817012, |
| "grad_norm": 0.10566951334476471, |
| "learning_rate": 5.526064699265753e-07, |
| "loss": 0.0084, |
| "step": 19100 |
| }, |
| { |
| "epoch": 10.693900391717962, |
| "grad_norm": 0.061587151139974594, |
| "learning_rate": 5.404165958271811e-07, |
| "loss": 0.0042, |
| "step": 19110 |
| }, |
| { |
| "epoch": 10.699496362618914, |
| "grad_norm": 0.27593472599983215, |
| "learning_rate": 5.283619423401998e-07, |
| "loss": 0.005, |
| "step": 19120 |
| }, |
| { |
| "epoch": 10.705092333519866, |
| "grad_norm": 0.37827596068382263, |
| "learning_rate": 5.164425424226016e-07, |
| "loss": 0.0068, |
| "step": 19130 |
| }, |
| { |
| "epoch": 10.710688304420817, |
| "grad_norm": 0.2789309322834015, |
| "learning_rate": 5.046584286615697e-07, |
| "loss": 0.0054, |
| "step": 19140 |
| }, |
| { |
| "epoch": 10.716284275321769, |
| "grad_norm": 0.08417310565710068, |
| "learning_rate": 4.930096332744105e-07, |
| "loss": 0.0043, |
| "step": 19150 |
| }, |
| { |
| "epoch": 10.72188024622272, |
| "grad_norm": 0.13277283310890198, |
| "learning_rate": 4.814961881085045e-07, |
| "loss": 0.007, |
| "step": 19160 |
| }, |
| { |
| "epoch": 10.727476217123671, |
| "grad_norm": 0.029057292267680168, |
| "learning_rate": 4.701181246411501e-07, |
| "loss": 0.0077, |
| "step": 19170 |
| }, |
| { |
| "epoch": 10.733072188024622, |
| "grad_norm": 0.07132174074649811, |
| "learning_rate": 4.5887547397955864e-07, |
| "loss": 0.0044, |
| "step": 19180 |
| }, |
| { |
| "epoch": 10.738668158925574, |
| "grad_norm": 0.05213991925120354, |
| "learning_rate": 4.4776826686069305e-07, |
| "loss": 0.0022, |
| "step": 19190 |
| }, |
| { |
| "epoch": 10.744264129826526, |
| "grad_norm": 0.092039555311203, |
| "learning_rate": 4.367965336512403e-07, |
| "loss": 0.0032, |
| "step": 19200 |
| }, |
| { |
| "epoch": 10.749860100727476, |
| "grad_norm": 0.17352578043937683, |
| "learning_rate": 4.259603043475002e-07, |
| "loss": 0.0064, |
| "step": 19210 |
| }, |
| { |
| "epoch": 10.755456071628428, |
| "grad_norm": 0.15915948152542114, |
| "learning_rate": 4.1525960857530243e-07, |
| "loss": 0.0075, |
| "step": 19220 |
| }, |
| { |
| "epoch": 10.761052042529379, |
| "grad_norm": 0.21297423541545868, |
| "learning_rate": 4.0469447558995065e-07, |
| "loss": 0.0057, |
| "step": 19230 |
| }, |
| { |
| "epoch": 10.76664801343033, |
| "grad_norm": 0.17462663352489471, |
| "learning_rate": 3.9426493427611177e-07, |
| "loss": 0.0056, |
| "step": 19240 |
| }, |
| { |
| "epoch": 10.772243984331281, |
| "grad_norm": 0.10657753050327301, |
| "learning_rate": 3.839710131477492e-07, |
| "loss": 0.0089, |
| "step": 19250 |
| }, |
| { |
| "epoch": 10.777839955232233, |
| "grad_norm": 0.07254552841186523, |
| "learning_rate": 3.738127403480507e-07, |
| "loss": 0.003, |
| "step": 19260 |
| }, |
| { |
| "epoch": 10.783435926133183, |
| "grad_norm": 0.27843359112739563, |
| "learning_rate": 3.637901436493507e-07, |
| "loss": 0.0067, |
| "step": 19270 |
| }, |
| { |
| "epoch": 10.789031897034135, |
| "grad_norm": 0.17431190609931946, |
| "learning_rate": 3.5390325045304706e-07, |
| "loss": 0.0042, |
| "step": 19280 |
| }, |
| { |
| "epoch": 10.794627867935088, |
| "grad_norm": 0.11761761456727982, |
| "learning_rate": 3.441520877895288e-07, |
| "loss": 0.0036, |
| "step": 19290 |
| }, |
| { |
| "epoch": 10.800223838836038, |
| "grad_norm": 0.1055087074637413, |
| "learning_rate": 3.3453668231809286e-07, |
| "loss": 0.0049, |
| "step": 19300 |
| }, |
| { |
| "epoch": 10.80581980973699, |
| "grad_norm": 0.05716053023934364, |
| "learning_rate": 3.250570603268943e-07, |
| "loss": 0.0057, |
| "step": 19310 |
| }, |
| { |
| "epoch": 10.81141578063794, |
| "grad_norm": 0.06227661669254303, |
| "learning_rate": 3.157132477328628e-07, |
| "loss": 0.0047, |
| "step": 19320 |
| }, |
| { |
| "epoch": 10.817011751538892, |
| "grad_norm": 0.07587496936321259, |
| "learning_rate": 3.0650527008162513e-07, |
| "loss": 0.0058, |
| "step": 19330 |
| }, |
| { |
| "epoch": 10.822607722439843, |
| "grad_norm": 0.12384708225727081, |
| "learning_rate": 2.9743315254743833e-07, |
| "loss": 0.0044, |
| "step": 19340 |
| }, |
| { |
| "epoch": 10.828203693340795, |
| "grad_norm": 0.130027636885643, |
| "learning_rate": 2.8849691993311777e-07, |
| "loss": 0.0048, |
| "step": 19350 |
| }, |
| { |
| "epoch": 10.833799664241745, |
| "grad_norm": 0.03498604893684387, |
| "learning_rate": 2.796965966699927e-07, |
| "loss": 0.0076, |
| "step": 19360 |
| }, |
| { |
| "epoch": 10.839395635142697, |
| "grad_norm": 0.06795532256364822, |
| "learning_rate": 2.7103220681780615e-07, |
| "loss": 0.0046, |
| "step": 19370 |
| }, |
| { |
| "epoch": 10.84499160604365, |
| "grad_norm": 0.15649089217185974, |
| "learning_rate": 2.625037740646763e-07, |
| "loss": 0.0041, |
| "step": 19380 |
| }, |
| { |
| "epoch": 10.8505875769446, |
| "grad_norm": 0.19872230291366577, |
| "learning_rate": 2.5411132172700194e-07, |
| "loss": 0.0045, |
| "step": 19390 |
| }, |
| { |
| "epoch": 10.856183547845552, |
| "grad_norm": 0.1986837238073349, |
| "learning_rate": 2.458548727494292e-07, |
| "loss": 0.0034, |
| "step": 19400 |
| }, |
| { |
| "epoch": 10.861779518746502, |
| "grad_norm": 0.34645870327949524, |
| "learning_rate": 2.3773444970477955e-07, |
| "loss": 0.0059, |
| "step": 19410 |
| }, |
| { |
| "epoch": 10.867375489647454, |
| "grad_norm": 0.043271441012620926, |
| "learning_rate": 2.2975007479397738e-07, |
| "loss": 0.0042, |
| "step": 19420 |
| }, |
| { |
| "epoch": 10.872971460548404, |
| "grad_norm": 0.10621374845504761, |
| "learning_rate": 2.219017698460002e-07, |
| "loss": 0.0107, |
| "step": 19430 |
| }, |
| { |
| "epoch": 10.878567431449357, |
| "grad_norm": 0.038412097841501236, |
| "learning_rate": 2.1418955631781202e-07, |
| "loss": 0.0025, |
| "step": 19440 |
| }, |
| { |
| "epoch": 10.884163402350307, |
| "grad_norm": 0.14375977218151093, |
| "learning_rate": 2.0661345529430775e-07, |
| "loss": 0.0063, |
| "step": 19450 |
| }, |
| { |
| "epoch": 10.889759373251259, |
| "grad_norm": 0.28644490242004395, |
| "learning_rate": 1.9917348748826335e-07, |
| "loss": 0.0037, |
| "step": 19460 |
| }, |
| { |
| "epoch": 10.895355344152211, |
| "grad_norm": 0.19371145963668823, |
| "learning_rate": 1.918696732402636e-07, |
| "loss": 0.0071, |
| "step": 19470 |
| }, |
| { |
| "epoch": 10.900951315053161, |
| "grad_norm": 0.11907006055116653, |
| "learning_rate": 1.847020325186577e-07, |
| "loss": 0.0049, |
| "step": 19480 |
| }, |
| { |
| "epoch": 10.906547285954113, |
| "grad_norm": 0.10020023584365845, |
| "learning_rate": 1.776705849195037e-07, |
| "loss": 0.0036, |
| "step": 19490 |
| }, |
| { |
| "epoch": 10.912143256855064, |
| "grad_norm": 0.12778791785240173, |
| "learning_rate": 1.7077534966650766e-07, |
| "loss": 0.0057, |
| "step": 19500 |
| }, |
| { |
| "epoch": 10.917739227756016, |
| "grad_norm": 0.06359223276376724, |
| "learning_rate": 1.6401634561098444e-07, |
| "loss": 0.0036, |
| "step": 19510 |
| }, |
| { |
| "epoch": 10.923335198656966, |
| "grad_norm": 0.07983513921499252, |
| "learning_rate": 1.5739359123178587e-07, |
| "loss": 0.0037, |
| "step": 19520 |
| }, |
| { |
| "epoch": 10.928931169557918, |
| "grad_norm": 0.12060696631669998, |
| "learning_rate": 1.5090710463527836e-07, |
| "loss": 0.0031, |
| "step": 19530 |
| }, |
| { |
| "epoch": 10.93452714045887, |
| "grad_norm": 0.10252276062965393, |
| "learning_rate": 1.4455690355525964e-07, |
| "loss": 0.0052, |
| "step": 19540 |
| }, |
| { |
| "epoch": 10.94012311135982, |
| "grad_norm": 0.10586907714605331, |
| "learning_rate": 1.383430053529422e-07, |
| "loss": 0.0025, |
| "step": 19550 |
| }, |
| { |
| "epoch": 10.945719082260773, |
| "grad_norm": 0.05571618303656578, |
| "learning_rate": 1.3226542701689215e-07, |
| "loss": 0.0045, |
| "step": 19560 |
| }, |
| { |
| "epoch": 10.951315053161723, |
| "grad_norm": 0.07698628306388855, |
| "learning_rate": 1.2632418516296262e-07, |
| "loss": 0.0039, |
| "step": 19570 |
| }, |
| { |
| "epoch": 10.956911024062675, |
| "grad_norm": 0.3049318790435791, |
| "learning_rate": 1.2051929603428825e-07, |
| "loss": 0.0036, |
| "step": 19580 |
| }, |
| { |
| "epoch": 10.962506994963626, |
| "grad_norm": 0.04247491434216499, |
| "learning_rate": 1.1485077550122402e-07, |
| "loss": 0.0086, |
| "step": 19590 |
| }, |
| { |
| "epoch": 10.968102965864578, |
| "grad_norm": 0.13998843729496002, |
| "learning_rate": 1.0931863906127327e-07, |
| "loss": 0.0032, |
| "step": 19600 |
| }, |
| { |
| "epoch": 10.973698936765528, |
| "grad_norm": 0.18532228469848633, |
| "learning_rate": 1.0392290183909304e-07, |
| "loss": 0.0053, |
| "step": 19610 |
| }, |
| { |
| "epoch": 10.97929490766648, |
| "grad_norm": 0.24849370121955872, |
| "learning_rate": 9.866357858642205e-08, |
| "loss": 0.0031, |
| "step": 19620 |
| }, |
| { |
| "epoch": 10.984890878567432, |
| "grad_norm": 0.04739070311188698, |
| "learning_rate": 9.354068368204739e-08, |
| "loss": 0.0055, |
| "step": 19630 |
| }, |
| { |
| "epoch": 10.990486849468383, |
| "grad_norm": 0.13325341045856476, |
| "learning_rate": 8.855423113177664e-08, |
| "loss": 0.0027, |
| "step": 19640 |
| }, |
| { |
| "epoch": 10.996082820369335, |
| "grad_norm": 0.15442515909671783, |
| "learning_rate": 8.37042345683714e-08, |
| "loss": 0.009, |
| "step": 19650 |
| }, |
| { |
| "epoch": 11.001678791270285, |
| "grad_norm": 0.20657239854335785, |
| "learning_rate": 7.899070725153613e-08, |
| "loss": 0.0063, |
| "step": 19660 |
| }, |
| { |
| "epoch": 11.007274762171237, |
| "grad_norm": 0.16029535233974457, |
| "learning_rate": 7.44136620678848e-08, |
| "loss": 0.0044, |
| "step": 19670 |
| }, |
| { |
| "epoch": 11.012870733072187, |
| "grad_norm": 0.16476546227931976, |
| "learning_rate": 6.997311153086883e-08, |
| "loss": 0.0066, |
| "step": 19680 |
| }, |
| { |
| "epoch": 11.01846670397314, |
| "grad_norm": 0.12683425843715668, |
| "learning_rate": 6.566906778079917e-08, |
| "loss": 0.0052, |
| "step": 19690 |
| }, |
| { |
| "epoch": 11.024062674874092, |
| "grad_norm": 0.23135153949260712, |
| "learning_rate": 6.150154258476315e-08, |
| "loss": 0.0043, |
| "step": 19700 |
| }, |
| { |
| "epoch": 11.029658645775042, |
| "grad_norm": 0.1939716786146164, |
| "learning_rate": 5.747054733660773e-08, |
| "loss": 0.0077, |
| "step": 19710 |
| }, |
| { |
| "epoch": 11.035254616675994, |
| "grad_norm": 0.11450741440057755, |
| "learning_rate": 5.3576093056922906e-08, |
| "loss": 0.0079, |
| "step": 19720 |
| }, |
| { |
| "epoch": 11.040850587576944, |
| "grad_norm": 0.06929726153612137, |
| "learning_rate": 4.981819039300284e-08, |
| "loss": 0.0039, |
| "step": 19730 |
| }, |
| { |
| "epoch": 11.046446558477896, |
| "grad_norm": 0.11268885433673859, |
| "learning_rate": 4.619684961881254e-08, |
| "loss": 0.0047, |
| "step": 19740 |
| }, |
| { |
| "epoch": 11.052042529378847, |
| "grad_norm": 0.07555661350488663, |
| "learning_rate": 4.2712080634949024e-08, |
| "loss": 0.0038, |
| "step": 19750 |
| }, |
| { |
| "epoch": 11.057638500279799, |
| "grad_norm": 0.07180225849151611, |
| "learning_rate": 3.936389296864129e-08, |
| "loss": 0.0066, |
| "step": 19760 |
| }, |
| { |
| "epoch": 11.063234471180749, |
| "grad_norm": 0.2635197937488556, |
| "learning_rate": 3.615229577371149e-08, |
| "loss": 0.0047, |
| "step": 19770 |
| }, |
| { |
| "epoch": 11.068830442081701, |
| "grad_norm": 0.03527739644050598, |
| "learning_rate": 3.3077297830541584e-08, |
| "loss": 0.0047, |
| "step": 19780 |
| }, |
| { |
| "epoch": 11.074426412982653, |
| "grad_norm": 0.061606280505657196, |
| "learning_rate": 3.01389075460512e-08, |
| "loss": 0.0069, |
| "step": 19790 |
| }, |
| { |
| "epoch": 11.080022383883604, |
| "grad_norm": 0.14764872193336487, |
| "learning_rate": 2.7337132953697554e-08, |
| "loss": 0.0063, |
| "step": 19800 |
| }, |
| { |
| "epoch": 11.085618354784556, |
| "grad_norm": 0.13825170695781708, |
| "learning_rate": 2.467198171342e-08, |
| "loss": 0.0047, |
| "step": 19810 |
| }, |
| { |
| "epoch": 11.091214325685506, |
| "grad_norm": 0.40132373571395874, |
| "learning_rate": 2.214346111164556e-08, |
| "loss": 0.0058, |
| "step": 19820 |
| }, |
| { |
| "epoch": 11.096810296586458, |
| "grad_norm": 0.06293044239282608, |
| "learning_rate": 1.9751578061244504e-08, |
| "loss": 0.0093, |
| "step": 19830 |
| }, |
| { |
| "epoch": 11.102406267487408, |
| "grad_norm": 0.08641501516103745, |
| "learning_rate": 1.749633910153592e-08, |
| "loss": 0.0061, |
| "step": 19840 |
| }, |
| { |
| "epoch": 11.10800223838836, |
| "grad_norm": 0.06543342024087906, |
| "learning_rate": 1.5377750398265502e-08, |
| "loss": 0.0034, |
| "step": 19850 |
| }, |
| { |
| "epoch": 11.11359820928931, |
| "grad_norm": 0.0463268905878067, |
| "learning_rate": 1.3395817743561134e-08, |
| "loss": 0.0031, |
| "step": 19860 |
| }, |
| { |
| "epoch": 11.119194180190263, |
| "grad_norm": 0.18889687955379486, |
| "learning_rate": 1.1550546555960662e-08, |
| "loss": 0.0049, |
| "step": 19870 |
| }, |
| { |
| "epoch": 11.124790151091215, |
| "grad_norm": 0.33526870608329773, |
| "learning_rate": 9.841941880361916e-09, |
| "loss": 0.0068, |
| "step": 19880 |
| }, |
| { |
| "epoch": 11.130386121992165, |
| "grad_norm": 0.17259934544563293, |
| "learning_rate": 8.270008388022721e-09, |
| "loss": 0.0047, |
| "step": 19890 |
| }, |
| { |
| "epoch": 11.135982092893117, |
| "grad_norm": 0.24882031977176666, |
| "learning_rate": 6.834750376549792e-09, |
| "loss": 0.0061, |
| "step": 19900 |
| }, |
| { |
| "epoch": 11.141578063794068, |
| "grad_norm": 0.05286456272006035, |
| "learning_rate": 5.536171769887632e-09, |
| "loss": 0.0059, |
| "step": 19910 |
| }, |
| { |
| "epoch": 11.14717403469502, |
| "grad_norm": 0.08882560580968857, |
| "learning_rate": 4.3742761183018784e-09, |
| "loss": 0.0063, |
| "step": 19920 |
| }, |
| { |
| "epoch": 11.15277000559597, |
| "grad_norm": 0.09571769833564758, |
| "learning_rate": 3.349066598362649e-09, |
| "loss": 0.0034, |
| "step": 19930 |
| }, |
| { |
| "epoch": 11.158365976496922, |
| "grad_norm": 0.07795775681734085, |
| "learning_rate": 2.4605460129556445e-09, |
| "loss": 0.0029, |
| "step": 19940 |
| }, |
| { |
| "epoch": 11.163961947397874, |
| "grad_norm": 0.07696644216775894, |
| "learning_rate": 1.7087167912710478e-09, |
| "loss": 0.0083, |
| "step": 19950 |
| }, |
| { |
| "epoch": 11.169557918298825, |
| "grad_norm": 0.26498469710350037, |
| "learning_rate": 1.0935809887702154e-09, |
| "loss": 0.0033, |
| "step": 19960 |
| }, |
| { |
| "epoch": 11.175153889199777, |
| "grad_norm": 0.165630042552948, |
| "learning_rate": 6.151402872134337e-10, |
| "loss": 0.007, |
| "step": 19970 |
| }, |
| { |
| "epoch": 11.180749860100727, |
| "grad_norm": 0.07009857147932053, |
| "learning_rate": 2.7339599464326627e-10, |
| "loss": 0.0037, |
| "step": 19980 |
| }, |
| { |
| "epoch": 11.18634583100168, |
| "grad_norm": 0.1754114180803299, |
| "learning_rate": 6.834904537900144e-11, |
| "loss": 0.0069, |
| "step": 19990 |
| }, |
| { |
| "epoch": 11.19194180190263, |
| "grad_norm": 0.18103741109371185, |
| "learning_rate": 0.0, |
| "loss": 0.0044, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 12, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.091345386565736e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|