| { |
| "best_global_step": 15408, |
| "best_metric": 0.9998485536877177, |
| "best_model_checkpoint": "wav2vec2_frog_classifier_sew_d/checkpoint-15408", |
| "epoch": 9.0, |
| "eval_steps": 500, |
| "global_step": 15408, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005841121495327103, |
| "grad_norm": 9.79509162902832, |
| "learning_rate": 1.3142523364485982e-08, |
| "loss": 0.7229, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.011682242990654205, |
| "grad_norm": 18.790061950683594, |
| "learning_rate": 2.774532710280374e-08, |
| "loss": 0.7206, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.017523364485981307, |
| "grad_norm": 10.11142349243164, |
| "learning_rate": 4.2348130841121496e-08, |
| "loss": 0.7046, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02336448598130841, |
| "grad_norm": 14.219882011413574, |
| "learning_rate": 5.6950934579439255e-08, |
| "loss": 0.6709, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.029205607476635514, |
| "grad_norm": 16.27984619140625, |
| "learning_rate": 7.155373831775702e-08, |
| "loss": 0.6569, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.035046728971962614, |
| "grad_norm": 15.359692573547363, |
| "learning_rate": 8.615654205607476e-08, |
| "loss": 0.6114, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04088785046728972, |
| "grad_norm": 18.100404739379883, |
| "learning_rate": 1.0075934579439253e-07, |
| "loss": 0.5935, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04672897196261682, |
| "grad_norm": 45.55104064941406, |
| "learning_rate": 1.1536214953271028e-07, |
| "loss": 0.5592, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.052570093457943924, |
| "grad_norm": 16.723636627197266, |
| "learning_rate": 1.2996495327102804e-07, |
| "loss": 0.5253, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05841121495327103, |
| "grad_norm": 37.350120544433594, |
| "learning_rate": 1.445677570093458e-07, |
| "loss": 0.4649, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06425233644859812, |
| "grad_norm": 38.95242691040039, |
| "learning_rate": 1.5917056074766358e-07, |
| "loss": 0.4839, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07009345794392523, |
| "grad_norm": 53.936973571777344, |
| "learning_rate": 1.7377336448598132e-07, |
| "loss": 0.4564, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07593457943925233, |
| "grad_norm": 16.073362350463867, |
| "learning_rate": 1.8837616822429908e-07, |
| "loss": 0.4502, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.08177570093457943, |
| "grad_norm": 49.77845001220703, |
| "learning_rate": 2.0297897196261685e-07, |
| "loss": 0.4377, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08761682242990654, |
| "grad_norm": 21.89165687561035, |
| "learning_rate": 2.1758177570093461e-07, |
| "loss": 0.3377, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09345794392523364, |
| "grad_norm": 19.01292610168457, |
| "learning_rate": 2.3218457943925235e-07, |
| "loss": 0.3362, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09929906542056074, |
| "grad_norm": 44.09226608276367, |
| "learning_rate": 2.467873831775701e-07, |
| "loss": 0.3133, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10514018691588785, |
| "grad_norm": 7.86514949798584, |
| "learning_rate": 2.6139018691588786e-07, |
| "loss": 0.2629, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11098130841121495, |
| "grad_norm": 60.873443603515625, |
| "learning_rate": 2.7599299065420565e-07, |
| "loss": 0.2352, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11682242990654206, |
| "grad_norm": 8.908461570739746, |
| "learning_rate": 2.905957943925234e-07, |
| "loss": 0.2343, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12266355140186916, |
| "grad_norm": 20.771284103393555, |
| "learning_rate": 3.051985981308412e-07, |
| "loss": 0.2147, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.12850467289719625, |
| "grad_norm": 4.727106094360352, |
| "learning_rate": 3.198014018691589e-07, |
| "loss": 0.13, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.13434579439252337, |
| "grad_norm": 40.93299865722656, |
| "learning_rate": 3.3440420560747666e-07, |
| "loss": 0.1191, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.14018691588785046, |
| "grad_norm": 4.306646347045898, |
| "learning_rate": 3.4900700934579445e-07, |
| "loss": 0.0938, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.14602803738317757, |
| "grad_norm": 1.910163164138794, |
| "learning_rate": 3.636098130841122e-07, |
| "loss": 0.0715, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15186915887850466, |
| "grad_norm": 2.0170037746429443, |
| "learning_rate": 3.7821261682243e-07, |
| "loss": 0.0417, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.15771028037383178, |
| "grad_norm": 1.1409950256347656, |
| "learning_rate": 3.9281542056074767e-07, |
| "loss": 0.0309, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.16355140186915887, |
| "grad_norm": 1.4356250762939453, |
| "learning_rate": 4.0741822429906546e-07, |
| "loss": 0.03, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.169392523364486, |
| "grad_norm": 0.8783276677131653, |
| "learning_rate": 4.220210280373832e-07, |
| "loss": 0.0212, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.17523364485981308, |
| "grad_norm": 17.989458084106445, |
| "learning_rate": 4.36623831775701e-07, |
| "loss": 0.0416, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1810747663551402, |
| "grad_norm": 0.5098496079444885, |
| "learning_rate": 4.5122663551401874e-07, |
| "loss": 0.0161, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.18691588785046728, |
| "grad_norm": 1.8965669870376587, |
| "learning_rate": 4.6582943925233653e-07, |
| "loss": 0.0415, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1927570093457944, |
| "grad_norm": 0.6456058025360107, |
| "learning_rate": 4.804322429906542e-07, |
| "loss": 0.0316, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1985981308411215, |
| "grad_norm": 0.7947583794593811, |
| "learning_rate": 4.95035046728972e-07, |
| "loss": 0.013, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2044392523364486, |
| "grad_norm": 0.3620131015777588, |
| "learning_rate": 5.096378504672898e-07, |
| "loss": 0.0127, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2102803738317757, |
| "grad_norm": 1.309668779373169, |
| "learning_rate": 5.242406542056075e-07, |
| "loss": 0.0406, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2161214953271028, |
| "grad_norm": 0.6888924241065979, |
| "learning_rate": 5.388434579439253e-07, |
| "loss": 0.0118, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2219626168224299, |
| "grad_norm": 0.4831880033016205, |
| "learning_rate": 5.53446261682243e-07, |
| "loss": 0.0314, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.22780373831775702, |
| "grad_norm": 0.355655699968338, |
| "learning_rate": 5.680490654205608e-07, |
| "loss": 0.011, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2336448598130841, |
| "grad_norm": 0.36532697081565857, |
| "learning_rate": 5.826518691588785e-07, |
| "loss": 0.0391, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.23948598130841123, |
| "grad_norm": 0.7894148230552673, |
| "learning_rate": 5.972546728971963e-07, |
| "loss": 0.0388, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.24532710280373832, |
| "grad_norm": 0.34525978565216064, |
| "learning_rate": 6.11857476635514e-07, |
| "loss": 0.0101, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.25116822429906543, |
| "grad_norm": 0.2628501355648041, |
| "learning_rate": 6.264602803738318e-07, |
| "loss": 0.0098, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2570093457943925, |
| "grad_norm": 0.4060896635055542, |
| "learning_rate": 6.410630841121496e-07, |
| "loss": 0.0096, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2628504672897196, |
| "grad_norm": 0.22625339031219482, |
| "learning_rate": 6.556658878504674e-07, |
| "loss": 0.0093, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.26869158878504673, |
| "grad_norm": 0.25126221776008606, |
| "learning_rate": 6.702686915887852e-07, |
| "loss": 0.0091, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.27453271028037385, |
| "grad_norm": 0.17624634504318237, |
| "learning_rate": 6.848714953271028e-07, |
| "loss": 0.0089, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2803738317757009, |
| "grad_norm": 0.1749255508184433, |
| "learning_rate": 6.994742990654206e-07, |
| "loss": 0.0087, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.286214953271028, |
| "grad_norm": 0.5451480150222778, |
| "learning_rate": 7.140771028037384e-07, |
| "loss": 0.0384, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.29205607476635514, |
| "grad_norm": 0.2977391183376312, |
| "learning_rate": 7.286799065420562e-07, |
| "loss": 0.0086, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.29789719626168226, |
| "grad_norm": 0.1782873570919037, |
| "learning_rate": 7.432827102803738e-07, |
| "loss": 0.0083, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3037383177570093, |
| "grad_norm": 0.4122442603111267, |
| "learning_rate": 7.578855140186916e-07, |
| "loss": 0.0383, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.30957943925233644, |
| "grad_norm": 0.3462129235267639, |
| "learning_rate": 7.724883177570094e-07, |
| "loss": 0.0083, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.31542056074766356, |
| "grad_norm": 0.4232594668865204, |
| "learning_rate": 7.870911214953271e-07, |
| "loss": 0.0382, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3212616822429907, |
| "grad_norm": 0.6220551133155823, |
| "learning_rate": 8.016939252336449e-07, |
| "loss": 0.0383, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.32710280373831774, |
| "grad_norm": 3.729926347732544, |
| "learning_rate": 8.162967289719627e-07, |
| "loss": 0.0382, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.33294392523364486, |
| "grad_norm": 0.38771969079971313, |
| "learning_rate": 8.308995327102805e-07, |
| "loss": 0.0078, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.338785046728972, |
| "grad_norm": 2.8846986293792725, |
| "learning_rate": 8.455023364485982e-07, |
| "loss": 0.038, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3446261682242991, |
| "grad_norm": 0.2876492738723755, |
| "learning_rate": 8.60105140186916e-07, |
| "loss": 0.0076, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.35046728971962615, |
| "grad_norm": 0.18359579145908356, |
| "learning_rate": 8.747079439252338e-07, |
| "loss": 0.0072, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.35630841121495327, |
| "grad_norm": 0.15943388640880585, |
| "learning_rate": 8.893107476635516e-07, |
| "loss": 0.0071, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3621495327102804, |
| "grad_norm": 0.4958500564098358, |
| "learning_rate": 9.039135514018691e-07, |
| "loss": 0.069, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3679906542056075, |
| "grad_norm": 0.2749152183532715, |
| "learning_rate": 9.185163551401869e-07, |
| "loss": 0.0362, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.37383177570093457, |
| "grad_norm": 0.25621095299720764, |
| "learning_rate": 9.331191588785047e-07, |
| "loss": 0.0373, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3796728971962617, |
| "grad_norm": 0.19895656406879425, |
| "learning_rate": 9.477219626168225e-07, |
| "loss": 0.0069, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3855140186915888, |
| "grad_norm": 0.15757645666599274, |
| "learning_rate": 9.623247663551403e-07, |
| "loss": 0.0067, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.39135514018691586, |
| "grad_norm": 0.29621386528015137, |
| "learning_rate": 9.76927570093458e-07, |
| "loss": 0.0364, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.397196261682243, |
| "grad_norm": 0.15212251245975494, |
| "learning_rate": 9.915303738317759e-07, |
| "loss": 0.0065, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4030373831775701, |
| "grad_norm": 0.12763015925884247, |
| "learning_rate": 1.0061331775700936e-06, |
| "loss": 0.0063, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.4088785046728972, |
| "grad_norm": 0.11682584136724472, |
| "learning_rate": 1.0207359813084113e-06, |
| "loss": 0.0061, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4147196261682243, |
| "grad_norm": 0.11381419748067856, |
| "learning_rate": 1.035338785046729e-06, |
| "loss": 0.006, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4205607476635514, |
| "grad_norm": 0.1704946756362915, |
| "learning_rate": 1.0499415887850468e-06, |
| "loss": 0.038, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4264018691588785, |
| "grad_norm": 10.463326454162598, |
| "learning_rate": 1.0645443925233647e-06, |
| "loss": 0.0382, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4322429906542056, |
| "grad_norm": 0.1989985555410385, |
| "learning_rate": 1.0791471962616822e-06, |
| "loss": 0.0061, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4380841121495327, |
| "grad_norm": 0.3084839880466461, |
| "learning_rate": 1.0937500000000001e-06, |
| "loss": 0.0617, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4439252336448598, |
| "grad_norm": 0.15561099350452423, |
| "learning_rate": 1.1083528037383178e-06, |
| "loss": 0.0059, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4497663551401869, |
| "grad_norm": 0.26193615794181824, |
| "learning_rate": 1.1229556074766355e-06, |
| "loss": 0.0379, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.45560747663551404, |
| "grad_norm": 0.1491398811340332, |
| "learning_rate": 1.1375584112149534e-06, |
| "loss": 0.0057, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4614485981308411, |
| "grad_norm": 0.11440080404281616, |
| "learning_rate": 1.152161214953271e-06, |
| "loss": 0.0055, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4672897196261682, |
| "grad_norm": 0.2670748233795166, |
| "learning_rate": 1.166764018691589e-06, |
| "loss": 0.0381, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.47313084112149534, |
| "grad_norm": 0.14163456857204437, |
| "learning_rate": 1.1813668224299067e-06, |
| "loss": 0.0054, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.47897196261682246, |
| "grad_norm": 0.3021210730075836, |
| "learning_rate": 1.1959696261682243e-06, |
| "loss": 0.0381, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4848130841121495, |
| "grad_norm": 0.15613315999507904, |
| "learning_rate": 1.2105724299065422e-06, |
| "loss": 0.0054, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.49065420560747663, |
| "grad_norm": 0.14163915812969208, |
| "learning_rate": 1.22517523364486e-06, |
| "loss": 0.0051, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.49649532710280375, |
| "grad_norm": 0.1025778204202652, |
| "learning_rate": 1.2397780373831776e-06, |
| "loss": 0.0049, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5023364485981309, |
| "grad_norm": 0.1480044722557068, |
| "learning_rate": 1.2543808411214955e-06, |
| "loss": 0.0382, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.508177570093458, |
| "grad_norm": 0.10440000891685486, |
| "learning_rate": 1.268983644859813e-06, |
| "loss": 0.0049, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.514018691588785, |
| "grad_norm": 0.10672589391469955, |
| "learning_rate": 1.2835864485981309e-06, |
| "loss": 0.0047, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5198598130841121, |
| "grad_norm": 0.09414487332105637, |
| "learning_rate": 1.2981892523364486e-06, |
| "loss": 0.0046, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5257009345794392, |
| "grad_norm": 0.08826252818107605, |
| "learning_rate": 1.3127920560747665e-06, |
| "loss": 0.0045, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5315420560747663, |
| "grad_norm": 0.08648844063282013, |
| "learning_rate": 1.3273948598130842e-06, |
| "loss": 0.0044, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5373831775700935, |
| "grad_norm": 0.08451962471008301, |
| "learning_rate": 1.341997663551402e-06, |
| "loss": 0.0043, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5432242990654206, |
| "grad_norm": 0.14131106436252594, |
| "learning_rate": 1.3566004672897197e-06, |
| "loss": 0.0384, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5490654205607477, |
| "grad_norm": 0.09261338412761688, |
| "learning_rate": 1.3712032710280376e-06, |
| "loss": 0.0043, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5549065420560748, |
| "grad_norm": 0.09553109854459763, |
| "learning_rate": 1.3858060747663551e-06, |
| "loss": 0.0042, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5607476635514018, |
| "grad_norm": 0.08126160502433777, |
| "learning_rate": 1.400408878504673e-06, |
| "loss": 0.0041, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5665887850467289, |
| "grad_norm": 0.07953794300556183, |
| "learning_rate": 1.4150116822429907e-06, |
| "loss": 0.004, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.572429906542056, |
| "grad_norm": 0.07813739776611328, |
| "learning_rate": 1.4296144859813086e-06, |
| "loss": 0.004, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5782710280373832, |
| "grad_norm": 0.07673702389001846, |
| "learning_rate": 1.4442172897196263e-06, |
| "loss": 0.0039, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5841121495327103, |
| "grad_norm": 0.0752805769443512, |
| "learning_rate": 1.4588200934579442e-06, |
| "loss": 0.0038, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5899532710280374, |
| "grad_norm": 0.1004716008901596, |
| "learning_rate": 1.4734228971962619e-06, |
| "loss": 0.0387, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5957943925233645, |
| "grad_norm": 0.20554307103157043, |
| "learning_rate": 1.4880257009345793e-06, |
| "loss": 0.0387, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6016355140186916, |
| "grad_norm": 0.23959581553936005, |
| "learning_rate": 1.5026285046728972e-06, |
| "loss": 0.004, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6074766355140186, |
| "grad_norm": 0.12093233317136765, |
| "learning_rate": 1.517231308411215e-06, |
| "loss": 0.0038, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6133177570093458, |
| "grad_norm": 0.15225279331207275, |
| "learning_rate": 1.5318341121495328e-06, |
| "loss": 0.0389, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6191588785046729, |
| "grad_norm": 0.14517702162265778, |
| "learning_rate": 1.5464369158878505e-06, |
| "loss": 0.0037, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.12021499127149582, |
| "learning_rate": 1.5610397196261684e-06, |
| "loss": 0.0036, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6308411214953271, |
| "grad_norm": 0.07724525034427643, |
| "learning_rate": 1.575642523364486e-06, |
| "loss": 0.0035, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6366822429906542, |
| "grad_norm": 0.07167772948741913, |
| "learning_rate": 1.590245327102804e-06, |
| "loss": 0.0034, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6425233644859814, |
| "grad_norm": 0.06891785562038422, |
| "learning_rate": 1.6048481308411215e-06, |
| "loss": 0.0033, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6483644859813084, |
| "grad_norm": 0.06666991114616394, |
| "learning_rate": 1.6194509345794394e-06, |
| "loss": 0.0033, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6542056074766355, |
| "grad_norm": 0.11780866980552673, |
| "learning_rate": 1.634053738317757e-06, |
| "loss": 0.0391, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6600467289719626, |
| "grad_norm": 0.06861135363578796, |
| "learning_rate": 1.648656542056075e-06, |
| "loss": 0.0032, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6658878504672897, |
| "grad_norm": 0.0664181187748909, |
| "learning_rate": 1.6632593457943926e-06, |
| "loss": 0.0032, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6717289719626168, |
| "grad_norm": 0.06380724161863327, |
| "learning_rate": 1.6778621495327105e-06, |
| "loss": 0.0031, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.677570093457944, |
| "grad_norm": 0.0625571459531784, |
| "learning_rate": 1.6924649532710282e-06, |
| "loss": 0.0031, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6834112149532711, |
| "grad_norm": 0.06134733557701111, |
| "learning_rate": 1.7070677570093461e-06, |
| "loss": 0.003, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6892523364485982, |
| "grad_norm": 0.06018327549099922, |
| "learning_rate": 1.7216705607476636e-06, |
| "loss": 0.0029, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6950934579439252, |
| "grad_norm": 0.05909387394785881, |
| "learning_rate": 1.7362733644859813e-06, |
| "loss": 0.0029, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.7009345794392523, |
| "grad_norm": 0.058149177581071854, |
| "learning_rate": 1.7508761682242992e-06, |
| "loss": 0.0028, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7067757009345794, |
| "grad_norm": 0.0572173111140728, |
| "learning_rate": 1.7654789719626169e-06, |
| "loss": 0.0028, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7126168224299065, |
| "grad_norm": 0.05630593001842499, |
| "learning_rate": 1.7800817757009348e-06, |
| "loss": 0.0027, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7184579439252337, |
| "grad_norm": 0.07457193732261658, |
| "learning_rate": 1.7946845794392527e-06, |
| "loss": 0.0397, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.7242990654205608, |
| "grad_norm": 0.05988035723567009, |
| "learning_rate": 1.8092873831775704e-06, |
| "loss": 0.0027, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7301401869158879, |
| "grad_norm": 0.13969571888446808, |
| "learning_rate": 1.8238901869158878e-06, |
| "loss": 0.0397, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.735981308411215, |
| "grad_norm": 0.25887084007263184, |
| "learning_rate": 1.8384929906542057e-06, |
| "loss": 0.0398, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.741822429906542, |
| "grad_norm": 0.13206328451633453, |
| "learning_rate": 1.8530957943925234e-06, |
| "loss": 0.0031, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7476635514018691, |
| "grad_norm": 0.07050861418247223, |
| "learning_rate": 1.8676985981308413e-06, |
| "loss": 0.0028, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7535046728971962, |
| "grad_norm": 0.05721152201294899, |
| "learning_rate": 1.882301401869159e-06, |
| "loss": 0.0026, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7593457943925234, |
| "grad_norm": 0.055426888167858124, |
| "learning_rate": 1.896904205607477e-06, |
| "loss": 0.0026, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7651869158878505, |
| "grad_norm": 0.053014617413282394, |
| "learning_rate": 1.9115070093457946e-06, |
| "loss": 0.0025, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7710280373831776, |
| "grad_norm": 0.07429449260234833, |
| "learning_rate": 1.9261098130841125e-06, |
| "loss": 0.0399, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7768691588785047, |
| "grad_norm": 0.058152373880147934, |
| "learning_rate": 1.94071261682243e-06, |
| "loss": 0.0025, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7827102803738317, |
| "grad_norm": 0.05262916162610054, |
| "learning_rate": 1.955315420560748e-06, |
| "loss": 0.0025, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7885514018691588, |
| "grad_norm": 0.05100777745246887, |
| "learning_rate": 1.9699182242990658e-06, |
| "loss": 0.0024, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.794392523364486, |
| "grad_norm": 0.04984293133020401, |
| "learning_rate": 1.9845210280373832e-06, |
| "loss": 0.0024, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8002336448598131, |
| "grad_norm": 0.048913147300481796, |
| "learning_rate": 1.999123831775701e-06, |
| "loss": 0.0023, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.8060747663551402, |
| "grad_norm": 0.04825076088309288, |
| "learning_rate": 2.013726635514019e-06, |
| "loss": 0.0023, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8119158878504673, |
| "grad_norm": 0.04739582911133766, |
| "learning_rate": 2.028329439252337e-06, |
| "loss": 0.0022, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.8177570093457944, |
| "grad_norm": 0.04659786447882652, |
| "learning_rate": 2.0429322429906544e-06, |
| "loss": 0.0022, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8235981308411215, |
| "grad_norm": 0.0471995547413826, |
| "learning_rate": 2.057535046728972e-06, |
| "loss": 0.0405, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8294392523364486, |
| "grad_norm": 0.06635795533657074, |
| "learning_rate": 2.0721378504672898e-06, |
| "loss": 0.0405, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8352803738317757, |
| "grad_norm": 0.05472889170050621, |
| "learning_rate": 2.0867406542056077e-06, |
| "loss": 0.0023, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8411214953271028, |
| "grad_norm": 0.06456664949655533, |
| "learning_rate": 2.1013434579439256e-06, |
| "loss": 0.0404, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8469626168224299, |
| "grad_norm": 0.06406297534704208, |
| "learning_rate": 2.115946261682243e-06, |
| "loss": 0.0024, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.852803738317757, |
| "grad_norm": 0.05212978273630142, |
| "learning_rate": 2.130549065420561e-06, |
| "loss": 0.0022, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.8586448598130841, |
| "grad_norm": 0.04937836900353432, |
| "learning_rate": 2.145151869158879e-06, |
| "loss": 0.0022, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8644859813084113, |
| "grad_norm": 0.046520065516233444, |
| "learning_rate": 2.1597546728971963e-06, |
| "loss": 0.0021, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8703271028037384, |
| "grad_norm": 0.044921793043613434, |
| "learning_rate": 2.174357476635514e-06, |
| "loss": 0.0021, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8761682242990654, |
| "grad_norm": 0.04380778223276138, |
| "learning_rate": 2.188960280373832e-06, |
| "loss": 0.002, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8820093457943925, |
| "grad_norm": 0.04308932647109032, |
| "learning_rate": 2.2035630841121496e-06, |
| "loss": 0.002, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.8878504672897196, |
| "grad_norm": 0.042391639202833176, |
| "learning_rate": 2.2181658878504675e-06, |
| "loss": 0.002, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8936915887850467, |
| "grad_norm": 0.04182083159685135, |
| "learning_rate": 2.2327686915887854e-06, |
| "loss": 0.0019, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8995327102803738, |
| "grad_norm": 0.04098886623978615, |
| "learning_rate": 2.2473714953271033e-06, |
| "loss": 0.0019, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.905373831775701, |
| "grad_norm": 0.040418434888124466, |
| "learning_rate": 2.2619742990654208e-06, |
| "loss": 0.0019, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.9112149532710281, |
| "grad_norm": 0.03962099552154541, |
| "learning_rate": 2.2765771028037382e-06, |
| "loss": 0.0018, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.9170560747663551, |
| "grad_norm": 0.039173197001218796, |
| "learning_rate": 2.291179906542056e-06, |
| "loss": 0.0018, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.9228971962616822, |
| "grad_norm": 0.038435958325862885, |
| "learning_rate": 2.305782710280374e-06, |
| "loss": 0.0018, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.9287383177570093, |
| "grad_norm": 0.03787772357463837, |
| "learning_rate": 2.320385514018692e-06, |
| "loss": 0.0017, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.9345794392523364, |
| "grad_norm": 0.03723740950226784, |
| "learning_rate": 2.3349883177570094e-06, |
| "loss": 0.0017, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9404205607476636, |
| "grad_norm": 0.03676334023475647, |
| "learning_rate": 2.3495911214953273e-06, |
| "loss": 0.0017, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.9462616822429907, |
| "grad_norm": 0.036133646965026855, |
| "learning_rate": 2.364193925233645e-06, |
| "loss": 0.0016, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9521028037383178, |
| "grad_norm": 0.03555206581950188, |
| "learning_rate": 2.3787967289719627e-06, |
| "loss": 0.0016, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9579439252336449, |
| "grad_norm": 0.03623361513018608, |
| "learning_rate": 2.3933995327102806e-06, |
| "loss": 0.0418, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9637850467289719, |
| "grad_norm": 0.036169301718473434, |
| "learning_rate": 2.4080023364485985e-06, |
| "loss": 0.0016, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.969626168224299, |
| "grad_norm": 0.035384681075811386, |
| "learning_rate": 2.422605140186916e-06, |
| "loss": 0.0016, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9754672897196262, |
| "grad_norm": 0.04360009357333183, |
| "learning_rate": 2.437207943925234e-06, |
| "loss": 0.0419, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.9813084112149533, |
| "grad_norm": 0.0415232889354229, |
| "learning_rate": 2.4518107476635517e-06, |
| "loss": 0.0016, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.9871495327102804, |
| "grad_norm": 0.03600607439875603, |
| "learning_rate": 2.4664135514018696e-06, |
| "loss": 0.0016, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.9929906542056075, |
| "grad_norm": 0.09126334637403488, |
| "learning_rate": 2.481016355140187e-06, |
| "loss": 0.0419, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9988317757009346, |
| "grad_norm": 0.051943764090538025, |
| "learning_rate": 2.4956191588785046e-06, |
| "loss": 0.0017, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.998563109733041, |
| "eval_fbeta": 0.9977028760709574, |
| "eval_loss": 0.011804744601249695, |
| "eval_precision": 0.9971303428485123, |
| "eval_recall": 1.0, |
| "eval_runtime": 170.816, |
| "eval_samples_per_second": 68.711, |
| "eval_steps_per_second": 8.594, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.0046728971962617, |
| "grad_norm": 0.040207888931035995, |
| "learning_rate": 2.510221962616823e-06, |
| "loss": 0.0016, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.0105140186915889, |
| "grad_norm": 0.06412038952112198, |
| "learning_rate": 2.524824766355141e-06, |
| "loss": 0.0422, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.016355140186916, |
| "grad_norm": 0.04405215010046959, |
| "learning_rate": 2.5394275700934583e-06, |
| "loss": 0.0017, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.022196261682243, |
| "grad_norm": 0.037864964455366135, |
| "learning_rate": 2.5540303738317758e-06, |
| "loss": 0.0016, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.02803738317757, |
| "grad_norm": 0.035099685192108154, |
| "learning_rate": 2.5686331775700936e-06, |
| "loss": 0.0015, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.033878504672897, |
| "grad_norm": 0.03391886502504349, |
| "learning_rate": 2.583235981308411e-06, |
| "loss": 0.0015, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.0397196261682242, |
| "grad_norm": 0.03310862556099892, |
| "learning_rate": 2.597838785046729e-06, |
| "loss": 0.0015, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.0455607476635513, |
| "grad_norm": 0.032472554594278336, |
| "learning_rate": 2.612441588785047e-06, |
| "loss": 0.0014, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.0514018691588785, |
| "grad_norm": 0.03186679258942604, |
| "learning_rate": 2.627044392523365e-06, |
| "loss": 0.0014, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.0572429906542056, |
| "grad_norm": 0.03139648213982582, |
| "learning_rate": 2.6416471962616823e-06, |
| "loss": 0.0014, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.0630841121495327, |
| "grad_norm": 0.030965762212872505, |
| "learning_rate": 2.65625e-06, |
| "loss": 0.0014, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.0689252336448598, |
| "grad_norm": 0.03837515413761139, |
| "learning_rate": 2.670852803738318e-06, |
| "loss": 0.0426, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.074766355140187, |
| "grad_norm": 0.03224362060427666, |
| "learning_rate": 2.685455607476636e-06, |
| "loss": 0.0014, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.080607476635514, |
| "grad_norm": 0.10661806166172028, |
| "learning_rate": 2.7000584112149535e-06, |
| "loss": 0.0426, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.0864485981308412, |
| "grad_norm": 0.04082982614636421, |
| "learning_rate": 2.7146612149532714e-06, |
| "loss": 0.0016, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.0922897196261683, |
| "grad_norm": 0.08507952094078064, |
| "learning_rate": 2.7292640186915893e-06, |
| "loss": 0.0421, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.0981308411214954, |
| "grad_norm": 0.05080524832010269, |
| "learning_rate": 2.743866822429907e-06, |
| "loss": 0.0022, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.1039719626168225, |
| "grad_norm": 0.04093686118721962, |
| "learning_rate": 2.7584696261682246e-06, |
| "loss": 0.0016, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.1098130841121496, |
| "grad_norm": 0.03352896124124527, |
| "learning_rate": 2.773072429906542e-06, |
| "loss": 0.0014, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.1156542056074765, |
| "grad_norm": 0.03170730173587799, |
| "learning_rate": 2.78767523364486e-06, |
| "loss": 0.0014, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.1214953271028036, |
| "grad_norm": 0.030157096683979034, |
| "learning_rate": 2.8022780373831775e-06, |
| "loss": 0.0013, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.1273364485981308, |
| "grad_norm": 0.02939213439822197, |
| "learning_rate": 2.8168808411214954e-06, |
| "loss": 0.0013, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.1331775700934579, |
| "grad_norm": 0.028846966102719307, |
| "learning_rate": 2.8314836448598133e-06, |
| "loss": 0.0013, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.139018691588785, |
| "grad_norm": 0.02849399857223034, |
| "learning_rate": 2.846086448598131e-06, |
| "loss": 0.0012, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.144859813084112, |
| "grad_norm": 0.028026683256030083, |
| "learning_rate": 2.8606892523364486e-06, |
| "loss": 0.0012, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.1507009345794392, |
| "grad_norm": 0.029067154973745346, |
| "learning_rate": 2.8752920560747665e-06, |
| "loss": 0.0431, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.1565420560747663, |
| "grad_norm": 0.028508760035037994, |
| "learning_rate": 2.8898948598130844e-06, |
| "loss": 0.0012, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.1623831775700935, |
| "grad_norm": 0.028263544663786888, |
| "learning_rate": 2.9044976635514023e-06, |
| "loss": 0.0012, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.1682242990654206, |
| "grad_norm": 0.027732761576771736, |
| "learning_rate": 2.91910046728972e-06, |
| "loss": 0.0012, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1740654205607477, |
| "grad_norm": 0.027435291558504105, |
| "learning_rate": 2.9337032710280377e-06, |
| "loss": 0.0012, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.1799065420560748, |
| "grad_norm": 0.02704302780330181, |
| "learning_rate": 2.9483060747663556e-06, |
| "loss": 0.0012, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.185747663551402, |
| "grad_norm": 0.02908189222216606, |
| "learning_rate": 2.9629088785046735e-06, |
| "loss": 0.0433, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.191588785046729, |
| "grad_norm": 0.04472142830491066, |
| "learning_rate": 2.977511682242991e-06, |
| "loss": 0.0433, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.1974299065420562, |
| "grad_norm": 0.033964864909648895, |
| "learning_rate": 2.9921144859813085e-06, |
| "loss": 0.0013, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.203271028037383, |
| "grad_norm": 0.030364977195858955, |
| "learning_rate": 3.0067172897196264e-06, |
| "loss": 0.0013, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.2091121495327104, |
| "grad_norm": 0.028072107583284378, |
| "learning_rate": 3.021320093457944e-06, |
| "loss": 0.0012, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.2149532710280373, |
| "grad_norm": 0.02732822299003601, |
| "learning_rate": 3.0359228971962617e-06, |
| "loss": 0.0012, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.2207943925233644, |
| "grad_norm": 0.026666365563869476, |
| "learning_rate": 3.0505257009345796e-06, |
| "loss": 0.0012, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.2266355140186915, |
| "grad_norm": 0.026195447891950607, |
| "learning_rate": 3.0651285046728975e-06, |
| "loss": 0.0011, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.2324766355140186, |
| "grad_norm": 0.02573522739112377, |
| "learning_rate": 3.079731308411215e-06, |
| "loss": 0.0011, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.2383177570093458, |
| "grad_norm": 0.025196623057127, |
| "learning_rate": 3.094334112149533e-06, |
| "loss": 0.0011, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.2441588785046729, |
| "grad_norm": 0.025058843195438385, |
| "learning_rate": 3.108936915887851e-06, |
| "loss": 0.0011, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.02460530959069729, |
| "learning_rate": 3.1235397196261687e-06, |
| "loss": 0.0011, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.2558411214953271, |
| "grad_norm": 0.024240560829639435, |
| "learning_rate": 3.138142523364486e-06, |
| "loss": 0.001, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.2616822429906542, |
| "grad_norm": 0.023768194019794464, |
| "learning_rate": 3.152745327102804e-06, |
| "loss": 0.001, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.2675233644859814, |
| "grad_norm": 0.023436764255166054, |
| "learning_rate": 3.167348130841122e-06, |
| "loss": 0.001, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.2733644859813085, |
| "grad_norm": 0.022970303893089294, |
| "learning_rate": 3.18195093457944e-06, |
| "loss": 0.001, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.2792056074766356, |
| "grad_norm": 0.022840231657028198, |
| "learning_rate": 3.1965537383177573e-06, |
| "loss": 0.001, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.2850467289719627, |
| "grad_norm": 0.024615749716758728, |
| "learning_rate": 3.2111565420560752e-06, |
| "loss": 0.0443, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.2908878504672896, |
| "grad_norm": 0.023948077112436295, |
| "learning_rate": 3.2257593457943927e-06, |
| "loss": 0.001, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.296728971962617, |
| "grad_norm": 0.023156747221946716, |
| "learning_rate": 3.24036214953271e-06, |
| "loss": 0.001, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.3025700934579438, |
| "grad_norm": 0.022802365943789482, |
| "learning_rate": 3.254964953271028e-06, |
| "loss": 0.001, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.308411214953271, |
| "grad_norm": 0.022550372406840324, |
| "learning_rate": 3.269567757009346e-06, |
| "loss": 0.001, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.314252336448598, |
| "grad_norm": 0.022892745211720467, |
| "learning_rate": 3.284170560747664e-06, |
| "loss": 0.0443, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.3200934579439252, |
| "grad_norm": 0.02606125734746456, |
| "learning_rate": 3.2987733644859814e-06, |
| "loss": 0.001, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.3259345794392523, |
| "grad_norm": 0.024206535890698433, |
| "learning_rate": 3.3133761682242993e-06, |
| "loss": 0.001, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.3317757009345794, |
| "grad_norm": 0.02334391325712204, |
| "learning_rate": 3.327978971962617e-06, |
| "loss": 0.001, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.3376168224299065, |
| "grad_norm": 0.022638387978076935, |
| "learning_rate": 3.342581775700935e-06, |
| "loss": 0.001, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.3434579439252337, |
| "grad_norm": 0.022134365513920784, |
| "learning_rate": 3.3571845794392525e-06, |
| "loss": 0.0009, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.3492990654205608, |
| "grad_norm": 0.03821112960577011, |
| "learning_rate": 3.3717873831775704e-06, |
| "loss": 0.0443, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.355140186915888, |
| "grad_norm": 0.0276421457529068, |
| "learning_rate": 3.3863901869158883e-06, |
| "loss": 0.0011, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.360981308411215, |
| "grad_norm": 0.02461417205631733, |
| "learning_rate": 3.4009929906542062e-06, |
| "loss": 0.001, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.3668224299065421, |
| "grad_norm": 0.022621462121605873, |
| "learning_rate": 3.4155957943925237e-06, |
| "loss": 0.001, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.3726635514018692, |
| "grad_norm": 0.02172948606312275, |
| "learning_rate": 3.4301985981308416e-06, |
| "loss": 0.0009, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.3785046728971961, |
| "grad_norm": 0.021321304142475128, |
| "learning_rate": 3.444801401869159e-06, |
| "loss": 0.0009, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.3843457943925235, |
| "grad_norm": 0.026693595573306084, |
| "learning_rate": 3.4594042056074765e-06, |
| "loss": 0.0445, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.3901869158878504, |
| "grad_norm": 0.024471307173371315, |
| "learning_rate": 3.4740070093457944e-06, |
| "loss": 0.001, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.3960280373831775, |
| "grad_norm": 0.023060401901602745, |
| "learning_rate": 3.4886098130841123e-06, |
| "loss": 0.001, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.4018691588785046, |
| "grad_norm": 0.025215281173586845, |
| "learning_rate": 3.5032126168224302e-06, |
| "loss": 0.0438, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.4077102803738317, |
| "grad_norm": 0.031053408980369568, |
| "learning_rate": 3.5178154205607477e-06, |
| "loss": 0.0012, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.4135514018691588, |
| "grad_norm": 0.02737055905163288, |
| "learning_rate": 3.5324182242990656e-06, |
| "loss": 0.001, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.419392523364486, |
| "grad_norm": 0.02308940328657627, |
| "learning_rate": 3.5470210280373835e-06, |
| "loss": 0.001, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.425233644859813, |
| "grad_norm": 0.021701965481042862, |
| "learning_rate": 3.5616238317757014e-06, |
| "loss": 0.0009, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.4310747663551402, |
| "grad_norm": 0.021604137495160103, |
| "learning_rate": 3.576226635514019e-06, |
| "loss": 0.0009, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.4369158878504673, |
| "grad_norm": 0.020978761836886406, |
| "learning_rate": 3.5908294392523368e-06, |
| "loss": 0.0009, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.4427570093457944, |
| "grad_norm": 2.0315942764282227, |
| "learning_rate": 3.6054322429906547e-06, |
| "loss": 0.0881, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.4485981308411215, |
| "grad_norm": 0.053938183933496475, |
| "learning_rate": 3.6200350467289726e-06, |
| "loss": 0.0019, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.4544392523364487, |
| "grad_norm": 0.03417206183075905, |
| "learning_rate": 3.63463785046729e-06, |
| "loss": 0.0012, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.4602803738317758, |
| "grad_norm": 0.025503478944301605, |
| "learning_rate": 3.649240654205608e-06, |
| "loss": 0.001, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.4661214953271027, |
| "grad_norm": 0.02286619506776333, |
| "learning_rate": 3.6638434579439254e-06, |
| "loss": 0.001, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.47196261682243, |
| "grad_norm": 0.02164420299232006, |
| "learning_rate": 3.678446261682243e-06, |
| "loss": 0.0009, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.477803738317757, |
| "grad_norm": 0.028322748839855194, |
| "learning_rate": 3.693049065420561e-06, |
| "loss": 0.0446, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.4836448598130842, |
| "grad_norm": 0.0625884160399437, |
| "learning_rate": 3.7076518691588787e-06, |
| "loss": 0.0435, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.4894859813084111, |
| "grad_norm": 0.030157284811139107, |
| "learning_rate": 3.7222546728971966e-06, |
| "loss": 0.0013, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.4953271028037383, |
| "grad_norm": 0.024576175957918167, |
| "learning_rate": 3.736857476635514e-06, |
| "loss": 0.0011, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.5011682242990654, |
| "grad_norm": 0.022601209580898285, |
| "learning_rate": 3.751460280373832e-06, |
| "loss": 0.001, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.5070093457943925, |
| "grad_norm": 0.02741135098040104, |
| "learning_rate": 3.76606308411215e-06, |
| "loss": 0.0437, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.5128504672897196, |
| "grad_norm": 2.4886422157287598, |
| "learning_rate": 3.7806658878504678e-06, |
| "loss": 0.0776, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.5186915887850467, |
| "grad_norm": 0.049459826201200485, |
| "learning_rate": 3.7952686915887852e-06, |
| "loss": 0.0025, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.5245327102803738, |
| "grad_norm": 0.03191259503364563, |
| "learning_rate": 3.809871495327103e-06, |
| "loss": 0.0013, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.530373831775701, |
| "grad_norm": 0.025913212448358536, |
| "learning_rate": 3.824474299065421e-06, |
| "loss": 0.0011, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.536214953271028, |
| "grad_norm": 0.02325914427638054, |
| "learning_rate": 3.839077102803739e-06, |
| "loss": 0.001, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.542056074766355, |
| "grad_norm": 0.02170516550540924, |
| "learning_rate": 3.853679906542057e-06, |
| "loss": 0.0009, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.5478971962616823, |
| "grad_norm": 0.020911023020744324, |
| "learning_rate": 3.868282710280375e-06, |
| "loss": 0.0009, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.5537383177570092, |
| "grad_norm": 0.020392388105392456, |
| "learning_rate": 3.882885514018692e-06, |
| "loss": 0.0009, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.5595794392523366, |
| "grad_norm": 0.02007397636771202, |
| "learning_rate": 3.89748831775701e-06, |
| "loss": 0.0008, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.5654205607476634, |
| "grad_norm": 1.490532398223877, |
| "learning_rate": 3.912091121495328e-06, |
| "loss": 0.0451, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.5712616822429908, |
| "grad_norm": 0.020575718954205513, |
| "learning_rate": 3.926693925233645e-06, |
| "loss": 0.0008, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.5771028037383177, |
| "grad_norm": 0.02088101953268051, |
| "learning_rate": 3.9412967289719625e-06, |
| "loss": 0.0009, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.582943925233645, |
| "grad_norm": 0.022468693554401398, |
| "learning_rate": 3.9558995327102804e-06, |
| "loss": 0.0446, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.588785046728972, |
| "grad_norm": 0.41546332836151123, |
| "learning_rate": 3.970502336448598e-06, |
| "loss": 0.2174, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.594626168224299, |
| "grad_norm": 29.55653190612793, |
| "learning_rate": 3.985105140186916e-06, |
| "loss": 0.7007, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.6004672897196262, |
| "grad_norm": 0.23895660042762756, |
| "learning_rate": 3.999707943925234e-06, |
| "loss": 0.2632, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.6063084112149533, |
| "grad_norm": 20.28415298461914, |
| "learning_rate": 4.014310747663552e-06, |
| "loss": 0.3131, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.6121495327102804, |
| "grad_norm": 66.40315246582031, |
| "learning_rate": 4.02891355140187e-06, |
| "loss": 1.465, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.6179906542056075, |
| "grad_norm": 1.3744255304336548, |
| "learning_rate": 4.043516355140187e-06, |
| "loss": 0.2324, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.6238317757009346, |
| "grad_norm": 0.07546886801719666, |
| "learning_rate": 4.058119158878505e-06, |
| "loss": 0.0066, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.6296728971962615, |
| "grad_norm": 0.05592099949717522, |
| "learning_rate": 4.072721962616823e-06, |
| "loss": 0.0421, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.6355140186915889, |
| "grad_norm": 0.053602512925863266, |
| "learning_rate": 4.087324766355141e-06, |
| "loss": 0.0022, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.6413551401869158, |
| "grad_norm": 0.0363592766225338, |
| "learning_rate": 4.1019275700934586e-06, |
| "loss": 0.0018, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.647196261682243, |
| "grad_norm": 0.04215683415532112, |
| "learning_rate": 4.116530373831776e-06, |
| "loss": 0.042, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.65303738317757, |
| "grad_norm": 0.04295524209737778, |
| "learning_rate": 4.1311331775700935e-06, |
| "loss": 0.0018, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.6588785046728973, |
| "grad_norm": 0.03542430326342583, |
| "learning_rate": 4.145735981308411e-06, |
| "loss": 0.0016, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.6647196261682242, |
| "grad_norm": 0.0312524288892746, |
| "learning_rate": 4.160338785046729e-06, |
| "loss": 0.0014, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.6705607476635516, |
| "grad_norm": 0.027192620560526848, |
| "learning_rate": 4.174941588785047e-06, |
| "loss": 0.0013, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.6764018691588785, |
| "grad_norm": 0.027118809521198273, |
| "learning_rate": 4.189544392523365e-06, |
| "loss": 0.0012, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.6822429906542056, |
| "grad_norm": 0.02445983700454235, |
| "learning_rate": 4.204147196261682e-06, |
| "loss": 0.0011, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.6880841121495327, |
| "grad_norm": 0.024152180179953575, |
| "learning_rate": 4.21875e-06, |
| "loss": 0.001, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.6939252336448598, |
| "grad_norm": 0.0222761407494545, |
| "learning_rate": 4.233352803738318e-06, |
| "loss": 0.001, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.699766355140187, |
| "grad_norm": 0.024713346734642982, |
| "learning_rate": 4.247955607476636e-06, |
| "loss": 0.0443, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.705607476635514, |
| "grad_norm": 0.025482937693595886, |
| "learning_rate": 4.262558411214954e-06, |
| "loss": 0.001, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.7114485981308412, |
| "grad_norm": 0.024490345269441605, |
| "learning_rate": 4.277161214953272e-06, |
| "loss": 0.001, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.7172897196261683, |
| "grad_norm": 0.02450953796505928, |
| "learning_rate": 4.2917640186915895e-06, |
| "loss": 0.0436, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.7231308411214954, |
| "grad_norm": 0.027671849355101585, |
| "learning_rate": 4.3063668224299074e-06, |
| "loss": 0.0011, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.7289719626168223, |
| "grad_norm": 0.03285020962357521, |
| "learning_rate": 4.3209696261682245e-06, |
| "loss": 0.0426, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.7348130841121496, |
| "grad_norm": 0.03429288789629936, |
| "learning_rate": 4.335572429906542e-06, |
| "loss": 0.0014, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.7406542056074765, |
| "grad_norm": 0.03147612139582634, |
| "learning_rate": 4.35017523364486e-06, |
| "loss": 0.0014, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.7464953271028039, |
| "grad_norm": 0.5087982416152954, |
| "learning_rate": 4.364778037383177e-06, |
| "loss": 0.0049, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.7523364485981308, |
| "grad_norm": 0.04070281982421875, |
| "learning_rate": 4.379380841121495e-06, |
| "loss": 0.028, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.758177570093458, |
| "grad_norm": 24.421571731567383, |
| "learning_rate": 4.393983644859813e-06, |
| "loss": 0.9912, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.764018691588785, |
| "grad_norm": 3.793189287185669, |
| "learning_rate": 4.408586448598131e-06, |
| "loss": 0.8169, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.769859813084112, |
| "grad_norm": 0.18406537175178528, |
| "learning_rate": 4.423189252336449e-06, |
| "loss": 0.6664, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.7757009345794392, |
| "grad_norm": 3.0396714210510254, |
| "learning_rate": 4.437792056074767e-06, |
| "loss": 0.6962, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.7815420560747663, |
| "grad_norm": 1.405199408531189, |
| "learning_rate": 4.452394859813085e-06, |
| "loss": 0.7149, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.7873831775700935, |
| "grad_norm": 3.036806106567383, |
| "learning_rate": 4.466997663551403e-06, |
| "loss": 0.6842, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.7932242990654206, |
| "grad_norm": 1.536515235900879, |
| "learning_rate": 4.48160046728972e-06, |
| "loss": 0.3574, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.7990654205607477, |
| "grad_norm": 0.11639049649238586, |
| "learning_rate": 4.496203271028038e-06, |
| "loss": 0.0598, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.8049065420560748, |
| "grad_norm": 0.11044025421142578, |
| "learning_rate": 4.5108060747663555e-06, |
| "loss": 0.0701, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.810747663551402, |
| "grad_norm": 0.102878138422966, |
| "learning_rate": 4.525408878504673e-06, |
| "loss": 0.1514, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.8165887850467288, |
| "grad_norm": 7.707913875579834, |
| "learning_rate": 4.540011682242991e-06, |
| "loss": 0.7924, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.8224299065420562, |
| "grad_norm": 5.9456353187561035, |
| "learning_rate": 4.554614485981308e-06, |
| "loss": 1.3826, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.828271028037383, |
| "grad_norm": 2.2925925254821777, |
| "learning_rate": 4.569217289719626e-06, |
| "loss": 0.7252, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.8341121495327104, |
| "grad_norm": 2.821223258972168, |
| "learning_rate": 4.583820093457944e-06, |
| "loss": 0.7524, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.8399532710280373, |
| "grad_norm": 2.5678889751434326, |
| "learning_rate": 4.598422897196262e-06, |
| "loss": 0.4001, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.8457943925233646, |
| "grad_norm": 0.44236326217651367, |
| "learning_rate": 4.61302570093458e-06, |
| "loss": 0.0417, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.8516355140186915, |
| "grad_norm": 0.0788566917181015, |
| "learning_rate": 4.627628504672898e-06, |
| "loss": 0.0075, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.8574766355140186, |
| "grad_norm": 0.06400685757398605, |
| "learning_rate": 4.642231308411215e-06, |
| "loss": 0.0035, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.8633177570093458, |
| "grad_norm": 0.050487253814935684, |
| "learning_rate": 4.656834112149533e-06, |
| "loss": 0.0025, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.8691588785046729, |
| "grad_norm": 0.042493101209402084, |
| "learning_rate": 4.671436915887851e-06, |
| "loss": 0.0423, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.04191233962774277, |
| "learning_rate": 4.6860397196261686e-06, |
| "loss": 0.0019, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.8808411214953271, |
| "grad_norm": 0.04269988089799881, |
| "learning_rate": 4.7006425233644865e-06, |
| "loss": 0.0414, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.8866822429906542, |
| "grad_norm": 0.040563952177762985, |
| "learning_rate": 4.715245327102804e-06, |
| "loss": 0.0019, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.8925233644859814, |
| "grad_norm": 0.04327201843261719, |
| "learning_rate": 4.729848130841122e-06, |
| "loss": 0.0405, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.8983644859813085, |
| "grad_norm": 0.04342912510037422, |
| "learning_rate": 4.74445093457944e-06, |
| "loss": 0.0022, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.9042056074766354, |
| "grad_norm": 0.04997817426919937, |
| "learning_rate": 4.759053738317758e-06, |
| "loss": 0.0397, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.9100467289719627, |
| "grad_norm": 1.362629771232605, |
| "learning_rate": 4.773656542056075e-06, |
| "loss": 0.0386, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.9158878504672896, |
| "grad_norm": 0.05325537174940109, |
| "learning_rate": 4.788259345794393e-06, |
| "loss": 0.0027, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.921728971962617, |
| "grad_norm": 0.04630275070667267, |
| "learning_rate": 4.80286214953271e-06, |
| "loss": 0.0028, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.9275700934579438, |
| "grad_norm": 0.04541867598891258, |
| "learning_rate": 4.817464953271028e-06, |
| "loss": 0.0022, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.9334112149532712, |
| "grad_norm": 0.04611349105834961, |
| "learning_rate": 4.832067757009346e-06, |
| "loss": 0.0398, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.939252336448598, |
| "grad_norm": 0.04701936990022659, |
| "learning_rate": 4.846670560747664e-06, |
| "loss": 0.0391, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.9450934579439252, |
| "grad_norm": 0.054911911487579346, |
| "learning_rate": 4.861273364485982e-06, |
| "loss": 0.0026, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.9509345794392523, |
| "grad_norm": 0.05357871577143669, |
| "learning_rate": 4.8758761682242995e-06, |
| "loss": 0.0024, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.9567757009345794, |
| "grad_norm": 0.042115770280361176, |
| "learning_rate": 4.8904789719626174e-06, |
| "loss": 0.002, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.9626168224299065, |
| "grad_norm": 0.0314444899559021, |
| "learning_rate": 4.905081775700935e-06, |
| "loss": 0.0017, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.9684579439252337, |
| "grad_norm": 0.029266072437167168, |
| "learning_rate": 4.919684579439253e-06, |
| "loss": 0.0015, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.9742990654205608, |
| "grad_norm": 0.03335956856608391, |
| "learning_rate": 4.93428738317757e-06, |
| "loss": 0.0014, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.980140186915888, |
| "grad_norm": 0.029220635071396828, |
| "learning_rate": 4.948890186915888e-06, |
| "loss": 0.0013, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.985981308411215, |
| "grad_norm": 0.026790356263518333, |
| "learning_rate": 4.963492990654206e-06, |
| "loss": 0.0013, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.991822429906542, |
| "grad_norm": 0.026554999873042107, |
| "learning_rate": 4.978095794392524e-06, |
| "loss": 0.0421, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.9976635514018692, |
| "grad_norm": 0.027294134721159935, |
| "learning_rate": 4.992698598130842e-06, |
| "loss": 0.0012, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1": 0.9984869117869571, |
| "eval_fbeta": 0.9978526993915837, |
| "eval_loss": 0.01201227679848671, |
| "eval_precision": 0.9974304715840387, |
| "eval_recall": 0.9995455922447744, |
| "eval_runtime": 171.0255, |
| "eval_samples_per_second": 68.627, |
| "eval_steps_per_second": 8.584, |
| "step": 3424 |
| }, |
| { |
| "epoch": 2.003504672897196, |
| "grad_norm": 0.0311049185693264, |
| "learning_rate": 5.007301401869159e-06, |
| "loss": 0.0013, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.0093457943925235, |
| "grad_norm": 0.029238952323794365, |
| "learning_rate": 5.021904205607478e-06, |
| "loss": 0.0447, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.0151869158878504, |
| "grad_norm": 0.07359280437231064, |
| "learning_rate": 5.036507009345795e-06, |
| "loss": 0.0014, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.0210280373831777, |
| "grad_norm": 0.029126616194844246, |
| "learning_rate": 5.051109813084113e-06, |
| "loss": 0.0786, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.0268691588785046, |
| "grad_norm": 0.02853413298726082, |
| "learning_rate": 5.0657126168224305e-06, |
| "loss": 0.0013, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.032710280373832, |
| "grad_norm": 0.02674773521721363, |
| "learning_rate": 5.0803154205607484e-06, |
| "loss": 0.0013, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.038551401869159, |
| "grad_norm": 0.02553749829530716, |
| "learning_rate": 5.0949182242990655e-06, |
| "loss": 0.0325, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.044392523364486, |
| "grad_norm": 0.023599721491336823, |
| "learning_rate": 5.109521028037384e-06, |
| "loss": 0.0011, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.050233644859813, |
| "grad_norm": 0.02482794038951397, |
| "learning_rate": 5.124123831775701e-06, |
| "loss": 0.0011, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.05607476635514, |
| "grad_norm": 0.026357341557741165, |
| "learning_rate": 5.138726635514018e-06, |
| "loss": 0.001, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.0619158878504673, |
| "grad_norm": 0.024338532239198685, |
| "learning_rate": 5.153329439252337e-06, |
| "loss": 0.001, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.067757009345794, |
| "grad_norm": 0.020824845880270004, |
| "learning_rate": 5.167932242990654e-06, |
| "loss": 0.001, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.0735981308411215, |
| "grad_norm": 0.023196179419755936, |
| "learning_rate": 5.182535046728973e-06, |
| "loss": 0.001, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.0794392523364484, |
| "grad_norm": 0.021305715665221214, |
| "learning_rate": 5.19713785046729e-06, |
| "loss": 0.0009, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.085280373831776, |
| "grad_norm": 0.019744129851460457, |
| "learning_rate": 5.211740654205608e-06, |
| "loss": 0.0009, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.0911214953271027, |
| "grad_norm": 0.01918872818350792, |
| "learning_rate": 5.226343457943926e-06, |
| "loss": 0.0008, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.09696261682243, |
| "grad_norm": 0.01923677884042263, |
| "learning_rate": 5.240946261682244e-06, |
| "loss": 0.0008, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.102803738317757, |
| "grad_norm": 0.020027387887239456, |
| "learning_rate": 5.255549065420561e-06, |
| "loss": 0.0444, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.1086448598130842, |
| "grad_norm": 0.020742233842611313, |
| "learning_rate": 5.270151869158879e-06, |
| "loss": 0.0437, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.114485981308411, |
| "grad_norm": 0.02387331984937191, |
| "learning_rate": 5.2847546728971965e-06, |
| "loss": 0.001, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.1203271028037385, |
| "grad_norm": 0.024925414472818375, |
| "learning_rate": 5.299357476635515e-06, |
| "loss": 0.001, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.1261682242990654, |
| "grad_norm": 0.02018832229077816, |
| "learning_rate": 5.313960280373832e-06, |
| "loss": 0.0009, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.1320093457943927, |
| "grad_norm": 0.023544279858469963, |
| "learning_rate": 5.32856308411215e-06, |
| "loss": 0.001, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.1378504672897196, |
| "grad_norm": 0.02338625304400921, |
| "learning_rate": 5.343165887850468e-06, |
| "loss": 0.001, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.143691588785047, |
| "grad_norm": 0.021194949746131897, |
| "learning_rate": 5.357768691588785e-06, |
| "loss": 0.0009, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.149532710280374, |
| "grad_norm": 0.02002919651567936, |
| "learning_rate": 5.372371495327103e-06, |
| "loss": 0.0436, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.1553738317757007, |
| "grad_norm": 0.024264391511678696, |
| "learning_rate": 5.386974299065421e-06, |
| "loss": 0.001, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.161214953271028, |
| "grad_norm": 0.025951523333787918, |
| "learning_rate": 5.401577102803739e-06, |
| "loss": 0.0423, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.167056074766355, |
| "grad_norm": 0.03249853104352951, |
| "learning_rate": 5.416179906542056e-06, |
| "loss": 0.0013, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.1728971962616823, |
| "grad_norm": 0.028281256556510925, |
| "learning_rate": 5.430782710280375e-06, |
| "loss": 0.0012, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.178738317757009, |
| "grad_norm": 0.02567378431558609, |
| "learning_rate": 5.445385514018692e-06, |
| "loss": 0.0011, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.1845794392523366, |
| "grad_norm": 0.021611211821436882, |
| "learning_rate": 5.45998831775701e-06, |
| "loss": 0.0011, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.1904205607476634, |
| "grad_norm": 0.02238270454108715, |
| "learning_rate": 5.4745911214953274e-06, |
| "loss": 0.0009, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.196261682242991, |
| "grad_norm": 0.02387162297964096, |
| "learning_rate": 5.489193925233645e-06, |
| "loss": 0.0432, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.2021028037383177, |
| "grad_norm": 0.02597387693822384, |
| "learning_rate": 5.503796728971963e-06, |
| "loss": 0.0011, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.207943925233645, |
| "grad_norm": 0.023671170696616173, |
| "learning_rate": 5.518399532710281e-06, |
| "loss": 0.0011, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.213785046728972, |
| "grad_norm": 0.023667603731155396, |
| "learning_rate": 5.533002336448598e-06, |
| "loss": 0.001, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.2196261682242993, |
| "grad_norm": 0.027072029188275337, |
| "learning_rate": 5.547605140186917e-06, |
| "loss": 0.0427, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.225467289719626, |
| "grad_norm": 0.02555328793823719, |
| "learning_rate": 5.562207943925234e-06, |
| "loss": 0.0012, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.231308411214953, |
| "grad_norm": 0.02583390660583973, |
| "learning_rate": 5.576810747663551e-06, |
| "loss": 0.0012, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.2371495327102804, |
| "grad_norm": 0.022327054291963577, |
| "learning_rate": 5.59141355140187e-06, |
| "loss": 0.0011, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.2429906542056073, |
| "grad_norm": 0.02613520622253418, |
| "learning_rate": 5.606016355140187e-06, |
| "loss": 0.001, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.2488317757009346, |
| "grad_norm": 0.020380638539791107, |
| "learning_rate": 5.620619158878506e-06, |
| "loss": 0.0009, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.2546728971962615, |
| "grad_norm": 0.02417253516614437, |
| "learning_rate": 5.635221962616823e-06, |
| "loss": 0.001, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.260514018691589, |
| "grad_norm": 0.01819733716547489, |
| "learning_rate": 5.6498247663551405e-06, |
| "loss": 0.0009, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.2663551401869158, |
| "grad_norm": 0.01892516203224659, |
| "learning_rate": 5.6644275700934584e-06, |
| "loss": 0.0008, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.272196261682243, |
| "grad_norm": 0.02071457915008068, |
| "learning_rate": 5.679030373831776e-06, |
| "loss": 0.0008, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.27803738317757, |
| "grad_norm": 0.01727299951016903, |
| "learning_rate": 5.693633177570093e-06, |
| "loss": 0.0008, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.2838785046728973, |
| "grad_norm": 0.017327282577753067, |
| "learning_rate": 5.708235981308412e-06, |
| "loss": 0.0007, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.289719626168224, |
| "grad_norm": 0.016446802765130997, |
| "learning_rate": 5.722838785046729e-06, |
| "loss": 0.0007, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.2955607476635516, |
| "grad_norm": 0.01564253680408001, |
| "learning_rate": 5.737441588785048e-06, |
| "loss": 0.0007, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.3014018691588785, |
| "grad_norm": 0.01652144268155098, |
| "learning_rate": 5.752044392523365e-06, |
| "loss": 0.0007, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.307242990654206, |
| "grad_norm": 0.015415907837450504, |
| "learning_rate": 5.766647196261683e-06, |
| "loss": 0.0007, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.3130841121495327, |
| "grad_norm": 0.015839243307709694, |
| "learning_rate": 5.781250000000001e-06, |
| "loss": 0.0007, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.31892523364486, |
| "grad_norm": 0.01590505987405777, |
| "learning_rate": 5.795852803738318e-06, |
| "loss": 0.0456, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.324766355140187, |
| "grad_norm": 0.016949482262134552, |
| "learning_rate": 5.810455607476636e-06, |
| "loss": 0.0007, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.330607476635514, |
| "grad_norm": 1.5004304647445679, |
| "learning_rate": 5.825058411214954e-06, |
| "loss": 0.0449, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.336448598130841, |
| "grad_norm": 0.019646212458610535, |
| "learning_rate": 5.8396612149532715e-06, |
| "loss": 0.0008, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.342289719626168, |
| "grad_norm": 0.017813665792346, |
| "learning_rate": 5.8542640186915886e-06, |
| "loss": 0.0008, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.3481308411214954, |
| "grad_norm": 0.021812189370393753, |
| "learning_rate": 5.868866822429907e-06, |
| "loss": 0.0008, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.3539719626168223, |
| "grad_norm": 0.018918626010417938, |
| "learning_rate": 5.883469626168224e-06, |
| "loss": 0.0008, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.3598130841121496, |
| "grad_norm": 0.018906960263848305, |
| "learning_rate": 5.898072429906543e-06, |
| "loss": 0.0008, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.3656542056074765, |
| "grad_norm": 0.0188592579215765, |
| "learning_rate": 5.91267523364486e-06, |
| "loss": 0.0881, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.371495327102804, |
| "grad_norm": 1.4257460832595825, |
| "learning_rate": 5.927278037383178e-06, |
| "loss": 0.0824, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.3773364485981308, |
| "grad_norm": 0.05044642463326454, |
| "learning_rate": 5.941880841121496e-06, |
| "loss": 0.0018, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.383177570093458, |
| "grad_norm": 0.04848237708210945, |
| "learning_rate": 5.956483644859814e-06, |
| "loss": 0.0022, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.389018691588785, |
| "grad_norm": 0.03797721490263939, |
| "learning_rate": 5.971086448598131e-06, |
| "loss": 0.0019, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.3948598130841123, |
| "grad_norm": 1.3042831420898438, |
| "learning_rate": 5.98568925233645e-06, |
| "loss": 0.0761, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.4007009345794392, |
| "grad_norm": 0.056668974459171295, |
| "learning_rate": 6.000292056074767e-06, |
| "loss": 0.0026, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.406542056074766, |
| "grad_norm": 0.05167270824313164, |
| "learning_rate": 6.014894859813084e-06, |
| "loss": 0.0025, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.4123831775700935, |
| "grad_norm": 0.02998114936053753, |
| "learning_rate": 6.0294976635514025e-06, |
| "loss": 0.0022, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.418224299065421, |
| "grad_norm": 0.035051021724939346, |
| "learning_rate": 6.0441004672897195e-06, |
| "loss": 0.0016, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.4240654205607477, |
| "grad_norm": 0.032349418848752975, |
| "learning_rate": 6.058703271028038e-06, |
| "loss": 0.0015, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.4299065420560746, |
| "grad_norm": 0.03202052041888237, |
| "learning_rate": 6.073306074766355e-06, |
| "loss": 0.0013, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.435747663551402, |
| "grad_norm": 0.02790389209985733, |
| "learning_rate": 6.087908878504673e-06, |
| "loss": 0.0012, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.441588785046729, |
| "grad_norm": 0.023634234443306923, |
| "learning_rate": 6.102511682242991e-06, |
| "loss": 0.0011, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.447429906542056, |
| "grad_norm": 0.024219932034611702, |
| "learning_rate": 6.117114485981309e-06, |
| "loss": 0.001, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.453271028037383, |
| "grad_norm": 0.020274635404348373, |
| "learning_rate": 6.131717289719626e-06, |
| "loss": 0.0009, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.4591121495327104, |
| "grad_norm": 0.017824700102210045, |
| "learning_rate": 6.146320093457945e-06, |
| "loss": 0.0009, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.4649532710280373, |
| "grad_norm": 0.024308903142809868, |
| "learning_rate": 6.160922897196262e-06, |
| "loss": 0.0437, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.4707943925233646, |
| "grad_norm": 0.022735148668289185, |
| "learning_rate": 6.175525700934581e-06, |
| "loss": 0.0009, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.4766355140186915, |
| "grad_norm": 0.024395328015089035, |
| "learning_rate": 6.190128504672898e-06, |
| "loss": 0.001, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.482476635514019, |
| "grad_norm": 0.022520286962389946, |
| "learning_rate": 6.204731308411216e-06, |
| "loss": 0.0009, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.4883177570093458, |
| "grad_norm": 0.023120161145925522, |
| "learning_rate": 6.2193341121495335e-06, |
| "loss": 0.0431, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.494158878504673, |
| "grad_norm": 0.02719821222126484, |
| "learning_rate": 6.2339369158878505e-06, |
| "loss": 0.0011, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.02525412105023861, |
| "learning_rate": 6.2485397196261684e-06, |
| "loss": 0.001, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.505841121495327, |
| "grad_norm": 0.02401375211775303, |
| "learning_rate": 6.263142523364486e-06, |
| "loss": 0.001, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.5116822429906542, |
| "grad_norm": 0.025789327919483185, |
| "learning_rate": 6.277745327102804e-06, |
| "loss": 0.001, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.5175233644859816, |
| "grad_norm": 0.019803550094366074, |
| "learning_rate": 6.292348130841121e-06, |
| "loss": 0.0009, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.5233644859813085, |
| "grad_norm": 0.024618757888674736, |
| "learning_rate": 6.30695093457944e-06, |
| "loss": 0.0853, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.5292056074766354, |
| "grad_norm": 0.028840836137533188, |
| "learning_rate": 6.321553738317757e-06, |
| "loss": 0.0408, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.5350467289719627, |
| "grad_norm": 0.038188423961400986, |
| "learning_rate": 6.336156542056076e-06, |
| "loss": 0.0019, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.5408878504672896, |
| "grad_norm": 0.04892474785447121, |
| "learning_rate": 6.350759345794393e-06, |
| "loss": 0.0388, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.546728971962617, |
| "grad_norm": 0.051845621317625046, |
| "learning_rate": 6.365362149532711e-06, |
| "loss": 0.0021, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.552570093457944, |
| "grad_norm": 0.035969078540802, |
| "learning_rate": 6.379964953271029e-06, |
| "loss": 0.0021, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.558411214953271, |
| "grad_norm": 0.02896655909717083, |
| "learning_rate": 6.3945677570093466e-06, |
| "loss": 0.0017, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.564252336448598, |
| "grad_norm": 0.03623491898179054, |
| "learning_rate": 6.409170560747664e-06, |
| "loss": 0.0015, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.5700934579439254, |
| "grad_norm": 0.03049786575138569, |
| "learning_rate": 6.423773364485982e-06, |
| "loss": 0.0013, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.5759345794392523, |
| "grad_norm": 0.030855245888233185, |
| "learning_rate": 6.438376168224299e-06, |
| "loss": 0.0012, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.581775700934579, |
| "grad_norm": 0.02443297952413559, |
| "learning_rate": 6.4529789719626165e-06, |
| "loss": 0.0011, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.5876168224299065, |
| "grad_norm": 0.020587557926774025, |
| "learning_rate": 6.467581775700935e-06, |
| "loss": 0.001, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.593457943925234, |
| "grad_norm": 0.016886306926608086, |
| "learning_rate": 6.482184579439252e-06, |
| "loss": 0.0427, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.5992990654205608, |
| "grad_norm": 0.02218514122068882, |
| "learning_rate": 6.496787383177571e-06, |
| "loss": 0.001, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.6051401869158877, |
| "grad_norm": 0.03042624518275261, |
| "learning_rate": 6.511390186915888e-06, |
| "loss": 0.0011, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.610981308411215, |
| "grad_norm": 0.02127344347536564, |
| "learning_rate": 6.525992990654206e-06, |
| "loss": 0.0011, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.616822429906542, |
| "grad_norm": 0.0248599611222744, |
| "learning_rate": 6.540595794392524e-06, |
| "loss": 0.001, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.6226635514018692, |
| "grad_norm": 0.025477230548858643, |
| "learning_rate": 6.555198598130842e-06, |
| "loss": 0.0427, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.628504672897196, |
| "grad_norm": 0.029195934534072876, |
| "learning_rate": 6.569801401869159e-06, |
| "loss": 0.0011, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.6343457943925235, |
| "grad_norm": 0.028360871598124504, |
| "learning_rate": 6.5844042056074775e-06, |
| "loss": 0.0417, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.6401869158878504, |
| "grad_norm": 0.03158384561538696, |
| "learning_rate": 6.599007009345795e-06, |
| "loss": 0.0013, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.6460280373831777, |
| "grad_norm": 0.03432070091366768, |
| "learning_rate": 6.613609813084113e-06, |
| "loss": 0.0013, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.6518691588785046, |
| "grad_norm": 0.03093302808701992, |
| "learning_rate": 6.62821261682243e-06, |
| "loss": 0.0408, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.6577102803738315, |
| "grad_norm": 0.028930488973855972, |
| "learning_rate": 6.642815420560748e-06, |
| "loss": 0.0014, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.663551401869159, |
| "grad_norm": 0.035523250699043274, |
| "learning_rate": 6.657418224299066e-06, |
| "loss": 0.0014, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.669392523364486, |
| "grad_norm": 0.03366963937878609, |
| "learning_rate": 6.672021028037384e-06, |
| "loss": 0.0403, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.675233644859813, |
| "grad_norm": 0.03295886889100075, |
| "learning_rate": 6.686623831775701e-06, |
| "loss": 0.0015, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.68107476635514, |
| "grad_norm": 0.035681866109371185, |
| "learning_rate": 6.701226635514019e-06, |
| "loss": 0.0015, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.6869158878504673, |
| "grad_norm": 0.028133153915405273, |
| "learning_rate": 6.715829439252337e-06, |
| "loss": 0.0016, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.6927570093457946, |
| "grad_norm": 0.02387244440615177, |
| "learning_rate": 6.730432242990654e-06, |
| "loss": 0.0013, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.6985981308411215, |
| "grad_norm": 0.025142334401607513, |
| "learning_rate": 6.745035046728973e-06, |
| "loss": 0.0801, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.7044392523364484, |
| "grad_norm": 0.03518941253423691, |
| "learning_rate": 6.75963785046729e-06, |
| "loss": 0.0018, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.710280373831776, |
| "grad_norm": 0.030848579481244087, |
| "learning_rate": 6.7742406542056085e-06, |
| "loss": 0.0017, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.7161214953271027, |
| "grad_norm": 0.04090001434087753, |
| "learning_rate": 6.788843457943926e-06, |
| "loss": 0.0018, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.72196261682243, |
| "grad_norm": 0.027243750169873238, |
| "learning_rate": 6.8034462616822435e-06, |
| "loss": 0.0015, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.727803738317757, |
| "grad_norm": 0.02641715668141842, |
| "learning_rate": 6.818049065420561e-06, |
| "loss": 0.0013, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.7336448598130842, |
| "grad_norm": 0.025730127468705177, |
| "learning_rate": 6.832651869158879e-06, |
| "loss": 0.0012, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.739485981308411, |
| "grad_norm": 0.024618886411190033, |
| "learning_rate": 6.847254672897196e-06, |
| "loss": 0.121, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.7453271028037385, |
| "grad_norm": 0.039262160658836365, |
| "learning_rate": 6.861857476635515e-06, |
| "loss": 0.0015, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.7511682242990654, |
| "grad_norm": 0.03421768546104431, |
| "learning_rate": 6.876460280373832e-06, |
| "loss": 0.0017, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.7570093457943923, |
| "grad_norm": 0.03881515935063362, |
| "learning_rate": 6.891063084112151e-06, |
| "loss": 0.0015, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.7628504672897196, |
| "grad_norm": 0.02848188392817974, |
| "learning_rate": 6.905665887850468e-06, |
| "loss": 0.0015, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.768691588785047, |
| "grad_norm": 0.02415272779762745, |
| "learning_rate": 6.920268691588785e-06, |
| "loss": 0.0013, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.774532710280374, |
| "grad_norm": 0.026982519775629044, |
| "learning_rate": 6.934871495327104e-06, |
| "loss": 0.0012, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.7803738317757007, |
| "grad_norm": 0.0221230611205101, |
| "learning_rate": 6.949474299065421e-06, |
| "loss": 0.001, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.786214953271028, |
| "grad_norm": 0.020836248993873596, |
| "learning_rate": 6.964077102803739e-06, |
| "loss": 0.0009, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.792056074766355, |
| "grad_norm": 0.016426123678684235, |
| "learning_rate": 6.9786799065420566e-06, |
| "loss": 0.0009, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.7978971962616823, |
| "grad_norm": 0.015739239752292633, |
| "learning_rate": 6.9932827102803745e-06, |
| "loss": 0.0008, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.803738317757009, |
| "grad_norm": 0.025521619245409966, |
| "learning_rate": 7.0078855140186915e-06, |
| "loss": 0.0434, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.8095794392523366, |
| "grad_norm": 0.02532140351831913, |
| "learning_rate": 7.02248831775701e-06, |
| "loss": 0.0009, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.8154205607476634, |
| "grad_norm": 0.01968371495604515, |
| "learning_rate": 7.037091121495327e-06, |
| "loss": 0.001, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.821261682242991, |
| "grad_norm": 0.023469461128115654, |
| "learning_rate": 7.051693925233646e-06, |
| "loss": 0.001, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.8271028037383177, |
| "grad_norm": 0.023085037246346474, |
| "learning_rate": 7.066296728971963e-06, |
| "loss": 0.0009, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.832943925233645, |
| "grad_norm": 0.02076035924255848, |
| "learning_rate": 7.080899532710281e-06, |
| "loss": 0.0008, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.838785046728972, |
| "grad_norm": 0.01970616728067398, |
| "learning_rate": 7.095502336448599e-06, |
| "loss": 0.0007, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.8446261682242993, |
| "grad_norm": 0.01674484834074974, |
| "learning_rate": 7.110105140186917e-06, |
| "loss": 0.044, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.850467289719626, |
| "grad_norm": 0.019377458840608597, |
| "learning_rate": 7.124707943925234e-06, |
| "loss": 0.0008, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.856308411214953, |
| "grad_norm": 0.01821335218846798, |
| "learning_rate": 7.139310747663552e-06, |
| "loss": 0.0009, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.8621495327102804, |
| "grad_norm": 0.023551421239972115, |
| "learning_rate": 7.15391355140187e-06, |
| "loss": 0.0431, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.8679906542056077, |
| "grad_norm": 0.021875927224755287, |
| "learning_rate": 7.168516355140187e-06, |
| "loss": 0.001, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.8738317757009346, |
| "grad_norm": 0.028866572305560112, |
| "learning_rate": 7.1831191588785054e-06, |
| "loss": 0.0011, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.8796728971962615, |
| "grad_norm": 0.022117484360933304, |
| "learning_rate": 7.1977219626168225e-06, |
| "loss": 0.0009, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.885514018691589, |
| "grad_norm": 0.02393292263150215, |
| "learning_rate": 7.212324766355141e-06, |
| "loss": 0.0423, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.8913551401869158, |
| "grad_norm": 0.028178559616208076, |
| "learning_rate": 7.226927570093458e-06, |
| "loss": 0.0409, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.897196261682243, |
| "grad_norm": 0.042030058801174164, |
| "learning_rate": 7.241530373831776e-06, |
| "loss": 0.0014, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.90303738317757, |
| "grad_norm": 0.035542890429496765, |
| "learning_rate": 7.256133177570094e-06, |
| "loss": 0.0394, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.9088785046728973, |
| "grad_norm": 0.04510030522942543, |
| "learning_rate": 7.270735981308412e-06, |
| "loss": 0.0019, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.914719626168224, |
| "grad_norm": 0.03763249143958092, |
| "learning_rate": 7.285338785046729e-06, |
| "loss": 0.0018, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.9205607476635516, |
| "grad_norm": 0.03738338127732277, |
| "learning_rate": 7.299941588785048e-06, |
| "loss": 0.039, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.9264018691588785, |
| "grad_norm": 0.05190500617027283, |
| "learning_rate": 7.314544392523365e-06, |
| "loss": 0.002, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.9322429906542054, |
| "grad_norm": 0.04588020592927933, |
| "learning_rate": 7.3291471962616836e-06, |
| "loss": 0.0386, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.9380841121495327, |
| "grad_norm": 0.05511573329567909, |
| "learning_rate": 7.343750000000001e-06, |
| "loss": 0.0375, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.94392523364486, |
| "grad_norm": 0.04213655740022659, |
| "learning_rate": 7.358352803738318e-06, |
| "loss": 0.0028, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.949766355140187, |
| "grad_norm": 0.041382934898138046, |
| "learning_rate": 7.3729556074766364e-06, |
| "loss": 0.0365, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.955607476635514, |
| "grad_norm": 0.05554201453924179, |
| "learning_rate": 7.3875584112149535e-06, |
| "loss": 0.003, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.961448598130841, |
| "grad_norm": 0.04713258147239685, |
| "learning_rate": 7.402161214953271e-06, |
| "loss": 0.0025, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.9672897196261685, |
| "grad_norm": 0.03752976655960083, |
| "learning_rate": 7.416764018691589e-06, |
| "loss": 0.0019, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.9731308411214954, |
| "grad_norm": 0.0314338281750679, |
| "learning_rate": 7.431366822429907e-06, |
| "loss": 0.0016, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.9789719626168223, |
| "grad_norm": 0.03591935336589813, |
| "learning_rate": 7.445969626168224e-06, |
| "loss": 0.0777, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.9848130841121496, |
| "grad_norm": 0.052552781999111176, |
| "learning_rate": 7.460572429906543e-06, |
| "loss": 0.0021, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.9906542056074765, |
| "grad_norm": 0.03766405209898949, |
| "learning_rate": 7.47517523364486e-06, |
| "loss": 0.0022, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.996495327102804, |
| "grad_norm": 0.03205743804574013, |
| "learning_rate": 7.489778037383179e-06, |
| "loss": 0.002, |
| "step": 5130 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_f1": 0.9986386325820602, |
| "eval_fbeta": 0.9978235099982873, |
| "eval_loss": 0.01077313907444477, |
| "eval_precision": 0.9972809667673715, |
| "eval_recall": 1.0, |
| "eval_runtime": 171.3603, |
| "eval_samples_per_second": 68.493, |
| "eval_steps_per_second": 8.567, |
| "step": 5136 |
| }, |
| { |
| "epoch": 3.0023364485981308, |
| "grad_norm": 0.02765633352100849, |
| "learning_rate": 7.504380841121496e-06, |
| "loss": 0.0016, |
| "step": 5140 |
| }, |
| { |
| "epoch": 3.008177570093458, |
| "grad_norm": 0.028236212208867073, |
| "learning_rate": 7.518983644859814e-06, |
| "loss": 0.0014, |
| "step": 5150 |
| }, |
| { |
| "epoch": 3.014018691588785, |
| "grad_norm": 0.015433499589562416, |
| "learning_rate": 7.533586448598132e-06, |
| "loss": 0.0011, |
| "step": 5160 |
| }, |
| { |
| "epoch": 3.0198598130841123, |
| "grad_norm": 0.024822015315294266, |
| "learning_rate": 7.5481892523364495e-06, |
| "loss": 0.001, |
| "step": 5170 |
| }, |
| { |
| "epoch": 3.0257009345794392, |
| "grad_norm": 0.024215752258896828, |
| "learning_rate": 7.5627920560747666e-06, |
| "loss": 0.001, |
| "step": 5180 |
| }, |
| { |
| "epoch": 3.0315420560747666, |
| "grad_norm": 0.021826941519975662, |
| "learning_rate": 7.5773948598130845e-06, |
| "loss": 0.0425, |
| "step": 5190 |
| }, |
| { |
| "epoch": 3.0373831775700935, |
| "grad_norm": 0.02321033552289009, |
| "learning_rate": 7.591997663551402e-06, |
| "loss": 0.0012, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.0432242990654204, |
| "grad_norm": 0.03412926569581032, |
| "learning_rate": 7.606600467289719e-06, |
| "loss": 0.0414, |
| "step": 5210 |
| }, |
| { |
| "epoch": 3.0490654205607477, |
| "grad_norm": 0.030813759192824364, |
| "learning_rate": 7.621203271028038e-06, |
| "loss": 0.0013, |
| "step": 5220 |
| }, |
| { |
| "epoch": 3.0549065420560746, |
| "grad_norm": 0.026983771473169327, |
| "learning_rate": 7.635806074766355e-06, |
| "loss": 0.0014, |
| "step": 5230 |
| }, |
| { |
| "epoch": 3.060747663551402, |
| "grad_norm": 0.024708811193704605, |
| "learning_rate": 7.650408878504674e-06, |
| "loss": 0.0013, |
| "step": 5240 |
| }, |
| { |
| "epoch": 3.066588785046729, |
| "grad_norm": 0.022824935615062714, |
| "learning_rate": 7.665011682242991e-06, |
| "loss": 0.0011, |
| "step": 5250 |
| }, |
| { |
| "epoch": 3.072429906542056, |
| "grad_norm": 0.022325806319713593, |
| "learning_rate": 7.67961448598131e-06, |
| "loss": 0.0011, |
| "step": 5260 |
| }, |
| { |
| "epoch": 3.078271028037383, |
| "grad_norm": 0.023151839151978493, |
| "learning_rate": 7.694217289719627e-06, |
| "loss": 0.0009, |
| "step": 5270 |
| }, |
| { |
| "epoch": 3.0841121495327104, |
| "grad_norm": 0.02269587479531765, |
| "learning_rate": 7.708820093457946e-06, |
| "loss": 0.0009, |
| "step": 5280 |
| }, |
| { |
| "epoch": 3.0899532710280373, |
| "grad_norm": 0.01616939902305603, |
| "learning_rate": 7.723422897196263e-06, |
| "loss": 0.0008, |
| "step": 5290 |
| }, |
| { |
| "epoch": 3.0957943925233646, |
| "grad_norm": 0.018096772953867912, |
| "learning_rate": 7.738025700934581e-06, |
| "loss": 0.0008, |
| "step": 5300 |
| }, |
| { |
| "epoch": 3.1016355140186915, |
| "grad_norm": 0.015381600707769394, |
| "learning_rate": 7.752628504672898e-06, |
| "loss": 0.0007, |
| "step": 5310 |
| }, |
| { |
| "epoch": 3.107476635514019, |
| "grad_norm": 0.016292404383420944, |
| "learning_rate": 7.767231308411215e-06, |
| "loss": 0.0007, |
| "step": 5320 |
| }, |
| { |
| "epoch": 3.1133177570093458, |
| "grad_norm": 0.016235455870628357, |
| "learning_rate": 7.781834112149532e-06, |
| "loss": 0.0447, |
| "step": 5330 |
| }, |
| { |
| "epoch": 3.119158878504673, |
| "grad_norm": 0.01838596910238266, |
| "learning_rate": 7.796436915887851e-06, |
| "loss": 0.0007, |
| "step": 5340 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 0.018622124567627907, |
| "learning_rate": 7.811039719626168e-06, |
| "loss": 0.0007, |
| "step": 5350 |
| }, |
| { |
| "epoch": 3.130841121495327, |
| "grad_norm": 0.020052941516041756, |
| "learning_rate": 7.825642523364485e-06, |
| "loss": 0.0007, |
| "step": 5360 |
| }, |
| { |
| "epoch": 3.1366822429906542, |
| "grad_norm": 0.0180792436003685, |
| "learning_rate": 7.840245327102804e-06, |
| "loss": 0.0007, |
| "step": 5370 |
| }, |
| { |
| "epoch": 3.142523364485981, |
| "grad_norm": 0.015389679931104183, |
| "learning_rate": 7.854848130841121e-06, |
| "loss": 0.0007, |
| "step": 5380 |
| }, |
| { |
| "epoch": 3.1483644859813085, |
| "grad_norm": 0.01384568028151989, |
| "learning_rate": 7.86945093457944e-06, |
| "loss": 0.0007, |
| "step": 5390 |
| }, |
| { |
| "epoch": 3.1542056074766354, |
| "grad_norm": 1.5059659481048584, |
| "learning_rate": 7.884053738317757e-06, |
| "loss": 0.0449, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.1600467289719627, |
| "grad_norm": 0.02092292346060276, |
| "learning_rate": 7.898656542056076e-06, |
| "loss": 0.0007, |
| "step": 5410 |
| }, |
| { |
| "epoch": 3.1658878504672896, |
| "grad_norm": 0.01820148155093193, |
| "learning_rate": 7.913259345794393e-06, |
| "loss": 0.0008, |
| "step": 5420 |
| }, |
| { |
| "epoch": 3.171728971962617, |
| "grad_norm": 0.018773594871163368, |
| "learning_rate": 7.927862149532711e-06, |
| "loss": 0.0007, |
| "step": 5430 |
| }, |
| { |
| "epoch": 3.177570093457944, |
| "grad_norm": 0.01787596382200718, |
| "learning_rate": 7.942464953271029e-06, |
| "loss": 0.0007, |
| "step": 5440 |
| }, |
| { |
| "epoch": 3.183411214953271, |
| "grad_norm": 0.033807117491960526, |
| "learning_rate": 7.957067757009347e-06, |
| "loss": 0.0867, |
| "step": 5450 |
| }, |
| { |
| "epoch": 3.189252336448598, |
| "grad_norm": 0.023095451295375824, |
| "learning_rate": 7.971670560747664e-06, |
| "loss": 0.0013, |
| "step": 5460 |
| }, |
| { |
| "epoch": 3.1950934579439254, |
| "grad_norm": 0.02575875073671341, |
| "learning_rate": 7.986273364485983e-06, |
| "loss": 0.0013, |
| "step": 5470 |
| }, |
| { |
| "epoch": 3.2009345794392523, |
| "grad_norm": 0.023440295830368996, |
| "learning_rate": 8.0008761682243e-06, |
| "loss": 0.0012, |
| "step": 5480 |
| }, |
| { |
| "epoch": 3.2067757009345796, |
| "grad_norm": 0.018454963341355324, |
| "learning_rate": 8.015478971962617e-06, |
| "loss": 0.001, |
| "step": 5490 |
| }, |
| { |
| "epoch": 3.2126168224299065, |
| "grad_norm": 0.022602304816246033, |
| "learning_rate": 8.030081775700936e-06, |
| "loss": 0.0421, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.218457943925234, |
| "grad_norm": 0.040274497121572495, |
| "learning_rate": 8.044684579439253e-06, |
| "loss": 0.0406, |
| "step": 5510 |
| }, |
| { |
| "epoch": 3.2242990654205608, |
| "grad_norm": 1.618641972541809, |
| "learning_rate": 8.059287383177572e-06, |
| "loss": 0.1812, |
| "step": 5520 |
| }, |
| { |
| "epoch": 3.2301401869158877, |
| "grad_norm": 0.10644412040710449, |
| "learning_rate": 8.073890186915889e-06, |
| "loss": 0.09, |
| "step": 5530 |
| }, |
| { |
| "epoch": 3.235981308411215, |
| "grad_norm": 0.05092101916670799, |
| "learning_rate": 8.088492990654206e-06, |
| "loss": 0.0038, |
| "step": 5540 |
| }, |
| { |
| "epoch": 3.241822429906542, |
| "grad_norm": 0.026801634579896927, |
| "learning_rate": 8.103095794392523e-06, |
| "loss": 0.002, |
| "step": 5550 |
| }, |
| { |
| "epoch": 3.2476635514018692, |
| "grad_norm": 0.024006173014640808, |
| "learning_rate": 8.117698598130842e-06, |
| "loss": 0.0013, |
| "step": 5560 |
| }, |
| { |
| "epoch": 3.253504672897196, |
| "grad_norm": 0.019342713057994843, |
| "learning_rate": 8.132301401869159e-06, |
| "loss": 0.001, |
| "step": 5570 |
| }, |
| { |
| "epoch": 3.2593457943925235, |
| "grad_norm": 0.01894223503768444, |
| "learning_rate": 8.146904205607477e-06, |
| "loss": 0.0008, |
| "step": 5580 |
| }, |
| { |
| "epoch": 3.2651869158878504, |
| "grad_norm": 0.01678093895316124, |
| "learning_rate": 8.161507009345794e-06, |
| "loss": 0.0007, |
| "step": 5590 |
| }, |
| { |
| "epoch": 3.2710280373831777, |
| "grad_norm": 0.014554358087480068, |
| "learning_rate": 8.176109813084113e-06, |
| "loss": 0.0006, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.2768691588785046, |
| "grad_norm": 0.014845364727079868, |
| "learning_rate": 8.19071261682243e-06, |
| "loss": 0.0006, |
| "step": 5610 |
| }, |
| { |
| "epoch": 3.282710280373832, |
| "grad_norm": 0.012988976202905178, |
| "learning_rate": 8.205315420560749e-06, |
| "loss": 0.0005, |
| "step": 5620 |
| }, |
| { |
| "epoch": 3.288551401869159, |
| "grad_norm": 0.039321836084127426, |
| "learning_rate": 8.219918224299066e-06, |
| "loss": 0.0473, |
| "step": 5630 |
| }, |
| { |
| "epoch": 3.294392523364486, |
| "grad_norm": 0.021501081064343452, |
| "learning_rate": 8.234521028037385e-06, |
| "loss": 0.0009, |
| "step": 5640 |
| }, |
| { |
| "epoch": 3.300233644859813, |
| "grad_norm": 0.022349685430526733, |
| "learning_rate": 8.249123831775702e-06, |
| "loss": 0.0009, |
| "step": 5650 |
| }, |
| { |
| "epoch": 3.30607476635514, |
| "grad_norm": 0.015372666530311108, |
| "learning_rate": 8.263726635514019e-06, |
| "loss": 0.0007, |
| "step": 5660 |
| }, |
| { |
| "epoch": 3.3119158878504673, |
| "grad_norm": 0.022245537489652634, |
| "learning_rate": 8.278329439252338e-06, |
| "loss": 0.0452, |
| "step": 5670 |
| }, |
| { |
| "epoch": 3.317757009345794, |
| "grad_norm": 0.027831530198454857, |
| "learning_rate": 8.292932242990655e-06, |
| "loss": 0.001, |
| "step": 5680 |
| }, |
| { |
| "epoch": 3.3235981308411215, |
| "grad_norm": 0.024912910535931587, |
| "learning_rate": 8.307535046728973e-06, |
| "loss": 0.001, |
| "step": 5690 |
| }, |
| { |
| "epoch": 3.3294392523364484, |
| "grad_norm": 0.05802381411194801, |
| "learning_rate": 8.32213785046729e-06, |
| "loss": 0.0424, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.335280373831776, |
| "grad_norm": 0.04274534061551094, |
| "learning_rate": 8.33674065420561e-06, |
| "loss": 0.0018, |
| "step": 5710 |
| }, |
| { |
| "epoch": 3.3411214953271027, |
| "grad_norm": 0.033411163836717606, |
| "learning_rate": 8.351343457943926e-06, |
| "loss": 0.0015, |
| "step": 5720 |
| }, |
| { |
| "epoch": 3.34696261682243, |
| "grad_norm": 0.024391217157244682, |
| "learning_rate": 8.365946261682243e-06, |
| "loss": 0.0011, |
| "step": 5730 |
| }, |
| { |
| "epoch": 3.352803738317757, |
| "grad_norm": 0.019727276638150215, |
| "learning_rate": 8.38054906542056e-06, |
| "loss": 0.0008, |
| "step": 5740 |
| }, |
| { |
| "epoch": 3.3586448598130842, |
| "grad_norm": 0.01770111732184887, |
| "learning_rate": 8.395151869158879e-06, |
| "loss": 0.0008, |
| "step": 5750 |
| }, |
| { |
| "epoch": 3.364485981308411, |
| "grad_norm": 1.5078725814819336, |
| "learning_rate": 8.409754672897196e-06, |
| "loss": 0.0449, |
| "step": 5760 |
| }, |
| { |
| "epoch": 3.3703271028037385, |
| "grad_norm": 0.01682940497994423, |
| "learning_rate": 8.424357476635515e-06, |
| "loss": 0.0008, |
| "step": 5770 |
| }, |
| { |
| "epoch": 3.3761682242990654, |
| "grad_norm": 0.022241350263357162, |
| "learning_rate": 8.438960280373832e-06, |
| "loss": 0.0009, |
| "step": 5780 |
| }, |
| { |
| "epoch": 3.3820093457943923, |
| "grad_norm": 0.05642879754304886, |
| "learning_rate": 8.45356308411215e-06, |
| "loss": 0.1237, |
| "step": 5790 |
| }, |
| { |
| "epoch": 3.3878504672897196, |
| "grad_norm": 0.08196932822465897, |
| "learning_rate": 8.468165887850468e-06, |
| "loss": 0.0039, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.393691588785047, |
| "grad_norm": 0.061761435121297836, |
| "learning_rate": 8.482768691588785e-06, |
| "loss": 0.0032, |
| "step": 5810 |
| }, |
| { |
| "epoch": 3.399532710280374, |
| "grad_norm": 0.0422382578253746, |
| "learning_rate": 8.497371495327104e-06, |
| "loss": 0.0022, |
| "step": 5820 |
| }, |
| { |
| "epoch": 3.4053738317757007, |
| "grad_norm": 1.63783860206604, |
| "learning_rate": 8.51197429906542e-06, |
| "loss": 0.0864, |
| "step": 5830 |
| }, |
| { |
| "epoch": 3.411214953271028, |
| "grad_norm": 0.10045663267374039, |
| "learning_rate": 8.52657710280374e-06, |
| "loss": 0.0443, |
| "step": 5840 |
| }, |
| { |
| "epoch": 3.417056074766355, |
| "grad_norm": 0.057120032608509064, |
| "learning_rate": 8.541179906542056e-06, |
| "loss": 0.0312, |
| "step": 5850 |
| }, |
| { |
| "epoch": 3.4228971962616823, |
| "grad_norm": 0.04555455595254898, |
| "learning_rate": 8.555782710280375e-06, |
| "loss": 0.0391, |
| "step": 5860 |
| }, |
| { |
| "epoch": 3.428738317757009, |
| "grad_norm": 0.04573315382003784, |
| "learning_rate": 8.570385514018692e-06, |
| "loss": 0.0023, |
| "step": 5870 |
| }, |
| { |
| "epoch": 3.4345794392523366, |
| "grad_norm": 0.034623004496097565, |
| "learning_rate": 8.584988317757011e-06, |
| "loss": 0.0021, |
| "step": 5880 |
| }, |
| { |
| "epoch": 3.4404205607476634, |
| "grad_norm": 0.030579067766666412, |
| "learning_rate": 8.599591121495328e-06, |
| "loss": 0.0018, |
| "step": 5890 |
| }, |
| { |
| "epoch": 3.446261682242991, |
| "grad_norm": 0.029105929657816887, |
| "learning_rate": 8.614193925233647e-06, |
| "loss": 0.0014, |
| "step": 5900 |
| }, |
| { |
| "epoch": 3.4521028037383177, |
| "grad_norm": 0.02326030656695366, |
| "learning_rate": 8.628796728971964e-06, |
| "loss": 0.0011, |
| "step": 5910 |
| }, |
| { |
| "epoch": 3.457943925233645, |
| "grad_norm": 0.023391487076878548, |
| "learning_rate": 8.643399532710281e-06, |
| "loss": 0.0011, |
| "step": 5920 |
| }, |
| { |
| "epoch": 3.463785046728972, |
| "grad_norm": 0.026132913306355476, |
| "learning_rate": 8.6580023364486e-06, |
| "loss": 0.0416, |
| "step": 5930 |
| }, |
| { |
| "epoch": 3.4696261682242993, |
| "grad_norm": 0.03305817395448685, |
| "learning_rate": 8.672605140186917e-06, |
| "loss": 0.0015, |
| "step": 5940 |
| }, |
| { |
| "epoch": 3.475467289719626, |
| "grad_norm": 1.3769471645355225, |
| "learning_rate": 8.687207943925234e-06, |
| "loss": 0.0406, |
| "step": 5950 |
| }, |
| { |
| "epoch": 3.481308411214953, |
| "grad_norm": 0.036052025854587555, |
| "learning_rate": 8.70181074766355e-06, |
| "loss": 0.0018, |
| "step": 5960 |
| }, |
| { |
| "epoch": 3.4871495327102804, |
| "grad_norm": 0.03515082970261574, |
| "learning_rate": 8.71641355140187e-06, |
| "loss": 0.0018, |
| "step": 5970 |
| }, |
| { |
| "epoch": 3.4929906542056077, |
| "grad_norm": 1.3603745698928833, |
| "learning_rate": 8.731016355140187e-06, |
| "loss": 0.04, |
| "step": 5980 |
| }, |
| { |
| "epoch": 3.4988317757009346, |
| "grad_norm": 0.044313978403806686, |
| "learning_rate": 8.745619158878505e-06, |
| "loss": 0.0017, |
| "step": 5990 |
| }, |
| { |
| "epoch": 3.5046728971962615, |
| "grad_norm": 0.03033366985619068, |
| "learning_rate": 8.760221962616822e-06, |
| "loss": 0.0017, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.510514018691589, |
| "grad_norm": 0.03509744256734848, |
| "learning_rate": 8.774824766355141e-06, |
| "loss": 0.0402, |
| "step": 6010 |
| }, |
| { |
| "epoch": 3.5163551401869158, |
| "grad_norm": 0.127041295170784, |
| "learning_rate": 8.789427570093458e-06, |
| "loss": 0.0734, |
| "step": 6020 |
| }, |
| { |
| "epoch": 3.522196261682243, |
| "grad_norm": 0.07098700851202011, |
| "learning_rate": 8.804030373831777e-06, |
| "loss": 0.0048, |
| "step": 6030 |
| }, |
| { |
| "epoch": 3.52803738317757, |
| "grad_norm": 0.05003582686185837, |
| "learning_rate": 8.818633177570094e-06, |
| "loss": 0.0365, |
| "step": 6040 |
| }, |
| { |
| "epoch": 3.5338785046728973, |
| "grad_norm": 0.06801114976406097, |
| "learning_rate": 8.833235981308413e-06, |
| "loss": 0.0369, |
| "step": 6050 |
| }, |
| { |
| "epoch": 3.539719626168224, |
| "grad_norm": 0.1009073555469513, |
| "learning_rate": 8.84783878504673e-06, |
| "loss": 0.0042, |
| "step": 6060 |
| }, |
| { |
| "epoch": 3.5455607476635516, |
| "grad_norm": 0.045602068305015564, |
| "learning_rate": 8.862441588785048e-06, |
| "loss": 0.0368, |
| "step": 6070 |
| }, |
| { |
| "epoch": 3.5514018691588785, |
| "grad_norm": 0.04139287769794464, |
| "learning_rate": 8.877044392523366e-06, |
| "loss": 0.0028, |
| "step": 6080 |
| }, |
| { |
| "epoch": 3.5572429906542054, |
| "grad_norm": 0.04599359631538391, |
| "learning_rate": 8.891647196261684e-06, |
| "loss": 0.0025, |
| "step": 6090 |
| }, |
| { |
| "epoch": 3.5630841121495327, |
| "grad_norm": 0.04469927027821541, |
| "learning_rate": 8.906250000000001e-06, |
| "loss": 0.0021, |
| "step": 6100 |
| }, |
| { |
| "epoch": 3.56892523364486, |
| "grad_norm": 0.03399086743593216, |
| "learning_rate": 8.920852803738318e-06, |
| "loss": 0.0014, |
| "step": 6110 |
| }, |
| { |
| "epoch": 3.574766355140187, |
| "grad_norm": 0.024210546165704727, |
| "learning_rate": 8.935455607476637e-06, |
| "loss": 0.0012, |
| "step": 6120 |
| }, |
| { |
| "epoch": 3.580607476635514, |
| "grad_norm": 0.029081307351589203, |
| "learning_rate": 8.950058411214954e-06, |
| "loss": 0.0414, |
| "step": 6130 |
| }, |
| { |
| "epoch": 3.586448598130841, |
| "grad_norm": 0.031951967626810074, |
| "learning_rate": 8.964661214953271e-06, |
| "loss": 0.0395, |
| "step": 6140 |
| }, |
| { |
| "epoch": 3.5922897196261685, |
| "grad_norm": 0.04544154927134514, |
| "learning_rate": 8.97926401869159e-06, |
| "loss": 0.0376, |
| "step": 6150 |
| }, |
| { |
| "epoch": 3.5981308411214954, |
| "grad_norm": 0.04905861243605614, |
| "learning_rate": 8.993866822429907e-06, |
| "loss": 0.0359, |
| "step": 6160 |
| }, |
| { |
| "epoch": 3.6039719626168223, |
| "grad_norm": 0.0846642404794693, |
| "learning_rate": 9.008469626168224e-06, |
| "loss": 0.0372, |
| "step": 6170 |
| }, |
| { |
| "epoch": 3.6098130841121496, |
| "grad_norm": 0.09748505055904388, |
| "learning_rate": 9.023072429906543e-06, |
| "loss": 0.0057, |
| "step": 6180 |
| }, |
| { |
| "epoch": 3.6156542056074765, |
| "grad_norm": 0.05836522579193115, |
| "learning_rate": 9.03767523364486e-06, |
| "loss": 0.0034, |
| "step": 6190 |
| }, |
| { |
| "epoch": 3.621495327102804, |
| "grad_norm": 0.03731178119778633, |
| "learning_rate": 9.052278037383179e-06, |
| "loss": 0.0021, |
| "step": 6200 |
| }, |
| { |
| "epoch": 3.6273364485981308, |
| "grad_norm": 0.04591674730181694, |
| "learning_rate": 9.066880841121496e-06, |
| "loss": 0.0017, |
| "step": 6210 |
| }, |
| { |
| "epoch": 3.633177570093458, |
| "grad_norm": 0.026560049504041672, |
| "learning_rate": 9.081483644859814e-06, |
| "loss": 0.0013, |
| "step": 6220 |
| }, |
| { |
| "epoch": 3.639018691588785, |
| "grad_norm": 0.021895037963986397, |
| "learning_rate": 9.096086448598131e-06, |
| "loss": 0.0013, |
| "step": 6230 |
| }, |
| { |
| "epoch": 3.6448598130841123, |
| "grad_norm": 0.0244914460927248, |
| "learning_rate": 9.11068925233645e-06, |
| "loss": 0.001, |
| "step": 6240 |
| }, |
| { |
| "epoch": 3.6507009345794392, |
| "grad_norm": 0.023699410259723663, |
| "learning_rate": 9.125292056074767e-06, |
| "loss": 0.0009, |
| "step": 6250 |
| }, |
| { |
| "epoch": 3.656542056074766, |
| "grad_norm": 0.0207260400056839, |
| "learning_rate": 9.139894859813084e-06, |
| "loss": 0.0008, |
| "step": 6260 |
| }, |
| { |
| "epoch": 3.6623831775700935, |
| "grad_norm": 0.017150694504380226, |
| "learning_rate": 9.154497663551403e-06, |
| "loss": 0.0008, |
| "step": 6270 |
| }, |
| { |
| "epoch": 3.668224299065421, |
| "grad_norm": 0.01909276656806469, |
| "learning_rate": 9.16910046728972e-06, |
| "loss": 0.0007, |
| "step": 6280 |
| }, |
| { |
| "epoch": 3.6740654205607477, |
| "grad_norm": 0.01428013201802969, |
| "learning_rate": 9.183703271028039e-06, |
| "loss": 0.0007, |
| "step": 6290 |
| }, |
| { |
| "epoch": 3.6799065420560746, |
| "grad_norm": 0.014406004920601845, |
| "learning_rate": 9.198306074766356e-06, |
| "loss": 0.0006, |
| "step": 6300 |
| }, |
| { |
| "epoch": 3.685747663551402, |
| "grad_norm": 0.01366669312119484, |
| "learning_rate": 9.212908878504675e-06, |
| "loss": 0.0006, |
| "step": 6310 |
| }, |
| { |
| "epoch": 3.691588785046729, |
| "grad_norm": 0.012372363358736038, |
| "learning_rate": 9.227511682242992e-06, |
| "loss": 0.0006, |
| "step": 6320 |
| }, |
| { |
| "epoch": 3.697429906542056, |
| "grad_norm": 0.014415577985346317, |
| "learning_rate": 9.242114485981309e-06, |
| "loss": 0.0006, |
| "step": 6330 |
| }, |
| { |
| "epoch": 3.703271028037383, |
| "grad_norm": 0.014359201304614544, |
| "learning_rate": 9.256717289719628e-06, |
| "loss": 0.0005, |
| "step": 6340 |
| }, |
| { |
| "epoch": 3.7091121495327104, |
| "grad_norm": 0.012585778720676899, |
| "learning_rate": 9.271320093457945e-06, |
| "loss": 0.0005, |
| "step": 6350 |
| }, |
| { |
| "epoch": 3.7149532710280373, |
| "grad_norm": 0.010574690997600555, |
| "learning_rate": 9.285922897196262e-06, |
| "loss": 0.0004, |
| "step": 6360 |
| }, |
| { |
| "epoch": 3.7207943925233646, |
| "grad_norm": 0.011687861755490303, |
| "learning_rate": 9.30052570093458e-06, |
| "loss": 0.0004, |
| "step": 6370 |
| }, |
| { |
| "epoch": 3.7266355140186915, |
| "grad_norm": 0.010782795958220959, |
| "learning_rate": 9.315128504672897e-06, |
| "loss": 0.0004, |
| "step": 6380 |
| }, |
| { |
| "epoch": 3.7324766355140184, |
| "grad_norm": 0.00877504050731659, |
| "learning_rate": 9.329731308411216e-06, |
| "loss": 0.0004, |
| "step": 6390 |
| }, |
| { |
| "epoch": 3.7383177570093458, |
| "grad_norm": 0.011687839403748512, |
| "learning_rate": 9.344334112149533e-06, |
| "loss": 0.0004, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.744158878504673, |
| "grad_norm": 0.009787016548216343, |
| "learning_rate": 9.35893691588785e-06, |
| "loss": 0.0004, |
| "step": 6410 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.009898348711431026, |
| "learning_rate": 9.373539719626169e-06, |
| "loss": 0.0004, |
| "step": 6420 |
| }, |
| { |
| "epoch": 3.755841121495327, |
| "grad_norm": 0.008907527662813663, |
| "learning_rate": 9.388142523364486e-06, |
| "loss": 0.0003, |
| "step": 6430 |
| }, |
| { |
| "epoch": 3.7616822429906542, |
| "grad_norm": 0.008017996326088905, |
| "learning_rate": 9.402745327102805e-06, |
| "loss": 0.0004, |
| "step": 6440 |
| }, |
| { |
| "epoch": 3.7675233644859816, |
| "grad_norm": 0.008717546239495277, |
| "learning_rate": 9.417348130841122e-06, |
| "loss": 0.0003, |
| "step": 6450 |
| }, |
| { |
| "epoch": 3.7733644859813085, |
| "grad_norm": 0.008758803829550743, |
| "learning_rate": 9.43195093457944e-06, |
| "loss": 0.0003, |
| "step": 6460 |
| }, |
| { |
| "epoch": 3.7792056074766354, |
| "grad_norm": 0.008630459196865559, |
| "learning_rate": 9.446553738317758e-06, |
| "loss": 0.0492, |
| "step": 6470 |
| }, |
| { |
| "epoch": 3.7850467289719627, |
| "grad_norm": 0.012145120650529861, |
| "learning_rate": 9.461156542056076e-06, |
| "loss": 0.0004, |
| "step": 6480 |
| }, |
| { |
| "epoch": 3.7908878504672896, |
| "grad_norm": 0.012704220600426197, |
| "learning_rate": 9.475759345794393e-06, |
| "loss": 0.0005, |
| "step": 6490 |
| }, |
| { |
| "epoch": 3.796728971962617, |
| "grad_norm": 0.012053780257701874, |
| "learning_rate": 9.490362149532712e-06, |
| "loss": 0.0005, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.802570093457944, |
| "grad_norm": 0.010433944873511791, |
| "learning_rate": 9.50496495327103e-06, |
| "loss": 0.0004, |
| "step": 6510 |
| }, |
| { |
| "epoch": 3.808411214953271, |
| "grad_norm": 0.015743877738714218, |
| "learning_rate": 9.519567757009346e-06, |
| "loss": 0.0461, |
| "step": 6520 |
| }, |
| { |
| "epoch": 3.814252336448598, |
| "grad_norm": 0.01906657963991165, |
| "learning_rate": 9.534170560747665e-06, |
| "loss": 0.0007, |
| "step": 6530 |
| }, |
| { |
| "epoch": 3.8200934579439254, |
| "grad_norm": 0.01994435489177704, |
| "learning_rate": 9.548773364485982e-06, |
| "loss": 0.0414, |
| "step": 6540 |
| }, |
| { |
| "epoch": 3.8259345794392523, |
| "grad_norm": 0.02100527472794056, |
| "learning_rate": 9.563376168224299e-06, |
| "loss": 0.2299, |
| "step": 6550 |
| }, |
| { |
| "epoch": 3.831775700934579, |
| "grad_norm": 0.0396944135427475, |
| "learning_rate": 9.577978971962618e-06, |
| "loss": 0.0558, |
| "step": 6560 |
| }, |
| { |
| "epoch": 3.8376168224299065, |
| "grad_norm": 0.02828989550471306, |
| "learning_rate": 9.592581775700935e-06, |
| "loss": 0.0014, |
| "step": 6570 |
| }, |
| { |
| "epoch": 3.843457943925234, |
| "grad_norm": 0.02526008151471615, |
| "learning_rate": 9.607184579439252e-06, |
| "loss": 0.0012, |
| "step": 6580 |
| }, |
| { |
| "epoch": 3.8492990654205608, |
| "grad_norm": 0.02473423443734646, |
| "learning_rate": 9.62178738317757e-06, |
| "loss": 0.0373, |
| "step": 6590 |
| }, |
| { |
| "epoch": 3.8551401869158877, |
| "grad_norm": 0.034433163702487946, |
| "learning_rate": 9.636390186915888e-06, |
| "loss": 0.0013, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.860981308411215, |
| "grad_norm": 0.021160632371902466, |
| "learning_rate": 9.650992990654207e-06, |
| "loss": 0.049, |
| "step": 6610 |
| }, |
| { |
| "epoch": 3.866822429906542, |
| "grad_norm": 0.025369103997945786, |
| "learning_rate": 9.665595794392524e-06, |
| "loss": 0.001, |
| "step": 6620 |
| }, |
| { |
| "epoch": 3.8726635514018692, |
| "grad_norm": 0.02345307357609272, |
| "learning_rate": 9.680198598130842e-06, |
| "loss": 0.0011, |
| "step": 6630 |
| }, |
| { |
| "epoch": 3.878504672897196, |
| "grad_norm": 0.03416465222835541, |
| "learning_rate": 9.69480140186916e-06, |
| "loss": 0.0447, |
| "step": 6640 |
| }, |
| { |
| "epoch": 3.8843457943925235, |
| "grad_norm": 0.023318510502576828, |
| "learning_rate": 9.709404205607478e-06, |
| "loss": 0.0014, |
| "step": 6650 |
| }, |
| { |
| "epoch": 3.8901869158878504, |
| "grad_norm": 0.024326141923666, |
| "learning_rate": 9.724007009345795e-06, |
| "loss": 0.0011, |
| "step": 6660 |
| }, |
| { |
| "epoch": 3.8960280373831777, |
| "grad_norm": 0.021615874022245407, |
| "learning_rate": 9.738609813084114e-06, |
| "loss": 0.0009, |
| "step": 6670 |
| }, |
| { |
| "epoch": 3.9018691588785046, |
| "grad_norm": 0.01807982474565506, |
| "learning_rate": 9.753212616822431e-06, |
| "loss": 0.0008, |
| "step": 6680 |
| }, |
| { |
| "epoch": 3.9077102803738315, |
| "grad_norm": 0.01847692020237446, |
| "learning_rate": 9.76781542056075e-06, |
| "loss": 0.0446, |
| "step": 6690 |
| }, |
| { |
| "epoch": 3.913551401869159, |
| "grad_norm": 0.022509966045618057, |
| "learning_rate": 9.782418224299067e-06, |
| "loss": 0.001, |
| "step": 6700 |
| }, |
| { |
| "epoch": 3.919392523364486, |
| "grad_norm": 0.027468325570225716, |
| "learning_rate": 9.797021028037384e-06, |
| "loss": 0.001, |
| "step": 6710 |
| }, |
| { |
| "epoch": 3.925233644859813, |
| "grad_norm": 0.01959499530494213, |
| "learning_rate": 9.811623831775703e-06, |
| "loss": 0.0008, |
| "step": 6720 |
| }, |
| { |
| "epoch": 3.93107476635514, |
| "grad_norm": 0.015794578939676285, |
| "learning_rate": 9.82622663551402e-06, |
| "loss": 0.0008, |
| "step": 6730 |
| }, |
| { |
| "epoch": 3.9369158878504673, |
| "grad_norm": 0.016814829781651497, |
| "learning_rate": 9.840829439252337e-06, |
| "loss": 0.0007, |
| "step": 6740 |
| }, |
| { |
| "epoch": 3.9427570093457946, |
| "grad_norm": 0.01603136584162712, |
| "learning_rate": 9.855432242990655e-06, |
| "loss": 0.0006, |
| "step": 6750 |
| }, |
| { |
| "epoch": 3.9485981308411215, |
| "grad_norm": 0.01432070042937994, |
| "learning_rate": 9.870035046728972e-06, |
| "loss": 0.0006, |
| "step": 6760 |
| }, |
| { |
| "epoch": 3.9544392523364484, |
| "grad_norm": 0.014329387806355953, |
| "learning_rate": 9.88463785046729e-06, |
| "loss": 0.0005, |
| "step": 6770 |
| }, |
| { |
| "epoch": 3.960280373831776, |
| "grad_norm": 0.012948929332196712, |
| "learning_rate": 9.899240654205608e-06, |
| "loss": 0.0005, |
| "step": 6780 |
| }, |
| { |
| "epoch": 3.9661214953271027, |
| "grad_norm": 0.012632016092538834, |
| "learning_rate": 9.913843457943925e-06, |
| "loss": 0.0005, |
| "step": 6790 |
| }, |
| { |
| "epoch": 3.97196261682243, |
| "grad_norm": 0.010846257209777832, |
| "learning_rate": 9.928446261682244e-06, |
| "loss": 0.0004, |
| "step": 6800 |
| }, |
| { |
| "epoch": 3.977803738317757, |
| "grad_norm": 0.01142601016908884, |
| "learning_rate": 9.943049065420561e-06, |
| "loss": 0.0004, |
| "step": 6810 |
| }, |
| { |
| "epoch": 3.9836448598130842, |
| "grad_norm": 0.06392688304185867, |
| "learning_rate": 9.95765186915888e-06, |
| "loss": 0.1766, |
| "step": 6820 |
| }, |
| { |
| "epoch": 3.989485981308411, |
| "grad_norm": 0.08231887221336365, |
| "learning_rate": 9.972254672897197e-06, |
| "loss": 0.0056, |
| "step": 6830 |
| }, |
| { |
| "epoch": 3.9953271028037385, |
| "grad_norm": 0.03441477566957474, |
| "learning_rate": 9.986857476635516e-06, |
| "loss": 0.003, |
| "step": 6840 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_f1": 0.9986386325820602, |
| "eval_fbeta": 0.9978235099982873, |
| "eval_loss": 0.010695425793528557, |
| "eval_precision": 0.9972809667673715, |
| "eval_recall": 1.0, |
| "eval_runtime": 173.1568, |
| "eval_samples_per_second": 67.783, |
| "eval_steps_per_second": 8.478, |
| "step": 6848 |
| }, |
| { |
| "epoch": 4.001168224299065, |
| "grad_norm": 0.04184762015938759, |
| "learning_rate": 9.999634929906544e-06, |
| "loss": 0.002, |
| "step": 6850 |
| }, |
| { |
| "epoch": 4.007009345794392, |
| "grad_norm": 0.027074744924902916, |
| "learning_rate": 9.995984228971964e-06, |
| "loss": 0.0015, |
| "step": 6860 |
| }, |
| { |
| "epoch": 4.01285046728972, |
| "grad_norm": 0.02378828078508377, |
| "learning_rate": 9.992333528037384e-06, |
| "loss": 0.0012, |
| "step": 6870 |
| }, |
| { |
| "epoch": 4.018691588785047, |
| "grad_norm": 0.01926625706255436, |
| "learning_rate": 9.988682827102805e-06, |
| "loss": 0.0009, |
| "step": 6880 |
| }, |
| { |
| "epoch": 4.024532710280374, |
| "grad_norm": 0.03264503926038742, |
| "learning_rate": 9.985032126168225e-06, |
| "loss": 0.0425, |
| "step": 6890 |
| }, |
| { |
| "epoch": 4.030373831775701, |
| "grad_norm": 0.022108478471636772, |
| "learning_rate": 9.981381425233645e-06, |
| "loss": 0.0012, |
| "step": 6900 |
| }, |
| { |
| "epoch": 4.036214953271028, |
| "grad_norm": 0.036377232521772385, |
| "learning_rate": 9.977730724299066e-06, |
| "loss": 0.0404, |
| "step": 6910 |
| }, |
| { |
| "epoch": 4.042056074766355, |
| "grad_norm": 0.0229843407869339, |
| "learning_rate": 9.974080023364486e-06, |
| "loss": 0.0014, |
| "step": 6920 |
| }, |
| { |
| "epoch": 4.047897196261682, |
| "grad_norm": 0.023045457899570465, |
| "learning_rate": 9.970429322429908e-06, |
| "loss": 0.0015, |
| "step": 6930 |
| }, |
| { |
| "epoch": 4.053738317757009, |
| "grad_norm": 0.03223934397101402, |
| "learning_rate": 9.966778621495328e-06, |
| "loss": 0.0403, |
| "step": 6940 |
| }, |
| { |
| "epoch": 4.059579439252336, |
| "grad_norm": 0.022895364090800285, |
| "learning_rate": 9.963127920560749e-06, |
| "loss": 0.0015, |
| "step": 6950 |
| }, |
| { |
| "epoch": 4.065420560747664, |
| "grad_norm": 0.04552144557237625, |
| "learning_rate": 9.959477219626169e-06, |
| "loss": 0.0388, |
| "step": 6960 |
| }, |
| { |
| "epoch": 4.071261682242991, |
| "grad_norm": 0.04091038182377815, |
| "learning_rate": 9.95582651869159e-06, |
| "loss": 0.0021, |
| "step": 6970 |
| }, |
| { |
| "epoch": 4.077102803738318, |
| "grad_norm": 0.027029208838939667, |
| "learning_rate": 9.952175817757011e-06, |
| "loss": 0.0019, |
| "step": 6980 |
| }, |
| { |
| "epoch": 4.082943925233645, |
| "grad_norm": 0.027454374358057976, |
| "learning_rate": 9.94852511682243e-06, |
| "loss": 0.0016, |
| "step": 6990 |
| }, |
| { |
| "epoch": 4.088785046728972, |
| "grad_norm": 0.031487565487623215, |
| "learning_rate": 9.94487441588785e-06, |
| "loss": 0.04, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.094626168224299, |
| "grad_norm": 0.03998126834630966, |
| "learning_rate": 9.941223714953272e-06, |
| "loss": 0.0014, |
| "step": 7010 |
| }, |
| { |
| "epoch": 4.100467289719626, |
| "grad_norm": 0.050244223326444626, |
| "learning_rate": 9.937573014018692e-06, |
| "loss": 0.0747, |
| "step": 7020 |
| }, |
| { |
| "epoch": 4.106308411214953, |
| "grad_norm": 0.05759045109152794, |
| "learning_rate": 9.933922313084113e-06, |
| "loss": 0.003, |
| "step": 7030 |
| }, |
| { |
| "epoch": 4.11214953271028, |
| "grad_norm": 0.0511946938931942, |
| "learning_rate": 9.930271612149533e-06, |
| "loss": 0.0033, |
| "step": 7040 |
| }, |
| { |
| "epoch": 4.117990654205608, |
| "grad_norm": 0.03305158019065857, |
| "learning_rate": 9.926620911214953e-06, |
| "loss": 0.0024, |
| "step": 7050 |
| }, |
| { |
| "epoch": 4.123831775700935, |
| "grad_norm": 0.026132995262742043, |
| "learning_rate": 9.922970210280375e-06, |
| "loss": 0.0018, |
| "step": 7060 |
| }, |
| { |
| "epoch": 4.1296728971962615, |
| "grad_norm": 0.02357422560453415, |
| "learning_rate": 9.919319509345796e-06, |
| "loss": 0.0014, |
| "step": 7070 |
| }, |
| { |
| "epoch": 4.135514018691588, |
| "grad_norm": 0.02945876307785511, |
| "learning_rate": 9.915668808411216e-06, |
| "loss": 0.0407, |
| "step": 7080 |
| }, |
| { |
| "epoch": 4.141355140186916, |
| "grad_norm": 0.031209511682391167, |
| "learning_rate": 9.912018107476636e-06, |
| "loss": 0.0013, |
| "step": 7090 |
| }, |
| { |
| "epoch": 4.147196261682243, |
| "grad_norm": 0.03341691195964813, |
| "learning_rate": 9.908367406542057e-06, |
| "loss": 0.0397, |
| "step": 7100 |
| }, |
| { |
| "epoch": 4.15303738317757, |
| "grad_norm": 0.02726580575108528, |
| "learning_rate": 9.904716705607477e-06, |
| "loss": 0.0016, |
| "step": 7110 |
| }, |
| { |
| "epoch": 4.158878504672897, |
| "grad_norm": 0.0374341756105423, |
| "learning_rate": 9.901066004672897e-06, |
| "loss": 0.0016, |
| "step": 7120 |
| }, |
| { |
| "epoch": 4.164719626168225, |
| "grad_norm": 0.026886312291026115, |
| "learning_rate": 9.897415303738318e-06, |
| "loss": 0.0014, |
| "step": 7130 |
| }, |
| { |
| "epoch": 4.170560747663552, |
| "grad_norm": 0.03707856312394142, |
| "learning_rate": 9.89376460280374e-06, |
| "loss": 0.0405, |
| "step": 7140 |
| }, |
| { |
| "epoch": 4.1764018691588785, |
| "grad_norm": 0.02945527993142605, |
| "learning_rate": 9.89011390186916e-06, |
| "loss": 0.0014, |
| "step": 7150 |
| }, |
| { |
| "epoch": 4.182242990654205, |
| "grad_norm": 0.031244931742548943, |
| "learning_rate": 9.88646320093458e-06, |
| "loss": 0.0015, |
| "step": 7160 |
| }, |
| { |
| "epoch": 4.188084112149533, |
| "grad_norm": 0.0277140773832798, |
| "learning_rate": 9.8828125e-06, |
| "loss": 0.0013, |
| "step": 7170 |
| }, |
| { |
| "epoch": 4.19392523364486, |
| "grad_norm": 0.024385616183280945, |
| "learning_rate": 9.87916179906542e-06, |
| "loss": 0.0012, |
| "step": 7180 |
| }, |
| { |
| "epoch": 4.199766355140187, |
| "grad_norm": 0.02136746421456337, |
| "learning_rate": 9.875511098130843e-06, |
| "loss": 0.001, |
| "step": 7190 |
| }, |
| { |
| "epoch": 4.205607476635514, |
| "grad_norm": 0.025973528623580933, |
| "learning_rate": 9.871860397196263e-06, |
| "loss": 0.0415, |
| "step": 7200 |
| }, |
| { |
| "epoch": 4.211448598130841, |
| "grad_norm": 0.02830415591597557, |
| "learning_rate": 9.868209696261683e-06, |
| "loss": 0.0011, |
| "step": 7210 |
| }, |
| { |
| "epoch": 4.2172897196261685, |
| "grad_norm": 0.028491418808698654, |
| "learning_rate": 9.864558995327104e-06, |
| "loss": 0.0013, |
| "step": 7220 |
| }, |
| { |
| "epoch": 4.223130841121495, |
| "grad_norm": 0.025560962036252022, |
| "learning_rate": 9.860908294392524e-06, |
| "loss": 0.041, |
| "step": 7230 |
| }, |
| { |
| "epoch": 4.228971962616822, |
| "grad_norm": 0.042416248470544815, |
| "learning_rate": 9.857257593457944e-06, |
| "loss": 0.0403, |
| "step": 7240 |
| }, |
| { |
| "epoch": 4.234813084112149, |
| "grad_norm": 0.03840547055006027, |
| "learning_rate": 9.853606892523365e-06, |
| "loss": 0.0022, |
| "step": 7250 |
| }, |
| { |
| "epoch": 4.240654205607477, |
| "grad_norm": 0.04450295493006706, |
| "learning_rate": 9.849956191588785e-06, |
| "loss": 0.0021, |
| "step": 7260 |
| }, |
| { |
| "epoch": 4.246495327102804, |
| "grad_norm": 0.025487173348665237, |
| "learning_rate": 9.846305490654207e-06, |
| "loss": 0.0016, |
| "step": 7270 |
| }, |
| { |
| "epoch": 4.252336448598131, |
| "grad_norm": 0.03323301300406456, |
| "learning_rate": 9.842654789719627e-06, |
| "loss": 0.0392, |
| "step": 7280 |
| }, |
| { |
| "epoch": 4.258177570093458, |
| "grad_norm": 0.0345788300037384, |
| "learning_rate": 9.839004088785048e-06, |
| "loss": 0.0016, |
| "step": 7290 |
| }, |
| { |
| "epoch": 4.264018691588785, |
| "grad_norm": 0.02562621608376503, |
| "learning_rate": 9.835353387850468e-06, |
| "loss": 0.0016, |
| "step": 7300 |
| }, |
| { |
| "epoch": 4.269859813084112, |
| "grad_norm": 0.03218419477343559, |
| "learning_rate": 9.831702686915888e-06, |
| "loss": 0.0014, |
| "step": 7310 |
| }, |
| { |
| "epoch": 4.275700934579439, |
| "grad_norm": 0.017818788066506386, |
| "learning_rate": 9.82805198598131e-06, |
| "loss": 0.0011, |
| "step": 7320 |
| }, |
| { |
| "epoch": 4.281542056074766, |
| "grad_norm": 0.027568548917770386, |
| "learning_rate": 9.824401285046729e-06, |
| "loss": 0.0408, |
| "step": 7330 |
| }, |
| { |
| "epoch": 4.287383177570094, |
| "grad_norm": 0.03140578046441078, |
| "learning_rate": 9.820750584112151e-06, |
| "loss": 0.0012, |
| "step": 7340 |
| }, |
| { |
| "epoch": 4.293224299065421, |
| "grad_norm": 0.017698241397738457, |
| "learning_rate": 9.817099883177571e-06, |
| "loss": 0.0012, |
| "step": 7350 |
| }, |
| { |
| "epoch": 4.299065420560748, |
| "grad_norm": 0.022705256938934326, |
| "learning_rate": 9.813449182242992e-06, |
| "loss": 0.0011, |
| "step": 7360 |
| }, |
| { |
| "epoch": 4.304906542056075, |
| "grad_norm": 0.028253547847270966, |
| "learning_rate": 9.809798481308412e-06, |
| "loss": 0.0405, |
| "step": 7370 |
| }, |
| { |
| "epoch": 4.3107476635514015, |
| "grad_norm": 0.03405269980430603, |
| "learning_rate": 9.806147780373832e-06, |
| "loss": 0.0014, |
| "step": 7380 |
| }, |
| { |
| "epoch": 4.316588785046729, |
| "grad_norm": 0.030191723257303238, |
| "learning_rate": 9.802497079439252e-06, |
| "loss": 0.0014, |
| "step": 7390 |
| }, |
| { |
| "epoch": 4.322429906542056, |
| "grad_norm": 0.01599167101085186, |
| "learning_rate": 9.798846378504675e-06, |
| "loss": 0.0011, |
| "step": 7400 |
| }, |
| { |
| "epoch": 4.328271028037383, |
| "grad_norm": 0.0164847020059824, |
| "learning_rate": 9.795195677570095e-06, |
| "loss": 0.001, |
| "step": 7410 |
| }, |
| { |
| "epoch": 4.33411214953271, |
| "grad_norm": 0.021135691553354263, |
| "learning_rate": 9.791544976635515e-06, |
| "loss": 0.0009, |
| "step": 7420 |
| }, |
| { |
| "epoch": 4.339953271028038, |
| "grad_norm": 0.021334068849682808, |
| "learning_rate": 9.787894275700935e-06, |
| "loss": 0.0008, |
| "step": 7430 |
| }, |
| { |
| "epoch": 4.345794392523365, |
| "grad_norm": 0.012419966980814934, |
| "learning_rate": 9.784243574766356e-06, |
| "loss": 0.0007, |
| "step": 7440 |
| }, |
| { |
| "epoch": 4.3516355140186915, |
| "grad_norm": 0.011540565639734268, |
| "learning_rate": 9.780592873831776e-06, |
| "loss": 0.0006, |
| "step": 7450 |
| }, |
| { |
| "epoch": 4.357476635514018, |
| "grad_norm": 0.012541793286800385, |
| "learning_rate": 9.776942172897196e-06, |
| "loss": 0.0006, |
| "step": 7460 |
| }, |
| { |
| "epoch": 4.363317757009346, |
| "grad_norm": 0.013756770640611649, |
| "learning_rate": 9.773291471962617e-06, |
| "loss": 0.0005, |
| "step": 7470 |
| }, |
| { |
| "epoch": 4.369158878504673, |
| "grad_norm": 0.010488706640899181, |
| "learning_rate": 9.769640771028039e-06, |
| "loss": 0.0005, |
| "step": 7480 |
| }, |
| { |
| "epoch": 4.375, |
| "grad_norm": 0.019985994324088097, |
| "learning_rate": 9.765990070093459e-06, |
| "loss": 0.0884, |
| "step": 7490 |
| }, |
| { |
| "epoch": 4.380841121495327, |
| "grad_norm": 0.01809321902692318, |
| "learning_rate": 9.76233936915888e-06, |
| "loss": 0.001, |
| "step": 7500 |
| }, |
| { |
| "epoch": 4.386682242990654, |
| "grad_norm": 0.025336505845189095, |
| "learning_rate": 9.7586886682243e-06, |
| "loss": 0.0011, |
| "step": 7510 |
| }, |
| { |
| "epoch": 4.392523364485982, |
| "grad_norm": 0.022142156958580017, |
| "learning_rate": 9.75503796728972e-06, |
| "loss": 0.0011, |
| "step": 7520 |
| }, |
| { |
| "epoch": 4.3983644859813085, |
| "grad_norm": 0.028588872402906418, |
| "learning_rate": 9.751387266355142e-06, |
| "loss": 0.001, |
| "step": 7530 |
| }, |
| { |
| "epoch": 4.404205607476635, |
| "grad_norm": 0.01660712994635105, |
| "learning_rate": 9.74773656542056e-06, |
| "loss": 0.0009, |
| "step": 7540 |
| }, |
| { |
| "epoch": 4.410046728971962, |
| "grad_norm": 0.020251592621207237, |
| "learning_rate": 9.744085864485983e-06, |
| "loss": 0.0008, |
| "step": 7550 |
| }, |
| { |
| "epoch": 4.41588785046729, |
| "grad_norm": 0.015099707059562206, |
| "learning_rate": 9.740435163551403e-06, |
| "loss": 0.0007, |
| "step": 7560 |
| }, |
| { |
| "epoch": 4.421728971962617, |
| "grad_norm": 0.014263300225138664, |
| "learning_rate": 9.736784462616823e-06, |
| "loss": 0.0006, |
| "step": 7570 |
| }, |
| { |
| "epoch": 4.427570093457944, |
| "grad_norm": 0.015971451997756958, |
| "learning_rate": 9.733133761682244e-06, |
| "loss": 0.0006, |
| "step": 7580 |
| }, |
| { |
| "epoch": 4.433411214953271, |
| "grad_norm": 0.018041100353002548, |
| "learning_rate": 9.729483060747664e-06, |
| "loss": 0.0433, |
| "step": 7590 |
| }, |
| { |
| "epoch": 4.4392523364485985, |
| "grad_norm": 0.02004099264740944, |
| "learning_rate": 9.725832359813084e-06, |
| "loss": 0.0008, |
| "step": 7600 |
| }, |
| { |
| "epoch": 4.445093457943925, |
| "grad_norm": 0.016719138249754906, |
| "learning_rate": 9.722181658878506e-06, |
| "loss": 0.0007, |
| "step": 7610 |
| }, |
| { |
| "epoch": 4.450934579439252, |
| "grad_norm": 0.017085473984479904, |
| "learning_rate": 9.718530957943926e-06, |
| "loss": 0.0008, |
| "step": 7620 |
| }, |
| { |
| "epoch": 4.456775700934579, |
| "grad_norm": 0.014245687052607536, |
| "learning_rate": 9.714880257009347e-06, |
| "loss": 0.0007, |
| "step": 7630 |
| }, |
| { |
| "epoch": 4.462616822429906, |
| "grad_norm": 0.011969489976763725, |
| "learning_rate": 9.711229556074767e-06, |
| "loss": 0.0006, |
| "step": 7640 |
| }, |
| { |
| "epoch": 4.468457943925234, |
| "grad_norm": 0.012137607671320438, |
| "learning_rate": 9.707578855140187e-06, |
| "loss": 0.0006, |
| "step": 7650 |
| }, |
| { |
| "epoch": 4.474299065420561, |
| "grad_norm": 0.012511523440480232, |
| "learning_rate": 9.703928154205608e-06, |
| "loss": 0.0005, |
| "step": 7660 |
| }, |
| { |
| "epoch": 4.480140186915888, |
| "grad_norm": 0.01686168648302555, |
| "learning_rate": 9.700277453271028e-06, |
| "loss": 0.0449, |
| "step": 7670 |
| }, |
| { |
| "epoch": 4.485981308411215, |
| "grad_norm": 0.01611410826444626, |
| "learning_rate": 9.69662675233645e-06, |
| "loss": 0.0007, |
| "step": 7680 |
| }, |
| { |
| "epoch": 4.491822429906542, |
| "grad_norm": 0.017773732542991638, |
| "learning_rate": 9.69297605140187e-06, |
| "loss": 0.0008, |
| "step": 7690 |
| }, |
| { |
| "epoch": 4.497663551401869, |
| "grad_norm": 0.01379080768674612, |
| "learning_rate": 9.68932535046729e-06, |
| "loss": 0.0007, |
| "step": 7700 |
| }, |
| { |
| "epoch": 4.503504672897196, |
| "grad_norm": 0.011493315920233727, |
| "learning_rate": 9.685674649532711e-06, |
| "loss": 0.0006, |
| "step": 7710 |
| }, |
| { |
| "epoch": 4.509345794392523, |
| "grad_norm": 0.012840710580348969, |
| "learning_rate": 9.682023948598131e-06, |
| "loss": 0.0006, |
| "step": 7720 |
| }, |
| { |
| "epoch": 4.515186915887851, |
| "grad_norm": 0.0128638232126832, |
| "learning_rate": 9.678373247663552e-06, |
| "loss": 0.0005, |
| "step": 7730 |
| }, |
| { |
| "epoch": 4.521028037383178, |
| "grad_norm": 0.013440934009850025, |
| "learning_rate": 9.674722546728974e-06, |
| "loss": 0.0005, |
| "step": 7740 |
| }, |
| { |
| "epoch": 4.526869158878505, |
| "grad_norm": 0.01098957471549511, |
| "learning_rate": 9.671071845794392e-06, |
| "loss": 0.0005, |
| "step": 7750 |
| }, |
| { |
| "epoch": 4.5327102803738315, |
| "grad_norm": 0.009183285757899284, |
| "learning_rate": 9.667421144859814e-06, |
| "loss": 0.0004, |
| "step": 7760 |
| }, |
| { |
| "epoch": 4.538551401869158, |
| "grad_norm": 0.008257759734988213, |
| "learning_rate": 9.663770443925235e-06, |
| "loss": 0.0004, |
| "step": 7770 |
| }, |
| { |
| "epoch": 4.544392523364486, |
| "grad_norm": 0.008908426389098167, |
| "learning_rate": 9.660119742990655e-06, |
| "loss": 0.0004, |
| "step": 7780 |
| }, |
| { |
| "epoch": 4.550233644859813, |
| "grad_norm": 0.011919341050088406, |
| "learning_rate": 9.656469042056075e-06, |
| "loss": 0.0471, |
| "step": 7790 |
| }, |
| { |
| "epoch": 4.55607476635514, |
| "grad_norm": 0.015091456472873688, |
| "learning_rate": 9.652818341121496e-06, |
| "loss": 0.0006, |
| "step": 7800 |
| }, |
| { |
| "epoch": 4.561915887850468, |
| "grad_norm": 0.013059835880994797, |
| "learning_rate": 9.649167640186918e-06, |
| "loss": 0.0006, |
| "step": 7810 |
| }, |
| { |
| "epoch": 4.567757009345795, |
| "grad_norm": 0.011410195380449295, |
| "learning_rate": 9.645516939252338e-06, |
| "loss": 0.0005, |
| "step": 7820 |
| }, |
| { |
| "epoch": 4.5735981308411215, |
| "grad_norm": 0.010844654403626919, |
| "learning_rate": 9.641866238317758e-06, |
| "loss": 0.0005, |
| "step": 7830 |
| }, |
| { |
| "epoch": 4.579439252336448, |
| "grad_norm": 0.0135049344971776, |
| "learning_rate": 9.638215537383178e-06, |
| "loss": 0.0878, |
| "step": 7840 |
| }, |
| { |
| "epoch": 4.585280373831775, |
| "grad_norm": 0.01884039305150509, |
| "learning_rate": 9.634564836448599e-06, |
| "loss": 0.0007, |
| "step": 7850 |
| }, |
| { |
| "epoch": 4.591121495327103, |
| "grad_norm": 0.0158846452832222, |
| "learning_rate": 9.630914135514019e-06, |
| "loss": 0.0008, |
| "step": 7860 |
| }, |
| { |
| "epoch": 4.59696261682243, |
| "grad_norm": 0.018315622583031654, |
| "learning_rate": 9.62726343457944e-06, |
| "loss": 0.0007, |
| "step": 7870 |
| }, |
| { |
| "epoch": 4.602803738317757, |
| "grad_norm": 0.015546320006251335, |
| "learning_rate": 9.62361273364486e-06, |
| "loss": 0.0006, |
| "step": 7880 |
| }, |
| { |
| "epoch": 4.608644859813084, |
| "grad_norm": 0.012519214302301407, |
| "learning_rate": 9.619962032710282e-06, |
| "loss": 0.0006, |
| "step": 7890 |
| }, |
| { |
| "epoch": 4.614485981308412, |
| "grad_norm": 0.012493118643760681, |
| "learning_rate": 9.616311331775702e-06, |
| "loss": 0.0005, |
| "step": 7900 |
| }, |
| { |
| "epoch": 4.6203271028037385, |
| "grad_norm": 0.012429878115653992, |
| "learning_rate": 9.612660630841122e-06, |
| "loss": 0.0424, |
| "step": 7910 |
| }, |
| { |
| "epoch": 4.626168224299065, |
| "grad_norm": 0.03473297879099846, |
| "learning_rate": 9.609009929906543e-06, |
| "loss": 0.0862, |
| "step": 7920 |
| }, |
| { |
| "epoch": 4.632009345794392, |
| "grad_norm": 0.02223706804215908, |
| "learning_rate": 9.605359228971963e-06, |
| "loss": 0.0012, |
| "step": 7930 |
| }, |
| { |
| "epoch": 4.63785046728972, |
| "grad_norm": 1.4426337480545044, |
| "learning_rate": 9.601708528037385e-06, |
| "loss": 0.0434, |
| "step": 7940 |
| }, |
| { |
| "epoch": 4.643691588785047, |
| "grad_norm": 0.027474477887153625, |
| "learning_rate": 9.598057827102805e-06, |
| "loss": 0.001, |
| "step": 7950 |
| }, |
| { |
| "epoch": 4.649532710280374, |
| "grad_norm": 1.3533803224563599, |
| "learning_rate": 9.594407126168226e-06, |
| "loss": 0.0405, |
| "step": 7960 |
| }, |
| { |
| "epoch": 4.655373831775701, |
| "grad_norm": 0.028552265837788582, |
| "learning_rate": 9.590756425233646e-06, |
| "loss": 0.0014, |
| "step": 7970 |
| }, |
| { |
| "epoch": 4.661214953271028, |
| "grad_norm": 0.018535079434514046, |
| "learning_rate": 9.587105724299066e-06, |
| "loss": 0.0015, |
| "step": 7980 |
| }, |
| { |
| "epoch": 4.667056074766355, |
| "grad_norm": 0.027407588437199593, |
| "learning_rate": 9.583455023364487e-06, |
| "loss": 0.0015, |
| "step": 7990 |
| }, |
| { |
| "epoch": 4.672897196261682, |
| "grad_norm": 0.020983988419175148, |
| "learning_rate": 9.579804322429907e-06, |
| "loss": 0.0011, |
| "step": 8000 |
| }, |
| { |
| "epoch": 4.678738317757009, |
| "grad_norm": 1.3812689781188965, |
| "learning_rate": 9.576153621495327e-06, |
| "loss": 0.0408, |
| "step": 8010 |
| }, |
| { |
| "epoch": 4.684579439252336, |
| "grad_norm": 0.02850353717803955, |
| "learning_rate": 9.57250292056075e-06, |
| "loss": 0.0011, |
| "step": 8020 |
| }, |
| { |
| "epoch": 4.690420560747664, |
| "grad_norm": 0.021959487348794937, |
| "learning_rate": 9.56885221962617e-06, |
| "loss": 0.0012, |
| "step": 8030 |
| }, |
| { |
| "epoch": 4.696261682242991, |
| "grad_norm": 3.579338788986206, |
| "learning_rate": 9.56520151869159e-06, |
| "loss": 0.074, |
| "step": 8040 |
| }, |
| { |
| "epoch": 4.702102803738318, |
| "grad_norm": 0.05354708805680275, |
| "learning_rate": 9.56155081775701e-06, |
| "loss": 0.0028, |
| "step": 8050 |
| }, |
| { |
| "epoch": 4.707943925233645, |
| "grad_norm": 0.030053434893488884, |
| "learning_rate": 9.55790011682243e-06, |
| "loss": 0.0018, |
| "step": 8060 |
| }, |
| { |
| "epoch": 4.713785046728972, |
| "grad_norm": 1.3448758125305176, |
| "learning_rate": 9.55424941588785e-06, |
| "loss": 0.0804, |
| "step": 8070 |
| }, |
| { |
| "epoch": 4.719626168224299, |
| "grad_norm": 0.07039051502943039, |
| "learning_rate": 9.550598714953273e-06, |
| "loss": 0.0021, |
| "step": 8080 |
| }, |
| { |
| "epoch": 4.725467289719626, |
| "grad_norm": 0.04373152554035187, |
| "learning_rate": 9.546948014018691e-06, |
| "loss": 0.0028, |
| "step": 8090 |
| }, |
| { |
| "epoch": 4.731308411214953, |
| "grad_norm": 0.04728665202856064, |
| "learning_rate": 9.543297313084113e-06, |
| "loss": 0.0031, |
| "step": 8100 |
| }, |
| { |
| "epoch": 4.73714953271028, |
| "grad_norm": 0.04679826647043228, |
| "learning_rate": 9.539646612149534e-06, |
| "loss": 0.0928, |
| "step": 8110 |
| }, |
| { |
| "epoch": 4.742990654205608, |
| "grad_norm": 0.0427640900015831, |
| "learning_rate": 9.535995911214954e-06, |
| "loss": 0.0037, |
| "step": 8120 |
| }, |
| { |
| "epoch": 4.748831775700935, |
| "grad_norm": 0.02820892632007599, |
| "learning_rate": 9.532345210280374e-06, |
| "loss": 0.0015, |
| "step": 8130 |
| }, |
| { |
| "epoch": 4.7546728971962615, |
| "grad_norm": 0.028863176703453064, |
| "learning_rate": 9.528694509345795e-06, |
| "loss": 0.0416, |
| "step": 8140 |
| }, |
| { |
| "epoch": 4.760514018691588, |
| "grad_norm": 0.030389975756406784, |
| "learning_rate": 9.525043808411217e-06, |
| "loss": 0.0014, |
| "step": 8150 |
| }, |
| { |
| "epoch": 4.766355140186916, |
| "grad_norm": 0.025430910289287567, |
| "learning_rate": 9.521393107476637e-06, |
| "loss": 0.0013, |
| "step": 8160 |
| }, |
| { |
| "epoch": 4.772196261682243, |
| "grad_norm": 0.02759852446615696, |
| "learning_rate": 9.517742406542057e-06, |
| "loss": 0.0012, |
| "step": 8170 |
| }, |
| { |
| "epoch": 4.77803738317757, |
| "grad_norm": 0.01693788357079029, |
| "learning_rate": 9.514091705607478e-06, |
| "loss": 0.0009, |
| "step": 8180 |
| }, |
| { |
| "epoch": 4.783878504672897, |
| "grad_norm": 0.019037263467907906, |
| "learning_rate": 9.510441004672898e-06, |
| "loss": 0.0008, |
| "step": 8190 |
| }, |
| { |
| "epoch": 4.789719626168225, |
| "grad_norm": 0.01847090944647789, |
| "learning_rate": 9.506790303738318e-06, |
| "loss": 0.0008, |
| "step": 8200 |
| }, |
| { |
| "epoch": 4.795560747663552, |
| "grad_norm": 0.014866204001009464, |
| "learning_rate": 9.503139602803739e-06, |
| "loss": 0.0007, |
| "step": 8210 |
| }, |
| { |
| "epoch": 4.8014018691588785, |
| "grad_norm": 0.017772305756807327, |
| "learning_rate": 9.499488901869159e-06, |
| "loss": 0.044, |
| "step": 8220 |
| }, |
| { |
| "epoch": 4.807242990654205, |
| "grad_norm": 0.018901938572525978, |
| "learning_rate": 9.495838200934581e-06, |
| "loss": 0.0008, |
| "step": 8230 |
| }, |
| { |
| "epoch": 4.813084112149532, |
| "grad_norm": 0.01904388889670372, |
| "learning_rate": 9.492187500000001e-06, |
| "loss": 0.001, |
| "step": 8240 |
| }, |
| { |
| "epoch": 4.81892523364486, |
| "grad_norm": 0.02215546742081642, |
| "learning_rate": 9.488536799065421e-06, |
| "loss": 0.0008, |
| "step": 8250 |
| }, |
| { |
| "epoch": 4.824766355140187, |
| "grad_norm": 0.0166354738175869, |
| "learning_rate": 9.484886098130842e-06, |
| "loss": 0.0008, |
| "step": 8260 |
| }, |
| { |
| "epoch": 4.830607476635514, |
| "grad_norm": 0.015143346972763538, |
| "learning_rate": 9.481235397196262e-06, |
| "loss": 0.0007, |
| "step": 8270 |
| }, |
| { |
| "epoch": 4.836448598130842, |
| "grad_norm": 0.010659987106919289, |
| "learning_rate": 9.477584696261684e-06, |
| "loss": 0.0006, |
| "step": 8280 |
| }, |
| { |
| "epoch": 4.8422897196261685, |
| "grad_norm": 0.01438753679394722, |
| "learning_rate": 9.473933995327104e-06, |
| "loss": 0.0006, |
| "step": 8290 |
| }, |
| { |
| "epoch": 4.848130841121495, |
| "grad_norm": 0.009508033283054829, |
| "learning_rate": 9.470283294392523e-06, |
| "loss": 0.0006, |
| "step": 8300 |
| }, |
| { |
| "epoch": 4.853971962616822, |
| "grad_norm": 0.01247571874409914, |
| "learning_rate": 9.466632593457945e-06, |
| "loss": 0.0005, |
| "step": 8310 |
| }, |
| { |
| "epoch": 4.859813084112149, |
| "grad_norm": 0.010293890722095966, |
| "learning_rate": 9.462981892523365e-06, |
| "loss": 0.0005, |
| "step": 8320 |
| }, |
| { |
| "epoch": 4.865654205607477, |
| "grad_norm": 0.011530600488185883, |
| "learning_rate": 9.459331191588786e-06, |
| "loss": 0.0004, |
| "step": 8330 |
| }, |
| { |
| "epoch": 4.871495327102804, |
| "grad_norm": 0.011948925442993641, |
| "learning_rate": 9.455680490654206e-06, |
| "loss": 0.0004, |
| "step": 8340 |
| }, |
| { |
| "epoch": 4.877336448598131, |
| "grad_norm": 0.00931254867464304, |
| "learning_rate": 9.452029789719626e-06, |
| "loss": 0.0004, |
| "step": 8350 |
| }, |
| { |
| "epoch": 4.883177570093458, |
| "grad_norm": 0.008403575979173183, |
| "learning_rate": 9.448379088785048e-06, |
| "loss": 0.0004, |
| "step": 8360 |
| }, |
| { |
| "epoch": 4.8890186915887845, |
| "grad_norm": 0.011545160785317421, |
| "learning_rate": 9.444728387850469e-06, |
| "loss": 0.0478, |
| "step": 8370 |
| }, |
| { |
| "epoch": 4.894859813084112, |
| "grad_norm": 0.017456984147429466, |
| "learning_rate": 9.441077686915889e-06, |
| "loss": 0.0006, |
| "step": 8380 |
| }, |
| { |
| "epoch": 4.900700934579439, |
| "grad_norm": 0.010550367645919323, |
| "learning_rate": 9.43742698598131e-06, |
| "loss": 0.0006, |
| "step": 8390 |
| }, |
| { |
| "epoch": 4.906542056074766, |
| "grad_norm": 1.5026402473449707, |
| "learning_rate": 9.43377628504673e-06, |
| "loss": 0.0452, |
| "step": 8400 |
| }, |
| { |
| "epoch": 4.912383177570094, |
| "grad_norm": 0.015959005802869797, |
| "learning_rate": 9.430125584112152e-06, |
| "loss": 0.0006, |
| "step": 8410 |
| }, |
| { |
| "epoch": 4.918224299065421, |
| "grad_norm": 0.01576901040971279, |
| "learning_rate": 9.42647488317757e-06, |
| "loss": 0.0008, |
| "step": 8420 |
| }, |
| { |
| "epoch": 4.924065420560748, |
| "grad_norm": 0.012024643830955029, |
| "learning_rate": 9.42282418224299e-06, |
| "loss": 0.0008, |
| "step": 8430 |
| }, |
| { |
| "epoch": 4.929906542056075, |
| "grad_norm": 0.017918461933732033, |
| "learning_rate": 9.419173481308413e-06, |
| "loss": 0.0007, |
| "step": 8440 |
| }, |
| { |
| "epoch": 4.9357476635514015, |
| "grad_norm": 0.018606670200824738, |
| "learning_rate": 9.415522780373833e-06, |
| "loss": 0.0006, |
| "step": 8450 |
| }, |
| { |
| "epoch": 4.941588785046729, |
| "grad_norm": 0.01964343525469303, |
| "learning_rate": 9.411872079439253e-06, |
| "loss": 0.0427, |
| "step": 8460 |
| }, |
| { |
| "epoch": 4.947429906542056, |
| "grad_norm": 0.019112348556518555, |
| "learning_rate": 9.408221378504673e-06, |
| "loss": 0.0009, |
| "step": 8470 |
| }, |
| { |
| "epoch": 4.953271028037383, |
| "grad_norm": 0.017472274601459503, |
| "learning_rate": 9.404570677570094e-06, |
| "loss": 0.0009, |
| "step": 8480 |
| }, |
| { |
| "epoch": 4.95911214953271, |
| "grad_norm": 0.018791217356920242, |
| "learning_rate": 9.400919976635516e-06, |
| "loss": 0.0009, |
| "step": 8490 |
| }, |
| { |
| "epoch": 4.964953271028038, |
| "grad_norm": 0.014025550335645676, |
| "learning_rate": 9.397269275700936e-06, |
| "loss": 0.0008, |
| "step": 8500 |
| }, |
| { |
| "epoch": 4.970794392523365, |
| "grad_norm": 0.018441924825310707, |
| "learning_rate": 9.393618574766355e-06, |
| "loss": 0.0434, |
| "step": 8510 |
| }, |
| { |
| "epoch": 4.9766355140186915, |
| "grad_norm": 0.02612805739045143, |
| "learning_rate": 9.389967873831777e-06, |
| "loss": 0.0009, |
| "step": 8520 |
| }, |
| { |
| "epoch": 4.982476635514018, |
| "grad_norm": 1.388852596282959, |
| "learning_rate": 9.386317172897197e-06, |
| "loss": 0.0395, |
| "step": 8530 |
| }, |
| { |
| "epoch": 4.988317757009346, |
| "grad_norm": 0.04369686543941498, |
| "learning_rate": 9.382666471962617e-06, |
| "loss": 0.0018, |
| "step": 8540 |
| }, |
| { |
| "epoch": 4.994158878504673, |
| "grad_norm": 0.019059184938669205, |
| "learning_rate": 9.379015771028038e-06, |
| "loss": 0.0016, |
| "step": 8550 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.017675094306468964, |
| "learning_rate": 9.375365070093458e-06, |
| "loss": 0.0011, |
| "step": 8560 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_f1": 0.9985628923682021, |
| "eval_fbeta": 0.997793015231967, |
| "eval_loss": 0.010423527099192142, |
| "eval_precision": 0.9972805559752228, |
| "eval_recall": 0.9998485307482581, |
| "eval_runtime": 171.2964, |
| "eval_samples_per_second": 68.519, |
| "eval_steps_per_second": 8.57, |
| "step": 8560 |
| }, |
| { |
| "epoch": 5.005841121495327, |
| "grad_norm": 0.020811092108488083, |
| "learning_rate": 9.37171436915888e-06, |
| "loss": 0.0009, |
| "step": 8570 |
| }, |
| { |
| "epoch": 5.011682242990654, |
| "grad_norm": 0.017634106799960136, |
| "learning_rate": 9.3680636682243e-06, |
| "loss": 0.0007, |
| "step": 8580 |
| }, |
| { |
| "epoch": 5.017523364485982, |
| "grad_norm": 0.01755143329501152, |
| "learning_rate": 9.36441296728972e-06, |
| "loss": 0.0007, |
| "step": 8590 |
| }, |
| { |
| "epoch": 5.0233644859813085, |
| "grad_norm": 0.013711643405258656, |
| "learning_rate": 9.360762266355141e-06, |
| "loss": 0.0006, |
| "step": 8600 |
| }, |
| { |
| "epoch": 5.029205607476635, |
| "grad_norm": 0.022771639749407768, |
| "learning_rate": 9.357111565420561e-06, |
| "loss": 0.0435, |
| "step": 8610 |
| }, |
| { |
| "epoch": 5.035046728971962, |
| "grad_norm": 0.023437688127160072, |
| "learning_rate": 9.353460864485983e-06, |
| "loss": 0.0009, |
| "step": 8620 |
| }, |
| { |
| "epoch": 5.04088785046729, |
| "grad_norm": 0.027155088260769844, |
| "learning_rate": 9.349810163551402e-06, |
| "loss": 0.0008, |
| "step": 8630 |
| }, |
| { |
| "epoch": 5.046728971962617, |
| "grad_norm": 0.02320449985563755, |
| "learning_rate": 9.346159462616822e-06, |
| "loss": 0.0393, |
| "step": 8640 |
| }, |
| { |
| "epoch": 5.052570093457944, |
| "grad_norm": 2.8932414054870605, |
| "learning_rate": 9.342508761682244e-06, |
| "loss": 0.0434, |
| "step": 8650 |
| }, |
| { |
| "epoch": 5.058411214953271, |
| "grad_norm": 0.044957537204027176, |
| "learning_rate": 9.338858060747665e-06, |
| "loss": 0.0014, |
| "step": 8660 |
| }, |
| { |
| "epoch": 5.0642523364485985, |
| "grad_norm": 0.024762434884905815, |
| "learning_rate": 9.335207359813085e-06, |
| "loss": 0.0014, |
| "step": 8670 |
| }, |
| { |
| "epoch": 5.070093457943925, |
| "grad_norm": 0.017925061285495758, |
| "learning_rate": 9.331556658878505e-06, |
| "loss": 0.0008, |
| "step": 8680 |
| }, |
| { |
| "epoch": 5.075934579439252, |
| "grad_norm": 0.01599021814763546, |
| "learning_rate": 9.327905957943925e-06, |
| "loss": 0.0007, |
| "step": 8690 |
| }, |
| { |
| "epoch": 5.081775700934579, |
| "grad_norm": 0.01171871181577444, |
| "learning_rate": 9.324255257009347e-06, |
| "loss": 0.0006, |
| "step": 8700 |
| }, |
| { |
| "epoch": 5.087616822429906, |
| "grad_norm": 0.018137352541089058, |
| "learning_rate": 9.320604556074768e-06, |
| "loss": 0.089, |
| "step": 8710 |
| }, |
| { |
| "epoch": 5.093457943925234, |
| "grad_norm": 0.03295230492949486, |
| "learning_rate": 9.316953855140186e-06, |
| "loss": 0.0015, |
| "step": 8720 |
| }, |
| { |
| "epoch": 5.099299065420561, |
| "grad_norm": 0.0420125387609005, |
| "learning_rate": 9.313303154205608e-06, |
| "loss": 0.0016, |
| "step": 8730 |
| }, |
| { |
| "epoch": 5.105140186915888, |
| "grad_norm": 0.024619553238153458, |
| "learning_rate": 9.309652453271029e-06, |
| "loss": 0.0015, |
| "step": 8740 |
| }, |
| { |
| "epoch": 5.110981308411215, |
| "grad_norm": 0.024943150579929352, |
| "learning_rate": 9.306001752336449e-06, |
| "loss": 0.0011, |
| "step": 8750 |
| }, |
| { |
| "epoch": 5.116822429906542, |
| "grad_norm": 0.024661751464009285, |
| "learning_rate": 9.30235105140187e-06, |
| "loss": 0.001, |
| "step": 8760 |
| }, |
| { |
| "epoch": 5.122663551401869, |
| "grad_norm": 0.019864290952682495, |
| "learning_rate": 9.29870035046729e-06, |
| "loss": 0.0009, |
| "step": 8770 |
| }, |
| { |
| "epoch": 5.128504672897196, |
| "grad_norm": 0.014965186826884747, |
| "learning_rate": 9.295049649532712e-06, |
| "loss": 0.0393, |
| "step": 8780 |
| }, |
| { |
| "epoch": 5.134345794392523, |
| "grad_norm": 0.020861614495515823, |
| "learning_rate": 9.291398948598132e-06, |
| "loss": 0.001, |
| "step": 8790 |
| }, |
| { |
| "epoch": 5.140186915887851, |
| "grad_norm": 0.01782502979040146, |
| "learning_rate": 9.287748247663552e-06, |
| "loss": 0.032, |
| "step": 8800 |
| }, |
| { |
| "epoch": 5.146028037383178, |
| "grad_norm": 0.014934423379600048, |
| "learning_rate": 9.284097546728973e-06, |
| "loss": 0.0007, |
| "step": 8810 |
| }, |
| { |
| "epoch": 5.151869158878505, |
| "grad_norm": 0.02040957845747471, |
| "learning_rate": 9.280446845794393e-06, |
| "loss": 0.0434, |
| "step": 8820 |
| }, |
| { |
| "epoch": 5.1577102803738315, |
| "grad_norm": 0.027365613728761673, |
| "learning_rate": 9.276796144859815e-06, |
| "loss": 0.0009, |
| "step": 8830 |
| }, |
| { |
| "epoch": 5.163551401869159, |
| "grad_norm": 0.026035156100988388, |
| "learning_rate": 9.273145443925235e-06, |
| "loss": 0.001, |
| "step": 8840 |
| }, |
| { |
| "epoch": 5.169392523364486, |
| "grad_norm": 0.011135846376419067, |
| "learning_rate": 9.269494742990654e-06, |
| "loss": 0.0008, |
| "step": 8850 |
| }, |
| { |
| "epoch": 5.175233644859813, |
| "grad_norm": 0.02742450125515461, |
| "learning_rate": 9.265844042056076e-06, |
| "loss": 0.0422, |
| "step": 8860 |
| }, |
| { |
| "epoch": 5.18107476635514, |
| "grad_norm": 0.029255658388137817, |
| "learning_rate": 9.262193341121496e-06, |
| "loss": 0.0012, |
| "step": 8870 |
| }, |
| { |
| "epoch": 5.186915887850467, |
| "grad_norm": 0.02883157692849636, |
| "learning_rate": 9.258542640186917e-06, |
| "loss": 0.0013, |
| "step": 8880 |
| }, |
| { |
| "epoch": 5.192757009345795, |
| "grad_norm": 0.02262677438557148, |
| "learning_rate": 9.254891939252337e-06, |
| "loss": 0.001, |
| "step": 8890 |
| }, |
| { |
| "epoch": 5.1985981308411215, |
| "grad_norm": 0.018410807475447655, |
| "learning_rate": 9.251241238317757e-06, |
| "loss": 0.0008, |
| "step": 8900 |
| }, |
| { |
| "epoch": 5.204439252336448, |
| "grad_norm": 0.030904971063137054, |
| "learning_rate": 9.247590537383179e-06, |
| "loss": 0.0413, |
| "step": 8910 |
| }, |
| { |
| "epoch": 5.210280373831775, |
| "grad_norm": 0.029191523790359497, |
| "learning_rate": 9.2439398364486e-06, |
| "loss": 0.0012, |
| "step": 8920 |
| }, |
| { |
| "epoch": 5.216121495327103, |
| "grad_norm": 0.021812813356518745, |
| "learning_rate": 9.24028913551402e-06, |
| "loss": 0.001, |
| "step": 8930 |
| }, |
| { |
| "epoch": 5.22196261682243, |
| "grad_norm": 0.027340680360794067, |
| "learning_rate": 9.23663843457944e-06, |
| "loss": 0.001, |
| "step": 8940 |
| }, |
| { |
| "epoch": 5.227803738317757, |
| "grad_norm": 0.013570006936788559, |
| "learning_rate": 9.23298773364486e-06, |
| "loss": 0.0008, |
| "step": 8950 |
| }, |
| { |
| "epoch": 5.233644859813084, |
| "grad_norm": 0.02434568665921688, |
| "learning_rate": 9.229337032710282e-06, |
| "loss": 0.0417, |
| "step": 8960 |
| }, |
| { |
| "epoch": 5.239485981308412, |
| "grad_norm": 0.03371907025575638, |
| "learning_rate": 9.225686331775701e-06, |
| "loss": 0.0011, |
| "step": 8970 |
| }, |
| { |
| "epoch": 5.2453271028037385, |
| "grad_norm": 0.02317052148282528, |
| "learning_rate": 9.222035630841121e-06, |
| "loss": 0.0011, |
| "step": 8980 |
| }, |
| { |
| "epoch": 5.251168224299065, |
| "grad_norm": 0.02078530192375183, |
| "learning_rate": 9.218384929906543e-06, |
| "loss": 0.0011, |
| "step": 8990 |
| }, |
| { |
| "epoch": 5.257009345794392, |
| "grad_norm": 0.018598228693008423, |
| "learning_rate": 9.214734228971964e-06, |
| "loss": 0.0008, |
| "step": 9000 |
| }, |
| { |
| "epoch": 5.26285046728972, |
| "grad_norm": 0.03364751860499382, |
| "learning_rate": 9.211083528037384e-06, |
| "loss": 0.0359, |
| "step": 9010 |
| }, |
| { |
| "epoch": 5.268691588785047, |
| "grad_norm": 0.01850021816790104, |
| "learning_rate": 9.207432827102804e-06, |
| "loss": 0.0085, |
| "step": 9020 |
| }, |
| { |
| "epoch": 5.274532710280374, |
| "grad_norm": 0.014193546026945114, |
| "learning_rate": 9.203782126168225e-06, |
| "loss": 0.0007, |
| "step": 9030 |
| }, |
| { |
| "epoch": 5.280373831775701, |
| "grad_norm": 0.011346135288476944, |
| "learning_rate": 9.200131425233647e-06, |
| "loss": 0.0005, |
| "step": 9040 |
| }, |
| { |
| "epoch": 5.286214953271028, |
| "grad_norm": 0.011504976078867912, |
| "learning_rate": 9.196480724299067e-06, |
| "loss": 0.0005, |
| "step": 9050 |
| }, |
| { |
| "epoch": 5.292056074766355, |
| "grad_norm": 1.3916432857513428, |
| "learning_rate": 9.192830023364486e-06, |
| "loss": 0.0825, |
| "step": 9060 |
| }, |
| { |
| "epoch": 5.297897196261682, |
| "grad_norm": 0.04609482362866402, |
| "learning_rate": 9.189179322429908e-06, |
| "loss": 0.002, |
| "step": 9070 |
| }, |
| { |
| "epoch": 5.303738317757009, |
| "grad_norm": 0.05234284698963165, |
| "learning_rate": 9.185528621495328e-06, |
| "loss": 0.031, |
| "step": 9080 |
| }, |
| { |
| "epoch": 5.309579439252336, |
| "grad_norm": 0.08297610282897949, |
| "learning_rate": 9.181877920560748e-06, |
| "loss": 0.004, |
| "step": 9090 |
| }, |
| { |
| "epoch": 5.315420560747664, |
| "grad_norm": 0.034371357411146164, |
| "learning_rate": 9.178227219626168e-06, |
| "loss": 0.0472, |
| "step": 9100 |
| }, |
| { |
| "epoch": 5.321261682242991, |
| "grad_norm": 0.01807144097983837, |
| "learning_rate": 9.174576518691589e-06, |
| "loss": 0.0442, |
| "step": 9110 |
| }, |
| { |
| "epoch": 5.327102803738318, |
| "grad_norm": 0.02567487396299839, |
| "learning_rate": 9.17092581775701e-06, |
| "loss": 0.001, |
| "step": 9120 |
| }, |
| { |
| "epoch": 5.332943925233645, |
| "grad_norm": 0.02019997499883175, |
| "learning_rate": 9.167275116822431e-06, |
| "loss": 0.001, |
| "step": 9130 |
| }, |
| { |
| "epoch": 5.338785046728972, |
| "grad_norm": 0.021175650879740715, |
| "learning_rate": 9.163624415887851e-06, |
| "loss": 0.001, |
| "step": 9140 |
| }, |
| { |
| "epoch": 5.344626168224299, |
| "grad_norm": 0.0180047620087862, |
| "learning_rate": 9.159973714953272e-06, |
| "loss": 0.0008, |
| "step": 9150 |
| }, |
| { |
| "epoch": 5.350467289719626, |
| "grad_norm": 0.013865533284842968, |
| "learning_rate": 9.156323014018692e-06, |
| "loss": 0.0007, |
| "step": 9160 |
| }, |
| { |
| "epoch": 5.356308411214953, |
| "grad_norm": 0.015626810491085052, |
| "learning_rate": 9.152672313084114e-06, |
| "loss": 0.0006, |
| "step": 9170 |
| }, |
| { |
| "epoch": 5.36214953271028, |
| "grad_norm": 0.013258475810289383, |
| "learning_rate": 9.149021612149533e-06, |
| "loss": 0.0005, |
| "step": 9180 |
| }, |
| { |
| "epoch": 5.367990654205608, |
| "grad_norm": 0.01744505763053894, |
| "learning_rate": 9.145370911214953e-06, |
| "loss": 0.0005, |
| "step": 9190 |
| }, |
| { |
| "epoch": 5.373831775700935, |
| "grad_norm": 0.009587449952960014, |
| "learning_rate": 9.141720210280375e-06, |
| "loss": 0.0004, |
| "step": 9200 |
| }, |
| { |
| "epoch": 5.3796728971962615, |
| "grad_norm": 0.008393511176109314, |
| "learning_rate": 9.138069509345795e-06, |
| "loss": 0.0003, |
| "step": 9210 |
| }, |
| { |
| "epoch": 5.385514018691588, |
| "grad_norm": 0.009683453477919102, |
| "learning_rate": 9.134418808411216e-06, |
| "loss": 0.0004, |
| "step": 9220 |
| }, |
| { |
| "epoch": 5.391355140186916, |
| "grad_norm": 0.010573089122772217, |
| "learning_rate": 9.130768107476636e-06, |
| "loss": 0.0456, |
| "step": 9230 |
| }, |
| { |
| "epoch": 5.397196261682243, |
| "grad_norm": 0.024214621633291245, |
| "learning_rate": 9.127117406542056e-06, |
| "loss": 0.04, |
| "step": 9240 |
| }, |
| { |
| "epoch": 5.40303738317757, |
| "grad_norm": 0.03241032361984253, |
| "learning_rate": 9.123466705607478e-06, |
| "loss": 0.0016, |
| "step": 9250 |
| }, |
| { |
| "epoch": 5.408878504672897, |
| "grad_norm": 0.03916610777378082, |
| "learning_rate": 9.119816004672899e-06, |
| "loss": 0.0016, |
| "step": 9260 |
| }, |
| { |
| "epoch": 5.414719626168225, |
| "grad_norm": 0.016246847808361053, |
| "learning_rate": 9.116165303738317e-06, |
| "loss": 0.0011, |
| "step": 9270 |
| }, |
| { |
| "epoch": 5.420560747663552, |
| "grad_norm": 0.022740503773093224, |
| "learning_rate": 9.11251460280374e-06, |
| "loss": 0.0009, |
| "step": 9280 |
| }, |
| { |
| "epoch": 5.4264018691588785, |
| "grad_norm": 0.01288650557398796, |
| "learning_rate": 9.10886390186916e-06, |
| "loss": 0.0007, |
| "step": 9290 |
| }, |
| { |
| "epoch": 5.432242990654205, |
| "grad_norm": 0.014474975876510143, |
| "learning_rate": 9.10521320093458e-06, |
| "loss": 0.0006, |
| "step": 9300 |
| }, |
| { |
| "epoch": 5.438084112149532, |
| "grad_norm": 0.011654899455606937, |
| "learning_rate": 9.1015625e-06, |
| "loss": 0.0005, |
| "step": 9310 |
| }, |
| { |
| "epoch": 5.44392523364486, |
| "grad_norm": 0.00907689519226551, |
| "learning_rate": 9.09791179906542e-06, |
| "loss": 0.0004, |
| "step": 9320 |
| }, |
| { |
| "epoch": 5.449766355140187, |
| "grad_norm": 0.007321347948163748, |
| "learning_rate": 9.094261098130842e-06, |
| "loss": 0.0004, |
| "step": 9330 |
| }, |
| { |
| "epoch": 5.455607476635514, |
| "grad_norm": 0.007953358814120293, |
| "learning_rate": 9.090610397196263e-06, |
| "loss": 0.0004, |
| "step": 9340 |
| }, |
| { |
| "epoch": 5.461448598130841, |
| "grad_norm": 0.008754126727581024, |
| "learning_rate": 9.086959696261683e-06, |
| "loss": 0.0003, |
| "step": 9350 |
| }, |
| { |
| "epoch": 5.4672897196261685, |
| "grad_norm": 0.007452510762959719, |
| "learning_rate": 9.083308995327103e-06, |
| "loss": 0.0003, |
| "step": 9360 |
| }, |
| { |
| "epoch": 5.473130841121495, |
| "grad_norm": 0.0064163813367486, |
| "learning_rate": 9.079658294392524e-06, |
| "loss": 0.0003, |
| "step": 9370 |
| }, |
| { |
| "epoch": 5.478971962616822, |
| "grad_norm": 0.006491228472441435, |
| "learning_rate": 9.076007593457946e-06, |
| "loss": 0.0003, |
| "step": 9380 |
| }, |
| { |
| "epoch": 5.484813084112149, |
| "grad_norm": 0.0077286045998334885, |
| "learning_rate": 9.072356892523364e-06, |
| "loss": 0.0493, |
| "step": 9390 |
| }, |
| { |
| "epoch": 5.490654205607477, |
| "grad_norm": 0.01421878021210432, |
| "learning_rate": 9.068706191588785e-06, |
| "loss": 0.0006, |
| "step": 9400 |
| }, |
| { |
| "epoch": 5.496495327102804, |
| "grad_norm": 0.021647976711392403, |
| "learning_rate": 9.065055490654207e-06, |
| "loss": 0.0007, |
| "step": 9410 |
| }, |
| { |
| "epoch": 5.502336448598131, |
| "grad_norm": 0.01476993691176176, |
| "learning_rate": 9.061404789719627e-06, |
| "loss": 0.0007, |
| "step": 9420 |
| }, |
| { |
| "epoch": 5.508177570093458, |
| "grad_norm": 0.0126703642308712, |
| "learning_rate": 9.057754088785047e-06, |
| "loss": 0.0207, |
| "step": 9430 |
| }, |
| { |
| "epoch": 5.5140186915887845, |
| "grad_norm": 0.013835652731359005, |
| "learning_rate": 9.054103387850468e-06, |
| "loss": 0.0066, |
| "step": 9440 |
| }, |
| { |
| "epoch": 5.519859813084112, |
| "grad_norm": 0.010086634196341038, |
| "learning_rate": 9.050452686915888e-06, |
| "loss": 0.0004, |
| "step": 9450 |
| }, |
| { |
| "epoch": 5.525700934579439, |
| "grad_norm": 0.0096484599635005, |
| "learning_rate": 9.04680198598131e-06, |
| "loss": 0.0004, |
| "step": 9460 |
| }, |
| { |
| "epoch": 5.531542056074766, |
| "grad_norm": 0.009923032484948635, |
| "learning_rate": 9.04315128504673e-06, |
| "loss": 0.0004, |
| "step": 9470 |
| }, |
| { |
| "epoch": 5.537383177570094, |
| "grad_norm": 0.008762541227042675, |
| "learning_rate": 9.03950058411215e-06, |
| "loss": 0.0004, |
| "step": 9480 |
| }, |
| { |
| "epoch": 5.543224299065421, |
| "grad_norm": 0.008561445400118828, |
| "learning_rate": 9.035849883177571e-06, |
| "loss": 0.0003, |
| "step": 9490 |
| }, |
| { |
| "epoch": 5.549065420560748, |
| "grad_norm": 0.0088927261531353, |
| "learning_rate": 9.032199182242991e-06, |
| "loss": 0.0003, |
| "step": 9500 |
| }, |
| { |
| "epoch": 5.554906542056075, |
| "grad_norm": 0.007393279578536749, |
| "learning_rate": 9.028548481308412e-06, |
| "loss": 0.0003, |
| "step": 9510 |
| }, |
| { |
| "epoch": 5.5607476635514015, |
| "grad_norm": 0.0070273722521960735, |
| "learning_rate": 9.024897780373832e-06, |
| "loss": 0.0003, |
| "step": 9520 |
| }, |
| { |
| "epoch": 5.566588785046729, |
| "grad_norm": 0.0068083093501627445, |
| "learning_rate": 9.021247079439252e-06, |
| "loss": 0.0003, |
| "step": 9530 |
| }, |
| { |
| "epoch": 5.572429906542056, |
| "grad_norm": 0.007201792672276497, |
| "learning_rate": 9.017596378504674e-06, |
| "loss": 0.0003, |
| "step": 9540 |
| }, |
| { |
| "epoch": 5.578271028037383, |
| "grad_norm": 0.007822646759450436, |
| "learning_rate": 9.013945677570094e-06, |
| "loss": 0.0492, |
| "step": 9550 |
| }, |
| { |
| "epoch": 5.58411214953271, |
| "grad_norm": 0.010411952622234821, |
| "learning_rate": 9.010294976635515e-06, |
| "loss": 0.0004, |
| "step": 9560 |
| }, |
| { |
| "epoch": 5.589953271028038, |
| "grad_norm": 0.010123065672814846, |
| "learning_rate": 9.006644275700935e-06, |
| "loss": 0.0005, |
| "step": 9570 |
| }, |
| { |
| "epoch": 5.595794392523365, |
| "grad_norm": 0.009665888734161854, |
| "learning_rate": 9.002993574766355e-06, |
| "loss": 0.0005, |
| "step": 9580 |
| }, |
| { |
| "epoch": 5.6016355140186915, |
| "grad_norm": 0.011049588210880756, |
| "learning_rate": 8.999342873831777e-06, |
| "loss": 0.0005, |
| "step": 9590 |
| }, |
| { |
| "epoch": 5.607476635514018, |
| "grad_norm": 0.009384658187627792, |
| "learning_rate": 8.995692172897196e-06, |
| "loss": 0.0004, |
| "step": 9600 |
| }, |
| { |
| "epoch": 5.613317757009346, |
| "grad_norm": 0.010541644878685474, |
| "learning_rate": 8.992041471962616e-06, |
| "loss": 0.0004, |
| "step": 9610 |
| }, |
| { |
| "epoch": 5.619158878504673, |
| "grad_norm": 0.008299053646624088, |
| "learning_rate": 8.988390771028038e-06, |
| "loss": 0.0003, |
| "step": 9620 |
| }, |
| { |
| "epoch": 5.625, |
| "grad_norm": 0.009733597747981548, |
| "learning_rate": 8.984740070093459e-06, |
| "loss": 0.0003, |
| "step": 9630 |
| }, |
| { |
| "epoch": 5.630841121495327, |
| "grad_norm": 0.006777781993150711, |
| "learning_rate": 8.981089369158879e-06, |
| "loss": 0.0003, |
| "step": 9640 |
| }, |
| { |
| "epoch": 5.636682242990654, |
| "grad_norm": 0.009290322661399841, |
| "learning_rate": 8.9774386682243e-06, |
| "loss": 0.0473, |
| "step": 9650 |
| }, |
| { |
| "epoch": 5.642523364485982, |
| "grad_norm": 0.012651221826672554, |
| "learning_rate": 8.97378796728972e-06, |
| "loss": 0.0005, |
| "step": 9660 |
| }, |
| { |
| "epoch": 5.6483644859813085, |
| "grad_norm": 0.013811645098030567, |
| "learning_rate": 8.970137266355142e-06, |
| "loss": 0.0006, |
| "step": 9670 |
| }, |
| { |
| "epoch": 5.654205607476635, |
| "grad_norm": 0.01238976139575243, |
| "learning_rate": 8.966486565420562e-06, |
| "loss": 0.0005, |
| "step": 9680 |
| }, |
| { |
| "epoch": 5.660046728971962, |
| "grad_norm": 0.012121552601456642, |
| "learning_rate": 8.962835864485982e-06, |
| "loss": 0.0005, |
| "step": 9690 |
| }, |
| { |
| "epoch": 5.66588785046729, |
| "grad_norm": 0.018319696187973022, |
| "learning_rate": 8.959185163551403e-06, |
| "loss": 0.0846, |
| "step": 9700 |
| }, |
| { |
| "epoch": 5.671728971962617, |
| "grad_norm": 0.0433378741145134, |
| "learning_rate": 8.955534462616823e-06, |
| "loss": 0.0015, |
| "step": 9710 |
| }, |
| { |
| "epoch": 5.677570093457944, |
| "grad_norm": 0.06617596745491028, |
| "learning_rate": 8.951883761682245e-06, |
| "loss": 0.0576, |
| "step": 9720 |
| }, |
| { |
| "epoch": 5.683411214953271, |
| "grad_norm": 0.06185260787606239, |
| "learning_rate": 8.948233060747663e-06, |
| "loss": 0.0376, |
| "step": 9730 |
| }, |
| { |
| "epoch": 5.6892523364485985, |
| "grad_norm": 0.04279331862926483, |
| "learning_rate": 8.944582359813084e-06, |
| "loss": 0.0027, |
| "step": 9740 |
| }, |
| { |
| "epoch": 5.695093457943925, |
| "grad_norm": 0.0327022448182106, |
| "learning_rate": 8.940931658878506e-06, |
| "loss": 0.0344, |
| "step": 9750 |
| }, |
| { |
| "epoch": 5.700934579439252, |
| "grad_norm": 0.055735599249601364, |
| "learning_rate": 8.937280957943926e-06, |
| "loss": 0.0028, |
| "step": 9760 |
| }, |
| { |
| "epoch": 5.706775700934579, |
| "grad_norm": 3.1117148399353027, |
| "learning_rate": 8.933630257009346e-06, |
| "loss": 0.0473, |
| "step": 9770 |
| }, |
| { |
| "epoch": 5.712616822429906, |
| "grad_norm": 0.02079218439757824, |
| "learning_rate": 8.929979556074767e-06, |
| "loss": 0.0022, |
| "step": 9780 |
| }, |
| { |
| "epoch": 5.718457943925234, |
| "grad_norm": 0.01973365619778633, |
| "learning_rate": 8.926328855140187e-06, |
| "loss": 0.0008, |
| "step": 9790 |
| }, |
| { |
| "epoch": 5.724299065420561, |
| "grad_norm": 0.01175629161298275, |
| "learning_rate": 8.922678154205609e-06, |
| "loss": 0.0006, |
| "step": 9800 |
| }, |
| { |
| "epoch": 5.730140186915888, |
| "grad_norm": 0.01121139619499445, |
| "learning_rate": 8.91902745327103e-06, |
| "loss": 0.0004, |
| "step": 9810 |
| }, |
| { |
| "epoch": 5.7359813084112155, |
| "grad_norm": 0.009263860061764717, |
| "learning_rate": 8.91537675233645e-06, |
| "loss": 0.0004, |
| "step": 9820 |
| }, |
| { |
| "epoch": 5.741822429906542, |
| "grad_norm": 0.009500655345618725, |
| "learning_rate": 8.91172605140187e-06, |
| "loss": 0.0004, |
| "step": 9830 |
| }, |
| { |
| "epoch": 5.747663551401869, |
| "grad_norm": 0.007937227375805378, |
| "learning_rate": 8.90807535046729e-06, |
| "loss": 0.0003, |
| "step": 9840 |
| }, |
| { |
| "epoch": 5.753504672897196, |
| "grad_norm": 0.0077436259016394615, |
| "learning_rate": 8.90442464953271e-06, |
| "loss": 0.0003, |
| "step": 9850 |
| }, |
| { |
| "epoch": 5.759345794392523, |
| "grad_norm": 0.007083016447722912, |
| "learning_rate": 8.900773948598131e-06, |
| "loss": 0.0003, |
| "step": 9860 |
| }, |
| { |
| "epoch": 5.765186915887851, |
| "grad_norm": 0.006318287458270788, |
| "learning_rate": 8.897123247663551e-06, |
| "loss": 0.0002, |
| "step": 9870 |
| }, |
| { |
| "epoch": 5.771028037383178, |
| "grad_norm": 0.007866185158491135, |
| "learning_rate": 8.893472546728973e-06, |
| "loss": 0.0003, |
| "step": 9880 |
| }, |
| { |
| "epoch": 5.776869158878505, |
| "grad_norm": 0.005734143313020468, |
| "learning_rate": 8.889821845794394e-06, |
| "loss": 0.0002, |
| "step": 9890 |
| }, |
| { |
| "epoch": 5.7827102803738315, |
| "grad_norm": 29.448055267333984, |
| "learning_rate": 8.886171144859814e-06, |
| "loss": 0.0588, |
| "step": 9900 |
| }, |
| { |
| "epoch": 5.788551401869158, |
| "grad_norm": 0.020456640049815178, |
| "learning_rate": 8.882520443925234e-06, |
| "loss": 0.0445, |
| "step": 9910 |
| }, |
| { |
| "epoch": 5.794392523364486, |
| "grad_norm": 0.059099141508340836, |
| "learning_rate": 8.878869742990655e-06, |
| "loss": 0.0019, |
| "step": 9920 |
| }, |
| { |
| "epoch": 5.800233644859813, |
| "grad_norm": 0.037169113755226135, |
| "learning_rate": 8.875219042056077e-06, |
| "loss": 0.0019, |
| "step": 9930 |
| }, |
| { |
| "epoch": 5.80607476635514, |
| "grad_norm": 0.02267596684396267, |
| "learning_rate": 8.871568341121495e-06, |
| "loss": 0.0524, |
| "step": 9940 |
| }, |
| { |
| "epoch": 5.811915887850468, |
| "grad_norm": 0.016864538192749023, |
| "learning_rate": 8.867917640186917e-06, |
| "loss": 0.0437, |
| "step": 9950 |
| }, |
| { |
| "epoch": 5.817757009345795, |
| "grad_norm": 0.024668557569384575, |
| "learning_rate": 8.864266939252337e-06, |
| "loss": 0.0011, |
| "step": 9960 |
| }, |
| { |
| "epoch": 5.8235981308411215, |
| "grad_norm": 0.041549887508153915, |
| "learning_rate": 8.860616238317758e-06, |
| "loss": 0.041, |
| "step": 9970 |
| }, |
| { |
| "epoch": 5.829439252336448, |
| "grad_norm": 0.042261477559804916, |
| "learning_rate": 8.856965537383178e-06, |
| "loss": 0.0021, |
| "step": 9980 |
| }, |
| { |
| "epoch": 5.835280373831775, |
| "grad_norm": 0.039793532341718674, |
| "learning_rate": 8.853314836448598e-06, |
| "loss": 0.0357, |
| "step": 9990 |
| }, |
| { |
| "epoch": 5.841121495327103, |
| "grad_norm": 0.12145894020795822, |
| "learning_rate": 8.849664135514019e-06, |
| "loss": 0.0289, |
| "step": 10000 |
| }, |
| { |
| "epoch": 5.84696261682243, |
| "grad_norm": 0.036945607513189316, |
| "learning_rate": 8.84601343457944e-06, |
| "loss": 0.0039, |
| "step": 10010 |
| }, |
| { |
| "epoch": 5.852803738317757, |
| "grad_norm": 0.02271541766822338, |
| "learning_rate": 8.842362733644861e-06, |
| "loss": 0.0016, |
| "step": 10020 |
| }, |
| { |
| "epoch": 5.858644859813084, |
| "grad_norm": 0.016810262575745583, |
| "learning_rate": 8.838712032710281e-06, |
| "loss": 0.0008, |
| "step": 10030 |
| }, |
| { |
| "epoch": 5.864485981308412, |
| "grad_norm": 0.011538311839103699, |
| "learning_rate": 8.835061331775702e-06, |
| "loss": 0.0007, |
| "step": 10040 |
| }, |
| { |
| "epoch": 5.8703271028037385, |
| "grad_norm": 0.012231874279677868, |
| "learning_rate": 8.831410630841122e-06, |
| "loss": 0.0005, |
| "step": 10050 |
| }, |
| { |
| "epoch": 5.876168224299065, |
| "grad_norm": 0.010014678351581097, |
| "learning_rate": 8.827759929906542e-06, |
| "loss": 0.0005, |
| "step": 10060 |
| }, |
| { |
| "epoch": 5.882009345794392, |
| "grad_norm": 0.010446464642882347, |
| "learning_rate": 8.824109228971963e-06, |
| "loss": 0.0004, |
| "step": 10070 |
| }, |
| { |
| "epoch": 5.88785046728972, |
| "grad_norm": 0.010866041295230389, |
| "learning_rate": 8.820458528037385e-06, |
| "loss": 0.0004, |
| "step": 10080 |
| }, |
| { |
| "epoch": 5.893691588785047, |
| "grad_norm": 0.00958226714283228, |
| "learning_rate": 8.816807827102805e-06, |
| "loss": 0.0004, |
| "step": 10090 |
| }, |
| { |
| "epoch": 5.899532710280374, |
| "grad_norm": 0.009142902679741383, |
| "learning_rate": 8.813157126168225e-06, |
| "loss": 0.0003, |
| "step": 10100 |
| }, |
| { |
| "epoch": 5.905373831775701, |
| "grad_norm": 0.009961924515664577, |
| "learning_rate": 8.809506425233646e-06, |
| "loss": 0.0004, |
| "step": 10110 |
| }, |
| { |
| "epoch": 5.911214953271028, |
| "grad_norm": 0.00830204226076603, |
| "learning_rate": 8.805855724299066e-06, |
| "loss": 0.0186, |
| "step": 10120 |
| }, |
| { |
| "epoch": 5.917056074766355, |
| "grad_norm": 2.5257885456085205, |
| "learning_rate": 8.802205023364486e-06, |
| "loss": 0.05, |
| "step": 10130 |
| }, |
| { |
| "epoch": 5.922897196261682, |
| "grad_norm": 0.011586299166083336, |
| "learning_rate": 8.798554322429908e-06, |
| "loss": 0.0004, |
| "step": 10140 |
| }, |
| { |
| "epoch": 5.928738317757009, |
| "grad_norm": 0.013572271913290024, |
| "learning_rate": 8.794903621495327e-06, |
| "loss": 0.0006, |
| "step": 10150 |
| }, |
| { |
| "epoch": 5.934579439252336, |
| "grad_norm": 0.009981178678572178, |
| "learning_rate": 8.791252920560749e-06, |
| "loss": 0.0004, |
| "step": 10160 |
| }, |
| { |
| "epoch": 5.940420560747664, |
| "grad_norm": 0.008034189231693745, |
| "learning_rate": 8.78760221962617e-06, |
| "loss": 0.0003, |
| "step": 10170 |
| }, |
| { |
| "epoch": 5.946261682242991, |
| "grad_norm": 0.00744387973099947, |
| "learning_rate": 8.78395151869159e-06, |
| "loss": 0.0003, |
| "step": 10180 |
| }, |
| { |
| "epoch": 5.952102803738318, |
| "grad_norm": 0.008284796960651875, |
| "learning_rate": 8.78030081775701e-06, |
| "loss": 0.0491, |
| "step": 10190 |
| }, |
| { |
| "epoch": 5.957943925233645, |
| "grad_norm": 0.013832306489348412, |
| "learning_rate": 8.77665011682243e-06, |
| "loss": 0.0007, |
| "step": 10200 |
| }, |
| { |
| "epoch": 5.963785046728972, |
| "grad_norm": 0.013292601332068443, |
| "learning_rate": 8.77299941588785e-06, |
| "loss": 0.0006, |
| "step": 10210 |
| }, |
| { |
| "epoch": 5.969626168224299, |
| "grad_norm": 0.011907985433936119, |
| "learning_rate": 8.769348714953272e-06, |
| "loss": 0.0005, |
| "step": 10220 |
| }, |
| { |
| "epoch": 5.975467289719626, |
| "grad_norm": 0.012486466206610203, |
| "learning_rate": 8.765698014018693e-06, |
| "loss": 0.0005, |
| "step": 10230 |
| }, |
| { |
| "epoch": 5.981308411214953, |
| "grad_norm": 0.01007250975817442, |
| "learning_rate": 8.762047313084113e-06, |
| "loss": 0.0007, |
| "step": 10240 |
| }, |
| { |
| "epoch": 5.98714953271028, |
| "grad_norm": 0.010638604871928692, |
| "learning_rate": 8.758396612149533e-06, |
| "loss": 0.0004, |
| "step": 10250 |
| }, |
| { |
| "epoch": 5.992990654205608, |
| "grad_norm": 0.009204142726957798, |
| "learning_rate": 8.754745911214954e-06, |
| "loss": 0.0003, |
| "step": 10260 |
| }, |
| { |
| "epoch": 5.998831775700935, |
| "grad_norm": 0.008714770898222923, |
| "learning_rate": 8.751095210280374e-06, |
| "loss": 0.001, |
| "step": 10270 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_f1": 0.9994698977659977, |
| "eval_fbeta": 0.9994244065887268, |
| "eval_loss": 0.003641982562839985, |
| "eval_precision": 0.9993942147508709, |
| "eval_recall": 0.9995455922447744, |
| "eval_runtime": 171.4592, |
| "eval_samples_per_second": 68.454, |
| "eval_steps_per_second": 8.562, |
| "step": 10272 |
| }, |
| { |
| "epoch": 6.0046728971962615, |
| "grad_norm": 0.01118410099297762, |
| "learning_rate": 8.747444509345794e-06, |
| "loss": 0.0004, |
| "step": 10280 |
| }, |
| { |
| "epoch": 6.010514018691588, |
| "grad_norm": 0.009393405169248581, |
| "learning_rate": 8.743793808411216e-06, |
| "loss": 0.0004, |
| "step": 10290 |
| }, |
| { |
| "epoch": 6.016355140186916, |
| "grad_norm": 0.009552313946187496, |
| "learning_rate": 8.740143107476637e-06, |
| "loss": 0.0004, |
| "step": 10300 |
| }, |
| { |
| "epoch": 6.022196261682243, |
| "grad_norm": 1.8145328760147095, |
| "learning_rate": 8.736492406542057e-06, |
| "loss": 0.046, |
| "step": 10310 |
| }, |
| { |
| "epoch": 6.02803738317757, |
| "grad_norm": 0.02487098053097725, |
| "learning_rate": 8.732841705607477e-06, |
| "loss": 0.0453, |
| "step": 10320 |
| }, |
| { |
| "epoch": 6.033878504672897, |
| "grad_norm": 0.059945717453956604, |
| "learning_rate": 8.729191004672898e-06, |
| "loss": 0.0315, |
| "step": 10330 |
| }, |
| { |
| "epoch": 6.039719626168225, |
| "grad_norm": 0.02849278412759304, |
| "learning_rate": 8.725540303738318e-06, |
| "loss": 0.0227, |
| "step": 10340 |
| }, |
| { |
| "epoch": 6.045560747663552, |
| "grad_norm": 0.11616010963916779, |
| "learning_rate": 8.72188960280374e-06, |
| "loss": 0.0016, |
| "step": 10350 |
| }, |
| { |
| "epoch": 6.0514018691588785, |
| "grad_norm": 0.014204906299710274, |
| "learning_rate": 8.718238901869159e-06, |
| "loss": 0.001, |
| "step": 10360 |
| }, |
| { |
| "epoch": 6.057242990654205, |
| "grad_norm": 0.011876954697072506, |
| "learning_rate": 8.71458820093458e-06, |
| "loss": 0.0005, |
| "step": 10370 |
| }, |
| { |
| "epoch": 6.063084112149533, |
| "grad_norm": 0.010328153148293495, |
| "learning_rate": 8.7109375e-06, |
| "loss": 0.0004, |
| "step": 10380 |
| }, |
| { |
| "epoch": 6.06892523364486, |
| "grad_norm": 0.00894414447247982, |
| "learning_rate": 8.707286799065421e-06, |
| "loss": 0.0004, |
| "step": 10390 |
| }, |
| { |
| "epoch": 6.074766355140187, |
| "grad_norm": 0.011801760643720627, |
| "learning_rate": 8.703636098130841e-06, |
| "loss": 0.0481, |
| "step": 10400 |
| }, |
| { |
| "epoch": 6.080607476635514, |
| "grad_norm": 0.015615479089319706, |
| "learning_rate": 8.699985397196262e-06, |
| "loss": 0.0005, |
| "step": 10410 |
| }, |
| { |
| "epoch": 6.086448598130841, |
| "grad_norm": 0.015844807028770447, |
| "learning_rate": 8.696334696261684e-06, |
| "loss": 0.0006, |
| "step": 10420 |
| }, |
| { |
| "epoch": 6.0922897196261685, |
| "grad_norm": 0.013462678529322147, |
| "learning_rate": 8.692683995327104e-06, |
| "loss": 0.0006, |
| "step": 10430 |
| }, |
| { |
| "epoch": 6.098130841121495, |
| "grad_norm": 0.012400257401168346, |
| "learning_rate": 8.689033294392524e-06, |
| "loss": 0.0005, |
| "step": 10440 |
| }, |
| { |
| "epoch": 6.103971962616822, |
| "grad_norm": 0.010923230089247227, |
| "learning_rate": 8.685382593457945e-06, |
| "loss": 0.0005, |
| "step": 10450 |
| }, |
| { |
| "epoch": 6.109813084112149, |
| "grad_norm": 0.0079196747392416, |
| "learning_rate": 8.681731892523365e-06, |
| "loss": 0.0004, |
| "step": 10460 |
| }, |
| { |
| "epoch": 6.115654205607477, |
| "grad_norm": 0.009008225984871387, |
| "learning_rate": 8.678081191588785e-06, |
| "loss": 0.0004, |
| "step": 10470 |
| }, |
| { |
| "epoch": 6.121495327102804, |
| "grad_norm": 0.008494589477777481, |
| "learning_rate": 8.674430490654207e-06, |
| "loss": 0.0003, |
| "step": 10480 |
| }, |
| { |
| "epoch": 6.127336448598131, |
| "grad_norm": 0.00859193503856659, |
| "learning_rate": 8.670779789719626e-06, |
| "loss": 0.0003, |
| "step": 10490 |
| }, |
| { |
| "epoch": 6.133177570093458, |
| "grad_norm": 0.00865325890481472, |
| "learning_rate": 8.667129088785048e-06, |
| "loss": 0.0416, |
| "step": 10500 |
| }, |
| { |
| "epoch": 6.139018691588785, |
| "grad_norm": 0.008447653613984585, |
| "learning_rate": 8.663478387850468e-06, |
| "loss": 0.0003, |
| "step": 10510 |
| }, |
| { |
| "epoch": 6.144859813084112, |
| "grad_norm": 0.010303646326065063, |
| "learning_rate": 8.659827686915889e-06, |
| "loss": 0.0005, |
| "step": 10520 |
| }, |
| { |
| "epoch": 6.150700934579439, |
| "grad_norm": 0.008880840614438057, |
| "learning_rate": 8.656176985981309e-06, |
| "loss": 0.0107, |
| "step": 10530 |
| }, |
| { |
| "epoch": 6.156542056074766, |
| "grad_norm": 0.006746761500835419, |
| "learning_rate": 8.65252628504673e-06, |
| "loss": 0.0003, |
| "step": 10540 |
| }, |
| { |
| "epoch": 6.162383177570094, |
| "grad_norm": 0.006260715890675783, |
| "learning_rate": 8.648875584112151e-06, |
| "loss": 0.0002, |
| "step": 10550 |
| }, |
| { |
| "epoch": 6.168224299065421, |
| "grad_norm": 0.006196292117238045, |
| "learning_rate": 8.645224883177572e-06, |
| "loss": 0.0002, |
| "step": 10560 |
| }, |
| { |
| "epoch": 6.174065420560748, |
| "grad_norm": 0.005948168691247702, |
| "learning_rate": 8.641574182242992e-06, |
| "loss": 0.0002, |
| "step": 10570 |
| }, |
| { |
| "epoch": 6.179906542056075, |
| "grad_norm": 0.005726588889956474, |
| "learning_rate": 8.637923481308412e-06, |
| "loss": 0.0002, |
| "step": 10580 |
| }, |
| { |
| "epoch": 6.1857476635514015, |
| "grad_norm": 0.005977707449346781, |
| "learning_rate": 8.634272780373832e-06, |
| "loss": 0.0002, |
| "step": 10590 |
| }, |
| { |
| "epoch": 6.191588785046729, |
| "grad_norm": 0.006332565564662218, |
| "learning_rate": 8.630622079439253e-06, |
| "loss": 0.0002, |
| "step": 10600 |
| }, |
| { |
| "epoch": 6.197429906542056, |
| "grad_norm": 0.006450187414884567, |
| "learning_rate": 8.626971378504673e-06, |
| "loss": 0.0504, |
| "step": 10610 |
| }, |
| { |
| "epoch": 6.203271028037383, |
| "grad_norm": 0.007723100017756224, |
| "learning_rate": 8.623320677570093e-06, |
| "loss": 0.0003, |
| "step": 10620 |
| }, |
| { |
| "epoch": 6.20911214953271, |
| "grad_norm": 0.007839754223823547, |
| "learning_rate": 8.619669976635515e-06, |
| "loss": 0.0003, |
| "step": 10630 |
| }, |
| { |
| "epoch": 6.214953271028038, |
| "grad_norm": 0.007956388406455517, |
| "learning_rate": 8.616019275700936e-06, |
| "loss": 0.0004, |
| "step": 10640 |
| }, |
| { |
| "epoch": 6.220794392523365, |
| "grad_norm": 0.011821724474430084, |
| "learning_rate": 8.612368574766356e-06, |
| "loss": 0.0368, |
| "step": 10650 |
| }, |
| { |
| "epoch": 6.2266355140186915, |
| "grad_norm": 0.011840198189020157, |
| "learning_rate": 8.608717873831776e-06, |
| "loss": 0.0005, |
| "step": 10660 |
| }, |
| { |
| "epoch": 6.232476635514018, |
| "grad_norm": 0.02075495943427086, |
| "learning_rate": 8.605067172897197e-06, |
| "loss": 0.0185, |
| "step": 10670 |
| }, |
| { |
| "epoch": 6.238317757009346, |
| "grad_norm": 0.016375849023461342, |
| "learning_rate": 8.601416471962617e-06, |
| "loss": 0.0013, |
| "step": 10680 |
| }, |
| { |
| "epoch": 6.244158878504673, |
| "grad_norm": 0.015895796939730644, |
| "learning_rate": 8.597765771028039e-06, |
| "loss": 0.0005, |
| "step": 10690 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.008427509106695652, |
| "learning_rate": 8.594115070093458e-06, |
| "loss": 0.0003, |
| "step": 10700 |
| }, |
| { |
| "epoch": 6.255841121495327, |
| "grad_norm": 0.007040541619062424, |
| "learning_rate": 8.59046436915888e-06, |
| "loss": 0.0003, |
| "step": 10710 |
| }, |
| { |
| "epoch": 6.261682242990654, |
| "grad_norm": 0.0081349927932024, |
| "learning_rate": 8.5868136682243e-06, |
| "loss": 0.0003, |
| "step": 10720 |
| }, |
| { |
| "epoch": 6.267523364485982, |
| "grad_norm": 0.007933318614959717, |
| "learning_rate": 8.58316296728972e-06, |
| "loss": 0.0458, |
| "step": 10730 |
| }, |
| { |
| "epoch": 6.2733644859813085, |
| "grad_norm": 0.025732412934303284, |
| "learning_rate": 8.57951226635514e-06, |
| "loss": 0.0005, |
| "step": 10740 |
| }, |
| { |
| "epoch": 6.279205607476635, |
| "grad_norm": 0.024862881749868393, |
| "learning_rate": 8.575861565420561e-06, |
| "loss": 0.0008, |
| "step": 10750 |
| }, |
| { |
| "epoch": 6.285046728971962, |
| "grad_norm": 0.01671590469777584, |
| "learning_rate": 8.572210864485983e-06, |
| "loss": 0.0007, |
| "step": 10760 |
| }, |
| { |
| "epoch": 6.29088785046729, |
| "grad_norm": 0.012816164642572403, |
| "learning_rate": 8.568560163551403e-06, |
| "loss": 0.0004, |
| "step": 10770 |
| }, |
| { |
| "epoch": 6.296728971962617, |
| "grad_norm": 0.009239032864570618, |
| "learning_rate": 8.564909462616824e-06, |
| "loss": 0.0004, |
| "step": 10780 |
| }, |
| { |
| "epoch": 6.302570093457944, |
| "grad_norm": 0.010092846117913723, |
| "learning_rate": 8.561258761682244e-06, |
| "loss": 0.0177, |
| "step": 10790 |
| }, |
| { |
| "epoch": 6.308411214953271, |
| "grad_norm": 0.009570459835231304, |
| "learning_rate": 8.557608060747664e-06, |
| "loss": 0.0004, |
| "step": 10800 |
| }, |
| { |
| "epoch": 6.3142523364485985, |
| "grad_norm": 14.129960060119629, |
| "learning_rate": 8.553957359813084e-06, |
| "loss": 0.0253, |
| "step": 10810 |
| }, |
| { |
| "epoch": 6.320093457943925, |
| "grad_norm": 0.027155300602316856, |
| "learning_rate": 8.550306658878505e-06, |
| "loss": 0.0005, |
| "step": 10820 |
| }, |
| { |
| "epoch": 6.325934579439252, |
| "grad_norm": 0.012927797622978687, |
| "learning_rate": 8.546655957943925e-06, |
| "loss": 0.005, |
| "step": 10830 |
| }, |
| { |
| "epoch": 6.331775700934579, |
| "grad_norm": 0.006941064726561308, |
| "learning_rate": 8.543005257009347e-06, |
| "loss": 0.0003, |
| "step": 10840 |
| }, |
| { |
| "epoch": 6.337616822429906, |
| "grad_norm": 0.006605945061892271, |
| "learning_rate": 8.539354556074767e-06, |
| "loss": 0.0003, |
| "step": 10850 |
| }, |
| { |
| "epoch": 6.343457943925234, |
| "grad_norm": 0.0064064753241837025, |
| "learning_rate": 8.535703855140188e-06, |
| "loss": 0.0003, |
| "step": 10860 |
| }, |
| { |
| "epoch": 6.349299065420561, |
| "grad_norm": 0.005703518632799387, |
| "learning_rate": 8.532053154205608e-06, |
| "loss": 0.0002, |
| "step": 10870 |
| }, |
| { |
| "epoch": 6.355140186915888, |
| "grad_norm": 0.004557217936962843, |
| "learning_rate": 8.528402453271028e-06, |
| "loss": 0.0002, |
| "step": 10880 |
| }, |
| { |
| "epoch": 6.360981308411215, |
| "grad_norm": 0.005052170716226101, |
| "learning_rate": 8.52475175233645e-06, |
| "loss": 0.0007, |
| "step": 10890 |
| }, |
| { |
| "epoch": 6.366822429906542, |
| "grad_norm": 0.0055373115465044975, |
| "learning_rate": 8.52110105140187e-06, |
| "loss": 0.0002, |
| "step": 10900 |
| }, |
| { |
| "epoch": 6.372663551401869, |
| "grad_norm": 0.006470870226621628, |
| "learning_rate": 8.51745035046729e-06, |
| "loss": 0.0002, |
| "step": 10910 |
| }, |
| { |
| "epoch": 6.378504672897196, |
| "grad_norm": 0.004862222820520401, |
| "learning_rate": 8.513799649532711e-06, |
| "loss": 0.0002, |
| "step": 10920 |
| }, |
| { |
| "epoch": 6.384345794392523, |
| "grad_norm": 0.006725993473082781, |
| "learning_rate": 8.510148948598132e-06, |
| "loss": 0.0455, |
| "step": 10930 |
| }, |
| { |
| "epoch": 6.390186915887851, |
| "grad_norm": 0.013148023746907711, |
| "learning_rate": 8.506498247663552e-06, |
| "loss": 0.001, |
| "step": 10940 |
| }, |
| { |
| "epoch": 6.396028037383178, |
| "grad_norm": 0.011781498789787292, |
| "learning_rate": 8.502847546728972e-06, |
| "loss": 0.0005, |
| "step": 10950 |
| }, |
| { |
| "epoch": 6.401869158878505, |
| "grad_norm": 0.006317190360277891, |
| "learning_rate": 8.499196845794393e-06, |
| "loss": 0.0003, |
| "step": 10960 |
| }, |
| { |
| "epoch": 6.4077102803738315, |
| "grad_norm": 0.0071831364184618, |
| "learning_rate": 8.495546144859815e-06, |
| "loss": 0.0003, |
| "step": 10970 |
| }, |
| { |
| "epoch": 6.413551401869159, |
| "grad_norm": 0.012186083942651749, |
| "learning_rate": 8.491895443925235e-06, |
| "loss": 0.0396, |
| "step": 10980 |
| }, |
| { |
| "epoch": 6.419392523364486, |
| "grad_norm": 0.04296307638287544, |
| "learning_rate": 8.488244742990655e-06, |
| "loss": 0.0008, |
| "step": 10990 |
| }, |
| { |
| "epoch": 6.425233644859813, |
| "grad_norm": 0.018925141543149948, |
| "learning_rate": 8.484594042056076e-06, |
| "loss": 0.0007, |
| "step": 11000 |
| }, |
| { |
| "epoch": 6.43107476635514, |
| "grad_norm": 0.006211417261511087, |
| "learning_rate": 8.480943341121496e-06, |
| "loss": 0.0006, |
| "step": 11010 |
| }, |
| { |
| "epoch": 6.436915887850468, |
| "grad_norm": 0.007444577757269144, |
| "learning_rate": 8.477292640186918e-06, |
| "loss": 0.0002, |
| "step": 11020 |
| }, |
| { |
| "epoch": 6.442757009345795, |
| "grad_norm": 0.007033002562820911, |
| "learning_rate": 8.473641939252336e-06, |
| "loss": 0.0003, |
| "step": 11030 |
| }, |
| { |
| "epoch": 6.4485981308411215, |
| "grad_norm": 0.0055618369951844215, |
| "learning_rate": 8.469991238317757e-06, |
| "loss": 0.0003, |
| "step": 11040 |
| }, |
| { |
| "epoch": 6.454439252336448, |
| "grad_norm": 0.005723849404603243, |
| "learning_rate": 8.466340537383179e-06, |
| "loss": 0.0002, |
| "step": 11050 |
| }, |
| { |
| "epoch": 6.460280373831775, |
| "grad_norm": 0.004047950729727745, |
| "learning_rate": 8.462689836448599e-06, |
| "loss": 0.0002, |
| "step": 11060 |
| }, |
| { |
| "epoch": 6.466121495327103, |
| "grad_norm": 0.004566140007227659, |
| "learning_rate": 8.45903913551402e-06, |
| "loss": 0.0002, |
| "step": 11070 |
| }, |
| { |
| "epoch": 6.47196261682243, |
| "grad_norm": 0.004481618292629719, |
| "learning_rate": 8.45538843457944e-06, |
| "loss": 0.0002, |
| "step": 11080 |
| }, |
| { |
| "epoch": 6.477803738317757, |
| "grad_norm": 0.004923723638057709, |
| "learning_rate": 8.45173773364486e-06, |
| "loss": 0.0002, |
| "step": 11090 |
| }, |
| { |
| "epoch": 6.483644859813084, |
| "grad_norm": 0.004801360424607992, |
| "learning_rate": 8.448087032710282e-06, |
| "loss": 0.0002, |
| "step": 11100 |
| }, |
| { |
| "epoch": 6.489485981308412, |
| "grad_norm": 0.004998974967747927, |
| "learning_rate": 8.444436331775702e-06, |
| "loss": 0.0002, |
| "step": 11110 |
| }, |
| { |
| "epoch": 6.4953271028037385, |
| "grad_norm": 0.0041777221485972404, |
| "learning_rate": 8.440785630841121e-06, |
| "loss": 0.0002, |
| "step": 11120 |
| }, |
| { |
| "epoch": 6.501168224299065, |
| "grad_norm": 0.00409247912466526, |
| "learning_rate": 8.437134929906543e-06, |
| "loss": 0.0002, |
| "step": 11130 |
| }, |
| { |
| "epoch": 6.507009345794392, |
| "grad_norm": 0.003906558267772198, |
| "learning_rate": 8.433484228971963e-06, |
| "loss": 0.0002, |
| "step": 11140 |
| }, |
| { |
| "epoch": 6.51285046728972, |
| "grad_norm": 0.004433404188603163, |
| "learning_rate": 8.429833528037384e-06, |
| "loss": 0.0002, |
| "step": 11150 |
| }, |
| { |
| "epoch": 6.518691588785047, |
| "grad_norm": 0.003944264259189367, |
| "learning_rate": 8.426182827102804e-06, |
| "loss": 0.0323, |
| "step": 11160 |
| }, |
| { |
| "epoch": 6.524532710280374, |
| "grad_norm": 0.007481928914785385, |
| "learning_rate": 8.422532126168224e-06, |
| "loss": 0.0002, |
| "step": 11170 |
| }, |
| { |
| "epoch": 6.530373831775701, |
| "grad_norm": 0.004499036818742752, |
| "learning_rate": 8.418881425233646e-06, |
| "loss": 0.0187, |
| "step": 11180 |
| }, |
| { |
| "epoch": 6.536214953271028, |
| "grad_norm": 0.004098616074770689, |
| "learning_rate": 8.415230724299067e-06, |
| "loss": 0.0107, |
| "step": 11190 |
| }, |
| { |
| "epoch": 6.542056074766355, |
| "grad_norm": 0.004652571398764849, |
| "learning_rate": 8.411580023364487e-06, |
| "loss": 0.0002, |
| "step": 11200 |
| }, |
| { |
| "epoch": 6.547897196261682, |
| "grad_norm": 0.006798152346163988, |
| "learning_rate": 8.407929322429907e-06, |
| "loss": 0.0002, |
| "step": 11210 |
| }, |
| { |
| "epoch": 6.553738317757009, |
| "grad_norm": 0.004885418340563774, |
| "learning_rate": 8.404278621495328e-06, |
| "loss": 0.0001, |
| "step": 11220 |
| }, |
| { |
| "epoch": 6.559579439252336, |
| "grad_norm": 0.0039513991214334965, |
| "learning_rate": 8.40062792056075e-06, |
| "loss": 0.0001, |
| "step": 11230 |
| }, |
| { |
| "epoch": 6.565420560747664, |
| "grad_norm": 0.0035442986991256475, |
| "learning_rate": 8.396977219626168e-06, |
| "loss": 0.0002, |
| "step": 11240 |
| }, |
| { |
| "epoch": 6.571261682242991, |
| "grad_norm": 0.003085469128564, |
| "learning_rate": 8.393326518691588e-06, |
| "loss": 0.0001, |
| "step": 11250 |
| }, |
| { |
| "epoch": 6.577102803738318, |
| "grad_norm": 0.00335341296158731, |
| "learning_rate": 8.38967581775701e-06, |
| "loss": 0.0033, |
| "step": 11260 |
| }, |
| { |
| "epoch": 6.582943925233645, |
| "grad_norm": 0.003827323205769062, |
| "learning_rate": 8.38602511682243e-06, |
| "loss": 0.0001, |
| "step": 11270 |
| }, |
| { |
| "epoch": 6.588785046728972, |
| "grad_norm": 0.005507790017873049, |
| "learning_rate": 8.382374415887851e-06, |
| "loss": 0.0498, |
| "step": 11280 |
| }, |
| { |
| "epoch": 6.594626168224299, |
| "grad_norm": 0.025428486987948418, |
| "learning_rate": 8.378723714953271e-06, |
| "loss": 0.0012, |
| "step": 11290 |
| }, |
| { |
| "epoch": 6.600467289719626, |
| "grad_norm": 7.240142345428467, |
| "learning_rate": 8.375073014018692e-06, |
| "loss": 0.0024, |
| "step": 11300 |
| }, |
| { |
| "epoch": 6.606308411214953, |
| "grad_norm": 0.007059688679873943, |
| "learning_rate": 8.371422313084114e-06, |
| "loss": 0.0003, |
| "step": 11310 |
| }, |
| { |
| "epoch": 6.61214953271028, |
| "grad_norm": 0.0074775912798941135, |
| "learning_rate": 8.367771612149534e-06, |
| "loss": 0.0003, |
| "step": 11320 |
| }, |
| { |
| "epoch": 6.617990654205608, |
| "grad_norm": 0.006460248958319426, |
| "learning_rate": 8.364120911214954e-06, |
| "loss": 0.0413, |
| "step": 11330 |
| }, |
| { |
| "epoch": 6.623831775700935, |
| "grad_norm": 0.006473233923316002, |
| "learning_rate": 8.360470210280375e-06, |
| "loss": 0.0002, |
| "step": 11340 |
| }, |
| { |
| "epoch": 6.6296728971962615, |
| "grad_norm": 0.005892688874155283, |
| "learning_rate": 8.356819509345795e-06, |
| "loss": 0.0002, |
| "step": 11350 |
| }, |
| { |
| "epoch": 6.635514018691588, |
| "grad_norm": 0.004774305038154125, |
| "learning_rate": 8.353168808411217e-06, |
| "loss": 0.0002, |
| "step": 11360 |
| }, |
| { |
| "epoch": 6.641355140186916, |
| "grad_norm": 0.004374335985630751, |
| "learning_rate": 8.349518107476636e-06, |
| "loss": 0.0002, |
| "step": 11370 |
| }, |
| { |
| "epoch": 6.647196261682243, |
| "grad_norm": 0.003162469482049346, |
| "learning_rate": 8.345867406542056e-06, |
| "loss": 0.0001, |
| "step": 11380 |
| }, |
| { |
| "epoch": 6.65303738317757, |
| "grad_norm": 0.0034602792002260685, |
| "learning_rate": 8.342216705607478e-06, |
| "loss": 0.0001, |
| "step": 11390 |
| }, |
| { |
| "epoch": 6.658878504672897, |
| "grad_norm": 0.0040388829074800014, |
| "learning_rate": 8.338566004672898e-06, |
| "loss": 0.0001, |
| "step": 11400 |
| }, |
| { |
| "epoch": 6.664719626168225, |
| "grad_norm": 0.00331344292499125, |
| "learning_rate": 8.334915303738319e-06, |
| "loss": 0.0001, |
| "step": 11410 |
| }, |
| { |
| "epoch": 6.670560747663552, |
| "grad_norm": 0.0032574781216681004, |
| "learning_rate": 8.331264602803739e-06, |
| "loss": 0.0003, |
| "step": 11420 |
| }, |
| { |
| "epoch": 6.6764018691588785, |
| "grad_norm": 0.0035904489923268557, |
| "learning_rate": 8.32761390186916e-06, |
| "loss": 0.0001, |
| "step": 11430 |
| }, |
| { |
| "epoch": 6.682242990654205, |
| "grad_norm": 0.0036325124092400074, |
| "learning_rate": 8.323963200934581e-06, |
| "loss": 0.0001, |
| "step": 11440 |
| }, |
| { |
| "epoch": 6.688084112149532, |
| "grad_norm": 0.004073506221175194, |
| "learning_rate": 8.320312500000001e-06, |
| "loss": 0.0001, |
| "step": 11450 |
| }, |
| { |
| "epoch": 6.69392523364486, |
| "grad_norm": 0.0034190404694527388, |
| "learning_rate": 8.31666179906542e-06, |
| "loss": 0.0001, |
| "step": 11460 |
| }, |
| { |
| "epoch": 6.699766355140187, |
| "grad_norm": 0.003095339285209775, |
| "learning_rate": 8.313011098130842e-06, |
| "loss": 0.0001, |
| "step": 11470 |
| }, |
| { |
| "epoch": 6.705607476635514, |
| "grad_norm": 0.0030306437984108925, |
| "learning_rate": 8.309360397196262e-06, |
| "loss": 0.0001, |
| "step": 11480 |
| }, |
| { |
| "epoch": 6.711448598130842, |
| "grad_norm": 0.002707183128222823, |
| "learning_rate": 8.305709696261683e-06, |
| "loss": 0.0001, |
| "step": 11490 |
| }, |
| { |
| "epoch": 6.7172897196261685, |
| "grad_norm": 0.0031440563034266233, |
| "learning_rate": 8.302058995327103e-06, |
| "loss": 0.0003, |
| "step": 11500 |
| }, |
| { |
| "epoch": 6.723130841121495, |
| "grad_norm": 0.0029218934942036867, |
| "learning_rate": 8.298408294392523e-06, |
| "loss": 0.0001, |
| "step": 11510 |
| }, |
| { |
| "epoch": 6.728971962616822, |
| "grad_norm": 0.002636146731674671, |
| "learning_rate": 8.294757593457945e-06, |
| "loss": 0.0001, |
| "step": 11520 |
| }, |
| { |
| "epoch": 6.734813084112149, |
| "grad_norm": 0.003062307136133313, |
| "learning_rate": 8.291106892523366e-06, |
| "loss": 0.0001, |
| "step": 11530 |
| }, |
| { |
| "epoch": 6.740654205607477, |
| "grad_norm": 0.00242641381919384, |
| "learning_rate": 8.287456191588786e-06, |
| "loss": 0.0001, |
| "step": 11540 |
| }, |
| { |
| "epoch": 6.746495327102804, |
| "grad_norm": 0.0024078742135316133, |
| "learning_rate": 8.283805490654206e-06, |
| "loss": 0.0001, |
| "step": 11550 |
| }, |
| { |
| "epoch": 6.752336448598131, |
| "grad_norm": 0.0025927985552698374, |
| "learning_rate": 8.280154789719627e-06, |
| "loss": 0.0001, |
| "step": 11560 |
| }, |
| { |
| "epoch": 6.758177570093458, |
| "grad_norm": 0.002639703219756484, |
| "learning_rate": 8.276504088785049e-06, |
| "loss": 0.0001, |
| "step": 11570 |
| }, |
| { |
| "epoch": 6.7640186915887845, |
| "grad_norm": 0.002318447921425104, |
| "learning_rate": 8.272853387850467e-06, |
| "loss": 0.0001, |
| "step": 11580 |
| }, |
| { |
| "epoch": 6.769859813084112, |
| "grad_norm": 0.0026759966276586056, |
| "learning_rate": 8.269202686915888e-06, |
| "loss": 0.0001, |
| "step": 11590 |
| }, |
| { |
| "epoch": 6.775700934579439, |
| "grad_norm": 0.0025113169103860855, |
| "learning_rate": 8.26555198598131e-06, |
| "loss": 0.0001, |
| "step": 11600 |
| }, |
| { |
| "epoch": 6.781542056074766, |
| "grad_norm": 0.0024989412631839514, |
| "learning_rate": 8.26190128504673e-06, |
| "loss": 0.0001, |
| "step": 11610 |
| }, |
| { |
| "epoch": 6.787383177570094, |
| "grad_norm": 0.0024788263253867626, |
| "learning_rate": 8.25825058411215e-06, |
| "loss": 0.0001, |
| "step": 11620 |
| }, |
| { |
| "epoch": 6.793224299065421, |
| "grad_norm": 0.0025793309323489666, |
| "learning_rate": 8.25459988317757e-06, |
| "loss": 0.0001, |
| "step": 11630 |
| }, |
| { |
| "epoch": 6.799065420560748, |
| "grad_norm": 0.002356814919039607, |
| "learning_rate": 8.25094918224299e-06, |
| "loss": 0.0001, |
| "step": 11640 |
| }, |
| { |
| "epoch": 6.804906542056075, |
| "grad_norm": 0.002353727351874113, |
| "learning_rate": 8.247298481308413e-06, |
| "loss": 0.0001, |
| "step": 11650 |
| }, |
| { |
| "epoch": 6.8107476635514015, |
| "grad_norm": 0.002258236985653639, |
| "learning_rate": 8.243647780373833e-06, |
| "loss": 0.0001, |
| "step": 11660 |
| }, |
| { |
| "epoch": 6.816588785046729, |
| "grad_norm": 0.0023588163312524557, |
| "learning_rate": 8.239997079439252e-06, |
| "loss": 0.045, |
| "step": 11670 |
| }, |
| { |
| "epoch": 6.822429906542056, |
| "grad_norm": 0.006023265887051821, |
| "learning_rate": 8.236346378504674e-06, |
| "loss": 0.0001, |
| "step": 11680 |
| }, |
| { |
| "epoch": 6.828271028037383, |
| "grad_norm": 0.004266134463250637, |
| "learning_rate": 8.232695677570094e-06, |
| "loss": 0.0002, |
| "step": 11690 |
| }, |
| { |
| "epoch": 6.83411214953271, |
| "grad_norm": 0.00548480125144124, |
| "learning_rate": 8.229044976635514e-06, |
| "loss": 0.0003, |
| "step": 11700 |
| }, |
| { |
| "epoch": 6.839953271028038, |
| "grad_norm": 0.0027503659948706627, |
| "learning_rate": 8.225394275700935e-06, |
| "loss": 0.0286, |
| "step": 11710 |
| }, |
| { |
| "epoch": 6.845794392523365, |
| "grad_norm": 0.004625431727617979, |
| "learning_rate": 8.221743574766355e-06, |
| "loss": 0.0294, |
| "step": 11720 |
| }, |
| { |
| "epoch": 6.8516355140186915, |
| "grad_norm": 0.008213341236114502, |
| "learning_rate": 8.218092873831777e-06, |
| "loss": 0.0002, |
| "step": 11730 |
| }, |
| { |
| "epoch": 6.857476635514018, |
| "grad_norm": 0.0034780986607074738, |
| "learning_rate": 8.214442172897197e-06, |
| "loss": 0.0009, |
| "step": 11740 |
| }, |
| { |
| "epoch": 6.863317757009346, |
| "grad_norm": 0.0030813429038971663, |
| "learning_rate": 8.210791471962618e-06, |
| "loss": 0.0001, |
| "step": 11750 |
| }, |
| { |
| "epoch": 6.869158878504673, |
| "grad_norm": 0.0029140140395611525, |
| "learning_rate": 8.207140771028038e-06, |
| "loss": 0.0001, |
| "step": 11760 |
| }, |
| { |
| "epoch": 6.875, |
| "grad_norm": 0.0028195863123983145, |
| "learning_rate": 8.203490070093458e-06, |
| "loss": 0.0001, |
| "step": 11770 |
| }, |
| { |
| "epoch": 6.880841121495327, |
| "grad_norm": 0.0026145544834434986, |
| "learning_rate": 8.19983936915888e-06, |
| "loss": 0.0001, |
| "step": 11780 |
| }, |
| { |
| "epoch": 6.886682242990654, |
| "grad_norm": 0.002694539725780487, |
| "learning_rate": 8.196188668224299e-06, |
| "loss": 0.0001, |
| "step": 11790 |
| }, |
| { |
| "epoch": 6.892523364485982, |
| "grad_norm": 0.0025953687727451324, |
| "learning_rate": 8.19253796728972e-06, |
| "loss": 0.0001, |
| "step": 11800 |
| }, |
| { |
| "epoch": 6.8983644859813085, |
| "grad_norm": 0.002524446463212371, |
| "learning_rate": 8.188887266355141e-06, |
| "loss": 0.0001, |
| "step": 11810 |
| }, |
| { |
| "epoch": 6.904205607476635, |
| "grad_norm": 0.0022692338097840548, |
| "learning_rate": 8.185236565420562e-06, |
| "loss": 0.0001, |
| "step": 11820 |
| }, |
| { |
| "epoch": 6.910046728971962, |
| "grad_norm": 0.002503743628039956, |
| "learning_rate": 8.181585864485982e-06, |
| "loss": 0.0001, |
| "step": 11830 |
| }, |
| { |
| "epoch": 6.91588785046729, |
| "grad_norm": 0.0026293403934687376, |
| "learning_rate": 8.177935163551402e-06, |
| "loss": 0.0001, |
| "step": 11840 |
| }, |
| { |
| "epoch": 6.921728971962617, |
| "grad_norm": 0.002107327338308096, |
| "learning_rate": 8.174284462616823e-06, |
| "loss": 0.0001, |
| "step": 11850 |
| }, |
| { |
| "epoch": 6.927570093457944, |
| "grad_norm": 0.002371525624766946, |
| "learning_rate": 8.170633761682245e-06, |
| "loss": 0.0001, |
| "step": 11860 |
| }, |
| { |
| "epoch": 6.933411214953271, |
| "grad_norm": 0.002208322286605835, |
| "learning_rate": 8.166983060747665e-06, |
| "loss": 0.0001, |
| "step": 11870 |
| }, |
| { |
| "epoch": 6.9392523364485985, |
| "grad_norm": 0.002092135837301612, |
| "learning_rate": 8.163332359813083e-06, |
| "loss": 0.0001, |
| "step": 11880 |
| }, |
| { |
| "epoch": 6.945093457943925, |
| "grad_norm": 0.00219535268843174, |
| "learning_rate": 8.159681658878505e-06, |
| "loss": 0.0001, |
| "step": 11890 |
| }, |
| { |
| "epoch": 6.950934579439252, |
| "grad_norm": 0.0020399116910994053, |
| "learning_rate": 8.156030957943926e-06, |
| "loss": 0.0001, |
| "step": 11900 |
| }, |
| { |
| "epoch": 6.956775700934579, |
| "grad_norm": 0.0020822633523494005, |
| "learning_rate": 8.152380257009346e-06, |
| "loss": 0.0001, |
| "step": 11910 |
| }, |
| { |
| "epoch": 6.962616822429906, |
| "grad_norm": 0.0020946068689227104, |
| "learning_rate": 8.148729556074766e-06, |
| "loss": 0.0001, |
| "step": 11920 |
| }, |
| { |
| "epoch": 6.968457943925234, |
| "grad_norm": 0.0019803382456302643, |
| "learning_rate": 8.145078855140187e-06, |
| "loss": 0.0001, |
| "step": 11930 |
| }, |
| { |
| "epoch": 6.974299065420561, |
| "grad_norm": 0.0021050155628472567, |
| "learning_rate": 8.141428154205609e-06, |
| "loss": 0.0001, |
| "step": 11940 |
| }, |
| { |
| "epoch": 6.980140186915888, |
| "grad_norm": 2.105935573577881, |
| "learning_rate": 8.137777453271029e-06, |
| "loss": 0.0058, |
| "step": 11950 |
| }, |
| { |
| "epoch": 6.9859813084112155, |
| "grad_norm": 0.0022007932420819998, |
| "learning_rate": 8.13412675233645e-06, |
| "loss": 0.0001, |
| "step": 11960 |
| }, |
| { |
| "epoch": 6.991822429906542, |
| "grad_norm": 0.015978222712874413, |
| "learning_rate": 8.13047605140187e-06, |
| "loss": 0.0001, |
| "step": 11970 |
| }, |
| { |
| "epoch": 6.997663551401869, |
| "grad_norm": 0.0019430032698437572, |
| "learning_rate": 8.12682535046729e-06, |
| "loss": 0.0006, |
| "step": 11980 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_f1": 0.9993941229930324, |
| "eval_fbeta": 0.9993940429930388, |
| "eval_loss": 0.0036255475133657455, |
| "eval_precision": 0.9993941229930324, |
| "eval_recall": 0.9993941229930324, |
| "eval_runtime": 171.0479, |
| "eval_samples_per_second": 68.618, |
| "eval_steps_per_second": 8.582, |
| "step": 11984 |
| }, |
| { |
| "epoch": 7.003504672897196, |
| "grad_norm": 0.0021681012585759163, |
| "learning_rate": 8.123174649532712e-06, |
| "loss": 0.0001, |
| "step": 11990 |
| }, |
| { |
| "epoch": 7.009345794392523, |
| "grad_norm": 0.0019255972001701593, |
| "learning_rate": 8.11952394859813e-06, |
| "loss": 0.0001, |
| "step": 12000 |
| }, |
| { |
| "epoch": 7.015186915887851, |
| "grad_norm": 0.0021150237880647182, |
| "learning_rate": 8.115873247663551e-06, |
| "loss": 0.0001, |
| "step": 12010 |
| }, |
| { |
| "epoch": 7.021028037383178, |
| "grad_norm": 0.0019581643864512444, |
| "learning_rate": 8.112222546728973e-06, |
| "loss": 0.0001, |
| "step": 12020 |
| }, |
| { |
| "epoch": 7.026869158878505, |
| "grad_norm": 0.0018600919283926487, |
| "learning_rate": 8.108571845794393e-06, |
| "loss": 0.0001, |
| "step": 12030 |
| }, |
| { |
| "epoch": 7.0327102803738315, |
| "grad_norm": 0.002059732563793659, |
| "learning_rate": 8.104921144859814e-06, |
| "loss": 0.0001, |
| "step": 12040 |
| }, |
| { |
| "epoch": 7.038551401869159, |
| "grad_norm": 0.002076375298202038, |
| "learning_rate": 8.101270443925234e-06, |
| "loss": 0.0001, |
| "step": 12050 |
| }, |
| { |
| "epoch": 7.044392523364486, |
| "grad_norm": 0.0031956476159393787, |
| "learning_rate": 8.097619742990654e-06, |
| "loss": 0.0181, |
| "step": 12060 |
| }, |
| { |
| "epoch": 7.050233644859813, |
| "grad_norm": 0.0022973825689405203, |
| "learning_rate": 8.093969042056076e-06, |
| "loss": 0.0091, |
| "step": 12070 |
| }, |
| { |
| "epoch": 7.05607476635514, |
| "grad_norm": 0.0018713580211624503, |
| "learning_rate": 8.090318341121497e-06, |
| "loss": 0.0001, |
| "step": 12080 |
| }, |
| { |
| "epoch": 7.061915887850467, |
| "grad_norm": 0.002722959266975522, |
| "learning_rate": 8.086667640186917e-06, |
| "loss": 0.0001, |
| "step": 12090 |
| }, |
| { |
| "epoch": 7.067757009345795, |
| "grad_norm": 0.002272659447044134, |
| "learning_rate": 8.083016939252337e-06, |
| "loss": 0.0001, |
| "step": 12100 |
| }, |
| { |
| "epoch": 7.0735981308411215, |
| "grad_norm": 0.0020739021711051464, |
| "learning_rate": 8.079366238317757e-06, |
| "loss": 0.0001, |
| "step": 12110 |
| }, |
| { |
| "epoch": 7.079439252336448, |
| "grad_norm": 0.0021052819211035967, |
| "learning_rate": 8.075715537383178e-06, |
| "loss": 0.0001, |
| "step": 12120 |
| }, |
| { |
| "epoch": 7.085280373831775, |
| "grad_norm": 0.0022176839411258698, |
| "learning_rate": 8.072064836448598e-06, |
| "loss": 0.0517, |
| "step": 12130 |
| }, |
| { |
| "epoch": 7.091121495327103, |
| "grad_norm": 0.014300468377768993, |
| "learning_rate": 8.068414135514018e-06, |
| "loss": 0.0005, |
| "step": 12140 |
| }, |
| { |
| "epoch": 7.09696261682243, |
| "grad_norm": 0.014064386487007141, |
| "learning_rate": 8.06476343457944e-06, |
| "loss": 0.001, |
| "step": 12150 |
| }, |
| { |
| "epoch": 7.102803738317757, |
| "grad_norm": 0.0048030661419034, |
| "learning_rate": 8.06111273364486e-06, |
| "loss": 0.0003, |
| "step": 12160 |
| }, |
| { |
| "epoch": 7.108644859813084, |
| "grad_norm": 0.003011312335729599, |
| "learning_rate": 8.057462032710281e-06, |
| "loss": 0.0001, |
| "step": 12170 |
| }, |
| { |
| "epoch": 7.114485981308412, |
| "grad_norm": 0.002865664893761277, |
| "learning_rate": 8.053811331775701e-06, |
| "loss": 0.0001, |
| "step": 12180 |
| }, |
| { |
| "epoch": 7.1203271028037385, |
| "grad_norm": 3.0432324409484863, |
| "learning_rate": 8.050160630841122e-06, |
| "loss": 0.0423, |
| "step": 12190 |
| }, |
| { |
| "epoch": 7.126168224299065, |
| "grad_norm": 0.05281701311469078, |
| "learning_rate": 8.046509929906544e-06, |
| "loss": 0.0005, |
| "step": 12200 |
| }, |
| { |
| "epoch": 7.132009345794392, |
| "grad_norm": 0.016092179343104362, |
| "learning_rate": 8.042859228971964e-06, |
| "loss": 0.0008, |
| "step": 12210 |
| }, |
| { |
| "epoch": 7.13785046728972, |
| "grad_norm": 0.006236253771930933, |
| "learning_rate": 8.039208528037384e-06, |
| "loss": 0.0003, |
| "step": 12220 |
| }, |
| { |
| "epoch": 7.143691588785047, |
| "grad_norm": 0.004007366020232439, |
| "learning_rate": 8.035557827102805e-06, |
| "loss": 0.0002, |
| "step": 12230 |
| }, |
| { |
| "epoch": 7.149532710280374, |
| "grad_norm": 0.0033751898445189, |
| "learning_rate": 8.031907126168225e-06, |
| "loss": 0.0001, |
| "step": 12240 |
| }, |
| { |
| "epoch": 7.155373831775701, |
| "grad_norm": 0.0023680857848376036, |
| "learning_rate": 8.028256425233645e-06, |
| "loss": 0.0001, |
| "step": 12250 |
| }, |
| { |
| "epoch": 7.161214953271028, |
| "grad_norm": 0.0029950051102787256, |
| "learning_rate": 8.024605724299066e-06, |
| "loss": 0.0001, |
| "step": 12260 |
| }, |
| { |
| "epoch": 7.167056074766355, |
| "grad_norm": 0.002707561943680048, |
| "learning_rate": 8.020955023364486e-06, |
| "loss": 0.0001, |
| "step": 12270 |
| }, |
| { |
| "epoch": 7.172897196261682, |
| "grad_norm": 0.002163573633879423, |
| "learning_rate": 8.017304322429908e-06, |
| "loss": 0.0001, |
| "step": 12280 |
| }, |
| { |
| "epoch": 7.178738317757009, |
| "grad_norm": 0.0022281715646386147, |
| "learning_rate": 8.013653621495328e-06, |
| "loss": 0.0001, |
| "step": 12290 |
| }, |
| { |
| "epoch": 7.184579439252336, |
| "grad_norm": 0.002539695706218481, |
| "learning_rate": 8.010002920560748e-06, |
| "loss": 0.0001, |
| "step": 12300 |
| }, |
| { |
| "epoch": 7.190420560747664, |
| "grad_norm": 0.0019248753087595105, |
| "learning_rate": 8.006352219626169e-06, |
| "loss": 0.0001, |
| "step": 12310 |
| }, |
| { |
| "epoch": 7.196261682242991, |
| "grad_norm": 0.0019189286977052689, |
| "learning_rate": 8.002701518691589e-06, |
| "loss": 0.0005, |
| "step": 12320 |
| }, |
| { |
| "epoch": 7.202102803738318, |
| "grad_norm": 0.0019058303441852331, |
| "learning_rate": 7.999050817757011e-06, |
| "loss": 0.0001, |
| "step": 12330 |
| }, |
| { |
| "epoch": 7.207943925233645, |
| "grad_norm": 0.0016836983850225806, |
| "learning_rate": 7.99540011682243e-06, |
| "loss": 0.0001, |
| "step": 12340 |
| }, |
| { |
| "epoch": 7.213785046728972, |
| "grad_norm": 0.0029658398125320673, |
| "learning_rate": 7.99174941588785e-06, |
| "loss": 0.0001, |
| "step": 12350 |
| }, |
| { |
| "epoch": 7.219626168224299, |
| "grad_norm": 0.0017996998503804207, |
| "learning_rate": 7.988098714953272e-06, |
| "loss": 0.0001, |
| "step": 12360 |
| }, |
| { |
| "epoch": 7.225467289719626, |
| "grad_norm": 0.0019612801261246204, |
| "learning_rate": 7.984448014018692e-06, |
| "loss": 0.0001, |
| "step": 12370 |
| }, |
| { |
| "epoch": 7.231308411214953, |
| "grad_norm": 0.0017387006664648652, |
| "learning_rate": 7.980797313084113e-06, |
| "loss": 0.0001, |
| "step": 12380 |
| }, |
| { |
| "epoch": 7.23714953271028, |
| "grad_norm": 0.0015903054736554623, |
| "learning_rate": 7.977146612149533e-06, |
| "loss": 0.0001, |
| "step": 12390 |
| }, |
| { |
| "epoch": 7.242990654205608, |
| "grad_norm": 0.0018440884305164218, |
| "learning_rate": 7.973495911214953e-06, |
| "loss": 0.0001, |
| "step": 12400 |
| }, |
| { |
| "epoch": 7.248831775700935, |
| "grad_norm": 0.0016087364638224244, |
| "learning_rate": 7.969845210280375e-06, |
| "loss": 0.0001, |
| "step": 12410 |
| }, |
| { |
| "epoch": 7.2546728971962615, |
| "grad_norm": 0.0017136124661192298, |
| "learning_rate": 7.966194509345796e-06, |
| "loss": 0.0001, |
| "step": 12420 |
| }, |
| { |
| "epoch": 7.260514018691588, |
| "grad_norm": 0.0014161961153149605, |
| "learning_rate": 7.962543808411216e-06, |
| "loss": 0.0001, |
| "step": 12430 |
| }, |
| { |
| "epoch": 7.266355140186916, |
| "grad_norm": 0.0017160034039989114, |
| "learning_rate": 7.958893107476636e-06, |
| "loss": 0.0001, |
| "step": 12440 |
| }, |
| { |
| "epoch": 7.272196261682243, |
| "grad_norm": 0.0016139451181516051, |
| "learning_rate": 7.955242406542057e-06, |
| "loss": 0.0175, |
| "step": 12450 |
| }, |
| { |
| "epoch": 7.27803738317757, |
| "grad_norm": 0.0016056919703260064, |
| "learning_rate": 7.951591705607477e-06, |
| "loss": 0.0001, |
| "step": 12460 |
| }, |
| { |
| "epoch": 7.283878504672897, |
| "grad_norm": 0.0014629984507337213, |
| "learning_rate": 7.947941004672897e-06, |
| "loss": 0.0, |
| "step": 12470 |
| }, |
| { |
| "epoch": 7.289719626168225, |
| "grad_norm": 0.0015402857679873705, |
| "learning_rate": 7.944290303738318e-06, |
| "loss": 0.0, |
| "step": 12480 |
| }, |
| { |
| "epoch": 7.295560747663552, |
| "grad_norm": 0.001538011827506125, |
| "learning_rate": 7.94063960280374e-06, |
| "loss": 0.0, |
| "step": 12490 |
| }, |
| { |
| "epoch": 7.3014018691588785, |
| "grad_norm": 0.0017495568608865142, |
| "learning_rate": 7.93698890186916e-06, |
| "loss": 0.0001, |
| "step": 12500 |
| }, |
| { |
| "epoch": 7.307242990654205, |
| "grad_norm": 0.0013630022294819355, |
| "learning_rate": 7.93333820093458e-06, |
| "loss": 0.0, |
| "step": 12510 |
| }, |
| { |
| "epoch": 7.313084112149532, |
| "grad_norm": 0.0013937480980530381, |
| "learning_rate": 7.9296875e-06, |
| "loss": 0.0, |
| "step": 12520 |
| }, |
| { |
| "epoch": 7.31892523364486, |
| "grad_norm": 0.0014540269039571285, |
| "learning_rate": 7.92603679906542e-06, |
| "loss": 0.0, |
| "step": 12530 |
| }, |
| { |
| "epoch": 7.324766355140187, |
| "grad_norm": 0.0016073896549642086, |
| "learning_rate": 7.922386098130843e-06, |
| "loss": 0.0, |
| "step": 12540 |
| }, |
| { |
| "epoch": 7.330607476635514, |
| "grad_norm": 0.0013543206732720137, |
| "learning_rate": 7.918735397196261e-06, |
| "loss": 0.0, |
| "step": 12550 |
| }, |
| { |
| "epoch": 7.336448598130841, |
| "grad_norm": 0.00147124077193439, |
| "learning_rate": 7.915084696261683e-06, |
| "loss": 0.0, |
| "step": 12560 |
| }, |
| { |
| "epoch": 7.3422897196261685, |
| "grad_norm": 0.001399431494064629, |
| "learning_rate": 7.911433995327104e-06, |
| "loss": 0.0, |
| "step": 12570 |
| }, |
| { |
| "epoch": 7.348130841121495, |
| "grad_norm": 0.0013303236337378621, |
| "learning_rate": 7.907783294392524e-06, |
| "loss": 0.0418, |
| "step": 12580 |
| }, |
| { |
| "epoch": 7.353971962616822, |
| "grad_norm": 0.00885701458901167, |
| "learning_rate": 7.904132593457944e-06, |
| "loss": 0.0001, |
| "step": 12590 |
| }, |
| { |
| "epoch": 7.359813084112149, |
| "grad_norm": 0.005552555434405804, |
| "learning_rate": 7.900481892523365e-06, |
| "loss": 0.0139, |
| "step": 12600 |
| }, |
| { |
| "epoch": 7.365654205607477, |
| "grad_norm": 0.001586294500157237, |
| "learning_rate": 7.896831191588785e-06, |
| "loss": 0.0001, |
| "step": 12610 |
| }, |
| { |
| "epoch": 7.371495327102804, |
| "grad_norm": 0.001655551022849977, |
| "learning_rate": 7.893180490654207e-06, |
| "loss": 0.0001, |
| "step": 12620 |
| }, |
| { |
| "epoch": 7.377336448598131, |
| "grad_norm": 0.0016032133717089891, |
| "learning_rate": 7.889529789719627e-06, |
| "loss": 0.0001, |
| "step": 12630 |
| }, |
| { |
| "epoch": 7.383177570093458, |
| "grad_norm": 0.0013974695466458797, |
| "learning_rate": 7.885879088785048e-06, |
| "loss": 0.0001, |
| "step": 12640 |
| }, |
| { |
| "epoch": 7.389018691588785, |
| "grad_norm": 0.0015485621988773346, |
| "learning_rate": 7.882228387850468e-06, |
| "loss": 0.0001, |
| "step": 12650 |
| }, |
| { |
| "epoch": 7.394859813084112, |
| "grad_norm": 0.0014567647594958544, |
| "learning_rate": 7.878577686915888e-06, |
| "loss": 0.0, |
| "step": 12660 |
| }, |
| { |
| "epoch": 7.400700934579439, |
| "grad_norm": 0.0017351839924231172, |
| "learning_rate": 7.874926985981309e-06, |
| "loss": 0.0062, |
| "step": 12670 |
| }, |
| { |
| "epoch": 7.406542056074766, |
| "grad_norm": 0.005675299558788538, |
| "learning_rate": 7.871276285046729e-06, |
| "loss": 0.0001, |
| "step": 12680 |
| }, |
| { |
| "epoch": 7.412383177570094, |
| "grad_norm": 0.002098724478855729, |
| "learning_rate": 7.867625584112151e-06, |
| "loss": 0.0006, |
| "step": 12690 |
| }, |
| { |
| "epoch": 7.418224299065421, |
| "grad_norm": 0.001393609563820064, |
| "learning_rate": 7.863974883177571e-06, |
| "loss": 0.0, |
| "step": 12700 |
| }, |
| { |
| "epoch": 7.424065420560748, |
| "grad_norm": 0.0014116641832515597, |
| "learning_rate": 7.860324182242992e-06, |
| "loss": 0.0, |
| "step": 12710 |
| }, |
| { |
| "epoch": 7.429906542056075, |
| "grad_norm": 0.0013138065114617348, |
| "learning_rate": 7.856673481308412e-06, |
| "loss": 0.0, |
| "step": 12720 |
| }, |
| { |
| "epoch": 7.4357476635514015, |
| "grad_norm": 0.0014055016217753291, |
| "learning_rate": 7.853022780373832e-06, |
| "loss": 0.0, |
| "step": 12730 |
| }, |
| { |
| "epoch": 7.441588785046729, |
| "grad_norm": 0.001359453541226685, |
| "learning_rate": 7.849372079439252e-06, |
| "loss": 0.0, |
| "step": 12740 |
| }, |
| { |
| "epoch": 7.447429906542056, |
| "grad_norm": 0.0013050459092482924, |
| "learning_rate": 7.845721378504674e-06, |
| "loss": 0.0001, |
| "step": 12750 |
| }, |
| { |
| "epoch": 7.453271028037383, |
| "grad_norm": 0.0012936226557940245, |
| "learning_rate": 7.842070677570093e-06, |
| "loss": 0.0, |
| "step": 12760 |
| }, |
| { |
| "epoch": 7.45911214953271, |
| "grad_norm": 0.0012887063203379512, |
| "learning_rate": 7.838419976635515e-06, |
| "loss": 0.0, |
| "step": 12770 |
| }, |
| { |
| "epoch": 7.464953271028038, |
| "grad_norm": 0.0012247867416590452, |
| "learning_rate": 7.834769275700935e-06, |
| "loss": 0.0, |
| "step": 12780 |
| }, |
| { |
| "epoch": 7.470794392523365, |
| "grad_norm": 0.0012418876867741346, |
| "learning_rate": 7.831118574766356e-06, |
| "loss": 0.0, |
| "step": 12790 |
| }, |
| { |
| "epoch": 7.4766355140186915, |
| "grad_norm": 0.0011055973591282964, |
| "learning_rate": 7.827467873831776e-06, |
| "loss": 0.0, |
| "step": 12800 |
| }, |
| { |
| "epoch": 7.482476635514018, |
| "grad_norm": 0.0011922491248697042, |
| "learning_rate": 7.823817172897196e-06, |
| "loss": 0.0, |
| "step": 12810 |
| }, |
| { |
| "epoch": 7.488317757009346, |
| "grad_norm": 0.0012404808076098561, |
| "learning_rate": 7.820166471962617e-06, |
| "loss": 0.0, |
| "step": 12820 |
| }, |
| { |
| "epoch": 7.494158878504673, |
| "grad_norm": 0.0012015723623335361, |
| "learning_rate": 7.816515771028039e-06, |
| "loss": 0.0, |
| "step": 12830 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 0.0010450141271576285, |
| "learning_rate": 7.812865070093459e-06, |
| "loss": 0.0, |
| "step": 12840 |
| }, |
| { |
| "epoch": 7.505841121495327, |
| "grad_norm": 0.041091423481702805, |
| "learning_rate": 7.80921436915888e-06, |
| "loss": 0.0001, |
| "step": 12850 |
| }, |
| { |
| "epoch": 7.511682242990654, |
| "grad_norm": 0.0012605130905285478, |
| "learning_rate": 7.8055636682243e-06, |
| "loss": 0.0, |
| "step": 12860 |
| }, |
| { |
| "epoch": 7.517523364485982, |
| "grad_norm": 0.001104542170651257, |
| "learning_rate": 7.80191296728972e-06, |
| "loss": 0.0, |
| "step": 12870 |
| }, |
| { |
| "epoch": 7.5233644859813085, |
| "grad_norm": 0.0011109106708317995, |
| "learning_rate": 7.79826226635514e-06, |
| "loss": 0.0, |
| "step": 12880 |
| }, |
| { |
| "epoch": 7.529205607476635, |
| "grad_norm": 0.0012586305383592844, |
| "learning_rate": 7.79461156542056e-06, |
| "loss": 0.0, |
| "step": 12890 |
| }, |
| { |
| "epoch": 7.535046728971962, |
| "grad_norm": 0.0010842509800568223, |
| "learning_rate": 7.790960864485983e-06, |
| "loss": 0.0, |
| "step": 12900 |
| }, |
| { |
| "epoch": 7.54088785046729, |
| "grad_norm": 0.0011715622385963798, |
| "learning_rate": 7.787310163551403e-06, |
| "loss": 0.0, |
| "step": 12910 |
| }, |
| { |
| "epoch": 7.546728971962617, |
| "grad_norm": 0.0011943539138883352, |
| "learning_rate": 7.783659462616823e-06, |
| "loss": 0.0, |
| "step": 12920 |
| }, |
| { |
| "epoch": 7.552570093457944, |
| "grad_norm": 0.0012001136783510447, |
| "learning_rate": 7.780008761682243e-06, |
| "loss": 0.0, |
| "step": 12930 |
| }, |
| { |
| "epoch": 7.558411214953271, |
| "grad_norm": 0.0010596492793411016, |
| "learning_rate": 7.776358060747664e-06, |
| "loss": 0.0, |
| "step": 12940 |
| }, |
| { |
| "epoch": 7.5642523364485985, |
| "grad_norm": 0.0010550218867138028, |
| "learning_rate": 7.772707359813084e-06, |
| "loss": 0.0001, |
| "step": 12950 |
| }, |
| { |
| "epoch": 7.570093457943925, |
| "grad_norm": 0.13915540277957916, |
| "learning_rate": 7.769056658878506e-06, |
| "loss": 0.0508, |
| "step": 12960 |
| }, |
| { |
| "epoch": 7.575934579439252, |
| "grad_norm": 0.010561136528849602, |
| "learning_rate": 7.765405957943926e-06, |
| "loss": 0.0004, |
| "step": 12970 |
| }, |
| { |
| "epoch": 7.581775700934579, |
| "grad_norm": 0.006983012892305851, |
| "learning_rate": 7.761755257009347e-06, |
| "loss": 0.0003, |
| "step": 12980 |
| }, |
| { |
| "epoch": 7.587616822429906, |
| "grad_norm": 0.006389088463038206, |
| "learning_rate": 7.758104556074767e-06, |
| "loss": 0.0002, |
| "step": 12990 |
| }, |
| { |
| "epoch": 7.593457943925234, |
| "grad_norm": 0.0036603384651243687, |
| "learning_rate": 7.754453855140187e-06, |
| "loss": 0.0133, |
| "step": 13000 |
| }, |
| { |
| "epoch": 7.599299065420561, |
| "grad_norm": 0.001919433125294745, |
| "learning_rate": 7.750803154205608e-06, |
| "loss": 0.0001, |
| "step": 13010 |
| }, |
| { |
| "epoch": 7.605140186915888, |
| "grad_norm": 0.0013452052371576428, |
| "learning_rate": 7.747152453271028e-06, |
| "loss": 0.0001, |
| "step": 13020 |
| }, |
| { |
| "epoch": 7.6109813084112155, |
| "grad_norm": 0.0012721661478281021, |
| "learning_rate": 7.74350175233645e-06, |
| "loss": 0.0, |
| "step": 13030 |
| }, |
| { |
| "epoch": 7.616822429906542, |
| "grad_norm": 0.0015792486956343055, |
| "learning_rate": 7.73985105140187e-06, |
| "loss": 0.0005, |
| "step": 13040 |
| }, |
| { |
| "epoch": 7.622663551401869, |
| "grad_norm": 0.0015665246173739433, |
| "learning_rate": 7.73620035046729e-06, |
| "loss": 0.0, |
| "step": 13050 |
| }, |
| { |
| "epoch": 7.628504672897196, |
| "grad_norm": 0.0012903015594929457, |
| "learning_rate": 7.732549649532711e-06, |
| "loss": 0.0, |
| "step": 13060 |
| }, |
| { |
| "epoch": 7.634345794392523, |
| "grad_norm": 0.0014493002090603113, |
| "learning_rate": 7.728898948598131e-06, |
| "loss": 0.0, |
| "step": 13070 |
| }, |
| { |
| "epoch": 7.640186915887851, |
| "grad_norm": 0.0013346703490242362, |
| "learning_rate": 7.725248247663552e-06, |
| "loss": 0.0, |
| "step": 13080 |
| }, |
| { |
| "epoch": 7.646028037383178, |
| "grad_norm": 0.0014051039470359683, |
| "learning_rate": 7.721597546728974e-06, |
| "loss": 0.0, |
| "step": 13090 |
| }, |
| { |
| "epoch": 7.651869158878505, |
| "grad_norm": 0.0015306697459891438, |
| "learning_rate": 7.717946845794392e-06, |
| "loss": 0.0, |
| "step": 13100 |
| }, |
| { |
| "epoch": 7.6577102803738315, |
| "grad_norm": 0.001278238371014595, |
| "learning_rate": 7.714296144859814e-06, |
| "loss": 0.0, |
| "step": 13110 |
| }, |
| { |
| "epoch": 7.663551401869158, |
| "grad_norm": 0.0012969443341717124, |
| "learning_rate": 7.710645443925235e-06, |
| "loss": 0.0004, |
| "step": 13120 |
| }, |
| { |
| "epoch": 7.669392523364486, |
| "grad_norm": 0.002551996847614646, |
| "learning_rate": 7.706994742990655e-06, |
| "loss": 0.0001, |
| "step": 13130 |
| }, |
| { |
| "epoch": 7.675233644859813, |
| "grad_norm": 0.0014118566177785397, |
| "learning_rate": 7.703344042056075e-06, |
| "loss": 0.003, |
| "step": 13140 |
| }, |
| { |
| "epoch": 7.68107476635514, |
| "grad_norm": 0.001230996917001903, |
| "learning_rate": 7.699693341121495e-06, |
| "loss": 0.0, |
| "step": 13150 |
| }, |
| { |
| "epoch": 7.686915887850468, |
| "grad_norm": 0.0011307065142318606, |
| "learning_rate": 7.696042640186917e-06, |
| "loss": 0.0, |
| "step": 13160 |
| }, |
| { |
| "epoch": 7.692757009345795, |
| "grad_norm": 0.001138526014983654, |
| "learning_rate": 7.692391939252338e-06, |
| "loss": 0.0, |
| "step": 13170 |
| }, |
| { |
| "epoch": 7.6985981308411215, |
| "grad_norm": 0.0011980440467596054, |
| "learning_rate": 7.688741238317758e-06, |
| "loss": 0.0, |
| "step": 13180 |
| }, |
| { |
| "epoch": 7.704439252336448, |
| "grad_norm": 19.9531192779541, |
| "learning_rate": 7.685090537383178e-06, |
| "loss": 0.0048, |
| "step": 13190 |
| }, |
| { |
| "epoch": 7.710280373831775, |
| "grad_norm": 0.0011443004477769136, |
| "learning_rate": 7.681439836448599e-06, |
| "loss": 0.0, |
| "step": 13200 |
| }, |
| { |
| "epoch": 7.716121495327103, |
| "grad_norm": 0.0011824540561065078, |
| "learning_rate": 7.677789135514019e-06, |
| "loss": 0.0, |
| "step": 13210 |
| }, |
| { |
| "epoch": 7.72196261682243, |
| "grad_norm": 0.0012346376897767186, |
| "learning_rate": 7.67413843457944e-06, |
| "loss": 0.0, |
| "step": 13220 |
| }, |
| { |
| "epoch": 7.727803738317757, |
| "grad_norm": 0.0013738384004682302, |
| "learning_rate": 7.67048773364486e-06, |
| "loss": 0.0, |
| "step": 13230 |
| }, |
| { |
| "epoch": 7.733644859813084, |
| "grad_norm": 0.0012500376906245947, |
| "learning_rate": 7.666837032710282e-06, |
| "loss": 0.0, |
| "step": 13240 |
| }, |
| { |
| "epoch": 7.739485981308412, |
| "grad_norm": 0.0011344078229740262, |
| "learning_rate": 7.663186331775702e-06, |
| "loss": 0.0, |
| "step": 13250 |
| }, |
| { |
| "epoch": 7.7453271028037385, |
| "grad_norm": 0.0011888087028637528, |
| "learning_rate": 7.659535630841122e-06, |
| "loss": 0.0312, |
| "step": 13260 |
| }, |
| { |
| "epoch": 7.751168224299065, |
| "grad_norm": 0.0011277446756139398, |
| "learning_rate": 7.655884929906543e-06, |
| "loss": 0.0, |
| "step": 13270 |
| }, |
| { |
| "epoch": 7.757009345794392, |
| "grad_norm": 0.001008135499432683, |
| "learning_rate": 7.652234228971963e-06, |
| "loss": 0.0, |
| "step": 13280 |
| }, |
| { |
| "epoch": 7.76285046728972, |
| "grad_norm": 0.001450374722480774, |
| "learning_rate": 7.648583528037385e-06, |
| "loss": 0.0309, |
| "step": 13290 |
| }, |
| { |
| "epoch": 7.768691588785047, |
| "grad_norm": 0.0015170661499723792, |
| "learning_rate": 7.644932827102805e-06, |
| "loss": 0.0001, |
| "step": 13300 |
| }, |
| { |
| "epoch": 7.774532710280374, |
| "grad_norm": 0.0019507072865962982, |
| "learning_rate": 7.641282126168224e-06, |
| "loss": 0.0001, |
| "step": 13310 |
| }, |
| { |
| "epoch": 7.780373831775701, |
| "grad_norm": 0.0026334829162806273, |
| "learning_rate": 7.637631425233646e-06, |
| "loss": 0.0002, |
| "step": 13320 |
| }, |
| { |
| "epoch": 7.786214953271028, |
| "grad_norm": 0.0013415843714028597, |
| "learning_rate": 7.633980724299066e-06, |
| "loss": 0.0, |
| "step": 13330 |
| }, |
| { |
| "epoch": 7.792056074766355, |
| "grad_norm": 0.0012233871966600418, |
| "learning_rate": 7.630330023364487e-06, |
| "loss": 0.0, |
| "step": 13340 |
| }, |
| { |
| "epoch": 7.797897196261682, |
| "grad_norm": 0.0008745313389226794, |
| "learning_rate": 7.626679322429908e-06, |
| "loss": 0.0, |
| "step": 13350 |
| }, |
| { |
| "epoch": 7.803738317757009, |
| "grad_norm": 0.002569309901446104, |
| "learning_rate": 7.623028621495327e-06, |
| "loss": 0.0, |
| "step": 13360 |
| }, |
| { |
| "epoch": 7.809579439252336, |
| "grad_norm": 0.0009451903752051294, |
| "learning_rate": 7.619377920560749e-06, |
| "loss": 0.0, |
| "step": 13370 |
| }, |
| { |
| "epoch": 7.815420560747664, |
| "grad_norm": 0.0009761240216903389, |
| "learning_rate": 7.615727219626169e-06, |
| "loss": 0.0, |
| "step": 13380 |
| }, |
| { |
| "epoch": 7.821261682242991, |
| "grad_norm": 0.0009837980614975095, |
| "learning_rate": 7.612076518691589e-06, |
| "loss": 0.0, |
| "step": 13390 |
| }, |
| { |
| "epoch": 7.827102803738318, |
| "grad_norm": 0.001016711932606995, |
| "learning_rate": 7.60842581775701e-06, |
| "loss": 0.0, |
| "step": 13400 |
| }, |
| { |
| "epoch": 7.832943925233645, |
| "grad_norm": 0.0009571296395733953, |
| "learning_rate": 7.60477511682243e-06, |
| "loss": 0.0, |
| "step": 13410 |
| }, |
| { |
| "epoch": 7.838785046728972, |
| "grad_norm": 0.0009453770471736789, |
| "learning_rate": 7.601124415887851e-06, |
| "loss": 0.0, |
| "step": 13420 |
| }, |
| { |
| "epoch": 7.844626168224299, |
| "grad_norm": 0.001051238621585071, |
| "learning_rate": 7.597473714953272e-06, |
| "loss": 0.0, |
| "step": 13430 |
| }, |
| { |
| "epoch": 7.850467289719626, |
| "grad_norm": 0.0011488308664411306, |
| "learning_rate": 7.593823014018692e-06, |
| "loss": 0.0, |
| "step": 13440 |
| }, |
| { |
| "epoch": 7.856308411214953, |
| "grad_norm": 0.0007586100255139172, |
| "learning_rate": 7.590172313084113e-06, |
| "loss": 0.0, |
| "step": 13450 |
| }, |
| { |
| "epoch": 7.86214953271028, |
| "grad_norm": 0.0008220048621296883, |
| "learning_rate": 7.586521612149534e-06, |
| "loss": 0.0, |
| "step": 13460 |
| }, |
| { |
| "epoch": 7.867990654205608, |
| "grad_norm": 0.0009259909274987876, |
| "learning_rate": 7.582870911214953e-06, |
| "loss": 0.0, |
| "step": 13470 |
| }, |
| { |
| "epoch": 7.873831775700935, |
| "grad_norm": 0.0010616907384246588, |
| "learning_rate": 7.579220210280374e-06, |
| "loss": 0.0, |
| "step": 13480 |
| }, |
| { |
| "epoch": 7.8796728971962615, |
| "grad_norm": 0.0008421916863881052, |
| "learning_rate": 7.575569509345795e-06, |
| "loss": 0.0, |
| "step": 13490 |
| }, |
| { |
| "epoch": 7.885514018691588, |
| "grad_norm": 0.0009183556539937854, |
| "learning_rate": 7.571918808411216e-06, |
| "loss": 0.0, |
| "step": 13500 |
| }, |
| { |
| "epoch": 7.891355140186916, |
| "grad_norm": 0.0010863002389669418, |
| "learning_rate": 7.568268107476636e-06, |
| "loss": 0.0, |
| "step": 13510 |
| }, |
| { |
| "epoch": 7.897196261682243, |
| "grad_norm": 0.0010933643206954002, |
| "learning_rate": 7.564617406542056e-06, |
| "loss": 0.0, |
| "step": 13520 |
| }, |
| { |
| "epoch": 7.90303738317757, |
| "grad_norm": 0.0010127710411325097, |
| "learning_rate": 7.5609667056074776e-06, |
| "loss": 0.0, |
| "step": 13530 |
| }, |
| { |
| "epoch": 7.908878504672897, |
| "grad_norm": 0.00087543431436643, |
| "learning_rate": 7.557316004672898e-06, |
| "loss": 0.0, |
| "step": 13540 |
| }, |
| { |
| "epoch": 7.914719626168225, |
| "grad_norm": 0.0010519116185605526, |
| "learning_rate": 7.553665303738318e-06, |
| "loss": 0.0, |
| "step": 13550 |
| }, |
| { |
| "epoch": 7.920560747663552, |
| "grad_norm": 0.000822052825242281, |
| "learning_rate": 7.550014602803739e-06, |
| "loss": 0.0, |
| "step": 13560 |
| }, |
| { |
| "epoch": 7.9264018691588785, |
| "grad_norm": 0.0007104118703864515, |
| "learning_rate": 7.546363901869159e-06, |
| "loss": 0.0, |
| "step": 13570 |
| }, |
| { |
| "epoch": 7.932242990654205, |
| "grad_norm": 0.0008943330030888319, |
| "learning_rate": 7.542713200934581e-06, |
| "loss": 0.0, |
| "step": 13580 |
| }, |
| { |
| "epoch": 7.938084112149532, |
| "grad_norm": 0.0009444963652640581, |
| "learning_rate": 7.5390625e-06, |
| "loss": 0.0, |
| "step": 13590 |
| }, |
| { |
| "epoch": 7.94392523364486, |
| "grad_norm": 0.0008807304548099637, |
| "learning_rate": 7.535411799065421e-06, |
| "loss": 0.0, |
| "step": 13600 |
| }, |
| { |
| "epoch": 7.949766355140187, |
| "grad_norm": 0.0008989177295006812, |
| "learning_rate": 7.531761098130842e-06, |
| "loss": 0.0008, |
| "step": 13610 |
| }, |
| { |
| "epoch": 7.955607476635514, |
| "grad_norm": 0.0010687765898182988, |
| "learning_rate": 7.528110397196262e-06, |
| "loss": 0.0, |
| "step": 13620 |
| }, |
| { |
| "epoch": 7.961448598130842, |
| "grad_norm": 0.0009110852843150496, |
| "learning_rate": 7.524459696261683e-06, |
| "loss": 0.0, |
| "step": 13630 |
| }, |
| { |
| "epoch": 7.9672897196261685, |
| "grad_norm": 0.000878663791809231, |
| "learning_rate": 7.5208089953271036e-06, |
| "loss": 0.0, |
| "step": 13640 |
| }, |
| { |
| "epoch": 7.973130841121495, |
| "grad_norm": 0.0007639786344952881, |
| "learning_rate": 7.517158294392524e-06, |
| "loss": 0.0, |
| "step": 13650 |
| }, |
| { |
| "epoch": 7.978971962616822, |
| "grad_norm": 0.000716277165338397, |
| "learning_rate": 7.513507593457945e-06, |
| "loss": 0.0, |
| "step": 13660 |
| }, |
| { |
| "epoch": 7.984813084112149, |
| "grad_norm": 0.0009262704406864941, |
| "learning_rate": 7.509856892523365e-06, |
| "loss": 0.0, |
| "step": 13670 |
| }, |
| { |
| "epoch": 7.990654205607477, |
| "grad_norm": 0.0009081005700863898, |
| "learning_rate": 7.506206191588785e-06, |
| "loss": 0.0, |
| "step": 13680 |
| }, |
| { |
| "epoch": 7.996495327102804, |
| "grad_norm": 0.0008006028365343809, |
| "learning_rate": 7.502555490654207e-06, |
| "loss": 0.0, |
| "step": 13690 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_f1": 0.9993188526451222, |
| "eval_fbeta": 0.9989105295188826, |
| "eval_loss": 0.004954248666763306, |
| "eval_precision": 0.9986386325820602, |
| "eval_recall": 1.0, |
| "eval_runtime": 170.9133, |
| "eval_samples_per_second": 68.672, |
| "eval_steps_per_second": 8.589, |
| "step": 13696 |
| }, |
| { |
| "epoch": 8.00233644859813, |
| "grad_norm": 0.0008075121440924704, |
| "learning_rate": 7.498904789719626e-06, |
| "loss": 0.0, |
| "step": 13700 |
| }, |
| { |
| "epoch": 8.008177570093459, |
| "grad_norm": 0.0007103482494130731, |
| "learning_rate": 7.4952540887850474e-06, |
| "loss": 0.0, |
| "step": 13710 |
| }, |
| { |
| "epoch": 8.014018691588785, |
| "grad_norm": 0.0007463495712727308, |
| "learning_rate": 7.491603387850468e-06, |
| "loss": 0.0, |
| "step": 13720 |
| }, |
| { |
| "epoch": 8.019859813084112, |
| "grad_norm": 0.000795390282291919, |
| "learning_rate": 7.487952686915888e-06, |
| "loss": 0.0, |
| "step": 13730 |
| }, |
| { |
| "epoch": 8.02570093457944, |
| "grad_norm": 0.0007960695074871182, |
| "learning_rate": 7.484301985981309e-06, |
| "loss": 0.0, |
| "step": 13740 |
| }, |
| { |
| "epoch": 8.031542056074766, |
| "grad_norm": 0.000913925701752305, |
| "learning_rate": 7.4806512850467295e-06, |
| "loss": 0.0493, |
| "step": 13750 |
| }, |
| { |
| "epoch": 8.037383177570094, |
| "grad_norm": 0.0020121552515774965, |
| "learning_rate": 7.477000584112151e-06, |
| "loss": 0.0009, |
| "step": 13760 |
| }, |
| { |
| "epoch": 8.04322429906542, |
| "grad_norm": 0.0016634415369480848, |
| "learning_rate": 7.473349883177571e-06, |
| "loss": 0.0001, |
| "step": 13770 |
| }, |
| { |
| "epoch": 8.049065420560748, |
| "grad_norm": 0.0020532705821096897, |
| "learning_rate": 7.469699182242991e-06, |
| "loss": 0.0001, |
| "step": 13780 |
| }, |
| { |
| "epoch": 8.054906542056075, |
| "grad_norm": 0.0015584519132971764, |
| "learning_rate": 7.4660484813084125e-06, |
| "loss": 0.0, |
| "step": 13790 |
| }, |
| { |
| "epoch": 8.060747663551401, |
| "grad_norm": 0.002475617453455925, |
| "learning_rate": 7.462397780373832e-06, |
| "loss": 0.0244, |
| "step": 13800 |
| }, |
| { |
| "epoch": 8.06658878504673, |
| "grad_norm": 0.0036134568508714437, |
| "learning_rate": 7.458747079439252e-06, |
| "loss": 0.0001, |
| "step": 13810 |
| }, |
| { |
| "epoch": 8.072429906542055, |
| "grad_norm": 0.016877055168151855, |
| "learning_rate": 7.4550963785046734e-06, |
| "loss": 0.0003, |
| "step": 13820 |
| }, |
| { |
| "epoch": 8.078271028037383, |
| "grad_norm": 0.0013811654644086957, |
| "learning_rate": 7.451445677570094e-06, |
| "loss": 0.0002, |
| "step": 13830 |
| }, |
| { |
| "epoch": 8.08411214953271, |
| "grad_norm": 0.017157340422272682, |
| "learning_rate": 7.447794976635515e-06, |
| "loss": 0.0001, |
| "step": 13840 |
| }, |
| { |
| "epoch": 8.089953271028037, |
| "grad_norm": 0.0008855984779074788, |
| "learning_rate": 7.444144275700935e-06, |
| "loss": 0.0, |
| "step": 13850 |
| }, |
| { |
| "epoch": 8.095794392523365, |
| "grad_norm": 0.000996871036477387, |
| "learning_rate": 7.4404935747663555e-06, |
| "loss": 0.0, |
| "step": 13860 |
| }, |
| { |
| "epoch": 8.101635514018692, |
| "grad_norm": 0.0009028511703945696, |
| "learning_rate": 7.436842873831777e-06, |
| "loss": 0.0, |
| "step": 13870 |
| }, |
| { |
| "epoch": 8.107476635514018, |
| "grad_norm": 0.0009699731017462909, |
| "learning_rate": 7.433192172897197e-06, |
| "loss": 0.0, |
| "step": 13880 |
| }, |
| { |
| "epoch": 8.113317757009346, |
| "grad_norm": 0.0009504208574071527, |
| "learning_rate": 7.4295414719626165e-06, |
| "loss": 0.0, |
| "step": 13890 |
| }, |
| { |
| "epoch": 8.119158878504672, |
| "grad_norm": 0.0008348809205926955, |
| "learning_rate": 7.4258907710280385e-06, |
| "loss": 0.0, |
| "step": 13900 |
| }, |
| { |
| "epoch": 8.125, |
| "grad_norm": 0.0011557228863239288, |
| "learning_rate": 7.422240070093458e-06, |
| "loss": 0.0, |
| "step": 13910 |
| }, |
| { |
| "epoch": 8.130841121495328, |
| "grad_norm": 0.0009561071055941284, |
| "learning_rate": 7.418589369158879e-06, |
| "loss": 0.0, |
| "step": 13920 |
| }, |
| { |
| "epoch": 8.136682242990654, |
| "grad_norm": 0.0012622548965737224, |
| "learning_rate": 7.4149386682242994e-06, |
| "loss": 0.0, |
| "step": 13930 |
| }, |
| { |
| "epoch": 8.142523364485982, |
| "grad_norm": 0.001713108504191041, |
| "learning_rate": 7.41128796728972e-06, |
| "loss": 0.0, |
| "step": 13940 |
| }, |
| { |
| "epoch": 8.148364485981308, |
| "grad_norm": 0.0007852665730752051, |
| "learning_rate": 7.407637266355141e-06, |
| "loss": 0.0001, |
| "step": 13950 |
| }, |
| { |
| "epoch": 8.154205607476635, |
| "grad_norm": 0.0008906972361728549, |
| "learning_rate": 7.403986565420561e-06, |
| "loss": 0.0, |
| "step": 13960 |
| }, |
| { |
| "epoch": 8.160046728971963, |
| "grad_norm": 0.0007920749485492706, |
| "learning_rate": 7.400335864485982e-06, |
| "loss": 0.0, |
| "step": 13970 |
| }, |
| { |
| "epoch": 8.16588785046729, |
| "grad_norm": 0.000825137656647712, |
| "learning_rate": 7.396685163551403e-06, |
| "loss": 0.0, |
| "step": 13980 |
| }, |
| { |
| "epoch": 8.171728971962617, |
| "grad_norm": 0.0008679748862050474, |
| "learning_rate": 7.393034462616823e-06, |
| "loss": 0.0, |
| "step": 13990 |
| }, |
| { |
| "epoch": 8.177570093457945, |
| "grad_norm": 0.0007770723896101117, |
| "learning_rate": 7.389383761682244e-06, |
| "loss": 0.0, |
| "step": 14000 |
| }, |
| { |
| "epoch": 8.18341121495327, |
| "grad_norm": 0.000692716334015131, |
| "learning_rate": 7.385733060747664e-06, |
| "loss": 0.0, |
| "step": 14010 |
| }, |
| { |
| "epoch": 8.189252336448599, |
| "grad_norm": 0.00084645178867504, |
| "learning_rate": 7.382082359813084e-06, |
| "loss": 0.0, |
| "step": 14020 |
| }, |
| { |
| "epoch": 8.195093457943925, |
| "grad_norm": 0.0011498586973175406, |
| "learning_rate": 7.378431658878505e-06, |
| "loss": 0.0, |
| "step": 14030 |
| }, |
| { |
| "epoch": 8.200934579439252, |
| "grad_norm": 0.000940586207434535, |
| "learning_rate": 7.374780957943925e-06, |
| "loss": 0.0, |
| "step": 14040 |
| }, |
| { |
| "epoch": 8.20677570093458, |
| "grad_norm": 0.0007425823714584112, |
| "learning_rate": 7.371130257009347e-06, |
| "loss": 0.0, |
| "step": 14050 |
| }, |
| { |
| "epoch": 8.212616822429906, |
| "grad_norm": 0.0007549254805780947, |
| "learning_rate": 7.367479556074767e-06, |
| "loss": 0.0, |
| "step": 14060 |
| }, |
| { |
| "epoch": 8.218457943925234, |
| "grad_norm": 0.0007854328723624349, |
| "learning_rate": 7.363828855140187e-06, |
| "loss": 0.0, |
| "step": 14070 |
| }, |
| { |
| "epoch": 8.22429906542056, |
| "grad_norm": 0.0007651887135580182, |
| "learning_rate": 7.360178154205608e-06, |
| "loss": 0.0, |
| "step": 14080 |
| }, |
| { |
| "epoch": 8.230140186915888, |
| "grad_norm": 0.0006790246115997434, |
| "learning_rate": 7.356527453271029e-06, |
| "loss": 0.0, |
| "step": 14090 |
| }, |
| { |
| "epoch": 8.235981308411215, |
| "grad_norm": 0.0006944058113731444, |
| "learning_rate": 7.35287675233645e-06, |
| "loss": 0.0, |
| "step": 14100 |
| }, |
| { |
| "epoch": 8.241822429906541, |
| "grad_norm": 0.0006746923318132758, |
| "learning_rate": 7.34922605140187e-06, |
| "loss": 0.0, |
| "step": 14110 |
| }, |
| { |
| "epoch": 8.24766355140187, |
| "grad_norm": 0.0007892411667853594, |
| "learning_rate": 7.34557535046729e-06, |
| "loss": 0.0, |
| "step": 14120 |
| }, |
| { |
| "epoch": 8.253504672897197, |
| "grad_norm": 0.0006671813898719847, |
| "learning_rate": 7.341924649532712e-06, |
| "loss": 0.0, |
| "step": 14130 |
| }, |
| { |
| "epoch": 8.259345794392523, |
| "grad_norm": 0.0006297816289588809, |
| "learning_rate": 7.338273948598131e-06, |
| "loss": 0.0, |
| "step": 14140 |
| }, |
| { |
| "epoch": 8.26518691588785, |
| "grad_norm": 0.0006763806450180709, |
| "learning_rate": 7.334623247663551e-06, |
| "loss": 0.0, |
| "step": 14150 |
| }, |
| { |
| "epoch": 8.271028037383177, |
| "grad_norm": 0.0007309651700779796, |
| "learning_rate": 7.3309725467289726e-06, |
| "loss": 0.0, |
| "step": 14160 |
| }, |
| { |
| "epoch": 8.276869158878505, |
| "grad_norm": 0.0006537912413477898, |
| "learning_rate": 7.327321845794393e-06, |
| "loss": 0.0, |
| "step": 14170 |
| }, |
| { |
| "epoch": 8.282710280373832, |
| "grad_norm": 0.0006116880103945732, |
| "learning_rate": 7.323671144859814e-06, |
| "loss": 0.0, |
| "step": 14180 |
| }, |
| { |
| "epoch": 8.288551401869158, |
| "grad_norm": 0.0006861832225695252, |
| "learning_rate": 7.320020443925234e-06, |
| "loss": 0.0, |
| "step": 14190 |
| }, |
| { |
| "epoch": 8.294392523364486, |
| "grad_norm": 0.0006580561748705804, |
| "learning_rate": 7.316369742990655e-06, |
| "loss": 0.0, |
| "step": 14200 |
| }, |
| { |
| "epoch": 8.300233644859812, |
| "grad_norm": 0.0006089273374527693, |
| "learning_rate": 7.312719042056076e-06, |
| "loss": 0.0, |
| "step": 14210 |
| }, |
| { |
| "epoch": 8.30607476635514, |
| "grad_norm": 0.0006199203780852258, |
| "learning_rate": 7.309068341121496e-06, |
| "loss": 0.0, |
| "step": 14220 |
| }, |
| { |
| "epoch": 8.311915887850468, |
| "grad_norm": 0.0005652210093103349, |
| "learning_rate": 7.305417640186917e-06, |
| "loss": 0.0, |
| "step": 14230 |
| }, |
| { |
| "epoch": 8.317757009345794, |
| "grad_norm": 0.0006072871037758887, |
| "learning_rate": 7.301766939252337e-06, |
| "loss": 0.0, |
| "step": 14240 |
| }, |
| { |
| "epoch": 8.323598130841122, |
| "grad_norm": 0.0006104871281422675, |
| "learning_rate": 7.298116238317757e-06, |
| "loss": 0.0, |
| "step": 14250 |
| }, |
| { |
| "epoch": 8.32943925233645, |
| "grad_norm": 0.0005932269268669188, |
| "learning_rate": 7.294465537383178e-06, |
| "loss": 0.0, |
| "step": 14260 |
| }, |
| { |
| "epoch": 8.335280373831775, |
| "grad_norm": 0.000652293034363538, |
| "learning_rate": 7.2908148364485986e-06, |
| "loss": 0.0028, |
| "step": 14270 |
| }, |
| { |
| "epoch": 8.341121495327103, |
| "grad_norm": 0.0007468141266144812, |
| "learning_rate": 7.287164135514019e-06, |
| "loss": 0.0, |
| "step": 14280 |
| }, |
| { |
| "epoch": 8.34696261682243, |
| "grad_norm": 0.0007531442679464817, |
| "learning_rate": 7.28351343457944e-06, |
| "loss": 0.0, |
| "step": 14290 |
| }, |
| { |
| "epoch": 8.352803738317757, |
| "grad_norm": 0.000986740575172007, |
| "learning_rate": 7.27986273364486e-06, |
| "loss": 0.0674, |
| "step": 14300 |
| }, |
| { |
| "epoch": 8.358644859813085, |
| "grad_norm": 0.0013275983510538936, |
| "learning_rate": 7.2762120327102815e-06, |
| "loss": 0.0, |
| "step": 14310 |
| }, |
| { |
| "epoch": 8.36448598130841, |
| "grad_norm": 0.0015961026074364781, |
| "learning_rate": 7.272561331775702e-06, |
| "loss": 0.0003, |
| "step": 14320 |
| }, |
| { |
| "epoch": 8.370327102803738, |
| "grad_norm": 0.0012349386233836412, |
| "learning_rate": 7.268910630841121e-06, |
| "loss": 0.0, |
| "step": 14330 |
| }, |
| { |
| "epoch": 8.376168224299066, |
| "grad_norm": 0.001687607727944851, |
| "learning_rate": 7.265259929906543e-06, |
| "loss": 0.0, |
| "step": 14340 |
| }, |
| { |
| "epoch": 8.382009345794392, |
| "grad_norm": 0.0012085556518286467, |
| "learning_rate": 7.261609228971963e-06, |
| "loss": 0.0001, |
| "step": 14350 |
| }, |
| { |
| "epoch": 8.38785046728972, |
| "grad_norm": 0.0010161312529817224, |
| "learning_rate": 7.257958528037384e-06, |
| "loss": 0.0, |
| "step": 14360 |
| }, |
| { |
| "epoch": 8.393691588785046, |
| "grad_norm": 0.0010755781549960375, |
| "learning_rate": 7.254307827102804e-06, |
| "loss": 0.0001, |
| "step": 14370 |
| }, |
| { |
| "epoch": 8.399532710280374, |
| "grad_norm": 0.0010478084441274405, |
| "learning_rate": 7.2506571261682246e-06, |
| "loss": 0.0, |
| "step": 14380 |
| }, |
| { |
| "epoch": 8.405373831775702, |
| "grad_norm": 0.001011241809464991, |
| "learning_rate": 7.247006425233646e-06, |
| "loss": 0.0, |
| "step": 14390 |
| }, |
| { |
| "epoch": 8.411214953271028, |
| "grad_norm": 0.0009002193110063672, |
| "learning_rate": 7.243355724299066e-06, |
| "loss": 0.0, |
| "step": 14400 |
| }, |
| { |
| "epoch": 8.417056074766355, |
| "grad_norm": 0.0009343910496681929, |
| "learning_rate": 7.239705023364486e-06, |
| "loss": 0.0, |
| "step": 14410 |
| }, |
| { |
| "epoch": 8.422897196261681, |
| "grad_norm": 0.0008302544592879713, |
| "learning_rate": 7.2360543224299075e-06, |
| "loss": 0.0001, |
| "step": 14420 |
| }, |
| { |
| "epoch": 8.42873831775701, |
| "grad_norm": 0.0008650152012705803, |
| "learning_rate": 7.232403621495328e-06, |
| "loss": 0.0, |
| "step": 14430 |
| }, |
| { |
| "epoch": 8.434579439252337, |
| "grad_norm": 0.0008420557714998722, |
| "learning_rate": 7.228752920560749e-06, |
| "loss": 0.0, |
| "step": 14440 |
| }, |
| { |
| "epoch": 8.440420560747663, |
| "grad_norm": 0.0008095310186035931, |
| "learning_rate": 7.2251022196261684e-06, |
| "loss": 0.0, |
| "step": 14450 |
| }, |
| { |
| "epoch": 8.44626168224299, |
| "grad_norm": 0.0008125108433887362, |
| "learning_rate": 7.221451518691589e-06, |
| "loss": 0.0, |
| "step": 14460 |
| }, |
| { |
| "epoch": 8.452102803738319, |
| "grad_norm": 0.0007584681734442711, |
| "learning_rate": 7.21780081775701e-06, |
| "loss": 0.0, |
| "step": 14470 |
| }, |
| { |
| "epoch": 8.457943925233645, |
| "grad_norm": 0.0007872634450905025, |
| "learning_rate": 7.21415011682243e-06, |
| "loss": 0.0, |
| "step": 14480 |
| }, |
| { |
| "epoch": 8.463785046728972, |
| "grad_norm": 0.0007215076475404203, |
| "learning_rate": 7.2104994158878505e-06, |
| "loss": 0.0215, |
| "step": 14490 |
| }, |
| { |
| "epoch": 8.469626168224298, |
| "grad_norm": 0.0007134796469472349, |
| "learning_rate": 7.206848714953272e-06, |
| "loss": 0.0, |
| "step": 14500 |
| }, |
| { |
| "epoch": 8.475467289719626, |
| "grad_norm": 0.0008008384029380977, |
| "learning_rate": 7.203198014018692e-06, |
| "loss": 0.0, |
| "step": 14510 |
| }, |
| { |
| "epoch": 8.481308411214954, |
| "grad_norm": 0.000777578039560467, |
| "learning_rate": 7.199547313084113e-06, |
| "loss": 0.0, |
| "step": 14520 |
| }, |
| { |
| "epoch": 8.48714953271028, |
| "grad_norm": 0.0007424994255416095, |
| "learning_rate": 7.1958966121495335e-06, |
| "loss": 0.0, |
| "step": 14530 |
| }, |
| { |
| "epoch": 8.492990654205608, |
| "grad_norm": 0.0006925264024175704, |
| "learning_rate": 7.192245911214954e-06, |
| "loss": 0.0, |
| "step": 14540 |
| }, |
| { |
| "epoch": 8.498831775700934, |
| "grad_norm": 0.0006563960923813283, |
| "learning_rate": 7.188595210280375e-06, |
| "loss": 0.0, |
| "step": 14550 |
| }, |
| { |
| "epoch": 8.504672897196262, |
| "grad_norm": 0.0007172149489633739, |
| "learning_rate": 7.1849445093457944e-06, |
| "loss": 0.0, |
| "step": 14560 |
| }, |
| { |
| "epoch": 8.51051401869159, |
| "grad_norm": 0.0006392272189259529, |
| "learning_rate": 7.1812938084112164e-06, |
| "loss": 0.0, |
| "step": 14570 |
| }, |
| { |
| "epoch": 8.516355140186915, |
| "grad_norm": 0.0006601911736652255, |
| "learning_rate": 7.177643107476636e-06, |
| "loss": 0.0, |
| "step": 14580 |
| }, |
| { |
| "epoch": 8.522196261682243, |
| "grad_norm": 9.061529159545898, |
| "learning_rate": 7.173992406542056e-06, |
| "loss": 0.0489, |
| "step": 14590 |
| }, |
| { |
| "epoch": 8.52803738317757, |
| "grad_norm": 0.0026740028988569975, |
| "learning_rate": 7.170341705607477e-06, |
| "loss": 0.0001, |
| "step": 14600 |
| }, |
| { |
| "epoch": 8.533878504672897, |
| "grad_norm": 0.004049922805279493, |
| "learning_rate": 7.166691004672898e-06, |
| "loss": 0.0002, |
| "step": 14610 |
| }, |
| { |
| "epoch": 8.539719626168225, |
| "grad_norm": 0.006144702434539795, |
| "learning_rate": 7.163040303738318e-06, |
| "loss": 0.0001, |
| "step": 14620 |
| }, |
| { |
| "epoch": 8.54556074766355, |
| "grad_norm": 0.00719565711915493, |
| "learning_rate": 7.159389602803739e-06, |
| "loss": 0.0001, |
| "step": 14630 |
| }, |
| { |
| "epoch": 8.551401869158878, |
| "grad_norm": 0.0010341497836634517, |
| "learning_rate": 7.1557389018691595e-06, |
| "loss": 0.0001, |
| "step": 14640 |
| }, |
| { |
| "epoch": 8.557242990654206, |
| "grad_norm": 0.0012794750509783626, |
| "learning_rate": 7.152088200934581e-06, |
| "loss": 0.0001, |
| "step": 14650 |
| }, |
| { |
| "epoch": 8.563084112149532, |
| "grad_norm": 0.0016508783446624875, |
| "learning_rate": 7.148437500000001e-06, |
| "loss": 0.0, |
| "step": 14660 |
| }, |
| { |
| "epoch": 8.56892523364486, |
| "grad_norm": 0.0007288011256605387, |
| "learning_rate": 7.1447867990654204e-06, |
| "loss": 0.0, |
| "step": 14670 |
| }, |
| { |
| "epoch": 8.574766355140188, |
| "grad_norm": 0.0006575025035999715, |
| "learning_rate": 7.141136098130842e-06, |
| "loss": 0.0, |
| "step": 14680 |
| }, |
| { |
| "epoch": 8.580607476635514, |
| "grad_norm": 0.000702616642229259, |
| "learning_rate": 7.137485397196262e-06, |
| "loss": 0.0, |
| "step": 14690 |
| }, |
| { |
| "epoch": 8.586448598130842, |
| "grad_norm": 0.0007005234947428107, |
| "learning_rate": 7.133834696261683e-06, |
| "loss": 0.0, |
| "step": 14700 |
| }, |
| { |
| "epoch": 8.592289719626168, |
| "grad_norm": 0.0006908517680130899, |
| "learning_rate": 7.130183995327103e-06, |
| "loss": 0.0, |
| "step": 14710 |
| }, |
| { |
| "epoch": 8.598130841121495, |
| "grad_norm": 0.0006434786482714117, |
| "learning_rate": 7.126533294392524e-06, |
| "loss": 0.0, |
| "step": 14720 |
| }, |
| { |
| "epoch": 8.603971962616823, |
| "grad_norm": 0.0007483828230760992, |
| "learning_rate": 7.122882593457945e-06, |
| "loss": 0.0001, |
| "step": 14730 |
| }, |
| { |
| "epoch": 8.60981308411215, |
| "grad_norm": 0.0007363962358795106, |
| "learning_rate": 7.119231892523365e-06, |
| "loss": 0.0, |
| "step": 14740 |
| }, |
| { |
| "epoch": 8.615654205607477, |
| "grad_norm": 0.0007484328816644847, |
| "learning_rate": 7.1155811915887855e-06, |
| "loss": 0.0, |
| "step": 14750 |
| }, |
| { |
| "epoch": 8.621495327102803, |
| "grad_norm": 0.0007965927361510694, |
| "learning_rate": 7.111930490654207e-06, |
| "loss": 0.0, |
| "step": 14760 |
| }, |
| { |
| "epoch": 8.62733644859813, |
| "grad_norm": 0.0006503010517917573, |
| "learning_rate": 7.108279789719626e-06, |
| "loss": 0.0, |
| "step": 14770 |
| }, |
| { |
| "epoch": 8.633177570093459, |
| "grad_norm": 0.0006895572878420353, |
| "learning_rate": 7.104629088785048e-06, |
| "loss": 0.0, |
| "step": 14780 |
| }, |
| { |
| "epoch": 8.639018691588785, |
| "grad_norm": 0.0006212314474396408, |
| "learning_rate": 7.100978387850468e-06, |
| "loss": 0.0, |
| "step": 14790 |
| }, |
| { |
| "epoch": 8.644859813084112, |
| "grad_norm": 0.0006286058924160898, |
| "learning_rate": 7.097327686915888e-06, |
| "loss": 0.0, |
| "step": 14800 |
| }, |
| { |
| "epoch": 8.65070093457944, |
| "grad_norm": 0.001083516632206738, |
| "learning_rate": 7.093676985981309e-06, |
| "loss": 0.0, |
| "step": 14810 |
| }, |
| { |
| "epoch": 8.656542056074766, |
| "grad_norm": 0.0005799504579044878, |
| "learning_rate": 7.090026285046729e-06, |
| "loss": 0.0, |
| "step": 14820 |
| }, |
| { |
| "epoch": 8.662383177570094, |
| "grad_norm": 0.0005443979753181338, |
| "learning_rate": 7.0863755841121505e-06, |
| "loss": 0.0, |
| "step": 14830 |
| }, |
| { |
| "epoch": 8.66822429906542, |
| "grad_norm": 0.8044731020927429, |
| "learning_rate": 7.082724883177571e-06, |
| "loss": 0.0008, |
| "step": 14840 |
| }, |
| { |
| "epoch": 8.674065420560748, |
| "grad_norm": 0.0007343247998505831, |
| "learning_rate": 7.079074182242991e-06, |
| "loss": 0.0006, |
| "step": 14850 |
| }, |
| { |
| "epoch": 8.679906542056075, |
| "grad_norm": 0.0007370833773165941, |
| "learning_rate": 7.075423481308412e-06, |
| "loss": 0.0, |
| "step": 14860 |
| }, |
| { |
| "epoch": 8.685747663551401, |
| "grad_norm": 0.000746356206946075, |
| "learning_rate": 7.071772780373833e-06, |
| "loss": 0.0, |
| "step": 14870 |
| }, |
| { |
| "epoch": 8.69158878504673, |
| "grad_norm": 0.0007385259959846735, |
| "learning_rate": 7.068122079439252e-06, |
| "loss": 0.0, |
| "step": 14880 |
| }, |
| { |
| "epoch": 8.697429906542055, |
| "grad_norm": 0.0007199611281976104, |
| "learning_rate": 7.064471378504674e-06, |
| "loss": 0.0, |
| "step": 14890 |
| }, |
| { |
| "epoch": 8.703271028037383, |
| "grad_norm": 0.0006418672273866832, |
| "learning_rate": 7.0608206775700936e-06, |
| "loss": 0.0, |
| "step": 14900 |
| }, |
| { |
| "epoch": 8.70911214953271, |
| "grad_norm": 0.0006997276796028018, |
| "learning_rate": 7.057169976635515e-06, |
| "loss": 0.0, |
| "step": 14910 |
| }, |
| { |
| "epoch": 8.714953271028037, |
| "grad_norm": 0.0005712428246624768, |
| "learning_rate": 7.053519275700935e-06, |
| "loss": 0.0, |
| "step": 14920 |
| }, |
| { |
| "epoch": 8.720794392523365, |
| "grad_norm": 0.0005171049269847572, |
| "learning_rate": 7.049868574766355e-06, |
| "loss": 0.0, |
| "step": 14930 |
| }, |
| { |
| "epoch": 8.726635514018692, |
| "grad_norm": 0.00048571472871117294, |
| "learning_rate": 7.0462178738317765e-06, |
| "loss": 0.0, |
| "step": 14940 |
| }, |
| { |
| "epoch": 8.732476635514018, |
| "grad_norm": 0.0005043037817813456, |
| "learning_rate": 7.042567172897197e-06, |
| "loss": 0.0, |
| "step": 14950 |
| }, |
| { |
| "epoch": 8.738317757009346, |
| "grad_norm": 0.0005587654886767268, |
| "learning_rate": 7.038916471962617e-06, |
| "loss": 0.0, |
| "step": 14960 |
| }, |
| { |
| "epoch": 8.744158878504672, |
| "grad_norm": 0.0004949513822793961, |
| "learning_rate": 7.035265771028038e-06, |
| "loss": 0.0, |
| "step": 14970 |
| }, |
| { |
| "epoch": 8.75, |
| "grad_norm": 0.00048060237895697355, |
| "learning_rate": 7.031615070093459e-06, |
| "loss": 0.0, |
| "step": 14980 |
| }, |
| { |
| "epoch": 8.755841121495328, |
| "grad_norm": 0.0005176740232855082, |
| "learning_rate": 7.02796436915888e-06, |
| "loss": 0.0, |
| "step": 14990 |
| }, |
| { |
| "epoch": 8.761682242990654, |
| "grad_norm": 0.0005571042420342565, |
| "learning_rate": 7.024313668224299e-06, |
| "loss": 0.0, |
| "step": 15000 |
| }, |
| { |
| "epoch": 8.767523364485982, |
| "grad_norm": 0.00046813112567178905, |
| "learning_rate": 7.0206629672897196e-06, |
| "loss": 0.0, |
| "step": 15010 |
| }, |
| { |
| "epoch": 8.773364485981308, |
| "grad_norm": 0.0005086753517389297, |
| "learning_rate": 7.017012266355141e-06, |
| "loss": 0.0, |
| "step": 15020 |
| }, |
| { |
| "epoch": 8.779205607476635, |
| "grad_norm": 0.0004869645636063069, |
| "learning_rate": 7.013361565420561e-06, |
| "loss": 0.0, |
| "step": 15030 |
| }, |
| { |
| "epoch": 8.785046728971963, |
| "grad_norm": 0.000516543397679925, |
| "learning_rate": 7.009710864485982e-06, |
| "loss": 0.0, |
| "step": 15040 |
| }, |
| { |
| "epoch": 8.79088785046729, |
| "grad_norm": 0.0004322292807046324, |
| "learning_rate": 7.0060601635514025e-06, |
| "loss": 0.0, |
| "step": 15050 |
| }, |
| { |
| "epoch": 8.796728971962617, |
| "grad_norm": 0.0004122374521102756, |
| "learning_rate": 7.002409462616823e-06, |
| "loss": 0.0, |
| "step": 15060 |
| }, |
| { |
| "epoch": 8.802570093457945, |
| "grad_norm": 0.00039723445661365986, |
| "learning_rate": 6.998758761682244e-06, |
| "loss": 0.0, |
| "step": 15070 |
| }, |
| { |
| "epoch": 8.80841121495327, |
| "grad_norm": 0.00043567997636273503, |
| "learning_rate": 6.995108060747664e-06, |
| "loss": 0.0, |
| "step": 15080 |
| }, |
| { |
| "epoch": 8.814252336448599, |
| "grad_norm": 0.00040160896605812013, |
| "learning_rate": 6.991457359813084e-06, |
| "loss": 0.0, |
| "step": 15090 |
| }, |
| { |
| "epoch": 8.820093457943925, |
| "grad_norm": 0.0004335957346484065, |
| "learning_rate": 6.987806658878506e-06, |
| "loss": 0.0, |
| "step": 15100 |
| }, |
| { |
| "epoch": 8.825934579439252, |
| "grad_norm": 0.0004694766830652952, |
| "learning_rate": 6.984155957943925e-06, |
| "loss": 0.0, |
| "step": 15110 |
| }, |
| { |
| "epoch": 8.83177570093458, |
| "grad_norm": 0.10719966888427734, |
| "learning_rate": 6.980505257009346e-06, |
| "loss": 0.0, |
| "step": 15120 |
| }, |
| { |
| "epoch": 8.837616822429906, |
| "grad_norm": 0.000425121485022828, |
| "learning_rate": 6.976854556074767e-06, |
| "loss": 0.0, |
| "step": 15130 |
| }, |
| { |
| "epoch": 8.843457943925234, |
| "grad_norm": 0.0004239288973622024, |
| "learning_rate": 6.973203855140187e-06, |
| "loss": 0.0, |
| "step": 15140 |
| }, |
| { |
| "epoch": 8.84929906542056, |
| "grad_norm": 0.0004261216090526432, |
| "learning_rate": 6.969553154205608e-06, |
| "loss": 0.0, |
| "step": 15150 |
| }, |
| { |
| "epoch": 8.855140186915888, |
| "grad_norm": 0.0004185021680314094, |
| "learning_rate": 6.9659024532710285e-06, |
| "loss": 0.0, |
| "step": 15160 |
| }, |
| { |
| "epoch": 8.860981308411215, |
| "grad_norm": 0.000404214282752946, |
| "learning_rate": 6.96225175233645e-06, |
| "loss": 0.0, |
| "step": 15170 |
| }, |
| { |
| "epoch": 8.866822429906541, |
| "grad_norm": 0.00038997773663140833, |
| "learning_rate": 6.95860105140187e-06, |
| "loss": 0.0, |
| "step": 15180 |
| }, |
| { |
| "epoch": 8.87266355140187, |
| "grad_norm": 0.00040488707600161433, |
| "learning_rate": 6.95495035046729e-06, |
| "loss": 0.0, |
| "step": 15190 |
| }, |
| { |
| "epoch": 8.878504672897197, |
| "grad_norm": 0.00039094104431569576, |
| "learning_rate": 6.9512996495327115e-06, |
| "loss": 0.0, |
| "step": 15200 |
| }, |
| { |
| "epoch": 8.884345794392523, |
| "grad_norm": 0.00038398956530727446, |
| "learning_rate": 6.947648948598131e-06, |
| "loss": 0.0, |
| "step": 15210 |
| }, |
| { |
| "epoch": 8.89018691588785, |
| "grad_norm": 0.0004078754282090813, |
| "learning_rate": 6.943998247663551e-06, |
| "loss": 0.0, |
| "step": 15220 |
| }, |
| { |
| "epoch": 8.896028037383177, |
| "grad_norm": 0.0005417789798229933, |
| "learning_rate": 6.940347546728972e-06, |
| "loss": 0.0918, |
| "step": 15230 |
| }, |
| { |
| "epoch": 8.901869158878505, |
| "grad_norm": 0.0005213550757616758, |
| "learning_rate": 6.936696845794393e-06, |
| "loss": 0.0, |
| "step": 15240 |
| }, |
| { |
| "epoch": 8.907710280373832, |
| "grad_norm": 0.0024687082041054964, |
| "learning_rate": 6.933046144859814e-06, |
| "loss": 0.0, |
| "step": 15250 |
| }, |
| { |
| "epoch": 8.913551401869158, |
| "grad_norm": 0.0004659520636778325, |
| "learning_rate": 6.929395443925234e-06, |
| "loss": 0.0003, |
| "step": 15260 |
| }, |
| { |
| "epoch": 8.919392523364486, |
| "grad_norm": 0.0005060379626229405, |
| "learning_rate": 6.9257447429906545e-06, |
| "loss": 0.0, |
| "step": 15270 |
| }, |
| { |
| "epoch": 8.925233644859812, |
| "grad_norm": 0.00046979807666502893, |
| "learning_rate": 6.922094042056076e-06, |
| "loss": 0.0, |
| "step": 15280 |
| }, |
| { |
| "epoch": 8.93107476635514, |
| "grad_norm": 0.0003927224315702915, |
| "learning_rate": 6.918443341121496e-06, |
| "loss": 0.0, |
| "step": 15290 |
| }, |
| { |
| "epoch": 8.936915887850468, |
| "grad_norm": 0.0004722073790617287, |
| "learning_rate": 6.914792640186917e-06, |
| "loss": 0.0, |
| "step": 15300 |
| }, |
| { |
| "epoch": 8.942757009345794, |
| "grad_norm": 0.0004678576369769871, |
| "learning_rate": 6.9111419392523374e-06, |
| "loss": 0.0, |
| "step": 15310 |
| }, |
| { |
| "epoch": 8.948598130841122, |
| "grad_norm": 0.00042912139906547964, |
| "learning_rate": 6.907491238317757e-06, |
| "loss": 0.0, |
| "step": 15320 |
| }, |
| { |
| "epoch": 8.95443925233645, |
| "grad_norm": 0.0004561747773550451, |
| "learning_rate": 6.903840537383179e-06, |
| "loss": 0.0, |
| "step": 15330 |
| }, |
| { |
| "epoch": 8.960280373831775, |
| "grad_norm": 0.0004263494920451194, |
| "learning_rate": 6.900189836448598e-06, |
| "loss": 0.0, |
| "step": 15340 |
| }, |
| { |
| "epoch": 8.966121495327103, |
| "grad_norm": 0.00038956417120061815, |
| "learning_rate": 6.896539135514019e-06, |
| "loss": 0.0, |
| "step": 15350 |
| }, |
| { |
| "epoch": 8.97196261682243, |
| "grad_norm": 0.0003977013984695077, |
| "learning_rate": 6.89288843457944e-06, |
| "loss": 0.0, |
| "step": 15360 |
| }, |
| { |
| "epoch": 8.977803738317757, |
| "grad_norm": 0.00039336297777481377, |
| "learning_rate": 6.88923773364486e-06, |
| "loss": 0.0, |
| "step": 15370 |
| }, |
| { |
| "epoch": 8.983644859813085, |
| "grad_norm": 0.00036047364119440317, |
| "learning_rate": 6.885587032710281e-06, |
| "loss": 0.0, |
| "step": 15380 |
| }, |
| { |
| "epoch": 8.98948598130841, |
| "grad_norm": 0.0003544074425008148, |
| "learning_rate": 6.881936331775702e-06, |
| "loss": 0.0, |
| "step": 15390 |
| }, |
| { |
| "epoch": 8.995327102803738, |
| "grad_norm": 0.0003941435134038329, |
| "learning_rate": 6.878285630841122e-06, |
| "loss": 0.0, |
| "step": 15400 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_f1": 0.9998485536877177, |
| "eval_fbeta": 0.9997576279314389, |
| "eval_loss": 0.0016669631004333496, |
| "eval_precision": 0.9996971532404604, |
| "eval_recall": 1.0, |
| "eval_runtime": 170.7688, |
| "eval_samples_per_second": 68.73, |
| "eval_steps_per_second": 8.596, |
| "step": 15408 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 34240, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2027188102579712e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|