LBolitho's picture
Upload folder using huggingface_hub
48e5bd3 verified
{
"best_global_step": 15408,
"best_metric": 0.9998485536877177,
"best_model_checkpoint": "wav2vec2_frog_classifier_sew_d/checkpoint-15408",
"epoch": 9.0,
"eval_steps": 500,
"global_step": 15408,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005841121495327103,
"grad_norm": 9.79509162902832,
"learning_rate": 1.3142523364485982e-08,
"loss": 0.7229,
"step": 10
},
{
"epoch": 0.011682242990654205,
"grad_norm": 18.790061950683594,
"learning_rate": 2.774532710280374e-08,
"loss": 0.7206,
"step": 20
},
{
"epoch": 0.017523364485981307,
"grad_norm": 10.11142349243164,
"learning_rate": 4.2348130841121496e-08,
"loss": 0.7046,
"step": 30
},
{
"epoch": 0.02336448598130841,
"grad_norm": 14.219882011413574,
"learning_rate": 5.6950934579439255e-08,
"loss": 0.6709,
"step": 40
},
{
"epoch": 0.029205607476635514,
"grad_norm": 16.27984619140625,
"learning_rate": 7.155373831775702e-08,
"loss": 0.6569,
"step": 50
},
{
"epoch": 0.035046728971962614,
"grad_norm": 15.359692573547363,
"learning_rate": 8.615654205607476e-08,
"loss": 0.6114,
"step": 60
},
{
"epoch": 0.04088785046728972,
"grad_norm": 18.100404739379883,
"learning_rate": 1.0075934579439253e-07,
"loss": 0.5935,
"step": 70
},
{
"epoch": 0.04672897196261682,
"grad_norm": 45.55104064941406,
"learning_rate": 1.1536214953271028e-07,
"loss": 0.5592,
"step": 80
},
{
"epoch": 0.052570093457943924,
"grad_norm": 16.723636627197266,
"learning_rate": 1.2996495327102804e-07,
"loss": 0.5253,
"step": 90
},
{
"epoch": 0.05841121495327103,
"grad_norm": 37.350120544433594,
"learning_rate": 1.445677570093458e-07,
"loss": 0.4649,
"step": 100
},
{
"epoch": 0.06425233644859812,
"grad_norm": 38.95242691040039,
"learning_rate": 1.5917056074766358e-07,
"loss": 0.4839,
"step": 110
},
{
"epoch": 0.07009345794392523,
"grad_norm": 53.936973571777344,
"learning_rate": 1.7377336448598132e-07,
"loss": 0.4564,
"step": 120
},
{
"epoch": 0.07593457943925233,
"grad_norm": 16.073362350463867,
"learning_rate": 1.8837616822429908e-07,
"loss": 0.4502,
"step": 130
},
{
"epoch": 0.08177570093457943,
"grad_norm": 49.77845001220703,
"learning_rate": 2.0297897196261685e-07,
"loss": 0.4377,
"step": 140
},
{
"epoch": 0.08761682242990654,
"grad_norm": 21.89165687561035,
"learning_rate": 2.1758177570093461e-07,
"loss": 0.3377,
"step": 150
},
{
"epoch": 0.09345794392523364,
"grad_norm": 19.01292610168457,
"learning_rate": 2.3218457943925235e-07,
"loss": 0.3362,
"step": 160
},
{
"epoch": 0.09929906542056074,
"grad_norm": 44.09226608276367,
"learning_rate": 2.467873831775701e-07,
"loss": 0.3133,
"step": 170
},
{
"epoch": 0.10514018691588785,
"grad_norm": 7.86514949798584,
"learning_rate": 2.6139018691588786e-07,
"loss": 0.2629,
"step": 180
},
{
"epoch": 0.11098130841121495,
"grad_norm": 60.873443603515625,
"learning_rate": 2.7599299065420565e-07,
"loss": 0.2352,
"step": 190
},
{
"epoch": 0.11682242990654206,
"grad_norm": 8.908461570739746,
"learning_rate": 2.905957943925234e-07,
"loss": 0.2343,
"step": 200
},
{
"epoch": 0.12266355140186916,
"grad_norm": 20.771284103393555,
"learning_rate": 3.051985981308412e-07,
"loss": 0.2147,
"step": 210
},
{
"epoch": 0.12850467289719625,
"grad_norm": 4.727106094360352,
"learning_rate": 3.198014018691589e-07,
"loss": 0.13,
"step": 220
},
{
"epoch": 0.13434579439252337,
"grad_norm": 40.93299865722656,
"learning_rate": 3.3440420560747666e-07,
"loss": 0.1191,
"step": 230
},
{
"epoch": 0.14018691588785046,
"grad_norm": 4.306646347045898,
"learning_rate": 3.4900700934579445e-07,
"loss": 0.0938,
"step": 240
},
{
"epoch": 0.14602803738317757,
"grad_norm": 1.910163164138794,
"learning_rate": 3.636098130841122e-07,
"loss": 0.0715,
"step": 250
},
{
"epoch": 0.15186915887850466,
"grad_norm": 2.0170037746429443,
"learning_rate": 3.7821261682243e-07,
"loss": 0.0417,
"step": 260
},
{
"epoch": 0.15771028037383178,
"grad_norm": 1.1409950256347656,
"learning_rate": 3.9281542056074767e-07,
"loss": 0.0309,
"step": 270
},
{
"epoch": 0.16355140186915887,
"grad_norm": 1.4356250762939453,
"learning_rate": 4.0741822429906546e-07,
"loss": 0.03,
"step": 280
},
{
"epoch": 0.169392523364486,
"grad_norm": 0.8783276677131653,
"learning_rate": 4.220210280373832e-07,
"loss": 0.0212,
"step": 290
},
{
"epoch": 0.17523364485981308,
"grad_norm": 17.989458084106445,
"learning_rate": 4.36623831775701e-07,
"loss": 0.0416,
"step": 300
},
{
"epoch": 0.1810747663551402,
"grad_norm": 0.5098496079444885,
"learning_rate": 4.5122663551401874e-07,
"loss": 0.0161,
"step": 310
},
{
"epoch": 0.18691588785046728,
"grad_norm": 1.8965669870376587,
"learning_rate": 4.6582943925233653e-07,
"loss": 0.0415,
"step": 320
},
{
"epoch": 0.1927570093457944,
"grad_norm": 0.6456058025360107,
"learning_rate": 4.804322429906542e-07,
"loss": 0.0316,
"step": 330
},
{
"epoch": 0.1985981308411215,
"grad_norm": 0.7947583794593811,
"learning_rate": 4.95035046728972e-07,
"loss": 0.013,
"step": 340
},
{
"epoch": 0.2044392523364486,
"grad_norm": 0.3620131015777588,
"learning_rate": 5.096378504672898e-07,
"loss": 0.0127,
"step": 350
},
{
"epoch": 0.2102803738317757,
"grad_norm": 1.309668779373169,
"learning_rate": 5.242406542056075e-07,
"loss": 0.0406,
"step": 360
},
{
"epoch": 0.2161214953271028,
"grad_norm": 0.6888924241065979,
"learning_rate": 5.388434579439253e-07,
"loss": 0.0118,
"step": 370
},
{
"epoch": 0.2219626168224299,
"grad_norm": 0.4831880033016205,
"learning_rate": 5.53446261682243e-07,
"loss": 0.0314,
"step": 380
},
{
"epoch": 0.22780373831775702,
"grad_norm": 0.355655699968338,
"learning_rate": 5.680490654205608e-07,
"loss": 0.011,
"step": 390
},
{
"epoch": 0.2336448598130841,
"grad_norm": 0.36532697081565857,
"learning_rate": 5.826518691588785e-07,
"loss": 0.0391,
"step": 400
},
{
"epoch": 0.23948598130841123,
"grad_norm": 0.7894148230552673,
"learning_rate": 5.972546728971963e-07,
"loss": 0.0388,
"step": 410
},
{
"epoch": 0.24532710280373832,
"grad_norm": 0.34525978565216064,
"learning_rate": 6.11857476635514e-07,
"loss": 0.0101,
"step": 420
},
{
"epoch": 0.25116822429906543,
"grad_norm": 0.2628501355648041,
"learning_rate": 6.264602803738318e-07,
"loss": 0.0098,
"step": 430
},
{
"epoch": 0.2570093457943925,
"grad_norm": 0.4060896635055542,
"learning_rate": 6.410630841121496e-07,
"loss": 0.0096,
"step": 440
},
{
"epoch": 0.2628504672897196,
"grad_norm": 0.22625339031219482,
"learning_rate": 6.556658878504674e-07,
"loss": 0.0093,
"step": 450
},
{
"epoch": 0.26869158878504673,
"grad_norm": 0.25126221776008606,
"learning_rate": 6.702686915887852e-07,
"loss": 0.0091,
"step": 460
},
{
"epoch": 0.27453271028037385,
"grad_norm": 0.17624634504318237,
"learning_rate": 6.848714953271028e-07,
"loss": 0.0089,
"step": 470
},
{
"epoch": 0.2803738317757009,
"grad_norm": 0.1749255508184433,
"learning_rate": 6.994742990654206e-07,
"loss": 0.0087,
"step": 480
},
{
"epoch": 0.286214953271028,
"grad_norm": 0.5451480150222778,
"learning_rate": 7.140771028037384e-07,
"loss": 0.0384,
"step": 490
},
{
"epoch": 0.29205607476635514,
"grad_norm": 0.2977391183376312,
"learning_rate": 7.286799065420562e-07,
"loss": 0.0086,
"step": 500
},
{
"epoch": 0.29789719626168226,
"grad_norm": 0.1782873570919037,
"learning_rate": 7.432827102803738e-07,
"loss": 0.0083,
"step": 510
},
{
"epoch": 0.3037383177570093,
"grad_norm": 0.4122442603111267,
"learning_rate": 7.578855140186916e-07,
"loss": 0.0383,
"step": 520
},
{
"epoch": 0.30957943925233644,
"grad_norm": 0.3462129235267639,
"learning_rate": 7.724883177570094e-07,
"loss": 0.0083,
"step": 530
},
{
"epoch": 0.31542056074766356,
"grad_norm": 0.4232594668865204,
"learning_rate": 7.870911214953271e-07,
"loss": 0.0382,
"step": 540
},
{
"epoch": 0.3212616822429907,
"grad_norm": 0.6220551133155823,
"learning_rate": 8.016939252336449e-07,
"loss": 0.0383,
"step": 550
},
{
"epoch": 0.32710280373831774,
"grad_norm": 3.729926347732544,
"learning_rate": 8.162967289719627e-07,
"loss": 0.0382,
"step": 560
},
{
"epoch": 0.33294392523364486,
"grad_norm": 0.38771969079971313,
"learning_rate": 8.308995327102805e-07,
"loss": 0.0078,
"step": 570
},
{
"epoch": 0.338785046728972,
"grad_norm": 2.8846986293792725,
"learning_rate": 8.455023364485982e-07,
"loss": 0.038,
"step": 580
},
{
"epoch": 0.3446261682242991,
"grad_norm": 0.2876492738723755,
"learning_rate": 8.60105140186916e-07,
"loss": 0.0076,
"step": 590
},
{
"epoch": 0.35046728971962615,
"grad_norm": 0.18359579145908356,
"learning_rate": 8.747079439252338e-07,
"loss": 0.0072,
"step": 600
},
{
"epoch": 0.35630841121495327,
"grad_norm": 0.15943388640880585,
"learning_rate": 8.893107476635516e-07,
"loss": 0.0071,
"step": 610
},
{
"epoch": 0.3621495327102804,
"grad_norm": 0.4958500564098358,
"learning_rate": 9.039135514018691e-07,
"loss": 0.069,
"step": 620
},
{
"epoch": 0.3679906542056075,
"grad_norm": 0.2749152183532715,
"learning_rate": 9.185163551401869e-07,
"loss": 0.0362,
"step": 630
},
{
"epoch": 0.37383177570093457,
"grad_norm": 0.25621095299720764,
"learning_rate": 9.331191588785047e-07,
"loss": 0.0373,
"step": 640
},
{
"epoch": 0.3796728971962617,
"grad_norm": 0.19895656406879425,
"learning_rate": 9.477219626168225e-07,
"loss": 0.0069,
"step": 650
},
{
"epoch": 0.3855140186915888,
"grad_norm": 0.15757645666599274,
"learning_rate": 9.623247663551403e-07,
"loss": 0.0067,
"step": 660
},
{
"epoch": 0.39135514018691586,
"grad_norm": 0.29621386528015137,
"learning_rate": 9.76927570093458e-07,
"loss": 0.0364,
"step": 670
},
{
"epoch": 0.397196261682243,
"grad_norm": 0.15212251245975494,
"learning_rate": 9.915303738317759e-07,
"loss": 0.0065,
"step": 680
},
{
"epoch": 0.4030373831775701,
"grad_norm": 0.12763015925884247,
"learning_rate": 1.0061331775700936e-06,
"loss": 0.0063,
"step": 690
},
{
"epoch": 0.4088785046728972,
"grad_norm": 0.11682584136724472,
"learning_rate": 1.0207359813084113e-06,
"loss": 0.0061,
"step": 700
},
{
"epoch": 0.4147196261682243,
"grad_norm": 0.11381419748067856,
"learning_rate": 1.035338785046729e-06,
"loss": 0.006,
"step": 710
},
{
"epoch": 0.4205607476635514,
"grad_norm": 0.1704946756362915,
"learning_rate": 1.0499415887850468e-06,
"loss": 0.038,
"step": 720
},
{
"epoch": 0.4264018691588785,
"grad_norm": 10.463326454162598,
"learning_rate": 1.0645443925233647e-06,
"loss": 0.0382,
"step": 730
},
{
"epoch": 0.4322429906542056,
"grad_norm": 0.1989985555410385,
"learning_rate": 1.0791471962616822e-06,
"loss": 0.0061,
"step": 740
},
{
"epoch": 0.4380841121495327,
"grad_norm": 0.3084839880466461,
"learning_rate": 1.0937500000000001e-06,
"loss": 0.0617,
"step": 750
},
{
"epoch": 0.4439252336448598,
"grad_norm": 0.15561099350452423,
"learning_rate": 1.1083528037383178e-06,
"loss": 0.0059,
"step": 760
},
{
"epoch": 0.4497663551401869,
"grad_norm": 0.26193615794181824,
"learning_rate": 1.1229556074766355e-06,
"loss": 0.0379,
"step": 770
},
{
"epoch": 0.45560747663551404,
"grad_norm": 0.1491398811340332,
"learning_rate": 1.1375584112149534e-06,
"loss": 0.0057,
"step": 780
},
{
"epoch": 0.4614485981308411,
"grad_norm": 0.11440080404281616,
"learning_rate": 1.152161214953271e-06,
"loss": 0.0055,
"step": 790
},
{
"epoch": 0.4672897196261682,
"grad_norm": 0.2670748233795166,
"learning_rate": 1.166764018691589e-06,
"loss": 0.0381,
"step": 800
},
{
"epoch": 0.47313084112149534,
"grad_norm": 0.14163456857204437,
"learning_rate": 1.1813668224299067e-06,
"loss": 0.0054,
"step": 810
},
{
"epoch": 0.47897196261682246,
"grad_norm": 0.3021210730075836,
"learning_rate": 1.1959696261682243e-06,
"loss": 0.0381,
"step": 820
},
{
"epoch": 0.4848130841121495,
"grad_norm": 0.15613315999507904,
"learning_rate": 1.2105724299065422e-06,
"loss": 0.0054,
"step": 830
},
{
"epoch": 0.49065420560747663,
"grad_norm": 0.14163915812969208,
"learning_rate": 1.22517523364486e-06,
"loss": 0.0051,
"step": 840
},
{
"epoch": 0.49649532710280375,
"grad_norm": 0.1025778204202652,
"learning_rate": 1.2397780373831776e-06,
"loss": 0.0049,
"step": 850
},
{
"epoch": 0.5023364485981309,
"grad_norm": 0.1480044722557068,
"learning_rate": 1.2543808411214955e-06,
"loss": 0.0382,
"step": 860
},
{
"epoch": 0.508177570093458,
"grad_norm": 0.10440000891685486,
"learning_rate": 1.268983644859813e-06,
"loss": 0.0049,
"step": 870
},
{
"epoch": 0.514018691588785,
"grad_norm": 0.10672589391469955,
"learning_rate": 1.2835864485981309e-06,
"loss": 0.0047,
"step": 880
},
{
"epoch": 0.5198598130841121,
"grad_norm": 0.09414487332105637,
"learning_rate": 1.2981892523364486e-06,
"loss": 0.0046,
"step": 890
},
{
"epoch": 0.5257009345794392,
"grad_norm": 0.08826252818107605,
"learning_rate": 1.3127920560747665e-06,
"loss": 0.0045,
"step": 900
},
{
"epoch": 0.5315420560747663,
"grad_norm": 0.08648844063282013,
"learning_rate": 1.3273948598130842e-06,
"loss": 0.0044,
"step": 910
},
{
"epoch": 0.5373831775700935,
"grad_norm": 0.08451962471008301,
"learning_rate": 1.341997663551402e-06,
"loss": 0.0043,
"step": 920
},
{
"epoch": 0.5432242990654206,
"grad_norm": 0.14131106436252594,
"learning_rate": 1.3566004672897197e-06,
"loss": 0.0384,
"step": 930
},
{
"epoch": 0.5490654205607477,
"grad_norm": 0.09261338412761688,
"learning_rate": 1.3712032710280376e-06,
"loss": 0.0043,
"step": 940
},
{
"epoch": 0.5549065420560748,
"grad_norm": 0.09553109854459763,
"learning_rate": 1.3858060747663551e-06,
"loss": 0.0042,
"step": 950
},
{
"epoch": 0.5607476635514018,
"grad_norm": 0.08126160502433777,
"learning_rate": 1.400408878504673e-06,
"loss": 0.0041,
"step": 960
},
{
"epoch": 0.5665887850467289,
"grad_norm": 0.07953794300556183,
"learning_rate": 1.4150116822429907e-06,
"loss": 0.004,
"step": 970
},
{
"epoch": 0.572429906542056,
"grad_norm": 0.07813739776611328,
"learning_rate": 1.4296144859813086e-06,
"loss": 0.004,
"step": 980
},
{
"epoch": 0.5782710280373832,
"grad_norm": 0.07673702389001846,
"learning_rate": 1.4442172897196263e-06,
"loss": 0.0039,
"step": 990
},
{
"epoch": 0.5841121495327103,
"grad_norm": 0.0752805769443512,
"learning_rate": 1.4588200934579442e-06,
"loss": 0.0038,
"step": 1000
},
{
"epoch": 0.5899532710280374,
"grad_norm": 0.1004716008901596,
"learning_rate": 1.4734228971962619e-06,
"loss": 0.0387,
"step": 1010
},
{
"epoch": 0.5957943925233645,
"grad_norm": 0.20554307103157043,
"learning_rate": 1.4880257009345793e-06,
"loss": 0.0387,
"step": 1020
},
{
"epoch": 0.6016355140186916,
"grad_norm": 0.23959581553936005,
"learning_rate": 1.5026285046728972e-06,
"loss": 0.004,
"step": 1030
},
{
"epoch": 0.6074766355140186,
"grad_norm": 0.12093233317136765,
"learning_rate": 1.517231308411215e-06,
"loss": 0.0038,
"step": 1040
},
{
"epoch": 0.6133177570093458,
"grad_norm": 0.15225279331207275,
"learning_rate": 1.5318341121495328e-06,
"loss": 0.0389,
"step": 1050
},
{
"epoch": 0.6191588785046729,
"grad_norm": 0.14517702162265778,
"learning_rate": 1.5464369158878505e-06,
"loss": 0.0037,
"step": 1060
},
{
"epoch": 0.625,
"grad_norm": 0.12021499127149582,
"learning_rate": 1.5610397196261684e-06,
"loss": 0.0036,
"step": 1070
},
{
"epoch": 0.6308411214953271,
"grad_norm": 0.07724525034427643,
"learning_rate": 1.575642523364486e-06,
"loss": 0.0035,
"step": 1080
},
{
"epoch": 0.6366822429906542,
"grad_norm": 0.07167772948741913,
"learning_rate": 1.590245327102804e-06,
"loss": 0.0034,
"step": 1090
},
{
"epoch": 0.6425233644859814,
"grad_norm": 0.06891785562038422,
"learning_rate": 1.6048481308411215e-06,
"loss": 0.0033,
"step": 1100
},
{
"epoch": 0.6483644859813084,
"grad_norm": 0.06666991114616394,
"learning_rate": 1.6194509345794394e-06,
"loss": 0.0033,
"step": 1110
},
{
"epoch": 0.6542056074766355,
"grad_norm": 0.11780866980552673,
"learning_rate": 1.634053738317757e-06,
"loss": 0.0391,
"step": 1120
},
{
"epoch": 0.6600467289719626,
"grad_norm": 0.06861135363578796,
"learning_rate": 1.648656542056075e-06,
"loss": 0.0032,
"step": 1130
},
{
"epoch": 0.6658878504672897,
"grad_norm": 0.0664181187748909,
"learning_rate": 1.6632593457943926e-06,
"loss": 0.0032,
"step": 1140
},
{
"epoch": 0.6717289719626168,
"grad_norm": 0.06380724161863327,
"learning_rate": 1.6778621495327105e-06,
"loss": 0.0031,
"step": 1150
},
{
"epoch": 0.677570093457944,
"grad_norm": 0.0625571459531784,
"learning_rate": 1.6924649532710282e-06,
"loss": 0.0031,
"step": 1160
},
{
"epoch": 0.6834112149532711,
"grad_norm": 0.06134733557701111,
"learning_rate": 1.7070677570093461e-06,
"loss": 0.003,
"step": 1170
},
{
"epoch": 0.6892523364485982,
"grad_norm": 0.06018327549099922,
"learning_rate": 1.7216705607476636e-06,
"loss": 0.0029,
"step": 1180
},
{
"epoch": 0.6950934579439252,
"grad_norm": 0.05909387394785881,
"learning_rate": 1.7362733644859813e-06,
"loss": 0.0029,
"step": 1190
},
{
"epoch": 0.7009345794392523,
"grad_norm": 0.058149177581071854,
"learning_rate": 1.7508761682242992e-06,
"loss": 0.0028,
"step": 1200
},
{
"epoch": 0.7067757009345794,
"grad_norm": 0.0572173111140728,
"learning_rate": 1.7654789719626169e-06,
"loss": 0.0028,
"step": 1210
},
{
"epoch": 0.7126168224299065,
"grad_norm": 0.05630593001842499,
"learning_rate": 1.7800817757009348e-06,
"loss": 0.0027,
"step": 1220
},
{
"epoch": 0.7184579439252337,
"grad_norm": 0.07457193732261658,
"learning_rate": 1.7946845794392527e-06,
"loss": 0.0397,
"step": 1230
},
{
"epoch": 0.7242990654205608,
"grad_norm": 0.05988035723567009,
"learning_rate": 1.8092873831775704e-06,
"loss": 0.0027,
"step": 1240
},
{
"epoch": 0.7301401869158879,
"grad_norm": 0.13969571888446808,
"learning_rate": 1.8238901869158878e-06,
"loss": 0.0397,
"step": 1250
},
{
"epoch": 0.735981308411215,
"grad_norm": 0.25887084007263184,
"learning_rate": 1.8384929906542057e-06,
"loss": 0.0398,
"step": 1260
},
{
"epoch": 0.741822429906542,
"grad_norm": 0.13206328451633453,
"learning_rate": 1.8530957943925234e-06,
"loss": 0.0031,
"step": 1270
},
{
"epoch": 0.7476635514018691,
"grad_norm": 0.07050861418247223,
"learning_rate": 1.8676985981308413e-06,
"loss": 0.0028,
"step": 1280
},
{
"epoch": 0.7535046728971962,
"grad_norm": 0.05721152201294899,
"learning_rate": 1.882301401869159e-06,
"loss": 0.0026,
"step": 1290
},
{
"epoch": 0.7593457943925234,
"grad_norm": 0.055426888167858124,
"learning_rate": 1.896904205607477e-06,
"loss": 0.0026,
"step": 1300
},
{
"epoch": 0.7651869158878505,
"grad_norm": 0.053014617413282394,
"learning_rate": 1.9115070093457946e-06,
"loss": 0.0025,
"step": 1310
},
{
"epoch": 0.7710280373831776,
"grad_norm": 0.07429449260234833,
"learning_rate": 1.9261098130841125e-06,
"loss": 0.0399,
"step": 1320
},
{
"epoch": 0.7768691588785047,
"grad_norm": 0.058152373880147934,
"learning_rate": 1.94071261682243e-06,
"loss": 0.0025,
"step": 1330
},
{
"epoch": 0.7827102803738317,
"grad_norm": 0.05262916162610054,
"learning_rate": 1.955315420560748e-06,
"loss": 0.0025,
"step": 1340
},
{
"epoch": 0.7885514018691588,
"grad_norm": 0.05100777745246887,
"learning_rate": 1.9699182242990658e-06,
"loss": 0.0024,
"step": 1350
},
{
"epoch": 0.794392523364486,
"grad_norm": 0.04984293133020401,
"learning_rate": 1.9845210280373832e-06,
"loss": 0.0024,
"step": 1360
},
{
"epoch": 0.8002336448598131,
"grad_norm": 0.048913147300481796,
"learning_rate": 1.999123831775701e-06,
"loss": 0.0023,
"step": 1370
},
{
"epoch": 0.8060747663551402,
"grad_norm": 0.04825076088309288,
"learning_rate": 2.013726635514019e-06,
"loss": 0.0023,
"step": 1380
},
{
"epoch": 0.8119158878504673,
"grad_norm": 0.04739582911133766,
"learning_rate": 2.028329439252337e-06,
"loss": 0.0022,
"step": 1390
},
{
"epoch": 0.8177570093457944,
"grad_norm": 0.04659786447882652,
"learning_rate": 2.0429322429906544e-06,
"loss": 0.0022,
"step": 1400
},
{
"epoch": 0.8235981308411215,
"grad_norm": 0.0471995547413826,
"learning_rate": 2.057535046728972e-06,
"loss": 0.0405,
"step": 1410
},
{
"epoch": 0.8294392523364486,
"grad_norm": 0.06635795533657074,
"learning_rate": 2.0721378504672898e-06,
"loss": 0.0405,
"step": 1420
},
{
"epoch": 0.8352803738317757,
"grad_norm": 0.05472889170050621,
"learning_rate": 2.0867406542056077e-06,
"loss": 0.0023,
"step": 1430
},
{
"epoch": 0.8411214953271028,
"grad_norm": 0.06456664949655533,
"learning_rate": 2.1013434579439256e-06,
"loss": 0.0404,
"step": 1440
},
{
"epoch": 0.8469626168224299,
"grad_norm": 0.06406297534704208,
"learning_rate": 2.115946261682243e-06,
"loss": 0.0024,
"step": 1450
},
{
"epoch": 0.852803738317757,
"grad_norm": 0.05212978273630142,
"learning_rate": 2.130549065420561e-06,
"loss": 0.0022,
"step": 1460
},
{
"epoch": 0.8586448598130841,
"grad_norm": 0.04937836900353432,
"learning_rate": 2.145151869158879e-06,
"loss": 0.0022,
"step": 1470
},
{
"epoch": 0.8644859813084113,
"grad_norm": 0.046520065516233444,
"learning_rate": 2.1597546728971963e-06,
"loss": 0.0021,
"step": 1480
},
{
"epoch": 0.8703271028037384,
"grad_norm": 0.044921793043613434,
"learning_rate": 2.174357476635514e-06,
"loss": 0.0021,
"step": 1490
},
{
"epoch": 0.8761682242990654,
"grad_norm": 0.04380778223276138,
"learning_rate": 2.188960280373832e-06,
"loss": 0.002,
"step": 1500
},
{
"epoch": 0.8820093457943925,
"grad_norm": 0.04308932647109032,
"learning_rate": 2.2035630841121496e-06,
"loss": 0.002,
"step": 1510
},
{
"epoch": 0.8878504672897196,
"grad_norm": 0.042391639202833176,
"learning_rate": 2.2181658878504675e-06,
"loss": 0.002,
"step": 1520
},
{
"epoch": 0.8936915887850467,
"grad_norm": 0.04182083159685135,
"learning_rate": 2.2327686915887854e-06,
"loss": 0.0019,
"step": 1530
},
{
"epoch": 0.8995327102803738,
"grad_norm": 0.04098886623978615,
"learning_rate": 2.2473714953271033e-06,
"loss": 0.0019,
"step": 1540
},
{
"epoch": 0.905373831775701,
"grad_norm": 0.040418434888124466,
"learning_rate": 2.2619742990654208e-06,
"loss": 0.0019,
"step": 1550
},
{
"epoch": 0.9112149532710281,
"grad_norm": 0.03962099552154541,
"learning_rate": 2.2765771028037382e-06,
"loss": 0.0018,
"step": 1560
},
{
"epoch": 0.9170560747663551,
"grad_norm": 0.039173197001218796,
"learning_rate": 2.291179906542056e-06,
"loss": 0.0018,
"step": 1570
},
{
"epoch": 0.9228971962616822,
"grad_norm": 0.038435958325862885,
"learning_rate": 2.305782710280374e-06,
"loss": 0.0018,
"step": 1580
},
{
"epoch": 0.9287383177570093,
"grad_norm": 0.03787772357463837,
"learning_rate": 2.320385514018692e-06,
"loss": 0.0017,
"step": 1590
},
{
"epoch": 0.9345794392523364,
"grad_norm": 0.03723740950226784,
"learning_rate": 2.3349883177570094e-06,
"loss": 0.0017,
"step": 1600
},
{
"epoch": 0.9404205607476636,
"grad_norm": 0.03676334023475647,
"learning_rate": 2.3495911214953273e-06,
"loss": 0.0017,
"step": 1610
},
{
"epoch": 0.9462616822429907,
"grad_norm": 0.036133646965026855,
"learning_rate": 2.364193925233645e-06,
"loss": 0.0016,
"step": 1620
},
{
"epoch": 0.9521028037383178,
"grad_norm": 0.03555206581950188,
"learning_rate": 2.3787967289719627e-06,
"loss": 0.0016,
"step": 1630
},
{
"epoch": 0.9579439252336449,
"grad_norm": 0.03623361513018608,
"learning_rate": 2.3933995327102806e-06,
"loss": 0.0418,
"step": 1640
},
{
"epoch": 0.9637850467289719,
"grad_norm": 0.036169301718473434,
"learning_rate": 2.4080023364485985e-06,
"loss": 0.0016,
"step": 1650
},
{
"epoch": 0.969626168224299,
"grad_norm": 0.035384681075811386,
"learning_rate": 2.422605140186916e-06,
"loss": 0.0016,
"step": 1660
},
{
"epoch": 0.9754672897196262,
"grad_norm": 0.04360009357333183,
"learning_rate": 2.437207943925234e-06,
"loss": 0.0419,
"step": 1670
},
{
"epoch": 0.9813084112149533,
"grad_norm": 0.0415232889354229,
"learning_rate": 2.4518107476635517e-06,
"loss": 0.0016,
"step": 1680
},
{
"epoch": 0.9871495327102804,
"grad_norm": 0.03600607439875603,
"learning_rate": 2.4664135514018696e-06,
"loss": 0.0016,
"step": 1690
},
{
"epoch": 0.9929906542056075,
"grad_norm": 0.09126334637403488,
"learning_rate": 2.481016355140187e-06,
"loss": 0.0419,
"step": 1700
},
{
"epoch": 0.9988317757009346,
"grad_norm": 0.051943764090538025,
"learning_rate": 2.4956191588785046e-06,
"loss": 0.0017,
"step": 1710
},
{
"epoch": 1.0,
"eval_f1": 0.998563109733041,
"eval_fbeta": 0.9977028760709574,
"eval_loss": 0.011804744601249695,
"eval_precision": 0.9971303428485123,
"eval_recall": 1.0,
"eval_runtime": 170.816,
"eval_samples_per_second": 68.711,
"eval_steps_per_second": 8.594,
"step": 1712
},
{
"epoch": 1.0046728971962617,
"grad_norm": 0.040207888931035995,
"learning_rate": 2.510221962616823e-06,
"loss": 0.0016,
"step": 1720
},
{
"epoch": 1.0105140186915889,
"grad_norm": 0.06412038952112198,
"learning_rate": 2.524824766355141e-06,
"loss": 0.0422,
"step": 1730
},
{
"epoch": 1.016355140186916,
"grad_norm": 0.04405215010046959,
"learning_rate": 2.5394275700934583e-06,
"loss": 0.0017,
"step": 1740
},
{
"epoch": 1.022196261682243,
"grad_norm": 0.037864964455366135,
"learning_rate": 2.5540303738317758e-06,
"loss": 0.0016,
"step": 1750
},
{
"epoch": 1.02803738317757,
"grad_norm": 0.035099685192108154,
"learning_rate": 2.5686331775700936e-06,
"loss": 0.0015,
"step": 1760
},
{
"epoch": 1.033878504672897,
"grad_norm": 0.03391886502504349,
"learning_rate": 2.583235981308411e-06,
"loss": 0.0015,
"step": 1770
},
{
"epoch": 1.0397196261682242,
"grad_norm": 0.03310862556099892,
"learning_rate": 2.597838785046729e-06,
"loss": 0.0015,
"step": 1780
},
{
"epoch": 1.0455607476635513,
"grad_norm": 0.032472554594278336,
"learning_rate": 2.612441588785047e-06,
"loss": 0.0014,
"step": 1790
},
{
"epoch": 1.0514018691588785,
"grad_norm": 0.03186679258942604,
"learning_rate": 2.627044392523365e-06,
"loss": 0.0014,
"step": 1800
},
{
"epoch": 1.0572429906542056,
"grad_norm": 0.03139648213982582,
"learning_rate": 2.6416471962616823e-06,
"loss": 0.0014,
"step": 1810
},
{
"epoch": 1.0630841121495327,
"grad_norm": 0.030965762212872505,
"learning_rate": 2.65625e-06,
"loss": 0.0014,
"step": 1820
},
{
"epoch": 1.0689252336448598,
"grad_norm": 0.03837515413761139,
"learning_rate": 2.670852803738318e-06,
"loss": 0.0426,
"step": 1830
},
{
"epoch": 1.074766355140187,
"grad_norm": 0.03224362060427666,
"learning_rate": 2.685455607476636e-06,
"loss": 0.0014,
"step": 1840
},
{
"epoch": 1.080607476635514,
"grad_norm": 0.10661806166172028,
"learning_rate": 2.7000584112149535e-06,
"loss": 0.0426,
"step": 1850
},
{
"epoch": 1.0864485981308412,
"grad_norm": 0.04082982614636421,
"learning_rate": 2.7146612149532714e-06,
"loss": 0.0016,
"step": 1860
},
{
"epoch": 1.0922897196261683,
"grad_norm": 0.08507952094078064,
"learning_rate": 2.7292640186915893e-06,
"loss": 0.0421,
"step": 1870
},
{
"epoch": 1.0981308411214954,
"grad_norm": 0.05080524832010269,
"learning_rate": 2.743866822429907e-06,
"loss": 0.0022,
"step": 1880
},
{
"epoch": 1.1039719626168225,
"grad_norm": 0.04093686118721962,
"learning_rate": 2.7584696261682246e-06,
"loss": 0.0016,
"step": 1890
},
{
"epoch": 1.1098130841121496,
"grad_norm": 0.03352896124124527,
"learning_rate": 2.773072429906542e-06,
"loss": 0.0014,
"step": 1900
},
{
"epoch": 1.1156542056074765,
"grad_norm": 0.03170730173587799,
"learning_rate": 2.78767523364486e-06,
"loss": 0.0014,
"step": 1910
},
{
"epoch": 1.1214953271028036,
"grad_norm": 0.030157096683979034,
"learning_rate": 2.8022780373831775e-06,
"loss": 0.0013,
"step": 1920
},
{
"epoch": 1.1273364485981308,
"grad_norm": 0.02939213439822197,
"learning_rate": 2.8168808411214954e-06,
"loss": 0.0013,
"step": 1930
},
{
"epoch": 1.1331775700934579,
"grad_norm": 0.028846966102719307,
"learning_rate": 2.8314836448598133e-06,
"loss": 0.0013,
"step": 1940
},
{
"epoch": 1.139018691588785,
"grad_norm": 0.02849399857223034,
"learning_rate": 2.846086448598131e-06,
"loss": 0.0012,
"step": 1950
},
{
"epoch": 1.144859813084112,
"grad_norm": 0.028026683256030083,
"learning_rate": 2.8606892523364486e-06,
"loss": 0.0012,
"step": 1960
},
{
"epoch": 1.1507009345794392,
"grad_norm": 0.029067154973745346,
"learning_rate": 2.8752920560747665e-06,
"loss": 0.0431,
"step": 1970
},
{
"epoch": 1.1565420560747663,
"grad_norm": 0.028508760035037994,
"learning_rate": 2.8898948598130844e-06,
"loss": 0.0012,
"step": 1980
},
{
"epoch": 1.1623831775700935,
"grad_norm": 0.028263544663786888,
"learning_rate": 2.9044976635514023e-06,
"loss": 0.0012,
"step": 1990
},
{
"epoch": 1.1682242990654206,
"grad_norm": 0.027732761576771736,
"learning_rate": 2.91910046728972e-06,
"loss": 0.0012,
"step": 2000
},
{
"epoch": 1.1740654205607477,
"grad_norm": 0.027435291558504105,
"learning_rate": 2.9337032710280377e-06,
"loss": 0.0012,
"step": 2010
},
{
"epoch": 1.1799065420560748,
"grad_norm": 0.02704302780330181,
"learning_rate": 2.9483060747663556e-06,
"loss": 0.0012,
"step": 2020
},
{
"epoch": 1.185747663551402,
"grad_norm": 0.02908189222216606,
"learning_rate": 2.9629088785046735e-06,
"loss": 0.0433,
"step": 2030
},
{
"epoch": 1.191588785046729,
"grad_norm": 0.04472142830491066,
"learning_rate": 2.977511682242991e-06,
"loss": 0.0433,
"step": 2040
},
{
"epoch": 1.1974299065420562,
"grad_norm": 0.033964864909648895,
"learning_rate": 2.9921144859813085e-06,
"loss": 0.0013,
"step": 2050
},
{
"epoch": 1.203271028037383,
"grad_norm": 0.030364977195858955,
"learning_rate": 3.0067172897196264e-06,
"loss": 0.0013,
"step": 2060
},
{
"epoch": 1.2091121495327104,
"grad_norm": 0.028072107583284378,
"learning_rate": 3.021320093457944e-06,
"loss": 0.0012,
"step": 2070
},
{
"epoch": 1.2149532710280373,
"grad_norm": 0.02732822299003601,
"learning_rate": 3.0359228971962617e-06,
"loss": 0.0012,
"step": 2080
},
{
"epoch": 1.2207943925233644,
"grad_norm": 0.026666365563869476,
"learning_rate": 3.0505257009345796e-06,
"loss": 0.0012,
"step": 2090
},
{
"epoch": 1.2266355140186915,
"grad_norm": 0.026195447891950607,
"learning_rate": 3.0651285046728975e-06,
"loss": 0.0011,
"step": 2100
},
{
"epoch": 1.2324766355140186,
"grad_norm": 0.02573522739112377,
"learning_rate": 3.079731308411215e-06,
"loss": 0.0011,
"step": 2110
},
{
"epoch": 1.2383177570093458,
"grad_norm": 0.025196623057127,
"learning_rate": 3.094334112149533e-06,
"loss": 0.0011,
"step": 2120
},
{
"epoch": 1.2441588785046729,
"grad_norm": 0.025058843195438385,
"learning_rate": 3.108936915887851e-06,
"loss": 0.0011,
"step": 2130
},
{
"epoch": 1.25,
"grad_norm": 0.02460530959069729,
"learning_rate": 3.1235397196261687e-06,
"loss": 0.0011,
"step": 2140
},
{
"epoch": 1.2558411214953271,
"grad_norm": 0.024240560829639435,
"learning_rate": 3.138142523364486e-06,
"loss": 0.001,
"step": 2150
},
{
"epoch": 1.2616822429906542,
"grad_norm": 0.023768194019794464,
"learning_rate": 3.152745327102804e-06,
"loss": 0.001,
"step": 2160
},
{
"epoch": 1.2675233644859814,
"grad_norm": 0.023436764255166054,
"learning_rate": 3.167348130841122e-06,
"loss": 0.001,
"step": 2170
},
{
"epoch": 1.2733644859813085,
"grad_norm": 0.022970303893089294,
"learning_rate": 3.18195093457944e-06,
"loss": 0.001,
"step": 2180
},
{
"epoch": 1.2792056074766356,
"grad_norm": 0.022840231657028198,
"learning_rate": 3.1965537383177573e-06,
"loss": 0.001,
"step": 2190
},
{
"epoch": 1.2850467289719627,
"grad_norm": 0.024615749716758728,
"learning_rate": 3.2111565420560752e-06,
"loss": 0.0443,
"step": 2200
},
{
"epoch": 1.2908878504672896,
"grad_norm": 0.023948077112436295,
"learning_rate": 3.2257593457943927e-06,
"loss": 0.001,
"step": 2210
},
{
"epoch": 1.296728971962617,
"grad_norm": 0.023156747221946716,
"learning_rate": 3.24036214953271e-06,
"loss": 0.001,
"step": 2220
},
{
"epoch": 1.3025700934579438,
"grad_norm": 0.022802365943789482,
"learning_rate": 3.254964953271028e-06,
"loss": 0.001,
"step": 2230
},
{
"epoch": 1.308411214953271,
"grad_norm": 0.022550372406840324,
"learning_rate": 3.269567757009346e-06,
"loss": 0.001,
"step": 2240
},
{
"epoch": 1.314252336448598,
"grad_norm": 0.022892745211720467,
"learning_rate": 3.284170560747664e-06,
"loss": 0.0443,
"step": 2250
},
{
"epoch": 1.3200934579439252,
"grad_norm": 0.02606125734746456,
"learning_rate": 3.2987733644859814e-06,
"loss": 0.001,
"step": 2260
},
{
"epoch": 1.3259345794392523,
"grad_norm": 0.024206535890698433,
"learning_rate": 3.3133761682242993e-06,
"loss": 0.001,
"step": 2270
},
{
"epoch": 1.3317757009345794,
"grad_norm": 0.02334391325712204,
"learning_rate": 3.327978971962617e-06,
"loss": 0.001,
"step": 2280
},
{
"epoch": 1.3376168224299065,
"grad_norm": 0.022638387978076935,
"learning_rate": 3.342581775700935e-06,
"loss": 0.001,
"step": 2290
},
{
"epoch": 1.3434579439252337,
"grad_norm": 0.022134365513920784,
"learning_rate": 3.3571845794392525e-06,
"loss": 0.0009,
"step": 2300
},
{
"epoch": 1.3492990654205608,
"grad_norm": 0.03821112960577011,
"learning_rate": 3.3717873831775704e-06,
"loss": 0.0443,
"step": 2310
},
{
"epoch": 1.355140186915888,
"grad_norm": 0.0276421457529068,
"learning_rate": 3.3863901869158883e-06,
"loss": 0.0011,
"step": 2320
},
{
"epoch": 1.360981308411215,
"grad_norm": 0.02461417205631733,
"learning_rate": 3.4009929906542062e-06,
"loss": 0.001,
"step": 2330
},
{
"epoch": 1.3668224299065421,
"grad_norm": 0.022621462121605873,
"learning_rate": 3.4155957943925237e-06,
"loss": 0.001,
"step": 2340
},
{
"epoch": 1.3726635514018692,
"grad_norm": 0.02172948606312275,
"learning_rate": 3.4301985981308416e-06,
"loss": 0.0009,
"step": 2350
},
{
"epoch": 1.3785046728971961,
"grad_norm": 0.021321304142475128,
"learning_rate": 3.444801401869159e-06,
"loss": 0.0009,
"step": 2360
},
{
"epoch": 1.3843457943925235,
"grad_norm": 0.026693595573306084,
"learning_rate": 3.4594042056074765e-06,
"loss": 0.0445,
"step": 2370
},
{
"epoch": 1.3901869158878504,
"grad_norm": 0.024471307173371315,
"learning_rate": 3.4740070093457944e-06,
"loss": 0.001,
"step": 2380
},
{
"epoch": 1.3960280373831775,
"grad_norm": 0.023060401901602745,
"learning_rate": 3.4886098130841123e-06,
"loss": 0.001,
"step": 2390
},
{
"epoch": 1.4018691588785046,
"grad_norm": 0.025215281173586845,
"learning_rate": 3.5032126168224302e-06,
"loss": 0.0438,
"step": 2400
},
{
"epoch": 1.4077102803738317,
"grad_norm": 0.031053408980369568,
"learning_rate": 3.5178154205607477e-06,
"loss": 0.0012,
"step": 2410
},
{
"epoch": 1.4135514018691588,
"grad_norm": 0.02737055905163288,
"learning_rate": 3.5324182242990656e-06,
"loss": 0.001,
"step": 2420
},
{
"epoch": 1.419392523364486,
"grad_norm": 0.02308940328657627,
"learning_rate": 3.5470210280373835e-06,
"loss": 0.001,
"step": 2430
},
{
"epoch": 1.425233644859813,
"grad_norm": 0.021701965481042862,
"learning_rate": 3.5616238317757014e-06,
"loss": 0.0009,
"step": 2440
},
{
"epoch": 1.4310747663551402,
"grad_norm": 0.021604137495160103,
"learning_rate": 3.576226635514019e-06,
"loss": 0.0009,
"step": 2450
},
{
"epoch": 1.4369158878504673,
"grad_norm": 0.020978761836886406,
"learning_rate": 3.5908294392523368e-06,
"loss": 0.0009,
"step": 2460
},
{
"epoch": 1.4427570093457944,
"grad_norm": 2.0315942764282227,
"learning_rate": 3.6054322429906547e-06,
"loss": 0.0881,
"step": 2470
},
{
"epoch": 1.4485981308411215,
"grad_norm": 0.053938183933496475,
"learning_rate": 3.6200350467289726e-06,
"loss": 0.0019,
"step": 2480
},
{
"epoch": 1.4544392523364487,
"grad_norm": 0.03417206183075905,
"learning_rate": 3.63463785046729e-06,
"loss": 0.0012,
"step": 2490
},
{
"epoch": 1.4602803738317758,
"grad_norm": 0.025503478944301605,
"learning_rate": 3.649240654205608e-06,
"loss": 0.001,
"step": 2500
},
{
"epoch": 1.4661214953271027,
"grad_norm": 0.02286619506776333,
"learning_rate": 3.6638434579439254e-06,
"loss": 0.001,
"step": 2510
},
{
"epoch": 1.47196261682243,
"grad_norm": 0.02164420299232006,
"learning_rate": 3.678446261682243e-06,
"loss": 0.0009,
"step": 2520
},
{
"epoch": 1.477803738317757,
"grad_norm": 0.028322748839855194,
"learning_rate": 3.693049065420561e-06,
"loss": 0.0446,
"step": 2530
},
{
"epoch": 1.4836448598130842,
"grad_norm": 0.0625884160399437,
"learning_rate": 3.7076518691588787e-06,
"loss": 0.0435,
"step": 2540
},
{
"epoch": 1.4894859813084111,
"grad_norm": 0.030157284811139107,
"learning_rate": 3.7222546728971966e-06,
"loss": 0.0013,
"step": 2550
},
{
"epoch": 1.4953271028037383,
"grad_norm": 0.024576175957918167,
"learning_rate": 3.736857476635514e-06,
"loss": 0.0011,
"step": 2560
},
{
"epoch": 1.5011682242990654,
"grad_norm": 0.022601209580898285,
"learning_rate": 3.751460280373832e-06,
"loss": 0.001,
"step": 2570
},
{
"epoch": 1.5070093457943925,
"grad_norm": 0.02741135098040104,
"learning_rate": 3.76606308411215e-06,
"loss": 0.0437,
"step": 2580
},
{
"epoch": 1.5128504672897196,
"grad_norm": 2.4886422157287598,
"learning_rate": 3.7806658878504678e-06,
"loss": 0.0776,
"step": 2590
},
{
"epoch": 1.5186915887850467,
"grad_norm": 0.049459826201200485,
"learning_rate": 3.7952686915887852e-06,
"loss": 0.0025,
"step": 2600
},
{
"epoch": 1.5245327102803738,
"grad_norm": 0.03191259503364563,
"learning_rate": 3.809871495327103e-06,
"loss": 0.0013,
"step": 2610
},
{
"epoch": 1.530373831775701,
"grad_norm": 0.025913212448358536,
"learning_rate": 3.824474299065421e-06,
"loss": 0.0011,
"step": 2620
},
{
"epoch": 1.536214953271028,
"grad_norm": 0.02325914427638054,
"learning_rate": 3.839077102803739e-06,
"loss": 0.001,
"step": 2630
},
{
"epoch": 1.542056074766355,
"grad_norm": 0.02170516550540924,
"learning_rate": 3.853679906542057e-06,
"loss": 0.0009,
"step": 2640
},
{
"epoch": 1.5478971962616823,
"grad_norm": 0.020911023020744324,
"learning_rate": 3.868282710280375e-06,
"loss": 0.0009,
"step": 2650
},
{
"epoch": 1.5537383177570092,
"grad_norm": 0.020392388105392456,
"learning_rate": 3.882885514018692e-06,
"loss": 0.0009,
"step": 2660
},
{
"epoch": 1.5595794392523366,
"grad_norm": 0.02007397636771202,
"learning_rate": 3.89748831775701e-06,
"loss": 0.0008,
"step": 2670
},
{
"epoch": 1.5654205607476634,
"grad_norm": 1.490532398223877,
"learning_rate": 3.912091121495328e-06,
"loss": 0.0451,
"step": 2680
},
{
"epoch": 1.5712616822429908,
"grad_norm": 0.020575718954205513,
"learning_rate": 3.926693925233645e-06,
"loss": 0.0008,
"step": 2690
},
{
"epoch": 1.5771028037383177,
"grad_norm": 0.02088101953268051,
"learning_rate": 3.9412967289719625e-06,
"loss": 0.0009,
"step": 2700
},
{
"epoch": 1.582943925233645,
"grad_norm": 0.022468693554401398,
"learning_rate": 3.9558995327102804e-06,
"loss": 0.0446,
"step": 2710
},
{
"epoch": 1.588785046728972,
"grad_norm": 0.41546332836151123,
"learning_rate": 3.970502336448598e-06,
"loss": 0.2174,
"step": 2720
},
{
"epoch": 1.594626168224299,
"grad_norm": 29.55653190612793,
"learning_rate": 3.985105140186916e-06,
"loss": 0.7007,
"step": 2730
},
{
"epoch": 1.6004672897196262,
"grad_norm": 0.23895660042762756,
"learning_rate": 3.999707943925234e-06,
"loss": 0.2632,
"step": 2740
},
{
"epoch": 1.6063084112149533,
"grad_norm": 20.28415298461914,
"learning_rate": 4.014310747663552e-06,
"loss": 0.3131,
"step": 2750
},
{
"epoch": 1.6121495327102804,
"grad_norm": 66.40315246582031,
"learning_rate": 4.02891355140187e-06,
"loss": 1.465,
"step": 2760
},
{
"epoch": 1.6179906542056075,
"grad_norm": 1.3744255304336548,
"learning_rate": 4.043516355140187e-06,
"loss": 0.2324,
"step": 2770
},
{
"epoch": 1.6238317757009346,
"grad_norm": 0.07546886801719666,
"learning_rate": 4.058119158878505e-06,
"loss": 0.0066,
"step": 2780
},
{
"epoch": 1.6296728971962615,
"grad_norm": 0.05592099949717522,
"learning_rate": 4.072721962616823e-06,
"loss": 0.0421,
"step": 2790
},
{
"epoch": 1.6355140186915889,
"grad_norm": 0.053602512925863266,
"learning_rate": 4.087324766355141e-06,
"loss": 0.0022,
"step": 2800
},
{
"epoch": 1.6413551401869158,
"grad_norm": 0.0363592766225338,
"learning_rate": 4.1019275700934586e-06,
"loss": 0.0018,
"step": 2810
},
{
"epoch": 1.647196261682243,
"grad_norm": 0.04215683415532112,
"learning_rate": 4.116530373831776e-06,
"loss": 0.042,
"step": 2820
},
{
"epoch": 1.65303738317757,
"grad_norm": 0.04295524209737778,
"learning_rate": 4.1311331775700935e-06,
"loss": 0.0018,
"step": 2830
},
{
"epoch": 1.6588785046728973,
"grad_norm": 0.03542430326342583,
"learning_rate": 4.145735981308411e-06,
"loss": 0.0016,
"step": 2840
},
{
"epoch": 1.6647196261682242,
"grad_norm": 0.0312524288892746,
"learning_rate": 4.160338785046729e-06,
"loss": 0.0014,
"step": 2850
},
{
"epoch": 1.6705607476635516,
"grad_norm": 0.027192620560526848,
"learning_rate": 4.174941588785047e-06,
"loss": 0.0013,
"step": 2860
},
{
"epoch": 1.6764018691588785,
"grad_norm": 0.027118809521198273,
"learning_rate": 4.189544392523365e-06,
"loss": 0.0012,
"step": 2870
},
{
"epoch": 1.6822429906542056,
"grad_norm": 0.02445983700454235,
"learning_rate": 4.204147196261682e-06,
"loss": 0.0011,
"step": 2880
},
{
"epoch": 1.6880841121495327,
"grad_norm": 0.024152180179953575,
"learning_rate": 4.21875e-06,
"loss": 0.001,
"step": 2890
},
{
"epoch": 1.6939252336448598,
"grad_norm": 0.0222761407494545,
"learning_rate": 4.233352803738318e-06,
"loss": 0.001,
"step": 2900
},
{
"epoch": 1.699766355140187,
"grad_norm": 0.024713346734642982,
"learning_rate": 4.247955607476636e-06,
"loss": 0.0443,
"step": 2910
},
{
"epoch": 1.705607476635514,
"grad_norm": 0.025482937693595886,
"learning_rate": 4.262558411214954e-06,
"loss": 0.001,
"step": 2920
},
{
"epoch": 1.7114485981308412,
"grad_norm": 0.024490345269441605,
"learning_rate": 4.277161214953272e-06,
"loss": 0.001,
"step": 2930
},
{
"epoch": 1.7172897196261683,
"grad_norm": 0.02450953796505928,
"learning_rate": 4.2917640186915895e-06,
"loss": 0.0436,
"step": 2940
},
{
"epoch": 1.7231308411214954,
"grad_norm": 0.027671849355101585,
"learning_rate": 4.3063668224299074e-06,
"loss": 0.0011,
"step": 2950
},
{
"epoch": 1.7289719626168223,
"grad_norm": 0.03285020962357521,
"learning_rate": 4.3209696261682245e-06,
"loss": 0.0426,
"step": 2960
},
{
"epoch": 1.7348130841121496,
"grad_norm": 0.03429288789629936,
"learning_rate": 4.335572429906542e-06,
"loss": 0.0014,
"step": 2970
},
{
"epoch": 1.7406542056074765,
"grad_norm": 0.03147612139582634,
"learning_rate": 4.35017523364486e-06,
"loss": 0.0014,
"step": 2980
},
{
"epoch": 1.7464953271028039,
"grad_norm": 0.5087982416152954,
"learning_rate": 4.364778037383177e-06,
"loss": 0.0049,
"step": 2990
},
{
"epoch": 1.7523364485981308,
"grad_norm": 0.04070281982421875,
"learning_rate": 4.379380841121495e-06,
"loss": 0.028,
"step": 3000
},
{
"epoch": 1.758177570093458,
"grad_norm": 24.421571731567383,
"learning_rate": 4.393983644859813e-06,
"loss": 0.9912,
"step": 3010
},
{
"epoch": 1.764018691588785,
"grad_norm": 3.793189287185669,
"learning_rate": 4.408586448598131e-06,
"loss": 0.8169,
"step": 3020
},
{
"epoch": 1.769859813084112,
"grad_norm": 0.18406537175178528,
"learning_rate": 4.423189252336449e-06,
"loss": 0.6664,
"step": 3030
},
{
"epoch": 1.7757009345794392,
"grad_norm": 3.0396714210510254,
"learning_rate": 4.437792056074767e-06,
"loss": 0.6962,
"step": 3040
},
{
"epoch": 1.7815420560747663,
"grad_norm": 1.405199408531189,
"learning_rate": 4.452394859813085e-06,
"loss": 0.7149,
"step": 3050
},
{
"epoch": 1.7873831775700935,
"grad_norm": 3.036806106567383,
"learning_rate": 4.466997663551403e-06,
"loss": 0.6842,
"step": 3060
},
{
"epoch": 1.7932242990654206,
"grad_norm": 1.536515235900879,
"learning_rate": 4.48160046728972e-06,
"loss": 0.3574,
"step": 3070
},
{
"epoch": 1.7990654205607477,
"grad_norm": 0.11639049649238586,
"learning_rate": 4.496203271028038e-06,
"loss": 0.0598,
"step": 3080
},
{
"epoch": 1.8049065420560748,
"grad_norm": 0.11044025421142578,
"learning_rate": 4.5108060747663555e-06,
"loss": 0.0701,
"step": 3090
},
{
"epoch": 1.810747663551402,
"grad_norm": 0.102878138422966,
"learning_rate": 4.525408878504673e-06,
"loss": 0.1514,
"step": 3100
},
{
"epoch": 1.8165887850467288,
"grad_norm": 7.707913875579834,
"learning_rate": 4.540011682242991e-06,
"loss": 0.7924,
"step": 3110
},
{
"epoch": 1.8224299065420562,
"grad_norm": 5.9456353187561035,
"learning_rate": 4.554614485981308e-06,
"loss": 1.3826,
"step": 3120
},
{
"epoch": 1.828271028037383,
"grad_norm": 2.2925925254821777,
"learning_rate": 4.569217289719626e-06,
"loss": 0.7252,
"step": 3130
},
{
"epoch": 1.8341121495327104,
"grad_norm": 2.821223258972168,
"learning_rate": 4.583820093457944e-06,
"loss": 0.7524,
"step": 3140
},
{
"epoch": 1.8399532710280373,
"grad_norm": 2.5678889751434326,
"learning_rate": 4.598422897196262e-06,
"loss": 0.4001,
"step": 3150
},
{
"epoch": 1.8457943925233646,
"grad_norm": 0.44236326217651367,
"learning_rate": 4.61302570093458e-06,
"loss": 0.0417,
"step": 3160
},
{
"epoch": 1.8516355140186915,
"grad_norm": 0.0788566917181015,
"learning_rate": 4.627628504672898e-06,
"loss": 0.0075,
"step": 3170
},
{
"epoch": 1.8574766355140186,
"grad_norm": 0.06400685757398605,
"learning_rate": 4.642231308411215e-06,
"loss": 0.0035,
"step": 3180
},
{
"epoch": 1.8633177570093458,
"grad_norm": 0.050487253814935684,
"learning_rate": 4.656834112149533e-06,
"loss": 0.0025,
"step": 3190
},
{
"epoch": 1.8691588785046729,
"grad_norm": 0.042493101209402084,
"learning_rate": 4.671436915887851e-06,
"loss": 0.0423,
"step": 3200
},
{
"epoch": 1.875,
"grad_norm": 0.04191233962774277,
"learning_rate": 4.6860397196261686e-06,
"loss": 0.0019,
"step": 3210
},
{
"epoch": 1.8808411214953271,
"grad_norm": 0.04269988089799881,
"learning_rate": 4.7006425233644865e-06,
"loss": 0.0414,
"step": 3220
},
{
"epoch": 1.8866822429906542,
"grad_norm": 0.040563952177762985,
"learning_rate": 4.715245327102804e-06,
"loss": 0.0019,
"step": 3230
},
{
"epoch": 1.8925233644859814,
"grad_norm": 0.04327201843261719,
"learning_rate": 4.729848130841122e-06,
"loss": 0.0405,
"step": 3240
},
{
"epoch": 1.8983644859813085,
"grad_norm": 0.04342912510037422,
"learning_rate": 4.74445093457944e-06,
"loss": 0.0022,
"step": 3250
},
{
"epoch": 1.9042056074766354,
"grad_norm": 0.04997817426919937,
"learning_rate": 4.759053738317758e-06,
"loss": 0.0397,
"step": 3260
},
{
"epoch": 1.9100467289719627,
"grad_norm": 1.362629771232605,
"learning_rate": 4.773656542056075e-06,
"loss": 0.0386,
"step": 3270
},
{
"epoch": 1.9158878504672896,
"grad_norm": 0.05325537174940109,
"learning_rate": 4.788259345794393e-06,
"loss": 0.0027,
"step": 3280
},
{
"epoch": 1.921728971962617,
"grad_norm": 0.04630275070667267,
"learning_rate": 4.80286214953271e-06,
"loss": 0.0028,
"step": 3290
},
{
"epoch": 1.9275700934579438,
"grad_norm": 0.04541867598891258,
"learning_rate": 4.817464953271028e-06,
"loss": 0.0022,
"step": 3300
},
{
"epoch": 1.9334112149532712,
"grad_norm": 0.04611349105834961,
"learning_rate": 4.832067757009346e-06,
"loss": 0.0398,
"step": 3310
},
{
"epoch": 1.939252336448598,
"grad_norm": 0.04701936990022659,
"learning_rate": 4.846670560747664e-06,
"loss": 0.0391,
"step": 3320
},
{
"epoch": 1.9450934579439252,
"grad_norm": 0.054911911487579346,
"learning_rate": 4.861273364485982e-06,
"loss": 0.0026,
"step": 3330
},
{
"epoch": 1.9509345794392523,
"grad_norm": 0.05357871577143669,
"learning_rate": 4.8758761682242995e-06,
"loss": 0.0024,
"step": 3340
},
{
"epoch": 1.9567757009345794,
"grad_norm": 0.042115770280361176,
"learning_rate": 4.8904789719626174e-06,
"loss": 0.002,
"step": 3350
},
{
"epoch": 1.9626168224299065,
"grad_norm": 0.0314444899559021,
"learning_rate": 4.905081775700935e-06,
"loss": 0.0017,
"step": 3360
},
{
"epoch": 1.9684579439252337,
"grad_norm": 0.029266072437167168,
"learning_rate": 4.919684579439253e-06,
"loss": 0.0015,
"step": 3370
},
{
"epoch": 1.9742990654205608,
"grad_norm": 0.03335956856608391,
"learning_rate": 4.93428738317757e-06,
"loss": 0.0014,
"step": 3380
},
{
"epoch": 1.980140186915888,
"grad_norm": 0.029220635071396828,
"learning_rate": 4.948890186915888e-06,
"loss": 0.0013,
"step": 3390
},
{
"epoch": 1.985981308411215,
"grad_norm": 0.026790356263518333,
"learning_rate": 4.963492990654206e-06,
"loss": 0.0013,
"step": 3400
},
{
"epoch": 1.991822429906542,
"grad_norm": 0.026554999873042107,
"learning_rate": 4.978095794392524e-06,
"loss": 0.0421,
"step": 3410
},
{
"epoch": 1.9976635514018692,
"grad_norm": 0.027294134721159935,
"learning_rate": 4.992698598130842e-06,
"loss": 0.0012,
"step": 3420
},
{
"epoch": 2.0,
"eval_f1": 0.9984869117869571,
"eval_fbeta": 0.9978526993915837,
"eval_loss": 0.01201227679848671,
"eval_precision": 0.9974304715840387,
"eval_recall": 0.9995455922447744,
"eval_runtime": 171.0255,
"eval_samples_per_second": 68.627,
"eval_steps_per_second": 8.584,
"step": 3424
},
{
"epoch": 2.003504672897196,
"grad_norm": 0.0311049185693264,
"learning_rate": 5.007301401869159e-06,
"loss": 0.0013,
"step": 3430
},
{
"epoch": 2.0093457943925235,
"grad_norm": 0.029238952323794365,
"learning_rate": 5.021904205607478e-06,
"loss": 0.0447,
"step": 3440
},
{
"epoch": 2.0151869158878504,
"grad_norm": 0.07359280437231064,
"learning_rate": 5.036507009345795e-06,
"loss": 0.0014,
"step": 3450
},
{
"epoch": 2.0210280373831777,
"grad_norm": 0.029126616194844246,
"learning_rate": 5.051109813084113e-06,
"loss": 0.0786,
"step": 3460
},
{
"epoch": 2.0268691588785046,
"grad_norm": 0.02853413298726082,
"learning_rate": 5.0657126168224305e-06,
"loss": 0.0013,
"step": 3470
},
{
"epoch": 2.032710280373832,
"grad_norm": 0.02674773521721363,
"learning_rate": 5.0803154205607484e-06,
"loss": 0.0013,
"step": 3480
},
{
"epoch": 2.038551401869159,
"grad_norm": 0.02553749829530716,
"learning_rate": 5.0949182242990655e-06,
"loss": 0.0325,
"step": 3490
},
{
"epoch": 2.044392523364486,
"grad_norm": 0.023599721491336823,
"learning_rate": 5.109521028037384e-06,
"loss": 0.0011,
"step": 3500
},
{
"epoch": 2.050233644859813,
"grad_norm": 0.02482794038951397,
"learning_rate": 5.124123831775701e-06,
"loss": 0.0011,
"step": 3510
},
{
"epoch": 2.05607476635514,
"grad_norm": 0.026357341557741165,
"learning_rate": 5.138726635514018e-06,
"loss": 0.001,
"step": 3520
},
{
"epoch": 2.0619158878504673,
"grad_norm": 0.024338532239198685,
"learning_rate": 5.153329439252337e-06,
"loss": 0.001,
"step": 3530
},
{
"epoch": 2.067757009345794,
"grad_norm": 0.020824845880270004,
"learning_rate": 5.167932242990654e-06,
"loss": 0.001,
"step": 3540
},
{
"epoch": 2.0735981308411215,
"grad_norm": 0.023196179419755936,
"learning_rate": 5.182535046728973e-06,
"loss": 0.001,
"step": 3550
},
{
"epoch": 2.0794392523364484,
"grad_norm": 0.021305715665221214,
"learning_rate": 5.19713785046729e-06,
"loss": 0.0009,
"step": 3560
},
{
"epoch": 2.085280373831776,
"grad_norm": 0.019744129851460457,
"learning_rate": 5.211740654205608e-06,
"loss": 0.0009,
"step": 3570
},
{
"epoch": 2.0911214953271027,
"grad_norm": 0.01918872818350792,
"learning_rate": 5.226343457943926e-06,
"loss": 0.0008,
"step": 3580
},
{
"epoch": 2.09696261682243,
"grad_norm": 0.01923677884042263,
"learning_rate": 5.240946261682244e-06,
"loss": 0.0008,
"step": 3590
},
{
"epoch": 2.102803738317757,
"grad_norm": 0.020027387887239456,
"learning_rate": 5.255549065420561e-06,
"loss": 0.0444,
"step": 3600
},
{
"epoch": 2.1086448598130842,
"grad_norm": 0.020742233842611313,
"learning_rate": 5.270151869158879e-06,
"loss": 0.0437,
"step": 3610
},
{
"epoch": 2.114485981308411,
"grad_norm": 0.02387331984937191,
"learning_rate": 5.2847546728971965e-06,
"loss": 0.001,
"step": 3620
},
{
"epoch": 2.1203271028037385,
"grad_norm": 0.024925414472818375,
"learning_rate": 5.299357476635515e-06,
"loss": 0.001,
"step": 3630
},
{
"epoch": 2.1261682242990654,
"grad_norm": 0.02018832229077816,
"learning_rate": 5.313960280373832e-06,
"loss": 0.0009,
"step": 3640
},
{
"epoch": 2.1320093457943927,
"grad_norm": 0.023544279858469963,
"learning_rate": 5.32856308411215e-06,
"loss": 0.001,
"step": 3650
},
{
"epoch": 2.1378504672897196,
"grad_norm": 0.02338625304400921,
"learning_rate": 5.343165887850468e-06,
"loss": 0.001,
"step": 3660
},
{
"epoch": 2.143691588785047,
"grad_norm": 0.021194949746131897,
"learning_rate": 5.357768691588785e-06,
"loss": 0.0009,
"step": 3670
},
{
"epoch": 2.149532710280374,
"grad_norm": 0.02002919651567936,
"learning_rate": 5.372371495327103e-06,
"loss": 0.0436,
"step": 3680
},
{
"epoch": 2.1553738317757007,
"grad_norm": 0.024264391511678696,
"learning_rate": 5.386974299065421e-06,
"loss": 0.001,
"step": 3690
},
{
"epoch": 2.161214953271028,
"grad_norm": 0.025951523333787918,
"learning_rate": 5.401577102803739e-06,
"loss": 0.0423,
"step": 3700
},
{
"epoch": 2.167056074766355,
"grad_norm": 0.03249853104352951,
"learning_rate": 5.416179906542056e-06,
"loss": 0.0013,
"step": 3710
},
{
"epoch": 2.1728971962616823,
"grad_norm": 0.028281256556510925,
"learning_rate": 5.430782710280375e-06,
"loss": 0.0012,
"step": 3720
},
{
"epoch": 2.178738317757009,
"grad_norm": 0.02567378431558609,
"learning_rate": 5.445385514018692e-06,
"loss": 0.0011,
"step": 3730
},
{
"epoch": 2.1845794392523366,
"grad_norm": 0.021611211821436882,
"learning_rate": 5.45998831775701e-06,
"loss": 0.0011,
"step": 3740
},
{
"epoch": 2.1904205607476634,
"grad_norm": 0.02238270454108715,
"learning_rate": 5.4745911214953274e-06,
"loss": 0.0009,
"step": 3750
},
{
"epoch": 2.196261682242991,
"grad_norm": 0.02387162297964096,
"learning_rate": 5.489193925233645e-06,
"loss": 0.0432,
"step": 3760
},
{
"epoch": 2.2021028037383177,
"grad_norm": 0.02597387693822384,
"learning_rate": 5.503796728971963e-06,
"loss": 0.0011,
"step": 3770
},
{
"epoch": 2.207943925233645,
"grad_norm": 0.023671170696616173,
"learning_rate": 5.518399532710281e-06,
"loss": 0.0011,
"step": 3780
},
{
"epoch": 2.213785046728972,
"grad_norm": 0.023667603731155396,
"learning_rate": 5.533002336448598e-06,
"loss": 0.001,
"step": 3790
},
{
"epoch": 2.2196261682242993,
"grad_norm": 0.027072029188275337,
"learning_rate": 5.547605140186917e-06,
"loss": 0.0427,
"step": 3800
},
{
"epoch": 2.225467289719626,
"grad_norm": 0.02555328793823719,
"learning_rate": 5.562207943925234e-06,
"loss": 0.0012,
"step": 3810
},
{
"epoch": 2.231308411214953,
"grad_norm": 0.02583390660583973,
"learning_rate": 5.576810747663551e-06,
"loss": 0.0012,
"step": 3820
},
{
"epoch": 2.2371495327102804,
"grad_norm": 0.022327054291963577,
"learning_rate": 5.59141355140187e-06,
"loss": 0.0011,
"step": 3830
},
{
"epoch": 2.2429906542056073,
"grad_norm": 0.02613520622253418,
"learning_rate": 5.606016355140187e-06,
"loss": 0.001,
"step": 3840
},
{
"epoch": 2.2488317757009346,
"grad_norm": 0.020380638539791107,
"learning_rate": 5.620619158878506e-06,
"loss": 0.0009,
"step": 3850
},
{
"epoch": 2.2546728971962615,
"grad_norm": 0.02417253516614437,
"learning_rate": 5.635221962616823e-06,
"loss": 0.001,
"step": 3860
},
{
"epoch": 2.260514018691589,
"grad_norm": 0.01819733716547489,
"learning_rate": 5.6498247663551405e-06,
"loss": 0.0009,
"step": 3870
},
{
"epoch": 2.2663551401869158,
"grad_norm": 0.01892516203224659,
"learning_rate": 5.6644275700934584e-06,
"loss": 0.0008,
"step": 3880
},
{
"epoch": 2.272196261682243,
"grad_norm": 0.02071457915008068,
"learning_rate": 5.679030373831776e-06,
"loss": 0.0008,
"step": 3890
},
{
"epoch": 2.27803738317757,
"grad_norm": 0.01727299951016903,
"learning_rate": 5.693633177570093e-06,
"loss": 0.0008,
"step": 3900
},
{
"epoch": 2.2838785046728973,
"grad_norm": 0.017327282577753067,
"learning_rate": 5.708235981308412e-06,
"loss": 0.0007,
"step": 3910
},
{
"epoch": 2.289719626168224,
"grad_norm": 0.016446802765130997,
"learning_rate": 5.722838785046729e-06,
"loss": 0.0007,
"step": 3920
},
{
"epoch": 2.2955607476635516,
"grad_norm": 0.01564253680408001,
"learning_rate": 5.737441588785048e-06,
"loss": 0.0007,
"step": 3930
},
{
"epoch": 2.3014018691588785,
"grad_norm": 0.01652144268155098,
"learning_rate": 5.752044392523365e-06,
"loss": 0.0007,
"step": 3940
},
{
"epoch": 2.307242990654206,
"grad_norm": 0.015415907837450504,
"learning_rate": 5.766647196261683e-06,
"loss": 0.0007,
"step": 3950
},
{
"epoch": 2.3130841121495327,
"grad_norm": 0.015839243307709694,
"learning_rate": 5.781250000000001e-06,
"loss": 0.0007,
"step": 3960
},
{
"epoch": 2.31892523364486,
"grad_norm": 0.01590505987405777,
"learning_rate": 5.795852803738318e-06,
"loss": 0.0456,
"step": 3970
},
{
"epoch": 2.324766355140187,
"grad_norm": 0.016949482262134552,
"learning_rate": 5.810455607476636e-06,
"loss": 0.0007,
"step": 3980
},
{
"epoch": 2.330607476635514,
"grad_norm": 1.5004304647445679,
"learning_rate": 5.825058411214954e-06,
"loss": 0.0449,
"step": 3990
},
{
"epoch": 2.336448598130841,
"grad_norm": 0.019646212458610535,
"learning_rate": 5.8396612149532715e-06,
"loss": 0.0008,
"step": 4000
},
{
"epoch": 2.342289719626168,
"grad_norm": 0.017813665792346,
"learning_rate": 5.8542640186915886e-06,
"loss": 0.0008,
"step": 4010
},
{
"epoch": 2.3481308411214954,
"grad_norm": 0.021812189370393753,
"learning_rate": 5.868866822429907e-06,
"loss": 0.0008,
"step": 4020
},
{
"epoch": 2.3539719626168223,
"grad_norm": 0.018918626010417938,
"learning_rate": 5.883469626168224e-06,
"loss": 0.0008,
"step": 4030
},
{
"epoch": 2.3598130841121496,
"grad_norm": 0.018906960263848305,
"learning_rate": 5.898072429906543e-06,
"loss": 0.0008,
"step": 4040
},
{
"epoch": 2.3656542056074765,
"grad_norm": 0.0188592579215765,
"learning_rate": 5.91267523364486e-06,
"loss": 0.0881,
"step": 4050
},
{
"epoch": 2.371495327102804,
"grad_norm": 1.4257460832595825,
"learning_rate": 5.927278037383178e-06,
"loss": 0.0824,
"step": 4060
},
{
"epoch": 2.3773364485981308,
"grad_norm": 0.05044642463326454,
"learning_rate": 5.941880841121496e-06,
"loss": 0.0018,
"step": 4070
},
{
"epoch": 2.383177570093458,
"grad_norm": 0.04848237708210945,
"learning_rate": 5.956483644859814e-06,
"loss": 0.0022,
"step": 4080
},
{
"epoch": 2.389018691588785,
"grad_norm": 0.03797721490263939,
"learning_rate": 5.971086448598131e-06,
"loss": 0.0019,
"step": 4090
},
{
"epoch": 2.3948598130841123,
"grad_norm": 1.3042831420898438,
"learning_rate": 5.98568925233645e-06,
"loss": 0.0761,
"step": 4100
},
{
"epoch": 2.4007009345794392,
"grad_norm": 0.056668974459171295,
"learning_rate": 6.000292056074767e-06,
"loss": 0.0026,
"step": 4110
},
{
"epoch": 2.406542056074766,
"grad_norm": 0.05167270824313164,
"learning_rate": 6.014894859813084e-06,
"loss": 0.0025,
"step": 4120
},
{
"epoch": 2.4123831775700935,
"grad_norm": 0.02998114936053753,
"learning_rate": 6.0294976635514025e-06,
"loss": 0.0022,
"step": 4130
},
{
"epoch": 2.418224299065421,
"grad_norm": 0.035051021724939346,
"learning_rate": 6.0441004672897195e-06,
"loss": 0.0016,
"step": 4140
},
{
"epoch": 2.4240654205607477,
"grad_norm": 0.032349418848752975,
"learning_rate": 6.058703271028038e-06,
"loss": 0.0015,
"step": 4150
},
{
"epoch": 2.4299065420560746,
"grad_norm": 0.03202052041888237,
"learning_rate": 6.073306074766355e-06,
"loss": 0.0013,
"step": 4160
},
{
"epoch": 2.435747663551402,
"grad_norm": 0.02790389209985733,
"learning_rate": 6.087908878504673e-06,
"loss": 0.0012,
"step": 4170
},
{
"epoch": 2.441588785046729,
"grad_norm": 0.023634234443306923,
"learning_rate": 6.102511682242991e-06,
"loss": 0.0011,
"step": 4180
},
{
"epoch": 2.447429906542056,
"grad_norm": 0.024219932034611702,
"learning_rate": 6.117114485981309e-06,
"loss": 0.001,
"step": 4190
},
{
"epoch": 2.453271028037383,
"grad_norm": 0.020274635404348373,
"learning_rate": 6.131717289719626e-06,
"loss": 0.0009,
"step": 4200
},
{
"epoch": 2.4591121495327104,
"grad_norm": 0.017824700102210045,
"learning_rate": 6.146320093457945e-06,
"loss": 0.0009,
"step": 4210
},
{
"epoch": 2.4649532710280373,
"grad_norm": 0.024308903142809868,
"learning_rate": 6.160922897196262e-06,
"loss": 0.0437,
"step": 4220
},
{
"epoch": 2.4707943925233646,
"grad_norm": 0.022735148668289185,
"learning_rate": 6.175525700934581e-06,
"loss": 0.0009,
"step": 4230
},
{
"epoch": 2.4766355140186915,
"grad_norm": 0.024395328015089035,
"learning_rate": 6.190128504672898e-06,
"loss": 0.001,
"step": 4240
},
{
"epoch": 2.482476635514019,
"grad_norm": 0.022520286962389946,
"learning_rate": 6.204731308411216e-06,
"loss": 0.0009,
"step": 4250
},
{
"epoch": 2.4883177570093458,
"grad_norm": 0.023120161145925522,
"learning_rate": 6.2193341121495335e-06,
"loss": 0.0431,
"step": 4260
},
{
"epoch": 2.494158878504673,
"grad_norm": 0.02719821222126484,
"learning_rate": 6.2339369158878505e-06,
"loss": 0.0011,
"step": 4270
},
{
"epoch": 2.5,
"grad_norm": 0.02525412105023861,
"learning_rate": 6.2485397196261684e-06,
"loss": 0.001,
"step": 4280
},
{
"epoch": 2.505841121495327,
"grad_norm": 0.02401375211775303,
"learning_rate": 6.263142523364486e-06,
"loss": 0.001,
"step": 4290
},
{
"epoch": 2.5116822429906542,
"grad_norm": 0.025789327919483185,
"learning_rate": 6.277745327102804e-06,
"loss": 0.001,
"step": 4300
},
{
"epoch": 2.5175233644859816,
"grad_norm": 0.019803550094366074,
"learning_rate": 6.292348130841121e-06,
"loss": 0.0009,
"step": 4310
},
{
"epoch": 2.5233644859813085,
"grad_norm": 0.024618757888674736,
"learning_rate": 6.30695093457944e-06,
"loss": 0.0853,
"step": 4320
},
{
"epoch": 2.5292056074766354,
"grad_norm": 0.028840836137533188,
"learning_rate": 6.321553738317757e-06,
"loss": 0.0408,
"step": 4330
},
{
"epoch": 2.5350467289719627,
"grad_norm": 0.038188423961400986,
"learning_rate": 6.336156542056076e-06,
"loss": 0.0019,
"step": 4340
},
{
"epoch": 2.5408878504672896,
"grad_norm": 0.04892474785447121,
"learning_rate": 6.350759345794393e-06,
"loss": 0.0388,
"step": 4350
},
{
"epoch": 2.546728971962617,
"grad_norm": 0.051845621317625046,
"learning_rate": 6.365362149532711e-06,
"loss": 0.0021,
"step": 4360
},
{
"epoch": 2.552570093457944,
"grad_norm": 0.035969078540802,
"learning_rate": 6.379964953271029e-06,
"loss": 0.0021,
"step": 4370
},
{
"epoch": 2.558411214953271,
"grad_norm": 0.02896655909717083,
"learning_rate": 6.3945677570093466e-06,
"loss": 0.0017,
"step": 4380
},
{
"epoch": 2.564252336448598,
"grad_norm": 0.03623491898179054,
"learning_rate": 6.409170560747664e-06,
"loss": 0.0015,
"step": 4390
},
{
"epoch": 2.5700934579439254,
"grad_norm": 0.03049786575138569,
"learning_rate": 6.423773364485982e-06,
"loss": 0.0013,
"step": 4400
},
{
"epoch": 2.5759345794392523,
"grad_norm": 0.030855245888233185,
"learning_rate": 6.438376168224299e-06,
"loss": 0.0012,
"step": 4410
},
{
"epoch": 2.581775700934579,
"grad_norm": 0.02443297952413559,
"learning_rate": 6.4529789719626165e-06,
"loss": 0.0011,
"step": 4420
},
{
"epoch": 2.5876168224299065,
"grad_norm": 0.020587557926774025,
"learning_rate": 6.467581775700935e-06,
"loss": 0.001,
"step": 4430
},
{
"epoch": 2.593457943925234,
"grad_norm": 0.016886306926608086,
"learning_rate": 6.482184579439252e-06,
"loss": 0.0427,
"step": 4440
},
{
"epoch": 2.5992990654205608,
"grad_norm": 0.02218514122068882,
"learning_rate": 6.496787383177571e-06,
"loss": 0.001,
"step": 4450
},
{
"epoch": 2.6051401869158877,
"grad_norm": 0.03042624518275261,
"learning_rate": 6.511390186915888e-06,
"loss": 0.0011,
"step": 4460
},
{
"epoch": 2.610981308411215,
"grad_norm": 0.02127344347536564,
"learning_rate": 6.525992990654206e-06,
"loss": 0.0011,
"step": 4470
},
{
"epoch": 2.616822429906542,
"grad_norm": 0.0248599611222744,
"learning_rate": 6.540595794392524e-06,
"loss": 0.001,
"step": 4480
},
{
"epoch": 2.6226635514018692,
"grad_norm": 0.025477230548858643,
"learning_rate": 6.555198598130842e-06,
"loss": 0.0427,
"step": 4490
},
{
"epoch": 2.628504672897196,
"grad_norm": 0.029195934534072876,
"learning_rate": 6.569801401869159e-06,
"loss": 0.0011,
"step": 4500
},
{
"epoch": 2.6343457943925235,
"grad_norm": 0.028360871598124504,
"learning_rate": 6.5844042056074775e-06,
"loss": 0.0417,
"step": 4510
},
{
"epoch": 2.6401869158878504,
"grad_norm": 0.03158384561538696,
"learning_rate": 6.599007009345795e-06,
"loss": 0.0013,
"step": 4520
},
{
"epoch": 2.6460280373831777,
"grad_norm": 0.03432070091366768,
"learning_rate": 6.613609813084113e-06,
"loss": 0.0013,
"step": 4530
},
{
"epoch": 2.6518691588785046,
"grad_norm": 0.03093302808701992,
"learning_rate": 6.62821261682243e-06,
"loss": 0.0408,
"step": 4540
},
{
"epoch": 2.6577102803738315,
"grad_norm": 0.028930488973855972,
"learning_rate": 6.642815420560748e-06,
"loss": 0.0014,
"step": 4550
},
{
"epoch": 2.663551401869159,
"grad_norm": 0.035523250699043274,
"learning_rate": 6.657418224299066e-06,
"loss": 0.0014,
"step": 4560
},
{
"epoch": 2.669392523364486,
"grad_norm": 0.03366963937878609,
"learning_rate": 6.672021028037384e-06,
"loss": 0.0403,
"step": 4570
},
{
"epoch": 2.675233644859813,
"grad_norm": 0.03295886889100075,
"learning_rate": 6.686623831775701e-06,
"loss": 0.0015,
"step": 4580
},
{
"epoch": 2.68107476635514,
"grad_norm": 0.035681866109371185,
"learning_rate": 6.701226635514019e-06,
"loss": 0.0015,
"step": 4590
},
{
"epoch": 2.6869158878504673,
"grad_norm": 0.028133153915405273,
"learning_rate": 6.715829439252337e-06,
"loss": 0.0016,
"step": 4600
},
{
"epoch": 2.6927570093457946,
"grad_norm": 0.02387244440615177,
"learning_rate": 6.730432242990654e-06,
"loss": 0.0013,
"step": 4610
},
{
"epoch": 2.6985981308411215,
"grad_norm": 0.025142334401607513,
"learning_rate": 6.745035046728973e-06,
"loss": 0.0801,
"step": 4620
},
{
"epoch": 2.7044392523364484,
"grad_norm": 0.03518941253423691,
"learning_rate": 6.75963785046729e-06,
"loss": 0.0018,
"step": 4630
},
{
"epoch": 2.710280373831776,
"grad_norm": 0.030848579481244087,
"learning_rate": 6.7742406542056085e-06,
"loss": 0.0017,
"step": 4640
},
{
"epoch": 2.7161214953271027,
"grad_norm": 0.04090001434087753,
"learning_rate": 6.788843457943926e-06,
"loss": 0.0018,
"step": 4650
},
{
"epoch": 2.72196261682243,
"grad_norm": 0.027243750169873238,
"learning_rate": 6.8034462616822435e-06,
"loss": 0.0015,
"step": 4660
},
{
"epoch": 2.727803738317757,
"grad_norm": 0.02641715668141842,
"learning_rate": 6.818049065420561e-06,
"loss": 0.0013,
"step": 4670
},
{
"epoch": 2.7336448598130842,
"grad_norm": 0.025730127468705177,
"learning_rate": 6.832651869158879e-06,
"loss": 0.0012,
"step": 4680
},
{
"epoch": 2.739485981308411,
"grad_norm": 0.024618886411190033,
"learning_rate": 6.847254672897196e-06,
"loss": 0.121,
"step": 4690
},
{
"epoch": 2.7453271028037385,
"grad_norm": 0.039262160658836365,
"learning_rate": 6.861857476635515e-06,
"loss": 0.0015,
"step": 4700
},
{
"epoch": 2.7511682242990654,
"grad_norm": 0.03421768546104431,
"learning_rate": 6.876460280373832e-06,
"loss": 0.0017,
"step": 4710
},
{
"epoch": 2.7570093457943923,
"grad_norm": 0.03881515935063362,
"learning_rate": 6.891063084112151e-06,
"loss": 0.0015,
"step": 4720
},
{
"epoch": 2.7628504672897196,
"grad_norm": 0.02848188392817974,
"learning_rate": 6.905665887850468e-06,
"loss": 0.0015,
"step": 4730
},
{
"epoch": 2.768691588785047,
"grad_norm": 0.02415272779762745,
"learning_rate": 6.920268691588785e-06,
"loss": 0.0013,
"step": 4740
},
{
"epoch": 2.774532710280374,
"grad_norm": 0.026982519775629044,
"learning_rate": 6.934871495327104e-06,
"loss": 0.0012,
"step": 4750
},
{
"epoch": 2.7803738317757007,
"grad_norm": 0.0221230611205101,
"learning_rate": 6.949474299065421e-06,
"loss": 0.001,
"step": 4760
},
{
"epoch": 2.786214953271028,
"grad_norm": 0.020836248993873596,
"learning_rate": 6.964077102803739e-06,
"loss": 0.0009,
"step": 4770
},
{
"epoch": 2.792056074766355,
"grad_norm": 0.016426123678684235,
"learning_rate": 6.9786799065420566e-06,
"loss": 0.0009,
"step": 4780
},
{
"epoch": 2.7978971962616823,
"grad_norm": 0.015739239752292633,
"learning_rate": 6.9932827102803745e-06,
"loss": 0.0008,
"step": 4790
},
{
"epoch": 2.803738317757009,
"grad_norm": 0.025521619245409966,
"learning_rate": 7.0078855140186915e-06,
"loss": 0.0434,
"step": 4800
},
{
"epoch": 2.8095794392523366,
"grad_norm": 0.02532140351831913,
"learning_rate": 7.02248831775701e-06,
"loss": 0.0009,
"step": 4810
},
{
"epoch": 2.8154205607476634,
"grad_norm": 0.01968371495604515,
"learning_rate": 7.037091121495327e-06,
"loss": 0.001,
"step": 4820
},
{
"epoch": 2.821261682242991,
"grad_norm": 0.023469461128115654,
"learning_rate": 7.051693925233646e-06,
"loss": 0.001,
"step": 4830
},
{
"epoch": 2.8271028037383177,
"grad_norm": 0.023085037246346474,
"learning_rate": 7.066296728971963e-06,
"loss": 0.0009,
"step": 4840
},
{
"epoch": 2.832943925233645,
"grad_norm": 0.02076035924255848,
"learning_rate": 7.080899532710281e-06,
"loss": 0.0008,
"step": 4850
},
{
"epoch": 2.838785046728972,
"grad_norm": 0.01970616728067398,
"learning_rate": 7.095502336448599e-06,
"loss": 0.0007,
"step": 4860
},
{
"epoch": 2.8446261682242993,
"grad_norm": 0.01674484834074974,
"learning_rate": 7.110105140186917e-06,
"loss": 0.044,
"step": 4870
},
{
"epoch": 2.850467289719626,
"grad_norm": 0.019377458840608597,
"learning_rate": 7.124707943925234e-06,
"loss": 0.0008,
"step": 4880
},
{
"epoch": 2.856308411214953,
"grad_norm": 0.01821335218846798,
"learning_rate": 7.139310747663552e-06,
"loss": 0.0009,
"step": 4890
},
{
"epoch": 2.8621495327102804,
"grad_norm": 0.023551421239972115,
"learning_rate": 7.15391355140187e-06,
"loss": 0.0431,
"step": 4900
},
{
"epoch": 2.8679906542056077,
"grad_norm": 0.021875927224755287,
"learning_rate": 7.168516355140187e-06,
"loss": 0.001,
"step": 4910
},
{
"epoch": 2.8738317757009346,
"grad_norm": 0.028866572305560112,
"learning_rate": 7.1831191588785054e-06,
"loss": 0.0011,
"step": 4920
},
{
"epoch": 2.8796728971962615,
"grad_norm": 0.022117484360933304,
"learning_rate": 7.1977219626168225e-06,
"loss": 0.0009,
"step": 4930
},
{
"epoch": 2.885514018691589,
"grad_norm": 0.02393292263150215,
"learning_rate": 7.212324766355141e-06,
"loss": 0.0423,
"step": 4940
},
{
"epoch": 2.8913551401869158,
"grad_norm": 0.028178559616208076,
"learning_rate": 7.226927570093458e-06,
"loss": 0.0409,
"step": 4950
},
{
"epoch": 2.897196261682243,
"grad_norm": 0.042030058801174164,
"learning_rate": 7.241530373831776e-06,
"loss": 0.0014,
"step": 4960
},
{
"epoch": 2.90303738317757,
"grad_norm": 0.035542890429496765,
"learning_rate": 7.256133177570094e-06,
"loss": 0.0394,
"step": 4970
},
{
"epoch": 2.9088785046728973,
"grad_norm": 0.04510030522942543,
"learning_rate": 7.270735981308412e-06,
"loss": 0.0019,
"step": 4980
},
{
"epoch": 2.914719626168224,
"grad_norm": 0.03763249143958092,
"learning_rate": 7.285338785046729e-06,
"loss": 0.0018,
"step": 4990
},
{
"epoch": 2.9205607476635516,
"grad_norm": 0.03738338127732277,
"learning_rate": 7.299941588785048e-06,
"loss": 0.039,
"step": 5000
},
{
"epoch": 2.9264018691588785,
"grad_norm": 0.05190500617027283,
"learning_rate": 7.314544392523365e-06,
"loss": 0.002,
"step": 5010
},
{
"epoch": 2.9322429906542054,
"grad_norm": 0.04588020592927933,
"learning_rate": 7.3291471962616836e-06,
"loss": 0.0386,
"step": 5020
},
{
"epoch": 2.9380841121495327,
"grad_norm": 0.05511573329567909,
"learning_rate": 7.343750000000001e-06,
"loss": 0.0375,
"step": 5030
},
{
"epoch": 2.94392523364486,
"grad_norm": 0.04213655740022659,
"learning_rate": 7.358352803738318e-06,
"loss": 0.0028,
"step": 5040
},
{
"epoch": 2.949766355140187,
"grad_norm": 0.041382934898138046,
"learning_rate": 7.3729556074766364e-06,
"loss": 0.0365,
"step": 5050
},
{
"epoch": 2.955607476635514,
"grad_norm": 0.05554201453924179,
"learning_rate": 7.3875584112149535e-06,
"loss": 0.003,
"step": 5060
},
{
"epoch": 2.961448598130841,
"grad_norm": 0.04713258147239685,
"learning_rate": 7.402161214953271e-06,
"loss": 0.0025,
"step": 5070
},
{
"epoch": 2.9672897196261685,
"grad_norm": 0.03752976655960083,
"learning_rate": 7.416764018691589e-06,
"loss": 0.0019,
"step": 5080
},
{
"epoch": 2.9731308411214954,
"grad_norm": 0.0314338281750679,
"learning_rate": 7.431366822429907e-06,
"loss": 0.0016,
"step": 5090
},
{
"epoch": 2.9789719626168223,
"grad_norm": 0.03591935336589813,
"learning_rate": 7.445969626168224e-06,
"loss": 0.0777,
"step": 5100
},
{
"epoch": 2.9848130841121496,
"grad_norm": 0.052552781999111176,
"learning_rate": 7.460572429906543e-06,
"loss": 0.0021,
"step": 5110
},
{
"epoch": 2.9906542056074765,
"grad_norm": 0.03766405209898949,
"learning_rate": 7.47517523364486e-06,
"loss": 0.0022,
"step": 5120
},
{
"epoch": 2.996495327102804,
"grad_norm": 0.03205743804574013,
"learning_rate": 7.489778037383179e-06,
"loss": 0.002,
"step": 5130
},
{
"epoch": 3.0,
"eval_f1": 0.9986386325820602,
"eval_fbeta": 0.9978235099982873,
"eval_loss": 0.01077313907444477,
"eval_precision": 0.9972809667673715,
"eval_recall": 1.0,
"eval_runtime": 171.3603,
"eval_samples_per_second": 68.493,
"eval_steps_per_second": 8.567,
"step": 5136
},
{
"epoch": 3.0023364485981308,
"grad_norm": 0.02765633352100849,
"learning_rate": 7.504380841121496e-06,
"loss": 0.0016,
"step": 5140
},
{
"epoch": 3.008177570093458,
"grad_norm": 0.028236212208867073,
"learning_rate": 7.518983644859814e-06,
"loss": 0.0014,
"step": 5150
},
{
"epoch": 3.014018691588785,
"grad_norm": 0.015433499589562416,
"learning_rate": 7.533586448598132e-06,
"loss": 0.0011,
"step": 5160
},
{
"epoch": 3.0198598130841123,
"grad_norm": 0.024822015315294266,
"learning_rate": 7.5481892523364495e-06,
"loss": 0.001,
"step": 5170
},
{
"epoch": 3.0257009345794392,
"grad_norm": 0.024215752258896828,
"learning_rate": 7.5627920560747666e-06,
"loss": 0.001,
"step": 5180
},
{
"epoch": 3.0315420560747666,
"grad_norm": 0.021826941519975662,
"learning_rate": 7.5773948598130845e-06,
"loss": 0.0425,
"step": 5190
},
{
"epoch": 3.0373831775700935,
"grad_norm": 0.02321033552289009,
"learning_rate": 7.591997663551402e-06,
"loss": 0.0012,
"step": 5200
},
{
"epoch": 3.0432242990654204,
"grad_norm": 0.03412926569581032,
"learning_rate": 7.606600467289719e-06,
"loss": 0.0414,
"step": 5210
},
{
"epoch": 3.0490654205607477,
"grad_norm": 0.030813759192824364,
"learning_rate": 7.621203271028038e-06,
"loss": 0.0013,
"step": 5220
},
{
"epoch": 3.0549065420560746,
"grad_norm": 0.026983771473169327,
"learning_rate": 7.635806074766355e-06,
"loss": 0.0014,
"step": 5230
},
{
"epoch": 3.060747663551402,
"grad_norm": 0.024708811193704605,
"learning_rate": 7.650408878504674e-06,
"loss": 0.0013,
"step": 5240
},
{
"epoch": 3.066588785046729,
"grad_norm": 0.022824935615062714,
"learning_rate": 7.665011682242991e-06,
"loss": 0.0011,
"step": 5250
},
{
"epoch": 3.072429906542056,
"grad_norm": 0.022325806319713593,
"learning_rate": 7.67961448598131e-06,
"loss": 0.0011,
"step": 5260
},
{
"epoch": 3.078271028037383,
"grad_norm": 0.023151839151978493,
"learning_rate": 7.694217289719627e-06,
"loss": 0.0009,
"step": 5270
},
{
"epoch": 3.0841121495327104,
"grad_norm": 0.02269587479531765,
"learning_rate": 7.708820093457946e-06,
"loss": 0.0009,
"step": 5280
},
{
"epoch": 3.0899532710280373,
"grad_norm": 0.01616939902305603,
"learning_rate": 7.723422897196263e-06,
"loss": 0.0008,
"step": 5290
},
{
"epoch": 3.0957943925233646,
"grad_norm": 0.018096772953867912,
"learning_rate": 7.738025700934581e-06,
"loss": 0.0008,
"step": 5300
},
{
"epoch": 3.1016355140186915,
"grad_norm": 0.015381600707769394,
"learning_rate": 7.752628504672898e-06,
"loss": 0.0007,
"step": 5310
},
{
"epoch": 3.107476635514019,
"grad_norm": 0.016292404383420944,
"learning_rate": 7.767231308411215e-06,
"loss": 0.0007,
"step": 5320
},
{
"epoch": 3.1133177570093458,
"grad_norm": 0.016235455870628357,
"learning_rate": 7.781834112149532e-06,
"loss": 0.0447,
"step": 5330
},
{
"epoch": 3.119158878504673,
"grad_norm": 0.01838596910238266,
"learning_rate": 7.796436915887851e-06,
"loss": 0.0007,
"step": 5340
},
{
"epoch": 3.125,
"grad_norm": 0.018622124567627907,
"learning_rate": 7.811039719626168e-06,
"loss": 0.0007,
"step": 5350
},
{
"epoch": 3.130841121495327,
"grad_norm": 0.020052941516041756,
"learning_rate": 7.825642523364485e-06,
"loss": 0.0007,
"step": 5360
},
{
"epoch": 3.1366822429906542,
"grad_norm": 0.0180792436003685,
"learning_rate": 7.840245327102804e-06,
"loss": 0.0007,
"step": 5370
},
{
"epoch": 3.142523364485981,
"grad_norm": 0.015389679931104183,
"learning_rate": 7.854848130841121e-06,
"loss": 0.0007,
"step": 5380
},
{
"epoch": 3.1483644859813085,
"grad_norm": 0.01384568028151989,
"learning_rate": 7.86945093457944e-06,
"loss": 0.0007,
"step": 5390
},
{
"epoch": 3.1542056074766354,
"grad_norm": 1.5059659481048584,
"learning_rate": 7.884053738317757e-06,
"loss": 0.0449,
"step": 5400
},
{
"epoch": 3.1600467289719627,
"grad_norm": 0.02092292346060276,
"learning_rate": 7.898656542056076e-06,
"loss": 0.0007,
"step": 5410
},
{
"epoch": 3.1658878504672896,
"grad_norm": 0.01820148155093193,
"learning_rate": 7.913259345794393e-06,
"loss": 0.0008,
"step": 5420
},
{
"epoch": 3.171728971962617,
"grad_norm": 0.018773594871163368,
"learning_rate": 7.927862149532711e-06,
"loss": 0.0007,
"step": 5430
},
{
"epoch": 3.177570093457944,
"grad_norm": 0.01787596382200718,
"learning_rate": 7.942464953271029e-06,
"loss": 0.0007,
"step": 5440
},
{
"epoch": 3.183411214953271,
"grad_norm": 0.033807117491960526,
"learning_rate": 7.957067757009347e-06,
"loss": 0.0867,
"step": 5450
},
{
"epoch": 3.189252336448598,
"grad_norm": 0.023095451295375824,
"learning_rate": 7.971670560747664e-06,
"loss": 0.0013,
"step": 5460
},
{
"epoch": 3.1950934579439254,
"grad_norm": 0.02575875073671341,
"learning_rate": 7.986273364485983e-06,
"loss": 0.0013,
"step": 5470
},
{
"epoch": 3.2009345794392523,
"grad_norm": 0.023440295830368996,
"learning_rate": 8.0008761682243e-06,
"loss": 0.0012,
"step": 5480
},
{
"epoch": 3.2067757009345796,
"grad_norm": 0.018454963341355324,
"learning_rate": 8.015478971962617e-06,
"loss": 0.001,
"step": 5490
},
{
"epoch": 3.2126168224299065,
"grad_norm": 0.022602304816246033,
"learning_rate": 8.030081775700936e-06,
"loss": 0.0421,
"step": 5500
},
{
"epoch": 3.218457943925234,
"grad_norm": 0.040274497121572495,
"learning_rate": 8.044684579439253e-06,
"loss": 0.0406,
"step": 5510
},
{
"epoch": 3.2242990654205608,
"grad_norm": 1.618641972541809,
"learning_rate": 8.059287383177572e-06,
"loss": 0.1812,
"step": 5520
},
{
"epoch": 3.2301401869158877,
"grad_norm": 0.10644412040710449,
"learning_rate": 8.073890186915889e-06,
"loss": 0.09,
"step": 5530
},
{
"epoch": 3.235981308411215,
"grad_norm": 0.05092101916670799,
"learning_rate": 8.088492990654206e-06,
"loss": 0.0038,
"step": 5540
},
{
"epoch": 3.241822429906542,
"grad_norm": 0.026801634579896927,
"learning_rate": 8.103095794392523e-06,
"loss": 0.002,
"step": 5550
},
{
"epoch": 3.2476635514018692,
"grad_norm": 0.024006173014640808,
"learning_rate": 8.117698598130842e-06,
"loss": 0.0013,
"step": 5560
},
{
"epoch": 3.253504672897196,
"grad_norm": 0.019342713057994843,
"learning_rate": 8.132301401869159e-06,
"loss": 0.001,
"step": 5570
},
{
"epoch": 3.2593457943925235,
"grad_norm": 0.01894223503768444,
"learning_rate": 8.146904205607477e-06,
"loss": 0.0008,
"step": 5580
},
{
"epoch": 3.2651869158878504,
"grad_norm": 0.01678093895316124,
"learning_rate": 8.161507009345794e-06,
"loss": 0.0007,
"step": 5590
},
{
"epoch": 3.2710280373831777,
"grad_norm": 0.014554358087480068,
"learning_rate": 8.176109813084113e-06,
"loss": 0.0006,
"step": 5600
},
{
"epoch": 3.2768691588785046,
"grad_norm": 0.014845364727079868,
"learning_rate": 8.19071261682243e-06,
"loss": 0.0006,
"step": 5610
},
{
"epoch": 3.282710280373832,
"grad_norm": 0.012988976202905178,
"learning_rate": 8.205315420560749e-06,
"loss": 0.0005,
"step": 5620
},
{
"epoch": 3.288551401869159,
"grad_norm": 0.039321836084127426,
"learning_rate": 8.219918224299066e-06,
"loss": 0.0473,
"step": 5630
},
{
"epoch": 3.294392523364486,
"grad_norm": 0.021501081064343452,
"learning_rate": 8.234521028037385e-06,
"loss": 0.0009,
"step": 5640
},
{
"epoch": 3.300233644859813,
"grad_norm": 0.022349685430526733,
"learning_rate": 8.249123831775702e-06,
"loss": 0.0009,
"step": 5650
},
{
"epoch": 3.30607476635514,
"grad_norm": 0.015372666530311108,
"learning_rate": 8.263726635514019e-06,
"loss": 0.0007,
"step": 5660
},
{
"epoch": 3.3119158878504673,
"grad_norm": 0.022245537489652634,
"learning_rate": 8.278329439252338e-06,
"loss": 0.0452,
"step": 5670
},
{
"epoch": 3.317757009345794,
"grad_norm": 0.027831530198454857,
"learning_rate": 8.292932242990655e-06,
"loss": 0.001,
"step": 5680
},
{
"epoch": 3.3235981308411215,
"grad_norm": 0.024912910535931587,
"learning_rate": 8.307535046728973e-06,
"loss": 0.001,
"step": 5690
},
{
"epoch": 3.3294392523364484,
"grad_norm": 0.05802381411194801,
"learning_rate": 8.32213785046729e-06,
"loss": 0.0424,
"step": 5700
},
{
"epoch": 3.335280373831776,
"grad_norm": 0.04274534061551094,
"learning_rate": 8.33674065420561e-06,
"loss": 0.0018,
"step": 5710
},
{
"epoch": 3.3411214953271027,
"grad_norm": 0.033411163836717606,
"learning_rate": 8.351343457943926e-06,
"loss": 0.0015,
"step": 5720
},
{
"epoch": 3.34696261682243,
"grad_norm": 0.024391217157244682,
"learning_rate": 8.365946261682243e-06,
"loss": 0.0011,
"step": 5730
},
{
"epoch": 3.352803738317757,
"grad_norm": 0.019727276638150215,
"learning_rate": 8.38054906542056e-06,
"loss": 0.0008,
"step": 5740
},
{
"epoch": 3.3586448598130842,
"grad_norm": 0.01770111732184887,
"learning_rate": 8.395151869158879e-06,
"loss": 0.0008,
"step": 5750
},
{
"epoch": 3.364485981308411,
"grad_norm": 1.5078725814819336,
"learning_rate": 8.409754672897196e-06,
"loss": 0.0449,
"step": 5760
},
{
"epoch": 3.3703271028037385,
"grad_norm": 0.01682940497994423,
"learning_rate": 8.424357476635515e-06,
"loss": 0.0008,
"step": 5770
},
{
"epoch": 3.3761682242990654,
"grad_norm": 0.022241350263357162,
"learning_rate": 8.438960280373832e-06,
"loss": 0.0009,
"step": 5780
},
{
"epoch": 3.3820093457943923,
"grad_norm": 0.05642879754304886,
"learning_rate": 8.45356308411215e-06,
"loss": 0.1237,
"step": 5790
},
{
"epoch": 3.3878504672897196,
"grad_norm": 0.08196932822465897,
"learning_rate": 8.468165887850468e-06,
"loss": 0.0039,
"step": 5800
},
{
"epoch": 3.393691588785047,
"grad_norm": 0.061761435121297836,
"learning_rate": 8.482768691588785e-06,
"loss": 0.0032,
"step": 5810
},
{
"epoch": 3.399532710280374,
"grad_norm": 0.0422382578253746,
"learning_rate": 8.497371495327104e-06,
"loss": 0.0022,
"step": 5820
},
{
"epoch": 3.4053738317757007,
"grad_norm": 1.63783860206604,
"learning_rate": 8.51197429906542e-06,
"loss": 0.0864,
"step": 5830
},
{
"epoch": 3.411214953271028,
"grad_norm": 0.10045663267374039,
"learning_rate": 8.52657710280374e-06,
"loss": 0.0443,
"step": 5840
},
{
"epoch": 3.417056074766355,
"grad_norm": 0.057120032608509064,
"learning_rate": 8.541179906542056e-06,
"loss": 0.0312,
"step": 5850
},
{
"epoch": 3.4228971962616823,
"grad_norm": 0.04555455595254898,
"learning_rate": 8.555782710280375e-06,
"loss": 0.0391,
"step": 5860
},
{
"epoch": 3.428738317757009,
"grad_norm": 0.04573315382003784,
"learning_rate": 8.570385514018692e-06,
"loss": 0.0023,
"step": 5870
},
{
"epoch": 3.4345794392523366,
"grad_norm": 0.034623004496097565,
"learning_rate": 8.584988317757011e-06,
"loss": 0.0021,
"step": 5880
},
{
"epoch": 3.4404205607476634,
"grad_norm": 0.030579067766666412,
"learning_rate": 8.599591121495328e-06,
"loss": 0.0018,
"step": 5890
},
{
"epoch": 3.446261682242991,
"grad_norm": 0.029105929657816887,
"learning_rate": 8.614193925233647e-06,
"loss": 0.0014,
"step": 5900
},
{
"epoch": 3.4521028037383177,
"grad_norm": 0.02326030656695366,
"learning_rate": 8.628796728971964e-06,
"loss": 0.0011,
"step": 5910
},
{
"epoch": 3.457943925233645,
"grad_norm": 0.023391487076878548,
"learning_rate": 8.643399532710281e-06,
"loss": 0.0011,
"step": 5920
},
{
"epoch": 3.463785046728972,
"grad_norm": 0.026132913306355476,
"learning_rate": 8.6580023364486e-06,
"loss": 0.0416,
"step": 5930
},
{
"epoch": 3.4696261682242993,
"grad_norm": 0.03305817395448685,
"learning_rate": 8.672605140186917e-06,
"loss": 0.0015,
"step": 5940
},
{
"epoch": 3.475467289719626,
"grad_norm": 1.3769471645355225,
"learning_rate": 8.687207943925234e-06,
"loss": 0.0406,
"step": 5950
},
{
"epoch": 3.481308411214953,
"grad_norm": 0.036052025854587555,
"learning_rate": 8.70181074766355e-06,
"loss": 0.0018,
"step": 5960
},
{
"epoch": 3.4871495327102804,
"grad_norm": 0.03515082970261574,
"learning_rate": 8.71641355140187e-06,
"loss": 0.0018,
"step": 5970
},
{
"epoch": 3.4929906542056077,
"grad_norm": 1.3603745698928833,
"learning_rate": 8.731016355140187e-06,
"loss": 0.04,
"step": 5980
},
{
"epoch": 3.4988317757009346,
"grad_norm": 0.044313978403806686,
"learning_rate": 8.745619158878505e-06,
"loss": 0.0017,
"step": 5990
},
{
"epoch": 3.5046728971962615,
"grad_norm": 0.03033366985619068,
"learning_rate": 8.760221962616822e-06,
"loss": 0.0017,
"step": 6000
},
{
"epoch": 3.510514018691589,
"grad_norm": 0.03509744256734848,
"learning_rate": 8.774824766355141e-06,
"loss": 0.0402,
"step": 6010
},
{
"epoch": 3.5163551401869158,
"grad_norm": 0.127041295170784,
"learning_rate": 8.789427570093458e-06,
"loss": 0.0734,
"step": 6020
},
{
"epoch": 3.522196261682243,
"grad_norm": 0.07098700851202011,
"learning_rate": 8.804030373831777e-06,
"loss": 0.0048,
"step": 6030
},
{
"epoch": 3.52803738317757,
"grad_norm": 0.05003582686185837,
"learning_rate": 8.818633177570094e-06,
"loss": 0.0365,
"step": 6040
},
{
"epoch": 3.5338785046728973,
"grad_norm": 0.06801114976406097,
"learning_rate": 8.833235981308413e-06,
"loss": 0.0369,
"step": 6050
},
{
"epoch": 3.539719626168224,
"grad_norm": 0.1009073555469513,
"learning_rate": 8.84783878504673e-06,
"loss": 0.0042,
"step": 6060
},
{
"epoch": 3.5455607476635516,
"grad_norm": 0.045602068305015564,
"learning_rate": 8.862441588785048e-06,
"loss": 0.0368,
"step": 6070
},
{
"epoch": 3.5514018691588785,
"grad_norm": 0.04139287769794464,
"learning_rate": 8.877044392523366e-06,
"loss": 0.0028,
"step": 6080
},
{
"epoch": 3.5572429906542054,
"grad_norm": 0.04599359631538391,
"learning_rate": 8.891647196261684e-06,
"loss": 0.0025,
"step": 6090
},
{
"epoch": 3.5630841121495327,
"grad_norm": 0.04469927027821541,
"learning_rate": 8.906250000000001e-06,
"loss": 0.0021,
"step": 6100
},
{
"epoch": 3.56892523364486,
"grad_norm": 0.03399086743593216,
"learning_rate": 8.920852803738318e-06,
"loss": 0.0014,
"step": 6110
},
{
"epoch": 3.574766355140187,
"grad_norm": 0.024210546165704727,
"learning_rate": 8.935455607476637e-06,
"loss": 0.0012,
"step": 6120
},
{
"epoch": 3.580607476635514,
"grad_norm": 0.029081307351589203,
"learning_rate": 8.950058411214954e-06,
"loss": 0.0414,
"step": 6130
},
{
"epoch": 3.586448598130841,
"grad_norm": 0.031951967626810074,
"learning_rate": 8.964661214953271e-06,
"loss": 0.0395,
"step": 6140
},
{
"epoch": 3.5922897196261685,
"grad_norm": 0.04544154927134514,
"learning_rate": 8.97926401869159e-06,
"loss": 0.0376,
"step": 6150
},
{
"epoch": 3.5981308411214954,
"grad_norm": 0.04905861243605614,
"learning_rate": 8.993866822429907e-06,
"loss": 0.0359,
"step": 6160
},
{
"epoch": 3.6039719626168223,
"grad_norm": 0.0846642404794693,
"learning_rate": 9.008469626168224e-06,
"loss": 0.0372,
"step": 6170
},
{
"epoch": 3.6098130841121496,
"grad_norm": 0.09748505055904388,
"learning_rate": 9.023072429906543e-06,
"loss": 0.0057,
"step": 6180
},
{
"epoch": 3.6156542056074765,
"grad_norm": 0.05836522579193115,
"learning_rate": 9.03767523364486e-06,
"loss": 0.0034,
"step": 6190
},
{
"epoch": 3.621495327102804,
"grad_norm": 0.03731178119778633,
"learning_rate": 9.052278037383179e-06,
"loss": 0.0021,
"step": 6200
},
{
"epoch": 3.6273364485981308,
"grad_norm": 0.04591674730181694,
"learning_rate": 9.066880841121496e-06,
"loss": 0.0017,
"step": 6210
},
{
"epoch": 3.633177570093458,
"grad_norm": 0.026560049504041672,
"learning_rate": 9.081483644859814e-06,
"loss": 0.0013,
"step": 6220
},
{
"epoch": 3.639018691588785,
"grad_norm": 0.021895037963986397,
"learning_rate": 9.096086448598131e-06,
"loss": 0.0013,
"step": 6230
},
{
"epoch": 3.6448598130841123,
"grad_norm": 0.0244914460927248,
"learning_rate": 9.11068925233645e-06,
"loss": 0.001,
"step": 6240
},
{
"epoch": 3.6507009345794392,
"grad_norm": 0.023699410259723663,
"learning_rate": 9.125292056074767e-06,
"loss": 0.0009,
"step": 6250
},
{
"epoch": 3.656542056074766,
"grad_norm": 0.0207260400056839,
"learning_rate": 9.139894859813084e-06,
"loss": 0.0008,
"step": 6260
},
{
"epoch": 3.6623831775700935,
"grad_norm": 0.017150694504380226,
"learning_rate": 9.154497663551403e-06,
"loss": 0.0008,
"step": 6270
},
{
"epoch": 3.668224299065421,
"grad_norm": 0.01909276656806469,
"learning_rate": 9.16910046728972e-06,
"loss": 0.0007,
"step": 6280
},
{
"epoch": 3.6740654205607477,
"grad_norm": 0.01428013201802969,
"learning_rate": 9.183703271028039e-06,
"loss": 0.0007,
"step": 6290
},
{
"epoch": 3.6799065420560746,
"grad_norm": 0.014406004920601845,
"learning_rate": 9.198306074766356e-06,
"loss": 0.0006,
"step": 6300
},
{
"epoch": 3.685747663551402,
"grad_norm": 0.01366669312119484,
"learning_rate": 9.212908878504675e-06,
"loss": 0.0006,
"step": 6310
},
{
"epoch": 3.691588785046729,
"grad_norm": 0.012372363358736038,
"learning_rate": 9.227511682242992e-06,
"loss": 0.0006,
"step": 6320
},
{
"epoch": 3.697429906542056,
"grad_norm": 0.014415577985346317,
"learning_rate": 9.242114485981309e-06,
"loss": 0.0006,
"step": 6330
},
{
"epoch": 3.703271028037383,
"grad_norm": 0.014359201304614544,
"learning_rate": 9.256717289719628e-06,
"loss": 0.0005,
"step": 6340
},
{
"epoch": 3.7091121495327104,
"grad_norm": 0.012585778720676899,
"learning_rate": 9.271320093457945e-06,
"loss": 0.0005,
"step": 6350
},
{
"epoch": 3.7149532710280373,
"grad_norm": 0.010574690997600555,
"learning_rate": 9.285922897196262e-06,
"loss": 0.0004,
"step": 6360
},
{
"epoch": 3.7207943925233646,
"grad_norm": 0.011687861755490303,
"learning_rate": 9.30052570093458e-06,
"loss": 0.0004,
"step": 6370
},
{
"epoch": 3.7266355140186915,
"grad_norm": 0.010782795958220959,
"learning_rate": 9.315128504672897e-06,
"loss": 0.0004,
"step": 6380
},
{
"epoch": 3.7324766355140184,
"grad_norm": 0.00877504050731659,
"learning_rate": 9.329731308411216e-06,
"loss": 0.0004,
"step": 6390
},
{
"epoch": 3.7383177570093458,
"grad_norm": 0.011687839403748512,
"learning_rate": 9.344334112149533e-06,
"loss": 0.0004,
"step": 6400
},
{
"epoch": 3.744158878504673,
"grad_norm": 0.009787016548216343,
"learning_rate": 9.35893691588785e-06,
"loss": 0.0004,
"step": 6410
},
{
"epoch": 3.75,
"grad_norm": 0.009898348711431026,
"learning_rate": 9.373539719626169e-06,
"loss": 0.0004,
"step": 6420
},
{
"epoch": 3.755841121495327,
"grad_norm": 0.008907527662813663,
"learning_rate": 9.388142523364486e-06,
"loss": 0.0003,
"step": 6430
},
{
"epoch": 3.7616822429906542,
"grad_norm": 0.008017996326088905,
"learning_rate": 9.402745327102805e-06,
"loss": 0.0004,
"step": 6440
},
{
"epoch": 3.7675233644859816,
"grad_norm": 0.008717546239495277,
"learning_rate": 9.417348130841122e-06,
"loss": 0.0003,
"step": 6450
},
{
"epoch": 3.7733644859813085,
"grad_norm": 0.008758803829550743,
"learning_rate": 9.43195093457944e-06,
"loss": 0.0003,
"step": 6460
},
{
"epoch": 3.7792056074766354,
"grad_norm": 0.008630459196865559,
"learning_rate": 9.446553738317758e-06,
"loss": 0.0492,
"step": 6470
},
{
"epoch": 3.7850467289719627,
"grad_norm": 0.012145120650529861,
"learning_rate": 9.461156542056076e-06,
"loss": 0.0004,
"step": 6480
},
{
"epoch": 3.7908878504672896,
"grad_norm": 0.012704220600426197,
"learning_rate": 9.475759345794393e-06,
"loss": 0.0005,
"step": 6490
},
{
"epoch": 3.796728971962617,
"grad_norm": 0.012053780257701874,
"learning_rate": 9.490362149532712e-06,
"loss": 0.0005,
"step": 6500
},
{
"epoch": 3.802570093457944,
"grad_norm": 0.010433944873511791,
"learning_rate": 9.50496495327103e-06,
"loss": 0.0004,
"step": 6510
},
{
"epoch": 3.808411214953271,
"grad_norm": 0.015743877738714218,
"learning_rate": 9.519567757009346e-06,
"loss": 0.0461,
"step": 6520
},
{
"epoch": 3.814252336448598,
"grad_norm": 0.01906657963991165,
"learning_rate": 9.534170560747665e-06,
"loss": 0.0007,
"step": 6530
},
{
"epoch": 3.8200934579439254,
"grad_norm": 0.01994435489177704,
"learning_rate": 9.548773364485982e-06,
"loss": 0.0414,
"step": 6540
},
{
"epoch": 3.8259345794392523,
"grad_norm": 0.02100527472794056,
"learning_rate": 9.563376168224299e-06,
"loss": 0.2299,
"step": 6550
},
{
"epoch": 3.831775700934579,
"grad_norm": 0.0396944135427475,
"learning_rate": 9.577978971962618e-06,
"loss": 0.0558,
"step": 6560
},
{
"epoch": 3.8376168224299065,
"grad_norm": 0.02828989550471306,
"learning_rate": 9.592581775700935e-06,
"loss": 0.0014,
"step": 6570
},
{
"epoch": 3.843457943925234,
"grad_norm": 0.02526008151471615,
"learning_rate": 9.607184579439252e-06,
"loss": 0.0012,
"step": 6580
},
{
"epoch": 3.8492990654205608,
"grad_norm": 0.02473423443734646,
"learning_rate": 9.62178738317757e-06,
"loss": 0.0373,
"step": 6590
},
{
"epoch": 3.8551401869158877,
"grad_norm": 0.034433163702487946,
"learning_rate": 9.636390186915888e-06,
"loss": 0.0013,
"step": 6600
},
{
"epoch": 3.860981308411215,
"grad_norm": 0.021160632371902466,
"learning_rate": 9.650992990654207e-06,
"loss": 0.049,
"step": 6610
},
{
"epoch": 3.866822429906542,
"grad_norm": 0.025369103997945786,
"learning_rate": 9.665595794392524e-06,
"loss": 0.001,
"step": 6620
},
{
"epoch": 3.8726635514018692,
"grad_norm": 0.02345307357609272,
"learning_rate": 9.680198598130842e-06,
"loss": 0.0011,
"step": 6630
},
{
"epoch": 3.878504672897196,
"grad_norm": 0.03416465222835541,
"learning_rate": 9.69480140186916e-06,
"loss": 0.0447,
"step": 6640
},
{
"epoch": 3.8843457943925235,
"grad_norm": 0.023318510502576828,
"learning_rate": 9.709404205607478e-06,
"loss": 0.0014,
"step": 6650
},
{
"epoch": 3.8901869158878504,
"grad_norm": 0.024326141923666,
"learning_rate": 9.724007009345795e-06,
"loss": 0.0011,
"step": 6660
},
{
"epoch": 3.8960280373831777,
"grad_norm": 0.021615874022245407,
"learning_rate": 9.738609813084114e-06,
"loss": 0.0009,
"step": 6670
},
{
"epoch": 3.9018691588785046,
"grad_norm": 0.01807982474565506,
"learning_rate": 9.753212616822431e-06,
"loss": 0.0008,
"step": 6680
},
{
"epoch": 3.9077102803738315,
"grad_norm": 0.01847692020237446,
"learning_rate": 9.76781542056075e-06,
"loss": 0.0446,
"step": 6690
},
{
"epoch": 3.913551401869159,
"grad_norm": 0.022509966045618057,
"learning_rate": 9.782418224299067e-06,
"loss": 0.001,
"step": 6700
},
{
"epoch": 3.919392523364486,
"grad_norm": 0.027468325570225716,
"learning_rate": 9.797021028037384e-06,
"loss": 0.001,
"step": 6710
},
{
"epoch": 3.925233644859813,
"grad_norm": 0.01959499530494213,
"learning_rate": 9.811623831775703e-06,
"loss": 0.0008,
"step": 6720
},
{
"epoch": 3.93107476635514,
"grad_norm": 0.015794578939676285,
"learning_rate": 9.82622663551402e-06,
"loss": 0.0008,
"step": 6730
},
{
"epoch": 3.9369158878504673,
"grad_norm": 0.016814829781651497,
"learning_rate": 9.840829439252337e-06,
"loss": 0.0007,
"step": 6740
},
{
"epoch": 3.9427570093457946,
"grad_norm": 0.01603136584162712,
"learning_rate": 9.855432242990655e-06,
"loss": 0.0006,
"step": 6750
},
{
"epoch": 3.9485981308411215,
"grad_norm": 0.01432070042937994,
"learning_rate": 9.870035046728972e-06,
"loss": 0.0006,
"step": 6760
},
{
"epoch": 3.9544392523364484,
"grad_norm": 0.014329387806355953,
"learning_rate": 9.88463785046729e-06,
"loss": 0.0005,
"step": 6770
},
{
"epoch": 3.960280373831776,
"grad_norm": 0.012948929332196712,
"learning_rate": 9.899240654205608e-06,
"loss": 0.0005,
"step": 6780
},
{
"epoch": 3.9661214953271027,
"grad_norm": 0.012632016092538834,
"learning_rate": 9.913843457943925e-06,
"loss": 0.0005,
"step": 6790
},
{
"epoch": 3.97196261682243,
"grad_norm": 0.010846257209777832,
"learning_rate": 9.928446261682244e-06,
"loss": 0.0004,
"step": 6800
},
{
"epoch": 3.977803738317757,
"grad_norm": 0.01142601016908884,
"learning_rate": 9.943049065420561e-06,
"loss": 0.0004,
"step": 6810
},
{
"epoch": 3.9836448598130842,
"grad_norm": 0.06392688304185867,
"learning_rate": 9.95765186915888e-06,
"loss": 0.1766,
"step": 6820
},
{
"epoch": 3.989485981308411,
"grad_norm": 0.08231887221336365,
"learning_rate": 9.972254672897197e-06,
"loss": 0.0056,
"step": 6830
},
{
"epoch": 3.9953271028037385,
"grad_norm": 0.03441477566957474,
"learning_rate": 9.986857476635516e-06,
"loss": 0.003,
"step": 6840
},
{
"epoch": 4.0,
"eval_f1": 0.9986386325820602,
"eval_fbeta": 0.9978235099982873,
"eval_loss": 0.010695425793528557,
"eval_precision": 0.9972809667673715,
"eval_recall": 1.0,
"eval_runtime": 173.1568,
"eval_samples_per_second": 67.783,
"eval_steps_per_second": 8.478,
"step": 6848
},
{
"epoch": 4.001168224299065,
"grad_norm": 0.04184762015938759,
"learning_rate": 9.999634929906544e-06,
"loss": 0.002,
"step": 6850
},
{
"epoch": 4.007009345794392,
"grad_norm": 0.027074744924902916,
"learning_rate": 9.995984228971964e-06,
"loss": 0.0015,
"step": 6860
},
{
"epoch": 4.01285046728972,
"grad_norm": 0.02378828078508377,
"learning_rate": 9.992333528037384e-06,
"loss": 0.0012,
"step": 6870
},
{
"epoch": 4.018691588785047,
"grad_norm": 0.01926625706255436,
"learning_rate": 9.988682827102805e-06,
"loss": 0.0009,
"step": 6880
},
{
"epoch": 4.024532710280374,
"grad_norm": 0.03264503926038742,
"learning_rate": 9.985032126168225e-06,
"loss": 0.0425,
"step": 6890
},
{
"epoch": 4.030373831775701,
"grad_norm": 0.022108478471636772,
"learning_rate": 9.981381425233645e-06,
"loss": 0.0012,
"step": 6900
},
{
"epoch": 4.036214953271028,
"grad_norm": 0.036377232521772385,
"learning_rate": 9.977730724299066e-06,
"loss": 0.0404,
"step": 6910
},
{
"epoch": 4.042056074766355,
"grad_norm": 0.0229843407869339,
"learning_rate": 9.974080023364486e-06,
"loss": 0.0014,
"step": 6920
},
{
"epoch": 4.047897196261682,
"grad_norm": 0.023045457899570465,
"learning_rate": 9.970429322429908e-06,
"loss": 0.0015,
"step": 6930
},
{
"epoch": 4.053738317757009,
"grad_norm": 0.03223934397101402,
"learning_rate": 9.966778621495328e-06,
"loss": 0.0403,
"step": 6940
},
{
"epoch": 4.059579439252336,
"grad_norm": 0.022895364090800285,
"learning_rate": 9.963127920560749e-06,
"loss": 0.0015,
"step": 6950
},
{
"epoch": 4.065420560747664,
"grad_norm": 0.04552144557237625,
"learning_rate": 9.959477219626169e-06,
"loss": 0.0388,
"step": 6960
},
{
"epoch": 4.071261682242991,
"grad_norm": 0.04091038182377815,
"learning_rate": 9.95582651869159e-06,
"loss": 0.0021,
"step": 6970
},
{
"epoch": 4.077102803738318,
"grad_norm": 0.027029208838939667,
"learning_rate": 9.952175817757011e-06,
"loss": 0.0019,
"step": 6980
},
{
"epoch": 4.082943925233645,
"grad_norm": 0.027454374358057976,
"learning_rate": 9.94852511682243e-06,
"loss": 0.0016,
"step": 6990
},
{
"epoch": 4.088785046728972,
"grad_norm": 0.031487565487623215,
"learning_rate": 9.94487441588785e-06,
"loss": 0.04,
"step": 7000
},
{
"epoch": 4.094626168224299,
"grad_norm": 0.03998126834630966,
"learning_rate": 9.941223714953272e-06,
"loss": 0.0014,
"step": 7010
},
{
"epoch": 4.100467289719626,
"grad_norm": 0.050244223326444626,
"learning_rate": 9.937573014018692e-06,
"loss": 0.0747,
"step": 7020
},
{
"epoch": 4.106308411214953,
"grad_norm": 0.05759045109152794,
"learning_rate": 9.933922313084113e-06,
"loss": 0.003,
"step": 7030
},
{
"epoch": 4.11214953271028,
"grad_norm": 0.0511946938931942,
"learning_rate": 9.930271612149533e-06,
"loss": 0.0033,
"step": 7040
},
{
"epoch": 4.117990654205608,
"grad_norm": 0.03305158019065857,
"learning_rate": 9.926620911214953e-06,
"loss": 0.0024,
"step": 7050
},
{
"epoch": 4.123831775700935,
"grad_norm": 0.026132995262742043,
"learning_rate": 9.922970210280375e-06,
"loss": 0.0018,
"step": 7060
},
{
"epoch": 4.1296728971962615,
"grad_norm": 0.02357422560453415,
"learning_rate": 9.919319509345796e-06,
"loss": 0.0014,
"step": 7070
},
{
"epoch": 4.135514018691588,
"grad_norm": 0.02945876307785511,
"learning_rate": 9.915668808411216e-06,
"loss": 0.0407,
"step": 7080
},
{
"epoch": 4.141355140186916,
"grad_norm": 0.031209511682391167,
"learning_rate": 9.912018107476636e-06,
"loss": 0.0013,
"step": 7090
},
{
"epoch": 4.147196261682243,
"grad_norm": 0.03341691195964813,
"learning_rate": 9.908367406542057e-06,
"loss": 0.0397,
"step": 7100
},
{
"epoch": 4.15303738317757,
"grad_norm": 0.02726580575108528,
"learning_rate": 9.904716705607477e-06,
"loss": 0.0016,
"step": 7110
},
{
"epoch": 4.158878504672897,
"grad_norm": 0.0374341756105423,
"learning_rate": 9.901066004672897e-06,
"loss": 0.0016,
"step": 7120
},
{
"epoch": 4.164719626168225,
"grad_norm": 0.026886312291026115,
"learning_rate": 9.897415303738318e-06,
"loss": 0.0014,
"step": 7130
},
{
"epoch": 4.170560747663552,
"grad_norm": 0.03707856312394142,
"learning_rate": 9.89376460280374e-06,
"loss": 0.0405,
"step": 7140
},
{
"epoch": 4.1764018691588785,
"grad_norm": 0.02945527993142605,
"learning_rate": 9.89011390186916e-06,
"loss": 0.0014,
"step": 7150
},
{
"epoch": 4.182242990654205,
"grad_norm": 0.031244931742548943,
"learning_rate": 9.88646320093458e-06,
"loss": 0.0015,
"step": 7160
},
{
"epoch": 4.188084112149533,
"grad_norm": 0.0277140773832798,
"learning_rate": 9.8828125e-06,
"loss": 0.0013,
"step": 7170
},
{
"epoch": 4.19392523364486,
"grad_norm": 0.024385616183280945,
"learning_rate": 9.87916179906542e-06,
"loss": 0.0012,
"step": 7180
},
{
"epoch": 4.199766355140187,
"grad_norm": 0.02136746421456337,
"learning_rate": 9.875511098130843e-06,
"loss": 0.001,
"step": 7190
},
{
"epoch": 4.205607476635514,
"grad_norm": 0.025973528623580933,
"learning_rate": 9.871860397196263e-06,
"loss": 0.0415,
"step": 7200
},
{
"epoch": 4.211448598130841,
"grad_norm": 0.02830415591597557,
"learning_rate": 9.868209696261683e-06,
"loss": 0.0011,
"step": 7210
},
{
"epoch": 4.2172897196261685,
"grad_norm": 0.028491418808698654,
"learning_rate": 9.864558995327104e-06,
"loss": 0.0013,
"step": 7220
},
{
"epoch": 4.223130841121495,
"grad_norm": 0.025560962036252022,
"learning_rate": 9.860908294392524e-06,
"loss": 0.041,
"step": 7230
},
{
"epoch": 4.228971962616822,
"grad_norm": 0.042416248470544815,
"learning_rate": 9.857257593457944e-06,
"loss": 0.0403,
"step": 7240
},
{
"epoch": 4.234813084112149,
"grad_norm": 0.03840547055006027,
"learning_rate": 9.853606892523365e-06,
"loss": 0.0022,
"step": 7250
},
{
"epoch": 4.240654205607477,
"grad_norm": 0.04450295493006706,
"learning_rate": 9.849956191588785e-06,
"loss": 0.0021,
"step": 7260
},
{
"epoch": 4.246495327102804,
"grad_norm": 0.025487173348665237,
"learning_rate": 9.846305490654207e-06,
"loss": 0.0016,
"step": 7270
},
{
"epoch": 4.252336448598131,
"grad_norm": 0.03323301300406456,
"learning_rate": 9.842654789719627e-06,
"loss": 0.0392,
"step": 7280
},
{
"epoch": 4.258177570093458,
"grad_norm": 0.0345788300037384,
"learning_rate": 9.839004088785048e-06,
"loss": 0.0016,
"step": 7290
},
{
"epoch": 4.264018691588785,
"grad_norm": 0.02562621608376503,
"learning_rate": 9.835353387850468e-06,
"loss": 0.0016,
"step": 7300
},
{
"epoch": 4.269859813084112,
"grad_norm": 0.03218419477343559,
"learning_rate": 9.831702686915888e-06,
"loss": 0.0014,
"step": 7310
},
{
"epoch": 4.275700934579439,
"grad_norm": 0.017818788066506386,
"learning_rate": 9.82805198598131e-06,
"loss": 0.0011,
"step": 7320
},
{
"epoch": 4.281542056074766,
"grad_norm": 0.027568548917770386,
"learning_rate": 9.824401285046729e-06,
"loss": 0.0408,
"step": 7330
},
{
"epoch": 4.287383177570094,
"grad_norm": 0.03140578046441078,
"learning_rate": 9.820750584112151e-06,
"loss": 0.0012,
"step": 7340
},
{
"epoch": 4.293224299065421,
"grad_norm": 0.017698241397738457,
"learning_rate": 9.817099883177571e-06,
"loss": 0.0012,
"step": 7350
},
{
"epoch": 4.299065420560748,
"grad_norm": 0.022705256938934326,
"learning_rate": 9.813449182242992e-06,
"loss": 0.0011,
"step": 7360
},
{
"epoch": 4.304906542056075,
"grad_norm": 0.028253547847270966,
"learning_rate": 9.809798481308412e-06,
"loss": 0.0405,
"step": 7370
},
{
"epoch": 4.3107476635514015,
"grad_norm": 0.03405269980430603,
"learning_rate": 9.806147780373832e-06,
"loss": 0.0014,
"step": 7380
},
{
"epoch": 4.316588785046729,
"grad_norm": 0.030191723257303238,
"learning_rate": 9.802497079439252e-06,
"loss": 0.0014,
"step": 7390
},
{
"epoch": 4.322429906542056,
"grad_norm": 0.01599167101085186,
"learning_rate": 9.798846378504675e-06,
"loss": 0.0011,
"step": 7400
},
{
"epoch": 4.328271028037383,
"grad_norm": 0.0164847020059824,
"learning_rate": 9.795195677570095e-06,
"loss": 0.001,
"step": 7410
},
{
"epoch": 4.33411214953271,
"grad_norm": 0.021135691553354263,
"learning_rate": 9.791544976635515e-06,
"loss": 0.0009,
"step": 7420
},
{
"epoch": 4.339953271028038,
"grad_norm": 0.021334068849682808,
"learning_rate": 9.787894275700935e-06,
"loss": 0.0008,
"step": 7430
},
{
"epoch": 4.345794392523365,
"grad_norm": 0.012419966980814934,
"learning_rate": 9.784243574766356e-06,
"loss": 0.0007,
"step": 7440
},
{
"epoch": 4.3516355140186915,
"grad_norm": 0.011540565639734268,
"learning_rate": 9.780592873831776e-06,
"loss": 0.0006,
"step": 7450
},
{
"epoch": 4.357476635514018,
"grad_norm": 0.012541793286800385,
"learning_rate": 9.776942172897196e-06,
"loss": 0.0006,
"step": 7460
},
{
"epoch": 4.363317757009346,
"grad_norm": 0.013756770640611649,
"learning_rate": 9.773291471962617e-06,
"loss": 0.0005,
"step": 7470
},
{
"epoch": 4.369158878504673,
"grad_norm": 0.010488706640899181,
"learning_rate": 9.769640771028039e-06,
"loss": 0.0005,
"step": 7480
},
{
"epoch": 4.375,
"grad_norm": 0.019985994324088097,
"learning_rate": 9.765990070093459e-06,
"loss": 0.0884,
"step": 7490
},
{
"epoch": 4.380841121495327,
"grad_norm": 0.01809321902692318,
"learning_rate": 9.76233936915888e-06,
"loss": 0.001,
"step": 7500
},
{
"epoch": 4.386682242990654,
"grad_norm": 0.025336505845189095,
"learning_rate": 9.7586886682243e-06,
"loss": 0.0011,
"step": 7510
},
{
"epoch": 4.392523364485982,
"grad_norm": 0.022142156958580017,
"learning_rate": 9.75503796728972e-06,
"loss": 0.0011,
"step": 7520
},
{
"epoch": 4.3983644859813085,
"grad_norm": 0.028588872402906418,
"learning_rate": 9.751387266355142e-06,
"loss": 0.001,
"step": 7530
},
{
"epoch": 4.404205607476635,
"grad_norm": 0.01660712994635105,
"learning_rate": 9.74773656542056e-06,
"loss": 0.0009,
"step": 7540
},
{
"epoch": 4.410046728971962,
"grad_norm": 0.020251592621207237,
"learning_rate": 9.744085864485983e-06,
"loss": 0.0008,
"step": 7550
},
{
"epoch": 4.41588785046729,
"grad_norm": 0.015099707059562206,
"learning_rate": 9.740435163551403e-06,
"loss": 0.0007,
"step": 7560
},
{
"epoch": 4.421728971962617,
"grad_norm": 0.014263300225138664,
"learning_rate": 9.736784462616823e-06,
"loss": 0.0006,
"step": 7570
},
{
"epoch": 4.427570093457944,
"grad_norm": 0.015971451997756958,
"learning_rate": 9.733133761682244e-06,
"loss": 0.0006,
"step": 7580
},
{
"epoch": 4.433411214953271,
"grad_norm": 0.018041100353002548,
"learning_rate": 9.729483060747664e-06,
"loss": 0.0433,
"step": 7590
},
{
"epoch": 4.4392523364485985,
"grad_norm": 0.02004099264740944,
"learning_rate": 9.725832359813084e-06,
"loss": 0.0008,
"step": 7600
},
{
"epoch": 4.445093457943925,
"grad_norm": 0.016719138249754906,
"learning_rate": 9.722181658878506e-06,
"loss": 0.0007,
"step": 7610
},
{
"epoch": 4.450934579439252,
"grad_norm": 0.017085473984479904,
"learning_rate": 9.718530957943926e-06,
"loss": 0.0008,
"step": 7620
},
{
"epoch": 4.456775700934579,
"grad_norm": 0.014245687052607536,
"learning_rate": 9.714880257009347e-06,
"loss": 0.0007,
"step": 7630
},
{
"epoch": 4.462616822429906,
"grad_norm": 0.011969489976763725,
"learning_rate": 9.711229556074767e-06,
"loss": 0.0006,
"step": 7640
},
{
"epoch": 4.468457943925234,
"grad_norm": 0.012137607671320438,
"learning_rate": 9.707578855140187e-06,
"loss": 0.0006,
"step": 7650
},
{
"epoch": 4.474299065420561,
"grad_norm": 0.012511523440480232,
"learning_rate": 9.703928154205608e-06,
"loss": 0.0005,
"step": 7660
},
{
"epoch": 4.480140186915888,
"grad_norm": 0.01686168648302555,
"learning_rate": 9.700277453271028e-06,
"loss": 0.0449,
"step": 7670
},
{
"epoch": 4.485981308411215,
"grad_norm": 0.01611410826444626,
"learning_rate": 9.69662675233645e-06,
"loss": 0.0007,
"step": 7680
},
{
"epoch": 4.491822429906542,
"grad_norm": 0.017773732542991638,
"learning_rate": 9.69297605140187e-06,
"loss": 0.0008,
"step": 7690
},
{
"epoch": 4.497663551401869,
"grad_norm": 0.01379080768674612,
"learning_rate": 9.68932535046729e-06,
"loss": 0.0007,
"step": 7700
},
{
"epoch": 4.503504672897196,
"grad_norm": 0.011493315920233727,
"learning_rate": 9.685674649532711e-06,
"loss": 0.0006,
"step": 7710
},
{
"epoch": 4.509345794392523,
"grad_norm": 0.012840710580348969,
"learning_rate": 9.682023948598131e-06,
"loss": 0.0006,
"step": 7720
},
{
"epoch": 4.515186915887851,
"grad_norm": 0.0128638232126832,
"learning_rate": 9.678373247663552e-06,
"loss": 0.0005,
"step": 7730
},
{
"epoch": 4.521028037383178,
"grad_norm": 0.013440934009850025,
"learning_rate": 9.674722546728974e-06,
"loss": 0.0005,
"step": 7740
},
{
"epoch": 4.526869158878505,
"grad_norm": 0.01098957471549511,
"learning_rate": 9.671071845794392e-06,
"loss": 0.0005,
"step": 7750
},
{
"epoch": 4.5327102803738315,
"grad_norm": 0.009183285757899284,
"learning_rate": 9.667421144859814e-06,
"loss": 0.0004,
"step": 7760
},
{
"epoch": 4.538551401869158,
"grad_norm": 0.008257759734988213,
"learning_rate": 9.663770443925235e-06,
"loss": 0.0004,
"step": 7770
},
{
"epoch": 4.544392523364486,
"grad_norm": 0.008908426389098167,
"learning_rate": 9.660119742990655e-06,
"loss": 0.0004,
"step": 7780
},
{
"epoch": 4.550233644859813,
"grad_norm": 0.011919341050088406,
"learning_rate": 9.656469042056075e-06,
"loss": 0.0471,
"step": 7790
},
{
"epoch": 4.55607476635514,
"grad_norm": 0.015091456472873688,
"learning_rate": 9.652818341121496e-06,
"loss": 0.0006,
"step": 7800
},
{
"epoch": 4.561915887850468,
"grad_norm": 0.013059835880994797,
"learning_rate": 9.649167640186918e-06,
"loss": 0.0006,
"step": 7810
},
{
"epoch": 4.567757009345795,
"grad_norm": 0.011410195380449295,
"learning_rate": 9.645516939252338e-06,
"loss": 0.0005,
"step": 7820
},
{
"epoch": 4.5735981308411215,
"grad_norm": 0.010844654403626919,
"learning_rate": 9.641866238317758e-06,
"loss": 0.0005,
"step": 7830
},
{
"epoch": 4.579439252336448,
"grad_norm": 0.0135049344971776,
"learning_rate": 9.638215537383178e-06,
"loss": 0.0878,
"step": 7840
},
{
"epoch": 4.585280373831775,
"grad_norm": 0.01884039305150509,
"learning_rate": 9.634564836448599e-06,
"loss": 0.0007,
"step": 7850
},
{
"epoch": 4.591121495327103,
"grad_norm": 0.0158846452832222,
"learning_rate": 9.630914135514019e-06,
"loss": 0.0008,
"step": 7860
},
{
"epoch": 4.59696261682243,
"grad_norm": 0.018315622583031654,
"learning_rate": 9.62726343457944e-06,
"loss": 0.0007,
"step": 7870
},
{
"epoch": 4.602803738317757,
"grad_norm": 0.015546320006251335,
"learning_rate": 9.62361273364486e-06,
"loss": 0.0006,
"step": 7880
},
{
"epoch": 4.608644859813084,
"grad_norm": 0.012519214302301407,
"learning_rate": 9.619962032710282e-06,
"loss": 0.0006,
"step": 7890
},
{
"epoch": 4.614485981308412,
"grad_norm": 0.012493118643760681,
"learning_rate": 9.616311331775702e-06,
"loss": 0.0005,
"step": 7900
},
{
"epoch": 4.6203271028037385,
"grad_norm": 0.012429878115653992,
"learning_rate": 9.612660630841122e-06,
"loss": 0.0424,
"step": 7910
},
{
"epoch": 4.626168224299065,
"grad_norm": 0.03473297879099846,
"learning_rate": 9.609009929906543e-06,
"loss": 0.0862,
"step": 7920
},
{
"epoch": 4.632009345794392,
"grad_norm": 0.02223706804215908,
"learning_rate": 9.605359228971963e-06,
"loss": 0.0012,
"step": 7930
},
{
"epoch": 4.63785046728972,
"grad_norm": 1.4426337480545044,
"learning_rate": 9.601708528037385e-06,
"loss": 0.0434,
"step": 7940
},
{
"epoch": 4.643691588785047,
"grad_norm": 0.027474477887153625,
"learning_rate": 9.598057827102805e-06,
"loss": 0.001,
"step": 7950
},
{
"epoch": 4.649532710280374,
"grad_norm": 1.3533803224563599,
"learning_rate": 9.594407126168226e-06,
"loss": 0.0405,
"step": 7960
},
{
"epoch": 4.655373831775701,
"grad_norm": 0.028552265837788582,
"learning_rate": 9.590756425233646e-06,
"loss": 0.0014,
"step": 7970
},
{
"epoch": 4.661214953271028,
"grad_norm": 0.018535079434514046,
"learning_rate": 9.587105724299066e-06,
"loss": 0.0015,
"step": 7980
},
{
"epoch": 4.667056074766355,
"grad_norm": 0.027407588437199593,
"learning_rate": 9.583455023364487e-06,
"loss": 0.0015,
"step": 7990
},
{
"epoch": 4.672897196261682,
"grad_norm": 0.020983988419175148,
"learning_rate": 9.579804322429907e-06,
"loss": 0.0011,
"step": 8000
},
{
"epoch": 4.678738317757009,
"grad_norm": 1.3812689781188965,
"learning_rate": 9.576153621495327e-06,
"loss": 0.0408,
"step": 8010
},
{
"epoch": 4.684579439252336,
"grad_norm": 0.02850353717803955,
"learning_rate": 9.57250292056075e-06,
"loss": 0.0011,
"step": 8020
},
{
"epoch": 4.690420560747664,
"grad_norm": 0.021959487348794937,
"learning_rate": 9.56885221962617e-06,
"loss": 0.0012,
"step": 8030
},
{
"epoch": 4.696261682242991,
"grad_norm": 3.579338788986206,
"learning_rate": 9.56520151869159e-06,
"loss": 0.074,
"step": 8040
},
{
"epoch": 4.702102803738318,
"grad_norm": 0.05354708805680275,
"learning_rate": 9.56155081775701e-06,
"loss": 0.0028,
"step": 8050
},
{
"epoch": 4.707943925233645,
"grad_norm": 0.030053434893488884,
"learning_rate": 9.55790011682243e-06,
"loss": 0.0018,
"step": 8060
},
{
"epoch": 4.713785046728972,
"grad_norm": 1.3448758125305176,
"learning_rate": 9.55424941588785e-06,
"loss": 0.0804,
"step": 8070
},
{
"epoch": 4.719626168224299,
"grad_norm": 0.07039051502943039,
"learning_rate": 9.550598714953273e-06,
"loss": 0.0021,
"step": 8080
},
{
"epoch": 4.725467289719626,
"grad_norm": 0.04373152554035187,
"learning_rate": 9.546948014018691e-06,
"loss": 0.0028,
"step": 8090
},
{
"epoch": 4.731308411214953,
"grad_norm": 0.04728665202856064,
"learning_rate": 9.543297313084113e-06,
"loss": 0.0031,
"step": 8100
},
{
"epoch": 4.73714953271028,
"grad_norm": 0.04679826647043228,
"learning_rate": 9.539646612149534e-06,
"loss": 0.0928,
"step": 8110
},
{
"epoch": 4.742990654205608,
"grad_norm": 0.0427640900015831,
"learning_rate": 9.535995911214954e-06,
"loss": 0.0037,
"step": 8120
},
{
"epoch": 4.748831775700935,
"grad_norm": 0.02820892632007599,
"learning_rate": 9.532345210280374e-06,
"loss": 0.0015,
"step": 8130
},
{
"epoch": 4.7546728971962615,
"grad_norm": 0.028863176703453064,
"learning_rate": 9.528694509345795e-06,
"loss": 0.0416,
"step": 8140
},
{
"epoch": 4.760514018691588,
"grad_norm": 0.030389975756406784,
"learning_rate": 9.525043808411217e-06,
"loss": 0.0014,
"step": 8150
},
{
"epoch": 4.766355140186916,
"grad_norm": 0.025430910289287567,
"learning_rate": 9.521393107476637e-06,
"loss": 0.0013,
"step": 8160
},
{
"epoch": 4.772196261682243,
"grad_norm": 0.02759852446615696,
"learning_rate": 9.517742406542057e-06,
"loss": 0.0012,
"step": 8170
},
{
"epoch": 4.77803738317757,
"grad_norm": 0.01693788357079029,
"learning_rate": 9.514091705607478e-06,
"loss": 0.0009,
"step": 8180
},
{
"epoch": 4.783878504672897,
"grad_norm": 0.019037263467907906,
"learning_rate": 9.510441004672898e-06,
"loss": 0.0008,
"step": 8190
},
{
"epoch": 4.789719626168225,
"grad_norm": 0.01847090944647789,
"learning_rate": 9.506790303738318e-06,
"loss": 0.0008,
"step": 8200
},
{
"epoch": 4.795560747663552,
"grad_norm": 0.014866204001009464,
"learning_rate": 9.503139602803739e-06,
"loss": 0.0007,
"step": 8210
},
{
"epoch": 4.8014018691588785,
"grad_norm": 0.017772305756807327,
"learning_rate": 9.499488901869159e-06,
"loss": 0.044,
"step": 8220
},
{
"epoch": 4.807242990654205,
"grad_norm": 0.018901938572525978,
"learning_rate": 9.495838200934581e-06,
"loss": 0.0008,
"step": 8230
},
{
"epoch": 4.813084112149532,
"grad_norm": 0.01904388889670372,
"learning_rate": 9.492187500000001e-06,
"loss": 0.001,
"step": 8240
},
{
"epoch": 4.81892523364486,
"grad_norm": 0.02215546742081642,
"learning_rate": 9.488536799065421e-06,
"loss": 0.0008,
"step": 8250
},
{
"epoch": 4.824766355140187,
"grad_norm": 0.0166354738175869,
"learning_rate": 9.484886098130842e-06,
"loss": 0.0008,
"step": 8260
},
{
"epoch": 4.830607476635514,
"grad_norm": 0.015143346972763538,
"learning_rate": 9.481235397196262e-06,
"loss": 0.0007,
"step": 8270
},
{
"epoch": 4.836448598130842,
"grad_norm": 0.010659987106919289,
"learning_rate": 9.477584696261684e-06,
"loss": 0.0006,
"step": 8280
},
{
"epoch": 4.8422897196261685,
"grad_norm": 0.01438753679394722,
"learning_rate": 9.473933995327104e-06,
"loss": 0.0006,
"step": 8290
},
{
"epoch": 4.848130841121495,
"grad_norm": 0.009508033283054829,
"learning_rate": 9.470283294392523e-06,
"loss": 0.0006,
"step": 8300
},
{
"epoch": 4.853971962616822,
"grad_norm": 0.01247571874409914,
"learning_rate": 9.466632593457945e-06,
"loss": 0.0005,
"step": 8310
},
{
"epoch": 4.859813084112149,
"grad_norm": 0.010293890722095966,
"learning_rate": 9.462981892523365e-06,
"loss": 0.0005,
"step": 8320
},
{
"epoch": 4.865654205607477,
"grad_norm": 0.011530600488185883,
"learning_rate": 9.459331191588786e-06,
"loss": 0.0004,
"step": 8330
},
{
"epoch": 4.871495327102804,
"grad_norm": 0.011948925442993641,
"learning_rate": 9.455680490654206e-06,
"loss": 0.0004,
"step": 8340
},
{
"epoch": 4.877336448598131,
"grad_norm": 0.00931254867464304,
"learning_rate": 9.452029789719626e-06,
"loss": 0.0004,
"step": 8350
},
{
"epoch": 4.883177570093458,
"grad_norm": 0.008403575979173183,
"learning_rate": 9.448379088785048e-06,
"loss": 0.0004,
"step": 8360
},
{
"epoch": 4.8890186915887845,
"grad_norm": 0.011545160785317421,
"learning_rate": 9.444728387850469e-06,
"loss": 0.0478,
"step": 8370
},
{
"epoch": 4.894859813084112,
"grad_norm": 0.017456984147429466,
"learning_rate": 9.441077686915889e-06,
"loss": 0.0006,
"step": 8380
},
{
"epoch": 4.900700934579439,
"grad_norm": 0.010550367645919323,
"learning_rate": 9.43742698598131e-06,
"loss": 0.0006,
"step": 8390
},
{
"epoch": 4.906542056074766,
"grad_norm": 1.5026402473449707,
"learning_rate": 9.43377628504673e-06,
"loss": 0.0452,
"step": 8400
},
{
"epoch": 4.912383177570094,
"grad_norm": 0.015959005802869797,
"learning_rate": 9.430125584112152e-06,
"loss": 0.0006,
"step": 8410
},
{
"epoch": 4.918224299065421,
"grad_norm": 0.01576901040971279,
"learning_rate": 9.42647488317757e-06,
"loss": 0.0008,
"step": 8420
},
{
"epoch": 4.924065420560748,
"grad_norm": 0.012024643830955029,
"learning_rate": 9.42282418224299e-06,
"loss": 0.0008,
"step": 8430
},
{
"epoch": 4.929906542056075,
"grad_norm": 0.017918461933732033,
"learning_rate": 9.419173481308413e-06,
"loss": 0.0007,
"step": 8440
},
{
"epoch": 4.9357476635514015,
"grad_norm": 0.018606670200824738,
"learning_rate": 9.415522780373833e-06,
"loss": 0.0006,
"step": 8450
},
{
"epoch": 4.941588785046729,
"grad_norm": 0.01964343525469303,
"learning_rate": 9.411872079439253e-06,
"loss": 0.0427,
"step": 8460
},
{
"epoch": 4.947429906542056,
"grad_norm": 0.019112348556518555,
"learning_rate": 9.408221378504673e-06,
"loss": 0.0009,
"step": 8470
},
{
"epoch": 4.953271028037383,
"grad_norm": 0.017472274601459503,
"learning_rate": 9.404570677570094e-06,
"loss": 0.0009,
"step": 8480
},
{
"epoch": 4.95911214953271,
"grad_norm": 0.018791217356920242,
"learning_rate": 9.400919976635516e-06,
"loss": 0.0009,
"step": 8490
},
{
"epoch": 4.964953271028038,
"grad_norm": 0.014025550335645676,
"learning_rate": 9.397269275700936e-06,
"loss": 0.0008,
"step": 8500
},
{
"epoch": 4.970794392523365,
"grad_norm": 0.018441924825310707,
"learning_rate": 9.393618574766355e-06,
"loss": 0.0434,
"step": 8510
},
{
"epoch": 4.9766355140186915,
"grad_norm": 0.02612805739045143,
"learning_rate": 9.389967873831777e-06,
"loss": 0.0009,
"step": 8520
},
{
"epoch": 4.982476635514018,
"grad_norm": 1.388852596282959,
"learning_rate": 9.386317172897197e-06,
"loss": 0.0395,
"step": 8530
},
{
"epoch": 4.988317757009346,
"grad_norm": 0.04369686543941498,
"learning_rate": 9.382666471962617e-06,
"loss": 0.0018,
"step": 8540
},
{
"epoch": 4.994158878504673,
"grad_norm": 0.019059184938669205,
"learning_rate": 9.379015771028038e-06,
"loss": 0.0016,
"step": 8550
},
{
"epoch": 5.0,
"grad_norm": 0.017675094306468964,
"learning_rate": 9.375365070093458e-06,
"loss": 0.0011,
"step": 8560
},
{
"epoch": 5.0,
"eval_f1": 0.9985628923682021,
"eval_fbeta": 0.997793015231967,
"eval_loss": 0.010423527099192142,
"eval_precision": 0.9972805559752228,
"eval_recall": 0.9998485307482581,
"eval_runtime": 171.2964,
"eval_samples_per_second": 68.519,
"eval_steps_per_second": 8.57,
"step": 8560
},
{
"epoch": 5.005841121495327,
"grad_norm": 0.020811092108488083,
"learning_rate": 9.37171436915888e-06,
"loss": 0.0009,
"step": 8570
},
{
"epoch": 5.011682242990654,
"grad_norm": 0.017634106799960136,
"learning_rate": 9.3680636682243e-06,
"loss": 0.0007,
"step": 8580
},
{
"epoch": 5.017523364485982,
"grad_norm": 0.01755143329501152,
"learning_rate": 9.36441296728972e-06,
"loss": 0.0007,
"step": 8590
},
{
"epoch": 5.0233644859813085,
"grad_norm": 0.013711643405258656,
"learning_rate": 9.360762266355141e-06,
"loss": 0.0006,
"step": 8600
},
{
"epoch": 5.029205607476635,
"grad_norm": 0.022771639749407768,
"learning_rate": 9.357111565420561e-06,
"loss": 0.0435,
"step": 8610
},
{
"epoch": 5.035046728971962,
"grad_norm": 0.023437688127160072,
"learning_rate": 9.353460864485983e-06,
"loss": 0.0009,
"step": 8620
},
{
"epoch": 5.04088785046729,
"grad_norm": 0.027155088260769844,
"learning_rate": 9.349810163551402e-06,
"loss": 0.0008,
"step": 8630
},
{
"epoch": 5.046728971962617,
"grad_norm": 0.02320449985563755,
"learning_rate": 9.346159462616822e-06,
"loss": 0.0393,
"step": 8640
},
{
"epoch": 5.052570093457944,
"grad_norm": 2.8932414054870605,
"learning_rate": 9.342508761682244e-06,
"loss": 0.0434,
"step": 8650
},
{
"epoch": 5.058411214953271,
"grad_norm": 0.044957537204027176,
"learning_rate": 9.338858060747665e-06,
"loss": 0.0014,
"step": 8660
},
{
"epoch": 5.0642523364485985,
"grad_norm": 0.024762434884905815,
"learning_rate": 9.335207359813085e-06,
"loss": 0.0014,
"step": 8670
},
{
"epoch": 5.070093457943925,
"grad_norm": 0.017925061285495758,
"learning_rate": 9.331556658878505e-06,
"loss": 0.0008,
"step": 8680
},
{
"epoch": 5.075934579439252,
"grad_norm": 0.01599021814763546,
"learning_rate": 9.327905957943925e-06,
"loss": 0.0007,
"step": 8690
},
{
"epoch": 5.081775700934579,
"grad_norm": 0.01171871181577444,
"learning_rate": 9.324255257009347e-06,
"loss": 0.0006,
"step": 8700
},
{
"epoch": 5.087616822429906,
"grad_norm": 0.018137352541089058,
"learning_rate": 9.320604556074768e-06,
"loss": 0.089,
"step": 8710
},
{
"epoch": 5.093457943925234,
"grad_norm": 0.03295230492949486,
"learning_rate": 9.316953855140186e-06,
"loss": 0.0015,
"step": 8720
},
{
"epoch": 5.099299065420561,
"grad_norm": 0.0420125387609005,
"learning_rate": 9.313303154205608e-06,
"loss": 0.0016,
"step": 8730
},
{
"epoch": 5.105140186915888,
"grad_norm": 0.024619553238153458,
"learning_rate": 9.309652453271029e-06,
"loss": 0.0015,
"step": 8740
},
{
"epoch": 5.110981308411215,
"grad_norm": 0.024943150579929352,
"learning_rate": 9.306001752336449e-06,
"loss": 0.0011,
"step": 8750
},
{
"epoch": 5.116822429906542,
"grad_norm": 0.024661751464009285,
"learning_rate": 9.30235105140187e-06,
"loss": 0.001,
"step": 8760
},
{
"epoch": 5.122663551401869,
"grad_norm": 0.019864290952682495,
"learning_rate": 9.29870035046729e-06,
"loss": 0.0009,
"step": 8770
},
{
"epoch": 5.128504672897196,
"grad_norm": 0.014965186826884747,
"learning_rate": 9.295049649532712e-06,
"loss": 0.0393,
"step": 8780
},
{
"epoch": 5.134345794392523,
"grad_norm": 0.020861614495515823,
"learning_rate": 9.291398948598132e-06,
"loss": 0.001,
"step": 8790
},
{
"epoch": 5.140186915887851,
"grad_norm": 0.01782502979040146,
"learning_rate": 9.287748247663552e-06,
"loss": 0.032,
"step": 8800
},
{
"epoch": 5.146028037383178,
"grad_norm": 0.014934423379600048,
"learning_rate": 9.284097546728973e-06,
"loss": 0.0007,
"step": 8810
},
{
"epoch": 5.151869158878505,
"grad_norm": 0.02040957845747471,
"learning_rate": 9.280446845794393e-06,
"loss": 0.0434,
"step": 8820
},
{
"epoch": 5.1577102803738315,
"grad_norm": 0.027365613728761673,
"learning_rate": 9.276796144859815e-06,
"loss": 0.0009,
"step": 8830
},
{
"epoch": 5.163551401869159,
"grad_norm": 0.026035156100988388,
"learning_rate": 9.273145443925235e-06,
"loss": 0.001,
"step": 8840
},
{
"epoch": 5.169392523364486,
"grad_norm": 0.011135846376419067,
"learning_rate": 9.269494742990654e-06,
"loss": 0.0008,
"step": 8850
},
{
"epoch": 5.175233644859813,
"grad_norm": 0.02742450125515461,
"learning_rate": 9.265844042056076e-06,
"loss": 0.0422,
"step": 8860
},
{
"epoch": 5.18107476635514,
"grad_norm": 0.029255658388137817,
"learning_rate": 9.262193341121496e-06,
"loss": 0.0012,
"step": 8870
},
{
"epoch": 5.186915887850467,
"grad_norm": 0.02883157692849636,
"learning_rate": 9.258542640186917e-06,
"loss": 0.0013,
"step": 8880
},
{
"epoch": 5.192757009345795,
"grad_norm": 0.02262677438557148,
"learning_rate": 9.254891939252337e-06,
"loss": 0.001,
"step": 8890
},
{
"epoch": 5.1985981308411215,
"grad_norm": 0.018410807475447655,
"learning_rate": 9.251241238317757e-06,
"loss": 0.0008,
"step": 8900
},
{
"epoch": 5.204439252336448,
"grad_norm": 0.030904971063137054,
"learning_rate": 9.247590537383179e-06,
"loss": 0.0413,
"step": 8910
},
{
"epoch": 5.210280373831775,
"grad_norm": 0.029191523790359497,
"learning_rate": 9.2439398364486e-06,
"loss": 0.0012,
"step": 8920
},
{
"epoch": 5.216121495327103,
"grad_norm": 0.021812813356518745,
"learning_rate": 9.24028913551402e-06,
"loss": 0.001,
"step": 8930
},
{
"epoch": 5.22196261682243,
"grad_norm": 0.027340680360794067,
"learning_rate": 9.23663843457944e-06,
"loss": 0.001,
"step": 8940
},
{
"epoch": 5.227803738317757,
"grad_norm": 0.013570006936788559,
"learning_rate": 9.23298773364486e-06,
"loss": 0.0008,
"step": 8950
},
{
"epoch": 5.233644859813084,
"grad_norm": 0.02434568665921688,
"learning_rate": 9.229337032710282e-06,
"loss": 0.0417,
"step": 8960
},
{
"epoch": 5.239485981308412,
"grad_norm": 0.03371907025575638,
"learning_rate": 9.225686331775701e-06,
"loss": 0.0011,
"step": 8970
},
{
"epoch": 5.2453271028037385,
"grad_norm": 0.02317052148282528,
"learning_rate": 9.222035630841121e-06,
"loss": 0.0011,
"step": 8980
},
{
"epoch": 5.251168224299065,
"grad_norm": 0.02078530192375183,
"learning_rate": 9.218384929906543e-06,
"loss": 0.0011,
"step": 8990
},
{
"epoch": 5.257009345794392,
"grad_norm": 0.018598228693008423,
"learning_rate": 9.214734228971964e-06,
"loss": 0.0008,
"step": 9000
},
{
"epoch": 5.26285046728972,
"grad_norm": 0.03364751860499382,
"learning_rate": 9.211083528037384e-06,
"loss": 0.0359,
"step": 9010
},
{
"epoch": 5.268691588785047,
"grad_norm": 0.01850021816790104,
"learning_rate": 9.207432827102804e-06,
"loss": 0.0085,
"step": 9020
},
{
"epoch": 5.274532710280374,
"grad_norm": 0.014193546026945114,
"learning_rate": 9.203782126168225e-06,
"loss": 0.0007,
"step": 9030
},
{
"epoch": 5.280373831775701,
"grad_norm": 0.011346135288476944,
"learning_rate": 9.200131425233647e-06,
"loss": 0.0005,
"step": 9040
},
{
"epoch": 5.286214953271028,
"grad_norm": 0.011504976078867912,
"learning_rate": 9.196480724299067e-06,
"loss": 0.0005,
"step": 9050
},
{
"epoch": 5.292056074766355,
"grad_norm": 1.3916432857513428,
"learning_rate": 9.192830023364486e-06,
"loss": 0.0825,
"step": 9060
},
{
"epoch": 5.297897196261682,
"grad_norm": 0.04609482362866402,
"learning_rate": 9.189179322429908e-06,
"loss": 0.002,
"step": 9070
},
{
"epoch": 5.303738317757009,
"grad_norm": 0.05234284698963165,
"learning_rate": 9.185528621495328e-06,
"loss": 0.031,
"step": 9080
},
{
"epoch": 5.309579439252336,
"grad_norm": 0.08297610282897949,
"learning_rate": 9.181877920560748e-06,
"loss": 0.004,
"step": 9090
},
{
"epoch": 5.315420560747664,
"grad_norm": 0.034371357411146164,
"learning_rate": 9.178227219626168e-06,
"loss": 0.0472,
"step": 9100
},
{
"epoch": 5.321261682242991,
"grad_norm": 0.01807144097983837,
"learning_rate": 9.174576518691589e-06,
"loss": 0.0442,
"step": 9110
},
{
"epoch": 5.327102803738318,
"grad_norm": 0.02567487396299839,
"learning_rate": 9.17092581775701e-06,
"loss": 0.001,
"step": 9120
},
{
"epoch": 5.332943925233645,
"grad_norm": 0.02019997499883175,
"learning_rate": 9.167275116822431e-06,
"loss": 0.001,
"step": 9130
},
{
"epoch": 5.338785046728972,
"grad_norm": 0.021175650879740715,
"learning_rate": 9.163624415887851e-06,
"loss": 0.001,
"step": 9140
},
{
"epoch": 5.344626168224299,
"grad_norm": 0.0180047620087862,
"learning_rate": 9.159973714953272e-06,
"loss": 0.0008,
"step": 9150
},
{
"epoch": 5.350467289719626,
"grad_norm": 0.013865533284842968,
"learning_rate": 9.156323014018692e-06,
"loss": 0.0007,
"step": 9160
},
{
"epoch": 5.356308411214953,
"grad_norm": 0.015626810491085052,
"learning_rate": 9.152672313084114e-06,
"loss": 0.0006,
"step": 9170
},
{
"epoch": 5.36214953271028,
"grad_norm": 0.013258475810289383,
"learning_rate": 9.149021612149533e-06,
"loss": 0.0005,
"step": 9180
},
{
"epoch": 5.367990654205608,
"grad_norm": 0.01744505763053894,
"learning_rate": 9.145370911214953e-06,
"loss": 0.0005,
"step": 9190
},
{
"epoch": 5.373831775700935,
"grad_norm": 0.009587449952960014,
"learning_rate": 9.141720210280375e-06,
"loss": 0.0004,
"step": 9200
},
{
"epoch": 5.3796728971962615,
"grad_norm": 0.008393511176109314,
"learning_rate": 9.138069509345795e-06,
"loss": 0.0003,
"step": 9210
},
{
"epoch": 5.385514018691588,
"grad_norm": 0.009683453477919102,
"learning_rate": 9.134418808411216e-06,
"loss": 0.0004,
"step": 9220
},
{
"epoch": 5.391355140186916,
"grad_norm": 0.010573089122772217,
"learning_rate": 9.130768107476636e-06,
"loss": 0.0456,
"step": 9230
},
{
"epoch": 5.397196261682243,
"grad_norm": 0.024214621633291245,
"learning_rate": 9.127117406542056e-06,
"loss": 0.04,
"step": 9240
},
{
"epoch": 5.40303738317757,
"grad_norm": 0.03241032361984253,
"learning_rate": 9.123466705607478e-06,
"loss": 0.0016,
"step": 9250
},
{
"epoch": 5.408878504672897,
"grad_norm": 0.03916610777378082,
"learning_rate": 9.119816004672899e-06,
"loss": 0.0016,
"step": 9260
},
{
"epoch": 5.414719626168225,
"grad_norm": 0.016246847808361053,
"learning_rate": 9.116165303738317e-06,
"loss": 0.0011,
"step": 9270
},
{
"epoch": 5.420560747663552,
"grad_norm": 0.022740503773093224,
"learning_rate": 9.11251460280374e-06,
"loss": 0.0009,
"step": 9280
},
{
"epoch": 5.4264018691588785,
"grad_norm": 0.01288650557398796,
"learning_rate": 9.10886390186916e-06,
"loss": 0.0007,
"step": 9290
},
{
"epoch": 5.432242990654205,
"grad_norm": 0.014474975876510143,
"learning_rate": 9.10521320093458e-06,
"loss": 0.0006,
"step": 9300
},
{
"epoch": 5.438084112149532,
"grad_norm": 0.011654899455606937,
"learning_rate": 9.1015625e-06,
"loss": 0.0005,
"step": 9310
},
{
"epoch": 5.44392523364486,
"grad_norm": 0.00907689519226551,
"learning_rate": 9.09791179906542e-06,
"loss": 0.0004,
"step": 9320
},
{
"epoch": 5.449766355140187,
"grad_norm": 0.007321347948163748,
"learning_rate": 9.094261098130842e-06,
"loss": 0.0004,
"step": 9330
},
{
"epoch": 5.455607476635514,
"grad_norm": 0.007953358814120293,
"learning_rate": 9.090610397196263e-06,
"loss": 0.0004,
"step": 9340
},
{
"epoch": 5.461448598130841,
"grad_norm": 0.008754126727581024,
"learning_rate": 9.086959696261683e-06,
"loss": 0.0003,
"step": 9350
},
{
"epoch": 5.4672897196261685,
"grad_norm": 0.007452510762959719,
"learning_rate": 9.083308995327103e-06,
"loss": 0.0003,
"step": 9360
},
{
"epoch": 5.473130841121495,
"grad_norm": 0.0064163813367486,
"learning_rate": 9.079658294392524e-06,
"loss": 0.0003,
"step": 9370
},
{
"epoch": 5.478971962616822,
"grad_norm": 0.006491228472441435,
"learning_rate": 9.076007593457946e-06,
"loss": 0.0003,
"step": 9380
},
{
"epoch": 5.484813084112149,
"grad_norm": 0.0077286045998334885,
"learning_rate": 9.072356892523364e-06,
"loss": 0.0493,
"step": 9390
},
{
"epoch": 5.490654205607477,
"grad_norm": 0.01421878021210432,
"learning_rate": 9.068706191588785e-06,
"loss": 0.0006,
"step": 9400
},
{
"epoch": 5.496495327102804,
"grad_norm": 0.021647976711392403,
"learning_rate": 9.065055490654207e-06,
"loss": 0.0007,
"step": 9410
},
{
"epoch": 5.502336448598131,
"grad_norm": 0.01476993691176176,
"learning_rate": 9.061404789719627e-06,
"loss": 0.0007,
"step": 9420
},
{
"epoch": 5.508177570093458,
"grad_norm": 0.0126703642308712,
"learning_rate": 9.057754088785047e-06,
"loss": 0.0207,
"step": 9430
},
{
"epoch": 5.5140186915887845,
"grad_norm": 0.013835652731359005,
"learning_rate": 9.054103387850468e-06,
"loss": 0.0066,
"step": 9440
},
{
"epoch": 5.519859813084112,
"grad_norm": 0.010086634196341038,
"learning_rate": 9.050452686915888e-06,
"loss": 0.0004,
"step": 9450
},
{
"epoch": 5.525700934579439,
"grad_norm": 0.0096484599635005,
"learning_rate": 9.04680198598131e-06,
"loss": 0.0004,
"step": 9460
},
{
"epoch": 5.531542056074766,
"grad_norm": 0.009923032484948635,
"learning_rate": 9.04315128504673e-06,
"loss": 0.0004,
"step": 9470
},
{
"epoch": 5.537383177570094,
"grad_norm": 0.008762541227042675,
"learning_rate": 9.03950058411215e-06,
"loss": 0.0004,
"step": 9480
},
{
"epoch": 5.543224299065421,
"grad_norm": 0.008561445400118828,
"learning_rate": 9.035849883177571e-06,
"loss": 0.0003,
"step": 9490
},
{
"epoch": 5.549065420560748,
"grad_norm": 0.0088927261531353,
"learning_rate": 9.032199182242991e-06,
"loss": 0.0003,
"step": 9500
},
{
"epoch": 5.554906542056075,
"grad_norm": 0.007393279578536749,
"learning_rate": 9.028548481308412e-06,
"loss": 0.0003,
"step": 9510
},
{
"epoch": 5.5607476635514015,
"grad_norm": 0.0070273722521960735,
"learning_rate": 9.024897780373832e-06,
"loss": 0.0003,
"step": 9520
},
{
"epoch": 5.566588785046729,
"grad_norm": 0.0068083093501627445,
"learning_rate": 9.021247079439252e-06,
"loss": 0.0003,
"step": 9530
},
{
"epoch": 5.572429906542056,
"grad_norm": 0.007201792672276497,
"learning_rate": 9.017596378504674e-06,
"loss": 0.0003,
"step": 9540
},
{
"epoch": 5.578271028037383,
"grad_norm": 0.007822646759450436,
"learning_rate": 9.013945677570094e-06,
"loss": 0.0492,
"step": 9550
},
{
"epoch": 5.58411214953271,
"grad_norm": 0.010411952622234821,
"learning_rate": 9.010294976635515e-06,
"loss": 0.0004,
"step": 9560
},
{
"epoch": 5.589953271028038,
"grad_norm": 0.010123065672814846,
"learning_rate": 9.006644275700935e-06,
"loss": 0.0005,
"step": 9570
},
{
"epoch": 5.595794392523365,
"grad_norm": 0.009665888734161854,
"learning_rate": 9.002993574766355e-06,
"loss": 0.0005,
"step": 9580
},
{
"epoch": 5.6016355140186915,
"grad_norm": 0.011049588210880756,
"learning_rate": 8.999342873831777e-06,
"loss": 0.0005,
"step": 9590
},
{
"epoch": 5.607476635514018,
"grad_norm": 0.009384658187627792,
"learning_rate": 8.995692172897196e-06,
"loss": 0.0004,
"step": 9600
},
{
"epoch": 5.613317757009346,
"grad_norm": 0.010541644878685474,
"learning_rate": 8.992041471962616e-06,
"loss": 0.0004,
"step": 9610
},
{
"epoch": 5.619158878504673,
"grad_norm": 0.008299053646624088,
"learning_rate": 8.988390771028038e-06,
"loss": 0.0003,
"step": 9620
},
{
"epoch": 5.625,
"grad_norm": 0.009733597747981548,
"learning_rate": 8.984740070093459e-06,
"loss": 0.0003,
"step": 9630
},
{
"epoch": 5.630841121495327,
"grad_norm": 0.006777781993150711,
"learning_rate": 8.981089369158879e-06,
"loss": 0.0003,
"step": 9640
},
{
"epoch": 5.636682242990654,
"grad_norm": 0.009290322661399841,
"learning_rate": 8.9774386682243e-06,
"loss": 0.0473,
"step": 9650
},
{
"epoch": 5.642523364485982,
"grad_norm": 0.012651221826672554,
"learning_rate": 8.97378796728972e-06,
"loss": 0.0005,
"step": 9660
},
{
"epoch": 5.6483644859813085,
"grad_norm": 0.013811645098030567,
"learning_rate": 8.970137266355142e-06,
"loss": 0.0006,
"step": 9670
},
{
"epoch": 5.654205607476635,
"grad_norm": 0.01238976139575243,
"learning_rate": 8.966486565420562e-06,
"loss": 0.0005,
"step": 9680
},
{
"epoch": 5.660046728971962,
"grad_norm": 0.012121552601456642,
"learning_rate": 8.962835864485982e-06,
"loss": 0.0005,
"step": 9690
},
{
"epoch": 5.66588785046729,
"grad_norm": 0.018319696187973022,
"learning_rate": 8.959185163551403e-06,
"loss": 0.0846,
"step": 9700
},
{
"epoch": 5.671728971962617,
"grad_norm": 0.0433378741145134,
"learning_rate": 8.955534462616823e-06,
"loss": 0.0015,
"step": 9710
},
{
"epoch": 5.677570093457944,
"grad_norm": 0.06617596745491028,
"learning_rate": 8.951883761682245e-06,
"loss": 0.0576,
"step": 9720
},
{
"epoch": 5.683411214953271,
"grad_norm": 0.06185260787606239,
"learning_rate": 8.948233060747663e-06,
"loss": 0.0376,
"step": 9730
},
{
"epoch": 5.6892523364485985,
"grad_norm": 0.04279331862926483,
"learning_rate": 8.944582359813084e-06,
"loss": 0.0027,
"step": 9740
},
{
"epoch": 5.695093457943925,
"grad_norm": 0.0327022448182106,
"learning_rate": 8.940931658878506e-06,
"loss": 0.0344,
"step": 9750
},
{
"epoch": 5.700934579439252,
"grad_norm": 0.055735599249601364,
"learning_rate": 8.937280957943926e-06,
"loss": 0.0028,
"step": 9760
},
{
"epoch": 5.706775700934579,
"grad_norm": 3.1117148399353027,
"learning_rate": 8.933630257009346e-06,
"loss": 0.0473,
"step": 9770
},
{
"epoch": 5.712616822429906,
"grad_norm": 0.02079218439757824,
"learning_rate": 8.929979556074767e-06,
"loss": 0.0022,
"step": 9780
},
{
"epoch": 5.718457943925234,
"grad_norm": 0.01973365619778633,
"learning_rate": 8.926328855140187e-06,
"loss": 0.0008,
"step": 9790
},
{
"epoch": 5.724299065420561,
"grad_norm": 0.01175629161298275,
"learning_rate": 8.922678154205609e-06,
"loss": 0.0006,
"step": 9800
},
{
"epoch": 5.730140186915888,
"grad_norm": 0.01121139619499445,
"learning_rate": 8.91902745327103e-06,
"loss": 0.0004,
"step": 9810
},
{
"epoch": 5.7359813084112155,
"grad_norm": 0.009263860061764717,
"learning_rate": 8.91537675233645e-06,
"loss": 0.0004,
"step": 9820
},
{
"epoch": 5.741822429906542,
"grad_norm": 0.009500655345618725,
"learning_rate": 8.91172605140187e-06,
"loss": 0.0004,
"step": 9830
},
{
"epoch": 5.747663551401869,
"grad_norm": 0.007937227375805378,
"learning_rate": 8.90807535046729e-06,
"loss": 0.0003,
"step": 9840
},
{
"epoch": 5.753504672897196,
"grad_norm": 0.0077436259016394615,
"learning_rate": 8.90442464953271e-06,
"loss": 0.0003,
"step": 9850
},
{
"epoch": 5.759345794392523,
"grad_norm": 0.007083016447722912,
"learning_rate": 8.900773948598131e-06,
"loss": 0.0003,
"step": 9860
},
{
"epoch": 5.765186915887851,
"grad_norm": 0.006318287458270788,
"learning_rate": 8.897123247663551e-06,
"loss": 0.0002,
"step": 9870
},
{
"epoch": 5.771028037383178,
"grad_norm": 0.007866185158491135,
"learning_rate": 8.893472546728973e-06,
"loss": 0.0003,
"step": 9880
},
{
"epoch": 5.776869158878505,
"grad_norm": 0.005734143313020468,
"learning_rate": 8.889821845794394e-06,
"loss": 0.0002,
"step": 9890
},
{
"epoch": 5.7827102803738315,
"grad_norm": 29.448055267333984,
"learning_rate": 8.886171144859814e-06,
"loss": 0.0588,
"step": 9900
},
{
"epoch": 5.788551401869158,
"grad_norm": 0.020456640049815178,
"learning_rate": 8.882520443925234e-06,
"loss": 0.0445,
"step": 9910
},
{
"epoch": 5.794392523364486,
"grad_norm": 0.059099141508340836,
"learning_rate": 8.878869742990655e-06,
"loss": 0.0019,
"step": 9920
},
{
"epoch": 5.800233644859813,
"grad_norm": 0.037169113755226135,
"learning_rate": 8.875219042056077e-06,
"loss": 0.0019,
"step": 9930
},
{
"epoch": 5.80607476635514,
"grad_norm": 0.02267596684396267,
"learning_rate": 8.871568341121495e-06,
"loss": 0.0524,
"step": 9940
},
{
"epoch": 5.811915887850468,
"grad_norm": 0.016864538192749023,
"learning_rate": 8.867917640186917e-06,
"loss": 0.0437,
"step": 9950
},
{
"epoch": 5.817757009345795,
"grad_norm": 0.024668557569384575,
"learning_rate": 8.864266939252337e-06,
"loss": 0.0011,
"step": 9960
},
{
"epoch": 5.8235981308411215,
"grad_norm": 0.041549887508153915,
"learning_rate": 8.860616238317758e-06,
"loss": 0.041,
"step": 9970
},
{
"epoch": 5.829439252336448,
"grad_norm": 0.042261477559804916,
"learning_rate": 8.856965537383178e-06,
"loss": 0.0021,
"step": 9980
},
{
"epoch": 5.835280373831775,
"grad_norm": 0.039793532341718674,
"learning_rate": 8.853314836448598e-06,
"loss": 0.0357,
"step": 9990
},
{
"epoch": 5.841121495327103,
"grad_norm": 0.12145894020795822,
"learning_rate": 8.849664135514019e-06,
"loss": 0.0289,
"step": 10000
},
{
"epoch": 5.84696261682243,
"grad_norm": 0.036945607513189316,
"learning_rate": 8.84601343457944e-06,
"loss": 0.0039,
"step": 10010
},
{
"epoch": 5.852803738317757,
"grad_norm": 0.02271541766822338,
"learning_rate": 8.842362733644861e-06,
"loss": 0.0016,
"step": 10020
},
{
"epoch": 5.858644859813084,
"grad_norm": 0.016810262575745583,
"learning_rate": 8.838712032710281e-06,
"loss": 0.0008,
"step": 10030
},
{
"epoch": 5.864485981308412,
"grad_norm": 0.011538311839103699,
"learning_rate": 8.835061331775702e-06,
"loss": 0.0007,
"step": 10040
},
{
"epoch": 5.8703271028037385,
"grad_norm": 0.012231874279677868,
"learning_rate": 8.831410630841122e-06,
"loss": 0.0005,
"step": 10050
},
{
"epoch": 5.876168224299065,
"grad_norm": 0.010014678351581097,
"learning_rate": 8.827759929906542e-06,
"loss": 0.0005,
"step": 10060
},
{
"epoch": 5.882009345794392,
"grad_norm": 0.010446464642882347,
"learning_rate": 8.824109228971963e-06,
"loss": 0.0004,
"step": 10070
},
{
"epoch": 5.88785046728972,
"grad_norm": 0.010866041295230389,
"learning_rate": 8.820458528037385e-06,
"loss": 0.0004,
"step": 10080
},
{
"epoch": 5.893691588785047,
"grad_norm": 0.00958226714283228,
"learning_rate": 8.816807827102805e-06,
"loss": 0.0004,
"step": 10090
},
{
"epoch": 5.899532710280374,
"grad_norm": 0.009142902679741383,
"learning_rate": 8.813157126168225e-06,
"loss": 0.0003,
"step": 10100
},
{
"epoch": 5.905373831775701,
"grad_norm": 0.009961924515664577,
"learning_rate": 8.809506425233646e-06,
"loss": 0.0004,
"step": 10110
},
{
"epoch": 5.911214953271028,
"grad_norm": 0.00830204226076603,
"learning_rate": 8.805855724299066e-06,
"loss": 0.0186,
"step": 10120
},
{
"epoch": 5.917056074766355,
"grad_norm": 2.5257885456085205,
"learning_rate": 8.802205023364486e-06,
"loss": 0.05,
"step": 10130
},
{
"epoch": 5.922897196261682,
"grad_norm": 0.011586299166083336,
"learning_rate": 8.798554322429908e-06,
"loss": 0.0004,
"step": 10140
},
{
"epoch": 5.928738317757009,
"grad_norm": 0.013572271913290024,
"learning_rate": 8.794903621495327e-06,
"loss": 0.0006,
"step": 10150
},
{
"epoch": 5.934579439252336,
"grad_norm": 0.009981178678572178,
"learning_rate": 8.791252920560749e-06,
"loss": 0.0004,
"step": 10160
},
{
"epoch": 5.940420560747664,
"grad_norm": 0.008034189231693745,
"learning_rate": 8.78760221962617e-06,
"loss": 0.0003,
"step": 10170
},
{
"epoch": 5.946261682242991,
"grad_norm": 0.00744387973099947,
"learning_rate": 8.78395151869159e-06,
"loss": 0.0003,
"step": 10180
},
{
"epoch": 5.952102803738318,
"grad_norm": 0.008284796960651875,
"learning_rate": 8.78030081775701e-06,
"loss": 0.0491,
"step": 10190
},
{
"epoch": 5.957943925233645,
"grad_norm": 0.013832306489348412,
"learning_rate": 8.77665011682243e-06,
"loss": 0.0007,
"step": 10200
},
{
"epoch": 5.963785046728972,
"grad_norm": 0.013292601332068443,
"learning_rate": 8.77299941588785e-06,
"loss": 0.0006,
"step": 10210
},
{
"epoch": 5.969626168224299,
"grad_norm": 0.011907985433936119,
"learning_rate": 8.769348714953272e-06,
"loss": 0.0005,
"step": 10220
},
{
"epoch": 5.975467289719626,
"grad_norm": 0.012486466206610203,
"learning_rate": 8.765698014018693e-06,
"loss": 0.0005,
"step": 10230
},
{
"epoch": 5.981308411214953,
"grad_norm": 0.01007250975817442,
"learning_rate": 8.762047313084113e-06,
"loss": 0.0007,
"step": 10240
},
{
"epoch": 5.98714953271028,
"grad_norm": 0.010638604871928692,
"learning_rate": 8.758396612149533e-06,
"loss": 0.0004,
"step": 10250
},
{
"epoch": 5.992990654205608,
"grad_norm": 0.009204142726957798,
"learning_rate": 8.754745911214954e-06,
"loss": 0.0003,
"step": 10260
},
{
"epoch": 5.998831775700935,
"grad_norm": 0.008714770898222923,
"learning_rate": 8.751095210280374e-06,
"loss": 0.001,
"step": 10270
},
{
"epoch": 6.0,
"eval_f1": 0.9994698977659977,
"eval_fbeta": 0.9994244065887268,
"eval_loss": 0.003641982562839985,
"eval_precision": 0.9993942147508709,
"eval_recall": 0.9995455922447744,
"eval_runtime": 171.4592,
"eval_samples_per_second": 68.454,
"eval_steps_per_second": 8.562,
"step": 10272
},
{
"epoch": 6.0046728971962615,
"grad_norm": 0.01118410099297762,
"learning_rate": 8.747444509345794e-06,
"loss": 0.0004,
"step": 10280
},
{
"epoch": 6.010514018691588,
"grad_norm": 0.009393405169248581,
"learning_rate": 8.743793808411216e-06,
"loss": 0.0004,
"step": 10290
},
{
"epoch": 6.016355140186916,
"grad_norm": 0.009552313946187496,
"learning_rate": 8.740143107476637e-06,
"loss": 0.0004,
"step": 10300
},
{
"epoch": 6.022196261682243,
"grad_norm": 1.8145328760147095,
"learning_rate": 8.736492406542057e-06,
"loss": 0.046,
"step": 10310
},
{
"epoch": 6.02803738317757,
"grad_norm": 0.02487098053097725,
"learning_rate": 8.732841705607477e-06,
"loss": 0.0453,
"step": 10320
},
{
"epoch": 6.033878504672897,
"grad_norm": 0.059945717453956604,
"learning_rate": 8.729191004672898e-06,
"loss": 0.0315,
"step": 10330
},
{
"epoch": 6.039719626168225,
"grad_norm": 0.02849278412759304,
"learning_rate": 8.725540303738318e-06,
"loss": 0.0227,
"step": 10340
},
{
"epoch": 6.045560747663552,
"grad_norm": 0.11616010963916779,
"learning_rate": 8.72188960280374e-06,
"loss": 0.0016,
"step": 10350
},
{
"epoch": 6.0514018691588785,
"grad_norm": 0.014204906299710274,
"learning_rate": 8.718238901869159e-06,
"loss": 0.001,
"step": 10360
},
{
"epoch": 6.057242990654205,
"grad_norm": 0.011876954697072506,
"learning_rate": 8.71458820093458e-06,
"loss": 0.0005,
"step": 10370
},
{
"epoch": 6.063084112149533,
"grad_norm": 0.010328153148293495,
"learning_rate": 8.7109375e-06,
"loss": 0.0004,
"step": 10380
},
{
"epoch": 6.06892523364486,
"grad_norm": 0.00894414447247982,
"learning_rate": 8.707286799065421e-06,
"loss": 0.0004,
"step": 10390
},
{
"epoch": 6.074766355140187,
"grad_norm": 0.011801760643720627,
"learning_rate": 8.703636098130841e-06,
"loss": 0.0481,
"step": 10400
},
{
"epoch": 6.080607476635514,
"grad_norm": 0.015615479089319706,
"learning_rate": 8.699985397196262e-06,
"loss": 0.0005,
"step": 10410
},
{
"epoch": 6.086448598130841,
"grad_norm": 0.015844807028770447,
"learning_rate": 8.696334696261684e-06,
"loss": 0.0006,
"step": 10420
},
{
"epoch": 6.0922897196261685,
"grad_norm": 0.013462678529322147,
"learning_rate": 8.692683995327104e-06,
"loss": 0.0006,
"step": 10430
},
{
"epoch": 6.098130841121495,
"grad_norm": 0.012400257401168346,
"learning_rate": 8.689033294392524e-06,
"loss": 0.0005,
"step": 10440
},
{
"epoch": 6.103971962616822,
"grad_norm": 0.010923230089247227,
"learning_rate": 8.685382593457945e-06,
"loss": 0.0005,
"step": 10450
},
{
"epoch": 6.109813084112149,
"grad_norm": 0.0079196747392416,
"learning_rate": 8.681731892523365e-06,
"loss": 0.0004,
"step": 10460
},
{
"epoch": 6.115654205607477,
"grad_norm": 0.009008225984871387,
"learning_rate": 8.678081191588785e-06,
"loss": 0.0004,
"step": 10470
},
{
"epoch": 6.121495327102804,
"grad_norm": 0.008494589477777481,
"learning_rate": 8.674430490654207e-06,
"loss": 0.0003,
"step": 10480
},
{
"epoch": 6.127336448598131,
"grad_norm": 0.00859193503856659,
"learning_rate": 8.670779789719626e-06,
"loss": 0.0003,
"step": 10490
},
{
"epoch": 6.133177570093458,
"grad_norm": 0.00865325890481472,
"learning_rate": 8.667129088785048e-06,
"loss": 0.0416,
"step": 10500
},
{
"epoch": 6.139018691588785,
"grad_norm": 0.008447653613984585,
"learning_rate": 8.663478387850468e-06,
"loss": 0.0003,
"step": 10510
},
{
"epoch": 6.144859813084112,
"grad_norm": 0.010303646326065063,
"learning_rate": 8.659827686915889e-06,
"loss": 0.0005,
"step": 10520
},
{
"epoch": 6.150700934579439,
"grad_norm": 0.008880840614438057,
"learning_rate": 8.656176985981309e-06,
"loss": 0.0107,
"step": 10530
},
{
"epoch": 6.156542056074766,
"grad_norm": 0.006746761500835419,
"learning_rate": 8.65252628504673e-06,
"loss": 0.0003,
"step": 10540
},
{
"epoch": 6.162383177570094,
"grad_norm": 0.006260715890675783,
"learning_rate": 8.648875584112151e-06,
"loss": 0.0002,
"step": 10550
},
{
"epoch": 6.168224299065421,
"grad_norm": 0.006196292117238045,
"learning_rate": 8.645224883177572e-06,
"loss": 0.0002,
"step": 10560
},
{
"epoch": 6.174065420560748,
"grad_norm": 0.005948168691247702,
"learning_rate": 8.641574182242992e-06,
"loss": 0.0002,
"step": 10570
},
{
"epoch": 6.179906542056075,
"grad_norm": 0.005726588889956474,
"learning_rate": 8.637923481308412e-06,
"loss": 0.0002,
"step": 10580
},
{
"epoch": 6.1857476635514015,
"grad_norm": 0.005977707449346781,
"learning_rate": 8.634272780373832e-06,
"loss": 0.0002,
"step": 10590
},
{
"epoch": 6.191588785046729,
"grad_norm": 0.006332565564662218,
"learning_rate": 8.630622079439253e-06,
"loss": 0.0002,
"step": 10600
},
{
"epoch": 6.197429906542056,
"grad_norm": 0.006450187414884567,
"learning_rate": 8.626971378504673e-06,
"loss": 0.0504,
"step": 10610
},
{
"epoch": 6.203271028037383,
"grad_norm": 0.007723100017756224,
"learning_rate": 8.623320677570093e-06,
"loss": 0.0003,
"step": 10620
},
{
"epoch": 6.20911214953271,
"grad_norm": 0.007839754223823547,
"learning_rate": 8.619669976635515e-06,
"loss": 0.0003,
"step": 10630
},
{
"epoch": 6.214953271028038,
"grad_norm": 0.007956388406455517,
"learning_rate": 8.616019275700936e-06,
"loss": 0.0004,
"step": 10640
},
{
"epoch": 6.220794392523365,
"grad_norm": 0.011821724474430084,
"learning_rate": 8.612368574766356e-06,
"loss": 0.0368,
"step": 10650
},
{
"epoch": 6.2266355140186915,
"grad_norm": 0.011840198189020157,
"learning_rate": 8.608717873831776e-06,
"loss": 0.0005,
"step": 10660
},
{
"epoch": 6.232476635514018,
"grad_norm": 0.02075495943427086,
"learning_rate": 8.605067172897197e-06,
"loss": 0.0185,
"step": 10670
},
{
"epoch": 6.238317757009346,
"grad_norm": 0.016375849023461342,
"learning_rate": 8.601416471962617e-06,
"loss": 0.0013,
"step": 10680
},
{
"epoch": 6.244158878504673,
"grad_norm": 0.015895796939730644,
"learning_rate": 8.597765771028039e-06,
"loss": 0.0005,
"step": 10690
},
{
"epoch": 6.25,
"grad_norm": 0.008427509106695652,
"learning_rate": 8.594115070093458e-06,
"loss": 0.0003,
"step": 10700
},
{
"epoch": 6.255841121495327,
"grad_norm": 0.007040541619062424,
"learning_rate": 8.59046436915888e-06,
"loss": 0.0003,
"step": 10710
},
{
"epoch": 6.261682242990654,
"grad_norm": 0.0081349927932024,
"learning_rate": 8.5868136682243e-06,
"loss": 0.0003,
"step": 10720
},
{
"epoch": 6.267523364485982,
"grad_norm": 0.007933318614959717,
"learning_rate": 8.58316296728972e-06,
"loss": 0.0458,
"step": 10730
},
{
"epoch": 6.2733644859813085,
"grad_norm": 0.025732412934303284,
"learning_rate": 8.57951226635514e-06,
"loss": 0.0005,
"step": 10740
},
{
"epoch": 6.279205607476635,
"grad_norm": 0.024862881749868393,
"learning_rate": 8.575861565420561e-06,
"loss": 0.0008,
"step": 10750
},
{
"epoch": 6.285046728971962,
"grad_norm": 0.01671590469777584,
"learning_rate": 8.572210864485983e-06,
"loss": 0.0007,
"step": 10760
},
{
"epoch": 6.29088785046729,
"grad_norm": 0.012816164642572403,
"learning_rate": 8.568560163551403e-06,
"loss": 0.0004,
"step": 10770
},
{
"epoch": 6.296728971962617,
"grad_norm": 0.009239032864570618,
"learning_rate": 8.564909462616824e-06,
"loss": 0.0004,
"step": 10780
},
{
"epoch": 6.302570093457944,
"grad_norm": 0.010092846117913723,
"learning_rate": 8.561258761682244e-06,
"loss": 0.0177,
"step": 10790
},
{
"epoch": 6.308411214953271,
"grad_norm": 0.009570459835231304,
"learning_rate": 8.557608060747664e-06,
"loss": 0.0004,
"step": 10800
},
{
"epoch": 6.3142523364485985,
"grad_norm": 14.129960060119629,
"learning_rate": 8.553957359813084e-06,
"loss": 0.0253,
"step": 10810
},
{
"epoch": 6.320093457943925,
"grad_norm": 0.027155300602316856,
"learning_rate": 8.550306658878505e-06,
"loss": 0.0005,
"step": 10820
},
{
"epoch": 6.325934579439252,
"grad_norm": 0.012927797622978687,
"learning_rate": 8.546655957943925e-06,
"loss": 0.005,
"step": 10830
},
{
"epoch": 6.331775700934579,
"grad_norm": 0.006941064726561308,
"learning_rate": 8.543005257009347e-06,
"loss": 0.0003,
"step": 10840
},
{
"epoch": 6.337616822429906,
"grad_norm": 0.006605945061892271,
"learning_rate": 8.539354556074767e-06,
"loss": 0.0003,
"step": 10850
},
{
"epoch": 6.343457943925234,
"grad_norm": 0.0064064753241837025,
"learning_rate": 8.535703855140188e-06,
"loss": 0.0003,
"step": 10860
},
{
"epoch": 6.349299065420561,
"grad_norm": 0.005703518632799387,
"learning_rate": 8.532053154205608e-06,
"loss": 0.0002,
"step": 10870
},
{
"epoch": 6.355140186915888,
"grad_norm": 0.004557217936962843,
"learning_rate": 8.528402453271028e-06,
"loss": 0.0002,
"step": 10880
},
{
"epoch": 6.360981308411215,
"grad_norm": 0.005052170716226101,
"learning_rate": 8.52475175233645e-06,
"loss": 0.0007,
"step": 10890
},
{
"epoch": 6.366822429906542,
"grad_norm": 0.0055373115465044975,
"learning_rate": 8.52110105140187e-06,
"loss": 0.0002,
"step": 10900
},
{
"epoch": 6.372663551401869,
"grad_norm": 0.006470870226621628,
"learning_rate": 8.51745035046729e-06,
"loss": 0.0002,
"step": 10910
},
{
"epoch": 6.378504672897196,
"grad_norm": 0.004862222820520401,
"learning_rate": 8.513799649532711e-06,
"loss": 0.0002,
"step": 10920
},
{
"epoch": 6.384345794392523,
"grad_norm": 0.006725993473082781,
"learning_rate": 8.510148948598132e-06,
"loss": 0.0455,
"step": 10930
},
{
"epoch": 6.390186915887851,
"grad_norm": 0.013148023746907711,
"learning_rate": 8.506498247663552e-06,
"loss": 0.001,
"step": 10940
},
{
"epoch": 6.396028037383178,
"grad_norm": 0.011781498789787292,
"learning_rate": 8.502847546728972e-06,
"loss": 0.0005,
"step": 10950
},
{
"epoch": 6.401869158878505,
"grad_norm": 0.006317190360277891,
"learning_rate": 8.499196845794393e-06,
"loss": 0.0003,
"step": 10960
},
{
"epoch": 6.4077102803738315,
"grad_norm": 0.0071831364184618,
"learning_rate": 8.495546144859815e-06,
"loss": 0.0003,
"step": 10970
},
{
"epoch": 6.413551401869159,
"grad_norm": 0.012186083942651749,
"learning_rate": 8.491895443925235e-06,
"loss": 0.0396,
"step": 10980
},
{
"epoch": 6.419392523364486,
"grad_norm": 0.04296307638287544,
"learning_rate": 8.488244742990655e-06,
"loss": 0.0008,
"step": 10990
},
{
"epoch": 6.425233644859813,
"grad_norm": 0.018925141543149948,
"learning_rate": 8.484594042056076e-06,
"loss": 0.0007,
"step": 11000
},
{
"epoch": 6.43107476635514,
"grad_norm": 0.006211417261511087,
"learning_rate": 8.480943341121496e-06,
"loss": 0.0006,
"step": 11010
},
{
"epoch": 6.436915887850468,
"grad_norm": 0.007444577757269144,
"learning_rate": 8.477292640186918e-06,
"loss": 0.0002,
"step": 11020
},
{
"epoch": 6.442757009345795,
"grad_norm": 0.007033002562820911,
"learning_rate": 8.473641939252336e-06,
"loss": 0.0003,
"step": 11030
},
{
"epoch": 6.4485981308411215,
"grad_norm": 0.0055618369951844215,
"learning_rate": 8.469991238317757e-06,
"loss": 0.0003,
"step": 11040
},
{
"epoch": 6.454439252336448,
"grad_norm": 0.005723849404603243,
"learning_rate": 8.466340537383179e-06,
"loss": 0.0002,
"step": 11050
},
{
"epoch": 6.460280373831775,
"grad_norm": 0.004047950729727745,
"learning_rate": 8.462689836448599e-06,
"loss": 0.0002,
"step": 11060
},
{
"epoch": 6.466121495327103,
"grad_norm": 0.004566140007227659,
"learning_rate": 8.45903913551402e-06,
"loss": 0.0002,
"step": 11070
},
{
"epoch": 6.47196261682243,
"grad_norm": 0.004481618292629719,
"learning_rate": 8.45538843457944e-06,
"loss": 0.0002,
"step": 11080
},
{
"epoch": 6.477803738317757,
"grad_norm": 0.004923723638057709,
"learning_rate": 8.45173773364486e-06,
"loss": 0.0002,
"step": 11090
},
{
"epoch": 6.483644859813084,
"grad_norm": 0.004801360424607992,
"learning_rate": 8.448087032710282e-06,
"loss": 0.0002,
"step": 11100
},
{
"epoch": 6.489485981308412,
"grad_norm": 0.004998974967747927,
"learning_rate": 8.444436331775702e-06,
"loss": 0.0002,
"step": 11110
},
{
"epoch": 6.4953271028037385,
"grad_norm": 0.0041777221485972404,
"learning_rate": 8.440785630841121e-06,
"loss": 0.0002,
"step": 11120
},
{
"epoch": 6.501168224299065,
"grad_norm": 0.00409247912466526,
"learning_rate": 8.437134929906543e-06,
"loss": 0.0002,
"step": 11130
},
{
"epoch": 6.507009345794392,
"grad_norm": 0.003906558267772198,
"learning_rate": 8.433484228971963e-06,
"loss": 0.0002,
"step": 11140
},
{
"epoch": 6.51285046728972,
"grad_norm": 0.004433404188603163,
"learning_rate": 8.429833528037384e-06,
"loss": 0.0002,
"step": 11150
},
{
"epoch": 6.518691588785047,
"grad_norm": 0.003944264259189367,
"learning_rate": 8.426182827102804e-06,
"loss": 0.0323,
"step": 11160
},
{
"epoch": 6.524532710280374,
"grad_norm": 0.007481928914785385,
"learning_rate": 8.422532126168224e-06,
"loss": 0.0002,
"step": 11170
},
{
"epoch": 6.530373831775701,
"grad_norm": 0.004499036818742752,
"learning_rate": 8.418881425233646e-06,
"loss": 0.0187,
"step": 11180
},
{
"epoch": 6.536214953271028,
"grad_norm": 0.004098616074770689,
"learning_rate": 8.415230724299067e-06,
"loss": 0.0107,
"step": 11190
},
{
"epoch": 6.542056074766355,
"grad_norm": 0.004652571398764849,
"learning_rate": 8.411580023364487e-06,
"loss": 0.0002,
"step": 11200
},
{
"epoch": 6.547897196261682,
"grad_norm": 0.006798152346163988,
"learning_rate": 8.407929322429907e-06,
"loss": 0.0002,
"step": 11210
},
{
"epoch": 6.553738317757009,
"grad_norm": 0.004885418340563774,
"learning_rate": 8.404278621495328e-06,
"loss": 0.0001,
"step": 11220
},
{
"epoch": 6.559579439252336,
"grad_norm": 0.0039513991214334965,
"learning_rate": 8.40062792056075e-06,
"loss": 0.0001,
"step": 11230
},
{
"epoch": 6.565420560747664,
"grad_norm": 0.0035442986991256475,
"learning_rate": 8.396977219626168e-06,
"loss": 0.0002,
"step": 11240
},
{
"epoch": 6.571261682242991,
"grad_norm": 0.003085469128564,
"learning_rate": 8.393326518691588e-06,
"loss": 0.0001,
"step": 11250
},
{
"epoch": 6.577102803738318,
"grad_norm": 0.00335341296158731,
"learning_rate": 8.38967581775701e-06,
"loss": 0.0033,
"step": 11260
},
{
"epoch": 6.582943925233645,
"grad_norm": 0.003827323205769062,
"learning_rate": 8.38602511682243e-06,
"loss": 0.0001,
"step": 11270
},
{
"epoch": 6.588785046728972,
"grad_norm": 0.005507790017873049,
"learning_rate": 8.382374415887851e-06,
"loss": 0.0498,
"step": 11280
},
{
"epoch": 6.594626168224299,
"grad_norm": 0.025428486987948418,
"learning_rate": 8.378723714953271e-06,
"loss": 0.0012,
"step": 11290
},
{
"epoch": 6.600467289719626,
"grad_norm": 7.240142345428467,
"learning_rate": 8.375073014018692e-06,
"loss": 0.0024,
"step": 11300
},
{
"epoch": 6.606308411214953,
"grad_norm": 0.007059688679873943,
"learning_rate": 8.371422313084114e-06,
"loss": 0.0003,
"step": 11310
},
{
"epoch": 6.61214953271028,
"grad_norm": 0.0074775912798941135,
"learning_rate": 8.367771612149534e-06,
"loss": 0.0003,
"step": 11320
},
{
"epoch": 6.617990654205608,
"grad_norm": 0.006460248958319426,
"learning_rate": 8.364120911214954e-06,
"loss": 0.0413,
"step": 11330
},
{
"epoch": 6.623831775700935,
"grad_norm": 0.006473233923316002,
"learning_rate": 8.360470210280375e-06,
"loss": 0.0002,
"step": 11340
},
{
"epoch": 6.6296728971962615,
"grad_norm": 0.005892688874155283,
"learning_rate": 8.356819509345795e-06,
"loss": 0.0002,
"step": 11350
},
{
"epoch": 6.635514018691588,
"grad_norm": 0.004774305038154125,
"learning_rate": 8.353168808411217e-06,
"loss": 0.0002,
"step": 11360
},
{
"epoch": 6.641355140186916,
"grad_norm": 0.004374335985630751,
"learning_rate": 8.349518107476636e-06,
"loss": 0.0002,
"step": 11370
},
{
"epoch": 6.647196261682243,
"grad_norm": 0.003162469482049346,
"learning_rate": 8.345867406542056e-06,
"loss": 0.0001,
"step": 11380
},
{
"epoch": 6.65303738317757,
"grad_norm": 0.0034602792002260685,
"learning_rate": 8.342216705607478e-06,
"loss": 0.0001,
"step": 11390
},
{
"epoch": 6.658878504672897,
"grad_norm": 0.0040388829074800014,
"learning_rate": 8.338566004672898e-06,
"loss": 0.0001,
"step": 11400
},
{
"epoch": 6.664719626168225,
"grad_norm": 0.00331344292499125,
"learning_rate": 8.334915303738319e-06,
"loss": 0.0001,
"step": 11410
},
{
"epoch": 6.670560747663552,
"grad_norm": 0.0032574781216681004,
"learning_rate": 8.331264602803739e-06,
"loss": 0.0003,
"step": 11420
},
{
"epoch": 6.6764018691588785,
"grad_norm": 0.0035904489923268557,
"learning_rate": 8.32761390186916e-06,
"loss": 0.0001,
"step": 11430
},
{
"epoch": 6.682242990654205,
"grad_norm": 0.0036325124092400074,
"learning_rate": 8.323963200934581e-06,
"loss": 0.0001,
"step": 11440
},
{
"epoch": 6.688084112149532,
"grad_norm": 0.004073506221175194,
"learning_rate": 8.320312500000001e-06,
"loss": 0.0001,
"step": 11450
},
{
"epoch": 6.69392523364486,
"grad_norm": 0.0034190404694527388,
"learning_rate": 8.31666179906542e-06,
"loss": 0.0001,
"step": 11460
},
{
"epoch": 6.699766355140187,
"grad_norm": 0.003095339285209775,
"learning_rate": 8.313011098130842e-06,
"loss": 0.0001,
"step": 11470
},
{
"epoch": 6.705607476635514,
"grad_norm": 0.0030306437984108925,
"learning_rate": 8.309360397196262e-06,
"loss": 0.0001,
"step": 11480
},
{
"epoch": 6.711448598130842,
"grad_norm": 0.002707183128222823,
"learning_rate": 8.305709696261683e-06,
"loss": 0.0001,
"step": 11490
},
{
"epoch": 6.7172897196261685,
"grad_norm": 0.0031440563034266233,
"learning_rate": 8.302058995327103e-06,
"loss": 0.0003,
"step": 11500
},
{
"epoch": 6.723130841121495,
"grad_norm": 0.0029218934942036867,
"learning_rate": 8.298408294392523e-06,
"loss": 0.0001,
"step": 11510
},
{
"epoch": 6.728971962616822,
"grad_norm": 0.002636146731674671,
"learning_rate": 8.294757593457945e-06,
"loss": 0.0001,
"step": 11520
},
{
"epoch": 6.734813084112149,
"grad_norm": 0.003062307136133313,
"learning_rate": 8.291106892523366e-06,
"loss": 0.0001,
"step": 11530
},
{
"epoch": 6.740654205607477,
"grad_norm": 0.00242641381919384,
"learning_rate": 8.287456191588786e-06,
"loss": 0.0001,
"step": 11540
},
{
"epoch": 6.746495327102804,
"grad_norm": 0.0024078742135316133,
"learning_rate": 8.283805490654206e-06,
"loss": 0.0001,
"step": 11550
},
{
"epoch": 6.752336448598131,
"grad_norm": 0.0025927985552698374,
"learning_rate": 8.280154789719627e-06,
"loss": 0.0001,
"step": 11560
},
{
"epoch": 6.758177570093458,
"grad_norm": 0.002639703219756484,
"learning_rate": 8.276504088785049e-06,
"loss": 0.0001,
"step": 11570
},
{
"epoch": 6.7640186915887845,
"grad_norm": 0.002318447921425104,
"learning_rate": 8.272853387850467e-06,
"loss": 0.0001,
"step": 11580
},
{
"epoch": 6.769859813084112,
"grad_norm": 0.0026759966276586056,
"learning_rate": 8.269202686915888e-06,
"loss": 0.0001,
"step": 11590
},
{
"epoch": 6.775700934579439,
"grad_norm": 0.0025113169103860855,
"learning_rate": 8.26555198598131e-06,
"loss": 0.0001,
"step": 11600
},
{
"epoch": 6.781542056074766,
"grad_norm": 0.0024989412631839514,
"learning_rate": 8.26190128504673e-06,
"loss": 0.0001,
"step": 11610
},
{
"epoch": 6.787383177570094,
"grad_norm": 0.0024788263253867626,
"learning_rate": 8.25825058411215e-06,
"loss": 0.0001,
"step": 11620
},
{
"epoch": 6.793224299065421,
"grad_norm": 0.0025793309323489666,
"learning_rate": 8.25459988317757e-06,
"loss": 0.0001,
"step": 11630
},
{
"epoch": 6.799065420560748,
"grad_norm": 0.002356814919039607,
"learning_rate": 8.25094918224299e-06,
"loss": 0.0001,
"step": 11640
},
{
"epoch": 6.804906542056075,
"grad_norm": 0.002353727351874113,
"learning_rate": 8.247298481308413e-06,
"loss": 0.0001,
"step": 11650
},
{
"epoch": 6.8107476635514015,
"grad_norm": 0.002258236985653639,
"learning_rate": 8.243647780373833e-06,
"loss": 0.0001,
"step": 11660
},
{
"epoch": 6.816588785046729,
"grad_norm": 0.0023588163312524557,
"learning_rate": 8.239997079439252e-06,
"loss": 0.045,
"step": 11670
},
{
"epoch": 6.822429906542056,
"grad_norm": 0.006023265887051821,
"learning_rate": 8.236346378504674e-06,
"loss": 0.0001,
"step": 11680
},
{
"epoch": 6.828271028037383,
"grad_norm": 0.004266134463250637,
"learning_rate": 8.232695677570094e-06,
"loss": 0.0002,
"step": 11690
},
{
"epoch": 6.83411214953271,
"grad_norm": 0.00548480125144124,
"learning_rate": 8.229044976635514e-06,
"loss": 0.0003,
"step": 11700
},
{
"epoch": 6.839953271028038,
"grad_norm": 0.0027503659948706627,
"learning_rate": 8.225394275700935e-06,
"loss": 0.0286,
"step": 11710
},
{
"epoch": 6.845794392523365,
"grad_norm": 0.004625431727617979,
"learning_rate": 8.221743574766355e-06,
"loss": 0.0294,
"step": 11720
},
{
"epoch": 6.8516355140186915,
"grad_norm": 0.008213341236114502,
"learning_rate": 8.218092873831777e-06,
"loss": 0.0002,
"step": 11730
},
{
"epoch": 6.857476635514018,
"grad_norm": 0.0034780986607074738,
"learning_rate": 8.214442172897197e-06,
"loss": 0.0009,
"step": 11740
},
{
"epoch": 6.863317757009346,
"grad_norm": 0.0030813429038971663,
"learning_rate": 8.210791471962618e-06,
"loss": 0.0001,
"step": 11750
},
{
"epoch": 6.869158878504673,
"grad_norm": 0.0029140140395611525,
"learning_rate": 8.207140771028038e-06,
"loss": 0.0001,
"step": 11760
},
{
"epoch": 6.875,
"grad_norm": 0.0028195863123983145,
"learning_rate": 8.203490070093458e-06,
"loss": 0.0001,
"step": 11770
},
{
"epoch": 6.880841121495327,
"grad_norm": 0.0026145544834434986,
"learning_rate": 8.19983936915888e-06,
"loss": 0.0001,
"step": 11780
},
{
"epoch": 6.886682242990654,
"grad_norm": 0.002694539725780487,
"learning_rate": 8.196188668224299e-06,
"loss": 0.0001,
"step": 11790
},
{
"epoch": 6.892523364485982,
"grad_norm": 0.0025953687727451324,
"learning_rate": 8.19253796728972e-06,
"loss": 0.0001,
"step": 11800
},
{
"epoch": 6.8983644859813085,
"grad_norm": 0.002524446463212371,
"learning_rate": 8.188887266355141e-06,
"loss": 0.0001,
"step": 11810
},
{
"epoch": 6.904205607476635,
"grad_norm": 0.0022692338097840548,
"learning_rate": 8.185236565420562e-06,
"loss": 0.0001,
"step": 11820
},
{
"epoch": 6.910046728971962,
"grad_norm": 0.002503743628039956,
"learning_rate": 8.181585864485982e-06,
"loss": 0.0001,
"step": 11830
},
{
"epoch": 6.91588785046729,
"grad_norm": 0.0026293403934687376,
"learning_rate": 8.177935163551402e-06,
"loss": 0.0001,
"step": 11840
},
{
"epoch": 6.921728971962617,
"grad_norm": 0.002107327338308096,
"learning_rate": 8.174284462616823e-06,
"loss": 0.0001,
"step": 11850
},
{
"epoch": 6.927570093457944,
"grad_norm": 0.002371525624766946,
"learning_rate": 8.170633761682245e-06,
"loss": 0.0001,
"step": 11860
},
{
"epoch": 6.933411214953271,
"grad_norm": 0.002208322286605835,
"learning_rate": 8.166983060747665e-06,
"loss": 0.0001,
"step": 11870
},
{
"epoch": 6.9392523364485985,
"grad_norm": 0.002092135837301612,
"learning_rate": 8.163332359813083e-06,
"loss": 0.0001,
"step": 11880
},
{
"epoch": 6.945093457943925,
"grad_norm": 0.00219535268843174,
"learning_rate": 8.159681658878505e-06,
"loss": 0.0001,
"step": 11890
},
{
"epoch": 6.950934579439252,
"grad_norm": 0.0020399116910994053,
"learning_rate": 8.156030957943926e-06,
"loss": 0.0001,
"step": 11900
},
{
"epoch": 6.956775700934579,
"grad_norm": 0.0020822633523494005,
"learning_rate": 8.152380257009346e-06,
"loss": 0.0001,
"step": 11910
},
{
"epoch": 6.962616822429906,
"grad_norm": 0.0020946068689227104,
"learning_rate": 8.148729556074766e-06,
"loss": 0.0001,
"step": 11920
},
{
"epoch": 6.968457943925234,
"grad_norm": 0.0019803382456302643,
"learning_rate": 8.145078855140187e-06,
"loss": 0.0001,
"step": 11930
},
{
"epoch": 6.974299065420561,
"grad_norm": 0.0021050155628472567,
"learning_rate": 8.141428154205609e-06,
"loss": 0.0001,
"step": 11940
},
{
"epoch": 6.980140186915888,
"grad_norm": 2.105935573577881,
"learning_rate": 8.137777453271029e-06,
"loss": 0.0058,
"step": 11950
},
{
"epoch": 6.9859813084112155,
"grad_norm": 0.0022007932420819998,
"learning_rate": 8.13412675233645e-06,
"loss": 0.0001,
"step": 11960
},
{
"epoch": 6.991822429906542,
"grad_norm": 0.015978222712874413,
"learning_rate": 8.13047605140187e-06,
"loss": 0.0001,
"step": 11970
},
{
"epoch": 6.997663551401869,
"grad_norm": 0.0019430032698437572,
"learning_rate": 8.12682535046729e-06,
"loss": 0.0006,
"step": 11980
},
{
"epoch": 7.0,
"eval_f1": 0.9993941229930324,
"eval_fbeta": 0.9993940429930388,
"eval_loss": 0.0036255475133657455,
"eval_precision": 0.9993941229930324,
"eval_recall": 0.9993941229930324,
"eval_runtime": 171.0479,
"eval_samples_per_second": 68.618,
"eval_steps_per_second": 8.582,
"step": 11984
},
{
"epoch": 7.003504672897196,
"grad_norm": 0.0021681012585759163,
"learning_rate": 8.123174649532712e-06,
"loss": 0.0001,
"step": 11990
},
{
"epoch": 7.009345794392523,
"grad_norm": 0.0019255972001701593,
"learning_rate": 8.11952394859813e-06,
"loss": 0.0001,
"step": 12000
},
{
"epoch": 7.015186915887851,
"grad_norm": 0.0021150237880647182,
"learning_rate": 8.115873247663551e-06,
"loss": 0.0001,
"step": 12010
},
{
"epoch": 7.021028037383178,
"grad_norm": 0.0019581643864512444,
"learning_rate": 8.112222546728973e-06,
"loss": 0.0001,
"step": 12020
},
{
"epoch": 7.026869158878505,
"grad_norm": 0.0018600919283926487,
"learning_rate": 8.108571845794393e-06,
"loss": 0.0001,
"step": 12030
},
{
"epoch": 7.0327102803738315,
"grad_norm": 0.002059732563793659,
"learning_rate": 8.104921144859814e-06,
"loss": 0.0001,
"step": 12040
},
{
"epoch": 7.038551401869159,
"grad_norm": 0.002076375298202038,
"learning_rate": 8.101270443925234e-06,
"loss": 0.0001,
"step": 12050
},
{
"epoch": 7.044392523364486,
"grad_norm": 0.0031956476159393787,
"learning_rate": 8.097619742990654e-06,
"loss": 0.0181,
"step": 12060
},
{
"epoch": 7.050233644859813,
"grad_norm": 0.0022973825689405203,
"learning_rate": 8.093969042056076e-06,
"loss": 0.0091,
"step": 12070
},
{
"epoch": 7.05607476635514,
"grad_norm": 0.0018713580211624503,
"learning_rate": 8.090318341121497e-06,
"loss": 0.0001,
"step": 12080
},
{
"epoch": 7.061915887850467,
"grad_norm": 0.002722959266975522,
"learning_rate": 8.086667640186917e-06,
"loss": 0.0001,
"step": 12090
},
{
"epoch": 7.067757009345795,
"grad_norm": 0.002272659447044134,
"learning_rate": 8.083016939252337e-06,
"loss": 0.0001,
"step": 12100
},
{
"epoch": 7.0735981308411215,
"grad_norm": 0.0020739021711051464,
"learning_rate": 8.079366238317757e-06,
"loss": 0.0001,
"step": 12110
},
{
"epoch": 7.079439252336448,
"grad_norm": 0.0021052819211035967,
"learning_rate": 8.075715537383178e-06,
"loss": 0.0001,
"step": 12120
},
{
"epoch": 7.085280373831775,
"grad_norm": 0.0022176839411258698,
"learning_rate": 8.072064836448598e-06,
"loss": 0.0517,
"step": 12130
},
{
"epoch": 7.091121495327103,
"grad_norm": 0.014300468377768993,
"learning_rate": 8.068414135514018e-06,
"loss": 0.0005,
"step": 12140
},
{
"epoch": 7.09696261682243,
"grad_norm": 0.014064386487007141,
"learning_rate": 8.06476343457944e-06,
"loss": 0.001,
"step": 12150
},
{
"epoch": 7.102803738317757,
"grad_norm": 0.0048030661419034,
"learning_rate": 8.06111273364486e-06,
"loss": 0.0003,
"step": 12160
},
{
"epoch": 7.108644859813084,
"grad_norm": 0.003011312335729599,
"learning_rate": 8.057462032710281e-06,
"loss": 0.0001,
"step": 12170
},
{
"epoch": 7.114485981308412,
"grad_norm": 0.002865664893761277,
"learning_rate": 8.053811331775701e-06,
"loss": 0.0001,
"step": 12180
},
{
"epoch": 7.1203271028037385,
"grad_norm": 3.0432324409484863,
"learning_rate": 8.050160630841122e-06,
"loss": 0.0423,
"step": 12190
},
{
"epoch": 7.126168224299065,
"grad_norm": 0.05281701311469078,
"learning_rate": 8.046509929906544e-06,
"loss": 0.0005,
"step": 12200
},
{
"epoch": 7.132009345794392,
"grad_norm": 0.016092179343104362,
"learning_rate": 8.042859228971964e-06,
"loss": 0.0008,
"step": 12210
},
{
"epoch": 7.13785046728972,
"grad_norm": 0.006236253771930933,
"learning_rate": 8.039208528037384e-06,
"loss": 0.0003,
"step": 12220
},
{
"epoch": 7.143691588785047,
"grad_norm": 0.004007366020232439,
"learning_rate": 8.035557827102805e-06,
"loss": 0.0002,
"step": 12230
},
{
"epoch": 7.149532710280374,
"grad_norm": 0.0033751898445189,
"learning_rate": 8.031907126168225e-06,
"loss": 0.0001,
"step": 12240
},
{
"epoch": 7.155373831775701,
"grad_norm": 0.0023680857848376036,
"learning_rate": 8.028256425233645e-06,
"loss": 0.0001,
"step": 12250
},
{
"epoch": 7.161214953271028,
"grad_norm": 0.0029950051102787256,
"learning_rate": 8.024605724299066e-06,
"loss": 0.0001,
"step": 12260
},
{
"epoch": 7.167056074766355,
"grad_norm": 0.002707561943680048,
"learning_rate": 8.020955023364486e-06,
"loss": 0.0001,
"step": 12270
},
{
"epoch": 7.172897196261682,
"grad_norm": 0.002163573633879423,
"learning_rate": 8.017304322429908e-06,
"loss": 0.0001,
"step": 12280
},
{
"epoch": 7.178738317757009,
"grad_norm": 0.0022281715646386147,
"learning_rate": 8.013653621495328e-06,
"loss": 0.0001,
"step": 12290
},
{
"epoch": 7.184579439252336,
"grad_norm": 0.002539695706218481,
"learning_rate": 8.010002920560748e-06,
"loss": 0.0001,
"step": 12300
},
{
"epoch": 7.190420560747664,
"grad_norm": 0.0019248753087595105,
"learning_rate": 8.006352219626169e-06,
"loss": 0.0001,
"step": 12310
},
{
"epoch": 7.196261682242991,
"grad_norm": 0.0019189286977052689,
"learning_rate": 8.002701518691589e-06,
"loss": 0.0005,
"step": 12320
},
{
"epoch": 7.202102803738318,
"grad_norm": 0.0019058303441852331,
"learning_rate": 7.999050817757011e-06,
"loss": 0.0001,
"step": 12330
},
{
"epoch": 7.207943925233645,
"grad_norm": 0.0016836983850225806,
"learning_rate": 7.99540011682243e-06,
"loss": 0.0001,
"step": 12340
},
{
"epoch": 7.213785046728972,
"grad_norm": 0.0029658398125320673,
"learning_rate": 7.99174941588785e-06,
"loss": 0.0001,
"step": 12350
},
{
"epoch": 7.219626168224299,
"grad_norm": 0.0017996998503804207,
"learning_rate": 7.988098714953272e-06,
"loss": 0.0001,
"step": 12360
},
{
"epoch": 7.225467289719626,
"grad_norm": 0.0019612801261246204,
"learning_rate": 7.984448014018692e-06,
"loss": 0.0001,
"step": 12370
},
{
"epoch": 7.231308411214953,
"grad_norm": 0.0017387006664648652,
"learning_rate": 7.980797313084113e-06,
"loss": 0.0001,
"step": 12380
},
{
"epoch": 7.23714953271028,
"grad_norm": 0.0015903054736554623,
"learning_rate": 7.977146612149533e-06,
"loss": 0.0001,
"step": 12390
},
{
"epoch": 7.242990654205608,
"grad_norm": 0.0018440884305164218,
"learning_rate": 7.973495911214953e-06,
"loss": 0.0001,
"step": 12400
},
{
"epoch": 7.248831775700935,
"grad_norm": 0.0016087364638224244,
"learning_rate": 7.969845210280375e-06,
"loss": 0.0001,
"step": 12410
},
{
"epoch": 7.2546728971962615,
"grad_norm": 0.0017136124661192298,
"learning_rate": 7.966194509345796e-06,
"loss": 0.0001,
"step": 12420
},
{
"epoch": 7.260514018691588,
"grad_norm": 0.0014161961153149605,
"learning_rate": 7.962543808411216e-06,
"loss": 0.0001,
"step": 12430
},
{
"epoch": 7.266355140186916,
"grad_norm": 0.0017160034039989114,
"learning_rate": 7.958893107476636e-06,
"loss": 0.0001,
"step": 12440
},
{
"epoch": 7.272196261682243,
"grad_norm": 0.0016139451181516051,
"learning_rate": 7.955242406542057e-06,
"loss": 0.0175,
"step": 12450
},
{
"epoch": 7.27803738317757,
"grad_norm": 0.0016056919703260064,
"learning_rate": 7.951591705607477e-06,
"loss": 0.0001,
"step": 12460
},
{
"epoch": 7.283878504672897,
"grad_norm": 0.0014629984507337213,
"learning_rate": 7.947941004672897e-06,
"loss": 0.0,
"step": 12470
},
{
"epoch": 7.289719626168225,
"grad_norm": 0.0015402857679873705,
"learning_rate": 7.944290303738318e-06,
"loss": 0.0,
"step": 12480
},
{
"epoch": 7.295560747663552,
"grad_norm": 0.001538011827506125,
"learning_rate": 7.94063960280374e-06,
"loss": 0.0,
"step": 12490
},
{
"epoch": 7.3014018691588785,
"grad_norm": 0.0017495568608865142,
"learning_rate": 7.93698890186916e-06,
"loss": 0.0001,
"step": 12500
},
{
"epoch": 7.307242990654205,
"grad_norm": 0.0013630022294819355,
"learning_rate": 7.93333820093458e-06,
"loss": 0.0,
"step": 12510
},
{
"epoch": 7.313084112149532,
"grad_norm": 0.0013937480980530381,
"learning_rate": 7.9296875e-06,
"loss": 0.0,
"step": 12520
},
{
"epoch": 7.31892523364486,
"grad_norm": 0.0014540269039571285,
"learning_rate": 7.92603679906542e-06,
"loss": 0.0,
"step": 12530
},
{
"epoch": 7.324766355140187,
"grad_norm": 0.0016073896549642086,
"learning_rate": 7.922386098130843e-06,
"loss": 0.0,
"step": 12540
},
{
"epoch": 7.330607476635514,
"grad_norm": 0.0013543206732720137,
"learning_rate": 7.918735397196261e-06,
"loss": 0.0,
"step": 12550
},
{
"epoch": 7.336448598130841,
"grad_norm": 0.00147124077193439,
"learning_rate": 7.915084696261683e-06,
"loss": 0.0,
"step": 12560
},
{
"epoch": 7.3422897196261685,
"grad_norm": 0.001399431494064629,
"learning_rate": 7.911433995327104e-06,
"loss": 0.0,
"step": 12570
},
{
"epoch": 7.348130841121495,
"grad_norm": 0.0013303236337378621,
"learning_rate": 7.907783294392524e-06,
"loss": 0.0418,
"step": 12580
},
{
"epoch": 7.353971962616822,
"grad_norm": 0.00885701458901167,
"learning_rate": 7.904132593457944e-06,
"loss": 0.0001,
"step": 12590
},
{
"epoch": 7.359813084112149,
"grad_norm": 0.005552555434405804,
"learning_rate": 7.900481892523365e-06,
"loss": 0.0139,
"step": 12600
},
{
"epoch": 7.365654205607477,
"grad_norm": 0.001586294500157237,
"learning_rate": 7.896831191588785e-06,
"loss": 0.0001,
"step": 12610
},
{
"epoch": 7.371495327102804,
"grad_norm": 0.001655551022849977,
"learning_rate": 7.893180490654207e-06,
"loss": 0.0001,
"step": 12620
},
{
"epoch": 7.377336448598131,
"grad_norm": 0.0016032133717089891,
"learning_rate": 7.889529789719627e-06,
"loss": 0.0001,
"step": 12630
},
{
"epoch": 7.383177570093458,
"grad_norm": 0.0013974695466458797,
"learning_rate": 7.885879088785048e-06,
"loss": 0.0001,
"step": 12640
},
{
"epoch": 7.389018691588785,
"grad_norm": 0.0015485621988773346,
"learning_rate": 7.882228387850468e-06,
"loss": 0.0001,
"step": 12650
},
{
"epoch": 7.394859813084112,
"grad_norm": 0.0014567647594958544,
"learning_rate": 7.878577686915888e-06,
"loss": 0.0,
"step": 12660
},
{
"epoch": 7.400700934579439,
"grad_norm": 0.0017351839924231172,
"learning_rate": 7.874926985981309e-06,
"loss": 0.0062,
"step": 12670
},
{
"epoch": 7.406542056074766,
"grad_norm": 0.005675299558788538,
"learning_rate": 7.871276285046729e-06,
"loss": 0.0001,
"step": 12680
},
{
"epoch": 7.412383177570094,
"grad_norm": 0.002098724478855729,
"learning_rate": 7.867625584112151e-06,
"loss": 0.0006,
"step": 12690
},
{
"epoch": 7.418224299065421,
"grad_norm": 0.001393609563820064,
"learning_rate": 7.863974883177571e-06,
"loss": 0.0,
"step": 12700
},
{
"epoch": 7.424065420560748,
"grad_norm": 0.0014116641832515597,
"learning_rate": 7.860324182242992e-06,
"loss": 0.0,
"step": 12710
},
{
"epoch": 7.429906542056075,
"grad_norm": 0.0013138065114617348,
"learning_rate": 7.856673481308412e-06,
"loss": 0.0,
"step": 12720
},
{
"epoch": 7.4357476635514015,
"grad_norm": 0.0014055016217753291,
"learning_rate": 7.853022780373832e-06,
"loss": 0.0,
"step": 12730
},
{
"epoch": 7.441588785046729,
"grad_norm": 0.001359453541226685,
"learning_rate": 7.849372079439252e-06,
"loss": 0.0,
"step": 12740
},
{
"epoch": 7.447429906542056,
"grad_norm": 0.0013050459092482924,
"learning_rate": 7.845721378504674e-06,
"loss": 0.0001,
"step": 12750
},
{
"epoch": 7.453271028037383,
"grad_norm": 0.0012936226557940245,
"learning_rate": 7.842070677570093e-06,
"loss": 0.0,
"step": 12760
},
{
"epoch": 7.45911214953271,
"grad_norm": 0.0012887063203379512,
"learning_rate": 7.838419976635515e-06,
"loss": 0.0,
"step": 12770
},
{
"epoch": 7.464953271028038,
"grad_norm": 0.0012247867416590452,
"learning_rate": 7.834769275700935e-06,
"loss": 0.0,
"step": 12780
},
{
"epoch": 7.470794392523365,
"grad_norm": 0.0012418876867741346,
"learning_rate": 7.831118574766356e-06,
"loss": 0.0,
"step": 12790
},
{
"epoch": 7.4766355140186915,
"grad_norm": 0.0011055973591282964,
"learning_rate": 7.827467873831776e-06,
"loss": 0.0,
"step": 12800
},
{
"epoch": 7.482476635514018,
"grad_norm": 0.0011922491248697042,
"learning_rate": 7.823817172897196e-06,
"loss": 0.0,
"step": 12810
},
{
"epoch": 7.488317757009346,
"grad_norm": 0.0012404808076098561,
"learning_rate": 7.820166471962617e-06,
"loss": 0.0,
"step": 12820
},
{
"epoch": 7.494158878504673,
"grad_norm": 0.0012015723623335361,
"learning_rate": 7.816515771028039e-06,
"loss": 0.0,
"step": 12830
},
{
"epoch": 7.5,
"grad_norm": 0.0010450141271576285,
"learning_rate": 7.812865070093459e-06,
"loss": 0.0,
"step": 12840
},
{
"epoch": 7.505841121495327,
"grad_norm": 0.041091423481702805,
"learning_rate": 7.80921436915888e-06,
"loss": 0.0001,
"step": 12850
},
{
"epoch": 7.511682242990654,
"grad_norm": 0.0012605130905285478,
"learning_rate": 7.8055636682243e-06,
"loss": 0.0,
"step": 12860
},
{
"epoch": 7.517523364485982,
"grad_norm": 0.001104542170651257,
"learning_rate": 7.80191296728972e-06,
"loss": 0.0,
"step": 12870
},
{
"epoch": 7.5233644859813085,
"grad_norm": 0.0011109106708317995,
"learning_rate": 7.79826226635514e-06,
"loss": 0.0,
"step": 12880
},
{
"epoch": 7.529205607476635,
"grad_norm": 0.0012586305383592844,
"learning_rate": 7.79461156542056e-06,
"loss": 0.0,
"step": 12890
},
{
"epoch": 7.535046728971962,
"grad_norm": 0.0010842509800568223,
"learning_rate": 7.790960864485983e-06,
"loss": 0.0,
"step": 12900
},
{
"epoch": 7.54088785046729,
"grad_norm": 0.0011715622385963798,
"learning_rate": 7.787310163551403e-06,
"loss": 0.0,
"step": 12910
},
{
"epoch": 7.546728971962617,
"grad_norm": 0.0011943539138883352,
"learning_rate": 7.783659462616823e-06,
"loss": 0.0,
"step": 12920
},
{
"epoch": 7.552570093457944,
"grad_norm": 0.0012001136783510447,
"learning_rate": 7.780008761682243e-06,
"loss": 0.0,
"step": 12930
},
{
"epoch": 7.558411214953271,
"grad_norm": 0.0010596492793411016,
"learning_rate": 7.776358060747664e-06,
"loss": 0.0,
"step": 12940
},
{
"epoch": 7.5642523364485985,
"grad_norm": 0.0010550218867138028,
"learning_rate": 7.772707359813084e-06,
"loss": 0.0001,
"step": 12950
},
{
"epoch": 7.570093457943925,
"grad_norm": 0.13915540277957916,
"learning_rate": 7.769056658878506e-06,
"loss": 0.0508,
"step": 12960
},
{
"epoch": 7.575934579439252,
"grad_norm": 0.010561136528849602,
"learning_rate": 7.765405957943926e-06,
"loss": 0.0004,
"step": 12970
},
{
"epoch": 7.581775700934579,
"grad_norm": 0.006983012892305851,
"learning_rate": 7.761755257009347e-06,
"loss": 0.0003,
"step": 12980
},
{
"epoch": 7.587616822429906,
"grad_norm": 0.006389088463038206,
"learning_rate": 7.758104556074767e-06,
"loss": 0.0002,
"step": 12990
},
{
"epoch": 7.593457943925234,
"grad_norm": 0.0036603384651243687,
"learning_rate": 7.754453855140187e-06,
"loss": 0.0133,
"step": 13000
},
{
"epoch": 7.599299065420561,
"grad_norm": 0.001919433125294745,
"learning_rate": 7.750803154205608e-06,
"loss": 0.0001,
"step": 13010
},
{
"epoch": 7.605140186915888,
"grad_norm": 0.0013452052371576428,
"learning_rate": 7.747152453271028e-06,
"loss": 0.0001,
"step": 13020
},
{
"epoch": 7.6109813084112155,
"grad_norm": 0.0012721661478281021,
"learning_rate": 7.74350175233645e-06,
"loss": 0.0,
"step": 13030
},
{
"epoch": 7.616822429906542,
"grad_norm": 0.0015792486956343055,
"learning_rate": 7.73985105140187e-06,
"loss": 0.0005,
"step": 13040
},
{
"epoch": 7.622663551401869,
"grad_norm": 0.0015665246173739433,
"learning_rate": 7.73620035046729e-06,
"loss": 0.0,
"step": 13050
},
{
"epoch": 7.628504672897196,
"grad_norm": 0.0012903015594929457,
"learning_rate": 7.732549649532711e-06,
"loss": 0.0,
"step": 13060
},
{
"epoch": 7.634345794392523,
"grad_norm": 0.0014493002090603113,
"learning_rate": 7.728898948598131e-06,
"loss": 0.0,
"step": 13070
},
{
"epoch": 7.640186915887851,
"grad_norm": 0.0013346703490242362,
"learning_rate": 7.725248247663552e-06,
"loss": 0.0,
"step": 13080
},
{
"epoch": 7.646028037383178,
"grad_norm": 0.0014051039470359683,
"learning_rate": 7.721597546728974e-06,
"loss": 0.0,
"step": 13090
},
{
"epoch": 7.651869158878505,
"grad_norm": 0.0015306697459891438,
"learning_rate": 7.717946845794392e-06,
"loss": 0.0,
"step": 13100
},
{
"epoch": 7.6577102803738315,
"grad_norm": 0.001278238371014595,
"learning_rate": 7.714296144859814e-06,
"loss": 0.0,
"step": 13110
},
{
"epoch": 7.663551401869158,
"grad_norm": 0.0012969443341717124,
"learning_rate": 7.710645443925235e-06,
"loss": 0.0004,
"step": 13120
},
{
"epoch": 7.669392523364486,
"grad_norm": 0.002551996847614646,
"learning_rate": 7.706994742990655e-06,
"loss": 0.0001,
"step": 13130
},
{
"epoch": 7.675233644859813,
"grad_norm": 0.0014118566177785397,
"learning_rate": 7.703344042056075e-06,
"loss": 0.003,
"step": 13140
},
{
"epoch": 7.68107476635514,
"grad_norm": 0.001230996917001903,
"learning_rate": 7.699693341121495e-06,
"loss": 0.0,
"step": 13150
},
{
"epoch": 7.686915887850468,
"grad_norm": 0.0011307065142318606,
"learning_rate": 7.696042640186917e-06,
"loss": 0.0,
"step": 13160
},
{
"epoch": 7.692757009345795,
"grad_norm": 0.001138526014983654,
"learning_rate": 7.692391939252338e-06,
"loss": 0.0,
"step": 13170
},
{
"epoch": 7.6985981308411215,
"grad_norm": 0.0011980440467596054,
"learning_rate": 7.688741238317758e-06,
"loss": 0.0,
"step": 13180
},
{
"epoch": 7.704439252336448,
"grad_norm": 19.9531192779541,
"learning_rate": 7.685090537383178e-06,
"loss": 0.0048,
"step": 13190
},
{
"epoch": 7.710280373831775,
"grad_norm": 0.0011443004477769136,
"learning_rate": 7.681439836448599e-06,
"loss": 0.0,
"step": 13200
},
{
"epoch": 7.716121495327103,
"grad_norm": 0.0011824540561065078,
"learning_rate": 7.677789135514019e-06,
"loss": 0.0,
"step": 13210
},
{
"epoch": 7.72196261682243,
"grad_norm": 0.0012346376897767186,
"learning_rate": 7.67413843457944e-06,
"loss": 0.0,
"step": 13220
},
{
"epoch": 7.727803738317757,
"grad_norm": 0.0013738384004682302,
"learning_rate": 7.67048773364486e-06,
"loss": 0.0,
"step": 13230
},
{
"epoch": 7.733644859813084,
"grad_norm": 0.0012500376906245947,
"learning_rate": 7.666837032710282e-06,
"loss": 0.0,
"step": 13240
},
{
"epoch": 7.739485981308412,
"grad_norm": 0.0011344078229740262,
"learning_rate": 7.663186331775702e-06,
"loss": 0.0,
"step": 13250
},
{
"epoch": 7.7453271028037385,
"grad_norm": 0.0011888087028637528,
"learning_rate": 7.659535630841122e-06,
"loss": 0.0312,
"step": 13260
},
{
"epoch": 7.751168224299065,
"grad_norm": 0.0011277446756139398,
"learning_rate": 7.655884929906543e-06,
"loss": 0.0,
"step": 13270
},
{
"epoch": 7.757009345794392,
"grad_norm": 0.001008135499432683,
"learning_rate": 7.652234228971963e-06,
"loss": 0.0,
"step": 13280
},
{
"epoch": 7.76285046728972,
"grad_norm": 0.001450374722480774,
"learning_rate": 7.648583528037385e-06,
"loss": 0.0309,
"step": 13290
},
{
"epoch": 7.768691588785047,
"grad_norm": 0.0015170661499723792,
"learning_rate": 7.644932827102805e-06,
"loss": 0.0001,
"step": 13300
},
{
"epoch": 7.774532710280374,
"grad_norm": 0.0019507072865962982,
"learning_rate": 7.641282126168224e-06,
"loss": 0.0001,
"step": 13310
},
{
"epoch": 7.780373831775701,
"grad_norm": 0.0026334829162806273,
"learning_rate": 7.637631425233646e-06,
"loss": 0.0002,
"step": 13320
},
{
"epoch": 7.786214953271028,
"grad_norm": 0.0013415843714028597,
"learning_rate": 7.633980724299066e-06,
"loss": 0.0,
"step": 13330
},
{
"epoch": 7.792056074766355,
"grad_norm": 0.0012233871966600418,
"learning_rate": 7.630330023364487e-06,
"loss": 0.0,
"step": 13340
},
{
"epoch": 7.797897196261682,
"grad_norm": 0.0008745313389226794,
"learning_rate": 7.626679322429908e-06,
"loss": 0.0,
"step": 13350
},
{
"epoch": 7.803738317757009,
"grad_norm": 0.002569309901446104,
"learning_rate": 7.623028621495327e-06,
"loss": 0.0,
"step": 13360
},
{
"epoch": 7.809579439252336,
"grad_norm": 0.0009451903752051294,
"learning_rate": 7.619377920560749e-06,
"loss": 0.0,
"step": 13370
},
{
"epoch": 7.815420560747664,
"grad_norm": 0.0009761240216903389,
"learning_rate": 7.615727219626169e-06,
"loss": 0.0,
"step": 13380
},
{
"epoch": 7.821261682242991,
"grad_norm": 0.0009837980614975095,
"learning_rate": 7.612076518691589e-06,
"loss": 0.0,
"step": 13390
},
{
"epoch": 7.827102803738318,
"grad_norm": 0.001016711932606995,
"learning_rate": 7.60842581775701e-06,
"loss": 0.0,
"step": 13400
},
{
"epoch": 7.832943925233645,
"grad_norm": 0.0009571296395733953,
"learning_rate": 7.60477511682243e-06,
"loss": 0.0,
"step": 13410
},
{
"epoch": 7.838785046728972,
"grad_norm": 0.0009453770471736789,
"learning_rate": 7.601124415887851e-06,
"loss": 0.0,
"step": 13420
},
{
"epoch": 7.844626168224299,
"grad_norm": 0.001051238621585071,
"learning_rate": 7.597473714953272e-06,
"loss": 0.0,
"step": 13430
},
{
"epoch": 7.850467289719626,
"grad_norm": 0.0011488308664411306,
"learning_rate": 7.593823014018692e-06,
"loss": 0.0,
"step": 13440
},
{
"epoch": 7.856308411214953,
"grad_norm": 0.0007586100255139172,
"learning_rate": 7.590172313084113e-06,
"loss": 0.0,
"step": 13450
},
{
"epoch": 7.86214953271028,
"grad_norm": 0.0008220048621296883,
"learning_rate": 7.586521612149534e-06,
"loss": 0.0,
"step": 13460
},
{
"epoch": 7.867990654205608,
"grad_norm": 0.0009259909274987876,
"learning_rate": 7.582870911214953e-06,
"loss": 0.0,
"step": 13470
},
{
"epoch": 7.873831775700935,
"grad_norm": 0.0010616907384246588,
"learning_rate": 7.579220210280374e-06,
"loss": 0.0,
"step": 13480
},
{
"epoch": 7.8796728971962615,
"grad_norm": 0.0008421916863881052,
"learning_rate": 7.575569509345795e-06,
"loss": 0.0,
"step": 13490
},
{
"epoch": 7.885514018691588,
"grad_norm": 0.0009183556539937854,
"learning_rate": 7.571918808411216e-06,
"loss": 0.0,
"step": 13500
},
{
"epoch": 7.891355140186916,
"grad_norm": 0.0010863002389669418,
"learning_rate": 7.568268107476636e-06,
"loss": 0.0,
"step": 13510
},
{
"epoch": 7.897196261682243,
"grad_norm": 0.0010933643206954002,
"learning_rate": 7.564617406542056e-06,
"loss": 0.0,
"step": 13520
},
{
"epoch": 7.90303738317757,
"grad_norm": 0.0010127710411325097,
"learning_rate": 7.5609667056074776e-06,
"loss": 0.0,
"step": 13530
},
{
"epoch": 7.908878504672897,
"grad_norm": 0.00087543431436643,
"learning_rate": 7.557316004672898e-06,
"loss": 0.0,
"step": 13540
},
{
"epoch": 7.914719626168225,
"grad_norm": 0.0010519116185605526,
"learning_rate": 7.553665303738318e-06,
"loss": 0.0,
"step": 13550
},
{
"epoch": 7.920560747663552,
"grad_norm": 0.000822052825242281,
"learning_rate": 7.550014602803739e-06,
"loss": 0.0,
"step": 13560
},
{
"epoch": 7.9264018691588785,
"grad_norm": 0.0007104118703864515,
"learning_rate": 7.546363901869159e-06,
"loss": 0.0,
"step": 13570
},
{
"epoch": 7.932242990654205,
"grad_norm": 0.0008943330030888319,
"learning_rate": 7.542713200934581e-06,
"loss": 0.0,
"step": 13580
},
{
"epoch": 7.938084112149532,
"grad_norm": 0.0009444963652640581,
"learning_rate": 7.5390625e-06,
"loss": 0.0,
"step": 13590
},
{
"epoch": 7.94392523364486,
"grad_norm": 0.0008807304548099637,
"learning_rate": 7.535411799065421e-06,
"loss": 0.0,
"step": 13600
},
{
"epoch": 7.949766355140187,
"grad_norm": 0.0008989177295006812,
"learning_rate": 7.531761098130842e-06,
"loss": 0.0008,
"step": 13610
},
{
"epoch": 7.955607476635514,
"grad_norm": 0.0010687765898182988,
"learning_rate": 7.528110397196262e-06,
"loss": 0.0,
"step": 13620
},
{
"epoch": 7.961448598130842,
"grad_norm": 0.0009110852843150496,
"learning_rate": 7.524459696261683e-06,
"loss": 0.0,
"step": 13630
},
{
"epoch": 7.9672897196261685,
"grad_norm": 0.000878663791809231,
"learning_rate": 7.5208089953271036e-06,
"loss": 0.0,
"step": 13640
},
{
"epoch": 7.973130841121495,
"grad_norm": 0.0007639786344952881,
"learning_rate": 7.517158294392524e-06,
"loss": 0.0,
"step": 13650
},
{
"epoch": 7.978971962616822,
"grad_norm": 0.000716277165338397,
"learning_rate": 7.513507593457945e-06,
"loss": 0.0,
"step": 13660
},
{
"epoch": 7.984813084112149,
"grad_norm": 0.0009262704406864941,
"learning_rate": 7.509856892523365e-06,
"loss": 0.0,
"step": 13670
},
{
"epoch": 7.990654205607477,
"grad_norm": 0.0009081005700863898,
"learning_rate": 7.506206191588785e-06,
"loss": 0.0,
"step": 13680
},
{
"epoch": 7.996495327102804,
"grad_norm": 0.0008006028365343809,
"learning_rate": 7.502555490654207e-06,
"loss": 0.0,
"step": 13690
},
{
"epoch": 8.0,
"eval_f1": 0.9993188526451222,
"eval_fbeta": 0.9989105295188826,
"eval_loss": 0.004954248666763306,
"eval_precision": 0.9986386325820602,
"eval_recall": 1.0,
"eval_runtime": 170.9133,
"eval_samples_per_second": 68.672,
"eval_steps_per_second": 8.589,
"step": 13696
},
{
"epoch": 8.00233644859813,
"grad_norm": 0.0008075121440924704,
"learning_rate": 7.498904789719626e-06,
"loss": 0.0,
"step": 13700
},
{
"epoch": 8.008177570093459,
"grad_norm": 0.0007103482494130731,
"learning_rate": 7.4952540887850474e-06,
"loss": 0.0,
"step": 13710
},
{
"epoch": 8.014018691588785,
"grad_norm": 0.0007463495712727308,
"learning_rate": 7.491603387850468e-06,
"loss": 0.0,
"step": 13720
},
{
"epoch": 8.019859813084112,
"grad_norm": 0.000795390282291919,
"learning_rate": 7.487952686915888e-06,
"loss": 0.0,
"step": 13730
},
{
"epoch": 8.02570093457944,
"grad_norm": 0.0007960695074871182,
"learning_rate": 7.484301985981309e-06,
"loss": 0.0,
"step": 13740
},
{
"epoch": 8.031542056074766,
"grad_norm": 0.000913925701752305,
"learning_rate": 7.4806512850467295e-06,
"loss": 0.0493,
"step": 13750
},
{
"epoch": 8.037383177570094,
"grad_norm": 0.0020121552515774965,
"learning_rate": 7.477000584112151e-06,
"loss": 0.0009,
"step": 13760
},
{
"epoch": 8.04322429906542,
"grad_norm": 0.0016634415369480848,
"learning_rate": 7.473349883177571e-06,
"loss": 0.0001,
"step": 13770
},
{
"epoch": 8.049065420560748,
"grad_norm": 0.0020532705821096897,
"learning_rate": 7.469699182242991e-06,
"loss": 0.0001,
"step": 13780
},
{
"epoch": 8.054906542056075,
"grad_norm": 0.0015584519132971764,
"learning_rate": 7.4660484813084125e-06,
"loss": 0.0,
"step": 13790
},
{
"epoch": 8.060747663551401,
"grad_norm": 0.002475617453455925,
"learning_rate": 7.462397780373832e-06,
"loss": 0.0244,
"step": 13800
},
{
"epoch": 8.06658878504673,
"grad_norm": 0.0036134568508714437,
"learning_rate": 7.458747079439252e-06,
"loss": 0.0001,
"step": 13810
},
{
"epoch": 8.072429906542055,
"grad_norm": 0.016877055168151855,
"learning_rate": 7.4550963785046734e-06,
"loss": 0.0003,
"step": 13820
},
{
"epoch": 8.078271028037383,
"grad_norm": 0.0013811654644086957,
"learning_rate": 7.451445677570094e-06,
"loss": 0.0002,
"step": 13830
},
{
"epoch": 8.08411214953271,
"grad_norm": 0.017157340422272682,
"learning_rate": 7.447794976635515e-06,
"loss": 0.0001,
"step": 13840
},
{
"epoch": 8.089953271028037,
"grad_norm": 0.0008855984779074788,
"learning_rate": 7.444144275700935e-06,
"loss": 0.0,
"step": 13850
},
{
"epoch": 8.095794392523365,
"grad_norm": 0.000996871036477387,
"learning_rate": 7.4404935747663555e-06,
"loss": 0.0,
"step": 13860
},
{
"epoch": 8.101635514018692,
"grad_norm": 0.0009028511703945696,
"learning_rate": 7.436842873831777e-06,
"loss": 0.0,
"step": 13870
},
{
"epoch": 8.107476635514018,
"grad_norm": 0.0009699731017462909,
"learning_rate": 7.433192172897197e-06,
"loss": 0.0,
"step": 13880
},
{
"epoch": 8.113317757009346,
"grad_norm": 0.0009504208574071527,
"learning_rate": 7.4295414719626165e-06,
"loss": 0.0,
"step": 13890
},
{
"epoch": 8.119158878504672,
"grad_norm": 0.0008348809205926955,
"learning_rate": 7.4258907710280385e-06,
"loss": 0.0,
"step": 13900
},
{
"epoch": 8.125,
"grad_norm": 0.0011557228863239288,
"learning_rate": 7.422240070093458e-06,
"loss": 0.0,
"step": 13910
},
{
"epoch": 8.130841121495328,
"grad_norm": 0.0009561071055941284,
"learning_rate": 7.418589369158879e-06,
"loss": 0.0,
"step": 13920
},
{
"epoch": 8.136682242990654,
"grad_norm": 0.0012622548965737224,
"learning_rate": 7.4149386682242994e-06,
"loss": 0.0,
"step": 13930
},
{
"epoch": 8.142523364485982,
"grad_norm": 0.001713108504191041,
"learning_rate": 7.41128796728972e-06,
"loss": 0.0,
"step": 13940
},
{
"epoch": 8.148364485981308,
"grad_norm": 0.0007852665730752051,
"learning_rate": 7.407637266355141e-06,
"loss": 0.0001,
"step": 13950
},
{
"epoch": 8.154205607476635,
"grad_norm": 0.0008906972361728549,
"learning_rate": 7.403986565420561e-06,
"loss": 0.0,
"step": 13960
},
{
"epoch": 8.160046728971963,
"grad_norm": 0.0007920749485492706,
"learning_rate": 7.400335864485982e-06,
"loss": 0.0,
"step": 13970
},
{
"epoch": 8.16588785046729,
"grad_norm": 0.000825137656647712,
"learning_rate": 7.396685163551403e-06,
"loss": 0.0,
"step": 13980
},
{
"epoch": 8.171728971962617,
"grad_norm": 0.0008679748862050474,
"learning_rate": 7.393034462616823e-06,
"loss": 0.0,
"step": 13990
},
{
"epoch": 8.177570093457945,
"grad_norm": 0.0007770723896101117,
"learning_rate": 7.389383761682244e-06,
"loss": 0.0,
"step": 14000
},
{
"epoch": 8.18341121495327,
"grad_norm": 0.000692716334015131,
"learning_rate": 7.385733060747664e-06,
"loss": 0.0,
"step": 14010
},
{
"epoch": 8.189252336448599,
"grad_norm": 0.00084645178867504,
"learning_rate": 7.382082359813084e-06,
"loss": 0.0,
"step": 14020
},
{
"epoch": 8.195093457943925,
"grad_norm": 0.0011498586973175406,
"learning_rate": 7.378431658878505e-06,
"loss": 0.0,
"step": 14030
},
{
"epoch": 8.200934579439252,
"grad_norm": 0.000940586207434535,
"learning_rate": 7.374780957943925e-06,
"loss": 0.0,
"step": 14040
},
{
"epoch": 8.20677570093458,
"grad_norm": 0.0007425823714584112,
"learning_rate": 7.371130257009347e-06,
"loss": 0.0,
"step": 14050
},
{
"epoch": 8.212616822429906,
"grad_norm": 0.0007549254805780947,
"learning_rate": 7.367479556074767e-06,
"loss": 0.0,
"step": 14060
},
{
"epoch": 8.218457943925234,
"grad_norm": 0.0007854328723624349,
"learning_rate": 7.363828855140187e-06,
"loss": 0.0,
"step": 14070
},
{
"epoch": 8.22429906542056,
"grad_norm": 0.0007651887135580182,
"learning_rate": 7.360178154205608e-06,
"loss": 0.0,
"step": 14080
},
{
"epoch": 8.230140186915888,
"grad_norm": 0.0006790246115997434,
"learning_rate": 7.356527453271029e-06,
"loss": 0.0,
"step": 14090
},
{
"epoch": 8.235981308411215,
"grad_norm": 0.0006944058113731444,
"learning_rate": 7.35287675233645e-06,
"loss": 0.0,
"step": 14100
},
{
"epoch": 8.241822429906541,
"grad_norm": 0.0006746923318132758,
"learning_rate": 7.34922605140187e-06,
"loss": 0.0,
"step": 14110
},
{
"epoch": 8.24766355140187,
"grad_norm": 0.0007892411667853594,
"learning_rate": 7.34557535046729e-06,
"loss": 0.0,
"step": 14120
},
{
"epoch": 8.253504672897197,
"grad_norm": 0.0006671813898719847,
"learning_rate": 7.341924649532712e-06,
"loss": 0.0,
"step": 14130
},
{
"epoch": 8.259345794392523,
"grad_norm": 0.0006297816289588809,
"learning_rate": 7.338273948598131e-06,
"loss": 0.0,
"step": 14140
},
{
"epoch": 8.26518691588785,
"grad_norm": 0.0006763806450180709,
"learning_rate": 7.334623247663551e-06,
"loss": 0.0,
"step": 14150
},
{
"epoch": 8.271028037383177,
"grad_norm": 0.0007309651700779796,
"learning_rate": 7.3309725467289726e-06,
"loss": 0.0,
"step": 14160
},
{
"epoch": 8.276869158878505,
"grad_norm": 0.0006537912413477898,
"learning_rate": 7.327321845794393e-06,
"loss": 0.0,
"step": 14170
},
{
"epoch": 8.282710280373832,
"grad_norm": 0.0006116880103945732,
"learning_rate": 7.323671144859814e-06,
"loss": 0.0,
"step": 14180
},
{
"epoch": 8.288551401869158,
"grad_norm": 0.0006861832225695252,
"learning_rate": 7.320020443925234e-06,
"loss": 0.0,
"step": 14190
},
{
"epoch": 8.294392523364486,
"grad_norm": 0.0006580561748705804,
"learning_rate": 7.316369742990655e-06,
"loss": 0.0,
"step": 14200
},
{
"epoch": 8.300233644859812,
"grad_norm": 0.0006089273374527693,
"learning_rate": 7.312719042056076e-06,
"loss": 0.0,
"step": 14210
},
{
"epoch": 8.30607476635514,
"grad_norm": 0.0006199203780852258,
"learning_rate": 7.309068341121496e-06,
"loss": 0.0,
"step": 14220
},
{
"epoch": 8.311915887850468,
"grad_norm": 0.0005652210093103349,
"learning_rate": 7.305417640186917e-06,
"loss": 0.0,
"step": 14230
},
{
"epoch": 8.317757009345794,
"grad_norm": 0.0006072871037758887,
"learning_rate": 7.301766939252337e-06,
"loss": 0.0,
"step": 14240
},
{
"epoch": 8.323598130841122,
"grad_norm": 0.0006104871281422675,
"learning_rate": 7.298116238317757e-06,
"loss": 0.0,
"step": 14250
},
{
"epoch": 8.32943925233645,
"grad_norm": 0.0005932269268669188,
"learning_rate": 7.294465537383178e-06,
"loss": 0.0,
"step": 14260
},
{
"epoch": 8.335280373831775,
"grad_norm": 0.000652293034363538,
"learning_rate": 7.2908148364485986e-06,
"loss": 0.0028,
"step": 14270
},
{
"epoch": 8.341121495327103,
"grad_norm": 0.0007468141266144812,
"learning_rate": 7.287164135514019e-06,
"loss": 0.0,
"step": 14280
},
{
"epoch": 8.34696261682243,
"grad_norm": 0.0007531442679464817,
"learning_rate": 7.28351343457944e-06,
"loss": 0.0,
"step": 14290
},
{
"epoch": 8.352803738317757,
"grad_norm": 0.000986740575172007,
"learning_rate": 7.27986273364486e-06,
"loss": 0.0674,
"step": 14300
},
{
"epoch": 8.358644859813085,
"grad_norm": 0.0013275983510538936,
"learning_rate": 7.2762120327102815e-06,
"loss": 0.0,
"step": 14310
},
{
"epoch": 8.36448598130841,
"grad_norm": 0.0015961026074364781,
"learning_rate": 7.272561331775702e-06,
"loss": 0.0003,
"step": 14320
},
{
"epoch": 8.370327102803738,
"grad_norm": 0.0012349386233836412,
"learning_rate": 7.268910630841121e-06,
"loss": 0.0,
"step": 14330
},
{
"epoch": 8.376168224299066,
"grad_norm": 0.001687607727944851,
"learning_rate": 7.265259929906543e-06,
"loss": 0.0,
"step": 14340
},
{
"epoch": 8.382009345794392,
"grad_norm": 0.0012085556518286467,
"learning_rate": 7.261609228971963e-06,
"loss": 0.0001,
"step": 14350
},
{
"epoch": 8.38785046728972,
"grad_norm": 0.0010161312529817224,
"learning_rate": 7.257958528037384e-06,
"loss": 0.0,
"step": 14360
},
{
"epoch": 8.393691588785046,
"grad_norm": 0.0010755781549960375,
"learning_rate": 7.254307827102804e-06,
"loss": 0.0001,
"step": 14370
},
{
"epoch": 8.399532710280374,
"grad_norm": 0.0010478084441274405,
"learning_rate": 7.2506571261682246e-06,
"loss": 0.0,
"step": 14380
},
{
"epoch": 8.405373831775702,
"grad_norm": 0.001011241809464991,
"learning_rate": 7.247006425233646e-06,
"loss": 0.0,
"step": 14390
},
{
"epoch": 8.411214953271028,
"grad_norm": 0.0009002193110063672,
"learning_rate": 7.243355724299066e-06,
"loss": 0.0,
"step": 14400
},
{
"epoch": 8.417056074766355,
"grad_norm": 0.0009343910496681929,
"learning_rate": 7.239705023364486e-06,
"loss": 0.0,
"step": 14410
},
{
"epoch": 8.422897196261681,
"grad_norm": 0.0008302544592879713,
"learning_rate": 7.2360543224299075e-06,
"loss": 0.0001,
"step": 14420
},
{
"epoch": 8.42873831775701,
"grad_norm": 0.0008650152012705803,
"learning_rate": 7.232403621495328e-06,
"loss": 0.0,
"step": 14430
},
{
"epoch": 8.434579439252337,
"grad_norm": 0.0008420557714998722,
"learning_rate": 7.228752920560749e-06,
"loss": 0.0,
"step": 14440
},
{
"epoch": 8.440420560747663,
"grad_norm": 0.0008095310186035931,
"learning_rate": 7.2251022196261684e-06,
"loss": 0.0,
"step": 14450
},
{
"epoch": 8.44626168224299,
"grad_norm": 0.0008125108433887362,
"learning_rate": 7.221451518691589e-06,
"loss": 0.0,
"step": 14460
},
{
"epoch": 8.452102803738319,
"grad_norm": 0.0007584681734442711,
"learning_rate": 7.21780081775701e-06,
"loss": 0.0,
"step": 14470
},
{
"epoch": 8.457943925233645,
"grad_norm": 0.0007872634450905025,
"learning_rate": 7.21415011682243e-06,
"loss": 0.0,
"step": 14480
},
{
"epoch": 8.463785046728972,
"grad_norm": 0.0007215076475404203,
"learning_rate": 7.2104994158878505e-06,
"loss": 0.0215,
"step": 14490
},
{
"epoch": 8.469626168224298,
"grad_norm": 0.0007134796469472349,
"learning_rate": 7.206848714953272e-06,
"loss": 0.0,
"step": 14500
},
{
"epoch": 8.475467289719626,
"grad_norm": 0.0008008384029380977,
"learning_rate": 7.203198014018692e-06,
"loss": 0.0,
"step": 14510
},
{
"epoch": 8.481308411214954,
"grad_norm": 0.000777578039560467,
"learning_rate": 7.199547313084113e-06,
"loss": 0.0,
"step": 14520
},
{
"epoch": 8.48714953271028,
"grad_norm": 0.0007424994255416095,
"learning_rate": 7.1958966121495335e-06,
"loss": 0.0,
"step": 14530
},
{
"epoch": 8.492990654205608,
"grad_norm": 0.0006925264024175704,
"learning_rate": 7.192245911214954e-06,
"loss": 0.0,
"step": 14540
},
{
"epoch": 8.498831775700934,
"grad_norm": 0.0006563960923813283,
"learning_rate": 7.188595210280375e-06,
"loss": 0.0,
"step": 14550
},
{
"epoch": 8.504672897196262,
"grad_norm": 0.0007172149489633739,
"learning_rate": 7.1849445093457944e-06,
"loss": 0.0,
"step": 14560
},
{
"epoch": 8.51051401869159,
"grad_norm": 0.0006392272189259529,
"learning_rate": 7.1812938084112164e-06,
"loss": 0.0,
"step": 14570
},
{
"epoch": 8.516355140186915,
"grad_norm": 0.0006601911736652255,
"learning_rate": 7.177643107476636e-06,
"loss": 0.0,
"step": 14580
},
{
"epoch": 8.522196261682243,
"grad_norm": 9.061529159545898,
"learning_rate": 7.173992406542056e-06,
"loss": 0.0489,
"step": 14590
},
{
"epoch": 8.52803738317757,
"grad_norm": 0.0026740028988569975,
"learning_rate": 7.170341705607477e-06,
"loss": 0.0001,
"step": 14600
},
{
"epoch": 8.533878504672897,
"grad_norm": 0.004049922805279493,
"learning_rate": 7.166691004672898e-06,
"loss": 0.0002,
"step": 14610
},
{
"epoch": 8.539719626168225,
"grad_norm": 0.006144702434539795,
"learning_rate": 7.163040303738318e-06,
"loss": 0.0001,
"step": 14620
},
{
"epoch": 8.54556074766355,
"grad_norm": 0.00719565711915493,
"learning_rate": 7.159389602803739e-06,
"loss": 0.0001,
"step": 14630
},
{
"epoch": 8.551401869158878,
"grad_norm": 0.0010341497836634517,
"learning_rate": 7.1557389018691595e-06,
"loss": 0.0001,
"step": 14640
},
{
"epoch": 8.557242990654206,
"grad_norm": 0.0012794750509783626,
"learning_rate": 7.152088200934581e-06,
"loss": 0.0001,
"step": 14650
},
{
"epoch": 8.563084112149532,
"grad_norm": 0.0016508783446624875,
"learning_rate": 7.148437500000001e-06,
"loss": 0.0,
"step": 14660
},
{
"epoch": 8.56892523364486,
"grad_norm": 0.0007288011256605387,
"learning_rate": 7.1447867990654204e-06,
"loss": 0.0,
"step": 14670
},
{
"epoch": 8.574766355140188,
"grad_norm": 0.0006575025035999715,
"learning_rate": 7.141136098130842e-06,
"loss": 0.0,
"step": 14680
},
{
"epoch": 8.580607476635514,
"grad_norm": 0.000702616642229259,
"learning_rate": 7.137485397196262e-06,
"loss": 0.0,
"step": 14690
},
{
"epoch": 8.586448598130842,
"grad_norm": 0.0007005234947428107,
"learning_rate": 7.133834696261683e-06,
"loss": 0.0,
"step": 14700
},
{
"epoch": 8.592289719626168,
"grad_norm": 0.0006908517680130899,
"learning_rate": 7.130183995327103e-06,
"loss": 0.0,
"step": 14710
},
{
"epoch": 8.598130841121495,
"grad_norm": 0.0006434786482714117,
"learning_rate": 7.126533294392524e-06,
"loss": 0.0,
"step": 14720
},
{
"epoch": 8.603971962616823,
"grad_norm": 0.0007483828230760992,
"learning_rate": 7.122882593457945e-06,
"loss": 0.0001,
"step": 14730
},
{
"epoch": 8.60981308411215,
"grad_norm": 0.0007363962358795106,
"learning_rate": 7.119231892523365e-06,
"loss": 0.0,
"step": 14740
},
{
"epoch": 8.615654205607477,
"grad_norm": 0.0007484328816644847,
"learning_rate": 7.1155811915887855e-06,
"loss": 0.0,
"step": 14750
},
{
"epoch": 8.621495327102803,
"grad_norm": 0.0007965927361510694,
"learning_rate": 7.111930490654207e-06,
"loss": 0.0,
"step": 14760
},
{
"epoch": 8.62733644859813,
"grad_norm": 0.0006503010517917573,
"learning_rate": 7.108279789719626e-06,
"loss": 0.0,
"step": 14770
},
{
"epoch": 8.633177570093459,
"grad_norm": 0.0006895572878420353,
"learning_rate": 7.104629088785048e-06,
"loss": 0.0,
"step": 14780
},
{
"epoch": 8.639018691588785,
"grad_norm": 0.0006212314474396408,
"learning_rate": 7.100978387850468e-06,
"loss": 0.0,
"step": 14790
},
{
"epoch": 8.644859813084112,
"grad_norm": 0.0006286058924160898,
"learning_rate": 7.097327686915888e-06,
"loss": 0.0,
"step": 14800
},
{
"epoch": 8.65070093457944,
"grad_norm": 0.001083516632206738,
"learning_rate": 7.093676985981309e-06,
"loss": 0.0,
"step": 14810
},
{
"epoch": 8.656542056074766,
"grad_norm": 0.0005799504579044878,
"learning_rate": 7.090026285046729e-06,
"loss": 0.0,
"step": 14820
},
{
"epoch": 8.662383177570094,
"grad_norm": 0.0005443979753181338,
"learning_rate": 7.0863755841121505e-06,
"loss": 0.0,
"step": 14830
},
{
"epoch": 8.66822429906542,
"grad_norm": 0.8044731020927429,
"learning_rate": 7.082724883177571e-06,
"loss": 0.0008,
"step": 14840
},
{
"epoch": 8.674065420560748,
"grad_norm": 0.0007343247998505831,
"learning_rate": 7.079074182242991e-06,
"loss": 0.0006,
"step": 14850
},
{
"epoch": 8.679906542056075,
"grad_norm": 0.0007370833773165941,
"learning_rate": 7.075423481308412e-06,
"loss": 0.0,
"step": 14860
},
{
"epoch": 8.685747663551401,
"grad_norm": 0.000746356206946075,
"learning_rate": 7.071772780373833e-06,
"loss": 0.0,
"step": 14870
},
{
"epoch": 8.69158878504673,
"grad_norm": 0.0007385259959846735,
"learning_rate": 7.068122079439252e-06,
"loss": 0.0,
"step": 14880
},
{
"epoch": 8.697429906542055,
"grad_norm": 0.0007199611281976104,
"learning_rate": 7.064471378504674e-06,
"loss": 0.0,
"step": 14890
},
{
"epoch": 8.703271028037383,
"grad_norm": 0.0006418672273866832,
"learning_rate": 7.0608206775700936e-06,
"loss": 0.0,
"step": 14900
},
{
"epoch": 8.70911214953271,
"grad_norm": 0.0006997276796028018,
"learning_rate": 7.057169976635515e-06,
"loss": 0.0,
"step": 14910
},
{
"epoch": 8.714953271028037,
"grad_norm": 0.0005712428246624768,
"learning_rate": 7.053519275700935e-06,
"loss": 0.0,
"step": 14920
},
{
"epoch": 8.720794392523365,
"grad_norm": 0.0005171049269847572,
"learning_rate": 7.049868574766355e-06,
"loss": 0.0,
"step": 14930
},
{
"epoch": 8.726635514018692,
"grad_norm": 0.00048571472871117294,
"learning_rate": 7.0462178738317765e-06,
"loss": 0.0,
"step": 14940
},
{
"epoch": 8.732476635514018,
"grad_norm": 0.0005043037817813456,
"learning_rate": 7.042567172897197e-06,
"loss": 0.0,
"step": 14950
},
{
"epoch": 8.738317757009346,
"grad_norm": 0.0005587654886767268,
"learning_rate": 7.038916471962617e-06,
"loss": 0.0,
"step": 14960
},
{
"epoch": 8.744158878504672,
"grad_norm": 0.0004949513822793961,
"learning_rate": 7.035265771028038e-06,
"loss": 0.0,
"step": 14970
},
{
"epoch": 8.75,
"grad_norm": 0.00048060237895697355,
"learning_rate": 7.031615070093459e-06,
"loss": 0.0,
"step": 14980
},
{
"epoch": 8.755841121495328,
"grad_norm": 0.0005176740232855082,
"learning_rate": 7.02796436915888e-06,
"loss": 0.0,
"step": 14990
},
{
"epoch": 8.761682242990654,
"grad_norm": 0.0005571042420342565,
"learning_rate": 7.024313668224299e-06,
"loss": 0.0,
"step": 15000
},
{
"epoch": 8.767523364485982,
"grad_norm": 0.00046813112567178905,
"learning_rate": 7.0206629672897196e-06,
"loss": 0.0,
"step": 15010
},
{
"epoch": 8.773364485981308,
"grad_norm": 0.0005086753517389297,
"learning_rate": 7.017012266355141e-06,
"loss": 0.0,
"step": 15020
},
{
"epoch": 8.779205607476635,
"grad_norm": 0.0004869645636063069,
"learning_rate": 7.013361565420561e-06,
"loss": 0.0,
"step": 15030
},
{
"epoch": 8.785046728971963,
"grad_norm": 0.000516543397679925,
"learning_rate": 7.009710864485982e-06,
"loss": 0.0,
"step": 15040
},
{
"epoch": 8.79088785046729,
"grad_norm": 0.0004322292807046324,
"learning_rate": 7.0060601635514025e-06,
"loss": 0.0,
"step": 15050
},
{
"epoch": 8.796728971962617,
"grad_norm": 0.0004122374521102756,
"learning_rate": 7.002409462616823e-06,
"loss": 0.0,
"step": 15060
},
{
"epoch": 8.802570093457945,
"grad_norm": 0.00039723445661365986,
"learning_rate": 6.998758761682244e-06,
"loss": 0.0,
"step": 15070
},
{
"epoch": 8.80841121495327,
"grad_norm": 0.00043567997636273503,
"learning_rate": 6.995108060747664e-06,
"loss": 0.0,
"step": 15080
},
{
"epoch": 8.814252336448599,
"grad_norm": 0.00040160896605812013,
"learning_rate": 6.991457359813084e-06,
"loss": 0.0,
"step": 15090
},
{
"epoch": 8.820093457943925,
"grad_norm": 0.0004335957346484065,
"learning_rate": 6.987806658878506e-06,
"loss": 0.0,
"step": 15100
},
{
"epoch": 8.825934579439252,
"grad_norm": 0.0004694766830652952,
"learning_rate": 6.984155957943925e-06,
"loss": 0.0,
"step": 15110
},
{
"epoch": 8.83177570093458,
"grad_norm": 0.10719966888427734,
"learning_rate": 6.980505257009346e-06,
"loss": 0.0,
"step": 15120
},
{
"epoch": 8.837616822429906,
"grad_norm": 0.000425121485022828,
"learning_rate": 6.976854556074767e-06,
"loss": 0.0,
"step": 15130
},
{
"epoch": 8.843457943925234,
"grad_norm": 0.0004239288973622024,
"learning_rate": 6.973203855140187e-06,
"loss": 0.0,
"step": 15140
},
{
"epoch": 8.84929906542056,
"grad_norm": 0.0004261216090526432,
"learning_rate": 6.969553154205608e-06,
"loss": 0.0,
"step": 15150
},
{
"epoch": 8.855140186915888,
"grad_norm": 0.0004185021680314094,
"learning_rate": 6.9659024532710285e-06,
"loss": 0.0,
"step": 15160
},
{
"epoch": 8.860981308411215,
"grad_norm": 0.000404214282752946,
"learning_rate": 6.96225175233645e-06,
"loss": 0.0,
"step": 15170
},
{
"epoch": 8.866822429906541,
"grad_norm": 0.00038997773663140833,
"learning_rate": 6.95860105140187e-06,
"loss": 0.0,
"step": 15180
},
{
"epoch": 8.87266355140187,
"grad_norm": 0.00040488707600161433,
"learning_rate": 6.95495035046729e-06,
"loss": 0.0,
"step": 15190
},
{
"epoch": 8.878504672897197,
"grad_norm": 0.00039094104431569576,
"learning_rate": 6.9512996495327115e-06,
"loss": 0.0,
"step": 15200
},
{
"epoch": 8.884345794392523,
"grad_norm": 0.00038398956530727446,
"learning_rate": 6.947648948598131e-06,
"loss": 0.0,
"step": 15210
},
{
"epoch": 8.89018691588785,
"grad_norm": 0.0004078754282090813,
"learning_rate": 6.943998247663551e-06,
"loss": 0.0,
"step": 15220
},
{
"epoch": 8.896028037383177,
"grad_norm": 0.0005417789798229933,
"learning_rate": 6.940347546728972e-06,
"loss": 0.0918,
"step": 15230
},
{
"epoch": 8.901869158878505,
"grad_norm": 0.0005213550757616758,
"learning_rate": 6.936696845794393e-06,
"loss": 0.0,
"step": 15240
},
{
"epoch": 8.907710280373832,
"grad_norm": 0.0024687082041054964,
"learning_rate": 6.933046144859814e-06,
"loss": 0.0,
"step": 15250
},
{
"epoch": 8.913551401869158,
"grad_norm": 0.0004659520636778325,
"learning_rate": 6.929395443925234e-06,
"loss": 0.0003,
"step": 15260
},
{
"epoch": 8.919392523364486,
"grad_norm": 0.0005060379626229405,
"learning_rate": 6.9257447429906545e-06,
"loss": 0.0,
"step": 15270
},
{
"epoch": 8.925233644859812,
"grad_norm": 0.00046979807666502893,
"learning_rate": 6.922094042056076e-06,
"loss": 0.0,
"step": 15280
},
{
"epoch": 8.93107476635514,
"grad_norm": 0.0003927224315702915,
"learning_rate": 6.918443341121496e-06,
"loss": 0.0,
"step": 15290
},
{
"epoch": 8.936915887850468,
"grad_norm": 0.0004722073790617287,
"learning_rate": 6.914792640186917e-06,
"loss": 0.0,
"step": 15300
},
{
"epoch": 8.942757009345794,
"grad_norm": 0.0004678576369769871,
"learning_rate": 6.9111419392523374e-06,
"loss": 0.0,
"step": 15310
},
{
"epoch": 8.948598130841122,
"grad_norm": 0.00042912139906547964,
"learning_rate": 6.907491238317757e-06,
"loss": 0.0,
"step": 15320
},
{
"epoch": 8.95443925233645,
"grad_norm": 0.0004561747773550451,
"learning_rate": 6.903840537383179e-06,
"loss": 0.0,
"step": 15330
},
{
"epoch": 8.960280373831775,
"grad_norm": 0.0004263494920451194,
"learning_rate": 6.900189836448598e-06,
"loss": 0.0,
"step": 15340
},
{
"epoch": 8.966121495327103,
"grad_norm": 0.00038956417120061815,
"learning_rate": 6.896539135514019e-06,
"loss": 0.0,
"step": 15350
},
{
"epoch": 8.97196261682243,
"grad_norm": 0.0003977013984695077,
"learning_rate": 6.89288843457944e-06,
"loss": 0.0,
"step": 15360
},
{
"epoch": 8.977803738317757,
"grad_norm": 0.00039336297777481377,
"learning_rate": 6.88923773364486e-06,
"loss": 0.0,
"step": 15370
},
{
"epoch": 8.983644859813085,
"grad_norm": 0.00036047364119440317,
"learning_rate": 6.885587032710281e-06,
"loss": 0.0,
"step": 15380
},
{
"epoch": 8.98948598130841,
"grad_norm": 0.0003544074425008148,
"learning_rate": 6.881936331775702e-06,
"loss": 0.0,
"step": 15390
},
{
"epoch": 8.995327102803738,
"grad_norm": 0.0003941435134038329,
"learning_rate": 6.878285630841122e-06,
"loss": 0.0,
"step": 15400
},
{
"epoch": 9.0,
"eval_f1": 0.9998485536877177,
"eval_fbeta": 0.9997576279314389,
"eval_loss": 0.0016669631004333496,
"eval_precision": 0.9996971532404604,
"eval_recall": 1.0,
"eval_runtime": 170.7688,
"eval_samples_per_second": 68.73,
"eval_steps_per_second": 8.596,
"step": 15408
}
],
"logging_steps": 10,
"max_steps": 34240,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2027188102579712e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}