polyglot-tagger-v2 / trainer_state.json
DerivedFunction's picture
End of training
1ca65bf
Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON
{
"best_global_step": 55000,
"best_metric": 0.9045753492836575,
"best_model_checkpoint": "./lang-ner-xlmr/checkpoint-55000",
"epoch": 2.0,
"eval_steps": 2500,
"global_step": 55938,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0035753870356466087,
"grad_norm": 4.756625652313232,
"learning_rate": 4.991150917086775e-05,
"loss": 4.384464111328125,
"step": 100
},
{
"epoch": 0.007150774071293217,
"grad_norm": 2.1916704177856445,
"learning_rate": 4.982212449497658e-05,
"loss": 0.6957355499267578,
"step": 200
},
{
"epoch": 0.010726161106939826,
"grad_norm": 3.089632987976074,
"learning_rate": 4.973273981908542e-05,
"loss": 0.3664897537231445,
"step": 300
},
{
"epoch": 0.014301548142586435,
"grad_norm": 3.1561801433563232,
"learning_rate": 4.964335514319425e-05,
"loss": 0.24268556594848634,
"step": 400
},
{
"epoch": 0.017876935178233043,
"grad_norm": 4.541702747344971,
"learning_rate": 4.955397046730309e-05,
"loss": 0.18832412719726563,
"step": 500
},
{
"epoch": 0.021452322213879653,
"grad_norm": 3.958519220352173,
"learning_rate": 4.946458579141192e-05,
"loss": 0.1659502410888672,
"step": 600
},
{
"epoch": 0.025027709249526263,
"grad_norm": 2.7777926921844482,
"learning_rate": 4.9375201115520756e-05,
"loss": 0.1568641757965088,
"step": 700
},
{
"epoch": 0.02860309628517287,
"grad_norm": 2.150230646133423,
"learning_rate": 4.928581643962959e-05,
"loss": 0.14989984512329102,
"step": 800
},
{
"epoch": 0.032178483320819476,
"grad_norm": 1.7589229345321655,
"learning_rate": 4.9196431763738424e-05,
"loss": 0.13244229316711426,
"step": 900
},
{
"epoch": 0.035753870356466086,
"grad_norm": 1.2541024684906006,
"learning_rate": 4.910704708784726e-05,
"loss": 0.1411085033416748,
"step": 1000
},
{
"epoch": 0.039329257392112696,
"grad_norm": 1.043690800666809,
"learning_rate": 4.901766241195609e-05,
"loss": 0.12802630424499511,
"step": 1100
},
{
"epoch": 0.042904644427759306,
"grad_norm": 1.5866156816482544,
"learning_rate": 4.892827773606493e-05,
"loss": 0.11472611427307129,
"step": 1200
},
{
"epoch": 0.046480031463405916,
"grad_norm": 2.9468393325805664,
"learning_rate": 4.883889306017377e-05,
"loss": 0.11635817527770996,
"step": 1300
},
{
"epoch": 0.050055418499052526,
"grad_norm": 2.623593330383301,
"learning_rate": 4.87495083842826e-05,
"loss": 0.11469986915588379,
"step": 1400
},
{
"epoch": 0.05363080553469913,
"grad_norm": 1.0270402431488037,
"learning_rate": 4.8660123708391436e-05,
"loss": 0.10951638221740723,
"step": 1500
},
{
"epoch": 0.05720619257034574,
"grad_norm": 0.6011027693748474,
"learning_rate": 4.8570739032500274e-05,
"loss": 0.1056843090057373,
"step": 1600
},
{
"epoch": 0.06078157960599235,
"grad_norm": 1.5310850143432617,
"learning_rate": 4.8481354356609104e-05,
"loss": 0.10531362533569336,
"step": 1700
},
{
"epoch": 0.06435696664163895,
"grad_norm": 1.9218846559524536,
"learning_rate": 4.839196968071794e-05,
"loss": 0.10761914253234864,
"step": 1800
},
{
"epoch": 0.06793235367728556,
"grad_norm": 0.9941307306289673,
"learning_rate": 4.830258500482677e-05,
"loss": 0.10573001861572266,
"step": 1900
},
{
"epoch": 0.07150774071293217,
"grad_norm": 1.5511739253997803,
"learning_rate": 4.821320032893561e-05,
"loss": 0.09843612670898437,
"step": 2000
},
{
"epoch": 0.07508312774857878,
"grad_norm": 3.8423593044281006,
"learning_rate": 4.812381565304445e-05,
"loss": 0.09764796257019043,
"step": 2100
},
{
"epoch": 0.07865851478422539,
"grad_norm": 2.3102476596832275,
"learning_rate": 4.803443097715328e-05,
"loss": 0.0996187973022461,
"step": 2200
},
{
"epoch": 0.082233901819872,
"grad_norm": 0.8750975131988525,
"learning_rate": 4.7945046301262116e-05,
"loss": 0.09662745475769043,
"step": 2300
},
{
"epoch": 0.08580928885551861,
"grad_norm": 1.4319772720336914,
"learning_rate": 4.7855661625370954e-05,
"loss": 0.09479823112487792,
"step": 2400
},
{
"epoch": 0.08938467589116522,
"grad_norm": 1.1493583917617798,
"learning_rate": 4.7766276949479785e-05,
"loss": 0.09187865257263184,
"step": 2500
},
{
"epoch": 0.08938467589116522,
"eval_accuracy": 0.97115720940899,
"eval_f1": 0.7833422259443614,
"eval_loss": 0.12425321340560913,
"eval_precision": 0.7387798259806108,
"eval_recall": 0.8336256217117557,
"eval_runtime": 30.7236,
"eval_samples_per_second": 732.336,
"eval_steps_per_second": 20.343,
"step": 2500
},
{
"epoch": 0.09296006292681183,
"grad_norm": 0.4979458749294281,
"learning_rate": 4.767689227358862e-05,
"loss": 0.0913974666595459,
"step": 2600
},
{
"epoch": 0.09653544996245844,
"grad_norm": 0.9877503514289856,
"learning_rate": 4.758750759769745e-05,
"loss": 0.09267548561096191,
"step": 2700
},
{
"epoch": 0.10011083699810505,
"grad_norm": 1.4115008115768433,
"learning_rate": 4.749812292180629e-05,
"loss": 0.09185708045959473,
"step": 2800
},
{
"epoch": 0.10368622403375165,
"grad_norm": 1.2117033004760742,
"learning_rate": 4.740873824591512e-05,
"loss": 0.09133506774902343,
"step": 2900
},
{
"epoch": 0.10726161106939826,
"grad_norm": 0.527315080165863,
"learning_rate": 4.731935357002396e-05,
"loss": 0.08854376792907714,
"step": 3000
},
{
"epoch": 0.11083699810504487,
"grad_norm": 0.5725809931755066,
"learning_rate": 4.722996889413279e-05,
"loss": 0.08516644477844239,
"step": 3100
},
{
"epoch": 0.11441238514069148,
"grad_norm": 1.4227476119995117,
"learning_rate": 4.714058421824163e-05,
"loss": 0.08871203422546386,
"step": 3200
},
{
"epoch": 0.11798777217633809,
"grad_norm": 1.2104847431182861,
"learning_rate": 4.705119954235046e-05,
"loss": 0.0874100399017334,
"step": 3300
},
{
"epoch": 0.1215631592119847,
"grad_norm": 1.4136381149291992,
"learning_rate": 4.6961814866459295e-05,
"loss": 0.09060199737548828,
"step": 3400
},
{
"epoch": 0.1251385462476313,
"grad_norm": 1.7565488815307617,
"learning_rate": 4.687243019056813e-05,
"loss": 0.09233291625976563,
"step": 3500
},
{
"epoch": 0.1287139332832779,
"grad_norm": 1.2004791498184204,
"learning_rate": 4.6783045514676964e-05,
"loss": 0.08300934791564941,
"step": 3600
},
{
"epoch": 0.1322893203189245,
"grad_norm": 1.7836707830429077,
"learning_rate": 4.66936608387858e-05,
"loss": 0.09250588417053222,
"step": 3700
},
{
"epoch": 0.13586470735457112,
"grad_norm": 1.83432137966156,
"learning_rate": 4.660427616289463e-05,
"loss": 0.08511058807373047,
"step": 3800
},
{
"epoch": 0.13944009439021773,
"grad_norm": 1.1962814331054688,
"learning_rate": 4.651489148700347e-05,
"loss": 0.07956169128417968,
"step": 3900
},
{
"epoch": 0.14301548142586434,
"grad_norm": 1.145377278327942,
"learning_rate": 4.642550681111231e-05,
"loss": 0.08179279327392579,
"step": 4000
},
{
"epoch": 0.14659086846151095,
"grad_norm": 4.283623218536377,
"learning_rate": 4.633612213522114e-05,
"loss": 0.09118062019348144,
"step": 4100
},
{
"epoch": 0.15016625549715756,
"grad_norm": 2.0267841815948486,
"learning_rate": 4.6246737459329975e-05,
"loss": 0.0859706974029541,
"step": 4200
},
{
"epoch": 0.15374164253280417,
"grad_norm": 1.3412806987762451,
"learning_rate": 4.615735278343881e-05,
"loss": 0.07687939643859863,
"step": 4300
},
{
"epoch": 0.15731702956845078,
"grad_norm": 1.2748081684112549,
"learning_rate": 4.6067968107547644e-05,
"loss": 0.0789797306060791,
"step": 4400
},
{
"epoch": 0.1608924166040974,
"grad_norm": 0.8491079807281494,
"learning_rate": 4.597858343165648e-05,
"loss": 0.07809987068176269,
"step": 4500
},
{
"epoch": 0.164467803639744,
"grad_norm": 1.1583634614944458,
"learning_rate": 4.588919875576531e-05,
"loss": 0.07350683212280273,
"step": 4600
},
{
"epoch": 0.1680431906753906,
"grad_norm": 0.6579107642173767,
"learning_rate": 4.579981407987415e-05,
"loss": 0.07875243663787841,
"step": 4700
},
{
"epoch": 0.17161857771103722,
"grad_norm": 0.9742094278335571,
"learning_rate": 4.571042940398299e-05,
"loss": 0.08122955322265625,
"step": 4800
},
{
"epoch": 0.17519396474668383,
"grad_norm": 0.7365472912788391,
"learning_rate": 4.562104472809182e-05,
"loss": 0.07848617553710938,
"step": 4900
},
{
"epoch": 0.17876935178233044,
"grad_norm": 4.312972545623779,
"learning_rate": 4.5531660052200655e-05,
"loss": 0.07981382846832276,
"step": 5000
},
{
"epoch": 0.17876935178233044,
"eval_accuracy": 0.9774074625381929,
"eval_f1": 0.8253539377731214,
"eval_loss": 0.09500592201948166,
"eval_precision": 0.7928386037396048,
"eval_recall": 0.8606503176839261,
"eval_runtime": 27.5737,
"eval_samples_per_second": 815.994,
"eval_steps_per_second": 22.666,
"step": 5000
},
{
"epoch": 0.18234473881797705,
"grad_norm": 0.7737888097763062,
"learning_rate": 4.544227537630949e-05,
"loss": 0.07826550960540772,
"step": 5100
},
{
"epoch": 0.18592012585362366,
"grad_norm": 1.3171430826187134,
"learning_rate": 4.5352890700418324e-05,
"loss": 0.07463918685913086,
"step": 5200
},
{
"epoch": 0.18949551288927027,
"grad_norm": 1.445436716079712,
"learning_rate": 4.526350602452716e-05,
"loss": 0.07105834484100342,
"step": 5300
},
{
"epoch": 0.19307089992491688,
"grad_norm": 1.4572588205337524,
"learning_rate": 4.517412134863599e-05,
"loss": 0.07838897705078125,
"step": 5400
},
{
"epoch": 0.1966462869605635,
"grad_norm": 0.940371572971344,
"learning_rate": 4.508473667274482e-05,
"loss": 0.07499915599822998,
"step": 5500
},
{
"epoch": 0.2002216739962101,
"grad_norm": 0.6899816393852234,
"learning_rate": 4.499535199685366e-05,
"loss": 0.07030135154724121,
"step": 5600
},
{
"epoch": 0.20379706103185669,
"grad_norm": 1.0485793352127075,
"learning_rate": 4.490596732096249e-05,
"loss": 0.07988662719726562,
"step": 5700
},
{
"epoch": 0.2073724480675033,
"grad_norm": 1.5643068552017212,
"learning_rate": 4.481658264507133e-05,
"loss": 0.06960040092468261,
"step": 5800
},
{
"epoch": 0.2109478351031499,
"grad_norm": 0.5016098022460938,
"learning_rate": 4.4727197969180166e-05,
"loss": 0.07297846794128418,
"step": 5900
},
{
"epoch": 0.21452322213879652,
"grad_norm": 1.0210011005401611,
"learning_rate": 4.4637813293289e-05,
"loss": 0.07059600353240966,
"step": 6000
},
{
"epoch": 0.21809860917444313,
"grad_norm": 1.0845718383789062,
"learning_rate": 4.4548428617397835e-05,
"loss": 0.06686034202575683,
"step": 6100
},
{
"epoch": 0.22167399621008974,
"grad_norm": 0.8528701663017273,
"learning_rate": 4.445904394150667e-05,
"loss": 0.06841277122497559,
"step": 6200
},
{
"epoch": 0.22524938324573635,
"grad_norm": 1.8847771883010864,
"learning_rate": 4.43696592656155e-05,
"loss": 0.0757840919494629,
"step": 6300
},
{
"epoch": 0.22882477028138296,
"grad_norm": 2.079796075820923,
"learning_rate": 4.428027458972434e-05,
"loss": 0.06774754524230957,
"step": 6400
},
{
"epoch": 0.23240015731702957,
"grad_norm": 1.0023269653320312,
"learning_rate": 4.419088991383317e-05,
"loss": 0.07408513069152832,
"step": 6500
},
{
"epoch": 0.23597554435267618,
"grad_norm": 1.2481714487075806,
"learning_rate": 4.410150523794201e-05,
"loss": 0.07167030811309814,
"step": 6600
},
{
"epoch": 0.23955093138832279,
"grad_norm": 1.2812612056732178,
"learning_rate": 4.4012120562050846e-05,
"loss": 0.07096508502960205,
"step": 6700
},
{
"epoch": 0.2431263184239694,
"grad_norm": 1.1778594255447388,
"learning_rate": 4.392273588615968e-05,
"loss": 0.06785487651824951,
"step": 6800
},
{
"epoch": 0.246701705459616,
"grad_norm": 1.0923346281051636,
"learning_rate": 4.3833351210268515e-05,
"loss": 0.07081903457641602,
"step": 6900
},
{
"epoch": 0.2502770924952626,
"grad_norm": 0.9252088069915771,
"learning_rate": 4.374396653437735e-05,
"loss": 0.0647373390197754,
"step": 7000
},
{
"epoch": 0.2538524795309092,
"grad_norm": 2.192573070526123,
"learning_rate": 4.365458185848618e-05,
"loss": 0.0676526165008545,
"step": 7100
},
{
"epoch": 0.2574278665665558,
"grad_norm": 1.6381704807281494,
"learning_rate": 4.356519718259502e-05,
"loss": 0.07402976512908936,
"step": 7200
},
{
"epoch": 0.26100325360220245,
"grad_norm": 0.9283214807510376,
"learning_rate": 4.347581250670385e-05,
"loss": 0.06920474052429199,
"step": 7300
},
{
"epoch": 0.264578640637849,
"grad_norm": 0.8774147033691406,
"learning_rate": 4.338642783081269e-05,
"loss": 0.07280929565429688,
"step": 7400
},
{
"epoch": 0.26815402767349567,
"grad_norm": 1.8515883684158325,
"learning_rate": 4.3297043154921526e-05,
"loss": 0.07380086898803712,
"step": 7500
},
{
"epoch": 0.26815402767349567,
"eval_accuracy": 0.9785046625741701,
"eval_f1": 0.8438232328500399,
"eval_loss": 0.08568981289863586,
"eval_precision": 0.8172658575681245,
"eval_recall": 0.8721645631486645,
"eval_runtime": 27.0275,
"eval_samples_per_second": 832.487,
"eval_steps_per_second": 23.125,
"step": 7500
},
{
"epoch": 0.27172941470914225,
"grad_norm": 0.7200068235397339,
"learning_rate": 4.320765847903036e-05,
"loss": 0.0730604887008667,
"step": 7600
},
{
"epoch": 0.2753048017447889,
"grad_norm": 0.915267288684845,
"learning_rate": 4.3118273803139195e-05,
"loss": 0.07050428867340088,
"step": 7700
},
{
"epoch": 0.27888018878043547,
"grad_norm": 0.7131528854370117,
"learning_rate": 4.3028889127248025e-05,
"loss": 0.0710810136795044,
"step": 7800
},
{
"epoch": 0.2824555758160821,
"grad_norm": 1.1002038717269897,
"learning_rate": 4.293950445135686e-05,
"loss": 0.07342493057250976,
"step": 7900
},
{
"epoch": 0.2860309628517287,
"grad_norm": 0.9407269954681396,
"learning_rate": 4.2850119775465694e-05,
"loss": 0.0673301601409912,
"step": 8000
},
{
"epoch": 0.2896063498873753,
"grad_norm": 2.832193613052368,
"learning_rate": 4.276073509957453e-05,
"loss": 0.06240209102630615,
"step": 8100
},
{
"epoch": 0.2931817369230219,
"grad_norm": 0.8768466114997864,
"learning_rate": 4.267135042368336e-05,
"loss": 0.06878421783447265,
"step": 8200
},
{
"epoch": 0.29675712395866854,
"grad_norm": 2.6219418048858643,
"learning_rate": 4.25819657477922e-05,
"loss": 0.06775379657745362,
"step": 8300
},
{
"epoch": 0.3003325109943151,
"grad_norm": 1.4696264266967773,
"learning_rate": 4.249258107190103e-05,
"loss": 0.06918183803558349,
"step": 8400
},
{
"epoch": 0.30390789802996176,
"grad_norm": 0.3726998269557953,
"learning_rate": 4.240319639600987e-05,
"loss": 0.0662617588043213,
"step": 8500
},
{
"epoch": 0.30748328506560835,
"grad_norm": 0.7445316314697266,
"learning_rate": 4.2313811720118706e-05,
"loss": 0.06442654609680176,
"step": 8600
},
{
"epoch": 0.311058672101255,
"grad_norm": 1.971909761428833,
"learning_rate": 4.2224427044227536e-05,
"loss": 0.0726364278793335,
"step": 8700
},
{
"epoch": 0.31463405913690157,
"grad_norm": 1.5563815832138062,
"learning_rate": 4.2135042368336374e-05,
"loss": 0.06712177753448487,
"step": 8800
},
{
"epoch": 0.3182094461725482,
"grad_norm": 0.7900974154472351,
"learning_rate": 4.204565769244521e-05,
"loss": 0.058818936347961426,
"step": 8900
},
{
"epoch": 0.3217848332081948,
"grad_norm": 1.3865457773208618,
"learning_rate": 4.195627301655404e-05,
"loss": 0.06370719909667968,
"step": 9000
},
{
"epoch": 0.32536022024384137,
"grad_norm": 0.34235015511512756,
"learning_rate": 4.186688834066288e-05,
"loss": 0.06904962062835693,
"step": 9100
},
{
"epoch": 0.328935607279488,
"grad_norm": 2.1909384727478027,
"learning_rate": 4.177750366477171e-05,
"loss": 0.06057620048522949,
"step": 9200
},
{
"epoch": 0.3325109943151346,
"grad_norm": 1.308127760887146,
"learning_rate": 4.168811898888055e-05,
"loss": 0.06866058826446533,
"step": 9300
},
{
"epoch": 0.3360863813507812,
"grad_norm": 0.6863975524902344,
"learning_rate": 4.1598734312989386e-05,
"loss": 0.06358649730682372,
"step": 9400
},
{
"epoch": 0.3396617683864278,
"grad_norm": 1.1869947910308838,
"learning_rate": 4.1509349637098216e-05,
"loss": 0.06475292205810547,
"step": 9500
},
{
"epoch": 0.34323715542207445,
"grad_norm": 1.4386121034622192,
"learning_rate": 4.1419964961207054e-05,
"loss": 0.06661314010620117,
"step": 9600
},
{
"epoch": 0.34681254245772103,
"grad_norm": 0.48181113600730896,
"learning_rate": 4.133058028531589e-05,
"loss": 0.060897083282470704,
"step": 9700
},
{
"epoch": 0.35038792949336767,
"grad_norm": 0.8885261416435242,
"learning_rate": 4.124119560942472e-05,
"loss": 0.06239647388458252,
"step": 9800
},
{
"epoch": 0.35396331652901425,
"grad_norm": 1.2147257328033447,
"learning_rate": 4.115181093353356e-05,
"loss": 0.06007009029388428,
"step": 9900
},
{
"epoch": 0.3575387035646609,
"grad_norm": 3.1831276416778564,
"learning_rate": 4.106242625764239e-05,
"loss": 0.06108261108398438,
"step": 10000
},
{
"epoch": 0.3575387035646609,
"eval_accuracy": 0.9812439807847978,
"eval_f1": 0.8498944390638173,
"eval_loss": 0.07966496795415878,
"eval_precision": 0.8246859491839411,
"eval_recall": 0.8766926372078314,
"eval_runtime": 27.6584,
"eval_samples_per_second": 813.495,
"eval_steps_per_second": 22.597,
"step": 10000
},
{
"epoch": 0.36111409060030747,
"grad_norm": 1.2144405841827393,
"learning_rate": 4.097304158175123e-05,
"loss": 0.06152146816253662,
"step": 10100
},
{
"epoch": 0.3646894776359541,
"grad_norm": 0.777988076210022,
"learning_rate": 4.0883656905860066e-05,
"loss": 0.06342405319213867,
"step": 10200
},
{
"epoch": 0.3682648646716007,
"grad_norm": 0.6419842839241028,
"learning_rate": 4.0794272229968896e-05,
"loss": 0.055976643562316894,
"step": 10300
},
{
"epoch": 0.3718402517072473,
"grad_norm": 0.45166343450546265,
"learning_rate": 4.0704887554077734e-05,
"loss": 0.07191664695739747,
"step": 10400
},
{
"epoch": 0.3754156387428939,
"grad_norm": 0.5005468726158142,
"learning_rate": 4.0615502878186565e-05,
"loss": 0.06205938339233399,
"step": 10500
},
{
"epoch": 0.37899102577854055,
"grad_norm": 0.6201260089874268,
"learning_rate": 4.0526118202295396e-05,
"loss": 0.061759543418884275,
"step": 10600
},
{
"epoch": 0.38256641281418713,
"grad_norm": 0.4341242015361786,
"learning_rate": 4.043673352640423e-05,
"loss": 0.06618201732635498,
"step": 10700
},
{
"epoch": 0.38614179984983377,
"grad_norm": 0.4003482460975647,
"learning_rate": 4.034734885051307e-05,
"loss": 0.06178065299987793,
"step": 10800
},
{
"epoch": 0.38971718688548035,
"grad_norm": 1.0296162366867065,
"learning_rate": 4.02579641746219e-05,
"loss": 0.06249929904937744,
"step": 10900
},
{
"epoch": 0.393292573921127,
"grad_norm": 1.362121820449829,
"learning_rate": 4.016857949873074e-05,
"loss": 0.05500233173370361,
"step": 11000
},
{
"epoch": 0.39686796095677357,
"grad_norm": 0.7699733376502991,
"learning_rate": 4.007919482283957e-05,
"loss": 0.060595006942749025,
"step": 11100
},
{
"epoch": 0.4004433479924202,
"grad_norm": 1.3927844762802124,
"learning_rate": 3.998981014694841e-05,
"loss": 0.05860278129577637,
"step": 11200
},
{
"epoch": 0.4040187350280668,
"grad_norm": 0.5842928290367126,
"learning_rate": 3.9900425471057245e-05,
"loss": 0.062330193519592285,
"step": 11300
},
{
"epoch": 0.40759412206371337,
"grad_norm": 1.231602430343628,
"learning_rate": 3.9811040795166076e-05,
"loss": 0.05743512153625488,
"step": 11400
},
{
"epoch": 0.41116950909936,
"grad_norm": 0.33235710859298706,
"learning_rate": 3.972165611927491e-05,
"loss": 0.059948296546936036,
"step": 11500
},
{
"epoch": 0.4147448961350066,
"grad_norm": 0.812560498714447,
"learning_rate": 3.963227144338375e-05,
"loss": 0.06013148784637451,
"step": 11600
},
{
"epoch": 0.41832028317065323,
"grad_norm": 0.7160065174102783,
"learning_rate": 3.954288676749258e-05,
"loss": 0.0654984951019287,
"step": 11700
},
{
"epoch": 0.4218956702062998,
"grad_norm": 0.959859311580658,
"learning_rate": 3.945350209160142e-05,
"loss": 0.061361746788024904,
"step": 11800
},
{
"epoch": 0.42547105724194645,
"grad_norm": 0.661882758140564,
"learning_rate": 3.936411741571025e-05,
"loss": 0.05800935268402099,
"step": 11900
},
{
"epoch": 0.42904644427759303,
"grad_norm": 1.3494808673858643,
"learning_rate": 3.927473273981909e-05,
"loss": 0.058743157386779786,
"step": 12000
},
{
"epoch": 0.43262183131323967,
"grad_norm": 0.3964793384075165,
"learning_rate": 3.9185348063927925e-05,
"loss": 0.05860978603363037,
"step": 12100
},
{
"epoch": 0.43619721834888625,
"grad_norm": 0.6984548568725586,
"learning_rate": 3.9095963388036756e-05,
"loss": 0.05355045795440674,
"step": 12200
},
{
"epoch": 0.4397726053845329,
"grad_norm": 0.9193189144134521,
"learning_rate": 3.900657871214559e-05,
"loss": 0.05985400676727295,
"step": 12300
},
{
"epoch": 0.44334799242017947,
"grad_norm": 2.1851706504821777,
"learning_rate": 3.891719403625443e-05,
"loss": 0.06027592182159424,
"step": 12400
},
{
"epoch": 0.4469233794558261,
"grad_norm": 2.280050754547119,
"learning_rate": 3.882780936036326e-05,
"loss": 0.05881267070770264,
"step": 12500
},
{
"epoch": 0.4469233794558261,
"eval_accuracy": 0.9822486078551317,
"eval_f1": 0.8582031250000001,
"eval_loss": 0.07316970080137253,
"eval_precision": 0.8336179093151205,
"eval_recall": 0.884282551821292,
"eval_runtime": 27.7811,
"eval_samples_per_second": 809.904,
"eval_steps_per_second": 22.497,
"step": 12500
},
{
"epoch": 0.4504987664914727,
"grad_norm": 0.9513980746269226,
"learning_rate": 3.87384246844721e-05,
"loss": 0.05991718769073486,
"step": 12600
},
{
"epoch": 0.45407415352711933,
"grad_norm": 0.8513447046279907,
"learning_rate": 3.8649040008580937e-05,
"loss": 0.059436683654785154,
"step": 12700
},
{
"epoch": 0.4576495405627659,
"grad_norm": 3.6959080696105957,
"learning_rate": 3.855965533268977e-05,
"loss": 0.06146327018737793,
"step": 12800
},
{
"epoch": 0.46122492759841255,
"grad_norm": 0.4215289056301117,
"learning_rate": 3.8470270656798605e-05,
"loss": 0.051028499603271486,
"step": 12900
},
{
"epoch": 0.46480031463405913,
"grad_norm": 0.553249716758728,
"learning_rate": 3.8380885980907436e-05,
"loss": 0.05888910293579101,
"step": 13000
},
{
"epoch": 0.46837570166970577,
"grad_norm": 0.534638524055481,
"learning_rate": 3.8291501305016266e-05,
"loss": 0.056477956771850586,
"step": 13100
},
{
"epoch": 0.47195108870535235,
"grad_norm": 0.5859609842300415,
"learning_rate": 3.8202116629125104e-05,
"loss": 0.05791654109954834,
"step": 13200
},
{
"epoch": 0.475526475740999,
"grad_norm": 0.6610586047172546,
"learning_rate": 3.8112731953233935e-05,
"loss": 0.05362565040588379,
"step": 13300
},
{
"epoch": 0.47910186277664557,
"grad_norm": 0.6048291325569153,
"learning_rate": 3.802334727734277e-05,
"loss": 0.057788271903991696,
"step": 13400
},
{
"epoch": 0.4826772498122922,
"grad_norm": 0.7319697141647339,
"learning_rate": 3.793396260145161e-05,
"loss": 0.05477115631103516,
"step": 13500
},
{
"epoch": 0.4862526368479388,
"grad_norm": 0.5771811008453369,
"learning_rate": 3.784457792556044e-05,
"loss": 0.059410476684570314,
"step": 13600
},
{
"epoch": 0.4898280238835854,
"grad_norm": 1.9499260187149048,
"learning_rate": 3.775519324966928e-05,
"loss": 0.052494893074035646,
"step": 13700
},
{
"epoch": 0.493403410919232,
"grad_norm": 0.8795179128646851,
"learning_rate": 3.766580857377811e-05,
"loss": 0.05528387546539307,
"step": 13800
},
{
"epoch": 0.4969787979548786,
"grad_norm": 0.5892202258110046,
"learning_rate": 3.7576423897886947e-05,
"loss": 0.05600544452667236,
"step": 13900
},
{
"epoch": 0.5005541849905252,
"grad_norm": 0.6402941346168518,
"learning_rate": 3.7487039221995784e-05,
"loss": 0.05357628345489502,
"step": 14000
},
{
"epoch": 0.5041295720261718,
"grad_norm": 0.5255988836288452,
"learning_rate": 3.7397654546104615e-05,
"loss": 0.057103352546691896,
"step": 14100
},
{
"epoch": 0.5077049590618185,
"grad_norm": 0.8301808834075928,
"learning_rate": 3.730826987021345e-05,
"loss": 0.0532010555267334,
"step": 14200
},
{
"epoch": 0.5112803460974651,
"grad_norm": 0.6901052594184875,
"learning_rate": 3.721888519432229e-05,
"loss": 0.05516294002532959,
"step": 14300
},
{
"epoch": 0.5148557331331116,
"grad_norm": 0.9628658890724182,
"learning_rate": 3.712950051843112e-05,
"loss": 0.06214995384216308,
"step": 14400
},
{
"epoch": 0.5184311201687583,
"grad_norm": 1.3679792881011963,
"learning_rate": 3.704011584253996e-05,
"loss": 0.05541347503662109,
"step": 14500
},
{
"epoch": 0.5220065072044049,
"grad_norm": 0.23267334699630737,
"learning_rate": 3.695073116664879e-05,
"loss": 0.0589248275756836,
"step": 14600
},
{
"epoch": 0.5255818942400515,
"grad_norm": 0.6239579319953918,
"learning_rate": 3.6861346490757627e-05,
"loss": 0.053284521102905276,
"step": 14700
},
{
"epoch": 0.529157281275698,
"grad_norm": 0.7674051523208618,
"learning_rate": 3.6771961814866464e-05,
"loss": 0.05738714218139648,
"step": 14800
},
{
"epoch": 0.5327326683113447,
"grad_norm": 0.8594136834144592,
"learning_rate": 3.6682577138975295e-05,
"loss": 0.055074062347412106,
"step": 14900
},
{
"epoch": 0.5363080553469913,
"grad_norm": 1.3505005836486816,
"learning_rate": 3.659319246308413e-05,
"loss": 0.05417671680450439,
"step": 15000
},
{
"epoch": 0.5363080553469913,
"eval_accuracy": 0.9838496993745539,
"eval_f1": 0.8737497800457504,
"eval_loss": 0.06651480495929718,
"eval_precision": 0.8560178736432719,
"eval_recall": 0.8922318373918293,
"eval_runtime": 27.3392,
"eval_samples_per_second": 822.994,
"eval_steps_per_second": 22.861,
"step": 15000
},
{
"epoch": 0.539883442382638,
"grad_norm": 0.7868797779083252,
"learning_rate": 3.650380778719297e-05,
"loss": 0.060230064392089847,
"step": 15100
},
{
"epoch": 0.5434588294182845,
"grad_norm": 0.3154486119747162,
"learning_rate": 3.64144231113018e-05,
"loss": 0.05918198108673096,
"step": 15200
},
{
"epoch": 0.5470342164539311,
"grad_norm": 0.5093942284584045,
"learning_rate": 3.632503843541064e-05,
"loss": 0.05554147720336914,
"step": 15300
},
{
"epoch": 0.5506096034895778,
"grad_norm": 1.080651044845581,
"learning_rate": 3.623565375951947e-05,
"loss": 0.05167547702789307,
"step": 15400
},
{
"epoch": 0.5541849905252244,
"grad_norm": 1.2834564447402954,
"learning_rate": 3.614626908362831e-05,
"loss": 0.05269266128540039,
"step": 15500
},
{
"epoch": 0.5577603775608709,
"grad_norm": 0.9456666707992554,
"learning_rate": 3.605688440773714e-05,
"loss": 0.05228121280670166,
"step": 15600
},
{
"epoch": 0.5613357645965176,
"grad_norm": 1.931270718574524,
"learning_rate": 3.5967499731845975e-05,
"loss": 0.05532039642333984,
"step": 15700
},
{
"epoch": 0.5649111516321642,
"grad_norm": 1.9416167736053467,
"learning_rate": 3.5878115055954806e-05,
"loss": 0.05132888793945312,
"step": 15800
},
{
"epoch": 0.5684865386678107,
"grad_norm": 0.2992418110370636,
"learning_rate": 3.578873038006364e-05,
"loss": 0.05806799411773682,
"step": 15900
},
{
"epoch": 0.5720619257034574,
"grad_norm": 0.7173650860786438,
"learning_rate": 3.5699345704172474e-05,
"loss": 0.05833985805511475,
"step": 16000
},
{
"epoch": 0.575637312739104,
"grad_norm": 1.0283321142196655,
"learning_rate": 3.560996102828131e-05,
"loss": 0.05651096820831299,
"step": 16100
},
{
"epoch": 0.5792126997747507,
"grad_norm": 0.43172529339790344,
"learning_rate": 3.552057635239015e-05,
"loss": 0.05330658435821533,
"step": 16200
},
{
"epoch": 0.5827880868103972,
"grad_norm": 0.6333898901939392,
"learning_rate": 3.543119167649898e-05,
"loss": 0.053462224006652834,
"step": 16300
},
{
"epoch": 0.5863634738460438,
"grad_norm": 0.8817270994186401,
"learning_rate": 3.534180700060782e-05,
"loss": 0.05549070358276367,
"step": 16400
},
{
"epoch": 0.5899388608816905,
"grad_norm": 4.280094146728516,
"learning_rate": 3.525242232471665e-05,
"loss": 0.05985762119293213,
"step": 16500
},
{
"epoch": 0.5935142479173371,
"grad_norm": 0.62297523021698,
"learning_rate": 3.5163037648825486e-05,
"loss": 0.05666534423828125,
"step": 16600
},
{
"epoch": 0.5970896349529836,
"grad_norm": 0.29738688468933105,
"learning_rate": 3.507365297293432e-05,
"loss": 0.053336749076843264,
"step": 16700
},
{
"epoch": 0.6006650219886303,
"grad_norm": 1.139436960220337,
"learning_rate": 3.4984268297043154e-05,
"loss": 0.05532379150390625,
"step": 16800
},
{
"epoch": 0.6042404090242769,
"grad_norm": 0.37320244312286377,
"learning_rate": 3.489488362115199e-05,
"loss": 0.05435383796691894,
"step": 16900
},
{
"epoch": 0.6078157960599235,
"grad_norm": 0.5908817648887634,
"learning_rate": 3.480549894526083e-05,
"loss": 0.052842388153076174,
"step": 17000
},
{
"epoch": 0.6113911830955701,
"grad_norm": 0.4973529279232025,
"learning_rate": 3.471611426936966e-05,
"loss": 0.05500569343566895,
"step": 17100
},
{
"epoch": 0.6149665701312167,
"grad_norm": 1.438362717628479,
"learning_rate": 3.46267295934785e-05,
"loss": 0.04875383853912354,
"step": 17200
},
{
"epoch": 0.6185419571668633,
"grad_norm": 1.1460702419281006,
"learning_rate": 3.4537344917587335e-05,
"loss": 0.05489758968353271,
"step": 17300
},
{
"epoch": 0.62211734420251,
"grad_norm": 0.359030157327652,
"learning_rate": 3.4447960241696166e-05,
"loss": 0.0537039852142334,
"step": 17400
},
{
"epoch": 0.6256927312381565,
"grad_norm": 1.0160428285598755,
"learning_rate": 3.4358575565805e-05,
"loss": 0.05569758415222168,
"step": 17500
},
{
"epoch": 0.6256927312381565,
"eval_accuracy": 0.9845403147375635,
"eval_f1": 0.877477096546864,
"eval_loss": 0.0613168403506279,
"eval_precision": 0.8606879199270053,
"eval_recall": 0.8949343069890464,
"eval_runtime": 27.832,
"eval_samples_per_second": 808.423,
"eval_steps_per_second": 22.456,
"step": 17500
},
{
"epoch": 0.6292681182738031,
"grad_norm": 0.9637561440467834,
"learning_rate": 3.4269190889913834e-05,
"loss": 0.05049953460693359,
"step": 17600
},
{
"epoch": 0.6328435053094498,
"grad_norm": 0.4047839343547821,
"learning_rate": 3.417980621402267e-05,
"loss": 0.051105165481567384,
"step": 17700
},
{
"epoch": 0.6364188923450964,
"grad_norm": 0.5562448501586914,
"learning_rate": 3.409042153813151e-05,
"loss": 0.04887496471405029,
"step": 17800
},
{
"epoch": 0.6399942793807429,
"grad_norm": 0.7675971984863281,
"learning_rate": 3.400103686224034e-05,
"loss": 0.05429211139678955,
"step": 17900
},
{
"epoch": 0.6435696664163896,
"grad_norm": 0.44871142506599426,
"learning_rate": 3.391165218634918e-05,
"loss": 0.04755040645599365,
"step": 18000
},
{
"epoch": 0.6471450534520362,
"grad_norm": 0.4453502297401428,
"learning_rate": 3.382226751045801e-05,
"loss": 0.05987214088439941,
"step": 18100
},
{
"epoch": 0.6507204404876827,
"grad_norm": 0.4004403352737427,
"learning_rate": 3.373288283456684e-05,
"loss": 0.054094972610473635,
"step": 18200
},
{
"epoch": 0.6542958275233294,
"grad_norm": 0.8362923264503479,
"learning_rate": 3.364349815867568e-05,
"loss": 0.04843898296356201,
"step": 18300
},
{
"epoch": 0.657871214558976,
"grad_norm": 0.6269751787185669,
"learning_rate": 3.355411348278451e-05,
"loss": 0.05007925033569336,
"step": 18400
},
{
"epoch": 0.6614466015946227,
"grad_norm": 0.7181591987609863,
"learning_rate": 3.3464728806893345e-05,
"loss": 0.05742511749267578,
"step": 18500
},
{
"epoch": 0.6650219886302692,
"grad_norm": 2.8255951404571533,
"learning_rate": 3.337534413100218e-05,
"loss": 0.050363807678222655,
"step": 18600
},
{
"epoch": 0.6685973756659158,
"grad_norm": 1.1854428052902222,
"learning_rate": 3.328595945511101e-05,
"loss": 0.05580689430236816,
"step": 18700
},
{
"epoch": 0.6721727627015625,
"grad_norm": 0.3564029335975647,
"learning_rate": 3.319657477921985e-05,
"loss": 0.04986191749572754,
"step": 18800
},
{
"epoch": 0.6757481497372091,
"grad_norm": 0.9392517805099487,
"learning_rate": 3.310719010332869e-05,
"loss": 0.05029686450958252,
"step": 18900
},
{
"epoch": 0.6793235367728556,
"grad_norm": 0.9811071157455444,
"learning_rate": 3.301780542743752e-05,
"loss": 0.05468404293060303,
"step": 19000
},
{
"epoch": 0.6828989238085023,
"grad_norm": 2.1979386806488037,
"learning_rate": 3.292842075154636e-05,
"loss": 0.04795463562011719,
"step": 19100
},
{
"epoch": 0.6864743108441489,
"grad_norm": 4.135185241699219,
"learning_rate": 3.2839036075655194e-05,
"loss": 0.051746668815612795,
"step": 19200
},
{
"epoch": 0.6900496978797955,
"grad_norm": 0.611629843711853,
"learning_rate": 3.2749651399764025e-05,
"loss": 0.05136622428894043,
"step": 19300
},
{
"epoch": 0.6936250849154421,
"grad_norm": 0.7905089259147644,
"learning_rate": 3.266026672387286e-05,
"loss": 0.0534757661819458,
"step": 19400
},
{
"epoch": 0.6972004719510887,
"grad_norm": 0.3704472482204437,
"learning_rate": 3.257088204798169e-05,
"loss": 0.05190816879272461,
"step": 19500
},
{
"epoch": 0.7007758589867353,
"grad_norm": 0.41257503628730774,
"learning_rate": 3.248149737209053e-05,
"loss": 0.05314404487609863,
"step": 19600
},
{
"epoch": 0.704351246022382,
"grad_norm": 1.0130038261413574,
"learning_rate": 3.239211269619937e-05,
"loss": 0.051221070289611814,
"step": 19700
},
{
"epoch": 0.7079266330580285,
"grad_norm": 0.44306495785713196,
"learning_rate": 3.23027280203082e-05,
"loss": 0.05151443004608154,
"step": 19800
},
{
"epoch": 0.7115020200936751,
"grad_norm": 1.3375622034072876,
"learning_rate": 3.221334334441704e-05,
"loss": 0.051753206253051756,
"step": 19900
},
{
"epoch": 0.7150774071293218,
"grad_norm": 0.48512154817581177,
"learning_rate": 3.2123958668525874e-05,
"loss": 0.04863485813140869,
"step": 20000
},
{
"epoch": 0.7150774071293218,
"eval_accuracy": 0.9851494027728759,
"eval_f1": 0.8755350929603205,
"eval_loss": 0.05904531106352806,
"eval_precision": 0.856669280182671,
"eval_recall": 0.8952505534312739,
"eval_runtime": 27.4851,
"eval_samples_per_second": 818.624,
"eval_steps_per_second": 22.74,
"step": 20000
},
{
"epoch": 0.7186527941649684,
"grad_norm": 0.45322614908218384,
"learning_rate": 3.2034573992634705e-05,
"loss": 0.05499778270721436,
"step": 20100
},
{
"epoch": 0.7222281812006149,
"grad_norm": 0.4665698707103729,
"learning_rate": 3.194518931674354e-05,
"loss": 0.05100120544433594,
"step": 20200
},
{
"epoch": 0.7258035682362616,
"grad_norm": 0.7074053883552551,
"learning_rate": 3.185580464085237e-05,
"loss": 0.04919565200805664,
"step": 20300
},
{
"epoch": 0.7293789552719082,
"grad_norm": 1.2581121921539307,
"learning_rate": 3.176641996496121e-05,
"loss": 0.05387771606445312,
"step": 20400
},
{
"epoch": 0.7329543423075547,
"grad_norm": 0.3161942660808563,
"learning_rate": 3.167703528907004e-05,
"loss": 0.04680909633636474,
"step": 20500
},
{
"epoch": 0.7365297293432014,
"grad_norm": 0.8641468286514282,
"learning_rate": 3.158765061317888e-05,
"loss": 0.04961400508880615,
"step": 20600
},
{
"epoch": 0.740105116378848,
"grad_norm": 0.6563690304756165,
"learning_rate": 3.149826593728771e-05,
"loss": 0.05145148754119873,
"step": 20700
},
{
"epoch": 0.7436805034144947,
"grad_norm": 0.3394390940666199,
"learning_rate": 3.140888126139655e-05,
"loss": 0.048502054214477536,
"step": 20800
},
{
"epoch": 0.7472558904501412,
"grad_norm": 0.5382287502288818,
"learning_rate": 3.131949658550538e-05,
"loss": 0.052634720802307126,
"step": 20900
},
{
"epoch": 0.7508312774857878,
"grad_norm": 0.5506078004837036,
"learning_rate": 3.1230111909614216e-05,
"loss": 0.05615939140319824,
"step": 21000
},
{
"epoch": 0.7544066645214345,
"grad_norm": 0.4533487558364868,
"learning_rate": 3.114072723372305e-05,
"loss": 0.0571517276763916,
"step": 21100
},
{
"epoch": 0.7579820515570811,
"grad_norm": 1.2659982442855835,
"learning_rate": 3.1051342557831884e-05,
"loss": 0.05127411842346191,
"step": 21200
},
{
"epoch": 0.7615574385927276,
"grad_norm": 0.38378211855888367,
"learning_rate": 3.096195788194072e-05,
"loss": 0.04847681522369385,
"step": 21300
},
{
"epoch": 0.7651328256283743,
"grad_norm": 0.2992658317089081,
"learning_rate": 3.087257320604955e-05,
"loss": 0.05205928325653076,
"step": 21400
},
{
"epoch": 0.7687082126640209,
"grad_norm": 0.5818284749984741,
"learning_rate": 3.078318853015839e-05,
"loss": 0.04922466278076172,
"step": 21500
},
{
"epoch": 0.7722835996996675,
"grad_norm": 0.41028082370758057,
"learning_rate": 3.069380385426723e-05,
"loss": 0.04695847034454346,
"step": 21600
},
{
"epoch": 0.7758589867353141,
"grad_norm": 0.31596678495407104,
"learning_rate": 3.060441917837606e-05,
"loss": 0.049401440620422364,
"step": 21700
},
{
"epoch": 0.7794343737709607,
"grad_norm": 0.39899763464927673,
"learning_rate": 3.0515034502484896e-05,
"loss": 0.0458904504776001,
"step": 21800
},
{
"epoch": 0.7830097608066073,
"grad_norm": 4.016449928283691,
"learning_rate": 3.0425649826593733e-05,
"loss": 0.04808720588684082,
"step": 21900
},
{
"epoch": 0.786585147842254,
"grad_norm": 1.8184044361114502,
"learning_rate": 3.0336265150702564e-05,
"loss": 0.050203371047973636,
"step": 22000
},
{
"epoch": 0.7901605348779005,
"grad_norm": 0.47340500354766846,
"learning_rate": 3.0246880474811402e-05,
"loss": 0.04804760932922363,
"step": 22100
},
{
"epoch": 0.7937359219135471,
"grad_norm": 1.306254506111145,
"learning_rate": 3.0157495798920233e-05,
"loss": 0.04765232563018799,
"step": 22200
},
{
"epoch": 0.7973113089491938,
"grad_norm": 0.6133173704147339,
"learning_rate": 3.006811112302907e-05,
"loss": 0.04909511566162109,
"step": 22300
},
{
"epoch": 0.8008866959848404,
"grad_norm": 1.063022494316101,
"learning_rate": 2.9978726447137904e-05,
"loss": 0.048132557868957516,
"step": 22400
},
{
"epoch": 0.8044620830204869,
"grad_norm": 0.4442903697490692,
"learning_rate": 2.988934177124674e-05,
"loss": 0.04739914894104004,
"step": 22500
},
{
"epoch": 0.8044620830204869,
"eval_accuracy": 0.9853803092042249,
"eval_f1": 0.8812850838481906,
"eval_loss": 0.060121480375528336,
"eval_precision": 0.8660403280645027,
"eval_recall": 0.8970761578932237,
"eval_runtime": 27.7438,
"eval_samples_per_second": 810.991,
"eval_steps_per_second": 22.528,
"step": 22500
},
{
"epoch": 0.8080374700561336,
"grad_norm": 0.8813098073005676,
"learning_rate": 2.9799957095355573e-05,
"loss": 0.05161878108978271,
"step": 22600
},
{
"epoch": 0.8116128570917802,
"grad_norm": 0.7460477948188782,
"learning_rate": 2.971057241946441e-05,
"loss": 0.0515793514251709,
"step": 22700
},
{
"epoch": 0.8151882441274267,
"grad_norm": 0.5062021613121033,
"learning_rate": 2.962118774357324e-05,
"loss": 0.04754622936248779,
"step": 22800
},
{
"epoch": 0.8187636311630734,
"grad_norm": 0.7567230463027954,
"learning_rate": 2.953180306768208e-05,
"loss": 0.05149875164031983,
"step": 22900
},
{
"epoch": 0.82233901819872,
"grad_norm": 0.7439789772033691,
"learning_rate": 2.944241839179091e-05,
"loss": 0.04974982738494873,
"step": 23000
},
{
"epoch": 0.8259144052343667,
"grad_norm": 0.669979453086853,
"learning_rate": 2.9353033715899747e-05,
"loss": 0.04604334354400635,
"step": 23100
},
{
"epoch": 0.8294897922700132,
"grad_norm": 1.005071759223938,
"learning_rate": 2.9263649040008584e-05,
"loss": 0.04706980228424072,
"step": 23200
},
{
"epoch": 0.8330651793056598,
"grad_norm": 0.31772536039352417,
"learning_rate": 2.9174264364117415e-05,
"loss": 0.05056349754333496,
"step": 23300
},
{
"epoch": 0.8366405663413065,
"grad_norm": 0.32514145970344543,
"learning_rate": 2.9084879688226253e-05,
"loss": 0.04744285106658935,
"step": 23400
},
{
"epoch": 0.8402159533769531,
"grad_norm": 1.0965938568115234,
"learning_rate": 2.899549501233509e-05,
"loss": 0.04618396759033203,
"step": 23500
},
{
"epoch": 0.8437913404125996,
"grad_norm": 0.6312568783760071,
"learning_rate": 2.890611033644392e-05,
"loss": 0.04719692230224609,
"step": 23600
},
{
"epoch": 0.8473667274482463,
"grad_norm": 0.5469244122505188,
"learning_rate": 2.881672566055276e-05,
"loss": 0.04657519817352295,
"step": 23700
},
{
"epoch": 0.8509421144838929,
"grad_norm": 0.9338961839675903,
"learning_rate": 2.8727340984661593e-05,
"loss": 0.04994749069213867,
"step": 23800
},
{
"epoch": 0.8545175015195395,
"grad_norm": 0.6873934268951416,
"learning_rate": 2.8637956308770423e-05,
"loss": 0.04766389846801758,
"step": 23900
},
{
"epoch": 0.8580928885551861,
"grad_norm": 1.3465129137039185,
"learning_rate": 2.854857163287926e-05,
"loss": 0.04612489223480225,
"step": 24000
},
{
"epoch": 0.8616682755908327,
"grad_norm": 0.3835633397102356,
"learning_rate": 2.8459186956988092e-05,
"loss": 0.048950729370117185,
"step": 24100
},
{
"epoch": 0.8652436626264793,
"grad_norm": 0.7884401082992554,
"learning_rate": 2.836980228109693e-05,
"loss": 0.046166911125183105,
"step": 24200
},
{
"epoch": 0.868819049662126,
"grad_norm": 0.49389323592185974,
"learning_rate": 2.8280417605205767e-05,
"loss": 0.046818752288818356,
"step": 24300
},
{
"epoch": 0.8723944366977725,
"grad_norm": 0.6339199542999268,
"learning_rate": 2.8191032929314598e-05,
"loss": 0.04933880805969238,
"step": 24400
},
{
"epoch": 0.8759698237334191,
"grad_norm": 0.5761122703552246,
"learning_rate": 2.8101648253423435e-05,
"loss": 0.044534187316894534,
"step": 24500
},
{
"epoch": 0.8795452107690658,
"grad_norm": 0.45685720443725586,
"learning_rate": 2.8012263577532273e-05,
"loss": 0.051560683250427244,
"step": 24600
},
{
"epoch": 0.8831205978047124,
"grad_norm": 0.4419282078742981,
"learning_rate": 2.7922878901641104e-05,
"loss": 0.043671913146972656,
"step": 24700
},
{
"epoch": 0.8866959848403589,
"grad_norm": 0.734449028968811,
"learning_rate": 2.783349422574994e-05,
"loss": 0.05153060913085938,
"step": 24800
},
{
"epoch": 0.8902713718760056,
"grad_norm": 1.0401020050048828,
"learning_rate": 2.7744109549858772e-05,
"loss": 0.04694102287292481,
"step": 24900
},
{
"epoch": 0.8938467589116522,
"grad_norm": 0.646715521812439,
"learning_rate": 2.765472487396761e-05,
"loss": 0.054542098045349124,
"step": 25000
},
{
"epoch": 0.8938467589116522,
"eval_accuracy": 0.9856964248425865,
"eval_f1": 0.8835749303424683,
"eval_loss": 0.05743265897035599,
"eval_precision": 0.8674635382761534,
"eval_recall": 0.9002961216686313,
"eval_runtime": 27.4328,
"eval_samples_per_second": 820.186,
"eval_steps_per_second": 22.783,
"step": 25000
},
{
"epoch": 0.8974221459472987,
"grad_norm": 0.3341001570224762,
"learning_rate": 2.7565340198076444e-05,
"loss": 0.04484391689300537,
"step": 25100
},
{
"epoch": 0.9009975329829454,
"grad_norm": 0.700167715549469,
"learning_rate": 2.7475955522185278e-05,
"loss": 0.04423677921295166,
"step": 25200
},
{
"epoch": 0.904572920018592,
"grad_norm": 1.2379734516143799,
"learning_rate": 2.7386570846294112e-05,
"loss": 0.04488907337188721,
"step": 25300
},
{
"epoch": 0.9081483070542387,
"grad_norm": 0.4145027697086334,
"learning_rate": 2.729718617040295e-05,
"loss": 0.04520434856414795,
"step": 25400
},
{
"epoch": 0.9117236940898852,
"grad_norm": 0.3579607605934143,
"learning_rate": 2.720780149451178e-05,
"loss": 0.04551751613616943,
"step": 25500
},
{
"epoch": 0.9152990811255318,
"grad_norm": 0.5503469705581665,
"learning_rate": 2.7118416818620618e-05,
"loss": 0.04752420425415039,
"step": 25600
},
{
"epoch": 0.9188744681611785,
"grad_norm": 0.41558948159217834,
"learning_rate": 2.702903214272945e-05,
"loss": 0.05269415855407715,
"step": 25700
},
{
"epoch": 0.9224498551968251,
"grad_norm": 1.5605533123016357,
"learning_rate": 2.6939647466838286e-05,
"loss": 0.0499528169631958,
"step": 25800
},
{
"epoch": 0.9260252422324716,
"grad_norm": 0.6252946853637695,
"learning_rate": 2.6850262790947124e-05,
"loss": 0.04681193351745606,
"step": 25900
},
{
"epoch": 0.9296006292681183,
"grad_norm": 0.4643714427947998,
"learning_rate": 2.6760878115055954e-05,
"loss": 0.04491585254669189,
"step": 26000
},
{
"epoch": 0.9331760163037649,
"grad_norm": 1.0552211999893188,
"learning_rate": 2.6671493439164792e-05,
"loss": 0.050134167671203614,
"step": 26100
},
{
"epoch": 0.9367514033394115,
"grad_norm": 0.2919712960720062,
"learning_rate": 2.6582108763273626e-05,
"loss": 0.045297045707702634,
"step": 26200
},
{
"epoch": 0.9403267903750581,
"grad_norm": 0.5062688589096069,
"learning_rate": 2.649272408738246e-05,
"loss": 0.04247344017028808,
"step": 26300
},
{
"epoch": 0.9439021774107047,
"grad_norm": 0.4406910538673401,
"learning_rate": 2.6403339411491294e-05,
"loss": 0.0437799072265625,
"step": 26400
},
{
"epoch": 0.9474775644463513,
"grad_norm": 0.41486886143684387,
"learning_rate": 2.6313954735600132e-05,
"loss": 0.04669870376586914,
"step": 26500
},
{
"epoch": 0.951052951481998,
"grad_norm": 0.6877465844154358,
"learning_rate": 2.6224570059708963e-05,
"loss": 0.04583415985107422,
"step": 26600
},
{
"epoch": 0.9546283385176445,
"grad_norm": 0.6501809358596802,
"learning_rate": 2.61351853838178e-05,
"loss": 0.04593777179718018,
"step": 26700
},
{
"epoch": 0.9582037255532911,
"grad_norm": 0.7312682271003723,
"learning_rate": 2.604580070792663e-05,
"loss": 0.050377216339111325,
"step": 26800
},
{
"epoch": 0.9617791125889378,
"grad_norm": 0.8844775557518005,
"learning_rate": 2.595641603203547e-05,
"loss": 0.04860093593597412,
"step": 26900
},
{
"epoch": 0.9653544996245844,
"grad_norm": 0.4647756814956665,
"learning_rate": 2.5867031356144306e-05,
"loss": 0.0445063066482544,
"step": 27000
},
{
"epoch": 0.9689298866602309,
"grad_norm": 0.20223687589168549,
"learning_rate": 2.5777646680253137e-05,
"loss": 0.04691956520080567,
"step": 27100
},
{
"epoch": 0.9725052736958776,
"grad_norm": 0.9210941195487976,
"learning_rate": 2.5688262004361974e-05,
"loss": 0.049297604560852054,
"step": 27200
},
{
"epoch": 0.9760806607315242,
"grad_norm": 0.35992079973220825,
"learning_rate": 2.5598877328470812e-05,
"loss": 0.04701284408569336,
"step": 27300
},
{
"epoch": 0.9796560477671707,
"grad_norm": 0.6507813334465027,
"learning_rate": 2.5509492652579643e-05,
"loss": 0.04716668605804444,
"step": 27400
},
{
"epoch": 0.9832314348028174,
"grad_norm": 0.5909741520881653,
"learning_rate": 2.542010797668848e-05,
"loss": 0.048493666648864744,
"step": 27500
},
{
"epoch": 0.9832314348028174,
"eval_accuracy": 0.9858305504462175,
"eval_f1": 0.8868203247033212,
"eval_loss": 0.05660928413271904,
"eval_precision": 0.8723250413671315,
"eval_recall": 0.9018054796883535,
"eval_runtime": 27.7774,
"eval_samples_per_second": 810.012,
"eval_steps_per_second": 22.5,
"step": 27500
},
{
"epoch": 0.986806821838464,
"grad_norm": 0.47291576862335205,
"learning_rate": 2.533072330079731e-05,
"loss": 0.04355491161346436,
"step": 27600
},
{
"epoch": 0.9903822088741107,
"grad_norm": 0.4872467815876007,
"learning_rate": 2.5241338624906145e-05,
"loss": 0.0435347318649292,
"step": 27700
},
{
"epoch": 0.9939575959097572,
"grad_norm": 1.711300015449524,
"learning_rate": 2.5151953949014983e-05,
"loss": 0.04561484336853027,
"step": 27800
},
{
"epoch": 0.9975329829454038,
"grad_norm": 0.2917760908603668,
"learning_rate": 2.5062569273123814e-05,
"loss": 0.047463297843933105,
"step": 27900
},
{
"epoch": 1.0011083699810504,
"grad_norm": 0.2678261697292328,
"learning_rate": 2.497318459723265e-05,
"loss": 0.04366901874542237,
"step": 28000
},
{
"epoch": 1.004683757016697,
"grad_norm": 0.3751468062400818,
"learning_rate": 2.4883799921341485e-05,
"loss": 0.03846597194671631,
"step": 28100
},
{
"epoch": 1.0082591440523436,
"grad_norm": 0.41662493348121643,
"learning_rate": 2.4794415245450323e-05,
"loss": 0.03653419733047485,
"step": 28200
},
{
"epoch": 1.0118345310879904,
"grad_norm": 0.6062248945236206,
"learning_rate": 2.4705030569559157e-05,
"loss": 0.037252871990203856,
"step": 28300
},
{
"epoch": 1.015409918123637,
"grad_norm": 0.7458221316337585,
"learning_rate": 2.461564589366799e-05,
"loss": 0.03445641756057739,
"step": 28400
},
{
"epoch": 1.0189853051592834,
"grad_norm": 0.13679973781108856,
"learning_rate": 2.4526261217776825e-05,
"loss": 0.03599729061126709,
"step": 28500
},
{
"epoch": 1.0225606921949302,
"grad_norm": 1.258949637413025,
"learning_rate": 2.4436876541885663e-05,
"loss": 0.037976634502410886,
"step": 28600
},
{
"epoch": 1.0261360792305767,
"grad_norm": 0.27776288986206055,
"learning_rate": 2.4347491865994497e-05,
"loss": 0.03968371391296387,
"step": 28700
},
{
"epoch": 1.0297114662662232,
"grad_norm": 0.34287697076797485,
"learning_rate": 2.425810719010333e-05,
"loss": 0.03572561502456665,
"step": 28800
},
{
"epoch": 1.03328685330187,
"grad_norm": 0.5158637166023254,
"learning_rate": 2.4168722514212165e-05,
"loss": 0.036703295707702636,
"step": 28900
},
{
"epoch": 1.0368622403375165,
"grad_norm": 0.8635151982307434,
"learning_rate": 2.4079337838321e-05,
"loss": 0.035954997539520264,
"step": 29000
},
{
"epoch": 1.040437627373163,
"grad_norm": 0.6386840343475342,
"learning_rate": 2.3989953162429834e-05,
"loss": 0.039990205764770505,
"step": 29100
},
{
"epoch": 1.0440130144088098,
"grad_norm": 0.2795710861682892,
"learning_rate": 2.3900568486538668e-05,
"loss": 0.03744415760040283,
"step": 29200
},
{
"epoch": 1.0475884014444563,
"grad_norm": 0.674773097038269,
"learning_rate": 2.3811183810647502e-05,
"loss": 0.038765432834625246,
"step": 29300
},
{
"epoch": 1.051163788480103,
"grad_norm": 0.5345519185066223,
"learning_rate": 2.372179913475634e-05,
"loss": 0.03793670177459717,
"step": 29400
},
{
"epoch": 1.0547391755157496,
"grad_norm": 0.19475312530994415,
"learning_rate": 2.3632414458865174e-05,
"loss": 0.03510812759399414,
"step": 29500
},
{
"epoch": 1.058314562551396,
"grad_norm": 0.6469267010688782,
"learning_rate": 2.3543029782974008e-05,
"loss": 0.03977480411529541,
"step": 29600
},
{
"epoch": 1.0618899495870429,
"grad_norm": 0.3818305432796478,
"learning_rate": 2.3453645107082842e-05,
"loss": 0.03915615558624268,
"step": 29700
},
{
"epoch": 1.0654653366226894,
"grad_norm": 0.7031393051147461,
"learning_rate": 2.336426043119168e-05,
"loss": 0.03701666355133057,
"step": 29800
},
{
"epoch": 1.069040723658336,
"grad_norm": 0.34952452778816223,
"learning_rate": 2.3274875755300514e-05,
"loss": 0.03564514398574829,
"step": 29900
},
{
"epoch": 1.0726161106939827,
"grad_norm": 0.5351042747497559,
"learning_rate": 2.3185491079409348e-05,
"loss": 0.04400619983673096,
"step": 30000
},
{
"epoch": 1.0726161106939827,
"eval_accuracy": 0.9867462864302234,
"eval_f1": 0.8903530810550676,
"eval_loss": 0.05216454714536667,
"eval_precision": 0.8769046324564705,
"eval_recall": 0.9042204525199091,
"eval_runtime": 27.3869,
"eval_samples_per_second": 821.559,
"eval_steps_per_second": 22.821,
"step": 30000
},
{
"epoch": 1.0761914977296292,
"grad_norm": 0.6395847201347351,
"learning_rate": 2.3096106403518182e-05,
"loss": 0.03795994281768799,
"step": 30100
},
{
"epoch": 1.079766884765276,
"grad_norm": 0.2738804221153259,
"learning_rate": 2.3006721727627016e-05,
"loss": 0.034112286567687986,
"step": 30200
},
{
"epoch": 1.0833422718009225,
"grad_norm": 0.36416754126548767,
"learning_rate": 2.291733705173585e-05,
"loss": 0.03839835166931152,
"step": 30300
},
{
"epoch": 1.086917658836569,
"grad_norm": 0.8902291059494019,
"learning_rate": 2.2827952375844684e-05,
"loss": 0.04109617233276367,
"step": 30400
},
{
"epoch": 1.0904930458722157,
"grad_norm": 0.47186803817749023,
"learning_rate": 2.2738567699953522e-05,
"loss": 0.03920984029769897,
"step": 30500
},
{
"epoch": 1.0940684329078623,
"grad_norm": 3.810819625854492,
"learning_rate": 2.2649183024062356e-05,
"loss": 0.0391163420677185,
"step": 30600
},
{
"epoch": 1.0976438199435088,
"grad_norm": 0.8752216696739197,
"learning_rate": 2.255979834817119e-05,
"loss": 0.038404548168182374,
"step": 30700
},
{
"epoch": 1.1012192069791555,
"grad_norm": 0.2776939570903778,
"learning_rate": 2.2470413672280025e-05,
"loss": 0.037470765113830566,
"step": 30800
},
{
"epoch": 1.104794594014802,
"grad_norm": 0.549679160118103,
"learning_rate": 2.2381028996388862e-05,
"loss": 0.03804266691207886,
"step": 30900
},
{
"epoch": 1.1083699810504486,
"grad_norm": 0.7605739235877991,
"learning_rate": 2.2291644320497696e-05,
"loss": 0.03416654348373413,
"step": 31000
},
{
"epoch": 1.1119453680860953,
"grad_norm": 0.16704197227954865,
"learning_rate": 2.220225964460653e-05,
"loss": 0.034537038803100585,
"step": 31100
},
{
"epoch": 1.1155207551217419,
"grad_norm": 0.5772648453712463,
"learning_rate": 2.2112874968715365e-05,
"loss": 0.03786729097366333,
"step": 31200
},
{
"epoch": 1.1190961421573886,
"grad_norm": 0.3576936423778534,
"learning_rate": 2.2023490292824202e-05,
"loss": 0.04146803379058838,
"step": 31300
},
{
"epoch": 1.1226715291930351,
"grad_norm": 0.24434928596019745,
"learning_rate": 2.1934105616933033e-05,
"loss": 0.03837924718856812,
"step": 31400
},
{
"epoch": 1.1262469162286817,
"grad_norm": 0.8151653409004211,
"learning_rate": 2.1844720941041867e-05,
"loss": 0.03402991771697998,
"step": 31500
},
{
"epoch": 1.1298223032643284,
"grad_norm": 0.803303062915802,
"learning_rate": 2.17553362651507e-05,
"loss": 0.03701550483703613,
"step": 31600
},
{
"epoch": 1.133397690299975,
"grad_norm": 0.5276838541030884,
"learning_rate": 2.166595158925954e-05,
"loss": 0.037687735557556154,
"step": 31700
},
{
"epoch": 1.1369730773356217,
"grad_norm": 1.2563331127166748,
"learning_rate": 2.1576566913368373e-05,
"loss": 0.04105483055114746,
"step": 31800
},
{
"epoch": 1.1405484643712682,
"grad_norm": 2.2794508934020996,
"learning_rate": 2.1487182237477207e-05,
"loss": 0.03871995687484741,
"step": 31900
},
{
"epoch": 1.1441238514069147,
"grad_norm": 0.5270197987556458,
"learning_rate": 2.139779756158604e-05,
"loss": 0.03748847007751465,
"step": 32000
},
{
"epoch": 1.1476992384425615,
"grad_norm": 0.4776967763900757,
"learning_rate": 2.130841288569488e-05,
"loss": 0.04054388523101807,
"step": 32100
},
{
"epoch": 1.151274625478208,
"grad_norm": 0.281562864780426,
"learning_rate": 2.1219028209803713e-05,
"loss": 0.03565767288208008,
"step": 32200
},
{
"epoch": 1.1548500125138546,
"grad_norm": 0.986331582069397,
"learning_rate": 2.1129643533912547e-05,
"loss": 0.03515695333480835,
"step": 32300
},
{
"epoch": 1.1584253995495013,
"grad_norm": 1.0339690446853638,
"learning_rate": 2.104025885802138e-05,
"loss": 0.03650200843811035,
"step": 32400
},
{
"epoch": 1.1620007865851478,
"grad_norm": 0.6622812747955322,
"learning_rate": 2.095087418213022e-05,
"loss": 0.03963910102844238,
"step": 32500
},
{
"epoch": 1.1620007865851478,
"eval_accuracy": 0.9872691132930045,
"eval_f1": 0.8901178950048444,
"eval_loss": 0.050942763686180115,
"eval_precision": 0.8760841419442859,
"eval_recall": 0.904608573153552,
"eval_runtime": 27.7471,
"eval_samples_per_second": 810.896,
"eval_steps_per_second": 22.525,
"step": 32500
},
{
"epoch": 1.1655761736207944,
"grad_norm": 0.4157122075557709,
"learning_rate": 2.0861489506239053e-05,
"loss": 0.03528056383132935,
"step": 32600
},
{
"epoch": 1.169151560656441,
"grad_norm": 1.0833650827407837,
"learning_rate": 2.0772104830347887e-05,
"loss": 0.0338432765007019,
"step": 32700
},
{
"epoch": 1.1727269476920876,
"grad_norm": 0.6234818696975708,
"learning_rate": 2.068272015445672e-05,
"loss": 0.03545186996459961,
"step": 32800
},
{
"epoch": 1.1763023347277342,
"grad_norm": 0.46430152654647827,
"learning_rate": 2.0593335478565555e-05,
"loss": 0.03938552379608154,
"step": 32900
},
{
"epoch": 1.179877721763381,
"grad_norm": 0.32441213726997375,
"learning_rate": 2.050395080267439e-05,
"loss": 0.03765884399414063,
"step": 33000
},
{
"epoch": 1.1834531087990274,
"grad_norm": 0.5149340033531189,
"learning_rate": 2.0414566126783224e-05,
"loss": 0.04111374378204346,
"step": 33100
},
{
"epoch": 1.1870284958346742,
"grad_norm": 0.6311440467834473,
"learning_rate": 2.032518145089206e-05,
"loss": 0.03235443115234375,
"step": 33200
},
{
"epoch": 1.1906038828703207,
"grad_norm": 0.41769224405288696,
"learning_rate": 2.0235796775000895e-05,
"loss": 0.03542477607727051,
"step": 33300
},
{
"epoch": 1.1941792699059672,
"grad_norm": 1.399487853050232,
"learning_rate": 2.014641209910973e-05,
"loss": 0.03973909854888916,
"step": 33400
},
{
"epoch": 1.197754656941614,
"grad_norm": 0.44740626215934753,
"learning_rate": 2.0057027423218564e-05,
"loss": 0.03419320821762085,
"step": 33500
},
{
"epoch": 1.2013300439772605,
"grad_norm": 0.7771443128585815,
"learning_rate": 1.99676427473274e-05,
"loss": 0.03688163042068481,
"step": 33600
},
{
"epoch": 1.2049054310129073,
"grad_norm": 0.33263227343559265,
"learning_rate": 1.9878258071436235e-05,
"loss": 0.0361082911491394,
"step": 33700
},
{
"epoch": 1.2084808180485538,
"grad_norm": 0.586033821105957,
"learning_rate": 1.978887339554507e-05,
"loss": 0.037032432556152343,
"step": 33800
},
{
"epoch": 1.2120562050842003,
"grad_norm": 0.17661893367767334,
"learning_rate": 1.9699488719653904e-05,
"loss": 0.03797416687011719,
"step": 33900
},
{
"epoch": 1.215631592119847,
"grad_norm": 0.6682581305503845,
"learning_rate": 1.9610104043762738e-05,
"loss": 0.03688710927963257,
"step": 34000
},
{
"epoch": 1.2192069791554936,
"grad_norm": 0.33618828654289246,
"learning_rate": 1.9520719367871572e-05,
"loss": 0.03531041145324707,
"step": 34100
},
{
"epoch": 1.2227823661911401,
"grad_norm": 0.2299039363861084,
"learning_rate": 1.9431334691980406e-05,
"loss": 0.037303669452667235,
"step": 34200
},
{
"epoch": 1.2263577532267869,
"grad_norm": 0.38670745491981506,
"learning_rate": 1.934195001608924e-05,
"loss": 0.03624207735061646,
"step": 34300
},
{
"epoch": 1.2299331402624334,
"grad_norm": 0.28273847699165344,
"learning_rate": 1.9252565340198078e-05,
"loss": 0.03737942218780518,
"step": 34400
},
{
"epoch": 1.23350852729808,
"grad_norm": 0.1840369552373886,
"learning_rate": 1.9163180664306912e-05,
"loss": 0.04193697929382324,
"step": 34500
},
{
"epoch": 1.2370839143337267,
"grad_norm": 0.3581949770450592,
"learning_rate": 1.9073795988415746e-05,
"loss": 0.03538564205169678,
"step": 34600
},
{
"epoch": 1.2406593013693732,
"grad_norm": 0.47306036949157715,
"learning_rate": 1.898441131252458e-05,
"loss": 0.03894999265670776,
"step": 34700
},
{
"epoch": 1.24423468840502,
"grad_norm": 0.961359977722168,
"learning_rate": 1.8895026636633418e-05,
"loss": 0.03768787622451782,
"step": 34800
},
{
"epoch": 1.2478100754406665,
"grad_norm": 0.873396098613739,
"learning_rate": 1.8805641960742252e-05,
"loss": 0.03798648834228516,
"step": 34900
},
{
"epoch": 1.251385462476313,
"grad_norm": 0.27755600214004517,
"learning_rate": 1.8716257284851086e-05,
"loss": 0.03826235771179199,
"step": 35000
},
{
"epoch": 1.251385462476313,
"eval_accuracy": 0.9878845131214289,
"eval_f1": 0.8920698296733638,
"eval_loss": 0.04892827197909355,
"eval_precision": 0.8788428276516208,
"eval_recall": 0.9057010608630653,
"eval_runtime": 27.2527,
"eval_samples_per_second": 825.607,
"eval_steps_per_second": 22.934,
"step": 35000
},
{
"epoch": 1.2549608495119597,
"grad_norm": 0.19469444453716278,
"learning_rate": 1.862687260895992e-05,
"loss": 0.037444868087768556,
"step": 35100
},
{
"epoch": 1.2585362365476063,
"grad_norm": 0.7563005685806274,
"learning_rate": 1.8537487933068755e-05,
"loss": 0.03585953950881958,
"step": 35200
},
{
"epoch": 1.262111623583253,
"grad_norm": 0.748693585395813,
"learning_rate": 1.844810325717759e-05,
"loss": 0.036836111545562746,
"step": 35300
},
{
"epoch": 1.2656870106188995,
"grad_norm": 0.2749057114124298,
"learning_rate": 1.8358718581286423e-05,
"loss": 0.035653345584869385,
"step": 35400
},
{
"epoch": 1.269262397654546,
"grad_norm": 0.46990424394607544,
"learning_rate": 1.826933390539526e-05,
"loss": 0.038926541805267334,
"step": 35500
},
{
"epoch": 1.2728377846901928,
"grad_norm": 0.5694590210914612,
"learning_rate": 1.8179949229504095e-05,
"loss": 0.041200418472290036,
"step": 35600
},
{
"epoch": 1.2764131717258393,
"grad_norm": 0.44198593497276306,
"learning_rate": 1.809056455361293e-05,
"loss": 0.03306173086166382,
"step": 35700
},
{
"epoch": 1.2799885587614859,
"grad_norm": 1.5265918970108032,
"learning_rate": 1.8001179877721763e-05,
"loss": 0.03929618358612061,
"step": 35800
},
{
"epoch": 1.2835639457971326,
"grad_norm": 0.568000078201294,
"learning_rate": 1.79117952018306e-05,
"loss": 0.035215189456939695,
"step": 35900
},
{
"epoch": 1.2871393328327791,
"grad_norm": 0.3256838619709015,
"learning_rate": 1.7822410525939435e-05,
"loss": 0.03693248510360718,
"step": 36000
},
{
"epoch": 1.2907147198684257,
"grad_norm": 0.37276744842529297,
"learning_rate": 1.773302585004827e-05,
"loss": 0.038254330158233645,
"step": 36100
},
{
"epoch": 1.2942901069040724,
"grad_norm": 0.9104180335998535,
"learning_rate": 1.7643641174157103e-05,
"loss": 0.03706887722015381,
"step": 36200
},
{
"epoch": 1.297865493939719,
"grad_norm": 0.855074942111969,
"learning_rate": 1.755425649826594e-05,
"loss": 0.039341244697570804,
"step": 36300
},
{
"epoch": 1.3014408809753655,
"grad_norm": 1.0919744968414307,
"learning_rate": 1.7464871822374775e-05,
"loss": 0.03796007394790649,
"step": 36400
},
{
"epoch": 1.3050162680110122,
"grad_norm": 0.4765317142009735,
"learning_rate": 1.737548714648361e-05,
"loss": 0.03425301790237427,
"step": 36500
},
{
"epoch": 1.3085916550466588,
"grad_norm": 0.28184378147125244,
"learning_rate": 1.728610247059244e-05,
"loss": 0.03511073589324951,
"step": 36600
},
{
"epoch": 1.3121670420823053,
"grad_norm": 0.26926326751708984,
"learning_rate": 1.7196717794701277e-05,
"loss": 0.03917940616607666,
"step": 36700
},
{
"epoch": 1.315742429117952,
"grad_norm": 2.2863128185272217,
"learning_rate": 1.710733311881011e-05,
"loss": 0.03748450517654419,
"step": 36800
},
{
"epoch": 1.3193178161535986,
"grad_norm": 0.47158753871917725,
"learning_rate": 1.7017948442918946e-05,
"loss": 0.034841620922088624,
"step": 36900
},
{
"epoch": 1.3228932031892453,
"grad_norm": 0.3611966371536255,
"learning_rate": 1.692856376702778e-05,
"loss": 0.03597846508026123,
"step": 37000
},
{
"epoch": 1.3264685902248918,
"grad_norm": 0.19897930324077606,
"learning_rate": 1.6839179091136617e-05,
"loss": 0.0384373140335083,
"step": 37100
},
{
"epoch": 1.3300439772605386,
"grad_norm": 0.4929654002189636,
"learning_rate": 1.674979441524545e-05,
"loss": 0.03474846363067627,
"step": 37200
},
{
"epoch": 1.333619364296185,
"grad_norm": 1.4330233335494995,
"learning_rate": 1.6660409739354286e-05,
"loss": 0.03804588317871094,
"step": 37300
},
{
"epoch": 1.3371947513318316,
"grad_norm": 0.7935028076171875,
"learning_rate": 1.6571025063463123e-05,
"loss": 0.036091580390930175,
"step": 37400
},
{
"epoch": 1.3407701383674784,
"grad_norm": 0.6093057990074158,
"learning_rate": 1.6481640387571957e-05,
"loss": 0.036958491802215575,
"step": 37500
},
{
"epoch": 1.3407701383674784,
"eval_accuracy": 0.9876767499314908,
"eval_f1": 0.8963199795830114,
"eval_loss": 0.048646602779626846,
"eval_precision": 0.8842404151455387,
"eval_recall": 0.9087341517407929,
"eval_runtime": 27.7954,
"eval_samples_per_second": 809.486,
"eval_steps_per_second": 22.486,
"step": 37500
},
{
"epoch": 1.344345525403125,
"grad_norm": 0.530693531036377,
"learning_rate": 1.639225571168079e-05,
"loss": 0.040179696083068844,
"step": 37600
},
{
"epoch": 1.3479209124387714,
"grad_norm": 0.70650714635849,
"learning_rate": 1.6302871035789626e-05,
"loss": 0.03599003076553345,
"step": 37700
},
{
"epoch": 1.3514962994744182,
"grad_norm": 0.673740029335022,
"learning_rate": 1.621348635989846e-05,
"loss": 0.03707956552505493,
"step": 37800
},
{
"epoch": 1.3550716865100647,
"grad_norm": 0.28047823905944824,
"learning_rate": 1.6124101684007294e-05,
"loss": 0.034383256435394284,
"step": 37900
},
{
"epoch": 1.3586470735457112,
"grad_norm": 0.4644497036933899,
"learning_rate": 1.6034717008116128e-05,
"loss": 0.039096081256866456,
"step": 38000
},
{
"epoch": 1.362222460581358,
"grad_norm": 0.2905023992061615,
"learning_rate": 1.5945332332224962e-05,
"loss": 0.031935737133026124,
"step": 38100
},
{
"epoch": 1.3657978476170045,
"grad_norm": 0.519289493560791,
"learning_rate": 1.58559476563338e-05,
"loss": 0.03160768747329712,
"step": 38200
},
{
"epoch": 1.369373234652651,
"grad_norm": 0.4803026616573334,
"learning_rate": 1.5766562980442634e-05,
"loss": 0.03475278615951538,
"step": 38300
},
{
"epoch": 1.3729486216882978,
"grad_norm": 0.2219659686088562,
"learning_rate": 1.5677178304551468e-05,
"loss": 0.03382747411727905,
"step": 38400
},
{
"epoch": 1.3765240087239443,
"grad_norm": 0.9020390510559082,
"learning_rate": 1.5587793628660302e-05,
"loss": 0.03778740644454956,
"step": 38500
},
{
"epoch": 1.3800993957595908,
"grad_norm": 0.4074041247367859,
"learning_rate": 1.549840895276914e-05,
"loss": 0.03417648077011108,
"step": 38600
},
{
"epoch": 1.3836747827952376,
"grad_norm": 0.2950891852378845,
"learning_rate": 1.5409024276877974e-05,
"loss": 0.037335121631622316,
"step": 38700
},
{
"epoch": 1.3872501698308841,
"grad_norm": 0.5112789869308472,
"learning_rate": 1.5319639600986808e-05,
"loss": 0.03443581342697143,
"step": 38800
},
{
"epoch": 1.3908255568665309,
"grad_norm": 0.6883418560028076,
"learning_rate": 1.523025492509564e-05,
"loss": 0.03647557497024536,
"step": 38900
},
{
"epoch": 1.3944009439021774,
"grad_norm": 0.22857694327831268,
"learning_rate": 1.5140870249204478e-05,
"loss": 0.03520648956298828,
"step": 39000
},
{
"epoch": 1.3979763309378241,
"grad_norm": 1.4312663078308105,
"learning_rate": 1.5051485573313312e-05,
"loss": 0.031425106525421145,
"step": 39100
},
{
"epoch": 1.4015517179734707,
"grad_norm": 0.7821195125579834,
"learning_rate": 1.4962100897422146e-05,
"loss": 0.03315335750579834,
"step": 39200
},
{
"epoch": 1.4051271050091172,
"grad_norm": 0.27848535776138306,
"learning_rate": 1.487271622153098e-05,
"loss": 0.033316426277160645,
"step": 39300
},
{
"epoch": 1.408702492044764,
"grad_norm": 0.6713240146636963,
"learning_rate": 1.4783331545639816e-05,
"loss": 0.033266935348510746,
"step": 39400
},
{
"epoch": 1.4122778790804105,
"grad_norm": 3.596701145172119,
"learning_rate": 1.469394686974865e-05,
"loss": 0.03419414281845093,
"step": 39500
},
{
"epoch": 1.415853266116057,
"grad_norm": 1.069840908050537,
"learning_rate": 1.4604562193857485e-05,
"loss": 0.035397300720214846,
"step": 39600
},
{
"epoch": 1.4194286531517037,
"grad_norm": 0.2466162145137787,
"learning_rate": 1.4515177517966322e-05,
"loss": 0.0358107590675354,
"step": 39700
},
{
"epoch": 1.4230040401873503,
"grad_norm": 0.5182567834854126,
"learning_rate": 1.4425792842075156e-05,
"loss": 0.03377439260482788,
"step": 39800
},
{
"epoch": 1.4265794272229968,
"grad_norm": 0.8782963752746582,
"learning_rate": 1.433640816618399e-05,
"loss": 0.03737137794494629,
"step": 39900
},
{
"epoch": 1.4301548142586435,
"grad_norm": 0.2662527561187744,
"learning_rate": 1.4247023490292825e-05,
"loss": 0.035046143531799315,
"step": 40000
},
{
"epoch": 1.4301548142586435,
"eval_accuracy": 0.9874205963276935,
"eval_f1": 0.8909038185431681,
"eval_loss": 0.048918217420578,
"eval_precision": 0.8769057265778372,
"eval_recall": 0.9053560647442717,
"eval_runtime": 27.308,
"eval_samples_per_second": 823.934,
"eval_steps_per_second": 22.887,
"step": 40000
},
{
"epoch": 1.43373020129429,
"grad_norm": 4.632917404174805,
"learning_rate": 1.415763881440166e-05,
"loss": 0.03318638563156128,
"step": 40100
},
{
"epoch": 1.4373055883299366,
"grad_norm": 0.34400591254234314,
"learning_rate": 1.4068254138510495e-05,
"loss": 0.03380630970001221,
"step": 40200
},
{
"epoch": 1.4408809753655833,
"grad_norm": 0.3949352204799652,
"learning_rate": 1.3978869462619329e-05,
"loss": 0.035887646675109866,
"step": 40300
},
{
"epoch": 1.4444563624012299,
"grad_norm": 0.21083228290081024,
"learning_rate": 1.3889484786728163e-05,
"loss": 0.02981067180633545,
"step": 40400
},
{
"epoch": 1.4480317494368766,
"grad_norm": 0.5403398871421814,
"learning_rate": 1.3800100110836999e-05,
"loss": 0.03951683759689331,
"step": 40500
},
{
"epoch": 1.4516071364725232,
"grad_norm": 0.37334415316581726,
"learning_rate": 1.3710715434945833e-05,
"loss": 0.03376241683959961,
"step": 40600
},
{
"epoch": 1.4551825235081697,
"grad_norm": 0.6374111771583557,
"learning_rate": 1.3621330759054667e-05,
"loss": 0.035758087635040285,
"step": 40700
},
{
"epoch": 1.4587579105438164,
"grad_norm": 0.4704621434211731,
"learning_rate": 1.3531946083163501e-05,
"loss": 0.03579946041107178,
"step": 40800
},
{
"epoch": 1.462333297579463,
"grad_norm": 0.31890979409217834,
"learning_rate": 1.3442561407272339e-05,
"loss": 0.036891818046569824,
"step": 40900
},
{
"epoch": 1.4659086846151097,
"grad_norm": 0.36003023386001587,
"learning_rate": 1.3353176731381173e-05,
"loss": 0.03722346544265747,
"step": 41000
},
{
"epoch": 1.4694840716507562,
"grad_norm": 0.3868881165981293,
"learning_rate": 1.3263792055490007e-05,
"loss": 0.03188649654388428,
"step": 41100
},
{
"epoch": 1.4730594586864028,
"grad_norm": 0.1989583820104599,
"learning_rate": 1.3174407379598841e-05,
"loss": 0.03385810136795044,
"step": 41200
},
{
"epoch": 1.4766348457220495,
"grad_norm": 1.653865933418274,
"learning_rate": 1.3085022703707677e-05,
"loss": 0.033811585903167726,
"step": 41300
},
{
"epoch": 1.480210232757696,
"grad_norm": 0.4005359709262848,
"learning_rate": 1.2995638027816512e-05,
"loss": 0.034179413318634035,
"step": 41400
},
{
"epoch": 1.4837856197933426,
"grad_norm": 0.40698060393333435,
"learning_rate": 1.2906253351925346e-05,
"loss": 0.0344992733001709,
"step": 41500
},
{
"epoch": 1.4873610068289893,
"grad_norm": 0.23063120245933533,
"learning_rate": 1.281686867603418e-05,
"loss": 0.036886801719665525,
"step": 41600
},
{
"epoch": 1.4909363938646358,
"grad_norm": 0.36372461915016174,
"learning_rate": 1.2727484000143017e-05,
"loss": 0.03418808460235596,
"step": 41700
},
{
"epoch": 1.4945117809002824,
"grad_norm": 3.4656498432159424,
"learning_rate": 1.2638099324251852e-05,
"loss": 0.035131211280822756,
"step": 41800
},
{
"epoch": 1.498087167935929,
"grad_norm": 0.5397525429725647,
"learning_rate": 1.2548714648360686e-05,
"loss": 0.032310936450958255,
"step": 41900
},
{
"epoch": 1.5016625549715756,
"grad_norm": 0.803663969039917,
"learning_rate": 1.245932997246952e-05,
"loss": 0.034068484306335446,
"step": 42000
},
{
"epoch": 1.5052379420072222,
"grad_norm": 0.44578149914741516,
"learning_rate": 1.2369945296578356e-05,
"loss": 0.033568575382232665,
"step": 42100
},
{
"epoch": 1.508813329042869,
"grad_norm": 0.3740385174751282,
"learning_rate": 1.228056062068719e-05,
"loss": 0.0316014552116394,
"step": 42200
},
{
"epoch": 1.5123887160785157,
"grad_norm": 0.7885581254959106,
"learning_rate": 1.2191175944796026e-05,
"loss": 0.036092112064361574,
"step": 42300
},
{
"epoch": 1.515964103114162,
"grad_norm": 0.2616823613643646,
"learning_rate": 1.210179126890486e-05,
"loss": 0.0364843225479126,
"step": 42400
},
{
"epoch": 1.5195394901498087,
"grad_norm": 1.1933097839355469,
"learning_rate": 1.2012406593013694e-05,
"loss": 0.032956657409667967,
"step": 42500
},
{
"epoch": 1.5195394901498087,
"eval_accuracy": 0.9878976626904123,
"eval_f1": 0.8964901338171921,
"eval_loss": 0.04782980680465698,
"eval_precision": 0.8842314252957132,
"eval_recall": 0.9090935226978697,
"eval_runtime": 27.8912,
"eval_samples_per_second": 806.706,
"eval_steps_per_second": 22.409,
"step": 42500
},
{
"epoch": 1.5231148771854555,
"grad_norm": 1.002236247062683,
"learning_rate": 1.1923021917122528e-05,
"loss": 0.03267708301544189,
"step": 42600
},
{
"epoch": 1.526690264221102,
"grad_norm": 0.2965432405471802,
"learning_rate": 1.1833637241231364e-05,
"loss": 0.03969228982925415,
"step": 42700
},
{
"epoch": 1.5302656512567485,
"grad_norm": 0.35980096459388733,
"learning_rate": 1.1744252565340198e-05,
"loss": 0.033807692527770994,
"step": 42800
},
{
"epoch": 1.5338410382923953,
"grad_norm": 0.4036603271961212,
"learning_rate": 1.1654867889449034e-05,
"loss": 0.036050264835357664,
"step": 42900
},
{
"epoch": 1.5374164253280418,
"grad_norm": 0.4341689348220825,
"learning_rate": 1.1565483213557868e-05,
"loss": 0.03344399690628052,
"step": 43000
},
{
"epoch": 1.5409918123636883,
"grad_norm": 0.35666847229003906,
"learning_rate": 1.1476098537666702e-05,
"loss": 0.035790588855743405,
"step": 43100
},
{
"epoch": 1.544567199399335,
"grad_norm": 2.009552001953125,
"learning_rate": 1.1386713861775537e-05,
"loss": 0.03580213069915771,
"step": 43200
},
{
"epoch": 1.5481425864349816,
"grad_norm": 0.9199197888374329,
"learning_rate": 1.1297329185884372e-05,
"loss": 0.035557851791381836,
"step": 43300
},
{
"epoch": 1.5517179734706281,
"grad_norm": 0.3379763662815094,
"learning_rate": 1.1207944509993207e-05,
"loss": 0.037502107620239256,
"step": 43400
},
{
"epoch": 1.5552933605062749,
"grad_norm": 0.4002296030521393,
"learning_rate": 1.1118559834102042e-05,
"loss": 0.03514168262481689,
"step": 43500
},
{
"epoch": 1.5588687475419214,
"grad_norm": 0.44335803389549255,
"learning_rate": 1.1029175158210877e-05,
"loss": 0.03210949659347534,
"step": 43600
},
{
"epoch": 1.562444134577568,
"grad_norm": 0.3367313742637634,
"learning_rate": 1.0939790482319712e-05,
"loss": 0.03381946325302124,
"step": 43700
},
{
"epoch": 1.5660195216132147,
"grad_norm": 0.3180839419364929,
"learning_rate": 1.0850405806428547e-05,
"loss": 0.033136572837829587,
"step": 43800
},
{
"epoch": 1.5695949086488612,
"grad_norm": 0.49929025769233704,
"learning_rate": 1.076102113053738e-05,
"loss": 0.03284239530563354,
"step": 43900
},
{
"epoch": 1.5731702956845077,
"grad_norm": 0.36956411600112915,
"learning_rate": 1.0671636454646217e-05,
"loss": 0.032391068935394285,
"step": 44000
},
{
"epoch": 1.5767456827201545,
"grad_norm": 0.3806305527687073,
"learning_rate": 1.058225177875505e-05,
"loss": 0.03159698247909546,
"step": 44100
},
{
"epoch": 1.5803210697558012,
"grad_norm": 0.24886535108089447,
"learning_rate": 1.0492867102863887e-05,
"loss": 0.03376968622207641,
"step": 44200
},
{
"epoch": 1.5838964567914475,
"grad_norm": 0.8062007427215576,
"learning_rate": 1.040348242697272e-05,
"loss": 0.031125342845916747,
"step": 44300
},
{
"epoch": 1.5874718438270943,
"grad_norm": 0.32632651925086975,
"learning_rate": 1.0314097751081555e-05,
"loss": 0.032405462265014645,
"step": 44400
},
{
"epoch": 1.591047230862741,
"grad_norm": 0.9697968363761902,
"learning_rate": 1.0224713075190389e-05,
"loss": 0.0316835880279541,
"step": 44500
},
{
"epoch": 1.5946226178983876,
"grad_norm": 0.7041149735450745,
"learning_rate": 1.0135328399299225e-05,
"loss": 0.03227449417114258,
"step": 44600
},
{
"epoch": 1.598198004934034,
"grad_norm": 1.0169494152069092,
"learning_rate": 1.0045943723408059e-05,
"loss": 0.03636837244033814,
"step": 44700
},
{
"epoch": 1.6017733919696808,
"grad_norm": 1.4278594255447388,
"learning_rate": 9.956559047516895e-06,
"loss": 0.036050994396209714,
"step": 44800
},
{
"epoch": 1.6053487790053274,
"grad_norm": 0.21218614280223846,
"learning_rate": 9.867174371625729e-06,
"loss": 0.03155009746551514,
"step": 44900
},
{
"epoch": 1.6089241660409739,
"grad_norm": 0.2901414930820465,
"learning_rate": 9.777789695734563e-06,
"loss": 0.030832624435424803,
"step": 45000
},
{
"epoch": 1.6089241660409739,
"eval_accuracy": 0.9888076128640655,
"eval_f1": 0.9007828635915198,
"eval_loss": 0.04577971622347832,
"eval_precision": 0.8896631009295218,
"eval_recall": 0.9121841129287296,
"eval_runtime": 27.4639,
"eval_samples_per_second": 819.259,
"eval_steps_per_second": 22.757,
"step": 45000
},
{
"epoch": 1.6124995530766206,
"grad_norm": 0.6742628812789917,
"learning_rate": 9.688405019843397e-06,
"loss": 0.03396400213241577,
"step": 45100
},
{
"epoch": 1.6160749401122672,
"grad_norm": 0.30497708916664124,
"learning_rate": 9.599020343952233e-06,
"loss": 0.030751326084136964,
"step": 45200
},
{
"epoch": 1.6196503271479137,
"grad_norm": 0.33833158016204834,
"learning_rate": 9.509635668061067e-06,
"loss": 0.03044323444366455,
"step": 45300
},
{
"epoch": 1.6232257141835604,
"grad_norm": 0.35390418767929077,
"learning_rate": 9.420250992169903e-06,
"loss": 0.03425618410110474,
"step": 45400
},
{
"epoch": 1.626801101219207,
"grad_norm": 0.6008805632591248,
"learning_rate": 9.330866316278737e-06,
"loss": 0.03422411203384399,
"step": 45500
},
{
"epoch": 1.6303764882548535,
"grad_norm": 0.7057814598083496,
"learning_rate": 9.241481640387573e-06,
"loss": 0.03580734968185425,
"step": 45600
},
{
"epoch": 1.6339518752905002,
"grad_norm": 0.6222581267356873,
"learning_rate": 9.152096964496407e-06,
"loss": 0.03245258092880249,
"step": 45700
},
{
"epoch": 1.6375272623261468,
"grad_norm": 0.19113455712795258,
"learning_rate": 9.062712288605242e-06,
"loss": 0.03314180135726929,
"step": 45800
},
{
"epoch": 1.6411026493617933,
"grad_norm": 0.35139983892440796,
"learning_rate": 8.973327612714076e-06,
"loss": 0.03314854860305786,
"step": 45900
},
{
"epoch": 1.64467803639744,
"grad_norm": 2.3638358116149902,
"learning_rate": 8.883942936822912e-06,
"loss": 0.03374920845031738,
"step": 46000
},
{
"epoch": 1.6482534234330868,
"grad_norm": 0.3906150162220001,
"learning_rate": 8.794558260931746e-06,
"loss": 0.030902385711669922,
"step": 46100
},
{
"epoch": 1.651828810468733,
"grad_norm": 1.5684771537780762,
"learning_rate": 8.705173585040582e-06,
"loss": 0.03261609077453613,
"step": 46200
},
{
"epoch": 1.6554041975043798,
"grad_norm": 0.5489705801010132,
"learning_rate": 8.615788909149416e-06,
"loss": 0.032475869655609134,
"step": 46300
},
{
"epoch": 1.6589795845400266,
"grad_norm": 0.4629211127758026,
"learning_rate": 8.52640423325825e-06,
"loss": 0.0343438458442688,
"step": 46400
},
{
"epoch": 1.6625549715756731,
"grad_norm": 0.35416728258132935,
"learning_rate": 8.437019557367086e-06,
"loss": 0.030291988849639892,
"step": 46500
},
{
"epoch": 1.6661303586113196,
"grad_norm": 0.3730672597885132,
"learning_rate": 8.34763488147592e-06,
"loss": 0.03114586353302002,
"step": 46600
},
{
"epoch": 1.6697057456469664,
"grad_norm": 0.8023098111152649,
"learning_rate": 8.258250205584756e-06,
"loss": 0.031157519817352295,
"step": 46700
},
{
"epoch": 1.673281132682613,
"grad_norm": 0.3616831600666046,
"learning_rate": 8.16886552969359e-06,
"loss": 0.03633548498153687,
"step": 46800
},
{
"epoch": 1.6768565197182594,
"grad_norm": 0.2969978451728821,
"learning_rate": 8.079480853802424e-06,
"loss": 0.030888726711273195,
"step": 46900
},
{
"epoch": 1.6804319067539062,
"grad_norm": 0.5954911708831787,
"learning_rate": 7.990096177911258e-06,
"loss": 0.02800543785095215,
"step": 47000
},
{
"epoch": 1.6840072937895527,
"grad_norm": 0.28519004583358765,
"learning_rate": 7.900711502020094e-06,
"loss": 0.0348360013961792,
"step": 47100
},
{
"epoch": 1.6875826808251992,
"grad_norm": 3.0812149047851562,
"learning_rate": 7.811326826128928e-06,
"loss": 0.03429551839828491,
"step": 47200
},
{
"epoch": 1.691158067860846,
"grad_norm": 0.3664245903491974,
"learning_rate": 7.721942150237764e-06,
"loss": 0.03342988014221191,
"step": 47300
},
{
"epoch": 1.6947334548964925,
"grad_norm": 0.4746117889881134,
"learning_rate": 7.632557474346598e-06,
"loss": 0.031046552658081053,
"step": 47400
},
{
"epoch": 1.698308841932139,
"grad_norm": 0.26298218965530396,
"learning_rate": 7.543172798455433e-06,
"loss": 0.03168731689453125,
"step": 47500
},
{
"epoch": 1.698308841932139,
"eval_accuracy": 0.9886813770018247,
"eval_f1": 0.8991618091307493,
"eval_loss": 0.04538652300834656,
"eval_precision": 0.8872949672507418,
"eval_recall": 0.9113503723083115,
"eval_runtime": 27.7648,
"eval_samples_per_second": 810.377,
"eval_steps_per_second": 22.51,
"step": 47500
},
{
"epoch": 1.7018842289677858,
"grad_norm": 1.6149009466171265,
"learning_rate": 7.4537881225642675e-06,
"loss": 0.035295097827911376,
"step": 47600
},
{
"epoch": 1.7054596160034323,
"grad_norm": 0.37669169902801514,
"learning_rate": 7.3644034466731025e-06,
"loss": 0.03364665269851685,
"step": 47700
},
{
"epoch": 1.7090350030390788,
"grad_norm": 0.5029271841049194,
"learning_rate": 7.275018770781937e-06,
"loss": 0.032778596878051756,
"step": 47800
},
{
"epoch": 1.7126103900747256,
"grad_norm": 0.265184611082077,
"learning_rate": 7.1856340948907725e-06,
"loss": 0.033551807403564456,
"step": 47900
},
{
"epoch": 1.7161857771103723,
"grad_norm": 0.5929502248764038,
"learning_rate": 7.096249418999607e-06,
"loss": 0.033371658325195314,
"step": 48000
},
{
"epoch": 1.7197611641460187,
"grad_norm": 0.6151393055915833,
"learning_rate": 7.006864743108442e-06,
"loss": 0.034622840881347657,
"step": 48100
},
{
"epoch": 1.7233365511816654,
"grad_norm": null,
"learning_rate": 6.917480067217276e-06,
"loss": 0.032550268173217774,
"step": 48200
},
{
"epoch": 1.7269119382173121,
"grad_norm": 3.7852137088775635,
"learning_rate": 6.828095391326112e-06,
"loss": 0.03138866424560547,
"step": 48300
},
{
"epoch": 1.7304873252529587,
"grad_norm": 0.1753600835800171,
"learning_rate": 6.738710715434946e-06,
"loss": 0.03186697244644165,
"step": 48400
},
{
"epoch": 1.7340627122886052,
"grad_norm": 6.609533786773682,
"learning_rate": 6.649326039543781e-06,
"loss": 0.031199581623077392,
"step": 48500
},
{
"epoch": 1.737638099324252,
"grad_norm": 1.9689279794692993,
"learning_rate": 6.559941363652617e-06,
"loss": 0.03473323583602905,
"step": 48600
},
{
"epoch": 1.7412134863598985,
"grad_norm": 1.0971671342849731,
"learning_rate": 6.47055668776145e-06,
"loss": 0.031001167297363283,
"step": 48700
},
{
"epoch": 1.744788873395545,
"grad_norm": 0.5941652655601501,
"learning_rate": 6.381172011870286e-06,
"loss": 0.03148573875427246,
"step": 48800
},
{
"epoch": 1.7483642604311918,
"grad_norm": 1.0142033100128174,
"learning_rate": 6.29178733597912e-06,
"loss": 0.03321949720382691,
"step": 48900
},
{
"epoch": 1.7519396474668383,
"grad_norm": 1.1377204656600952,
"learning_rate": 6.202402660087954e-06,
"loss": 0.03343360424041748,
"step": 49000
},
{
"epoch": 1.7555150345024848,
"grad_norm": 0.5484851002693176,
"learning_rate": 6.113017984196789e-06,
"loss": 0.03009215831756592,
"step": 49100
},
{
"epoch": 1.7590904215381316,
"grad_norm": 0.4845998287200928,
"learning_rate": 6.023633308305624e-06,
"loss": 0.03416025161743164,
"step": 49200
},
{
"epoch": 1.762665808573778,
"grad_norm": 2.4999592304229736,
"learning_rate": 5.934248632414459e-06,
"loss": 0.03311382532119751,
"step": 49300
},
{
"epoch": 1.7662411956094246,
"grad_norm": 0.8577232956886292,
"learning_rate": 5.844863956523293e-06,
"loss": 0.030206308364868165,
"step": 49400
},
{
"epoch": 1.7698165826450714,
"grad_norm": 0.90534508228302,
"learning_rate": 5.755479280632128e-06,
"loss": 0.03304917335510254,
"step": 49500
},
{
"epoch": 1.7733919696807179,
"grad_norm": 0.4702795445919037,
"learning_rate": 5.666094604740963e-06,
"loss": 0.03289535760879517,
"step": 49600
},
{
"epoch": 1.7769673567163644,
"grad_norm": 0.3340344727039337,
"learning_rate": 5.5767099288497984e-06,
"loss": 0.03143750667572021,
"step": 49700
},
{
"epoch": 1.7805427437520112,
"grad_norm": 0.8033680319786072,
"learning_rate": 5.4873252529586334e-06,
"loss": 0.03799154043197632,
"step": 49800
},
{
"epoch": 1.784118130787658,
"grad_norm": 0.3498431444168091,
"learning_rate": 5.3979405770674684e-06,
"loss": 0.032227945327758786,
"step": 49900
},
{
"epoch": 1.7876935178233042,
"grad_norm": 0.5044463276863098,
"learning_rate": 5.308555901176303e-06,
"loss": 0.03224561214447021,
"step": 50000
},
{
"epoch": 1.7876935178233042,
"eval_accuracy": 0.9888107687606216,
"eval_f1": 0.9007455797770362,
"eval_loss": 0.04468328878283501,
"eval_precision": 0.890014593623709,
"eval_recall": 0.9117384929419544,
"eval_runtime": 27.446,
"eval_samples_per_second": 819.793,
"eval_steps_per_second": 22.772,
"step": 50000
},
{
"epoch": 1.791268904858951,
"grad_norm": 0.2692296504974365,
"learning_rate": 5.219171225285138e-06,
"loss": 0.03332348108291626,
"step": 50100
},
{
"epoch": 1.7948442918945977,
"grad_norm": 0.29106396436691284,
"learning_rate": 5.129786549393973e-06,
"loss": 0.032147047519683836,
"step": 50200
},
{
"epoch": 1.7984196789302442,
"grad_norm": 0.20724542438983917,
"learning_rate": 5.040401873502807e-06,
"loss": 0.02886124849319458,
"step": 50300
},
{
"epoch": 1.8019950659658908,
"grad_norm": 0.7092130184173584,
"learning_rate": 4.951017197611642e-06,
"loss": 0.033159823417663575,
"step": 50400
},
{
"epoch": 1.8055704530015375,
"grad_norm": 0.432674765586853,
"learning_rate": 4.861632521720477e-06,
"loss": 0.03299700260162353,
"step": 50500
},
{
"epoch": 1.809145840037184,
"grad_norm": 0.9785314798355103,
"learning_rate": 4.772247845829311e-06,
"loss": 0.03019791841506958,
"step": 50600
},
{
"epoch": 1.8127212270728306,
"grad_norm": 0.5002002120018005,
"learning_rate": 4.682863169938146e-06,
"loss": 0.035624983310699465,
"step": 50700
},
{
"epoch": 1.8162966141084773,
"grad_norm": 0.765285313129425,
"learning_rate": 4.593478494046981e-06,
"loss": 0.02971407175064087,
"step": 50800
},
{
"epoch": 1.8198720011441238,
"grad_norm": 0.534965991973877,
"learning_rate": 4.504093818155815e-06,
"loss": 0.03354018688201904,
"step": 50900
},
{
"epoch": 1.8234473881797704,
"grad_norm": 0.7223150134086609,
"learning_rate": 4.41470914226465e-06,
"loss": 0.02953230619430542,
"step": 51000
},
{
"epoch": 1.8270227752154171,
"grad_norm": 0.38850611448287964,
"learning_rate": 4.325324466373485e-06,
"loss": 0.030534558296203614,
"step": 51100
},
{
"epoch": 1.8305981622510636,
"grad_norm": 0.36119019985198975,
"learning_rate": 4.23593979048232e-06,
"loss": 0.030811927318572997,
"step": 51200
},
{
"epoch": 1.8341735492867102,
"grad_norm": 0.4112676978111267,
"learning_rate": 4.146555114591154e-06,
"loss": 0.036168689727783206,
"step": 51300
},
{
"epoch": 1.837748936322357,
"grad_norm": 0.38200223445892334,
"learning_rate": 4.057170438699989e-06,
"loss": 0.03023934841156006,
"step": 51400
},
{
"epoch": 1.8413243233580037,
"grad_norm": 0.22987698018550873,
"learning_rate": 3.967785762808824e-06,
"loss": 0.03280112981796265,
"step": 51500
},
{
"epoch": 1.84489971039365,
"grad_norm": 0.5126951336860657,
"learning_rate": 3.8784010869176585e-06,
"loss": 0.032214133739471434,
"step": 51600
},
{
"epoch": 1.8484750974292967,
"grad_norm": 0.3394624888896942,
"learning_rate": 3.7890164110264935e-06,
"loss": 0.0288789963722229,
"step": 51700
},
{
"epoch": 1.8520504844649435,
"grad_norm": 0.8338372111320496,
"learning_rate": 3.699631735135328e-06,
"loss": 0.03252574443817138,
"step": 51800
},
{
"epoch": 1.8556258715005898,
"grad_norm": 0.2515293061733246,
"learning_rate": 3.6102470592441635e-06,
"loss": 0.029772815704345704,
"step": 51900
},
{
"epoch": 1.8592012585362365,
"grad_norm": 0.5206916332244873,
"learning_rate": 3.5208623833529985e-06,
"loss": 0.030335335731506347,
"step": 52000
},
{
"epoch": 1.8627766455718833,
"grad_norm": 2.3129968643188477,
"learning_rate": 3.431477707461833e-06,
"loss": 0.032417423725128174,
"step": 52100
},
{
"epoch": 1.8663520326075298,
"grad_norm": 1.627025842666626,
"learning_rate": 3.342093031570668e-06,
"loss": 0.03170029640197754,
"step": 52200
},
{
"epoch": 1.8699274196431763,
"grad_norm": 1.4574371576309204,
"learning_rate": 3.2527083556795027e-06,
"loss": 0.03141381978988647,
"step": 52300
},
{
"epoch": 1.873502806678823,
"grad_norm": 0.3863239288330078,
"learning_rate": 3.1633236797883373e-06,
"loss": 0.031075146198272705,
"step": 52400
},
{
"epoch": 1.8770781937144696,
"grad_norm": 0.4181801676750183,
"learning_rate": 3.0739390038971723e-06,
"loss": 0.031000993251800536,
"step": 52500
},
{
"epoch": 1.8770781937144696,
"eval_accuracy": 0.9888013010709535,
"eval_f1": 0.9016717087789566,
"eval_loss": 0.04389448091387749,
"eval_precision": 0.8910285200988098,
"eval_recall": 0.9125722335623724,
"eval_runtime": 27.8666,
"eval_samples_per_second": 807.418,
"eval_steps_per_second": 22.428,
"step": 52500
},
{
"epoch": 1.8806535807501161,
"grad_norm": 0.2707064151763916,
"learning_rate": 2.984554328006007e-06,
"loss": 0.03185615539550781,
"step": 52600
},
{
"epoch": 1.8842289677857629,
"grad_norm": 0.5553069710731506,
"learning_rate": 2.895169652114842e-06,
"loss": 0.03002817392349243,
"step": 52700
},
{
"epoch": 1.8878043548214094,
"grad_norm": 0.3491911292076111,
"learning_rate": 2.8057849762236764e-06,
"loss": 0.028789632320404053,
"step": 52800
},
{
"epoch": 1.891379741857056,
"grad_norm": 0.25187739729881287,
"learning_rate": 2.716400300332511e-06,
"loss": 0.030605175495147706,
"step": 52900
},
{
"epoch": 1.8949551288927027,
"grad_norm": 0.9672222137451172,
"learning_rate": 2.627015624441346e-06,
"loss": 0.026704788208007812,
"step": 53000
},
{
"epoch": 1.8985305159283492,
"grad_norm": 0.20565390586853027,
"learning_rate": 2.5376309485501806e-06,
"loss": 0.03059121608734131,
"step": 53100
},
{
"epoch": 1.9021059029639957,
"grad_norm": 0.28167805075645447,
"learning_rate": 2.448246272659015e-06,
"loss": 0.03177599668502808,
"step": 53200
},
{
"epoch": 1.9056812899996425,
"grad_norm": 0.24386221170425415,
"learning_rate": 2.35886159676785e-06,
"loss": 0.029768753051757812,
"step": 53300
},
{
"epoch": 1.9092566770352892,
"grad_norm": 3.4795925617218018,
"learning_rate": 2.2694769208766848e-06,
"loss": 0.030632736682891844,
"step": 53400
},
{
"epoch": 1.9128320640709355,
"grad_norm": 0.28710371255874634,
"learning_rate": 2.1800922449855198e-06,
"loss": 0.03532270431518555,
"step": 53500
},
{
"epoch": 1.9164074511065823,
"grad_norm": 1.0009117126464844,
"learning_rate": 2.090707569094355e-06,
"loss": 0.030157883167266846,
"step": 53600
},
{
"epoch": 1.919982838142229,
"grad_norm": 0.8986654877662659,
"learning_rate": 2.0013228932031894e-06,
"loss": 0.02968831777572632,
"step": 53700
},
{
"epoch": 1.9235582251778756,
"grad_norm": 0.4408089518547058,
"learning_rate": 1.9119382173120244e-06,
"loss": 0.031650230884552,
"step": 53800
},
{
"epoch": 1.927133612213522,
"grad_norm": 0.44061407446861267,
"learning_rate": 1.822553541420859e-06,
"loss": 0.03314239501953125,
"step": 53900
},
{
"epoch": 1.9307089992491688,
"grad_norm": 0.31529247760772705,
"learning_rate": 1.7331688655296938e-06,
"loss": 0.028174445629119874,
"step": 54000
},
{
"epoch": 1.9342843862848154,
"grad_norm": 0.46949172019958496,
"learning_rate": 1.6437841896385283e-06,
"loss": 0.03205679178237915,
"step": 54100
},
{
"epoch": 1.9378597733204619,
"grad_norm": 0.42985737323760986,
"learning_rate": 1.5543995137473631e-06,
"loss": 0.03423054218292236,
"step": 54200
},
{
"epoch": 1.9414351603561086,
"grad_norm": 0.3582230806350708,
"learning_rate": 1.465014837856198e-06,
"loss": 0.036082537174224855,
"step": 54300
},
{
"epoch": 1.9450105473917552,
"grad_norm": 0.2743465304374695,
"learning_rate": 1.375630161965033e-06,
"loss": 0.03133800745010376,
"step": 54400
},
{
"epoch": 1.9485859344274017,
"grad_norm": 0.3252977728843689,
"learning_rate": 1.2862454860738675e-06,
"loss": 0.029351208209991455,
"step": 54500
},
{
"epoch": 1.9521613214630484,
"grad_norm": 0.7166300415992737,
"learning_rate": 1.1968608101827023e-06,
"loss": 0.03286364078521729,
"step": 54600
},
{
"epoch": 1.955736708498695,
"grad_norm": 0.4002815783023834,
"learning_rate": 1.1074761342915371e-06,
"loss": 0.03330163955688477,
"step": 54700
},
{
"epoch": 1.9593120955343415,
"grad_norm": 0.6636976003646851,
"learning_rate": 1.018091458400372e-06,
"loss": 0.03203016996383667,
"step": 54800
},
{
"epoch": 1.9628874825699882,
"grad_norm": 0.9583289623260498,
"learning_rate": 9.287067825092066e-07,
"loss": 0.03129979610443115,
"step": 54900
},
{
"epoch": 1.9664628696056348,
"grad_norm": 0.31978148221969604,
"learning_rate": 8.393221066180415e-07,
"loss": 0.029429452419281008,
"step": 55000
},
{
"epoch": 1.9664628696056348,
"eval_accuracy": 0.9891878983990663,
"eval_f1": 0.9045753492836575,
"eval_loss": 0.04267999157309532,
"eval_precision": 0.8949478748997595,
"eval_recall": 0.9144122128626053,
"eval_runtime": 27.5433,
"eval_samples_per_second": 816.897,
"eval_steps_per_second": 22.692,
"step": 55000
},
{
"epoch": 1.9700382566412813,
"grad_norm": 2.8054332733154297,
"learning_rate": 7.499374307268763e-07,
"loss": 0.03312858819961548,
"step": 55100
},
{
"epoch": 1.973613643676928,
"grad_norm": 0.5224851369857788,
"learning_rate": 6.60552754835711e-07,
"loss": 0.028790268898010254,
"step": 55200
},
{
"epoch": 1.9771890307125748,
"grad_norm": 0.26614582538604736,
"learning_rate": 5.711680789445458e-07,
"loss": 0.028711328506469725,
"step": 55300
},
{
"epoch": 1.980764417748221,
"grad_norm": 0.7065221667289734,
"learning_rate": 4.817834030533806e-07,
"loss": 0.03409520626068115,
"step": 55400
},
{
"epoch": 1.9843398047838678,
"grad_norm": 0.5520646572113037,
"learning_rate": 3.923987271622153e-07,
"loss": 0.030617287158966066,
"step": 55500
},
{
"epoch": 1.9879151918195146,
"grad_norm": 0.8152151703834534,
"learning_rate": 3.030140512710501e-07,
"loss": 0.034760825634002686,
"step": 55600
},
{
"epoch": 1.9914905788551611,
"grad_norm": 0.7719851136207581,
"learning_rate": 2.136293753798849e-07,
"loss": 0.033238520622253416,
"step": 55700
},
{
"epoch": 1.9950659658908076,
"grad_norm": 0.3627885580062866,
"learning_rate": 1.2424469948871967e-07,
"loss": 0.029695370197296143,
"step": 55800
},
{
"epoch": 1.9986413529264544,
"grad_norm": 1.9493422508239746,
"learning_rate": 3.4860023597554434e-08,
"loss": 0.032883105278015134,
"step": 55900
},
{
"epoch": 2.0,
"step": 55938,
"total_flos": 1.889848580814228e+18,
"train_loss": 0.058781605476671404,
"train_runtime": 18337.3113,
"train_samples_per_second": 439.268,
"train_steps_per_second": 3.051
}
],
"logging_steps": 100,
"max_steps": 55938,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 2500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.889848580814228e+18,
"train_batch_size": 72,
"trial_name": null,
"trial_params": null
}