{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9614965492190337,
"eval_steps": 300,
"global_step": 10800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0018162005085361425,
"grad_norm": 51.7724723815918,
"learning_rate": 4.99545949872866e-05,
"loss": 4.15,
"step": 10
},
{
"epoch": 0.003632401017072285,
"grad_norm": 39.101844787597656,
"learning_rate": 4.9909189974573195e-05,
"loss": 2.9203,
"step": 20
},
{
"epoch": 0.005448601525608427,
"grad_norm": 59.884159088134766,
"learning_rate": 4.9863784961859795e-05,
"loss": 2.3234,
"step": 30
},
{
"epoch": 0.00726480203414457,
"grad_norm": 30.44273567199707,
"learning_rate": 4.981837994914639e-05,
"loss": 2.2648,
"step": 40
},
{
"epoch": 0.009081002542680712,
"grad_norm": 31.179187774658203,
"learning_rate": 4.977297493643299e-05,
"loss": 1.9688,
"step": 50
},
{
"epoch": 0.010897203051216855,
"grad_norm": 24.85585594177246,
"learning_rate": 4.972756992371958e-05,
"loss": 1.8813,
"step": 60
},
{
"epoch": 0.012713403559752997,
"grad_norm": 48.86698532104492,
"learning_rate": 4.968216491100618e-05,
"loss": 1.9812,
"step": 70
},
{
"epoch": 0.01452960406828914,
"grad_norm": 51.080074310302734,
"learning_rate": 4.963675989829277e-05,
"loss": 1.8727,
"step": 80
},
{
"epoch": 0.01634580457682528,
"grad_norm": 26.04038429260254,
"learning_rate": 4.959135488557937e-05,
"loss": 1.6398,
"step": 90
},
{
"epoch": 0.018162005085361425,
"grad_norm": 24.692808151245117,
"learning_rate": 4.9545949872865965e-05,
"loss": 1.4898,
"step": 100
},
{
"epoch": 0.019978205593897565,
"grad_norm": 20.653032302856445,
"learning_rate": 4.9500544860152565e-05,
"loss": 1.177,
"step": 110
},
{
"epoch": 0.02179440610243371,
"grad_norm": 37.319427490234375,
"learning_rate": 4.945513984743916e-05,
"loss": 1.4352,
"step": 120
},
{
"epoch": 0.02361060661096985,
"grad_norm": 22.78856086730957,
"learning_rate": 4.940973483472576e-05,
"loss": 1.4187,
"step": 130
},
{
"epoch": 0.025426807119505995,
"grad_norm": 34.050315856933594,
"learning_rate": 4.936432982201235e-05,
"loss": 1.2316,
"step": 140
},
{
"epoch": 0.027243007628042135,
"grad_norm": 18.459930419921875,
"learning_rate": 4.931892480929895e-05,
"loss": 1.2225,
"step": 150
},
{
"epoch": 0.02905920813657828,
"grad_norm": 18.474990844726562,
"learning_rate": 4.927351979658554e-05,
"loss": 1.1586,
"step": 160
},
{
"epoch": 0.03087540864511442,
"grad_norm": 31.727245330810547,
"learning_rate": 4.922811478387214e-05,
"loss": 1.1371,
"step": 170
},
{
"epoch": 0.03269160915365056,
"grad_norm": 33.16598129272461,
"learning_rate": 4.9182709771158735e-05,
"loss": 0.8711,
"step": 180
},
{
"epoch": 0.03450780966218671,
"grad_norm": 35.099700927734375,
"learning_rate": 4.9137304758445335e-05,
"loss": 1.1367,
"step": 190
},
{
"epoch": 0.03632401017072285,
"grad_norm": 20.833833694458008,
"learning_rate": 4.909189974573193e-05,
"loss": 1.0652,
"step": 200
},
{
"epoch": 0.03814021067925899,
"grad_norm": 29.985502243041992,
"learning_rate": 4.904649473301853e-05,
"loss": 1.0373,
"step": 210
},
{
"epoch": 0.03995641118779513,
"grad_norm": 18.238357543945312,
"learning_rate": 4.900108972030513e-05,
"loss": 0.8555,
"step": 220
},
{
"epoch": 0.04177261169633127,
"grad_norm": 23.067848205566406,
"learning_rate": 4.895568470759172e-05,
"loss": 0.9469,
"step": 230
},
{
"epoch": 0.04358881220486742,
"grad_norm": 32.689029693603516,
"learning_rate": 4.891027969487832e-05,
"loss": 0.9379,
"step": 240
},
{
"epoch": 0.04540501271340356,
"grad_norm": 25.626976013183594,
"learning_rate": 4.886487468216491e-05,
"loss": 0.8861,
"step": 250
},
{
"epoch": 0.0472212132219397,
"grad_norm": 13.55506706237793,
"learning_rate": 4.881946966945151e-05,
"loss": 0.8361,
"step": 260
},
{
"epoch": 0.04903741373047584,
"grad_norm": 22.14944839477539,
"learning_rate": 4.8774064656738104e-05,
"loss": 0.7471,
"step": 270
},
{
"epoch": 0.05085361423901199,
"grad_norm": 20.66185760498047,
"learning_rate": 4.8728659644024704e-05,
"loss": 0.815,
"step": 280
},
{
"epoch": 0.05266981474754813,
"grad_norm": 22.343624114990234,
"learning_rate": 4.86832546313113e-05,
"loss": 0.6479,
"step": 290
},
{
"epoch": 0.05448601525608427,
"grad_norm": 23.354530334472656,
"learning_rate": 4.86378496185979e-05,
"loss": 0.6668,
"step": 300
},
{
"epoch": 0.05448601525608427,
"eval_accuracy": 0.6562756357670222,
"eval_f1": 0.6498431091967438,
"eval_loss": 1.0104337930679321,
"eval_precision": 0.6553569560524448,
"eval_recall": 0.6624616383794661,
"eval_runtime": 12.1348,
"eval_samples_per_second": 100.455,
"eval_steps_per_second": 6.345,
"step": 300
},
{
"epoch": 0.05630221576462041,
"grad_norm": 19.208993911743164,
"learning_rate": 4.859244460588449e-05,
"loss": 0.9148,
"step": 310
},
{
"epoch": 0.05811841627315656,
"grad_norm": 41.95314025878906,
"learning_rate": 4.854703959317109e-05,
"loss": 0.7869,
"step": 320
},
{
"epoch": 0.0599346167816927,
"grad_norm": 26.48455047607422,
"learning_rate": 4.850163458045768e-05,
"loss": 0.7795,
"step": 330
},
{
"epoch": 0.06175081729022884,
"grad_norm": 17.859508514404297,
"learning_rate": 4.845622956774428e-05,
"loss": 0.8215,
"step": 340
},
{
"epoch": 0.06356701779876499,
"grad_norm": 30.228845596313477,
"learning_rate": 4.8410824555030874e-05,
"loss": 0.849,
"step": 350
},
{
"epoch": 0.06538321830730112,
"grad_norm": 26.73940086364746,
"learning_rate": 4.8365419542317474e-05,
"loss": 0.7037,
"step": 360
},
{
"epoch": 0.06719941881583727,
"grad_norm": 28.508052825927734,
"learning_rate": 4.832001452960407e-05,
"loss": 0.9711,
"step": 370
},
{
"epoch": 0.06901561932437342,
"grad_norm": 26.7029972076416,
"learning_rate": 4.827460951689067e-05,
"loss": 0.8039,
"step": 380
},
{
"epoch": 0.07083181983290955,
"grad_norm": 20.99094009399414,
"learning_rate": 4.822920450417726e-05,
"loss": 0.74,
"step": 390
},
{
"epoch": 0.0726480203414457,
"grad_norm": 22.60647201538086,
"learning_rate": 4.818379949146386e-05,
"loss": 0.759,
"step": 400
},
{
"epoch": 0.07446422084998183,
"grad_norm": 24.6385555267334,
"learning_rate": 4.813839447875045e-05,
"loss": 0.9516,
"step": 410
},
{
"epoch": 0.07628042135851798,
"grad_norm": 19.862504959106445,
"learning_rate": 4.809298946603705e-05,
"loss": 0.6793,
"step": 420
},
{
"epoch": 0.07809662186705413,
"grad_norm": 20.13799476623535,
"learning_rate": 4.804758445332365e-05,
"loss": 0.6387,
"step": 430
},
{
"epoch": 0.07991282237559026,
"grad_norm": 11.443085670471191,
"learning_rate": 4.8002179440610244e-05,
"loss": 0.6844,
"step": 440
},
{
"epoch": 0.08172902288412641,
"grad_norm": 14.975491523742676,
"learning_rate": 4.7956774427896844e-05,
"loss": 0.6252,
"step": 450
},
{
"epoch": 0.08354522339266254,
"grad_norm": 34.0562858581543,
"learning_rate": 4.7911369415183437e-05,
"loss": 0.841,
"step": 460
},
{
"epoch": 0.08536142390119869,
"grad_norm": 14.882052421569824,
"learning_rate": 4.7865964402470036e-05,
"loss": 0.6924,
"step": 470
},
{
"epoch": 0.08717762440973484,
"grad_norm": 10.927328109741211,
"learning_rate": 4.782055938975663e-05,
"loss": 0.6586,
"step": 480
},
{
"epoch": 0.08899382491827097,
"grad_norm": 18.295116424560547,
"learning_rate": 4.777515437704323e-05,
"loss": 0.7256,
"step": 490
},
{
"epoch": 0.09081002542680712,
"grad_norm": 38.24443435668945,
"learning_rate": 4.772974936432982e-05,
"loss": 0.7914,
"step": 500
},
{
"epoch": 0.09262622593534327,
"grad_norm": 20.668012619018555,
"learning_rate": 4.768434435161642e-05,
"loss": 0.5446,
"step": 510
},
{
"epoch": 0.0944424264438794,
"grad_norm": 17.62775421142578,
"learning_rate": 4.7638939338903014e-05,
"loss": 0.7426,
"step": 520
},
{
"epoch": 0.09625862695241555,
"grad_norm": 23.430341720581055,
"learning_rate": 4.7593534326189614e-05,
"loss": 0.7063,
"step": 530
},
{
"epoch": 0.09807482746095168,
"grad_norm": 30.422704696655273,
"learning_rate": 4.7548129313476207e-05,
"loss": 0.6363,
"step": 540
},
{
"epoch": 0.09989102796948783,
"grad_norm": 20.155187606811523,
"learning_rate": 4.7502724300762806e-05,
"loss": 0.7201,
"step": 550
},
{
"epoch": 0.10170722847802398,
"grad_norm": 28.10911750793457,
"learning_rate": 4.74573192880494e-05,
"loss": 0.7906,
"step": 560
},
{
"epoch": 0.10352342898656011,
"grad_norm": 25.283676147460938,
"learning_rate": 4.7411914275336e-05,
"loss": 0.5832,
"step": 570
},
{
"epoch": 0.10533962949509626,
"grad_norm": 9.627545356750488,
"learning_rate": 4.736650926262259e-05,
"loss": 0.5875,
"step": 580
},
{
"epoch": 0.10715583000363241,
"grad_norm": 12.515082359313965,
"learning_rate": 4.732110424990919e-05,
"loss": 0.7885,
"step": 590
},
{
"epoch": 0.10897203051216854,
"grad_norm": 16.580331802368164,
"learning_rate": 4.7275699237195784e-05,
"loss": 0.7129,
"step": 600
},
{
"epoch": 0.10897203051216854,
"eval_accuracy": 0.7235438884331419,
"eval_f1": 0.7205989414762296,
"eval_loss": 0.7799906134605408,
"eval_precision": 0.730319610475981,
"eval_recall": 0.7152200658078283,
"eval_runtime": 12.0682,
"eval_samples_per_second": 101.009,
"eval_steps_per_second": 6.38,
"step": 600
},
{
"epoch": 0.11078823102070469,
"grad_norm": 18.379596710205078,
"learning_rate": 4.7230294224482384e-05,
"loss": 0.5647,
"step": 610
},
{
"epoch": 0.11260443152924082,
"grad_norm": 23.94647789001465,
"learning_rate": 4.7184889211768977e-05,
"loss": 0.7223,
"step": 620
},
{
"epoch": 0.11442063203777697,
"grad_norm": 16.6752986907959,
"learning_rate": 4.7139484199055576e-05,
"loss": 0.5555,
"step": 630
},
{
"epoch": 0.11623683254631312,
"grad_norm": 19.58445930480957,
"learning_rate": 4.7094079186342176e-05,
"loss": 0.5701,
"step": 640
},
{
"epoch": 0.11805303305484925,
"grad_norm": 9.683353424072266,
"learning_rate": 4.704867417362877e-05,
"loss": 0.5934,
"step": 650
},
{
"epoch": 0.1198692335633854,
"grad_norm": 16.68168830871582,
"learning_rate": 4.700326916091537e-05,
"loss": 0.6471,
"step": 660
},
{
"epoch": 0.12168543407192153,
"grad_norm": 16.534196853637695,
"learning_rate": 4.695786414820196e-05,
"loss": 0.7002,
"step": 670
},
{
"epoch": 0.12350163458045768,
"grad_norm": 12.310294151306152,
"learning_rate": 4.691245913548856e-05,
"loss": 0.6939,
"step": 680
},
{
"epoch": 0.12531783508899383,
"grad_norm": 24.0400333404541,
"learning_rate": 4.6867054122775154e-05,
"loss": 0.6732,
"step": 690
},
{
"epoch": 0.12713403559752998,
"grad_norm": 22.213275909423828,
"learning_rate": 4.682164911006175e-05,
"loss": 0.4664,
"step": 700
},
{
"epoch": 0.1289502361060661,
"grad_norm": 41.8505973815918,
"learning_rate": 4.6776244097348346e-05,
"loss": 0.5713,
"step": 710
},
{
"epoch": 0.13076643661460224,
"grad_norm": 39.898765563964844,
"learning_rate": 4.6730839084634946e-05,
"loss": 0.8389,
"step": 720
},
{
"epoch": 0.1325826371231384,
"grad_norm": 74.52597045898438,
"learning_rate": 4.668543407192154e-05,
"loss": 0.7645,
"step": 730
},
{
"epoch": 0.13439883763167454,
"grad_norm": 15.932312965393066,
"learning_rate": 4.664002905920814e-05,
"loss": 0.8391,
"step": 740
},
{
"epoch": 0.1362150381402107,
"grad_norm": 18.114957809448242,
"learning_rate": 4.659462404649473e-05,
"loss": 0.7943,
"step": 750
},
{
"epoch": 0.13803123864874683,
"grad_norm": 16.521848678588867,
"learning_rate": 4.654921903378133e-05,
"loss": 0.5992,
"step": 760
},
{
"epoch": 0.13984743915728295,
"grad_norm": 27.91718101501465,
"learning_rate": 4.6503814021067924e-05,
"loss": 0.7793,
"step": 770
},
{
"epoch": 0.1416636396658191,
"grad_norm": 19.695844650268555,
"learning_rate": 4.645840900835452e-05,
"loss": 0.7436,
"step": 780
},
{
"epoch": 0.14347984017435525,
"grad_norm": 16.544538497924805,
"learning_rate": 4.6413003995641116e-05,
"loss": 0.6805,
"step": 790
},
{
"epoch": 0.1452960406828914,
"grad_norm": 18.75685691833496,
"learning_rate": 4.6367598982927716e-05,
"loss": 0.4687,
"step": 800
},
{
"epoch": 0.14711224119142755,
"grad_norm": 21.897932052612305,
"learning_rate": 4.632219397021431e-05,
"loss": 0.7238,
"step": 810
},
{
"epoch": 0.14892844169996367,
"grad_norm": 31.82994270324707,
"learning_rate": 4.627678895750091e-05,
"loss": 0.7512,
"step": 820
},
{
"epoch": 0.1507446422084998,
"grad_norm": 15.97396183013916,
"learning_rate": 4.62313839447875e-05,
"loss": 0.6934,
"step": 830
},
{
"epoch": 0.15256084271703596,
"grad_norm": 20.96219253540039,
"learning_rate": 4.61859789320741e-05,
"loss": 0.5324,
"step": 840
},
{
"epoch": 0.1543770432255721,
"grad_norm": 19.114473342895508,
"learning_rate": 4.61405739193607e-05,
"loss": 0.508,
"step": 850
},
{
"epoch": 0.15619324373410826,
"grad_norm": 9.712385177612305,
"learning_rate": 4.609516890664729e-05,
"loss": 0.5295,
"step": 860
},
{
"epoch": 0.15800944424264438,
"grad_norm": 13.762930870056152,
"learning_rate": 4.604976389393389e-05,
"loss": 0.7455,
"step": 870
},
{
"epoch": 0.15982564475118052,
"grad_norm": 27.86884307861328,
"learning_rate": 4.6004358881220486e-05,
"loss": 0.5587,
"step": 880
},
{
"epoch": 0.16164184525971667,
"grad_norm": 26.105749130249023,
"learning_rate": 4.5958953868507085e-05,
"loss": 0.5981,
"step": 890
},
{
"epoch": 0.16345804576825282,
"grad_norm": 26.51416015625,
"learning_rate": 4.591354885579368e-05,
"loss": 0.5396,
"step": 900
},
{
"epoch": 0.16345804576825282,
"eval_accuracy": 0.7424118129614438,
"eval_f1": 0.7311388146519514,
"eval_loss": 0.7538678050041199,
"eval_precision": 0.7369680243126087,
"eval_recall": 0.7387445231992035,
"eval_runtime": 12.1028,
"eval_samples_per_second": 100.721,
"eval_steps_per_second": 6.362,
"step": 900
},
{
"epoch": 0.16527424627678897,
"grad_norm": 22.03255844116211,
"learning_rate": 4.586814384308028e-05,
"loss": 0.6057,
"step": 910
},
{
"epoch": 0.1670904467853251,
"grad_norm": 22.521772384643555,
"learning_rate": 4.582273883036687e-05,
"loss": 0.7402,
"step": 920
},
{
"epoch": 0.16890664729386123,
"grad_norm": 14.045843124389648,
"learning_rate": 4.577733381765347e-05,
"loss": 0.5914,
"step": 930
},
{
"epoch": 0.17072284780239738,
"grad_norm": 11.75537395477295,
"learning_rate": 4.573192880494006e-05,
"loss": 0.5954,
"step": 940
},
{
"epoch": 0.17253904831093353,
"grad_norm": 25.500017166137695,
"learning_rate": 4.568652379222666e-05,
"loss": 0.6219,
"step": 950
},
{
"epoch": 0.17435524881946968,
"grad_norm": 15.509596824645996,
"learning_rate": 4.5641118779513256e-05,
"loss": 0.5603,
"step": 960
},
{
"epoch": 0.17617144932800582,
"grad_norm": 20.986408233642578,
"learning_rate": 4.5595713766799855e-05,
"loss": 0.6184,
"step": 970
},
{
"epoch": 0.17798764983654194,
"grad_norm": 12.335230827331543,
"learning_rate": 4.555030875408645e-05,
"loss": 0.7159,
"step": 980
},
{
"epoch": 0.1798038503450781,
"grad_norm": 14.523093223571777,
"learning_rate": 4.550490374137305e-05,
"loss": 0.5396,
"step": 990
},
{
"epoch": 0.18162005085361424,
"grad_norm": 18.180063247680664,
"learning_rate": 4.545949872865964e-05,
"loss": 0.4772,
"step": 1000
},
{
"epoch": 0.1834362513621504,
"grad_norm": 11.765399932861328,
"learning_rate": 4.541409371594624e-05,
"loss": 0.5037,
"step": 1010
},
{
"epoch": 0.18525245187068654,
"grad_norm": 14.669034957885742,
"learning_rate": 4.536868870323284e-05,
"loss": 0.7092,
"step": 1020
},
{
"epoch": 0.18706865237922266,
"grad_norm": 22.75113296508789,
"learning_rate": 4.532328369051943e-05,
"loss": 0.6244,
"step": 1030
},
{
"epoch": 0.1888848528877588,
"grad_norm": 15.64301872253418,
"learning_rate": 4.527787867780603e-05,
"loss": 0.6145,
"step": 1040
},
{
"epoch": 0.19070105339629495,
"grad_norm": 25.85451316833496,
"learning_rate": 4.5232473665092625e-05,
"loss": 0.6305,
"step": 1050
},
{
"epoch": 0.1925172539048311,
"grad_norm": 11.775480270385742,
"learning_rate": 4.5187068652379225e-05,
"loss": 0.5969,
"step": 1060
},
{
"epoch": 0.19433345441336725,
"grad_norm": 15.521839141845703,
"learning_rate": 4.514166363966582e-05,
"loss": 0.587,
"step": 1070
},
{
"epoch": 0.19614965492190337,
"grad_norm": 13.683486938476562,
"learning_rate": 4.509625862695242e-05,
"loss": 0.5607,
"step": 1080
},
{
"epoch": 0.1979658554304395,
"grad_norm": 20.192411422729492,
"learning_rate": 4.505085361423901e-05,
"loss": 0.6512,
"step": 1090
},
{
"epoch": 0.19978205593897566,
"grad_norm": 10.552505493164062,
"learning_rate": 4.500544860152561e-05,
"loss": 0.4988,
"step": 1100
},
{
"epoch": 0.2015982564475118,
"grad_norm": 20.700984954833984,
"learning_rate": 4.49600435888122e-05,
"loss": 0.6752,
"step": 1110
},
{
"epoch": 0.20341445695604796,
"grad_norm": 11.448249816894531,
"learning_rate": 4.49146385760988e-05,
"loss": 0.6486,
"step": 1120
},
{
"epoch": 0.20523065746458408,
"grad_norm": 12.439767837524414,
"learning_rate": 4.48692335633854e-05,
"loss": 0.6803,
"step": 1130
},
{
"epoch": 0.20704685797312022,
"grad_norm": 21.695728302001953,
"learning_rate": 4.4823828550671995e-05,
"loss": 0.6166,
"step": 1140
},
{
"epoch": 0.20886305848165637,
"grad_norm": 19.776832580566406,
"learning_rate": 4.4778423537958595e-05,
"loss": 0.5869,
"step": 1150
},
{
"epoch": 0.21067925899019252,
"grad_norm": 19.56122589111328,
"learning_rate": 4.473301852524519e-05,
"loss": 0.6789,
"step": 1160
},
{
"epoch": 0.21249545949872867,
"grad_norm": 20.19476318359375,
"learning_rate": 4.468761351253179e-05,
"loss": 0.6135,
"step": 1170
},
{
"epoch": 0.21431166000726481,
"grad_norm": 15.969949722290039,
"learning_rate": 4.464220849981838e-05,
"loss": 0.5218,
"step": 1180
},
{
"epoch": 0.21612786051580093,
"grad_norm": 13.37980842590332,
"learning_rate": 4.459680348710498e-05,
"loss": 0.5176,
"step": 1190
},
{
"epoch": 0.21794406102433708,
"grad_norm": 7.233293056488037,
"learning_rate": 4.455139847439157e-05,
"loss": 0.3802,
"step": 1200
},
{
"epoch": 0.21794406102433708,
"eval_accuracy": 0.7202625102543068,
"eval_f1": 0.722200966558384,
"eval_loss": 0.8283492922782898,
"eval_precision": 0.7215506981816255,
"eval_recall": 0.7526848662720801,
"eval_runtime": 12.054,
"eval_samples_per_second": 101.128,
"eval_steps_per_second": 6.388,
"step": 1200
},
{
"epoch": 0.21976026153287323,
"grad_norm": 14.58340835571289,
"learning_rate": 4.450599346167817e-05,
"loss": 0.7496,
"step": 1210
},
{
"epoch": 0.22157646204140938,
"grad_norm": 16.27931785583496,
"learning_rate": 4.4460588448964765e-05,
"loss": 0.6172,
"step": 1220
},
{
"epoch": 0.22339266254994553,
"grad_norm": 19.69707679748535,
"learning_rate": 4.4415183436251365e-05,
"loss": 0.6921,
"step": 1230
},
{
"epoch": 0.22520886305848165,
"grad_norm": 13.30395793914795,
"learning_rate": 4.4369778423537964e-05,
"loss": 0.6034,
"step": 1240
},
{
"epoch": 0.2270250635670178,
"grad_norm": 13.300093650817871,
"learning_rate": 4.432437341082456e-05,
"loss": 0.503,
"step": 1250
},
{
"epoch": 0.22884126407555394,
"grad_norm": 14.82442855834961,
"learning_rate": 4.427896839811116e-05,
"loss": 0.7216,
"step": 1260
},
{
"epoch": 0.2306574645840901,
"grad_norm": 29.971027374267578,
"learning_rate": 4.423356338539775e-05,
"loss": 0.6413,
"step": 1270
},
{
"epoch": 0.23247366509262624,
"grad_norm": 24.70488166809082,
"learning_rate": 4.418815837268435e-05,
"loss": 0.5686,
"step": 1280
},
{
"epoch": 0.23428986560116236,
"grad_norm": 18.32679557800293,
"learning_rate": 4.414275335997094e-05,
"loss": 0.5744,
"step": 1290
},
{
"epoch": 0.2361060661096985,
"grad_norm": 16.468469619750977,
"learning_rate": 4.409734834725754e-05,
"loss": 0.5514,
"step": 1300
},
{
"epoch": 0.23792226661823465,
"grad_norm": 14.710607528686523,
"learning_rate": 4.4051943334544135e-05,
"loss": 0.5871,
"step": 1310
},
{
"epoch": 0.2397384671267708,
"grad_norm": 15.89440631866455,
"learning_rate": 4.4006538321830734e-05,
"loss": 0.6898,
"step": 1320
},
{
"epoch": 0.24155466763530695,
"grad_norm": 18.804264068603516,
"learning_rate": 4.396113330911733e-05,
"loss": 0.617,
"step": 1330
},
{
"epoch": 0.24337086814384307,
"grad_norm": 17.780223846435547,
"learning_rate": 4.391572829640393e-05,
"loss": 0.5986,
"step": 1340
},
{
"epoch": 0.24518706865237921,
"grad_norm": 18.2768611907959,
"learning_rate": 4.3870323283690526e-05,
"loss": 0.498,
"step": 1350
},
{
"epoch": 0.24700326916091536,
"grad_norm": 20.490026473999023,
"learning_rate": 4.382491827097712e-05,
"loss": 0.5705,
"step": 1360
},
{
"epoch": 0.2488194696694515,
"grad_norm": 14.712557792663574,
"learning_rate": 4.377951325826372e-05,
"loss": 0.5586,
"step": 1370
},
{
"epoch": 0.25063567017798766,
"grad_norm": 15.078400611877441,
"learning_rate": 4.373410824555031e-05,
"loss": 0.7252,
"step": 1380
},
{
"epoch": 0.2524518706865238,
"grad_norm": 16.71666717529297,
"learning_rate": 4.368870323283691e-05,
"loss": 0.4957,
"step": 1390
},
{
"epoch": 0.25426807119505995,
"grad_norm": 21.710941314697266,
"learning_rate": 4.3643298220123504e-05,
"loss": 0.5567,
"step": 1400
},
{
"epoch": 0.2560842717035961,
"grad_norm": 13.499922752380371,
"learning_rate": 4.3597893207410104e-05,
"loss": 0.6141,
"step": 1410
},
{
"epoch": 0.2579004722121322,
"grad_norm": 22.025402069091797,
"learning_rate": 4.35524881946967e-05,
"loss": 0.7273,
"step": 1420
},
{
"epoch": 0.25971667272066834,
"grad_norm": 17.700258255004883,
"learning_rate": 4.3507083181983296e-05,
"loss": 0.5406,
"step": 1430
},
{
"epoch": 0.2615328732292045,
"grad_norm": 32.67435073852539,
"learning_rate": 4.346167816926989e-05,
"loss": 0.5677,
"step": 1440
},
{
"epoch": 0.26334907373774064,
"grad_norm": 15.318320274353027,
"learning_rate": 4.341627315655649e-05,
"loss": 0.6266,
"step": 1450
},
{
"epoch": 0.2651652742462768,
"grad_norm": 27.953414916992188,
"learning_rate": 4.337086814384308e-05,
"loss": 0.5541,
"step": 1460
},
{
"epoch": 0.26698147475481293,
"grad_norm": 12.882086753845215,
"learning_rate": 4.332546313112968e-05,
"loss": 0.5984,
"step": 1470
},
{
"epoch": 0.2687976752633491,
"grad_norm": 17.85333824157715,
"learning_rate": 4.328005811841628e-05,
"loss": 0.5021,
"step": 1480
},
{
"epoch": 0.2706138757718852,
"grad_norm": 11.902690887451172,
"learning_rate": 4.3234653105702874e-05,
"loss": 0.5995,
"step": 1490
},
{
"epoch": 0.2724300762804214,
"grad_norm": 8.324485778808594,
"learning_rate": 4.3189248092989474e-05,
"loss": 0.5337,
"step": 1500
},
{
"epoch": 0.2724300762804214,
"eval_accuracy": 0.7637407711238721,
"eval_f1": 0.7516135071707333,
"eval_loss": 0.6853081583976746,
"eval_precision": 0.7710051578059205,
"eval_recall": 0.7442999398826009,
"eval_runtime": 12.0321,
"eval_samples_per_second": 101.312,
"eval_steps_per_second": 6.4,
"step": 1500
},
{
"epoch": 0.2742462767889575,
"grad_norm": 17.059507369995117,
"learning_rate": 4.3143843080276066e-05,
"loss": 0.693,
"step": 1510
},
{
"epoch": 0.27606247729749367,
"grad_norm": 20.31708526611328,
"learning_rate": 4.3098438067562666e-05,
"loss": 0.5956,
"step": 1520
},
{
"epoch": 0.27787867780602976,
"grad_norm": 20.84437370300293,
"learning_rate": 4.305303305484926e-05,
"loss": 0.6239,
"step": 1530
},
{
"epoch": 0.2796948783145659,
"grad_norm": 22.729970932006836,
"learning_rate": 4.300762804213586e-05,
"loss": 0.5901,
"step": 1540
},
{
"epoch": 0.28151107882310206,
"grad_norm": 14.622097969055176,
"learning_rate": 4.296222302942245e-05,
"loss": 0.4957,
"step": 1550
},
{
"epoch": 0.2833272793316382,
"grad_norm": 18.853378295898438,
"learning_rate": 4.291681801670905e-05,
"loss": 0.544,
"step": 1560
},
{
"epoch": 0.28514347984017435,
"grad_norm": 20.18765640258789,
"learning_rate": 4.2871413003995644e-05,
"loss": 0.7035,
"step": 1570
},
{
"epoch": 0.2869596803487105,
"grad_norm": 15.882144927978516,
"learning_rate": 4.2826007991282244e-05,
"loss": 0.5633,
"step": 1580
},
{
"epoch": 0.28877588085724665,
"grad_norm": 14.607442855834961,
"learning_rate": 4.2780602978568836e-05,
"loss": 0.6074,
"step": 1590
},
{
"epoch": 0.2905920813657828,
"grad_norm": 17.136274337768555,
"learning_rate": 4.2735197965855436e-05,
"loss": 0.5523,
"step": 1600
},
{
"epoch": 0.29240828187431894,
"grad_norm": 7.1735429763793945,
"learning_rate": 4.268979295314203e-05,
"loss": 0.6283,
"step": 1610
},
{
"epoch": 0.2942244823828551,
"grad_norm": 11.038073539733887,
"learning_rate": 4.264438794042863e-05,
"loss": 0.6132,
"step": 1620
},
{
"epoch": 0.2960406828913912,
"grad_norm": 15.272370338439941,
"learning_rate": 4.259898292771522e-05,
"loss": 0.4732,
"step": 1630
},
{
"epoch": 0.29785688339992733,
"grad_norm": 23.69139289855957,
"learning_rate": 4.255357791500182e-05,
"loss": 0.5788,
"step": 1640
},
{
"epoch": 0.2996730839084635,
"grad_norm": 16.922348022460938,
"learning_rate": 4.2508172902288414e-05,
"loss": 0.5357,
"step": 1650
},
{
"epoch": 0.3014892844169996,
"grad_norm": 16.82611656188965,
"learning_rate": 4.2462767889575014e-05,
"loss": 0.5324,
"step": 1660
},
{
"epoch": 0.3033054849255358,
"grad_norm": 6.3888773918151855,
"learning_rate": 4.2417362876861606e-05,
"loss": 0.4909,
"step": 1670
},
{
"epoch": 0.3051216854340719,
"grad_norm": 11.367060661315918,
"learning_rate": 4.2371957864148206e-05,
"loss": 0.3793,
"step": 1680
},
{
"epoch": 0.30693788594260807,
"grad_norm": 13.870577812194824,
"learning_rate": 4.2326552851434806e-05,
"loss": 0.5448,
"step": 1690
},
{
"epoch": 0.3087540864511442,
"grad_norm": 18.472719192504883,
"learning_rate": 4.22811478387214e-05,
"loss": 0.5681,
"step": 1700
},
{
"epoch": 0.31057028695968036,
"grad_norm": 10.446008682250977,
"learning_rate": 4.2235742826008e-05,
"loss": 0.496,
"step": 1710
},
{
"epoch": 0.3123864874682165,
"grad_norm": 13.921605110168457,
"learning_rate": 4.219033781329459e-05,
"loss": 0.5314,
"step": 1720
},
{
"epoch": 0.31420268797675266,
"grad_norm": 17.571805953979492,
"learning_rate": 4.214493280058119e-05,
"loss": 0.6498,
"step": 1730
},
{
"epoch": 0.31601888848528875,
"grad_norm": 21.902027130126953,
"learning_rate": 4.2099527787867784e-05,
"loss": 0.5822,
"step": 1740
},
{
"epoch": 0.3178350889938249,
"grad_norm": 20.000957489013672,
"learning_rate": 4.205412277515438e-05,
"loss": 0.5377,
"step": 1750
},
{
"epoch": 0.31965128950236105,
"grad_norm": 12.021200180053711,
"learning_rate": 4.2008717762440976e-05,
"loss": 0.6164,
"step": 1760
},
{
"epoch": 0.3214674900108972,
"grad_norm": 24.00454330444336,
"learning_rate": 4.1963312749727576e-05,
"loss": 0.6348,
"step": 1770
},
{
"epoch": 0.32328369051943334,
"grad_norm": 14.13219928741455,
"learning_rate": 4.191790773701417e-05,
"loss": 0.5242,
"step": 1780
},
{
"epoch": 0.3250998910279695,
"grad_norm": 32.07684326171875,
"learning_rate": 4.187250272430077e-05,
"loss": 0.8162,
"step": 1790
},
{
"epoch": 0.32691609153650564,
"grad_norm": 16.062604904174805,
"learning_rate": 4.182709771158736e-05,
"loss": 0.568,
"step": 1800
},
{
"epoch": 0.32691609153650564,
"eval_accuracy": 0.7793273174733388,
"eval_f1": 0.7716766196492151,
"eval_loss": 0.6542023420333862,
"eval_precision": 0.7704320356934777,
"eval_recall": 0.7814793806758547,
"eval_runtime": 12.0427,
"eval_samples_per_second": 101.223,
"eval_steps_per_second": 6.394,
"step": 1800
},
{
"epoch": 0.3287322920450418,
"grad_norm": 14.167701721191406,
"learning_rate": 4.178169269887396e-05,
"loss": 0.502,
"step": 1810
},
{
"epoch": 0.33054849255357793,
"grad_norm": 16.717453002929688,
"learning_rate": 4.1736287686160554e-05,
"loss": 0.4441,
"step": 1820
},
{
"epoch": 0.3323646930621141,
"grad_norm": 24.774871826171875,
"learning_rate": 4.169088267344715e-05,
"loss": 0.5017,
"step": 1830
},
{
"epoch": 0.3341808935706502,
"grad_norm": 12.445333480834961,
"learning_rate": 4.1645477660733746e-05,
"loss": 0.4088,
"step": 1840
},
{
"epoch": 0.3359970940791863,
"grad_norm": 18.43295669555664,
"learning_rate": 4.1600072648020346e-05,
"loss": 0.6032,
"step": 1850
},
{
"epoch": 0.33781329458772247,
"grad_norm": 26.735172271728516,
"learning_rate": 4.155466763530694e-05,
"loss": 0.5611,
"step": 1860
},
{
"epoch": 0.3396294950962586,
"grad_norm": 18.15043067932129,
"learning_rate": 4.150926262259354e-05,
"loss": 0.4716,
"step": 1870
},
{
"epoch": 0.34144569560479476,
"grad_norm": 18.67064094543457,
"learning_rate": 4.146385760988013e-05,
"loss": 0.5643,
"step": 1880
},
{
"epoch": 0.3432618961133309,
"grad_norm": 21.009254455566406,
"learning_rate": 4.141845259716673e-05,
"loss": 0.6117,
"step": 1890
},
{
"epoch": 0.34507809662186706,
"grad_norm": 10.891605377197266,
"learning_rate": 4.137304758445333e-05,
"loss": 0.5336,
"step": 1900
},
{
"epoch": 0.3468942971304032,
"grad_norm": 6.9248504638671875,
"learning_rate": 4.132764257173992e-05,
"loss": 0.5683,
"step": 1910
},
{
"epoch": 0.34871049763893935,
"grad_norm": 20.700204849243164,
"learning_rate": 4.128223755902652e-05,
"loss": 0.5869,
"step": 1920
},
{
"epoch": 0.3505266981474755,
"grad_norm": 17.678829193115234,
"learning_rate": 4.1236832546313116e-05,
"loss": 0.5081,
"step": 1930
},
{
"epoch": 0.35234289865601165,
"grad_norm": 19.98926544189453,
"learning_rate": 4.1191427533599715e-05,
"loss": 0.4912,
"step": 1940
},
{
"epoch": 0.35415909916454774,
"grad_norm": 15.056520462036133,
"learning_rate": 4.114602252088631e-05,
"loss": 0.5509,
"step": 1950
},
{
"epoch": 0.3559752996730839,
"grad_norm": 19.50244903564453,
"learning_rate": 4.110061750817291e-05,
"loss": 0.5181,
"step": 1960
},
{
"epoch": 0.35779150018162004,
"grad_norm": 20.018245697021484,
"learning_rate": 4.10552124954595e-05,
"loss": 0.6597,
"step": 1970
},
{
"epoch": 0.3596077006901562,
"grad_norm": 18.31260108947754,
"learning_rate": 4.10098074827461e-05,
"loss": 0.7217,
"step": 1980
},
{
"epoch": 0.36142390119869233,
"grad_norm": 15.864529609680176,
"learning_rate": 4.096440247003269e-05,
"loss": 0.3586,
"step": 1990
},
{
"epoch": 0.3632401017072285,
"grad_norm": 9.000946998596191,
"learning_rate": 4.091899745731929e-05,
"loss": 0.5125,
"step": 2000
},
{
"epoch": 0.3650563022157646,
"grad_norm": 4.08746337890625,
"learning_rate": 4.0873592444605886e-05,
"loss": 0.5204,
"step": 2010
},
{
"epoch": 0.3668725027243008,
"grad_norm": 7.909645080566406,
"learning_rate": 4.0828187431892485e-05,
"loss": 0.3989,
"step": 2020
},
{
"epoch": 0.3686887032328369,
"grad_norm": 17.512380599975586,
"learning_rate": 4.078278241917908e-05,
"loss": 0.5338,
"step": 2030
},
{
"epoch": 0.37050490374137307,
"grad_norm": 22.659942626953125,
"learning_rate": 4.073737740646568e-05,
"loss": 0.5221,
"step": 2040
},
{
"epoch": 0.37232110424990916,
"grad_norm": 8.159212112426758,
"learning_rate": 4.069197239375227e-05,
"loss": 0.5502,
"step": 2050
},
{
"epoch": 0.3741373047584453,
"grad_norm": 20.236705780029297,
"learning_rate": 4.064656738103887e-05,
"loss": 0.4267,
"step": 2060
},
{
"epoch": 0.37595350526698146,
"grad_norm": 16.24159049987793,
"learning_rate": 4.060116236832546e-05,
"loss": 0.5693,
"step": 2070
},
{
"epoch": 0.3777697057755176,
"grad_norm": 14.921638488769531,
"learning_rate": 4.055575735561206e-05,
"loss": 0.5152,
"step": 2080
},
{
"epoch": 0.37958590628405375,
"grad_norm": 18.668865203857422,
"learning_rate": 4.0510352342898656e-05,
"loss": 0.6135,
"step": 2090
},
{
"epoch": 0.3814021067925899,
"grad_norm": 23.298078536987305,
"learning_rate": 4.0464947330185255e-05,
"loss": 0.5237,
"step": 2100
},
{
"epoch": 0.3814021067925899,
"eval_accuracy": 0.7752255947497949,
"eval_f1": 0.7694936459460316,
"eval_loss": 0.629031240940094,
"eval_precision": 0.7649651818415721,
"eval_recall": 0.7791450088077798,
"eval_runtime": 12.0845,
"eval_samples_per_second": 100.873,
"eval_steps_per_second": 6.372,
"step": 2100
},
{
"epoch": 0.38321830730112605,
"grad_norm": 14.145480155944824,
"learning_rate": 4.0419542317471855e-05,
"loss": 0.4478,
"step": 2110
},
{
"epoch": 0.3850345078096622,
"grad_norm": 21.260257720947266,
"learning_rate": 4.037413730475845e-05,
"loss": 0.4185,
"step": 2120
},
{
"epoch": 0.38685070831819834,
"grad_norm": 9.28508186340332,
"learning_rate": 4.032873229204505e-05,
"loss": 0.6288,
"step": 2130
},
{
"epoch": 0.3886669088267345,
"grad_norm": 12.114027976989746,
"learning_rate": 4.028332727933164e-05,
"loss": 0.5159,
"step": 2140
},
{
"epoch": 0.39048310933527064,
"grad_norm": 27.268280029296875,
"learning_rate": 4.023792226661824e-05,
"loss": 0.6385,
"step": 2150
},
{
"epoch": 0.39229930984380673,
"grad_norm": 15.575640678405762,
"learning_rate": 4.019251725390483e-05,
"loss": 0.5996,
"step": 2160
},
{
"epoch": 0.3941155103523429,
"grad_norm": 15.587044715881348,
"learning_rate": 4.014711224119143e-05,
"loss": 0.4721,
"step": 2170
},
{
"epoch": 0.395931710860879,
"grad_norm": 15.621984481811523,
"learning_rate": 4.0101707228478025e-05,
"loss": 0.49,
"step": 2180
},
{
"epoch": 0.3977479113694152,
"grad_norm": 10.312201499938965,
"learning_rate": 4.0056302215764625e-05,
"loss": 0.5224,
"step": 2190
},
{
"epoch": 0.3995641118779513,
"grad_norm": 17.903989791870117,
"learning_rate": 4.001089720305122e-05,
"loss": 0.4217,
"step": 2200
},
{
"epoch": 0.40138031238648747,
"grad_norm": 15.996349334716797,
"learning_rate": 3.996549219033782e-05,
"loss": 0.4906,
"step": 2210
},
{
"epoch": 0.4031965128950236,
"grad_norm": 21.02739715576172,
"learning_rate": 3.992008717762441e-05,
"loss": 0.6814,
"step": 2220
},
{
"epoch": 0.40501271340355977,
"grad_norm": 21.1398868560791,
"learning_rate": 3.987468216491101e-05,
"loss": 0.3679,
"step": 2230
},
{
"epoch": 0.4068289139120959,
"grad_norm": 24.41451072692871,
"learning_rate": 3.98292771521976e-05,
"loss": 0.4802,
"step": 2240
},
{
"epoch": 0.40864511442063206,
"grad_norm": 22.847251892089844,
"learning_rate": 3.97838721394842e-05,
"loss": 0.5097,
"step": 2250
},
{
"epoch": 0.41046131492916815,
"grad_norm": 14.888809204101562,
"learning_rate": 3.9738467126770795e-05,
"loss": 0.4294,
"step": 2260
},
{
"epoch": 0.4122775154377043,
"grad_norm": 20.353588104248047,
"learning_rate": 3.9693062114057395e-05,
"loss": 0.4949,
"step": 2270
},
{
"epoch": 0.41409371594624045,
"grad_norm": 11.559284210205078,
"learning_rate": 3.964765710134399e-05,
"loss": 0.4353,
"step": 2280
},
{
"epoch": 0.4159099164547766,
"grad_norm": 12.643139839172363,
"learning_rate": 3.960225208863059e-05,
"loss": 0.3885,
"step": 2290
},
{
"epoch": 0.41772611696331274,
"grad_norm": 4.294188022613525,
"learning_rate": 3.955684707591718e-05,
"loss": 0.4166,
"step": 2300
},
{
"epoch": 0.4195423174718489,
"grad_norm": 17.501489639282227,
"learning_rate": 3.951144206320378e-05,
"loss": 0.4092,
"step": 2310
},
{
"epoch": 0.42135851798038504,
"grad_norm": 9.189852714538574,
"learning_rate": 3.946603705049038e-05,
"loss": 0.5333,
"step": 2320
},
{
"epoch": 0.4231747184889212,
"grad_norm": 19.29057502746582,
"learning_rate": 3.942063203777697e-05,
"loss": 0.6406,
"step": 2330
},
{
"epoch": 0.42499091899745733,
"grad_norm": 15.98727035522461,
"learning_rate": 3.937522702506357e-05,
"loss": 0.4677,
"step": 2340
},
{
"epoch": 0.4268071195059935,
"grad_norm": 9.902159690856934,
"learning_rate": 3.9329822012350165e-05,
"loss": 0.5894,
"step": 2350
},
{
"epoch": 0.42862332001452963,
"grad_norm": 18.965747833251953,
"learning_rate": 3.9284416999636764e-05,
"loss": 0.6196,
"step": 2360
},
{
"epoch": 0.4304395205230657,
"grad_norm": 18.899520874023438,
"learning_rate": 3.923901198692336e-05,
"loss": 0.5199,
"step": 2370
},
{
"epoch": 0.43225572103160187,
"grad_norm": 12.890677452087402,
"learning_rate": 3.919360697420996e-05,
"loss": 0.583,
"step": 2380
},
{
"epoch": 0.434071921540138,
"grad_norm": 17.285070419311523,
"learning_rate": 3.914820196149655e-05,
"loss": 0.4777,
"step": 2390
},
{
"epoch": 0.43588812204867416,
"grad_norm": 11.499088287353516,
"learning_rate": 3.910279694878315e-05,
"loss": 0.4478,
"step": 2400
},
{
"epoch": 0.43588812204867416,
"eval_accuracy": 0.7908121410992617,
"eval_f1": 0.7894885784356462,
"eval_loss": 0.6197062134742737,
"eval_precision": 0.778593519395511,
"eval_recall": 0.8067083793113867,
"eval_runtime": 12.0738,
"eval_samples_per_second": 100.963,
"eval_steps_per_second": 6.377,
"step": 2400
},
{
"epoch": 0.4377043225572103,
"grad_norm": 11.969683647155762,
"learning_rate": 3.905739193606974e-05,
"loss": 0.4575,
"step": 2410
},
{
"epoch": 0.43952052306574646,
"grad_norm": 16.60710906982422,
"learning_rate": 3.901198692335634e-05,
"loss": 0.6813,
"step": 2420
},
{
"epoch": 0.4413367235742826,
"grad_norm": 7.043119430541992,
"learning_rate": 3.8966581910642935e-05,
"loss": 0.5086,
"step": 2430
},
{
"epoch": 0.44315292408281876,
"grad_norm": 24.139657974243164,
"learning_rate": 3.8921176897929534e-05,
"loss": 0.6707,
"step": 2440
},
{
"epoch": 0.4449691245913549,
"grad_norm": 10.262349128723145,
"learning_rate": 3.887577188521613e-05,
"loss": 0.4105,
"step": 2450
},
{
"epoch": 0.44678532509989105,
"grad_norm": 18.123046875,
"learning_rate": 3.883036687250273e-05,
"loss": 0.5051,
"step": 2460
},
{
"epoch": 0.44860152560842714,
"grad_norm": 14.30826187133789,
"learning_rate": 3.878496185978932e-05,
"loss": 0.4586,
"step": 2470
},
{
"epoch": 0.4504177261169633,
"grad_norm": 32.13856506347656,
"learning_rate": 3.873955684707592e-05,
"loss": 0.4564,
"step": 2480
},
{
"epoch": 0.45223392662549944,
"grad_norm": 21.877262115478516,
"learning_rate": 3.869415183436251e-05,
"loss": 0.5316,
"step": 2490
},
{
"epoch": 0.4540501271340356,
"grad_norm": 15.986939430236816,
"learning_rate": 3.864874682164911e-05,
"loss": 0.4488,
"step": 2500
},
{
"epoch": 0.45586632764257173,
"grad_norm": 15.963953018188477,
"learning_rate": 3.8603341808935705e-05,
"loss": 0.6677,
"step": 2510
},
{
"epoch": 0.4576825281511079,
"grad_norm": 29.52568244934082,
"learning_rate": 3.8557936796222304e-05,
"loss": 0.5992,
"step": 2520
},
{
"epoch": 0.45949872865964403,
"grad_norm": 18.61089324951172,
"learning_rate": 3.8512531783508904e-05,
"loss": 0.5565,
"step": 2530
},
{
"epoch": 0.4613149291681802,
"grad_norm": 23.38523292541504,
"learning_rate": 3.84671267707955e-05,
"loss": 0.5018,
"step": 2540
},
{
"epoch": 0.4631311296767163,
"grad_norm": 20.95744514465332,
"learning_rate": 3.8421721758082097e-05,
"loss": 0.5758,
"step": 2550
},
{
"epoch": 0.46494733018525247,
"grad_norm": 11.371984481811523,
"learning_rate": 3.837631674536869e-05,
"loss": 0.5553,
"step": 2560
},
{
"epoch": 0.4667635306937886,
"grad_norm": 21.71943473815918,
"learning_rate": 3.833091173265529e-05,
"loss": 0.5777,
"step": 2570
},
{
"epoch": 0.4685797312023247,
"grad_norm": 18.1218318939209,
"learning_rate": 3.828550671994188e-05,
"loss": 0.5863,
"step": 2580
},
{
"epoch": 0.47039593171086086,
"grad_norm": 19.874448776245117,
"learning_rate": 3.824010170722848e-05,
"loss": 0.5183,
"step": 2590
},
{
"epoch": 0.472212132219397,
"grad_norm": 19.063386917114258,
"learning_rate": 3.8194696694515074e-05,
"loss": 0.5381,
"step": 2600
},
{
"epoch": 0.47402833272793315,
"grad_norm": 9.372021675109863,
"learning_rate": 3.8149291681801674e-05,
"loss": 0.433,
"step": 2610
},
{
"epoch": 0.4758445332364693,
"grad_norm": 14.822279930114746,
"learning_rate": 3.810388666908827e-05,
"loss": 0.5518,
"step": 2620
},
{
"epoch": 0.47766073374500545,
"grad_norm": 13.066219329833984,
"learning_rate": 3.8058481656374867e-05,
"loss": 0.4946,
"step": 2630
},
{
"epoch": 0.4794769342535416,
"grad_norm": 18.113737106323242,
"learning_rate": 3.801307664366146e-05,
"loss": 0.4824,
"step": 2640
},
{
"epoch": 0.48129313476207775,
"grad_norm": 10.73379898071289,
"learning_rate": 3.796767163094806e-05,
"loss": 0.5541,
"step": 2650
},
{
"epoch": 0.4831093352706139,
"grad_norm": 25.9276065826416,
"learning_rate": 3.792226661823465e-05,
"loss": 0.5404,
"step": 2660
},
{
"epoch": 0.48492553577915004,
"grad_norm": 20.394275665283203,
"learning_rate": 3.787686160552125e-05,
"loss": 0.5106,
"step": 2670
},
{
"epoch": 0.48674173628768613,
"grad_norm": 23.263164520263672,
"learning_rate": 3.7831456592807844e-05,
"loss": 0.5293,
"step": 2680
},
{
"epoch": 0.4885579367962223,
"grad_norm": 13.967432975769043,
"learning_rate": 3.7786051580094444e-05,
"loss": 0.4886,
"step": 2690
},
{
"epoch": 0.49037413730475843,
"grad_norm": 18.502605438232422,
"learning_rate": 3.774064656738104e-05,
"loss": 0.5617,
"step": 2700
},
{
"epoch": 0.49037413730475843,
"eval_accuracy": 0.8039376538146021,
"eval_f1": 0.7953241400811288,
"eval_loss": 0.5712546110153198,
"eval_precision": 0.7878506982758448,
"eval_recall": 0.8087375226161377,
"eval_runtime": 12.0651,
"eval_samples_per_second": 101.035,
"eval_steps_per_second": 6.382,
"step": 2700
},
{
"epoch": 0.4921903378132946,
"grad_norm": 36.73835754394531,
"learning_rate": 3.7695241554667637e-05,
"loss": 0.6704,
"step": 2710
},
{
"epoch": 0.4940065383218307,
"grad_norm": 11.638787269592285,
"learning_rate": 3.764983654195423e-05,
"loss": 0.5284,
"step": 2720
},
{
"epoch": 0.49582273883036687,
"grad_norm": 22.700679779052734,
"learning_rate": 3.760443152924083e-05,
"loss": 0.5621,
"step": 2730
},
{
"epoch": 0.497638939338903,
"grad_norm": 10.612008094787598,
"learning_rate": 3.755902651652743e-05,
"loss": 0.572,
"step": 2740
},
{
"epoch": 0.49945513984743917,
"grad_norm": 8.393928527832031,
"learning_rate": 3.751362150381402e-05,
"loss": 0.4636,
"step": 2750
},
{
"epoch": 0.5012713403559753,
"grad_norm": 28.1651554107666,
"learning_rate": 3.746821649110062e-05,
"loss": 0.4955,
"step": 2760
},
{
"epoch": 0.5030875408645115,
"grad_norm": 20.283479690551758,
"learning_rate": 3.7422811478387214e-05,
"loss": 0.5398,
"step": 2770
},
{
"epoch": 0.5049037413730476,
"grad_norm": 12.401691436767578,
"learning_rate": 3.7377406465673814e-05,
"loss": 0.5622,
"step": 2780
},
{
"epoch": 0.5067199418815838,
"grad_norm": 31.315277099609375,
"learning_rate": 3.7332001452960407e-05,
"loss": 0.5752,
"step": 2790
},
{
"epoch": 0.5085361423901199,
"grad_norm": 17.91919708251953,
"learning_rate": 3.7286596440247006e-05,
"loss": 0.5953,
"step": 2800
},
{
"epoch": 0.510352342898656,
"grad_norm": 10.692752838134766,
"learning_rate": 3.72411914275336e-05,
"loss": 0.4012,
"step": 2810
},
{
"epoch": 0.5121685434071922,
"grad_norm": 17.449275970458984,
"learning_rate": 3.71957864148202e-05,
"loss": 0.4245,
"step": 2820
},
{
"epoch": 0.5139847439157283,
"grad_norm": 17.479352951049805,
"learning_rate": 3.715038140210679e-05,
"loss": 0.4908,
"step": 2830
},
{
"epoch": 0.5158009444242644,
"grad_norm": 20.10633659362793,
"learning_rate": 3.710497638939339e-05,
"loss": 0.5738,
"step": 2840
},
{
"epoch": 0.5176171449328005,
"grad_norm": 17.699560165405273,
"learning_rate": 3.7059571376679984e-05,
"loss": 0.4434,
"step": 2850
},
{
"epoch": 0.5194333454413367,
"grad_norm": 15.045440673828125,
"learning_rate": 3.7014166363966584e-05,
"loss": 0.4992,
"step": 2860
},
{
"epoch": 0.5212495459498728,
"grad_norm": 14.244542121887207,
"learning_rate": 3.6968761351253177e-05,
"loss": 0.5327,
"step": 2870
},
{
"epoch": 0.523065746458409,
"grad_norm": 11.60004997253418,
"learning_rate": 3.6923356338539776e-05,
"loss": 0.4859,
"step": 2880
},
{
"epoch": 0.5248819469669451,
"grad_norm": 8.768573760986328,
"learning_rate": 3.687795132582637e-05,
"loss": 0.5004,
"step": 2890
},
{
"epoch": 0.5266981474754813,
"grad_norm": 22.54417610168457,
"learning_rate": 3.683254631311297e-05,
"loss": 0.543,
"step": 2900
},
{
"epoch": 0.5285143479840174,
"grad_norm": 20.270061492919922,
"learning_rate": 3.678714130039956e-05,
"loss": 0.5296,
"step": 2910
},
{
"epoch": 0.5303305484925536,
"grad_norm": 18.757434844970703,
"learning_rate": 3.674173628768616e-05,
"loss": 0.3825,
"step": 2920
},
{
"epoch": 0.5321467490010897,
"grad_norm": 13.12435245513916,
"learning_rate": 3.6696331274972754e-05,
"loss": 0.545,
"step": 2930
},
{
"epoch": 0.5339629495096259,
"grad_norm": 23.035865783691406,
"learning_rate": 3.6650926262259354e-05,
"loss": 0.6143,
"step": 2940
},
{
"epoch": 0.535779150018162,
"grad_norm": 15.766834259033203,
"learning_rate": 3.660552124954595e-05,
"loss": 0.5893,
"step": 2950
},
{
"epoch": 0.5375953505266982,
"grad_norm": 11.79257869720459,
"learning_rate": 3.6560116236832546e-05,
"loss": 0.5365,
"step": 2960
},
{
"epoch": 0.5394115510352343,
"grad_norm": 22.071346282958984,
"learning_rate": 3.6514711224119146e-05,
"loss": 0.5344,
"step": 2970
},
{
"epoch": 0.5412277515437705,
"grad_norm": 16.728076934814453,
"learning_rate": 3.646930621140574e-05,
"loss": 0.5137,
"step": 2980
},
{
"epoch": 0.5430439520523066,
"grad_norm": 13.112013816833496,
"learning_rate": 3.642390119869234e-05,
"loss": 0.6091,
"step": 2990
},
{
"epoch": 0.5448601525608427,
"grad_norm": 15.373380661010742,
"learning_rate": 3.637849618597893e-05,
"loss": 0.4939,
"step": 3000
},
{
"epoch": 0.5448601525608427,
"eval_accuracy": 0.8039376538146021,
"eval_f1": 0.7973147509604641,
"eval_loss": 0.5636632442474365,
"eval_precision": 0.7937266489697907,
"eval_recall": 0.8050963810992605,
"eval_runtime": 12.0525,
"eval_samples_per_second": 101.141,
"eval_steps_per_second": 6.389,
"step": 3000
},
{
"epoch": 0.5466763530693789,
"grad_norm": 17.602785110473633,
"learning_rate": 3.633309117326553e-05,
"loss": 0.4411,
"step": 3010
},
{
"epoch": 0.548492553577915,
"grad_norm": 11.274548530578613,
"learning_rate": 3.6287686160552124e-05,
"loss": 0.5949,
"step": 3020
},
{
"epoch": 0.5503087540864512,
"grad_norm": 17.69841766357422,
"learning_rate": 3.624228114783872e-05,
"loss": 0.4447,
"step": 3030
},
{
"epoch": 0.5521249545949873,
"grad_norm": 21.106124877929688,
"learning_rate": 3.6196876135125316e-05,
"loss": 0.5316,
"step": 3040
},
{
"epoch": 0.5539411551035234,
"grad_norm": 21.903255462646484,
"learning_rate": 3.6151471122411916e-05,
"loss": 0.476,
"step": 3050
},
{
"epoch": 0.5557573556120595,
"grad_norm": 11.051823616027832,
"learning_rate": 3.610606610969851e-05,
"loss": 0.5307,
"step": 3060
},
{
"epoch": 0.5575735561205957,
"grad_norm": 5.928410530090332,
"learning_rate": 3.606066109698511e-05,
"loss": 0.4116,
"step": 3070
},
{
"epoch": 0.5593897566291318,
"grad_norm": 24.413103103637695,
"learning_rate": 3.60152560842717e-05,
"loss": 0.482,
"step": 3080
},
{
"epoch": 0.561205957137668,
"grad_norm": 17.25383949279785,
"learning_rate": 3.59698510715583e-05,
"loss": 0.5269,
"step": 3090
},
{
"epoch": 0.5630221576462041,
"grad_norm": 14.473711013793945,
"learning_rate": 3.5924446058844894e-05,
"loss": 0.5098,
"step": 3100
},
{
"epoch": 0.5648383581547403,
"grad_norm": 14.325135231018066,
"learning_rate": 3.587904104613149e-05,
"loss": 0.4476,
"step": 3110
},
{
"epoch": 0.5666545586632764,
"grad_norm": 22.374534606933594,
"learning_rate": 3.5833636033418086e-05,
"loss": 0.4768,
"step": 3120
},
{
"epoch": 0.5684707591718126,
"grad_norm": 22.39207649230957,
"learning_rate": 3.5788231020704686e-05,
"loss": 0.457,
"step": 3130
},
{
"epoch": 0.5702869596803487,
"grad_norm": 14.626873970031738,
"learning_rate": 3.574282600799128e-05,
"loss": 0.4465,
"step": 3140
},
{
"epoch": 0.5721031601888849,
"grad_norm": 23.05328369140625,
"learning_rate": 3.569742099527788e-05,
"loss": 0.4138,
"step": 3150
},
{
"epoch": 0.573919360697421,
"grad_norm": 16.360881805419922,
"learning_rate": 3.565201598256448e-05,
"loss": 0.4439,
"step": 3160
},
{
"epoch": 0.5757355612059571,
"grad_norm": 25.42070770263672,
"learning_rate": 3.560661096985107e-05,
"loss": 0.6645,
"step": 3170
},
{
"epoch": 0.5775517617144933,
"grad_norm": 22.610538482666016,
"learning_rate": 3.556120595713767e-05,
"loss": 0.4836,
"step": 3180
},
{
"epoch": 0.5793679622230294,
"grad_norm": 11.355021476745605,
"learning_rate": 3.551580094442426e-05,
"loss": 0.5534,
"step": 3190
},
{
"epoch": 0.5811841627315656,
"grad_norm": 18.886524200439453,
"learning_rate": 3.547039593171086e-05,
"loss": 0.5369,
"step": 3200
},
{
"epoch": 0.5830003632401017,
"grad_norm": 14.892853736877441,
"learning_rate": 3.5424990918997456e-05,
"loss": 0.4463,
"step": 3210
},
{
"epoch": 0.5848165637486379,
"grad_norm": 31.027605056762695,
"learning_rate": 3.5379585906284055e-05,
"loss": 0.4775,
"step": 3220
},
{
"epoch": 0.586632764257174,
"grad_norm": 11.664224624633789,
"learning_rate": 3.533418089357065e-05,
"loss": 0.5938,
"step": 3230
},
{
"epoch": 0.5884489647657102,
"grad_norm": 13.272047996520996,
"learning_rate": 3.528877588085725e-05,
"loss": 0.4925,
"step": 3240
},
{
"epoch": 0.5902651652742463,
"grad_norm": 13.521268844604492,
"learning_rate": 3.524337086814384e-05,
"loss": 0.504,
"step": 3250
},
{
"epoch": 0.5920813657827824,
"grad_norm": 10.777715682983398,
"learning_rate": 3.519796585543044e-05,
"loss": 0.51,
"step": 3260
},
{
"epoch": 0.5938975662913185,
"grad_norm": 16.920635223388672,
"learning_rate": 3.515256084271703e-05,
"loss": 0.5315,
"step": 3270
},
{
"epoch": 0.5957137667998547,
"grad_norm": 22.00889778137207,
"learning_rate": 3.510715583000363e-05,
"loss": 0.4741,
"step": 3280
},
{
"epoch": 0.5975299673083908,
"grad_norm": 14.849915504455566,
"learning_rate": 3.5061750817290226e-05,
"loss": 0.4805,
"step": 3290
},
{
"epoch": 0.599346167816927,
"grad_norm": 22.403329849243164,
"learning_rate": 3.5016345804576825e-05,
"loss": 0.5531,
"step": 3300
},
{
"epoch": 0.599346167816927,
"eval_accuracy": 0.8146021328958163,
"eval_f1": 0.8074596115450074,
"eval_loss": 0.5683770179748535,
"eval_precision": 0.8052208334869901,
"eval_recall": 0.8145117560161068,
"eval_runtime": 12.0545,
"eval_samples_per_second": 101.124,
"eval_steps_per_second": 6.388,
"step": 3300
},
{
"epoch": 0.6011623683254631,
"grad_norm": 13.46020221710205,
"learning_rate": 3.497094079186342e-05,
"loss": 0.5227,
"step": 3310
},
{
"epoch": 0.6029785688339993,
"grad_norm": 22.012182235717773,
"learning_rate": 3.492553577915002e-05,
"loss": 0.4315,
"step": 3320
},
{
"epoch": 0.6047947693425354,
"grad_norm": 16.392894744873047,
"learning_rate": 3.488013076643661e-05,
"loss": 0.474,
"step": 3330
},
{
"epoch": 0.6066109698510715,
"grad_norm": 19.60003089904785,
"learning_rate": 3.483472575372321e-05,
"loss": 0.3914,
"step": 3340
},
{
"epoch": 0.6084271703596077,
"grad_norm": 24.537080764770508,
"learning_rate": 3.47893207410098e-05,
"loss": 0.4278,
"step": 3350
},
{
"epoch": 0.6102433708681438,
"grad_norm": 22.935487747192383,
"learning_rate": 3.47439157282964e-05,
"loss": 0.6627,
"step": 3360
},
{
"epoch": 0.61205957137668,
"grad_norm": 19.0701847076416,
"learning_rate": 3.4698510715583e-05,
"loss": 0.4398,
"step": 3370
},
{
"epoch": 0.6138757718852161,
"grad_norm": 11.604155540466309,
"learning_rate": 3.4653105702869595e-05,
"loss": 0.6131,
"step": 3380
},
{
"epoch": 0.6156919723937523,
"grad_norm": 17.911949157714844,
"learning_rate": 3.4607700690156195e-05,
"loss": 0.4957,
"step": 3390
},
{
"epoch": 0.6175081729022884,
"grad_norm": 12.859588623046875,
"learning_rate": 3.456229567744279e-05,
"loss": 0.5457,
"step": 3400
},
{
"epoch": 0.6193243734108246,
"grad_norm": 17.096111297607422,
"learning_rate": 3.451689066472939e-05,
"loss": 0.4664,
"step": 3410
},
{
"epoch": 0.6211405739193607,
"grad_norm": 17.198429107666016,
"learning_rate": 3.447148565201598e-05,
"loss": 0.3655,
"step": 3420
},
{
"epoch": 0.6229567744278969,
"grad_norm": 7.782280445098877,
"learning_rate": 3.442608063930258e-05,
"loss": 0.4909,
"step": 3430
},
{
"epoch": 0.624772974936433,
"grad_norm": 13.99974250793457,
"learning_rate": 3.438067562658917e-05,
"loss": 0.5252,
"step": 3440
},
{
"epoch": 0.6265891754449692,
"grad_norm": 26.579198837280273,
"learning_rate": 3.433527061387577e-05,
"loss": 0.4605,
"step": 3450
},
{
"epoch": 0.6284053759535053,
"grad_norm": 23.17647361755371,
"learning_rate": 3.4289865601162365e-05,
"loss": 0.3984,
"step": 3460
},
{
"epoch": 0.6302215764620414,
"grad_norm": 15.169466972351074,
"learning_rate": 3.4244460588448965e-05,
"loss": 0.4396,
"step": 3470
},
{
"epoch": 0.6320377769705775,
"grad_norm": 18.425457000732422,
"learning_rate": 3.4199055575735565e-05,
"loss": 0.5219,
"step": 3480
},
{
"epoch": 0.6338539774791137,
"grad_norm": 11.86226749420166,
"learning_rate": 3.415365056302216e-05,
"loss": 0.5649,
"step": 3490
},
{
"epoch": 0.6356701779876498,
"grad_norm": 18.50494384765625,
"learning_rate": 3.410824555030876e-05,
"loss": 0.5174,
"step": 3500
},
{
"epoch": 0.637486378496186,
"grad_norm": 36.33973693847656,
"learning_rate": 3.406284053759535e-05,
"loss": 0.6008,
"step": 3510
},
{
"epoch": 0.6393025790047221,
"grad_norm": 12.04764175415039,
"learning_rate": 3.401743552488195e-05,
"loss": 0.375,
"step": 3520
},
{
"epoch": 0.6411187795132582,
"grad_norm": 19.642751693725586,
"learning_rate": 3.397203051216854e-05,
"loss": 0.4088,
"step": 3530
},
{
"epoch": 0.6429349800217944,
"grad_norm": 11.035579681396484,
"learning_rate": 3.392662549945514e-05,
"loss": 0.4665,
"step": 3540
},
{
"epoch": 0.6447511805303305,
"grad_norm": 9.772668838500977,
"learning_rate": 3.3881220486741735e-05,
"loss": 0.4081,
"step": 3550
},
{
"epoch": 0.6465673810388667,
"grad_norm": 15.26156997680664,
"learning_rate": 3.3835815474028335e-05,
"loss": 0.5479,
"step": 3560
},
{
"epoch": 0.6483835815474028,
"grad_norm": 16.603866577148438,
"learning_rate": 3.379041046131493e-05,
"loss": 0.5206,
"step": 3570
},
{
"epoch": 0.650199782055939,
"grad_norm": 14.417247772216797,
"learning_rate": 3.374500544860153e-05,
"loss": 0.4639,
"step": 3580
},
{
"epoch": 0.6520159825644751,
"grad_norm": 14.06032943725586,
"learning_rate": 3.369960043588813e-05,
"loss": 0.5256,
"step": 3590
},
{
"epoch": 0.6538321830730113,
"grad_norm": 19.377899169921875,
"learning_rate": 3.365419542317472e-05,
"loss": 0.4589,
"step": 3600
},
{
"epoch": 0.6538321830730113,
"eval_accuracy": 0.815422477440525,
"eval_f1": 0.8098112032026681,
"eval_loss": 0.5438756346702576,
"eval_precision": 0.8087363057639664,
"eval_recall": 0.8171923146479461,
"eval_runtime": 12.0597,
"eval_samples_per_second": 101.081,
"eval_steps_per_second": 6.385,
"step": 3600
},
{
"epoch": 0.6556483835815474,
"grad_norm": 10.339813232421875,
"learning_rate": 3.360879041046132e-05,
"loss": 0.5665,
"step": 3610
},
{
"epoch": 0.6574645840900836,
"grad_norm": 12.189675331115723,
"learning_rate": 3.356338539774791e-05,
"loss": 0.341,
"step": 3620
},
{
"epoch": 0.6592807845986197,
"grad_norm": 17.71584701538086,
"learning_rate": 3.351798038503451e-05,
"loss": 0.3679,
"step": 3630
},
{
"epoch": 0.6610969851071559,
"grad_norm": 12.258733749389648,
"learning_rate": 3.3472575372321105e-05,
"loss": 0.5311,
"step": 3640
},
{
"epoch": 0.662913185615692,
"grad_norm": 22.354339599609375,
"learning_rate": 3.3427170359607704e-05,
"loss": 0.4843,
"step": 3650
},
{
"epoch": 0.6647293861242282,
"grad_norm": 14.635857582092285,
"learning_rate": 3.33817653468943e-05,
"loss": 0.4105,
"step": 3660
},
{
"epoch": 0.6665455866327643,
"grad_norm": 15.776519775390625,
"learning_rate": 3.33363603341809e-05,
"loss": 0.5662,
"step": 3670
},
{
"epoch": 0.6683617871413003,
"grad_norm": 16.750410079956055,
"learning_rate": 3.329095532146749e-05,
"loss": 0.4221,
"step": 3680
},
{
"epoch": 0.6701779876498365,
"grad_norm": 14.167458534240723,
"learning_rate": 3.324555030875409e-05,
"loss": 0.3916,
"step": 3690
},
{
"epoch": 0.6719941881583726,
"grad_norm": 12.054675102233887,
"learning_rate": 3.320014529604069e-05,
"loss": 0.4332,
"step": 3700
},
{
"epoch": 0.6738103886669088,
"grad_norm": 17.444786071777344,
"learning_rate": 3.315474028332728e-05,
"loss": 0.4895,
"step": 3710
},
{
"epoch": 0.6756265891754449,
"grad_norm": 12.62495231628418,
"learning_rate": 3.310933527061388e-05,
"loss": 0.484,
"step": 3720
},
{
"epoch": 0.6774427896839811,
"grad_norm": 17.694808959960938,
"learning_rate": 3.3063930257900474e-05,
"loss": 0.5494,
"step": 3730
},
{
"epoch": 0.6792589901925172,
"grad_norm": 9.741250038146973,
"learning_rate": 3.3018525245187074e-05,
"loss": 0.5297,
"step": 3740
},
{
"epoch": 0.6810751907010534,
"grad_norm": 9.227933883666992,
"learning_rate": 3.297312023247367e-05,
"loss": 0.4591,
"step": 3750
},
{
"epoch": 0.6828913912095895,
"grad_norm": 22.44287109375,
"learning_rate": 3.2927715219760266e-05,
"loss": 0.5248,
"step": 3760
},
{
"epoch": 0.6847075917181257,
"grad_norm": 19.741558074951172,
"learning_rate": 3.288231020704686e-05,
"loss": 0.532,
"step": 3770
},
{
"epoch": 0.6865237922266618,
"grad_norm": 21.53546142578125,
"learning_rate": 3.283690519433346e-05,
"loss": 0.5514,
"step": 3780
},
{
"epoch": 0.688339992735198,
"grad_norm": 16.261137008666992,
"learning_rate": 3.279150018162005e-05,
"loss": 0.5721,
"step": 3790
},
{
"epoch": 0.6901561932437341,
"grad_norm": 7.155134677886963,
"learning_rate": 3.274609516890665e-05,
"loss": 0.4068,
"step": 3800
},
{
"epoch": 0.6919723937522703,
"grad_norm": 8.603271484375,
"learning_rate": 3.270069015619325e-05,
"loss": 0.3959,
"step": 3810
},
{
"epoch": 0.6937885942608064,
"grad_norm": 18.37700080871582,
"learning_rate": 3.2655285143479844e-05,
"loss": 0.5014,
"step": 3820
},
{
"epoch": 0.6956047947693426,
"grad_norm": 13.100898742675781,
"learning_rate": 3.2609880130766444e-05,
"loss": 0.5567,
"step": 3830
},
{
"epoch": 0.6974209952778787,
"grad_norm": 13.200430870056152,
"learning_rate": 3.2564475118053036e-05,
"loss": 0.4679,
"step": 3840
},
{
"epoch": 0.6992371957864149,
"grad_norm": 12.253862380981445,
"learning_rate": 3.2519070105339636e-05,
"loss": 0.4516,
"step": 3850
},
{
"epoch": 0.701053396294951,
"grad_norm": 6.870277404785156,
"learning_rate": 3.247366509262623e-05,
"loss": 0.5378,
"step": 3860
},
{
"epoch": 0.7028695968034872,
"grad_norm": 14.495081901550293,
"learning_rate": 3.242826007991283e-05,
"loss": 0.5009,
"step": 3870
},
{
"epoch": 0.7046857973120233,
"grad_norm": 3.442812919616699,
"learning_rate": 3.238285506719942e-05,
"loss": 0.4108,
"step": 3880
},
{
"epoch": 0.7065019978205593,
"grad_norm": 5.441460609436035,
"learning_rate": 3.233745005448602e-05,
"loss": 0.3479,
"step": 3890
},
{
"epoch": 0.7083181983290955,
"grad_norm": 12.709874153137207,
"learning_rate": 3.2292045041772614e-05,
"loss": 0.3864,
"step": 3900
},
{
"epoch": 0.7083181983290955,
"eval_accuracy": 0.8105004101722724,
"eval_f1": 0.799452724152361,
"eval_loss": 0.5857027769088745,
"eval_precision": 0.7947749172284474,
"eval_recall": 0.8158609235209215,
"eval_runtime": 12.0674,
"eval_samples_per_second": 101.016,
"eval_steps_per_second": 6.381,
"step": 3900
},
{
"epoch": 0.7101343988376316,
"grad_norm": 10.219947814941406,
"learning_rate": 3.2246640029059214e-05,
"loss": 0.6109,
"step": 3910
},
{
"epoch": 0.7119505993461678,
"grad_norm": 22.518009185791016,
"learning_rate": 3.2201235016345806e-05,
"loss": 0.5354,
"step": 3920
},
{
"epoch": 0.7137667998547039,
"grad_norm": 9.188867568969727,
"learning_rate": 3.2155830003632406e-05,
"loss": 0.5216,
"step": 3930
},
{
"epoch": 0.7155830003632401,
"grad_norm": 16.9005184173584,
"learning_rate": 3.2110424990919e-05,
"loss": 0.4771,
"step": 3940
},
{
"epoch": 0.7173992008717762,
"grad_norm": 14.375580787658691,
"learning_rate": 3.20650199782056e-05,
"loss": 0.4197,
"step": 3950
},
{
"epoch": 0.7192154013803124,
"grad_norm": 14.258020401000977,
"learning_rate": 3.201961496549219e-05,
"loss": 0.4487,
"step": 3960
},
{
"epoch": 0.7210316018888485,
"grad_norm": 11.470094680786133,
"learning_rate": 3.197420995277879e-05,
"loss": 0.4176,
"step": 3970
},
{
"epoch": 0.7228478023973847,
"grad_norm": 12.606728553771973,
"learning_rate": 3.1928804940065384e-05,
"loss": 0.5001,
"step": 3980
},
{
"epoch": 0.7246640029059208,
"grad_norm": 25.704116821289062,
"learning_rate": 3.1883399927351983e-05,
"loss": 0.5424,
"step": 3990
},
{
"epoch": 0.726480203414457,
"grad_norm": 15.872344017028809,
"learning_rate": 3.183799491463858e-05,
"loss": 0.589,
"step": 4000
},
{
"epoch": 0.7282964039229931,
"grad_norm": 8.058246612548828,
"learning_rate": 3.1792589901925176e-05,
"loss": 0.4356,
"step": 4010
},
{
"epoch": 0.7301126044315293,
"grad_norm": 18.3121337890625,
"learning_rate": 3.1747184889211776e-05,
"loss": 0.4245,
"step": 4020
},
{
"epoch": 0.7319288049400654,
"grad_norm": 13.85145378112793,
"learning_rate": 3.170177987649837e-05,
"loss": 0.4336,
"step": 4030
},
{
"epoch": 0.7337450054486016,
"grad_norm": 11.043869018554688,
"learning_rate": 3.165637486378497e-05,
"loss": 0.4433,
"step": 4040
},
{
"epoch": 0.7355612059571377,
"grad_norm": 14.299675941467285,
"learning_rate": 3.161096985107156e-05,
"loss": 0.4482,
"step": 4050
},
{
"epoch": 0.7373774064656738,
"grad_norm": 9.129308700561523,
"learning_rate": 3.156556483835816e-05,
"loss": 0.5591,
"step": 4060
},
{
"epoch": 0.73919360697421,
"grad_norm": 15.059881210327148,
"learning_rate": 3.1520159825644753e-05,
"loss": 0.4043,
"step": 4070
},
{
"epoch": 0.7410098074827461,
"grad_norm": 14.52391242980957,
"learning_rate": 3.147475481293135e-05,
"loss": 0.4029,
"step": 4080
},
{
"epoch": 0.7428260079912823,
"grad_norm": 14.165828704833984,
"learning_rate": 3.1429349800217946e-05,
"loss": 0.5121,
"step": 4090
},
{
"epoch": 0.7446422084998183,
"grad_norm": 19.52725601196289,
"learning_rate": 3.1383944787504546e-05,
"loss": 0.4375,
"step": 4100
},
{
"epoch": 0.7464584090083545,
"grad_norm": 18.168001174926758,
"learning_rate": 3.133853977479114e-05,
"loss": 0.541,
"step": 4110
},
{
"epoch": 0.7482746095168906,
"grad_norm": 23.436870574951172,
"learning_rate": 3.129313476207774e-05,
"loss": 0.5703,
"step": 4120
},
{
"epoch": 0.7500908100254268,
"grad_norm": 16.01010513305664,
"learning_rate": 3.124772974936433e-05,
"loss": 0.4326,
"step": 4130
},
{
"epoch": 0.7519070105339629,
"grad_norm": 15.457175254821777,
"learning_rate": 3.120232473665093e-05,
"loss": 0.379,
"step": 4140
},
{
"epoch": 0.7537232110424991,
"grad_norm": 17.524295806884766,
"learning_rate": 3.1156919723937523e-05,
"loss": 0.392,
"step": 4150
},
{
"epoch": 0.7555394115510352,
"grad_norm": 19.16515350341797,
"learning_rate": 3.111151471122412e-05,
"loss": 0.5474,
"step": 4160
},
{
"epoch": 0.7573556120595714,
"grad_norm": 12.622529029846191,
"learning_rate": 3.1066109698510716e-05,
"loss": 0.4343,
"step": 4170
},
{
"epoch": 0.7591718125681075,
"grad_norm": 12.761943817138672,
"learning_rate": 3.1020704685797316e-05,
"loss": 0.4281,
"step": 4180
},
{
"epoch": 0.7609880130766437,
"grad_norm": 15.795944213867188,
"learning_rate": 3.097529967308391e-05,
"loss": 0.4434,
"step": 4190
},
{
"epoch": 0.7628042135851798,
"grad_norm": 6.286984920501709,
"learning_rate": 3.092989466037051e-05,
"loss": 0.4196,
"step": 4200
},
{
"epoch": 0.7628042135851798,
"eval_accuracy": 0.8105004101722724,
"eval_f1": 0.8031176631002425,
"eval_loss": 0.5337262749671936,
"eval_precision": 0.8017654292844819,
"eval_recall": 0.811221918165322,
"eval_runtime": 12.0674,
"eval_samples_per_second": 101.016,
"eval_steps_per_second": 6.381,
"step": 4200
},
{
"epoch": 0.764620414093716,
"grad_norm": 53.68039321899414,
"learning_rate": 3.088448964765711e-05,
"loss": 0.5151,
"step": 4210
},
{
"epoch": 0.7664366146022521,
"grad_norm": 10.041727066040039,
"learning_rate": 3.08390846349437e-05,
"loss": 0.4146,
"step": 4220
},
{
"epoch": 0.7682528151107882,
"grad_norm": 25.997821807861328,
"learning_rate": 3.07936796222303e-05,
"loss": 0.556,
"step": 4230
},
{
"epoch": 0.7700690156193244,
"grad_norm": 13.25404167175293,
"learning_rate": 3.074827460951689e-05,
"loss": 0.513,
"step": 4240
},
{
"epoch": 0.7718852161278605,
"grad_norm": 23.45793342590332,
"learning_rate": 3.070286959680349e-05,
"loss": 0.4953,
"step": 4250
},
{
"epoch": 0.7737014166363967,
"grad_norm": 18.79665756225586,
"learning_rate": 3.0657464584090086e-05,
"loss": 0.4196,
"step": 4260
},
{
"epoch": 0.7755176171449328,
"grad_norm": 15.050500869750977,
"learning_rate": 3.0612059571376685e-05,
"loss": 0.3523,
"step": 4270
},
{
"epoch": 0.777333817653469,
"grad_norm": 17.48199462890625,
"learning_rate": 3.056665455866328e-05,
"loss": 0.4707,
"step": 4280
},
{
"epoch": 0.7791500181620051,
"grad_norm": 12.54255199432373,
"learning_rate": 3.052124954594988e-05,
"loss": 0.3119,
"step": 4290
},
{
"epoch": 0.7809662186705413,
"grad_norm": 19.040857315063477,
"learning_rate": 3.047584453323647e-05,
"loss": 0.3681,
"step": 4300
},
{
"epoch": 0.7827824191790773,
"grad_norm": 22.418601989746094,
"learning_rate": 3.043043952052307e-05,
"loss": 0.4745,
"step": 4310
},
{
"epoch": 0.7845986196876135,
"grad_norm": 19.793771743774414,
"learning_rate": 3.0385034507809663e-05,
"loss": 0.4836,
"step": 4320
},
{
"epoch": 0.7864148201961496,
"grad_norm": 10.669327735900879,
"learning_rate": 3.0339629495096263e-05,
"loss": 0.4829,
"step": 4330
},
{
"epoch": 0.7882310207046858,
"grad_norm": 22.390172958374023,
"learning_rate": 3.0294224482382856e-05,
"loss": 0.6336,
"step": 4340
},
{
"epoch": 0.7900472212132219,
"grad_norm": 15.641258239746094,
"learning_rate": 3.0248819469669455e-05,
"loss": 0.5083,
"step": 4350
},
{
"epoch": 0.791863421721758,
"grad_norm": 20.096162796020508,
"learning_rate": 3.0203414456956048e-05,
"loss": 0.5094,
"step": 4360
},
{
"epoch": 0.7936796222302942,
"grad_norm": 22.68675422668457,
"learning_rate": 3.0158009444242648e-05,
"loss": 0.5245,
"step": 4370
},
{
"epoch": 0.7954958227388303,
"grad_norm": 29.42097282409668,
"learning_rate": 3.011260443152924e-05,
"loss": 0.4459,
"step": 4380
},
{
"epoch": 0.7973120232473665,
"grad_norm": 19.64771842956543,
"learning_rate": 3.006719941881584e-05,
"loss": 0.4514,
"step": 4390
},
{
"epoch": 0.7991282237559026,
"grad_norm": 21.052167892456055,
"learning_rate": 3.0021794406102433e-05,
"loss": 0.4875,
"step": 4400
},
{
"epoch": 0.8009444242644388,
"grad_norm": 22.617921829223633,
"learning_rate": 2.9976389393389033e-05,
"loss": 0.5775,
"step": 4410
},
{
"epoch": 0.8027606247729749,
"grad_norm": 18.567598342895508,
"learning_rate": 2.9930984380675632e-05,
"loss": 0.3457,
"step": 4420
},
{
"epoch": 0.8045768252815111,
"grad_norm": 13.792886734008789,
"learning_rate": 2.9885579367962225e-05,
"loss": 0.3967,
"step": 4430
},
{
"epoch": 0.8063930257900472,
"grad_norm": 24.021446228027344,
"learning_rate": 2.9840174355248825e-05,
"loss": 0.5235,
"step": 4440
},
{
"epoch": 0.8082092262985834,
"grad_norm": 14.933148384094238,
"learning_rate": 2.9794769342535418e-05,
"loss": 0.5006,
"step": 4450
},
{
"epoch": 0.8100254268071195,
"grad_norm": 18.85728645324707,
"learning_rate": 2.9749364329822017e-05,
"loss": 0.408,
"step": 4460
},
{
"epoch": 0.8118416273156557,
"grad_norm": 18.168296813964844,
"learning_rate": 2.970395931710861e-05,
"loss": 0.5026,
"step": 4470
},
{
"epoch": 0.8136578278241918,
"grad_norm": 12.585858345031738,
"learning_rate": 2.965855430439521e-05,
"loss": 0.3576,
"step": 4480
},
{
"epoch": 0.815474028332728,
"grad_norm": 21.00431251525879,
"learning_rate": 2.9613149291681803e-05,
"loss": 0.494,
"step": 4490
},
{
"epoch": 0.8172902288412641,
"grad_norm": 32.093345642089844,
"learning_rate": 2.9567744278968402e-05,
"loss": 0.4508,
"step": 4500
},
{
"epoch": 0.8172902288412641,
"eval_accuracy": 0.8162428219852338,
"eval_f1": 0.8092636477145345,
"eval_loss": 0.4987526535987854,
"eval_precision": 0.8023527484407372,
"eval_recall": 0.8208478993572457,
"eval_runtime": 12.0751,
"eval_samples_per_second": 100.951,
"eval_steps_per_second": 6.377,
"step": 4500
},
{
"epoch": 0.8191064293498003,
"grad_norm": 9.480962753295898,
"learning_rate": 2.9522339266254995e-05,
"loss": 0.3415,
"step": 4510
},
{
"epoch": 0.8209226298583363,
"grad_norm": 21.77303123474121,
"learning_rate": 2.9476934253541595e-05,
"loss": 0.5777,
"step": 4520
},
{
"epoch": 0.8227388303668725,
"grad_norm": 17.37676429748535,
"learning_rate": 2.9431529240828188e-05,
"loss": 0.5402,
"step": 4530
},
{
"epoch": 0.8245550308754086,
"grad_norm": 8.430058479309082,
"learning_rate": 2.9386124228114787e-05,
"loss": 0.5605,
"step": 4540
},
{
"epoch": 0.8263712313839447,
"grad_norm": 11.52684211730957,
"learning_rate": 2.934071921540138e-05,
"loss": 0.4402,
"step": 4550
},
{
"epoch": 0.8281874318924809,
"grad_norm": 15.570836067199707,
"learning_rate": 2.929531420268798e-05,
"loss": 0.4715,
"step": 4560
},
{
"epoch": 0.830003632401017,
"grad_norm": 17.31182289123535,
"learning_rate": 2.9249909189974573e-05,
"loss": 0.4377,
"step": 4570
},
{
"epoch": 0.8318198329095532,
"grad_norm": 17.72749900817871,
"learning_rate": 2.9204504177261172e-05,
"loss": 0.574,
"step": 4580
},
{
"epoch": 0.8336360334180893,
"grad_norm": 4.865232467651367,
"learning_rate": 2.9159099164547765e-05,
"loss": 0.4036,
"step": 4590
},
{
"epoch": 0.8354522339266255,
"grad_norm": 13.070740699768066,
"learning_rate": 2.9113694151834365e-05,
"loss": 0.4395,
"step": 4600
},
{
"epoch": 0.8372684344351616,
"grad_norm": 11.224090576171875,
"learning_rate": 2.9068289139120958e-05,
"loss": 0.5105,
"step": 4610
},
{
"epoch": 0.8390846349436978,
"grad_norm": 10.014636039733887,
"learning_rate": 2.9022884126407557e-05,
"loss": 0.3832,
"step": 4620
},
{
"epoch": 0.8409008354522339,
"grad_norm": 16.736953735351562,
"learning_rate": 2.8977479113694157e-05,
"loss": 0.5902,
"step": 4630
},
{
"epoch": 0.8427170359607701,
"grad_norm": 19.752222061157227,
"learning_rate": 2.893207410098075e-05,
"loss": 0.4689,
"step": 4640
},
{
"epoch": 0.8445332364693062,
"grad_norm": 21.56574058532715,
"learning_rate": 2.888666908826735e-05,
"loss": 0.424,
"step": 4650
},
{
"epoch": 0.8463494369778424,
"grad_norm": 7.042162895202637,
"learning_rate": 2.8841264075553942e-05,
"loss": 0.4833,
"step": 4660
},
{
"epoch": 0.8481656374863785,
"grad_norm": 28.993854522705078,
"learning_rate": 2.8795859062840542e-05,
"loss": 0.549,
"step": 4670
},
{
"epoch": 0.8499818379949147,
"grad_norm": 10.317886352539062,
"learning_rate": 2.8750454050127135e-05,
"loss": 0.5293,
"step": 4680
},
{
"epoch": 0.8517980385034508,
"grad_norm": 17.66384506225586,
"learning_rate": 2.8705049037413734e-05,
"loss": 0.4197,
"step": 4690
},
{
"epoch": 0.853614239011987,
"grad_norm": 24.199743270874023,
"learning_rate": 2.8659644024700327e-05,
"loss": 0.4935,
"step": 4700
},
{
"epoch": 0.8554304395205231,
"grad_norm": 6.78064489364624,
"learning_rate": 2.8614239011986927e-05,
"loss": 0.4021,
"step": 4710
},
{
"epoch": 0.8572466400290593,
"grad_norm": 12.244144439697266,
"learning_rate": 2.856883399927352e-05,
"loss": 0.487,
"step": 4720
},
{
"epoch": 0.8590628405375953,
"grad_norm": 13.118432998657227,
"learning_rate": 2.852342898656012e-05,
"loss": 0.4862,
"step": 4730
},
{
"epoch": 0.8608790410461314,
"grad_norm": 12.08948040008545,
"learning_rate": 2.8478023973846712e-05,
"loss": 0.5039,
"step": 4740
},
{
"epoch": 0.8626952415546676,
"grad_norm": 18.59223175048828,
"learning_rate": 2.8432618961133312e-05,
"loss": 0.3387,
"step": 4750
},
{
"epoch": 0.8645114420632037,
"grad_norm": 16.462608337402344,
"learning_rate": 2.8387213948419905e-05,
"loss": 0.4173,
"step": 4760
},
{
"epoch": 0.8663276425717399,
"grad_norm": 19.474824905395508,
"learning_rate": 2.8341808935706504e-05,
"loss": 0.4793,
"step": 4770
},
{
"epoch": 0.868143843080276,
"grad_norm": 9.90221118927002,
"learning_rate": 2.8296403922993097e-05,
"loss": 0.4677,
"step": 4780
},
{
"epoch": 0.8699600435888122,
"grad_norm": 18.47876739501953,
"learning_rate": 2.8250998910279697e-05,
"loss": 0.3815,
"step": 4790
},
{
"epoch": 0.8717762440973483,
"grad_norm": 16.144685745239258,
"learning_rate": 2.820559389756629e-05,
"loss": 0.5303,
"step": 4800
},
{
"epoch": 0.8717762440973483,
"eval_accuracy": 0.8351107465135357,
"eval_f1": 0.8281745117415222,
"eval_loss": 0.48171207308769226,
"eval_precision": 0.8221184344103021,
"eval_recall": 0.8358880640041798,
"eval_runtime": 12.0571,
"eval_samples_per_second": 101.102,
"eval_steps_per_second": 6.386,
"step": 4800
},
{
"epoch": 0.8735924446058845,
"grad_norm": 15.96438980102539,
"learning_rate": 2.816018888485289e-05,
"loss": 0.4498,
"step": 4810
},
{
"epoch": 0.8754086451144206,
"grad_norm": 19.435787200927734,
"learning_rate": 2.8114783872139482e-05,
"loss": 0.3781,
"step": 4820
},
{
"epoch": 0.8772248456229568,
"grad_norm": 17.200559616088867,
"learning_rate": 2.8069378859426082e-05,
"loss": 0.4631,
"step": 4830
},
{
"epoch": 0.8790410461314929,
"grad_norm": 12.658839225769043,
"learning_rate": 2.802397384671268e-05,
"loss": 0.4265,
"step": 4840
},
{
"epoch": 0.8808572466400291,
"grad_norm": 7.684325695037842,
"learning_rate": 2.7978568833999274e-05,
"loss": 0.4284,
"step": 4850
},
{
"epoch": 0.8826734471485652,
"grad_norm": 12.922738075256348,
"learning_rate": 2.7933163821285874e-05,
"loss": 0.3229,
"step": 4860
},
{
"epoch": 0.8844896476571014,
"grad_norm": 23.311817169189453,
"learning_rate": 2.7887758808572467e-05,
"loss": 0.4021,
"step": 4870
},
{
"epoch": 0.8863058481656375,
"grad_norm": 14.780502319335938,
"learning_rate": 2.7842353795859067e-05,
"loss": 0.4722,
"step": 4880
},
{
"epoch": 0.8881220486741737,
"grad_norm": 12.9520902633667,
"learning_rate": 2.779694878314566e-05,
"loss": 0.3892,
"step": 4890
},
{
"epoch": 0.8899382491827098,
"grad_norm": 22.6149845123291,
"learning_rate": 2.775154377043226e-05,
"loss": 0.5177,
"step": 4900
},
{
"epoch": 0.891754449691246,
"grad_norm": 19.335704803466797,
"learning_rate": 2.7706138757718852e-05,
"loss": 0.4524,
"step": 4910
},
{
"epoch": 0.8935706501997821,
"grad_norm": 11.706412315368652,
"learning_rate": 2.766073374500545e-05,
"loss": 0.3219,
"step": 4920
},
{
"epoch": 0.8953868507083182,
"grad_norm": 24.9698543548584,
"learning_rate": 2.7615328732292044e-05,
"loss": 0.4699,
"step": 4930
},
{
"epoch": 0.8972030512168543,
"grad_norm": 22.411867141723633,
"learning_rate": 2.7569923719578644e-05,
"loss": 0.4928,
"step": 4940
},
{
"epoch": 0.8990192517253904,
"grad_norm": 15.261788368225098,
"learning_rate": 2.7524518706865237e-05,
"loss": 0.4825,
"step": 4950
},
{
"epoch": 0.9008354522339266,
"grad_norm": 3.8997724056243896,
"learning_rate": 2.7479113694151837e-05,
"loss": 0.3152,
"step": 4960
},
{
"epoch": 0.9026516527424627,
"grad_norm": 17.017913818359375,
"learning_rate": 2.743370868143843e-05,
"loss": 0.5825,
"step": 4970
},
{
"epoch": 0.9044678532509989,
"grad_norm": 12.277453422546387,
"learning_rate": 2.738830366872503e-05,
"loss": 0.4168,
"step": 4980
},
{
"epoch": 0.906284053759535,
"grad_norm": 10.170480728149414,
"learning_rate": 2.7342898656011622e-05,
"loss": 0.3428,
"step": 4990
},
{
"epoch": 0.9081002542680712,
"grad_norm": 17.166027069091797,
"learning_rate": 2.729749364329822e-05,
"loss": 0.3343,
"step": 5000
},
{
"epoch": 0.9099164547766073,
"grad_norm": 13.139386177062988,
"learning_rate": 2.7252088630584814e-05,
"loss": 0.3653,
"step": 5010
},
{
"epoch": 0.9117326552851435,
"grad_norm": 17.63907814025879,
"learning_rate": 2.7206683617871414e-05,
"loss": 0.4842,
"step": 5020
},
{
"epoch": 0.9135488557936796,
"grad_norm": 11.699908256530762,
"learning_rate": 2.7161278605158007e-05,
"loss": 0.5177,
"step": 5030
},
{
"epoch": 0.9153650563022158,
"grad_norm": 13.266210556030273,
"learning_rate": 2.7115873592444607e-05,
"loss": 0.4534,
"step": 5040
},
{
"epoch": 0.9171812568107519,
"grad_norm": 9.60502815246582,
"learning_rate": 2.7070468579731206e-05,
"loss": 0.3299,
"step": 5050
},
{
"epoch": 0.9189974573192881,
"grad_norm": 17.09486198425293,
"learning_rate": 2.70250635670178e-05,
"loss": 0.4957,
"step": 5060
},
{
"epoch": 0.9208136578278242,
"grad_norm": 17.392698287963867,
"learning_rate": 2.69796585543044e-05,
"loss": 0.4678,
"step": 5070
},
{
"epoch": 0.9226298583363604,
"grad_norm": 17.839717864990234,
"learning_rate": 2.693425354159099e-05,
"loss": 0.5732,
"step": 5080
},
{
"epoch": 0.9244460588448965,
"grad_norm": 14.015562057495117,
"learning_rate": 2.688884852887759e-05,
"loss": 0.3867,
"step": 5090
},
{
"epoch": 0.9262622593534326,
"grad_norm": 13.306390762329102,
"learning_rate": 2.6843443516164184e-05,
"loss": 0.4422,
"step": 5100
},
{
"epoch": 0.9262622593534326,
"eval_accuracy": 0.8236259228876128,
"eval_f1": 0.8174273731975736,
"eval_loss": 0.5039647221565247,
"eval_precision": 0.8178974051720215,
"eval_recall": 0.8269853120695915,
"eval_runtime": 12.0903,
"eval_samples_per_second": 100.824,
"eval_steps_per_second": 6.369,
"step": 5100
},
{
"epoch": 0.9280784598619688,
"grad_norm": 15.518729209899902,
"learning_rate": 2.6798038503450784e-05,
"loss": 0.5069,
"step": 5110
},
{
"epoch": 0.9298946603705049,
"grad_norm": 20.26007843017578,
"learning_rate": 2.6752633490737377e-05,
"loss": 0.5047,
"step": 5120
},
{
"epoch": 0.9317108608790411,
"grad_norm": 10.854071617126465,
"learning_rate": 2.6707228478023976e-05,
"loss": 0.5326,
"step": 5130
},
{
"epoch": 0.9335270613875772,
"grad_norm": 12.248214721679688,
"learning_rate": 2.666182346531057e-05,
"loss": 0.4037,
"step": 5140
},
{
"epoch": 0.9353432618961133,
"grad_norm": 9.578265190124512,
"learning_rate": 2.661641845259717e-05,
"loss": 0.3088,
"step": 5150
},
{
"epoch": 0.9371594624046494,
"grad_norm": 19.308855056762695,
"learning_rate": 2.657101343988376e-05,
"loss": 0.391,
"step": 5160
},
{
"epoch": 0.9389756629131856,
"grad_norm": 16.941064834594727,
"learning_rate": 2.652560842717036e-05,
"loss": 0.3419,
"step": 5170
},
{
"epoch": 0.9407918634217217,
"grad_norm": 21.8260440826416,
"learning_rate": 2.6480203414456954e-05,
"loss": 0.671,
"step": 5180
},
{
"epoch": 0.9426080639302579,
"grad_norm": 22.408531188964844,
"learning_rate": 2.6434798401743554e-05,
"loss": 0.4909,
"step": 5190
},
{
"epoch": 0.944424264438794,
"grad_norm": 21.8310546875,
"learning_rate": 2.6389393389030147e-05,
"loss": 0.4541,
"step": 5200
},
{
"epoch": 0.9462404649473302,
"grad_norm": 21.634668350219727,
"learning_rate": 2.6343988376316746e-05,
"loss": 0.3987,
"step": 5210
},
{
"epoch": 0.9480566654558663,
"grad_norm": 16.159862518310547,
"learning_rate": 2.6298583363603342e-05,
"loss": 0.5047,
"step": 5220
},
{
"epoch": 0.9498728659644025,
"grad_norm": 10.970589637756348,
"learning_rate": 2.625317835088994e-05,
"loss": 0.4124,
"step": 5230
},
{
"epoch": 0.9516890664729386,
"grad_norm": 19.680744171142578,
"learning_rate": 2.6207773338176535e-05,
"loss": 0.5401,
"step": 5240
},
{
"epoch": 0.9535052669814748,
"grad_norm": 10.408095359802246,
"learning_rate": 2.616236832546313e-05,
"loss": 0.4913,
"step": 5250
},
{
"epoch": 0.9553214674900109,
"grad_norm": 8.389443397521973,
"learning_rate": 2.611696331274973e-05,
"loss": 0.3567,
"step": 5260
},
{
"epoch": 0.957137667998547,
"grad_norm": 11.20021915435791,
"learning_rate": 2.6071558300036324e-05,
"loss": 0.3231,
"step": 5270
},
{
"epoch": 0.9589538685070832,
"grad_norm": 20.043296813964844,
"learning_rate": 2.6026153287322923e-05,
"loss": 0.6047,
"step": 5280
},
{
"epoch": 0.9607700690156193,
"grad_norm": 20.225339889526367,
"learning_rate": 2.5980748274609516e-05,
"loss": 0.442,
"step": 5290
},
{
"epoch": 0.9625862695241555,
"grad_norm": 14.611661911010742,
"learning_rate": 2.5935343261896116e-05,
"loss": 0.4598,
"step": 5300
},
{
"epoch": 0.9644024700326916,
"grad_norm": 10.53466510772705,
"learning_rate": 2.588993824918271e-05,
"loss": 0.3438,
"step": 5310
},
{
"epoch": 0.9662186705412278,
"grad_norm": 23.299837112426758,
"learning_rate": 2.5844533236469308e-05,
"loss": 0.3867,
"step": 5320
},
{
"epoch": 0.9680348710497639,
"grad_norm": 13.314847946166992,
"learning_rate": 2.5799128223755905e-05,
"loss": 0.5519,
"step": 5330
},
{
"epoch": 0.9698510715583001,
"grad_norm": 10.600733757019043,
"learning_rate": 2.57537232110425e-05,
"loss": 0.4463,
"step": 5340
},
{
"epoch": 0.9716672720668362,
"grad_norm": 23.0856990814209,
"learning_rate": 2.5708318198329097e-05,
"loss": 0.4518,
"step": 5350
},
{
"epoch": 0.9734834725753723,
"grad_norm": 16.995450973510742,
"learning_rate": 2.5662913185615693e-05,
"loss": 0.4835,
"step": 5360
},
{
"epoch": 0.9752996730839084,
"grad_norm": 13.805352210998535,
"learning_rate": 2.561750817290229e-05,
"loss": 0.4145,
"step": 5370
},
{
"epoch": 0.9771158735924446,
"grad_norm": 7.604394435882568,
"learning_rate": 2.5572103160188886e-05,
"loss": 0.3946,
"step": 5380
},
{
"epoch": 0.9789320741009807,
"grad_norm": 12.790209770202637,
"learning_rate": 2.5526698147475482e-05,
"loss": 0.4734,
"step": 5390
},
{
"epoch": 0.9807482746095169,
"grad_norm": 13.206761360168457,
"learning_rate": 2.5481293134762078e-05,
"loss": 0.4081,
"step": 5400
},
{
"epoch": 0.9807482746095169,
"eval_accuracy": 0.8244462674323215,
"eval_f1": 0.8170131934114577,
"eval_loss": 0.4968956708908081,
"eval_precision": 0.8102785727429149,
"eval_recall": 0.8306898382408164,
"eval_runtime": 12.0822,
"eval_samples_per_second": 100.892,
"eval_steps_per_second": 6.373,
"step": 5400
},
{
"epoch": 0.982564475118053,
"grad_norm": 11.270719528198242,
"learning_rate": 2.5435888122048675e-05,
"loss": 0.4856,
"step": 5410
},
{
"epoch": 0.9843806756265892,
"grad_norm": 8.846707344055176,
"learning_rate": 2.539048310933527e-05,
"loss": 0.4354,
"step": 5420
},
{
"epoch": 0.9861968761351253,
"grad_norm": 22.749967575073242,
"learning_rate": 2.5345078096621867e-05,
"loss": 0.479,
"step": 5430
},
{
"epoch": 0.9880130766436614,
"grad_norm": 16.036691665649414,
"learning_rate": 2.5299673083908467e-05,
"loss": 0.4042,
"step": 5440
},
{
"epoch": 0.9898292771521976,
"grad_norm": 13.034772872924805,
"learning_rate": 2.525426807119506e-05,
"loss": 0.4203,
"step": 5450
},
{
"epoch": 0.9916454776607337,
"grad_norm": 16.55560302734375,
"learning_rate": 2.520886305848166e-05,
"loss": 0.325,
"step": 5460
},
{
"epoch": 0.9934616781692699,
"grad_norm": 17.726268768310547,
"learning_rate": 2.5163458045768255e-05,
"loss": 0.5221,
"step": 5470
},
{
"epoch": 0.995277878677806,
"grad_norm": 16.85834503173828,
"learning_rate": 2.511805303305485e-05,
"loss": 0.4584,
"step": 5480
},
{
"epoch": 0.9970940791863422,
"grad_norm": 20.59090232849121,
"learning_rate": 2.5072648020341448e-05,
"loss": 0.687,
"step": 5490
},
{
"epoch": 0.9989102796948783,
"grad_norm": 11.305227279663086,
"learning_rate": 2.5027243007628044e-05,
"loss": 0.3293,
"step": 5500
},
{
"epoch": 1.0007264802034144,
"grad_norm": 5.661748886108398,
"learning_rate": 2.498183799491464e-05,
"loss": 0.3034,
"step": 5510
},
{
"epoch": 1.0025426807119506,
"grad_norm": 14.591137886047363,
"learning_rate": 2.4936432982201237e-05,
"loss": 0.2698,
"step": 5520
},
{
"epoch": 1.0043588812204867,
"grad_norm": 18.574607849121094,
"learning_rate": 2.4891027969487833e-05,
"loss": 0.3438,
"step": 5530
},
{
"epoch": 1.006175081729023,
"grad_norm": 12.30125904083252,
"learning_rate": 2.484562295677443e-05,
"loss": 0.3503,
"step": 5540
},
{
"epoch": 1.007991282237559,
"grad_norm": 25.125314712524414,
"learning_rate": 2.4800217944061025e-05,
"loss": 0.3518,
"step": 5550
},
{
"epoch": 1.0098074827460952,
"grad_norm": 10.978049278259277,
"learning_rate": 2.475481293134762e-05,
"loss": 0.3292,
"step": 5560
},
{
"epoch": 1.0116236832546313,
"grad_norm": 9.796133995056152,
"learning_rate": 2.470940791863422e-05,
"loss": 0.2946,
"step": 5570
},
{
"epoch": 1.0134398837631675,
"grad_norm": 28.557024002075195,
"learning_rate": 2.4664002905920818e-05,
"loss": 0.4914,
"step": 5580
},
{
"epoch": 1.0152560842717036,
"grad_norm": 15.893627166748047,
"learning_rate": 2.4618597893207414e-05,
"loss": 0.3519,
"step": 5590
},
{
"epoch": 1.0170722847802398,
"grad_norm": 15.343416213989258,
"learning_rate": 2.457319288049401e-05,
"loss": 0.4028,
"step": 5600
},
{
"epoch": 1.0188884852887758,
"grad_norm": 9.576445579528809,
"learning_rate": 2.4527787867780606e-05,
"loss": 0.3718,
"step": 5610
},
{
"epoch": 1.020704685797312,
"grad_norm": 7.0438232421875,
"learning_rate": 2.4482382855067202e-05,
"loss": 0.191,
"step": 5620
},
{
"epoch": 1.0225208863058481,
"grad_norm": 7.872592926025391,
"learning_rate": 2.44369778423538e-05,
"loss": 0.2752,
"step": 5630
},
{
"epoch": 1.0243370868143844,
"grad_norm": 19.647167205810547,
"learning_rate": 2.4391572829640395e-05,
"loss": 0.3226,
"step": 5640
},
{
"epoch": 1.0261532873229204,
"grad_norm": 7.968392848968506,
"learning_rate": 2.434616781692699e-05,
"loss": 0.2653,
"step": 5650
},
{
"epoch": 1.0279694878314567,
"grad_norm": 17.07377052307129,
"learning_rate": 2.4300762804213587e-05,
"loss": 0.3296,
"step": 5660
},
{
"epoch": 1.0297856883399927,
"grad_norm": 18.10598373413086,
"learning_rate": 2.4255357791500184e-05,
"loss": 0.2421,
"step": 5670
},
{
"epoch": 1.0316018888485288,
"grad_norm": 10.867464065551758,
"learning_rate": 2.420995277878678e-05,
"loss": 0.301,
"step": 5680
},
{
"epoch": 1.033418089357065,
"grad_norm": 15.795087814331055,
"learning_rate": 2.4164547766073376e-05,
"loss": 0.2956,
"step": 5690
},
{
"epoch": 1.035234289865601,
"grad_norm": 19.456615447998047,
"learning_rate": 2.4119142753359972e-05,
"loss": 0.2555,
"step": 5700
},
{
"epoch": 1.035234289865601,
"eval_accuracy": 0.8285479901558654,
"eval_f1": 0.8213955767333186,
"eval_loss": 0.5004270076751709,
"eval_precision": 0.8160496038609667,
"eval_recall": 0.8316668454229845,
"eval_runtime": 12.1355,
"eval_samples_per_second": 100.449,
"eval_steps_per_second": 6.345,
"step": 5700
},
{
"epoch": 1.0370504903741373,
"grad_norm": 10.63526725769043,
"learning_rate": 2.407373774064657e-05,
"loss": 0.3511,
"step": 5710
},
{
"epoch": 1.0388666908826734,
"grad_norm": 5.104264259338379,
"learning_rate": 2.4028332727933165e-05,
"loss": 0.2357,
"step": 5720
},
{
"epoch": 1.0406828913912096,
"grad_norm": 16.865144729614258,
"learning_rate": 2.398292771521976e-05,
"loss": 0.4097,
"step": 5730
},
{
"epoch": 1.0424990918997457,
"grad_norm": 22.08740997314453,
"learning_rate": 2.3937522702506357e-05,
"loss": 0.3789,
"step": 5740
},
{
"epoch": 1.044315292408282,
"grad_norm": 28.62466812133789,
"learning_rate": 2.3892117689792954e-05,
"loss": 0.3074,
"step": 5750
},
{
"epoch": 1.046131492916818,
"grad_norm": 27.597490310668945,
"learning_rate": 2.384671267707955e-05,
"loss": 0.2929,
"step": 5760
},
{
"epoch": 1.0479476934253542,
"grad_norm": 20.991607666015625,
"learning_rate": 2.3801307664366146e-05,
"loss": 0.4398,
"step": 5770
},
{
"epoch": 1.0497638939338902,
"grad_norm": 20.590211868286133,
"learning_rate": 2.3755902651652746e-05,
"loss": 0.2788,
"step": 5780
},
{
"epoch": 1.0515800944424265,
"grad_norm": 11.073917388916016,
"learning_rate": 2.3710497638939342e-05,
"loss": 0.1954,
"step": 5790
},
{
"epoch": 1.0533962949509625,
"grad_norm": 14.334949493408203,
"learning_rate": 2.366509262622594e-05,
"loss": 0.3096,
"step": 5800
},
{
"epoch": 1.0552124954594988,
"grad_norm": 6.725550651550293,
"learning_rate": 2.3619687613512535e-05,
"loss": 0.4892,
"step": 5810
},
{
"epoch": 1.0570286959680348,
"grad_norm": 16.188831329345703,
"learning_rate": 2.357428260079913e-05,
"loss": 0.3451,
"step": 5820
},
{
"epoch": 1.058844896476571,
"grad_norm": 10.715399742126465,
"learning_rate": 2.3528877588085727e-05,
"loss": 0.4155,
"step": 5830
},
{
"epoch": 1.0606610969851071,
"grad_norm": 18.30307388305664,
"learning_rate": 2.3483472575372323e-05,
"loss": 0.3095,
"step": 5840
},
{
"epoch": 1.0624772974936434,
"grad_norm": 9.688103675842285,
"learning_rate": 2.343806756265892e-05,
"loss": 0.3121,
"step": 5850
},
{
"epoch": 1.0642934980021794,
"grad_norm": 22.700828552246094,
"learning_rate": 2.3392662549945516e-05,
"loss": 0.2068,
"step": 5860
},
{
"epoch": 1.0661096985107157,
"grad_norm": 11.1968994140625,
"learning_rate": 2.3347257537232112e-05,
"loss": 0.2767,
"step": 5870
},
{
"epoch": 1.0679258990192517,
"grad_norm": 25.934579849243164,
"learning_rate": 2.330185252451871e-05,
"loss": 0.3119,
"step": 5880
},
{
"epoch": 1.069742099527788,
"grad_norm": 7.8139824867248535,
"learning_rate": 2.3256447511805305e-05,
"loss": 0.2135,
"step": 5890
},
{
"epoch": 1.071558300036324,
"grad_norm": 18.086198806762695,
"learning_rate": 2.32110424990919e-05,
"loss": 0.4011,
"step": 5900
},
{
"epoch": 1.07337450054486,
"grad_norm": 22.85544776916504,
"learning_rate": 2.3165637486378497e-05,
"loss": 0.4042,
"step": 5910
},
{
"epoch": 1.0751907010533963,
"grad_norm": 22.67595672607422,
"learning_rate": 2.3120232473665093e-05,
"loss": 0.289,
"step": 5920
},
{
"epoch": 1.0770069015619324,
"grad_norm": 8.212250709533691,
"learning_rate": 2.307482746095169e-05,
"loss": 0.4169,
"step": 5930
},
{
"epoch": 1.0788231020704686,
"grad_norm": 22.84626007080078,
"learning_rate": 2.3029422448238286e-05,
"loss": 0.3653,
"step": 5940
},
{
"epoch": 1.0806393025790046,
"grad_norm": 13.91925048828125,
"learning_rate": 2.2984017435524882e-05,
"loss": 0.2087,
"step": 5950
},
{
"epoch": 1.082455503087541,
"grad_norm": 22.284345626831055,
"learning_rate": 2.293861242281148e-05,
"loss": 0.3626,
"step": 5960
},
{
"epoch": 1.084271703596077,
"grad_norm": 15.127870559692383,
"learning_rate": 2.2893207410098075e-05,
"loss": 0.2541,
"step": 5970
},
{
"epoch": 1.0860879041046132,
"grad_norm": 6.7608113288879395,
"learning_rate": 2.284780239738467e-05,
"loss": 0.2726,
"step": 5980
},
{
"epoch": 1.0879041046131492,
"grad_norm": 13.903280258178711,
"learning_rate": 2.280239738467127e-05,
"loss": 0.3793,
"step": 5990
},
{
"epoch": 1.0897203051216855,
"grad_norm": 15.691337585449219,
"learning_rate": 2.2756992371957867e-05,
"loss": 0.2741,
"step": 6000
},
{
"epoch": 1.0897203051216855,
"eval_accuracy": 0.8301886792452831,
"eval_f1": 0.8241579951716257,
"eval_loss": 0.46239912509918213,
"eval_precision": 0.8201764967723821,
"eval_recall": 0.8354772684817514,
"eval_runtime": 12.131,
"eval_samples_per_second": 100.486,
"eval_steps_per_second": 6.347,
"step": 6000
},
{
"epoch": 1.0915365056302215,
"grad_norm": 23.5091609954834,
"learning_rate": 2.2711587359244463e-05,
"loss": 0.3384,
"step": 6010
},
{
"epoch": 1.0933527061387578,
"grad_norm": 16.730623245239258,
"learning_rate": 2.266618234653106e-05,
"loss": 0.4145,
"step": 6020
},
{
"epoch": 1.0951689066472938,
"grad_norm": 12.925875663757324,
"learning_rate": 2.2620777333817655e-05,
"loss": 0.2396,
"step": 6030
},
{
"epoch": 1.09698510715583,
"grad_norm": 1.2459120750427246,
"learning_rate": 2.2575372321104252e-05,
"loss": 0.2662,
"step": 6040
},
{
"epoch": 1.0988013076643661,
"grad_norm": 11.28719711303711,
"learning_rate": 2.2529967308390848e-05,
"loss": 0.2615,
"step": 6050
},
{
"epoch": 1.1006175081729024,
"grad_norm": 17.44615936279297,
"learning_rate": 2.2484562295677444e-05,
"loss": 0.3594,
"step": 6060
},
{
"epoch": 1.1024337086814384,
"grad_norm": 12.629523277282715,
"learning_rate": 2.243915728296404e-05,
"loss": 0.3975,
"step": 6070
},
{
"epoch": 1.1042499091899747,
"grad_norm": 16.39533805847168,
"learning_rate": 2.2393752270250637e-05,
"loss": 0.2663,
"step": 6080
},
{
"epoch": 1.1060661096985107,
"grad_norm": 6.332333564758301,
"learning_rate": 2.2348347257537233e-05,
"loss": 0.221,
"step": 6090
},
{
"epoch": 1.1078823102070467,
"grad_norm": 8.119811058044434,
"learning_rate": 2.230294224482383e-05,
"loss": 0.2462,
"step": 6100
},
{
"epoch": 1.109698510715583,
"grad_norm": 13.648475646972656,
"learning_rate": 2.2257537232110425e-05,
"loss": 0.2634,
"step": 6110
},
{
"epoch": 1.111514711224119,
"grad_norm": 2.348459005355835,
"learning_rate": 2.2212132219397022e-05,
"loss": 0.3967,
"step": 6120
},
{
"epoch": 1.1133309117326553,
"grad_norm": 20.70992660522461,
"learning_rate": 2.2166727206683618e-05,
"loss": 0.3852,
"step": 6130
},
{
"epoch": 1.1151471122411913,
"grad_norm": 9.073955535888672,
"learning_rate": 2.2121322193970214e-05,
"loss": 0.3379,
"step": 6140
},
{
"epoch": 1.1169633127497276,
"grad_norm": 18.57855796813965,
"learning_rate": 2.207591718125681e-05,
"loss": 0.3118,
"step": 6150
},
{
"epoch": 1.1187795132582636,
"grad_norm": 13.104948043823242,
"learning_rate": 2.2030512168543407e-05,
"loss": 0.3165,
"step": 6160
},
{
"epoch": 1.1205957137668,
"grad_norm": 17.0866756439209,
"learning_rate": 2.1985107155830003e-05,
"loss": 0.3737,
"step": 6170
},
{
"epoch": 1.122411914275336,
"grad_norm": 10.766948699951172,
"learning_rate": 2.19397021431166e-05,
"loss": 0.2323,
"step": 6180
},
{
"epoch": 1.1242281147838722,
"grad_norm": 14.713004112243652,
"learning_rate": 2.1894297130403195e-05,
"loss": 0.2995,
"step": 6190
},
{
"epoch": 1.1260443152924082,
"grad_norm": 15.085183143615723,
"learning_rate": 2.1848892117689795e-05,
"loss": 0.3476,
"step": 6200
},
{
"epoch": 1.1278605158009445,
"grad_norm": 12.6240816116333,
"learning_rate": 2.180348710497639e-05,
"loss": 0.2693,
"step": 6210
},
{
"epoch": 1.1296767163094805,
"grad_norm": 16.312969207763672,
"learning_rate": 2.1758082092262988e-05,
"loss": 0.4585,
"step": 6220
},
{
"epoch": 1.1314929168180168,
"grad_norm": 4.667062282562256,
"learning_rate": 2.1712677079549584e-05,
"loss": 0.1519,
"step": 6230
},
{
"epoch": 1.1333091173265528,
"grad_norm": 10.980841636657715,
"learning_rate": 2.166727206683618e-05,
"loss": 0.3728,
"step": 6240
},
{
"epoch": 1.135125317835089,
"grad_norm": 12.60606575012207,
"learning_rate": 2.1621867054122776e-05,
"loss": 0.2508,
"step": 6250
},
{
"epoch": 1.1369415183436251,
"grad_norm": 24.3731689453125,
"learning_rate": 2.1576462041409373e-05,
"loss": 0.3333,
"step": 6260
},
{
"epoch": 1.1387577188521614,
"grad_norm": 14.52236557006836,
"learning_rate": 2.153105702869597e-05,
"loss": 0.3393,
"step": 6270
},
{
"epoch": 1.1405739193606974,
"grad_norm": 18.642431259155273,
"learning_rate": 2.1485652015982565e-05,
"loss": 0.2943,
"step": 6280
},
{
"epoch": 1.1423901198692334,
"grad_norm": 17.546001434326172,
"learning_rate": 2.144024700326916e-05,
"loss": 0.4127,
"step": 6290
},
{
"epoch": 1.1442063203777697,
"grad_norm": 17.17730712890625,
"learning_rate": 2.1394841990555758e-05,
"loss": 0.4123,
"step": 6300
},
{
"epoch": 1.1442063203777697,
"eval_accuracy": 0.8408531583264971,
"eval_f1": 0.8356285591942396,
"eval_loss": 0.4855496287345886,
"eval_precision": 0.8294180649644707,
"eval_recall": 0.8508328584053949,
"eval_runtime": 12.1359,
"eval_samples_per_second": 100.445,
"eval_steps_per_second": 6.345,
"step": 6300
},
{
"epoch": 1.146022520886306,
"grad_norm": 12.82174301147461,
"learning_rate": 2.1349436977842354e-05,
"loss": 0.222,
"step": 6310
},
{
"epoch": 1.147838721394842,
"grad_norm": 13.084376335144043,
"learning_rate": 2.130403196512895e-05,
"loss": 0.2926,
"step": 6320
},
{
"epoch": 1.149654921903378,
"grad_norm": 15.497282028198242,
"learning_rate": 2.1258626952415546e-05,
"loss": 0.3175,
"step": 6330
},
{
"epoch": 1.1514711224119143,
"grad_norm": 13.51550579071045,
"learning_rate": 2.1213221939702143e-05,
"loss": 0.3562,
"step": 6340
},
{
"epoch": 1.1532873229204503,
"grad_norm": 23.226682662963867,
"learning_rate": 2.116781692698874e-05,
"loss": 0.3547,
"step": 6350
},
{
"epoch": 1.1551035234289866,
"grad_norm": 19.47138023376465,
"learning_rate": 2.1122411914275335e-05,
"loss": 0.2397,
"step": 6360
},
{
"epoch": 1.1569197239375226,
"grad_norm": 20.718000411987305,
"learning_rate": 2.107700690156193e-05,
"loss": 0.2287,
"step": 6370
},
{
"epoch": 1.1587359244460589,
"grad_norm": 15.798551559448242,
"learning_rate": 2.1031601888848528e-05,
"loss": 0.2555,
"step": 6380
},
{
"epoch": 1.160552124954595,
"grad_norm": 17.811277389526367,
"learning_rate": 2.0986196876135124e-05,
"loss": 0.4506,
"step": 6390
},
{
"epoch": 1.1623683254631312,
"grad_norm": 11.916951179504395,
"learning_rate": 2.094079186342172e-05,
"loss": 0.4996,
"step": 6400
},
{
"epoch": 1.1641845259716672,
"grad_norm": 25.842151641845703,
"learning_rate": 2.089538685070832e-05,
"loss": 0.3922,
"step": 6410
},
{
"epoch": 1.1660007264802035,
"grad_norm": 12.472575187683105,
"learning_rate": 2.0849981837994916e-05,
"loss": 0.2717,
"step": 6420
},
{
"epoch": 1.1678169269887395,
"grad_norm": 16.09991455078125,
"learning_rate": 2.0804576825281512e-05,
"loss": 0.265,
"step": 6430
},
{
"epoch": 1.1696331274972758,
"grad_norm": 13.561772346496582,
"learning_rate": 2.075917181256811e-05,
"loss": 0.3963,
"step": 6440
},
{
"epoch": 1.1714493280058118,
"grad_norm": 8.552865982055664,
"learning_rate": 2.0713766799854705e-05,
"loss": 0.2881,
"step": 6450
},
{
"epoch": 1.173265528514348,
"grad_norm": 16.097904205322266,
"learning_rate": 2.06683617871413e-05,
"loss": 0.3422,
"step": 6460
},
{
"epoch": 1.175081729022884,
"grad_norm": 14.92163372039795,
"learning_rate": 2.0622956774427897e-05,
"loss": 0.3918,
"step": 6470
},
{
"epoch": 1.1768979295314204,
"grad_norm": 23.94252586364746,
"learning_rate": 2.0577551761714493e-05,
"loss": 0.2916,
"step": 6480
},
{
"epoch": 1.1787141300399564,
"grad_norm": 32.18648147583008,
"learning_rate": 2.053214674900109e-05,
"loss": 0.4981,
"step": 6490
},
{
"epoch": 1.1805303305484927,
"grad_norm": 25.992433547973633,
"learning_rate": 2.0486741736287686e-05,
"loss": 0.4043,
"step": 6500
},
{
"epoch": 1.1823465310570287,
"grad_norm": 16.37993621826172,
"learning_rate": 2.0441336723574282e-05,
"loss": 0.2839,
"step": 6510
},
{
"epoch": 1.1841627315655647,
"grad_norm": 11.15428638458252,
"learning_rate": 2.039593171086088e-05,
"loss": 0.2675,
"step": 6520
},
{
"epoch": 1.185978932074101,
"grad_norm": 4.630198001861572,
"learning_rate": 2.0350526698147475e-05,
"loss": 0.3671,
"step": 6530
},
{
"epoch": 1.1877951325826372,
"grad_norm": 7.3554606437683105,
"learning_rate": 2.030512168543407e-05,
"loss": 0.335,
"step": 6540
},
{
"epoch": 1.1896113330911733,
"grad_norm": 15.69100284576416,
"learning_rate": 2.025971667272067e-05,
"loss": 0.3132,
"step": 6550
},
{
"epoch": 1.1914275335997093,
"grad_norm": 9.235587120056152,
"learning_rate": 2.0214311660007267e-05,
"loss": 0.3725,
"step": 6560
},
{
"epoch": 1.1932437341082456,
"grad_norm": 17.381521224975586,
"learning_rate": 2.0168906647293863e-05,
"loss": 0.4108,
"step": 6570
},
{
"epoch": 1.1950599346167816,
"grad_norm": 6.358768939971924,
"learning_rate": 2.012350163458046e-05,
"loss": 0.437,
"step": 6580
},
{
"epoch": 1.1968761351253179,
"grad_norm": 25.116188049316406,
"learning_rate": 2.0078096621867056e-05,
"loss": 0.3024,
"step": 6590
},
{
"epoch": 1.198692335633854,
"grad_norm": 17.047897338867188,
"learning_rate": 2.0032691609153652e-05,
"loss": 0.4109,
"step": 6600
},
{
"epoch": 1.198692335633854,
"eval_accuracy": 0.8457752255947498,
"eval_f1": 0.8384878566461094,
"eval_loss": 0.43283388018608093,
"eval_precision": 0.8390928465459404,
"eval_recall": 0.8434791709994856,
"eval_runtime": 12.1336,
"eval_samples_per_second": 100.465,
"eval_steps_per_second": 6.346,
"step": 6600
},
{
"epoch": 1.2005085361423902,
"grad_norm": 10.83834171295166,
"learning_rate": 1.9987286596440248e-05,
"loss": 0.2987,
"step": 6610
},
{
"epoch": 1.2023247366509262,
"grad_norm": 11.946487426757812,
"learning_rate": 1.9941881583726844e-05,
"loss": 0.3454,
"step": 6620
},
{
"epoch": 1.2041409371594625,
"grad_norm": 11.06641674041748,
"learning_rate": 1.989647657101344e-05,
"loss": 0.3263,
"step": 6630
},
{
"epoch": 1.2059571376679985,
"grad_norm": 10.330728530883789,
"learning_rate": 1.9851071558300037e-05,
"loss": 0.3618,
"step": 6640
},
{
"epoch": 1.2077733381765348,
"grad_norm": 26.483482360839844,
"learning_rate": 1.9805666545586633e-05,
"loss": 0.3452,
"step": 6650
},
{
"epoch": 1.2095895386850708,
"grad_norm": 12.150251388549805,
"learning_rate": 1.9760261532873233e-05,
"loss": 0.3156,
"step": 6660
},
{
"epoch": 1.211405739193607,
"grad_norm": 12.710071563720703,
"learning_rate": 1.971485652015983e-05,
"loss": 0.2054,
"step": 6670
},
{
"epoch": 1.213221939702143,
"grad_norm": 10.786319732666016,
"learning_rate": 1.9669451507446425e-05,
"loss": 0.3378,
"step": 6680
},
{
"epoch": 1.2150381402106794,
"grad_norm": 17.936023712158203,
"learning_rate": 1.962404649473302e-05,
"loss": 0.2841,
"step": 6690
},
{
"epoch": 1.2168543407192154,
"grad_norm": 13.268390655517578,
"learning_rate": 1.9578641482019618e-05,
"loss": 0.17,
"step": 6700
},
{
"epoch": 1.2186705412277514,
"grad_norm": 6.254006385803223,
"learning_rate": 1.9533236469306214e-05,
"loss": 0.3391,
"step": 6710
},
{
"epoch": 1.2204867417362877,
"grad_norm": 19.141435623168945,
"learning_rate": 1.948783145659281e-05,
"loss": 0.281,
"step": 6720
},
{
"epoch": 1.222302942244824,
"grad_norm": 20.09503936767578,
"learning_rate": 1.9442426443879406e-05,
"loss": 0.3752,
"step": 6730
},
{
"epoch": 1.22411914275336,
"grad_norm": 13.188698768615723,
"learning_rate": 1.9397021431166003e-05,
"loss": 0.2663,
"step": 6740
},
{
"epoch": 1.225935343261896,
"grad_norm": 16.47735595703125,
"learning_rate": 1.93516164184526e-05,
"loss": 0.2395,
"step": 6750
},
{
"epoch": 1.2277515437704323,
"grad_norm": 14.550719261169434,
"learning_rate": 1.9306211405739195e-05,
"loss": 0.4064,
"step": 6760
},
{
"epoch": 1.2295677442789683,
"grad_norm": 24.679447174072266,
"learning_rate": 1.926080639302579e-05,
"loss": 0.3489,
"step": 6770
},
{
"epoch": 1.2313839447875046,
"grad_norm": 18.579641342163086,
"learning_rate": 1.9215401380312388e-05,
"loss": 0.3471,
"step": 6780
},
{
"epoch": 1.2332001452960406,
"grad_norm": 9.306077003479004,
"learning_rate": 1.9169996367598984e-05,
"loss": 0.3261,
"step": 6790
},
{
"epoch": 1.2350163458045769,
"grad_norm": 14.456847190856934,
"learning_rate": 1.912459135488558e-05,
"loss": 0.2736,
"step": 6800
},
{
"epoch": 1.236832546313113,
"grad_norm": 13.200130462646484,
"learning_rate": 1.9079186342172176e-05,
"loss": 0.3333,
"step": 6810
},
{
"epoch": 1.2386487468216492,
"grad_norm": 20.47243309020996,
"learning_rate": 1.9033781329458773e-05,
"loss": 0.2882,
"step": 6820
},
{
"epoch": 1.2404649473301852,
"grad_norm": 4.953060150146484,
"learning_rate": 1.8988376316745372e-05,
"loss": 0.2492,
"step": 6830
},
{
"epoch": 1.2422811478387215,
"grad_norm": 18.391193389892578,
"learning_rate": 1.894297130403197e-05,
"loss": 0.3334,
"step": 6840
},
{
"epoch": 1.2440973483472575,
"grad_norm": 15.669410705566406,
"learning_rate": 1.8897566291318565e-05,
"loss": 0.2825,
"step": 6850
},
{
"epoch": 1.2459135488557938,
"grad_norm": 7.906172752380371,
"learning_rate": 1.885216127860516e-05,
"loss": 0.3103,
"step": 6860
},
{
"epoch": 1.2477297493643298,
"grad_norm": 28.12116813659668,
"learning_rate": 1.8806756265891757e-05,
"loss": 0.513,
"step": 6870
},
{
"epoch": 1.249545949872866,
"grad_norm": 15.086318969726562,
"learning_rate": 1.8761351253178354e-05,
"loss": 0.2614,
"step": 6880
},
{
"epoch": 1.251362150381402,
"grad_norm": 21.694408416748047,
"learning_rate": 1.871594624046495e-05,
"loss": 0.3005,
"step": 6890
},
{
"epoch": 1.2531783508899381,
"grad_norm": 19.437421798706055,
"learning_rate": 1.8670541227751546e-05,
"loss": 0.3489,
"step": 6900
},
{
"epoch": 1.2531783508899381,
"eval_accuracy": 0.8613617719442166,
"eval_f1": 0.8577346281731705,
"eval_loss": 0.41974562406539917,
"eval_precision": 0.8551502281540364,
"eval_recall": 0.8622491860794987,
"eval_runtime": 12.1508,
"eval_samples_per_second": 100.323,
"eval_steps_per_second": 6.337,
"step": 6900
},
{
"epoch": 1.2549945513984744,
"grad_norm": 10.58027172088623,
"learning_rate": 1.8625136215038142e-05,
"loss": 0.4004,
"step": 6910
},
{
"epoch": 1.2568107519070106,
"grad_norm": 18.426048278808594,
"learning_rate": 1.857973120232474e-05,
"loss": 0.3934,
"step": 6920
},
{
"epoch": 1.2586269524155467,
"grad_norm": 14.427044868469238,
"learning_rate": 1.8534326189611335e-05,
"loss": 0.3299,
"step": 6930
},
{
"epoch": 1.2604431529240827,
"grad_norm": 15.217583656311035,
"learning_rate": 1.848892117689793e-05,
"loss": 0.2325,
"step": 6940
},
{
"epoch": 1.262259353432619,
"grad_norm": 7.493901252746582,
"learning_rate": 1.8443516164184527e-05,
"loss": 0.1888,
"step": 6950
},
{
"epoch": 1.2640755539411552,
"grad_norm": 9.405710220336914,
"learning_rate": 1.8398111151471124e-05,
"loss": 0.3034,
"step": 6960
},
{
"epoch": 1.2658917544496913,
"grad_norm": 22.79953384399414,
"learning_rate": 1.835270613875772e-05,
"loss": 0.4091,
"step": 6970
},
{
"epoch": 1.2677079549582273,
"grad_norm": 11.145368576049805,
"learning_rate": 1.8307301126044316e-05,
"loss": 0.3613,
"step": 6980
},
{
"epoch": 1.2695241554667636,
"grad_norm": 12.6839599609375,
"learning_rate": 1.8261896113330912e-05,
"loss": 0.2778,
"step": 6990
},
{
"epoch": 1.2713403559752996,
"grad_norm": 26.889331817626953,
"learning_rate": 1.821649110061751e-05,
"loss": 0.5038,
"step": 7000
},
{
"epoch": 1.2731565564838359,
"grad_norm": 24.15523338317871,
"learning_rate": 1.8171086087904105e-05,
"loss": 0.2681,
"step": 7010
},
{
"epoch": 1.274972756992372,
"grad_norm": 24.74588394165039,
"learning_rate": 1.81256810751907e-05,
"loss": 0.3978,
"step": 7020
},
{
"epoch": 1.2767889575009082,
"grad_norm": 14.386746406555176,
"learning_rate": 1.8080276062477297e-05,
"loss": 0.1776,
"step": 7030
},
{
"epoch": 1.2786051580094442,
"grad_norm": 14.167470932006836,
"learning_rate": 1.8034871049763894e-05,
"loss": 0.4071,
"step": 7040
},
{
"epoch": 1.2804213585179804,
"grad_norm": 17.294830322265625,
"learning_rate": 1.7989466037050493e-05,
"loss": 0.3855,
"step": 7050
},
{
"epoch": 1.2822375590265165,
"grad_norm": 7.660125255584717,
"learning_rate": 1.794406102433709e-05,
"loss": 0.3288,
"step": 7060
},
{
"epoch": 1.2840537595350527,
"grad_norm": 9.39700698852539,
"learning_rate": 1.7898656011623686e-05,
"loss": 0.216,
"step": 7070
},
{
"epoch": 1.2858699600435888,
"grad_norm": 14.8308744430542,
"learning_rate": 1.7853250998910282e-05,
"loss": 0.293,
"step": 7080
},
{
"epoch": 1.287686160552125,
"grad_norm": 7.683089733123779,
"learning_rate": 1.7807845986196878e-05,
"loss": 0.2502,
"step": 7090
},
{
"epoch": 1.289502361060661,
"grad_norm": 8.663617134094238,
"learning_rate": 1.7762440973483474e-05,
"loss": 0.2726,
"step": 7100
},
{
"epoch": 1.2913185615691973,
"grad_norm": 27.20614242553711,
"learning_rate": 1.771703596077007e-05,
"loss": 0.3407,
"step": 7110
},
{
"epoch": 1.2931347620777334,
"grad_norm": 20.363462448120117,
"learning_rate": 1.7671630948056667e-05,
"loss": 0.4059,
"step": 7120
},
{
"epoch": 1.2949509625862694,
"grad_norm": 11.423839569091797,
"learning_rate": 1.7626225935343263e-05,
"loss": 0.3017,
"step": 7130
},
{
"epoch": 1.2967671630948057,
"grad_norm": 21.78324317932129,
"learning_rate": 1.758082092262986e-05,
"loss": 0.2886,
"step": 7140
},
{
"epoch": 1.298583363603342,
"grad_norm": 10.466778755187988,
"learning_rate": 1.7535415909916456e-05,
"loss": 0.4254,
"step": 7150
},
{
"epoch": 1.300399564111878,
"grad_norm": 27.826078414916992,
"learning_rate": 1.7490010897203052e-05,
"loss": 0.3508,
"step": 7160
},
{
"epoch": 1.302215764620414,
"grad_norm": 30.015012741088867,
"learning_rate": 1.7444605884489648e-05,
"loss": 0.339,
"step": 7170
},
{
"epoch": 1.3040319651289503,
"grad_norm": 5.534029960632324,
"learning_rate": 1.7399200871776244e-05,
"loss": 0.2054,
"step": 7180
},
{
"epoch": 1.3058481656374865,
"grad_norm": 17.095060348510742,
"learning_rate": 1.735379585906284e-05,
"loss": 0.418,
"step": 7190
},
{
"epoch": 1.3076643661460225,
"grad_norm": 13.304654121398926,
"learning_rate": 1.7308390846349437e-05,
"loss": 0.4048,
"step": 7200
},
{
"epoch": 1.3076643661460225,
"eval_accuracy": 0.8679245283018868,
"eval_f1": 0.8611075418966936,
"eval_loss": 0.39569488167762756,
"eval_precision": 0.8537013974843974,
"eval_recall": 0.8742274746130478,
"eval_runtime": 12.1513,
"eval_samples_per_second": 100.319,
"eval_steps_per_second": 6.337,
"step": 7200
},
{
"epoch": 1.3094805666545586,
"grad_norm": 11.452223777770996,
"learning_rate": 1.7262985833636033e-05,
"loss": 0.4095,
"step": 7210
},
{
"epoch": 1.3112967671630948,
"grad_norm": 7.238298416137695,
"learning_rate": 1.721758082092263e-05,
"loss": 0.2381,
"step": 7220
},
{
"epoch": 1.3131129676716309,
"grad_norm": 25.489473342895508,
"learning_rate": 1.7172175808209226e-05,
"loss": 0.3537,
"step": 7230
},
{
"epoch": 1.3149291681801671,
"grad_norm": 8.602483749389648,
"learning_rate": 1.7126770795495822e-05,
"loss": 0.2295,
"step": 7240
},
{
"epoch": 1.3167453686887032,
"grad_norm": 16.307979583740234,
"learning_rate": 1.7081365782782418e-05,
"loss": 0.4157,
"step": 7250
},
{
"epoch": 1.3185615691972394,
"grad_norm": 12.968489646911621,
"learning_rate": 1.7035960770069018e-05,
"loss": 0.2462,
"step": 7260
},
{
"epoch": 1.3203777697057755,
"grad_norm": 473.5597839355469,
"learning_rate": 1.6990555757355614e-05,
"loss": 0.2313,
"step": 7270
},
{
"epoch": 1.3221939702143117,
"grad_norm": 25.61052703857422,
"learning_rate": 1.694515074464221e-05,
"loss": 0.3654,
"step": 7280
},
{
"epoch": 1.3240101707228478,
"grad_norm": 8.36911678314209,
"learning_rate": 1.6899745731928806e-05,
"loss": 0.4109,
"step": 7290
},
{
"epoch": 1.325826371231384,
"grad_norm": 5.5236687660217285,
"learning_rate": 1.6854340719215403e-05,
"loss": 0.3426,
"step": 7300
},
{
"epoch": 1.32764257173992,
"grad_norm": 12.219548225402832,
"learning_rate": 1.6808935706502e-05,
"loss": 0.2344,
"step": 7310
},
{
"epoch": 1.329458772248456,
"grad_norm": 9.559709548950195,
"learning_rate": 1.6763530693788595e-05,
"loss": 0.351,
"step": 7320
},
{
"epoch": 1.3312749727569924,
"grad_norm": 12.749125480651855,
"learning_rate": 1.671812568107519e-05,
"loss": 0.3179,
"step": 7330
},
{
"epoch": 1.3330911732655286,
"grad_norm": 10.120976448059082,
"learning_rate": 1.6672720668361788e-05,
"loss": 0.2978,
"step": 7340
},
{
"epoch": 1.3349073737740647,
"grad_norm": 20.587955474853516,
"learning_rate": 1.6627315655648384e-05,
"loss": 0.3815,
"step": 7350
},
{
"epoch": 1.3367235742826007,
"grad_norm": 15.96291446685791,
"learning_rate": 1.658191064293498e-05,
"loss": 0.3063,
"step": 7360
},
{
"epoch": 1.338539774791137,
"grad_norm": 20.04080581665039,
"learning_rate": 1.6536505630221576e-05,
"loss": 0.2703,
"step": 7370
},
{
"epoch": 1.3403559752996732,
"grad_norm": 19.636119842529297,
"learning_rate": 1.6491100617508173e-05,
"loss": 0.2934,
"step": 7380
},
{
"epoch": 1.3421721758082092,
"grad_norm": 17.96088218688965,
"learning_rate": 1.644569560479477e-05,
"loss": 0.2903,
"step": 7390
},
{
"epoch": 1.3439883763167453,
"grad_norm": 5.417899131774902,
"learning_rate": 1.6400290592081365e-05,
"loss": 0.4768,
"step": 7400
},
{
"epoch": 1.3458045768252815,
"grad_norm": 27.437042236328125,
"learning_rate": 1.635488557936796e-05,
"loss": 0.4288,
"step": 7410
},
{
"epoch": 1.3476207773338176,
"grad_norm": 14.335066795349121,
"learning_rate": 1.6309480566654558e-05,
"loss": 0.3827,
"step": 7420
},
{
"epoch": 1.3494369778423538,
"grad_norm": 20.122777938842773,
"learning_rate": 1.6264075553941154e-05,
"loss": 0.4897,
"step": 7430
},
{
"epoch": 1.3512531783508899,
"grad_norm": 32.951942443847656,
"learning_rate": 1.621867054122775e-05,
"loss": 0.3861,
"step": 7440
},
{
"epoch": 1.3530693788594261,
"grad_norm": 11.255241394042969,
"learning_rate": 1.6173265528514346e-05,
"loss": 0.2971,
"step": 7450
},
{
"epoch": 1.3548855793679622,
"grad_norm": 14.039215087890625,
"learning_rate": 1.6127860515800943e-05,
"loss": 0.3155,
"step": 7460
},
{
"epoch": 1.3567017798764984,
"grad_norm": 8.711435317993164,
"learning_rate": 1.6082455503087542e-05,
"loss": 0.2202,
"step": 7470
},
{
"epoch": 1.3585179803850345,
"grad_norm": 11.57476806640625,
"learning_rate": 1.603705049037414e-05,
"loss": 0.2802,
"step": 7480
},
{
"epoch": 1.3603341808935707,
"grad_norm": 26.275901794433594,
"learning_rate": 1.5991645477660735e-05,
"loss": 0.424,
"step": 7490
},
{
"epoch": 1.3621503814021068,
"grad_norm": 9.019407272338867,
"learning_rate": 1.594624046494733e-05,
"loss": 0.2053,
"step": 7500
},
{
"epoch": 1.3621503814021068,
"eval_accuracy": 0.8810500410172273,
"eval_f1": 0.8728353936424942,
"eval_loss": 0.36287108063697815,
"eval_precision": 0.8688572009408948,
"eval_recall": 0.8783339042782887,
"eval_runtime": 12.133,
"eval_samples_per_second": 100.469,
"eval_steps_per_second": 6.346,
"step": 7500
},
{
"epoch": 1.363966581910643,
"grad_norm": 13.45000171661377,
"learning_rate": 1.5900835452233927e-05,
"loss": 0.2865,
"step": 7510
},
{
"epoch": 1.365782782419179,
"grad_norm": 18.8865966796875,
"learning_rate": 1.5855430439520524e-05,
"loss": 0.3401,
"step": 7520
},
{
"epoch": 1.3675989829277153,
"grad_norm": 10.25676155090332,
"learning_rate": 1.581002542680712e-05,
"loss": 0.2958,
"step": 7530
},
{
"epoch": 1.3694151834362513,
"grad_norm": 12.922000885009766,
"learning_rate": 1.5764620414093716e-05,
"loss": 0.3281,
"step": 7540
},
{
"epoch": 1.3712313839447874,
"grad_norm": 21.29969596862793,
"learning_rate": 1.5719215401380312e-05,
"loss": 0.4653,
"step": 7550
},
{
"epoch": 1.3730475844533236,
"grad_norm": 22.337665557861328,
"learning_rate": 1.567381038866691e-05,
"loss": 0.4078,
"step": 7560
},
{
"epoch": 1.37486378496186,
"grad_norm": 13.976520538330078,
"learning_rate": 1.5628405375953505e-05,
"loss": 0.2474,
"step": 7570
},
{
"epoch": 1.376679985470396,
"grad_norm": 15.766996383666992,
"learning_rate": 1.55830003632401e-05,
"loss": 0.3418,
"step": 7580
},
{
"epoch": 1.378496185978932,
"grad_norm": 10.695988655090332,
"learning_rate": 1.5537595350526697e-05,
"loss": 0.231,
"step": 7590
},
{
"epoch": 1.3803123864874682,
"grad_norm": 12.22573184967041,
"learning_rate": 1.5492190337813294e-05,
"loss": 0.3406,
"step": 7600
},
{
"epoch": 1.3821285869960045,
"grad_norm": 8.77241325378418,
"learning_rate": 1.544678532509989e-05,
"loss": 0.4051,
"step": 7610
},
{
"epoch": 1.3839447875045405,
"grad_norm": 17.83467674255371,
"learning_rate": 1.5401380312386486e-05,
"loss": 0.2736,
"step": 7620
},
{
"epoch": 1.3857609880130766,
"grad_norm": 10.345674514770508,
"learning_rate": 1.5355975299673082e-05,
"loss": 0.2945,
"step": 7630
},
{
"epoch": 1.3875771885216128,
"grad_norm": 9.618982315063477,
"learning_rate": 1.531057028695968e-05,
"loss": 0.307,
"step": 7640
},
{
"epoch": 1.3893933890301489,
"grad_norm": 6.242488861083984,
"learning_rate": 1.5265165274246275e-05,
"loss": 0.2746,
"step": 7650
},
{
"epoch": 1.3912095895386851,
"grad_norm": 14.939092636108398,
"learning_rate": 1.5219760261532873e-05,
"loss": 0.2958,
"step": 7660
},
{
"epoch": 1.3930257900472212,
"grad_norm": 16.4776611328125,
"learning_rate": 1.5174355248819469e-05,
"loss": 0.5158,
"step": 7670
},
{
"epoch": 1.3948419905557574,
"grad_norm": 19.468334197998047,
"learning_rate": 1.5128950236106069e-05,
"loss": 0.2188,
"step": 7680
},
{
"epoch": 1.3966581910642935,
"grad_norm": 16.519298553466797,
"learning_rate": 1.5083545223392665e-05,
"loss": 0.2974,
"step": 7690
},
{
"epoch": 1.3984743915728297,
"grad_norm": 8.464622497558594,
"learning_rate": 1.5038140210679261e-05,
"loss": 0.2988,
"step": 7700
},
{
"epoch": 1.4002905920813657,
"grad_norm": 1.7706962823867798,
"learning_rate": 1.4992735197965857e-05,
"loss": 0.25,
"step": 7710
},
{
"epoch": 1.402106792589902,
"grad_norm": 13.213313102722168,
"learning_rate": 1.4947330185252454e-05,
"loss": 0.2841,
"step": 7720
},
{
"epoch": 1.403922993098438,
"grad_norm": 18.352794647216797,
"learning_rate": 1.490192517253905e-05,
"loss": 0.4059,
"step": 7730
},
{
"epoch": 1.405739193606974,
"grad_norm": 11.154178619384766,
"learning_rate": 1.4856520159825646e-05,
"loss": 0.4,
"step": 7740
},
{
"epoch": 1.4075553941155103,
"grad_norm": 20.417091369628906,
"learning_rate": 1.4811115147112242e-05,
"loss": 0.3162,
"step": 7750
},
{
"epoch": 1.4093715946240466,
"grad_norm": 15.249809265136719,
"learning_rate": 1.4765710134398839e-05,
"loss": 0.2575,
"step": 7760
},
{
"epoch": 1.4111877951325826,
"grad_norm": 12.293340682983398,
"learning_rate": 1.4720305121685435e-05,
"loss": 0.2857,
"step": 7770
},
{
"epoch": 1.4130039956411187,
"grad_norm": 6.126258850097656,
"learning_rate": 1.4674900108972031e-05,
"loss": 0.2618,
"step": 7780
},
{
"epoch": 1.414820196149655,
"grad_norm": 15.712937355041504,
"learning_rate": 1.4629495096258627e-05,
"loss": 0.3071,
"step": 7790
},
{
"epoch": 1.4166363966581912,
"grad_norm": 22.511367797851562,
"learning_rate": 1.4584090083545224e-05,
"loss": 0.2066,
"step": 7800
},
{
"epoch": 1.4166363966581912,
"eval_accuracy": 0.8712059064807219,
"eval_f1": 0.864515063175361,
"eval_loss": 0.39585188031196594,
"eval_precision": 0.8567226508046806,
"eval_recall": 0.8748503082269048,
"eval_runtime": 12.1334,
"eval_samples_per_second": 100.467,
"eval_steps_per_second": 6.346,
"step": 7800
},
{
"epoch": 1.4184525971667272,
"grad_norm": 21.43792152404785,
"learning_rate": 1.453868507083182e-05,
"loss": 0.3206,
"step": 7810
},
{
"epoch": 1.4202687976752633,
"grad_norm": 15.642193794250488,
"learning_rate": 1.4493280058118416e-05,
"loss": 0.3246,
"step": 7820
},
{
"epoch": 1.4220849981837995,
"grad_norm": 22.358238220214844,
"learning_rate": 1.4447875045405012e-05,
"loss": 0.2895,
"step": 7830
},
{
"epoch": 1.4239011986923356,
"grad_norm": 12.986956596374512,
"learning_rate": 1.4402470032691609e-05,
"loss": 0.309,
"step": 7840
},
{
"epoch": 1.4257173992008718,
"grad_norm": 25.143394470214844,
"learning_rate": 1.4357065019978205e-05,
"loss": 0.4229,
"step": 7850
},
{
"epoch": 1.4275335997094079,
"grad_norm": 15.178205490112305,
"learning_rate": 1.4311660007264801e-05,
"loss": 0.2001,
"step": 7860
},
{
"epoch": 1.429349800217944,
"grad_norm": 27.758424758911133,
"learning_rate": 1.4266254994551397e-05,
"loss": 0.3854,
"step": 7870
},
{
"epoch": 1.4311660007264801,
"grad_norm": 20.106098175048828,
"learning_rate": 1.4220849981837994e-05,
"loss": 0.3047,
"step": 7880
},
{
"epoch": 1.4329822012350164,
"grad_norm": 21.916183471679688,
"learning_rate": 1.4175444969124593e-05,
"loss": 0.3352,
"step": 7890
},
{
"epoch": 1.4347984017435524,
"grad_norm": 11.698692321777344,
"learning_rate": 1.413003995641119e-05,
"loss": 0.2302,
"step": 7900
},
{
"epoch": 1.4366146022520887,
"grad_norm": 18.721933364868164,
"learning_rate": 1.4084634943697786e-05,
"loss": 0.2219,
"step": 7910
},
{
"epoch": 1.4384308027606247,
"grad_norm": 10.098983764648438,
"learning_rate": 1.4039229930984382e-05,
"loss": 0.3427,
"step": 7920
},
{
"epoch": 1.440247003269161,
"grad_norm": 5.1341023445129395,
"learning_rate": 1.3993824918270978e-05,
"loss": 0.1857,
"step": 7930
},
{
"epoch": 1.442063203777697,
"grad_norm": 7.9033522605896,
"learning_rate": 1.3948419905557575e-05,
"loss": 0.2511,
"step": 7940
},
{
"epoch": 1.4438794042862333,
"grad_norm": 15.394737243652344,
"learning_rate": 1.390301489284417e-05,
"loss": 0.372,
"step": 7950
},
{
"epoch": 1.4456956047947693,
"grad_norm": 10.518932342529297,
"learning_rate": 1.3857609880130767e-05,
"loss": 0.2298,
"step": 7960
},
{
"epoch": 1.4475118053033054,
"grad_norm": 15.272256851196289,
"learning_rate": 1.3812204867417363e-05,
"loss": 0.2183,
"step": 7970
},
{
"epoch": 1.4493280058118416,
"grad_norm": 13.641687393188477,
"learning_rate": 1.376679985470396e-05,
"loss": 0.3127,
"step": 7980
},
{
"epoch": 1.4511442063203779,
"grad_norm": 20.85528564453125,
"learning_rate": 1.3721394841990556e-05,
"loss": 0.437,
"step": 7990
},
{
"epoch": 1.452960406828914,
"grad_norm": 17.150014877319336,
"learning_rate": 1.3675989829277152e-05,
"loss": 0.2353,
"step": 8000
},
{
"epoch": 1.45477660733745,
"grad_norm": 19.58470916748047,
"learning_rate": 1.3630584816563748e-05,
"loss": 0.2235,
"step": 8010
},
{
"epoch": 1.4565928078459862,
"grad_norm": 11.996252059936523,
"learning_rate": 1.3585179803850344e-05,
"loss": 0.1931,
"step": 8020
},
{
"epoch": 1.4584090083545225,
"grad_norm": 14.358990669250488,
"learning_rate": 1.353977479113694e-05,
"loss": 0.1409,
"step": 8030
},
{
"epoch": 1.4602252088630585,
"grad_norm": 25.43513298034668,
"learning_rate": 1.3494369778423539e-05,
"loss": 0.3949,
"step": 8040
},
{
"epoch": 1.4620414093715945,
"grad_norm": 38.57484817504883,
"learning_rate": 1.3448964765710135e-05,
"loss": 0.4156,
"step": 8050
},
{
"epoch": 1.4638576098801308,
"grad_norm": 11.345231056213379,
"learning_rate": 1.3403559752996731e-05,
"loss": 0.2358,
"step": 8060
},
{
"epoch": 1.4656738103886668,
"grad_norm": 9.881817817687988,
"learning_rate": 1.3358154740283327e-05,
"loss": 0.1973,
"step": 8070
},
{
"epoch": 1.467490010897203,
"grad_norm": 15.946255683898926,
"learning_rate": 1.3312749727569924e-05,
"loss": 0.3282,
"step": 8080
},
{
"epoch": 1.4693062114057391,
"grad_norm": 21.599016189575195,
"learning_rate": 1.326734471485652e-05,
"loss": 0.3486,
"step": 8090
},
{
"epoch": 1.4711224119142754,
"grad_norm": 23.88036346435547,
"learning_rate": 1.3221939702143118e-05,
"loss": 0.2855,
"step": 8100
},
{
"epoch": 1.4711224119142754,
"eval_accuracy": 0.8777686628383922,
"eval_f1": 0.8732951551257775,
"eval_loss": 0.37594613432884216,
"eval_precision": 0.8717582158349084,
"eval_recall": 0.8789319049269518,
"eval_runtime": 12.1477,
"eval_samples_per_second": 100.348,
"eval_steps_per_second": 6.339,
"step": 8100
},
{
"epoch": 1.4729386124228114,
"grad_norm": 21.347118377685547,
"learning_rate": 1.3176534689429714e-05,
"loss": 0.4024,
"step": 8110
},
{
"epoch": 1.4747548129313477,
"grad_norm": 12.109701156616211,
"learning_rate": 1.313112967671631e-05,
"loss": 0.3624,
"step": 8120
},
{
"epoch": 1.4765710134398837,
"grad_norm": 11.65137004852295,
"learning_rate": 1.3085724664002907e-05,
"loss": 0.2691,
"step": 8130
},
{
"epoch": 1.47838721394842,
"grad_norm": 14.210288047790527,
"learning_rate": 1.3040319651289505e-05,
"loss": 0.3066,
"step": 8140
},
{
"epoch": 1.480203414456956,
"grad_norm": 15.840164184570312,
"learning_rate": 1.29949146385761e-05,
"loss": 0.4073,
"step": 8150
},
{
"epoch": 1.482019614965492,
"grad_norm": 17.042640686035156,
"learning_rate": 1.2949509625862697e-05,
"loss": 0.2465,
"step": 8160
},
{
"epoch": 1.4838358154740283,
"grad_norm": 4.103309631347656,
"learning_rate": 1.2904104613149293e-05,
"loss": 0.2429,
"step": 8170
},
{
"epoch": 1.4856520159825646,
"grad_norm": 21.490703582763672,
"learning_rate": 1.285869960043589e-05,
"loss": 0.3825,
"step": 8180
},
{
"epoch": 1.4874682164911006,
"grad_norm": 22.954036712646484,
"learning_rate": 1.2813294587722486e-05,
"loss": 0.3533,
"step": 8190
},
{
"epoch": 1.4892844169996367,
"grad_norm": 9.550930976867676,
"learning_rate": 1.2767889575009082e-05,
"loss": 0.3585,
"step": 8200
},
{
"epoch": 1.491100617508173,
"grad_norm": 23.033842086791992,
"learning_rate": 1.2722484562295678e-05,
"loss": 0.3724,
"step": 8210
},
{
"epoch": 1.4929168180167092,
"grad_norm": 18.261627197265625,
"learning_rate": 1.2677079549582275e-05,
"loss": 0.3568,
"step": 8220
},
{
"epoch": 1.4947330185252452,
"grad_norm": 26.01344871520996,
"learning_rate": 1.263167453686887e-05,
"loss": 0.3325,
"step": 8230
},
{
"epoch": 1.4965492190337812,
"grad_norm": 6.980250358581543,
"learning_rate": 1.2586269524155467e-05,
"loss": 0.2685,
"step": 8240
},
{
"epoch": 1.4983654195423175,
"grad_norm": 15.290885925292969,
"learning_rate": 1.2540864511442063e-05,
"loss": 0.2438,
"step": 8250
},
{
"epoch": 1.5001816200508538,
"grad_norm": 16.555368423461914,
"learning_rate": 1.2495459498728661e-05,
"loss": 0.4081,
"step": 8260
},
{
"epoch": 1.5019978205593898,
"grad_norm": 23.678932189941406,
"learning_rate": 1.2450054486015257e-05,
"loss": 0.3303,
"step": 8270
},
{
"epoch": 1.5038140210679258,
"grad_norm": 28.935272216796875,
"learning_rate": 1.2404649473301854e-05,
"loss": 0.3075,
"step": 8280
},
{
"epoch": 1.505630221576462,
"grad_norm": 1.6638036966323853,
"learning_rate": 1.235924446058845e-05,
"loss": 0.2976,
"step": 8290
},
{
"epoch": 1.5074464220849983,
"grad_norm": 27.825714111328125,
"learning_rate": 1.2313839447875046e-05,
"loss": 0.2812,
"step": 8300
},
{
"epoch": 1.5092626225935342,
"grad_norm": 18.06635093688965,
"learning_rate": 1.2268434435161642e-05,
"loss": 0.3069,
"step": 8310
},
{
"epoch": 1.5110788231020704,
"grad_norm": 10.651163101196289,
"learning_rate": 1.2223029422448239e-05,
"loss": 0.5344,
"step": 8320
},
{
"epoch": 1.5128950236106067,
"grad_norm": 9.965625762939453,
"learning_rate": 1.2177624409734835e-05,
"loss": 0.3103,
"step": 8330
},
{
"epoch": 1.5147112241191427,
"grad_norm": 23.21745491027832,
"learning_rate": 1.2132219397021431e-05,
"loss": 0.2692,
"step": 8340
},
{
"epoch": 1.5165274246276788,
"grad_norm": 18.808652877807617,
"learning_rate": 1.2086814384308027e-05,
"loss": 0.156,
"step": 8350
},
{
"epoch": 1.518343625136215,
"grad_norm": 21.283294677734375,
"learning_rate": 1.2041409371594625e-05,
"loss": 0.3646,
"step": 8360
},
{
"epoch": 1.5201598256447513,
"grad_norm": 21.1343936920166,
"learning_rate": 1.1996004358881222e-05,
"loss": 0.3235,
"step": 8370
},
{
"epoch": 1.5219760261532873,
"grad_norm": 15.289054870605469,
"learning_rate": 1.1950599346167818e-05,
"loss": 0.2583,
"step": 8380
},
{
"epoch": 1.5237922266618233,
"grad_norm": 15.304503440856934,
"learning_rate": 1.1905194333454414e-05,
"loss": 0.3819,
"step": 8390
},
{
"epoch": 1.5256084271703596,
"grad_norm": 22.24407196044922,
"learning_rate": 1.185978932074101e-05,
"loss": 0.2542,
"step": 8400
},
{
"epoch": 1.5256084271703596,
"eval_accuracy": 0.8843314191960624,
"eval_f1": 0.8780677678973086,
"eval_loss": 0.3471013903617859,
"eval_precision": 0.8764668066951888,
"eval_recall": 0.8831446649071804,
"eval_runtime": 12.1617,
"eval_samples_per_second": 100.233,
"eval_steps_per_second": 6.331,
"step": 8400
},
{
"epoch": 1.5274246276788959,
"grad_norm": 27.7410831451416,
"learning_rate": 1.1814384308027607e-05,
"loss": 0.3286,
"step": 8410
},
{
"epoch": 1.529240828187432,
"grad_norm": 19.968013763427734,
"learning_rate": 1.1768979295314203e-05,
"loss": 0.2745,
"step": 8420
},
{
"epoch": 1.531057028695968,
"grad_norm": 4.940372467041016,
"learning_rate": 1.17235742826008e-05,
"loss": 0.2978,
"step": 8430
},
{
"epoch": 1.5328732292045042,
"grad_norm": 12.394369125366211,
"learning_rate": 1.1678169269887395e-05,
"loss": 0.2675,
"step": 8440
},
{
"epoch": 1.5346894297130405,
"grad_norm": 14.312457084655762,
"learning_rate": 1.1632764257173992e-05,
"loss": 0.3254,
"step": 8450
},
{
"epoch": 1.5365056302215765,
"grad_norm": 18.364046096801758,
"learning_rate": 1.158735924446059e-05,
"loss": 0.2875,
"step": 8460
},
{
"epoch": 1.5383218307301125,
"grad_norm": 20.195308685302734,
"learning_rate": 1.1541954231747186e-05,
"loss": 0.2987,
"step": 8470
},
{
"epoch": 1.5401380312386488,
"grad_norm": 14.188733100891113,
"learning_rate": 1.1496549219033782e-05,
"loss": 0.3537,
"step": 8480
},
{
"epoch": 1.541954231747185,
"grad_norm": 8.175540924072266,
"learning_rate": 1.1451144206320378e-05,
"loss": 0.2177,
"step": 8490
},
{
"epoch": 1.543770432255721,
"grad_norm": 18.209714889526367,
"learning_rate": 1.1405739193606975e-05,
"loss": 0.3042,
"step": 8500
},
{
"epoch": 1.5455866327642571,
"grad_norm": 9.630953788757324,
"learning_rate": 1.136033418089357e-05,
"loss": 0.2883,
"step": 8510
},
{
"epoch": 1.5474028332727934,
"grad_norm": 7.398960113525391,
"learning_rate": 1.1314929168180167e-05,
"loss": 0.2416,
"step": 8520
},
{
"epoch": 1.5492190337813294,
"grad_norm": 16.70703887939453,
"learning_rate": 1.1269524155466763e-05,
"loss": 0.3105,
"step": 8530
},
{
"epoch": 1.5510352342898654,
"grad_norm": 13.721776008605957,
"learning_rate": 1.122411914275336e-05,
"loss": 0.2512,
"step": 8540
},
{
"epoch": 1.5528514347984017,
"grad_norm": 15.949941635131836,
"learning_rate": 1.1178714130039956e-05,
"loss": 0.2245,
"step": 8550
},
{
"epoch": 1.554667635306938,
"grad_norm": 15.553458213806152,
"learning_rate": 1.1133309117326552e-05,
"loss": 0.2002,
"step": 8560
},
{
"epoch": 1.556483835815474,
"grad_norm": 5.081323623657227,
"learning_rate": 1.108790410461315e-05,
"loss": 0.231,
"step": 8570
},
{
"epoch": 1.55830003632401,
"grad_norm": 11.08918285369873,
"learning_rate": 1.1042499091899746e-05,
"loss": 0.2269,
"step": 8580
},
{
"epoch": 1.5601162368325463,
"grad_norm": 16.481983184814453,
"learning_rate": 1.0997094079186343e-05,
"loss": 0.2977,
"step": 8590
},
{
"epoch": 1.5619324373410826,
"grad_norm": 22.30940055847168,
"learning_rate": 1.0951689066472939e-05,
"loss": 0.3221,
"step": 8600
},
{
"epoch": 1.5637486378496186,
"grad_norm": 26.49005126953125,
"learning_rate": 1.0906284053759535e-05,
"loss": 0.3319,
"step": 8610
},
{
"epoch": 1.5655648383581546,
"grad_norm": 18.184383392333984,
"learning_rate": 1.0860879041046131e-05,
"loss": 0.3648,
"step": 8620
},
{
"epoch": 1.567381038866691,
"grad_norm": 7.18729829788208,
"learning_rate": 1.0815474028332728e-05,
"loss": 0.3291,
"step": 8630
},
{
"epoch": 1.5691972393752271,
"grad_norm": 24.356779098510742,
"learning_rate": 1.0770069015619324e-05,
"loss": 0.1853,
"step": 8640
},
{
"epoch": 1.5710134398837632,
"grad_norm": 13.144723892211914,
"learning_rate": 1.0724664002905922e-05,
"loss": 0.2874,
"step": 8650
},
{
"epoch": 1.5728296403922992,
"grad_norm": 8.509248733520508,
"learning_rate": 1.0679258990192518e-05,
"loss": 0.1815,
"step": 8660
},
{
"epoch": 1.5746458409008355,
"grad_norm": 19.468769073486328,
"learning_rate": 1.0633853977479114e-05,
"loss": 0.223,
"step": 8670
},
{
"epoch": 1.5764620414093717,
"grad_norm": 19.752363204956055,
"learning_rate": 1.058844896476571e-05,
"loss": 0.2087,
"step": 8680
},
{
"epoch": 1.5782782419179078,
"grad_norm": 8.5270414352417,
"learning_rate": 1.0543043952052307e-05,
"loss": 0.3123,
"step": 8690
},
{
"epoch": 1.5800944424264438,
"grad_norm": 11.519478797912598,
"learning_rate": 1.0497638939338905e-05,
"loss": 0.407,
"step": 8700
},
{
"epoch": 1.5800944424264438,
"eval_accuracy": 0.889253486464315,
"eval_f1": 0.8844054402447329,
"eval_loss": 0.35321420431137085,
"eval_precision": 0.8739349747133954,
"eval_recall": 0.8988169397446168,
"eval_runtime": 12.1712,
"eval_samples_per_second": 100.155,
"eval_steps_per_second": 6.326,
"step": 8700
},
{
"epoch": 1.58191064293498,
"grad_norm": 5.541025638580322,
"learning_rate": 1.0452233926625501e-05,
"loss": 0.3838,
"step": 8710
},
{
"epoch": 1.5837268434435163,
"grad_norm": 26.33243179321289,
"learning_rate": 1.0406828913912097e-05,
"loss": 0.4307,
"step": 8720
},
{
"epoch": 1.5855430439520521,
"grad_norm": 17.7266788482666,
"learning_rate": 1.0361423901198693e-05,
"loss": 0.4372,
"step": 8730
},
{
"epoch": 1.5873592444605884,
"grad_norm": 12.146345138549805,
"learning_rate": 1.031601888848529e-05,
"loss": 0.3942,
"step": 8740
},
{
"epoch": 1.5891754449691247,
"grad_norm": 15.945060729980469,
"learning_rate": 1.0270613875771886e-05,
"loss": 0.3127,
"step": 8750
},
{
"epoch": 1.5909916454776607,
"grad_norm": 15.658045768737793,
"learning_rate": 1.0225208863058482e-05,
"loss": 0.1777,
"step": 8760
},
{
"epoch": 1.5928078459861967,
"grad_norm": 8.869367599487305,
"learning_rate": 1.0179803850345078e-05,
"loss": 0.3081,
"step": 8770
},
{
"epoch": 1.594624046494733,
"grad_norm": 15.344075202941895,
"learning_rate": 1.0134398837631676e-05,
"loss": 0.291,
"step": 8780
},
{
"epoch": 1.5964402470032693,
"grad_norm": 14.211679458618164,
"learning_rate": 1.0088993824918273e-05,
"loss": 0.2659,
"step": 8790
},
{
"epoch": 1.5982564475118053,
"grad_norm": 26.636606216430664,
"learning_rate": 1.0043588812204869e-05,
"loss": 0.2606,
"step": 8800
},
{
"epoch": 1.6000726480203413,
"grad_norm": 20.942895889282227,
"learning_rate": 9.998183799491465e-06,
"loss": 0.1804,
"step": 8810
},
{
"epoch": 1.6018888485288776,
"grad_norm": 16.17045021057129,
"learning_rate": 9.952778786778061e-06,
"loss": 0.2943,
"step": 8820
},
{
"epoch": 1.6037050490374138,
"grad_norm": 6.153861999511719,
"learning_rate": 9.907373774064658e-06,
"loss": 0.3264,
"step": 8830
},
{
"epoch": 1.6055212495459499,
"grad_norm": 21.781164169311523,
"learning_rate": 9.861968761351254e-06,
"loss": 0.4721,
"step": 8840
},
{
"epoch": 1.607337450054486,
"grad_norm": 21.82793617248535,
"learning_rate": 9.81656374863785e-06,
"loss": 0.2914,
"step": 8850
},
{
"epoch": 1.6091536505630222,
"grad_norm": 21.26590919494629,
"learning_rate": 9.771158735924446e-06,
"loss": 0.256,
"step": 8860
},
{
"epoch": 1.6109698510715584,
"grad_norm": 10.11241626739502,
"learning_rate": 9.725753723211043e-06,
"loss": 0.2504,
"step": 8870
},
{
"epoch": 1.6127860515800945,
"grad_norm": 4.329545021057129,
"learning_rate": 9.68034871049764e-06,
"loss": 0.272,
"step": 8880
},
{
"epoch": 1.6146022520886305,
"grad_norm": 13.154899597167969,
"learning_rate": 9.634943697784237e-06,
"loss": 0.2627,
"step": 8890
},
{
"epoch": 1.6164184525971668,
"grad_norm": 21.979530334472656,
"learning_rate": 9.589538685070833e-06,
"loss": 0.2251,
"step": 8900
},
{
"epoch": 1.618234653105703,
"grad_norm": 7.475334167480469,
"learning_rate": 9.54413367235743e-06,
"loss": 0.268,
"step": 8910
},
{
"epoch": 1.620050853614239,
"grad_norm": 24.20920753479004,
"learning_rate": 9.498728659644026e-06,
"loss": 0.3187,
"step": 8920
},
{
"epoch": 1.621867054122775,
"grad_norm": 14.770585060119629,
"learning_rate": 9.453323646930622e-06,
"loss": 0.2269,
"step": 8930
},
{
"epoch": 1.6236832546313114,
"grad_norm": 14.928208351135254,
"learning_rate": 9.407918634217218e-06,
"loss": 0.2472,
"step": 8940
},
{
"epoch": 1.6254994551398474,
"grad_norm": 10.5422945022583,
"learning_rate": 9.362513621503814e-06,
"loss": 0.4042,
"step": 8950
},
{
"epoch": 1.6273156556483834,
"grad_norm": 16.635868072509766,
"learning_rate": 9.31710860879041e-06,
"loss": 0.2717,
"step": 8960
},
{
"epoch": 1.6291318561569197,
"grad_norm": 18.028661727905273,
"learning_rate": 9.271703596077007e-06,
"loss": 0.1889,
"step": 8970
},
{
"epoch": 1.630948056665456,
"grad_norm": 14.402594566345215,
"learning_rate": 9.226298583363603e-06,
"loss": 0.2593,
"step": 8980
},
{
"epoch": 1.632764257173992,
"grad_norm": 18.25139617919922,
"learning_rate": 9.180893570650201e-06,
"loss": 0.4313,
"step": 8990
},
{
"epoch": 1.634580457682528,
"grad_norm": 15.58337688446045,
"learning_rate": 9.135488557936797e-06,
"loss": 0.2691,
"step": 9000
},
{
"epoch": 1.634580457682528,
"eval_accuracy": 0.8859721082854799,
"eval_f1": 0.8808923670444524,
"eval_loss": 0.3507283329963684,
"eval_precision": 0.8735378654629461,
"eval_recall": 0.8903885563547935,
"eval_runtime": 12.1871,
"eval_samples_per_second": 100.024,
"eval_steps_per_second": 6.318,
"step": 9000
},
{
"epoch": 1.6363966581910643,
"grad_norm": 16.187101364135742,
"learning_rate": 9.090083545223393e-06,
"loss": 0.3231,
"step": 9010
},
{
"epoch": 1.6382128586996005,
"grad_norm": 10.623252868652344,
"learning_rate": 9.04467853250999e-06,
"loss": 0.2615,
"step": 9020
},
{
"epoch": 1.6400290592081366,
"grad_norm": 16.480899810791016,
"learning_rate": 8.999273519796586e-06,
"loss": 0.1975,
"step": 9030
},
{
"epoch": 1.6418452597166726,
"grad_norm": 27.12870979309082,
"learning_rate": 8.953868507083182e-06,
"loss": 0.3049,
"step": 9040
},
{
"epoch": 1.6436614602252089,
"grad_norm": 29.148317337036133,
"learning_rate": 8.908463494369778e-06,
"loss": 0.3735,
"step": 9050
},
{
"epoch": 1.6454776607337451,
"grad_norm": 6.015985012054443,
"learning_rate": 8.863058481656375e-06,
"loss": 0.1824,
"step": 9060
},
{
"epoch": 1.6472938612422812,
"grad_norm": 8.818500518798828,
"learning_rate": 8.817653468942971e-06,
"loss": 0.2414,
"step": 9070
},
{
"epoch": 1.6491100617508172,
"grad_norm": 27.29248809814453,
"learning_rate": 8.772248456229567e-06,
"loss": 0.4476,
"step": 9080
},
{
"epoch": 1.6509262622593535,
"grad_norm": 3.913367986679077,
"learning_rate": 8.726843443516165e-06,
"loss": 0.2849,
"step": 9090
},
{
"epoch": 1.6527424627678897,
"grad_norm": 21.9657039642334,
"learning_rate": 8.681438430802761e-06,
"loss": 0.3711,
"step": 9100
},
{
"epoch": 1.6545586632764258,
"grad_norm": 14.766958236694336,
"learning_rate": 8.636033418089358e-06,
"loss": 0.3725,
"step": 9110
},
{
"epoch": 1.6563748637849618,
"grad_norm": 14.37176513671875,
"learning_rate": 8.590628405375954e-06,
"loss": 0.2172,
"step": 9120
},
{
"epoch": 1.658191064293498,
"grad_norm": 5.39981746673584,
"learning_rate": 8.54522339266255e-06,
"loss": 0.422,
"step": 9130
},
{
"epoch": 1.6600072648020343,
"grad_norm": 14.968268394470215,
"learning_rate": 8.499818379949146e-06,
"loss": 0.3144,
"step": 9140
},
{
"epoch": 1.6618234653105701,
"grad_norm": 5.774266242980957,
"learning_rate": 8.454413367235743e-06,
"loss": 0.2657,
"step": 9150
},
{
"epoch": 1.6636396658191064,
"grad_norm": 20.72484016418457,
"learning_rate": 8.409008354522339e-06,
"loss": 0.2577,
"step": 9160
},
{
"epoch": 1.6654558663276426,
"grad_norm": 18.517642974853516,
"learning_rate": 8.363603341808935e-06,
"loss": 0.4908,
"step": 9170
},
{
"epoch": 1.6672720668361787,
"grad_norm": 3.34096622467041,
"learning_rate": 8.318198329095531e-06,
"loss": 0.3054,
"step": 9180
},
{
"epoch": 1.6690882673447147,
"grad_norm": 15.43202018737793,
"learning_rate": 8.272793316382128e-06,
"loss": 0.2922,
"step": 9190
},
{
"epoch": 1.670904467853251,
"grad_norm": 9.061037063598633,
"learning_rate": 8.227388303668726e-06,
"loss": 0.2724,
"step": 9200
},
{
"epoch": 1.6727206683617872,
"grad_norm": 7.940707206726074,
"learning_rate": 8.181983290955322e-06,
"loss": 0.2115,
"step": 9210
},
{
"epoch": 1.6745368688703233,
"grad_norm": 5.944194793701172,
"learning_rate": 8.136578278241918e-06,
"loss": 0.2005,
"step": 9220
},
{
"epoch": 1.6763530693788593,
"grad_norm": 14.915303230285645,
"learning_rate": 8.091173265528514e-06,
"loss": 0.3257,
"step": 9230
},
{
"epoch": 1.6781692698873956,
"grad_norm": 17.358768463134766,
"learning_rate": 8.04576825281511e-06,
"loss": 0.4163,
"step": 9240
},
{
"epoch": 1.6799854703959318,
"grad_norm": 16.51521110534668,
"learning_rate": 8.000363240101707e-06,
"loss": 0.282,
"step": 9250
},
{
"epoch": 1.6818016709044679,
"grad_norm": 12.452226638793945,
"learning_rate": 7.954958227388303e-06,
"loss": 0.2801,
"step": 9260
},
{
"epoch": 1.683617871413004,
"grad_norm": 21.576208114624023,
"learning_rate": 7.909553214674901e-06,
"loss": 0.3299,
"step": 9270
},
{
"epoch": 1.6854340719215402,
"grad_norm": 17.944128036499023,
"learning_rate": 7.864148201961497e-06,
"loss": 0.302,
"step": 9280
},
{
"epoch": 1.6872502724300764,
"grad_norm": 16.343204498291016,
"learning_rate": 7.818743189248093e-06,
"loss": 0.2561,
"step": 9290
},
{
"epoch": 1.6890664729386125,
"grad_norm": 12.301880836486816,
"learning_rate": 7.77333817653469e-06,
"loss": 0.3478,
"step": 9300
},
{
"epoch": 1.6890664729386125,
"eval_accuracy": 0.8884331419196062,
"eval_f1": 0.8845863378573883,
"eval_loss": 0.33353373408317566,
"eval_precision": 0.8812408616942775,
"eval_recall": 0.8907306256856646,
"eval_runtime": 12.1668,
"eval_samples_per_second": 100.191,
"eval_steps_per_second": 6.329,
"step": 9300
},
{
"epoch": 1.6908826734471485,
"grad_norm": 17.064929962158203,
"learning_rate": 7.727933163821286e-06,
"loss": 0.3987,
"step": 9310
},
{
"epoch": 1.6926988739556847,
"grad_norm": 6.434702396392822,
"learning_rate": 7.682528151107884e-06,
"loss": 0.3076,
"step": 9320
},
{
"epoch": 1.694515074464221,
"grad_norm": 6.527015209197998,
"learning_rate": 7.63712313839448e-06,
"loss": 0.227,
"step": 9330
},
{
"epoch": 1.696331274972757,
"grad_norm": 11.179935455322266,
"learning_rate": 7.5917181256810756e-06,
"loss": 0.2426,
"step": 9340
},
{
"epoch": 1.698147475481293,
"grad_norm": 10.149739265441895,
"learning_rate": 7.546313112967672e-06,
"loss": 0.3203,
"step": 9350
},
{
"epoch": 1.6999636759898293,
"grad_norm": 22.71770668029785,
"learning_rate": 7.500908100254268e-06,
"loss": 0.3208,
"step": 9360
},
{
"epoch": 1.7017798764983654,
"grad_norm": 9.473342895507812,
"learning_rate": 7.455503087540864e-06,
"loss": 0.3104,
"step": 9370
},
{
"epoch": 1.7035960770069014,
"grad_norm": 15.134129524230957,
"learning_rate": 7.4100980748274606e-06,
"loss": 0.3233,
"step": 9380
},
{
"epoch": 1.7054122775154377,
"grad_norm": 17.22422981262207,
"learning_rate": 7.364693062114058e-06,
"loss": 0.2024,
"step": 9390
},
{
"epoch": 1.707228478023974,
"grad_norm": 7.5997724533081055,
"learning_rate": 7.319288049400654e-06,
"loss": 0.256,
"step": 9400
},
{
"epoch": 1.70904467853251,
"grad_norm": 3.4972565174102783,
"learning_rate": 7.273883036687251e-06,
"loss": 0.2396,
"step": 9410
},
{
"epoch": 1.710860879041046,
"grad_norm": 14.709694862365723,
"learning_rate": 7.228478023973847e-06,
"loss": 0.4198,
"step": 9420
},
{
"epoch": 1.7126770795495823,
"grad_norm": 6.8229546546936035,
"learning_rate": 7.1830730112604435e-06,
"loss": 0.3004,
"step": 9430
},
{
"epoch": 1.7144932800581185,
"grad_norm": 19.955167770385742,
"learning_rate": 7.13766799854704e-06,
"loss": 0.3674,
"step": 9440
},
{
"epoch": 1.7163094805666546,
"grad_norm": 16.53900909423828,
"learning_rate": 7.092262985833637e-06,
"loss": 0.5129,
"step": 9450
},
{
"epoch": 1.7181256810751906,
"grad_norm": 19.58238983154297,
"learning_rate": 7.046857973120233e-06,
"loss": 0.3268,
"step": 9460
},
{
"epoch": 1.7199418815837269,
"grad_norm": 17.560192108154297,
"learning_rate": 7.001452960406829e-06,
"loss": 0.1925,
"step": 9470
},
{
"epoch": 1.721758082092263,
"grad_norm": 12.490224838256836,
"learning_rate": 6.956047947693426e-06,
"loss": 0.215,
"step": 9480
},
{
"epoch": 1.7235742826007991,
"grad_norm": 11.736912727355957,
"learning_rate": 6.910642934980022e-06,
"loss": 0.2565,
"step": 9490
},
{
"epoch": 1.7253904831093352,
"grad_norm": 16.065710067749023,
"learning_rate": 6.865237922266618e-06,
"loss": 0.2175,
"step": 9500
},
{
"epoch": 1.7272066836178714,
"grad_norm": 24.19011116027832,
"learning_rate": 6.819832909553216e-06,
"loss": 0.265,
"step": 9510
},
{
"epoch": 1.7290228841264077,
"grad_norm": 17.26852798461914,
"learning_rate": 6.774427896839812e-06,
"loss": 0.2567,
"step": 9520
},
{
"epoch": 1.7308390846349437,
"grad_norm": 15.206780433654785,
"learning_rate": 6.7290228841264085e-06,
"loss": 0.385,
"step": 9530
},
{
"epoch": 1.7326552851434798,
"grad_norm": 25.0367374420166,
"learning_rate": 6.683617871413005e-06,
"loss": 0.3845,
"step": 9540
},
{
"epoch": 1.734471485652016,
"grad_norm": 6.6783270835876465,
"learning_rate": 6.638212858699601e-06,
"loss": 0.2297,
"step": 9550
},
{
"epoch": 1.7362876861605523,
"grad_norm": 23.788753509521484,
"learning_rate": 6.592807845986197e-06,
"loss": 0.2331,
"step": 9560
},
{
"epoch": 1.738103886669088,
"grad_norm": 15.42270278930664,
"learning_rate": 6.5474028332727935e-06,
"loss": 0.3213,
"step": 9570
},
{
"epoch": 1.7399200871776244,
"grad_norm": 13.071663856506348,
"learning_rate": 6.50199782055939e-06,
"loss": 0.34,
"step": 9580
},
{
"epoch": 1.7417362876861606,
"grad_norm": 10.215317726135254,
"learning_rate": 6.456592807845986e-06,
"loss": 0.3863,
"step": 9590
},
{
"epoch": 1.7435524881946967,
"grad_norm": 18.26382064819336,
"learning_rate": 6.411187795132582e-06,
"loss": 0.1977,
"step": 9600
},
{
"epoch": 1.7435524881946967,
"eval_accuracy": 0.8917145200984413,
"eval_f1": 0.8880029159699074,
"eval_loss": 0.32050377130508423,
"eval_precision": 0.8856300730561886,
"eval_recall": 0.8930436229083021,
"eval_runtime": 12.1617,
"eval_samples_per_second": 100.233,
"eval_steps_per_second": 6.331,
"step": 9600
},
{
"epoch": 1.7453686887032327,
"grad_norm": 28.250118255615234,
"learning_rate": 6.3657827824191785e-06,
"loss": 0.5122,
"step": 9610
},
{
"epoch": 1.747184889211769,
"grad_norm": 25.557300567626953,
"learning_rate": 6.3203777697057765e-06,
"loss": 0.2384,
"step": 9620
},
{
"epoch": 1.7490010897203052,
"grad_norm": 14.092631340026855,
"learning_rate": 6.274972756992373e-06,
"loss": 0.224,
"step": 9630
},
{
"epoch": 1.7508172902288412,
"grad_norm": 16.70784568786621,
"learning_rate": 6.229567744278968e-06,
"loss": 0.2133,
"step": 9640
},
{
"epoch": 1.7526334907373773,
"grad_norm": 13.771525382995605,
"learning_rate": 6.184162731565565e-06,
"loss": 0.4779,
"step": 9650
},
{
"epoch": 1.7544496912459135,
"grad_norm": 13.317017555236816,
"learning_rate": 6.1387577188521614e-06,
"loss": 0.188,
"step": 9660
},
{
"epoch": 1.7562658917544498,
"grad_norm": 16.88526153564453,
"learning_rate": 6.093352706138758e-06,
"loss": 0.2979,
"step": 9670
},
{
"epoch": 1.7580820922629858,
"grad_norm": 21.325788497924805,
"learning_rate": 6.047947693425354e-06,
"loss": 0.31,
"step": 9680
},
{
"epoch": 1.7598982927715219,
"grad_norm": 20.396289825439453,
"learning_rate": 6.00254268071195e-06,
"loss": 0.308,
"step": 9690
},
{
"epoch": 1.7617144932800581,
"grad_norm": 15.008870124816895,
"learning_rate": 5.957137667998547e-06,
"loss": 0.1889,
"step": 9700
},
{
"epoch": 1.7635306937885944,
"grad_norm": 15.86091136932373,
"learning_rate": 5.9117326552851435e-06,
"loss": 0.2219,
"step": 9710
},
{
"epoch": 1.7653468942971304,
"grad_norm": 13.422758102416992,
"learning_rate": 5.86632764257174e-06,
"loss": 0.2332,
"step": 9720
},
{
"epoch": 1.7671630948056665,
"grad_norm": 13.367154121398926,
"learning_rate": 5.820922629858337e-06,
"loss": 0.2688,
"step": 9730
},
{
"epoch": 1.7689792953142027,
"grad_norm": 9.540538787841797,
"learning_rate": 5.775517617144933e-06,
"loss": 0.1648,
"step": 9740
},
{
"epoch": 1.770795495822739,
"grad_norm": 1.7264131307601929,
"learning_rate": 5.73011260443153e-06,
"loss": 0.3343,
"step": 9750
},
{
"epoch": 1.772611696331275,
"grad_norm": 12.68677806854248,
"learning_rate": 5.6847075917181265e-06,
"loss": 0.181,
"step": 9760
},
{
"epoch": 1.774427896839811,
"grad_norm": 18.84226417541504,
"learning_rate": 5.639302579004723e-06,
"loss": 0.3011,
"step": 9770
},
{
"epoch": 1.7762440973483473,
"grad_norm": 18.599489212036133,
"learning_rate": 5.593897566291319e-06,
"loss": 0.3759,
"step": 9780
},
{
"epoch": 1.7780602978568834,
"grad_norm": 24.071170806884766,
"learning_rate": 5.548492553577915e-06,
"loss": 0.4405,
"step": 9790
},
{
"epoch": 1.7798764983654194,
"grad_norm": 20.368112564086914,
"learning_rate": 5.503087540864512e-06,
"loss": 0.1978,
"step": 9800
},
{
"epoch": 1.7816926988739556,
"grad_norm": 4.559482574462891,
"learning_rate": 5.4576825281511086e-06,
"loss": 0.2037,
"step": 9810
},
{
"epoch": 1.783508899382492,
"grad_norm": 22.67530059814453,
"learning_rate": 5.412277515437705e-06,
"loss": 0.291,
"step": 9820
},
{
"epoch": 1.785325099891028,
"grad_norm": 14.617053031921387,
"learning_rate": 5.366872502724301e-06,
"loss": 0.2558,
"step": 9830
},
{
"epoch": 1.787141300399564,
"grad_norm": 9.699358940124512,
"learning_rate": 5.321467490010897e-06,
"loss": 0.4875,
"step": 9840
},
{
"epoch": 1.7889575009081002,
"grad_norm": 7.0100908279418945,
"learning_rate": 5.2760624772974936e-06,
"loss": 0.2908,
"step": 9850
},
{
"epoch": 1.7907737014166365,
"grad_norm": 18.469934463500977,
"learning_rate": 5.230657464584091e-06,
"loss": 0.3165,
"step": 9860
},
{
"epoch": 1.7925899019251725,
"grad_norm": 8.11326789855957,
"learning_rate": 5.185252451870687e-06,
"loss": 0.2946,
"step": 9870
},
{
"epoch": 1.7944061024337086,
"grad_norm": 14.999030113220215,
"learning_rate": 5.139847439157283e-06,
"loss": 0.2645,
"step": 9880
},
{
"epoch": 1.7962223029422448,
"grad_norm": 11.65622615814209,
"learning_rate": 5.094442426443879e-06,
"loss": 0.3627,
"step": 9890
},
{
"epoch": 1.798038503450781,
"grad_norm": 11.40311336517334,
"learning_rate": 5.049037413730476e-06,
"loss": 0.1679,
"step": 9900
},
{
"epoch": 1.798038503450781,
"eval_accuracy": 0.8941755537325676,
"eval_f1": 0.8909275000598975,
"eval_loss": 0.31260696053504944,
"eval_precision": 0.8874307991095873,
"eval_recall": 0.8966734867740875,
"eval_runtime": 12.1595,
"eval_samples_per_second": 100.251,
"eval_steps_per_second": 6.333,
"step": 9900
},
{
"epoch": 1.7998547039593171,
"grad_norm": 17.762371063232422,
"learning_rate": 5.003632401017073e-06,
"loss": 0.2242,
"step": 9910
},
{
"epoch": 1.8016709044678532,
"grad_norm": 8.07528018951416,
"learning_rate": 4.958227388303669e-06,
"loss": 0.305,
"step": 9920
},
{
"epoch": 1.8034871049763894,
"grad_norm": 8.580565452575684,
"learning_rate": 4.912822375590265e-06,
"loss": 0.3421,
"step": 9930
},
{
"epoch": 1.8053033054849257,
"grad_norm": 13.502715110778809,
"learning_rate": 4.8674173628768615e-06,
"loss": 0.1513,
"step": 9940
},
{
"epoch": 1.8071195059934617,
"grad_norm": 25.522857666015625,
"learning_rate": 4.822012350163458e-06,
"loss": 0.4245,
"step": 9950
},
{
"epoch": 1.8089357065019978,
"grad_norm": 11.059943199157715,
"learning_rate": 4.776607337450055e-06,
"loss": 0.2588,
"step": 9960
},
{
"epoch": 1.810751907010534,
"grad_norm": 25.464778900146484,
"learning_rate": 4.731202324736651e-06,
"loss": 0.3359,
"step": 9970
},
{
"epoch": 1.8125681075190703,
"grad_norm": 14.878934860229492,
"learning_rate": 4.685797312023247e-06,
"loss": 0.2937,
"step": 9980
},
{
"epoch": 1.814384308027606,
"grad_norm": 17.37669563293457,
"learning_rate": 4.640392299309844e-06,
"loss": 0.2071,
"step": 9990
},
{
"epoch": 1.8162005085361423,
"grad_norm": 5.666213035583496,
"learning_rate": 4.59498728659644e-06,
"loss": 0.2143,
"step": 10000
},
{
"epoch": 1.8180167090446786,
"grad_norm": 10.074169158935547,
"learning_rate": 4.549582273883037e-06,
"loss": 0.3549,
"step": 10010
},
{
"epoch": 1.8198329095532146,
"grad_norm": 22.636964797973633,
"learning_rate": 4.504177261169633e-06,
"loss": 0.3005,
"step": 10020
},
{
"epoch": 1.8216491100617507,
"grad_norm": 21.29734992980957,
"learning_rate": 4.45877224845623e-06,
"loss": 0.3036,
"step": 10030
},
{
"epoch": 1.823465310570287,
"grad_norm": 18.074115753173828,
"learning_rate": 4.4133672357428265e-06,
"loss": 0.2375,
"step": 10040
},
{
"epoch": 1.8252815110788232,
"grad_norm": 13.343547821044922,
"learning_rate": 4.367962223029423e-06,
"loss": 0.3563,
"step": 10050
},
{
"epoch": 1.8270977115873592,
"grad_norm": 15.384708404541016,
"learning_rate": 4.322557210316019e-06,
"loss": 0.3085,
"step": 10060
},
{
"epoch": 1.8289139120958953,
"grad_norm": 22.345226287841797,
"learning_rate": 4.277152197602616e-06,
"loss": 0.2529,
"step": 10070
},
{
"epoch": 1.8307301126044315,
"grad_norm": 3.8110063076019287,
"learning_rate": 4.231747184889212e-06,
"loss": 0.2276,
"step": 10080
},
{
"epoch": 1.8325463131129678,
"grad_norm": 14.203871726989746,
"learning_rate": 4.186342172175809e-06,
"loss": 0.2813,
"step": 10090
},
{
"epoch": 1.8343625136215038,
"grad_norm": 18.770957946777344,
"learning_rate": 4.140937159462405e-06,
"loss": 0.2534,
"step": 10100
},
{
"epoch": 1.8361787141300399,
"grad_norm": 19.307645797729492,
"learning_rate": 4.095532146749001e-06,
"loss": 0.2505,
"step": 10110
},
{
"epoch": 1.8379949146385761,
"grad_norm": 12.851605415344238,
"learning_rate": 4.050127134035598e-06,
"loss": 0.1797,
"step": 10120
},
{
"epoch": 1.8398111151471124,
"grad_norm": 17.81605339050293,
"learning_rate": 4.0047221213221944e-06,
"loss": 0.1766,
"step": 10130
},
{
"epoch": 1.8416273156556484,
"grad_norm": 5.046531677246094,
"learning_rate": 3.959317108608791e-06,
"loss": 0.2847,
"step": 10140
},
{
"epoch": 1.8434435161641844,
"grad_norm": 15.07939338684082,
"learning_rate": 3.913912095895387e-06,
"loss": 0.2543,
"step": 10150
},
{
"epoch": 1.8452597166727207,
"grad_norm": 5.4197187423706055,
"learning_rate": 3.868507083181983e-06,
"loss": 0.276,
"step": 10160
},
{
"epoch": 1.847075917181257,
"grad_norm": 4.651303768157959,
"learning_rate": 3.82310207046858e-06,
"loss": 0.3026,
"step": 10170
},
{
"epoch": 1.848892117689793,
"grad_norm": 5.725452423095703,
"learning_rate": 3.7776970577551765e-06,
"loss": 0.321,
"step": 10180
},
{
"epoch": 1.850708318198329,
"grad_norm": 12.18204402923584,
"learning_rate": 3.7322920450417728e-06,
"loss": 0.2724,
"step": 10190
},
{
"epoch": 1.8525245187068653,
"grad_norm": 3.6419947147369385,
"learning_rate": 3.686887032328369e-06,
"loss": 0.3451,
"step": 10200
},
{
"epoch": 1.8525245187068653,
"eval_accuracy": 0.896636587366694,
"eval_f1": 0.8908937354693052,
"eval_loss": 0.31067386269569397,
"eval_precision": 0.8844179738985345,
"eval_recall": 0.899200938788393,
"eval_runtime": 12.1663,
"eval_samples_per_second": 100.195,
"eval_steps_per_second": 6.329,
"step": 10200
},
{
"epoch": 1.8543407192154013,
"grad_norm": 15.111452102661133,
"learning_rate": 3.6414820196149653e-06,
"loss": 0.1833,
"step": 10210
},
{
"epoch": 1.8561569197239374,
"grad_norm": 18.825973510742188,
"learning_rate": 3.5960770069015624e-06,
"loss": 0.3835,
"step": 10220
},
{
"epoch": 1.8579731202324736,
"grad_norm": 12.017671585083008,
"learning_rate": 3.5506719941881586e-06,
"loss": 0.2242,
"step": 10230
},
{
"epoch": 1.8597893207410099,
"grad_norm": 24.258045196533203,
"learning_rate": 3.505266981474755e-06,
"loss": 0.4669,
"step": 10240
},
{
"epoch": 1.861605521249546,
"grad_norm": 16.036376953125,
"learning_rate": 3.4598619687613515e-06,
"loss": 0.3115,
"step": 10250
},
{
"epoch": 1.863421721758082,
"grad_norm": 25.399738311767578,
"learning_rate": 3.414456956047948e-06,
"loss": 0.3122,
"step": 10260
},
{
"epoch": 1.8652379222666182,
"grad_norm": 20.493247985839844,
"learning_rate": 3.369051943334544e-06,
"loss": 0.3484,
"step": 10270
},
{
"epoch": 1.8670541227751545,
"grad_norm": 16.851757049560547,
"learning_rate": 3.323646930621141e-06,
"loss": 0.3464,
"step": 10280
},
{
"epoch": 1.8688703232836905,
"grad_norm": 5.93435001373291,
"learning_rate": 3.2782419179077374e-06,
"loss": 0.1495,
"step": 10290
},
{
"epoch": 1.8706865237922266,
"grad_norm": 15.460413932800293,
"learning_rate": 3.2328369051943336e-06,
"loss": 0.2404,
"step": 10300
},
{
"epoch": 1.8725027243007628,
"grad_norm": 14.957904815673828,
"learning_rate": 3.18743189248093e-06,
"loss": 0.2765,
"step": 10310
},
{
"epoch": 1.874318924809299,
"grad_norm": 24.511220932006836,
"learning_rate": 3.142026879767526e-06,
"loss": 0.2619,
"step": 10320
},
{
"epoch": 1.876135125317835,
"grad_norm": 15.439196586608887,
"learning_rate": 3.096621867054123e-06,
"loss": 0.2426,
"step": 10330
},
{
"epoch": 1.8779513258263711,
"grad_norm": 18.046316146850586,
"learning_rate": 3.0512168543407195e-06,
"loss": 0.2727,
"step": 10340
},
{
"epoch": 1.8797675263349074,
"grad_norm": 9.866218566894531,
"learning_rate": 3.0058118416273157e-06,
"loss": 0.2591,
"step": 10350
},
{
"epoch": 1.8815837268434437,
"grad_norm": 15.493182182312012,
"learning_rate": 2.960406828913912e-06,
"loss": 0.2277,
"step": 10360
},
{
"epoch": 1.8833999273519797,
"grad_norm": 13.912703514099121,
"learning_rate": 2.9150018162005086e-06,
"loss": 0.2501,
"step": 10370
},
{
"epoch": 1.8852161278605157,
"grad_norm": 10.867436408996582,
"learning_rate": 2.869596803487105e-06,
"loss": 0.3008,
"step": 10380
},
{
"epoch": 1.887032328369052,
"grad_norm": 18.993480682373047,
"learning_rate": 2.8241917907737016e-06,
"loss": 0.1959,
"step": 10390
},
{
"epoch": 1.8888485288775883,
"grad_norm": 10.849798202514648,
"learning_rate": 2.778786778060298e-06,
"loss": 0.3108,
"step": 10400
},
{
"epoch": 1.890664729386124,
"grad_norm": 22.30524253845215,
"learning_rate": 2.7333817653468945e-06,
"loss": 0.3187,
"step": 10410
},
{
"epoch": 1.8924809298946603,
"grad_norm": 4.560070514678955,
"learning_rate": 2.687976752633491e-06,
"loss": 0.2388,
"step": 10420
},
{
"epoch": 1.8942971304031966,
"grad_norm": 13.859076499938965,
"learning_rate": 2.6425717399200874e-06,
"loss": 0.2131,
"step": 10430
},
{
"epoch": 1.8961133309117326,
"grad_norm": 4.655661582946777,
"learning_rate": 2.5971667272066837e-06,
"loss": 0.1688,
"step": 10440
},
{
"epoch": 1.8979295314202687,
"grad_norm": 18.112701416015625,
"learning_rate": 2.5517617144932803e-06,
"loss": 0.2763,
"step": 10450
},
{
"epoch": 1.899745731928805,
"grad_norm": 9.725381851196289,
"learning_rate": 2.5063567017798766e-06,
"loss": 0.2694,
"step": 10460
},
{
"epoch": 1.9015619324373412,
"grad_norm": 21.33144760131836,
"learning_rate": 2.4609516890664732e-06,
"loss": 0.2813,
"step": 10470
},
{
"epoch": 1.9033781329458772,
"grad_norm": 9.807076454162598,
"learning_rate": 2.4155466763530695e-06,
"loss": 0.2441,
"step": 10480
},
{
"epoch": 1.9051943334544132,
"grad_norm": 5.741889476776123,
"learning_rate": 2.3701416636396657e-06,
"loss": 0.2222,
"step": 10490
},
{
"epoch": 1.9070105339629495,
"grad_norm": 12.217042922973633,
"learning_rate": 2.3247366509262624e-06,
"loss": 0.333,
"step": 10500
},
{
"epoch": 1.9070105339629495,
"eval_accuracy": 0.8917145200984413,
"eval_f1": 0.8879221499881476,
"eval_loss": 0.31236740946769714,
"eval_precision": 0.880595048818372,
"eval_recall": 0.8979764002075346,
"eval_runtime": 12.1598,
"eval_samples_per_second": 100.249,
"eval_steps_per_second": 6.332,
"step": 10500
},
{
"epoch": 1.9088267344714858,
"grad_norm": 7.902151584625244,
"learning_rate": 2.2793316382128587e-06,
"loss": 0.2326,
"step": 10510
},
{
"epoch": 1.9106429349800218,
"grad_norm": 17.3931827545166,
"learning_rate": 2.233926625499455e-06,
"loss": 0.2766,
"step": 10520
},
{
"epoch": 1.9124591354885578,
"grad_norm": 13.122883796691895,
"learning_rate": 2.1885216127860516e-06,
"loss": 0.2769,
"step": 10530
},
{
"epoch": 1.914275335997094,
"grad_norm": 12.172006607055664,
"learning_rate": 2.143116600072648e-06,
"loss": 0.2228,
"step": 10540
},
{
"epoch": 1.9160915365056304,
"grad_norm": 6.881805896759033,
"learning_rate": 2.0977115873592445e-06,
"loss": 0.1705,
"step": 10550
},
{
"epoch": 1.9179077370141664,
"grad_norm": 19.267488479614258,
"learning_rate": 2.052306574645841e-06,
"loss": 0.3439,
"step": 10560
},
{
"epoch": 1.9197239375227024,
"grad_norm": 4.804058074951172,
"learning_rate": 2.0069015619324374e-06,
"loss": 0.2191,
"step": 10570
},
{
"epoch": 1.9215401380312387,
"grad_norm": 15.236076354980469,
"learning_rate": 1.961496549219034e-06,
"loss": 0.2319,
"step": 10580
},
{
"epoch": 1.923356338539775,
"grad_norm": 17.033308029174805,
"learning_rate": 1.9160915365056303e-06,
"loss": 0.4269,
"step": 10590
},
{
"epoch": 1.925172539048311,
"grad_norm": 15.480613708496094,
"learning_rate": 1.870686523792227e-06,
"loss": 0.281,
"step": 10600
},
{
"epoch": 1.926988739556847,
"grad_norm": 18.749048233032227,
"learning_rate": 1.8252815110788233e-06,
"loss": 0.2042,
"step": 10610
},
{
"epoch": 1.9288049400653833,
"grad_norm": 10.639968872070312,
"learning_rate": 1.7798764983654195e-06,
"loss": 0.3482,
"step": 10620
},
{
"epoch": 1.9306211405739193,
"grad_norm": 19.38547706604004,
"learning_rate": 1.7344714856520162e-06,
"loss": 0.285,
"step": 10630
},
{
"epoch": 1.9324373410824554,
"grad_norm": 3.3453238010406494,
"learning_rate": 1.6890664729386124e-06,
"loss": 0.2941,
"step": 10640
},
{
"epoch": 1.9342535415909916,
"grad_norm": 16.062301635742188,
"learning_rate": 1.643661460225209e-06,
"loss": 0.3375,
"step": 10650
},
{
"epoch": 1.9360697420995279,
"grad_norm": 15.955122947692871,
"learning_rate": 1.5982564475118054e-06,
"loss": 0.2047,
"step": 10660
},
{
"epoch": 1.937885942608064,
"grad_norm": 25.678117752075195,
"learning_rate": 1.5528514347984018e-06,
"loss": 0.2348,
"step": 10670
},
{
"epoch": 1.9397021431166,
"grad_norm": 13.082353591918945,
"learning_rate": 1.5074464220849983e-06,
"loss": 0.2699,
"step": 10680
},
{
"epoch": 1.9415183436251362,
"grad_norm": 15.006932258605957,
"learning_rate": 1.4620414093715947e-06,
"loss": 0.2522,
"step": 10690
},
{
"epoch": 1.9433345441336725,
"grad_norm": 18.136178970336914,
"learning_rate": 1.4166363966581912e-06,
"loss": 0.3754,
"step": 10700
},
{
"epoch": 1.9451507446422085,
"grad_norm": 13.17072868347168,
"learning_rate": 1.3712313839447874e-06,
"loss": 0.2194,
"step": 10710
},
{
"epoch": 1.9469669451507445,
"grad_norm": 16.255809783935547,
"learning_rate": 1.325826371231384e-06,
"loss": 0.3234,
"step": 10720
},
{
"epoch": 1.9487831456592808,
"grad_norm": 30.35965347290039,
"learning_rate": 1.2804213585179804e-06,
"loss": 0.3173,
"step": 10730
},
{
"epoch": 1.950599346167817,
"grad_norm": 15.700325965881348,
"learning_rate": 1.235016345804577e-06,
"loss": 0.2505,
"step": 10740
},
{
"epoch": 1.952415546676353,
"grad_norm": 18.816146850585938,
"learning_rate": 1.1896113330911733e-06,
"loss": 0.3193,
"step": 10750
},
{
"epoch": 1.9542317471848891,
"grad_norm": 16.41304588317871,
"learning_rate": 1.1442063203777698e-06,
"loss": 0.2486,
"step": 10760
},
{
"epoch": 1.9560479476934254,
"grad_norm": 16.63722801208496,
"learning_rate": 1.0988013076643662e-06,
"loss": 0.237,
"step": 10770
},
{
"epoch": 1.9578641482019616,
"grad_norm": 6.757064342498779,
"learning_rate": 1.0533962949509627e-06,
"loss": 0.2081,
"step": 10780
},
{
"epoch": 1.9596803487104977,
"grad_norm": 3.97802734375,
"learning_rate": 1.007991282237559e-06,
"loss": 0.2566,
"step": 10790
},
{
"epoch": 1.9614965492190337,
"grad_norm": 5.727281093597412,
"learning_rate": 9.625862695241554e-07,
"loss": 0.1921,
"step": 10800
},
{
"epoch": 1.9614965492190337,
"eval_accuracy": 0.9007383100902379,
"eval_f1": 0.8957933200060735,
"eval_loss": 0.3023645579814911,
"eval_precision": 0.8902412162565443,
"eval_recall": 0.9027657103823287,
"eval_runtime": 12.1671,
"eval_samples_per_second": 100.188,
"eval_steps_per_second": 6.329,
"step": 10800
}
],
"logging_steps": 10,
"max_steps": 11012,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 600,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.701982939756626e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}