Qwen_finetuned / trainer_state.json
Rcarvalo's picture
Upload 14 files
30e4ea9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9792492422476101,
"eval_steps": 300,
"global_step": 4200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0023315458148752623,
"grad_norm": 58.996910095214844,
"learning_rate": 4.997668454185125e-05,
"loss": 2.8188,
"step": 10
},
{
"epoch": 0.004663091629750525,
"grad_norm": 46.450401306152344,
"learning_rate": 4.995336908370249e-05,
"loss": 2.2969,
"step": 20
},
{
"epoch": 0.006994637444625787,
"grad_norm": 20.279075622558594,
"learning_rate": 4.993005362555374e-05,
"loss": 2.2492,
"step": 30
},
{
"epoch": 0.00932618325950105,
"grad_norm": 54.02119445800781,
"learning_rate": 4.9906738167404995e-05,
"loss": 2.0938,
"step": 40
},
{
"epoch": 0.011657729074376311,
"grad_norm": 20.223501205444336,
"learning_rate": 4.988342270925624e-05,
"loss": 1.9648,
"step": 50
},
{
"epoch": 0.013989274889251575,
"grad_norm": 32.29741668701172,
"learning_rate": 4.9860107251107484e-05,
"loss": 1.8797,
"step": 60
},
{
"epoch": 0.016320820704126836,
"grad_norm": 36.739723205566406,
"learning_rate": 4.983679179295873e-05,
"loss": 1.6664,
"step": 70
},
{
"epoch": 0.0186523665190021,
"grad_norm": 53.299015045166016,
"learning_rate": 4.981347633480998e-05,
"loss": 1.6,
"step": 80
},
{
"epoch": 0.02098391233387736,
"grad_norm": 25.623676300048828,
"learning_rate": 4.979016087666123e-05,
"loss": 1.6898,
"step": 90
},
{
"epoch": 0.023315458148752622,
"grad_norm": 34.87362289428711,
"learning_rate": 4.9766845418512476e-05,
"loss": 1.5578,
"step": 100
},
{
"epoch": 0.025647003963627884,
"grad_norm": 25.20810317993164,
"learning_rate": 4.974352996036372e-05,
"loss": 1.5078,
"step": 110
},
{
"epoch": 0.02797854977850315,
"grad_norm": 43.89160919189453,
"learning_rate": 4.972021450221497e-05,
"loss": 1.4242,
"step": 120
},
{
"epoch": 0.03031009559337841,
"grad_norm": 43.445186614990234,
"learning_rate": 4.969689904406622e-05,
"loss": 1.3207,
"step": 130
},
{
"epoch": 0.03264164140825367,
"grad_norm": 49.01000213623047,
"learning_rate": 4.967358358591747e-05,
"loss": 1.1633,
"step": 140
},
{
"epoch": 0.034973187223128935,
"grad_norm": 32.247344970703125,
"learning_rate": 4.9650268127768713e-05,
"loss": 0.9754,
"step": 150
},
{
"epoch": 0.0373047330380042,
"grad_norm": 37.74547576904297,
"learning_rate": 4.962695266961996e-05,
"loss": 0.9918,
"step": 160
},
{
"epoch": 0.03963627885287946,
"grad_norm": 29.272994995117188,
"learning_rate": 4.960363721147121e-05,
"loss": 1.132,
"step": 170
},
{
"epoch": 0.04196782466775472,
"grad_norm": 34.0861930847168,
"learning_rate": 4.9580321753322454e-05,
"loss": 1.0285,
"step": 180
},
{
"epoch": 0.04429937048262998,
"grad_norm": 41.01604461669922,
"learning_rate": 4.95570062951737e-05,
"loss": 0.9484,
"step": 190
},
{
"epoch": 0.046630916297505244,
"grad_norm": 27.387298583984375,
"learning_rate": 4.953369083702495e-05,
"loss": 0.9852,
"step": 200
},
{
"epoch": 0.048962462112380506,
"grad_norm": 27.99677848815918,
"learning_rate": 4.9510375378876195e-05,
"loss": 0.6836,
"step": 210
},
{
"epoch": 0.05129400792725577,
"grad_norm": 26.071035385131836,
"learning_rate": 4.948705992072745e-05,
"loss": 0.8875,
"step": 220
},
{
"epoch": 0.05362555374213103,
"grad_norm": 29.15469741821289,
"learning_rate": 4.946374446257869e-05,
"loss": 0.893,
"step": 230
},
{
"epoch": 0.0559570995570063,
"grad_norm": 39.19640350341797,
"learning_rate": 4.9440429004429936e-05,
"loss": 0.8641,
"step": 240
},
{
"epoch": 0.05828864537188156,
"grad_norm": 24.756563186645508,
"learning_rate": 4.941711354628119e-05,
"loss": 0.9574,
"step": 250
},
{
"epoch": 0.06062019118675682,
"grad_norm": 33.53086471557617,
"learning_rate": 4.939379808813244e-05,
"loss": 0.8523,
"step": 260
},
{
"epoch": 0.06295173700163208,
"grad_norm": 14.436066627502441,
"learning_rate": 4.9370482629983684e-05,
"loss": 0.7646,
"step": 270
},
{
"epoch": 0.06528328281650735,
"grad_norm": 20.99724769592285,
"learning_rate": 4.934716717183493e-05,
"loss": 0.8172,
"step": 280
},
{
"epoch": 0.0676148286313826,
"grad_norm": 27.321861267089844,
"learning_rate": 4.932385171368617e-05,
"loss": 0.8965,
"step": 290
},
{
"epoch": 0.06994637444625787,
"grad_norm": 14.228981971740723,
"learning_rate": 4.9300536255537425e-05,
"loss": 0.8838,
"step": 300
},
{
"epoch": 0.06994637444625787,
"eval_accuracy": 0.6251025430680885,
"eval_f1": 0.6198441320333742,
"eval_loss": 0.9746333956718445,
"eval_precision": 0.6288553718864676,
"eval_recall": 0.6346714934867023,
"eval_runtime": 32.8297,
"eval_samples_per_second": 37.131,
"eval_steps_per_second": 2.345,
"step": 300
},
{
"epoch": 0.07227792026113312,
"grad_norm": 11.636761665344238,
"learning_rate": 4.9277220797388676e-05,
"loss": 0.6803,
"step": 310
},
{
"epoch": 0.0746094660760084,
"grad_norm": 28.35625457763672,
"learning_rate": 4.925390533923992e-05,
"loss": 0.7066,
"step": 320
},
{
"epoch": 0.07694101189088366,
"grad_norm": 34.743988037109375,
"learning_rate": 4.9230589881091165e-05,
"loss": 0.6387,
"step": 330
},
{
"epoch": 0.07927255770575892,
"grad_norm": 27.67451286315918,
"learning_rate": 4.920727442294241e-05,
"loss": 0.8309,
"step": 340
},
{
"epoch": 0.08160410352063419,
"grad_norm": 35.13928985595703,
"learning_rate": 4.918395896479366e-05,
"loss": 0.8715,
"step": 350
},
{
"epoch": 0.08393564933550944,
"grad_norm": 35.64329147338867,
"learning_rate": 4.916064350664491e-05,
"loss": 0.6961,
"step": 360
},
{
"epoch": 0.08626719515038471,
"grad_norm": 26.17702293395996,
"learning_rate": 4.913732804849616e-05,
"loss": 0.7254,
"step": 370
},
{
"epoch": 0.08859874096525996,
"grad_norm": 26.370773315429688,
"learning_rate": 4.91140125903474e-05,
"loss": 0.7217,
"step": 380
},
{
"epoch": 0.09093028678013523,
"grad_norm": 34.7267951965332,
"learning_rate": 4.909069713219865e-05,
"loss": 0.7297,
"step": 390
},
{
"epoch": 0.09326183259501049,
"grad_norm": 40.13215637207031,
"learning_rate": 4.906738167404989e-05,
"loss": 0.865,
"step": 400
},
{
"epoch": 0.09559337840988576,
"grad_norm": 31.081972122192383,
"learning_rate": 4.904406621590114e-05,
"loss": 0.7848,
"step": 410
},
{
"epoch": 0.09792492422476101,
"grad_norm": 29.940229415893555,
"learning_rate": 4.9020750757752395e-05,
"loss": 0.7045,
"step": 420
},
{
"epoch": 0.10025647003963628,
"grad_norm": 28.462858200073242,
"learning_rate": 4.899743529960364e-05,
"loss": 0.5851,
"step": 430
},
{
"epoch": 0.10258801585451154,
"grad_norm": 26.055572509765625,
"learning_rate": 4.8974119841454884e-05,
"loss": 0.6502,
"step": 440
},
{
"epoch": 0.1049195616693868,
"grad_norm": 31.78554344177246,
"learning_rate": 4.8950804383306136e-05,
"loss": 0.8398,
"step": 450
},
{
"epoch": 0.10725110748426206,
"grad_norm": 27.621715545654297,
"learning_rate": 4.892748892515738e-05,
"loss": 0.6201,
"step": 460
},
{
"epoch": 0.10958265329913733,
"grad_norm": 46.45154571533203,
"learning_rate": 4.890417346700863e-05,
"loss": 0.8129,
"step": 470
},
{
"epoch": 0.1119141991140126,
"grad_norm": 30.32659339904785,
"learning_rate": 4.8880858008859876e-05,
"loss": 0.8564,
"step": 480
},
{
"epoch": 0.11424574492888785,
"grad_norm": 11.306530952453613,
"learning_rate": 4.885754255071112e-05,
"loss": 0.7492,
"step": 490
},
{
"epoch": 0.11657729074376312,
"grad_norm": 33.427490234375,
"learning_rate": 4.883422709256237e-05,
"loss": 0.6204,
"step": 500
},
{
"epoch": 0.11890883655863838,
"grad_norm": 28.030242919921875,
"learning_rate": 4.881091163441362e-05,
"loss": 0.6666,
"step": 510
},
{
"epoch": 0.12124038237351364,
"grad_norm": 26.319486618041992,
"learning_rate": 4.878759617626487e-05,
"loss": 0.6586,
"step": 520
},
{
"epoch": 0.1235719281883889,
"grad_norm": 25.426727294921875,
"learning_rate": 4.8764280718116114e-05,
"loss": 0.7229,
"step": 530
},
{
"epoch": 0.12590347400326415,
"grad_norm": 27.011367797851562,
"learning_rate": 4.874096525996736e-05,
"loss": 0.8982,
"step": 540
},
{
"epoch": 0.12823501981813942,
"grad_norm": 18.395326614379883,
"learning_rate": 4.871764980181861e-05,
"loss": 0.7205,
"step": 550
},
{
"epoch": 0.1305665656330147,
"grad_norm": 21.085630416870117,
"learning_rate": 4.8694334343669854e-05,
"loss": 0.582,
"step": 560
},
{
"epoch": 0.13289811144788996,
"grad_norm": 38.923683166503906,
"learning_rate": 4.86710188855211e-05,
"loss": 0.5496,
"step": 570
},
{
"epoch": 0.1352296572627652,
"grad_norm": 39.22763442993164,
"learning_rate": 4.864770342737235e-05,
"loss": 0.734,
"step": 580
},
{
"epoch": 0.13756120307764047,
"grad_norm": 22.916170120239258,
"learning_rate": 4.8624387969223595e-05,
"loss": 0.6928,
"step": 590
},
{
"epoch": 0.13989274889251574,
"grad_norm": 19.733055114746094,
"learning_rate": 4.860107251107485e-05,
"loss": 0.4986,
"step": 600
},
{
"epoch": 0.13989274889251574,
"eval_accuracy": 0.7456931911402789,
"eval_f1": 0.7266139292840452,
"eval_loss": 0.7722646594047546,
"eval_precision": 0.7451348277162813,
"eval_recall": 0.7211806530410576,
"eval_runtime": 32.3956,
"eval_samples_per_second": 37.629,
"eval_steps_per_second": 2.377,
"step": 600
},
{
"epoch": 0.142224294707391,
"grad_norm": 17.900259017944336,
"learning_rate": 4.857775705292609e-05,
"loss": 0.5705,
"step": 610
},
{
"epoch": 0.14455584052226625,
"grad_norm": 26.365203857421875,
"learning_rate": 4.8554441594777336e-05,
"loss": 0.5932,
"step": 620
},
{
"epoch": 0.14688738633714152,
"grad_norm": 20.65036392211914,
"learning_rate": 4.853112613662859e-05,
"loss": 0.6246,
"step": 630
},
{
"epoch": 0.1492189321520168,
"grad_norm": 20.070476531982422,
"learning_rate": 4.850781067847984e-05,
"loss": 0.7621,
"step": 640
},
{
"epoch": 0.15155047796689206,
"grad_norm": 27.528751373291016,
"learning_rate": 4.8484495220331084e-05,
"loss": 0.7121,
"step": 650
},
{
"epoch": 0.15388202378176732,
"grad_norm": 30.218090057373047,
"learning_rate": 4.846117976218233e-05,
"loss": 0.7557,
"step": 660
},
{
"epoch": 0.15621356959664257,
"grad_norm": 32.3998908996582,
"learning_rate": 4.843786430403357e-05,
"loss": 0.5539,
"step": 670
},
{
"epoch": 0.15854511541151783,
"grad_norm": 26.95191192626953,
"learning_rate": 4.8414548845884825e-05,
"loss": 0.5303,
"step": 680
},
{
"epoch": 0.1608766612263931,
"grad_norm": 30.945283889770508,
"learning_rate": 4.8391233387736076e-05,
"loss": 0.6001,
"step": 690
},
{
"epoch": 0.16320820704126837,
"grad_norm": 29.30241584777832,
"learning_rate": 4.836791792958732e-05,
"loss": 0.7291,
"step": 700
},
{
"epoch": 0.1655397528561436,
"grad_norm": 13.888816833496094,
"learning_rate": 4.8344602471438565e-05,
"loss": 0.8428,
"step": 710
},
{
"epoch": 0.16787129867101888,
"grad_norm": 29.725255966186523,
"learning_rate": 4.832128701328981e-05,
"loss": 0.6883,
"step": 720
},
{
"epoch": 0.17020284448589415,
"grad_norm": 42.3590202331543,
"learning_rate": 4.829797155514106e-05,
"loss": 0.6275,
"step": 730
},
{
"epoch": 0.17253439030076942,
"grad_norm": 23.562644958496094,
"learning_rate": 4.827465609699231e-05,
"loss": 0.5752,
"step": 740
},
{
"epoch": 0.17486593611564466,
"grad_norm": 32.83530044555664,
"learning_rate": 4.825134063884356e-05,
"loss": 0.6953,
"step": 750
},
{
"epoch": 0.17719748193051993,
"grad_norm": 12.841109275817871,
"learning_rate": 4.82280251806948e-05,
"loss": 0.4848,
"step": 760
},
{
"epoch": 0.1795290277453952,
"grad_norm": 22.86924171447754,
"learning_rate": 4.820470972254605e-05,
"loss": 0.6314,
"step": 770
},
{
"epoch": 0.18186057356027047,
"grad_norm": 16.436222076416016,
"learning_rate": 4.81813942643973e-05,
"loss": 0.6568,
"step": 780
},
{
"epoch": 0.18419211937514574,
"grad_norm": 13.749951362609863,
"learning_rate": 4.815807880624854e-05,
"loss": 0.7607,
"step": 790
},
{
"epoch": 0.18652366519002098,
"grad_norm": 42.645729064941406,
"learning_rate": 4.8134763348099795e-05,
"loss": 0.536,
"step": 800
},
{
"epoch": 0.18885521100489625,
"grad_norm": 25.09123420715332,
"learning_rate": 4.811144788995104e-05,
"loss": 0.6258,
"step": 810
},
{
"epoch": 0.19118675681977151,
"grad_norm": 24.370229721069336,
"learning_rate": 4.8088132431802284e-05,
"loss": 0.4913,
"step": 820
},
{
"epoch": 0.19351830263464678,
"grad_norm": 34.05779266357422,
"learning_rate": 4.8064816973653536e-05,
"loss": 0.8139,
"step": 830
},
{
"epoch": 0.19584984844952202,
"grad_norm": 24.232662200927734,
"learning_rate": 4.804150151550478e-05,
"loss": 0.7322,
"step": 840
},
{
"epoch": 0.1981813942643973,
"grad_norm": 20.707740783691406,
"learning_rate": 4.801818605735603e-05,
"loss": 0.591,
"step": 850
},
{
"epoch": 0.20051294007927256,
"grad_norm": 26.865257263183594,
"learning_rate": 4.7994870599207277e-05,
"loss": 0.6098,
"step": 860
},
{
"epoch": 0.20284448589414783,
"grad_norm": 10.35026741027832,
"learning_rate": 4.797155514105852e-05,
"loss": 0.6383,
"step": 870
},
{
"epoch": 0.20517603170902307,
"grad_norm": 23.788137435913086,
"learning_rate": 4.794823968290977e-05,
"loss": 0.7854,
"step": 880
},
{
"epoch": 0.20750757752389834,
"grad_norm": 21.079648971557617,
"learning_rate": 4.792492422476102e-05,
"loss": 0.6371,
"step": 890
},
{
"epoch": 0.2098391233387736,
"grad_norm": 34.78284454345703,
"learning_rate": 4.790160876661227e-05,
"loss": 0.6078,
"step": 900
},
{
"epoch": 0.2098391233387736,
"eval_accuracy": 0.689909762100082,
"eval_f1": 0.6883721016902948,
"eval_loss": 0.8121763467788696,
"eval_precision": 0.7020681280584337,
"eval_recall": 0.7085581473429932,
"eval_runtime": 32.2791,
"eval_samples_per_second": 37.764,
"eval_steps_per_second": 2.385,
"step": 900
},
{
"epoch": 0.21217066915364888,
"grad_norm": 38.722511291503906,
"learning_rate": 4.7878293308463514e-05,
"loss": 0.6371,
"step": 910
},
{
"epoch": 0.21450221496852412,
"grad_norm": 30.822816848754883,
"learning_rate": 4.785497785031476e-05,
"loss": 0.6244,
"step": 920
},
{
"epoch": 0.2168337607833994,
"grad_norm": 46.684818267822266,
"learning_rate": 4.783166239216601e-05,
"loss": 0.7515,
"step": 930
},
{
"epoch": 0.21916530659827466,
"grad_norm": 23.10223960876465,
"learning_rate": 4.7808346934017254e-05,
"loss": 0.5434,
"step": 940
},
{
"epoch": 0.22149685241314993,
"grad_norm": 20.43950080871582,
"learning_rate": 4.77850314758685e-05,
"loss": 0.5672,
"step": 950
},
{
"epoch": 0.2238283982280252,
"grad_norm": 33.358795166015625,
"learning_rate": 4.776171601771975e-05,
"loss": 0.6576,
"step": 960
},
{
"epoch": 0.22615994404290043,
"grad_norm": 25.824260711669922,
"learning_rate": 4.7738400559571e-05,
"loss": 0.6646,
"step": 970
},
{
"epoch": 0.2284914898577757,
"grad_norm": 21.079469680786133,
"learning_rate": 4.771508510142225e-05,
"loss": 0.5052,
"step": 980
},
{
"epoch": 0.23082303567265097,
"grad_norm": 15.477420806884766,
"learning_rate": 4.769176964327349e-05,
"loss": 0.6392,
"step": 990
},
{
"epoch": 0.23315458148752624,
"grad_norm": 21.481847763061523,
"learning_rate": 4.7668454185124736e-05,
"loss": 0.6549,
"step": 1000
},
{
"epoch": 0.23548612730240148,
"grad_norm": 14.648496627807617,
"learning_rate": 4.764513872697599e-05,
"loss": 0.5125,
"step": 1010
},
{
"epoch": 0.23781767311727675,
"grad_norm": 17.860349655151367,
"learning_rate": 4.762182326882724e-05,
"loss": 0.5947,
"step": 1020
},
{
"epoch": 0.24014921893215202,
"grad_norm": 30.999217987060547,
"learning_rate": 4.7598507810678484e-05,
"loss": 0.5108,
"step": 1030
},
{
"epoch": 0.2424807647470273,
"grad_norm": 33.350311279296875,
"learning_rate": 4.757519235252973e-05,
"loss": 0.5957,
"step": 1040
},
{
"epoch": 0.24481231056190253,
"grad_norm": 50.07561111450195,
"learning_rate": 4.755187689438097e-05,
"loss": 0.6289,
"step": 1050
},
{
"epoch": 0.2471438563767778,
"grad_norm": 20.572126388549805,
"learning_rate": 4.7528561436232225e-05,
"loss": 0.6101,
"step": 1060
},
{
"epoch": 0.24947540219165307,
"grad_norm": 31.299867630004883,
"learning_rate": 4.7505245978083476e-05,
"loss": 0.7346,
"step": 1070
},
{
"epoch": 0.2518069480065283,
"grad_norm": 22.430063247680664,
"learning_rate": 4.748193051993472e-05,
"loss": 0.5229,
"step": 1080
},
{
"epoch": 0.2541384938214036,
"grad_norm": 27.95315933227539,
"learning_rate": 4.7458615061785966e-05,
"loss": 0.6457,
"step": 1090
},
{
"epoch": 0.25647003963627885,
"grad_norm": 20.998676300048828,
"learning_rate": 4.743529960363721e-05,
"loss": 0.6852,
"step": 1100
},
{
"epoch": 0.2588015854511541,
"grad_norm": 28.259180068969727,
"learning_rate": 4.741198414548846e-05,
"loss": 0.6709,
"step": 1110
},
{
"epoch": 0.2611331312660294,
"grad_norm": 29.109024047851562,
"learning_rate": 4.738866868733971e-05,
"loss": 0.5709,
"step": 1120
},
{
"epoch": 0.26346467708090465,
"grad_norm": 25.32686996459961,
"learning_rate": 4.736535322919096e-05,
"loss": 0.5139,
"step": 1130
},
{
"epoch": 0.2657962228957799,
"grad_norm": 19.918743133544922,
"learning_rate": 4.73420377710422e-05,
"loss": 0.6274,
"step": 1140
},
{
"epoch": 0.2681277687106552,
"grad_norm": 27.803632736206055,
"learning_rate": 4.731872231289345e-05,
"loss": 0.7746,
"step": 1150
},
{
"epoch": 0.2704593145255304,
"grad_norm": 51.257896423339844,
"learning_rate": 4.72954068547447e-05,
"loss": 0.6581,
"step": 1160
},
{
"epoch": 0.27279086034040567,
"grad_norm": 25.976425170898438,
"learning_rate": 4.7272091396595943e-05,
"loss": 0.5674,
"step": 1170
},
{
"epoch": 0.27512240615528094,
"grad_norm": 20.14984703063965,
"learning_rate": 4.7248775938447195e-05,
"loss": 0.5609,
"step": 1180
},
{
"epoch": 0.2774539519701562,
"grad_norm": 21.2273006439209,
"learning_rate": 4.722546048029844e-05,
"loss": 0.5842,
"step": 1190
},
{
"epoch": 0.2797854977850315,
"grad_norm": 31.50432014465332,
"learning_rate": 4.7202145022149684e-05,
"loss": 0.5867,
"step": 1200
},
{
"epoch": 0.2797854977850315,
"eval_accuracy": 0.7227235438884332,
"eval_f1": 0.7223497656011426,
"eval_loss": 0.7796285152435303,
"eval_precision": 0.7239042036307864,
"eval_recall": 0.7433241953688237,
"eval_runtime": 32.4272,
"eval_samples_per_second": 37.592,
"eval_steps_per_second": 2.375,
"step": 1200
},
{
"epoch": 0.28211704359990675,
"grad_norm": 32.188232421875,
"learning_rate": 4.7178829564000936e-05,
"loss": 0.6986,
"step": 1210
},
{
"epoch": 0.284448589414782,
"grad_norm": 21.2509708404541,
"learning_rate": 4.715551410585218e-05,
"loss": 0.5578,
"step": 1220
},
{
"epoch": 0.2867801352296573,
"grad_norm": 36.59361267089844,
"learning_rate": 4.713219864770343e-05,
"loss": 0.6035,
"step": 1230
},
{
"epoch": 0.2891116810445325,
"grad_norm": 23.820602416992188,
"learning_rate": 4.7108883189554677e-05,
"loss": 0.672,
"step": 1240
},
{
"epoch": 0.29144322685940777,
"grad_norm": 9.917643547058105,
"learning_rate": 4.708556773140592e-05,
"loss": 0.5623,
"step": 1250
},
{
"epoch": 0.29377477267428304,
"grad_norm": 23.47327423095703,
"learning_rate": 4.706225227325717e-05,
"loss": 0.5947,
"step": 1260
},
{
"epoch": 0.2961063184891583,
"grad_norm": 20.891555786132812,
"learning_rate": 4.703893681510842e-05,
"loss": 0.5065,
"step": 1270
},
{
"epoch": 0.2984378643040336,
"grad_norm": 20.379684448242188,
"learning_rate": 4.701562135695967e-05,
"loss": 0.4707,
"step": 1280
},
{
"epoch": 0.30076941011890884,
"grad_norm": 23.66413688659668,
"learning_rate": 4.6992305898810914e-05,
"loss": 0.7354,
"step": 1290
},
{
"epoch": 0.3031009559337841,
"grad_norm": 82.15457916259766,
"learning_rate": 4.6968990440662165e-05,
"loss": 0.6615,
"step": 1300
},
{
"epoch": 0.3054325017486594,
"grad_norm": 43.44639587402344,
"learning_rate": 4.694567498251341e-05,
"loss": 0.7217,
"step": 1310
},
{
"epoch": 0.30776404756353465,
"grad_norm": 27.2445125579834,
"learning_rate": 4.6922359524364654e-05,
"loss": 0.7379,
"step": 1320
},
{
"epoch": 0.31009559337840986,
"grad_norm": 23.97974395751953,
"learning_rate": 4.68990440662159e-05,
"loss": 0.6117,
"step": 1330
},
{
"epoch": 0.31242713919328513,
"grad_norm": 20.4930362701416,
"learning_rate": 4.687572860806715e-05,
"loss": 0.6047,
"step": 1340
},
{
"epoch": 0.3147586850081604,
"grad_norm": 36.909305572509766,
"learning_rate": 4.68524131499184e-05,
"loss": 0.5695,
"step": 1350
},
{
"epoch": 0.31709023082303567,
"grad_norm": 13.219598770141602,
"learning_rate": 4.682909769176965e-05,
"loss": 0.61,
"step": 1360
},
{
"epoch": 0.31942177663791094,
"grad_norm": 37.49195861816406,
"learning_rate": 4.680578223362089e-05,
"loss": 0.6959,
"step": 1370
},
{
"epoch": 0.3217533224527862,
"grad_norm": 21.94476890563965,
"learning_rate": 4.6782466775472136e-05,
"loss": 0.5361,
"step": 1380
},
{
"epoch": 0.3240848682676615,
"grad_norm": 15.76740837097168,
"learning_rate": 4.675915131732339e-05,
"loss": 0.5799,
"step": 1390
},
{
"epoch": 0.32641641408253674,
"grad_norm": 16.694778442382812,
"learning_rate": 4.673583585917464e-05,
"loss": 0.5195,
"step": 1400
},
{
"epoch": 0.32874795989741196,
"grad_norm": 17.327856063842773,
"learning_rate": 4.6712520401025884e-05,
"loss": 0.4577,
"step": 1410
},
{
"epoch": 0.3310795057122872,
"grad_norm": 19.089357376098633,
"learning_rate": 4.668920494287713e-05,
"loss": 0.5473,
"step": 1420
},
{
"epoch": 0.3334110515271625,
"grad_norm": 20.266950607299805,
"learning_rate": 4.666588948472837e-05,
"loss": 0.5861,
"step": 1430
},
{
"epoch": 0.33574259734203776,
"grad_norm": 14.822595596313477,
"learning_rate": 4.6642574026579625e-05,
"loss": 0.4603,
"step": 1440
},
{
"epoch": 0.33807414315691303,
"grad_norm": 14.292266845703125,
"learning_rate": 4.6619258568430876e-05,
"loss": 0.5209,
"step": 1450
},
{
"epoch": 0.3404056889717883,
"grad_norm": 15.99500560760498,
"learning_rate": 4.659594311028212e-05,
"loss": 0.5721,
"step": 1460
},
{
"epoch": 0.34273723478666357,
"grad_norm": 15.458304405212402,
"learning_rate": 4.6572627652133366e-05,
"loss": 0.5798,
"step": 1470
},
{
"epoch": 0.34506878060153884,
"grad_norm": 22.426408767700195,
"learning_rate": 4.654931219398461e-05,
"loss": 0.6674,
"step": 1480
},
{
"epoch": 0.3474003264164141,
"grad_norm": 16.9567928314209,
"learning_rate": 4.652599673583586e-05,
"loss": 0.6049,
"step": 1490
},
{
"epoch": 0.3497318722312893,
"grad_norm": 39.44313430786133,
"learning_rate": 4.650268127768711e-05,
"loss": 0.5684,
"step": 1500
},
{
"epoch": 0.3497318722312893,
"eval_accuracy": 0.7506152584085316,
"eval_f1": 0.7356563013468664,
"eval_loss": 0.7205380797386169,
"eval_precision": 0.7489548577706214,
"eval_recall": 0.7325100738137515,
"eval_runtime": 32.4911,
"eval_samples_per_second": 37.518,
"eval_steps_per_second": 2.37,
"step": 1500
},
{
"epoch": 0.3520634180461646,
"grad_norm": 27.17556381225586,
"learning_rate": 4.647936581953836e-05,
"loss": 0.5555,
"step": 1510
},
{
"epoch": 0.35439496386103986,
"grad_norm": 21.439729690551758,
"learning_rate": 4.64560503613896e-05,
"loss": 0.6018,
"step": 1520
},
{
"epoch": 0.3567265096759151,
"grad_norm": 27.142850875854492,
"learning_rate": 4.643273490324085e-05,
"loss": 0.6183,
"step": 1530
},
{
"epoch": 0.3590580554907904,
"grad_norm": 26.709306716918945,
"learning_rate": 4.64094194450921e-05,
"loss": 0.5238,
"step": 1540
},
{
"epoch": 0.36138960130566566,
"grad_norm": 18.24578857421875,
"learning_rate": 4.6386103986943343e-05,
"loss": 0.6221,
"step": 1550
},
{
"epoch": 0.36372114712054093,
"grad_norm": 16.884159088134766,
"learning_rate": 4.6362788528794595e-05,
"loss": 0.577,
"step": 1560
},
{
"epoch": 0.3660526929354162,
"grad_norm": 27.92196273803711,
"learning_rate": 4.633947307064584e-05,
"loss": 0.4804,
"step": 1570
},
{
"epoch": 0.36838423875029147,
"grad_norm": 19.397260665893555,
"learning_rate": 4.6316157612497084e-05,
"loss": 0.5586,
"step": 1580
},
{
"epoch": 0.3707157845651667,
"grad_norm": 20.426605224609375,
"learning_rate": 4.6292842154348336e-05,
"loss": 0.7078,
"step": 1590
},
{
"epoch": 0.37304733038004195,
"grad_norm": 16.057165145874023,
"learning_rate": 4.626952669619958e-05,
"loss": 0.4367,
"step": 1600
},
{
"epoch": 0.3753788761949172,
"grad_norm": 15.59145450592041,
"learning_rate": 4.624621123805083e-05,
"loss": 0.4379,
"step": 1610
},
{
"epoch": 0.3777104220097925,
"grad_norm": 11.662853240966797,
"learning_rate": 4.622289577990208e-05,
"loss": 0.4041,
"step": 1620
},
{
"epoch": 0.38004196782466776,
"grad_norm": 27.15912628173828,
"learning_rate": 4.619958032175333e-05,
"loss": 0.5678,
"step": 1630
},
{
"epoch": 0.38237351363954303,
"grad_norm": 18.558469772338867,
"learning_rate": 4.617626486360457e-05,
"loss": 0.5907,
"step": 1640
},
{
"epoch": 0.3847050594544183,
"grad_norm": 32.28955078125,
"learning_rate": 4.615294940545582e-05,
"loss": 0.592,
"step": 1650
},
{
"epoch": 0.38703660526929357,
"grad_norm": 19.484052658081055,
"learning_rate": 4.612963394730707e-05,
"loss": 0.5517,
"step": 1660
},
{
"epoch": 0.3893681510841688,
"grad_norm": 19.991775512695312,
"learning_rate": 4.6106318489158314e-05,
"loss": 0.4574,
"step": 1670
},
{
"epoch": 0.39169969689904405,
"grad_norm": 19.2491455078125,
"learning_rate": 4.6083003031009565e-05,
"loss": 0.5324,
"step": 1680
},
{
"epoch": 0.3940312427139193,
"grad_norm": 26.1087646484375,
"learning_rate": 4.605968757286081e-05,
"loss": 0.5405,
"step": 1690
},
{
"epoch": 0.3963627885287946,
"grad_norm": 28.05010986328125,
"learning_rate": 4.6036372114712055e-05,
"loss": 0.6015,
"step": 1700
},
{
"epoch": 0.39869433434366985,
"grad_norm": 24.019983291625977,
"learning_rate": 4.60130566565633e-05,
"loss": 0.4885,
"step": 1710
},
{
"epoch": 0.4010258801585451,
"grad_norm": 43.11894607543945,
"learning_rate": 4.598974119841455e-05,
"loss": 0.6539,
"step": 1720
},
{
"epoch": 0.4033574259734204,
"grad_norm": 23.700422286987305,
"learning_rate": 4.59664257402658e-05,
"loss": 0.4439,
"step": 1730
},
{
"epoch": 0.40568897178829566,
"grad_norm": 29.97321319580078,
"learning_rate": 4.594311028211705e-05,
"loss": 0.493,
"step": 1740
},
{
"epoch": 0.40802051760317093,
"grad_norm": 32.946022033691406,
"learning_rate": 4.591979482396829e-05,
"loss": 0.6301,
"step": 1750
},
{
"epoch": 0.41035206341804614,
"grad_norm": 16.01514434814453,
"learning_rate": 4.5896479365819536e-05,
"loss": 0.4617,
"step": 1760
},
{
"epoch": 0.4126836092329214,
"grad_norm": 21.685338973999023,
"learning_rate": 4.587316390767079e-05,
"loss": 0.6289,
"step": 1770
},
{
"epoch": 0.4150151550477967,
"grad_norm": 28.99067497253418,
"learning_rate": 4.584984844952204e-05,
"loss": 0.4477,
"step": 1780
},
{
"epoch": 0.41734670086267195,
"grad_norm": 35.17900085449219,
"learning_rate": 4.5826532991373284e-05,
"loss": 0.4637,
"step": 1790
},
{
"epoch": 0.4196782466775472,
"grad_norm": 33.74941635131836,
"learning_rate": 4.580321753322453e-05,
"loss": 0.6002,
"step": 1800
},
{
"epoch": 0.4196782466775472,
"eval_accuracy": 0.7506152584085316,
"eval_f1": 0.7449892374013476,
"eval_loss": 0.7239476442337036,
"eval_precision": 0.7453346607126952,
"eval_recall": 0.7541687656017055,
"eval_runtime": 32.6118,
"eval_samples_per_second": 37.379,
"eval_steps_per_second": 2.361,
"step": 1800
},
{
"epoch": 0.4220097924924225,
"grad_norm": 13.531204223632812,
"learning_rate": 4.577990207507577e-05,
"loss": 0.5473,
"step": 1810
},
{
"epoch": 0.42434133830729776,
"grad_norm": 16.84659194946289,
"learning_rate": 4.5756586616927025e-05,
"loss": 0.5308,
"step": 1820
},
{
"epoch": 0.426672884122173,
"grad_norm": 34.70216369628906,
"learning_rate": 4.5733271158778276e-05,
"loss": 0.7112,
"step": 1830
},
{
"epoch": 0.42900442993704824,
"grad_norm": 18.915586471557617,
"learning_rate": 4.570995570062952e-05,
"loss": 0.6053,
"step": 1840
},
{
"epoch": 0.4313359757519235,
"grad_norm": 19.790071487426758,
"learning_rate": 4.5686640242480766e-05,
"loss": 0.4516,
"step": 1850
},
{
"epoch": 0.4336675215667988,
"grad_norm": 19.730384826660156,
"learning_rate": 4.566332478433201e-05,
"loss": 0.6088,
"step": 1860
},
{
"epoch": 0.43599906738167404,
"grad_norm": 31.787572860717773,
"learning_rate": 4.564000932618326e-05,
"loss": 0.5709,
"step": 1870
},
{
"epoch": 0.4383306131965493,
"grad_norm": 26.529708862304688,
"learning_rate": 4.561669386803451e-05,
"loss": 0.5072,
"step": 1880
},
{
"epoch": 0.4406621590114246,
"grad_norm": 29.785123825073242,
"learning_rate": 4.559337840988576e-05,
"loss": 0.4786,
"step": 1890
},
{
"epoch": 0.44299370482629985,
"grad_norm": 18.451383590698242,
"learning_rate": 4.5570062951737e-05,
"loss": 0.5377,
"step": 1900
},
{
"epoch": 0.4453252506411751,
"grad_norm": 24.651517868041992,
"learning_rate": 4.554674749358825e-05,
"loss": 0.4759,
"step": 1910
},
{
"epoch": 0.4476567964560504,
"grad_norm": 23.738956451416016,
"learning_rate": 4.55234320354395e-05,
"loss": 0.6059,
"step": 1920
},
{
"epoch": 0.4499883422709256,
"grad_norm": 15.957860946655273,
"learning_rate": 4.5500116577290743e-05,
"loss": 0.5788,
"step": 1930
},
{
"epoch": 0.45231988808580087,
"grad_norm": 18.671892166137695,
"learning_rate": 4.5476801119141995e-05,
"loss": 0.42,
"step": 1940
},
{
"epoch": 0.45465143390067614,
"grad_norm": 20.396814346313477,
"learning_rate": 4.545348566099324e-05,
"loss": 0.5062,
"step": 1950
},
{
"epoch": 0.4569829797155514,
"grad_norm": 35.40294647216797,
"learning_rate": 4.5430170202844484e-05,
"loss": 0.6339,
"step": 1960
},
{
"epoch": 0.4593145255304267,
"grad_norm": 20.99814224243164,
"learning_rate": 4.5406854744695736e-05,
"loss": 0.4737,
"step": 1970
},
{
"epoch": 0.46164607134530194,
"grad_norm": 19.0653133392334,
"learning_rate": 4.538353928654698e-05,
"loss": 0.4303,
"step": 1980
},
{
"epoch": 0.4639776171601772,
"grad_norm": 25.659717559814453,
"learning_rate": 4.536022382839823e-05,
"loss": 0.4544,
"step": 1990
},
{
"epoch": 0.4663091629750525,
"grad_norm": 15.122028350830078,
"learning_rate": 4.533690837024948e-05,
"loss": 0.4343,
"step": 2000
},
{
"epoch": 0.4686407087899277,
"grad_norm": 18.79733657836914,
"learning_rate": 4.531359291210073e-05,
"loss": 0.598,
"step": 2010
},
{
"epoch": 0.47097225460480296,
"grad_norm": 21.690399169921875,
"learning_rate": 4.529027745395197e-05,
"loss": 0.5553,
"step": 2020
},
{
"epoch": 0.47330380041967823,
"grad_norm": 19.833171844482422,
"learning_rate": 4.526696199580322e-05,
"loss": 0.4684,
"step": 2030
},
{
"epoch": 0.4756353462345535,
"grad_norm": 31.109315872192383,
"learning_rate": 4.524364653765447e-05,
"loss": 0.5953,
"step": 2040
},
{
"epoch": 0.47796689204942877,
"grad_norm": 14.755363464355469,
"learning_rate": 4.5220331079505714e-05,
"loss": 0.4521,
"step": 2050
},
{
"epoch": 0.48029843786430404,
"grad_norm": 19.60972785949707,
"learning_rate": 4.5197015621356965e-05,
"loss": 0.4349,
"step": 2060
},
{
"epoch": 0.4826299836791793,
"grad_norm": 20.63113784790039,
"learning_rate": 4.517370016320821e-05,
"loss": 0.4893,
"step": 2070
},
{
"epoch": 0.4849615294940546,
"grad_norm": 21.99651336669922,
"learning_rate": 4.5150384705059455e-05,
"loss": 0.535,
"step": 2080
},
{
"epoch": 0.48729307530892985,
"grad_norm": 13.244401931762695,
"learning_rate": 4.51270692469107e-05,
"loss": 0.4573,
"step": 2090
},
{
"epoch": 0.48962462112380506,
"grad_norm": 22.323959350585938,
"learning_rate": 4.510375378876195e-05,
"loss": 0.4974,
"step": 2100
},
{
"epoch": 0.48962462112380506,
"eval_accuracy": 0.7497949138638228,
"eval_f1": 0.7429285315686787,
"eval_loss": 0.7164492011070251,
"eval_precision": 0.7399478118312398,
"eval_recall": 0.751867958660305,
"eval_runtime": 32.6276,
"eval_samples_per_second": 37.361,
"eval_steps_per_second": 2.36,
"step": 2100
},
{
"epoch": 0.49195616693868033,
"grad_norm": 14.91236400604248,
"learning_rate": 4.50804383306132e-05,
"loss": 0.4868,
"step": 2110
},
{
"epoch": 0.4942877127535556,
"grad_norm": 14.522492408752441,
"learning_rate": 4.505712287246445e-05,
"loss": 0.517,
"step": 2120
},
{
"epoch": 0.49661925856843087,
"grad_norm": 14.661904335021973,
"learning_rate": 4.503380741431569e-05,
"loss": 0.6969,
"step": 2130
},
{
"epoch": 0.49895080438330613,
"grad_norm": 17.774005889892578,
"learning_rate": 4.5010491956166936e-05,
"loss": 0.3392,
"step": 2140
},
{
"epoch": 0.5012823501981813,
"grad_norm": 20.338176727294922,
"learning_rate": 4.498717649801819e-05,
"loss": 0.5478,
"step": 2150
},
{
"epoch": 0.5036138960130566,
"grad_norm": 23.17992401123047,
"learning_rate": 4.496386103986944e-05,
"loss": 0.4363,
"step": 2160
},
{
"epoch": 0.5059454418279319,
"grad_norm": 26.9781494140625,
"learning_rate": 4.4940545581720684e-05,
"loss": 0.4991,
"step": 2170
},
{
"epoch": 0.5082769876428072,
"grad_norm": 13.74269962310791,
"learning_rate": 4.491723012357193e-05,
"loss": 0.4789,
"step": 2180
},
{
"epoch": 0.5106085334576824,
"grad_norm": 9.351542472839355,
"learning_rate": 4.489391466542317e-05,
"loss": 0.6222,
"step": 2190
},
{
"epoch": 0.5129400792725577,
"grad_norm": 29.69098472595215,
"learning_rate": 4.4870599207274425e-05,
"loss": 0.5182,
"step": 2200
},
{
"epoch": 0.515271625087433,
"grad_norm": 34.87522506713867,
"learning_rate": 4.4847283749125676e-05,
"loss": 0.4833,
"step": 2210
},
{
"epoch": 0.5176031709023082,
"grad_norm": 16.882413864135742,
"learning_rate": 4.482396829097692e-05,
"loss": 0.4985,
"step": 2220
},
{
"epoch": 0.5199347167171835,
"grad_norm": 18.16925621032715,
"learning_rate": 4.4800652832828166e-05,
"loss": 0.5125,
"step": 2230
},
{
"epoch": 0.5222662625320588,
"grad_norm": 25.316865921020508,
"learning_rate": 4.477733737467941e-05,
"loss": 0.6061,
"step": 2240
},
{
"epoch": 0.524597808346934,
"grad_norm": 24.0291690826416,
"learning_rate": 4.475402191653066e-05,
"loss": 0.5563,
"step": 2250
},
{
"epoch": 0.5269293541618093,
"grad_norm": 18.830142974853516,
"learning_rate": 4.473070645838191e-05,
"loss": 0.34,
"step": 2260
},
{
"epoch": 0.5292608999766846,
"grad_norm": 24.81058692932129,
"learning_rate": 4.470739100023316e-05,
"loss": 0.4322,
"step": 2270
},
{
"epoch": 0.5315924457915598,
"grad_norm": 25.785091400146484,
"learning_rate": 4.46840755420844e-05,
"loss": 0.4726,
"step": 2280
},
{
"epoch": 0.5339239916064351,
"grad_norm": 14.79159927368164,
"learning_rate": 4.466076008393565e-05,
"loss": 0.3366,
"step": 2290
},
{
"epoch": 0.5362555374213104,
"grad_norm": 34.41261672973633,
"learning_rate": 4.46374446257869e-05,
"loss": 0.5926,
"step": 2300
},
{
"epoch": 0.5385870832361855,
"grad_norm": 34.747901916503906,
"learning_rate": 4.4614129167638144e-05,
"loss": 0.6057,
"step": 2310
},
{
"epoch": 0.5409186290510608,
"grad_norm": 17.471677780151367,
"learning_rate": 4.4590813709489395e-05,
"loss": 0.5399,
"step": 2320
},
{
"epoch": 0.5432501748659361,
"grad_norm": 29.014802932739258,
"learning_rate": 4.456749825134064e-05,
"loss": 0.4637,
"step": 2330
},
{
"epoch": 0.5455817206808113,
"grad_norm": 12.530820846557617,
"learning_rate": 4.454418279319189e-05,
"loss": 0.4172,
"step": 2340
},
{
"epoch": 0.5479132664956866,
"grad_norm": 15.449395179748535,
"learning_rate": 4.4520867335043136e-05,
"loss": 0.5294,
"step": 2350
},
{
"epoch": 0.5502448123105619,
"grad_norm": 26.869712829589844,
"learning_rate": 4.449755187689438e-05,
"loss": 0.5169,
"step": 2360
},
{
"epoch": 0.5525763581254372,
"grad_norm": 17.471458435058594,
"learning_rate": 4.447423641874563e-05,
"loss": 0.4523,
"step": 2370
},
{
"epoch": 0.5549079039403124,
"grad_norm": 24.529001235961914,
"learning_rate": 4.445092096059688e-05,
"loss": 0.4501,
"step": 2380
},
{
"epoch": 0.5572394497551877,
"grad_norm": 22.41488265991211,
"learning_rate": 4.442760550244813e-05,
"loss": 0.5475,
"step": 2390
},
{
"epoch": 0.559570995570063,
"grad_norm": 27.631166458129883,
"learning_rate": 4.440429004429937e-05,
"loss": 0.5527,
"step": 2400
},
{
"epoch": 0.559570995570063,
"eval_accuracy": 0.7506152584085316,
"eval_f1": 0.7474469925712124,
"eval_loss": 0.7103798985481262,
"eval_precision": 0.7429756390197679,
"eval_recall": 0.7637649710650173,
"eval_runtime": 32.5095,
"eval_samples_per_second": 37.497,
"eval_steps_per_second": 2.369,
"step": 2400
},
{
"epoch": 0.5619025413849382,
"grad_norm": 25.045551300048828,
"learning_rate": 4.438097458615062e-05,
"loss": 0.641,
"step": 2410
},
{
"epoch": 0.5642340871998135,
"grad_norm": 21.757932662963867,
"learning_rate": 4.435765912800187e-05,
"loss": 0.4971,
"step": 2420
},
{
"epoch": 0.5665656330146888,
"grad_norm": 21.797353744506836,
"learning_rate": 4.4334343669853114e-05,
"loss": 0.4863,
"step": 2430
},
{
"epoch": 0.568897178829564,
"grad_norm": 24.75421905517578,
"learning_rate": 4.4311028211704365e-05,
"loss": 0.49,
"step": 2440
},
{
"epoch": 0.5712287246444393,
"grad_norm": 29.258378982543945,
"learning_rate": 4.428771275355561e-05,
"loss": 0.4736,
"step": 2450
},
{
"epoch": 0.5735602704593146,
"grad_norm": 36.19465255737305,
"learning_rate": 4.4264397295406855e-05,
"loss": 0.4717,
"step": 2460
},
{
"epoch": 0.5758918162741898,
"grad_norm": 25.283084869384766,
"learning_rate": 4.42410818372581e-05,
"loss": 0.5374,
"step": 2470
},
{
"epoch": 0.578223362089065,
"grad_norm": 26.333541870117188,
"learning_rate": 4.421776637910936e-05,
"loss": 0.3847,
"step": 2480
},
{
"epoch": 0.5805549079039403,
"grad_norm": 21.764862060546875,
"learning_rate": 4.41944509209606e-05,
"loss": 0.4232,
"step": 2490
},
{
"epoch": 0.5828864537188155,
"grad_norm": 11.467122077941895,
"learning_rate": 4.417113546281185e-05,
"loss": 0.6221,
"step": 2500
},
{
"epoch": 0.5852179995336908,
"grad_norm": 16.913673400878906,
"learning_rate": 4.414782000466309e-05,
"loss": 0.4062,
"step": 2510
},
{
"epoch": 0.5875495453485661,
"grad_norm": 25.194719314575195,
"learning_rate": 4.4124504546514336e-05,
"loss": 0.4734,
"step": 2520
},
{
"epoch": 0.5898810911634413,
"grad_norm": 16.23316764831543,
"learning_rate": 4.410118908836559e-05,
"loss": 0.413,
"step": 2530
},
{
"epoch": 0.5922126369783166,
"grad_norm": 29.319387435913086,
"learning_rate": 4.407787363021684e-05,
"loss": 0.4903,
"step": 2540
},
{
"epoch": 0.5945441827931919,
"grad_norm": 55.968284606933594,
"learning_rate": 4.4054558172068084e-05,
"loss": 0.5513,
"step": 2550
},
{
"epoch": 0.5968757286080671,
"grad_norm": 19.242820739746094,
"learning_rate": 4.403124271391933e-05,
"loss": 0.4908,
"step": 2560
},
{
"epoch": 0.5992072744229424,
"grad_norm": 23.568754196166992,
"learning_rate": 4.400792725577057e-05,
"loss": 0.585,
"step": 2570
},
{
"epoch": 0.6015388202378177,
"grad_norm": 19.30316925048828,
"learning_rate": 4.3984611797621825e-05,
"loss": 0.5568,
"step": 2580
},
{
"epoch": 0.603870366052693,
"grad_norm": 11.688234329223633,
"learning_rate": 4.3961296339473076e-05,
"loss": 0.4393,
"step": 2590
},
{
"epoch": 0.6062019118675682,
"grad_norm": 18.595117568969727,
"learning_rate": 4.393798088132432e-05,
"loss": 0.3502,
"step": 2600
},
{
"epoch": 0.6085334576824435,
"grad_norm": 30.775352478027344,
"learning_rate": 4.3914665423175566e-05,
"loss": 0.4952,
"step": 2610
},
{
"epoch": 0.6108650034973188,
"grad_norm": 9.629733085632324,
"learning_rate": 4.389134996502681e-05,
"loss": 0.3984,
"step": 2620
},
{
"epoch": 0.613196549312194,
"grad_norm": 27.071420669555664,
"learning_rate": 4.386803450687806e-05,
"loss": 0.5048,
"step": 2630
},
{
"epoch": 0.6155280951270693,
"grad_norm": 18.72870445251465,
"learning_rate": 4.384471904872931e-05,
"loss": 0.5675,
"step": 2640
},
{
"epoch": 0.6178596409419445,
"grad_norm": 16.282094955444336,
"learning_rate": 4.382140359058056e-05,
"loss": 0.3591,
"step": 2650
},
{
"epoch": 0.6201911867568197,
"grad_norm": 17.249792098999023,
"learning_rate": 4.37980881324318e-05,
"loss": 0.4584,
"step": 2660
},
{
"epoch": 0.622522732571695,
"grad_norm": 21.42504119873047,
"learning_rate": 4.3774772674283054e-05,
"loss": 0.4607,
"step": 2670
},
{
"epoch": 0.6248542783865703,
"grad_norm": 30.91826820373535,
"learning_rate": 4.37514572161343e-05,
"loss": 0.6725,
"step": 2680
},
{
"epoch": 0.6271858242014455,
"grad_norm": 20.925262451171875,
"learning_rate": 4.3728141757985544e-05,
"loss": 0.4768,
"step": 2690
},
{
"epoch": 0.6295173700163208,
"grad_norm": 25.807174682617188,
"learning_rate": 4.3704826299836795e-05,
"loss": 0.4127,
"step": 2700
},
{
"epoch": 0.6295173700163208,
"eval_accuracy": 0.7678424938474159,
"eval_f1": 0.7584708782486864,
"eval_loss": 0.6823632121086121,
"eval_precision": 0.7601462178390429,
"eval_recall": 0.7648942677055709,
"eval_runtime": 32.4267,
"eval_samples_per_second": 37.592,
"eval_steps_per_second": 2.375,
"step": 2700
},
{
"epoch": 0.6318489158311961,
"grad_norm": 20.336694717407227,
"learning_rate": 4.368151084168804e-05,
"loss": 0.4772,
"step": 2710
},
{
"epoch": 0.6341804616460713,
"grad_norm": 15.894454956054688,
"learning_rate": 4.365819538353929e-05,
"loss": 0.5338,
"step": 2720
},
{
"epoch": 0.6365120074609466,
"grad_norm": 45.088111877441406,
"learning_rate": 4.3634879925390536e-05,
"loss": 0.6168,
"step": 2730
},
{
"epoch": 0.6388435532758219,
"grad_norm": 31.453920364379883,
"learning_rate": 4.361156446724178e-05,
"loss": 0.4662,
"step": 2740
},
{
"epoch": 0.6411750990906971,
"grad_norm": 11.898534774780273,
"learning_rate": 4.358824900909303e-05,
"loss": 0.5345,
"step": 2750
},
{
"epoch": 0.6435066449055724,
"grad_norm": 21.230201721191406,
"learning_rate": 4.356493355094428e-05,
"loss": 0.4006,
"step": 2760
},
{
"epoch": 0.6458381907204477,
"grad_norm": 25.514484405517578,
"learning_rate": 4.354161809279553e-05,
"loss": 0.5164,
"step": 2770
},
{
"epoch": 0.648169736535323,
"grad_norm": 20.121109008789062,
"learning_rate": 4.351830263464677e-05,
"loss": 0.4885,
"step": 2780
},
{
"epoch": 0.6505012823501982,
"grad_norm": 11.797569274902344,
"learning_rate": 4.349498717649802e-05,
"loss": 0.5272,
"step": 2790
},
{
"epoch": 0.6528328281650735,
"grad_norm": 22.636089324951172,
"learning_rate": 4.347167171834927e-05,
"loss": 0.5199,
"step": 2800
},
{
"epoch": 0.6551643739799488,
"grad_norm": 29.251462936401367,
"learning_rate": 4.3448356260200514e-05,
"loss": 0.3558,
"step": 2810
},
{
"epoch": 0.6574959197948239,
"grad_norm": 14.990754127502441,
"learning_rate": 4.3425040802051765e-05,
"loss": 0.5055,
"step": 2820
},
{
"epoch": 0.6598274656096992,
"grad_norm": 15.994630813598633,
"learning_rate": 4.340172534390301e-05,
"loss": 0.3577,
"step": 2830
},
{
"epoch": 0.6621590114245745,
"grad_norm": 25.580074310302734,
"learning_rate": 4.3378409885754255e-05,
"loss": 0.464,
"step": 2840
},
{
"epoch": 0.6644905572394497,
"grad_norm": 19.915939331054688,
"learning_rate": 4.33550944276055e-05,
"loss": 0.3557,
"step": 2850
},
{
"epoch": 0.666822103054325,
"grad_norm": 17.42690658569336,
"learning_rate": 4.333177896945676e-05,
"loss": 0.5423,
"step": 2860
},
{
"epoch": 0.6691536488692003,
"grad_norm": 16.17222785949707,
"learning_rate": 4.3308463511308e-05,
"loss": 0.3552,
"step": 2870
},
{
"epoch": 0.6714851946840755,
"grad_norm": 23.892414093017578,
"learning_rate": 4.328514805315925e-05,
"loss": 0.4097,
"step": 2880
},
{
"epoch": 0.6738167404989508,
"grad_norm": 23.955047607421875,
"learning_rate": 4.326183259501049e-05,
"loss": 0.6225,
"step": 2890
},
{
"epoch": 0.6761482863138261,
"grad_norm": 19.446603775024414,
"learning_rate": 4.3238517136861736e-05,
"loss": 0.5303,
"step": 2900
},
{
"epoch": 0.6784798321287013,
"grad_norm": 15.906410217285156,
"learning_rate": 4.321520167871299e-05,
"loss": 0.4539,
"step": 2910
},
{
"epoch": 0.6808113779435766,
"grad_norm": 30.294095993041992,
"learning_rate": 4.319188622056424e-05,
"loss": 0.6016,
"step": 2920
},
{
"epoch": 0.6831429237584519,
"grad_norm": 22.50743865966797,
"learning_rate": 4.3168570762415484e-05,
"loss": 0.4718,
"step": 2930
},
{
"epoch": 0.6854744695733271,
"grad_norm": 11.781279563903809,
"learning_rate": 4.314525530426673e-05,
"loss": 0.413,
"step": 2940
},
{
"epoch": 0.6878060153882024,
"grad_norm": 10.89158821105957,
"learning_rate": 4.3121939846117973e-05,
"loss": 0.507,
"step": 2950
},
{
"epoch": 0.6901375612030777,
"grad_norm": 13.030016899108887,
"learning_rate": 4.3098624387969225e-05,
"loss": 0.4475,
"step": 2960
},
{
"epoch": 0.692469107017953,
"grad_norm": 15.248382568359375,
"learning_rate": 4.3075308929820476e-05,
"loss": 0.4741,
"step": 2970
},
{
"epoch": 0.6948006528328282,
"grad_norm": 32.4050407409668,
"learning_rate": 4.305199347167172e-05,
"loss": 0.5675,
"step": 2980
},
{
"epoch": 0.6971321986477035,
"grad_norm": 36.350406646728516,
"learning_rate": 4.3028678013522966e-05,
"loss": 0.584,
"step": 2990
},
{
"epoch": 0.6994637444625786,
"grad_norm": 14.917610168457031,
"learning_rate": 4.300536255537422e-05,
"loss": 0.4201,
"step": 3000
},
{
"epoch": 0.6994637444625786,
"eval_accuracy": 0.7768662838392125,
"eval_f1": 0.7673956246919376,
"eval_loss": 0.6402276158332825,
"eval_precision": 0.7691001302569485,
"eval_recall": 0.7679644214691466,
"eval_runtime": 32.6299,
"eval_samples_per_second": 37.358,
"eval_steps_per_second": 2.36,
"step": 3000
},
{
"epoch": 0.7017952902774539,
"grad_norm": 28.404804229736328,
"learning_rate": 4.298204709722546e-05,
"loss": 0.4424,
"step": 3010
},
{
"epoch": 0.7041268360923292,
"grad_norm": 22.141082763671875,
"learning_rate": 4.295873163907671e-05,
"loss": 0.5042,
"step": 3020
},
{
"epoch": 0.7064583819072044,
"grad_norm": 26.650156021118164,
"learning_rate": 4.293541618092796e-05,
"loss": 0.5509,
"step": 3030
},
{
"epoch": 0.7087899277220797,
"grad_norm": 17.86048698425293,
"learning_rate": 4.29121007227792e-05,
"loss": 0.525,
"step": 3040
},
{
"epoch": 0.711121473536955,
"grad_norm": 31.643362045288086,
"learning_rate": 4.2888785264630454e-05,
"loss": 0.4592,
"step": 3050
},
{
"epoch": 0.7134530193518303,
"grad_norm": 25.388368606567383,
"learning_rate": 4.28654698064817e-05,
"loss": 0.6506,
"step": 3060
},
{
"epoch": 0.7157845651667055,
"grad_norm": 20.75798225402832,
"learning_rate": 4.2842154348332944e-05,
"loss": 0.5776,
"step": 3070
},
{
"epoch": 0.7181161109815808,
"grad_norm": 17.62347412109375,
"learning_rate": 4.2818838890184195e-05,
"loss": 0.497,
"step": 3080
},
{
"epoch": 0.7204476567964561,
"grad_norm": 21.391613006591797,
"learning_rate": 4.279552343203544e-05,
"loss": 0.4536,
"step": 3090
},
{
"epoch": 0.7227792026113313,
"grad_norm": 19.84242057800293,
"learning_rate": 4.277220797388669e-05,
"loss": 0.518,
"step": 3100
},
{
"epoch": 0.7251107484262066,
"grad_norm": 25.691789627075195,
"learning_rate": 4.2748892515737936e-05,
"loss": 0.4864,
"step": 3110
},
{
"epoch": 0.7274422942410819,
"grad_norm": 19.61354637145996,
"learning_rate": 4.272557705758918e-05,
"loss": 0.3578,
"step": 3120
},
{
"epoch": 0.7297738400559571,
"grad_norm": 20.281843185424805,
"learning_rate": 4.270226159944043e-05,
"loss": 0.492,
"step": 3130
},
{
"epoch": 0.7321053858708324,
"grad_norm": 19.12962532043457,
"learning_rate": 4.267894614129168e-05,
"loss": 0.4072,
"step": 3140
},
{
"epoch": 0.7344369316857077,
"grad_norm": 22.840578079223633,
"learning_rate": 4.265563068314293e-05,
"loss": 0.5414,
"step": 3150
},
{
"epoch": 0.7367684775005829,
"grad_norm": 30.5288028717041,
"learning_rate": 4.263231522499417e-05,
"loss": 0.518,
"step": 3160
},
{
"epoch": 0.7391000233154581,
"grad_norm": 15.130803108215332,
"learning_rate": 4.260899976684542e-05,
"loss": 0.3736,
"step": 3170
},
{
"epoch": 0.7414315691303334,
"grad_norm": 22.197586059570312,
"learning_rate": 4.258568430869667e-05,
"loss": 0.3704,
"step": 3180
},
{
"epoch": 0.7437631149452086,
"grad_norm": 13.625364303588867,
"learning_rate": 4.256236885054792e-05,
"loss": 0.3927,
"step": 3190
},
{
"epoch": 0.7460946607600839,
"grad_norm": 23.51502227783203,
"learning_rate": 4.2539053392399165e-05,
"loss": 0.4915,
"step": 3200
},
{
"epoch": 0.7484262065749592,
"grad_norm": 19.06190299987793,
"learning_rate": 4.251573793425041e-05,
"loss": 0.5324,
"step": 3210
},
{
"epoch": 0.7507577523898344,
"grad_norm": 16.512483596801758,
"learning_rate": 4.2492422476101655e-05,
"loss": 0.5011,
"step": 3220
},
{
"epoch": 0.7530892982047097,
"grad_norm": 12.160454750061035,
"learning_rate": 4.24691070179529e-05,
"loss": 0.5847,
"step": 3230
},
{
"epoch": 0.755420844019585,
"grad_norm": 14.771639823913574,
"learning_rate": 4.244579155980416e-05,
"loss": 0.3473,
"step": 3240
},
{
"epoch": 0.7577523898344602,
"grad_norm": 25.87384605407715,
"learning_rate": 4.24224761016554e-05,
"loss": 0.4409,
"step": 3250
},
{
"epoch": 0.7600839356493355,
"grad_norm": 30.41501235961914,
"learning_rate": 4.239916064350665e-05,
"loss": 0.6514,
"step": 3260
},
{
"epoch": 0.7624154814642108,
"grad_norm": 20.601119995117188,
"learning_rate": 4.237584518535789e-05,
"loss": 0.5139,
"step": 3270
},
{
"epoch": 0.7647470272790861,
"grad_norm": 12.452898979187012,
"learning_rate": 4.2352529727209136e-05,
"loss": 0.395,
"step": 3280
},
{
"epoch": 0.7670785730939613,
"grad_norm": 23.974411010742188,
"learning_rate": 4.232921426906039e-05,
"loss": 0.4344,
"step": 3290
},
{
"epoch": 0.7694101189088366,
"grad_norm": 27.8565616607666,
"learning_rate": 4.230589881091164e-05,
"loss": 0.4636,
"step": 3300
},
{
"epoch": 0.7694101189088366,
"eval_accuracy": 0.7727645611156686,
"eval_f1": 0.7628742718759469,
"eval_loss": 0.673406720161438,
"eval_precision": 0.7584650322424751,
"eval_recall": 0.7751981038943103,
"eval_runtime": 32.4265,
"eval_samples_per_second": 37.593,
"eval_steps_per_second": 2.375,
"step": 3300
},
{
"epoch": 0.7717416647237119,
"grad_norm": 31.938621520996094,
"learning_rate": 4.2282583352762884e-05,
"loss": 0.5178,
"step": 3310
},
{
"epoch": 0.7740732105385871,
"grad_norm": 14.094200134277344,
"learning_rate": 4.225926789461413e-05,
"loss": 0.3979,
"step": 3320
},
{
"epoch": 0.7764047563534624,
"grad_norm": 14.13912582397461,
"learning_rate": 4.2235952436465373e-05,
"loss": 0.4894,
"step": 3330
},
{
"epoch": 0.7787363021683376,
"grad_norm": 16.00527572631836,
"learning_rate": 4.2212636978316625e-05,
"loss": 0.3421,
"step": 3340
},
{
"epoch": 0.7810678479832128,
"grad_norm": 14.035831451416016,
"learning_rate": 4.2189321520167876e-05,
"loss": 0.4204,
"step": 3350
},
{
"epoch": 0.7833993937980881,
"grad_norm": 17.89393424987793,
"learning_rate": 4.216600606201912e-05,
"loss": 0.3081,
"step": 3360
},
{
"epoch": 0.7857309396129634,
"grad_norm": 19.49710464477539,
"learning_rate": 4.2142690603870366e-05,
"loss": 0.497,
"step": 3370
},
{
"epoch": 0.7880624854278386,
"grad_norm": 32.836326599121094,
"learning_rate": 4.211937514572162e-05,
"loss": 0.5676,
"step": 3380
},
{
"epoch": 0.7903940312427139,
"grad_norm": 21.465435028076172,
"learning_rate": 4.209605968757286e-05,
"loss": 0.583,
"step": 3390
},
{
"epoch": 0.7927255770575892,
"grad_norm": 19.0760440826416,
"learning_rate": 4.2072744229424113e-05,
"loss": 0.4606,
"step": 3400
},
{
"epoch": 0.7950571228724644,
"grad_norm": 34.77079772949219,
"learning_rate": 4.204942877127536e-05,
"loss": 0.4716,
"step": 3410
},
{
"epoch": 0.7973886686873397,
"grad_norm": 12.543126106262207,
"learning_rate": 4.20261133131266e-05,
"loss": 0.391,
"step": 3420
},
{
"epoch": 0.799720214502215,
"grad_norm": 15.688311576843262,
"learning_rate": 4.2002797854977854e-05,
"loss": 0.5113,
"step": 3430
},
{
"epoch": 0.8020517603170902,
"grad_norm": 11.053609848022461,
"learning_rate": 4.19794823968291e-05,
"loss": 0.3462,
"step": 3440
},
{
"epoch": 0.8043833061319655,
"grad_norm": 21.625019073486328,
"learning_rate": 4.1956166938680344e-05,
"loss": 0.5507,
"step": 3450
},
{
"epoch": 0.8067148519468408,
"grad_norm": 15.765186309814453,
"learning_rate": 4.1932851480531595e-05,
"loss": 0.5008,
"step": 3460
},
{
"epoch": 0.809046397761716,
"grad_norm": 32.54380416870117,
"learning_rate": 4.190953602238284e-05,
"loss": 0.6183,
"step": 3470
},
{
"epoch": 0.8113779435765913,
"grad_norm": 19.001272201538086,
"learning_rate": 4.188622056423409e-05,
"loss": 0.5258,
"step": 3480
},
{
"epoch": 0.8137094893914666,
"grad_norm": 43.268978118896484,
"learning_rate": 4.1862905106085336e-05,
"loss": 0.5148,
"step": 3490
},
{
"epoch": 0.8160410352063419,
"grad_norm": 15.338736534118652,
"learning_rate": 4.183958964793658e-05,
"loss": 0.4623,
"step": 3500
},
{
"epoch": 0.818372581021217,
"grad_norm": 21.353567123413086,
"learning_rate": 4.181627418978783e-05,
"loss": 0.4635,
"step": 3510
},
{
"epoch": 0.8207041268360923,
"grad_norm": 133.6362762451172,
"learning_rate": 4.1792958731639084e-05,
"loss": 0.4413,
"step": 3520
},
{
"epoch": 0.8230356726509676,
"grad_norm": 32.99754333496094,
"learning_rate": 4.176964327349033e-05,
"loss": 0.4504,
"step": 3530
},
{
"epoch": 0.8253672184658428,
"grad_norm": 27.58423614501953,
"learning_rate": 4.174632781534157e-05,
"loss": 0.6231,
"step": 3540
},
{
"epoch": 0.8276987642807181,
"grad_norm": 25.30926513671875,
"learning_rate": 4.172301235719282e-05,
"loss": 0.3987,
"step": 3550
},
{
"epoch": 0.8300303100955934,
"grad_norm": 21.798927307128906,
"learning_rate": 4.169969689904407e-05,
"loss": 0.4207,
"step": 3560
},
{
"epoch": 0.8323618559104686,
"grad_norm": 27.713125228881836,
"learning_rate": 4.167638144089532e-05,
"loss": 0.5021,
"step": 3570
},
{
"epoch": 0.8346934017253439,
"grad_norm": 12.710470199584961,
"learning_rate": 4.1653065982746565e-05,
"loss": 0.303,
"step": 3580
},
{
"epoch": 0.8370249475402192,
"grad_norm": 24.33064079284668,
"learning_rate": 4.162975052459781e-05,
"loss": 0.4564,
"step": 3590
},
{
"epoch": 0.8393564933550944,
"grad_norm": 12.85043716430664,
"learning_rate": 4.1606435066449055e-05,
"loss": 0.4887,
"step": 3600
},
{
"epoch": 0.8393564933550944,
"eval_accuracy": 0.7637407711238721,
"eval_f1": 0.7492356256854388,
"eval_loss": 0.6969403624534607,
"eval_precision": 0.7486500525258384,
"eval_recall": 0.7582037541347775,
"eval_runtime": 32.6493,
"eval_samples_per_second": 37.336,
"eval_steps_per_second": 2.358,
"step": 3600
},
{
"epoch": 0.8416880391699697,
"grad_norm": 16.10417366027832,
"learning_rate": 4.15831196083003e-05,
"loss": 0.4708,
"step": 3610
},
{
"epoch": 0.844019584984845,
"grad_norm": 23.369497299194336,
"learning_rate": 4.155980415015156e-05,
"loss": 0.5608,
"step": 3620
},
{
"epoch": 0.8463511307997202,
"grad_norm": 19.515378952026367,
"learning_rate": 4.15364886920028e-05,
"loss": 0.5755,
"step": 3630
},
{
"epoch": 0.8486826766145955,
"grad_norm": 11.91996955871582,
"learning_rate": 4.151317323385405e-05,
"loss": 0.3869,
"step": 3640
},
{
"epoch": 0.8510142224294708,
"grad_norm": 21.93692398071289,
"learning_rate": 4.148985777570529e-05,
"loss": 0.484,
"step": 3650
},
{
"epoch": 0.853345768244346,
"grad_norm": 21.24445343017578,
"learning_rate": 4.1466542317556536e-05,
"loss": 0.3958,
"step": 3660
},
{
"epoch": 0.8556773140592213,
"grad_norm": 16.571958541870117,
"learning_rate": 4.144322685940779e-05,
"loss": 0.331,
"step": 3670
},
{
"epoch": 0.8580088598740965,
"grad_norm": 32.600013732910156,
"learning_rate": 4.141991140125904e-05,
"loss": 0.5633,
"step": 3680
},
{
"epoch": 0.8603404056889717,
"grad_norm": 22.03318977355957,
"learning_rate": 4.1396595943110284e-05,
"loss": 0.4267,
"step": 3690
},
{
"epoch": 0.862671951503847,
"grad_norm": 22.175247192382812,
"learning_rate": 4.137328048496153e-05,
"loss": 0.5786,
"step": 3700
},
{
"epoch": 0.8650034973187223,
"grad_norm": 16.111539840698242,
"learning_rate": 4.134996502681278e-05,
"loss": 0.313,
"step": 3710
},
{
"epoch": 0.8673350431335975,
"grad_norm": 23.84856414794922,
"learning_rate": 4.1326649568664025e-05,
"loss": 0.5621,
"step": 3720
},
{
"epoch": 0.8696665889484728,
"grad_norm": 27.550527572631836,
"learning_rate": 4.1303334110515276e-05,
"loss": 0.3678,
"step": 3730
},
{
"epoch": 0.8719981347633481,
"grad_norm": 17.500328063964844,
"learning_rate": 4.128001865236652e-05,
"loss": 0.4118,
"step": 3740
},
{
"epoch": 0.8743296805782234,
"grad_norm": 20.387914657592773,
"learning_rate": 4.1256703194217766e-05,
"loss": 0.4329,
"step": 3750
},
{
"epoch": 0.8766612263930986,
"grad_norm": 28.69219970703125,
"learning_rate": 4.123338773606902e-05,
"loss": 0.5973,
"step": 3760
},
{
"epoch": 0.8789927722079739,
"grad_norm": 67.64371490478516,
"learning_rate": 4.121007227792026e-05,
"loss": 0.439,
"step": 3770
},
{
"epoch": 0.8813243180228492,
"grad_norm": 11.542470932006836,
"learning_rate": 4.1186756819771513e-05,
"loss": 0.439,
"step": 3780
},
{
"epoch": 0.8836558638377244,
"grad_norm": 28.455219268798828,
"learning_rate": 4.116344136162276e-05,
"loss": 0.5973,
"step": 3790
},
{
"epoch": 0.8859874096525997,
"grad_norm": 24.25450325012207,
"learning_rate": 4.1140125903474e-05,
"loss": 0.5056,
"step": 3800
},
{
"epoch": 0.888318955467475,
"grad_norm": 27.57263946533203,
"learning_rate": 4.1116810445325254e-05,
"loss": 0.5563,
"step": 3810
},
{
"epoch": 0.8906505012823502,
"grad_norm": 32.943077087402344,
"learning_rate": 4.10934949871765e-05,
"loss": 0.5166,
"step": 3820
},
{
"epoch": 0.8929820470972255,
"grad_norm": 25.220003128051758,
"learning_rate": 4.1070179529027744e-05,
"loss": 0.4761,
"step": 3830
},
{
"epoch": 0.8953135929121008,
"grad_norm": 19.925155639648438,
"learning_rate": 4.1046864070878995e-05,
"loss": 0.3266,
"step": 3840
},
{
"epoch": 0.8976451387269759,
"grad_norm": 39.80344009399414,
"learning_rate": 4.102354861273025e-05,
"loss": 0.5011,
"step": 3850
},
{
"epoch": 0.8999766845418512,
"grad_norm": 23.42641830444336,
"learning_rate": 4.100023315458149e-05,
"loss": 0.5182,
"step": 3860
},
{
"epoch": 0.9023082303567265,
"grad_norm": 18.273305892944336,
"learning_rate": 4.0976917696432736e-05,
"loss": 0.4711,
"step": 3870
},
{
"epoch": 0.9046397761716017,
"grad_norm": 27.585613250732422,
"learning_rate": 4.095360223828398e-05,
"loss": 0.4976,
"step": 3880
},
{
"epoch": 0.906971321986477,
"grad_norm": 10.163019180297852,
"learning_rate": 4.093028678013523e-05,
"loss": 0.3424,
"step": 3890
},
{
"epoch": 0.9093028678013523,
"grad_norm": 18.85008430480957,
"learning_rate": 4.0906971321986484e-05,
"loss": 0.358,
"step": 3900
},
{
"epoch": 0.9093028678013523,
"eval_accuracy": 0.7456931911402789,
"eval_f1": 0.7407595301719996,
"eval_loss": 0.776879072189331,
"eval_precision": 0.7366901226312517,
"eval_recall": 0.7566993445706643,
"eval_runtime": 32.6742,
"eval_samples_per_second": 37.308,
"eval_steps_per_second": 2.357,
"step": 3900
},
{
"epoch": 0.9116344136162275,
"grad_norm": 32.68353271484375,
"learning_rate": 4.088365586383773e-05,
"loss": 0.4426,
"step": 3910
},
{
"epoch": 0.9139659594311028,
"grad_norm": 23.773094177246094,
"learning_rate": 4.086034040568897e-05,
"loss": 0.6041,
"step": 3920
},
{
"epoch": 0.9162975052459781,
"grad_norm": 18.2379093170166,
"learning_rate": 4.083702494754022e-05,
"loss": 0.4279,
"step": 3930
},
{
"epoch": 0.9186290510608534,
"grad_norm": 20.479825973510742,
"learning_rate": 4.081370948939147e-05,
"loss": 0.4433,
"step": 3940
},
{
"epoch": 0.9209605968757286,
"grad_norm": 32.64277648925781,
"learning_rate": 4.079039403124272e-05,
"loss": 0.4704,
"step": 3950
},
{
"epoch": 0.9232921426906039,
"grad_norm": 23.21099090576172,
"learning_rate": 4.0767078573093965e-05,
"loss": 0.4129,
"step": 3960
},
{
"epoch": 0.9256236885054792,
"grad_norm": 15.052021026611328,
"learning_rate": 4.074376311494521e-05,
"loss": 0.4739,
"step": 3970
},
{
"epoch": 0.9279552343203544,
"grad_norm": 14.582944869995117,
"learning_rate": 4.0720447656796455e-05,
"loss": 0.4743,
"step": 3980
},
{
"epoch": 0.9302867801352297,
"grad_norm": 20.65665626525879,
"learning_rate": 4.06971321986477e-05,
"loss": 0.465,
"step": 3990
},
{
"epoch": 0.932618325950105,
"grad_norm": 20.397233963012695,
"learning_rate": 4.067381674049896e-05,
"loss": 0.5524,
"step": 4000
},
{
"epoch": 0.9349498717649802,
"grad_norm": 6.327408313751221,
"learning_rate": 4.06505012823502e-05,
"loss": 0.3442,
"step": 4010
},
{
"epoch": 0.9372814175798554,
"grad_norm": 9.921250343322754,
"learning_rate": 4.062718582420145e-05,
"loss": 0.3566,
"step": 4020
},
{
"epoch": 0.9396129633947307,
"grad_norm": 17.692302703857422,
"learning_rate": 4.060387036605269e-05,
"loss": 0.4596,
"step": 4030
},
{
"epoch": 0.9419445092096059,
"grad_norm": 2.8422834873199463,
"learning_rate": 4.058055490790394e-05,
"loss": 0.4456,
"step": 4040
},
{
"epoch": 0.9442760550244812,
"grad_norm": 16.632015228271484,
"learning_rate": 4.055723944975519e-05,
"loss": 0.517,
"step": 4050
},
{
"epoch": 0.9466076008393565,
"grad_norm": 23.965824127197266,
"learning_rate": 4.053392399160644e-05,
"loss": 0.538,
"step": 4060
},
{
"epoch": 0.9489391466542317,
"grad_norm": 26.93478775024414,
"learning_rate": 4.0510608533457684e-05,
"loss": 0.6331,
"step": 4070
},
{
"epoch": 0.951270692469107,
"grad_norm": 24.248111724853516,
"learning_rate": 4.048729307530893e-05,
"loss": 0.4971,
"step": 4080
},
{
"epoch": 0.9536022382839823,
"grad_norm": 19.178695678710938,
"learning_rate": 4.046397761716018e-05,
"loss": 0.4324,
"step": 4090
},
{
"epoch": 0.9559337840988575,
"grad_norm": 19.033815383911133,
"learning_rate": 4.0440662159011425e-05,
"loss": 0.4867,
"step": 4100
},
{
"epoch": 0.9582653299137328,
"grad_norm": 30.779146194458008,
"learning_rate": 4.0417346700862676e-05,
"loss": 0.5852,
"step": 4110
},
{
"epoch": 0.9605968757286081,
"grad_norm": 15.432817459106445,
"learning_rate": 4.039403124271392e-05,
"loss": 0.5479,
"step": 4120
},
{
"epoch": 0.9629284215434833,
"grad_norm": 20.558475494384766,
"learning_rate": 4.0370715784565166e-05,
"loss": 0.4594,
"step": 4130
},
{
"epoch": 0.9652599673583586,
"grad_norm": 12.561017990112305,
"learning_rate": 4.034740032641642e-05,
"loss": 0.4238,
"step": 4140
},
{
"epoch": 0.9675915131732339,
"grad_norm": 20.812999725341797,
"learning_rate": 4.032408486826766e-05,
"loss": 0.5912,
"step": 4150
},
{
"epoch": 0.9699230589881092,
"grad_norm": 15.587718963623047,
"learning_rate": 4.0300769410118914e-05,
"loss": 0.3111,
"step": 4160
},
{
"epoch": 0.9722546048029844,
"grad_norm": 21.68885612487793,
"learning_rate": 4.027745395197016e-05,
"loss": 0.4421,
"step": 4170
},
{
"epoch": 0.9745861506178597,
"grad_norm": 8.09349536895752,
"learning_rate": 4.02541384938214e-05,
"loss": 0.4447,
"step": 4180
},
{
"epoch": 0.9769176964327349,
"grad_norm": 23.542570114135742,
"learning_rate": 4.0230823035672654e-05,
"loss": 0.4601,
"step": 4190
},
{
"epoch": 0.9792492422476101,
"grad_norm": 20.75370216369629,
"learning_rate": 4.02075075775239e-05,
"loss": 0.4899,
"step": 4200
},
{
"epoch": 0.9792492422476101,
"eval_accuracy": 0.7809680065627563,
"eval_f1": 0.7694587436976446,
"eval_loss": 0.6500813961029053,
"eval_precision": 0.7664779373382287,
"eval_recall": 0.7765403302363741,
"eval_runtime": 32.5513,
"eval_samples_per_second": 37.449,
"eval_steps_per_second": 2.365,
"step": 4200
}
],
"logging_steps": 10,
"max_steps": 21445,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 600,
"total_flos": 1.439676695052288e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}