{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9614965492190337, "eval_steps": 300, "global_step": 10800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0018162005085361425, "grad_norm": 51.7724723815918, "learning_rate": 4.99545949872866e-05, "loss": 4.15, "step": 10 }, { "epoch": 0.003632401017072285, "grad_norm": 39.101844787597656, "learning_rate": 4.9909189974573195e-05, "loss": 2.9203, "step": 20 }, { "epoch": 0.005448601525608427, "grad_norm": 59.884159088134766, "learning_rate": 4.9863784961859795e-05, "loss": 2.3234, "step": 30 }, { "epoch": 0.00726480203414457, "grad_norm": 30.44273567199707, "learning_rate": 4.981837994914639e-05, "loss": 2.2648, "step": 40 }, { "epoch": 0.009081002542680712, "grad_norm": 31.179187774658203, "learning_rate": 4.977297493643299e-05, "loss": 1.9688, "step": 50 }, { "epoch": 0.010897203051216855, "grad_norm": 24.85585594177246, "learning_rate": 4.972756992371958e-05, "loss": 1.8813, "step": 60 }, { "epoch": 0.012713403559752997, "grad_norm": 48.86698532104492, "learning_rate": 4.968216491100618e-05, "loss": 1.9812, "step": 70 }, { "epoch": 0.01452960406828914, "grad_norm": 51.080074310302734, "learning_rate": 4.963675989829277e-05, "loss": 1.8727, "step": 80 }, { "epoch": 0.01634580457682528, "grad_norm": 26.04038429260254, "learning_rate": 4.959135488557937e-05, "loss": 1.6398, "step": 90 }, { "epoch": 0.018162005085361425, "grad_norm": 24.692808151245117, "learning_rate": 4.9545949872865965e-05, "loss": 1.4898, "step": 100 }, { "epoch": 0.019978205593897565, "grad_norm": 20.653032302856445, "learning_rate": 4.9500544860152565e-05, "loss": 1.177, "step": 110 }, { "epoch": 0.02179440610243371, "grad_norm": 37.319427490234375, "learning_rate": 4.945513984743916e-05, "loss": 1.4352, "step": 120 }, { "epoch": 0.02361060661096985, "grad_norm": 22.78856086730957, "learning_rate": 4.940973483472576e-05, "loss": 1.4187, "step": 130 }, { "epoch": 0.025426807119505995, "grad_norm": 34.050315856933594, "learning_rate": 4.936432982201235e-05, "loss": 1.2316, "step": 140 }, { "epoch": 0.027243007628042135, "grad_norm": 18.459930419921875, "learning_rate": 4.931892480929895e-05, "loss": 1.2225, "step": 150 }, { "epoch": 0.02905920813657828, "grad_norm": 18.474990844726562, "learning_rate": 4.927351979658554e-05, "loss": 1.1586, "step": 160 }, { "epoch": 0.03087540864511442, "grad_norm": 31.727245330810547, "learning_rate": 4.922811478387214e-05, "loss": 1.1371, "step": 170 }, { "epoch": 0.03269160915365056, "grad_norm": 33.16598129272461, "learning_rate": 4.9182709771158735e-05, "loss": 0.8711, "step": 180 }, { "epoch": 0.03450780966218671, "grad_norm": 35.099700927734375, "learning_rate": 4.9137304758445335e-05, "loss": 1.1367, "step": 190 }, { "epoch": 0.03632401017072285, "grad_norm": 20.833833694458008, "learning_rate": 4.909189974573193e-05, "loss": 1.0652, "step": 200 }, { "epoch": 0.03814021067925899, "grad_norm": 29.985502243041992, "learning_rate": 4.904649473301853e-05, "loss": 1.0373, "step": 210 }, { "epoch": 0.03995641118779513, "grad_norm": 18.238357543945312, "learning_rate": 4.900108972030513e-05, "loss": 0.8555, "step": 220 }, { "epoch": 0.04177261169633127, "grad_norm": 23.067848205566406, "learning_rate": 4.895568470759172e-05, "loss": 0.9469, "step": 230 }, { "epoch": 0.04358881220486742, "grad_norm": 32.689029693603516, "learning_rate": 4.891027969487832e-05, "loss": 0.9379, "step": 240 }, { "epoch": 0.04540501271340356, "grad_norm": 25.626976013183594, "learning_rate": 4.886487468216491e-05, "loss": 0.8861, "step": 250 }, { "epoch": 0.0472212132219397, "grad_norm": 13.55506706237793, "learning_rate": 4.881946966945151e-05, "loss": 0.8361, "step": 260 }, { "epoch": 0.04903741373047584, "grad_norm": 22.14944839477539, "learning_rate": 4.8774064656738104e-05, "loss": 0.7471, "step": 270 }, { "epoch": 0.05085361423901199, "grad_norm": 20.66185760498047, "learning_rate": 4.8728659644024704e-05, "loss": 0.815, "step": 280 }, { "epoch": 0.05266981474754813, "grad_norm": 22.343624114990234, "learning_rate": 4.86832546313113e-05, "loss": 0.6479, "step": 290 }, { "epoch": 0.05448601525608427, "grad_norm": 23.354530334472656, "learning_rate": 4.86378496185979e-05, "loss": 0.6668, "step": 300 }, { "epoch": 0.05448601525608427, "eval_accuracy": 0.6562756357670222, "eval_f1": 0.6498431091967438, "eval_loss": 1.0104337930679321, "eval_precision": 0.6553569560524448, "eval_recall": 0.6624616383794661, "eval_runtime": 12.1348, "eval_samples_per_second": 100.455, "eval_steps_per_second": 6.345, "step": 300 }, { "epoch": 0.05630221576462041, "grad_norm": 19.208993911743164, "learning_rate": 4.859244460588449e-05, "loss": 0.9148, "step": 310 }, { "epoch": 0.05811841627315656, "grad_norm": 41.95314025878906, "learning_rate": 4.854703959317109e-05, "loss": 0.7869, "step": 320 }, { "epoch": 0.0599346167816927, "grad_norm": 26.48455047607422, "learning_rate": 4.850163458045768e-05, "loss": 0.7795, "step": 330 }, { "epoch": 0.06175081729022884, "grad_norm": 17.859508514404297, "learning_rate": 4.845622956774428e-05, "loss": 0.8215, "step": 340 }, { "epoch": 0.06356701779876499, "grad_norm": 30.228845596313477, "learning_rate": 4.8410824555030874e-05, "loss": 0.849, "step": 350 }, { "epoch": 0.06538321830730112, "grad_norm": 26.73940086364746, "learning_rate": 4.8365419542317474e-05, "loss": 0.7037, "step": 360 }, { "epoch": 0.06719941881583727, "grad_norm": 28.508052825927734, "learning_rate": 4.832001452960407e-05, "loss": 0.9711, "step": 370 }, { "epoch": 0.06901561932437342, "grad_norm": 26.7029972076416, "learning_rate": 4.827460951689067e-05, "loss": 0.8039, "step": 380 }, { "epoch": 0.07083181983290955, "grad_norm": 20.99094009399414, "learning_rate": 4.822920450417726e-05, "loss": 0.74, "step": 390 }, { "epoch": 0.0726480203414457, "grad_norm": 22.60647201538086, "learning_rate": 4.818379949146386e-05, "loss": 0.759, "step": 400 }, { "epoch": 0.07446422084998183, "grad_norm": 24.6385555267334, "learning_rate": 4.813839447875045e-05, "loss": 0.9516, "step": 410 }, { "epoch": 0.07628042135851798, "grad_norm": 19.862504959106445, "learning_rate": 4.809298946603705e-05, "loss": 0.6793, "step": 420 }, { "epoch": 0.07809662186705413, "grad_norm": 20.13799476623535, "learning_rate": 4.804758445332365e-05, "loss": 0.6387, "step": 430 }, { "epoch": 0.07991282237559026, "grad_norm": 11.443085670471191, "learning_rate": 4.8002179440610244e-05, "loss": 0.6844, "step": 440 }, { "epoch": 0.08172902288412641, "grad_norm": 14.975491523742676, "learning_rate": 4.7956774427896844e-05, "loss": 0.6252, "step": 450 }, { "epoch": 0.08354522339266254, "grad_norm": 34.0562858581543, "learning_rate": 4.7911369415183437e-05, "loss": 0.841, "step": 460 }, { "epoch": 0.08536142390119869, "grad_norm": 14.882052421569824, "learning_rate": 4.7865964402470036e-05, "loss": 0.6924, "step": 470 }, { "epoch": 0.08717762440973484, "grad_norm": 10.927328109741211, "learning_rate": 4.782055938975663e-05, "loss": 0.6586, "step": 480 }, { "epoch": 0.08899382491827097, "grad_norm": 18.295116424560547, "learning_rate": 4.777515437704323e-05, "loss": 0.7256, "step": 490 }, { "epoch": 0.09081002542680712, "grad_norm": 38.24443435668945, "learning_rate": 4.772974936432982e-05, "loss": 0.7914, "step": 500 }, { "epoch": 0.09262622593534327, "grad_norm": 20.668012619018555, "learning_rate": 4.768434435161642e-05, "loss": 0.5446, "step": 510 }, { "epoch": 0.0944424264438794, "grad_norm": 17.62775421142578, "learning_rate": 4.7638939338903014e-05, "loss": 0.7426, "step": 520 }, { "epoch": 0.09625862695241555, "grad_norm": 23.430341720581055, "learning_rate": 4.7593534326189614e-05, "loss": 0.7063, "step": 530 }, { "epoch": 0.09807482746095168, "grad_norm": 30.422704696655273, "learning_rate": 4.7548129313476207e-05, "loss": 0.6363, "step": 540 }, { "epoch": 0.09989102796948783, "grad_norm": 20.155187606811523, "learning_rate": 4.7502724300762806e-05, "loss": 0.7201, "step": 550 }, { "epoch": 0.10170722847802398, "grad_norm": 28.10911750793457, "learning_rate": 4.74573192880494e-05, "loss": 0.7906, "step": 560 }, { "epoch": 0.10352342898656011, "grad_norm": 25.283676147460938, "learning_rate": 4.7411914275336e-05, "loss": 0.5832, "step": 570 }, { "epoch": 0.10533962949509626, "grad_norm": 9.627545356750488, "learning_rate": 4.736650926262259e-05, "loss": 0.5875, "step": 580 }, { "epoch": 0.10715583000363241, "grad_norm": 12.515082359313965, "learning_rate": 4.732110424990919e-05, "loss": 0.7885, "step": 590 }, { "epoch": 0.10897203051216854, "grad_norm": 16.580331802368164, "learning_rate": 4.7275699237195784e-05, "loss": 0.7129, "step": 600 }, { "epoch": 0.10897203051216854, "eval_accuracy": 0.7235438884331419, "eval_f1": 0.7205989414762296, "eval_loss": 0.7799906134605408, "eval_precision": 0.730319610475981, "eval_recall": 0.7152200658078283, "eval_runtime": 12.0682, "eval_samples_per_second": 101.009, "eval_steps_per_second": 6.38, "step": 600 }, { "epoch": 0.11078823102070469, "grad_norm": 18.379596710205078, "learning_rate": 4.7230294224482384e-05, "loss": 0.5647, "step": 610 }, { "epoch": 0.11260443152924082, "grad_norm": 23.94647789001465, "learning_rate": 4.7184889211768977e-05, "loss": 0.7223, "step": 620 }, { "epoch": 0.11442063203777697, "grad_norm": 16.6752986907959, "learning_rate": 4.7139484199055576e-05, "loss": 0.5555, "step": 630 }, { "epoch": 0.11623683254631312, "grad_norm": 19.58445930480957, "learning_rate": 4.7094079186342176e-05, "loss": 0.5701, "step": 640 }, { "epoch": 0.11805303305484925, "grad_norm": 9.683353424072266, "learning_rate": 4.704867417362877e-05, "loss": 0.5934, "step": 650 }, { "epoch": 0.1198692335633854, "grad_norm": 16.68168830871582, "learning_rate": 4.700326916091537e-05, "loss": 0.6471, "step": 660 }, { "epoch": 0.12168543407192153, "grad_norm": 16.534196853637695, "learning_rate": 4.695786414820196e-05, "loss": 0.7002, "step": 670 }, { "epoch": 0.12350163458045768, "grad_norm": 12.310294151306152, "learning_rate": 4.691245913548856e-05, "loss": 0.6939, "step": 680 }, { "epoch": 0.12531783508899383, "grad_norm": 24.0400333404541, "learning_rate": 4.6867054122775154e-05, "loss": 0.6732, "step": 690 }, { "epoch": 0.12713403559752998, "grad_norm": 22.213275909423828, "learning_rate": 4.682164911006175e-05, "loss": 0.4664, "step": 700 }, { "epoch": 0.1289502361060661, "grad_norm": 41.8505973815918, "learning_rate": 4.6776244097348346e-05, "loss": 0.5713, "step": 710 }, { "epoch": 0.13076643661460224, "grad_norm": 39.898765563964844, "learning_rate": 4.6730839084634946e-05, "loss": 0.8389, "step": 720 }, { "epoch": 0.1325826371231384, "grad_norm": 74.52597045898438, "learning_rate": 4.668543407192154e-05, "loss": 0.7645, "step": 730 }, { "epoch": 0.13439883763167454, "grad_norm": 15.932312965393066, "learning_rate": 4.664002905920814e-05, "loss": 0.8391, "step": 740 }, { "epoch": 0.1362150381402107, "grad_norm": 18.114957809448242, "learning_rate": 4.659462404649473e-05, "loss": 0.7943, "step": 750 }, { "epoch": 0.13803123864874683, "grad_norm": 16.521848678588867, "learning_rate": 4.654921903378133e-05, "loss": 0.5992, "step": 760 }, { "epoch": 0.13984743915728295, "grad_norm": 27.91718101501465, "learning_rate": 4.6503814021067924e-05, "loss": 0.7793, "step": 770 }, { "epoch": 0.1416636396658191, "grad_norm": 19.695844650268555, "learning_rate": 4.645840900835452e-05, "loss": 0.7436, "step": 780 }, { "epoch": 0.14347984017435525, "grad_norm": 16.544538497924805, "learning_rate": 4.6413003995641116e-05, "loss": 0.6805, "step": 790 }, { "epoch": 0.1452960406828914, "grad_norm": 18.75685691833496, "learning_rate": 4.6367598982927716e-05, "loss": 0.4687, "step": 800 }, { "epoch": 0.14711224119142755, "grad_norm": 21.897932052612305, "learning_rate": 4.632219397021431e-05, "loss": 0.7238, "step": 810 }, { "epoch": 0.14892844169996367, "grad_norm": 31.82994270324707, "learning_rate": 4.627678895750091e-05, "loss": 0.7512, "step": 820 }, { "epoch": 0.1507446422084998, "grad_norm": 15.97396183013916, "learning_rate": 4.62313839447875e-05, "loss": 0.6934, "step": 830 }, { "epoch": 0.15256084271703596, "grad_norm": 20.96219253540039, "learning_rate": 4.61859789320741e-05, "loss": 0.5324, "step": 840 }, { "epoch": 0.1543770432255721, "grad_norm": 19.114473342895508, "learning_rate": 4.61405739193607e-05, "loss": 0.508, "step": 850 }, { "epoch": 0.15619324373410826, "grad_norm": 9.712385177612305, "learning_rate": 4.609516890664729e-05, "loss": 0.5295, "step": 860 }, { "epoch": 0.15800944424264438, "grad_norm": 13.762930870056152, "learning_rate": 4.604976389393389e-05, "loss": 0.7455, "step": 870 }, { "epoch": 0.15982564475118052, "grad_norm": 27.86884307861328, "learning_rate": 4.6004358881220486e-05, "loss": 0.5587, "step": 880 }, { "epoch": 0.16164184525971667, "grad_norm": 26.105749130249023, "learning_rate": 4.5958953868507085e-05, "loss": 0.5981, "step": 890 }, { "epoch": 0.16345804576825282, "grad_norm": 26.51416015625, "learning_rate": 4.591354885579368e-05, "loss": 0.5396, "step": 900 }, { "epoch": 0.16345804576825282, "eval_accuracy": 0.7424118129614438, "eval_f1": 0.7311388146519514, "eval_loss": 0.7538678050041199, "eval_precision": 0.7369680243126087, "eval_recall": 0.7387445231992035, "eval_runtime": 12.1028, "eval_samples_per_second": 100.721, "eval_steps_per_second": 6.362, "step": 900 }, { "epoch": 0.16527424627678897, "grad_norm": 22.03255844116211, "learning_rate": 4.586814384308028e-05, "loss": 0.6057, "step": 910 }, { "epoch": 0.1670904467853251, "grad_norm": 22.521772384643555, "learning_rate": 4.582273883036687e-05, "loss": 0.7402, "step": 920 }, { "epoch": 0.16890664729386123, "grad_norm": 14.045843124389648, "learning_rate": 4.577733381765347e-05, "loss": 0.5914, "step": 930 }, { "epoch": 0.17072284780239738, "grad_norm": 11.75537395477295, "learning_rate": 4.573192880494006e-05, "loss": 0.5954, "step": 940 }, { "epoch": 0.17253904831093353, "grad_norm": 25.500017166137695, "learning_rate": 4.568652379222666e-05, "loss": 0.6219, "step": 950 }, { "epoch": 0.17435524881946968, "grad_norm": 15.509596824645996, "learning_rate": 4.5641118779513256e-05, "loss": 0.5603, "step": 960 }, { "epoch": 0.17617144932800582, "grad_norm": 20.986408233642578, "learning_rate": 4.5595713766799855e-05, "loss": 0.6184, "step": 970 }, { "epoch": 0.17798764983654194, "grad_norm": 12.335230827331543, "learning_rate": 4.555030875408645e-05, "loss": 0.7159, "step": 980 }, { "epoch": 0.1798038503450781, "grad_norm": 14.523093223571777, "learning_rate": 4.550490374137305e-05, "loss": 0.5396, "step": 990 }, { "epoch": 0.18162005085361424, "grad_norm": 18.180063247680664, "learning_rate": 4.545949872865964e-05, "loss": 0.4772, "step": 1000 }, { "epoch": 0.1834362513621504, "grad_norm": 11.765399932861328, "learning_rate": 4.541409371594624e-05, "loss": 0.5037, "step": 1010 }, { "epoch": 0.18525245187068654, "grad_norm": 14.669034957885742, "learning_rate": 4.536868870323284e-05, "loss": 0.7092, "step": 1020 }, { "epoch": 0.18706865237922266, "grad_norm": 22.75113296508789, "learning_rate": 4.532328369051943e-05, "loss": 0.6244, "step": 1030 }, { "epoch": 0.1888848528877588, "grad_norm": 15.64301872253418, "learning_rate": 4.527787867780603e-05, "loss": 0.6145, "step": 1040 }, { "epoch": 0.19070105339629495, "grad_norm": 25.85451316833496, "learning_rate": 4.5232473665092625e-05, "loss": 0.6305, "step": 1050 }, { "epoch": 0.1925172539048311, "grad_norm": 11.775480270385742, "learning_rate": 4.5187068652379225e-05, "loss": 0.5969, "step": 1060 }, { "epoch": 0.19433345441336725, "grad_norm": 15.521839141845703, "learning_rate": 4.514166363966582e-05, "loss": 0.587, "step": 1070 }, { "epoch": 0.19614965492190337, "grad_norm": 13.683486938476562, "learning_rate": 4.509625862695242e-05, "loss": 0.5607, "step": 1080 }, { "epoch": 0.1979658554304395, "grad_norm": 20.192411422729492, "learning_rate": 4.505085361423901e-05, "loss": 0.6512, "step": 1090 }, { "epoch": 0.19978205593897566, "grad_norm": 10.552505493164062, "learning_rate": 4.500544860152561e-05, "loss": 0.4988, "step": 1100 }, { "epoch": 0.2015982564475118, "grad_norm": 20.700984954833984, "learning_rate": 4.49600435888122e-05, "loss": 0.6752, "step": 1110 }, { "epoch": 0.20341445695604796, "grad_norm": 11.448249816894531, "learning_rate": 4.49146385760988e-05, "loss": 0.6486, "step": 1120 }, { "epoch": 0.20523065746458408, "grad_norm": 12.439767837524414, "learning_rate": 4.48692335633854e-05, "loss": 0.6803, "step": 1130 }, { "epoch": 0.20704685797312022, "grad_norm": 21.695728302001953, "learning_rate": 4.4823828550671995e-05, "loss": 0.6166, "step": 1140 }, { "epoch": 0.20886305848165637, "grad_norm": 19.776832580566406, "learning_rate": 4.4778423537958595e-05, "loss": 0.5869, "step": 1150 }, { "epoch": 0.21067925899019252, "grad_norm": 19.56122589111328, "learning_rate": 4.473301852524519e-05, "loss": 0.6789, "step": 1160 }, { "epoch": 0.21249545949872867, "grad_norm": 20.19476318359375, "learning_rate": 4.468761351253179e-05, "loss": 0.6135, "step": 1170 }, { "epoch": 0.21431166000726481, "grad_norm": 15.969949722290039, "learning_rate": 4.464220849981838e-05, "loss": 0.5218, "step": 1180 }, { "epoch": 0.21612786051580093, "grad_norm": 13.37980842590332, "learning_rate": 4.459680348710498e-05, "loss": 0.5176, "step": 1190 }, { "epoch": 0.21794406102433708, "grad_norm": 7.233293056488037, "learning_rate": 4.455139847439157e-05, "loss": 0.3802, "step": 1200 }, { "epoch": 0.21794406102433708, "eval_accuracy": 0.7202625102543068, "eval_f1": 0.722200966558384, "eval_loss": 0.8283492922782898, "eval_precision": 0.7215506981816255, "eval_recall": 0.7526848662720801, "eval_runtime": 12.054, "eval_samples_per_second": 101.128, "eval_steps_per_second": 6.388, "step": 1200 }, { "epoch": 0.21976026153287323, "grad_norm": 14.58340835571289, "learning_rate": 4.450599346167817e-05, "loss": 0.7496, "step": 1210 }, { "epoch": 0.22157646204140938, "grad_norm": 16.27931785583496, "learning_rate": 4.4460588448964765e-05, "loss": 0.6172, "step": 1220 }, { "epoch": 0.22339266254994553, "grad_norm": 19.69707679748535, "learning_rate": 4.4415183436251365e-05, "loss": 0.6921, "step": 1230 }, { "epoch": 0.22520886305848165, "grad_norm": 13.30395793914795, "learning_rate": 4.4369778423537964e-05, "loss": 0.6034, "step": 1240 }, { "epoch": 0.2270250635670178, "grad_norm": 13.300093650817871, "learning_rate": 4.432437341082456e-05, "loss": 0.503, "step": 1250 }, { "epoch": 0.22884126407555394, "grad_norm": 14.82442855834961, "learning_rate": 4.427896839811116e-05, "loss": 0.7216, "step": 1260 }, { "epoch": 0.2306574645840901, "grad_norm": 29.971027374267578, "learning_rate": 4.423356338539775e-05, "loss": 0.6413, "step": 1270 }, { "epoch": 0.23247366509262624, "grad_norm": 24.70488166809082, "learning_rate": 4.418815837268435e-05, "loss": 0.5686, "step": 1280 }, { "epoch": 0.23428986560116236, "grad_norm": 18.32679557800293, "learning_rate": 4.414275335997094e-05, "loss": 0.5744, "step": 1290 }, { "epoch": 0.2361060661096985, "grad_norm": 16.468469619750977, "learning_rate": 4.409734834725754e-05, "loss": 0.5514, "step": 1300 }, { "epoch": 0.23792226661823465, "grad_norm": 14.710607528686523, "learning_rate": 4.4051943334544135e-05, "loss": 0.5871, "step": 1310 }, { "epoch": 0.2397384671267708, "grad_norm": 15.89440631866455, "learning_rate": 4.4006538321830734e-05, "loss": 0.6898, "step": 1320 }, { "epoch": 0.24155466763530695, "grad_norm": 18.804264068603516, "learning_rate": 4.396113330911733e-05, "loss": 0.617, "step": 1330 }, { "epoch": 0.24337086814384307, "grad_norm": 17.780223846435547, "learning_rate": 4.391572829640393e-05, "loss": 0.5986, "step": 1340 }, { "epoch": 0.24518706865237921, "grad_norm": 18.2768611907959, "learning_rate": 4.3870323283690526e-05, "loss": 0.498, "step": 1350 }, { "epoch": 0.24700326916091536, "grad_norm": 20.490026473999023, "learning_rate": 4.382491827097712e-05, "loss": 0.5705, "step": 1360 }, { "epoch": 0.2488194696694515, "grad_norm": 14.712557792663574, "learning_rate": 4.377951325826372e-05, "loss": 0.5586, "step": 1370 }, { "epoch": 0.25063567017798766, "grad_norm": 15.078400611877441, "learning_rate": 4.373410824555031e-05, "loss": 0.7252, "step": 1380 }, { "epoch": 0.2524518706865238, "grad_norm": 16.71666717529297, "learning_rate": 4.368870323283691e-05, "loss": 0.4957, "step": 1390 }, { "epoch": 0.25426807119505995, "grad_norm": 21.710941314697266, "learning_rate": 4.3643298220123504e-05, "loss": 0.5567, "step": 1400 }, { "epoch": 0.2560842717035961, "grad_norm": 13.499922752380371, "learning_rate": 4.3597893207410104e-05, "loss": 0.6141, "step": 1410 }, { "epoch": 0.2579004722121322, "grad_norm": 22.025402069091797, "learning_rate": 4.35524881946967e-05, "loss": 0.7273, "step": 1420 }, { "epoch": 0.25971667272066834, "grad_norm": 17.700258255004883, "learning_rate": 4.3507083181983296e-05, "loss": 0.5406, "step": 1430 }, { "epoch": 0.2615328732292045, "grad_norm": 32.67435073852539, "learning_rate": 4.346167816926989e-05, "loss": 0.5677, "step": 1440 }, { "epoch": 0.26334907373774064, "grad_norm": 15.318320274353027, "learning_rate": 4.341627315655649e-05, "loss": 0.6266, "step": 1450 }, { "epoch": 0.2651652742462768, "grad_norm": 27.953414916992188, "learning_rate": 4.337086814384308e-05, "loss": 0.5541, "step": 1460 }, { "epoch": 0.26698147475481293, "grad_norm": 12.882086753845215, "learning_rate": 4.332546313112968e-05, "loss": 0.5984, "step": 1470 }, { "epoch": 0.2687976752633491, "grad_norm": 17.85333824157715, "learning_rate": 4.328005811841628e-05, "loss": 0.5021, "step": 1480 }, { "epoch": 0.2706138757718852, "grad_norm": 11.902690887451172, "learning_rate": 4.3234653105702874e-05, "loss": 0.5995, "step": 1490 }, { "epoch": 0.2724300762804214, "grad_norm": 8.324485778808594, "learning_rate": 4.3189248092989474e-05, "loss": 0.5337, "step": 1500 }, { "epoch": 0.2724300762804214, "eval_accuracy": 0.7637407711238721, "eval_f1": 0.7516135071707333, "eval_loss": 0.6853081583976746, "eval_precision": 0.7710051578059205, "eval_recall": 0.7442999398826009, "eval_runtime": 12.0321, "eval_samples_per_second": 101.312, "eval_steps_per_second": 6.4, "step": 1500 }, { "epoch": 0.2742462767889575, "grad_norm": 17.059507369995117, "learning_rate": 4.3143843080276066e-05, "loss": 0.693, "step": 1510 }, { "epoch": 0.27606247729749367, "grad_norm": 20.31708526611328, "learning_rate": 4.3098438067562666e-05, "loss": 0.5956, "step": 1520 }, { "epoch": 0.27787867780602976, "grad_norm": 20.84437370300293, "learning_rate": 4.305303305484926e-05, "loss": 0.6239, "step": 1530 }, { "epoch": 0.2796948783145659, "grad_norm": 22.729970932006836, "learning_rate": 4.300762804213586e-05, "loss": 0.5901, "step": 1540 }, { "epoch": 0.28151107882310206, "grad_norm": 14.622097969055176, "learning_rate": 4.296222302942245e-05, "loss": 0.4957, "step": 1550 }, { "epoch": 0.2833272793316382, "grad_norm": 18.853378295898438, "learning_rate": 4.291681801670905e-05, "loss": 0.544, "step": 1560 }, { "epoch": 0.28514347984017435, "grad_norm": 20.18765640258789, "learning_rate": 4.2871413003995644e-05, "loss": 0.7035, "step": 1570 }, { "epoch": 0.2869596803487105, "grad_norm": 15.882144927978516, "learning_rate": 4.2826007991282244e-05, "loss": 0.5633, "step": 1580 }, { "epoch": 0.28877588085724665, "grad_norm": 14.607442855834961, "learning_rate": 4.2780602978568836e-05, "loss": 0.6074, "step": 1590 }, { "epoch": 0.2905920813657828, "grad_norm": 17.136274337768555, "learning_rate": 4.2735197965855436e-05, "loss": 0.5523, "step": 1600 }, { "epoch": 0.29240828187431894, "grad_norm": 7.1735429763793945, "learning_rate": 4.268979295314203e-05, "loss": 0.6283, "step": 1610 }, { "epoch": 0.2942244823828551, "grad_norm": 11.038073539733887, "learning_rate": 4.264438794042863e-05, "loss": 0.6132, "step": 1620 }, { "epoch": 0.2960406828913912, "grad_norm": 15.272370338439941, "learning_rate": 4.259898292771522e-05, "loss": 0.4732, "step": 1630 }, { "epoch": 0.29785688339992733, "grad_norm": 23.69139289855957, "learning_rate": 4.255357791500182e-05, "loss": 0.5788, "step": 1640 }, { "epoch": 0.2996730839084635, "grad_norm": 16.922348022460938, "learning_rate": 4.2508172902288414e-05, "loss": 0.5357, "step": 1650 }, { "epoch": 0.3014892844169996, "grad_norm": 16.82611656188965, "learning_rate": 4.2462767889575014e-05, "loss": 0.5324, "step": 1660 }, { "epoch": 0.3033054849255358, "grad_norm": 6.3888773918151855, "learning_rate": 4.2417362876861606e-05, "loss": 0.4909, "step": 1670 }, { "epoch": 0.3051216854340719, "grad_norm": 11.367060661315918, "learning_rate": 4.2371957864148206e-05, "loss": 0.3793, "step": 1680 }, { "epoch": 0.30693788594260807, "grad_norm": 13.870577812194824, "learning_rate": 4.2326552851434806e-05, "loss": 0.5448, "step": 1690 }, { "epoch": 0.3087540864511442, "grad_norm": 18.472719192504883, "learning_rate": 4.22811478387214e-05, "loss": 0.5681, "step": 1700 }, { "epoch": 0.31057028695968036, "grad_norm": 10.446008682250977, "learning_rate": 4.2235742826008e-05, "loss": 0.496, "step": 1710 }, { "epoch": 0.3123864874682165, "grad_norm": 13.921605110168457, "learning_rate": 4.219033781329459e-05, "loss": 0.5314, "step": 1720 }, { "epoch": 0.31420268797675266, "grad_norm": 17.571805953979492, "learning_rate": 4.214493280058119e-05, "loss": 0.6498, "step": 1730 }, { "epoch": 0.31601888848528875, "grad_norm": 21.902027130126953, "learning_rate": 4.2099527787867784e-05, "loss": 0.5822, "step": 1740 }, { "epoch": 0.3178350889938249, "grad_norm": 20.000957489013672, "learning_rate": 4.205412277515438e-05, "loss": 0.5377, "step": 1750 }, { "epoch": 0.31965128950236105, "grad_norm": 12.021200180053711, "learning_rate": 4.2008717762440976e-05, "loss": 0.6164, "step": 1760 }, { "epoch": 0.3214674900108972, "grad_norm": 24.00454330444336, "learning_rate": 4.1963312749727576e-05, "loss": 0.6348, "step": 1770 }, { "epoch": 0.32328369051943334, "grad_norm": 14.13219928741455, "learning_rate": 4.191790773701417e-05, "loss": 0.5242, "step": 1780 }, { "epoch": 0.3250998910279695, "grad_norm": 32.07684326171875, "learning_rate": 4.187250272430077e-05, "loss": 0.8162, "step": 1790 }, { "epoch": 0.32691609153650564, "grad_norm": 16.062604904174805, "learning_rate": 4.182709771158736e-05, "loss": 0.568, "step": 1800 }, { "epoch": 0.32691609153650564, "eval_accuracy": 0.7793273174733388, "eval_f1": 0.7716766196492151, "eval_loss": 0.6542023420333862, "eval_precision": 0.7704320356934777, "eval_recall": 0.7814793806758547, "eval_runtime": 12.0427, "eval_samples_per_second": 101.223, "eval_steps_per_second": 6.394, "step": 1800 }, { "epoch": 0.3287322920450418, "grad_norm": 14.167701721191406, "learning_rate": 4.178169269887396e-05, "loss": 0.502, "step": 1810 }, { "epoch": 0.33054849255357793, "grad_norm": 16.717453002929688, "learning_rate": 4.1736287686160554e-05, "loss": 0.4441, "step": 1820 }, { "epoch": 0.3323646930621141, "grad_norm": 24.774871826171875, "learning_rate": 4.169088267344715e-05, "loss": 0.5017, "step": 1830 }, { "epoch": 0.3341808935706502, "grad_norm": 12.445333480834961, "learning_rate": 4.1645477660733746e-05, "loss": 0.4088, "step": 1840 }, { "epoch": 0.3359970940791863, "grad_norm": 18.43295669555664, "learning_rate": 4.1600072648020346e-05, "loss": 0.6032, "step": 1850 }, { "epoch": 0.33781329458772247, "grad_norm": 26.735172271728516, "learning_rate": 4.155466763530694e-05, "loss": 0.5611, "step": 1860 }, { "epoch": 0.3396294950962586, "grad_norm": 18.15043067932129, "learning_rate": 4.150926262259354e-05, "loss": 0.4716, "step": 1870 }, { "epoch": 0.34144569560479476, "grad_norm": 18.67064094543457, "learning_rate": 4.146385760988013e-05, "loss": 0.5643, "step": 1880 }, { "epoch": 0.3432618961133309, "grad_norm": 21.009254455566406, "learning_rate": 4.141845259716673e-05, "loss": 0.6117, "step": 1890 }, { "epoch": 0.34507809662186706, "grad_norm": 10.891605377197266, "learning_rate": 4.137304758445333e-05, "loss": 0.5336, "step": 1900 }, { "epoch": 0.3468942971304032, "grad_norm": 6.9248504638671875, "learning_rate": 4.132764257173992e-05, "loss": 0.5683, "step": 1910 }, { "epoch": 0.34871049763893935, "grad_norm": 20.700204849243164, "learning_rate": 4.128223755902652e-05, "loss": 0.5869, "step": 1920 }, { "epoch": 0.3505266981474755, "grad_norm": 17.678829193115234, "learning_rate": 4.1236832546313116e-05, "loss": 0.5081, "step": 1930 }, { "epoch": 0.35234289865601165, "grad_norm": 19.98926544189453, "learning_rate": 4.1191427533599715e-05, "loss": 0.4912, "step": 1940 }, { "epoch": 0.35415909916454774, "grad_norm": 15.056520462036133, "learning_rate": 4.114602252088631e-05, "loss": 0.5509, "step": 1950 }, { "epoch": 0.3559752996730839, "grad_norm": 19.50244903564453, "learning_rate": 4.110061750817291e-05, "loss": 0.5181, "step": 1960 }, { "epoch": 0.35779150018162004, "grad_norm": 20.018245697021484, "learning_rate": 4.10552124954595e-05, "loss": 0.6597, "step": 1970 }, { "epoch": 0.3596077006901562, "grad_norm": 18.31260108947754, "learning_rate": 4.10098074827461e-05, "loss": 0.7217, "step": 1980 }, { "epoch": 0.36142390119869233, "grad_norm": 15.864529609680176, "learning_rate": 4.096440247003269e-05, "loss": 0.3586, "step": 1990 }, { "epoch": 0.3632401017072285, "grad_norm": 9.000946998596191, "learning_rate": 4.091899745731929e-05, "loss": 0.5125, "step": 2000 }, { "epoch": 0.3650563022157646, "grad_norm": 4.08746337890625, "learning_rate": 4.0873592444605886e-05, "loss": 0.5204, "step": 2010 }, { "epoch": 0.3668725027243008, "grad_norm": 7.909645080566406, "learning_rate": 4.0828187431892485e-05, "loss": 0.3989, "step": 2020 }, { "epoch": 0.3686887032328369, "grad_norm": 17.512380599975586, "learning_rate": 4.078278241917908e-05, "loss": 0.5338, "step": 2030 }, { "epoch": 0.37050490374137307, "grad_norm": 22.659942626953125, "learning_rate": 4.073737740646568e-05, "loss": 0.5221, "step": 2040 }, { "epoch": 0.37232110424990916, "grad_norm": 8.159212112426758, "learning_rate": 4.069197239375227e-05, "loss": 0.5502, "step": 2050 }, { "epoch": 0.3741373047584453, "grad_norm": 20.236705780029297, "learning_rate": 4.064656738103887e-05, "loss": 0.4267, "step": 2060 }, { "epoch": 0.37595350526698146, "grad_norm": 16.24159049987793, "learning_rate": 4.060116236832546e-05, "loss": 0.5693, "step": 2070 }, { "epoch": 0.3777697057755176, "grad_norm": 14.921638488769531, "learning_rate": 4.055575735561206e-05, "loss": 0.5152, "step": 2080 }, { "epoch": 0.37958590628405375, "grad_norm": 18.668865203857422, "learning_rate": 4.0510352342898656e-05, "loss": 0.6135, "step": 2090 }, { "epoch": 0.3814021067925899, "grad_norm": 23.298078536987305, "learning_rate": 4.0464947330185255e-05, "loss": 0.5237, "step": 2100 }, { "epoch": 0.3814021067925899, "eval_accuracy": 0.7752255947497949, "eval_f1": 0.7694936459460316, "eval_loss": 0.629031240940094, "eval_precision": 0.7649651818415721, "eval_recall": 0.7791450088077798, "eval_runtime": 12.0845, "eval_samples_per_second": 100.873, "eval_steps_per_second": 6.372, "step": 2100 }, { "epoch": 0.38321830730112605, "grad_norm": 14.145480155944824, "learning_rate": 4.0419542317471855e-05, "loss": 0.4478, "step": 2110 }, { "epoch": 0.3850345078096622, "grad_norm": 21.260257720947266, "learning_rate": 4.037413730475845e-05, "loss": 0.4185, "step": 2120 }, { "epoch": 0.38685070831819834, "grad_norm": 9.28508186340332, "learning_rate": 4.032873229204505e-05, "loss": 0.6288, "step": 2130 }, { "epoch": 0.3886669088267345, "grad_norm": 12.114027976989746, "learning_rate": 4.028332727933164e-05, "loss": 0.5159, "step": 2140 }, { "epoch": 0.39048310933527064, "grad_norm": 27.268280029296875, "learning_rate": 4.023792226661824e-05, "loss": 0.6385, "step": 2150 }, { "epoch": 0.39229930984380673, "grad_norm": 15.575640678405762, "learning_rate": 4.019251725390483e-05, "loss": 0.5996, "step": 2160 }, { "epoch": 0.3941155103523429, "grad_norm": 15.587044715881348, "learning_rate": 4.014711224119143e-05, "loss": 0.4721, "step": 2170 }, { "epoch": 0.395931710860879, "grad_norm": 15.621984481811523, "learning_rate": 4.0101707228478025e-05, "loss": 0.49, "step": 2180 }, { "epoch": 0.3977479113694152, "grad_norm": 10.312201499938965, "learning_rate": 4.0056302215764625e-05, "loss": 0.5224, "step": 2190 }, { "epoch": 0.3995641118779513, "grad_norm": 17.903989791870117, "learning_rate": 4.001089720305122e-05, "loss": 0.4217, "step": 2200 }, { "epoch": 0.40138031238648747, "grad_norm": 15.996349334716797, "learning_rate": 3.996549219033782e-05, "loss": 0.4906, "step": 2210 }, { "epoch": 0.4031965128950236, "grad_norm": 21.02739715576172, "learning_rate": 3.992008717762441e-05, "loss": 0.6814, "step": 2220 }, { "epoch": 0.40501271340355977, "grad_norm": 21.1398868560791, "learning_rate": 3.987468216491101e-05, "loss": 0.3679, "step": 2230 }, { "epoch": 0.4068289139120959, "grad_norm": 24.41451072692871, "learning_rate": 3.98292771521976e-05, "loss": 0.4802, "step": 2240 }, { "epoch": 0.40864511442063206, "grad_norm": 22.847251892089844, "learning_rate": 3.97838721394842e-05, "loss": 0.5097, "step": 2250 }, { "epoch": 0.41046131492916815, "grad_norm": 14.888809204101562, "learning_rate": 3.9738467126770795e-05, "loss": 0.4294, "step": 2260 }, { "epoch": 0.4122775154377043, "grad_norm": 20.353588104248047, "learning_rate": 3.9693062114057395e-05, "loss": 0.4949, "step": 2270 }, { "epoch": 0.41409371594624045, "grad_norm": 11.559284210205078, "learning_rate": 3.964765710134399e-05, "loss": 0.4353, "step": 2280 }, { "epoch": 0.4159099164547766, "grad_norm": 12.643139839172363, "learning_rate": 3.960225208863059e-05, "loss": 0.3885, "step": 2290 }, { "epoch": 0.41772611696331274, "grad_norm": 4.294188022613525, "learning_rate": 3.955684707591718e-05, "loss": 0.4166, "step": 2300 }, { "epoch": 0.4195423174718489, "grad_norm": 17.501489639282227, "learning_rate": 3.951144206320378e-05, "loss": 0.4092, "step": 2310 }, { "epoch": 0.42135851798038504, "grad_norm": 9.189852714538574, "learning_rate": 3.946603705049038e-05, "loss": 0.5333, "step": 2320 }, { "epoch": 0.4231747184889212, "grad_norm": 19.29057502746582, "learning_rate": 3.942063203777697e-05, "loss": 0.6406, "step": 2330 }, { "epoch": 0.42499091899745733, "grad_norm": 15.98727035522461, "learning_rate": 3.937522702506357e-05, "loss": 0.4677, "step": 2340 }, { "epoch": 0.4268071195059935, "grad_norm": 9.902159690856934, "learning_rate": 3.9329822012350165e-05, "loss": 0.5894, "step": 2350 }, { "epoch": 0.42862332001452963, "grad_norm": 18.965747833251953, "learning_rate": 3.9284416999636764e-05, "loss": 0.6196, "step": 2360 }, { "epoch": 0.4304395205230657, "grad_norm": 18.899520874023438, "learning_rate": 3.923901198692336e-05, "loss": 0.5199, "step": 2370 }, { "epoch": 0.43225572103160187, "grad_norm": 12.890677452087402, "learning_rate": 3.919360697420996e-05, "loss": 0.583, "step": 2380 }, { "epoch": 0.434071921540138, "grad_norm": 17.285070419311523, "learning_rate": 3.914820196149655e-05, "loss": 0.4777, "step": 2390 }, { "epoch": 0.43588812204867416, "grad_norm": 11.499088287353516, "learning_rate": 3.910279694878315e-05, "loss": 0.4478, "step": 2400 }, { "epoch": 0.43588812204867416, "eval_accuracy": 0.7908121410992617, "eval_f1": 0.7894885784356462, "eval_loss": 0.6197062134742737, "eval_precision": 0.778593519395511, "eval_recall": 0.8067083793113867, "eval_runtime": 12.0738, "eval_samples_per_second": 100.963, "eval_steps_per_second": 6.377, "step": 2400 }, { "epoch": 0.4377043225572103, "grad_norm": 11.969683647155762, "learning_rate": 3.905739193606974e-05, "loss": 0.4575, "step": 2410 }, { "epoch": 0.43952052306574646, "grad_norm": 16.60710906982422, "learning_rate": 3.901198692335634e-05, "loss": 0.6813, "step": 2420 }, { "epoch": 0.4413367235742826, "grad_norm": 7.043119430541992, "learning_rate": 3.8966581910642935e-05, "loss": 0.5086, "step": 2430 }, { "epoch": 0.44315292408281876, "grad_norm": 24.139657974243164, "learning_rate": 3.8921176897929534e-05, "loss": 0.6707, "step": 2440 }, { "epoch": 0.4449691245913549, "grad_norm": 10.262349128723145, "learning_rate": 3.887577188521613e-05, "loss": 0.4105, "step": 2450 }, { "epoch": 0.44678532509989105, "grad_norm": 18.123046875, "learning_rate": 3.883036687250273e-05, "loss": 0.5051, "step": 2460 }, { "epoch": 0.44860152560842714, "grad_norm": 14.30826187133789, "learning_rate": 3.878496185978932e-05, "loss": 0.4586, "step": 2470 }, { "epoch": 0.4504177261169633, "grad_norm": 32.13856506347656, "learning_rate": 3.873955684707592e-05, "loss": 0.4564, "step": 2480 }, { "epoch": 0.45223392662549944, "grad_norm": 21.877262115478516, "learning_rate": 3.869415183436251e-05, "loss": 0.5316, "step": 2490 }, { "epoch": 0.4540501271340356, "grad_norm": 15.986939430236816, "learning_rate": 3.864874682164911e-05, "loss": 0.4488, "step": 2500 }, { "epoch": 0.45586632764257173, "grad_norm": 15.963953018188477, "learning_rate": 3.8603341808935705e-05, "loss": 0.6677, "step": 2510 }, { "epoch": 0.4576825281511079, "grad_norm": 29.52568244934082, "learning_rate": 3.8557936796222304e-05, "loss": 0.5992, "step": 2520 }, { "epoch": 0.45949872865964403, "grad_norm": 18.61089324951172, "learning_rate": 3.8512531783508904e-05, "loss": 0.5565, "step": 2530 }, { "epoch": 0.4613149291681802, "grad_norm": 23.38523292541504, "learning_rate": 3.84671267707955e-05, "loss": 0.5018, "step": 2540 }, { "epoch": 0.4631311296767163, "grad_norm": 20.95744514465332, "learning_rate": 3.8421721758082097e-05, "loss": 0.5758, "step": 2550 }, { "epoch": 0.46494733018525247, "grad_norm": 11.371984481811523, "learning_rate": 3.837631674536869e-05, "loss": 0.5553, "step": 2560 }, { "epoch": 0.4667635306937886, "grad_norm": 21.71943473815918, "learning_rate": 3.833091173265529e-05, "loss": 0.5777, "step": 2570 }, { "epoch": 0.4685797312023247, "grad_norm": 18.1218318939209, "learning_rate": 3.828550671994188e-05, "loss": 0.5863, "step": 2580 }, { "epoch": 0.47039593171086086, "grad_norm": 19.874448776245117, "learning_rate": 3.824010170722848e-05, "loss": 0.5183, "step": 2590 }, { "epoch": 0.472212132219397, "grad_norm": 19.063386917114258, "learning_rate": 3.8194696694515074e-05, "loss": 0.5381, "step": 2600 }, { "epoch": 0.47402833272793315, "grad_norm": 9.372021675109863, "learning_rate": 3.8149291681801674e-05, "loss": 0.433, "step": 2610 }, { "epoch": 0.4758445332364693, "grad_norm": 14.822279930114746, "learning_rate": 3.810388666908827e-05, "loss": 0.5518, "step": 2620 }, { "epoch": 0.47766073374500545, "grad_norm": 13.066219329833984, "learning_rate": 3.8058481656374867e-05, "loss": 0.4946, "step": 2630 }, { "epoch": 0.4794769342535416, "grad_norm": 18.113737106323242, "learning_rate": 3.801307664366146e-05, "loss": 0.4824, "step": 2640 }, { "epoch": 0.48129313476207775, "grad_norm": 10.73379898071289, "learning_rate": 3.796767163094806e-05, "loss": 0.5541, "step": 2650 }, { "epoch": 0.4831093352706139, "grad_norm": 25.9276065826416, "learning_rate": 3.792226661823465e-05, "loss": 0.5404, "step": 2660 }, { "epoch": 0.48492553577915004, "grad_norm": 20.394275665283203, "learning_rate": 3.787686160552125e-05, "loss": 0.5106, "step": 2670 }, { "epoch": 0.48674173628768613, "grad_norm": 23.263164520263672, "learning_rate": 3.7831456592807844e-05, "loss": 0.5293, "step": 2680 }, { "epoch": 0.4885579367962223, "grad_norm": 13.967432975769043, "learning_rate": 3.7786051580094444e-05, "loss": 0.4886, "step": 2690 }, { "epoch": 0.49037413730475843, "grad_norm": 18.502605438232422, "learning_rate": 3.774064656738104e-05, "loss": 0.5617, "step": 2700 }, { "epoch": 0.49037413730475843, "eval_accuracy": 0.8039376538146021, "eval_f1": 0.7953241400811288, "eval_loss": 0.5712546110153198, "eval_precision": 0.7878506982758448, "eval_recall": 0.8087375226161377, "eval_runtime": 12.0651, "eval_samples_per_second": 101.035, "eval_steps_per_second": 6.382, "step": 2700 }, { "epoch": 0.4921903378132946, "grad_norm": 36.73835754394531, "learning_rate": 3.7695241554667637e-05, "loss": 0.6704, "step": 2710 }, { "epoch": 0.4940065383218307, "grad_norm": 11.638787269592285, "learning_rate": 3.764983654195423e-05, "loss": 0.5284, "step": 2720 }, { "epoch": 0.49582273883036687, "grad_norm": 22.700679779052734, "learning_rate": 3.760443152924083e-05, "loss": 0.5621, "step": 2730 }, { "epoch": 0.497638939338903, "grad_norm": 10.612008094787598, "learning_rate": 3.755902651652743e-05, "loss": 0.572, "step": 2740 }, { "epoch": 0.49945513984743917, "grad_norm": 8.393928527832031, "learning_rate": 3.751362150381402e-05, "loss": 0.4636, "step": 2750 }, { "epoch": 0.5012713403559753, "grad_norm": 28.1651554107666, "learning_rate": 3.746821649110062e-05, "loss": 0.4955, "step": 2760 }, { "epoch": 0.5030875408645115, "grad_norm": 20.283479690551758, "learning_rate": 3.7422811478387214e-05, "loss": 0.5398, "step": 2770 }, { "epoch": 0.5049037413730476, "grad_norm": 12.401691436767578, "learning_rate": 3.7377406465673814e-05, "loss": 0.5622, "step": 2780 }, { "epoch": 0.5067199418815838, "grad_norm": 31.315277099609375, "learning_rate": 3.7332001452960407e-05, "loss": 0.5752, "step": 2790 }, { "epoch": 0.5085361423901199, "grad_norm": 17.91919708251953, "learning_rate": 3.7286596440247006e-05, "loss": 0.5953, "step": 2800 }, { "epoch": 0.510352342898656, "grad_norm": 10.692752838134766, "learning_rate": 3.72411914275336e-05, "loss": 0.4012, "step": 2810 }, { "epoch": 0.5121685434071922, "grad_norm": 17.449275970458984, "learning_rate": 3.71957864148202e-05, "loss": 0.4245, "step": 2820 }, { "epoch": 0.5139847439157283, "grad_norm": 17.479352951049805, "learning_rate": 3.715038140210679e-05, "loss": 0.4908, "step": 2830 }, { "epoch": 0.5158009444242644, "grad_norm": 20.10633659362793, "learning_rate": 3.710497638939339e-05, "loss": 0.5738, "step": 2840 }, { "epoch": 0.5176171449328005, "grad_norm": 17.699560165405273, "learning_rate": 3.7059571376679984e-05, "loss": 0.4434, "step": 2850 }, { "epoch": 0.5194333454413367, "grad_norm": 15.045440673828125, "learning_rate": 3.7014166363966584e-05, "loss": 0.4992, "step": 2860 }, { "epoch": 0.5212495459498728, "grad_norm": 14.244542121887207, "learning_rate": 3.6968761351253177e-05, "loss": 0.5327, "step": 2870 }, { "epoch": 0.523065746458409, "grad_norm": 11.60004997253418, "learning_rate": 3.6923356338539776e-05, "loss": 0.4859, "step": 2880 }, { "epoch": 0.5248819469669451, "grad_norm": 8.768573760986328, "learning_rate": 3.687795132582637e-05, "loss": 0.5004, "step": 2890 }, { "epoch": 0.5266981474754813, "grad_norm": 22.54417610168457, "learning_rate": 3.683254631311297e-05, "loss": 0.543, "step": 2900 }, { "epoch": 0.5285143479840174, "grad_norm": 20.270061492919922, "learning_rate": 3.678714130039956e-05, "loss": 0.5296, "step": 2910 }, { "epoch": 0.5303305484925536, "grad_norm": 18.757434844970703, "learning_rate": 3.674173628768616e-05, "loss": 0.3825, "step": 2920 }, { "epoch": 0.5321467490010897, "grad_norm": 13.12435245513916, "learning_rate": 3.6696331274972754e-05, "loss": 0.545, "step": 2930 }, { "epoch": 0.5339629495096259, "grad_norm": 23.035865783691406, "learning_rate": 3.6650926262259354e-05, "loss": 0.6143, "step": 2940 }, { "epoch": 0.535779150018162, "grad_norm": 15.766834259033203, "learning_rate": 3.660552124954595e-05, "loss": 0.5893, "step": 2950 }, { "epoch": 0.5375953505266982, "grad_norm": 11.79257869720459, "learning_rate": 3.6560116236832546e-05, "loss": 0.5365, "step": 2960 }, { "epoch": 0.5394115510352343, "grad_norm": 22.071346282958984, "learning_rate": 3.6514711224119146e-05, "loss": 0.5344, "step": 2970 }, { "epoch": 0.5412277515437705, "grad_norm": 16.728076934814453, "learning_rate": 3.646930621140574e-05, "loss": 0.5137, "step": 2980 }, { "epoch": 0.5430439520523066, "grad_norm": 13.112013816833496, "learning_rate": 3.642390119869234e-05, "loss": 0.6091, "step": 2990 }, { "epoch": 0.5448601525608427, "grad_norm": 15.373380661010742, "learning_rate": 3.637849618597893e-05, "loss": 0.4939, "step": 3000 }, { "epoch": 0.5448601525608427, "eval_accuracy": 0.8039376538146021, "eval_f1": 0.7973147509604641, "eval_loss": 0.5636632442474365, "eval_precision": 0.7937266489697907, "eval_recall": 0.8050963810992605, "eval_runtime": 12.0525, "eval_samples_per_second": 101.141, "eval_steps_per_second": 6.389, "step": 3000 }, { "epoch": 0.5466763530693789, "grad_norm": 17.602785110473633, "learning_rate": 3.633309117326553e-05, "loss": 0.4411, "step": 3010 }, { "epoch": 0.548492553577915, "grad_norm": 11.274548530578613, "learning_rate": 3.6287686160552124e-05, "loss": 0.5949, "step": 3020 }, { "epoch": 0.5503087540864512, "grad_norm": 17.69841766357422, "learning_rate": 3.624228114783872e-05, "loss": 0.4447, "step": 3030 }, { "epoch": 0.5521249545949873, "grad_norm": 21.106124877929688, "learning_rate": 3.6196876135125316e-05, "loss": 0.5316, "step": 3040 }, { "epoch": 0.5539411551035234, "grad_norm": 21.903255462646484, "learning_rate": 3.6151471122411916e-05, "loss": 0.476, "step": 3050 }, { "epoch": 0.5557573556120595, "grad_norm": 11.051823616027832, "learning_rate": 3.610606610969851e-05, "loss": 0.5307, "step": 3060 }, { "epoch": 0.5575735561205957, "grad_norm": 5.928410530090332, "learning_rate": 3.606066109698511e-05, "loss": 0.4116, "step": 3070 }, { "epoch": 0.5593897566291318, "grad_norm": 24.413103103637695, "learning_rate": 3.60152560842717e-05, "loss": 0.482, "step": 3080 }, { "epoch": 0.561205957137668, "grad_norm": 17.25383949279785, "learning_rate": 3.59698510715583e-05, "loss": 0.5269, "step": 3090 }, { "epoch": 0.5630221576462041, "grad_norm": 14.473711013793945, "learning_rate": 3.5924446058844894e-05, "loss": 0.5098, "step": 3100 }, { "epoch": 0.5648383581547403, "grad_norm": 14.325135231018066, "learning_rate": 3.587904104613149e-05, "loss": 0.4476, "step": 3110 }, { "epoch": 0.5666545586632764, "grad_norm": 22.374534606933594, "learning_rate": 3.5833636033418086e-05, "loss": 0.4768, "step": 3120 }, { "epoch": 0.5684707591718126, "grad_norm": 22.39207649230957, "learning_rate": 3.5788231020704686e-05, "loss": 0.457, "step": 3130 }, { "epoch": 0.5702869596803487, "grad_norm": 14.626873970031738, "learning_rate": 3.574282600799128e-05, "loss": 0.4465, "step": 3140 }, { "epoch": 0.5721031601888849, "grad_norm": 23.05328369140625, "learning_rate": 3.569742099527788e-05, "loss": 0.4138, "step": 3150 }, { "epoch": 0.573919360697421, "grad_norm": 16.360881805419922, "learning_rate": 3.565201598256448e-05, "loss": 0.4439, "step": 3160 }, { "epoch": 0.5757355612059571, "grad_norm": 25.42070770263672, "learning_rate": 3.560661096985107e-05, "loss": 0.6645, "step": 3170 }, { "epoch": 0.5775517617144933, "grad_norm": 22.610538482666016, "learning_rate": 3.556120595713767e-05, "loss": 0.4836, "step": 3180 }, { "epoch": 0.5793679622230294, "grad_norm": 11.355021476745605, "learning_rate": 3.551580094442426e-05, "loss": 0.5534, "step": 3190 }, { "epoch": 0.5811841627315656, "grad_norm": 18.886524200439453, "learning_rate": 3.547039593171086e-05, "loss": 0.5369, "step": 3200 }, { "epoch": 0.5830003632401017, "grad_norm": 14.892853736877441, "learning_rate": 3.5424990918997456e-05, "loss": 0.4463, "step": 3210 }, { "epoch": 0.5848165637486379, "grad_norm": 31.027605056762695, "learning_rate": 3.5379585906284055e-05, "loss": 0.4775, "step": 3220 }, { "epoch": 0.586632764257174, "grad_norm": 11.664224624633789, "learning_rate": 3.533418089357065e-05, "loss": 0.5938, "step": 3230 }, { "epoch": 0.5884489647657102, "grad_norm": 13.272047996520996, "learning_rate": 3.528877588085725e-05, "loss": 0.4925, "step": 3240 }, { "epoch": 0.5902651652742463, "grad_norm": 13.521268844604492, "learning_rate": 3.524337086814384e-05, "loss": 0.504, "step": 3250 }, { "epoch": 0.5920813657827824, "grad_norm": 10.777715682983398, "learning_rate": 3.519796585543044e-05, "loss": 0.51, "step": 3260 }, { "epoch": 0.5938975662913185, "grad_norm": 16.920635223388672, "learning_rate": 3.515256084271703e-05, "loss": 0.5315, "step": 3270 }, { "epoch": 0.5957137667998547, "grad_norm": 22.00889778137207, "learning_rate": 3.510715583000363e-05, "loss": 0.4741, "step": 3280 }, { "epoch": 0.5975299673083908, "grad_norm": 14.849915504455566, "learning_rate": 3.5061750817290226e-05, "loss": 0.4805, "step": 3290 }, { "epoch": 0.599346167816927, "grad_norm": 22.403329849243164, "learning_rate": 3.5016345804576825e-05, "loss": 0.5531, "step": 3300 }, { "epoch": 0.599346167816927, "eval_accuracy": 0.8146021328958163, "eval_f1": 0.8074596115450074, "eval_loss": 0.5683770179748535, "eval_precision": 0.8052208334869901, "eval_recall": 0.8145117560161068, "eval_runtime": 12.0545, "eval_samples_per_second": 101.124, "eval_steps_per_second": 6.388, "step": 3300 }, { "epoch": 0.6011623683254631, "grad_norm": 13.46020221710205, "learning_rate": 3.497094079186342e-05, "loss": 0.5227, "step": 3310 }, { "epoch": 0.6029785688339993, "grad_norm": 22.012182235717773, "learning_rate": 3.492553577915002e-05, "loss": 0.4315, "step": 3320 }, { "epoch": 0.6047947693425354, "grad_norm": 16.392894744873047, "learning_rate": 3.488013076643661e-05, "loss": 0.474, "step": 3330 }, { "epoch": 0.6066109698510715, "grad_norm": 19.60003089904785, "learning_rate": 3.483472575372321e-05, "loss": 0.3914, "step": 3340 }, { "epoch": 0.6084271703596077, "grad_norm": 24.537080764770508, "learning_rate": 3.47893207410098e-05, "loss": 0.4278, "step": 3350 }, { "epoch": 0.6102433708681438, "grad_norm": 22.935487747192383, "learning_rate": 3.47439157282964e-05, "loss": 0.6627, "step": 3360 }, { "epoch": 0.61205957137668, "grad_norm": 19.0701847076416, "learning_rate": 3.4698510715583e-05, "loss": 0.4398, "step": 3370 }, { "epoch": 0.6138757718852161, "grad_norm": 11.604155540466309, "learning_rate": 3.4653105702869595e-05, "loss": 0.6131, "step": 3380 }, { "epoch": 0.6156919723937523, "grad_norm": 17.911949157714844, "learning_rate": 3.4607700690156195e-05, "loss": 0.4957, "step": 3390 }, { "epoch": 0.6175081729022884, "grad_norm": 12.859588623046875, "learning_rate": 3.456229567744279e-05, "loss": 0.5457, "step": 3400 }, { "epoch": 0.6193243734108246, "grad_norm": 17.096111297607422, "learning_rate": 3.451689066472939e-05, "loss": 0.4664, "step": 3410 }, { "epoch": 0.6211405739193607, "grad_norm": 17.198429107666016, "learning_rate": 3.447148565201598e-05, "loss": 0.3655, "step": 3420 }, { "epoch": 0.6229567744278969, "grad_norm": 7.782280445098877, "learning_rate": 3.442608063930258e-05, "loss": 0.4909, "step": 3430 }, { "epoch": 0.624772974936433, "grad_norm": 13.99974250793457, "learning_rate": 3.438067562658917e-05, "loss": 0.5252, "step": 3440 }, { "epoch": 0.6265891754449692, "grad_norm": 26.579198837280273, "learning_rate": 3.433527061387577e-05, "loss": 0.4605, "step": 3450 }, { "epoch": 0.6284053759535053, "grad_norm": 23.17647361755371, "learning_rate": 3.4289865601162365e-05, "loss": 0.3984, "step": 3460 }, { "epoch": 0.6302215764620414, "grad_norm": 15.169466972351074, "learning_rate": 3.4244460588448965e-05, "loss": 0.4396, "step": 3470 }, { "epoch": 0.6320377769705775, "grad_norm": 18.425457000732422, "learning_rate": 3.4199055575735565e-05, "loss": 0.5219, "step": 3480 }, { "epoch": 0.6338539774791137, "grad_norm": 11.86226749420166, "learning_rate": 3.415365056302216e-05, "loss": 0.5649, "step": 3490 }, { "epoch": 0.6356701779876498, "grad_norm": 18.50494384765625, "learning_rate": 3.410824555030876e-05, "loss": 0.5174, "step": 3500 }, { "epoch": 0.637486378496186, "grad_norm": 36.33973693847656, "learning_rate": 3.406284053759535e-05, "loss": 0.6008, "step": 3510 }, { "epoch": 0.6393025790047221, "grad_norm": 12.04764175415039, "learning_rate": 3.401743552488195e-05, "loss": 0.375, "step": 3520 }, { "epoch": 0.6411187795132582, "grad_norm": 19.642751693725586, "learning_rate": 3.397203051216854e-05, "loss": 0.4088, "step": 3530 }, { "epoch": 0.6429349800217944, "grad_norm": 11.035579681396484, "learning_rate": 3.392662549945514e-05, "loss": 0.4665, "step": 3540 }, { "epoch": 0.6447511805303305, "grad_norm": 9.772668838500977, "learning_rate": 3.3881220486741735e-05, "loss": 0.4081, "step": 3550 }, { "epoch": 0.6465673810388667, "grad_norm": 15.26156997680664, "learning_rate": 3.3835815474028335e-05, "loss": 0.5479, "step": 3560 }, { "epoch": 0.6483835815474028, "grad_norm": 16.603866577148438, "learning_rate": 3.379041046131493e-05, "loss": 0.5206, "step": 3570 }, { "epoch": 0.650199782055939, "grad_norm": 14.417247772216797, "learning_rate": 3.374500544860153e-05, "loss": 0.4639, "step": 3580 }, { "epoch": 0.6520159825644751, "grad_norm": 14.06032943725586, "learning_rate": 3.369960043588813e-05, "loss": 0.5256, "step": 3590 }, { "epoch": 0.6538321830730113, "grad_norm": 19.377899169921875, "learning_rate": 3.365419542317472e-05, "loss": 0.4589, "step": 3600 }, { "epoch": 0.6538321830730113, "eval_accuracy": 0.815422477440525, "eval_f1": 0.8098112032026681, "eval_loss": 0.5438756346702576, "eval_precision": 0.8087363057639664, "eval_recall": 0.8171923146479461, "eval_runtime": 12.0597, "eval_samples_per_second": 101.081, "eval_steps_per_second": 6.385, "step": 3600 }, { "epoch": 0.6556483835815474, "grad_norm": 10.339813232421875, "learning_rate": 3.360879041046132e-05, "loss": 0.5665, "step": 3610 }, { "epoch": 0.6574645840900836, "grad_norm": 12.189675331115723, "learning_rate": 3.356338539774791e-05, "loss": 0.341, "step": 3620 }, { "epoch": 0.6592807845986197, "grad_norm": 17.71584701538086, "learning_rate": 3.351798038503451e-05, "loss": 0.3679, "step": 3630 }, { "epoch": 0.6610969851071559, "grad_norm": 12.258733749389648, "learning_rate": 3.3472575372321105e-05, "loss": 0.5311, "step": 3640 }, { "epoch": 0.662913185615692, "grad_norm": 22.354339599609375, "learning_rate": 3.3427170359607704e-05, "loss": 0.4843, "step": 3650 }, { "epoch": 0.6647293861242282, "grad_norm": 14.635857582092285, "learning_rate": 3.33817653468943e-05, "loss": 0.4105, "step": 3660 }, { "epoch": 0.6665455866327643, "grad_norm": 15.776519775390625, "learning_rate": 3.33363603341809e-05, "loss": 0.5662, "step": 3670 }, { "epoch": 0.6683617871413003, "grad_norm": 16.750410079956055, "learning_rate": 3.329095532146749e-05, "loss": 0.4221, "step": 3680 }, { "epoch": 0.6701779876498365, "grad_norm": 14.167458534240723, "learning_rate": 3.324555030875409e-05, "loss": 0.3916, "step": 3690 }, { "epoch": 0.6719941881583726, "grad_norm": 12.054675102233887, "learning_rate": 3.320014529604069e-05, "loss": 0.4332, "step": 3700 }, { "epoch": 0.6738103886669088, "grad_norm": 17.444786071777344, "learning_rate": 3.315474028332728e-05, "loss": 0.4895, "step": 3710 }, { "epoch": 0.6756265891754449, "grad_norm": 12.62495231628418, "learning_rate": 3.310933527061388e-05, "loss": 0.484, "step": 3720 }, { "epoch": 0.6774427896839811, "grad_norm": 17.694808959960938, "learning_rate": 3.3063930257900474e-05, "loss": 0.5494, "step": 3730 }, { "epoch": 0.6792589901925172, "grad_norm": 9.741250038146973, "learning_rate": 3.3018525245187074e-05, "loss": 0.5297, "step": 3740 }, { "epoch": 0.6810751907010534, "grad_norm": 9.227933883666992, "learning_rate": 3.297312023247367e-05, "loss": 0.4591, "step": 3750 }, { "epoch": 0.6828913912095895, "grad_norm": 22.44287109375, "learning_rate": 3.2927715219760266e-05, "loss": 0.5248, "step": 3760 }, { "epoch": 0.6847075917181257, "grad_norm": 19.741558074951172, "learning_rate": 3.288231020704686e-05, "loss": 0.532, "step": 3770 }, { "epoch": 0.6865237922266618, "grad_norm": 21.53546142578125, "learning_rate": 3.283690519433346e-05, "loss": 0.5514, "step": 3780 }, { "epoch": 0.688339992735198, "grad_norm": 16.261137008666992, "learning_rate": 3.279150018162005e-05, "loss": 0.5721, "step": 3790 }, { "epoch": 0.6901561932437341, "grad_norm": 7.155134677886963, "learning_rate": 3.274609516890665e-05, "loss": 0.4068, "step": 3800 }, { "epoch": 0.6919723937522703, "grad_norm": 8.603271484375, "learning_rate": 3.270069015619325e-05, "loss": 0.3959, "step": 3810 }, { "epoch": 0.6937885942608064, "grad_norm": 18.37700080871582, "learning_rate": 3.2655285143479844e-05, "loss": 0.5014, "step": 3820 }, { "epoch": 0.6956047947693426, "grad_norm": 13.100898742675781, "learning_rate": 3.2609880130766444e-05, "loss": 0.5567, "step": 3830 }, { "epoch": 0.6974209952778787, "grad_norm": 13.200430870056152, "learning_rate": 3.2564475118053036e-05, "loss": 0.4679, "step": 3840 }, { "epoch": 0.6992371957864149, "grad_norm": 12.253862380981445, "learning_rate": 3.2519070105339636e-05, "loss": 0.4516, "step": 3850 }, { "epoch": 0.701053396294951, "grad_norm": 6.870277404785156, "learning_rate": 3.247366509262623e-05, "loss": 0.5378, "step": 3860 }, { "epoch": 0.7028695968034872, "grad_norm": 14.495081901550293, "learning_rate": 3.242826007991283e-05, "loss": 0.5009, "step": 3870 }, { "epoch": 0.7046857973120233, "grad_norm": 3.442812919616699, "learning_rate": 3.238285506719942e-05, "loss": 0.4108, "step": 3880 }, { "epoch": 0.7065019978205593, "grad_norm": 5.441460609436035, "learning_rate": 3.233745005448602e-05, "loss": 0.3479, "step": 3890 }, { "epoch": 0.7083181983290955, "grad_norm": 12.709874153137207, "learning_rate": 3.2292045041772614e-05, "loss": 0.3864, "step": 3900 }, { "epoch": 0.7083181983290955, "eval_accuracy": 0.8105004101722724, "eval_f1": 0.799452724152361, "eval_loss": 0.5857027769088745, "eval_precision": 0.7947749172284474, "eval_recall": 0.8158609235209215, "eval_runtime": 12.0674, "eval_samples_per_second": 101.016, "eval_steps_per_second": 6.381, "step": 3900 }, { "epoch": 0.7101343988376316, "grad_norm": 10.219947814941406, "learning_rate": 3.2246640029059214e-05, "loss": 0.6109, "step": 3910 }, { "epoch": 0.7119505993461678, "grad_norm": 22.518009185791016, "learning_rate": 3.2201235016345806e-05, "loss": 0.5354, "step": 3920 }, { "epoch": 0.7137667998547039, "grad_norm": 9.188867568969727, "learning_rate": 3.2155830003632406e-05, "loss": 0.5216, "step": 3930 }, { "epoch": 0.7155830003632401, "grad_norm": 16.9005184173584, "learning_rate": 3.2110424990919e-05, "loss": 0.4771, "step": 3940 }, { "epoch": 0.7173992008717762, "grad_norm": 14.375580787658691, "learning_rate": 3.20650199782056e-05, "loss": 0.4197, "step": 3950 }, { "epoch": 0.7192154013803124, "grad_norm": 14.258020401000977, "learning_rate": 3.201961496549219e-05, "loss": 0.4487, "step": 3960 }, { "epoch": 0.7210316018888485, "grad_norm": 11.470094680786133, "learning_rate": 3.197420995277879e-05, "loss": 0.4176, "step": 3970 }, { "epoch": 0.7228478023973847, "grad_norm": 12.606728553771973, "learning_rate": 3.1928804940065384e-05, "loss": 0.5001, "step": 3980 }, { "epoch": 0.7246640029059208, "grad_norm": 25.704116821289062, "learning_rate": 3.1883399927351983e-05, "loss": 0.5424, "step": 3990 }, { "epoch": 0.726480203414457, "grad_norm": 15.872344017028809, "learning_rate": 3.183799491463858e-05, "loss": 0.589, "step": 4000 }, { "epoch": 0.7282964039229931, "grad_norm": 8.058246612548828, "learning_rate": 3.1792589901925176e-05, "loss": 0.4356, "step": 4010 }, { "epoch": 0.7301126044315293, "grad_norm": 18.3121337890625, "learning_rate": 3.1747184889211776e-05, "loss": 0.4245, "step": 4020 }, { "epoch": 0.7319288049400654, "grad_norm": 13.85145378112793, "learning_rate": 3.170177987649837e-05, "loss": 0.4336, "step": 4030 }, { "epoch": 0.7337450054486016, "grad_norm": 11.043869018554688, "learning_rate": 3.165637486378497e-05, "loss": 0.4433, "step": 4040 }, { "epoch": 0.7355612059571377, "grad_norm": 14.299675941467285, "learning_rate": 3.161096985107156e-05, "loss": 0.4482, "step": 4050 }, { "epoch": 0.7373774064656738, "grad_norm": 9.129308700561523, "learning_rate": 3.156556483835816e-05, "loss": 0.5591, "step": 4060 }, { "epoch": 0.73919360697421, "grad_norm": 15.059881210327148, "learning_rate": 3.1520159825644753e-05, "loss": 0.4043, "step": 4070 }, { "epoch": 0.7410098074827461, "grad_norm": 14.52391242980957, "learning_rate": 3.147475481293135e-05, "loss": 0.4029, "step": 4080 }, { "epoch": 0.7428260079912823, "grad_norm": 14.165828704833984, "learning_rate": 3.1429349800217946e-05, "loss": 0.5121, "step": 4090 }, { "epoch": 0.7446422084998183, "grad_norm": 19.52725601196289, "learning_rate": 3.1383944787504546e-05, "loss": 0.4375, "step": 4100 }, { "epoch": 0.7464584090083545, "grad_norm": 18.168001174926758, "learning_rate": 3.133853977479114e-05, "loss": 0.541, "step": 4110 }, { "epoch": 0.7482746095168906, "grad_norm": 23.436870574951172, "learning_rate": 3.129313476207774e-05, "loss": 0.5703, "step": 4120 }, { "epoch": 0.7500908100254268, "grad_norm": 16.01010513305664, "learning_rate": 3.124772974936433e-05, "loss": 0.4326, "step": 4130 }, { "epoch": 0.7519070105339629, "grad_norm": 15.457175254821777, "learning_rate": 3.120232473665093e-05, "loss": 0.379, "step": 4140 }, { "epoch": 0.7537232110424991, "grad_norm": 17.524295806884766, "learning_rate": 3.1156919723937523e-05, "loss": 0.392, "step": 4150 }, { "epoch": 0.7555394115510352, "grad_norm": 19.16515350341797, "learning_rate": 3.111151471122412e-05, "loss": 0.5474, "step": 4160 }, { "epoch": 0.7573556120595714, "grad_norm": 12.622529029846191, "learning_rate": 3.1066109698510716e-05, "loss": 0.4343, "step": 4170 }, { "epoch": 0.7591718125681075, "grad_norm": 12.761943817138672, "learning_rate": 3.1020704685797316e-05, "loss": 0.4281, "step": 4180 }, { "epoch": 0.7609880130766437, "grad_norm": 15.795944213867188, "learning_rate": 3.097529967308391e-05, "loss": 0.4434, "step": 4190 }, { "epoch": 0.7628042135851798, "grad_norm": 6.286984920501709, "learning_rate": 3.092989466037051e-05, "loss": 0.4196, "step": 4200 }, { "epoch": 0.7628042135851798, "eval_accuracy": 0.8105004101722724, "eval_f1": 0.8031176631002425, "eval_loss": 0.5337262749671936, "eval_precision": 0.8017654292844819, "eval_recall": 0.811221918165322, "eval_runtime": 12.0674, "eval_samples_per_second": 101.016, "eval_steps_per_second": 6.381, "step": 4200 }, { "epoch": 0.764620414093716, "grad_norm": 53.68039321899414, "learning_rate": 3.088448964765711e-05, "loss": 0.5151, "step": 4210 }, { "epoch": 0.7664366146022521, "grad_norm": 10.041727066040039, "learning_rate": 3.08390846349437e-05, "loss": 0.4146, "step": 4220 }, { "epoch": 0.7682528151107882, "grad_norm": 25.997821807861328, "learning_rate": 3.07936796222303e-05, "loss": 0.556, "step": 4230 }, { "epoch": 0.7700690156193244, "grad_norm": 13.25404167175293, "learning_rate": 3.074827460951689e-05, "loss": 0.513, "step": 4240 }, { "epoch": 0.7718852161278605, "grad_norm": 23.45793342590332, "learning_rate": 3.070286959680349e-05, "loss": 0.4953, "step": 4250 }, { "epoch": 0.7737014166363967, "grad_norm": 18.79665756225586, "learning_rate": 3.0657464584090086e-05, "loss": 0.4196, "step": 4260 }, { "epoch": 0.7755176171449328, "grad_norm": 15.050500869750977, "learning_rate": 3.0612059571376685e-05, "loss": 0.3523, "step": 4270 }, { "epoch": 0.777333817653469, "grad_norm": 17.48199462890625, "learning_rate": 3.056665455866328e-05, "loss": 0.4707, "step": 4280 }, { "epoch": 0.7791500181620051, "grad_norm": 12.54255199432373, "learning_rate": 3.052124954594988e-05, "loss": 0.3119, "step": 4290 }, { "epoch": 0.7809662186705413, "grad_norm": 19.040857315063477, "learning_rate": 3.047584453323647e-05, "loss": 0.3681, "step": 4300 }, { "epoch": 0.7827824191790773, "grad_norm": 22.418601989746094, "learning_rate": 3.043043952052307e-05, "loss": 0.4745, "step": 4310 }, { "epoch": 0.7845986196876135, "grad_norm": 19.793771743774414, "learning_rate": 3.0385034507809663e-05, "loss": 0.4836, "step": 4320 }, { "epoch": 0.7864148201961496, "grad_norm": 10.669327735900879, "learning_rate": 3.0339629495096263e-05, "loss": 0.4829, "step": 4330 }, { "epoch": 0.7882310207046858, "grad_norm": 22.390172958374023, "learning_rate": 3.0294224482382856e-05, "loss": 0.6336, "step": 4340 }, { "epoch": 0.7900472212132219, "grad_norm": 15.641258239746094, "learning_rate": 3.0248819469669455e-05, "loss": 0.5083, "step": 4350 }, { "epoch": 0.791863421721758, "grad_norm": 20.096162796020508, "learning_rate": 3.0203414456956048e-05, "loss": 0.5094, "step": 4360 }, { "epoch": 0.7936796222302942, "grad_norm": 22.68675422668457, "learning_rate": 3.0158009444242648e-05, "loss": 0.5245, "step": 4370 }, { "epoch": 0.7954958227388303, "grad_norm": 29.42097282409668, "learning_rate": 3.011260443152924e-05, "loss": 0.4459, "step": 4380 }, { "epoch": 0.7973120232473665, "grad_norm": 19.64771842956543, "learning_rate": 3.006719941881584e-05, "loss": 0.4514, "step": 4390 }, { "epoch": 0.7991282237559026, "grad_norm": 21.052167892456055, "learning_rate": 3.0021794406102433e-05, "loss": 0.4875, "step": 4400 }, { "epoch": 0.8009444242644388, "grad_norm": 22.617921829223633, "learning_rate": 2.9976389393389033e-05, "loss": 0.5775, "step": 4410 }, { "epoch": 0.8027606247729749, "grad_norm": 18.567598342895508, "learning_rate": 2.9930984380675632e-05, "loss": 0.3457, "step": 4420 }, { "epoch": 0.8045768252815111, "grad_norm": 13.792886734008789, "learning_rate": 2.9885579367962225e-05, "loss": 0.3967, "step": 4430 }, { "epoch": 0.8063930257900472, "grad_norm": 24.021446228027344, "learning_rate": 2.9840174355248825e-05, "loss": 0.5235, "step": 4440 }, { "epoch": 0.8082092262985834, "grad_norm": 14.933148384094238, "learning_rate": 2.9794769342535418e-05, "loss": 0.5006, "step": 4450 }, { "epoch": 0.8100254268071195, "grad_norm": 18.85728645324707, "learning_rate": 2.9749364329822017e-05, "loss": 0.408, "step": 4460 }, { "epoch": 0.8118416273156557, "grad_norm": 18.168296813964844, "learning_rate": 2.970395931710861e-05, "loss": 0.5026, "step": 4470 }, { "epoch": 0.8136578278241918, "grad_norm": 12.585858345031738, "learning_rate": 2.965855430439521e-05, "loss": 0.3576, "step": 4480 }, { "epoch": 0.815474028332728, "grad_norm": 21.00431251525879, "learning_rate": 2.9613149291681803e-05, "loss": 0.494, "step": 4490 }, { "epoch": 0.8172902288412641, "grad_norm": 32.093345642089844, "learning_rate": 2.9567744278968402e-05, "loss": 0.4508, "step": 4500 }, { "epoch": 0.8172902288412641, "eval_accuracy": 0.8162428219852338, "eval_f1": 0.8092636477145345, "eval_loss": 0.4987526535987854, "eval_precision": 0.8023527484407372, "eval_recall": 0.8208478993572457, "eval_runtime": 12.0751, "eval_samples_per_second": 100.951, "eval_steps_per_second": 6.377, "step": 4500 }, { "epoch": 0.8191064293498003, "grad_norm": 9.480962753295898, "learning_rate": 2.9522339266254995e-05, "loss": 0.3415, "step": 4510 }, { "epoch": 0.8209226298583363, "grad_norm": 21.77303123474121, "learning_rate": 2.9476934253541595e-05, "loss": 0.5777, "step": 4520 }, { "epoch": 0.8227388303668725, "grad_norm": 17.37676429748535, "learning_rate": 2.9431529240828188e-05, "loss": 0.5402, "step": 4530 }, { "epoch": 0.8245550308754086, "grad_norm": 8.430058479309082, "learning_rate": 2.9386124228114787e-05, "loss": 0.5605, "step": 4540 }, { "epoch": 0.8263712313839447, "grad_norm": 11.52684211730957, "learning_rate": 2.934071921540138e-05, "loss": 0.4402, "step": 4550 }, { "epoch": 0.8281874318924809, "grad_norm": 15.570836067199707, "learning_rate": 2.929531420268798e-05, "loss": 0.4715, "step": 4560 }, { "epoch": 0.830003632401017, "grad_norm": 17.31182289123535, "learning_rate": 2.9249909189974573e-05, "loss": 0.4377, "step": 4570 }, { "epoch": 0.8318198329095532, "grad_norm": 17.72749900817871, "learning_rate": 2.9204504177261172e-05, "loss": 0.574, "step": 4580 }, { "epoch": 0.8336360334180893, "grad_norm": 4.865232467651367, "learning_rate": 2.9159099164547765e-05, "loss": 0.4036, "step": 4590 }, { "epoch": 0.8354522339266255, "grad_norm": 13.070740699768066, "learning_rate": 2.9113694151834365e-05, "loss": 0.4395, "step": 4600 }, { "epoch": 0.8372684344351616, "grad_norm": 11.224090576171875, "learning_rate": 2.9068289139120958e-05, "loss": 0.5105, "step": 4610 }, { "epoch": 0.8390846349436978, "grad_norm": 10.014636039733887, "learning_rate": 2.9022884126407557e-05, "loss": 0.3832, "step": 4620 }, { "epoch": 0.8409008354522339, "grad_norm": 16.736953735351562, "learning_rate": 2.8977479113694157e-05, "loss": 0.5902, "step": 4630 }, { "epoch": 0.8427170359607701, "grad_norm": 19.752222061157227, "learning_rate": 2.893207410098075e-05, "loss": 0.4689, "step": 4640 }, { "epoch": 0.8445332364693062, "grad_norm": 21.56574058532715, "learning_rate": 2.888666908826735e-05, "loss": 0.424, "step": 4650 }, { "epoch": 0.8463494369778424, "grad_norm": 7.042162895202637, "learning_rate": 2.8841264075553942e-05, "loss": 0.4833, "step": 4660 }, { "epoch": 0.8481656374863785, "grad_norm": 28.993854522705078, "learning_rate": 2.8795859062840542e-05, "loss": 0.549, "step": 4670 }, { "epoch": 0.8499818379949147, "grad_norm": 10.317886352539062, "learning_rate": 2.8750454050127135e-05, "loss": 0.5293, "step": 4680 }, { "epoch": 0.8517980385034508, "grad_norm": 17.66384506225586, "learning_rate": 2.8705049037413734e-05, "loss": 0.4197, "step": 4690 }, { "epoch": 0.853614239011987, "grad_norm": 24.199743270874023, "learning_rate": 2.8659644024700327e-05, "loss": 0.4935, "step": 4700 }, { "epoch": 0.8554304395205231, "grad_norm": 6.78064489364624, "learning_rate": 2.8614239011986927e-05, "loss": 0.4021, "step": 4710 }, { "epoch": 0.8572466400290593, "grad_norm": 12.244144439697266, "learning_rate": 2.856883399927352e-05, "loss": 0.487, "step": 4720 }, { "epoch": 0.8590628405375953, "grad_norm": 13.118432998657227, "learning_rate": 2.852342898656012e-05, "loss": 0.4862, "step": 4730 }, { "epoch": 0.8608790410461314, "grad_norm": 12.08948040008545, "learning_rate": 2.8478023973846712e-05, "loss": 0.5039, "step": 4740 }, { "epoch": 0.8626952415546676, "grad_norm": 18.59223175048828, "learning_rate": 2.8432618961133312e-05, "loss": 0.3387, "step": 4750 }, { "epoch": 0.8645114420632037, "grad_norm": 16.462608337402344, "learning_rate": 2.8387213948419905e-05, "loss": 0.4173, "step": 4760 }, { "epoch": 0.8663276425717399, "grad_norm": 19.474824905395508, "learning_rate": 2.8341808935706504e-05, "loss": 0.4793, "step": 4770 }, { "epoch": 0.868143843080276, "grad_norm": 9.90221118927002, "learning_rate": 2.8296403922993097e-05, "loss": 0.4677, "step": 4780 }, { "epoch": 0.8699600435888122, "grad_norm": 18.47876739501953, "learning_rate": 2.8250998910279697e-05, "loss": 0.3815, "step": 4790 }, { "epoch": 0.8717762440973483, "grad_norm": 16.144685745239258, "learning_rate": 2.820559389756629e-05, "loss": 0.5303, "step": 4800 }, { "epoch": 0.8717762440973483, "eval_accuracy": 0.8351107465135357, "eval_f1": 0.8281745117415222, "eval_loss": 0.48171207308769226, "eval_precision": 0.8221184344103021, "eval_recall": 0.8358880640041798, "eval_runtime": 12.0571, "eval_samples_per_second": 101.102, "eval_steps_per_second": 6.386, "step": 4800 }, { "epoch": 0.8735924446058845, "grad_norm": 15.96438980102539, "learning_rate": 2.816018888485289e-05, "loss": 0.4498, "step": 4810 }, { "epoch": 0.8754086451144206, "grad_norm": 19.435787200927734, "learning_rate": 2.8114783872139482e-05, "loss": 0.3781, "step": 4820 }, { "epoch": 0.8772248456229568, "grad_norm": 17.200559616088867, "learning_rate": 2.8069378859426082e-05, "loss": 0.4631, "step": 4830 }, { "epoch": 0.8790410461314929, "grad_norm": 12.658839225769043, "learning_rate": 2.802397384671268e-05, "loss": 0.4265, "step": 4840 }, { "epoch": 0.8808572466400291, "grad_norm": 7.684325695037842, "learning_rate": 2.7978568833999274e-05, "loss": 0.4284, "step": 4850 }, { "epoch": 0.8826734471485652, "grad_norm": 12.922738075256348, "learning_rate": 2.7933163821285874e-05, "loss": 0.3229, "step": 4860 }, { "epoch": 0.8844896476571014, "grad_norm": 23.311817169189453, "learning_rate": 2.7887758808572467e-05, "loss": 0.4021, "step": 4870 }, { "epoch": 0.8863058481656375, "grad_norm": 14.780502319335938, "learning_rate": 2.7842353795859067e-05, "loss": 0.4722, "step": 4880 }, { "epoch": 0.8881220486741737, "grad_norm": 12.9520902633667, "learning_rate": 2.779694878314566e-05, "loss": 0.3892, "step": 4890 }, { "epoch": 0.8899382491827098, "grad_norm": 22.6149845123291, "learning_rate": 2.775154377043226e-05, "loss": 0.5177, "step": 4900 }, { "epoch": 0.891754449691246, "grad_norm": 19.335704803466797, "learning_rate": 2.7706138757718852e-05, "loss": 0.4524, "step": 4910 }, { "epoch": 0.8935706501997821, "grad_norm": 11.706412315368652, "learning_rate": 2.766073374500545e-05, "loss": 0.3219, "step": 4920 }, { "epoch": 0.8953868507083182, "grad_norm": 24.9698543548584, "learning_rate": 2.7615328732292044e-05, "loss": 0.4699, "step": 4930 }, { "epoch": 0.8972030512168543, "grad_norm": 22.411867141723633, "learning_rate": 2.7569923719578644e-05, "loss": 0.4928, "step": 4940 }, { "epoch": 0.8990192517253904, "grad_norm": 15.261788368225098, "learning_rate": 2.7524518706865237e-05, "loss": 0.4825, "step": 4950 }, { "epoch": 0.9008354522339266, "grad_norm": 3.8997724056243896, "learning_rate": 2.7479113694151837e-05, "loss": 0.3152, "step": 4960 }, { "epoch": 0.9026516527424627, "grad_norm": 17.017913818359375, "learning_rate": 2.743370868143843e-05, "loss": 0.5825, "step": 4970 }, { "epoch": 0.9044678532509989, "grad_norm": 12.277453422546387, "learning_rate": 2.738830366872503e-05, "loss": 0.4168, "step": 4980 }, { "epoch": 0.906284053759535, "grad_norm": 10.170480728149414, "learning_rate": 2.7342898656011622e-05, "loss": 0.3428, "step": 4990 }, { "epoch": 0.9081002542680712, "grad_norm": 17.166027069091797, "learning_rate": 2.729749364329822e-05, "loss": 0.3343, "step": 5000 }, { "epoch": 0.9099164547766073, "grad_norm": 13.139386177062988, "learning_rate": 2.7252088630584814e-05, "loss": 0.3653, "step": 5010 }, { "epoch": 0.9117326552851435, "grad_norm": 17.63907814025879, "learning_rate": 2.7206683617871414e-05, "loss": 0.4842, "step": 5020 }, { "epoch": 0.9135488557936796, "grad_norm": 11.699908256530762, "learning_rate": 2.7161278605158007e-05, "loss": 0.5177, "step": 5030 }, { "epoch": 0.9153650563022158, "grad_norm": 13.266210556030273, "learning_rate": 2.7115873592444607e-05, "loss": 0.4534, "step": 5040 }, { "epoch": 0.9171812568107519, "grad_norm": 9.60502815246582, "learning_rate": 2.7070468579731206e-05, "loss": 0.3299, "step": 5050 }, { "epoch": 0.9189974573192881, "grad_norm": 17.09486198425293, "learning_rate": 2.70250635670178e-05, "loss": 0.4957, "step": 5060 }, { "epoch": 0.9208136578278242, "grad_norm": 17.392698287963867, "learning_rate": 2.69796585543044e-05, "loss": 0.4678, "step": 5070 }, { "epoch": 0.9226298583363604, "grad_norm": 17.839717864990234, "learning_rate": 2.693425354159099e-05, "loss": 0.5732, "step": 5080 }, { "epoch": 0.9244460588448965, "grad_norm": 14.015562057495117, "learning_rate": 2.688884852887759e-05, "loss": 0.3867, "step": 5090 }, { "epoch": 0.9262622593534326, "grad_norm": 13.306390762329102, "learning_rate": 2.6843443516164184e-05, "loss": 0.4422, "step": 5100 }, { "epoch": 0.9262622593534326, "eval_accuracy": 0.8236259228876128, "eval_f1": 0.8174273731975736, "eval_loss": 0.5039647221565247, "eval_precision": 0.8178974051720215, "eval_recall": 0.8269853120695915, "eval_runtime": 12.0903, "eval_samples_per_second": 100.824, "eval_steps_per_second": 6.369, "step": 5100 }, { "epoch": 0.9280784598619688, "grad_norm": 15.518729209899902, "learning_rate": 2.6798038503450784e-05, "loss": 0.5069, "step": 5110 }, { "epoch": 0.9298946603705049, "grad_norm": 20.26007843017578, "learning_rate": 2.6752633490737377e-05, "loss": 0.5047, "step": 5120 }, { "epoch": 0.9317108608790411, "grad_norm": 10.854071617126465, "learning_rate": 2.6707228478023976e-05, "loss": 0.5326, "step": 5130 }, { "epoch": 0.9335270613875772, "grad_norm": 12.248214721679688, "learning_rate": 2.666182346531057e-05, "loss": 0.4037, "step": 5140 }, { "epoch": 0.9353432618961133, "grad_norm": 9.578265190124512, "learning_rate": 2.661641845259717e-05, "loss": 0.3088, "step": 5150 }, { "epoch": 0.9371594624046494, "grad_norm": 19.308855056762695, "learning_rate": 2.657101343988376e-05, "loss": 0.391, "step": 5160 }, { "epoch": 0.9389756629131856, "grad_norm": 16.941064834594727, "learning_rate": 2.652560842717036e-05, "loss": 0.3419, "step": 5170 }, { "epoch": 0.9407918634217217, "grad_norm": 21.8260440826416, "learning_rate": 2.6480203414456954e-05, "loss": 0.671, "step": 5180 }, { "epoch": 0.9426080639302579, "grad_norm": 22.408531188964844, "learning_rate": 2.6434798401743554e-05, "loss": 0.4909, "step": 5190 }, { "epoch": 0.944424264438794, "grad_norm": 21.8310546875, "learning_rate": 2.6389393389030147e-05, "loss": 0.4541, "step": 5200 }, { "epoch": 0.9462404649473302, "grad_norm": 21.634668350219727, "learning_rate": 2.6343988376316746e-05, "loss": 0.3987, "step": 5210 }, { "epoch": 0.9480566654558663, "grad_norm": 16.159862518310547, "learning_rate": 2.6298583363603342e-05, "loss": 0.5047, "step": 5220 }, { "epoch": 0.9498728659644025, "grad_norm": 10.970589637756348, "learning_rate": 2.625317835088994e-05, "loss": 0.4124, "step": 5230 }, { "epoch": 0.9516890664729386, "grad_norm": 19.680744171142578, "learning_rate": 2.6207773338176535e-05, "loss": 0.5401, "step": 5240 }, { "epoch": 0.9535052669814748, "grad_norm": 10.408095359802246, "learning_rate": 2.616236832546313e-05, "loss": 0.4913, "step": 5250 }, { "epoch": 0.9553214674900109, "grad_norm": 8.389443397521973, "learning_rate": 2.611696331274973e-05, "loss": 0.3567, "step": 5260 }, { "epoch": 0.957137667998547, "grad_norm": 11.20021915435791, "learning_rate": 2.6071558300036324e-05, "loss": 0.3231, "step": 5270 }, { "epoch": 0.9589538685070832, "grad_norm": 20.043296813964844, "learning_rate": 2.6026153287322923e-05, "loss": 0.6047, "step": 5280 }, { "epoch": 0.9607700690156193, "grad_norm": 20.225339889526367, "learning_rate": 2.5980748274609516e-05, "loss": 0.442, "step": 5290 }, { "epoch": 0.9625862695241555, "grad_norm": 14.611661911010742, "learning_rate": 2.5935343261896116e-05, "loss": 0.4598, "step": 5300 }, { "epoch": 0.9644024700326916, "grad_norm": 10.53466510772705, "learning_rate": 2.588993824918271e-05, "loss": 0.3438, "step": 5310 }, { "epoch": 0.9662186705412278, "grad_norm": 23.299837112426758, "learning_rate": 2.5844533236469308e-05, "loss": 0.3867, "step": 5320 }, { "epoch": 0.9680348710497639, "grad_norm": 13.314847946166992, "learning_rate": 2.5799128223755905e-05, "loss": 0.5519, "step": 5330 }, { "epoch": 0.9698510715583001, "grad_norm": 10.600733757019043, "learning_rate": 2.57537232110425e-05, "loss": 0.4463, "step": 5340 }, { "epoch": 0.9716672720668362, "grad_norm": 23.0856990814209, "learning_rate": 2.5708318198329097e-05, "loss": 0.4518, "step": 5350 }, { "epoch": 0.9734834725753723, "grad_norm": 16.995450973510742, "learning_rate": 2.5662913185615693e-05, "loss": 0.4835, "step": 5360 }, { "epoch": 0.9752996730839084, "grad_norm": 13.805352210998535, "learning_rate": 2.561750817290229e-05, "loss": 0.4145, "step": 5370 }, { "epoch": 0.9771158735924446, "grad_norm": 7.604394435882568, "learning_rate": 2.5572103160188886e-05, "loss": 0.3946, "step": 5380 }, { "epoch": 0.9789320741009807, "grad_norm": 12.790209770202637, "learning_rate": 2.5526698147475482e-05, "loss": 0.4734, "step": 5390 }, { "epoch": 0.9807482746095169, "grad_norm": 13.206761360168457, "learning_rate": 2.5481293134762078e-05, "loss": 0.4081, "step": 5400 }, { "epoch": 0.9807482746095169, "eval_accuracy": 0.8244462674323215, "eval_f1": 0.8170131934114577, "eval_loss": 0.4968956708908081, "eval_precision": 0.8102785727429149, "eval_recall": 0.8306898382408164, "eval_runtime": 12.0822, "eval_samples_per_second": 100.892, "eval_steps_per_second": 6.373, "step": 5400 }, { "epoch": 0.982564475118053, "grad_norm": 11.270719528198242, "learning_rate": 2.5435888122048675e-05, "loss": 0.4856, "step": 5410 }, { "epoch": 0.9843806756265892, "grad_norm": 8.846707344055176, "learning_rate": 2.539048310933527e-05, "loss": 0.4354, "step": 5420 }, { "epoch": 0.9861968761351253, "grad_norm": 22.749967575073242, "learning_rate": 2.5345078096621867e-05, "loss": 0.479, "step": 5430 }, { "epoch": 0.9880130766436614, "grad_norm": 16.036691665649414, "learning_rate": 2.5299673083908467e-05, "loss": 0.4042, "step": 5440 }, { "epoch": 0.9898292771521976, "grad_norm": 13.034772872924805, "learning_rate": 2.525426807119506e-05, "loss": 0.4203, "step": 5450 }, { "epoch": 0.9916454776607337, "grad_norm": 16.55560302734375, "learning_rate": 2.520886305848166e-05, "loss": 0.325, "step": 5460 }, { "epoch": 0.9934616781692699, "grad_norm": 17.726268768310547, "learning_rate": 2.5163458045768255e-05, "loss": 0.5221, "step": 5470 }, { "epoch": 0.995277878677806, "grad_norm": 16.85834503173828, "learning_rate": 2.511805303305485e-05, "loss": 0.4584, "step": 5480 }, { "epoch": 0.9970940791863422, "grad_norm": 20.59090232849121, "learning_rate": 2.5072648020341448e-05, "loss": 0.687, "step": 5490 }, { "epoch": 0.9989102796948783, "grad_norm": 11.305227279663086, "learning_rate": 2.5027243007628044e-05, "loss": 0.3293, "step": 5500 }, { "epoch": 1.0007264802034144, "grad_norm": 5.661748886108398, "learning_rate": 2.498183799491464e-05, "loss": 0.3034, "step": 5510 }, { "epoch": 1.0025426807119506, "grad_norm": 14.591137886047363, "learning_rate": 2.4936432982201237e-05, "loss": 0.2698, "step": 5520 }, { "epoch": 1.0043588812204867, "grad_norm": 18.574607849121094, "learning_rate": 2.4891027969487833e-05, "loss": 0.3438, "step": 5530 }, { "epoch": 1.006175081729023, "grad_norm": 12.30125904083252, "learning_rate": 2.484562295677443e-05, "loss": 0.3503, "step": 5540 }, { "epoch": 1.007991282237559, "grad_norm": 25.125314712524414, "learning_rate": 2.4800217944061025e-05, "loss": 0.3518, "step": 5550 }, { "epoch": 1.0098074827460952, "grad_norm": 10.978049278259277, "learning_rate": 2.475481293134762e-05, "loss": 0.3292, "step": 5560 }, { "epoch": 1.0116236832546313, "grad_norm": 9.796133995056152, "learning_rate": 2.470940791863422e-05, "loss": 0.2946, "step": 5570 }, { "epoch": 1.0134398837631675, "grad_norm": 28.557024002075195, "learning_rate": 2.4664002905920818e-05, "loss": 0.4914, "step": 5580 }, { "epoch": 1.0152560842717036, "grad_norm": 15.893627166748047, "learning_rate": 2.4618597893207414e-05, "loss": 0.3519, "step": 5590 }, { "epoch": 1.0170722847802398, "grad_norm": 15.343416213989258, "learning_rate": 2.457319288049401e-05, "loss": 0.4028, "step": 5600 }, { "epoch": 1.0188884852887758, "grad_norm": 9.576445579528809, "learning_rate": 2.4527787867780606e-05, "loss": 0.3718, "step": 5610 }, { "epoch": 1.020704685797312, "grad_norm": 7.0438232421875, "learning_rate": 2.4482382855067202e-05, "loss": 0.191, "step": 5620 }, { "epoch": 1.0225208863058481, "grad_norm": 7.872592926025391, "learning_rate": 2.44369778423538e-05, "loss": 0.2752, "step": 5630 }, { "epoch": 1.0243370868143844, "grad_norm": 19.647167205810547, "learning_rate": 2.4391572829640395e-05, "loss": 0.3226, "step": 5640 }, { "epoch": 1.0261532873229204, "grad_norm": 7.968392848968506, "learning_rate": 2.434616781692699e-05, "loss": 0.2653, "step": 5650 }, { "epoch": 1.0279694878314567, "grad_norm": 17.07377052307129, "learning_rate": 2.4300762804213587e-05, "loss": 0.3296, "step": 5660 }, { "epoch": 1.0297856883399927, "grad_norm": 18.10598373413086, "learning_rate": 2.4255357791500184e-05, "loss": 0.2421, "step": 5670 }, { "epoch": 1.0316018888485288, "grad_norm": 10.867464065551758, "learning_rate": 2.420995277878678e-05, "loss": 0.301, "step": 5680 }, { "epoch": 1.033418089357065, "grad_norm": 15.795087814331055, "learning_rate": 2.4164547766073376e-05, "loss": 0.2956, "step": 5690 }, { "epoch": 1.035234289865601, "grad_norm": 19.456615447998047, "learning_rate": 2.4119142753359972e-05, "loss": 0.2555, "step": 5700 }, { "epoch": 1.035234289865601, "eval_accuracy": 0.8285479901558654, "eval_f1": 0.8213955767333186, "eval_loss": 0.5004270076751709, "eval_precision": 0.8160496038609667, "eval_recall": 0.8316668454229845, "eval_runtime": 12.1355, "eval_samples_per_second": 100.449, "eval_steps_per_second": 6.345, "step": 5700 }, { "epoch": 1.0370504903741373, "grad_norm": 10.63526725769043, "learning_rate": 2.407373774064657e-05, "loss": 0.3511, "step": 5710 }, { "epoch": 1.0388666908826734, "grad_norm": 5.104264259338379, "learning_rate": 2.4028332727933165e-05, "loss": 0.2357, "step": 5720 }, { "epoch": 1.0406828913912096, "grad_norm": 16.865144729614258, "learning_rate": 2.398292771521976e-05, "loss": 0.4097, "step": 5730 }, { "epoch": 1.0424990918997457, "grad_norm": 22.08740997314453, "learning_rate": 2.3937522702506357e-05, "loss": 0.3789, "step": 5740 }, { "epoch": 1.044315292408282, "grad_norm": 28.62466812133789, "learning_rate": 2.3892117689792954e-05, "loss": 0.3074, "step": 5750 }, { "epoch": 1.046131492916818, "grad_norm": 27.597490310668945, "learning_rate": 2.384671267707955e-05, "loss": 0.2929, "step": 5760 }, { "epoch": 1.0479476934253542, "grad_norm": 20.991607666015625, "learning_rate": 2.3801307664366146e-05, "loss": 0.4398, "step": 5770 }, { "epoch": 1.0497638939338902, "grad_norm": 20.590211868286133, "learning_rate": 2.3755902651652746e-05, "loss": 0.2788, "step": 5780 }, { "epoch": 1.0515800944424265, "grad_norm": 11.073917388916016, "learning_rate": 2.3710497638939342e-05, "loss": 0.1954, "step": 5790 }, { "epoch": 1.0533962949509625, "grad_norm": 14.334949493408203, "learning_rate": 2.366509262622594e-05, "loss": 0.3096, "step": 5800 }, { "epoch": 1.0552124954594988, "grad_norm": 6.725550651550293, "learning_rate": 2.3619687613512535e-05, "loss": 0.4892, "step": 5810 }, { "epoch": 1.0570286959680348, "grad_norm": 16.188831329345703, "learning_rate": 2.357428260079913e-05, "loss": 0.3451, "step": 5820 }, { "epoch": 1.058844896476571, "grad_norm": 10.715399742126465, "learning_rate": 2.3528877588085727e-05, "loss": 0.4155, "step": 5830 }, { "epoch": 1.0606610969851071, "grad_norm": 18.30307388305664, "learning_rate": 2.3483472575372323e-05, "loss": 0.3095, "step": 5840 }, { "epoch": 1.0624772974936434, "grad_norm": 9.688103675842285, "learning_rate": 2.343806756265892e-05, "loss": 0.3121, "step": 5850 }, { "epoch": 1.0642934980021794, "grad_norm": 22.700828552246094, "learning_rate": 2.3392662549945516e-05, "loss": 0.2068, "step": 5860 }, { "epoch": 1.0661096985107157, "grad_norm": 11.1968994140625, "learning_rate": 2.3347257537232112e-05, "loss": 0.2767, "step": 5870 }, { "epoch": 1.0679258990192517, "grad_norm": 25.934579849243164, "learning_rate": 2.330185252451871e-05, "loss": 0.3119, "step": 5880 }, { "epoch": 1.069742099527788, "grad_norm": 7.8139824867248535, "learning_rate": 2.3256447511805305e-05, "loss": 0.2135, "step": 5890 }, { "epoch": 1.071558300036324, "grad_norm": 18.086198806762695, "learning_rate": 2.32110424990919e-05, "loss": 0.4011, "step": 5900 }, { "epoch": 1.07337450054486, "grad_norm": 22.85544776916504, "learning_rate": 2.3165637486378497e-05, "loss": 0.4042, "step": 5910 }, { "epoch": 1.0751907010533963, "grad_norm": 22.67595672607422, "learning_rate": 2.3120232473665093e-05, "loss": 0.289, "step": 5920 }, { "epoch": 1.0770069015619324, "grad_norm": 8.212250709533691, "learning_rate": 2.307482746095169e-05, "loss": 0.4169, "step": 5930 }, { "epoch": 1.0788231020704686, "grad_norm": 22.84626007080078, "learning_rate": 2.3029422448238286e-05, "loss": 0.3653, "step": 5940 }, { "epoch": 1.0806393025790046, "grad_norm": 13.91925048828125, "learning_rate": 2.2984017435524882e-05, "loss": 0.2087, "step": 5950 }, { "epoch": 1.082455503087541, "grad_norm": 22.284345626831055, "learning_rate": 2.293861242281148e-05, "loss": 0.3626, "step": 5960 }, { "epoch": 1.084271703596077, "grad_norm": 15.127870559692383, "learning_rate": 2.2893207410098075e-05, "loss": 0.2541, "step": 5970 }, { "epoch": 1.0860879041046132, "grad_norm": 6.7608113288879395, "learning_rate": 2.284780239738467e-05, "loss": 0.2726, "step": 5980 }, { "epoch": 1.0879041046131492, "grad_norm": 13.903280258178711, "learning_rate": 2.280239738467127e-05, "loss": 0.3793, "step": 5990 }, { "epoch": 1.0897203051216855, "grad_norm": 15.691337585449219, "learning_rate": 2.2756992371957867e-05, "loss": 0.2741, "step": 6000 }, { "epoch": 1.0897203051216855, "eval_accuracy": 0.8301886792452831, "eval_f1": 0.8241579951716257, "eval_loss": 0.46239912509918213, "eval_precision": 0.8201764967723821, "eval_recall": 0.8354772684817514, "eval_runtime": 12.131, "eval_samples_per_second": 100.486, "eval_steps_per_second": 6.347, "step": 6000 }, { "epoch": 1.0915365056302215, "grad_norm": 23.5091609954834, "learning_rate": 2.2711587359244463e-05, "loss": 0.3384, "step": 6010 }, { "epoch": 1.0933527061387578, "grad_norm": 16.730623245239258, "learning_rate": 2.266618234653106e-05, "loss": 0.4145, "step": 6020 }, { "epoch": 1.0951689066472938, "grad_norm": 12.925875663757324, "learning_rate": 2.2620777333817655e-05, "loss": 0.2396, "step": 6030 }, { "epoch": 1.09698510715583, "grad_norm": 1.2459120750427246, "learning_rate": 2.2575372321104252e-05, "loss": 0.2662, "step": 6040 }, { "epoch": 1.0988013076643661, "grad_norm": 11.28719711303711, "learning_rate": 2.2529967308390848e-05, "loss": 0.2615, "step": 6050 }, { "epoch": 1.1006175081729024, "grad_norm": 17.44615936279297, "learning_rate": 2.2484562295677444e-05, "loss": 0.3594, "step": 6060 }, { "epoch": 1.1024337086814384, "grad_norm": 12.629523277282715, "learning_rate": 2.243915728296404e-05, "loss": 0.3975, "step": 6070 }, { "epoch": 1.1042499091899747, "grad_norm": 16.39533805847168, "learning_rate": 2.2393752270250637e-05, "loss": 0.2663, "step": 6080 }, { "epoch": 1.1060661096985107, "grad_norm": 6.332333564758301, "learning_rate": 2.2348347257537233e-05, "loss": 0.221, "step": 6090 }, { "epoch": 1.1078823102070467, "grad_norm": 8.119811058044434, "learning_rate": 2.230294224482383e-05, "loss": 0.2462, "step": 6100 }, { "epoch": 1.109698510715583, "grad_norm": 13.648475646972656, "learning_rate": 2.2257537232110425e-05, "loss": 0.2634, "step": 6110 }, { "epoch": 1.111514711224119, "grad_norm": 2.348459005355835, "learning_rate": 2.2212132219397022e-05, "loss": 0.3967, "step": 6120 }, { "epoch": 1.1133309117326553, "grad_norm": 20.70992660522461, "learning_rate": 2.2166727206683618e-05, "loss": 0.3852, "step": 6130 }, { "epoch": 1.1151471122411913, "grad_norm": 9.073955535888672, "learning_rate": 2.2121322193970214e-05, "loss": 0.3379, "step": 6140 }, { "epoch": 1.1169633127497276, "grad_norm": 18.57855796813965, "learning_rate": 2.207591718125681e-05, "loss": 0.3118, "step": 6150 }, { "epoch": 1.1187795132582636, "grad_norm": 13.104948043823242, "learning_rate": 2.2030512168543407e-05, "loss": 0.3165, "step": 6160 }, { "epoch": 1.1205957137668, "grad_norm": 17.0866756439209, "learning_rate": 2.1985107155830003e-05, "loss": 0.3737, "step": 6170 }, { "epoch": 1.122411914275336, "grad_norm": 10.766948699951172, "learning_rate": 2.19397021431166e-05, "loss": 0.2323, "step": 6180 }, { "epoch": 1.1242281147838722, "grad_norm": 14.713004112243652, "learning_rate": 2.1894297130403195e-05, "loss": 0.2995, "step": 6190 }, { "epoch": 1.1260443152924082, "grad_norm": 15.085183143615723, "learning_rate": 2.1848892117689795e-05, "loss": 0.3476, "step": 6200 }, { "epoch": 1.1278605158009445, "grad_norm": 12.6240816116333, "learning_rate": 2.180348710497639e-05, "loss": 0.2693, "step": 6210 }, { "epoch": 1.1296767163094805, "grad_norm": 16.312969207763672, "learning_rate": 2.1758082092262988e-05, "loss": 0.4585, "step": 6220 }, { "epoch": 1.1314929168180168, "grad_norm": 4.667062282562256, "learning_rate": 2.1712677079549584e-05, "loss": 0.1519, "step": 6230 }, { "epoch": 1.1333091173265528, "grad_norm": 10.980841636657715, "learning_rate": 2.166727206683618e-05, "loss": 0.3728, "step": 6240 }, { "epoch": 1.135125317835089, "grad_norm": 12.60606575012207, "learning_rate": 2.1621867054122776e-05, "loss": 0.2508, "step": 6250 }, { "epoch": 1.1369415183436251, "grad_norm": 24.3731689453125, "learning_rate": 2.1576462041409373e-05, "loss": 0.3333, "step": 6260 }, { "epoch": 1.1387577188521614, "grad_norm": 14.52236557006836, "learning_rate": 2.153105702869597e-05, "loss": 0.3393, "step": 6270 }, { "epoch": 1.1405739193606974, "grad_norm": 18.642431259155273, "learning_rate": 2.1485652015982565e-05, "loss": 0.2943, "step": 6280 }, { "epoch": 1.1423901198692334, "grad_norm": 17.546001434326172, "learning_rate": 2.144024700326916e-05, "loss": 0.4127, "step": 6290 }, { "epoch": 1.1442063203777697, "grad_norm": 17.17730712890625, "learning_rate": 2.1394841990555758e-05, "loss": 0.4123, "step": 6300 }, { "epoch": 1.1442063203777697, "eval_accuracy": 0.8408531583264971, "eval_f1": 0.8356285591942396, "eval_loss": 0.4855496287345886, "eval_precision": 0.8294180649644707, "eval_recall": 0.8508328584053949, "eval_runtime": 12.1359, "eval_samples_per_second": 100.445, "eval_steps_per_second": 6.345, "step": 6300 }, { "epoch": 1.146022520886306, "grad_norm": 12.82174301147461, "learning_rate": 2.1349436977842354e-05, "loss": 0.222, "step": 6310 }, { "epoch": 1.147838721394842, "grad_norm": 13.084376335144043, "learning_rate": 2.130403196512895e-05, "loss": 0.2926, "step": 6320 }, { "epoch": 1.149654921903378, "grad_norm": 15.497282028198242, "learning_rate": 2.1258626952415546e-05, "loss": 0.3175, "step": 6330 }, { "epoch": 1.1514711224119143, "grad_norm": 13.51550579071045, "learning_rate": 2.1213221939702143e-05, "loss": 0.3562, "step": 6340 }, { "epoch": 1.1532873229204503, "grad_norm": 23.226682662963867, "learning_rate": 2.116781692698874e-05, "loss": 0.3547, "step": 6350 }, { "epoch": 1.1551035234289866, "grad_norm": 19.47138023376465, "learning_rate": 2.1122411914275335e-05, "loss": 0.2397, "step": 6360 }, { "epoch": 1.1569197239375226, "grad_norm": 20.718000411987305, "learning_rate": 2.107700690156193e-05, "loss": 0.2287, "step": 6370 }, { "epoch": 1.1587359244460589, "grad_norm": 15.798551559448242, "learning_rate": 2.1031601888848528e-05, "loss": 0.2555, "step": 6380 }, { "epoch": 1.160552124954595, "grad_norm": 17.811277389526367, "learning_rate": 2.0986196876135124e-05, "loss": 0.4506, "step": 6390 }, { "epoch": 1.1623683254631312, "grad_norm": 11.916951179504395, "learning_rate": 2.094079186342172e-05, "loss": 0.4996, "step": 6400 }, { "epoch": 1.1641845259716672, "grad_norm": 25.842151641845703, "learning_rate": 2.089538685070832e-05, "loss": 0.3922, "step": 6410 }, { "epoch": 1.1660007264802035, "grad_norm": 12.472575187683105, "learning_rate": 2.0849981837994916e-05, "loss": 0.2717, "step": 6420 }, { "epoch": 1.1678169269887395, "grad_norm": 16.09991455078125, "learning_rate": 2.0804576825281512e-05, "loss": 0.265, "step": 6430 }, { "epoch": 1.1696331274972758, "grad_norm": 13.561772346496582, "learning_rate": 2.075917181256811e-05, "loss": 0.3963, "step": 6440 }, { "epoch": 1.1714493280058118, "grad_norm": 8.552865982055664, "learning_rate": 2.0713766799854705e-05, "loss": 0.2881, "step": 6450 }, { "epoch": 1.173265528514348, "grad_norm": 16.097904205322266, "learning_rate": 2.06683617871413e-05, "loss": 0.3422, "step": 6460 }, { "epoch": 1.175081729022884, "grad_norm": 14.92163372039795, "learning_rate": 2.0622956774427897e-05, "loss": 0.3918, "step": 6470 }, { "epoch": 1.1768979295314204, "grad_norm": 23.94252586364746, "learning_rate": 2.0577551761714493e-05, "loss": 0.2916, "step": 6480 }, { "epoch": 1.1787141300399564, "grad_norm": 32.18648147583008, "learning_rate": 2.053214674900109e-05, "loss": 0.4981, "step": 6490 }, { "epoch": 1.1805303305484927, "grad_norm": 25.992433547973633, "learning_rate": 2.0486741736287686e-05, "loss": 0.4043, "step": 6500 }, { "epoch": 1.1823465310570287, "grad_norm": 16.37993621826172, "learning_rate": 2.0441336723574282e-05, "loss": 0.2839, "step": 6510 }, { "epoch": 1.1841627315655647, "grad_norm": 11.15428638458252, "learning_rate": 2.039593171086088e-05, "loss": 0.2675, "step": 6520 }, { "epoch": 1.185978932074101, "grad_norm": 4.630198001861572, "learning_rate": 2.0350526698147475e-05, "loss": 0.3671, "step": 6530 }, { "epoch": 1.1877951325826372, "grad_norm": 7.3554606437683105, "learning_rate": 2.030512168543407e-05, "loss": 0.335, "step": 6540 }, { "epoch": 1.1896113330911733, "grad_norm": 15.69100284576416, "learning_rate": 2.025971667272067e-05, "loss": 0.3132, "step": 6550 }, { "epoch": 1.1914275335997093, "grad_norm": 9.235587120056152, "learning_rate": 2.0214311660007267e-05, "loss": 0.3725, "step": 6560 }, { "epoch": 1.1932437341082456, "grad_norm": 17.381521224975586, "learning_rate": 2.0168906647293863e-05, "loss": 0.4108, "step": 6570 }, { "epoch": 1.1950599346167816, "grad_norm": 6.358768939971924, "learning_rate": 2.012350163458046e-05, "loss": 0.437, "step": 6580 }, { "epoch": 1.1968761351253179, "grad_norm": 25.116188049316406, "learning_rate": 2.0078096621867056e-05, "loss": 0.3024, "step": 6590 }, { "epoch": 1.198692335633854, "grad_norm": 17.047897338867188, "learning_rate": 2.0032691609153652e-05, "loss": 0.4109, "step": 6600 }, { "epoch": 1.198692335633854, "eval_accuracy": 0.8457752255947498, "eval_f1": 0.8384878566461094, "eval_loss": 0.43283388018608093, "eval_precision": 0.8390928465459404, "eval_recall": 0.8434791709994856, "eval_runtime": 12.1336, "eval_samples_per_second": 100.465, "eval_steps_per_second": 6.346, "step": 6600 }, { "epoch": 1.2005085361423902, "grad_norm": 10.83834171295166, "learning_rate": 1.9987286596440248e-05, "loss": 0.2987, "step": 6610 }, { "epoch": 1.2023247366509262, "grad_norm": 11.946487426757812, "learning_rate": 1.9941881583726844e-05, "loss": 0.3454, "step": 6620 }, { "epoch": 1.2041409371594625, "grad_norm": 11.06641674041748, "learning_rate": 1.989647657101344e-05, "loss": 0.3263, "step": 6630 }, { "epoch": 1.2059571376679985, "grad_norm": 10.330728530883789, "learning_rate": 1.9851071558300037e-05, "loss": 0.3618, "step": 6640 }, { "epoch": 1.2077733381765348, "grad_norm": 26.483482360839844, "learning_rate": 1.9805666545586633e-05, "loss": 0.3452, "step": 6650 }, { "epoch": 1.2095895386850708, "grad_norm": 12.150251388549805, "learning_rate": 1.9760261532873233e-05, "loss": 0.3156, "step": 6660 }, { "epoch": 1.211405739193607, "grad_norm": 12.710071563720703, "learning_rate": 1.971485652015983e-05, "loss": 0.2054, "step": 6670 }, { "epoch": 1.213221939702143, "grad_norm": 10.786319732666016, "learning_rate": 1.9669451507446425e-05, "loss": 0.3378, "step": 6680 }, { "epoch": 1.2150381402106794, "grad_norm": 17.936023712158203, "learning_rate": 1.962404649473302e-05, "loss": 0.2841, "step": 6690 }, { "epoch": 1.2168543407192154, "grad_norm": 13.268390655517578, "learning_rate": 1.9578641482019618e-05, "loss": 0.17, "step": 6700 }, { "epoch": 1.2186705412277514, "grad_norm": 6.254006385803223, "learning_rate": 1.9533236469306214e-05, "loss": 0.3391, "step": 6710 }, { "epoch": 1.2204867417362877, "grad_norm": 19.141435623168945, "learning_rate": 1.948783145659281e-05, "loss": 0.281, "step": 6720 }, { "epoch": 1.222302942244824, "grad_norm": 20.09503936767578, "learning_rate": 1.9442426443879406e-05, "loss": 0.3752, "step": 6730 }, { "epoch": 1.22411914275336, "grad_norm": 13.188698768615723, "learning_rate": 1.9397021431166003e-05, "loss": 0.2663, "step": 6740 }, { "epoch": 1.225935343261896, "grad_norm": 16.47735595703125, "learning_rate": 1.93516164184526e-05, "loss": 0.2395, "step": 6750 }, { "epoch": 1.2277515437704323, "grad_norm": 14.550719261169434, "learning_rate": 1.9306211405739195e-05, "loss": 0.4064, "step": 6760 }, { "epoch": 1.2295677442789683, "grad_norm": 24.679447174072266, "learning_rate": 1.926080639302579e-05, "loss": 0.3489, "step": 6770 }, { "epoch": 1.2313839447875046, "grad_norm": 18.579641342163086, "learning_rate": 1.9215401380312388e-05, "loss": 0.3471, "step": 6780 }, { "epoch": 1.2332001452960406, "grad_norm": 9.306077003479004, "learning_rate": 1.9169996367598984e-05, "loss": 0.3261, "step": 6790 }, { "epoch": 1.2350163458045769, "grad_norm": 14.456847190856934, "learning_rate": 1.912459135488558e-05, "loss": 0.2736, "step": 6800 }, { "epoch": 1.236832546313113, "grad_norm": 13.200130462646484, "learning_rate": 1.9079186342172176e-05, "loss": 0.3333, "step": 6810 }, { "epoch": 1.2386487468216492, "grad_norm": 20.47243309020996, "learning_rate": 1.9033781329458773e-05, "loss": 0.2882, "step": 6820 }, { "epoch": 1.2404649473301852, "grad_norm": 4.953060150146484, "learning_rate": 1.8988376316745372e-05, "loss": 0.2492, "step": 6830 }, { "epoch": 1.2422811478387215, "grad_norm": 18.391193389892578, "learning_rate": 1.894297130403197e-05, "loss": 0.3334, "step": 6840 }, { "epoch": 1.2440973483472575, "grad_norm": 15.669410705566406, "learning_rate": 1.8897566291318565e-05, "loss": 0.2825, "step": 6850 }, { "epoch": 1.2459135488557938, "grad_norm": 7.906172752380371, "learning_rate": 1.885216127860516e-05, "loss": 0.3103, "step": 6860 }, { "epoch": 1.2477297493643298, "grad_norm": 28.12116813659668, "learning_rate": 1.8806756265891757e-05, "loss": 0.513, "step": 6870 }, { "epoch": 1.249545949872866, "grad_norm": 15.086318969726562, "learning_rate": 1.8761351253178354e-05, "loss": 0.2614, "step": 6880 }, { "epoch": 1.251362150381402, "grad_norm": 21.694408416748047, "learning_rate": 1.871594624046495e-05, "loss": 0.3005, "step": 6890 }, { "epoch": 1.2531783508899381, "grad_norm": 19.437421798706055, "learning_rate": 1.8670541227751546e-05, "loss": 0.3489, "step": 6900 }, { "epoch": 1.2531783508899381, "eval_accuracy": 0.8613617719442166, "eval_f1": 0.8577346281731705, "eval_loss": 0.41974562406539917, "eval_precision": 0.8551502281540364, "eval_recall": 0.8622491860794987, "eval_runtime": 12.1508, "eval_samples_per_second": 100.323, "eval_steps_per_second": 6.337, "step": 6900 }, { "epoch": 1.2549945513984744, "grad_norm": 10.58027172088623, "learning_rate": 1.8625136215038142e-05, "loss": 0.4004, "step": 6910 }, { "epoch": 1.2568107519070106, "grad_norm": 18.426048278808594, "learning_rate": 1.857973120232474e-05, "loss": 0.3934, "step": 6920 }, { "epoch": 1.2586269524155467, "grad_norm": 14.427044868469238, "learning_rate": 1.8534326189611335e-05, "loss": 0.3299, "step": 6930 }, { "epoch": 1.2604431529240827, "grad_norm": 15.217583656311035, "learning_rate": 1.848892117689793e-05, "loss": 0.2325, "step": 6940 }, { "epoch": 1.262259353432619, "grad_norm": 7.493901252746582, "learning_rate": 1.8443516164184527e-05, "loss": 0.1888, "step": 6950 }, { "epoch": 1.2640755539411552, "grad_norm": 9.405710220336914, "learning_rate": 1.8398111151471124e-05, "loss": 0.3034, "step": 6960 }, { "epoch": 1.2658917544496913, "grad_norm": 22.79953384399414, "learning_rate": 1.835270613875772e-05, "loss": 0.4091, "step": 6970 }, { "epoch": 1.2677079549582273, "grad_norm": 11.145368576049805, "learning_rate": 1.8307301126044316e-05, "loss": 0.3613, "step": 6980 }, { "epoch": 1.2695241554667636, "grad_norm": 12.6839599609375, "learning_rate": 1.8261896113330912e-05, "loss": 0.2778, "step": 6990 }, { "epoch": 1.2713403559752996, "grad_norm": 26.889331817626953, "learning_rate": 1.821649110061751e-05, "loss": 0.5038, "step": 7000 }, { "epoch": 1.2731565564838359, "grad_norm": 24.15523338317871, "learning_rate": 1.8171086087904105e-05, "loss": 0.2681, "step": 7010 }, { "epoch": 1.274972756992372, "grad_norm": 24.74588394165039, "learning_rate": 1.81256810751907e-05, "loss": 0.3978, "step": 7020 }, { "epoch": 1.2767889575009082, "grad_norm": 14.386746406555176, "learning_rate": 1.8080276062477297e-05, "loss": 0.1776, "step": 7030 }, { "epoch": 1.2786051580094442, "grad_norm": 14.167470932006836, "learning_rate": 1.8034871049763894e-05, "loss": 0.4071, "step": 7040 }, { "epoch": 1.2804213585179804, "grad_norm": 17.294830322265625, "learning_rate": 1.7989466037050493e-05, "loss": 0.3855, "step": 7050 }, { "epoch": 1.2822375590265165, "grad_norm": 7.660125255584717, "learning_rate": 1.794406102433709e-05, "loss": 0.3288, "step": 7060 }, { "epoch": 1.2840537595350527, "grad_norm": 9.39700698852539, "learning_rate": 1.7898656011623686e-05, "loss": 0.216, "step": 7070 }, { "epoch": 1.2858699600435888, "grad_norm": 14.8308744430542, "learning_rate": 1.7853250998910282e-05, "loss": 0.293, "step": 7080 }, { "epoch": 1.287686160552125, "grad_norm": 7.683089733123779, "learning_rate": 1.7807845986196878e-05, "loss": 0.2502, "step": 7090 }, { "epoch": 1.289502361060661, "grad_norm": 8.663617134094238, "learning_rate": 1.7762440973483474e-05, "loss": 0.2726, "step": 7100 }, { "epoch": 1.2913185615691973, "grad_norm": 27.20614242553711, "learning_rate": 1.771703596077007e-05, "loss": 0.3407, "step": 7110 }, { "epoch": 1.2931347620777334, "grad_norm": 20.363462448120117, "learning_rate": 1.7671630948056667e-05, "loss": 0.4059, "step": 7120 }, { "epoch": 1.2949509625862694, "grad_norm": 11.423839569091797, "learning_rate": 1.7626225935343263e-05, "loss": 0.3017, "step": 7130 }, { "epoch": 1.2967671630948057, "grad_norm": 21.78324317932129, "learning_rate": 1.758082092262986e-05, "loss": 0.2886, "step": 7140 }, { "epoch": 1.298583363603342, "grad_norm": 10.466778755187988, "learning_rate": 1.7535415909916456e-05, "loss": 0.4254, "step": 7150 }, { "epoch": 1.300399564111878, "grad_norm": 27.826078414916992, "learning_rate": 1.7490010897203052e-05, "loss": 0.3508, "step": 7160 }, { "epoch": 1.302215764620414, "grad_norm": 30.015012741088867, "learning_rate": 1.7444605884489648e-05, "loss": 0.339, "step": 7170 }, { "epoch": 1.3040319651289503, "grad_norm": 5.534029960632324, "learning_rate": 1.7399200871776244e-05, "loss": 0.2054, "step": 7180 }, { "epoch": 1.3058481656374865, "grad_norm": 17.095060348510742, "learning_rate": 1.735379585906284e-05, "loss": 0.418, "step": 7190 }, { "epoch": 1.3076643661460225, "grad_norm": 13.304654121398926, "learning_rate": 1.7308390846349437e-05, "loss": 0.4048, "step": 7200 }, { "epoch": 1.3076643661460225, "eval_accuracy": 0.8679245283018868, "eval_f1": 0.8611075418966936, "eval_loss": 0.39569488167762756, "eval_precision": 0.8537013974843974, "eval_recall": 0.8742274746130478, "eval_runtime": 12.1513, "eval_samples_per_second": 100.319, "eval_steps_per_second": 6.337, "step": 7200 }, { "epoch": 1.3094805666545586, "grad_norm": 11.452223777770996, "learning_rate": 1.7262985833636033e-05, "loss": 0.4095, "step": 7210 }, { "epoch": 1.3112967671630948, "grad_norm": 7.238298416137695, "learning_rate": 1.721758082092263e-05, "loss": 0.2381, "step": 7220 }, { "epoch": 1.3131129676716309, "grad_norm": 25.489473342895508, "learning_rate": 1.7172175808209226e-05, "loss": 0.3537, "step": 7230 }, { "epoch": 1.3149291681801671, "grad_norm": 8.602483749389648, "learning_rate": 1.7126770795495822e-05, "loss": 0.2295, "step": 7240 }, { "epoch": 1.3167453686887032, "grad_norm": 16.307979583740234, "learning_rate": 1.7081365782782418e-05, "loss": 0.4157, "step": 7250 }, { "epoch": 1.3185615691972394, "grad_norm": 12.968489646911621, "learning_rate": 1.7035960770069018e-05, "loss": 0.2462, "step": 7260 }, { "epoch": 1.3203777697057755, "grad_norm": 473.5597839355469, "learning_rate": 1.6990555757355614e-05, "loss": 0.2313, "step": 7270 }, { "epoch": 1.3221939702143117, "grad_norm": 25.61052703857422, "learning_rate": 1.694515074464221e-05, "loss": 0.3654, "step": 7280 }, { "epoch": 1.3240101707228478, "grad_norm": 8.36911678314209, "learning_rate": 1.6899745731928806e-05, "loss": 0.4109, "step": 7290 }, { "epoch": 1.325826371231384, "grad_norm": 5.5236687660217285, "learning_rate": 1.6854340719215403e-05, "loss": 0.3426, "step": 7300 }, { "epoch": 1.32764257173992, "grad_norm": 12.219548225402832, "learning_rate": 1.6808935706502e-05, "loss": 0.2344, "step": 7310 }, { "epoch": 1.329458772248456, "grad_norm": 9.559709548950195, "learning_rate": 1.6763530693788595e-05, "loss": 0.351, "step": 7320 }, { "epoch": 1.3312749727569924, "grad_norm": 12.749125480651855, "learning_rate": 1.671812568107519e-05, "loss": 0.3179, "step": 7330 }, { "epoch": 1.3330911732655286, "grad_norm": 10.120976448059082, "learning_rate": 1.6672720668361788e-05, "loss": 0.2978, "step": 7340 }, { "epoch": 1.3349073737740647, "grad_norm": 20.587955474853516, "learning_rate": 1.6627315655648384e-05, "loss": 0.3815, "step": 7350 }, { "epoch": 1.3367235742826007, "grad_norm": 15.96291446685791, "learning_rate": 1.658191064293498e-05, "loss": 0.3063, "step": 7360 }, { "epoch": 1.338539774791137, "grad_norm": 20.04080581665039, "learning_rate": 1.6536505630221576e-05, "loss": 0.2703, "step": 7370 }, { "epoch": 1.3403559752996732, "grad_norm": 19.636119842529297, "learning_rate": 1.6491100617508173e-05, "loss": 0.2934, "step": 7380 }, { "epoch": 1.3421721758082092, "grad_norm": 17.96088218688965, "learning_rate": 1.644569560479477e-05, "loss": 0.2903, "step": 7390 }, { "epoch": 1.3439883763167453, "grad_norm": 5.417899131774902, "learning_rate": 1.6400290592081365e-05, "loss": 0.4768, "step": 7400 }, { "epoch": 1.3458045768252815, "grad_norm": 27.437042236328125, "learning_rate": 1.635488557936796e-05, "loss": 0.4288, "step": 7410 }, { "epoch": 1.3476207773338176, "grad_norm": 14.335066795349121, "learning_rate": 1.6309480566654558e-05, "loss": 0.3827, "step": 7420 }, { "epoch": 1.3494369778423538, "grad_norm": 20.122777938842773, "learning_rate": 1.6264075553941154e-05, "loss": 0.4897, "step": 7430 }, { "epoch": 1.3512531783508899, "grad_norm": 32.951942443847656, "learning_rate": 1.621867054122775e-05, "loss": 0.3861, "step": 7440 }, { "epoch": 1.3530693788594261, "grad_norm": 11.255241394042969, "learning_rate": 1.6173265528514346e-05, "loss": 0.2971, "step": 7450 }, { "epoch": 1.3548855793679622, "grad_norm": 14.039215087890625, "learning_rate": 1.6127860515800943e-05, "loss": 0.3155, "step": 7460 }, { "epoch": 1.3567017798764984, "grad_norm": 8.711435317993164, "learning_rate": 1.6082455503087542e-05, "loss": 0.2202, "step": 7470 }, { "epoch": 1.3585179803850345, "grad_norm": 11.57476806640625, "learning_rate": 1.603705049037414e-05, "loss": 0.2802, "step": 7480 }, { "epoch": 1.3603341808935707, "grad_norm": 26.275901794433594, "learning_rate": 1.5991645477660735e-05, "loss": 0.424, "step": 7490 }, { "epoch": 1.3621503814021068, "grad_norm": 9.019407272338867, "learning_rate": 1.594624046494733e-05, "loss": 0.2053, "step": 7500 }, { "epoch": 1.3621503814021068, "eval_accuracy": 0.8810500410172273, "eval_f1": 0.8728353936424942, "eval_loss": 0.36287108063697815, "eval_precision": 0.8688572009408948, "eval_recall": 0.8783339042782887, "eval_runtime": 12.133, "eval_samples_per_second": 100.469, "eval_steps_per_second": 6.346, "step": 7500 }, { "epoch": 1.363966581910643, "grad_norm": 13.45000171661377, "learning_rate": 1.5900835452233927e-05, "loss": 0.2865, "step": 7510 }, { "epoch": 1.365782782419179, "grad_norm": 18.8865966796875, "learning_rate": 1.5855430439520524e-05, "loss": 0.3401, "step": 7520 }, { "epoch": 1.3675989829277153, "grad_norm": 10.25676155090332, "learning_rate": 1.581002542680712e-05, "loss": 0.2958, "step": 7530 }, { "epoch": 1.3694151834362513, "grad_norm": 12.922000885009766, "learning_rate": 1.5764620414093716e-05, "loss": 0.3281, "step": 7540 }, { "epoch": 1.3712313839447874, "grad_norm": 21.29969596862793, "learning_rate": 1.5719215401380312e-05, "loss": 0.4653, "step": 7550 }, { "epoch": 1.3730475844533236, "grad_norm": 22.337665557861328, "learning_rate": 1.567381038866691e-05, "loss": 0.4078, "step": 7560 }, { "epoch": 1.37486378496186, "grad_norm": 13.976520538330078, "learning_rate": 1.5628405375953505e-05, "loss": 0.2474, "step": 7570 }, { "epoch": 1.376679985470396, "grad_norm": 15.766996383666992, "learning_rate": 1.55830003632401e-05, "loss": 0.3418, "step": 7580 }, { "epoch": 1.378496185978932, "grad_norm": 10.695988655090332, "learning_rate": 1.5537595350526697e-05, "loss": 0.231, "step": 7590 }, { "epoch": 1.3803123864874682, "grad_norm": 12.22573184967041, "learning_rate": 1.5492190337813294e-05, "loss": 0.3406, "step": 7600 }, { "epoch": 1.3821285869960045, "grad_norm": 8.77241325378418, "learning_rate": 1.544678532509989e-05, "loss": 0.4051, "step": 7610 }, { "epoch": 1.3839447875045405, "grad_norm": 17.83467674255371, "learning_rate": 1.5401380312386486e-05, "loss": 0.2736, "step": 7620 }, { "epoch": 1.3857609880130766, "grad_norm": 10.345674514770508, "learning_rate": 1.5355975299673082e-05, "loss": 0.2945, "step": 7630 }, { "epoch": 1.3875771885216128, "grad_norm": 9.618982315063477, "learning_rate": 1.531057028695968e-05, "loss": 0.307, "step": 7640 }, { "epoch": 1.3893933890301489, "grad_norm": 6.242488861083984, "learning_rate": 1.5265165274246275e-05, "loss": 0.2746, "step": 7650 }, { "epoch": 1.3912095895386851, "grad_norm": 14.939092636108398, "learning_rate": 1.5219760261532873e-05, "loss": 0.2958, "step": 7660 }, { "epoch": 1.3930257900472212, "grad_norm": 16.4776611328125, "learning_rate": 1.5174355248819469e-05, "loss": 0.5158, "step": 7670 }, { "epoch": 1.3948419905557574, "grad_norm": 19.468334197998047, "learning_rate": 1.5128950236106069e-05, "loss": 0.2188, "step": 7680 }, { "epoch": 1.3966581910642935, "grad_norm": 16.519298553466797, "learning_rate": 1.5083545223392665e-05, "loss": 0.2974, "step": 7690 }, { "epoch": 1.3984743915728297, "grad_norm": 8.464622497558594, "learning_rate": 1.5038140210679261e-05, "loss": 0.2988, "step": 7700 }, { "epoch": 1.4002905920813657, "grad_norm": 1.7706962823867798, "learning_rate": 1.4992735197965857e-05, "loss": 0.25, "step": 7710 }, { "epoch": 1.402106792589902, "grad_norm": 13.213313102722168, "learning_rate": 1.4947330185252454e-05, "loss": 0.2841, "step": 7720 }, { "epoch": 1.403922993098438, "grad_norm": 18.352794647216797, "learning_rate": 1.490192517253905e-05, "loss": 0.4059, "step": 7730 }, { "epoch": 1.405739193606974, "grad_norm": 11.154178619384766, "learning_rate": 1.4856520159825646e-05, "loss": 0.4, "step": 7740 }, { "epoch": 1.4075553941155103, "grad_norm": 20.417091369628906, "learning_rate": 1.4811115147112242e-05, "loss": 0.3162, "step": 7750 }, { "epoch": 1.4093715946240466, "grad_norm": 15.249809265136719, "learning_rate": 1.4765710134398839e-05, "loss": 0.2575, "step": 7760 }, { "epoch": 1.4111877951325826, "grad_norm": 12.293340682983398, "learning_rate": 1.4720305121685435e-05, "loss": 0.2857, "step": 7770 }, { "epoch": 1.4130039956411187, "grad_norm": 6.126258850097656, "learning_rate": 1.4674900108972031e-05, "loss": 0.2618, "step": 7780 }, { "epoch": 1.414820196149655, "grad_norm": 15.712937355041504, "learning_rate": 1.4629495096258627e-05, "loss": 0.3071, "step": 7790 }, { "epoch": 1.4166363966581912, "grad_norm": 22.511367797851562, "learning_rate": 1.4584090083545224e-05, "loss": 0.2066, "step": 7800 }, { "epoch": 1.4166363966581912, "eval_accuracy": 0.8712059064807219, "eval_f1": 0.864515063175361, "eval_loss": 0.39585188031196594, "eval_precision": 0.8567226508046806, "eval_recall": 0.8748503082269048, "eval_runtime": 12.1334, "eval_samples_per_second": 100.467, "eval_steps_per_second": 6.346, "step": 7800 }, { "epoch": 1.4184525971667272, "grad_norm": 21.43792152404785, "learning_rate": 1.453868507083182e-05, "loss": 0.3206, "step": 7810 }, { "epoch": 1.4202687976752633, "grad_norm": 15.642193794250488, "learning_rate": 1.4493280058118416e-05, "loss": 0.3246, "step": 7820 }, { "epoch": 1.4220849981837995, "grad_norm": 22.358238220214844, "learning_rate": 1.4447875045405012e-05, "loss": 0.2895, "step": 7830 }, { "epoch": 1.4239011986923356, "grad_norm": 12.986956596374512, "learning_rate": 1.4402470032691609e-05, "loss": 0.309, "step": 7840 }, { "epoch": 1.4257173992008718, "grad_norm": 25.143394470214844, "learning_rate": 1.4357065019978205e-05, "loss": 0.4229, "step": 7850 }, { "epoch": 1.4275335997094079, "grad_norm": 15.178205490112305, "learning_rate": 1.4311660007264801e-05, "loss": 0.2001, "step": 7860 }, { "epoch": 1.429349800217944, "grad_norm": 27.758424758911133, "learning_rate": 1.4266254994551397e-05, "loss": 0.3854, "step": 7870 }, { "epoch": 1.4311660007264801, "grad_norm": 20.106098175048828, "learning_rate": 1.4220849981837994e-05, "loss": 0.3047, "step": 7880 }, { "epoch": 1.4329822012350164, "grad_norm": 21.916183471679688, "learning_rate": 1.4175444969124593e-05, "loss": 0.3352, "step": 7890 }, { "epoch": 1.4347984017435524, "grad_norm": 11.698692321777344, "learning_rate": 1.413003995641119e-05, "loss": 0.2302, "step": 7900 }, { "epoch": 1.4366146022520887, "grad_norm": 18.721933364868164, "learning_rate": 1.4084634943697786e-05, "loss": 0.2219, "step": 7910 }, { "epoch": 1.4384308027606247, "grad_norm": 10.098983764648438, "learning_rate": 1.4039229930984382e-05, "loss": 0.3427, "step": 7920 }, { "epoch": 1.440247003269161, "grad_norm": 5.1341023445129395, "learning_rate": 1.3993824918270978e-05, "loss": 0.1857, "step": 7930 }, { "epoch": 1.442063203777697, "grad_norm": 7.9033522605896, "learning_rate": 1.3948419905557575e-05, "loss": 0.2511, "step": 7940 }, { "epoch": 1.4438794042862333, "grad_norm": 15.394737243652344, "learning_rate": 1.390301489284417e-05, "loss": 0.372, "step": 7950 }, { "epoch": 1.4456956047947693, "grad_norm": 10.518932342529297, "learning_rate": 1.3857609880130767e-05, "loss": 0.2298, "step": 7960 }, { "epoch": 1.4475118053033054, "grad_norm": 15.272256851196289, "learning_rate": 1.3812204867417363e-05, "loss": 0.2183, "step": 7970 }, { "epoch": 1.4493280058118416, "grad_norm": 13.641687393188477, "learning_rate": 1.376679985470396e-05, "loss": 0.3127, "step": 7980 }, { "epoch": 1.4511442063203779, "grad_norm": 20.85528564453125, "learning_rate": 1.3721394841990556e-05, "loss": 0.437, "step": 7990 }, { "epoch": 1.452960406828914, "grad_norm": 17.150014877319336, "learning_rate": 1.3675989829277152e-05, "loss": 0.2353, "step": 8000 }, { "epoch": 1.45477660733745, "grad_norm": 19.58470916748047, "learning_rate": 1.3630584816563748e-05, "loss": 0.2235, "step": 8010 }, { "epoch": 1.4565928078459862, "grad_norm": 11.996252059936523, "learning_rate": 1.3585179803850344e-05, "loss": 0.1931, "step": 8020 }, { "epoch": 1.4584090083545225, "grad_norm": 14.358990669250488, "learning_rate": 1.353977479113694e-05, "loss": 0.1409, "step": 8030 }, { "epoch": 1.4602252088630585, "grad_norm": 25.43513298034668, "learning_rate": 1.3494369778423539e-05, "loss": 0.3949, "step": 8040 }, { "epoch": 1.4620414093715945, "grad_norm": 38.57484817504883, "learning_rate": 1.3448964765710135e-05, "loss": 0.4156, "step": 8050 }, { "epoch": 1.4638576098801308, "grad_norm": 11.345231056213379, "learning_rate": 1.3403559752996731e-05, "loss": 0.2358, "step": 8060 }, { "epoch": 1.4656738103886668, "grad_norm": 9.881817817687988, "learning_rate": 1.3358154740283327e-05, "loss": 0.1973, "step": 8070 }, { "epoch": 1.467490010897203, "grad_norm": 15.946255683898926, "learning_rate": 1.3312749727569924e-05, "loss": 0.3282, "step": 8080 }, { "epoch": 1.4693062114057391, "grad_norm": 21.599016189575195, "learning_rate": 1.326734471485652e-05, "loss": 0.3486, "step": 8090 }, { "epoch": 1.4711224119142754, "grad_norm": 23.88036346435547, "learning_rate": 1.3221939702143118e-05, "loss": 0.2855, "step": 8100 }, { "epoch": 1.4711224119142754, "eval_accuracy": 0.8777686628383922, "eval_f1": 0.8732951551257775, "eval_loss": 0.37594613432884216, "eval_precision": 0.8717582158349084, "eval_recall": 0.8789319049269518, "eval_runtime": 12.1477, "eval_samples_per_second": 100.348, "eval_steps_per_second": 6.339, "step": 8100 }, { "epoch": 1.4729386124228114, "grad_norm": 21.347118377685547, "learning_rate": 1.3176534689429714e-05, "loss": 0.4024, "step": 8110 }, { "epoch": 1.4747548129313477, "grad_norm": 12.109701156616211, "learning_rate": 1.313112967671631e-05, "loss": 0.3624, "step": 8120 }, { "epoch": 1.4765710134398837, "grad_norm": 11.65137004852295, "learning_rate": 1.3085724664002907e-05, "loss": 0.2691, "step": 8130 }, { "epoch": 1.47838721394842, "grad_norm": 14.210288047790527, "learning_rate": 1.3040319651289505e-05, "loss": 0.3066, "step": 8140 }, { "epoch": 1.480203414456956, "grad_norm": 15.840164184570312, "learning_rate": 1.29949146385761e-05, "loss": 0.4073, "step": 8150 }, { "epoch": 1.482019614965492, "grad_norm": 17.042640686035156, "learning_rate": 1.2949509625862697e-05, "loss": 0.2465, "step": 8160 }, { "epoch": 1.4838358154740283, "grad_norm": 4.103309631347656, "learning_rate": 1.2904104613149293e-05, "loss": 0.2429, "step": 8170 }, { "epoch": 1.4856520159825646, "grad_norm": 21.490703582763672, "learning_rate": 1.285869960043589e-05, "loss": 0.3825, "step": 8180 }, { "epoch": 1.4874682164911006, "grad_norm": 22.954036712646484, "learning_rate": 1.2813294587722486e-05, "loss": 0.3533, "step": 8190 }, { "epoch": 1.4892844169996367, "grad_norm": 9.550930976867676, "learning_rate": 1.2767889575009082e-05, "loss": 0.3585, "step": 8200 }, { "epoch": 1.491100617508173, "grad_norm": 23.033842086791992, "learning_rate": 1.2722484562295678e-05, "loss": 0.3724, "step": 8210 }, { "epoch": 1.4929168180167092, "grad_norm": 18.261627197265625, "learning_rate": 1.2677079549582275e-05, "loss": 0.3568, "step": 8220 }, { "epoch": 1.4947330185252452, "grad_norm": 26.01344871520996, "learning_rate": 1.263167453686887e-05, "loss": 0.3325, "step": 8230 }, { "epoch": 1.4965492190337812, "grad_norm": 6.980250358581543, "learning_rate": 1.2586269524155467e-05, "loss": 0.2685, "step": 8240 }, { "epoch": 1.4983654195423175, "grad_norm": 15.290885925292969, "learning_rate": 1.2540864511442063e-05, "loss": 0.2438, "step": 8250 }, { "epoch": 1.5001816200508538, "grad_norm": 16.555368423461914, "learning_rate": 1.2495459498728661e-05, "loss": 0.4081, "step": 8260 }, { "epoch": 1.5019978205593898, "grad_norm": 23.678932189941406, "learning_rate": 1.2450054486015257e-05, "loss": 0.3303, "step": 8270 }, { "epoch": 1.5038140210679258, "grad_norm": 28.935272216796875, "learning_rate": 1.2404649473301854e-05, "loss": 0.3075, "step": 8280 }, { "epoch": 1.505630221576462, "grad_norm": 1.6638036966323853, "learning_rate": 1.235924446058845e-05, "loss": 0.2976, "step": 8290 }, { "epoch": 1.5074464220849983, "grad_norm": 27.825714111328125, "learning_rate": 1.2313839447875046e-05, "loss": 0.2812, "step": 8300 }, { "epoch": 1.5092626225935342, "grad_norm": 18.06635093688965, "learning_rate": 1.2268434435161642e-05, "loss": 0.3069, "step": 8310 }, { "epoch": 1.5110788231020704, "grad_norm": 10.651163101196289, "learning_rate": 1.2223029422448239e-05, "loss": 0.5344, "step": 8320 }, { "epoch": 1.5128950236106067, "grad_norm": 9.965625762939453, "learning_rate": 1.2177624409734835e-05, "loss": 0.3103, "step": 8330 }, { "epoch": 1.5147112241191427, "grad_norm": 23.21745491027832, "learning_rate": 1.2132219397021431e-05, "loss": 0.2692, "step": 8340 }, { "epoch": 1.5165274246276788, "grad_norm": 18.808652877807617, "learning_rate": 1.2086814384308027e-05, "loss": 0.156, "step": 8350 }, { "epoch": 1.518343625136215, "grad_norm": 21.283294677734375, "learning_rate": 1.2041409371594625e-05, "loss": 0.3646, "step": 8360 }, { "epoch": 1.5201598256447513, "grad_norm": 21.1343936920166, "learning_rate": 1.1996004358881222e-05, "loss": 0.3235, "step": 8370 }, { "epoch": 1.5219760261532873, "grad_norm": 15.289054870605469, "learning_rate": 1.1950599346167818e-05, "loss": 0.2583, "step": 8380 }, { "epoch": 1.5237922266618233, "grad_norm": 15.304503440856934, "learning_rate": 1.1905194333454414e-05, "loss": 0.3819, "step": 8390 }, { "epoch": 1.5256084271703596, "grad_norm": 22.24407196044922, "learning_rate": 1.185978932074101e-05, "loss": 0.2542, "step": 8400 }, { "epoch": 1.5256084271703596, "eval_accuracy": 0.8843314191960624, "eval_f1": 0.8780677678973086, "eval_loss": 0.3471013903617859, "eval_precision": 0.8764668066951888, "eval_recall": 0.8831446649071804, "eval_runtime": 12.1617, "eval_samples_per_second": 100.233, "eval_steps_per_second": 6.331, "step": 8400 }, { "epoch": 1.5274246276788959, "grad_norm": 27.7410831451416, "learning_rate": 1.1814384308027607e-05, "loss": 0.3286, "step": 8410 }, { "epoch": 1.529240828187432, "grad_norm": 19.968013763427734, "learning_rate": 1.1768979295314203e-05, "loss": 0.2745, "step": 8420 }, { "epoch": 1.531057028695968, "grad_norm": 4.940372467041016, "learning_rate": 1.17235742826008e-05, "loss": 0.2978, "step": 8430 }, { "epoch": 1.5328732292045042, "grad_norm": 12.394369125366211, "learning_rate": 1.1678169269887395e-05, "loss": 0.2675, "step": 8440 }, { "epoch": 1.5346894297130405, "grad_norm": 14.312457084655762, "learning_rate": 1.1632764257173992e-05, "loss": 0.3254, "step": 8450 }, { "epoch": 1.5365056302215765, "grad_norm": 18.364046096801758, "learning_rate": 1.158735924446059e-05, "loss": 0.2875, "step": 8460 }, { "epoch": 1.5383218307301125, "grad_norm": 20.195308685302734, "learning_rate": 1.1541954231747186e-05, "loss": 0.2987, "step": 8470 }, { "epoch": 1.5401380312386488, "grad_norm": 14.188733100891113, "learning_rate": 1.1496549219033782e-05, "loss": 0.3537, "step": 8480 }, { "epoch": 1.541954231747185, "grad_norm": 8.175540924072266, "learning_rate": 1.1451144206320378e-05, "loss": 0.2177, "step": 8490 }, { "epoch": 1.543770432255721, "grad_norm": 18.209714889526367, "learning_rate": 1.1405739193606975e-05, "loss": 0.3042, "step": 8500 }, { "epoch": 1.5455866327642571, "grad_norm": 9.630953788757324, "learning_rate": 1.136033418089357e-05, "loss": 0.2883, "step": 8510 }, { "epoch": 1.5474028332727934, "grad_norm": 7.398960113525391, "learning_rate": 1.1314929168180167e-05, "loss": 0.2416, "step": 8520 }, { "epoch": 1.5492190337813294, "grad_norm": 16.70703887939453, "learning_rate": 1.1269524155466763e-05, "loss": 0.3105, "step": 8530 }, { "epoch": 1.5510352342898654, "grad_norm": 13.721776008605957, "learning_rate": 1.122411914275336e-05, "loss": 0.2512, "step": 8540 }, { "epoch": 1.5528514347984017, "grad_norm": 15.949941635131836, "learning_rate": 1.1178714130039956e-05, "loss": 0.2245, "step": 8550 }, { "epoch": 1.554667635306938, "grad_norm": 15.553458213806152, "learning_rate": 1.1133309117326552e-05, "loss": 0.2002, "step": 8560 }, { "epoch": 1.556483835815474, "grad_norm": 5.081323623657227, "learning_rate": 1.108790410461315e-05, "loss": 0.231, "step": 8570 }, { "epoch": 1.55830003632401, "grad_norm": 11.08918285369873, "learning_rate": 1.1042499091899746e-05, "loss": 0.2269, "step": 8580 }, { "epoch": 1.5601162368325463, "grad_norm": 16.481983184814453, "learning_rate": 1.0997094079186343e-05, "loss": 0.2977, "step": 8590 }, { "epoch": 1.5619324373410826, "grad_norm": 22.30940055847168, "learning_rate": 1.0951689066472939e-05, "loss": 0.3221, "step": 8600 }, { "epoch": 1.5637486378496186, "grad_norm": 26.49005126953125, "learning_rate": 1.0906284053759535e-05, "loss": 0.3319, "step": 8610 }, { "epoch": 1.5655648383581546, "grad_norm": 18.184383392333984, "learning_rate": 1.0860879041046131e-05, "loss": 0.3648, "step": 8620 }, { "epoch": 1.567381038866691, "grad_norm": 7.18729829788208, "learning_rate": 1.0815474028332728e-05, "loss": 0.3291, "step": 8630 }, { "epoch": 1.5691972393752271, "grad_norm": 24.356779098510742, "learning_rate": 1.0770069015619324e-05, "loss": 0.1853, "step": 8640 }, { "epoch": 1.5710134398837632, "grad_norm": 13.144723892211914, "learning_rate": 1.0724664002905922e-05, "loss": 0.2874, "step": 8650 }, { "epoch": 1.5728296403922992, "grad_norm": 8.509248733520508, "learning_rate": 1.0679258990192518e-05, "loss": 0.1815, "step": 8660 }, { "epoch": 1.5746458409008355, "grad_norm": 19.468769073486328, "learning_rate": 1.0633853977479114e-05, "loss": 0.223, "step": 8670 }, { "epoch": 1.5764620414093717, "grad_norm": 19.752363204956055, "learning_rate": 1.058844896476571e-05, "loss": 0.2087, "step": 8680 }, { "epoch": 1.5782782419179078, "grad_norm": 8.5270414352417, "learning_rate": 1.0543043952052307e-05, "loss": 0.3123, "step": 8690 }, { "epoch": 1.5800944424264438, "grad_norm": 11.519478797912598, "learning_rate": 1.0497638939338905e-05, "loss": 0.407, "step": 8700 }, { "epoch": 1.5800944424264438, "eval_accuracy": 0.889253486464315, "eval_f1": 0.8844054402447329, "eval_loss": 0.35321420431137085, "eval_precision": 0.8739349747133954, "eval_recall": 0.8988169397446168, "eval_runtime": 12.1712, "eval_samples_per_second": 100.155, "eval_steps_per_second": 6.326, "step": 8700 }, { "epoch": 1.58191064293498, "grad_norm": 5.541025638580322, "learning_rate": 1.0452233926625501e-05, "loss": 0.3838, "step": 8710 }, { "epoch": 1.5837268434435163, "grad_norm": 26.33243179321289, "learning_rate": 1.0406828913912097e-05, "loss": 0.4307, "step": 8720 }, { "epoch": 1.5855430439520521, "grad_norm": 17.7266788482666, "learning_rate": 1.0361423901198693e-05, "loss": 0.4372, "step": 8730 }, { "epoch": 1.5873592444605884, "grad_norm": 12.146345138549805, "learning_rate": 1.031601888848529e-05, "loss": 0.3942, "step": 8740 }, { "epoch": 1.5891754449691247, "grad_norm": 15.945060729980469, "learning_rate": 1.0270613875771886e-05, "loss": 0.3127, "step": 8750 }, { "epoch": 1.5909916454776607, "grad_norm": 15.658045768737793, "learning_rate": 1.0225208863058482e-05, "loss": 0.1777, "step": 8760 }, { "epoch": 1.5928078459861967, "grad_norm": 8.869367599487305, "learning_rate": 1.0179803850345078e-05, "loss": 0.3081, "step": 8770 }, { "epoch": 1.594624046494733, "grad_norm": 15.344075202941895, "learning_rate": 1.0134398837631676e-05, "loss": 0.291, "step": 8780 }, { "epoch": 1.5964402470032693, "grad_norm": 14.211679458618164, "learning_rate": 1.0088993824918273e-05, "loss": 0.2659, "step": 8790 }, { "epoch": 1.5982564475118053, "grad_norm": 26.636606216430664, "learning_rate": 1.0043588812204869e-05, "loss": 0.2606, "step": 8800 }, { "epoch": 1.6000726480203413, "grad_norm": 20.942895889282227, "learning_rate": 9.998183799491465e-06, "loss": 0.1804, "step": 8810 }, { "epoch": 1.6018888485288776, "grad_norm": 16.17045021057129, "learning_rate": 9.952778786778061e-06, "loss": 0.2943, "step": 8820 }, { "epoch": 1.6037050490374138, "grad_norm": 6.153861999511719, "learning_rate": 9.907373774064658e-06, "loss": 0.3264, "step": 8830 }, { "epoch": 1.6055212495459499, "grad_norm": 21.781164169311523, "learning_rate": 9.861968761351254e-06, "loss": 0.4721, "step": 8840 }, { "epoch": 1.607337450054486, "grad_norm": 21.82793617248535, "learning_rate": 9.81656374863785e-06, "loss": 0.2914, "step": 8850 }, { "epoch": 1.6091536505630222, "grad_norm": 21.26590919494629, "learning_rate": 9.771158735924446e-06, "loss": 0.256, "step": 8860 }, { "epoch": 1.6109698510715584, "grad_norm": 10.11241626739502, "learning_rate": 9.725753723211043e-06, "loss": 0.2504, "step": 8870 }, { "epoch": 1.6127860515800945, "grad_norm": 4.329545021057129, "learning_rate": 9.68034871049764e-06, "loss": 0.272, "step": 8880 }, { "epoch": 1.6146022520886305, "grad_norm": 13.154899597167969, "learning_rate": 9.634943697784237e-06, "loss": 0.2627, "step": 8890 }, { "epoch": 1.6164184525971668, "grad_norm": 21.979530334472656, "learning_rate": 9.589538685070833e-06, "loss": 0.2251, "step": 8900 }, { "epoch": 1.618234653105703, "grad_norm": 7.475334167480469, "learning_rate": 9.54413367235743e-06, "loss": 0.268, "step": 8910 }, { "epoch": 1.620050853614239, "grad_norm": 24.20920753479004, "learning_rate": 9.498728659644026e-06, "loss": 0.3187, "step": 8920 }, { "epoch": 1.621867054122775, "grad_norm": 14.770585060119629, "learning_rate": 9.453323646930622e-06, "loss": 0.2269, "step": 8930 }, { "epoch": 1.6236832546313114, "grad_norm": 14.928208351135254, "learning_rate": 9.407918634217218e-06, "loss": 0.2472, "step": 8940 }, { "epoch": 1.6254994551398474, "grad_norm": 10.5422945022583, "learning_rate": 9.362513621503814e-06, "loss": 0.4042, "step": 8950 }, { "epoch": 1.6273156556483834, "grad_norm": 16.635868072509766, "learning_rate": 9.31710860879041e-06, "loss": 0.2717, "step": 8960 }, { "epoch": 1.6291318561569197, "grad_norm": 18.028661727905273, "learning_rate": 9.271703596077007e-06, "loss": 0.1889, "step": 8970 }, { "epoch": 1.630948056665456, "grad_norm": 14.402594566345215, "learning_rate": 9.226298583363603e-06, "loss": 0.2593, "step": 8980 }, { "epoch": 1.632764257173992, "grad_norm": 18.25139617919922, "learning_rate": 9.180893570650201e-06, "loss": 0.4313, "step": 8990 }, { "epoch": 1.634580457682528, "grad_norm": 15.58337688446045, "learning_rate": 9.135488557936797e-06, "loss": 0.2691, "step": 9000 }, { "epoch": 1.634580457682528, "eval_accuracy": 0.8859721082854799, "eval_f1": 0.8808923670444524, "eval_loss": 0.3507283329963684, "eval_precision": 0.8735378654629461, "eval_recall": 0.8903885563547935, "eval_runtime": 12.1871, "eval_samples_per_second": 100.024, "eval_steps_per_second": 6.318, "step": 9000 }, { "epoch": 1.6363966581910643, "grad_norm": 16.187101364135742, "learning_rate": 9.090083545223393e-06, "loss": 0.3231, "step": 9010 }, { "epoch": 1.6382128586996005, "grad_norm": 10.623252868652344, "learning_rate": 9.04467853250999e-06, "loss": 0.2615, "step": 9020 }, { "epoch": 1.6400290592081366, "grad_norm": 16.480899810791016, "learning_rate": 8.999273519796586e-06, "loss": 0.1975, "step": 9030 }, { "epoch": 1.6418452597166726, "grad_norm": 27.12870979309082, "learning_rate": 8.953868507083182e-06, "loss": 0.3049, "step": 9040 }, { "epoch": 1.6436614602252089, "grad_norm": 29.148317337036133, "learning_rate": 8.908463494369778e-06, "loss": 0.3735, "step": 9050 }, { "epoch": 1.6454776607337451, "grad_norm": 6.015985012054443, "learning_rate": 8.863058481656375e-06, "loss": 0.1824, "step": 9060 }, { "epoch": 1.6472938612422812, "grad_norm": 8.818500518798828, "learning_rate": 8.817653468942971e-06, "loss": 0.2414, "step": 9070 }, { "epoch": 1.6491100617508172, "grad_norm": 27.29248809814453, "learning_rate": 8.772248456229567e-06, "loss": 0.4476, "step": 9080 }, { "epoch": 1.6509262622593535, "grad_norm": 3.913367986679077, "learning_rate": 8.726843443516165e-06, "loss": 0.2849, "step": 9090 }, { "epoch": 1.6527424627678897, "grad_norm": 21.9657039642334, "learning_rate": 8.681438430802761e-06, "loss": 0.3711, "step": 9100 }, { "epoch": 1.6545586632764258, "grad_norm": 14.766958236694336, "learning_rate": 8.636033418089358e-06, "loss": 0.3725, "step": 9110 }, { "epoch": 1.6563748637849618, "grad_norm": 14.37176513671875, "learning_rate": 8.590628405375954e-06, "loss": 0.2172, "step": 9120 }, { "epoch": 1.658191064293498, "grad_norm": 5.39981746673584, "learning_rate": 8.54522339266255e-06, "loss": 0.422, "step": 9130 }, { "epoch": 1.6600072648020343, "grad_norm": 14.968268394470215, "learning_rate": 8.499818379949146e-06, "loss": 0.3144, "step": 9140 }, { "epoch": 1.6618234653105701, "grad_norm": 5.774266242980957, "learning_rate": 8.454413367235743e-06, "loss": 0.2657, "step": 9150 }, { "epoch": 1.6636396658191064, "grad_norm": 20.72484016418457, "learning_rate": 8.409008354522339e-06, "loss": 0.2577, "step": 9160 }, { "epoch": 1.6654558663276426, "grad_norm": 18.517642974853516, "learning_rate": 8.363603341808935e-06, "loss": 0.4908, "step": 9170 }, { "epoch": 1.6672720668361787, "grad_norm": 3.34096622467041, "learning_rate": 8.318198329095531e-06, "loss": 0.3054, "step": 9180 }, { "epoch": 1.6690882673447147, "grad_norm": 15.43202018737793, "learning_rate": 8.272793316382128e-06, "loss": 0.2922, "step": 9190 }, { "epoch": 1.670904467853251, "grad_norm": 9.061037063598633, "learning_rate": 8.227388303668726e-06, "loss": 0.2724, "step": 9200 }, { "epoch": 1.6727206683617872, "grad_norm": 7.940707206726074, "learning_rate": 8.181983290955322e-06, "loss": 0.2115, "step": 9210 }, { "epoch": 1.6745368688703233, "grad_norm": 5.944194793701172, "learning_rate": 8.136578278241918e-06, "loss": 0.2005, "step": 9220 }, { "epoch": 1.6763530693788593, "grad_norm": 14.915303230285645, "learning_rate": 8.091173265528514e-06, "loss": 0.3257, "step": 9230 }, { "epoch": 1.6781692698873956, "grad_norm": 17.358768463134766, "learning_rate": 8.04576825281511e-06, "loss": 0.4163, "step": 9240 }, { "epoch": 1.6799854703959318, "grad_norm": 16.51521110534668, "learning_rate": 8.000363240101707e-06, "loss": 0.282, "step": 9250 }, { "epoch": 1.6818016709044679, "grad_norm": 12.452226638793945, "learning_rate": 7.954958227388303e-06, "loss": 0.2801, "step": 9260 }, { "epoch": 1.683617871413004, "grad_norm": 21.576208114624023, "learning_rate": 7.909553214674901e-06, "loss": 0.3299, "step": 9270 }, { "epoch": 1.6854340719215402, "grad_norm": 17.944128036499023, "learning_rate": 7.864148201961497e-06, "loss": 0.302, "step": 9280 }, { "epoch": 1.6872502724300764, "grad_norm": 16.343204498291016, "learning_rate": 7.818743189248093e-06, "loss": 0.2561, "step": 9290 }, { "epoch": 1.6890664729386125, "grad_norm": 12.301880836486816, "learning_rate": 7.77333817653469e-06, "loss": 0.3478, "step": 9300 }, { "epoch": 1.6890664729386125, "eval_accuracy": 0.8884331419196062, "eval_f1": 0.8845863378573883, "eval_loss": 0.33353373408317566, "eval_precision": 0.8812408616942775, "eval_recall": 0.8907306256856646, "eval_runtime": 12.1668, "eval_samples_per_second": 100.191, "eval_steps_per_second": 6.329, "step": 9300 }, { "epoch": 1.6908826734471485, "grad_norm": 17.064929962158203, "learning_rate": 7.727933163821286e-06, "loss": 0.3987, "step": 9310 }, { "epoch": 1.6926988739556847, "grad_norm": 6.434702396392822, "learning_rate": 7.682528151107884e-06, "loss": 0.3076, "step": 9320 }, { "epoch": 1.694515074464221, "grad_norm": 6.527015209197998, "learning_rate": 7.63712313839448e-06, "loss": 0.227, "step": 9330 }, { "epoch": 1.696331274972757, "grad_norm": 11.179935455322266, "learning_rate": 7.5917181256810756e-06, "loss": 0.2426, "step": 9340 }, { "epoch": 1.698147475481293, "grad_norm": 10.149739265441895, "learning_rate": 7.546313112967672e-06, "loss": 0.3203, "step": 9350 }, { "epoch": 1.6999636759898293, "grad_norm": 22.71770668029785, "learning_rate": 7.500908100254268e-06, "loss": 0.3208, "step": 9360 }, { "epoch": 1.7017798764983654, "grad_norm": 9.473342895507812, "learning_rate": 7.455503087540864e-06, "loss": 0.3104, "step": 9370 }, { "epoch": 1.7035960770069014, "grad_norm": 15.134129524230957, "learning_rate": 7.4100980748274606e-06, "loss": 0.3233, "step": 9380 }, { "epoch": 1.7054122775154377, "grad_norm": 17.22422981262207, "learning_rate": 7.364693062114058e-06, "loss": 0.2024, "step": 9390 }, { "epoch": 1.707228478023974, "grad_norm": 7.5997724533081055, "learning_rate": 7.319288049400654e-06, "loss": 0.256, "step": 9400 }, { "epoch": 1.70904467853251, "grad_norm": 3.4972565174102783, "learning_rate": 7.273883036687251e-06, "loss": 0.2396, "step": 9410 }, { "epoch": 1.710860879041046, "grad_norm": 14.709694862365723, "learning_rate": 7.228478023973847e-06, "loss": 0.4198, "step": 9420 }, { "epoch": 1.7126770795495823, "grad_norm": 6.8229546546936035, "learning_rate": 7.1830730112604435e-06, "loss": 0.3004, "step": 9430 }, { "epoch": 1.7144932800581185, "grad_norm": 19.955167770385742, "learning_rate": 7.13766799854704e-06, "loss": 0.3674, "step": 9440 }, { "epoch": 1.7163094805666546, "grad_norm": 16.53900909423828, "learning_rate": 7.092262985833637e-06, "loss": 0.5129, "step": 9450 }, { "epoch": 1.7181256810751906, "grad_norm": 19.58238983154297, "learning_rate": 7.046857973120233e-06, "loss": 0.3268, "step": 9460 }, { "epoch": 1.7199418815837269, "grad_norm": 17.560192108154297, "learning_rate": 7.001452960406829e-06, "loss": 0.1925, "step": 9470 }, { "epoch": 1.721758082092263, "grad_norm": 12.490224838256836, "learning_rate": 6.956047947693426e-06, "loss": 0.215, "step": 9480 }, { "epoch": 1.7235742826007991, "grad_norm": 11.736912727355957, "learning_rate": 6.910642934980022e-06, "loss": 0.2565, "step": 9490 }, { "epoch": 1.7253904831093352, "grad_norm": 16.065710067749023, "learning_rate": 6.865237922266618e-06, "loss": 0.2175, "step": 9500 }, { "epoch": 1.7272066836178714, "grad_norm": 24.19011116027832, "learning_rate": 6.819832909553216e-06, "loss": 0.265, "step": 9510 }, { "epoch": 1.7290228841264077, "grad_norm": 17.26852798461914, "learning_rate": 6.774427896839812e-06, "loss": 0.2567, "step": 9520 }, { "epoch": 1.7308390846349437, "grad_norm": 15.206780433654785, "learning_rate": 6.7290228841264085e-06, "loss": 0.385, "step": 9530 }, { "epoch": 1.7326552851434798, "grad_norm": 25.0367374420166, "learning_rate": 6.683617871413005e-06, "loss": 0.3845, "step": 9540 }, { "epoch": 1.734471485652016, "grad_norm": 6.6783270835876465, "learning_rate": 6.638212858699601e-06, "loss": 0.2297, "step": 9550 }, { "epoch": 1.7362876861605523, "grad_norm": 23.788753509521484, "learning_rate": 6.592807845986197e-06, "loss": 0.2331, "step": 9560 }, { "epoch": 1.738103886669088, "grad_norm": 15.42270278930664, "learning_rate": 6.5474028332727935e-06, "loss": 0.3213, "step": 9570 }, { "epoch": 1.7399200871776244, "grad_norm": 13.071663856506348, "learning_rate": 6.50199782055939e-06, "loss": 0.34, "step": 9580 }, { "epoch": 1.7417362876861606, "grad_norm": 10.215317726135254, "learning_rate": 6.456592807845986e-06, "loss": 0.3863, "step": 9590 }, { "epoch": 1.7435524881946967, "grad_norm": 18.26382064819336, "learning_rate": 6.411187795132582e-06, "loss": 0.1977, "step": 9600 }, { "epoch": 1.7435524881946967, "eval_accuracy": 0.8917145200984413, "eval_f1": 0.8880029159699074, "eval_loss": 0.32050377130508423, "eval_precision": 0.8856300730561886, "eval_recall": 0.8930436229083021, "eval_runtime": 12.1617, "eval_samples_per_second": 100.233, "eval_steps_per_second": 6.331, "step": 9600 }, { "epoch": 1.7453686887032327, "grad_norm": 28.250118255615234, "learning_rate": 6.3657827824191785e-06, "loss": 0.5122, "step": 9610 }, { "epoch": 1.747184889211769, "grad_norm": 25.557300567626953, "learning_rate": 6.3203777697057765e-06, "loss": 0.2384, "step": 9620 }, { "epoch": 1.7490010897203052, "grad_norm": 14.092631340026855, "learning_rate": 6.274972756992373e-06, "loss": 0.224, "step": 9630 }, { "epoch": 1.7508172902288412, "grad_norm": 16.70784568786621, "learning_rate": 6.229567744278968e-06, "loss": 0.2133, "step": 9640 }, { "epoch": 1.7526334907373773, "grad_norm": 13.771525382995605, "learning_rate": 6.184162731565565e-06, "loss": 0.4779, "step": 9650 }, { "epoch": 1.7544496912459135, "grad_norm": 13.317017555236816, "learning_rate": 6.1387577188521614e-06, "loss": 0.188, "step": 9660 }, { "epoch": 1.7562658917544498, "grad_norm": 16.88526153564453, "learning_rate": 6.093352706138758e-06, "loss": 0.2979, "step": 9670 }, { "epoch": 1.7580820922629858, "grad_norm": 21.325788497924805, "learning_rate": 6.047947693425354e-06, "loss": 0.31, "step": 9680 }, { "epoch": 1.7598982927715219, "grad_norm": 20.396289825439453, "learning_rate": 6.00254268071195e-06, "loss": 0.308, "step": 9690 }, { "epoch": 1.7617144932800581, "grad_norm": 15.008870124816895, "learning_rate": 5.957137667998547e-06, "loss": 0.1889, "step": 9700 }, { "epoch": 1.7635306937885944, "grad_norm": 15.86091136932373, "learning_rate": 5.9117326552851435e-06, "loss": 0.2219, "step": 9710 }, { "epoch": 1.7653468942971304, "grad_norm": 13.422758102416992, "learning_rate": 5.86632764257174e-06, "loss": 0.2332, "step": 9720 }, { "epoch": 1.7671630948056665, "grad_norm": 13.367154121398926, "learning_rate": 5.820922629858337e-06, "loss": 0.2688, "step": 9730 }, { "epoch": 1.7689792953142027, "grad_norm": 9.540538787841797, "learning_rate": 5.775517617144933e-06, "loss": 0.1648, "step": 9740 }, { "epoch": 1.770795495822739, "grad_norm": 1.7264131307601929, "learning_rate": 5.73011260443153e-06, "loss": 0.3343, "step": 9750 }, { "epoch": 1.772611696331275, "grad_norm": 12.68677806854248, "learning_rate": 5.6847075917181265e-06, "loss": 0.181, "step": 9760 }, { "epoch": 1.774427896839811, "grad_norm": 18.84226417541504, "learning_rate": 5.639302579004723e-06, "loss": 0.3011, "step": 9770 }, { "epoch": 1.7762440973483473, "grad_norm": 18.599489212036133, "learning_rate": 5.593897566291319e-06, "loss": 0.3759, "step": 9780 }, { "epoch": 1.7780602978568834, "grad_norm": 24.071170806884766, "learning_rate": 5.548492553577915e-06, "loss": 0.4405, "step": 9790 }, { "epoch": 1.7798764983654194, "grad_norm": 20.368112564086914, "learning_rate": 5.503087540864512e-06, "loss": 0.1978, "step": 9800 }, { "epoch": 1.7816926988739556, "grad_norm": 4.559482574462891, "learning_rate": 5.4576825281511086e-06, "loss": 0.2037, "step": 9810 }, { "epoch": 1.783508899382492, "grad_norm": 22.67530059814453, "learning_rate": 5.412277515437705e-06, "loss": 0.291, "step": 9820 }, { "epoch": 1.785325099891028, "grad_norm": 14.617053031921387, "learning_rate": 5.366872502724301e-06, "loss": 0.2558, "step": 9830 }, { "epoch": 1.787141300399564, "grad_norm": 9.699358940124512, "learning_rate": 5.321467490010897e-06, "loss": 0.4875, "step": 9840 }, { "epoch": 1.7889575009081002, "grad_norm": 7.0100908279418945, "learning_rate": 5.2760624772974936e-06, "loss": 0.2908, "step": 9850 }, { "epoch": 1.7907737014166365, "grad_norm": 18.469934463500977, "learning_rate": 5.230657464584091e-06, "loss": 0.3165, "step": 9860 }, { "epoch": 1.7925899019251725, "grad_norm": 8.11326789855957, "learning_rate": 5.185252451870687e-06, "loss": 0.2946, "step": 9870 }, { "epoch": 1.7944061024337086, "grad_norm": 14.999030113220215, "learning_rate": 5.139847439157283e-06, "loss": 0.2645, "step": 9880 }, { "epoch": 1.7962223029422448, "grad_norm": 11.65622615814209, "learning_rate": 5.094442426443879e-06, "loss": 0.3627, "step": 9890 }, { "epoch": 1.798038503450781, "grad_norm": 11.40311336517334, "learning_rate": 5.049037413730476e-06, "loss": 0.1679, "step": 9900 }, { "epoch": 1.798038503450781, "eval_accuracy": 0.8941755537325676, "eval_f1": 0.8909275000598975, "eval_loss": 0.31260696053504944, "eval_precision": 0.8874307991095873, "eval_recall": 0.8966734867740875, "eval_runtime": 12.1595, "eval_samples_per_second": 100.251, "eval_steps_per_second": 6.333, "step": 9900 }, { "epoch": 1.7998547039593171, "grad_norm": 17.762371063232422, "learning_rate": 5.003632401017073e-06, "loss": 0.2242, "step": 9910 }, { "epoch": 1.8016709044678532, "grad_norm": 8.07528018951416, "learning_rate": 4.958227388303669e-06, "loss": 0.305, "step": 9920 }, { "epoch": 1.8034871049763894, "grad_norm": 8.580565452575684, "learning_rate": 4.912822375590265e-06, "loss": 0.3421, "step": 9930 }, { "epoch": 1.8053033054849257, "grad_norm": 13.502715110778809, "learning_rate": 4.8674173628768615e-06, "loss": 0.1513, "step": 9940 }, { "epoch": 1.8071195059934617, "grad_norm": 25.522857666015625, "learning_rate": 4.822012350163458e-06, "loss": 0.4245, "step": 9950 }, { "epoch": 1.8089357065019978, "grad_norm": 11.059943199157715, "learning_rate": 4.776607337450055e-06, "loss": 0.2588, "step": 9960 }, { "epoch": 1.810751907010534, "grad_norm": 25.464778900146484, "learning_rate": 4.731202324736651e-06, "loss": 0.3359, "step": 9970 }, { "epoch": 1.8125681075190703, "grad_norm": 14.878934860229492, "learning_rate": 4.685797312023247e-06, "loss": 0.2937, "step": 9980 }, { "epoch": 1.814384308027606, "grad_norm": 17.37669563293457, "learning_rate": 4.640392299309844e-06, "loss": 0.2071, "step": 9990 }, { "epoch": 1.8162005085361423, "grad_norm": 5.666213035583496, "learning_rate": 4.59498728659644e-06, "loss": 0.2143, "step": 10000 }, { "epoch": 1.8180167090446786, "grad_norm": 10.074169158935547, "learning_rate": 4.549582273883037e-06, "loss": 0.3549, "step": 10010 }, { "epoch": 1.8198329095532146, "grad_norm": 22.636964797973633, "learning_rate": 4.504177261169633e-06, "loss": 0.3005, "step": 10020 }, { "epoch": 1.8216491100617507, "grad_norm": 21.29734992980957, "learning_rate": 4.45877224845623e-06, "loss": 0.3036, "step": 10030 }, { "epoch": 1.823465310570287, "grad_norm": 18.074115753173828, "learning_rate": 4.4133672357428265e-06, "loss": 0.2375, "step": 10040 }, { "epoch": 1.8252815110788232, "grad_norm": 13.343547821044922, "learning_rate": 4.367962223029423e-06, "loss": 0.3563, "step": 10050 }, { "epoch": 1.8270977115873592, "grad_norm": 15.384708404541016, "learning_rate": 4.322557210316019e-06, "loss": 0.3085, "step": 10060 }, { "epoch": 1.8289139120958953, "grad_norm": 22.345226287841797, "learning_rate": 4.277152197602616e-06, "loss": 0.2529, "step": 10070 }, { "epoch": 1.8307301126044315, "grad_norm": 3.8110063076019287, "learning_rate": 4.231747184889212e-06, "loss": 0.2276, "step": 10080 }, { "epoch": 1.8325463131129678, "grad_norm": 14.203871726989746, "learning_rate": 4.186342172175809e-06, "loss": 0.2813, "step": 10090 }, { "epoch": 1.8343625136215038, "grad_norm": 18.770957946777344, "learning_rate": 4.140937159462405e-06, "loss": 0.2534, "step": 10100 }, { "epoch": 1.8361787141300399, "grad_norm": 19.307645797729492, "learning_rate": 4.095532146749001e-06, "loss": 0.2505, "step": 10110 }, { "epoch": 1.8379949146385761, "grad_norm": 12.851605415344238, "learning_rate": 4.050127134035598e-06, "loss": 0.1797, "step": 10120 }, { "epoch": 1.8398111151471124, "grad_norm": 17.81605339050293, "learning_rate": 4.0047221213221944e-06, "loss": 0.1766, "step": 10130 }, { "epoch": 1.8416273156556484, "grad_norm": 5.046531677246094, "learning_rate": 3.959317108608791e-06, "loss": 0.2847, "step": 10140 }, { "epoch": 1.8434435161641844, "grad_norm": 15.07939338684082, "learning_rate": 3.913912095895387e-06, "loss": 0.2543, "step": 10150 }, { "epoch": 1.8452597166727207, "grad_norm": 5.4197187423706055, "learning_rate": 3.868507083181983e-06, "loss": 0.276, "step": 10160 }, { "epoch": 1.847075917181257, "grad_norm": 4.651303768157959, "learning_rate": 3.82310207046858e-06, "loss": 0.3026, "step": 10170 }, { "epoch": 1.848892117689793, "grad_norm": 5.725452423095703, "learning_rate": 3.7776970577551765e-06, "loss": 0.321, "step": 10180 }, { "epoch": 1.850708318198329, "grad_norm": 12.18204402923584, "learning_rate": 3.7322920450417728e-06, "loss": 0.2724, "step": 10190 }, { "epoch": 1.8525245187068653, "grad_norm": 3.6419947147369385, "learning_rate": 3.686887032328369e-06, "loss": 0.3451, "step": 10200 }, { "epoch": 1.8525245187068653, "eval_accuracy": 0.896636587366694, "eval_f1": 0.8908937354693052, "eval_loss": 0.31067386269569397, "eval_precision": 0.8844179738985345, "eval_recall": 0.899200938788393, "eval_runtime": 12.1663, "eval_samples_per_second": 100.195, "eval_steps_per_second": 6.329, "step": 10200 }, { "epoch": 1.8543407192154013, "grad_norm": 15.111452102661133, "learning_rate": 3.6414820196149653e-06, "loss": 0.1833, "step": 10210 }, { "epoch": 1.8561569197239374, "grad_norm": 18.825973510742188, "learning_rate": 3.5960770069015624e-06, "loss": 0.3835, "step": 10220 }, { "epoch": 1.8579731202324736, "grad_norm": 12.017671585083008, "learning_rate": 3.5506719941881586e-06, "loss": 0.2242, "step": 10230 }, { "epoch": 1.8597893207410099, "grad_norm": 24.258045196533203, "learning_rate": 3.505266981474755e-06, "loss": 0.4669, "step": 10240 }, { "epoch": 1.861605521249546, "grad_norm": 16.036376953125, "learning_rate": 3.4598619687613515e-06, "loss": 0.3115, "step": 10250 }, { "epoch": 1.863421721758082, "grad_norm": 25.399738311767578, "learning_rate": 3.414456956047948e-06, "loss": 0.3122, "step": 10260 }, { "epoch": 1.8652379222666182, "grad_norm": 20.493247985839844, "learning_rate": 3.369051943334544e-06, "loss": 0.3484, "step": 10270 }, { "epoch": 1.8670541227751545, "grad_norm": 16.851757049560547, "learning_rate": 3.323646930621141e-06, "loss": 0.3464, "step": 10280 }, { "epoch": 1.8688703232836905, "grad_norm": 5.93435001373291, "learning_rate": 3.2782419179077374e-06, "loss": 0.1495, "step": 10290 }, { "epoch": 1.8706865237922266, "grad_norm": 15.460413932800293, "learning_rate": 3.2328369051943336e-06, "loss": 0.2404, "step": 10300 }, { "epoch": 1.8725027243007628, "grad_norm": 14.957904815673828, "learning_rate": 3.18743189248093e-06, "loss": 0.2765, "step": 10310 }, { "epoch": 1.874318924809299, "grad_norm": 24.511220932006836, "learning_rate": 3.142026879767526e-06, "loss": 0.2619, "step": 10320 }, { "epoch": 1.876135125317835, "grad_norm": 15.439196586608887, "learning_rate": 3.096621867054123e-06, "loss": 0.2426, "step": 10330 }, { "epoch": 1.8779513258263711, "grad_norm": 18.046316146850586, "learning_rate": 3.0512168543407195e-06, "loss": 0.2727, "step": 10340 }, { "epoch": 1.8797675263349074, "grad_norm": 9.866218566894531, "learning_rate": 3.0058118416273157e-06, "loss": 0.2591, "step": 10350 }, { "epoch": 1.8815837268434437, "grad_norm": 15.493182182312012, "learning_rate": 2.960406828913912e-06, "loss": 0.2277, "step": 10360 }, { "epoch": 1.8833999273519797, "grad_norm": 13.912703514099121, "learning_rate": 2.9150018162005086e-06, "loss": 0.2501, "step": 10370 }, { "epoch": 1.8852161278605157, "grad_norm": 10.867436408996582, "learning_rate": 2.869596803487105e-06, "loss": 0.3008, "step": 10380 }, { "epoch": 1.887032328369052, "grad_norm": 18.993480682373047, "learning_rate": 2.8241917907737016e-06, "loss": 0.1959, "step": 10390 }, { "epoch": 1.8888485288775883, "grad_norm": 10.849798202514648, "learning_rate": 2.778786778060298e-06, "loss": 0.3108, "step": 10400 }, { "epoch": 1.890664729386124, "grad_norm": 22.30524253845215, "learning_rate": 2.7333817653468945e-06, "loss": 0.3187, "step": 10410 }, { "epoch": 1.8924809298946603, "grad_norm": 4.560070514678955, "learning_rate": 2.687976752633491e-06, "loss": 0.2388, "step": 10420 }, { "epoch": 1.8942971304031966, "grad_norm": 13.859076499938965, "learning_rate": 2.6425717399200874e-06, "loss": 0.2131, "step": 10430 }, { "epoch": 1.8961133309117326, "grad_norm": 4.655661582946777, "learning_rate": 2.5971667272066837e-06, "loss": 0.1688, "step": 10440 }, { "epoch": 1.8979295314202687, "grad_norm": 18.112701416015625, "learning_rate": 2.5517617144932803e-06, "loss": 0.2763, "step": 10450 }, { "epoch": 1.899745731928805, "grad_norm": 9.725381851196289, "learning_rate": 2.5063567017798766e-06, "loss": 0.2694, "step": 10460 }, { "epoch": 1.9015619324373412, "grad_norm": 21.33144760131836, "learning_rate": 2.4609516890664732e-06, "loss": 0.2813, "step": 10470 }, { "epoch": 1.9033781329458772, "grad_norm": 9.807076454162598, "learning_rate": 2.4155466763530695e-06, "loss": 0.2441, "step": 10480 }, { "epoch": 1.9051943334544132, "grad_norm": 5.741889476776123, "learning_rate": 2.3701416636396657e-06, "loss": 0.2222, "step": 10490 }, { "epoch": 1.9070105339629495, "grad_norm": 12.217042922973633, "learning_rate": 2.3247366509262624e-06, "loss": 0.333, "step": 10500 }, { "epoch": 1.9070105339629495, "eval_accuracy": 0.8917145200984413, "eval_f1": 0.8879221499881476, "eval_loss": 0.31236740946769714, "eval_precision": 0.880595048818372, "eval_recall": 0.8979764002075346, "eval_runtime": 12.1598, "eval_samples_per_second": 100.249, "eval_steps_per_second": 6.332, "step": 10500 }, { "epoch": 1.9088267344714858, "grad_norm": 7.902151584625244, "learning_rate": 2.2793316382128587e-06, "loss": 0.2326, "step": 10510 }, { "epoch": 1.9106429349800218, "grad_norm": 17.3931827545166, "learning_rate": 2.233926625499455e-06, "loss": 0.2766, "step": 10520 }, { "epoch": 1.9124591354885578, "grad_norm": 13.122883796691895, "learning_rate": 2.1885216127860516e-06, "loss": 0.2769, "step": 10530 }, { "epoch": 1.914275335997094, "grad_norm": 12.172006607055664, "learning_rate": 2.143116600072648e-06, "loss": 0.2228, "step": 10540 }, { "epoch": 1.9160915365056304, "grad_norm": 6.881805896759033, "learning_rate": 2.0977115873592445e-06, "loss": 0.1705, "step": 10550 }, { "epoch": 1.9179077370141664, "grad_norm": 19.267488479614258, "learning_rate": 2.052306574645841e-06, "loss": 0.3439, "step": 10560 }, { "epoch": 1.9197239375227024, "grad_norm": 4.804058074951172, "learning_rate": 2.0069015619324374e-06, "loss": 0.2191, "step": 10570 }, { "epoch": 1.9215401380312387, "grad_norm": 15.236076354980469, "learning_rate": 1.961496549219034e-06, "loss": 0.2319, "step": 10580 }, { "epoch": 1.923356338539775, "grad_norm": 17.033308029174805, "learning_rate": 1.9160915365056303e-06, "loss": 0.4269, "step": 10590 }, { "epoch": 1.925172539048311, "grad_norm": 15.480613708496094, "learning_rate": 1.870686523792227e-06, "loss": 0.281, "step": 10600 }, { "epoch": 1.926988739556847, "grad_norm": 18.749048233032227, "learning_rate": 1.8252815110788233e-06, "loss": 0.2042, "step": 10610 }, { "epoch": 1.9288049400653833, "grad_norm": 10.639968872070312, "learning_rate": 1.7798764983654195e-06, "loss": 0.3482, "step": 10620 }, { "epoch": 1.9306211405739193, "grad_norm": 19.38547706604004, "learning_rate": 1.7344714856520162e-06, "loss": 0.285, "step": 10630 }, { "epoch": 1.9324373410824554, "grad_norm": 3.3453238010406494, "learning_rate": 1.6890664729386124e-06, "loss": 0.2941, "step": 10640 }, { "epoch": 1.9342535415909916, "grad_norm": 16.062301635742188, "learning_rate": 1.643661460225209e-06, "loss": 0.3375, "step": 10650 }, { "epoch": 1.9360697420995279, "grad_norm": 15.955122947692871, "learning_rate": 1.5982564475118054e-06, "loss": 0.2047, "step": 10660 }, { "epoch": 1.937885942608064, "grad_norm": 25.678117752075195, "learning_rate": 1.5528514347984018e-06, "loss": 0.2348, "step": 10670 }, { "epoch": 1.9397021431166, "grad_norm": 13.082353591918945, "learning_rate": 1.5074464220849983e-06, "loss": 0.2699, "step": 10680 }, { "epoch": 1.9415183436251362, "grad_norm": 15.006932258605957, "learning_rate": 1.4620414093715947e-06, "loss": 0.2522, "step": 10690 }, { "epoch": 1.9433345441336725, "grad_norm": 18.136178970336914, "learning_rate": 1.4166363966581912e-06, "loss": 0.3754, "step": 10700 }, { "epoch": 1.9451507446422085, "grad_norm": 13.17072868347168, "learning_rate": 1.3712313839447874e-06, "loss": 0.2194, "step": 10710 }, { "epoch": 1.9469669451507445, "grad_norm": 16.255809783935547, "learning_rate": 1.325826371231384e-06, "loss": 0.3234, "step": 10720 }, { "epoch": 1.9487831456592808, "grad_norm": 30.35965347290039, "learning_rate": 1.2804213585179804e-06, "loss": 0.3173, "step": 10730 }, { "epoch": 1.950599346167817, "grad_norm": 15.700325965881348, "learning_rate": 1.235016345804577e-06, "loss": 0.2505, "step": 10740 }, { "epoch": 1.952415546676353, "grad_norm": 18.816146850585938, "learning_rate": 1.1896113330911733e-06, "loss": 0.3193, "step": 10750 }, { "epoch": 1.9542317471848891, "grad_norm": 16.41304588317871, "learning_rate": 1.1442063203777698e-06, "loss": 0.2486, "step": 10760 }, { "epoch": 1.9560479476934254, "grad_norm": 16.63722801208496, "learning_rate": 1.0988013076643662e-06, "loss": 0.237, "step": 10770 }, { "epoch": 1.9578641482019616, "grad_norm": 6.757064342498779, "learning_rate": 1.0533962949509627e-06, "loss": 0.2081, "step": 10780 }, { "epoch": 1.9596803487104977, "grad_norm": 3.97802734375, "learning_rate": 1.007991282237559e-06, "loss": 0.2566, "step": 10790 }, { "epoch": 1.9614965492190337, "grad_norm": 5.727281093597412, "learning_rate": 9.625862695241554e-07, "loss": 0.1921, "step": 10800 }, { "epoch": 1.9614965492190337, "eval_accuracy": 0.9007383100902379, "eval_f1": 0.8957933200060735, "eval_loss": 0.3023645579814911, "eval_precision": 0.8902412162565443, "eval_recall": 0.9027657103823287, "eval_runtime": 12.1671, "eval_samples_per_second": 100.188, "eval_steps_per_second": 6.329, "step": 10800 } ], "logging_steps": 10, "max_steps": 11012, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 600, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.701982939756626e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }