{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9614965492190337,
  "eval_steps": 300,
  "global_step": 10800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018162005085361425,
      "grad_norm": 51.7724723815918,
      "learning_rate": 4.99545949872866e-05,
      "loss": 4.15,
      "step": 10
    },
    {
      "epoch": 0.003632401017072285,
      "grad_norm": 39.101844787597656,
      "learning_rate": 4.9909189974573195e-05,
      "loss": 2.9203,
      "step": 20
    },
    {
      "epoch": 0.005448601525608427,
      "grad_norm": 59.884159088134766,
      "learning_rate": 4.9863784961859795e-05,
      "loss": 2.3234,
      "step": 30
    },
    {
      "epoch": 0.00726480203414457,
      "grad_norm": 30.44273567199707,
      "learning_rate": 4.981837994914639e-05,
      "loss": 2.2648,
      "step": 40
    },
    {
      "epoch": 0.009081002542680712,
      "grad_norm": 31.179187774658203,
      "learning_rate": 4.977297493643299e-05,
      "loss": 1.9688,
      "step": 50
    },
    {
      "epoch": 0.010897203051216855,
      "grad_norm": 24.85585594177246,
      "learning_rate": 4.972756992371958e-05,
      "loss": 1.8813,
      "step": 60
    },
    {
      "epoch": 0.012713403559752997,
      "grad_norm": 48.86698532104492,
      "learning_rate": 4.968216491100618e-05,
      "loss": 1.9812,
      "step": 70
    },
    {
      "epoch": 0.01452960406828914,
      "grad_norm": 51.080074310302734,
      "learning_rate": 4.963675989829277e-05,
      "loss": 1.8727,
      "step": 80
    },
    {
      "epoch": 0.01634580457682528,
      "grad_norm": 26.04038429260254,
      "learning_rate": 4.959135488557937e-05,
      "loss": 1.6398,
      "step": 90
    },
    {
      "epoch": 0.018162005085361425,
      "grad_norm": 24.692808151245117,
      "learning_rate": 4.9545949872865965e-05,
      "loss": 1.4898,
      "step": 100
    },
    {
      "epoch": 0.019978205593897565,
      "grad_norm": 20.653032302856445,
      "learning_rate": 4.9500544860152565e-05,
      "loss": 1.177,
      "step": 110
    },
    {
      "epoch": 0.02179440610243371,
      "grad_norm": 37.319427490234375,
      "learning_rate": 4.945513984743916e-05,
      "loss": 1.4352,
      "step": 120
    },
    {
      "epoch": 0.02361060661096985,
      "grad_norm": 22.78856086730957,
      "learning_rate": 4.940973483472576e-05,
      "loss": 1.4187,
      "step": 130
    },
    {
      "epoch": 0.025426807119505995,
      "grad_norm": 34.050315856933594,
      "learning_rate": 4.936432982201235e-05,
      "loss": 1.2316,
      "step": 140
    },
    {
      "epoch": 0.027243007628042135,
      "grad_norm": 18.459930419921875,
      "learning_rate": 4.931892480929895e-05,
      "loss": 1.2225,
      "step": 150
    },
    {
      "epoch": 0.02905920813657828,
      "grad_norm": 18.474990844726562,
      "learning_rate": 4.927351979658554e-05,
      "loss": 1.1586,
      "step": 160
    },
    {
      "epoch": 0.03087540864511442,
      "grad_norm": 31.727245330810547,
      "learning_rate": 4.922811478387214e-05,
      "loss": 1.1371,
      "step": 170
    },
    {
      "epoch": 0.03269160915365056,
      "grad_norm": 33.16598129272461,
      "learning_rate": 4.9182709771158735e-05,
      "loss": 0.8711,
      "step": 180
    },
    {
      "epoch": 0.03450780966218671,
      "grad_norm": 35.099700927734375,
      "learning_rate": 4.9137304758445335e-05,
      "loss": 1.1367,
      "step": 190
    },
    {
      "epoch": 0.03632401017072285,
      "grad_norm": 20.833833694458008,
      "learning_rate": 4.909189974573193e-05,
      "loss": 1.0652,
      "step": 200
    },
    {
      "epoch": 0.03814021067925899,
      "grad_norm": 29.985502243041992,
      "learning_rate": 4.904649473301853e-05,
      "loss": 1.0373,
      "step": 210
    },
    {
      "epoch": 0.03995641118779513,
      "grad_norm": 18.238357543945312,
      "learning_rate": 4.900108972030513e-05,
      "loss": 0.8555,
      "step": 220
    },
    {
      "epoch": 0.04177261169633127,
      "grad_norm": 23.067848205566406,
      "learning_rate": 4.895568470759172e-05,
      "loss": 0.9469,
      "step": 230
    },
    {
      "epoch": 0.04358881220486742,
      "grad_norm": 32.689029693603516,
      "learning_rate": 4.891027969487832e-05,
      "loss": 0.9379,
      "step": 240
    },
    {
      "epoch": 0.04540501271340356,
      "grad_norm": 25.626976013183594,
      "learning_rate": 4.886487468216491e-05,
      "loss": 0.8861,
      "step": 250
    },
    {
      "epoch": 0.0472212132219397,
      "grad_norm": 13.55506706237793,
      "learning_rate": 4.881946966945151e-05,
      "loss": 0.8361,
      "step": 260
    },
    {
      "epoch": 0.04903741373047584,
      "grad_norm": 22.14944839477539,
      "learning_rate": 4.8774064656738104e-05,
      "loss": 0.7471,
      "step": 270
    },
    {
      "epoch": 0.05085361423901199,
      "grad_norm": 20.66185760498047,
      "learning_rate": 4.8728659644024704e-05,
      "loss": 0.815,
      "step": 280
    },
    {
      "epoch": 0.05266981474754813,
      "grad_norm": 22.343624114990234,
      "learning_rate": 4.86832546313113e-05,
      "loss": 0.6479,
      "step": 290
    },
    {
      "epoch": 0.05448601525608427,
      "grad_norm": 23.354530334472656,
      "learning_rate": 4.86378496185979e-05,
      "loss": 0.6668,
      "step": 300
    },
    {
      "epoch": 0.05448601525608427,
      "eval_accuracy": 0.6562756357670222,
      "eval_f1": 0.6498431091967438,
      "eval_loss": 1.0104337930679321,
      "eval_precision": 0.6553569560524448,
      "eval_recall": 0.6624616383794661,
      "eval_runtime": 12.1348,
      "eval_samples_per_second": 100.455,
      "eval_steps_per_second": 6.345,
      "step": 300
    },
    {
      "epoch": 0.05630221576462041,
      "grad_norm": 19.208993911743164,
      "learning_rate": 4.859244460588449e-05,
      "loss": 0.9148,
      "step": 310
    },
    {
      "epoch": 0.05811841627315656,
      "grad_norm": 41.95314025878906,
      "learning_rate": 4.854703959317109e-05,
      "loss": 0.7869,
      "step": 320
    },
    {
      "epoch": 0.0599346167816927,
      "grad_norm": 26.48455047607422,
      "learning_rate": 4.850163458045768e-05,
      "loss": 0.7795,
      "step": 330
    },
    {
      "epoch": 0.06175081729022884,
      "grad_norm": 17.859508514404297,
      "learning_rate": 4.845622956774428e-05,
      "loss": 0.8215,
      "step": 340
    },
    {
      "epoch": 0.06356701779876499,
      "grad_norm": 30.228845596313477,
      "learning_rate": 4.8410824555030874e-05,
      "loss": 0.849,
      "step": 350
    },
    {
      "epoch": 0.06538321830730112,
      "grad_norm": 26.73940086364746,
      "learning_rate": 4.8365419542317474e-05,
      "loss": 0.7037,
      "step": 360
    },
    {
      "epoch": 0.06719941881583727,
      "grad_norm": 28.508052825927734,
      "learning_rate": 4.832001452960407e-05,
      "loss": 0.9711,
      "step": 370
    },
    {
      "epoch": 0.06901561932437342,
      "grad_norm": 26.7029972076416,
      "learning_rate": 4.827460951689067e-05,
      "loss": 0.8039,
      "step": 380
    },
    {
      "epoch": 0.07083181983290955,
      "grad_norm": 20.99094009399414,
      "learning_rate": 4.822920450417726e-05,
      "loss": 0.74,
      "step": 390
    },
    {
      "epoch": 0.0726480203414457,
      "grad_norm": 22.60647201538086,
      "learning_rate": 4.818379949146386e-05,
      "loss": 0.759,
      "step": 400
    },
    {
      "epoch": 0.07446422084998183,
      "grad_norm": 24.6385555267334,
      "learning_rate": 4.813839447875045e-05,
      "loss": 0.9516,
      "step": 410
    },
    {
      "epoch": 0.07628042135851798,
      "grad_norm": 19.862504959106445,
      "learning_rate": 4.809298946603705e-05,
      "loss": 0.6793,
      "step": 420
    },
    {
      "epoch": 0.07809662186705413,
      "grad_norm": 20.13799476623535,
      "learning_rate": 4.804758445332365e-05,
      "loss": 0.6387,
      "step": 430
    },
    {
      "epoch": 0.07991282237559026,
      "grad_norm": 11.443085670471191,
      "learning_rate": 4.8002179440610244e-05,
      "loss": 0.6844,
      "step": 440
    },
    {
      "epoch": 0.08172902288412641,
      "grad_norm": 14.975491523742676,
      "learning_rate": 4.7956774427896844e-05,
      "loss": 0.6252,
      "step": 450
    },
    {
      "epoch": 0.08354522339266254,
      "grad_norm": 34.0562858581543,
      "learning_rate": 4.7911369415183437e-05,
      "loss": 0.841,
      "step": 460
    },
    {
      "epoch": 0.08536142390119869,
      "grad_norm": 14.882052421569824,
      "learning_rate": 4.7865964402470036e-05,
      "loss": 0.6924,
      "step": 470
    },
    {
      "epoch": 0.08717762440973484,
      "grad_norm": 10.927328109741211,
      "learning_rate": 4.782055938975663e-05,
      "loss": 0.6586,
      "step": 480
    },
    {
      "epoch": 0.08899382491827097,
      "grad_norm": 18.295116424560547,
      "learning_rate": 4.777515437704323e-05,
      "loss": 0.7256,
      "step": 490
    },
    {
      "epoch": 0.09081002542680712,
      "grad_norm": 38.24443435668945,
      "learning_rate": 4.772974936432982e-05,
      "loss": 0.7914,
      "step": 500
    },
    {
      "epoch": 0.09262622593534327,
      "grad_norm": 20.668012619018555,
      "learning_rate": 4.768434435161642e-05,
      "loss": 0.5446,
      "step": 510
    },
    {
      "epoch": 0.0944424264438794,
      "grad_norm": 17.62775421142578,
      "learning_rate": 4.7638939338903014e-05,
      "loss": 0.7426,
      "step": 520
    },
    {
      "epoch": 0.09625862695241555,
      "grad_norm": 23.430341720581055,
      "learning_rate": 4.7593534326189614e-05,
      "loss": 0.7063,
      "step": 530
    },
    {
      "epoch": 0.09807482746095168,
      "grad_norm": 30.422704696655273,
      "learning_rate": 4.7548129313476207e-05,
      "loss": 0.6363,
      "step": 540
    },
    {
      "epoch": 0.09989102796948783,
      "grad_norm": 20.155187606811523,
      "learning_rate": 4.7502724300762806e-05,
      "loss": 0.7201,
      "step": 550
    },
    {
      "epoch": 0.10170722847802398,
      "grad_norm": 28.10911750793457,
      "learning_rate": 4.74573192880494e-05,
      "loss": 0.7906,
      "step": 560
    },
    {
      "epoch": 0.10352342898656011,
      "grad_norm": 25.283676147460938,
      "learning_rate": 4.7411914275336e-05,
      "loss": 0.5832,
      "step": 570
    },
    {
      "epoch": 0.10533962949509626,
      "grad_norm": 9.627545356750488,
      "learning_rate": 4.736650926262259e-05,
      "loss": 0.5875,
      "step": 580
    },
    {
      "epoch": 0.10715583000363241,
      "grad_norm": 12.515082359313965,
      "learning_rate": 4.732110424990919e-05,
      "loss": 0.7885,
      "step": 590
    },
    {
      "epoch": 0.10897203051216854,
      "grad_norm": 16.580331802368164,
      "learning_rate": 4.7275699237195784e-05,
      "loss": 0.7129,
      "step": 600
    },
    {
      "epoch": 0.10897203051216854,
      "eval_accuracy": 0.7235438884331419,
      "eval_f1": 0.7205989414762296,
      "eval_loss": 0.7799906134605408,
      "eval_precision": 0.730319610475981,
      "eval_recall": 0.7152200658078283,
      "eval_runtime": 12.0682,
      "eval_samples_per_second": 101.009,
      "eval_steps_per_second": 6.38,
      "step": 600
    },
    {
      "epoch": 0.11078823102070469,
      "grad_norm": 18.379596710205078,
      "learning_rate": 4.7230294224482384e-05,
      "loss": 0.5647,
      "step": 610
    },
    {
      "epoch": 0.11260443152924082,
      "grad_norm": 23.94647789001465,
      "learning_rate": 4.7184889211768977e-05,
      "loss": 0.7223,
      "step": 620
    },
    {
      "epoch": 0.11442063203777697,
      "grad_norm": 16.6752986907959,
      "learning_rate": 4.7139484199055576e-05,
      "loss": 0.5555,
      "step": 630
    },
    {
      "epoch": 0.11623683254631312,
      "grad_norm": 19.58445930480957,
      "learning_rate": 4.7094079186342176e-05,
      "loss": 0.5701,
      "step": 640
    },
    {
      "epoch": 0.11805303305484925,
      "grad_norm": 9.683353424072266,
      "learning_rate": 4.704867417362877e-05,
      "loss": 0.5934,
      "step": 650
    },
    {
      "epoch": 0.1198692335633854,
      "grad_norm": 16.68168830871582,
      "learning_rate": 4.700326916091537e-05,
      "loss": 0.6471,
      "step": 660
    },
    {
      "epoch": 0.12168543407192153,
      "grad_norm": 16.534196853637695,
      "learning_rate": 4.695786414820196e-05,
      "loss": 0.7002,
      "step": 670
    },
    {
      "epoch": 0.12350163458045768,
      "grad_norm": 12.310294151306152,
      "learning_rate": 4.691245913548856e-05,
      "loss": 0.6939,
      "step": 680
    },
    {
      "epoch": 0.12531783508899383,
      "grad_norm": 24.0400333404541,
      "learning_rate": 4.6867054122775154e-05,
      "loss": 0.6732,
      "step": 690
    },
    {
      "epoch": 0.12713403559752998,
      "grad_norm": 22.213275909423828,
      "learning_rate": 4.682164911006175e-05,
      "loss": 0.4664,
      "step": 700
    },
    {
      "epoch": 0.1289502361060661,
      "grad_norm": 41.8505973815918,
      "learning_rate": 4.6776244097348346e-05,
      "loss": 0.5713,
      "step": 710
    },
    {
      "epoch": 0.13076643661460224,
      "grad_norm": 39.898765563964844,
      "learning_rate": 4.6730839084634946e-05,
      "loss": 0.8389,
      "step": 720
    },
    {
      "epoch": 0.1325826371231384,
      "grad_norm": 74.52597045898438,
      "learning_rate": 4.668543407192154e-05,
      "loss": 0.7645,
      "step": 730
    },
    {
      "epoch": 0.13439883763167454,
      "grad_norm": 15.932312965393066,
      "learning_rate": 4.664002905920814e-05,
      "loss": 0.8391,
      "step": 740
    },
    {
      "epoch": 0.1362150381402107,
      "grad_norm": 18.114957809448242,
      "learning_rate": 4.659462404649473e-05,
      "loss": 0.7943,
      "step": 750
    },
    {
      "epoch": 0.13803123864874683,
      "grad_norm": 16.521848678588867,
      "learning_rate": 4.654921903378133e-05,
      "loss": 0.5992,
      "step": 760
    },
    {
      "epoch": 0.13984743915728295,
      "grad_norm": 27.91718101501465,
      "learning_rate": 4.6503814021067924e-05,
      "loss": 0.7793,
      "step": 770
    },
    {
      "epoch": 0.1416636396658191,
      "grad_norm": 19.695844650268555,
      "learning_rate": 4.645840900835452e-05,
      "loss": 0.7436,
      "step": 780
    },
    {
      "epoch": 0.14347984017435525,
      "grad_norm": 16.544538497924805,
      "learning_rate": 4.6413003995641116e-05,
      "loss": 0.6805,
      "step": 790
    },
    {
      "epoch": 0.1452960406828914,
      "grad_norm": 18.75685691833496,
      "learning_rate": 4.6367598982927716e-05,
      "loss": 0.4687,
      "step": 800
    },
    {
      "epoch": 0.14711224119142755,
      "grad_norm": 21.897932052612305,
      "learning_rate": 4.632219397021431e-05,
      "loss": 0.7238,
      "step": 810
    },
    {
      "epoch": 0.14892844169996367,
      "grad_norm": 31.82994270324707,
      "learning_rate": 4.627678895750091e-05,
      "loss": 0.7512,
      "step": 820
    },
    {
      "epoch": 0.1507446422084998,
      "grad_norm": 15.97396183013916,
      "learning_rate": 4.62313839447875e-05,
      "loss": 0.6934,
      "step": 830
    },
    {
      "epoch": 0.15256084271703596,
      "grad_norm": 20.96219253540039,
      "learning_rate": 4.61859789320741e-05,
      "loss": 0.5324,
      "step": 840
    },
    {
      "epoch": 0.1543770432255721,
      "grad_norm": 19.114473342895508,
      "learning_rate": 4.61405739193607e-05,
      "loss": 0.508,
      "step": 850
    },
    {
      "epoch": 0.15619324373410826,
      "grad_norm": 9.712385177612305,
      "learning_rate": 4.609516890664729e-05,
      "loss": 0.5295,
      "step": 860
    },
    {
      "epoch": 0.15800944424264438,
      "grad_norm": 13.762930870056152,
      "learning_rate": 4.604976389393389e-05,
      "loss": 0.7455,
      "step": 870
    },
    {
      "epoch": 0.15982564475118052,
      "grad_norm": 27.86884307861328,
      "learning_rate": 4.6004358881220486e-05,
      "loss": 0.5587,
      "step": 880
    },
    {
      "epoch": 0.16164184525971667,
      "grad_norm": 26.105749130249023,
      "learning_rate": 4.5958953868507085e-05,
      "loss": 0.5981,
      "step": 890
    },
    {
      "epoch": 0.16345804576825282,
      "grad_norm": 26.51416015625,
      "learning_rate": 4.591354885579368e-05,
      "loss": 0.5396,
      "step": 900
    },
    {
      "epoch": 0.16345804576825282,
      "eval_accuracy": 0.7424118129614438,
      "eval_f1": 0.7311388146519514,
      "eval_loss": 0.7538678050041199,
      "eval_precision": 0.7369680243126087,
      "eval_recall": 0.7387445231992035,
      "eval_runtime": 12.1028,
      "eval_samples_per_second": 100.721,
      "eval_steps_per_second": 6.362,
      "step": 900
    },
    {
      "epoch": 0.16527424627678897,
      "grad_norm": 22.03255844116211,
      "learning_rate": 4.586814384308028e-05,
      "loss": 0.6057,
      "step": 910
    },
    {
      "epoch": 0.1670904467853251,
      "grad_norm": 22.521772384643555,
      "learning_rate": 4.582273883036687e-05,
      "loss": 0.7402,
      "step": 920
    },
    {
      "epoch": 0.16890664729386123,
      "grad_norm": 14.045843124389648,
      "learning_rate": 4.577733381765347e-05,
      "loss": 0.5914,
      "step": 930
    },
    {
      "epoch": 0.17072284780239738,
      "grad_norm": 11.75537395477295,
      "learning_rate": 4.573192880494006e-05,
      "loss": 0.5954,
      "step": 940
    },
    {
      "epoch": 0.17253904831093353,
      "grad_norm": 25.500017166137695,
      "learning_rate": 4.568652379222666e-05,
      "loss": 0.6219,
      "step": 950
    },
    {
      "epoch": 0.17435524881946968,
      "grad_norm": 15.509596824645996,
      "learning_rate": 4.5641118779513256e-05,
      "loss": 0.5603,
      "step": 960
    },
    {
      "epoch": 0.17617144932800582,
      "grad_norm": 20.986408233642578,
      "learning_rate": 4.5595713766799855e-05,
      "loss": 0.6184,
      "step": 970
    },
    {
      "epoch": 0.17798764983654194,
      "grad_norm": 12.335230827331543,
      "learning_rate": 4.555030875408645e-05,
      "loss": 0.7159,
      "step": 980
    },
    {
      "epoch": 0.1798038503450781,
      "grad_norm": 14.523093223571777,
      "learning_rate": 4.550490374137305e-05,
      "loss": 0.5396,
      "step": 990
    },
    {
      "epoch": 0.18162005085361424,
      "grad_norm": 18.180063247680664,
      "learning_rate": 4.545949872865964e-05,
      "loss": 0.4772,
      "step": 1000
    },
    {
      "epoch": 0.1834362513621504,
      "grad_norm": 11.765399932861328,
      "learning_rate": 4.541409371594624e-05,
      "loss": 0.5037,
      "step": 1010
    },
    {
      "epoch": 0.18525245187068654,
      "grad_norm": 14.669034957885742,
      "learning_rate": 4.536868870323284e-05,
      "loss": 0.7092,
      "step": 1020
    },
    {
      "epoch": 0.18706865237922266,
      "grad_norm": 22.75113296508789,
      "learning_rate": 4.532328369051943e-05,
      "loss": 0.6244,
      "step": 1030
    },
    {
      "epoch": 0.1888848528877588,
      "grad_norm": 15.64301872253418,
      "learning_rate": 4.527787867780603e-05,
      "loss": 0.6145,
      "step": 1040
    },
    {
      "epoch": 0.19070105339629495,
      "grad_norm": 25.85451316833496,
      "learning_rate": 4.5232473665092625e-05,
      "loss": 0.6305,
      "step": 1050
    },
    {
      "epoch": 0.1925172539048311,
      "grad_norm": 11.775480270385742,
      "learning_rate": 4.5187068652379225e-05,
      "loss": 0.5969,
      "step": 1060
    },
    {
      "epoch": 0.19433345441336725,
      "grad_norm": 15.521839141845703,
      "learning_rate": 4.514166363966582e-05,
      "loss": 0.587,
      "step": 1070
    },
    {
      "epoch": 0.19614965492190337,
      "grad_norm": 13.683486938476562,
      "learning_rate": 4.509625862695242e-05,
      "loss": 0.5607,
      "step": 1080
    },
    {
      "epoch": 0.1979658554304395,
      "grad_norm": 20.192411422729492,
      "learning_rate": 4.505085361423901e-05,
      "loss": 0.6512,
      "step": 1090
    },
    {
      "epoch": 0.19978205593897566,
      "grad_norm": 10.552505493164062,
      "learning_rate": 4.500544860152561e-05,
      "loss": 0.4988,
      "step": 1100
    },
    {
      "epoch": 0.2015982564475118,
      "grad_norm": 20.700984954833984,
      "learning_rate": 4.49600435888122e-05,
      "loss": 0.6752,
      "step": 1110
    },
    {
      "epoch": 0.20341445695604796,
      "grad_norm": 11.448249816894531,
      "learning_rate": 4.49146385760988e-05,
      "loss": 0.6486,
      "step": 1120
    },
    {
      "epoch": 0.20523065746458408,
      "grad_norm": 12.439767837524414,
      "learning_rate": 4.48692335633854e-05,
      "loss": 0.6803,
      "step": 1130
    },
    {
      "epoch": 0.20704685797312022,
      "grad_norm": 21.695728302001953,
      "learning_rate": 4.4823828550671995e-05,
      "loss": 0.6166,
      "step": 1140
    },
    {
      "epoch": 0.20886305848165637,
      "grad_norm": 19.776832580566406,
      "learning_rate": 4.4778423537958595e-05,
      "loss": 0.5869,
      "step": 1150
    },
    {
      "epoch": 0.21067925899019252,
      "grad_norm": 19.56122589111328,
      "learning_rate": 4.473301852524519e-05,
      "loss": 0.6789,
      "step": 1160
    },
    {
      "epoch": 0.21249545949872867,
      "grad_norm": 20.19476318359375,
      "learning_rate": 4.468761351253179e-05,
      "loss": 0.6135,
      "step": 1170
    },
    {
      "epoch": 0.21431166000726481,
      "grad_norm": 15.969949722290039,
      "learning_rate": 4.464220849981838e-05,
      "loss": 0.5218,
      "step": 1180
    },
    {
      "epoch": 0.21612786051580093,
      "grad_norm": 13.37980842590332,
      "learning_rate": 4.459680348710498e-05,
      "loss": 0.5176,
      "step": 1190
    },
    {
      "epoch": 0.21794406102433708,
      "grad_norm": 7.233293056488037,
      "learning_rate": 4.455139847439157e-05,
      "loss": 0.3802,
      "step": 1200
    },
    {
      "epoch": 0.21794406102433708,
      "eval_accuracy": 0.7202625102543068,
      "eval_f1": 0.722200966558384,
      "eval_loss": 0.8283492922782898,
      "eval_precision": 0.7215506981816255,
      "eval_recall": 0.7526848662720801,
      "eval_runtime": 12.054,
      "eval_samples_per_second": 101.128,
      "eval_steps_per_second": 6.388,
      "step": 1200
    },
    {
      "epoch": 0.21976026153287323,
      "grad_norm": 14.58340835571289,
      "learning_rate": 4.450599346167817e-05,
      "loss": 0.7496,
      "step": 1210
    },
    {
      "epoch": 0.22157646204140938,
      "grad_norm": 16.27931785583496,
      "learning_rate": 4.4460588448964765e-05,
      "loss": 0.6172,
      "step": 1220
    },
    {
      "epoch": 0.22339266254994553,
      "grad_norm": 19.69707679748535,
      "learning_rate": 4.4415183436251365e-05,
      "loss": 0.6921,
      "step": 1230
    },
    {
      "epoch": 0.22520886305848165,
      "grad_norm": 13.30395793914795,
      "learning_rate": 4.4369778423537964e-05,
      "loss": 0.6034,
      "step": 1240
    },
    {
      "epoch": 0.2270250635670178,
      "grad_norm": 13.300093650817871,
      "learning_rate": 4.432437341082456e-05,
      "loss": 0.503,
      "step": 1250
    },
    {
      "epoch": 0.22884126407555394,
      "grad_norm": 14.82442855834961,
      "learning_rate": 4.427896839811116e-05,
      "loss": 0.7216,
      "step": 1260
    },
    {
      "epoch": 0.2306574645840901,
      "grad_norm": 29.971027374267578,
      "learning_rate": 4.423356338539775e-05,
      "loss": 0.6413,
      "step": 1270
    },
    {
      "epoch": 0.23247366509262624,
      "grad_norm": 24.70488166809082,
      "learning_rate": 4.418815837268435e-05,
      "loss": 0.5686,
      "step": 1280
    },
    {
      "epoch": 0.23428986560116236,
      "grad_norm": 18.32679557800293,
      "learning_rate": 4.414275335997094e-05,
      "loss": 0.5744,
      "step": 1290
    },
    {
      "epoch": 0.2361060661096985,
      "grad_norm": 16.468469619750977,
      "learning_rate": 4.409734834725754e-05,
      "loss": 0.5514,
      "step": 1300
    },
    {
      "epoch": 0.23792226661823465,
      "grad_norm": 14.710607528686523,
      "learning_rate": 4.4051943334544135e-05,
      "loss": 0.5871,
      "step": 1310
    },
    {
      "epoch": 0.2397384671267708,
      "grad_norm": 15.89440631866455,
      "learning_rate": 4.4006538321830734e-05,
      "loss": 0.6898,
      "step": 1320
    },
    {
      "epoch": 0.24155466763530695,
      "grad_norm": 18.804264068603516,
      "learning_rate": 4.396113330911733e-05,
      "loss": 0.617,
      "step": 1330
    },
    {
      "epoch": 0.24337086814384307,
      "grad_norm": 17.780223846435547,
      "learning_rate": 4.391572829640393e-05,
      "loss": 0.5986,
      "step": 1340
    },
    {
      "epoch": 0.24518706865237921,
      "grad_norm": 18.2768611907959,
      "learning_rate": 4.3870323283690526e-05,
      "loss": 0.498,
      "step": 1350
    },
    {
      "epoch": 0.24700326916091536,
      "grad_norm": 20.490026473999023,
      "learning_rate": 4.382491827097712e-05,
      "loss": 0.5705,
      "step": 1360
    },
    {
      "epoch": 0.2488194696694515,
      "grad_norm": 14.712557792663574,
      "learning_rate": 4.377951325826372e-05,
      "loss": 0.5586,
      "step": 1370
    },
    {
      "epoch": 0.25063567017798766,
      "grad_norm": 15.078400611877441,
      "learning_rate": 4.373410824555031e-05,
      "loss": 0.7252,
      "step": 1380
    },
    {
      "epoch": 0.2524518706865238,
      "grad_norm": 16.71666717529297,
      "learning_rate": 4.368870323283691e-05,
      "loss": 0.4957,
      "step": 1390
    },
    {
      "epoch": 0.25426807119505995,
      "grad_norm": 21.710941314697266,
      "learning_rate": 4.3643298220123504e-05,
      "loss": 0.5567,
      "step": 1400
    },
    {
      "epoch": 0.2560842717035961,
      "grad_norm": 13.499922752380371,
      "learning_rate": 4.3597893207410104e-05,
      "loss": 0.6141,
      "step": 1410
    },
    {
      "epoch": 0.2579004722121322,
      "grad_norm": 22.025402069091797,
      "learning_rate": 4.35524881946967e-05,
      "loss": 0.7273,
      "step": 1420
    },
    {
      "epoch": 0.25971667272066834,
      "grad_norm": 17.700258255004883,
      "learning_rate": 4.3507083181983296e-05,
      "loss": 0.5406,
      "step": 1430
    },
    {
      "epoch": 0.2615328732292045,
      "grad_norm": 32.67435073852539,
      "learning_rate": 4.346167816926989e-05,
      "loss": 0.5677,
      "step": 1440
    },
    {
      "epoch": 0.26334907373774064,
      "grad_norm": 15.318320274353027,
      "learning_rate": 4.341627315655649e-05,
      "loss": 0.6266,
      "step": 1450
    },
    {
      "epoch": 0.2651652742462768,
      "grad_norm": 27.953414916992188,
      "learning_rate": 4.337086814384308e-05,
      "loss": 0.5541,
      "step": 1460
    },
    {
      "epoch": 0.26698147475481293,
      "grad_norm": 12.882086753845215,
      "learning_rate": 4.332546313112968e-05,
      "loss": 0.5984,
      "step": 1470
    },
    {
      "epoch": 0.2687976752633491,
      "grad_norm": 17.85333824157715,
      "learning_rate": 4.328005811841628e-05,
      "loss": 0.5021,
      "step": 1480
    },
    {
      "epoch": 0.2706138757718852,
      "grad_norm": 11.902690887451172,
      "learning_rate": 4.3234653105702874e-05,
      "loss": 0.5995,
      "step": 1490
    },
    {
      "epoch": 0.2724300762804214,
      "grad_norm": 8.324485778808594,
      "learning_rate": 4.3189248092989474e-05,
      "loss": 0.5337,
      "step": 1500
    },
    {
      "epoch": 0.2724300762804214,
      "eval_accuracy": 0.7637407711238721,
      "eval_f1": 0.7516135071707333,
      "eval_loss": 0.6853081583976746,
      "eval_precision": 0.7710051578059205,
      "eval_recall": 0.7442999398826009,
      "eval_runtime": 12.0321,
      "eval_samples_per_second": 101.312,
      "eval_steps_per_second": 6.4,
      "step": 1500
    },
    {
      "epoch": 0.2742462767889575,
      "grad_norm": 17.059507369995117,
      "learning_rate": 4.3143843080276066e-05,
      "loss": 0.693,
      "step": 1510
    },
    {
      "epoch": 0.27606247729749367,
      "grad_norm": 20.31708526611328,
      "learning_rate": 4.3098438067562666e-05,
      "loss": 0.5956,
      "step": 1520
    },
    {
      "epoch": 0.27787867780602976,
      "grad_norm": 20.84437370300293,
      "learning_rate": 4.305303305484926e-05,
      "loss": 0.6239,
      "step": 1530
    },
    {
      "epoch": 0.2796948783145659,
      "grad_norm": 22.729970932006836,
      "learning_rate": 4.300762804213586e-05,
      "loss": 0.5901,
      "step": 1540
    },
    {
      "epoch": 0.28151107882310206,
      "grad_norm": 14.622097969055176,
      "learning_rate": 4.296222302942245e-05,
      "loss": 0.4957,
      "step": 1550
    },
    {
      "epoch": 0.2833272793316382,
      "grad_norm": 18.853378295898438,
      "learning_rate": 4.291681801670905e-05,
      "loss": 0.544,
      "step": 1560
    },
    {
      "epoch": 0.28514347984017435,
      "grad_norm": 20.18765640258789,
      "learning_rate": 4.2871413003995644e-05,
      "loss": 0.7035,
      "step": 1570
    },
    {
      "epoch": 0.2869596803487105,
      "grad_norm": 15.882144927978516,
      "learning_rate": 4.2826007991282244e-05,
      "loss": 0.5633,
      "step": 1580
    },
    {
      "epoch": 0.28877588085724665,
      "grad_norm": 14.607442855834961,
      "learning_rate": 4.2780602978568836e-05,
      "loss": 0.6074,
      "step": 1590
    },
    {
      "epoch": 0.2905920813657828,
      "grad_norm": 17.136274337768555,
      "learning_rate": 4.2735197965855436e-05,
      "loss": 0.5523,
      "step": 1600
    },
    {
      "epoch": 0.29240828187431894,
      "grad_norm": 7.1735429763793945,
      "learning_rate": 4.268979295314203e-05,
      "loss": 0.6283,
      "step": 1610
    },
    {
      "epoch": 0.2942244823828551,
      "grad_norm": 11.038073539733887,
      "learning_rate": 4.264438794042863e-05,
      "loss": 0.6132,
      "step": 1620
    },
    {
      "epoch": 0.2960406828913912,
      "grad_norm": 15.272370338439941,
      "learning_rate": 4.259898292771522e-05,
      "loss": 0.4732,
      "step": 1630
    },
    {
      "epoch": 0.29785688339992733,
      "grad_norm": 23.69139289855957,
      "learning_rate": 4.255357791500182e-05,
      "loss": 0.5788,
      "step": 1640
    },
    {
      "epoch": 0.2996730839084635,
      "grad_norm": 16.922348022460938,
      "learning_rate": 4.2508172902288414e-05,
      "loss": 0.5357,
      "step": 1650
    },
    {
      "epoch": 0.3014892844169996,
      "grad_norm": 16.82611656188965,
      "learning_rate": 4.2462767889575014e-05,
      "loss": 0.5324,
      "step": 1660
    },
    {
      "epoch": 0.3033054849255358,
      "grad_norm": 6.3888773918151855,
      "learning_rate": 4.2417362876861606e-05,
      "loss": 0.4909,
      "step": 1670
    },
    {
      "epoch": 0.3051216854340719,
      "grad_norm": 11.367060661315918,
      "learning_rate": 4.2371957864148206e-05,
      "loss": 0.3793,
      "step": 1680
    },
    {
      "epoch": 0.30693788594260807,
      "grad_norm": 13.870577812194824,
      "learning_rate": 4.2326552851434806e-05,
      "loss": 0.5448,
      "step": 1690
    },
    {
      "epoch": 0.3087540864511442,
      "grad_norm": 18.472719192504883,
      "learning_rate": 4.22811478387214e-05,
      "loss": 0.5681,
      "step": 1700
    },
    {
      "epoch": 0.31057028695968036,
      "grad_norm": 10.446008682250977,
      "learning_rate": 4.2235742826008e-05,
      "loss": 0.496,
      "step": 1710
    },
    {
      "epoch": 0.3123864874682165,
      "grad_norm": 13.921605110168457,
      "learning_rate": 4.219033781329459e-05,
      "loss": 0.5314,
      "step": 1720
    },
    {
      "epoch": 0.31420268797675266,
      "grad_norm": 17.571805953979492,
      "learning_rate": 4.214493280058119e-05,
      "loss": 0.6498,
      "step": 1730
    },
    {
      "epoch": 0.31601888848528875,
      "grad_norm": 21.902027130126953,
      "learning_rate": 4.2099527787867784e-05,
      "loss": 0.5822,
      "step": 1740
    },
    {
      "epoch": 0.3178350889938249,
      "grad_norm": 20.000957489013672,
      "learning_rate": 4.205412277515438e-05,
      "loss": 0.5377,
      "step": 1750
    },
    {
      "epoch": 0.31965128950236105,
      "grad_norm": 12.021200180053711,
      "learning_rate": 4.2008717762440976e-05,
      "loss": 0.6164,
      "step": 1760
    },
    {
      "epoch": 0.3214674900108972,
      "grad_norm": 24.00454330444336,
      "learning_rate": 4.1963312749727576e-05,
      "loss": 0.6348,
      "step": 1770
    },
    {
      "epoch": 0.32328369051943334,
      "grad_norm": 14.13219928741455,
      "learning_rate": 4.191790773701417e-05,
      "loss": 0.5242,
      "step": 1780
    },
    {
      "epoch": 0.3250998910279695,
      "grad_norm": 32.07684326171875,
      "learning_rate": 4.187250272430077e-05,
      "loss": 0.8162,
      "step": 1790
    },
    {
      "epoch": 0.32691609153650564,
      "grad_norm": 16.062604904174805,
      "learning_rate": 4.182709771158736e-05,
      "loss": 0.568,
      "step": 1800
    },
    {
      "epoch": 0.32691609153650564,
      "eval_accuracy": 0.7793273174733388,
      "eval_f1": 0.7716766196492151,
      "eval_loss": 0.6542023420333862,
      "eval_precision": 0.7704320356934777,
      "eval_recall": 0.7814793806758547,
      "eval_runtime": 12.0427,
      "eval_samples_per_second": 101.223,
      "eval_steps_per_second": 6.394,
      "step": 1800
    },
    {
      "epoch": 0.3287322920450418,
      "grad_norm": 14.167701721191406,
      "learning_rate": 4.178169269887396e-05,
      "loss": 0.502,
      "step": 1810
    },
    {
      "epoch": 0.33054849255357793,
      "grad_norm": 16.717453002929688,
      "learning_rate": 4.1736287686160554e-05,
      "loss": 0.4441,
      "step": 1820
    },
    {
      "epoch": 0.3323646930621141,
      "grad_norm": 24.774871826171875,
      "learning_rate": 4.169088267344715e-05,
      "loss": 0.5017,
      "step": 1830
    },
    {
      "epoch": 0.3341808935706502,
      "grad_norm": 12.445333480834961,
      "learning_rate": 4.1645477660733746e-05,
      "loss": 0.4088,
      "step": 1840
    },
    {
      "epoch": 0.3359970940791863,
      "grad_norm": 18.43295669555664,
      "learning_rate": 4.1600072648020346e-05,
      "loss": 0.6032,
      "step": 1850
    },
    {
      "epoch": 0.33781329458772247,
      "grad_norm": 26.735172271728516,
      "learning_rate": 4.155466763530694e-05,
      "loss": 0.5611,
      "step": 1860
    },
    {
      "epoch": 0.3396294950962586,
      "grad_norm": 18.15043067932129,
      "learning_rate": 4.150926262259354e-05,
      "loss": 0.4716,
      "step": 1870
    },
    {
      "epoch": 0.34144569560479476,
      "grad_norm": 18.67064094543457,
      "learning_rate": 4.146385760988013e-05,
      "loss": 0.5643,
      "step": 1880
    },
    {
      "epoch": 0.3432618961133309,
      "grad_norm": 21.009254455566406,
      "learning_rate": 4.141845259716673e-05,
      "loss": 0.6117,
      "step": 1890
    },
    {
      "epoch": 0.34507809662186706,
      "grad_norm": 10.891605377197266,
      "learning_rate": 4.137304758445333e-05,
      "loss": 0.5336,
      "step": 1900
    },
    {
      "epoch": 0.3468942971304032,
      "grad_norm": 6.9248504638671875,
      "learning_rate": 4.132764257173992e-05,
      "loss": 0.5683,
      "step": 1910
    },
    {
      "epoch": 0.34871049763893935,
      "grad_norm": 20.700204849243164,
      "learning_rate": 4.128223755902652e-05,
      "loss": 0.5869,
      "step": 1920
    },
    {
      "epoch": 0.3505266981474755,
      "grad_norm": 17.678829193115234,
      "learning_rate": 4.1236832546313116e-05,
      "loss": 0.5081,
      "step": 1930
    },
    {
      "epoch": 0.35234289865601165,
      "grad_norm": 19.98926544189453,
      "learning_rate": 4.1191427533599715e-05,
      "loss": 0.4912,
      "step": 1940
    },
    {
      "epoch": 0.35415909916454774,
      "grad_norm": 15.056520462036133,
      "learning_rate": 4.114602252088631e-05,
      "loss": 0.5509,
      "step": 1950
    },
    {
      "epoch": 0.3559752996730839,
      "grad_norm": 19.50244903564453,
      "learning_rate": 4.110061750817291e-05,
      "loss": 0.5181,
      "step": 1960
    },
    {
      "epoch": 0.35779150018162004,
      "grad_norm": 20.018245697021484,
      "learning_rate": 4.10552124954595e-05,
      "loss": 0.6597,
      "step": 1970
    },
    {
      "epoch": 0.3596077006901562,
      "grad_norm": 18.31260108947754,
      "learning_rate": 4.10098074827461e-05,
      "loss": 0.7217,
      "step": 1980
    },
    {
      "epoch": 0.36142390119869233,
      "grad_norm": 15.864529609680176,
      "learning_rate": 4.096440247003269e-05,
      "loss": 0.3586,
      "step": 1990
    },
    {
      "epoch": 0.3632401017072285,
      "grad_norm": 9.000946998596191,
      "learning_rate": 4.091899745731929e-05,
      "loss": 0.5125,
      "step": 2000
    },
    {
      "epoch": 0.3650563022157646,
      "grad_norm": 4.08746337890625,
      "learning_rate": 4.0873592444605886e-05,
      "loss": 0.5204,
      "step": 2010
    },
    {
      "epoch": 0.3668725027243008,
      "grad_norm": 7.909645080566406,
      "learning_rate": 4.0828187431892485e-05,
      "loss": 0.3989,
      "step": 2020
    },
    {
      "epoch": 0.3686887032328369,
      "grad_norm": 17.512380599975586,
      "learning_rate": 4.078278241917908e-05,
      "loss": 0.5338,
      "step": 2030
    },
    {
      "epoch": 0.37050490374137307,
      "grad_norm": 22.659942626953125,
      "learning_rate": 4.073737740646568e-05,
      "loss": 0.5221,
      "step": 2040
    },
    {
      "epoch": 0.37232110424990916,
      "grad_norm": 8.159212112426758,
      "learning_rate": 4.069197239375227e-05,
      "loss": 0.5502,
      "step": 2050
    },
    {
      "epoch": 0.3741373047584453,
      "grad_norm": 20.236705780029297,
      "learning_rate": 4.064656738103887e-05,
      "loss": 0.4267,
      "step": 2060
    },
    {
      "epoch": 0.37595350526698146,
      "grad_norm": 16.24159049987793,
      "learning_rate": 4.060116236832546e-05,
      "loss": 0.5693,
      "step": 2070
    },
    {
      "epoch": 0.3777697057755176,
      "grad_norm": 14.921638488769531,
      "learning_rate": 4.055575735561206e-05,
      "loss": 0.5152,
      "step": 2080
    },
    {
      "epoch": 0.37958590628405375,
      "grad_norm": 18.668865203857422,
      "learning_rate": 4.0510352342898656e-05,
      "loss": 0.6135,
      "step": 2090
    },
    {
      "epoch": 0.3814021067925899,
      "grad_norm": 23.298078536987305,
      "learning_rate": 4.0464947330185255e-05,
      "loss": 0.5237,
      "step": 2100
    },
    {
      "epoch": 0.3814021067925899,
      "eval_accuracy": 0.7752255947497949,
      "eval_f1": 0.7694936459460316,
      "eval_loss": 0.629031240940094,
      "eval_precision": 0.7649651818415721,
      "eval_recall": 0.7791450088077798,
      "eval_runtime": 12.0845,
      "eval_samples_per_second": 100.873,
      "eval_steps_per_second": 6.372,
      "step": 2100
    },
    {
      "epoch": 0.38321830730112605,
      "grad_norm": 14.145480155944824,
      "learning_rate": 4.0419542317471855e-05,
      "loss": 0.4478,
      "step": 2110
    },
    {
      "epoch": 0.3850345078096622,
      "grad_norm": 21.260257720947266,
      "learning_rate": 4.037413730475845e-05,
      "loss": 0.4185,
      "step": 2120
    },
    {
      "epoch": 0.38685070831819834,
      "grad_norm": 9.28508186340332,
      "learning_rate": 4.032873229204505e-05,
      "loss": 0.6288,
      "step": 2130
    },
    {
      "epoch": 0.3886669088267345,
      "grad_norm": 12.114027976989746,
      "learning_rate": 4.028332727933164e-05,
      "loss": 0.5159,
      "step": 2140
    },
    {
      "epoch": 0.39048310933527064,
      "grad_norm": 27.268280029296875,
      "learning_rate": 4.023792226661824e-05,
      "loss": 0.6385,
      "step": 2150
    },
    {
      "epoch": 0.39229930984380673,
      "grad_norm": 15.575640678405762,
      "learning_rate": 4.019251725390483e-05,
      "loss": 0.5996,
      "step": 2160
    },
    {
      "epoch": 0.3941155103523429,
      "grad_norm": 15.587044715881348,
      "learning_rate": 4.014711224119143e-05,
      "loss": 0.4721,
      "step": 2170
    },
    {
      "epoch": 0.395931710860879,
      "grad_norm": 15.621984481811523,
      "learning_rate": 4.0101707228478025e-05,
      "loss": 0.49,
      "step": 2180
    },
    {
      "epoch": 0.3977479113694152,
      "grad_norm": 10.312201499938965,
      "learning_rate": 4.0056302215764625e-05,
      "loss": 0.5224,
      "step": 2190
    },
    {
      "epoch": 0.3995641118779513,
      "grad_norm": 17.903989791870117,
      "learning_rate": 4.001089720305122e-05,
      "loss": 0.4217,
      "step": 2200
    },
    {
      "epoch": 0.40138031238648747,
      "grad_norm": 15.996349334716797,
      "learning_rate": 3.996549219033782e-05,
      "loss": 0.4906,
      "step": 2210
    },
    {
      "epoch": 0.4031965128950236,
      "grad_norm": 21.02739715576172,
      "learning_rate": 3.992008717762441e-05,
      "loss": 0.6814,
      "step": 2220
    },
    {
      "epoch": 0.40501271340355977,
      "grad_norm": 21.1398868560791,
      "learning_rate": 3.987468216491101e-05,
      "loss": 0.3679,
      "step": 2230
    },
    {
      "epoch": 0.4068289139120959,
      "grad_norm": 24.41451072692871,
      "learning_rate": 3.98292771521976e-05,
      "loss": 0.4802,
      "step": 2240
    },
    {
      "epoch": 0.40864511442063206,
      "grad_norm": 22.847251892089844,
      "learning_rate": 3.97838721394842e-05,
      "loss": 0.5097,
      "step": 2250
    },
    {
      "epoch": 0.41046131492916815,
      "grad_norm": 14.888809204101562,
      "learning_rate": 3.9738467126770795e-05,
      "loss": 0.4294,
      "step": 2260
    },
    {
      "epoch": 0.4122775154377043,
      "grad_norm": 20.353588104248047,
      "learning_rate": 3.9693062114057395e-05,
      "loss": 0.4949,
      "step": 2270
    },
    {
      "epoch": 0.41409371594624045,
      "grad_norm": 11.559284210205078,
      "learning_rate": 3.964765710134399e-05,
      "loss": 0.4353,
      "step": 2280
    },
    {
      "epoch": 0.4159099164547766,
      "grad_norm": 12.643139839172363,
      "learning_rate": 3.960225208863059e-05,
      "loss": 0.3885,
      "step": 2290
    },
    {
      "epoch": 0.41772611696331274,
      "grad_norm": 4.294188022613525,
      "learning_rate": 3.955684707591718e-05,
      "loss": 0.4166,
      "step": 2300
    },
    {
      "epoch": 0.4195423174718489,
      "grad_norm": 17.501489639282227,
      "learning_rate": 3.951144206320378e-05,
      "loss": 0.4092,
      "step": 2310
    },
    {
      "epoch": 0.42135851798038504,
      "grad_norm": 9.189852714538574,
      "learning_rate": 3.946603705049038e-05,
      "loss": 0.5333,
      "step": 2320
    },
    {
      "epoch": 0.4231747184889212,
      "grad_norm": 19.29057502746582,
      "learning_rate": 3.942063203777697e-05,
      "loss": 0.6406,
      "step": 2330
    },
    {
      "epoch": 0.42499091899745733,
      "grad_norm": 15.98727035522461,
      "learning_rate": 3.937522702506357e-05,
      "loss": 0.4677,
      "step": 2340
    },
    {
      "epoch": 0.4268071195059935,
      "grad_norm": 9.902159690856934,
      "learning_rate": 3.9329822012350165e-05,
      "loss": 0.5894,
      "step": 2350
    },
    {
      "epoch": 0.42862332001452963,
      "grad_norm": 18.965747833251953,
      "learning_rate": 3.9284416999636764e-05,
      "loss": 0.6196,
      "step": 2360
    },
    {
      "epoch": 0.4304395205230657,
      "grad_norm": 18.899520874023438,
      "learning_rate": 3.923901198692336e-05,
      "loss": 0.5199,
      "step": 2370
    },
    {
      "epoch": 0.43225572103160187,
      "grad_norm": 12.890677452087402,
      "learning_rate": 3.919360697420996e-05,
      "loss": 0.583,
      "step": 2380
    },
    {
      "epoch": 0.434071921540138,
      "grad_norm": 17.285070419311523,
      "learning_rate": 3.914820196149655e-05,
      "loss": 0.4777,
      "step": 2390
    },
    {
      "epoch": 0.43588812204867416,
      "grad_norm": 11.499088287353516,
      "learning_rate": 3.910279694878315e-05,
      "loss": 0.4478,
      "step": 2400
    },
    {
      "epoch": 0.43588812204867416,
      "eval_accuracy": 0.7908121410992617,
      "eval_f1": 0.7894885784356462,
      "eval_loss": 0.6197062134742737,
      "eval_precision": 0.778593519395511,
      "eval_recall": 0.8067083793113867,
      "eval_runtime": 12.0738,
      "eval_samples_per_second": 100.963,
      "eval_steps_per_second": 6.377,
      "step": 2400
    },
    {
      "epoch": 0.4377043225572103,
      "grad_norm": 11.969683647155762,
      "learning_rate": 3.905739193606974e-05,
      "loss": 0.4575,
      "step": 2410
    },
    {
      "epoch": 0.43952052306574646,
      "grad_norm": 16.60710906982422,
      "learning_rate": 3.901198692335634e-05,
      "loss": 0.6813,
      "step": 2420
    },
    {
      "epoch": 0.4413367235742826,
      "grad_norm": 7.043119430541992,
      "learning_rate": 3.8966581910642935e-05,
      "loss": 0.5086,
      "step": 2430
    },
    {
      "epoch": 0.44315292408281876,
      "grad_norm": 24.139657974243164,
      "learning_rate": 3.8921176897929534e-05,
      "loss": 0.6707,
      "step": 2440
    },
    {
      "epoch": 0.4449691245913549,
      "grad_norm": 10.262349128723145,
      "learning_rate": 3.887577188521613e-05,
      "loss": 0.4105,
      "step": 2450
    },
    {
      "epoch": 0.44678532509989105,
      "grad_norm": 18.123046875,
      "learning_rate": 3.883036687250273e-05,
      "loss": 0.5051,
      "step": 2460
    },
    {
      "epoch": 0.44860152560842714,
      "grad_norm": 14.30826187133789,
      "learning_rate": 3.878496185978932e-05,
      "loss": 0.4586,
      "step": 2470
    },
    {
      "epoch": 0.4504177261169633,
      "grad_norm": 32.13856506347656,
      "learning_rate": 3.873955684707592e-05,
      "loss": 0.4564,
      "step": 2480
    },
    {
      "epoch": 0.45223392662549944,
      "grad_norm": 21.877262115478516,
      "learning_rate": 3.869415183436251e-05,
      "loss": 0.5316,
      "step": 2490
    },
    {
      "epoch": 0.4540501271340356,
      "grad_norm": 15.986939430236816,
      "learning_rate": 3.864874682164911e-05,
      "loss": 0.4488,
      "step": 2500
    },
    {
      "epoch": 0.45586632764257173,
      "grad_norm": 15.963953018188477,
      "learning_rate": 3.8603341808935705e-05,
      "loss": 0.6677,
      "step": 2510
    },
    {
      "epoch": 0.4576825281511079,
      "grad_norm": 29.52568244934082,
      "learning_rate": 3.8557936796222304e-05,
      "loss": 0.5992,
      "step": 2520
    },
    {
      "epoch": 0.45949872865964403,
      "grad_norm": 18.61089324951172,
      "learning_rate": 3.8512531783508904e-05,
      "loss": 0.5565,
      "step": 2530
    },
    {
      "epoch": 0.4613149291681802,
      "grad_norm": 23.38523292541504,
      "learning_rate": 3.84671267707955e-05,
      "loss": 0.5018,
      "step": 2540
    },
    {
      "epoch": 0.4631311296767163,
      "grad_norm": 20.95744514465332,
      "learning_rate": 3.8421721758082097e-05,
      "loss": 0.5758,
      "step": 2550
    },
    {
      "epoch": 0.46494733018525247,
      "grad_norm": 11.371984481811523,
      "learning_rate": 3.837631674536869e-05,
      "loss": 0.5553,
      "step": 2560
    },
    {
      "epoch": 0.4667635306937886,
      "grad_norm": 21.71943473815918,
      "learning_rate": 3.833091173265529e-05,
      "loss": 0.5777,
      "step": 2570
    },
    {
      "epoch": 0.4685797312023247,
      "grad_norm": 18.1218318939209,
      "learning_rate": 3.828550671994188e-05,
      "loss": 0.5863,
      "step": 2580
    },
    {
      "epoch": 0.47039593171086086,
      "grad_norm": 19.874448776245117,
      "learning_rate": 3.824010170722848e-05,
      "loss": 0.5183,
      "step": 2590
    },
    {
      "epoch": 0.472212132219397,
      "grad_norm": 19.063386917114258,
      "learning_rate": 3.8194696694515074e-05,
      "loss": 0.5381,
      "step": 2600
    },
    {
      "epoch": 0.47402833272793315,
      "grad_norm": 9.372021675109863,
      "learning_rate": 3.8149291681801674e-05,
      "loss": 0.433,
      "step": 2610
    },
    {
      "epoch": 0.4758445332364693,
      "grad_norm": 14.822279930114746,
      "learning_rate": 3.810388666908827e-05,
      "loss": 0.5518,
      "step": 2620
    },
    {
      "epoch": 0.47766073374500545,
      "grad_norm": 13.066219329833984,
      "learning_rate": 3.8058481656374867e-05,
      "loss": 0.4946,
      "step": 2630
    },
    {
      "epoch": 0.4794769342535416,
      "grad_norm": 18.113737106323242,
      "learning_rate": 3.801307664366146e-05,
      "loss": 0.4824,
      "step": 2640
    },
    {
      "epoch": 0.48129313476207775,
      "grad_norm": 10.73379898071289,
      "learning_rate": 3.796767163094806e-05,
      "loss": 0.5541,
      "step": 2650
    },
    {
      "epoch": 0.4831093352706139,
      "grad_norm": 25.9276065826416,
      "learning_rate": 3.792226661823465e-05,
      "loss": 0.5404,
      "step": 2660
    },
    {
      "epoch": 0.48492553577915004,
      "grad_norm": 20.394275665283203,
      "learning_rate": 3.787686160552125e-05,
      "loss": 0.5106,
      "step": 2670
    },
    {
      "epoch": 0.48674173628768613,
      "grad_norm": 23.263164520263672,
      "learning_rate": 3.7831456592807844e-05,
      "loss": 0.5293,
      "step": 2680
    },
    {
      "epoch": 0.4885579367962223,
      "grad_norm": 13.967432975769043,
      "learning_rate": 3.7786051580094444e-05,
      "loss": 0.4886,
      "step": 2690
    },
    {
      "epoch": 0.49037413730475843,
      "grad_norm": 18.502605438232422,
      "learning_rate": 3.774064656738104e-05,
      "loss": 0.5617,
      "step": 2700
    },
    {
      "epoch": 0.49037413730475843,
      "eval_accuracy": 0.8039376538146021,
      "eval_f1": 0.7953241400811288,
      "eval_loss": 0.5712546110153198,
      "eval_precision": 0.7878506982758448,
      "eval_recall": 0.8087375226161377,
      "eval_runtime": 12.0651,
      "eval_samples_per_second": 101.035,
      "eval_steps_per_second": 6.382,
      "step": 2700
    },
    {
      "epoch": 0.4921903378132946,
      "grad_norm": 36.73835754394531,
      "learning_rate": 3.7695241554667637e-05,
      "loss": 0.6704,
      "step": 2710
    },
    {
      "epoch": 0.4940065383218307,
      "grad_norm": 11.638787269592285,
      "learning_rate": 3.764983654195423e-05,
      "loss": 0.5284,
      "step": 2720
    },
    {
      "epoch": 0.49582273883036687,
      "grad_norm": 22.700679779052734,
      "learning_rate": 3.760443152924083e-05,
      "loss": 0.5621,
      "step": 2730
    },
    {
      "epoch": 0.497638939338903,
      "grad_norm": 10.612008094787598,
      "learning_rate": 3.755902651652743e-05,
      "loss": 0.572,
      "step": 2740
    },
    {
      "epoch": 0.49945513984743917,
      "grad_norm": 8.393928527832031,
      "learning_rate": 3.751362150381402e-05,
      "loss": 0.4636,
      "step": 2750
    },
    {
      "epoch": 0.5012713403559753,
      "grad_norm": 28.1651554107666,
      "learning_rate": 3.746821649110062e-05,
      "loss": 0.4955,
      "step": 2760
    },
    {
      "epoch": 0.5030875408645115,
      "grad_norm": 20.283479690551758,
      "learning_rate": 3.7422811478387214e-05,
      "loss": 0.5398,
      "step": 2770
    },
    {
      "epoch": 0.5049037413730476,
      "grad_norm": 12.401691436767578,
      "learning_rate": 3.7377406465673814e-05,
      "loss": 0.5622,
      "step": 2780
    },
    {
      "epoch": 0.5067199418815838,
      "grad_norm": 31.315277099609375,
      "learning_rate": 3.7332001452960407e-05,
      "loss": 0.5752,
      "step": 2790
    },
    {
      "epoch": 0.5085361423901199,
      "grad_norm": 17.91919708251953,
      "learning_rate": 3.7286596440247006e-05,
      "loss": 0.5953,
      "step": 2800
    },
    {
      "epoch": 0.510352342898656,
      "grad_norm": 10.692752838134766,
      "learning_rate": 3.72411914275336e-05,
      "loss": 0.4012,
      "step": 2810
    },
    {
      "epoch": 0.5121685434071922,
      "grad_norm": 17.449275970458984,
      "learning_rate": 3.71957864148202e-05,
      "loss": 0.4245,
      "step": 2820
    },
    {
      "epoch": 0.5139847439157283,
      "grad_norm": 17.479352951049805,
      "learning_rate": 3.715038140210679e-05,
      "loss": 0.4908,
      "step": 2830
    },
    {
      "epoch": 0.5158009444242644,
      "grad_norm": 20.10633659362793,
      "learning_rate": 3.710497638939339e-05,
      "loss": 0.5738,
      "step": 2840
    },
    {
      "epoch": 0.5176171449328005,
      "grad_norm": 17.699560165405273,
      "learning_rate": 3.7059571376679984e-05,
      "loss": 0.4434,
      "step": 2850
    },
    {
      "epoch": 0.5194333454413367,
      "grad_norm": 15.045440673828125,
      "learning_rate": 3.7014166363966584e-05,
      "loss": 0.4992,
      "step": 2860
    },
    {
      "epoch": 0.5212495459498728,
      "grad_norm": 14.244542121887207,
      "learning_rate": 3.6968761351253177e-05,
      "loss": 0.5327,
      "step": 2870
    },
    {
      "epoch": 0.523065746458409,
      "grad_norm": 11.60004997253418,
      "learning_rate": 3.6923356338539776e-05,
      "loss": 0.4859,
      "step": 2880
    },
    {
      "epoch": 0.5248819469669451,
      "grad_norm": 8.768573760986328,
      "learning_rate": 3.687795132582637e-05,
      "loss": 0.5004,
      "step": 2890
    },
    {
      "epoch": 0.5266981474754813,
      "grad_norm": 22.54417610168457,
      "learning_rate": 3.683254631311297e-05,
      "loss": 0.543,
      "step": 2900
    },
    {
      "epoch": 0.5285143479840174,
      "grad_norm": 20.270061492919922,
      "learning_rate": 3.678714130039956e-05,
      "loss": 0.5296,
      "step": 2910
    },
    {
      "epoch": 0.5303305484925536,
      "grad_norm": 18.757434844970703,
      "learning_rate": 3.674173628768616e-05,
      "loss": 0.3825,
      "step": 2920
    },
    {
      "epoch": 0.5321467490010897,
      "grad_norm": 13.12435245513916,
      "learning_rate": 3.6696331274972754e-05,
      "loss": 0.545,
      "step": 2930
    },
    {
      "epoch": 0.5339629495096259,
      "grad_norm": 23.035865783691406,
      "learning_rate": 3.6650926262259354e-05,
      "loss": 0.6143,
      "step": 2940
    },
    {
      "epoch": 0.535779150018162,
      "grad_norm": 15.766834259033203,
      "learning_rate": 3.660552124954595e-05,
      "loss": 0.5893,
      "step": 2950
    },
    {
      "epoch": 0.5375953505266982,
      "grad_norm": 11.79257869720459,
      "learning_rate": 3.6560116236832546e-05,
      "loss": 0.5365,
      "step": 2960
    },
    {
      "epoch": 0.5394115510352343,
      "grad_norm": 22.071346282958984,
      "learning_rate": 3.6514711224119146e-05,
      "loss": 0.5344,
      "step": 2970
    },
    {
      "epoch": 0.5412277515437705,
      "grad_norm": 16.728076934814453,
      "learning_rate": 3.646930621140574e-05,
      "loss": 0.5137,
      "step": 2980
    },
    {
      "epoch": 0.5430439520523066,
      "grad_norm": 13.112013816833496,
      "learning_rate": 3.642390119869234e-05,
      "loss": 0.6091,
      "step": 2990
    },
    {
      "epoch": 0.5448601525608427,
      "grad_norm": 15.373380661010742,
      "learning_rate": 3.637849618597893e-05,
      "loss": 0.4939,
      "step": 3000
    },
    {
      "epoch": 0.5448601525608427,
      "eval_accuracy": 0.8039376538146021,
      "eval_f1": 0.7973147509604641,
      "eval_loss": 0.5636632442474365,
      "eval_precision": 0.7937266489697907,
      "eval_recall": 0.8050963810992605,
      "eval_runtime": 12.0525,
      "eval_samples_per_second": 101.141,
      "eval_steps_per_second": 6.389,
      "step": 3000
    },
    {
      "epoch": 0.5466763530693789,
      "grad_norm": 17.602785110473633,
      "learning_rate": 3.633309117326553e-05,
      "loss": 0.4411,
      "step": 3010
    },
    {
      "epoch": 0.548492553577915,
      "grad_norm": 11.274548530578613,
      "learning_rate": 3.6287686160552124e-05,
      "loss": 0.5949,
      "step": 3020
    },
    {
      "epoch": 0.5503087540864512,
      "grad_norm": 17.69841766357422,
      "learning_rate": 3.624228114783872e-05,
      "loss": 0.4447,
      "step": 3030
    },
    {
      "epoch": 0.5521249545949873,
      "grad_norm": 21.106124877929688,
      "learning_rate": 3.6196876135125316e-05,
      "loss": 0.5316,
      "step": 3040
    },
    {
      "epoch": 0.5539411551035234,
      "grad_norm": 21.903255462646484,
      "learning_rate": 3.6151471122411916e-05,
      "loss": 0.476,
      "step": 3050
    },
    {
      "epoch": 0.5557573556120595,
      "grad_norm": 11.051823616027832,
      "learning_rate": 3.610606610969851e-05,
      "loss": 0.5307,
      "step": 3060
    },
    {
      "epoch": 0.5575735561205957,
      "grad_norm": 5.928410530090332,
      "learning_rate": 3.606066109698511e-05,
      "loss": 0.4116,
      "step": 3070
    },
    {
      "epoch": 0.5593897566291318,
      "grad_norm": 24.413103103637695,
      "learning_rate": 3.60152560842717e-05,
      "loss": 0.482,
      "step": 3080
    },
    {
      "epoch": 0.561205957137668,
      "grad_norm": 17.25383949279785,
      "learning_rate": 3.59698510715583e-05,
      "loss": 0.5269,
      "step": 3090
    },
    {
      "epoch": 0.5630221576462041,
      "grad_norm": 14.473711013793945,
      "learning_rate": 3.5924446058844894e-05,
      "loss": 0.5098,
      "step": 3100
    },
    {
      "epoch": 0.5648383581547403,
      "grad_norm": 14.325135231018066,
      "learning_rate": 3.587904104613149e-05,
      "loss": 0.4476,
      "step": 3110
    },
    {
      "epoch": 0.5666545586632764,
      "grad_norm": 22.374534606933594,
      "learning_rate": 3.5833636033418086e-05,
      "loss": 0.4768,
      "step": 3120
    },
    {
      "epoch": 0.5684707591718126,
      "grad_norm": 22.39207649230957,
      "learning_rate": 3.5788231020704686e-05,
      "loss": 0.457,
      "step": 3130
    },
    {
      "epoch": 0.5702869596803487,
      "grad_norm": 14.626873970031738,
      "learning_rate": 3.574282600799128e-05,
      "loss": 0.4465,
      "step": 3140
    },
    {
      "epoch": 0.5721031601888849,
      "grad_norm": 23.05328369140625,
      "learning_rate": 3.569742099527788e-05,
      "loss": 0.4138,
      "step": 3150
    },
    {
      "epoch": 0.573919360697421,
      "grad_norm": 16.360881805419922,
      "learning_rate": 3.565201598256448e-05,
      "loss": 0.4439,
      "step": 3160
    },
    {
      "epoch": 0.5757355612059571,
      "grad_norm": 25.42070770263672,
      "learning_rate": 3.560661096985107e-05,
| "loss": 0.6645, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.5775517617144933, | |
| "grad_norm": 22.610538482666016, | |
| "learning_rate": 3.556120595713767e-05, | |
| "loss": 0.4836, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.5793679622230294, | |
| "grad_norm": 11.355021476745605, | |
| "learning_rate": 3.551580094442426e-05, | |
| "loss": 0.5534, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.5811841627315656, | |
| "grad_norm": 18.886524200439453, | |
| "learning_rate": 3.547039593171086e-05, | |
| "loss": 0.5369, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.5830003632401017, | |
| "grad_norm": 14.892853736877441, | |
| "learning_rate": 3.5424990918997456e-05, | |
| "loss": 0.4463, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.5848165637486379, | |
| "grad_norm": 31.027605056762695, | |
| "learning_rate": 3.5379585906284055e-05, | |
| "loss": 0.4775, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.586632764257174, | |
| "grad_norm": 11.664224624633789, | |
| "learning_rate": 3.533418089357065e-05, | |
| "loss": 0.5938, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.5884489647657102, | |
| "grad_norm": 13.272047996520996, | |
| "learning_rate": 3.528877588085725e-05, | |
| "loss": 0.4925, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5902651652742463, | |
| "grad_norm": 13.521268844604492, | |
| "learning_rate": 3.524337086814384e-05, | |
| "loss": 0.504, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.5920813657827824, | |
| "grad_norm": 10.777715682983398, | |
| "learning_rate": 3.519796585543044e-05, | |
| "loss": 0.51, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.5938975662913185, | |
| "grad_norm": 16.920635223388672, | |
| "learning_rate": 3.515256084271703e-05, | |
| "loss": 0.5315, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.5957137667998547, | |
| "grad_norm": 22.00889778137207, | |
| "learning_rate": 3.510715583000363e-05, | |
| "loss": 0.4741, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.5975299673083908, | |
| "grad_norm": 14.849915504455566, | |
| "learning_rate": 3.5061750817290226e-05, | |
| "loss": 0.4805, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.599346167816927, | |
| "grad_norm": 22.403329849243164, | |
| "learning_rate": 3.5016345804576825e-05, | |
| "loss": 0.5531, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.599346167816927, | |
| "eval_accuracy": 0.8146021328958163, | |
| "eval_f1": 0.8074596115450074, | |
| "eval_loss": 0.5683770179748535, | |
| "eval_precision": 0.8052208334869901, | |
| "eval_recall": 0.8145117560161068, | |
| "eval_runtime": 12.0545, | |
| "eval_samples_per_second": 101.124, | |
| "eval_steps_per_second": 6.388, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6011623683254631, | |
| "grad_norm": 13.46020221710205, | |
| "learning_rate": 3.497094079186342e-05, | |
| "loss": 0.5227, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.6029785688339993, | |
| "grad_norm": 22.012182235717773, | |
| "learning_rate": 3.492553577915002e-05, | |
| "loss": 0.4315, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.6047947693425354, | |
| "grad_norm": 16.392894744873047, | |
| "learning_rate": 3.488013076643661e-05, | |
| "loss": 0.474, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.6066109698510715, | |
| "grad_norm": 19.60003089904785, | |
| "learning_rate": 3.483472575372321e-05, | |
| "loss": 0.3914, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.6084271703596077, | |
| "grad_norm": 24.537080764770508, | |
| "learning_rate": 3.47893207410098e-05, | |
| "loss": 0.4278, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.6102433708681438, | |
| "grad_norm": 22.935487747192383, | |
| "learning_rate": 3.47439157282964e-05, | |
| "loss": 0.6627, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.61205957137668, | |
| "grad_norm": 19.0701847076416, | |
| "learning_rate": 3.4698510715583e-05, | |
| "loss": 0.4398, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.6138757718852161, | |
| "grad_norm": 11.604155540466309, | |
| "learning_rate": 3.4653105702869595e-05, | |
| "loss": 0.6131, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.6156919723937523, | |
| "grad_norm": 17.911949157714844, | |
| "learning_rate": 3.4607700690156195e-05, | |
| "loss": 0.4957, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.6175081729022884, | |
| "grad_norm": 12.859588623046875, | |
| "learning_rate": 3.456229567744279e-05, | |
| "loss": 0.5457, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.6193243734108246, | |
| "grad_norm": 17.096111297607422, | |
| "learning_rate": 3.451689066472939e-05, | |
| "loss": 0.4664, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.6211405739193607, | |
| "grad_norm": 17.198429107666016, | |
| "learning_rate": 3.447148565201598e-05, | |
| "loss": 0.3655, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.6229567744278969, | |
| "grad_norm": 7.782280445098877, | |
| "learning_rate": 3.442608063930258e-05, | |
| "loss": 0.4909, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.624772974936433, | |
| "grad_norm": 13.99974250793457, | |
| "learning_rate": 3.438067562658917e-05, | |
| "loss": 0.5252, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.6265891754449692, | |
| "grad_norm": 26.579198837280273, | |
| "learning_rate": 3.433527061387577e-05, | |
| "loss": 0.4605, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.6284053759535053, | |
| "grad_norm": 23.17647361755371, | |
| "learning_rate": 3.4289865601162365e-05, | |
| "loss": 0.3984, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.6302215764620414, | |
| "grad_norm": 15.169466972351074, | |
| "learning_rate": 3.4244460588448965e-05, | |
| "loss": 0.4396, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.6320377769705775, | |
| "grad_norm": 18.425457000732422, | |
| "learning_rate": 3.4199055575735565e-05, | |
| "loss": 0.5219, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.6338539774791137, | |
| "grad_norm": 11.86226749420166, | |
| "learning_rate": 3.415365056302216e-05, | |
| "loss": 0.5649, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.6356701779876498, | |
| "grad_norm": 18.50494384765625, | |
| "learning_rate": 3.410824555030876e-05, | |
| "loss": 0.5174, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.637486378496186, | |
| "grad_norm": 36.33973693847656, | |
| "learning_rate": 3.406284053759535e-05, | |
| "loss": 0.6008, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.6393025790047221, | |
| "grad_norm": 12.04764175415039, | |
| "learning_rate": 3.401743552488195e-05, | |
| "loss": 0.375, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.6411187795132582, | |
| "grad_norm": 19.642751693725586, | |
| "learning_rate": 3.397203051216854e-05, | |
| "loss": 0.4088, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.6429349800217944, | |
| "grad_norm": 11.035579681396484, | |
| "learning_rate": 3.392662549945514e-05, | |
| "loss": 0.4665, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.6447511805303305, | |
| "grad_norm": 9.772668838500977, | |
| "learning_rate": 3.3881220486741735e-05, | |
| "loss": 0.4081, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.6465673810388667, | |
| "grad_norm": 15.26156997680664, | |
| "learning_rate": 3.3835815474028335e-05, | |
| "loss": 0.5479, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.6483835815474028, | |
| "grad_norm": 16.603866577148438, | |
| "learning_rate": 3.379041046131493e-05, | |
| "loss": 0.5206, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.650199782055939, | |
| "grad_norm": 14.417247772216797, | |
| "learning_rate": 3.374500544860153e-05, | |
| "loss": 0.4639, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.6520159825644751, | |
| "grad_norm": 14.06032943725586, | |
| "learning_rate": 3.369960043588813e-05, | |
| "loss": 0.5256, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.6538321830730113, | |
| "grad_norm": 19.377899169921875, | |
| "learning_rate": 3.365419542317472e-05, | |
| "loss": 0.4589, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.6538321830730113, | |
| "eval_accuracy": 0.815422477440525, | |
| "eval_f1": 0.8098112032026681, | |
| "eval_loss": 0.5438756346702576, | |
| "eval_precision": 0.8087363057639664, | |
| "eval_recall": 0.8171923146479461, | |
| "eval_runtime": 12.0597, | |
| "eval_samples_per_second": 101.081, | |
| "eval_steps_per_second": 6.385, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.6556483835815474, | |
| "grad_norm": 10.339813232421875, | |
| "learning_rate": 3.360879041046132e-05, | |
| "loss": 0.5665, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.6574645840900836, | |
| "grad_norm": 12.189675331115723, | |
| "learning_rate": 3.356338539774791e-05, | |
| "loss": 0.341, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.6592807845986197, | |
| "grad_norm": 17.71584701538086, | |
| "learning_rate": 3.351798038503451e-05, | |
| "loss": 0.3679, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.6610969851071559, | |
| "grad_norm": 12.258733749389648, | |
| "learning_rate": 3.3472575372321105e-05, | |
| "loss": 0.5311, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.662913185615692, | |
| "grad_norm": 22.354339599609375, | |
| "learning_rate": 3.3427170359607704e-05, | |
| "loss": 0.4843, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.6647293861242282, | |
| "grad_norm": 14.635857582092285, | |
| "learning_rate": 3.33817653468943e-05, | |
| "loss": 0.4105, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.6665455866327643, | |
| "grad_norm": 15.776519775390625, | |
| "learning_rate": 3.33363603341809e-05, | |
| "loss": 0.5662, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.6683617871413003, | |
| "grad_norm": 16.750410079956055, | |
| "learning_rate": 3.329095532146749e-05, | |
| "loss": 0.4221, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.6701779876498365, | |
| "grad_norm": 14.167458534240723, | |
| "learning_rate": 3.324555030875409e-05, | |
| "loss": 0.3916, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.6719941881583726, | |
| "grad_norm": 12.054675102233887, | |
| "learning_rate": 3.320014529604069e-05, | |
| "loss": 0.4332, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.6738103886669088, | |
| "grad_norm": 17.444786071777344, | |
| "learning_rate": 3.315474028332728e-05, | |
| "loss": 0.4895, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.6756265891754449, | |
| "grad_norm": 12.62495231628418, | |
| "learning_rate": 3.310933527061388e-05, | |
| "loss": 0.484, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.6774427896839811, | |
| "grad_norm": 17.694808959960938, | |
| "learning_rate": 3.3063930257900474e-05, | |
| "loss": 0.5494, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.6792589901925172, | |
| "grad_norm": 9.741250038146973, | |
| "learning_rate": 3.3018525245187074e-05, | |
| "loss": 0.5297, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.6810751907010534, | |
| "grad_norm": 9.227933883666992, | |
| "learning_rate": 3.297312023247367e-05, | |
| "loss": 0.4591, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.6828913912095895, | |
| "grad_norm": 22.44287109375, | |
| "learning_rate": 3.2927715219760266e-05, | |
| "loss": 0.5248, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.6847075917181257, | |
| "grad_norm": 19.741558074951172, | |
| "learning_rate": 3.288231020704686e-05, | |
| "loss": 0.532, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.6865237922266618, | |
| "grad_norm": 21.53546142578125, | |
| "learning_rate": 3.283690519433346e-05, | |
| "loss": 0.5514, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.688339992735198, | |
| "grad_norm": 16.261137008666992, | |
| "learning_rate": 3.279150018162005e-05, | |
| "loss": 0.5721, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.6901561932437341, | |
| "grad_norm": 7.155134677886963, | |
| "learning_rate": 3.274609516890665e-05, | |
| "loss": 0.4068, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.6919723937522703, | |
| "grad_norm": 8.603271484375, | |
| "learning_rate": 3.270069015619325e-05, | |
| "loss": 0.3959, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.6937885942608064, | |
| "grad_norm": 18.37700080871582, | |
| "learning_rate": 3.2655285143479844e-05, | |
| "loss": 0.5014, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.6956047947693426, | |
| "grad_norm": 13.100898742675781, | |
| "learning_rate": 3.2609880130766444e-05, | |
| "loss": 0.5567, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.6974209952778787, | |
| "grad_norm": 13.200430870056152, | |
| "learning_rate": 3.2564475118053036e-05, | |
| "loss": 0.4679, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.6992371957864149, | |
| "grad_norm": 12.253862380981445, | |
| "learning_rate": 3.2519070105339636e-05, | |
| "loss": 0.4516, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.701053396294951, | |
| "grad_norm": 6.870277404785156, | |
| "learning_rate": 3.247366509262623e-05, | |
| "loss": 0.5378, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.7028695968034872, | |
| "grad_norm": 14.495081901550293, | |
| "learning_rate": 3.242826007991283e-05, | |
| "loss": 0.5009, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.7046857973120233, | |
| "grad_norm": 3.442812919616699, | |
| "learning_rate": 3.238285506719942e-05, | |
| "loss": 0.4108, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.7065019978205593, | |
| "grad_norm": 5.441460609436035, | |
| "learning_rate": 3.233745005448602e-05, | |
| "loss": 0.3479, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.7083181983290955, | |
| "grad_norm": 12.709874153137207, | |
| "learning_rate": 3.2292045041772614e-05, | |
| "loss": 0.3864, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.7083181983290955, | |
| "eval_accuracy": 0.8105004101722724, | |
| "eval_f1": 0.799452724152361, | |
| "eval_loss": 0.5857027769088745, | |
| "eval_precision": 0.7947749172284474, | |
| "eval_recall": 0.8158609235209215, | |
| "eval_runtime": 12.0674, | |
| "eval_samples_per_second": 101.016, | |
| "eval_steps_per_second": 6.381, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.7101343988376316, | |
| "grad_norm": 10.219947814941406, | |
| "learning_rate": 3.2246640029059214e-05, | |
| "loss": 0.6109, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.7119505993461678, | |
| "grad_norm": 22.518009185791016, | |
| "learning_rate": 3.2201235016345806e-05, | |
| "loss": 0.5354, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.7137667998547039, | |
| "grad_norm": 9.188867568969727, | |
| "learning_rate": 3.2155830003632406e-05, | |
| "loss": 0.5216, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.7155830003632401, | |
| "grad_norm": 16.9005184173584, | |
| "learning_rate": 3.2110424990919e-05, | |
| "loss": 0.4771, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.7173992008717762, | |
| "grad_norm": 14.375580787658691, | |
| "learning_rate": 3.20650199782056e-05, | |
| "loss": 0.4197, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.7192154013803124, | |
| "grad_norm": 14.258020401000977, | |
| "learning_rate": 3.201961496549219e-05, | |
| "loss": 0.4487, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.7210316018888485, | |
| "grad_norm": 11.470094680786133, | |
| "learning_rate": 3.197420995277879e-05, | |
| "loss": 0.4176, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.7228478023973847, | |
| "grad_norm": 12.606728553771973, | |
| "learning_rate": 3.1928804940065384e-05, | |
| "loss": 0.5001, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.7246640029059208, | |
| "grad_norm": 25.704116821289062, | |
| "learning_rate": 3.1883399927351983e-05, | |
| "loss": 0.5424, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.726480203414457, | |
| "grad_norm": 15.872344017028809, | |
| "learning_rate": 3.183799491463858e-05, | |
| "loss": 0.589, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.7282964039229931, | |
| "grad_norm": 8.058246612548828, | |
| "learning_rate": 3.1792589901925176e-05, | |
| "loss": 0.4356, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.7301126044315293, | |
| "grad_norm": 18.3121337890625, | |
| "learning_rate": 3.1747184889211776e-05, | |
| "loss": 0.4245, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.7319288049400654, | |
| "grad_norm": 13.85145378112793, | |
| "learning_rate": 3.170177987649837e-05, | |
| "loss": 0.4336, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.7337450054486016, | |
| "grad_norm": 11.043869018554688, | |
| "learning_rate": 3.165637486378497e-05, | |
| "loss": 0.4433, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.7355612059571377, | |
| "grad_norm": 14.299675941467285, | |
| "learning_rate": 3.161096985107156e-05, | |
| "loss": 0.4482, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.7373774064656738, | |
| "grad_norm": 9.129308700561523, | |
| "learning_rate": 3.156556483835816e-05, | |
| "loss": 0.5591, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.73919360697421, | |
| "grad_norm": 15.059881210327148, | |
| "learning_rate": 3.1520159825644753e-05, | |
| "loss": 0.4043, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.7410098074827461, | |
| "grad_norm": 14.52391242980957, | |
| "learning_rate": 3.147475481293135e-05, | |
| "loss": 0.4029, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.7428260079912823, | |
| "grad_norm": 14.165828704833984, | |
| "learning_rate": 3.1429349800217946e-05, | |
| "loss": 0.5121, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.7446422084998183, | |
| "grad_norm": 19.52725601196289, | |
| "learning_rate": 3.1383944787504546e-05, | |
| "loss": 0.4375, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.7464584090083545, | |
| "grad_norm": 18.168001174926758, | |
| "learning_rate": 3.133853977479114e-05, | |
| "loss": 0.541, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.7482746095168906, | |
| "grad_norm": 23.436870574951172, | |
| "learning_rate": 3.129313476207774e-05, | |
| "loss": 0.5703, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.7500908100254268, | |
| "grad_norm": 16.01010513305664, | |
| "learning_rate": 3.124772974936433e-05, | |
| "loss": 0.4326, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.7519070105339629, | |
| "grad_norm": 15.457175254821777, | |
| "learning_rate": 3.120232473665093e-05, | |
| "loss": 0.379, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.7537232110424991, | |
| "grad_norm": 17.524295806884766, | |
| "learning_rate": 3.1156919723937523e-05, | |
| "loss": 0.392, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.7555394115510352, | |
| "grad_norm": 19.16515350341797, | |
| "learning_rate": 3.111151471122412e-05, | |
| "loss": 0.5474, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.7573556120595714, | |
| "grad_norm": 12.622529029846191, | |
| "learning_rate": 3.1066109698510716e-05, | |
| "loss": 0.4343, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.7591718125681075, | |
| "grad_norm": 12.761943817138672, | |
| "learning_rate": 3.1020704685797316e-05, | |
| "loss": 0.4281, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.7609880130766437, | |
| "grad_norm": 15.795944213867188, | |
| "learning_rate": 3.097529967308391e-05, | |
| "loss": 0.4434, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.7628042135851798, | |
| "grad_norm": 6.286984920501709, | |
| "learning_rate": 3.092989466037051e-05, | |
| "loss": 0.4196, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.7628042135851798, | |
| "eval_accuracy": 0.8105004101722724, | |
| "eval_f1": 0.8031176631002425, | |
| "eval_loss": 0.5337262749671936, | |
| "eval_precision": 0.8017654292844819, | |
| "eval_recall": 0.811221918165322, | |
| "eval_runtime": 12.0674, | |
| "eval_samples_per_second": 101.016, | |
| "eval_steps_per_second": 6.381, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.764620414093716, | |
| "grad_norm": 53.68039321899414, | |
| "learning_rate": 3.088448964765711e-05, | |
| "loss": 0.5151, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.7664366146022521, | |
| "grad_norm": 10.041727066040039, | |
| "learning_rate": 3.08390846349437e-05, | |
| "loss": 0.4146, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.7682528151107882, | |
| "grad_norm": 25.997821807861328, | |
| "learning_rate": 3.07936796222303e-05, | |
| "loss": 0.556, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.7700690156193244, | |
| "grad_norm": 13.25404167175293, | |
| "learning_rate": 3.074827460951689e-05, | |
| "loss": 0.513, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.7718852161278605, | |
| "grad_norm": 23.45793342590332, | |
| "learning_rate": 3.070286959680349e-05, | |
| "loss": 0.4953, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.7737014166363967, | |
| "grad_norm": 18.79665756225586, | |
| "learning_rate": 3.0657464584090086e-05, | |
| "loss": 0.4196, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.7755176171449328, | |
| "grad_norm": 15.050500869750977, | |
| "learning_rate": 3.0612059571376685e-05, | |
| "loss": 0.3523, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.777333817653469, | |
| "grad_norm": 17.48199462890625, | |
| "learning_rate": 3.056665455866328e-05, | |
| "loss": 0.4707, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.7791500181620051, | |
| "grad_norm": 12.54255199432373, | |
| "learning_rate": 3.052124954594988e-05, | |
| "loss": 0.3119, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.7809662186705413, | |
| "grad_norm": 19.040857315063477, | |
| "learning_rate": 3.047584453323647e-05, | |
| "loss": 0.3681, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.7827824191790773, | |
| "grad_norm": 22.418601989746094, | |
| "learning_rate": 3.043043952052307e-05, | |
| "loss": 0.4745, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.7845986196876135, | |
| "grad_norm": 19.793771743774414, | |
| "learning_rate": 3.0385034507809663e-05, | |
| "loss": 0.4836, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.7864148201961496, | |
| "grad_norm": 10.669327735900879, | |
| "learning_rate": 3.0339629495096263e-05, | |
| "loss": 0.4829, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.7882310207046858, | |
| "grad_norm": 22.390172958374023, | |
| "learning_rate": 3.0294224482382856e-05, | |
| "loss": 0.6336, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.7900472212132219, | |
| "grad_norm": 15.641258239746094, | |
| "learning_rate": 3.0248819469669455e-05, | |
| "loss": 0.5083, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.791863421721758, | |
| "grad_norm": 20.096162796020508, | |
| "learning_rate": 3.0203414456956048e-05, | |
| "loss": 0.5094, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.7936796222302942, | |
| "grad_norm": 22.68675422668457, | |
| "learning_rate": 3.0158009444242648e-05, | |
| "loss": 0.5245, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.7954958227388303, | |
| "grad_norm": 29.42097282409668, | |
| "learning_rate": 3.011260443152924e-05, | |
| "loss": 0.4459, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.7973120232473665, | |
| "grad_norm": 19.64771842956543, | |
| "learning_rate": 3.006719941881584e-05, | |
| "loss": 0.4514, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.7991282237559026, | |
| "grad_norm": 21.052167892456055, | |
| "learning_rate": 3.0021794406102433e-05, | |
| "loss": 0.4875, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.8009444242644388, | |
| "grad_norm": 22.617921829223633, | |
| "learning_rate": 2.9976389393389033e-05, | |
| "loss": 0.5775, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.8027606247729749, | |
| "grad_norm": 18.567598342895508, | |
| "learning_rate": 2.9930984380675632e-05, | |
| "loss": 0.3457, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.8045768252815111, | |
| "grad_norm": 13.792886734008789, | |
| "learning_rate": 2.9885579367962225e-05, | |
| "loss": 0.3967, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.8063930257900472, | |
| "grad_norm": 24.021446228027344, | |
| "learning_rate": 2.9840174355248825e-05, | |
| "loss": 0.5235, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.8082092262985834, | |
| "grad_norm": 14.933148384094238, | |
| "learning_rate": 2.9794769342535418e-05, | |
| "loss": 0.5006, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.8100254268071195, | |
| "grad_norm": 18.85728645324707, | |
| "learning_rate": 2.9749364329822017e-05, | |
| "loss": 0.408, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.8118416273156557, | |
| "grad_norm": 18.168296813964844, | |
| "learning_rate": 2.970395931710861e-05, | |
| "loss": 0.5026, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.8136578278241918, | |
| "grad_norm": 12.585858345031738, | |
| "learning_rate": 2.965855430439521e-05, | |
| "loss": 0.3576, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.815474028332728, | |
| "grad_norm": 21.00431251525879, | |
| "learning_rate": 2.9613149291681803e-05, | |
| "loss": 0.494, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.8172902288412641, | |
| "grad_norm": 32.093345642089844, | |
| "learning_rate": 2.9567744278968402e-05, | |
| "loss": 0.4508, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8172902288412641, | |
| "eval_accuracy": 0.8162428219852338, | |
| "eval_f1": 0.8092636477145345, | |
| "eval_loss": 0.4987526535987854, | |
| "eval_precision": 0.8023527484407372, | |
| "eval_recall": 0.8208478993572457, | |
| "eval_runtime": 12.0751, | |
| "eval_samples_per_second": 100.951, | |
| "eval_steps_per_second": 6.377, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8191064293498003, | |
| "grad_norm": 9.480962753295898, | |
| "learning_rate": 2.9522339266254995e-05, | |
| "loss": 0.3415, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.8209226298583363, | |
| "grad_norm": 21.77303123474121, | |
| "learning_rate": 2.9476934253541595e-05, | |
| "loss": 0.5777, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.8227388303668725, | |
| "grad_norm": 17.37676429748535, | |
| "learning_rate": 2.9431529240828188e-05, | |
| "loss": 0.5402, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.8245550308754086, | |
| "grad_norm": 8.430058479309082, | |
| "learning_rate": 2.9386124228114787e-05, | |
| "loss": 0.5605, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.8263712313839447, | |
| "grad_norm": 11.52684211730957, | |
| "learning_rate": 2.934071921540138e-05, | |
| "loss": 0.4402, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.8281874318924809, | |
| "grad_norm": 15.570836067199707, | |
| "learning_rate": 2.929531420268798e-05, | |
| "loss": 0.4715, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.830003632401017, | |
| "grad_norm": 17.31182289123535, | |
| "learning_rate": 2.9249909189974573e-05, | |
| "loss": 0.4377, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.8318198329095532, | |
| "grad_norm": 17.72749900817871, | |
| "learning_rate": 2.9204504177261172e-05, | |
| "loss": 0.574, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.8336360334180893, | |
| "grad_norm": 4.865232467651367, | |
| "learning_rate": 2.9159099164547765e-05, | |
| "loss": 0.4036, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.8354522339266255, | |
| "grad_norm": 13.070740699768066, | |
| "learning_rate": 2.9113694151834365e-05, | |
| "loss": 0.4395, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.8372684344351616, | |
| "grad_norm": 11.224090576171875, | |
| "learning_rate": 2.9068289139120958e-05, | |
| "loss": 0.5105, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.8390846349436978, | |
| "grad_norm": 10.014636039733887, | |
| "learning_rate": 2.9022884126407557e-05, | |
| "loss": 0.3832, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.8409008354522339, | |
| "grad_norm": 16.736953735351562, | |
| "learning_rate": 2.8977479113694157e-05, | |
| "loss": 0.5902, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.8427170359607701, | |
| "grad_norm": 19.752222061157227, | |
| "learning_rate": 2.893207410098075e-05, | |
| "loss": 0.4689, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.8445332364693062, | |
| "grad_norm": 21.56574058532715, | |
| "learning_rate": 2.888666908826735e-05, | |
| "loss": 0.424, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.8463494369778424, | |
| "grad_norm": 7.042162895202637, | |
| "learning_rate": 2.8841264075553942e-05, | |
| "loss": 0.4833, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.8481656374863785, | |
| "grad_norm": 28.993854522705078, | |
| "learning_rate": 2.8795859062840542e-05, | |
| "loss": 0.549, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.8499818379949147, | |
| "grad_norm": 10.317886352539062, | |
| "learning_rate": 2.8750454050127135e-05, | |
| "loss": 0.5293, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.8517980385034508, | |
| "grad_norm": 17.66384506225586, | |
| "learning_rate": 2.8705049037413734e-05, | |
| "loss": 0.4197, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.853614239011987, | |
| "grad_norm": 24.199743270874023, | |
| "learning_rate": 2.8659644024700327e-05, | |
| "loss": 0.4935, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.8554304395205231, | |
| "grad_norm": 6.78064489364624, | |
| "learning_rate": 2.8614239011986927e-05, | |
| "loss": 0.4021, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.8572466400290593, | |
| "grad_norm": 12.244144439697266, | |
| "learning_rate": 2.856883399927352e-05, | |
| "loss": 0.487, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.8590628405375953, | |
| "grad_norm": 13.118432998657227, | |
| "learning_rate": 2.852342898656012e-05, | |
| "loss": 0.4862, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.8608790410461314, | |
| "grad_norm": 12.08948040008545, | |
| "learning_rate": 2.8478023973846712e-05, | |
| "loss": 0.5039, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.8626952415546676, | |
| "grad_norm": 18.59223175048828, | |
| "learning_rate": 2.8432618961133312e-05, | |
| "loss": 0.3387, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.8645114420632037, | |
| "grad_norm": 16.462608337402344, | |
| "learning_rate": 2.8387213948419905e-05, | |
| "loss": 0.4173, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.8663276425717399, | |
| "grad_norm": 19.474824905395508, | |
| "learning_rate": 2.8341808935706504e-05, | |
| "loss": 0.4793, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.868143843080276, | |
| "grad_norm": 9.90221118927002, | |
| "learning_rate": 2.8296403922993097e-05, | |
| "loss": 0.4677, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.8699600435888122, | |
| "grad_norm": 18.47876739501953, | |
| "learning_rate": 2.8250998910279697e-05, | |
| "loss": 0.3815, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.8717762440973483, | |
| "grad_norm": 16.144685745239258, | |
| "learning_rate": 2.820559389756629e-05, | |
| "loss": 0.5303, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.8717762440973483, | |
| "eval_accuracy": 0.8351107465135357, | |
| "eval_f1": 0.8281745117415222, | |
| "eval_loss": 0.48171207308769226, | |
| "eval_precision": 0.8221184344103021, | |
| "eval_recall": 0.8358880640041798, | |
| "eval_runtime": 12.0571, | |
| "eval_samples_per_second": 101.102, | |
| "eval_steps_per_second": 6.386, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.8735924446058845, | |
| "grad_norm": 15.96438980102539, | |
| "learning_rate": 2.816018888485289e-05, | |
| "loss": 0.4498, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.8754086451144206, | |
| "grad_norm": 19.435787200927734, | |
| "learning_rate": 2.8114783872139482e-05, | |
| "loss": 0.3781, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.8772248456229568, | |
| "grad_norm": 17.200559616088867, | |
| "learning_rate": 2.8069378859426082e-05, | |
| "loss": 0.4631, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.8790410461314929, | |
| "grad_norm": 12.658839225769043, | |
| "learning_rate": 2.802397384671268e-05, | |
| "loss": 0.4265, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.8808572466400291, | |
| "grad_norm": 7.684325695037842, | |
| "learning_rate": 2.7978568833999274e-05, | |
| "loss": 0.4284, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.8826734471485652, | |
| "grad_norm": 12.922738075256348, | |
| "learning_rate": 2.7933163821285874e-05, | |
| "loss": 0.3229, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.8844896476571014, | |
| "grad_norm": 23.311817169189453, | |
| "learning_rate": 2.7887758808572467e-05, | |
| "loss": 0.4021, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.8863058481656375, | |
| "grad_norm": 14.780502319335938, | |
| "learning_rate": 2.7842353795859067e-05, | |
| "loss": 0.4722, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.8881220486741737, | |
| "grad_norm": 12.9520902633667, | |
| "learning_rate": 2.779694878314566e-05, | |
| "loss": 0.3892, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.8899382491827098, | |
| "grad_norm": 22.6149845123291, | |
| "learning_rate": 2.775154377043226e-05, | |
| "loss": 0.5177, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.891754449691246, | |
| "grad_norm": 19.335704803466797, | |
| "learning_rate": 2.7706138757718852e-05, | |
| "loss": 0.4524, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.8935706501997821, | |
| "grad_norm": 11.706412315368652, | |
| "learning_rate": 2.766073374500545e-05, | |
| "loss": 0.3219, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.8953868507083182, | |
| "grad_norm": 24.9698543548584, | |
| "learning_rate": 2.7615328732292044e-05, | |
| "loss": 0.4699, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.8972030512168543, | |
| "grad_norm": 22.411867141723633, | |
| "learning_rate": 2.7569923719578644e-05, | |
| "loss": 0.4928, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.8990192517253904, | |
| "grad_norm": 15.261788368225098, | |
| "learning_rate": 2.7524518706865237e-05, | |
| "loss": 0.4825, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.9008354522339266, | |
| "grad_norm": 3.8997724056243896, | |
| "learning_rate": 2.7479113694151837e-05, | |
| "loss": 0.3152, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.9026516527424627, | |
| "grad_norm": 17.017913818359375, | |
| "learning_rate": 2.743370868143843e-05, | |
| "loss": 0.5825, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.9044678532509989, | |
| "grad_norm": 12.277453422546387, | |
| "learning_rate": 2.738830366872503e-05, | |
| "loss": 0.4168, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.906284053759535, | |
| "grad_norm": 10.170480728149414, | |
| "learning_rate": 2.7342898656011622e-05, | |
| "loss": 0.3428, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.9081002542680712, | |
| "grad_norm": 17.166027069091797, | |
| "learning_rate": 2.729749364329822e-05, | |
| "loss": 0.3343, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.9099164547766073, | |
| "grad_norm": 13.139386177062988, | |
| "learning_rate": 2.7252088630584814e-05, | |
| "loss": 0.3653, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.9117326552851435, | |
| "grad_norm": 17.63907814025879, | |
| "learning_rate": 2.7206683617871414e-05, | |
| "loss": 0.4842, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.9135488557936796, | |
| "grad_norm": 11.699908256530762, | |
| "learning_rate": 2.7161278605158007e-05, | |
| "loss": 0.5177, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.9153650563022158, | |
| "grad_norm": 13.266210556030273, | |
| "learning_rate": 2.7115873592444607e-05, | |
| "loss": 0.4534, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.9171812568107519, | |
| "grad_norm": 9.60502815246582, | |
| "learning_rate": 2.7070468579731206e-05, | |
| "loss": 0.3299, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.9189974573192881, | |
| "grad_norm": 17.09486198425293, | |
| "learning_rate": 2.70250635670178e-05, | |
| "loss": 0.4957, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.9208136578278242, | |
| "grad_norm": 17.392698287963867, | |
| "learning_rate": 2.69796585543044e-05, | |
| "loss": 0.4678, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.9226298583363604, | |
| "grad_norm": 17.839717864990234, | |
| "learning_rate": 2.693425354159099e-05, | |
| "loss": 0.5732, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.9244460588448965, | |
| "grad_norm": 14.015562057495117, | |
| "learning_rate": 2.688884852887759e-05, | |
| "loss": 0.3867, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.9262622593534326, | |
| "grad_norm": 13.306390762329102, | |
| "learning_rate": 2.6843443516164184e-05, | |
| "loss": 0.4422, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.9262622593534326, | |
| "eval_accuracy": 0.8236259228876128, | |
| "eval_f1": 0.8174273731975736, | |
| "eval_loss": 0.5039647221565247, | |
| "eval_precision": 0.8178974051720215, | |
| "eval_recall": 0.8269853120695915, | |
| "eval_runtime": 12.0903, | |
| "eval_samples_per_second": 100.824, | |
| "eval_steps_per_second": 6.369, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.9280784598619688, | |
| "grad_norm": 15.518729209899902, | |
| "learning_rate": 2.6798038503450784e-05, | |
| "loss": 0.5069, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.9298946603705049, | |
| "grad_norm": 20.26007843017578, | |
| "learning_rate": 2.6752633490737377e-05, | |
| "loss": 0.5047, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.9317108608790411, | |
| "grad_norm": 10.854071617126465, | |
| "learning_rate": 2.6707228478023976e-05, | |
| "loss": 0.5326, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.9335270613875772, | |
| "grad_norm": 12.248214721679688, | |
| "learning_rate": 2.666182346531057e-05, | |
| "loss": 0.4037, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.9353432618961133, | |
| "grad_norm": 9.578265190124512, | |
| "learning_rate": 2.661641845259717e-05, | |
| "loss": 0.3088, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.9371594624046494, | |
| "grad_norm": 19.308855056762695, | |
| "learning_rate": 2.657101343988376e-05, | |
| "loss": 0.391, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.9389756629131856, | |
| "grad_norm": 16.941064834594727, | |
| "learning_rate": 2.652560842717036e-05, | |
| "loss": 0.3419, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.9407918634217217, | |
| "grad_norm": 21.8260440826416, | |
| "learning_rate": 2.6480203414456954e-05, | |
| "loss": 0.671, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.9426080639302579, | |
| "grad_norm": 22.408531188964844, | |
| "learning_rate": 2.6434798401743554e-05, | |
| "loss": 0.4909, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.944424264438794, | |
| "grad_norm": 21.8310546875, | |
| "learning_rate": 2.6389393389030147e-05, | |
| "loss": 0.4541, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.9462404649473302, | |
| "grad_norm": 21.634668350219727, | |
| "learning_rate": 2.6343988376316746e-05, | |
| "loss": 0.3987, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.9480566654558663, | |
| "grad_norm": 16.159862518310547, | |
| "learning_rate": 2.6298583363603342e-05, | |
| "loss": 0.5047, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.9498728659644025, | |
| "grad_norm": 10.970589637756348, | |
| "learning_rate": 2.625317835088994e-05, | |
| "loss": 0.4124, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.9516890664729386, | |
| "grad_norm": 19.680744171142578, | |
| "learning_rate": 2.6207773338176535e-05, | |
| "loss": 0.5401, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.9535052669814748, | |
| "grad_norm": 10.408095359802246, | |
| "learning_rate": 2.616236832546313e-05, | |
| "loss": 0.4913, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.9553214674900109, | |
| "grad_norm": 8.389443397521973, | |
| "learning_rate": 2.611696331274973e-05, | |
| "loss": 0.3567, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.957137667998547, | |
| "grad_norm": 11.20021915435791, | |
| "learning_rate": 2.6071558300036324e-05, | |
| "loss": 0.3231, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.9589538685070832, | |
| "grad_norm": 20.043296813964844, | |
| "learning_rate": 2.6026153287322923e-05, | |
| "loss": 0.6047, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.9607700690156193, | |
| "grad_norm": 20.225339889526367, | |
| "learning_rate": 2.5980748274609516e-05, | |
| "loss": 0.442, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.9625862695241555, | |
| "grad_norm": 14.611661911010742, | |
| "learning_rate": 2.5935343261896116e-05, | |
| "loss": 0.4598, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.9644024700326916, | |
| "grad_norm": 10.53466510772705, | |
| "learning_rate": 2.588993824918271e-05, | |
| "loss": 0.3438, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.9662186705412278, | |
| "grad_norm": 23.299837112426758, | |
| "learning_rate": 2.5844533236469308e-05, | |
| "loss": 0.3867, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.9680348710497639, | |
| "grad_norm": 13.314847946166992, | |
| "learning_rate": 2.5799128223755905e-05, | |
| "loss": 0.5519, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.9698510715583001, | |
| "grad_norm": 10.600733757019043, | |
| "learning_rate": 2.57537232110425e-05, | |
| "loss": 0.4463, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.9716672720668362, | |
| "grad_norm": 23.0856990814209, | |
| "learning_rate": 2.5708318198329097e-05, | |
| "loss": 0.4518, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.9734834725753723, | |
| "grad_norm": 16.995450973510742, | |
| "learning_rate": 2.5662913185615693e-05, | |
| "loss": 0.4835, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.9752996730839084, | |
| "grad_norm": 13.805352210998535, | |
| "learning_rate": 2.561750817290229e-05, | |
| "loss": 0.4145, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.9771158735924446, | |
| "grad_norm": 7.604394435882568, | |
| "learning_rate": 2.5572103160188886e-05, | |
| "loss": 0.3946, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.9789320741009807, | |
| "grad_norm": 12.790209770202637, | |
| "learning_rate": 2.5526698147475482e-05, | |
| "loss": 0.4734, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.9807482746095169, | |
| "grad_norm": 13.206761360168457, | |
| "learning_rate": 2.5481293134762078e-05, | |
| "loss": 0.4081, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.9807482746095169, | |
| "eval_accuracy": 0.8244462674323215, | |
| "eval_f1": 0.8170131934114577, | |
| "eval_loss": 0.4968956708908081, | |
| "eval_precision": 0.8102785727429149, | |
| "eval_recall": 0.8306898382408164, | |
| "eval_runtime": 12.0822, | |
| "eval_samples_per_second": 100.892, | |
| "eval_steps_per_second": 6.373, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.982564475118053, | |
| "grad_norm": 11.270719528198242, | |
| "learning_rate": 2.5435888122048675e-05, | |
| "loss": 0.4856, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.9843806756265892, | |
| "grad_norm": 8.846707344055176, | |
| "learning_rate": 2.539048310933527e-05, | |
| "loss": 0.4354, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.9861968761351253, | |
| "grad_norm": 22.749967575073242, | |
| "learning_rate": 2.5345078096621867e-05, | |
| "loss": 0.479, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.9880130766436614, | |
| "grad_norm": 16.036691665649414, | |
| "learning_rate": 2.5299673083908467e-05, | |
| "loss": 0.4042, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.9898292771521976, | |
| "grad_norm": 13.034772872924805, | |
| "learning_rate": 2.525426807119506e-05, | |
| "loss": 0.4203, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.9916454776607337, | |
| "grad_norm": 16.55560302734375, | |
| "learning_rate": 2.520886305848166e-05, | |
| "loss": 0.325, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.9934616781692699, | |
| "grad_norm": 17.726268768310547, | |
| "learning_rate": 2.5163458045768255e-05, | |
| "loss": 0.5221, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.995277878677806, | |
| "grad_norm": 16.85834503173828, | |
| "learning_rate": 2.511805303305485e-05, | |
| "loss": 0.4584, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.9970940791863422, | |
| "grad_norm": 20.59090232849121, | |
| "learning_rate": 2.5072648020341448e-05, | |
| "loss": 0.687, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.9989102796948783, | |
| "grad_norm": 11.305227279663086, | |
| "learning_rate": 2.5027243007628044e-05, | |
| "loss": 0.3293, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.0007264802034144, | |
| "grad_norm": 5.661748886108398, | |
| "learning_rate": 2.498183799491464e-05, | |
| "loss": 0.3034, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.0025426807119506, | |
| "grad_norm": 14.591137886047363, | |
| "learning_rate": 2.4936432982201237e-05, | |
| "loss": 0.2698, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.0043588812204867, | |
| "grad_norm": 18.574607849121094, | |
| "learning_rate": 2.4891027969487833e-05, | |
| "loss": 0.3438, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.006175081729023, | |
| "grad_norm": 12.30125904083252, | |
| "learning_rate": 2.484562295677443e-05, | |
| "loss": 0.3503, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.007991282237559, | |
| "grad_norm": 25.125314712524414, | |
| "learning_rate": 2.4800217944061025e-05, | |
| "loss": 0.3518, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.0098074827460952, | |
| "grad_norm": 10.978049278259277, | |
| "learning_rate": 2.475481293134762e-05, | |
| "loss": 0.3292, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.0116236832546313, | |
| "grad_norm": 9.796133995056152, | |
| "learning_rate": 2.470940791863422e-05, | |
| "loss": 0.2946, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.0134398837631675, | |
| "grad_norm": 28.557024002075195, | |
| "learning_rate": 2.4664002905920818e-05, | |
| "loss": 0.4914, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.0152560842717036, | |
| "grad_norm": 15.893627166748047, | |
| "learning_rate": 2.4618597893207414e-05, | |
| "loss": 0.3519, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.0170722847802398, | |
| "grad_norm": 15.343416213989258, | |
| "learning_rate": 2.457319288049401e-05, | |
| "loss": 0.4028, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.0188884852887758, | |
| "grad_norm": 9.576445579528809, | |
| "learning_rate": 2.4527787867780606e-05, | |
| "loss": 0.3718, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.020704685797312, | |
| "grad_norm": 7.0438232421875, | |
| "learning_rate": 2.4482382855067202e-05, | |
| "loss": 0.191, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.0225208863058481, | |
| "grad_norm": 7.872592926025391, | |
| "learning_rate": 2.44369778423538e-05, | |
| "loss": 0.2752, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.0243370868143844, | |
| "grad_norm": 19.647167205810547, | |
| "learning_rate": 2.4391572829640395e-05, | |
| "loss": 0.3226, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.0261532873229204, | |
| "grad_norm": 7.968392848968506, | |
| "learning_rate": 2.434616781692699e-05, | |
| "loss": 0.2653, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.0279694878314567, | |
| "grad_norm": 17.07377052307129, | |
| "learning_rate": 2.4300762804213587e-05, | |
| "loss": 0.3296, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.0297856883399927, | |
| "grad_norm": 18.10598373413086, | |
| "learning_rate": 2.4255357791500184e-05, | |
| "loss": 0.2421, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.0316018888485288, | |
| "grad_norm": 10.867464065551758, | |
| "learning_rate": 2.420995277878678e-05, | |
| "loss": 0.301, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.033418089357065, | |
| "grad_norm": 15.795087814331055, | |
| "learning_rate": 2.4164547766073376e-05, | |
| "loss": 0.2956, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.035234289865601, | |
| "grad_norm": 19.456615447998047, | |
| "learning_rate": 2.4119142753359972e-05, | |
| "loss": 0.2555, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.035234289865601, | |
| "eval_accuracy": 0.8285479901558654, | |
| "eval_f1": 0.8213955767333186, | |
| "eval_loss": 0.5004270076751709, | |
| "eval_precision": 0.8160496038609667, | |
| "eval_recall": 0.8316668454229845, | |
| "eval_runtime": 12.1355, | |
| "eval_samples_per_second": 100.449, | |
| "eval_steps_per_second": 6.345, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.0370504903741373, | |
| "grad_norm": 10.63526725769043, | |
| "learning_rate": 2.407373774064657e-05, | |
| "loss": 0.3511, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.0388666908826734, | |
| "grad_norm": 5.104264259338379, | |
| "learning_rate": 2.4028332727933165e-05, | |
| "loss": 0.2357, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.0406828913912096, | |
| "grad_norm": 16.865144729614258, | |
| "learning_rate": 2.398292771521976e-05, | |
| "loss": 0.4097, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.0424990918997457, | |
| "grad_norm": 22.08740997314453, | |
| "learning_rate": 2.3937522702506357e-05, | |
| "loss": 0.3789, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.044315292408282, | |
| "grad_norm": 28.62466812133789, | |
| "learning_rate": 2.3892117689792954e-05, | |
| "loss": 0.3074, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.046131492916818, | |
| "grad_norm": 27.597490310668945, | |
| "learning_rate": 2.384671267707955e-05, | |
| "loss": 0.2929, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.0479476934253542, | |
| "grad_norm": 20.991607666015625, | |
| "learning_rate": 2.3801307664366146e-05, | |
| "loss": 0.4398, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.0497638939338902, | |
| "grad_norm": 20.590211868286133, | |
| "learning_rate": 2.3755902651652746e-05, | |
| "loss": 0.2788, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.0515800944424265, | |
| "grad_norm": 11.073917388916016, | |
| "learning_rate": 2.3710497638939342e-05, | |
| "loss": 0.1954, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.0533962949509625, | |
| "grad_norm": 14.334949493408203, | |
| "learning_rate": 2.366509262622594e-05, | |
| "loss": 0.3096, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.0552124954594988, | |
| "grad_norm": 6.725550651550293, | |
| "learning_rate": 2.3619687613512535e-05, | |
| "loss": 0.4892, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.0570286959680348, | |
| "grad_norm": 16.188831329345703, | |
| "learning_rate": 2.357428260079913e-05, | |
| "loss": 0.3451, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.058844896476571, | |
| "grad_norm": 10.715399742126465, | |
| "learning_rate": 2.3528877588085727e-05, | |
| "loss": 0.4155, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.0606610969851071, | |
| "grad_norm": 18.30307388305664, | |
| "learning_rate": 2.3483472575372323e-05, | |
| "loss": 0.3095, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.0624772974936434, | |
| "grad_norm": 9.688103675842285, | |
| "learning_rate": 2.343806756265892e-05, | |
| "loss": 0.3121, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.0642934980021794, | |
| "grad_norm": 22.700828552246094, | |
| "learning_rate": 2.3392662549945516e-05, | |
| "loss": 0.2068, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.0661096985107157, | |
| "grad_norm": 11.1968994140625, | |
| "learning_rate": 2.3347257537232112e-05, | |
| "loss": 0.2767, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.0679258990192517, | |
| "grad_norm": 25.934579849243164, | |
| "learning_rate": 2.330185252451871e-05, | |
| "loss": 0.3119, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.069742099527788, | |
| "grad_norm": 7.8139824867248535, | |
| "learning_rate": 2.3256447511805305e-05, | |
| "loss": 0.2135, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.071558300036324, | |
| "grad_norm": 18.086198806762695, | |
| "learning_rate": 2.32110424990919e-05, | |
| "loss": 0.4011, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.07337450054486, | |
| "grad_norm": 22.85544776916504, | |
| "learning_rate": 2.3165637486378497e-05, | |
| "loss": 0.4042, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.0751907010533963, | |
| "grad_norm": 22.67595672607422, | |
| "learning_rate": 2.3120232473665093e-05, | |
| "loss": 0.289, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.0770069015619324, | |
| "grad_norm": 8.212250709533691, | |
| "learning_rate": 2.307482746095169e-05, | |
| "loss": 0.4169, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.0788231020704686, | |
| "grad_norm": 22.84626007080078, | |
| "learning_rate": 2.3029422448238286e-05, | |
| "loss": 0.3653, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.0806393025790046, | |
| "grad_norm": 13.91925048828125, | |
| "learning_rate": 2.2984017435524882e-05, | |
| "loss": 0.2087, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.082455503087541, | |
| "grad_norm": 22.284345626831055, | |
| "learning_rate": 2.293861242281148e-05, | |
| "loss": 0.3626, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.084271703596077, | |
| "grad_norm": 15.127870559692383, | |
| "learning_rate": 2.2893207410098075e-05, | |
| "loss": 0.2541, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.0860879041046132, | |
| "grad_norm": 6.7608113288879395, | |
| "learning_rate": 2.284780239738467e-05, | |
| "loss": 0.2726, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.0879041046131492, | |
| "grad_norm": 13.903280258178711, | |
| "learning_rate": 2.280239738467127e-05, | |
| "loss": 0.3793, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.0897203051216855, | |
| "grad_norm": 15.691337585449219, | |
| "learning_rate": 2.2756992371957867e-05, | |
| "loss": 0.2741, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.0897203051216855, | |
| "eval_accuracy": 0.8301886792452831, | |
| "eval_f1": 0.8241579951716257, | |
| "eval_loss": 0.46239912509918213, | |
| "eval_precision": 0.8201764967723821, | |
| "eval_recall": 0.8354772684817514, | |
| "eval_runtime": 12.131, | |
| "eval_samples_per_second": 100.486, | |
| "eval_steps_per_second": 6.347, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.0915365056302215, | |
| "grad_norm": 23.5091609954834, | |
| "learning_rate": 2.2711587359244463e-05, | |
| "loss": 0.3384, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.0933527061387578, | |
| "grad_norm": 16.730623245239258, | |
| "learning_rate": 2.266618234653106e-05, | |
| "loss": 0.4145, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.0951689066472938, | |
| "grad_norm": 12.925875663757324, | |
| "learning_rate": 2.2620777333817655e-05, | |
| "loss": 0.2396, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.09698510715583, | |
| "grad_norm": 1.2459120750427246, | |
| "learning_rate": 2.2575372321104252e-05, | |
| "loss": 0.2662, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.0988013076643661, | |
| "grad_norm": 11.28719711303711, | |
| "learning_rate": 2.2529967308390848e-05, | |
| "loss": 0.2615, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.1006175081729024, | |
| "grad_norm": 17.44615936279297, | |
| "learning_rate": 2.2484562295677444e-05, | |
| "loss": 0.3594, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.1024337086814384, | |
| "grad_norm": 12.629523277282715, | |
| "learning_rate": 2.243915728296404e-05, | |
| "loss": 0.3975, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.1042499091899747, | |
| "grad_norm": 16.39533805847168, | |
| "learning_rate": 2.2393752270250637e-05, | |
| "loss": 0.2663, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.1060661096985107, | |
| "grad_norm": 6.332333564758301, | |
| "learning_rate": 2.2348347257537233e-05, | |
| "loss": 0.221, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.1078823102070467, | |
| "grad_norm": 8.119811058044434, | |
| "learning_rate": 2.230294224482383e-05, | |
| "loss": 0.2462, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.109698510715583, | |
| "grad_norm": 13.648475646972656, | |
| "learning_rate": 2.2257537232110425e-05, | |
| "loss": 0.2634, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.111514711224119, | |
| "grad_norm": 2.348459005355835, | |
| "learning_rate": 2.2212132219397022e-05, | |
| "loss": 0.3967, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.1133309117326553, | |
| "grad_norm": 20.70992660522461, | |
| "learning_rate": 2.2166727206683618e-05, | |
| "loss": 0.3852, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.1151471122411913, | |
| "grad_norm": 9.073955535888672, | |
| "learning_rate": 2.2121322193970214e-05, | |
| "loss": 0.3379, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.1169633127497276, | |
| "grad_norm": 18.57855796813965, | |
| "learning_rate": 2.207591718125681e-05, | |
| "loss": 0.3118, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.1187795132582636, | |
| "grad_norm": 13.104948043823242, | |
| "learning_rate": 2.2030512168543407e-05, | |
| "loss": 0.3165, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.1205957137668, | |
| "grad_norm": 17.0866756439209, | |
| "learning_rate": 2.1985107155830003e-05, | |
| "loss": 0.3737, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.122411914275336, | |
| "grad_norm": 10.766948699951172, | |
| "learning_rate": 2.19397021431166e-05, | |
| "loss": 0.2323, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.1242281147838722, | |
| "grad_norm": 14.713004112243652, | |
| "learning_rate": 2.1894297130403195e-05, | |
| "loss": 0.2995, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.1260443152924082, | |
| "grad_norm": 15.085183143615723, | |
| "learning_rate": 2.1848892117689795e-05, | |
| "loss": 0.3476, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.1278605158009445, | |
| "grad_norm": 12.6240816116333, | |
| "learning_rate": 2.180348710497639e-05, | |
| "loss": 0.2693, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.1296767163094805, | |
| "grad_norm": 16.312969207763672, | |
| "learning_rate": 2.1758082092262988e-05, | |
| "loss": 0.4585, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.1314929168180168, | |
| "grad_norm": 4.667062282562256, | |
| "learning_rate": 2.1712677079549584e-05, | |
| "loss": 0.1519, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.1333091173265528, | |
| "grad_norm": 10.980841636657715, | |
| "learning_rate": 2.166727206683618e-05, | |
| "loss": 0.3728, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.135125317835089, | |
| "grad_norm": 12.60606575012207, | |
| "learning_rate": 2.1621867054122776e-05, | |
| "loss": 0.2508, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.1369415183436251, | |
| "grad_norm": 24.3731689453125, | |
| "learning_rate": 2.1576462041409373e-05, | |
| "loss": 0.3333, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.1387577188521614, | |
| "grad_norm": 14.52236557006836, | |
| "learning_rate": 2.153105702869597e-05, | |
| "loss": 0.3393, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.1405739193606974, | |
| "grad_norm": 18.642431259155273, | |
| "learning_rate": 2.1485652015982565e-05, | |
| "loss": 0.2943, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.1423901198692334, | |
| "grad_norm": 17.546001434326172, | |
| "learning_rate": 2.144024700326916e-05, | |
| "loss": 0.4127, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.1442063203777697, | |
| "grad_norm": 17.17730712890625, | |
| "learning_rate": 2.1394841990555758e-05, | |
| "loss": 0.4123, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.1442063203777697, | |
| "eval_accuracy": 0.8408531583264971, | |
| "eval_f1": 0.8356285591942396, | |
| "eval_loss": 0.4855496287345886, | |
| "eval_precision": 0.8294180649644707, | |
| "eval_recall": 0.8508328584053949, | |
| "eval_runtime": 12.1359, | |
| "eval_samples_per_second": 100.445, | |
| "eval_steps_per_second": 6.345, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.146022520886306, | |
| "grad_norm": 12.82174301147461, | |
| "learning_rate": 2.1349436977842354e-05, | |
| "loss": 0.222, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.147838721394842, | |
| "grad_norm": 13.084376335144043, | |
| "learning_rate": 2.130403196512895e-05, | |
| "loss": 0.2926, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.149654921903378, | |
| "grad_norm": 15.497282028198242, | |
| "learning_rate": 2.1258626952415546e-05, | |
| "loss": 0.3175, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.1514711224119143, | |
| "grad_norm": 13.51550579071045, | |
| "learning_rate": 2.1213221939702143e-05, | |
| "loss": 0.3562, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.1532873229204503, | |
| "grad_norm": 23.226682662963867, | |
| "learning_rate": 2.116781692698874e-05, | |
| "loss": 0.3547, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.1551035234289866, | |
| "grad_norm": 19.47138023376465, | |
| "learning_rate": 2.1122411914275335e-05, | |
| "loss": 0.2397, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.1569197239375226, | |
| "grad_norm": 20.718000411987305, | |
| "learning_rate": 2.107700690156193e-05, | |
| "loss": 0.2287, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.1587359244460589, | |
| "grad_norm": 15.798551559448242, | |
| "learning_rate": 2.1031601888848528e-05, | |
| "loss": 0.2555, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.160552124954595, | |
| "grad_norm": 17.811277389526367, | |
| "learning_rate": 2.0986196876135124e-05, | |
| "loss": 0.4506, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.1623683254631312, | |
| "grad_norm": 11.916951179504395, | |
| "learning_rate": 2.094079186342172e-05, | |
| "loss": 0.4996, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.1641845259716672, | |
| "grad_norm": 25.842151641845703, | |
| "learning_rate": 2.089538685070832e-05, | |
| "loss": 0.3922, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.1660007264802035, | |
| "grad_norm": 12.472575187683105, | |
| "learning_rate": 2.0849981837994916e-05, | |
| "loss": 0.2717, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.1678169269887395, | |
| "grad_norm": 16.09991455078125, | |
| "learning_rate": 2.0804576825281512e-05, | |
| "loss": 0.265, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.1696331274972758, | |
| "grad_norm": 13.561772346496582, | |
| "learning_rate": 2.075917181256811e-05, | |
| "loss": 0.3963, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.1714493280058118, | |
| "grad_norm": 8.552865982055664, | |
| "learning_rate": 2.0713766799854705e-05, | |
| "loss": 0.2881, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.173265528514348, | |
| "grad_norm": 16.097904205322266, | |
| "learning_rate": 2.06683617871413e-05, | |
| "loss": 0.3422, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.175081729022884, | |
| "grad_norm": 14.92163372039795, | |
| "learning_rate": 2.0622956774427897e-05, | |
| "loss": 0.3918, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.1768979295314204, | |
| "grad_norm": 23.94252586364746, | |
| "learning_rate": 2.0577551761714493e-05, | |
| "loss": 0.2916, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.1787141300399564, | |
| "grad_norm": 32.18648147583008, | |
| "learning_rate": 2.053214674900109e-05, | |
| "loss": 0.4981, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.1805303305484927, | |
| "grad_norm": 25.992433547973633, | |
| "learning_rate": 2.0486741736287686e-05, | |
| "loss": 0.4043, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.1823465310570287, | |
| "grad_norm": 16.37993621826172, | |
| "learning_rate": 2.0441336723574282e-05, | |
| "loss": 0.2839, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.1841627315655647, | |
| "grad_norm": 11.15428638458252, | |
| "learning_rate": 2.039593171086088e-05, | |
| "loss": 0.2675, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.185978932074101, | |
| "grad_norm": 4.630198001861572, | |
| "learning_rate": 2.0350526698147475e-05, | |
| "loss": 0.3671, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.1877951325826372, | |
| "grad_norm": 7.3554606437683105, | |
| "learning_rate": 2.030512168543407e-05, | |
| "loss": 0.335, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.1896113330911733, | |
| "grad_norm": 15.69100284576416, | |
| "learning_rate": 2.025971667272067e-05, | |
| "loss": 0.3132, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.1914275335997093, | |
| "grad_norm": 9.235587120056152, | |
| "learning_rate": 2.0214311660007267e-05, | |
| "loss": 0.3725, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.1932437341082456, | |
| "grad_norm": 17.381521224975586, | |
| "learning_rate": 2.0168906647293863e-05, | |
| "loss": 0.4108, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.1950599346167816, | |
| "grad_norm": 6.358768939971924, | |
| "learning_rate": 2.012350163458046e-05, | |
| "loss": 0.437, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.1968761351253179, | |
| "grad_norm": 25.116188049316406, | |
| "learning_rate": 2.0078096621867056e-05, | |
| "loss": 0.3024, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.198692335633854, | |
| "grad_norm": 17.047897338867188, | |
| "learning_rate": 2.0032691609153652e-05, | |
| "loss": 0.4109, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.198692335633854, | |
| "eval_accuracy": 0.8457752255947498, | |
| "eval_f1": 0.8384878566461094, | |
| "eval_loss": 0.43283388018608093, | |
| "eval_precision": 0.8390928465459404, | |
| "eval_recall": 0.8434791709994856, | |
| "eval_runtime": 12.1336, | |
| "eval_samples_per_second": 100.465, | |
| "eval_steps_per_second": 6.346, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.2005085361423902, | |
| "grad_norm": 10.83834171295166, | |
| "learning_rate": 1.9987286596440248e-05, | |
| "loss": 0.2987, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.2023247366509262, | |
| "grad_norm": 11.946487426757812, | |
| "learning_rate": 1.9941881583726844e-05, | |
| "loss": 0.3454, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.2041409371594625, | |
| "grad_norm": 11.06641674041748, | |
| "learning_rate": 1.989647657101344e-05, | |
| "loss": 0.3263, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.2059571376679985, | |
| "grad_norm": 10.330728530883789, | |
| "learning_rate": 1.9851071558300037e-05, | |
| "loss": 0.3618, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.2077733381765348, | |
| "grad_norm": 26.483482360839844, | |
| "learning_rate": 1.9805666545586633e-05, | |
| "loss": 0.3452, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.2095895386850708, | |
| "grad_norm": 12.150251388549805, | |
| "learning_rate": 1.9760261532873233e-05, | |
| "loss": 0.3156, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.211405739193607, | |
| "grad_norm": 12.710071563720703, | |
| "learning_rate": 1.971485652015983e-05, | |
| "loss": 0.2054, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.213221939702143, | |
| "grad_norm": 10.786319732666016, | |
| "learning_rate": 1.9669451507446425e-05, | |
| "loss": 0.3378, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.2150381402106794, | |
| "grad_norm": 17.936023712158203, | |
| "learning_rate": 1.962404649473302e-05, | |
| "loss": 0.2841, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.2168543407192154, | |
| "grad_norm": 13.268390655517578, | |
| "learning_rate": 1.9578641482019618e-05, | |
| "loss": 0.17, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.2186705412277514, | |
| "grad_norm": 6.254006385803223, | |
| "learning_rate": 1.9533236469306214e-05, | |
| "loss": 0.3391, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.2204867417362877, | |
| "grad_norm": 19.141435623168945, | |
| "learning_rate": 1.948783145659281e-05, | |
| "loss": 0.281, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.222302942244824, | |
| "grad_norm": 20.09503936767578, | |
| "learning_rate": 1.9442426443879406e-05, | |
| "loss": 0.3752, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.22411914275336, | |
| "grad_norm": 13.188698768615723, | |
| "learning_rate": 1.9397021431166003e-05, | |
| "loss": 0.2663, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.225935343261896, | |
| "grad_norm": 16.47735595703125, | |
| "learning_rate": 1.93516164184526e-05, | |
| "loss": 0.2395, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.2277515437704323, | |
| "grad_norm": 14.550719261169434, | |
| "learning_rate": 1.9306211405739195e-05, | |
| "loss": 0.4064, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.2295677442789683, | |
| "grad_norm": 24.679447174072266, | |
| "learning_rate": 1.926080639302579e-05, | |
| "loss": 0.3489, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.2313839447875046, | |
| "grad_norm": 18.579641342163086, | |
| "learning_rate": 1.9215401380312388e-05, | |
| "loss": 0.3471, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.2332001452960406, | |
| "grad_norm": 9.306077003479004, | |
| "learning_rate": 1.9169996367598984e-05, | |
| "loss": 0.3261, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.2350163458045769, | |
| "grad_norm": 14.456847190856934, | |
| "learning_rate": 1.912459135488558e-05, | |
| "loss": 0.2736, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.236832546313113, | |
| "grad_norm": 13.200130462646484, | |
| "learning_rate": 1.9079186342172176e-05, | |
| "loss": 0.3333, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.2386487468216492, | |
| "grad_norm": 20.47243309020996, | |
| "learning_rate": 1.9033781329458773e-05, | |
| "loss": 0.2882, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.2404649473301852, | |
| "grad_norm": 4.953060150146484, | |
| "learning_rate": 1.8988376316745372e-05, | |
| "loss": 0.2492, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.2422811478387215, | |
| "grad_norm": 18.391193389892578, | |
| "learning_rate": 1.894297130403197e-05, | |
| "loss": 0.3334, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.2440973483472575, | |
| "grad_norm": 15.669410705566406, | |
| "learning_rate": 1.8897566291318565e-05, | |
| "loss": 0.2825, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.2459135488557938, | |
| "grad_norm": 7.906172752380371, | |
| "learning_rate": 1.885216127860516e-05, | |
| "loss": 0.3103, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.2477297493643298, | |
| "grad_norm": 28.12116813659668, | |
| "learning_rate": 1.8806756265891757e-05, | |
| "loss": 0.513, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.249545949872866, | |
| "grad_norm": 15.086318969726562, | |
| "learning_rate": 1.8761351253178354e-05, | |
| "loss": 0.2614, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.251362150381402, | |
| "grad_norm": 21.694408416748047, | |
| "learning_rate": 1.871594624046495e-05, | |
| "loss": 0.3005, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.2531783508899381, | |
| "grad_norm": 19.437421798706055, | |
| "learning_rate": 1.8670541227751546e-05, | |
| "loss": 0.3489, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.2531783508899381, | |
| "eval_accuracy": 0.8613617719442166, | |
| "eval_f1": 0.8577346281731705, | |
| "eval_loss": 0.41974562406539917, | |
| "eval_precision": 0.8551502281540364, | |
| "eval_recall": 0.8622491860794987, | |
| "eval_runtime": 12.1508, | |
| "eval_samples_per_second": 100.323, | |
| "eval_steps_per_second": 6.337, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.2549945513984744, | |
| "grad_norm": 10.58027172088623, | |
| "learning_rate": 1.8625136215038142e-05, | |
| "loss": 0.4004, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.2568107519070106, | |
| "grad_norm": 18.426048278808594, | |
| "learning_rate": 1.857973120232474e-05, | |
| "loss": 0.3934, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.2586269524155467, | |
| "grad_norm": 14.427044868469238, | |
| "learning_rate": 1.8534326189611335e-05, | |
| "loss": 0.3299, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.2604431529240827, | |
| "grad_norm": 15.217583656311035, | |
| "learning_rate": 1.848892117689793e-05, | |
| "loss": 0.2325, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.262259353432619, | |
| "grad_norm": 7.493901252746582, | |
| "learning_rate": 1.8443516164184527e-05, | |
| "loss": 0.1888, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.2640755539411552, | |
| "grad_norm": 9.405710220336914, | |
| "learning_rate": 1.8398111151471124e-05, | |
| "loss": 0.3034, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.2658917544496913, | |
| "grad_norm": 22.79953384399414, | |
| "learning_rate": 1.835270613875772e-05, | |
| "loss": 0.4091, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.2677079549582273, | |
| "grad_norm": 11.145368576049805, | |
| "learning_rate": 1.8307301126044316e-05, | |
| "loss": 0.3613, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.2695241554667636, | |
| "grad_norm": 12.6839599609375, | |
| "learning_rate": 1.8261896113330912e-05, | |
| "loss": 0.2778, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.2713403559752996, | |
| "grad_norm": 26.889331817626953, | |
| "learning_rate": 1.821649110061751e-05, | |
| "loss": 0.5038, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.2731565564838359, | |
| "grad_norm": 24.15523338317871, | |
| "learning_rate": 1.8171086087904105e-05, | |
| "loss": 0.2681, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.274972756992372, | |
| "grad_norm": 24.74588394165039, | |
| "learning_rate": 1.81256810751907e-05, | |
| "loss": 0.3978, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.2767889575009082, | |
| "grad_norm": 14.386746406555176, | |
| "learning_rate": 1.8080276062477297e-05, | |
| "loss": 0.1776, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.2786051580094442, | |
| "grad_norm": 14.167470932006836, | |
| "learning_rate": 1.8034871049763894e-05, | |
| "loss": 0.4071, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.2804213585179804, | |
| "grad_norm": 17.294830322265625, | |
| "learning_rate": 1.7989466037050493e-05, | |
| "loss": 0.3855, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.2822375590265165, | |
| "grad_norm": 7.660125255584717, | |
| "learning_rate": 1.794406102433709e-05, | |
| "loss": 0.3288, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.2840537595350527, | |
| "grad_norm": 9.39700698852539, | |
| "learning_rate": 1.7898656011623686e-05, | |
| "loss": 0.216, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.2858699600435888, | |
| "grad_norm": 14.8308744430542, | |
| "learning_rate": 1.7853250998910282e-05, | |
| "loss": 0.293, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.287686160552125, | |
| "grad_norm": 7.683089733123779, | |
| "learning_rate": 1.7807845986196878e-05, | |
| "loss": 0.2502, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.289502361060661, | |
| "grad_norm": 8.663617134094238, | |
| "learning_rate": 1.7762440973483474e-05, | |
| "loss": 0.2726, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.2913185615691973, | |
| "grad_norm": 27.20614242553711, | |
| "learning_rate": 1.771703596077007e-05, | |
| "loss": 0.3407, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.2931347620777334, | |
| "grad_norm": 20.363462448120117, | |
| "learning_rate": 1.7671630948056667e-05, | |
| "loss": 0.4059, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.2949509625862694, | |
| "grad_norm": 11.423839569091797, | |
| "learning_rate": 1.7626225935343263e-05, | |
| "loss": 0.3017, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.2967671630948057, | |
| "grad_norm": 21.78324317932129, | |
| "learning_rate": 1.758082092262986e-05, | |
| "loss": 0.2886, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.298583363603342, | |
| "grad_norm": 10.466778755187988, | |
| "learning_rate": 1.7535415909916456e-05, | |
| "loss": 0.4254, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.300399564111878, | |
| "grad_norm": 27.826078414916992, | |
| "learning_rate": 1.7490010897203052e-05, | |
| "loss": 0.3508, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.302215764620414, | |
| "grad_norm": 30.015012741088867, | |
| "learning_rate": 1.7444605884489648e-05, | |
| "loss": 0.339, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.3040319651289503, | |
| "grad_norm": 5.534029960632324, | |
| "learning_rate": 1.7399200871776244e-05, | |
| "loss": 0.2054, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.3058481656374865, | |
| "grad_norm": 17.095060348510742, | |
| "learning_rate": 1.735379585906284e-05, | |
| "loss": 0.418, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.3076643661460225, | |
| "grad_norm": 13.304654121398926, | |
| "learning_rate": 1.7308390846349437e-05, | |
| "loss": 0.4048, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.3076643661460225, | |
| "eval_accuracy": 0.8679245283018868, | |
| "eval_f1": 0.8611075418966936, | |
| "eval_loss": 0.39569488167762756, | |
| "eval_precision": 0.8537013974843974, | |
| "eval_recall": 0.8742274746130478, | |
| "eval_runtime": 12.1513, | |
| "eval_samples_per_second": 100.319, | |
| "eval_steps_per_second": 6.337, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.3094805666545586, | |
| "grad_norm": 11.452223777770996, | |
| "learning_rate": 1.7262985833636033e-05, | |
| "loss": 0.4095, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.3112967671630948, | |
| "grad_norm": 7.238298416137695, | |
| "learning_rate": 1.721758082092263e-05, | |
| "loss": 0.2381, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.3131129676716309, | |
| "grad_norm": 25.489473342895508, | |
| "learning_rate": 1.7172175808209226e-05, | |
| "loss": 0.3537, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.3149291681801671, | |
| "grad_norm": 8.602483749389648, | |
| "learning_rate": 1.7126770795495822e-05, | |
| "loss": 0.2295, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.3167453686887032, | |
| "grad_norm": 16.307979583740234, | |
| "learning_rate": 1.7081365782782418e-05, | |
| "loss": 0.4157, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.3185615691972394, | |
| "grad_norm": 12.968489646911621, | |
| "learning_rate": 1.7035960770069018e-05, | |
| "loss": 0.2462, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.3203777697057755, | |
| "grad_norm": 473.5597839355469, | |
| "learning_rate": 1.6990555757355614e-05, | |
| "loss": 0.2313, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.3221939702143117, | |
| "grad_norm": 25.61052703857422, | |
| "learning_rate": 1.694515074464221e-05, | |
| "loss": 0.3654, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.3240101707228478, | |
| "grad_norm": 8.36911678314209, | |
| "learning_rate": 1.6899745731928806e-05, | |
| "loss": 0.4109, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.325826371231384, | |
| "grad_norm": 5.5236687660217285, | |
| "learning_rate": 1.6854340719215403e-05, | |
| "loss": 0.3426, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.32764257173992, | |
| "grad_norm": 12.219548225402832, | |
| "learning_rate": 1.6808935706502e-05, | |
| "loss": 0.2344, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.329458772248456, | |
| "grad_norm": 9.559709548950195, | |
| "learning_rate": 1.6763530693788595e-05, | |
| "loss": 0.351, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.3312749727569924, | |
| "grad_norm": 12.749125480651855, | |
| "learning_rate": 1.671812568107519e-05, | |
| "loss": 0.3179, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.3330911732655286, | |
| "grad_norm": 10.120976448059082, | |
| "learning_rate": 1.6672720668361788e-05, | |
| "loss": 0.2978, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.3349073737740647, | |
| "grad_norm": 20.587955474853516, | |
| "learning_rate": 1.6627315655648384e-05, | |
| "loss": 0.3815, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.3367235742826007, | |
| "grad_norm": 15.96291446685791, | |
| "learning_rate": 1.658191064293498e-05, | |
| "loss": 0.3063, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.338539774791137, | |
| "grad_norm": 20.04080581665039, | |
| "learning_rate": 1.6536505630221576e-05, | |
| "loss": 0.2703, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.3403559752996732, | |
| "grad_norm": 19.636119842529297, | |
| "learning_rate": 1.6491100617508173e-05, | |
| "loss": 0.2934, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.3421721758082092, | |
| "grad_norm": 17.96088218688965, | |
| "learning_rate": 1.644569560479477e-05, | |
| "loss": 0.2903, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.3439883763167453, | |
| "grad_norm": 5.417899131774902, | |
| "learning_rate": 1.6400290592081365e-05, | |
| "loss": 0.4768, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.3458045768252815, | |
| "grad_norm": 27.437042236328125, | |
| "learning_rate": 1.635488557936796e-05, | |
| "loss": 0.4288, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.3476207773338176, | |
| "grad_norm": 14.335066795349121, | |
| "learning_rate": 1.6309480566654558e-05, | |
| "loss": 0.3827, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.3494369778423538, | |
| "grad_norm": 20.122777938842773, | |
| "learning_rate": 1.6264075553941154e-05, | |
| "loss": 0.4897, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.3512531783508899, | |
| "grad_norm": 32.951942443847656, | |
| "learning_rate": 1.621867054122775e-05, | |
| "loss": 0.3861, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.3530693788594261, | |
| "grad_norm": 11.255241394042969, | |
| "learning_rate": 1.6173265528514346e-05, | |
| "loss": 0.2971, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.3548855793679622, | |
| "grad_norm": 14.039215087890625, | |
| "learning_rate": 1.6127860515800943e-05, | |
| "loss": 0.3155, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.3567017798764984, | |
| "grad_norm": 8.711435317993164, | |
| "learning_rate": 1.6082455503087542e-05, | |
| "loss": 0.2202, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.3585179803850345, | |
| "grad_norm": 11.57476806640625, | |
| "learning_rate": 1.603705049037414e-05, | |
| "loss": 0.2802, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.3603341808935707, | |
| "grad_norm": 26.275901794433594, | |
| "learning_rate": 1.5991645477660735e-05, | |
| "loss": 0.424, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.3621503814021068, | |
| "grad_norm": 9.019407272338867, | |
| "learning_rate": 1.594624046494733e-05, | |
| "loss": 0.2053, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.3621503814021068, | |
| "eval_accuracy": 0.8810500410172273, | |
| "eval_f1": 0.8728353936424942, | |
| "eval_loss": 0.36287108063697815, | |
| "eval_precision": 0.8688572009408948, | |
| "eval_recall": 0.8783339042782887, | |
| "eval_runtime": 12.133, | |
| "eval_samples_per_second": 100.469, | |
| "eval_steps_per_second": 6.346, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.363966581910643, | |
| "grad_norm": 13.45000171661377, | |
| "learning_rate": 1.5900835452233927e-05, | |
| "loss": 0.2865, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.365782782419179, | |
| "grad_norm": 18.8865966796875, | |
| "learning_rate": 1.5855430439520524e-05, | |
| "loss": 0.3401, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.3675989829277153, | |
| "grad_norm": 10.25676155090332, | |
| "learning_rate": 1.581002542680712e-05, | |
| "loss": 0.2958, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.3694151834362513, | |
| "grad_norm": 12.922000885009766, | |
| "learning_rate": 1.5764620414093716e-05, | |
| "loss": 0.3281, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.3712313839447874, | |
| "grad_norm": 21.29969596862793, | |
| "learning_rate": 1.5719215401380312e-05, | |
| "loss": 0.4653, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.3730475844533236, | |
| "grad_norm": 22.337665557861328, | |
| "learning_rate": 1.567381038866691e-05, | |
| "loss": 0.4078, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.37486378496186, | |
| "grad_norm": 13.976520538330078, | |
| "learning_rate": 1.5628405375953505e-05, | |
| "loss": 0.2474, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.376679985470396, | |
| "grad_norm": 15.766996383666992, | |
| "learning_rate": 1.55830003632401e-05, | |
| "loss": 0.3418, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.378496185978932, | |
| "grad_norm": 10.695988655090332, | |
| "learning_rate": 1.5537595350526697e-05, | |
| "loss": 0.231, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.3803123864874682, | |
| "grad_norm": 12.22573184967041, | |
| "learning_rate": 1.5492190337813294e-05, | |
| "loss": 0.3406, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.3821285869960045, | |
| "grad_norm": 8.77241325378418, | |
| "learning_rate": 1.544678532509989e-05, | |
| "loss": 0.4051, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.3839447875045405, | |
| "grad_norm": 17.83467674255371, | |
| "learning_rate": 1.5401380312386486e-05, | |
| "loss": 0.2736, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.3857609880130766, | |
| "grad_norm": 10.345674514770508, | |
| "learning_rate": 1.5355975299673082e-05, | |
| "loss": 0.2945, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.3875771885216128, | |
| "grad_norm": 9.618982315063477, | |
| "learning_rate": 1.531057028695968e-05, | |
| "loss": 0.307, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.3893933890301489, | |
| "grad_norm": 6.242488861083984, | |
| "learning_rate": 1.5265165274246275e-05, | |
| "loss": 0.2746, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.3912095895386851, | |
| "grad_norm": 14.939092636108398, | |
| "learning_rate": 1.5219760261532873e-05, | |
| "loss": 0.2958, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.3930257900472212, | |
| "grad_norm": 16.4776611328125, | |
| "learning_rate": 1.5174355248819469e-05, | |
| "loss": 0.5158, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.3948419905557574, | |
| "grad_norm": 19.468334197998047, | |
| "learning_rate": 1.5128950236106069e-05, | |
| "loss": 0.2188, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.3966581910642935, | |
| "grad_norm": 16.519298553466797, | |
| "learning_rate": 1.5083545223392665e-05, | |
| "loss": 0.2974, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.3984743915728297, | |
| "grad_norm": 8.464622497558594, | |
| "learning_rate": 1.5038140210679261e-05, | |
| "loss": 0.2988, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.4002905920813657, | |
| "grad_norm": 1.7706962823867798, | |
| "learning_rate": 1.4992735197965857e-05, | |
| "loss": 0.25, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.402106792589902, | |
| "grad_norm": 13.213313102722168, | |
| "learning_rate": 1.4947330185252454e-05, | |
| "loss": 0.2841, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.403922993098438, | |
| "grad_norm": 18.352794647216797, | |
| "learning_rate": 1.490192517253905e-05, | |
| "loss": 0.4059, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.405739193606974, | |
| "grad_norm": 11.154178619384766, | |
| "learning_rate": 1.4856520159825646e-05, | |
| "loss": 0.4, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.4075553941155103, | |
| "grad_norm": 20.417091369628906, | |
| "learning_rate": 1.4811115147112242e-05, | |
| "loss": 0.3162, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.4093715946240466, | |
| "grad_norm": 15.249809265136719, | |
| "learning_rate": 1.4765710134398839e-05, | |
| "loss": 0.2575, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.4111877951325826, | |
| "grad_norm": 12.293340682983398, | |
| "learning_rate": 1.4720305121685435e-05, | |
| "loss": 0.2857, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.4130039956411187, | |
| "grad_norm": 6.126258850097656, | |
| "learning_rate": 1.4674900108972031e-05, | |
| "loss": 0.2618, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.414820196149655, | |
| "grad_norm": 15.712937355041504, | |
| "learning_rate": 1.4629495096258627e-05, | |
| "loss": 0.3071, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.4166363966581912, | |
| "grad_norm": 22.511367797851562, | |
| "learning_rate": 1.4584090083545224e-05, | |
| "loss": 0.2066, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.4166363966581912, | |
| "eval_accuracy": 0.8712059064807219, | |
| "eval_f1": 0.864515063175361, | |
| "eval_loss": 0.39585188031196594, | |
| "eval_precision": 0.8567226508046806, | |
| "eval_recall": 0.8748503082269048, | |
| "eval_runtime": 12.1334, | |
| "eval_samples_per_second": 100.467, | |
| "eval_steps_per_second": 6.346, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.4184525971667272, | |
| "grad_norm": 21.43792152404785, | |
| "learning_rate": 1.453868507083182e-05, | |
| "loss": 0.3206, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.4202687976752633, | |
| "grad_norm": 15.642193794250488, | |
| "learning_rate": 1.4493280058118416e-05, | |
| "loss": 0.3246, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.4220849981837995, | |
| "grad_norm": 22.358238220214844, | |
| "learning_rate": 1.4447875045405012e-05, | |
| "loss": 0.2895, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.4239011986923356, | |
| "grad_norm": 12.986956596374512, | |
| "learning_rate": 1.4402470032691609e-05, | |
| "loss": 0.309, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.4257173992008718, | |
| "grad_norm": 25.143394470214844, | |
| "learning_rate": 1.4357065019978205e-05, | |
| "loss": 0.4229, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.4275335997094079, | |
| "grad_norm": 15.178205490112305, | |
| "learning_rate": 1.4311660007264801e-05, | |
| "loss": 0.2001, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.429349800217944, | |
| "grad_norm": 27.758424758911133, | |
| "learning_rate": 1.4266254994551397e-05, | |
| "loss": 0.3854, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.4311660007264801, | |
| "grad_norm": 20.106098175048828, | |
| "learning_rate": 1.4220849981837994e-05, | |
| "loss": 0.3047, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.4329822012350164, | |
| "grad_norm": 21.916183471679688, | |
| "learning_rate": 1.4175444969124593e-05, | |
| "loss": 0.3352, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.4347984017435524, | |
| "grad_norm": 11.698692321777344, | |
| "learning_rate": 1.413003995641119e-05, | |
| "loss": 0.2302, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.4366146022520887, | |
| "grad_norm": 18.721933364868164, | |
| "learning_rate": 1.4084634943697786e-05, | |
| "loss": 0.2219, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.4384308027606247, | |
| "grad_norm": 10.098983764648438, | |
| "learning_rate": 1.4039229930984382e-05, | |
| "loss": 0.3427, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.440247003269161, | |
| "grad_norm": 5.1341023445129395, | |
| "learning_rate": 1.3993824918270978e-05, | |
| "loss": 0.1857, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.442063203777697, | |
| "grad_norm": 7.9033522605896, | |
| "learning_rate": 1.3948419905557575e-05, | |
| "loss": 0.2511, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.4438794042862333, | |
| "grad_norm": 15.394737243652344, | |
| "learning_rate": 1.390301489284417e-05, | |
| "loss": 0.372, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.4456956047947693, | |
| "grad_norm": 10.518932342529297, | |
| "learning_rate": 1.3857609880130767e-05, | |
| "loss": 0.2298, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.4475118053033054, | |
| "grad_norm": 15.272256851196289, | |
| "learning_rate": 1.3812204867417363e-05, | |
| "loss": 0.2183, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.4493280058118416, | |
| "grad_norm": 13.641687393188477, | |
| "learning_rate": 1.376679985470396e-05, | |
| "loss": 0.3127, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.4511442063203779, | |
| "grad_norm": 20.85528564453125, | |
| "learning_rate": 1.3721394841990556e-05, | |
| "loss": 0.437, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.452960406828914, | |
| "grad_norm": 17.150014877319336, | |
| "learning_rate": 1.3675989829277152e-05, | |
| "loss": 0.2353, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.45477660733745, | |
| "grad_norm": 19.58470916748047, | |
| "learning_rate": 1.3630584816563748e-05, | |
| "loss": 0.2235, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.4565928078459862, | |
| "grad_norm": 11.996252059936523, | |
| "learning_rate": 1.3585179803850344e-05, | |
| "loss": 0.1931, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.4584090083545225, | |
| "grad_norm": 14.358990669250488, | |
| "learning_rate": 1.353977479113694e-05, | |
| "loss": 0.1409, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.4602252088630585, | |
| "grad_norm": 25.43513298034668, | |
| "learning_rate": 1.3494369778423539e-05, | |
| "loss": 0.3949, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.4620414093715945, | |
| "grad_norm": 38.57484817504883, | |
| "learning_rate": 1.3448964765710135e-05, | |
| "loss": 0.4156, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.4638576098801308, | |
| "grad_norm": 11.345231056213379, | |
| "learning_rate": 1.3403559752996731e-05, | |
| "loss": 0.2358, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.4656738103886668, | |
| "grad_norm": 9.881817817687988, | |
| "learning_rate": 1.3358154740283327e-05, | |
| "loss": 0.1973, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.467490010897203, | |
| "grad_norm": 15.946255683898926, | |
| "learning_rate": 1.3312749727569924e-05, | |
| "loss": 0.3282, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.4693062114057391, | |
| "grad_norm": 21.599016189575195, | |
| "learning_rate": 1.326734471485652e-05, | |
| "loss": 0.3486, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.4711224119142754, | |
| "grad_norm": 23.88036346435547, | |
| "learning_rate": 1.3221939702143118e-05, | |
| "loss": 0.2855, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.4711224119142754, | |
| "eval_accuracy": 0.8777686628383922, | |
| "eval_f1": 0.8732951551257775, | |
| "eval_loss": 0.37594613432884216, | |
| "eval_precision": 0.8717582158349084, | |
| "eval_recall": 0.8789319049269518, | |
| "eval_runtime": 12.1477, | |
| "eval_samples_per_second": 100.348, | |
| "eval_steps_per_second": 6.339, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.4729386124228114, | |
| "grad_norm": 21.347118377685547, | |
| "learning_rate": 1.3176534689429714e-05, | |
| "loss": 0.4024, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.4747548129313477, | |
| "grad_norm": 12.109701156616211, | |
| "learning_rate": 1.313112967671631e-05, | |
| "loss": 0.3624, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.4765710134398837, | |
| "grad_norm": 11.65137004852295, | |
| "learning_rate": 1.3085724664002907e-05, | |
| "loss": 0.2691, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.47838721394842, | |
| "grad_norm": 14.210288047790527, | |
| "learning_rate": 1.3040319651289505e-05, | |
| "loss": 0.3066, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.480203414456956, | |
| "grad_norm": 15.840164184570312, | |
| "learning_rate": 1.29949146385761e-05, | |
| "loss": 0.4073, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.482019614965492, | |
| "grad_norm": 17.042640686035156, | |
| "learning_rate": 1.2949509625862697e-05, | |
| "loss": 0.2465, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.4838358154740283, | |
| "grad_norm": 4.103309631347656, | |
| "learning_rate": 1.2904104613149293e-05, | |
| "loss": 0.2429, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.4856520159825646, | |
| "grad_norm": 21.490703582763672, | |
| "learning_rate": 1.285869960043589e-05, | |
| "loss": 0.3825, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.4874682164911006, | |
| "grad_norm": 22.954036712646484, | |
| "learning_rate": 1.2813294587722486e-05, | |
| "loss": 0.3533, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.4892844169996367, | |
| "grad_norm": 9.550930976867676, | |
| "learning_rate": 1.2767889575009082e-05, | |
| "loss": 0.3585, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.491100617508173, | |
| "grad_norm": 23.033842086791992, | |
| "learning_rate": 1.2722484562295678e-05, | |
| "loss": 0.3724, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.4929168180167092, | |
| "grad_norm": 18.261627197265625, | |
| "learning_rate": 1.2677079549582275e-05, | |
| "loss": 0.3568, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.4947330185252452, | |
| "grad_norm": 26.01344871520996, | |
| "learning_rate": 1.263167453686887e-05, | |
| "loss": 0.3325, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.4965492190337812, | |
| "grad_norm": 6.980250358581543, | |
| "learning_rate": 1.2586269524155467e-05, | |
| "loss": 0.2685, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.4983654195423175, | |
| "grad_norm": 15.290885925292969, | |
| "learning_rate": 1.2540864511442063e-05, | |
| "loss": 0.2438, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.5001816200508538, | |
| "grad_norm": 16.555368423461914, | |
| "learning_rate": 1.2495459498728661e-05, | |
| "loss": 0.4081, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.5019978205593898, | |
| "grad_norm": 23.678932189941406, | |
| "learning_rate": 1.2450054486015257e-05, | |
| "loss": 0.3303, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.5038140210679258, | |
| "grad_norm": 28.935272216796875, | |
| "learning_rate": 1.2404649473301854e-05, | |
| "loss": 0.3075, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.505630221576462, | |
| "grad_norm": 1.6638036966323853, | |
| "learning_rate": 1.235924446058845e-05, | |
| "loss": 0.2976, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.5074464220849983, | |
| "grad_norm": 27.825714111328125, | |
| "learning_rate": 1.2313839447875046e-05, | |
| "loss": 0.2812, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.5092626225935342, | |
| "grad_norm": 18.06635093688965, | |
| "learning_rate": 1.2268434435161642e-05, | |
| "loss": 0.3069, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.5110788231020704, | |
| "grad_norm": 10.651163101196289, | |
| "learning_rate": 1.2223029422448239e-05, | |
| "loss": 0.5344, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.5128950236106067, | |
| "grad_norm": 9.965625762939453, | |
| "learning_rate": 1.2177624409734835e-05, | |
| "loss": 0.3103, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.5147112241191427, | |
| "grad_norm": 23.21745491027832, | |
| "learning_rate": 1.2132219397021431e-05, | |
| "loss": 0.2692, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.5165274246276788, | |
| "grad_norm": 18.808652877807617, | |
| "learning_rate": 1.2086814384308027e-05, | |
| "loss": 0.156, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.518343625136215, | |
| "grad_norm": 21.283294677734375, | |
| "learning_rate": 1.2041409371594625e-05, | |
| "loss": 0.3646, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.5201598256447513, | |
| "grad_norm": 21.1343936920166, | |
| "learning_rate": 1.1996004358881222e-05, | |
| "loss": 0.3235, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.5219760261532873, | |
| "grad_norm": 15.289054870605469, | |
| "learning_rate": 1.1950599346167818e-05, | |
| "loss": 0.2583, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.5237922266618233, | |
| "grad_norm": 15.304503440856934, | |
| "learning_rate": 1.1905194333454414e-05, | |
| "loss": 0.3819, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.5256084271703596, | |
| "grad_norm": 22.24407196044922, | |
| "learning_rate": 1.185978932074101e-05, | |
| "loss": 0.2542, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.5256084271703596, | |
| "eval_accuracy": 0.8843314191960624, | |
| "eval_f1": 0.8780677678973086, | |
| "eval_loss": 0.3471013903617859, | |
| "eval_precision": 0.8764668066951888, | |
| "eval_recall": 0.8831446649071804, | |
| "eval_runtime": 12.1617, | |
| "eval_samples_per_second": 100.233, | |
| "eval_steps_per_second": 6.331, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.5274246276788959, | |
| "grad_norm": 27.7410831451416, | |
| "learning_rate": 1.1814384308027607e-05, | |
| "loss": 0.3286, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.529240828187432, | |
| "grad_norm": 19.968013763427734, | |
| "learning_rate": 1.1768979295314203e-05, | |
| "loss": 0.2745, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.531057028695968, | |
| "grad_norm": 4.940372467041016, | |
| "learning_rate": 1.17235742826008e-05, | |
| "loss": 0.2978, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.5328732292045042, | |
| "grad_norm": 12.394369125366211, | |
| "learning_rate": 1.1678169269887395e-05, | |
| "loss": 0.2675, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.5346894297130405, | |
| "grad_norm": 14.312457084655762, | |
| "learning_rate": 1.1632764257173992e-05, | |
| "loss": 0.3254, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.5365056302215765, | |
| "grad_norm": 18.364046096801758, | |
| "learning_rate": 1.158735924446059e-05, | |
| "loss": 0.2875, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.5383218307301125, | |
| "grad_norm": 20.195308685302734, | |
| "learning_rate": 1.1541954231747186e-05, | |
| "loss": 0.2987, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.5401380312386488, | |
| "grad_norm": 14.188733100891113, | |
| "learning_rate": 1.1496549219033782e-05, | |
| "loss": 0.3537, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.541954231747185, | |
| "grad_norm": 8.175540924072266, | |
| "learning_rate": 1.1451144206320378e-05, | |
| "loss": 0.2177, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.543770432255721, | |
| "grad_norm": 18.209714889526367, | |
| "learning_rate": 1.1405739193606975e-05, | |
| "loss": 0.3042, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.5455866327642571, | |
| "grad_norm": 9.630953788757324, | |
| "learning_rate": 1.136033418089357e-05, | |
| "loss": 0.2883, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.5474028332727934, | |
| "grad_norm": 7.398960113525391, | |
| "learning_rate": 1.1314929168180167e-05, | |
| "loss": 0.2416, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.5492190337813294, | |
| "grad_norm": 16.70703887939453, | |
| "learning_rate": 1.1269524155466763e-05, | |
| "loss": 0.3105, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.5510352342898654, | |
| "grad_norm": 13.721776008605957, | |
| "learning_rate": 1.122411914275336e-05, | |
| "loss": 0.2512, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.5528514347984017, | |
| "grad_norm": 15.949941635131836, | |
| "learning_rate": 1.1178714130039956e-05, | |
| "loss": 0.2245, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.554667635306938, | |
| "grad_norm": 15.553458213806152, | |
| "learning_rate": 1.1133309117326552e-05, | |
| "loss": 0.2002, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.556483835815474, | |
| "grad_norm": 5.081323623657227, | |
| "learning_rate": 1.108790410461315e-05, | |
| "loss": 0.231, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.55830003632401, | |
| "grad_norm": 11.08918285369873, | |
| "learning_rate": 1.1042499091899746e-05, | |
| "loss": 0.2269, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.5601162368325463, | |
| "grad_norm": 16.481983184814453, | |
| "learning_rate": 1.0997094079186343e-05, | |
| "loss": 0.2977, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.5619324373410826, | |
| "grad_norm": 22.30940055847168, | |
| "learning_rate": 1.0951689066472939e-05, | |
| "loss": 0.3221, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.5637486378496186, | |
| "grad_norm": 26.49005126953125, | |
| "learning_rate": 1.0906284053759535e-05, | |
| "loss": 0.3319, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.5655648383581546, | |
| "grad_norm": 18.184383392333984, | |
| "learning_rate": 1.0860879041046131e-05, | |
| "loss": 0.3648, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.567381038866691, | |
| "grad_norm": 7.18729829788208, | |
| "learning_rate": 1.0815474028332728e-05, | |
| "loss": 0.3291, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.5691972393752271, | |
| "grad_norm": 24.356779098510742, | |
| "learning_rate": 1.0770069015619324e-05, | |
| "loss": 0.1853, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.5710134398837632, | |
| "grad_norm": 13.144723892211914, | |
| "learning_rate": 1.0724664002905922e-05, | |
| "loss": 0.2874, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.5728296403922992, | |
| "grad_norm": 8.509248733520508, | |
| "learning_rate": 1.0679258990192518e-05, | |
| "loss": 0.1815, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.5746458409008355, | |
| "grad_norm": 19.468769073486328, | |
| "learning_rate": 1.0633853977479114e-05, | |
| "loss": 0.223, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.5764620414093717, | |
| "grad_norm": 19.752363204956055, | |
| "learning_rate": 1.058844896476571e-05, | |
| "loss": 0.2087, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.5782782419179078, | |
| "grad_norm": 8.5270414352417, | |
| "learning_rate": 1.0543043952052307e-05, | |
| "loss": 0.3123, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.5800944424264438, | |
| "grad_norm": 11.519478797912598, | |
| "learning_rate": 1.0497638939338905e-05, | |
| "loss": 0.407, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.5800944424264438, | |
| "eval_accuracy": 0.889253486464315, | |
| "eval_f1": 0.8844054402447329, | |
| "eval_loss": 0.35321420431137085, | |
| "eval_precision": 0.8739349747133954, | |
| "eval_recall": 0.8988169397446168, | |
| "eval_runtime": 12.1712, | |
| "eval_samples_per_second": 100.155, | |
| "eval_steps_per_second": 6.326, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.58191064293498, | |
| "grad_norm": 5.541025638580322, | |
| "learning_rate": 1.0452233926625501e-05, | |
| "loss": 0.3838, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.5837268434435163, | |
| "grad_norm": 26.33243179321289, | |
| "learning_rate": 1.0406828913912097e-05, | |
| "loss": 0.4307, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.5855430439520521, | |
| "grad_norm": 17.7266788482666, | |
| "learning_rate": 1.0361423901198693e-05, | |
| "loss": 0.4372, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.5873592444605884, | |
| "grad_norm": 12.146345138549805, | |
| "learning_rate": 1.031601888848529e-05, | |
| "loss": 0.3942, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.5891754449691247, | |
| "grad_norm": 15.945060729980469, | |
| "learning_rate": 1.0270613875771886e-05, | |
| "loss": 0.3127, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.5909916454776607, | |
| "grad_norm": 15.658045768737793, | |
| "learning_rate": 1.0225208863058482e-05, | |
| "loss": 0.1777, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.5928078459861967, | |
| "grad_norm": 8.869367599487305, | |
| "learning_rate": 1.0179803850345078e-05, | |
| "loss": 0.3081, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.594624046494733, | |
| "grad_norm": 15.344075202941895, | |
| "learning_rate": 1.0134398837631676e-05, | |
| "loss": 0.291, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.5964402470032693, | |
| "grad_norm": 14.211679458618164, | |
| "learning_rate": 1.0088993824918273e-05, | |
| "loss": 0.2659, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.5982564475118053, | |
| "grad_norm": 26.636606216430664, | |
| "learning_rate": 1.0043588812204869e-05, | |
| "loss": 0.2606, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.6000726480203413, | |
| "grad_norm": 20.942895889282227, | |
| "learning_rate": 9.998183799491465e-06, | |
| "loss": 0.1804, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.6018888485288776, | |
| "grad_norm": 16.17045021057129, | |
| "learning_rate": 9.952778786778061e-06, | |
| "loss": 0.2943, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.6037050490374138, | |
| "grad_norm": 6.153861999511719, | |
| "learning_rate": 9.907373774064658e-06, | |
| "loss": 0.3264, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.6055212495459499, | |
| "grad_norm": 21.781164169311523, | |
| "learning_rate": 9.861968761351254e-06, | |
| "loss": 0.4721, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.607337450054486, | |
| "grad_norm": 21.82793617248535, | |
| "learning_rate": 9.81656374863785e-06, | |
| "loss": 0.2914, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.6091536505630222, | |
| "grad_norm": 21.26590919494629, | |
| "learning_rate": 9.771158735924446e-06, | |
| "loss": 0.256, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.6109698510715584, | |
| "grad_norm": 10.11241626739502, | |
| "learning_rate": 9.725753723211043e-06, | |
| "loss": 0.2504, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.6127860515800945, | |
| "grad_norm": 4.329545021057129, | |
| "learning_rate": 9.68034871049764e-06, | |
| "loss": 0.272, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.6146022520886305, | |
| "grad_norm": 13.154899597167969, | |
| "learning_rate": 9.634943697784237e-06, | |
| "loss": 0.2627, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.6164184525971668, | |
| "grad_norm": 21.979530334472656, | |
| "learning_rate": 9.589538685070833e-06, | |
| "loss": 0.2251, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.618234653105703, | |
| "grad_norm": 7.475334167480469, | |
| "learning_rate": 9.54413367235743e-06, | |
| "loss": 0.268, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.620050853614239, | |
| "grad_norm": 24.20920753479004, | |
| "learning_rate": 9.498728659644026e-06, | |
| "loss": 0.3187, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.621867054122775, | |
| "grad_norm": 14.770585060119629, | |
| "learning_rate": 9.453323646930622e-06, | |
| "loss": 0.2269, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.6236832546313114, | |
| "grad_norm": 14.928208351135254, | |
| "learning_rate": 9.407918634217218e-06, | |
| "loss": 0.2472, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.6254994551398474, | |
| "grad_norm": 10.5422945022583, | |
| "learning_rate": 9.362513621503814e-06, | |
| "loss": 0.4042, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.6273156556483834, | |
| "grad_norm": 16.635868072509766, | |
| "learning_rate": 9.31710860879041e-06, | |
| "loss": 0.2717, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.6291318561569197, | |
| "grad_norm": 18.028661727905273, | |
| "learning_rate": 9.271703596077007e-06, | |
| "loss": 0.1889, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.630948056665456, | |
| "grad_norm": 14.402594566345215, | |
| "learning_rate": 9.226298583363603e-06, | |
| "loss": 0.2593, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.632764257173992, | |
| "grad_norm": 18.25139617919922, | |
| "learning_rate": 9.180893570650201e-06, | |
| "loss": 0.4313, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.634580457682528, | |
| "grad_norm": 15.58337688446045, | |
| "learning_rate": 9.135488557936797e-06, | |
| "loss": 0.2691, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.634580457682528, | |
| "eval_accuracy": 0.8859721082854799, | |
| "eval_f1": 0.8808923670444524, | |
| "eval_loss": 0.3507283329963684, | |
| "eval_precision": 0.8735378654629461, | |
| "eval_recall": 0.8903885563547935, | |
| "eval_runtime": 12.1871, | |
| "eval_samples_per_second": 100.024, | |
| "eval_steps_per_second": 6.318, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.6363966581910643, | |
| "grad_norm": 16.187101364135742, | |
| "learning_rate": 9.090083545223393e-06, | |
| "loss": 0.3231, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.6382128586996005, | |
| "grad_norm": 10.623252868652344, | |
| "learning_rate": 9.04467853250999e-06, | |
| "loss": 0.2615, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.6400290592081366, | |
| "grad_norm": 16.480899810791016, | |
| "learning_rate": 8.999273519796586e-06, | |
| "loss": 0.1975, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.6418452597166726, | |
| "grad_norm": 27.12870979309082, | |
| "learning_rate": 8.953868507083182e-06, | |
| "loss": 0.3049, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.6436614602252089, | |
| "grad_norm": 29.148317337036133, | |
| "learning_rate": 8.908463494369778e-06, | |
| "loss": 0.3735, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.6454776607337451, | |
| "grad_norm": 6.015985012054443, | |
| "learning_rate": 8.863058481656375e-06, | |
| "loss": 0.1824, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.6472938612422812, | |
| "grad_norm": 8.818500518798828, | |
| "learning_rate": 8.817653468942971e-06, | |
| "loss": 0.2414, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.6491100617508172, | |
| "grad_norm": 27.29248809814453, | |
| "learning_rate": 8.772248456229567e-06, | |
| "loss": 0.4476, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.6509262622593535, | |
| "grad_norm": 3.913367986679077, | |
| "learning_rate": 8.726843443516165e-06, | |
| "loss": 0.2849, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.6527424627678897, | |
| "grad_norm": 21.9657039642334, | |
| "learning_rate": 8.681438430802761e-06, | |
| "loss": 0.3711, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.6545586632764258, | |
| "grad_norm": 14.766958236694336, | |
| "learning_rate": 8.636033418089358e-06, | |
| "loss": 0.3725, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.6563748637849618, | |
| "grad_norm": 14.37176513671875, | |
| "learning_rate": 8.590628405375954e-06, | |
| "loss": 0.2172, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.658191064293498, | |
| "grad_norm": 5.39981746673584, | |
| "learning_rate": 8.54522339266255e-06, | |
| "loss": 0.422, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.6600072648020343, | |
| "grad_norm": 14.968268394470215, | |
| "learning_rate": 8.499818379949146e-06, | |
| "loss": 0.3144, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.6618234653105701, | |
| "grad_norm": 5.774266242980957, | |
| "learning_rate": 8.454413367235743e-06, | |
| "loss": 0.2657, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.6636396658191064, | |
| "grad_norm": 20.72484016418457, | |
| "learning_rate": 8.409008354522339e-06, | |
| "loss": 0.2577, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.6654558663276426, | |
| "grad_norm": 18.517642974853516, | |
| "learning_rate": 8.363603341808935e-06, | |
| "loss": 0.4908, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.6672720668361787, | |
| "grad_norm": 3.34096622467041, | |
| "learning_rate": 8.318198329095531e-06, | |
| "loss": 0.3054, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.6690882673447147, | |
| "grad_norm": 15.43202018737793, | |
| "learning_rate": 8.272793316382128e-06, | |
| "loss": 0.2922, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 1.670904467853251, | |
| "grad_norm": 9.061037063598633, | |
| "learning_rate": 8.227388303668726e-06, | |
| "loss": 0.2724, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.6727206683617872, | |
| "grad_norm": 7.940707206726074, | |
| "learning_rate": 8.181983290955322e-06, | |
| "loss": 0.2115, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.6745368688703233, | |
| "grad_norm": 5.944194793701172, | |
| "learning_rate": 8.136578278241918e-06, | |
| "loss": 0.2005, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 1.6763530693788593, | |
| "grad_norm": 14.915303230285645, | |
| "learning_rate": 8.091173265528514e-06, | |
| "loss": 0.3257, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 1.6781692698873956, | |
| "grad_norm": 17.358768463134766, | |
| "learning_rate": 8.04576825281511e-06, | |
| "loss": 0.4163, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.6799854703959318, | |
| "grad_norm": 16.51521110534668, | |
| "learning_rate": 8.000363240101707e-06, | |
| "loss": 0.282, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.6818016709044679, | |
| "grad_norm": 12.452226638793945, | |
| "learning_rate": 7.954958227388303e-06, | |
| "loss": 0.2801, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 1.683617871413004, | |
| "grad_norm": 21.576208114624023, | |
| "learning_rate": 7.909553214674901e-06, | |
| "loss": 0.3299, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.6854340719215402, | |
| "grad_norm": 17.944128036499023, | |
| "learning_rate": 7.864148201961497e-06, | |
| "loss": 0.302, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.6872502724300764, | |
| "grad_norm": 16.343204498291016, | |
| "learning_rate": 7.818743189248093e-06, | |
| "loss": 0.2561, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 1.6890664729386125, | |
| "grad_norm": 12.301880836486816, | |
| "learning_rate": 7.77333817653469e-06, | |
| "loss": 0.3478, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.6890664729386125, | |
| "eval_accuracy": 0.8884331419196062, | |
| "eval_f1": 0.8845863378573883, | |
| "eval_loss": 0.33353373408317566, | |
| "eval_precision": 0.8812408616942775, | |
| "eval_recall": 0.8907306256856646, | |
| "eval_runtime": 12.1668, | |
| "eval_samples_per_second": 100.191, | |
| "eval_steps_per_second": 6.329, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.6908826734471485, | |
| "grad_norm": 17.064929962158203, | |
| "learning_rate": 7.727933163821286e-06, | |
| "loss": 0.3987, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 1.6926988739556847, | |
| "grad_norm": 6.434702396392822, | |
| "learning_rate": 7.682528151107884e-06, | |
| "loss": 0.3076, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 1.694515074464221, | |
| "grad_norm": 6.527015209197998, | |
| "learning_rate": 7.63712313839448e-06, | |
| "loss": 0.227, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.696331274972757, | |
| "grad_norm": 11.179935455322266, | |
| "learning_rate": 7.5917181256810756e-06, | |
| "loss": 0.2426, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 1.698147475481293, | |
| "grad_norm": 10.149739265441895, | |
| "learning_rate": 7.546313112967672e-06, | |
| "loss": 0.3203, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.6999636759898293, | |
| "grad_norm": 22.71770668029785, | |
| "learning_rate": 7.500908100254268e-06, | |
| "loss": 0.3208, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.7017798764983654, | |
| "grad_norm": 9.473342895507812, | |
| "learning_rate": 7.455503087540864e-06, | |
| "loss": 0.3104, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 1.7035960770069014, | |
| "grad_norm": 15.134129524230957, | |
| "learning_rate": 7.4100980748274606e-06, | |
| "loss": 0.3233, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 1.7054122775154377, | |
| "grad_norm": 17.22422981262207, | |
| "learning_rate": 7.364693062114058e-06, | |
| "loss": 0.2024, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.707228478023974, | |
| "grad_norm": 7.5997724533081055, | |
| "learning_rate": 7.319288049400654e-06, | |
| "loss": 0.256, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.70904467853251, | |
| "grad_norm": 3.4972565174102783, | |
| "learning_rate": 7.273883036687251e-06, | |
| "loss": 0.2396, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 1.710860879041046, | |
| "grad_norm": 14.709694862365723, | |
| "learning_rate": 7.228478023973847e-06, | |
| "loss": 0.4198, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.7126770795495823, | |
| "grad_norm": 6.8229546546936035, | |
| "learning_rate": 7.1830730112604435e-06, | |
| "loss": 0.3004, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 1.7144932800581185, | |
| "grad_norm": 19.955167770385742, | |
| "learning_rate": 7.13766799854704e-06, | |
| "loss": 0.3674, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.7163094805666546, | |
| "grad_norm": 16.53900909423828, | |
| "learning_rate": 7.092262985833637e-06, | |
| "loss": 0.5129, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.7181256810751906, | |
| "grad_norm": 19.58238983154297, | |
| "learning_rate": 7.046857973120233e-06, | |
| "loss": 0.3268, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 1.7199418815837269, | |
| "grad_norm": 17.560192108154297, | |
| "learning_rate": 7.001452960406829e-06, | |
| "loss": 0.1925, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 1.721758082092263, | |
| "grad_norm": 12.490224838256836, | |
| "learning_rate": 6.956047947693426e-06, | |
| "loss": 0.215, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.7235742826007991, | |
| "grad_norm": 11.736912727355957, | |
| "learning_rate": 6.910642934980022e-06, | |
| "loss": 0.2565, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 1.7253904831093352, | |
| "grad_norm": 16.065710067749023, | |
| "learning_rate": 6.865237922266618e-06, | |
| "loss": 0.2175, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.7272066836178714, | |
| "grad_norm": 24.19011116027832, | |
| "learning_rate": 6.819832909553216e-06, | |
| "loss": 0.265, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.7290228841264077, | |
| "grad_norm": 17.26852798461914, | |
| "learning_rate": 6.774427896839812e-06, | |
| "loss": 0.2567, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 1.7308390846349437, | |
| "grad_norm": 15.206780433654785, | |
| "learning_rate": 6.7290228841264085e-06, | |
| "loss": 0.385, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 1.7326552851434798, | |
| "grad_norm": 25.0367374420166, | |
| "learning_rate": 6.683617871413005e-06, | |
| "loss": 0.3845, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.734471485652016, | |
| "grad_norm": 6.6783270835876465, | |
| "learning_rate": 6.638212858699601e-06, | |
| "loss": 0.2297, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.7362876861605523, | |
| "grad_norm": 23.788753509521484, | |
| "learning_rate": 6.592807845986197e-06, | |
| "loss": 0.2331, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.738103886669088, | |
| "grad_norm": 15.42270278930664, | |
| "learning_rate": 6.5474028332727935e-06, | |
| "loss": 0.3213, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.7399200871776244, | |
| "grad_norm": 13.071663856506348, | |
| "learning_rate": 6.50199782055939e-06, | |
| "loss": 0.34, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 1.7417362876861606, | |
| "grad_norm": 10.215317726135254, | |
| "learning_rate": 6.456592807845986e-06, | |
| "loss": 0.3863, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 1.7435524881946967, | |
| "grad_norm": 18.26382064819336, | |
| "learning_rate": 6.411187795132582e-06, | |
| "loss": 0.1977, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.7435524881946967, | |
| "eval_accuracy": 0.8917145200984413, | |
| "eval_f1": 0.8880029159699074, | |
| "eval_loss": 0.32050377130508423, | |
| "eval_precision": 0.8856300730561886, | |
| "eval_recall": 0.8930436229083021, | |
| "eval_runtime": 12.1617, | |
| "eval_samples_per_second": 100.233, | |
| "eval_steps_per_second": 6.331, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.7453686887032327, | |
| "grad_norm": 28.250118255615234, | |
| "learning_rate": 6.3657827824191785e-06, | |
| "loss": 0.5122, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 1.747184889211769, | |
| "grad_norm": 25.557300567626953, | |
| "learning_rate": 6.3203777697057765e-06, | |
| "loss": 0.2384, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 1.7490010897203052, | |
| "grad_norm": 14.092631340026855, | |
| "learning_rate": 6.274972756992373e-06, | |
| "loss": 0.224, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.7508172902288412, | |
| "grad_norm": 16.70784568786621, | |
| "learning_rate": 6.229567744278968e-06, | |
| "loss": 0.2133, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 1.7526334907373773, | |
| "grad_norm": 13.771525382995605, | |
| "learning_rate": 6.184162731565565e-06, | |
| "loss": 0.4779, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.7544496912459135, | |
| "grad_norm": 13.317017555236816, | |
| "learning_rate": 6.1387577188521614e-06, | |
| "loss": 0.188, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.7562658917544498, | |
| "grad_norm": 16.88526153564453, | |
| "learning_rate": 6.093352706138758e-06, | |
| "loss": 0.2979, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 1.7580820922629858, | |
| "grad_norm": 21.325788497924805, | |
| "learning_rate": 6.047947693425354e-06, | |
| "loss": 0.31, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 1.7598982927715219, | |
| "grad_norm": 20.396289825439453, | |
| "learning_rate": 6.00254268071195e-06, | |
| "loss": 0.308, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.7617144932800581, | |
| "grad_norm": 15.008870124816895, | |
| "learning_rate": 5.957137667998547e-06, | |
| "loss": 0.1889, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.7635306937885944, | |
| "grad_norm": 15.86091136932373, | |
| "learning_rate": 5.9117326552851435e-06, | |
| "loss": 0.2219, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 1.7653468942971304, | |
| "grad_norm": 13.422758102416992, | |
| "learning_rate": 5.86632764257174e-06, | |
| "loss": 0.2332, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.7671630948056665, | |
| "grad_norm": 13.367154121398926, | |
| "learning_rate": 5.820922629858337e-06, | |
| "loss": 0.2688, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 1.7689792953142027, | |
| "grad_norm": 9.540538787841797, | |
| "learning_rate": 5.775517617144933e-06, | |
| "loss": 0.1648, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 1.770795495822739, | |
| "grad_norm": 1.7264131307601929, | |
| "learning_rate": 5.73011260443153e-06, | |
| "loss": 0.3343, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.772611696331275, | |
| "grad_norm": 12.68677806854248, | |
| "learning_rate": 5.6847075917181265e-06, | |
| "loss": 0.181, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.774427896839811, | |
| "grad_norm": 18.84226417541504, | |
| "learning_rate": 5.639302579004723e-06, | |
| "loss": 0.3011, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 1.7762440973483473, | |
| "grad_norm": 18.599489212036133, | |
| "learning_rate": 5.593897566291319e-06, | |
| "loss": 0.3759, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.7780602978568834, | |
| "grad_norm": 24.071170806884766, | |
| "learning_rate": 5.548492553577915e-06, | |
| "loss": 0.4405, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 1.7798764983654194, | |
| "grad_norm": 20.368112564086914, | |
| "learning_rate": 5.503087540864512e-06, | |
| "loss": 0.1978, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.7816926988739556, | |
| "grad_norm": 4.559482574462891, | |
| "learning_rate": 5.4576825281511086e-06, | |
| "loss": 0.2037, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.783508899382492, | |
| "grad_norm": 22.67530059814453, | |
| "learning_rate": 5.412277515437705e-06, | |
| "loss": 0.291, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 1.785325099891028, | |
| "grad_norm": 14.617053031921387, | |
| "learning_rate": 5.366872502724301e-06, | |
| "loss": 0.2558, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 1.787141300399564, | |
| "grad_norm": 9.699358940124512, | |
| "learning_rate": 5.321467490010897e-06, | |
| "loss": 0.4875, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.7889575009081002, | |
| "grad_norm": 7.0100908279418945, | |
| "learning_rate": 5.2760624772974936e-06, | |
| "loss": 0.2908, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.7907737014166365, | |
| "grad_norm": 18.469934463500977, | |
| "learning_rate": 5.230657464584091e-06, | |
| "loss": 0.3165, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 1.7925899019251725, | |
| "grad_norm": 8.11326789855957, | |
| "learning_rate": 5.185252451870687e-06, | |
| "loss": 0.2946, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.7944061024337086, | |
| "grad_norm": 14.999030113220215, | |
| "learning_rate": 5.139847439157283e-06, | |
| "loss": 0.2645, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 1.7962223029422448, | |
| "grad_norm": 11.65622615814209, | |
| "learning_rate": 5.094442426443879e-06, | |
| "loss": 0.3627, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 1.798038503450781, | |
| "grad_norm": 11.40311336517334, | |
| "learning_rate": 5.049037413730476e-06, | |
| "loss": 0.1679, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.798038503450781, | |
| "eval_accuracy": 0.8941755537325676, | |
| "eval_f1": 0.8909275000598975, | |
| "eval_loss": 0.31260696053504944, | |
| "eval_precision": 0.8874307991095873, | |
| "eval_recall": 0.8966734867740875, | |
| "eval_runtime": 12.1595, | |
| "eval_samples_per_second": 100.251, | |
| "eval_steps_per_second": 6.333, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.7998547039593171, | |
| "grad_norm": 17.762371063232422, | |
| "learning_rate": 5.003632401017073e-06, | |
| "loss": 0.2242, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 1.8016709044678532, | |
| "grad_norm": 8.07528018951416, | |
| "learning_rate": 4.958227388303669e-06, | |
| "loss": 0.305, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.8034871049763894, | |
| "grad_norm": 8.580565452575684, | |
| "learning_rate": 4.912822375590265e-06, | |
| "loss": 0.3421, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.8053033054849257, | |
| "grad_norm": 13.502715110778809, | |
| "learning_rate": 4.8674173628768615e-06, | |
| "loss": 0.1513, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 1.8071195059934617, | |
| "grad_norm": 25.522857666015625, | |
| "learning_rate": 4.822012350163458e-06, | |
| "loss": 0.4245, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.8089357065019978, | |
| "grad_norm": 11.059943199157715, | |
| "learning_rate": 4.776607337450055e-06, | |
| "loss": 0.2588, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.810751907010534, | |
| "grad_norm": 25.464778900146484, | |
| "learning_rate": 4.731202324736651e-06, | |
| "loss": 0.3359, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 1.8125681075190703, | |
| "grad_norm": 14.878934860229492, | |
| "learning_rate": 4.685797312023247e-06, | |
| "loss": 0.2937, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 1.814384308027606, | |
| "grad_norm": 17.37669563293457, | |
| "learning_rate": 4.640392299309844e-06, | |
| "loss": 0.2071, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.8162005085361423, | |
| "grad_norm": 5.666213035583496, | |
| "learning_rate": 4.59498728659644e-06, | |
| "loss": 0.2143, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.8180167090446786, | |
| "grad_norm": 10.074169158935547, | |
| "learning_rate": 4.549582273883037e-06, | |
| "loss": 0.3549, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 1.8198329095532146, | |
| "grad_norm": 22.636964797973633, | |
| "learning_rate": 4.504177261169633e-06, | |
| "loss": 0.3005, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 1.8216491100617507, | |
| "grad_norm": 21.29734992980957, | |
| "learning_rate": 4.45877224845623e-06, | |
| "loss": 0.3036, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 1.823465310570287, | |
| "grad_norm": 18.074115753173828, | |
| "learning_rate": 4.4133672357428265e-06, | |
| "loss": 0.2375, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 1.8252815110788232, | |
| "grad_norm": 13.343547821044922, | |
| "learning_rate": 4.367962223029423e-06, | |
| "loss": 0.3563, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.8270977115873592, | |
| "grad_norm": 15.384708404541016, | |
| "learning_rate": 4.322557210316019e-06, | |
| "loss": 0.3085, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 1.8289139120958953, | |
| "grad_norm": 22.345226287841797, | |
| "learning_rate": 4.277152197602616e-06, | |
| "loss": 0.2529, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 1.8307301126044315, | |
| "grad_norm": 3.8110063076019287, | |
| "learning_rate": 4.231747184889212e-06, | |
| "loss": 0.2276, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 1.8325463131129678, | |
| "grad_norm": 14.203871726989746, | |
| "learning_rate": 4.186342172175809e-06, | |
| "loss": 0.2813, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 1.8343625136215038, | |
| "grad_norm": 18.770957946777344, | |
| "learning_rate": 4.140937159462405e-06, | |
| "loss": 0.2534, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.8361787141300399, | |
| "grad_norm": 19.307645797729492, | |
| "learning_rate": 4.095532146749001e-06, | |
| "loss": 0.2505, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 1.8379949146385761, | |
| "grad_norm": 12.851605415344238, | |
| "learning_rate": 4.050127134035598e-06, | |
| "loss": 0.1797, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 1.8398111151471124, | |
| "grad_norm": 17.81605339050293, | |
| "learning_rate": 4.0047221213221944e-06, | |
| "loss": 0.1766, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 1.8416273156556484, | |
| "grad_norm": 5.046531677246094, | |
| "learning_rate": 3.959317108608791e-06, | |
| "loss": 0.2847, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 1.8434435161641844, | |
| "grad_norm": 15.07939338684082, | |
| "learning_rate": 3.913912095895387e-06, | |
| "loss": 0.2543, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.8452597166727207, | |
| "grad_norm": 5.4197187423706055, | |
| "learning_rate": 3.868507083181983e-06, | |
| "loss": 0.276, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 1.847075917181257, | |
| "grad_norm": 4.651303768157959, | |
| "learning_rate": 3.82310207046858e-06, | |
| "loss": 0.3026, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 1.848892117689793, | |
| "grad_norm": 5.725452423095703, | |
| "learning_rate": 3.7776970577551765e-06, | |
| "loss": 0.321, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 1.850708318198329, | |
| "grad_norm": 12.18204402923584, | |
| "learning_rate": 3.7322920450417728e-06, | |
| "loss": 0.2724, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 1.8525245187068653, | |
| "grad_norm": 3.6419947147369385, | |
| "learning_rate": 3.686887032328369e-06, | |
| "loss": 0.3451, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.8525245187068653, | |
| "eval_accuracy": 0.896636587366694, | |
| "eval_f1": 0.8908937354693052, | |
| "eval_loss": 0.31067386269569397, | |
| "eval_precision": 0.8844179738985345, | |
| "eval_recall": 0.899200938788393, | |
| "eval_runtime": 12.1663, | |
| "eval_samples_per_second": 100.195, | |
| "eval_steps_per_second": 6.329, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.8543407192154013, | |
| "grad_norm": 15.111452102661133, | |
| "learning_rate": 3.6414820196149653e-06, | |
| "loss": 0.1833, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 1.8561569197239374, | |
| "grad_norm": 18.825973510742188, | |
| "learning_rate": 3.5960770069015624e-06, | |
| "loss": 0.3835, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 1.8579731202324736, | |
| "grad_norm": 12.017671585083008, | |
| "learning_rate": 3.5506719941881586e-06, | |
| "loss": 0.2242, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 1.8597893207410099, | |
| "grad_norm": 24.258045196533203, | |
| "learning_rate": 3.505266981474755e-06, | |
| "loss": 0.4669, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 1.861605521249546, | |
| "grad_norm": 16.036376953125, | |
| "learning_rate": 3.4598619687613515e-06, | |
| "loss": 0.3115, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.863421721758082, | |
| "grad_norm": 25.399738311767578, | |
| "learning_rate": 3.414456956047948e-06, | |
| "loss": 0.3122, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 1.8652379222666182, | |
| "grad_norm": 20.493247985839844, | |
| "learning_rate": 3.369051943334544e-06, | |
| "loss": 0.3484, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 1.8670541227751545, | |
| "grad_norm": 16.851757049560547, | |
| "learning_rate": 3.323646930621141e-06, | |
| "loss": 0.3464, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 1.8688703232836905, | |
| "grad_norm": 5.93435001373291, | |
| "learning_rate": 3.2782419179077374e-06, | |
| "loss": 0.1495, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 1.8706865237922266, | |
| "grad_norm": 15.460413932800293, | |
| "learning_rate": 3.2328369051943336e-06, | |
| "loss": 0.2404, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.8725027243007628, | |
| "grad_norm": 14.957904815673828, | |
| "learning_rate": 3.18743189248093e-06, | |
| "loss": 0.2765, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 1.874318924809299, | |
| "grad_norm": 24.511220932006836, | |
| "learning_rate": 3.142026879767526e-06, | |
| "loss": 0.2619, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 1.876135125317835, | |
| "grad_norm": 15.439196586608887, | |
| "learning_rate": 3.096621867054123e-06, | |
| "loss": 0.2426, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 1.8779513258263711, | |
| "grad_norm": 18.046316146850586, | |
| "learning_rate": 3.0512168543407195e-06, | |
| "loss": 0.2727, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 1.8797675263349074, | |
| "grad_norm": 9.866218566894531, | |
| "learning_rate": 3.0058118416273157e-06, | |
| "loss": 0.2591, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.8815837268434437, | |
| "grad_norm": 15.493182182312012, | |
| "learning_rate": 2.960406828913912e-06, | |
| "loss": 0.2277, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 1.8833999273519797, | |
| "grad_norm": 13.912703514099121, | |
| "learning_rate": 2.9150018162005086e-06, | |
| "loss": 0.2501, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 1.8852161278605157, | |
| "grad_norm": 10.867436408996582, | |
| "learning_rate": 2.869596803487105e-06, | |
| "loss": 0.3008, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 1.887032328369052, | |
| "grad_norm": 18.993480682373047, | |
| "learning_rate": 2.8241917907737016e-06, | |
| "loss": 0.1959, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 1.8888485288775883, | |
| "grad_norm": 10.849798202514648, | |
| "learning_rate": 2.778786778060298e-06, | |
| "loss": 0.3108, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.890664729386124, | |
| "grad_norm": 22.30524253845215, | |
| "learning_rate": 2.7333817653468945e-06, | |
| "loss": 0.3187, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 1.8924809298946603, | |
| "grad_norm": 4.560070514678955, | |
| "learning_rate": 2.687976752633491e-06, | |
| "loss": 0.2388, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 1.8942971304031966, | |
| "grad_norm": 13.859076499938965, | |
| "learning_rate": 2.6425717399200874e-06, | |
| "loss": 0.2131, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 1.8961133309117326, | |
| "grad_norm": 4.655661582946777, | |
| "learning_rate": 2.5971667272066837e-06, | |
| "loss": 0.1688, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 1.8979295314202687, | |
| "grad_norm": 18.112701416015625, | |
| "learning_rate": 2.5517617144932803e-06, | |
| "loss": 0.2763, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.899745731928805, | |
| "grad_norm": 9.725381851196289, | |
| "learning_rate": 2.5063567017798766e-06, | |
| "loss": 0.2694, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 1.9015619324373412, | |
| "grad_norm": 21.33144760131836, | |
| "learning_rate": 2.4609516890664732e-06, | |
| "loss": 0.2813, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 1.9033781329458772, | |
| "grad_norm": 9.807076454162598, | |
| "learning_rate": 2.4155466763530695e-06, | |
| "loss": 0.2441, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 1.9051943334544132, | |
| "grad_norm": 5.741889476776123, | |
| "learning_rate": 2.3701416636396657e-06, | |
| "loss": 0.2222, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 1.9070105339629495, | |
| "grad_norm": 12.217042922973633, | |
| "learning_rate": 2.3247366509262624e-06, | |
| "loss": 0.333, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.9070105339629495, | |
| "eval_accuracy": 0.8917145200984413, | |
| "eval_f1": 0.8879221499881476, | |
| "eval_loss": 0.31236740946769714, | |
| "eval_precision": 0.880595048818372, | |
| "eval_recall": 0.8979764002075346, | |
| "eval_runtime": 12.1598, | |
| "eval_samples_per_second": 100.249, | |
| "eval_steps_per_second": 6.332, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.9088267344714858, | |
| "grad_norm": 7.902151584625244, | |
| "learning_rate": 2.2793316382128587e-06, | |
| "loss": 0.2326, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 1.9106429349800218, | |
| "grad_norm": 17.3931827545166, | |
| "learning_rate": 2.233926625499455e-06, | |
| "loss": 0.2766, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 1.9124591354885578, | |
| "grad_norm": 13.122883796691895, | |
| "learning_rate": 2.1885216127860516e-06, | |
| "loss": 0.2769, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 1.914275335997094, | |
| "grad_norm": 12.172006607055664, | |
| "learning_rate": 2.143116600072648e-06, | |
| "loss": 0.2228, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 1.9160915365056304, | |
| "grad_norm": 6.881805896759033, | |
| "learning_rate": 2.0977115873592445e-06, | |
| "loss": 0.1705, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.9179077370141664, | |
| "grad_norm": 19.267488479614258, | |
| "learning_rate": 2.052306574645841e-06, | |
| "loss": 0.3439, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 1.9197239375227024, | |
| "grad_norm": 4.804058074951172, | |
| "learning_rate": 2.0069015619324374e-06, | |
| "loss": 0.2191, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 1.9215401380312387, | |
| "grad_norm": 15.236076354980469, | |
| "learning_rate": 1.961496549219034e-06, | |
| "loss": 0.2319, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 1.923356338539775, | |
| "grad_norm": 17.033308029174805, | |
| "learning_rate": 1.9160915365056303e-06, | |
| "loss": 0.4269, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 1.925172539048311, | |
| "grad_norm": 15.480613708496094, | |
| "learning_rate": 1.870686523792227e-06, | |
| "loss": 0.281, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.926988739556847, | |
| "grad_norm": 18.749048233032227, | |
| "learning_rate": 1.8252815110788233e-06, | |
| "loss": 0.2042, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 1.9288049400653833, | |
| "grad_norm": 10.639968872070312, | |
| "learning_rate": 1.7798764983654195e-06, | |
| "loss": 0.3482, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 1.9306211405739193, | |
| "grad_norm": 19.38547706604004, | |
| "learning_rate": 1.7344714856520162e-06, | |
| "loss": 0.285, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 1.9324373410824554, | |
| "grad_norm": 3.3453238010406494, | |
| "learning_rate": 1.6890664729386124e-06, | |
| "loss": 0.2941, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 1.9342535415909916, | |
| "grad_norm": 16.062301635742188, | |
| "learning_rate": 1.643661460225209e-06, | |
| "loss": 0.3375, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.9360697420995279, | |
| "grad_norm": 15.955122947692871, | |
| "learning_rate": 1.5982564475118054e-06, | |
| "loss": 0.2047, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 1.937885942608064, | |
| "grad_norm": 25.678117752075195, | |
| "learning_rate": 1.5528514347984018e-06, | |
| "loss": 0.2348, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 1.9397021431166, | |
| "grad_norm": 13.082353591918945, | |
| "learning_rate": 1.5074464220849983e-06, | |
| "loss": 0.2699, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 1.9415183436251362, | |
| "grad_norm": 15.006932258605957, | |
| "learning_rate": 1.4620414093715947e-06, | |
| "loss": 0.2522, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 1.9433345441336725, | |
| "grad_norm": 18.136178970336914, | |
| "learning_rate": 1.4166363966581912e-06, | |
| "loss": 0.3754, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.9451507446422085, | |
| "grad_norm": 13.17072868347168, | |
| "learning_rate": 1.3712313839447874e-06, | |
| "loss": 0.2194, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 1.9469669451507445, | |
| "grad_norm": 16.255809783935547, | |
| "learning_rate": 1.325826371231384e-06, | |
| "loss": 0.3234, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 1.9487831456592808, | |
| "grad_norm": 30.35965347290039, | |
| "learning_rate": 1.2804213585179804e-06, | |
| "loss": 0.3173, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 1.950599346167817, | |
| "grad_norm": 15.700325965881348, | |
| "learning_rate": 1.235016345804577e-06, | |
| "loss": 0.2505, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 1.952415546676353, | |
| "grad_norm": 18.816146850585938, | |
| "learning_rate": 1.1896113330911733e-06, | |
| "loss": 0.3193, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 1.9542317471848891, | |
| "grad_norm": 16.41304588317871, | |
| "learning_rate": 1.1442063203777698e-06, | |
| "loss": 0.2486, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 1.9560479476934254, | |
| "grad_norm": 16.63722801208496, | |
| "learning_rate": 1.0988013076643662e-06, | |
| "loss": 0.237, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 1.9578641482019616, | |
| "grad_norm": 6.757064342498779, | |
| "learning_rate": 1.0533962949509627e-06, | |
| "loss": 0.2081, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 1.9596803487104977, | |
| "grad_norm": 3.97802734375, | |
| "learning_rate": 1.007991282237559e-06, | |
| "loss": 0.2566, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 1.9614965492190337, | |
| "grad_norm": 5.727281093597412, | |
| "learning_rate": 9.625862695241554e-07, | |
| "loss": 0.1921, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.9614965492190337, | |
| "eval_accuracy": 0.9007383100902379, | |
| "eval_f1": 0.8957933200060735, | |
| "eval_loss": 0.3023645579814911, | |
| "eval_precision": 0.8902412162565443, | |
| "eval_recall": 0.9027657103823287, | |
| "eval_runtime": 12.1671, | |
| "eval_samples_per_second": 100.188, | |
| "eval_steps_per_second": 6.329, | |
| "step": 10800 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 11012, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 600, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.701982939756626e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
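
The JSON above is the tail of a Hugging Face Trainer `trainer_state.json`: `log_history` interleaves training entries (`loss`, `learning_rate`, `grad_norm` every `logging_steps: 10` steps) with evaluation entries (`eval_*` keys every `eval_steps: 300` steps), and the surrounding keys record run configuration such as `max_steps: 11012` and `save_steps: 600`. Among the evaluations shown in this section, the final one at step 10800 has the highest `eval_f1` (≈0.8958). Below is a minimal sketch of how such a log is typically consumed; it assumes the file is saved as `trainer_state.json`, and the variable names are illustrative rather than part of any Trainer API.

```python
# Minimal sketch (assumption: the log above is saved as trainer_state.json).
# It separates training entries from evaluation entries in log_history and
# reports the evaluation checkpoint with the highest F1. Stdlib only.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_*" keys
# (and "step"/"epoch") but no plain "loss" key.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_f1" in e]

best = max(eval_logs, key=lambda e: e["eval_f1"])
print(f"entries logged: {len(train_logs)} train / {len(eval_logs)} eval")
print(f"best eval_f1 {best['eval_f1']:.4f} at step {best['step']} "
      f"(accuracy {best['eval_accuracy']:.4f}, "
      f"eval_loss {best['eval_loss']:.4f})")
```

Since `best_metric` and `best_model_checkpoint` are null in this state file, a scan like this is one way to pick which saved checkpoint (every 600 steps here) to keep after the run.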