{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999935917975008,
  "eval_steps": 500,
  "global_step": 7802,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001281640499839795,
      "grad_norm": 0.6719354391098022,
      "learning_rate": 2.560819462227913e-07,
      "loss": 0.1253,
      "step": 10
    },
    {
      "epoch": 0.00256328099967959,
      "grad_norm": 0.6422625184059143,
      "learning_rate": 5.121638924455826e-07,
      "loss": 0.1306,
      "step": 20
    },
    {
      "epoch": 0.003844921499519385,
      "grad_norm": 0.6302109956741333,
      "learning_rate": 7.682458386683739e-07,
      "loss": 0.118,
      "step": 30
    },
    {
      "epoch": 0.00512656199935918,
      "grad_norm": 0.6015211939811707,
      "learning_rate": 1.0243277848911651e-06,
      "loss": 0.1186,
      "step": 40
    },
    {
      "epoch": 0.006408202499198975,
      "grad_norm": 0.49298664927482605,
      "learning_rate": 1.2804097311139565e-06,
      "loss": 0.1123,
      "step": 50
    },
    {
      "epoch": 0.00768984299903877,
      "grad_norm": 0.4810634255409241,
      "learning_rate": 1.5364916773367477e-06,
      "loss": 0.1097,
      "step": 60
    },
    {
      "epoch": 0.008971483498878564,
      "grad_norm": 0.4184609353542328,
      "learning_rate": 1.7925736235595393e-06,
      "loss": 0.101,
      "step": 70
    },
    {
      "epoch": 0.01025312399871836,
      "grad_norm": 0.33549964427948,
      "learning_rate": 2.0486555697823303e-06,
      "loss": 0.0952,
      "step": 80
    },
    {
      "epoch": 0.011534764498558154,
      "grad_norm": 0.3348263204097748,
      "learning_rate": 2.304737516005122e-06,
      "loss": 0.0927,
      "step": 90
    },
    {
      "epoch": 0.01281640499839795,
      "grad_norm": 0.31015241146087646,
      "learning_rate": 2.560819462227913e-06,
      "loss": 0.089,
      "step": 100
    },
    {
      "epoch": 0.014098045498237744,
      "grad_norm": 0.2687341272830963,
      "learning_rate": 2.8169014084507046e-06,
      "loss": 0.085,
      "step": 110
    },
    {
      "epoch": 0.01537968599807754,
      "grad_norm": 0.24938784539699554,
      "learning_rate": 3.0729833546734954e-06,
      "loss": 0.0854,
      "step": 120
    },
    {
      "epoch": 0.016661326497917333,
      "grad_norm": 0.23371560871601105,
      "learning_rate": 3.329065300896287e-06,
      "loss": 0.0849,
      "step": 130
    },
    {
      "epoch": 0.017942966997757128,
      "grad_norm": 0.23148757219314575,
      "learning_rate": 3.5851472471190786e-06,
      "loss": 0.0822,
      "step": 140
    },
    {
      "epoch": 0.019224607497596923,
      "grad_norm": 0.24376137554645538,
      "learning_rate": 3.84122919334187e-06,
      "loss": 0.0832,
      "step": 150
    },
    {
      "epoch": 0.02050624799743672,
      "grad_norm": 0.24407723546028137,
      "learning_rate": 4.0973111395646605e-06,
      "loss": 0.0795,
      "step": 160
    },
    {
      "epoch": 0.021787888497276513,
      "grad_norm": 0.2162434309720993,
      "learning_rate": 4.3533930857874526e-06,
      "loss": 0.08,
      "step": 170
    },
    {
      "epoch": 0.02306952899711631,
      "grad_norm": 0.2380385547876358,
      "learning_rate": 4.609475032010244e-06,
      "loss": 0.077,
      "step": 180
    },
    {
      "epoch": 0.024351169496956104,
      "grad_norm": 0.21600289642810822,
      "learning_rate": 4.865556978233035e-06,
      "loss": 0.0801,
      "step": 190
    },
    {
      "epoch": 0.0256328099967959,
      "grad_norm": 0.21687458455562592,
      "learning_rate": 5.121638924455826e-06,
      "loss": 0.0777,
      "step": 200
    },
    {
      "epoch": 0.026914450496635694,
      "grad_norm": 0.20008929073810577,
      "learning_rate": 5.377720870678618e-06,
      "loss": 0.0767,
      "step": 210
    },
    {
      "epoch": 0.02819609099647549,
      "grad_norm": 0.2190534472465515,
      "learning_rate": 5.633802816901409e-06,
      "loss": 0.0792,
      "step": 220
    },
    {
      "epoch": 0.029477731496315284,
      "grad_norm": 0.2292010486125946,
      "learning_rate": 5.8898847631242005e-06,
      "loss": 0.0775,
      "step": 230
    },
    {
      "epoch": 0.03075937199615508,
      "grad_norm": 0.22605210542678833,
      "learning_rate": 6.145966709346991e-06,
      "loss": 0.0748,
      "step": 240
    },
    {
      "epoch": 0.03204101249599487,
      "grad_norm": 0.24900269508361816,
      "learning_rate": 6.402048655569783e-06,
      "loss": 0.0765,
      "step": 250
    },
    {
      "epoch": 0.033322652995834666,
      "grad_norm": 0.22502242028713226,
      "learning_rate": 6.658130601792574e-06,
      "loss": 0.0753,
      "step": 260
    },
    {
      "epoch": 0.03460429349567446,
      "grad_norm": 0.2257997840642929,
      "learning_rate": 6.914212548015365e-06,
      "loss": 0.0775,
      "step": 270
    },
    {
      "epoch": 0.035885933995514256,
      "grad_norm": 0.20458129048347473,
      "learning_rate": 7.170294494238157e-06,
      "loss": 0.074,
      "step": 280
    },
    {
      "epoch": 0.03716757449535405,
      "grad_norm": 0.19960536062717438,
      "learning_rate": 7.426376440460948e-06,
      "loss": 0.0753,
      "step": 290
    },
    {
      "epoch": 0.038449214995193846,
      "grad_norm": 0.20938412845134735,
      "learning_rate": 7.68245838668374e-06,
      "loss": 0.0731,
      "step": 300
    },
    {
      "epoch": 0.03973085549503364,
      "grad_norm": 0.215741828083992,
      "learning_rate": 7.93854033290653e-06,
      "loss": 0.0744,
      "step": 310
    },
    {
      "epoch": 0.04101249599487344,
      "grad_norm": 0.19086730480194092,
      "learning_rate": 8.194622279129321e-06,
      "loss": 0.072,
      "step": 320
    },
    {
      "epoch": 0.04229413649471323,
      "grad_norm": 0.204061821103096,
      "learning_rate": 8.450704225352114e-06,
      "loss": 0.0728,
      "step": 330
    },
    {
      "epoch": 0.04357577699455303,
      "grad_norm": 0.20682527124881744,
      "learning_rate": 8.706786171574905e-06,
      "loss": 0.0742,
      "step": 340
    },
    {
      "epoch": 0.04485741749439282,
      "grad_norm": 0.20134615898132324,
      "learning_rate": 8.962868117797696e-06,
      "loss": 0.0751,
      "step": 350
    },
    {
      "epoch": 0.04613905799423262,
      "grad_norm": 0.19907858967781067,
      "learning_rate": 9.218950064020487e-06,
      "loss": 0.0728,
      "step": 360
    },
    {
      "epoch": 0.04742069849407241,
      "grad_norm": 0.21698465943336487,
      "learning_rate": 9.475032010243279e-06,
      "loss": 0.0748,
      "step": 370
    },
    {
      "epoch": 0.04870233899391221,
      "grad_norm": 0.21630790829658508,
      "learning_rate": 9.73111395646607e-06,
      "loss": 0.0737,
      "step": 380
    },
    {
      "epoch": 0.049983979493752,
      "grad_norm": 0.2053464651107788,
      "learning_rate": 9.987195902688861e-06,
      "loss": 0.0733,
      "step": 390
    },
    {
      "epoch": 0.0512656199935918,
      "grad_norm": 0.22032414376735687,
      "learning_rate": 1.0243277848911652e-05,
      "loss": 0.0727,
      "step": 400
    },
    {
      "epoch": 0.05254726049343159,
      "grad_norm": 0.2172485888004303,
      "learning_rate": 1.0499359795134443e-05,
      "loss": 0.071,
      "step": 410
    },
    {
      "epoch": 0.05382890099327139,
      "grad_norm": 0.19321809709072113,
      "learning_rate": 1.0755441741357236e-05,
      "loss": 0.0739,
      "step": 420
    },
    {
      "epoch": 0.05511054149311118,
      "grad_norm": 0.1873639076948166,
      "learning_rate": 1.1011523687580026e-05,
      "loss": 0.0685,
      "step": 430
    },
    {
      "epoch": 0.05639218199295098,
      "grad_norm": 0.21123793721199036,
      "learning_rate": 1.1267605633802819e-05,
      "loss": 0.0703,
      "step": 440
    },
    {
      "epoch": 0.05767382249279077,
      "grad_norm": 0.20596912503242493,
      "learning_rate": 1.1523687580025608e-05,
      "loss": 0.0723,
      "step": 450
    },
    {
      "epoch": 0.05895546299263057,
      "grad_norm": 0.20657142996788025,
      "learning_rate": 1.1779769526248401e-05,
      "loss": 0.0705,
      "step": 460
    },
    {
      "epoch": 0.06023710349247036,
      "grad_norm": 0.19684277474880219,
      "learning_rate": 1.2035851472471192e-05,
      "loss": 0.0711,
      "step": 470
    },
    {
      "epoch": 0.06151874399231016,
      "grad_norm": 0.19987432658672333,
      "learning_rate": 1.2291933418693982e-05,
      "loss": 0.0733,
      "step": 480
    },
    {
      "epoch": 0.06280038449214995,
      "grad_norm": 0.2000504434108734,
      "learning_rate": 1.2548015364916774e-05,
      "loss": 0.0708,
      "step": 490
    },
    {
      "epoch": 0.06408202499198974,
      "grad_norm": 0.1920643448829651,
      "learning_rate": 1.2804097311139566e-05,
      "loss": 0.0707,
      "step": 500
    },
    {
      "epoch": 0.06408202499198974,
      "eval_f1": 0.7841977645041103,
      "eval_loss": 0.10520466417074203,
      "eval_precision": 0.7908863175474173,
      "eval_recall": 0.7776213933849402,
      "eval_runtime": 2.9914,
      "eval_samples_per_second": 2164.9,
      "eval_steps_per_second": 8.692,
      "step": 500
    },
    {
      "epoch": 0.06536366549182954,
      "grad_norm": 0.18772949278354645,
      "learning_rate": 1.3060179257362357e-05,
      "loss": 0.0694,
      "step": 510
    },
    {
      "epoch": 0.06664530599166933,
      "grad_norm": 0.17525950074195862,
      "learning_rate": 1.3316261203585148e-05,
      "loss": 0.0709,
      "step": 520
    },
    {
      "epoch": 0.06792694649150913,
      "grad_norm": 0.20220516622066498,
      "learning_rate": 1.3572343149807941e-05,
      "loss": 0.0714,
      "step": 530
    },
    {
      "epoch": 0.06920858699134892,
      "grad_norm": 0.2033197432756424,
      "learning_rate": 1.382842509603073e-05,
      "loss": 0.0719,
      "step": 540
    },
    {
      "epoch": 0.07049022749118872,
      "grad_norm": 0.1880401372909546,
      "learning_rate": 1.4084507042253522e-05,
      "loss": 0.0717,
      "step": 550
    },
    {
      "epoch": 0.07177186799102851,
      "grad_norm": 0.20734059810638428,
      "learning_rate": 1.4340588988476314e-05,
      "loss": 0.0696,
      "step": 560
    },
    {
      "epoch": 0.07305350849086831,
      "grad_norm": 0.18614107370376587,
      "learning_rate": 1.4596670934699104e-05,
      "loss": 0.0728,
      "step": 570
    },
    {
      "epoch": 0.0743351489907081,
      "grad_norm": 0.19527311623096466,
      "learning_rate": 1.4852752880921897e-05,
      "loss": 0.0721,
      "step": 580
    },
    {
      "epoch": 0.0756167894905479,
      "grad_norm": 0.19726967811584473,
      "learning_rate": 1.5108834827144688e-05,
      "loss": 0.0715,
      "step": 590
    },
    {
      "epoch": 0.07689842999038769,
      "grad_norm": 0.2089812010526657,
      "learning_rate": 1.536491677336748e-05,
      "loss": 0.0702,
      "step": 600
    },
    {
      "epoch": 0.07818007049022749,
      "grad_norm": 0.186571404337883,
      "learning_rate": 1.562099871959027e-05,
      "loss": 0.071,
      "step": 610
    },
    {
      "epoch": 0.07946171099006728,
      "grad_norm": 0.20535047352313995,
      "learning_rate": 1.587708066581306e-05,
      "loss": 0.0718,
      "step": 620
    },
    {
      "epoch": 0.08074335148990708,
      "grad_norm": 0.20184259116649628,
      "learning_rate": 1.6133162612035853e-05,
      "loss": 0.0701,
      "step": 630
    },
    {
      "epoch": 0.08202499198974687,
      "grad_norm": 0.20490041375160217,
      "learning_rate": 1.6389244558258642e-05,
      "loss": 0.0713,
      "step": 640
    },
    {
      "epoch": 0.08330663248958667,
      "grad_norm": 0.2261057049036026,
      "learning_rate": 1.6645326504481435e-05,
      "loss": 0.0715,
      "step": 650
    },
    {
      "epoch": 0.08458827298942646,
      "grad_norm": 0.2002391666173935,
      "learning_rate": 1.6901408450704228e-05,
      "loss": 0.072,
      "step": 660
    },
    {
      "epoch": 0.08586991348926626,
      "grad_norm": 0.1953844130039215,
      "learning_rate": 1.7157490396927017e-05,
      "loss": 0.0691,
      "step": 670
    },
    {
      "epoch": 0.08715155398910605,
      "grad_norm": 0.19759021699428558,
      "learning_rate": 1.741357234314981e-05,
      "loss": 0.0713,
      "step": 680
    },
    {
      "epoch": 0.08843319448894585,
      "grad_norm": 0.19472911953926086,
      "learning_rate": 1.76696542893726e-05,
      "loss": 0.0687,
      "step": 690
    },
    {
      "epoch": 0.08971483498878564,
      "grad_norm": 0.2004728615283966,
      "learning_rate": 1.7925736235595393e-05,
      "loss": 0.0713,
      "step": 700
    },
    {
      "epoch": 0.09099647548862544,
      "grad_norm": 0.2075321078300476,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 0.0681,
      "step": 710
    },
    {
      "epoch": 0.09227811598846523,
      "grad_norm": 0.2087249606847763,
      "learning_rate": 1.8437900128040975e-05,
      "loss": 0.0682,
      "step": 720
    },
    {
      "epoch": 0.09355975648830503,
      "grad_norm": 0.19576896727085114,
      "learning_rate": 1.8693982074263764e-05,
      "loss": 0.0695,
      "step": 730
    },
    {
      "epoch": 0.09484139698814482,
      "grad_norm": 0.1800433099269867,
      "learning_rate": 1.8950064020486557e-05,
      "loss": 0.0686,
      "step": 740
    },
    {
      "epoch": 0.09612303748798462,
      "grad_norm": 0.2001904547214508,
      "learning_rate": 1.920614596670935e-05,
      "loss": 0.0686,
      "step": 750
    },
    {
      "epoch": 0.09740467798782441,
      "grad_norm": 0.20266686379909515,
      "learning_rate": 1.946222791293214e-05,
      "loss": 0.0697,
      "step": 760
    },
    {
      "epoch": 0.09868631848766421,
      "grad_norm": 0.1891346275806427,
      "learning_rate": 1.9718309859154933e-05,
      "loss": 0.0681,
      "step": 770
    },
    {
      "epoch": 0.099967958987504,
      "grad_norm": 0.18907149136066437,
      "learning_rate": 1.9974391805377722e-05,
      "loss": 0.07,
      "step": 780
    },
    {
      "epoch": 0.1012495994873438,
      "grad_norm": 0.19888722896575928,
      "learning_rate": 1.9999918912065962e-05,
      "loss": 0.0696,
      "step": 790
    },
    {
      "epoch": 0.1025312399871836,
      "grad_norm": 0.19157752394676208,
      "learning_rate": 1.9999638609784763e-05,
      "loss": 0.0694,
      "step": 800
    },
    {
      "epoch": 0.10381288048702339,
      "grad_norm": 0.21350421011447906,
      "learning_rate": 1.99991580976816e-05,
      "loss": 0.0703,
      "step": 810
    },
    {
      "epoch": 0.10509452098686319,
      "grad_norm": 0.20590972900390625,
      "learning_rate": 1.9998477385377137e-05,
      "loss": 0.0708,
      "step": 820
    },
    {
      "epoch": 0.10637616148670298,
      "grad_norm": 0.22003325819969177,
      "learning_rate": 1.9997596486500402e-05,
      "loss": 0.0694,
      "step": 830
    },
    {
      "epoch": 0.10765780198654278,
      "grad_norm": 0.2006986439228058,
      "learning_rate": 1.9996515418688493e-05,
      "loss": 0.0707,
      "step": 840
    },
    {
      "epoch": 0.10893944248638257,
      "grad_norm": 0.1951693445444107,
      "learning_rate": 1.9995234203586223e-05,
      "loss": 0.069,
      "step": 850
    },
    {
      "epoch": 0.11022108298622237,
      "grad_norm": 0.20470306277275085,
      "learning_rate": 1.999375286684571e-05,
      "loss": 0.0685,
      "step": 860
    },
    {
      "epoch": 0.11150272348606216,
      "grad_norm": 0.21397650241851807,
      "learning_rate": 1.9992071438125846e-05,
      "loss": 0.0691,
      "step": 870
    },
    {
      "epoch": 0.11278436398590196,
      "grad_norm": 0.19602704048156738,
      "learning_rate": 1.9990189951091697e-05,
      "loss": 0.0689,
      "step": 880
    },
    {
      "epoch": 0.11406600448574175,
      "grad_norm": 0.20522291958332062,
      "learning_rate": 1.998810844341384e-05,
      "loss": 0.0714,
      "step": 890
    },
    {
      "epoch": 0.11534764498558155,
      "grad_norm": 0.2263880968093872,
      "learning_rate": 1.998582695676762e-05,
      "loss": 0.0702,
      "step": 900
    },
    {
      "epoch": 0.11662928548542134,
      "grad_norm": 0.20244184136390686,
      "learning_rate": 1.9983345536832282e-05,
      "loss": 0.0701,
      "step": 910
    },
    {
      "epoch": 0.11791092598526114,
      "grad_norm": 0.17990508675575256,
      "learning_rate": 1.998066423329009e-05,
      "loss": 0.0695,
      "step": 920
    },
    {
      "epoch": 0.11919256648510093,
      "grad_norm": 0.18233992159366608,
      "learning_rate": 1.9977783099825314e-05,
      "loss": 0.067,
      "step": 930
    },
    {
      "epoch": 0.12047420698494073,
      "grad_norm": 0.20347543060779572,
      "learning_rate": 1.9974702194123156e-05,
      "loss": 0.071,
      "step": 940
    },
    {
      "epoch": 0.12175584748478052,
      "grad_norm": 0.19996987283229828,
      "learning_rate": 1.9971421577868605e-05,
      "loss": 0.069,
      "step": 950
    },
    {
      "epoch": 0.12303748798462032,
      "grad_norm": 0.17577455937862396,
      "learning_rate": 1.9967941316745197e-05,
      "loss": 0.0648,
      "step": 960
    },
    {
      "epoch": 0.12431912848446011,
      "grad_norm": 0.20562253892421722,
      "learning_rate": 1.99642614804337e-05,
      "loss": 0.0708,
      "step": 970
    },
    {
      "epoch": 0.1256007689842999,
      "grad_norm": 0.20121215283870697,
      "learning_rate": 1.996038214261071e-05,
      "loss": 0.0692,
      "step": 980
    },
    {
      "epoch": 0.1268824094841397,
      "grad_norm": 0.17846404016017914,
      "learning_rate": 1.9956303380947182e-05,
      "loss": 0.0663,
      "step": 990
    },
    {
      "epoch": 0.12816404998397948,
      "grad_norm": 0.2094384878873825,
      "learning_rate": 1.995202527710689e-05,
      "loss": 0.0704,
      "step": 1000
    },
    {
      "epoch": 0.12816404998397948,
      "eval_f1": 0.8013096465167041,
      "eval_loss": 0.09550511837005615,
      "eval_precision": 0.8294213631228818,
      "eval_recall": 0.7750410509031199,
      "eval_runtime": 2.9934,
      "eval_samples_per_second": 2163.405,
      "eval_steps_per_second": 8.686,
      "step": 1000
    },
    {
      "epoch": 0.1294456904838193,
      "grad_norm": 0.2003297060728073,
      "learning_rate": 1.9947547916744762e-05,
      "loss": 0.0668,
      "step": 1010
    },
    {
      "epoch": 0.13072733098365907,
      "grad_norm": 0.18323542177677155,
      "learning_rate": 1.99428713895052e-05,
      "loss": 0.0675,
      "step": 1020
    },
    {
      "epoch": 0.13200897148349888,
      "grad_norm": 0.1899520307779312,
      "learning_rate": 1.993799578902025e-05,
      "loss": 0.0694,
      "step": 1030
    },
    {
      "epoch": 0.13329061198333866,
      "grad_norm": 0.19993416965007782,
      "learning_rate": 1.9932921212907753e-05,
      "loss": 0.0702,
      "step": 1040
    },
    {
      "epoch": 0.13457225248317847,
      "grad_norm": 0.2051459103822708,
      "learning_rate": 1.9927647762769372e-05,
      "loss": 0.0712,
      "step": 1050
    },
    {
      "epoch": 0.13585389298301825,
      "grad_norm": 0.19359096884727478,
      "learning_rate": 1.9922175544188587e-05,
      "loss": 0.0707,
      "step": 1060
    },
    {
      "epoch": 0.13713553348285806,
      "grad_norm": 0.18876825273036957,
      "learning_rate": 1.9916504666728533e-05,
      "loss": 0.0683,
      "step": 1070
    },
    {
      "epoch": 0.13841717398269784,
      "grad_norm": 0.18740342557430267,
      "learning_rate": 1.9910635243929864e-05,
      "loss": 0.069,
      "step": 1080
    },
    {
      "epoch": 0.13969881448253765,
      "grad_norm": 0.18416374921798706,
      "learning_rate": 1.9904567393308425e-05,
      "loss": 0.07,
      "step": 1090
    },
    {
      "epoch": 0.14098045498237743,
      "grad_norm": 0.17465655505657196,
      "learning_rate": 1.9898301236352945e-05,
      "loss": 0.0681,
      "step": 1100
    },
    {
      "epoch": 0.14226209548221724,
      "grad_norm": 0.19771921634674072,
      "learning_rate": 1.9891836898522566e-05,
      "loss": 0.0677,
      "step": 1110
    },
    {
      "epoch": 0.14354373598205702,
      "grad_norm": 0.19512903690338135,
      "learning_rate": 1.9885174509244366e-05,
      "loss": 0.0702,
      "step": 1120
    },
    {
      "epoch": 0.14482537648189683,
      "grad_norm": 0.18367761373519897,
      "learning_rate": 1.9878314201910734e-05,
      "loss": 0.068,
      "step": 1130
    },
    {
      "epoch": 0.14610701698173661,
      "grad_norm": 0.19701771438121796,
      "learning_rate": 1.9871256113876736e-05,
      "loss": 0.0711,
      "step": 1140
    },
    {
      "epoch": 0.14738865748157642,
      "grad_norm": 0.18831095099449158,
      "learning_rate": 1.9864000386457323e-05,
      "loss": 0.0703,
      "step": 1150
    },
    {
      "epoch": 0.1486702979814162,
      "grad_norm": 0.1935907006263733,
      "learning_rate": 1.9856547164924542e-05,
      "loss": 0.0698,
      "step": 1160
    },
    {
      "epoch": 0.14995193848125601,
      "grad_norm": 0.2067263126373291,
      "learning_rate": 1.98488965985046e-05,
      "loss": 0.0716,
      "step": 1170
    },
    {
      "epoch": 0.1512335789810958,
      "grad_norm": 0.20484298467636108,
      "learning_rate": 1.9841048840374885e-05,
      "loss": 0.0672,
      "step": 1180
    },
    {
      "epoch": 0.1525152194809356,
      "grad_norm": 0.19146059453487396,
      "learning_rate": 1.98330040476609e-05,
      "loss": 0.0699,
      "step": 1190
    },
    {
      "epoch": 0.15379685998077539,
      "grad_norm": 0.1922774314880371,
      "learning_rate": 1.982476238143312e-05,
      "loss": 0.0689,
      "step": 1200
    },
    {
      "epoch": 0.1550785004806152,
      "grad_norm": 0.17350053787231445,
      "learning_rate": 1.981632400670376e-05,
      "loss": 0.0698,
      "step": 1210
    },
    {
      "epoch": 0.15636014098045498,
      "grad_norm": 0.20516818761825562,
      "learning_rate": 1.9807689092423478e-05,
      "loss": 0.0687,
      "step": 1220
    },
    {
      "epoch": 0.15764178148029478,
      "grad_norm": 0.18992391228675842,
      "learning_rate": 1.9798857811477984e-05,
      "loss": 0.0646,
      "step": 1230
    },
    {
      "epoch": 0.15892342198013457,
      "grad_norm": 0.1936413198709488,
      "learning_rate": 1.9789830340684593e-05,
      "loss": 0.0693,
      "step": 1240
    },
    {
      "epoch": 0.16020506247997437,
      "grad_norm": 0.19793152809143066,
      "learning_rate": 1.978060686078866e-05,
      "loss": 0.0685,
      "step": 1250
    },
    {
      "epoch": 0.16148670297981416,
      "grad_norm": 0.19546934962272644,
      "learning_rate": 1.9771187556459988e-05,
      "loss": 0.0698,
      "step": 1260
    },
    {
      "epoch": 0.16276834347965397,
      "grad_norm": 0.21572382748126984,
      "learning_rate": 1.976157261628912e-05,
      "loss": 0.07,
      "step": 1270
    },
    {
      "epoch": 0.16404998397949375,
      "grad_norm": 0.2009066939353943,
      "learning_rate": 1.975176223278356e-05,
      "loss": 0.0668,
      "step": 1280
    },
    {
      "epoch": 0.16533162447933356,
      "grad_norm": 0.19803877174854279,
      "learning_rate": 1.9741756602363914e-05,
      "loss": 0.0672,
      "step": 1290
    },
    {
      "epoch": 0.16661326497917334,
      "grad_norm": 0.1853305697441101,
      "learning_rate": 1.9731555925359974e-05,
      "loss": 0.0676,
      "step": 1300
    },
    {
      "epoch": 0.16789490547901315,
      "grad_norm": 0.1944134682416916,
      "learning_rate": 1.9721160406006697e-05,
      "loss": 0.0685,
      "step": 1310
    },
    {
      "epoch": 0.16917654597885293,
      "grad_norm": 0.20351552963256836,
      "learning_rate": 1.9710570252440106e-05,
      "loss": 0.0671,
      "step": 1320
    },
    {
      "epoch": 0.17045818647869274,
      "grad_norm": 0.18567000329494476,
      "learning_rate": 1.969978567669314e-05,
      "loss": 0.0676,
      "step": 1330
    },
    {
      "epoch": 0.17173982697853252,
      "grad_norm": 0.17873169481754303,
      "learning_rate": 1.968880689469141e-05,
      "loss": 0.0682,
      "step": 1340
    },
    {
      "epoch": 0.17302146747837233,
      "grad_norm": 0.2007400393486023,
      "learning_rate": 1.9677634126248847e-05,
      "loss": 0.067,
      "step": 1350
    },
    {
      "epoch": 0.1743031079782121,
      "grad_norm": 0.1938178986310959,
      "learning_rate": 1.9666267595063344e-05,
      "loss": 0.0683,
      "step": 1360
    },
    {
      "epoch": 0.17558474847805192,
      "grad_norm": 0.21833261847496033,
      "learning_rate": 1.9654707528712236e-05,
      "loss": 0.0682,
      "step": 1370
    },
    {
      "epoch": 0.1768663889778917,
      "grad_norm": 0.1728629469871521,
      "learning_rate": 1.9642954158647776e-05,
      "loss": 0.0686,
      "step": 1380
    },
    {
      "epoch": 0.1781480294777315,
      "grad_norm": 0.19660884141921997,
      "learning_rate": 1.9631007720192475e-05,
      "loss": 0.0663,
      "step": 1390
    },
    {
      "epoch": 0.1794296699775713,
      "grad_norm": 0.19190044701099396,
      "learning_rate": 1.9618868452534415e-05,
      "loss": 0.0684,
      "step": 1400
    },
    {
      "epoch": 0.1807113104774111,
      "grad_norm": 0.20303724706172943,
      "learning_rate": 1.9606536598722435e-05,
      "loss": 0.0658,
      "step": 1410
    },
    {
      "epoch": 0.18199295097725088,
      "grad_norm": 0.18915805220603943,
      "learning_rate": 1.959401240566129e-05,
      "loss": 0.0696,
      "step": 1420
    },
    {
      "epoch": 0.1832745914770907,
      "grad_norm": 0.19406339526176453,
      "learning_rate": 1.9581296124106682e-05,
      "loss": 0.0671,
      "step": 1430
    },
    {
      "epoch": 0.18455623197693047,
      "grad_norm": 0.18201453983783722,
      "learning_rate": 1.9568388008660262e-05,
      "loss": 0.0684,
      "step": 1440
    },
    {
      "epoch": 0.18583787247677028,
      "grad_norm": 0.18637534976005554,
      "learning_rate": 1.955528831776452e-05,
      "loss": 0.0653,
      "step": 1450
    },
    {
      "epoch": 0.18711951297661006,
      "grad_norm": 0.1933770328760147,
      "learning_rate": 1.9541997313697614e-05,
      "loss": 0.0698,
      "step": 1460
    },
    {
      "epoch": 0.18840115347644987,
      "grad_norm": 0.18085801601409912,
      "learning_rate": 1.952851526256811e-05,
      "loss": 0.0664,
      "step": 1470
    },
    {
      "epoch": 0.18968279397628965,
      "grad_norm": 0.20404258370399475,
      "learning_rate": 1.951484243430968e-05,
      "loss": 0.0675,
      "step": 1480
    },
    {
      "epoch": 0.19096443447612946,
      "grad_norm": 0.1916743814945221,
      "learning_rate": 1.9500979102675654e-05,
      "loss": 0.0669,
      "step": 1490
    },
    {
      "epoch": 0.19224607497596924,
      "grad_norm": 0.20056353509426117,
      "learning_rate": 1.9486925545233587e-05,
      "loss": 0.066,
      "step": 1500
    },
    {
      "epoch": 0.19224607497596924,
      "eval_f1": 0.825402613187481,
      "eval_loss": 0.08602051436901093,
      "eval_precision": 0.8564762265102788,
      "eval_recall": 0.7965048088200798,
      "eval_runtime": 2.9899,
      "eval_samples_per_second": 2165.931,
      "eval_steps_per_second": 8.696,
      "step": 1500
    },
    {
      "epoch": 0.19352771547580905,
      "grad_norm": 0.21483555436134338,
      "learning_rate": 1.9472682043359676e-05,
      "loss": 0.068,
      "step": 1510
    },
    {
      "epoch": 0.19480935597564883,
      "grad_norm": 0.19758599996566772,
      "learning_rate": 1.9458248882233117e-05,
      "loss": 0.0683,
      "step": 1520
    },
    {
      "epoch": 0.19609099647548864,
      "grad_norm": 0.18787485361099243,
      "learning_rate": 1.9443626350830417e-05,
      "loss": 0.0663,
      "step": 1530
    },
    {
      "epoch": 0.19737263697532842,
      "grad_norm": 0.20583724975585938,
      "learning_rate": 1.9428814741919595e-05,
      "loss": 0.0692,
      "step": 1540
    },
    {
      "epoch": 0.19865427747516823,
      "grad_norm": 0.1839466392993927,
      "learning_rate": 1.941381435205433e-05,
      "loss": 0.0684,
      "step": 1550
    },
    {
      "epoch": 0.199935917975008,
      "grad_norm": 0.2200762778520584,
      "learning_rate": 1.9398625481568015e-05,
      "loss": 0.0694,
      "step": 1560
    },
    {
      "epoch": 0.20121755847484782,
      "grad_norm": 0.1883755922317505,
      "learning_rate": 1.9383248434567745e-05,
      "loss": 0.0671,
      "step": 1570
    },
    {
      "epoch": 0.2024991989746876,
      "grad_norm": 0.19850397109985352,
      "learning_rate": 1.9367683518928228e-05,
      "loss": 0.0645,
      "step": 1580
    },
    {
      "epoch": 0.2037808394745274,
      "grad_norm": 0.19924448430538177,
      "learning_rate": 1.9351931046285622e-05,
      "loss": 0.0704,
      "step": 1590
    },
    {
      "epoch": 0.2050624799743672,
      "grad_norm": 0.1877722144126892,
      "learning_rate": 1.933599133203131e-05,
      "loss": 0.0686,
      "step": 1600
    },
    {
      "epoch": 0.20634412047420697,
      "grad_norm": 0.20414628088474274,
      "learning_rate": 1.931986469530555e-05,
      "loss": 0.067,
      "step": 1610
    },
    {
      "epoch": 0.20762576097404678,
      "grad_norm": 0.18630723655223846,
      "learning_rate": 1.9303551458991127e-05,
      "loss": 0.0667,
      "step": 1620
    },
    {
      "epoch": 0.20890740147388656,
      "grad_norm": 0.18654927611351013,
      "learning_rate": 1.928705194970685e-05,
      "loss": 0.066,
      "step": 1630
    },
    {
      "epoch": 0.21018904197372637,
      "grad_norm": 0.1941138505935669,
      "learning_rate": 1.9270366497801044e-05,
      "loss": 0.0659,
      "step": 1640
    },
    {
      "epoch": 0.21147068247356615,
      "grad_norm": 0.23397959768772125,
      "learning_rate": 1.9253495437344914e-05,
      "loss": 0.0684,
      "step": 1650
    },
    {
      "epoch": 0.21275232297340596,
      "grad_norm": 0.19757859408855438,
      "learning_rate": 1.9236439106125874e-05,
      "loss": 0.0675,
      "step": 1660
    },
    {
      "epoch": 0.21403396347324574,
      "grad_norm": 0.20172664523124695,
      "learning_rate": 1.9219197845640766e-05,
      "loss": 0.0664,
      "step": 1670
    },
    {
      "epoch": 0.21531560397308555,
      "grad_norm": 0.20976141095161438,
      "learning_rate": 1.9201772001089033e-05,
      "loss": 0.0665,
      "step": 1680
    },
    {
      "epoch": 0.21659724447292533,
      "grad_norm": 0.19794906675815582,
      "learning_rate": 1.9184161921365806e-05,
      "loss": 0.0657,
      "step": 1690
    },
    {
      "epoch": 0.21787888497276514,
      "grad_norm": 0.22197921574115753,
      "learning_rate": 1.9166367959054924e-05,
      "loss": 0.0651,
      "step": 1700
    },
    {
      "epoch": 0.21916052547260492,
      "grad_norm": 0.2003927230834961,
      "learning_rate": 1.9148390470421862e-05,
      "loss": 0.0664,
      "step": 1710
    },
    {
      "epoch": 0.22044216597244473,
      "grad_norm": 0.1864669919013977,
      "learning_rate": 1.9130229815406605e-05,
      "loss": 0.0665,
      "step": 1720
    },
    {
      "epoch": 0.2217238064722845,
      "grad_norm": 0.19202478229999542,
      "learning_rate": 1.911188635761645e-05,
      "loss": 0.0692,
      "step": 1730
    },
    {
      "epoch": 0.22300544697212432,
      "grad_norm": 0.19340120255947113,
      "learning_rate": 1.909336046431871e-05,
      "loss": 0.065,
      "step": 1740
    },
    {
      "epoch": 0.2242870874719641,
      "grad_norm": 0.19901137053966522,
      "learning_rate": 1.907465250643337e-05,
      "loss": 0.0666,
      "step": 1750
    },
    {
      "epoch": 0.2255687279718039,
      "grad_norm": 0.19657251238822937,
      "learning_rate": 1.9055762858525654e-05,
      "loss": 0.0674,
      "step": 1760
    },
    {
      "epoch": 0.2268503684716437,
      "grad_norm": 0.19742804765701294,
      "learning_rate": 1.9036691898798535e-05,
      "loss": 0.0672,
      "step": 1770
    },
    {
      "epoch": 0.2281320089714835,
      "grad_norm": 0.2104116529226303,
      "learning_rate": 1.9017440009085165e-05,
      "loss": 0.0704,
      "step": 1780
    },
    {
      "epoch": 0.22941364947132328,
      "grad_norm": 0.18256083130836487,
      "learning_rate": 1.8998007574841206e-05,
      "loss": 0.0692,
      "step": 1790
    },
    {
      "epoch": 0.2306952899711631,
      "grad_norm": 0.18848790228366852,
      "learning_rate": 1.8978394985137144e-05,
      "loss": 0.0666,
      "step": 1800
    },
    {
      "epoch": 0.23197693047100287,
      "grad_norm": 0.1659405380487442,
      "learning_rate": 1.8958602632650474e-05,
      "loss": 0.0641,
      "step": 1810
    },
    {
      "epoch": 0.23325857097084268,
      "grad_norm": 0.18415990471839905,
      "learning_rate": 1.8938630913657863e-05,
      "loss": 0.0684,
      "step": 1820
    },
    {
      "epoch": 0.23454021147068246,
      "grad_norm": 0.18959344923496246,
      "learning_rate": 1.8918480228027182e-05,
      "loss": 0.0676,
      "step": 1830
    },
    {
      "epoch": 0.23582185197052227,
      "grad_norm": 0.20328116416931152,
      "learning_rate": 1.8898150979209536e-05,
      "loss": 0.0649,
      "step": 1840
    },
    {
      "epoch": 0.23710349247036205,
      "grad_norm": 0.21128340065479279,
      "learning_rate": 1.8877643574231165e-05,
      "loss": 0.0654,
      "step": 1850
    },
    {
      "epoch": 0.23838513297020186,
      "grad_norm": 0.18140177428722382,
      "learning_rate": 1.8856958423685288e-05,
      "loss": 0.0688,
      "step": 1860
    },
    {
      "epoch": 0.23966677347004164,
      "grad_norm": 0.18850302696228027,
      "learning_rate": 1.883609594172391e-05,
      "loss": 0.0667,
      "step": 1870
    },
    {
      "epoch": 0.24094841396988145,
      "grad_norm": 0.20093129575252533,
      "learning_rate": 1.8815056546049505e-05,
      "loss": 0.0678,
      "step": 1880
    },
    {
      "epoch": 0.24223005446972123,
      "grad_norm": 0.1854446977376938,
      "learning_rate": 1.8793840657906664e-05,
      "loss": 0.0666,
      "step": 1890
    },
    {
      "epoch": 0.24351169496956104,
      "grad_norm": 0.20104651153087616,
      "learning_rate": 1.8772448702073654e-05,
      "loss": 0.0687,
      "step": 1900
    },
    {
      "epoch": 0.24479333546940082,
      "grad_norm": 0.19386902451515198,
      "learning_rate": 1.8750881106853923e-05,
      "loss": 0.0672,
      "step": 1910
    },
    {
      "epoch": 0.24607497596924063,
      "grad_norm": 0.19654834270477295,
      "learning_rate": 1.8729138304067512e-05,
      "loss": 0.0672,
      "step": 1920
    },
    {
      "epoch": 0.24735661646908041,
      "grad_norm": 0.17995163798332214,
      "learning_rate": 1.8707220729042427e-05,
      "loss": 0.0674,
      "step": 1930
    },
    {
      "epoch": 0.24863825696892022,
      "grad_norm": 0.2079264372587204,
      "learning_rate": 1.8685128820605904e-05,
      "loss": 0.0658,
      "step": 1940
    },
    {
      "epoch": 0.24991989746876,
      "grad_norm": 0.20509222149848938,
      "learning_rate": 1.8662863021075632e-05,
      "loss": 0.068,
      "step": 1950
    },
    {
      "epoch": 0.2512015379685998,
      "grad_norm": 0.20490337908267975,
      "learning_rate": 1.8640423776250895e-05,
      "loss": 0.066,
      "step": 1960
    },
    {
      "epoch": 0.2524831784684396,
      "grad_norm": 0.19565828144550323,
      "learning_rate": 1.8617811535403658e-05,
      "loss": 0.0686,
      "step": 1970
    },
    {
      "epoch": 0.2537648189682794,
      "grad_norm": 0.20575258135795593,
      "learning_rate": 1.859502675126955e-05,
      "loss": 0.0664,
      "step": 1980
    },
    {
      "epoch": 0.2550464594681192,
      "grad_norm": 0.2111874371767044,
      "learning_rate": 1.857206988003881e-05,
      "loss": 0.0667,
      "step": 1990
    },
    {
      "epoch": 0.25632809996795897,
      "grad_norm": 0.19794400036334991,
      "learning_rate": 1.8548941381347165e-05,
      "loss": 0.0677,
      "step": 2000
    },
    {
      "epoch": 0.25632809996795897,
      "eval_f1": 0.8409049773755656,
      "eval_loss": 0.08060676604509354,
      "eval_precision": 0.8658218412225122,
      "eval_recall": 0.8173821252638986,
      "eval_runtime": 2.9949,
      "eval_samples_per_second": 2162.361,
      "eval_steps_per_second": 8.681,
      "step": 2000
    },
    {
      "epoch": 0.2576097404677988,
      "grad_norm": 0.18614304065704346,
      "learning_rate": 1.8525641718266612e-05,
      "loss": 0.0645,
      "step": 2010
    },
    {
      "epoch": 0.2588913809676386,
      "grad_norm": 0.19621381163597107,
      "learning_rate": 1.8502171357296144e-05,
      "loss": 0.0673,
      "step": 2020
    },
    {
      "epoch": 0.2601730214674784,
      "grad_norm": 0.20377185940742493,
      "learning_rate": 1.8478530768352424e-05,
      "loss": 0.0677,
      "step": 2030
    },
    {
      "epoch": 0.26145466196731815,
      "grad_norm": 0.2045605629682541,
      "learning_rate": 1.845472042476037e-05,
      "loss": 0.0679,
      "step": 2040
    },
    {
      "epoch": 0.26273630246715796,
      "grad_norm": 0.16738948225975037,
      "learning_rate": 1.8430740803243674e-05,
      "loss": 0.0639,
      "step": 2050
    },
    {
      "epoch": 0.26401794296699777,
      "grad_norm": 0.17515943944454193,
      "learning_rate": 1.8406592383915263e-05,
      "loss": 0.0627,
      "step": 2060
    },
    {
      "epoch": 0.2652995834668376,
      "grad_norm": 0.18184049427509308,
      "learning_rate": 1.838227565026768e-05,
      "loss": 0.0675,
      "step": 2070
    },
    {
      "epoch": 0.2665812239666773,
      "grad_norm": 0.1939259171485901,
      "learning_rate": 1.835779108916342e-05,
      "loss": 0.0665,
      "step": 2080
    },
    {
      "epoch": 0.26786286446651714,
      "grad_norm": 0.18487761914730072,
      "learning_rate": 1.833313919082515e-05,
      "loss": 0.0685,
      "step": 2090
    },
    {
      "epoch": 0.26914450496635695,
      "grad_norm": 0.20124389231204987,
      "learning_rate": 1.8308320448825934e-05,
      "loss": 0.0661,
      "step": 2100
    },
    {
      "epoch": 0.27042614546619675,
      "grad_norm": 0.19602565467357635,
      "learning_rate": 1.8283335360079317e-05,
      "loss": 0.0679,
      "step": 2110
    },
    {
      "epoch": 0.2717077859660365,
      "grad_norm": 0.19158747792243958,
      "learning_rate": 1.8258184424829392e-05,
      "loss": 0.065,
      "step": 2120
    },
    {
      "epoch": 0.2729894264658763,
      "grad_norm": 0.21096321940422058,
      "learning_rate": 1.8232868146640792e-05,
      "loss": 0.0633,
      "step": 2130
    },
    {
      "epoch": 0.2742710669657161,
      "grad_norm": 0.18472693860530853,
      "learning_rate": 1.8207387032388584e-05,
      "loss": 0.0659,
      "step": 2140
    },
    {
      "epoch": 0.27555270746555593,
      "grad_norm": 0.2008291631937027,
      "learning_rate": 1.818174159224814e-05,
      "loss": 0.067,
      "step": 2150
    },
    {
      "epoch": 0.2768343479653957,
      "grad_norm": 0.20867876708507538,
      "learning_rate": 1.815593233968492e-05,
      "loss": 0.0643,
      "step": 2160
    },
    {
      "epoch": 0.2781159884652355,
      "grad_norm": 0.191690132021904,
      "learning_rate": 1.812995979144418e-05,
      "loss": 0.0657,
      "step": 2170
    },
    {
      "epoch": 0.2793976289650753,
      "grad_norm": 0.18234828114509583,
      "learning_rate": 1.8103824467540642e-05,
      "loss": 0.0667,
      "step": 2180
    },
    {
      "epoch": 0.2806792694649151,
      "grad_norm": 0.1962231993675232,
      "learning_rate": 1.8077526891248073e-05,
      "loss": 0.0671,
      "step": 2190
    },
    {
      "epoch": 0.28196090996475487,
      "grad_norm": 0.1902410089969635,
      "learning_rate": 1.8051067589088803e-05,
      "loss": 0.066,
      "step": 2200
    },
    {
      "epoch": 0.2832425504645947,
      "grad_norm": 0.2025291472673416,
      "learning_rate": 1.8024447090823198e-05,
      "loss": 0.0669,
      "step": 2210
    },
    {
      "epoch": 0.2845241909644345,
      "grad_norm": 0.1987154334783554,
      "learning_rate": 1.7997665929439038e-05,
      "loss": 0.0675,
      "step": 2220
    },
    {
      "epoch": 0.2858058314642743,
      "grad_norm": 0.17381438612937927,
      "learning_rate": 1.7970724641140864e-05,
      "loss": 0.0663,
      "step": 2230
    },
    {
      "epoch": 0.28708747196411405,
      "grad_norm": 0.1927717924118042,
      "learning_rate": 1.7943623765339217e-05,
      "loss": 0.0675,
      "step": 2240
    },
    {
      "epoch": 0.28836911246395386,
      "grad_norm": 0.18249468505382538,
      "learning_rate": 1.7916363844639854e-05,
      "loss": 0.0672,
      "step": 2250
    },
    {
      "epoch": 0.28965075296379367,
      "grad_norm": 0.2089211344718933,
      "learning_rate": 1.7888945424832896e-05,
      "loss": 0.0671,
      "step": 2260
    },
    {
      "epoch": 0.2909323934636335,
      "grad_norm": 0.20524665713310242,
      "learning_rate": 1.7861369054881867e-05,
      "loss": 0.0685,
      "step": 2270
    },
    {
      "epoch": 0.29221403396347323,
      "grad_norm": 0.2076677680015564,
      "learning_rate": 1.7833635286912743e-05,
      "loss": 0.0655,
      "step": 2280
    },
    {
      "epoch": 0.29349567446331304,
      "grad_norm": 0.20620182156562805,
      "learning_rate": 1.780574467620286e-05,
      "loss": 0.0649,
      "step": 2290
    },
    {
      "epoch": 0.29477731496315285,
      "grad_norm": 0.188080295920372,
      "learning_rate": 1.7777697781169813e-05,
      "loss": 0.0649,
      "step": 2300
    },
    {
      "epoch": 0.29605895546299266,
      "grad_norm": 0.18712954223155975,
      "learning_rate": 1.7749495163360292e-05,
      "loss": 0.0662,
      "step": 2310
    },
    {
      "epoch": 0.2973405959628324,
      "grad_norm": 0.19281476736068726,
      "learning_rate": 1.772113738743881e-05,
      "loss": 0.0655,
      "step": 2320
    },
    {
      "epoch": 0.2986222364626722,
      "grad_norm": 0.18981251120567322,
      "learning_rate": 1.7692625021176412e-05,
      "loss": 0.0642,
      "step": 2330
    },
    {
      "epoch": 0.29990387696251203,
      "grad_norm": 0.1883469671010971,
      "learning_rate": 1.7663958635439303e-05,
      "loss": 0.0664,
      "step": 2340
    },
    {
      "epoch": 0.30118551746235184,
      "grad_norm": 0.1924140453338623,
      "learning_rate": 1.7635138804177424e-05,
      "loss": 0.0659,
      "step": 2350
    },
    {
      "epoch": 0.3024671579621916,
      "grad_norm": 0.18589209020137787,
      "learning_rate": 1.760616610441296e-05,
      "loss": 0.0677,
      "step": 2360
    },
    {
      "epoch": 0.3037487984620314,
      "grad_norm": 0.18945348262786865,
      "learning_rate": 1.757704111622878e-05,
      "loss": 0.0662,
      "step": 2370
    },
    {
      "epoch": 0.3050304389618712,
      "grad_norm": 0.1850900948047638,
      "learning_rate": 1.7547764422756827e-05,
      "loss": 0.0639,
      "step": 2380
    },
    {
      "epoch": 0.306312079461711,
      "grad_norm": 0.18490533530712128,
      "learning_rate": 1.751833661016644e-05,
      "loss": 0.065,
      "step": 2390
    },
    {
      "epoch": 0.30759371996155077,
      "grad_norm": 0.21076135337352753,
      "learning_rate": 1.748875826765263e-05,
      "loss": 0.0659,
      "step": 2400
    },
    {
      "epoch": 0.3088753604613906,
      "grad_norm": 0.18776057660579681,
      "learning_rate": 1.7459029987424267e-05,
      "loss": 0.0652,
      "step": 2410
    },
    {
      "epoch": 0.3101570009612304,
      "grad_norm": 0.18914464116096497,
      "learning_rate": 1.7429152364692225e-05,
      "loss": 0.0671,
      "step": 2420
    },
    {
      "epoch": 0.31143864146107014,
      "grad_norm": 0.16848713159561157,
      "learning_rate": 1.7399125997657478e-05,
      "loss": 0.0655,
      "step": 2430
    },
    {
      "epoch": 0.31272028196090995,
      "grad_norm": 0.18632112443447113,
      "learning_rate": 1.736895148749911e-05,
      "loss": 0.0692,
      "step": 2440
    },
    {
      "epoch": 0.31400192246074976,
      "grad_norm": 0.2024405598640442,
      "learning_rate": 1.7338629438362294e-05,
      "loss": 0.0669,
      "step": 2450
    },
    {
      "epoch": 0.31528356296058957,
      "grad_norm": 0.18051929771900177,
      "learning_rate": 1.7308160457346165e-05,
      "loss": 0.0647,
      "step": 2460
    },
    {
      "epoch": 0.3165652034604293,
      "grad_norm": 0.19417473673820496,
      "learning_rate": 1.7277545154491703e-05,
      "loss": 0.0653,
      "step": 2470
    },
    {
      "epoch": 0.31784684396026913,
      "grad_norm": 0.2000834196805954,
      "learning_rate": 1.724678414276949e-05,
      "loss": 0.0648,
      "step": 2480
    },
    {
      "epoch": 0.31912848446010894,
      "grad_norm": 0.19750289618968964,
      "learning_rate": 1.721587803806745e-05,
      "loss": 0.0681,
      "step": 2490
    },
    {
      "epoch": 0.32041012495994875,
      "grad_norm": 0.20135164260864258,
      "learning_rate": 1.7184827459178518e-05,
      "loss": 0.0673,
      "step": 2500
    },
    {
      "epoch": 0.32041012495994875,
      "eval_f1": 0.8547925608011445,
      "eval_loss": 0.07640008628368378,
      "eval_precision": 0.8690909090909091,
      "eval_recall": 0.8409570724841661,
      "eval_runtime": 2.9969,
      "eval_samples_per_second": 2160.934,
      "eval_steps_per_second": 8.676,
      "step": 2500
    },
    {
      "epoch": 0.3216917654597885,
      "grad_norm": 0.2127694934606552,
      "learning_rate": 1.7153633027788252e-05,
      "loss": 0.065,
      "step": 2510
    },
    {
      "epoch": 0.3229734059596283,
      "grad_norm": 0.184393048286438,
      "learning_rate": 1.7122295368462375e-05,
      "loss": 0.0679,
      "step": 2520
    },
    {
      "epoch": 0.3242550464594681,
      "grad_norm": 0.20343032479286194,
      "learning_rate": 1.7090815108634283e-05,
      "loss": 0.0662,
      "step": 2530
    },
    {
      "epoch": 0.32553668695930793,
      "grad_norm": 0.1871260553598404,
      "learning_rate": 1.7059192878592482e-05,
      "loss": 0.0661,
      "step": 2540
    },
    {
      "epoch": 0.3268183274591477,
      "grad_norm": 0.1933838278055191,
      "learning_rate": 1.7027429311467945e-05,
      "loss": 0.0689,
      "step": 2550
    },
    {
      "epoch": 0.3280999679589875,
      "grad_norm": 0.19737669825553894,
      "learning_rate": 1.699552504322147e-05,
      "loss": 0.0651,
      "step": 2560
    },
    {
      "epoch": 0.3293816084588273,
      "grad_norm": 0.17696569859981537,
      "learning_rate": 1.6963480712630936e-05,
      "loss": 0.0666,
      "step": 2570
    },
    {
      "epoch": 0.3306632489586671,
      "grad_norm": 0.20725034177303314,
      "learning_rate": 1.69312969612785e-05,
      "loss": 0.0675,
      "step": 2580
    },
    {
      "epoch": 0.33194488945850686,
      "grad_norm": 0.1974012404680252,
      "learning_rate": 1.6898974433537755e-05,
      "loss": 0.067,
      "step": 2590
    },
    {
      "epoch": 0.3332265299583467,
      "grad_norm": 0.1718105524778366,
      "learning_rate": 1.6866513776560844e-05,
      "loss": 0.0659,
      "step": 2600
    },
    {
      "epoch": 0.3345081704581865,
      "grad_norm": 0.19242900609970093,
      "learning_rate": 1.6833915640265485e-05,
      "loss": 0.0669,
      "step": 2610
    },
    {
      "epoch": 0.3357898109580263,
      "grad_norm": 0.20493321120738983,
      "learning_rate": 1.6801180677321985e-05,
      "loss": 0.0613,
      "step": 2620
    },
    {
      "epoch": 0.33707145145786604,
      "grad_norm": 0.1979932188987732,
      "learning_rate": 1.6768309543140126e-05,
      "loss": 0.0643,
      "step": 2630
    },
    {
      "epoch": 0.33835309195770585,
      "grad_norm": 0.17953254282474518,
      "learning_rate": 1.6735302895856093e-05,
      "loss": 0.0641,
      "step": 2640
    },
    {
      "epoch": 0.33963473245754566,
      "grad_norm": 0.18145443499088287,
      "learning_rate": 1.6702161396319266e-05,
      "loss": 0.0663,
      "step": 2650
    },
    {
      "epoch": 0.34091637295738547,
      "grad_norm": 0.198946014046669,
      "learning_rate": 1.666888570807899e-05,
      "loss": 0.0649,
      "step": 2660
    },
    {
      "epoch": 0.3421980134572252,
      "grad_norm": 0.17763356864452362,
      "learning_rate": 1.6635476497371314e-05,
      "loss": 0.0671,
      "step": 2670
    },
    {
      "epoch": 0.34347965395706503,
      "grad_norm": 0.2077655792236328,
      "learning_rate": 1.6601934433105612e-05,
      "loss": 0.066,
      "step": 2680
    },
    {
      "epoch": 0.34476129445690484,
      "grad_norm": 0.17525456845760345,
      "learning_rate": 1.6568260186851236e-05,
      "loss": 0.0686,
      "step": 2690
    },
    {
      "epoch": 0.34604293495674465,
      "grad_norm": 0.20130480825901031,
      "learning_rate": 1.6534454432824034e-05,
      "loss": 0.0628,
      "step": 2700
    },
    {
      "epoch": 0.3473245754565844,
      "grad_norm": 0.20377837121486664,
      "learning_rate": 1.6500517847872864e-05,
      "loss": 0.0659,
      "step": 2710
    },
    {
      "epoch": 0.3486062159564242,
      "grad_norm": 0.19869670271873474,
      "learning_rate": 1.6466451111466044e-05,
      "loss": 0.0664,
      "step": 2720
    },
    {
      "epoch": 0.349887856456264,
      "grad_norm": 0.177223801612854,
      "learning_rate": 1.643225490567775e-05,
      "loss": 0.0653,
      "step": 2730
    },
    {
      "epoch": 0.35116949695610383,
      "grad_norm": 0.17571556568145752,
      "learning_rate": 1.639792991517435e-05,
      "loss": 0.067,
      "step": 2740
    },
    {
      "epoch": 0.3524511374559436,
      "grad_norm": 0.18533557653427124,
      "learning_rate": 1.6363476827200705e-05,
      "loss": 0.067,
      "step": 2750
    },
    {
      "epoch": 0.3537327779557834,
      "grad_norm": 0.2359856367111206,
      "learning_rate": 1.6328896331566403e-05,
      "loss": 0.0689,
      "step": 2760
    },
    {
      "epoch": 0.3550144184556232,
      "grad_norm": 0.18869027495384216,
      "learning_rate": 1.6294189120631956e-05,
      "loss": 0.065,
      "step": 2770
    },
    {
      "epoch": 0.356296058955463,
      "grad_norm": 0.17907142639160156,
      "learning_rate": 1.6259355889294927e-05,
      "loss": 0.0677,
      "step": 2780
    },
    {
      "epoch": 0.35757769945530277,
      "grad_norm": 0.20352387428283691,
      "learning_rate": 1.6224397334976023e-05,
      "loss": 0.0667,
      "step": 2790
    },
    {
      "epoch": 0.3588593399551426,
      "grad_norm": 0.17456890642642975,
      "learning_rate": 1.6189314157605136e-05,
      "loss": 0.0658,
      "step": 2800
    },
    {
      "epoch": 0.3601409804549824,
      "grad_norm": 0.20184475183486938,
      "learning_rate": 1.615410705960732e-05,
      "loss": 0.0665,
      "step": 2810
    },
    {
      "epoch": 0.3614226209548222,
      "grad_norm": 0.19572722911834717,
      "learning_rate": 1.6118776745888726e-05,
      "loss": 0.0662,
      "step": 2820
    },
    {
      "epoch": 0.36270426145466195,
      "grad_norm": 0.18092940747737885,
      "learning_rate": 1.608332392382251e-05,
      "loss": 0.0619,
      "step": 2830
    },
    {
      "epoch": 0.36398590195450176,
      "grad_norm": 0.21048863232135773,
      "learning_rate": 1.6047749303234637e-05,
      "loss": 0.0647,
      "step": 2840
    },
    {
      "epoch": 0.36526754245434156,
      "grad_norm": 0.22709940373897552,
      "learning_rate": 1.601205359638969e-05,
      "loss": 0.0638,
      "step": 2850
    },
    {
      "epoch": 0.3665491829541814,
      "grad_norm": 0.17789261043071747,
      "learning_rate": 1.597623751797662e-05,
      "loss": 0.0644,
      "step": 2860
    },
    {
      "epoch": 0.3678308234540211,
      "grad_norm": 0.1861618459224701,
      "learning_rate": 1.5940301785094398e-05,
      "loss": 0.0643,
      "step": 2870
    },
    {
      "epoch": 0.36911246395386094,
      "grad_norm": 0.2651076018810272,
      "learning_rate": 1.5904247117237703e-05,
      "loss": 0.0653,
      "step": 2880
    },
    {
      "epoch": 0.37039410445370075,
      "grad_norm": 0.19479645788669586,
      "learning_rate": 1.586807423628248e-05,
      "loss": 0.0669,
      "step": 2890
    },
    {
      "epoch": 0.37167574495354055,
      "grad_norm": 0.19867412745952606,
      "learning_rate": 1.583178386647151e-05,
      "loss": 0.0644,
      "step": 2900
    },
    {
      "epoch": 0.3729573854533803,
      "grad_norm": 0.2065819799900055,
      "learning_rate": 1.57953767343999e-05,
      "loss": 0.0688,
      "step": 2910
    },
    {
      "epoch": 0.3742390259532201,
      "grad_norm": 0.16808786988258362,
      "learning_rate": 1.5758853569000536e-05,
      "loss": 0.0639,
      "step": 2920
    },
    {
      "epoch": 0.3755206664530599,
      "grad_norm": 0.19506502151489258,
      "learning_rate": 1.572221510152949e-05,
      "loss": 0.0656,
      "step": 2930
    },
    {
      "epoch": 0.37680230695289973,
      "grad_norm": 0.20851702988147736,
      "learning_rate": 1.5685462065551375e-05,
      "loss": 0.0648,
      "step": 2940
    },
    {
      "epoch": 0.3780839474527395,
      "grad_norm": 0.18304401636123657,
      "learning_rate": 1.5648595196924665e-05,
      "loss": 0.0665,
      "step": 2950
    },
    {
      "epoch": 0.3793655879525793,
      "grad_norm": 0.19657114148139954,
      "learning_rate": 1.5611615233786957e-05,
      "loss": 0.0651,
      "step": 2960
    },
    {
      "epoch": 0.3806472284524191,
      "grad_norm": 0.17807355523109436,
      "learning_rate": 1.5574522916540188e-05,
      "loss": 0.0673,
      "step": 2970
    },
    {
      "epoch": 0.3819288689522589,
      "grad_norm": 0.1849467009305954,
      "learning_rate": 1.5537318987835824e-05,
      "loss": 0.0671,
      "step": 2980
    },
    {
      "epoch": 0.38321050945209867,
      "grad_norm": 0.19102276861667633,
      "learning_rate": 1.5500004192559976e-05,
      "loss": 0.0663,
      "step": 2990
    },
    {
      "epoch": 0.3844921499519385,
      "grad_norm": 0.19463396072387695,
      "learning_rate": 1.5462579277818498e-05,
      "loss": 0.0667,
      "step": 3000
    },
    {
      "epoch": 0.3844921499519385,
      "eval_f1": 0.8683329347046161,
      "eval_loss": 0.07221938669681549,
      "eval_precision": 0.8857038302024884,
      "eval_recall": 0.851630307295332,
      "eval_runtime": 2.9907,
      "eval_samples_per_second": 2165.391,
      "eval_steps_per_second": 8.694,
      "step": 3000
    },
    {
      "epoch": 0.3857737904517783,
      "grad_norm": 0.19808001816272736,
      "learning_rate": 1.5425044992922015e-05,
      "loss": 0.0665,
      "step": 3010
    },
    {
      "epoch": 0.3870554309516181,
      "grad_norm": 0.19178569316864014,
      "learning_rate": 1.5387402089370948e-05,
      "loss": 0.0647,
      "step": 3020
    },
    {
      "epoch": 0.38833707145145785,
      "grad_norm": 0.20021072030067444,
      "learning_rate": 1.5349651320840423e-05,
      "loss": 0.0668,
      "step": 3030
    },
    {
      "epoch": 0.38961871195129766,
      "grad_norm": 0.21625490486621857,
      "learning_rate": 1.531179344316522e-05,
      "loss": 0.0659,
      "step": 3040
    },
    {
      "epoch": 0.39090035245113747,
      "grad_norm": 0.18152420222759247,
      "learning_rate": 1.527382921432464e-05,
      "loss": 0.0665,
      "step": 3050
    },
    {
      "epoch": 0.3921819929509773,
      "grad_norm": 0.2041441947221756,
      "learning_rate": 1.5235759394427287e-05,
      "loss": 0.067,
      "step": 3060
    },
    {
      "epoch": 0.39346363345081703,
      "grad_norm": 0.18800385296344757,
      "learning_rate": 1.5197584745695904e-05,
      "loss": 0.0643,
      "step": 3070
    },
    {
      "epoch": 0.39474527395065684,
      "grad_norm": 0.21839666366577148,
      "learning_rate": 1.515930603245207e-05,
      "loss": 0.0651,
      "step": 3080
    },
    {
      "epoch": 0.39602691445049665,
      "grad_norm": 0.20969919860363007,
      "learning_rate": 1.512092402110092e-05,
      "loss": 0.0671,
      "step": 3090
    },
    {
      "epoch": 0.39730855495033646,
      "grad_norm": 0.2534889578819275,
      "learning_rate": 1.508243948011579e-05,
      "loss": 0.0642,
      "step": 3100
    },
    {
      "epoch": 0.3985901954501762,
      "grad_norm": 0.18450938165187836,
      "learning_rate": 1.5043853180022838e-05,
      "loss": 0.0658,
      "step": 3110
    },
    {
      "epoch": 0.399871835950016,
      "grad_norm": 0.18956618010997772,
      "learning_rate": 1.5005165893385612e-05,
      "loss": 0.0663,
      "step": 3120
    },
    {
      "epoch": 0.40115347644985583,
      "grad_norm": 0.16871580481529236,
      "learning_rate": 1.4966378394789581e-05,
      "loss": 0.0658,
      "step": 3130
    },
    {
      "epoch": 0.40243511694969564,
      "grad_norm": 0.18960537016391754,
      "learning_rate": 1.4927491460826626e-05,
      "loss": 0.0653,
      "step": 3140
    },
    {
      "epoch": 0.4037167574495354,
      "grad_norm": 0.20116572082042694,
      "learning_rate": 1.48885058700795e-05,
      "loss": 0.0664,
      "step": 3150
    },
    {
      "epoch": 0.4049983979493752,
      "grad_norm": 0.2119298279285431,
      "learning_rate": 1.4849422403106228e-05,
      "loss": 0.0649,
      "step": 3160
    },
    {
      "epoch": 0.406280038449215,
      "grad_norm": 0.19871027767658234,
      "learning_rate": 1.4810241842424491e-05,
      "loss": 0.0669,
      "step": 3170
    },
    {
      "epoch": 0.4075616789490548,
      "grad_norm": 0.19416335225105286,
      "learning_rate": 1.4770964972495938e-05,
      "loss": 0.0628,
      "step": 3180
    },
    {
      "epoch": 0.40884331944889457,
      "grad_norm": 0.18964780867099762,
      "learning_rate": 1.4731592579710507e-05,
      "loss": 0.0634,
      "step": 3190
    },
    {
      "epoch": 0.4101249599487344,
      "grad_norm": 0.18982967734336853,
      "learning_rate": 1.4692125452370664e-05,
      "loss": 0.0651,
      "step": 3200
    },
    {
      "epoch": 0.4114066004485742,
      "grad_norm": 0.20831823348999023,
      "learning_rate": 1.4652564380675616e-05,
      "loss": 0.0671,
      "step": 3210
    },
    {
      "epoch": 0.41268824094841394,
      "grad_norm": 0.190037801861763,
      "learning_rate": 1.4612910156705497e-05,
      "loss": 0.0697,
      "step": 3220
    },
    {
      "epoch": 0.41396988144825375,
      "grad_norm": 0.20448094606399536,
      "learning_rate": 1.457316357440552e-05,
      "loss": 0.0656,
      "step": 3230
    },
    {
      "epoch": 0.41525152194809356,
      "grad_norm": 0.18915881216526031,
      "learning_rate": 1.4533325429570052e-05,
| "loss": 0.0651, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.41653316244793337, | |
| "grad_norm": 0.19686464965343475, | |
| "learning_rate": 1.4493396519826717e-05, | |
| "loss": 0.0661, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.4178148029477731, | |
| "grad_norm": 0.17895036935806274, | |
| "learning_rate": 1.4453377644620389e-05, | |
| "loss": 0.0677, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.41909644344761293, | |
| "grad_norm": 0.17503611743450165, | |
| "learning_rate": 1.441326960519721e-05, | |
| "loss": 0.0644, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.42037808394745274, | |
| "grad_norm": 0.18629562854766846, | |
| "learning_rate": 1.4373073204588556e-05, | |
| "loss": 0.0661, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.42165972444729255, | |
| "grad_norm": 0.17520345747470856, | |
| "learning_rate": 1.4332789247594923e-05, | |
| "loss": 0.0661, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.4229413649471323, | |
| "grad_norm": 0.1670594960451126, | |
| "learning_rate": 1.4292418540769845e-05, | |
| "loss": 0.0643, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.4242230054469721, | |
| "grad_norm": 0.17656877636909485, | |
| "learning_rate": 1.425196189240374e-05, | |
| "loss": 0.0654, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.4255046459468119, | |
| "grad_norm": 0.20574980974197388, | |
| "learning_rate": 1.4211420112507714e-05, | |
| "loss": 0.0665, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.42678628644665173, | |
| "grad_norm": 0.17492710053920746, | |
| "learning_rate": 1.4170794012797357e-05, | |
| "loss": 0.0669, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.4280679269464915, | |
| "grad_norm": 0.18844270706176758, | |
| "learning_rate": 1.4130084406676488e-05, | |
| "loss": 0.0644, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.4293495674463313, | |
| "grad_norm": 0.18853574991226196, | |
| "learning_rate": 1.4089292109220852e-05, | |
| "loss": 0.0654, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.4306312079461711, | |
| "grad_norm": 0.19601227343082428, | |
| "learning_rate": 1.4048417937161833e-05, | |
| "loss": 0.0658, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.4319128484460109, | |
| "grad_norm": 0.19078759849071503, | |
| "learning_rate": 1.4007462708870076e-05, | |
| "loss": 0.0655, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.43319448894585066, | |
| "grad_norm": 0.187064990401268, | |
| "learning_rate": 1.3966427244339111e-05, | |
| "loss": 0.0656, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.4344761294456905, | |
| "grad_norm": 0.17603109776973724, | |
| "learning_rate": 1.3925312365168934e-05, | |
| "loss": 0.0629, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.4357577699455303, | |
| "grad_norm": 0.19391457736492157, | |
| "learning_rate": 1.3884118894549562e-05, | |
| "loss": 0.0634, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.4370394104453701, | |
| "grad_norm": 0.19418643414974213, | |
| "learning_rate": 1.3842847657244535e-05, | |
| "loss": 0.064, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.43832105094520984, | |
| "grad_norm": 0.21033746004104614, | |
| "learning_rate": 1.3801499479574431e-05, | |
| "loss": 0.0648, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.43960269144504965, | |
| "grad_norm": 0.18090617656707764, | |
| "learning_rate": 1.3760075189400295e-05, | |
| "loss": 0.0658, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.44088433194488946, | |
| "grad_norm": 0.19315758347511292, | |
| "learning_rate": 1.3718575616107072e-05, | |
| "loss": 0.0673, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.44216597244472927, | |
| "grad_norm": 0.21332131326198578, | |
| "learning_rate": 1.3677001590587011e-05, | |
| "loss": 0.0653, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.443447612944569, | |
| "grad_norm": 0.16083082556724548, | |
| "learning_rate": 1.3635353945223022e-05, | |
| "loss": 0.0618, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.44472925344440883, | |
| "grad_norm": 0.1910344362258911, | |
| "learning_rate": 1.3593633513872e-05, | |
| "loss": 0.063, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.44601089394424864, | |
| "grad_norm": 0.21517983078956604, | |
| "learning_rate": 1.3551841131848144e-05, | |
| "loss": 0.0632, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.44729253444408845, | |
| "grad_norm": 0.18821708858013153, | |
| "learning_rate": 1.3509977635906241e-05, | |
| "loss": 0.065, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.4485741749439282, | |
| "grad_norm": 0.19319505989551544, | |
| "learning_rate": 1.3468043864224878e-05, | |
| "loss": 0.0649, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.4485741749439282, | |
| "eval_f1": 0.8754380754380754, | |
| "eval_loss": 0.07064414024353027, | |
| "eval_precision": 0.8868696594054639, | |
| "eval_recall": 0.8642974431151771, | |
| "eval_runtime": 2.9906, | |
| "eval_samples_per_second": 2165.465, | |
| "eval_steps_per_second": 8.694, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.449855815443768, | |
| "grad_norm": 0.19362811744213104, | |
| "learning_rate": 1.3426040656389696e-05, | |
| "loss": 0.0659, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.4511374559436078, | |
| "grad_norm": 0.19557055830955505, | |
| "learning_rate": 1.3383968853376568e-05, | |
| "loss": 0.0674, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.45241909644344763, | |
| "grad_norm": 0.17842736840248108, | |
| "learning_rate": 1.3341829297534745e-05, | |
| "loss": 0.0647, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.4537007369432874, | |
| "grad_norm": 0.19172975420951843, | |
| "learning_rate": 1.3299622832570025e-05, | |
| "loss": 0.0653, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.4549823774431272, | |
| "grad_norm": 0.17918632924556732, | |
| "learning_rate": 1.3257350303527829e-05, | |
| "loss": 0.0662, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.456264017942967, | |
| "grad_norm": 0.1870853751897812, | |
| "learning_rate": 1.3215012556776287e-05, | |
| "loss": 0.0666, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.4575456584428068, | |
| "grad_norm": 0.18598654866218567, | |
| "learning_rate": 1.317261043998932e-05, | |
| "loss": 0.0645, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.45882729894264657, | |
| "grad_norm": 0.20888212323188782, | |
| "learning_rate": 1.3130144802129624e-05, | |
| "loss": 0.0631, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.4601089394424864, | |
| "grad_norm": 0.176805317401886, | |
| "learning_rate": 1.3087616493431705e-05, | |
| "loss": 0.0645, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.4613905799423262, | |
| "grad_norm": 0.17067846655845642, | |
| "learning_rate": 1.3045026365384854e-05, | |
| "loss": 0.0663, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.462672220442166, | |
| "grad_norm": 0.18683072924613953, | |
| "learning_rate": 1.3002375270716076e-05, | |
| "loss": 0.0629, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.46395386094200575, | |
| "grad_norm": 0.16378134489059448, | |
| "learning_rate": 1.2959664063373044e-05, | |
| "loss": 0.0653, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.46523550144184556, | |
| "grad_norm": 0.1760198473930359, | |
| "learning_rate": 1.2916893598506981e-05, | |
| "loss": 0.0655, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.46651714194168536, | |
| "grad_norm": 0.1707535684108734, | |
| "learning_rate": 1.2874064732455552e-05, | |
| "loss": 0.0631, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.4677987824415252, | |
| "grad_norm": 0.18154825270175934, | |
| "learning_rate": 1.2831178322725706e-05, | |
| "loss": 0.0657, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.4690804229413649, | |
| "grad_norm": 0.20046283304691315, | |
| "learning_rate": 1.2788235227976529e-05, | |
| "loss": 0.065, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.47036206344120474, | |
| "grad_norm": 0.19358018040657043, | |
| "learning_rate": 1.2745236308002018e-05, | |
| "loss": 0.066, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.47164370394104455, | |
| "grad_norm": 0.185184046626091, | |
| "learning_rate": 1.2702182423713904e-05, | |
| "loss": 0.0656, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.47292534444088435, | |
| "grad_norm": 0.2039334625005722, | |
| "learning_rate": 1.2659074437124388e-05, | |
| "loss": 0.0643, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.4742069849407241, | |
| "grad_norm": 0.2065436691045761, | |
| "learning_rate": 1.2615913211328894e-05, | |
| "loss": 0.0652, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.4754886254405639, | |
| "grad_norm": 0.17350079119205475, | |
| "learning_rate": 1.2572699610488783e-05, | |
| "loss": 0.0654, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.4767702659404037, | |
| "grad_norm": 0.20188377797603607, | |
| "learning_rate": 1.2529434499814058e-05, | |
| "loss": 0.0653, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.47805190644024353, | |
| "grad_norm": 0.18335194885730743, | |
| "learning_rate": 1.2486118745546035e-05, | |
| "loss": 0.0655, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.4793335469400833, | |
| "grad_norm": 0.194752037525177, | |
| "learning_rate": 1.244275321494e-05, | |
| "loss": 0.0649, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.4806151874399231, | |
| "grad_norm": 0.20098841190338135, | |
| "learning_rate": 1.2399338776247846e-05, | |
| "loss": 0.0641, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.4818968279397629, | |
| "grad_norm": 0.2057989090681076, | |
| "learning_rate": 1.2355876298700693e-05, | |
| "loss": 0.0643, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.4831784684396027, | |
| "grad_norm": 0.18653416633605957, | |
| "learning_rate": 1.2312366652491476e-05, | |
| "loss": 0.0635, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.48446010893944247, | |
| "grad_norm": 0.19041769206523895, | |
| "learning_rate": 1.2268810708757533e-05, | |
| "loss": 0.065, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.4857417494392823, | |
| "grad_norm": 0.19951310753822327, | |
| "learning_rate": 1.2225209339563144e-05, | |
| "loss": 0.0641, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.4870233899391221, | |
| "grad_norm": 0.1979627162218094, | |
| "learning_rate": 1.2181563417882104e-05, | |
| "loss": 0.0647, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.4883050304389619, | |
| "grad_norm": 0.20096543431282043, | |
| "learning_rate": 1.2137873817580213e-05, | |
| "loss": 0.0647, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.48958667093880165, | |
| "grad_norm": 0.19415560364723206, | |
| "learning_rate": 1.2094141413397785e-05, | |
| "loss": 0.0645, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.49086831143864146, | |
| "grad_norm": 0.180967316031456, | |
| "learning_rate": 1.2050367080932159e-05, | |
| "loss": 0.0629, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.49214995193848127, | |
| "grad_norm": 0.18070431053638458, | |
| "learning_rate": 1.2006551696620135e-05, | |
| "loss": 0.0649, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.4934315924383211, | |
| "grad_norm": 0.174631729722023, | |
| "learning_rate": 1.1962696137720443e-05, | |
| "loss": 0.0649, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.49471323293816083, | |
| "grad_norm": 0.1981905698776245, | |
| "learning_rate": 1.1918801282296184e-05, | |
| "loss": 0.0648, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.49599487343800064, | |
| "grad_norm": 0.19266869127750397, | |
| "learning_rate": 1.1874868009197236e-05, | |
| "loss": 0.0653, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.49727651393784045, | |
| "grad_norm": 0.19425834715366364, | |
| "learning_rate": 1.1830897198042662e-05, | |
| "loss": 0.0646, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.49855815443768026, | |
| "grad_norm": 0.20095306634902954, | |
| "learning_rate": 1.1786889729203113e-05, | |
| "loss": 0.0651, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.49983979493752, | |
| "grad_norm": 0.16680414974689484, | |
| "learning_rate": 1.1742846483783177e-05, | |
| "loss": 0.0633, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.5011214354373599, | |
| "grad_norm": 0.2134004682302475, | |
| "learning_rate": 1.1698768343603753e-05, | |
| "loss": 0.066, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.5024030759371996, | |
| "grad_norm": 0.19804078340530396, | |
| "learning_rate": 1.16546561911844e-05, | |
| "loss": 0.0654, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.5036847164370394, | |
| "grad_norm": 0.1972855180501938, | |
| "learning_rate": 1.1610510909725644e-05, | |
| "loss": 0.0654, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.5049663569368792, | |
| "grad_norm": 0.18941302597522736, | |
| "learning_rate": 1.1566333383091333e-05, | |
| "loss": 0.0656, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.506247997436719, | |
| "grad_norm": 0.2275722473859787, | |
| "learning_rate": 1.15221244957909e-05, | |
| "loss": 0.0642, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.5075296379365588, | |
| "grad_norm": 0.19969482719898224, | |
| "learning_rate": 1.1477885132961679e-05, | |
| "loss": 0.0657, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.5088112784363986, | |
| "grad_norm": 0.1895623505115509, | |
| "learning_rate": 1.1433616180351176e-05, | |
| "loss": 0.0653, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.5100929189362384, | |
| "grad_norm": 0.19780370593070984, | |
| "learning_rate": 1.1389318524299332e-05, | |
| "loss": 0.0646, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.5113745594360782, | |
| "grad_norm": 0.18038791418075562, | |
| "learning_rate": 1.1344993051720792e-05, | |
| "loss": 0.0642, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.5126561999359179, | |
| "grad_norm": 0.2090374231338501, | |
| "learning_rate": 1.1300640650087114e-05, | |
| "loss": 0.0649, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5126561999359179, | |
| "eval_f1": 0.8851620576865894, | |
| "eval_loss": 0.06776601821184158, | |
| "eval_precision": 0.8978163831584027, | |
| "eval_recall": 0.8728594886230354, | |
| "eval_runtime": 2.9842, | |
| "eval_samples_per_second": 2170.11, | |
| "eval_steps_per_second": 8.713, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5139378404357577, | |
| "grad_norm": 0.21133767068386078, | |
| "learning_rate": 1.1256262207409038e-05, | |
| "loss": 0.0651, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.5152194809355976, | |
| "grad_norm": 0.20390534400939941, | |
| "learning_rate": 1.1211858612218689e-05, | |
| "loss": 0.0661, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.5165011214354374, | |
| "grad_norm": 0.16816197335720062, | |
| "learning_rate": 1.1167430753551779e-05, | |
| "loss": 0.0642, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.5177827619352772, | |
| "grad_norm": 0.17969034612178802, | |
| "learning_rate": 1.1122979520929826e-05, | |
| "loss": 0.0619, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.519064402435117, | |
| "grad_norm": 0.17689917981624603, | |
| "learning_rate": 1.1078505804342327e-05, | |
| "loss": 0.0659, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.5203460429349568, | |
| "grad_norm": 0.19302503764629364, | |
| "learning_rate": 1.1034010494228951e-05, | |
| "loss": 0.0631, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.5216276834347965, | |
| "grad_norm": 0.21062147617340088, | |
| "learning_rate": 1.0989494481461707e-05, | |
| "loss": 0.0665, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.5229093239346363, | |
| "grad_norm": 0.18397943675518036, | |
| "learning_rate": 1.0944958657327101e-05, | |
| "loss": 0.064, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.5241909644344761, | |
| "grad_norm": 0.18781358003616333, | |
| "learning_rate": 1.0900403913508301e-05, | |
| "loss": 0.0657, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.5254726049343159, | |
| "grad_norm": 0.20603561401367188, | |
| "learning_rate": 1.085583114206728e-05, | |
| "loss": 0.0626, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.5267542454341557, | |
| "grad_norm": 0.18262669444084167, | |
| "learning_rate": 1.0811241235426947e-05, | |
| "loss": 0.0626, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.5280358859339955, | |
| "grad_norm": 0.18097414076328278, | |
| "learning_rate": 1.0766635086353298e-05, | |
| "loss": 0.0626, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.5293175264338353, | |
| "grad_norm": 0.1961183249950409, | |
| "learning_rate": 1.0722013587937528e-05, | |
| "loss": 0.063, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.5305991669336751, | |
| "grad_norm": 0.18248215317726135, | |
| "learning_rate": 1.0677377633578137e-05, | |
| "loss": 0.0656, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.5318808074335148, | |
| "grad_norm": 0.17086833715438843, | |
| "learning_rate": 1.0632728116963082e-05, | |
| "loss": 0.0644, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.5331624479333547, | |
| "grad_norm": 0.19590778648853302, | |
| "learning_rate": 1.0588065932051843e-05, | |
| "loss": 0.0638, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.5344440884331945, | |
| "grad_norm": 0.18840567767620087, | |
| "learning_rate": 1.0543391973057537e-05, | |
| "loss": 0.0639, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.5357257289330343, | |
| "grad_norm": 0.18893346190452576, | |
| "learning_rate": 1.049870713442904e-05, | |
| "loss": 0.0607, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.5370073694328741, | |
| "grad_norm": 0.19564594328403473, | |
| "learning_rate": 1.0454012310833034e-05, | |
| "loss": 0.0649, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.5382890099327139, | |
| "grad_norm": 0.19073913991451263, | |
| "learning_rate": 1.0409308397136128e-05, | |
| "loss": 0.0652, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.5395706504325537, | |
| "grad_norm": 0.1852230280637741, | |
| "learning_rate": 1.036459628838693e-05, | |
| "loss": 0.0643, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.5408522909323935, | |
| "grad_norm": 0.18030205368995667, | |
| "learning_rate": 1.0319876879798123e-05, | |
| "loss": 0.065, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.5421339314322332, | |
| "grad_norm": 0.19292110204696655, | |
| "learning_rate": 1.0275151066728548e-05, | |
| "loss": 0.061, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.543415571932073, | |
| "grad_norm": 0.1795249581336975, | |
| "learning_rate": 1.0230419744665278e-05, | |
| "loss": 0.0648, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.5446972124319128, | |
| "grad_norm": 0.18629731237888336, | |
| "learning_rate": 1.0185683809205675e-05, | |
| "loss": 0.0652, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.5459788529317526, | |
| "grad_norm": 0.19541800022125244, | |
| "learning_rate": 1.0140944156039481e-05, | |
| "loss": 0.0656, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.5472604934315924, | |
| "grad_norm": 0.201457679271698, | |
| "learning_rate": 1.0096201680930867e-05, | |
| "loss": 0.0651, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.5485421339314323, | |
| "grad_norm": 0.20298662781715393, | |
| "learning_rate": 1.0051457279700502e-05, | |
| "loss": 0.0657, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.5498237744312721, | |
| "grad_norm": 0.2091987133026123, | |
| "learning_rate": 1.0006711848207625e-05, | |
| "loss": 0.0642, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.5511054149311119, | |
| "grad_norm": 0.195583313703537, | |
| "learning_rate": 9.961966282332094e-06, | |
| "loss": 0.064, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.5523870554309516, | |
| "grad_norm": 0.16657018661499023, | |
| "learning_rate": 9.917221477956472e-06, | |
| "loss": 0.0629, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.5536686959307914, | |
| "grad_norm": 0.18883733451366425, | |
| "learning_rate": 9.87247833094806e-06, | |
| "loss": 0.0645, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.5549503364306312, | |
| "grad_norm": 0.18239256739616394, | |
| "learning_rate": 9.827737737140983e-06, | |
| "loss": 0.0639, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.556231976930471, | |
| "grad_norm": 0.20366910099983215, | |
| "learning_rate": 9.783000592318249e-06, | |
| "loss": 0.0649, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.5575136174303108, | |
| "grad_norm": 0.19524288177490234, | |
| "learning_rate": 9.738267792193806e-06, | |
| "loss": 0.0636, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.5587952579301506, | |
| "grad_norm": 0.19309386610984802, | |
| "learning_rate": 9.693540232394613e-06, | |
| "loss": 0.0648, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.5600768984299904, | |
| "grad_norm": 0.2024122029542923, | |
| "learning_rate": 9.648818808442716e-06, | |
| "loss": 0.0648, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.5613585389298302, | |
| "grad_norm": 0.1884394735097885, | |
| "learning_rate": 9.604104415737309e-06, | |
| "loss": 0.0656, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.5626401794296699, | |
| "grad_norm": 0.20835010707378387, | |
| "learning_rate": 9.559397949536799e-06, | |
| "loss": 0.0632, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.5639218199295097, | |
| "grad_norm": 0.189108744263649, | |
| "learning_rate": 9.514700304940901e-06, | |
| "loss": 0.0635, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.5652034604293495, | |
| "grad_norm": 0.1688823103904724, | |
| "learning_rate": 9.470012376872706e-06, | |
| "loss": 0.0644, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.5664851009291894, | |
| "grad_norm": 0.19286887347698212, | |
| "learning_rate": 9.425335060060757e-06, | |
| "loss": 0.0637, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.5677667414290292, | |
| "grad_norm": 0.18644267320632935, | |
| "learning_rate": 9.380669249021146e-06, | |
| "loss": 0.061, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.569048381928869, | |
| "grad_norm": 0.2195032835006714, | |
| "learning_rate": 9.336015838039598e-06, | |
| "loss": 0.0663, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.5703300224287088, | |
| "grad_norm": 0.20251427590847015, | |
| "learning_rate": 9.291375721153579e-06, | |
| "loss": 0.066, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.5716116629285486, | |
| "grad_norm": 0.1976834088563919, | |
| "learning_rate": 9.24674979213436e-06, | |
| "loss": 0.0632, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.5728933034283883, | |
| "grad_norm": 0.18137118220329285, | |
| "learning_rate": 9.202138944469168e-06, | |
| "loss": 0.0637, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.5741749439282281, | |
| "grad_norm": 0.18933522701263428, | |
| "learning_rate": 9.157544071343275e-06, | |
| "loss": 0.0635, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.5754565844280679, | |
| "grad_norm": 0.20421184599399567, | |
| "learning_rate": 9.112966065622095e-06, | |
| "loss": 0.0649, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.5767382249279077, | |
| "grad_norm": 0.18742768466472626, | |
| "learning_rate": 9.068405819833352e-06, | |
| "loss": 0.0639, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.5767382249279077, | |
| "eval_f1": 0.8894003777148253, | |
| "eval_loss": 0.06641285866498947, | |
| "eval_precision": 0.8951057258256118, | |
| "eval_recall": 0.8837673000234577, | |
| "eval_runtime": 2.9904, | |
| "eval_samples_per_second": 2165.589, | |
| "eval_steps_per_second": 8.694, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.5780198654277475, | |
| "grad_norm": 0.17334450781345367, | |
| "learning_rate": 9.02386422614918e-06, | |
| "loss": 0.0633, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.5793015059275873, | |
| "grad_norm": 0.16847829520702362, | |
| "learning_rate": 8.979342176368247e-06, | |
| "loss": 0.0655, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.5805831464274271, | |
| "grad_norm": 0.17423571646213531, | |
| "learning_rate": 8.934840561897944e-06, | |
| "loss": 0.0652, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.581864786927267, | |
| "grad_norm": 0.18410921096801758, | |
| "learning_rate": 8.890360273736504e-06, | |
| "loss": 0.0607, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.5831464274271067, | |
| "grad_norm": 0.1774194836616516, | |
| "learning_rate": 8.84590220245516e-06, | |
| "loss": 0.0645, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.5844280679269465, | |
| "grad_norm": 0.17994000017642975, | |
| "learning_rate": 8.80146723818034e-06, | |
| "loss": 0.0653, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.5857097084267863, | |
| "grad_norm": 0.1958846151828766, | |
| "learning_rate": 8.757056270575823e-06, | |
| "loss": 0.0634, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.5869913489266261, | |
| "grad_norm": 0.19152416288852692, | |
| "learning_rate": 8.712670188824937e-06, | |
| "loss": 0.0617, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.5882729894264659, | |
| "grad_norm": 0.20121438801288605, | |
| "learning_rate": 8.668309881612746e-06, | |
| "loss": 0.0635, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.5895546299263057, | |
| "grad_norm": 0.2359953075647354, | |
| "learning_rate": 8.623976237108271e-06, | |
| "loss": 0.0615, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.5908362704261455, | |
| "grad_norm": 0.17650754749774933, | |
| "learning_rate": 8.579670142946701e-06, | |
| "loss": 0.065, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.5921179109259853, | |
| "grad_norm": 0.17972248792648315, | |
| "learning_rate": 8.53539248621161e-06, | |
| "loss": 0.0648, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.593399551425825, | |
| "grad_norm": 0.17738933861255646, | |
| "learning_rate": 8.491144153417217e-06, | |
| "loss": 0.0676, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.5946811919256648, | |
| "grad_norm": 0.19836388528347015, | |
| "learning_rate": 8.446926030490623e-06, | |
| "loss": 0.0643, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.5959628324255046, | |
| "grad_norm": 0.1889767348766327, | |
| "learning_rate": 8.40273900275407e-06, | |
| "loss": 0.065, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.5972444729253444, | |
| "grad_norm": 0.1591385453939438, | |
| "learning_rate": 8.358583954907228e-06, | |
| "loss": 0.063, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.5985261134251842, | |
| "grad_norm": 0.1870860904455185, | |
| "learning_rate": 8.314461771009478e-06, | |
| "loss": 0.0629, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.5998077539250241, | |
| "grad_norm": 0.18506884574890137, | |
| "learning_rate": 8.270373334462193e-06, | |
| "loss": 0.0649, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.6010893944248639, | |
| "grad_norm": 0.15451203286647797, | |
| "learning_rate": 8.226319527991088e-06, | |
| "loss": 0.0628, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.6023710349247037, | |
| "grad_norm": 0.1646854281425476, | |
| "learning_rate": 8.182301233628506e-06, | |
| "loss": 0.063, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.6036526754245434, | |
| "grad_norm": 0.18889601528644562, | |
| "learning_rate": 8.138319332695788e-06, | |
| "loss": 0.0639, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.6049343159243832, | |
| "grad_norm": 0.18762964010238647, | |
| "learning_rate": 8.094374705785613e-06, | |
| "loss": 0.0647, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.606215956424223, | |
| "grad_norm": 0.17451703548431396, | |
| "learning_rate": 8.050468232744367e-06, | |
| "loss": 0.0619, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.6074975969240628, | |
| "grad_norm": 0.1720801442861557, | |
| "learning_rate": 8.006600792654546e-06, | |
| "loss": 0.0631, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.6087792374239026, | |
| "grad_norm": 0.19186915457248688, | |
| "learning_rate": 7.962773263817114e-06, | |
| "loss": 0.064, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.6100608779237424, | |
| "grad_norm": 0.1681632250547409, | |
| "learning_rate": 7.918986523733964e-06, | |
| "loss": 0.0635, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.6113425184235822, | |
| "grad_norm": 0.17930343747138977, | |
| "learning_rate": 7.875241449090327e-06, | |
| "loss": 0.0637, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.612624158923422, | |
| "grad_norm": 0.20653793215751648, | |
| "learning_rate": 7.831538915737204e-06, | |
| "loss": 0.0648, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.6139057994232617, | |
| "grad_norm": 0.18635816872119904, | |
| "learning_rate": 7.787879798673869e-06, | |
| "loss": 0.0652, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.6151874399231015, | |
| "grad_norm": 0.19335713982582092, | |
| "learning_rate": 7.744264972030319e-06, | |
| "loss": 0.0616, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.6164690804229414, | |
| "grad_norm": 0.18472839891910553, | |
| "learning_rate": 7.700695309049768e-06, | |
| "loss": 0.0644, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.6177507209227812, | |
| "grad_norm": 0.1953488439321518, | |
| "learning_rate": 7.657171682071198e-06, | |
| "loss": 0.0649, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.619032361422621, | |
| "grad_norm": 0.19706253707408905, | |
| "learning_rate": 7.613694962511863e-06, | |
| "loss": 0.0636, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.6203140019224608, | |
| "grad_norm": 0.19095227122306824, | |
| "learning_rate": 7.57026602084984e-06, | |
| "loss": 0.0608, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.6215956424223006, | |
| "grad_norm": 0.18028537929058075, | |
| "learning_rate": 7.52688572660663e-06, | |
| "loss": 0.0618, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.6228772829221403, | |
| "grad_norm": 0.19275839626789093, | |
| "learning_rate": 7.483554948329716e-06, | |
| "loss": 0.0629, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.6241589234219801, | |
| "grad_norm": 0.18970589339733124, | |
| "learning_rate": 7.440274553575194e-06, | |
| "loss": 0.0636, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.6254405639218199, | |
| "grad_norm": 0.18009796738624573, | |
| "learning_rate": 7.397045408890388e-06, | |
| "loss": 0.0649, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.6267222044216597, | |
| "grad_norm": 0.1802019476890564, | |
| "learning_rate": 7.353868379796518e-06, | |
| "loss": 0.0642, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.6280038449214995, | |
| "grad_norm": 0.1904575377702713, | |
| "learning_rate": 7.3107443307713555e-06, | |
| "loss": 0.064, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.6292854854213393, | |
| "grad_norm": 0.17662177979946136, | |
| "learning_rate": 7.267674125231919e-06, | |
| "loss": 0.0643, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.6305671259211791, | |
| "grad_norm": 0.20192337036132812, | |
| "learning_rate": 7.224658625517191e-06, | |
| "loss": 0.0658, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.631848766421019, | |
| "grad_norm": 0.2027018964290619, | |
| "learning_rate": 7.181698692870854e-06, | |
| "loss": 0.0633, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.6331304069208586, | |
| "grad_norm": 0.18302646279335022, | |
| "learning_rate": 7.138795187424035e-06, | |
| "loss": 0.0649, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.6344120474206985, | |
| "grad_norm": 0.19563300907611847, | |
| "learning_rate": 7.0959489681780945e-06, | |
| "loss": 0.0658, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.6356936879205383, | |
| "grad_norm": 0.19320793449878693, | |
| "learning_rate": 7.053160892987434e-06, | |
| "loss": 0.0618, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.6369753284203781, | |
| "grad_norm": 0.18894124031066895, | |
| "learning_rate": 7.010431818542298e-06, | |
| "loss": 0.0629, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.6382569689202179, | |
| "grad_norm": 0.18019933998584747, | |
| "learning_rate": 6.967762600351646e-06, | |
| "loss": 0.065, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.6395386094200577, | |
| "grad_norm": 0.19297577440738678, | |
| "learning_rate": 6.9251540927260115e-06, | |
| "loss": 0.0624, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.6408202499198975, | |
| "grad_norm": 0.20897281169891357, | |
| "learning_rate": 6.8826071487603926e-06, | |
| "loss": 0.0642, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6408202499198975, | |
| "eval_f1": 0.8921365599668659, | |
| "eval_loss": 0.06497912108898163, | |
| "eval_precision": 0.900179104477612, | |
| "eval_recall": 0.8842364532019704, | |
| "eval_runtime": 2.9972, | |
| "eval_samples_per_second": 2160.691, | |
| "eval_steps_per_second": 8.675, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6421018904197373, | |
| "grad_norm": 0.16397173702716827, | |
| "learning_rate": 6.840122620317185e-06, | |
| "loss": 0.0637, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.643383530919577, | |
| "grad_norm": 0.1813378483057022, | |
| "learning_rate": 6.797701358009114e-06, | |
| "loss": 0.0653, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.6446651714194168, | |
| "grad_norm": 0.17247265577316284, | |
| "learning_rate": 6.755344211182221e-06, | |
| "loss": 0.0653, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.6459468119192566, | |
| "grad_norm": 0.20150932669639587, | |
| "learning_rate": 6.7130520278988255e-06, | |
| "loss": 0.064, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.6472284524190964, | |
| "grad_norm": 0.17291051149368286, | |
| "learning_rate": 6.670825654920579e-06, | |
| "loss": 0.0623, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.6485100929189362, | |
| "grad_norm": 0.1938193440437317, | |
| "learning_rate": 6.628665937691503e-06, | |
| "loss": 0.0632, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.649791733418776, | |
| "grad_norm": 0.2040683478116989, | |
| "learning_rate": 6.586573720321043e-06, | |
| "loss": 0.0624, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.6510733739186159, | |
| "grad_norm": 0.20392175018787384, | |
| "learning_rate": 6.544549845567195e-06, | |
| "loss": 0.0643, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.6523550144184557, | |
| "grad_norm": 0.16794058680534363, | |
| "learning_rate": 6.502595154819617e-06, | |
| "loss": 0.0637, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.6536366549182954, | |
| "grad_norm": 0.17543956637382507, | |
| "learning_rate": 6.460710488082774e-06, | |
| "loss": 0.0647, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.6549182954181352, | |
| "grad_norm": 0.1936439424753189, | |
| "learning_rate": 6.418896683959146e-06, | |
| "loss": 0.0641, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.656199935917975, | |
| "grad_norm": 0.2110176831483841, | |
| "learning_rate": 6.377154579632416e-06, | |
| "loss": 0.0645, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.6574815764178148, | |
| "grad_norm": 0.17867045104503632, | |
| "learning_rate": 6.3354850108507084e-06, | |
| "loss": 0.0636, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.6587632169176546, | |
| "grad_norm": 0.18331512808799744, | |
| "learning_rate": 6.293888811909874e-06, | |
| "loss": 0.0627, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.6600448574174944, | |
| "grad_norm": 0.17740613222122192, | |
| "learning_rate": 6.252366815636768e-06, | |
| "loss": 0.0607, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.6613264979173342, | |
| "grad_norm": 0.18722304701805115, | |
| "learning_rate": 6.2109198533725836e-06, | |
| "loss": 0.0649, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.662608138417174, | |
| "grad_norm": 0.1758435070514679, | |
| "learning_rate": 6.169548754956201e-06, | |
| "loss": 0.0619, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.6638897789170137, | |
| "grad_norm": 0.2046230286359787, | |
| "learning_rate": 6.128254348707579e-06, | |
| "loss": 0.0659, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.6651714194168535, | |
| "grad_norm": 0.19040079414844513, | |
| "learning_rate": 6.087037461411176e-06, | |
| "loss": 0.0613, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.6664530599166933, | |
| "grad_norm": 0.19141903519630432, | |
| "learning_rate": 6.045898918299373e-06, | |
| "loss": 0.062, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.6677347004165332, | |
| "grad_norm": 0.19412516057491302, | |
| "learning_rate": 6.004839543035981e-06, | |
| "loss": 0.0632, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.669016340916373, | |
| "grad_norm": 0.19847750663757324, | |
| "learning_rate": 5.9638601576997305e-06, | |
| "loss": 0.0635, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.6702979814162128, | |
| "grad_norm": 0.17892880737781525, | |
| "learning_rate": 5.922961582767812e-06, | |
| "loss": 0.0654, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.6715796219160526, | |
| "grad_norm": 0.2220013439655304, | |
| "learning_rate": 5.882144637099465e-06, | |
| "loss": 0.0654, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.6728612624158924, | |
| "grad_norm": 0.20190030336380005, | |
| "learning_rate": 5.841410137919563e-06, | |
| "loss": 0.0623, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.6741429029157321, | |
| "grad_norm": 0.18294011056423187, | |
| "learning_rate": 5.8007589008022605e-06, | |
| "loss": 0.0634, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.6754245434155719, | |
| "grad_norm": 0.18562212586402893, | |
| "learning_rate": 5.76019173965467e-06, | |
| "loss": 0.0639, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.6767061839154117, | |
| "grad_norm": 0.17406517267227173, | |
| "learning_rate": 5.719709466700558e-06, | |
| "loss": 0.0632, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.6779878244152515, | |
| "grad_norm": 0.19202564656734467, | |
| "learning_rate": 5.679312892464074e-06, | |
| "loss": 0.063, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.6792694649150913, | |
| "grad_norm": 0.18389640748500824, | |
| "learning_rate": 5.639002825753546e-06, | |
| "loss": 0.0643, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.6805511054149311, | |
| "grad_norm": 0.18304041028022766, | |
| "learning_rate": 5.598780073645267e-06, | |
| "loss": 0.0633, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.6818327459147709, | |
| "grad_norm": 0.1976161152124405, | |
| "learning_rate": 5.558645441467347e-06, | |
| "loss": 0.0632, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.6831143864146108, | |
| "grad_norm": 0.1801530420780182, | |
| "learning_rate": 5.51859973278358e-06, | |
| "loss": 0.0637, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.6843960269144505, | |
| "grad_norm": 0.20090307295322418, | |
| "learning_rate": 5.478643749377364e-06, | |
| "loss": 0.0661, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.6856776674142903, | |
| "grad_norm": 0.1645522266626358, | |
| "learning_rate": 5.438778291235647e-06, | |
| "loss": 0.0618, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.6869593079141301, | |
| "grad_norm": 0.19986960291862488, | |
| "learning_rate": 5.39900415653289e-06, | |
| "loss": 0.065, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.6882409484139699, | |
| "grad_norm": 0.18243259191513062, | |
| "learning_rate": 5.359322141615124e-06, | |
| "loss": 0.0646, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.6895225889138097, | |
| "grad_norm": 0.17021586000919342, | |
| "learning_rate": 5.319733040983972e-06, | |
| "loss": 0.06, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.6908042294136495, | |
| "grad_norm": 0.18343709409236908, | |
| "learning_rate": 5.280237647280759e-06, | |
| "loss": 0.0637, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.6920858699134893, | |
| "grad_norm": 0.19553858041763306, | |
| "learning_rate": 5.24083675127064e-06, | |
| "loss": 0.0623, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.6933675104133291, | |
| "grad_norm": 0.18324916064739227, | |
| "learning_rate": 5.201531141826759e-06, | |
| "loss": 0.0651, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.6946491509131688, | |
| "grad_norm": 0.16518688201904297, | |
| "learning_rate": 5.162321605914461e-06, | |
| "loss": 0.0662, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.6959307914130086, | |
| "grad_norm": 0.19472633302211761, | |
| "learning_rate": 5.123208928575538e-06, | |
| "loss": 0.0647, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.6972124319128484, | |
| "grad_norm": 0.17345207929611206, | |
| "learning_rate": 5.084193892912506e-06, | |
| "loss": 0.0634, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.6984940724126882, | |
| "grad_norm": 0.19299833476543427, | |
| "learning_rate": 5.0452772800729375e-06, | |
| "loss": 0.0632, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.699775712912528, | |
| "grad_norm": 0.17639560997486115, | |
| "learning_rate": 5.006459869233795e-06, | |
| "loss": 0.0625, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.7010573534123679, | |
| "grad_norm": 0.1841004192829132, | |
| "learning_rate": 4.967742437585859e-06, | |
| "loss": 0.0654, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.7023389939122077, | |
| "grad_norm": 0.17446312308311462, | |
| "learning_rate": 4.929125760318159e-06, | |
| "loss": 0.0632, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.7036206344120475, | |
| "grad_norm": 0.190720796585083, | |
| "learning_rate": 4.890610610602437e-06, | |
| "loss": 0.0659, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.7049022749118872, | |
| "grad_norm": 0.19830749928951263, | |
| "learning_rate": 4.852197759577688e-06, | |
| "loss": 0.0621, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.7049022749118872, | |
| "eval_f1": 0.8970788647271435, | |
| "eval_loss": 0.06400919705629349, | |
| "eval_precision": 0.9064782660759191, | |
| "eval_recall": 0.8878723903354445, | |
| "eval_runtime": 2.989, | |
| "eval_samples_per_second": 2166.596, | |
| "eval_steps_per_second": 8.699, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.706183915411727, | |
| "grad_norm": 0.20006248354911804, | |
| "learning_rate": 4.813887976334722e-06, | |
| "loss": 0.0631, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.7074655559115668, | |
| "grad_norm": 0.19379836320877075, | |
| "learning_rate": 4.775682027900739e-06, | |
| "loss": 0.0642, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.7087471964114066, | |
| "grad_norm": 0.18635933101177216, | |
| "learning_rate": 4.737580679223996e-06, | |
| "loss": 0.0632, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.7100288369112464, | |
| "grad_norm": 0.18606948852539062, | |
| "learning_rate": 4.699584693158494e-06, | |
| "loss": 0.0643, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.7113104774110862, | |
| "grad_norm": 0.18975263833999634, | |
| "learning_rate": 4.661694830448673e-06, | |
| "loss": 0.0635, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.712592117910926, | |
| "grad_norm": 0.18707899749279022, | |
| "learning_rate": 4.623911849714226e-06, | |
| "loss": 0.0622, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.7138737584107658, | |
| "grad_norm": 0.1804761439561844, | |
| "learning_rate": 4.586236507434876e-06, | |
| "loss": 0.0623, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.7151553989106055, | |
| "grad_norm": 0.19166141748428345, | |
| "learning_rate": 4.548669557935233e-06, | |
| "loss": 0.0616, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.7164370394104453, | |
| "grad_norm": 0.19050787389278412, | |
| "learning_rate": 4.511211753369712e-06, | |
| "loss": 0.0656, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.7177186799102852, | |
| "grad_norm": 0.18800762295722961, | |
| "learning_rate": 4.473863843707454e-06, | |
| "loss": 0.0629, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.719000320410125, | |
| "grad_norm": 0.17600564658641815, | |
| "learning_rate": 4.436626576717313e-06, | |
| "loss": 0.0643, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.7202819609099648, | |
| "grad_norm": 0.18252992630004883, | |
| "learning_rate": 4.399500697952898e-06, | |
| "loss": 0.0649, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.7215636014098046, | |
| "grad_norm": 0.21690472960472107, | |
| "learning_rate": 4.362486950737626e-06, | |
| "loss": 0.0652, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.7228452419096444, | |
| "grad_norm": 0.19096529483795166, | |
| "learning_rate": 4.325586076149858e-06, | |
| "loss": 0.0637, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.7241268824094841, | |
| "grad_norm": 0.17736491560935974, | |
| "learning_rate": 4.288798813008039e-06, | |
| "loss": 0.0634, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.7254085229093239, | |
| "grad_norm": 0.19415591657161713, | |
| "learning_rate": 4.2521258978559324e-06, | |
| "loss": 0.0629, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.7266901634091637, | |
| "grad_norm": 0.2018127739429474, | |
| "learning_rate": 4.215568064947854e-06, | |
| "loss": 0.0625, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.7279718039090035, | |
| "grad_norm": 0.17937569320201874, | |
| "learning_rate": 4.179126046233977e-06, | |
| "loss": 0.0633, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.7292534444088433, | |
| "grad_norm": 0.21321403980255127, | |
| "learning_rate": 4.142800571345678e-06, | |
| "loss": 0.064, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.7305350849086831, | |
| "grad_norm": 0.16154085099697113, | |
| "learning_rate": 4.106592367580931e-06, | |
| "loss": 0.0628, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.7318167254085229, | |
| "grad_norm": 0.20596788823604584, | |
| "learning_rate": 4.070502159889731e-06, | |
| "loss": 0.0655, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.7330983659083627, | |
| "grad_norm": 0.18368017673492432, | |
| "learning_rate": 4.034530670859598e-06, | |
| "loss": 0.0639, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.7343800064082024, | |
| "grad_norm": 0.184966579079628, | |
| "learning_rate": 3.998678620701102e-06, | |
| "loss": 0.0619, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.7356616469080423, | |
| "grad_norm": 0.19185282289981842, | |
| "learning_rate": 3.9629467272334495e-06, | |
| "loss": 0.0658, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.7369432874078821, | |
| "grad_norm": 0.19974376261234283, | |
| "learning_rate": 3.927335705870089e-06, | |
| "loss": 0.0642, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.7382249279077219, | |
| "grad_norm": 0.17768967151641846, | |
| "learning_rate": 3.8918462696044145e-06, | |
| "loss": 0.0638, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.7395065684075617, | |
| "grad_norm": 0.19170130789279938, | |
| "learning_rate": 3.8564791289954805e-06, | |
| "loss": 0.0607, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.7407882089074015, | |
| "grad_norm": 0.21157631278038025, | |
| "learning_rate": 3.8212349921537626e-06, | |
| "loss": 0.0647, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.7420698494072413, | |
| "grad_norm": 0.16236081719398499, | |
| "learning_rate": 3.7861145647269994e-06, | |
| "loss": 0.0619, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.7433514899070811, | |
| "grad_norm": 0.18598569929599762, | |
| "learning_rate": 3.751118549886065e-06, | |
| "loss": 0.0637, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.7446331304069208, | |
| "grad_norm": 0.19498658180236816, | |
| "learning_rate": 3.7162476483108612e-06, | |
| "loss": 0.0618, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.7459147709067606, | |
| "grad_norm": 0.19161143898963928, | |
| "learning_rate": 3.681502558176321e-06, | |
| "loss": 0.0624, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.7471964114066004, | |
| "grad_norm": 0.21474111080169678, | |
| "learning_rate": 3.646883975138421e-06, | |
| "loss": 0.0652, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.7484780519064402, | |
| "grad_norm": 0.18814519047737122, | |
| "learning_rate": 3.612392592320233e-06, | |
| "loss": 0.0629, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.74975969240628, | |
| "grad_norm": 0.18717658519744873, | |
| "learning_rate": 3.578029100298087e-06, | |
| "loss": 0.063, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.7510413329061199, | |
| "grad_norm": 0.1913532316684723, | |
| "learning_rate": 3.5437941870877078e-06, | |
| "loss": 0.0623, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.7523229734059597, | |
| "grad_norm": 0.19016607105731964, | |
| "learning_rate": 3.509688538130448e-06, | |
| "loss": 0.0643, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.7536046139057995, | |
| "grad_norm": 0.19770283997058868, | |
| "learning_rate": 3.475712836279579e-06, | |
| "loss": 0.062, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.7548862544056392, | |
| "grad_norm": 0.19611884653568268, | |
| "learning_rate": 3.441867761786607e-06, | |
| "loss": 0.0617, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.756167894905479, | |
| "grad_norm": 0.176730215549469, | |
| "learning_rate": 3.408153992287655e-06, | |
| "loss": 0.0619, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.7574495354053188, | |
| "grad_norm": 0.20056287944316864, | |
| "learning_rate": 3.374572202789895e-06, | |
| "loss": 0.0632, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.7587311759051586, | |
| "grad_norm": 0.20373013615608215, | |
| "learning_rate": 3.3411230656580372e-06, | |
| "loss": 0.0639, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.7600128164049984, | |
| "grad_norm": 0.17553798854351044, | |
| "learning_rate": 3.307807250600864e-06, | |
| "loss": 0.0614, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.7612944569048382, | |
| "grad_norm": 0.18124283850193024, | |
| "learning_rate": 3.2746254246578167e-06, | |
| "loss": 0.0654, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.762576097404678, | |
| "grad_norm": 0.17987513542175293, | |
| "learning_rate": 3.2415782521856543e-06, | |
| "loss": 0.064, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.7638577379045178, | |
| "grad_norm": 0.1896025538444519, | |
| "learning_rate": 3.208666394845139e-06, | |
| "loss": 0.0647, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.7651393784043575, | |
| "grad_norm": 0.16292789578437805, | |
| "learning_rate": 3.1758905115877968e-06, | |
| "loss": 0.062, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.7664210189041973, | |
| "grad_norm": 0.1821797639131546, | |
| "learning_rate": 3.1432512586427155e-06, | |
| "loss": 0.0627, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.7677026594040371, | |
| "grad_norm": 0.18443669378757477, | |
| "learning_rate": 3.1107492895034197e-06, | |
| "loss": 0.0628, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.768984299903877, | |
| "grad_norm": 0.1898009479045868, | |
| "learning_rate": 3.078385254914764e-06, | |
| "loss": 0.0639, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.768984299903877, | |
| "eval_f1": 0.8969218068168416, | |
| "eval_loss": 0.0637771412730217, | |
| "eval_precision": 0.9019212523719166, | |
| "eval_recall": 0.8919774806474314, | |
| "eval_runtime": 3.0047, | |
| "eval_samples_per_second": 2155.325, | |
| "eval_steps_per_second": 8.653, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7702659404037168, | |
| "grad_norm": 0.17967712879180908, | |
| "learning_rate": 3.0461598028599305e-06, | |
| "loss": 0.0617, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.7715475809035566, | |
| "grad_norm": 0.196019247174263, | |
| "learning_rate": 3.014073578547437e-06, | |
| "loss": 0.0645, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.7728292214033964, | |
| "grad_norm": 0.1881665289402008, | |
| "learning_rate": 2.982127224398228e-06, | |
| "loss": 0.0633, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.7741108619032362, | |
| "grad_norm": 0.19193509221076965, | |
| "learning_rate": 2.9503213800328035e-06, | |
| "loss": 0.0638, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.7753925024030759, | |
| "grad_norm": 0.21863171458244324, | |
| "learning_rate": 2.918656682258423e-06, | |
| "loss": 0.0623, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.7766741429029157, | |
| "grad_norm": 0.18367791175842285, | |
| "learning_rate": 2.8871337650563504e-06, | |
| "loss": 0.0646, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.7779557834027555, | |
| "grad_norm": 0.1860390156507492, | |
| "learning_rate": 2.855753259569153e-06, | |
| "loss": 0.0651, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.7792374239025953, | |
| "grad_norm": 0.18775425851345062, | |
| "learning_rate": 2.8245157940880784e-06, | |
| "loss": 0.0619, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.7805190644024351, | |
| "grad_norm": 0.19896088540554047, | |
| "learning_rate": 2.793421994040478e-06, | |
| "loss": 0.0641, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.7818007049022749, | |
| "grad_norm": 0.16440510749816895, | |
| "learning_rate": 2.7624724819772586e-06, | |
| "loss": 0.0634, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.7830823454021147, | |
| "grad_norm": 0.17060019075870514, | |
| "learning_rate": 2.7316678775604464e-06, | |
| "loss": 0.065, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.7843639859019546, | |
| "grad_norm": 0.18785084784030914, | |
| "learning_rate": 2.7010087975507714e-06, | |
| "loss": 0.0613, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.7856456264017943, | |
| "grad_norm": 0.18072864413261414, | |
| "learning_rate": 2.6704958557953063e-06, | |
| "loss": 0.0609, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.7869272669016341, | |
| "grad_norm": 0.18689529597759247, | |
| "learning_rate": 2.6401296632151918e-06, | |
| "loss": 0.0629, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.7882089074014739, | |
| "grad_norm": 0.18816889822483063, | |
| "learning_rate": 2.6099108277934105e-06, | |
| "loss": 0.06, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.7894905479013137, | |
| "grad_norm": 0.1944245547056198, | |
| "learning_rate": 2.579839954562585e-06, | |
| "loss": 0.0642, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.7907721884011535, | |
| "grad_norm": 0.1741659939289093, | |
| "learning_rate": 2.5499176455928933e-06, | |
| "loss": 0.0632, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.7920538289009933, | |
| "grad_norm": 0.18417255580425262, | |
| "learning_rate": 2.520144499980002e-06, | |
| "loss": 0.0641, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.7933354694008331, | |
| "grad_norm": 0.20959390699863434, | |
| "learning_rate": 2.490521113833071e-06, | |
| "loss": 0.0653, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.7946171099006729, | |
| "grad_norm": 0.17131665349006653, | |
| "learning_rate": 2.4610480802628235e-06, | |
| "loss": 0.0615, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.7958987504005126, | |
| "grad_norm": 0.19155119359493256, | |
| "learning_rate": 2.4317259893696643e-06, | |
| "loss": 0.0627, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.7971803909003524, | |
| "grad_norm": 0.20838290452957153, | |
| "learning_rate": 2.402555428231872e-06, | |
| "loss": 0.0653, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.7984620314001922, | |
| "grad_norm": 0.18234503269195557, | |
| "learning_rate": 2.3735369808938338e-06, | |
| "loss": 0.0649, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.799743671900032, | |
| "grad_norm": 0.16235202550888062, | |
| "learning_rate": 2.344671228354366e-06, | |
| "loss": 0.0624, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.8010253123998718, | |
| "grad_norm": 0.17800317704677582, | |
| "learning_rate": 2.3159587485550728e-06, | |
| "loss": 0.0621, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.8023069528997117, | |
| "grad_norm": 0.18719688057899475, | |
| "learning_rate": 2.2874001163687764e-06, | |
| "loss": 0.063, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.8035885933995515, | |
| "grad_norm": 0.2191469967365265, | |
| "learning_rate": 2.258995903588008e-06, | |
| "loss": 0.0637, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.8048702338993913, | |
| "grad_norm": 0.1862994134426117, | |
| "learning_rate": 2.23074667891356e-06, | |
| "loss": 0.0651, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.806151874399231, | |
| "grad_norm": 0.17251913249492645, | |
| "learning_rate": 2.202653007943093e-06, | |
| "loss": 0.064, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.8074335148990708, | |
| "grad_norm": 0.19216704368591309, | |
| "learning_rate": 2.1747154531598226e-06, | |
| "loss": 0.0612, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.8087151553989106, | |
| "grad_norm": 0.2258075773715973, | |
| "learning_rate": 2.146934573921249e-06, | |
| "loss": 0.0634, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.8099967958987504, | |
| "grad_norm": 0.19085222482681274, | |
| "learning_rate": 2.119310926447965e-06, | |
| "loss": 0.0626, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.8112784363985902, | |
| "grad_norm": 0.16878673434257507, | |
| "learning_rate": 2.0918450638125097e-06, | |
| "loss": 0.0619, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.81256007689843, | |
| "grad_norm": 0.1871701180934906, | |
| "learning_rate": 2.0645375359283047e-06, | |
| "loss": 0.0627, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.8138417173982698, | |
| "grad_norm": 0.18515709042549133, | |
| "learning_rate": 2.037388889538642e-06, | |
| "loss": 0.0646, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.8151233578981096, | |
| "grad_norm": 0.18814954161643982, | |
| "learning_rate": 2.0103996682057235e-06, | |
| "loss": 0.0614, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.8164049983979493, | |
| "grad_norm": 0.18813377618789673, | |
| "learning_rate": 1.983570412299799e-06, | |
| "loss": 0.0626, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.8176866388977891, | |
| "grad_norm": 0.176827535033226, | |
| "learning_rate": 1.956901658988345e-06, | |
| "loss": 0.0617, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.818968279397629, | |
| "grad_norm": 0.18096593022346497, | |
| "learning_rate": 1.930393942225283e-06, | |
| "loss": 0.0635, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.8202499198974688, | |
| "grad_norm": 0.14722853899002075, | |
| "learning_rate": 1.9040477927403223e-06, | |
| "loss": 0.0633, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.8215315603973086, | |
| "grad_norm": 0.20519372820854187, | |
| "learning_rate": 1.8778637380283193e-06, | |
| "loss": 0.0647, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.8228132008971484, | |
| "grad_norm": 0.1677417904138565, | |
| "learning_rate": 1.8518423023387066e-06, | |
| "loss": 0.0617, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.8240948413969882, | |
| "grad_norm": 0.17646180093288422, | |
| "learning_rate": 1.8259840066650136e-06, | |
| "loss": 0.0634, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.8253764818968279, | |
| "grad_norm": 0.1692490577697754, | |
| "learning_rate": 1.800289368734436e-06, | |
| "loss": 0.0658, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.8266581223966677, | |
| "grad_norm": 0.20320384204387665, | |
| "learning_rate": 1.7747589029974454e-06, | |
| "loss": 0.0655, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.8279397628965075, | |
| "grad_norm": 0.17576946318149567, | |
| "learning_rate": 1.7493931206175185e-06, | |
| "loss": 0.0631, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.8292214033963473, | |
| "grad_norm": 0.1861860305070877, | |
| "learning_rate": 1.7241925294608853e-06, | |
| "loss": 0.0604, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.8305030438961871, | |
| "grad_norm": 0.18089807033538818, | |
| "learning_rate": 1.6991576340863669e-06, | |
| "loss": 0.0609, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.8317846843960269, | |
| "grad_norm": 0.19535063207149506, | |
| "learning_rate": 1.674288935735273e-06, | |
| "loss": 0.0628, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.8330663248958667, | |
| "grad_norm": 0.19545066356658936, | |
| "learning_rate": 1.6495869323213654e-06, | |
| "loss": 0.0663, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.8330663248958667, | |
| "eval_f1": 0.9011988424968995, | |
| "eval_loss": 0.06293080747127533, | |
| "eval_precision": 0.9075770191507078, | |
| "eval_recall": 0.8949096880131363, | |
| "eval_runtime": 2.9883, | |
| "eval_samples_per_second": 2167.101, | |
| "eval_steps_per_second": 8.701, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.8343479653957065, | |
| "grad_norm": 0.18452438712120056, | |
| "learning_rate": 1.625052118420889e-06, | |
| "loss": 0.0621, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.8356296058955462, | |
| "grad_norm": 0.18466158211231232, | |
| "learning_rate": 1.600684985262665e-06, | |
| "loss": 0.0614, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.836911246395386, | |
| "grad_norm": 0.20921823382377625, | |
| "learning_rate": 1.5764860207182642e-06, | |
| "loss": 0.0635, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.8381928868952259, | |
| "grad_norm": 0.2036418616771698, | |
| "learning_rate": 1.5524557092922377e-06, | |
| "loss": 0.062, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.8394745273950657, | |
| "grad_norm": 0.19418755173683167, | |
| "learning_rate": 1.5285945321124073e-06, | |
| "loss": 0.0633, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.8407561678949055, | |
| "grad_norm": 0.21860581636428833, | |
| "learning_rate": 1.504902966920243e-06, | |
| "loss": 0.0654, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.8420378083947453, | |
| "grad_norm": 0.18533924221992493, | |
| "learning_rate": 1.4813814880612942e-06, | |
| "loss": 0.0629, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.8433194488945851, | |
| "grad_norm": 0.2107323408126831, | |
| "learning_rate": 1.4580305664756856e-06, | |
| "loss": 0.0642, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.8446010893944249, | |
| "grad_norm": 0.17865046858787537, | |
| "learning_rate": 1.4348506696887e-06, | |
| "loss": 0.0639, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.8458827298942646, | |
| "grad_norm": 0.18889763951301575, | |
| "learning_rate": 1.4118422618014093e-06, | |
| "loss": 0.0626, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.8471643703941044, | |
| "grad_norm": 0.1740342676639557, | |
| "learning_rate": 1.389005803481389e-06, | |
| "loss": 0.0624, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.8484460108939442, | |
| "grad_norm": 0.1994953602552414, | |
| "learning_rate": 1.366341751953487e-06, | |
| "loss": 0.0623, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.849727651393784, | |
| "grad_norm": 0.18933704495429993, | |
| "learning_rate": 1.3438505609906738e-06, | |
| "loss": 0.0611, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.8510092918936238, | |
| "grad_norm": 0.19953961670398712, | |
| "learning_rate": 1.321532680904959e-06, | |
| "loss": 0.0643, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.8522909323934637, | |
| "grad_norm": 0.1963312029838562, | |
| "learning_rate": 1.299388558538368e-06, | |
| "loss": 0.062, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.8535725728933035, | |
| "grad_norm": 0.18360593914985657, | |
| "learning_rate": 1.277418637254002e-06, | |
| "loss": 0.0627, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.8548542133931433, | |
| "grad_norm": 0.1813330501317978, | |
| "learning_rate": 1.255623356927168e-06, | |
| "loss": 0.0629, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.856135853892983, | |
| "grad_norm": 0.17133601009845734, | |
| "learning_rate": 1.2340031539365483e-06, | |
| "loss": 0.0631, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.8574174943928228, | |
| "grad_norm": 0.17428767681121826, | |
| "learning_rate": 1.2125584611554886e-06, | |
| "loss": 0.0634, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.8586991348926626, | |
| "grad_norm": 0.18506939709186554, | |
| "learning_rate": 1.1912897079433183e-06, | |
| "loss": 0.0636, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.8599807753925024, | |
| "grad_norm": 0.1925283968448639, | |
| "learning_rate": 1.1701973201367544e-06, | |
| "loss": 0.0625, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.8612624158923422, | |
| "grad_norm": 0.20190182328224182, | |
| "learning_rate": 1.1492817200413785e-06, | |
| "loss": 0.0651, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.862544056392182, | |
| "grad_norm": 0.18169893324375153, | |
| "learning_rate": 1.128543326423187e-06, | |
| "loss": 0.0619, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.8638256968920218, | |
| "grad_norm": 0.212158665060997, | |
| "learning_rate": 1.1079825545001887e-06, | |
| "loss": 0.0639, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.8651073373918616, | |
| "grad_norm": 0.18513430655002594, | |
| "learning_rate": 1.0875998159341128e-06, | |
| "loss": 0.0646, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.8663889778917013, | |
| "grad_norm": 0.18722467124462128, | |
| "learning_rate": 1.0673955188221518e-06, | |
| "loss": 0.0611, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.8676706183915411, | |
| "grad_norm": 0.17140091955661774, | |
| "learning_rate": 1.0473700676887988e-06, | |
| "loss": 0.0645, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.868952258891381, | |
| "grad_norm": 0.19441086053848267, | |
| "learning_rate": 1.0275238634777441e-06, | |
| "loss": 0.0644, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.8702338993912208, | |
| "grad_norm": 0.22514182329177856, | |
| "learning_rate": 1.0078573035438476e-06, | |
| "loss": 0.0627, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.8715155398910606, | |
| "grad_norm": 0.18342842161655426, | |
| "learning_rate": 9.88370781645188e-07, | |
| "loss": 0.0621, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.8727971803909004, | |
| "grad_norm": 0.17307966947555542, | |
| "learning_rate": 9.690646879351706e-07, | |
| "loss": 0.0625, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.8740788208907402, | |
| "grad_norm": 0.1793297678232193, | |
| "learning_rate": 9.499394089547242e-07, | |
| "loss": 0.0605, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.87536046139058, | |
| "grad_norm": 0.16885530948638916, | |
| "learning_rate": 9.309953276245565e-07, | |
| "loss": 0.0631, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.8766421018904197, | |
| "grad_norm": 0.1713690608739853, | |
| "learning_rate": 9.122328232374899e-07, | |
| "loss": 0.0642, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.8779237423902595, | |
| "grad_norm": 0.17423969507217407, | |
| "learning_rate": 8.936522714508678e-07, | |
| "loss": 0.0619, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.8792053828900993, | |
| "grad_norm": 0.18652468919754028, | |
| "learning_rate": 8.752540442790314e-07, | |
| "loss": 0.0625, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.8804870233899391, | |
| "grad_norm": 0.17617492377758026, | |
| "learning_rate": 8.570385100858692e-07, | |
| "loss": 0.0624, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.8817686638897789, | |
| "grad_norm": 0.18869616091251373, | |
| "learning_rate": 8.390060335774486e-07, | |
| "loss": 0.0638, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.8830503043896187, | |
| "grad_norm": 0.1805189996957779, | |
| "learning_rate": 8.211569757947069e-07, | |
| "loss": 0.0621, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.8843319448894585, | |
| "grad_norm": 0.17392995953559875, | |
| "learning_rate": 8.034916941062287e-07, | |
| "loss": 0.0657, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.8856135853892984, | |
| "grad_norm": 0.20601050555706024, | |
| "learning_rate": 7.860105422010867e-07, | |
| "loss": 0.064, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.886895225889138, | |
| "grad_norm": 0.1817730814218521, | |
| "learning_rate": 7.687138700817598e-07, | |
| "loss": 0.0618, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.8881768663889779, | |
| "grad_norm": 0.1970674693584442, | |
| "learning_rate": 7.516020240571286e-07, | |
| "loss": 0.0615, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.8894585068888177, | |
| "grad_norm": 0.17473480105400085, | |
| "learning_rate": 7.346753467355372e-07, | |
| "loss": 0.0632, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.8907401473886575, | |
| "grad_norm": 0.18870329856872559, | |
| "learning_rate": 7.179341770179404e-07, | |
| "loss": 0.0629, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.8920217878884973, | |
| "grad_norm": 0.16362690925598145, | |
| "learning_rate": 7.013788500911112e-07, | |
| "loss": 0.0633, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.8933034283883371, | |
| "grad_norm": 0.20100702345371246, | |
| "learning_rate": 6.850096974209353e-07, | |
| "loss": 0.0622, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.8945850688881769, | |
| "grad_norm": 0.170489102602005, | |
| "learning_rate": 6.688270467457703e-07, | |
| "loss": 0.0616, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.8958667093880167, | |
| "grad_norm": 0.20799265801906586, | |
| "learning_rate": 6.528312220698885e-07, | |
| "loss": 0.0646, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.8971483498878564, | |
| "grad_norm": 0.18187017738819122, | |
| "learning_rate": 6.370225436569799e-07, | |
| "loss": 0.0638, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.8971483498878564, | |
| "eval_f1": 0.9003476928516707, | |
| "eval_loss": 0.06308671087026596, | |
| "eval_precision": 0.9047731848868885, | |
| "eval_recall": 0.89596528266479, | |
| "eval_runtime": 2.9924, | |
| "eval_samples_per_second": 2164.151, | |
| "eval_steps_per_second": 8.689, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.8984299903876962, | |
| "grad_norm": 0.19084925949573517, | |
| "learning_rate": 6.214013280237552e-07, | |
| "loss": 0.0641, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.899711630887536, | |
| "grad_norm": 0.18255262076854706, | |
| "learning_rate": 6.059678879336006e-07, | |
| "loss": 0.0634, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.9009932713873758, | |
| "grad_norm": 0.2704687714576721, | |
| "learning_rate": 5.907225323903076e-07, | |
| "loss": 0.0604, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.9022749118872156, | |
| "grad_norm": 0.1986435055732727, | |
| "learning_rate": 5.756655666319011e-07, | |
| "loss": 0.0641, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.9035565523870555, | |
| "grad_norm": 0.19538725912570953, | |
| "learning_rate": 5.607972921245197e-07, | |
| "loss": 0.0617, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.9048381928868953, | |
| "grad_norm": 0.2058635950088501, | |
| "learning_rate": 5.461180065563787e-07, | |
| "loss": 0.0643, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.9061198333867351, | |
| "grad_norm": 0.1940140575170517, | |
| "learning_rate": 5.316280038318178e-07, | |
| "loss": 0.0659, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.9074014738865748, | |
| "grad_norm": 0.19371388852596283, | |
| "learning_rate": 5.173275740654049e-07, | |
| "loss": 0.0634, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.9086831143864146, | |
| "grad_norm": 0.19026151299476624, | |
| "learning_rate": 5.032170035761397e-07, | |
| "loss": 0.0638, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.9099647548862544, | |
| "grad_norm": 0.17401890456676483, | |
| "learning_rate": 4.892965748817102e-07, | |
| "loss": 0.0601, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.9112463953860942, | |
| "grad_norm": 0.19419656693935394, | |
| "learning_rate": 4.7556656669284553e-07, | |
| "loss": 0.0603, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.912528035885934, | |
| "grad_norm": 0.17826074361801147, | |
| "learning_rate": 4.6202725390772863e-07, | |
| "loss": 0.0618, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.9138096763857738, | |
| "grad_norm": 0.20400400459766388, | |
| "learning_rate": 4.486789076064968e-07, | |
| "loss": 0.0633, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.9150913168856136, | |
| "grad_norm": 0.1725650578737259, | |
| "learning_rate": 4.355217950458124e-07, | |
| "loss": 0.062, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.9163729573854534, | |
| "grad_norm": 0.19925615191459656, | |
| "learning_rate": 4.225561796535127e-07, | |
| "loss": 0.0623, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.9176545978852931, | |
| "grad_norm": 0.19225375354290009, | |
| "learning_rate": 4.0978232102333095e-07, | |
| "loss": 0.0647, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.9189362383851329, | |
| "grad_norm": 0.167951762676239, | |
| "learning_rate": 3.9720047490970916e-07, | |
| "loss": 0.0614, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.9202178788849728, | |
| "grad_norm": 0.17831383645534515, | |
| "learning_rate": 3.8481089322266683e-07, | |
| "loss": 0.0624, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.9214995193848126, | |
| "grad_norm": 0.18907809257507324, | |
| "learning_rate": 3.726138240227628e-07, | |
| "loss": 0.0659, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.9227811598846524, | |
| "grad_norm": 0.16613955795764923, | |
| "learning_rate": 3.606095115161279e-07, | |
| "loss": 0.0602, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.9240628003844922, | |
| "grad_norm": 0.19376474618911743, | |
| "learning_rate": 3.4879819604957564e-07, | |
| "loss": 0.0637, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.925344440884332, | |
| "grad_norm": 0.18962319195270538, | |
| "learning_rate": 3.3718011410578954e-07, | |
| "loss": 0.0639, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.9266260813841717, | |
| "grad_norm": 0.19449980556964874, | |
| "learning_rate": 3.2575549829858557e-07, | |
| "loss": 0.0633, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.9279077218840115, | |
| "grad_norm": 0.17876175045967102, | |
| "learning_rate": 3.1452457736826146e-07, | |
| "loss": 0.0621, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.9291893623838513, | |
| "grad_norm": 0.21437576413154602, | |
| "learning_rate": 3.0348757617701064e-07, | |
| "loss": 0.0654, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.9304710028836911, | |
| "grad_norm": 0.17090333998203278, | |
| "learning_rate": 2.926447157044243e-07, | |
| "loss": 0.0612, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.9317526433835309, | |
| "grad_norm": 0.17806924879550934, | |
| "learning_rate": 2.8199621304306425e-07, | |
| "loss": 0.0641, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.9330342838833707, | |
| "grad_norm": 0.1961851716041565, | |
| "learning_rate": 2.715422813941193e-07, | |
| "loss": 0.064, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.9343159243832105, | |
| "grad_norm": 0.1685817539691925, | |
| "learning_rate": 2.612831300631291e-07, | |
| "loss": 0.063, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.9355975648830503, | |
| "grad_norm": 0.18308007717132568, | |
| "learning_rate": 2.5121896445580383e-07, | |
| "loss": 0.0615, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.93687920538289, | |
| "grad_norm": 0.19477280974388123, | |
| "learning_rate": 2.413499860739088e-07, | |
| "loss": 0.0623, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.9381608458827299, | |
| "grad_norm": 0.21266412734985352, | |
| "learning_rate": 2.316763925112242e-07, | |
| "loss": 0.0637, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.9394424863825697, | |
| "grad_norm": 0.1711306869983673, | |
| "learning_rate": 2.2219837744959284e-07, | |
| "loss": 0.062, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.9407241268824095, | |
| "grad_norm": 0.1893559992313385, | |
| "learning_rate": 2.1291613065504313e-07, | |
| "loss": 0.0619, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.9420057673822493, | |
| "grad_norm": 0.1796388030052185, | |
| "learning_rate": 2.0382983797399224e-07, | |
| "loss": 0.0629, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.9432874078820891, | |
| "grad_norm": 0.19146645069122314, | |
| "learning_rate": 1.9493968132951456e-07, | |
| "loss": 0.0645, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.9445690483819289, | |
| "grad_norm": 0.16710014641284943, | |
| "learning_rate": 1.8624583871771352e-07, | |
| "loss": 0.063, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.9458506888817687, | |
| "grad_norm": 0.29610446095466614, | |
| "learning_rate": 1.777484842041488e-07, | |
| "loss": 0.0612, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.9471323293816084, | |
| "grad_norm": 0.16617882251739502, | |
| "learning_rate": 1.6944778792034822e-07, | |
| "loss": 0.0638, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.9484139698814482, | |
| "grad_norm": 0.20489302277565002, | |
| "learning_rate": 1.6134391606041354e-07, | |
| "loss": 0.0638, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.949695610381288, | |
| "grad_norm": 0.19994662702083588, | |
| "learning_rate": 1.5343703087768225e-07, | |
| "loss": 0.0642, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.9509772508811278, | |
| "grad_norm": 0.1873672902584076, | |
| "learning_rate": 1.4572729068148106e-07, | |
| "loss": 0.0625, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.9522588913809676, | |
| "grad_norm": 0.18355967104434967, | |
| "learning_rate": 1.3821484983396194e-07, | |
| "loss": 0.0623, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.9535405318808075, | |
| "grad_norm": 0.1817985624074936, | |
| "learning_rate": 1.308998587470056e-07, | |
| "loss": 0.0641, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.9548221723806473, | |
| "grad_norm": 0.19310711324214935, | |
| "learning_rate": 1.2378246387920845e-07, | |
| "loss": 0.0619, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.9561038128804871, | |
| "grad_norm": 0.18324962258338928, | |
| "learning_rate": 1.1686280773295699e-07, | |
| "loss": 0.0643, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.9573854533803268, | |
| "grad_norm": 0.20702552795410156, | |
| "learning_rate": 1.1014102885157252e-07, | |
| "loss": 0.0663, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.9586670938801666, | |
| "grad_norm": 0.1761350929737091, | |
| "learning_rate": 1.0361726181653209e-07, | |
| "loss": 0.0637, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.9599487343800064, | |
| "grad_norm": 0.17296813428401947, | |
| "learning_rate": 9.729163724478074e-08, | |
| "loss": 0.0619, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.9612303748798462, | |
| "grad_norm": 0.20203419029712677, | |
| "learning_rate": 9.116428178611248e-08, | |
| "loss": 0.0641, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.9612303748798462, | |
| "eval_f1": 0.9000766373872546, | |
| "eval_loss": 0.06302953511476517, | |
| "eval_precision": 0.9048239895697523, | |
| "eval_recall": 0.8953788411916491, | |
| "eval_runtime": 2.9885, | |
| "eval_samples_per_second": 2167.001, | |
| "eval_steps_per_second": 8.7, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.962512015379686, | |
| "grad_norm": 0.19427528977394104, | |
| "learning_rate": 8.523531812063446e-08, | |
| "loss": 0.0604, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.9637936558795258, | |
| "grad_norm": 0.19381150603294373, | |
| "learning_rate": 7.950486495631349e-08, | |
| "loss": 0.0646, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.9650752963793656, | |
| "grad_norm": 0.18169087171554565, | |
| "learning_rate": 7.397303702659675e-08, | |
| "loss": 0.063, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.9663569368792054, | |
| "grad_norm": 0.18334563076496124, | |
| "learning_rate": 6.863994508811478e-08, | |
| "loss": 0.0627, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.9676385773790451, | |
| "grad_norm": 0.19370083510875702, | |
| "learning_rate": 6.350569591846434e-08, | |
| "loss": 0.0633, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.9689202178788849, | |
| "grad_norm": 0.18291835486888885, | |
| "learning_rate": 5.8570392314074576e-08, | |
| "loss": 0.0639, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.9702018583787247, | |
| "grad_norm": 0.18133868277072906, | |
| "learning_rate": 5.3834133088139784e-08, | |
| "loss": 0.0644, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.9714834988785646, | |
| "grad_norm": 0.1771089881658554, | |
| "learning_rate": 4.929701306864876e-08, | |
| "loss": 0.0611, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.9727651393784044, | |
| "grad_norm": 0.17924901843070984, | |
| "learning_rate": 4.4959123096482984e-08, | |
| "loss": 0.0636, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.9740467798782442, | |
| "grad_norm": 0.1930427998304367, | |
| "learning_rate": 4.0820550023601416e-08, | |
| "loss": 0.064, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.975328420378084, | |
| "grad_norm": 0.18830351531505585, | |
| "learning_rate": 3.68813767112941e-08, | |
| "loss": 0.0631, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.9766100608779238, | |
| "grad_norm": 0.19777809083461761, | |
| "learning_rate": 3.31416820285313e-08, | |
| "loss": 0.0634, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.9778917013777635, | |
| "grad_norm": 0.17489665746688843, | |
| "learning_rate": 2.960154085037803e-08, | |
| "loss": 0.0635, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.9791733418776033, | |
| "grad_norm": 0.18989871442317963, | |
| "learning_rate": 2.6261024056501995e-08, | |
| "loss": 0.0635, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.9804549823774431, | |
| "grad_norm": 0.19326794147491455, | |
| "learning_rate": 2.3120198529745785e-08, | |
| "loss": 0.0629, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.9817366228772829, | |
| "grad_norm": 0.16711817681789398, | |
| "learning_rate": 2.0179127154793533e-08, | |
| "loss": 0.0626, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.9830182633771227, | |
| "grad_norm": 0.17362891137599945, | |
| "learning_rate": 1.7437868816911894e-08, | |
| "loss": 0.0631, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.9842999038769625, | |
| "grad_norm": 0.17052555084228516, | |
| "learning_rate": 1.4896478400767688e-08, | |
| "loss": 0.0623, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.9855815443768023, | |
| "grad_norm": 0.1941647231578827, | |
| "learning_rate": 1.2555006789334301e-08, | |
| "loss": 0.0631, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.9868631848766422, | |
| "grad_norm": 0.18964990973472595, | |
| "learning_rate": 1.0413500862864745e-08, | |
| "loss": 0.0635, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.9881448253764819, | |
| "grad_norm": 0.21469241380691528, | |
| "learning_rate": 8.47200349796129e-09, | |
| "loss": 0.0633, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.9894264658763217, | |
| "grad_norm": 0.174968883395195, | |
| "learning_rate": 6.730553566713926e-09, | |
| "loss": 0.0627, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.9907081063761615, | |
| "grad_norm": 0.17662419378757477, | |
| "learning_rate": 5.189185935919883e-09, | |
| "loss": 0.0615, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.9919897468760013, | |
| "grad_norm": 0.2064337432384491, | |
| "learning_rate": 3.847931466388621e-09, | |
| "loss": 0.0614, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.9932713873758411, | |
| "grad_norm": 0.2198801338672638, | |
| "learning_rate": 2.7068170123234483e-09, | |
| "loss": 0.0633, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.9945530278756809, | |
| "grad_norm": 0.20898166298866272, | |
| "learning_rate": 1.765865420779722e-09, | |
| "loss": 0.0641, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.9958346683755207, | |
| "grad_norm": 0.17097249627113342, | |
| "learning_rate": 1.0250955312152144e-09, | |
| "loss": 0.0628, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.9971163088753605, | |
| "grad_norm": 0.15358439087867737, | |
| "learning_rate": 4.845221751070827e-10, | |
| "loss": 0.0634, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.9983979493752002, | |
| "grad_norm": 0.18262039124965668, | |
| "learning_rate": 1.4415617565433082e-10, | |
| "loss": 0.0618, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.99967958987504, | |
| "grad_norm": 0.21999365091323853, | |
| "learning_rate": 4.004347565755851e-12, | |
| "loss": 0.0642, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.999935917975008, | |
| "step": 7802, | |
| "total_flos": 4.995590848700744e+17, | |
| "train_loss": 0.0661678893047093, | |
| "train_runtime": 4443.5679, | |
| "train_samples_per_second": 449.496, | |
| "train_steps_per_second": 1.756 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7802, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.995590848700744e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |