{ "best_global_step": 1000, "best_metric": 0.6724504812400831, "best_model_checkpoint": "training/fourier-spectral-norm-classifier/checkpoint-1000", "epoch": 1.5353121801432958, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "SWA": "started", "epoch": 0, "step": 0 }, { "epoch": 0.00511770726714432, "grad_norm": 1.7937116622924805, "learning_rate": 8.19672131147541e-09, "loss": 0.8149, "step": 5 }, { "epoch": 0.01023541453428864, "grad_norm": 1.8986879587173462, "learning_rate": 1.844262295081967e-08, "loss": 0.8145, "step": 10 }, { "epoch": 0.015353121801432957, "grad_norm": 1.8692522048950195, "learning_rate": 2.8688524590163933e-08, "loss": 0.8031, "step": 15 }, { "epoch": 0.02047082906857728, "grad_norm": 1.6589646339416504, "learning_rate": 3.8934426229508196e-08, "loss": 0.8208, "step": 20 }, { "epoch": 0.0255885363357216, "grad_norm": 2.377978563308716, "learning_rate": 4.918032786885246e-08, "loss": 0.8054, "step": 25 }, { "epoch": 0.030706243602865915, "grad_norm": 2.000364065170288, "learning_rate": 5.9426229508196716e-08, "loss": 0.8064, "step": 30 }, { "epoch": 0.03582395087001024, "grad_norm": 1.8844542503356934, "learning_rate": 6.967213114754098e-08, "loss": 0.8047, "step": 35 }, { "epoch": 0.04094165813715456, "grad_norm": 2.0933573246002197, "learning_rate": 7.991803278688524e-08, "loss": 0.8156, "step": 40 }, { "epoch": 0.04605936540429888, "grad_norm": 1.8126033544540405, "learning_rate": 9.01639344262295e-08, "loss": 0.8074, "step": 45 }, { "epoch": 0.0511770726714432, "grad_norm": 2.5709195137023926, "learning_rate": 1.0040983606557377e-07, "loss": 0.8124, "step": 50 }, { "epoch": 0.05629477993858751, "grad_norm": 2.1875293254852295, "learning_rate": 1.1065573770491803e-07, "loss": 0.8143, "step": 55 }, { "epoch": 0.06141248720573183, "grad_norm": 2.0810351371765137, "learning_rate": 1.209016393442623e-07, "loss": 0.8149, "step": 60 }, { "epoch": 0.06653019447287616, "grad_norm": 1.7912037372589111, "learning_rate": 1.3114754098360656e-07, "loss": 0.8022, "step": 65 }, { "epoch": 0.07164790174002048, "grad_norm": 1.7301534414291382, "learning_rate": 1.413934426229508e-07, "loss": 0.8149, "step": 70 }, { "epoch": 0.0767656090071648, "grad_norm": 1.9520158767700195, "learning_rate": 1.5163934426229508e-07, "loss": 0.8201, "step": 75 }, { "epoch": 0.08188331627430911, "grad_norm": 2.11938214302063, "learning_rate": 1.6188524590163935e-07, "loss": 0.8079, "step": 80 }, { "epoch": 0.08700102354145343, "grad_norm": 2.1483607292175293, "learning_rate": 1.7213114754098358e-07, "loss": 0.8084, "step": 85 }, { "epoch": 0.09211873080859775, "grad_norm": 2.1716372966766357, "learning_rate": 1.8237704918032787e-07, "loss": 0.8188, "step": 90 }, { "epoch": 0.09723643807574207, "grad_norm": 2.3327996730804443, "learning_rate": 1.926229508196721e-07, "loss": 0.8153, "step": 95 }, { "epoch": 0.1023541453428864, "grad_norm": 1.762168526649475, "learning_rate": 2.028688524590164e-07, "loss": 0.8064, "step": 100 }, { "epoch": 0.10747185261003071, "grad_norm": 1.7200757265090942, "learning_rate": 2.1311475409836064e-07, "loss": 0.8063, "step": 105 }, { "epoch": 0.11258955987717502, "grad_norm": 2.490513324737549, "learning_rate": 2.233606557377049e-07, "loss": 0.8192, "step": 110 }, { "epoch": 0.11770726714431934, "grad_norm": 2.244020938873291, "learning_rate": 2.336065573770492e-07, "loss": 0.8153, "step": 115 }, { "epoch": 0.12282497441146366, "grad_norm": 2.1315150260925293, "learning_rate": 2.438524590163934e-07, "loss": 0.807, "step": 120 }, { "epoch": 0.12794268167860798, "grad_norm": 2.320936918258667, "learning_rate": 2.540983606557377e-07, "loss": 0.8163, "step": 125 }, { "epoch": 0.1330603889457523, "grad_norm": 2.7143912315368652, "learning_rate": 2.643442622950819e-07, "loss": 0.8166, "step": 130 }, { "epoch": 0.13817809621289662, "grad_norm": 1.649880290031433, "learning_rate": 2.7459016393442624e-07, "loss": 0.8113, "step": 135 }, { "epoch": 0.14329580348004095, "grad_norm": 2.171790361404419, "learning_rate": 2.848360655737705e-07, "loss": 0.805, "step": 140 }, { "epoch": 0.14841351074718526, "grad_norm": 2.093440294265747, "learning_rate": 2.950819672131147e-07, "loss": 0.8118, "step": 145 }, { "epoch": 0.1535312180143296, "grad_norm": 1.9067059755325317, "learning_rate": 3.05327868852459e-07, "loss": 0.8047, "step": 150 }, { "epoch": 0.1586489252814739, "grad_norm": 1.9988980293273926, "learning_rate": 3.155737704918033e-07, "loss": 0.8091, "step": 155 }, { "epoch": 0.16376663254861823, "grad_norm": 1.696977972984314, "learning_rate": 3.258196721311475e-07, "loss": 0.8101, "step": 160 }, { "epoch": 0.16888433981576254, "grad_norm": 2.098017454147339, "learning_rate": 3.3606557377049177e-07, "loss": 0.81, "step": 165 }, { "epoch": 0.17400204708290687, "grad_norm": 2.0255584716796875, "learning_rate": 3.463114754098361e-07, "loss": 0.814, "step": 170 }, { "epoch": 0.17911975435005117, "grad_norm": 1.8376339673995972, "learning_rate": 3.565573770491803e-07, "loss": 0.8053, "step": 175 }, { "epoch": 0.1842374616171955, "grad_norm": 1.9230207204818726, "learning_rate": 3.6680327868852456e-07, "loss": 0.8022, "step": 180 }, { "epoch": 0.18935516888433981, "grad_norm": 1.939705729484558, "learning_rate": 3.770491803278688e-07, "loss": 0.8075, "step": 185 }, { "epoch": 0.19447287615148415, "grad_norm": 1.6276813745498657, "learning_rate": 3.8729508196721314e-07, "loss": 0.8097, "step": 190 }, { "epoch": 0.19959058341862845, "grad_norm": 1.7544569969177246, "learning_rate": 3.9754098360655735e-07, "loss": 0.8046, "step": 195 }, { "epoch": 0.2047082906857728, "grad_norm": 1.7406467199325562, "learning_rate": 4.077868852459016e-07, "loss": 0.8149, "step": 200 }, { "epoch": 0.2098259979529171, "grad_norm": 1.7330560684204102, "learning_rate": 4.180327868852459e-07, "loss": 0.8077, "step": 205 }, { "epoch": 0.21494370522006143, "grad_norm": 1.417546033859253, "learning_rate": 4.2827868852459014e-07, "loss": 0.807, "step": 210 }, { "epoch": 0.22006141248720573, "grad_norm": 2.1064000129699707, "learning_rate": 4.385245901639344e-07, "loss": 0.8041, "step": 215 }, { "epoch": 0.22517911975435004, "grad_norm": 1.637609601020813, "learning_rate": 4.487704918032787e-07, "loss": 0.7992, "step": 220 }, { "epoch": 0.23029682702149437, "grad_norm": 1.659397840499878, "learning_rate": 4.590163934426229e-07, "loss": 0.802, "step": 225 }, { "epoch": 0.23541453428863868, "grad_norm": 1.6912051439285278, "learning_rate": 4.692622950819672e-07, "loss": 0.8005, "step": 230 }, { "epoch": 0.240532241555783, "grad_norm": 1.9433246850967407, "learning_rate": 4.795081967213115e-07, "loss": 0.8079, "step": 235 }, { "epoch": 0.24564994882292732, "grad_norm": 1.9640270471572876, "learning_rate": 4.897540983606557e-07, "loss": 0.8127, "step": 240 }, { "epoch": 0.2507676560900716, "grad_norm": 2.3167271614074707, "learning_rate": 5e-07, "loss": 0.8058, "step": 245 }, { "epoch": 0.25588536335721596, "grad_norm": 1.6469106674194336, "learning_rate": 5.102459016393442e-07, "loss": 0.8011, "step": 250 }, { "epoch": 0.2610030706243603, "grad_norm": 1.5691314935684204, "learning_rate": 5.204918032786885e-07, "loss": 0.7968, "step": 255 }, { "epoch": 0.2661207778915046, "grad_norm": 1.663665533065796, "learning_rate": 5.307377049180327e-07, "loss": 0.8018, "step": 260 }, { "epoch": 0.2712384851586489, "grad_norm": 1.99347984790802, "learning_rate": 5.40983606557377e-07, "loss": 0.8006, "step": 265 }, { "epoch": 0.27635619242579323, "grad_norm": 1.4906947612762451, "learning_rate": 5.512295081967213e-07, "loss": 0.7977, "step": 270 }, { "epoch": 0.28147389969293757, "grad_norm": 1.786527395248413, "learning_rate": 5.614754098360656e-07, "loss": 0.8041, "step": 275 }, { "epoch": 0.2865916069600819, "grad_norm": 1.9175364971160889, "learning_rate": 5.717213114754098e-07, "loss": 0.8079, "step": 280 }, { "epoch": 0.2917093142272262, "grad_norm": 1.678741216659546, "learning_rate": 5.819672131147541e-07, "loss": 0.7974, "step": 285 }, { "epoch": 0.2968270214943705, "grad_norm": 2.0347344875335693, "learning_rate": 5.922131147540983e-07, "loss": 0.8011, "step": 290 }, { "epoch": 0.30194472876151485, "grad_norm": 1.8914201259613037, "learning_rate": 6.024590163934425e-07, "loss": 0.8026, "step": 295 }, { "epoch": 0.3070624360286592, "grad_norm": 1.6236293315887451, "learning_rate": 6.127049180327869e-07, "loss": 0.7981, "step": 300 }, { "epoch": 0.31218014329580346, "grad_norm": 1.4731358289718628, "learning_rate": 6.229508196721311e-07, "loss": 0.7972, "step": 305 }, { "epoch": 0.3172978505629478, "grad_norm": 1.7494508028030396, "learning_rate": 6.331967213114754e-07, "loss": 0.797, "step": 310 }, { "epoch": 0.3224155578300921, "grad_norm": 1.696869134902954, "learning_rate": 6.434426229508197e-07, "loss": 0.7972, "step": 315 }, { "epoch": 0.32753326509723646, "grad_norm": 1.5431866645812988, "learning_rate": 6.536885245901639e-07, "loss": 0.7919, "step": 320 }, { "epoch": 0.33265097236438074, "grad_norm": 1.6396448612213135, "learning_rate": 6.639344262295081e-07, "loss": 0.7986, "step": 325 }, { "epoch": 0.33776867963152507, "grad_norm": 1.7315205335617065, "learning_rate": 6.741803278688525e-07, "loss": 0.7966, "step": 330 }, { "epoch": 0.3428863868986694, "grad_norm": 1.6142867803573608, "learning_rate": 6.844262295081967e-07, "loss": 0.7964, "step": 335 }, { "epoch": 0.34800409416581374, "grad_norm": 1.332783818244934, "learning_rate": 6.94672131147541e-07, "loss": 0.7969, "step": 340 }, { "epoch": 0.353121801432958, "grad_norm": 1.434688687324524, "learning_rate": 7.049180327868852e-07, "loss": 0.8015, "step": 345 }, { "epoch": 0.35823950870010235, "grad_norm": 1.7243021726608276, "learning_rate": 7.151639344262295e-07, "loss": 0.791, "step": 350 }, { "epoch": 0.3633572159672467, "grad_norm": 1.603244662284851, "learning_rate": 7.254098360655737e-07, "loss": 0.7926, "step": 355 }, { "epoch": 0.368474923234391, "grad_norm": 1.645308256149292, "learning_rate": 7.356557377049179e-07, "loss": 0.7988, "step": 360 }, { "epoch": 0.3735926305015353, "grad_norm": 1.3321951627731323, "learning_rate": 7.459016393442623e-07, "loss": 0.7923, "step": 365 }, { "epoch": 0.37871033776867963, "grad_norm": 2.1083521842956543, "learning_rate": 7.561475409836066e-07, "loss": 0.7935, "step": 370 }, { "epoch": 0.38382804503582396, "grad_norm": 1.3414019346237183, "learning_rate": 7.663934426229508e-07, "loss": 0.7894, "step": 375 }, { "epoch": 0.3889457523029683, "grad_norm": 1.8279671669006348, "learning_rate": 7.766393442622951e-07, "loss": 0.7916, "step": 380 }, { "epoch": 0.3940634595701126, "grad_norm": 1.6233114004135132, "learning_rate": 7.868852459016393e-07, "loss": 0.7886, "step": 385 }, { "epoch": 0.3991811668372569, "grad_norm": 1.4336532354354858, "learning_rate": 7.971311475409835e-07, "loss": 0.7884, "step": 390 }, { "epoch": 0.40429887410440124, "grad_norm": 1.597020149230957, "learning_rate": 8.073770491803278e-07, "loss": 0.7904, "step": 395 }, { "epoch": 0.4094165813715456, "grad_norm": 1.3191157579421997, "learning_rate": 8.176229508196721e-07, "loss": 0.787, "step": 400 }, { "epoch": 0.41453428863868985, "grad_norm": 1.6425617933273315, "learning_rate": 8.278688524590164e-07, "loss": 0.7887, "step": 405 }, { "epoch": 0.4196519959058342, "grad_norm": 1.3924281597137451, "learning_rate": 8.381147540983607e-07, "loss": 0.7976, "step": 410 }, { "epoch": 0.4247697031729785, "grad_norm": 1.2975757122039795, "learning_rate": 8.483606557377049e-07, "loss": 0.7895, "step": 415 }, { "epoch": 0.42988741044012285, "grad_norm": 1.3045737743377686, "learning_rate": 8.586065573770491e-07, "loss": 0.7894, "step": 420 }, { "epoch": 0.43500511770726713, "grad_norm": 1.9618183374404907, "learning_rate": 8.688524590163933e-07, "loss": 0.7865, "step": 425 }, { "epoch": 0.44012282497441146, "grad_norm": 1.3976588249206543, "learning_rate": 8.790983606557376e-07, "loss": 0.7896, "step": 430 }, { "epoch": 0.4452405322415558, "grad_norm": 1.1260899305343628, "learning_rate": 8.89344262295082e-07, "loss": 0.7861, "step": 435 }, { "epoch": 0.4503582395087001, "grad_norm": 1.293816089630127, "learning_rate": 8.995901639344262e-07, "loss": 0.7826, "step": 440 }, { "epoch": 0.4554759467758444, "grad_norm": 1.4861347675323486, "learning_rate": 9.098360655737705e-07, "loss": 0.7822, "step": 445 }, { "epoch": 0.46059365404298874, "grad_norm": 1.378319501876831, "learning_rate": 9.200819672131147e-07, "loss": 0.778, "step": 450 }, { "epoch": 0.4657113613101331, "grad_norm": 1.2947815656661987, "learning_rate": 9.303278688524589e-07, "loss": 0.7853, "step": 455 }, { "epoch": 0.47082906857727735, "grad_norm": 0.9865773916244507, "learning_rate": 9.405737704918032e-07, "loss": 0.7797, "step": 460 }, { "epoch": 0.4759467758444217, "grad_norm": 1.4883133172988892, "learning_rate": 9.508196721311474e-07, "loss": 0.7804, "step": 465 }, { "epoch": 0.481064483111566, "grad_norm": 1.1394942998886108, "learning_rate": 9.610655737704918e-07, "loss": 0.7818, "step": 470 }, { "epoch": 0.48618219037871035, "grad_norm": 1.104995846748352, "learning_rate": 9.71311475409836e-07, "loss": 0.7775, "step": 475 }, { "epoch": 0.49129989764585463, "grad_norm": 1.258623719215393, "learning_rate": 9.815573770491803e-07, "loss": 0.7731, "step": 480 }, { "epoch": 0.49641760491299897, "grad_norm": 1.4409220218658447, "learning_rate": 9.918032786885245e-07, "loss": 0.7811, "step": 485 }, { "epoch": 0.5015353121801432, "grad_norm": 0.9952474236488342, "learning_rate": 9.999994895105863e-07, "loss": 0.7821, "step": 490 }, { "epoch": 0.5066530194472876, "grad_norm": 1.2250083684921265, "learning_rate": 9.99981622490561e-07, "loss": 0.7822, "step": 495 }, { "epoch": 0.5117707267144319, "grad_norm": 1.1539254188537598, "learning_rate": 9.999382320422427e-07, "loss": 0.776, "step": 500 }, { "epoch": 0.5117707267144319, "eval_accuracy": 0.59523, "eval_loss": 0.6936843991279602, "eval_macro_f1": 0.5690192634397302, "eval_precision": 0.6518208624514151, "eval_recall": 0.6078906162164894, "eval_runtime": 73.7478, "eval_samples_per_second": 1355.972, "eval_steps_per_second": 1.329, "step": 500 }, { "epoch": 0.5168884339815762, "grad_norm": 1.2244267463684082, "learning_rate": 9.998693203806588e-07, "loss": 0.7771, "step": 505 }, { "epoch": 0.5220061412487206, "grad_norm": 1.1900156736373901, "learning_rate": 9.997748910236623e-07, "loss": 0.7815, "step": 510 }, { "epoch": 0.5271238485158649, "grad_norm": 1.2272601127624512, "learning_rate": 9.996549487917522e-07, "loss": 0.7829, "step": 515 }, { "epoch": 0.5322415557830092, "grad_norm": 1.160675287246704, "learning_rate": 9.995094998078276e-07, "loss": 0.7785, "step": 520 }, { "epoch": 0.5373592630501536, "grad_norm": 1.2759345769882202, "learning_rate": 9.993385514968745e-07, "loss": 0.7755, "step": 525 }, { "epoch": 0.5424769703172978, "grad_norm": 1.0531632900238037, "learning_rate": 9.99142112585588e-07, "loss": 0.7781, "step": 530 }, { "epoch": 0.5475946775844421, "grad_norm": 1.0040606260299683, "learning_rate": 9.989201931019251e-07, "loss": 0.7744, "step": 535 }, { "epoch": 0.5527123848515865, "grad_norm": 1.2468197345733643, "learning_rate": 9.98672804374595e-07, "loss": 0.7712, "step": 540 }, { "epoch": 0.5578300921187308, "grad_norm": 1.1564112901687622, "learning_rate": 9.983999590324778e-07, "loss": 0.7797, "step": 545 }, { "epoch": 0.5629477993858751, "grad_norm": 0.8854450583457947, "learning_rate": 9.981016710039832e-07, "loss": 0.7723, "step": 550 }, { "epoch": 0.5680655066530195, "grad_norm": 1.142919659614563, "learning_rate": 9.977779555163369e-07, "loss": 0.7739, "step": 555 }, { "epoch": 0.5731832139201638, "grad_norm": 1.058153748512268, "learning_rate": 9.974288290948042e-07, "loss": 0.774, "step": 560 }, { "epoch": 0.5783009211873081, "grad_norm": 1.1157392263412476, "learning_rate": 9.970543095618468e-07, "loss": 0.7742, "step": 565 }, { "epoch": 0.5834186284544524, "grad_norm": 1.0850578546524048, "learning_rate": 9.96654416036212e-07, "loss": 0.7734, "step": 570 }, { "epoch": 0.5885363357215967, "grad_norm": 0.9722121953964233, "learning_rate": 9.96229168931958e-07, "loss": 0.77, "step": 575 }, { "epoch": 0.593654042988741, "grad_norm": 1.332795262336731, "learning_rate": 9.957785899574102e-07, "loss": 0.7725, "step": 580 }, { "epoch": 0.5987717502558854, "grad_norm": 0.8639675378799438, "learning_rate": 9.953027021140543e-07, "loss": 0.7646, "step": 585 }, { "epoch": 0.6038894575230297, "grad_norm": 0.9253244400024414, "learning_rate": 9.948015296953623e-07, "loss": 0.7743, "step": 590 }, { "epoch": 0.609007164790174, "grad_norm": 0.8843643069267273, "learning_rate": 9.942750982855503e-07, "loss": 0.7717, "step": 595 }, { "epoch": 0.6141248720573184, "grad_norm": 1.046048879623413, "learning_rate": 9.937234347582753e-07, "loss": 0.7721, "step": 600 }, { "epoch": 0.6192425793244627, "grad_norm": 0.8906111717224121, "learning_rate": 9.931465672752613e-07, "loss": 0.7657, "step": 605 }, { "epoch": 0.6243602865916069, "grad_norm": 0.9637787342071533, "learning_rate": 9.925445252848621e-07, "loss": 0.7666, "step": 610 }, { "epoch": 0.6294779938587513, "grad_norm": 0.9004104733467102, "learning_rate": 9.919173395205584e-07, "loss": 0.7664, "step": 615 }, { "epoch": 0.6345957011258956, "grad_norm": 1.4724570512771606, "learning_rate": 9.912650419993893e-07, "loss": 0.7679, "step": 620 }, { "epoch": 0.6397134083930399, "grad_norm": 0.8644343614578247, "learning_rate": 9.905876660203161e-07, "loss": 0.7671, "step": 625 }, { "epoch": 0.6448311156601843, "grad_norm": 0.8368955254554749, "learning_rate": 9.898852461625245e-07, "loss": 0.7717, "step": 630 }, { "epoch": 0.6499488229273286, "grad_norm": 0.9413282871246338, "learning_rate": 9.891578182836583e-07, "loss": 0.7693, "step": 635 }, { "epoch": 0.6550665301944729, "grad_norm": 0.9777762293815613, "learning_rate": 9.884054195179886e-07, "loss": 0.7656, "step": 640 }, { "epoch": 0.6601842374616171, "grad_norm": 0.8983454704284668, "learning_rate": 9.876280882745193e-07, "loss": 0.7605, "step": 645 }, { "epoch": 0.6653019447287615, "grad_norm": 0.8708799481391907, "learning_rate": 9.868258642350254e-07, "loss": 0.7673, "step": 650 }, { "epoch": 0.6704196519959058, "grad_norm": 0.8354130387306213, "learning_rate": 9.859987883520275e-07, "loss": 0.767, "step": 655 }, { "epoch": 0.6755373592630501, "grad_norm": 0.868485152721405, "learning_rate": 9.851469028467015e-07, "loss": 0.7647, "step": 660 }, { "epoch": 0.6806550665301945, "grad_norm": 0.9445936679840088, "learning_rate": 9.84270251206723e-07, "loss": 0.7605, "step": 665 }, { "epoch": 0.6857727737973388, "grad_norm": 0.7952156662940979, "learning_rate": 9.833688781840475e-07, "loss": 0.7664, "step": 670 }, { "epoch": 0.6908904810644831, "grad_norm": 1.1992422342300415, "learning_rate": 9.824428297926254e-07, "loss": 0.7617, "step": 675 }, { "epoch": 0.6960081883316275, "grad_norm": 0.8914986252784729, "learning_rate": 9.81492153306054e-07, "loss": 0.764, "step": 680 }, { "epoch": 0.7011258955987717, "grad_norm": 0.7945632338523865, "learning_rate": 9.80516897255163e-07, "loss": 0.7617, "step": 685 }, { "epoch": 0.706243602865916, "grad_norm": 0.7822641134262085, "learning_rate": 9.795171114255384e-07, "loss": 0.7613, "step": 690 }, { "epoch": 0.7113613101330604, "grad_norm": 0.7989721298217773, "learning_rate": 9.784928468549793e-07, "loss": 0.7615, "step": 695 }, { "epoch": 0.7164790174002047, "grad_norm": 0.7325178980827332, "learning_rate": 9.77444155830895e-07, "loss": 0.7572, "step": 700 }, { "epoch": 0.721596724667349, "grad_norm": 0.8934036493301392, "learning_rate": 9.763710918876329e-07, "loss": 0.7589, "step": 705 }, { "epoch": 0.7267144319344934, "grad_norm": 0.7769590616226196, "learning_rate": 9.752737098037477e-07, "loss": 0.7573, "step": 710 }, { "epoch": 0.7318321392016377, "grad_norm": 1.0458475351333618, "learning_rate": 9.741520655992047e-07, "loss": 0.759, "step": 715 }, { "epoch": 0.736949846468782, "grad_norm": 0.649872899055481, "learning_rate": 9.730062165325185e-07, "loss": 0.7607, "step": 720 }, { "epoch": 0.7420675537359263, "grad_norm": 0.7517932057380676, "learning_rate": 9.718362210978329e-07, "loss": 0.7567, "step": 725 }, { "epoch": 0.7471852610030706, "grad_norm": 0.9947759509086609, "learning_rate": 9.706421390219315e-07, "loss": 0.7593, "step": 730 }, { "epoch": 0.7523029682702149, "grad_norm": 0.719109833240509, "learning_rate": 9.694240312611917e-07, "loss": 0.7615, "step": 735 }, { "epoch": 0.7574206755373593, "grad_norm": 1.0175235271453857, "learning_rate": 9.681819599984712e-07, "loss": 0.7555, "step": 740 }, { "epoch": 0.7625383828045036, "grad_norm": 0.8200032711029053, "learning_rate": 9.66915988639934e-07, "loss": 0.7565, "step": 745 }, { "epoch": 0.7676560900716479, "grad_norm": 0.926680326461792, "learning_rate": 9.656261818118139e-07, "loss": 0.7628, "step": 750 }, { "epoch": 0.7727737973387923, "grad_norm": 0.6904947757720947, "learning_rate": 9.64312605357115e-07, "loss": 0.7584, "step": 755 }, { "epoch": 0.7778915046059366, "grad_norm": 0.7391018867492676, "learning_rate": 9.62975326332251e-07, "loss": 0.7582, "step": 760 }, { "epoch": 0.7830092118730808, "grad_norm": 0.7193120121955872, "learning_rate": 9.616144130036214e-07, "loss": 0.7557, "step": 765 }, { "epoch": 0.7881269191402251, "grad_norm": 0.8275336623191833, "learning_rate": 9.602299348441277e-07, "loss": 0.7575, "step": 770 }, { "epoch": 0.7932446264073695, "grad_norm": 0.9943181276321411, "learning_rate": 9.58821962529625e-07, "loss": 0.7568, "step": 775 }, { "epoch": 0.7983623336745138, "grad_norm": 0.7646188139915466, "learning_rate": 9.573905679353166e-07, "loss": 0.752, "step": 780 }, { "epoch": 0.8034800409416581, "grad_norm": 0.7356329560279846, "learning_rate": 9.55935824132082e-07, "loss": 0.7552, "step": 785 }, { "epoch": 0.8085977482088025, "grad_norm": 0.795838475227356, "learning_rate": 9.544578053827495e-07, "loss": 0.7543, "step": 790 }, { "epoch": 0.8137154554759468, "grad_norm": 0.9953216314315796, "learning_rate": 9.529565871383034e-07, "loss": 0.7558, "step": 795 }, { "epoch": 0.8188331627430911, "grad_norm": 0.797937273979187, "learning_rate": 9.514322460340329e-07, "loss": 0.7542, "step": 800 }, { "epoch": 0.8239508700102354, "grad_norm": 0.7371375560760498, "learning_rate": 9.498848598856198e-07, "loss": 0.7532, "step": 805 }, { "epoch": 0.8290685772773797, "grad_norm": 0.8336758613586426, "learning_rate": 9.48314507685166e-07, "loss": 0.756, "step": 810 }, { "epoch": 0.834186284544524, "grad_norm": 0.7204869389533997, "learning_rate": 9.467212695971619e-07, "loss": 0.7564, "step": 815 }, { "epoch": 0.8393039918116684, "grad_norm": 0.6758232712745667, "learning_rate": 9.451052269543929e-07, "loss": 0.7548, "step": 820 }, { "epoch": 0.8444216990788127, "grad_norm": 0.7348074913024902, "learning_rate": 9.434664622537883e-07, "loss": 0.7535, "step": 825 }, { "epoch": 0.849539406345957, "grad_norm": 0.747559130191803, "learning_rate": 9.418050591522093e-07, "loss": 0.752, "step": 830 }, { "epoch": 0.8546571136131014, "grad_norm": 0.7392817735671997, "learning_rate": 9.401211024621792e-07, "loss": 0.7492, "step": 835 }, { "epoch": 0.8597748208802457, "grad_norm": 0.6318978071212769, "learning_rate": 9.384146781475533e-07, "loss": 0.7577, "step": 840 }, { "epoch": 0.8648925281473899, "grad_norm": 0.5832816362380981, "learning_rate": 9.366858733191307e-07, "loss": 0.7506, "step": 845 }, { "epoch": 0.8700102354145343, "grad_norm": 0.6932022571563721, "learning_rate": 9.349347762302071e-07, "loss": 0.7523, "step": 850 }, { "epoch": 0.8751279426816786, "grad_norm": 0.7047157287597656, "learning_rate": 9.331614762720703e-07, "loss": 0.7487, "step": 855 }, { "epoch": 0.8802456499488229, "grad_norm": 0.6591235995292664, "learning_rate": 9.313660639694358e-07, "loss": 0.7538, "step": 860 }, { "epoch": 0.8853633572159673, "grad_norm": 0.66665118932724, "learning_rate": 9.295486309758269e-07, "loss": 0.7518, "step": 865 }, { "epoch": 0.8904810644831116, "grad_norm": 0.6165961027145386, "learning_rate": 9.277092700688951e-07, "loss": 0.7495, "step": 870 }, { "epoch": 0.8955987717502559, "grad_norm": 0.7449588179588318, "learning_rate": 9.258480751456838e-07, "loss": 0.7515, "step": 875 }, { "epoch": 0.9007164790174002, "grad_norm": 0.7553215622901917, "learning_rate": 9.239651412178357e-07, "loss": 0.7534, "step": 880 }, { "epoch": 0.9058341862845445, "grad_norm": 0.747010350227356, "learning_rate": 9.220605644067419e-07, "loss": 0.7548, "step": 885 }, { "epoch": 0.9109518935516888, "grad_norm": 0.7272236347198486, "learning_rate": 9.20134441938635e-07, "loss": 0.7531, "step": 890 }, { "epoch": 0.9160696008188332, "grad_norm": 0.8726323246955872, "learning_rate": 9.181868721396266e-07, "loss": 0.7479, "step": 895 }, { "epoch": 0.9211873080859775, "grad_norm": 0.7914009094238281, "learning_rate": 9.16217954430687e-07, "loss": 0.7522, "step": 900 }, { "epoch": 0.9263050153531218, "grad_norm": 0.6367310285568237, "learning_rate": 9.142277893225708e-07, "loss": 0.7497, "step": 905 }, { "epoch": 0.9314227226202662, "grad_norm": 0.8285405039787292, "learning_rate": 9.122164784106842e-07, "loss": 0.753, "step": 910 }, { "epoch": 0.9365404298874105, "grad_norm": 0.7742036581039429, "learning_rate": 9.101841243699015e-07, "loss": 0.7534, "step": 915 }, { "epoch": 0.9416581371545547, "grad_norm": 0.7512480020523071, "learning_rate": 9.081308309493209e-07, "loss": 0.747, "step": 920 }, { "epoch": 0.946775844421699, "grad_norm": 0.5556691288948059, "learning_rate": 9.060567029669699e-07, "loss": 0.7465, "step": 925 }, { "epoch": 0.9518935516888434, "grad_norm": 1.0232101678848267, "learning_rate": 9.039618463044536e-07, "loss": 0.7485, "step": 930 }, { "epoch": 0.9570112589559877, "grad_norm": 0.8321600556373596, "learning_rate": 9.018463679015505e-07, "loss": 0.7488, "step": 935 }, { "epoch": 0.962128966223132, "grad_norm": 0.7009038329124451, "learning_rate": 8.997103757507521e-07, "loss": 0.7483, "step": 940 }, { "epoch": 0.9672466734902764, "grad_norm": 0.6939564347267151, "learning_rate": 8.975539788917514e-07, "loss": 0.7485, "step": 945 }, { "epoch": 0.9723643807574207, "grad_norm": 0.7738851308822632, "learning_rate": 8.953772874058757e-07, "loss": 0.7479, "step": 950 }, { "epoch": 0.977482088024565, "grad_norm": 0.5913597941398621, "learning_rate": 8.931804124104672e-07, "loss": 0.7473, "step": 955 }, { "epoch": 0.9825997952917093, "grad_norm": 0.8486027717590332, "learning_rate": 8.909634660532106e-07, "loss": 0.7479, "step": 960 }, { "epoch": 0.9877175025588536, "grad_norm": 0.6463382840156555, "learning_rate": 8.887265615064083e-07, "loss": 0.7486, "step": 965 }, { "epoch": 0.9928352098259979, "grad_norm": 0.6264991164207458, "learning_rate": 8.864698129612031e-07, "loss": 0.7467, "step": 970 }, { "epoch": 0.9979529170931423, "grad_norm": 0.7566510438919067, "learning_rate": 8.841933356217488e-07, "loss": 0.7463, "step": 975 }, { "epoch": 1.0030706243602865, "grad_norm": 0.7290503978729248, "learning_rate": 8.818972456993288e-07, "loss": 0.7504, "step": 980 }, { "epoch": 1.008188331627431, "grad_norm": 0.8277891874313354, "learning_rate": 8.795816604064241e-07, "loss": 0.7472, "step": 985 }, { "epoch": 1.0133060388945752, "grad_norm": 0.6427952647209167, "learning_rate": 8.772466979507302e-07, "loss": 0.7487, "step": 990 }, { "epoch": 1.0184237461617196, "grad_norm": 0.6775041818618774, "learning_rate": 8.748924775291216e-07, "loss": 0.745, "step": 995 }, { "epoch": 1.0235414534288638, "grad_norm": 0.6815404891967773, "learning_rate": 8.725191193215675e-07, "loss": 0.7485, "step": 1000 }, { "epoch": 1.0235414534288638, "eval_accuracy": 0.67557, "eval_loss": 0.6936712265014648, "eval_macro_f1": 0.6724504812400831, "eval_precision": 0.6760463081581009, "eval_recall": 0.6725003053739838, "eval_runtime": 73.7408, "eval_samples_per_second": 1356.102, "eval_steps_per_second": 1.329, "step": 1000 }, { "epoch": 1.0286591606960083, "grad_norm": 0.8586804866790771, "learning_rate": 8.701267444849974e-07, "loss": 0.7457, "step": 1005 }, { "epoch": 1.0337768679631525, "grad_norm": 0.5989358425140381, "learning_rate": 8.677154751471152e-07, "loss": 0.7443, "step": 1010 }, { "epoch": 1.0388945752302967, "grad_norm": 0.6888963580131531, "learning_rate": 8.65285434400165e-07, "loss": 0.7458, "step": 1015 }, { "epoch": 1.0440122824974412, "grad_norm": 0.6407850384712219, "learning_rate": 8.628367462946482e-07, "loss": 0.7493, "step": 1020 }, { "epoch": 1.0491299897645854, "grad_norm": 0.6202091574668884, "learning_rate": 8.603695358329896e-07, "loss": 0.7471, "step": 1025 }, { "epoch": 1.0542476970317298, "grad_norm": 0.7456187605857849, "learning_rate": 8.57883928963157e-07, "loss": 0.7431, "step": 1030 }, { "epoch": 1.059365404298874, "grad_norm": 0.6171067357063293, "learning_rate": 8.553800525722317e-07, "loss": 0.7435, "step": 1035 }, { "epoch": 1.0644831115660185, "grad_norm": 0.8527712821960449, "learning_rate": 8.528580344799305e-07, "loss": 0.7453, "step": 1040 }, { "epoch": 1.0696008188331627, "grad_norm": 0.6724162697792053, "learning_rate": 8.503180034320816e-07, "loss": 0.7467, "step": 1045 }, { "epoch": 1.0747185261003072, "grad_norm": 0.581979513168335, "learning_rate": 8.477600890940513e-07, "loss": 0.7508, "step": 1050 }, { "epoch": 1.0798362333674514, "grad_norm": 0.6551439166069031, "learning_rate": 8.451844220441253e-07, "loss": 0.7469, "step": 1055 }, { "epoch": 1.0849539406345956, "grad_norm": 0.6437426209449768, "learning_rate": 8.42591133766843e-07, "loss": 0.7468, "step": 1060 }, { "epoch": 1.09007164790174, "grad_norm": 0.5788704752922058, "learning_rate": 8.39980356646285e-07, "loss": 0.7424, "step": 1065 }, { "epoch": 1.0951893551688843, "grad_norm": 0.5575606226921082, "learning_rate": 8.373522239593149e-07, "loss": 0.7396, "step": 1070 }, { "epoch": 1.1003070624360287, "grad_norm": 0.737180769443512, "learning_rate": 8.347068698687765e-07, "loss": 0.744, "step": 1075 }, { "epoch": 1.105424769703173, "grad_norm": 0.592766284942627, "learning_rate": 8.320444294166439e-07, "loss": 0.7469, "step": 1080 }, { "epoch": 1.1105424769703174, "grad_norm": 0.63823401927948, "learning_rate": 8.293650385171287e-07, "loss": 0.7447, "step": 1085 }, { "epoch": 1.1156601842374616, "grad_norm": 0.6114454865455627, "learning_rate": 8.266688339497412e-07, "loss": 0.7475, "step": 1090 }, { "epoch": 1.120777891504606, "grad_norm": 0.53263258934021, "learning_rate": 8.239559533523082e-07, "loss": 0.7455, "step": 1095 }, { "epoch": 1.1258955987717503, "grad_norm": 0.7016158699989319, "learning_rate": 8.212265352139466e-07, "loss": 0.742, "step": 1100 }, { "epoch": 1.1310133060388945, "grad_norm": 0.6125472784042358, "learning_rate": 8.184807188679939e-07, "loss": 0.7383, "step": 1105 }, { "epoch": 1.136131013306039, "grad_norm": 0.6008788347244263, "learning_rate": 8.157186444848952e-07, "loss": 0.7435, "step": 1110 }, { "epoch": 1.1412487205731832, "grad_norm": 0.6357280015945435, "learning_rate": 8.129404530650479e-07, "loss": 0.7443, "step": 1115 }, { "epoch": 1.1463664278403276, "grad_norm": 0.6422165036201477, "learning_rate": 8.101462864316038e-07, "loss": 0.7449, "step": 1120 }, { "epoch": 1.1514841351074718, "grad_norm": 0.6852079629898071, "learning_rate": 8.07336287223229e-07, "loss": 0.7428, "step": 1125 }, { "epoch": 1.156601842374616, "grad_norm": 0.5539452433586121, "learning_rate": 8.045105988868224e-07, "loss": 0.7455, "step": 1130 }, { "epoch": 1.1617195496417605, "grad_norm": 0.5939313173294067, "learning_rate": 8.016693656701931e-07, "loss": 0.7376, "step": 1135 }, { "epoch": 1.1668372569089047, "grad_norm": 0.7522106766700745, "learning_rate": 7.98812732614697e-07, "loss": 0.7464, "step": 1140 }, { "epoch": 1.1719549641760492, "grad_norm": 0.6572809815406799, "learning_rate": 7.959408455478313e-07, "loss": 0.7448, "step": 1145 }, { "epoch": 1.1770726714431934, "grad_norm": 0.5842403173446655, "learning_rate": 7.93053851075792e-07, "loss": 0.7396, "step": 1150 }, { "epoch": 1.1821903787103378, "grad_norm": 0.5845000147819519, "learning_rate": 7.901518965759888e-07, "loss": 0.7438, "step": 1155 }, { "epoch": 1.187308085977482, "grad_norm": 0.5873178839683533, "learning_rate": 7.872351301895217e-07, "loss": 0.7421, "step": 1160 }, { "epoch": 1.1924257932446265, "grad_norm": 0.6385728120803833, "learning_rate": 7.843037008136189e-07, "loss": 0.7431, "step": 1165 }, { "epoch": 1.1975435005117707, "grad_norm": 0.5818535685539246, "learning_rate": 7.813577580940356e-07, "loss": 0.7416, "step": 1170 }, { "epoch": 1.202661207778915, "grad_norm": 0.5611526370048523, "learning_rate": 7.783974524174149e-07, "loss": 0.743, "step": 1175 }, { "epoch": 1.2077789150460594, "grad_norm": 0.6002296805381775, "learning_rate": 7.754229349036102e-07, "loss": 0.7407, "step": 1180 }, { "epoch": 1.2128966223132036, "grad_norm": 0.6006008982658386, "learning_rate": 7.724343573979718e-07, "loss": 0.7437, "step": 1185 }, { "epoch": 1.218014329580348, "grad_norm": 0.6336845755577087, "learning_rate": 7.694318724635945e-07, "loss": 0.7405, "step": 1190 }, { "epoch": 1.2231320368474923, "grad_norm": 0.6916839480400085, "learning_rate": 7.664156333735293e-07, "loss": 0.7468, "step": 1195 }, { "epoch": 1.2282497441146367, "grad_norm": 0.5944891571998596, "learning_rate": 7.633857941029602e-07, "loss": 0.7485, "step": 1200 }, { "epoch": 1.233367451381781, "grad_norm": 0.5755409598350525, "learning_rate": 7.603425093213429e-07, "loss": 0.7418, "step": 1205 }, { "epoch": 1.2384851586489254, "grad_norm": 0.6128578186035156, "learning_rate": 7.572859343845092e-07, "loss": 0.7396, "step": 1210 }, { "epoch": 1.2436028659160696, "grad_norm": 0.6123960614204407, "learning_rate": 7.542162253267363e-07, "loss": 0.7363, "step": 1215 }, { "epoch": 1.2487205731832138, "grad_norm": 0.6969608664512634, "learning_rate": 7.511335388527822e-07, "loss": 0.7406, "step": 1220 }, { "epoch": 1.2538382804503583, "grad_norm": 0.6491796970367432, "learning_rate": 7.480380323298851e-07, "loss": 0.7429, "step": 1225 }, { "epoch": 1.2589559877175025, "grad_norm": 0.5883914828300476, "learning_rate": 7.449298637797309e-07, "loss": 0.7375, "step": 1230 }, { "epoch": 1.264073694984647, "grad_norm": 0.6160842776298523, "learning_rate": 7.418091918703854e-07, "loss": 0.7393, "step": 1235 }, { "epoch": 1.2691914022517912, "grad_norm": 0.5568389892578125, "learning_rate": 7.386761759081954e-07, "loss": 0.7387, "step": 1240 }, { "epoch": 1.2743091095189354, "grad_norm": 0.532599151134491, "learning_rate": 7.35530975829656e-07, "loss": 0.741, "step": 1245 }, { "epoch": 1.2794268167860798, "grad_norm": 0.5400995016098022, "learning_rate": 7.323737521932457e-07, "loss": 0.7367, "step": 1250 }, { "epoch": 1.2845445240532243, "grad_norm": 0.5307775735855103, "learning_rate": 7.292046661712307e-07, "loss": 0.7399, "step": 1255 }, { "epoch": 1.2896622313203685, "grad_norm": 0.5908007621765137, "learning_rate": 7.260238795414366e-07, "loss": 0.74, "step": 1260 }, { "epoch": 1.2947799385875127, "grad_norm": 0.5410370826721191, "learning_rate": 7.228315546789907e-07, "loss": 0.7388, "step": 1265 }, { "epoch": 1.2998976458546572, "grad_norm": 0.5406989455223083, "learning_rate": 7.19627854548032e-07, "loss": 0.7337, "step": 1270 }, { "epoch": 1.3050153531218014, "grad_norm": 0.589767575263977, "learning_rate": 7.164129426933927e-07, "loss": 0.7426, "step": 1275 }, { "epoch": 1.3101330603889458, "grad_norm": 0.5926154255867004, "learning_rate": 7.131869832322496e-07, "loss": 0.7374, "step": 1280 }, { "epoch": 1.31525076765609, "grad_norm": 0.7507414817810059, "learning_rate": 7.099501408457452e-07, "loss": 0.7375, "step": 1285 }, { "epoch": 1.3203684749232343, "grad_norm": 0.6162967681884766, "learning_rate": 7.06702580770582e-07, "loss": 0.7381, "step": 1290 }, { "epoch": 1.3254861821903787, "grad_norm": 0.5118803977966309, "learning_rate": 7.034444687905868e-07, "loss": 0.7344, "step": 1295 }, { "epoch": 1.330603889457523, "grad_norm": 0.5982370972633362, "learning_rate": 7.001759712282478e-07, "loss": 0.7382, "step": 1300 }, { "epoch": 1.3357215967246674, "grad_norm": 0.6339845657348633, "learning_rate": 6.968972549362238e-07, "loss": 0.7386, "step": 1305 }, { "epoch": 1.3408393039918116, "grad_norm": 0.5755071043968201, "learning_rate": 6.936084872888271e-07, "loss": 0.7349, "step": 1310 }, { "epoch": 1.345957011258956, "grad_norm": 0.6089357137680054, "learning_rate": 6.90309836173479e-07, "loss": 0.7377, "step": 1315 }, { "epoch": 1.3510747185261003, "grad_norm": 0.6137183308601379, "learning_rate": 6.87001469982139e-07, "loss": 0.7417, "step": 1320 }, { "epoch": 1.3561924257932447, "grad_norm": 0.6864479184150696, "learning_rate": 6.836835576027093e-07, "loss": 0.7321, "step": 1325 }, { "epoch": 1.361310133060389, "grad_norm": 0.5657494068145752, "learning_rate": 6.803562684104125e-07, "loss": 0.7411, "step": 1330 }, { "epoch": 1.3664278403275332, "grad_norm": 0.6047109365463257, "learning_rate": 6.770197722591456e-07, "loss": 0.7399, "step": 1335 }, { "epoch": 1.3715455475946776, "grad_norm": 0.5772355198860168, "learning_rate": 6.736742394728097e-07, "loss": 0.7374, "step": 1340 }, { "epoch": 1.3766632548618218, "grad_norm": 0.7158586382865906, "learning_rate": 6.703198408366142e-07, "loss": 0.739, "step": 1345 }, { "epoch": 1.3817809621289663, "grad_norm": 0.5718494057655334, "learning_rate": 6.669567475883592e-07, "loss": 0.7435, "step": 1350 }, { "epoch": 1.3868986693961105, "grad_norm": 0.6494776606559753, "learning_rate": 6.635851314096935e-07, "loss": 0.7358, "step": 1355 }, { "epoch": 1.3920163766632547, "grad_norm": 0.5958154201507568, "learning_rate": 6.602051644173509e-07, "loss": 0.7375, "step": 1360 }, { "epoch": 1.3971340839303992, "grad_norm": 0.5509739518165588, "learning_rate": 6.568170191543634e-07, "loss": 0.7412, "step": 1365 }, { "epoch": 1.4022517911975436, "grad_norm": 0.5368937253952026, "learning_rate": 6.534208685812536e-07, "loss": 0.7393, "step": 1370 }, { "epoch": 1.4073694984646878, "grad_norm": 0.5369133353233337, "learning_rate": 6.500168860672047e-07, "loss": 0.7398, "step": 1375 }, { "epoch": 1.412487205731832, "grad_norm": 0.5789251327514648, "learning_rate": 6.466052453812111e-07, "loss": 0.7371, "step": 1380 }, { "epoch": 1.4176049129989765, "grad_norm": 0.5568552017211914, "learning_rate": 6.431861206832069e-07, "loss": 0.7363, "step": 1385 }, { "epoch": 1.4227226202661207, "grad_norm": 0.5325226783752441, "learning_rate": 6.397596865151752e-07, "loss": 0.7348, "step": 1390 }, { "epoch": 1.4278403275332652, "grad_norm": 0.5849957466125488, "learning_rate": 6.363261177922388e-07, "loss": 0.7363, "step": 1395 }, { "epoch": 1.4329580348004094, "grad_norm": 0.6208518743515015, "learning_rate": 6.328855897937303e-07, "loss": 0.7365, "step": 1400 }, { "epoch": 1.4380757420675536, "grad_norm": 0.5599240064620972, "learning_rate": 6.294382781542445e-07, "loss": 0.7371, "step": 1405 }, { "epoch": 1.443193449334698, "grad_norm": 0.5623425841331482, "learning_rate": 6.25984358854672e-07, "loss": 0.74, "step": 1410 }, { "epoch": 1.4483111566018425, "grad_norm": 0.6866716146469116, "learning_rate": 6.225240082132172e-07, "loss": 0.7383, "step": 1415 }, { "epoch": 1.4534288638689867, "grad_norm": 0.5852178931236267, "learning_rate": 6.190574028763952e-07, "loss": 0.7381, "step": 1420 }, { "epoch": 1.458546571136131, "grad_norm": 0.5319634079933167, "learning_rate": 6.15584719810016e-07, "loss": 0.7349, "step": 1425 }, { "epoch": 1.4636642784032754, "grad_norm": 0.5798255205154419, "learning_rate": 6.121061362901498e-07, "loss": 0.7331, "step": 1430 }, { "epoch": 1.4687819856704196, "grad_norm": 0.4803605079650879, "learning_rate": 6.086218298940778e-07, "loss": 0.7356, "step": 1435 }, { "epoch": 1.473899692937564, "grad_norm": 0.7146285772323608, "learning_rate": 6.051319784912261e-07, "loss": 0.7384, "step": 1440 }, { "epoch": 1.4790174002047083, "grad_norm": 0.47007301449775696, "learning_rate": 6.016367602340868e-07, "loss": 0.7332, "step": 1445 }, { "epoch": 1.4841351074718525, "grad_norm": 0.6568506956100464, "learning_rate": 5.981363535491233e-07, "loss": 0.7378, "step": 1450 }, { "epoch": 1.489252814738997, "grad_norm": 0.5178249478340149, "learning_rate": 5.946309371276614e-07, "loss": 0.7338, "step": 1455 }, { "epoch": 1.4943705220061412, "grad_norm": 0.5785830616950989, "learning_rate": 5.911206899167676e-07, "loss": 0.7392, "step": 1460 }, { "epoch": 1.4994882292732856, "grad_norm": 0.5021066665649414, "learning_rate": 5.87605791110114e-07, "loss": 0.7342, "step": 1465 }, { "epoch": 1.5046059365404298, "grad_norm": 0.5594333410263062, "learning_rate": 5.840864201388312e-07, "loss": 0.7351, "step": 1470 }, { "epoch": 1.509723643807574, "grad_norm": 0.5204704999923706, "learning_rate": 5.805627566623475e-07, "loss": 0.7375, "step": 1475 }, { "epoch": 1.5148413510747185, "grad_norm": 0.6187242865562439, "learning_rate": 5.770349805592185e-07, "loss": 0.7351, "step": 1480 }, { "epoch": 1.519959058341863, "grad_norm": 0.5294100046157837, "learning_rate": 5.735032719179443e-07, "loss": 0.7383, "step": 1485 }, { "epoch": 1.5250767656090072, "grad_norm": 0.5450606942176819, "learning_rate": 5.699678110277762e-07, "loss": 0.7365, "step": 1490 }, { "epoch": 1.5301944728761514, "grad_norm": 0.5091442465782166, "learning_rate": 5.664287783695122e-07, "loss": 0.7343, "step": 1495 }, { "epoch": 1.5353121801432958, "grad_norm": 0.557119607925415, "learning_rate": 5.628863546062856e-07, "loss": 0.7298, "step": 1500 }, { "epoch": 1.5353121801432958, "eval_accuracy": 0.67304, "eval_loss": 0.6938837766647339, "eval_macro_f1": 0.6609359830000188, "eval_precision": 0.685850518502884, "eval_recall": 0.6657447133221994, "eval_runtime": 73.8645, "eval_samples_per_second": 1353.83, "eval_steps_per_second": 1.327, "step": 1500 } ], "logging_steps": 5, "max_steps": 4885, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0201035364007936e+17, "train_batch_size": 512, "trial_name": null, "trial_params": null }