Instructions to use dzungpham/graphcodebert-code-classification with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use dzungpham/graphcodebert-code-classification with Transformers:
# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("dzungpham/graphcodebert-code-classification", dtype="auto") - Notebooks
- Google Colab
- Kaggle
graphcodebert-code-classification / fourier-spectral-norm-classifier /checkpoint-1500 /trainer_state.json
| { | |
| "best_global_step": 1000, | |
| "best_metric": 0.6724504812400831, | |
| "best_model_checkpoint": "training/fourier-spectral-norm-classifier/checkpoint-1000", | |
| "epoch": 1.5353121801432958, | |
| "eval_steps": 500, | |
| "global_step": 1500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "SWA": "started", | |
| "epoch": 0, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.00511770726714432, | |
| "grad_norm": 1.7937116622924805, | |
| "learning_rate": 8.19672131147541e-09, | |
| "loss": 0.8149, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01023541453428864, | |
| "grad_norm": 1.8986879587173462, | |
| "learning_rate": 1.844262295081967e-08, | |
| "loss": 0.8145, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.015353121801432957, | |
| "grad_norm": 1.8692522048950195, | |
| "learning_rate": 2.8688524590163933e-08, | |
| "loss": 0.8031, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02047082906857728, | |
| "grad_norm": 1.6589646339416504, | |
| "learning_rate": 3.8934426229508196e-08, | |
| "loss": 0.8208, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0255885363357216, | |
| "grad_norm": 2.377978563308716, | |
| "learning_rate": 4.918032786885246e-08, | |
| "loss": 0.8054, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.030706243602865915, | |
| "grad_norm": 2.000364065170288, | |
| "learning_rate": 5.9426229508196716e-08, | |
| "loss": 0.8064, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03582395087001024, | |
| "grad_norm": 1.8844542503356934, | |
| "learning_rate": 6.967213114754098e-08, | |
| "loss": 0.8047, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04094165813715456, | |
| "grad_norm": 2.0933573246002197, | |
| "learning_rate": 7.991803278688524e-08, | |
| "loss": 0.8156, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04605936540429888, | |
| "grad_norm": 1.8126033544540405, | |
| "learning_rate": 9.01639344262295e-08, | |
| "loss": 0.8074, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.0511770726714432, | |
| "grad_norm": 2.5709195137023926, | |
| "learning_rate": 1.0040983606557377e-07, | |
| "loss": 0.8124, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05629477993858751, | |
| "grad_norm": 2.1875293254852295, | |
| "learning_rate": 1.1065573770491803e-07, | |
| "loss": 0.8143, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06141248720573183, | |
| "grad_norm": 2.0810351371765137, | |
| "learning_rate": 1.209016393442623e-07, | |
| "loss": 0.8149, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06653019447287616, | |
| "grad_norm": 1.7912037372589111, | |
| "learning_rate": 1.3114754098360656e-07, | |
| "loss": 0.8022, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.07164790174002048, | |
| "grad_norm": 1.7301534414291382, | |
| "learning_rate": 1.413934426229508e-07, | |
| "loss": 0.8149, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0767656090071648, | |
| "grad_norm": 1.9520158767700195, | |
| "learning_rate": 1.5163934426229508e-07, | |
| "loss": 0.8201, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08188331627430911, | |
| "grad_norm": 2.11938214302063, | |
| "learning_rate": 1.6188524590163935e-07, | |
| "loss": 0.8079, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08700102354145343, | |
| "grad_norm": 2.1483607292175293, | |
| "learning_rate": 1.7213114754098358e-07, | |
| "loss": 0.8084, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09211873080859775, | |
| "grad_norm": 2.1716372966766357, | |
| "learning_rate": 1.8237704918032787e-07, | |
| "loss": 0.8188, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09723643807574207, | |
| "grad_norm": 2.3327996730804443, | |
| "learning_rate": 1.926229508196721e-07, | |
| "loss": 0.8153, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.1023541453428864, | |
| "grad_norm": 1.762168526649475, | |
| "learning_rate": 2.028688524590164e-07, | |
| "loss": 0.8064, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10747185261003071, | |
| "grad_norm": 1.7200757265090942, | |
| "learning_rate": 2.1311475409836064e-07, | |
| "loss": 0.8063, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.11258955987717502, | |
| "grad_norm": 2.490513324737549, | |
| "learning_rate": 2.233606557377049e-07, | |
| "loss": 0.8192, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11770726714431934, | |
| "grad_norm": 2.244020938873291, | |
| "learning_rate": 2.336065573770492e-07, | |
| "loss": 0.8153, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.12282497441146366, | |
| "grad_norm": 2.1315150260925293, | |
| "learning_rate": 2.438524590163934e-07, | |
| "loss": 0.807, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.12794268167860798, | |
| "grad_norm": 2.320936918258667, | |
| "learning_rate": 2.540983606557377e-07, | |
| "loss": 0.8163, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1330603889457523, | |
| "grad_norm": 2.7143912315368652, | |
| "learning_rate": 2.643442622950819e-07, | |
| "loss": 0.8166, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13817809621289662, | |
| "grad_norm": 1.649880290031433, | |
| "learning_rate": 2.7459016393442624e-07, | |
| "loss": 0.8113, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.14329580348004095, | |
| "grad_norm": 2.171790361404419, | |
| "learning_rate": 2.848360655737705e-07, | |
| "loss": 0.805, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14841351074718526, | |
| "grad_norm": 2.093440294265747, | |
| "learning_rate": 2.950819672131147e-07, | |
| "loss": 0.8118, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.1535312180143296, | |
| "grad_norm": 1.9067059755325317, | |
| "learning_rate": 3.05327868852459e-07, | |
| "loss": 0.8047, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1586489252814739, | |
| "grad_norm": 1.9988980293273926, | |
| "learning_rate": 3.155737704918033e-07, | |
| "loss": 0.8091, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.16376663254861823, | |
| "grad_norm": 1.696977972984314, | |
| "learning_rate": 3.258196721311475e-07, | |
| "loss": 0.8101, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.16888433981576254, | |
| "grad_norm": 2.098017454147339, | |
| "learning_rate": 3.3606557377049177e-07, | |
| "loss": 0.81, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.17400204708290687, | |
| "grad_norm": 2.0255584716796875, | |
| "learning_rate": 3.463114754098361e-07, | |
| "loss": 0.814, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.17911975435005117, | |
| "grad_norm": 1.8376339673995972, | |
| "learning_rate": 3.565573770491803e-07, | |
| "loss": 0.8053, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.1842374616171955, | |
| "grad_norm": 1.9230207204818726, | |
| "learning_rate": 3.6680327868852456e-07, | |
| "loss": 0.8022, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.18935516888433981, | |
| "grad_norm": 1.939705729484558, | |
| "learning_rate": 3.770491803278688e-07, | |
| "loss": 0.8075, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.19447287615148415, | |
| "grad_norm": 1.6276813745498657, | |
| "learning_rate": 3.8729508196721314e-07, | |
| "loss": 0.8097, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.19959058341862845, | |
| "grad_norm": 1.7544569969177246, | |
| "learning_rate": 3.9754098360655735e-07, | |
| "loss": 0.8046, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2047082906857728, | |
| "grad_norm": 1.7406467199325562, | |
| "learning_rate": 4.077868852459016e-07, | |
| "loss": 0.8149, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2098259979529171, | |
| "grad_norm": 1.7330560684204102, | |
| "learning_rate": 4.180327868852459e-07, | |
| "loss": 0.8077, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.21494370522006143, | |
| "grad_norm": 1.417546033859253, | |
| "learning_rate": 4.2827868852459014e-07, | |
| "loss": 0.807, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22006141248720573, | |
| "grad_norm": 2.1064000129699707, | |
| "learning_rate": 4.385245901639344e-07, | |
| "loss": 0.8041, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.22517911975435004, | |
| "grad_norm": 1.637609601020813, | |
| "learning_rate": 4.487704918032787e-07, | |
| "loss": 0.7992, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23029682702149437, | |
| "grad_norm": 1.659397840499878, | |
| "learning_rate": 4.590163934426229e-07, | |
| "loss": 0.802, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.23541453428863868, | |
| "grad_norm": 1.6912051439285278, | |
| "learning_rate": 4.692622950819672e-07, | |
| "loss": 0.8005, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.240532241555783, | |
| "grad_norm": 1.9433246850967407, | |
| "learning_rate": 4.795081967213115e-07, | |
| "loss": 0.8079, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.24564994882292732, | |
| "grad_norm": 1.9640270471572876, | |
| "learning_rate": 4.897540983606557e-07, | |
| "loss": 0.8127, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2507676560900716, | |
| "grad_norm": 2.3167271614074707, | |
| "learning_rate": 5e-07, | |
| "loss": 0.8058, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.25588536335721596, | |
| "grad_norm": 1.6469106674194336, | |
| "learning_rate": 5.102459016393442e-07, | |
| "loss": 0.8011, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2610030706243603, | |
| "grad_norm": 1.5691314935684204, | |
| "learning_rate": 5.204918032786885e-07, | |
| "loss": 0.7968, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2661207778915046, | |
| "grad_norm": 1.663665533065796, | |
| "learning_rate": 5.307377049180327e-07, | |
| "loss": 0.8018, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2712384851586489, | |
| "grad_norm": 1.99347984790802, | |
| "learning_rate": 5.40983606557377e-07, | |
| "loss": 0.8006, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.27635619242579323, | |
| "grad_norm": 1.4906947612762451, | |
| "learning_rate": 5.512295081967213e-07, | |
| "loss": 0.7977, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.28147389969293757, | |
| "grad_norm": 1.786527395248413, | |
| "learning_rate": 5.614754098360656e-07, | |
| "loss": 0.8041, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.2865916069600819, | |
| "grad_norm": 1.9175364971160889, | |
| "learning_rate": 5.717213114754098e-07, | |
| "loss": 0.8079, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2917093142272262, | |
| "grad_norm": 1.678741216659546, | |
| "learning_rate": 5.819672131147541e-07, | |
| "loss": 0.7974, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.2968270214943705, | |
| "grad_norm": 2.0347344875335693, | |
| "learning_rate": 5.922131147540983e-07, | |
| "loss": 0.8011, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.30194472876151485, | |
| "grad_norm": 1.8914201259613037, | |
| "learning_rate": 6.024590163934425e-07, | |
| "loss": 0.8026, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3070624360286592, | |
| "grad_norm": 1.6236293315887451, | |
| "learning_rate": 6.127049180327869e-07, | |
| "loss": 0.7981, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.31218014329580346, | |
| "grad_norm": 1.4731358289718628, | |
| "learning_rate": 6.229508196721311e-07, | |
| "loss": 0.7972, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3172978505629478, | |
| "grad_norm": 1.7494508028030396, | |
| "learning_rate": 6.331967213114754e-07, | |
| "loss": 0.797, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3224155578300921, | |
| "grad_norm": 1.696869134902954, | |
| "learning_rate": 6.434426229508197e-07, | |
| "loss": 0.7972, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.32753326509723646, | |
| "grad_norm": 1.5431866645812988, | |
| "learning_rate": 6.536885245901639e-07, | |
| "loss": 0.7919, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.33265097236438074, | |
| "grad_norm": 1.6396448612213135, | |
| "learning_rate": 6.639344262295081e-07, | |
| "loss": 0.7986, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.33776867963152507, | |
| "grad_norm": 1.7315205335617065, | |
| "learning_rate": 6.741803278688525e-07, | |
| "loss": 0.7966, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3428863868986694, | |
| "grad_norm": 1.6142867803573608, | |
| "learning_rate": 6.844262295081967e-07, | |
| "loss": 0.7964, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.34800409416581374, | |
| "grad_norm": 1.332783818244934, | |
| "learning_rate": 6.94672131147541e-07, | |
| "loss": 0.7969, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.353121801432958, | |
| "grad_norm": 1.434688687324524, | |
| "learning_rate": 7.049180327868852e-07, | |
| "loss": 0.8015, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.35823950870010235, | |
| "grad_norm": 1.7243021726608276, | |
| "learning_rate": 7.151639344262295e-07, | |
| "loss": 0.791, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3633572159672467, | |
| "grad_norm": 1.603244662284851, | |
| "learning_rate": 7.254098360655737e-07, | |
| "loss": 0.7926, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.368474923234391, | |
| "grad_norm": 1.645308256149292, | |
| "learning_rate": 7.356557377049179e-07, | |
| "loss": 0.7988, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3735926305015353, | |
| "grad_norm": 1.3321951627731323, | |
| "learning_rate": 7.459016393442623e-07, | |
| "loss": 0.7923, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.37871033776867963, | |
| "grad_norm": 2.1083521842956543, | |
| "learning_rate": 7.561475409836066e-07, | |
| "loss": 0.7935, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.38382804503582396, | |
| "grad_norm": 1.3414019346237183, | |
| "learning_rate": 7.663934426229508e-07, | |
| "loss": 0.7894, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.3889457523029683, | |
| "grad_norm": 1.8279671669006348, | |
| "learning_rate": 7.766393442622951e-07, | |
| "loss": 0.7916, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3940634595701126, | |
| "grad_norm": 1.6233114004135132, | |
| "learning_rate": 7.868852459016393e-07, | |
| "loss": 0.7886, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3991811668372569, | |
| "grad_norm": 1.4336532354354858, | |
| "learning_rate": 7.971311475409835e-07, | |
| "loss": 0.7884, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.40429887410440124, | |
| "grad_norm": 1.597020149230957, | |
| "learning_rate": 8.073770491803278e-07, | |
| "loss": 0.7904, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.4094165813715456, | |
| "grad_norm": 1.3191157579421997, | |
| "learning_rate": 8.176229508196721e-07, | |
| "loss": 0.787, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.41453428863868985, | |
| "grad_norm": 1.6425617933273315, | |
| "learning_rate": 8.278688524590164e-07, | |
| "loss": 0.7887, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4196519959058342, | |
| "grad_norm": 1.3924281597137451, | |
| "learning_rate": 8.381147540983607e-07, | |
| "loss": 0.7976, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4247697031729785, | |
| "grad_norm": 1.2975757122039795, | |
| "learning_rate": 8.483606557377049e-07, | |
| "loss": 0.7895, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.42988741044012285, | |
| "grad_norm": 1.3045737743377686, | |
| "learning_rate": 8.586065573770491e-07, | |
| "loss": 0.7894, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.43500511770726713, | |
| "grad_norm": 1.9618183374404907, | |
| "learning_rate": 8.688524590163933e-07, | |
| "loss": 0.7865, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.44012282497441146, | |
| "grad_norm": 1.3976588249206543, | |
| "learning_rate": 8.790983606557376e-07, | |
| "loss": 0.7896, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4452405322415558, | |
| "grad_norm": 1.1260899305343628, | |
| "learning_rate": 8.89344262295082e-07, | |
| "loss": 0.7861, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4503582395087001, | |
| "grad_norm": 1.293816089630127, | |
| "learning_rate": 8.995901639344262e-07, | |
| "loss": 0.7826, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4554759467758444, | |
| "grad_norm": 1.4861347675323486, | |
| "learning_rate": 9.098360655737705e-07, | |
| "loss": 0.7822, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.46059365404298874, | |
| "grad_norm": 1.378319501876831, | |
| "learning_rate": 9.200819672131147e-07, | |
| "loss": 0.778, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4657113613101331, | |
| "grad_norm": 1.2947815656661987, | |
| "learning_rate": 9.303278688524589e-07, | |
| "loss": 0.7853, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.47082906857727735, | |
| "grad_norm": 0.9865773916244507, | |
| "learning_rate": 9.405737704918032e-07, | |
| "loss": 0.7797, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4759467758444217, | |
| "grad_norm": 1.4883133172988892, | |
| "learning_rate": 9.508196721311474e-07, | |
| "loss": 0.7804, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.481064483111566, | |
| "grad_norm": 1.1394942998886108, | |
| "learning_rate": 9.610655737704918e-07, | |
| "loss": 0.7818, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.48618219037871035, | |
| "grad_norm": 1.104995846748352, | |
| "learning_rate": 9.71311475409836e-07, | |
| "loss": 0.7775, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.49129989764585463, | |
| "grad_norm": 1.258623719215393, | |
| "learning_rate": 9.815573770491803e-07, | |
| "loss": 0.7731, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.49641760491299897, | |
| "grad_norm": 1.4409220218658447, | |
| "learning_rate": 9.918032786885245e-07, | |
| "loss": 0.7811, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5015353121801432, | |
| "grad_norm": 0.9952474236488342, | |
| "learning_rate": 9.999994895105863e-07, | |
| "loss": 0.7821, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5066530194472876, | |
| "grad_norm": 1.2250083684921265, | |
| "learning_rate": 9.99981622490561e-07, | |
| "loss": 0.7822, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5117707267144319, | |
| "grad_norm": 1.1539254188537598, | |
| "learning_rate": 9.999382320422427e-07, | |
| "loss": 0.776, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5117707267144319, | |
| "eval_accuracy": 0.59523, | |
| "eval_loss": 0.6936843991279602, | |
| "eval_macro_f1": 0.5690192634397302, | |
| "eval_precision": 0.6518208624514151, | |
| "eval_recall": 0.6078906162164894, | |
| "eval_runtime": 73.7478, | |
| "eval_samples_per_second": 1355.972, | |
| "eval_steps_per_second": 1.329, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5168884339815762, | |
| "grad_norm": 1.2244267463684082, | |
| "learning_rate": 9.998693203806588e-07, | |
| "loss": 0.7771, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5220061412487206, | |
| "grad_norm": 1.1900156736373901, | |
| "learning_rate": 9.997748910236623e-07, | |
| "loss": 0.7815, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5271238485158649, | |
| "grad_norm": 1.2272601127624512, | |
| "learning_rate": 9.996549487917522e-07, | |
| "loss": 0.7829, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5322415557830092, | |
| "grad_norm": 1.160675287246704, | |
| "learning_rate": 9.995094998078276e-07, | |
| "loss": 0.7785, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5373592630501536, | |
| "grad_norm": 1.2759345769882202, | |
| "learning_rate": 9.993385514968745e-07, | |
| "loss": 0.7755, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5424769703172978, | |
| "grad_norm": 1.0531632900238037, | |
| "learning_rate": 9.99142112585588e-07, | |
| "loss": 0.7781, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5475946775844421, | |
| "grad_norm": 1.0040606260299683, | |
| "learning_rate": 9.989201931019251e-07, | |
| "loss": 0.7744, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5527123848515865, | |
| "grad_norm": 1.2468197345733643, | |
| "learning_rate": 9.98672804374595e-07, | |
| "loss": 0.7712, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5578300921187308, | |
| "grad_norm": 1.1564112901687622, | |
| "learning_rate": 9.983999590324778e-07, | |
| "loss": 0.7797, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5629477993858751, | |
| "grad_norm": 0.8854450583457947, | |
| "learning_rate": 9.981016710039832e-07, | |
| "loss": 0.7723, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5680655066530195, | |
| "grad_norm": 1.142919659614563, | |
| "learning_rate": 9.977779555163369e-07, | |
| "loss": 0.7739, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5731832139201638, | |
| "grad_norm": 1.058153748512268, | |
| "learning_rate": 9.974288290948042e-07, | |
| "loss": 0.774, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5783009211873081, | |
| "grad_norm": 1.1157392263412476, | |
| "learning_rate": 9.970543095618468e-07, | |
| "loss": 0.7742, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5834186284544524, | |
| "grad_norm": 1.0850578546524048, | |
| "learning_rate": 9.96654416036212e-07, | |
| "loss": 0.7734, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5885363357215967, | |
| "grad_norm": 0.9722121953964233, | |
| "learning_rate": 9.96229168931958e-07, | |
| "loss": 0.77, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.593654042988741, | |
| "grad_norm": 1.332795262336731, | |
| "learning_rate": 9.957785899574102e-07, | |
| "loss": 0.7725, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5987717502558854, | |
| "grad_norm": 0.8639675378799438, | |
| "learning_rate": 9.953027021140543e-07, | |
| "loss": 0.7646, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6038894575230297, | |
| "grad_norm": 0.9253244400024414, | |
| "learning_rate": 9.948015296953623e-07, | |
| "loss": 0.7743, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.609007164790174, | |
| "grad_norm": 0.8843643069267273, | |
| "learning_rate": 9.942750982855503e-07, | |
| "loss": 0.7717, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6141248720573184, | |
| "grad_norm": 1.046048879623413, | |
| "learning_rate": 9.937234347582753e-07, | |
| "loss": 0.7721, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6192425793244627, | |
| "grad_norm": 0.8906111717224121, | |
| "learning_rate": 9.931465672752613e-07, | |
| "loss": 0.7657, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6243602865916069, | |
| "grad_norm": 0.9637787342071533, | |
| "learning_rate": 9.925445252848621e-07, | |
| "loss": 0.7666, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6294779938587513, | |
| "grad_norm": 0.9004104733467102, | |
| "learning_rate": 9.919173395205584e-07, | |
| "loss": 0.7664, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6345957011258956, | |
| "grad_norm": 1.4724570512771606, | |
| "learning_rate": 9.912650419993893e-07, | |
| "loss": 0.7679, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6397134083930399, | |
| "grad_norm": 0.8644343614578247, | |
| "learning_rate": 9.905876660203161e-07, | |
| "loss": 0.7671, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6448311156601843, | |
| "grad_norm": 0.8368955254554749, | |
| "learning_rate": 9.898852461625245e-07, | |
| "loss": 0.7717, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6499488229273286, | |
| "grad_norm": 0.9413282871246338, | |
| "learning_rate": 9.891578182836583e-07, | |
| "loss": 0.7693, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6550665301944729, | |
| "grad_norm": 0.9777762293815613, | |
| "learning_rate": 9.884054195179886e-07, | |
| "loss": 0.7656, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6601842374616171, | |
| "grad_norm": 0.8983454704284668, | |
| "learning_rate": 9.876280882745193e-07, | |
| "loss": 0.7605, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6653019447287615, | |
| "grad_norm": 0.8708799481391907, | |
| "learning_rate": 9.868258642350254e-07, | |
| "loss": 0.7673, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6704196519959058, | |
| "grad_norm": 0.8354130387306213, | |
| "learning_rate": 9.859987883520275e-07, | |
| "loss": 0.767, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6755373592630501, | |
| "grad_norm": 0.868485152721405, | |
| "learning_rate": 9.851469028467015e-07, | |
| "loss": 0.7647, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6806550665301945, | |
| "grad_norm": 0.9445936679840088, | |
| "learning_rate": 9.84270251206723e-07, | |
| "loss": 0.7605, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.6857727737973388, | |
| "grad_norm": 0.7952156662940979, | |
| "learning_rate": 9.833688781840475e-07, | |
| "loss": 0.7664, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6908904810644831, | |
| "grad_norm": 1.1992422342300415, | |
| "learning_rate": 9.824428297926254e-07, | |
| "loss": 0.7617, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6960081883316275, | |
| "grad_norm": 0.8914986252784729, | |
| "learning_rate": 9.81492153306054e-07, | |
| "loss": 0.764, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7011258955987717, | |
| "grad_norm": 0.7945632338523865, | |
| "learning_rate": 9.80516897255163e-07, | |
| "loss": 0.7617, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.706243602865916, | |
| "grad_norm": 0.7822641134262085, | |
| "learning_rate": 9.795171114255384e-07, | |
| "loss": 0.7613, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7113613101330604, | |
| "grad_norm": 0.7989721298217773, | |
| "learning_rate": 9.784928468549793e-07, | |
| "loss": 0.7615, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7164790174002047, | |
| "grad_norm": 0.7325178980827332, | |
| "learning_rate": 9.77444155830895e-07, | |
| "loss": 0.7572, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.721596724667349, | |
| "grad_norm": 0.8934036493301392, | |
| "learning_rate": 9.763710918876329e-07, | |
| "loss": 0.7589, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7267144319344934, | |
| "grad_norm": 0.7769590616226196, | |
| "learning_rate": 9.752737098037477e-07, | |
| "loss": 0.7573, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7318321392016377, | |
| "grad_norm": 1.0458475351333618, | |
| "learning_rate": 9.741520655992047e-07, | |
| "loss": 0.759, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.736949846468782, | |
| "grad_norm": 0.649872899055481, | |
| "learning_rate": 9.730062165325185e-07, | |
| "loss": 0.7607, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7420675537359263, | |
| "grad_norm": 0.7517932057380676, | |
| "learning_rate": 9.718362210978329e-07, | |
| "loss": 0.7567, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7471852610030706, | |
| "grad_norm": 0.9947759509086609, | |
| "learning_rate": 9.706421390219315e-07, | |
| "loss": 0.7593, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7523029682702149, | |
| "grad_norm": 0.719109833240509, | |
| "learning_rate": 9.694240312611917e-07, | |
| "loss": 0.7615, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7574206755373593, | |
| "grad_norm": 1.0175235271453857, | |
| "learning_rate": 9.681819599984712e-07, | |
| "loss": 0.7555, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7625383828045036, | |
| "grad_norm": 0.8200032711029053, | |
| "learning_rate": 9.66915988639934e-07, | |
| "loss": 0.7565, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7676560900716479, | |
| "grad_norm": 0.926680326461792, | |
| "learning_rate": 9.656261818118139e-07, | |
| "loss": 0.7628, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7727737973387923, | |
| "grad_norm": 0.6904947757720947, | |
| "learning_rate": 9.64312605357115e-07, | |
| "loss": 0.7584, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7778915046059366, | |
| "grad_norm": 0.7391018867492676, | |
| "learning_rate": 9.62975326332251e-07, | |
| "loss": 0.7582, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7830092118730808, | |
| "grad_norm": 0.7193120121955872, | |
| "learning_rate": 9.616144130036214e-07, | |
| "loss": 0.7557, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7881269191402251, | |
| "grad_norm": 0.8275336623191833, | |
| "learning_rate": 9.602299348441277e-07, | |
| "loss": 0.7575, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7932446264073695, | |
| "grad_norm": 0.9943181276321411, | |
| "learning_rate": 9.58821962529625e-07, | |
| "loss": 0.7568, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.7983623336745138, | |
| "grad_norm": 0.7646188139915466, | |
| "learning_rate": 9.573905679353166e-07, | |
| "loss": 0.752, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8034800409416581, | |
| "grad_norm": 0.7356329560279846, | |
| "learning_rate": 9.55935824132082e-07, | |
| "loss": 0.7552, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.8085977482088025, | |
| "grad_norm": 0.795838475227356, | |
| "learning_rate": 9.544578053827495e-07, | |
| "loss": 0.7543, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8137154554759468, | |
| "grad_norm": 0.9953216314315796, | |
| "learning_rate": 9.529565871383034e-07, | |
| "loss": 0.7558, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8188331627430911, | |
| "grad_norm": 0.797937273979187, | |
| "learning_rate": 9.514322460340329e-07, | |
| "loss": 0.7542, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8239508700102354, | |
| "grad_norm": 0.7371375560760498, | |
| "learning_rate": 9.498848598856198e-07, | |
| "loss": 0.7532, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.8290685772773797, | |
| "grad_norm": 0.8336758613586426, | |
| "learning_rate": 9.48314507685166e-07, | |
| "loss": 0.756, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.834186284544524, | |
| "grad_norm": 0.7204869389533997, | |
| "learning_rate": 9.467212695971619e-07, | |
| "loss": 0.7564, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.8393039918116684, | |
| "grad_norm": 0.6758232712745667, | |
| "learning_rate": 9.451052269543929e-07, | |
| "loss": 0.7548, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8444216990788127, | |
| "grad_norm": 0.7348074913024902, | |
| "learning_rate": 9.434664622537883e-07, | |
| "loss": 0.7535, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.849539406345957, | |
| "grad_norm": 0.747559130191803, | |
| "learning_rate": 9.418050591522093e-07, | |
| "loss": 0.752, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8546571136131014, | |
| "grad_norm": 0.7392817735671997, | |
| "learning_rate": 9.401211024621792e-07, | |
| "loss": 0.7492, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8597748208802457, | |
| "grad_norm": 0.6318978071212769, | |
| "learning_rate": 9.384146781475533e-07, | |
| "loss": 0.7577, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8648925281473899, | |
| "grad_norm": 0.5832816362380981, | |
| "learning_rate": 9.366858733191307e-07, | |
| "loss": 0.7506, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.8700102354145343, | |
| "grad_norm": 0.6932022571563721, | |
| "learning_rate": 9.349347762302071e-07, | |
| "loss": 0.7523, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8751279426816786, | |
| "grad_norm": 0.7047157287597656, | |
| "learning_rate": 9.331614762720703e-07, | |
| "loss": 0.7487, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.8802456499488229, | |
| "grad_norm": 0.6591235995292664, | |
| "learning_rate": 9.313660639694358e-07, | |
| "loss": 0.7538, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8853633572159673, | |
| "grad_norm": 0.66665118932724, | |
| "learning_rate": 9.295486309758269e-07, | |
| "loss": 0.7518, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8904810644831116, | |
| "grad_norm": 0.6165961027145386, | |
| "learning_rate": 9.277092700688951e-07, | |
| "loss": 0.7495, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8955987717502559, | |
| "grad_norm": 0.7449588179588318, | |
| "learning_rate": 9.258480751456838e-07, | |
| "loss": 0.7515, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.9007164790174002, | |
| "grad_norm": 0.7553215622901917, | |
| "learning_rate": 9.239651412178357e-07, | |
| "loss": 0.7534, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9058341862845445, | |
| "grad_norm": 0.747010350227356, | |
| "learning_rate": 9.220605644067419e-07, | |
| "loss": 0.7548, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9109518935516888, | |
| "grad_norm": 0.7272236347198486, | |
| "learning_rate": 9.20134441938635e-07, | |
| "loss": 0.7531, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9160696008188332, | |
| "grad_norm": 0.8726323246955872, | |
| "learning_rate": 9.181868721396266e-07, | |
| "loss": 0.7479, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9211873080859775, | |
| "grad_norm": 0.7914009094238281, | |
| "learning_rate": 9.16217954430687e-07, | |
| "loss": 0.7522, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9263050153531218, | |
| "grad_norm": 0.6367310285568237, | |
| "learning_rate": 9.142277893225708e-07, | |
| "loss": 0.7497, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9314227226202662, | |
| "grad_norm": 0.8285405039787292, | |
| "learning_rate": 9.122164784106842e-07, | |
| "loss": 0.753, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9365404298874105, | |
| "grad_norm": 0.7742036581039429, | |
| "learning_rate": 9.101841243699015e-07, | |
| "loss": 0.7534, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.9416581371545547, | |
| "grad_norm": 0.7512480020523071, | |
| "learning_rate": 9.081308309493209e-07, | |
| "loss": 0.747, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.946775844421699, | |
| "grad_norm": 0.5556691288948059, | |
| "learning_rate": 9.060567029669699e-07, | |
| "loss": 0.7465, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.9518935516888434, | |
| "grad_norm": 1.0232101678848267, | |
| "learning_rate": 9.039618463044536e-07, | |
| "loss": 0.7485, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9570112589559877, | |
| "grad_norm": 0.8321600556373596, | |
| "learning_rate": 9.018463679015505e-07, | |
| "loss": 0.7488, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.962128966223132, | |
| "grad_norm": 0.7009038329124451, | |
| "learning_rate": 8.997103757507521e-07, | |
| "loss": 0.7483, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9672466734902764, | |
| "grad_norm": 0.6939564347267151, | |
| "learning_rate": 8.975539788917514e-07, | |
| "loss": 0.7485, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.9723643807574207, | |
| "grad_norm": 0.7738851308822632, | |
| "learning_rate": 8.953772874058757e-07, | |
| "loss": 0.7479, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.977482088024565, | |
| "grad_norm": 0.5913597941398621, | |
| "learning_rate": 8.931804124104672e-07, | |
| "loss": 0.7473, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.9825997952917093, | |
| "grad_norm": 0.8486027717590332, | |
| "learning_rate": 8.909634660532106e-07, | |
| "loss": 0.7479, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9877175025588536, | |
| "grad_norm": 0.6463382840156555, | |
| "learning_rate": 8.887265615064083e-07, | |
| "loss": 0.7486, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.9928352098259979, | |
| "grad_norm": 0.6264991164207458, | |
| "learning_rate": 8.864698129612031e-07, | |
| "loss": 0.7467, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9979529170931423, | |
| "grad_norm": 0.7566510438919067, | |
| "learning_rate": 8.841933356217488e-07, | |
| "loss": 0.7463, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.0030706243602865, | |
| "grad_norm": 0.7290503978729248, | |
| "learning_rate": 8.818972456993288e-07, | |
| "loss": 0.7504, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.008188331627431, | |
| "grad_norm": 0.8277891874313354, | |
| "learning_rate": 8.795816604064241e-07, | |
| "loss": 0.7472, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.0133060388945752, | |
| "grad_norm": 0.6427952647209167, | |
| "learning_rate": 8.772466979507302e-07, | |
| "loss": 0.7487, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.0184237461617196, | |
| "grad_norm": 0.6775041818618774, | |
| "learning_rate": 8.748924775291216e-07, | |
| "loss": 0.745, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.0235414534288638, | |
| "grad_norm": 0.6815404891967773, | |
| "learning_rate": 8.725191193215675e-07, | |
| "loss": 0.7485, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0235414534288638, | |
| "eval_accuracy": 0.67557, | |
| "eval_loss": 0.6936712265014648, | |
| "eval_macro_f1": 0.6724504812400831, | |
| "eval_precision": 0.6760463081581009, | |
| "eval_recall": 0.6725003053739838, | |
| "eval_runtime": 73.7408, | |
| "eval_samples_per_second": 1356.102, | |
| "eval_steps_per_second": 1.329, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0286591606960083, | |
| "grad_norm": 0.8586804866790771, | |
| "learning_rate": 8.701267444849974e-07, | |
| "loss": 0.7457, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.0337768679631525, | |
| "grad_norm": 0.5989358425140381, | |
| "learning_rate": 8.677154751471152e-07, | |
| "loss": 0.7443, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.0388945752302967, | |
| "grad_norm": 0.6888963580131531, | |
| "learning_rate": 8.65285434400165e-07, | |
| "loss": 0.7458, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.0440122824974412, | |
| "grad_norm": 0.6407850384712219, | |
| "learning_rate": 8.628367462946482e-07, | |
| "loss": 0.7493, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.0491299897645854, | |
| "grad_norm": 0.6202091574668884, | |
| "learning_rate": 8.603695358329896e-07, | |
| "loss": 0.7471, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.0542476970317298, | |
| "grad_norm": 0.7456187605857849, | |
| "learning_rate": 8.57883928963157e-07, | |
| "loss": 0.7431, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.059365404298874, | |
| "grad_norm": 0.6171067357063293, | |
| "learning_rate": 8.553800525722317e-07, | |
| "loss": 0.7435, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.0644831115660185, | |
| "grad_norm": 0.8527712821960449, | |
| "learning_rate": 8.528580344799305e-07, | |
| "loss": 0.7453, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.0696008188331627, | |
| "grad_norm": 0.6724162697792053, | |
| "learning_rate": 8.503180034320816e-07, | |
| "loss": 0.7467, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.0747185261003072, | |
| "grad_norm": 0.581979513168335, | |
| "learning_rate": 8.477600890940513e-07, | |
| "loss": 0.7508, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.0798362333674514, | |
| "grad_norm": 0.6551439166069031, | |
| "learning_rate": 8.451844220441253e-07, | |
| "loss": 0.7469, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.0849539406345956, | |
| "grad_norm": 0.6437426209449768, | |
| "learning_rate": 8.42591133766843e-07, | |
| "loss": 0.7468, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.09007164790174, | |
| "grad_norm": 0.5788704752922058, | |
| "learning_rate": 8.39980356646285e-07, | |
| "loss": 0.7424, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.0951893551688843, | |
| "grad_norm": 0.5575606226921082, | |
| "learning_rate": 8.373522239593149e-07, | |
| "loss": 0.7396, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.1003070624360287, | |
| "grad_norm": 0.737180769443512, | |
| "learning_rate": 8.347068698687765e-07, | |
| "loss": 0.744, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.105424769703173, | |
| "grad_norm": 0.592766284942627, | |
| "learning_rate": 8.320444294166439e-07, | |
| "loss": 0.7469, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.1105424769703174, | |
| "grad_norm": 0.63823401927948, | |
| "learning_rate": 8.293650385171287e-07, | |
| "loss": 0.7447, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.1156601842374616, | |
| "grad_norm": 0.6114454865455627, | |
| "learning_rate": 8.266688339497412e-07, | |
| "loss": 0.7475, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.120777891504606, | |
| "grad_norm": 0.53263258934021, | |
| "learning_rate": 8.239559533523082e-07, | |
| "loss": 0.7455, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.1258955987717503, | |
| "grad_norm": 0.7016158699989319, | |
| "learning_rate": 8.212265352139466e-07, | |
| "loss": 0.742, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.1310133060388945, | |
| "grad_norm": 0.6125472784042358, | |
| "learning_rate": 8.184807188679939e-07, | |
| "loss": 0.7383, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.136131013306039, | |
| "grad_norm": 0.6008788347244263, | |
| "learning_rate": 8.157186444848952e-07, | |
| "loss": 0.7435, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.1412487205731832, | |
| "grad_norm": 0.6357280015945435, | |
| "learning_rate": 8.129404530650479e-07, | |
| "loss": 0.7443, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.1463664278403276, | |
| "grad_norm": 0.6422165036201477, | |
| "learning_rate": 8.101462864316038e-07, | |
| "loss": 0.7449, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.1514841351074718, | |
| "grad_norm": 0.6852079629898071, | |
| "learning_rate": 8.07336287223229e-07, | |
| "loss": 0.7428, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.156601842374616, | |
| "grad_norm": 0.5539452433586121, | |
| "learning_rate": 8.045105988868224e-07, | |
| "loss": 0.7455, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.1617195496417605, | |
| "grad_norm": 0.5939313173294067, | |
| "learning_rate": 8.016693656701931e-07, | |
| "loss": 0.7376, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.1668372569089047, | |
| "grad_norm": 0.7522106766700745, | |
| "learning_rate": 7.98812732614697e-07, | |
| "loss": 0.7464, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.1719549641760492, | |
| "grad_norm": 0.6572809815406799, | |
| "learning_rate": 7.959408455478313e-07, | |
| "loss": 0.7448, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.1770726714431934, | |
| "grad_norm": 0.5842403173446655, | |
| "learning_rate": 7.93053851075792e-07, | |
| "loss": 0.7396, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.1821903787103378, | |
| "grad_norm": 0.5845000147819519, | |
| "learning_rate": 7.901518965759888e-07, | |
| "loss": 0.7438, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.187308085977482, | |
| "grad_norm": 0.5873178839683533, | |
| "learning_rate": 7.872351301895217e-07, | |
| "loss": 0.7421, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.1924257932446265, | |
| "grad_norm": 0.6385728120803833, | |
| "learning_rate": 7.843037008136189e-07, | |
| "loss": 0.7431, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.1975435005117707, | |
| "grad_norm": 0.5818535685539246, | |
| "learning_rate": 7.813577580940356e-07, | |
| "loss": 0.7416, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.202661207778915, | |
| "grad_norm": 0.5611526370048523, | |
| "learning_rate": 7.783974524174149e-07, | |
| "loss": 0.743, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.2077789150460594, | |
| "grad_norm": 0.6002296805381775, | |
| "learning_rate": 7.754229349036102e-07, | |
| "loss": 0.7407, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.2128966223132036, | |
| "grad_norm": 0.6006008982658386, | |
| "learning_rate": 7.724343573979718e-07, | |
| "loss": 0.7437, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.218014329580348, | |
| "grad_norm": 0.6336845755577087, | |
| "learning_rate": 7.694318724635945e-07, | |
| "loss": 0.7405, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.2231320368474923, | |
| "grad_norm": 0.6916839480400085, | |
| "learning_rate": 7.664156333735293e-07, | |
| "loss": 0.7468, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.2282497441146367, | |
| "grad_norm": 0.5944891571998596, | |
| "learning_rate": 7.633857941029602e-07, | |
| "loss": 0.7485, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.233367451381781, | |
| "grad_norm": 0.5755409598350525, | |
| "learning_rate": 7.603425093213429e-07, | |
| "loss": 0.7418, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.2384851586489254, | |
| "grad_norm": 0.6128578186035156, | |
| "learning_rate": 7.572859343845092e-07, | |
| "loss": 0.7396, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.2436028659160696, | |
| "grad_norm": 0.6123960614204407, | |
| "learning_rate": 7.542162253267363e-07, | |
| "loss": 0.7363, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.2487205731832138, | |
| "grad_norm": 0.6969608664512634, | |
| "learning_rate": 7.511335388527822e-07, | |
| "loss": 0.7406, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.2538382804503583, | |
| "grad_norm": 0.6491796970367432, | |
| "learning_rate": 7.480380323298851e-07, | |
| "loss": 0.7429, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.2589559877175025, | |
| "grad_norm": 0.5883914828300476, | |
| "learning_rate": 7.449298637797309e-07, | |
| "loss": 0.7375, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.264073694984647, | |
| "grad_norm": 0.6160842776298523, | |
| "learning_rate": 7.418091918703854e-07, | |
| "loss": 0.7393, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.2691914022517912, | |
| "grad_norm": 0.5568389892578125, | |
| "learning_rate": 7.386761759081954e-07, | |
| "loss": 0.7387, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.2743091095189354, | |
| "grad_norm": 0.532599151134491, | |
| "learning_rate": 7.35530975829656e-07, | |
| "loss": 0.741, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.2794268167860798, | |
| "grad_norm": 0.5400995016098022, | |
| "learning_rate": 7.323737521932457e-07, | |
| "loss": 0.7367, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.2845445240532243, | |
| "grad_norm": 0.5307775735855103, | |
| "learning_rate": 7.292046661712307e-07, | |
| "loss": 0.7399, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.2896622313203685, | |
| "grad_norm": 0.5908007621765137, | |
| "learning_rate": 7.260238795414366e-07, | |
| "loss": 0.74, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.2947799385875127, | |
| "grad_norm": 0.5410370826721191, | |
| "learning_rate": 7.228315546789907e-07, | |
| "loss": 0.7388, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.2998976458546572, | |
| "grad_norm": 0.5406989455223083, | |
| "learning_rate": 7.19627854548032e-07, | |
| "loss": 0.7337, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.3050153531218014, | |
| "grad_norm": 0.589767575263977, | |
| "learning_rate": 7.164129426933927e-07, | |
| "loss": 0.7426, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.3101330603889458, | |
| "grad_norm": 0.5926154255867004, | |
| "learning_rate": 7.131869832322496e-07, | |
| "loss": 0.7374, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.31525076765609, | |
| "grad_norm": 0.7507414817810059, | |
| "learning_rate": 7.099501408457452e-07, | |
| "loss": 0.7375, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.3203684749232343, | |
| "grad_norm": 0.6162967681884766, | |
| "learning_rate": 7.06702580770582e-07, | |
| "loss": 0.7381, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.3254861821903787, | |
| "grad_norm": 0.5118803977966309, | |
| "learning_rate": 7.034444687905868e-07, | |
| "loss": 0.7344, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.330603889457523, | |
| "grad_norm": 0.5982370972633362, | |
| "learning_rate": 7.001759712282478e-07, | |
| "loss": 0.7382, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.3357215967246674, | |
| "grad_norm": 0.6339845657348633, | |
| "learning_rate": 6.968972549362238e-07, | |
| "loss": 0.7386, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.3408393039918116, | |
| "grad_norm": 0.5755071043968201, | |
| "learning_rate": 6.936084872888271e-07, | |
| "loss": 0.7349, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.345957011258956, | |
| "grad_norm": 0.6089357137680054, | |
| "learning_rate": 6.90309836173479e-07, | |
| "loss": 0.7377, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.3510747185261003, | |
| "grad_norm": 0.6137183308601379, | |
| "learning_rate": 6.87001469982139e-07, | |
| "loss": 0.7417, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.3561924257932447, | |
| "grad_norm": 0.6864479184150696, | |
| "learning_rate": 6.836835576027093e-07, | |
| "loss": 0.7321, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.361310133060389, | |
| "grad_norm": 0.5657494068145752, | |
| "learning_rate": 6.803562684104125e-07, | |
| "loss": 0.7411, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.3664278403275332, | |
| "grad_norm": 0.6047109365463257, | |
| "learning_rate": 6.770197722591456e-07, | |
| "loss": 0.7399, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 1.3715455475946776, | |
| "grad_norm": 0.5772355198860168, | |
| "learning_rate": 6.736742394728097e-07, | |
| "loss": 0.7374, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.3766632548618218, | |
| "grad_norm": 0.7158586382865906, | |
| "learning_rate": 6.703198408366142e-07, | |
| "loss": 0.739, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 1.3817809621289663, | |
| "grad_norm": 0.5718494057655334, | |
| "learning_rate": 6.669567475883592e-07, | |
| "loss": 0.7435, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.3868986693961105, | |
| "grad_norm": 0.6494776606559753, | |
| "learning_rate": 6.635851314096935e-07, | |
| "loss": 0.7358, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 1.3920163766632547, | |
| "grad_norm": 0.5958154201507568, | |
| "learning_rate": 6.602051644173509e-07, | |
| "loss": 0.7375, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.3971340839303992, | |
| "grad_norm": 0.5509739518165588, | |
| "learning_rate": 6.568170191543634e-07, | |
| "loss": 0.7412, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 1.4022517911975436, | |
| "grad_norm": 0.5368937253952026, | |
| "learning_rate": 6.534208685812536e-07, | |
| "loss": 0.7393, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.4073694984646878, | |
| "grad_norm": 0.5369133353233337, | |
| "learning_rate": 6.500168860672047e-07, | |
| "loss": 0.7398, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.412487205731832, | |
| "grad_norm": 0.5789251327514648, | |
| "learning_rate": 6.466052453812111e-07, | |
| "loss": 0.7371, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.4176049129989765, | |
| "grad_norm": 0.5568552017211914, | |
| "learning_rate": 6.431861206832069e-07, | |
| "loss": 0.7363, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 1.4227226202661207, | |
| "grad_norm": 0.5325226783752441, | |
| "learning_rate": 6.397596865151752e-07, | |
| "loss": 0.7348, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.4278403275332652, | |
| "grad_norm": 0.5849957466125488, | |
| "learning_rate": 6.363261177922388e-07, | |
| "loss": 0.7363, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 1.4329580348004094, | |
| "grad_norm": 0.6208518743515015, | |
| "learning_rate": 6.328855897937303e-07, | |
| "loss": 0.7365, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.4380757420675536, | |
| "grad_norm": 0.5599240064620972, | |
| "learning_rate": 6.294382781542445e-07, | |
| "loss": 0.7371, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 1.443193449334698, | |
| "grad_norm": 0.5623425841331482, | |
| "learning_rate": 6.25984358854672e-07, | |
| "loss": 0.74, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.4483111566018425, | |
| "grad_norm": 0.6866716146469116, | |
| "learning_rate": 6.225240082132172e-07, | |
| "loss": 0.7383, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 1.4534288638689867, | |
| "grad_norm": 0.5852178931236267, | |
| "learning_rate": 6.190574028763952e-07, | |
| "loss": 0.7381, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.458546571136131, | |
| "grad_norm": 0.5319634079933167, | |
| "learning_rate": 6.15584719810016e-07, | |
| "loss": 0.7349, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 1.4636642784032754, | |
| "grad_norm": 0.5798255205154419, | |
| "learning_rate": 6.121061362901498e-07, | |
| "loss": 0.7331, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.4687819856704196, | |
| "grad_norm": 0.4803605079650879, | |
| "learning_rate": 6.086218298940778e-07, | |
| "loss": 0.7356, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 1.473899692937564, | |
| "grad_norm": 0.7146285772323608, | |
| "learning_rate": 6.051319784912261e-07, | |
| "loss": 0.7384, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.4790174002047083, | |
| "grad_norm": 0.47007301449775696, | |
| "learning_rate": 6.016367602340868e-07, | |
| "loss": 0.7332, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 1.4841351074718525, | |
| "grad_norm": 0.6568506956100464, | |
| "learning_rate": 5.981363535491233e-07, | |
| "loss": 0.7378, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.489252814738997, | |
| "grad_norm": 0.5178249478340149, | |
| "learning_rate": 5.946309371276614e-07, | |
| "loss": 0.7338, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 1.4943705220061412, | |
| "grad_norm": 0.5785830616950989, | |
| "learning_rate": 5.911206899167676e-07, | |
| "loss": 0.7392, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.4994882292732856, | |
| "grad_norm": 0.5021066665649414, | |
| "learning_rate": 5.87605791110114e-07, | |
| "loss": 0.7342, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 1.5046059365404298, | |
| "grad_norm": 0.5594333410263062, | |
| "learning_rate": 5.840864201388312e-07, | |
| "loss": 0.7351, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.509723643807574, | |
| "grad_norm": 0.5204704999923706, | |
| "learning_rate": 5.805627566623475e-07, | |
| "loss": 0.7375, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.5148413510747185, | |
| "grad_norm": 0.6187242865562439, | |
| "learning_rate": 5.770349805592185e-07, | |
| "loss": 0.7351, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.519959058341863, | |
| "grad_norm": 0.5294100046157837, | |
| "learning_rate": 5.735032719179443e-07, | |
| "loss": 0.7383, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 1.5250767656090072, | |
| "grad_norm": 0.5450606942176819, | |
| "learning_rate": 5.699678110277762e-07, | |
| "loss": 0.7365, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.5301944728761514, | |
| "grad_norm": 0.5091442465782166, | |
| "learning_rate": 5.664287783695122e-07, | |
| "loss": 0.7343, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 1.5353121801432958, | |
| "grad_norm": 0.557119607925415, | |
| "learning_rate": 5.628863546062856e-07, | |
| "loss": 0.7298, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.5353121801432958, | |
| "eval_accuracy": 0.67304, | |
| "eval_loss": 0.6938837766647339, | |
| "eval_macro_f1": 0.6609359830000188, | |
| "eval_precision": 0.685850518502884, | |
| "eval_recall": 0.6657447133221994, | |
| "eval_runtime": 73.8645, | |
| "eval_samples_per_second": 1353.83, | |
| "eval_steps_per_second": 1.327, | |
| "step": 1500 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4885, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 1 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.0201035364007936e+17, | |
| "train_batch_size": 512, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |