diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7792 @@ +{ + "best_global_step": 264710, + "best_metric": 0.870147919438821, + "best_model_checkpoint": "T:\\laupodteam\\AIOS\\Bram\\language_modeling\\Models\\language_models\\CardioCCC\\EuroBERT\\multilabel_3ldense_20epochs_40splits/fold_0\\checkpoint-264710", + "epoch": 10.0, + "eval_steps": 500, + "global_step": 264710, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009670960673945073, + "grad_norm": 1.4626084566116333, + "learning_rate": 1.9980733633032377e-05, + "loss": 1.326, + "step": 256 + }, + { + "epoch": 0.019341921347890145, + "grad_norm": 2.9422495365142822, + "learning_rate": 1.9961391711684486e-05, + "loss": 0.9436, + "step": 512 + }, + { + "epoch": 0.029012882021835214, + "grad_norm": 3.939150333404541, + "learning_rate": 1.9942049790336598e-05, + "loss": 0.8108, + "step": 768 + }, + { + "epoch": 0.03868384269578029, + "grad_norm": 3.260474681854248, + "learning_rate": 1.9922707868988706e-05, + "loss": 0.746, + "step": 1024 + }, + { + "epoch": 0.04835480336972536, + "grad_norm": 2.8487510681152344, + "learning_rate": 1.9903365947640814e-05, + "loss": 0.6876, + "step": 1280 + }, + { + "epoch": 0.05802576404367043, + "grad_norm": 3.7067031860351562, + "learning_rate": 1.9884024026292926e-05, + "loss": 0.6491, + "step": 1536 + }, + { + "epoch": 0.06769672471761551, + "grad_norm": 3.1346232891082764, + "learning_rate": 1.9864682104945038e-05, + "loss": 0.6117, + "step": 1792 + }, + { + "epoch": 0.07736768539156058, + "grad_norm": 3.9794366359710693, + "learning_rate": 1.9845340183597147e-05, + "loss": 0.584, + "step": 2048 + }, + { + "epoch": 0.08703864606550565, + "grad_norm": 3.7597267627716064, + "learning_rate": 1.9825998262249255e-05, + "loss": 0.5541, + "step": 2304 + }, + { + "epoch": 0.09670960673945073, + "grad_norm": 3.669264078140259, + "learning_rate": 1.9806656340901363e-05, + "loss": 0.5273, + "step": 2560 + }, + { + "epoch": 0.10638056741339579, + "grad_norm": 3.203001022338867, + "learning_rate": 1.9787314419553475e-05, + "loss": 0.5148, + "step": 2816 + }, + { + "epoch": 0.11605152808734086, + "grad_norm": 4.039089202880859, + "learning_rate": 1.9767972498205587e-05, + "loss": 0.4907, + "step": 3072 + }, + { + "epoch": 0.12572248876128594, + "grad_norm": 3.2784998416900635, + "learning_rate": 1.9748630576857695e-05, + "loss": 0.4825, + "step": 3328 + }, + { + "epoch": 0.13539344943523102, + "grad_norm": 3.981826066970825, + "learning_rate": 1.9729288655509804e-05, + "loss": 0.4639, + "step": 3584 + }, + { + "epoch": 0.1450644101091761, + "grad_norm": 4.3725690841674805, + "learning_rate": 1.9709946734161916e-05, + "loss": 0.4522, + "step": 3840 + }, + { + "epoch": 0.15473537078312116, + "grad_norm": 4.14705753326416, + "learning_rate": 1.969068036719429e-05, + "loss": 0.4452, + "step": 4096 + }, + { + "epoch": 0.16440633145706623, + "grad_norm": 3.336223840713501, + "learning_rate": 1.96713384458464e-05, + "loss": 0.4292, + "step": 4352 + }, + { + "epoch": 0.1740772921310113, + "grad_norm": 4.36456823348999, + "learning_rate": 1.9651996524498508e-05, + "loss": 0.4108, + "step": 4608 + }, + { + "epoch": 0.18374825280495638, + "grad_norm": 3.929685354232788, + "learning_rate": 1.9632654603150617e-05, + "loss": 0.403, + "step": 4864 + }, + { + "epoch": 0.19341921347890145, + "grad_norm": 4.6235671043396, + "learning_rate": 1.961331268180273e-05, + "loss": 0.3899, + "step": 5120 + }, + { + "epoch": 0.2030901741528465, + "grad_norm": 4.0524492263793945, + "learning_rate": 1.959397076045484e-05, + "loss": 0.3913, + "step": 5376 + }, + { + "epoch": 0.21276113482679157, + "grad_norm": 3.8945560455322266, + "learning_rate": 1.957462883910695e-05, + "loss": 0.3781, + "step": 5632 + }, + { + "epoch": 0.22243209550073664, + "grad_norm": 4.181601047515869, + "learning_rate": 1.9555286917759057e-05, + "loss": 0.3758, + "step": 5888 + }, + { + "epoch": 0.23210305617468172, + "grad_norm": 5.5385308265686035, + "learning_rate": 1.9536020550791436e-05, + "loss": 0.3646, + "step": 6144 + }, + { + "epoch": 0.2417740168486268, + "grad_norm": 5.160412311553955, + "learning_rate": 1.9516678629443545e-05, + "loss": 0.3572, + "step": 6400 + }, + { + "epoch": 0.2514449775225719, + "grad_norm": 6.071929931640625, + "learning_rate": 1.9497336708095653e-05, + "loss": 0.3453, + "step": 6656 + }, + { + "epoch": 0.26111593819651696, + "grad_norm": 4.409694671630859, + "learning_rate": 1.947799478674776e-05, + "loss": 0.3504, + "step": 6912 + }, + { + "epoch": 0.27078689887046203, + "grad_norm": 4.06134033203125, + "learning_rate": 1.9458652865399873e-05, + "loss": 0.3404, + "step": 7168 + }, + { + "epoch": 0.2804578595444071, + "grad_norm": 3.8032515048980713, + "learning_rate": 1.9439310944051982e-05, + "loss": 0.34, + "step": 7424 + }, + { + "epoch": 0.2901288202183522, + "grad_norm": 4.95781135559082, + "learning_rate": 1.9419969022704094e-05, + "loss": 0.3249, + "step": 7680 + }, + { + "epoch": 0.29979978089229725, + "grad_norm": 3.898789167404175, + "learning_rate": 1.9400627101356202e-05, + "loss": 0.3218, + "step": 7936 + }, + { + "epoch": 0.3094707415662423, + "grad_norm": 4.6803717613220215, + "learning_rate": 1.9381285180008314e-05, + "loss": 0.3144, + "step": 8192 + }, + { + "epoch": 0.3191417022401874, + "grad_norm": 6.2057600021362305, + "learning_rate": 1.936201881304069e-05, + "loss": 0.3056, + "step": 8448 + }, + { + "epoch": 0.32881266291413247, + "grad_norm": 4.742001056671143, + "learning_rate": 1.9342676891692798e-05, + "loss": 0.3031, + "step": 8704 + }, + { + "epoch": 0.33848362358807754, + "grad_norm": 5.605335235595703, + "learning_rate": 1.9323334970344906e-05, + "loss": 0.2989, + "step": 8960 + }, + { + "epoch": 0.3481545842620226, + "grad_norm": 5.512292861938477, + "learning_rate": 1.9303993048997015e-05, + "loss": 0.2838, + "step": 9216 + }, + { + "epoch": 0.3578255449359677, + "grad_norm": 5.346792697906494, + "learning_rate": 1.9284651127649127e-05, + "loss": 0.2871, + "step": 9472 + }, + { + "epoch": 0.36749650560991276, + "grad_norm": 3.5019690990448, + "learning_rate": 1.926530920630124e-05, + "loss": 0.2794, + "step": 9728 + }, + { + "epoch": 0.37716746628385783, + "grad_norm": 4.682333946228027, + "learning_rate": 1.9245967284953347e-05, + "loss": 0.2736, + "step": 9984 + }, + { + "epoch": 0.3868384269578029, + "grad_norm": 7.038910865783691, + "learning_rate": 1.9226625363605455e-05, + "loss": 0.2786, + "step": 10240 + }, + { + "epoch": 0.396509387631748, + "grad_norm": 4.099844455718994, + "learning_rate": 1.9207283442257567e-05, + "loss": 0.2721, + "step": 10496 + }, + { + "epoch": 0.406180348305693, + "grad_norm": 3.918942928314209, + "learning_rate": 1.9187941520909676e-05, + "loss": 0.2704, + "step": 10752 + }, + { + "epoch": 0.41585130897963807, + "grad_norm": 5.411423683166504, + "learning_rate": 1.9168599599561784e-05, + "loss": 0.2604, + "step": 11008 + }, + { + "epoch": 0.42552226965358314, + "grad_norm": 7.702000617980957, + "learning_rate": 1.9149257678213896e-05, + "loss": 0.2618, + "step": 11264 + }, + { + "epoch": 0.4351932303275282, + "grad_norm": 4.103521347045898, + "learning_rate": 1.912999131124627e-05, + "loss": 0.2511, + "step": 11520 + }, + { + "epoch": 0.4448641910014733, + "grad_norm": 6.066595077514648, + "learning_rate": 1.911064938989838e-05, + "loss": 0.2551, + "step": 11776 + }, + { + "epoch": 0.45453515167541836, + "grad_norm": 6.25346565246582, + "learning_rate": 1.9091307468550492e-05, + "loss": 0.2499, + "step": 12032 + }, + { + "epoch": 0.46420611234936343, + "grad_norm": 4.310111045837402, + "learning_rate": 1.90719655472026e-05, + "loss": 0.2403, + "step": 12288 + }, + { + "epoch": 0.4738770730233085, + "grad_norm": 6.692182540893555, + "learning_rate": 1.9052623625854712e-05, + "loss": 0.24, + "step": 12544 + }, + { + "epoch": 0.4835480336972536, + "grad_norm": 4.0325751304626465, + "learning_rate": 1.903328170450682e-05, + "loss": 0.2383, + "step": 12800 + }, + { + "epoch": 0.49321899437119865, + "grad_norm": 4.97512149810791, + "learning_rate": 1.901393978315893e-05, + "loss": 0.2333, + "step": 13056 + }, + { + "epoch": 0.5028899550451438, + "grad_norm": 4.146473407745361, + "learning_rate": 1.899459786181104e-05, + "loss": 0.2342, + "step": 13312 + }, + { + "epoch": 0.5125609157190888, + "grad_norm": 6.477240562438965, + "learning_rate": 1.897525594046315e-05, + "loss": 0.23, + "step": 13568 + }, + { + "epoch": 0.5222318763930339, + "grad_norm": 5.060920715332031, + "learning_rate": 1.895591401911526e-05, + "loss": 0.2317, + "step": 13824 + }, + { + "epoch": 0.5319028370669789, + "grad_norm": 7.546407222747803, + "learning_rate": 1.893657209776737e-05, + "loss": 0.2235, + "step": 14080 + }, + { + "epoch": 0.5415737977409241, + "grad_norm": 6.76540994644165, + "learning_rate": 1.8917230176419478e-05, + "loss": 0.2189, + "step": 14336 + }, + { + "epoch": 0.5512447584148691, + "grad_norm": 6.734369277954102, + "learning_rate": 1.889788825507159e-05, + "loss": 0.2136, + "step": 14592 + }, + { + "epoch": 0.5609157190888142, + "grad_norm": 4.962408065795898, + "learning_rate": 1.8878546333723698e-05, + "loss": 0.2116, + "step": 14848 + }, + { + "epoch": 0.5705866797627592, + "grad_norm": 6.1386332511901855, + "learning_rate": 1.8859279966756074e-05, + "loss": 0.212, + "step": 15104 + }, + { + "epoch": 0.5802576404367044, + "grad_norm": 6.326384544372559, + "learning_rate": 1.8839938045408182e-05, + "loss": 0.2074, + "step": 15360 + }, + { + "epoch": 0.5899286011106494, + "grad_norm": 6.425912857055664, + "learning_rate": 1.8820596124060294e-05, + "loss": 0.2043, + "step": 15616 + }, + { + "epoch": 0.5995995617845945, + "grad_norm": 5.6023945808410645, + "learning_rate": 1.8801254202712406e-05, + "loss": 0.205, + "step": 15872 + }, + { + "epoch": 0.6092705224585395, + "grad_norm": 5.436675071716309, + "learning_rate": 1.8781912281364514e-05, + "loss": 0.2, + "step": 16128 + }, + { + "epoch": 0.6189414831324846, + "grad_norm": 3.5356578826904297, + "learning_rate": 1.8762570360016623e-05, + "loss": 0.1957, + "step": 16384 + }, + { + "epoch": 0.6286124438064297, + "grad_norm": 4.914231777191162, + "learning_rate": 1.874322843866873e-05, + "loss": 0.1957, + "step": 16640 + }, + { + "epoch": 0.6382834044803748, + "grad_norm": 5.829110145568848, + "learning_rate": 1.8723886517320843e-05, + "loss": 0.1936, + "step": 16896 + }, + { + "epoch": 0.6479543651543198, + "grad_norm": 9.775616645812988, + "learning_rate": 1.8704544595972955e-05, + "loss": 0.194, + "step": 17152 + }, + { + "epoch": 0.6576253258282649, + "grad_norm": 3.6314735412597656, + "learning_rate": 1.8685202674625063e-05, + "loss": 0.1862, + "step": 17408 + }, + { + "epoch": 0.66729628650221, + "grad_norm": 4.77644681930542, + "learning_rate": 1.8665860753277172e-05, + "loss": 0.1828, + "step": 17664 + }, + { + "epoch": 0.6769672471761551, + "grad_norm": 7.319884300231934, + "learning_rate": 1.8646518831929284e-05, + "loss": 0.1879, + "step": 17920 + }, + { + "epoch": 0.6866382078501001, + "grad_norm": 5.536057949066162, + "learning_rate": 1.8627176910581392e-05, + "loss": 0.1836, + "step": 18176 + }, + { + "epoch": 0.6963091685240452, + "grad_norm": 5.481319904327393, + "learning_rate": 1.86078349892335e-05, + "loss": 0.1802, + "step": 18432 + }, + { + "epoch": 0.7059801291979902, + "grad_norm": 6.011005401611328, + "learning_rate": 1.8588493067885612e-05, + "loss": 0.1824, + "step": 18688 + }, + { + "epoch": 0.7156510898719354, + "grad_norm": 5.187521457672119, + "learning_rate": 1.8569151146537724e-05, + "loss": 0.1763, + "step": 18944 + }, + { + "epoch": 0.7253220505458804, + "grad_norm": 6.904630661010742, + "learning_rate": 1.8549884779570096e-05, + "loss": 0.1719, + "step": 19200 + }, + { + "epoch": 0.7349930112198255, + "grad_norm": 7.806436538696289, + "learning_rate": 1.8530542858222208e-05, + "loss": 0.1707, + "step": 19456 + }, + { + "epoch": 0.7446639718937705, + "grad_norm": 3.597665548324585, + "learning_rate": 1.8511200936874317e-05, + "loss": 0.1706, + "step": 19712 + }, + { + "epoch": 0.7543349325677157, + "grad_norm": 4.756324291229248, + "learning_rate": 1.849185901552643e-05, + "loss": 0.1689, + "step": 19968 + }, + { + "epoch": 0.7640058932416607, + "grad_norm": 5.917934894561768, + "learning_rate": 1.8472517094178537e-05, + "loss": 0.1671, + "step": 20224 + }, + { + "epoch": 0.7736768539156058, + "grad_norm": 7.077094554901123, + "learning_rate": 1.8453175172830645e-05, + "loss": 0.1655, + "step": 20480 + }, + { + "epoch": 0.7833478145895508, + "grad_norm": 4.210581302642822, + "learning_rate": 1.8433833251482757e-05, + "loss": 0.1576, + "step": 20736 + }, + { + "epoch": 0.793018775263496, + "grad_norm": 4.523745059967041, + "learning_rate": 1.8414491330134866e-05, + "loss": 0.1601, + "step": 20992 + }, + { + "epoch": 0.802689735937441, + "grad_norm": 4.547347545623779, + "learning_rate": 1.8395149408786977e-05, + "loss": 0.1583, + "step": 21248 + }, + { + "epoch": 0.812360696611386, + "grad_norm": 4.029369831085205, + "learning_rate": 1.837588304181935e-05, + "loss": 0.1576, + "step": 21504 + }, + { + "epoch": 0.8220316572853311, + "grad_norm": 5.123602867126465, + "learning_rate": 1.835654112047146e-05, + "loss": 0.1514, + "step": 21760 + }, + { + "epoch": 0.8317026179592761, + "grad_norm": 6.265158176422119, + "learning_rate": 1.833719919912357e-05, + "loss": 0.157, + "step": 22016 + }, + { + "epoch": 0.8413735786332213, + "grad_norm": 8.210796356201172, + "learning_rate": 1.8317857277775682e-05, + "loss": 0.1503, + "step": 22272 + }, + { + "epoch": 0.8510445393071663, + "grad_norm": 10.078185081481934, + "learning_rate": 1.829851535642779e-05, + "loss": 0.15, + "step": 22528 + }, + { + "epoch": 0.8607154999811114, + "grad_norm": 6.486992359161377, + "learning_rate": 1.82791734350799e-05, + "loss": 0.1484, + "step": 22784 + }, + { + "epoch": 0.8703864606550564, + "grad_norm": 6.386577129364014, + "learning_rate": 1.825983151373201e-05, + "loss": 0.1476, + "step": 23040 + }, + { + "epoch": 0.8800574213290016, + "grad_norm": 7.5579328536987305, + "learning_rate": 1.8240489592384122e-05, + "loss": 0.1458, + "step": 23296 + }, + { + "epoch": 0.8897283820029466, + "grad_norm": 3.283404588699341, + "learning_rate": 1.8221223225416495e-05, + "loss": 0.144, + "step": 23552 + }, + { + "epoch": 0.8993993426768917, + "grad_norm": 7.462500095367432, + "learning_rate": 1.8201881304068606e-05, + "loss": 0.1475, + "step": 23808 + }, + { + "epoch": 0.9090703033508367, + "grad_norm": 4.219975471496582, + "learning_rate": 1.8182539382720715e-05, + "loss": 0.1415, + "step": 24064 + }, + { + "epoch": 0.9187412640247818, + "grad_norm": 5.009161949157715, + "learning_rate": 1.8163197461372823e-05, + "loss": 0.1395, + "step": 24320 + }, + { + "epoch": 0.9284122246987269, + "grad_norm": 5.921178340911865, + "learning_rate": 1.8143855540024935e-05, + "loss": 0.1381, + "step": 24576 + }, + { + "epoch": 0.938083185372672, + "grad_norm": 5.374543190002441, + "learning_rate": 1.8124513618677044e-05, + "loss": 0.1352, + "step": 24832 + }, + { + "epoch": 0.947754146046617, + "grad_norm": 3.8733317852020264, + "learning_rate": 1.8105171697329152e-05, + "loss": 0.1346, + "step": 25088 + }, + { + "epoch": 0.9574251067205621, + "grad_norm": 4.049707412719727, + "learning_rate": 1.8085829775981264e-05, + "loss": 0.1319, + "step": 25344 + }, + { + "epoch": 0.9670960673945072, + "grad_norm": 7.517127990722656, + "learning_rate": 1.806656340901364e-05, + "loss": 0.1271, + "step": 25600 + }, + { + "epoch": 0.9767670280684523, + "grad_norm": 4.687051773071289, + "learning_rate": 1.8047221487665748e-05, + "loss": 0.1279, + "step": 25856 + }, + { + "epoch": 0.9864379887423973, + "grad_norm": 4.79626989364624, + "learning_rate": 1.802787956631786e-05, + "loss": 0.1307, + "step": 26112 + }, + { + "epoch": 0.9961089494163424, + "grad_norm": 4.730831623077393, + "learning_rate": 1.8008537644969968e-05, + "loss": 0.1302, + "step": 26368 + }, + { + "epoch": 1.0, + "eval_f1_B-DISEASE": 0.7781765393765878, + "eval_f1_B-MEDICATION": 0.9037345902828137, + "eval_f1_B-PROCEDURE": 0.7974038223998922, + "eval_f1_B-SYMPTOM": 0.7777604788347432, + "eval_f1_I-DISEASE": 0.8292819119234635, + "eval_f1_I-MEDICATION": 0.9226245076350295, + "eval_f1_I-PROCEDURE": 0.8287028141772023, + "eval_f1_I-SYMPTOM": 0.7961978076970171, + "eval_f1_O": 0.9184682391128007, + "eval_f1_macro": 0.8391500790488389, + "eval_f1_micro": 0.8767886171279684, + "eval_loss": 0.26655662059783936, + "eval_precision_B-DISEASE": 0.7914206036745407, + "eval_precision_B-MEDICATION": 0.9209310918159985, + "eval_precision_B-PROCEDURE": 0.839345657415725, + "eval_precision_B-SYMPTOM": 0.7863714275719733, + "eval_precision_I-DISEASE": 0.8204249990792915, + "eval_precision_I-MEDICATION": 0.9416267415606586, + "eval_precision_I-PROCEDURE": 0.8691026329930145, + "eval_precision_I-SYMPTOM": 0.7889775161164393, + "eval_precision_O": 0.9252825164715074, + "eval_precision_macro": 0.8537203540776832, + "eval_precision_micro": 0.8833806029697474, + "eval_rauc_macro": 0.9021063420010184, + "eval_rauc_micro": 0.9278253367848399, + "eval_recall_B-DISEASE": 0.7653684461013722, + "eval_recall_B-MEDICATION": 0.8871685353265706, + "eval_recall_B-PROCEDURE": 0.7594541409993155, + "eval_recall_B-SYMPTOM": 0.7693360711841205, + "eval_recall_I-DISEASE": 0.838332141647537, + "eval_recall_I-MEDICATION": 0.9043740413603427, + "eval_recall_I-PROCEDURE": 0.7918920903816495, + "eval_recall_I-SYMPTOM": 0.8035514721684934, + "eval_recall_O": 0.9117535959899989, + "eval_recall_macro": 0.8256922816843777, + "eval_recall_micro": 0.8702942843552489, + "eval_roc_auc_B-DISEASE": 0.8808075963149171, + "eval_roc_auc_B-MEDICATION": 0.9434285957498447, + "eval_roc_auc_B-PROCEDURE": 0.87847497845947, + "eval_roc_auc_B-SYMPTOM": 0.8824080001390723, + "eval_roc_auc_I-DISEASE": 0.9071844217170272, + "eval_roc_auc_I-MEDICATION": 0.9517977149811767, + "eval_roc_auc_I-PROCEDURE": 0.8901357064770181, + "eval_roc_auc_I-SYMPTOM": 0.8850274935590098, + "eval_roc_auc_O": 0.8996925706116287, + "eval_runtime": 61.1466, + "eval_samples_per_second": 177.818, + "eval_steps_per_second": 22.242, + "step": 26471 + }, + { + "epoch": 1.0057799100902876, + "grad_norm": 3.7280209064483643, + "learning_rate": 1.798919572362208e-05, + "loss": 0.1119, + "step": 26624 + }, + { + "epoch": 1.0154508707642325, + "grad_norm": 5.684998512268066, + "learning_rate": 1.796985380227419e-05, + "loss": 0.0936, + "step": 26880 + }, + { + "epoch": 1.0251218314381776, + "grad_norm": 3.695190191268921, + "learning_rate": 1.7950511880926297e-05, + "loss": 0.0955, + "step": 27136 + }, + { + "epoch": 1.0347927921121227, + "grad_norm": 4.493898868560791, + "learning_rate": 1.793116995957841e-05, + "loss": 0.0922, + "step": 27392 + }, + { + "epoch": 1.0444637527860678, + "grad_norm": 4.638331413269043, + "learning_rate": 1.7911828038230517e-05, + "loss": 0.0923, + "step": 27648 + }, + { + "epoch": 1.0541347134600128, + "grad_norm": 4.798746585845947, + "learning_rate": 1.7892561671262893e-05, + "loss": 0.0918, + "step": 27904 + }, + { + "epoch": 1.0638056741339579, + "grad_norm": 4.274206638336182, + "learning_rate": 1.7873219749915e-05, + "loss": 0.0945, + "step": 28160 + }, + { + "epoch": 1.073476634807903, + "grad_norm": 3.235424280166626, + "learning_rate": 1.7853877828567113e-05, + "loss": 0.0919, + "step": 28416 + }, + { + "epoch": 1.0831475954818481, + "grad_norm": 4.507290363311768, + "learning_rate": 1.783453590721922e-05, + "loss": 0.0932, + "step": 28672 + }, + { + "epoch": 1.092818556155793, + "grad_norm": 10.315567016601562, + "learning_rate": 1.7815193985871333e-05, + "loss": 0.0952, + "step": 28928 + }, + { + "epoch": 1.1024895168297382, + "grad_norm": 8.936885833740234, + "learning_rate": 1.7795852064523442e-05, + "loss": 0.0899, + "step": 29184 + }, + { + "epoch": 1.1121604775036833, + "grad_norm": 2.79886531829834, + "learning_rate": 1.777651014317555e-05, + "loss": 0.0901, + "step": 29440 + }, + { + "epoch": 1.1218314381776284, + "grad_norm": 3.2710986137390137, + "learning_rate": 1.7757168221827662e-05, + "loss": 0.0883, + "step": 29696 + }, + { + "epoch": 1.1315023988515733, + "grad_norm": 4.917572498321533, + "learning_rate": 1.7737826300479774e-05, + "loss": 0.0896, + "step": 29952 + }, + { + "epoch": 1.1411733595255185, + "grad_norm": 4.567966938018799, + "learning_rate": 1.7718484379131882e-05, + "loss": 0.0914, + "step": 30208 + }, + { + "epoch": 1.1508443201994636, + "grad_norm": 3.6949679851531982, + "learning_rate": 1.769914245778399e-05, + "loss": 0.0871, + "step": 30464 + }, + { + "epoch": 1.1605152808734087, + "grad_norm": 8.372673988342285, + "learning_rate": 1.7679876090816366e-05, + "loss": 0.0909, + "step": 30720 + }, + { + "epoch": 1.1701862415473536, + "grad_norm": 3.8434600830078125, + "learning_rate": 1.7660534169468478e-05, + "loss": 0.0849, + "step": 30976 + }, + { + "epoch": 1.1798572022212988, + "grad_norm": 4.115856647491455, + "learning_rate": 1.7641192248120587e-05, + "loss": 0.0904, + "step": 31232 + }, + { + "epoch": 1.1895281628952439, + "grad_norm": 3.246572971343994, + "learning_rate": 1.7621850326772695e-05, + "loss": 0.0907, + "step": 31488 + }, + { + "epoch": 1.199199123569189, + "grad_norm": 3.668151617050171, + "learning_rate": 1.7602508405424807e-05, + "loss": 0.0864, + "step": 31744 + }, + { + "epoch": 1.208870084243134, + "grad_norm": 3.0340752601623535, + "learning_rate": 1.7583166484076915e-05, + "loss": 0.0848, + "step": 32000 + }, + { + "epoch": 1.218541044917079, + "grad_norm": 4.029708385467529, + "learning_rate": 1.7563824562729027e-05, + "loss": 0.0857, + "step": 32256 + }, + { + "epoch": 1.2282120055910242, + "grad_norm": 5.181060791015625, + "learning_rate": 1.7544482641381136e-05, + "loss": 0.0863, + "step": 32512 + }, + { + "epoch": 1.2378829662649693, + "grad_norm": 5.2258124351501465, + "learning_rate": 1.752521627441351e-05, + "loss": 0.0867, + "step": 32768 + }, + { + "epoch": 1.2475539269389142, + "grad_norm": 10.210968017578125, + "learning_rate": 1.750587435306562e-05, + "loss": 0.0878, + "step": 33024 + }, + { + "epoch": 1.2572248876128593, + "grad_norm": 3.4898252487182617, + "learning_rate": 1.748653243171773e-05, + "loss": 0.0831, + "step": 33280 + }, + { + "epoch": 1.2668958482868045, + "grad_norm": 4.27427864074707, + "learning_rate": 1.746719051036984e-05, + "loss": 0.0864, + "step": 33536 + }, + { + "epoch": 1.2765668089607494, + "grad_norm": 5.286701679229736, + "learning_rate": 1.744784858902195e-05, + "loss": 0.0799, + "step": 33792 + }, + { + "epoch": 1.2862377696346945, + "grad_norm": 3.2482128143310547, + "learning_rate": 1.742850666767406e-05, + "loss": 0.084, + "step": 34048 + }, + { + "epoch": 1.2959087303086396, + "grad_norm": 2.9942526817321777, + "learning_rate": 1.7409164746326172e-05, + "loss": 0.0834, + "step": 34304 + }, + { + "epoch": 1.3055796909825848, + "grad_norm": 9.361547470092773, + "learning_rate": 1.738982282497828e-05, + "loss": 0.0851, + "step": 34560 + }, + { + "epoch": 1.3152506516565299, + "grad_norm": 5.259056568145752, + "learning_rate": 1.737048090363039e-05, + "loss": 0.0824, + "step": 34816 + }, + { + "epoch": 1.3249216123304748, + "grad_norm": 4.652898788452148, + "learning_rate": 1.7351138982282497e-05, + "loss": 0.0807, + "step": 35072 + }, + { + "epoch": 1.33459257300442, + "grad_norm": 3.225607395172119, + "learning_rate": 1.733179706093461e-05, + "loss": 0.0833, + "step": 35328 + }, + { + "epoch": 1.344263533678365, + "grad_norm": 4.242973804473877, + "learning_rate": 1.7312530693966985e-05, + "loss": 0.0785, + "step": 35584 + }, + { + "epoch": 1.35393449435231, + "grad_norm": 3.6310012340545654, + "learning_rate": 1.7293188772619093e-05, + "loss": 0.082, + "step": 35840 + }, + { + "epoch": 1.363605455026255, + "grad_norm": 4.149777412414551, + "learning_rate": 1.72738468512712e-05, + "loss": 0.0788, + "step": 36096 + }, + { + "epoch": 1.3732764157002002, + "grad_norm": 3.4874989986419678, + "learning_rate": 1.7254504929923313e-05, + "loss": 0.0813, + "step": 36352 + }, + { + "epoch": 1.3829473763741453, + "grad_norm": 4.847866535186768, + "learning_rate": 1.7235163008575425e-05, + "loss": 0.0762, + "step": 36608 + }, + { + "epoch": 1.3926183370480905, + "grad_norm": 6.0248332023620605, + "learning_rate": 1.7215821087227534e-05, + "loss": 0.084, + "step": 36864 + }, + { + "epoch": 1.4022892977220354, + "grad_norm": 12.932506561279297, + "learning_rate": 1.7196479165879642e-05, + "loss": 0.0774, + "step": 37120 + }, + { + "epoch": 1.4119602583959805, + "grad_norm": 4.421403884887695, + "learning_rate": 1.7177137244531754e-05, + "loss": 0.0745, + "step": 37376 + }, + { + "epoch": 1.4216312190699256, + "grad_norm": 4.352053165435791, + "learning_rate": 1.7157795323183862e-05, + "loss": 0.0766, + "step": 37632 + }, + { + "epoch": 1.4313021797438705, + "grad_norm": 3.0803287029266357, + "learning_rate": 1.7138528956216238e-05, + "loss": 0.0786, + "step": 37888 + }, + { + "epoch": 1.4409731404178157, + "grad_norm": 9.046032905578613, + "learning_rate": 1.7119187034868347e-05, + "loss": 0.0792, + "step": 38144 + }, + { + "epoch": 1.4506441010917608, + "grad_norm": 5.227222442626953, + "learning_rate": 1.709984511352046e-05, + "loss": 0.0782, + "step": 38400 + }, + { + "epoch": 1.460315061765706, + "grad_norm": 9.277040481567383, + "learning_rate": 1.7080503192172567e-05, + "loss": 0.0734, + "step": 38656 + }, + { + "epoch": 1.469986022439651, + "grad_norm": 6.931709289550781, + "learning_rate": 1.706116127082468e-05, + "loss": 0.0745, + "step": 38912 + }, + { + "epoch": 1.479656983113596, + "grad_norm": 2.403529167175293, + "learning_rate": 1.7041819349476787e-05, + "loss": 0.0713, + "step": 39168 + }, + { + "epoch": 1.489327943787541, + "grad_norm": 3.0608630180358887, + "learning_rate": 1.7022477428128895e-05, + "loss": 0.0724, + "step": 39424 + }, + { + "epoch": 1.4989989044614862, + "grad_norm": 2.9378268718719482, + "learning_rate": 1.7003135506781007e-05, + "loss": 0.0719, + "step": 39680 + }, + { + "epoch": 1.508669865135431, + "grad_norm": 4.745122909545898, + "learning_rate": 1.6983869139813383e-05, + "loss": 0.0755, + "step": 39936 + }, + { + "epoch": 1.5183408258093762, + "grad_norm": 7.573899745941162, + "learning_rate": 1.696452721846549e-05, + "loss": 0.0731, + "step": 40192 + }, + { + "epoch": 1.5280117864833214, + "grad_norm": 10.226018905639648, + "learning_rate": 1.69451852971176e-05, + "loss": 0.076, + "step": 40448 + }, + { + "epoch": 1.5376827471572665, + "grad_norm": 5.051877021789551, + "learning_rate": 1.692584337576971e-05, + "loss": 0.075, + "step": 40704 + }, + { + "epoch": 1.5473537078312116, + "grad_norm": 4.395775318145752, + "learning_rate": 1.6906501454421823e-05, + "loss": 0.0735, + "step": 40960 + }, + { + "epoch": 1.5570246685051568, + "grad_norm": 4.498766899108887, + "learning_rate": 1.6887159533073932e-05, + "loss": 0.0729, + "step": 41216 + }, + { + "epoch": 1.5666956291791017, + "grad_norm": 5.933803558349609, + "learning_rate": 1.686781761172604e-05, + "loss": 0.0748, + "step": 41472 + }, + { + "epoch": 1.5763665898530468, + "grad_norm": 2.9409236907958984, + "learning_rate": 1.6848475690378152e-05, + "loss": 0.07, + "step": 41728 + }, + { + "epoch": 1.5860375505269917, + "grad_norm": 8.31312370300293, + "learning_rate": 1.6829209323410528e-05, + "loss": 0.0703, + "step": 41984 + }, + { + "epoch": 1.5957085112009368, + "grad_norm": 1.95456862449646, + "learning_rate": 1.6809867402062636e-05, + "loss": 0.0709, + "step": 42240 + }, + { + "epoch": 1.605379471874882, + "grad_norm": 3.6376004219055176, + "learning_rate": 1.6790525480714745e-05, + "loss": 0.0656, + "step": 42496 + }, + { + "epoch": 1.615050432548827, + "grad_norm": 3.3740224838256836, + "learning_rate": 1.6771183559366853e-05, + "loss": 0.0728, + "step": 42752 + }, + { + "epoch": 1.6247213932227722, + "grad_norm": 2.608504295349121, + "learning_rate": 1.6751841638018965e-05, + "loss": 0.0723, + "step": 43008 + }, + { + "epoch": 1.634392353896717, + "grad_norm": 5.380160808563232, + "learning_rate": 1.6732499716671077e-05, + "loss": 0.0686, + "step": 43264 + }, + { + "epoch": 1.6440633145706622, + "grad_norm": 1.6728038787841797, + "learning_rate": 1.6713157795323185e-05, + "loss": 0.0668, + "step": 43520 + }, + { + "epoch": 1.6537342752446071, + "grad_norm": 7.20682430267334, + "learning_rate": 1.6693815873975294e-05, + "loss": 0.0689, + "step": 43776 + }, + { + "epoch": 1.6634052359185523, + "grad_norm": 9.442398071289062, + "learning_rate": 1.667454950700767e-05, + "loss": 0.0669, + "step": 44032 + }, + { + "epoch": 1.6730761965924974, + "grad_norm": 3.7477312088012695, + "learning_rate": 1.665520758565978e-05, + "loss": 0.0647, + "step": 44288 + }, + { + "epoch": 1.6827471572664425, + "grad_norm": 4.700344085693359, + "learning_rate": 1.663586566431189e-05, + "loss": 0.0712, + "step": 44544 + }, + { + "epoch": 1.6924181179403877, + "grad_norm": 3.5288517475128174, + "learning_rate": 1.6616523742963998e-05, + "loss": 0.0678, + "step": 44800 + }, + { + "epoch": 1.7020890786143328, + "grad_norm": 7.689276695251465, + "learning_rate": 1.659718182161611e-05, + "loss": 0.0668, + "step": 45056 + }, + { + "epoch": 1.7117600392882777, + "grad_norm": 4.36802339553833, + "learning_rate": 1.657783990026822e-05, + "loss": 0.0625, + "step": 45312 + }, + { + "epoch": 1.7214309999622228, + "grad_norm": 2.3436222076416016, + "learning_rate": 1.6558573533300594e-05, + "loss": 0.0655, + "step": 45568 + }, + { + "epoch": 1.7311019606361677, + "grad_norm": 1.5666533708572388, + "learning_rate": 1.6539231611952706e-05, + "loss": 0.0644, + "step": 45824 + }, + { + "epoch": 1.7407729213101129, + "grad_norm": 4.3904266357421875, + "learning_rate": 1.6519889690604814e-05, + "loss": 0.0634, + "step": 46080 + }, + { + "epoch": 1.750443881984058, + "grad_norm": 2.4941790103912354, + "learning_rate": 1.6500547769256926e-05, + "loss": 0.0641, + "step": 46336 + }, + { + "epoch": 1.760114842658003, + "grad_norm": 3.0806963443756104, + "learning_rate": 1.6481205847909034e-05, + "loss": 0.0664, + "step": 46592 + }, + { + "epoch": 1.7697858033319482, + "grad_norm": 6.179355621337891, + "learning_rate": 1.6461863926561143e-05, + "loss": 0.0626, + "step": 46848 + }, + { + "epoch": 1.7794567640058934, + "grad_norm": 1.792417287826538, + "learning_rate": 1.644252200521325e-05, + "loss": 0.0623, + "step": 47104 + }, + { + "epoch": 1.7891277246798383, + "grad_norm": 3.0433876514434814, + "learning_rate": 1.6423180083865363e-05, + "loss": 0.0629, + "step": 47360 + }, + { + "epoch": 1.7987986853537834, + "grad_norm": 8.955931663513184, + "learning_rate": 1.6403838162517475e-05, + "loss": 0.064, + "step": 47616 + }, + { + "epoch": 1.8084696460277283, + "grad_norm": 1.9222790002822876, + "learning_rate": 1.6384496241169583e-05, + "loss": 0.062, + "step": 47872 + }, + { + "epoch": 1.8181406067016734, + "grad_norm": 6.602641582489014, + "learning_rate": 1.6365154319821692e-05, + "loss": 0.0625, + "step": 48128 + }, + { + "epoch": 1.8278115673756186, + "grad_norm": 3.8623206615448, + "learning_rate": 1.6345812398473804e-05, + "loss": 0.0641, + "step": 48384 + }, + { + "epoch": 1.8374825280495637, + "grad_norm": 3.5689499378204346, + "learning_rate": 1.6326470477125912e-05, + "loss": 0.061, + "step": 48640 + }, + { + "epoch": 1.8471534887235088, + "grad_norm": 5.1966705322265625, + "learning_rate": 1.6307128555778024e-05, + "loss": 0.0603, + "step": 48896 + }, + { + "epoch": 1.856824449397454, + "grad_norm": 8.878084182739258, + "learning_rate": 1.6287862188810396e-05, + "loss": 0.0609, + "step": 49152 + }, + { + "epoch": 1.8664954100713989, + "grad_norm": 8.031649589538574, + "learning_rate": 1.6268520267462508e-05, + "loss": 0.0609, + "step": 49408 + }, + { + "epoch": 1.876166370745344, + "grad_norm": 5.966855525970459, + "learning_rate": 1.6249178346114616e-05, + "loss": 0.0603, + "step": 49664 + }, + { + "epoch": 1.8858373314192889, + "grad_norm": 5.956678867340088, + "learning_rate": 1.622983642476673e-05, + "loss": 0.0571, + "step": 49920 + }, + { + "epoch": 1.895508292093234, + "grad_norm": 4.6985650062561035, + "learning_rate": 1.6210494503418837e-05, + "loss": 0.0608, + "step": 50176 + }, + { + "epoch": 1.9051792527671791, + "grad_norm": 2.0274322032928467, + "learning_rate": 1.6191152582070945e-05, + "loss": 0.0601, + "step": 50432 + }, + { + "epoch": 1.9148502134411243, + "grad_norm": 2.5451152324676514, + "learning_rate": 1.6171810660723057e-05, + "loss": 0.0585, + "step": 50688 + }, + { + "epoch": 1.9245211741150694, + "grad_norm": 10.576590538024902, + "learning_rate": 1.6152468739375165e-05, + "loss": 0.0604, + "step": 50944 + }, + { + "epoch": 1.9341921347890145, + "grad_norm": 2.8857650756835938, + "learning_rate": 1.6133126818027277e-05, + "loss": 0.0577, + "step": 51200 + }, + { + "epoch": 1.9438630954629594, + "grad_norm": 3.5323078632354736, + "learning_rate": 1.6113784896679386e-05, + "loss": 0.0593, + "step": 51456 + }, + { + "epoch": 1.9535340561369046, + "grad_norm": 1.838157057762146, + "learning_rate": 1.6094442975331498e-05, + "loss": 0.0607, + "step": 51712 + }, + { + "epoch": 1.9632050168108495, + "grad_norm": 3.0781214237213135, + "learning_rate": 1.6075101053983606e-05, + "loss": 0.0579, + "step": 51968 + }, + { + "epoch": 1.9728759774847946, + "grad_norm": 3.3382725715637207, + "learning_rate": 1.6055759132635714e-05, + "loss": 0.0583, + "step": 52224 + }, + { + "epoch": 1.9825469381587397, + "grad_norm": 11.693552017211914, + "learning_rate": 1.6036417211287826e-05, + "loss": 0.06, + "step": 52480 + }, + { + "epoch": 1.9922178988326849, + "grad_norm": 2.506922960281372, + "learning_rate": 1.6017075289939935e-05, + "loss": 0.0565, + "step": 52736 + }, + { + "epoch": 2.0, + "eval_f1_B-DISEASE": 0.795667459204617, + "eval_f1_B-MEDICATION": 0.921751878321422, + "eval_f1_B-PROCEDURE": 0.8160781701088163, + "eval_f1_B-SYMPTOM": 0.7897939991838038, + "eval_f1_I-DISEASE": 0.840603319768283, + "eval_f1_I-MEDICATION": 0.9362919771083036, + "eval_f1_I-PROCEDURE": 0.8357941949344769, + "eval_f1_I-SYMPTOM": 0.7996366525364875, + "eval_f1_O": 0.9230678952157123, + "eval_f1_macro": 0.8509650607091025, + "eval_f1_micro": 0.8840404124180135, + "eval_loss": 0.31923601031303406, + "eval_precision_B-DISEASE": 0.8158783079808294, + "eval_precision_B-MEDICATION": 0.9499527856468366, + "eval_precision_B-PROCEDURE": 0.848526831070472, + "eval_precision_B-SYMPTOM": 0.8200744316297579, + "eval_precision_I-DISEASE": 0.8429946510571731, + "eval_precision_I-MEDICATION": 0.9561669515355351, + "eval_precision_I-PROCEDURE": 0.8788058788058788, + "eval_precision_I-SYMPTOM": 0.8278156741398548, + "eval_precision_O": 0.9126516403530364, + "eval_precision_macro": 0.8725407946910416, + "eval_precision_micro": 0.8876748775758254, + "eval_rauc_macro": 0.904220184377927, + "eval_rauc_micro": 0.9331179355327206, + "eval_recall_B-DISEASE": 0.7764337272943602, + "eval_recall_B-MEDICATION": 0.8951770777718455, + "eval_recall_B-PROCEDURE": 0.7860198494182067, + "eval_recall_B-SYMPTOM": 0.7616700889801505, + "eval_recall_I-DISEASE": 0.8382255171289154, + "eval_recall_I-MEDICATION": 0.9172264240757392, + "eval_recall_I-PROCEDURE": 0.7967963312035382, + "eval_recall_I-SYMPTOM": 0.7733129163980228, + "eval_recall_O": 0.9337246601871894, + "eval_recall_macro": 0.8309540658286632, + "eval_recall_micro": 0.8804355875575709, + "eval_roc_auc_B-DISEASE": 0.8865867148232266, + "eval_roc_auc_B-MEDICATION": 0.9474921532385823, + "eval_roc_auc_B-PROCEDURE": 0.8918013132047432, + "eval_roc_auc_B-SYMPTOM": 0.8790279783173326, + "eval_roc_auc_I-DISEASE": 0.9089187734702582, + "eval_roc_auc_I-MEDICATION": 0.9583212327636237, + "eval_roc_auc_I-PROCEDURE": 0.8930449895535946, + "eval_roc_auc_I-SYMPTOM": 0.8741219681493123, + "eval_roc_auc_O": 0.8986665358806698, + "eval_runtime": 61.4999, + "eval_samples_per_second": 176.797, + "eval_steps_per_second": 22.114, + "step": 52942 + }, + { + "epoch": 2.00188885950663, + "grad_norm": 6.549783706665039, + "learning_rate": 1.5997733368592046e-05, + "loss": 0.0527, + "step": 52992 + }, + { + "epoch": 2.011559820180575, + "grad_norm": 17.193124771118164, + "learning_rate": 1.597846700162442e-05, + "loss": 0.0401, + "step": 53248 + }, + { + "epoch": 2.0212307808545202, + "grad_norm": 9.684758186340332, + "learning_rate": 1.595912508027653e-05, + "loss": 0.0405, + "step": 53504 + }, + { + "epoch": 2.030901741528465, + "grad_norm": 2.453227996826172, + "learning_rate": 1.5939783158928642e-05, + "loss": 0.0412, + "step": 53760 + }, + { + "epoch": 2.04057270220241, + "grad_norm": 5.099764347076416, + "learning_rate": 1.592044123758075e-05, + "loss": 0.0392, + "step": 54016 + }, + { + "epoch": 2.050243662876355, + "grad_norm": 2.8053245544433594, + "learning_rate": 1.590109931623286e-05, + "loss": 0.0388, + "step": 54272 + }, + { + "epoch": 2.0599146235503003, + "grad_norm": 1.1759517192840576, + "learning_rate": 1.5881832949265235e-05, + "loss": 0.0411, + "step": 54528 + }, + { + "epoch": 2.0695855842242454, + "grad_norm": 5.915517330169678, + "learning_rate": 1.5862491027917343e-05, + "loss": 0.0389, + "step": 54784 + }, + { + "epoch": 2.0792565448981906, + "grad_norm": 8.0426664352417, + "learning_rate": 1.5843149106569455e-05, + "loss": 0.0382, + "step": 55040 + }, + { + "epoch": 2.0889275055721357, + "grad_norm": 10.392659187316895, + "learning_rate": 1.5823807185221564e-05, + "loss": 0.0421, + "step": 55296 + }, + { + "epoch": 2.098598466246081, + "grad_norm": 7.73749303817749, + "learning_rate": 1.5804465263873675e-05, + "loss": 0.0389, + "step": 55552 + }, + { + "epoch": 2.1082694269200255, + "grad_norm": 5.712843418121338, + "learning_rate": 1.5785123342525784e-05, + "loss": 0.0403, + "step": 55808 + }, + { + "epoch": 2.1179403875939706, + "grad_norm": 2.4857349395751953, + "learning_rate": 1.5765781421177896e-05, + "loss": 0.0389, + "step": 56064 + }, + { + "epoch": 2.1276113482679158, + "grad_norm": 2.2378458976745605, + "learning_rate": 1.5746439499830004e-05, + "loss": 0.0395, + "step": 56320 + }, + { + "epoch": 2.137282308941861, + "grad_norm": 5.0309739112854, + "learning_rate": 1.5727097578482113e-05, + "loss": 0.0407, + "step": 56576 + }, + { + "epoch": 2.146953269615806, + "grad_norm": 3.6221115589141846, + "learning_rate": 1.570775565713422e-05, + "loss": 0.0413, + "step": 56832 + }, + { + "epoch": 2.156624230289751, + "grad_norm": 4.810079574584961, + "learning_rate": 1.5688413735786333e-05, + "loss": 0.04, + "step": 57088 + }, + { + "epoch": 2.1662951909636963, + "grad_norm": 2.51513671875, + "learning_rate": 1.5669071814438445e-05, + "loss": 0.0404, + "step": 57344 + }, + { + "epoch": 2.1759661516376414, + "grad_norm": 2.1976306438446045, + "learning_rate": 1.5649805447470817e-05, + "loss": 0.0418, + "step": 57600 + }, + { + "epoch": 2.185637112311586, + "grad_norm": 2.1660706996917725, + "learning_rate": 1.563046352612293e-05, + "loss": 0.0379, + "step": 57856 + }, + { + "epoch": 2.195308072985531, + "grad_norm": 3.0370383262634277, + "learning_rate": 1.561112160477504e-05, + "loss": 0.0386, + "step": 58112 + }, + { + "epoch": 2.2049790336594763, + "grad_norm": 2.958423614501953, + "learning_rate": 1.5591855237807413e-05, + "loss": 0.0395, + "step": 58368 + }, + { + "epoch": 2.2146499943334215, + "grad_norm": 0.8426064848899841, + "learning_rate": 1.5572513316459525e-05, + "loss": 0.0396, + "step": 58624 + }, + { + "epoch": 2.2243209550073666, + "grad_norm": 2.25600528717041, + "learning_rate": 1.5553171395111633e-05, + "loss": 0.0405, + "step": 58880 + }, + { + "epoch": 2.2339919156813117, + "grad_norm": 2.133103609085083, + "learning_rate": 1.553382947376374e-05, + "loss": 0.0391, + "step": 59136 + }, + { + "epoch": 2.243662876355257, + "grad_norm": 27.295085906982422, + "learning_rate": 1.5514487552415853e-05, + "loss": 0.0382, + "step": 59392 + }, + { + "epoch": 2.253333837029202, + "grad_norm": 3.368842840194702, + "learning_rate": 1.5495145631067962e-05, + "loss": 0.0407, + "step": 59648 + }, + { + "epoch": 2.2630047977031467, + "grad_norm": 1.4239710569381714, + "learning_rate": 1.5475803709720074e-05, + "loss": 0.0376, + "step": 59904 + }, + { + "epoch": 2.272675758377092, + "grad_norm": 4.708951473236084, + "learning_rate": 1.5456461788372182e-05, + "loss": 0.0378, + "step": 60160 + }, + { + "epoch": 2.282346719051037, + "grad_norm": 11.830906867980957, + "learning_rate": 1.5437119867024294e-05, + "loss": 0.0391, + "step": 60416 + }, + { + "epoch": 2.292017679724982, + "grad_norm": 2.8490591049194336, + "learning_rate": 1.5417777945676402e-05, + "loss": 0.0395, + "step": 60672 + }, + { + "epoch": 2.301688640398927, + "grad_norm": 3.398808240890503, + "learning_rate": 1.539843602432851e-05, + "loss": 0.0407, + "step": 60928 + }, + { + "epoch": 2.3113596010728723, + "grad_norm": 4.087090492248535, + "learning_rate": 1.537909410298062e-05, + "loss": 0.0361, + "step": 61184 + }, + { + "epoch": 2.3210305617468174, + "grad_norm": 2.446629762649536, + "learning_rate": 1.535975218163273e-05, + "loss": 0.0361, + "step": 61440 + }, + { + "epoch": 2.330701522420762, + "grad_norm": 1.8066984415054321, + "learning_rate": 1.5340410260284843e-05, + "loss": 0.0399, + "step": 61696 + }, + { + "epoch": 2.3403724830947072, + "grad_norm": 2.6665291786193848, + "learning_rate": 1.532106833893695e-05, + "loss": 0.0396, + "step": 61952 + }, + { + "epoch": 2.3500434437686524, + "grad_norm": 0.9438909292221069, + "learning_rate": 1.530172641758906e-05, + "loss": 0.0385, + "step": 62208 + }, + { + "epoch": 2.3597144044425975, + "grad_norm": 1.689215898513794, + "learning_rate": 1.528238449624117e-05, + "loss": 0.0399, + "step": 62464 + }, + { + "epoch": 2.3693853651165426, + "grad_norm": 2.397761583328247, + "learning_rate": 1.526304257489328e-05, + "loss": 0.0363, + "step": 62720 + }, + { + "epoch": 2.3790563257904878, + "grad_norm": 17.148651123046875, + "learning_rate": 1.524370065354539e-05, + "loss": 0.0379, + "step": 62976 + }, + { + "epoch": 2.388727286464433, + "grad_norm": 7.684645652770996, + "learning_rate": 1.52243587321975e-05, + "loss": 0.038, + "step": 63232 + }, + { + "epoch": 2.398398247138378, + "grad_norm": 2.3121824264526367, + "learning_rate": 1.520501681084961e-05, + "loss": 0.0363, + "step": 63488 + }, + { + "epoch": 2.408069207812323, + "grad_norm": 4.012951374053955, + "learning_rate": 1.518567488950172e-05, + "loss": 0.0369, + "step": 63744 + }, + { + "epoch": 2.417740168486268, + "grad_norm": 3.992743730545044, + "learning_rate": 1.5166332968153829e-05, + "loss": 0.037, + "step": 64000 + }, + { + "epoch": 2.427411129160213, + "grad_norm": 2.0966529846191406, + "learning_rate": 1.5147066601186205e-05, + "loss": 0.0398, + "step": 64256 + }, + { + "epoch": 2.437082089834158, + "grad_norm": 4.283209800720215, + "learning_rate": 1.5127724679838316e-05, + "loss": 0.0375, + "step": 64512 + }, + { + "epoch": 2.446753050508103, + "grad_norm": 7.317829608917236, + "learning_rate": 1.5108382758490425e-05, + "loss": 0.0385, + "step": 64768 + }, + { + "epoch": 2.4564240111820483, + "grad_norm": 2.1492598056793213, + "learning_rate": 1.5089040837142535e-05, + "loss": 0.038, + "step": 65024 + }, + { + "epoch": 2.4660949718559935, + "grad_norm": 4.824232578277588, + "learning_rate": 1.5069698915794643e-05, + "loss": 0.0404, + "step": 65280 + }, + { + "epoch": 2.4757659325299386, + "grad_norm": 2.757894992828369, + "learning_rate": 1.5050356994446755e-05, + "loss": 0.0379, + "step": 65536 + }, + { + "epoch": 2.4854368932038833, + "grad_norm": 3.4582314491271973, + "learning_rate": 1.5031015073098865e-05, + "loss": 0.0352, + "step": 65792 + }, + { + "epoch": 2.4951078538778284, + "grad_norm": 2.4710917472839355, + "learning_rate": 1.5011673151750974e-05, + "loss": 0.0363, + "step": 66048 + }, + { + "epoch": 2.5047788145517735, + "grad_norm": 2.776700973510742, + "learning_rate": 1.4992331230403084e-05, + "loss": 0.0358, + "step": 66304 + }, + { + "epoch": 2.5144497752257187, + "grad_norm": 3.758176326751709, + "learning_rate": 1.4972989309055194e-05, + "loss": 0.0356, + "step": 66560 + }, + { + "epoch": 2.524120735899664, + "grad_norm": 2.5761642456054688, + "learning_rate": 1.4953647387707304e-05, + "loss": 0.0404, + "step": 66816 + }, + { + "epoch": 2.533791696573609, + "grad_norm": 1.3668540716171265, + "learning_rate": 1.4934381020739678e-05, + "loss": 0.0366, + "step": 67072 + }, + { + "epoch": 2.543462657247554, + "grad_norm": 3.8430099487304688, + "learning_rate": 1.4915039099391788e-05, + "loss": 0.0372, + "step": 67328 + }, + { + "epoch": 2.5531336179214987, + "grad_norm": 5.29500675201416, + "learning_rate": 1.4895697178043897e-05, + "loss": 0.0358, + "step": 67584 + }, + { + "epoch": 2.5628045785954443, + "grad_norm": 0.8562812805175781, + "learning_rate": 1.4876355256696009e-05, + "loss": 0.0374, + "step": 67840 + }, + { + "epoch": 2.572475539269389, + "grad_norm": 4.466825008392334, + "learning_rate": 1.4857013335348119e-05, + "loss": 0.0359, + "step": 68096 + }, + { + "epoch": 2.582146499943334, + "grad_norm": 12.638843536376953, + "learning_rate": 1.4837746968380493e-05, + "loss": 0.0362, + "step": 68352 + }, + { + "epoch": 2.5918174606172792, + "grad_norm": 4.4277119636535645, + "learning_rate": 1.4818405047032603e-05, + "loss": 0.036, + "step": 68608 + }, + { + "epoch": 2.6014884212912244, + "grad_norm": 1.1809728145599365, + "learning_rate": 1.4799063125684711e-05, + "loss": 0.035, + "step": 68864 + }, + { + "epoch": 2.6111593819651695, + "grad_norm": 4.5768327713012695, + "learning_rate": 1.4779721204336823e-05, + "loss": 0.0374, + "step": 69120 + }, + { + "epoch": 2.6208303426391146, + "grad_norm": 4.537430763244629, + "learning_rate": 1.4760379282988933e-05, + "loss": 0.0344, + "step": 69376 + }, + { + "epoch": 2.6305013033130598, + "grad_norm": 1.4762442111968994, + "learning_rate": 1.4741037361641042e-05, + "loss": 0.0374, + "step": 69632 + }, + { + "epoch": 2.6401722639870044, + "grad_norm": 1.5577633380889893, + "learning_rate": 1.4721695440293152e-05, + "loss": 0.0353, + "step": 69888 + }, + { + "epoch": 2.6498432246609496, + "grad_norm": 4.222722053527832, + "learning_rate": 1.4702353518945262e-05, + "loss": 0.0359, + "step": 70144 + }, + { + "epoch": 2.6595141853348947, + "grad_norm": 1.9563344717025757, + "learning_rate": 1.4683011597597372e-05, + "loss": 0.0379, + "step": 70400 + }, + { + "epoch": 2.66918514600884, + "grad_norm": 5.85068416595459, + "learning_rate": 1.466366967624948e-05, + "loss": 0.0344, + "step": 70656 + }, + { + "epoch": 2.678856106682785, + "grad_norm": 2.2116239070892334, + "learning_rate": 1.464432775490159e-05, + "loss": 0.039, + "step": 70912 + }, + { + "epoch": 2.68852706735673, + "grad_norm": 4.683871269226074, + "learning_rate": 1.4624985833553702e-05, + "loss": 0.0343, + "step": 71168 + }, + { + "epoch": 2.698198028030675, + "grad_norm": 1.9998408555984497, + "learning_rate": 1.460564391220581e-05, + "loss": 0.0367, + "step": 71424 + }, + { + "epoch": 2.70786898870462, + "grad_norm": 1.950804352760315, + "learning_rate": 1.4586301990857921e-05, + "loss": 0.0354, + "step": 71680 + }, + { + "epoch": 2.7175399493785655, + "grad_norm": 2.9149844646453857, + "learning_rate": 1.456696006951003e-05, + "loss": 0.0339, + "step": 71936 + }, + { + "epoch": 2.72721091005251, + "grad_norm": 4.158403396606445, + "learning_rate": 1.4547618148162141e-05, + "loss": 0.0358, + "step": 72192 + }, + { + "epoch": 2.7368818707264553, + "grad_norm": 1.9110437631607056, + "learning_rate": 1.4528351781194517e-05, + "loss": 0.0363, + "step": 72448 + }, + { + "epoch": 2.7465528314004004, + "grad_norm": 4.942687034606934, + "learning_rate": 1.4509009859846625e-05, + "loss": 0.0331, + "step": 72704 + }, + { + "epoch": 2.7562237920743455, + "grad_norm": 4.669269561767578, + "learning_rate": 1.4489667938498735e-05, + "loss": 0.0339, + "step": 72960 + }, + { + "epoch": 2.7658947527482907, + "grad_norm": 2.782804012298584, + "learning_rate": 1.4470326017150846e-05, + "loss": 0.0337, + "step": 73216 + }, + { + "epoch": 2.775565713422236, + "grad_norm": 0.8589828014373779, + "learning_rate": 1.4450984095802956e-05, + "loss": 0.0348, + "step": 73472 + }, + { + "epoch": 2.785236674096181, + "grad_norm": 6.355395793914795, + "learning_rate": 1.4431642174455066e-05, + "loss": 0.0347, + "step": 73728 + }, + { + "epoch": 2.7949076347701256, + "grad_norm": 1.7806596755981445, + "learning_rate": 1.4412300253107174e-05, + "loss": 0.035, + "step": 73984 + }, + { + "epoch": 2.8045785954440707, + "grad_norm": 5.5398850440979, + "learning_rate": 1.4392958331759286e-05, + "loss": 0.0309, + "step": 74240 + }, + { + "epoch": 2.814249556118016, + "grad_norm": 1.2205835580825806, + "learning_rate": 1.437369196479166e-05, + "loss": 0.0318, + "step": 74496 + }, + { + "epoch": 2.823920516791961, + "grad_norm": 4.248105525970459, + "learning_rate": 1.435435004344377e-05, + "loss": 0.0347, + "step": 74752 + }, + { + "epoch": 2.833591477465906, + "grad_norm": 1.5058479309082031, + "learning_rate": 1.4335083676476144e-05, + "loss": 0.0338, + "step": 75008 + }, + { + "epoch": 2.8432624381398512, + "grad_norm": 3.8759660720825195, + "learning_rate": 1.4315741755128254e-05, + "loss": 0.0343, + "step": 75264 + }, + { + "epoch": 2.8529333988137964, + "grad_norm": 16.488771438598633, + "learning_rate": 1.4296399833780366e-05, + "loss": 0.0338, + "step": 75520 + }, + { + "epoch": 2.862604359487741, + "grad_norm": 6.564029693603516, + "learning_rate": 1.4277057912432475e-05, + "loss": 0.0312, + "step": 75776 + }, + { + "epoch": 2.8722753201616866, + "grad_norm": 1.345203161239624, + "learning_rate": 1.4257715991084585e-05, + "loss": 0.0332, + "step": 76032 + }, + { + "epoch": 2.8819462808356313, + "grad_norm": 2.0033822059631348, + "learning_rate": 1.4238374069736693e-05, + "loss": 0.0322, + "step": 76288 + }, + { + "epoch": 2.8916172415095764, + "grad_norm": 6.844017505645752, + "learning_rate": 1.4219032148388805e-05, + "loss": 0.0319, + "step": 76544 + }, + { + "epoch": 2.9012882021835216, + "grad_norm": 4.2425150871276855, + "learning_rate": 1.4199690227040915e-05, + "loss": 0.0322, + "step": 76800 + }, + { + "epoch": 2.9109591628574667, + "grad_norm": 1.8265749216079712, + "learning_rate": 1.4180348305693024e-05, + "loss": 0.0322, + "step": 77056 + }, + { + "epoch": 2.920630123531412, + "grad_norm": 3.0552210807800293, + "learning_rate": 1.4161006384345134e-05, + "loss": 0.033, + "step": 77312 + }, + { + "epoch": 2.930301084205357, + "grad_norm": 2.102796792984009, + "learning_rate": 1.4141664462997244e-05, + "loss": 0.0346, + "step": 77568 + }, + { + "epoch": 2.939972044879302, + "grad_norm": 1.903757929801941, + "learning_rate": 1.4122322541649354e-05, + "loss": 0.0315, + "step": 77824 + }, + { + "epoch": 2.9496430055532468, + "grad_norm": 9.322936058044434, + "learning_rate": 1.4102980620301462e-05, + "loss": 0.0336, + "step": 78080 + }, + { + "epoch": 2.959313966227192, + "grad_norm": 1.862209677696228, + "learning_rate": 1.4083714253333838e-05, + "loss": 0.0328, + "step": 78336 + }, + { + "epoch": 2.968984926901137, + "grad_norm": 3.4806630611419678, + "learning_rate": 1.4064372331985946e-05, + "loss": 0.0321, + "step": 78592 + }, + { + "epoch": 2.978655887575082, + "grad_norm": 7.490905284881592, + "learning_rate": 1.4045030410638058e-05, + "loss": 0.0321, + "step": 78848 + }, + { + "epoch": 2.9883268482490273, + "grad_norm": 2.008312940597534, + "learning_rate": 1.4025688489290168e-05, + "loss": 0.032, + "step": 79104 + }, + { + "epoch": 2.9979978089229724, + "grad_norm": 5.4629645347595215, + "learning_rate": 1.4006346567942277e-05, + "loss": 0.0316, + "step": 79360 + }, + { + "epoch": 3.0, + "eval_f1_B-DISEASE": 0.8058758050409478, + "eval_f1_B-MEDICATION": 0.9331298806211291, + "eval_f1_B-PROCEDURE": 0.8246479186103336, + "eval_f1_B-SYMPTOM": 0.7958159437899519, + "eval_f1_I-DISEASE": 0.8446797498113607, + "eval_f1_I-MEDICATION": 0.9445428820775924, + "eval_f1_I-PROCEDURE": 0.8431497703012539, + "eval_f1_I-SYMPTOM": 0.8064467139982535, + "eval_f1_O": 0.9242880224344192, + "eval_f1_macro": 0.8580640762983602, + "eval_f1_micro": 0.8867284724320147, + "eval_loss": 0.346194326877594, + "eval_precision_B-DISEASE": 0.8078026619909142, + "eval_precision_B-MEDICATION": 0.9413256066642521, + "eval_precision_B-PROCEDURE": 0.8576841327049256, + "eval_precision_B-SYMPTOM": 0.8196756992055719, + "eval_precision_I-DISEASE": 0.8398561507936508, + "eval_precision_I-MEDICATION": 0.9445678620543743, + "eval_precision_I-PROCEDURE": 0.8685191753683246, + "eval_precision_I-SYMPTOM": 0.8141611818689563, + "eval_precision_O": 0.922143895574372, + "eval_precision_macro": 0.8684151518028158, + "eval_precision_micro": 0.8893727212165927, + "eval_rauc_macro": 0.9135258778318731, + "eval_rauc_micro": 0.9350417116157864, + "eval_recall_B-DISEASE": 0.8039581185055922, + "eval_recall_B-MEDICATION": 0.9250756362342054, + "eval_recall_B-PROCEDURE": 0.7940622861054073, + "eval_recall_B-SYMPTOM": 0.773305954825462, + "eval_recall_I-DISEASE": 0.8495590762553469, + "eval_recall_I-MEDICATION": 0.944517903422013, + "eval_recall_I-PROCEDURE": 0.8192203807518748, + "eval_recall_I-SYMPTOM": 0.7988770685579196, + "eval_recall_O": 0.9264421433839181, + "eval_recall_macro": 0.8483353964490822, + "eval_recall_micro": 0.8840999005950059, + "eval_roc_auc_B-DISEASE": 0.9001994739703317, + "eval_roc_auc_B-MEDICATION": 0.9624199730237505, + "eval_roc_auc_B-PROCEDURE": 0.89589622737178, + "eval_roc_auc_B-SYMPTOM": 0.8848133448211472, + "eval_roc_auc_I-DISEASE": 0.914201809349682, + "eval_roc_auc_I-MEDICATION": 0.9718740532463399, + "eval_roc_auc_I-PROCEDURE": 0.9035684876000457, + "eval_roc_auc_I-SYMPTOM": 0.8852284244551051, + "eval_roc_auc_O": 0.903531106648675, + "eval_runtime": 61.221, + "eval_samples_per_second": 177.602, + "eval_steps_per_second": 22.215, + "step": 79413 + }, + { + "epoch": 3.0076687695969175, + "grad_norm": 4.327937602996826, + "learning_rate": 1.3987004646594387e-05, + "loss": 0.0237, + "step": 79616 + }, + { + "epoch": 3.0173397302708627, + "grad_norm": 1.6373872756958008, + "learning_rate": 1.3967662725246499e-05, + "loss": 0.022, + "step": 79872 + }, + { + "epoch": 3.0270106909448073, + "grad_norm": 3.250305652618408, + "learning_rate": 1.3948320803898607e-05, + "loss": 0.0246, + "step": 80128 + }, + { + "epoch": 3.0366816516187525, + "grad_norm": 1.6971690654754639, + "learning_rate": 1.3928978882550717e-05, + "loss": 0.0236, + "step": 80384 + }, + { + "epoch": 3.0463526122926976, + "grad_norm": 5.478879451751709, + "learning_rate": 1.3909636961202826e-05, + "loss": 0.0219, + "step": 80640 + }, + { + "epoch": 3.0560235729666427, + "grad_norm": 2.4806175231933594, + "learning_rate": 1.3890295039854938e-05, + "loss": 0.0236, + "step": 80896 + }, + { + "epoch": 3.065694533640588, + "grad_norm": 1.5560436248779297, + "learning_rate": 1.3870953118507046e-05, + "loss": 0.0229, + "step": 81152 + }, + { + "epoch": 3.075365494314533, + "grad_norm": 10.51571273803711, + "learning_rate": 1.3851611197159156e-05, + "loss": 0.0246, + "step": 81408 + }, + { + "epoch": 3.085036454988478, + "grad_norm": 4.828140735626221, + "learning_rate": 1.3832269275811265e-05, + "loss": 0.0225, + "step": 81664 + }, + { + "epoch": 3.0947074156624232, + "grad_norm": 0.9167439937591553, + "learning_rate": 1.3812927354463376e-05, + "loss": 0.0241, + "step": 81920 + }, + { + "epoch": 3.104378376336368, + "grad_norm": 4.8147454261779785, + "learning_rate": 1.3793585433115487e-05, + "loss": 0.0226, + "step": 82176 + }, + { + "epoch": 3.114049337010313, + "grad_norm": 5.181445121765137, + "learning_rate": 1.3774243511767595e-05, + "loss": 0.0202, + "step": 82432 + }, + { + "epoch": 3.123720297684258, + "grad_norm": 1.4374691247940063, + "learning_rate": 1.375497714479997e-05, + "loss": 0.0221, + "step": 82688 + }, + { + "epoch": 3.1333912583582033, + "grad_norm": 3.805264949798584, + "learning_rate": 1.3735635223452082e-05, + "loss": 0.0241, + "step": 82944 + }, + { + "epoch": 3.1430622190321484, + "grad_norm": 3.3050577640533447, + "learning_rate": 1.3716368856484456e-05, + "loss": 0.0225, + "step": 83200 + }, + { + "epoch": 3.1527331797060936, + "grad_norm": 1.6958988904953003, + "learning_rate": 1.3697026935136567e-05, + "loss": 0.0233, + "step": 83456 + }, + { + "epoch": 3.1624041403800387, + "grad_norm": 2.1749958992004395, + "learning_rate": 1.3677685013788675e-05, + "loss": 0.024, + "step": 83712 + }, + { + "epoch": 3.1720751010539834, + "grad_norm": 1.0387561321258545, + "learning_rate": 1.3658343092440785e-05, + "loss": 0.0235, + "step": 83968 + }, + { + "epoch": 3.1817460617279285, + "grad_norm": 4.311033248901367, + "learning_rate": 1.3639001171092895e-05, + "loss": 0.025, + "step": 84224 + }, + { + "epoch": 3.1914170224018736, + "grad_norm": 1.0629218816757202, + "learning_rate": 1.3619659249745005e-05, + "loss": 0.0242, + "step": 84480 + }, + { + "epoch": 3.2010879830758188, + "grad_norm": 4.607676029205322, + "learning_rate": 1.3600317328397114e-05, + "loss": 0.0218, + "step": 84736 + }, + { + "epoch": 3.210758943749764, + "grad_norm": 1.2249504327774048, + "learning_rate": 1.3580975407049224e-05, + "loss": 0.0217, + "step": 84992 + }, + { + "epoch": 3.220429904423709, + "grad_norm": 3.8173789978027344, + "learning_rate": 1.3561633485701336e-05, + "loss": 0.0212, + "step": 85248 + }, + { + "epoch": 3.230100865097654, + "grad_norm": 5.286001205444336, + "learning_rate": 1.3542291564353444e-05, + "loss": 0.0234, + "step": 85504 + }, + { + "epoch": 3.2397718257715993, + "grad_norm": 0.9266921281814575, + "learning_rate": 1.3522949643005554e-05, + "loss": 0.0227, + "step": 85760 + }, + { + "epoch": 3.2494427864455444, + "grad_norm": 1.9286329746246338, + "learning_rate": 1.3503607721657663e-05, + "loss": 0.0216, + "step": 86016 + }, + { + "epoch": 3.259113747119489, + "grad_norm": 7.073780536651611, + "learning_rate": 1.3484265800309775e-05, + "loss": 0.0229, + "step": 86272 + }, + { + "epoch": 3.268784707793434, + "grad_norm": 11.505043983459473, + "learning_rate": 1.3464923878961885e-05, + "loss": 0.0216, + "step": 86528 + }, + { + "epoch": 3.2784556684673793, + "grad_norm": 2.463899850845337, + "learning_rate": 1.3445581957613993e-05, + "loss": 0.0238, + "step": 86784 + }, + { + "epoch": 3.2881266291413245, + "grad_norm": 2.2217423915863037, + "learning_rate": 1.3426240036266103e-05, + "loss": 0.0244, + "step": 87040 + }, + { + "epoch": 3.2977975898152696, + "grad_norm": 2.690162181854248, + "learning_rate": 1.3406898114918213e-05, + "loss": 0.0233, + "step": 87296 + }, + { + "epoch": 3.3074685504892147, + "grad_norm": 0.6873595118522644, + "learning_rate": 1.3387631747950589e-05, + "loss": 0.0212, + "step": 87552 + }, + { + "epoch": 3.31713951116316, + "grad_norm": 12.570816040039062, + "learning_rate": 1.3368289826602698e-05, + "loss": 0.021, + "step": 87808 + }, + { + "epoch": 3.3268104718371045, + "grad_norm": 1.3393861055374146, + "learning_rate": 1.3348947905254808e-05, + "loss": 0.0204, + "step": 88064 + }, + { + "epoch": 3.3364814325110497, + "grad_norm": 0.6096575856208801, + "learning_rate": 1.332960598390692e-05, + "loss": 0.0218, + "step": 88320 + }, + { + "epoch": 3.346152393184995, + "grad_norm": 4.549058437347412, + "learning_rate": 1.3310264062559028e-05, + "loss": 0.0232, + "step": 88576 + }, + { + "epoch": 3.35582335385894, + "grad_norm": 2.827040195465088, + "learning_rate": 1.3290922141211138e-05, + "loss": 0.023, + "step": 88832 + }, + { + "epoch": 3.365494314532885, + "grad_norm": 4.081531524658203, + "learning_rate": 1.3271580219863246e-05, + "loss": 0.0202, + "step": 89088 + }, + { + "epoch": 3.37516527520683, + "grad_norm": 16.549381256103516, + "learning_rate": 1.3252238298515358e-05, + "loss": 0.023, + "step": 89344 + }, + { + "epoch": 3.3848362358807753, + "grad_norm": 1.166902780532837, + "learning_rate": 1.3232896377167468e-05, + "loss": 0.0217, + "step": 89600 + }, + { + "epoch": 3.3945071965547204, + "grad_norm": 1.8393259048461914, + "learning_rate": 1.3213554455819577e-05, + "loss": 0.0226, + "step": 89856 + }, + { + "epoch": 3.4041781572286656, + "grad_norm": 3.180155038833618, + "learning_rate": 1.3194288088851953e-05, + "loss": 0.0216, + "step": 90112 + }, + { + "epoch": 3.4138491179026103, + "grad_norm": 0.5230717658996582, + "learning_rate": 1.3174946167504061e-05, + "loss": 0.022, + "step": 90368 + }, + { + "epoch": 3.4235200785765554, + "grad_norm": 15.037604331970215, + "learning_rate": 1.3155604246156173e-05, + "loss": 0.0234, + "step": 90624 + }, + { + "epoch": 3.4331910392505005, + "grad_norm": 11.155952453613281, + "learning_rate": 1.3136262324808283e-05, + "loss": 0.0224, + "step": 90880 + }, + { + "epoch": 3.4428619999244456, + "grad_norm": 2.1080737113952637, + "learning_rate": 1.3116920403460391e-05, + "loss": 0.023, + "step": 91136 + }, + { + "epoch": 3.4525329605983908, + "grad_norm": 0.7303668856620789, + "learning_rate": 1.3097578482112501e-05, + "loss": 0.0217, + "step": 91392 + }, + { + "epoch": 3.462203921272336, + "grad_norm": 0.6222452521324158, + "learning_rate": 1.3078236560764612e-05, + "loss": 0.0219, + "step": 91648 + }, + { + "epoch": 3.471874881946281, + "grad_norm": 0.8807787299156189, + "learning_rate": 1.3058894639416722e-05, + "loss": 0.0236, + "step": 91904 + }, + { + "epoch": 3.4815458426202257, + "grad_norm": 0.9494552612304688, + "learning_rate": 1.3039628272449096e-05, + "loss": 0.0208, + "step": 92160 + }, + { + "epoch": 3.491216803294171, + "grad_norm": 14.498435020446777, + "learning_rate": 1.3020286351101206e-05, + "loss": 0.0241, + "step": 92416 + }, + { + "epoch": 3.500887763968116, + "grad_norm": 1.3170123100280762, + "learning_rate": 1.3000944429753314e-05, + "loss": 0.0213, + "step": 92672 + }, + { + "epoch": 3.510558724642061, + "grad_norm": 1.3304574489593506, + "learning_rate": 1.2981602508405426e-05, + "loss": 0.0216, + "step": 92928 + }, + { + "epoch": 3.520229685316006, + "grad_norm": 0.5469146966934204, + "learning_rate": 1.2962260587057536e-05, + "loss": 0.0198, + "step": 93184 + }, + { + "epoch": 3.5299006459899513, + "grad_norm": 1.7149643898010254, + "learning_rate": 1.2942918665709645e-05, + "loss": 0.0197, + "step": 93440 + }, + { + "epoch": 3.5395716066638965, + "grad_norm": 2.092782974243164, + "learning_rate": 1.292365229874202e-05, + "loss": 0.0225, + "step": 93696 + }, + { + "epoch": 3.549242567337841, + "grad_norm": 1.180370807647705, + "learning_rate": 1.2904310377394132e-05, + "loss": 0.0204, + "step": 93952 + }, + { + "epoch": 3.5589135280117867, + "grad_norm": 16.842605590820312, + "learning_rate": 1.288496845604624e-05, + "loss": 0.0214, + "step": 94208 + }, + { + "epoch": 3.5685844886857314, + "grad_norm": 3.62001895904541, + "learning_rate": 1.286562653469835e-05, + "loss": 0.0241, + "step": 94464 + }, + { + "epoch": 3.5782554493596765, + "grad_norm": 2.4327309131622314, + "learning_rate": 1.284628461335046e-05, + "loss": 0.0192, + "step": 94720 + }, + { + "epoch": 3.5879264100336217, + "grad_norm": 5.820268154144287, + "learning_rate": 1.2826942692002571e-05, + "loss": 0.0223, + "step": 94976 + }, + { + "epoch": 3.597597370707567, + "grad_norm": 3.0629537105560303, + "learning_rate": 1.280760077065468e-05, + "loss": 0.02, + "step": 95232 + }, + { + "epoch": 3.607268331381512, + "grad_norm": 2.9143710136413574, + "learning_rate": 1.278825884930679e-05, + "loss": 0.0198, + "step": 95488 + }, + { + "epoch": 3.616939292055457, + "grad_norm": 1.7662220001220703, + "learning_rate": 1.2768916927958898e-05, + "loss": 0.0225, + "step": 95744 + }, + { + "epoch": 3.626610252729402, + "grad_norm": 3.5561130046844482, + "learning_rate": 1.274957500661101e-05, + "loss": 0.0228, + "step": 96000 + }, + { + "epoch": 3.636281213403347, + "grad_norm": 5.7032470703125, + "learning_rate": 1.273023308526312e-05, + "loss": 0.0208, + "step": 96256 + }, + { + "epoch": 3.645952174077292, + "grad_norm": 1.8125163316726685, + "learning_rate": 1.2710891163915228e-05, + "loss": 0.0212, + "step": 96512 + }, + { + "epoch": 3.655623134751237, + "grad_norm": 4.229706764221191, + "learning_rate": 1.2691549242567338e-05, + "loss": 0.0238, + "step": 96768 + }, + { + "epoch": 3.6652940954251823, + "grad_norm": 1.200060486793518, + "learning_rate": 1.267220732121945e-05, + "loss": 0.0189, + "step": 97024 + }, + { + "epoch": 3.6749650560991274, + "grad_norm": 3.1629979610443115, + "learning_rate": 1.2652865399871559e-05, + "loss": 0.0207, + "step": 97280 + }, + { + "epoch": 3.6846360167730725, + "grad_norm": 3.1145436763763428, + "learning_rate": 1.2633523478523669e-05, + "loss": 0.0201, + "step": 97536 + }, + { + "epoch": 3.6943069774470176, + "grad_norm": 2.600019693374634, + "learning_rate": 1.2614181557175777e-05, + "loss": 0.0208, + "step": 97792 + }, + { + "epoch": 3.7039779381209623, + "grad_norm": 3.338853120803833, + "learning_rate": 1.2594915190208153e-05, + "loss": 0.0209, + "step": 98048 + }, + { + "epoch": 3.713648898794908, + "grad_norm": 6.754782676696777, + "learning_rate": 1.2575573268860263e-05, + "loss": 0.0187, + "step": 98304 + }, + { + "epoch": 3.7233198594688526, + "grad_norm": 3.4177420139312744, + "learning_rate": 1.2556231347512373e-05, + "loss": 0.0216, + "step": 98560 + }, + { + "epoch": 3.7329908201427977, + "grad_norm": 1.6833980083465576, + "learning_rate": 1.2536889426164482e-05, + "loss": 0.0202, + "step": 98816 + }, + { + "epoch": 3.742661780816743, + "grad_norm": 2.7053074836730957, + "learning_rate": 1.2517547504816592e-05, + "loss": 0.0191, + "step": 99072 + }, + { + "epoch": 3.752332741490688, + "grad_norm": 2.020542621612549, + "learning_rate": 1.2498205583468704e-05, + "loss": 0.0195, + "step": 99328 + }, + { + "epoch": 3.762003702164633, + "grad_norm": 4.648097515106201, + "learning_rate": 1.2478863662120812e-05, + "loss": 0.0208, + "step": 99584 + }, + { + "epoch": 3.771674662838578, + "grad_norm": 1.1168490648269653, + "learning_rate": 1.2459521740772922e-05, + "loss": 0.0194, + "step": 99840 + }, + { + "epoch": 3.7813456235125233, + "grad_norm": 0.9811049103736877, + "learning_rate": 1.2440255373805296e-05, + "loss": 0.0207, + "step": 100096 + }, + { + "epoch": 3.791016584186468, + "grad_norm": 3.2866318225860596, + "learning_rate": 1.2420913452457408e-05, + "loss": 0.0208, + "step": 100352 + }, + { + "epoch": 3.800687544860413, + "grad_norm": 1.0944135189056396, + "learning_rate": 1.2401571531109518e-05, + "loss": 0.0216, + "step": 100608 + }, + { + "epoch": 3.8103585055343583, + "grad_norm": 1.5802284479141235, + "learning_rate": 1.2382229609761627e-05, + "loss": 0.0215, + "step": 100864 + }, + { + "epoch": 3.8200294662083034, + "grad_norm": 15.70626163482666, + "learning_rate": 1.2362887688413737e-05, + "loss": 0.0224, + "step": 101120 + }, + { + "epoch": 3.8297004268822485, + "grad_norm": 2.241199016571045, + "learning_rate": 1.2343545767065847e-05, + "loss": 0.0203, + "step": 101376 + }, + { + "epoch": 3.8393713875561937, + "grad_norm": 1.7189942598342896, + "learning_rate": 1.2324203845717957e-05, + "loss": 0.0202, + "step": 101632 + }, + { + "epoch": 3.849042348230139, + "grad_norm": 3.025250196456909, + "learning_rate": 1.2304861924370065e-05, + "loss": 0.0214, + "step": 101888 + }, + { + "epoch": 3.8587133089040835, + "grad_norm": 0.44517338275909424, + "learning_rate": 1.2285595557402441e-05, + "loss": 0.0213, + "step": 102144 + }, + { + "epoch": 3.868384269578029, + "grad_norm": 1.4796372652053833, + "learning_rate": 1.226625363605455e-05, + "loss": 0.0214, + "step": 102400 + }, + { + "epoch": 3.8780552302519737, + "grad_norm": 3.5950703620910645, + "learning_rate": 1.2246911714706661e-05, + "loss": 0.0195, + "step": 102656 + }, + { + "epoch": 3.887726190925919, + "grad_norm": 3.87646222114563, + "learning_rate": 1.2227569793358771e-05, + "loss": 0.0194, + "step": 102912 + }, + { + "epoch": 3.897397151599864, + "grad_norm": 6.18682336807251, + "learning_rate": 1.220822787201088e-05, + "loss": 0.0186, + "step": 103168 + }, + { + "epoch": 3.907068112273809, + "grad_norm": 4.5957207679748535, + "learning_rate": 1.218888595066299e-05, + "loss": 0.0193, + "step": 103424 + }, + { + "epoch": 3.9167390729477543, + "grad_norm": 11.035417556762695, + "learning_rate": 1.2169544029315102e-05, + "loss": 0.0193, + "step": 103680 + }, + { + "epoch": 3.9264100336216994, + "grad_norm": 1.2352112531661987, + "learning_rate": 1.215020210796721e-05, + "loss": 0.0181, + "step": 103936 + }, + { + "epoch": 3.9360809942956445, + "grad_norm": 0.506557047367096, + "learning_rate": 1.213086018661932e-05, + "loss": 0.0187, + "step": 104192 + }, + { + "epoch": 3.945751954969589, + "grad_norm": 2.8846030235290527, + "learning_rate": 1.2111593819651694e-05, + "loss": 0.0196, + "step": 104448 + }, + { + "epoch": 3.9554229156435343, + "grad_norm": 1.4602288007736206, + "learning_rate": 1.2092251898303806e-05, + "loss": 0.0192, + "step": 104704 + }, + { + "epoch": 3.9650938763174794, + "grad_norm": 13.7206392288208, + "learning_rate": 1.207298553133618e-05, + "loss": 0.0191, + "step": 104960 + }, + { + "epoch": 3.9747648369914246, + "grad_norm": 1.7238157987594604, + "learning_rate": 1.205364360998829e-05, + "loss": 0.0201, + "step": 105216 + }, + { + "epoch": 3.9844357976653697, + "grad_norm": 2.0438215732574463, + "learning_rate": 1.20343016886404e-05, + "loss": 0.0182, + "step": 105472 + }, + { + "epoch": 3.994106758339315, + "grad_norm": 1.2184364795684814, + "learning_rate": 1.2014959767292509e-05, + "loss": 0.019, + "step": 105728 + }, + { + "epoch": 4.0, + "eval_f1_B-DISEASE": 0.808675320486979, + "eval_f1_B-MEDICATION": 0.9266381766381766, + "eval_f1_B-PROCEDURE": 0.8295174204717594, + "eval_f1_B-SYMPTOM": 0.7980295566502463, + "eval_f1_I-DISEASE": 0.8489476041200179, + "eval_f1_I-MEDICATION": 0.9397869262133368, + "eval_f1_I-PROCEDURE": 0.847453216112688, + "eval_f1_I-SYMPTOM": 0.8079173624618605, + "eval_f1_O": 0.9249064875950979, + "eval_f1_macro": 0.8590968967500181, + "eval_f1_micro": 0.8881361203919136, + "eval_loss": 0.4215824604034424, + "eval_precision_B-DISEASE": 0.8221985408640052, + "eval_precision_B-MEDICATION": 0.927133440228042, + "eval_precision_B-PROCEDURE": 0.8393623543838136, + "eval_precision_B-SYMPTOM": 0.8061812467260345, + "eval_precision_I-DISEASE": 0.8662832447676619, + "eval_precision_I-MEDICATION": 0.9348440443793176, + "eval_precision_I-PROCEDURE": 0.8446231286120726, + "eval_precision_I-SYMPTOM": 0.8113340449395586, + "eval_precision_O": 0.9229153657042629, + "eval_precision_macro": 0.8638750456227521, + "eval_precision_micro": 0.88976580540172, + "eval_rauc_macro": 0.9166627680103409, + "eval_rauc_micro": 0.9362568980657558, + "eval_recall_B-DISEASE": 0.795589751725232, + "eval_recall_B-MEDICATION": 0.9261434418935753, + "eval_recall_B-PROCEDURE": 0.8199007529089665, + "eval_recall_B-SYMPTOM": 0.7900410677618069, + "eval_recall_I-DISEASE": 0.8322921762691454, + "eval_recall_I-MEDICATION": 0.9447823557412599, + "eval_recall_I-PROCEDURE": 0.8503023329824475, + "eval_recall_I-SYMPTOM": 0.8045293359123147, + "eval_recall_O": 0.9269062194588178, + "eval_recall_macro": 0.8544986038503963, + "eval_recall_micro": 0.886512394293185, + "eval_roc_auc_B-DISEASE": 0.8961942453514548, + "eval_roc_auc_B-MEDICATION": 0.9629229597024016, + "eval_roc_auc_B-PROCEDURE": 0.9085987950526558, + "eval_roc_auc_B-SYMPTOM": 0.8929666290562625, + "eval_roc_auc_I-DISEASE": 0.9077573774887775, + "eval_roc_auc_I-MEDICATION": 0.9719339273090749, + "eval_roc_auc_I-PROCEDURE": 0.9175308611542671, + "eval_roc_auc_I-SYMPTOM": 0.8876856873124548, + "eval_roc_auc_O": 0.9043744296657181, + "eval_runtime": 61.4869, + "eval_samples_per_second": 176.835, + "eval_steps_per_second": 22.119, + "step": 105884 + }, + { + "epoch": 4.00377771901326, + "grad_norm": 0.7948421835899353, + "learning_rate": 1.199561784594462e-05, + "loss": 0.0178, + "step": 105984 + }, + { + "epoch": 4.013448679687205, + "grad_norm": 0.8098104596138, + "learning_rate": 1.1976275924596729e-05, + "loss": 0.0143, + "step": 106240 + }, + { + "epoch": 4.02311964036115, + "grad_norm": 3.08135724067688, + "learning_rate": 1.195693400324884e-05, + "loss": 0.0134, + "step": 106496 + }, + { + "epoch": 4.032790601035095, + "grad_norm": 1.1449787616729736, + "learning_rate": 1.1937592081900948e-05, + "loss": 0.0133, + "step": 106752 + }, + { + "epoch": 4.0424615617090405, + "grad_norm": 2.6626508235931396, + "learning_rate": 1.1918325714933323e-05, + "loss": 0.0136, + "step": 107008 + }, + { + "epoch": 4.052132522382985, + "grad_norm": 1.2061920166015625, + "learning_rate": 1.1898983793585435e-05, + "loss": 0.0145, + "step": 107264 + }, + { + "epoch": 4.06180348305693, + "grad_norm": 0.4288395941257477, + "learning_rate": 1.1879641872237544e-05, + "loss": 0.0127, + "step": 107520 + }, + { + "epoch": 4.071474443730875, + "grad_norm": 3.4469873905181885, + "learning_rate": 1.1860299950889654e-05, + "loss": 0.0143, + "step": 107776 + }, + { + "epoch": 4.08114540440482, + "grad_norm": 3.4651808738708496, + "learning_rate": 1.1840958029541762e-05, + "loss": 0.0137, + "step": 108032 + }, + { + "epoch": 4.090816365078766, + "grad_norm": 4.205618381500244, + "learning_rate": 1.1821616108193874e-05, + "loss": 0.0136, + "step": 108288 + }, + { + "epoch": 4.10048732575271, + "grad_norm": 0.8337300419807434, + "learning_rate": 1.1802274186845984e-05, + "loss": 0.0126, + "step": 108544 + }, + { + "epoch": 4.110158286426656, + "grad_norm": 5.309538841247559, + "learning_rate": 1.1782932265498093e-05, + "loss": 0.0137, + "step": 108800 + }, + { + "epoch": 4.119829247100601, + "grad_norm": 0.6177698969841003, + "learning_rate": 1.1763590344150204e-05, + "loss": 0.0147, + "step": 109056 + }, + { + "epoch": 4.129500207774546, + "grad_norm": 2.2366254329681396, + "learning_rate": 1.1744248422802313e-05, + "loss": 0.0144, + "step": 109312 + }, + { + "epoch": 4.139171168448491, + "grad_norm": 1.5923917293548584, + "learning_rate": 1.1724906501454423e-05, + "loss": 0.0124, + "step": 109568 + }, + { + "epoch": 4.1488421291224356, + "grad_norm": 0.6197337508201599, + "learning_rate": 1.1705564580106531e-05, + "loss": 0.0128, + "step": 109824 + }, + { + "epoch": 4.158513089796381, + "grad_norm": 1.5513421297073364, + "learning_rate": 1.1686222658758643e-05, + "loss": 0.0133, + "step": 110080 + }, + { + "epoch": 4.168184050470326, + "grad_norm": 0.4733668863773346, + "learning_rate": 1.1666880737410753e-05, + "loss": 0.013, + "step": 110336 + }, + { + "epoch": 4.177855011144271, + "grad_norm": 0.9195311069488525, + "learning_rate": 1.1647538816062862e-05, + "loss": 0.0147, + "step": 110592 + }, + { + "epoch": 4.187525971818216, + "grad_norm": 0.4619844853878021, + "learning_rate": 1.1628272449095237e-05, + "loss": 0.0149, + "step": 110848 + }, + { + "epoch": 4.197196932492162, + "grad_norm": 0.4427216351032257, + "learning_rate": 1.1608930527747346e-05, + "loss": 0.0146, + "step": 111104 + }, + { + "epoch": 4.206867893166106, + "grad_norm": 2.1087565422058105, + "learning_rate": 1.1589588606399458e-05, + "loss": 0.0141, + "step": 111360 + }, + { + "epoch": 4.216538853840051, + "grad_norm": 1.2194585800170898, + "learning_rate": 1.1570246685051568e-05, + "loss": 0.0164, + "step": 111616 + }, + { + "epoch": 4.226209814513997, + "grad_norm": 1.4488071203231812, + "learning_rate": 1.1550904763703676e-05, + "loss": 0.0158, + "step": 111872 + }, + { + "epoch": 4.235880775187941, + "grad_norm": 5.222316265106201, + "learning_rate": 1.1531562842355786e-05, + "loss": 0.0144, + "step": 112128 + }, + { + "epoch": 4.245551735861887, + "grad_norm": 0.9832548499107361, + "learning_rate": 1.1512220921007897e-05, + "loss": 0.0148, + "step": 112384 + }, + { + "epoch": 4.2552226965358315, + "grad_norm": 3.2491071224212646, + "learning_rate": 1.1492878999660007e-05, + "loss": 0.0141, + "step": 112640 + }, + { + "epoch": 4.264893657209777, + "grad_norm": 0.40463200211524963, + "learning_rate": 1.1473537078312115e-05, + "loss": 0.0153, + "step": 112896 + }, + { + "epoch": 4.274564617883722, + "grad_norm": 0.7759467363357544, + "learning_rate": 1.1454195156964225e-05, + "loss": 0.0137, + "step": 113152 + }, + { + "epoch": 4.2842355785576665, + "grad_norm": 5.856504917144775, + "learning_rate": 1.1434853235616337e-05, + "loss": 0.0154, + "step": 113408 + }, + { + "epoch": 4.293906539231612, + "grad_norm": 1.3605588674545288, + "learning_rate": 1.1415586868648711e-05, + "loss": 0.0154, + "step": 113664 + }, + { + "epoch": 4.303577499905557, + "grad_norm": 0.4841889441013336, + "learning_rate": 1.1396244947300821e-05, + "loss": 0.0143, + "step": 113920 + }, + { + "epoch": 4.313248460579502, + "grad_norm": 2.290330171585083, + "learning_rate": 1.137690302595293e-05, + "loss": 0.0124, + "step": 114176 + }, + { + "epoch": 4.322919421253447, + "grad_norm": 0.6299089193344116, + "learning_rate": 1.135756110460504e-05, + "loss": 0.015, + "step": 114432 + }, + { + "epoch": 4.3325903819273925, + "grad_norm": 2.6966304779052734, + "learning_rate": 1.1338219183257152e-05, + "loss": 0.0147, + "step": 114688 + }, + { + "epoch": 4.342261342601337, + "grad_norm": 3.5934536457061768, + "learning_rate": 1.131887726190926e-05, + "loss": 0.0135, + "step": 114944 + }, + { + "epoch": 4.351932303275283, + "grad_norm": 2.391207218170166, + "learning_rate": 1.129953534056137e-05, + "loss": 0.0127, + "step": 115200 + }, + { + "epoch": 4.3616032639492275, + "grad_norm": 7.104966640472412, + "learning_rate": 1.1280193419213479e-05, + "loss": 0.0127, + "step": 115456 + }, + { + "epoch": 4.371274224623172, + "grad_norm": 4.91796875, + "learning_rate": 1.126085149786559e-05, + "loss": 0.0136, + "step": 115712 + }, + { + "epoch": 4.380945185297118, + "grad_norm": 0.9888034462928772, + "learning_rate": 1.1241509576517699e-05, + "loss": 0.0143, + "step": 115968 + }, + { + "epoch": 4.390616145971062, + "grad_norm": 5.1639018058776855, + "learning_rate": 1.1222167655169809e-05, + "loss": 0.0127, + "step": 116224 + }, + { + "epoch": 4.400287106645008, + "grad_norm": 8.415788650512695, + "learning_rate": 1.1202825733821917e-05, + "loss": 0.0145, + "step": 116480 + }, + { + "epoch": 4.409958067318953, + "grad_norm": 4.235672950744629, + "learning_rate": 1.118348381247403e-05, + "loss": 0.0154, + "step": 116736 + }, + { + "epoch": 4.419629027992898, + "grad_norm": 1.2345690727233887, + "learning_rate": 1.116414189112614e-05, + "loss": 0.0143, + "step": 116992 + }, + { + "epoch": 4.429299988666843, + "grad_norm": 8.792167663574219, + "learning_rate": 1.1144799969778248e-05, + "loss": 0.0162, + "step": 117248 + }, + { + "epoch": 4.438970949340788, + "grad_norm": 2.2384276390075684, + "learning_rate": 1.1125533602810623e-05, + "loss": 0.0141, + "step": 117504 + }, + { + "epoch": 4.448641910014733, + "grad_norm": 1.041791558265686, + "learning_rate": 1.1106191681462735e-05, + "loss": 0.0136, + "step": 117760 + }, + { + "epoch": 4.458312870688678, + "grad_norm": 4.7409515380859375, + "learning_rate": 1.1086849760114844e-05, + "loss": 0.0136, + "step": 118016 + }, + { + "epoch": 4.4679838313626234, + "grad_norm": 0.2378958761692047, + "learning_rate": 1.1067507838766954e-05, + "loss": 0.0147, + "step": 118272 + }, + { + "epoch": 4.477654792036568, + "grad_norm": 0.31944742798805237, + "learning_rate": 1.1048165917419062e-05, + "loss": 0.0135, + "step": 118528 + }, + { + "epoch": 4.487325752710514, + "grad_norm": 1.0051418542861938, + "learning_rate": 1.1028899550451438e-05, + "loss": 0.0137, + "step": 118784 + }, + { + "epoch": 4.496996713384458, + "grad_norm": 1.6819483041763306, + "learning_rate": 1.1009557629103548e-05, + "loss": 0.0156, + "step": 119040 + }, + { + "epoch": 4.506667674058404, + "grad_norm": 0.40426477789878845, + "learning_rate": 1.0990215707755658e-05, + "loss": 0.0146, + "step": 119296 + }, + { + "epoch": 4.516338634732349, + "grad_norm": 3.140392303466797, + "learning_rate": 1.0970873786407768e-05, + "loss": 0.0129, + "step": 119552 + }, + { + "epoch": 4.526009595406293, + "grad_norm": 4.579553604125977, + "learning_rate": 1.0951531865059877e-05, + "loss": 0.0124, + "step": 119808 + }, + { + "epoch": 4.535680556080239, + "grad_norm": 5.144607067108154, + "learning_rate": 1.0932189943711989e-05, + "loss": 0.0153, + "step": 120064 + }, + { + "epoch": 4.545351516754184, + "grad_norm": 0.48044607043266296, + "learning_rate": 1.0912848022364097e-05, + "loss": 0.013, + "step": 120320 + }, + { + "epoch": 4.555022477428129, + "grad_norm": 42.02862548828125, + "learning_rate": 1.0893506101016207e-05, + "loss": 0.0137, + "step": 120576 + }, + { + "epoch": 4.564693438102074, + "grad_norm": 6.357956886291504, + "learning_rate": 1.0874239734048581e-05, + "loss": 0.0154, + "step": 120832 + }, + { + "epoch": 4.574364398776019, + "grad_norm": 1.7824290990829468, + "learning_rate": 1.0854897812700693e-05, + "loss": 0.0127, + "step": 121088 + }, + { + "epoch": 4.584035359449964, + "grad_norm": 1.5723809003829956, + "learning_rate": 1.0835555891352803e-05, + "loss": 0.0129, + "step": 121344 + }, + { + "epoch": 4.593706320123909, + "grad_norm": 0.8262581825256348, + "learning_rate": 1.0816213970004911e-05, + "loss": 0.0151, + "step": 121600 + }, + { + "epoch": 4.603377280797854, + "grad_norm": 0.8032371401786804, + "learning_rate": 1.0796872048657022e-05, + "loss": 0.0158, + "step": 121856 + }, + { + "epoch": 4.613048241471799, + "grad_norm": 0.9602940082550049, + "learning_rate": 1.0777530127309133e-05, + "loss": 0.0142, + "step": 122112 + }, + { + "epoch": 4.622719202145745, + "grad_norm": 1.9872223138809204, + "learning_rate": 1.0758188205961242e-05, + "loss": 0.0134, + "step": 122368 + }, + { + "epoch": 4.632390162819689, + "grad_norm": 5.095240116119385, + "learning_rate": 1.0738846284613352e-05, + "loss": 0.014, + "step": 122624 + }, + { + "epoch": 4.642061123493635, + "grad_norm": 5.41306209564209, + "learning_rate": 1.071950436326546e-05, + "loss": 0.0124, + "step": 122880 + }, + { + "epoch": 4.6517320841675796, + "grad_norm": 10.439872741699219, + "learning_rate": 1.0700162441917572e-05, + "loss": 0.0148, + "step": 123136 + }, + { + "epoch": 4.661403044841524, + "grad_norm": 0.26815417408943176, + "learning_rate": 1.068082052056968e-05, + "loss": 0.0132, + "step": 123392 + }, + { + "epoch": 4.67107400551547, + "grad_norm": 0.6603275537490845, + "learning_rate": 1.0661554153602056e-05, + "loss": 0.0116, + "step": 123648 + }, + { + "epoch": 4.6807449661894145, + "grad_norm": 7.041431903839111, + "learning_rate": 1.0642212232254165e-05, + "loss": 0.0129, + "step": 123904 + }, + { + "epoch": 4.69041592686336, + "grad_norm": 0.7987198233604431, + "learning_rate": 1.0622870310906275e-05, + "loss": 0.0129, + "step": 124160 + }, + { + "epoch": 4.700086887537305, + "grad_norm": 0.14286652207374573, + "learning_rate": 1.0603528389558387e-05, + "loss": 0.0124, + "step": 124416 + }, + { + "epoch": 4.70975784821125, + "grad_norm": 0.9553490877151489, + "learning_rate": 1.0584186468210495e-05, + "loss": 0.0124, + "step": 124672 + }, + { + "epoch": 4.719428808885195, + "grad_norm": 0.6493328809738159, + "learning_rate": 1.0564844546862605e-05, + "loss": 0.012, + "step": 124928 + }, + { + "epoch": 4.729099769559141, + "grad_norm": 0.2133503556251526, + "learning_rate": 1.0545502625514714e-05, + "loss": 0.014, + "step": 125184 + }, + { + "epoch": 4.738770730233085, + "grad_norm": 2.6697592735290527, + "learning_rate": 1.0526160704166826e-05, + "loss": 0.013, + "step": 125440 + }, + { + "epoch": 4.748441690907031, + "grad_norm": 0.8658607602119446, + "learning_rate": 1.0506894337199201e-05, + "loss": 0.0131, + "step": 125696 + }, + { + "epoch": 4.7581126515809755, + "grad_norm": 1.9833451509475708, + "learning_rate": 1.048755241585131e-05, + "loss": 0.013, + "step": 125952 + }, + { + "epoch": 4.76778361225492, + "grad_norm": 0.19457457959651947, + "learning_rate": 1.046821049450342e-05, + "loss": 0.0142, + "step": 126208 + }, + { + "epoch": 4.777454572928866, + "grad_norm": 0.9841840863227844, + "learning_rate": 1.044886857315553e-05, + "loss": 0.0123, + "step": 126464 + }, + { + "epoch": 4.7871255336028105, + "grad_norm": 1.9175962209701538, + "learning_rate": 1.042952665180764e-05, + "loss": 0.0136, + "step": 126720 + }, + { + "epoch": 4.796796494276756, + "grad_norm": 1.7247178554534912, + "learning_rate": 1.0410184730459748e-05, + "loss": 0.0136, + "step": 126976 + }, + { + "epoch": 4.806467454950701, + "grad_norm": 2.0652236938476562, + "learning_rate": 1.0390842809111859e-05, + "loss": 0.0112, + "step": 127232 + }, + { + "epoch": 4.816138415624646, + "grad_norm": 1.1892759799957275, + "learning_rate": 1.037150088776397e-05, + "loss": 0.0125, + "step": 127488 + }, + { + "epoch": 4.825809376298591, + "grad_norm": 29.004505157470703, + "learning_rate": 1.0352158966416079e-05, + "loss": 0.0112, + "step": 127744 + }, + { + "epoch": 4.835480336972536, + "grad_norm": 0.5393902659416199, + "learning_rate": 1.0332817045068189e-05, + "loss": 0.0124, + "step": 128000 + }, + { + "epoch": 4.845151297646481, + "grad_norm": 0.9426546096801758, + "learning_rate": 1.0313475123720297e-05, + "loss": 0.011, + "step": 128256 + }, + { + "epoch": 4.854822258320426, + "grad_norm": 1.1209189891815186, + "learning_rate": 1.029413320237241e-05, + "loss": 0.0117, + "step": 128512 + }, + { + "epoch": 4.8644932189943715, + "grad_norm": 0.8620722889900208, + "learning_rate": 1.027479128102452e-05, + "loss": 0.0121, + "step": 128768 + }, + { + "epoch": 4.874164179668316, + "grad_norm": 0.14039096236228943, + "learning_rate": 1.0255524914056893e-05, + "loss": 0.0145, + "step": 129024 + }, + { + "epoch": 4.883835140342262, + "grad_norm": 0.5701342821121216, + "learning_rate": 1.0236182992709003e-05, + "loss": 0.0114, + "step": 129280 + }, + { + "epoch": 4.893506101016206, + "grad_norm": 0.802097499370575, + "learning_rate": 1.0216841071361112e-05, + "loss": 0.0124, + "step": 129536 + }, + { + "epoch": 4.903177061690151, + "grad_norm": 0.49824589490890503, + "learning_rate": 1.0197499150013224e-05, + "loss": 0.0133, + "step": 129792 + }, + { + "epoch": 4.912848022364097, + "grad_norm": 1.2195169925689697, + "learning_rate": 1.0178157228665332e-05, + "loss": 0.0134, + "step": 130048 + }, + { + "epoch": 4.922518983038041, + "grad_norm": 23.420486450195312, + "learning_rate": 1.0158815307317442e-05, + "loss": 0.0125, + "step": 130304 + }, + { + "epoch": 4.932189943711987, + "grad_norm": 0.8538005948066711, + "learning_rate": 1.013947338596955e-05, + "loss": 0.0124, + "step": 130560 + }, + { + "epoch": 4.941860904385932, + "grad_norm": 3.506213903427124, + "learning_rate": 1.0120131464621663e-05, + "loss": 0.0125, + "step": 130816 + }, + { + "epoch": 4.951531865059877, + "grad_norm": 0.5479403734207153, + "learning_rate": 1.0100789543273773e-05, + "loss": 0.0137, + "step": 131072 + }, + { + "epoch": 4.961202825733822, + "grad_norm": 0.24773092567920685, + "learning_rate": 1.0081447621925881e-05, + "loss": 0.011, + "step": 131328 + }, + { + "epoch": 4.970873786407767, + "grad_norm": 0.3392595946788788, + "learning_rate": 1.0062105700577991e-05, + "loss": 0.0113, + "step": 131584 + }, + { + "epoch": 4.980544747081712, + "grad_norm": 9.13214111328125, + "learning_rate": 1.0042763779230103e-05, + "loss": 0.0103, + "step": 131840 + }, + { + "epoch": 4.990215707755657, + "grad_norm": 0.6798635721206665, + "learning_rate": 1.0023421857882212e-05, + "loss": 0.0112, + "step": 132096 + }, + { + "epoch": 4.999886668429602, + "grad_norm": 3.8642024993896484, + "learning_rate": 1.0004155490914587e-05, + "loss": 0.0132, + "step": 132352 + }, + { + "epoch": 5.0, + "eval_f1_B-DISEASE": 0.8076930929582066, + "eval_f1_B-MEDICATION": 0.9320128479657388, + "eval_f1_B-PROCEDURE": 0.8299265901797881, + "eval_f1_B-SYMPTOM": 0.802206734344465, + "eval_f1_I-DISEASE": 0.8443607103813086, + "eval_f1_I-MEDICATION": 0.9473012496676416, + "eval_f1_I-PROCEDURE": 0.8478063590704366, + "eval_f1_I-SYMPTOM": 0.8098297732572185, + "eval_f1_O": 0.9250181548686375, + "eval_f1_macro": 0.8606839458548268, + "eval_f1_micro": 0.8883442580707792, + "eval_loss": 0.4413922429084778, + "eval_precision_B-DISEASE": 0.83231643172733, + "eval_precision_B-MEDICATION": 0.9345142243692969, + "eval_precision_B-PROCEDURE": 0.8534490552391285, + "eval_precision_B-SYMPTOM": 0.8108590008041114, + "eval_precision_I-DISEASE": 0.8482866891605315, + "eval_precision_I-MEDICATION": 0.952467518579907, + "eval_precision_I-PROCEDURE": 0.8695185973099094, + "eval_precision_I-SYMPTOM": 0.8183803238035672, + "eval_precision_O": 0.9206075463451251, + "eval_precision_macro": 0.8711554874821008, + "eval_precision_micro": 0.8905099095411801, + "eval_rauc_macro": 0.9146732071749695, + "eval_rauc_micro": 0.9361508470284178, + "eval_recall_B-DISEASE": 0.784484810026176, + "eval_recall_B-MEDICATION": 0.9295248264815803, + "eval_recall_B-PROCEDURE": 0.8076659822039699, + "eval_recall_B-SYMPTOM": 0.7937371663244354, + "eval_recall_I-DISEASE": 0.8404709040504773, + "eval_recall_I-MEDICATION": 0.9421907230126408, + "eval_recall_I-PROCEDURE": 0.8271520314654786, + "eval_recall_I-SYMPTOM": 0.8014560498603052, + "eval_recall_O": 0.9294712290821007, + "eval_recall_macro": 0.8506837469452404, + "eval_recall_micro": 0.8861891144410676, + "eval_roc_auc_B-DISEASE": 0.8907720240571226, + "eval_roc_auc_B-MEDICATION": 0.964629291931633, + "eval_roc_auc_B-PROCEDURE": 0.9026383818040776, + "eval_roc_auc_B-SYMPTOM": 0.8948664885493695, + "eval_roc_auc_I-DISEASE": 0.9104202960745905, + "eval_roc_auc_I-MEDICATION": 0.9707688588965032, + "eval_roc_auc_I-PROCEDURE": 0.9075291450310622, + "eval_roc_auc_I-SYMPTOM": 0.8868675259287415, + "eval_roc_auc_O": 0.9035668523016245, + "eval_runtime": 61.1266, + "eval_samples_per_second": 177.877, + "eval_steps_per_second": 22.249, + "step": 132355 + }, + { + "epoch": 5.009557629103547, + "grad_norm": 0.4311939775943756, + "learning_rate": 9.984813569566696e-06, + "loss": 0.0071, + "step": 132608 + }, + { + "epoch": 5.019228589777493, + "grad_norm": 1.0362738370895386, + "learning_rate": 9.965471648218806e-06, + "loss": 0.0086, + "step": 132864 + }, + { + "epoch": 5.028899550451437, + "grad_norm": 1.9174968004226685, + "learning_rate": 9.946129726870916e-06, + "loss": 0.0069, + "step": 133120 + }, + { + "epoch": 5.038570511125383, + "grad_norm": 0.2945270240306854, + "learning_rate": 9.926787805523026e-06, + "loss": 0.0086, + "step": 133376 + }, + { + "epoch": 5.048241471799328, + "grad_norm": 0.024452047422528267, + "learning_rate": 9.907445884175136e-06, + "loss": 0.0068, + "step": 133632 + }, + { + "epoch": 5.057912432473272, + "grad_norm": 0.9929279088973999, + "learning_rate": 9.888179517207512e-06, + "loss": 0.0077, + "step": 133888 + }, + { + "epoch": 5.067583393147218, + "grad_norm": 3.7908236980438232, + "learning_rate": 9.86883759585962e-06, + "loss": 0.0088, + "step": 134144 + }, + { + "epoch": 5.0772543538211625, + "grad_norm": 1.9607737064361572, + "learning_rate": 9.84949567451173e-06, + "loss": 0.0077, + "step": 134400 + }, + { + "epoch": 5.086925314495108, + "grad_norm": 4.287957668304443, + "learning_rate": 9.83015375316384e-06, + "loss": 0.0091, + "step": 134656 + }, + { + "epoch": 5.096596275169053, + "grad_norm": 0.9552005529403687, + "learning_rate": 9.81081183181595e-06, + "loss": 0.0081, + "step": 134912 + }, + { + "epoch": 5.106267235842998, + "grad_norm": 3.186450481414795, + "learning_rate": 9.79146991046806e-06, + "loss": 0.0074, + "step": 135168 + }, + { + "epoch": 5.115938196516943, + "grad_norm": 0.3131207525730133, + "learning_rate": 9.772127989120171e-06, + "loss": 0.0076, + "step": 135424 + }, + { + "epoch": 5.125609157190888, + "grad_norm": 0.2811489701271057, + "learning_rate": 9.75278606777228e-06, + "loss": 0.009, + "step": 135680 + }, + { + "epoch": 5.135280117864833, + "grad_norm": 0.3740166127681732, + "learning_rate": 9.73344414642439e-06, + "loss": 0.0078, + "step": 135936 + }, + { + "epoch": 5.144951078538778, + "grad_norm": 1.3230619430541992, + "learning_rate": 9.7141022250765e-06, + "loss": 0.008, + "step": 136192 + }, + { + "epoch": 5.1546220392127235, + "grad_norm": 0.6120270490646362, + "learning_rate": 9.694835858108874e-06, + "loss": 0.0073, + "step": 136448 + }, + { + "epoch": 5.164292999886668, + "grad_norm": 4.456196308135986, + "learning_rate": 9.675493936760985e-06, + "loss": 0.01, + "step": 136704 + }, + { + "epoch": 5.173963960560614, + "grad_norm": 0.2429385632276535, + "learning_rate": 9.656152015413094e-06, + "loss": 0.0088, + "step": 136960 + }, + { + "epoch": 5.1836349212345585, + "grad_norm": 1.2598743438720703, + "learning_rate": 9.636810094065204e-06, + "loss": 0.0103, + "step": 137216 + }, + { + "epoch": 5.193305881908504, + "grad_norm": 1.915220022201538, + "learning_rate": 9.617468172717314e-06, + "loss": 0.0098, + "step": 137472 + }, + { + "epoch": 5.202976842582449, + "grad_norm": 0.8882638812065125, + "learning_rate": 9.598126251369424e-06, + "loss": 0.0094, + "step": 137728 + }, + { + "epoch": 5.212647803256393, + "grad_norm": 0.10290802270174026, + "learning_rate": 9.578784330021533e-06, + "loss": 0.0085, + "step": 137984 + }, + { + "epoch": 5.222318763930339, + "grad_norm": 3.6905243396759033, + "learning_rate": 9.559442408673644e-06, + "loss": 0.0105, + "step": 138240 + }, + { + "epoch": 5.231989724604284, + "grad_norm": 0.27331113815307617, + "learning_rate": 9.540100487325753e-06, + "loss": 0.0096, + "step": 138496 + }, + { + "epoch": 5.241660685278229, + "grad_norm": 0.3453868627548218, + "learning_rate": 9.520758565977863e-06, + "loss": 0.0093, + "step": 138752 + }, + { + "epoch": 5.251331645952174, + "grad_norm": 6.686705589294434, + "learning_rate": 9.501416644629973e-06, + "loss": 0.0103, + "step": 139008 + }, + { + "epoch": 5.2610026066261195, + "grad_norm": 6.24531364440918, + "learning_rate": 9.482074723282083e-06, + "loss": 0.0089, + "step": 139264 + }, + { + "epoch": 5.270673567300064, + "grad_norm": 5.73491907119751, + "learning_rate": 9.462732801934192e-06, + "loss": 0.0099, + "step": 139520 + }, + { + "epoch": 5.280344527974009, + "grad_norm": 3.492011308670044, + "learning_rate": 9.443390880586304e-06, + "loss": 0.0078, + "step": 139776 + }, + { + "epoch": 5.2900154886479545, + "grad_norm": 0.5170190930366516, + "learning_rate": 9.424048959238412e-06, + "loss": 0.0078, + "step": 140032 + }, + { + "epoch": 5.299686449321899, + "grad_norm": 0.07176166027784348, + "learning_rate": 9.404707037890522e-06, + "loss": 0.008, + "step": 140288 + }, + { + "epoch": 5.309357409995845, + "grad_norm": 1.4914041757583618, + "learning_rate": 9.385440670922898e-06, + "loss": 0.009, + "step": 140544 + }, + { + "epoch": 5.319028370669789, + "grad_norm": 2.964844226837158, + "learning_rate": 9.366098749575008e-06, + "loss": 0.0085, + "step": 140800 + }, + { + "epoch": 5.328699331343735, + "grad_norm": 0.2556954324245453, + "learning_rate": 9.346756828227116e-06, + "loss": 0.0091, + "step": 141056 + }, + { + "epoch": 5.33837029201768, + "grad_norm": 3.6987621784210205, + "learning_rate": 9.327414906879228e-06, + "loss": 0.0079, + "step": 141312 + }, + { + "epoch": 5.348041252691624, + "grad_norm": 4.68747615814209, + "learning_rate": 9.308072985531337e-06, + "loss": 0.0087, + "step": 141568 + }, + { + "epoch": 5.35771221336557, + "grad_norm": 0.4118718206882477, + "learning_rate": 9.288731064183447e-06, + "loss": 0.0096, + "step": 141824 + }, + { + "epoch": 5.367383174039515, + "grad_norm": 0.3415529429912567, + "learning_rate": 9.269389142835557e-06, + "loss": 0.0088, + "step": 142080 + }, + { + "epoch": 5.37705413471346, + "grad_norm": 0.10233204811811447, + "learning_rate": 9.250047221487667e-06, + "loss": 0.0082, + "step": 142336 + }, + { + "epoch": 5.386725095387405, + "grad_norm": 0.2785378396511078, + "learning_rate": 9.230705300139775e-06, + "loss": 0.0107, + "step": 142592 + }, + { + "epoch": 5.39639605606135, + "grad_norm": 1.3955039978027344, + "learning_rate": 9.211438933172151e-06, + "loss": 0.0081, + "step": 142848 + }, + { + "epoch": 5.406067016735295, + "grad_norm": 6.009440898895264, + "learning_rate": 9.192097011824261e-06, + "loss": 0.0078, + "step": 143104 + }, + { + "epoch": 5.415737977409241, + "grad_norm": 0.10272625833749771, + "learning_rate": 9.172755090476371e-06, + "loss": 0.0097, + "step": 143360 + }, + { + "epoch": 5.425408938083185, + "grad_norm": 5.104133605957031, + "learning_rate": 9.153413169128481e-06, + "loss": 0.0079, + "step": 143616 + }, + { + "epoch": 5.43507989875713, + "grad_norm": 0.218174010515213, + "learning_rate": 9.13407124778059e-06, + "loss": 0.0088, + "step": 143872 + }, + { + "epoch": 5.444750859431076, + "grad_norm": 4.509812355041504, + "learning_rate": 9.1147293264327e-06, + "loss": 0.0088, + "step": 144128 + }, + { + "epoch": 5.45442182010502, + "grad_norm": 0.9697214961051941, + "learning_rate": 9.09538740508481e-06, + "loss": 0.0088, + "step": 144384 + }, + { + "epoch": 5.464092780778966, + "grad_norm": 1.000596046447754, + "learning_rate": 9.07604548373692e-06, + "loss": 0.0089, + "step": 144640 + }, + { + "epoch": 5.473763741452911, + "grad_norm": 0.22552327811717987, + "learning_rate": 9.05670356238903e-06, + "loss": 0.009, + "step": 144896 + }, + { + "epoch": 5.483434702126856, + "grad_norm": 0.7269870042800903, + "learning_rate": 9.03736164104114e-06, + "loss": 0.0097, + "step": 145152 + }, + { + "epoch": 5.493105662800801, + "grad_norm": 0.3465439975261688, + "learning_rate": 9.018095274073515e-06, + "loss": 0.0102, + "step": 145408 + }, + { + "epoch": 5.502776623474746, + "grad_norm": 0.6706210970878601, + "learning_rate": 8.998753352725625e-06, + "loss": 0.009, + "step": 145664 + }, + { + "epoch": 5.512447584148691, + "grad_norm": 36.635414123535156, + "learning_rate": 8.979411431377735e-06, + "loss": 0.0084, + "step": 145920 + }, + { + "epoch": 5.522118544822636, + "grad_norm": 1.6504905223846436, + "learning_rate": 8.960069510029845e-06, + "loss": 0.008, + "step": 146176 + }, + { + "epoch": 5.531789505496581, + "grad_norm": 0.5836831331253052, + "learning_rate": 8.940727588681955e-06, + "loss": 0.0087, + "step": 146432 + }, + { + "epoch": 5.541460466170526, + "grad_norm": 0.684615433216095, + "learning_rate": 8.921385667334065e-06, + "loss": 0.0076, + "step": 146688 + }, + { + "epoch": 5.551131426844472, + "grad_norm": 2.892833709716797, + "learning_rate": 8.902043745986174e-06, + "loss": 0.0085, + "step": 146944 + }, + { + "epoch": 5.560802387518416, + "grad_norm": 20.16980743408203, + "learning_rate": 8.882701824638284e-06, + "loss": 0.0084, + "step": 147200 + }, + { + "epoch": 5.570473348192362, + "grad_norm": 3.590690851211548, + "learning_rate": 8.86343545767066e-06, + "loss": 0.008, + "step": 147456 + }, + { + "epoch": 5.5801443088663065, + "grad_norm": 0.8185898661613464, + "learning_rate": 8.844093536322768e-06, + "loss": 0.0074, + "step": 147712 + }, + { + "epoch": 5.589815269540251, + "grad_norm": 0.3533550202846527, + "learning_rate": 8.82475161497488e-06, + "loss": 0.0079, + "step": 147968 + }, + { + "epoch": 5.599486230214197, + "grad_norm": 1.9578065872192383, + "learning_rate": 8.805409693626988e-06, + "loss": 0.0077, + "step": 148224 + }, + { + "epoch": 5.6091571908881415, + "grad_norm": 0.17547504603862762, + "learning_rate": 8.786067772279098e-06, + "loss": 0.0086, + "step": 148480 + }, + { + "epoch": 5.618828151562087, + "grad_norm": 1.6071051359176636, + "learning_rate": 8.766725850931208e-06, + "loss": 0.0085, + "step": 148736 + }, + { + "epoch": 5.628499112236032, + "grad_norm": 0.8156256079673767, + "learning_rate": 8.747383929583318e-06, + "loss": 0.007, + "step": 148992 + }, + { + "epoch": 5.638170072909977, + "grad_norm": 11.689854621887207, + "learning_rate": 8.728042008235429e-06, + "loss": 0.0096, + "step": 149248 + }, + { + "epoch": 5.647841033583922, + "grad_norm": 0.9380402565002441, + "learning_rate": 8.708700086887539e-06, + "loss": 0.0082, + "step": 149504 + }, + { + "epoch": 5.657511994257867, + "grad_norm": 0.5615717172622681, + "learning_rate": 8.689433719919913e-06, + "loss": 0.0079, + "step": 149760 + }, + { + "epoch": 5.667182954931812, + "grad_norm": 1.7645057439804077, + "learning_rate": 8.670091798572023e-06, + "loss": 0.0103, + "step": 150016 + }, + { + "epoch": 5.676853915605757, + "grad_norm": 4.009266376495361, + "learning_rate": 8.650749877224133e-06, + "loss": 0.0089, + "step": 150272 + }, + { + "epoch": 5.6865248762797025, + "grad_norm": 0.6033828854560852, + "learning_rate": 8.631407955876243e-06, + "loss": 0.0074, + "step": 150528 + }, + { + "epoch": 5.696195836953647, + "grad_norm": 5.695953369140625, + "learning_rate": 8.612066034528353e-06, + "loss": 0.0087, + "step": 150784 + }, + { + "epoch": 5.705866797627593, + "grad_norm": 2.7361793518066406, + "learning_rate": 8.592724113180463e-06, + "loss": 0.0083, + "step": 151040 + }, + { + "epoch": 5.715537758301537, + "grad_norm": 0.26094111800193787, + "learning_rate": 8.573382191832572e-06, + "loss": 0.0083, + "step": 151296 + }, + { + "epoch": 5.725208718975482, + "grad_norm": 0.7956266403198242, + "learning_rate": 8.554040270484682e-06, + "loss": 0.0098, + "step": 151552 + }, + { + "epoch": 5.734879679649428, + "grad_norm": 0.28114378452301025, + "learning_rate": 8.534698349136792e-06, + "loss": 0.0093, + "step": 151808 + }, + { + "epoch": 5.744550640323372, + "grad_norm": 2.0160458087921143, + "learning_rate": 8.515356427788902e-06, + "loss": 0.0099, + "step": 152064 + }, + { + "epoch": 5.754221600997318, + "grad_norm": 1.1137139797210693, + "learning_rate": 8.496014506441012e-06, + "loss": 0.0082, + "step": 152320 + }, + { + "epoch": 5.763892561671263, + "grad_norm": 3.6147210597991943, + "learning_rate": 8.476672585093122e-06, + "loss": 0.0083, + "step": 152576 + }, + { + "epoch": 5.773563522345208, + "grad_norm": 4.814803123474121, + "learning_rate": 8.457330663745231e-06, + "loss": 0.007, + "step": 152832 + }, + { + "epoch": 5.783234483019153, + "grad_norm": 0.7147836089134216, + "learning_rate": 8.437988742397341e-06, + "loss": 0.0088, + "step": 153088 + }, + { + "epoch": 5.7929054436930985, + "grad_norm": 1.4864364862442017, + "learning_rate": 8.418646821049451e-06, + "loss": 0.0063, + "step": 153344 + }, + { + "epoch": 5.802576404367043, + "grad_norm": 0.7108877301216125, + "learning_rate": 8.399304899701561e-06, + "loss": 0.0076, + "step": 153600 + }, + { + "epoch": 5.812247365040989, + "grad_norm": 2.6084437370300293, + "learning_rate": 8.379962978353671e-06, + "loss": 0.0086, + "step": 153856 + }, + { + "epoch": 5.821918325714933, + "grad_norm": 0.09771878272294998, + "learning_rate": 8.360696611386045e-06, + "loss": 0.009, + "step": 154112 + }, + { + "epoch": 5.831589286388878, + "grad_norm": 0.6635453104972839, + "learning_rate": 8.341354690038155e-06, + "loss": 0.0071, + "step": 154368 + }, + { + "epoch": 5.841260247062824, + "grad_norm": 1.141113042831421, + "learning_rate": 8.322088323070531e-06, + "loss": 0.0076, + "step": 154624 + }, + { + "epoch": 5.850931207736768, + "grad_norm": 0.8303898572921753, + "learning_rate": 8.30274640172264e-06, + "loss": 0.0096, + "step": 154880 + }, + { + "epoch": 5.860602168410714, + "grad_norm": 0.5006212592124939, + "learning_rate": 8.28340448037475e-06, + "loss": 0.0076, + "step": 155136 + }, + { + "epoch": 5.870273129084659, + "grad_norm": 1.09455406665802, + "learning_rate": 8.26406255902686e-06, + "loss": 0.007, + "step": 155392 + }, + { + "epoch": 5.879944089758604, + "grad_norm": 0.5454290509223938, + "learning_rate": 8.24472063767897e-06, + "loss": 0.0081, + "step": 155648 + }, + { + "epoch": 5.889615050432549, + "grad_norm": 14.345696449279785, + "learning_rate": 8.22537871633108e-06, + "loss": 0.0097, + "step": 155904 + }, + { + "epoch": 5.8992860111064935, + "grad_norm": 2.6186184883117676, + "learning_rate": 8.20603679498319e-06, + "loss": 0.0089, + "step": 156160 + }, + { + "epoch": 5.908956971780439, + "grad_norm": 1.894392967224121, + "learning_rate": 8.186694873635299e-06, + "loss": 0.0078, + "step": 156416 + }, + { + "epoch": 5.918627932454384, + "grad_norm": 0.8504657745361328, + "learning_rate": 8.167352952287409e-06, + "loss": 0.0089, + "step": 156672 + }, + { + "epoch": 5.928298893128329, + "grad_norm": 1.3975979089736938, + "learning_rate": 8.148011030939519e-06, + "loss": 0.0076, + "step": 156928 + }, + { + "epoch": 5.937969853802274, + "grad_norm": 0.12042956799268723, + "learning_rate": 8.128669109591629e-06, + "loss": 0.0073, + "step": 157184 + }, + { + "epoch": 5.94764081447622, + "grad_norm": 0.7101040482521057, + "learning_rate": 8.10932718824374e-06, + "loss": 0.0084, + "step": 157440 + }, + { + "epoch": 5.957311775150164, + "grad_norm": 0.049012139439582825, + "learning_rate": 8.08998526689585e-06, + "loss": 0.0078, + "step": 157696 + }, + { + "epoch": 5.966982735824109, + "grad_norm": 3.6799347400665283, + "learning_rate": 8.070643345547958e-06, + "loss": 0.0069, + "step": 157952 + }, + { + "epoch": 5.976653696498055, + "grad_norm": 5.190356731414795, + "learning_rate": 8.051301424200068e-06, + "loss": 0.0083, + "step": 158208 + }, + { + "epoch": 5.986324657171999, + "grad_norm": 0.405319482088089, + "learning_rate": 8.031959502852178e-06, + "loss": 0.0076, + "step": 158464 + }, + { + "epoch": 5.995995617845945, + "grad_norm": 7.220467567443848, + "learning_rate": 8.012617581504288e-06, + "loss": 0.0076, + "step": 158720 + }, + { + "epoch": 6.0, + "eval_f1_B-DISEASE": 0.8137172062733137, + "eval_f1_B-MEDICATION": 0.9366835994194485, + "eval_f1_B-PROCEDURE": 0.8338765045843654, + "eval_f1_B-SYMPTOM": 0.802911592516026, + "eval_f1_I-DISEASE": 0.8454658855143709, + "eval_f1_I-MEDICATION": 0.9528172976448839, + "eval_f1_I-PROCEDURE": 0.8516517120332436, + "eval_f1_I-SYMPTOM": 0.8137056001030496, + "eval_f1_O": 0.9249487903086626, + "eval_f1_macro": 0.8639753542663738, + "eval_f1_micro": 0.8895774266140277, + "eval_loss": 0.4695410430431366, + "eval_precision_B-DISEASE": 0.846968981938307, + "eval_precision_B-MEDICATION": 0.9552266419981499, + "eval_precision_B-PROCEDURE": 0.8472406181015453, + "eval_precision_B-SYMPTOM": 0.8153335920685884, + "eval_precision_I-DISEASE": 0.8649620429239743, + "eval_precision_I-MEDICATION": 0.9655191138140747, + "eval_precision_I-PROCEDURE": 0.8750075317409081, + "eval_precision_I-SYMPTOM": 0.8128417937335135, + "eval_precision_O": 0.9181102771519474, + "eval_precision_macro": 0.8779122881634454, + "eval_precision_micro": 0.8910716505650682, + "eval_rauc_macro": 0.9145927366341076, + "eval_rauc_micro": 0.9371255777552341, + "eval_recall_B-DISEASE": 0.7829777107955898, + "eval_recall_B-MEDICATION": 0.9188467698878804, + "eval_recall_B-PROCEDURE": 0.820927446954141, + "eval_recall_B-SYMPTOM": 0.7908624229979466, + "eval_recall_I-DISEASE": 0.8268292376974121, + "eval_recall_I-MEDICATION": 0.9404453377056117, + "eval_recall_I-PROCEDURE": 0.8295103103299143, + "eval_recall_I-SYMPTOM": 0.8145712443584784, + "eval_recall_O": 0.9318899408213067, + "eval_recall_macro": 0.8507622690609201, + "eval_recall_micro": 0.8880882055547169, + "eval_roc_auc_B-DISEASE": 0.8901727076261692, + "eval_roc_auc_B-MEDICATION": 0.9593353648433499, + "eval_roc_auc_B-PROCEDURE": 0.9091887857316173, + "eval_roc_auc_B-SYMPTOM": 0.8934942497238074, + "eval_roc_auc_I-DISEASE": 0.9049857727909453, + "eval_roc_auc_I-MEDICATION": 0.9699894527029364, + "eval_roc_auc_I-PROCEDURE": 0.9089825820626593, + "eval_roc_auc_I-SYMPTOM": 0.8926697963231113, + "eval_roc_auc_O": 0.9025159179023717, + "eval_runtime": 60.8686, + "eval_samples_per_second": 178.631, + "eval_steps_per_second": 22.343, + "step": 158826 + }, + { + "epoch": 6.0056665785198895, + "grad_norm": 2.5522048473358154, + "learning_rate": 7.993275660156398e-06, + "loss": 0.0067, + "step": 158976 + }, + { + "epoch": 6.015337539193835, + "grad_norm": 0.1445242464542389, + "learning_rate": 7.973933738808508e-06, + "loss": 0.0056, + "step": 159232 + }, + { + "epoch": 6.02500849986778, + "grad_norm": 0.8560025095939636, + "learning_rate": 7.954591817460619e-06, + "loss": 0.004, + "step": 159488 + }, + { + "epoch": 6.034679460541725, + "grad_norm": 0.5683347582817078, + "learning_rate": 7.935249896112727e-06, + "loss": 0.0048, + "step": 159744 + }, + { + "epoch": 6.04435042121567, + "grad_norm": 0.10347987711429596, + "learning_rate": 7.916059083525368e-06, + "loss": 0.0048, + "step": 160000 + }, + { + "epoch": 6.054021381889615, + "grad_norm": 0.765963613986969, + "learning_rate": 7.896717162177477e-06, + "loss": 0.0044, + "step": 160256 + }, + { + "epoch": 6.06369234256356, + "grad_norm": 0.23950816690921783, + "learning_rate": 7.877375240829588e-06, + "loss": 0.0039, + "step": 160512 + }, + { + "epoch": 6.073363303237505, + "grad_norm": 1.28322434425354, + "learning_rate": 7.858033319481697e-06, + "loss": 0.0058, + "step": 160768 + }, + { + "epoch": 6.0830342639114505, + "grad_norm": 0.0957738608121872, + "learning_rate": 7.838691398133807e-06, + "loss": 0.0051, + "step": 161024 + }, + { + "epoch": 6.092705224585395, + "grad_norm": 0.09481767565011978, + "learning_rate": 7.819349476785917e-06, + "loss": 0.006, + "step": 161280 + }, + { + "epoch": 6.102376185259341, + "grad_norm": 1.084963321685791, + "learning_rate": 7.800007555438027e-06, + "loss": 0.0052, + "step": 161536 + }, + { + "epoch": 6.1120471459332855, + "grad_norm": 0.19962504506111145, + "learning_rate": 7.780665634090136e-06, + "loss": 0.0063, + "step": 161792 + }, + { + "epoch": 6.12171810660723, + "grad_norm": 0.44316428899765015, + "learning_rate": 7.761323712742248e-06, + "loss": 0.0055, + "step": 162048 + }, + { + "epoch": 6.131389067281176, + "grad_norm": 0.096375972032547, + "learning_rate": 7.741981791394356e-06, + "loss": 0.0046, + "step": 162304 + }, + { + "epoch": 6.14106002795512, + "grad_norm": 0.06923657655715942, + "learning_rate": 7.722639870046466e-06, + "loss": 0.0065, + "step": 162560 + }, + { + "epoch": 6.150730988629066, + "grad_norm": 0.05641581490635872, + "learning_rate": 7.703373503078842e-06, + "loss": 0.0053, + "step": 162816 + }, + { + "epoch": 6.160401949303011, + "grad_norm": 0.10804527252912521, + "learning_rate": 7.684031581730952e-06, + "loss": 0.0049, + "step": 163072 + }, + { + "epoch": 6.170072909976956, + "grad_norm": 0.3508886694908142, + "learning_rate": 7.664689660383062e-06, + "loss": 0.0057, + "step": 163328 + }, + { + "epoch": 6.179743870650901, + "grad_norm": 0.19692179560661316, + "learning_rate": 7.645347739035172e-06, + "loss": 0.0051, + "step": 163584 + }, + { + "epoch": 6.1894148313248465, + "grad_norm": 7.462360382080078, + "learning_rate": 7.6260058176872806e-06, + "loss": 0.0052, + "step": 163840 + }, + { + "epoch": 6.199085791998791, + "grad_norm": 0.0778375118970871, + "learning_rate": 7.6066638963393915e-06, + "loss": 0.0057, + "step": 164096 + }, + { + "epoch": 6.208756752672736, + "grad_norm": 3.7269153594970703, + "learning_rate": 7.587321974991501e-06, + "loss": 0.0066, + "step": 164352 + }, + { + "epoch": 6.218427713346681, + "grad_norm": 0.3314465582370758, + "learning_rate": 7.567980053643611e-06, + "loss": 0.0066, + "step": 164608 + }, + { + "epoch": 6.228098674020626, + "grad_norm": 0.4546041786670685, + "learning_rate": 7.548713686675986e-06, + "loss": 0.0054, + "step": 164864 + }, + { + "epoch": 6.237769634694572, + "grad_norm": 0.1140669584274292, + "learning_rate": 7.529371765328095e-06, + "loss": 0.0053, + "step": 165120 + }, + { + "epoch": 6.247440595368516, + "grad_norm": 0.03010609745979309, + "learning_rate": 7.510029843980205e-06, + "loss": 0.0045, + "step": 165376 + }, + { + "epoch": 6.257111556042462, + "grad_norm": 0.015513704158365726, + "learning_rate": 7.4906879226323145e-06, + "loss": 0.0056, + "step": 165632 + }, + { + "epoch": 6.266782516716407, + "grad_norm": 0.09134875982999802, + "learning_rate": 7.4713460012844255e-06, + "loss": 0.0066, + "step": 165888 + }, + { + "epoch": 6.276453477390351, + "grad_norm": 0.016765909269452095, + "learning_rate": 7.452004079936535e-06, + "loss": 0.0053, + "step": 166144 + }, + { + "epoch": 6.286124438064297, + "grad_norm": 0.5640433430671692, + "learning_rate": 7.432662158588645e-06, + "loss": 0.0049, + "step": 166400 + }, + { + "epoch": 6.295795398738242, + "grad_norm": 0.21902374923229218, + "learning_rate": 7.413320237240754e-06, + "loss": 0.0064, + "step": 166656 + }, + { + "epoch": 6.305466359412187, + "grad_norm": 5.544469833374023, + "learning_rate": 7.393978315892864e-06, + "loss": 0.0052, + "step": 166912 + }, + { + "epoch": 6.315137320086132, + "grad_norm": 0.7227392792701721, + "learning_rate": 7.3746363945449736e-06, + "loss": 0.0054, + "step": 167168 + }, + { + "epoch": 6.324808280760077, + "grad_norm": 5.256189346313477, + "learning_rate": 7.3552944731970845e-06, + "loss": 0.0057, + "step": 167424 + }, + { + "epoch": 6.334479241434022, + "grad_norm": 4.659141540527344, + "learning_rate": 7.335952551849194e-06, + "loss": 0.0048, + "step": 167680 + }, + { + "epoch": 6.344150202107967, + "grad_norm": 0.9072468876838684, + "learning_rate": 7.316610630501304e-06, + "loss": 0.0046, + "step": 167936 + }, + { + "epoch": 6.353821162781912, + "grad_norm": 0.14295679330825806, + "learning_rate": 7.297268709153413e-06, + "loss": 0.0052, + "step": 168192 + }, + { + "epoch": 6.363492123455857, + "grad_norm": 0.07267609983682632, + "learning_rate": 7.277926787805523e-06, + "loss": 0.0044, + "step": 168448 + }, + { + "epoch": 6.373163084129803, + "grad_norm": 0.5884820818901062, + "learning_rate": 7.258584866457633e-06, + "loss": 0.0059, + "step": 168704 + }, + { + "epoch": 6.382834044803747, + "grad_norm": 0.9420909881591797, + "learning_rate": 7.239318499490009e-06, + "loss": 0.0063, + "step": 168960 + }, + { + "epoch": 6.392505005477693, + "grad_norm": 0.13994884490966797, + "learning_rate": 7.2199765781421184e-06, + "loss": 0.0047, + "step": 169216 + }, + { + "epoch": 6.4021759661516375, + "grad_norm": 0.12359564751386642, + "learning_rate": 7.2006346567942286e-06, + "loss": 0.0052, + "step": 169472 + }, + { + "epoch": 6.411846926825583, + "grad_norm": 2.2115604877471924, + "learning_rate": 7.181292735446338e-06, + "loss": 0.0057, + "step": 169728 + }, + { + "epoch": 6.421517887499528, + "grad_norm": 0.019372638314962387, + "learning_rate": 7.162101922858978e-06, + "loss": 0.0058, + "step": 169984 + }, + { + "epoch": 6.4311888481734725, + "grad_norm": 0.07033487409353256, + "learning_rate": 7.142760001511088e-06, + "loss": 0.007, + "step": 170240 + }, + { + "epoch": 6.440859808847418, + "grad_norm": 0.0495685413479805, + "learning_rate": 7.123418080163198e-06, + "loss": 0.004, + "step": 170496 + }, + { + "epoch": 6.450530769521363, + "grad_norm": 5.647730350494385, + "learning_rate": 7.104076158815309e-06, + "loss": 0.0056, + "step": 170752 + }, + { + "epoch": 6.460201730195308, + "grad_norm": 0.04144500568509102, + "learning_rate": 7.084734237467418e-06, + "loss": 0.0045, + "step": 171008 + }, + { + "epoch": 6.469872690869253, + "grad_norm": 0.04790091514587402, + "learning_rate": 7.065392316119528e-06, + "loss": 0.005, + "step": 171264 + }, + { + "epoch": 6.479543651543199, + "grad_norm": 0.5807636976242065, + "learning_rate": 7.046050394771637e-06, + "loss": 0.006, + "step": 171520 + }, + { + "epoch": 6.489214612217143, + "grad_norm": 0.04957037419080734, + "learning_rate": 7.026708473423747e-06, + "loss": 0.0055, + "step": 171776 + }, + { + "epoch": 6.498885572891089, + "grad_norm": 0.29678839445114136, + "learning_rate": 7.0073665520758576e-06, + "loss": 0.0055, + "step": 172032 + }, + { + "epoch": 6.5085565335650335, + "grad_norm": 0.12547393143177032, + "learning_rate": 6.988024630727968e-06, + "loss": 0.004, + "step": 172288 + }, + { + "epoch": 6.518227494238978, + "grad_norm": 0.032738201320171356, + "learning_rate": 6.9687582637603425e-06, + "loss": 0.0062, + "step": 172544 + }, + { + "epoch": 6.527898454912924, + "grad_norm": 0.013363759964704514, + "learning_rate": 6.949416342412452e-06, + "loss": 0.0053, + "step": 172800 + }, + { + "epoch": 6.537569415586868, + "grad_norm": 0.12095487862825394, + "learning_rate": 6.930074421064562e-06, + "loss": 0.0059, + "step": 173056 + }, + { + "epoch": 6.547240376260814, + "grad_norm": 1.4809820652008057, + "learning_rate": 6.910732499716671e-06, + "loss": 0.0045, + "step": 173312 + }, + { + "epoch": 6.556911336934759, + "grad_norm": 0.11994415521621704, + "learning_rate": 6.891390578368782e-06, + "loss": 0.0045, + "step": 173568 + }, + { + "epoch": 6.566582297608704, + "grad_norm": 0.5074435472488403, + "learning_rate": 6.8720486570208915e-06, + "loss": 0.0047, + "step": 173824 + }, + { + "epoch": 6.576253258282649, + "grad_norm": 1.6332694292068481, + "learning_rate": 6.852706735673002e-06, + "loss": 0.0035, + "step": 174080 + }, + { + "epoch": 6.585924218956594, + "grad_norm": 0.6540184020996094, + "learning_rate": 6.833364814325111e-06, + "loss": 0.005, + "step": 174336 + }, + { + "epoch": 6.595595179630539, + "grad_norm": 4.0721588134765625, + "learning_rate": 6.814022892977221e-06, + "loss": 0.0043, + "step": 174592 + }, + { + "epoch": 6.605266140304484, + "grad_norm": 0.34634193778038025, + "learning_rate": 6.79468097162933e-06, + "loss": 0.0063, + "step": 174848 + }, + { + "epoch": 6.6149371009784295, + "grad_norm": 0.140211820602417, + "learning_rate": 6.775339050281441e-06, + "loss": 0.0036, + "step": 175104 + }, + { + "epoch": 6.624608061652374, + "grad_norm": 1.9943935871124268, + "learning_rate": 6.7559971289335505e-06, + "loss": 0.0067, + "step": 175360 + }, + { + "epoch": 6.63427902232632, + "grad_norm": 10.475574493408203, + "learning_rate": 6.736655207585661e-06, + "loss": 0.0054, + "step": 175616 + }, + { + "epoch": 6.643949983000264, + "grad_norm": 12.580154418945312, + "learning_rate": 6.71731328623777e-06, + "loss": 0.0065, + "step": 175872 + }, + { + "epoch": 6.653620943674209, + "grad_norm": 0.5796445608139038, + "learning_rate": 6.69797136488988e-06, + "loss": 0.0047, + "step": 176128 + }, + { + "epoch": 6.663291904348155, + "grad_norm": 0.03411826118826866, + "learning_rate": 6.678629443541989e-06, + "loss": 0.0038, + "step": 176384 + }, + { + "epoch": 6.672962865022099, + "grad_norm": 0.3479785621166229, + "learning_rate": 6.6592875221941e-06, + "loss": 0.0059, + "step": 176640 + }, + { + "epoch": 6.682633825696045, + "grad_norm": 0.37940141558647156, + "learning_rate": 6.63994560084621e-06, + "loss": 0.0053, + "step": 176896 + }, + { + "epoch": 6.69230478636999, + "grad_norm": 0.2881454527378082, + "learning_rate": 6.62060367949832e-06, + "loss": 0.0043, + "step": 177152 + }, + { + "epoch": 6.701975747043935, + "grad_norm": 0.32211869955062866, + "learning_rate": 6.601261758150429e-06, + "loss": 0.0071, + "step": 177408 + }, + { + "epoch": 6.71164670771788, + "grad_norm": 0.0830293819308281, + "learning_rate": 6.581919836802539e-06, + "loss": 0.0047, + "step": 177664 + }, + { + "epoch": 6.7213176683918245, + "grad_norm": 0.6374879479408264, + "learning_rate": 6.5625779154546484e-06, + "loss": 0.005, + "step": 177920 + }, + { + "epoch": 6.73098862906577, + "grad_norm": 0.42339634895324707, + "learning_rate": 6.543235994106759e-06, + "loss": 0.0057, + "step": 178176 + }, + { + "epoch": 6.740659589739715, + "grad_norm": 0.21855546534061432, + "learning_rate": 6.523894072758869e-06, + "loss": 0.0044, + "step": 178432 + }, + { + "epoch": 6.75033055041366, + "grad_norm": 0.17506131529808044, + "learning_rate": 6.5046277057912435e-06, + "loss": 0.0048, + "step": 178688 + }, + { + "epoch": 6.760001511087605, + "grad_norm": 0.11796054244041443, + "learning_rate": 6.485285784443354e-06, + "loss": 0.0055, + "step": 178944 + }, + { + "epoch": 6.769672471761551, + "grad_norm": 0.7898224592208862, + "learning_rate": 6.465943863095463e-06, + "loss": 0.0042, + "step": 179200 + }, + { + "epoch": 6.779343432435495, + "grad_norm": 0.18970559537410736, + "learning_rate": 6.446677496127839e-06, + "loss": 0.0044, + "step": 179456 + }, + { + "epoch": 6.789014393109441, + "grad_norm": 1.1956768035888672, + "learning_rate": 6.427335574779948e-06, + "loss": 0.0049, + "step": 179712 + }, + { + "epoch": 6.798685353783386, + "grad_norm": 0.6470310688018799, + "learning_rate": 6.407993653432059e-06, + "loss": 0.005, + "step": 179968 + }, + { + "epoch": 6.808356314457331, + "grad_norm": 4.411340236663818, + "learning_rate": 6.388651732084168e-06, + "loss": 0.0043, + "step": 180224 + }, + { + "epoch": 6.818027275131276, + "grad_norm": 0.09347503632307053, + "learning_rate": 6.369309810736278e-06, + "loss": 0.0043, + "step": 180480 + }, + { + "epoch": 6.8276982358052205, + "grad_norm": 0.007505136076360941, + "learning_rate": 6.3499678893883875e-06, + "loss": 0.0055, + "step": 180736 + }, + { + "epoch": 6.837369196479166, + "grad_norm": 0.286432683467865, + "learning_rate": 6.330625968040498e-06, + "loss": 0.0039, + "step": 180992 + }, + { + "epoch": 6.847040157153111, + "grad_norm": 0.2944409251213074, + "learning_rate": 6.311284046692607e-06, + "loss": 0.004, + "step": 181248 + }, + { + "epoch": 6.856711117827056, + "grad_norm": 0.32838645577430725, + "learning_rate": 6.291942125344718e-06, + "loss": 0.0046, + "step": 181504 + }, + { + "epoch": 6.866382078501001, + "grad_norm": 0.04531640559434891, + "learning_rate": 6.272600203996827e-06, + "loss": 0.0048, + "step": 181760 + }, + { + "epoch": 6.876053039174947, + "grad_norm": 0.060415927320718765, + "learning_rate": 6.253258282648937e-06, + "loss": 0.0038, + "step": 182016 + }, + { + "epoch": 6.885723999848891, + "grad_norm": 0.0581819973886013, + "learning_rate": 6.233916361301047e-06, + "loss": 0.004, + "step": 182272 + }, + { + "epoch": 6.895394960522836, + "grad_norm": 0.0427870936691761, + "learning_rate": 6.214574439953157e-06, + "loss": 0.005, + "step": 182528 + }, + { + "epoch": 6.9050659211967815, + "grad_norm": 1.7197208404541016, + "learning_rate": 6.195232518605266e-06, + "loss": 0.0043, + "step": 182784 + }, + { + "epoch": 6.914736881870726, + "grad_norm": 0.09247897565364838, + "learning_rate": 6.175890597257377e-06, + "loss": 0.0056, + "step": 183040 + }, + { + "epoch": 6.924407842544672, + "grad_norm": 0.019822193309664726, + "learning_rate": 6.156548675909486e-06, + "loss": 0.0055, + "step": 183296 + }, + { + "epoch": 6.9340788032186165, + "grad_norm": 1.541544795036316, + "learning_rate": 6.137282308941861e-06, + "loss": 0.0057, + "step": 183552 + }, + { + "epoch": 6.943749763892562, + "grad_norm": 1.1010373830795288, + "learning_rate": 6.117940387593971e-06, + "loss": 0.0048, + "step": 183808 + }, + { + "epoch": 6.953420724566507, + "grad_norm": 0.32369253039360046, + "learning_rate": 6.0985984662460805e-06, + "loss": 0.0044, + "step": 184064 + }, + { + "epoch": 6.963091685240451, + "grad_norm": 0.36643514037132263, + "learning_rate": 6.079332099278456e-06, + "loss": 0.0052, + "step": 184320 + }, + { + "epoch": 6.972762645914397, + "grad_norm": 4.76845645904541, + "learning_rate": 6.0599901779305655e-06, + "loss": 0.0055, + "step": 184576 + }, + { + "epoch": 6.982433606588342, + "grad_norm": 0.9248315095901489, + "learning_rate": 6.0406482565826765e-06, + "loss": 0.0057, + "step": 184832 + }, + { + "epoch": 6.992104567262287, + "grad_norm": 7.52930212020874, + "learning_rate": 6.021306335234786e-06, + "loss": 0.0057, + "step": 185088 + }, + { + "epoch": 7.0, + "eval_f1_B-DISEASE": 0.8170322475036831, + "eval_f1_B-MEDICATION": 0.9388233174159835, + "eval_f1_B-PROCEDURE": 0.839606661240293, + "eval_f1_B-SYMPTOM": 0.8083601954871582, + "eval_f1_I-DISEASE": 0.8508488239606706, + "eval_f1_I-MEDICATION": 0.9480509127627348, + "eval_f1_I-PROCEDURE": 0.8557830786803291, + "eval_f1_I-SYMPTOM": 0.8172193160347818, + "eval_f1_O": 0.9271939022229883, + "eval_f1_macro": 0.8669909394787357, + "eval_f1_micro": 0.8922796408524778, + "eval_loss": 0.4217996597290039, + "eval_precision_B-DISEASE": 0.8439005833121989, + "eval_precision_B-MEDICATION": 0.9507299270072993, + "eval_precision_B-PROCEDURE": 0.8642271636248359, + "eval_precision_B-SYMPTOM": 0.8188329471244997, + "eval_precision_I-DISEASE": 0.8612162292396158, + "eval_precision_I-MEDICATION": 0.9526327031934209, + "eval_precision_I-PROCEDURE": 0.8682078109649086, + "eval_precision_I-SYMPTOM": 0.8107099237044831, + "eval_precision_O": 0.9254912549627295, + "eval_precision_macro": 0.8773276159037767, + "eval_precision_micro": 0.8938970887232961, + "eval_rauc_macro": 0.9183518246652276, + "eval_rauc_micro": 0.9385967353145594, + "eval_recall_B-DISEASE": 0.7918220036487665, + "eval_recall_B-MEDICATION": 0.9272112475529454, + "eval_recall_B-PROCEDURE": 0.8163501026694046, + "eval_recall_B-SYMPTOM": 0.7981519507186858, + "eval_recall_I-DISEASE": 0.8407280573012708, + "eval_recall_I-MEDICATION": 0.943512984608875, + "eval_recall_I-PROCEDURE": 0.8437089443234025, + "eval_recall_I-SYMPTOM": 0.8238340855362132, + "eval_recall_O": 0.9289028258275723, + "eval_recall_macro": 0.8571358002430152, + "eval_recall_micro": 0.8906680357402255, + "eval_roc_auc_B-DISEASE": 0.8945483627101096, + "eval_roc_auc_B-MEDICATION": 0.9635074195320126, + "eval_roc_auc_B-PROCEDURE": 0.9070703509413875, + "eval_roc_auc_B-SYMPTOM": 0.897166398981839, + "eval_roc_auc_I-DISEASE": 0.9115174466724183, + "eval_roc_auc_I-MEDICATION": 0.9714307242336749, + "eval_roc_auc_I-PROCEDURE": 0.9156151956077059, + "eval_roc_auc_I-SYMPTOM": 0.896927321744982, + "eval_roc_auc_O": 0.9073832015629196, + "eval_runtime": 61.0949, + "eval_samples_per_second": 177.969, + "eval_steps_per_second": 22.26, + "step": 185297 + }, + { + "epoch": 7.001775527936232, + "grad_norm": 0.7092540860176086, + "learning_rate": 6.001964413886896e-06, + "loss": 0.0041, + "step": 185344 + }, + { + "epoch": 7.0114464886101775, + "grad_norm": 0.18654197454452515, + "learning_rate": 5.982622492539005e-06, + "loss": 0.0024, + "step": 185600 + }, + { + "epoch": 7.021117449284122, + "grad_norm": 0.8840870261192322, + "learning_rate": 5.963280571191115e-06, + "loss": 0.0023, + "step": 185856 + }, + { + "epoch": 7.030788409958068, + "grad_norm": 0.08226889371871948, + "learning_rate": 5.943938649843225e-06, + "loss": 0.0041, + "step": 186112 + }, + { + "epoch": 7.040459370632012, + "grad_norm": 0.1381804347038269, + "learning_rate": 5.9245967284953355e-06, + "loss": 0.0031, + "step": 186368 + }, + { + "epoch": 7.050130331305957, + "grad_norm": 2.1430623531341553, + "learning_rate": 5.905254807147445e-06, + "loss": 0.0022, + "step": 186624 + }, + { + "epoch": 7.059801291979903, + "grad_norm": 0.060144223272800446, + "learning_rate": 5.885912885799555e-06, + "loss": 0.0022, + "step": 186880 + }, + { + "epoch": 7.069472252653847, + "grad_norm": 0.06146460771560669, + "learning_rate": 5.866570964451664e-06, + "loss": 0.0035, + "step": 187136 + }, + { + "epoch": 7.079143213327793, + "grad_norm": 1.6876006126403809, + "learning_rate": 5.847229043103775e-06, + "loss": 0.0034, + "step": 187392 + }, + { + "epoch": 7.088814174001738, + "grad_norm": 2.21935772895813, + "learning_rate": 5.8278871217558845e-06, + "loss": 0.0045, + "step": 187648 + }, + { + "epoch": 7.098485134675683, + "grad_norm": 0.2139071524143219, + "learning_rate": 5.808545200407995e-06, + "loss": 0.0028, + "step": 187904 + }, + { + "epoch": 7.108156095349628, + "grad_norm": 0.2530527710914612, + "learning_rate": 5.789203279060104e-06, + "loss": 0.0029, + "step": 188160 + }, + { + "epoch": 7.117827056023573, + "grad_norm": 0.06350823491811752, + "learning_rate": 5.769861357712214e-06, + "loss": 0.0028, + "step": 188416 + }, + { + "epoch": 7.127498016697518, + "grad_norm": 0.3266438841819763, + "learning_rate": 5.750519436364323e-06, + "loss": 0.0057, + "step": 188672 + }, + { + "epoch": 7.137168977371463, + "grad_norm": 0.02727964147925377, + "learning_rate": 5.731253069396698e-06, + "loss": 0.0042, + "step": 188928 + }, + { + "epoch": 7.146839938045408, + "grad_norm": 0.007432880811393261, + "learning_rate": 5.711911148048809e-06, + "loss": 0.0035, + "step": 189184 + }, + { + "epoch": 7.156510898719353, + "grad_norm": 0.09012371301651001, + "learning_rate": 5.692569226700918e-06, + "loss": 0.0039, + "step": 189440 + }, + { + "epoch": 7.166181859393299, + "grad_norm": 0.19539253413677216, + "learning_rate": 5.6732273053530285e-06, + "loss": 0.0039, + "step": 189696 + }, + { + "epoch": 7.175852820067243, + "grad_norm": 3.682310104370117, + "learning_rate": 5.653885384005138e-06, + "loss": 0.0031, + "step": 189952 + }, + { + "epoch": 7.185523780741189, + "grad_norm": 0.4140027165412903, + "learning_rate": 5.634543462657248e-06, + "loss": 0.0044, + "step": 190208 + }, + { + "epoch": 7.195194741415134, + "grad_norm": 0.047425903379917145, + "learning_rate": 5.615201541309357e-06, + "loss": 0.0032, + "step": 190464 + }, + { + "epoch": 7.204865702089078, + "grad_norm": 0.08099602907896042, + "learning_rate": 5.595859619961468e-06, + "loss": 0.0027, + "step": 190720 + }, + { + "epoch": 7.214536662763024, + "grad_norm": 2.0335052013397217, + "learning_rate": 5.5765176986135775e-06, + "loss": 0.0038, + "step": 190976 + }, + { + "epoch": 7.2242076234369685, + "grad_norm": 0.7074165940284729, + "learning_rate": 5.557251331645953e-06, + "loss": 0.0033, + "step": 191232 + }, + { + "epoch": 7.233878584110914, + "grad_norm": 0.00729184877127409, + "learning_rate": 5.537909410298062e-06, + "loss": 0.0033, + "step": 191488 + }, + { + "epoch": 7.243549544784859, + "grad_norm": 0.2962284982204437, + "learning_rate": 5.518643043330437e-06, + "loss": 0.0033, + "step": 191744 + }, + { + "epoch": 7.253220505458804, + "grad_norm": 0.03666882589459419, + "learning_rate": 5.499301121982547e-06, + "loss": 0.0032, + "step": 192000 + }, + { + "epoch": 7.262891466132749, + "grad_norm": 2.278210163116455, + "learning_rate": 5.479959200634657e-06, + "loss": 0.0026, + "step": 192256 + }, + { + "epoch": 7.272562426806694, + "grad_norm": 0.6450464129447937, + "learning_rate": 5.460617279286768e-06, + "loss": 0.0031, + "step": 192512 + }, + { + "epoch": 7.282233387480639, + "grad_norm": 0.011020343750715256, + "learning_rate": 5.441275357938877e-06, + "loss": 0.0026, + "step": 192768 + }, + { + "epoch": 7.291904348154584, + "grad_norm": 0.848145604133606, + "learning_rate": 5.421933436590987e-06, + "loss": 0.0031, + "step": 193024 + }, + { + "epoch": 7.30157530882853, + "grad_norm": 0.47598353028297424, + "learning_rate": 5.402591515243096e-06, + "loss": 0.0035, + "step": 193280 + }, + { + "epoch": 7.311246269502474, + "grad_norm": 0.08850053697824478, + "learning_rate": 5.3832495938952064e-06, + "loss": 0.0028, + "step": 193536 + }, + { + "epoch": 7.32091723017642, + "grad_norm": 0.14100511372089386, + "learning_rate": 5.363907672547316e-06, + "loss": 0.0028, + "step": 193792 + }, + { + "epoch": 7.3305881908503645, + "grad_norm": 0.0643409714102745, + "learning_rate": 5.344565751199427e-06, + "loss": 0.0034, + "step": 194048 + }, + { + "epoch": 7.340259151524309, + "grad_norm": 0.02023415081202984, + "learning_rate": 5.325223829851536e-06, + "loss": 0.0034, + "step": 194304 + }, + { + "epoch": 7.349930112198255, + "grad_norm": 0.2690616846084595, + "learning_rate": 5.305881908503646e-06, + "loss": 0.0036, + "step": 194560 + }, + { + "epoch": 7.3596010728721994, + "grad_norm": 1.2020032405853271, + "learning_rate": 5.286539987155755e-06, + "loss": 0.0021, + "step": 194816 + }, + { + "epoch": 7.369272033546145, + "grad_norm": 0.49315980076789856, + "learning_rate": 5.2671980658078655e-06, + "loss": 0.0033, + "step": 195072 + }, + { + "epoch": 7.37894299422009, + "grad_norm": 0.03424458205699921, + "learning_rate": 5.247856144459975e-06, + "loss": 0.0034, + "step": 195328 + }, + { + "epoch": 7.388613954894035, + "grad_norm": 0.5558503270149231, + "learning_rate": 5.228514223112086e-06, + "loss": 0.0039, + "step": 195584 + }, + { + "epoch": 7.39828491556798, + "grad_norm": 0.1527431756258011, + "learning_rate": 5.209172301764195e-06, + "loss": 0.0025, + "step": 195840 + }, + { + "epoch": 7.4079558762419255, + "grad_norm": 0.03023804910480976, + "learning_rate": 5.189830380416305e-06, + "loss": 0.0029, + "step": 196096 + }, + { + "epoch": 7.41762683691587, + "grad_norm": 0.006813144311308861, + "learning_rate": 5.1704884590684145e-06, + "loss": 0.002, + "step": 196352 + }, + { + "epoch": 7.427297797589815, + "grad_norm": 0.06950301676988602, + "learning_rate": 5.151146537720525e-06, + "loss": 0.0035, + "step": 196608 + }, + { + "epoch": 7.4369687582637605, + "grad_norm": 0.12590453028678894, + "learning_rate": 5.131804616372634e-06, + "loss": 0.0027, + "step": 196864 + }, + { + "epoch": 7.446639718937705, + "grad_norm": 1.1601083278656006, + "learning_rate": 5.112462695024745e-06, + "loss": 0.0025, + "step": 197120 + }, + { + "epoch": 7.456310679611651, + "grad_norm": 1.2641323804855347, + "learning_rate": 5.093120773676854e-06, + "loss": 0.0031, + "step": 197376 + }, + { + "epoch": 7.465981640285595, + "grad_norm": 0.9254095554351807, + "learning_rate": 5.073778852328964e-06, + "loss": 0.003, + "step": 197632 + }, + { + "epoch": 7.475652600959541, + "grad_norm": 0.05598944053053856, + "learning_rate": 5.0544369309810736e-06, + "loss": 0.003, + "step": 197888 + }, + { + "epoch": 7.485323561633486, + "grad_norm": 0.040579646825790405, + "learning_rate": 5.035170564013449e-06, + "loss": 0.0028, + "step": 198144 + }, + { + "epoch": 7.494994522307431, + "grad_norm": 8.562992095947266, + "learning_rate": 5.0158286426655585e-06, + "loss": 0.0043, + "step": 198400 + }, + { + "epoch": 7.504665482981376, + "grad_norm": 0.09161358326673508, + "learning_rate": 4.996562275697933e-06, + "loss": 0.0028, + "step": 198656 + }, + { + "epoch": 7.514336443655321, + "grad_norm": 0.06476159393787384, + "learning_rate": 4.9772203543500435e-06, + "loss": 0.0029, + "step": 198912 + }, + { + "epoch": 7.524007404329266, + "grad_norm": 0.025177879258990288, + "learning_rate": 4.957878433002154e-06, + "loss": 0.0041, + "step": 199168 + }, + { + "epoch": 7.533678365003211, + "grad_norm": 0.06914424151182175, + "learning_rate": 4.938536511654263e-06, + "loss": 0.0036, + "step": 199424 + }, + { + "epoch": 7.543349325677156, + "grad_norm": 0.007711977697908878, + "learning_rate": 4.919194590306373e-06, + "loss": 0.0037, + "step": 199680 + }, + { + "epoch": 7.553020286351101, + "grad_norm": 0.5741889476776123, + "learning_rate": 4.899852668958483e-06, + "loss": 0.0021, + "step": 199936 + }, + { + "epoch": 7.562691247025047, + "grad_norm": 0.017532778903841972, + "learning_rate": 4.880510747610593e-06, + "loss": 0.0039, + "step": 200192 + }, + { + "epoch": 7.572362207698991, + "grad_norm": 0.013444333337247372, + "learning_rate": 4.8611688262627025e-06, + "loss": 0.0032, + "step": 200448 + }, + { + "epoch": 7.582033168372936, + "grad_norm": 0.1252066045999527, + "learning_rate": 4.8418269049148135e-06, + "loss": 0.0026, + "step": 200704 + }, + { + "epoch": 7.591704129046882, + "grad_norm": 0.16216708719730377, + "learning_rate": 4.822560537947188e-06, + "loss": 0.0028, + "step": 200960 + }, + { + "epoch": 7.601375089720826, + "grad_norm": 0.20117320120334625, + "learning_rate": 4.803218616599298e-06, + "loss": 0.0026, + "step": 201216 + }, + { + "epoch": 7.611046050394772, + "grad_norm": 0.4220730662345886, + "learning_rate": 4.783876695251408e-06, + "loss": 0.0033, + "step": 201472 + }, + { + "epoch": 7.620717011068717, + "grad_norm": 0.04324984550476074, + "learning_rate": 4.764534773903518e-06, + "loss": 0.0032, + "step": 201728 + }, + { + "epoch": 7.630387971742662, + "grad_norm": 0.24287299811840057, + "learning_rate": 4.745192852555627e-06, + "loss": 0.0029, + "step": 201984 + }, + { + "epoch": 7.640058932416607, + "grad_norm": 0.01797611638903618, + "learning_rate": 4.725850931207737e-06, + "loss": 0.0023, + "step": 202240 + }, + { + "epoch": 7.6497298930905515, + "grad_norm": 0.027291102334856987, + "learning_rate": 4.706509009859847e-06, + "loss": 0.0037, + "step": 202496 + }, + { + "epoch": 7.659400853764497, + "grad_norm": 0.015763144940137863, + "learning_rate": 4.687167088511957e-06, + "loss": 0.0024, + "step": 202752 + }, + { + "epoch": 7.669071814438442, + "grad_norm": 2.068912982940674, + "learning_rate": 4.6679007215443315e-06, + "loss": 0.0027, + "step": 203008 + }, + { + "epoch": 7.678742775112387, + "grad_norm": 0.1518033742904663, + "learning_rate": 4.648558800196442e-06, + "loss": 0.0025, + "step": 203264 + }, + { + "epoch": 7.688413735786332, + "grad_norm": 0.7454547882080078, + "learning_rate": 4.629216878848552e-06, + "loss": 0.0027, + "step": 203520 + }, + { + "epoch": 7.698084696460278, + "grad_norm": 0.04685609042644501, + "learning_rate": 4.609874957500661e-06, + "loss": 0.0034, + "step": 203776 + }, + { + "epoch": 7.707755657134222, + "grad_norm": 0.012423527427017689, + "learning_rate": 4.590533036152771e-06, + "loss": 0.002, + "step": 204032 + }, + { + "epoch": 7.717426617808167, + "grad_norm": 0.5419167876243591, + "learning_rate": 4.571191114804881e-06, + "loss": 0.0025, + "step": 204288 + }, + { + "epoch": 7.7270975784821125, + "grad_norm": 0.3725039064884186, + "learning_rate": 4.551849193456991e-06, + "loss": 0.0028, + "step": 204544 + }, + { + "epoch": 7.736768539156057, + "grad_norm": 0.09141060709953308, + "learning_rate": 4.532507272109101e-06, + "loss": 0.0024, + "step": 204800 + }, + { + "epoch": 7.746439499830003, + "grad_norm": 0.13550949096679688, + "learning_rate": 4.513165350761211e-06, + "loss": 0.0025, + "step": 205056 + }, + { + "epoch": 7.7561104605039475, + "grad_norm": 0.005226655397564173, + "learning_rate": 4.49382342941332e-06, + "loss": 0.0027, + "step": 205312 + }, + { + "epoch": 7.765781421177893, + "grad_norm": 1.0764216184616089, + "learning_rate": 4.47448150806543e-06, + "loss": 0.0029, + "step": 205568 + }, + { + "epoch": 7.775452381851838, + "grad_norm": 0.019114414229989052, + "learning_rate": 4.45513958671754e-06, + "loss": 0.0023, + "step": 205824 + }, + { + "epoch": 7.785123342525783, + "grad_norm": 0.214304581284523, + "learning_rate": 4.43579766536965e-06, + "loss": 0.0028, + "step": 206080 + }, + { + "epoch": 7.794794303199728, + "grad_norm": 0.9313531517982483, + "learning_rate": 4.41645574402176e-06, + "loss": 0.0027, + "step": 206336 + }, + { + "epoch": 7.804465263873674, + "grad_norm": 0.04551494121551514, + "learning_rate": 4.3971893770541355e-06, + "loss": 0.0025, + "step": 206592 + }, + { + "epoch": 7.814136224547618, + "grad_norm": 0.25390639901161194, + "learning_rate": 4.377847455706245e-06, + "loss": 0.0028, + "step": 206848 + }, + { + "epoch": 7.823807185221563, + "grad_norm": 0.4188549816608429, + "learning_rate": 4.358505534358355e-06, + "loss": 0.0035, + "step": 207104 + }, + { + "epoch": 7.8334781458955085, + "grad_norm": 5.584123134613037, + "learning_rate": 4.339163613010465e-06, + "loss": 0.0029, + "step": 207360 + }, + { + "epoch": 7.843149106569453, + "grad_norm": 0.09376902878284454, + "learning_rate": 4.319821691662574e-06, + "loss": 0.0025, + "step": 207616 + }, + { + "epoch": 7.852820067243399, + "grad_norm": 0.007881022058427334, + "learning_rate": 4.3004797703146844e-06, + "loss": 0.0031, + "step": 207872 + }, + { + "epoch": 7.8624910279173434, + "grad_norm": 0.053988266736269, + "learning_rate": 4.2811378489667946e-06, + "loss": 0.0031, + "step": 208128 + }, + { + "epoch": 7.872161988591289, + "grad_norm": 0.37332120537757874, + "learning_rate": 4.261795927618904e-06, + "loss": 0.0021, + "step": 208384 + }, + { + "epoch": 7.881832949265234, + "grad_norm": 0.10056313127279282, + "learning_rate": 4.242454006271014e-06, + "loss": 0.003, + "step": 208640 + }, + { + "epoch": 7.891503909939178, + "grad_norm": 0.021642550826072693, + "learning_rate": 4.223187639303389e-06, + "loss": 0.003, + "step": 208896 + }, + { + "epoch": 7.901174870613124, + "grad_norm": 0.15152160823345184, + "learning_rate": 4.203845717955499e-06, + "loss": 0.0028, + "step": 209152 + }, + { + "epoch": 7.910845831287069, + "grad_norm": 0.05597339943051338, + "learning_rate": 4.184503796607608e-06, + "loss": 0.0026, + "step": 209408 + }, + { + "epoch": 7.920516791961014, + "grad_norm": 0.026413604617118835, + "learning_rate": 4.165161875259718e-06, + "loss": 0.0027, + "step": 209664 + }, + { + "epoch": 7.930187752634959, + "grad_norm": 0.086790531873703, + "learning_rate": 4.1458199539118285e-06, + "loss": 0.0018, + "step": 209920 + }, + { + "epoch": 7.9398587133089045, + "grad_norm": 0.40138956904411316, + "learning_rate": 4.126478032563938e-06, + "loss": 0.0026, + "step": 210176 + }, + { + "epoch": 7.949529673982849, + "grad_norm": 0.504784107208252, + "learning_rate": 4.107136111216048e-06, + "loss": 0.003, + "step": 210432 + }, + { + "epoch": 7.959200634656794, + "grad_norm": 0.5855137705802917, + "learning_rate": 4.087794189868158e-06, + "loss": 0.0038, + "step": 210688 + }, + { + "epoch": 7.968871595330739, + "grad_norm": 0.23986396193504333, + "learning_rate": 4.068452268520267e-06, + "loss": 0.0027, + "step": 210944 + }, + { + "epoch": 7.978542556004684, + "grad_norm": 0.08508482575416565, + "learning_rate": 4.049110347172377e-06, + "loss": 0.0039, + "step": 211200 + }, + { + "epoch": 7.98821351667863, + "grad_norm": 2.018085241317749, + "learning_rate": 4.0297684258244875e-06, + "loss": 0.0018, + "step": 211456 + }, + { + "epoch": 7.997884477352574, + "grad_norm": 0.023388510569930077, + "learning_rate": 4.010502058856862e-06, + "loss": 0.0022, + "step": 211712 + }, + { + "epoch": 8.0, + "eval_f1_B-DISEASE": 0.8192092717409173, + "eval_f1_B-MEDICATION": 0.9423820224719102, + "eval_f1_B-PROCEDURE": 0.8420637009739406, + "eval_f1_B-SYMPTOM": 0.8127123442808607, + "eval_f1_I-DISEASE": 0.850955341029534, + "eval_f1_I-MEDICATION": 0.949058663755226, + "eval_f1_I-PROCEDURE": 0.8554017038172543, + "eval_f1_I-SYMPTOM": 0.8136104891443594, + "eval_f1_O": 0.9274238987873996, + "eval_f1_macro": 0.868090826222378, + "eval_f1_micro": 0.8928833447754752, + "eval_loss": 0.4737370014190674, + "eval_precision_B-DISEASE": 0.8500213219616205, + "eval_precision_B-MEDICATION": 0.9520523065746458, + "eval_precision_B-PROCEDURE": 0.8641274986493788, + "eval_precision_B-SYMPTOM": 0.8414187307635937, + "eval_precision_I-DISEASE": 0.8638646013066827, + "eval_precision_I-MEDICATION": 0.9557009546283385, + "eval_precision_I-PROCEDURE": 0.8787206030410202, + "eval_precision_I-SYMPTOM": 0.8445731862439002, + "eval_precision_O": 0.9156305475079966, + "eval_precision_macro": 0.8851233056307976, + "eval_precision_micro": 0.895332761029088, + "eval_rauc_macro": 0.9153957194153303, + "eval_rauc_micro": 0.9385898278409849, + "eval_recall_B-DISEASE": 0.7905528674545887, + "eval_recall_B-MEDICATION": 0.9329062110695854, + "eval_recall_B-PROCEDURE": 0.8210985626283368, + "eval_recall_B-SYMPTOM": 0.7859000684462697, + "eval_recall_I-DISEASE": 0.8384262221051443, + "eval_recall_I-MEDICATION": 0.942508065795737, + "eval_recall_I-PROCEDURE": 0.8332884525936988, + "eval_recall_I-SYMPTOM": 0.7848377390930582, + "eval_recall_O": 0.9395250115419437, + "eval_recall_macro": 0.8521159111920403, + "eval_recall_micro": 0.8904472939909823, + "eval_roc_auc_B-DISEASE": 0.8939787349011639, + "eval_roc_auc_B-MEDICATION": 0.966357083606876, + "eval_roc_auc_B-PROCEDURE": 0.9094372113385102, + "eval_roc_auc_B-SYMPTOM": 0.8913483585328563, + "eval_roc_auc_I-DISEASE": 0.9105856356600845, + "eval_roc_auc_I-MEDICATION": 0.9709506682682745, + "eval_roc_auc_I-PROCEDURE": 0.9110413996799644, + "eval_roc_auc_I-SYMPTOM": 0.8811634945418189, + "eval_roc_auc_O": 0.9036988882084248, + "eval_runtime": 61.2016, + "eval_samples_per_second": 177.659, + "eval_steps_per_second": 22.222, + "step": 211768 + }, + { + "epoch": 8.00755543802652, + "grad_norm": 0.04221045598387718, + "learning_rate": 3.9911601375089725e-06, + "loss": 0.0014, + "step": 211968 + }, + { + "epoch": 8.017226398700465, + "grad_norm": 0.10489369928836823, + "learning_rate": 3.971818216161083e-06, + "loss": 0.0014, + "step": 212224 + }, + { + "epoch": 8.02689735937441, + "grad_norm": 0.005116314627230167, + "learning_rate": 3.952476294813192e-06, + "loss": 0.0017, + "step": 212480 + }, + { + "epoch": 8.036568320048355, + "grad_norm": 0.013480676338076591, + "learning_rate": 3.933134373465302e-06, + "loss": 0.0015, + "step": 212736 + }, + { + "epoch": 8.0462392807223, + "grad_norm": 0.009527523070573807, + "learning_rate": 3.913792452117412e-06, + "loss": 0.0017, + "step": 212992 + }, + { + "epoch": 8.055910241396244, + "grad_norm": 0.06975742429494858, + "learning_rate": 3.8944505307695214e-06, + "loss": 0.0018, + "step": 213248 + }, + { + "epoch": 8.06558120207019, + "grad_norm": 1.0523467063903809, + "learning_rate": 3.8751086094216316e-06, + "loss": 0.0008, + "step": 213504 + }, + { + "epoch": 8.075252162744135, + "grad_norm": 0.021572506055235863, + "learning_rate": 3.855766688073742e-06, + "loss": 0.0015, + "step": 213760 + }, + { + "epoch": 8.084923123418081, + "grad_norm": 0.004556609317660332, + "learning_rate": 3.8365003211061165e-06, + "loss": 0.0025, + "step": 214016 + }, + { + "epoch": 8.094594084092025, + "grad_norm": 0.01564151607453823, + "learning_rate": 3.817158399758226e-06, + "loss": 0.002, + "step": 214272 + }, + { + "epoch": 8.10426504476597, + "grad_norm": 0.02460288256406784, + "learning_rate": 3.797816478410336e-06, + "loss": 0.0023, + "step": 214528 + }, + { + "epoch": 8.113936005439916, + "grad_norm": 0.19753815233707428, + "learning_rate": 3.778474557062446e-06, + "loss": 0.0022, + "step": 214784 + }, + { + "epoch": 8.12360696611386, + "grad_norm": 0.0022627103608101606, + "learning_rate": 3.7591326357145558e-06, + "loss": 0.0027, + "step": 215040 + }, + { + "epoch": 8.133277926787805, + "grad_norm": 0.027415508404374123, + "learning_rate": 3.7397907143666655e-06, + "loss": 0.0013, + "step": 215296 + }, + { + "epoch": 8.14294888746175, + "grad_norm": 0.13008584082126617, + "learning_rate": 3.7204487930187756e-06, + "loss": 0.0023, + "step": 215552 + }, + { + "epoch": 8.152619848135696, + "grad_norm": 0.03469686210155487, + "learning_rate": 3.7011068716708853e-06, + "loss": 0.0017, + "step": 215808 + }, + { + "epoch": 8.16229080880964, + "grad_norm": 0.27707692980766296, + "learning_rate": 3.681764950322995e-06, + "loss": 0.0016, + "step": 216064 + }, + { + "epoch": 8.171961769483586, + "grad_norm": 0.001276107388548553, + "learning_rate": 3.662423028975105e-06, + "loss": 0.0019, + "step": 216320 + }, + { + "epoch": 8.181632730157531, + "grad_norm": 0.1010005995631218, + "learning_rate": 3.6431566620074804e-06, + "loss": 0.0017, + "step": 216576 + }, + { + "epoch": 8.191303690831475, + "grad_norm": 0.0014761561760678887, + "learning_rate": 3.62381474065959e-06, + "loss": 0.0012, + "step": 216832 + }, + { + "epoch": 8.20097465150542, + "grad_norm": 0.01065876055508852, + "learning_rate": 3.6044728193117002e-06, + "loss": 0.0018, + "step": 217088 + }, + { + "epoch": 8.210645612179366, + "grad_norm": 0.013767397962510586, + "learning_rate": 3.58513089796381e-06, + "loss": 0.0019, + "step": 217344 + }, + { + "epoch": 8.220316572853312, + "grad_norm": 0.06974133104085922, + "learning_rate": 3.5657889766159196e-06, + "loss": 0.0016, + "step": 217600 + }, + { + "epoch": 8.229987533527256, + "grad_norm": 0.0005238667945377529, + "learning_rate": 3.5464470552680298e-06, + "loss": 0.002, + "step": 217856 + }, + { + "epoch": 8.239658494201201, + "grad_norm": 0.0016113127348944545, + "learning_rate": 3.5271051339201395e-06, + "loss": 0.0021, + "step": 218112 + }, + { + "epoch": 8.249329454875147, + "grad_norm": 0.006277918349951506, + "learning_rate": 3.5077632125722496e-06, + "loss": 0.002, + "step": 218368 + }, + { + "epoch": 8.259000415549092, + "grad_norm": 6.543371200561523, + "learning_rate": 3.4884212912243593e-06, + "loss": 0.0032, + "step": 218624 + }, + { + "epoch": 8.268671376223036, + "grad_norm": 0.002728199353441596, + "learning_rate": 3.469154924256734e-06, + "loss": 0.0017, + "step": 218880 + }, + { + "epoch": 8.278342336896982, + "grad_norm": 0.0380263552069664, + "learning_rate": 3.449813002908844e-06, + "loss": 0.0015, + "step": 219136 + }, + { + "epoch": 8.288013297570927, + "grad_norm": 0.09521731734275818, + "learning_rate": 3.430471081560954e-06, + "loss": 0.0014, + "step": 219392 + }, + { + "epoch": 8.297684258244871, + "grad_norm": 0.05859646946191788, + "learning_rate": 3.4111291602130637e-06, + "loss": 0.0016, + "step": 219648 + }, + { + "epoch": 8.307355218918817, + "grad_norm": 0.057360127568244934, + "learning_rate": 3.3917872388651734e-06, + "loss": 0.0015, + "step": 219904 + }, + { + "epoch": 8.317026179592762, + "grad_norm": 0.005022614262998104, + "learning_rate": 3.3724453175172835e-06, + "loss": 0.0018, + "step": 220160 + }, + { + "epoch": 8.326697140266708, + "grad_norm": 0.009211408905684948, + "learning_rate": 3.353103396169393e-06, + "loss": 0.0019, + "step": 220416 + }, + { + "epoch": 8.336368100940652, + "grad_norm": 0.8216173052787781, + "learning_rate": 3.333761474821503e-06, + "loss": 0.0015, + "step": 220672 + }, + { + "epoch": 8.346039061614597, + "grad_norm": 0.036033984273672104, + "learning_rate": 3.314419553473613e-06, + "loss": 0.0019, + "step": 220928 + }, + { + "epoch": 8.355710022288543, + "grad_norm": 0.28528496623039246, + "learning_rate": 3.295153186505988e-06, + "loss": 0.0016, + "step": 221184 + }, + { + "epoch": 8.365380982962487, + "grad_norm": 0.21900594234466553, + "learning_rate": 3.2758112651580976e-06, + "loss": 0.0011, + "step": 221440 + }, + { + "epoch": 8.375051943636432, + "grad_norm": 0.1995537430047989, + "learning_rate": 3.2564693438102073e-06, + "loss": 0.0011, + "step": 221696 + }, + { + "epoch": 8.384722904310378, + "grad_norm": 0.05183367431163788, + "learning_rate": 3.2371274224623174e-06, + "loss": 0.0016, + "step": 221952 + }, + { + "epoch": 8.394393864984323, + "grad_norm": 0.8787815570831299, + "learning_rate": 3.217785501114427e-06, + "loss": 0.0016, + "step": 222208 + }, + { + "epoch": 8.404064825658267, + "grad_norm": 0.013117530383169651, + "learning_rate": 3.198443579766537e-06, + "loss": 0.0017, + "step": 222464 + }, + { + "epoch": 8.413735786332213, + "grad_norm": 0.005170230288058519, + "learning_rate": 3.179101658418647e-06, + "loss": 0.0018, + "step": 222720 + }, + { + "epoch": 8.423406747006158, + "grad_norm": 0.07186521589756012, + "learning_rate": 3.1597597370707567e-06, + "loss": 0.0024, + "step": 222976 + }, + { + "epoch": 8.433077707680102, + "grad_norm": 0.4515911042690277, + "learning_rate": 3.1404178157228664e-06, + "loss": 0.0023, + "step": 223232 + }, + { + "epoch": 8.442748668354048, + "grad_norm": 0.05934571474790573, + "learning_rate": 3.121151448755242e-06, + "loss": 0.0024, + "step": 223488 + }, + { + "epoch": 8.452419629027993, + "grad_norm": 0.5439963936805725, + "learning_rate": 3.1018095274073517e-06, + "loss": 0.0015, + "step": 223744 + }, + { + "epoch": 8.462090589701939, + "grad_norm": 0.002186891855672002, + "learning_rate": 3.0824676060594614e-06, + "loss": 0.0012, + "step": 224000 + }, + { + "epoch": 8.471761550375883, + "grad_norm": 0.005363143049180508, + "learning_rate": 3.0631256847115716e-06, + "loss": 0.0019, + "step": 224256 + }, + { + "epoch": 8.481432511049828, + "grad_norm": 0.8389096856117249, + "learning_rate": 3.0437837633636813e-06, + "loss": 0.0014, + "step": 224512 + }, + { + "epoch": 8.491103471723774, + "grad_norm": 0.009227742440998554, + "learning_rate": 3.024441842015791e-06, + "loss": 0.0015, + "step": 224768 + }, + { + "epoch": 8.500774432397717, + "grad_norm": 0.021440809592604637, + "learning_rate": 3.005099920667901e-06, + "loss": 0.0012, + "step": 225024 + }, + { + "epoch": 8.510445393071663, + "grad_norm": 0.053202465176582336, + "learning_rate": 2.985757999320011e-06, + "loss": 0.0021, + "step": 225280 + }, + { + "epoch": 8.520116353745609, + "grad_norm": 0.09795872122049332, + "learning_rate": 2.9664160779721205e-06, + "loss": 0.0011, + "step": 225536 + }, + { + "epoch": 8.529787314419554, + "grad_norm": 0.07053809612989426, + "learning_rate": 2.9470741566242306e-06, + "loss": 0.002, + "step": 225792 + }, + { + "epoch": 8.539458275093498, + "grad_norm": 0.015584302134811878, + "learning_rate": 2.9277322352763403e-06, + "loss": 0.0017, + "step": 226048 + }, + { + "epoch": 8.549129235767444, + "grad_norm": 0.014636941254138947, + "learning_rate": 2.9083903139284505e-06, + "loss": 0.002, + "step": 226304 + }, + { + "epoch": 8.55880019644139, + "grad_norm": 0.002259760396555066, + "learning_rate": 2.88904839258056e-06, + "loss": 0.0022, + "step": 226560 + }, + { + "epoch": 8.568471157115333, + "grad_norm": 0.1187112033367157, + "learning_rate": 2.86970647123267e-06, + "loss": 0.0019, + "step": 226816 + }, + { + "epoch": 8.578142117789278, + "grad_norm": 1.316362738609314, + "learning_rate": 2.85036454988478e-06, + "loss": 0.0018, + "step": 227072 + }, + { + "epoch": 8.587813078463224, + "grad_norm": 0.02268380858004093, + "learning_rate": 2.8310226285368897e-06, + "loss": 0.0015, + "step": 227328 + }, + { + "epoch": 8.59748403913717, + "grad_norm": 0.11864591389894485, + "learning_rate": 2.8117562615692645e-06, + "loss": 0.0016, + "step": 227584 + }, + { + "epoch": 8.607154999811113, + "grad_norm": 0.1629875749349594, + "learning_rate": 2.7924143402213743e-06, + "loss": 0.0013, + "step": 227840 + }, + { + "epoch": 8.616825960485059, + "grad_norm": 0.0012598687317222357, + "learning_rate": 2.7730724188734844e-06, + "loss": 0.0015, + "step": 228096 + }, + { + "epoch": 8.626496921159005, + "grad_norm": 0.012492740526795387, + "learning_rate": 2.753730497525594e-06, + "loss": 0.0017, + "step": 228352 + }, + { + "epoch": 8.63616788183295, + "grad_norm": 0.14352132380008698, + "learning_rate": 2.734388576177704e-06, + "loss": 0.0013, + "step": 228608 + }, + { + "epoch": 8.645838842506894, + "grad_norm": 0.022044667974114418, + "learning_rate": 2.715046654829814e-06, + "loss": 0.0015, + "step": 228864 + }, + { + "epoch": 8.65550980318084, + "grad_norm": 0.28277599811553955, + "learning_rate": 2.6957047334819236e-06, + "loss": 0.0024, + "step": 229120 + }, + { + "epoch": 8.665180763854785, + "grad_norm": 0.005805708467960358, + "learning_rate": 2.6763628121340333e-06, + "loss": 0.0018, + "step": 229376 + }, + { + "epoch": 8.674851724528729, + "grad_norm": 0.042201265692710876, + "learning_rate": 2.6570208907861435e-06, + "loss": 0.0013, + "step": 229632 + }, + { + "epoch": 8.684522685202674, + "grad_norm": 0.5332639813423157, + "learning_rate": 2.637678969438253e-06, + "loss": 0.001, + "step": 229888 + }, + { + "epoch": 8.69419364587662, + "grad_norm": 0.001226294320076704, + "learning_rate": 2.618337048090363e-06, + "loss": 0.0021, + "step": 230144 + }, + { + "epoch": 8.703864606550566, + "grad_norm": 0.008391711860895157, + "learning_rate": 2.598995126742473e-06, + "loss": 0.0018, + "step": 230400 + }, + { + "epoch": 8.71353556722451, + "grad_norm": 0.005565817467868328, + "learning_rate": 2.5797287597748482e-06, + "loss": 0.0014, + "step": 230656 + }, + { + "epoch": 8.723206527898455, + "grad_norm": 0.004753118846565485, + "learning_rate": 2.560386838426958e-06, + "loss": 0.0012, + "step": 230912 + }, + { + "epoch": 8.7328774885724, + "grad_norm": 0.0013271772768348455, + "learning_rate": 2.541044917079068e-06, + "loss": 0.0021, + "step": 231168 + }, + { + "epoch": 8.742548449246344, + "grad_norm": 0.06540732830762863, + "learning_rate": 2.5217029957311778e-06, + "loss": 0.0017, + "step": 231424 + }, + { + "epoch": 8.75221940992029, + "grad_norm": 0.16908077895641327, + "learning_rate": 2.5023610743832875e-06, + "loss": 0.002, + "step": 231680 + }, + { + "epoch": 8.761890370594235, + "grad_norm": 0.29503974318504333, + "learning_rate": 2.4830191530353976e-06, + "loss": 0.0016, + "step": 231936 + }, + { + "epoch": 8.771561331268181, + "grad_norm": 9.24250316619873, + "learning_rate": 2.4636772316875073e-06, + "loss": 0.0019, + "step": 232192 + }, + { + "epoch": 8.781232291942125, + "grad_norm": 0.09708067774772644, + "learning_rate": 2.444335310339617e-06, + "loss": 0.0011, + "step": 232448 + }, + { + "epoch": 8.79090325261607, + "grad_norm": 0.001615343731828034, + "learning_rate": 2.425068943371992e-06, + "loss": 0.0017, + "step": 232704 + }, + { + "epoch": 8.800574213290016, + "grad_norm": 0.21001236140727997, + "learning_rate": 2.405727022024102e-06, + "loss": 0.0021, + "step": 232960 + }, + { + "epoch": 8.81024517396396, + "grad_norm": 0.014585831202566624, + "learning_rate": 2.3863851006762117e-06, + "loss": 0.001, + "step": 233216 + }, + { + "epoch": 8.819916134637905, + "grad_norm": 0.6824801564216614, + "learning_rate": 2.367043179328322e-06, + "loss": 0.0016, + "step": 233472 + }, + { + "epoch": 8.829587095311851, + "grad_norm": 0.014681616798043251, + "learning_rate": 2.3477012579804315e-06, + "loss": 0.002, + "step": 233728 + }, + { + "epoch": 8.839258055985797, + "grad_norm": 0.010957110673189163, + "learning_rate": 2.3283593366325417e-06, + "loss": 0.0013, + "step": 233984 + }, + { + "epoch": 8.84892901665974, + "grad_norm": 1.2413954734802246, + "learning_rate": 2.3090174152846514e-06, + "loss": 0.0011, + "step": 234240 + }, + { + "epoch": 8.858599977333686, + "grad_norm": 0.1283838450908661, + "learning_rate": 2.289751048317026e-06, + "loss": 0.0011, + "step": 234496 + }, + { + "epoch": 8.868270938007631, + "grad_norm": 0.030070781707763672, + "learning_rate": 2.2704846813494014e-06, + "loss": 0.0013, + "step": 234752 + }, + { + "epoch": 8.877941898681575, + "grad_norm": 0.10878092795610428, + "learning_rate": 2.251142760001511e-06, + "loss": 0.0012, + "step": 235008 + }, + { + "epoch": 8.88761285935552, + "grad_norm": 0.0010157637298107147, + "learning_rate": 2.2318008386536213e-06, + "loss": 0.001, + "step": 235264 + }, + { + "epoch": 8.897283820029466, + "grad_norm": 0.0011938668321818113, + "learning_rate": 2.212458917305731e-06, + "loss": 0.0012, + "step": 235520 + }, + { + "epoch": 8.906954780703412, + "grad_norm": 0.0009772476041689515, + "learning_rate": 2.1931169959578407e-06, + "loss": 0.0012, + "step": 235776 + }, + { + "epoch": 8.916625741377356, + "grad_norm": 0.02653772011399269, + "learning_rate": 2.173775074609951e-06, + "loss": 0.0011, + "step": 236032 + }, + { + "epoch": 8.926296702051301, + "grad_norm": 0.002025567227974534, + "learning_rate": 2.1544331532620605e-06, + "loss": 0.0011, + "step": 236288 + }, + { + "epoch": 8.935967662725247, + "grad_norm": 0.07364089787006378, + "learning_rate": 2.1350912319141702e-06, + "loss": 0.0017, + "step": 236544 + }, + { + "epoch": 8.94563862339919, + "grad_norm": 0.000813729246146977, + "learning_rate": 2.1157493105662803e-06, + "loss": 0.0012, + "step": 236800 + }, + { + "epoch": 8.955309584073136, + "grad_norm": 0.3964155912399292, + "learning_rate": 2.09640738921839e-06, + "loss": 0.0013, + "step": 237056 + }, + { + "epoch": 8.964980544747082, + "grad_norm": 0.02675843983888626, + "learning_rate": 2.0770654678704998e-06, + "loss": 0.0013, + "step": 237312 + }, + { + "epoch": 8.974651505421027, + "grad_norm": 0.45542263984680176, + "learning_rate": 2.05772354652261e-06, + "loss": 0.0009, + "step": 237568 + }, + { + "epoch": 8.984322466094971, + "grad_norm": 0.004206574056297541, + "learning_rate": 2.0383816251747196e-06, + "loss": 0.0011, + "step": 237824 + }, + { + "epoch": 8.993993426768917, + "grad_norm": 0.029356837272644043, + "learning_rate": 2.0190397038268293e-06, + "loss": 0.0011, + "step": 238080 + }, + { + "epoch": 9.0, + "eval_f1_B-DISEASE": 0.820265182517597, + "eval_f1_B-MEDICATION": 0.9411343069617418, + "eval_f1_B-PROCEDURE": 0.8432354734732991, + "eval_f1_B-SYMPTOM": 0.8137595552466991, + "eval_f1_I-DISEASE": 0.853021119122057, + "eval_f1_I-MEDICATION": 0.95037530170013, + "eval_f1_I-PROCEDURE": 0.8561874578893519, + "eval_f1_I-SYMPTOM": 0.8206522616146783, + "eval_f1_O": 0.9278188308044935, + "eval_f1_macro": 0.8696054988144497, + "eval_f1_micro": 0.8938386961399782, + "eval_loss": 0.507999837398529, + "eval_precision_B-DISEASE": 0.8472398343055203, + "eval_precision_B-MEDICATION": 0.9476723204619271, + "eval_precision_B-PROCEDURE": 0.8588731144631766, + "eval_precision_B-SYMPTOM": 0.826393789696542, + "eval_precision_I-DISEASE": 0.8680188544493643, + "eval_precision_I-MEDICATION": 0.9531815279846776, + "eval_precision_I-PROCEDURE": 0.8730946382675522, + "eval_precision_I-SYMPTOM": 0.8215935894536173, + "eval_precision_O": 0.9236782838636837, + "eval_precision_macro": 0.8799717725495623, + "eval_precision_micro": 0.895900159381018, + "eval_rauc_macro": 0.9196513344131847, + "eval_rauc_micro": 0.9392897520293723, + "eval_recall_B-DISEASE": 0.7949551836281431, + "eval_recall_B-MEDICATION": 0.9346858871685353, + "eval_recall_B-PROCEDURE": 0.8281570841889117, + "eval_recall_B-SYMPTOM": 0.8015058179329226, + "eval_recall_I-DISEASE": 0.8385328466237659, + "eval_recall_I-MEDICATION": 0.9475855503252764, + "eval_recall_I-PROCEDURE": 0.8399226419251389, + "eval_recall_I-SYMPTOM": 0.8197130883301096, + "eval_recall_O": 0.9319966663269038, + "eval_recall_macro": 0.8596727518277453, + "eval_recall_micro": 0.8917866979597766, + "eval_roc_auc_B-DISEASE": 0.8961441020336804, + "eval_roc_auc_B-MEDICATION": 0.9672374649513288, + "eval_roc_auc_B-PROCEDURE": 0.9129064100226968, + "eval_roc_auc_B-SYMPTOM": 0.8989321500948878, + "eval_roc_auc_I-DISEASE": 0.9109411921441964, + "eval_roc_auc_I-MEDICATION": 0.973469577978567, + "eval_roc_auc_I-PROCEDURE": 0.9140138334490733, + "eval_roc_auc_I-SYMPTOM": 0.8959855777737502, + "eval_roc_auc_O": 0.9072317012704816, + "eval_runtime": 61.1454, + "eval_samples_per_second": 177.822, + "eval_steps_per_second": 22.242, + "step": 238239 + }, + { + "epoch": 9.003664387442862, + "grad_norm": 0.0012333148624747992, + "learning_rate": 1.9996977824789394e-06, + "loss": 0.0009, + "step": 238336 + }, + { + "epoch": 9.013335348116808, + "grad_norm": 0.05198514088988304, + "learning_rate": 1.980355861131049e-06, + "loss": 0.0013, + "step": 238592 + }, + { + "epoch": 9.023006308790752, + "grad_norm": 0.002556259511038661, + "learning_rate": 1.9610894941634244e-06, + "loss": 0.0007, + "step": 238848 + }, + { + "epoch": 9.032677269464697, + "grad_norm": 0.017605243250727654, + "learning_rate": 1.941747572815534e-06, + "loss": 0.0003, + "step": 239104 + }, + { + "epoch": 9.042348230138643, + "grad_norm": 0.2546218931674957, + "learning_rate": 1.9224056514676438e-06, + "loss": 0.0011, + "step": 239360 + }, + { + "epoch": 9.052019190812587, + "grad_norm": 0.005463745910674334, + "learning_rate": 1.903063730119754e-06, + "loss": 0.0007, + "step": 239616 + }, + { + "epoch": 9.061690151486532, + "grad_norm": 0.013387962244451046, + "learning_rate": 1.8837218087718636e-06, + "loss": 0.0009, + "step": 239872 + }, + { + "epoch": 9.071361112160478, + "grad_norm": 0.0036850119940936565, + "learning_rate": 1.8643798874239735e-06, + "loss": 0.0008, + "step": 240128 + }, + { + "epoch": 9.081032072834423, + "grad_norm": 0.002097562188282609, + "learning_rate": 1.8450379660760835e-06, + "loss": 0.0006, + "step": 240384 + }, + { + "epoch": 9.090703033508367, + "grad_norm": 0.009374704211950302, + "learning_rate": 1.8256960447281932e-06, + "loss": 0.0007, + "step": 240640 + }, + { + "epoch": 9.100373994182313, + "grad_norm": 0.009544138796627522, + "learning_rate": 1.806354123380303e-06, + "loss": 0.001, + "step": 240896 + }, + { + "epoch": 9.110044954856258, + "grad_norm": 0.014574944972991943, + "learning_rate": 1.787012202032413e-06, + "loss": 0.001, + "step": 241152 + }, + { + "epoch": 9.119715915530202, + "grad_norm": 0.0014770117122679949, + "learning_rate": 1.7676702806845227e-06, + "loss": 0.001, + "step": 241408 + }, + { + "epoch": 9.129386876204148, + "grad_norm": 0.00023075766512192786, + "learning_rate": 1.7483283593366326e-06, + "loss": 0.0006, + "step": 241664 + }, + { + "epoch": 9.139057836878093, + "grad_norm": 0.036480896174907684, + "learning_rate": 1.7289864379887425e-06, + "loss": 0.0007, + "step": 241920 + }, + { + "epoch": 9.148728797552039, + "grad_norm": 0.005274293944239616, + "learning_rate": 1.7096445166408524e-06, + "loss": 0.0008, + "step": 242176 + }, + { + "epoch": 9.158399758225983, + "grad_norm": 0.0636928603053093, + "learning_rate": 1.6903025952929622e-06, + "loss": 0.0005, + "step": 242432 + }, + { + "epoch": 9.168070718899928, + "grad_norm": 0.007083303295075893, + "learning_rate": 1.670960673945072e-06, + "loss": 0.0012, + "step": 242688 + }, + { + "epoch": 9.177741679573874, + "grad_norm": 0.0029853142332285643, + "learning_rate": 1.651618752597182e-06, + "loss": 0.0013, + "step": 242944 + }, + { + "epoch": 9.187412640247818, + "grad_norm": 0.004745794460177422, + "learning_rate": 1.6322768312492917e-06, + "loss": 0.0008, + "step": 243200 + }, + { + "epoch": 9.197083600921763, + "grad_norm": 0.029157089069485664, + "learning_rate": 1.6129349099014016e-06, + "loss": 0.0009, + "step": 243456 + }, + { + "epoch": 9.206754561595709, + "grad_norm": 0.09862171113491058, + "learning_rate": 1.5935929885535115e-06, + "loss": 0.001, + "step": 243712 + }, + { + "epoch": 9.216425522269654, + "grad_norm": 0.02549828216433525, + "learning_rate": 1.5742510672056212e-06, + "loss": 0.0013, + "step": 243968 + }, + { + "epoch": 9.226096482943598, + "grad_norm": 0.0003341349947731942, + "learning_rate": 1.5549091458577311e-06, + "loss": 0.0011, + "step": 244224 + }, + { + "epoch": 9.235767443617544, + "grad_norm": 0.00141440168954432, + "learning_rate": 1.535567224509841e-06, + "loss": 0.0007, + "step": 244480 + }, + { + "epoch": 9.24543840429149, + "grad_norm": 0.0013356832787394524, + "learning_rate": 1.516225303161951e-06, + "loss": 0.0014, + "step": 244736 + }, + { + "epoch": 9.255109364965433, + "grad_norm": 0.06080584600567818, + "learning_rate": 1.4968833818140607e-06, + "loss": 0.0006, + "step": 244992 + }, + { + "epoch": 9.264780325639379, + "grad_norm": 0.11237218230962753, + "learning_rate": 1.4775414604661706e-06, + "loss": 0.0012, + "step": 245248 + }, + { + "epoch": 9.274451286313324, + "grad_norm": 0.23928683996200562, + "learning_rate": 1.4581995391182805e-06, + "loss": 0.0005, + "step": 245504 + }, + { + "epoch": 9.28412224698727, + "grad_norm": 0.001771116629242897, + "learning_rate": 1.4388576177703902e-06, + "loss": 0.0011, + "step": 245760 + }, + { + "epoch": 9.293793207661214, + "grad_norm": 0.0017612532246857882, + "learning_rate": 1.4195156964225001e-06, + "loss": 0.0007, + "step": 246016 + }, + { + "epoch": 9.303464168335159, + "grad_norm": 0.00843687541782856, + "learning_rate": 1.4002493294548752e-06, + "loss": 0.0018, + "step": 246272 + }, + { + "epoch": 9.313135129009105, + "grad_norm": 0.07218307256698608, + "learning_rate": 1.380907408106985e-06, + "loss": 0.0007, + "step": 246528 + }, + { + "epoch": 9.322806089683048, + "grad_norm": 0.00684273662045598, + "learning_rate": 1.361565486759095e-06, + "loss": 0.0005, + "step": 246784 + }, + { + "epoch": 9.332477050356994, + "grad_norm": 0.0004243789007887244, + "learning_rate": 1.34229911979147e-06, + "loss": 0.0005, + "step": 247040 + }, + { + "epoch": 9.34214801103094, + "grad_norm": 0.000982985831797123, + "learning_rate": 1.32295719844358e-06, + "loss": 0.0009, + "step": 247296 + }, + { + "epoch": 9.351818971704885, + "grad_norm": 0.008006760850548744, + "learning_rate": 1.3036152770956897e-06, + "loss": 0.0008, + "step": 247552 + }, + { + "epoch": 9.361489932378829, + "grad_norm": 0.0012839402770623565, + "learning_rate": 1.2842733557477996e-06, + "loss": 0.0012, + "step": 247808 + }, + { + "epoch": 9.371160893052775, + "grad_norm": 0.0076642511412501335, + "learning_rate": 1.2649314343999095e-06, + "loss": 0.0007, + "step": 248064 + }, + { + "epoch": 9.38083185372672, + "grad_norm": 0.0976879671216011, + "learning_rate": 1.2455895130520192e-06, + "loss": 0.0006, + "step": 248320 + }, + { + "epoch": 9.390502814400666, + "grad_norm": 2.341963291168213, + "learning_rate": 1.2262475917041291e-06, + "loss": 0.0009, + "step": 248576 + }, + { + "epoch": 9.40017377507461, + "grad_norm": 0.002288688672706485, + "learning_rate": 1.206905670356239e-06, + "loss": 0.0007, + "step": 248832 + }, + { + "epoch": 9.409844735748555, + "grad_norm": 0.0005827232380397618, + "learning_rate": 1.187563749008349e-06, + "loss": 0.0009, + "step": 249088 + }, + { + "epoch": 9.4195156964225, + "grad_norm": 0.00030542805325239897, + "learning_rate": 1.1682218276604587e-06, + "loss": 0.0007, + "step": 249344 + }, + { + "epoch": 9.429186657096444, + "grad_norm": 0.00033465458545833826, + "learning_rate": 1.1488799063125686e-06, + "loss": 0.0007, + "step": 249600 + }, + { + "epoch": 9.43885761777039, + "grad_norm": 0.0005558193661272526, + "learning_rate": 1.1295379849646785e-06, + "loss": 0.001, + "step": 249856 + }, + { + "epoch": 9.448528578444336, + "grad_norm": 0.008819201961159706, + "learning_rate": 1.1101960636167882e-06, + "loss": 0.0008, + "step": 250112 + }, + { + "epoch": 9.458199539118281, + "grad_norm": 0.011124982498586178, + "learning_rate": 1.0908541422688981e-06, + "loss": 0.001, + "step": 250368 + }, + { + "epoch": 9.467870499792225, + "grad_norm": 0.07561460882425308, + "learning_rate": 1.071512220921008e-06, + "loss": 0.001, + "step": 250624 + }, + { + "epoch": 9.47754146046617, + "grad_norm": 0.00775932939723134, + "learning_rate": 1.0521702995731177e-06, + "loss": 0.0008, + "step": 250880 + }, + { + "epoch": 9.487212421140116, + "grad_norm": 0.0026791319251060486, + "learning_rate": 1.032903932605493e-06, + "loss": 0.0009, + "step": 251136 + }, + { + "epoch": 9.49688338181406, + "grad_norm": 0.0014814439928159118, + "learning_rate": 1.0135620112576027e-06, + "loss": 0.0009, + "step": 251392 + }, + { + "epoch": 9.506554342488005, + "grad_norm": 0.17079958319664001, + "learning_rate": 9.942200899097126e-07, + "loss": 0.0008, + "step": 251648 + }, + { + "epoch": 9.516225303161951, + "grad_norm": 0.006455567199736834, + "learning_rate": 9.748781685618225e-07, + "loss": 0.0008, + "step": 251904 + }, + { + "epoch": 9.525896263835897, + "grad_norm": 0.0035214037634432316, + "learning_rate": 9.555362472139324e-07, + "loss": 0.0003, + "step": 252160 + }, + { + "epoch": 9.53556722450984, + "grad_norm": 0.0005882234545424581, + "learning_rate": 9.361943258660422e-07, + "loss": 0.0004, + "step": 252416 + }, + { + "epoch": 9.545238185183786, + "grad_norm": 0.00487788300961256, + "learning_rate": 9.168524045181521e-07, + "loss": 0.0011, + "step": 252672 + }, + { + "epoch": 9.554909145857732, + "grad_norm": 0.0005454017664305866, + "learning_rate": 8.975104831702619e-07, + "loss": 0.0009, + "step": 252928 + }, + { + "epoch": 9.564580106531675, + "grad_norm": 0.0002963479782920331, + "learning_rate": 8.782441162026368e-07, + "loss": 0.0008, + "step": 253184 + }, + { + "epoch": 9.574251067205621, + "grad_norm": 0.014803556725382805, + "learning_rate": 8.589021948547467e-07, + "loss": 0.0007, + "step": 253440 + }, + { + "epoch": 9.583922027879566, + "grad_norm": 0.0010204812278971076, + "learning_rate": 8.395602735068565e-07, + "loss": 0.0006, + "step": 253696 + }, + { + "epoch": 9.593592988553512, + "grad_norm": 0.000603766180574894, + "learning_rate": 8.202183521589665e-07, + "loss": 0.0005, + "step": 253952 + }, + { + "epoch": 9.603263949227456, + "grad_norm": 0.0006934937555342913, + "learning_rate": 8.008764308110763e-07, + "loss": 0.0004, + "step": 254208 + }, + { + "epoch": 9.612934909901401, + "grad_norm": 0.002163499826565385, + "learning_rate": 7.815345094631861e-07, + "loss": 0.0012, + "step": 254464 + }, + { + "epoch": 9.622605870575347, + "grad_norm": 0.009007874876260757, + "learning_rate": 7.62192588115296e-07, + "loss": 0.0009, + "step": 254720 + }, + { + "epoch": 9.632276831249293, + "grad_norm": 0.001189779955893755, + "learning_rate": 7.42850666767406e-07, + "loss": 0.0007, + "step": 254976 + }, + { + "epoch": 9.641947791923236, + "grad_norm": 0.006172012072056532, + "learning_rate": 7.235087454195158e-07, + "loss": 0.0008, + "step": 255232 + }, + { + "epoch": 9.651618752597182, + "grad_norm": 0.0028820731677114964, + "learning_rate": 7.041668240716256e-07, + "loss": 0.0009, + "step": 255488 + }, + { + "epoch": 9.661289713271128, + "grad_norm": 0.002549013588577509, + "learning_rate": 6.848249027237356e-07, + "loss": 0.0005, + "step": 255744 + }, + { + "epoch": 9.670960673945071, + "grad_norm": 0.0011592130176723003, + "learning_rate": 6.654829813758454e-07, + "loss": 0.0005, + "step": 256000 + }, + { + "epoch": 9.680631634619017, + "grad_norm": 0.009473592974245548, + "learning_rate": 6.461410600279553e-07, + "loss": 0.0006, + "step": 256256 + }, + { + "epoch": 9.690302595292962, + "grad_norm": 0.013225371949374676, + "learning_rate": 6.267991386800651e-07, + "loss": 0.0006, + "step": 256512 + }, + { + "epoch": 9.699973555966906, + "grad_norm": 0.0005315671442076564, + "learning_rate": 6.0753277171244e-07, + "loss": 0.0009, + "step": 256768 + }, + { + "epoch": 9.709644516640852, + "grad_norm": 0.07691678404808044, + "learning_rate": 5.881908503645499e-07, + "loss": 0.001, + "step": 257024 + }, + { + "epoch": 9.719315477314797, + "grad_norm": 0.00017236363783013076, + "learning_rate": 5.688489290166599e-07, + "loss": 0.0007, + "step": 257280 + }, + { + "epoch": 9.728986437988743, + "grad_norm": 0.00099793984554708, + "learning_rate": 5.495070076687697e-07, + "loss": 0.0016, + "step": 257536 + }, + { + "epoch": 9.738657398662687, + "grad_norm": 0.0005514703807421029, + "learning_rate": 5.301650863208795e-07, + "loss": 0.0004, + "step": 257792 + }, + { + "epoch": 9.748328359336632, + "grad_norm": 0.0029203654266893864, + "learning_rate": 5.108231649729894e-07, + "loss": 0.0007, + "step": 258048 + }, + { + "epoch": 9.757999320010578, + "grad_norm": 0.0004254644736647606, + "learning_rate": 4.914812436250992e-07, + "loss": 0.0016, + "step": 258304 + }, + { + "epoch": 9.767670280684523, + "grad_norm": 0.0031043547205626965, + "learning_rate": 4.721393222772091e-07, + "loss": 0.0005, + "step": 258560 + }, + { + "epoch": 9.777341241358467, + "grad_norm": 0.002943431492894888, + "learning_rate": 4.5279740092931894e-07, + "loss": 0.0008, + "step": 258816 + }, + { + "epoch": 9.787012202032413, + "grad_norm": 0.00015551786054857075, + "learning_rate": 4.334554795814288e-07, + "loss": 0.0011, + "step": 259072 + }, + { + "epoch": 9.796683162706358, + "grad_norm": 0.0021992865949869156, + "learning_rate": 4.1411355823353867e-07, + "loss": 0.0011, + "step": 259328 + }, + { + "epoch": 9.806354123380302, + "grad_norm": 0.3576786518096924, + "learning_rate": 3.947716368856485e-07, + "loss": 0.0011, + "step": 259584 + }, + { + "epoch": 9.816025084054248, + "grad_norm": 0.001682179281488061, + "learning_rate": 3.7542971553775834e-07, + "loss": 0.0007, + "step": 259840 + }, + { + "epoch": 9.825696044728193, + "grad_norm": 0.0001894651068141684, + "learning_rate": 3.560877941898682e-07, + "loss": 0.0007, + "step": 260096 + }, + { + "epoch": 9.835367005402139, + "grad_norm": 0.11490330845117569, + "learning_rate": 3.3674587284197807e-07, + "loss": 0.0006, + "step": 260352 + }, + { + "epoch": 9.845037966076083, + "grad_norm": 0.00011810084106400609, + "learning_rate": 3.1740395149408793e-07, + "loss": 0.0005, + "step": 260608 + }, + { + "epoch": 9.854708926750028, + "grad_norm": 0.0006803704309277236, + "learning_rate": 2.9806203014619774e-07, + "loss": 0.0005, + "step": 260864 + }, + { + "epoch": 9.864379887423974, + "grad_norm": 0.0036548932548612356, + "learning_rate": 2.787201087983076e-07, + "loss": 0.0007, + "step": 261120 + }, + { + "epoch": 9.874050848097918, + "grad_norm": 0.0011051982874050736, + "learning_rate": 2.5937818745041747e-07, + "loss": 0.0009, + "step": 261376 + }, + { + "epoch": 9.883721808771863, + "grad_norm": 0.010463064536452293, + "learning_rate": 2.4003626610252733e-07, + "loss": 0.0006, + "step": 261632 + }, + { + "epoch": 9.893392769445809, + "grad_norm": 0.0015552444383502007, + "learning_rate": 2.2069434475463717e-07, + "loss": 0.0004, + "step": 261888 + }, + { + "epoch": 9.903063730119754, + "grad_norm": 0.00023192820663098246, + "learning_rate": 2.0135242340674704e-07, + "loss": 0.0005, + "step": 262144 + }, + { + "epoch": 9.912734690793698, + "grad_norm": 2.180852174758911, + "learning_rate": 1.8201050205885687e-07, + "loss": 0.0008, + "step": 262400 + }, + { + "epoch": 9.922405651467644, + "grad_norm": 0.2690439820289612, + "learning_rate": 1.6266858071096674e-07, + "loss": 0.0005, + "step": 262656 + }, + { + "epoch": 9.93207661214159, + "grad_norm": 0.0014845479745417833, + "learning_rate": 1.4340221374334178e-07, + "loss": 0.0007, + "step": 262912 + }, + { + "epoch": 9.941747572815533, + "grad_norm": 0.0010751072550192475, + "learning_rate": 1.2406029239545164e-07, + "loss": 0.0017, + "step": 263168 + }, + { + "epoch": 9.951418533489479, + "grad_norm": 0.005477603990584612, + "learning_rate": 1.0479392542782669e-07, + "loss": 0.0003, + "step": 263424 + }, + { + "epoch": 9.961089494163424, + "grad_norm": 0.003662185976281762, + "learning_rate": 8.545200407993654e-08, + "loss": 0.001, + "step": 263680 + }, + { + "epoch": 9.97076045483737, + "grad_norm": 0.0006105359643697739, + "learning_rate": 6.611008273204639e-08, + "loss": 0.001, + "step": 263936 + }, + { + "epoch": 9.980431415511314, + "grad_norm": 0.0005147479241713881, + "learning_rate": 4.676816138415625e-08, + "loss": 0.0006, + "step": 264192 + }, + { + "epoch": 9.99010237618526, + "grad_norm": 0.002465909579768777, + "learning_rate": 2.7426240036266103e-08, + "loss": 0.0007, + "step": 264448 + }, + { + "epoch": 9.999773336859205, + "grad_norm": 0.0049901618622243404, + "learning_rate": 8.084318688375959e-09, + "loss": 0.0008, + "step": 264704 + }, + { + "epoch": 10.0, + "eval_f1_B-DISEASE": 0.8212511199804513, + "eval_f1_B-MEDICATION": 0.9441194996850535, + "eval_f1_B-PROCEDURE": 0.8438837081733407, + "eval_f1_B-SYMPTOM": 0.8142008132624335, + "eval_f1_I-DISEASE": 0.8531332420369243, + "eval_f1_I-MEDICATION": 0.9504971609841921, + "eval_f1_I-PROCEDURE": 0.855959918699867, + "eval_f1_I-SYMPTOM": 0.8203061784176257, + "eval_f1_O": 0.9279796337095003, + "eval_f1_macro": 0.870147919438821, + "eval_f1_micro": 0.8940428457113243, + "eval_loss": 0.5246506333351135, + "eval_precision_B-DISEASE": 0.8439357160793505, + "eval_precision_B-MEDICATION": 0.9548598471059337, + "eval_precision_B-PROCEDURE": 0.8662552367223749, + "eval_precision_B-SYMPTOM": 0.8270493539504342, + "eval_precision_I-DISEASE": 0.8624838159699522, + "eval_precision_I-MEDICATION": 0.9581855315489627, + "eval_precision_I-PROCEDURE": 0.8779157051309547, + "eval_precision_I-SYMPTOM": 0.8292179488587081, + "eval_precision_O": 0.9223528464068083, + "eval_precision_macro": 0.8824728890859421, + "eval_precision_micro": 0.8962622493770654, + "eval_rauc_macro": 0.918932582829971, + "eval_rauc_micro": 0.9393388823070546, + "eval_recall_B-DISEASE": 0.7997541048623781, + "eval_recall_B-MEDICATION": 0.9336180815091654, + "eval_recall_B-PROCEDURE": 0.8226386036960985, + "eval_recall_B-SYMPTOM": 0.8017453798767967, + "eval_recall_I-DISEASE": 0.8439832411344849, + "eval_recall_I-MEDICATION": 0.942931189506532, + "eval_recall_I-PROCEDURE": 0.8350755220446033, + "eval_recall_I-SYMPTOM": 0.8115839243498818, + "eval_recall_O": 0.9336754945048357, + "eval_recall_macro": 0.858333949053864, + "eval_recall_micro": 0.8918344066604195, + "eval_roc_auc_B-DISEASE": 0.898501130076159, + "eval_roc_auc_B-MEDICATION": 0.9667188383374489, + "eval_roc_auc_B-PROCEDURE": 0.9102252873491329, + "eval_roc_auc_B-SYMPTOM": 0.8990597025985936, + "eval_roc_auc_I-DISEASE": 0.9132048321619536, + "eval_roc_auc_I-MEDICATION": 0.9711798590609849, + "eval_roc_auc_I-PROCEDURE": 0.9118804724182935, + "eval_roc_auc_I-SYMPTOM": 0.8927663421914375, + "eval_roc_auc_O": 0.9068567812757345, + "eval_runtime": 60.9219, + "eval_samples_per_second": 178.474, + "eval_steps_per_second": 22.324, + "step": 264710 + } + ], + "logging_steps": 256, + "max_steps": 264710, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.5034084397459712e+18, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}